riml 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/LICENSE +20 -0
- data/README.md +148 -0
- data/bin/riml +69 -0
- data/config/environment.rb +10 -0
- data/lib/ast_rewriter.rb +268 -0
- data/lib/class_map.rb +46 -0
- data/lib/compiler.rb +579 -0
- data/lib/constants.rb +280 -0
- data/lib/errors.rb +4 -0
- data/lib/grammar.y +485 -0
- data/lib/helper.rb +45 -0
- data/lib/lexer.rb +276 -0
- data/lib/nodes.rb +723 -0
- data/lib/parser.rb +2748 -0
- data/lib/walker.rb +15 -0
- data/version.rb +4 -0
- metadata +75 -0
data/lib/lexer.rb
ADDED
@@ -0,0 +1,276 @@
|
|
1
|
+
require File.expand_path('../constants', __FILE__)
|
2
|
+
require File.expand_path('../errors', __FILE__)
|
3
|
+
|
4
|
+
module Riml
|
5
|
+
class Lexer
|
6
|
+
include Riml::Constants
|
7
|
+
|
8
|
+
# Matches, from the start of the string, optional whitespace followed by a
# double quote, and captures the rest of that line as group 1.
SINGLE_LINE_COMMENT_REGEX = /\A\s*"(.*)$/
|
9
|
+
# Matches, at the start of the string, one of the multi-character operators
# '||', '&&', '===', '+=', '-=' or any entry of COMPARISON_OPERATORS
# (presumably provided by the included Riml::Constants -- confirm there).
OPERATOR_REGEX = /\A#{Regexp.union(['||', '&&', '===', '+=', '-='] + COMPARISON_OPERATORS)}/
|
10
|
+
|
11
|
+
attr_reader :tokens, :prev_token, :lineno, :chunk
|
12
|
+
|
13
|
+
# Sets up a lexer over +code+.
#
# code - String of source text to tokenize. A single trailing line
#        terminator is ignored. The caller's string is never modified, so
#        frozen strings are accepted.
def initialize(code)
  # chomp (not chomp!) so that the argument is left untouched
  @code = code.chomp
  @i = 0 # number of characters consumed
  @token_buf = []
  @tokens = []
  @prev_token = nil
  @lineno = 1
  @current_indent = 0
  @indent_pending = false
  @dedent_pending = false
  @one_line_conditional_END_pending = false
  @splat_allowed = false
  # read by tokenize_chunk / parse_dict_vals! before it is first assigned
  @in_function_declaration = false
end
|
27
|
+
|
28
|
+
# Tokenizes the whole source and returns the accumulated token list.
#
# Tokens are pulled with next_token until it returns nil. Looping on
# next_token (rather than on more_code_to_tokenize?) matters: one chunk can
# buffer several tokens, so the source may be fully consumed while tokens
# are still waiting in the buffer. Draining until nil also guarantees that
# next_token gets to run its end-of-input check.
#
# Returns the Array of tokens collected in @tokens.
def tokenize
  while (token = next_token)
    @tokens << token
  end
  @tokens
end
|
35
|
+
|
36
|
+
# Returns the next token, lexing further chunks of source on demand.
#
# Chunks are tokenized until at least one token is buffered or the source
# is used up. The token handed out is also remembered in @prev_token.
# When nothing is left, check_indentation is run and nil is returned.
def next_token
  tokenize_chunk(get_new_chunk) while @token_buf.empty? && more_code_to_tokenize?

  unless @token_buf.empty?
    @prev_token = @token_buf.shift
    return @prev_token
  end

  check_indentation
  nil
end
|
46
|
+
|
47
|
+
# Lexes whatever starts at the beginning of +chunk+, appends the resulting
# token(s) to @token_buf and advances @i by the number of characters
# consumed. Some inputs (whitespace, comments, line continuations) consume
# characters without producing a token.
#
# chunk - String: the not-yet-consumed remainder of the source.
#
# Raises SyntaxError for a splat outside parentheses and for a heredoc
# whose terminator cannot be found.
def tokenize_chunk(chunk)
  @chunk = chunk

  # deal with line continuations
  if (cont = chunk[/\A(\n*)\s*\\/])
    @i += cont.size
    @lineno += $1.size
    return
  end

  # all lines that start with ':' pass right through unmodified
  if (prev_token.nil? || prev_token[0] == :NEWLINE) && (ex_literal = chunk[/\A\s*:(.*)?$/])
    @i += ex_literal.size
    @token_buf << [:EX_LITERAL, $1]
    return
  end

  if (splat_var = chunk[/\Aa:\d+/])
    @i += splat_var.size
    @token_buf << [:SCOPE_MODIFIER, 'a:'] << [:IDENTIFIER, splat_var[2..-1]]
  # the 'n' scope modifier is added by riml
  elsif chunk[/\A([bwtglsavn]:)[\w_]/]
    # only the two-character modifier is consumed; the name follows as its own token
    @i += 2
    @token_buf << [:SCOPE_MODIFIER, $1]
  elsif chunk[/\A([bwtglsavn]:)/]
    @i += 2
    @token_buf << [:SCOPE_MODIFIER_LITERAL, $1]
  elsif (special_var_prefix = chunk[/\A(&(\w:)?|\$|@)/])
    @token_buf << [:SPECIAL_VAR_PREFIX, special_var_prefix.strip]
    @expecting_identifier = true
    @i += special_var_prefix.size
  elsif chunk[/\A(function)\(/]
    # 'function(' is a call to the function named "function", not the keyword
    @token_buf << [:IDENTIFIER, $1]
    @i += $1.size
  elsif (identifier = chunk[/\A[a-zA-Z_][\w#]*(\?|!)?/])
    # keyword identifiers
    if KEYWORDS.include?(identifier)
      # Some keywords are rewritten to another spelling. @i is advanced by
      # the length difference here because only the rewritten identifier's
      # size is added further down.
      if identifier.match(/\Afunction/)
        old_identifier = identifier.dup
        identifier.sub!(/function/, "def")
        @i += (old_identifier.size - identifier.size)
      elsif identifier == 'finally'
        identifier = 'ensure'
        @i += 1 # diff b/t the two string lengths
      elsif VIML_END_KEYWORDS.include? identifier
        old_identifier = identifier.dup
        identifier = 'end'
        @i += old_identifier.size - identifier.size
      end

      if DEFINE_KEYWORDS.include?(identifier)
        @in_function_declaration = true
      end

      # strip '?' out of token names and replace '!' with '_bang'
      token_name = identifier.sub(/\?\Z/, "").sub(/!\Z/, "_bang").upcase

      # must run before @i moves past the keyword
      track_indent_level(chunk, identifier)
      @token_buf << [token_name.intern, identifier]

    elsif BUILTIN_COMMANDS.include? identifier
      @token_buf << [:BUILTIN_COMMAND, identifier]
    # method names and variable names
    else
      @token_buf << [:IDENTIFIER, identifier]
    end

    @i += identifier.size

    parse_dict_vals!

    if @in_function_declaration
      @in_function_declaration = false unless DEFINE_KEYWORDS.include?(identifier) && @token_buf.size == 1
    end
  elsif (splat = chunk[/\A(\.{3}|\*[a-zA-Z_]\w*)/])
    raise SyntaxError, "unexpected splat, has to be enclosed in parentheses" unless @splat_allowed
    @token_buf << [:SPLAT, splat]
    @splat_allowed = false
    @i += splat.size
  # integer (octal)
  elsif (octal = chunk[/\A0[0-7]+/])
    @token_buf << [:NUMBER, octal.to_s]
    @i += octal.size
  # integer (hex)
  elsif (hex = chunk[/\A0[xX]\h+/])
    @token_buf << [:NUMBER, hex.to_s]
    @i += hex.size
  # integer or float (decimal)
  elsif (decimal = chunk[/\A[0-9]+(\.[0-9]+)?/])
    @token_buf << [:NUMBER, decimal.to_s]
    @i += decimal.size
  elsif (interpolation = chunk[/\A"(.*?)(\#\{(.*?)\})(.*?)"/])
    # "#{hey} guys" = "hey" . " guys"
    prefix, expression, suffix = $1, $3, $4
    unless prefix.empty?
      @token_buf << [:STRING_D, prefix]
      @token_buf << ['.', '.']
    end
    @token_buf << [:IDENTIFIER, expression]
    unless suffix.empty?
      @token_buf << ['.', '.']
      # the text after the interpolation is emitted verbatim
      @token_buf << [:STRING_D, suffix]
    end
    @i += interpolation.size
  elsif (prev_token.nil? || prev_token[0] == :NEWLINE) && (comment = chunk[SINGLE_LINE_COMMENT_REGEX])
    @i += comment.size + 1 # consume next newline character
    @lineno += 1
  elsif (inline_comment = chunk[/\A\s*"[^"]*?$/])
    @i += inline_comment.size # inline comment, don't consume newline character
  elsif (string_double = chunk[/\A"(.*?)"/, 1])
    @token_buf << [:STRING_D, string_double]
    @i += string_double.size + 2
  elsif (string_single = chunk[/\A'(([^']|'')*)'/, 1])
    @token_buf << [:STRING_S, string_single]
    @i += string_single.size + 2
  elsif (newlines = chunk[/\A(\n+)/, 1])
    # push only 1 newline
    @token_buf << [:NEWLINE, "\n"] unless prev_token && prev_token[0] == :NEWLINE

    # pending indents/dedents
    if @one_line_conditional_END_pending
      @one_line_conditional_END_pending = false
    elsif @indent_pending
      @indent_pending = false
    elsif @dedent_pending
      @dedent_pending = false
    end

    @i += newlines.size
    @lineno += newlines.size
  elsif (heredoc_pattern = chunk[%r{\A<<(.+?)\r?\n}])
    pattern = $1
    @i += heredoc_pattern.size
    # /m lets the body span several lines; capture 1 is the body only, up to
    # and including the newline that precedes the terminator.
    body = get_new_chunk[/(.+?\r?\n)#{Regexp.escape(pattern)}/m, 1]
    raise SyntaxError, "unterminated heredoc, expected closing #{pattern}" if body.nil?
    @token_buf << [:HEREDOC, pattern]
    @token_buf << [:STRING_D, body]
    # consume exactly the body plus the terminator, nothing beyond it
    @i += body.size + pattern.size
    # one newline ended the '<<PATTERN' line, the rest are inside the body
    @lineno += 1 + body.count("\n")
  # operators of more than 1 char
  elsif (operator = chunk[OPERATOR_REGEX])
    @token_buf << [operator, operator]
    @i += operator.size
  # FIXME: this doesn't work well enough
  elsif (regexp = chunk[%r{\A/.*?[^\\]/}])
    @token_buf << [:REGEXP, regexp]
    @i += regexp.size
  elsif (whitespaces = chunk[/\A\s+/])
    @i += whitespaces.size
  # operators and tokens of single chars, one of: ( ) , . [ ] ! + - = < > /
  else
    value = chunk[0, 1]
    if value == '|'
      @token_buf << [:NEWLINE, "\n"]
    else
      @token_buf << [value, value]
    end
    @splat_allowed = true if value == '('
    @splat_allowed = false if value == ')'
    @i += 1
    # Grouping made explicit: `&&` binds tighter than `||`, so the '.' test
    # applies to ')' only. parse_dict_vals! does nothing unless a '.' follows.
    if value == ']' || (value == ')' && chunk[1, 1] == '.')
      parse_dict_vals!
    end
  end
end
|
211
|
+
|
212
|
+
private
|
213
|
+
# Updates the indentation bookkeeping for the keyword +identifier+.
#
# chunk      - String: remaining source, starting at the keyword.
# identifier - String: the keyword that was just lexed.
#
# Block-opening keywords raise @current_indent by 2; 'end' lowers it by 2
# unless it closes a one-line conditional. 'if'/'unless' only open a block
# when they are neither a one-line conditional nor a statement modifier.
def track_indent_level(chunk, identifier)
  keyword = identifier.to_sym
  block_openers = [:def, :def!, :defm, :defm!, :while, :until, :for, :try, :class]

  if block_openers.include?(keyword)
    @current_indent += 2
    @indent_pending = true
  elsif keyword == :if || keyword == :unless
    if one_line_conditional?(chunk)
      @one_line_conditional_END_pending = true
    elsif !statement_modifier?(chunk)
      @current_indent += 2
      @indent_pending = true
    end
  elsif keyword == :end && !@one_line_conditional_END_pending
    @current_indent -= 2
    @dedent_pending = true
  end
end
|
232
|
+
|
233
|
+
# Consumes a dotted accessor directly following the current position,
# e.g. dict.key OR dict.key.other_key.
#
# Inside a function declaration the dotted text is appended to the value
# of the most recently buffered token; otherwise each dot-separated part
# is buffered as its own :DICT_VAL token. Does nothing when the remaining
# source does not start with '.' followed by word characters or dots.
def parse_dict_vals!
  accessor = get_new_chunk[/\A\.([\w.]+)/, 1]
  return if accessor.nil?

  @i += accessor.size + 1 # + 1 for the leading '.'
  return @token_buf.last[1] << ".#{accessor}" if @in_function_declaration

  accessor.split('.').each { |key| @token_buf << [:DICT_VAL, key] }
  nil
end
|
248
|
+
|
249
|
+
# Verifies that block-opening keywords and END keywords balance out.
#
# Raises SyntaxError when @current_indent is positive (blocks left open)
# or negative (more ENDs than blocks). Each block accounts for 2 units.
def check_indentation
  blocks = @current_indent / 2
  if @current_indent > 0
    raise SyntaxError, "Missing #{blocks} END identifier(s), "
  elsif @current_indent < 0
    raise SyntaxError, "#{blocks.abs} too many END identifiers"
  end
end
|
253
|
+
|
254
|
+
# Tells whether the conditional at the start of +chunk+ is written on a
# single line, i.e. the line starts with if/unless and ends with 'end'.
#
# chunk - String: remaining source, starting at the if/unless keyword.
#
# The pattern is anchored with \A so that only the first line of +chunk+
# is examined; with ^ a one-line conditional anywhere further down in the
# source would be mistaken for the current one.
#
# Returns the matched line (truthy) or nil.
def one_line_conditional?(chunk)
  chunk[/\A(if|unless).+?(else)?.+?end$/]
end
|
257
|
+
|
258
|
+
# Tells whether the if/unless at the current position (@i) is used as a
# statement modifier, i.e. something other than whitespace precedes it on
# the same line.
#
# chunk - unused; the current line is re-read from @code.
#
# @i is moved temporarily and always restored before returning.
#
# Returns true, false or nil (nil when the line holds no if/unless that
# has text before and after it).
def statement_modifier?(chunk)
  old_i = @i
  # backtrack until the beginning of the line; stop at index 0 so that
  # @code[@i - 1] cannot wrap around to the end of the source
  @i -= 1 while @i > 0 && @code[@i - 1] !~ /[\n\r]/
  line_start_chunk = get_new_chunk
  # \A restricts the match to the current line, \b to whole keywords
  line_start_chunk[/\A(.+?)\b(if|unless)\b.+$/] && !$1.strip.empty?
ensure
  @i = old_i
end
|
267
|
+
|
268
|
+
# Returns the part of the source that has not been consumed yet, i.e.
# everything from index @i to the end of @code.
def get_new_chunk
  @code.slice(@i..-1)
end
|
271
|
+
|
272
|
+
# True while the read position @i has not reached the end of @code.
def more_code_to_tokenize?
  @code.size > @i
end
|
275
|
+
end
|
276
|
+
end
|