riml 0.1.3
Sign up to get free protection for your applications and to get access to all the features.
- data/LICENSE +20 -0
- data/README.md +148 -0
- data/bin/riml +69 -0
- data/config/environment.rb +10 -0
- data/lib/ast_rewriter.rb +268 -0
- data/lib/class_map.rb +46 -0
- data/lib/compiler.rb +579 -0
- data/lib/constants.rb +280 -0
- data/lib/errors.rb +4 -0
- data/lib/grammar.y +485 -0
- data/lib/helper.rb +45 -0
- data/lib/lexer.rb +276 -0
- data/lib/nodes.rb +723 -0
- data/lib/parser.rb +2748 -0
- data/lib/walker.rb +15 -0
- data/version.rb +4 -0
- metadata +75 -0
data/lib/lexer.rb
ADDED
@@ -0,0 +1,276 @@
|
|
1
|
+
require File.expand_path('../constants', __FILE__)
|
2
|
+
require File.expand_path('../errors', __FILE__)
|
3
|
+
|
4
|
+
module Riml
|
5
|
+
class Lexer
|
6
|
+
include Riml::Constants
|
7
|
+
|
8
|
+
# Matches a comment at the very start of a chunk: optional leading
# whitespace, a double quote, then the rest of that line (capture group 1).
SINGLE_LINE_COMMENT_REGEX = /\A\s*"(.*)$/
|
9
|
+
# Matches a multi-character operator at the very start of a chunk: one of the
# literals listed here or any entry of COMPARISON_OPERATORS.
# NOTE(review): COMPARISON_OPERATORS is not defined in this file; presumably it
# comes from the included Riml::Constants -- confirm.
OPERATOR_REGEX = /\A#{Regexp.union(['||', '&&', '===', '+=', '-='] + COMPARISON_OPERATORS)}/
|
10
|
+
|
11
|
+
attr_reader :tokens, :prev_token, :lineno, :chunk
|
12
|
+
|
13
|
+
# Sets up a lexer over +code+.
#
# The lexer keeps its own chomped copy of the source, so the caller's string
# is never modified and frozen strings are accepted.
#
# @param code [String] source text to tokenize
def initialize(code)
  @code = code.chomp # private copy without the trailing line terminator
  @i = 0 # number of characters consumed
  @token_buf = [] # tokens produced by the last chunk, not yet handed out
  @tokens = []
  @prev_token = nil
  @lineno = 1
  @current_indent = 0
  @indent_pending = false
  @dedent_pending = false
  @one_line_conditional_END_pending = false
  @splat_allowed = false
  # flags that tokenize_chunk sets later; start them in a defined state
  @in_function_declaration = false
  @expecting_identifier = false
end
|
27
|
+
|
28
|
+
# Lexes the entire source and returns every token in order.
#
# A single chunk can produce several tokens at once, so the loop keeps
# going while either unread source or buffered tokens remain; stopping as
# soon as the source is exhausted would drop the tokens still waiting in
# the buffer.
#
# @return [Array<Array>] the accumulated token list (also available via #tokens)
def tokenize
  while more_code_to_tokenize? || !@token_buf.empty?
    new_token = next_token
    @tokens << new_token unless new_token.nil?
  end
  @tokens
end
|
35
|
+
|
36
|
+
# Hands out the next token, lexing further chunks on demand.
#
# Chunks are tokenized until at least one token is buffered or the source
# runs out. When nothing is left, the indentation balance is checked and
# nil is returned.
#
# @return [Array, nil] the next token (also remembered as #prev_token), or nil
def next_token
  tokenize_chunk(get_new_chunk) while @token_buf.empty? && more_code_to_tokenize?
  return @prev_token = @token_buf.shift if @token_buf.any?

  check_indentation
  nil
end
|
46
|
+
|
47
|
+
# Lexes the construct at the start of +chunk+ (the unread remainder of the
# source), advancing the cursor @i past it and pushing zero or more tokens
# onto @token_buf. @lineno is updated whenever newlines are consumed.
#
# Branch order matters: earlier patterns take precedence over later ones.
#
# @param chunk [String] source text starting at the current cursor position
# @raise [SyntaxError] on a splat outside parentheses or an unterminated heredoc
def tokenize_chunk(chunk)
  @chunk = chunk

  # deal with line continuations
  if cont = chunk[/\A(\n*)\s*\\/]
    @i += cont.size
    @lineno += $1.size
    return
  end

  # all lines that start with ':' pass right through unmodified
  if (prev_token.nil? || prev_token[0] == :NEWLINE) && (ex_literal = chunk[/\A\s*:(.*)?$/])
    @i += ex_literal.size
    @token_buf << [:EX_LITERAL, $1]
    return
  end

  if splat_var = chunk[/\Aa:\d+/]
    @i += splat_var.size
    @token_buf << [:SCOPE_MODIFIER, 'a:'] << [:IDENTIFIER, splat_var[2..-1]]
  # the 'n' scope modifier is added by riml
  elsif chunk[/\A([bwtglsavn]:)[\w_]/]
    @i += 2
    @token_buf << [:SCOPE_MODIFIER, $1]
  elsif chunk[/\A([bwtglsavn]:)/]
    @i += 2
    @token_buf << [:SCOPE_MODIFIER_LITERAL, $1]
  elsif special_var_prefix = chunk[/\A(&(\w:)?|\$|@)/]
    @token_buf << [:SPECIAL_VAR_PREFIX, special_var_prefix.strip]
    @expecting_identifier = true
    @i += special_var_prefix.size
  # `function(` is a call to the builtin, not a definition keyword
  elsif chunk[/\A(function)\(/]
    @token_buf << [:IDENTIFIER, $1]
    @i += $1.size
  elsif identifier = chunk[/\A[a-zA-Z_][\w#]*(\?|!)?/]
    # keyword identifiers
    if KEYWORDS.include?(identifier)
      # Some keywords are renamed; @i is bumped by the length difference here
      # so that the common `@i += identifier.size` below consumes exactly the
      # original source text.
      if identifier.match(/\Afunction/)
        old_identifier = identifier.dup
        identifier.sub!(/function/, "def")
        @i += (old_identifier.size - identifier.size)
      elsif identifier == 'finally'
        identifier = 'ensure'
        @i += 1 # diff b/t the two string lengths
      elsif VIML_END_KEYWORDS.include? identifier
        old_identifier = identifier.dup
        identifier = 'end'
        @i += old_identifier.size - identifier.size
      end

      if DEFINE_KEYWORDS.include?(identifier)
        @in_function_declaration = true
      end

      # strip '?' out of token names and replace '!' with '_bang'
      token_name = identifier.sub(/\?\Z/, "").sub(/!\Z/, "_bang").upcase

      track_indent_level(chunk, identifier)
      @token_buf << [token_name.intern, identifier]

    elsif BUILTIN_COMMANDS.include? identifier
      @token_buf << [:BUILTIN_COMMAND, identifier]
    # method names and variable names
    else
      @token_buf << [:IDENTIFIER, identifier]
    end

    @i += identifier.size

    parse_dict_vals!

    # the declaration flag survives only the define keyword itself, so that
    # the name which follows it can absorb its dotted suffix
    if @in_function_declaration
      @in_function_declaration = false unless DEFINE_KEYWORDS.include?(identifier) && @token_buf.size == 1
    end
  elsif splat = chunk[/\A(\.{3}|\*[a-zA-Z_]\w*)/]
    raise SyntaxError, "unexpected splat, has to be enclosed in parentheses" unless @splat_allowed
    @token_buf << [:SPLAT, splat]
    @splat_allowed = false
    @i += splat.size
  # integer (octal)
  elsif octal = chunk[/\A0[0-7]+/]
    @token_buf << [:NUMBER, octal.to_s]
    @i += octal.size
  # integer (hex)
  elsif hex = chunk[/\A0[xX]\h+/]
    @token_buf << [:NUMBER, hex.to_s]
    @i += hex.size
  # integer or float (decimal)
  elsif decimal = chunk[/\A[0-9]+(\.[0-9]+)?/]
    @token_buf << [:NUMBER, decimal.to_s]
    @i += decimal.size
  elsif interpolation = chunk[/\A"(.*?)(\#\{(.*?)\})(.*?)"/]
    # "#{hey} guys" = hey . " guys"
    unless $1.empty?
      @token_buf << [:STRING_D, $1]
      @token_buf << ['.', '.']
    end
    @token_buf << [:IDENTIFIER, $3]
    unless $4.empty?
      @token_buf << ['.', '.']
      # keep the trailing literal verbatim, including its first character
      @token_buf << [:STRING_D, $4]
    end
    @i += interpolation.size
  elsif chunk[SINGLE_LINE_COMMENT_REGEX] && (prev_token.nil? || prev_token[0] == :NEWLINE)
    comment = chunk[SINGLE_LINE_COMMENT_REGEX]
    @i += comment.size + 1 # consume next newline character
    @lineno += 1
  elsif inline_comment = chunk[/\A\s*"[^"]*?$/]
    @i += inline_comment.size # inline comment, don't consume newline character
  elsif string_double = chunk[/\A"(.*?)"/, 1]
    @token_buf << [:STRING_D, string_double]
    @i += string_double.size + 2
  elsif string_single = chunk[/\A'(([^']|'')*)'/, 1]
    @token_buf << [:STRING_S, string_single]
    @i += string_single.size + 2
  elsif newlines = chunk[/\A(\n+)/, 1]
    # push only 1 newline
    @token_buf << [:NEWLINE, "\n"] unless prev_token && prev_token[0] == :NEWLINE

    # pending indents/dedents
    if @one_line_conditional_END_pending
      @one_line_conditional_END_pending = false
    elsif @indent_pending
      @indent_pending = false
    elsif @dedent_pending
      @dedent_pending = false
    end

    @i += newlines.size
    @lineno += newlines.size
  elsif heredoc_pattern = chunk[%r{\A<<(.+?)\r?\n}]
    pattern = $1
    @i += heredoc_pattern.size
    @token_buf << [:HEREDOC, pattern]
    # The body is everything up to the first line that starts with the
    # terminator; it may span several lines, hence the /m flag.
    heredoc_body = get_new_chunk[%r|\A(.*?\r?\n)#{Regexp.escape(pattern)}|m, 1]
    raise SyntaxError, "unterminated heredoc, expected closing #{pattern}" if heredoc_body.nil?
    # consume the body and the terminator exactly once
    @i += heredoc_body.size + pattern.size
    @token_buf << [:STRING_D, heredoc_body]
    # one line for the opener plus one per body line; the newline after the
    # terminator is counted when it is lexed as a NEWLINE
    @lineno += (1 + heredoc_body.each_line.to_a.size)
  # operators of more than 1 char
  elsif operator = chunk[OPERATOR_REGEX]
    @token_buf << [operator, operator]
    @i += operator.size
  # FIXME: this doesn't work well enough
  elsif regexp = chunk[%r{\A/.*?[^\\]/}]
    @token_buf << [:REGEXP, regexp]
    @i += regexp.size
  elsif whitespaces = chunk[/\A\s+/]
    @i += whitespaces.size
  # operators and tokens of single chars, one of: ( ) , . [ ] ! + - = < > /
  else
    value = chunk[0, 1]
    if value == '|'
      # '|' separates commands on one line, so it is lexed as a newline
      @token_buf << [:NEWLINE, "\n"]
    else
      @token_buf << [value, value]
    end
    @splat_allowed = true if value == '('
    @splat_allowed = false if value == ')'
    @i += 1
    # dotted keys may directly follow a closing bracket or paren
    if value == ']' || (value == ')' && chunk[1, 1] == '.')
      parse_dict_vals!
    end
  end
end
|
211
|
+
|
212
|
+
private
|
213
|
+
# Updates the block-nesting bookkeeping for the keyword +identifier+.
#
# Block openers raise @current_indent by 2 and flag a pending indent. `if`
# and `unless` do so only when they are neither one-line conditionals (which
# instead flag a pending END) nor statement modifiers. `end` lowers the
# level by 2 and flags a pending dedent, unless it closes a one-line
# conditional.
#
# @param chunk [String] source text starting at the keyword
# @param identifier [String] the keyword being lexed
def track_indent_level(chunk, identifier)
  keyword = identifier.to_s

  opens_block =
    if %w[def def! defm defm! while until for try class].include?(keyword)
      true
    elsif keyword == 'if' || keyword == 'unless'
      if one_line_conditional?(chunk)
        @one_line_conditional_END_pending = true
        false
      else
        !statement_modifier?(chunk)
      end
    else
      false
    end

  if opens_block
    @current_indent += 2
    @indent_pending = true
  elsif keyword == 'end' && !@one_line_conditional_END_pending
    @current_indent -= 2
    @dedent_pending = true
  end
end
|
232
|
+
|
233
|
+
# Consumes a dotted key suffix (dict.key or dict.key.other_key) found at the
# cursor, if there is one.
#
# Inside a function declaration the suffix is appended to the text of the
# most recently buffered token; otherwise every key becomes its own
# :DICT_VAL token. The cursor moves past the leading dot and the keys.
def parse_dict_vals!
  return unless get_new_chunk[/\A\.([\w.]+)/]

  dotted_keys = $1
  @i += dotted_keys.size + 1 # + 1 for the leading dot

  if @in_function_declaration
    @token_buf.last[1] << ".#{dotted_keys}"
  else
    dotted_keys.split('.').each { |key| @token_buf << [:DICT_VAL, key] }
    nil
  end
end
|
248
|
+
|
249
|
+
# Verifies that block openers and END keywords balance out.
#
# @raise [SyntaxError] when @current_indent is positive (END keywords are
#   missing) or negative (too many END keywords)
def check_indentation
  levels = @current_indent / 2 # each nesting level counts as 2
  if @current_indent > 0
    raise SyntaxError, "Missing #{levels} END identifier(s), "
  elsif @current_indent < 0
    raise SyntaxError, "#{levels.abs} too many END identifiers"
  end
end
|
253
|
+
|
254
|
+
# Tells whether the conditional at the start of +chunk+ is opened and closed
# on the same line (e.g. `if x | y | end`).
#
# The pattern is anchored to the start of the chunk so that only the current
# line is inspected, and the closing keyword must be the whole word `end`
# rather than the tail of a longer identifier.
#
# @param chunk [String] source text starting at the `if`/`unless` keyword
# @return [String, nil] the matched line, or nil when it is not a one-liner
def one_line_conditional?(chunk)
  chunk[/\A(if|unless).+?(else)?.+?\bend$/]
end
|
257
|
+
|
258
|
+
# Tells whether the `if`/`unless` keyword at the cursor is a trailing
# statement modifier (`echo 1 if x`) rather than the start of a block.
#
# Only the current line is inspected: the keyword is a modifier when
# something other than whitespace precedes it on its line and a condition
# follows it. The cursor @i is expected to sit on the keyword and is never
# moved.
#
# @param chunk [String] source text starting at the keyword (kept for
#   interface compatibility; the decision is made from @code and @i)
# @return [Boolean]
def statement_modifier?(chunk)
  # index of the first character of the current line
  line_start = @i > 0 ? (@code.rindex(/[\n\r]/, @i - 1) || -1) + 1 : 0
  return false if @code[line_start...@i].strip.empty?

  # a modifier needs a condition after the keyword on the same line
  !!(@code[@i..-1] =~ /\A(if|unless).+$/)
end
|
267
|
+
|
268
|
+
# Returns the part of the source that has not been consumed yet, i.e.
# everything from the cursor @i to the end of @code.
def get_new_chunk
  @code.slice(@i..-1)
end
|
271
|
+
|
272
|
+
# True while the cursor @i has not yet reached the end of the source.
def more_code_to_tokenize?
  @code.size > @i
end
|
275
|
+
end
|
276
|
+
end
|