riml 0.3.6 → 0.3.7

Sign up to get free protection for your applications and to get access to all the features.
@@ -34,8 +34,10 @@ module Riml
34
34
  def self.trap(*signals, &block)
35
35
  signals.each do |sig|
36
36
  Signal.trap(sig) do
37
- rollback! if @guarding > 0
38
- block.call if block
37
+ if @guarding > 0
38
+ rollback!
39
+ block.call if block
40
+ end
39
41
  end
40
42
  end
41
43
  end
@@ -39,8 +39,8 @@ rule
39
39
 
40
40
  Root:
41
41
  /* nothing */ { result = make_node(val) { |_| Riml::Nodes.new([]) } }
42
- | Statements { result = val[0] }
43
42
  | Terminator { result = make_node(val) { |_| Riml::Nodes.new([]) } }
43
+ | Statements { result = val[0] }
44
44
  ;
45
45
 
46
46
  # any list of expressions
@@ -163,10 +163,12 @@ rule
163
163
  DictionaryLiteral { result = make_node(val) { |v| Riml::DictionaryNode.new(v[0]) } }
164
164
  ;
165
165
 
166
- # {'key': 'value', 'key': 'value'}
166
+ # {'key': 'value', 'key2': 'value2'}
167
+ # Save as [['key', 'value'], ['key2', 'value2']] because ruby-1.8.7 offers
168
+ # no guarantee for key-value pair ordering.
167
169
  DictionaryLiteral:
168
- '{' DictItems '}' { result = Hash[val[1]] }
169
- | '{' DictItems ',' '}' { result = Hash[val[1]] }
170
+ '{' DictItems '}' { result = val[1] }
171
+ | '{' DictItems ',' '}' { result = val[1] }
170
172
  ;
171
173
 
172
174
  # [[key, value], [key, value]]
@@ -558,8 +560,8 @@ end
558
560
  ---- header
559
561
  require File.expand_path("../lexer", __FILE__)
560
562
  require File.expand_path("../nodes", __FILE__)
561
- require File.expand_path("../ast_rewriter", __FILE__)
562
563
  require File.expand_path("../errors", __FILE__)
564
+ require File.expand_path("../ast_rewriter", __FILE__)
563
565
  ---- inner
564
566
  # This code will be put as-is in the parser class
565
567
 
@@ -590,13 +592,14 @@ end
590
592
  ast = do_parse
591
593
  rescue Racc::ParseError => e
592
594
  raise unless @lexer
593
- if @lexer.prev_token_is_keyword?
594
- warning = "#{@lexer.invalid_keyword.inspect} is a keyword, and cannot " \
595
+ if (invalid_token = @lexer.prev_token_is_keyword?)
596
+ warning = "#{invalid_token.inspect} is a keyword, and cannot " \
595
597
  "be used as a variable name"
596
598
  end
597
- error_msg = "#{e.message} at #{@lexer.filename}:#{@lexer.lineno}"
598
- error_msg << "\n\n#{warning}" if warning
599
- raise Riml::ParseError, error_msg
599
+ error_msg = e.message
600
+ error_msg << "\nWARNING: #{warning}" if warning
601
+ error = Riml::ParseError.new(error_msg, @lexer.filename, @lexer.lineno)
602
+ raise error
600
603
  end
601
604
  self.class.ast_cache[filename] = ast if filename
602
605
  end
@@ -613,7 +616,9 @@ end
613
616
  def next_token
614
617
  return @tokens.shift unless @lexer
615
618
  token = @lexer.next_token
616
- @current_parser_info = token.pop if token
619
+ if token && @lexer.parser_info
620
+ @current_parser_info = token.pop
621
+ end
617
622
  token
618
623
  end
619
624
 
@@ -50,11 +50,11 @@ module Riml
50
50
  end
51
51
 
52
52
  def constructor_name
53
- "#{name}Constructor"
53
+ "#{@name}Constructor"
54
54
  end
55
55
 
56
56
  def constructor_obj_name
57
- @name[0].downcase + @name[1..-1] + "Obj"
57
+ @name[0, 1].downcase + @name[1..-1] + "Obj"
58
58
  end
59
59
 
60
60
  private
@@ -1,3 +1,6 @@
1
+ # encoding: utf-8
2
+
3
+ require 'strscan'
1
4
  require File.expand_path('../constants', __FILE__)
2
5
  require File.expand_path('../errors', __FILE__)
3
6
 
@@ -11,23 +14,17 @@ module Riml
11
14
  ANCHORED_INTERPOLATION_REGEX = /\A#{INTERPOLATION_REGEX}/m
12
15
  INTERPOLATION_SPLIT_REGEX = /(\#\{.*?\})/m
13
16
 
14
- attr_reader :tokens, :prev_token, :chunk, :current_indent,
15
- :invalid_keyword, :filename, :parser_info
17
+ attr_reader :tokens, :prev_token, :current_indent,
18
+ :filename, :parser_info
16
19
  attr_accessor :lineno
17
20
  # for REPL
18
21
  attr_accessor :ignore_indentation_check
19
22
 
20
23
  def initialize(code, filename = nil, parser_info = false)
21
- @code = code
22
- @code.chomp!
23
- @filename = filename
24
+ code.chomp!
25
+ @s = StringScanner.new(code)
26
+ @filename = filename || COMPILED_STRING_LOCATION
24
27
  @parser_info = parser_info
25
- set_start_state!
26
- end
27
-
28
- def set_start_state!
29
- # number of characters consumed
30
- @i = 0
31
28
  # array of doubles and triples: [tokenname, tokenval, lineno_to_add(optional)]
32
29
  # ex: [[:NEWLINE, "\n"]] OR [[:NEWLINE, "\n", 1]]
33
30
  @token_buf = []
@@ -43,18 +40,16 @@ module Riml
43
40
  @indent_pending = false
44
41
  @dedent_pending = false
45
42
  @in_function_declaration = false
46
- @invalid_keyword = nil
47
43
  end
48
44
 
49
45
  def tokenize
50
- set_start_state!
51
46
  while next_token != nil; end
52
47
  @tokens
53
48
  end
54
49
 
55
50
  def next_token
56
- while @token_buf.empty? && more_code_to_tokenize?
57
- tokenize_chunk(get_new_chunk)
51
+ while @token_buf.empty? && !@s.eos?
52
+ tokenize_chunk
58
53
  end
59
54
  if !@token_buf.empty?
60
55
  token = @token_buf.shift
@@ -74,55 +69,48 @@ module Riml
74
69
  nil
75
70
  end
76
71
 
77
- def tokenize_chunk(chunk)
78
- @chunk = chunk
72
+ def tokenize_chunk
79
73
  # deal with line continuations
80
- if cont = chunk[/\A\r?\n*[ \t\f]*\\/m]
81
- @i += cont.size
74
+ if cont = @s.scan(/\A\r?\n*[ \t\f]*\\/m)
82
75
  @lineno += cont.each_line.to_a.size - 1
83
76
  return
84
77
  end
85
78
 
86
79
  # all lines that start with ':' pass right through unmodified
87
- if (prev_token.nil? || prev_token[0] == :NEWLINE) && (ex_literal = chunk[/\A[ \t\f]*:(.*)?$/])
88
- @i += ex_literal.size
89
- @token_buf << [:EX_LITERAL, $1]
80
+ if (prev_token.nil? || prev_token[0] == :NEWLINE) && @s.scan(/\A[ \t\f]*:(.*)?$/)
81
+ @token_buf << [:EX_LITERAL, @s[1]]
90
82
  return
91
83
  end
92
84
 
93
- if splat_var = chunk[/\Aa:\d+/]
94
- @i += splat_var.size
85
+ if splat_var = @s.scan(/\Aa:\d+/)
95
86
  @token_buf << [:SCOPE_MODIFIER, 'a:'] << [:IDENTIFIER, splat_var[2..-1]]
96
87
  # the 'n' scope modifier is added by riml
97
- elsif scope_modifier = chunk[/\A([bwtglsavn]:)(\w|\{)/, 1]
98
- @i += 2
99
- @token_buf << [:SCOPE_MODIFIER, scope_modifier]
100
- elsif scope_modifier_literal = chunk[/\A([bwtglsavn]:)/]
101
- @i += scope_modifier_literal.size
88
+ elsif @s.check(/\A([bwtglsavn]:)(\w|\{)/)
89
+ @token_buf << [:SCOPE_MODIFIER, @s[1]]
90
+ @s.pos += 2
91
+ elsif scope_modifier_literal = @s.scan(/\A([bwtglsavn]:)/)
102
92
  @token_buf << [:SCOPE_MODIFIER_LITERAL, scope_modifier_literal]
103
- elsif special_var_prefix = chunk[/\A(&(\w:)?(?!&)|\$|@)/]
93
+ elsif special_var_prefix = (!@s.check(/\A&(\w:)?&/) && @s.scan(/\A(&(\w:)?|\$|@)/))
104
94
  @token_buf << [:SPECIAL_VAR_PREFIX, special_var_prefix.strip]
105
- @i += special_var_prefix.size
106
95
  if special_var_prefix == '@'
107
- new_chunk = get_new_chunk
108
- next_char = new_chunk[0]
96
+ next_char = @s.peek(1)
109
97
  if REGISTERS.include?(next_char)
110
98
  @token_buf << [:IDENTIFIER, next_char]
111
- @i += 1
99
+ @s.getch
112
100
  end
113
101
  else
114
102
  @expecting_identifier = true
115
103
  end
116
- elsif function_method = chunk[/\A(function)\(/, 1]
117
- @token_buf << [:IDENTIFIER, function_method]
118
- @i += function_method.size
119
- elsif identifier = chunk[/\A[a-zA-Z_][\w#]*(\?|!)?/]
104
+ elsif @s.scan(/\A(function)\(/)
105
+ @token_buf << [:IDENTIFIER, @s[1]]
106
+ @s.pos -= 1
107
+ elsif identifier = @s.check(/\A[a-zA-Z_][\w#]*(\?|!)?/)
120
108
  # keyword identifiers
121
109
  if KEYWORDS.include?(identifier)
122
110
  if identifier.match(/\Afunction/)
123
111
  old_identifier = identifier.dup
124
112
  identifier.sub!(/function/, "def")
125
- @i += (old_identifier.size - identifier.size)
113
+ @s.pos += (old_identifier.size - identifier.size)
126
114
  end
127
115
 
128
116
  if DEFINE_KEYWORDS.include?(identifier)
@@ -131,72 +119,62 @@ module Riml
131
119
 
132
120
  # strip '?' out of token names and replace '!' with '_bang'
133
121
  token_name = identifier.sub(/\?\Z/, "").sub(/!\Z/, "_bang").upcase
134
- track_indent_level(chunk, identifier)
122
+ track_indent_level(identifier)
135
123
 
136
124
  if VIML_END_KEYWORDS.include?(identifier)
137
125
  token_name = :END
138
126
  end
139
127
 
140
- @token_buf << [token_name.intern, identifier]
128
+ @token_buf << [token_name.to_sym, identifier]
141
129
 
142
- elsif BUILTIN_COMMANDS.include?(identifier) && peek(identifier.size) != '('
130
+ elsif BUILTIN_COMMANDS.include?(identifier) && @s.peek(identifier.size + 1)[-1, 1] != '('
143
131
  @token_buf << [:BUILTIN_COMMAND, identifier]
144
132
  elsif RIML_FILE_COMMANDS.include? identifier
145
133
  @token_buf << [:RIML_FILE_COMMAND, identifier]
146
134
  elsif RIML_CLASS_COMMANDS.include? identifier
147
135
  @token_buf << [:RIML_CLASS_COMMAND, identifier]
148
136
  elsif VIML_COMMANDS.include?(identifier) && (prev_token.nil? || prev_token[0] == :NEWLINE)
149
- @i += identifier.size
150
- new_chunk = get_new_chunk
151
- until_eol = new_chunk[/.*$/].to_s
137
+ @s.pos += identifier.size
138
+ until_eol = @s.scan(/.*$/).to_s
152
139
  @token_buf << [:EX_LITERAL, identifier << until_eol]
153
- @i += until_eol.size
154
140
  return
155
141
  # method names and variable names
156
142
  else
157
143
  @token_buf << [:IDENTIFIER, identifier]
158
144
  end
159
145
 
160
- @i += identifier.size
146
+ @s.pos += identifier.size
161
147
 
162
148
  parse_dict_vals!
163
149
 
164
- elsif @in_function_declaration && (splat_param = chunk[/\A(\.{3}|\*[a-zA-Z_]\w*)/])
150
+ elsif @in_function_declaration && (splat_param = @s.scan(/\A(\.{3}|\*[a-zA-Z_]\w*)/))
165
151
  @token_buf << [:SPLAT_PARAM, splat_param]
166
- @i += splat_param.size
167
- elsif !@in_function_declaration && (splat_arg = chunk[/\A\*([bwtglsavn]:)?([a-zA-Z_]\w*|\d+)/])
152
+ elsif !@in_function_declaration && (splat_arg = @s.scan(/\A\*([bwtglsavn]:)?([a-zA-Z_]\w*|\d+)/))
168
153
  @token_buf << [:SPLAT_ARG, splat_arg]
169
- @i += splat_arg.size
170
154
  # integer (octal)
171
- elsif octal = chunk[/\A0[0-7]+/]
155
+ elsif octal = @s.scan(/\A0[0-7]+/)
172
156
  @token_buf << [:NUMBER, octal]
173
- @i += octal.size
174
157
  # integer (hex)
175
- elsif hex = chunk[/\A0[xX]\h+/]
158
+ elsif hex = @s.scan(/\A0[xX][0-9a-fA-F]+/)
176
159
  @token_buf << [:NUMBER, hex]
177
- @i += hex.size
178
160
  # integer or float (decimal)
179
- elsif decimal = chunk[/\A[0-9]+(\.[0-9]+([eE][+-]?[0-9]+)?)?/]
161
+ elsif decimal = @s.scan(/\A[0-9]+(\.[0-9]+([eE][+-]?[0-9]+)?)?/)
180
162
  @token_buf << [:NUMBER, decimal]
181
- @i += decimal.size
182
- elsif interpolation = chunk[ANCHORED_INTERPOLATION_REGEX]
163
+ elsif interpolation = @s.scan(ANCHORED_INTERPOLATION_REGEX)
183
164
  # "hey there, #{name}" = "hey there, " . name
184
165
  parts = interpolation[1...-1].split(INTERPOLATION_SPLIT_REGEX)
185
166
  handle_interpolation(*parts)
186
- @i += interpolation.size
187
- elsif (single_line_comment = chunk[SINGLE_LINE_COMMENT_REGEX]) && (prev_token.nil? || prev_token[0] == :NEWLINE)
188
- @i += single_line_comment.size + 1 # consume next newline character
167
+ elsif (single_line_comment = @s.check(SINGLE_LINE_COMMENT_REGEX)) && (prev_token.nil? || prev_token[0] == :NEWLINE)
168
+ @s.pos += single_line_comment.size
169
+ @s.pos += 1 unless @s.eos? # consume newline
189
170
  @lineno += single_line_comment.each_line.to_a.size
190
- elsif inline_comment = chunk[/\A[ \t\f]*"[^"]*?$/]
191
- @i += inline_comment.size # inline comment, don't consume newline character
171
+ elsif inline_comment = @s.scan(/\A[ \t\f]*"[^"]*?$/)
192
172
  @lineno += inline_comment.each_line.to_a.size - 1
193
- elsif string_double = chunk[/\A"(.*?)(?<!\\)"/, 1]
194
- @token_buf << [:STRING_D, string_double]
195
- @i += string_double.size + 2
196
- elsif string_single = chunk[/\A'(([^']|'')*)'/, 1]
197
- @token_buf << [:STRING_S, string_single]
198
- @i += string_single.size + 2
199
- elsif newlines = chunk[/\A([\r\n]+)/, 1]
173
+ elsif (str = lex_string_double)
174
+ @token_buf << [:STRING_D, str]
175
+ elsif @s.scan(/\A'(([^']|'')*)'/)
176
+ @token_buf << [:STRING_S, @s[1]]
177
+ elsif newlines = @s.scan(/\A([\r\n]+)/)
200
178
  # push only 1 newline
201
179
  @token_buf << [:NEWLINE, "\n"] unless prev_token && prev_token[0] == :NEWLINE
202
180
 
@@ -210,14 +188,13 @@ module Riml
210
188
  @in_function_declaration = false
211
189
  end
212
190
 
213
- @i += newlines.size
214
191
  @lineno += newlines.size
215
- elsif heredoc_pattern = chunk[%r{\A<<(.+?)\r?\n}]
216
- pattern = $1
217
- @i += heredoc_pattern.size
218
- new_chunk = get_new_chunk
219
- heredoc_string = new_chunk[%r|(.+?\r?\n)(#{Regexp.escape(pattern)})|m, 1]
220
- @i += heredoc_string.size + pattern.size
192
+ # heredoc
193
+ elsif @s.scan(%r{\A<<(.+?)\r?\n})
194
+ pattern = @s[1]
195
+ @s.check(%r|(.+?\r?\n)(#{Regexp.escape(pattern)})|m)
196
+ heredoc_string = @s[1]
197
+ @s.pos += (pattern.size + heredoc_string.size)
221
198
  heredoc_string.chomp!
222
199
  if heredoc_string =~ INTERPOLATION_REGEX || %Q("#{heredoc_string}") =~ INTERPOLATION_REGEX
223
200
  parts = heredoc_string.split(INTERPOLATION_SPLIT_REGEX)
@@ -227,37 +204,39 @@ module Riml
227
204
  end
228
205
  @lineno += heredoc_string.each_line.to_a.size
229
206
  # operators of more than 1 char
230
- elsif operator = chunk[OPERATOR_REGEX]
207
+ elsif operator = @s.scan(OPERATOR_REGEX)
231
208
  @token_buf << [operator, operator]
232
- @i += operator.size
233
- elsif regexp = chunk[%r{\A/.*?[^\\]/}]
209
+ elsif regexp = @s.scan(%r{\A/.*?[^\\]/})
234
210
  @token_buf << [:REGEXP, regexp]
235
- @i += regexp.size
236
- elsif whitespaces = chunk[/\A[ \t\f]+/]
237
- @i += whitespaces.size
211
+ # whitespaces
212
+ elsif @s.scan(/\A[ \t\f]+/)
238
213
  # operators and tokens of single chars, one of: ( ) , . [ ] ! + - = < > /
239
214
  else
240
- value = chunk[0, 1]
215
+ value = @s.getch
241
216
  if value == '|'
242
217
  @token_buf << [:NEWLINE, "\n"]
243
218
  else
244
219
  @token_buf << [value, value]
245
220
  end
246
- @i += 1
247
- if value == ']' || value == ')' && (chunk[1, 1] == '.' && chunk[3, 1] != ':')
221
+ # if we encounter `funcCall().`, the next character must be treated as
222
+ # a dictionary retrieval operation, not a string concatenation
223
+ # operation.
224
+ # However, if we see `funcCall().l:localVar`, we know it must be a
225
+ # string concatenation operation.
226
+ if value == ']' || value == ')' && (@s.peek(1) == '.' && @s.peek(3) != ':')
248
227
  parse_dict_vals!
249
228
  end
250
229
  end
251
230
  end
252
231
 
253
232
  # Checks if any of previous n tokens are keywords.
254
- # If any found, sets `@invalid_keyword` to the found token value.
233
+ # If one is found, returns that keyword; otherwise returns `false`.
255
234
  def prev_token_is_keyword?(n = 2)
256
235
  return false if n <= 0
257
236
  (1..n).each do |i|
258
237
  t = tokens[-i]
259
238
  if t && t[1] && KEYWORDS.include?(t[1])
260
- return @invalid_keyword = t[1]
239
+ return t[1]
261
240
  end
262
241
  end
263
242
  false
@@ -265,6 +244,35 @@ module Riml
265
244
 
266
245
  private
267
246
 
247
+ # Ruby >= 1.9: the regexp engine supports negative lookbehind
248
+ if RUBY_VERSION >= '1.9'
249
+ # have to use string constructor, as parser would throw SyntaxError if
250
+ # RUBY_VERSION < '1.9'. Literal regexp is `/\A"(.*?)(?<!\\)"/`
251
+ STRING_DOUBLE_NEGATIVE_LOOKBEHIND_REGEX = Regexp.new('\A"(.*?)(?<!\\\\)"')
252
+ def lex_string_double
253
+ @s.scan(STRING_DOUBLE_NEGATIVE_LOOKBEHIND_REGEX) && @s[1]
254
+ end
255
+ # Ruby < 1.9: the regexp engine has no negative lookbehind
256
+ else
257
+ def lex_string_double
258
+ str = ''
259
+ regex = /\A"(.*?)"/
260
+ pos = @s.pos
261
+ while @s.scan(regex)
262
+ match = @s[1]
263
+ str << match
264
+ if match[-1, 1] == '\\'
265
+ str << '"'
266
+ regex = /\A(.*?)"/
267
+ else
268
+ return str
269
+ end
270
+ end
271
+ @s.pos = pos
272
+ nil
273
+ end
274
+ end
275
+
268
276
  def decorate_token(token)
269
277
  token << {
270
278
  :lineno => @lineno,
@@ -273,7 +281,7 @@ module Riml
273
281
  token
274
282
  end
275
283
 
276
- def track_indent_level(chunk, identifier)
284
+ def track_indent_level(identifier)
277
285
  case identifier.to_sym
278
286
  when :def, :def!, :defm, :defm!, :while, :until, :for, :try, :class
279
287
  @current_indent += 2
@@ -289,12 +297,11 @@ module Riml
289
297
  end
290
298
  end
291
299
 
300
+ # `dict.key` or `dict.key.other_key`, etc.
292
301
  def parse_dict_vals!
293
- # dict.key OR dict.key.other_key
294
- new_chunk = get_new_chunk
295
- if vals = new_chunk[/\A\.([\w.]+)(?!:)/, 1]
302
+ if @s.scan(/\A\.([\w.]+)(?!:)/)
303
+ vals = @s[1]
296
304
  parts = vals.split('.')
297
- @i += vals.size + 1
298
305
  if @in_function_declaration
299
306
  @token_buf.last[1] << ".#{vals}"
300
307
  else
@@ -306,13 +313,20 @@ module Riml
306
313
  end
307
314
 
308
315
  def check_indentation
309
- raise SyntaxError, "Missing #{(@current_indent / 2)} END identifier(s), " if @current_indent > 0
310
- raise SyntaxError, "#{(@current_indent / 2).abs} too many END identifiers" if @current_indent < 0
316
+ if @current_indent > 0
317
+ error_msg = "Missing #{(@current_indent / 2)} END identifier(s)"
318
+ error = Riml::SyntaxError.new(error_msg, @filename, @lineno)
319
+ raise error
320
+ elsif @current_indent < 0
321
+ error_msg = "#{(@current_indent / 2).abs} too many END identifiers"
322
+ error = Riml::SyntaxError.new(error_msg, @filename, @lineno)
323
+ raise error
324
+ end
311
325
  end
312
326
 
313
327
  def handle_interpolation(*parts)
314
328
  parts.delete_if {|p| p.empty?}.each_with_index do |part, i|
315
- if part[0..1] == '#{' && part[-1] == '}'
329
+ if part[0..1] == '#{' && part[-1, 1] == '}'
316
330
  interpolation_content = part[2...-1]
317
331
  @token_buf.concat tokenize_without_moving_pos(interpolation_content)
318
332
  else
@@ -331,30 +345,18 @@ module Riml
331
345
 
332
346
  def tokenize_without_moving_pos(code)
333
347
  Lexer.new(code, filename, false).tap do |l|
334
- l.lineno = lineno
348
+ l.lineno = @lineno
335
349
  end.tokenize
336
350
  end
337
351
 
338
352
  def statement_modifier?
339
- old_i = @i
353
+ old_pos = @s.pos
340
354
  # backtrack until the beginning of the line
341
- @i -= 1 while @code && @code[@i-1] !~ /\n|\r/ && !@code[@i-1].to_s.empty?
342
- new_chunk = get_new_chunk
343
- new_chunk.to_s[/\A(.+?)(if|unless).+?$/] && !$1.strip.empty?
355
+ @s.pos -= 1 until @s.bol?
356
+ @s.check(/\A(.+?)(if|unless).+?$/) && !@s[1].strip.empty?
344
357
  ensure
345
- @i = old_i
346
- end
347
-
348
- def get_new_chunk
349
- @code[@i..-1]
358
+ @s.pos = old_pos
350
359
  end
351
360
 
352
- def more_code_to_tokenize?
353
- @i < @code.size
354
- end
355
-
356
- def peek(n = 1)
357
- @chunk[n]
358
- end
359
- end
361
+ end unless defined?(Riml::Lexer)
360
362
  end