riml 0.3.6 → 0.3.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -34,8 +34,10 @@ module Riml
34
34
  def self.trap(*signals, &block)
35
35
  signals.each do |sig|
36
36
  Signal.trap(sig) do
37
- rollback! if @guarding > 0
38
- block.call if block
37
+ if @guarding > 0
38
+ rollback!
39
+ block.call if block
40
+ end
39
41
  end
40
42
  end
41
43
  end
@@ -39,8 +39,8 @@ rule
39
39
 
40
40
  Root:
41
41
  /* nothing */ { result = make_node(val) { |_| Riml::Nodes.new([]) } }
42
- | Statements { result = val[0] }
43
42
  | Terminator { result = make_node(val) { |_| Riml::Nodes.new([]) } }
43
+ | Statements { result = val[0] }
44
44
  ;
45
45
 
46
46
  # any list of expressions
@@ -163,10 +163,12 @@ rule
163
163
  DictionaryLiteral { result = make_node(val) { |v| Riml::DictionaryNode.new(v[0]) } }
164
164
  ;
165
165
 
166
- # {'key': 'value', 'key': 'value'}
166
+ # {'key': 'value', 'key2': 'value2'}
167
+ # Save as [['key', 'value'], ['key2', 'value2']] because ruby-1.8.7 offers
168
+ # no guarantee for key-value pair ordering.
167
169
  DictionaryLiteral:
168
- '{' DictItems '}' { result = Hash[val[1]] }
169
- | '{' DictItems ',' '}' { result = Hash[val[1]] }
170
+ '{' DictItems '}' { result = val[1] }
171
+ | '{' DictItems ',' '}' { result = val[1] }
170
172
  ;
171
173
 
172
174
  # [[key, value], [key, value]]
@@ -558,8 +560,8 @@ end
558
560
  ---- header
559
561
  require File.expand_path("../lexer", __FILE__)
560
562
  require File.expand_path("../nodes", __FILE__)
561
- require File.expand_path("../ast_rewriter", __FILE__)
562
563
  require File.expand_path("../errors", __FILE__)
564
+ require File.expand_path("../ast_rewriter", __FILE__)
563
565
  ---- inner
564
566
  # This code will be put as-is in the parser class
565
567
 
@@ -590,13 +592,14 @@ end
590
592
  ast = do_parse
591
593
  rescue Racc::ParseError => e
592
594
  raise unless @lexer
593
- if @lexer.prev_token_is_keyword?
594
- warning = "#{@lexer.invalid_keyword.inspect} is a keyword, and cannot " \
595
+ if (invalid_token = @lexer.prev_token_is_keyword?)
596
+ warning = "#{invalid_token.inspect} is a keyword, and cannot " \
595
597
  "be used as a variable name"
596
598
  end
597
- error_msg = "#{e.message} at #{@lexer.filename}:#{@lexer.lineno}"
598
- error_msg << "\n\n#{warning}" if warning
599
- raise Riml::ParseError, error_msg
599
+ error_msg = e.message
600
+ error_msg << "\nWARNING: #{warning}" if warning
601
+ error = Riml::ParseError.new(error_msg, @lexer.filename, @lexer.lineno)
602
+ raise error
600
603
  end
601
604
  self.class.ast_cache[filename] = ast if filename
602
605
  end
@@ -613,7 +616,9 @@ end
613
616
  def next_token
614
617
  return @tokens.shift unless @lexer
615
618
  token = @lexer.next_token
616
- @current_parser_info = token.pop if token
619
+ if token && @lexer.parser_info
620
+ @current_parser_info = token.pop
621
+ end
617
622
  token
618
623
  end
619
624
 
@@ -50,11 +50,11 @@ module Riml
50
50
  end
51
51
 
52
52
  def constructor_name
53
- "#{name}Constructor"
53
+ "#{@name}Constructor"
54
54
  end
55
55
 
56
56
  def constructor_obj_name
57
- @name[0].downcase + @name[1..-1] + "Obj"
57
+ @name[0, 1].downcase + @name[1..-1] + "Obj"
58
58
  end
59
59
 
60
60
  private
@@ -1,3 +1,6 @@
1
+ # encoding: utf-8
2
+
3
+ require 'strscan'
1
4
  require File.expand_path('../constants', __FILE__)
2
5
  require File.expand_path('../errors', __FILE__)
3
6
 
@@ -11,23 +14,17 @@ module Riml
11
14
  ANCHORED_INTERPOLATION_REGEX = /\A#{INTERPOLATION_REGEX}/m
12
15
  INTERPOLATION_SPLIT_REGEX = /(\#\{.*?\})/m
13
16
 
14
- attr_reader :tokens, :prev_token, :chunk, :current_indent,
15
- :invalid_keyword, :filename, :parser_info
17
+ attr_reader :tokens, :prev_token, :current_indent,
18
+ :filename, :parser_info
16
19
  attr_accessor :lineno
17
20
  # for REPL
18
21
  attr_accessor :ignore_indentation_check
19
22
 
20
23
  def initialize(code, filename = nil, parser_info = false)
21
- @code = code
22
- @code.chomp!
23
- @filename = filename
24
+ code.chomp!
25
+ @s = StringScanner.new(code)
26
+ @filename = filename || COMPILED_STRING_LOCATION
24
27
  @parser_info = parser_info
25
- set_start_state!
26
- end
27
-
28
- def set_start_state!
29
- # number of characters consumed
30
- @i = 0
31
28
  # array of doubles and triples: [tokenname, tokenval, lineno_to_add(optional)]
32
29
  # ex: [[:NEWLINE, "\n"]] OR [[:NEWLINE, "\n", 1]]
33
30
  @token_buf = []
@@ -43,18 +40,16 @@ module Riml
43
40
  @indent_pending = false
44
41
  @dedent_pending = false
45
42
  @in_function_declaration = false
46
- @invalid_keyword = nil
47
43
  end
48
44
 
49
45
  def tokenize
50
- set_start_state!
51
46
  while next_token != nil; end
52
47
  @tokens
53
48
  end
54
49
 
55
50
  def next_token
56
- while @token_buf.empty? && more_code_to_tokenize?
57
- tokenize_chunk(get_new_chunk)
51
+ while @token_buf.empty? && !@s.eos?
52
+ tokenize_chunk
58
53
  end
59
54
  if !@token_buf.empty?
60
55
  token = @token_buf.shift
@@ -74,55 +69,48 @@ module Riml
74
69
  nil
75
70
  end
76
71
 
77
- def tokenize_chunk(chunk)
78
- @chunk = chunk
72
+ def tokenize_chunk
79
73
  # deal with line continuations
80
- if cont = chunk[/\A\r?\n*[ \t\f]*\\/m]
81
- @i += cont.size
74
+ if cont = @s.scan(/\A\r?\n*[ \t\f]*\\/m)
82
75
  @lineno += cont.each_line.to_a.size - 1
83
76
  return
84
77
  end
85
78
 
86
79
  # all lines that start with ':' pass right through unmodified
87
- if (prev_token.nil? || prev_token[0] == :NEWLINE) && (ex_literal = chunk[/\A[ \t\f]*:(.*)?$/])
88
- @i += ex_literal.size
89
- @token_buf << [:EX_LITERAL, $1]
80
+ if (prev_token.nil? || prev_token[0] == :NEWLINE) && @s.scan(/\A[ \t\f]*:(.*)?$/)
81
+ @token_buf << [:EX_LITERAL, @s[1]]
90
82
  return
91
83
  end
92
84
 
93
- if splat_var = chunk[/\Aa:\d+/]
94
- @i += splat_var.size
85
+ if splat_var = @s.scan(/\Aa:\d+/)
95
86
  @token_buf << [:SCOPE_MODIFIER, 'a:'] << [:IDENTIFIER, splat_var[2..-1]]
96
87
  # the 'n' scope modifier is added by riml
97
- elsif scope_modifier = chunk[/\A([bwtglsavn]:)(\w|\{)/, 1]
98
- @i += 2
99
- @token_buf << [:SCOPE_MODIFIER, scope_modifier]
100
- elsif scope_modifier_literal = chunk[/\A([bwtglsavn]:)/]
101
- @i += scope_modifier_literal.size
88
+ elsif @s.check(/\A([bwtglsavn]:)(\w|\{)/)
89
+ @token_buf << [:SCOPE_MODIFIER, @s[1]]
90
+ @s.pos += 2
91
+ elsif scope_modifier_literal = @s.scan(/\A([bwtglsavn]:)/)
102
92
  @token_buf << [:SCOPE_MODIFIER_LITERAL, scope_modifier_literal]
103
- elsif special_var_prefix = chunk[/\A(&(\w:)?(?!&)|\$|@)/]
93
+ elsif special_var_prefix = (!@s.check(/\A&(\w:)?&/) && @s.scan(/\A(&(\w:)?|\$|@)/))
104
94
  @token_buf << [:SPECIAL_VAR_PREFIX, special_var_prefix.strip]
105
- @i += special_var_prefix.size
106
95
  if special_var_prefix == '@'
107
- new_chunk = get_new_chunk
108
- next_char = new_chunk[0]
96
+ next_char = @s.peek(1)
109
97
  if REGISTERS.include?(next_char)
110
98
  @token_buf << [:IDENTIFIER, next_char]
111
- @i += 1
99
+ @s.getch
112
100
  end
113
101
  else
114
102
  @expecting_identifier = true
115
103
  end
116
- elsif function_method = chunk[/\A(function)\(/, 1]
117
- @token_buf << [:IDENTIFIER, function_method]
118
- @i += function_method.size
119
- elsif identifier = chunk[/\A[a-zA-Z_][\w#]*(\?|!)?/]
104
+ elsif @s.scan(/\A(function)\(/)
105
+ @token_buf << [:IDENTIFIER, @s[1]]
106
+ @s.pos -= 1
107
+ elsif identifier = @s.check(/\A[a-zA-Z_][\w#]*(\?|!)?/)
120
108
  # keyword identifiers
121
109
  if KEYWORDS.include?(identifier)
122
110
  if identifier.match(/\Afunction/)
123
111
  old_identifier = identifier.dup
124
112
  identifier.sub!(/function/, "def")
125
- @i += (old_identifier.size - identifier.size)
113
+ @s.pos += (old_identifier.size - identifier.size)
126
114
  end
127
115
 
128
116
  if DEFINE_KEYWORDS.include?(identifier)
@@ -131,72 +119,62 @@ module Riml
131
119
 
132
120
  # strip '?' out of token names and replace '!' with '_bang'
133
121
  token_name = identifier.sub(/\?\Z/, "").sub(/!\Z/, "_bang").upcase
134
- track_indent_level(chunk, identifier)
122
+ track_indent_level(identifier)
135
123
 
136
124
  if VIML_END_KEYWORDS.include?(identifier)
137
125
  token_name = :END
138
126
  end
139
127
 
140
- @token_buf << [token_name.intern, identifier]
128
+ @token_buf << [token_name.to_sym, identifier]
141
129
 
142
- elsif BUILTIN_COMMANDS.include?(identifier) && peek(identifier.size) != '('
130
+ elsif BUILTIN_COMMANDS.include?(identifier) && @s.peek(identifier.size + 1)[-1, 1] != '('
143
131
  @token_buf << [:BUILTIN_COMMAND, identifier]
144
132
  elsif RIML_FILE_COMMANDS.include? identifier
145
133
  @token_buf << [:RIML_FILE_COMMAND, identifier]
146
134
  elsif RIML_CLASS_COMMANDS.include? identifier
147
135
  @token_buf << [:RIML_CLASS_COMMAND, identifier]
148
136
  elsif VIML_COMMANDS.include?(identifier) && (prev_token.nil? || prev_token[0] == :NEWLINE)
149
- @i += identifier.size
150
- new_chunk = get_new_chunk
151
- until_eol = new_chunk[/.*$/].to_s
137
+ @s.pos += identifier.size
138
+ until_eol = @s.scan(/.*$/).to_s
152
139
  @token_buf << [:EX_LITERAL, identifier << until_eol]
153
- @i += until_eol.size
154
140
  return
155
141
  # method names and variable names
156
142
  else
157
143
  @token_buf << [:IDENTIFIER, identifier]
158
144
  end
159
145
 
160
- @i += identifier.size
146
+ @s.pos += identifier.size
161
147
 
162
148
  parse_dict_vals!
163
149
 
164
- elsif @in_function_declaration && (splat_param = chunk[/\A(\.{3}|\*[a-zA-Z_]\w*)/])
150
+ elsif @in_function_declaration && (splat_param = @s.scan(/\A(\.{3}|\*[a-zA-Z_]\w*)/))
165
151
  @token_buf << [:SPLAT_PARAM, splat_param]
166
- @i += splat_param.size
167
- elsif !@in_function_declaration && (splat_arg = chunk[/\A\*([bwtglsavn]:)?([a-zA-Z_]\w*|\d+)/])
152
+ elsif !@in_function_declaration && (splat_arg = @s.scan(/\A\*([bwtglsavn]:)?([a-zA-Z_]\w*|\d+)/))
168
153
  @token_buf << [:SPLAT_ARG, splat_arg]
169
- @i += splat_arg.size
170
154
  # integer (octal)
171
- elsif octal = chunk[/\A0[0-7]+/]
155
+ elsif octal = @s.scan(/\A0[0-7]+/)
172
156
  @token_buf << [:NUMBER, octal]
173
- @i += octal.size
174
157
  # integer (hex)
175
- elsif hex = chunk[/\A0[xX]\h+/]
158
+ elsif hex = @s.scan(/\A0[xX][0-9a-fA-F]+/)
176
159
  @token_buf << [:NUMBER, hex]
177
- @i += hex.size
178
160
  # integer or float (decimal)
179
- elsif decimal = chunk[/\A[0-9]+(\.[0-9]+([eE][+-]?[0-9]+)?)?/]
161
+ elsif decimal = @s.scan(/\A[0-9]+(\.[0-9]+([eE][+-]?[0-9]+)?)?/)
180
162
  @token_buf << [:NUMBER, decimal]
181
- @i += decimal.size
182
- elsif interpolation = chunk[ANCHORED_INTERPOLATION_REGEX]
163
+ elsif interpolation = @s.scan(ANCHORED_INTERPOLATION_REGEX)
183
164
  # "hey there, #{name}" = "hey there, " . name
184
165
  parts = interpolation[1...-1].split(INTERPOLATION_SPLIT_REGEX)
185
166
  handle_interpolation(*parts)
186
- @i += interpolation.size
187
- elsif (single_line_comment = chunk[SINGLE_LINE_COMMENT_REGEX]) && (prev_token.nil? || prev_token[0] == :NEWLINE)
188
- @i += single_line_comment.size + 1 # consume next newline character
167
+ elsif (single_line_comment = @s.check(SINGLE_LINE_COMMENT_REGEX)) && (prev_token.nil? || prev_token[0] == :NEWLINE)
168
+ @s.pos += single_line_comment.size
169
+ @s.pos += 1 unless @s.eos? # consume newline
189
170
  @lineno += single_line_comment.each_line.to_a.size
190
- elsif inline_comment = chunk[/\A[ \t\f]*"[^"]*?$/]
191
- @i += inline_comment.size # inline comment, don't consume newline character
171
+ elsif inline_comment = @s.scan(/\A[ \t\f]*"[^"]*?$/)
192
172
  @lineno += inline_comment.each_line.to_a.size - 1
193
- elsif string_double = chunk[/\A"(.*?)(?<!\\)"/, 1]
194
- @token_buf << [:STRING_D, string_double]
195
- @i += string_double.size + 2
196
- elsif string_single = chunk[/\A'(([^']|'')*)'/, 1]
197
- @token_buf << [:STRING_S, string_single]
198
- @i += string_single.size + 2
199
- elsif newlines = chunk[/\A([\r\n]+)/, 1]
173
+ elsif (str = lex_string_double)
174
+ @token_buf << [:STRING_D, str]
175
+ elsif @s.scan(/\A'(([^']|'')*)'/)
176
+ @token_buf << [:STRING_S, @s[1]]
177
+ elsif newlines = @s.scan(/\A([\r\n]+)/)
200
178
  # push only 1 newline
201
179
  @token_buf << [:NEWLINE, "\n"] unless prev_token && prev_token[0] == :NEWLINE
202
180
 
@@ -210,14 +188,13 @@ module Riml
210
188
  @in_function_declaration = false
211
189
  end
212
190
 
213
- @i += newlines.size
214
191
  @lineno += newlines.size
215
- elsif heredoc_pattern = chunk[%r{\A<<(.+?)\r?\n}]
216
- pattern = $1
217
- @i += heredoc_pattern.size
218
- new_chunk = get_new_chunk
219
- heredoc_string = new_chunk[%r|(.+?\r?\n)(#{Regexp.escape(pattern)})|m, 1]
220
- @i += heredoc_string.size + pattern.size
192
+ # heredoc
193
+ elsif @s.scan(%r{\A<<(.+?)\r?\n})
194
+ pattern = @s[1]
195
+ @s.check(%r|(.+?\r?\n)(#{Regexp.escape(pattern)})|m)
196
+ heredoc_string = @s[1]
197
+ @s.pos += (pattern.size + heredoc_string.size)
221
198
  heredoc_string.chomp!
222
199
  if heredoc_string =~ INTERPOLATION_REGEX || %Q("#{heredoc_string}") =~ INTERPOLATION_REGEX
223
200
  parts = heredoc_string.split(INTERPOLATION_SPLIT_REGEX)
@@ -227,37 +204,39 @@ module Riml
227
204
  end
228
205
  @lineno += heredoc_string.each_line.to_a.size
229
206
  # operators of more than 1 char
230
- elsif operator = chunk[OPERATOR_REGEX]
207
+ elsif operator = @s.scan(OPERATOR_REGEX)
231
208
  @token_buf << [operator, operator]
232
- @i += operator.size
233
- elsif regexp = chunk[%r{\A/.*?[^\\]/}]
209
+ elsif regexp = @s.scan(%r{\A/.*?[^\\]/})
234
210
  @token_buf << [:REGEXP, regexp]
235
- @i += regexp.size
236
- elsif whitespaces = chunk[/\A[ \t\f]+/]
237
- @i += whitespaces.size
211
+ # whitespaces
212
+ elsif @s.scan(/\A[ \t\f]+/)
238
213
  # operators and tokens of single chars, one of: ( ) , . [ ] ! + - = < > /
239
214
  else
240
- value = chunk[0, 1]
215
+ value = @s.getch
241
216
  if value == '|'
242
217
  @token_buf << [:NEWLINE, "\n"]
243
218
  else
244
219
  @token_buf << [value, value]
245
220
  end
246
- @i += 1
247
- if value == ']' || value == ')' && (chunk[1, 1] == '.' && chunk[3, 1] != ':')
221
+ # if we encounter `funcCall().`, the next character must be treated as
222
+ # a dictionary retrieval operation, not a string concatenation
223
+ # operation.
224
+ # However, if we see `funcCall().l:localVar`, we know it must be a
225
+ # string concatenation operation.
226
+ if value == ']' || value == ')' && (@s.peek(1) == '.' && @s.peek(3) != ':')
248
227
  parse_dict_vals!
249
228
  end
250
229
  end
251
230
  end
252
231
 
253
232
  # Checks if any of previous n tokens are keywords.
254
- # If any found, sets `@invalid_keyword` to the found token value.
233
+ # If any are found, returns the keyword; otherwise returns `false`.
255
234
  def prev_token_is_keyword?(n = 2)
256
235
  return false if n <= 0
257
236
  (1..n).each do |i|
258
237
  t = tokens[-i]
259
238
  if t && t[1] && KEYWORDS.include?(t[1])
260
- return @invalid_keyword = t[1]
239
+ return t[1]
261
240
  end
262
241
  end
263
242
  false
@@ -265,6 +244,35 @@ module Riml
265
244
 
266
245
  private
267
246
 
247
+ # we have negative lookbehind in regexp engine
248
+ if RUBY_VERSION >= '1.9'
249
+ # have to use string constructor, as parser would throw SyntaxError if
250
+ # RUBY_VERSION < '1.9'. Literal regexp is `/\A"(.*?)(?<!\\)"/`
251
+ STRING_DOUBLE_NEGATIVE_LOOKBEHIND_REGEX = Regexp.new('\A"(.*?)(?<!\\\\)"')
252
+ def lex_string_double
253
+ @s.scan(STRING_DOUBLE_NEGATIVE_LOOKBEHIND_REGEX) && @s[1]
254
+ end
255
+ # we don't have negative lookbehind in regexp engine
256
+ else
257
+ def lex_string_double
258
+ str = ''
259
+ regex = /\A"(.*?)"/
260
+ pos = @s.pos
261
+ while @s.scan(regex)
262
+ match = @s[1]
263
+ str << match
264
+ if match[-1, 1] == '\\'
265
+ str << '"'
266
+ regex = /\A(.*?)"/
267
+ else
268
+ return str
269
+ end
270
+ end
271
+ @s.pos = pos
272
+ nil
273
+ end
274
+ end
275
+
268
276
  def decorate_token(token)
269
277
  token << {
270
278
  :lineno => @lineno,
@@ -273,7 +281,7 @@ module Riml
273
281
  token
274
282
  end
275
283
 
276
- def track_indent_level(chunk, identifier)
284
+ def track_indent_level(identifier)
277
285
  case identifier.to_sym
278
286
  when :def, :def!, :defm, :defm!, :while, :until, :for, :try, :class
279
287
  @current_indent += 2
@@ -289,12 +297,11 @@ module Riml
289
297
  end
290
298
  end
291
299
 
300
+ # `dict.key` or `dict.key.other_key`, etc.
292
301
  def parse_dict_vals!
293
- # dict.key OR dict.key.other_key
294
- new_chunk = get_new_chunk
295
- if vals = new_chunk[/\A\.([\w.]+)(?!:)/, 1]
302
+ if @s.scan(/\A\.([\w.]+)(?!:)/)
303
+ vals = @s[1]
296
304
  parts = vals.split('.')
297
- @i += vals.size + 1
298
305
  if @in_function_declaration
299
306
  @token_buf.last[1] << ".#{vals}"
300
307
  else
@@ -306,13 +313,20 @@ module Riml
306
313
  end
307
314
 
308
315
  def check_indentation
309
- raise SyntaxError, "Missing #{(@current_indent / 2)} END identifier(s), " if @current_indent > 0
310
- raise SyntaxError, "#{(@current_indent / 2).abs} too many END identifiers" if @current_indent < 0
316
+ if @current_indent > 0
317
+ error_msg = "Missing #{(@current_indent / 2)} END identifier(s)"
318
+ error = Riml::SyntaxError.new(error_msg, @filename, @lineno)
319
+ raise error
320
+ elsif @current_indent < 0
321
+ error_msg = "#{(@current_indent / 2).abs} too many END identifiers"
322
+ error = Riml::SyntaxError.new(error_msg, @filename, @lineno)
323
+ raise error
324
+ end
311
325
  end
312
326
 
313
327
  def handle_interpolation(*parts)
314
328
  parts.delete_if {|p| p.empty?}.each_with_index do |part, i|
315
- if part[0..1] == '#{' && part[-1] == '}'
329
+ if part[0..1] == '#{' && part[-1, 1] == '}'
316
330
  interpolation_content = part[2...-1]
317
331
  @token_buf.concat tokenize_without_moving_pos(interpolation_content)
318
332
  else
@@ -331,30 +345,18 @@ module Riml
331
345
 
332
346
  def tokenize_without_moving_pos(code)
333
347
  Lexer.new(code, filename, false).tap do |l|
334
- l.lineno = lineno
348
+ l.lineno = @lineno
335
349
  end.tokenize
336
350
  end
337
351
 
338
352
  def statement_modifier?
339
- old_i = @i
353
+ old_pos = @s.pos
340
354
  # backtrack until the beginning of the line
341
- @i -= 1 while @code && @code[@i-1] !~ /\n|\r/ && !@code[@i-1].to_s.empty?
342
- new_chunk = get_new_chunk
343
- new_chunk.to_s[/\A(.+?)(if|unless).+?$/] && !$1.strip.empty?
355
+ @s.pos -= 1 until @s.bol?
356
+ @s.check(/\A(.+?)(if|unless).+?$/) && !@s[1].strip.empty?
344
357
  ensure
345
- @i = old_i
346
- end
347
-
348
- def get_new_chunk
349
- @code[@i..-1]
358
+ @s.pos = old_pos
350
359
  end
351
360
 
352
- def more_code_to_tokenize?
353
- @i < @code.size
354
- end
355
-
356
- def peek(n = 1)
357
- @chunk[n]
358
- end
359
- end
361
+ end unless defined?(Riml::Lexer)
360
362
  end