ruby_parser 3.1.3 → 3.2.0

Sign up to get free protection for your applications and to get access to all the features.
data/lib/ruby_parser.rb CHANGED
@@ -1,4 +1,4 @@
1
-
2
1
  require 'ruby18_parser'
3
2
  require 'ruby19_parser'
3
+ require 'ruby20_parser'
4
4
  require 'ruby_parser_extras'
@@ -99,7 +99,8 @@ class RPStringScanner < StringScanner
99
99
  alias :old_scan :scan
100
100
  def scan re
101
101
  s = old_scan re
102
- d :scan => [s, caller.first] if s
102
+ where = caller.first.split(/:/).first(2).join(":")
103
+ d :scan => [s, where] if s
103
104
  s
104
105
  end
105
106
  end
@@ -110,11 +111,39 @@ class RPStringScanner < StringScanner
110
111
  end
111
112
 
112
113
  module RubyParserStuff
113
- VERSION = "3.1.3" unless constants.include? "VERSION" # SIGH
114
+ VERSION = "3.2.0" unless constants.include? "VERSION" # SIGH
114
115
 
115
116
  attr_accessor :lexer, :in_def, :in_single, :file
116
117
  attr_reader :env, :comments
117
118
 
119
+ $good20 = []
120
+
121
+ %w[
122
+ ].map(&:to_i).each do |n|
123
+ $good20[n] = n
124
+ end
125
+
126
+ def debug20 n, v = nil, r = nil
127
+ raise "not yet #{n} #{v.inspect} => #{r.inspect}" unless $good20[n]
128
+ end
129
+
130
+ ruby19 = "".respond_to? :encoding
131
+
132
+ # This is in sorted order of occurrence according to
133
+ # charlock_holmes against 500k files, with UTF_8 forced
134
+ # to the top.
135
+ #
136
+ # Overwrite this constant if you need something different.
137
+ ENCODING_ORDER = [
138
+ Encoding::UTF_8, # moved to top to reflect default in 2.0
139
+ Encoding::ISO_8859_1,
140
+ Encoding::ISO_8859_2,
141
+ Encoding::ISO_8859_9,
142
+ Encoding::SHIFT_JIS,
143
+ Encoding::WINDOWS_1252,
144
+ Encoding::EUC_JP
145
+ ] if ruby19
146
+
118
147
  def syntax_error msg
119
148
  raise RubyParser::SyntaxError, msg
120
149
  end
@@ -129,7 +158,7 @@ module RubyParserStuff
129
158
  end
130
159
 
131
160
  def arg_blk_pass node1, node2 # TODO: nuke
132
- node1 = s(:arglist, node1) unless [:arglist, :array].include? node1.first
161
+ node1 = s(:arglist, node1) unless [:arglist, :call_args, :array, :args].include? node1.first
133
162
  node1 << node2 if node2
134
163
  node1
135
164
  end
@@ -146,12 +175,14 @@ module RubyParserStuff
146
175
  if sexp.size == 2 and sexp[1].sexp_type == :array then
147
176
  s(:masgn, *sexp[1][1..-1].map { |sub| clean_mlhs sub })
148
177
  else
178
+ debug20 5
149
179
  sexp
150
180
  end
151
181
  when :gasgn, :iasgn, :lasgn, :cvasgn then
152
182
  if sexp.size == 2 then
153
183
  sexp.last
154
184
  else
185
+ debug20 7
155
186
  sexp # optional value
156
187
  end
157
188
  else
@@ -182,6 +213,34 @@ module RubyParserStuff
182
213
  end
183
214
  end
184
215
 
216
+ def array_to_hash array
217
+ s(:hash, *array[1..-1])
218
+ end
219
+
220
+ def call_args args
221
+ result = s(:call_args)
222
+
223
+ args.each do |arg|
224
+ case arg
225
+ when Sexp then
226
+ case arg.sexp_type
227
+ when :array, :args, :call_args then # HACK? remove array at some point
228
+ result.concat arg[1..-1]
229
+ else
230
+ result << arg
231
+ end
232
+ when Symbol then
233
+ result << arg
234
+ when ",", nil then
235
+ # ignore
236
+ else
237
+ raise "unhandled: #{arg.inspect} in #{args.inspect}"
238
+ end
239
+ end
240
+
241
+ result
242
+ end
243
+
185
244
  def args args
186
245
  result = s(:args)
187
246
 
@@ -189,21 +248,27 @@ module RubyParserStuff
189
248
  case arg
190
249
  when Sexp then
191
250
  case arg.sexp_type
192
- when :args, :block, :array then
251
+ when :args, :block, :array, :call_args then # HACK call_args mismatch
193
252
  result.concat arg[1..-1]
194
253
  when :block_arg then
195
254
  result << :"&#{arg.last}"
196
- when :masgn then
255
+ when :shadow then
256
+ if Sexp === result.last and result.last.sexp_type == :shadow then
257
+ result.last << arg.last
258
+ else
259
+ result << arg
260
+ end
261
+ when :masgn, :block_pass, :hash then # HACK: remove. prolly call_args
197
262
  result << arg
198
263
  else
199
- raise "unhandled: #{arg.inspect}"
264
+ raise "unhandled: #{arg.sexp_type} in #{args.inspect}"
200
265
  end
201
266
  when Symbol then
202
267
  result << arg
203
- when ",", nil then
268
+ when ",", "|", ";", "(", ")", nil then
204
269
  # ignore
205
270
  else
206
- raise "unhandled: #{arg.inspect}"
271
+ raise "unhandled: #{arg.inspect} in #{args.inspect}"
207
272
  end
208
273
  end
209
274
 
@@ -216,7 +281,7 @@ module RubyParserStuff
216
281
  end
217
282
 
218
283
  def assignable(lhs, value = nil)
219
- id = lhs.to_sym
284
+ id = lhs.to_sym unless Sexp === lhs
220
285
  id = id.to_sym if Sexp === id
221
286
 
222
287
  raise "write a test 1" if id.to_s =~ /^(?:self|nil|true|false|__LINE__|__FILE__)$/
@@ -254,7 +319,7 @@ module RubyParserStuff
254
319
  end
255
320
  end
256
321
 
257
- self.env[id] ||= :lvar
322
+ self.env[id] ||= :lvar unless result.sexp_type == :cdecl # HACK? cdecl
258
323
 
259
324
  result << value if value
260
325
 
@@ -526,7 +591,7 @@ module RubyParserStuff
526
591
  # TODO: need a test with f(&b) { } to produce warning
527
592
 
528
593
  args ||= s(:arglist)
529
- args[0] = :arglist if args.first == :array
594
+ args[0] = :arglist if [:array, :call_args].include? args.first
530
595
  args = s(:arglist, args) unless args.first == :arglist
531
596
 
532
597
  # HACK quick hack to make this work quickly... easy to clean up above
@@ -932,18 +997,8 @@ module RubyParserStuff
932
997
  end
933
998
 
934
999
  def hack_encoding str, extra = nil
935
- # this is in sorted order of occurrence according to
936
- # charlock_holmes against 500k files
937
- encodings = [
938
- extra,
939
- Encoding::ISO_8859_1,
940
- Encoding::UTF_8,
941
- Encoding::ISO_8859_2,
942
- Encoding::ISO_8859_9,
943
- Encoding::SHIFT_JIS,
944
- Encoding::WINDOWS_1252,
945
- Encoding::EUC_JP,
946
- ].compact
1000
+ encodings = ENCODING_ORDER.dup
1001
+ encodings.unshift(extra) unless extra.nil?
947
1002
 
948
1003
  # terrible, horrible, no good, very bad, last ditch effort.
949
1004
  encodings.each do |enc|
@@ -1134,6 +1189,8 @@ module RubyParserStuff
1134
1189
  WORDLIST18 = Hash[*wordlist.map { |o| [o.name, o] }.flatten]
1135
1190
  WORDLIST19 = Hash[*wordlist.map { |o| [o.name, o] }.flatten]
1136
1191
 
1192
+ WORDLIST18.delete "__ENCODING__"
1193
+
1137
1194
  %w[and case elsif for if in module or unless until when while].each do |k|
1138
1195
  WORDLIST19[k] = WORDLIST19[k].dup
1139
1196
  WORDLIST19[k].state = :expr_value
@@ -1267,6 +1324,10 @@ module RubyParserStuff
1267
1324
  end
1268
1325
  end
1269
1326
 
1327
+ class Ruby20Parser < Racc::Parser
1328
+ include RubyParserStuff
1329
+ end
1330
+
1270
1331
  class Ruby19Parser < Racc::Parser
1271
1332
  include RubyParserStuff
1272
1333
  end
@@ -1286,12 +1347,17 @@ class RubyParser
1286
1347
  def initialize
1287
1348
  @p18 = Ruby18Parser.new
1288
1349
  @p19 = Ruby19Parser.new
1350
+ @p20 = Ruby20Parser.new
1289
1351
  end
1290
1352
 
1291
1353
  def process(s, f = "(string)", t = 10) # parens for emacs *sigh*
1292
- @p19.process s, f, t
1293
- rescue Racc::ParseError
1294
- @p18.process s, f, t
1354
+ @p20.process s, f, t
1355
+ rescue Racc::ParseError, RubyParser::SyntaxError
1356
+ begin
1357
+ @p19.process s, f, t
1358
+ rescue Racc::ParseError, RubyParser::SyntaxError
1359
+ @p18.process s, f, t
1360
+ end
1295
1361
  end
1296
1362
 
1297
1363
  alias :parse :process
@@ -1307,6 +1373,8 @@ class RubyParser
1307
1373
  Ruby18Parser.new
1308
1374
  when /^1\.9/ then
1309
1375
  Ruby19Parser.new
1376
+ when /^2.0/ then
1377
+ Ruby20Parser.new
1310
1378
  else
1311
1379
  raise "unrecognized RUBY_VERSION #{RUBY_VERSION}"
1312
1380
  end
@@ -1337,7 +1405,7 @@ class Sexp
1337
1405
  end
1338
1406
 
1339
1407
  def to_sym
1340
- raise "no"
1408
+ raise "no: #{self.inspect}.to_sym is a bug"
1341
1409
  self.value.to_sym
1342
1410
  end
1343
1411
 
@@ -2,30 +2,49 @@
2
2
  # encoding: US-ASCII
3
3
 
4
4
  require 'rubygems'
5
- gem "minitest"
6
-
7
5
  require 'minitest/autorun'
8
6
  require 'ruby_lexer'
9
7
  require 'ruby18_parser'
8
+ require 'ruby20_parser'
9
+
10
+ class TestRubyLexer < Minitest::Test
11
+ attr_accessor :processor, :lex, :parser_class
10
12
 
11
- class TestRubyLexer < MiniTest::Unit::TestCase
12
- alias :deny :refute
13
+ alias :lexer :lex # lets me copy/paste code from parser
14
+ alias :lexer= :lex=
13
15
 
14
16
  def setup
15
- setup_lexer Ruby18Parser
17
+ setup_lexer_class Ruby20Parser
16
18
  end
17
19
 
18
- def setup_lexer parser_class
19
- p = parser_class.new
20
- @lex = p.lexer
21
- @lex.src = "blah blah"
22
- @lex.lex_state = :expr_beg
20
+ def setup_lexer input, exp_sexp = nil
21
+ setup_new_parser
22
+ lex.src = input
23
+ lex.lex_state = :expr_beg
24
+ assert_equal exp_sexp, processor.class.new.parse(input) if exp_sexp
25
+ end
26
+
27
+ def setup_new_parser
28
+ self.processor = parser_class.new
29
+ self.lex = processor.lexer
30
+ end
31
+
32
+ def setup_lexer_class parser_class
33
+ self.parser_class = parser_class
34
+ setup_new_parser
35
+ setup_lexer "blah blah"
23
36
  end
24
37
 
25
38
  def test_advance
26
39
  assert @lex.advance # blah
27
40
  assert @lex.advance # blah
28
- deny @lex.advance # nada
41
+ refute @lex.advance # nada
42
+ end
43
+
44
+ def test_unicode_ident
45
+ s = "@\u1088\u1077\u1093\u1072"
46
+ util_lex_token(s.dup,
47
+ :tIVAR, s.dup)
29
48
  end
30
49
 
31
50
  def test_read_escape
@@ -44,6 +63,11 @@ class TestRubyLexer < MiniTest::Unit::TestCase
44
63
  util_escape "\010", "b"
45
64
  util_escape " ", "s"
46
65
  util_escape "q", "q" # plain vanilla escape
66
+
67
+ util_escape "8", "8" # ugh... mri... WHY?!?
68
+ util_escape "9", "9" # ugh... mri... WHY?!?
69
+
70
+ util_escape "$", "444" # ugh
47
71
  end
48
72
 
49
73
  def test_read_escape_c
@@ -133,6 +157,8 @@ class TestRubyLexer < MiniTest::Unit::TestCase
133
157
  end
134
158
 
135
159
  def test_yylex_label__18
160
+ setup_lexer_class Ruby18Parser
161
+
136
162
  util_lex_token("{a:",
137
163
  :tLBRACE, "{",
138
164
  :tIDENTIFIER, "a",
@@ -140,6 +166,8 @@ class TestRubyLexer < MiniTest::Unit::TestCase
140
166
  end
141
167
 
142
168
  def test_yylex_label_in_params__18
169
+ setup_lexer_class Ruby18Parser
170
+
143
171
  util_lex_token("foo(a:",
144
172
  :tIDENTIFIER, "foo",
145
173
  :tLPAREN2, "(",
@@ -148,7 +176,7 @@ class TestRubyLexer < MiniTest::Unit::TestCase
148
176
  end
149
177
 
150
178
  def test_yylex_label__19
151
- setup_lexer Ruby19Parser
179
+ setup_lexer_class Ruby19Parser
152
180
 
153
181
  util_lex_token("{a:",
154
182
  :tLBRACE, "{",
@@ -156,7 +184,7 @@ class TestRubyLexer < MiniTest::Unit::TestCase
156
184
  end
157
185
 
158
186
  def test_yylex_label_in_params__19
159
- setup_lexer Ruby19Parser
187
+ setup_lexer_class Ruby19Parser
160
188
 
161
189
  util_lex_token("foo(a:",
162
190
  :tIDENTIFIER, "foo",
@@ -164,6 +192,345 @@ class TestRubyLexer < MiniTest::Unit::TestCase
164
192
  :tLABEL, "a")
165
193
  end
166
194
 
195
+ def assert_next_lexeme token=nil, value=nil, state=nil, paren=nil, brace=nil
196
+ assert @lex.advance, "no more tokens"
197
+
198
+ msg = message {
199
+ act = [@lex.token, @lex.yacc_value, @lex.lex_state,
200
+ @lex.paren_nest, @lex.brace_nest]
201
+ exp = [token, value, state, paren, brace]
202
+ "#{exp.inspect} vs #{act.inspect}"
203
+ }
204
+
205
+ act_value = @lex.yacc_value
206
+ act_value = act_value.first if Array === act_value
207
+
208
+ assert_equal token, @lex.token, msg
209
+ assert_equal value, act_value, msg
210
+ assert_equal state, @lex.lex_state, msg
211
+ assert_equal paren, @lex.paren_nest, msg if paren
212
+ assert_equal brace, @lex.brace_nest, msg if brace
213
+ end
214
+
215
+ def refute_lexeme
216
+ refute @lex.advance, "not empty: #{[@lex.token, @lex.yacc_value].inspect}"
217
+ end
218
+
219
+ def assert_lex input, exp_sexp, *args
220
+ setup_lexer input, exp_sexp
221
+
222
+ args.each_slice(5) do |token, value, state, paren, brace|
223
+ assert_next_lexeme token, value, state, paren, brace
224
+ end
225
+
226
+ refute_lexeme
227
+ end
228
+
229
+ def emulate_string_interpolation
230
+ lex_strterm = lexer.lex_strterm
231
+ string_nest = lexer.string_nest
232
+ brace_nest = lexer.brace_nest
233
+
234
+ lexer.string_nest = 0
235
+ lexer.brace_nest = 0
236
+ lexer.cond.push false
237
+ lexer.cmdarg.push false
238
+
239
+ lexer.lex_strterm = nil
240
+ lexer.lex_state = :expr_beg
241
+
242
+ yield
243
+
244
+ lexer.lex_state = :expr_endarg
245
+ assert_next_lexeme :tRCURLY, "}", :expr_endarg, 0
246
+
247
+ lexer.lex_strterm = lex_strterm
248
+ lexer.lex_state = :expr_beg
249
+ lexer.string_nest = string_nest
250
+ lexer.brace_nest = brace_nest
251
+
252
+ lexer.cond.lexpop
253
+ lexer.cmdarg.lexpop
254
+ end
255
+
256
+ def test_yylex_paren_string_parens_interpolated
257
+ setup_lexer('%((#{b}#{d}))',
258
+ s(:dstr,
259
+ "(",
260
+ s(:evstr, s(:call, nil, :b)),
261
+ s(:evstr, s(:call, nil, :d)),
262
+ s(:str, ")")))
263
+
264
+ assert_next_lexeme :tSTRING_BEG, "%)", :expr_beg, 0, 0
265
+ assert_next_lexeme :tSTRING_CONTENT, "(", :expr_beg, 0, 0
266
+ assert_next_lexeme :tSTRING_DBEG, nil, :expr_beg, 0, 0
267
+
268
+ emulate_string_interpolation do
269
+ assert_next_lexeme :tIDENTIFIER, "b", :expr_arg, 0, 0
270
+ end
271
+
272
+ assert_next_lexeme :tSTRING_DBEG, nil, :expr_beg, 0, 0
273
+
274
+ emulate_string_interpolation do
275
+ assert_next_lexeme :tIDENTIFIER, "d", :expr_arg, 0, 0
276
+ end
277
+
278
+ assert_next_lexeme :tSTRING_CONTENT, ")", :expr_beg, 0, 0
279
+ assert_next_lexeme :tSTRING_END, ")", :expr_end, 0, 0
280
+
281
+ refute_lexeme
282
+ end
283
+
284
+ def test_yylex_paren_string_interpolated_regexp
285
+ setup_lexer('%( #{(/abcd/)} )',
286
+ s(:dstr, " ", s(:evstr, s(:lit, /abcd/)), s(:str, " ")))
287
+
288
+ assert_next_lexeme :tSTRING_BEG, "%)", :expr_beg, 0, 0
289
+ assert_next_lexeme :tSTRING_CONTENT, " ", :expr_beg, 0, 0
290
+ assert_next_lexeme :tSTRING_DBEG, nil, :expr_beg, 0, 0
291
+
292
+ emulate_string_interpolation do
293
+ assert_next_lexeme :tLPAREN, "(", :expr_beg, 1, 0
294
+ assert_next_lexeme :tREGEXP_BEG, "/", :expr_beg, 1, 0
295
+ assert_next_lexeme :tSTRING_CONTENT, "abcd", :expr_beg, 1, 0
296
+ assert_next_lexeme :tREGEXP_END, "", :expr_end, 1, 0
297
+ assert_next_lexeme :tRPAREN, ")", :expr_endfn, 0, 0
298
+ end
299
+
300
+ assert_next_lexeme :tSTRING_CONTENT, " ", :expr_beg, 0, 0
301
+ assert_next_lexeme :tSTRING_END, ")", :expr_end, 0, 0
302
+
303
+ refute_lexeme
304
+ end
305
+
306
+ def test_yylex_not_at_defn
307
+ assert_lex("def +@; end",
308
+ s(:defn, :+@, s(:args), s(:nil)),
309
+
310
+ :kDEF, "def", :expr_fname, 0, 0,
311
+ :tUPLUS, "+@", :expr_arg, 0, 0,
312
+ :tSEMI, ";", :expr_beg, 0, 0,
313
+ :kEND, "end", :expr_end, 0, 0)
314
+
315
+ assert_lex("def !@; end",
316
+ s(:defn, :"!@", s(:args), s(:nil)),
317
+
318
+ :kDEF, "def", :expr_fname, 0, 0,
319
+ :tUBANG, "!@", :expr_arg, 0, 0,
320
+ :tSEMI, ";", :expr_beg, 0, 0,
321
+ :kEND, "end", :expr_end, 0, 0)
322
+ end
323
+
324
+ def test_yylex_not_at_ivar
325
+ assert_lex("!@ivar",
326
+ s(:call, s(:ivar, :@ivar), :"!"),
327
+
328
+ :tBANG, "!", :expr_beg, 0, 0,
329
+ :tIVAR, "@ivar", :expr_end, 0, 0)
330
+ end
331
+
332
+ def test_yylex_number_times_ident_times_return_number
333
+ assert_lex("1 * b * 3",
334
+ s(:call,
335
+ s(:call, s(:lit, 1), :*, s(:call, nil, :b)),
336
+ :*, s(:lit, 3)),
337
+
338
+ :tINTEGER, 1, :expr_end, 0, 0,
339
+ :tSTAR2, "*", :expr_beg, 0, 0,
340
+ :tIDENTIFIER, "b", :expr_arg, 0, 0,
341
+ :tSTAR2, "*", :expr_beg, 0, 0,
342
+ :tINTEGER, 3, :expr_end, 0, 0)
343
+
344
+ assert_lex("1 * b *\n 3",
345
+ s(:call,
346
+ s(:call, s(:lit, 1), :*, s(:call, nil, :b)),
347
+ :*, s(:lit, 3)),
348
+
349
+ :tINTEGER, 1, :expr_end, 0, 0,
350
+ :tSTAR2, "*", :expr_beg, 0, 0,
351
+ :tIDENTIFIER, "b", :expr_arg, 0, 0,
352
+ :tSTAR2, "*", :expr_beg, 0, 0,
353
+ :tINTEGER, 3, :expr_end, 0, 0)
354
+ end
355
+
356
+ def test_yylex_paren_string_parens_interpolated_regexp
357
+ setup_lexer('%((#{(/abcd/)}))',
358
+ s(:dstr, "(", s(:evstr, s(:lit, /abcd/)), s(:str, ")")))
359
+
360
+ assert_next_lexeme :tSTRING_BEG, "%)", :expr_beg, 0, 0
361
+ assert_next_lexeme :tSTRING_CONTENT, "(", :expr_beg, 0, 0
362
+
363
+ assert_next_lexeme :tSTRING_DBEG, nil, :expr_beg, 0, 0
364
+
365
+ emulate_string_interpolation do
366
+ assert_next_lexeme :tLPAREN, "(", :expr_beg, 1, 0
367
+ assert_next_lexeme :tREGEXP_BEG, "/", :expr_beg, 1, 0
368
+ assert_next_lexeme :tSTRING_CONTENT, "abcd", :expr_beg, 1, 0
369
+ assert_next_lexeme :tREGEXP_END, "", :expr_end, 1, 0
370
+ assert_next_lexeme :tRPAREN, ")", :expr_endfn, 0, 0
371
+ end
372
+
373
+ assert_next_lexeme :tSTRING_CONTENT, ")", :expr_beg, 0, 0
374
+ assert_next_lexeme :tSTRING_END, ")", :expr_end, 0, 0
375
+
376
+ refute_lexeme
377
+ end
378
+
379
+ def test_yylex_method_parens_chevron
380
+ assert_lex("a()<<1",
381
+ s(:call, s(:call, nil, :a), :<<, s(:lit, 1)),
382
+ :tIDENTIFIER, "a", :expr_cmdarg, 0, 0,
383
+ :tLPAREN2, "(", :expr_beg, 1, 0,
384
+ :tRPAREN, ")", :expr_endfn, 0, 0,
385
+ :tLSHFT, "<<" , :expr_beg, 0, 0,
386
+ :tINTEGER, 1, :expr_end, 0, 0)
387
+ end
388
+
389
+ def test_yylex_lambda_args__20
390
+ setup_lexer_class Ruby20Parser
391
+
392
+ assert_lex("-> (a) { }",
393
+ s(:iter, s(:call, nil, :lambda),
394
+ s(:args, :a)),
395
+
396
+ :tLAMBDA, nil, :expr_endfn, 0, 0,
397
+ :tLPAREN2, "(", :expr_beg, 1, 0,
398
+ :tIDENTIFIER, "a", :expr_arg, 1, 0,
399
+ :tRPAREN, ")", :expr_endfn, 0, 0,
400
+ :tLCURLY, "{", :expr_beg, 0, 1,
401
+ :tRCURLY, "}", :expr_endarg, 0, 0)
402
+ end
403
+
404
+ def test_yylex_lambda_args_opt__20
405
+ setup_lexer_class Ruby20Parser
406
+
407
+ assert_lex("-> (a=nil) { }",
408
+ s(:iter, s(:call, nil, :lambda),
409
+ s(:args, s(:lasgn, :a, s(:nil)))),
410
+
411
+ :tLAMBDA, nil, :expr_endfn, 0, 0,
412
+ :tLPAREN2, "(", :expr_beg, 1, 0,
413
+ :tIDENTIFIER, "a", :expr_arg, 1, 0,
414
+ :tEQL, "=", :expr_beg, 1, 0,
415
+ :kNIL, "nil", :expr_end, 1, 0,
416
+ :tRPAREN, ")", :expr_endfn, 0, 0,
417
+ :tLCURLY, "{", :expr_beg, 0, 1,
418
+ :tRCURLY, "}", :expr_endarg, 0, 0)
419
+ end
420
+
421
+ def test_yylex_lambda_hash__20
422
+ setup_lexer_class Ruby20Parser
423
+
424
+ assert_lex("-> (a={}) { }",
425
+ s(:iter, s(:call, nil, :lambda),
426
+ s(:args, s(:lasgn, :a, s(:hash)))),
427
+
428
+ :tLAMBDA, nil, :expr_endfn, 0, 0,
429
+ :tLPAREN2, "(", :expr_beg, 1, 0,
430
+ :tIDENTIFIER, "a", :expr_arg, 1, 0,
431
+ :tEQL, "=", :expr_beg, 1, 0,
432
+ :tLBRACE, "{", :expr_beg, 1, 1,
433
+ :tRCURLY, "}", :expr_endarg, 1, 0,
434
+ :tRPAREN, ")", :expr_endfn, 0, 0,
435
+ :tLCURLY, "{", :expr_beg, 0, 1,
436
+ :tRCURLY, "}", :expr_endarg, 0, 0)
437
+ end
438
+
439
+ def test_yylex_iter_array_curly
440
+ assert_lex("f :a, [:b] { |c, d| }", # yes, this is bad code
441
+ s(:iter,
442
+ s(:call, nil, :f, s(:lit, :a), s(:array, s(:lit, :b))),
443
+ s(:args, :c, :d)),
444
+
445
+ :tIDENTIFIER, "f", :expr_cmdarg, 0, 0,
446
+ :tSYMBOL, "a", :expr_end, 0, 0,
447
+ :tCOMMA, ",", :expr_beg, 0, 0,
448
+ :tLBRACK, "[", :expr_beg, 1, 0,
449
+ :tSYMBOL, "b", :expr_end, 1, 0,
450
+ :tRBRACK, "]", :expr_endarg, 0, 0,
451
+ :tLBRACE_ARG, "{", :expr_beg, 0, 1,
452
+ :tPIPE, "|", :expr_beg, 0, 1,
453
+ :tIDENTIFIER, "c", :expr_arg, 0, 1,
454
+ :tCOMMA, ",", :expr_beg, 0, 1,
455
+ :tIDENTIFIER, "d", :expr_arg, 0, 1,
456
+ :tPIPE, "|", :expr_beg, 0, 1,
457
+ :tRCURLY, "}", :expr_endarg, 0, 0)
458
+ end
459
+
460
+ def test_yylex_const_call_same_name
461
+ assert_lex("X = a { }; b { f :c }",
462
+ s(:block,
463
+ s(:cdecl, :X, s(:iter, s(:call, nil, :a), s(:args))),
464
+ s(:iter,
465
+ s(:call, nil, :b),
466
+ s(:args),
467
+ s(:call, nil, :f, s(:lit, :c)))),
468
+
469
+ :tCONSTANT, "X", :expr_cmdarg, 0, 0,
470
+ :tEQL, "=", :expr_beg, 0, 0,
471
+ :tIDENTIFIER, "a", :expr_arg, 0, 0,
472
+ :tLCURLY, "{", :expr_beg, 0, 1,
473
+ :tRCURLY, "}", :expr_endarg, 0, 0,
474
+ :tSEMI, ";", :expr_beg, 0, 0,
475
+
476
+ :tIDENTIFIER, "b", :expr_cmdarg, 0, 0,
477
+ :tLCURLY, "{", :expr_beg, 0, 1,
478
+ :tIDENTIFIER, "f", :expr_cmdarg, 0, 1, # different
479
+ :tSYMBOL, "c", :expr_end, 0, 1,
480
+ :tRCURLY, "}", :expr_endarg, 0, 0)
481
+
482
+ assert_lex("X = a { }; b { X :c }",
483
+ s(:block,
484
+ s(:cdecl, :X, s(:iter, s(:call, nil, :a), s(:args))),
485
+ s(:iter,
486
+ s(:call, nil, :b),
487
+ s(:args),
488
+ s(:call, nil, :X, s(:lit, :c)))),
489
+
490
+ :tCONSTANT, "X", :expr_cmdarg, 0, 0,
491
+ :tEQL, "=", :expr_beg, 0, 0,
492
+ :tIDENTIFIER, "a", :expr_arg, 0, 0,
493
+ :tLCURLY, "{", :expr_beg, 0, 1,
494
+ :tRCURLY, "}", :expr_endarg, 0, 0,
495
+ :tSEMI, ";", :expr_beg, 0, 0,
496
+
497
+ :tIDENTIFIER, "b", :expr_cmdarg, 0, 0,
498
+ :tLCURLY, "{", :expr_beg, 0, 1,
499
+ :tCONSTANT, "X", :expr_cmdarg, 0, 1, # same
500
+ :tSYMBOL, "c", :expr_end, 0, 1,
501
+ :tRCURLY, "}", :expr_endarg, 0, 0)
502
+ end
503
+
504
+ def test_yylex_lasgn_call_same_name
505
+ assert_lex("a = b.c :d => 1",
506
+ s(:lasgn, :a,
507
+ s(:call, s(:call, nil, :b), :c,
508
+ s(:hash, s(:lit, :d), s(:lit, 1)))),
509
+
510
+ :tIDENTIFIER, "a", :expr_cmdarg, 0, 0,
511
+ :tEQL, "=", :expr_beg, 0, 0,
512
+ :tIDENTIFIER, "b", :expr_arg, 0, 0,
513
+ :tDOT, ".", :expr_dot, 0, 0,
514
+ :tIDENTIFIER, "c", :expr_arg, 0, 0, # different
515
+ :tSYMBOL, "d", :expr_end, 0, 0,
516
+ :tASSOC, "=>", :expr_beg, 0, 0,
517
+ :tINTEGER, 1, :expr_end, 0, 0)
518
+
519
+ assert_lex("a = b.a :d => 1",
520
+ s(:lasgn, :a,
521
+ s(:call, s(:call, nil, :b), :a,
522
+ s(:hash, s(:lit, :d), s(:lit, 1)))),
523
+
524
+ :tIDENTIFIER, "a", :expr_cmdarg, 0, 0,
525
+ :tEQL, "=", :expr_beg, 0, 0,
526
+ :tIDENTIFIER, "b", :expr_arg, 0, 0,
527
+ :tDOT, ".", :expr_dot, 0, 0,
528
+ :tIDENTIFIER, "a", :expr_arg, 0, 0, # same as lvar
529
+ :tSYMBOL, "d", :expr_end, 0, 0,
530
+ :tASSOC, "=>", :expr_beg, 0, 0,
531
+ :tINTEGER, 1, :expr_end, 0, 0)
532
+ end
533
+
167
534
  def test_yylex_back_ref
168
535
  util_lex_token("[$&, $`, $', $+]",
169
536
  :tLBRACK, "[",
@@ -628,6 +995,7 @@ class TestRubyLexer < MiniTest::Unit::TestCase
628
995
  end
629
996
 
630
997
  def test_yylex_heredoc_double_interp
998
+ # TODO: convert to assert_lex
631
999
  util_lex_token("a = <<\"EOF\"\n#x a \#@a b \#$b c \#{3} \nEOF\n",
632
1000
  :tIDENTIFIER, "a",
633
1001
  :tEQL, "=",
@@ -739,10 +1107,18 @@ class TestRubyLexer < MiniTest::Unit::TestCase
739
1107
  util_lex_fname "<=>", :tCMP
740
1108
  end
741
1109
 
742
- def test_yylex_identifier_def
1110
+ def test_yylex_identifier_def__18
1111
+ setup_lexer_class Ruby18Parser
1112
+
743
1113
  util_lex_fname "identifier", :tIDENTIFIER, :expr_end
744
1114
  end
745
1115
 
1116
+ def test_yylex_identifier_def__1920
1117
+ setup_lexer_class Ruby19Parser
1118
+
1119
+ util_lex_fname "identifier", :tIDENTIFIER, :expr_endfn
1120
+ end
1121
+
746
1122
  def test_yylex_identifier_eh
747
1123
  util_lex_token("identifier?", :tFID, "identifier?")
748
1124
  end
@@ -774,10 +1150,18 @@ class TestRubyLexer < MiniTest::Unit::TestCase
774
1150
  util_lex_fname "^", :tCARET
775
1151
  end
776
1152
 
777
- def test_yylex_identifier_equals_def
1153
+ def test_yylex_identifier_equals_def__18
1154
+ setup_lexer_class Ruby18Parser
1155
+
778
1156
  util_lex_fname "identifier=", :tIDENTIFIER, :expr_end
779
1157
  end
780
1158
 
1159
+ def test_yylex_identifier_equals_def__1920
1160
+ setup_lexer_class Ruby19Parser
1161
+
1162
+ util_lex_fname "identifier=", :tIDENTIFIER, :expr_endfn
1163
+ end
1164
+
781
1165
  def test_yylex_identifier_equals_def2
782
1166
  util_lex_fname "==", :tEQ
783
1167
  end
@@ -868,25 +1252,25 @@ class TestRubyLexer < MiniTest::Unit::TestCase
868
1252
  end
869
1253
 
870
1254
  def test_yylex_question_eh_a__18
871
- @lex = RubyLexer.new 18
1255
+ setup_lexer_class Ruby18Parser
872
1256
 
873
1257
  util_lex_token "?a", :tINTEGER, 97
874
1258
  end
875
1259
 
876
1260
  def test_yylex_question_eh_a__19
877
- @lex = RubyLexer.new 19
1261
+ setup_lexer_class Ruby19Parser
878
1262
 
879
1263
  util_lex_token '?a', :tSTRING, "a"
880
1264
  end
881
1265
 
882
1266
  def test_yylex_question_eh_escape_M_escape_C__18
883
- @lex = RubyLexer.new 18
1267
+ setup_lexer_class Ruby18Parser
884
1268
 
885
1269
  util_lex_token '?\M-\C-a', :tINTEGER, 129
886
1270
  end
887
1271
 
888
1272
  def test_yylex_question_eh_escape_M_escape_C__19
889
- @lex = RubyLexer.new 19
1273
+ setup_lexer_class Ruby19Parser
890
1274
 
891
1275
  util_lex_token '?\M-\C-a', :tSTRING, "\M-\C-a"
892
1276
  end
@@ -911,6 +1295,10 @@ class TestRubyLexer < MiniTest::Unit::TestCase
911
1295
  util_bad_token "08"
912
1296
  end
913
1297
 
1298
+ def test_yylex_integer_oct_bad_range2
1299
+ util_bad_token "08"
1300
+ end
1301
+
914
1302
  def test_yylex_integer_oct_bad_underscores
915
1303
  util_bad_token "01__23"
916
1304
  end
@@ -1060,11 +1448,20 @@ class TestRubyLexer < MiniTest::Unit::TestCase
1060
1448
  util_lex_token(" (", :tLPAREN_ARG, "(")
1061
1449
  end
1062
1450
 
1063
- def test_yylex_open_bracket_exprarg
1451
+ def test_yylex_open_bracket_exprarg__18
1452
+ setup_lexer_class Ruby18Parser
1453
+
1064
1454
  @lex.lex_state = :expr_arg
1065
1455
  util_lex_token(" (", :tLPAREN2, "(")
1066
1456
  end
1067
1457
 
1458
+ def test_yylex_open_bracket_exprarg__19
1459
+ setup_lexer_class Ruby19Parser
1460
+
1461
+ @lex.lex_state = :expr_arg
1462
+ util_lex_token(" (", :tLPAREN_ARG, "(")
1463
+ end
1464
+
1068
1465
  def test_yylex_open_curly_bracket
1069
1466
  util_lex_token("{",
1070
1467
  :tLBRACE, "{")
@@ -1166,6 +1563,13 @@ class TestRubyLexer < MiniTest::Unit::TestCase
1166
1563
  util_lex_token "+@", :tUPLUS, "+@"
1167
1564
  end
1168
1565
 
1566
+ def test_yylex_not_unary_method
1567
+ skip "not yet"
1568
+
1569
+ @lex.lex_state = :expr_fname
1570
+ util_lex_token "!@", :tUBANG, "!@"
1571
+ end
1572
+
1169
1573
  def test_yylex_numbers
1170
1574
  util_lex_token "0b10", :tINTEGER, 2
1171
1575
  util_lex_token "0B10", :tINTEGER, 2
@@ -1211,13 +1615,13 @@ class TestRubyLexer < MiniTest::Unit::TestCase
1211
1615
  end
1212
1616
 
1213
1617
  def test_yylex_question__18
1214
- @lex = RubyLexer.new 18
1618
+ setup_lexer_class Ruby18Parser
1215
1619
 
1216
1620
  util_lex_token "?*", :tINTEGER, 42
1217
1621
  end
1218
1622
 
1219
1623
  def test_yylex_question__19
1220
- @lex = RubyLexer.new 19
1624
+ setup_lexer_class Ruby19Parser
1221
1625
 
1222
1626
  util_lex_token "?*", :tSTRING, "*"
1223
1627
  end
@@ -1236,7 +1640,7 @@ class TestRubyLexer < MiniTest::Unit::TestCase
1236
1640
  end
1237
1641
 
1238
1642
  def test_yylex_question_ws_backslashed__18
1239
- @lex = RubyLexer.new 18
1643
+ setup_lexer_class Ruby18Parser
1240
1644
 
1241
1645
  @lex.lex_state = :expr_beg
1242
1646
  util_lex_token "?\\ ", :tINTEGER, 32
@@ -1253,7 +1657,7 @@ class TestRubyLexer < MiniTest::Unit::TestCase
1253
1657
  end
1254
1658
 
1255
1659
  def test_yylex_question_ws_backslashed__19
1256
- @lex = RubyLexer.new 19
1660
+ setup_lexer_class Ruby19Parser
1257
1661
 
1258
1662
  @lex.lex_state = :expr_beg
1259
1663
  util_lex_token "?\\ ", :tSTRING, " "
@@ -1617,8 +2021,17 @@ class TestRubyLexer < MiniTest::Unit::TestCase
1617
2021
  end
1618
2022
 
1619
2023
  def test_yylex_string_double_escape_M
2024
+ chr = "\341"
2025
+ chr.force_encoding("UTF-8") if RubyLexer::RUBY19
2026
+
1620
2027
  util_lex_token('"\\M-a"',
1621
- :tSTRING, "\341")
2028
+ :tSTRING, chr)
2029
+ end
2030
+
2031
+ def test_why_does_ruby_hate_me?
2032
+ util_lex_token('"Nl%\000\000A\000\999"', # you should be ashamed
2033
+ :tSTRING,
2034
+ ["Nl%", "\x00", "\x00", "A", "\x00", "999"].join)
1622
2035
  end
1623
2036
 
1624
2037
  def test_yylex_string_double_escape_M_backslash
@@ -1684,6 +2097,11 @@ class TestRubyLexer < MiniTest::Unit::TestCase
1684
2097
  :tSTRING, "n = ABC")
1685
2098
  end
1686
2099
 
2100
+ def test_yylex_string_double_escape_octal_fucked
2101
+ util_lex_token('"n = \\444"',
2102
+ :tSTRING, "n = $")
2103
+ end
2104
+
1687
2105
  def test_yylex_string_double_interp
1688
2106
  util_lex_token("\"blah #x a \#@a b \#$b c \#{3} # \"",
1689
2107
  :tSTRING_BEG, "\"",
@@ -1717,6 +2135,30 @@ class TestRubyLexer < MiniTest::Unit::TestCase
1717
2135
  :tSTRING, "\000")
1718
2136
  end
1719
2137
 
2138
+ def test_yylex_string_pct_i
2139
+ util_lex_token("%i[s1 s2\ns3]",
2140
+ :tQSYMBOLS_BEG, "%i[",
2141
+ :tSTRING_CONTENT, "s1",
2142
+ :tSPACE, nil,
2143
+ :tSTRING_CONTENT, "s2",
2144
+ :tSPACE, nil,
2145
+ :tSTRING_CONTENT, "s3",
2146
+ :tSPACE, nil,
2147
+ :tSTRING_END, nil)
2148
+ end
2149
+
2150
+ def test_yylex_string_pct_I
2151
+ util_lex_token("%I[s1 s2\ns3]",
2152
+ :tSYMBOLS_BEG, "%I[",
2153
+ :tSTRING_CONTENT, "s1",
2154
+ :tSPACE, nil,
2155
+ :tSTRING_CONTENT, "s2",
2156
+ :tSPACE, nil,
2157
+ :tSTRING_CONTENT, "s3",
2158
+ :tSPACE, nil,
2159
+ :tSTRING_END, nil)
2160
+ end
2161
+
1720
2162
  def test_yylex_string_pct_Q
1721
2163
  util_lex_token("%Q[s1 s2]",
1722
2164
  :tSTRING_BEG, "%Q[",
@@ -1877,7 +2319,7 @@ class TestRubyLexer < MiniTest::Unit::TestCase
1877
2319
 
1878
2320
  def test_yylex_underscore_end
1879
2321
  @lex.src = "__END__\n"
1880
- deny @lex.advance
2322
+ refute @lex.advance
1881
2323
  end
1882
2324
 
1883
2325
  def test_yylex_uplus
@@ -1943,7 +2385,7 @@ class TestRubyLexer < MiniTest::Unit::TestCase
1943
2385
 
1944
2386
  def util_escape expected, input
1945
2387
  @lex.src = input
1946
- assert_equal expected, @lex.read_escape
2388
+ assert_equal expected, @lex.read_escape, input
1947
2389
  end
1948
2390
 
1949
2391
  def util_escape_bad input
@@ -1956,7 +2398,11 @@ class TestRubyLexer < MiniTest::Unit::TestCase
1956
2398
  def util_lex_fname name, type, end_state = :expr_arg
1957
2399
  @lex.lex_state = :expr_fname # can only set via parser's defs
1958
2400
 
1959
- util_lex_token("def #{name} ", :kDEF, "def", type, name)
2401
+ assert_lex("def #{name} ",
2402
+ nil,
2403
+
2404
+ :kDEF, "def", :expr_fname, 0, 0,
2405
+ type, name, end_state, 0, 0)
1960
2406
 
1961
2407
  assert_equal end_state, @lex.lex_state
1962
2408
  end
@@ -1968,9 +2414,10 @@ class TestRubyLexer < MiniTest::Unit::TestCase
1968
2414
  token = args.shift
1969
2415
  value = args.shift
1970
2416
  assert @lex.advance, "no more tokens"
2417
+ # assert_equal [token, value].map(&:encoding), [@lex.token, [@lex.yacc_value].flatten.first].map(&:encoding), input # TODO
1971
2418
  assert_equal [token, value], [@lex.token, [@lex.yacc_value].flatten.first], input
1972
2419
  end
1973
2420
 
1974
- deny @lex.advance, "must be empty, but had #{[@lex.token, @lex.yacc_value].inspect}"
2421
+ refute @lex.advance, "must be empty, but had #{[@lex.token, @lex.yacc_value].inspect}"
1975
2422
  end
1976
2423
  end