ruby_parser 3.1.3 → 3.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/ruby_parser.rb CHANGED
@@ -1,4 +1,4 @@
1
-
2
1
  require 'ruby18_parser'
3
2
  require 'ruby19_parser'
3
+ require 'ruby20_parser'
4
4
  require 'ruby_parser_extras'
@@ -99,7 +99,8 @@ class RPStringScanner < StringScanner
99
99
  alias :old_scan :scan
100
100
  def scan re
101
101
  s = old_scan re
102
- d :scan => [s, caller.first] if s
102
+ where = caller.first.split(/:/).first(2).join(":")
103
+ d :scan => [s, where] if s
103
104
  s
104
105
  end
105
106
  end
@@ -110,11 +111,39 @@ class RPStringScanner < StringScanner
110
111
  end
111
112
 
112
113
  module RubyParserStuff
113
- VERSION = "3.1.3" unless constants.include? "VERSION" # SIGH
114
+ VERSION = "3.2.0" unless constants.include? "VERSION" # SIGH
114
115
 
115
116
  attr_accessor :lexer, :in_def, :in_single, :file
116
117
  attr_reader :env, :comments
117
118
 
119
+ $good20 = []
120
+
121
+ %w[
122
+ ].map(&:to_i).each do |n|
123
+ $good20[n] = n
124
+ end
125
+
126
+ def debug20 n, v = nil, r = nil
127
+ raise "not yet #{n} #{v.inspect} => #{r.inspect}" unless $good20[n]
128
+ end
129
+
130
+ ruby19 = "".respond_to? :encoding
131
+
132
+ # This is in sorted order of occurrence according to
133
+ # charlock_holmes against 500k files, with UTF_8 forced
134
+ # to the top.
135
+ #
136
+ # Overwrite this constant if you need something different.
137
+ ENCODING_ORDER = [
138
+ Encoding::UTF_8, # moved to top to reflect default in 2.0
139
+ Encoding::ISO_8859_1,
140
+ Encoding::ISO_8859_2,
141
+ Encoding::ISO_8859_9,
142
+ Encoding::SHIFT_JIS,
143
+ Encoding::WINDOWS_1252,
144
+ Encoding::EUC_JP
145
+ ] if ruby19
146
+
118
147
  def syntax_error msg
119
148
  raise RubyParser::SyntaxError, msg
120
149
  end
@@ -129,7 +158,7 @@ module RubyParserStuff
129
158
  end
130
159
 
131
160
  def arg_blk_pass node1, node2 # TODO: nuke
132
- node1 = s(:arglist, node1) unless [:arglist, :array].include? node1.first
161
+ node1 = s(:arglist, node1) unless [:arglist, :call_args, :array, :args].include? node1.first
133
162
  node1 << node2 if node2
134
163
  node1
135
164
  end
@@ -146,12 +175,14 @@ module RubyParserStuff
146
175
  if sexp.size == 2 and sexp[1].sexp_type == :array then
147
176
  s(:masgn, *sexp[1][1..-1].map { |sub| clean_mlhs sub })
148
177
  else
178
+ debug20 5
149
179
  sexp
150
180
  end
151
181
  when :gasgn, :iasgn, :lasgn, :cvasgn then
152
182
  if sexp.size == 2 then
153
183
  sexp.last
154
184
  else
185
+ debug20 7
155
186
  sexp # optional value
156
187
  end
157
188
  else
@@ -182,6 +213,34 @@ module RubyParserStuff
182
213
  end
183
214
  end
184
215
 
216
+ def array_to_hash array
217
+ s(:hash, *array[1..-1])
218
+ end
219
+
220
+ def call_args args
221
+ result = s(:call_args)
222
+
223
+ args.each do |arg|
224
+ case arg
225
+ when Sexp then
226
+ case arg.sexp_type
227
+ when :array, :args, :call_args then # HACK? remove array at some point
228
+ result.concat arg[1..-1]
229
+ else
230
+ result << arg
231
+ end
232
+ when Symbol then
233
+ result << arg
234
+ when ",", nil then
235
+ # ignore
236
+ else
237
+ raise "unhandled: #{arg.inspect} in #{args.inspect}"
238
+ end
239
+ end
240
+
241
+ result
242
+ end
243
+
185
244
  def args args
186
245
  result = s(:args)
187
246
 
@@ -189,21 +248,27 @@ module RubyParserStuff
189
248
  case arg
190
249
  when Sexp then
191
250
  case arg.sexp_type
192
- when :args, :block, :array then
251
+ when :args, :block, :array, :call_args then # HACK call_args mismatch
193
252
  result.concat arg[1..-1]
194
253
  when :block_arg then
195
254
  result << :"&#{arg.last}"
196
- when :masgn then
255
+ when :shadow then
256
+ if Sexp === result.last and result.last.sexp_type == :shadow then
257
+ result.last << arg.last
258
+ else
259
+ result << arg
260
+ end
261
+ when :masgn, :block_pass, :hash then # HACK: remove. prolly call_args
197
262
  result << arg
198
263
  else
199
- raise "unhandled: #{arg.inspect}"
264
+ raise "unhandled: #{arg.sexp_type} in #{args.inspect}"
200
265
  end
201
266
  when Symbol then
202
267
  result << arg
203
- when ",", nil then
268
+ when ",", "|", ";", "(", ")", nil then
204
269
  # ignore
205
270
  else
206
- raise "unhandled: #{arg.inspect}"
271
+ raise "unhandled: #{arg.inspect} in #{args.inspect}"
207
272
  end
208
273
  end
209
274
 
@@ -216,7 +281,7 @@ module RubyParserStuff
216
281
  end
217
282
 
218
283
  def assignable(lhs, value = nil)
219
- id = lhs.to_sym
284
+ id = lhs.to_sym unless Sexp === lhs
220
285
  id = id.to_sym if Sexp === id
221
286
 
222
287
  raise "write a test 1" if id.to_s =~ /^(?:self|nil|true|false|__LINE__|__FILE__)$/
@@ -254,7 +319,7 @@ module RubyParserStuff
254
319
  end
255
320
  end
256
321
 
257
- self.env[id] ||= :lvar
322
+ self.env[id] ||= :lvar unless result.sexp_type == :cdecl # HACK? cdecl
258
323
 
259
324
  result << value if value
260
325
 
@@ -526,7 +591,7 @@ module RubyParserStuff
526
591
  # TODO: need a test with f(&b) { } to produce warning
527
592
 
528
593
  args ||= s(:arglist)
529
- args[0] = :arglist if args.first == :array
594
+ args[0] = :arglist if [:array, :call_args].include? args.first
530
595
  args = s(:arglist, args) unless args.first == :arglist
531
596
 
532
597
  # HACK quick hack to make this work quickly... easy to clean up above
@@ -932,18 +997,8 @@ module RubyParserStuff
932
997
  end
933
998
 
934
999
  def hack_encoding str, extra = nil
935
- # this is in sorted order of occurrence according to
936
- # charlock_holmes against 500k files
937
- encodings = [
938
- extra,
939
- Encoding::ISO_8859_1,
940
- Encoding::UTF_8,
941
- Encoding::ISO_8859_2,
942
- Encoding::ISO_8859_9,
943
- Encoding::SHIFT_JIS,
944
- Encoding::WINDOWS_1252,
945
- Encoding::EUC_JP,
946
- ].compact
1000
+ encodings = ENCODING_ORDER.dup
1001
+ encodings.unshift(extra) unless extra.nil?
947
1002
 
948
1003
  # terrible, horrible, no good, very bad, last ditch effort.
949
1004
  encodings.each do |enc|
@@ -1134,6 +1189,8 @@ module RubyParserStuff
1134
1189
  WORDLIST18 = Hash[*wordlist.map { |o| [o.name, o] }.flatten]
1135
1190
  WORDLIST19 = Hash[*wordlist.map { |o| [o.name, o] }.flatten]
1136
1191
 
1192
+ WORDLIST18.delete "__ENCODING__"
1193
+
1137
1194
  %w[and case elsif for if in module or unless until when while].each do |k|
1138
1195
  WORDLIST19[k] = WORDLIST19[k].dup
1139
1196
  WORDLIST19[k].state = :expr_value
@@ -1267,6 +1324,10 @@ module RubyParserStuff
1267
1324
  end
1268
1325
  end
1269
1326
 
1327
+ class Ruby20Parser < Racc::Parser
1328
+ include RubyParserStuff
1329
+ end
1330
+
1270
1331
  class Ruby19Parser < Racc::Parser
1271
1332
  include RubyParserStuff
1272
1333
  end
@@ -1286,12 +1347,17 @@ class RubyParser
1286
1347
  def initialize
1287
1348
  @p18 = Ruby18Parser.new
1288
1349
  @p19 = Ruby19Parser.new
1350
+ @p20 = Ruby20Parser.new
1289
1351
  end
1290
1352
 
1291
1353
  def process(s, f = "(string)", t = 10) # parens for emacs *sigh*
1292
- @p19.process s, f, t
1293
- rescue Racc::ParseError
1294
- @p18.process s, f, t
1354
+ @p20.process s, f, t
1355
+ rescue Racc::ParseError, RubyParser::SyntaxError
1356
+ begin
1357
+ @p19.process s, f, t
1358
+ rescue Racc::ParseError, RubyParser::SyntaxError
1359
+ @p18.process s, f, t
1360
+ end
1295
1361
  end
1296
1362
 
1297
1363
  alias :parse :process
@@ -1307,6 +1373,8 @@ class RubyParser
1307
1373
  Ruby18Parser.new
1308
1374
  when /^1\.9/ then
1309
1375
  Ruby19Parser.new
1376
+ when /^2.0/ then
1377
+ Ruby20Parser.new
1310
1378
  else
1311
1379
  raise "unrecognized RUBY_VERSION #{RUBY_VERSION}"
1312
1380
  end
@@ -1337,7 +1405,7 @@ class Sexp
1337
1405
  end
1338
1406
 
1339
1407
  def to_sym
1340
- raise "no"
1408
+ raise "no: #{self.inspect}.to_sym is a bug"
1341
1409
  self.value.to_sym
1342
1410
  end
1343
1411
 
@@ -2,30 +2,49 @@
2
2
  # encoding: US-ASCII
3
3
 
4
4
  require 'rubygems'
5
- gem "minitest"
6
-
7
5
  require 'minitest/autorun'
8
6
  require 'ruby_lexer'
9
7
  require 'ruby18_parser'
8
+ require 'ruby20_parser'
9
+
10
+ class TestRubyLexer < Minitest::Test
11
+ attr_accessor :processor, :lex, :parser_class
10
12
 
11
- class TestRubyLexer < MiniTest::Unit::TestCase
12
- alias :deny :refute
13
+ alias :lexer :lex # lets me copy/paste code from parser
14
+ alias :lexer= :lex=
13
15
 
14
16
  def setup
15
- setup_lexer Ruby18Parser
17
+ setup_lexer_class Ruby20Parser
16
18
  end
17
19
 
18
- def setup_lexer parser_class
19
- p = parser_class.new
20
- @lex = p.lexer
21
- @lex.src = "blah blah"
22
- @lex.lex_state = :expr_beg
20
+ def setup_lexer input, exp_sexp = nil
21
+ setup_new_parser
22
+ lex.src = input
23
+ lex.lex_state = :expr_beg
24
+ assert_equal exp_sexp, processor.class.new.parse(input) if exp_sexp
25
+ end
26
+
27
+ def setup_new_parser
28
+ self.processor = parser_class.new
29
+ self.lex = processor.lexer
30
+ end
31
+
32
+ def setup_lexer_class parser_class
33
+ self.parser_class = parser_class
34
+ setup_new_parser
35
+ setup_lexer "blah blah"
23
36
  end
24
37
 
25
38
  def test_advance
26
39
  assert @lex.advance # blah
27
40
  assert @lex.advance # blah
28
- deny @lex.advance # nada
41
+ refute @lex.advance # nada
42
+ end
43
+
44
+ def test_unicode_ident
45
+ s = "@\u1088\u1077\u1093\u1072"
46
+ util_lex_token(s.dup,
47
+ :tIVAR, s.dup)
29
48
  end
30
49
 
31
50
  def test_read_escape
@@ -44,6 +63,11 @@ class TestRubyLexer < MiniTest::Unit::TestCase
44
63
  util_escape "\010", "b"
45
64
  util_escape " ", "s"
46
65
  util_escape "q", "q" # plain vanilla escape
66
+
67
+ util_escape "8", "8" # ugh... mri... WHY?!?
68
+ util_escape "9", "9" # ugh... mri... WHY?!?
69
+
70
+ util_escape "$", "444" # ugh
47
71
  end
48
72
 
49
73
  def test_read_escape_c
@@ -133,6 +157,8 @@ class TestRubyLexer < MiniTest::Unit::TestCase
133
157
  end
134
158
 
135
159
  def test_yylex_label__18
160
+ setup_lexer_class Ruby18Parser
161
+
136
162
  util_lex_token("{a:",
137
163
  :tLBRACE, "{",
138
164
  :tIDENTIFIER, "a",
@@ -140,6 +166,8 @@ class TestRubyLexer < MiniTest::Unit::TestCase
140
166
  end
141
167
 
142
168
  def test_yylex_label_in_params__18
169
+ setup_lexer_class Ruby18Parser
170
+
143
171
  util_lex_token("foo(a:",
144
172
  :tIDENTIFIER, "foo",
145
173
  :tLPAREN2, "(",
@@ -148,7 +176,7 @@ class TestRubyLexer < MiniTest::Unit::TestCase
148
176
  end
149
177
 
150
178
  def test_yylex_label__19
151
- setup_lexer Ruby19Parser
179
+ setup_lexer_class Ruby19Parser
152
180
 
153
181
  util_lex_token("{a:",
154
182
  :tLBRACE, "{",
@@ -156,7 +184,7 @@ class TestRubyLexer < MiniTest::Unit::TestCase
156
184
  end
157
185
 
158
186
  def test_yylex_label_in_params__19
159
- setup_lexer Ruby19Parser
187
+ setup_lexer_class Ruby19Parser
160
188
 
161
189
  util_lex_token("foo(a:",
162
190
  :tIDENTIFIER, "foo",
@@ -164,6 +192,345 @@ class TestRubyLexer < MiniTest::Unit::TestCase
164
192
  :tLABEL, "a")
165
193
  end
166
194
 
195
+ def assert_next_lexeme token=nil, value=nil, state=nil, paren=nil, brace=nil
196
+ assert @lex.advance, "no more tokens"
197
+
198
+ msg = message {
199
+ act = [@lex.token, @lex.yacc_value, @lex.lex_state,
200
+ @lex.paren_nest, @lex.brace_nest]
201
+ exp = [token, value, state, paren, brace]
202
+ "#{exp.inspect} vs #{act.inspect}"
203
+ }
204
+
205
+ act_value = @lex.yacc_value
206
+ act_value = act_value.first if Array === act_value
207
+
208
+ assert_equal token, @lex.token, msg
209
+ assert_equal value, act_value, msg
210
+ assert_equal state, @lex.lex_state, msg
211
+ assert_equal paren, @lex.paren_nest, msg if paren
212
+ assert_equal brace, @lex.brace_nest, msg if brace
213
+ end
214
+
215
+ def refute_lexeme
216
+ refute @lex.advance, "not empty: #{[@lex.token, @lex.yacc_value].inspect}"
217
+ end
218
+
219
+ def assert_lex input, exp_sexp, *args
220
+ setup_lexer input, exp_sexp
221
+
222
+ args.each_slice(5) do |token, value, state, paren, brace|
223
+ assert_next_lexeme token, value, state, paren, brace
224
+ end
225
+
226
+ refute_lexeme
227
+ end
228
+
229
+ def emulate_string_interpolation
230
+ lex_strterm = lexer.lex_strterm
231
+ string_nest = lexer.string_nest
232
+ brace_nest = lexer.brace_nest
233
+
234
+ lexer.string_nest = 0
235
+ lexer.brace_nest = 0
236
+ lexer.cond.push false
237
+ lexer.cmdarg.push false
238
+
239
+ lexer.lex_strterm = nil
240
+ lexer.lex_state = :expr_beg
241
+
242
+ yield
243
+
244
+ lexer.lex_state = :expr_endarg
245
+ assert_next_lexeme :tRCURLY, "}", :expr_endarg, 0
246
+
247
+ lexer.lex_strterm = lex_strterm
248
+ lexer.lex_state = :expr_beg
249
+ lexer.string_nest = string_nest
250
+ lexer.brace_nest = brace_nest
251
+
252
+ lexer.cond.lexpop
253
+ lexer.cmdarg.lexpop
254
+ end
255
+
256
+ def test_yylex_paren_string_parens_interpolated
257
+ setup_lexer('%((#{b}#{d}))',
258
+ s(:dstr,
259
+ "(",
260
+ s(:evstr, s(:call, nil, :b)),
261
+ s(:evstr, s(:call, nil, :d)),
262
+ s(:str, ")")))
263
+
264
+ assert_next_lexeme :tSTRING_BEG, "%)", :expr_beg, 0, 0
265
+ assert_next_lexeme :tSTRING_CONTENT, "(", :expr_beg, 0, 0
266
+ assert_next_lexeme :tSTRING_DBEG, nil, :expr_beg, 0, 0
267
+
268
+ emulate_string_interpolation do
269
+ assert_next_lexeme :tIDENTIFIER, "b", :expr_arg, 0, 0
270
+ end
271
+
272
+ assert_next_lexeme :tSTRING_DBEG, nil, :expr_beg, 0, 0
273
+
274
+ emulate_string_interpolation do
275
+ assert_next_lexeme :tIDENTIFIER, "d", :expr_arg, 0, 0
276
+ end
277
+
278
+ assert_next_lexeme :tSTRING_CONTENT, ")", :expr_beg, 0, 0
279
+ assert_next_lexeme :tSTRING_END, ")", :expr_end, 0, 0
280
+
281
+ refute_lexeme
282
+ end
283
+
284
+ def test_yylex_paren_string_interpolated_regexp
285
+ setup_lexer('%( #{(/abcd/)} )',
286
+ s(:dstr, " ", s(:evstr, s(:lit, /abcd/)), s(:str, " ")))
287
+
288
+ assert_next_lexeme :tSTRING_BEG, "%)", :expr_beg, 0, 0
289
+ assert_next_lexeme :tSTRING_CONTENT, " ", :expr_beg, 0, 0
290
+ assert_next_lexeme :tSTRING_DBEG, nil, :expr_beg, 0, 0
291
+
292
+ emulate_string_interpolation do
293
+ assert_next_lexeme :tLPAREN, "(", :expr_beg, 1, 0
294
+ assert_next_lexeme :tREGEXP_BEG, "/", :expr_beg, 1, 0
295
+ assert_next_lexeme :tSTRING_CONTENT, "abcd", :expr_beg, 1, 0
296
+ assert_next_lexeme :tREGEXP_END, "", :expr_end, 1, 0
297
+ assert_next_lexeme :tRPAREN, ")", :expr_endfn, 0, 0
298
+ end
299
+
300
+ assert_next_lexeme :tSTRING_CONTENT, " ", :expr_beg, 0, 0
301
+ assert_next_lexeme :tSTRING_END, ")", :expr_end, 0, 0
302
+
303
+ refute_lexeme
304
+ end
305
+
306
+ def test_yylex_not_at_defn
307
+ assert_lex("def +@; end",
308
+ s(:defn, :+@, s(:args), s(:nil)),
309
+
310
+ :kDEF, "def", :expr_fname, 0, 0,
311
+ :tUPLUS, "+@", :expr_arg, 0, 0,
312
+ :tSEMI, ";", :expr_beg, 0, 0,
313
+ :kEND, "end", :expr_end, 0, 0)
314
+
315
+ assert_lex("def !@; end",
316
+ s(:defn, :"!@", s(:args), s(:nil)),
317
+
318
+ :kDEF, "def", :expr_fname, 0, 0,
319
+ :tUBANG, "!@", :expr_arg, 0, 0,
320
+ :tSEMI, ";", :expr_beg, 0, 0,
321
+ :kEND, "end", :expr_end, 0, 0)
322
+ end
323
+
324
+ def test_yylex_not_at_ivar
325
+ assert_lex("!@ivar",
326
+ s(:call, s(:ivar, :@ivar), :"!"),
327
+
328
+ :tBANG, "!", :expr_beg, 0, 0,
329
+ :tIVAR, "@ivar", :expr_end, 0, 0)
330
+ end
331
+
332
+ def test_yylex_number_times_ident_times_return_number
333
+ assert_lex("1 * b * 3",
334
+ s(:call,
335
+ s(:call, s(:lit, 1), :*, s(:call, nil, :b)),
336
+ :*, s(:lit, 3)),
337
+
338
+ :tINTEGER, 1, :expr_end, 0, 0,
339
+ :tSTAR2, "*", :expr_beg, 0, 0,
340
+ :tIDENTIFIER, "b", :expr_arg, 0, 0,
341
+ :tSTAR2, "*", :expr_beg, 0, 0,
342
+ :tINTEGER, 3, :expr_end, 0, 0)
343
+
344
+ assert_lex("1 * b *\n 3",
345
+ s(:call,
346
+ s(:call, s(:lit, 1), :*, s(:call, nil, :b)),
347
+ :*, s(:lit, 3)),
348
+
349
+ :tINTEGER, 1, :expr_end, 0, 0,
350
+ :tSTAR2, "*", :expr_beg, 0, 0,
351
+ :tIDENTIFIER, "b", :expr_arg, 0, 0,
352
+ :tSTAR2, "*", :expr_beg, 0, 0,
353
+ :tINTEGER, 3, :expr_end, 0, 0)
354
+ end
355
+
356
+ def test_yylex_paren_string_parens_interpolated_regexp
357
+ setup_lexer('%((#{(/abcd/)}))',
358
+ s(:dstr, "(", s(:evstr, s(:lit, /abcd/)), s(:str, ")")))
359
+
360
+ assert_next_lexeme :tSTRING_BEG, "%)", :expr_beg, 0, 0
361
+ assert_next_lexeme :tSTRING_CONTENT, "(", :expr_beg, 0, 0
362
+
363
+ assert_next_lexeme :tSTRING_DBEG, nil, :expr_beg, 0, 0
364
+
365
+ emulate_string_interpolation do
366
+ assert_next_lexeme :tLPAREN, "(", :expr_beg, 1, 0
367
+ assert_next_lexeme :tREGEXP_BEG, "/", :expr_beg, 1, 0
368
+ assert_next_lexeme :tSTRING_CONTENT, "abcd", :expr_beg, 1, 0
369
+ assert_next_lexeme :tREGEXP_END, "", :expr_end, 1, 0
370
+ assert_next_lexeme :tRPAREN, ")", :expr_endfn, 0, 0
371
+ end
372
+
373
+ assert_next_lexeme :tSTRING_CONTENT, ")", :expr_beg, 0, 0
374
+ assert_next_lexeme :tSTRING_END, ")", :expr_end, 0, 0
375
+
376
+ refute_lexeme
377
+ end
378
+
379
+ def test_yylex_method_parens_chevron
380
+ assert_lex("a()<<1",
381
+ s(:call, s(:call, nil, :a), :<<, s(:lit, 1)),
382
+ :tIDENTIFIER, "a", :expr_cmdarg, 0, 0,
383
+ :tLPAREN2, "(", :expr_beg, 1, 0,
384
+ :tRPAREN, ")", :expr_endfn, 0, 0,
385
+ :tLSHFT, "<<" , :expr_beg, 0, 0,
386
+ :tINTEGER, 1, :expr_end, 0, 0)
387
+ end
388
+
389
+ def test_yylex_lambda_args__20
390
+ setup_lexer_class Ruby20Parser
391
+
392
+ assert_lex("-> (a) { }",
393
+ s(:iter, s(:call, nil, :lambda),
394
+ s(:args, :a)),
395
+
396
+ :tLAMBDA, nil, :expr_endfn, 0, 0,
397
+ :tLPAREN2, "(", :expr_beg, 1, 0,
398
+ :tIDENTIFIER, "a", :expr_arg, 1, 0,
399
+ :tRPAREN, ")", :expr_endfn, 0, 0,
400
+ :tLCURLY, "{", :expr_beg, 0, 1,
401
+ :tRCURLY, "}", :expr_endarg, 0, 0)
402
+ end
403
+
404
+ def test_yylex_lambda_args_opt__20
405
+ setup_lexer_class Ruby20Parser
406
+
407
+ assert_lex("-> (a=nil) { }",
408
+ s(:iter, s(:call, nil, :lambda),
409
+ s(:args, s(:lasgn, :a, s(:nil)))),
410
+
411
+ :tLAMBDA, nil, :expr_endfn, 0, 0,
412
+ :tLPAREN2, "(", :expr_beg, 1, 0,
413
+ :tIDENTIFIER, "a", :expr_arg, 1, 0,
414
+ :tEQL, "=", :expr_beg, 1, 0,
415
+ :kNIL, "nil", :expr_end, 1, 0,
416
+ :tRPAREN, ")", :expr_endfn, 0, 0,
417
+ :tLCURLY, "{", :expr_beg, 0, 1,
418
+ :tRCURLY, "}", :expr_endarg, 0, 0)
419
+ end
420
+
421
+ def test_yylex_lambda_hash__20
422
+ setup_lexer_class Ruby20Parser
423
+
424
+ assert_lex("-> (a={}) { }",
425
+ s(:iter, s(:call, nil, :lambda),
426
+ s(:args, s(:lasgn, :a, s(:hash)))),
427
+
428
+ :tLAMBDA, nil, :expr_endfn, 0, 0,
429
+ :tLPAREN2, "(", :expr_beg, 1, 0,
430
+ :tIDENTIFIER, "a", :expr_arg, 1, 0,
431
+ :tEQL, "=", :expr_beg, 1, 0,
432
+ :tLBRACE, "{", :expr_beg, 1, 1,
433
+ :tRCURLY, "}", :expr_endarg, 1, 0,
434
+ :tRPAREN, ")", :expr_endfn, 0, 0,
435
+ :tLCURLY, "{", :expr_beg, 0, 1,
436
+ :tRCURLY, "}", :expr_endarg, 0, 0)
437
+ end
438
+
439
+ def test_yylex_iter_array_curly
440
+ assert_lex("f :a, [:b] { |c, d| }", # yes, this is bad code
441
+ s(:iter,
442
+ s(:call, nil, :f, s(:lit, :a), s(:array, s(:lit, :b))),
443
+ s(:args, :c, :d)),
444
+
445
+ :tIDENTIFIER, "f", :expr_cmdarg, 0, 0,
446
+ :tSYMBOL, "a", :expr_end, 0, 0,
447
+ :tCOMMA, ",", :expr_beg, 0, 0,
448
+ :tLBRACK, "[", :expr_beg, 1, 0,
449
+ :tSYMBOL, "b", :expr_end, 1, 0,
450
+ :tRBRACK, "]", :expr_endarg, 0, 0,
451
+ :tLBRACE_ARG, "{", :expr_beg, 0, 1,
452
+ :tPIPE, "|", :expr_beg, 0, 1,
453
+ :tIDENTIFIER, "c", :expr_arg, 0, 1,
454
+ :tCOMMA, ",", :expr_beg, 0, 1,
455
+ :tIDENTIFIER, "d", :expr_arg, 0, 1,
456
+ :tPIPE, "|", :expr_beg, 0, 1,
457
+ :tRCURLY, "}", :expr_endarg, 0, 0)
458
+ end
459
+
460
+ def test_yylex_const_call_same_name
461
+ assert_lex("X = a { }; b { f :c }",
462
+ s(:block,
463
+ s(:cdecl, :X, s(:iter, s(:call, nil, :a), s(:args))),
464
+ s(:iter,
465
+ s(:call, nil, :b),
466
+ s(:args),
467
+ s(:call, nil, :f, s(:lit, :c)))),
468
+
469
+ :tCONSTANT, "X", :expr_cmdarg, 0, 0,
470
+ :tEQL, "=", :expr_beg, 0, 0,
471
+ :tIDENTIFIER, "a", :expr_arg, 0, 0,
472
+ :tLCURLY, "{", :expr_beg, 0, 1,
473
+ :tRCURLY, "}", :expr_endarg, 0, 0,
474
+ :tSEMI, ";", :expr_beg, 0, 0,
475
+
476
+ :tIDENTIFIER, "b", :expr_cmdarg, 0, 0,
477
+ :tLCURLY, "{", :expr_beg, 0, 1,
478
+ :tIDENTIFIER, "f", :expr_cmdarg, 0, 1, # different
479
+ :tSYMBOL, "c", :expr_end, 0, 1,
480
+ :tRCURLY, "}", :expr_endarg, 0, 0)
481
+
482
+ assert_lex("X = a { }; b { X :c }",
483
+ s(:block,
484
+ s(:cdecl, :X, s(:iter, s(:call, nil, :a), s(:args))),
485
+ s(:iter,
486
+ s(:call, nil, :b),
487
+ s(:args),
488
+ s(:call, nil, :X, s(:lit, :c)))),
489
+
490
+ :tCONSTANT, "X", :expr_cmdarg, 0, 0,
491
+ :tEQL, "=", :expr_beg, 0, 0,
492
+ :tIDENTIFIER, "a", :expr_arg, 0, 0,
493
+ :tLCURLY, "{", :expr_beg, 0, 1,
494
+ :tRCURLY, "}", :expr_endarg, 0, 0,
495
+ :tSEMI, ";", :expr_beg, 0, 0,
496
+
497
+ :tIDENTIFIER, "b", :expr_cmdarg, 0, 0,
498
+ :tLCURLY, "{", :expr_beg, 0, 1,
499
+ :tCONSTANT, "X", :expr_cmdarg, 0, 1, # same
500
+ :tSYMBOL, "c", :expr_end, 0, 1,
501
+ :tRCURLY, "}", :expr_endarg, 0, 0)
502
+ end
503
+
504
+ def test_yylex_lasgn_call_same_name
505
+ assert_lex("a = b.c :d => 1",
506
+ s(:lasgn, :a,
507
+ s(:call, s(:call, nil, :b), :c,
508
+ s(:hash, s(:lit, :d), s(:lit, 1)))),
509
+
510
+ :tIDENTIFIER, "a", :expr_cmdarg, 0, 0,
511
+ :tEQL, "=", :expr_beg, 0, 0,
512
+ :tIDENTIFIER, "b", :expr_arg, 0, 0,
513
+ :tDOT, ".", :expr_dot, 0, 0,
514
+ :tIDENTIFIER, "c", :expr_arg, 0, 0, # different
515
+ :tSYMBOL, "d", :expr_end, 0, 0,
516
+ :tASSOC, "=>", :expr_beg, 0, 0,
517
+ :tINTEGER, 1, :expr_end, 0, 0)
518
+
519
+ assert_lex("a = b.a :d => 1",
520
+ s(:lasgn, :a,
521
+ s(:call, s(:call, nil, :b), :a,
522
+ s(:hash, s(:lit, :d), s(:lit, 1)))),
523
+
524
+ :tIDENTIFIER, "a", :expr_cmdarg, 0, 0,
525
+ :tEQL, "=", :expr_beg, 0, 0,
526
+ :tIDENTIFIER, "b", :expr_arg, 0, 0,
527
+ :tDOT, ".", :expr_dot, 0, 0,
528
+ :tIDENTIFIER, "a", :expr_arg, 0, 0, # same as lvar
529
+ :tSYMBOL, "d", :expr_end, 0, 0,
530
+ :tASSOC, "=>", :expr_beg, 0, 0,
531
+ :tINTEGER, 1, :expr_end, 0, 0)
532
+ end
533
+
167
534
  def test_yylex_back_ref
168
535
  util_lex_token("[$&, $`, $', $+]",
169
536
  :tLBRACK, "[",
@@ -628,6 +995,7 @@ class TestRubyLexer < MiniTest::Unit::TestCase
628
995
  end
629
996
 
630
997
  def test_yylex_heredoc_double_interp
998
+ # TODO: convert to assert_lex
631
999
  util_lex_token("a = <<\"EOF\"\n#x a \#@a b \#$b c \#{3} \nEOF\n",
632
1000
  :tIDENTIFIER, "a",
633
1001
  :tEQL, "=",
@@ -739,10 +1107,18 @@ class TestRubyLexer < MiniTest::Unit::TestCase
739
1107
  util_lex_fname "<=>", :tCMP
740
1108
  end
741
1109
 
742
- def test_yylex_identifier_def
1110
+ def test_yylex_identifier_def__18
1111
+ setup_lexer_class Ruby18Parser
1112
+
743
1113
  util_lex_fname "identifier", :tIDENTIFIER, :expr_end
744
1114
  end
745
1115
 
1116
+ def test_yylex_identifier_def__1920
1117
+ setup_lexer_class Ruby19Parser
1118
+
1119
+ util_lex_fname "identifier", :tIDENTIFIER, :expr_endfn
1120
+ end
1121
+
746
1122
  def test_yylex_identifier_eh
747
1123
  util_lex_token("identifier?", :tFID, "identifier?")
748
1124
  end
@@ -774,10 +1150,18 @@ class TestRubyLexer < MiniTest::Unit::TestCase
774
1150
  util_lex_fname "^", :tCARET
775
1151
  end
776
1152
 
777
- def test_yylex_identifier_equals_def
1153
+ def test_yylex_identifier_equals_def__18
1154
+ setup_lexer_class Ruby18Parser
1155
+
778
1156
  util_lex_fname "identifier=", :tIDENTIFIER, :expr_end
779
1157
  end
780
1158
 
1159
+ def test_yylex_identifier_equals_def__1920
1160
+ setup_lexer_class Ruby19Parser
1161
+
1162
+ util_lex_fname "identifier=", :tIDENTIFIER, :expr_endfn
1163
+ end
1164
+
781
1165
  def test_yylex_identifier_equals_def2
782
1166
  util_lex_fname "==", :tEQ
783
1167
  end
@@ -868,25 +1252,25 @@ class TestRubyLexer < MiniTest::Unit::TestCase
868
1252
  end
869
1253
 
870
1254
  def test_yylex_question_eh_a__18
871
- @lex = RubyLexer.new 18
1255
+ setup_lexer_class Ruby18Parser
872
1256
 
873
1257
  util_lex_token "?a", :tINTEGER, 97
874
1258
  end
875
1259
 
876
1260
  def test_yylex_question_eh_a__19
877
- @lex = RubyLexer.new 19
1261
+ setup_lexer_class Ruby19Parser
878
1262
 
879
1263
  util_lex_token '?a', :tSTRING, "a"
880
1264
  end
881
1265
 
882
1266
  def test_yylex_question_eh_escape_M_escape_C__18
883
- @lex = RubyLexer.new 18
1267
+ setup_lexer_class Ruby18Parser
884
1268
 
885
1269
  util_lex_token '?\M-\C-a', :tINTEGER, 129
886
1270
  end
887
1271
 
888
1272
  def test_yylex_question_eh_escape_M_escape_C__19
889
- @lex = RubyLexer.new 19
1273
+ setup_lexer_class Ruby19Parser
890
1274
 
891
1275
  util_lex_token '?\M-\C-a', :tSTRING, "\M-\C-a"
892
1276
  end
@@ -911,6 +1295,10 @@ class TestRubyLexer < MiniTest::Unit::TestCase
911
1295
  util_bad_token "08"
912
1296
  end
913
1297
 
1298
+ def test_yylex_integer_oct_bad_range2
1299
+ util_bad_token "08"
1300
+ end
1301
+
914
1302
  def test_yylex_integer_oct_bad_underscores
915
1303
  util_bad_token "01__23"
916
1304
  end
@@ -1060,11 +1448,20 @@ class TestRubyLexer < MiniTest::Unit::TestCase
1060
1448
  util_lex_token(" (", :tLPAREN_ARG, "(")
1061
1449
  end
1062
1450
 
1063
- def test_yylex_open_bracket_exprarg
1451
+ def test_yylex_open_bracket_exprarg__18
1452
+ setup_lexer_class Ruby18Parser
1453
+
1064
1454
  @lex.lex_state = :expr_arg
1065
1455
  util_lex_token(" (", :tLPAREN2, "(")
1066
1456
  end
1067
1457
 
1458
+ def test_yylex_open_bracket_exprarg__19
1459
+ setup_lexer_class Ruby19Parser
1460
+
1461
+ @lex.lex_state = :expr_arg
1462
+ util_lex_token(" (", :tLPAREN_ARG, "(")
1463
+ end
1464
+
1068
1465
  def test_yylex_open_curly_bracket
1069
1466
  util_lex_token("{",
1070
1467
  :tLBRACE, "{")
@@ -1166,6 +1563,13 @@ class TestRubyLexer < MiniTest::Unit::TestCase
1166
1563
  util_lex_token "+@", :tUPLUS, "+@"
1167
1564
  end
1168
1565
 
1566
+ def test_yylex_not_unary_method
1567
+ skip "not yet"
1568
+
1569
+ @lex.lex_state = :expr_fname
1570
+ util_lex_token "!@", :tUBANG, "!@"
1571
+ end
1572
+
1169
1573
  def test_yylex_numbers
1170
1574
  util_lex_token "0b10", :tINTEGER, 2
1171
1575
  util_lex_token "0B10", :tINTEGER, 2
@@ -1211,13 +1615,13 @@ class TestRubyLexer < MiniTest::Unit::TestCase
1211
1615
  end
1212
1616
 
1213
1617
  def test_yylex_question__18
1214
- @lex = RubyLexer.new 18
1618
+ setup_lexer_class Ruby18Parser
1215
1619
 
1216
1620
  util_lex_token "?*", :tINTEGER, 42
1217
1621
  end
1218
1622
 
1219
1623
  def test_yylex_question__19
1220
- @lex = RubyLexer.new 19
1624
+ setup_lexer_class Ruby19Parser
1221
1625
 
1222
1626
  util_lex_token "?*", :tSTRING, "*"
1223
1627
  end
@@ -1236,7 +1640,7 @@ class TestRubyLexer < MiniTest::Unit::TestCase
1236
1640
  end
1237
1641
 
1238
1642
  def test_yylex_question_ws_backslashed__18
1239
- @lex = RubyLexer.new 18
1643
+ setup_lexer_class Ruby18Parser
1240
1644
 
1241
1645
  @lex.lex_state = :expr_beg
1242
1646
  util_lex_token "?\\ ", :tINTEGER, 32
@@ -1253,7 +1657,7 @@ class TestRubyLexer < MiniTest::Unit::TestCase
1253
1657
  end
1254
1658
 
1255
1659
  def test_yylex_question_ws_backslashed__19
1256
- @lex = RubyLexer.new 19
1660
+ setup_lexer_class Ruby19Parser
1257
1661
 
1258
1662
  @lex.lex_state = :expr_beg
1259
1663
  util_lex_token "?\\ ", :tSTRING, " "
@@ -1617,8 +2021,17 @@ class TestRubyLexer < MiniTest::Unit::TestCase
1617
2021
  end
1618
2022
 
1619
2023
  def test_yylex_string_double_escape_M
2024
+ chr = "\341"
2025
+ chr.force_encoding("UTF-8") if RubyLexer::RUBY19
2026
+
1620
2027
  util_lex_token('"\\M-a"',
1621
- :tSTRING, "\341")
2028
+ :tSTRING, chr)
2029
+ end
2030
+
2031
+ def test_why_does_ruby_hate_me?
2032
+ util_lex_token('"Nl%\000\000A\000\999"', # you should be ashamed
2033
+ :tSTRING,
2034
+ ["Nl%", "\x00", "\x00", "A", "\x00", "999"].join)
1622
2035
  end
1623
2036
 
1624
2037
  def test_yylex_string_double_escape_M_backslash
@@ -1684,6 +2097,11 @@ class TestRubyLexer < MiniTest::Unit::TestCase
1684
2097
  :tSTRING, "n = ABC")
1685
2098
  end
1686
2099
 
2100
+ def test_yylex_string_double_escape_octal_fucked
2101
+ util_lex_token('"n = \\444"',
2102
+ :tSTRING, "n = $")
2103
+ end
2104
+
1687
2105
  def test_yylex_string_double_interp
1688
2106
  util_lex_token("\"blah #x a \#@a b \#$b c \#{3} # \"",
1689
2107
  :tSTRING_BEG, "\"",
@@ -1717,6 +2135,30 @@ class TestRubyLexer < MiniTest::Unit::TestCase
1717
2135
  :tSTRING, "\000")
1718
2136
  end
1719
2137
 
2138
+ def test_yylex_string_pct_i
2139
+ util_lex_token("%i[s1 s2\ns3]",
2140
+ :tQSYMBOLS_BEG, "%i[",
2141
+ :tSTRING_CONTENT, "s1",
2142
+ :tSPACE, nil,
2143
+ :tSTRING_CONTENT, "s2",
2144
+ :tSPACE, nil,
2145
+ :tSTRING_CONTENT, "s3",
2146
+ :tSPACE, nil,
2147
+ :tSTRING_END, nil)
2148
+ end
2149
+
2150
+ def test_yylex_string_pct_I
2151
+ util_lex_token("%I[s1 s2\ns3]",
2152
+ :tSYMBOLS_BEG, "%I[",
2153
+ :tSTRING_CONTENT, "s1",
2154
+ :tSPACE, nil,
2155
+ :tSTRING_CONTENT, "s2",
2156
+ :tSPACE, nil,
2157
+ :tSTRING_CONTENT, "s3",
2158
+ :tSPACE, nil,
2159
+ :tSTRING_END, nil)
2160
+ end
2161
+
1720
2162
  def test_yylex_string_pct_Q
1721
2163
  util_lex_token("%Q[s1 s2]",
1722
2164
  :tSTRING_BEG, "%Q[",
@@ -1877,7 +2319,7 @@ class TestRubyLexer < MiniTest::Unit::TestCase
1877
2319
 
1878
2320
  def test_yylex_underscore_end
1879
2321
  @lex.src = "__END__\n"
1880
- deny @lex.advance
2322
+ refute @lex.advance
1881
2323
  end
1882
2324
 
1883
2325
  def test_yylex_uplus
@@ -1943,7 +2385,7 @@ class TestRubyLexer < MiniTest::Unit::TestCase
1943
2385
 
1944
2386
  def util_escape expected, input
1945
2387
  @lex.src = input
1946
- assert_equal expected, @lex.read_escape
2388
+ assert_equal expected, @lex.read_escape, input
1947
2389
  end
1948
2390
 
1949
2391
  def util_escape_bad input
@@ -1956,7 +2398,11 @@ class TestRubyLexer < MiniTest::Unit::TestCase
1956
2398
  def util_lex_fname name, type, end_state = :expr_arg
1957
2399
  @lex.lex_state = :expr_fname # can only set via parser's defs
1958
2400
 
1959
- util_lex_token("def #{name} ", :kDEF, "def", type, name)
2401
+ assert_lex("def #{name} ",
2402
+ nil,
2403
+
2404
+ :kDEF, "def", :expr_fname, 0, 0,
2405
+ type, name, end_state, 0, 0)
1960
2406
 
1961
2407
  assert_equal end_state, @lex.lex_state
1962
2408
  end
@@ -1968,9 +2414,10 @@ class TestRubyLexer < MiniTest::Unit::TestCase
1968
2414
  token = args.shift
1969
2415
  value = args.shift
1970
2416
  assert @lex.advance, "no more tokens"
2417
+ # assert_equal [token, value].map(&:encoding), [@lex.token, [@lex.yacc_value].flatten.first].map(&:encoding), input # TODO
1971
2418
  assert_equal [token, value], [@lex.token, [@lex.yacc_value].flatten.first], input
1972
2419
  end
1973
2420
 
1974
- deny @lex.advance, "must be empty, but had #{[@lex.token, @lex.yacc_value].inspect}"
2421
+ refute @lex.advance, "must be empty, but had #{[@lex.token, @lex.yacc_value].inspect}"
1975
2422
  end
1976
2423
  end