rogue_parser 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,1766 @@
1
+ #!/usr/local/bin/ruby
2
+
3
+ require 'rubygems'
4
+
5
+ require 'test/unit'
6
+ require 'ruby_lexer'
7
+
8
+ class TestRubyLexer < Test::Unit::TestCase
9
# Negated assertion: passes only when +cond+ is falsy; +msg+ is forwarded to assert.
def deny(cond, msg = nil)
  assert(!cond, msg)
end
12
+
13
# Per-test fixture: a new RubyLexer fed "blah blah" and placed in :expr_beg.
def setup
  @lex = RubyLexer.new
  @lex.src = "blah blah"
  @lex.lex_state = :expr_beg
end
18
+
19
# With the two-word source from setup, advance is truthy twice and then falsy.
def test_advance
  assert(@lex.advance) # first "blah"
  assert(@lex.advance) # second "blah"
  deny(@lex.advance)   # nothing left
end
24
+
25
# util_escape cases for simple escapes: expected character first, escape body second.
def test_read_escape
  util_escape("\\", '\\')
  util_escape("\n", 'n')
  util_escape("\t", 't')
  util_escape("\r", 'r')
  util_escape("\f", 'f')
  util_escape("\13", 'v')
  util_escape("\0", '0')
  util_escape("\07", 'a')
  util_escape("\007", 'a')
  util_escape("\033", 'e')
  util_escape("\377", '377')
  util_escape("\377", 'xff')
  util_escape("\010", 'b')
  util_escape(" ", 's')
  util_escape("q", 'q') # ordinary character, no special meaning
end
42
+
43
# util_escape cases for control escapes in both the C- and c spellings.
def test_read_escape_c
  util_escape("\030", "C-x")
  util_escape("\030", "cx")
  util_escape("\230", 'C-\M-x')
  util_escape("\230", 'c\M-x')

  util_escape("\177", "C-?")
  util_escape("\177", "c?")
end
52
+
53
# util_escape_bad cases: empty, truncated or dash-less M/C/c escape bodies.
def test_read_escape_errors
  util_escape_bad("")

  util_escape_bad("M")
  util_escape_bad("M-")
  util_escape_bad("Mx")

  util_escape_bad("Cx")
  util_escape_bad("C")
  util_escape_bad("C-")

  util_escape_bad("c")
end
66
+
67
# util_escape cases for meta escapes, alone and combined with control.
def test_read_escape_m
  util_escape("\370", "M-x")
  util_escape("\230", 'M-\C-x')
  util_escape("\230", 'M-\cx')
end
72
+
73
# "m -3": identifier, then :tUMINUS_NUM, then the integer 3.
def test_yylex_ambiguous_uminus
  util_lex_token("m -3",
    :tIDENTIFIER, s("m"),
    :tUMINUS_NUM, s("-"),
    :tINTEGER, 3)
  # TODO: also check the warning
end
80
+
81
# "m +3": identifier followed directly by the integer 3 (no token for "+").
def test_yylex_ambiguous_uplus
  util_lex_token("m +3",
    :tIDENTIFIER, s("m"),
    :tINTEGER, 3)
  # TODO: also check the warning
end
87
+
88
# "&" is expected as :tAMPER.
def test_yylex_and
  util_lex_token("&", :tAMPER, s("&"))
end
91
+
92
# "&&" is expected as :tANDOP.
def test_yylex_and2
  util_lex_token("&&", :tANDOP, s("&&"))
end
95
+
96
# "&&=" is expected as :tOP_ASGN carrying "&&".
def test_yylex_and2_equals
  util_lex_token("&&=", :tOP_ASGN, s("&&"))
end
99
+
100
# In :expr_arg, " &y" is expected as :tAMPER followed by an identifier.
def test_yylex_and_arg
  @lex.lex_state = :expr_arg

  util_lex_token(" &y",
    :tAMPER, s("&"),
    :tIDENTIFIER, s("y"))
end
107
+
108
# "&=" is expected as :tOP_ASGN carrying "&".
def test_yylex_and_equals
  util_lex_token("&=", :tOP_ASGN, s("&"))
end
111
+
112
# In :expr_arg, "x & y" is expected to use :tAMPER2 between two identifiers.
def test_yylex_and_expr
  @lex.lex_state = :expr_arg

  util_lex_token("x & y",
    :tIDENTIFIER, s("x"),
    :tAMPER2, s("&"),
    :tIDENTIFIER, s("y"))
end
120
+
121
# util_lex_fname case: "&" with expected token :tAMPER2.
def test_yylex_and_meth
  util_lex_fname("&", :tAMPER2)
end
124
+
125
# "=>" is expected as :tASSOC.
def test_yylex_assoc
  util_lex_token("=>", :tASSOC, s("=>"))
end
128
+
129
# A lone "@" surrounded by spaces is expected as the literal token "@".
def test_yylex_at
  util_lex_token(" @ ", "@", s("@"))
end
132
+
133
# $&, $`, $' and $+ inside an array literal are each expected as :tBACK_REF.
def test_yylex_back_ref
  util_lex_token("[$&, $`, $', $+]",
    :tLBRACK, s("["),
    :tBACK_REF, s(:back_ref, :"&"), ",", s(","),
    :tBACK_REF, s(:back_ref, :"`"), ",", s(","),
    :tBACK_REF, s(:back_ref, :"'"), ",", s(","),
    :tBACK_REF, s(:back_ref, :"+"),
    :tRBRACK, s("]"))
end
142
+
143
# A backslash-newline between "1" and "+ 2" yields no token of its own.
def test_yylex_backslash
  util_lex_token("1 \\\n+ 2",
    :tINTEGER, 1,
    :tPLUS, s("+"),
    :tINTEGER, 2)
end
149
+
150
# util_bad_token case: a backslash not followed by a newline, after the integer 1.
def test_yylex_backslash_bad
  util_bad_token("1 \\ + 2",
    :tINTEGER, 1)
end
154
+
155
# A backtick command string: :tXSTRING_BEG, its content, then :tSTRING_END.
def test_yylex_backtick
  util_lex_token("`ls`",
    :tXSTRING_BEG, s("`"),
    :tSTRING_CONTENT, s(:str, "ls"),
    :tSTRING_END, s("`"))
end
161
+
162
# Starting from :expr_dot, "\n`" is expected as :tBACK_REF2, ending in :expr_cmdarg.
def test_yylex_backtick_cmdarg
  @lex.lex_state = :expr_dot
  util_lex_token("\n`", :tBACK_REF2, s("`")) # the leading \n ensures expr_cmd

  assert_equal(:expr_cmdarg, @lex.lex_state)
end
168
+
169
# A backtick used as a method name after a dot: "a.`(3)".
def test_yylex_backtick_dot
  @lex.lex_state = :expr_dot
  util_lex_token("a.`(3)",
    :tIDENTIFIER, s("a"),
    :tDOT, s("."),
    :tBACK_REF2, s("`"),
    :tLPAREN2, s("("),
    :tINTEGER, 3,
    :tRPAREN, s(")"))
end
179
+
180
# In :expr_fname a backtick is expected as :tBACK_REF2, leaving the state at :expr_end.
def test_yylex_backtick_method
  @lex.lex_state = :expr_fname
  util_lex_token("`", :tBACK_REF2, s("`"))
  assert_equal(:expr_end, @lex.lex_state)
end
185
+
186
# util_bad_token case: a backspace character (octal 010) between spaces.
def test_yylex_bad_char
  util_bad_token(" \010 ")
end
189
+
190
# "!" is expected as :tBANG.
def test_yylex_bang
  util_lex_token("!", :tBANG, s("!"))
end
193
+
194
# "!=" is expected as :tNEQ.
def test_yylex_bang_equals
  util_lex_token("!=", :tNEQ, s("!="))
end
197
+
198
# "!~" is expected as :tNMATCH.
def test_yylex_bang_tilde
  util_lex_token("!~", :tNMATCH, s("!~"))
end
201
+
202
# "^" is expected as :tCARET.
def test_yylex_carat
  util_lex_token("^", :tCARET, s("^"))
end
205
+
206
# "^=" is expected as :tOP_ASGN carrying "^".
def test_yylex_carat_equals
  util_lex_token("^=", :tOP_ASGN, s("^"))
end
209
+
210
# "::" between two constants is expected as :tCOLON2.
def test_yylex_colon2
  util_lex_token("A::B",
    :tCONSTANT, s("A"),
    :tCOLON2, s("::"),
    :tCONSTANT, s("B"))
end
216
+
217
# A leading "::" before a constant is expected as :tCOLON3.
def test_yylex_colon3
  util_lex_token("::Array",
    :tCOLON3, s("::"),
    :tCONSTANT, s("Array"))
end
222
+
223
# "," is expected as the literal token ",".
def test_yylex_comma
  util_lex_token(",", ",", s(",")) # FIX (flagged by the original author)
end
226
+
227
# Line comments produce no tokens of their own; their text is collected in @lex.comments.
def test_yylex_comment
  util_lex_token("1 # one\n# two\n2",
    :tINTEGER, 1,
    "\n", nil,
    :tINTEGER, 2)
  assert_equal("# one\n# two\n", @lex.comments)
end
234
+
235
# A =begin/=end block is collected in @lex.comments; only the trailing 42 is a token.
def test_yylex_comment_begin
  util_lex_token("=begin\nblah\nblah\n=end\n42",
    :tINTEGER, 42)
  assert_equal("=begin\nblah\nblah\n=end\n", @lex.comments)
end
240
+
241
# util_bad_token case: =begin with no =end; @lex.comments must stay empty.
def test_yylex_comment_begin_bad
  util_bad_token("=begin\nblah\nblah\n")
  assert_equal('', @lex.comments)
end
245
+
246
# "=beginfoo" after a line continuation is an assignment, not the start of a block comment.
def test_yylex_comment_begin_not_comment
  util_lex_token("beginfoo = 5\np x \\\n=beginfoo",
    :tIDENTIFIER, s("beginfoo"),
    '=', s('='),
    :tINTEGER, 5,
    "\n", nil,
    :tIDENTIFIER, s("p"),
    :tIDENTIFIER, s("x"),
    '=', s('='),
    :tIDENTIFIER, s("beginfoo"))
end
257
+
258
# "=begin" followed by text on the same line: no tokens, whole block kept in @lex.comments.
def test_yylex_comment_begin_space
  util_lex_token("=begin blah\nblah\n=end\n")
  assert_equal("=begin blah\nblah\n=end\n", @lex.comments)
end
262
+
263
# A comment running to end of input is passed with no expected tokens.
def test_yylex_comment_eos
  util_lex_token("# comment")
end
266
+
267
# A capitalised name is expected as :tCONSTANT.
def test_yylex_constant
  util_lex_token("ArgumentError",
    :tCONSTANT, s("ArgumentError"))
end
271
+
272
# A constant followed by ";": :tCONSTANT, then the literal token ";".
def test_yylex_constant_semi
  util_lex_token("ArgumentError;",
    :tCONSTANT, s("ArgumentError"),
    ";", s(";"))
end
277
+
278
# "@@blah" is expected as :tCVAR.
def test_yylex_cvar
  util_lex_token("@@blah", :tCVAR, s("@@blah"))
end
281
+
282
# Lexing "@@1" must raise SyntaxError.
def test_yylex_cvar_bad
  assert_raises(SyntaxError) do
    util_lex_token("@@1")
  end
end
287
+
288
# util_bad_token case in :expr_fname: "def [ " after the :kDEF token.
def test_yylex_def_bad_name
  @lex.lex_state = :expr_fname
  util_bad_token("def [ ", :kDEF, s("def"))
end
292
+
293
# "/" between an identifier and an integer is expected as :tDIVIDE.
def test_yylex_div
  util_lex_token("a / 2",
    :tIDENTIFIER, s("a"),
    :tDIVIDE, s("/"),
    :tINTEGER, 2)
end
299
+
300
# "/=" is expected as :tOP_ASGN carrying "/".
def test_yylex_div_equals
  util_lex_token("a /= 2",
    :tIDENTIFIER, s("a"),
    :tOP_ASGN, s("/"),
    :tINTEGER, 2)
end
306
+
307
# With the default setup state, "do" is expected as plain :kDO.
def test_yylex_do
  util_lex_token("x do 42 end",
    :tIDENTIFIER, s("x"),
    :kDO, s("do"),
    :tINTEGER, 42,
    :kEND, s("end"))
end
314
+
315
# With :expr_endarg and true pushed on cmdarg, "do" after "x.y" is expected as :kDO_BLOCK.
def test_yylex_do_block
  @lex.lex_state = :expr_endarg
  @lex.cmdarg.push(true)

  util_lex_token("x.y do 42 end",
    :tIDENTIFIER, s("x"),
    :tDOT, s("."),
    :tIDENTIFIER, s("y"),
    :kDO_BLOCK, s("do"),
    :tINTEGER, 42,
    :kEND, s("end"))
end
327
+
328
# In :expr_endarg a leading "do" is expected as :kDO_BLOCK.
def test_yylex_do_block2
  @lex.lex_state = :expr_endarg

  util_lex_token("do 42 end",
    :kDO_BLOCK, s("do"),
    :tINTEGER, 42,
    :kEND, s("end"))
end
336
+
337
# With true pushed on the cond stack, "do" is expected as :kDO_COND.
def test_yylex_do_cond
  @lex.cond.push(true)

  util_lex_token("x do 42 end",
    :tIDENTIFIER, s("x"),
    :kDO_COND, s("do"),
    :tINTEGER, 42,
    :kEND, s("end"))
end
346
+
347
# A bare "$" is expected as the literal token "$".
def test_yylex_dollar
  util_lex_token('$', '$', s('$')) # FIX: the original author questioned this expectation
end
350
+
351
# "." is expected as :tDOT (HINT from the original: message sends).
def test_yylex_dot
  util_lex_token(".", :tDOT, s("."))
end
354
+
355
# ".." is expected as :tDOT2.
def test_yylex_dot2
  util_lex_token("..", :tDOT2, s(".."))
end
358
+
359
# "..." is expected as :tDOT3.
def test_yylex_dot3
  util_lex_token("...", :tDOT3, s("..."))
end
362
+
363
# "=" is expected as the literal token '='.
def test_yylex_equals
  util_lex_token("=", '=', s("=")) # FIX: the original author disliked this token form
end
366
+
367
# "==" is expected as :tEQ.
def test_yylex_equals2
  util_lex_token("==", :tEQ, s("=="))
end
370
+
371
# "===" is expected as :tEQQ.
def test_yylex_equals3
  util_lex_token("===", :tEQQ, s("==="))
end
374
+
375
# "=~" is expected as :tMATCH.
def test_yylex_equals_tilde
  util_lex_token("=~", :tMATCH, s("=~"))
end
378
+
379
# "1.0" is expected as :tFLOAT with value 1.0.
def test_yylex_float
  util_lex_token("1.0", :tFLOAT, 1.0)
end
382
+
383
# util_bad_token case: doubled underscore in a float literal.
def test_yylex_float_bad_no_underscores
  util_bad_token("1__0.0")
end
386
+
387
# util_bad_token case: float written without a leading digit.
def test_yylex_float_bad_no_zero_leading
  util_bad_token(".0")
end
390
+
391
# util_bad_token case: underscore immediately before the decimal point.
def test_yylex_float_bad_trailing_underscore
  util_bad_token("123_.0")
end
394
+
395
# A method call on a float literal: :tFLOAT, :tDOT, then the method name.
def test_yylex_float_call
  util_lex_token("1.0.to_s",
    :tFLOAT, 1.0,
    :tDOT, s('.'),
    :tIDENTIFIER, s('to_s'))
end
401
+
402
# Upper-case exponent on a dotted float.
def test_yylex_float_dot_E
  util_lex_token("1.0E10", :tFLOAT, 1.0e10)
end
405
+
406
# The leading "-" is a separate :tUMINUS_NUM token before the positive float.
def test_yylex_float_dot_E_neg
  util_lex_token("-1.0E10",
    :tUMINUS_NUM, s("-"),
    :tFLOAT, 1.0e10)
end
411
+
412
# Lower-case exponent on a dotted float.
def test_yylex_float_dot_e
  util_lex_token("1.0e10", :tFLOAT, 1.0e10)
end
415
+
416
# "-1.0e10": :tUMINUS_NUM followed by the positive float.
def test_yylex_float_dot_e_neg
  util_lex_token("-1.0e10",
    :tUMINUS_NUM, s("-"),
    :tFLOAT, 1.0e10)
end
421
+
422
# An exponent without a decimal point still yields :tFLOAT.
def test_yylex_float_e
  util_lex_token("1e10", :tFLOAT, 1e10)
end
425
+
426
# util_bad_token case: two exponent markers in one literal.
def test_yylex_float_e_bad_double_e
  util_bad_token("1e2e3")
end
429
+
430
# util_bad_token case: underscore immediately before the exponent marker.
def test_yylex_float_e_bad_trailing_underscore
  util_bad_token("123_e10")
end
433
+
434
# Negative exponent.
def test_yylex_float_e_minus
  util_lex_token("1e-10", :tFLOAT, 1e-10)
end
437
+
438
# "-1e10": :tUMINUS_NUM followed by the positive float.
def test_yylex_float_e_neg
  util_lex_token("-1e10",
    :tUMINUS_NUM, s("-"),
    :tFLOAT, 1e10)
end
443
+
444
# "-1e-10": :tUMINUS_NUM followed by a float with a negative exponent.
def test_yylex_float_e_neg_minus
  util_lex_token("-1e-10",
    :tUMINUS_NUM, s("-"),
    :tFLOAT, 1e-10)
end
449
+
450
# "-1e+10": :tUMINUS_NUM followed by a float with an explicit "+" exponent.
def test_yylex_float_e_neg_plus
  util_lex_token("-1e+10",
    :tUMINUS_NUM, s("-"),
    :tFLOAT, 1e10)
end
455
+
456
# Explicit "+" exponent.
def test_yylex_float_e_plus
  util_lex_token("1e+10", :tFLOAT, 1e10)
end
459
+
460
# Zero mantissa with zero exponent.
def test_yylex_float_e_zero
  util_lex_token("0e0", :tFLOAT, 0e0)
end
463
+
464
# "-1.0": :tUMINUS_NUM followed by the positive float.
def test_yylex_float_neg
  util_lex_token("-1.0",
    :tUMINUS_NUM, s("-"),
    :tFLOAT, 1.0)
end
469
+
470
# ">=" is expected as :tGEQ.
def test_yylex_ge
  util_lex_token("a >= 2",
    :tIDENTIFIER, s("a"),
    :tGEQ, s(">="),
    :tINTEGER, 2)
end
476
+
477
# "$blah" is expected as :tGVAR.
def test_yylex_global
  util_lex_token("$blah", :tGVAR, s("$blah"))
end
480
+
481
# In :expr_fname, "$`" is expected as :tGVAR.
def test_yylex_global_backref
  @lex.lex_state = :expr_fname
  util_lex_token("$`", :tGVAR, s("$`"))
end
485
+
486
# "$-" followed by a space is expected as :tGVAR "$-".
def test_yylex_global_dash_nothing
  util_lex_token('$- ', :tGVAR, s("$-"))
end
489
+
490
# "$-x" is expected as a single :tGVAR.
def test_yylex_global_dash_something
  util_lex_token('$-x', :tGVAR, s("$-x"))
end
493
+
494
# In :expr_fname, "$1" is expected as :tGVAR.
def test_yylex_global_number
  @lex.lex_state = :expr_fname
  util_lex_token("$1", :tGVAR, s("$1"))
end
498
+
499
# In :expr_fname, a multi-digit "$1234" is expected as :tGVAR.
def test_yylex_global_number_big
  @lex.lex_state = :expr_fname
  util_lex_token("$1234", :tGVAR, s("$1234"))
end
503
+
504
# Sixteen punctuation globals inside an array literal, each expected as :tGVAR.
def test_yylex_global_other
  util_lex_token('[$~, $*, $$, $?, $!, $@, $/, $\, $;, $,, $., $=, $:, $<, $>, $"]',
    :tLBRACK, s("["),
    :tGVAR, s("$~"), ",", s(","),
    :tGVAR, s("$*"), ",", s(","),
    :tGVAR, s("$$"), ",", s(","),
    :tGVAR, s("$?"), ",", s(","),
    :tGVAR, s("$!"), ",", s(","),
    :tGVAR, s("$@"), ",", s(","),
    :tGVAR, s("$/"), ",", s(","),
    :tGVAR, s("$\\"), ",", s(","),
    :tGVAR, s("$;"), ",", s(","),
    :tGVAR, s("$,"), ",", s(","),
    :tGVAR, s("$."), ",", s(","),
    :tGVAR, s("$="), ",", s(","),
    :tGVAR, s("$:"), ",", s(","),
    :tGVAR, s("$<"), ",", s(","),
    :tGVAR, s("$>"), ",", s(","),
    :tGVAR, s("$\""),
    :tRBRACK, s("]"))
end
525
+
526
# "$_" is expected as :tGVAR.
def test_yylex_global_underscore
  util_lex_token("$_",
    :tGVAR, s("$_"))
end
530
+
531
# A global whose name starts with two underscores is expected as :tGVAR.
def test_yylex_global_wierd
  util_lex_token("$__blah",
    :tGVAR, s("$__blah"))
end
535
+
536
# "$0" is expected as :tGVAR.
def test_yylex_global_zero
  util_lex_token('$0', :tGVAR, s("$0"))
end
539
+
540
# ">" is expected as :tGT.
def test_yylex_gt
  util_lex_token("a > 2",
    :tIDENTIFIER, s("a"),
    :tGT, s(">"),
    :tINTEGER, 2)
end
546
+
547
# A backtick-quoted heredoc tag opens with :tXSTRING_BEG and closes with the tag name.
def test_yylex_heredoc_backtick
  util_lex_token("a = <<`EOF`\n blah blah\nEOF\n",
    :tIDENTIFIER, s("a"),
    "=", s("="),
    :tXSTRING_BEG, s("`"),
    :tSTRING_CONTENT, s(:str, " blah blah\n"),
    :tSTRING_END, s("EOF"),
    "\n", nil)
end
556
+
557
# A double-quoted heredoc tag: :tSTRING_BEG, the body, then :tSTRING_END with the tag name.
def test_yylex_heredoc_double
  util_lex_token("a = <<\"EOF\"\n blah blah\nEOF\n",
    :tIDENTIFIER, s("a"),
    "=", s("="),
    :tSTRING_BEG, s("\""),
    :tSTRING_CONTENT, s(:str, " blah blah\n"),
    :tSTRING_END, s("EOF"),
    "\n", nil)
end
566
+
567
# "<<-" with a double-quoted tag: the terminator line may be preceded by a space.
def test_yylex_heredoc_double_dash
  util_lex_token("a = <<-\"EOF\"\n blah blah\n EOF\n",
    :tIDENTIFIER, s("a"),
    "=", s("="),
    :tSTRING_BEG, s("\""),
    :tSTRING_CONTENT, s(:str, " blah blah\n"),
    :tSTRING_END, s("EOF"),
    "\n", nil)
end
576
+
577
# util_bad_token case: double-quoted heredoc whose input ends before the terminator.
def test_yylex_heredoc_double_eos
  util_bad_token("a = <<\"EOF\"\nblah",
    :tIDENTIFIER, s("a"),
    "=", s("="),
    :tSTRING_BEG, s("\""))
end
583
+
584
# util_bad_token case: as above, but the unterminated body ends with a newline.
def test_yylex_heredoc_double_eos_nl
  util_bad_token("a = <<\"EOF\"\nblah\n",
    :tIDENTIFIER, s("a"),
    "=", s("="),
    :tSTRING_BEG, s("\""))
end
590
+
591
# A double-quoted heredoc containing #@, #$ and #{ markers.
# The content expectations after each marker were flagged "HUH?" by the original author.
def test_yylex_heredoc_double_interp
  util_lex_token("a = <<\"EOF\"\n#x a \#@a b \#$b c \#{3} \nEOF\n",
    :tIDENTIFIER, s("a"),
    "=", s("="),
    :tSTRING_BEG, s("\""),
    :tSTRING_CONTENT, s(:str, "#x a "),
    :tSTRING_DVAR, s("\#@"),
    :tSTRING_CONTENT, s(:str, "@a b "), # HUH?
    :tSTRING_DVAR, s("\#$"),
    :tSTRING_CONTENT, s(:str, "$b c "), # HUH?
    :tSTRING_DBEG, s("\#{"),
    :tSTRING_CONTENT, s(:str, "3} \n"), # HUH?
    :tSTRING_END, s("EOF"),
    "\n", nil)
end
606
+
607
# An unquoted heredoc tag: body, an empty :tSTRING_CONTENT, then the terminator.
def test_yylex_heredoc_none
  util_lex_token("a = <<EOF\nblah\nblah\nEOF",
    :tIDENTIFIER, s("a"),
    "=", s("="),
    :tSTRING_BEG, s("\""),
    :tSTRING_CONTENT, s(:str, "blah\nblah\n"),
    :tSTRING_CONTENT, s(:str, ""),
    :tSTRING_END, s("EOF"),
    "\n", nil)
end
617
+
618
# util_bad_token case: unquoted heredoc tag with nothing after it.
def test_yylex_heredoc_none_bad_eos
  util_bad_token("a = <<EOF",
    :tIDENTIFIER, s("a"),
    "=", s("="),
    :tSTRING_BEG, s("\""))
end
624
+
625
# "<<-" with an unquoted tag: the terminator line may be preceded by a space.
def test_yylex_heredoc_none_dash
  util_lex_token("a = <<-EOF\nblah\nblah\n EOF",
    :tIDENTIFIER, s("a"),
    "=", s("="),
    :tSTRING_BEG, s("\""),
    :tSTRING_CONTENT, s(:str, "blah\nblah\n"),
    :tSTRING_CONTENT, s(:str, ""),
    :tSTRING_END, s("EOF"),
    "\n", nil)
end
635
+
636
# A single-quoted heredoc tag; the opening token value is still a double quote.
def test_yylex_heredoc_single
  util_lex_token("a = <<'EOF'\n blah blah\nEOF\n",
    :tIDENTIFIER, s("a"),
    "=", s("="),
    :tSTRING_BEG, s("\""),
    :tSTRING_CONTENT, s(:str, " blah blah\n"),
    :tSTRING_END, s("EOF"),
    "\n", nil)
end
645
+
646
# util_bad_token case: single-quoted heredoc whose body is never terminated.
def test_yylex_heredoc_single_bad_eos_body
  util_bad_token("a = <<'EOF'\nblah",
    :tIDENTIFIER, s("a"),
    "=", s("="),
    :tSTRING_BEG, s("\""))
end
652
+
653
# util_bad_token case: heredoc with an empty single-quoted tag.
def test_yylex_heredoc_single_bad_eos_empty
  util_bad_token("a = <<''\n",
    :tIDENTIFIER, s("a"),
    "=", s("="),
    :tSTRING_BEG, s("\""))
end
659
+
660
# util_bad_token case: the single-quoted tag itself is missing its closing quote.
def test_yylex_heredoc_single_bad_eos_term
  util_bad_token("a = <<'EOF",
    :tIDENTIFIER, s("a"),
    "=", s("="),
    :tSTRING_BEG, s("\""))
end
666
+
667
# util_bad_token case: unclosed single-quoted tag followed by a newline and more code.
def test_yylex_heredoc_single_bad_eos_term_nl
  util_bad_token("a = <<'EOF\ns = 'blah blah'",
    :tIDENTIFIER, s("a"),
    "=", s("="),
    :tSTRING_BEG, s("\""))
end
673
+
674
# "<<-" with a single-quoted tag: the terminator line may be preceded by a space.
def test_yylex_heredoc_single_dash
  util_lex_token("a = <<-'EOF'\n blah blah\n EOF\n",
    :tIDENTIFIER, s("a"),
    "=", s("="),
    :tSTRING_BEG, s("\""),
    :tSTRING_CONTENT, s(:str, " blah blah\n"),
    :tSTRING_END, s("EOF"),
    "\n", nil)
end
683
+
684
# A lower-case word is expected as :tIDENTIFIER.
def test_yylex_identifier
  util_lex_token("identifier", :tIDENTIFIER, s("identifier"))
end
687
+
688
# An identifier ending in "!" is expected as :tFID.
def test_yylex_identifier_bang
  util_lex_token("identifier!", :tFID, s("identifier!"))
end
691
+
692
# util_lex_fname case: "<=>" with expected token :tCMP.
def test_yylex_identifier_cmp
  util_lex_fname("<=>", :tCMP)
end
695
+
696
# util_lex_fname case: "identifier" as :tIDENTIFIER, with :expr_end as the third argument.
def test_yylex_identifier_def
  util_lex_fname("identifier", :tIDENTIFIER, :expr_end)
end
699
+
700
# An identifier ending in "?" is expected as :tFID.
def test_yylex_identifier_eh
  util_lex_token("identifier?", :tFID, s("identifier?"))
end
703
+
704
# In :expr_fname, ":blah==>" splits into a symbol start, "blah=" and "=>".
def test_yylex_identifier_equals_arrow
  @lex.lex_state = :expr_fname
  util_lex_token(":blah==>",
    :tSYMBEG, s(":"),
    :tIDENTIFIER, s("blah="),
    :tASSOC, s("=>"))
end
711
+
712
# util_lex_fname case: "^" with expected token :tCARET.
def test_yylex_identifier_equals_caret
  util_lex_fname("^", :tCARET)
end
715
+
716
# util_lex_fname case: setter-style "identifier=" as :tIDENTIFIER, with :expr_end as the third argument.
def test_yylex_identifier_equals_def
  util_lex_fname("identifier=", :tIDENTIFIER, :expr_end)
end
719
+
720
# util_lex_fname case: "==" with expected token :tEQ.
def test_yylex_identifier_equals_def2
  util_lex_fname("==", :tEQ)
end
723
+
724
# Starting from :expr_dot, "y = arg" lexes as an assignment and ends in :expr_arg.
def test_yylex_identifier_equals_expr
  @lex.lex_state = :expr_dot
  util_lex_token("y = arg",
    :tIDENTIFIER, s("y"),
    "=", s("="),
    :tIDENTIFIER, s("arg"))

  assert_equal(:expr_arg, @lex.lex_state)
end
733
+
734
# util_lex_fname case: "|" with expected token :tPIPE.
def test_yylex_identifier_equals_or
  util_lex_fname("|", :tPIPE)
end
737
+
738
# util_lex_fname case: "/" with expected token :tDIVIDE.
def test_yylex_identifier_equals_slash
  util_lex_fname("/", :tDIVIDE)
end
741
+
742
# In :expr_fname, "identifier=~" splits into the identifier and :tMATCH.
def test_yylex_identifier_equals_tilde
  @lex.lex_state = :expr_fname # normally only set by the parser's defs
  util_lex_token("identifier=~",
    :tIDENTIFIER, s("identifier"),
    :tMATCH, s("=~"))
end
748
+
749
# util_lex_fname case: ">" with expected token :tGT.
def test_yylex_identifier_gt
  util_lex_fname(">", :tGT)
end
752
+
753
# util_lex_fname case: "<=" with expected token :tLEQ.
def test_yylex_identifier_le
  util_lex_fname("<=", :tLEQ)
end
756
+
757
# util_lex_fname case: "<" with expected token :tLT.
def test_yylex_identifier_lt
  util_lex_fname("<", :tLT)
end
760
+
761
# util_lex_fname case: "~" with expected token :tTILDE.
def test_yylex_identifier_tilde
  util_lex_fname("~", :tTILDE)
end
764
+
765
# util_lex_fname case: "[]" with expected token :tAREF.
def test_yylex_index
  util_lex_fname("[]", :tAREF)
end
768
+
769
# util_lex_fname case: "[]=" with expected token :tASET.
def test_yylex_index_equals
  util_lex_fname("[]=", :tASET)
end
772
+
773
# "42" is expected as :tINTEGER 42.
def test_yylex_integer
  util_lex_token("42", :tINTEGER, 42)
end
776
+
777
# Binary literal with the 0b prefix.
def test_yylex_integer_bin
  util_lex_token("0b101010", :tINTEGER, 42)
end
780
+
781
# util_bad_token case: 0b prefix with no digits.
def test_yylex_integer_bin_bad_none
  util_bad_token("0b ")
end
784
+
785
# util_bad_token case: doubled underscore in a binary literal.
def test_yylex_integer_bin_bad_underscores
  util_bad_token("0b10__01")
end
788
+
789
# Plain decimal literal.
def test_yylex_integer_dec
  util_lex_token("42", :tINTEGER, 42)
end
792
+
793
# util_bad_token case: doubled underscore in a decimal literal.
def test_yylex_integer_dec_bad_underscores
  util_bad_token("42__24")
end
796
+
797
# Decimal literal with the explicit 0d prefix.
def test_yylex_integer_dec_d
  util_lex_token("0d42", :tINTEGER, 42)
end
800
+
801
# util_bad_token case: 0d prefix with no digits.
def test_yylex_integer_dec_d_bad_none
  util_bad_token("0d")
end
804
+
805
# util_bad_token case: doubled underscore after the 0d prefix.
def test_yylex_integer_dec_d_bad_underscores
  util_bad_token("0d42__24")
end
808
+
809
# Character literal ?a is expected as :tINTEGER 97.
def test_yylex_integer_eh_a
  util_lex_token('?a', :tINTEGER, 97)
end
812
+
813
# Character literal with meta and control escapes on "a" is expected as 129.
def test_yylex_integer_eh_escape_M_escape_C
  util_lex_token('?\M-\C-a', :tINTEGER, 129)
end
816
+
817
# Hexadecimal literal with the 0x prefix.
def test_yylex_integer_hex
  util_lex_token("0x2a", :tINTEGER, 42)
end
820
+
821
# util_bad_token case: 0x prefix with no digits.
def test_yylex_integer_hex_bad_none
  util_bad_token("0x ")
end
824
+
825
# util_bad_token case: doubled underscore in a hex literal.
def test_yylex_integer_hex_bad_underscores
  util_bad_token("0xab__cd")
end
828
+
829
# Octal literal with a bare leading zero.
def test_yylex_integer_oct
  util_lex_token("052", :tINTEGER, 42)
end
832
+
833
# util_bad_token case: digit 8 in a leading-zero octal literal.
def test_yylex_integer_oct_bad_range
  util_bad_token("08")
end
836
+
837
# util_bad_token case: doubled underscore in an octal literal.
def test_yylex_integer_oct_bad_underscores
  util_bad_token("01__23")
end
840
+
841
# Octal literal with the explicit 0o prefix.
def test_yylex_integer_oct_o
  util_lex_token("0o52", :tINTEGER, 42)
end
844
+
845
# util_bad_token case: digit 8 after the 0o prefix.
def test_yylex_integer_oct_o_bad_range
  util_bad_token("0o8")
end
848
+
849
# util_bad_token case: doubled underscore after the 0o prefix.
def test_yylex_integer_oct_o_bad_underscores
  util_bad_token("0o1__23")
end
852
+
853
# Unlike 0b/0d/0x, "0o" with no digits is accepted and expected as the integer 0.
def test_yylex_integer_oct_o_not_bad_none
  util_lex_token("0o ", :tINTEGER, 0)
end
856
+
857
# A method call on an integer literal: the dot is not taken as a decimal point.
def test_yylex_integer_trailing
  util_lex_token("1.to_s",
    :tINTEGER, 1,
    :tDOT, s('.'),
    :tIDENTIFIER, s('to_s'))
end
863
+
864
# A single underscore separator is allowed inside an integer.
def test_yylex_integer_underscore
  util_lex_token("4_2", :tINTEGER, 42)
end
867
+
868
# util_bad_token case: doubled underscore separator.
def test_yylex_integer_underscore_bad
  util_bad_token("4__2")
end
871
+
872
# "0" is expected as :tINTEGER 0.
def test_yylex_integer_zero
  util_lex_token("0", :tINTEGER, 0)
end
875
+
876
# "@blah" is expected as :tIVAR.
def test_yylex_ivar
  util_lex_token("@blah", :tIVAR, s("@blah"))
end
879
+
880
# Lexing "@1" must raise SyntaxError.
def test_yylex_ivar_bad
  assert_raises(SyntaxError) do
    util_lex_token("@1")
  end
end
885
+
886
# In :expr_endarg, "if" is expected as the modifier :kIF_MOD and leaves the state at :expr_beg.
def test_yylex_keyword_expr
  @lex.lex_state = :expr_endarg

  util_lex_token("if", :kIF_MOD, s("if"))

  assert_equal(:expr_beg, @lex.lex_state)
end
893
+
894
# "<" is expected as :tLT.
def test_yylex_lt
  util_lex_token("<", :tLT, s("<"))
end
897
+
898
# The left-shift operator is expected as :tLSHFT.
# In a double-quoted literal "<\<" is the same two characters as "<<".
def test_yylex_lt2
  util_lex_token("a <\< b",
    :tIDENTIFIER, s("a"),
    :tLSHFT, s("<\<"),
    :tIDENTIFIER, s("b"))
end
905
+
906
# Left-shift assignment is expected as :tOP_ASGN carrying the shift operator.
def test_yylex_lt2_equals
  util_lex_token("a <\<= b",
    :tIDENTIFIER, s("a"),
    :tOP_ASGN, s("<\<"),
    :tIDENTIFIER, s("b"))
end
912
+
913
# "<=" is expected as :tLEQ.
def test_yylex_lt_equals
  util_lex_token("<=", :tLEQ, s("<="))
end
916
+
917
# "-" between two integers is expected as binary :tMINUS.
def test_yylex_minus
  util_lex_token("1 - 2",
    :tINTEGER, 1,
    :tMINUS, s("-"),
    :tINTEGER, 2)
end
923
+
924
# "-=" is expected as :tOP_ASGN carrying "-".
def test_yylex_minus_equals
  util_lex_token("-=", :tOP_ASGN, s("-"))
end
927
+
928
# In :expr_fname, "-" is expected as :tMINUS.
def test_yylex_minus_method
  @lex.lex_state = :expr_fname
  util_lex_token("-", :tMINUS, s("-"))
end
932
+
933
# In :expr_fname, "-@" is expected as :tUMINUS.
def test_yylex_minus_unary_method
  @lex.lex_state = :expr_fname
  util_lex_token("-@", :tUMINUS, s("-@"))
end
937
+
938
# "-42": :tUMINUS_NUM followed by the positive integer.
def test_yylex_minus_unary_number
  util_lex_token("-42",
    :tUMINUS_NUM, s("-"),
    :tINTEGER, 42)
end
943
+
944
# $1 through $9 inside an array literal are each expected as :tNTH_REF with their number.
def test_yylex_nth_ref
  util_lex_token('[$1, $2, $3, $4, $5, $6, $7, $8, $9]',
    :tLBRACK, s("["),
    :tNTH_REF, s(:nth_ref, 1), ",", s(","),
    :tNTH_REF, s(:nth_ref, 2), ",", s(","),
    :tNTH_REF, s(:nth_ref, 3), ",", s(","),
    :tNTH_REF, s(:nth_ref, 4), ",", s(","),
    :tNTH_REF, s(:nth_ref, 5), ",", s(","),
    :tNTH_REF, s(:nth_ref, 6), ",", s(","),
    :tNTH_REF, s(:nth_ref, 7), ",", s(","),
    :tNTH_REF, s(:nth_ref, 8), ",", s(","),
    :tNTH_REF, s(:nth_ref, 9),
    :tRBRACK, s("]"))
end
958
+
959
# With the default setup state, "(" is expected as :tLPAREN.
def test_yylex_open_bracket
  util_lex_token("(", :tLPAREN, s("("))
end
962
+
963
# In :expr_cmdarg, " (" is expected as :tLPAREN_ARG.
def test_yylex_open_bracket_cmdarg
  @lex.lex_state = :expr_cmdarg
  util_lex_token(" (", :tLPAREN_ARG, s("("))
end
967
+
968
# In :expr_arg, " (" is expected as :tLPAREN2.
def test_yylex_open_bracket_exprarg
  @lex.lex_state = :expr_arg
  util_lex_token(" (", :tLPAREN2, s("("))
end
972
+
973
# With the default setup state, "{" is expected as :tLBRACE.
def test_yylex_open_curly_bracket
  util_lex_token("{",
    :tLBRACE, s("{"))
end
977
+
978
# In :expr_arg, "{" after an identifier is expected as :tLCURLY.
def test_yylex_open_curly_bracket_arg
  @lex.lex_state = :expr_arg
  util_lex_token("m { 3 }",
    :tIDENTIFIER, s("m"),
    :tLCURLY, s("{"),
    :tINTEGER, 3,
    :tRCURLY, s("}"))
end
986
+
987
# In :expr_endarg, a leading "{" is expected as :tLBRACE_ARG.
def test_yylex_open_curly_bracket_block
  @lex.lex_state = :expr_endarg # as if "m(3)" had just been seen
  util_lex_token("{ 4 }",
    :tLBRACE_ARG, s("{"),
    :tINTEGER, 4,
    :tRCURLY, s("}"))
end
994
+
995
# In :expr_arg, "[" after "m " is expected as :tLBRACK.
def test_yylex_open_square_bracket_arg
  @lex.lex_state = :expr_arg
  util_lex_token("m [ 3 ]",
    :tIDENTIFIER, s("m"),
    :tLBRACK, s("["),
    :tINTEGER, 3,
    :tRBRACK, s("]"))
end
1003
+
1004
# An array literal: :tLBRACK, integers separated by literal "," tokens, :tRBRACK.
def test_yylex_open_square_bracket_ary
  util_lex_token("[1, 2, 3]",
    :tLBRACK, s("["),
    :tINTEGER, 1,
    ",", s(","),
    :tINTEGER, 2,
    ",", s(","),
    :tINTEGER, 3,
    :tRBRACK, s("]"))
end
1014
+
1015
# "[" directly after an identifier is expected as the literal token "[".
def test_yylex_open_square_bracket_meth
  util_lex_token("m[3]",
    :tIDENTIFIER, s("m"),
    "[", s("["),
    :tINTEGER, 3,
    :tRBRACK, s("]"))
end
1022
+
1023
# "|" is expected as :tPIPE.
def test_yylex_or
  util_lex_token("|", :tPIPE, s("|"))
end
1026
+
1027
# "||" is expected as :tOROP.
def test_yylex_or2
  util_lex_token("||", :tOROP, s("||"))
end
1030
+
1031
# "||=" is expected as :tOP_ASGN carrying "||".
def test_yylex_or2_equals
  util_lex_token("||=", :tOP_ASGN, s("||"))
end
1034
+
1035
# "|=" is expected as :tOP_ASGN carrying "|".
def test_yylex_or_equals
  util_lex_token("|=", :tOP_ASGN, s("|"))
end
1038
+
1039
# "%" between an identifier and an integer is expected as :tPERCENT.
def test_yylex_percent
  util_lex_token("a % 2",
    :tIDENTIFIER, s("a"),
    :tPERCENT, s("%"),
    :tINTEGER, 2)
end
1045
+
1046
# "%=" is expected as :tOP_ASGN carrying "%".
def test_yylex_percent_equals
  util_lex_token("a %= 2",
    :tIDENTIFIER, s("a"),
    :tOP_ASGN, s("%"),
    :tINTEGER, 2)
end
1052
+
1053
# "+" between two integers is expected as binary :tPLUS.
# TODO (from the original): should lex_state be checked here?
def test_yylex_plus
  util_lex_token("1 + 1",
    :tINTEGER, 1,
    :tPLUS, s("+"),
    :tINTEGER, 1)
end
1059
+
1060
# "+=" is expected as :tOP_ASGN carrying "+".
def test_yylex_plus_equals
  util_lex_token("+=", :tOP_ASGN, s("+"))
end
1063
+
1064
# In :expr_fname, "+" is expected as :tPLUS.
def test_yylex_plus_method
  @lex.lex_state = :expr_fname
  util_lex_token("+", :tPLUS, s("+"))
end
1068
+
1069
# In :expr_fname, "+@" is expected as :tUPLUS.
def test_yylex_plus_unary_method
  @lex.lex_state = :expr_fname
  util_lex_token("+@", :tUPLUS, s("+@"))
end
1073
+
1074
# "+42" is expected as the single integer 42, with no token for the sign.
def test_yylex_plus_unary_number
  util_lex_token("+42",
    :tINTEGER, 42)
end
1078
+
1079
# Character literal ?* is expected as :tINTEGER 42.
def test_yylex_question
  util_lex_token("?*", :tINTEGER, 42)
end
1082
+
1083
# util_bad_token case: "?" at end of input.
def test_yylex_question_bad_eos
  util_bad_token("?")
end
1086
+
1087
# "?" followed by any whitespace character is expected as the literal token "?".
# The six inputs are checked in the same order as before.
def test_yylex_question_ws
  ["? ", "?\n", "?\t", "?\v", "?\r", "?\f"].each do |source|
    util_lex_token(source, "?", s("?"))
  end
end
1095
+
1096
# "?" followed by a backslash escape for a whitespace character is a character literal.
# The state is reset to :expr_beg before each case, as in the original sequence.
def test_yylex_question_ws_backslashed
  cases = [
    ["?\\ ", 32],
    ["?\\n", 10],
    ["?\\t", 9],
    ["?\\v", 11],
    ["?\\r", 13],
    ["?\\f", 12],
  ]

  cases.each do |source, code|
    @lex.lex_state = :expr_beg
    util_lex_token(source, :tINTEGER, code)
  end
end
1110
+
1111
# "]" is expected as :tRBRACK.
def test_yylex_rbracket
  util_lex_token("]", :tRBRACK, s("]"))
end
1114
+
1115
# "}" is expected as :tRCURLY.
def test_yylex_rcurly
  util_lex_token("}", :tRCURLY, s("}"))
end
1118
+
1119
# A slash-delimited regexp: :tREGEXP_BEG, the content, then :tREGEXP_END with empty options.
def test_yylex_regexp
  util_lex_token("/regexp/",
    :tREGEXP_BEG, s("/"),
    :tSTRING_CONTENT, s(:str, "regexp"),
    :tREGEXP_END, "")
end
1125
+
1126
# After an identifier and a space, "/regexp/" is expected as a regexp rather than division.
def test_yylex_regexp_ambiguous
  util_lex_token("method /regexp/",
    :tIDENTIFIER, s("method"),
    :tREGEXP_BEG, s("/"),
    :tSTRING_CONTENT, s(:str, "regexp"),
    :tREGEXP_END, "")
end
1133
+
1134
# util_bad_token case: regexp with the option letters "xyz", after begin and content tokens.
def test_yylex_regexp_bad
  util_bad_token("/.*/xyz",
    :tREGEXP_BEG, s("/"),
    :tSTRING_CONTENT, s(:str, ".*"))
end
1139
+
1140
# A \C-x escape inside a regexp is kept verbatim in the content.
def test_yylex_regexp_escape_C
  util_lex_token('/regex\\C-x/',
    :tREGEXP_BEG, s("/"),
    :tSTRING_CONTENT, s(:str, "regex\\C-x"),
    :tREGEXP_END, "")
end
1146
+
1147
# A combined \C-\M-x escape inside a regexp is kept verbatim in the content.
def test_yylex_regexp_escape_C_M
  util_lex_token('/regex\\C-\\M-x/',
    :tREGEXP_BEG, s("/"),
    :tSTRING_CONTENT, s(:str, "regex\\C-\\M-x"),
    :tREGEXP_END, "")
end
1153
+
1154
# A backslash-newline between \C- and \M-x is absent from the expected content.
def test_yylex_regexp_escape_C_M_craaaazy
  util_lex_token("/regex\\C-\\\n\\M-x/",
    :tREGEXP_BEG, s("/"),
    :tSTRING_CONTENT, s(:str, "regex\\C-\\M-x"),
    :tREGEXP_END, "")
end
1160
+
1161
# util_bad_token case: \C without its dash inside a regexp.
def test_yylex_regexp_escape_C_bad_dash
  util_bad_token('/regex\\Cx/', :tREGEXP_BEG, s("/"))
end
1164
+
1165
# util_bad_token case: \C- immediately followed by the closing slash.
def test_yylex_regexp_escape_C_bad_dash_eos
  util_bad_token('/regex\\C-/', :tREGEXP_BEG, s("/"))
end
1168
+
1169
# util_bad_token case: input ends right after \C-.
def test_yylex_regexp_escape_C_bad_dash_eos2
  util_bad_token('/regex\\C-', :tREGEXP_BEG, s("/"))
end
1172
+
1173
# util_bad_token case: \C immediately followed by the closing slash.
def test_yylex_regexp_escape_C_bad_eos
  util_bad_token('/regex\\C/', :tREGEXP_BEG, s("/"))
end
1176
+
1177
# util_bad_token case: input ends right after \c.
def test_yylex_regexp_escape_C_bad_eos2
  util_bad_token('/regex\\c', :tREGEXP_BEG, s("/"))
end
1180
+
1181
# A \M-x escape inside a regexp is kept verbatim in the content.
def test_yylex_regexp_escape_M
  util_lex_token('/regex\\M-x/',
    :tREGEXP_BEG, s("/"),
    :tSTRING_CONTENT, s(:str, "regex\\M-x"),
    :tREGEXP_END, "")
end
1187
+
1188
# A combined \M-\C-x escape inside a regexp is kept verbatim in the content.
def test_yylex_regexp_escape_M_C
  util_lex_token('/regex\\M-\\C-x/',
    :tREGEXP_BEG, s("/"),
    :tSTRING_CONTENT, s(:str, "regex\\M-\\C-x"),
    :tREGEXP_END, "")
end
1194
+
1195
# util_bad_token case: \M without its dash inside a regexp.
def test_yylex_regexp_escape_M_bad_dash
  util_bad_token('/regex\\Mx/', :tREGEXP_BEG, s("/"))
end
1198
+
1199
# util_bad_token case: \M- immediately followed by the closing slash.
def test_yylex_regexp_escape_M_bad_dash_eos
  util_bad_token('/regex\\M-/', :tREGEXP_BEG, s("/"))
end
1202
+
1203
# util_bad_token case: input ends right after \M-.
def test_yylex_regexp_escape_M_bad_dash_eos2
  util_bad_token('/regex\\M-', :tREGEXP_BEG, s("/"))
end
1206
+
1207
# util_bad_token case: \M immediately followed by the closing slash.
def test_yylex_regexp_escape_M_bad_eos
  util_bad_token('/regex\\M/', :tREGEXP_BEG, s("/"))
end
1210
+
1211
# An escaped slash inside a slash-delimited regexp stays in the content with its backslash.
def test_yylex_regexp_escape_backslash_slash
  util_lex_token('/\\//',
    :tREGEXP_BEG, s("/"),
    :tSTRING_CONTENT, s(:str, '\\/'),
    :tREGEXP_END, "")
end
1217
+
1218
# %r with "%" as delimiter: an escaped "%" stays in the content.
def test_yylex_regexp_escape_backslash_terminator
  util_lex_token('%r%blah\\%blah%',
    :tREGEXP_BEG, s("%r\000"), # FIX ?!? (begin value questioned by the original author)
    :tSTRING_CONTENT, s(:str, "blah\\%blah"),
    :tREGEXP_END, "")
end
1224
+
1225
# %r with braces as delimiters: an escaped "}" stays in the content.
def test_yylex_regexp_escape_backslash_terminator_meta1
  util_lex_token('%r{blah\\}blah}',
    :tREGEXP_BEG, s("%r{"), # FIX ?!? (begin value questioned by the original author)
    :tSTRING_CONTENT, s(:str, "blah\\}blah"),
    :tREGEXP_END, "")
end
1231
+
1232
# %r with "/" as delimiter: an escaped "/" stays in the content.
def test_yylex_regexp_escape_backslash_terminator_meta2
  util_lex_token('%r/blah\\/blah/',
    :tREGEXP_BEG, s("%r\000"), # FIX ?!? (begin value questioned by the original author)
    :tSTRING_CONTENT, s(:str, "blah\\/blah"),
    :tREGEXP_END, "")
end
1238
+
1239
# %r with "/" as delimiter: an escaped "%" (not the delimiter) stays in the content.
def test_yylex_regexp_escape_backslash_terminator_meta3
  util_lex_token('%r/blah\\%blah/',
    :tREGEXP_BEG, s("%r\000"), # FIX ?!? (begin value questioned by the original author)
    :tSTRING_CONTENT, s(:str, "blah\\%blah"),
    :tREGEXP_END, "")
end
1245
+
1246
# util_bad_token case: input ends right after a backslash inside a regexp.
def test_yylex_regexp_escape_bad_eos
  util_bad_token('/regex\\', :tREGEXP_BEG, s("/"))
end
1249
+
1250
# An escaped backslash inside a regexp is kept as two backslashes in the content.
def test_yylex_regexp_escape_bs
  util_lex_token('/regex\\\\regex/',
    :tREGEXP_BEG, s("/"),
    :tSTRING_CONTENT, s(:str, "regex\\\\regex"),
    :tREGEXP_END, "")
end
1256
+
1257
# A `\cx` sequence inside a regexp is expected in the content exactly as
# written in the source.
def test_yylex_regexp_escape_c
  expected = [
    :tREGEXP_BEG,     s("/"),
    :tSTRING_CONTENT, s(:str, "regex\\cxxx"),
    :tREGEXP_END,     ""
  ]

  util_lex_token('/regex\\cxxx/', *expected)
end
1263
+
1264
# `\c` followed by another backslash escape (`\n`) inside a regexp is
# expected in the content exactly as written in the source.
def test_yylex_regexp_escape_c_backslash
  expected = [
    :tREGEXP_BEG,     s("/"),
    :tSTRING_CONTENT, s(:str, "regex\\c\\n"),
    :tREGEXP_END,     ""
  ]

  util_lex_token('/regex\\c\\n/', *expected)
end
1270
+
1271
# `\t` and `\n` inside a regexp are expected in the content as
# backslash-letter pairs, not as tab/newline characters.
def test_yylex_regexp_escape_chars
  expected = [
    :tREGEXP_BEG,     s("/"),
    :tSTRING_CONTENT, s(:str, "re\\tge\\nxp"),
    :tREGEXP_END,     ""
  ]

  util_lex_token('/re\\tge\\nxp/', *expected)
end
1277
+
1278
# A character class holding an escaped slash and an escaped backslash:
# the expected content is the literal with its two delimiters removed.
def test_yylex_regexp_escape_double_backslash
  regexp = '/[\\/\\\\]$/'
  body   = regexp[1..-2] # everything between the surrounding slashes

  util_lex_token(regexp,
                 :tREGEXP_BEG,     s("/"),
                 :tSTRING_CONTENT, s(:str, body),
                 :tREGEXP_END,     "")
end
1285
+
1286
# A two-digit `\x61` escape inside a regexp is expected in the content
# exactly as written in the source.
def test_yylex_regexp_escape_hex
  expected = [
    :tREGEXP_BEG,     s("/"),
    :tSTRING_CONTENT, s(:str, "regex\\x61xp"),
    :tREGEXP_END,     ""
  ]

  util_lex_token('/regex\\x61xp/', *expected)
end
1292
+
1293
# Single-digit hex escapes (`\xd`, `\xa`) are expected in the content as
# written, and the trailing `on` is expected as the tREGEXP_END value.
def test_yylex_regexp_escape_hex_one
  expected = [
    :tREGEXP_BEG,     s('/'),
    :tSTRING_CONTENT, s(:str, '^[\\xd\\xa]{2}'),
    :tREGEXP_END,     'on'
  ]

  util_lex_token('/^[\\xd\\xa]{2}/on', *expected)
end
1299
+
1300
# `\x` followed by a non-hex character (`z`): util_bad_token requires
# lexing to raise SyntaxError.
def test_yylex_regexp_escape_hex_bad
  source = '/regex\\xzxp/'

  util_bad_token(source, :tREGEXP_BEG, s("/"))
end
1303
+
1304
# A one-digit octal escape (`\0`) inside a regexp is expected in the
# content exactly as written in the source.
def test_yylex_regexp_escape_oct1
  expected = [
    :tREGEXP_BEG,     s("/"),
    :tSTRING_CONTENT, s(:str, "regex\\0xp"),
    :tREGEXP_END,     ""
  ]

  util_lex_token('/regex\\0xp/', *expected)
end
1310
+
1311
# A two-digit octal escape (`\07`) inside a regexp is expected in the
# content exactly as written in the source.
def test_yylex_regexp_escape_oct2
  expected = [
    :tREGEXP_BEG,     s("/"),
    :tSTRING_CONTENT, s(:str, "regex\\07xp"),
    :tREGEXP_END,     ""
  ]

  util_lex_token('/regex\\07xp/', *expected)
end
1317
+
1318
# A backslash followed by a longer digit run (`\10142`) inside a regexp is
# expected in the content exactly as written in the source.
def test_yylex_regexp_escape_oct3
  expected = [
    :tREGEXP_BEG,     s("/"),
    :tSTRING_CONTENT, s(:str, "regex\\10142"),
    :tREGEXP_END,     ""
  ]

  util_lex_token('/regex\\10142/', *expected)
end
1324
+
1325
# A backslash directly followed by a newline inside a regexp: both
# characters are expected to be absent from the resulting content.
def test_yylex_regexp_escape_return
  source = "/regex\\\nregex/"

  util_lex_token(source,
                 :tREGEXP_BEG,     s("/"),
                 :tSTRING_CONTENT, s(:str, "regexregex"),
                 :tREGEXP_END,     "")
end
1331
+
1332
# Trailing option letters `nm` after the closing slash are expected as
# the value of the tREGEXP_END token.
def test_yylex_regexp_nm
  expected = [
    :tREGEXP_BEG,     s("/"),
    :tSTRING_CONTENT, s(:str, ".*"),
    :tREGEXP_END,     "nm"
  ]

  util_lex_token("/.*/nm", *expected)
end
1338
+
1339
# A lone closing parenthesis is expected to lex as a single tRPAREN.
def test_yylex_rparen
  paren = ")"

  util_lex_token(paren, :tRPAREN, s(")"))
end
1342
+
1343
# `>>` between an identifier and an integer is expected to lex as tRSHFT.
def test_yylex_rshft
  expected = [
    :tIDENTIFIER, s("a"),
    :tRSHFT,      s(">>"),
    :tINTEGER,    2
  ]

  util_lex_token("a >> 2", *expected)
end
1349
+
1350
# `>>=` is expected to lex as tOP_ASGN whose value carries only the
# operator part (`>>`).
def test_yylex_rshft_equals
  expected = [
    :tIDENTIFIER, s("a"),
    :tOP_ASGN,    s(">>"),
    :tINTEGER,    2
  ]

  util_lex_token("a >>= 2", *expected)
end
1356
+
1357
# `*` after an identifier is expected to lex as tSTAR2, leaving the lexer
# in :expr_beg afterwards.
def test_yylex_star
  expected = [
    :tIDENTIFIER, s("a"),
    :tSTAR2,      s("*")
  ]
  util_lex_token("a * ", *expected)

  assert_equal(:expr_beg, @lex.lex_state)
end
1364
+
1365
# `**` after an identifier is expected to lex as tPOW, leaving the lexer
# in :expr_beg afterwards.
def test_yylex_star2
  expected = [
    :tIDENTIFIER, s("a"),
    :tPOW,        s("**")
  ]
  util_lex_token("a ** ", *expected)

  assert_equal(:expr_beg, @lex.lex_state)
end
1372
+
1373
# `**=` is expected to lex as tOP_ASGN carrying `**`, leaving the lexer in
# :expr_beg afterwards.
def test_yylex_star2_equals
  expected = [
    :tIDENTIFIER, s("a"),
    :tOP_ASGN,    s("**")
  ]
  util_lex_token("a **= ", *expected)

  assert_equal(:expr_beg, @lex.lex_state)
end
1380
+
1381
# Starting from :expr_arg, a space-prefixed `*a` is expected to lex as
# tSTAR followed by the identifier, ending in :expr_arg.
def test_yylex_star_arg
  @lex.lex_state = :expr_arg

  expected = [
    :tSTAR,       s("*"),
    :tIDENTIFIER, s("a")
  ]
  util_lex_token(" *a", *expected)

  assert_equal(:expr_arg, @lex.lex_state)
end
1390
+
1391
# Starting from :expr_beg, `*a` is expected to lex as tSTAR followed by
# the identifier, ending in :expr_arg.
def test_yylex_star_arg_beg
  @lex.lex_state = :expr_beg

  expected = [
    :tSTAR,       s("*"),
    :tIDENTIFIER, s("a")
  ]
  util_lex_token("*a", *expected)

  assert_equal(:expr_arg, @lex.lex_state)
end
1400
+
1401
# Starting from :expr_fname, `*a` is expected to lex as tSTAR2 (not tSTAR)
# followed by the identifier, ending in :expr_arg.
def test_yylex_star_arg_beg_fname
  @lex.lex_state = :expr_fname

  expected = [
    :tSTAR2,      s("*"),
    :tIDENTIFIER, s("a")
  ]
  util_lex_token("*a", *expected)

  assert_equal(:expr_arg, @lex.lex_state)
end
1410
+
1411
# `*=` is expected to lex as tOP_ASGN carrying `*`, leaving the lexer in
# :expr_beg afterwards.
def test_yylex_star_equals
  expected = [
    :tIDENTIFIER, s("a"),
    :tOP_ASGN,    s("*")
  ]
  util_lex_token("a *= ", *expected)

  assert_equal(:expr_beg, @lex.lex_state)
end
1418
+
1419
# A lone `%` with nothing after it: util_bad_token requires lexing to
# raise SyntaxError.
def test_yylex_string_bad_eos
  source = '%'

  util_bad_token(source, :tSTRING_BEG, s('%'))
end
1423
+
1424
# A `%{` string that is never closed: util_bad_token requires lexing to
# raise SyntaxError.
def test_yylex_string_bad_eos_quote
  source = '%{nest'

  util_bad_token(source, :tSTRING_BEG, s('%}'))
end
1428
+
1429
# A plain double-quoted string is expected to lex as begin / content /
# end tokens.
def test_yylex_string_double
  expected = [
    :tSTRING_BEG,     s('"'),
    :tSTRING_CONTENT, s(:str, "string"),
    :tSTRING_END,     s('"')
  ]

  util_lex_token('"string"', *expected)
end
1435
+
1436
# `\M-g` inside a double-quoted string is expected to be decoded into the
# single byte "\347" in the content.
def test_yylex_string_double_escape_M
  expected = [
    :tSTRING_BEG,     s('"'),
    :tSTRING_CONTENT, s(:str, "\347"),
    :tSTRING_END,     s('"')
  ]

  util_lex_token('"\\M-g"', *expected)
end
1442
+
1443
# `\t` and `\n` inside a double-quoted string are expected to be decoded
# into real tab and newline characters in the content.
def test_yylex_string_double_escape_chars
  expected = [
    :tSTRING_BEG,     s('"'),
    :tSTRING_CONTENT, s(:str, "s\tri\ng"),
    :tSTRING_END,     s('"')
  ]

  util_lex_token('"s\\tri\\ng"', *expected)
end
1449
+
1450
# Hex escapes `\x61\x62\x63` inside a double-quoted string are expected to
# be decoded to "abc" in the content.
def test_yylex_string_double_escape_hex
  expected = [
    :tSTRING_BEG,     s('"'),
    :tSTRING_CONTENT, s(:str, "n = abc"),
    :tSTRING_END,     s('"')
  ]

  util_lex_token('"n = \\x61\\x62\\x63"', *expected)
end
1456
+
1457
# Two `\a` escapes inside a double-quoted string are expected to be
# decoded into "\a" characters in the content.
def test_yylex_string_double_escape_bs1
  expected = [
    :tSTRING_BEG,     s('"'),
    :tSTRING_CONTENT, s(:str, "a\a\a"),
    :tSTRING_END,     s('"')
  ]

  util_lex_token('"a\\a\\a"', *expected)
end
1463
+
1464
# A doubled backslash inside a double-quoted string is expected to
# collapse to a single backslash in the content.
def test_yylex_string_double_escape_bs2
  expected = [
    :tSTRING_BEG,     s('"'),
    :tSTRING_CONTENT, s(:str, "a\\a"),
    :tSTRING_END,     s('"')
  ]

  util_lex_token('"a\\\\a"', *expected)
end
1470
+
1471
# Octal escapes `\101\102\103` inside a double-quoted string are expected
# to be decoded to "ABC" in the content.
def test_yylex_string_double_escape_octal
  expected = [
    :tSTRING_BEG,     s('"'),
    :tSTRING_CONTENT, s(:str, "n = ABC"),
    :tSTRING_END,     s('"')
  ]

  util_lex_token('"n = \\101\\102\\103"', *expected)
end
1477
+
1478
# Interpolation markers inside a double-quoted string. Expected stream:
# `#x` and a trailing `# ` stay plain content, tSTRING_DVAR precedes the
# `@a` and `$b` pieces, and tSTRING_DBEG precedes the `3} # ` piece. The
# pieces after each marker are themselves expected as tSTRING_CONTENT.
def test_yylex_string_double_interp
  source = "\"blah #x a \#@a b \#$b c \#{3} # \""

  expected = [
    :tSTRING_BEG,     s("\""),
    :tSTRING_CONTENT, s(:str, "blah #x a "),
    :tSTRING_DVAR,    nil,
    :tSTRING_CONTENT, s(:str, "@a b "),
    :tSTRING_DVAR,    nil,
    :tSTRING_CONTENT, s(:str, "$b c "),
    :tSTRING_DBEG,    nil,
    :tSTRING_CONTENT, s(:str, "3} # "),
    :tSTRING_END,     s("\"")
  ]

  util_lex_token(source, *expected)
end
1490
+
1491
# Balanced inner braces in a `%{...}` string are expected to stay in the
# content; only the outermost closing brace ends the literal.
def test_yylex_string_double_nested_curlies
  expected = [
    :tSTRING_BEG,     s('%}'),
    :tSTRING_CONTENT, s(:str, "nest{one{two}one}nest"),
    :tSTRING_END,     s('}')
  ]

  util_lex_token('%{nest{one{two}one}nest}', *expected)
end
1497
+
1498
# A `#` that does not introduce interpolation is expected to remain plain
# content, whether it is the first character or appears mid-string.
def test_yylex_string_double_no_interp
  # pound first
  pound_first = "\"# blah\""
  util_lex_token(pound_first,
                 :tSTRING_BEG,     s("\""),
                 :tSTRING_CONTENT, s(:str, "# blah"),
                 :tSTRING_END,     s("\""))

  # pound not first
  pound_later = "\"blah # blah\""
  util_lex_token(pound_later,
                 :tSTRING_BEG,     s("\""),
                 :tSTRING_CONTENT, s(:str, "blah # blah"),
                 :tSTRING_END,     s("\""))
end
1509
+
1510
# `%Q[...]` is expected to lex as one string whose tSTRING_BEG value is
# the full `%Q[` opener and whose end token carries `]`.
def test_yylex_string_pct_Q
  expected = [
    :tSTRING_BEG,     s("%Q["),
    :tSTRING_CONTENT, s(:str, "s1 s2"),
    :tSTRING_END,     s("]")
  ]

  util_lex_token("%Q[s1 s2]", *expected)
end
1516
+
1517
# `%W[...]` with a space and a newline as separators: each word is
# expected as tSTRING_CONTENT followed by a " " token with a nil value.
# TODO: add interpolation to these
def test_yylex_string_pct_W
  source = "%W[s1 s2\ns3]"

  expected = [
    :tWORDS_BEG,      s("%W["),
    :tSTRING_CONTENT, s(:str, "s1"),
    " ",              nil,
    :tSTRING_CONTENT, s(:str, "s2"),
    " ",              nil,
    :tSTRING_CONTENT, s(:str, "s3"),
    " ",              nil,
    :tSTRING_END,     nil
  ]

  util_lex_token(source, *expected)
end
1528
+
1529
# `%W[...]` with a backslash-newline between words: the newline is
# expected to become the first character of the second word.
# TODO: add interpolation to these
def test_yylex_string_pct_W_bs_nl
  source = "%W[s1 \\\ns2]"

  expected = [
    :tWORDS_BEG,      s("%W["),
    :tSTRING_CONTENT, s(:str, "s1"),
    " ",              nil,
    :tSTRING_CONTENT, s(:str, "\ns2"),
    " ",              nil,
    :tSTRING_END,     nil
  ]

  util_lex_token(source, *expected)
end
1538
+
1539
# `%<...>`: the expected tSTRING_BEG value is `%` plus the closing
# delimiter (`%>`), and the end token carries `>`.
def test_yylex_string_pct_angle
  expected = [
    :tSTRING_BEG,     s("%>"),
    :tSTRING_CONTENT, s(:str, "blah"),
    :tSTRING_END,     s(">")
  ]

  util_lex_token("%<blah>", *expected)
end
1545
+
1546
# A percent string that uses `%` itself as the delimiter (`%%blah%`).
def test_yylex_string_pct_other
  expected = [
    :tSTRING_BEG,     s("%%"),
    :tSTRING_CONTENT, s(:str, "blah"),
    :tSTRING_END,     s("%")
  ]

  util_lex_token("%%blah%", *expected)
end
1552
+
1553
# `%w[` that is never closed: the listed word tokens are expected first,
# and util_bad_token requires a SyntaxError during lexing.
def test_yylex_string_pct_w
  source = "%w[s1 s2 "

  expected = [
    :tAWORDS_BEG,     s("%w["),
    :tSTRING_CONTENT, s(:str, "s1"),
    " ",              nil,
    :tSTRING_CONTENT, s(:str, "s2"),
    " ",              nil
  ]

  util_bad_token(source, *expected)
end
1561
+
1562
# `%w[...]` with a backslash-newline between words: the newline is
# expected to become the first character of the second word.
def test_yylex_string_pct_w_bs_nl
  source = "%w[s1 \\\ns2]"

  expected = [
    :tAWORDS_BEG,     s("%w["),
    :tSTRING_CONTENT, s(:str, "s1"),
    " ",              nil,
    :tSTRING_CONTENT, s(:str, "\ns2"),
    " ",              nil,
    :tSTRING_END,     nil
  ]

  util_lex_token(source, *expected)
end
1571
+
1572
# `%w[...]` with backslash-escaped spaces: each escaped space is expected
# to stay inside its word instead of splitting it.
def test_yylex_string_pct_w_bs_sp
  source = "%w[s\\ 1 s\\ 2]"

  expected = [
    :tAWORDS_BEG,     s("%w["),
    :tSTRING_CONTENT, s(:str, "s 1"),
    " ",              nil,
    :tSTRING_CONTENT, s(:str, "s 2"),
    " ",              nil,
    :tSTRING_END,     nil
  ]

  util_lex_token(source, *expected)
end
1581
+
1582
# A plain single-quoted string is expected to lex as begin / content /
# end tokens.
def test_yylex_string_single
  expected = [
    :tSTRING_BEG,     s("'"),
    :tSTRING_CONTENT, s(:str, "string"),
    :tSTRING_END,     s("'")
  ]

  util_lex_token("'string'", *expected)
end
1588
+
1589
# `\t` and `\n` inside a single-quoted string are expected to stay as
# backslash-letter pairs in the content (no decoding).
def test_yylex_string_single_escape_chars
  expected = [
    :tSTRING_BEG,     s("'"),
    :tSTRING_CONTENT, s(:str, "s\\tri\\ng"),
    :tSTRING_END,     s("'")
  ]

  util_lex_token("'s\\tri\\ng'", *expected)
end
1595
+
1596
# A backslash-newline inside a single-quoted string is expected to be
# kept verbatim (both characters) in the content.
def test_yylex_string_single_nl
  source = "'blah\\\nblah'"

  util_lex_token(source,
                 :tSTRING_BEG,     s("'"),
                 :tSTRING_CONTENT, s(:str, "blah\\\nblah"),
                 :tSTRING_END,     s("'"))
end
1602
+
1603
# `:symbol` is expected to lex as tSYMBEG followed by a tIDENTIFIER.
def test_yylex_symbol
  expected = [
    :tSYMBEG,     s(":"),
    :tIDENTIFIER, s("symbol")
  ]

  util_lex_token(":symbol", *expected)
end
1608
+
1609
# A quoted symbol whose body contains a NUL byte: util_bad_token requires
# lexing to raise SyntaxError.
def test_yylex_symbol_bad_zero
  source = ":\"blah\0\""

  util_bad_token(source, :tSYMBEG, s(":"))
end
1613
+
1614
# A double-quoted symbol is expected to lex as tSYMBEG, then string
# content, then a tSTRING_END carrying the double quote.
def test_yylex_symbol_double
  expected = [
    :tSYMBEG,         s(":"),
    :tSTRING_CONTENT, s(:str, "symbol"),
    :tSTRING_END,     s('"')
  ]

  util_lex_token(":\"symbol\"", *expected)
end
1620
+
1621
# A single-quoted symbol is expected to lex as tSYMBEG, then string
# content, then a tSTRING_END carrying the single quote.
def test_yylex_symbol_single
  expected = [
    :tSYMBEG,         s(":"),
    :tSTRING_CONTENT, s(:str, "symbol"),
    :tSTRING_END,     s("'")
  ]

  util_lex_token(":'symbol'", *expected)
end
1627
+
1628
# Ternary operator pieces. In all three inputs `?` and `:` are expected
# as string-typed tokens ("?" / ":") rather than symbols.
def test_yylex_ternary
  spaced = "a ? b : c"
  util_lex_token(spaced,
                 :tIDENTIFIER, s("a"),
                 "?",          s("?"), # FIX
                 :tIDENTIFIER, s("b"),
                 ":",          s(":"), # FIX
                 :tIDENTIFIER, s("c"))

  # No space between "?" and the identifier (original note: GAH! MATZ!!!)
  unspaced = "a ?bb : c"
  util_lex_token(unspaced,
                 :tIDENTIFIER, s("a"),
                 "?",          s("?"), # FIX
                 :tIDENTIFIER, s("bb"),
                 ":",          s(":"), # FIX
                 :tIDENTIFIER, s("c"))

  # 42 forces expr_end
  after_integer = "42 ?"
  util_lex_token(after_integer,
                 :tINTEGER, 42,
                 "?",       s("?"))
end
1647
+
1648
# A lone `~` is expected to lex as a single tTILDE.
def test_yylex_tilde
  tilde = "~"

  util_lex_token(tilde, :tTILDE, s("~"))
end
1651
+
1652
# In :expr_fname state, `~@` is expected to lex as one tTILDE token whose
# value is just `~`.
def test_yylex_tilde_unary
  @lex.lex_state = :expr_fname

  util_lex_token("~@", :tTILDE, s("~"))
end
1656
+
1657
# A leading `-` before an identifier is expected to lex as tUMINUS.
def test_yylex_uminus
  expected = [
    :tUMINUS,     s("-"),
    :tIDENTIFIER, s("blah")
  ]

  util_lex_token("-blah", *expected)
end
1662
+
1663
# A name starting with an underscore is expected to lex as a single
# tIDENTIFIER.
def test_yylex_underscore
  name = "_var"

  util_lex_token(name, :tIDENTIFIER, s("_var"))
end
1666
+
1667
# With `__END__` as the whole input, the first call to advance is
# expected to return a falsy value.
def test_yylex_underscore_end
  @lex.src = "__END__\n"
  advanced = @lex.advance

  deny(advanced)
end
1671
+
1672
# A leading `+` before an identifier is expected to lex as tUPLUS.
def test_yylex_uplus
  expected = [
    :tUPLUS,      s("+"),
    :tIDENTIFIER, s("blah")
  ]

  util_lex_token("+blah", *expected)
end
1677
+
1678
# Regression check in two steps: lex a method header up to `u = `, confirm
# the lexer is in :expr_beg, then lex the remainder and expect both `0.0`
# literals to come back as tFLOAT.
def test_zbug_float_in_decl
  header_tokens = [
    :kDEF,        s("def"),
    :tIDENTIFIER, s("initialize"),
    :tLPAREN2,    s("("),
    :tIDENTIFIER, s("u"),
    '=',          s("=")
  ]
  util_lex_token("def initialize(u = ", *header_tokens)

  assert_equal(:expr_beg, @lex.lex_state)

  remainder_tokens = [
    :tFLOAT,      0.0,
    ',',          s(','),
    :tIDENTIFIER, s("s"),
    '=',          s("="),
    :tFLOAT,      0.0
  ]
  util_lex_token("0.0, s = 0.0", *remainder_tokens)
end
1695
+
1696
# Regression check in two steps: after lexing `a =` the lexer must be in
# :expr_beg, and a following `0.0` is expected to lex as a single tFLOAT.
def test_zbug_id_equals
  assignment_tokens = [
    :tIDENTIFIER, s("a"),
    '=',          s("=")
  ]
  util_lex_token("a =", *assignment_tokens)

  assert_equal(:expr_beg, @lex.lex_state)

  util_lex_token("0.0", :tFLOAT, 0.0)
end
1706
+
1707
# Same two-step scenario as a spaced method header, but with no
# whitespace around `=` or after `,`; the expected token streams match.
def test_zbug_no_spaces_in_decl
  header_tokens = [
    :kDEF,        s("def"),
    :tIDENTIFIER, s("initialize"),
    :tLPAREN2,    s("("),
    :tIDENTIFIER, s("u"),
    '=',          s("=")
  ]
  util_lex_token("def initialize(u=", *header_tokens)

  assert_equal(:expr_beg, @lex.lex_state)

  remainder_tokens = [
    :tFLOAT,      0.0,
    ',',          s(','),
    :tIDENTIFIER, s("s"),
    '=',          s("="),
    :tFLOAT,      0.0
  ]
  util_lex_token("0.0,s=0.0", *remainder_tokens)
end
1724
+
1725
+ ############################################################
1726
+
1727
# Asserts that lexing +s+ raises SyntaxError while being checked against
# the token/value pairs in +args+ (delegates to util_lex_token).
#
# Note: the parameter +s+ is a local here, so inside this method it takes
# precedence over any method named +s+.
def util_bad_token(s, *args)
  assert_raises(SyntaxError) { util_lex_token(s, *args) }
end
1732
+
1733
# Loads +input+ into the lexer and asserts that read_escape returns
# +expected+.
def util_escape(expected, input)
  @lex.src = input
  actual = @lex.read_escape

  assert_equal(expected, actual)
end
1737
+
1738
# Loads +input+ into the lexer and asserts that read_escape raises
# SyntaxError.
def util_escape_bad(input)
  @lex.src = input

  assert_raises(SyntaxError) { @lex.read_escape }
end
1744
+
1745
# Lexes "def <name> " and expects a kDEF token followed by a token of
# kind +type+ carrying +name+; afterwards the lexer state must equal
# +end_state+ (default :expr_arg).
def util_lex_fname(name, type, end_state = :expr_arg)
  @lex.lex_state = :expr_fname # can only set via parser's defs

  source   = "def #{name} "
  expected = [:kDEF, s("def"), type, s(name)]
  util_lex_token(source, *expected)

  assert_equal(end_state, @lex.lex_state)
end
1752
+
1753
# Feeds +input+ to the lexer and walks +args+ as consecutive
# (token, value) pairs. For each pair the lexer must advance and its
# current token / yacc_value must equal the pair. Once all pairs are
# consumed, one more advance must report that nothing is left.
#
# An odd trailing element in +args+ is compared against a nil value.
def util_lex_token(input, *args)
  @lex.src = input

  args.each_slice(2) do |expected_token, expected_value|
    assert(@lex.advance, "no more tokens")

    wanted = [expected_token, expected_value]
    actual = [@lex.token, @lex.yacc_value]
    assert_equal(wanted, actual)
  end

  # Advance first, then describe whatever token the lexer ended up on.
  has_more = @lex.advance
  leftover = [@lex.token, @lex.yacc_value].inspect
  deny(has_more, "must be empty, but had #{leftover}")
end
1765
+ end
1766
+