oedipus_lex 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,10 @@
1
+ <?xml version="1.0" encoding="Shift_JIS"?>
2
+ <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN"
3
+ "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
4
+ <html xmlns="http://www.w3.org/1999/xhtml">
5
+ <body>
6
+ <p>
7
+ XHTML 1.1
8
+ </p>
9
+ </body>
10
+ </html>
@@ -0,0 +1,629 @@
1
+ gem "minitest"
2
+ require "minitest/autorun"
3
+ require "oedipus_lex"
4
+ require "stringio"
5
+
6
+ class TestOedipusLex < Minitest::Test
7
+ attr_accessor :option
8
+
9
# Minitest hook: gives every test a fresh, empty option hash.
def setup
  self.option = Hash.new
end
12
+
13
# Runs +grammar+ (after #cleanup) through an OedipusLex instance built with the
# current test options and returns whatever #generate produces; callers treat
# the result as Ruby source text.
def generate_lexer(grammar)
  OedipusLex.new(option).tap { |builder| builder.parse(cleanup(grammar)) }.generate
end
18
+
19
# Asserts that parsing +grammar+ raises OedipusLex::ScanError and that the
# error message matches +expected_msg+.
def assert_generate_error(grammar, expected_msg)
  builder = OedipusLex.new(option)

  error = assert_raises(OedipusLex::ScanError) { builder.parse(cleanup(grammar)) }

  assert_match(expected_msg, error.message)
end
28
+
29
# Removes exactly six leading spaces from every line of +s+ that has them;
# lines with fewer leading spaces are left untouched.
# NOTE(review): presumably this strips the margin of the grammar heredocs used
# by the tests -- confirm against the unmangled file.
def cleanup(s)
  s.each_line("\n").map { |line| line.sub(/\A {6}/, "") }.join
end
32
+
33
# Generates lexer source for +grammar+ and evaluates it inside a fresh anonymous
# module (callers reach the result as mod::Calculator).
# When option[:wtf] is set, the source is also printed, framed by blank lines.
# Returns the pair [source, module].
def eval_lexer(grammar)
  source = generate_lexer(grammar)

  puts "", source, "" if option[:wtf]

  sandbox = Module.new
  sandbox.module_eval(source)

  [source, sandbox]
end
47
+
48
# Builds the lexer described by +grammar+, runs it over +input+ and asserts that
# the tokens it yields equal +expected+.
def assert_lexer(grammar, input, expected)
  _source, lexer_module = eval_lexer(grammar)

  lexer = lexer_module::Calculator.new

  # Give this one instance a do_parse that drains next_token into an array,
  # stopping at the first nil/false.
  # NOTE(review): relies on Calculator#parse calling do_parse -- confirm in the
  # generated lexer.
  class << lexer
    def do_parse
      collected = []
      while (tok = next_token)
        collected << tok
      end
      collected
    end
  end

  assert_equal(expected, lexer.parse(input))
end
65
+
66
# Builds the lexer described by +grammar+, runs it over +input+ and asserts that
# it raises the generated Calculator::ScanError with exactly +expected_msg+.
def assert_lexer_error(grammar, input, expected_msg)
  _source, lexer_module = eval_lexer(grammar)

  lexer = lexer_module::Calculator.new

  # Per-instance do_parse that drains next_token until it returns nil/false.
  # NOTE(review): relies on Calculator#parse calling do_parse -- confirm in the
  # generated lexer.
  class << lexer
    def do_parse
      collected = []
      while (tok = next_token)
        collected << tok
      end
      collected
    end
  end

  error = assert_raises(lexer_module::Calculator::ScanError) { lexer.parse(input) }

  assert_equal(expected_msg, error.message)
end
85
+
86
+ def test_simple_scanner # baseline: a digits / whitespace / sign grammar lexes "1 + 2 + 3"
87
+ src = <<-'REX'
88
+ class Calculator
89
+ rule
90
+ /\d+/ { [:number, text.to_i] }
91
+ /\s+/
92
+ /[+-]/ { [:op, text] }
93
+ end
94
+ REX
95
+
96
+ txt = "1 + 2 + 3"
97
+
98
+ exp = [[:number, 1],
99
+ [:op, "+"],
100
+ [:number, 2],
101
+ [:op, "+"],
102
+ [:number, 3]]
103
+
104
+ assert_lexer src, txt, exp # no whitespace tokens expected: the /\s+/ rule has no action
105
+ end
106
+
107
+ def test_simple_scanner_bug_trailing_comment # rules followed by a trailing "#" comment must yield the same tokens as the uncommented grammar
108
+ src = <<-'REX'
109
+ class Calculator
110
+ rule
111
+ /\d+/ { [:number, text.to_i] } # numbers
112
+ /\s+/ # do nothing
113
+ /[+-]/ { [:op, text] }
114
+ end
115
+ REX
116
+
117
+ txt = "1 + 2 + 3"
118
+
119
+ exp = [[:number, 1],
120
+ [:op, "+"],
121
+ [:number, 2],
122
+ [:op, "+"],
123
+ [:number, 3]]
124
+
125
+ assert_lexer src, txt, exp
126
+ end
127
+
128
+ def test_simple_scanner_multiline_action_error # an action block spread over several lines must be rejected when the grammar is parsed
129
+ src = <<-'REX'
130
+ class Calculator
131
+ rule
132
+ /\d+/ {
133
+ [:number, text.to_i]
134
+ }
135
+ /\s+/
136
+ /[+-]/ { [:op, text] }
137
+ end
138
+ REX
139
+
140
+ assert_generate_error src, "can not match (:rule): '" # expects OedipusLex::ScanError whose message contains this text
141
+ end
142
+
143
+ def test_simple_scanner_macro # the number rule is written in terms of macro N instead of a literal pattern
144
+ src = <<-'REX'
145
+ class Calculator
146
+ macro
147
+ N /\d+/
148
+ rule
149
+ /#{N}/ { [:number, text.to_i] }
150
+ /\s+/
151
+ /[+-]/ { [:op, text] }
152
+ end
153
+ REX
154
+
155
+ txt = "1 + 2 + 30"
156
+
157
+ exp = [[:number, 1],
158
+ [:op, "+"],
159
+ [:number, 2],
160
+ [:op, "+"],
161
+ [:number, 30]]
162
+
163
+ assert_lexer src, txt, exp # "30" must come back as one number token
164
+ end
165
+
166
+ def test_simple_scanner_macro_slashes # macro carries an /i flag and the interpolating rule an /o flag; tokens must match the plain macro case
167
+ src = <<-'REX'
168
+ class Calculator
169
+ macro
170
+ N /\d+/i
171
+ rule
172
+ /#{N}/o { [:number, text.to_i] }
173
+ /\s+/
174
+ /[+-]/ { [:op, text] }
175
+ end
176
+ REX
177
+
178
+ txt = "1 + 2 + 30"
179
+
180
+ exp = [[:number, 1],
181
+ [:op, "+"],
182
+ [:number, 2],
183
+ [:op, "+"],
184
+ [:number, 30]]
185
+
186
+ assert_lexer src, txt, exp
187
+ end
188
+
189
+ def test_simple_scanner_macro_slash_n_generator # generator-only check: the macro's /n flag must appear in the generated source
190
+ src = <<-'REX'
191
+ class Calculator
192
+ macro
193
+ N /\d+/n
194
+ rule
195
+ /#{N}/o { [:number, text.to_i] }
196
+ /\s+/
197
+ /[+-]/ { [:op, text] }
198
+ end
199
+ REX
200
+
201
+ ruby = generate_lexer src # generated source only; it is not evaluated here
202
+
203
+ assert_match "/\\d+/n", ruby
204
+ end
205
+
206
+ def test_simple_scanner_recursive_macro # macro N is itself defined via macro D; lexing must behave like the single-macro case
207
+ src = <<-'REX'
208
+ class Calculator
209
+ macro
210
+ D /\d/
211
+ N /#{D}+/
212
+ rule
213
+ /#{N}/ { [:number, text.to_i] }
214
+ /\s+/
215
+ /[+-]/ { [:op, text] }
216
+ end
217
+ REX
218
+
219
+ txt = "1 + 2 + 30"
220
+
221
+ exp = [[:number, 1],
222
+ [:op, "+"],
223
+ [:number, 2],
224
+ [:op, "+"],
225
+ [:number, 30]]
226
+
227
+ assert_lexer src, txt, exp
228
+ end
229
+
230
# Enables debugging through the generator options (option[:debug]) and checks
# that lexing still yields the expected tokens, that nothing is written to
# stderr, and that tokens are echoed on stdout.
def test_simple_scanner_debug_arg
  src = <<-'REX'
      class Calculator
      rule
        /\d+/ { [:number, text.to_i] }
        /\s+/
        /[+-]/ { [:op, text] }
      end
  REX

  txt = "1 + 2 + 30"

  exp = [[:number, 1],
         [:op, "+"],
         [:number, 2],
         [:op, "+"],
         [:number, 30]]

  option[:debug] = true

  out, err = capture_io do
    assert_lexer src, txt, exp
  end

  # The debug output is only spot-checked. The former code that rebuilt `exp`
  # into a full expected-output string was never asserted against and has been
  # removed.
  assert_equal "", err
  assert_match "[:number, 1]", out
  assert_match "[:op, \"+\"]", out
end
262
+
263
# Enables debugging from inside the grammar (an `option` section containing
# `debug`) and checks that lexing still yields the expected tokens, that
# nothing is written to stderr, and that tokens are echoed on stdout.
def test_simple_scanner_debug_src
  src = <<-'REX'
      class Calculator
      option
        debug
      rule
        /\d+/ { [:number, text.to_i] }
        /\s+/
        /[+-]/ { [:op, text] }
      end
  REX

  txt = "1 + 2 + 30"

  exp = [[:number, 1],
         [:op, "+"],
         [:number, 2],
         [:op, "+"],
         [:number, 30]]

  out, err = capture_io do
    assert_lexer src, txt, exp
  end

  # The debug output is only spot-checked. The former code that rebuilt `exp`
  # into a full expected-output string was never asserted against and has been
  # removed.
  assert_equal "", err
  assert_match "[:number, 1]", out
  assert_match "[:op, \"+\"]", out
end
295
+
296
+ def test_simple_scanner_inclusive # lowercase state :op -- stateless rules still apply while it is set, so its own /\d+/ rule never fires
297
+ src = <<-'REX'
298
+ class Calculator
299
+ rule
300
+ /\d+/ { [:number, text.to_i] }
301
+ /\s+/
302
+ /[+-]/ { @state = :op; [:op, text] }
303
+
304
+ # nil state always goes first, so we won't get this
305
+ :op /\d+/ { @state = nil; [:bad, text.to_i] }
306
+ end
307
+ REX
308
+
309
+ txt = "1 + 2 + 30"
310
+
311
+ exp = [[:number, 1],
312
+ [:op, "+"],
313
+ [:number, 2],
314
+ [:op, "+"],
315
+ [:number, 30]]
316
+
317
+ assert_lexer src, txt, exp # no :bad token may appear
318
+ end
319
+
320
+ def test_simple_scanner_exclusive # uppercase state :OP -- once an operator sets it, only :OP rules apply until one resets @state to nil
321
+ src = <<-'REX'
322
+ class Calculator
323
+ rule
324
+ /\d+/ { [:number, text.to_i] }
325
+ /\s+/
326
+ /[+-]/ { @state = :OP; [:op, text] }
327
+
328
+ :OP /\s+/
329
+ :OP /\d+/ { @state = nil; [:number2, text.to_i] }
330
+ end
331
+ REX
332
+
333
+ txt = "1 + 2 + 30"
334
+
335
+ exp = [[:number, 1],
336
+ [:op, "+"],
337
+ [:number2, 2],
338
+ [:op, "+"],
339
+ [:number2, 30]]
340
+
341
+ assert_lexer src, txt, exp # numbers that follow an operator come back as :number2
342
+ end
343
+
344
+ def test_simple_scanner_auto_action # actions return [:state, X] tokens; the expected tokens show the :RPN rules taking over afterwards
345
+ src = <<-'REX'
346
+ class Calculator
347
+ rule
348
+ /rpn/ { [:state, :RPN] }
349
+ /\d+/ { [:number, text.to_i] }
350
+ /\s+/
351
+ /[+-]/ { [:op, text] }
352
+
353
+ :RPN /\s+/
354
+ :RPN /[+-]/ { [:op2, text] }
355
+ :RPN /\d+/ { [:number2, text.to_i] }
356
+ :RPN /alg/ { [:state, nil] }
357
+ end
358
+ REX
359
+
360
+ txt = "rpn 1 2 30 + + alg"
361
+
362
+ exp = [[:state, :RPN],
363
+ [:number2, 1],
364
+ [:number2, 2],
365
+ [:number2, 30],
366
+ [:op2, "+"],
367
+ [:op2, "+"],
368
+ [:state, nil]]
369
+
370
+ assert_lexer src, txt, exp # the [:state, ...] tokens themselves are part of the output
371
+ end
372
+
373
+ def test_simple_scanner_auto_action_symbol # same scenario, but the state switches are written as a bare :RPN / nil action instead of a block
374
+ src = <<-'REX'
375
+ class Calculator
376
+ rule
377
+ /rpn/ :RPN
378
+ /\d+/ { [:number, text.to_i] }
379
+ /\s+/
380
+ /[+-]/ { [:op, text] }
381
+
382
+ :RPN /\s+/
383
+ :RPN /[+-]/ { [:op2, text] }
384
+ :RPN /\d+/ { [:number2, text.to_i] }
385
+ :RPN /alg/ nil
386
+ end
387
+ REX
388
+
389
+ txt = "rpn 1 2 30 + + alg"
390
+
391
+ exp = [[:state, :RPN],
392
+ [:number2, 1],
393
+ [:number2, 2],
394
+ [:number2, 30],
395
+ [:op2, "+"],
396
+ [:op2, "+"],
397
+ [:state, nil]]
398
+
399
+ assert_lexer src, txt, exp # bare-symbol actions are expected to surface as [:state, ...] tokens
400
+ end
401
+
402
+ def test_simple_scanner_predicate_generator # generator-only check of how predicate-guarded and state-guarded rules are emitted
403
+ src = <<-'REX'
404
+ class Calculator
405
+ rules
406
+
407
+ /\d+/ { [:number, text.to_i] }
408
+ /\s+/
409
+ :ARG /\d+/
410
+ poot? /[+-]/ { [:bad1, text] }
411
+ woot? /[+-]/ { [:op, text] }
412
+ end
413
+ REX
414
+
415
+ ruby = generate_lexer src # generated source only; poot?/woot? are never defined or called here
416
+
417
+ assert_match "when poot? && (text = ss.scan(/[+-]/)) then", ruby
418
+ assert_match "when woot? && (text = ss.scan(/[+-]/)) then", ruby
419
+ assert_match "when nil then", ruby
420
+ assert_match "when :ARG then", ruby
421
+ end
422
+
423
+ def test_generator_start # code from the grammar's `start` section must be emitted right after `def next_token`
424
+ src = <<-'REX'
425
+ class Calculator
426
+ start
427
+ do_the_thing
428
+ rules
429
+ /\d+/ { [:number, text.to_i] }
430
+ /\s+/
431
+ end
432
+ REX
433
+
434
+ ruby = generate_lexer src
435
+
436
+ assert_match " def next_token\n do_the_thing", ruby
437
+ end
438
+
439
+ def test_simple_scanner_predicate # predicates from the `inner` section gate rules: poot? is false, woot? is true
440
+ src = <<-'REX'
441
+ class Calculator
442
+ inner
443
+ def woot?
444
+ true
445
+ end
446
+ def poot?
447
+ false
448
+ end
449
+
450
+ rules
451
+
452
+ /\d+/ { [:number, text.to_i] }
453
+ /\s+/
454
+ poot? /[+-]/ { [:bad1, text] }
455
+ woot? /[+-]/ { [:op, text] }
456
+ end
457
+ REX
458
+
459
+ txt = "1 + 2 + 30"
460
+
461
+ exp = [[:number, 1],
462
+ [:op, "+"],
463
+ [:number, 2],
464
+ [:op, "+"],
465
+ [:number, 30]]
466
+
467
+ assert_lexer src, txt, exp # operators must come from the woot? rule (:op), never the poot? rule (:bad1)
468
+ end
469
+
470
+ def test_simple_scanner_method_actions # a rule's action may be a bare method name (thingy) defined in the `inner` section
471
+ src = <<-'REX'
472
+ class Calculator
473
+ inner
474
+ def thingy text
475
+ [:number, text.to_i]
476
+ end
477
+ rule
478
+ /\d+/ thingy
479
+ /\s+/
480
+ /[+-]/ { [:op, text] }
481
+ end
482
+ REX
483
+
484
+ txt = "1 + 2 + 30"
485
+
486
+ exp = [[:number, 1],
487
+ [:op, "+"],
488
+ [:number, 2],
489
+ [:op, "+"],
490
+ [:number, 30]]
491
+
492
+ assert_lexer src, txt, exp # number tokens are expected to be whatever thingy returns
493
+ end
494
+
495
+ def test_header_is_written_after_module # pins the first seven generated lines: the banner comment, a blank line, then the module openers
496
+ src = <<-'REX'
497
+ module X
498
+ module Y
499
+ class Calculator
500
+ rule
501
+ /\d+/ { [:number, text.to_i] }
502
+ /\s+/
503
+ /[+-]/ { [:op, text] }
504
+ end
505
+ end
506
+ end
507
+ REX
508
+
509
+ ruby = generate_lexer src
510
+
511
+ exp = ["#--\n",
512
+ "# This file is automatically generated. Do not modify it.\n",
513
+ "# Generated by: oedipus_lex version #{OedipusLex::VERSION}.\n",
514
+ "#++\n",
515
+ "\n",
516
+ "module X\n",
517
+ "module Y\n"]
518
+
519
+ assert_equal exp, ruby.lines.first(7) # exact comparison, so banner wording and ordering are both pinned
520
+ end
521
+
522
# parse_file on a path that does not exist must raise Errno::ENOENT.
def test_read_non_existent_file
  builder = OedipusLex.new

  assert_raises(Errno::ENOENT) { builder.parse_file("non_existent_file") }
end
529
+
530
+ def test_scanner_nests_classes # a namespaced class name with a superclass, inside a module, must appear verbatim in the output
531
+ src = <<-'REX'
532
+ module Foo
533
+ class Baz::Calculator < Bar
534
+ rule
535
+ /\d+/ { [:number, text.to_i] }
536
+ /\s+/ { [:S, text] }
537
+ end
538
+ end
539
+ REX
540
+
541
+ ruby = generate_lexer src # generated source only; Foo, Baz and Bar are never defined here
542
+
543
+ assert_match 'Baz::Calculator < Bar', ruby
544
+ end
545
+
546
+ def test_scanner_inherits # the superclass clause of the grammar's class line must be carried into the generated source
547
+ source = generate_lexer <<-'REX'
548
+ class Calculator < Bar
549
+ rule
550
+ /\d+/ { [:number, text.to_i] }
551
+ /\s+/ { [:S, text] }
552
+ end
553
+ REX
554
+
555
+ assert_match 'Calculator < Bar', source
556
+ end
557
+
558
+ def test_scanner_inherits_many_levels # same as the inheritance test, with a namespaced superclass (Foo::Bar)
559
+ source = generate_lexer <<-'REX'
560
+ class Calculator < Foo::Bar
561
+ rule
562
+ /\d+/ { [:number, text.to_i] }
563
+ /\s+/ { [:S, text] }
564
+ end
565
+ REX
566
+
567
+ assert_match 'Calculator < Foo::Bar', source
568
+ end
569
+
570
+ def test_parses_macros_with_escapes # a macro whose pattern contains backslash escapes must parse; the rule keeps its macro interpolation
571
+ source = generate_lexer %q{
572
+ class Foo
573
+ macro
574
+ W /[\ \t]+/
575
+ rule
576
+ /#{W}/ { [:SPACE, text] }
577
+ end
578
+ }
579
+
580
+ assert_match 'ss.scan(/#{W}/)', source # single-quoted: the interpolation marker is matched as literal text
581
+ end
582
+
583
+ def test_parses_regexp_with_interpolation_o # an interpolating pattern with the /o flag is emitted unchanged; this grammar defines no macro W
584
+ source = generate_lexer %q{
585
+ class Foo
586
+ rule
587
+ /#{W}/o { [:SPACE, text] }
588
+ end
589
+ }
590
+
591
+ assert_match 'ss.scan(/#{W}/o)', source
592
+ end
593
+
594
+ def test_parses_regexp_with_interpolation_o_macro # mixes a defined macro (W) with an interpolation that has no macro here (X)
595
+ source = generate_lexer %q{
596
+ class Foo
597
+ macro
598
+ W /[\ \t]+/
599
+ rule
600
+ /#{X}/ { [:SPACE, text] }
601
+ /#{W}/o { [:X, text] }
602
+ end
603
+ }
604
+
605
+ assert_match 'W = /[\ \t]+/', source # the macro is expected to become a constant assignment
606
+ assert_match 'ss.scan(/#{W}/o)', source
607
+ assert_match 'ss.scan(/#{X}/)', source
608
+ end
609
+
610
# Alternates between the default state and state :B through self.state=.
# "aba" lexes cleanly; "aa" must fail because the second "a" arrives while the
# lexer is in state :B, which only has a rule for "b".
def test_changing_state_during_lexing
  src = <<-'REX'
      class Calculator
      rule
        /a/ { self.state = :B ; [:A, text] }
        :B /b/ { self.state = nil ; [:B, text] }
      end
  REX

  txt = "aba"
  exp = [[:A, 'a'], [:B, 'b'], [:A, 'a']]

  assert_lexer src, txt, exp

  # No token list is expected for "aa": lexing has to raise. The unused `exp`
  # array that used to be assigned here has been removed.
  txt = "aa"

  assert_lexer_error src, txt, "can not match (:B): 'a'"
end
629
+ end