oedipus_lex 2.0.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,10 @@
1
+ <?xml version="1.0" encoding="Shift_JIS"?>
2
+ <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN"
3
+ "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
4
+ <html xmlns="http://www.w3.org/1999/xhtml">
5
+ <body>
6
+ <p>
7
+ XHTML 1.1
8
+ </p>
9
+ </body>
10
+ </html>
@@ -0,0 +1,629 @@
1
+ gem "minitest"
2
+ require "minitest/autorun"
3
+ require "oedipus_lex"
4
+ require "stringio"
5
+
6
# Tests for the OedipusLex lexer generator. Each test feeds a small grammar
# to OedipusLex and checks either the generated Ruby source or the tokens
# produced by the evaluated lexer.
#
# NOTE(review): whitespace inside the grammar strings below was
# reconstructed. #cleanup strips six leading spaces per line, so grammar
# keywords sit at a six-space indent here; the extra two spaces on nested
# lines (rules, macros, options, start and inner code) are an assumption --
# confirm against the upstream test file.
class TestOedipusLex < Minitest::Test
  # Options hash handed to OedipusLex.new; reset to {} before every test.
  attr_accessor :option

  def setup
    self.option = {}
  end

  # Parses +grammar+ (after #cleanup) with a fresh OedipusLex built from
  # #option and returns whatever OedipusLex#generate returns (the lexer's
  # Ruby source, which #eval_lexer evaluates).
  def generate_lexer grammar
    rex = OedipusLex.new option
    rex.parse cleanup grammar
    rex.generate
  end

  # Asserts that parsing +grammar+ raises OedipusLex::ScanError with a
  # message matching +expected_msg+.
  def assert_generate_error grammar, expected_msg
    rex = OedipusLex.new option

    e = assert_raises OedipusLex::ScanError do
      rex.parse cleanup grammar
    end

    assert_match expected_msg, e.message
  end

  # Removes the six-space indent that the grammar literals carry so they
  # can be nested inside the test methods.
  def cleanup s
    s.gsub(/^ {6}/, "")
  end

  # Generates the lexer for +grammar+ and evaluates it inside a fresh
  # anonymous module so generated classes do not leak between tests.
  # Prints the generated source when option[:wtf] is set.
  #
  # Returns [ruby_source, module].
  def eval_lexer grammar
    ruby = generate_lexer grammar

    if option[:wtf]
      puts
      puts ruby
      puts
    end

    mod = Module.new
    mod.module_eval ruby

    [ruby, mod]
  end

  # Evaluates +grammar+ and instantiates its Calculator class, giving the
  # instance a +do_parse+ that drains +next_token+ into an array.
  # Presumably the generated #parse delegates to #do_parse -- confirm.
  #
  # Returns [module, calculator].
  def build_calculator grammar
    _, mod = eval_lexer grammar

    calc = mod::Calculator.new

    def calc.do_parse
      tokens = []
      while (token = next_token)
        tokens << token
      end
      tokens
    end

    [mod, calc]
  end

  # Asserts that lexing +input+ with +grammar+ yields exactly +expected+.
  def assert_lexer grammar, input, expected
    _, calc = build_calculator grammar

    tokens = calc.parse input

    assert_equal expected, tokens
  end

  # Asserts that lexing +input+ with +grammar+ raises the generated
  # Calculator::ScanError with exactly +expected_msg+.
  def assert_lexer_error grammar, input, expected_msg
    mod, calc = build_calculator grammar

    e = assert_raises mod::Calculator::ScanError do
      calc.parse input
    end

    assert_equal expected_msg, e.message
  end

  # Plain rules: numbers, skipped whitespace, operators.
  def test_simple_scanner
    src = <<-'REX'
      class Calculator
      rule
        /\d+/ { [:number, text.to_i] }
        /\s+/
        /[+-]/ { [:op, text] }
      end
    REX

    txt = "1 + 2 + 3"

    exp = [[:number, 1],
           [:op, "+"],
           [:number, 2],
           [:op, "+"],
           [:number, 3]]

    assert_lexer src, txt, exp
  end

  # Trailing comments after a rule (with or without an action) are accepted.
  def test_simple_scanner_bug_trailing_comment
    src = <<-'REX'
      class Calculator
      rule
        /\d+/ { [:number, text.to_i] } # numbers
        /\s+/ # do nothing
        /[+-]/ { [:op, text] }
      end
    REX

    txt = "1 + 2 + 3"

    exp = [[:number, 1],
           [:op, "+"],
           [:number, 2],
           [:op, "+"],
           [:number, 3]]

    assert_lexer src, txt, exp
  end

  # An action block spread over several lines is rejected at parse time.
  def test_simple_scanner_multiline_action_error
    src = <<-'REX'
      class Calculator
      rule
        /\d+/ {
          [:number, text.to_i]
        }
        /\s+/
        /[+-]/ { [:op, text] }
      end
    REX

    assert_generate_error src, "can not match (:rule): '"
  end

  # A macro can be interpolated into a rule's regexp.
  def test_simple_scanner_macro
    src = <<-'REX'
      class Calculator
      macro
        N /\d+/
      rule
        /#{N}/ { [:number, text.to_i] }
        /\s+/
        /[+-]/ { [:op, text] }
      end
    REX

    txt = "1 + 2 + 30"

    exp = [[:number, 1],
           [:op, "+"],
           [:number, 2],
           [:op, "+"],
           [:number, 30]]

    assert_lexer src, txt, exp
  end

  # Regexp flags are accepted on both the macro and the rule.
  def test_simple_scanner_macro_slashes
    src = <<-'REX'
      class Calculator
      macro
        N /\d+/i
      rule
        /#{N}/o { [:number, text.to_i] }
        /\s+/
        /[+-]/ { [:op, text] }
      end
    REX

    txt = "1 + 2 + 30"

    exp = [[:number, 1],
           [:op, "+"],
           [:number, 2],
           [:op, "+"],
           [:number, 30]]

    assert_lexer src, txt, exp
  end

  # The /n flag on a macro survives into the generated source.
  def test_simple_scanner_macro_slash_n_generator
    src = <<-'REX'
      class Calculator
      macro
        N /\d+/n
      rule
        /#{N}/o { [:number, text.to_i] }
        /\s+/
        /[+-]/ { [:op, text] }
      end
    REX

    ruby = generate_lexer src

    assert_match "/\\d+/n", ruby
  end

  # A macro may itself interpolate an earlier macro.
  def test_simple_scanner_recursive_macro
    src = <<-'REX'
      class Calculator
      macro
        D /\d/
        N /#{D}+/
      rule
        /#{N}/ { [:number, text.to_i] }
        /\s+/
        /[+-]/ { [:op, text] }
      end
    REX

    txt = "1 + 2 + 30"

    exp = [[:number, 1],
           [:op, "+"],
           [:number, 2],
           [:op, "+"],
           [:number, 30]]

    assert_lexer src, txt, exp
  end

  # Debug enabled through the constructor option: tokens still lex
  # correctly, stdout mentions them, and nothing goes to stderr.
  def test_simple_scanner_debug_arg
    src = <<-'REX'
      class Calculator
      rule
        /\d+/ { [:number, text.to_i] }
        /\s+/
        /[+-]/ { [:op, text] }
      end
    REX

    txt = "1 + 2 + 30"

    exp = [[:number, 1],
           [:op, "+"],
           [:number, 2],
           [:op, "+"],
           [:number, 30]]

    option[:debug] = true

    out, err = capture_io do
      assert_lexer src, txt, exp
    end

    assert_equal "", err
    assert_match "[:number, 1]", out
    assert_match "[:op, \"+\"]", out
  end

  # Debug enabled through the grammar's option section: same expectations
  # as the constructor-option variant.
  def test_simple_scanner_debug_src
    src = <<-'REX'
      class Calculator
      option
        debug
      rule
        /\d+/ { [:number, text.to_i] }
        /\s+/
        /[+-]/ { [:op, text] }
      end
    REX

    txt = "1 + 2 + 30"

    exp = [[:number, 1],
           [:op, "+"],
           [:number, 2],
           [:op, "+"],
           [:number, 30]]

    out, err = capture_io do
      assert_lexer src, txt, exp
    end

    assert_equal "", err
    assert_match "[:number, 1]", out
    assert_match "[:op, \"+\"]", out
  end

  # With a lowercase state name the stateless rules still apply, so the
  # :op-only rule never produces its :bad token.
  def test_simple_scanner_inclusive
    src = <<-'REX'
      class Calculator
      rule
        /\d+/ { [:number, text.to_i] }
        /\s+/
        /[+-]/ { @state = :op; [:op, text] }

        # nil state always goes first, so we won't get this
        :op /\d+/ { @state = nil; [:bad, text.to_i] }
      end
    REX

    txt = "1 + 2 + 30"

    exp = [[:number, 1],
           [:op, "+"],
           [:number, 2],
           [:op, "+"],
           [:number, 30]]

    assert_lexer src, txt, exp
  end

  # With an uppercase state name only that state's rules apply, so numbers
  # following an operator come back as :number2.
  def test_simple_scanner_exclusive
    src = <<-'REX'
      class Calculator
      rule
        /\d+/ { [:number, text.to_i] }
        /\s+/
        /[+-]/ { @state = :OP; [:op, text] }

        :OP /\s+/
        :OP /\d+/ { @state = nil; [:number2, text.to_i] }
      end
    REX

    txt = "1 + 2 + 30"

    exp = [[:number, 1],
           [:op, "+"],
           [:number2, 2],
           [:op, "+"],
           [:number2, 30]]

    assert_lexer src, txt, exp
  end

  # Returning a [:state, X] token from an action moves the lexer into
  # state X: after "rpn" the :RPN rules produce the tokens.
  def test_simple_scanner_auto_action
    src = <<-'REX'
      class Calculator
      rule
        /rpn/ { [:state, :RPN] }
        /\d+/ { [:number, text.to_i] }
        /\s+/
        /[+-]/ { [:op, text] }

        :RPN /\s+/
        :RPN /[+-]/ { [:op2, text] }
        :RPN /\d+/ { [:number2, text.to_i] }
        :RPN /alg/ { [:state, nil] }
      end
    REX

    txt = "rpn 1 2 30 + + alg"

    exp = [[:state, :RPN],
           [:number2, 1],
           [:number2, 2],
           [:number2, 30],
           [:op2, "+"],
           [:op2, "+"],
           [:state, nil]]

    assert_lexer src, txt, exp
  end

  # Same as the auto-action test, but the state change is written as a
  # bare symbol / nil action instead of a block.
  def test_simple_scanner_auto_action_symbol
    src = <<-'REX'
      class Calculator
      rule
        /rpn/ :RPN
        /\d+/ { [:number, text.to_i] }
        /\s+/
        /[+-]/ { [:op, text] }

        :RPN /\s+/
        :RPN /[+-]/ { [:op2, text] }
        :RPN /\d+/ { [:number2, text.to_i] }
        :RPN /alg/ nil
      end
    REX

    txt = "rpn 1 2 30 + + alg"

    exp = [[:state, :RPN],
           [:number2, 1],
           [:number2, 2],
           [:number2, 30],
           [:op2, "+"],
           [:op2, "+"],
           [:state, nil]]

    assert_lexer src, txt, exp
  end

  # Predicate-guarded rules and state rules show up in the generated
  # source as the expected +when+ clauses.
  def test_simple_scanner_predicate_generator
    src = <<-'REX'
      class Calculator
      rules

        /\d+/ { [:number, text.to_i] }
        /\s+/
        :ARG /\d+/
        poot? /[+-]/ { [:bad1, text] }
        woot? /[+-]/ { [:op, text] }
      end
    REX

    ruby = generate_lexer src

    assert_match "when poot? && (text = ss.scan(/[+-]/)) then", ruby
    assert_match "when woot? && (text = ss.scan(/[+-]/)) then", ruby
    assert_match "when nil then", ruby
    assert_match "when :ARG then", ruby
  end

  # Code from the start section is emitted as the first line of next_token.
  def test_generator_start
    src = <<-'REX'
      class Calculator
      start
        do_the_thing
      rules
        /\d+/ { [:number, text.to_i] }
        /\s+/
      end
    REX

    ruby = generate_lexer src

    # The indentation of the generated lines is not pinned here: the
    # expected text is whitespace-sensitive, so only the line order is
    # asserted and any amount of leading blanks is accepted.
    assert_match(/def next_token\n[ \t]*do_the_thing/, ruby)
  end

  # A rule guarded by a false predicate is skipped; the true one fires.
  def test_simple_scanner_predicate
    src = <<-'REX'
      class Calculator
      inner
        def woot?
          true
        end
        def poot?
          false
        end

      rules

        /\d+/ { [:number, text.to_i] }
        /\s+/
        poot? /[+-]/ { [:bad1, text] }
        woot? /[+-]/ { [:op, text] }
      end
    REX

    txt = "1 + 2 + 30"

    exp = [[:number, 1],
           [:op, "+"],
           [:number, 2],
           [:op, "+"],
           [:number, 30]]

    assert_lexer src, txt, exp
  end

  # An action may name a method (defined in the inner section) instead of
  # supplying a block.
  def test_simple_scanner_method_actions
    src = <<-'REX'
      class Calculator
      inner
        def thingy text
          [:number, text.to_i]
        end
      rule
        /\d+/ thingy
        /\s+/
        /[+-]/ { [:op, text] }
      end
    REX

    txt = "1 + 2 + 30"

    exp = [[:number, 1],
           [:op, "+"],
           [:number, 2],
           [:op, "+"],
           [:number, 30]]

    assert_lexer src, txt, exp
  end

  # The generated-file banner comes first, followed by the grammar's
  # enclosing module lines.
  def test_header_is_written_after_module
    src = <<-'REX'
      module X
      module Y
      class Calculator
      rule
        /\d+/ { [:number, text.to_i] }
        /\s+/
        /[+-]/ { [:op, text] }
      end
      end
      end
    REX

    ruby = generate_lexer src

    exp = ["#--\n",
           "# This file is automatically generated. Do not modify it.\n",
           "# Generated by: oedipus_lex version #{OedipusLex::VERSION}.\n",
           "#++\n",
           "\n",
           "module X\n",
           "module Y\n"]

    assert_equal exp, ruby.lines.first(7)
  end

  # Parsing a missing file surfaces the underlying Errno::ENOENT.
  def test_read_non_existent_file
    rex = OedipusLex.new

    assert_raises Errno::ENOENT do
      rex.parse_file 'non_existent_file'
    end
  end

  # A namespaced class name with a superclass is carried into the output.
  def test_scanner_nests_classes
    src = <<-'REX'
      module Foo
      class Baz::Calculator < Bar
      rule
        /\d+/ { [:number, text.to_i] }
        /\s+/ { [:S, text] }
      end
      end
    REX

    ruby = generate_lexer src

    assert_match 'Baz::Calculator < Bar', ruby
  end

  # A superclass on the class line is carried into the output.
  def test_scanner_inherits
    source = generate_lexer <<-'REX'
      class Calculator < Bar
      rule
        /\d+/ { [:number, text.to_i] }
        /\s+/ { [:S, text] }
      end
    REX

    assert_match 'Calculator < Bar', source
  end

  # A namespaced superclass is carried into the output.
  def test_scanner_inherits_many_levels
    source = generate_lexer <<-'REX'
      class Calculator < Foo::Bar
      rule
        /\d+/ { [:number, text.to_i] }
        /\s+/ { [:S, text] }
      end
    REX

    assert_match 'Calculator < Foo::Bar', source
  end

  # A macro whose regexp contains backslash escapes parses, and the rule
  # keeps its interpolation in the generated scan call.
  def test_parses_macros_with_escapes
    source = generate_lexer %q{
      class Foo
      macro
        W /[\ \t]+/
      rule
        /#{W}/ { [:SPACE, text] }
      end
    }

    assert_match 'ss.scan(/#{W}/)', source
  end

  # The /o flag on an interpolating rule regexp is preserved.
  def test_parses_regexp_with_interpolation_o
    source = generate_lexer %q{
      class Foo
      rule
        /#{W}/o { [:SPACE, text] }
      end
    }

    assert_match 'ss.scan(/#{W}/o)', source
  end

  # Macro definition plus rules with and without /o all appear verbatim
  # in the generated source.
  def test_parses_regexp_with_interpolation_o_macro
    source = generate_lexer %q{
      class Foo
      macro
        W /[\ \t]+/
      rule
        /#{X}/ { [:SPACE, text] }
        /#{W}/o { [:X, text] }
      end
    }

    assert_match 'W = /[\ \t]+/', source
    assert_match 'ss.scan(/#{W}/o)', source
    assert_match 'ss.scan(/#{X}/)', source
  end

  # Actions may switch state through the state= writer; input that does
  # not fit the current state raises a ScanError naming that state.
  def test_changing_state_during_lexing
    src = <<-'REX'
      class Calculator
      rule
        /a/ { self.state = :B ; [:A, text] }
        :B /b/ { self.state = nil ; [:B, text] }
      end
    REX

    txt = "aba"
    exp = [[:A, 'a'], [:B, 'b'], [:A, 'a']]

    assert_lexer src, txt, exp

    txt = "aa"

    assert_lexer_error src, txt, "can not match (:B): 'a'"
  end
end