tdparser 1.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/tdparser.rb ADDED
@@ -0,0 +1,916 @@
1
+ # frozen_string_literal: true
2
+
3
+ # -*- ruby -*-
4
+ #
5
+ # Top-down parser for embedding in a Ruby script.
6
+ #
7
+
8
+ module TDParser
9
# Error class declared for parser failures. It is a RuntimeError subclass;
# no code in the visible source raises it.
class ParserException < RuntimeError; end
11
+
12
# Token stream with push-back support.
#
# Tokens come either from an enumerable passed as +args+ or from a block that
# receives an Enumerator::Yielder. Tokens handed back with #unshift are kept
# in an internal buffer and are returned by #shift before the underlying
# enumerator is consulted again.
class TokenGenerator
  # @param args [#each, nil] token source; when nil (or false) the block is used
  # @yieldparam yielder [Enumerator::Yielder] sink for generated tokens
  def initialize(args = nil, &block)
    @enumerator = Enumerator.new do |yielder|
      args ? args.each { |arg| yielder << arg } : block.call(yielder)
    end
    @buffer = []
  end

  # Next token straight from the enumerator (ignores pushed-back tokens).
  # Raises StopIteration when the enumerator is exhausted.
  def next
    @enumerator.next
  end

  # @return [Boolean] whether the enumerator can still produce a token
  def next?
    @enumerator.peek
  rescue StopIteration
    false
  else
    true
  end

  # All tokens of the underlying enumerator (the push-back buffer is not included).
  def to_a
    @enumerator.to_a
  end

  # Takes the next token, preferring pushed-back ones.
  # @return [Object, nil] nil once nothing is left
  def shift
    return @buffer.shift unless @buffer.empty?

    self.next if next?
  end

  # Pushes tokens back so that later #shift calls return them first.
  def unshift(*token)
    @buffer.unshift(*token)
  end
end
53
+
54
# Array of consumed tokens that additionally carries a Hash (+map+) of
# labelled values. Symbol and String indexes address the map; every other
# index is handled by Array.
class TokenBuffer < Array
  attr_accessor :map

  def initialize(*args)
    super
    @map = {}
  end

  # Reads a labelled value (Symbol/String key) or a positional element.
  def [](idx)
    case idx
    in Symbol | String
      @map[idx]
    else
      super(idx)
    end
  end

  # Writes a labelled value (Symbol/String key) or a positional element.
  def []=(idx, val)
    case idx
    in Symbol | String
      @map[idx] = val
    else
      super(idx, val)
    end
  end

  # Value stored under the reserved :__state__ map key.
  def state
    @map[:__state__]
  end

  # Stores +s+ under the reserved :__state__ map key.
  def state=(s)
    @map[:__state__] = s
  end

  # Empties both the positional elements and the map; returns the (now empty) map.
  def clear
    super
    @map.clear
  end
end
93
+
94
# Array subclass used for parse results. Concatenation with + is overridden
# so that the result is built from a copy of the receiver (and therefore
# stays a Sequence) instead of going through Array#+.
class Sequence < Array
  def +(other)
    merged = dup
    merged.concat(other)
  end
end
99
+
100
# Helpers shared by the parsers for speculative parsing.
module BufferUtils
  # Builds an empty TokenBuffer that shares +buff+'s label map (same Hash
  # object), to be used as the scratch buffer of one parsing attempt.
  def prepare(buff)
    TokenBuffer.new.tap { |scratch| scratch.map = buff.map }
  end

  # Pushes every token recorded in +buff+ back onto the token source +ts+,
  # in buffer order, undoing a failed attempt. Returns +buff+.
  def recover(buff, ts)
    buff.each { |token| ts.unshift(token) }
  end
end
111
+ include BufferUtils
112
+
113
# Base class of all parser combinators.
#
# A parser is invoked through #call(tokens, buff) and answers nil on failure
# or a Sequence on success. The operators defined here build composite
# parsers out of simpler ones.
class Parser
  include BufferUtils
  include TDParser

  # Parser built by Parser#>: stores a copy of the current buffer under
  # +label+ and then delegates to the wrapped parser.
  class BufferSnapshotParser < Parser
    def initialize(parser, label)
      @parser = parser
      @label = label
    end

    def call(tokens, buff)
      buff[@label] = buff.dup
      @parser.call(tokens, buff)
    end

    def to_s
      "(#{@parser} > #{@label})"
    end
  end

  # Lets a parser be passed where a block is expected.
  def to_proc
    proc { |*args| call(*args) }
  end

  def to_s
    '??'
  end

  # Subclasses override this; the base implementation matches nothing (nil).
  def call(*args); end

  # Base parsers have nothing to optimize; a copy is returned.
  def optimize(_default = false)
    dup
  end

  # Base parsers never compare equal; subclasses refine this.
  def ==(_other)
    false
  end

  # Used by ChoiceParser#shared_sequence to decide whether two parsers may be merged.
  def same?(r)
    self == r
  end

  # Sequence: self followed by +other+.
  def -(other)
    ConcatParser.new(self, other)
  end

  # Tagged alternative (see ParallelParser).
  def +(other)
    ParallelParser.new(self, other)
  end

  # Ordered choice; the resulting ChoiceParser is optimized immediately.
  def |(other)
    ChoiceParser.new(self, other).optimize(true)
  end

  # Repetition. +other+ is either a minimum count or a Range whose #min is
  # the minimum count; the Range itself is handed to IterationParser.
  def *(other)
    if other.is_a?(Range)
      IterationParser.new(self, other.min, other)
    else
      IterationParser.new(self, other, nil)
    end
  end

  # Attaches an action that transforms the parse result.
  def >>(other)
    ActionParser.new(self, other)
  end

  # Labels the result so it can be read from the buffer map.
  def /(other)
    LabelParser.new(self, other)
  end

  # Pushes the result onto +other+ (a stack-like object responding to push).
  def %(other)
    StackParser.new(self, other)
  end

  # Stores a copy of the buffer under +other+ before running this parser.
  #
  # The previous implementation passed a block to Parser.new (which ignores
  # it) and relied on Parser#[] (which is commented out), so the returned
  # parser always produced nil. A dedicated parser class is used instead so
  # the behaviour survives #dup/#optimize.
  def >(other)
    BufferSnapshotParser.new(self, other)
  end

  # Negative look-ahead.
  def ~@
    NegativeParser.new(self)
  end

  # Runs the parser.
  #
  # @param tokens [#shift & #unshift, #each, Object] token source; objects
  #   that only respond to #each are wrapped in a TokenGenerator
  # @param buff [TokenBuffer, nil] buffer to use (a fresh one by default)
  # @yield optional generator block, used instead of +tokens+
  # @return [Object, nil] first element of the result, or nil on failure
  def parse(tokens = nil, buff = nil, &blk)
    buff ||= TokenBuffer.new
    @tokens =
      if blk
        TokenGenerator.new(&blk)
      elsif tokens.respond_to?(:shift) && tokens.respond_to?(:unshift)
        tokens
      elsif tokens.respond_to?(:each)
        TokenGenerator.new(tokens)
      else
        tokens
      end
    result = call(@tokens, buff)
    result.nil? ? nil : result[0]
  end

  # Looks at the next token of the source used by the last #parse call
  # without consuming it.
  def peek
    token = @tokens.shift
    @tokens.unshift(token) unless token.nil?
    token
  end

  # Block form of #>>.
  def do(&block)
    self >> block
  end
end
219
+ # end of Parser
220
+
221
# Parser that resolves another rule lazily, at parse time.
#
# When +symbol+ is a Symbol or String, the rule is obtained by sending that
# name (with +options+ as arguments) to +context+; when it is a Parser it is
# used directly. Any other kind of +symbol+ makes #call answer nil.
class NonTerminalParser < Parser
  attr_reader :context, :symbol, :options

  def initialize(context, sym, *options)
    @context = context
    @symbol = sym
    @options = options
  end

  def call(tokens, buff)
    case @symbol
    when Symbol, String then @context.__send__(@symbol, *@options).call(tokens, buff)
    when Parser then @symbol.call(tokens, buff)
    end
  end

  def ==(other)
    return false unless self.class == other.class

    @context == other.context && @symbol == other.symbol && @options == other.options
  end

  def to_s
    @symbol.to_s
  end
end
252
+
253
# Parser that matches a single token against +symbol+.
class TerminalParser < Parser
  attr_reader :symbol, :equality

  # @param obj [Object] expected token (or matcher)
  # @param eqsym [Symbol] name of the comparison method, e.g. :===
  def initialize(obj, eqsym)
    @symbol = obj
    @equality = eqsym
  end

  # Shifts one token and records it in +buff+ (even when it does not match,
  # so that the caller can push it back). Succeeds when the comparison holds
  # in either direction.
  def call(tokens, buff)
    candidate = tokens.shift
    buff.unshift(candidate)
    matched = @symbol.__send__(@equality, candidate) || candidate.__send__(@equality, @symbol)
    matched ? Sequence[candidate] : nil
  end

  def ==(other)
    return false unless self.class == other.class

    @symbol == other.symbol && @equality == other.equality
  end

  def to_s
    @symbol.to_s
  end
end
279
+
280
# Base class of parsers that are built from other parsers (+parsers+).
class CompositeParser < Parser
  attr_accessor :parsers

  def initialize(*parsers)
    @parsers = parsers
  end

  # Returns a copy whose sub-parsers have been optimized with the same flag.
  def optimize(default = false)
    dup.tap do |copy|
      copy.parsers = @parsers.map { |sub| sub.optimize(default) }
    end
  end

  def ==(other)
    return false unless self.class == other.class

    @parsers == other.parsers
  end

  # Equal and, in addition, every sub-parser pair agrees on same?.
  def same?(r)
    super(r) && @parsers.each_with_index.all? { |sub, i| sub.same?(r.parsers[i]) }
  end

  def to_s
    "<composite: #{@parsers.map(&:to_s)}>"
  end
end
307
+
308
# Runs the wrapped parser and feeds its result to +action+.
class ActionParser < CompositeParser
  attr_reader :action

  def initialize(parser, act)
    @action = act
    super(parser)
  end

  # On success the result is copied into a TokenBuffer sharing the current
  # label map, +action+ is invoked with it via #[], and the action's return
  # value becomes the single element of the resulting Sequence.
  def call(tokens, buff)
    matched = @parsers[0].call(tokens, buff)
    return nil if matched.nil?

    argument = TokenBuffer[*matched]
    argument.map = buff.map
    Sequence[@action[argument]]
  end

  def ==(other)
    super(other) && @action == other.action
  end

  def to_s
    "(#{@parsers[0]} <action>)"
  end
end
335
+
336
# Runs the wrapped parser and stores its result (nil on failure as well)
# in the buffer map under +label+.
class LabelParser < CompositeParser
  attr_reader :label

  def initialize(parser, label)
    @label = label
    super(parser)
  end

  def call(tokens, buff)
    buff.map[@label] = @parsers[0].call(tokens, buff)
  end

  def ==(other)
    super(other) && @label == other.label
  end

  def to_s
    "(#{@parsers[0]}/#{@label})"
  end
end
359
+
360
# Runs the wrapped parser and pushes its result (nil on failure as well)
# onto +stack+.
class StackParser < CompositeParser
  attr_reader :stack

  def initialize(parser, stack)
    @stack = stack
    super(parser)
  end

  def call(tokens, buff)
    @parsers[0].call(tokens, buff).tap { |outcome| @stack.push(outcome) }
  end

  def ==(other)
    super(other) && @stack == other.stack
  end

  # Never merged by the choice optimizer, because it has a side effect on the stack.
  def same?(_r)
    false
  end

  def to_s
    "<stack:#{@stack.object_id}>"
  end
end
387
+
388
# Sequence of two parsers: both must succeed, results are concatenated.
class ConcatParser < CompositeParser
  def call(tokens, buff)
    head = @parsers[0].call(tokens, buff)
    return nil if head.nil?

    tail = @parsers[1].call(tokens, buff)
    return nil if tail.nil?

    head + tail
  end

  # Keeps chains right-nested: (a - b) - c is built as a - (b - c).
  def -(other)
    first, second = @parsers[0], @parsers[1]
    first - (second - other)
  end

  def to_s
    "(#{@parsers[0]} #{@parsers[1]})"
  end
end
407
+
408
# Ordered choice between two parsers, with a prefix-sharing optimization.
class ChoiceParser < CompositeParser
  # Tries the first parser on a scratch buffer. On failure the consumed
  # tokens are pushed back and the second parser runs on the real buffer;
  # on success the scratch tokens are merged into the front of +buff+.
  def call(tokens, buff)
    trial = prepare(buff)
    first = @parsers[0].call(tokens, trial)
    if first.nil?
      recover(trial, tokens)
      return @parsers[1].call(tokens, buff)
    end
    buff.insert(0, *trial)
    first
  end

  def to_s
    "(#{@parsers[0]} | #{@parsers[1]})"
  end

  # Finds the common leading part of two ConcatParser chains.
  #
  # @return [Array] [shared prefix or nil, remainder of r1, remainder of r2]
  def shared_sequence(r1, r2)
    return [nil, r1, r2] unless r1.is_a?(ConcatParser) && r2.is_a?(ConcatParser)

    head1, tail1 = r1.parsers[0], r1.parsers[1]
    head2, tail2 = r2.parsers[0], r2.parsers[1]
    return [nil, r1, r2] unless head1.same?(head2)

    deeper, tail1, tail2 = shared_sequence(tail1, tail2)
    [deeper ? head1 - deeper : head1, tail1, tail2]
  end

  # Factors a shared prefix out of both alternatives:
  #   (p - a) | (p - b)  becomes  p - (a + b)
  # re-attaching the alternatives' actions through a dispatching proc.
  # Without a shared prefix the parser is copied (+default+ true) or
  # optimized recursively (+default+ false).
  def optimize(default = false)
    left, left_action = strip_action(@parsers[0])
    right, right_action = strip_action(@parsers[1])
    share, left_rest, right_rest = shared_sequence(left, right)
    return(default ? dup : super(default)) unless share

    merged = share - (left_rest + right_rest)
    action = combined_action(left_action, right_action)
    action ? merged >> action : merged
  end

  private

  # Splits an ActionParser into [inner parser, action]; other parsers give [parser, nil].
  def strip_action(parser)
    parser.is_a?(ActionParser) ? [parser.parsers[0], parser.action] : [parser, nil]
  end

  # Builds the proc that dispatches on the [left, right] pair produced by the
  # ParallelParser at the end of the merged sequence. Returns nil when
  # neither alternative had an action.
  def combined_action(first, second)
    if first && second
      proc do |x|
        y0, y1 = x.pop
        y0 ? first.call(x.push(*y0)) : second.call(x.push(*y1))
      end
    elsif first
      proc do |x|
        y0, = x.pop
        first.call(x.push(*y0)) if y0
      end
    elsif second
      proc do |x|
        _, y1 = x.pop
        second.call(x.push(*y1)) if y1
      end
    end
  end
end
486
+
487
# Tagged alternative: the result tells which branch matched.
#
# Success yields Sequence[Sequence[x, nil]] when the first parser matched
# and Sequence[Sequence[nil, y]] when the second one did.
class ParallelParser < CompositeParser
  # The first parser runs on a scratch buffer so that its tokens can be
  # pushed back before the second parser is tried.
  #
  # Fails (nil) when neither parser matches. Previously that case returned
  # Sequence[Sequence[nil, nil]], i.e. a success, which made an optimized
  # ChoiceParser (share - (a + b)) accept input that neither alternative
  # accepts.
  def call(tokens, buff)
    trial = prepare(buff)
    first = @parsers[0].call(tokens, trial)
    unless first.nil?
      buff.insert(0, *trial)
      return Sequence[Sequence[first, nil]]
    end

    recover(trial, tokens)
    second = @parsers[1].call(tokens, buff)
    second.nil? ? nil : Sequence[Sequence[nil, second]]
  end

  def to_s
    "(#{@parsers[0]} + #{@parsers[1]})"
  end
end
503
+
504
# Repetition of a parser: at least +min+ matches, optionally capped by +range+.
class IterationParser < CompositeParser
  attr_reader :min, :range

  # @param parser [Parser] parser to repeat
  # @param n [Numeric] minimum number of matches
  # @param range [Range, nil] when given, its maximum caps the number of matches
  def initialize(parser, n, range)
    @min = n
    @range = range
    super(parser)
  end

  # Matches the wrapped parser +min+ times (failing with nil if that is not
  # possible) and then keeps matching until it fails or the upper bound is
  # reached. The result is a Sequence holding one Array of all matches.
  #
  # The earlier implementation looped without limit inside +range.each+, so
  # the range's upper bound was never enforced and an endless range never
  # terminated.
  def call(ts, buff)
    matches = []
    remaining = @min
    while remaining.positive?
      remaining -= 1
      return nil unless attempt(ts, buff, matches)
    end

    limit = upper_bound
    loop do
      break if limit && matches.size >= limit
      break unless attempt(ts, buff, matches)
    end
    Sequence[matches]
  end

  def to_s
    "(#{@parsers[0]})*#{@range ? @range.to_s : @min.to_s}"
  end

  def ==(other)
    super(other) &&
      (@min == other.min) &&
      (@range == other.range)
  end

  private

  # One speculative match on a scratch buffer. On failure the tokens are
  # pushed back and false is returned; on success they are merged into
  # +buff+, the result is appended to +matches+ and true is returned.
  def attempt(ts, buff, matches)
    trial = prepare(buff)
    matched = @parsers[0].call(ts, trial)
    if matched.nil?
      recover(trial, ts)
      return false
    end
    buff.insert(0, *trial)
    matches.push(matched)
    true
  end

  # Maximum number of matches, or nil when unbounded (no range, or an endless one).
  def upper_bound
    return nil if @range.nil?
    return nil if @range.is_a?(Range) && @range.end.nil?

    @range.max
  end
end
575
+
576
# Negative look-ahead: succeeds only when the wrapped parser fails.
class NegativeParser < CompositeParser
  # The wrapped parser runs on a scratch buffer and every token it recorded
  # is pushed back afterwards, whatever the outcome. On success the result
  # wraps the recorded tokens, in reversed buffer order, in a nested Sequence.
  def call(tokens, buff)
    trial = prepare(buff)
    outcome = @parsers[0].call(tokens, trial)
    seen = trial.reverse
    recover(trial, tokens)
    Sequence[Sequence[*seen]] if outcome.nil?
  end

  def to_s
    "~#{@parsers[0]}"
  end
end
591
+
592
# Parser that always fails.
class FailParser < Parser
  def call(_tokens, _buff)
    nil
  end

  def to_s
    '<fail>'
  end

  # Two FailParsers are interchangeable. The method used to be declared
  # without a parameter (and referenced an undefined +r+), so any comparison
  # raised ArgumentError.
  def ==(other)
    self.class == other.class
  end
end
605
+
606
# Parser that succeeds without consuming anything; its result is Sequence[nil].
class EmptyParser < Parser
  def call(_tokens, _buff)
    Sequence[nil]
  end

  def to_s
    '<empty>'
  end

  # Equal to other EmptyParsers only. It used to be equal to every object,
  # which let Parser#same? (and so ChoiceParser#shared_sequence) treat an
  # unrelated parser as a shared prefix.
  def ==(other)
    self.class == other.class
  end
end
619
+
620
# Parser that accepts any single token and fails only at the end of input.
class AnyParser < Parser
  # The consumed token is recorded in +buff+, as TerminalParser does, so a
  # failing enclosing choice/iteration can push it back onto the source.
  # Previously the token was dropped on backtracking.
  def call(tokens, buff)
    token = tokens.shift
    return nil if token.nil?

    buff.unshift(token)
    Sequence[token]
  end

  def to_s
    '<any>'
  end

  # Equal to other AnyParsers only (it used to be equal to every object,
  # which misled the shared-prefix optimization of ChoiceParser).
  def ==(other)
    self.class == other.class
  end
end
638
+
639
# Parser that succeeds (with Sequence[nil]) only at the end of input.
class NoneParser < Parser
  # When a token is still available the parser fails; that token is recorded
  # in +buff+ so the caller's recovery pushes it back instead of losing it.
  def call(tokens, buff)
    token = tokens.shift
    return Sequence[nil] if token.nil?

    buff.unshift(token)
    nil
  end

  def to_s
    '<none>'
  end

  # Equal to other NoneParsers only (it used to be equal to every object,
  # which misled the shared-prefix optimization of ChoiceParser).
  def ==(other)
    self.class == other.class
  end
end
655
+
656
# Common base of parsers that re-match a previously captured result.
class ReferenceParser < Parser
  # Turns a captured result into a parser that matches the same tokens again:
  # every element becomes a TerminalParser (compared with +eqsym+), nested
  # Sequences after the first element are expanded recursively, and all
  # parts are chained with #-.
  #
  # The input is no longer modified. The previous version shifted elements
  # off nested Sequences in place, which corrupted the stored capture for
  # later uses because callers only pass a shallow copy.
  def __backref__(xs, eqsym)
    head, *rest = xs
    rest.inject(token(head, eqsym)) do |acc, item|
      case item
      when Sequence
        acc - __backref__(item, eqsym)
      else
        acc - token(item, eqsym)
      end
    end
  end

  # Never merged by the choice optimizer.
  def same?(_r)
    false
  end
end
673
+
674
# Re-matches the tokens previously stored in the buffer map under +label+.
class BackrefParser < ReferenceParser
  attr_reader :label, :equality

  def initialize(label, eqsym)
    @label = label
    @equality = eqsym
  end

  # Fails when nothing (or an empty result) is stored under the label.
  def call(tokens, buff)
    captured = buff.map[@label]
    return nil if captured.nil? || captured.empty?

    __backref__(captured.dup, @equality).call(tokens, buff)
  end

  def to_s
    "<backref:#{@label}>"
  end

  # Same class, label and comparison method. The old version started with
  # +super+, which resolves to Parser#== (always false), so the comparison
  # below was unreachable.
  def ==(other)
    (self.class == other.class) &&
      (@label == other.label) &&
      (@equality == other.equality)
  end
end
701
+
702
# Re-matches the tokens of the most recent result popped from +stack+.
class StackrefParser < ReferenceParser
  attr_reader :stack, :equality

  def initialize(stack, eqsym)
    @stack = stack
    @equality = eqsym
  end

  # Pops one entry off the stack on every call; fails when the stack yields
  # nil or an empty result.
  def call(tokens, buff)
    captured = @stack.pop
    return nil if captured.nil? || captured.empty?

    __backref__(captured.dup, @equality).call(tokens, buff)
  end

  def to_s
    "<stackref:#{@stack.object_id}>"
  end

  # Same class, identical stack object and same comparison method. The old
  # version started with +super+, which resolves to Parser#== (always
  # false), so the comparison below was unreachable.
  def ==(other)
    (self.class == other.class) &&
      @stack.equal?(other.stack) &&
      (@equality == other.equality)
  end
end
729
+
730
# Parser that consumes nothing and succeeds when its block, called with the
# buffer's label map, returns a truthy value (which becomes the result).
class ConditionParser < Parser
  attr_reader :condition

  def initialize(&condition)
    @condition = condition
  end

  def call(_tokens, buff)
    verdict = @condition.call(buff.map)
    verdict ? Sequence[verdict] : nil
  end

  def to_s
    "<condition:#{@condition}>"
  end

  # Same class and same condition block. The old version started with
  # +super+, which resolves to Parser#== (always false), so the comparison
  # below was unreachable.
  def ==(other)
    (self.class == other.class) &&
      (@condition == other.condition)
  end

  # Never merged by the choice optimizer.
  def same?(_r)
    false
  end
end
756
+
757
# Parser that consumes nothing and succeeds when the buffer map's :state
# entry equals +state+.
class StateParser < Parser
  attr_reader :state

  def initialize(s)
    @state = s
  end

  # NOTE(review): this reads the :state key, whereas TokenBuffer#state and
  # #state= use :__state__ -- confirm which key callers are expected to set.
  def call(_tokens, buff)
    return unless buff.map[:state] == @state

    Sequence[@state]
  end

  def to_s
    "<state:#{@state}>"
  end

  # Same class and same state. The old version started with +super+, which
  # resolves to Parser#== (always false), so the comparison below was
  # unreachable.
  def ==(other)
    (self.class == other.class) &&
      (@state == other.state)
  end

  # Never merged by the choice optimizer.
  def same?(_r)
    false
  end
end
783
+
784
# Lazy reference to the rule +sym+ of the receiver; +opts+ are passed to the
# rule method when it is resolved.
def rule(sym, *opts) = NonTerminalParser.new(self, sym, *opts)
787
+
788
# Parser matching one token against +x+ using the comparison method +eqsym+.
def token(x, eqsym = :===) = TerminalParser.new(x, eqsym)
791
+
792
# Parser re-matching the result stored under label +x+.
def backref(x, eqsym = :===) = BackrefParser.new(x, eqsym)
795
+
796
# Parser re-matching the result popped from +stack+.
def stackref(stack, eqsym = :===) = StackrefParser.new(stack, eqsym)
799
+
800
# Parser that succeeds only while the buffer is in state +s+ (see StateParser).
def state(s) = StateParser.new(s)
803
+
804
# Parser that matches without consuming input. A given block is forwarded
# to EmptyParser.new.
def empty_rule(&block) = EmptyParser.new(&block)
807
+ alias empty empty_rule
808
+
809
# Parser that accepts any single token.
def any_rule = AnyParser.new
812
+ alias any any_rule
813
+
814
# Parser that matches only at the end of input.
def none_rule = NoneParser.new
817
+ alias none none_rule
818
+
819
# Parser that never matches.
def fail_rule = FailParser.new
822
+ alias fail fail_rule
823
+
824
# Parser that succeeds when the given block, called with the buffer's label
# map, returns a truthy value.
def condition_rule(&block) = ConditionParser.new(&block)
827
+ alias condition condition_rule
828
+
829
# Builds a left-associative repetition: the first rule is the base, every
# further rule is a tail that may follow it zero or more times. For each
# tail a parser "base - tail * 0" is created; its result is folded from the
# left, calling the block with Sequence[accumulator, *tail_match]. The
# alternatives are combined with | on top of the always-failing parser.
def leftrec(*rules, &act)
  fold = proc do |x|
    x[1].inject(x[0]) { |acc, y| act.call(Sequence[acc, *y]) }
  end
  base, *tails = rules
  tails
    .map { |tail| (base - (tail * 0)) >> fold }
    .inject(fail) { |acc, alternative| alternative | acc }
end
838
+
839
# Builds a right-associative repetition: the last rule is the base, every
# preceding rule is a prefix that may occur zero or more times before it.
# For each prefix a parser "prefix * 0 - base" is created; its matches are
# folded from the right, calling the block with Sequence[*prefix_match,
# accumulator]. The alternatives are combined with | on top of the
# always-failing parser.
def rightrec(*rules, &act)
  fold = proc do |x|
    x[0].reverse.inject(x[1]) do |acc, y|
      act.call(Sequence[*y.dup.push(acc)])
    end
  end
  *prefixes, base = rules
  prefixes
    .map { |prefix| ((prefix * 0) - base) >> fold }
    .inject(fail) { |acc, alternative| alternative | acc }
end
850
+
851
# Chains left-associative infix levels: starting from +base+, every infix
# parser +r+ wraps the level built so far as leftrec(level, r - level),
# with the given block as the combining action.
#
# The block parameter is named because forwarding an anonymous block (&)
# from inside another block is rejected with a SyntaxError by Ruby 3.3.0,
# which prevented the whole file from loading there.
def chainl(base, *infixes, &act)
  infixes.inject(base) do |level, r|
    leftrec(level, r - level, &act)
  end
end
856
+
857
# Chains right-associative infix levels: starting from +base+, every infix
# parser +r+ wraps the level built so far as rightrec(level - r, level),
# with the given block as the combining action.
#
# The block parameter is named because forwarding an anonymous block (&)
# from inside another block is rejected with a SyntaxError by Ruby 3.3.0,
# which prevented the whole file from loading there.
def chainr(base, *infixes, &act)
  infixes.inject(base) do |level, r|
    rightrec(level - r, level, &act)
  end
end
862
+
863
# Container for grammar rules written with a small DSL: inside a definition,
# "name = parser" style calls (setter-like messages) define rule methods and
# bare unknown names become lazy rule references.
class Grammar
  include TDParser

  # Runs +block+ with the DSL's method_missing installed on this object only
  # (via its singleton class) and removes it again afterwards, even if the
  # block raises.
  def define(&block)
    instance_eval do
      alias method_missing g_method_missing
      block.call(self)
    ensure
      undef method_missing
    end
  end

  # DSL dispatcher.
  # - "name=" with one argument defines an instance method +name+ on the
  #   object's class that returns the parser (non-Parser values are wrapped
  #   with #token).
  # - a bare name without arguments yields a lazy reference to that rule.
  # - anything else raises NoMethodError.
  def g_method_missing(sym, *args)
    name = sym.to_s
    if name.end_with?('=')
      value = args[0]
      parser =
        case value
        when Parser then value
        else token(value)
        end
      self.class.define_method(name[0..-2]) { parser }
    elsif args.empty?
      rule(name)
    else
      raise(NoMethodError, "undefined method `#{name}' for #{inspect}")
    end
  end

  alias method_missing g_method_missing
end
899
+
900
# Creates a grammar object from a block.
#
# An anonymous Grammar subclass is instantiated, the block is evaluated in
# the context of that instance (which is also passed as the block argument),
# and the DSL's method_missing is then undefined on the instance, even if
# the block raises. Positional arguments are accepted and ignored.
#
# The former "defined?(g.instance_exec)" check was dropped: instance_exec
# exists on every Ruby version able to parse this file, so the
# instance_eval fallback was unreachable.
#
# @return [Grammar] the populated grammar instance
def self.define(*_args, &)
  grammar = Class.new(Grammar).new
  begin
    grammar.instance_exec(grammar, &)
  ensure
    grammar.instance_eval { undef method_missing }
  end
  grammar
end
916
+ end