kpeg 0.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,807 @@
1
+ require 'strscan'
2
+ require 'kpeg/parser'
3
+ require 'kpeg/match'
4
+
5
+ module KPeg
6
# A named grammar rule: ties a rule name to the operator that forms its
# body, together with an optional argument list.
class Rule
  attr_reader :name, :op, :arguments

  # name - the rule's name
  # op   - the operator making up the rule body
  # args - optional rule arguments (nil when omitted)
  def initialize(name, op, args=nil)
    @name, @op, @arguments = name, op, args
  end
end
15
+
16
# Base class for grammar operators. Carries an optional action and the
# bookkeeping used to keep only the values produced by Tag children.
class Operator
  attr_accessor :action

  def initialize
    @has_tags = false
    @action = nil
  end

  # Stores +act+ as this operator's action and returns it.
  def set_action(act)
    @action = act
  end

  # Records the positions within +ops+ that hold Tag operators.
  # The position list is only stored once a Tag has been seen (by this or
  # an earlier call); otherwise nothing is stored and nil is returned.
  def detect_tags(ops)
    positions = []
    ops.each_index do |i|
      positions << i if ops[i].kind_of?(Tag)
    end

    @has_tags = true unless positions.empty?
    @tags = positions if @has_tags
  end

  # Returns only the entries of +values+ at the recorded tag positions,
  # or +values+ untouched when no tags were detected.
  def prune_values(values)
    @has_tags ? values.values_at(*@tags) : values
  end

  # Shared formatter for the subclasses' #inspect output.
  def inspect_type(tag, body)
    "#<#{tag} #{body}>"
  end

  # Builds a Choice between this operator and +other+ (which is run
  # through Grammar.resolve first).
  def |(other)
    alternative = Grammar.resolve(other)
    Choice.new(self, alternative)
  end
end
53
+
54
# Operator that consumes a single byte of input, whatever it is.
class Dot < Operator
  # Asks +x+ for one byte via get_byte. Returns a MatchString for it, or
  # the result of x.fail(self) when no byte is available.
  def match(x)
    byte = x.get_byte
    return x.fail(self) unless byte

    MatchString.new(self, byte)
  end

  # Every Dot is equal to every other Dot.
  def ==(obj)
    Dot === obj
  end

  def inspect
    "#<dot>"
  end
end
71
+
72
# Operator that matches one exact string.
class LiteralString < Operator
  attr_reader :string

  # str - the literal text to match. It is escaped and compiled into a
  #       Regexp once, so that #match can hand it to x.scan.
  def initialize(str)
    super()
    @string = str
    @reg = Regexp.new(Regexp.escape(str))
  end

  # Scans for the literal on +x+. Returns a MatchString holding the
  # scanned text, or the result of x.fail(self) when the scan fails.
  def match(x)
    scanned = x.scan(@reg)
    return x.fail(self) unless scanned

    MatchString.new(self, scanned)
  end

  # Two LiteralStrings are equal when their text is equal; anything else
  # falls back to the inherited comparison.
  def ==(obj)
    return super unless LiteralString === obj

    @string == obj.string
  end

  def inspect
    inspect_type 'str', @string.inspect
  end
end
102
+
103
# Operator that matches input against a regular expression.
class LiteralRegexp < Operator
  attr_reader :regexp

  # reg  - either a Regexp, used as-is, or a String of regexp source.
  # opts - optional String of option letters, consulted only when +reg+
  #        is a String:
  #          "m" -> Regexp::MULTILINE
  #          "x" -> Regexp::EXTENDED
  #          "i" -> Regexp::IGNORECASE
  #          "n"/"N", "e"/"E", "s"/"S", "u"/"U" -> kcode letters; the last
  #          one given wins. Only "n"/"N" has an effect (it selects
  #          Regexp::NOENCODING); the others are accepted and ignored.
  #        Any other letter is silently skipped.
  def initialize(reg, opts=nil)
    super()

    unless reg.kind_of? String
      @regexp = reg
      return
    end

    flags = 0
    lang = nil

    if opts
      opts.each_char do |o|
        case o
        when "n", "N", "e", "E", "s", "S", "u", "U"
          lang = o
        when "m"
          flags |= Regexp::MULTILINE
        when "x"
          flags |= Regexp::EXTENDED
        when "i"
          flags |= Regexp::IGNORECASE
        end
      end
    end

    # The three-argument Regexp.new(source, flags, kcode) form was
    # deprecated in Ruby 3.2 and raises ArgumentError from 3.3 on. The only
    # kcode it still honoured was "n", which is the NOENCODING flag, so
    # that is passed through the flags argument instead.
    flags |= Regexp::NOENCODING if lang && lang.downcase == "n"

    @regexp = Regexp.new(reg, flags)
  end

  # The regexp's source text.
  def string
    @regexp.source
  end

  # Scans for the regexp on +x+. Returns a MatchString holding the
  # scanned text, or the result of x.fail(self) when the scan fails.
  def match(x)
    if str = x.scan(@regexp)
      MatchString.new(self, str)
    else
      x.fail(self)
    end
  end

  # Two LiteralRegexps are equal when their regexps are equal; anything
  # else falls back to the inherited comparison.
  def ==(obj)
    case obj
    when LiteralRegexp
      @regexp == obj.regexp
    else
      super
    end
  end

  def inspect
    inspect_type 'reg', @regexp.inspect
  end
end
159
+
160
# Operator that matches one character lying between two bounds, using a
# regexp character class built from them.
class CharRange < Operator
  attr_reader :start, :fin

  # start - lower bound of the range
  # fin   - upper bound of the range
  # Both bounds are regexp-escaped before being placed in the class.
  def initialize(start, fin)
    super()
    @start = start
    @fin = fin

    lower = Regexp.escape(start)
    upper = Regexp.escape(fin)
    @regexp = Regexp.new("[#{lower}-#{upper}]")
  end

  # Source text of the character-class regexp.
  def string
    @regexp.source
  end

  # Scans for the character class on +x+. Returns a MatchString holding
  # the scanned text, or the result of x.fail(self) when the scan fails.
  def match(x)
    scanned = x.scan(@regexp)
    return x.fail(self) unless scanned

    MatchString.new(self, scanned)
  end

  # Two CharRanges are equal when both bounds are equal; anything else
  # falls back to the inherited comparison.
  def ==(obj)
    return super unless CharRange === obj

    @start == obj.start && @fin == obj.fin
  end

  def inspect
    inspect_type 'range', "#{@start}-#{@fin}"
  end
end
195
+
196
# Ordered choice: tries each alternative in turn and returns the first
# match.
class Choice < Operator
  attr_reader :ops

  # many - the alternatives, in the order they should be tried
  def initialize(*many)
    super()
    @ops = many
  end

  # Appends +other+ (run through Grammar.resolve) to this Choice in place
  # and returns self, so chained `a | b | c` builds one flat Choice.
  def |(other)
    @ops << Grammar.resolve(other)
    self
  end

  # Tries the alternatives in order, rewinding x.pos after each failed
  # attempt. Returns the first successful match, or nil if none match.
  def match(x)
    origin = x.pos

    @ops.each do |alternative|
      result = alternative.match(x)
      return result if result

      x.pos = origin
    end

    nil
  end

  # Two Choices are equal when their alternative lists are equal;
  # anything else falls back to the inherited comparison.
  def ==(obj)
    return super unless Choice === obj

    @ops == obj.ops
  end

  def inspect
    rendered = @ops.map { |alternative| alternative.inspect }
    inspect_type "any", rendered.join(' | ')
  end
end
236
+
237
# Repetition operator: applies an operator repeatedly, bounded by a
# minimum and an optional maximum count.
class Multiple < Operator
  attr_reader :op, :min, :max, :save_values

  # op  - the operator to repeat
  # min - least number of repetitions required
  # max - greatest number allowed, or nil for no upper bound
  def initialize(op, min, max)
    super()
    @op = op
    @min = min
    @max = max
    @save_values = nil
  end

  # Flags this repetition as one whose values should be kept.
  def save_values!
    @save_values = true
  end

  # Applies the wrapped operator until it stops matching.
  #
  # Returns a MatchComposition of the collected matches when at least
  # +min+ were found. Returns nil, with x.pos rewound to where it started,
  # when fewer than +min+ were found or when the count goes past +max+
  # (exceeding the maximum fails the whole match rather than stopping
  # early).
  def match(x)
    origin = x.pos
    collected = []
    count = 0

    while (result = @op.match(x))
      collected << result
      count += 1

      if @max && count > @max
        x.pos = origin
        return nil
      end
    end

    return MatchComposition.new(self, collected) if count >= @min

    x.pos = origin
    nil
  end

  # Two Multiples are equal when operator, min and max are all equal;
  # anything else falls back to the inherited comparison.
  def ==(obj)
    return super unless Multiple === obj

    @op == obj.op && @min == obj.min && @max == obj.max
  end

  def inspect
    upper = @max || "*"
    inspect_type "multi", "#{@min} #{upper} #{@op.inspect}"
  end
end
294
+
295
# Sequence operator: every child operator must match, one after another.
class Sequence < Operator
  attr_reader :ops

  # ops - the child operators, in matching order. Tag children are
  #       detected up front via detect_tags.
  def initialize(*ops)
    super()
    @ops = ops
    detect_tags ops
  end

  # Matches each child in order. Returns a MatchComposition of all child
  # matches, or nil (with x.pos rewound to where it started) as soon as
  # one child fails.
  def match(x)
    origin = x.pos
    collected = []

    @ops.each do |step|
      result = step.match(x)
      unless result
        x.pos = origin
        return nil
      end
      collected << result
    end

    MatchComposition.new(self, collected)
  end

  # Two Sequences are equal when their child lists are equal; anything
  # else falls back to the inherited comparison.
  def ==(obj)
    return super unless Sequence === obj

    @ops == obj.ops
  end

  def inspect
    rendered = @ops.map { |step| step.inspect }
    inspect_type "seq", rendered.join(' ')
  end
end
330
+
331
# Positive lookahead: succeeds when the wrapped operator matches, without
# leaving the position advanced.
class AndPredicate < Operator
  attr_reader :op

  # op - the operator to look ahead with
  def initialize(op)
    super()
    @op = op
  end

  # Runs the wrapped operator, then restores x.pos regardless of the
  # outcome. Returns an empty MatchString on success, nil on failure.
  def match(x)
    saved = x.pos
    hit = @op.match(x)
    x.pos = saved

    return nil unless hit

    MatchString.new(self, "")
  end

  # Two AndPredicates are equal when their wrapped operators are equal;
  # anything else falls back to the inherited comparison.
  def ==(obj)
    return super unless AndPredicate === obj

    @op == obj.op
  end

  def inspect
    inspect_type "andp", @op.inspect
  end
end
360
+
361
# Negative lookahead: succeeds when the wrapped operator does NOT match,
# without leaving the position advanced.
class NotPredicate < Operator
  attr_reader :op

  # op - the operator that must fail for this predicate to succeed
  def initialize(op)
    super()
    @op = op
  end

  # Runs the wrapped operator, then restores x.pos regardless of the
  # outcome. Returns nil if the operator matched, otherwise an empty
  # MatchString.
  def match(x)
    saved = x.pos
    hit = @op.match(x)
    x.pos = saved

    return nil if hit

    MatchString.new(self, "")
  end

  # Two NotPredicates are equal when their wrapped operators are equal;
  # anything else falls back to the inherited comparison.
  def ==(obj)
    return super unless NotPredicate === obj

    @op == obj.op
  end

  def inspect
    inspect_type "notp", @op.inspect
  end
end
390
+
391
# Operator that refers to a rule by name and applies it at match time.
class RuleReference < Operator
  attr_reader :rule_name

  # name    - name of the rule to apply
  # grammar - optional grammar to look the rule up in; when nil the
  #           grammar currently attached to the parser is used
  def initialize(name, grammar=nil)
    super()
    @rule_name = name
    @grammar = grammar
  end

  # Looks the rule up and applies it through +x+.
  #
  # When this reference carries its own grammar and it differs from
  # x.grammar, the lookup and application happen inside
  # x.switch_grammar(@grammar); otherwise x.grammar is used directly.
  #
  # Raises RuntimeError if no rule with that name is found.
  def match(x)
    if @grammar and @grammar != x.grammar
      x.switch_grammar(@grammar) { apply_from(@grammar, x) }
    else
      apply_from(x.grammar, x)
    end
  end

  # Two RuleReferences are equal when they name the same rule (the
  # grammar is not compared); anything else falls back to the inherited
  # comparison.
  def ==(obj)
    return super unless RuleReference === obj

    @rule_name == obj.rule_name
  end

  def inspect
    inspect_type "ref", @rule_name
  end

  private

  # Finds the referenced rule in +grammar+ and applies it via +x+.
  def apply_from(grammar, x)
    rule = grammar.find(@rule_name)
    raise "Unknown rule: '#{@rule_name}'" unless rule

    x.apply rule
  end
end
427
+
428
# Operator that invokes a rule by name, optionally carrying arguments.
class InvokeRule < Operator
  attr_reader :rule_name, :arguments

  # name - name of the rule to invoke
  # args - optional arguments for the rule (nil when omitted)
  def initialize(name, args=nil)
    super()
    @rule_name = name
    @arguments = args
  end

  # Finds the rule in x.grammar and hands it to x.invoke.
  #
  # Raises RuntimeError if no rule with that name is found.
  def match(x)
    target = x.grammar.find(@rule_name)
    raise "Unknown rule: '#{@rule_name}'" unless target

    x.invoke target
  end

  # Two InvokeRules are equal when rule name and arguments are both
  # equal; anything else falls back to the inherited comparison.
  def ==(obj)
    return super unless InvokeRule === obj

    @rule_name == obj.rule_name && @arguments == obj.arguments
  end

  def inspect
    body = @arguments ? "#{@rule_name} #{@arguments}" : @rule_name
    inspect_type "invoke", body
  end
end
461
+
462
# Operator that invokes a rule belonging to another, named grammar.
class ForeignInvokeRule < Operator
  # grammar - name under which the foreign grammar is known
  # name    - name of the rule to invoke
  # args    - optional arguments; nil and empty are both stored as nil
  def initialize(grammar, name, args=nil)
    super()
    @grammar_name = grammar
    @rule_name = name
    if !args or args.empty?
      @arguments = nil
    else
      @arguments = args
    end
  end

  attr_reader :grammar_name, :rule_name, :arguments

  # Finds the rule by name in x.grammar and hands it to x.invoke.
  #
  # NOTE(review): the lookup goes through x.grammar and never consults
  # @grammar_name -- confirm whether that is intended for the
  # interpreted matching path.
  #
  # Raises RuntimeError if no rule with that name is found.
  def match(x)
    rule = x.grammar.find(@rule_name)
    raise "Unknown rule: '#{@rule_name}'" unless rule
    x.invoke rule
  end

  # Two ForeignInvokeRules are equal when grammar name, rule name and
  # arguments are all equal; anything else falls back to the inherited
  # comparison.
  def ==(obj)
    case obj
    when ForeignInvokeRule
      @grammar_name == obj.grammar_name and \
        @rule_name == obj.rule_name and @arguments == obj.arguments
    else
      super
    end
  end

  # Renders as "%grammar.rule" (plus the arguments, when present).
  # Uses @grammar_name: the previous code interpolated @grammar, an
  # instance variable that is never assigned, so the grammar name was
  # always blank.
  def inspect
    if @arguments
      body = "%#{@grammar_name}.#{@rule_name} #{@arguments}"
    else
      body = "%#{@grammar_name}.#{@rule_name}"
    end
    inspect_type "invoke", body
  end
end
501
+
502
# Wraps an operator so that its value is kept, optionally under a name.
class Tag < Operator
  attr_reader :op, :tag_name

  # op       - the operator being tagged; a Multiple is additionally told
  #            to save its values
  # tag_name - name for the tagged value (may be nil)
  def initialize(op, tag_name)
    super()
    op.save_values! if op.kind_of? Multiple

    @op = op
    @tag_name = tag_name
  end

  # Matches the wrapped operator. Returns a MatchComposition wrapping the
  # single inner match, or nil when the inner operator does not match.
  def match(x)
    inner = @op.match(x)
    return nil unless inner

    MatchComposition.new(self, [inner])
  end

  # Against another Tag: equal when wrapped operator and tag name both
  # match. Against any other Operator: equal when the wrapped operator
  # equals it. Anything else falls back to the inherited comparison.
  def ==(obj)
    if Tag === obj
      @op == obj.op && @tag_name == obj.tag_name
    elsif Operator === obj
      @op == obj
    else
      super
    end
  end

  def inspect
    prefix = @tag_name ? "@#{tag_name} " : ""
    inspect_type "tag", prefix + @op.inspect
  end
end
544
+
545
# Operator that carries an action and consumes no input.
class Action < Operator
  attr_reader :action

  # action - the action payload held by this operator
  def initialize(action)
    super()
    @action = action
  end

  # Always succeeds with an empty MatchString; +x+ is not touched.
  def match(x)
    MatchString.new(self, "")
  end

  # Two Actions are equal when their payloads are equal; anything else
  # falls back to the inherited comparison.
  def ==(obj)
    return super unless Action === obj

    @action == obj.action
  end

  def inspect
    inspect_type "action", "=> #{action.inspect}"
  end
end
570
+
571
# Operator that captures the raw input text consumed by the operator it
# wraps.
class Collect < Operator
  # op - the operator whose consumed text should be captured
  def initialize(op)
    super()
    @op = op
  end

  attr_reader :op

  # Matches the wrapped operator. On success returns a MatchString
  # holding the slice of x.string between the position before the match
  # and the position after it; returns nil when the wrapped operator does
  # not match.
  def match(x)
    start = x.pos
    if @op.match(x)
      # Exclusive range: assumes x.pos is the offset of the next unread
      # character (as with StringScanner#pos), so it must not be part of
      # the capture. The previous inclusive `start..x.pos` picked up one
      # extra character past the match.
      MatchString.new(self, x.string[start...x.pos])
    end
  end

  # Two Collects are equal when their wrapped operators are equal;
  # anything else falls back to the inherited comparison.
  def ==(obj)
    case obj
    when Collect
      @op == obj.op
    else
      super
    end
  end

  def inspect
    inspect_type "collect", @op.inspect
  end
end
599
+
600
# Holds a set of named rules plus setup actions, foreign grammars and
# variables, and offers a small DSL for building operators.
class Grammar
  attr_reader :rules, :rule_order, :setup_actions, :foreign_grammars
  attr_reader :variables

  def initialize
    @rules = {}
    @rule_order = []
    @setup_actions = []
    @foreign_grammars = {}
    @variables = {}
  end

  # Appends +act+ to the list of setup actions.
  def add_setup(act)
    @setup_actions << act
  end

  # Registers +str+ as the foreign grammar known by +name+.
  def add_foreign_grammar(name, str)
    @foreign_grammars[name] = str
  end

  # Stores +val+ under the variable +name+.
  def set_variable(name, val)
    @variables[name] = val
  end

  # The rule named "root", or nil if none has been set.
  def root
    @rules["root"]
  end

  # Defines the rule +name+ with body +op+ (run through Grammar.resolve)
  # and optional +args+. Returns the new Rule.
  #
  # Raises RuntimeError if a rule with that name already exists.
  def set(name, op, args=nil)
    raise "Already set rule named '#{name}'" if @rules.key? name

    # Resolve before recording the name, so a failed resolve leaves the
    # grammar unchanged.
    body = Grammar.resolve(op)

    @rule_order << name
    @rules[name] = Rule.new(name, body, args)
  end

  # The rule stored under +name+, or nil.
  def find(name)
    @rules[name]
  end

  # Converts a plain Ruby object into an operator:
  #
  #   Operator -> returned unchanged
  #   Symbol   -> RuleReference to the rule of that name
  #   String   -> LiteralString
  #   Array    -> Sequence of the resolved elements (elements that are
  #               themselves Sequences are spliced in)
  #   Range    -> CharRange over the range's endpoints
  #   Regexp   -> LiteralRegexp
  #
  # Raises RuntimeError for any other kind of object.
  def self.resolve(obj)
    case obj
    when Operator
      obj
    when Symbol
      RuleReference.new(obj.to_s)
    when String
      LiteralString.new(obj)
    when Array
      parts = obj.inject([]) do |acc, item|
        case item
        when Sequence then acc.concat(item.ops)
        when Operator then acc << item
        else acc << resolve(item)
        end
      end

      Sequence.new(*parts)
    when Range
      CharRange.new(obj.begin.to_s, obj.end.to_s)
    when Regexp
      LiteralRegexp.new(obj)
    else
      raise "Unknown obj type - #{obj.inspect}"
    end
  end

  # Use these to access the rules unambiguously
  def [](rule)
    ref(rule.to_s)
  end

  def []=(name, rule)
    set(name, rule)
  end

  # DSL sugar: `g.name = body` defines a rule and returns the body as
  # given; `g.name` with no arguments yields a reference to that rule.
  # A non-assignment call that passes arguments is forwarded to super.
  def method_missing(meth, *args)
    meth_s = meth.to_s

    if meth_s.end_with?("=")
      body = args.first
      set(meth_s[0..-2], body)
      return body
    end

    super unless args.empty?

    # Hm, I guess this is fine. It might end up confusing people though.
    ref(meth_s)
  end

  # Resolves +obj+ into an operator, attaching the block as its action
  # when one is given.
  def lit(obj, &b)
    Grammar.resolve(obj).tap { |op| op.set_action(b) if b }
  end

  # Builds a Dot, attaching the block as its action when one is given.
  def dot(&b)
    Dot.new.tap { |op| op.set_action(b) if b }
  end

  # Builds a LiteralString for +str+.
  def str(str, &b)
    LiteralString.new(str).tap { |op| op.set_action(b) if b }
  end

  # Builds a LiteralRegexp from +reg+ and the option letters +opts+.
  def reg(reg, opts=nil, &b)
    LiteralRegexp.new(reg, opts).tap { |op| op.set_action(b) if b }
  end

  # Builds a CharRange from +start+ to +fin+.
  def range(start, fin, &b)
    CharRange.new(start, fin).tap { |op| op.set_action(b) if b }
  end

  # Builds a Choice over the resolved +nodes+.
  def any(*nodes, &b)
    alternatives = nodes.map { |node| Grammar.resolve(node) }
    Choice.new(*alternatives).tap { |op| op.set_action(b) if b }
  end

  # Builds a Multiple repeating +node+ between +min+ and +max+ times
  # (+max+ nil meaning unbounded).
  def multiple(node, min, max, &b)
    Multiple.new(Grammar.resolve(node), min, max).tap do |op|
      op.set_action(b) if b
    end
  end

  # Zero or one occurrence of +node+.
  def maybe(node, &b)
    multiple Grammar.resolve(node), 0, 1, &b
  end

  # One or more occurrences of +node+.
  def many(node, &b)
    multiple Grammar.resolve(node), 1, nil, &b
  end

  # Zero or more occurrences of +node+.
  def kleene(node, &b)
    multiple Grammar.resolve(node), 0, nil, &b
  end

  # Builds a Sequence from +nodes+. +nodes+ is always an Array here, so
  # Grammar.resolve's Array handling does the resolving and the splicing
  # of nested Sequences.
  def seq(*nodes, &b)
    Grammar.resolve(nodes).tap { |op| op.set_action(b) if b }
  end

  # Positive lookahead on +node+.
  def andp(node)
    AndPredicate.new Grammar.resolve(node)
  end

  # Negative lookahead on +node+.
  def notp(node)
    NotPredicate.new Grammar.resolve(node)
  end

  # Reference to the rule +name+, optionally looked up in +other_grammar+.
  def ref(name, other_grammar=nil)
    RuleReference.new name.to_s, other_grammar
  end

  # Invocation of the rule +name+ with optional +args+.
  def invoke(name, args=nil)
    InvokeRule.new name.to_s, args
  end

  # Invocation of the rule +name+ from the foreign grammar +gram+.
  def foreign_invoke(gram, name, args=nil)
    ForeignInvokeRule.new gram, name.to_s, args
  end

  # Tags the resolved +op+, optionally under +name+.
  def t(op, name=nil)
    Tag.new Grammar.resolve(op), name
  end

  # Wraps +action+ in an Action operator.
  def action(action)
    Action.new action
  end

  # Wraps the resolved +op+ in a Collect operator.
  def collect(op)
    Collect.new Grammar.resolve(op)
  end
end
805
+
806
+
807
+ end