kpeg 0.7

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,807 @@
1
+ require 'strscan'
2
+ require 'kpeg/parser'
3
+ require 'kpeg/match'
4
+
5
+ module KPeg
6
# A named grammar rule: binds a rule name to the operator that implements
# it, together with optional rule arguments.
class Rule
  attr_reader :name, :op, :arguments

  # name - the name the rule is registered under
  # op   - the operator evaluated when the rule is applied
  # args - optional rule arguments (nil when not given)
  def initialize(name, op, args=nil)
    @name, @op, @arguments = name, op, args
  end
end
15
+
16
# Base class for grammar operators. Carries an optional action and remembers
# which positions of an operand list hold Tag operators so that match values
# can later be narrowed to the tagged ones.
class Operator
  attr_accessor :action

  def initialize
    @has_tags = false
    @action = nil
  end

  # Stores +act+ as this operator's action and returns it.
  def set_action(act)
    @action = act
  end

  # Records the indexes of every Tag found in +ops+. Once a Tag has been
  # seen the operator stays flagged as tagged, and the index list is
  # replaced by the one computed from this call.
  def detect_tags(ops)
    positions = []
    ops.each_with_index do |candidate, i|
      positions << i if candidate.kind_of?(Tag)
    end

    @has_tags = true unless positions.empty?
    @tags = positions if @has_tags
  end

  # Returns +values+ untouched when no tags were detected; otherwise only
  # the entries sitting at the tagged indexes.
  def prune_values(values)
    @has_tags ? values.values_at(*@tags) : values
  end

  # Shared formatter used by the subclasses' #inspect implementations.
  def inspect_type(tag, body)
    "#<#{tag} #{body}>"
  end

  # Builds an ordered choice between this operator and +other+ (which is
  # resolved through Grammar.resolve first).
  def |(other)
    alternative = Grammar.resolve(other)
    Choice.new(self, alternative)
  end
end
53
+
54
# Operator that consumes a single unit of input via the matcher's #get_byte.
class Dot < Operator
  # Returns a MatchString wrapping what #get_byte produced, or reports a
  # failure through x.fail when nothing could be read.
  def match(x)
    byte = x.get_byte
    return x.fail(self) unless byte

    MatchString.new(self, byte)
  end

  # Every Dot is equal to every other Dot (and to nothing else).
  def ==(obj)
    Dot === obj
  end

  def inspect
    "#<dot>"
  end
end
71
+
72
# Operator that matches one exact string. The string is escaped and compiled
# into a Regexp once, at construction time, and handed to the matcher's #scan.
class LiteralString < Operator
  attr_reader :string

  def initialize(str)
    super()
    @string = str
    @reg = Regexp.new(Regexp.escape(str))
  end

  # Returns a MatchString for the scanned text, or the result of x.fail
  # when the literal is not found at the current position.
  def match(x)
    scanned = x.scan(@reg)
    return x.fail(self) unless scanned

    MatchString.new(self, scanned)
  end

  # Two LiteralStrings are equal when their strings are equal; anything
  # else is delegated to the inherited comparison.
  def ==(obj)
    return super unless LiteralString === obj

    @string == obj.string
  end

  def inspect
    inspect_type('str', @string.inspect)
  end
end
102
+
103
# Operator that matches a regular expression at the current position.
class LiteralRegexp < Operator
  # reg  - either a ready-made Regexp (used as is) or a String holding the
  #        regexp source.
  # opts - only consulted when +reg+ is a String: a string of one-letter
  #        options. "m", "x" and "i" map to the MULTILINE, EXTENDED and
  #        IGNORECASE flags; "n", "e", "s", "u" (either case) select the
  #        legacy language/kcode option, last one wins. Unknown letters
  #        are ignored.
  def initialize(reg, opts=nil)
    super()

    if reg.kind_of? String
      flags = 0
      lang = nil

      if opts
        opts.split("").each do |o|
          case o
          when "n", "N", "e", "E", "s", "S", "u", "U"
            lang = o
          when "m"
            flags |= Regexp::MULTILINE
          when "x"
            flags |= Regexp::EXTENDED
          when "i"
            flags |= Regexp::IGNORECASE
          end
        end
      end

      if defined?(Regexp::NOENCODING)
        # The three-argument form of Regexp.new was deprecated in Ruby 3.2
        # and removed in 3.3, where it raises ArgumentError. On every Ruby
        # that defines NOENCODING the only language option with an effect
        # is "n", and it is expressed through this flag instead.
        flags |= Regexp::NOENCODING if lang && lang.downcase == "n"
        @regexp = Regexp.new(reg, flags)
      else
        # Very old Rubies without NOENCODING: keep the original call.
        @regexp = Regexp.new(reg, flags, lang)
      end
    else
      @regexp = reg
    end
  end

  attr_reader :regexp

  # The source text of the wrapped regexp.
  def string
    @regexp.source
  end

  # Returns a MatchString for the scanned text, or the result of x.fail
  # when the regexp does not match at the current position.
  def match(x)
    if str = x.scan(@regexp)
      MatchString.new(self, str)
    else
      x.fail(self)
    end
  end

  # Two LiteralRegexps are equal when their regexps are equal; anything
  # else is delegated to the inherited comparison.
  def ==(obj)
    case obj
    when LiteralRegexp
      @regexp == obj.regexp
    else
      super
    end
  end

  def inspect
    inspect_type 'reg', @regexp.inspect
  end
end
159
+
160
# Operator that matches one character lying between two bounds. It is
# implemented as a regexp character class built from the escaped bounds.
class CharRange < Operator
  attr_reader :start, :fin

  def initialize(start, fin)
    super()
    @start = start
    @fin = fin
    lower = Regexp.escape(start)
    upper = Regexp.escape(fin)
    @regexp = Regexp.new("[#{lower}-#{upper}]")
  end

  # The source text of the generated character class.
  def string
    @regexp.source
  end

  # Returns a MatchString for the scanned character, or the result of
  # x.fail when the next character is outside the range.
  def match(x)
    scanned = x.scan(@regexp)
    return x.fail(self) unless scanned

    MatchString.new(self, scanned)
  end

  # Two CharRanges are equal when both bounds agree; anything else is
  # delegated to the inherited comparison.
  def ==(obj)
    return super unless CharRange === obj

    @start == obj.start && @fin == obj.fin
  end

  def inspect
    inspect_type('range', "#{@start}-#{@fin}")
  end
end
195
+
196
# Ordered choice: tries each alternative in turn and yields the first match.
class Choice < Operator
  attr_reader :ops

  def initialize(*many)
    super()
    @ops = many
  end

  # Appends another alternative to this choice in place and returns self,
  # so chained `a | b | c` expressions build a single flat Choice.
  def |(other)
    @ops << Grammar.resolve(other)
    self
  end

  # Returns the first truthy result produced by an alternative. The
  # position is rewound after every failed alternative; nil is returned
  # when none of them match.
  def match(x)
    origin = x.pos

    @ops.each do |alternative|
      result = alternative.match(x)
      return result if result

      x.pos = origin
    end

    nil
  end

  # Two Choices are equal when their alternative lists are equal; anything
  # else is delegated to the inherited comparison.
  def ==(obj)
    return super unless Choice === obj

    @ops == obj.ops
  end

  def inspect
    inspect_type("any", @ops.map(&:inspect).join(' | '))
  end
end
236
+
237
# Repetition operator: applies the wrapped operator repeatedly and succeeds
# when the number of matches is at least +min+ and never exceeded +max+.
class Multiple < Operator
  attr_reader :op, :min, :max, :save_values

  # op  - operator to repeat
  # min - minimum number of matches required
  # max - maximum number of matches allowed, or nil for no upper bound
  def initialize(op, min, max)
    super()
    @op = op
    @min = min
    @max = max
    @save_values = nil
  end

  # Flags this repetition so that its values are kept (set by Tag).
  def save_values!
    @save_values = true
  end

  # Keeps matching until the wrapped operator stops matching. As soon as
  # the count goes past +max+ the position is rewound and nil is returned;
  # the same happens when fewer than +min+ matches were collected.
  # Otherwise a MatchComposition holding every match is returned.
  def match(x)
    origin = x.pos
    found = []

    while (m = @op.match(x))
      found << m

      if @max && found.size > @max
        x.pos = origin
        return nil
      end
    end

    return MatchComposition.new(self, found) if found.size >= @min

    x.pos = origin
    nil
  end

  # Two Multiples are equal when operator and both bounds agree; anything
  # else is delegated to the inherited comparison.
  def ==(obj)
    return super unless Multiple === obj

    @op == obj.op && @min == obj.min && @max == obj.max
  end

  def inspect
    upper = @max ? @max : "*"
    inspect_type("multi", "#{@min} #{upper} #{@op.inspect}")
  end
end
294
+
295
# Sequence operator: every sub-operator has to match, one after another.
class Sequence < Operator
  attr_reader :ops

  def initialize(*ops)
    super()
    @ops = ops
    detect_tags(ops)
  end

  # Runs each sub-operator in order. On the first failure the position is
  # rewound to where the sequence started and nil is returned; otherwise
  # a MatchComposition with one entry per sub-operator is returned.
  def match(x)
    origin = x.pos
    collected = []

    @ops.each do |part|
      m = part.match(x)

      unless m
        x.pos = origin
        return nil
      end

      collected << m
    end

    MatchComposition.new(self, collected)
  end

  # Two Sequences are equal when their operator lists are equal; anything
  # else is delegated to the inherited comparison.
  def ==(obj)
    return super unless Sequence === obj

    @ops == obj.ops
  end

  def inspect
    inspect_type("seq", @ops.map(&:inspect).join(' '))
  end
end
330
+
331
# Positive lookahead: succeeds when the wrapped operator matches, without
# keeping any input consumed.
class AndPredicate < Operator
  attr_reader :op

  def initialize(op)
    super()
    @op = op
  end

  # Tries the wrapped operator and always restores the position afterwards.
  # Returns an empty MatchString on success and nil on failure.
  def match(x)
    saved = x.pos
    hit = @op.match(x)
    x.pos = saved

    return nil unless hit

    MatchString.new(self, "")
  end

  # Two AndPredicates are equal when their operators are equal; anything
  # else is delegated to the inherited comparison.
  def ==(obj)
    return super unless AndPredicate === obj

    @op == obj.op
  end

  def inspect
    inspect_type("andp", @op.inspect)
  end
end
360
+
361
# Negative lookahead: succeeds when the wrapped operator does NOT match,
# without keeping any input consumed.
class NotPredicate < Operator
  attr_reader :op

  def initialize(op)
    super()
    @op = op
  end

  # Tries the wrapped operator and always restores the position afterwards.
  # Returns nil when it matched and an empty MatchString when it did not.
  def match(x)
    saved = x.pos
    hit = @op.match(x)
    x.pos = saved

    return nil if hit

    MatchString.new(self, "")
  end

  # Two NotPredicates are equal when their operators are equal; anything
  # else is delegated to the inherited comparison.
  def ==(obj)
    return super unless NotPredicate === obj

    @op == obj.op
  end

  def inspect
    inspect_type("notp", @op.inspect)
  end
end
390
+
391
# Reference to a rule by name, optionally bound to a specific grammar.
class RuleReference < Operator
  attr_reader :rule_name

  # name    - name of the referenced rule
  # grammar - grammar the rule lives in, or nil to use the matcher's
  #           current grammar
  def initialize(name, grammar=nil)
    super()
    @rule_name = name
    @grammar = grammar
  end

  # Looks the rule up and applies it through the matcher. When a grammar
  # was bound and differs from the matcher's current one, the lookup and
  # the application happen inside x.switch_grammar.
  # Raises RuntimeError when the rule cannot be found.
  def match(x)
    foreign = @grammar && @grammar != x.grammar
    return apply_from(x.grammar, x) unless foreign

    x.switch_grammar(@grammar) { apply_from(@grammar, x) }
  end

  # Two RuleReferences are equal when they name the same rule (the bound
  # grammar is not compared); anything else is delegated to the inherited
  # comparison.
  def ==(obj)
    return super unless RuleReference === obj

    @rule_name == obj.rule_name
  end

  def inspect
    inspect_type("ref", @rule_name)
  end

  private

  # Finds the referenced rule in +grammar+ and applies it via +x+.
  def apply_from(grammar, x)
    rule = grammar.find(@rule_name)
    raise "Unknown rule: '#{@rule_name}'" unless rule

    x.apply rule
  end
end
427
+
428
# Invocation of a rule by name, optionally carrying arguments.
class InvokeRule < Operator
  attr_reader :rule_name, :arguments

  def initialize(name, args=nil)
    super()
    @rule_name = name
    @arguments = args
  end

  # Looks the rule up in the matcher's grammar and invokes it through
  # x.invoke. Raises RuntimeError when the rule cannot be found.
  def match(x)
    target = x.grammar.find(@rule_name)
    raise "Unknown rule: '#{@rule_name}'" unless target

    x.invoke target
  end

  # Two InvokeRules are equal when rule name and arguments agree; anything
  # else is delegated to the inherited comparison.
  def ==(obj)
    return super unless InvokeRule === obj

    @rule_name == obj.rule_name && @arguments == obj.arguments
  end

  def inspect
    body = @arguments ? "#{@rule_name} #{@arguments}" : @rule_name
    inspect_type("invoke", body)
  end
end
461
+
462
# Invocation of a rule that is declared as belonging to another (foreign)
# grammar, identified by the foreign grammar's name.
class ForeignInvokeRule < Operator
  # grammar - name of the foreign grammar
  # name    - name of the rule to invoke
  # args    - optional arguments; nil and empty values are both stored as nil
  def initialize(grammar, name, args=nil)
    super()
    @grammar_name = grammar
    @rule_name = name
    if !args or args.empty?
      @arguments = nil
    else
      @arguments = args
    end
  end

  attr_reader :grammar_name, :rule_name, :arguments

  # Looks the rule up and invokes it through x.invoke.
  # Raises RuntimeError when the rule cannot be found.
  #
  # NOTE(review): the lookup goes through x.grammar and never consults
  # @grammar_name — confirm whether the foreign grammar should be used here.
  def match(x)
    rule = x.grammar.find(@rule_name)
    raise "Unknown rule: '#{@rule_name}'" unless rule
    x.invoke rule
  end

  # Two ForeignInvokeRules are equal when grammar name, rule name and
  # arguments all agree; anything else is delegated to the inherited
  # comparison.
  def ==(obj)
    case obj
    when ForeignInvokeRule
      @grammar_name == obj.grammar_name and \
        @rule_name == obj.rule_name and @arguments == obj.arguments
    else
      super
    end
  end

  # Renders as "%grammar.rule [args]". This used to interpolate @grammar,
  # an instance variable that is never assigned, so the grammar name was
  # always missing from the output.
  def inspect
    if @arguments
      body = "%#{@grammar_name}.#{@rule_name} #{@arguments}"
    else
      body = "%#{@grammar_name}.#{@rule_name}"
    end
    inspect_type "invoke", body
  end
end
501
+
502
# Wraps another operator and optionally gives it a name. Tags are what
# Operator#detect_tags looks for when deciding which values to keep.
class Tag < Operator
  attr_reader :op, :tag_name

  # op       - operator being tagged; a Multiple is told to save its values
  # tag_name - optional name of the tag
  def initialize(op, tag_name)
    super()
    op.save_values! if op.kind_of?(Multiple)

    @op = op
    @tag_name = tag_name
  end

  # Returns a MatchComposition wrapping the inner match, or nil when the
  # wrapped operator does not match.
  def match(x)
    inner = @op.match(x)
    return unless inner

    MatchComposition.new(self, [inner])
  end

  # A Tag equals another Tag with the same operator and name, and also
  # equals any other Operator that its wrapped operator is equal to.
  # Everything else is delegated to the inherited comparison.
  def ==(obj)
    if Tag === obj
      @op == obj.op && @tag_name == obj.tag_name
    elsif Operator === obj
      @op == obj
    else
      super
    end
  end

  def inspect
    prefix = @tag_name ? "@#{tag_name} " : ""
    inspect_type("tag", prefix + @op.inspect)
  end
end
544
+
545
# Operator that consumes no input and only carries an action.
class Action < Operator
  attr_reader :action

  def initialize(action)
    super()
    @action = action
  end

  # Always succeeds with an empty MatchString; the matcher is not touched.
  def match(x)
    MatchString.new(self, "")
  end

  # Two Actions are equal when their actions are equal; anything else is
  # delegated to the inherited comparison.
  def ==(obj)
    return super unless Action === obj

    @action == obj.action
  end

  def inspect
    inspect_type("action", "=> #{action.inspect}")
  end
end
570
+
571
# Wraps an operator and, when it matches, yields the raw text it consumed
# instead of the wrapped operator's own match object.
class Collect < Operator
  def initialize(op)
    super()
    @op = op
  end

  attr_reader :op

  # Returns a MatchString holding the input between the position before
  # the wrapped operator ran and the position after it, or nil when the
  # wrapped operator does not match.
  def match(x)
    start = x.pos
    if @op.match(x)
      # Exclusive range: x.pos is the position *after* the consumed text,
      # so an inclusive range would drag in one extra, unconsumed character.
      # NOTE(review): assumes x.pos and String#[] use the same unit
      # (bytes vs. characters) — confirm for multibyte input.
      MatchString.new(self, x.string[start...x.pos])
    end
  end

  # Two Collects are equal when their operators are equal; anything else
  # is delegated to the inherited comparison.
  def ==(obj)
    case obj
    when Collect
      @op == obj.op
    else
      super
    end
  end

  def inspect
    inspect_type "collect", @op.inspect
  end
end
599
+
600
# A collection of named rules plus the DSL used to build operators.
#
# Rules are stored by name in definition order. Besides the explicit
# builder methods (str, reg, seq, any, ...), unknown zero-argument method
# calls return a reference to the rule of that name and unknown setter
# calls define a rule (see #method_missing).
class Grammar
  def initialize
    @rules = {}
    @rule_order = []
    @setup_actions = []
    @foreign_grammars = {}
    @variables = {}
  end

  attr_reader :rules, :rule_order, :setup_actions, :foreign_grammars
  attr_reader :variables

  # Appends +act+ to the list of setup actions.
  def add_setup(act)
    @setup_actions << act
  end

  # Registers a foreign grammar under +name+.
  def add_foreign_grammar(name, str)
    @foreign_grammars[name] = str
  end

  # Stores a grammar-level variable.
  def set_variable(name, val)
    @variables[name] = val
  end

  # The rule registered under the name "root", or nil.
  def root
    @rules["root"]
  end

  # Defines the rule +name+ as +op+ (resolved through Grammar.resolve) and
  # returns the new Rule. Raises RuntimeError when the name is taken.
  def set(name, op, args=nil)
    if @rules.key? name
      raise "Already set rule named '#{name}'"
    end

    op = Grammar.resolve(op)

    @rule_order << name

    rule = Rule.new(name, op, args)
    @rules[name] = rule
  end

  # The Rule registered under +name+, or nil.
  def find(name)
    @rules[name]
  end

  # Converts a plain Ruby object into an operator:
  #   Operator -> returned as is
  #   Symbol   -> RuleReference to the rule of that name
  #   String   -> LiteralString
  #   Array    -> Sequence of the resolved elements (nested Sequences are
  #               flattened into the new one)
  #   Range    -> CharRange over the string forms of its bounds
  #   Regexp   -> LiteralRegexp
  # Raises RuntimeError for anything else.
  def self.resolve(obj)
    case obj
    when Operator
      return obj
    when Symbol
      return RuleReference.new(obj.to_s)
    when String
      return LiteralString.new(obj)
    when Array
      ops = []
      obj.each do |x|
        case x
        when Sequence
          ops.concat x.ops
        when Operator
          ops << x
        else
          ops << resolve(x)
        end
      end

      return Sequence.new(*ops)
    when Range
      return CharRange.new(obj.begin.to_s, obj.end.to_s)
    when Regexp
      return LiteralRegexp.new(obj)
    else
      raise "Unknown obj type - #{obj.inspect}"
    end
  end

  # Use these to access the rules unambiguously
  def [](rule)
    ref(rule.to_s)
  end

  # Defines a rule. The name is normalized with to_s, mirroring #[], so a
  # rule defined as g[:foo] is stored under "foo" — the key that #[], #ref
  # and Symbol resolution look it up by.
  def []=(name, rule)
    set(name.to_s, rule)
  end

  # g.foo = op   defines the rule "foo" and returns +op+.
  # g.foo        returns a RuleReference to "foo".
  # Any other call that carries arguments is passed on to super.
  def method_missing(meth, *args)
    meth_s = meth.to_s

    if meth_s[-1,1] == "="
      rule = args.first
      set(meth_s[0..-2], rule)
      return rule
    elsif !args.empty?
      super
    end

    # Hm, I guess this is fine. It might end up confusing people though.
    return ref(meth.to_s)
  end

  # Resolves +obj+ into an operator and attaches the block as its action.
  def lit(obj, &b)
    op = Grammar.resolve(obj)
    op.set_action(b) if b
    op
  end

  # Builds a Dot operator, with the block as its action if given.
  def dot(&b)
    op = Dot.new
    op.set_action(b) if b
    op
  end

  # Builds a LiteralString, with the block as its action if given.
  def str(str, &b)
    op = LiteralString.new str
    op.set_action(b) if b
    op
  end

  # Builds a LiteralRegexp, with the block as its action if given.
  def reg(reg, opts=nil, &b)
    op = LiteralRegexp.new reg, opts
    op.set_action(b) if b
    op
  end

  # Builds a CharRange, with the block as its action if given.
  def range(start, fin, &b)
    op = CharRange.new(start, fin)
    op.set_action(b) if b
    op
  end

  # Builds a Choice over the resolved +nodes+.
  def any(*nodes, &b)
    resolved = nodes.map { |x| Grammar.resolve(x) }
    op = Choice.new(*resolved)
    op.set_action(b) if b
    op
  end

  # Builds a Multiple repeating +node+ between +min+ and +max+ times
  # (+max+ may be nil for no upper bound).
  def multiple(node, min, max, &b)
    op = Multiple.new Grammar.resolve(node), min, max
    op.set_action(b) if b
    op
  end

  # Zero or one repetition of +node+.
  def maybe(node, &b)
    multiple Grammar.resolve(node), 0, 1, &b
  end

  # One or more repetitions of +node+.
  def many(node, &b)
    multiple Grammar.resolve(node), 1, nil, &b
  end

  # Zero or more repetitions of +node+.
  def kleene(node, &b)
    multiple Grammar.resolve(node), 0, nil, &b
  end

  # Builds a Sequence from +nodes+; Sequences among them are flattened
  # into the new one and non-operators are resolved first.
  def seq(*nodes, &b)
    ops = []
    nodes.each do |x|
      case x
      when Sequence
        ops.concat x.ops
      when Operator
        ops << x
      else
        ops << Grammar.resolve(x)
      end
    end

    op = Sequence.new(*ops)
    op.set_action(b) if b
    op
  end

  # Positive lookahead over +node+.
  def andp(node)
    AndPredicate.new Grammar.resolve(node)
  end

  # Negative lookahead over +node+.
  def notp(node)
    NotPredicate.new Grammar.resolve(node)
  end

  # Reference to the rule +name+, optionally bound to +other_grammar+.
  def ref(name, other_grammar=nil)
    RuleReference.new name.to_s, other_grammar
  end

  # Invocation of the rule +name+ with optional +args+.
  def invoke(name, args=nil)
    InvokeRule.new name.to_s, args
  end

  # Invocation of the rule +name+ of the foreign grammar +gram+.
  def foreign_invoke(gram, name, args=nil)
    ForeignInvokeRule.new gram, name.to_s, args
  end

  # Wraps +op+ in a Tag, optionally named +name+.
  def t(op, name=nil)
    Tag.new Grammar.resolve(op), name
  end

  # Builds an Action operator carrying +action+.
  def action(action)
    Action.new action
  end

  # Wraps +op+ in a Collect.
  def collect(op)
    Collect.new Grammar.resolve(op)
  end
end
805
+
806
+
807
+ end