kpeg 0.8.5 → 0.9.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (46) hide show
  1. data/.autotest +10 -0
  2. data/.gemtest +0 -0
  3. data/Gemfile +11 -3
  4. data/History.txt +21 -0
  5. data/LICENSE +25 -0
  6. data/Manifest.txt +47 -0
  7. data/README.rdoc +222 -0
  8. data/Rakefile +23 -11
  9. data/bin/kpeg +4 -2
  10. data/examples/calculator/calculator.kpeg +17 -0
  11. data/examples/calculator/calculator.rb +7 -0
  12. data/examples/foreign_reference/literals.kpeg +5 -0
  13. data/examples/foreign_reference/matcher.kpeg +9 -0
  14. data/examples/foreign_reference/matcher.rb +5 -0
  15. data/examples/lua_string/driver.rb +21 -0
  16. data/examples/lua_string/lua_string.kpeg +14 -0
  17. data/examples/lua_string/lua_string.kpeg.rb +460 -0
  18. data/examples/phone_number/README.md +3 -0
  19. data/examples/phone_number/phone_number.kpeg +20 -0
  20. data/examples/phone_number/phone_number.rb +6 -0
  21. data/examples/upper/README.md +83 -0
  22. data/examples/upper/upper.kpeg +24 -0
  23. data/examples/upper/upper.rb +9 -0
  24. data/kpeg.gemspec +35 -17
  25. data/lib/hoe/kpeg.rb +94 -0
  26. data/lib/kpeg.rb +3 -0
  27. data/lib/kpeg/code_generator.rb +16 -3
  28. data/lib/kpeg/compiled_parser.rb +18 -28
  29. data/lib/kpeg/format_parser.kpeg +129 -0
  30. data/lib/kpeg/format_parser.rb +88 -49
  31. data/lib/kpeg/grammar.rb +10 -0
  32. data/lib/kpeg/string_escape.kpeg +20 -0
  33. data/test/inputs/comments.kpeg +5 -0
  34. data/test/test_file_parser_roundtrip.rb +3 -3
  35. data/test/test_gen_calc.rb +2 -2
  36. data/test/test_kpeg.rb +2 -2
  37. data/test/test_kpeg_code_generator.rb +65 -2
  38. data/test/test_kpeg_compiled_parser.rb +2 -2
  39. data/test/test_kpeg_format.rb +49 -4
  40. data/test/test_kpeg_grammar_renderer.rb +2 -2
  41. data/test/test_left_recursion.rb +2 -2
  42. data/{doc → vim}/syntax_kpeg/ftdetect/kpeg.vim +0 -0
  43. data/{doc → vim}/syntax_kpeg/syntax/kpeg.vim +0 -0
  44. metadata +89 -26
  45. data/README.md +0 -183
  46. data/lib/kpeg/version.rb +0 -3
@@ -0,0 +1,129 @@
1
+ %% name = KPeg::FormatParser
2
+
3
+ %% {
4
+ require 'kpeg/grammar'
5
+
6
+ def initialize(str, debug=false)
7
+ setup_parser(str, debug)
8
+ @g = KPeg::Grammar.new
9
+ end
10
+
11
+ attr_reader :g
12
+ alias_method :grammar, :g
13
+ }
14
+
15
+
16
+ eol = "\n"
17
+ eof_comment = "#" (!eof .)*
18
+
19
+ comment = "#" (!eol .)* eol
20
+ space = " " | "\t" | eol
21
+ - = (space | comment)*
22
+ kleene = "*"
23
+
24
+ # Allow - by itself, but not at the beginning
25
+ var = < "-" | /[a-zA-Z][\-_a-zA-Z0-9]*/ > { text }
26
+ method = < /[a-zA-Z_][a-zA-Z0-9_]*/ > { text }
27
+
28
+ dbl_escapes = "n" { "\n" }
29
+ | "s" { " " }
30
+ | "r" { "\r" }
31
+ | "t" { "\t" }
32
+ | "v" { "\v" }
33
+ | "f" { "\f" }
34
+ | "b" { "\b" }
35
+ | "a" { "\a" }
36
+ | "e" { "\e" }
37
+ | "\\" { "\\" }
38
+ | "\"" { "\"" }
39
+ | num_escapes
40
+ | < . > { text }
41
+ num_escapes = < /[0-7]{1,3}/ > { [text.to_i(8)].pack("U") }
42
+ | "x" < /[0-9a-fA-F]{2}/ > { [text.to_i(16)].pack("U") }
43
+ dbl_seq = < /[^\\"]+/ > { text }
44
+ dbl_not_quote = ("\\" dbl_escapes:s | dbl_seq:s)*:ary { Array(ary) }
45
+ dbl_string = "\"" dbl_not_quote:s "\"" { @g.str(s.join) }
46
+ sgl_escape_quote = "\\'" { "'" }
47
+ sgl_seq = < /[^']/ > { text }
48
+ sgl_not_quote = (sgl_escape_quote | sgl_seq)*:segs { Array(segs) }
49
+ sgl_string = "'" sgl_not_quote:s "'" { @g.str(s.join) }
50
+ string = dbl_string
51
+ | sgl_string
52
+
53
+ not_slash = < ("\\/" | /[^\/]/)+ > { text }
54
+ regexp_opts = < [a-z]* > { text }
55
+ regexp = "/" not_slash:body "/" regexp_opts:opts
56
+ { @g.reg body, opts }
57
+
58
+ char = < /[a-zA-Z0-9]/ > { text }
59
+ char_range = "[" char:l "-" char:r "]" { @g.range(l,r) }
60
+
61
+ range_num = < /[1-9][0-9]*/ > { text }
62
+ range_elem = < range_num|kleene > { text }
63
+ mult_range = "[" - range_elem:l - "," - range_elem:r - "]"
64
+ { [l == "*" ? nil : l.to_i, r == "*" ? nil : r.to_i] }
65
+ | "[" - range_num:e - "]" { [e.to_i, e.to_i] }
66
+
67
+ curly_block = curly
68
+ curly = "{" < (/[^{}"']+/ | string | curly)* > "}" { @g.action(text) }
69
+ nested_paren = "(" (/[^()"']+/ | string | nested_paren)* ")"
70
+
71
+ value = value:v ":" var:n { @g.t(v,n) }
72
+ | value:v "?" { @g.maybe(v) }
73
+ | value:v "+" { @g.many(v) }
74
+ | value:v "*" { @g.kleene(v) }
75
+ | value:v mult_range:r { @g.multiple(v, *r) }
76
+ | "&" value:v { @g.andp(v) }
77
+ | "!" value:v { @g.notp(v) }
78
+ | "(" - expression:o - ")" { o }
79
+ | "@<" - expression:o - ">" { @g.bounds(o) }
80
+ | "<" - expression:o - ">" { @g.collect(o) }
81
+ | curly_block
82
+ | "~" method:m < nested_paren? >
83
+ { @g.action("#{m}#{text}") }
84
+ | "." { @g.dot }
85
+ | "@" var:name < nested_paren? > !(- "=")
86
+ { @g.invoke(name, text.empty? ? nil : text) }
87
+ | "^" var:name < nested_paren? >
88
+ { @g.foreign_invoke("parent", name, text) }
89
+ | "%" var:gram "." var:name < nested_paren? >
90
+ { @g.foreign_invoke(gram, name, text) }
91
+ | var:name < nested_paren? > !(- "=")
92
+ { @g.ref(name, nil, text.empty? ? nil : text) }
93
+ | char_range
94
+ | regexp
95
+ | string
96
+
97
+ spaces = (space | comment)+
98
+ values = values:s spaces value:v { @g.seq(s, v) }
99
+ | value:l spaces value:r { @g.seq(l, r) }
100
+ | value
101
+ choose_cont = - "|" - values:v { v }
102
+ expression = values:v choose_cont+:alts { @g.any(v, *alts) }
103
+ | values
104
+ args = args:a "," - var:n - { a + [n] }
105
+ | - var:n - { [n] }
106
+ statement = - var:v "(" args:a ")" - "=" - expression:o { @g.set(v, o, a) }
107
+ | - var:v - "=" - expression:o { @g.set(v, o) }
108
+ | - "%" var:name - "=" - < /[::A-Za-z0-9_]+/ >
109
+ { @g.add_foreign_grammar(name, text) }
110
+ | - "%%" - curly:act { @g.add_setup act }
111
+ | - "%%" - var:name - curly:act { @g.add_directive name, act }
112
+ | - "%%" - var:name - "=" - < (!"\n" .)+ >
113
+ { @g.set_variable(name, text) }
114
+ statements = statement (- statements)?
115
+ eof = !.
116
+ root = statements - eof_comment? eof
117
+
118
+ # These are a separate set of rules used to parse an ast declaration
119
+
120
+ ast_constant = < /[A-Z][A-Za-z0-9_]*/ > { text }
121
+ ast_word = < /[A-Za-z_][A-Za-z0-9_]*/ > { text }
122
+
123
+ ast_sp = (" " | "\t")*
124
+
125
+ ast_words = ast_words:r ast_sp "," ast_sp ast_word:w { r + [w] }
126
+ | ast_word:w { [w] }
127
+
128
+ ast_root = ast_constant:c "(" ast_words:w ")" { [c, w] }
129
+ | ast_constant:c "()"? { [c, []] }
@@ -186,41 +186,37 @@ class KPeg::FormatParser
186
186
  end
187
187
 
188
188
  def parse(rule=nil)
189
+ # We invoke the rules indirectly via apply
190
+ # instead of by just calling them as methods because
191
+ # if the rules use left recursion, apply needs to
192
+ # manage that.
193
+
189
194
  if !rule
190
- _root ? true : false
195
+ apply(:_root)
191
196
  else
192
- # This is not shared with code_generator.rb so this can be standalone
193
197
  method = rule.gsub("-","_hyphen_")
194
- __send__("_#{method}") ? true : false
198
+ apply :"_#{method}"
195
199
  end
196
200
  end
197
201
 
198
- class LeftRecursive
199
- def initialize(detected=false)
200
- @detected = detected
201
- end
202
-
203
- attr_accessor :detected
204
- end
205
-
206
202
  class MemoEntry
207
203
  def initialize(ans, pos)
208
204
  @ans = ans
209
205
  @pos = pos
210
- @uses = 1
211
206
  @result = nil
207
+ @set = false
208
+ @left_rec = false
212
209
  end
213
210
 
214
- attr_reader :ans, :pos, :uses, :result
215
-
216
- def inc!
217
- @uses += 1
218
- end
211
+ attr_reader :ans, :pos, :result, :set
212
+ attr_accessor :left_rec
219
213
 
220
214
  def move!(ans, pos, result)
221
215
  @ans = ans
222
216
  @pos = pos
223
217
  @result = result
218
+ @set = true
219
+ @left_rec = false
224
220
  end
225
221
  end
226
222
 
@@ -248,12 +244,10 @@ class KPeg::FormatParser
248
244
  def apply_with_args(rule, *args)
249
245
  memo_key = [rule, args]
250
246
  if m = @memoizations[memo_key][@pos]
251
- m.inc!
252
-
253
247
  prev = @pos
254
248
  @pos = m.pos
255
- if m.ans.kind_of? LeftRecursive
256
- m.ans.detected = true
249
+ if !m.set
250
+ m.left_rec = true
257
251
  return nil
258
252
  end
259
253
 
@@ -261,18 +255,19 @@ class KPeg::FormatParser
261
255
 
262
256
  return m.ans
263
257
  else
264
- lr = LeftRecursive.new(false)
265
- m = MemoEntry.new(lr, @pos)
258
+ m = MemoEntry.new(nil, @pos)
266
259
  @memoizations[memo_key][@pos] = m
267
260
  start_pos = @pos
268
261
 
269
262
  ans = __send__ rule, *args
270
263
 
264
+ lr = m.left_rec
265
+
271
266
  m.move! ans, @pos, @result
272
267
 
273
268
  # Don't bother trying to grow the left recursion
274
269
  # if it's failing straight away (thus there is no seed)
275
- if ans and lr.detected
270
+ if ans and lr
276
271
  return grow_lr(rule, args, start_pos, m)
277
272
  else
278
273
  return ans
@@ -284,12 +279,10 @@ class KPeg::FormatParser
284
279
 
285
280
  def apply(rule)
286
281
  if m = @memoizations[rule][@pos]
287
- m.inc!
288
-
289
282
  prev = @pos
290
283
  @pos = m.pos
291
- if m.ans.kind_of? LeftRecursive
292
- m.ans.detected = true
284
+ if !m.set
285
+ m.left_rec = true
293
286
  return nil
294
287
  end
295
288
 
@@ -297,18 +290,19 @@ class KPeg::FormatParser
297
290
 
298
291
  return m.ans
299
292
  else
300
- lr = LeftRecursive.new(false)
301
- m = MemoEntry.new(lr, @pos)
293
+ m = MemoEntry.new(nil, @pos)
302
294
  @memoizations[rule][@pos] = m
303
295
  start_pos = @pos
304
296
 
305
297
  ans = __send__ rule
306
298
 
299
+ lr = m.left_rec
300
+
307
301
  m.move! ans, @pos, @result
308
302
 
309
303
  # Don't bother trying to grow the left recursion
310
304
  # if it's failing straight away (thus there is no seed)
311
- if ans and lr.detected
305
+ if ans and lr
312
306
  return grow_lr(rule, nil, start_pos, m)
313
307
  else
314
308
  return ans
@@ -2474,7 +2468,7 @@ class KPeg::FormatParser
2474
2468
  return _tmp
2475
2469
  end
2476
2470
 
2477
- # statement = (- var:v "(" args:a ")" - "=" - expression:o { @g.set(v, o, a) } | - var:v - "=" - expression:o { @g.set(v, o) } | - "%" var:name - "=" - < /[::A-Za-z0-9_]+/ > { @g.add_foreign_grammar(name, text) } | - "%%" - curly:act { @g.add_setup act } | - "%%" - var:name - "=" - < (!"\n" .)+ > { @g.set_variable(name, text) })
2471
+ # statement = (- var:v "(" args:a ")" - "=" - expression:o { @g.set(v, o, a) } | - var:v - "=" - expression:o { @g.set(v, o) } | - "%" var:name - "=" - < /[::A-Za-z0-9_]+/ > { @g.add_foreign_grammar(name, text) } | - "%%" - curly:act { @g.add_setup act } | - "%%" - var:name - curly:act { @g.add_directive name, act } | - "%%" - var:name - "=" - < (!"\n" .)+ > { @g.set_variable(name, text) })
2478
2472
  def _statement
2479
2473
 
2480
2474
  _save = self.pos
@@ -2701,32 +2695,77 @@ class KPeg::FormatParser
2701
2695
  self.pos = _save5
2702
2696
  break
2703
2697
  end
2704
- _tmp = match_string("=")
2698
+ _tmp = apply(:_curly)
2699
+ act = @result
2705
2700
  unless _tmp
2706
2701
  self.pos = _save5
2707
2702
  break
2708
2703
  end
2709
- _tmp = apply(:__hyphen_)
2704
+ @result = begin; @g.add_directive name, act ; end
2705
+ _tmp = true
2710
2706
  unless _tmp
2711
2707
  self.pos = _save5
2708
+ end
2709
+ break
2710
+ end # end sequence
2711
+
2712
+ break if _tmp
2713
+ self.pos = _save
2714
+
2715
+ _save6 = self.pos
2716
+ while true # sequence
2717
+ _tmp = apply(:__hyphen_)
2718
+ unless _tmp
2719
+ self.pos = _save6
2720
+ break
2721
+ end
2722
+ _tmp = match_string("%%")
2723
+ unless _tmp
2724
+ self.pos = _save6
2725
+ break
2726
+ end
2727
+ _tmp = apply(:__hyphen_)
2728
+ unless _tmp
2729
+ self.pos = _save6
2730
+ break
2731
+ end
2732
+ _tmp = apply(:_var)
2733
+ name = @result
2734
+ unless _tmp
2735
+ self.pos = _save6
2736
+ break
2737
+ end
2738
+ _tmp = apply(:__hyphen_)
2739
+ unless _tmp
2740
+ self.pos = _save6
2741
+ break
2742
+ end
2743
+ _tmp = match_string("=")
2744
+ unless _tmp
2745
+ self.pos = _save6
2746
+ break
2747
+ end
2748
+ _tmp = apply(:__hyphen_)
2749
+ unless _tmp
2750
+ self.pos = _save6
2712
2751
  break
2713
2752
  end
2714
2753
  _text_start = self.pos
2715
- _save6 = self.pos
2716
-
2717
2754
  _save7 = self.pos
2755
+
2756
+ _save8 = self.pos
2718
2757
  while true # sequence
2719
- _save8 = self.pos
2758
+ _save9 = self.pos
2720
2759
  _tmp = match_string("\n")
2721
2760
  _tmp = _tmp ? nil : true
2722
- self.pos = _save8
2761
+ self.pos = _save9
2723
2762
  unless _tmp
2724
- self.pos = _save7
2763
+ self.pos = _save8
2725
2764
  break
2726
2765
  end
2727
2766
  _tmp = get_byte
2728
2767
  unless _tmp
2729
- self.pos = _save7
2768
+ self.pos = _save8
2730
2769
  end
2731
2770
  break
2732
2771
  end # end sequence
@@ -2734,19 +2773,19 @@ class KPeg::FormatParser
2734
2773
  if _tmp
2735
2774
  while true
2736
2775
 
2737
- _save9 = self.pos
2776
+ _save10 = self.pos
2738
2777
  while true # sequence
2739
- _save10 = self.pos
2778
+ _save11 = self.pos
2740
2779
  _tmp = match_string("\n")
2741
2780
  _tmp = _tmp ? nil : true
2742
- self.pos = _save10
2781
+ self.pos = _save11
2743
2782
  unless _tmp
2744
- self.pos = _save9
2783
+ self.pos = _save10
2745
2784
  break
2746
2785
  end
2747
2786
  _tmp = get_byte
2748
2787
  unless _tmp
2749
- self.pos = _save9
2788
+ self.pos = _save10
2750
2789
  end
2751
2790
  break
2752
2791
  end # end sequence
@@ -2755,19 +2794,19 @@ class KPeg::FormatParser
2755
2794
  end
2756
2795
  _tmp = true
2757
2796
  else
2758
- self.pos = _save6
2797
+ self.pos = _save7
2759
2798
  end
2760
2799
  if _tmp
2761
2800
  text = get_text(_text_start)
2762
2801
  end
2763
2802
  unless _tmp
2764
- self.pos = _save5
2803
+ self.pos = _save6
2765
2804
  break
2766
2805
  end
2767
2806
  @result = begin; @g.set_variable(name, text) ; end
2768
2807
  _tmp = true
2769
2808
  unless _tmp
2770
- self.pos = _save5
2809
+ self.pos = _save6
2771
2810
  end
2772
2811
  break
2773
2812
  end # end sequence
@@ -3124,7 +3163,7 @@ class KPeg::FormatParser
3124
3163
  Rules[:_choose_cont] = rule_info("choose_cont", "- \"|\" - values:v { v }")
3125
3164
  Rules[:_expression] = rule_info("expression", "(values:v choose_cont+:alts { @g.any(v, *alts) } | values)")
3126
3165
  Rules[:_args] = rule_info("args", "(args:a \",\" - var:n - { a + [n] } | - var:n - { [n] })")
3127
- Rules[:_statement] = rule_info("statement", "(- var:v \"(\" args:a \")\" - \"=\" - expression:o { @g.set(v, o, a) } | - var:v - \"=\" - expression:o { @g.set(v, o) } | - \"%\" var:name - \"=\" - < /[::A-Za-z0-9_]+/ > { @g.add_foreign_grammar(name, text) } | - \"%%\" - curly:act { @g.add_setup act } | - \"%%\" - var:name - \"=\" - < (!\"\\n\" .)+ > { @g.set_variable(name, text) })")
3166
+ Rules[:_statement] = rule_info("statement", "(- var:v \"(\" args:a \")\" - \"=\" - expression:o { @g.set(v, o, a) } | - var:v - \"=\" - expression:o { @g.set(v, o) } | - \"%\" var:name - \"=\" - < /[::A-Za-z0-9_]+/ > { @g.add_foreign_grammar(name, text) } | - \"%%\" - curly:act { @g.add_setup act } | - \"%%\" - var:name - curly:act { @g.add_directive name, act } | - \"%%\" - var:name - \"=\" - < (!\"\\n\" .)+ > { @g.set_variable(name, text) })")
3128
3167
  Rules[:_statements] = rule_info("statements", "statement (- statements)?")
3129
3168
  Rules[:_eof] = rule_info("eof", "!.")
3130
3169
  Rules[:_root] = rule_info("root", "statements - eof_comment? eof")
data/lib/kpeg/grammar.rb CHANGED
@@ -634,6 +634,7 @@ module KPeg
634
634
 
635
635
  class Grammar
636
636
  def initialize
637
+ @directives = {}
637
638
  @rules = {}
638
639
  @rule_order = []
639
640
  @setup_actions = []
@@ -641,9 +642,18 @@ module KPeg
641
642
  @variables = {}
642
643
  end
643
644
 
645
+ attr_reader :directives
644
646
  attr_reader :rules, :rule_order, :setup_actions, :foreign_grammars
645
647
  attr_reader :variables
646
648
 
649
+ def add_directive(name, body)
650
+ if @directives.include? name
651
+ warn "directive #{name.inspect} appears more than once"
652
+ end
653
+
654
+ @directives[name] = body
655
+ end
656
+
647
657
  def add_setup(act)
648
658
  @setup_actions << act
649
659
  end
@@ -0,0 +1,20 @@
1
+ %% name = KPeg::StringEscape
2
+
3
+ %% {
4
+ attr_reader :text
5
+ }
6
+
7
+ segment = < /[\w ]+/ > { text } # Don't use \s because that matches \n
8
+ | "\\" { "\\\\" }
9
+ | "\n" { "\\n" }
10
+ | "\t" { "\\t" }
11
+ | "\b" { "\\b" }
12
+ | "\"" { "\\\"" }
13
+ | < . > { text }
14
+
15
+ root = segment*:s { @text = s.join }
16
+
17
+ embed_seg = "#" { "\\#" }
18
+ | segment
19
+
20
+ embed = embed_seg*:s { @text = s.join }