kpeg 0.8.5 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46)
  1. data/.autotest +10 -0
  2. data/.gemtest +0 -0
  3. data/Gemfile +11 -3
  4. data/History.txt +21 -0
  5. data/LICENSE +25 -0
  6. data/Manifest.txt +47 -0
  7. data/README.rdoc +222 -0
  8. data/Rakefile +23 -11
  9. data/bin/kpeg +4 -2
  10. data/examples/calculator/calculator.kpeg +17 -0
  11. data/examples/calculator/calculator.rb +7 -0
  12. data/examples/foreign_reference/literals.kpeg +5 -0
  13. data/examples/foreign_reference/matcher.kpeg +9 -0
  14. data/examples/foreign_reference/matcher.rb +5 -0
  15. data/examples/lua_string/driver.rb +21 -0
  16. data/examples/lua_string/lua_string.kpeg +14 -0
  17. data/examples/lua_string/lua_string.kpeg.rb +460 -0
  18. data/examples/phone_number/README.md +3 -0
  19. data/examples/phone_number/phone_number.kpeg +20 -0
  20. data/examples/phone_number/phone_number.rb +6 -0
  21. data/examples/upper/README.md +83 -0
  22. data/examples/upper/upper.kpeg +24 -0
  23. data/examples/upper/upper.rb +9 -0
  24. data/kpeg.gemspec +35 -17
  25. data/lib/hoe/kpeg.rb +94 -0
  26. data/lib/kpeg.rb +3 -0
  27. data/lib/kpeg/code_generator.rb +16 -3
  28. data/lib/kpeg/compiled_parser.rb +18 -28
  29. data/lib/kpeg/format_parser.kpeg +129 -0
  30. data/lib/kpeg/format_parser.rb +88 -49
  31. data/lib/kpeg/grammar.rb +10 -0
  32. data/lib/kpeg/string_escape.kpeg +20 -0
  33. data/test/inputs/comments.kpeg +5 -0
  34. data/test/test_file_parser_roundtrip.rb +3 -3
  35. data/test/test_gen_calc.rb +2 -2
  36. data/test/test_kpeg.rb +2 -2
  37. data/test/test_kpeg_code_generator.rb +65 -2
  38. data/test/test_kpeg_compiled_parser.rb +2 -2
  39. data/test/test_kpeg_format.rb +49 -4
  40. data/test/test_kpeg_grammar_renderer.rb +2 -2
  41. data/test/test_left_recursion.rb +2 -2
  42. data/{doc → vim}/syntax_kpeg/ftdetect/kpeg.vim +0 -0
  43. data/{doc → vim}/syntax_kpeg/syntax/kpeg.vim +0 -0
  44. metadata +89 -26
  45. data/README.md +0 -183
  46. data/lib/kpeg/version.rb +0 -3
@@ -0,0 +1,129 @@
1
+ %% name = KPeg::FormatParser
2
+
3
+ %% {
4
+ require 'kpeg/grammar'
5
+
6
+ def initialize(str, debug=false)
7
+ setup_parser(str, debug)
8
+ @g = KPeg::Grammar.new
9
+ end
10
+
11
+ attr_reader :g
12
+ alias_method :grammar, :g
13
+ }
14
+
15
+
16
+ eol = "\n"
17
+ eof_comment = "#" (!eof .)*
18
+
19
+ comment = "#" (!eol .)* eol
20
+ space = " " | "\t" | eol
21
+ - = (space | comment)*
22
+ kleene = "*"
23
+
24
+ # Allow - by itself, but not at the beginning
25
+ var = < "-" | /[a-zA-Z][\-_a-zA-Z0-9]*/ > { text }
26
+ method = < /[a-zA-Z_][a-zA-Z0-9_]*/ > { text }
27
+
28
+ dbl_escapes = "n" { "\n" }
29
+ | "s" { " " }
30
+ | "r" { "\r" }
31
+ | "t" { "\t" }
32
+ | "v" { "\v" }
33
+ | "f" { "\f" }
34
+ | "b" { "\b" }
35
+ | "a" { "\a" }
36
+ | "e" { "\e" }
37
+ | "\\" { "\\" }
38
+ | "\"" { "\"" }
39
+ | num_escapes
40
+ | < . > { text }
41
+ num_escapes = < /[0-7]{1,3}/ > { [text.to_i(8)].pack("U") }
42
+ | "x" < /[0-9a-fA-F]{2}/ > { [text.to_i(16)].pack("U") }
43
+ dbl_seq = < /[^\\"]+/ > { text }
44
+ dbl_not_quote = ("\\" dbl_escapes:s | dbl_seq:s)*:ary { Array(ary) }
45
+ dbl_string = "\"" dbl_not_quote:s "\"" { @g.str(s.join) }
46
+ sgl_escape_quote = "\\'" { "'" }
47
+ sgl_seq = < /[^']/ > { text }
48
+ sgl_not_quote = (sgl_escape_quote | sgl_seq)*:segs { Array(segs) }
49
+ sgl_string = "'" sgl_not_quote:s "'" { @g.str(s.join) }
50
+ string = dbl_string
51
+ | sgl_string
52
+
53
+ not_slash = < ("\\/" | /[^\/]/)+ > { text }
54
+ regexp_opts = < [a-z]* > { text }
55
+ regexp = "/" not_slash:body "/" regexp_opts:opts
56
+ { @g.reg body, opts }
57
+
58
+ char = < /[a-zA-Z0-9]/ > { text }
59
+ char_range = "[" char:l "-" char:r "]" { @g.range(l,r) }
60
+
61
+ range_num = < /[1-9][0-9]*/ > { text }
62
+ range_elem = < range_num|kleene > { text }
63
+ mult_range = "[" - range_elem:l - "," - range_elem:r - "]"
64
+ { [l == "*" ? nil : l.to_i, r == "*" ? nil : r.to_i] }
65
+ | "[" - range_num:e - "]" { [e.to_i, e.to_i] }
66
+
67
+ curly_block = curly
68
+ curly = "{" < (/[^{}"']+/ | string | curly)* > "}" { @g.action(text) }
69
+ nested_paren = "(" (/[^()"']+/ | string | nested_paren)* ")"
70
+
71
+ value = value:v ":" var:n { @g.t(v,n) }
72
+ | value:v "?" { @g.maybe(v) }
73
+ | value:v "+" { @g.many(v) }
74
+ | value:v "*" { @g.kleene(v) }
75
+ | value:v mult_range:r { @g.multiple(v, *r) }
76
+ | "&" value:v { @g.andp(v) }
77
+ | "!" value:v { @g.notp(v) }
78
+ | "(" - expression:o - ")" { o }
79
+ | "@<" - expression:o - ">" { @g.bounds(o) }
80
+ | "<" - expression:o - ">" { @g.collect(o) }
81
+ | curly_block
82
+ | "~" method:m < nested_paren? >
83
+ { @g.action("#{m}#{text}") }
84
+ | "." { @g.dot }
85
+ | "@" var:name < nested_paren? > !(- "=")
86
+ { @g.invoke(name, text.empty? ? nil : text) }
87
+ | "^" var:name < nested_paren? >
88
+ { @g.foreign_invoke("parent", name, text) }
89
+ | "%" var:gram "." var:name < nested_paren? >
90
+ { @g.foreign_invoke(gram, name, text) }
91
+ | var:name < nested_paren? > !(- "=")
92
+ { @g.ref(name, nil, text.empty? ? nil : text) }
93
+ | char_range
94
+ | regexp
95
+ | string
96
+
97
+ spaces = (space | comment)+
98
+ values = values:s spaces value:v { @g.seq(s, v) }
99
+ | value:l spaces value:r { @g.seq(l, r) }
100
+ | value
101
+ choose_cont = - "|" - values:v { v }
102
+ expression = values:v choose_cont+:alts { @g.any(v, *alts) }
103
+ | values
104
+ args = args:a "," - var:n - { a + [n] }
105
+ | - var:n - { [n] }
106
+ statement = - var:v "(" args:a ")" - "=" - expression:o { @g.set(v, o, a) }
107
+ | - var:v - "=" - expression:o { @g.set(v, o) }
108
+ | - "%" var:name - "=" - < /[::A-Za-z0-9_]+/ >
109
+ { @g.add_foreign_grammar(name, text) }
110
+ | - "%%" - curly:act { @g.add_setup act }
111
+ | - "%%" - var:name - curly:act { @g.add_directive name, act }
112
+ | - "%%" - var:name - "=" - < (!"\n" .)+ >
113
+ { @g.set_variable(name, text) }
114
+ statements = statement (- statements)?
115
+ eof = !.
116
+ root = statements - eof_comment? eof
117
+
118
+ # These are a separate set of rules used to parse an ast declaration
119
+
120
+ ast_constant = < /[A-Z][A-Za-z0-9_]*/ > { text }
121
+ ast_word = < /[A-Za-z_][A-Za-z0-9_]*/ > { text }
122
+
123
+ ast_sp = (" " | "\t")*
124
+
125
+ ast_words = ast_words:r ast_sp "," ast_sp ast_word:w { r + [w] }
126
+ | ast_word:w { [w] }
127
+
128
+ ast_root = ast_constant:c "(" ast_words:w ")" { [c, w] }
129
+ | ast_constant:c "()"? { [c, []] }
@@ -186,41 +186,37 @@ class KPeg::FormatParser
186
186
  end
187
187
 
188
188
  def parse(rule=nil)
189
+ # We invoke the rules indirectly via apply
190
+ # instead of by just calling them as methods because
191
+ # if the rules use left recursion, apply needs to
192
+ # manage that.
193
+
189
194
  if !rule
190
- _root ? true : false
195
+ apply(:_root)
191
196
  else
192
- # This is not shared with code_generator.rb so this can be standalone
193
197
  method = rule.gsub("-","_hyphen_")
194
- __send__("_#{method}") ? true : false
198
+ apply :"_#{method}"
195
199
  end
196
200
  end
197
201
 
198
- class LeftRecursive
199
- def initialize(detected=false)
200
- @detected = detected
201
- end
202
-
203
- attr_accessor :detected
204
- end
205
-
206
202
  class MemoEntry
207
203
  def initialize(ans, pos)
208
204
  @ans = ans
209
205
  @pos = pos
210
- @uses = 1
211
206
  @result = nil
207
+ @set = false
208
+ @left_rec = false
212
209
  end
213
210
 
214
- attr_reader :ans, :pos, :uses, :result
215
-
216
- def inc!
217
- @uses += 1
218
- end
211
+ attr_reader :ans, :pos, :result, :set
212
+ attr_accessor :left_rec
219
213
 
220
214
  def move!(ans, pos, result)
221
215
  @ans = ans
222
216
  @pos = pos
223
217
  @result = result
218
+ @set = true
219
+ @left_rec = false
224
220
  end
225
221
  end
226
222
 
@@ -248,12 +244,10 @@ class KPeg::FormatParser
248
244
  def apply_with_args(rule, *args)
249
245
  memo_key = [rule, args]
250
246
  if m = @memoizations[memo_key][@pos]
251
- m.inc!
252
-
253
247
  prev = @pos
254
248
  @pos = m.pos
255
- if m.ans.kind_of? LeftRecursive
256
- m.ans.detected = true
249
+ if !m.set
250
+ m.left_rec = true
257
251
  return nil
258
252
  end
259
253
 
@@ -261,18 +255,19 @@ class KPeg::FormatParser
261
255
 
262
256
  return m.ans
263
257
  else
264
- lr = LeftRecursive.new(false)
265
- m = MemoEntry.new(lr, @pos)
258
+ m = MemoEntry.new(nil, @pos)
266
259
  @memoizations[memo_key][@pos] = m
267
260
  start_pos = @pos
268
261
 
269
262
  ans = __send__ rule, *args
270
263
 
264
+ lr = m.left_rec
265
+
271
266
  m.move! ans, @pos, @result
272
267
 
273
268
  # Don't bother trying to grow the left recursion
274
269
  # if it's failing straight away (thus there is no seed)
275
- if ans and lr.detected
270
+ if ans and lr
276
271
  return grow_lr(rule, args, start_pos, m)
277
272
  else
278
273
  return ans
@@ -284,12 +279,10 @@ class KPeg::FormatParser
284
279
 
285
280
  def apply(rule)
286
281
  if m = @memoizations[rule][@pos]
287
- m.inc!
288
-
289
282
  prev = @pos
290
283
  @pos = m.pos
291
- if m.ans.kind_of? LeftRecursive
292
- m.ans.detected = true
284
+ if !m.set
285
+ m.left_rec = true
293
286
  return nil
294
287
  end
295
288
 
@@ -297,18 +290,19 @@ class KPeg::FormatParser
297
290
 
298
291
  return m.ans
299
292
  else
300
- lr = LeftRecursive.new(false)
301
- m = MemoEntry.new(lr, @pos)
293
+ m = MemoEntry.new(nil, @pos)
302
294
  @memoizations[rule][@pos] = m
303
295
  start_pos = @pos
304
296
 
305
297
  ans = __send__ rule
306
298
 
299
+ lr = m.left_rec
300
+
307
301
  m.move! ans, @pos, @result
308
302
 
309
303
  # Don't bother trying to grow the left recursion
310
304
  # if it's failing straight away (thus there is no seed)
311
- if ans and lr.detected
305
+ if ans and lr
312
306
  return grow_lr(rule, nil, start_pos, m)
313
307
  else
314
308
  return ans
@@ -2474,7 +2468,7 @@ class KPeg::FormatParser
2474
2468
  return _tmp
2475
2469
  end
2476
2470
 
2477
- # statement = (- var:v "(" args:a ")" - "=" - expression:o { @g.set(v, o, a) } | - var:v - "=" - expression:o { @g.set(v, o) } | - "%" var:name - "=" - < /[::A-Za-z0-9_]+/ > { @g.add_foreign_grammar(name, text) } | - "%%" - curly:act { @g.add_setup act } | - "%%" - var:name - "=" - < (!"\n" .)+ > { @g.set_variable(name, text) })
2471
+ # statement = (- var:v "(" args:a ")" - "=" - expression:o { @g.set(v, o, a) } | - var:v - "=" - expression:o { @g.set(v, o) } | - "%" var:name - "=" - < /[::A-Za-z0-9_]+/ > { @g.add_foreign_grammar(name, text) } | - "%%" - curly:act { @g.add_setup act } | - "%%" - var:name - curly:act { @g.add_directive name, act } | - "%%" - var:name - "=" - < (!"\n" .)+ > { @g.set_variable(name, text) })
2478
2472
  def _statement
2479
2473
 
2480
2474
  _save = self.pos
@@ -2701,32 +2695,77 @@ class KPeg::FormatParser
2701
2695
  self.pos = _save5
2702
2696
  break
2703
2697
  end
2704
- _tmp = match_string("=")
2698
+ _tmp = apply(:_curly)
2699
+ act = @result
2705
2700
  unless _tmp
2706
2701
  self.pos = _save5
2707
2702
  break
2708
2703
  end
2709
- _tmp = apply(:__hyphen_)
2704
+ @result = begin; @g.add_directive name, act ; end
2705
+ _tmp = true
2710
2706
  unless _tmp
2711
2707
  self.pos = _save5
2708
+ end
2709
+ break
2710
+ end # end sequence
2711
+
2712
+ break if _tmp
2713
+ self.pos = _save
2714
+
2715
+ _save6 = self.pos
2716
+ while true # sequence
2717
+ _tmp = apply(:__hyphen_)
2718
+ unless _tmp
2719
+ self.pos = _save6
2720
+ break
2721
+ end
2722
+ _tmp = match_string("%%")
2723
+ unless _tmp
2724
+ self.pos = _save6
2725
+ break
2726
+ end
2727
+ _tmp = apply(:__hyphen_)
2728
+ unless _tmp
2729
+ self.pos = _save6
2730
+ break
2731
+ end
2732
+ _tmp = apply(:_var)
2733
+ name = @result
2734
+ unless _tmp
2735
+ self.pos = _save6
2736
+ break
2737
+ end
2738
+ _tmp = apply(:__hyphen_)
2739
+ unless _tmp
2740
+ self.pos = _save6
2741
+ break
2742
+ end
2743
+ _tmp = match_string("=")
2744
+ unless _tmp
2745
+ self.pos = _save6
2746
+ break
2747
+ end
2748
+ _tmp = apply(:__hyphen_)
2749
+ unless _tmp
2750
+ self.pos = _save6
2712
2751
  break
2713
2752
  end
2714
2753
  _text_start = self.pos
2715
- _save6 = self.pos
2716
-
2717
2754
  _save7 = self.pos
2755
+
2756
+ _save8 = self.pos
2718
2757
  while true # sequence
2719
- _save8 = self.pos
2758
+ _save9 = self.pos
2720
2759
  _tmp = match_string("\n")
2721
2760
  _tmp = _tmp ? nil : true
2722
- self.pos = _save8
2761
+ self.pos = _save9
2723
2762
  unless _tmp
2724
- self.pos = _save7
2763
+ self.pos = _save8
2725
2764
  break
2726
2765
  end
2727
2766
  _tmp = get_byte
2728
2767
  unless _tmp
2729
- self.pos = _save7
2768
+ self.pos = _save8
2730
2769
  end
2731
2770
  break
2732
2771
  end # end sequence
@@ -2734,19 +2773,19 @@ class KPeg::FormatParser
2734
2773
  if _tmp
2735
2774
  while true
2736
2775
 
2737
- _save9 = self.pos
2776
+ _save10 = self.pos
2738
2777
  while true # sequence
2739
- _save10 = self.pos
2778
+ _save11 = self.pos
2740
2779
  _tmp = match_string("\n")
2741
2780
  _tmp = _tmp ? nil : true
2742
- self.pos = _save10
2781
+ self.pos = _save11
2743
2782
  unless _tmp
2744
- self.pos = _save9
2783
+ self.pos = _save10
2745
2784
  break
2746
2785
  end
2747
2786
  _tmp = get_byte
2748
2787
  unless _tmp
2749
- self.pos = _save9
2788
+ self.pos = _save10
2750
2789
  end
2751
2790
  break
2752
2791
  end # end sequence
@@ -2755,19 +2794,19 @@ class KPeg::FormatParser
2755
2794
  end
2756
2795
  _tmp = true
2757
2796
  else
2758
- self.pos = _save6
2797
+ self.pos = _save7
2759
2798
  end
2760
2799
  if _tmp
2761
2800
  text = get_text(_text_start)
2762
2801
  end
2763
2802
  unless _tmp
2764
- self.pos = _save5
2803
+ self.pos = _save6
2765
2804
  break
2766
2805
  end
2767
2806
  @result = begin; @g.set_variable(name, text) ; end
2768
2807
  _tmp = true
2769
2808
  unless _tmp
2770
- self.pos = _save5
2809
+ self.pos = _save6
2771
2810
  end
2772
2811
  break
2773
2812
  end # end sequence
@@ -3124,7 +3163,7 @@ class KPeg::FormatParser
3124
3163
  Rules[:_choose_cont] = rule_info("choose_cont", "- \"|\" - values:v { v }")
3125
3164
  Rules[:_expression] = rule_info("expression", "(values:v choose_cont+:alts { @g.any(v, *alts) } | values)")
3126
3165
  Rules[:_args] = rule_info("args", "(args:a \",\" - var:n - { a + [n] } | - var:n - { [n] })")
3127
- Rules[:_statement] = rule_info("statement", "(- var:v \"(\" args:a \")\" - \"=\" - expression:o { @g.set(v, o, a) } | - var:v - \"=\" - expression:o { @g.set(v, o) } | - \"%\" var:name - \"=\" - < /[::A-Za-z0-9_]+/ > { @g.add_foreign_grammar(name, text) } | - \"%%\" - curly:act { @g.add_setup act } | - \"%%\" - var:name - \"=\" - < (!\"\\n\" .)+ > { @g.set_variable(name, text) })")
3166
+ Rules[:_statement] = rule_info("statement", "(- var:v \"(\" args:a \")\" - \"=\" - expression:o { @g.set(v, o, a) } | - var:v - \"=\" - expression:o { @g.set(v, o) } | - \"%\" var:name - \"=\" - < /[::A-Za-z0-9_]+/ > { @g.add_foreign_grammar(name, text) } | - \"%%\" - curly:act { @g.add_setup act } | - \"%%\" - var:name - curly:act { @g.add_directive name, act } | - \"%%\" - var:name - \"=\" - < (!\"\\n\" .)+ > { @g.set_variable(name, text) })")
3128
3167
  Rules[:_statements] = rule_info("statements", "statement (- statements)?")
3129
3168
  Rules[:_eof] = rule_info("eof", "!.")
3130
3169
  Rules[:_root] = rule_info("root", "statements - eof_comment? eof")
data/lib/kpeg/grammar.rb CHANGED
@@ -634,6 +634,7 @@ module KPeg
634
634
 
635
635
  class Grammar
636
636
  def initialize
637
+ @directives = {}
637
638
  @rules = {}
638
639
  @rule_order = []
639
640
  @setup_actions = []
@@ -641,9 +642,18 @@ module KPeg
641
642
  @variables = {}
642
643
  end
643
644
 
645
+ attr_reader :directives
644
646
  attr_reader :rules, :rule_order, :setup_actions, :foreign_grammars
645
647
  attr_reader :variables
646
648
 
649
+ def add_directive(name, body)
650
+ if @directives.include? name
651
+ warn "directive #{name.inspect} appears more than once"
652
+ end
653
+
654
+ @directives[name] = body
655
+ end
656
+
647
657
  def add_setup(act)
648
658
  @setup_actions << act
649
659
  end
@@ -0,0 +1,20 @@
1
+ %% name = KPeg::StringEscape
2
+
3
+ %% {
4
+ attr_reader :text
5
+ }
6
+
7
+ segment = < /[\w ]+/ > { text } # Don't use \s because that matches \n
8
+ | "\\" { "\\\\" }
9
+ | "\n" { "\\n" }
10
+ | "\t" { "\\t" }
11
+ | "\b" { "\\b" }
12
+ | "\"" { "\\\"" }
13
+ | < . > { text }
14
+
15
+ root = segment*:s { @text = s.join }
16
+
17
+ embed_seg = "#" { "\\#" }
18
+ | segment
19
+
20
+ embed = embed_seg*:s { @text = s.join }