kpeg 0.9.0 → 0.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/History.txt +15 -1
- data/Manifest.txt +2 -4
- data/README.rdoc +32 -0
- data/Rakefile +10 -3
- data/lib/hoe/kpeg.rb +6 -5
- data/lib/kpeg.rb +1 -1
- data/lib/kpeg/code_generator.rb +72 -39
- data/lib/kpeg/compiled_parser.rb +15 -11
- data/lib/kpeg/format_parser.kpeg +21 -9
- data/lib/kpeg/format_parser.rb +42 -39
- data/lib/kpeg/grammar.rb +1 -1
- data/lib/kpeg/grammar_renderer.rb +14 -0
- data/lib/kpeg/position.rb +1 -0
- data/lib/kpeg/string_escape.rb +355 -2
- data/test/test_kpeg_code_generator.rb +166 -0
- data/test/test_kpeg_format.rb +2 -2
- data/test/{test_file_parser_roundtrip.rb → test_kpeg_format_parser_round_trip.rb} +1 -1
- data/test/{test_gen_calc.rb → test_kpeg_grammar.rb} +48 -5
- data/test/test_kpeg_grammar_renderer.rb +46 -5
- metadata +17 -20
- data/Gemfile +0 -12
- data/test/test_left_recursion.rb +0 -50
data/lib/kpeg/format_parser.rb
CHANGED
@@ -1,5 +1,9 @@
|
|
1
|
+
require 'kpeg/grammar'
|
1
2
|
class KPeg::FormatParser
|
2
|
-
#
|
3
|
+
# :stopdoc:
|
4
|
+
|
5
|
+
# Prepares for parsing +str+. If you define a custom initialize you must
|
6
|
+
# call this method before #parse
|
3
7
|
def setup_parser(str, debug=false)
|
4
8
|
@string = str
|
5
9
|
@pos = 0
|
@@ -11,19 +15,11 @@ class KPeg::FormatParser
|
|
11
15
|
setup_foreign_grammar
|
12
16
|
end
|
13
17
|
|
14
|
-
# This is distinct from setup_parser so that a standalone parser
|
15
|
-
# can redefine #initialize and still have access to the proper
|
16
|
-
# parser setup code.
|
17
|
-
#
|
18
|
-
def initialize(str, debug=false)
|
19
|
-
setup_parser(str, debug)
|
20
|
-
end
|
21
|
-
|
22
18
|
attr_reader :string
|
23
19
|
attr_reader :failing_rule_offset
|
24
20
|
attr_accessor :result, :pos
|
25
21
|
|
26
|
-
|
22
|
+
|
27
23
|
def current_column(target=pos)
|
28
24
|
if c = string.rindex("\n", target-1)
|
29
25
|
return target - c - 1
|
@@ -51,7 +47,7 @@ class KPeg::FormatParser
|
|
51
47
|
lines
|
52
48
|
end
|
53
49
|
|
54
|
-
|
50
|
+
|
55
51
|
|
56
52
|
def get_text(start)
|
57
53
|
@string[start..@pos-1]
|
@@ -244,7 +240,6 @@ class KPeg::FormatParser
|
|
244
240
|
def apply_with_args(rule, *args)
|
245
241
|
memo_key = [rule, args]
|
246
242
|
if m = @memoizations[memo_key][@pos]
|
247
|
-
prev = @pos
|
248
243
|
@pos = m.pos
|
249
244
|
if !m.set
|
250
245
|
m.left_rec = true
|
@@ -279,7 +274,6 @@ class KPeg::FormatParser
|
|
279
274
|
|
280
275
|
def apply(rule)
|
281
276
|
if m = @memoizations[rule][@pos]
|
282
|
-
prev = @pos
|
283
277
|
@pos = m.pos
|
284
278
|
if !m.set
|
285
279
|
m.left_rec = true
|
@@ -347,20 +341,28 @@ class KPeg::FormatParser
|
|
347
341
|
RuleInfo.new(name, rendered)
|
348
342
|
end
|
349
343
|
|
350
|
-
|
344
|
+
|
345
|
+
# :startdoc:
|
346
|
+
|
351
347
|
|
352
348
|
|
353
|
-
|
349
|
+
##
|
350
|
+
# Creates a new kpeg format parser for +str+.
|
354
351
|
|
355
352
|
def initialize(str, debug=false)
|
356
353
|
setup_parser(str, debug)
|
357
354
|
@g = KPeg::Grammar.new
|
358
355
|
end
|
359
356
|
|
357
|
+
##
|
358
|
+
# The parsed grammar
|
359
|
+
|
360
360
|
attr_reader :g
|
361
|
+
|
361
362
|
alias_method :grammar, :g
|
362
363
|
|
363
364
|
|
365
|
+
# :stopdoc:
|
364
366
|
def setup_foreign_grammar; end
|
365
367
|
|
366
368
|
# eol = "\n"
|
@@ -509,7 +511,7 @@ class KPeg::FormatParser
|
|
509
511
|
return _tmp
|
510
512
|
end
|
511
513
|
|
512
|
-
# var = < ("-" | /[a-
|
514
|
+
# var = < ("-" | /[a-z][\w-]*/i) > { text }
|
513
515
|
def _var
|
514
516
|
|
515
517
|
_save = self.pos
|
@@ -521,7 +523,7 @@ class KPeg::FormatParser
|
|
521
523
|
_tmp = match_string("-")
|
522
524
|
break if _tmp
|
523
525
|
self.pos = _save1
|
524
|
-
_tmp = scan(/\A(
|
526
|
+
_tmp = scan(/\A(?i-mx:[a-z][\w-]*)/)
|
525
527
|
break if _tmp
|
526
528
|
self.pos = _save1
|
527
529
|
break
|
@@ -546,13 +548,13 @@ class KPeg::FormatParser
|
|
546
548
|
return _tmp
|
547
549
|
end
|
548
550
|
|
549
|
-
# method = < /[a-
|
551
|
+
# method = < /[a-z_]\w*/i > { text }
|
550
552
|
def _method
|
551
553
|
|
552
554
|
_save = self.pos
|
553
555
|
while true # sequence
|
554
556
|
_text_start = self.pos
|
555
|
-
_tmp = scan(/\A(
|
557
|
+
_tmp = scan(/\A(?i-mx:[a-z_]\w*)/)
|
556
558
|
if _tmp
|
557
559
|
text = get_text(_text_start)
|
558
560
|
end
|
@@ -807,7 +809,7 @@ class KPeg::FormatParser
|
|
807
809
|
return _tmp
|
808
810
|
end
|
809
811
|
|
810
|
-
# num_escapes = (< /[0-7]{1,3}/ > { [text.to_i(8)].pack("U") } | "x" < /[
|
812
|
+
# num_escapes = (< /[0-7]{1,3}/ > { [text.to_i(8)].pack("U") } | "x" < /[a-f\d]{2}/i > { [text.to_i(16)].pack("U") })
|
811
813
|
def _num_escapes
|
812
814
|
|
813
815
|
_save = self.pos
|
@@ -843,7 +845,7 @@ class KPeg::FormatParser
|
|
843
845
|
break
|
844
846
|
end
|
845
847
|
_text_start = self.pos
|
846
|
-
_tmp = scan(/\A(
|
848
|
+
_tmp = scan(/\A(?i-mx:[a-f\d]{2})/)
|
847
849
|
if _tmp
|
848
850
|
text = get_text(_text_start)
|
849
851
|
end
|
@@ -1258,13 +1260,13 @@ class KPeg::FormatParser
|
|
1258
1260
|
return _tmp
|
1259
1261
|
end
|
1260
1262
|
|
1261
|
-
# char = < /[a-
|
1263
|
+
# char = < /[a-z\d]/i > { text }
|
1262
1264
|
def _char
|
1263
1265
|
|
1264
1266
|
_save = self.pos
|
1265
1267
|
while true # sequence
|
1266
1268
|
_text_start = self.pos
|
1267
|
-
_tmp = scan(/\A(
|
1269
|
+
_tmp = scan(/\A(?i-mx:[a-z\d])/)
|
1268
1270
|
if _tmp
|
1269
1271
|
text = get_text(_text_start)
|
1270
1272
|
end
|
@@ -1328,13 +1330,13 @@ class KPeg::FormatParser
|
|
1328
1330
|
return _tmp
|
1329
1331
|
end
|
1330
1332
|
|
1331
|
-
# range_num = < /[1-9]
|
1333
|
+
# range_num = < /[1-9]\d*/ > { text }
|
1332
1334
|
def _range_num
|
1333
1335
|
|
1334
1336
|
_save = self.pos
|
1335
1337
|
while true # sequence
|
1336
1338
|
_text_start = self.pos
|
1337
|
-
_tmp = scan(/\A(?-mix:[1-9]
|
1339
|
+
_tmp = scan(/\A(?-mix:[1-9]\d*)/)
|
1338
1340
|
if _tmp
|
1339
1341
|
text = get_text(_text_start)
|
1340
1342
|
end
|
@@ -2468,7 +2470,7 @@ class KPeg::FormatParser
|
|
2468
2470
|
return _tmp
|
2469
2471
|
end
|
2470
2472
|
|
2471
|
-
# statement = (- var:v "(" args:a ")" - "=" - expression:o { @g.set(v, o, a) } | - var:v - "=" - expression:o { @g.set(v, o) } | - "%" var:name - "=" - < /[
|
2473
|
+
# statement = (- var:v "(" args:a ")" - "=" - expression:o { @g.set(v, o, a) } | - var:v - "=" - expression:o { @g.set(v, o) } | - "%" var:name - "=" - < /[:\w]+/ > { @g.add_foreign_grammar(name, text) } | - "%%" - curly:act { @g.add_setup act } | - "%%" - var:name - curly:act { @g.add_directive name, act } | - "%%" - var:name - "=" - < (!"\n" .)+ > { @g.set_variable(name, text) })
|
2472
2474
|
def _statement
|
2473
2475
|
|
2474
2476
|
_save = self.pos
|
@@ -2614,7 +2616,7 @@ class KPeg::FormatParser
|
|
2614
2616
|
break
|
2615
2617
|
end
|
2616
2618
|
_text_start = self.pos
|
2617
|
-
_tmp = scan(/\A(?-mix:[
|
2619
|
+
_tmp = scan(/\A(?-mix:[:\w]+)/)
|
2618
2620
|
if _tmp
|
2619
2621
|
text = get_text(_text_start)
|
2620
2622
|
end
|
@@ -2906,13 +2908,13 @@ class KPeg::FormatParser
|
|
2906
2908
|
return _tmp
|
2907
2909
|
end
|
2908
2910
|
|
2909
|
-
# ast_constant = < /[A-Z]
|
2911
|
+
# ast_constant = < /[A-Z]\w*/ > { text }
|
2910
2912
|
def _ast_constant
|
2911
2913
|
|
2912
2914
|
_save = self.pos
|
2913
2915
|
while true # sequence
|
2914
2916
|
_text_start = self.pos
|
2915
|
-
_tmp = scan(/\A(?-mix:[A-Z]
|
2917
|
+
_tmp = scan(/\A(?-mix:[A-Z]\w*)/)
|
2916
2918
|
if _tmp
|
2917
2919
|
text = get_text(_text_start)
|
2918
2920
|
end
|
@@ -2932,13 +2934,13 @@ class KPeg::FormatParser
|
|
2932
2934
|
return _tmp
|
2933
2935
|
end
|
2934
2936
|
|
2935
|
-
# ast_word = < /[
|
2937
|
+
# ast_word = < /[a-z_]\w*/i > { text }
|
2936
2938
|
def _ast_word
|
2937
2939
|
|
2938
2940
|
_save = self.pos
|
2939
2941
|
while true # sequence
|
2940
2942
|
_text_start = self.pos
|
2941
|
-
_tmp = scan(/\A(
|
2943
|
+
_tmp = scan(/\A(?i-mx:[a-z_]\w*)/)
|
2942
2944
|
if _tmp
|
2943
2945
|
text = get_text(_text_start)
|
2944
2946
|
end
|
@@ -3134,10 +3136,10 @@ class KPeg::FormatParser
|
|
3134
3136
|
Rules[:_space] = rule_info("space", "(\" \" | \"\\t\" | eol)")
|
3135
3137
|
Rules[:__hyphen_] = rule_info("-", "(space | comment)*")
|
3136
3138
|
Rules[:_kleene] = rule_info("kleene", "\"*\"")
|
3137
|
-
Rules[:_var] = rule_info("var", "< (\"-\" | /[a-
|
3138
|
-
Rules[:_method] = rule_info("method", "< /[a-
|
3139
|
+
Rules[:_var] = rule_info("var", "< (\"-\" | /[a-z][\\w-]*/i) > { text }")
|
3140
|
+
Rules[:_method] = rule_info("method", "< /[a-z_]\\w*/i > { text }")
|
3139
3141
|
Rules[:_dbl_escapes] = rule_info("dbl_escapes", "(\"n\" { \"\\n\" } | \"s\" { \" \" } | \"r\" { \"\\r\" } | \"t\" { \"\\t\" } | \"v\" { \"\\v\" } | \"f\" { \"\\f\" } | \"b\" { \"\\b\" } | \"a\" { \"\\a\" } | \"e\" { \"\\e\" } | \"\\\\\" { \"\\\\\" } | \"\\\"\" { \"\\\"\" } | num_escapes | < . > { text })")
|
3140
|
-
Rules[:_num_escapes] = rule_info("num_escapes", "(< /[0-7]{1,3}/ > { [text.to_i(8)].pack(\"U\") } | \"x\" < /[
|
3142
|
+
Rules[:_num_escapes] = rule_info("num_escapes", "(< /[0-7]{1,3}/ > { [text.to_i(8)].pack(\"U\") } | \"x\" < /[a-f\\d]{2}/i > { [text.to_i(16)].pack(\"U\") })")
|
3141
3143
|
Rules[:_dbl_seq] = rule_info("dbl_seq", "< /[^\\\\\"]+/ > { text }")
|
3142
3144
|
Rules[:_dbl_not_quote] = rule_info("dbl_not_quote", "(\"\\\\\" dbl_escapes:s | dbl_seq:s)*:ary { Array(ary) }")
|
3143
3145
|
Rules[:_dbl_string] = rule_info("dbl_string", "\"\\\"\" dbl_not_quote:s \"\\\"\" { @g.str(s.join) }")
|
@@ -3149,9 +3151,9 @@ class KPeg::FormatParser
|
|
3149
3151
|
Rules[:_not_slash] = rule_info("not_slash", "< (\"\\\\/\" | /[^\\/]/)+ > { text }")
|
3150
3152
|
Rules[:_regexp_opts] = rule_info("regexp_opts", "< [a-z]* > { text }")
|
3151
3153
|
Rules[:_regexp] = rule_info("regexp", "\"/\" not_slash:body \"/\" regexp_opts:opts { @g.reg body, opts }")
|
3152
|
-
Rules[:_char] = rule_info("char", "< /[a-
|
3154
|
+
Rules[:_char] = rule_info("char", "< /[a-z\\d]/i > { text }")
|
3153
3155
|
Rules[:_char_range] = rule_info("char_range", "\"[\" char:l \"-\" char:r \"]\" { @g.range(l,r) }")
|
3154
|
-
Rules[:_range_num] = rule_info("range_num", "< /[1-9]
|
3156
|
+
Rules[:_range_num] = rule_info("range_num", "< /[1-9]\\d*/ > { text }")
|
3155
3157
|
Rules[:_range_elem] = rule_info("range_elem", "< (range_num | kleene) > { text }")
|
3156
3158
|
Rules[:_mult_range] = rule_info("mult_range", "(\"[\" - range_elem:l - \",\" - range_elem:r - \"]\" { [l == \"*\" ? nil : l.to_i, r == \"*\" ? nil : r.to_i] } | \"[\" - range_num:e - \"]\" { [e.to_i, e.to_i] })")
|
3157
3159
|
Rules[:_curly_block] = rule_info("curly_block", "curly")
|
@@ -3163,13 +3165,14 @@ class KPeg::FormatParser
|
|
3163
3165
|
Rules[:_choose_cont] = rule_info("choose_cont", "- \"|\" - values:v { v }")
|
3164
3166
|
Rules[:_expression] = rule_info("expression", "(values:v choose_cont+:alts { @g.any(v, *alts) } | values)")
|
3165
3167
|
Rules[:_args] = rule_info("args", "(args:a \",\" - var:n - { a + [n] } | - var:n - { [n] })")
|
3166
|
-
Rules[:_statement] = rule_info("statement", "(- var:v \"(\" args:a \")\" - \"=\" - expression:o { @g.set(v, o, a) } | - var:v - \"=\" - expression:o { @g.set(v, o) } | - \"%\" var:name - \"=\" - < /[
|
3168
|
+
Rules[:_statement] = rule_info("statement", "(- var:v \"(\" args:a \")\" - \"=\" - expression:o { @g.set(v, o, a) } | - var:v - \"=\" - expression:o { @g.set(v, o) } | - \"%\" var:name - \"=\" - < /[:\\w]+/ > { @g.add_foreign_grammar(name, text) } | - \"%%\" - curly:act { @g.add_setup act } | - \"%%\" - var:name - curly:act { @g.add_directive name, act } | - \"%%\" - var:name - \"=\" - < (!\"\\n\" .)+ > { @g.set_variable(name, text) })")
|
3167
3169
|
Rules[:_statements] = rule_info("statements", "statement (- statements)?")
|
3168
3170
|
Rules[:_eof] = rule_info("eof", "!.")
|
3169
3171
|
Rules[:_root] = rule_info("root", "statements - eof_comment? eof")
|
3170
|
-
Rules[:_ast_constant] = rule_info("ast_constant", "< /[A-Z]
|
3171
|
-
Rules[:_ast_word] = rule_info("ast_word", "< /[
|
3172
|
+
Rules[:_ast_constant] = rule_info("ast_constant", "< /[A-Z]\\w*/ > { text }")
|
3173
|
+
Rules[:_ast_word] = rule_info("ast_word", "< /[a-z_]\\w*/i > { text }")
|
3172
3174
|
Rules[:_ast_sp] = rule_info("ast_sp", "(\" \" | \"\\t\")*")
|
3173
3175
|
Rules[:_ast_words] = rule_info("ast_words", "(ast_words:r ast_sp \",\" ast_sp ast_word:w { r + [w] } | ast_word:w { [w] })")
|
3174
3176
|
Rules[:_ast_root] = rule_info("ast_root", "(ast_constant:c \"(\" ast_words:w \")\" { [c, w] } | ast_constant:c \"()\"? { [c, []] })")
|
3177
|
+
# :startdoc:
|
3175
3178
|
end
|
data/lib/kpeg/grammar.rb
CHANGED
data/lib/kpeg/grammar_renderer.rb
CHANGED
@@ -10,6 +10,20 @@ module KPeg
|
|
10
10
|
widest = @grammar.rules.keys.sort { |a,b| a.size <=> b.size }.last
|
11
11
|
indent = widest.size
|
12
12
|
|
13
|
+
@grammar.variables.sort.each do |name, value|
|
14
|
+
io.print "%% #{name} = #{value}\n"
|
15
|
+
end
|
16
|
+
|
17
|
+
unless @grammar.variables.empty?
|
18
|
+
io.print "\n"
|
19
|
+
end
|
20
|
+
|
21
|
+
@grammar.directives.sort_by { |name,| name }.each do |name, act|
|
22
|
+
io.print "%% #{name} {"
|
23
|
+
io.print act.action
|
24
|
+
io.print "}\n\n"
|
25
|
+
end
|
26
|
+
|
13
27
|
@grammar.setup_actions.each do |act|
|
14
28
|
io.print "%% {"
|
15
29
|
io.print act.action
|
data/lib/kpeg/position.rb
CHANGED
data/lib/kpeg/string_escape.rb
CHANGED
@@ -1,11 +1,363 @@
|
|
1
|
-
|
1
|
+
class KPeg::StringEscape
|
2
|
+
# :stopdoc:
|
3
|
+
|
4
|
+
# This is distinct from setup_parser so that a standalone parser
|
5
|
+
# can redefine #initialize and still have access to the proper
|
6
|
+
# parser setup code.
|
7
|
+
def initialize(str, debug=false)
|
8
|
+
setup_parser(str, debug)
|
9
|
+
end
|
10
|
+
|
11
|
+
|
12
|
+
|
13
|
+
# Prepares for parsing +str+. If you define a custom initialize you must
|
14
|
+
# call this method before #parse
|
15
|
+
def setup_parser(str, debug=false)
|
16
|
+
@string = str
|
17
|
+
@pos = 0
|
18
|
+
@memoizations = Hash.new { |h,k| h[k] = {} }
|
19
|
+
@result = nil
|
20
|
+
@failed_rule = nil
|
21
|
+
@failing_rule_offset = -1
|
22
|
+
|
23
|
+
setup_foreign_grammar
|
24
|
+
end
|
25
|
+
|
26
|
+
attr_reader :string
|
27
|
+
attr_reader :failing_rule_offset
|
28
|
+
attr_accessor :result, :pos
|
29
|
+
|
30
|
+
|
31
|
+
def current_column(target=pos)
|
32
|
+
if c = string.rindex("\n", target-1)
|
33
|
+
return target - c - 1
|
34
|
+
end
|
35
|
+
|
36
|
+
target + 1
|
37
|
+
end
|
38
|
+
|
39
|
+
def current_line(target=pos)
|
40
|
+
cur_offset = 0
|
41
|
+
cur_line = 0
|
42
|
+
|
43
|
+
string.each_line do |line|
|
44
|
+
cur_line += 1
|
45
|
+
cur_offset += line.size
|
46
|
+
return cur_line if cur_offset >= target
|
47
|
+
end
|
48
|
+
|
49
|
+
-1
|
50
|
+
end
|
51
|
+
|
52
|
+
def lines
|
53
|
+
lines = []
|
54
|
+
string.each_line { |l| lines << l }
|
55
|
+
lines
|
56
|
+
end
|
57
|
+
|
58
|
+
|
59
|
+
|
60
|
+
def get_text(start)
|
61
|
+
@string[start..@pos-1]
|
62
|
+
end
|
63
|
+
|
64
|
+
def show_pos
|
65
|
+
width = 10
|
66
|
+
if @pos < width
|
67
|
+
"#{@pos} (\"#{@string[0,@pos]}\" @ \"#{@string[@pos,width]}\")"
|
68
|
+
else
|
69
|
+
"#{@pos} (\"... #{@string[@pos - width, width]}\" @ \"#{@string[@pos,width]}\")"
|
70
|
+
end
|
71
|
+
end
|
72
|
+
|
73
|
+
def failure_info
|
74
|
+
l = current_line @failing_rule_offset
|
75
|
+
c = current_column @failing_rule_offset
|
76
|
+
|
77
|
+
if @failed_rule.kind_of? Symbol
|
78
|
+
info = self.class::Rules[@failed_rule]
|
79
|
+
"line #{l}, column #{c}: failed rule '#{info.name}' = '#{info.rendered}'"
|
80
|
+
else
|
81
|
+
"line #{l}, column #{c}: failed rule '#{@failed_rule}'"
|
82
|
+
end
|
83
|
+
end
|
84
|
+
|
85
|
+
def failure_caret
|
86
|
+
l = current_line @failing_rule_offset
|
87
|
+
c = current_column @failing_rule_offset
|
88
|
+
|
89
|
+
line = lines[l-1]
|
90
|
+
"#{line}\n#{' ' * (c - 1)}^"
|
91
|
+
end
|
92
|
+
|
93
|
+
def failure_character
|
94
|
+
l = current_line @failing_rule_offset
|
95
|
+
c = current_column @failing_rule_offset
|
96
|
+
lines[l-1][c-1, 1]
|
97
|
+
end
|
98
|
+
|
99
|
+
def failure_oneline
|
100
|
+
l = current_line @failing_rule_offset
|
101
|
+
c = current_column @failing_rule_offset
|
102
|
+
|
103
|
+
char = lines[l-1][c-1, 1]
|
104
|
+
|
105
|
+
if @failed_rule.kind_of? Symbol
|
106
|
+
info = self.class::Rules[@failed_rule]
|
107
|
+
"@#{l}:#{c} failed rule '#{info.name}', got '#{char}'"
|
108
|
+
else
|
109
|
+
"@#{l}:#{c} failed rule '#{@failed_rule}', got '#{char}'"
|
110
|
+
end
|
111
|
+
end
|
112
|
+
|
113
|
+
class ParseError < RuntimeError
|
114
|
+
end
|
115
|
+
|
116
|
+
def raise_error
|
117
|
+
raise ParseError, failure_oneline
|
118
|
+
end
|
119
|
+
|
120
|
+
def show_error(io=STDOUT)
|
121
|
+
error_pos = @failing_rule_offset
|
122
|
+
line_no = current_line(error_pos)
|
123
|
+
col_no = current_column(error_pos)
|
124
|
+
|
125
|
+
io.puts "On line #{line_no}, column #{col_no}:"
|
126
|
+
|
127
|
+
if @failed_rule.kind_of? Symbol
|
128
|
+
info = self.class::Rules[@failed_rule]
|
129
|
+
io.puts "Failed to match '#{info.rendered}' (rule '#{info.name}')"
|
130
|
+
else
|
131
|
+
io.puts "Failed to match rule '#{@failed_rule}'"
|
132
|
+
end
|
133
|
+
|
134
|
+
io.puts "Got: #{string[error_pos,1].inspect}"
|
135
|
+
line = lines[line_no-1]
|
136
|
+
io.puts "=> #{line}"
|
137
|
+
io.print(" " * (col_no + 3))
|
138
|
+
io.puts "^"
|
139
|
+
end
|
140
|
+
|
141
|
+
def set_failed_rule(name)
|
142
|
+
if @pos > @failing_rule_offset
|
143
|
+
@failed_rule = name
|
144
|
+
@failing_rule_offset = @pos
|
145
|
+
end
|
146
|
+
end
|
147
|
+
|
148
|
+
attr_reader :failed_rule
|
149
|
+
|
150
|
+
def match_string(str)
|
151
|
+
len = str.size
|
152
|
+
if @string[pos,len] == str
|
153
|
+
@pos += len
|
154
|
+
return str
|
155
|
+
end
|
156
|
+
|
157
|
+
return nil
|
158
|
+
end
|
159
|
+
|
160
|
+
def scan(reg)
|
161
|
+
if m = reg.match(@string[@pos..-1])
|
162
|
+
width = m.end(0)
|
163
|
+
@pos += width
|
164
|
+
return true
|
165
|
+
end
|
166
|
+
|
167
|
+
return nil
|
168
|
+
end
|
169
|
+
|
170
|
+
if "".respond_to? :getbyte
|
171
|
+
def get_byte
|
172
|
+
if @pos >= @string.size
|
173
|
+
return nil
|
174
|
+
end
|
175
|
+
|
176
|
+
s = @string.getbyte @pos
|
177
|
+
@pos += 1
|
178
|
+
s
|
179
|
+
end
|
180
|
+
else
|
181
|
+
def get_byte
|
182
|
+
if @pos >= @string.size
|
183
|
+
return nil
|
184
|
+
end
|
185
|
+
|
186
|
+
s = @string[@pos]
|
187
|
+
@pos += 1
|
188
|
+
s
|
189
|
+
end
|
190
|
+
end
|
191
|
+
|
192
|
+
def parse(rule=nil)
|
193
|
+
# We invoke the rules indirectly via apply
|
194
|
+
# instead of by just calling them as methods because
|
195
|
+
# if the rules use left recursion, apply needs to
|
196
|
+
# manage that.
|
197
|
+
|
198
|
+
if !rule
|
199
|
+
apply(:_root)
|
200
|
+
else
|
201
|
+
method = rule.gsub("-","_hyphen_")
|
202
|
+
apply :"_#{method}"
|
203
|
+
end
|
204
|
+
end
|
205
|
+
|
206
|
+
class MemoEntry
|
207
|
+
def initialize(ans, pos)
|
208
|
+
@ans = ans
|
209
|
+
@pos = pos
|
210
|
+
@result = nil
|
211
|
+
@set = false
|
212
|
+
@left_rec = false
|
213
|
+
end
|
214
|
+
|
215
|
+
attr_reader :ans, :pos, :result, :set
|
216
|
+
attr_accessor :left_rec
|
217
|
+
|
218
|
+
def move!(ans, pos, result)
|
219
|
+
@ans = ans
|
220
|
+
@pos = pos
|
221
|
+
@result = result
|
222
|
+
@set = true
|
223
|
+
@left_rec = false
|
224
|
+
end
|
225
|
+
end
|
226
|
+
|
227
|
+
def external_invoke(other, rule, *args)
|
228
|
+
old_pos = @pos
|
229
|
+
old_string = @string
|
230
|
+
|
231
|
+
@pos = other.pos
|
232
|
+
@string = other.string
|
233
|
+
|
234
|
+
begin
|
235
|
+
if val = __send__(rule, *args)
|
236
|
+
other.pos = @pos
|
237
|
+
other.result = @result
|
238
|
+
else
|
239
|
+
other.set_failed_rule "#{self.class}##{rule}"
|
240
|
+
end
|
241
|
+
val
|
242
|
+
ensure
|
243
|
+
@pos = old_pos
|
244
|
+
@string = old_string
|
245
|
+
end
|
246
|
+
end
|
247
|
+
|
248
|
+
def apply_with_args(rule, *args)
|
249
|
+
memo_key = [rule, args]
|
250
|
+
if m = @memoizations[memo_key][@pos]
|
251
|
+
@pos = m.pos
|
252
|
+
if !m.set
|
253
|
+
m.left_rec = true
|
254
|
+
return nil
|
255
|
+
end
|
256
|
+
|
257
|
+
@result = m.result
|
258
|
+
|
259
|
+
return m.ans
|
260
|
+
else
|
261
|
+
m = MemoEntry.new(nil, @pos)
|
262
|
+
@memoizations[memo_key][@pos] = m
|
263
|
+
start_pos = @pos
|
264
|
+
|
265
|
+
ans = __send__ rule, *args
|
266
|
+
|
267
|
+
lr = m.left_rec
|
268
|
+
|
269
|
+
m.move! ans, @pos, @result
|
270
|
+
|
271
|
+
# Don't bother trying to grow the left recursion
|
272
|
+
# if it's failing straight away (thus there is no seed)
|
273
|
+
if ans and lr
|
274
|
+
return grow_lr(rule, args, start_pos, m)
|
275
|
+
else
|
276
|
+
return ans
|
277
|
+
end
|
278
|
+
|
279
|
+
return ans
|
280
|
+
end
|
281
|
+
end
|
282
|
+
|
283
|
+
def apply(rule)
|
284
|
+
if m = @memoizations[rule][@pos]
|
285
|
+
@pos = m.pos
|
286
|
+
if !m.set
|
287
|
+
m.left_rec = true
|
288
|
+
return nil
|
289
|
+
end
|
290
|
+
|
291
|
+
@result = m.result
|
292
|
+
|
293
|
+
return m.ans
|
294
|
+
else
|
295
|
+
m = MemoEntry.new(nil, @pos)
|
296
|
+
@memoizations[rule][@pos] = m
|
297
|
+
start_pos = @pos
|
298
|
+
|
299
|
+
ans = __send__ rule
|
300
|
+
|
301
|
+
lr = m.left_rec
|
302
|
+
|
303
|
+
m.move! ans, @pos, @result
|
304
|
+
|
305
|
+
# Don't bother trying to grow the left recursion
|
306
|
+
# if it's failing straight away (thus there is no seed)
|
307
|
+
if ans and lr
|
308
|
+
return grow_lr(rule, nil, start_pos, m)
|
309
|
+
else
|
310
|
+
return ans
|
311
|
+
end
|
312
|
+
|
313
|
+
return ans
|
314
|
+
end
|
315
|
+
end
|
316
|
+
|
317
|
+
def grow_lr(rule, args, start_pos, m)
|
318
|
+
while true
|
319
|
+
@pos = start_pos
|
320
|
+
@result = m.result
|
321
|
+
|
322
|
+
if args
|
323
|
+
ans = __send__ rule, *args
|
324
|
+
else
|
325
|
+
ans = __send__ rule
|
326
|
+
end
|
327
|
+
return nil unless ans
|
328
|
+
|
329
|
+
break if @pos <= m.pos
|
330
|
+
|
331
|
+
m.move! ans, @pos, @result
|
332
|
+
end
|
333
|
+
|
334
|
+
@result = m.result
|
335
|
+
@pos = m.pos
|
336
|
+
return m.ans
|
337
|
+
end
|
338
|
+
|
339
|
+
class RuleInfo
|
340
|
+
def initialize(name, rendered)
|
341
|
+
@name = name
|
342
|
+
@rendered = rendered
|
343
|
+
end
|
344
|
+
|
345
|
+
attr_reader :name, :rendered
|
346
|
+
end
|
347
|
+
|
348
|
+
def self.rule_info(name, rendered)
|
349
|
+
RuleInfo.new(name, rendered)
|
350
|
+
end
|
351
|
+
|
2
352
|
|
3
|
-
|
353
|
+
# :startdoc:
|
4
354
|
|
5
355
|
|
6
356
|
attr_reader :text
|
7
357
|
|
8
358
|
|
359
|
+
# :stopdoc:
|
360
|
+
def setup_foreign_grammar; end
|
9
361
|
|
10
362
|
# segment = (< /[\w ]+/ > { text } | "\\" { "\\\\" } | "\n" { "\\n" } | "\t" { "\\t" } | "\b" { "\\b" } | "\"" { "\\\"" } | < . > { text })
|
11
363
|
def _segment
|
@@ -251,4 +603,5 @@ class KPeg::StringEscape < KPeg::CompiledParser
|
|
251
603
|
Rules[:_root] = rule_info("root", "segment*:s { @text = s.join }")
|
252
604
|
Rules[:_embed_seg] = rule_info("embed_seg", "(\"\#\" { \"\\\\\#\" } | segment)")
|
253
605
|
Rules[:_embed] = rule_info("embed", "embed_seg*:s { @text = s.join }")
|
606
|
+
# :startdoc:
|
254
607
|
end
|