kpeg 0.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Gemfile +4 -0
- data/LICENSE +25 -0
- data/README.md +8 -0
- data/Rakefile +24 -0
- data/bin/kpeg +126 -0
- data/doc/syntax_kpeg/ftdetect/kpeg.vim +1 -0
- data/doc/syntax_kpeg/syntax/kpeg.vim +55 -0
- data/kpeg.gemspec +24 -0
- data/lib/kpeg.rb +50 -0
- data/lib/kpeg/code_generator.rb +355 -0
- data/lib/kpeg/compiled_parser.rb +299 -0
- data/lib/kpeg/format_parser.rb +2440 -0
- data/lib/kpeg/grammar.rb +807 -0
- data/lib/kpeg/grammar_renderer.rb +172 -0
- data/lib/kpeg/match.rb +70 -0
- data/lib/kpeg/parser.rb +193 -0
- data/lib/kpeg/position.rb +34 -0
- data/lib/kpeg/string_escape.rb +322 -0
- data/lib/kpeg/version.rb +3 -0
- data/test/test_file_parser_roundtrip.rb +112 -0
- data/test/test_gen_calc.rb +63 -0
- data/test/test_kpeg.rb +416 -0
- data/test/test_kpeg_code_generator.rb +1307 -0
- data/test/test_kpeg_compiled_parser.rb +81 -0
- data/test/test_kpeg_format.rb +467 -0
- data/test/test_kpeg_grammar_renderer.rb +223 -0
- metadata +97 -0
@@ -0,0 +1,172 @@
|
|
1
|
+
require 'kpeg/string_escape'
|
2
|
+
|
3
|
+
module KPeg
|
4
|
+
# Renders a grammar object back into textual grammar source.
#
# The grammar handed to the constructor must respond to +rules+,
# +setup_actions+, +rule_order+ and +find+; those are the only
# messages this class sends to it.
class GrammarRenderer
  # gram - the grammar object that #render will print.
  def initialize(gram)
    @grammar = gram
  end

  # Writes every setup action, then every rule (in +rule_order+), to +io+.
  #
  # io - any object responding to +print+ and +puts+.
  #
  # Rule names are right-aligned to the widest rule name. A grammar
  # without any rules renders nothing instead of raising on nil.
  def render(io)
    # `max` of an empty list is nil, so fall back to a zero-width column.
    indent = @grammar.rules.keys.map { |name| name.size }.max || 0

    @grammar.setup_actions.each do |act|
      io.print "%% {"
      io.print act.action
      io.print "}\n\n"
    end

    @grammar.rule_order.each do |name|
      rule = @grammar.find(name)

      io.print(' ' * (indent - name.size))
      io.print "#{name} = "

      op = rule.op

      if op.kind_of? Choice
        # A top-level choice puts each alternative on its own line,
        # with the bar lined up under the rule's "=".
        render_joined io, op.ops, "\n#{' ' * (indent+1)}| "
      else
        render_op io, op
      end

      io.puts
    end
  end

  # Returns true when +op+ has to be wrapped in parentheses before a
  # prefix, suffix or tag is attached to it, false otherwise.
  def parens?(op)
    case op
    when Sequence, AndPredicate, NotPredicate
      true
    else
      false
    end
  end

  # Escapes +str+ by running it through StringEscape.
  #
  # str   - the text to escape.
  # embed - when true the "embed" rule is parsed instead of the
  #         parser's default rule.
  #
  # Returns the parser's +text+; calls the parser's +raise_error+ when
  # the parse does not succeed.
  def self.escape(str, embed=false)
    parc = StringEscape.new(str)

    rule = (embed ? "embed" : nil)

    unless parc.parse(rule)
      parc.raise_error
    end

    return parc.text
  end

  # Prints the source form of a single operator (recursively) to +io+.
  #
  # Raises RuntimeError for an operator class it does not know.
  def render_op(io, op)
    case op
    when Dot
      io.print "."
    when LiteralString
      esc = GrammarRenderer.escape op.string
      io.print '"'
      io.print esc
      io.print '"'
    when LiteralRegexp
      io.print op.regexp.inspect
    when CharRange
      io.print "[#{op.start}-#{op.fin}]"
    when Sequence
      render_joined io, op.ops, " "
    when Choice
      # A nested choice is always parenthesised.
      io.print "("
      render_joined io, op.ops, " | "
      io.print ")"
    when Multiple
      render_grouped io, op.op

      if op.max
        if op.min == 0 && op.max == 1
          io.print "?"
        else
          io.print "[#{op.min}, #{op.max}]"
        end
      elsif op.min == 0
        io.print "*"
      elsif op.min == 1
        io.print "+"
      else
        io.print "[#{op.min},*]"
      end
    when AndPredicate
      io.print "&"
      render_grouped io, op.op
    when NotPredicate
      io.print "!"
      render_grouped io, op.op
    when RuleReference
      io.print op.rule_name
    when InvokeRule
      if op.arguments
        io.print "#{op.rule_name}#{op.arguments}"
      else
        io.print "@#{op.rule_name}"
      end
    when ForeignInvokeRule
      if op.arguments
        io.print "%#{op.grammar_name}.#{op.rule_name}#{op.arguments}"
      else
        io.print "%#{op.grammar_name}.#{op.rule_name}"
      end
    when Tag
      render_grouped io, op.op

      if op.tag_name
        io.print ":#{op.tag_name}"
      end
    when Action
      io.print "{#{op.action}}"
    when Collect
      io.print "< "
      render_op io, op.op
      io.print " >"
    else
      raise "Unknown op type - #{op.class}"
    end
  end

  private

  # Renders each operator in +ops+, printing +separator+ between them.
  def render_joined(io, ops, separator)
    ops.each_with_index do |sub, idx|
      io.print separator unless idx == 0
      render_op io, sub
    end
  end

  # Renders +op+, wrapped in parentheses when #parens? says so.
  def render_grouped(io, op)
    if parens?(op)
      io.print "("
      render_op io, op
      io.print ")"
    else
      render_op io, op
    end
  end
end
|
171
|
+
|
172
|
+
end
|
data/lib/kpeg/match.rb
ADDED
@@ -0,0 +1,70 @@
|
|
1
|
+
module KPeg
  # Common superclass of the match result objects below.
  class Match; end

  # Match result that carries the text an operator consumed.
  class MatchString < Match
    attr_reader :op, :string

    # A string match's total text is simply its own string.
    alias_method :total_string, :string

    # op     - the operator that produced this match (asked for +action+).
    # string - the matched text.
    def initialize(op, string)
      @op, @string = op, string
    end

    # Prints a debugging description of this match to standard output,
    # each line prefixed with +indent+.
    def explain(indent="")
      puts "#{indent}KPeg::Match:#{object_id.to_s(16)}",
           "#{indent} op: #{@op.inspect}",
           "#{indent} string: #{@string.inspect}"
    end

    # Returns the matched string, or - when the operator has an action -
    # the action's result for that string. With +obj+ the action is
    # evaluated via instance_exec on +obj+; otherwise it is called directly.
    def value(obj=nil)
      return @string unless @op.action

      obj ? obj.instance_exec(@string, &@op.action) : @op.action.call(@string)
    end
  end

  # Match result built out of an ordered list of sub-matches.
  class MatchComposition < Match
    attr_reader :op, :matches

    # op      - the operator that produced this match (asked for
    #           +prune_values+ and +action+).
    # matches - the sub-matches, in order.
    def initialize(op, matches)
      @op, @matches = op, matches
    end

    # Prints a debugging description of this match and, indented one
    # level further, of every sub-match.
    def explain(indent="")
      puts "#{indent}KPeg::Match:#{object_id.to_s(16)}",
           "#{indent} op: #{@op.inspect}",
           "#{indent} matches:"
      nested = "#{indent} "
      @matches.each { |sub| sub.explain(nested) }
    end

    # Returns the concatenated total_string of every sub-match.
    def total_string
      @matches.map(&:total_string).join
    end

    # Collects the sub-match values, lets the operator prune them, and
    # either returns them (a lone value is unwrapped from its array) or
    # feeds them as arguments to the operator's action.
    def value(obj=nil)
      values = @op.prune_values(@matches.map { |sub| sub.value(obj) })

      return(values.size == 1 ? values.first : values) unless @op.action

      obj ? obj.instance_exec(*values, &@op.action) : @op.action.call(*values)
    end
  end
end
|
data/lib/kpeg/parser.rb
ADDED
@@ -0,0 +1,193 @@
|
|
1
|
+
require 'kpeg/position'
|
2
|
+
|
3
|
+
module KPeg
|
4
|
+
class Parser < StringScanner
|
5
|
+
# str     - the input text, handed to the superclass constructor.
# grammar - the grammar whose rules will be applied.
# log     - when true, #apply prints a trace line for each rule it tries.
def initialize(str, grammar, log=false)
  super str

  @grammar = grammar
  @log = log

  # Two-level cache: the outer hash creates an empty inner hash the
  # first time a key is looked up (#apply indexes it by rule, then by
  # position).
  @memoizations = Hash.new { |h,k| h[k] = {} }

  # Nothing has failed yet.
  @failing_offset = nil
  @failing_op = nil
end
|
16
|
+
|
17
|
+
attr_reader :grammar, :memoizations, :failing_offset
|
18
|
+
attr_accessor :failing_op
|
19
|
+
|
20
|
+
include Position
|
21
|
+
|
22
|
+
# Temporarily makes +gram+ the active grammar while the given block
# runs, then restores the previous grammar - also when the block raises.
#
# Returns whatever the block returns.
def switch_grammar(gram)
  previous, @grammar = @grammar, gram
  yield
ensure
  @grammar = previous
end
|
31
|
+
|
32
|
+
# Records +op+ and the current position as the most recent failure.
#
# Always returns nil, so a caller can use the call as its own
# "no match" result.
def fail(op)
  @failing_offset = pos
  @failing_op = op
  nil
end
|
37
|
+
|
38
|
+
# Describes what the operator recorded by #fail was expecting.
#
# Returns a Range running from the operator's +start+ to its +fin+ for
# a CharRange, nil for a Dot, and the operator's +string+ for every
# other operator.
def expected_string
  case @failing_op
  when CharRange
    # CharRange is the operator that carries start/fin bounds. This
    # branch used to test for Choice, so a failing CharRange fell
    # through to the +string+ branch below.
    Range.new(@failing_op.start, @failing_op.fin)
  when Dot
    # Dot accepts any character, so there is nothing specific to name.
    nil
  else
    @failing_op.string
  end
end
|
48
|
+
|
49
|
+
# Placeholder stored as a memo entry's answer while its rule is still
# being evaluated. #apply sets +detected+ when it meets the placeholder
# again, i.e. when the rule re-entered itself at the same position.
class LeftRecursive
  attr_accessor :detected

  # detected - initial value of the flag (false by default).
  def initialize(detected=false)
    @detected = detected
  end
end
|
56
|
+
|
57
|
+
# One memoized result: the answer a rule produced, the position the
# parser ended up at, and how many times the entry has been used.
class MemoEntry
  attr_reader :ans, :pos, :uses

  # ans - the answer to remember.
  # pos - the position that belongs to that answer.
  def initialize(ans, pos)
    @ans, @pos, @uses = ans, pos, 1
  end

  # Counts one more use of this entry; returns the new count.
  def inc!
    @uses += 1
  end

  # Replaces the remembered answer and position; returns +pos+.
  def move!(ans, pos)
    @ans, @pos = ans, pos
    pos
  end
end
|
75
|
+
|
76
|
+
# Runs +rule+ directly, bypassing the memoization done by #apply:
# the rule's operator is matched against this parser and its result
# is returned unchanged.
def invoke(rule)
  operator = rule.op
  operator.match(self)
end
|
80
|
+
|
81
|
+
# Applies +rule+ at the current position, memoizing the outcome per
# (rule, position).
#
# A cached entry is reused and the parser jumps to the entry's stored
# position. If the cached answer is still the LeftRecursive
# placeholder, the rule has re-entered itself at this position: the
# placeholder is flagged and nil is returned. On a cache miss the
# rule's operator is matched, the entry is updated, and - when the
# match succeeded and left recursion was flagged meanwhile - #grow_lr
# is asked to extend the result.
#
# Returns the rule's answer, or nil when it did not match.
def apply(rule)
  cache = @memoizations[rule]
  entry = cache[pos]

  if entry
    entry.inc!

    self.pos = entry.pos

    if entry.ans.kind_of? LeftRecursive
      entry.ans.detected = true
      puts "LR #{rule.name} @ #{self.inspect}" if @log
      return nil
    end

    result = entry.ans
  else
    start_pos = pos

    # Seed the cache with a placeholder so a recursive call at this
    # same position can be recognised.
    marker = LeftRecursive.new(false)
    entry = MemoEntry.new(marker, start_pos)
    cache[start_pos] = entry

    puts "START #{rule.name} @ #{self.inspect}" if @log

    result = rule.op.match(self)

    entry.move! result, pos

    # Don't bother trying to grow the left recursion
    # if it's failing straight away (thus there is no seed)
    result = grow_lr(rule, start_pos, entry) if result && marker.detected
  end

  if @log
    if result
      puts " OK #{rule.name} @ #{self.inspect}"
    else
      puts " FAIL #{rule.name} @ #{self.inspect}"
    end
  end

  result
end
|
126
|
+
|
127
|
+
# Repeatedly re-matches +rule+ from +start_pos+ for as long as each
# attempt ends further along than the position stored in the memo
# entry +m+, updating +m+ with every longer result.
#
# Returns nil as soon as a re-match fails. Otherwise the parser is
# moved to the position stored in +m+ and the answer stored in +m+ is
# returned.
def grow_lr(rule, start_pos, m)
  growing = true

  while growing
    self.pos = start_pos
    candidate = rule.op.match(self)
    return nil unless candidate

    # Stop once an attempt no longer gets past the best result so far.
    growing = pos > m.pos
    m.move! candidate, pos if growing
  end

  self.pos = m.pos
  m.ans
end
|
141
|
+
|
142
|
+
# Returns true when a failing operator is currently recorded,
# false otherwise.
def failed?
  @failing_op ? true : false
end
|
145
|
+
|
146
|
+
# Parses the input starting from the rule called +name+, or from the
# grammar's root rule when no name is given.
#
# Returns whatever #apply returns for that rule.
# Raises RuntimeError when +name+ is given but the grammar has no such
# rule.
#
# When the whole input has been consumed, any failure recorded along
# the way is cleared, so #failed? only reports true for incomplete
# parses.
def parse(name=nil)
  rule = name ? @grammar.find(name) : @grammar.root
  raise "Unknown rule - #{name}" if name && !rule

  match = apply rule

  # The scanner position is a byte offset, so it must not be compared
  # with the character count of the string: with multibyte input the
  # two differ and a complete parse would still look like a failure.
  # eos? performs the end-of-input check in the scanner's own terms.
  @failing_op = nil if eos?

  match
end
|
164
|
+
|
165
|
+
# Builds a one-line, human-readable description of the recorded
# failure: what was expected, and the line, column and offset at
# which the failure was recorded.
def expectation
  offset = @failing_offset
  line = current_line(offset)
  column = current_column(offset)

  wanted = expected_string

  # Phrase the expectation according to the kind of value reported.
  description =
    case wanted
    when String then wanted.inspect
    when Range  then "to be between #{wanted.begin} and #{wanted.end}"
    when Array  then "to be one of #{wanted.inspect}"
    when nil    then "anything (no more input)"
    else             "unknown"
    end

  "Expected #{description} at line #{line}, column #{column} (offset #{offset})"
end
|
189
|
+
|
190
|
+
end
|
191
|
+
|
192
|
+
|
193
|
+
end
|
@@ -0,0 +1,34 @@
|
|
1
|
+
module KPeg
|
2
|
+
# Line/column helpers for a scanner-like host. The including object
# must provide +string+ (the full input) and +pos+ (the current
# offset, used as the default target).
module Position
  # STANDALONE START
  # Returns the column of offset +target+.
  #
  # When a newline precedes +target+ the result is the distance from
  # the character following that newline; otherwise it is +target+ + 1.
  # NOTE(review): the two branches look 0-based and 1-based
  # respectively - confirm which convention callers expect.
  def current_column(target=pos)
    # Only look backwards when something precedes the target. For
    # target 0 the old `target-1` became -1, which rindex treats as
    # "start from the last character" and so picked up a newline that
    # lies after the target.
    if target > 0 && (c = string.rindex("\n", target-1))
      return target - c - 1
    end

    target + 1
  end

  # Returns the 1-based number of the first line whose end offset is at
  # or beyond +target+, or -1 when the input has no such line.
  def current_line(target=pos)
    consumed = 0

    string.each_line.with_index(1) do |line, number|
      consumed += line.size
      return number if consumed >= target
    end

    -1
  end

  # Returns the input split into lines, line terminators included.
  def lines
    string.each_line.to_a
  end

  # STANDALONE END

end
|
34
|
+
end
|