kpeg 0.7
Sign up to get free protection for your applications and to get access to all the features.
- data/Gemfile +4 -0
- data/LICENSE +25 -0
- data/README.md +8 -0
- data/Rakefile +24 -0
- data/bin/kpeg +126 -0
- data/doc/syntax_kpeg/ftdetect/kpeg.vim +1 -0
- data/doc/syntax_kpeg/syntax/kpeg.vim +55 -0
- data/kpeg.gemspec +24 -0
- data/lib/kpeg.rb +50 -0
- data/lib/kpeg/code_generator.rb +355 -0
- data/lib/kpeg/compiled_parser.rb +299 -0
- data/lib/kpeg/format_parser.rb +2440 -0
- data/lib/kpeg/grammar.rb +807 -0
- data/lib/kpeg/grammar_renderer.rb +172 -0
- data/lib/kpeg/match.rb +70 -0
- data/lib/kpeg/parser.rb +193 -0
- data/lib/kpeg/position.rb +34 -0
- data/lib/kpeg/string_escape.rb +322 -0
- data/lib/kpeg/version.rb +3 -0
- data/test/test_file_parser_roundtrip.rb +112 -0
- data/test/test_gen_calc.rb +63 -0
- data/test/test_kpeg.rb +416 -0
- data/test/test_kpeg_code_generator.rb +1307 -0
- data/test/test_kpeg_compiled_parser.rb +81 -0
- data/test/test_kpeg_format.rb +467 -0
- data/test/test_kpeg_grammar_renderer.rb +223 -0
- metadata +97 -0
@@ -0,0 +1,172 @@
|
|
1
|
+
require 'kpeg/string_escape'
|
2
|
+
|
3
|
+
module KPeg
  # Writes a grammar object back out as kpeg grammar source text.
  #
  # The grammar handed to the constructor is only accessed through
  # +rules+ (a hash whose keys are rule names), +rule_order+,
  # +setup_actions+ and +find+.
  class GrammarRenderer
    # gram - the grammar object to render.
    def initialize(gram)
      @grammar = gram
    end

    # Print the whole grammar to +io+: first every setup action as a
    # "%% {...}" block, then one "name = expression" entry per rule, in
    # +rule_order+, with the rule names right-aligned.
    #
    # io - any object responding to +print+ and +puts+.
    def render(io)
      # Width of the longest rule name. Falls back to 0 so that a grammar
      # without rules renders (as nothing) instead of raising on nil.
      indent = @grammar.rules.keys.map { |name| name.size }.max || 0

      @grammar.setup_actions.each do |act|
        io.print "%% {"
        io.print act.action
        io.print "}\n\n"
      end

      @grammar.rule_order.each do |name|
        rule = @grammar.find(name)

        io.print(' ' * (indent - name.size))
        io.print "#{name} = "

        op = rule.op

        if op.kind_of? Choice
          # A top-level choice is laid out with one alternative per line,
          # the "|" lined up under the "=" of the rule.
          op.ops.each_with_index do |r,idx|
            io.print "\n#{' ' * (indent+1)}| " unless idx == 0
            render_op io, r
          end
        else
          render_op io, op
        end

        io.puts
      end
    end

    # Returns true when +op+ has to be wrapped in parentheses before a
    # prefix or suffix operator is attached to it.
    def parens?(op)
      case op
      when Sequence, AndPredicate, NotPredicate
        true
      else
        false
      end
    end

    # Run +str+ through StringEscape and return the resulting text.
    #
    # str   - the string to escape.
    # embed - when true the "embed" rule of StringEscape is used instead
    #         of its default rule.
    #
    # If parsing fails, StringEscape#raise_error is invoked.
    def self.escape(str, embed=false)
      parc = StringEscape.new(str)

      rule = (embed ? "embed" : nil)

      unless parc.parse(rule)
        parc.raise_error
      end

      return parc.text
    end

    # Print the grammar-source form of a single operator (and,
    # recursively, of its operands) to +io+.
    #
    # Raises RuntimeError for an operator class it does not know.
    def render_op(io, op)
      case op
      when Dot
        io.print "."
      when LiteralString
        esc = GrammarRenderer.escape op.string
        io.print '"'
        io.print esc
        io.print '"'
      when LiteralRegexp
        io.print op.regexp.inspect
      when CharRange
        io.print "[#{op.start}-#{op.fin}]"
      when Sequence
        op.ops.each_with_index do |r,idx|
          io.print " " unless idx == 0
          render_op io, r
        end
      when Choice
        # A nested choice always carries its own parentheses.
        io.print "("
        op.ops.each_with_index do |r,idx|
          io.print " | " unless idx == 0
          render_op io, r
        end
        io.print ")"
      when Multiple
        render_grouped io, op.op

        if op.max
          if op.min == 0 and op.max == 1
            io.print "?"
          else
            io.print "[#{op.min}, #{op.max}]"
          end
        elsif op.min == 0
          # No upper bound from here on.
          io.print "*"
        elsif op.min == 1
          io.print "+"
        else
          io.print "[#{op.min},*]"
        end
      when AndPredicate
        io.print "&"
        render_grouped io, op.op
      when NotPredicate
        io.print "!"
        render_grouped io, op.op
      when RuleReference
        io.print op.rule_name
      when InvokeRule
        if op.arguments
          io.print "#{op.rule_name}#{op.arguments}"
        else
          io.print "@#{op.rule_name}"
        end
      when ForeignInvokeRule
        if op.arguments
          io.print "%#{op.grammar_name}.#{op.rule_name}#{op.arguments}"
        else
          io.print "%#{op.grammar_name}.#{op.rule_name}"
        end
      when Tag
        render_grouped io, op.op
        io.print ":#{op.tag_name}" if op.tag_name
      when Action
        io.print "{#{op.action}}"
      when Collect
        io.print "< "
        render_op io, op.op
        io.print " >"
      else
        raise "Unknown op type - #{op.class}"
      end
    end

    private

    # Render +op+, surrounded by parentheses when parens? asks for them.
    def render_grouped(io, op)
      if parens?(op)
        io.print "("
        render_op io, op
        io.print ")"
      else
        render_op io, op
      end
    end
  end

end
|
data/lib/kpeg/match.rb
ADDED
@@ -0,0 +1,70 @@
|
|
1
|
+
module KPeg
  # Common ancestor of every match result.
  class Match; end

  # Result of an operator that matched a single piece of text.
  class MatchString < Match
    attr_reader :op, :string

    # For a string match the total text is just the matched string.
    alias_method :total_string, :string

    # op     - the operator that produced this match.
    # string - the text it matched.
    def initialize(op, string)
      @op = op
      @string = string
    end

    # Print a description of this match to standard output, every line
    # prefixed with +indent+.
    def explain(indent="")
      header = "#{indent}KPeg::Match:#{object_id.to_s(16)}"
      puts header, "#{indent} op: #{@op.inspect}", "#{indent} string: #{@string.inspect}"
    end

    # The value of this match: the matched text itself when the operator
    # has no action, otherwise the result of the action called with the
    # text. When +obj+ is given the action is evaluated with +obj+ as self.
    def value(obj=nil)
      action = @op.action
      return @string unless action

      obj ? obj.instance_exec(@string, &action) : action.call(@string)
    end
  end

  # Result of an operator that is built out of several sub-matches.
  class MatchComposition < Match
    attr_reader :op, :matches

    # op      - the operator that produced this match.
    # matches - the sub-matches, in order.
    def initialize(op, matches)
      @op = op
      @matches = matches
    end

    # Print a description of this match and, one indentation step
    # deeper, of every sub-match to standard output.
    def explain(indent="")
      puts "#{indent}KPeg::Match:#{object_id.to_s(16)}", "#{indent} op: #{@op.inspect}", "#{indent} matches:"

      nested = "#{indent} "
      @matches.each { |m| m.explain(nested) }
    end

    # The text of all sub-matches joined together.
    def total_string
      @matches.map(&:total_string).join
    end

    # Collect the values of the sub-matches, let the operator prune them
    # and then either hand them to the operator's action (evaluated on
    # +obj+ when one is given) or return them directly. Without an action
    # a single remaining value is returned unwrapped.
    def value(obj=nil)
      values = @op.prune_values(@matches.map { |m| m.value(obj) })
      action = @op.action

      if action
        return obj.instance_exec(*values, &action) if obj
        return action.call(*values)
      end

      values.size == 1 ? values.first : values
    end
  end


end
|
data/lib/kpeg/parser.rb
ADDED
@@ -0,0 +1,193 @@
|
|
1
|
+
require 'kpeg/position'
|
2
|
+
|
3
|
+
module KPeg
|
4
|
+
class Parser < StringScanner
|
5
|
+
# Set up a parser for +str+.
#
# str     - the input text; passed on to the superclass initializer.
# grammar - the grammar whose rules drive the parse.
# log     - stored in @log; when truthy the parser prints a trace
#           while applying rules.
def initialize(str, grammar, log=false)
  super(str)

  @log = log
  @grammar = grammar

  # Nothing has failed yet.
  @failing_op = nil
  @failing_offset = nil

  # A 2 level hash: the first level is created on demand for every key.
  @memoizations = Hash.new { |table, key| table[key] = {} }
end
|
16
|
+
|
17
|
+
attr_reader :grammar, :memoizations, :failing_offset
|
18
|
+
attr_accessor :failing_op
|
19
|
+
|
20
|
+
include Position
|
21
|
+
|
22
|
+
# Run the given block with +gram+ installed as the current grammar.
# The previous grammar is put back afterwards, even when the block
# raises. Returns whatever the block returns.
def switch_grammar(gram)
  previous = @grammar
  @grammar = gram
  yield
ensure
  @grammar = previous
end
|
31
|
+
|
32
|
+
# Record that +op+ failed to match at the current position.
# Always returns nil, so it can be used as the result of a failed match.
def fail(op)
  @failing_offset = pos
  @failing_op = op
  nil
end
|
37
|
+
|
38
|
+
# Describe what the failing operator wanted to see in the input.
#
# Returns a Range for a failed character range, nil for a failed Dot
# (which accepts any character) and otherwise whatever the failing
# operator reports through +string+.
def expected_string
  case @failing_op
  when CharRange
    # start/fin are the bounds of a character range. This branch used to
    # test for Choice, which is only ever read through +ops+.
    Range.new(@failing_op.start, @failing_op.fin)
  when Dot
    nil
  else
    @failing_op.string
  end
end
|
48
|
+
|
49
|
+
# Placeholder answer stored in a memo entry while its rule is still
# being evaluated. +detected+ is a flag other code can switch on.
class LeftRecursive
  attr_accessor :detected

  def initialize(detected=false)
    @detected = detected
  end
end
|
56
|
+
|
57
|
+
# One memoized result: an answer, the position that goes with it and
# a counter of how many times the entry has been used.
class MemoEntry
  attr_reader :ans, :pos, :uses

  # A fresh entry counts as used once.
  def initialize(ans, pos)
    @uses = 1
    @ans = ans
    @pos = pos
  end

  # Count one more use; returns the new count.
  def inc!
    @uses += 1
  end

  # Replace the stored answer and position.
  def move!(ans, pos)
    @ans = ans
    @pos = pos
  end
end
|
75
|
+
|
76
|
+
# Match a rule's operator against this parser directly, bypassing the
# memoization table.
def invoke(rule)
  operator = rule.op
  operator.match(self)
end
|
80
|
+
|
81
|
+
# Match +rule+ at the current position, going through the memoization
# table (keyed by rule, then by position).
#
# Returns the rule's answer, or nil when the rule fails or when the
# rule is found to be calling itself at the same position. When @log
# is set, progress is printed with puts.
def apply(rule)
  ans = nil
  if m = @memoizations[rule][pos]
    # There is already an entry for this rule at this position.
    m.inc!

    self.pos = m.pos
    if m.ans.kind_of? LeftRecursive
      # The entry still holds the placeholder, i.e. the rule is in the
      # middle of being evaluated at this position. Flag it so the
      # outer call can react, and fail this inner call.
      m.ans.detected = true
      if @log
        puts "LR #{rule.name} @ #{self.inspect}"
      end
      return nil
    end

    ans = m.ans
  else
    # First time here: store a placeholder entry before evaluating the
    # rule, so a recursive call at the same position can be noticed.
    lr = LeftRecursive.new(false)
    m = MemoEntry.new(lr, pos)
    @memoizations[rule][pos] = m
    start_pos = pos

    if @log
      puts "START #{rule.name} @ #{self.inspect}"
    end

    ans = rule.op.match(self)

    # Replace the placeholder with the real answer and end position.
    m.move! ans, pos

    # Don't bother trying to grow the left recursion
    # if it's failing straight away (thus there is no seed)
    if ans and lr.detected
      ans = grow_lr(rule, start_pos, m)
    end
  end

  if @log
    if ans
      puts " OK #{rule.name} @ #{self.inspect}"
    else
      puts " FAIL #{rule.name} @ #{self.inspect}"
    end
  end
  return ans
end
|
126
|
+
|
127
|
+
# Re-run +rule+ from +start_pos+ again and again, storing each result
# in the memo entry +m+, for as long as every run ends further into the
# input than the one before.
#
# Returns nil as soon as a run fails. Otherwise, once a run no longer
# advances, the position is set to the one stored in +m+ and the stored
# answer is returned.
def grow_lr(rule, start_pos, m)
  self.pos = start_pos

  while (grown = rule.op.match(self))
    if pos <= m.pos
      # No progress this round: settle on what the entry holds.
      self.pos = m.pos
      return m.ans
    end

    m.move! grown, pos
    self.pos = start_pos
  end

  nil
end
|
141
|
+
|
142
|
+
# True when a failing operator is currently recorded.
def failed?
  @failing_op ? true : false
end
|
145
|
+
|
146
|
+
# Parse the input starting from the rule called +name+, or from the
# grammar's root rule when no name is given.
#
# Returns the result of applying that rule. Raises RuntimeError when
# +name+ is given but the grammar has no such rule.
def parse(name=nil)
  if name
    rule = @grammar.find(name)
    raise "Unknown rule - #{name}" unless rule
  else
    rule = @grammar.root
  end

  match = apply rule

  # All input consumed: any failure recorded on the way is irrelevant.
  # eos? is used rather than comparing pos with string.size, because
  # the scanner position is a byte offset while String#size counts
  # characters, so the two differ on multibyte input.
  @failing_op = nil if eos?

  match
end
|
164
|
+
|
165
|
+
# Build a one-line, human readable description of the recorded
# failure: what was expected, and the line, column and offset of the
# failure.
def expectation
  offset = @failing_offset
  line = current_line(offset)
  column = current_column(offset)

  wanted = expected_string

  description =
    case wanted
    when String then wanted.inspect
    when Range  then "to be between #{wanted.begin} and #{wanted.end}"
    when Array  then "to be one of #{wanted.inspect}"
    when nil    then "anything (no more input)"
    else "unknown"
    end

  "Expected #{description} at line #{line}, column #{column} (offset #{offset})"
end
|
189
|
+
|
190
|
+
end
|
191
|
+
|
192
|
+
|
193
|
+
end
|
@@ -0,0 +1,34 @@
|
|
1
|
+
module KPeg
  # Line and column helpers, meant to be mixed into an object that
  # responds to +string+ (the full input text) and +pos+ (the current
  # offset into it).
  #
  # NOTE(review): offsets are treated as character indexes here
  # (String#rindex, String#size). A caller handing in byte offsets would
  # get different results on multibyte text -- confirm against callers.
  module Position
    # STANDALONE START

    # 1-based column of the offset +target+ within its line.
    def current_column(target=pos)
      # Search only strictly before target. At offset 0 there is nothing
      # before it, and rindex with -1 would scan from the end of the text.
      if target > 0 && (c = string.rindex("\n", target-1))
        # Distance from the preceding newline is already 1-based.
        return target - c
      end

      target + 1
    end

    # 1-based number of the line that contains the offset +target+.
    #
    # The end-of-input offset belongs to the last line, or to a new line
    # after it when the text is empty or ends with a newline. Returns -1
    # for an offset beyond the end of the text.
    def current_line(target=pos)
      cur_offset = 0
      cur_line = 0

      string.each_line do |line|
        cur_line += 1
        cur_offset += line.size
        # cur_offset is now the offset at which the next line starts, so
        # this line only holds offsets strictly below it.
        return cur_line if cur_offset > target
      end

      return -1 unless target == cur_offset

      # Exactly at the end of the input.
      return cur_line + 1 if cur_line == 0 || string.end_with?("\n")

      cur_line
    end

    # The input text split into an array of lines (line endings kept).
    def lines
      string.each_line.to_a
    end

    # STANDALONE END

  end
end
|