kpeg 0.7
Sign up to get free protection for your applications and to get access to all the features.
- data/Gemfile +4 -0
- data/LICENSE +25 -0
- data/README.md +8 -0
- data/Rakefile +24 -0
- data/bin/kpeg +126 -0
- data/doc/syntax_kpeg/ftdetect/kpeg.vim +1 -0
- data/doc/syntax_kpeg/syntax/kpeg.vim +55 -0
- data/kpeg.gemspec +24 -0
- data/lib/kpeg.rb +50 -0
- data/lib/kpeg/code_generator.rb +355 -0
- data/lib/kpeg/compiled_parser.rb +299 -0
- data/lib/kpeg/format_parser.rb +2440 -0
- data/lib/kpeg/grammar.rb +807 -0
- data/lib/kpeg/grammar_renderer.rb +172 -0
- data/lib/kpeg/match.rb +70 -0
- data/lib/kpeg/parser.rb +193 -0
- data/lib/kpeg/position.rb +34 -0
- data/lib/kpeg/string_escape.rb +322 -0
- data/lib/kpeg/version.rb +3 -0
- data/test/test_file_parser_roundtrip.rb +112 -0
- data/test/test_gen_calc.rb +63 -0
- data/test/test_kpeg.rb +416 -0
- data/test/test_kpeg_code_generator.rb +1307 -0
- data/test/test_kpeg_compiled_parser.rb +81 -0
- data/test/test_kpeg_format.rb +467 -0
- data/test/test_kpeg_grammar_renderer.rb +223 -0
- metadata +97 -0
@@ -0,0 +1,322 @@
|
|
1
|
+
require 'kpeg/compiled_parser'
|
2
|
+
|
3
|
+
# PEG-based string escaper built on KPeg::CompiledParser.
#
# Two entry rules are defined:
#
# * +root+  - rewrites backslash, newline, tab, backspace and double-quote
#   characters into their backslash-escaped spellings and copies everything
#   else through unchanged.
# * +embed+ - same as +root+, but additionally rewrites "#" as "\#".
#
# Both entry rules store the escaped string in @text (exposed via #text).
#
# Every rule method follows the same contract: on success it leaves the
# rule's value in @result and returns a truthy value; on failure it restores
# the starting position, records itself via set_failed_rule and returns the
# falsy value produced by the last alternative that was attempted.
#
# NOTE(review): the rule comments and the Rules table look like kpeg
# generator output - confirm before hand-editing, since a regeneration
# would overwrite manual changes.
class KPeg::StringEscape < KPeg::CompiledParser

  attr_reader :text

  # segment = (< /[\w ]+/ > { text } | "\\" { "\\\\" } | "\n" { "\\n" } | "\t" { "\\t" } | "\b" { "\\b" } | "\"" { "\\\"" } | < . > { text })
  def _segment
    origin = self.pos

    # Alternative 1: a run of word characters / spaces is copied verbatim.
    if scan(/\A(?-mix:[\w ]+)/)
      @result = get_text(origin)
      return true
    end
    self.pos = origin

    # Alternatives 2-6: one special character, replaced by its escaped
    # spelling. The table is rebuilt on every call so each result is a
    # fresh String, just as a string literal in an action block would be.
    [
      ["\\", "\\\\"],
      ["\n", "\\n"],
      ["\t", "\\t"],
      ["\b", "\\b"],
      ["\"", "\\\""]
    ].each do |literal, escaped|
      if match_string(literal)
        @result = escaped
        return true
      end
      self.pos = origin
    end

    # Alternative 7: any other single character is copied verbatim.
    fetched = get_byte
    if fetched
      @result = get_text(origin)
      return true
    end
    self.pos = origin

    set_failed_rule :_segment
    fetched
  end

  # segments = (segment:s segments:r { "#{s}#{r}" } | segment)
  def _segments
    origin = self.pos

    # Alternative 1: one segment followed (recursively) by more segments.
    if apply(:_segment)
      head = @result
      if apply(:_segments)
        tail = @result
        @result = "#{head}#{tail}"
        return true
      end
    end
    self.pos = origin

    # Alternative 2: a single trailing segment; its @result is kept as is.
    outcome = apply(:_segment)
    return outcome if outcome
    self.pos = origin

    set_failed_rule :_segments
    outcome
  end

  # root = segments:s { @text = s }
  def _root
    origin = self.pos

    outcome = apply(:_segments)
    unless outcome
      self.pos = origin
      set_failed_rule :_root
      return outcome
    end

    escaped = @result
    @result = (@text = escaped)
    true
  end

  # embed_seg = ("#" { "\\#" } | segment)
  def _embed_seg
    origin = self.pos

    # Alternative 1: a "#" is escaped so it cannot start an interpolation.
    if match_string("#")
      @result = "\\#"
      return true
    end
    self.pos = origin

    # Alternative 2: fall back to the ordinary segment rule.
    outcome = apply(:_segment)
    return outcome if outcome
    self.pos = origin

    set_failed_rule :_embed_seg
    outcome
  end

  # embed_segs = (embed_seg:s embed_segs:r { "#{s}#{r}" } | embed_seg)
  def _embed_segs
    origin = self.pos

    # Alternative 1: one embed segment followed (recursively) by more.
    if apply(:_embed_seg)
      head = @result
      if apply(:_embed_segs)
        tail = @result
        @result = "#{head}#{tail}"
        return true
      end
    end
    self.pos = origin

    # Alternative 2: a single trailing embed segment.
    outcome = apply(:_embed_seg)
    return outcome if outcome
    self.pos = origin

    set_failed_rule :_embed_segs
    outcome
  end

  # embed = embed_segs:s { @text = s }
  def _embed
    origin = self.pos

    outcome = apply(:_embed_segs)
    unless outcome
      self.pos = origin
      set_failed_rule :_embed
      return outcome
    end

    escaped = @result
    @result = (@text = escaped)
    true
  end

  # Rule metadata: rule method name => rule_info(display name, rule source).
  Rules = {}
  Rules[:_segment] = rule_info("segment", "(< /[\\w ]+/ > { text } | \"\\\\\" { \"\\\\\\\\\" } | \"\\n\" { \"\\\\n\" } | \"\\t\" { \"\\\\t\" } | \"\\b\" { \"\\\\b\" } | \"\\\"\" { \"\\\\\\\"\" } | < . > { text })")
  Rules[:_segments] = rule_info("segments", "(segment:s segments:r { \"\#{s}\#{r}\" } | segment)")
  Rules[:_root] = rule_info("root", "segments:s { @text = s }")
  Rules[:_embed_seg] = rule_info("embed_seg", "(\"\#\" { \"\\\\\#\" } | segment)")
  Rules[:_embed_segs] = rule_info("embed_segs", "(embed_seg:s embed_segs:r { \"\#{s}\#{r}\" } | embed_seg)")
  Rules[:_embed] = rule_info("embed", "embed_segs:s { @text = s }")
end
|
data/lib/kpeg/version.rb
ADDED
data/test/test_file_parser_roundtrip.rb
ADDED
@@ -0,0 +1,112 @@
|
|
1
|
+
require 'kpeg'
|
2
|
+
require 'kpeg/format_parser'
|
3
|
+
require 'kpeg/grammar_renderer'
|
4
|
+
require 'kpeg/code_generator'
|
5
|
+
require 'stringio'
|
6
|
+
require 'test/unit'
|
7
|
+
|
8
|
+
# Round-trip test for the kpeg grammar format.
#
# The grammar file at PATH is parsed with the built-in KPeg::FormatParser and
# rendered back to text. That text is then fed through three further
# generations of parsers produced by KPeg::CodeGenerator; each generation
# must parse successfully and render exactly the same text as the one before.
class TestKPegRoundtrip < Test::Unit::TestCase
  # NOTE(review): resolves to lib/kpeg/format.kpeg relative to this test
  # file - confirm that the grammar file is present wherever the tests run.
  PATH = File.expand_path("../../lib/kpeg/format.kpeg", __FILE__)

  # Round-trips using generated parsers in their default configuration.
  def test_roundtrip
    assert_roundtrip(false)
  end

  # Round-trips using generated parsers with +standalone+ switched on.
  def test_roundtrip_standalone
    assert_roundtrip(true)
  end

  private

  # Renders +grammar+ with KPeg::GrammarRenderer and returns the text.
  def render_grammar(grammar)
    io = StringIO.new
    KPeg::GrammarRenderer.new(grammar).render io
    io.string
  end

  # Parses PATH with the built-in parser, then runs three generated-parser
  # generations, asserting that each one parses and that its rendering is
  # identical to the previous generation's.
  #
  # standalone - when true, +standalone = true+ is set on every
  #              KPeg::CodeGenerator before the parser is built.
  def assert_roundtrip(standalone)
    data = File.read(PATH)

    pr = KPeg::FormatParser.new data
    assert pr.parse, "Couldn't parse with builtin parser"

    grammar  = pr.g
    rendered = render_grammar(grammar)

    [["Test1", "2nd"], ["Test2", "3rd"], ["Test3", "4th"]].each do |name, ordinal|
      generator = KPeg::CodeGenerator.new(name, grammar, false)
      generator.standalone = true if standalone

      parser = generator.make(rendered)
      next_grammar = KPeg::Grammar.new
      # The generated parser fills in the grammar object held in @g.
      parser.instance_variable_set(:@g, next_grammar)

      assert parser.parse, "Couldn't parse with #{ordinal} generation parser"

      next_rendered = render_grammar(next_grammar)

      # The previous generation's text is the baseline (expected); the new
      # rendering is the value under test (actual).
      assert_equal rendered, next_rendered

      grammar  = next_grammar
      rendered = next_rendered
    end
  end
end
|
@@ -0,0 +1,63 @@
|
|
1
|
+
require 'test/unit'
|
2
|
+
require 'kpeg'
|
3
|
+
require 'kpeg/format_parser'
|
4
|
+
require 'kpeg/code_generator'
|
5
|
+
require 'stringio'
|
6
|
+
|
7
|
+
# End-to-end check of KPeg::CodeGenerator: a small calculator grammar is
# parsed, turned into a parser, and that parser is run over a short program.
class TestKPegCodeGenerator < Test::Unit::TestCase
  # Calculator grammar. Its action blocks read and write @vars (variable
  # name => value) and append each statement's value to @answers; the test
  # installs both on the generated parser before running it.
  GRAMMAR = <<-'STR'
Stmt = - Expr:e EOL { @answers << e }
| ( !EOL . )* EOL { puts "error" }

Expr = ID:i ASSIGN Sum:s { @vars[i] = s }
| Sum:s { s }

Sum = Product:l
( PLUS Product:r { l += r }
| MINUS Product:r { l -= r }
)* { l }

Product = Value:l
( TIMES Value:r { l *= r }
| DIVIDE Value:r { l /= r }
)* { l }

Value = NUMBER:i { i }
| ID:i !ASSIGN { @vars[i] }
| OPEN Expr:i CLOSE { i }

NUMBER = < [0-9]+ > - { text.to_i }
ID = < [a-z] > - { text }
ASSIGN = '=' -
PLUS = '+' -
MINUS = '-' -
TIMES = '*' -
DIVIDE = '/' -
OPEN = '(' -
CLOSE = ')' -

- = (' ' | '\t')*
EOL = ('\n' | '\r\n' | '\r' | ';') -

root = Stmt+
  STR

  # Builds a calculator from GRAMMAR and checks the value of each statement.
  def test_parse
    format_parser = KPeg::FormatParser.new(GRAMMAR)
    assert format_parser.parse, "Unable to parse"

    # Debugging aid: KPeg::GrammarRenderer.new(format_parser.grammar).render(STDOUT)
    # prints the grammar exactly as it was parsed.
    generator = KPeg::CodeGenerator.new("TestCalc", format_parser.grammar)

    calculator = generator.make("i = 3+4; j = i*8; i + j * 2;")

    # State that the grammar's action blocks expect to find on the parser.
    calculator.instance_variable_set(:@vars, {})
    calculator.instance_variable_set(:@answers, [])

    assert_equal true, calculator.parse
    assert_equal [7,56,119], calculator.instance_variable_get(:@answers)
  end
end
|