kpeg 0.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Gemfile +4 -0
- data/LICENSE +25 -0
- data/README.md +8 -0
- data/Rakefile +24 -0
- data/bin/kpeg +126 -0
- data/doc/syntax_kpeg/ftdetect/kpeg.vim +1 -0
- data/doc/syntax_kpeg/syntax/kpeg.vim +55 -0
- data/kpeg.gemspec +24 -0
- data/lib/kpeg.rb +50 -0
- data/lib/kpeg/code_generator.rb +355 -0
- data/lib/kpeg/compiled_parser.rb +299 -0
- data/lib/kpeg/format_parser.rb +2440 -0
- data/lib/kpeg/grammar.rb +807 -0
- data/lib/kpeg/grammar_renderer.rb +172 -0
- data/lib/kpeg/match.rb +70 -0
- data/lib/kpeg/parser.rb +193 -0
- data/lib/kpeg/position.rb +34 -0
- data/lib/kpeg/string_escape.rb +322 -0
- data/lib/kpeg/version.rb +3 -0
- data/test/test_file_parser_roundtrip.rb +112 -0
- data/test/test_gen_calc.rb +63 -0
- data/test/test_kpeg.rb +416 -0
- data/test/test_kpeg_code_generator.rb +1307 -0
- data/test/test_kpeg_compiled_parser.rb +81 -0
- data/test/test_kpeg_format.rb +467 -0
- data/test/test_kpeg_grammar_renderer.rb +223 -0
- metadata +97 -0
@@ -0,0 +1,322 @@
|
|
1
|
+
require 'kpeg/compiled_parser'
|
2
|
+
|
3
|
+
# Generated-style PEG parser that rewrites its input into an escaped form.
# The escaped text is exposed through #text once the `root` or `embed`
# rule has matched.
class KPeg::StringEscape < KPeg::CompiledParser

  attr_reader :text

  # segment = (< /[\w ]+/ > { text } | "\\" { "\\\\" } | "\n" { "\\n" } | "\t" { "\\t" } | "\b" { "\\b" } | "\"" { "\\\"" } | < . > { text })
  #
  # Matches one segment at the current position and stores its escaped form
  # in @result. A run of word characters/spaces, or any other single
  # character, is passed through as captured; backslash, newline, tab,
  # backspace and double quote are replaced by their escape sequences.
  # Returns true on a match; otherwise restores the position, records the
  # failed rule and returns the falsy result of the last attempt.
  def _segment
    origin = self.pos

    # Alternative 1: a run of word characters and spaces, kept verbatim.
    if scan(/\A(?-mix:[\w ]+)/)
      @result = get_text(origin)
      return true
    end
    self.pos = origin

    # Alternatives 2-6: single characters that need an escape sequence,
    # tried in grammar order.
    [
      ["\\", "\\\\"],
      ["\n", "\\n"],
      ["\t", "\\t"],
      ["\b", "\\b"],
      ["\"", "\\\""]
    ].each do |literal, escaped|
      if match_string(literal)
        @result = escaped
        return true
      end
      self.pos = origin
    end

    # Alternative 7: any single character, kept verbatim.
    outcome = get_byte
    if outcome
      @result = get_text(origin)
      return true
    end

    self.pos = origin
    set_failed_rule :_segment
    outcome
  end

  # segments = (segment:s segments:r { "#{s}#{r}" } | segment)
  #
  # Matches one or more segments by right recursion and leaves their
  # concatenated escaped text in @result.
  def _segments
    origin = self.pos

    # First alternative: a segment followed by further segments.
    if apply(:_segment)
      head = @result
      if apply(:_segments)
        tail = @result
        @result = "#{head}#{tail}"
        return true
      end
    end
    self.pos = origin

    # Second alternative: a lone segment; its @result is left as-is.
    outcome = apply(:_segment)
    unless outcome
      self.pos = origin
      set_failed_rule :_segments
    end
    outcome
  end

  # root = segments:s { @text = s }
  #
  # Entry rule: stores the escaped text of all segments in @text.
  def _root
    origin = self.pos

    outcome = apply(:_segments)
    unless outcome
      self.pos = origin
      set_failed_rule :_root
      return outcome
    end

    escaped = @result
    @result = (@text = escaped)
    true
  end

  # embed_seg = ("#" { "\\#" } | segment)
  #
  # Same as segment, except that "#" is additionally escaped as "\\#".
  def _embed_seg
    origin = self.pos

    if match_string("#")
      @result = "\\#"
      return true
    end
    self.pos = origin

    outcome = apply(:_segment)
    unless outcome
      self.pos = origin
      set_failed_rule :_embed_seg
    end
    outcome
  end

  # embed_segs = (embed_seg:s embed_segs:r { "#{s}#{r}" } | embed_seg)
  #
  # Matches one or more embed segments by right recursion and leaves their
  # concatenated escaped text in @result.
  def _embed_segs
    origin = self.pos

    # First alternative: an embed segment followed by further ones.
    if apply(:_embed_seg)
      head = @result
      if apply(:_embed_segs)
        tail = @result
        @result = "#{head}#{tail}"
        return true
      end
    end
    self.pos = origin

    # Second alternative: a lone embed segment.
    outcome = apply(:_embed_seg)
    unless outcome
      self.pos = origin
      set_failed_rule :_embed_segs
    end
    outcome
  end

  # embed = embed_segs:s { @text = s }
  #
  # Entry rule for the "#"-escaping variant: stores the escaped text in @text.
  def _embed
    origin = self.pos

    outcome = apply(:_embed_segs)
    unless outcome
      self.pos = origin
      set_failed_rule :_embed
      return outcome
    end

    escaped = @result
    @result = (@text = escaped)
    true
  end

  # Rule metadata keyed by rule method name: display name plus the rendered
  # grammar expression.
  Rules = {}
  Rules[:_segment] = rule_info("segment", "(< /[\\w ]+/ > { text } | \"\\\\\" { \"\\\\\\\\\" } | \"\\n\" { \"\\\\n\" } | \"\\t\" { \"\\\\t\" } | \"\\b\" { \"\\\\b\" } | \"\\\"\" { \"\\\\\\\"\" } | < . > { text })")
  Rules[:_segments] = rule_info("segments", "(segment:s segments:r { \"\#{s}\#{r}\" } | segment)")
  Rules[:_root] = rule_info("root", "segments:s { @text = s }")
  Rules[:_embed_seg] = rule_info("embed_seg", "(\"\#\" { \"\\\\\#\" } | segment)")
  Rules[:_embed_segs] = rule_info("embed_segs", "(embed_seg:s embed_segs:r { \"\#{s}\#{r}\" } | embed_seg)")
  Rules[:_embed] = rule_info("embed", "embed_segs:s { @text = s }")
end
|
data/lib/kpeg/version.rb
ADDED
@@ -0,0 +1,112 @@
|
|
1
|
+
require 'kpeg'
|
2
|
+
require 'kpeg/format_parser'
|
3
|
+
require 'kpeg/grammar_renderer'
|
4
|
+
require 'kpeg/code_generator'
|
5
|
+
require 'stringio'
|
6
|
+
require 'test/unit'
|
7
|
+
|
8
|
+
# Checks that the kpeg format grammar survives repeated round trips:
# parse -> render -> generate a parser from the parsed grammar -> parse the
# rendered text again, for three generated generations in a row.
class TestKPegRoundtrip < Test::Unit::TestCase
  PATH = File.expand_path("../../lib/kpeg/format.kpeg", __FILE__)

  # Round trip using generated parsers with the code generator's default
  # standalone setting.
  def test_roundtrip
    assert_roundtrip false
  end

  # Round trip using generated parsers with `standalone = true`.
  def test_roundtrip_standalone
    assert_roundtrip true
  end

  private

  # Renders +grammar+ with KPeg::GrammarRenderer and returns the text.
  def render_grammar(grammar)
    io = StringIO.new
    KPeg::GrammarRenderer.new(grammar).render io
    io.string
  end

  # Parses PATH with the builtin FormatParser, then three times in a row
  # generates a parser from the latest grammar, re-parses the latest
  # rendering with it, and asserts the new rendering equals the previous one.
  #
  # standalone - when true, `standalone = true` is set on every
  #              CodeGenerator before the parser is made.
  def assert_roundtrip(standalone)
    data = File.read(PATH)

    builtin = KPeg::FormatParser.new data
    assert builtin.parse, "Couldn't parse with builtin parser"

    grammar  = builtin.g
    rendered = render_grammar(grammar)

    # Generated class name paired with the ordinal used in the failure message.
    [["Test1", "2nd"], ["Test2", "3rd"], ["Test3", "4th"]].each do |class_name, ordinal|
      generator = KPeg::CodeGenerator.new(class_name, grammar, false)
      generator.standalone = true if standalone

      parser = generator.make(rendered)
      next_grammar = KPeg::Grammar.new
      # The generated parser builds its grammar into @g.
      parser.instance_variable_set(:@g, next_grammar)

      assert parser.parse, "Couldn't parse with #{ordinal} generation parser"

      next_rendered = render_grammar(next_grammar)

      # The previous generation's rendering is the reference (expected) value.
      assert_equal rendered, next_rendered

      grammar, rendered = next_grammar, next_rendered
    end
  end
end
|
@@ -0,0 +1,63 @@
|
|
1
|
+
require 'test/unit'
|
2
|
+
require 'kpeg'
|
3
|
+
require 'kpeg/format_parser'
|
4
|
+
require 'kpeg/code_generator'
|
5
|
+
require 'stringio'
|
6
|
+
|
7
|
+
# End-to-end check of the code generator using a small calculator grammar.
class TestKPegCodeGenerator < Test::Unit::TestCase
  # Calculator grammar: statements are expressions or single-letter variable
  # assignments; each successfully parsed statement appends its value to
  # @answers.
  GRAMMAR = <<-'STR'
Stmt = - Expr:e EOL { @answers << e }
| ( !EOL . )* EOL { puts "error" }

Expr = ID:i ASSIGN Sum:s { @vars[i] = s }
| Sum:s { s }

Sum = Product:l
( PLUS Product:r { l += r }
| MINUS Product:r { l -= r }
)* { l }

Product = Value:l
( TIMES Value:r { l *= r }
| DIVIDE Value:r { l /= r }
)* { l }

Value = NUMBER:i { i }
| ID:i !ASSIGN { @vars[i] }
| OPEN Expr:i CLOSE { i }

NUMBER = < [0-9]+ > - { text.to_i }
ID = < [a-z] > - { text }
ASSIGN = '=' -
PLUS = '+' -
MINUS = '-' -
TIMES = '*' -
DIVIDE = '/' -
OPEN = '(' -
CLOSE = ')' -

- = (' ' | '\t')*
EOL = ('\n' | '\r\n' | '\r' | ';') -

root = Stmt+
  STR

  # Parses GRAMMAR, generates a calculator parser from it, runs that parser
  # over three statements and checks the collected answers.
  def test_parse
    format_parser = KPeg::FormatParser.new(GRAMMAR)
    assert format_parser.parse, "Unable to parse"

    # To inspect the parsed grammar while debugging, render
    # format_parser.grammar with KPeg::GrammarRenderer to STDOUT.
    generator = KPeg::CodeGenerator.new("TestCalc", format_parser.grammar)

    calc = generator.make("i = 3+4; j = i*8; i + j * 2;")

    # The grammar's actions read and write these instance variables.
    calc.instance_variable_set(:@answers, [])
    calc.instance_variable_set(:@vars, {})

    assert_equal true, calc.parse
    assert_equal [7,56,119], calc.instance_variable_get(:@answers)
  end
end
|