kpeg 0.8.5 → 0.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.autotest +10 -0
- data/.gemtest +0 -0
- data/Gemfile +11 -3
- data/History.txt +21 -0
- data/LICENSE +25 -0
- data/Manifest.txt +47 -0
- data/README.rdoc +222 -0
- data/Rakefile +23 -11
- data/bin/kpeg +4 -2
- data/examples/calculator/calculator.kpeg +17 -0
- data/examples/calculator/calculator.rb +7 -0
- data/examples/foreign_reference/literals.kpeg +5 -0
- data/examples/foreign_reference/matcher.kpeg +9 -0
- data/examples/foreign_reference/matcher.rb +5 -0
- data/examples/lua_string/driver.rb +21 -0
- data/examples/lua_string/lua_string.kpeg +14 -0
- data/examples/lua_string/lua_string.kpeg.rb +460 -0
- data/examples/phone_number/README.md +3 -0
- data/examples/phone_number/phone_number.kpeg +20 -0
- data/examples/phone_number/phone_number.rb +6 -0
- data/examples/upper/README.md +83 -0
- data/examples/upper/upper.kpeg +24 -0
- data/examples/upper/upper.rb +9 -0
- data/kpeg.gemspec +35 -17
- data/lib/hoe/kpeg.rb +94 -0
- data/lib/kpeg.rb +3 -0
- data/lib/kpeg/code_generator.rb +16 -3
- data/lib/kpeg/compiled_parser.rb +18 -28
- data/lib/kpeg/format_parser.kpeg +129 -0
- data/lib/kpeg/format_parser.rb +88 -49
- data/lib/kpeg/grammar.rb +10 -0
- data/lib/kpeg/string_escape.kpeg +20 -0
- data/test/inputs/comments.kpeg +5 -0
- data/test/test_file_parser_roundtrip.rb +3 -3
- data/test/test_gen_calc.rb +2 -2
- data/test/test_kpeg.rb +2 -2
- data/test/test_kpeg_code_generator.rb +65 -2
- data/test/test_kpeg_compiled_parser.rb +2 -2
- data/test/test_kpeg_format.rb +49 -4
- data/test/test_kpeg_grammar_renderer.rb +2 -2
- data/test/test_left_recursion.rb +2 -2
- data/{doc → vim}/syntax_kpeg/ftdetect/kpeg.vim +0 -0
- data/{doc → vim}/syntax_kpeg/syntax/kpeg.vim +0 -0
- metadata +89 -26
- data/README.md +0 -183
- data/lib/kpeg/version.rb +0 -3
@@ -0,0 +1,21 @@
|
|
1
|
+
# Example driver for the generated LuaString parser: parses a handful of
# Lua long-bracket string literals and prints the text found between the
# brackets.
#
# The parser is loaded relative to this script. A bare
# `require 'lua_string.kpeg.rb'` only works when the current directory is
# on $LOAD_PATH, which is no longer the case on Ruby >= 1.9.2.
require File.expand_path('../lua_string.kpeg.rb', __FILE__)

[
  "[[blah]]",
  "[==[blah2]==]",
  "[==[embeded]stuff]==]",
  "[==[embeded]=]stuff]==]"
].each do |source|
  ls = LuaString.new(source)
  ls.parse

  p ls.result
end
|
@@ -0,0 +1,460 @@
|
|
1
|
+
# Parser for Lua long-bracket strings ("[[...]]", "[==[...]==]", ...),
# generated by kpeg from lua_string.kpeg in standalone mode: the runtime
# support normally provided by kpeg is inlined in this class.
#
# Usage:
#
#   ls = LuaString.new("[==[text]==]")
#   ls.parse    # => true or false
#   ls.result   # => "text"
class LuaString
  # STANDALONE START

  # (Re)initializes all parser state for the input +str+.
  #
  # +debug+ is accepted for interface compatibility; it is not used here.
  def setup_parser(str, debug=false)
    @string = str
    @pos = 0
    # rule name => { start offset => MemoEntry }
    @memoizations = Hash.new { |h,k| h[k] = {} }
    @result = nil
    @failed_rule = nil
    # -1 means "no rule has failed yet"; see set_failed_rule.
    @failing_rule_offset = -1

    setup_foreign_grammar
  end

  # This is distinct from setup_parser so that a standalone parser
  # can redefine #initialize and still have access to the proper
  # parser setup code.
  #
  def initialize(str, debug=false)
    setup_parser(str, debug)
  end

  # The input being parsed, the offset of the furthest failure and the
  # rule (Symbol or String) that failed there.
  attr_reader :string, :failing_rule_offset, :failed_rule

  # Current offset into #string, and the value produced by the most
  # recently evaluated rule or action.
  attr_accessor :pos, :result

  # Returns the 1-based column of offset +target+ within its line.
  def current_column(target=pos)
    # String#rindex treats a negative start as counting from the end of
    # the string, so only look backwards when something precedes target.
    if target > 0 && (c = string.rindex("\n", target - 1))
      return target - c
    end

    target + 1
  end

  # Returns the 1-based number of the line containing offset +target+,
  # or -1 when +target+ lies beyond the end of the input.
  #
  # A newline character belongs to the line it terminates; the offset just
  # after it is the first column of the following line.
  def current_line(target=pos)
    return -1 if target > string.size
    return 1 if target <= 0

    string[0, target].count("\n") + 1
  end

  # Returns the input split into lines (line terminators included).
  def lines
    collected = []
    string.each_line { |l| collected << l }
    collected
  end

  #

  # Returns the input text between offset +start+ and the current position.
  def get_text(start)
    # Exclusive range: "start..@pos-1" turns into "start..-1" (the whole
    # tail of the string) when nothing has been consumed and @pos is 0.
    @string[start...@pos]
  end

  # Describes the current position together with up to 10 characters of
  # context on each side, for debugging output.
  def show_pos
    width = 10
    if @pos < width
      "#{@pos} (\"#{@string[0,@pos]}\" @ \"#{@string[@pos,width]}\")"
    else
      "#{@pos} (\"... #{@string[@pos - width, width]}\" @ \"#{@string[@pos,width]}\")"
    end
  end

  # One-line description of the furthest failure, including the rendered
  # grammar of the failed rule when it is known.
  def failure_info
    l = current_line @failing_rule_offset
    c = current_column @failing_rule_offset

    if @failed_rule.kind_of? Symbol
      info = self.class::Rules[@failed_rule]
      "line #{l}, column #{c}: failed rule '#{info.name}' = '#{info.rendered}'"
    else
      "line #{l}, column #{c}: failed rule '#{@failed_rule}'"
    end
  end

  # Returns the offending line followed by a second line holding a caret
  # under the failing column.
  def failure_caret
    l = current_line @failing_rule_offset
    c = current_column @failing_rule_offset

    # Drop the line terminator so the caret lands directly below the text,
    # and never ask for a negative amount of padding.
    line = lines[l-1].to_s.chomp
    padding = c > 1 ? ' ' * (c - 1) : ''
    "#{line}\n#{padding}^"
  end

  # Returns the character at the failing offset ("" at the end of a line,
  # nil when the column lies past it).
  def failure_character
    l = current_line @failing_rule_offset
    c = current_column @failing_rule_offset
    # The failing offset may sit on a line that has no text (e.g. end of
    # input right after a newline); treat that line as empty.
    (lines[l-1] || "")[c-1, 1]
  end

  # Compact "@line:column failed rule ..., got ..." failure message.
  def failure_oneline
    l = current_line @failing_rule_offset
    c = current_column @failing_rule_offset

    char = (lines[l-1] || "")[c-1, 1]

    if @failed_rule.kind_of? Symbol
      info = self.class::Rules[@failed_rule]
      "@#{l}:#{c} failed rule '#{info.name}', got '#{char}'"
    else
      "@#{l}:#{c} failed rule '#{@failed_rule}', got '#{char}'"
    end
  end

  # Raised by #raise_error to report a failed parse.
  class ParseError < RuntimeError
  end

  # Raises ParseError carrying #failure_oneline as its message.
  def raise_error
    raise ParseError, failure_oneline
  end

  # Writes a multi-line report about the furthest failure to +io+.
  def show_error(io=STDOUT)
    error_pos = @failing_rule_offset
    line_no = current_line(error_pos)
    col_no = current_column(error_pos)

    io.puts "On line #{line_no}, column #{col_no}:"

    if @failed_rule.kind_of? Symbol
      info = self.class::Rules[@failed_rule]
      io.puts "Failed to match '#{info.rendered}' (rule '#{info.name}')"
    else
      io.puts "Failed to match rule '#{@failed_rule}'"
    end

    io.puts "Got: #{string[error_pos,1].inspect}"
    line = lines[line_no-1]
    io.puts "=> #{line}"
    # The "=> " prefix is 3 characters wide and columns are 1-based, so
    # column N sits at output index N + 2.
    io.print(" " * (col_no + 2))
    io.puts "^"
  end

  # Records +name+ as the failed rule, but only when the failure happened
  # further into the input than any failure recorded so far.
  def set_failed_rule(name)
    if @pos > @failing_rule_offset
      @failed_rule = name
      @failing_rule_offset = @pos
    end
  end

  # Consumes +str+ if the input continues with it at the current position.
  # Returns +str+ on success, nil otherwise.
  def match_string(str)
    len = str.size
    if @string[pos,len] == str
      @pos += len
      return str
    end

    return nil
  end

  # Consumes the text matched by +reg+ at the current position.
  # Returns true on success, nil otherwise.
  def scan(reg)
    m = reg.match(@string[@pos..-1])
    # Only accept a match that begins exactly at the current position;
    # otherwise the unmatched text in front of it would be skipped.
    return nil unless m && m.begin(0) == 0

    @pos += m.end(0)
    true
  end

  # Consumes and returns the next unit of input, or nil at the end.
  # String#getbyte is used where the String class provides it.
  if "".respond_to? :getbyte
    def get_byte
      if @pos >= @string.size
        return nil
      end

      s = @string.getbyte @pos
      @pos += 1
      s
    end
  else
    def get_byte
      if @pos >= @string.size
        return nil
      end

      s = @string[@pos]
      @pos += 1
      s
    end
  end

  # Runs the grammar from the current position and returns true or false.
  #
  # Without +rule+ the root rule is used; otherwise +rule+ (a String or
  # Symbol) names the rule to start from.
  def parse(rule=nil)
    if !rule
      _root ? true : false
    else
      # This is not shared with code_generator.rb so this can be standalone
      method = rule.to_s.gsub("-","_hyphen_")
      __send__("_#{method}") ? true : false
    end
  end

  # Placeholder answer stored in the memo table while a rule is being
  # evaluated; #apply sets +detected+ when the rule is re-entered at the
  # same position.
  class LeftRecursive
    def initialize(detected=false)
      @detected = detected
    end

    attr_accessor :detected
  end

  # Memo table entry: the answer of a rule at a start offset, the position
  # reached, the @result produced and how often the entry has been used.
  class MemoEntry
    def initialize(ans, pos)
      @ans = ans
      @pos = pos
      @uses = 1
      @result = nil
    end

    attr_reader :ans, :pos, :uses, :result

    # Counts another use of this entry.
    def inc!
      @uses += 1
    end

    # Replaces the stored answer, end position and result.
    def move!(ans, pos, result)
      @ans = ans
      @pos = pos
      @result = result
    end
  end

  # Runs +rule+ of this parser on the input and position of +other+.
  #
  # On success +other+ is advanced to where the rule stopped; on failure
  # the failure is recorded on +other+. This parser's own string and
  # position are restored in either case. Returns the rule's value.
  def external_invoke(other, rule, *args)
    old_pos = @pos
    old_string = @string

    @pos = other.pos
    @string = other.string

    begin
      if val = __send__(rule, *args)
        other.pos = @pos
      else
        other.set_failed_rule "#{self.class}##{rule}"
      end
      val
    ensure
      @pos = old_pos
      @string = old_string
    end
  end

  # Evaluates the rule method +rule+ at the current position, memoizing
  # the outcome per rule and start offset.
  def apply(rule)
    if m = @memoizations[rule][@pos]
      m.inc!

      @pos = m.pos
      if m.ans.kind_of? LeftRecursive
        # The rule is being re-entered at the offset it started from:
        # flag the left recursion and fail this inner attempt.
        m.ans.detected = true
        return nil
      end

      @result = m.result

      return m.ans
    else
      # Seed the table with a placeholder so a recursive call at the same
      # offset can be detected above.
      lr = LeftRecursive.new(false)
      m = MemoEntry.new(lr, @pos)
      @memoizations[rule][@pos] = m
      start_pos = @pos

      ans = __send__ rule

      m.move! ans, @pos, @result

      # Don't bother trying to grow the left recursion
      # if it's failing straight away (thus there is no seed)
      if ans and lr.detected
        return grow_lr(rule, start_pos, m)
      else
        return ans
      end
    end
  end

  # Re-evaluates +rule+ from +start_pos+, updating memo entry +m+ each
  # time the rule ends further into the input than before. Stops at the
  # first pass that does not advance and returns the last stored answer;
  # returns nil if a pass fails outright.
  def grow_lr(rule, start_pos, m)
    while true
      @pos = start_pos
      @result = m.result

      ans = __send__ rule
      return nil unless ans

      break if @pos <= m.pos

      m.move! ans, @pos, @result
    end

    @result = m.result
    @pos = m.pos
    return m.ans
  end

  # Name and rendered grammar text of a rule, used in failure messages.
  class RuleInfo
    def initialize(name, rendered)
      @name = name
      @rendered = rendered
    end

    attr_reader :name, :rendered
  end

  # Builds the RuleInfo stored in the Rules table.
  def self.rule_info(name, rendered)
    RuleInfo.new(name, rendered)
  end

  #

  # This grammar references no foreign grammars, so there is nothing to do.
  def setup_foreign_grammar; end

  # equals = < "="* > { text }
  def _equals

    _save = self.pos
    while true # sequence
      _text_start = self.pos
      # "="* : consume as many "=" as are present (zero is fine)
      while true
        _tmp = match_string("=")
        break unless _tmp
      end
      _tmp = true
      if _tmp
        text = get_text(_text_start)
      end
      unless _tmp
        self.pos = _save
        break
      end
      @result = begin;  text ; end
      _tmp = true
      unless _tmp
        self.pos = _save
      end
      break
    end # end sequence

    set_failed_rule :_equals unless _tmp
    return _tmp
  end

  # equal_ending = "]" equals:x &{ x == start } "]"
  def _equal_ending(start)

    _save = self.pos
    while true # sequence
      _tmp = match_string("]")
      unless _tmp
        self.pos = _save
        break
      end
      _tmp = apply(:_equals)
      x = @result
      unless _tmp
        self.pos = _save
        break
      end
      # &{ x == start } : semantic predicate, consumes no input
      _save1 = self.pos
      _tmp = begin;  x == start ; end
      self.pos = _save1
      unless _tmp
        self.pos = _save
        break
      end
      _tmp = match_string("]")
      unless _tmp
        self.pos = _save
      end
      break
    end # end sequence

    set_failed_rule :_equal_ending unless _tmp
    return _tmp
  end

  # root = "[" equals:e "[" < (!equal_ending(e) .)* > equal_ending(e) { @result = text }
  def _root

    _save = self.pos
    while true # sequence
      _tmp = match_string("[")
      unless _tmp
        self.pos = _save
        break
      end
      _tmp = apply(:_equals)
      e = @result
      unless _tmp
        self.pos = _save
        break
      end
      _tmp = match_string("[")
      unless _tmp
        self.pos = _save
        break
      end
      _text_start = self.pos
      # (!equal_ending(e) .)* : take input until the closing bracket
      while true

        _save2 = self.pos
        while true # sequence
          # !equal_ending(e) : negative lookahead, consumes no input
          _save3 = self.pos
          _tmp = _equal_ending(e)
          _tmp = _tmp ? nil : true
          self.pos = _save3
          unless _tmp
            self.pos = _save2
            break
          end
          _tmp = get_byte
          unless _tmp
            self.pos = _save2
          end
          break
        end # end sequence

        break unless _tmp
      end
      _tmp = true
      if _tmp
        text = get_text(_text_start)
      end
      unless _tmp
        self.pos = _save
        break
      end
      _tmp = _equal_ending(e)
      unless _tmp
        self.pos = _save
        break
      end
      @result = begin;
        @result = text
      ; end
      _tmp = true
      unless _tmp
        self.pos = _save
      end
      break
    end # end sequence

    set_failed_rule :_root unless _tmp
    return _tmp
  end

  # Rule method name => RuleInfo, consulted when reporting failures.
  Rules = {}
  Rules[:_equals] = rule_info("equals", "< \"=\"* > { text }")
  Rules[:_equal_ending] = rule_info("equal_ending", "\"]\" equals:x &{ x == start } \"]\"")
  Rules[:_root] = rule_info("root", "\"[\" equals:e \"[\" < (!equal_ending(e) .)* > equal_ending(e) { @result = text }")
end
|