kpeg 0.9.0 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.hoeignore +12 -0
- data/Gemfile +2 -11
- data/History.txt +22 -1
- data/Manifest.txt +10 -3
- data/README.rdoc +40 -8
- data/Rakefile +12 -3
- data/bin/kpeg +9 -2
- data/examples/lua_string/lua_string.kpeg.rb +0 -2
- data/examples/tiny_markdown/Rakefile +3 -0
- data/examples/tiny_markdown/driver.rb +10 -0
- data/examples/tiny_markdown/node.rb +107 -0
- data/examples/tiny_markdown/sample.md +51 -0
- data/examples/tiny_markdown/tiny_markdown.kpeg +199 -0
- data/examples/tiny_markdown/tiny_markdown.kpeg.rb +3892 -0
- data/kpeg.gemspec +20 -20
- data/lib/hoe/kpeg.rb +6 -5
- data/lib/kpeg/code_generator.rb +81 -45
- data/lib/kpeg/compiled_parser.rb +32 -28
- data/lib/kpeg/format_parser.kpeg +22 -10
- data/lib/kpeg/format_parser.rb +94 -73
- data/lib/kpeg/grammar.rb +2 -2
- data/lib/kpeg/grammar_renderer.rb +14 -0
- data/lib/kpeg/position.rb +25 -8
- data/lib/kpeg/string_escape.kpeg +1 -0
- data/lib/kpeg/string_escape.rb +399 -13
- data/lib/kpeg.rb +1 -1
- data/test/test_kpeg.rb +1 -1
- data/test/test_kpeg_code_generator.rb +186 -14
- data/test/test_kpeg_compiled_parser.rb +1 -1
- data/test/test_kpeg_format.rb +25 -6
- data/test/{test_file_parser_roundtrip.rb → test_kpeg_format_parser_round_trip.rb} +1 -1
- data/test/{test_gen_calc.rb → test_kpeg_grammar.rb} +48 -5
- data/test/test_kpeg_grammar_renderer.rb +47 -6
- data/test/test_kpeg_string_escape.rb +42 -0
- metadata +90 -101
- data/.gemtest +0 -0
- data/test/test_left_recursion.rb +0 -50
data/lib/kpeg/string_escape.rb
CHANGED
@@ -1,13 +1,380 @@
|
|
1
|
-
|
1
|
+
class KPeg::StringEscape
|
2
|
+
# :stopdoc:
|
3
|
+
|
4
|
+
# This is distinct from setup_parser so that a standalone parser
|
5
|
+
# can redefine #initialize and still have access to the proper
|
6
|
+
# parser setup code.
|
7
|
+
def initialize(str, debug=false)
|
8
|
+
setup_parser(str, debug)
|
9
|
+
end
|
10
|
+
|
11
|
+
|
12
|
+
|
13
|
+
# Prepares for parsing +str+. If you define a custom initialize you must
|
14
|
+
# call this method before #parse
|
15
|
+
def setup_parser(str, debug=false)
|
16
|
+
set_string str, 0
|
17
|
+
@memoizations = Hash.new { |h,k| h[k] = {} }
|
18
|
+
@result = nil
|
19
|
+
@failed_rule = nil
|
20
|
+
@failing_rule_offset = -1
|
21
|
+
@line_offsets = nil
|
22
|
+
|
23
|
+
setup_foreign_grammar
|
24
|
+
end
|
25
|
+
|
26
|
+
attr_reader :string
|
27
|
+
attr_reader :failing_rule_offset
|
28
|
+
attr_accessor :result, :pos
|
29
|
+
|
30
|
+
def current_column(target=pos)
|
31
|
+
if c = string.rindex("\n", target-1)
|
32
|
+
return target - c - 1
|
33
|
+
end
|
34
|
+
|
35
|
+
target + 1
|
36
|
+
end
|
37
|
+
|
38
|
+
if [].respond_to? :bsearch_index
|
39
|
+
def current_line(target=pos)
|
40
|
+
unless @line_offsets
|
41
|
+
@line_offsets = [-1]
|
42
|
+
total = 0
|
43
|
+
string.each_line do |line|
|
44
|
+
@line_offsets << total
|
45
|
+
total += line.size
|
46
|
+
end
|
47
|
+
@line_offsets << total
|
48
|
+
end
|
49
|
+
|
50
|
+
@line_offsets.bsearch_index {|x| x >= target } || -1
|
51
|
+
end
|
52
|
+
else
|
53
|
+
def current_line(target=pos)
|
54
|
+
cur_offset = 0
|
55
|
+
cur_line = 0
|
56
|
+
|
57
|
+
string.each_line do |line|
|
58
|
+
cur_line += 1
|
59
|
+
cur_offset += line.size
|
60
|
+
return cur_line if cur_offset >= target
|
61
|
+
end
|
62
|
+
|
63
|
+
-1
|
64
|
+
end
|
65
|
+
end
|
66
|
+
|
67
|
+
def lines
|
68
|
+
lines = []
|
69
|
+
string.each_line { |l| lines << l }
|
70
|
+
lines
|
71
|
+
end
|
72
|
+
|
73
|
+
|
74
|
+
|
75
|
+
def get_text(start)
|
76
|
+
@string[start..@pos-1]
|
77
|
+
end
|
78
|
+
|
79
|
+
# Sets the string and current parsing position for the parser.
|
80
|
+
def set_string string, pos
|
81
|
+
@string = string
|
82
|
+
@string_size = string ? string.size : 0
|
83
|
+
@pos = pos
|
84
|
+
end
|
85
|
+
|
86
|
+
def show_pos
|
87
|
+
width = 10
|
88
|
+
if @pos < width
|
89
|
+
"#{@pos} (\"#{@string[0,@pos]}\" @ \"#{@string[@pos,width]}\")"
|
90
|
+
else
|
91
|
+
"#{@pos} (\"... #{@string[@pos - width, width]}\" @ \"#{@string[@pos,width]}\")"
|
92
|
+
end
|
93
|
+
end
|
94
|
+
|
95
|
+
def failure_info
|
96
|
+
l = current_line @failing_rule_offset
|
97
|
+
c = current_column @failing_rule_offset
|
98
|
+
|
99
|
+
if @failed_rule.kind_of? Symbol
|
100
|
+
info = self.class::Rules[@failed_rule]
|
101
|
+
"line #{l}, column #{c}: failed rule '#{info.name}' = '#{info.rendered}'"
|
102
|
+
else
|
103
|
+
"line #{l}, column #{c}: failed rule '#{@failed_rule}'"
|
104
|
+
end
|
105
|
+
end
|
106
|
+
|
107
|
+
def failure_caret
|
108
|
+
l = current_line @failing_rule_offset
|
109
|
+
c = current_column @failing_rule_offset
|
110
|
+
|
111
|
+
line = lines[l-1]
|
112
|
+
"#{line}\n#{' ' * (c - 1)}^"
|
113
|
+
end
|
114
|
+
|
115
|
+
def failure_character
|
116
|
+
l = current_line @failing_rule_offset
|
117
|
+
c = current_column @failing_rule_offset
|
118
|
+
lines[l-1][c-1, 1]
|
119
|
+
end
|
120
|
+
|
121
|
+
def failure_oneline
|
122
|
+
l = current_line @failing_rule_offset
|
123
|
+
c = current_column @failing_rule_offset
|
124
|
+
|
125
|
+
char = lines[l-1][c-1, 1]
|
126
|
+
|
127
|
+
if @failed_rule.kind_of? Symbol
|
128
|
+
info = self.class::Rules[@failed_rule]
|
129
|
+
"@#{l}:#{c} failed rule '#{info.name}', got '#{char}'"
|
130
|
+
else
|
131
|
+
"@#{l}:#{c} failed rule '#{@failed_rule}', got '#{char}'"
|
132
|
+
end
|
133
|
+
end
|
134
|
+
|
135
|
+
class ParseError < RuntimeError
|
136
|
+
end
|
137
|
+
|
138
|
+
def raise_error
|
139
|
+
raise ParseError, failure_oneline
|
140
|
+
end
|
141
|
+
|
142
|
+
def show_error(io=STDOUT)
|
143
|
+
error_pos = @failing_rule_offset
|
144
|
+
line_no = current_line(error_pos)
|
145
|
+
col_no = current_column(error_pos)
|
146
|
+
|
147
|
+
io.puts "On line #{line_no}, column #{col_no}:"
|
148
|
+
|
149
|
+
if @failed_rule.kind_of? Symbol
|
150
|
+
info = self.class::Rules[@failed_rule]
|
151
|
+
io.puts "Failed to match '#{info.rendered}' (rule '#{info.name}')"
|
152
|
+
else
|
153
|
+
io.puts "Failed to match rule '#{@failed_rule}'"
|
154
|
+
end
|
155
|
+
|
156
|
+
io.puts "Got: #{string[error_pos,1].inspect}"
|
157
|
+
line = lines[line_no-1]
|
158
|
+
io.puts "=> #{line}"
|
159
|
+
io.print(" " * (col_no + 3))
|
160
|
+
io.puts "^"
|
161
|
+
end
|
162
|
+
|
163
|
+
def set_failed_rule(name)
|
164
|
+
if @pos > @failing_rule_offset
|
165
|
+
@failed_rule = name
|
166
|
+
@failing_rule_offset = @pos
|
167
|
+
end
|
168
|
+
end
|
169
|
+
|
170
|
+
attr_reader :failed_rule
|
171
|
+
|
172
|
+
def match_string(str)
|
173
|
+
len = str.size
|
174
|
+
if @string[pos,len] == str
|
175
|
+
@pos += len
|
176
|
+
return str
|
177
|
+
end
|
178
|
+
|
179
|
+
return nil
|
180
|
+
end
|
181
|
+
|
182
|
+
def scan(reg)
|
183
|
+
if m = reg.match(@string, @pos)
|
184
|
+
@pos = m.end(0)
|
185
|
+
return true
|
186
|
+
end
|
187
|
+
|
188
|
+
return nil
|
189
|
+
end
|
190
|
+
|
191
|
+
if "".respond_to? :ord
|
192
|
+
def get_byte
|
193
|
+
if @pos >= @string_size
|
194
|
+
return nil
|
195
|
+
end
|
196
|
+
|
197
|
+
s = @string[@pos].ord
|
198
|
+
@pos += 1
|
199
|
+
s
|
200
|
+
end
|
201
|
+
else
|
202
|
+
def get_byte
|
203
|
+
if @pos >= @string_size
|
204
|
+
return nil
|
205
|
+
end
|
206
|
+
|
207
|
+
s = @string[@pos]
|
208
|
+
@pos += 1
|
209
|
+
s
|
210
|
+
end
|
211
|
+
end
|
212
|
+
|
213
|
+
def parse(rule=nil)
|
214
|
+
# We invoke the rules indirectly via apply
|
215
|
+
# instead of by just calling them as methods because
|
216
|
+
# if the rules use left recursion, apply needs to
|
217
|
+
# manage that.
|
218
|
+
|
219
|
+
if !rule
|
220
|
+
apply(:_root)
|
221
|
+
else
|
222
|
+
method = rule.gsub("-","_hyphen_")
|
223
|
+
apply :"_#{method}"
|
224
|
+
end
|
225
|
+
end
|
226
|
+
|
227
|
+
class MemoEntry
|
228
|
+
def initialize(ans, pos)
|
229
|
+
@ans = ans
|
230
|
+
@pos = pos
|
231
|
+
@result = nil
|
232
|
+
@set = false
|
233
|
+
@left_rec = false
|
234
|
+
end
|
235
|
+
|
236
|
+
attr_reader :ans, :pos, :result, :set
|
237
|
+
attr_accessor :left_rec
|
238
|
+
|
239
|
+
def move!(ans, pos, result)
|
240
|
+
@ans = ans
|
241
|
+
@pos = pos
|
242
|
+
@result = result
|
243
|
+
@set = true
|
244
|
+
@left_rec = false
|
245
|
+
end
|
246
|
+
end
|
247
|
+
|
248
|
+
def external_invoke(other, rule, *args)
|
249
|
+
old_pos = @pos
|
250
|
+
old_string = @string
|
251
|
+
|
252
|
+
set_string other.string, other.pos
|
253
|
+
|
254
|
+
begin
|
255
|
+
if val = __send__(rule, *args)
|
256
|
+
other.pos = @pos
|
257
|
+
other.result = @result
|
258
|
+
else
|
259
|
+
other.set_failed_rule "#{self.class}##{rule}"
|
260
|
+
end
|
261
|
+
val
|
262
|
+
ensure
|
263
|
+
set_string old_string, old_pos
|
264
|
+
end
|
265
|
+
end
|
266
|
+
|
267
|
+
def apply_with_args(rule, *args)
|
268
|
+
memo_key = [rule, args]
|
269
|
+
if m = @memoizations[memo_key][@pos]
|
270
|
+
@pos = m.pos
|
271
|
+
if !m.set
|
272
|
+
m.left_rec = true
|
273
|
+
return nil
|
274
|
+
end
|
275
|
+
|
276
|
+
@result = m.result
|
2
277
|
|
3
|
-
|
278
|
+
return m.ans
|
279
|
+
else
|
280
|
+
m = MemoEntry.new(nil, @pos)
|
281
|
+
@memoizations[memo_key][@pos] = m
|
282
|
+
start_pos = @pos
|
283
|
+
|
284
|
+
ans = __send__ rule, *args
|
285
|
+
|
286
|
+
lr = m.left_rec
|
287
|
+
|
288
|
+
m.move! ans, @pos, @result
|
289
|
+
|
290
|
+
# Don't bother trying to grow the left recursion
|
291
|
+
# if it's failing straight away (thus there is no seed)
|
292
|
+
if ans and lr
|
293
|
+
return grow_lr(rule, args, start_pos, m)
|
294
|
+
else
|
295
|
+
return ans
|
296
|
+
end
|
297
|
+
end
|
298
|
+
end
|
299
|
+
|
300
|
+
def apply(rule)
|
301
|
+
if m = @memoizations[rule][@pos]
|
302
|
+
@pos = m.pos
|
303
|
+
if !m.set
|
304
|
+
m.left_rec = true
|
305
|
+
return nil
|
306
|
+
end
|
307
|
+
|
308
|
+
@result = m.result
|
309
|
+
|
310
|
+
return m.ans
|
311
|
+
else
|
312
|
+
m = MemoEntry.new(nil, @pos)
|
313
|
+
@memoizations[rule][@pos] = m
|
314
|
+
start_pos = @pos
|
315
|
+
|
316
|
+
ans = __send__ rule
|
317
|
+
|
318
|
+
lr = m.left_rec
|
319
|
+
|
320
|
+
m.move! ans, @pos, @result
|
321
|
+
|
322
|
+
# Don't bother trying to grow the left recursion
|
323
|
+
# if it's failing straight away (thus there is no seed)
|
324
|
+
if ans and lr
|
325
|
+
return grow_lr(rule, nil, start_pos, m)
|
326
|
+
else
|
327
|
+
return ans
|
328
|
+
end
|
329
|
+
end
|
330
|
+
end
|
331
|
+
|
332
|
+
def grow_lr(rule, args, start_pos, m)
|
333
|
+
while true
|
334
|
+
@pos = start_pos
|
335
|
+
@result = m.result
|
336
|
+
|
337
|
+
if args
|
338
|
+
ans = __send__ rule, *args
|
339
|
+
else
|
340
|
+
ans = __send__ rule
|
341
|
+
end
|
342
|
+
return nil unless ans
|
343
|
+
|
344
|
+
break if @pos <= m.pos
|
345
|
+
|
346
|
+
m.move! ans, @pos, @result
|
347
|
+
end
|
348
|
+
|
349
|
+
@result = m.result
|
350
|
+
@pos = m.pos
|
351
|
+
return m.ans
|
352
|
+
end
|
353
|
+
|
354
|
+
class RuleInfo
|
355
|
+
def initialize(name, rendered)
|
356
|
+
@name = name
|
357
|
+
@rendered = rendered
|
358
|
+
end
|
359
|
+
|
360
|
+
attr_reader :name, :rendered
|
361
|
+
end
|
362
|
+
|
363
|
+
def self.rule_info(name, rendered)
|
364
|
+
RuleInfo.new(name, rendered)
|
365
|
+
end
|
366
|
+
|
367
|
+
|
368
|
+
# :startdoc:
|
4
369
|
|
5
370
|
|
6
371
|
attr_reader :text
|
7
372
|
|
8
373
|
|
374
|
+
# :stopdoc:
|
375
|
+
def setup_foreign_grammar; end
|
9
376
|
|
10
|
-
# segment = (< /[\w ]+/ > { text } | "\\" { "\\\\" } | "\n" { "\\n" } | "\t" { "\\t" } | "\b" { "\\b" } | "\"" { "\\\"" } | < . > { text })
|
377
|
+
# segment = (< /[\w ]+/ > { text } | "\\" { "\\\\" } | "\n" { "\\n" } | "\r" { "\\r" } | "\t" { "\\t" } | "\b" { "\\b" } | "\"" { "\\\"" } | < . > { text })
|
11
378
|
def _segment
|
12
379
|
|
13
380
|
_save = self.pos
|
@@ -16,7 +383,7 @@ class KPeg::StringEscape < KPeg::CompiledParser
|
|
16
383
|
_save1 = self.pos
|
17
384
|
while true # sequence
|
18
385
|
_text_start = self.pos
|
19
|
-
_tmp = scan(/\A(?-mix:[\w ]+)/)
|
386
|
+
_tmp = scan(/\G(?-mix:[\w ]+)/)
|
20
387
|
if _tmp
|
21
388
|
text = get_text(_text_start)
|
22
389
|
end
|
@@ -73,12 +440,12 @@ class KPeg::StringEscape < KPeg::CompiledParser
|
|
73
440
|
|
74
441
|
_save4 = self.pos
|
75
442
|
while true # sequence
|
76
|
-
_tmp = match_string("\t")
|
443
|
+
_tmp = match_string("\r")
|
77
444
|
unless _tmp
|
78
445
|
self.pos = _save4
|
79
446
|
break
|
80
447
|
end
|
81
|
-
@result = begin; "\\t" ; end
|
448
|
+
@result = begin; "\\r" ; end
|
82
449
|
_tmp = true
|
83
450
|
unless _tmp
|
84
451
|
self.pos = _save4
|
@@ -91,12 +458,12 @@ class KPeg::StringEscape < KPeg::CompiledParser
|
|
91
458
|
|
92
459
|
_save5 = self.pos
|
93
460
|
while true # sequence
|
94
|
-
_tmp = match_string("\b")
|
461
|
+
_tmp = match_string("\t")
|
95
462
|
unless _tmp
|
96
463
|
self.pos = _save5
|
97
464
|
break
|
98
465
|
end
|
99
|
-
@result = begin; "\\b" ; end
|
466
|
+
@result = begin; "\\t" ; end
|
100
467
|
_tmp = true
|
101
468
|
unless _tmp
|
102
469
|
self.pos = _save5
|
@@ -109,12 +476,12 @@ class KPeg::StringEscape < KPeg::CompiledParser
|
|
109
476
|
|
110
477
|
_save6 = self.pos
|
111
478
|
while true # sequence
|
112
|
-
_tmp = match_string("\"")
|
479
|
+
_tmp = match_string("\b")
|
113
480
|
unless _tmp
|
114
481
|
self.pos = _save6
|
115
482
|
break
|
116
483
|
end
|
117
|
-
@result = begin; "\\\"" ; end
|
484
|
+
@result = begin; "\\b" ; end
|
118
485
|
_tmp = true
|
119
486
|
unless _tmp
|
120
487
|
self.pos = _save6
|
@@ -126,6 +493,24 @@ class KPeg::StringEscape < KPeg::CompiledParser
|
|
126
493
|
self.pos = _save
|
127
494
|
|
128
495
|
_save7 = self.pos
|
496
|
+
while true # sequence
|
497
|
+
_tmp = match_string("\"")
|
498
|
+
unless _tmp
|
499
|
+
self.pos = _save7
|
500
|
+
break
|
501
|
+
end
|
502
|
+
@result = begin; "\\\"" ; end
|
503
|
+
_tmp = true
|
504
|
+
unless _tmp
|
505
|
+
self.pos = _save7
|
506
|
+
end
|
507
|
+
break
|
508
|
+
end # end sequence
|
509
|
+
|
510
|
+
break if _tmp
|
511
|
+
self.pos = _save
|
512
|
+
|
513
|
+
_save8 = self.pos
|
129
514
|
while true # sequence
|
130
515
|
_text_start = self.pos
|
131
516
|
_tmp = get_byte
|
@@ -133,13 +518,13 @@ class KPeg::StringEscape < KPeg::CompiledParser
|
|
133
518
|
text = get_text(_text_start)
|
134
519
|
end
|
135
520
|
unless _tmp
|
136
|
-
self.pos = _save7
|
521
|
+
self.pos = _save8
|
137
522
|
break
|
138
523
|
end
|
139
524
|
@result = begin; text ; end
|
140
525
|
_tmp = true
|
141
526
|
unless _tmp
|
142
|
-
self.pos = _save7
|
527
|
+
self.pos = _save8
|
143
528
|
end
|
144
529
|
break
|
145
530
|
end # end sequence
|
@@ -247,8 +632,9 @@ class KPeg::StringEscape < KPeg::CompiledParser
|
|
247
632
|
end
|
248
633
|
|
249
634
|
Rules = {}
|
250
|
-
Rules[:_segment] = rule_info("segment", "(< /[\\w ]+/ > { text } | \"\\\\\" { \"\\\\\\\\\" } | \"\\n\" { \"\\\\n\" } | \"\\t\" { \"\\\\t\" } | \"\\b\" { \"\\\\b\" } | \"\\\"\" { \"\\\\\\\"\" } | < . > { text })")
|
635
|
+
Rules[:_segment] = rule_info("segment", "(< /[\\w ]+/ > { text } | \"\\\\\" { \"\\\\\\\\\" } | \"\\n\" { \"\\\\n\" } | \"\\r\" { \"\\\\r\" } | \"\\t\" { \"\\\\t\" } | \"\\b\" { \"\\\\b\" } | \"\\\"\" { \"\\\\\\\"\" } | < . > { text })")
|
251
636
|
Rules[:_root] = rule_info("root", "segment*:s { @text = s.join }")
|
252
637
|
Rules[:_embed_seg] = rule_info("embed_seg", "(\"\#\" { \"\\\\\#\" } | segment)")
|
253
638
|
Rules[:_embed] = rule_info("embed", "embed_seg*:s { @text = s.join }")
|
639
|
+
# :startdoc:
|
254
640
|
end
|
data/lib/kpeg.rb
CHANGED
data/test/test_kpeg.rb
CHANGED