kpeg 0.10.0 → 1.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.hoeignore +12 -0
- data/Gemfile +3 -0
- data/History.txt +14 -0
- data/Manifest.txt +9 -0
- data/README.rdoc +8 -8
- data/Rakefile +2 -0
- data/bin/kpeg +9 -2
- data/examples/lua_string/lua_string.kpeg.rb +0 -2
- data/examples/tiny_markdown/Rakefile +3 -0
- data/examples/tiny_markdown/driver.rb +10 -0
- data/examples/tiny_markdown/node.rb +107 -0
- data/examples/tiny_markdown/sample.md +51 -0
- data/examples/tiny_markdown/tiny_markdown.kpeg +199 -0
- data/examples/tiny_markdown/tiny_markdown.kpeg.rb +3892 -0
- data/kpeg.gemspec +20 -20
- data/lib/kpeg/code_generator.rb +10 -7
- data/lib/kpeg/compiled_parser.rb +17 -17
- data/lib/kpeg/format_parser.kpeg +1 -1
- data/lib/kpeg/format_parser.rb +60 -43
- data/lib/kpeg/grammar.rb +1 -1
- data/lib/kpeg/position.rb +25 -10
- data/lib/kpeg/string_escape.kpeg +1 -0
- data/lib/kpeg/string_escape.rb +69 -37
- data/lib/kpeg.rb +1 -1
- data/test/test_kpeg.rb +1 -1
- data/test/test_kpeg_code_generator.rb +20 -14
- data/test/test_kpeg_compiled_parser.rb +11 -2
- data/test/test_kpeg_format.rb +23 -4
- data/test/test_kpeg_format_parser_round_trip.rb +1 -1
- data/test/test_kpeg_grammar.rb +1 -1
- data/test/test_kpeg_grammar_renderer.rb +1 -1
- data/test/test_kpeg_string_escape.rb +42 -0
- metadata +89 -97
- data/.gemtest +0 -0
data/lib/kpeg/string_escape.rb
CHANGED
@@ -13,12 +13,12 @@ class KPeg::StringEscape
|
|
13
13
|
# Prepares for parsing +str+. If you define a custom initialize you must
|
14
14
|
# call this method before #parse
|
15
15
|
def setup_parser(str, debug=false)
|
16
|
-
|
17
|
-
@pos = 0
|
16
|
+
set_string str, 0
|
18
17
|
@memoizations = Hash.new { |h,k| h[k] = {} }
|
19
18
|
@result = nil
|
20
19
|
@failed_rule = nil
|
21
20
|
@failing_rule_offset = -1
|
21
|
+
@line_offsets = nil
|
22
22
|
|
23
23
|
setup_foreign_grammar
|
24
24
|
end
|
@@ -27,7 +27,6 @@ class KPeg::StringEscape
|
|
27
27
|
attr_reader :failing_rule_offset
|
28
28
|
attr_accessor :result, :pos
|
29
29
|
|
30
|
-
|
31
30
|
def current_column(target=pos)
|
32
31
|
if c = string.rindex("\n", target-1)
|
33
32
|
return target - c - 1
|
@@ -36,17 +35,32 @@ class KPeg::StringEscape
|
|
36
35
|
target + 1
|
37
36
|
end
|
38
37
|
|
39
|
-
|
40
|
-
|
41
|
-
|
38
|
+
if [].respond_to? :bsearch_index
|
39
|
+
def current_line(target=pos)
|
40
|
+
unless @line_offsets
|
41
|
+
@line_offsets = []
|
42
|
+
total = 0
|
43
|
+
string.each_line do |line|
|
44
|
+
total += line.size
|
45
|
+
@line_offsets << total
|
46
|
+
end
|
47
|
+
end
|
42
48
|
|
43
|
-
|
44
|
-
cur_line += 1
|
45
|
-
cur_offset += line.size
|
46
|
-
return cur_line if cur_offset >= target
|
49
|
+
@line_offsets.bsearch_index {|x| x >= target } + 1 || -1
|
47
50
|
end
|
51
|
+
else
|
52
|
+
def current_line(target=pos)
|
53
|
+
cur_offset = 0
|
54
|
+
cur_line = 0
|
55
|
+
|
56
|
+
string.each_line do |line|
|
57
|
+
cur_line += 1
|
58
|
+
cur_offset += line.size
|
59
|
+
return cur_line if cur_offset >= target
|
60
|
+
end
|
48
61
|
|
49
|
-
|
62
|
+
-1
|
63
|
+
end
|
50
64
|
end
|
51
65
|
|
52
66
|
def lines
|
@@ -61,6 +75,13 @@ class KPeg::StringEscape
|
|
61
75
|
@string[start..@pos-1]
|
62
76
|
end
|
63
77
|
|
78
|
+
# Sets the string and current parsing position for the parser.
|
79
|
+
def set_string string, pos
|
80
|
+
@string = string
|
81
|
+
@string_size = string ? string.size : 0
|
82
|
+
@pos = pos
|
83
|
+
end
|
84
|
+
|
64
85
|
def show_pos
|
65
86
|
width = 10
|
66
87
|
if @pos < width
|
@@ -158,28 +179,27 @@ class KPeg::StringEscape
|
|
158
179
|
end
|
159
180
|
|
160
181
|
def scan(reg)
|
161
|
-
if m = reg.match(@string
|
162
|
-
|
163
|
-
@pos += width
|
182
|
+
if m = reg.match(@string, @pos)
|
183
|
+
@pos = m.end(0)
|
164
184
|
return true
|
165
185
|
end
|
166
186
|
|
167
187
|
return nil
|
168
188
|
end
|
169
189
|
|
170
|
-
if "".respond_to? :
|
190
|
+
if "".respond_to? :ord
|
171
191
|
def get_byte
|
172
|
-
if @pos >= @
|
192
|
+
if @pos >= @string_size
|
173
193
|
return nil
|
174
194
|
end
|
175
195
|
|
176
|
-
s = @string
|
196
|
+
s = @string[@pos].ord
|
177
197
|
@pos += 1
|
178
198
|
s
|
179
199
|
end
|
180
200
|
else
|
181
201
|
def get_byte
|
182
|
-
if @pos >= @
|
202
|
+
if @pos >= @string_size
|
183
203
|
return nil
|
184
204
|
end
|
185
205
|
|
@@ -228,8 +248,7 @@ class KPeg::StringEscape
|
|
228
248
|
old_pos = @pos
|
229
249
|
old_string = @string
|
230
250
|
|
231
|
-
|
232
|
-
@string = other.string
|
251
|
+
set_string other.string, other.pos
|
233
252
|
|
234
253
|
begin
|
235
254
|
if val = __send__(rule, *args)
|
@@ -240,8 +259,7 @@ class KPeg::StringEscape
|
|
240
259
|
end
|
241
260
|
val
|
242
261
|
ensure
|
243
|
-
|
244
|
-
@string = old_string
|
262
|
+
set_string old_string, old_pos
|
245
263
|
end
|
246
264
|
end
|
247
265
|
|
@@ -275,8 +293,6 @@ class KPeg::StringEscape
|
|
275
293
|
else
|
276
294
|
return ans
|
277
295
|
end
|
278
|
-
|
279
|
-
return ans
|
280
296
|
end
|
281
297
|
end
|
282
298
|
|
@@ -309,8 +325,6 @@ class KPeg::StringEscape
|
|
309
325
|
else
|
310
326
|
return ans
|
311
327
|
end
|
312
|
-
|
313
|
-
return ans
|
314
328
|
end
|
315
329
|
end
|
316
330
|
|
@@ -359,7 +373,7 @@ class KPeg::StringEscape
|
|
359
373
|
# :stopdoc:
|
360
374
|
def setup_foreign_grammar; end
|
361
375
|
|
362
|
-
# segment = (< /[\w ]+/ > { text } | "\\" { "\\\\" } | "\n" { "\\n" } | "\t" { "\\t" } | "\b" { "\\b" } | "\"" { "\\\"" } | < . > { text })
|
376
|
+
# segment = (< /[\w ]+/ > { text } | "\\" { "\\\\" } | "\n" { "\\n" } | "\r" { "\\r" } | "\t" { "\\t" } | "\b" { "\\b" } | "\"" { "\\\"" } | < . > { text })
|
363
377
|
def _segment
|
364
378
|
|
365
379
|
_save = self.pos
|
@@ -368,7 +382,7 @@ class KPeg::StringEscape
|
|
368
382
|
_save1 = self.pos
|
369
383
|
while true # sequence
|
370
384
|
_text_start = self.pos
|
371
|
-
_tmp = scan(/\
|
385
|
+
_tmp = scan(/\G(?-mix:[\w ]+)/)
|
372
386
|
if _tmp
|
373
387
|
text = get_text(_text_start)
|
374
388
|
end
|
@@ -425,12 +439,12 @@ class KPeg::StringEscape
|
|
425
439
|
|
426
440
|
_save4 = self.pos
|
427
441
|
while true # sequence
|
428
|
-
_tmp = match_string("\
|
442
|
+
_tmp = match_string("\r")
|
429
443
|
unless _tmp
|
430
444
|
self.pos = _save4
|
431
445
|
break
|
432
446
|
end
|
433
|
-
@result = begin; "\\
|
447
|
+
@result = begin; "\\r" ; end
|
434
448
|
_tmp = true
|
435
449
|
unless _tmp
|
436
450
|
self.pos = _save4
|
@@ -443,12 +457,12 @@ class KPeg::StringEscape
|
|
443
457
|
|
444
458
|
_save5 = self.pos
|
445
459
|
while true # sequence
|
446
|
-
_tmp = match_string("\
|
460
|
+
_tmp = match_string("\t")
|
447
461
|
unless _tmp
|
448
462
|
self.pos = _save5
|
449
463
|
break
|
450
464
|
end
|
451
|
-
@result = begin; "\\
|
465
|
+
@result = begin; "\\t" ; end
|
452
466
|
_tmp = true
|
453
467
|
unless _tmp
|
454
468
|
self.pos = _save5
|
@@ -461,12 +475,12 @@ class KPeg::StringEscape
|
|
461
475
|
|
462
476
|
_save6 = self.pos
|
463
477
|
while true # sequence
|
464
|
-
_tmp = match_string("\"
|
478
|
+
_tmp = match_string("\b")
|
465
479
|
unless _tmp
|
466
480
|
self.pos = _save6
|
467
481
|
break
|
468
482
|
end
|
469
|
-
@result = begin; "
|
483
|
+
@result = begin; "\\b" ; end
|
470
484
|
_tmp = true
|
471
485
|
unless _tmp
|
472
486
|
self.pos = _save6
|
@@ -478,6 +492,24 @@ class KPeg::StringEscape
|
|
478
492
|
self.pos = _save
|
479
493
|
|
480
494
|
_save7 = self.pos
|
495
|
+
while true # sequence
|
496
|
+
_tmp = match_string("\"")
|
497
|
+
unless _tmp
|
498
|
+
self.pos = _save7
|
499
|
+
break
|
500
|
+
end
|
501
|
+
@result = begin; "\\\"" ; end
|
502
|
+
_tmp = true
|
503
|
+
unless _tmp
|
504
|
+
self.pos = _save7
|
505
|
+
end
|
506
|
+
break
|
507
|
+
end # end sequence
|
508
|
+
|
509
|
+
break if _tmp
|
510
|
+
self.pos = _save
|
511
|
+
|
512
|
+
_save8 = self.pos
|
481
513
|
while true # sequence
|
482
514
|
_text_start = self.pos
|
483
515
|
_tmp = get_byte
|
@@ -485,13 +517,13 @@ class KPeg::StringEscape
|
|
485
517
|
text = get_text(_text_start)
|
486
518
|
end
|
487
519
|
unless _tmp
|
488
|
-
self.pos =
|
520
|
+
self.pos = _save8
|
489
521
|
break
|
490
522
|
end
|
491
523
|
@result = begin; text ; end
|
492
524
|
_tmp = true
|
493
525
|
unless _tmp
|
494
|
-
self.pos =
|
526
|
+
self.pos = _save8
|
495
527
|
end
|
496
528
|
break
|
497
529
|
end # end sequence
|
@@ -599,7 +631,7 @@ class KPeg::StringEscape
|
|
599
631
|
end
|
600
632
|
|
601
633
|
Rules = {}
|
602
|
-
Rules[:_segment] = rule_info("segment", "(< /[\\w ]+/ > { text } | \"\\\\\" { \"\\\\\\\\\" } | \"\\n\" { \"\\\\n\" } | \"\\t\" { \"\\\\t\" } | \"\\b\" { \"\\\\b\" } | \"\\\"\" { \"\\\\\\\"\" } | < . > { text })")
|
634
|
+
Rules[:_segment] = rule_info("segment", "(< /[\\w ]+/ > { text } | \"\\\\\" { \"\\\\\\\\\" } | \"\\n\" { \"\\\\n\" } | \"\\r\" { \"\\\\r\" } | \"\\t\" { \"\\\\t\" } | \"\\b\" { \"\\\\b\" } | \"\\\"\" { \"\\\\\\\"\" } | < . > { text })")
|
603
635
|
Rules[:_root] = rule_info("root", "segment*:s { @text = s.join }")
|
604
636
|
Rules[:_embed_seg] = rule_info("embed_seg", "(\"\#\" { \"\\\\\#\" } | segment)")
|
605
637
|
Rules[:_embed] = rule_info("embed", "embed_seg*:s { @text = s.join }")
|
data/lib/kpeg.rb
CHANGED
data/test/test_kpeg.rb
CHANGED
@@ -4,7 +4,7 @@ require 'kpeg'
|
|
4
4
|
require 'kpeg/code_generator'
|
5
5
|
require 'stringio'
|
6
6
|
|
7
|
-
class TestKPegCodeGenerator <
|
7
|
+
class TestKPegCodeGenerator < Minitest::Test
|
8
8
|
def test_dot
|
9
9
|
gram = KPeg.grammar do |g|
|
10
10
|
g.root = g.dot
|
@@ -80,7 +80,7 @@ class Test < KPeg::CompiledParser
|
|
80
80
|
|
81
81
|
# root = /[0-9]/
|
82
82
|
def _root
|
83
|
-
_tmp = scan(/\\
|
83
|
+
_tmp = scan(/\\G(?-mix:[0-9])/)
|
84
84
|
set_failed_rule :_root unless _tmp
|
85
85
|
return _tmp
|
86
86
|
end
|
@@ -114,7 +114,7 @@ class Test < KPeg::CompiledParser
|
|
114
114
|
|
115
115
|
# root = /./
|
116
116
|
def _root
|
117
|
-
_tmp = scan(/\\
|
117
|
+
_tmp = scan(/\\G(?-mix:.)/)
|
118
118
|
set_failed_rule :_root unless _tmp
|
119
119
|
return _tmp
|
120
120
|
end
|
@@ -133,7 +133,7 @@ class Test < KPeg::CompiledParser
|
|
133
133
|
|
134
134
|
# root = /./u
|
135
135
|
def _root
|
136
|
-
_tmp = scan(/\\
|
136
|
+
_tmp = scan(/\\G(?-mix:.)/u)
|
137
137
|
set_failed_rule :_root unless _tmp
|
138
138
|
return _tmp
|
139
139
|
end
|
@@ -1661,15 +1661,18 @@ class Test < KPeg::CompiledParser
|
|
1661
1661
|
end
|
1662
1662
|
end
|
1663
1663
|
end
|
1664
|
-
|
1665
|
-
|
1666
|
-
|
1667
|
-
|
1668
|
-
|
1669
|
-
|
1670
|
-
|
1671
|
-
|
1664
|
+
module ASTConstruction
|
1665
|
+
def bracket(receiver, argument)
|
1666
|
+
AST::BracketOperator.new(receiver, argument)
|
1667
|
+
end
|
1668
|
+
def simple()
|
1669
|
+
AST::Simple.new()
|
1670
|
+
end
|
1671
|
+
def simple2()
|
1672
|
+
AST::Simple2.new()
|
1673
|
+
end
|
1672
1674
|
end
|
1675
|
+
include ASTConstruction
|
1673
1676
|
|
1674
1677
|
# root = .
|
1675
1678
|
def _root
|
@@ -1715,9 +1718,12 @@ class Test < KPeg::CompiledParser
|
|
1715
1718
|
attr_reader :argument
|
1716
1719
|
end
|
1717
1720
|
end
|
1718
|
-
|
1719
|
-
|
1721
|
+
module MegaASTConstruction
|
1722
|
+
def bracket(receiver, argument)
|
1723
|
+
MegaAST::BracketOperator.new(receiver, argument)
|
1724
|
+
end
|
1720
1725
|
end
|
1726
|
+
include MegaASTConstruction
|
1721
1727
|
|
1722
1728
|
# root = .
|
1723
1729
|
def _root
|
@@ -3,10 +3,11 @@ require 'kpeg'
|
|
3
3
|
require 'kpeg/compiled_parser'
|
4
4
|
require 'stringio'
|
5
5
|
|
6
|
-
class TestKPegCompiledParser <
|
6
|
+
class TestKPegCompiledParser < Minitest::Test
|
7
7
|
|
8
8
|
gram = <<-GRAM
|
9
9
|
letter = [a-z]
|
10
|
+
number = [0-9]
|
10
11
|
root = letter
|
11
12
|
GRAM
|
12
13
|
|
@@ -14,7 +15,7 @@ class TestKPegCompiledParser < MiniTest::Unit::TestCase
|
|
14
15
|
|
15
16
|
gram = <<-GRAM
|
16
17
|
%test = TestKPegCompiledParser::TestParser
|
17
|
-
root = %test.letter "!"
|
18
|
+
root = %test.letter %test.number? "!"
|
18
19
|
GRAM
|
19
20
|
|
20
21
|
KPeg.compile gram, "CompTestParser", self
|
@@ -78,4 +79,12 @@ class TestKPegCompiledParser < MiniTest::Unit::TestCase
|
|
78
79
|
assert_equal expected, r.failure_oneline
|
79
80
|
end
|
80
81
|
|
82
|
+
def test_composite_two_char_error
|
83
|
+
r = CompTestParser.new "aa"
|
84
|
+
assert_nil r.parse, "should not parse"
|
85
|
+
|
86
|
+
expected = "@1:2 failed rule 'TestKPegCompiledParser::TestParser#_number', got 'a'"
|
87
|
+
assert_equal expected, r.failure_oneline
|
88
|
+
end
|
89
|
+
|
81
90
|
end
|
data/test/test_kpeg_format.rb
CHANGED
@@ -5,7 +5,7 @@ require 'kpeg/grammar_renderer'
|
|
5
5
|
require 'stringio'
|
6
6
|
require 'rubygems'
|
7
7
|
|
8
|
-
class TestKPegFormat <
|
8
|
+
class TestKPegFormat < Minitest::Test
|
9
9
|
G = KPeg::Grammar.new
|
10
10
|
|
11
11
|
gram = File.read File.expand_path("../../lib/kpeg/format_parser.kpeg", __FILE__)
|
@@ -157,9 +157,9 @@ b(p) = x
|
|
157
157
|
end
|
158
158
|
|
159
159
|
def test_regexp
|
160
|
-
assert_rule G.reg(
|
161
|
-
assert_rule G.reg(
|
162
|
-
assert_rule G.reg(
|
160
|
+
assert_rule G.reg('foo'), match('a=/foo/')
|
161
|
+
assert_rule G.reg('foo\\/bar'), match('a=/foo\/bar/')
|
162
|
+
assert_rule G.reg('[^"]'), match('a=/[^"]/')
|
163
163
|
end
|
164
164
|
|
165
165
|
def test_regexp_options
|
@@ -431,6 +431,25 @@ a=b
|
|
431
431
|
assert_equal expected, m.directives
|
432
432
|
end
|
433
433
|
|
434
|
+
def test_parser_directive_single_quote
|
435
|
+
m = match <<-GRAMMAR
|
436
|
+
%% header {
|
437
|
+
# It's a bug I found
|
438
|
+
}
|
439
|
+
|
440
|
+
a=b
|
441
|
+
GRAMMAR
|
442
|
+
|
443
|
+
assert_rule G.ref("b"), m
|
444
|
+
|
445
|
+
expected = {
|
446
|
+
"header" => KPeg::Action.new("\n# It's a bug I found\n")
|
447
|
+
}
|
448
|
+
|
449
|
+
assert_equal expected, m.directives
|
450
|
+
end
|
451
|
+
|
452
|
+
|
434
453
|
def test_parser_setup
|
435
454
|
m = match "%% { def initialize; end }\na=b"
|
436
455
|
assert_rule G.ref("b"), m
|
@@ -5,7 +5,7 @@ require 'kpeg/grammar_renderer'
|
|
5
5
|
require 'kpeg/code_generator'
|
6
6
|
require 'stringio'
|
7
7
|
|
8
|
-
class TestKPegFormatParserRoundtrip <
|
8
|
+
class TestKPegFormatParserRoundtrip < Minitest::Test
|
9
9
|
PATH = File.expand_path("../../lib/kpeg/format_parser.kpeg", __FILE__)
|
10
10
|
def test_roundtrip
|
11
11
|
data = File.read(PATH)
|
data/test/test_kpeg_grammar.rb
CHANGED
@@ -3,7 +3,7 @@ require 'kpeg'
|
|
3
3
|
require 'kpeg/grammar_renderer'
|
4
4
|
require 'stringio'
|
5
5
|
|
6
|
-
class TestKPegGrammarRenderer <
|
6
|
+
class TestKPegGrammarRenderer < Minitest::Test
|
7
7
|
def test_escape
|
8
8
|
str = "hello\nbob"
|
9
9
|
assert_equal 'hello\nbob', KPeg::GrammarRenderer.escape(str)
|
@@ -0,0 +1,42 @@
|
|
1
|
+
require 'minitest/autorun'
|
2
|
+
require 'kpeg'
|
3
|
+
require 'kpeg/string_escape'
|
4
|
+
|
5
|
+
class TestKPegStringEscape < Minitest::Test
|
6
|
+
|
7
|
+
def test_bell
|
8
|
+
assert_equal '\b', parse("\b")
|
9
|
+
end
|
10
|
+
|
11
|
+
def test_carriage_return
|
12
|
+
assert_equal '\r', parse("\r")
|
13
|
+
end
|
14
|
+
|
15
|
+
def test_newline
|
16
|
+
assert_equal '\n', parse("\n")
|
17
|
+
end
|
18
|
+
|
19
|
+
def test_quote
|
20
|
+
assert_equal '\\\\\"', parse('\\"')
|
21
|
+
end
|
22
|
+
|
23
|
+
def test_slash
|
24
|
+
assert_equal '\\\\', parse('\\')
|
25
|
+
end
|
26
|
+
|
27
|
+
def test_tab
|
28
|
+
assert_equal '\t', parse("\t")
|
29
|
+
end
|
30
|
+
|
31
|
+
def parse(str, embed = false)
|
32
|
+
se = KPeg::StringEscape.new(str)
|
33
|
+
|
34
|
+
rule = (embed ? 'embed' : nil)
|
35
|
+
|
36
|
+
se.raise_error unless se.parse(rule)
|
37
|
+
|
38
|
+
se.text
|
39
|
+
end
|
40
|
+
|
41
|
+
end
|
42
|
+
|