kpeg 0.10.0 → 1.3.0

This diff shows the changes between two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package versions exactly as they appear in their respective public registries.
@@ -13,12 +13,12 @@ class KPeg::StringEscape
13
13
  # Prepares for parsing +str+. If you define a custom initialize you must
14
14
  # call this method before #parse
15
15
  def setup_parser(str, debug=false)
16
- @string = str
17
- @pos = 0
16
+ set_string str, 0
18
17
  @memoizations = Hash.new { |h,k| h[k] = {} }
19
18
  @result = nil
20
19
  @failed_rule = nil
21
20
  @failing_rule_offset = -1
21
+ @line_offsets = nil
22
22
 
23
23
  setup_foreign_grammar
24
24
  end
@@ -27,7 +27,6 @@ class KPeg::StringEscape
27
27
  attr_reader :failing_rule_offset
28
28
  attr_accessor :result, :pos
29
29
 
30
-
31
30
  def current_column(target=pos)
32
31
  if c = string.rindex("\n", target-1)
33
32
  return target - c - 1
@@ -36,17 +35,32 @@ class KPeg::StringEscape
36
35
  target + 1
37
36
  end
38
37
 
39
- def current_line(target=pos)
40
- cur_offset = 0
41
- cur_line = 0
38
+ if [].respond_to? :bsearch_index
39
+ def current_line(target=pos)
40
+ unless @line_offsets
41
+ @line_offsets = []
42
+ total = 0
43
+ string.each_line do |line|
44
+ total += line.size
45
+ @line_offsets << total
46
+ end
47
+ end
42
48
 
43
- string.each_line do |line|
44
- cur_line += 1
45
- cur_offset += line.size
46
- return cur_line if cur_offset >= target
49
+ @line_offsets.bsearch_index {|x| x >= target } + 1 || -1
47
50
  end
51
+ else
52
+ def current_line(target=pos)
53
+ cur_offset = 0
54
+ cur_line = 0
55
+
56
+ string.each_line do |line|
57
+ cur_line += 1
58
+ cur_offset += line.size
59
+ return cur_line if cur_offset >= target
60
+ end
48
61
 
49
- -1
62
+ -1
63
+ end
50
64
  end
51
65
 
52
66
  def lines
@@ -61,6 +75,13 @@ class KPeg::StringEscape
61
75
  @string[start..@pos-1]
62
76
  end
63
77
 
78
+ # Sets the string and current parsing position for the parser.
79
+ def set_string string, pos
80
+ @string = string
81
+ @string_size = string ? string.size : 0
82
+ @pos = pos
83
+ end
84
+
64
85
  def show_pos
65
86
  width = 10
66
87
  if @pos < width
@@ -158,28 +179,27 @@ class KPeg::StringEscape
158
179
  end
159
180
 
160
181
  def scan(reg)
161
- if m = reg.match(@string[@pos..-1])
162
- width = m.end(0)
163
- @pos += width
182
+ if m = reg.match(@string, @pos)
183
+ @pos = m.end(0)
164
184
  return true
165
185
  end
166
186
 
167
187
  return nil
168
188
  end
169
189
 
170
- if "".respond_to? :getbyte
190
+ if "".respond_to? :ord
171
191
  def get_byte
172
- if @pos >= @string.size
192
+ if @pos >= @string_size
173
193
  return nil
174
194
  end
175
195
 
176
- s = @string.getbyte @pos
196
+ s = @string[@pos].ord
177
197
  @pos += 1
178
198
  s
179
199
  end
180
200
  else
181
201
  def get_byte
182
- if @pos >= @string.size
202
+ if @pos >= @string_size
183
203
  return nil
184
204
  end
185
205
 
@@ -228,8 +248,7 @@ class KPeg::StringEscape
228
248
  old_pos = @pos
229
249
  old_string = @string
230
250
 
231
- @pos = other.pos
232
- @string = other.string
251
+ set_string other.string, other.pos
233
252
 
234
253
  begin
235
254
  if val = __send__(rule, *args)
@@ -240,8 +259,7 @@ class KPeg::StringEscape
240
259
  end
241
260
  val
242
261
  ensure
243
- @pos = old_pos
244
- @string = old_string
262
+ set_string old_string, old_pos
245
263
  end
246
264
  end
247
265
 
@@ -275,8 +293,6 @@ class KPeg::StringEscape
275
293
  else
276
294
  return ans
277
295
  end
278
-
279
- return ans
280
296
  end
281
297
  end
282
298
 
@@ -309,8 +325,6 @@ class KPeg::StringEscape
309
325
  else
310
326
  return ans
311
327
  end
312
-
313
- return ans
314
328
  end
315
329
  end
316
330
 
@@ -359,7 +373,7 @@ class KPeg::StringEscape
359
373
  # :stopdoc:
360
374
  def setup_foreign_grammar; end
361
375
 
362
- # segment = (< /[\w ]+/ > { text } | "\\" { "\\\\" } | "\n" { "\\n" } | "\t" { "\\t" } | "\b" { "\\b" } | "\"" { "\\\"" } | < . > { text })
376
+ # segment = (< /[\w ]+/ > { text } | "\\" { "\\\\" } | "\n" { "\\n" } | "\r" { "\\r" } | "\t" { "\\t" } | "\b" { "\\b" } | "\"" { "\\\"" } | < . > { text })
363
377
  def _segment
364
378
 
365
379
  _save = self.pos
@@ -368,7 +382,7 @@ class KPeg::StringEscape
368
382
  _save1 = self.pos
369
383
  while true # sequence
370
384
  _text_start = self.pos
371
- _tmp = scan(/\A(?-mix:[\w ]+)/)
385
+ _tmp = scan(/\G(?-mix:[\w ]+)/)
372
386
  if _tmp
373
387
  text = get_text(_text_start)
374
388
  end
@@ -425,12 +439,12 @@ class KPeg::StringEscape
425
439
 
426
440
  _save4 = self.pos
427
441
  while true # sequence
428
- _tmp = match_string("\t")
442
+ _tmp = match_string("\r")
429
443
  unless _tmp
430
444
  self.pos = _save4
431
445
  break
432
446
  end
433
- @result = begin; "\\t" ; end
447
+ @result = begin; "\\r" ; end
434
448
  _tmp = true
435
449
  unless _tmp
436
450
  self.pos = _save4
@@ -443,12 +457,12 @@ class KPeg::StringEscape
443
457
 
444
458
  _save5 = self.pos
445
459
  while true # sequence
446
- _tmp = match_string("\b")
460
+ _tmp = match_string("\t")
447
461
  unless _tmp
448
462
  self.pos = _save5
449
463
  break
450
464
  end
451
- @result = begin; "\\b" ; end
465
+ @result = begin; "\\t" ; end
452
466
  _tmp = true
453
467
  unless _tmp
454
468
  self.pos = _save5
@@ -461,12 +475,12 @@ class KPeg::StringEscape
461
475
 
462
476
  _save6 = self.pos
463
477
  while true # sequence
464
- _tmp = match_string("\"")
478
+ _tmp = match_string("\b")
465
479
  unless _tmp
466
480
  self.pos = _save6
467
481
  break
468
482
  end
469
- @result = begin; "\\\"" ; end
483
+ @result = begin; "\\b" ; end
470
484
  _tmp = true
471
485
  unless _tmp
472
486
  self.pos = _save6
@@ -478,6 +492,24 @@ class KPeg::StringEscape
478
492
  self.pos = _save
479
493
 
480
494
  _save7 = self.pos
495
+ while true # sequence
496
+ _tmp = match_string("\"")
497
+ unless _tmp
498
+ self.pos = _save7
499
+ break
500
+ end
501
+ @result = begin; "\\\"" ; end
502
+ _tmp = true
503
+ unless _tmp
504
+ self.pos = _save7
505
+ end
506
+ break
507
+ end # end sequence
508
+
509
+ break if _tmp
510
+ self.pos = _save
511
+
512
+ _save8 = self.pos
481
513
  while true # sequence
482
514
  _text_start = self.pos
483
515
  _tmp = get_byte
@@ -485,13 +517,13 @@ class KPeg::StringEscape
485
517
  text = get_text(_text_start)
486
518
  end
487
519
  unless _tmp
488
- self.pos = _save7
520
+ self.pos = _save8
489
521
  break
490
522
  end
491
523
  @result = begin; text ; end
492
524
  _tmp = true
493
525
  unless _tmp
494
- self.pos = _save7
526
+ self.pos = _save8
495
527
  end
496
528
  break
497
529
  end # end sequence
@@ -599,7 +631,7 @@ class KPeg::StringEscape
599
631
  end
600
632
 
601
633
  Rules = {}
602
- Rules[:_segment] = rule_info("segment", "(< /[\\w ]+/ > { text } | \"\\\\\" { \"\\\\\\\\\" } | \"\\n\" { \"\\\\n\" } | \"\\t\" { \"\\\\t\" } | \"\\b\" { \"\\\\b\" } | \"\\\"\" { \"\\\\\\\"\" } | < . > { text })")
634
+ Rules[:_segment] = rule_info("segment", "(< /[\\w ]+/ > { text } | \"\\\\\" { \"\\\\\\\\\" } | \"\\n\" { \"\\\\n\" } | \"\\r\" { \"\\\\r\" } | \"\\t\" { \"\\\\t\" } | \"\\b\" { \"\\\\b\" } | \"\\\"\" { \"\\\\\\\"\" } | < . > { text })")
603
635
  Rules[:_root] = rule_info("root", "segment*:s { @text = s.join }")
604
636
  Rules[:_embed_seg] = rule_info("embed_seg", "(\"\#\" { \"\\\\\#\" } | segment)")
605
637
  Rules[:_embed] = rule_info("embed", "embed_seg*:s { @text = s.join }")
data/lib/kpeg.rb CHANGED
@@ -1,6 +1,6 @@
1
1
  module KPeg
2
2
 
3
- VERSION = "0.10.0"
3
+ VERSION = "1.3.0"
4
4
 
5
5
  def self.grammar
6
6
  g = Grammar.new
data/test/test_kpeg.rb CHANGED
@@ -2,7 +2,7 @@ require 'minitest/autorun'
2
2
  require 'kpeg'
3
3
  require 'stringio'
4
4
 
5
- class TestKPeg < MiniTest::Unit::TestCase
5
+ class TestKPeg < Minitest::Test
6
6
  def assert_match(m, str)
7
7
  assert_kind_of KPeg::MatchString, m
8
8
  assert_equal str, m.string
@@ -4,7 +4,7 @@ require 'kpeg'
4
4
  require 'kpeg/code_generator'
5
5
  require 'stringio'
6
6
 
7
- class TestKPegCodeGenerator < MiniTest::Unit::TestCase
7
+ class TestKPegCodeGenerator < Minitest::Test
8
8
  def test_dot
9
9
  gram = KPeg.grammar do |g|
10
10
  g.root = g.dot
@@ -80,7 +80,7 @@ class Test < KPeg::CompiledParser
80
80
 
81
81
  # root = /[0-9]/
82
82
  def _root
83
- _tmp = scan(/\\A(?-mix:[0-9])/)
83
+ _tmp = scan(/\\G(?-mix:[0-9])/)
84
84
  set_failed_rule :_root unless _tmp
85
85
  return _tmp
86
86
  end
@@ -114,7 +114,7 @@ class Test < KPeg::CompiledParser
114
114
 
115
115
  # root = /./
116
116
  def _root
117
- _tmp = scan(/\\A(?-mix:.)/)
117
+ _tmp = scan(/\\G(?-mix:.)/)
118
118
  set_failed_rule :_root unless _tmp
119
119
  return _tmp
120
120
  end
@@ -133,7 +133,7 @@ class Test < KPeg::CompiledParser
133
133
 
134
134
  # root = /./u
135
135
  def _root
136
- _tmp = scan(/\\A(?-mix:.)/u)
136
+ _tmp = scan(/\\G(?-mix:.)/u)
137
137
  set_failed_rule :_root unless _tmp
138
138
  return _tmp
139
139
  end
@@ -1661,15 +1661,18 @@ class Test < KPeg::CompiledParser
1661
1661
  end
1662
1662
  end
1663
1663
  end
1664
- def bracket(receiver, argument)
1665
- AST::BracketOperator.new(receiver, argument)
1666
- end
1667
- def simple()
1668
- AST::Simple.new()
1669
- end
1670
- def simple2()
1671
- AST::Simple2.new()
1664
+ module ASTConstruction
1665
+ def bracket(receiver, argument)
1666
+ AST::BracketOperator.new(receiver, argument)
1667
+ end
1668
+ def simple()
1669
+ AST::Simple.new()
1670
+ end
1671
+ def simple2()
1672
+ AST::Simple2.new()
1673
+ end
1672
1674
  end
1675
+ include ASTConstruction
1673
1676
 
1674
1677
  # root = .
1675
1678
  def _root
@@ -1715,9 +1718,12 @@ class Test < KPeg::CompiledParser
1715
1718
  attr_reader :argument
1716
1719
  end
1717
1720
  end
1718
- def bracket(receiver, argument)
1719
- MegaAST::BracketOperator.new(receiver, argument)
1721
+ module MegaASTConstruction
1722
+ def bracket(receiver, argument)
1723
+ MegaAST::BracketOperator.new(receiver, argument)
1724
+ end
1720
1725
  end
1726
+ include MegaASTConstruction
1721
1727
 
1722
1728
  # root = .
1723
1729
  def _root
@@ -3,10 +3,11 @@ require 'kpeg'
3
3
  require 'kpeg/compiled_parser'
4
4
  require 'stringio'
5
5
 
6
- class TestKPegCompiledParser < MiniTest::Unit::TestCase
6
+ class TestKPegCompiledParser < Minitest::Test
7
7
 
8
8
  gram = <<-GRAM
9
9
  letter = [a-z]
10
+ number = [0-9]
10
11
  root = letter
11
12
  GRAM
12
13
 
@@ -14,7 +15,7 @@ class TestKPegCompiledParser < MiniTest::Unit::TestCase
14
15
 
15
16
  gram = <<-GRAM
16
17
  %test = TestKPegCompiledParser::TestParser
17
- root = %test.letter "!"
18
+ root = %test.letter %test.number? "!"
18
19
  GRAM
19
20
 
20
21
  KPeg.compile gram, "CompTestParser", self
@@ -78,4 +79,12 @@ class TestKPegCompiledParser < MiniTest::Unit::TestCase
78
79
  assert_equal expected, r.failure_oneline
79
80
  end
80
81
 
82
+ def test_composite_two_char_error
83
+ r = CompTestParser.new "aa"
84
+ assert_nil r.parse, "should not parse"
85
+
86
+ expected = "@1:2 failed rule 'TestKPegCompiledParser::TestParser#_number', got 'a'"
87
+ assert_equal expected, r.failure_oneline
88
+ end
89
+
81
90
  end
@@ -5,7 +5,7 @@ require 'kpeg/grammar_renderer'
5
5
  require 'stringio'
6
6
  require 'rubygems'
7
7
 
8
- class TestKPegFormat < MiniTest::Unit::TestCase
8
+ class TestKPegFormat < Minitest::Test
9
9
  G = KPeg::Grammar.new
10
10
 
11
11
  gram = File.read File.expand_path("../../lib/kpeg/format_parser.kpeg", __FILE__)
@@ -157,9 +157,9 @@ b(p) = x
157
157
  end
158
158
 
159
159
  def test_regexp
160
- assert_rule G.reg(/foo/), match('a=/foo/')
161
- assert_rule G.reg(/foo\/bar/), match('a=/foo\/bar/')
162
- assert_rule G.reg(/[^"]/), match('a=/[^"]/')
160
+ assert_rule G.reg('foo'), match('a=/foo/')
161
+ assert_rule G.reg('foo\\/bar'), match('a=/foo\/bar/')
162
+ assert_rule G.reg('[^"]'), match('a=/[^"]/')
163
163
  end
164
164
 
165
165
  def test_regexp_options
@@ -431,6 +431,25 @@ a=b
431
431
  assert_equal expected, m.directives
432
432
  end
433
433
 
434
+ def test_parser_directive_single_quote
435
+ m = match <<-GRAMMAR
436
+ %% header {
437
+ # It's a bug I found
438
+ }
439
+
440
+ a=b
441
+ GRAMMAR
442
+
443
+ assert_rule G.ref("b"), m
444
+
445
+ expected = {
446
+ "header" => KPeg::Action.new("\n# It's a bug I found\n")
447
+ }
448
+
449
+ assert_equal expected, m.directives
450
+ end
451
+
452
+
434
453
  def test_parser_setup
435
454
  m = match "%% { def initialize; end }\na=b"
436
455
  assert_rule G.ref("b"), m
@@ -5,7 +5,7 @@ require 'kpeg/grammar_renderer'
5
5
  require 'kpeg/code_generator'
6
6
  require 'stringio'
7
7
 
8
- class TestKPegFormatParserRoundtrip < MiniTest::Unit::TestCase
8
+ class TestKPegFormatParserRoundtrip < Minitest::Test
9
9
  PATH = File.expand_path("../../lib/kpeg/format_parser.kpeg", __FILE__)
10
10
  def test_roundtrip
11
11
  data = File.read(PATH)
@@ -4,7 +4,7 @@ require 'kpeg/format_parser'
4
4
  require 'kpeg/code_generator'
5
5
  require 'stringio'
6
6
 
7
- class TestKpegGrammar < MiniTest::Unit::TestCase
7
+ class TestKpegGrammar < Minitest::Test
8
8
  LEFT_RECURSION = <<-'STR'
9
9
 
10
10
  name = name:n "[]" { [:array, n] }
@@ -3,7 +3,7 @@ require 'kpeg'
3
3
  require 'kpeg/grammar_renderer'
4
4
  require 'stringio'
5
5
 
6
- class TestKPegGrammarRenderer < MiniTest::Unit::TestCase
6
+ class TestKPegGrammarRenderer < Minitest::Test
7
7
  def test_escape
8
8
  str = "hello\nbob"
9
9
  assert_equal 'hello\nbob', KPeg::GrammarRenderer.escape(str)
@@ -0,0 +1,42 @@
1
+ require 'minitest/autorun'
2
+ require 'kpeg'
3
+ require 'kpeg/string_escape'
4
+
5
+ class TestKPegStringEscape < Minitest::Test
6
+
7
+ def test_bell
8
+ assert_equal '\b', parse("\b")
9
+ end
10
+
11
+ def test_carriage_return
12
+ assert_equal '\r', parse("\r")
13
+ end
14
+
15
+ def test_newline
16
+ assert_equal '\n', parse("\n")
17
+ end
18
+
19
+ def test_quote
20
+ assert_equal '\\\\\"', parse('\\"')
21
+ end
22
+
23
+ def test_slash
24
+ assert_equal '\\\\', parse('\\')
25
+ end
26
+
27
+ def test_tab
28
+ assert_equal '\t', parse("\t")
29
+ end
30
+
31
+ def parse(str, embed = false)
32
+ se = KPeg::StringEscape.new(str)
33
+
34
+ rule = (embed ? 'embed' : nil)
35
+
36
+ se.raise_error unless se.parse(rule)
37
+
38
+ se.text
39
+ end
40
+
41
+ end
42
+