rsec 0.3.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/bench/bench.rb +60 -0
- data/bench/little.rb +328 -0
- data/bench/profile.rb +14 -0
- data/examples/arithmetic.rb +28 -0
- data/examples/bnf.rb +31 -0
- data/examples/c_minus.rb +175 -0
- data/examples/hello.scm +18 -0
- data/examples/s_exp.rb +17 -0
- data/examples/scheme.rb +84 -0
- data/examples/slow_json.rb +68 -0
- data/lib/rsec.rb +40 -0
- data/lib/rsec/helpers.rb +447 -0
- data/lib/rsec/parser.rb +64 -0
- data/lib/rsec/parsers/join.rb +86 -0
- data/lib/rsec/parsers/misc.rb +201 -0
- data/lib/rsec/parsers/prim.rb +102 -0
- data/lib/rsec/parsers/repeat.rb +90 -0
- data/lib/rsec/parsers/seq.rb +94 -0
- data/lib/rsec/utils.rb +116 -0
- data/license.txt +1 -0
- data/readme.rdoc +30 -0
- data/test/helpers.rb +24 -0
- data/test/test_branch.rb +14 -0
- data/test/test_examples.rb +36 -0
- data/test/test_join.rb +52 -0
- data/test/test_lookahead.rb +16 -0
- data/test/test_misc.rb +56 -0
- data/test/test_one_of.rb +39 -0
- data/test/test_pattern.rb +53 -0
- data/test/test_prim.rb +59 -0
- data/test/test_repeat.rb +50 -0
- data/test/test_rsec.rb +10 -0
- data/test/test_seq.rb +51 -0
- metadata +80 -0
@@ -0,0 +1,90 @@
|
|
1
|
+
module Rsec
|
2
|
+
|
3
|
+
# Optional match: the wrapped parser may appear once or not at all.
# Success yields a one-element array holding the inner result; failure
# rewinds the context to where it started and yields an empty array.
class Maybe < Unary
  def _parse(ctx)
    start = ctx.pos
    parsed = some._parse(ctx)
    return [parsed] unless INVALID[parsed]

    # inner parser failed: undo whatever it consumed
    ctx.pos = start
    []
  end
end
|
16
|
+
|
17
|
+
# Repeats +base+ a bounded number of times.
# The lower bound is range.min.abs; the number of additional, optional
# repetitions is range.max minus that lower bound.
# Note: the range's max should always be > 0.
# See also helpers.
class RepeatRange
  include Parser

  # Shorthand constructor: RepeatRange[base, range]
  def self.[](base, range)
    new(base, range)
  end

  def initialize(base, range)
    @base = base
    @at_least = range.min.abs
    @optional = range.max - @at_least
  end

  # Returns the array of collected results, or INVALID when fewer than
  # the required number of repetitions match.
  def _parse(ctx)
    nodes = []

    # mandatory part: any failure invalidates the whole repeat
    @at_least.times do
      node = @base._parse(ctx)
      return INVALID if INVALID[node]
      nodes << node
    end

    # optional part: stop at the first failure and rewind that attempt
    @optional.times do
      mark = ctx.pos
      node = @base._parse(ctx)
      if INVALID[node]
        ctx.pos = mark
        break
      end
      nodes << node
    end

    nodes
  end
end
|
52
|
+
|
53
|
+
# Matches +base+ exactly +n+ times in a row.
# Returns the array of results, or INVALID as soon as one repetition fails.
class RepeatN < Struct.new(:base, :n)
  include Parser

  def _parse(ctx)
    collected = []
    n.times do
      item = base._parse(ctx)
      return INVALID if INVALID[item]
      collected << item
    end
    collected
  end
end
|
64
|
+
|
65
|
+
# Repeats +base+ at least +n+ times, with no upper bound: [n, inf).
class RepeatAtLeastN < Struct.new(:base, :n)
  include Parser

  # Returns the array of results, or INVALID when fewer than +n+
  # repetitions match.
  def _parse(ctx)
    collected = []

    # mandatory repetitions
    n.times do
      item = base._parse(ctx)
      return INVALID if INVALID[item]
      collected << item
    end

    # Greedy tail. This could loop forever on a parser that succeeds
    # without consuming input, so an attempt that leaves the position
    # unchanged ends the loop just like a failed attempt does.
    loop do
      mark = ctx.pos
      item = base._parse(ctx)
      advanced = !INVALID[item] && ctx.pos != mark
      unless advanced
        ctx.pos = mark
        break
      end
      collected << item
    end

    collected
  end
end
|
89
|
+
|
90
|
+
end
|
@@ -0,0 +1,94 @@
|
|
1
|
+
module Rsec
|
2
|
+
|
3
|
+
# Sequence combinator.
# Runs every parser in +some+ in order and returns their results as an
# array; the first failing parser makes the whole sequence INVALID.
class Seq < Unary
  def _parse(ctx)
    results = []
    some.each do |parser|
      out = parser._parse(ctx)
      return INVALID if INVALID[out]
      results << out
    end
    results
  end
end
|
14
|
+
|
15
|
+
# Sequence combinator that keeps a single result.
# All parsers must succeed in order; the value returned is the result of
# the parser at position +idx+.
class SeqOne < Struct.new(:parsers, :idx)
  include Parser

  def _parse(ctx)
    picked = INVALID
    position = 0
    parsers.each do |parser|
      out = parser._parse(ctx)
      return INVALID if INVALID[out]
      picked = out if position == idx
      position += 1
    end
    picked
  end
end
|
30
|
+
|
31
|
+
# Sequence combinator that runs +skipper+ between consecutive tokens.
# Returns the array of token results (skipper results are discarded), or
# INVALID if any token or any skipper run fails.
class Seq_ < Struct.new(:first, :rest, :skipper)
  include Parser

  def _parse(ctx)
    head = first._parse(ctx)
    return INVALID if INVALID[head]

    collected = [head]
    rest.each do |parser|
      # the skipper must succeed before each following token
      skipped = skipper._parse(ctx)
      return INVALID if INVALID[skipped]

      out = parser._parse(ctx)
      return INVALID if INVALID[out]
      collected << out
    end
    collected
  end
end
|
49
|
+
|
50
|
+
# Like Seq_ (skipper is run between tokens), but only the result of the
# token at position +idx+ is returned. Position 0 is +first+; positions
# 1.. map onto +rest+.
class SeqOne_ < Struct.new(:first, :rest, :skipper, :idx)
  include Parser

  def _parse(ctx)
    head = first._parse(ctx)
    return INVALID if INVALID[head]
    picked = (0 == idx) ? head : INVALID

    # index within +rest+ of the wanted token
    target = idx - 1
    rest.each_with_index do |parser, offset|
      skipped = skipper._parse(ctx)
      return INVALID if INVALID[skipped]

      out = parser._parse(ctx)
      return INVALID if INVALID[out]
      picked = out if offset == target
    end
    picked
  end
end
|
71
|
+
|
72
|
+
# Unboxes a single-element result: when the inner result has exactly one
# element, that element is returned instead of the container.
# Only works for seq and join, and maybe'ed seq and join.
class Unbox < Unary
  def _parse(ctx)
    boxed = some._parse(ctx)
    if INVALID[boxed]
      INVALID
    elsif boxed.size == 1
      boxed.first
    else
      boxed
    end
  end
end
|
81
|
+
|
82
|
+
# Drops the first and the last element of the inner result and returns
# what is left. The inner result array is modified in place.
# Only works for seq.
class Inner < Unary
  def _parse(ctx)
    items = some._parse(ctx)
    return INVALID if INVALID[items]

    items.tap do |list|
      list.shift
      list.pop
    end
  end
end
|
93
|
+
|
94
|
+
end
|
data/lib/rsec/utils.rb
ADDED
@@ -0,0 +1,116 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
|
3
|
+
module Rsec #:nodoc:

  # Error class raised for parse failures, meant to be rescued by callers.
  # Carries the message, the text of the offending line and the 1-based
  # line / column of the failure.
  class SyntaxError < StandardError
    attr_reader :msg, :line_text, :line, :col

    # msg::       message text
    # line_text:: text of the line where the error occurred
    # line::      1-based line number
    # col::       1-based column number
    def initialize msg, line_text, line, col
      @msg, @line_text, @line, @col = msg, line_text, line, col
    end

    # Message, the source line, and a caret line pointing at the column.
    # +col+ is 1-based, so the caret is preceded by col - 1 spaces.
    # When line_text was cut to a window (see ParseContext#line_text) the
    # caret is only approximate.
    def to_s
      pad = ' ' * [@col - 1, 0].max
      %Q<#@msg\n#@line_text\n#{pad}^>
    end
  end

  # Parse context, a StringScanner subclass.
  #
  # Attributes:
  #   [R]  string:        string to parse
  #   [RW] pos:           current position
  #   [R]  source:        source file name
  #   [R]  cache:         hash used for memoization
  #   [R]  last_fail_pos: furthest position at which a failure was recorded
  #   [RW] attr_names
  #
  # NOTE(review): StringScanner#pos is documented as a byte offset while
  # String#[] / #rindex / #index below take character indices, so line/col
  # may drift on multi-byte input -- confirm.
  class ParseContext < StringScanner
    attr_reader :source, :cache, :last_fail_pos
    attr_accessor :attr_names

    def initialize str, source
      super(str)
      @source = source
      @cache = {}
      @last_fail_pos = 0
      @last_fail_mask = 0
    end

    # clear packrat parser cache
    def clear_cache
      @cache.clear
    end

    # Record a failure at the current position. Only the furthest failure
    # position is kept; masks of failures at that same position are OR-ed
    # together, failures at earlier positions are ignored.
    def on_fail mask
      if pos > @last_fail_pos
        @last_fail_pos = pos
        @last_fail_mask = mask
      elsif pos == @last_fail_pos
        @last_fail_mask |= mask
      end
    end

    # Build (not raise) a SyntaxError for +source+.
    # If the scanner has not moved past the furthest recorded failure, the
    # error points at that failure and lists the expected tokens; otherwise
    # it points at the current position without an expectation list.
    def generate_error source
      if pos <= @last_fail_pos
        err_pos = @last_fail_pos
        expect_tokens = Fail.get_tokens @last_fail_mask
        expects = ", expect token [ #{expect_tokens.join ' | '} ]"
      else
        err_pos = pos
        expects = nil
      end
      line_no = line(err_pos)
      col_no = col(err_pos)
      msg = "\nin #{source}:#{line_no} at #{col_no}#{expects}"
      SyntaxError.new msg, line_text(err_pos), line_no, col_no
    end

    # get line number (1-based) of +pos+
    def line pos
      string[0...pos].count("\n") + 1
    end

    # get column number (1-based): position in line
    def col pos
      return 1 if pos == 0
      newline_pos = string.rindex "\n", pos - 1
      if newline_pos
        pos - newline_pos
      else
        pos + 1
      end
    end

    # Get the text of the line containing +pos+, cut to a window of at
    # most 40 characters on either side of +pos+.
    #
    # The backwards search starts at pos - 1, the same way #col does, so a
    # position sitting on a "\n" belongs to the line that newline ends
    # (consistent with #line and #col). An exclusive end index is used so
    # an empty first line cannot turn into a negative, wrapping index.
    def line_text pos
      prev_newline = pos > 0 ? string.rindex("\n", pos - 1) : nil
      from = prev_newline ? prev_newline + 1 : 0
      from = pos - 40 if from < pos - 40

      # exclusive end: the terminating newline, or the end of the string
      to = string.index("\n", pos) || string.size
      to = pos + 41 if to > pos + 41

      string[from...to]
    end
  end

  # The invalid token: a unique sentinel returned by parsers on failure.
  # INVALID[x] is an alias of INVALID == x.
  INVALID = Object.new
  class << INVALID
    def to_str
      'INVALID_TOKEN'
    end
    alias :[] :==
    alias inspect to_str
  end

end
|
data/license.txt
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
As Ruby's
|
data/readme.rdoc
ADDED
@@ -0,0 +1,30 @@
|
|
1
|
+
== Parser / Regexp Combinator for Ruby.
|
2
|
+
|
3
|
+
Easier and faster than treetop / rex+racc.
|
4
|
+
|
5
|
+
It's ruby1.9 only.
|
6
|
+
|
7
|
+
== License
|
8
|
+
|
9
|
+
As Ruby's
|
10
|
+
|
11
|
+
== Install
|
12
|
+
|
13
|
+
The pure Ruby gem is fast enough (more than 10x faster than treetop-generated code):
|
14
|
+
|
15
|
+
gem install rsec
|
16
|
+
|
17
|
+
For extreme performance under C Ruby:
|
18
|
+
|
19
|
+
gem install rsec-ext
|
20
|
+
|
21
|
+
It is about 30% faster than Haskell Parsec in the benchmark.
|
22
|
+
|
23
|
+
== Doc
|
24
|
+
|
25
|
+
http://rsec.heroku.com
|
26
|
+
|
27
|
+
== Code
|
28
|
+
|
29
|
+
http://github.com/luikore/rsec/tree/master
|
30
|
+
|
data/test/helpers.rb
ADDED
@@ -0,0 +1,24 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
|
3
|
+
# Put the gem's lib/ and then ext/ directories at the front of the load
# path (ext/ is unshifted last, so it is searched first), then load the
# library and the test framework.
%w[lib ext].each do |dir|
  $LOAD_PATH.unshift "#{File.dirname(__FILE__)}/../#{dir}"
end
require "rsec"
include Rsec::Helpers
require "test/unit"

# Short name for the test-case base class; INVALID is made available on
# it so test classes inheriting from TC can refer to it unqualified.
TC = Test::Unit::TestCase
TC.const_set :INVALID, Rsec::INVALID
|
13
|
+
|
14
|
+
# Terse assertion helpers used throughout the test suite.
module Test::Unit::Assertions
  # ase: shorthand for assert_equal
  alias_method :ase, :assert_equal

  # asr: the given block must raise Rsec::SyntaxError
  def asr
    assert_raise(Rsec::SyntaxError) do
      yield
    end
  end

  # asp: parsing +s+ with parser +p+ must return +s+ itself
  def asp(s, p)
    parsed = p.parse(s)
    assert_equal(s, parsed)
  end
end
|
24
|
+
|
data/test/test_branch.rb
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
require "#{File.dirname(__FILE__)}/helpers.rb"
|
2
|
+
|
3
|
+
# Tests for the branch (alternation) combinator built with `|`.
class TestBranch < TC
  def test_branch
    # three alternatives: literal, pattern, sequence
    choice = 'a'.r | /\d+/ | seq('c', 'd')
    [
      [['c', 'd'], 'cd'],
      ['3',        '3'],
      [INVALID,    'c']
    ].each do |expected, input|
      ase expected, choice.parse(input)
    end

    # two literal alternatives
    either = 'x'.r | 'y'
    ase INVALID, either.parse('')
    ase 'y', either.parse('y')
  end
end
|
@@ -0,0 +1,36 @@
|
|
1
|
+
require "#{File.dirname(__FILE__)}/helpers.rb"
|
2
|
+
$:.unshift "#{File.dirname __FILE__}/../examples"
|
3
|
+
require "arithmetic"
|
4
|
+
require "s_exp"
|
5
|
+
|
6
|
+
# Runs the parsers defined in examples/arithmetic.rb and examples/s_exp.rb
# against a few inputs.
class TestExamples < TC
  def initialize(*args)
    super(*args)
    @a = arithmetic()
    @s_exp = s_exp()
  end

  # Each expression must evaluate to the same value Ruby's own eval gives;
  # the inputs grow in complexity step by step.
  def test_arithmetic
    [
      '1',
      '3+ 2',
      '5-2*1',
      '(2)',
      '1+(2- (3+ 4))/5 * 2*4 +1'
    ].each do |expr|
      ase eval(expr), @a.parse(expr)
    end
  end

  def test_s_exp
    nested = @s_exp.parse! '(a 3 4.3 (add 1 3) (minus (multi 4 5)))'
    ase ['a', 3.0, 4.3, ['add', 1, 3], ['minus', ['multi', 4, 5]]], nested

    mixed = @s_exp.parse! '(a (3) ce2 (add 1 3))'
    ase ['a', 3.0, 'ce2', ['add', 1, 3]], mixed
  end
end
|
data/test/test_join.rb
ADDED
@@ -0,0 +1,52 @@
|
|
1
|
+
require "#{File.dirname(__FILE__)}/helpers.rb"
|
2
|
+
|
3
|
+
# Tests for the join combinator and its even/odd result filters.
class TestJoin < TC
  def test_join
    # tokens joined by a literal separator
    plain = /\w{1,3}/.r.join('+')
    ase ['abc'], plain.eof.parse('abc')
    ase ['a', '+', 'bc', '+', 'd'], plain.parse('a+bc+d')
    ['a+ bc+d', 'a+b+'].each do |bad|
      ase INVALID, plain.eof.parse(bad)
    end

    # both token and separator are indexed sequences
    token = seq(/[a-z]{1,3}/, '3')[0]
    separator = seq(/\s/.r, '+', /\s/)[1]
    spaced = token.join(separator)
    ase ['abc'], spaced.eof.parse('abc3')
    ase %w[a + bc + d], spaced.parse('a3 + bc3 + d3')
    ase INVALID, spaced.eof.parse('a+b+')
  end

  def test_nest_join
    products = 'a'.r.join(/\s*\*\s*/.r)
    sums = products.join(/\s*\+\s*/.r)
    expected = [['a'], ' + ', ['a', ' * ', 'a'], ' +', ['a']]
    ase expected, sums.parse('a + a * a +a')
  end

  def test_join_with_mapping_block
    # the block post-processes the joined result
    only_plus = 'a'.r.join('+') { |res| res.grep(/\+/) }
    ase ['+', '+'], only_plus.parse('a+a+a')
    ase [], only_plus.parse('a')
  end

  def test_join_even
    # even: keep the tokens, drop the separators
    tokens = 'a'.r.join('+').even
    ase %w[a a a], tokens.parse('a+a+a')
    ase %w[a], tokens.parse('a')
    ase INVALID, tokens.eof.parse('a+')
    ase INVALID, tokens.parse('b')
    ase INVALID, tokens.parse('')
  end

  def test_join_odd
    # odd: keep the separators, drop the tokens
    separators = 'a'.r.join('+').odd
    ase %w[+ +], separators.parse('a+a+a')
    ase [], separators.parse('a')
    ['', '+', 'b'].each do |bad|
      ase INVALID, separators.parse(bad)
    end
  end

  def test_nest_join_even_odd
    odd_inside = 'a'.r.join('+').odd.join('*')
    ase [['+'], '*', []], odd_inside.parse('a+a*a')

    even_inside = 'a'.r.join('+').even.join('*')
    ase [['a', 'a'], '*', ['a']], even_inside.parse('a+a*a')
  end
end
|