rsec 0.3.2
Sign up to get free protection for your applications and to get access to all the features.
- data/bench/bench.rb +60 -0
- data/bench/little.rb +328 -0
- data/bench/profile.rb +14 -0
- data/examples/arithmetic.rb +28 -0
- data/examples/bnf.rb +31 -0
- data/examples/c_minus.rb +175 -0
- data/examples/hello.scm +18 -0
- data/examples/s_exp.rb +17 -0
- data/examples/scheme.rb +84 -0
- data/examples/slow_json.rb +68 -0
- data/lib/rsec.rb +40 -0
- data/lib/rsec/helpers.rb +447 -0
- data/lib/rsec/parser.rb +64 -0
- data/lib/rsec/parsers/join.rb +86 -0
- data/lib/rsec/parsers/misc.rb +201 -0
- data/lib/rsec/parsers/prim.rb +102 -0
- data/lib/rsec/parsers/repeat.rb +90 -0
- data/lib/rsec/parsers/seq.rb +94 -0
- data/lib/rsec/utils.rb +116 -0
- data/license.txt +1 -0
- data/readme.rdoc +30 -0
- data/test/helpers.rb +24 -0
- data/test/test_branch.rb +14 -0
- data/test/test_examples.rb +36 -0
- data/test/test_join.rb +52 -0
- data/test/test_lookahead.rb +16 -0
- data/test/test_misc.rb +56 -0
- data/test/test_one_of.rb +39 -0
- data/test/test_pattern.rb +53 -0
- data/test/test_prim.rb +59 -0
- data/test/test_repeat.rb +50 -0
- data/test/test_rsec.rb +10 -0
- data/test/test_seq.rb +51 -0
- metadata +80 -0
@@ -0,0 +1,90 @@
|
|
1
|
+
module Rsec
|
2
|
+
|
3
|
+
# Optional match: the wrapped parser may succeed once or not at all.
class Maybe < Unary
  # Runs the wrapped parser (+some+) a single time.
  # Returns a one-element array holding its result on success. On failure
  # the scan position of +ctx+ is rewound to where it was before the attempt
  # and an empty array is returned, so this parser itself never yields INVALID.
  def _parse ctx
    start = ctx.pos
    node = some._parse ctx
    return [node] unless INVALID[node]
    ctx.pos = start
    []
  end
end
|
16
|
+
|
17
|
+
# Repeats +base+ at least range.min.abs times and at most range.max times.
# note: range's max should always be > 0
# see also helpers
class RepeatRange
  include Parser

  # Shorthand constructor: RepeatRange[base, range]
  def self.[] base, range
    new base, range
  end

  # base::  parser to repeat
  # range:: the mandatory count is range.min.abs; the number of further,
  #         optional repetitions is range.max minus that count
  def initialize base, range
    @base = base
    @at_least = range.min.abs
    @optional = range.max - @at_least
  end

  # Returns the array of collected results, or INVALID when one of the
  # mandatory repetitions fails.
  def _parse ctx
    nodes = []

    # mandatory part: any failure invalidates the whole repetition
    @at_least.times do
      node = @base._parse ctx
      return INVALID if INVALID[node]
      nodes << node
    end

    # optional part: the first failure rewinds that attempt and stops
    @optional.times do
      mark = ctx.pos
      node = @base._parse ctx
      unless INVALID[node]
        nodes << node
        next
      end
      ctx.pos = mark
      break
    end

    nodes
  end
end
|
52
|
+
|
53
|
+
# Matches +base+ exactly +n+ times in a row.
class RepeatN < Struct.new(:base, :n)
  include Parser

  # Returns the array of the +n+ results, or INVALID as soon as one
  # repetition fails.
  def _parse ctx
    nodes = []
    n.times do
      node = base._parse ctx
      return INVALID if INVALID[node]
      nodes << node
    end
    nodes
  end
end
|
64
|
+
|
65
|
+
# Matches +base+ at least +n+ times, then as many more times as possible.
class RepeatAtLeastN < Struct.new(:base, :n)
  include Parser

  # Returns the array of all collected results, or INVALID when one of the
  # first +n+ repetitions fails.
  def _parse ctx
    nodes = []

    # mandatory repetitions
    n.times do
      node = base._parse(ctx)
      return INVALID if INVALID[node]
      nodes << node
    end

    # Greedy tail. A repetition that fails, or that succeeds without moving
    # the scan position, ends the loop; the latter check keeps a parser that
    # matches nothing from looping forever.
    loop do
      mark = ctx.pos
      node = base._parse ctx
      stalled = INVALID[node] || ctx.pos == mark
      if stalled
        ctx.pos = mark
        break
      end
      nodes << node
    end

    nodes
  end
end
|
89
|
+
|
90
|
+
end
|
@@ -0,0 +1,94 @@
|
|
1
|
+
module Rsec
|
2
|
+
|
3
|
+
# Sequence combinator: runs every wrapped parser in order.
# The result is an array with one entry per parser.
class Seq < Unary
  # Returns the array of results, or INVALID as soon as one element fails.
  def _parse ctx
    results = []
    some.each do |parser|
      node = parser._parse ctx
      return INVALID if INVALID[node]
      results << node
    end
    results
  end
end
|
14
|
+
|
15
|
+
# Sequence combinator that keeps a single result:
# every parser must match, but only the result of the parser at +idx+
# is returned.
class SeqOne < Struct.new(:parsers, :idx)
  include Parser

  # Returns the result of the parser at position +idx+, or INVALID when any
  # parser in the sequence fails (or when no parser sits at +idx+).
  def _parse ctx
    picked = INVALID
    position = 0
    parsers.each do |parser|
      node = parser._parse ctx
      return INVALID if INVALID[node]
      picked = node if position == idx
      position += 1
    end
    picked
  end
end
|
30
|
+
|
31
|
+
# Sequence combinator that runs +skipper+ between consecutive tokens.
# The skipper's own results are not part of the returned array.
class Seq_ < Struct.new(:first, :rest, :skipper)
  include Parser

  # Returns the results of +first+ and of every parser in +rest+, in order,
  # or INVALID when a token or a skipper run fails.
  def _parse ctx
    head = first._parse ctx
    return INVALID if INVALID[head]

    results = [head]
    rest.each do |parser|
      # the skipper has to match before each following token
      return INVALID if INVALID[skipper._parse ctx]
      node = parser._parse ctx
      return INVALID if INVALID[node]
      results << node
    end
    results
  end
end
|
49
|
+
|
50
|
+
# Like Seq_ (the skipper runs between tokens), but only the result of the
# token at +idx+ is returned; +first+ counts as index 0.
class SeqOne_ < Struct.new(:first, :rest, :skipper, :idx)
  include Parser

  # Returns the result of the selected token, or INVALID when a token or a
  # skipper run fails (or when no token sits at +idx+).
  def _parse ctx
    head = first._parse ctx
    return INVALID if INVALID[head]
    picked = idx == 0 ? head : INVALID

    rest.each_with_index do |parser, i|
      return INVALID if INVALID[skipper._parse ctx]
      node = parser._parse ctx
      return INVALID if INVALID[node]
      # rest[i] is token number i + 1 of the whole sequence
      picked = node if i + 1 == idx
    end
    picked
  end
end
|
71
|
+
|
72
|
+
# Unboxes single-element results.
# only works for seq and join, and maybe'ed seq and join
class Unbox < Unary
  # Returns the sole element when the wrapped result holds exactly one,
  # otherwise the wrapped result itself; INVALID is passed through.
  def _parse ctx
    boxed = some._parse ctx
    return INVALID if INVALID[boxed]
    return boxed.first if boxed.size == 1
    boxed
  end
end
|
81
|
+
|
82
|
+
# Keeps only the inner part of a sequence result.
# only works for seq
class Inner < Unary
  # Removes the first and the last element of the wrapped result in place
  # and returns that same array; INVALID is passed through.
  def _parse ctx
    items = some._parse ctx
    if INVALID[items]
      INVALID
    else
      items.shift # drop the leading element
      items.pop   # drop the trailing element
      items
    end
  end
end
|
93
|
+
|
94
|
+
end
|
data/lib/rsec/utils.rb
ADDED
@@ -0,0 +1,116 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
|
3
|
+
module Rsec #:nodoc:

  # Error class for rescue. Carries the message, an excerpt of the
  # offending source line and the line / column of the failure.
  class SyntaxError < StandardError
    attr_reader :msg, :line_text, :line, :col

    # msg::       description of the failure
    # line_text:: text of the source line the failure is on
    # line::      line number
    # col::       column number (1-based, as computed by ParseContext#col)
    def initialize msg, line_text, line, col
      @msg, @line_text, @line, @col = msg, line_text, line, col
    end

    # Message, source line and a caret marking the column.
    # The column is 1-based, so the caret is indented by col - 1 spaces.
    # The caret lines up with the failing character only when +line_text+
    # starts at the beginning of the line (ParseContext#line_text trims
    # the excerpt of very long lines).
    def to_s
      indent = ' ' * [@col - 1, 0].max
      %Q<#@msg\n#@line_text\n#{indent}^>
    end
  end

  # parse context inherits from StringScanner<br/>
  # <br/>
  # attributes:<br/>
  # <pre>
  #   [R]  string: string to parse
  #   [RW] pos: current position
  #   [R]  source: source file name
  #   [R]  cache: for memoization
  #   [R]  last_fail_pos: furthest position a failure was recorded at
  #   [RW] attr_names
  # </pre>
  # NOTE(review): StringScanner#pos is documented as a byte offset, while
  # #line, #col and #line_text index the string by character; positions are
  # presumably off for multi-byte input - confirm.
  class ParseContext < StringScanner
    attr_reader :source, :cache, :last_fail_pos
    attr_accessor :attr_names

    # str::    the text to parse
    # source:: name of the source
    def initialize str, source
      super(str)
      @source = source
      @cache = {}
      @last_fail_pos = 0
      @last_fail_mask = 0
    end

    # clear packrat parser cache
    def clear_cache
      @cache.clear
    end

    # Records a failure at the current position. Only the furthest failure
    # position is kept; masks of failures at that same position are OR-ed
    # together, and failures before it are ignored.
    def on_fail mask
      if pos > @last_fail_pos
        @last_fail_pos = pos
        @last_fail_mask = mask
      elsif pos == @last_fail_pos
        @last_fail_mask |= mask
      end
    end

    # Builds (does not raise) a SyntaxError for the current state.
    # When the scanner has not moved past the furthest recorded failure, the
    # error points at that failure and lists the expected tokens; otherwise
    # it points at the current position without an expectation list.
    def generate_error source
      if pos <= @last_fail_pos
        error_pos = @last_fail_pos
        expect_tokens = Fail.get_tokens @last_fail_mask
        expects = ", expect token [ #{expect_tokens.join ' | '} ]"
      else
        error_pos = pos
        expects = nil
      end
      line_no = line(error_pos)
      column = col(error_pos)
      excerpt = line_text(error_pos)
      msg = "\nin #{source}:#{line_no} at #{column}#{expects}"
      SyntaxError.new msg, excerpt, line_no, column
    end

    # get line number (1-based) of pos
    def line pos
      string[0...pos].count("\n") + 1
    end

    # get column number: 1-based position in line
    def col pos
      return 1 if pos == 0
      newline_pos = string.rindex "\n", pos - 1
      if newline_pos
        pos - newline_pos
      else
        pos + 1
      end
    end

    # Get the text of the line containing pos, without its newline.
    # At most 40 characters before pos and 40 after it are kept.
    def line_text pos
      # Look for the previous newline strictly before pos: a newline sitting
      # at pos terminates the current line, it does not start it.
      newline_before = pos > 0 ? string.rindex("\n", pos - 1) : nil
      from = newline_before ? newline_before + 1 : 0
      from = pos - 40 if from < pos - 40

      # exclusive end, so an empty line yields "" instead of a wrapped range
      stop = string.index("\n", pos) || string.size
      stop = pos + 41 if stop > pos + 41

      string[from...stop]
    end
  end

  # the invalid token
  # INVALID[x] is an alias of INVALID == x, used to test parse results.
  INVALID = Object.new
  class << INVALID
    def to_str
      'INVALID_TOKEN'
    end
    alias :[] :==
    alias inspect to_str
  end

end
|
data/license.txt
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
As Ruby's
|
data/readme.rdoc
ADDED
@@ -0,0 +1,30 @@
|
|
1
|
+
== Parser / Regexp Combinator for Ruby.
|
2
|
+
|
3
|
+
Easier and faster than treetop / rex+racc.
|
4
|
+
|
5
|
+
It works with Ruby 1.9 only.
|
6
|
+
|
7
|
+
== License
|
8
|
+
|
9
|
+
As Ruby's
|
10
|
+
|
11
|
+
== Install
|
12
|
+
|
13
|
+
The pure Ruby gem is fast enough (more than 10x faster than Treetop-generated code):
|
14
|
+
|
15
|
+
gem install rsec
|
16
|
+
|
17
|
+
For extreme performance under C Ruby:
|
18
|
+
|
19
|
+
gem install rsec-ext
|
20
|
+
|
21
|
+
It is about 30% faster than Haskell Parsec in the benchmark.
|
22
|
+
|
23
|
+
== Doc
|
24
|
+
|
25
|
+
http://rsec.heroku.com
|
26
|
+
|
27
|
+
== Code
|
28
|
+
|
29
|
+
http://github.com/luikore/rsec/tree/master
|
30
|
+
|
data/test/helpers.rb
ADDED
@@ -0,0 +1,24 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
|
3
|
+
$:.unshift "#{File.dirname(__FILE__)}/../lib"
|
4
|
+
$:.unshift "#{File.dirname(__FILE__)}/../ext"
|
5
|
+
require "rsec"
|
6
|
+
include Rsec::Helpers
|
7
|
+
require "test/unit"
|
8
|
+
|
9
|
+
# Short alias for the test case base class; Rsec::INVALID is exposed on it
# so that test bodies can write plain INVALID.
TC = Test::Unit::TestCase
TC::INVALID = Rsec::INVALID
|
13
|
+
|
14
|
+
# Short-hand assertions used throughout the rsec tests.
module Test::Unit::Assertions
  # ase expected, actual  ==  assert_equal expected, actual
  alias_method :ase, :assert_equal

  # assert that the given block raises Rsec::SyntaxError
  def asr
    assert_raise Rsec::SyntaxError do
      yield
    end
  end

  # assert that parser p, applied to s, returns s
  def asp s, p
    parsed = p.parse(s)
    assert_equal s, parsed
  end
end
|
24
|
+
|
data/test/test_branch.rb
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
require "#{File.dirname(__FILE__)}/helpers.rb"
|
2
|
+
|
3
|
+
# Tests for the branch (|) combinator.
class TestBranch < TC
  def test_branch
    # string | pattern | sequence
    branch = 'a'.r | /\d+/ | seq('c', 'd')
    ase %w[c d], branch.parse('cd')
    ase '3', branch.parse('3')
    ase INVALID, branch.parse('c')

    # two plain strings
    either = 'x'.r | 'y'
    ase INVALID, either.parse('')
    ase 'y', either.parse('y')
  end
end
|
@@ -0,0 +1,36 @@
|
|
1
|
+
require "#{File.dirname(__FILE__)}/helpers.rb"
|
2
|
+
$:.unshift "#{File.dirname __FILE__}/../examples"
|
3
|
+
require "arithmetic"
|
4
|
+
require "s_exp"
|
5
|
+
|
6
|
+
# Runs the parsers built by the arithmetic and s_exp example scripts.
class TestExamples < TC
  def initialize *xs
    super(*xs)
    @a = arithmetic()
    @s_exp = s_exp()
  end

  # The arithmetic parser is compared against Ruby's own eval,
  # going from the simplest expression to a nested one.
  def test_arithmetic
    [
      '1',
      '3+ 2',
      '5-2*1',
      '(2)',
      '1+(2- (3+ 4))/5 * 2*4 +1'
    ].each do |s|
      ase eval(s), @a.parse(s)
    end
  end

  def test_s_exp
    parsed = @s_exp.parse! '(a 3 4.3 (add 1 3) (minus (multi 4 5)))'
    ase ['a', 3.0, 4.3, ['add', 1, 3], ['minus', ['multi', 4, 5]]], parsed

    parsed = @s_exp.parse! '(a (3) ce2 (add 1 3))'
    ase ['a', 3.0, 'ce2', ['add', 1, 3]], parsed
  end
end
|
data/test/test_join.rb
ADDED
@@ -0,0 +1,52 @@
|
|
1
|
+
require "#{File.dirname(__FILE__)}/helpers.rb"
|
2
|
+
|
3
|
+
# Tests for the join combinator and its even / odd projections.
class TestJoin < TC
  def test_join
    # pattern tokens joined by a plain string
    words = /\w{1,3}/.r.join '+'
    ase ['abc'], words.eof.parse('abc')
    ase ['a','+','bc','+','d'], words.parse('a+bc+d')
    ase INVALID, words.eof.parse('a+ bc+d')
    ase INVALID, words.eof.parse('a+b+')

    # both token and separator are picked out of sequences
    token = seq(/[a-z]{1,3}/, '3')[0]
    spaced = token.join seq(/\s/.r, '+', /\s/)[1]
    ase ['abc'], spaced.eof.parse('abc3')
    ase %w[a + bc + d], spaced.parse('a3 + bc3 + d3')
    ase INVALID, spaced.eof.parse('a+b+')
  end

  def test_nest_join
    nested = 'a'.r.join(/\s*\*\s*/.r).join(/\s*\+\s*/.r)
    ase [['a'], ' + ', ['a', ' * ', 'a'], ' +', ['a']], nested.parse('a + a * a +a')
  end

  def test_join_with_mapping_block
    pluses = 'a'.r.join('+') { |res| res.grep(/\+/) }
    ase ['+', '+'], pluses.parse('a+a+a')
    ase [], pluses.parse('a')
  end

  def test_join_even
    tokens = 'a'.r.join('+').even
    ase %w[a a a], tokens.parse('a+a+a')
    ase %w[a], tokens.parse('a')
    ase INVALID, tokens.eof.parse('a+')
    ase INVALID, tokens.parse('b')
    ase INVALID, tokens.parse('')
  end

  def test_join_odd
    separators = 'a'.r.join('+').odd
    ase %w[+ +], separators.parse('a+a+a')
    ase [], separators.parse('a')
    ase INVALID, separators.parse('')
    ase INVALID, separators.parse('+')
    ase INVALID, separators.parse('b')
  end

  def test_nest_join_even_odd
    odd_nested = 'a'.r.join('+').odd.join('*')
    ase [['+'], '*', []], odd_nested.parse('a+a*a')

    even_nested = 'a'.r.join('+').even.join('*')
    ase [['a','a'], '*', ['a']], even_nested.parse('a+a*a')
  end
end
|