rsec 0.3.2
Sign up to get free protection for your applications and to get access to all the features.
- data/bench/bench.rb +60 -0
- data/bench/little.rb +328 -0
- data/bench/profile.rb +14 -0
- data/examples/arithmetic.rb +28 -0
- data/examples/bnf.rb +31 -0
- data/examples/c_minus.rb +175 -0
- data/examples/hello.scm +18 -0
- data/examples/s_exp.rb +17 -0
- data/examples/scheme.rb +84 -0
- data/examples/slow_json.rb +68 -0
- data/lib/rsec.rb +40 -0
- data/lib/rsec/helpers.rb +447 -0
- data/lib/rsec/parser.rb +64 -0
- data/lib/rsec/parsers/join.rb +86 -0
- data/lib/rsec/parsers/misc.rb +201 -0
- data/lib/rsec/parsers/prim.rb +102 -0
- data/lib/rsec/parsers/repeat.rb +90 -0
- data/lib/rsec/parsers/seq.rb +94 -0
- data/lib/rsec/utils.rb +116 -0
- data/license.txt +1 -0
- data/readme.rdoc +30 -0
- data/test/helpers.rb +24 -0
- data/test/test_branch.rb +14 -0
- data/test/test_examples.rb +36 -0
- data/test/test_join.rb +52 -0
- data/test/test_lookahead.rb +16 -0
- data/test/test_misc.rb +56 -0
- data/test/test_one_of.rb +39 -0
- data/test/test_pattern.rb +53 -0
- data/test/test_prim.rb +59 -0
- data/test/test_repeat.rb +50 -0
- data/test/test_rsec.rb +10 -0
- data/test/test_seq.rb +51 -0
- metadata +80 -0
data/lib/rsec/parser.rb
ADDED
@@ -0,0 +1,64 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
|
3
|
+
module Rsec #:nodoc:
|
4
|
+
# Behaviour shared by every parser object.
# Including classes provide _parse(ctx); this module adds the public entry
# points (parse / parse!) and a readable inspect.
module Parser
  # Wraps +str+ and +source_name+ in a ParseContext and hands it to _parse.
  # Returns whatever _parse returns.
  def parse(str, source_name = 'source')
    _parse(ParseContext.new(str, source_name))
  end

  # Works like parse, but when the result is flagged by INVALID it raises
  # the error built by the context's generate_error(source_name).
  def parse!(str, source_name = 'source')
    context = ParseContext.new(str, source_name)
    result = _parse(context)
    raise context.generate_error(source_name) if INVALID[result]
    result
  end

  attr_accessor :name

  # Renders the parser as "<Name child ...>".
  # When no name was assigned, the trailing word of the class name is
  # stored in @name and used as the label.
  def inspect
    # TODO move
    @name ||= self.class.to_s[/\w+$/]
    children =
      case self
      when Lazy
        nil
      when Binary
        "#{left.inspect} #{right.inspect}"
      when Seq, Seq_, Branch
        # collect through each; map must not be used here (it is redefined)
        parts = []
        each { |member| parts.push(member.inspect) }
        parts.join ' '
      when Unary
        some.inspect
      end
    children ? "<#{name} #{children}>" : "<#{name}>"
  end
end
|
45
|
+
|
46
|
+
# Base for parsers that carry two operands, exposed as +left+ and +right+.
# Mixes in Parser so instances get parse, parse! and inspect.
Binary = Struct.new(:left, :right) do
  include Parser
end
|
51
|
+
|
52
|
+
# Base for parsers that carry a single operand, exposed as +some+.
# Mixes in Parser so instances get parse, parse! and inspect.
Unary = Struct.new(:some) do
  include Parser
end
|
57
|
+
end
|
58
|
+
|
59
|
+
require "rsec/parsers/misc"
|
60
|
+
require "rsec/parsers/seq"
|
61
|
+
require "rsec/parsers/prim"
|
62
|
+
require "rsec/parsers/join"
|
63
|
+
require "rsec/parsers/repeat"
|
64
|
+
|
@@ -0,0 +1,86 @@
|
|
1
|
+
module Rsec
|
2
|
+
|
3
|
+
# Parses +left+ followed by any number of (+right+ +left+) pairs.
# The result array keeps everything in order: element, separator, element...
class Join < Binary
  # Returns INVALID when the first +left+ fails, otherwise the result array.
  def _parse(ctx)
    first = left._parse(ctx)
    return INVALID if INVALID[first]

    collected = [first]
    loop do
      mark = ctx.pos
      separator = right._parse(ctx)
      unless INVALID[separator]
        element = left._parse(ctx)
        unless INVALID[element]
          # stop if the round consumed nothing, to prevent an infinite loop
          break if mark == ctx.pos
          collected << separator
          collected << element
          next
        end
      end
      # separator or element failed: rewind to before the separator
      ctx.pos = mark
      break
    end
    collected
  end
end
|
30
|
+
|
31
|
+
# Parses +left+ followed by any number of (+right+ +left+) pairs,
# keeping only the +left+ results (the separators are dropped).
class JoinEven < Binary
  # Returns INVALID when the first +left+ fails, otherwise the array of
  # +left+ results.
  def _parse(ctx)
    first = left._parse(ctx)
    return INVALID if INVALID[first]

    collected = [first]
    loop do
      mark = ctx.pos
      separator = right._parse(ctx)
      unless INVALID[separator]
        element = left._parse(ctx)
        unless INVALID[element]
          # stop if the round consumed nothing, to prevent an infinite loop
          break if mark == ctx.pos
          collected << element
          next
        end
      end
      # separator or element failed: rewind to before the separator
      ctx.pos = mark
      break
    end
    collected
  end
end
|
57
|
+
|
58
|
+
# Parses +left+ followed by any number of (+right+ +left+) pairs,
# keeping only the +right+ results (the separators).
# NOTE when only a single +left+ matches, the result is an empty array.
class JoinOdd < Binary
  # Returns INVALID when the first +left+ fails, otherwise the array of
  # +right+ results.
  def _parse(ctx)
    first = left._parse(ctx)
    return INVALID if INVALID[first]

    collected = []
    loop do
      mark = ctx.pos
      separator = right._parse(ctx)
      unless INVALID[separator]
        element = left._parse(ctx)
        unless INVALID[element]
          # stop if the round consumed nothing, to prevent an infinite loop
          break if mark == ctx.pos
          collected << separator
          next
        end
      end
      # separator or element failed: rewind to before the separator
      ctx.pos = mark
      break
    end
    collected
  end
end
|
85
|
+
|
86
|
+
end
|
@@ -0,0 +1,201 @@
|
|
1
|
+
module Rsec #:nodoc
|
2
|
+
|
3
|
+
# Transforms a parse result: runs +left+, then passes a successful result
# to +right+ through its [] method.
class Map < Binary
  # Returns INVALID when +left+ fails, otherwise right[result].
  def _parse(ctx)
    parsed = left()._parse(ctx)
    INVALID[parsed] ? INVALID : right()[parsed]
  end
end
|
11
|
+
|
12
|
+
# Records the expected tokens for a parsing error in ctx.
# Each Fail gets its own bit: +right+ holds the one-bit mask, and a
# class-level table maps the bit index back to the tokens registered for it.
# When +left+ fails, the mask is reported through ctx.on_fail.
class Fail < Binary
  # Builds a Fail around +left+ and registers +tokens+ under the next bit.
  # Raises RuntimeError once the bit counter has passed 1000.
  def self.[](left, tokens)
    # TODO mutex
    @mask_bit ||= 0
    @token_table ||= []
    if @mask_bit > 1000
      raise "You've created too many fail parsers, If it is your intention, call Rsec::Fail.reset when previous expect settings can be thrown away."
    end
    parser = super(left, 1 << @mask_bit)
    @token_table[@mask_bit] = tokens
    @mask_bit += 1
    parser
  end

  # Forgets every registered token list and restarts bit numbering at 0.
  def self.reset
    @mask_bit = 0
    @token_table = []
  end

  # Returns the de-duplicated tokens of every registered entry whose bit is
  # set in +mask+. Returns an empty array when nothing has been registered
  # yet (the table is still nil before the first Fail is built).
  def self.get_tokens(mask)
    found = []
    (@token_table || []).each_with_index do |tokens, idx|
      next unless tokens
      found.concat(tokens) if (mask & (1 << idx)) > 0
    end
    found.uniq
  end

  # Runs +left+; on failure reports this parser's mask to ctx.on_fail.
  # The result of +left+ is returned unchanged either way.
  def _parse(ctx)
    result = left()._parse(ctx)
    ctx.on_fail(right) if INVALID[result]
    result
  end
end
|
51
|
+
|
52
|
+
# Look ahead: +left+ must be followed by something +right+ can parse.
# Input consumed by +right+ is given back when it matches.
class LookAhead < Binary
  # Returns INVALID when +right+ fails after +left+; otherwise rewinds to
  # the position just after +left+ and returns the result of +left+.
  def _parse(ctx)
    result = left()._parse(ctx)
    mark = ctx.pos
    if INVALID[right()._parse(ctx)]
      INVALID
    else
      ctx.pos = mark
      result
    end
  end
end
|
62
|
+
|
63
|
+
# Negative look ahead: +left+ must NOT be followed by something +right+
# can parse.
class NegativeLookAhead < Binary
  # Returns INVALID when +right+ matches after +left+. The original code
  # fell off the end of an `if` and returned nil in that case, which callers
  # testing INVALID[...] would not recognise as a failure.
  # Otherwise rewinds to the position just after +left+ and returns the
  # result of +left+.
  def _parse(ctx)
    result = left()._parse(ctx)
    mark = ctx.pos
    return INVALID unless INVALID[right()._parse(ctx)]

    ctx.pos = mark
    result
  end
end
|
74
|
+
|
75
|
+
# Branch combinator: tries each member of +some+ in order.
# The result is that of the first member that succeeds, or INVALID.
class Branch < Unary
  # Every failed alternative is followed by a rewind to the starting
  # position, so each member is tried from the same place.
  def _parse(ctx)
    start = ctx.pos
    some.each do |alternative|
      outcome = alternative._parse(ctx)
      if INVALID[outcome]
        ctx.pos = start
      else
        return outcome
      end
    end
    INVALID
  end
end
|
88
|
+
|
89
|
+
# Matches the pattern held in +some+ at the current position.
class Pattern < Unary
  # Returns the value of ctx.scan, or INVALID when that value is nil/false.
  def _parse(ctx)
    ctx.scan(some()) || INVALID
  end
end
|
95
|
+
|
96
|
+
# Scans forward until the pattern held in +some+ (an optimization).
class UntilPattern < Unary
  # Returns the value of ctx.scan_until, or INVALID when it is nil/false.
  def _parse(ctx)
    ctx.scan_until(some()) || INVALID
  end
end
|
103
|
+
|
104
|
+
# Skips the pattern held in +some+.
# Exists for optimization; not intended to be exposed to users.
class SkipPattern < Unary
  # Returns the value of ctx.skip, or INVALID when it is nil/false.
  def _parse(ctx)
    ctx.skip(some()) || INVALID
  end
end
|
110
|
+
|
111
|
+
# Skips forward until the pattern held in +some+.
# Exists for optimization; not intended to be exposed to users.
class SkipUntilPattern < Unary
  # Returns the value of ctx.skip_until, or INVALID when it is nil/false.
  def _parse(ctx)
    ctx.skip_until(some()) || INVALID
  end
end
|
117
|
+
|
118
|
+
# Requires the input to be exhausted once the wrapped parser has run.
class Eof < Unary
  # Runs +some+, then returns its result only if ctx.eos? holds;
  # otherwise returns INVALID.
  def _parse(ctx)
    result = some()._parse(ctx)
    return INVALID unless ctx.eos?

    result
  end
end
|
125
|
+
|
126
|
+
# Matches one character that occurs in +some+ (looked up with index).
class OneOf < Unary
  # Returns the character read by ctx.getch when +some+ contains it.
  # Returns INVALID at end of input, or when the character is not in
  # +some+; in the latter case the position from before the read is restored.
  def _parse(ctx)
    return INVALID if ctx.eos?

    # Remember where the character starts rather than subtracting 1 later.
    # NOTE(review): ctx appears to expose the StringScanner API, where pos is
    # a byte offset, so "pos - 1" would land inside a multibyte character.
    start = ctx.pos
    chr = ctx.getch
    return chr if some().index(chr)

    ctx.pos = start
    INVALID
  end
end
|
139
|
+
|
140
|
+
# Matches one character that occurs in +some+, skipping whitespace before
# and after it.
class OneOf_ < Unary
  # Returns the matched character, or INVALID at end of input or when the
  # character is not in +some+. The position is not restored on failure.
  def _parse(ctx)
    ctx.skip(/\s*/)
    return INVALID if ctx.eos?

    chr = ctx.getch
    return INVALID unless some().index(chr)

    ctx.skip(/\s*/)
    chr
  end
end
|
153
|
+
|
154
|
+
# Defers looking up a parser that may not be defined yet.
# +some+ is invoked with [] the first time _parse runs.
# NOTE the value is captured on that first call and reused afterwards.
class Lazy < Unary
  # Resolves the real parser once (stored in @some) and delegates to it.
  # If invoking +some+ raises NameError, the missing name is evaluated in
  # the binding of +some+ instead.
  def _parse(ctx)
    unless @some
      thunk = some()
      @some =
        begin
          thunk[]
        rescue NameError => err
          thunk.binding.eval(err.name.to_s)
        end
    end
    @some._parse(ctx)
  end
end
|
168
|
+
|
169
|
+
# Caches the wrapped parser's result in ctx.cache; may improve performance.
# Entries are keyed by the position combined with a per-instance salt.
class Cached
  include Parser

  # Shorthand constructor: Cached[parser].
  def self.[](parser)
    new(parser)
  end

  # Stores the wrapped parser and derives the salt from this object's id
  # shifted left by 32 bits.
  def initialize(parser)
    @parser = parser
    @salt = object_id() << 32
  end

  # On a cache hit, restores the stored end position and returns the stored
  # result. On a miss, runs the wrapped parser and stores [result, position].
  def _parse(ctx)
    slot = ctx.pos | @salt
    memo = ctx.cache
    # a stored result may be nil, so test for the key instead of the value
    unless memo.has_key?(slot)
      fresh = @parser._parse(ctx)
      memo[slot] = [fresh, ctx.pos]
      return fresh
    end
    stored, resume_at = memo[slot]
    ctx.pos = resume_at
    stored
  end
end
|
199
|
+
|
200
|
+
end
|
201
|
+
|
@@ -0,0 +1,102 @@
|
|
1
|
+
module Rsec
|
2
|
+
|
3
|
+
# Helpers shared by the primitive (number) parsers.
module Prim
  # Maps a sign strategy code to a regexp source fragment:
  # 3 => optional + or -, 2 => optional +, 1 => optional -, 0 => no sign.
  # Any other value yields nil.
  def sign_strategy_to_pattern(sign_strategy)
    case sign_strategy
    when 3 then '[\+\-]?'
    when 2 then '\+?'
    when 1 then '\-?'
    when 0 then ''
    end
  end
end
|
14
|
+
|
15
|
+
# Double precision float parser. +left+ holds the regexp used for scanning.
class PDouble < Binary
  include Prim

  # Builds the float regexp (case-insensitive) for the given sign strategy.
  # With +is_hex+ it expects a 0x prefix and hex digits; otherwise decimal
  # digits with an optional fraction and exponent.
  def float_pattern(sign_strategy, is_hex)
    prefix = sign_strategy_to_pattern(sign_strategy)
    return /#{prefix}0x[\da-f]+(\.[\da-f]+)?/i if is_hex

    /#{prefix}\d+(\.\d+)?(e[\+\-]?\d+)?/i
  end

  # Stores the compiled pattern in +left+.
  def initialize(sign_strategy, is_hex)
    self.left = float_pattern(sign_strategy, is_hex)
  end

  # Scans the pattern and converts the text with Float().
  # Returns INVALID when nothing matches or the value is not finite.
  def _parse(ctx)
    text = ctx.scan(left)
    if text
      value = Float(text)
      return value if value.finite?
    end
    INVALID
  end
end
|
40
|
+
|
41
|
+
# Common code of the primitive integer parsers.
# +left+ holds the scanning regexp and +right+ the accepted value range;
# @base is read in _parse and is assigned by the subclasses' initialize.
class PInt < Binary
  include Prim

  # Builds the integer regexp (case-insensitive) for +base+.
  # Bases above 10 use digits 0-9 plus letters starting at 'a'.
  def int_pattern(sign_strategy, base)
    prefix = sign_strategy_to_pattern(sign_strategy)
    digits =
      if base > 10
        "0-9a-#{('a'.ord + base - 11).chr}"
      else
        "0-#{base - 1}"
      end
    /#{prefix}[#{digits}]+/i
  end

  # Scans the pattern and converts the text with to_i in @base.
  # Returns INVALID when nothing matches or the value is outside +right+.
  def _parse(ctx)
    text = ctx.scan(left)
    if text
      value = text.to_i(@base)
      return value if right.include?(value)
    end
    INVALID
  end
end
|
65
|
+
|
66
|
+
# 32-bit signed int parser: accepts values from -2**31 through 2**31 - 1.
class PInt32 < PInt
  # Remembers the base, builds the pattern and sets the accepted range.
  def initialize(sign_strategy, base)
    @base = base
    self.left = int_pattern(sign_strategy, base)
    self.right = (-(2**31))..(2**31 - 1)
  end
end
|
74
|
+
|
75
|
+
# Unsigned 32-bit int parser: accepts values from 0 up to, but excluding, 2**32.
class PUnsignedInt32 < PInt
  # Remembers the base, builds the pattern and sets the accepted range.
  def initialize(sign_strategy, base)
    @base = base
    self.left = int_pattern(sign_strategy, base)
    self.right = 0...(2**32)
  end
end
|
83
|
+
|
84
|
+
# 64-bit signed int parser: accepts values from -2**63 through 2**63 - 1.
class PInt64 < PInt
  # Remembers the base, builds the pattern and sets the accepted range.
  def initialize(sign_strategy, base)
    @base = base
    self.left = int_pattern(sign_strategy, base)
    self.right = (-(2**63))..(2**63 - 1)
  end
end
|
92
|
+
|
93
|
+
# Unsigned 64-bit int parser: accepts values from 0 up to, but excluding, 2**64.
class PUnsignedInt64 < PInt
  # Remembers the base, builds the pattern and sets the accepted range.
  def initialize(sign_strategy, base)
    @base = base
    self.left = int_pattern(sign_strategy, base)
    self.right = 0...(2**64)
  end
end
|
101
|
+
|
102
|
+
end
|