rsec 0.3.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/bench/bench.rb +60 -0
- data/bench/little.rb +328 -0
- data/bench/profile.rb +14 -0
- data/examples/arithmetic.rb +28 -0
- data/examples/bnf.rb +31 -0
- data/examples/c_minus.rb +175 -0
- data/examples/hello.scm +18 -0
- data/examples/s_exp.rb +17 -0
- data/examples/scheme.rb +84 -0
- data/examples/slow_json.rb +68 -0
- data/lib/rsec.rb +40 -0
- data/lib/rsec/helpers.rb +447 -0
- data/lib/rsec/parser.rb +64 -0
- data/lib/rsec/parsers/join.rb +86 -0
- data/lib/rsec/parsers/misc.rb +201 -0
- data/lib/rsec/parsers/prim.rb +102 -0
- data/lib/rsec/parsers/repeat.rb +90 -0
- data/lib/rsec/parsers/seq.rb +94 -0
- data/lib/rsec/utils.rb +116 -0
- data/license.txt +1 -0
- data/readme.rdoc +30 -0
- data/test/helpers.rb +24 -0
- data/test/test_branch.rb +14 -0
- data/test/test_examples.rb +36 -0
- data/test/test_join.rb +52 -0
- data/test/test_lookahead.rb +16 -0
- data/test/test_misc.rb +56 -0
- data/test/test_one_of.rb +39 -0
- data/test/test_pattern.rb +53 -0
- data/test/test_prim.rb +59 -0
- data/test/test_repeat.rb +50 -0
- data/test/test_rsec.rb +10 -0
- data/test/test_seq.rb +51 -0
- metadata +80 -0
data/lib/rsec/parser.rb
ADDED
@@ -0,0 +1,64 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
|
3
|
+
module Rsec #:nodoc:
|
4
|
+
# Parser base: mixed into every parser object. Supplies the public entry
# points (#parse, #parse!) and a structural #inspect. Including classes are
# expected to implement #_parse(ctx).
module Parser
  # Parses +str+ from a fresh ParseContext and returns whatever #_parse
  # returns, unmodified.
  # NOTE(review): a failed parse presumably yields the INVALID marker rather
  # than nil (#parse! detects failure with INVALID[ret]) -- confirm.
  def parse(str, source_name = 'source')
    _parse(ParseContext.new(str, source_name))
  end

  # Almost the same as #parse, but when the result is INVALID it raises the
  # error built by ctx.generate_error (described as a SyntaxError).
  def parse!(str, source_name = 'source')
    context = ParseContext.new(str, source_name)
    result = _parse(context)
    raise context.generate_error(source_name) if INVALID[result]
    result
  end

  attr_accessor :name

  # Human readable description of the parser tree.
  # The name defaults to the last word of the class name and is memoized.
  def inspect
    # TODO move
    @name ||= self.class.to_s[/\w+$/]
    case self
    when Lazy
      # checked before Unary so the lazily captured target is not inspected
      "<#{name}>"
    when Binary
      "<#{name} #{left.inspect} #{right.inspect}>"
    when Seq, Seq_, Branch
      # don't use redefined map! -- collect through each instead
      parts = []
      each { |member| parts << member.inspect }
      "<#{name} #{parts.join(' ')}>"
    when Unary
      "<#{name} #{some.inspect}>"
    else
      "<#{name}>"
    end
  end
end
|
45
|
+
|
46
|
+
# Base for parsers that take two parameters, exposed as +left+ and +right+.
# The Parser mixin is included through the Struct definition block.
Binary = Struct.new(:left, :right) do
  include Parser
end
|
51
|
+
|
52
|
+
# Base for parsers that take a single parameter, exposed as +some+.
# The Parser mixin is included through the Struct definition block.
Unary = Struct.new(:some) do
  include Parser
end
|
57
|
+
end
|
58
|
+
|
59
|
+
require "rsec/parsers/misc"
|
60
|
+
require "rsec/parsers/seq"
|
61
|
+
require "rsec/parsers/prim"
|
62
|
+
require "rsec/parsers/join"
|
63
|
+
require "rsec/parsers/repeat"
|
64
|
+
|
@@ -0,0 +1,86 @@
|
|
1
|
+
module Rsec
|
2
|
+
|
3
|
+
# Join base: parses +left+ items separated by +right+ and returns an array
# holding both the items and the separators, in input order.
class Join < Binary
  # Returns INVALID when the first item does not parse; otherwise an array
  # [item, separator, item, ...].
  def _parse(ctx)
    first = left._parse(ctx)
    return INVALID if INVALID[first]

    collected = [first]
    loop do
      mark = ctx.pos
      separator = right._parse(ctx)
      # the next item is only attempted once a separator has matched
      item = INVALID[separator] ? INVALID : left._parse(ctx)
      if INVALID[item]
        # drop the dangling separator (if any) and stop
        ctx.pos = mark
        break
      end

      break if ctx.pos == mark # stop if no advance, prevent infinite loop
      collected.push(separator, item)
    end
    collected
  end
end
|
30
|
+
|
31
|
+
# Like a join of +left+ items separated by +right+, but keeps only the items
# (tokens); separator results are discarded.
class JoinEven < Binary
  # Returns INVALID when the first item does not parse; otherwise an array
  # of the parsed items.
  def _parse(ctx)
    first = left._parse(ctx)
    return INVALID if INVALID[first]

    tokens = [first]
    loop do
      mark = ctx.pos
      separator = right._parse(ctx)
      # the next item is only attempted once a separator has matched
      item = INVALID[separator] ? INVALID : left._parse(ctx)
      if INVALID[item]
        # rewind past the unmatched separator/item pair and stop
        ctx.pos = mark
        break
      end

      break if ctx.pos == mark # stop if no advance, prevent infinite loop
      tokens << item
    end
    tokens
  end
end
|
57
|
+
|
58
|
+
# Like a join of +left+ items separated by +right+, but keeps only the
# separator results (inters).
# NOTE if only 1 token matches, the result is an empty array
class JoinOdd < Binary
  # Returns INVALID when the first item does not parse; otherwise an array
  # of the separators that were followed by another item.
  def _parse(ctx)
    first = left._parse(ctx)
    return INVALID if INVALID[first]

    inters = []
    loop do
      mark = ctx.pos
      separator = right._parse(ctx)
      # the next item is only attempted once a separator has matched
      item = INVALID[separator] ? INVALID : left._parse(ctx)
      if INVALID[item]
        # rewind past the unmatched separator/item pair and stop
        ctx.pos = mark
        break
      end

      break if ctx.pos == mark # stop if no advance, prevent infinite loop
      inters << separator
    end
    inters
  end
end
|
85
|
+
|
86
|
+
end
|
@@ -0,0 +1,201 @@
|
|
1
|
+
module Rsec #:nodoc
|
2
|
+
|
3
|
+
# Transforms a parse result: +left+ is the wrapped parser, +right+ is called
# with its result through #[].
class Map < Binary
  # Returns INVALID when +left+ fails, otherwise right[result].
  def _parse(ctx)
    parsed = left()._parse(ctx)
    INVALID[parsed] ? INVALID : right()[parsed]
  end
end
|
11
|
+
|
12
|
+
# Sets the expected tokens reported for a parsing error in ctx.
# When +left+ fails, this parser's mask bit (stored in +right+) is handed to
# ctx.on_fail so the error can be registered.
class Fail < Binary
  # Builds a Fail parser around +left+ and records +tokens+ under a freshly
  # allocated bit index; the parser's +right+ member holds the bit mask.
  # Raises once the bit counter has passed 1000; see Fail.reset.
  def self.[](left, tokens)
    # TODO mutex
    @mask_bit ||= 0
    @token_table ||= []
    if @mask_bit > 1000
      raise "You've created too many fail parsers, If it is your intention, call Rsec::Fail.reset when previous expect settings can be thrown away."
    end
    parser = super(left, (1 << @mask_bit))
    @token_table[@mask_bit] = tokens
    @mask_bit += 1
    parser
  end

  # Forgets every registered token list and restarts bit allocation at 0.
  def self.reset
    @mask_bit = 0
    @token_table = []
  end

  # Returns the de-duplicated tokens of every registered entry whose bit is
  # set in +mask+. Returns an empty array when nothing has been registered
  # yet (the table is still nil before the first Fail parser is created).
  def self.get_tokens(mask)
    res = []
    (@token_table || []).each_with_index do |tokens, idx|
      next unless tokens
      res.concat(tokens) if (mask & (1 << idx)) > 0
    end
    res.uniq
  end

  # Parses with +left+; on failure reports this parser's mask to the context.
  # The result of +left+ (possibly INVALID) is returned unchanged.
  def _parse(ctx)
    res = left()._parse(ctx)
    ctx.on_fail(right) if INVALID[res]
    res
  end
end
|
51
|
+
|
52
|
+
# Look ahead: yields the result of +left+ only when +right+ matches at the
# position where +left+ stopped. The input consumed by +right+ is given back.
class LookAhead < Binary
  def _parse(ctx)
    result = left()._parse(ctx)
    mark = ctx.pos
    ahead = right()._parse(ctx)
    return INVALID if INVALID[ahead]

    # un-consume whatever the look-ahead parser read
    ctx.pos = mark
    result
  end
end
|
62
|
+
|
63
|
+
# Negative look ahead: yields the result of +left+ only when +right+ does
# NOT match at the position where +left+ stopped.
class NegativeLookAhead < Binary
  # Returns INVALID when the look-ahead parser matches; otherwise restores
  # the position recorded after +left+ and returns the result of +left+.
  def _parse(ctx)
    res = left()._parse(ctx)
    pos = ctx.pos
    # INVALID must be returned explicitly here: a bare `if` without `else`
    # would evaluate to nil, which INVALID[...] checks do not treat as a
    # failure.
    return INVALID unless INVALID[right()._parse(ctx)]

    ctx.pos = pos
    res
  end
end
|
74
|
+
|
75
|
+
# Branch combinator: tries each member of +some+ in order.
# The result is that of the first member that parses, or INVALID.
class Branch < Unary
  def _parse(ctx)
    start = ctx.pos
    some.each do |alternative|
      outcome = alternative._parse(ctx)
      if INVALID[outcome]
        # rewind so the next alternative starts from the same place
        ctx.pos = start
      else
        return outcome
      end
    end
    INVALID
  end
end
|
88
|
+
|
89
|
+
# Matches the pattern held in +some+ via ctx.scan.
class Pattern < Unary
  # Returns the value of ctx.scan, or INVALID when that value is nil/false.
  def _parse(ctx)
    ctx.scan(some()) || INVALID
  end
end
|
95
|
+
|
96
|
+
# Scans until the pattern held in +some+ via ctx.scan_until.
# Exists as an optimization.
class UntilPattern < Unary
  # Returns the value of ctx.scan_until, or INVALID when it is nil/false.
  def _parse(ctx)
    ctx.scan_until(some()) || INVALID
  end
end
|
103
|
+
|
104
|
+
# Skips the pattern held in +some+ via ctx.skip.
# For optimization, not exposed to users.
class SkipPattern < Unary
  # Returns the value of ctx.skip, or INVALID when it is nil/false.
  def _parse(ctx)
    ctx.skip(some()) || INVALID
  end
end
|
110
|
+
|
111
|
+
# Skips until the pattern held in +some+ via ctx.skip_until.
# For optimization, not exposed to users.
class SkipUntilPattern < Unary
  # Returns the value of ctx.skip_until, or INVALID when it is nil/false.
  def _parse(ctx)
    ctx.skip_until(some()) || INVALID
  end
end
|
117
|
+
|
118
|
+
# Requires the input to be exhausted after the wrapped parser has run.
class Eof < Unary
  # Runs +some+ first, then returns its result only if ctx.eos? holds;
  # otherwise INVALID.
  def _parse(ctx)
    result = some()._parse(ctx)
    return INVALID unless ctx.eos?

    result
  end
end
|
125
|
+
|
126
|
+
# Matches one character out of the string held in +some+.
class OneOf < Unary
  # Returns the character read when it occurs in +some+; otherwise puts the
  # position back to where it was before reading and returns INVALID.
  # Also returns INVALID at end of input.
  def _parse(ctx)
    return INVALID if ctx.eos?

    # Remember the position instead of subtracting 1 afterwards: getch may
    # consume more than one position unit for a multibyte character
    # (assumes ctx.pos is byte based, as in StringScanner -- TODO confirm).
    start = ctx.pos
    chr = ctx.getch
    return chr if some().index(chr)

    ctx.pos = start
    INVALID
  end
end
|
139
|
+
|
140
|
+
# Matches one character out of the string held in +some+, skipping
# whitespace (/\s*/) before and after it.
class OneOf_ < Unary
  # Returns the matched character, or INVALID at end of input or when the
  # character is not in +some+. The position is not restored on failure.
  def _parse(ctx)
    ctx.skip(/\s*/)
    return INVALID if ctx.eos?

    chr = ctx.getch
    return INVALID unless some().index(chr)

    ctx.skip(/\s*/)
    chr
  end
end
|
153
|
+
|
154
|
+
# Sometimes a variable is not defined yet when a grammar is written;
# Lazy defers capturing it until the parser is first used.
# NOTE the value is captured the first time it is called
class Lazy < Unary
  # Resolves the target parser once (memoized in @some), then delegates.
  def _parse(ctx)
    unless @some
      @some =
        begin
          # +some+ is invoked through #[] to obtain the real parser
          some()[]
        rescue NameError => ex
          # fall back to evaluating the missing name in the binding of +some+
          some().binding.eval ex.name.to_s
        end
    end
    @some._parse(ctx)
  end
end
|
168
|
+
|
169
|
+
# Wraps a parser so that its result is cached in ctx.cache, keyed by the
# start position combined with a per-instance salt.
# May improve performance.
class Cached
  include Parser

  # Convenience constructor: Cached[parser].
  def self.[](parser)
    new(parser)
  end

  def initialize(parser)
    @parser = parser
    # the salt occupies the bits above the low 32, which are left to the position
    @salt = object_id << 32
  end

  # Returns the cached result for the current position when there is one
  # (also restoring the position recorded with it); otherwise runs the
  # wrapped parser and stores [result, end position].
  def _parse(ctx)
    key = ctx.pos | @salt
    cache = ctx.cache
    # result maybe nil, so presence is tested with has_key? instead of ||=
    unless cache.has_key?(key)
      result = @parser._parse(ctx)
      cache[key] = [result, ctx.pos]
      return result
    end

    result, pos = cache[key]
    ctx.pos = pos
    result
  end
end
|
199
|
+
|
200
|
+
end
|
201
|
+
|
@@ -0,0 +1,102 @@
|
|
1
|
+
module Rsec
|
2
|
+
|
3
|
+
# Primitive base: helpers shared by the primitive number parsers.
module Prim
  # Maps a sign strategy code to a regexp source fragment:
  # 3 => optional '+' or '-', 2 => optional '+', 1 => optional '-',
  # 0 => no sign. Any other value yields nil.
  def sign_strategy_to_pattern(sign_strategy)
    case sign_strategy
    when 3 then '[\+\-]?'
    when 2 then '\+?'
    when 1 then '\-?'
    when 0 then ''
    end
  end
end
|
14
|
+
|
15
|
+
# Double precision float parser. +left+ holds the regexp used for scanning;
# +right+ is not assigned by this class.
class PDouble < Binary
  include Prim

  # Builds the (case-insensitive) float regexp for the given sign strategy,
  # in hexadecimal or decimal notation.
  def float_pattern(sign_strategy, is_hex)
    sign = sign_strategy_to_pattern(sign_strategy)
    return /#{sign}0x[\da-f]+(\.[\da-f]+)?/i if is_hex

    /#{sign}\d+(\.\d+)?(e[\+\-]?\d+)?/i
  end

  def initialize(sign_strategy, is_hex)
    self.left = float_pattern(sign_strategy, is_hex)
  end

  # Returns the scanned text converted with Float() when it is finite;
  # INVALID when nothing was scanned or the value is not finite.
  def _parse(ctx)
    text = ctx.scan(left)
    return INVALID unless text

    value = Float(text)
    value.finite? ? value : INVALID
  end
end
|
40
|
+
|
41
|
+
# Primitive int parser commons. Subclasses set +left+ (scan regexp),
# +right+ (accepted value range) and @base (radix).
class PInt < Binary
  include Prim

  # Builds the (case-insensitive) integer regexp for a sign strategy and
  # radix: digits 0..base-1 for bases up to 10, digits 0-9 plus letters
  # starting at 'a' for larger bases.
  def int_pattern(sign_strategy, base)
    sign = sign_strategy_to_pattern(sign_strategy)
    digits =
      if base > 10
        "0-9a-#{('a'.ord + base - 11).chr}"
      else
        "0-#{base - 1}"
      end
    /#{sign}[#{digits}]+/i
  end

  # Returns the scanned text converted with to_i(@base) when +right+
  # includes it; INVALID when nothing was scanned or it is out of range.
  def _parse(ctx)
    text = ctx.scan(left)
    return INVALID unless text

    value = text.to_i(@base)
    right.include?(value) ? value : INVALID
  end
end
|
65
|
+
|
66
|
+
# 32-bit int parser: accepts values in -(2**31) .. 2**31 - 1.
class PInt32 < PInt
  def initialize(sign_strategy, base)
    # radix read by PInt#_parse when converting the scanned text
    @base = base
    self.left = int_pattern(sign_strategy, base)
    self.right = Range.new(-(2**31), 2**31 - 1)
  end
end
|
74
|
+
|
75
|
+
# Unsigned 32-bit int parser: accepts values in 0 ... 2**32 (end excluded).
class PUnsignedInt32 < PInt
  def initialize(sign_strategy, base)
    # radix read by PInt#_parse when converting the scanned text
    @base = base
    self.left = int_pattern(sign_strategy, base)
    self.right = Range.new(0, 2**32, true)
  end
end
|
83
|
+
|
84
|
+
# 64-bit int parser: accepts values in -(2**63) .. 2**63 - 1.
class PInt64 < PInt
  def initialize(sign_strategy, base)
    # radix read by PInt#_parse when converting the scanned text
    @base = base
    self.left = int_pattern(sign_strategy, base)
    self.right = Range.new(-(2**63), 2**63 - 1)
  end
end
|
92
|
+
|
93
|
+
# Unsigned 64-bit int parser: accepts values in 0 ... 2**64 (end excluded).
class PUnsignedInt64 < PInt
  def initialize(sign_strategy, base)
    # radix read by PInt#_parse when converting the scanned text
    @base = base
    self.left = int_pattern(sign_strategy, base)
    self.right = Range.new(0, 2**64, true)
  end
end
|
101
|
+
|
102
|
+
end
|