rsec 0.3.2
Sign up to get free protection for your applications and to get access to all the features.
- data/bench/bench.rb +60 -0
- data/bench/little.rb +328 -0
- data/bench/profile.rb +14 -0
- data/examples/arithmetic.rb +28 -0
- data/examples/bnf.rb +31 -0
- data/examples/c_minus.rb +175 -0
- data/examples/hello.scm +18 -0
- data/examples/s_exp.rb +17 -0
- data/examples/scheme.rb +84 -0
- data/examples/slow_json.rb +68 -0
- data/lib/rsec.rb +40 -0
- data/lib/rsec/helpers.rb +447 -0
- data/lib/rsec/parser.rb +64 -0
- data/lib/rsec/parsers/join.rb +86 -0
- data/lib/rsec/parsers/misc.rb +201 -0
- data/lib/rsec/parsers/prim.rb +102 -0
- data/lib/rsec/parsers/repeat.rb +90 -0
- data/lib/rsec/parsers/seq.rb +94 -0
- data/lib/rsec/utils.rb +116 -0
- data/license.txt +1 -0
- data/readme.rdoc +30 -0
- data/test/helpers.rb +24 -0
- data/test/test_branch.rb +14 -0
- data/test/test_examples.rb +36 -0
- data/test/test_join.rb +52 -0
- data/test/test_lookahead.rb +16 -0
- data/test/test_misc.rb +56 -0
- data/test/test_one_of.rb +39 -0
- data/test/test_pattern.rb +53 -0
- data/test/test_prim.rb +59 -0
- data/test/test_repeat.rb +50 -0
- data/test/test_rsec.rb +10 -0
- data/test/test_seq.rb +51 -0
- metadata +80 -0
data/bench/bench.rb
ADDED
@@ -0,0 +1,60 @@
|
|
1
|
+
# Compare performance between rsec and treetop
|
2
|
+
# NOTE: a simple parser requires little backtracking, so treetop's result caching gains nothing here.
|
3
|
+
# Next step is to compare really complex parsers.
|
4
|
+
|
5
|
+
# string to be parsed
s = '(3+24/5)/10-3*4+((82321+12-3)-3*4+(82321+12-3))/5'

# Resolve every project path to an absolute one. A relative argument such as
# "bench/../examples/arithmetic" (what File.dirname(__FILE__) yields for
# `ruby bench/bench.rb`) does not start with "./", so Kernel#require would look
# it up in $LOAD_PATH and raise LoadError instead of loading the file.
bench_dir    = File.expand_path(File.dirname(__FILE__))
project_root = File.expand_path('..', bench_dir)

# rsec
$:.unshift File.join(project_root, 'lib')
$:.unshift File.join(project_root, 'ext')
require File.join(project_root, 'examples', 'arithmetic')

# treetop
require "treetop"
require File.join(bench_dir, 'little.rb')

require "benchmark"
|
18
|
+
|
19
|
+
# Benchmarks the given block and prints the outcome to stdout.
#
# The block is called once and its return value is printed, then called 200
# more times untimed (presumably a warm-up), and finally called 1000 times
# inside Benchmark.measure; the resulting timing is printed followed by an
# empty line. Integer#times passes the iteration index to the block.
#
# Raises ArgumentError when no block is given.
def measure(&proc)
  raise ArgumentError, 'measure requires a block' unless proc

  puts proc.call
  # Explicit parentheses avoid the "`&' interpreted as argument prefix" warning.
  200.times(&proc)
  timing = Benchmark.measure { 1000.times(&proc) }
  puts(timing, '')
end
|
26
|
+
|
27
|
+
# ------------------------------------------------------------------------------
|
28
|
+
|
29
|
+
# Header: blank line, Benchmark's column caption, blank line.
puts '', Benchmark::CAPTION, ''

# rsec: parse and evaluate the expression.
print 'rsec result:', "\t"
rsec_parser = arithmetic()
measure { rsec_parser.parse!(s) }

# treetop: parse, then evaluate the resulting syntax tree.
print 'treetop result:', "\t"
treetop_parser = ArithmeticParser.new
measure { treetop_parser.parse(s).value }

# treetop: parse only, on a fresh parser instance.
puts 'treetop without calculation'
treetop_parse_only = ArithmeticParser.new
measure { treetop_parse_only.parse(s) }
|
44
|
+
|
45
|
+
# Optional comparison against a Haskell Parsec implementation, run only when
# the shared library has been built next to this script.
PARSEC_ARITH_SO = "#{File.dirname(__FILE__)}/parsec/Arithmetic.so"
if File.exist?(PARSEC_ARITH_SO)
  # The DL library was removed from Ruby's standard library in 2.2. Fiddle
  # offers the same Importer DSL (dlload / extern), so prefer it and fall back
  # to DL only on rubies that do not ship fiddle/import.
  begin
    require 'fiddle/import'
  rescue LoadError
    require 'dl/import'
    require 'dl/types'
  end

  module Arithmetic
    extend(defined?(Fiddle::Importer) ? Fiddle::Importer : DL::Importer)
    dlload PARSEC_ARITH_SO
    extern "long calculate(char *)", :stdcall
    extern "long donothing(char *)", :stdcall
  end
  print 'Haskell Parsec result:', "\t"
  measure { Arithmetic.calculate(s) }
else
  puts 'Haskell Parsec benchmark requires ghc installation. cd bench/parsec and run make.sh(unix) or make.bat(windows)'
end
|
60
|
+
|
data/bench/little.rb
ADDED
@@ -0,0 +1,328 @@
|
|
1
|
+
# Autogenerated from a Treetop grammar. Edits may be lost.
|
2
|
+
|
3
|
+
|
4
|
+
module Arithmetic
|
5
|
+
include Treetop::Runtime
|
6
|
+
|
7
|
+
def root
|
8
|
+
@root || :expr
|
9
|
+
end
|
10
|
+
|
11
|
+
module Expr0
|
12
|
+
def op
|
13
|
+
elements[0]
|
14
|
+
end
|
15
|
+
|
16
|
+
def right
|
17
|
+
elements[1]
|
18
|
+
end
|
19
|
+
end
|
20
|
+
|
21
|
+
module Expr1
|
22
|
+
def left
|
23
|
+
elements[0]
|
24
|
+
end
|
25
|
+
|
26
|
+
def right_opt
|
27
|
+
elements[1]
|
28
|
+
end
|
29
|
+
end
|
30
|
+
|
31
|
+
module Expr2
|
32
|
+
def value
|
33
|
+
right_opt.elements.inject(left.value) {|acc, plus_and_right|
|
34
|
+
acc.send \
|
35
|
+
plus_and_right.op.text_value,
|
36
|
+
plus_and_right.right.value
|
37
|
+
}
|
38
|
+
end
|
39
|
+
end
|
40
|
+
|
41
|
+
def _nt_expr
|
42
|
+
start_index = index
|
43
|
+
if node_cache[:expr].has_key?(index)
|
44
|
+
cached = node_cache[:expr][index]
|
45
|
+
if cached
|
46
|
+
cached = SyntaxNode.new(input, index...(index + 1)) if cached == true
|
47
|
+
@index = cached.interval.end
|
48
|
+
end
|
49
|
+
return cached
|
50
|
+
end
|
51
|
+
|
52
|
+
i0, s0 = index, []
|
53
|
+
r1 = _nt_term
|
54
|
+
s0 << r1
|
55
|
+
if r1
|
56
|
+
s2, i2 = [], index
|
57
|
+
loop do
|
58
|
+
i3, s3 = index, []
|
59
|
+
if has_terminal?('\G[\\+\\-]', true, index)
|
60
|
+
r4 = true
|
61
|
+
@index += 1
|
62
|
+
else
|
63
|
+
r4 = nil
|
64
|
+
end
|
65
|
+
s3 << r4
|
66
|
+
if r4
|
67
|
+
r5 = _nt_term
|
68
|
+
s3 << r5
|
69
|
+
end
|
70
|
+
if s3.last
|
71
|
+
r3 = instantiate_node(SyntaxNode,input, i3...index, s3)
|
72
|
+
r3.extend(Expr0)
|
73
|
+
else
|
74
|
+
@index = i3
|
75
|
+
r3 = nil
|
76
|
+
end
|
77
|
+
if r3
|
78
|
+
s2 << r3
|
79
|
+
else
|
80
|
+
break
|
81
|
+
end
|
82
|
+
end
|
83
|
+
r2 = instantiate_node(SyntaxNode,input, i2...index, s2)
|
84
|
+
s0 << r2
|
85
|
+
end
|
86
|
+
if s0.last
|
87
|
+
r0 = instantiate_node(SyntaxNode,input, i0...index, s0)
|
88
|
+
r0.extend(Expr1)
|
89
|
+
r0.extend(Expr2)
|
90
|
+
else
|
91
|
+
@index = i0
|
92
|
+
r0 = nil
|
93
|
+
end
|
94
|
+
|
95
|
+
node_cache[:expr][start_index] = r0
|
96
|
+
|
97
|
+
r0
|
98
|
+
end
|
99
|
+
|
100
|
+
module Term0
|
101
|
+
def op
|
102
|
+
elements[0]
|
103
|
+
end
|
104
|
+
|
105
|
+
def right
|
106
|
+
elements[1]
|
107
|
+
end
|
108
|
+
end
|
109
|
+
|
110
|
+
module Term1
|
111
|
+
def left
|
112
|
+
elements[0]
|
113
|
+
end
|
114
|
+
|
115
|
+
def right_opt
|
116
|
+
elements[1]
|
117
|
+
end
|
118
|
+
end
|
119
|
+
|
120
|
+
module Term2
|
121
|
+
def value
|
122
|
+
right_opt.elements.inject(left.value) {|acc, mul_and_right|
|
123
|
+
acc.send \
|
124
|
+
mul_and_right.op.text_value,
|
125
|
+
mul_and_right.right.value
|
126
|
+
}
|
127
|
+
end
|
128
|
+
end
|
129
|
+
|
130
|
+
def _nt_term
|
131
|
+
start_index = index
|
132
|
+
if node_cache[:term].has_key?(index)
|
133
|
+
cached = node_cache[:term][index]
|
134
|
+
if cached
|
135
|
+
cached = SyntaxNode.new(input, index...(index + 1)) if cached == true
|
136
|
+
@index = cached.interval.end
|
137
|
+
end
|
138
|
+
return cached
|
139
|
+
end
|
140
|
+
|
141
|
+
i0, s0 = index, []
|
142
|
+
r1 = _nt_factor
|
143
|
+
s0 << r1
|
144
|
+
if r1
|
145
|
+
s2, i2 = [], index
|
146
|
+
loop do
|
147
|
+
i3, s3 = index, []
|
148
|
+
if has_terminal?('\G[\\*\\/]', true, index)
|
149
|
+
r4 = true
|
150
|
+
@index += 1
|
151
|
+
else
|
152
|
+
r4 = nil
|
153
|
+
end
|
154
|
+
s3 << r4
|
155
|
+
if r4
|
156
|
+
r5 = _nt_factor
|
157
|
+
s3 << r5
|
158
|
+
end
|
159
|
+
if s3.last
|
160
|
+
r3 = instantiate_node(SyntaxNode,input, i3...index, s3)
|
161
|
+
r3.extend(Term0)
|
162
|
+
else
|
163
|
+
@index = i3
|
164
|
+
r3 = nil
|
165
|
+
end
|
166
|
+
if r3
|
167
|
+
s2 << r3
|
168
|
+
else
|
169
|
+
break
|
170
|
+
end
|
171
|
+
end
|
172
|
+
r2 = instantiate_node(SyntaxNode,input, i2...index, s2)
|
173
|
+
s0 << r2
|
174
|
+
end
|
175
|
+
if s0.last
|
176
|
+
r0 = instantiate_node(SyntaxNode,input, i0...index, s0)
|
177
|
+
r0.extend(Term1)
|
178
|
+
r0.extend(Term2)
|
179
|
+
else
|
180
|
+
@index = i0
|
181
|
+
r0 = nil
|
182
|
+
end
|
183
|
+
|
184
|
+
node_cache[:term][start_index] = r0
|
185
|
+
|
186
|
+
r0
|
187
|
+
end
|
188
|
+
|
189
|
+
module Factor0
|
190
|
+
def expr
|
191
|
+
elements[1]
|
192
|
+
end
|
193
|
+
|
194
|
+
end
|
195
|
+
|
196
|
+
module Factor1
|
197
|
+
def value
|
198
|
+
expr.value
|
199
|
+
end
|
200
|
+
end
|
201
|
+
|
202
|
+
def _nt_factor
|
203
|
+
start_index = index
|
204
|
+
if node_cache[:factor].has_key?(index)
|
205
|
+
cached = node_cache[:factor][index]
|
206
|
+
if cached
|
207
|
+
cached = SyntaxNode.new(input, index...(index + 1)) if cached == true
|
208
|
+
@index = cached.interval.end
|
209
|
+
end
|
210
|
+
return cached
|
211
|
+
end
|
212
|
+
|
213
|
+
i0 = index
|
214
|
+
i1, s1 = index, []
|
215
|
+
if has_terminal?('(', false, index)
|
216
|
+
r2 = instantiate_node(SyntaxNode,input, index...(index + 1))
|
217
|
+
@index += 1
|
218
|
+
else
|
219
|
+
terminal_parse_failure('(')
|
220
|
+
r2 = nil
|
221
|
+
end
|
222
|
+
s1 << r2
|
223
|
+
if r2
|
224
|
+
r3 = _nt_expr
|
225
|
+
s1 << r3
|
226
|
+
if r3
|
227
|
+
if has_terminal?(')', false, index)
|
228
|
+
r4 = instantiate_node(SyntaxNode,input, index...(index + 1))
|
229
|
+
@index += 1
|
230
|
+
else
|
231
|
+
terminal_parse_failure(')')
|
232
|
+
r4 = nil
|
233
|
+
end
|
234
|
+
s1 << r4
|
235
|
+
end
|
236
|
+
end
|
237
|
+
if s1.last
|
238
|
+
r1 = instantiate_node(SyntaxNode,input, i1...index, s1)
|
239
|
+
r1.extend(Factor0)
|
240
|
+
r1.extend(Factor1)
|
241
|
+
else
|
242
|
+
@index = i1
|
243
|
+
r1 = nil
|
244
|
+
end
|
245
|
+
if r1
|
246
|
+
r0 = r1
|
247
|
+
else
|
248
|
+
r5 = _nt_number
|
249
|
+
if r5
|
250
|
+
r0 = r5
|
251
|
+
else
|
252
|
+
@index = i0
|
253
|
+
r0 = nil
|
254
|
+
end
|
255
|
+
end
|
256
|
+
|
257
|
+
node_cache[:factor][start_index] = r0
|
258
|
+
|
259
|
+
r0
|
260
|
+
end
|
261
|
+
|
262
|
+
module Number0
|
263
|
+
end
|
264
|
+
|
265
|
+
module Number1
|
266
|
+
def value
|
267
|
+
text_value.to_f
|
268
|
+
end
|
269
|
+
end
|
270
|
+
|
271
|
+
def _nt_number
|
272
|
+
start_index = index
|
273
|
+
if node_cache[:number].has_key?(index)
|
274
|
+
cached = node_cache[:number][index]
|
275
|
+
if cached
|
276
|
+
cached = SyntaxNode.new(input, index...(index + 1)) if cached == true
|
277
|
+
@index = cached.interval.end
|
278
|
+
end
|
279
|
+
return cached
|
280
|
+
end
|
281
|
+
|
282
|
+
i0, s0 = index, []
|
283
|
+
if has_terminal?('\G[1-9]', true, index)
|
284
|
+
r1 = true
|
285
|
+
@index += 1
|
286
|
+
else
|
287
|
+
r1 = nil
|
288
|
+
end
|
289
|
+
s0 << r1
|
290
|
+
if r1
|
291
|
+
s2, i2 = [], index
|
292
|
+
loop do
|
293
|
+
if has_terminal?('\G[0-9]', true, index)
|
294
|
+
r3 = true
|
295
|
+
@index += 1
|
296
|
+
else
|
297
|
+
r3 = nil
|
298
|
+
end
|
299
|
+
if r3
|
300
|
+
s2 << r3
|
301
|
+
else
|
302
|
+
break
|
303
|
+
end
|
304
|
+
end
|
305
|
+
r2 = instantiate_node(SyntaxNode,input, i2...index, s2)
|
306
|
+
s0 << r2
|
307
|
+
end
|
308
|
+
if s0.last
|
309
|
+
r0 = instantiate_node(SyntaxNode,input, i0...index, s0)
|
310
|
+
r0.extend(Number0)
|
311
|
+
r0.extend(Number1)
|
312
|
+
else
|
313
|
+
@index = i0
|
314
|
+
r0 = nil
|
315
|
+
end
|
316
|
+
|
317
|
+
node_cache[:number][start_index] = r0
|
318
|
+
|
319
|
+
r0
|
320
|
+
end
|
321
|
+
|
322
|
+
end
|
323
|
+
|
324
|
+
class ArithmeticParser < Treetop::Runtime::CompiledParser
|
325
|
+
include Arithmetic
|
326
|
+
end
|
327
|
+
|
328
|
+
|
data/bench/profile.rb
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
# string to be parsed
s = '(3+24/5)/10-3*4+((82321+12-3)-3*4+(82321+12-3))/5'

# Resolve project paths to absolute ones. A relative argument such as
# "bench/../examples/arithmetic" (what File.dirname(__FILE__) yields for
# `ruby bench/profile.rb`) does not start with "./", so Kernel#require would
# search $LOAD_PATH for it and raise LoadError instead of loading the file.
project_root = File.expand_path('..', File.dirname(__FILE__))

# rsec
$:.unshift File.join(project_root, 'lib')
$:.unshift File.join(project_root, 'ext')
require File.join(project_root, 'examples', 'arithmetic')

p = arithmetic

# "profile" is required only after the parser has been built; keep this order
# (presumably so that parser construction is excluded from the profile).
require "profile"
1000.times {
  p.parse s
}
|
@@ -0,0 +1,28 @@
|
|
1
|
+
# arithmetic parser
|
2
|
+
|
3
|
+
require "rsec"
|
4
|
+
|
5
|
+
include Rsec::Helpers
|
6
|
+
|
7
|
+
# Builds and returns the arithmetic expression parser (the result of
# `expr.eof`), composed from the Rsec helper combinators.
#
# Grammar shape, as written below:
#   factor := number | '(' expr ')'
#   term   := factor joined by one of * / %
#   expr   := term   joined by one of + -
def arithmetic
  # Receives one array argument [first, op, operand, op, operand, ...] and
  # folds it left to right, sending each operator to the running value.
  fold = proc do |(first, *rest)|
    rest.each_slice(2).inject(first) { |acc, (op, rhs)| acc.send(op, rhs) }
  end

  num    = prim(:double).fail('number')
  # `expr` is only assigned further down; the block handed to lazy defers the
  # reference. NOTE(review): relies on how Rsec's lazy resolves the name later;
  # keep the local called `expr` and keep this statement order.
  paren  = '('.r >> lazy { expr } << ')'
  factor = num | paren
  term   = factor.join(one_of_('*/%').fail('operator')).map(&fold)
  expr   = term.join(one_of_('+-').fail('operator')).map(&fold)
  expr.eof
end
|
21
|
+
|
22
|
+
# Demo, executed only when this file is run directly: prints each expression
# followed by the inspected result of parsing it with a freshly built parser.
if __FILE__ == $PROGRAM_NAME
  [
    '1+ 2*4',  #=> 9
    '1+ 2*/4'  #=> syntax error
  ].each do |source|
    print "#{source} = "
    p arithmetic.parse!(source)
  end
end
|
28
|
+
|
data/examples/bnf.rb
ADDED
@@ -0,0 +1,31 @@
|
|
1
|
+
# BNF grammar parser
|
2
|
+
# http://en.wikipedia.org/wiki/Backus-Naur_form
|
3
|
+
|
4
|
+
require "rsec"
|
5
|
+
|
6
|
+
include Rsec::Helpers
|
7
|
+
|
8
|
+
# Builds and returns a parser for BNF grammar text, composed from the Rsec
# helper combinators. The returned parser is `spacee.join(rule).eof`.
def bnf
  nbsp      = /[\ \t]*/.r            # spaces and tabs only
  spacee    = /\s*/.r                # any whitespace, include \n
  literal   = /".*?"|'.*?'/.r        # double- or single-quoted text
  rule_name = /\<.*?\>/              # <name>; left as a plain Regexp, as in the original

  term = literal | rule_name
  # NOTE(review): `.even` presumably keeps the terms and drops the separators
  # matched by nbsp; confirm against the Rsec documentation.
  list = term.join(nbsp).even
  alternation = seq(nbsp, '|', nbsp)[1]
  expr = list.join(alternation)
  rule = seq_(rule_name, '::=', expr)

  spacee.join(rule).eof
end
|
19
|
+
|
20
|
+
require "pp"
|
21
|
+
pp bnf.parse! DATA.read
|
22
|
+
|
23
|
+
__END__
|
24
|
+
<syntax> ::= <rule> | <rule> <syntax>
|
25
|
+
<rule> ::= <opt-whitespace> "<" <rule-name> ">" <opt-whitespace> "::=" <opt-whitespace> <expression> <line-end>
|
26
|
+
<opt-whitespace> ::= " " <opt-whitespace> | ""
|
27
|
+
<expression> ::= <list> | <list> "|" <expression>
|
28
|
+
<line-end> ::= <opt-whitespace> <EOL> | <line-end> <line-end>
|
29
|
+
<list> ::= <term> | <term> <opt-whitespace> <list>
|
30
|
+
<term> ::= <literal> | "<" <rule-name> ">"
|
31
|
+
<literal> ::= '"' <text> '"' | "'" <text> "'"
|