rsec 0.3.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/bench/bench.rb +60 -0
- data/bench/little.rb +328 -0
- data/bench/profile.rb +14 -0
- data/examples/arithmetic.rb +28 -0
- data/examples/bnf.rb +31 -0
- data/examples/c_minus.rb +175 -0
- data/examples/hello.scm +18 -0
- data/examples/s_exp.rb +17 -0
- data/examples/scheme.rb +84 -0
- data/examples/slow_json.rb +68 -0
- data/lib/rsec.rb +40 -0
- data/lib/rsec/helpers.rb +447 -0
- data/lib/rsec/parser.rb +64 -0
- data/lib/rsec/parsers/join.rb +86 -0
- data/lib/rsec/parsers/misc.rb +201 -0
- data/lib/rsec/parsers/prim.rb +102 -0
- data/lib/rsec/parsers/repeat.rb +90 -0
- data/lib/rsec/parsers/seq.rb +94 -0
- data/lib/rsec/utils.rb +116 -0
- data/license.txt +1 -0
- data/readme.rdoc +30 -0
- data/test/helpers.rb +24 -0
- data/test/test_branch.rb +14 -0
- data/test/test_examples.rb +36 -0
- data/test/test_join.rb +52 -0
- data/test/test_lookahead.rb +16 -0
- data/test/test_misc.rb +56 -0
- data/test/test_one_of.rb +39 -0
- data/test/test_pattern.rb +53 -0
- data/test/test_prim.rb +59 -0
- data/test/test_repeat.rb +50 -0
- data/test/test_rsec.rb +10 -0
- data/test/test_seq.rb +51 -0
- metadata +80 -0
data/bench/bench.rb
ADDED
@@ -0,0 +1,60 @@
|
|
1
|
+
# Compare performance between rsec and treetop
|
2
|
+
# NOTE: a simple parser doesn't require much backtracking, so treetop's caching is sure to bring no benefit here.
|
3
|
+
# Next step is to compare really complex parsers.
|
4
|
+
|
5
|
+
# string to be parsed
|
6
|
+
s = '(3+24/5)/10-3*4+((82321+12-3)-3*4+(82321+12-3))/5'
|
7
|
+
|
8
|
+
# rsec
|
9
|
+
$:.unshift "#{File.dirname(__FILE__)}/../lib"
|
10
|
+
$:.unshift "#{File.dirname(__FILE__)}/../ext"
|
11
|
+
require "#{File.dirname(__FILE__)}/../examples/arithmetic"
|
12
|
+
|
13
|
+
# treetop
|
14
|
+
require "treetop"
|
15
|
+
require "#{File.dirname(__FILE__)}/little.rb"
|
16
|
+
|
17
|
+
require "benchmark"
|
18
|
+
|
19
|
+
# Benchmarks the given block and prints the outcome to stdout.
#
# Steps, in order:
# 1. calls the block once and prints its return value, so the reader can
#    see what is being measured;
# 2. calls the block +warmup+ more times without timing it;
# 3. prints the Benchmark.measure report for +runs+ timed calls, followed
#    by an empty line.
#
# warmup - number of untimed calls made before the timed section (default 200)
# runs   - number of calls made inside the timed section (default 1000)
#
# Raises ArgumentError when no block is given.
def measure(warmup = 200, runs = 1000, &block)
  raise ArgumentError, 'measure requires a block' unless block

  puts block.call
  # Explicit parentheses keep `&block` from being read as a binary `&`.
  warmup.times(&block)
  puts(Benchmark.measure { runs.times(&block) }, '')
end
|
26
|
+
|
27
|
+
# ------------------------------------------------------------------------------
|
28
|
+
|
29
|
+
puts ''
|
30
|
+
puts Benchmark::CAPTION
|
31
|
+
puts ''
|
32
|
+
|
33
|
+
print 'rsec result:', "\t"
|
34
|
+
p = arithmetic()
|
35
|
+
measure{ p.parse! s }
|
36
|
+
|
37
|
+
print 'treetop result:', "\t"
|
38
|
+
t = ArithmeticParser.new
|
39
|
+
measure{ t.parse(s).value }
|
40
|
+
|
41
|
+
puts 'treetop without calculation'
|
42
|
+
t = ArithmeticParser.new
|
43
|
+
measure{ t.parse s }
|
44
|
+
|
45
|
+
PARSEC_ARITH_SO = "#{File.dirname(__FILE__)}/parsec/Arithmetic.so"
|
46
|
+
if File.exist?(PARSEC_ARITH_SO)
|
47
|
+
require 'dl/import'
|
48
|
+
require 'dl/types'
|
49
|
+
module Arithmetic
|
50
|
+
extend DL::Importer
|
51
|
+
dlload PARSEC_ARITH_SO
|
52
|
+
extern "long calculate(char *)", :stdcall
|
53
|
+
extern "long donothing(char *)", :stdcall
|
54
|
+
end
|
55
|
+
print 'Haskell Parsec result:', "\t"
|
56
|
+
measure{ Arithmetic.calculate s }
|
57
|
+
else
|
58
|
+
puts 'Haskell Parsec benchmark requires ghc installation. cd bench/parsec and run make.sh(unix) or make.bat(windows)'
|
59
|
+
end
|
60
|
+
|
data/bench/little.rb
ADDED
@@ -0,0 +1,328 @@
|
|
1
|
+
# Autogenerated from a Treetop grammar. Edits may be lost.
|
2
|
+
|
3
|
+
|
4
|
+
module Arithmetic
|
5
|
+
include Treetop::Runtime
|
6
|
+
|
7
|
+
def root
|
8
|
+
@root || :expr
|
9
|
+
end
|
10
|
+
|
11
|
+
module Expr0
|
12
|
+
def op
|
13
|
+
elements[0]
|
14
|
+
end
|
15
|
+
|
16
|
+
def right
|
17
|
+
elements[1]
|
18
|
+
end
|
19
|
+
end
|
20
|
+
|
21
|
+
module Expr1
|
22
|
+
def left
|
23
|
+
elements[0]
|
24
|
+
end
|
25
|
+
|
26
|
+
def right_opt
|
27
|
+
elements[1]
|
28
|
+
end
|
29
|
+
end
|
30
|
+
|
31
|
+
# Evaluation mixin for an additive expression node.
#
# Relies on +left+ and +right_opt+ (provided by Expr1) and, for every
# element of +right_opt+, on +op+ and +right+ (provided by Expr0).
#
# NOTE(review): this file's header marks it as generated from a Treetop
# grammar; presumably the same change belongs in the grammar source - confirm.
module Expr2
  # Folds the operator/operand pairs onto the value of +left+, from left
  # to right, and returns the result.
  #
  # The operator is taken from the matched text ('+' or '-' per the
  # `[\+\-]` terminal in _nt_expr) and dispatched with public_send, so
  # text from the input can never reach a private method.
  def value
    right_opt.elements.inject(left.value) do |acc, plus_and_right|
      acc.public_send(
        plus_and_right.op.text_value,
        plus_and_right.right.value
      )
    end
  end
end
|
40
|
+
|
41
|
+
def _nt_expr
|
42
|
+
start_index = index
|
43
|
+
if node_cache[:expr].has_key?(index)
|
44
|
+
cached = node_cache[:expr][index]
|
45
|
+
if cached
|
46
|
+
cached = SyntaxNode.new(input, index...(index + 1)) if cached == true
|
47
|
+
@index = cached.interval.end
|
48
|
+
end
|
49
|
+
return cached
|
50
|
+
end
|
51
|
+
|
52
|
+
i0, s0 = index, []
|
53
|
+
r1 = _nt_term
|
54
|
+
s0 << r1
|
55
|
+
if r1
|
56
|
+
s2, i2 = [], index
|
57
|
+
loop do
|
58
|
+
i3, s3 = index, []
|
59
|
+
if has_terminal?('\G[\\+\\-]', true, index)
|
60
|
+
r4 = true
|
61
|
+
@index += 1
|
62
|
+
else
|
63
|
+
r4 = nil
|
64
|
+
end
|
65
|
+
s3 << r4
|
66
|
+
if r4
|
67
|
+
r5 = _nt_term
|
68
|
+
s3 << r5
|
69
|
+
end
|
70
|
+
if s3.last
|
71
|
+
r3 = instantiate_node(SyntaxNode,input, i3...index, s3)
|
72
|
+
r3.extend(Expr0)
|
73
|
+
else
|
74
|
+
@index = i3
|
75
|
+
r3 = nil
|
76
|
+
end
|
77
|
+
if r3
|
78
|
+
s2 << r3
|
79
|
+
else
|
80
|
+
break
|
81
|
+
end
|
82
|
+
end
|
83
|
+
r2 = instantiate_node(SyntaxNode,input, i2...index, s2)
|
84
|
+
s0 << r2
|
85
|
+
end
|
86
|
+
if s0.last
|
87
|
+
r0 = instantiate_node(SyntaxNode,input, i0...index, s0)
|
88
|
+
r0.extend(Expr1)
|
89
|
+
r0.extend(Expr2)
|
90
|
+
else
|
91
|
+
@index = i0
|
92
|
+
r0 = nil
|
93
|
+
end
|
94
|
+
|
95
|
+
node_cache[:expr][start_index] = r0
|
96
|
+
|
97
|
+
r0
|
98
|
+
end
|
99
|
+
|
100
|
+
module Term0
|
101
|
+
def op
|
102
|
+
elements[0]
|
103
|
+
end
|
104
|
+
|
105
|
+
def right
|
106
|
+
elements[1]
|
107
|
+
end
|
108
|
+
end
|
109
|
+
|
110
|
+
module Term1
|
111
|
+
def left
|
112
|
+
elements[0]
|
113
|
+
end
|
114
|
+
|
115
|
+
def right_opt
|
116
|
+
elements[1]
|
117
|
+
end
|
118
|
+
end
|
119
|
+
|
120
|
+
# Evaluation mixin for a multiplicative term node.
#
# Relies on +left+ and +right_opt+ (provided by Term1) and, for every
# element of +right_opt+, on +op+ and +right+ (provided by Term0).
#
# NOTE(review): this file's header marks it as generated from a Treetop
# grammar; presumably the same change belongs in the grammar source - confirm.
module Term2
  # Folds the operator/operand pairs onto the value of +left+, from left
  # to right, and returns the result.
  #
  # The operator is taken from the matched text ('*' or '/' per the
  # `[\*\/]` terminal in _nt_term) and dispatched with public_send, so
  # text from the input can never reach a private method.
  def value
    right_opt.elements.inject(left.value) do |acc, mul_and_right|
      acc.public_send(
        mul_and_right.op.text_value,
        mul_and_right.right.value
      )
    end
  end
end
|
129
|
+
|
130
|
+
def _nt_term
|
131
|
+
start_index = index
|
132
|
+
if node_cache[:term].has_key?(index)
|
133
|
+
cached = node_cache[:term][index]
|
134
|
+
if cached
|
135
|
+
cached = SyntaxNode.new(input, index...(index + 1)) if cached == true
|
136
|
+
@index = cached.interval.end
|
137
|
+
end
|
138
|
+
return cached
|
139
|
+
end
|
140
|
+
|
141
|
+
i0, s0 = index, []
|
142
|
+
r1 = _nt_factor
|
143
|
+
s0 << r1
|
144
|
+
if r1
|
145
|
+
s2, i2 = [], index
|
146
|
+
loop do
|
147
|
+
i3, s3 = index, []
|
148
|
+
if has_terminal?('\G[\\*\\/]', true, index)
|
149
|
+
r4 = true
|
150
|
+
@index += 1
|
151
|
+
else
|
152
|
+
r4 = nil
|
153
|
+
end
|
154
|
+
s3 << r4
|
155
|
+
if r4
|
156
|
+
r5 = _nt_factor
|
157
|
+
s3 << r5
|
158
|
+
end
|
159
|
+
if s3.last
|
160
|
+
r3 = instantiate_node(SyntaxNode,input, i3...index, s3)
|
161
|
+
r3.extend(Term0)
|
162
|
+
else
|
163
|
+
@index = i3
|
164
|
+
r3 = nil
|
165
|
+
end
|
166
|
+
if r3
|
167
|
+
s2 << r3
|
168
|
+
else
|
169
|
+
break
|
170
|
+
end
|
171
|
+
end
|
172
|
+
r2 = instantiate_node(SyntaxNode,input, i2...index, s2)
|
173
|
+
s0 << r2
|
174
|
+
end
|
175
|
+
if s0.last
|
176
|
+
r0 = instantiate_node(SyntaxNode,input, i0...index, s0)
|
177
|
+
r0.extend(Term1)
|
178
|
+
r0.extend(Term2)
|
179
|
+
else
|
180
|
+
@index = i0
|
181
|
+
r0 = nil
|
182
|
+
end
|
183
|
+
|
184
|
+
node_cache[:term][start_index] = r0
|
185
|
+
|
186
|
+
r0
|
187
|
+
end
|
188
|
+
|
189
|
+
module Factor0
|
190
|
+
def expr
|
191
|
+
elements[1]
|
192
|
+
end
|
193
|
+
|
194
|
+
end
|
195
|
+
|
196
|
+
module Factor1
|
197
|
+
def value
|
198
|
+
expr.value
|
199
|
+
end
|
200
|
+
end
|
201
|
+
|
202
|
+
def _nt_factor
|
203
|
+
start_index = index
|
204
|
+
if node_cache[:factor].has_key?(index)
|
205
|
+
cached = node_cache[:factor][index]
|
206
|
+
if cached
|
207
|
+
cached = SyntaxNode.new(input, index...(index + 1)) if cached == true
|
208
|
+
@index = cached.interval.end
|
209
|
+
end
|
210
|
+
return cached
|
211
|
+
end
|
212
|
+
|
213
|
+
i0 = index
|
214
|
+
i1, s1 = index, []
|
215
|
+
if has_terminal?('(', false, index)
|
216
|
+
r2 = instantiate_node(SyntaxNode,input, index...(index + 1))
|
217
|
+
@index += 1
|
218
|
+
else
|
219
|
+
terminal_parse_failure('(')
|
220
|
+
r2 = nil
|
221
|
+
end
|
222
|
+
s1 << r2
|
223
|
+
if r2
|
224
|
+
r3 = _nt_expr
|
225
|
+
s1 << r3
|
226
|
+
if r3
|
227
|
+
if has_terminal?(')', false, index)
|
228
|
+
r4 = instantiate_node(SyntaxNode,input, index...(index + 1))
|
229
|
+
@index += 1
|
230
|
+
else
|
231
|
+
terminal_parse_failure(')')
|
232
|
+
r4 = nil
|
233
|
+
end
|
234
|
+
s1 << r4
|
235
|
+
end
|
236
|
+
end
|
237
|
+
if s1.last
|
238
|
+
r1 = instantiate_node(SyntaxNode,input, i1...index, s1)
|
239
|
+
r1.extend(Factor0)
|
240
|
+
r1.extend(Factor1)
|
241
|
+
else
|
242
|
+
@index = i1
|
243
|
+
r1 = nil
|
244
|
+
end
|
245
|
+
if r1
|
246
|
+
r0 = r1
|
247
|
+
else
|
248
|
+
r5 = _nt_number
|
249
|
+
if r5
|
250
|
+
r0 = r5
|
251
|
+
else
|
252
|
+
@index = i0
|
253
|
+
r0 = nil
|
254
|
+
end
|
255
|
+
end
|
256
|
+
|
257
|
+
node_cache[:factor][start_index] = r0
|
258
|
+
|
259
|
+
r0
|
260
|
+
end
|
261
|
+
|
262
|
+
module Number0
|
263
|
+
end
|
264
|
+
|
265
|
+
module Number1
|
266
|
+
def value
|
267
|
+
text_value.to_f
|
268
|
+
end
|
269
|
+
end
|
270
|
+
|
271
|
+
def _nt_number
|
272
|
+
start_index = index
|
273
|
+
if node_cache[:number].has_key?(index)
|
274
|
+
cached = node_cache[:number][index]
|
275
|
+
if cached
|
276
|
+
cached = SyntaxNode.new(input, index...(index + 1)) if cached == true
|
277
|
+
@index = cached.interval.end
|
278
|
+
end
|
279
|
+
return cached
|
280
|
+
end
|
281
|
+
|
282
|
+
i0, s0 = index, []
|
283
|
+
if has_terminal?('\G[1-9]', true, index)
|
284
|
+
r1 = true
|
285
|
+
@index += 1
|
286
|
+
else
|
287
|
+
r1 = nil
|
288
|
+
end
|
289
|
+
s0 << r1
|
290
|
+
if r1
|
291
|
+
s2, i2 = [], index
|
292
|
+
loop do
|
293
|
+
if has_terminal?('\G[0-9]', true, index)
|
294
|
+
r3 = true
|
295
|
+
@index += 1
|
296
|
+
else
|
297
|
+
r3 = nil
|
298
|
+
end
|
299
|
+
if r3
|
300
|
+
s2 << r3
|
301
|
+
else
|
302
|
+
break
|
303
|
+
end
|
304
|
+
end
|
305
|
+
r2 = instantiate_node(SyntaxNode,input, i2...index, s2)
|
306
|
+
s0 << r2
|
307
|
+
end
|
308
|
+
if s0.last
|
309
|
+
r0 = instantiate_node(SyntaxNode,input, i0...index, s0)
|
310
|
+
r0.extend(Number0)
|
311
|
+
r0.extend(Number1)
|
312
|
+
else
|
313
|
+
@index = i0
|
314
|
+
r0 = nil
|
315
|
+
end
|
316
|
+
|
317
|
+
node_cache[:number][start_index] = r0
|
318
|
+
|
319
|
+
r0
|
320
|
+
end
|
321
|
+
|
322
|
+
end
|
323
|
+
|
324
|
+
class ArithmeticParser < Treetop::Runtime::CompiledParser
|
325
|
+
include Arithmetic
|
326
|
+
end
|
327
|
+
|
328
|
+
|
data/bench/profile.rb
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
# string to be parsed
|
2
|
+
s = '(3+24/5)/10-3*4+((82321+12-3)-3*4+(82321+12-3))/5'
|
3
|
+
|
4
|
+
# rsec
|
5
|
+
$:.unshift "#{File.dirname(__FILE__)}/../lib"
|
6
|
+
$:.unshift "#{File.dirname(__FILE__)}/../ext"
|
7
|
+
require "#{File.dirname(__FILE__)}/../examples/arithmetic"
|
8
|
+
|
9
|
+
p = arithmetic
|
10
|
+
|
11
|
+
require "profile"
|
12
|
+
1000.times{
|
13
|
+
p.parse s
|
14
|
+
}
|
@@ -0,0 +1,28 @@
|
|
1
|
+
# arithmetic parser
|
2
|
+
|
3
|
+
require "rsec"
|
4
|
+
|
5
|
+
include Rsec::Helpers
|
6
|
+
|
7
|
+
# Builds and returns an rsec parser that evaluates arithmetic expressions.
#
# Grammar, as assembled below:
#   factor = num | '(' expr ')'
#   term   = factor joined by one of * / %
#   expr   = term joined by one of + -
# The returned parser is +expr.eof+.
def arithmetic
  # Folds a list shaped like [operand, operator, operand, ...] from left
  # to right. Operators arrive as parsed text, so they are dispatched with
  # public_send: only public methods of the operand can be reached.
  calculate = proc do |(first, *rest)|
    rest.each_slice(2).inject(first) do |left, (op, right)|
      left.public_send(op, right)
    end
  end

  num    = prim(:double).fail('number')
  # `expr` is only assigned further down; the block handed to `lazy` is
  # presumably not evaluated until parsing starts - keep this order.
  paren  = '('.r >> lazy{expr} << ')'
  factor = num | paren
  term   = factor.join(one_of_('*/%').fail('operator')).map(&calculate)
  expr   = term.join(one_of_('+-').fail('operator')).map(&calculate)
  expr.eof
end
|
21
|
+
|
22
|
+
if __FILE__ == $PROGRAM_NAME
|
23
|
+
print '1+ 2*4 = '
|
24
|
+
p arithmetic.parse! '1+ 2*4' #=> 9
|
25
|
+
print '1+ 2*/4 = '
|
26
|
+
p arithmetic.parse! '1+ 2*/4' #=> syntax error
|
27
|
+
end
|
28
|
+
|
data/examples/bnf.rb
ADDED
@@ -0,0 +1,31 @@
|
|
1
|
+
# BNF grammar parser
|
2
|
+
# http://en.wikipedia.org/wiki/Backus-Naur_form
|
3
|
+
|
4
|
+
require "rsec"
|
5
|
+
|
6
|
+
include Rsec::Helpers
|
7
|
+
|
8
|
+
# Builds and returns an rsec parser for BNF grammar text.
#
# Shape of the parser, as assembled below:
#   term = quoted literal | <rule-name>
#   list = terms joined by inline blanks (only the `even` part is kept)
#   expr = lists joined by a '|' with optional inline blanks around it
#   rule = rule-name '::=' expr
# The result joins rules with arbitrary whitespace and ends with `eof`.
def bnf
  inline_blank = /[\ \t]*/.r
  any_blank    = /\s*/.r # include \n
  quoted       = /".*?"|'.*?'/.r
  rule_name    = /\<.*?\>/

  term = quoted | rule_name
  list = term.join(inline_blank).even

  # Only element [1] of the blank/bar/blank sequence is used as separator.
  bar  = seq(inline_blank, '|', inline_blank)[1]
  expr = list.join(bar)
  rule = seq_(rule_name, '::=', expr)

  any_blank.join(rule).eof
end
|
19
|
+
|
20
|
+
require "pp"
|
21
|
+
pp bnf.parse! DATA.read
|
22
|
+
|
23
|
+
__END__
|
24
|
+
<syntax> ::= <rule> | <rule> <syntax>
|
25
|
+
<rule> ::= <opt-whitespace> "<" <rule-name> ">" <opt-whitespace> "::=" <opt-whitespace> <expression> <line-end>
|
26
|
+
<opt-whitespace> ::= " " <opt-whitespace> | ""
|
27
|
+
<expression> ::= <list> | <list> "|" <expression>
|
28
|
+
<line-end> ::= <opt-whitespace> <EOL> | <line-end> <line-end>
|
29
|
+
<list> ::= <term> | <term> <opt-whitespace> <list>
|
30
|
+
<term> ::= <literal> | "<" <rule-name> ">"
|
31
|
+
<literal> ::= '"' <text> '"' | "'" <text> "'"
|