rsec 0.3.2
Sign up to get free protection for your applications and to get access to all the features.
- data/bench/bench.rb +60 -0
- data/bench/little.rb +328 -0
- data/bench/profile.rb +14 -0
- data/examples/arithmetic.rb +28 -0
- data/examples/bnf.rb +31 -0
- data/examples/c_minus.rb +175 -0
- data/examples/hello.scm +18 -0
- data/examples/s_exp.rb +17 -0
- data/examples/scheme.rb +84 -0
- data/examples/slow_json.rb +68 -0
- data/lib/rsec.rb +40 -0
- data/lib/rsec/helpers.rb +447 -0
- data/lib/rsec/parser.rb +64 -0
- data/lib/rsec/parsers/join.rb +86 -0
- data/lib/rsec/parsers/misc.rb +201 -0
- data/lib/rsec/parsers/prim.rb +102 -0
- data/lib/rsec/parsers/repeat.rb +90 -0
- data/lib/rsec/parsers/seq.rb +94 -0
- data/lib/rsec/utils.rb +116 -0
- data/license.txt +1 -0
- data/readme.rdoc +30 -0
- data/test/helpers.rb +24 -0
- data/test/test_branch.rb +14 -0
- data/test/test_examples.rb +36 -0
- data/test/test_join.rb +52 -0
- data/test/test_lookahead.rb +16 -0
- data/test/test_misc.rb +56 -0
- data/test/test_one_of.rb +39 -0
- data/test/test_pattern.rb +53 -0
- data/test/test_prim.rb +59 -0
- data/test/test_repeat.rb +50 -0
- data/test/test_rsec.rb +10 -0
- data/test/test_seq.rb +51 -0
- metadata +80 -0
data/examples/c_minus.rb
ADDED
@@ -0,0 +1,175 @@
|
|
1
|
+
$:.unshift '../lib'
|
2
|
+
$:.unshift '../ext'
|
3
|
+
require "rsec"
|
4
|
+
require "pp"
|
5
|
+
|
6
|
+
# Mixin for Struct-based nodes: pretty-prints a node as
#   <ClassName member=value member=value>
# by overriding the pretty_print hook used by the pp library.
module FixPP
  # q - the pretty-printer object handed in by pp.
  def pretty_print(q)
    # Only the last segment of the class name is shown (A::B -> B).
    # Anonymous classes have a nil name, so fall back to a generic label
    # instead of raising NoMethodError on nil.
    label = self.class.name.to_s[/\w+$/] || 'struct'
    q.group(1, sprintf("<%s", label), '>') {
      # The separator lambda is intentionally empty: every member emits its
      # own leading breakable below.
      q.seplist(self.members, ->{}) {|member|
        q.breakable
        q.text member.to_s
        q.text '='
        q.group(1) {
          q.breakable ''
          q.pp self[member]
        }
      }
    }
  end
end
|
21
|
+
|
22
|
+
# Parser for a small C-like language ("C minus") built from Rsec combinators.
# CMinus.new builds the grammar; #program returns the top-level parser.
class CMinus
  include Rsec::Helpers
  extend Rsec::Helpers

  # node decls

  class Function < Struct.new :type, :id, :params, :body
    include FixPP
  end

  class Expr < Struct.new :expr
    include FixPP
  end

  class Block < Struct.new :var_decls, :statements
    include FixPP
  end

  class Call < Struct.new :function, :args
    include FixPP
  end

  class GetIndex < Struct.new :id, :idx
    include FixPP
  end

  # "terminal" rules

  NUM       = prim :unsigned_int64
  INT       = prim :int64
  NBSP      = /[\ \t]*/.r
  SPACE     = /\s*/.r
  ID        = /[a-zA-Z]\w*/.r 'id'
  TYPE      = (word('int') | word('void')).fail 'type'
  EOSTMT    = ';'.r 'end of statement'
  ELSE      = word('else').fail 'keyword_else'
  IF        = word('if').fail 'keyword_if'
  WHILE     = word('while').fail 'keyword_while'
  RETURN    = word('return').fail 'keyword_return'
  MUL_OP    = symbol(/[\*\/%]/)
  ADD_OP    = symbol(/[\+\-]/)
  # Regexp alternation is ordered: the two-character operators must come
  # before their one-character prefixes, otherwise ">=" is consumed as ">"
  # and the remaining "=" breaks the expression.
  COMP_OP   = symbol(/(\<=|\<|\>=|\>|==|!=)/).fail 'compare operator'
  COMMA     = /\s*,\s*/.r 'comma'
  EMPTY_BRA = /\[\s*\]/.r 'empty square bracket'

  # call(function apply) expression
  # expr - parser used for each argument
  def call expr
    args = expr.join(COMMA).even
    seq_(ID, '(', args._?, ')') {
      |(id, _, args, _)|
      Call[id, *args]
    }
  end

  # (binary) expression
  # Returns a parser whose result is wrapped in an Expr node.
  def expression
    # precedence climbs from MUL_OP (tightest) to COMP_OP (loosest)
    binary_arithmetic = lazy{factor}
      .join(MUL_OP).unbox
      .join(ADD_OP).unbox
      .join(COMP_OP).unbox
    expr = lazy{assign} | binary_arithmetic
    # abc
    # abc[12]
    var = seq_(ID, seq_('[', expr, ']')[1]._?) {
      |(id, (index))|
      index ? GetIndex[id, index] : id
    }
    assign = seq_(var, '=', expr)
    factor = seq_('(', expr, ')')[1] | call(expr) | var | INT
    # p expr.parse! "gcd (v ,u- u/v *v)"
    expr.map{|e| Expr[e] }
  end

  # statement parser builder, returns [stmt, block]
  # var_decl - parser for a variable declaration, used inside blocks
  def statement var_decl
    expr = expression()
    brace = seq_('(', expr, ')')[1]
    # statement
    _stmt = lazy{stmt} # to reduce the use of lazy{}

    expr_stmt = seq_(expr, EOSTMT)[0] | EOSTMT
    else_stmt = seq_(ELSE, _stmt)[1]
    if_stmt = seq_(IF, brace, _stmt, else_stmt._?)
    while_stmt = seq_(WHILE, brace, _stmt)
    return_stmt = seq_(RETURN, expr._?, EOSTMT){
      |(ret, maybe_expr)|
      [ret, *maybe_expr]
    }
    # { var_decls statements }
    block = seq('{', SPACE.join(var_decl).odd, SPACE.join(_stmt).odd, '}'){
      |(_, vars, stats, _)|
      Block[vars, stats]
    }
    stmt = block | if_stmt | while_stmt | return_stmt | expr_stmt
    # p if_stmt.parse! 'if(v == 0)return u;'
    [stmt, block]
  end

  # Builds the whole grammar and stores the top-level parser in @program.
  def initialize
    type_id = seq_(TYPE, ID).cached
    # p type_id.parse! 'int a'

    var_decl = seq_(type_id, seq_('[', NUM, ']')[1]._?, EOSTMT){
      |(id, maybe_num)|
      [id, *maybe_num]
    }
    # p var_decl.parse! 'int a[12];'
    # p var_decl.parse! 'int a;'

    stmt, block = statement(var_decl)
    # p block.parse! "{int a;}"
    # p stmt.parse! 'if(3==2) {return 4;}'

    param = seq_(type_id, EMPTY_BRA._?) {
      |((ty, id), maybe_bra)|
      [ty, id, *maybe_bra]
    }
    params = param.join(COMMA).even | 'void'.r{[]}
    brace = seq_('(', params, ')')[1]
    fun_decl = seq_(type_id, brace, block){
      |(type, id), params, block|
      Function[type, id, params, block]
    }
    # p fun_decl.parse! 'int gcd(int u, int v){return 2;}'
    @program = SPACE.join(fun_decl | var_decl | EOSTMT).odd.eof
  end

  attr_reader :program
end
|
151
|
+
|
152
|
+
if __FILE__ == $PROGRAM_NAME
  # Demo: parse a small C-minus program and pretty-print each top-level node.
  source = %Q[
    int gcd(int u, int v)
    {
      if (v == 0) return u;
      else return gcd(v,u-u / v*v);
    }

    void main(void)
    {
      int x; int y;
      while (1) {
        x = input();
        y = input();
        output(gcd(x ,y)) ;
      }
    }
  ]
  CMinus.new.program.parse!(source).each { |node| pp node }
end
|
175
|
+
|
data/examples/hello.scm
ADDED
@@ -0,0 +1,18 @@
|
|
1
|
+
(display 4)

(define (fact x)
  (if (= x 0) 1 (* x (fact (- x 1)))))

(display (fact 6))

(define (add x)
  (lambda (y) (+ x y)))

(define add4 (add 4))
(define add5 (add 5))

(display (add4 3))
(display (add5 3))
|
data/examples/s_exp.rb
ADDED
@@ -0,0 +1,17 @@
|
|
1
|
+
# s-expression parser
|
2
|
+
|
3
|
+
require "rsec"
|
4
|
+
|
5
|
+
include Rsec::Helpers
|
6
|
+
|
7
|
+
# Builds and returns a parser for one parenthesized s-expression that must
# span the whole input. The parse result is [id, *units].
def s_exp
  id  = /[a-zA-Z][\w\-]*/.r.fail('id')
  num = prim(:double).fail('num')

  # exp and unit are referenced lazily because they are defined further down
  nested_exp = seq_('(', lazy { exp }, ')')[1]
  naked_unit = id | num | nested_exp
  unit  = naked_unit | seq_('(', lazy { unit }, ')')[1]
  units = unit.join(/\s+/).even.maybe
  exp = seq_(id, units) do |(head, (tail))|
    [head, *tail]
  end
  seq_('(', exp, ')')[1].eof
end
|
17
|
+
|
data/examples/scheme.rb
ADDED
@@ -0,0 +1,84 @@
|
|
1
|
+
# A simple-as-shit scheme interpreter. Usage: ruby scheme.rb hello.scm
|
2
|
+
require "rsec"
|
3
|
+
|
4
|
+
class Scheme
|
5
|
+
include Rsec::Helpers
|
6
|
+
|
7
|
+
Value = Struct.new :val
|
8
|
+
|
9
|
+
# Variable environment: a Hash of local bindings chained to a parent scope
# (any object responding to #[]).
class Bind < Hash
  # parent - enclosing scope consulted when a key is not bound locally
  def initialize parent = {}
    super()
    @parent = parent
  end

  # Binds id to a callable taking (bind, xs): the operands xs are evaluated
  # in bind and the resulting values are passed to the block p.
  def define id, &p # define lambda
    self[id] = -> bind, xs {
      p[* xs.map{|x| bind.eval x }]
    }
  end

  # Evaluates a parsed node in this scope.
  #   Value  -> the wrapped value
  #   String -> variable lookup
  #   Array  -> application when the head is a String,
  #             sequence execution when the head is an Array
  def eval node
    case node
    when Value; node.val
    when String; self[node]
    when Array
      head, *tail = node
      case head
      when String
        pr = self[head]
        pr.is_a?(Proc) ? pr[self, tail] : pr # invoke lambda
      when Array
        node.map{|n| self.eval n }.last # sequence execution
      end
    end
  end

  # Looks key up locally, then in the parent scope.
  # key? is used instead of `super || parent` so that a local binding whose
  # value is false or nil still shadows an outer binding of the same name.
  def [] key
    key?(key) ? super(key) : @parent[key]
  end
end
|
40
|
+
|
41
|
+
# Builds the s-expression parser (@parser) and the global environment (@vm)
# holding the special forms and primitive operators.
def initialize
  # grammar
  boolean = /\#[tf]/.r { |text| Value[text == '#t'] }
  integer = /0|[1-9]\d*/.r { |text| Value[text.to_i] }
  id      = /[^\s\(\)\[\]]+/.r
  atom    = boolean | integer | id
  cell    = atom | lazy { list }
  cells   = /\s*/.r.join(cell).odd
  list    = '('.r >> cells << ')'
  @parser = cells.eof

  # global environment
  @vm = Bind.new

  # (define name body) or (define (func xs...) body); always stored in @vm
  @vm['define'] = lambda do |bind, (param, body)|
    case param
    when String
      @vm[param] = bind.eval(body)
    else
      func, *xs = param
      @vm[func] = @vm['lambda'][bind, [xs, body]]
    end
  end

  # declare: (lambda (xs[0] xs[1]) body)
  @vm['lambda'] = lambda do |bind_def, (xs, body)|
    xs = [xs] if xs.is_a?(String)
    # calling: (some vs[0] vs[1])
    lambda do |bind_call, vs|
      values = vs.map { |v| bind_call.eval(v) }
      scope = Bind.new(bind_def)
      xs.zip(values) { |name, value| scope[name] = value }
      scope.eval(body)
    end
  end

  # (if p left right): only the chosen branch is evaluated
  @vm['if'] = lambda do |bind, (p, left, right)|
    chosen = bind.eval(p) ? left : right
    bind.eval(chosen)
  end

  %w|+ - * / ** % > <|.each { |op| @vm.define(op, &op.to_sym) }
  @vm.define('=', &:==)
  @vm.define('display') { |x| puts x }
end
|
78
|
+
|
79
|
+
# Parses source with @parser and evaluates the result in @vm.
# Returns whatever @vm.eval returns.
def run source
  ast = @parser.parse!(source)
  @vm.eval(ast)
end
|
82
|
+
end
|
83
|
+
|
84
|
+
# Entry point: run the scheme file named by the first argument.
if ARGV[0]
  Scheme.new.run(File.read(ARGV[0]))
else
  puts('need a scheme file name')
end
|
@@ -0,0 +1,68 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
|
3
|
+
# grammar from
|
4
|
+
# http://www.json.org/
|
5
|
+
|
6
|
+
require "rsec"
|
7
|
+
|
8
|
+
# A JSON parser built from Rsec combinators (grammar from json.org).
class SlowJSON

  include Rsec::Helper

  def initialize
    generate_parser
    # a document is a single value with optional surrounding whitespace
    @parser = seq(/\s*/, @value, /\s*/)[1].eof
  end

  # Parses the JSON text s and returns the corresponding Ruby value.
  def parse s
    @parser.parse! s
  end

  private

  # term (, term)*
  def elem_parser term
    term.join(/\s*,\s*/.r).even
  end

  # Parser for one chunk of string content: either a run of plain
  # characters or a single backslash escape (already decoded).
  def chars_parser
    # \uXXXX: the four hex digits are packed as one UTF-16BE code unit
    unicode_bytes = /[0-9a-f]{4}/i.r{|bytes|
      [bytes].pack('H*').force_encoding('utf-16be').encode!('utf-8')
    }
    escape_char = '"'.r | "\\" | '/' |
      'b'.r{"\b"} |
      'f'.r{"\f"} |
      'n'.r{"\n"} |
      'r'.r{"\r"} |
      't'.r{"\t"} |
      seq('u'.r, unicode_bytes)[1]
    /[^"\\]+/.r | seq('\\', escape_char)[1]
  end

  # Builds @value, @array and @object (mutually recursive through lazy).
  def generate_parser
    # Use the non-skipping seq here: `>>` / `<<` skip /\s*/ between their
    # operands, which would drop leading whitespace inside the quotes.
    string = seq('"', chars_parser.star.map(&:join), '"')[1]
    # -? int frac? exp?
    number = prim(:double, allowed_sign: '-')
    @value = string | number | lazy{@object} | lazy{@array} |
      'true'.r{true} |
      'false'.r{false} |
      'null'.r{nil}
    pair = seq(string, /\s*:\s*/.r, @value){|k, _, v| [k, v]}
    @array = /\[\s*\]/.r{[]} | '['.r >> elem_parser(@value) << ']'
    @object = /\{\s*\}/.r{{}} | ('{'.r >> elem_parser(pair) << '}').map{|arr|Hash[arr]}
  end

end
|
56
|
+
|
57
|
+
if __FILE__ == $PROGRAM_NAME
  # Demo: parse a handful of sample documents and print each result.
  samples = [
    '""',
    '123.4e5',
    'null',
    '[]',
    '{}',
    '{"no": [3, 4]}',
    '[{}]',
    '[{"S":321061,"T":"GetAttributeResp"},{"ERROR":null,"TS":0,"VAL":{"SqlList":[{"BatchSizeMax":0,"BatchSizeTotal":0,"ConcurrentMax":1,"DataSource":"jdbc:wrap-jdbc:filters=default,encoding:name=ds-offer:jdbc:mysql://100.10.10.10:8066/xxxx","EffectedRowCount":0,"ErrorCount":0,"ExecuteCount":5,"FetchRowCount":5,"File":null,"ID":2001,"LastError":null,"LastTime":1292742908178,"MaxTimespan":16,"MaxTimespanOccurTime":1292742668191,"Name":null,"RunningCount":0,"SQL":"SELECT @@SQL_MODE","TotalTime":83}]}}]'
  ]
  json = SlowJSON.new
  samples.each { |text| p json.parse(text) }
end
|
68
|
+
|
data/lib/rsec.rb
ADDED
@@ -0,0 +1,40 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
# load the gem
|
3
|
+
|
4
|
+
# All code is under this module
|
5
|
+
module Rsec
  # Preload configs: each constant may be defined by the application before
  # this file is loaded; it is only given its default when still undefined.

  # config method name, default is :r
  TO_PARSER_METHOD = :r unless Rsec.const_defined?(:TO_PARSER_METHOD)

  # config C extension usage
  # options:
  #   :try - default
  #   :no  - don't use
  #   :yes - use
  USE_CEXT = :try unless Rsec.const_defined?(:USE_CEXT)

  VERSION = '0.3'
end
|
25
|
+
|
26
|
+
require "strscan"
|
27
|
+
require "rsec/utils"
|
28
|
+
require "rsec/parser"
|
29
|
+
require "rsec/helpers"
|
30
|
+
|
31
|
+
# Load the C extension according to Rsec::USE_CEXT.
case Rsec::USE_CEXT
when :try
  # LoadError is a ScriptError, not a StandardError, so an inline
  # `rescue nil` modifier would not catch a missing extension.
  begin
    require "rsec/ext"
  rescue LoadError
    # extension not available: continue without it
  end
when :yes
  require "rsec/ext"
when :no
  # extension explicitly disabled
else
  warn "Rsec::USE_CEXT should be one of :try, :yes, :no"
end
|
40
|
+
|
data/lib/rsec/helpers.rb
ADDED
@@ -0,0 +1,447 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
# ------------------------------------------------------------------------------
|
3
|
+
# Helpers(combinators) to construct parser
|
4
|
+
|
5
|
+
module Rsec #:nodoc:
|
6
|
+
|
7
|
+
# ------------------------------------------------------------------------------
|
8
|
+
# these are not callable from a parser
|
9
|
+
module Helpers

  # @ desc.helper
  #   Lazy parser
  # @ example
  #   parser = lazy{future}
  #   future = 'jim'.r
  #   assert_equal 'jim', parser.parse('jim')
  def lazy &p
    raise ArgumentError, 'lazy() requires a block' unless p
    Lazy[p]
  end

  # @ desc.helper
  #   Parses one of chars in str
  # @ example
  #   multiplicative = one_of '*/%'
  #   assert_equal '/', multiplicative.parse('/')
  #   assert_equal Rsec::INVALID, multiplicative.parse('+')
  def one_of str, &p
    Rsec.assert_type str, String
    raise ArgumentError, 'str len should > 0' if str.empty?
    one_of_klass =
      if (str.bytesize == str.size) and Rsec.const_defined?(:OneOfByte)
        # for C-ext
        OneOfByte
      else
        OneOf
      end
    one_of_klass[str.dup.freeze].map p
  end

  # @ desc.helper
  #   See also #one_of#, with leading and trailing optional breakable spaces
  # @ example
  #   additive = one_of_('+-')
  #   assert_equal '+', additive.parse('  +')
  def one_of_ str, &p
    Rsec.assert_type str, String
    raise ArgumentError, 'str len should > 0' if str.empty?
    raise ArgumentError, 'str should be ascii' unless str.bytesize == str.size
    raise ArgumentError, 'str should not contain space' if str =~ /\s/
    # str is known to be single-byte here, so only the C-ext check remains
    spaced_one_of_klass =
      if Rsec.const_defined?(:OneOfByte_)
        # for C-ext
        OneOfByte_
      else
        OneOf_
      end
    spaced_one_of_klass[str.dup.freeze].map p
  end

  # @ desc.helper
  #   Primitive parser, returns nil if overflow or underflow.
  #   There can be an optional '+' or '-' at the beginning of string except unsigned_int32 | unsigned_int64.
  #   type =
  #     :double |
  #     :hex_double |
  #     :int32 |
  #     :int64 |
  #     :unsigned_int32 |
  #     :unsigned_int64
  #   options:
  #     :allowed_sign => '+' | '-' | '' | '+-' (default '+-')
  #     :allowed_signs => (same as :allowed_sign)
  #     :base => integer only (default 10)
  #   Raises when :base is given for a floating point type, when :base is
  #   not an Integer in 2..36, when the sign option is not recognized, when
  #   an unsigned type is asked to accept '-', or when type is unknown.
  # @ example
  #   p = prim :double
  #   assert_equal 1.23, p.parse('1.23')
  #   p = prim :double, allowed_sign: '-'
  #   assert_equal 1.23, p.parse('1.23')
  #   assert_equal -1.23, p.parse('-1.23')
  #   assert_equal Rsec::INVALID, p.parse('+1.23')
  #   p = prim :int32, base: 36
  #   assert_equal 49713, p.parse('12cx')
  def prim type, options={}, &p
    base = options[:base]
    # the guard is about the *type* being floating point, not the base value
    if base and [:double, :hex_double].include?(type)
      raise 'Floating points does not allow :base'
    end
    base ||= 10
    # Integer instead of Fixnum: Fixnum no longer exists in current Ruby
    Rsec.assert_type base, Integer
    unless (2..36).include? base
      raise RangeError, ":base should be in 2..36, but got #{base}"
    end

    # both spellings of the option are honoured everywhere below
    allowed_sign = (options[:allowed_sign] or options[:allowed_signs])
    sign_strategy = \
      case allowed_sign
      when nil, '+-', '-+'; 3
      when '+'; 2
      when '-'; 1
      when ''; 0
      else raise "allowed_sign should be one of nil, '', '+', '-', '+-', '-+'"
      end

    parser = \
      case type
      when :double; PDouble.new sign_strategy, false # decimal
      when :hex_double; PDouble.new sign_strategy, true # hex
      when :int32; PInt32.new sign_strategy, base
      when :int64; PInt64.new sign_strategy, base
      when :unsigned_int32;
        raise 'unsigned int not allow - sign' if allowed_sign.to_s.include?('-')
        PUnsignedInt32.new sign_strategy, base
      when :unsigned_int64;
        raise 'unsigned int not allow - sign' if allowed_sign.to_s.include?('-')
        PUnsignedInt64.new sign_strategy, base
      else
        raise "Invalid primitive type #{type}"
      end
    parser.map p
  end

  # @ desc.helper
  #   Sequence parser
  # @ example
  #   assert_equal ['a', 'b', 'c'], seq('a', 'b', 'c').parse('abc')
  def seq *xs, &p
    xs.map! {|x| Rsec.make_parser x }
    Seq[xs].map p
  end

  # @ desc.helper
  #   Sequence parser with skippable pattern(or parser)
  #   option
  #     :skip default= /\s*/
  # @ example
  #   assert_equal ['a', 'b', 'c'], seq_('a', 'b', 'c', skip: ',').parse('a,b,c')
  def seq_ *xs, &p
    # a trailing Hash is the options hash, not a sequence element
    skipper =
      if (xs.last.is_a? Hash)
        xs.pop[:skip]
      end
    skipper = skipper ? Rsec.make_parser(skipper) : /\s*/.r
    xs.map! {|x| Rsec.make_parser x }
    first, *rest = xs
    raise 'sequence should not be empty' unless first
    Seq_[first, rest, skipper].map p
  end

  # @ desc.helper
  #   A symbol is something wrapped with optional space
  def symbol pattern, skip=/\s*/, &p
    pattern = Rsec.make_parser pattern
    skip = Rsec.try_skip_pattern Rsec.make_parser skip
    SeqOne[[skip, pattern, skip], 1].map p
  end

  # @ desc.helper
  #   A word is wrapped with word boundaries
  # @ example
  #   assert_equal ['yes', '3'], seq('yes', '3').parse('yes3')
  #   assert_equal INVALID, seq(word('yes'), '3').parse('yes3')
  def word pattern, &p
    parser = Rsec.make_parser pattern
    # TODO check pattern type
    Pattern[/\b#{parser.some}\b/].map p
  end
end # helpers
|
168
|
+
|
169
|
+
# robust
|
170
|
+
Helper = Helpers
|
171
|
+
|
172
|
+
# ------------------------------------------------------------------------------
|
173
|
+
# combinators attached to parsers
|
174
|
+
|
175
|
+
module Parser #:nodoc:

  # @ desc
  #   Transform result
  # @ example
  #   parser = /\w+/.r.map{|word| word * 2}
  #   assert_equal 'hellohello', parser.parse!('hello')
  def map lambda_p=nil, &p
    # nothing to apply: hand back the parser itself
    return self if lambda_p.nil? && p.nil?
    transform = lambda_p || p
    raise TypeError, 'should give a proc or lambda' unless transform.is_a?(Proc)
    Map[self, transform]
  end

  # @ desc
  #   "p.join('+')" parses strings like "p+p+p+p+p".
  #   Note that at least 1 of p appears in the string.
  #   Sometimes it is useful to reverse the joining:
  #   /\s*/.r.join('p').odd parses string like " p p  p "
  def join inter, &p
    separator = Rsec.make_parser(inter)
    Join[self, separator].map(p)
  end

  # @ desc
  #   Branch parser, note that rsec is a PEG parser generator,
  #   beware of the difference between PEG and CFG.
  def | y, &p
    alternative = Rsec.make_parser(y)
    # an existing Branch is extended in place of nesting, unless a block is given
    extend_branch = is_a?(Branch) && !p
    choices = extend_branch ? [*some, alternative] : [self, alternative]
    Branch[choices].map(p)
  end

  # @ desc
  #   Repeat n or in a range.
  #   If range.end < 0, repeat at least range.begin
  #   (Infinity and -Infinity are considered)
  def * n, &p
    # FIXME if self is an epsilon parser, will cause infinite loop
    repeated =
      case n
      when Range
        raise "invalid n: #{n}" if n.begin < 0
        Rsec.assert_type n.begin, Integer
        # an end that cannot answer infinite? counts as finite
        end_inf =
          begin
            n.end.infinite?
          rescue
            false
          end
        Rsec.assert_type(n.end, Integer) unless end_inf
        n.end > 0 ? RepeatRange[self, n] : RepeatAtLeastN[self, n.begin]
      else
        Rsec.assert_type n, Integer
        raise "invalid n: #{n}" if n < 0
        RepeatN[self, n]
      end
    repeated.map(p)
  end

  # @ desc
  #   Appears 0 or 1 times, result is wrapped in an array
  # @ example
  #   parser = 'a'.r.maybe
  #   assert_equal ['a'], parser.parse('a')
  #   assert_equal [], parser.parse('')
  def maybe &p
    optional = Maybe[self]
    optional.map(&p)
  end
  alias _? maybe

  # @ desc
  #   Kleene star, 0 or more times
  def star &p
    self.*(0..-1, &p)
  end

  # @ desc
  #   Lookahead predicate, note that other can be a very complex parser
  def & other, &p
    ahead = Rsec.make_parser(other)
    LookAhead[self, ahead].map(p)
  end

  # @ desc
  #   Negative lookahead predicate
  def ^ other, &p
    ahead = Rsec.make_parser(other)
    NegativeLookAhead[self, ahead].map(p)
  end

  # @ desc
  #   When parsing failed, show "expect tokens" error
  def fail *tokens, &p
    tokens.empty? ? self : Fail[self, tokens].map(p)
  end
  alias expect fail

  # @ desc
  #   Short for seq_(parser, other)[1]
  def >> other, &p
    kept = Rsec.make_parser(other)
    skipped = Rsec.try_skip_pattern(self)
    SeqOne_[skipped, [kept], SkipPattern[/\s*/], 1].map(p)
  end

  # @ desc
  #   Short for seq_(parser, other)[0]
  def << other, &p
    skipped = Rsec.try_skip_pattern(Rsec.make_parser(other))
    SeqOne_[self, [skipped], SkipPattern[/\s*/], 0].map(p)
  end

  # @ desc
  #   Should be end of input after parse
  def eof &p
    Eof[self].map(p)
  end

  # @ desc
  #   Packrat parser combinator, returns a parser that caches parse result, may optimize performance
  def cached &p
    Cached[self].map(p)
  end
end
|
305
|
+
|
306
|
+
# ------------------------------------------------------------------------------
|
307
|
+
# additional helper methods for special classes
|
308
|
+
|
309
|
+
class Seq
  # @ desc.seq, seq_
  #   Returns the parse result at idx, shorter and faster than map{|array| array[idx]}
  # @ example
  #   assert_equal 'b', seq('a', 'b', 'c')[1].parse('abc')
  def [] idx, &p
    elements = some()
    raise 'index out of range' if idx >= elements.size || idx < 0
    # optimize: every element whose result is discarded may become a skipper
    parsers = elements.each_with_index.map do |element, i|
      i == idx ? element : Rsec.try_skip_pattern(element)
    end
    SeqOne[parsers, idx].map(p)
  end

  # @ desc.seq, seq_, join, join.even, join.odd
  #   If parse result contains only 1 element, return the element instead of the array
  def unbox &p
    Unbox[self].map(p)
  end

  # @ desc
  #   Think about "innerHTML"!
  # @ example
  #   parser = seq('<b>', /[\w\s]+/, '</b>').inner
  #   parser.parse('<b>the inside</b>')
  def inner &p
    Inner[self].map(p)
  end
end
|
338
|
+
|
339
|
+
class Seq_
  # Returns a parser yielding only the result at idx (0 is `first`,
  # 1.. are the elements of `rest`).
  def [] idx, &p
    tail = rest()
    raise 'index out of range' if idx > tail.size || idx < 0
    # optimize parsers, use skip if possible
    head = idx.zero? ? first : Rsec.try_skip_pattern(first)
    # NOTE rest start with 1
    new_tail = tail.each_with_index.map do |element, i|
      i + 1 == idx ? element : Rsec.try_skip_pattern(element)
    end
    SeqOne_[head, new_tail, skipper, idx].map(p)
  end

  # If parse result contains only 1 element, return the element instead of the array
  def unbox &p
    Unbox[self].map(p)
  end

  # Same as Seq#inner, for the skipping sequence
  def inner &p
    Inner[self].map(p)
  end
end
|
359
|
+
|
360
|
+
class Join
  # If parse result contains only 1 element, return the element instead of the array
  def unbox &p
    Unbox[self].map(p)
  end

  # @ desc.join
  #   Only keep the even(left, token) parts
  def even &p
    # the inter part is discarded, so it may be turned into a skipper
    skipped_inter = Rsec.try_skip_pattern(right)
    JoinEven[left, skipped_inter].map(p)
  end

  # @ desc.join
  #   Only keep the odd(right, inter) parts
  def odd &p
    # the token part is discarded, so it may be turned into a skipper
    skipped_token = Rsec.try_skip_pattern(left)
    JoinOdd[skipped_token, right].map(p)
  end
end
|
377
|
+
|
378
|
+
class JoinEven
  # Wraps this parser in Unbox, then applies the optional block via map.
  def unbox &p
    unboxed = Unbox[self]
    unboxed.map(p)
  end
end
|
383
|
+
|
384
|
+
class JoinOdd
  # Wraps this parser in Unbox, then applies the optional block via map.
  def unbox &p
    unboxed = Unbox[self]
    unboxed.map(p)
  end
end
|
389
|
+
|
390
|
+
class Pattern
  # @ desc.r
  #   Scan until the pattern happens
  def until &p
    regexp = some()
    UntilPattern[regexp].map(p)
  end
end
|
397
|
+
|
398
|
+
# ------------------------------------------------------------------------------
|
399
|
+
# helper methods for parser generation
|
400
|
+
|
401
|
+
# ensure x is a parser
|
402
|
+
# ensure x is a parser: parsers pass through, anything responding to
# TO_PARSER_METHOD is converted, everything else raises TypeError
def Rsec.make_parser x
  return x if x.is_a?(Parser)
  converted = x.respond_to?(TO_PARSER_METHOD) ? x.send(TO_PARSER_METHOD) : x
  Rsec.assert_type converted, Parser
  converted
end
|
408
|
+
|
409
|
+
# type assertion
|
410
|
+
# type assertion: raises TypeError unless obj is a type, otherwise returns nil
def Rsec.assert_type obj, type
  return if obj.is_a?(type)
  raise TypeError, "#{obj} should be a #{type}"
end
|
413
|
+
|
414
|
+
# try to convert Pattern -> SkipPattern
|
415
|
+
# try to convert Pattern -> SkipPattern (and UntilPattern -> SkipUntilPattern);
# any other parser is returned unchanged
def Rsec.try_skip_pattern p
  # for C-ext
  if Rsec.const_defined?(:FixString) && p.is_a?(FixString)
    return SkipPattern[/#{Regexp.escape p.some}/]
  end

  if p.is_a?(Pattern)
    SkipPattern[p.some]
  elsif p.is_a?(UntilPattern)
    SkipUntilPattern[p.some]
  else
    p
  end
end
|
430
|
+
end
|
431
|
+
|
432
|
+
class String #:nodoc:
  # String#r: convert self to parser
  # convenient string-to-parser transformer: matches the string literally,
  # with optional "expect" tokens and an optional mapping block
  define_method(::Rsec::TO_PARSER_METHOD) do |*expects, &p|
    literal = /#{Regexp.escape self}/
    ::Rsec::Pattern[literal].fail(*expects).map(p)
  end
end
|
439
|
+
|
440
|
+
class Regexp #:nodoc:
  # Regexp#r: convert self to parser
  # convenient regexp-to-parser transformer, with optional "expect" tokens
  # and an optional mapping block
  define_method(::Rsec::TO_PARSER_METHOD) do |*expects, &p|
    pattern = ::Rsec::Pattern[self]
    pattern.fail(*expects).map(p)
  end
end
|
447
|
+
|