rsec 0.3.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/bench/bench.rb +60 -0
- data/bench/little.rb +328 -0
- data/bench/profile.rb +14 -0
- data/examples/arithmetic.rb +28 -0
- data/examples/bnf.rb +31 -0
- data/examples/c_minus.rb +175 -0
- data/examples/hello.scm +18 -0
- data/examples/s_exp.rb +17 -0
- data/examples/scheme.rb +84 -0
- data/examples/slow_json.rb +68 -0
- data/lib/rsec.rb +40 -0
- data/lib/rsec/helpers.rb +447 -0
- data/lib/rsec/parser.rb +64 -0
- data/lib/rsec/parsers/join.rb +86 -0
- data/lib/rsec/parsers/misc.rb +201 -0
- data/lib/rsec/parsers/prim.rb +102 -0
- data/lib/rsec/parsers/repeat.rb +90 -0
- data/lib/rsec/parsers/seq.rb +94 -0
- data/lib/rsec/utils.rb +116 -0
- data/license.txt +1 -0
- data/readme.rdoc +30 -0
- data/test/helpers.rb +24 -0
- data/test/test_branch.rb +14 -0
- data/test/test_examples.rb +36 -0
- data/test/test_join.rb +52 -0
- data/test/test_lookahead.rb +16 -0
- data/test/test_misc.rb +56 -0
- data/test/test_one_of.rb +39 -0
- data/test/test_pattern.rb +53 -0
- data/test/test_prim.rb +59 -0
- data/test/test_repeat.rb +50 -0
- data/test/test_rsec.rb +10 -0
- data/test/test_seq.rb +51 -0
- metadata +80 -0
data/examples/c_minus.rb
ADDED
@@ -0,0 +1,175 @@
|
|
1
|
+
$:.unshift '../lib'
|
2
|
+
$:.unshift '../ext'
|
3
|
+
require "rsec"
|
4
|
+
require "pp"
|
5
|
+
|
6
|
+
module FixPP
|
7
|
+
def pretty_print(q)
|
8
|
+
q.group(1, sprintf("<%s", self.class.name[/\w+$/]), '>') {
|
9
|
+
q.seplist(self.members, ->{}) {|member|
|
10
|
+
q.breakable
|
11
|
+
q.text member.to_s
|
12
|
+
q.text '='
|
13
|
+
q.group(1) {
|
14
|
+
q.breakable ''
|
15
|
+
q.pp self[member]
|
16
|
+
}
|
17
|
+
}
|
18
|
+
}
|
19
|
+
end
|
20
|
+
end
|
21
|
+
|
22
|
+
class CMinus
|
23
|
+
include Rsec::Helpers
|
24
|
+
extend Rsec::Helpers
|
25
|
+
|
26
|
+
# node decls
|
27
|
+
|
28
|
+
class Function < Struct.new :type, :id, :params, :body
|
29
|
+
include FixPP
|
30
|
+
end
|
31
|
+
|
32
|
+
class Expr < Struct.new :expr
|
33
|
+
include FixPP
|
34
|
+
end
|
35
|
+
|
36
|
+
class Block < Struct.new :var_decls, :statements
|
37
|
+
include FixPP
|
38
|
+
end
|
39
|
+
|
40
|
+
class Call < Struct.new :function, :args
|
41
|
+
include FixPP
|
42
|
+
end
|
43
|
+
|
44
|
+
class GetIndex < Struct.new :id, :idx
|
45
|
+
include FixPP
|
46
|
+
end
|
47
|
+
|
48
|
+
# "terminal" rules
|
49
|
+
|
50
|
+
NUM = prim :unsigned_int64
|
51
|
+
INT = prim :int64
|
52
|
+
NBSP = /[\ \t]*/.r
|
53
|
+
SPACE = /\s*/.r
|
54
|
+
ID = /[a-zA-Z]\w*/.r 'id'
|
55
|
+
TYPE = (word('int') | word('void')).fail 'type'
|
56
|
+
EOSTMT = ';'.r 'end of statement'
|
57
|
+
ELSE = word('else').fail 'keyword_else'
|
58
|
+
IF = word('if').fail 'keyword_if'
|
59
|
+
WHILE = word('while').fail 'keyword_while'
|
60
|
+
RETURN = word('return').fail 'keyword_return'
|
61
|
+
MUL_OP = symbol(/[\*\/%]/)
|
62
|
+
ADD_OP = symbol(/[\+\-]/)
|
63
|
+
# Comparison operators. The two-character operators must be listed before
# their one-character prefixes: regexp alternation takes the first alternative
# that matches, so `\>` placed ahead of `\>=` would consume only `>` and
# leave the `=` behind, making `>=` unparseable.
COMP_OP = symbol(/(\<=|\>=|==|!=|\<|\>)/).fail 'compare operator'
|
64
|
+
COMMA = /\s*,\s*/.r 'comma'
|
65
|
+
EMPTY_BRA = /\[\s*\]/.r 'empty square bracket'
|
66
|
+
|
67
|
+
# call(function apply) expression
|
68
|
+
def call expr
|
69
|
+
args = expr.join(COMMA).even
|
70
|
+
seq_(ID, '(', args._?, ')') {
|
71
|
+
|(id, _, args, _)|
|
72
|
+
Call[id, *args]
|
73
|
+
}
|
74
|
+
end
|
75
|
+
|
76
|
+
# (binary) expression
|
77
|
+
def expression
|
78
|
+
binary_arithmetic = lazy{factor}
|
79
|
+
.join(MUL_OP).unbox
|
80
|
+
.join(ADD_OP).unbox
|
81
|
+
.join(COMP_OP).unbox
|
82
|
+
expr = lazy{assign} | binary_arithmetic
|
83
|
+
# abc
|
84
|
+
# abc[12]
|
85
|
+
var = seq_(ID, seq_('[', expr, ']')[1]._?) {
|
86
|
+
|(id, (index))|
|
87
|
+
index ? GetIndex[id, index] : id
|
88
|
+
}
|
89
|
+
assign = seq_(var, '=', expr)
|
90
|
+
factor = seq_('(', expr, ')')[1] | call(expr) | var | INT
|
91
|
+
# p expr.parse! "gcd (v ,u- u/v *v)"
|
92
|
+
expr.map{|e| Expr[e] }
|
93
|
+
end
|
94
|
+
|
95
|
+
# statement parser builder, returns [stmt, block]
|
96
|
+
def statement var_decl
|
97
|
+
expr = expression()
|
98
|
+
brace = seq_('(', expr, ')')[1]
|
99
|
+
# statement
|
100
|
+
_stmt = lazy{stmt} # to reduce the use of lazy{}
|
101
|
+
|
102
|
+
expr_stmt = seq_(expr, EOSTMT)[0] | EOSTMT
|
103
|
+
else_stmt = seq_(ELSE, _stmt)[1]
|
104
|
+
if_stmt = seq_(IF, brace, _stmt, else_stmt._?)
|
105
|
+
while_stmt = seq_(WHILE, brace, _stmt)
|
106
|
+
return_stmt = seq_(RETURN, expr._?, EOSTMT){
|
107
|
+
|(ret, maybe_expr)|
|
108
|
+
[ret, *maybe_expr]
|
109
|
+
}
|
110
|
+
# { var_decls statements }
|
111
|
+
block = seq('{', SPACE.join(var_decl).odd, SPACE.join(_stmt).odd, '}'){
|
112
|
+
|(_, vars, stats, _)|
|
113
|
+
Block[vars, stats]
|
114
|
+
}
|
115
|
+
stmt = block | if_stmt | while_stmt | return_stmt | expr_stmt
|
116
|
+
# p if_stmt.parse! 'if(v == 0)return u;'
|
117
|
+
[stmt, block]
|
118
|
+
end
|
119
|
+
|
120
|
+
def initialize
|
121
|
+
type_id = seq_(TYPE, ID).cached
|
122
|
+
# p type_id.parse! 'int a'
|
123
|
+
|
124
|
+
var_decl = seq_(type_id, seq_('[', NUM, ']')[1]._?, EOSTMT){
|
125
|
+
|(id, maybe_num)|
|
126
|
+
[id, *maybe_num]
|
127
|
+
}
|
128
|
+
# p var_decl.parse! 'int a[12];'
|
129
|
+
# p var_decl.parse! 'int a;'
|
130
|
+
|
131
|
+
stmt, block = statement(var_decl)
|
132
|
+
# p block.parse! "{int a;}"
|
133
|
+
# p stmt.parse! 'if(3==2) {return 4;}'
|
134
|
+
|
135
|
+
param = seq_(type_id, EMPTY_BRA._?) {
|
136
|
+
|((ty, id), maybe_bra)|
|
137
|
+
[ty, id, *maybe_bra]
|
138
|
+
}
|
139
|
+
params = param.join(COMMA).even | 'void'.r{[]}
|
140
|
+
brace = seq_('(', params, ')')[1]
|
141
|
+
fun_decl = seq_(type_id, brace, block){
|
142
|
+
|(type, id), params, block|
|
143
|
+
Function[type, id, params, block]
|
144
|
+
}
|
145
|
+
# p fun_decl.parse! 'int gcd(int u, int v){return 2;}'
|
146
|
+
@program = SPACE.join(fun_decl | var_decl | EOSTMT).odd.eof
|
147
|
+
end
|
148
|
+
|
149
|
+
attr_reader :program
|
150
|
+
end
|
151
|
+
|
152
|
+
if __FILE__ == $PROGRAM_NAME
|
153
|
+
c_minus = CMinus.new
|
154
|
+
nodes = c_minus.program.parse! %Q[
|
155
|
+
int gcd(int u, int v)
|
156
|
+
{
|
157
|
+
if (v == 0) return u;
|
158
|
+
else return gcd(v,u-u / v*v);
|
159
|
+
}
|
160
|
+
|
161
|
+
void main(void)
|
162
|
+
{
|
163
|
+
int x; int y;
|
164
|
+
while (1) {
|
165
|
+
x = input();
|
166
|
+
y = input();
|
167
|
+
output(gcd(x ,y)) ;
|
168
|
+
}
|
169
|
+
}
|
170
|
+
]
|
171
|
+
nodes.each do |node|
|
172
|
+
pp node
|
173
|
+
end
|
174
|
+
end
|
175
|
+
|
data/examples/hello.scm
ADDED
@@ -0,0 +1,18 @@
|
|
1
|
+
(display 4)
|
2
|
+
|
3
|
+
(define (fact x)
|
4
|
+
(if (= x 0)
|
5
|
+
1
|
6
|
+
(* x (fact (- x 1)))))
|
7
|
+
|
8
|
+
(display (fact 6))
|
9
|
+
|
10
|
+
(define (add x)
|
11
|
+
(lambda (y)
|
12
|
+
(+ x y)))
|
13
|
+
|
14
|
+
(define add4 (add 4))
|
15
|
+
(define add5 (add 5))
|
16
|
+
|
17
|
+
(display (add4 3))
|
18
|
+
(display (add5 3))
|
data/examples/s_exp.rb
ADDED
@@ -0,0 +1,17 @@
|
|
1
|
+
# s-expression parser
|
2
|
+
|
3
|
+
require "rsec"
|
4
|
+
|
5
|
+
include Rsec::Helpers
|
6
|
+
|
7
|
+
def s_exp
|
8
|
+
id = /[a-zA-Z][\w\-]*/.r.fail 'id'
|
9
|
+
num = prim(:double).fail 'num'
|
10
|
+
|
11
|
+
naked_unit = id | num | seq_('(', lazy{exp}, ')')[1]
|
12
|
+
unit = naked_unit | seq_('(', lazy{unit}, ')')[1]
|
13
|
+
units = unit.join(/\s+/).even._?
|
14
|
+
exp = seq_(id, units) {|(id, (units))| [id, *units]}
|
15
|
+
seq_('(', exp, ')')[1].eof
|
16
|
+
end
|
17
|
+
|
data/examples/scheme.rb
ADDED
@@ -0,0 +1,84 @@
|
|
1
|
+
# A simple-as-shit scheme interpreter. Usage: ruby scheme.rb hello.scm
|
2
|
+
require "rsec"
|
3
|
+
|
4
|
+
class Scheme
|
5
|
+
include Rsec::Helpers
|
6
|
+
|
7
|
+
Value = Struct.new :val
|
8
|
+
|
9
|
+
# A scope of bindings: a Hash of this scope's own names, backed by a parent
# scope that is consulted for names not bound here.
class Bind < Hash
  # parent: the enclosing scope (anything responding to #[]), {} for the root
  def initialize parent = {}
    @parent = parent
  end

  # Bind id to a primitive implemented by block p.
  # The stored lambda receives the calling scope and the unevaluated operands,
  # evaluates each operand in that scope and passes the values to p.
  def define id, &p # define lambda
    self[id] = -> bind, xs {
      p[* xs.map{|x| bind.eval x }]
    }
  end

  # Evaluate a parsed node in this scope:
  #   Value  -> the wrapped literal
  #   String -> the value bound to that name
  #   Array  -> a call when the head is a name, a sequence when the head is a list
  def eval node
    case node
    when Value; node.val
    when String; self[node]
    when Array
      head, *tail = node
      case head
      when String
        pr = self[head]
        pr.is_a?(Proc) ? pr[self, tail] : pr # invoke lambda
      when Array
        node.map{|n| self.eval n }.last # sequence execution
      end
    end
  end

  # Look up key here first, then in the parent scope.
  # Uses key? rather than the truthiness of the stored value, so a name bound
  # to false or nil in this scope still shadows the parent's binding.
  def [] key
    key?(key) ? super(key) : @parent[key]
  end
end
|
40
|
+
|
41
|
+
def initialize
|
42
|
+
boolean = /\#[tf]/. r {|n| Value[n=='#t'] }
|
43
|
+
integer = /0|[1-9]\d*/.r {|n| Value[n.to_i] }
|
44
|
+
id = /[^\s\(\)\[\]]+/.r
|
45
|
+
atom = boolean | integer | id
|
46
|
+
cell = atom | lazy{list}
|
47
|
+
cells = /\s*/.r.join(cell).odd
|
48
|
+
list = '('.r >> cells << ')'
|
49
|
+
@parser = cells.eof
|
50
|
+
|
51
|
+
@vm = Bind.new
|
52
|
+
@vm['define'] = -> bind, (param, body) {
|
53
|
+
if param.is_a?(String)
|
54
|
+
@vm[param] = bind.eval body
|
55
|
+
else
|
56
|
+
func, *xs = param
|
57
|
+
@vm[func] = @vm['lambda'][bind, [xs, body]]
|
58
|
+
end
|
59
|
+
}
|
60
|
+
# declare: (lambda (xs[0] xs[1]) body)
|
61
|
+
@vm['lambda'] = -> bind_def, (xs, body) {
|
62
|
+
xs = [xs] if xs.is_a?(String)
|
63
|
+
# calling: (some vs[0] vs[1])
|
64
|
+
-> bind_call, vs {
|
65
|
+
vs = vs.map{|v| bind_call.eval v }
|
66
|
+
new_bind = Bind.new bind_def
|
67
|
+
xs.zip(vs){|x, v| new_bind[x] = v }
|
68
|
+
new_bind.eval body
|
69
|
+
}
|
70
|
+
}
|
71
|
+
@vm['if'] = -> bind, (p, left, right) {
|
72
|
+
bind.eval(bind.eval(p) ? left : right)
|
73
|
+
}
|
74
|
+
%w|+ - * / ** % > <|.each{|s| @vm.define s, &s.to_sym }
|
75
|
+
@vm.define '=', &:==
|
76
|
+
@vm.define('display'){|x| puts x}
|
77
|
+
end
|
78
|
+
|
79
|
+
def run source
|
80
|
+
@vm.eval @parser.parse! source
|
81
|
+
end
|
82
|
+
end
|
83
|
+
|
84
|
+
ARGV[0] ? Scheme.new.run(File.read ARGV[0]) : puts('need a scheme file name')
|
data/examples/slow_json.rb
ADDED
@@ -0,0 +1,68 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
|
3
|
+
# grammar from
|
4
|
+
# http://www.json.org/
|
5
|
+
|
6
|
+
require "rsec"
|
7
|
+
|
8
|
+
class SlowJSON
|
9
|
+
|
10
|
+
include Rsec::Helper
|
11
|
+
|
12
|
+
def initialize
|
13
|
+
generate_parser
|
14
|
+
@parser = seq(/\s*/, @value, /\s*/)[1].eof
|
15
|
+
end
|
16
|
+
|
17
|
+
def parse s
|
18
|
+
@parser.parse! s
|
19
|
+
end
|
20
|
+
|
21
|
+
private
|
22
|
+
|
23
|
+
# term (, term)*
|
24
|
+
def elem_parser term
|
25
|
+
term.join(/\s*,\s*/.r).even
|
26
|
+
end
|
27
|
+
|
28
|
+
def chars_parser
|
29
|
+
unicode_bytes = /[0-9a-f]{4}/i.r{|bytes|
|
30
|
+
[bytes].pack('H*').force_encoding('utf-16be').encode!('utf-8')
|
31
|
+
}
|
32
|
+
escape_char = '"'.r | "\\" | '/' |
|
33
|
+
'b'.r{"\b"} |
|
34
|
+
'f'.r{"\f"} |
|
35
|
+
'n'.r{"\n"} |
|
36
|
+
'r'.r{"\r"} |
|
37
|
+
't'.r{"\t"} |
|
38
|
+
seq('u'.r, unicode_bytes)[1]
|
39
|
+
/[^"\\]+/.r | seq('\\', escape_char)[1]
|
40
|
+
end
|
41
|
+
|
42
|
+
def generate_parser
|
43
|
+
string = '"'.r >> chars_parser.star.map(&:join) << '"'
|
44
|
+
# -? int frac? exp?
|
45
|
+
number = prim(:double, allowed_sign: '-')
|
46
|
+
@value = string | number | lazy{@object} | lazy{@array} |
|
47
|
+
'true'.r{true} |
|
48
|
+
'false'.r{false} |
|
49
|
+
'null'.r{nil}
|
50
|
+
pair = seq(string, /\s*:\s*/.r, @value){|k, _, v| [k, v]}
|
51
|
+
@array = /\[\s*\]/.r{[]} | '['.r >> elem_parser(@value) << ']'
|
52
|
+
@object = /\{\s*\}/.r{{}} | ('{'.r >> elem_parser(pair) << '}').map{|arr|Hash[arr]}
|
53
|
+
end
|
54
|
+
|
55
|
+
end
|
56
|
+
|
57
|
+
if __FILE__ == $PROGRAM_NAME
|
58
|
+
j = SlowJSON.new
|
59
|
+
p j.parse '""'
|
60
|
+
p j.parse '123.4e5'
|
61
|
+
p j.parse 'null'
|
62
|
+
p j.parse '[]'
|
63
|
+
p j.parse '{}'
|
64
|
+
p j.parse '{"no": [3, 4]}'
|
65
|
+
p j.parse '[{}]'
|
66
|
+
p j.parse '[{"S":321061,"T":"GetAttributeResp"},{"ERROR":null,"TS":0,"VAL":{"SqlList":[{"BatchSizeMax":0,"BatchSizeTotal":0,"ConcurrentMax":1,"DataSource":"jdbc:wrap-jdbc:filters=default,encoding:name=ds-offer:jdbc:mysql://100.10.10.10:8066/xxxx","EffectedRowCount":0,"ErrorCount":0,"ExecuteCount":5,"FetchRowCount":5,"File":null,"ID":2001,"LastError":null,"LastTime":1292742908178,"MaxTimespan":16,"MaxTimespanOccurTime":1292742668191,"Name":null,"RunningCount":0,"SQL":"SELECT @@SQL_MODE","TotalTime":83}]}}]'
|
67
|
+
end
|
68
|
+
|
data/lib/rsec.rb
ADDED
@@ -0,0 +1,40 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
# load the gem
|
3
|
+
|
4
|
+
# All code is under this module
|
5
|
+
module Rsec
|
6
|
+
# preload configs
|
7
|
+
|
8
|
+
# config method name
|
9
|
+
# default is :r
|
10
|
+
unless Rsec.const_defined?(:TO_PARSER_METHOD)
|
11
|
+
TO_PARSER_METHOD = :r
|
12
|
+
end
|
13
|
+
|
14
|
+
# config C extension usage
|
15
|
+
# options:
|
16
|
+
# :try - default
|
17
|
+
# :no - don't use
|
18
|
+
# :yes - use
|
19
|
+
unless Rsec.const_defined?(:USE_CEXT)
|
20
|
+
USE_CEXT = :try
|
21
|
+
end
|
22
|
+
|
23
|
+
VERSION = '0.3'
|
24
|
+
end
|
25
|
+
|
26
|
+
require "strscan"
|
27
|
+
require "rsec/utils"
|
28
|
+
require "rsec/parser"
|
29
|
+
require "rsec/helpers"
|
30
|
+
|
31
|
+
# Load the optional C extension according to Rsec::USE_CEXT.
case Rsec::USE_CEXT
when :try
  # Best effort: carry on without the extension when it cannot be loaded.
  # LoadError is a ScriptError, not a StandardError, so the modifier form
  # `require ... rescue nil` would let it propagate; it has to be named.
  begin
    require "rsec/ext"
  rescue LoadError, StandardError
    nil
  end
when :yes
  # mandatory: let any load failure propagate
  require "rsec/ext"
when :no
  # extension explicitly disabled
else
  warn "Rsec::USE_CEXT should be one of :try, :yes, :no"
end
|
40
|
+
|
data/lib/rsec/helpers.rb
ADDED
@@ -0,0 +1,447 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
# ------------------------------------------------------------------------------
|
3
|
+
# Helpers(combinators) to construct parser
|
4
|
+
|
5
|
+
module Rsec #:nodoc:
|
6
|
+
|
7
|
+
# ------------------------------------------------------------------------------
|
8
|
+
# these are not callable from a parser
|
9
|
+
module Helpers
|
10
|
+
|
11
|
+
# @ desc.helper
|
12
|
+
# Lazy parser
|
13
|
+
# @ example
|
14
|
+
# parser = lazy{future}
|
15
|
+
# future = 'jim'.r
|
16
|
+
# assert_equal 'jim', parser.parse '12323'
|
17
|
+
def lazy &p
|
18
|
+
raise ArgumentError, 'lazy() requires a block' unless p
|
19
|
+
Lazy[p]
|
20
|
+
end
|
21
|
+
|
22
|
+
# @ desc.helper
|
23
|
+
# Parses one of chars in str
|
24
|
+
# @ example
|
25
|
+
# multiplicative = one_of '*/%'
|
26
|
+
# assert_equal '/', multiplicative.parse '/'
|
27
|
+
#   assert_equal Rsec::INVALID, multiplicative.parse '+'
|
28
|
+
def one_of str, &p
|
29
|
+
Rsec.assert_type str, String
|
30
|
+
raise ArgumentError, 'str len should > 0' if str.empty?
|
31
|
+
one_of_klass =
|
32
|
+
if (str.bytesize == str.size) and Rsec.const_defined?(:OneOfByte)
|
33
|
+
# for C-ext
|
34
|
+
OneOfByte
|
35
|
+
else
|
36
|
+
OneOf
|
37
|
+
end
|
38
|
+
one_of_klass[str.dup.freeze].map p
|
39
|
+
end
|
40
|
+
|
41
|
+
# @ desc.helper
|
42
|
+
# See also #one_of#, with leading and trailing optional breakable spaces
|
43
|
+
# @ example
|
44
|
+
# additive = one_of_('+-')
|
45
|
+
# assert_equal '+', additive.parse(' +')
|
46
|
+
def one_of_ str, &p
|
47
|
+
Rsec.assert_type str, String
|
48
|
+
raise ArgumentError, 'str len should > 0' if str.empty?
|
49
|
+
raise ArgumentError, 'str should be ascii' unless str.bytesize == str.size
|
50
|
+
raise ArgumentError, 'str should not contain space' if str =~ /\s/
|
51
|
+
spaced_one_of_klass =
|
52
|
+
if (str.bytesize == str.size) and Rsec.const_defined?(:OneOfByte_)
|
53
|
+
# for C-ext
|
54
|
+
OneOfByte_
|
55
|
+
else
|
56
|
+
OneOf_
|
57
|
+
end
|
58
|
+
spaced_one_of_klass[str.dup.freeze].map p
|
59
|
+
end
|
60
|
+
|
61
|
+
# @ desc.helper
|
62
|
+
# Primitive parser, returns nil if overflow or underflow.
|
63
|
+
# There can be an optional '+' or '-' at the beginning of string except unsigned_int32 | unsigned_int64.
|
64
|
+
# type =
|
65
|
+
# :double |
|
66
|
+
# :hex_double |
|
67
|
+
# :int32 |
|
68
|
+
# :int64 |
|
69
|
+
# :unsigned_int32 |
|
70
|
+
# :unsigned_int64
|
71
|
+
# options:
|
72
|
+
# :allowed_sign => '+' | '-' | '' | '+-' (default '+-')
|
73
|
+
# :allowed_signs => (same as :allowed_sign)
|
74
|
+
# :base => integer only (default 10)
|
75
|
+
# @ example
|
76
|
+
# p = prim :double
|
77
|
+
# assert_equal 1.23, p.parse('1.23')
|
78
|
+
# p = prim :double, allowed_sign: '-'
|
79
|
+
# assert_equal 1.23, p.parse('1.23')
|
80
|
+
# assert_equal -1.23, p.parse('-1.23')
|
81
|
+
# assert_equal Rsec::INVALID, p.parse('+1.23')
|
82
|
+
# p = prim :int32, base: 36
|
83
|
+
# assert_equal 49713, p.parse('12cx')
|
84
|
+
def prim type, options={}, &p
  base = options[:base]
  # :base is only meaningful for the integer types; reject it when the
  # requested *type* is a floating point one.
  if base and [:double, :hex_double].include?(type)
    raise 'Floating points does not allow :base'
  end
  base ||= 10
  # Integer rather than Fixnum: Fixnum was unified into Integer in Ruby 2.4
  # and the constant was removed in Ruby 3.2.
  Rsec.assert_type base, Integer
  unless (2..36).include? base
    raise RangeError, ":base should be in 2..36, but got #{base}"
  end

  # :allowed_sign and :allowed_signs are synonyms; the first one given wins.
  allowed_sign = (options[:allowed_sign] or options[:allowed_signs])
  # numeric code handed to the primitive parsers:
  #   3 = '+' or '-', 2 = '+' only, 1 = '-' only, 0 = no sign
  sign_strategy = \
    case allowed_sign
    when nil, '+-', '-+'; 3
    when '+'; 2
    when '-'; 1
    when ''; 0
    else raise "allowed_sign should be one of nil, '', '+', '-', '+-', '-+'"
    end

  # An explicitly requested '-' makes no sense for unsigned types, whichever
  # of the two option keys was used to ask for it.
  explicit_minus = allowed_sign.to_s.include?('-')

  parser = \
    case type
    when :double; PDouble.new sign_strategy, false # decimal
    when :hex_double; PDouble.new sign_strategy, true # hex
    when :int32; PInt32.new sign_strategy, base
    when :int64; PInt64.new sign_strategy, base
    when :unsigned_int32;
      raise 'unsigned int not allow - sign' if explicit_minus
      PUnsignedInt32.new sign_strategy, base
    when :unsigned_int64;
      raise 'unsigned int not allow - sign' if explicit_minus
      PUnsignedInt64.new sign_strategy, base
    else
      raise "Invalid primitive type #{type}"
    end
  # attach the optional result transformer (map returns the parser itself
  # when p is nil)
  parser.map p
end
|
121
|
+
|
122
|
+
# @ desc.helper
|
123
|
+
# Sequence parser
|
124
|
+
# @ example
|
125
|
+
#   assert_equal ['a', 'b', 'c'], seq('a', 'b', 'c').parse('abc')
|
126
|
+
def seq *xs, &p
|
127
|
+
xs.map! {|x| Rsec.make_parser x }
|
128
|
+
Seq[xs].map p
|
129
|
+
end
|
130
|
+
|
131
|
+
# @ desc.helper
|
132
|
+
# Sequence parser with skippable pattern(or parser)
|
133
|
+
# option
|
134
|
+
# :skip default= /\s*/
|
135
|
+
# @ example
|
136
|
+
#   assert_equal ['a', 'b', 'c'], seq_('a', 'b', 'c', skip: ',').parse('a,b,c')
|
137
|
+
def seq_ *xs, &p
|
138
|
+
skipper =
|
139
|
+
if (xs.last.is_a? Hash)
|
140
|
+
xs.pop[:skip]
|
141
|
+
end
|
142
|
+
skipper = skipper ? Rsec.make_parser(skipper) : /\s*/.r
|
143
|
+
xs.map! {|x| Rsec.make_parser x }
|
144
|
+
first, *rest = xs
|
145
|
+
raise 'sequence should not be empty' unless first
|
146
|
+
Seq_[first, rest, skipper].map p
|
147
|
+
end
|
148
|
+
|
149
|
+
# @ desc.helper
|
150
|
+
# A symbol is something wrapped with optional space
|
151
|
+
def symbol pattern, skip=/\s*/, &p
|
152
|
+
pattern = Rsec.make_parser pattern
|
153
|
+
skip = Rsec.try_skip_pattern Rsec.make_parser skip
|
154
|
+
SeqOne[[skip, pattern, skip], 1].map p
|
155
|
+
end
|
156
|
+
|
157
|
+
# @ desc.helper
|
158
|
+
# A word is wrapped with word boundaries
|
159
|
+
# @ example
|
160
|
+
# assert_equal ['yes', '3'], seq('yes', '3').parse('yes3')
|
161
|
+
# assert_equal INVALID, seq(word('yes'), '3').parse('yes3')
|
162
|
+
def word pattern, &p
|
163
|
+
parser = Rsec.make_parser pattern
|
164
|
+
# TODO check pattern type
|
165
|
+
Pattern[/\b#{parser.some}\b/].map p
|
166
|
+
end
|
167
|
+
end # helpers
|
168
|
+
|
169
|
+
# robust
|
170
|
+
Helper = Helpers
|
171
|
+
|
172
|
+
# ------------------------------------------------------------------------------
|
173
|
+
# combinators attached to parsers
|
174
|
+
|
175
|
+
module Parser #:nodoc:
|
176
|
+
|
177
|
+
# @ desc
|
178
|
+
# Transform result
|
179
|
+
# @ example
|
180
|
+
# parser = /\w+/.r.map{|word| word * 2}
|
181
|
+
# assert_equal 'hellohello', parser.parse!('hello')
|
182
|
+
def map lambda_p=nil, &p
|
183
|
+
return self if (lambda_p.nil? and p.nil?)
|
184
|
+
p = lambda_p || p
|
185
|
+
raise TypeError, 'should give a proc or lambda' unless (p.is_a? Proc)
|
186
|
+
Map[self, p]
|
187
|
+
end
|
188
|
+
|
189
|
+
# @ desc
|
190
|
+
# "p.join('+')" parses strings like "p+p+p+p+p".
|
191
|
+
# Note that at least 1 of p appears in the string.
|
192
|
+
# Sometimes it is useful to reverse the joining:
|
193
|
+
# /\s*/.r.join('p').odd parses string like " p p p "
|
194
|
+
def join inter, &p
|
195
|
+
inter = Rsec.make_parser inter
|
196
|
+
Join[self, inter].map p
|
197
|
+
end
|
198
|
+
|
199
|
+
# @ desc
|
200
|
+
# Branch parser, note that rsec is a PEG parser generator,
|
201
|
+
# beware of the difference between PEG and CFG.
|
202
|
+
def | y, &p
|
203
|
+
y = Rsec.make_parser y
|
204
|
+
arr =
|
205
|
+
if (is_a?(Branch) and !p)
|
206
|
+
[*some, y]
|
207
|
+
else
|
208
|
+
[self, y]
|
209
|
+
end
|
210
|
+
Branch[arr].map p
|
211
|
+
end
|
212
|
+
|
213
|
+
# @ desc
|
214
|
+
# Repeat n or in a range.
|
215
|
+
# If range.end < 0, repeat at least range.begin
|
216
|
+
# (Infinity and -Infinity are considered)
|
217
|
+
def * n, &p
|
218
|
+
# FIXME if self is an epsilon parser, will cause infinite loop
|
219
|
+
parser =
|
220
|
+
if n.is_a?(Range)
|
221
|
+
raise "invalid n: #{n}" if n.begin < 0
|
222
|
+
Rsec.assert_type n.begin, Integer
|
223
|
+
end_inf = (n.end.infinite? rescue false)
|
224
|
+
(Rsec.assert_type n.end, Integer) unless end_inf
|
225
|
+
if n.end > 0
|
226
|
+
RepeatRange[self, n]
|
227
|
+
else
|
228
|
+
RepeatAtLeastN[self, n.begin]
|
229
|
+
end
|
230
|
+
else
|
231
|
+
Rsec.assert_type n, Integer
|
232
|
+
raise "invalid n: #{n}" if n < 0
|
233
|
+
RepeatN[self, n]
|
234
|
+
end
|
235
|
+
parser.map p
|
236
|
+
end
|
237
|
+
|
238
|
+
# @ desc
|
239
|
+
# Appears 0 or 1 times, result is wrapped in an array
|
240
|
+
# @ example
|
241
|
+
# parser = 'a'.r.maybe
|
242
|
+
# assert_equal ['a'], parser.parse('a')
|
243
|
+
# assert_equal [], parser.parse('')
|
244
|
+
def maybe &p
|
245
|
+
Maybe[self].map &p
|
246
|
+
end
|
247
|
+
alias _? maybe
|
248
|
+
|
249
|
+
# @ desc
|
250
|
+
# Kleene star: repeats 0 or more times
|
251
|
+
def star &p
|
252
|
+
self.* (0..-1), &p
|
253
|
+
end
|
254
|
+
|
255
|
+
# @ desc
|
256
|
+
# Lookahead predicate, note that other can be a very complex parser
|
257
|
+
def & other, &p
|
258
|
+
other = Rsec.make_parser other
|
259
|
+
LookAhead[self, other].map p
|
260
|
+
end
|
261
|
+
|
262
|
+
# @ desc
|
263
|
+
# Negative lookahead predicate
|
264
|
+
def ^ other, &p
|
265
|
+
other = Rsec.make_parser other
|
266
|
+
NegativeLookAhead[self, other].map p
|
267
|
+
end
|
268
|
+
|
269
|
+
# @ desc
|
270
|
+
# When parsing failed, show "expect tokens" error
|
271
|
+
def fail *tokens, &p
|
272
|
+
return self if tokens.empty?
|
273
|
+
Fail[self, tokens].map p
|
274
|
+
end
|
275
|
+
alias expect fail
|
276
|
+
|
277
|
+
# @ desc
|
278
|
+
# Short for seq_(parser, other)[1]
|
279
|
+
def >> other, &p
|
280
|
+
other = Rsec.make_parser other
|
281
|
+
left = Rsec.try_skip_pattern self
|
282
|
+
SeqOne_[left, [other], SkipPattern[/\s*/], 1].map p
|
283
|
+
end
|
284
|
+
|
285
|
+
# @ desc
|
286
|
+
# Short for seq_(parser, other)[0]
|
287
|
+
def << other, &p
|
288
|
+
other = Rsec.make_parser other
|
289
|
+
right = Rsec.try_skip_pattern other
|
290
|
+
SeqOne_[self, [right], SkipPattern[/\s*/], 0].map p
|
291
|
+
end
|
292
|
+
|
293
|
+
# @ desc
|
294
|
+
# Should be end of input after parse
|
295
|
+
def eof &p
|
296
|
+
Eof[self].map p
|
297
|
+
end
|
298
|
+
|
299
|
+
# @ desc
|
300
|
+
# Packrat parser combinator, returns a parser that caches parse result, may optimize performance
|
301
|
+
def cached &p
|
302
|
+
Cached[self].map p
|
303
|
+
end
|
304
|
+
end
|
305
|
+
|
306
|
+
# ------------------------------------------------------------------------------
|
307
|
+
# additional helper methods for special classes
|
308
|
+
|
309
|
+
class Seq
|
310
|
+
# @ desc.seq, seq_
|
311
|
+
# Returns the parse result at idx, shorter and faster than map{|array| array[idx]}
|
312
|
+
# @ example
|
313
|
+
# assert_equal 'b', seq('a', 'b', 'c')[1].parse('abc')
|
314
|
+
def [] idx, &p
|
315
|
+
raise 'index out of range' if (idx >= some().size or idx < 0)
|
316
|
+
# optimize
|
317
|
+
parsers = some().map.with_index do |p, i|
|
318
|
+
i == idx ? p : Rsec.try_skip_pattern(p)
|
319
|
+
end
|
320
|
+
SeqOne[parsers, idx].map p
|
321
|
+
end
|
322
|
+
|
323
|
+
# @ desc.seq, seq_, join, join.even, join.odd
|
324
|
+
# If parse result contains only 1 element, return the element instead of the array
|
325
|
+
def unbox &p
|
326
|
+
Unbox[self].map p
|
327
|
+
end
|
328
|
+
|
329
|
+
# @ desc
|
330
|
+
# Think about "innerHTML"!
|
331
|
+
# @ example
|
332
|
+
# parser = seq('<b>', /[\w\s]+/, '</b>').inner
|
333
|
+
# parser.parse('<b>the inside</b>')
|
334
|
+
def inner &p
|
335
|
+
Inner[self].map p
|
336
|
+
end
|
337
|
+
end
|
338
|
+
|
339
|
+
class Seq_
|
340
|
+
def [] idx, &p
|
341
|
+
raise 'index out of range' if idx > rest.size or idx < 0
|
342
|
+
# optimize parsers, use skip if possible
|
343
|
+
new_first = (0 == idx ? first : Rsec.try_skip_pattern(first))
|
344
|
+
new_rest = rest().map.with_index do |p, i|
|
345
|
+
# NOTE rest start with 1
|
346
|
+
(i+1) == idx ? p : Rsec.try_skip_pattern(p)
|
347
|
+
end
|
348
|
+
SeqOne_[new_first, new_rest, skipper, idx].map p
|
349
|
+
end
|
350
|
+
|
351
|
+
def unbox &p
|
352
|
+
Unbox[self].map p
|
353
|
+
end
|
354
|
+
|
355
|
+
def inner &p
|
356
|
+
Inner[self].map p
|
357
|
+
end
|
358
|
+
end
|
359
|
+
|
360
|
+
class Join
|
361
|
+
def unbox &p
|
362
|
+
Unbox[self].map p
|
363
|
+
end
|
364
|
+
|
365
|
+
# @ desc.join
|
366
|
+
# Only keep the even(left, token) parts
|
367
|
+
def even &p
|
368
|
+
JoinEven[left, Rsec.try_skip_pattern(right)].map p
|
369
|
+
end
|
370
|
+
|
371
|
+
# @ desc.join
|
372
|
+
# Only keep the odd(right, inter) parts
|
373
|
+
def odd &p
|
374
|
+
JoinOdd[Rsec.try_skip_pattern(left), right].map p
|
375
|
+
end
|
376
|
+
end
|
377
|
+
|
378
|
+
class JoinEven
|
379
|
+
def unbox &p
|
380
|
+
Unbox[self].map p
|
381
|
+
end
|
382
|
+
end
|
383
|
+
|
384
|
+
class JoinOdd
|
385
|
+
def unbox &p
|
386
|
+
Unbox[self].map p
|
387
|
+
end
|
388
|
+
end
|
389
|
+
|
390
|
+
class Pattern
|
391
|
+
# @ desc.r
|
392
|
+
# Scan until the pattern happens
|
393
|
+
def until &p
|
394
|
+
UntilPattern[some()].map p
|
395
|
+
end
|
396
|
+
end
|
397
|
+
|
398
|
+
# ------------------------------------------------------------------------------
|
399
|
+
# helper methods for parser generation
|
400
|
+
|
401
|
+
# ensure x is a parser
|
402
|
+
def Rsec.make_parser x
|
403
|
+
return x if x.is_a?(Parser)
|
404
|
+
x = x.send(TO_PARSER_METHOD) if x.respond_to?(TO_PARSER_METHOD)
|
405
|
+
Rsec.assert_type x, Parser
|
406
|
+
x
|
407
|
+
end
|
408
|
+
|
409
|
+
# type assertion
|
410
|
+
def Rsec.assert_type obj, type
|
411
|
+
(raise TypeError, "#{obj} should be a #{type}") unless (obj.is_a? type)
|
412
|
+
end
|
413
|
+
|
414
|
+
# try to convert Pattern -> SkipPattern
|
415
|
+
def Rsec.try_skip_pattern p
|
416
|
+
# for C-ext
|
417
|
+
if Rsec.const_defined?(:FixString) and p.is_a?(FixString)
|
418
|
+
return SkipPattern[/#{Regexp.escape p.some}/]
|
419
|
+
end
|
420
|
+
|
421
|
+
case p
|
422
|
+
when Pattern
|
423
|
+
SkipPattern[p.some]
|
424
|
+
when UntilPattern
|
425
|
+
SkipUntilPattern[p.some]
|
426
|
+
else
|
427
|
+
p
|
428
|
+
end
|
429
|
+
end
|
430
|
+
end
|
431
|
+
|
432
|
+
class String #:nodoc:
|
433
|
+
# String#r: convert self to parser
|
434
|
+
# convenient string-to-parser transformer
|
435
|
+
define_method ::Rsec::TO_PARSER_METHOD, ->(*expects, &p){
|
436
|
+
::Rsec::Pattern[/#{Regexp.escape self}/].fail(*expects).map p
|
437
|
+
}
|
438
|
+
end
|
439
|
+
|
440
|
+
class Regexp #:nodoc:
|
441
|
+
# Regexp#r: convert self to parser
|
442
|
+
# convenient regexp-to-parser transformer
|
443
|
+
define_method ::Rsec::TO_PARSER_METHOD, ->(*expects, &p){
|
444
|
+
::Rsec::Pattern[self].fail(*expects).map p
|
445
|
+
}
|
446
|
+
end
|
447
|
+
|