rsec 0.3.2

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,175 @@
1
+ $:.unshift '../lib'
2
+ $:.unshift '../ext'
3
+ require "rsec"
4
+ require "pp"
5
+
6
# Mixin for Struct-based nodes: replaces the default pretty-print layout with
# a compact "<Name member=value member=value>" form.
module FixPP
  # Renders the struct on the PP printer +q+.
  # Only the trailing word of the class name is shown (namespaces are dropped).
  def pretty_print(q)
    opening = "<#{self.class.name[/\w+$/]}"
    no_separator = -> {}
    q.group(1, opening, '>') do
      q.seplist(members, no_separator) do |field|
        q.breakable
        q.text field.to_s
        q.text '='
        q.group(1) do
          # zero-width break: the value may wrap onto its own line if too long
          q.breakable ''
          q.pp self[field]
        end
      end
    end
  end
end
21
+
22
# Parser for "C minus", a small C-like language, built from Rsec combinators.
# CMinus.new.program is the top-level parser; it accepts a sequence of
# function declarations, variable declarations and stray semicolons.
class CMinus
  include Rsec::Helpers
  # also extend, so the helpers can be used in the constant definitions below
  extend Rsec::Helpers

  # node decls

  class Function < Struct.new :type, :id, :params, :body
    include FixPP
  end

  class Expr < Struct.new :expr
    include FixPP
  end

  class Block < Struct.new :var_decls, :statements
    include FixPP
  end

  class Call < Struct.new :function, :args
    include FixPP
  end

  class GetIndex < Struct.new :id, :idx
    include FixPP
  end

  # "terminal" rules

  NUM       = prim :unsigned_int64
  INT       = prim :int64
  NBSP      = /[\ \t]*/.r
  SPACE     = /\s*/.r
  ID        = /[a-zA-Z]\w*/.r 'id'
  TYPE      = (word('int') | word('void')).fail 'type'
  EOSTMT    = ';'.r 'end of statement'
  ELSE      = word('else').fail 'keyword_else'
  IF        = word('if').fail 'keyword_if'
  WHILE     = word('while').fail 'keyword_while'
  RETURN    = word('return').fail 'keyword_return'
  MUL_OP    = symbol(/[\*\/%]/)
  ADD_OP    = symbol(/[\+\-]/)
  # Regexp alternation is ordered: the two-character operators must come
  # before their one-character prefixes, otherwise ">=" is consumed as ">"
  # and the dangling "=" makes the expression fail to parse.
  COMP_OP   = symbol(/(\<=|\<|\>=|\>|==|!=)/).fail 'compare operator'
  COMMA     = /\s*,\s*/.r 'comma'
  EMPTY_BRA = /\[\s*\]/.r 'empty square bracket'

  # call(function apply) expression
  # expr - parser used for each argument
  # Returns a parser producing a Call node.
  def call expr
    args = expr.join(COMMA).even
    seq_(ID, '(', args._?, ')') {
      |(id, _, args, _)|
      # args is the "maybe" result; splatting it passes the argument list
      # (when present) as Call's second member
      Call[id, *args]
    }
  end

  # (binary) expression
  # Returns a parser producing an Expr node.
  # factor and assign are defined after their first use, hence the lazy{} blocks.
  def expression
    binary_arithmetic = lazy{factor}
      .join(MUL_OP).unbox
      .join(ADD_OP).unbox
      .join(COMP_OP).unbox
    expr = lazy{assign} | binary_arithmetic
    # abc
    # abc[12]
    var = seq_(ID, seq_('[', expr, ']')[1]._?) {
      |(id, (index))|
      index ? GetIndex[id, index] : id
    }
    assign = seq_(var, '=', expr)
    factor = seq_('(', expr, ')')[1] | call(expr) | var | INT
    # p expr.parse! "gcd (v ,u- u/v *v)"
    expr.map{|e| Expr[e] }
  end

  # statement parser builder, returns [stmt, block]
  # var_decl - parser for a variable declaration, used inside blocks
  def statement var_decl
    expr = expression()
    brace = seq_('(', expr, ')')[1]
    # statement
    _stmt = lazy{stmt} # to reduce the use of lazy{}

    expr_stmt = seq_(expr, EOSTMT)[0] | EOSTMT
    else_stmt = seq_(ELSE, _stmt)[1]
    if_stmt = seq_(IF, brace, _stmt, else_stmt._?)
    while_stmt = seq_(WHILE, brace, _stmt)
    return_stmt = seq_(RETURN, expr._?, EOSTMT){
      |(ret, maybe_expr)|
      [ret, *maybe_expr]
    }
    # { var_decls statements }
    block = seq('{', SPACE.join(var_decl).odd, SPACE.join(_stmt).odd, '}'){
      |(_, vars, stats, _)|
      Block[vars, stats]
    }
    stmt = block | if_stmt | while_stmt | return_stmt | expr_stmt
    # p if_stmt.parse! 'if(v == 0)return u;'
    [stmt, block]
  end

  # Builds the whole grammar and stores the top-level parser in @program.
  def initialize
    type_id = seq_(TYPE, ID).cached
    # p type_id.parse! 'int a'

    var_decl = seq_(type_id, seq_('[', NUM, ']')[1]._?, EOSTMT){
      |(id, maybe_num)|
      [id, *maybe_num]
    }
    # p var_decl.parse! 'int a[12];'
    # p var_decl.parse! 'int a;'

    stmt, block = statement(var_decl)
    # p block.parse! "{int a;}"
    # p stmt.parse! 'if(3==2) {return 4;}'

    param = seq_(type_id, EMPTY_BRA._?) {
      |((ty, id), maybe_bra)|
      [ty, id, *maybe_bra]
    }
    # either a comma separated parameter list, or the single word "void"
    params = param.join(COMMA).even | 'void'.r{[]}
    brace = seq_('(', params, ')')[1]
    fun_decl = seq_(type_id, brace, block){
      |(type, id), params, block|
      Function[type, id, params, block]
    }
    # p fun_decl.parse! 'int gcd(int u, int v){return 2;}'
    @program = SPACE.join(fun_decl | var_decl | EOSTMT).odd.eof
  end

  attr_reader :program
end
151
+
152
# Demo, only when this file is run directly: parse a small C-minus program
# and pretty-print every top-level node.
if $PROGRAM_NAME == __FILE__
  parser = CMinus.new.program
  nodes = parser.parse! %Q[
int gcd(int u, int v)
{
if (v == 0) return u;
else return gcd(v,u-u / v*v);
}

void main(void)
{
int x; int y;
while (1) {
x = input();
y = input();
output(gcd(x ,y)) ;
}
}
]
  nodes.each { |node| pp node }
end
175
+
@@ -0,0 +1,18 @@
1
+ (display 4)
2
+
3
+ (define (fact x)
4
+ (if (= x 0)
5
+ 1
6
+ (* x (fact (- x 1)))))
7
+
8
+ (display (fact 6))
9
+
10
+ (define (add x)
11
+ (lambda (y)
12
+ (+ x y)))
13
+
14
+ (define add4 (add 4))
15
+ (define add5 (add 5))
16
+
17
+ (display (add4 3))
18
+ (display (add5 3))
data/examples/s_exp.rb ADDED
@@ -0,0 +1,17 @@
1
+ # s-expression parser
2
+
3
+ require "rsec"
4
+
5
+ include Rsec::Helpers
6
+
7
# Builds and returns a parser for one parenthesised s-expression:
# an identifier followed by optional whitespace-separated units, where a unit
# is an identifier, a number, or a nested (possibly over-parenthesised) form.
# The parser requires end of input after the closing paren.
def s_exp
  identifier = /[a-zA-Z][\w\-]*/.r.fail('id')
  number = prim(:double).fail('num')

  # `exp` and `unit` are referenced lazily because they are (re)defined below
  nested_exp = seq_('(', lazy { exp }, ')')[1]
  naked_unit = identifier | number | nested_exp
  wrapped_unit = seq_('(', lazy { unit }, ')')[1]
  unit = naked_unit | wrapped_unit
  units = unit.join(/\s+/).even._?
  exp = seq_(identifier, units) do |(head, (tail))|
    [head, *tail]
  end
  seq_('(', exp, ')')[1].eof
end
17
+
@@ -0,0 +1,84 @@
1
+ # A simple-as-shit scheme interpreter. Usage: ruby scheme.rb hello.scm
2
+ require "rsec"
3
+
4
+ class Scheme
5
+ include Rsec::Helpers
6
+
7
+ Value = Struct.new :val
8
+
9
# Variable environment: a Hash of name => value bindings chained to a parent
# scope that is consulted for names not bound here.
class Bind < Hash
  # parent - enclosing scope (anything responding to #[]); defaults to an
  #          empty Hash for the outermost environment.
  def initialize parent = {}
    super()
    @parent = parent
  end

  # Binds +id+ to a native procedure.
  # The stored lambda evaluates every argument node in the caller's
  # environment and passes the resulting values to the block.
  def define id, &p # define lambda
    self[id] = -> bind, xs {
      p[* xs.map{|x| bind.eval x }]
    }
  end

  # Evaluates an AST node in this environment.
  #   Value  -> the wrapped Ruby value
  #   String -> variable lookup
  #   Array  -> application when the head is a String, sequential execution
  #             (value of the last element) when the head is an Array
  # Any other node evaluates to nil.
  def eval node
    case node
    when Value;  node.val
    when String; self[node]
    when Array
      head, *tail = node
      case head
      when String
        pr = self[head]
        pr.is_a?(Proc) ? pr[self, tail] : pr # invoke lambda
      when Array
        node.map{|n| self.eval n }.last # sequence execution
      end
    end
  end

  # Looks +key+ up here first, then in the parent scope.
  # The test is on key presence rather than truthiness, so a local binding
  # whose value is false (e.g. a parameter bound to #f) correctly shadows an
  # outer binding of the same name instead of falling through to it.
  def [] key
    key?(key) ? super : @parent[key]
  end
end
40
+
41
# Builds the s-expression parser (@parser) and the global environment (@vm)
# holding the special forms and primitive procedures.
def initialize
  # --- grammar ---
  boolean = /\#[tf]/.r { |tok| Value[tok == '#t'] }
  integer = /0|[1-9]\d*/.r { |tok| Value[tok.to_i] }
  id      = /[^\s\(\)\[\]]+/.r
  atom    = boolean | integer | id
  cell    = atom | lazy { list } # `list` is defined two lines below
  cells   = /\s*/.r.join(cell).odd
  list    = '('.r >> cells << ')'
  @parser = cells.eof

  # --- environment ---
  @vm = Bind.new

  # (define name expr) or (define (name params...) body)
  @vm['define'] = ->(bind, (param, body)) do
    if param.is_a?(String)
      @vm[param] = bind.eval(body)
    else
      func, *xs = param
      @vm[func] = @vm['lambda'][bind, [xs, body]]
    end
  end

  # declare: (lambda (xs[0] xs[1]) body)
  @vm['lambda'] = ->(bind_def, (xs, body)) do
    xs = [xs] if xs.is_a?(String)
    # calling: (some vs[0] vs[1])
    ->(bind_call, vs) do
      values = vs.map { |v| bind_call.eval(v) }
      scope = Bind.new(bind_def)
      xs.zip(values) { |name, value| scope[name] = value }
      scope.eval(body)
    end
  end

  # (if predicate then else) - only the chosen branch is evaluated
  @vm['if'] = ->(bind, (p, left, right)) do
    chosen = bind.eval(p) ? left : right
    bind.eval(chosen)
  end

  # arithmetic / comparison primitives map straight onto Ruby operators
  %w|+ - * / ** % > <|.each do |op|
    @vm.define(op, &op.to_sym)
  end
  @vm.define('=', &:==)
  @vm.define('display') do |x|
    puts x
  end
end
78
+
79
# Parses +source+ with @parser and evaluates the resulting node list in the
# global environment; returns whatever the evaluation returns.
def run source
  ast = @parser.parse!(source)
  @vm.eval(ast)
end
82
+ end
83
+
84
# Entry point: run the file named by the first command line argument.
if ARGV[0]
  Scheme.new.run(File.read(ARGV[0]))
else
  puts('need a scheme file name')
end
@@ -0,0 +1,68 @@
1
+ # coding: utf-8
2
+
3
+ # grammar from
4
+ # http://www.json.org/
5
+
6
+ require "rsec"
7
+
8
# JSON parser assembled from Rsec combinators (grammar from json.org).
# SlowJSON.new.parse(str) returns the corresponding Ruby value.
class SlowJSON
  include Rsec::Helper

  def initialize
    generate_parser
    # a single value surrounded by optional whitespace, then end of input
    padded_value = seq(/\s*/, @value, /\s*/)[1]
    @parser = padded_value.eof
  end

  # Parses the JSON text +s+ using the strict parse! entry point.
  def parse s
    @parser.parse!(s)
  end

  private

  # term (, term)*
  def elem_parser term
    comma = /\s*,\s*/.r
    term.join(comma).even
  end

  # Parser for one chunk of string content: either a run of ordinary
  # characters or a single backslash escape.
  def chars_parser
    # \uXXXX: the four hex digits are one UTF-16BE code unit
    unicode_bytes = /[0-9a-f]{4}/i.r do |bytes|
      [bytes].pack('H*').force_encoding('utf-16be').encode!('utf-8')
    end
    control_escapes = [
      'b'.r { "\b" },
      'f'.r { "\f" },
      'n'.r { "\n" },
      'r'.r { "\r" },
      't'.r { "\t" }
    ]
    alternatives = ['"'.r, "\\", '/', *control_escapes, seq('u'.r, unicode_bytes)[1]]
    escape_char = alternatives.reduce { |branch, alt| branch | alt }
    plain_chars = /[^"\\]+/.r
    plain_chars | seq('\\', escape_char)[1]
  end

  # Defines @value, @array and @object; the three are mutually recursive,
  # so @value refers to the other two lazily.
  def generate_parser
    string = '"'.r >> chars_parser.star.map(&:join) << '"'
    # -? int frac? exp?
    number = prim(:double, allowed_sign: '-')
    literals = ['true'.r { true }, 'false'.r { false }, 'null'.r { nil }]
    @value = [lazy { @object }, lazy { @array }, *literals]
             .reduce(string | number) { |branch, alt| branch | alt }
    pair = seq(string, /\s*:\s*/.r, @value) { |k, _, v| [k, v] }
    empty_array = /\[\s*\]/.r { [] }
    @array = empty_array | '['.r >> elem_parser(@value) << ']'
    empty_object = /\{\s*\}/.r { {} }
    members = ('{'.r >> elem_parser(pair) << '}').map { |pairs| Hash[pairs] }
    @object = empty_object | members
  end
end
56
+
57
# Demo, only when this file is run directly: parse a few sample documents
# and print the resulting Ruby values.
if $PROGRAM_NAME == __FILE__
  j = SlowJSON.new
  samples = [
    '""',
    '123.4e5',
    'null',
    '[]',
    '{}',
    '{"no": [3, 4]}',
    '[{}]',
    '[{"S":321061,"T":"GetAttributeResp"},{"ERROR":null,"TS":0,"VAL":{"SqlList":[{"BatchSizeMax":0,"BatchSizeTotal":0,"ConcurrentMax":1,"DataSource":"jdbc:wrap-jdbc:filters=default,encoding:name=ds-offer:jdbc:mysql://100.10.10.10:8066/xxxx","EffectedRowCount":0,"ErrorCount":0,"ExecuteCount":5,"FetchRowCount":5,"File":null,"ID":2001,"LastError":null,"LastTime":1292742908178,"MaxTimespan":16,"MaxTimespanOccurTime":1292742668191,"Name":null,"RunningCount":0,"SQL":"SELECT @@SQL_MODE","TotalTime":83}]}}]'
  ]
  samples.each do |doc|
    p j.parse(doc)
  end
end
68
+
data/lib/rsec.rb ADDED
@@ -0,0 +1,40 @@
1
+ # coding: utf-8
2
+ # load the gem
3
+
4
+ # All code is under this module
5
module Rsec
  # Preload configuration: both settings may be defined by the application
  # before this file is loaded, in which case they are left alone.

  # Name of the method added to String and Regexp that converts them to
  # parsers. Default is :r
  TO_PARSER_METHOD = :r unless const_defined?(:TO_PARSER_METHOD)

  # C extension usage:
  #   :try - use it when it can be loaded (default)
  #   :no  - don't use
  #   :yes - use
  USE_CEXT = :try unless const_defined?(:USE_CEXT)

  VERSION = '0.3'
end
25
+
26
+ require "strscan"
27
+ require "rsec/utils"
28
+ require "rsec/parser"
29
+ require "rsec/helpers"
30
+
31
# Load the optional C extension according to Rsec::USE_CEXT.
case Rsec::USE_CEXT
when :try
  # Best effort. A missing extension raises LoadError, which is a ScriptError
  # and therefore NOT caught by a bare `rescue` modifier; rescue it
  # explicitly (StandardError is kept so load-time failures stay ignored).
  begin
    require "rsec/ext"
  rescue LoadError, StandardError
    nil
  end
when :yes
  require "rsec/ext"
when :no
  # pure Ruby implementation only
else
  warn "Rsec::USE_CEXT should be one of :try, :yes, :no"
end
40
+
@@ -0,0 +1,447 @@
1
+ # coding: utf-8
2
+ # ------------------------------------------------------------------------------
3
+ # Helpers(combinators) to construct parser
4
+
5
+ module Rsec #:nodoc:
6
+
7
+ # ------------------------------------------------------------------------------
8
+ # these are not callable from a parser
9
module Helpers

  # @ desc.helper
  #   Lazy parser: wraps the block so it can refer to a parser that is
  #   defined later.
  # @ example
  #   parser = lazy{future}
  #   future = 'jim'.r
  #   assert_equal 'jim', parser.parse('jim')
  def lazy &p
    raise ArgumentError, 'lazy() requires a block' unless p
    Lazy[p]
  end

  # @ desc.helper
  #   Parses one of chars in str
  # @ example
  #   multiplicative = one_of '*/%'
  #   assert_equal '/', multiplicative.parse('/')
  #   assert_equal Rsec::INVALID, multiplicative.parse('+')
  def one_of str, &p
    Rsec.assert_type str, String
    raise ArgumentError, 'str len should > 0' if str.empty?
    one_of_klass =
      if (str.bytesize == str.size) and Rsec.const_defined?(:OneOfByte)
        # for C-ext
        OneOfByte
      else
        OneOf
      end
    # hand the parser a private frozen copy of the character set
    one_of_klass[str.dup.freeze].map p
  end

  # @ desc.helper
  #   See also #one_of#, with leading and trailing optional breakable spaces
  # @ example
  #   additive = one_of_('+-')
  #   assert_equal '+', additive.parse('  +')
  def one_of_ str, &p
    Rsec.assert_type str, String
    raise ArgumentError, 'str len should > 0' if str.empty?
    raise ArgumentError, 'str should be ascii' unless str.bytesize == str.size
    raise ArgumentError, 'str should not contain space' if str =~ /\s/
    # str is single-byte here (checked above), so only the presence of the
    # C-ext class decides which implementation is used
    spaced_one_of_klass = Rsec.const_defined?(:OneOfByte_) ? OneOfByte_ : OneOf_
    spaced_one_of_klass[str.dup.freeze].map p
  end

  # @ desc.helper
  #   Primitive parser, returns nil if overflow or underflow.
  #   There can be an optional '+' or '-' at the beginning of string except unsigned_int32 | unsigned_int64.
  #   type =
  #     :double |
  #     :hex_double |
  #     :int32 |
  #     :int64 |
  #     :unsigned_int32 |
  #     :unsigned_int64
  #   options:
  #     :allowed_sign => '+' | '-' | '' | '+-' (default '+-')
  #     :allowed_signs => (same as :allowed_sign)
  #     :base => integer only (default 10)
  #   Raises RuntimeError for an unknown type, an invalid sign option,
  #   :base given for a floating point type, or '-' allowed for an unsigned
  #   type; TypeError / RangeError for a bad :base.
  # @ example
  #   p = prim :double
  #   assert_equal 1.23, p.parse('1.23')
  #   p = prim :double, allowed_sign: '-'
  #   assert_equal 1.23, p.parse('1.23')
  #   assert_equal -1.23, p.parse('-1.23')
  #   assert_equal Rsec::INVALID, p.parse('+1.23')
  #   p = prim :int32, base: 36
  #   assert_equal 49713, p.parse('12cx')
  def prim type, options={}, &p
    base = options[:base]
    # :base only makes sense for integers (the check is on the type)
    if base and [:double, :hex_double].include?(type)
      raise 'Floating points does not allow :base'
    end
    base ||= 10
    Rsec.assert_type base, Integer
    unless (2..36).include? base
      raise RangeError, ":base should be in 2..36, but got #{base}"
    end

    # both spellings of the option are honoured everywhere below
    allowed = (options[:allowed_sign] or options[:allowed_signs])
    sign_strategy = \
      case allowed
      when nil, '+-', '-+'; 3
      when '+'; 2
      when '-'; 1
      when ''; 0
      else raise "allowed_sign should be one of nil, '', '+', '-', '+-', '-+'"
      end

    parser = \
      case type
      when :double;     PDouble.new sign_strategy, false # decimal
      when :hex_double; PDouble.new sign_strategy, true  # hex
      when :int32;      PInt32.new sign_strategy, base
      when :int64;      PInt64.new sign_strategy, base
      when :unsigned_int32;
        raise 'unsigned int not allow - sign' if allowed =~ /-/
        PUnsignedInt32.new sign_strategy, base
      when :unsigned_int64;
        raise 'unsigned int not allow - sign' if allowed =~ /-/
        PUnsignedInt64.new sign_strategy, base
      else
        raise "Invalid primitive type #{type}"
      end
    parser.map p
  end

  # @ desc.helper
  #   Sequence parser
  # @ example
  #   assert_equal ['a', 'b', 'c'], seq('a', 'b', 'c').parse('abc')
  def seq *xs, &p
    xs.map! {|x| Rsec.make_parser x }
    Seq[xs].map p
  end

  # @ desc.helper
  #   Sequence parser with skippable pattern(or parser)
  #   option
  #     :skip default= /\s*/
  # @ example
  #   assert_equal ['a', 'b', 'c'], seq_('a', 'b', 'c', skip: ',').parse('a,b,c')
  def seq_ *xs, &p
    # a trailing Hash is the options, not a sequence element
    skipper =
      if (xs.last.is_a? Hash)
        xs.pop[:skip]
      end
    skipper = skipper ? Rsec.make_parser(skipper) : /\s*/.r
    xs.map! {|x| Rsec.make_parser x }
    first, *rest = xs
    raise 'sequence should not be empty' unless first
    Seq_[first, rest, skipper].map p
  end

  # @ desc.helper
  #   A symbol is something wrapped with optional space
  def symbol pattern, skip=/\s*/, &p
    pattern = Rsec.make_parser pattern
    skip = Rsec.try_skip_pattern Rsec.make_parser skip
    # skip, pattern, skip - only the pattern's result (index 1) is kept
    SeqOne[[skip, pattern, skip], 1].map p
  end

  # @ desc.helper
  #   A word is wrapped with word boundaries
  # @ example
  #   assert_equal ['yes', '3'], seq('yes', '3').parse('yes3')
  #   assert_equal INVALID, seq(word('yes'), '3').parse('yes3')
  def word pattern, &p
    parser = Rsec.make_parser pattern
    # TODO check pattern type
    Pattern[/\b#{parser.some}\b/].map p
  end
end # helpers
168
+
169
+ # robust
170
+ Helper = Helpers
171
+
172
+ # ------------------------------------------------------------------------------
173
+ # combinators attached to parsers
174
+
175
module Parser #:nodoc:

  # @ desc
  #   Transform result
  #   Accepts either a proc/lambda argument or a block (the argument wins);
  #   with neither, the parser itself is returned unchanged.
  # @ example
  #   parser = /\w+/.r.map{|word| word * 2}
  #   assert_equal 'hellohello', parser.parse!('hello')
  def map lambda_p=nil, &p
    return self if (lambda_p.nil? and p.nil?)
    p = lambda_p || p
    raise TypeError, 'should give a proc or lambda' unless (p.is_a? Proc)
    Map[self, p]
  end

  # @ desc
  #   "p.join('+')" parses strings like "p+p+p+p+p".
  #   Note that at least 1 of p appears in the string.
  #   Sometimes it is useful to reverse the joining:
  #   /\s*/.r.join('p').odd parses string like " p p p "
  def join inter, &p
    inter = Rsec.make_parser inter
    Join[self, inter].map p
  end

  # @ desc
  #   Branch parser, note that rsec is a PEG parser generator,
  #   beware of the difference between PEG and CFG.
  def |(y, &p)
    y = Rsec.make_parser y
    # a | b | c builds one flat Branch, unless a block has to be attached to
    # this particular pair
    arr =
      if (is_a?(Branch) and !p)
        [*some, y]
      else
        [self, y]
      end
    Branch[arr].map p
  end

  # @ desc
  #   Repeat n or in a range.
  #   If range.end <= 0 (e.g. 0..-1) or the range is endless,
  #   repeat at least range.begin
  #   (Infinity and -Infinity are considered)
  #   Raises TypeError for non-Integer bounds, RuntimeError for negative n.
  def *(n, &p)
    # FIXME if self is an epsilon parser, will cause infinite loop
    parser =
      if n.is_a?(Range)
        # check the type before comparing, so a bad bound is reported as a
        # TypeError rather than a comparison failure
        Rsec.assert_type n.begin, Integer
        raise "invalid n: #{n}" if n.begin < 0
        upper = n.end
        if upper.nil?
          # endless range: no upper bound
          RepeatAtLeastN[self, n.begin]
        else
          end_inf = (upper.infinite? rescue false)
          (Rsec.assert_type upper, Integer) unless end_inf
          if upper > 0
            RepeatRange[self, n]
          else
            RepeatAtLeastN[self, n.begin]
          end
        end
      else
        Rsec.assert_type n, Integer
        raise "invalid n: #{n}" if n < 0
        RepeatN[self, n]
      end
    parser.map p
  end

  # @ desc
  #   Appears 0 or 1 times, result is wrapped in an array
  # @ example
  #   parser = 'a'.r.maybe
  #   assert_equal ['a'], parser.parse('a')
  #   assert_equal [], parser.parse('')
  def maybe &p
    Maybe[self].map(&p)
  end
  alias _? maybe

  # @ desc
  #   Kleene star, 0 or more any times
  def star &p
    self.*((0..-1), &p)
  end

  # @ desc
  #   Lookahead predicate, note that other can be a very complex parser
  def &(other, &p)
    other = Rsec.make_parser other
    LookAhead[self, other].map p
  end

  # @ desc
  #   Negative lookahead predicate
  def ^(other, &p)
    other = Rsec.make_parser other
    NegativeLookAhead[self, other].map p
  end

  # @ desc
  #   When parsing failed, show "expect tokens" error
  #   Without tokens the parser is returned unchanged.
  def fail *tokens, &p
    return self if tokens.empty?
    Fail[self, tokens].map p
  end
  alias expect fail

  # @ desc
  #   Short for seq_(parser, other)[1]
  def >>(other, &p)
    other = Rsec.make_parser other
    # the left result is discarded, so it may be skipped instead of parsed
    left = Rsec.try_skip_pattern self
    SeqOne_[left, [other], SkipPattern[/\s*/], 1].map p
  end

  # @ desc
  #   Short for seq_(parser, other)[0]
  def <<(other, &p)
    other = Rsec.make_parser other
    # the right result is discarded, so it may be skipped instead of parsed
    right = Rsec.try_skip_pattern other
    SeqOne_[self, [right], SkipPattern[/\s*/], 0].map p
  end

  # @ desc
  #   Should be end of input after parse
  def eof &p
    Eof[self].map p
  end

  # @ desc
  #   Packrat parser combinator, returns a parser that caches parse result, may optimize performance
  def cached &p
    Cached[self].map p
  end
end
305
+
306
+ # ------------------------------------------------------------------------------
307
+ # additional helper methods for special classes
308
+
309
class Seq
  # @ desc.seq, seq_
  #   Returns the parse result at idx, shorter and faster than map{|array| array[idx]}
  #   Raises when idx is outside 0...size.
  # @ example
  #   assert_equal 'b', seq('a', 'b', 'c')[1].parse('abc')
  def [] idx, &p
    members = some
    raise 'index out of range' unless idx < members.size and idx >= 0
    # optimize: only the selected element needs a real result, the others
    # are converted to skipping parsers where possible
    parsers = members.each_with_index.map do |member, i|
      if i == idx
        member
      else
        Rsec.try_skip_pattern(member)
      end
    end
    picked = SeqOne[parsers, idx]
    picked.map(p)
  end

  # @ desc.seq, seq_, join, join.even, join.odd
  #   If parse result contains only 1 element, return the element instead of the array
  def unbox &p
    unboxed = Unbox[self]
    unboxed.map(p)
  end

  # @ desc
  #   Think about "innerHTML"!
  # @ example
  #   parser = seq('<b>', /[\w\s]+/, '</b>').inner
  #   parser.parse('<b>the inside</b>')
  def inner &p
    inside = Inner[self]
    inside.map(p)
  end
end
338
+
339
class Seq_
  # Returns a parser yielding only the result at idx (0 is `first`, 1.. index
  # into `rest`). Raises when idx is outside that range.
  def [] idx, &p
    raise 'index out of range' if idx > rest.size or idx < 0
    # optimize parsers, use skip if possible: every element except the
    # selected one is converted to a skipping parser
    # NOTE rest start with 1
    optimized = [first, *rest].each_with_index.map do |member, position|
      position == idx ? member : Rsec.try_skip_pattern(member)
    end
    new_first, *new_rest = optimized
    SeqOne_[new_first, new_rest, skipper, idx].map(p)
  end

  # If parse result contains only 1 element, return the element instead of the array
  def unbox &p
    unboxed = Unbox[self]
    unboxed.map(p)
  end

  # Wraps this sequence in Inner (see Seq#inner)
  def inner &p
    inside = Inner[self]
    inside.map(p)
  end
end
359
+
360
class Join
  # If parse result contains only 1 element, return the element instead of the array
  def unbox &p
    unboxed = Unbox[self]
    unboxed.map(p)
  end

  # @ desc.join
  #   Only keep the even(left, token) parts
  #   The separator's result is dropped, so it is skipped where possible.
  def even &p
    separator = Rsec.try_skip_pattern(right)
    JoinEven[left, separator].map(p)
  end

  # @ desc.join
  #   Only keep the odd(right, inter) parts
  #   The token's result is dropped, so it is skipped where possible.
  def odd &p
    token = Rsec.try_skip_pattern(left)
    JoinOdd[token, right].map(p)
  end
end
377
+
378
class JoinEven
  # If parse result contains only 1 element, return the element instead of the array
  def unbox &p
    unboxed = Unbox[self]
    unboxed.map(p)
  end
end
383
+
384
class JoinOdd
  # If parse result contains only 1 element, return the element instead of the array
  def unbox &p
    unboxed = Unbox[self]
    unboxed.map(p)
  end
end
389
+
390
class Pattern
  # @ desc.r
  #   Scan until the pattern happens
  #   Builds an UntilPattern from this parser's pattern.
  def until &p
    scanner = UntilPattern[some]
    scanner.map(p)
  end
end
397
+
398
+ # ------------------------------------------------------------------------------
399
+ # helper methods for parser generation
400
+
401
+ # ensure x is a parser
402
# Returns x as a parser: parsers pass through untouched, objects responding
# to the configured conversion method (TO_PARSER_METHOD) are converted.
# Raises TypeError when the outcome is still not a Parser.
def Rsec.make_parser x
  unless x.is_a?(Parser)
    x = x.send(TO_PARSER_METHOD) if x.respond_to?(TO_PARSER_METHOD)
    Rsec.assert_type x, Parser
  end
  x
end
408
+
409
+ # type assertion
410
# Raises TypeError unless obj is an instance of type; returns nil otherwise.
def Rsec.assert_type obj, type
  return if obj.is_a?(type)
  raise TypeError, "#{obj} should be a #{type}"
end
413
+
414
+ # try to convert Pattern -> SkipPattern
415
# Converts a parser into its result-discarding "skip" counterpart when one
# exists (Pattern -> SkipPattern, UntilPattern -> SkipUntilPattern, and
# FixString -> SkipPattern when the C-ext class is defined); any other parser
# is returned unchanged.
def Rsec.try_skip_pattern p
  # for C-ext
  if Rsec.const_defined?(:FixString) and p.is_a?(FixString)
    literal = Regexp.escape(p.some)
    return SkipPattern[/#{literal}/]
  end

  if p.is_a?(Pattern)
    SkipPattern[p.some]
  elsif p.is_a?(UntilPattern)
    SkipUntilPattern[p.some]
  else
    p
  end
end
430
+ end
431
+
432
class String #:nodoc:
  # String#r: convert self to parser
  # convenient string-to-parser transformer; the string is matched literally.
  # Optional arguments are the "expect" tokens passed to #fail, the block
  # maps the parse result.
  define_method(::Rsec::TO_PARSER_METHOD) do |*expects, &p|
    literal = ::Rsec::Pattern[/#{Regexp.escape self}/]
    literal.fail(*expects).map(p)
  end
end
439
+
440
class Regexp #:nodoc:
  # Regexp#r: convert self to parser
  # convenient regexp-to-parser transformer.
  # Optional arguments are the "expect" tokens passed to #fail, the block
  # maps the parse result.
  define_method(::Rsec::TO_PARSER_METHOD) do |*expects, &p|
    pattern = ::Rsec::Pattern[self]
    pattern.fail(*expects).map(p)
  end
end
447
+