rsec 0.3.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,175 @@
1
+ $:.unshift '../lib'
2
+ $:.unshift '../ext'
3
+ require "rsec"
4
+ require "pp"
5
+
6
# Mixin for Struct-based nodes: renders a node through PP as
# "<ShortClassName member=value ...>".
module FixPP
  # PP hook.
  # q - the pretty-printer object handed in by PP
  def pretty_print(q)
    # only the last segment of a namespaced class name is shown
    opening = sprintf("<%s", self.class.name[/\w+$/])
    q.group(1, opening, '>') do
      # members are separated only by the breakable emitted before each one
      members.each do |member|
        q.breakable
        q.text member.to_s
        q.text '='
        q.group(1) do
          q.breakable ''
          q.pp self[member]
        end
      end
    end
  end
end
21
+
22
# Parser for the small C-like "C-minus" language, assembled from Rsec
# combinators. After CMinus.new, #program holds the top-level parser.
class CMinus
  include Rsec::Helpers
  extend Rsec::Helpers

  # node decls

  class Function < Struct.new :type, :id, :params, :body
    include FixPP
  end

  class Expr < Struct.new :expr
    include FixPP
  end

  class Block < Struct.new :var_decls, :statements
    include FixPP
  end

  class Call < Struct.new :function, :args
    include FixPP
  end

  class GetIndex < Struct.new :id, :idx
    include FixPP
  end

  # "terminal" rules

  NUM       = prim :unsigned_int64
  INT       = prim :int64
  NBSP      = /[\ \t]*/.r
  SPACE     = /\s*/.r
  ID        = /[a-zA-Z]\w*/.r 'id'
  TYPE      = (word('int') | word('void')).fail 'type'
  EOSTMT    = ';'.r 'end of statement'
  ELSE      = word('else').fail 'keyword_else'
  IF        = word('if').fail 'keyword_if'
  WHILE     = word('while').fail 'keyword_while'
  RETURN    = word('return').fail 'keyword_return'
  MUL_OP    = symbol(/[\*\/%]/)
  ADD_OP    = symbol(/[\+\-]/)
  # Regexp alternation takes the first alternative that matches, so the
  # two-character operators must be listed before their one-character prefixes;
  # otherwise ">=" is consumed as ">" and the parse fails on the "=".
  COMP_OP   = symbol(/(\<=|\>=|\<|\>|==|!=)/).fail 'compare operator'
  COMMA     = /\s*,\s*/.r 'comma'
  EMPTY_BRA = /\[\s*\]/.r 'empty square bracket'

  # call(function apply) expression
  # expr - parser used for each argument
  # Returns a parser producing Call nodes.
  def call expr
    args = expr.join(COMMA).even
    seq_(ID, '(', args._?, ')') {
      |(id, _, maybe_args, _)|
      # maybe_args is the result of `_?`: [] when there is no argument list
      Call[id, *maybe_args]
    }
  end

  # (binary) expression
  # Returns a parser that wraps the parsed expression in an Expr node.
  def expression
    # factor and assign are assigned further down; lazy{} defers the lookup
    binary_arithmetic = lazy{factor}
      .join(MUL_OP).unbox
      .join(ADD_OP).unbox
      .join(COMP_OP).unbox
    expr = lazy{assign} | binary_arithmetic
    # abc
    # abc[12]
    var = seq_(ID, seq_('[', expr, ']')[1]._?) {
      |(id, (index))|
      index ? GetIndex[id, index] : id
    }
    assign = seq_(var, '=', expr)
    factor = seq_('(', expr, ')')[1] | call(expr) | var | INT
    # p expr.parse! "gcd (v ,u- u/v *v)"
    expr.map{|e| Expr[e] }
  end

  # statement parser builder, returns [stmt, block]
  # var_decl - parser for a variable declaration, used inside blocks
  def statement var_decl
    expr = expression()
    brace = seq_('(', expr, ')')[1]
    # statement
    _stmt = lazy{stmt} # to reduce the use of lazy{}

    expr_stmt = seq_(expr, EOSTMT)[0] | EOSTMT
    else_stmt = seq_(ELSE, _stmt)[1]
    if_stmt = seq_(IF, brace, _stmt, else_stmt._?)
    while_stmt = seq_(WHILE, brace, _stmt)
    return_stmt = seq_(RETURN, expr._?, EOSTMT){
      |(ret, maybe_expr)|
      [ret, *maybe_expr]
    }
    # { var_decls statements }
    block = seq('{', SPACE.join(var_decl).odd, SPACE.join(_stmt).odd, '}'){
      |(_, vars, stats, _)|
      Block[vars, stats]
    }
    stmt = block | if_stmt | while_stmt | return_stmt | expr_stmt
    # p if_stmt.parse! 'if(v == 0)return u;'
    [stmt, block]
  end

  # Builds the whole grammar and stores the top-level parser in @program.
  def initialize
    type_id = seq_(TYPE, ID).cached
    # p type_id.parse! 'int a'

    var_decl = seq_(type_id, seq_('[', NUM, ']')[1]._?, EOSTMT){
      |(id, maybe_num)|
      [id, *maybe_num]
    }
    # p var_decl.parse! 'int a[12];'
    # p var_decl.parse! 'int a;'

    stmt, block = statement(var_decl)
    # p block.parse! "{int a;}"
    # p stmt.parse! 'if(3==2) {return 4;}'

    param = seq_(type_id, EMPTY_BRA._?) {
      |((ty, id), maybe_bra)|
      [ty, id, *maybe_bra]
    }
    params = param.join(COMMA).even | 'void'.r{[]}
    brace = seq_('(', params, ')')[1]
    fun_decl = seq_(type_id, brace, block){
      |(type, id), params, block|
      Function[type, id, params, block]
    }
    # p fun_decl.parse! 'int gcd(int u, int v){return 2;}'
    @program = SPACE.join(fun_decl | var_decl | EOSTMT).odd.eof
  end

  attr_reader :program
end
151
+
152
# Demo: parse a small C-minus program and pretty-print every top-level node.
if __FILE__ == $PROGRAM_NAME
  source = %Q[
    int gcd(int u, int v)
    {
      if (v == 0) return u;
      else return gcd(v,u-u / v*v);
    }

    void main(void)
    {
      int x; int y;
      while (1) {
        x = input();
        y = input();
        output(gcd(x ,y)) ;
      }
    }
  ]
  CMinus.new.program.parse!(source).each { |node| pp node }
end
175
+
@@ -0,0 +1,18 @@
1
(display 4)

(define (fact n)
  (if (= n 0)
    1
    (* n (fact (- n 1)))))

(display (fact 6))

(define (add a)
  (lambda (b)
    (+ a b)))

(define add4 (add 4))
(define add5 (add 5))

(display (add4 3))
(display (add5 3))
data/examples/s_exp.rb ADDED
@@ -0,0 +1,17 @@
1
+ # s-expression parser
2
+
3
+ require "rsec"
4
+
5
+ include Rsec::Helpers
6
+
7
# Builds the s-expression parser: one parenthesised form, followed by end
# of input. A form is an id followed by optional whitespace-separated units.
def s_exp
  id  = /[a-zA-Z][\w\-]*/.r.fail('id')
  num = prim(:double).fail('num')

  # exp and unit are assigned below; lazy{} defers the lookup until parse time
  nested_exp  = seq_('(', lazy{exp}, ')')[1]
  nested_unit = seq_('(', lazy{unit}, ')')[1]
  naked_unit  = id | num | nested_exp
  unit  = naked_unit | nested_unit
  units = unit.join(/\s+/).even._?
  exp = seq_(id, units) do |(head, (tail))|
    [head, *tail]
  end
  seq_('(', exp, ')')[1].eof
end
17
+
@@ -0,0 +1,84 @@
1
+ # A deliberately minimal Scheme interpreter. Usage: ruby scheme.rb hello.scm
2
+ require "rsec"
3
+
4
# Tiny Scheme interpreter: an Rsec grammar turns source into nested arrays,
# and a chain of Bind environments evaluates them.
class Scheme
  include Rsec::Helpers

  # Wrapper for literal values so they can be told apart from identifiers
  Value = Struct.new :val

  # An environment: a Hash of bindings that falls back to a parent environment.
  class Bind < Hash
    # parent - any object answering [] (a Bind or a plain Hash)
    def initialize parent = {}
      @parent = parent
    end

    # Registers a primitive: the block receives the already-evaluated arguments.
    def define id, &p # define lambda
      self[id] = -> bind, xs {
        p[* xs.map{|x| bind.eval x }]
      }
    end

    # Evaluates a parsed node in this environment.
    def eval node
      case node
      when Value; node.val
      when String; self[node]
      when Array
        head, *tail = node
        case head
        when String
          pr = self[head]
          pr.is_a?(Proc) ? pr[self, tail] : pr # invoke lambda
        when Array
          node.map{|n| self.eval n }.last # sequence execution
        end
      end
    end

    # Looks key up here first, then in the parent.
    # Presence is tested with key? rather than truthiness, so a local bound to
    # false or nil correctly shadows an outer binding of the same name.
    def [] key
      key?(key) ? super(key) : @parent[key]
    end
  end

  # Builds the parser and the global environment with its special forms
  # and primitives.
  def initialize
    boolean = /\#[tf]/.r {|n| Value[n=='#t'] }
    integer = /0|[1-9]\d*/.r {|n| Value[n.to_i] }
    id = /[^\s\(\)\[\]]+/.r
    atom = boolean | integer | id
    cell = atom | lazy{list}
    cells = /\s*/.r.join(cell).odd
    list = '('.r >> cells << ')'
    @parser = cells.eof

    @vm = Bind.new
    @vm['define'] = -> bind, (param, body) {
      if param.is_a?(String)
        # (define name value)
        @vm[param] = bind.eval body
      else
        # (define (name args...) body)
        func, *xs = param
        @vm[func] = @vm['lambda'][bind, [xs, body]]
      end
    }
    # declare: (lambda (xs[0] xs[1]) body)
    @vm['lambda'] = -> bind_def, (xs, body) {
      xs = [xs] if xs.is_a?(String)
      # calling: (some vs[0] vs[1])
      -> bind_call, vs {
        # arguments are evaluated in the caller's environment, the body in a
        # fresh environment whose parent is the defining one
        vs = vs.map{|v| bind_call.eval v }
        new_bind = Bind.new bind_def
        xs.zip(vs){|x, v| new_bind[x] = v }
        new_bind.eval body
      }
    }
    @vm['if'] = -> bind, (p, left, right) {
      bind.eval(bind.eval(p) ? left : right)
    }
    %w|+ - * / ** % > <|.each{|s| @vm.define s, &s.to_sym }
    @vm.define '=', &:==
    @vm.define('display'){|x| puts x}
  end

  # Parses and evaluates source; raises on a parse error (parse!).
  def run source
    @vm.eval @parser.parse! source
  end
end
83
+
84
# Command-line entry point: run the file named by the first argument.
if ARGV[0]
  Scheme.new.run(File.read(ARGV[0]))
else
  puts('need a scheme file name')
end
@@ -0,0 +1,68 @@
1
+ # coding: utf-8
2
+
3
+ # grammar from
4
+ # http://www.json.org/
5
+
6
+ require "rsec"
7
+
8
# JSON parser written with Rsec combinators (grammar from json.org).
class SlowJSON

  include Rsec::Helper

  def initialize
    generate_parser
    @parser = seq(/\s*/, @value, /\s*/)[1].eof
  end

  # Parses the JSON text s and returns the corresponding Ruby value;
  # raises on a parse error (parse!).
  def parse s
    @parser.parse! s
  end

  private

  # term (, term)*
  def elem_parser term
    term.join(/\s*,\s*/.r).even
  end

  # Parser for one chunk of string content: a run of plain characters or
  # one backslash escape.
  def chars_parser
    # Consecutive \uXXXX escapes are taken as one run and decoded together, so
    # a surrogate pair (e.g. \ud83d\ude00) becomes a single UTF-8 character
    # instead of failing on each lone half. The first "\u" has already been
    # consumed by the time this pattern runs.
    unicode_bytes = /\h{4}(?:\\u\h{4})*/.r{|run|
      [run.gsub('\\u', '')].pack('H*').force_encoding('utf-16be').encode!('utf-8')
    }
    escape_char = '"'.r | "\\" | '/' |
      'b'.r{"\b"} |
      'f'.r{"\f"} |
      'n'.r{"\n"} |
      'r'.r{"\r"} |
      't'.r{"\t"} |
      seq('u'.r, unicode_bytes)[1]
    /[^"\\]+/.r | seq('\\', escape_char)[1]
  end

  # Builds @value, @array and @object; they reference each other, hence lazy{}.
  def generate_parser
    string = '"'.r >> chars_parser.star.map(&:join) << '"'
    # -? int frac? exp?
    number = prim(:double, allowed_sign: '-')
    @value = string | number | lazy{@object} | lazy{@array} |
      'true'.r{true} |
      'false'.r{false} |
      'null'.r{nil}
    pair = seq(string, /\s*:\s*/.r, @value){|k, _, v| [k, v]}
    @array = /\[\s*\]/.r{[]} | '['.r >> elem_parser(@value) << ']'
    @object = /\{\s*\}/.r{{}} | ('{'.r >> elem_parser(pair) << '}').map{|arr|Hash[arr]}
  end

end
56
+
57
# Demo: parse a handful of sample documents and print the results.
if __FILE__ == $PROGRAM_NAME
  j = SlowJSON.new
  samples = [
    '""',
    '123.4e5',
    'null',
    '[]',
    '{}',
    '{"no": [3, 4]}',
    '[{}]',
    '[{"S":321061,"T":"GetAttributeResp"},{"ERROR":null,"TS":0,"VAL":{"SqlList":[{"BatchSizeMax":0,"BatchSizeTotal":0,"ConcurrentMax":1,"DataSource":"jdbc:wrap-jdbc:filters=default,encoding:name=ds-offer:jdbc:mysql://100.10.10.10:8066/xxxx","EffectedRowCount":0,"ErrorCount":0,"ExecuteCount":5,"FetchRowCount":5,"File":null,"ID":2001,"LastError":null,"LastTime":1292742908178,"MaxTimespan":16,"MaxTimespanOccurTime":1292742668191,"Name":null,"RunningCount":0,"SQL":"SELECT @@SQL_MODE","TotalTime":83}]}}]'
  ]
  samples.each do |sample|
    p j.parse(sample)
  end
end
68
+
data/lib/rsec.rb ADDED
@@ -0,0 +1,40 @@
1
+ # coding: utf-8
2
+ # load the gem
3
+
4
+ # All code is under this module
5
# All code is under this module
module Rsec
  # preload configs: each constant below is only set when it has not been
  # defined beforehand.

  # config method name, default is :r
  TO_PARSER_METHOD = :r unless const_defined?(:TO_PARSER_METHOD)

  # config C extension usage
  # options:
  #   :try - default
  #   :no  - don't use
  #   :yes - use
  USE_CEXT = :try unless const_defined?(:USE_CEXT)

  VERSION = '0.3'
end
25
+
26
+ require "strscan"
27
+ require "rsec/utils"
28
+ require "rsec/parser"
29
+ require "rsec/helpers"
30
+
31
# Load the optional C extension according to Rsec::USE_CEXT.
case Rsec::USE_CEXT
when :try
  # A missing extension raises LoadError, which is a ScriptError and is not
  # caught by a `rescue` modifier (that only covers StandardError). Rescue it
  # explicitly so :try really is best-effort.
  begin
    require "rsec/ext"
  rescue LoadError, StandardError
    nil
  end
when :yes
  require "rsec/ext"
when :no
  # nothing to load
else
  warn "Rsec::USE_CEXT should be one of :try, :yes, :no"
end
40
+
@@ -0,0 +1,447 @@
1
+ # coding: utf-8
2
+ # ------------------------------------------------------------------------------
3
+ # Helpers(combinators) to construct parser
4
+
5
+ module Rsec #:nodoc:
6
+
7
+ # ------------------------------------------------------------------------------
8
+ # these are not callable from a parser
9
+ module Helpers
10
+
11
+ # @ desc.helper
12
+ # Lazy parser
13
+ # @ example
14
+ # parser = lazy{future}
15
+ # future = 'jim'.r
16
+ # assert_equal 'jim', parser.parse '12323'
17
+ def lazy &p
18
+ raise ArgumentError, 'lazy() requires a block' unless p
19
+ Lazy[p]
20
+ end
21
+
22
+ # @ desc.helper
23
+ # Parses one of chars in str
24
+ # @ example
25
+ # multiplicative = one_of '*/%'
26
+ # assert_equal '/', multiplicative.parse '/'
27
+ # assert_equal Rsec::INVALID, actualmultiplicative.parse '+'
28
+ def one_of str, &p
29
+ Rsec.assert_type str, String
30
+ raise ArgumentError, 'str len should > 0' if str.empty?
31
+ one_of_klass =
32
+ if (str.bytesize == str.size) and Rsec.const_defined?(:OneOfByte)
33
+ # for C-ext
34
+ OneOfByte
35
+ else
36
+ OneOf
37
+ end
38
+ one_of_klass[str.dup.freeze].map p
39
+ end
40
+
41
+ # @ desc.helper
42
+ # See also #one_of#, with leading and trailing optional breakable spaces
43
+ # @ example
44
+ # additive = one_of_('+-')
45
+ # assert_equal '+', additive.parse(' +')
46
+ def one_of_ str, &p
47
+ Rsec.assert_type str, String
48
+ raise ArgumentError, 'str len should > 0' if str.empty?
49
+ raise ArgumentError, 'str should be ascii' unless str.bytesize == str.size
50
+ raise ArgumentError, 'str should not contain space' if str =~ /\s/
51
+ spaced_one_of_klass =
52
+ if (str.bytesize == str.size) and Rsec.const_defined?(:OneOfByte_)
53
+ # for C-ext
54
+ OneOfByte_
55
+ else
56
+ OneOf_
57
+ end
58
+ spaced_one_of_klass[str.dup.freeze].map p
59
+ end
60
+
61
+ # @ desc.helper
62
+ # Primitive parser, returns nil if overflow or underflow.
63
+ # There can be an optional '+' or '-' at the beginning of string except unsinged_int32 | unsinged_int64.
64
+ # type =
65
+ # :double |
66
+ # :hex_double |
67
+ # :int32 |
68
+ # :int64 |
69
+ # :unsigned_int32 |
70
+ # :unsigned_int64
71
+ # options:
72
+ # :allowed_sign => '+' | '-' | '' | '+-' (default '+-')
73
+ # :allowed_signs => (same as :allowed_sign)
74
+ # :base => integer only (default 10)
75
+ # @ example
76
+ # p = prim :double
77
+ # assert_equal 1.23, p.parse('1.23')
78
+ # p = prim :double, allowed_sign: '-'
79
+ # assert_equal 1.23, p.parse('1.23')
80
+ # assert_equal -1.23, p.parse('-1.23')
81
+ # assert_equal Rsec::INVALID, p.parse('+1.23')
82
+ # p = prim :int32, base: 36
83
+ # assert_equal 49713, p.parse('12cx')
84
+ def prim type, options={}, &p
85
+ base = options[:base]
86
+ if [:double, :hex_double].index base
87
+ raise 'Floating points does not allow :base'
88
+ end
89
+ base ||= 10
90
+ Rsec.assert_type base, Fixnum
91
+ unless (2..36).include? base
92
+ raise RangeError, ":base should be in 2..36, but got #{base}"
93
+ end
94
+
95
+ sign_strategy = \
96
+ case (options[:allowed_sign] or options[:allowed_signs])
97
+ when nil, '+-', '-+'; 3
98
+ when '+'; 2
99
+ when '-'; 1
100
+ when ''; 0
101
+ else raise "allowed_sign should be one of nil, '', '+', '-', '+-', '-+'"
102
+ end
103
+
104
+ parser = \
105
+ case type
106
+ when :double; PDouble.new sign_strategy, false # decimal
107
+ when :hex_double; PDouble.new sign_strategy, true # hex
108
+ when :int32; PInt32.new sign_strategy, base
109
+ when :int64; PInt64.new sign_strategy, base
110
+ when :unsigned_int32;
111
+ raise 'unsigned int not allow - sign' if options[:allowed_signs] =~ /-/
112
+ PUnsignedInt32.new sign_strategy, base
113
+ when :unsigned_int64;
114
+ raise 'unsigned int not allow - sign' if options[:allowed_signs] =~ /-/
115
+ PUnsignedInt64.new sign_strategy, base
116
+ else
117
+ raise "Invalid primitive type #{type}"
118
+ end
119
+ parser.map p
120
+ end
121
+
122
+ # @ desc.helper
123
+ # Sequence parser
124
+ # @ example
125
+ # assert_equal ['a', 'b', 'c'], actualseq('a', 'b', 'c').parse('abc')
126
+ def seq *xs, &p
127
+ xs.map! {|x| Rsec.make_parser x }
128
+ Seq[xs].map p
129
+ end
130
+
131
+ # @ desc.helper
132
+ # Sequence parser with skippable pattern(or parser)
133
+ # option
134
+ # :skip default= /\s*/
135
+ # @ example
136
+ # assert_equal ['a', 'b', 'c'], actualseq_('a', 'b', 'c', skip: ',').parse('a,b,c')
137
+ def seq_ *xs, &p
138
+ skipper =
139
+ if (xs.last.is_a? Hash)
140
+ xs.pop[:skip]
141
+ end
142
+ skipper = skipper ? Rsec.make_parser(skipper) : /\s*/.r
143
+ xs.map! {|x| Rsec.make_parser x }
144
+ first, *rest = xs
145
+ raise 'sequence should not be empty' unless first
146
+ Seq_[first, rest, skipper].map p
147
+ end
148
+
149
+ # @ desc.helper
150
+ # A symbol is something wrapped with optional space
151
+ def symbol pattern, skip=/\s*/, &p
152
+ pattern = Rsec.make_parser pattern
153
+ skip = Rsec.try_skip_pattern Rsec.make_parser skip
154
+ SeqOne[[skip, pattern, skip], 1].map p
155
+ end
156
+
157
+ # @ desc.helper
158
+ # A word is wrapped with word boundaries
159
+ # @ example
160
+ # assert_equal ['yes', '3'], seq('yes', '3').parse('yes3')
161
+ # assert_equal INVALID, seq(word('yes'), '3').parse('yes3')
162
+ def word pattern, &p
163
+ parser = Rsec.make_parser pattern
164
+ # TODO check pattern type
165
+ Pattern[/\b#{parser.some}\b/].map p
166
+ end
167
+ end # helpers
168
+
169
+ # robust
170
+ Helper = Helpers
171
+
172
+ # ------------------------------------------------------------------------------
173
+ # combinators attached to parsers
174
+
175
+ module Parser #:nodoc:
176
+
177
+ # @ desc
178
+ # Transform result
179
+ # @ example
180
+ # parser = /\w+/.r.map{|word| word * 2}
181
+ # assert_equal 'hellohello', parser.parse!('hello')
182
+ def map lambda_p=nil, &p
183
+ return self if (lambda_p.nil? and p.nil?)
184
+ p = lambda_p || p
185
+ raise TypeError, 'should give a proc or lambda' unless (p.is_a? Proc)
186
+ Map[self, p]
187
+ end
188
+
189
+ # @ desc
190
+ # "p.join('+')" parses strings like "p+p+p+p+p".
191
+ # Note that at least 1 of p appears in the string.
192
+ # Sometimes it is useful to reverse the joining:
193
+ # /\s*/.r.join('p').odd parses string like " p p p "
194
+ def join inter, &p
195
+ inter = Rsec.make_parser inter
196
+ Join[self, inter].map p
197
+ end
198
+
199
+ # @ desc
200
+ # Branch parser, note that rsec is a PEG parser generator,
201
+ # beware of the difference between PEG and CFG.
202
+ def | y, &p
203
+ y = Rsec.make_parser y
204
+ arr =
205
+ if (is_a?(Branch) and !p)
206
+ [*some, y]
207
+ else
208
+ [self, y]
209
+ end
210
+ Branch[arr].map p
211
+ end
212
+
213
+ # @ desc
214
+ # Repeat n or in a range.
215
+ # If range.end < 0, repeat at least range.begin
216
+ # (Infinity and -Infinity are considered)
217
+ def * n, &p
218
+ # FIXME if self is an epsilon parser, will cause infinite loop
219
+ parser =
220
+ if n.is_a?(Range)
221
+ raise "invalid n: #{n}" if n.begin < 0
222
+ Rsec.assert_type n.begin, Integer
223
+ end_inf = (n.end.infinite? rescue false)
224
+ (Rsec.assert_type n.end, Integer) unless end_inf
225
+ if n.end > 0
226
+ RepeatRange[self, n]
227
+ else
228
+ RepeatAtLeastN[self, n.begin]
229
+ end
230
+ else
231
+ Rsec.assert_type n, Integer
232
+ raise "invalid n: #{n}" if n < 0
233
+ RepeatN[self, n]
234
+ end
235
+ parser.map p
236
+ end
237
+
238
+ # @ desc
239
+ # Appears 0 or 1 times, result is wrapped in an array
240
+ # @ example
241
+ # parser = 'a'.r.maybe
242
+ # assert_equal ['a'], parser.parse('a')
243
+ # assert_equal [], parser.parse('')
244
+ def maybe &p
245
+ Maybe[self].map &p
246
+ end
247
+ alias _? maybe
248
+
249
+ # @ desc
250
+ # Kleen star, 0 or more any times
251
+ def star &p
252
+ self.* (0..-1), &p
253
+ end
254
+
255
+ # @ desc
256
+ # Lookahead predicate, note that other can be a very complex parser
257
+ def & other, &p
258
+ other = Rsec.make_parser other
259
+ LookAhead[self, other].map p
260
+ end
261
+
262
+ # @ desc
263
+ # Negative lookahead predicate
264
+ def ^ other, &p
265
+ other = Rsec.make_parser other
266
+ NegativeLookAhead[self, other].map p
267
+ end
268
+
269
+ # @ desc
270
+ # When parsing failed, show "expect tokens" error
271
+ def fail *tokens, &p
272
+ return self if tokens.empty?
273
+ Fail[self, tokens].map p
274
+ end
275
+ alias expect fail
276
+
277
+ # @ desc
278
+ # Short for seq_(parser, other)[1]
279
+ def >> other, &p
280
+ other = Rsec.make_parser other
281
+ left = Rsec.try_skip_pattern self
282
+ SeqOne_[left, [other], SkipPattern[/\s*/], 1].map p
283
+ end
284
+
285
+ # @ desc
286
+ # Short for seq_(parser, other)[0]
287
+ def << other, &p
288
+ other = Rsec.make_parser other
289
+ right = Rsec.try_skip_pattern other
290
+ SeqOne_[self, [right], SkipPattern[/\s*/], 0].map p
291
+ end
292
+
293
+ # @ desc
294
+ # Should be end of input after parse
295
+ def eof &p
296
+ Eof[self].map p
297
+ end
298
+
299
+ # @ desc
300
+ # Packrat parser combinator, returns a parser that caches parse result, may optimize performance
301
+ def cached &p
302
+ Cached[self].map p
303
+ end
304
+ end
305
+
306
+ # ------------------------------------------------------------------------------
307
+ # additional helper methods for special classes
308
+
309
+ class Seq
310
+ # @ desc.seq, seq_
311
+ # Returns the parse result at idx, shorter and faster than map{|array| array[idx]}
312
+ # @ example
313
+ # assert_equal 'b', seq('a', 'b', 'c')[1].parse('abc')
314
+ def [] idx, &p
315
+ raise 'index out of range' if (idx >= some().size or idx < 0)
316
+ # optimize
317
+ parsers = some().map.with_index do |p, i|
318
+ i == idx ? p : Rsec.try_skip_pattern(p)
319
+ end
320
+ SeqOne[parsers, idx].map p
321
+ end
322
+
323
+ # @ desc.seq, seq_, join, join.even, join.odd
324
+ # If parse result contains only 1 element, return the element instead of the array
325
+ def unbox &p
326
+ Unbox[self].map p
327
+ end
328
+
329
+ # @ desc
330
+ # Think about "innerHTML"!
331
+ # @ example
332
+ # parser = seq('<b>', /[\w\s]+/, '</b>').inner
333
+ # parser.parse('<b>the inside</b>')
334
+ def inner &p
335
+ Inner[self].map p
336
+ end
337
+ end
338
+
339
+ class Seq_
340
+ def [] idx, &p
341
+ raise 'index out of range' if idx > rest.size or idx < 0
342
+ # optimize parsers, use skip if possible
343
+ new_first = (0 == idx ? first : Rsec.try_skip_pattern(first))
344
+ new_rest = rest().map.with_index do |p, i|
345
+ # NOTE rest start with 1
346
+ (i+1) == idx ? p : Rsec.try_skip_pattern(p)
347
+ end
348
+ SeqOne_[new_first, new_rest, skipper, idx].map p
349
+ end
350
+
351
+ def unbox &p
352
+ Unbox[self].map p
353
+ end
354
+
355
+ def inner &p
356
+ Inner[self].map p
357
+ end
358
+ end
359
+
360
+ class Join
361
+ def unbox &p
362
+ Unbox[self].map p
363
+ end
364
+
365
+ # @ desc.join
366
+ # Only keep the even(left, token) parts
367
+ def even &p
368
+ JoinEven[left, Rsec.try_skip_pattern(right)].map p
369
+ end
370
+
371
+ # @ desc.join
372
+ # Only keep the odd(right, inter) parts
373
+ def odd &p
374
+ JoinOdd[Rsec.try_skip_pattern(left), right].map p
375
+ end
376
+ end
377
+
378
+ class JoinEven
379
+ def unbox &p
380
+ Unbox[self].map p
381
+ end
382
+ end
383
+
384
+ class JoinOdd
385
+ def unbox &p
386
+ Unbox[self].map p
387
+ end
388
+ end
389
+
390
+ class Pattern
391
+ # @ desc.r
392
+ # Scan until the pattern happens
393
+ def until &p
394
+ UntilPattern[some()].map p
395
+ end
396
+ end
397
+
398
+ # ------------------------------------------------------------------------------
399
+ # helper methods for parser generation
400
+
401
+ # ensure x is a parser
402
+ def Rsec.make_parser x
403
+ return x if x.is_a?(Parser)
404
+ x = x.send(TO_PARSER_METHOD) if x.respond_to?(TO_PARSER_METHOD)
405
+ Rsec.assert_type x, Parser
406
+ x
407
+ end
408
+
409
+ # type assertion
410
+ def Rsec.assert_type obj, type
411
+ (raise TypeError, "#{obj} should be a #{type}") unless (obj.is_a? type)
412
+ end
413
+
414
+ # try to convert Pattern -> SkipPattern
415
+ def Rsec.try_skip_pattern p
416
+ # for C-ext
417
+ if Rsec.const_defined?(:FixString) and p.is_a?(FixString)
418
+ return SkipPattern[/#{Regexp.escape p.some}/]
419
+ end
420
+
421
+ case p
422
+ when Pattern
423
+ SkipPattern[p.some]
424
+ when UntilPattern
425
+ SkipUntilPattern[p.some]
426
+ else
427
+ p
428
+ end
429
+ end
430
+ end
431
+
432
class String #:nodoc:
  # String#r: convert self to parser
  # convenient string-to-parser transformer; the string is matched literally
  define_method(::Rsec::TO_PARSER_METHOD) do |*expects, &p|
    literal = /#{Regexp.escape self}/
    ::Rsec::Pattern[literal].fail(*expects).map(p)
  end
end
439
+
440
class Regexp #:nodoc:
  # Regexp#r: convert self to parser
  # convenient regexp-to-parser transformer
  define_method(::Rsec::TO_PARSER_METHOD) do |*expects, &p|
    pattern = ::Rsec::Pattern[self]
    pattern.fail(*expects).map(p)
  end
end
447
+