rparsec 0.4 → 0.4.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,11 @@
1
#
# The identity monad implementation: a value is its own monadic form.
#
class IdMonad
  #
  # Lift _v_ into the monad. For the identity monad this is _v_ itself.
  #
  def value(v)
    v
  end

  #
  # Feed _prev_ to the given block and return whatever the block returns.
  #
  def bind(prev)
    yield prev
  end

  #
  # Combine two alternatives. The first one always wins; _b_ is ignored.
  #
  def mplus(a, b)
    a
  end
end
@@ -0,0 +1,90 @@
1
+ require 'rparsec/parser'
2
+
3
#
# This class helps building lexers and parsers for keywords.
#
# Instances are created through Keywords.case_sensitive or
# Keywords.case_insensitive; _new_ is private.
#
class Keywords
  extend Parsers
  private_class_method :new
  attr_reader :keyword_symbol, :lexer

  #
  # Do we lex case sensitively?
  #
  def case_sensitive?
    @case_sensitive
  end

  #
  # To create an instance that lexes the given keywords
  # case sensitively.
  # _default_lexer_ is used to lex a token first, the token text is then compared with
  # the given keywords. If it matches any of the keywords, a keyword token is generated instead
  # using _keyword_symbol_.
  # The _block_ parameter, if present, is used to convert the token text to another object
  # when the token is recognized during grammar parsing phase.
  #
  def self.case_sensitive(words, default_lexer=word.token(:word), keyword_symbol=:keyword, &block)
    new(words, true, default_lexer, keyword_symbol, &block)
  end

  #
  # To create an instance that lexes the given keywords
  # case insensitively.
  # _default_lexer_ is used to lex a token first, the token text is then compared with
  # the given keywords. If it matches any of the keywords, a keyword token is generated instead
  # using _keyword_symbol_.
  # The _block_ parameter, if present, is used to convert the token text to another object
  # when the token is recognized during parsing phase.
  #
  def self.case_insensitive(words, default_lexer=word.token(:word), keyword_symbol=:keyword, &block)
    new(words, false, default_lexer, keyword_symbol, &block)
  end

  #
  # Builds the keyword tables and the lexer.
  # The token produced by _default_lexer_ has to expose its text as a string.
  #
  def initialize(words, case_sensitive, default_lexer, keyword_symbol, &block)
    @default_lexer, @case_sensitive, @keyword_symbol = default_lexer, case_sensitive, keyword_symbol
    # this guarantees that we have a copy of the words array and all the word strings.
    words = copy_words(words, case_sensitive)
    @name_map = {}    # canonical keyword string => token parser
    @symbol_map = {}  # canonical keyword as symbol => token parser
    word_map = {}     # canonical keyword string => token kind symbol
    words.each do |w|
      symbol = "#{keyword_symbol}:#{w}".to_sym
      word_map[w] = symbol
      parser = Parsers.token(symbol, &block)
      @symbol_map["#{w}".to_sym] = parser
      @name_map[w] = parser
    end
    @lexer = make_lexer(default_lexer, word_map)
  end

  #
  # Get the parser that recognizes the token of the given keyword during the parsing phase.
  # A String _key_ is canonicalized (downcased when lexing case insensitively) before lookup;
  # any other _key_ is looked up as-is in the symbol table.
  # Raises ArgumentError when no parser is registered for _key_.
  #
  def parser(key)
    result =
      if key.kind_of? String
        @name_map[canonical_name(key)]
      else
        @symbol_map[key]
      end
    raise ArgumentError, "parser not found for #{key}" if result.nil?
    result
  end
  alias [] parser

  private

  #
  # Wrap _default_lexer_ so that tokens whose text is a known keyword are
  # replaced by a keyword token; all other tokens pass through untouched.
  #
  def make_lexer(default_lexer, word_map)
    default_lexer.map do |tok|
      text, ind = tok.text, tok.index
      my_symbol = word_map[canonical_name(text)]
      my_symbol.nil? ? tok : Token.new(my_symbol, text, ind)
    end
  end

  #
  # The form of _name_ used as lookup key: unchanged when case sensitive,
  # downcased otherwise.
  #
  def canonical_name(name)
    @case_sensitive ? name : name.downcase
  end

  #
  # Copy the keyword list, canonicalizing each word on the way.
  #
  def copy_words(words, case_sensitive)
    words.map do |w|
      case_sensitive ? w.dup : w.downcase
    end
  end
end
@@ -0,0 +1,32 @@
1
+ require 'rparsec/misc'
2
+
3
#
# Translates a character index within a piece of source code into a
# 1-based line and column pair.
#
class CodeLocator
  extend DefHelper
  def_readable :code
  LF = ?\n

  #
  # Returns line and column (both starting from 1) of the position _ind_.
  # An index at or beyond the end of the code yields the end-of-input position;
  # an index at or before 0 yields 1, 1.
  #
  def locate(ind)
    return _locateEof if ind >= code.length
    return 1, 1 if ind <= 0
    _position_after(code[0, ind])
  end

  #
  # Returns line and column of the position right after the last character.
  #
  def _locateEof
    _position_after(code)
  end

  private

  #
  # Line and column reached after reading all of _text_ from position 1, 1.
  # Works on strings only, so it does not depend on whether character
  # literals and String#[] yield integers or one-character strings.
  #
  def _position_after(text)
    last_lf = text.rindex("\n")
    # Every newline starts a new line; the column counts the characters
    # that follow the last newline (or the start of the text), plus one.
    return text.count("\n") + 1, text.length - (last_lf || -1)
  end
end
@@ -0,0 +1,102 @@
1
#
# Helpers for defining ctor.
# Meant to be used with _extend_ inside a class body.
#
module DefHelper
  #
  # Define an _initialize_ that stores its positional arguments into the
  # instance variables named by _vars_, in order.
  # Missing arguments are stored as nil; surplus arguments are ignored.
  #
  def def_ctor(*vars)
    define_method(:initialize) do |*params|
      vars.each_with_index do |var, i|
        instance_variable_set("@#{var}", params[i])
      end
    end
  end

  #
  # Define read-only attributes _vars_ together with a matching constructor.
  #
  def def_readable(*vars)
    attr_reader(*vars)
    def_ctor(*vars)
  end

  #
  # Define read-write attributes _vars_ together with a matching constructor.
  #
  def def_mutable(*vars)
    attr_accessor(*vars)
    def_ctor(*vars)
  end
end
23
+
24
#
# To type check method parameters.
# All checks raise ArgumentError with a descriptive message on mismatch.
#
module TypeChecker
  private

  #
  # English ordinal ("1st", "2nd", "3rd", "4th", "11th", ...) for the
  # zero-based position _n_.
  #
  def nth(n)
    ordinal = n + 1
    suffix =
      if (11..13).include?(ordinal % 100)
        'th'
      else
        case ordinal % 10
        when 1 then 'st'
        when 2 then 'nd'
        when 3 then 'rd'
        else 'th'
        end
      end
    "#{ordinal}#{suffix}"
  end

  public

  #
  # Make sure _obj_ is a kind of _expected_.
  # _mtd_ names the method being checked and _n_ is the zero-based
  # position of the argument; both are only used in the error message.
  #
  def check_arg_type(expected, obj, mtd, n=0)
    unless obj.kind_of? expected
      raise ArgumentError,
        "#{obj.class} assigned to #{expected} for the #{nth n} argument of #{mtd}."
    end
  end

  #
  # Make sure _arg_ is an Array whose elements are all a kind of _elem_type_.
  #
  def check_arg_array_type(elem_type, arg, mtd, n=0)
    check_arg_type Array, arg, mtd, n
    arg.each_with_index do |x, i|
      unless x.kind_of? elem_type
        raise ArgumentError,
          "#{x.class} assigned to #{elem_type} for the #{nth i} element of the #{nth n} argument of #{mtd}."
      end
    end
  end

  #
  # Make sure every element of _args_ from index _n_ on is a kind of _expected_.
  #
  def check_vararg_type(expected, args, mtd, n = 0)
    (n...args.length).each do |i|
      check_arg_type expected, args[i], mtd, i
    end
  end

  extend self
end
56
+
57
#
# To add declarative signature support.
# Meant to be used with _extend_; _def_sig_ is then called after the
# method it describes has been defined.
#
module Signature
  #
  # Declare the parameter types of the already defined method _sym_.
  # Each entry of _types_ is either a Class (the argument at that position
  # must be a kind of it), an empty Array (the argument must be an Array),
  # or a one-element Array holding a Class (that argument and all the
  # following ones must be a kind of the class).
  # The method is then wrapped so that the check runs on every call.
  #
  def def_sig(sym, *types)
    types.each_with_index do |t, i|
      unless t.kind_of? Class
        TypeChecker.check_arg_type Class, t, :def_sig, i unless t.kind_of? Array
        TypeChecker.check_arg_type Class, t, :def_sig, i unless t.length <= 1
        TypeChecker.check_arg_array_type Class, t, :def_sig, i
      end
    end
    __intercept_method_to_check_param_types__(sym, types)
  end

  private

  #
  # Replace method _sym_ with a wrapper that validates the arguments against
  # _types_ and then delegates to the original implementation.
  #
  def __intercept_method_to_check_param_types__(sym, types)
    mtd = instance_method(sym)
    helper = "_#{sym}_param_types_checked_helper".to_sym
    # The helper checks the arguments and hands back the original method
    # bound to the receiver, ready to be called.
    define_method(helper) do |*params|
      star_type, star_ind = nil, nil
      types.each_with_index do |t, i|
        if t.kind_of? Class
          TypeChecker.check_arg_type t, params[i], sym, i
        elsif t.empty?
          TypeChecker.check_arg_type Array, params[i], sym, i
        else
          # [SomeClass] covers this argument and every one after it.
          star_type, star_ind = t[0], i
          break
        end
      end
      TypeChecker.check_vararg_type star_type, params, sym, star_ind unless star_ind.nil?
      mtd.bind(self)
    end
    # The wrapper is evaluated from a string so that it can forward a block.
    module_eval <<-RUBY
      def #{sym}(*params, &block)
        #{helper}(*params).call(*params, &block)
      end
    RUBY
  end
end
102
+
@@ -0,0 +1,52 @@
1
#
# module for Monad
#
# Mixed into objects that delegate their monadic operations to a separate
# monad implementation object (set up through _initMonad_).
#
module Monad
  attr_reader :this

  #
  # To initialize with a monad implementation and an object that obeys the monad law.
  # Raises ArgumentError when _m_ is nil.
  #
  def initMonad(m, v)
    raise ArgumentError, 'monad cannot be nil' if m.nil?
    @monad = m
    @this = v
  end

  #
  # To create a value based on the monad impl.
  #
  def value(v)
    @monad.value v
  end

  #
  # Run the _bind_ operation on the encapsulated object following the monad law.
  #
  def bind(&binder)
    @monad.bind(@this, &binder)
  end

  #
  # Run the _seq_ operation on the encapsulated object following the monad law.
  # If _seq_ is not defined by the monad impl, use _bind_ to implement.
  #
  def seq(other)
    # NOTE(review): unlike bind and plus, the encapsulated object is not
    # passed to the monad impl here -- confirm against the impl's seq contract.
    return @monad.seq(other) if @monad.respond_to? :seq
    bind { |x| other }
  end

  #
  # Run the _map_ operation on the encapsulated object following the monad law.
  # _bind_ is used to implement.
  #
  def map(&mapper)
    bind do |v|
      value(mapper.call(v))
    end
  end

  #
  # Run the _plus_ operation on the encapsulated object following the MonadPlus law.
  #
  def plus(other)
    @monad.mplus(@this, other.this)
  end
end
@@ -0,0 +1,110 @@
1
+ require 'rparsec/parser'
2
+
3
#
# utility functions for string manipulations.
#
module StringUtils
  #
  # Does _str_ start with the _sub_ string?
  # A nil _sub_ is treated as a prefix of everything.
  #
  def self.starts_with?(str, sub)
    return true if sub.nil?
    len = sub.length
    return false if len > str.length
    str[0, len] == sub
  end
end
20
+
21
#
# This class helps building lexer and parser for operators.
# The case that one operator (++ for example) contains another operator (+)
# is automatically handled so client code doesn't have to worry about ambiguity.
#
class Operators
  #
  # To create an instance of Operators for the given operators.
  # The _block_ parameter, if present, is used to convert the token text to another object
  # when the token is recognized during grammar parsing phase.
  #
  def initialize(ops, &block)
    @lexers = {}
    @parsers = {}
    sorted = Operators.sort(ops)
    lexers = sorted.map do |op|
      symbol = op.to_sym
      scanner = op.length == 1 ? Parsers.char(op) : Parsers.str(op)
      result = scanner.token(symbol)
      @lexers[symbol] = result
      @parsers[symbol] = Parsers.token(symbol, &block)
      result
    end
    @lexer = Parsers.sum(*lexers)
  end

  #
  # Get the parser for the given operator.
  # Raises ArgumentError when _op_ is not one of the known operators.
  #
  def parser(op)
    result = @parsers[op.to_sym]
    raise ArgumentError, "parser not found for #{op}" if result.nil?
    result
  end
  alias [] parser

  #
  # Get the lexer that lexes operators.
  # If an operator is specified, the lexer for that operator is returned
  # (nil when the operator is unknown).
  #
  def lexer(op=nil)
    return @lexer if op.nil?
    @lexers[op.to_sym]
  end

  #
  # Sort an array of operators so that contained operator appears after containers.
  # When no containment exist between two operators, the shorter one takes precedence.
  # Duplicates are dropped. An empty array yields an empty array.
  #
  def self.sort(ops)
    # sort the array by longer-string-first.
    ordered = ops.sort { |x, y| y.length <=> x.length }
    suites = []
    # loop from the longer to shorter string
    ordered.each do |s|
      populate_suites(suites, s)
    end
    # suites are populated with bigger suite first
    to_array suites
  end

  # The class methods below are internal helpers of Operators.sort.
  # They stay publicly callable, as before.

  #
  # Put _s_ into the first suite that holds an operator starting with _s_;
  # open a new suite at the end when there is none.
  #
  def self.populate_suites(suites, s)
    # populate the suites so that bigger suite first
    # this way we can use << operator for non-contained strings.
    suites.each do |suite|
      return if populate_suite(suite, s)
    end
    suites << [s]
  end

  #
  # Try to place _s_ into _suite_. Returns true when _suite_ holds an
  # operator starting with _s_ (and _s_ was inserted right after it, unless
  # it is that very operator), false otherwise.
  #
  def self.populate_suite(suite, s)
    # loop from the tail of the suite
    (suite.length - 1).downto(0) do |ind|
      cur = suite[ind]
      next unless StringUtils.starts_with? cur, s
      suite.insert(ind + 1, s) unless cur == s
      return true
    end
    false
  end

  #
  # Flatten the suites into one array, last suite first.
  #
  def self.to_array(suites)
    # Array#flatten! returns nil when nothing was flattened (e.g. no suites
    # at all), so the non-destructive form is used to always get an array.
    suites.reverse.flatten
  end
end
@@ -0,0 +1,794 @@
1
+ %w{
2
+ monad misc error context locator token functors parser_monad
3
+ }.each {|lib| require "rparsec/#{lib}"}
4
+ require 'strscan'
5
+
6
+
7
#
# Represents a parser that parses a certain grammar rule.
#
class Parser
  include Functors
  include Monad
  extend Signature
  extend DefHelper
  MyMonad = ParserMonad.new
  attr_accessor :name

  private

  def initialize
    initMonad(MyMonad, self)
  end

  #
  # Used by subclasses to declare their constructor parameters.
  # Each name in _vars_ becomes an instance variable filled from the
  # constructor argument at the same position. Parameters whose name
  # contains 'parser' (but not 'parsers') are checked to be Parser objects.
  #
  def self.init(*vars)
    parser_checker = {}
    vars.each_with_index do |var, i|
      name = var.to_s
      parser_checker[i] = var if name.include?('parser') && !name.include?('parsers')
    end
    define_method(:initialize) do |*params|
      super()
      vars.each_with_index do |var, i|
        param = params[i]
        if parser_checker.include? i
          TypeChecker.check_arg_type Parser, param, self, i
        end
        instance_variable_set("@"+var.to_s, param)
      end
    end
  end

  #
  # Render the input at which parsing failed for use in an error message.
  #
  def _display_current_input(input, code, index)
    return 'EOF' if input.nil?
    c = input
    case c
    when Integer then "'" << c << "'"
    when Token then c.text
    else c.to_s
    end
  end

  #
  # Append _encountered_ to _msg_, separated by ', ' unless _msg_ is blank
  # or already ends with a period or comma.
  #
  def _add_encountered_error(msg, encountered)
    result = msg.dup
    result << ', ' unless msg.strip.length == 0 || msg =~ /.*(\.|,)\s*$/
    "#{result}#{encountered}"
  end

  #
  # Append the line and column of the error to _msg_ (which is modified in place).
  #
  def _add_location_to_error(locator, ctxt, msg, code)
    line, col = locator.locate(ctxt.error.index)
    msg << " at line #{line}, col #{col}."
  end

  public

  #
  # parses a string.
  # Returns the parser result, or raises ParserException with a message
  # describing what was expected and where.
  #
  def parse(src)
    ctxt = ParseContext.new(src)
    return ctxt.result if _parse ctxt
    ctxt.prepare_error
    locator = CodeLocator.new(src)
    raise ParserException.new(ctxt.error.index),
      _add_location_to_error(locator, ctxt,
        _add_encountered_error(ctxt.to_msg,
          _display_current_input(ctxt.error.input, src, ctxt.index)), src)
  end

  #
  # Set name for the parser.
  # self is returned.
  #
  def setName(nm)
    @name = nm
    self
  end

  #
  # a.map{|x|x+1} will first execute parser a, when it succeeds,
  # the associated block is executed to transform the result to a new value
  # (increment it in this case).
  # Without a block, self is returned.
  #
  def map(&block)
    return self unless block
    MapParser.new(self, block)
  end

  #
  # _self_ is first executed, the parser result is then passed as parameter to the associated block,
  # which evaluates to another Parser object at runtime. This new Parser object is then executed
  # to get the final parser result.
  ##
  # Different from _bind_, parser result of _self_ will be expanded first if it is an array.
  # Without a block, self is returned.
  #
  def bindn(&block)
    return self unless block
    BoundnParser.new(self, block)
  end

  #
  # a.mapn{|x,y|x+y} will first execute parser a, when it succeeds,
  # the array result (if any) is expanded and passed as parameters
  # to the associated block. The result of the block is then used
  # as the parsing result.
  # Without a block, self is returned.
  #
  def mapn(&block)
    return self unless block
    MapnParser.new(self, block)
  end

  #
  # Create a new parser that's atomic,
  # meaning that when it fails, input consumption is undone.
  #
  def atomize
    AtomParser.new(self).setName(@name)
  end

  #
  # Create a new parser that looks at inputs without consuming them.
  #
  def peek
    PeekParser.new(self).setName(@name)
  end

  #
  # To create a new parser that succeeds only if self fails.
  #
  def not(msg="#{self} unexpected")
    NotParser.new(self, msg)
  end

  #
  # To create a parser that does "look ahead" for n inputs.
  # Currently this returns self unchanged; _n_ is ignored.
  #
  def lookahead(n)
    self
  end

  #
  # To create a parser that fails with a given error message.
  #
  def expect(msg)
    ExpectParser.new(self, msg)
  end

  #
  # a.followed b will sequentially run a and b;
  # result of a is preserved as the ultimate return value.
  #
  def followed(other)
    FollowedParser.new(self, other)
  end
  def_sig :followed, Parser

  #
  # To create a parser that repeats self for a minimum _min_ times,
  # and maximally _max_ times.
  # Only the return value of the last execution is preserved.
  #
  def repeat_(min, max=min)
    return Parsers.failure("min=#{min}, max=#{max}") if min > max
    return Some_Parser.new(self, min, max) unless min == max
    return Parsers.one if max <= 0
    return self if max == 1
    Repeat_Parser.new(self, max)
  end

  #
  # To create a parser that repeats self for a minimum _min_ times,
  # and maximally _max_ times.
  # All return values are collected in an array.
  #
  def repeat(min, max=min)
    return Parsers.failure("min=#{min}, max=#{max}") if min > max
    if min == max
      RepeatParser.new(self, max)
    else
      SomeParser.new(self, min, max)
    end
  end

  #
  # To create a parser that repeats self for at least _least_ times.
  # parser.many_ is equivalent to bnf notation "parser*".
  # Only the return value of the last execution is preserved.
  #
  def many_(least=0)
    Many_Parser.new(self, least)
  end

  #
  # To create a parser that repeats self for at least _least_ times.
  # All return values are collected in an array.
  #
  def many(least=0)
    ManyParser.new(self, least)
  end

  #
  # To create a parser that repeats self for at most _max_ times.
  # Only the return value of the last execution is preserved.
  #
  def some_(max)
    repeat_(0, max)
  end

  #
  # To create a parser that repeats self for at most _max_ times.
  # All return values are collected in an array.
  #
  def some(max)
    repeat(0, max)
  end

  #
  # To create a parser that repeats self for unlimited times,
  # with the pattern recognized by _delim_ as separator that separates each occurrence.
  # self has to match for at least once.
  # Return values of self are collected in an array.
  #
  def separated1(delim)
    rest = delim >> self
    self.bind do |v0|
      result = [v0]
      (rest.map {|v| result << v}).many_ >> value(result)
    end
  end

  #
  # To create a parser that repeats self for unlimited times,
  # with the pattern recognized by _delim_ as separator that separates each occurrence.
  # Return values of self are collected in an array.
  #
  def separated(delim)
    separated1(delim).plus value([])
  end

  #
  # To create a parser that repeats self for unlimited times,
  # with the pattern recognized by _delim_ as separator that separates each occurrence
  # and also possibly ends the pattern.
  # self has to match for at least once.
  # Return values of self are collected in an array.
  #
  def delimited1(delim)
    # A delimiter not followed by self throws :__end_delimiter__, which ends
    # the repetition below.
    rest = delim >> (self.plus Parsers.throwp(:__end_delimiter__))
    self.bind do |v0|
      result = [v0]
      (rest.map {|v| result << v}).many_.catchp(:__end_delimiter__) >> value(result)
    end
  end

  #
  # To create a parser that repeats self for unlimited times,
  # with the pattern recognized by _delim_ as separator that separates each occurrence
  # and also possibly ends the pattern.
  # Return values of self are collected in an array.
  #
  def delimited(delim)
    delimited1(delim).plus value([])
  end

  #
  # String representation: the parser name if set, the class name otherwise.
  #
  def to_s
    return name unless name.nil?
    self.class.to_s
  end

  #
  # a | b will run b when a fails.
  # b is auto-boxed to Parser when it is not of type Parser.
  #
  def |(other)
    AltParser.new([self, autobox_parser(other)])
  end

  #
  # a.optional(default) is equivalent to a.plus(value(default))
  #
  def optional(default=nil)
    self.plus(value(default))
  end

  #
  # a.catchp(:somesymbol) will catch the :somesymbol thrown by a.
  #
  def catchp(symbol)
    CatchParser.new(symbol, self)
  end

  #
  # a.fragment will return the string matched by a.
  #
  def fragment
    FragmentParser.new(self)
  end

  #
  # a.nested b will feed the token array returned by parser a to parser b
  # for a nested parsing.
  #
  def nested(parser)
    NestedParser.new(self, parser)
  end

  #
  # a.lexeme(delim) will parse _a_ for 0 or more times and ignore all
  # patterns recognized by _delim_.
  # Values returned by _a_ are collected in an array.
  #
  def lexeme(delim = Parsers.whitespaces)
    delim = delim.many_
    delim >> self.delimited(delim)
  end

  #
  # For prefix unary operator.
  # a.prefix op will run parser _op_ for 0 or more times and eventually run parser _a_
  # for one time.
  # _op_ should return a Proc that accepts one parameter.
  # Proc objects returned by _op_ are then fed with the value returned by _a_
  # from right to left.
  # The final result is returned as return value.
  #
  def prefix(op)
    Parsers.sequence(op.many, self) do |funcs, v|
      funcs.reverse_each {|f| v = f.call(v)}
      v
    end
  end

  #
  # For postfix unary operator.
  # a.postfix op will run parser _a_ for once and then _op_ for 0 or more times.
  # _op_ should return a Proc that accepts one parameter.
  # Proc objects returned by _op_ are then fed with the value returned by _a_
  # from left to right.
  # The final result is returned as return value.
  #
  def postfix(op)
    Parsers.sequence(self, op.many) do |v, funcs|
      funcs.each {|f| v = f.call(v)}
      v
    end
  end

  #
  # For non-associative infix binary operator.
  # _op_ has to return a Proc that takes two parameters, which
  # are returned by the _self_ parser as operands.
  #
  def infixn(op)
    bind do |v1|
      bin = Parsers.sequence(op, self) do |f, v2|
        f.call(v1, v2)
      end
      bin | value(v1)
    end
  end

  #
  # For left-associative infix binary operator.
  # _op_ has to return a Proc that takes two parameters, which
  # are returned by the _self_ parser as operands.
  #
  def infixl(op)
    Parsers.sequence(self, _infix_rest(op, self).many) do |v, rests|
      rests.each do |r|
        f, v1 = *r
        v = f.call(v, v1)
      end
      v
    end
  end

  #
  # For right-associative infix binary operator.
  # _op_ has to return a Proc that takes two parameters, which
  # are returned by the _self_ parser as operands.
  #
  def infixr(op)
    Parsers.sequence(self, _infix_rest(op, self).many) do |v, rests|
      if rests.empty?
        v
      else
        # Fold from the rightmost operand back to the first one.
        f, seed = *rests.last
        for i in (0...rests.length-1)
          cur = rests.length-2-i
          f1, v1 = *rests[cur]
          seed = f.call(v1, seed)
          f = f1
        end
        f.call(v, seed)
      end
    end
  end

  #
  # a.token(:word_token) will return a Token object when _a_ succeeds.
  # The matched string (or the string returned by _a_, if any) is
  # encapsulated in the token, together with the :word_token symbol and
  # the starting index of the match.
  #
  def token(kind)
    TokenParser.new(kind, self)
  end

  #
  # a.seq b will sequentially run a then b.
  # The result of b is preserved as return value.
  # If a block is associated, values returned by _a_ and _b_
  # are passed into the block and the return value of
  # the block is used as the final result of the parser.
  #
  def seq(other, &block)
    Parsers.sequence(self, other, &block)
  end
  def_sig :seq, Parser

  #
  # Similar to _seq_. _other_ is auto-boxed if it is not of type Parser.
  #
  def >>(other)
    seq(autobox_parser(other))
  end

  private

  #
  # Turn a non-Parser value into a parser that returns that value.
  #
  def autobox_parser(val)
    return Parsers.value(val) unless val.kind_of? Parser
    val
  end

  #
  # Parser for one "operator operand" pair of an infix expression.
  #
  def _infix_rest(operator, operand)
    Parsers.sequence(operator, operand, &Idn)
  end

  public

  # The aliases are declared after def_sig so that they refer to the
  # type-checked versions of the methods.
  alias ~ not
  alias << followed
  alias * repeat_
  def_sig :plus, Parser

  private

  #
  # Runs the parser against the parse context.
  # The default implementation always fails; subclasses override it.
  #
  def _parse(ctxt)
    false
  end
end
415
#
# This module provides all out-of-box parser implementations.
# Every method can be called on the module itself (Parsers.char etc.)
# or mixed into another object.
#
module Parsers
  extend Signature

  #
  # A parser that always fails with the given error message.
  #
  def failure(msg)
    FailureParser.new(msg)
  end

  #
  # A parser that always succeeds with the given return value.
  #
  def value(v)
    ValueParser.new(v)
  end

  #
  # A parser that calls alternative parsers until one succeed,
  # or any failure with input consumption beyond the current look-ahead.
  #
  def sum(*alts)
    PlusParser.new(alts)
  end
  def_sig :sum, [Parser]

  #
  # A parser that calls alternative parsers until one succeeds.
  #
  def alt(*alts)
    AltParser.new(alts)
  end
  def_sig :alt, [Parser]

  #
  # A parser that succeeds when the given predicate returns true
  # (with the current input as the parameter).
  # _expected_ is the error message when _pred_ returns false.
  #
  def satisfies(expected, &pred)
    SatisfiesParser.new(pred, expected)
  end

  #
  # A parser that succeeds when the current input is equal to the given value.
  # _expected_ is the error message when it is not.
  #
  def is(v, expected="#{v} expected")
    satisfies(expected) {|c| c == v}
  end

  #
  # A parser that succeeds when the current input is not equal to the given value.
  # _expected_ is the error message when it is.
  #
  def isnt(v, expected="#{v} unexpected")
    satisfies(expected) {|c| c != v}
  end

  #
  # A parser that succeeds when the current input is among the given values.
  # A single String argument is used as the set of accepted characters.
  #
  def among(*vals)
    expected = "one of [#{vals.join(', ')}] expected"
    vals = as_list vals
    satisfies(expected) {|c| vals.include? c}
  end

  #
  # A parser that succeeds when the current input is not among the given values.
  # A single String argument is used as the set of rejected characters.
  #
  def not_among(*vals)
    expected = "one of [#{vals.join(', ')}] unexpected"
    vals = as_list vals
    satisfies(expected) {|c| !vals.include? c}
  end

  #
  # A parser that succeeds when the current input is the given character.
  # _c_ is either a character code or a string whose first character is used.
  #
  def char(c)
    if c.kind_of? Integer
      nm = c.chr
      is(c, "'#{nm}' expected").setName(nm)
    else
      is(c[0], "'#{c}' expected").setName(c)
    end
  end

  #
  # A parser that succeeds when the current input is not the given character.
  # _c_ is either a character code or a string whose first character is used.
  #
  def not_char(c)
    if c.kind_of? Integer
      nm = c.chr
      isnt(c, "'#{nm}' unexpected").setName("~#{nm}")
    else
      isnt(c[0], "'#{c}' unexpected").setName("~#{c}")
    end
  end

  #
  # A parser that succeeds when there's no input available.
  #
  def eof(expected="EOF expected")
    EofParser.new(expected).setName('EOF')
  end

  #
  # A parser that tries to match the current inputs one by one
  # with the given values.
  # It succeeds only when all given values are matched, in which case all the
  # matched inputs are consumed.
  #
  def are(vals, expected="#{vals} expected")
    AreParser.new(vals, expected)
  end

  #
  # A parser that makes sure that the given values don't match
  # the current inputs. One input is consumed if it succeeds.
  #
  def arent(vals, expected="#{vals} unexpected")
    are(vals, '').not(expected) >> any
  end

  #
  # A parser that matches the given string.
  #
  def string(str, msg = "\"#{str}\" expected")
    are(str, msg).setName(str)
  end

  #
  # A parser that makes sure that the current input doesn't match a string.
  # One character is consumed if it succeeds.
  #
  def not_string(str, msg="\"#{str}\" unexpected")
    string(str).not(msg) >> any
  end
  alias str string

  #
  # A parser that sequentially run the given parsers.
  # The result of the last parser is used as return value.
  # If a block is given, the results of the parsers are passed
  # into the block as parameters, and the block return value
  # is used as result instead.
  #
  def sequence(*parsers, &proc)
    SequenceParser.new(parsers, proc)
  end
  def_sig :sequence, [Parser]

  #
  # A parser that returns the current input index (starting from 0).
  #
  def get_index
    GetIndexParser.new.setName('get_index')
  end

  #
  # A parser that moves the current input pointer to a certain index.
  #
  def set_index(ind)
    SetIndexParser.new(ind).setName('set_index')
  end

  #
  # A parser that tries all given alternative parsers
  # and picks the one with the longest match.
  #
  def longest(*parsers)
    BestParser.new(parsers, true)
  end
  def_sig :longest, [Parser]

  #
  # A parser that tries all given alternative parsers
  # and picks the one with the shortest match.
  #
  def shortest(*parsers)
    BestParser.new(parsers, false)
  end
  def_sig :shortest, [Parser]

  # Declared after def_sig so that the aliases are type-checked as well.
  alias shorter shortest
  alias longer longest

  #
  # A parser that consumes one input.
  #
  def any
    AnyParser.new
  end

  #
  # A parser that always fails.
  #
  def zero
    ZeroParser.new
  end

  #
  # A parser that always succeeds.
  #
  def one
    OneParser.new
  end

  #
  # A parser that succeeds if the current input is within a certain range
  # (both ends inclusive).
  #
  def range(from, to, msg="#{as_char from}..#{as_char to} expected")
    from, to = as_num(from), as_num(to)
    satisfies(msg) {|c| c <= to && c >= from}
  end

  #
  # A parser that throws a symbol.
  #
  def throwp(symbol)
    ThrowParser.new(symbol)
  end

  #
  # A parser that succeeds if the current inputs match
  # the given regular expression.
  # The matched string is consumed and returned as result.
  # _ptn_ is a Regexp or a String holding the pattern source.
  #
  def regexp(ptn, expected="/#{ptn.to_s}/ expected")
    RegexpParser.new(as_regexp(ptn), expected).setName(expected)
  end

  #
  # A parser that parses a word
  # (starting with alpha or underscore, followed by 0 or more alpha, number or underscore)
  # and returns the matched word as string.
  #
  def word(expected='word expected')
    regexp(/[a-zA-Z_]\w*/, expected)
  end

  #
  # A parser that parses an integer
  # and returns the matched integer as string.
  #
  def integer(expected='integer expected')
    regexp(/\d+(?!\w)/, expected)
  end

  #
  # A parser that parses a number (integer, or decimal number)
  # and returns the matched number as string.
  #
  def number(expected='number expected')
    regexp(/\d+(\.\d+)?/, expected)
  end

  #
  # A parser that matches the given string, case insensitively.
  #
  def string_nocase(str, expected="'#{str}' expected")
    StringCaseInsensitiveParser.new(str, expected).setName(str)
  end

  #
  # A parser that succeeds when the current input
  # is a token with one of the given token kinds.
  # If a block is given, the token text is passed to the block
  # as parameter, and the block return value is used as result.
  # Otherwise, the token object is used as result.
  #
  def token(*kinds, &proc)
    expected = "#{kinds.join(' or ')} expected"
    # respond_to? takes a single method name (its second parameter is the
    # include-private flag), so _kind_ and _text_ are checked separately.
    token_like = lambda {|tok| tok.respond_to?(:kind) && tok.respond_to?(:text)}
    recognizer =
      if kinds.length == 1
        kind = kinds[0]
        satisfies(expected) {|tok| token_like.call(tok) && kind == tok.kind}
      else
        satisfies(expected) {|tok| token_like.call(tok) && kinds.include?(tok.kind)}
      end
    recognizer = recognizer.map {|tok| proc.call(tok.text)} if proc
    recognizer
  end

  #
  # A parser that parses a white space character.
  #
  def whitespace(expected="whitespace expected")
    satisfies(expected) {|c| Whitespaces.include? c}
  end

  #
  # A parser that parses 1 or more white space characters.
  #
  def whitespaces(expected="whitespace(s) expected")
    whitespace(expected).many_(1)
  end

  #
  # A parser that parses a line started with _start_.
  # nil is the result.
  #
  def comment_line(start)
    string(start) >> not_char(?\n).many_ >> char(?\n).optional >> value(nil)
  end

  #
  # A parser that parses a chunk of text started with _open_
  # and ended by _close_.
  # nil is the result.
  #
  def comment_block(open, close)
    string(open) >> not_string(close).many_ >> string(close) >> value(nil)
  end

  #
  # A lazy parser, when executed, calls the given block
  # to get a parser object and delegate the call to this lazily
  # instantiated parser.
  #
  def lazy(&block)
    LazyParser.new(block)
  end

  #
  # A parser that watches the current parser result without changing it.
  # The following assert will succeed:
  ##
  # char(?a) >> watch{|x|assert_equal(?a, x)}
  ##
  # watch can also be used as a handy tool to print trace information,
  # for example:
  ##
  # some_parser >> watch {puts "some_parser succeeded."}
  #
  def watch(&block)
    return one unless block
    WatchParser.new(block)
  end

  #
  # A parser that watches the current parser result without changing it.
  # The following assert will succeed:
  ##
  # char(?a).repeat(2) >> watchn{|x,y|assert_equal([?a,?a], [x,y])}
  ##
  # Slightly different from _watch_, _watchn_ expands the current parser result
  # before passing it into the associated block.
  #
  def watchn(&block)
    return one unless block
    WatchnParser.new(block)
  end

  #
  # A parser that maps current parser result to a new result using
  # the given block.
  ##
  # Different from Parser#map, this method does not need to be combined
  # with any Parser object. It is rather an independent Parser object
  # that maps the _current_ parser result.
  ##
  # parser1.map{|x|...} is equivalent to parser1 >> map{|x|...}
  #
  def map(&block)
    return one unless block
    MapCurrentParser.new(block)
  end

  #
  # A parser that maps current parser result to a new result using
  # the given block. If the current parser result is an array, the array
  # elements are expanded and then passed as parameters to the block.
  ##
  # Different from Parser#mapn, this method does not need to be combined
  # with any Parser object. It is rather an independent Parser object
  # that maps the _current_ parser result.
  ##
  # parser1.mapn{|x,y|...} is equivalent to parser1 >> mapn{|x,y|...}
  #
  def mapn(&block)
    return one unless block
    MapnCurrentParser.new(block)
  end

  private

  #
  # characters considered white space.
  #
  Whitespaces = " \t\r\n"

  #
  # A String pattern is compiled into a Regexp; anything else is returned as is.
  #
  def as_regexp(ptn)
    case ptn
    when String then Regexp.new(ptn)
    else ptn
    end
  end

  #
  # The printable form of a character given as String or character code.
  #
  def as_char(c)
    case c
    when String then c
    else c.chr
    end
  end

  #
  # The comparable form of a character: the first element of a String,
  # anything else unchanged.
  #
  def as_num(c)
    case c
    when String then c[0]
    else c
    end
  end

  #
  # Unwrap a single String argument so that it can serve as the collection
  # to search; any other argument list is returned unchanged.
  #
  def as_list(vals)
    return vals unless vals.length == 1
    val = vals[0]
    return vals unless val.kind_of? String
    val
  end

  extend self
end
794
+