rparsec 0.4.1 → 0.4.2

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,52 +1,58 @@
1
- #
2
- # module for Monad
3
- #
4
- module Monad
5
- attr_reader :this
6
- #
7
- # To initialize with a monad implementation and an object that obeys the monad law.
8
- #
9
- def initMonad(m, v)
10
- raise ArgumentError, 'monad cannot be nil' if m.nil?
11
- @monad = m;
12
- @this = v;
13
- end
14
- #
15
- # To create a value based on the monad impl.
16
- #
17
- def value v
18
- @monad.value v
19
- end
20
- #
21
- # Run the _bind_ operation on the encapsulated object following the monad law.
22
- #
23
- def bind(&binder)
24
- @monad.bind(@this, &binder)
25
- end
26
- #
27
- # Run the _seq_ operation on the encapsulated object following the monad law.
28
- # If _seq_ is not defined by the monad impl, use _bind_ to implement.
29
- #
30
- def seq(other)
31
- if @monad.respond_to? :seq
32
- @monad.seq(other)
33
- else bind {|x|other}
34
- end
35
- end
36
- #
37
- # Run the _map_ operation on the encapsulated object following the monad law.
38
- # _bind_ is used to implement.
39
- #
40
- def map(&mapper)
41
- bind do |v|
42
- result = mapper.call v;
43
- value(result);
44
- end
45
- end
46
- #
47
- # Run the _plus_ operation on the encapsulated object following the MonadPlus law.
48
- #
49
- def plus other
50
- @monad.mplus(@this, other.this)
51
- end
1
+ #
2
+ # module for Monad
3
+ #
4
+ module Monad
5
+ attr_reader :this
6
+
7
+ #
8
+ # To initialize with a monad implementation and an object that obeys the monad law.
9
+ #
10
+ def initMonad(m, v)
11
+ raise ArgumentError, 'monad cannot be nil' if m.nil?
12
+ @monad = m;
13
+ @this = v;
14
+ end
15
+
16
+ #
17
+ # To create a value based on the monad impl.
18
+ #
19
+ def value v
20
+ @monad.value v
21
+ end
22
+
23
+ #
24
+ # Run the _bind_ operation on the encapsulated object following the monad law.
25
+ #
26
+ def bind(&binder)
27
+ @monad.bind(@this, &binder)
28
+ end
29
+
30
+ #
31
+ # Run the _seq_ operation on the encapsulated object following the monad law.
32
+ # If _seq_ is not defined by the monad impl, use _bind_ to implement.
33
+ #
34
+ def seq(other)
35
+ if @monad.respond_to? :seq
36
+ @monad.seq(other)
37
+ else bind {|x|other}
38
+ end
39
+ end
40
+
41
+ #
42
+ # Run the _map_ operation on the encapsulated object following the monad law.
43
+ # _bind_ is used to implement.
44
+ #
45
+ def map(&mapper)
46
+ bind do |v|
47
+ result = mapper.call v;
48
+ value(result);
49
+ end
50
+ end
51
+
52
+ #
53
+ # Run the _plus_ operation on the encapsulated object following the MonadPlus law.
54
+ #
55
+ def plus other
56
+ @monad.mplus(@this, other.this)
57
+ end
52
58
  end
@@ -1,110 +1,117 @@
1
- require 'rparsec/parser'
2
-
3
- #
4
- # utility functions for string manipulations.
5
- #
6
- module StringUtils
7
- #
8
- # Does _str_ starts with the _sub_ string?
9
- #
10
- def self.starts_with? str, sub
11
- return true if sub.nil?
12
- len = sub.length
13
- return false if len > str.length
14
- for i in (0...len)
15
- return false if str[i] != sub[i]
16
- end
17
- true
18
- end
19
- end
20
-
21
- #
22
- # This class helps building lexer and parser for operators.
23
- # The case that one operator (++ for example) contains another operator (+)
24
- # is automatically handled so client code don't have to worry about ambiguity.
25
- #
26
- class Operators
27
- #
28
- # To create an instance of Operators for the given operators.
29
- # The _block_ parameter, if present, is used to convert the token text to another object
30
- # when the token is recognized during grammar parsing phase.
31
- #
32
- def initialize(ops, &block)
33
- @lexers = {}
34
- @parsers = {}
35
- sorted = Operators.sort(ops)
36
- lexers = sorted.map do |op|
37
- symbol = op.to_sym
38
- result = nil
39
- if op.length == 1
40
- result = Parsers.char(op)
41
- else
42
- result = Parsers.str(op)
43
- end
44
- result = result.token(symbol)
45
- @lexers[symbol] = result
46
- @parsers[symbol] = Parsers.token(symbol, &block)
47
- result
48
- end
49
- @lexer = Parsers.sum(*lexers)
50
- end
51
- #
52
- # Get the parser for the given operator.
53
- #
54
- def parser(op)
55
- result = @parsers[op.to_sym]
56
- raise ArgumentError, "parser not found for #{op}" if result.nil?
57
- result
58
- end
59
- alias [] parser
60
- #
61
- # Get the lexer that lexes operators.
62
- # If an operator is specified, the lexer for that operator is returned.
63
- #
64
- def lexer(op=nil)
65
- return @lexer if op.nil?
66
- @lexers[op.to_sym]
67
- end
68
- #
69
- # Sort an array of operators so that contained operator appears after containers.
70
- # When no containment exist between two operators, the shorter one takes precedence.
71
- #
72
- def self.sort(ops)
73
- #sort the array by longer-string-first.
74
- ordered = ops.sort {|x, y|y.length <=> x.length}
75
- suites = []
76
- # loop from the longer to shorter string
77
- ordered.each do |s|
78
- populate_suites(suites, s)
79
- end
80
- # suites are populated with bigger suite first
81
- to_array suites
82
- end
83
- private
84
- def self.populate_suites(suites, s)
85
- # populate the suites so that bigger suite first
86
- # this way we can use << operator for non-contained strings.
87
-
88
- # we need to start from bigger suite. So loop in reverse order
89
- for suite in suites
90
- return if populate_suite(suite, s)
91
- end
92
- suites << [s]
93
- end
94
- def self.populate_suite(suite, s)
95
- # loop from the tail of the suite
96
- for i in (1..suite.length)
97
- ind = suite.length - i
98
- cur = suite[ind]
99
- if StringUtils.starts_with? cur, s
100
- suite.insert(ind+1, s) unless cur == s
101
- return true
102
- end
103
- end
104
- false
105
- end
106
- def self.to_array suites
107
- result = []
108
- suites.reverse!.flatten!
109
- end
1
+ require 'rparsec/parser'
2
+
3
+ #
4
+ # utility functions for string manipulations.
5
+ #
6
+ module StringUtils
7
+ #
8
+ # Does _str_ starts with the _sub_ string?
9
+ #
10
+ def self.starts_with? str, sub
11
+ return true if sub.nil?
12
+ len = sub.length
13
+ return false if len > str.length
14
+ for i in (0...len)
15
+ return false if str[i] != sub[i]
16
+ end
17
+ true
18
+ end
19
+ end
20
+
21
+ #
22
+ # This class helps building lexer and parser for operators.
23
+ # The case that one operator (++ for example) contains another operator (+)
24
+ # is automatically handled so client code don't have to worry about ambiguity.
25
+ #
26
+ class Operators
27
+ #
28
+ # To create an instance of Operators for the given operators.
29
+ # The _block_ parameter, if present, is used to convert the token text to another object
30
+ # when the token is recognized during grammar parsing phase.
31
+ #
32
+ def initialize(ops, &block)
33
+ @lexers = {}
34
+ @parsers = {}
35
+ sorted = Operators.sort(ops)
36
+ lexers = sorted.map do |op|
37
+ symbol = op.to_sym
38
+ result = nil
39
+ if op.length == 1
40
+ result = Parsers.char(op)
41
+ else
42
+ result = Parsers.str(op)
43
+ end
44
+ result = result.token(symbol)
45
+ @lexers[symbol] = result
46
+ @parsers[symbol] = Parsers.token(symbol, &block)
47
+ result
48
+ end
49
+ @lexer = Parsers.sum(*lexers)
50
+ end
51
+
52
+ #
53
+ # Get the parser for the given operator.
54
+ #
55
+ def parser(op)
56
+ result = @parsers[op.to_sym]
57
+ raise ArgumentError, "parser not found for #{op}" if result.nil?
58
+ result
59
+ end
60
+
61
+ alias [] parser
62
+
63
+ #
64
+ # Get the lexer that lexes operators.
65
+ # If an operator is specified, the lexer for that operator is returned.
66
+ #
67
+ def lexer(op=nil)
68
+ return @lexer if op.nil?
69
+ @lexers[op.to_sym]
70
+ end
71
+
72
+ #
73
+ # Sort an array of operators so that contained operator appears after containers.
74
+ # When no containment exist between two operators, the shorter one takes precedence.
75
+ #
76
+ def self.sort(ops)
77
+ #sort the array by longer-string-first.
78
+ ordered = ops.sort {|x, y|y.length <=> x.length}
79
+ suites = []
80
+ # loop from the longer to shorter string
81
+ ordered.each do |s|
82
+ populate_suites(suites, s)
83
+ end
84
+ # suites are populated with bigger suite first
85
+ to_array suites
86
+ end
87
+
88
+ private
89
+
90
+ def self.populate_suites(suites, s)
91
+ # populate the suites so that bigger suite first
92
+ # this way we can use << operator for non-contained strings.
93
+
94
+ # we need to start from bigger suite. So loop in reverse order
95
+ for suite in suites
96
+ return if populate_suite(suite, s)
97
+ end
98
+ suites << [s]
99
+ end
100
+
101
+ def self.populate_suite(suite, s)
102
+ # loop from the tail of the suite
103
+ for i in (1..suite.length)
104
+ ind = suite.length - i
105
+ cur = suite[ind]
106
+ if StringUtils.starts_with? cur, s
107
+ suite.insert(ind+1, s) unless cur == s
108
+ return true
109
+ end
110
+ end
111
+ false
112
+ end
113
+
114
+ def self.to_array suites
115
+ suites.reverse!.flatten!
116
+ end
110
117
  end
@@ -1,794 +1,892 @@
1
- %w{
2
- monad misc error context locator token functors parser_monad
3
- }.each {|lib| require "rparsec/#{lib}"}
4
- require 'strscan'
5
-
6
-
7
- #
8
- # Represents a parser that parses a certain grammar rule.
9
- #
10
- class Parser
11
- include Functors
12
- include Monad
13
- extend Signature
14
- extend DefHelper
15
- MyMonad = ParserMonad.new
16
- attr_accessor :name
17
- private
18
- def initialize
19
- initMonad(MyMonad, self)
20
- end
21
- def self.init(*vars)
22
- parser_checker = {}
23
- vars.each_with_index do |var, i|
24
- name = var.to_s
25
- parser_checker[i] = var if name.include?('parser') && !name.include?('parsers')
26
- end
27
- define_method(:initialize) do |*params|
28
- super()
29
- vars.each_with_index do |var, i|
30
- param = params[i]
31
- if parser_checker.include? i
32
- TypeChecker.check_arg_type Parser, param, self, i
33
- end
34
- instance_variable_set("@"+var.to_s, param)
35
- end
36
- end
37
- end
38
- def _display_current_input(input, code, index)
39
- return 'EOF' if input.nil?
40
- c = input
41
- case c when Fixnum: "'"<<c<<"'" when Token: c.text else c.to_s end
42
- end
43
- def _add_encountered_error(msg, encountered)
44
- result = msg.dup
45
- result << ', ' unless msg.strip.length == 0 || msg =~ /.*(\.|,)\s*$/
46
- "#{result}#{encountered}"
47
- end
48
- def _add_location_to_error(locator, ctxt, msg, code)
49
- line, col = locator.locate(ctxt.error.index)
50
- msg << " at line #{line}, col #{col}."
51
- end
52
- public
53
- #
54
- # parses a string.
55
- #
56
- def parse(src)
57
- ctxt = ParseContext.new(src)
58
- return ctxt.result if _parse ctxt
59
- ctxt.prepare_error
60
- locator = CodeLocator.new(src)
61
- raise ParserException.new(ctxt.error.index),
62
- _add_location_to_error(locator, ctxt,
63
- _add_encountered_error(ctxt.to_msg,
64
- _display_current_input(ctxt.error.input, src, ctxt.index)), src)
65
- end
66
- #
67
- # Set name for the parser.
68
- # self is returned.
69
- #
70
- def setName(nm)
71
- @name = nm
72
- self
73
- end
74
- #
75
- # a.map{|x|x+1} will first execute parser a, when it succeeds,
76
- # the associated block is executed to transform the result to a new value
77
- # (increment it in this case).
78
- #
79
- def map(&block)
80
- return self unless block
81
- MapParser.new(self, block)
82
- end
83
- #
84
- # _self_ is first executed, the parser result is then passed as parameter to the associated block,
85
- # which evaluates to another Parser object at runtime. This new Parser object is then executed
86
- # to get the final parser result.
87
- ##
88
- # Different from _bind_, parser result of _self_ will be expanded first if it is an array.
89
- #
90
- def bindn(&block)
91
- return self unless block
92
- BoundnParser.new(self, block)
93
- end
94
- #
95
- # a.mapn{|x,y|x+y} will first execute parser a, when it succeeds,
96
- # the array result (if any) is expanded and passed as parameters
97
- # to the associated block. The result of the block is then used
98
- # as the parsing result.
99
- #
100
- def mapn(&block)
101
- return self unless block
102
- MapnParser.new(self, block)
103
- end
104
-
105
- #
106
- # Create a new parser that's atomic.,
107
- # meaning that when it fails, input consumption is undone.
108
- #
109
- def atomize
110
- AtomParser.new(self).setName(@name)
111
- end
112
- #
113
- # Create a new parser that looks at inputs whthout consuming them.
114
- #
115
- def peek
116
- PeekParser.new(self).setName(@name)
117
- end
118
- #
119
- # To create a new parser that succeed only if self fails.
120
- #
121
- def not(msg="#{self} unexpected")
122
- NotParser.new(self, msg)
123
- end
124
- #
125
- # To create a parser that does "look ahead" for n inputs.
126
- #
127
- def lookahead n
128
- self
129
- end
130
- #
131
- # To create a parser that fails with a given error message.
132
- #
133
- def expect msg
134
- ExpectParser.new(self, msg)
135
- end
136
- #
137
- # a.followed b will sequentially run a and b;
138
- # result of a is preserved as the ultimate return value.
139
- #
140
- def followed(other)
141
- FollowedParser.new(self, other)
142
- end
143
- def_sig :followed, Parser
144
- #
145
- # To create a parser that repeats self for a minimum _min_ times,
146
- # and maximally _max_ times.
147
- # Only the return value of the last execution is preserved.
148
- #
149
- def repeat_(min, max=min)
150
- return Parsers.failure("min=#{min}, max=#{max}") if min > max
151
- if(min==max)
152
- return Parsers.one if max <= 0
153
- return self if max == 1
154
- Repeat_Parser.new(self, max)
155
- else
156
- Some_Parser.new(self, min, max)
157
- end
158
- end
159
- #
160
- # To create a parser that repeats self for a minimum _min_ times,
161
- # and maximally _max_ times.
162
- # All return values are collected in an array.
163
- #
164
- def repeat(min, max=min)
165
- return Parsers.failure("min=#{min}, max=#{max}") if min > max
166
- if(min==max)
167
- RepeatParser.new(self, max)
168
- else
169
- SomeParser.new(self, min, max)
170
- end
171
- end
172
- #
173
- # To create a parser that repeats self for at least _least_ times.
174
- # parser.many_ is equivalent to bnf notation "parser*".
175
- # Only the return value of the last execution is preserved.
176
- #
177
- def many_(least=0)
178
- Many_Parser.new(self, least)
179
- end
180
- #
181
- # To create a parser that repeats self for at least _least_ times.
182
- # All return values are collected in an array.
183
- #
184
- def many(least=0)
185
- ManyParser.new(self, least)
186
- end
187
- #
188
- # To create a parser that repeats self for at most _max_ times.
189
- # Only the return value of the last execution is preserved.
190
- #
191
- def some_(max)
192
- repeat_(0, max)
193
- end
194
- #
195
- # To create a parser that repeats self for at most _max_ times.
196
- # All return values are collected in an array.
197
- #
198
- def some(max)
199
- repeat(0, max)
200
- end
201
- #
202
- # To create a parser that repeats self for unlimited times,
203
- # with the pattern recognized by _delim_ as separator that separates each occurrence.
204
- # self has to match for at least once.
205
- # Return values of self are collected in an array.
206
- #
207
- def separated1 delim
208
- rest = delim >> self
209
- self.bind do |v0|
210
- result = [v0]
211
- (rest.map {|v| result << v}).many_ >> value(result)
212
- end
213
- end
214
- #
215
- # To create a parser that repeats self for unlimited times,
216
- # with the pattern recognized by _delim_ as separator that separates each occurrence.
217
- # Return values of self are collected in an array.
218
- #
219
- def separated delim
220
- separated1(delim).plus value([])
221
- end
222
- #
223
- # To create a parser that repeats self for unlimited times,
224
- # with the pattern recognized by _delim_ as separator that separates each occurrence
225
- # and also possibly ends the pattern.
226
- # self has to match for at least once.
227
- # Return values of self are collected in an array.
228
- #
229
- def delimited1 delim
230
- rest = delim >> (self.plus Parsers.throwp(:__end_delimiter__))
231
- self.bind do |v0|
232
- result = [v0]
233
- (rest.map {|v| result << v}).many_.catchp(:__end_delimiter__) >> value(result)
234
- end
235
- end
236
- #
237
- # To create a parser that repeats self for unlimited times,
238
- # with the pattern recognized by _delim_ as separator that separates each occurrence
239
- # and also possibly ends the pattern.
240
- # Return values of self are collected in an array.
241
- #
242
- def delimited delim
243
- delimited1(delim).plus value([])
244
- end
245
- #
246
- # String representation
247
- #
248
- def to_s
249
- return name unless name.nil?
250
- self.class.to_s
251
- end
252
- #
253
- # a | b will run b when a fails.
254
- # b is auto-boxed to Parser when it is not of type Parser.
255
- #
256
- def | other
257
- AltParser.new([self, autobox_parser(other)])
258
- end
259
- #
260
- # a.optional(default) is equivalent to a.plus(value(default))
261
- #
262
- def optional(default=nil)
263
- self.plus(value(default))
264
- end
265
- #
266
- # a.catchp(:somesymbol) will catch the :somesymbol thrown by a.
267
- #
268
- def catchp(symbol)
269
- CatchParser.new(symbol, self)
270
- end
271
- #
272
- # a.fragment will return the string matched by a.
273
- #
274
- def fragment
275
- FragmentParser.new(self)
276
- end
277
- #
278
- # a.nested b will feed the token array returned by parser a to parser b
279
- # for a nested parsing.
280
- #
281
- def nested(parser)
282
- NestedParser.new(self, parser)
283
- end
284
- #
285
- # a.lexeme(delim) will parse _a_ for 0 or more times and ignore all
286
- # patterns recognized by _delim_.
287
- # Values returned by _a_ are collected in an array.
288
- #
289
- def lexeme(delim = Parsers.whitespaces)
290
- delim = delim.many_
291
- delim >> self.delimited(delim)
292
- end
293
- #
294
- # For prefix unary operator.
295
- # a.prefix op will run parser _op_ for 0 or more times and eventually run parser _a_
296
- # for one time.
297
- # _op_ should return a Proc that accepts one parameter.
298
- # Proc objects returned by _op_ is then fed with the value returned by _a_
299
- # from right to left.
300
- # The final result is returned as return value.
301
- #
302
- def prefix(op)
303
- Parsers.sequence(op.many, self) do |funcs, v|
304
- funcs.reverse_each {|f|v=f.call(v)}
305
- v
306
- end
307
- end
308
- #
309
- # For postfix unary operator.
310
- # a.postfix op will run parser _a_ for once and then _op_ for 0 or more times.
311
- # _op_ should return a Proc that accepts one parameter.
312
- # Proc objects returned by _op_ is then fed with the value returned by _a_
313
- # from left to right.
314
- # The final result is returned as return value.
315
- #
316
- def postfix(op)
317
- Parsers.sequence(self, op.many) do |v, funcs|
318
- funcs.each{|f|v=f.call(v)}
319
- v
320
- end
321
- end
322
- #
323
- # For non-associative infix binary operator.
324
- # _op_ has to return a Proc that takes two parameters, who
325
- # are returned by the _self_ parser as operands.
326
- #
327
- def infixn(op)
328
- bind do |v1|
329
- bin = Parsers.sequence(op, self) do |f, v2|
330
- f.call(v1,v2)
331
- end
332
- bin | value(v1)
333
- end
334
- end
335
- #
336
- # For left-associative infix binary operator.
337
- # _op_ has to return a Proc that takes two parameters, who
338
- # are returned by the _self_ parser as operands.
339
- #
340
- def infixl(op)
341
- Parsers.sequence(self, _infix_rest(op, self).many) do |v, rests|
342
- rests.each do |r|
343
- f, v1 = *r
344
- v = f.call(v,v1)
345
- end
346
- v
347
- end
348
- end
349
- #
350
- # For right-associative infix binary operator.
351
- # _op_ has to return a Proc that takes two parameters, who
352
- # are returned by the _self_ parser as operands.
353
- #
354
- def infixr(op)
355
- Parsers.sequence(self, _infix_rest(op, self).many) do |v, rests|
356
- if rests.empty?
357
- v
358
- else
359
- f, seed = *rests.last
360
- for i in (0...rests.length-1)
361
- cur = rests.length-2-i
362
- f1, v1 = *rests[cur]
363
- seed = f.call(v1, seed)
364
- f = f1
365
- end
366
- f.call(v, seed)
367
- end
368
- end
369
- end
370
- #
371
- # a.token(:word_token) will return a Token object when _a_ succeeds.
372
- # The matched string (or the string returned by _a_, if any) is
373
- # encapsulated in the token, together with the :word_token symbol and
374
- # the starting index of the match.
375
- #
376
- def token(kind)
377
- TokenParser.new(kind, self)
378
- end
379
- #
380
- # a.seq b will sequentially run a then b.
381
- # The result of b is preserved as return value.
382
- # If a block is associated, values returned by _a_ and _b_
383
- # are passed into the block and the return value of
384
- # the block is used as the final result of the parser.
385
- #
386
- def seq(other, &block)
387
- # TypeChecker.check_arg_type Parser, other, :seq
388
- Parsers.sequence(self, other, &block)
389
- end
390
- def_sig :seq, Parser
391
- #
392
- # Similar to _seq_. _other_ is auto-boxed if it is not of type Parser.
393
- #
394
- def >> (other)
395
- seq(autobox_parser(other))
396
- end
397
- private
398
- def autobox_parser(val)
399
- return Parsers.value(val) unless val.kind_of? Parser
400
- val
401
- end
402
- def _infix_rest(operator, operand)
403
- Parsers.sequence(operator, operand, &Idn)
404
- end
405
- public
406
- alias ~ not
407
- alias << followed
408
- alias * repeat_
409
- def_sig :plus, Parser
410
- private
411
- def _parse(ctxt)
412
- false
413
- end
414
- end
415
- #
416
- # This module provides all out-of-box parser implementations.
417
- #
418
- module Parsers
419
- extend Signature
420
- #
421
- # A parser that always fails with the given error message.
422
- #
423
- def failure msg
424
- FailureParser.new(msg)
425
- end
426
- #
427
- # A parser that always succeeds with the given return value.
428
- #
429
- def value v
430
- ValueParser.new(v)
431
- end
432
- #
433
- # A parser that calls alternative parsers until one succeed,
434
- # or any failure with input consumption beyond the current look-ahead.
435
- #
436
- def sum(*alts)
437
- # TypeChecker.check_vararg_type Parser, alts, :sum
438
- PlusParser.new(alts)
439
- end
440
- def_sig :sum, [Parser]
441
-
442
- #
443
- # A parser that calls alternative parsers until one succeeds.
444
- #
445
- def alt(*alts)
446
- AltParser.new(alts)
447
- end
448
- def_sig :alt, [Parser]
449
- #
450
- # A parser that succeeds when the given predicate returns true
451
- # (with the current input as the parameter).
452
- # _expected_ is the error message when _pred_ returns false.
453
- #
454
- def satisfies(expected, &pred)
455
- SatisfiesParser.new(pred, expected)
456
- end
457
- #
458
- # A parser that succeeds when the the current input is equal to the given value.
459
- # _expected_ is the error message when _pred_ returns false.
460
- #
461
- def is(v, expected="#{v} expected")
462
- satisfies(expected) {|c|c==v}
463
- end
464
- #
465
- # A parser that succeeds when the the current input is not equal to the given value.
466
- # _expected_ is the error message when _pred_ returns false.
467
- #
468
- def isnt(v, expected="#{v} unexpected")
469
- satisfies(expected) {|c|c!=v}
470
- end
471
- #
472
- # A parser that succeeds when the the current input is among the given values.
473
- #
474
- def among(*vals)
475
- expected="one of [#{vals.join(', ')}] expected"
476
- vals = as_list vals
477
- satisfies(expected) {|c|vals.include? c}
478
- end
479
- #
480
- # A parser that succeeds when the the current input is not among the given values.
481
- #
482
- def not_among(*vals)
483
- expected = "one of [#{vals.join(', ')}] unexpected"
484
- vals = as_list vals
485
- satisfies(expected) {|c|!vals.include? c}
486
- end
487
- #
488
- # A parser that succeeds when the the current input is the given character.
489
- #
490
- def char(c)
491
- if c.kind_of? Fixnum
492
- nm = c.chr
493
- is(c, "'#{nm}' expected").setName(nm)
494
- else
495
- is(c[0], "'#{c}' expected").setName(c)
496
- end
497
- end
498
- #
499
- # A parser that succeeds when the the current input is not the given character.
500
- #
501
- def not_char(c)
502
- if c.kind_of? Fixnum
503
- nm = c.chr
504
- isnt(c, "'#{nm}' unexpected").setName("~#{nm}")
505
- else
506
- isnt(c[0], "'#{c}' unexpected").setName("~#{c}")
507
- end
508
- end
509
-
510
- #
511
- # A parser that succeeds when there's no input available.
512
- #
513
- def eof(expected="EOF expected")
514
- EofParser.new(expected).setName('EOF')
515
- end
516
- #
517
- # A parser that tries to match the current inputs one by one
518
- # with the given values.
519
- # It succeeds only when all given values are matched, in which case all the
520
- # matched inputs are consumed.
521
- #
522
- def are(vals, expected="#{vals} expected")
523
- AreParser.new(vals, expected)
524
- end
525
- #
526
- # A parser that makes sure that the given values don't match
527
- # the current inputs. One input is consumed if it succeeds.
528
- #
529
- def arent(vals, expected="#{vals} unexpected")
530
- are(vals, '').not(expected) >> any
531
- end
532
- #
533
- # A parser that matches the given string.
534
- #
535
- def string(str, msg = "\"#{str}\" expected")
536
- are(str, msg).setName(str)
537
- end
538
- #
539
- # A parser that makes sure that the current input doesn't match a string.
540
- # One character is consumed if it succeeds.
541
- #
542
- def not_string(str, msg="\"#{str}\" unexpected")
543
- string(str).not(msg) >> any
544
- end
545
- alias str string
546
- #
547
- # A parser that sequentially run the given parsers.
548
- # The result of the last parser is used as return value.
549
- # If a block is given, the results of the parsers are passed
550
- # into the block as parameters, and the block return value
551
- # is used as result instead.
552
- #
553
- def sequence(*parsers, &proc)
554
- # TypeChecker.check_vararg_type Parser, parsers, :sequence
555
- SequenceParser.new(parsers, proc)
556
- end
557
- def_sig :sequence, [Parser]
558
- #
559
- # A parser that returns the current input index (starting from 0).
560
- #
561
- def get_index
562
- GetIndexParser.new.setName('get_index')
563
- end
564
- #
565
- # A parser that moves the current input pointer to a certain index.
566
- #
567
- def set_index ind
568
- SetIndexParser.new(ind).setName('set_index')
569
- end
570
- #
571
- # A parser that tries all given alternative parsers
572
- # and picks the one with the longest match.
573
- #
574
- def longest(*parsers)
575
- # TypeChecker.check_vararg_type Parser, parsers, :longest
576
- BestParser.new(parsers, true)
577
- end
578
- def_sig :longest, [Parser]
579
- #
580
- # A parser that tries all given alternative parsers
581
- # and picks the one with the shortest match.
582
- #
583
- def shortest(*parsers)
584
- # TypeChecker.check_vararg_type Parser, parsers, :shortest
585
- BestParser.new(parsers, false)
586
- end
587
- def_sig :shortest, [Parser]
588
- alias shorter shortest
589
- alias longer longest
590
- #
591
- # A parser that consumes one input.
592
- #
593
- def any
594
- AnyParser.new
595
- end
596
- #
597
- # A parser that always fails.
598
- #
599
- def zero
600
- ZeroParser.new
601
- end
602
- #
603
- # A parser that always succeeds.
604
- #
605
- def one
606
- OneParser.new
607
- end
608
- #
609
- # A parser that succeeds if the current input is within a certain range.
610
- #
611
- def range(from, to, msg="#{as_char from}..#{as_char to} expected")
612
- from, to = as_num(from), as_num(to)
613
- satisfies(msg) {|c| c <= to && c >= from}
614
- end
615
- #
616
- # A parser that throws a symbol.
617
- #
618
- def throwp(symbol)
619
- ThrowParser.new(symbol)
620
- end
621
- #
622
- # A parser that succeeds if the current inputs match
623
- # the given regular expression.
624
- # The matched string is consumed and returned as result.
625
- #
626
- def regexp(ptn, expected="/#{ptn.to_s}/ expected")
627
- RegexpParser.new(as_regexp(ptn), expected).setName(expected)
628
- end
629
- #
630
- # A parser that parses a word
631
- # (starting with alpha or underscore, followed by 0 or more alpha, number or underscore).
632
- # and return the matched word as string.
633
- #
634
- def word(expected='word expected')
635
- regexp(/[a-zA-Z_]\w*/, expected)
636
- end
637
- #
638
- # A parser that parses an integer
639
- # and return the matched integer as string.
640
- #
641
- def integer(expected='integer expected')
642
- regexp(/\d+(?!\w)/, expected)
643
- end
644
- #
645
- # A parser that parses a number (integer, or decimal number)
646
- # and return the matched number as string.
647
- #
648
- def number(expected='number expected')
649
- regexp(/\d+(\.\d+)?/, expected)
650
- end
651
- #
652
- # A parser that matches the given string, case insensitively.
653
- #
654
- def string_nocase(str, expected="'#{str}' expected")
655
- StringCaseInsensitiveParser.new(str, expected).setName(str)
656
- end
657
- #
658
- # A parser that succeeds when the current input
659
- # is a token with one of the the given token kinds.
660
- # If a block is given, the token text is passed to the block
661
- # as parameter, and the block return value is used as result.
662
- # Otherwise, the token object is used as result.
663
- #
664
- def token(*kinds, &proc)
665
- expected="#{kinds.join(' or ')} expected"
666
- recognizer = nil
667
- if kinds.length==1
668
- kind = kinds[0]
669
- recognizer = satisfies(expected) do |tok|
670
- tok.respond_to? :kind, :text and kind == tok.kind
671
- end
672
- else
673
- recognizer = satisfies(expected) do |tok|
674
- tok.respond_to? :kind, :text and kinds.include? tok.kind
675
- end
676
- end
677
- recognizer = recognizer.map{|tok|proc.call(tok.text)} if proc
678
- recognizer
679
- end
680
- #
681
- # A parser that parses a white space character.
682
- #
683
- def whitespace(expected="whitespace expected")
684
- satisfies(expected) {|c| Whitespaces.include? c}
685
- end
686
- #
687
- # A parser that parses 1 or more white space characters.
688
- #
689
- def whitespaces(expected="whitespace(s) expected")
690
- whitespace(expected).many_(1)
691
- end
692
- #
693
- # A parser that parses a line started with _start_.
694
- # nil is the result.
695
- #
696
- def comment_line start
697
- string(start) >> not_char(?\n).many_ >> char(?\n).optional >> value(nil)
698
- end
699
- #
700
- # A parser that parses a chunk of text started with _open_
701
- # and ended by _close_.
702
- # nil is the result.
703
- #
704
- def comment_block open, close
705
- string(open) >> not_string(close).many_ >> string(close) >> value(nil)
706
- end
707
- #
708
- # A lazy parser, when executed, calls the given block
709
- # to get a parser object and delegate the call to this lazily
710
- # instantiated parser.
711
- #
712
- def lazy(&block)
713
- LazyParser.new(block)
714
- end
715
- #
716
- # A parser that watches the current parser result without changing it.
717
- # The following assert will succeed:
718
- ##
719
- # char(?a) >> watch{|x|assert_equal(?a, x)}
720
- ##
721
- # watch can also be used as a handy tool to print trace information,
722
- # for example:
723
- ##
724
- # some_parser >> watch {puts "some_parser succeeded."}
725
- #
726
- def watch(&block)
727
- return one unless block
728
- WatchParser.new(block)
729
- end
730
- #
731
- # A parser that watches the current parser result without changing it.
732
- # The following assert will succeed:
733
- ##
734
- # char(?a).repeat(2) >> watchn{|x,y|assert_equal([?a,?a], [x,y])}
735
- ##
736
- # Slightly different from _watch_, _watchn_ expands the current parser result
737
- # before passing it into the associated block.
738
- #
739
- def watchn(&block)
740
- return one unless block
741
- WatchnParser.new(block)
742
- end
743
- #
744
- # A parser that maps current parser result to a new result using
745
- # the given block.
746
- ##
747
- # Different from Parser#map, this method does not need to be combined
748
- # with any Parser object. It is rather an independent Parser object
749
- # that maps the _current_ parser result.
750
- ##
751
- # parser1.map{|x|...} is equivalent to parser1 >> map{|x|...}
752
- #
753
- def map(&block)
754
- return one unless block
755
- MapCurrentParser.new(block)
756
- end
757
- #
758
- # A parser that maps current parser result to a new result using
759
- # the given block. If the current parser result is an array, the array
760
- # elements are expanded and then passed as parameters to the block.
761
- ##
762
- # Different from Parser#mapn, this method does not need to be combined
763
- # with any Parser object. It is rather an independent Parser object
764
- # that maps the _current_ parser result.
765
- ##
766
- # parser1.mapn{|x,y|...} is equivalent to parser1 >> mapn{|x,y|...}
767
- #
768
- def mapn(&block)
769
- return one unless block
770
- MapnCurrentParser.new(block)
771
- end
772
- private
773
- #
774
- # characters considered white space.
775
- #
776
- Whitespaces = " \t\r\n"
777
- def as_regexp ptn
778
- case ptn when String: Regexp.new(ptn) else ptn end
779
- end
780
- def as_char c
781
- case c when String: c else c.chr end
782
- end
783
- def as_num c
784
- case c when String: c[0] else c end
785
- end
786
- def as_list vals
787
- return vals unless vals.length==1
788
- val = vals[0]
789
- return vals unless val.kind_of? String
790
- val
791
- end
792
- extend self
793
- end
794
-
1
+ %w{
2
+ monad misc error context locator token functors parser_monad
3
+ }.each {|lib| require "rparsec/#{lib}"}
4
+ require 'strscan'
5
+
6
+
7
#
# Represents a parser that parses a certain grammar rule.
#
# The public methods are combinators: each one wraps or composes parsers
# into a new Parser object. The private _parse(ctxt) hook defined at the
# bottom of the class always returns false; presumably concrete parser
# classes override it (confirm against the subclasses).
#
class Parser
  include Functors
  include Monad
  extend Signature
  extend DefHelper
  MyMonad = ParserMonad.new
  attr_accessor :name

  private

  #
  # Registers this parser with the shared ParserMonad instance.
  #
  def initialize
    initMonad(MyMonad, self)
  end

  #
  # Defines an _initialize_ method that stores its positional arguments in
  # instance variables named after _vars_.
  # Arguments whose variable name contains 'parser' (but not 'parsers')
  # are checked to be of type Parser.
  #
  # NOTE: a bare +private+ does not apply to singleton methods, so this
  # class method stays publicly callable even though it is declared in
  # the private section.
  #
  def self.init(*vars)
    # index => variable name, for the positions that must hold a Parser.
    parser_checker = {}
    vars.each_with_index do |var, i|
      name = var.to_s
      parser_checker[i] = var if name.include?('parser') && !name.include?('parsers')
    end
    define_method(:initialize) do |*params|
      # Explicit parentheses: zero-argument super is required inside define_method.
      super()
      vars.each_with_index do |var, i|
        param = params[i]
        if parser_checker.include? i
          TypeChecker.check_arg_type Parser, param, self, i
        end
        instance_variable_set("@#{var}", param)
      end
    end
  end

  #
  # Renders the input element at the point of failure for an error message.
  # nil is shown as 'EOF', an integer as the quoted character with that code,
  # a Token as its text, and anything else through to_s.
  # _code_ and _index_ are accepted but not used.
  #
  def _display_current_input(input, code, index)
    return 'EOF' if input.nil?
    case input
    # Integer rather than Fixnum: Fixnum was removed in Ruby 3.2, and
    # Integer matches every value Fixnum matched.
    when Integer then "'" << input << "'"
    when Token then input.text
    else input.to_s
    end
  end

  #
  # Appends _encountered_ to a copy of _msg_, inserting ', ' unless _msg_
  # is blank or already ends with '.' or ',' (ignoring trailing whitespace).
  #
  def _add_encountered_error(msg, encountered)
    result = msg.dup
    result << ', ' unless msg.strip.length == 0 || msg =~ /.*(\.|,)\s*$/
    "#{result}#{encountered}"
  end

  #
  # Appends the line and column of the error to _msg_ (mutating it) and
  # returns it. _code_ is accepted but not used.
  #
  def _add_location_to_error(locator, ctxt, msg, code)
    line, col = locator.locate(ctxt.error.index)
    msg << " at line #{line}, col #{col}."
  end

  public

  #
  # parses a string.
  # Returns the parser result on success; raises ParserException with a
  # message describing what was expected, what was encountered and where.
  #
  def parse(src)
    ctxt = ParseContext.new(src)
    return ctxt.result if _parse ctxt
    ctxt.prepare_error
    locator = CodeLocator.new(src)
    encountered = _display_current_input(ctxt.error.input, src, ctxt.index)
    message = _add_encountered_error(ctxt.to_msg, encountered)
    raise ParserException.new(ctxt.error.index),
      _add_location_to_error(locator, ctxt, message, src)
  end

  #
  # Set name for the parser.
  # self is returned.
  #
  def setName(nm)
    @name = nm
    self
  end

  #
  # a.map{|x|x+1} will first execute parser a, when it succeeds,
  # the associated block is executed to transform the result to a new value
  # (increment it in this case).
  # self is returned when no block is given.
  #
  def map(&block)
    return self unless block
    MapParser.new(self, block)
  end

  #
  # _self_ is first executed, the parser result is then passed as parameter to the associated block,
  # which evaluates to another Parser object at runtime. This new Parser object is then executed
  # to get the final parser result.
  #
  # Different from _bind_, parser result of _self_ will be expanded first if it is an array.
  # self is returned when no block is given.
  #
  def bindn(&block)
    return self unless block
    BoundnParser.new(self, block)
  end

  #
  # a.mapn{|x,y|x+y} will first execute parser a, when it succeeds,
  # the array result (if any) is expanded and passed as parameters
  # to the associated block. The result of the block is then used
  # as the parsing result.
  # self is returned when no block is given.
  #
  def mapn(&block)
    return self unless block
    MapnParser.new(self, block)
  end

  #
  # Create a new parser that's atomic,
  # meaning that when it fails, input consumption is undone.
  #
  def atomize
    AtomParser.new(self).setName(@name)
  end

  #
  # Create a new parser that looks at inputs without consuming them.
  #
  def peek
    PeekParser.new(self).setName(@name)
  end

  #
  # To create a new parser that succeeds only if self fails.
  #
  def not(msg="#{self} unexpected")
    NotParser.new(self, msg)
  end

  #
  # Intended to create a parser that does "look ahead" for n inputs.
  # Currently a no-op: _n_ is ignored and self is returned unchanged.
  #
  def lookahead n
    self
  end

  #
  # To create a parser that fails with a given error message.
  #
  def expect msg
    ExpectParser.new(self, msg)
  end

  #
  # a.followed b will sequentially run a and b;
  # result of a is preserved as the ultimate return value.
  #
  def followed(other)
    FollowedParser.new(self, other)
  end
  def_sig :followed, Parser

  #
  # To create a parser that repeats self for a minimum _min_ times,
  # and maximally _max_ times.
  # Only the return value of the last execution is preserved.
  #
  def repeat_(min, max=min)
    return Parsers.failure("min=#{min}, max=#{max}") if min > max
    if min == max
      # Exact repetition: zero or fewer always succeeds, one is self itself.
      return Parsers.one if max <= 0
      return self if max == 1
      Repeat_Parser.new(self, max)
    else
      Some_Parser.new(self, min, max)
    end
  end

  #
  # To create a parser that repeats self for a minimum _min_ times,
  # and maximally _max_ times.
  # All return values are collected in an array.
  #
  def repeat(min, max=min)
    return Parsers.failure("min=#{min}, max=#{max}") if min > max
    if min == max
      RepeatParser.new(self, max)
    else
      SomeParser.new(self, min, max)
    end
  end

  #
  # To create a parser that repeats self for at least _least_ times.
  # parser.many_ is equivalent to bnf notation "parser*".
  # Only the return value of the last execution is preserved.
  #
  def many_(least=0)
    Many_Parser.new(self, least)
  end

  #
  # To create a parser that repeats self for at least _least_ times.
  # All return values are collected in an array.
  #
  def many(least=0)
    ManyParser.new(self, least)
  end

  #
  # To create a parser that repeats self for at most _max_ times.
  # Only the return value of the last execution is preserved.
  #
  def some_(max)
    repeat_(0, max)
  end

  #
  # To create a parser that repeats self for at most _max_ times.
  # All return values are collected in an array.
  #
  def some(max)
    repeat(0, max)
  end

  #
  # To create a parser that repeats self an unlimited number of times,
  # with the pattern recognized by _delim_ as separator that separates each occurrence.
  # self has to match at least once.
  # Return values of self are collected in an array.
  #
  def separated1 delim
    rest = delim >> self
    self.bind do |v0|
      # A fresh array per bind invocation; later matches are appended to it.
      result = [v0]
      (rest.map {|v| result << v}).many_ >> value(result)
    end
  end

  #
  # To create a parser that repeats self an unlimited number of times,
  # with the pattern recognized by _delim_ as separator that separates each occurrence.
  # Return values of self are collected in an array.
  #
  def separated delim
    separated1(delim).plus value([])
  end

  #
  # To create a parser that repeats self an unlimited number of times,
  # with the pattern recognized by _delim_ as separator that separates each occurrence
  # and also possibly ends the pattern.
  # self has to match at least once.
  # Return values of self are collected in an array.
  #
  def delimited1 delim
    # After a delimiter, either self matches or :__end_delimiter__ is thrown,
    # which the catchp below turns into the end of the repetition.
    rest = delim >> (self.plus Parsers.throwp(:__end_delimiter__))
    self.bind do |v0|
      result = [v0]
      (rest.map {|v| result << v}).many_.catchp(:__end_delimiter__) >> value(result)
    end
  end

  #
  # To create a parser that repeats self an unlimited number of times,
  # with the pattern recognized by _delim_ as separator that separates each occurrence
  # and also possibly ends the pattern.
  # Return values of self are collected in an array.
  #
  def delimited delim
    delimited1(delim).plus value([])
  end

  #
  # String representation: the parser name if set, otherwise the class name.
  #
  def to_s
    return name unless name.nil?
    self.class.to_s
  end

  #
  # a | b will run b when a fails.
  # b is auto-boxed to Parser when it is not of type Parser.
  #
  def | other
    AltParser.new([self, autobox_parser(other)])
  end

  #
  # a.optional(default) is equivalent to a.plus(value(default))
  #
  def optional(default=nil)
    self.plus(value(default))
  end

  #
  # a.catchp(:somesymbol) will catch the :somesymbol thrown by a.
  #
  def catchp(symbol)
    CatchParser.new(symbol, self)
  end

  #
  # a.fragment will return the string matched by a.
  #
  def fragment
    FragmentParser.new(self)
  end

  #
  # a.nested b will feed the token array returned by parser a to parser b
  # for a nested parsing.
  #
  def nested(parser)
    NestedParser.new(self, parser)
  end

  #
  # a.lexeme(delim) will parse _a_ for 0 or more times and ignore all
  # patterns recognized by _delim_.
  # Values returned by _a_ are collected in an array.
  #
  def lexeme(delim = Parsers.whitespaces)
    delim = delim.many_
    delim >> self.delimited(delim)
  end

  #
  # For prefix unary operator.
  # a.prefix op will run parser _op_ for 0 or more times and eventually run parser _a_
  # for one time.
  # _op_ should return a Proc that accepts one parameter.
  # Proc objects returned by _op_ are then fed with the value returned by _a_
  # from right to left.
  # The final result is returned as return value.
  #
  def prefix(op)
    Parsers.sequence(op.many, self) do |funcs, v|
      funcs.reverse_each {|f| v = f.call(v)}
      v
    end
  end

  #
  # For postfix unary operator.
  # a.postfix op will run parser _a_ for once and then _op_ for 0 or more times.
  # _op_ should return a Proc that accepts one parameter.
  # Proc objects returned by _op_ are then fed with the value returned by _a_
  # from left to right.
  # The final result is returned as return value.
  #
  def postfix(op)
    Parsers.sequence(self, op.many) do |v, funcs|
      funcs.each {|f| v = f.call(v)}
      v
    end
  end

  #
  # For non-associative infix binary operator.
  # _op_ has to return a Proc that takes two parameters, which
  # are returned by the _self_ parser as operands.
  #
  def infixn(op)
    bind do |v1|
      bin = Parsers.sequence(op, self) do |f, v2|
        f.call(v1, v2)
      end
      # No operator following the first operand: the operand is the result.
      bin | value(v1)
    end
  end

  #
  # For left-associative infix binary operator.
  # _op_ has to return a Proc that takes two parameters, which
  # are returned by the _self_ parser as operands.
  #
  def infixl(op)
    Parsers.sequence(self, _infix_rest(op, self).many) do |v, rests|
      rests.each do |r|
        f, v1 = *r
        v = f.call(v, v1)
      end
      v
    end
  end

  #
  # For right-associative infix binary operator.
  # _op_ has to return a Proc that takes two parameters, which
  # are returned by the _self_ parser as operands.
  #
  def infixr(op)
    Parsers.sequence(self, _infix_rest(op, self).many) do |v, rests|
      if rests.empty?
        v
      else
        # Fold from the right: start with the last operand, then walk the
        # remaining (operator, operand) pairs backwards, each time applying
        # the operator that sits to the right of the current operand.
        f, seed = *rests.last
        rests[0...-1].reverse_each do |pair|
          f1, v1 = *pair
          seed = f.call(v1, seed)
          f = f1
        end
        f.call(v, seed)
      end
    end
  end

  #
  # a.token(:word_token) will return a Token object when _a_ succeeds.
  # The matched string (or the string returned by _a_, if any) is
  # encapsulated in the token, together with the :word_token symbol and
  # the starting index of the match.
  #
  def token(kind)
    TokenParser.new(kind, self)
  end

  #
  # a.seq b will sequentially run a then b.
  # The result of b is preserved as return value.
  # If a block is associated, values returned by _a_ and _b_
  # are passed into the block and the return value of
  # the block is used as the final result of the parser.
  #
  def seq(other, &block)
    # TypeChecker.check_arg_type Parser, other, :seq
    Parsers.sequence(self, other, &block)
  end
  def_sig :seq, Parser

  #
  # Similar to _seq_. _other_ is auto-boxed if it is not of type Parser.
  #
  def >> (other)
    seq(autobox_parser(other))
  end

  private

  #
  # Wraps a non-Parser value in a parser that always returns it;
  # Parser objects are returned as-is.
  #
  def autobox_parser(val)
    return Parsers.value(val) unless val.kind_of? Parser
    val
  end

  #
  # A parser that runs _operator_ then _operand_ and passes both results
  # to the Idn functor.
  #
  def _infix_rest(operator, operand)
    Parsers.sequence(operator, operand, &Idn)
  end

  public

  alias ~ not
  alias << followed
  alias * repeat_

  def_sig :plus, Parser

  private

  #
  # Parsing hook; this base implementation always reports failure.
  #
  def _parse(ctxt)
    false
  end
end
465
#
# This module provides all out-of-box parser implementations.
# Every method is available both as a module function (Parsers.char(...))
# and as a mixin, because the module extends itself.
#
module Parsers
  extend Signature

  #
  # A parser that always fails with the given error message.
  #
  def failure msg
    FailureParser.new(msg)
  end

  #
  # A parser that always succeeds with the given return value.
  #
  def value v
    ValueParser.new(v)
  end

  #
  # A parser that calls alternative parsers until one succeeds,
  # or any failure with input consumption beyond the current look-ahead.
  #
  def sum(*alts)
    # TypeChecker.check_vararg_type Parser, alts, :sum
    PlusParser.new(alts)
  end
  def_sig :sum, [Parser]

  #
  # A parser that calls alternative parsers until one succeeds.
  #
  def alt(*alts)
    AltParser.new(alts)
  end
  def_sig :alt, [Parser]

  #
  # A parser that succeeds when the given predicate returns true
  # (with the current input as the parameter).
  # _expected_ is the error message when _pred_ returns false.
  #
  def satisfies(expected, &pred)
    SatisfiesParser.new(pred, expected)
  end

  #
  # A parser that succeeds when the current input is equal to the given value.
  # _expected_ is the error message when the input differs.
  #
  def is(v, expected="#{v} expected")
    satisfies(expected) {|c| c == v}
  end

  #
  # A parser that succeeds when the current input is not equal to the given value.
  # _expected_ is the error message when the input is equal.
  #
  def isnt(v, expected="#{v} unexpected")
    satisfies(expected) {|c| c != v}
  end

  #
  # A parser that succeeds when the current input is among the given values.
  # A single String argument is used directly as the set of accepted inputs.
  #
  def among(*vals)
    expected = "one of [#{vals.join(', ')}] expected"
    vals = as_list vals
    satisfies(expected) {|c| vals.include? c}
  end

  #
  # A parser that succeeds when the current input is not among the given values.
  # A single String argument is used directly as the set of rejected inputs.
  #
  def not_among(*vals)
    expected = "one of [#{vals.join(', ')}] unexpected"
    vals = as_list vals
    satisfies(expected) {|c| !vals.include? c}
  end

  #
  # A parser that succeeds when the current input is the given character.
  # _c_ may be a character code or a string whose first element is used.
  #
  def char(c)
    # Integer rather than Fixnum: Fixnum was removed in Ruby 3.2, and
    # Integer matches every value Fixnum matched.
    if c.kind_of? Integer
      nm = c.chr
      is(c, "'#{nm}' expected").setName(nm)
    else
      is(c[0], "'#{c}' expected").setName(c)
    end
  end

  #
  # A parser that succeeds when the current input is not the given character.
  # _c_ may be a character code or a string whose first element is used.
  #
  def not_char(c)
    if c.kind_of? Integer
      nm = c.chr
      isnt(c, "'#{nm}' unexpected").setName("~#{nm}")
    else
      isnt(c[0], "'#{c}' unexpected").setName("~#{c}")
    end
  end

  #
  # A parser that succeeds when there's no input available.
  #
  def eof(expected="EOF expected")
    EofParser.new(expected).setName('EOF')
  end

  #
  # A parser that tries to match the current inputs one by one
  # with the given values.
  # It succeeds only when all given values are matched, in which case all the
  # matched inputs are consumed.
  #
  def are(vals, expected="#{vals} expected")
    AreParser.new(vals, expected)
  end

  #
  # A parser that makes sure that the given values don't match
  # the current inputs. One input is consumed if it succeeds.
  #
  def arent(vals, expected="#{vals} unexpected")
    are(vals, '').not(expected) >> any
  end

  #
  # A parser that matches the given string.
  #
  def string(str, msg = "\"#{str}\" expected")
    are(str, msg).setName(str)
  end

  #
  # A parser that makes sure that the current input doesn't match a string.
  # One character is consumed if it succeeds.
  #
  def not_string(str, msg="\"#{str}\" unexpected")
    string(str).not(msg) >> any
  end

  alias str string

  #
  # A parser that sequentially runs the given parsers.
  # The result of the last parser is used as return value.
  # If a block is given, the results of the parsers are passed
  # into the block as parameters, and the block return value
  # is used as result instead.
  #
  def sequence(*parsers, &proc)
    # TypeChecker.check_vararg_type Parser, parsers, :sequence
    SequenceParser.new(parsers, proc)
  end
  def_sig :sequence, [Parser]

  #
  # A parser that returns the current input index (starting from 0).
  #
  def get_index
    GetIndexParser.new.setName('get_index')
  end

  #
  # A parser that moves the current input pointer to a certain index.
  #
  def set_index ind
    SetIndexParser.new(ind).setName('set_index')
  end

  #
  # A parser that tries all given alternative parsers
  # and picks the one with the longest match.
  #
  def longest(*parsers)
    # TypeChecker.check_vararg_type Parser, parsers, :longest
    BestParser.new(parsers, true)
  end
  def_sig :longest, [Parser]

  #
  # A parser that tries all given alternative parsers
  # and picks the one with the shortest match.
  #
  def shortest(*parsers)
    # TypeChecker.check_vararg_type Parser, parsers, :shortest
    BestParser.new(parsers, false)
  end
  def_sig :shortest, [Parser]

  alias shorter shortest
  alias longer longest

  #
  # A parser that consumes one input.
  #
  def any
    AnyParser.new
  end

  #
  # A parser that always fails.
  #
  def zero
    ZeroParser.new
  end

  #
  # A parser that always succeeds.
  #
  def one
    OneParser.new
  end

  #
  # A parser that succeeds if the current input is within a certain range
  # (both bounds inclusive). _from_ and _to_ may be given as strings or
  # character codes.
  #
  def range(from, to, msg="#{as_char from}..#{as_char to} expected")
    from, to = as_num(from), as_num(to)
    satisfies(msg) {|c| c <= to && c >= from}
  end

  #
  # A parser that throws a symbol.
  #
  def throwp(symbol)
    ThrowParser.new(symbol)
  end

  #
  # A parser that succeeds if the current inputs match
  # the given regular expression.
  # The matched string is consumed and returned as result.
  # _ptn_ may be a Regexp or a String holding the pattern source.
  #
  def regexp(ptn, expected="/#{ptn.to_s}/ expected")
    RegexpParser.new(as_regexp(ptn), expected).setName(expected)
  end

  #
  # A parser that parses a word
  # (starting with alpha or underscore, followed by 0 or more alpha, number or underscore)
  # and returns the matched word as string.
  #
  def word(expected='word expected')
    regexp(/[a-zA-Z_]\w*/, expected)
  end

  #
  # A parser that parses an integer (digits not directly followed by a
  # word character) and returns the matched integer as string.
  #
  def integer(expected='integer expected')
    regexp(/\d+(?!\w)/, expected)
  end

  #
  # A parser that parses a number (integer, or decimal number)
  # and returns the matched number as string.
  #
  def number(expected='number expected')
    regexp(/\d+(\.\d+)?/, expected)
  end

  #
  # A parser that matches the given string, case insensitively.
  #
  def string_nocase(str, expected="'#{str}' expected")
    StringCaseInsensitiveParser.new(str, expected).setName(str)
  end

  #
  # A parser that succeeds when the current input
  # is a token with one of the given token kinds.
  # An input counts as a token when it responds to both _kind_ and _text_.
  # If a block is given, the token text is passed to the block
  # as parameter, and the block return value is used as result.
  # Otherwise, the token object is used as result.
  #
  def token(*kinds, &proc)
    expected = "#{kinds.join(' or ')} expected"
    recognizer = satisfies(expected) do |tok|
      # respond_to? takes a single method name; its second argument is the
      # include-private flag, so the two methods must be checked separately.
      tok.respond_to?(:kind) && tok.respond_to?(:text) && kinds.include?(tok.kind)
    end
    recognizer = recognizer.map {|tok| proc.call(tok.text)} if proc
    recognizer
  end

  #
  # A parser that parses a white space character.
  #
  def whitespace(expected="whitespace expected")
    satisfies(expected) {|c| Whitespaces.include? c}
  end

  #
  # A parser that parses 1 or more white space characters.
  #
  def whitespaces(expected="whitespace(s) expected")
    whitespace(expected).many_(1)
  end

  #
  # A parser that parses a line started with _start_.
  # The terminating newline is consumed when present.
  # nil is the result.
  #
  def comment_line start
    string(start) >> not_char(?\n).many_ >> char(?\n).optional >> value(nil)
  end

  #
  # A parser that parses a chunk of text started with _open_
  # and ended by _close_.
  # nil is the result.
  #
  def comment_block open, close
    string(open) >> not_string(close).many_ >> string(close) >> value(nil)
  end

  #
  # A lazy parser, when executed, calls the given block
  # to get a parser object and delegates the call to this lazily
  # instantiated parser.
  #
  def lazy(&block)
    LazyParser.new(block)
  end

  #
  # A parser that watches the current parser result without changing it.
  # The following assert will succeed:
  ##
  # char(?a) >> watch{|x|assert_equal(?a, x)}
  ##
  # watch can also be used as a handy tool to print trace information,
  # for example:
  ##
  # some_parser >> watch {puts "some_parser succeeded."}
  #
  # Without a block, the always-succeeding parser _one_ is returned.
  #
  def watch(&block)
    return one unless block
    WatchParser.new(block)
  end

  #
  # A parser that watches the current parser result without changing it.
  # The following assert will succeed:
  ##
  # char(?a).repeat(2) >> watchn{|x,y|assert_equal([?a,?a], [x,y])}
  ##
  # Slightly different from _watch_, _watchn_ expands the current parser result
  # before passing it into the associated block.
  #
  # Without a block, the always-succeeding parser _one_ is returned.
  #
  def watchn(&block)
    return one unless block
    WatchnParser.new(block)
  end

  #
  # A parser that maps current parser result to a new result using
  # the given block.
  ##
  # Different from Parser#map, this method does not need to be combined
  # with any Parser object. It is rather an independent Parser object
  # that maps the _current_ parser result.
  ##
  # parser1.map{|x|...} is equivalent to parser1 >> map{|x|...}
  #
  # Without a block, the always-succeeding parser _one_ is returned.
  #
  def map(&block)
    return one unless block
    MapCurrentParser.new(block)
  end

  #
  # A parser that maps current parser result to a new result using
  # the given block. If the current parser result is an array, the array
  # elements are expanded and then passed as parameters to the block.
  ##
  # Different from Parser#mapn, this method does not need to be combined
  # with any Parser object. It is rather an independent Parser object
  # that maps the _current_ parser result.
  ##
  # parser1.mapn{|x,y|...} is equivalent to parser1 >> mapn{|x,y|...}
  #
  # Without a block, the always-succeeding parser _one_ is returned.
  #
  def mapn(&block)
    return one unless block
    MapnCurrentParser.new(block)
  end

  private

  #
  # characters considered white space.
  #
  Whitespaces = " \t\r\n"

  #
  # Compiles a String into a Regexp; any other value is returned as-is.
  #
  def as_regexp ptn
    case ptn when String then Regexp.new(ptn) else ptn end
  end

  #
  # Returns a String unchanged; any other value is converted with _chr_.
  #
  def as_char c
    case c when String then c else c.chr end
  end

  #
  # Returns the first element of a String; any other value is returned as-is.
  #
  def as_num c
    # "then" instead of the "when String:" colon form, which Ruby 1.9 removed.
    case c when String then c[0] else c end
  end

  #
  # Unwraps a one-element argument list holding a String so that the string
  # itself serves as the collection; every other list is returned unchanged.
  #
  def as_list vals
    return vals unless vals.length==1
    val = vals[0]
    return vals unless val.kind_of? String
    val
  end

  extend self
end
892
+