rparsec2 1.1.0 → 1.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,893 +1,471 @@
1
- %w{
2
- monad misc error context locator token functors parser_monad
3
- }.each { |lib| require "rparsec/#{lib}" }
1
+ # frozen_string_literal: true
2
+
3
+ require "rparsec/functors"
4
+ require "rparsec/monad"
5
+ require "rparsec/misc"
6
+ require "rparsec/parser_monad"
7
+ require "rparsec/context"
8
+ require "rparsec/error"
9
+ require "rparsec/token"
10
+ require "rparsec/locator"
4
11
 
5
12
  module RParsec
6
13
 
7
- #
8
- # Represents a parser that parses a certain grammar rule.
9
- #
10
- class Parser
11
- include Functors
12
- include Monad
13
- extend Signature
14
- extend DefHelper
15
- MyMonad = ParserMonad.new
16
- attr_accessor :name
17
-
18
- class << self
19
- private
14
+ #
15
+ # Represents a parser that parses a certain grammar rule.
16
+ #
17
+ class Parser
18
+ include Functors
19
+ include Monad
20
+ extend DefHelper
21
+ MyMonad = ParserMonad.new
22
+ attr_accessor :name
20
23
 
21
- def init(*vars)
22
- parser_checker = {}
23
- vars.each_with_index do |var, i|
24
- name = var.to_s
25
- parser_checker[i] = var if name.include?('parser') && !name.include?('parsers')
26
- end
27
- define_method(:initialize) do |*params|
28
- super()
24
+ class << self
25
+ private
26
+
27
+ def init(*vars)
28
+ parser_checker = {}
29
29
  vars.each_with_index do |var, i|
30
- param = params[i]
31
- if parser_checker.include? i
32
- TypeChecker.check_arg_type Parser, param, self, i
30
+ name = var.to_s
31
+ parser_checker[i] = var if name.include?('parser') && !name.include?('parsers')
32
+ end
33
+ define_method(:initialize) do |*params|
34
+ super()
35
+ vars.each_with_index do |var, i|
36
+ instance_variable_set("@#{var}", params[i])
33
37
  end
34
- instance_variable_set("@" + var.to_s, param)
35
38
  end
36
39
  end
37
40
  end
38
- end
39
-
40
- private
41
41
 
42
- def initialize
43
- initMonad(MyMonad, self)
44
- end
45
-
46
- def _display_current_input(input, _code, _index)
47
- return 'EOF' if input.nil?
48
- c = input
49
- case c when Integer then "'" << c << "'" when Token then c.text else c.to_s end
50
- end
51
-
52
- def _add_encountered_error(msg, encountered)
53
- result = msg.dup
54
- result << ', ' unless msg.strip.length == 0 || msg =~ /.*(\.|,)\s*$/
55
- "#{result}#{encountered}"
56
- end
57
-
58
- def _add_location_to_error(locator, ctxt, msg, _code)
59
- line, col = locator.locate(ctxt.error.index)
60
- msg << " at line #{line}, col #{col}."
61
- end
62
-
63
- public
64
-
65
- #
66
- # parses a string.
67
- #
68
- def parse(src)
69
- ctxt = ParseContext.new(src)
70
- return ctxt.result if _parse ctxt
71
- ctxt.prepare_error
72
- locator = CodeLocator.new(src)
73
- raise ParserException.new(ctxt.error.index),
74
- _add_location_to_error(locator, ctxt,
75
- _add_encountered_error(ctxt.to_msg,
76
- _display_current_input(ctxt.error.input, src, ctxt.index)), src)
77
- end
78
-
79
- #
80
- # Set name for the parser.
81
- # self is returned.
82
- #
83
- def setName(nm)
84
- @name = nm
85
- self
86
- end
87
-
88
- #
89
- # a.map{|x|x+1} will first execute parser a, when it succeeds,
90
- # the associated block is executed to transform the result to a new value
91
- # (increment it in this case).
92
- #
93
- def map(&block)
94
- return self unless block
95
- MapParser.new(self, block)
96
- end
97
-
98
- #
99
- # _self_ is first executed, the parser result is then passed as parameter to the associated block,
100
- # which evaluates to another Parser object at runtime. This new Parser object is then executed
101
- # to get the final parser result.
102
- #
103
- # Different from _bind_, parser result of _self_ will be expanded first if it is an array.
104
- #
105
- def bindn(&block)
106
- return self unless block
107
- BoundnParser.new(self, block)
108
- end
109
-
110
- #
111
- # a.mapn{|x,y|x+y} will first execute parser a, when it succeeds,
112
- # the array result (if any) is expanded and passed as parameters
113
- # to the associated block. The result of the block is then used
114
- # as the parsing result.
115
- #
116
- def mapn(&block)
117
- return self unless block
118
- MapnParser.new(self, block)
119
- end
120
-
121
- #
122
- # Create a new parser that's atomic,
123
- # meaning that when it fails, input consumption is undone.
124
- #
125
- def atomize
126
- AtomParser.new(self).setName(@name)
127
- end
128
-
129
- #
130
- # Create a new parser that looks at inputs without consuming them.
131
- #
132
- def peek
133
- PeekParser.new(self).setName(@name)
134
- end
135
-
136
- #
137
- # To create a new parser that succeeds only if self fails.
138
- #
139
- def not(msg = "#{self} unexpected")
140
- NotParser.new(self, msg)
141
- end
142
-
143
- #
144
- # To create a parser that does "look ahead" for n inputs.
145
- #
146
- def lookahead _n
147
- self
148
- end
149
-
150
- #
151
- # To create a parser that fails with a given error message.
152
- #
153
- def expect msg
154
- ExpectParser.new(self, msg)
155
- end
156
-
157
- #
158
- # a.followed b will sequentially run a and b;
159
- # result of a is preserved as the ultimate return value.
160
- #
161
- def followed(other)
162
- FollowedParser.new(self, other)
163
- end
164
- def_sig :followed, Parser
42
+ private
165
43
 
166
- #
167
- # To create a parser that repeats self for a minimum _min_ times,
168
- # and maximally _max_ times.
169
- # Only the return value of the last execution is preserved.
170
- #
171
- def repeat_(min, max = min)
172
- return Parsers.failure("min=#{min}, max=#{max}") if min > max
173
- if min == max
174
- return Parsers.one if max <= 0
175
- return self if max == 1
176
- Repeat_Parser.new(self, max)
177
- else
178
- Some_Parser.new(self, min, max)
44
+ def initialize
45
+ initMonad(MyMonad, self)
179
46
  end
180
- end
181
47
 
182
- #
183
- # To create a parser that repeats self for a minimum _min_ times,
184
- # and maximally _max_ times.
185
- # All return values are collected in an array.
186
- #
187
- def repeat(min, max = min)
188
- return Parsers.failure("min=#{min}, max=#{max}") if min > max
189
- if min == max
190
- RepeatParser.new(self, max)
191
- else
192
- SomeParser.new(self, min, max)
48
+ def _display_current_input(input, _code, _index)
49
+ return 'EOF' if input.nil?
50
+ c = input
51
+ case c when Integer then "'" << c << "'" when Token then c.text else c.to_s end
193
52
  end
194
- end
195
-
196
- #
197
- # To create a parser that repeats self for at least _least_ times.
198
- # parser.many_ is equivalent to bnf notation "parser*".
199
- # Only the return value of the last execution is preserved.
200
- #
201
- def many_(least = 0)
202
- Many_Parser.new(self, least)
203
- end
204
-
205
- #
206
- # To create a parser that repeats self for at least _least_ times.
207
- # All return values are collected in an array.
208
- #
209
- def many(least = 0)
210
- ManyParser.new(self, least)
211
- end
212
-
213
- #
214
- # To create a parser that repeats self for at most _max_ times.
215
- # Only the return value of the last execution is preserved.
216
- #
217
- def some_(max)
218
- repeat_(0, max)
219
- end
220
53
 
221
- #
222
- # To create a parser that repeats self for at most _max_ times.
223
- # All return values are collected in an array.
224
- #
225
- def some(max)
226
- repeat(0, max)
227
- end
228
-
229
- #
230
- # To create a parser that repeats self for unlimited times,
231
- # with the pattern recognized by _delim_ as separator that separates each occurrence.
232
- # self has to match for at least once.
233
- # Return values of self are collected in an array.
234
- #
235
- def separated1 delim
236
- rest = delim >> self
237
- self.bind do |v0|
238
- result = [v0]
239
- (rest.map { |v| result << v }).many_ >> value(result)
54
+ def _add_encountered_error(msg, encountered)
55
+ result = msg.dup
56
+ result << ', ' unless msg.strip.length == 0 || msg =~ /.*(\.|,)\s*$/
57
+ "#{result}#{encountered}"
240
58
  end
241
- end
242
59
 
243
- #
244
- # To create a parser that repeats self for unlimited times,
245
- # with the pattern recognized by _delim_ as separator that separates each occurrence.
246
- # Return values of self are collected in an array.
247
- #
248
- def separated delim
249
- separated1(delim).plus value([])
250
- end
60
+ def _add_location_to_error(locator, ctxt, msg, _code)
61
+ line, col = locator.locate(ctxt.error.index)
62
+ msg << " at line #{line}, col #{col}."
63
+ end
251
64
 
252
- #
253
- # To create a parser that repeats self for unlimited times,
254
- # with the pattern recognized by _delim_ as separator that separates each occurrence
255
- # and also possibly ends the pattern.
256
- # self has to match for at least once.
257
- # Return values of self are collected in an array.
258
- #
259
- def delimited1 delim
260
- rest = delim >> (self.plus Parsers.throwp(:__end_delimiter__))
261
- self.bind do |v0|
262
- result = [v0]
263
- (rest.map { |v| result << v }).many_.catchp(:__end_delimiter__) >> value(result)
65
+ public
66
+
67
+ #
68
+ # parses a string.
69
+ #
70
+ def parse(src)
71
+ ctxt = ParseContext.new(src)
72
+ return ctxt.result if _parse ctxt
73
+ ctxt.prepare_error
74
+ locator = CodeLocator.new(src)
75
+ raise ParserException.new(ctxt.error.index),
76
+ _add_location_to_error(locator, ctxt,
77
+ _add_encountered_error(ctxt.to_msg,
78
+ _display_current_input(ctxt.error.input, src, ctxt.index)), src)
264
79
  end
265
- end
266
80
 
267
- #
268
- # To create a parser that repeats self for unlimited times,
269
- # with the pattern recognized by _delim_ as separator that separates each occurrence
270
- # and also possibly ends the pattern.
271
- # Return values of self are collected in an array.
272
- #
273
- def delimited delim
274
- delimited1(delim).plus value([])
275
- end
81
+ #
82
+ # Set name for the parser. +self+ is returned.
83
+ #
84
+ def set_name(nm)
85
+ @name = nm
86
+ self
87
+ end
276
88
 
277
- #
278
- # String representation
279
- #
280
- def to_s
281
- return name unless name.nil?
282
- self.class.to_s
283
- end
89
+ alias setName set_name
284
90
 
285
- #
286
- # a | b will run b when a fails.
287
- # b is auto-boxed to Parser when it is not of type Parser.
288
- #
289
- def | other
290
- AltParser.new([self, autobox_parser(other)])
291
- end
91
+ #
92
+ # <tt>a.map { |x| x + 1 }</tt> will first execute parser +a+, when
93
+ # it succeeds, the associated block is executed to transform the
94
+ # result to a new value (increment it in this case).
95
+ #
96
+ def map(&block)
97
+ return self unless block
98
+ MapParser.new(self, block)
99
+ end
292
100
 
293
- #
294
- # a.optional(default) is equivalent to a.plus(value(default))
295
- #
296
- def optional(default = nil)
297
- self.plus(value(default))
298
- end
101
+ #
102
+ # +self+ is first executed, the parser result is then passed as
103
+ # parameter to the associated +block+, which evaluates to another
104
+ # Parser object at runtime. This new Parser object is then
105
+ # executed to get the final parser result.
106
+ #
107
+ # Different from #bind, parser result of +self+ will be expanded
108
+ # first if it is an array.
109
+ #
110
+ def bindn(&block)
111
+ return self unless block
112
+ BoundnParser.new(self, block)
113
+ end
299
114
 
300
- #
301
- # a.catchp(:somesymbol) will catch the :somesymbol thrown by a.
302
- #
303
- def catchp(symbol)
304
- CatchParser.new(symbol, self)
305
- end
115
+ #
116
+ # <tt>a.mapn { |x, y| x + y }</tt> will first execute parser +a+,
117
+ # when it succeeds, the array result (if any) is expanded and
118
+ # passed as parameters to the associated block. The result of the
119
+ # block is then used as the parsing result.
120
+ #
121
+ def mapn(&block)
122
+ return self unless block
123
+ MapnParser.new(self, block)
124
+ end
306
125
 
307
- #
308
- # a.fragment will return the string matched by a.
309
- #
310
- def fragment
311
- FragmentParser.new(self)
312
- end
126
+ #
127
+ # Create a new parser that's atomic, meaning that when it fails,
128
+ # input consumption is undone.
129
+ #
130
+ def atomize
131
+ AtomParser.new(self).setName(@name)
132
+ end
313
133
 
314
- #
315
- # a.nested b will feed the token array returned by parser a to parser b
316
- # for a nested parsing.
317
- #
318
- def nested(parser)
319
- NestedParser.new(self, parser)
320
- end
134
+ #
135
+ # Create a new parser that looks at inputs without consuming them.
136
+ #
137
+ def peek
138
+ PeekParser.new(self).setName(@name)
139
+ end
321
140
 
322
- #
323
- # a.lexeme(delim) will parse _a_ for 0 or more times and ignore all
324
- # patterns recognized by _delim_.
325
- # Values returned by _a_ are collected in an array.
326
- #
327
- def lexeme(delim = Parsers.whitespaces)
328
- delim = delim.many_
329
- delim >> self.delimited(delim)
330
- end
141
+ #
142
+ # To create a new parser that succeeds only if +self+ fails.
143
+ #
144
+ def not(msg = "#{self} unexpected")
145
+ NotParser.new(self, msg)
146
+ end
331
147
 
332
- #
333
- # For prefix unary operator.
334
- # a.prefix op will run parser _op_ for 0 or more times and eventually run parser _a_
335
- # for one time.
336
- # _op_ should return a Proc that accepts one parameter.
337
- # Proc objects returned by _op_ is then fed with the value returned by _a_
338
- # from right to left.
339
- # The final result is returned as return value.
340
- #
341
- def prefix(op)
342
- Parsers.sequence(op.many, self) do |funcs, v|
343
- funcs.reverse_each { |f| v = f.call(v) }
344
- v
148
+ #
149
+ # To create a parser that does "look ahead" for _n_ inputs.
150
+ #
151
+ # WARNING: Not implemented yet?
152
+ #
153
+ def lookahead _n # :nodoc:
154
+ self
345
155
  end
346
- end
347
156
 
348
- #
349
- # For postfix unary operator.
350
- # a.postfix op will run parser _a_ for once and then _op_ for 0 or more times.
351
- # _op_ should return a Proc that accepts one parameter.
352
- # Proc objects returned by _op_ is then fed with the value returned by _a_
353
- # from left to right.
354
- # The final result is returned as return value.
355
- #
356
- def postfix(op)
357
- Parsers.sequence(self, op.many) do |v, funcs|
358
- funcs.each { |f| v = f.call(v) }
359
- v
157
+ #
158
+ # To create a parser that fails with a given error message.
159
+ #
160
+ def expect msg
161
+ ExpectParser.new(self, msg)
360
162
  end
361
- end
362
163
 
363
- #
364
- # For non-associative infix binary operator.
365
- # _op_ has to return a Proc that takes two parameters, who
366
- # are returned by the _self_ parser as operands.
367
- #
368
- def infixn(op)
369
- bind do |v1|
370
- bin = Parsers.sequence(op, self) do |f, v2|
371
- f.call(v1, v2)
372
- end
373
- bin | value(v1)
164
+ #
165
+ # <tt>a.followed b</tt> will sequentially run +a+ and +b+; result
166
+ # of +a+ is preserved as the ultimate return value.
167
+ #
168
+ def followed(other)
169
+ FollowedParser.new(self, other)
374
170
  end
375
- end
376
171
 
377
- #
378
- # For left-associative infix binary operator.
379
- # _op_ has to return a Proc that takes two parameters, who
380
- # are returned by the _self_ parser as operands.
381
- #
382
- def infixl(op)
383
- Parsers.sequence(self, _infix_rest(op, self).many) do |v, rests|
384
- rests.each do |r|
385
- f, v1 = *r
386
- v = f.call(v, v1)
172
+ #
173
+ # To create a parser that repeats +self+ for a minimum +min+
174
+ # times, and maximally +max+ times. Only the return value of the
175
+ # last execution is preserved.
176
+ #
177
+ def repeat_(min, max = min)
178
+ return Parsers.failure("min=#{min}, max=#{max}") if min > max
179
+ if min == max
180
+ return Parsers.one if max <= 0
181
+ return self if max == 1
182
+ Repeat_Parser.new(self, max)
183
+ else
184
+ Some_Parser.new(self, min, max)
387
185
  end
388
- v
389
186
  end
390
- end
391
187
 
392
- #
393
- # For right-associative infix binary operator.
394
- # _op_ has to return a Proc that takes two parameters, who
395
- # are returned by the _self_ parser as operands.
396
- #
397
- def infixr(op)
398
- Parsers.sequence(self, _infix_rest(op, self).many) do |v, rests|
399
- if rests.empty?
400
- v
188
+ #
189
+ # To create a parser that repeats +self+ for a minimum +min+
190
+ # times, and maximally +max+ times. All return values are
191
+ # collected in an array.
192
+ #
193
+ def repeat(min, max = min)
194
+ return Parsers.failure("min=#{min}, max=#{max}") if min > max
195
+ if min == max
196
+ RepeatParser.new(self, max)
401
197
  else
402
- f, seed = *rests.last
403
- for i in (0...rests.length - 1)
404
- cur = rests.length - 2 - i
405
- f1, v1 = *rests[cur]
406
- seed = f.call(v1, seed)
407
- f = f1
408
- end
409
- f.call(v, seed)
198
+ SomeParser.new(self, min, max)
410
199
  end
411
200
  end
412
- end
413
-
414
- #
415
- # a.token(:word_token) will return a Token object when _a_ succeeds.
416
- # The matched string (or the string returned by _a_, if any) is
417
- # encapsulated in the token, together with the :word_token symbol and
418
- # the starting index of the match.
419
- #
420
- def token(kind)
421
- TokenParser.new(kind, self)
422
- end
423
-
424
- #
425
- # a.seq b will sequentially run a then b.
426
- # The result of b is preserved as return value.
427
- # If a block is associated, values returned by _a_ and _b_
428
- # are passed into the block and the return value of
429
- # the block is used as the final result of the parser.
430
- #
431
- def seq(other, &block)
432
- # TypeChecker.check_arg_type Parser, other, :seq
433
- Parsers.sequence(self, other, &block)
434
- end
435
- def_sig :seq, Parser
436
-
437
- #
438
- # Similar to _seq_. _other_ is auto-boxed if it is not of type Parser.
439
- #
440
- def >>(other)
441
- seq(autobox_parser(other))
442
- end
443
-
444
- private
445
-
446
- def autobox_parser(val)
447
- return Parsers.value(val) unless val.kind_of? Parser
448
- val
449
- end
450
-
451
- def _infix_rest(operator, operand)
452
- Parsers.sequence(operator, operand, &Idn)
453
- end
454
-
455
- alias ~ not
456
- alias << followed
457
- alias * repeat_
458
-
459
- def_sig :plus, Parser
460
-
461
- def _parse(_ctxt)
462
- false
463
- end
464
- end
465
- #
466
- # This module provides all out-of-box parser implementations.
467
- #
468
- module Parsers
469
- extend Signature
470
-
471
- #
472
- # A parser that always fails with the given error message.
473
- #
474
- def failure msg
475
- FailureParser.new(msg)
476
- end
477
-
478
- #
479
- # A parser that always succeeds with the given return value.
480
- #
481
- def value v
482
- ValueParser.new(v)
483
- end
484
-
485
- #
486
- # A parser that calls alternative parsers until one succeed,
487
- # or any failure with input consumption beyond the current look-ahead.
488
- #
489
- def sum(*alts)
490
- # TypeChecker.check_vararg_type Parser, alts, :sum
491
- PlusParser.new(alts)
492
- end
493
- def_sig :sum, [Parser]
494
-
495
- #
496
- # A parser that calls alternative parsers until one succeeds.
497
- #
498
- def alt(*alts)
499
- AltParser.new(alts)
500
- end
501
- def_sig :alt, [Parser]
502
-
503
- #
504
- # A parser that succeeds when the given predicate returns true
505
- # (with the current input as the parameter).
506
- # _expected_ is the error message when _pred_ returns false.
507
- #
508
- def satisfies(expected, &pred)
509
- SatisfiesParser.new(pred, expected)
510
- end
511
201
 
512
- #
513
- # A parser that succeeds when the current input is equal to the given value.
514
- # _expected_ is the error message when _pred_ returns false.
515
- #
516
- def is(v, expected = "#{v} expected")
517
- satisfies(expected) { |c| c == v }
518
- end
519
-
520
- #
521
- # A parser that succeeds when the current input is not equal to the given value.
522
- # _expected_ is the error message when _pred_ returns false.
523
- #
524
- def isnt(v, expected = "#{v} unexpected")
525
- satisfies(expected) { |c| c != v }
526
- end
527
-
528
- #
529
- # A parser that succeeds when the current input is among the given values.
530
- #
531
- def among(*vals)
532
- expected = "one of [#{vals.join(', ')}] expected"
533
- vals = as_list vals
534
- satisfies(expected) { |c| vals.include? c }
535
- end
536
-
537
- #
538
- # A parser that succeeds when the current input is not among the given values.
539
- #
540
- def not_among(*vals)
541
- expected = "one of [#{vals.join(', ')}] unexpected"
542
- vals = as_list vals
543
- satisfies(expected) { |c| !vals.include? c }
544
- end
545
-
546
- #
547
- # A parser that succeeds when the current input is the given character.
548
- #
549
- def char(c)
550
- if c.kind_of? Integer
551
- nm = c.chr
552
- is(c, "'#{nm}' expected").setName(nm)
553
- else
554
- is(c[0], "'#{c}' expected").setName(c)
202
+ #
203
+ # To create a parser that repeats +self+ for at least +least+
204
+ # times. <tt>parser.many_</tt> is equivalent to BNF notation
205
+ # <tt>parser*</tt>. Only the return value of the last execution
206
+ # is preserved.
207
+ #
208
+ def many_(least = 0)
209
+ Many_Parser.new(self, least)
555
210
  end
556
- end
557
211
 
558
- #
559
- # A parser that succeeds when the current input is not the given character.
560
- #
561
- def not_char(c)
562
- if c.kind_of? Integer
563
- nm = c.chr
564
- isnt(c, "'#{nm}' unexpected").setName("~#{nm}")
565
- else
566
- isnt(c[0], "'#{c}' unexpected").setName("~#{c}")
212
+ #
213
+ # To create a parser that repeats +self+ for at least +least+
214
+ # times. All return values are collected in an array.
215
+ #
216
+ def many(least = 0)
217
+ ManyParser.new(self, least)
567
218
  end
568
- end
569
-
570
- #
571
- # A parser that succeeds when there's no input available.
572
- #
573
- def eof(expected = "EOF expected")
574
- EofParser.new(expected).setName('EOF')
575
- end
576
-
577
- #
578
- # A parser that tries to match the current inputs one by one
579
- # with the given values.
580
- # It succeeds only when all given values are matched, in which case all the
581
- # matched inputs are consumed.
582
- #
583
- def are(vals, expected = "#{vals} expected")
584
- AreParser.new(vals, expected)
585
- end
586
219
 
587
- #
588
- # A parser that makes sure that the given values don't match
589
- # the current inputs. One input is consumed if it succeeds.
590
- #
591
- def arent(vals, expected = "#{vals} unexpected")
592
- are(vals, '').not(expected) >> any
593
- end
594
-
595
- #
596
- # A parser that matches the given string.
597
- #
598
- def string(str, msg = "\"#{str}\" expected")
599
- are(str, msg).setName(str)
600
- end
601
-
602
- #
603
- # A parser that makes sure that the current input doesn't match a string.
604
- # One character is consumed if it succeeds.
605
- #
606
- def not_string(str, msg = "\"#{str}\" unexpected")
607
- string(str).not(msg) >> any
608
- end
609
-
610
- alias str string
611
-
612
- #
613
- # A parser that sequentially run the given parsers.
614
- # The result of the last parser is used as return value.
615
- # If a block is given, the results of the parsers are passed
616
- # into the block as parameters, and the block return value
617
- # is used as result instead.
618
- #
619
- def sequence(*parsers, &proc)
620
- # TypeChecker.check_vararg_type Parser, parsers, :sequence
621
- SequenceParser.new(parsers, proc)
622
- end
623
- def_sig :sequence, [Parser]
624
-
625
- #
626
- # A parser that returns the current input index (starting from 0).
627
- #
628
- def get_index
629
- GetIndexParser.new.setName('get_index')
630
- end
631
-
632
- #
633
- # A parser that moves the current input pointer to a certain index.
634
- #
635
- def set_index ind
636
- SetIndexParser.new(ind).setName('set_index')
637
- end
638
-
639
- #
640
- # A parser that tries all given alternative parsers
641
- # and picks the one with the longest match.
642
- #
643
- def longest(*parsers)
644
- # TypeChecker.check_vararg_type Parser, parsers, :longest
645
- BestParser.new(parsers, true)
646
- end
647
- def_sig :longest, [Parser]
220
+ #
221
+ # To create a parser that repeats +self+ for at most +max+ times.
222
+ # Only the return value of the last execution is preserved.
223
+ #
224
+ def some_(max)
225
+ repeat_(0, max)
226
+ end
648
227
 
649
- #
650
- # A parser that tries all given alternative parsers
651
- # and picks the one with the shortest match.
652
- #
653
- def shortest(*parsers)
654
- # TypeChecker.check_vararg_type Parser, parsers, :shortest
655
- BestParser.new(parsers, false)
656
- end
657
- def_sig :shortest, [Parser]
228
+ #
229
+ # To create a parser that repeats +self+ for at most +max+ times.
230
+ # All return values are collected in an array.
231
+ #
232
+ def some(max)
233
+ repeat(0, max)
234
+ end
658
235
 
659
- alias shorter shortest
660
- alias longer longest
236
+ #
237
+ # To create a parser that repeats +self+ for unlimited times, with
238
+ # the pattern recognized by +delim+ as separator that separates
239
+ # each occurrence. +self+ has to match at least once. Return
240
+ # values of +self+ are collected in an array.
241
+ #
242
+ def separated1 delim
243
+ rest = delim >> self
244
+ self.bind do |v0|
245
+ result = [v0]
246
+ (rest.map { |v| result << v }).many_ >> value(result)
247
+ end
248
+ end
661
249
 
662
- #
663
- # A parser that consumes one input.
664
- #
665
- def any
666
- AnyParser.new
667
- end
250
+ #
251
+ # To create a parser that repeats +self+ for unlimited times, with
252
+ # the pattern recognized by +delim+ as separator that separates
253
+ # each occurrence. Return values of +self+ are collected in an
254
+ # array.
255
+ #
256
+ def separated delim
257
+ separated1(delim).plus value([])
258
+ end
668
259
 
669
- #
670
- # A parser that always fails.
671
- #
672
- def zero
673
- ZeroParser.new
674
- end
260
+ #
261
+ # To create a parser that repeats +self+ for unlimited times, with
262
+ # the pattern recognized by +delim+ as separator that separates
263
+ # each occurrence and also possibly ends the pattern. +self+ has
264
+ # to match at least once. Return values of +self+ are
265
+ # collected in an array.
266
+ #
267
+ def delimited1 delim
268
+ rest = delim >> (self.plus Parsers.throwp(:__end_delimiter__))
269
+ self.bind do |v0|
270
+ result = [v0]
271
+ (rest.map { |v| result << v }).many_.catchp(:__end_delimiter__) >> value(result)
272
+ end
273
+ end
675
274
 
676
- #
677
- # A parser that always succeeds.
678
- #
679
- def one
680
- OneParser.new
681
- end
275
+ #
276
+ # To create a parser that repeats +self+ for unlimited times, with
277
+ # the pattern recognized by +delim+ as separator that separates
278
+ # each occurrence and also possibly ends the pattern. Return
279
+ # values of +self+ are collected in an array.
280
+ #
281
+ def delimited delim
282
+ delimited1(delim).plus value([])
283
+ end
682
284
 
683
- #
684
- # A parser that succeeds if the current input is within a certain range.
685
- #
686
- def range(from, to, msg = "#{as_char from}..#{as_char to} expected")
687
- from, to = as_num(from), as_num(to)
688
- satisfies(msg) { |c| c <= to && c >= from }
689
- end
285
+ #
286
+ # String representation
287
+ #
288
+ def to_s
289
+ return name unless name.nil?
290
+ self.class.to_s
291
+ end
690
292
 
691
- #
692
- # A parser that throws a symbol.
693
- #
694
- def throwp(symbol)
695
- ThrowParser.new(symbol)
696
- end
293
+ #
294
+ # <tt>a | b</tt> will run +b+ when +a+ fails. +b+ is auto-boxed
295
+ # to Parser when it is not of type Parser.
296
+ #
297
+ def | other
298
+ AltParser.new([self, autobox_parser(other)])
299
+ end
697
300
 
698
- #
699
- # A parser that succeeds if the current inputs match
700
- # the given regular expression.
701
- # The matched string is consumed and returned as result.
702
- #
703
- def regexp(ptn, expected = "/#{ptn}/ expected")
704
- RegexpParser.new(as_regexp(ptn), expected).setName(expected)
705
- end
301
+ #
302
+ # <tt>a.optional(default)</tt> is equivalent to
303
+ # <tt>a.plus(value(default))</tt>. See also #plus and #value.
304
+ #
305
+ def optional(default = nil)
306
+ self.plus(value(default))
307
+ end
706
308
 
707
- #
708
- # A parser that parses a word
709
- # (starting with alpha or underscore, followed by 0 or more alpha, number or underscore).
710
- # and return the matched word as string.
711
- #
712
- def word(expected = 'word expected')
713
- regexp(/[a-zA-Z_]\w*/, expected)
714
- end
309
+ #
310
+ # <tt>a.catchp(:somesymbol)</tt> will catch the
311
+ # <tt>:somesymbol</tt> thrown by +a+.
312
+ #
313
+ def catchp(symbol)
314
+ CatchParser.new(symbol, self)
315
+ end
715
316
 
716
- #
717
- # A parser that parses an integer
718
- # and return the matched integer as string.
719
- #
720
- def integer(expected = 'integer expected')
721
- regexp(/\d+(?!\w)/, expected)
722
- end
317
+ #
318
+ # <tt>a.fragment</tt> will return the string matched by +a+.
319
+ #
320
+ def fragment
321
+ FragmentParser.new(self)
322
+ end
723
323
 
724
- #
725
- # A parser that parses a number (integer, or decimal number)
726
- # and return the matched number as string.
727
- #
728
- def number(expected = 'number expected')
729
- regexp(/\d+(\.\d+)?/, expected)
730
- end
324
+ #
325
+ # <tt>a.nested b</tt> will feed the token array returned by parser
326
+ # +a+ to parser +b+ for a nested parsing.
327
+ #
328
+ def nested(parser)
329
+ NestedParser.new(self, parser)
330
+ end
731
331
 
732
- #
733
- # A parser that matches the given string, case insensitively.
734
- #
735
- def string_nocase(str, expected = "'#{str}' expected")
736
- StringCaseInsensitiveParser.new(str, expected).setName(str)
737
- end
332
+ #
333
+ # <tt>a.lexeme(delim)</tt> will parse +a+ for 0 or more times and
334
+ # ignore all patterns recognized by +delim+. Values returned by
335
+ # +a+ are collected in an array.
336
+ #
337
+ def lexeme(delim = Parsers.whitespaces)
338
+ delim = delim.many_
339
+ delim >> self.delimited(delim)
340
+ end
738
341
 
739
- #
740
- # A parser that succeeds when the current input
741
- # is a token with one of the given token kinds.
742
- # If a block is given, the token text is passed to the block
743
- # as parameter, and the block return value is used as result.
744
- # Otherwise, the token object is used as result.
745
- #
746
- def token(*kinds, &proc)
747
- expected = "#{kinds.join(' or ')} expected"
748
- recognizer = nil
749
- if kinds.length == 1
750
- kind = kinds[0]
751
- recognizer = satisfies(expected) do |tok|
752
- tok.respond_to? :kind, :text and kind == tok.kind
753
- end
754
- else
755
- recognizer = satisfies(expected) do |tok|
756
- tok.respond_to? :kind, :text and kinds.include? tok.kind
342
+ #
343
+ # For prefix unary operator. <tt>a.prefix op</tt> will run parser
344
+ # +op+ for 0 or more times and eventually run parser +a+ for one
345
+ # time. +op+ should return a +Proc+ that accepts one parameter.
346
+ # +Proc+ objects returned by +op+ are then fed with the value
347
+ # returned by +a+ from right to left. The final result is
348
+ # returned as return value.
349
+ #
350
+ def prefix(op)
351
+ Parsers.sequence(op.many, self) do |funcs, v|
352
+ funcs.reverse_each { |f| v = f.call(v) }
353
+ v
757
354
  end
758
355
  end
759
- recognizer = recognizer.map { |tok| proc.call(tok.text) } if proc
760
- recognizer
761
- end
762
-
763
- #
764
- # A parser that parses a white space character.
765
- #
766
- def whitespace(expected = "whitespace expected")
767
- satisfies(expected) { |c| Whitespaces.include? c }
768
- end
769
-
770
- #
771
- # A parser that parses 1 or more white space characters.
772
- #
773
- def whitespaces(expected = "whitespace(s) expected")
774
- whitespace(expected).many_(1)
775
- end
776
356
 
777
- #
778
- # A parser that parses a line started with _start_.
779
- # nil is the result.
780
- #
781
- def comment_line start
782
- string(start) >> not_char(?\n).many_ >> char(?\n).optional >> value(nil)
783
- end
784
-
785
- #
786
- # A parser that parses a chunk of text started with _open_
787
- # and ended by _close_.
788
- # nil is the result.
789
- #
790
- def comment_block open, close
791
- string(open) >> not_string(close).many_ >> string(close) >> value(nil)
792
- end
357
+ #
358
+ # For postfix unary operator. <tt>a.postfix op</tt> will run
359
+ # parser +a+ for once and then +op+ for 0 or more times. +op+
360
+ # should return a +Proc+ that accepts one parameter. +Proc+
361
+ # objects returned by +op+ is then fed with the value returned by
362
+ # +a+ from left to right. The final result is returned as return
363
+ # value.
364
+ #
365
+ def postfix(op)
366
+ Parsers.sequence(self, op.many) do |v, funcs|
367
+ funcs.each { |f| v = f.call(v) }
368
+ v
369
+ end
370
+ end
793
371
 
794
- #
795
- # A lazy parser, when executed, calls the given block
796
- # to get a parser object and delegate the call to this lazily
797
- # instantiated parser.
798
- #
799
- def lazy(&block)
800
- LazyParser.new(block)
801
- end
372
+ #
373
+ # For non-associative infix binary operator. +op+ has to return a
374
+ # +Proc+ that takes two parameters, which are returned by the +self+
375
+ # parser as operands.
376
+ #
377
+ def infixn(op)
378
+ bind do |v1|
379
+ bin = Parsers.sequence(op, self) do |f, v2|
380
+ f.call(v1, v2)
381
+ end
382
+ bin | value(v1)
383
+ end
384
+ end
802
385
 
803
- #
804
- # A parser that watches the current parser result without changing it.
805
- # The following assert will succeed:
806
- ##
807
- # char(?a) >> watch{|x|assert_equal(?a, x)}
808
- ##
809
- # watch can also be used as a handy tool to print trace information,
810
- # for example:
811
- ##
812
- # some_parser >> watch {puts "some_parser succeeded."}
813
- #
814
- def watch(&block)
815
- return one unless block
816
- WatchParser.new(block)
817
- end
386
+ #
387
+ # For left-associative infix binary operator. +op+ has to return
388
+ # a +Proc+ that takes two parameters, who are returned by the
389
+ # +self+ parser as operands.
390
+ #
391
+ def infixl(op)
392
+ Parsers.sequence(self, _infix_rest(op, self).many) do |v, rests|
393
+ rests.each do |r|
394
+ f, v1 = *r
395
+ v = f.call(v, v1)
396
+ end
397
+ v
398
+ end
399
+ end
818
400
 
819
- #
820
- # A parser that watches the current parser result without changing it.
821
- # The following assert will succeed:
822
- ##
823
- # char(?a).repeat(2) >> watchn{|x,y|assert_equal([?a,?a], [x,y])}
824
- ##
825
- # Slightly different from _watch_, _watchn_ expands the current parser result
826
- # before passing it into the associated block.
827
- #
828
- def watchn(&block)
829
- return one unless block
830
- WatchnParser.new(block)
831
- end
401
+ #
402
+ # For right-associative infix binary operator. +op+ has to return
403
+ # a +Proc+ that takes two parameters, who are returned by the
404
+ # +self+ parser as operands.
405
+ #
406
+ def infixr(op)
407
+ Parsers.sequence(self, _infix_rest(op, self).many) do |v, rests|
408
+ if rests.empty?
409
+ v
410
+ else
411
+ f, seed = *rests.last
412
+ for i in (0...rests.length - 1)
413
+ cur = rests.length - 2 - i
414
+ f1, v1 = *rests[cur]
415
+ seed = f.call(v1, seed)
416
+ f = f1
417
+ end
418
+ f.call(v, seed)
419
+ end
420
+ end
421
+ end
832
422
 
833
- #
834
- # A parser that maps current parser result to a new result using
835
- # the given block.
836
- ##
837
- # Different from Parser#map, this method does not need to be combined
838
- # with any Parser object. It is rather an independent Parser object
839
- # that maps the _current_ parser result.
840
- ##
841
- # parser1.map{|x|...} is equivalent to parser1 >> map{|x|...}
842
- #
843
- def map(&block)
844
- return one unless block
845
- MapCurrentParser.new(block)
846
- end
423
+ #
424
+ # <tt>a.token(:word_token)</tt> will return a Token object when
425
+ # +a+ succeeds. The matched string (or the string returned by
426
+ # +a+, if any) is encapsulated in the token, together with the
427
+ # <tt>:word_token</tt> symbol and the starting index of the match.
428
+ #
429
+ def token(kind)
430
+ TokenParser.new(kind, self)
431
+ end
847
432
 
848
- #
849
- # A parser that maps current parser result to a new result using
850
- # the given block. If the current parser result is an array, the array
851
- # elements are expanded and then passed as parameters to the block.
852
- ##
853
- # Different from Parser#mapn, this method does not need to be combined
854
- # with any Parser object. It is rather an independent Parser object
855
- # that maps the _current_ parser result.
856
- ##
857
- # parser1.mapn{|x,y|...} is equivalent to parser1 >> mapn{|x,y|...}
858
- #
859
- def mapn(&block)
860
- return one unless block
861
- MapnCurrentParser.new(block)
862
- end
433
+ #
434
+ # <tt>a.seq b</tt> will sequentially run +a+ then +b+. The result
435
+ # of +b+ is preserved as return value. If a +block+ is
436
+ # associated, values returned by +a+ and +b+ are passed into the
437
+ # +block+ and the return value of the +block+ is used as the final
438
+ # result of the parser.
439
+ #
440
+ def seq(other, &block)
441
+ Parsers.sequence(self, other, &block)
442
+ end
863
443
 
864
- private
444
+ #
445
+ # Similar to #seq. +other+ is auto-boxed if it is not of type
446
+ # Parser.
447
+ #
448
+ def >>(other)
449
+ seq(autobox_parser(other))
450
+ end
865
451
 
866
- #
867
- # characters considered white space.
868
- #
869
- Whitespaces = " \t\r\n"
452
+ private
870
453
 
871
- def as_regexp ptn
872
- case ptn when String then Regexp.new(ptn) else ptn end
873
- end
454
+ def autobox_parser(val)
455
+ return Parsers.value(val) unless val.kind_of? Parser
456
+ val
457
+ end
874
458
 
875
- def as_char c
876
- case c when String then c else c.chr end
877
- end
459
+ def _infix_rest(operator, operand)
460
+ Parsers.sequence(operator, operand, &Idn)
461
+ end
878
462
 
879
- def as_num c
880
- case c when String then c[0] else c end
881
- end
463
+ alias ~ not
464
+ alias << followed
465
+ alias * repeat_
882
466
 
883
- def as_list vals
884
- return vals unless vals.length == 1
885
- val = vals[0]
886
- return vals unless val.kind_of? String
887
- val
467
+ def _parse(_ctxt)
468
+ false
469
+ end
888
470
  end
889
-
890
- extend self
891
471
  end
892
-
893
- end # module