rparsec 0.4.1 → 0.4.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,52 +1,58 @@
1
- #
2
- # module for Monad
3
- #
4
- module Monad
5
- attr_reader :this
6
- #
7
- # To initialize with a monad implementation and an object that obeys the monad law.
8
- #
9
- def initMonad(m, v)
10
- raise ArgumentError, 'monad cannot be nil' if m.nil?
11
- @monad = m;
12
- @this = v;
13
- end
14
- #
15
- # To create a value based on the monad impl.
16
- #
17
- def value v
18
- @monad.value v
19
- end
20
- #
21
- # Run the _bind_ operation on the encapsulated object following the monad law.
22
- #
23
- def bind(&binder)
24
- @monad.bind(@this, &binder)
25
- end
26
- #
27
- # Run the _seq_ operation on the encapsulated object following the monad law.
28
- # If _seq_ is not defined by the monad impl, use _bind_ to implement.
29
- #
30
- def seq(other)
31
- if @monad.respond_to? :seq
32
- @monad.seq(other)
33
- else bind {|x|other}
34
- end
35
- end
36
- #
37
- # Run the _map_ operation on the encapsulated object following the monad law.
38
- # _bind_ is used to implement.
39
- #
40
- def map(&mapper)
41
- bind do |v|
42
- result = mapper.call v;
43
- value(result);
44
- end
45
- end
46
- #
47
- # Run the _plus_ operation on the encapsulated object following the MonadPlus law.
48
- #
49
- def plus other
50
- @monad.mplus(@this, other.this)
51
- end
1
+ #
2
+ # module for Monad
3
+ #
4
+ module Monad
5
+ attr_reader :this
6
+
7
+ #
8
+ # To initialize with a monad implementation and an object that obeys the monad law.
9
+ #
10
+ def initMonad(m, v)
11
+ raise ArgumentError, 'monad cannot be nil' if m.nil?
12
+ @monad = m;
13
+ @this = v;
14
+ end
15
+
16
+ #
17
+ # To create a value based on the monad impl.
18
+ #
19
+ def value v
20
+ @monad.value v
21
+ end
22
+
23
+ #
24
+ # Run the _bind_ operation on the encapsulated object following the monad law.
25
+ #
26
+ def bind(&binder)
27
+ @monad.bind(@this, &binder)
28
+ end
29
+
30
+ #
31
+ # Run the _seq_ operation on the encapsulated object following the monad law.
32
+ # If _seq_ is not defined by the monad impl, use _bind_ to implement.
33
+ #
34
+ def seq(other)
35
+ if @monad.respond_to? :seq
36
+ @monad.seq(other)
37
+ else bind {|x|other}
38
+ end
39
+ end
40
+
41
+ #
42
+ # Run the _map_ operation on the encapsulated object following the monad law.
43
+ # _bind_ is used to implement.
44
+ #
45
+ def map(&mapper)
46
+ bind do |v|
47
+ result = mapper.call v;
48
+ value(result);
49
+ end
50
+ end
51
+
52
+ #
53
+ # Run the _plus_ operation on the encapsulated object following the MonadPlus law.
54
+ #
55
+ def plus other
56
+ @monad.mplus(@this, other.this)
57
+ end
52
58
  end
@@ -1,110 +1,117 @@
1
- require 'rparsec/parser'
2
-
3
- #
4
- # utility functions for string manipulations.
5
- #
6
- module StringUtils
7
- #
8
- # Does _str_ starts with the _sub_ string?
9
- #
10
- def self.starts_with? str, sub
11
- return true if sub.nil?
12
- len = sub.length
13
- return false if len > str.length
14
- for i in (0...len)
15
- return false if str[i] != sub[i]
16
- end
17
- true
18
- end
19
- end
20
-
21
- #
22
- # This class helps building lexer and parser for operators.
23
- # The case that one operator (++ for example) contains another operator (+)
24
- # is automatically handled so client code don't have to worry about ambiguity.
25
- #
26
- class Operators
27
- #
28
- # To create an instance of Operators for the given operators.
29
- # The _block_ parameter, if present, is used to convert the token text to another object
30
- # when the token is recognized during grammar parsing phase.
31
- #
32
- def initialize(ops, &block)
33
- @lexers = {}
34
- @parsers = {}
35
- sorted = Operators.sort(ops)
36
- lexers = sorted.map do |op|
37
- symbol = op.to_sym
38
- result = nil
39
- if op.length == 1
40
- result = Parsers.char(op)
41
- else
42
- result = Parsers.str(op)
43
- end
44
- result = result.token(symbol)
45
- @lexers[symbol] = result
46
- @parsers[symbol] = Parsers.token(symbol, &block)
47
- result
48
- end
49
- @lexer = Parsers.sum(*lexers)
50
- end
51
- #
52
- # Get the parser for the given operator.
53
- #
54
- def parser(op)
55
- result = @parsers[op.to_sym]
56
- raise ArgumentError, "parser not found for #{op}" if result.nil?
57
- result
58
- end
59
- alias [] parser
60
- #
61
- # Get the lexer that lexes operators.
62
- # If an operator is specified, the lexer for that operator is returned.
63
- #
64
- def lexer(op=nil)
65
- return @lexer if op.nil?
66
- @lexers[op.to_sym]
67
- end
68
- #
69
- # Sort an array of operators so that contained operator appears after containers.
70
- # When no containment exist between two operators, the shorter one takes precedence.
71
- #
72
- def self.sort(ops)
73
- #sort the array by longer-string-first.
74
- ordered = ops.sort {|x, y|y.length <=> x.length}
75
- suites = []
76
- # loop from the longer to shorter string
77
- ordered.each do |s|
78
- populate_suites(suites, s)
79
- end
80
- # suites are populated with bigger suite first
81
- to_array suites
82
- end
83
- private
84
- def self.populate_suites(suites, s)
85
- # populate the suites so that bigger suite first
86
- # this way we can use << operator for non-contained strings.
87
-
88
- # we need to start from bigger suite. So loop in reverse order
89
- for suite in suites
90
- return if populate_suite(suite, s)
91
- end
92
- suites << [s]
93
- end
94
- def self.populate_suite(suite, s)
95
- # loop from the tail of the suite
96
- for i in (1..suite.length)
97
- ind = suite.length - i
98
- cur = suite[ind]
99
- if StringUtils.starts_with? cur, s
100
- suite.insert(ind+1, s) unless cur == s
101
- return true
102
- end
103
- end
104
- false
105
- end
106
- def self.to_array suites
107
- result = []
108
- suites.reverse!.flatten!
109
- end
1
+ require 'rparsec/parser'
2
+
3
+ #
4
+ # utility functions for string manipulations.
5
+ #
6
+ module StringUtils
7
+ #
8
+ # Does _str_ starts with the _sub_ string?
9
+ #
10
+ def self.starts_with? str, sub
11
+ return true if sub.nil?
12
+ len = sub.length
13
+ return false if len > str.length
14
+ for i in (0...len)
15
+ return false if str[i] != sub[i]
16
+ end
17
+ true
18
+ end
19
+ end
20
+
21
+ #
22
+ # This class helps building lexer and parser for operators.
23
+ # The case that one operator (++ for example) contains another operator (+)
24
+ # is automatically handled so client code don't have to worry about ambiguity.
25
+ #
26
+ class Operators
27
+ #
28
+ # To create an instance of Operators for the given operators.
29
+ # The _block_ parameter, if present, is used to convert the token text to another object
30
+ # when the token is recognized during grammar parsing phase.
31
+ #
32
+ def initialize(ops, &block)
33
+ @lexers = {}
34
+ @parsers = {}
35
+ sorted = Operators.sort(ops)
36
+ lexers = sorted.map do |op|
37
+ symbol = op.to_sym
38
+ result = nil
39
+ if op.length == 1
40
+ result = Parsers.char(op)
41
+ else
42
+ result = Parsers.str(op)
43
+ end
44
+ result = result.token(symbol)
45
+ @lexers[symbol] = result
46
+ @parsers[symbol] = Parsers.token(symbol, &block)
47
+ result
48
+ end
49
+ @lexer = Parsers.sum(*lexers)
50
+ end
51
+
52
+ #
53
+ # Get the parser for the given operator.
54
+ #
55
+ def parser(op)
56
+ result = @parsers[op.to_sym]
57
+ raise ArgumentError, "parser not found for #{op}" if result.nil?
58
+ result
59
+ end
60
+
61
+ alias [] parser
62
+
63
+ #
64
+ # Get the lexer that lexes operators.
65
+ # If an operator is specified, the lexer for that operator is returned.
66
+ #
67
+ def lexer(op=nil)
68
+ return @lexer if op.nil?
69
+ @lexers[op.to_sym]
70
+ end
71
+
72
+ #
73
+ # Sort an array of operators so that contained operator appears after containers.
74
+ # When no containment exist between two operators, the shorter one takes precedence.
75
+ #
76
+ def self.sort(ops)
77
+ #sort the array by longer-string-first.
78
+ ordered = ops.sort {|x, y|y.length <=> x.length}
79
+ suites = []
80
+ # loop from the longer to shorter string
81
+ ordered.each do |s|
82
+ populate_suites(suites, s)
83
+ end
84
+ # suites are populated with bigger suite first
85
+ to_array suites
86
+ end
87
+
88
+ private
89
+
90
+ def self.populate_suites(suites, s)
91
+ # populate the suites so that bigger suite first
92
+ # this way we can use << operator for non-contained strings.
93
+
94
+ # we need to start from bigger suite. So loop in reverse order
95
+ for suite in suites
96
+ return if populate_suite(suite, s)
97
+ end
98
+ suites << [s]
99
+ end
100
+
101
+ def self.populate_suite(suite, s)
102
+ # loop from the tail of the suite
103
+ for i in (1..suite.length)
104
+ ind = suite.length - i
105
+ cur = suite[ind]
106
+ if StringUtils.starts_with? cur, s
107
+ suite.insert(ind+1, s) unless cur == s
108
+ return true
109
+ end
110
+ end
111
+ false
112
+ end
113
+
114
+ def self.to_array suites
115
+ suites.reverse!.flatten!
116
+ end
110
117
  end
@@ -1,794 +1,892 @@
1
- %w{
2
- monad misc error context locator token functors parser_monad
3
- }.each {|lib| require "rparsec/#{lib}"}
4
- require 'strscan'
5
-
6
-
7
- #
8
- # Represents a parser that parses a certain grammar rule.
9
- #
10
- class Parser
11
- include Functors
12
- include Monad
13
- extend Signature
14
- extend DefHelper
15
- MyMonad = ParserMonad.new
16
- attr_accessor :name
17
- private
18
- def initialize
19
- initMonad(MyMonad, self)
20
- end
21
- def self.init(*vars)
22
- parser_checker = {}
23
- vars.each_with_index do |var, i|
24
- name = var.to_s
25
- parser_checker[i] = var if name.include?('parser') && !name.include?('parsers')
26
- end
27
- define_method(:initialize) do |*params|
28
- super()
29
- vars.each_with_index do |var, i|
30
- param = params[i]
31
- if parser_checker.include? i
32
- TypeChecker.check_arg_type Parser, param, self, i
33
- end
34
- instance_variable_set("@"+var.to_s, param)
35
- end
36
- end
37
- end
38
- def _display_current_input(input, code, index)
39
- return 'EOF' if input.nil?
40
- c = input
41
- case c when Fixnum: "'"<<c<<"'" when Token: c.text else c.to_s end
42
- end
43
- def _add_encountered_error(msg, encountered)
44
- result = msg.dup
45
- result << ', ' unless msg.strip.length == 0 || msg =~ /.*(\.|,)\s*$/
46
- "#{result}#{encountered}"
47
- end
48
- def _add_location_to_error(locator, ctxt, msg, code)
49
- line, col = locator.locate(ctxt.error.index)
50
- msg << " at line #{line}, col #{col}."
51
- end
52
- public
53
- #
54
- # parses a string.
55
- #
56
- def parse(src)
57
- ctxt = ParseContext.new(src)
58
- return ctxt.result if _parse ctxt
59
- ctxt.prepare_error
60
- locator = CodeLocator.new(src)
61
- raise ParserException.new(ctxt.error.index),
62
- _add_location_to_error(locator, ctxt,
63
- _add_encountered_error(ctxt.to_msg,
64
- _display_current_input(ctxt.error.input, src, ctxt.index)), src)
65
- end
66
- #
67
- # Set name for the parser.
68
- # self is returned.
69
- #
70
- def setName(nm)
71
- @name = nm
72
- self
73
- end
74
- #
75
- # a.map{|x|x+1} will first execute parser a, when it succeeds,
76
- # the associated block is executed to transform the result to a new value
77
- # (increment it in this case).
78
- #
79
- def map(&block)
80
- return self unless block
81
- MapParser.new(self, block)
82
- end
83
- #
84
- # _self_ is first executed, the parser result is then passed as parameter to the associated block,
85
- # which evaluates to another Parser object at runtime. This new Parser object is then executed
86
- # to get the final parser result.
87
- ##
88
- # Different from _bind_, parser result of _self_ will be expanded first if it is an array.
89
- #
90
- def bindn(&block)
91
- return self unless block
92
- BoundnParser.new(self, block)
93
- end
94
- #
95
- # a.mapn{|x,y|x+y} will first execute parser a, when it succeeds,
96
- # the array result (if any) is expanded and passed as parameters
97
- # to the associated block. The result of the block is then used
98
- # as the parsing result.
99
- #
100
- def mapn(&block)
101
- return self unless block
102
- MapnParser.new(self, block)
103
- end
104
-
105
- #
106
- # Create a new parser that's atomic.,
107
- # meaning that when it fails, input consumption is undone.
108
- #
109
- def atomize
110
- AtomParser.new(self).setName(@name)
111
- end
112
- #
113
- # Create a new parser that looks at inputs whthout consuming them.
114
- #
115
- def peek
116
- PeekParser.new(self).setName(@name)
117
- end
118
- #
119
- # To create a new parser that succeed only if self fails.
120
- #
121
- def not(msg="#{self} unexpected")
122
- NotParser.new(self, msg)
123
- end
124
- #
125
- # To create a parser that does "look ahead" for n inputs.
126
- #
127
- def lookahead n
128
- self
129
- end
130
- #
131
- # To create a parser that fails with a given error message.
132
- #
133
- def expect msg
134
- ExpectParser.new(self, msg)
135
- end
136
- #
137
- # a.followed b will sequentially run a and b;
138
- # result of a is preserved as the ultimate return value.
139
- #
140
- def followed(other)
141
- FollowedParser.new(self, other)
142
- end
143
- def_sig :followed, Parser
144
- #
145
- # To create a parser that repeats self for a minimum _min_ times,
146
- # and maximally _max_ times.
147
- # Only the return value of the last execution is preserved.
148
- #
149
- def repeat_(min, max=min)
150
- return Parsers.failure("min=#{min}, max=#{max}") if min > max
151
- if(min==max)
152
- return Parsers.one if max <= 0
153
- return self if max == 1
154
- Repeat_Parser.new(self, max)
155
- else
156
- Some_Parser.new(self, min, max)
157
- end
158
- end
159
- #
160
- # To create a parser that repeats self for a minimum _min_ times,
161
- # and maximally _max_ times.
162
- # All return values are collected in an array.
163
- #
164
- def repeat(min, max=min)
165
- return Parsers.failure("min=#{min}, max=#{max}") if min > max
166
- if(min==max)
167
- RepeatParser.new(self, max)
168
- else
169
- SomeParser.new(self, min, max)
170
- end
171
- end
172
- #
173
- # To create a parser that repeats self for at least _least_ times.
174
- # parser.many_ is equivalent to bnf notation "parser*".
175
- # Only the return value of the last execution is preserved.
176
- #
177
- def many_(least=0)
178
- Many_Parser.new(self, least)
179
- end
180
- #
181
- # To create a parser that repeats self for at least _least_ times.
182
- # All return values are collected in an array.
183
- #
184
- def many(least=0)
185
- ManyParser.new(self, least)
186
- end
187
- #
188
- # To create a parser that repeats self for at most _max_ times.
189
- # Only the return value of the last execution is preserved.
190
- #
191
- def some_(max)
192
- repeat_(0, max)
193
- end
194
- #
195
- # To create a parser that repeats self for at most _max_ times.
196
- # All return values are collected in an array.
197
- #
198
- def some(max)
199
- repeat(0, max)
200
- end
201
- #
202
- # To create a parser that repeats self for unlimited times,
203
- # with the pattern recognized by _delim_ as separator that separates each occurrence.
204
- # self has to match for at least once.
205
- # Return values of self are collected in an array.
206
- #
207
- def separated1 delim
208
- rest = delim >> self
209
- self.bind do |v0|
210
- result = [v0]
211
- (rest.map {|v| result << v}).many_ >> value(result)
212
- end
213
- end
214
- #
215
- # To create a parser that repeats self for unlimited times,
216
- # with the pattern recognized by _delim_ as separator that separates each occurrence.
217
- # Return values of self are collected in an array.
218
- #
219
- def separated delim
220
- separated1(delim).plus value([])
221
- end
222
- #
223
- # To create a parser that repeats self for unlimited times,
224
- # with the pattern recognized by _delim_ as separator that separates each occurrence
225
- # and also possibly ends the pattern.
226
- # self has to match for at least once.
227
- # Return values of self are collected in an array.
228
- #
229
- def delimited1 delim
230
- rest = delim >> (self.plus Parsers.throwp(:__end_delimiter__))
231
- self.bind do |v0|
232
- result = [v0]
233
- (rest.map {|v| result << v}).many_.catchp(:__end_delimiter__) >> value(result)
234
- end
235
- end
236
- #
237
- # To create a parser that repeats self for unlimited times,
238
- # with the pattern recognized by _delim_ as separator that separates each occurrence
239
- # and also possibly ends the pattern.
240
- # Return values of self are collected in an array.
241
- #
242
- def delimited delim
243
- delimited1(delim).plus value([])
244
- end
245
- #
246
- # String representation
247
- #
248
- def to_s
249
- return name unless name.nil?
250
- self.class.to_s
251
- end
252
- #
253
- # a | b will run b when a fails.
254
- # b is auto-boxed to Parser when it is not of type Parser.
255
- #
256
- def | other
257
- AltParser.new([self, autobox_parser(other)])
258
- end
259
- #
260
- # a.optional(default) is equivalent to a.plus(value(default))
261
- #
262
- def optional(default=nil)
263
- self.plus(value(default))
264
- end
265
- #
266
- # a.catchp(:somesymbol) will catch the :somesymbol thrown by a.
267
- #
268
- def catchp(symbol)
269
- CatchParser.new(symbol, self)
270
- end
271
- #
272
- # a.fragment will return the string matched by a.
273
- #
274
- def fragment
275
- FragmentParser.new(self)
276
- end
277
- #
278
- # a.nested b will feed the token array returned by parser a to parser b
279
- # for a nested parsing.
280
- #
281
- def nested(parser)
282
- NestedParser.new(self, parser)
283
- end
284
- #
285
- # a.lexeme(delim) will parse _a_ for 0 or more times and ignore all
286
- # patterns recognized by _delim_.
287
- # Values returned by _a_ are collected in an array.
288
- #
289
- def lexeme(delim = Parsers.whitespaces)
290
- delim = delim.many_
291
- delim >> self.delimited(delim)
292
- end
293
- #
294
- # For prefix unary operator.
295
- # a.prefix op will run parser _op_ for 0 or more times and eventually run parser _a_
296
- # for one time.
297
- # _op_ should return a Proc that accepts one parameter.
298
- # Proc objects returned by _op_ is then fed with the value returned by _a_
299
- # from right to left.
300
- # The final result is returned as return value.
301
- #
302
- def prefix(op)
303
- Parsers.sequence(op.many, self) do |funcs, v|
304
- funcs.reverse_each {|f|v=f.call(v)}
305
- v
306
- end
307
- end
308
- #
309
- # For postfix unary operator.
310
- # a.postfix op will run parser _a_ for once and then _op_ for 0 or more times.
311
- # _op_ should return a Proc that accepts one parameter.
312
- # Proc objects returned by _op_ is then fed with the value returned by _a_
313
- # from left to right.
314
- # The final result is returned as return value.
315
- #
316
- def postfix(op)
317
- Parsers.sequence(self, op.many) do |v, funcs|
318
- funcs.each{|f|v=f.call(v)}
319
- v
320
- end
321
- end
322
- #
323
- # For non-associative infix binary operator.
324
- # _op_ has to return a Proc that takes two parameters, who
325
- # are returned by the _self_ parser as operands.
326
- #
327
- def infixn(op)
328
- bind do |v1|
329
- bin = Parsers.sequence(op, self) do |f, v2|
330
- f.call(v1,v2)
331
- end
332
- bin | value(v1)
333
- end
334
- end
335
- #
336
- # For left-associative infix binary operator.
337
- # _op_ has to return a Proc that takes two parameters, who
338
- # are returned by the _self_ parser as operands.
339
- #
340
- def infixl(op)
341
- Parsers.sequence(self, _infix_rest(op, self).many) do |v, rests|
342
- rests.each do |r|
343
- f, v1 = *r
344
- v = f.call(v,v1)
345
- end
346
- v
347
- end
348
- end
349
- #
350
- # For right-associative infix binary operator.
351
- # _op_ has to return a Proc that takes two parameters, who
352
- # are returned by the _self_ parser as operands.
353
- #
354
- def infixr(op)
355
- Parsers.sequence(self, _infix_rest(op, self).many) do |v, rests|
356
- if rests.empty?
357
- v
358
- else
359
- f, seed = *rests.last
360
- for i in (0...rests.length-1)
361
- cur = rests.length-2-i
362
- f1, v1 = *rests[cur]
363
- seed = f.call(v1, seed)
364
- f = f1
365
- end
366
- f.call(v, seed)
367
- end
368
- end
369
- end
370
- #
371
- # a.token(:word_token) will return a Token object when _a_ succeeds.
372
- # The matched string (or the string returned by _a_, if any) is
373
- # encapsulated in the token, together with the :word_token symbol and
374
- # the starting index of the match.
375
- #
376
- def token(kind)
377
- TokenParser.new(kind, self)
378
- end
379
- #
380
- # a.seq b will sequentially run a then b.
381
- # The result of b is preserved as return value.
382
- # If a block is associated, values returned by _a_ and _b_
383
- # are passed into the block and the return value of
384
- # the block is used as the final result of the parser.
385
- #
386
- def seq(other, &block)
387
- # TypeChecker.check_arg_type Parser, other, :seq
388
- Parsers.sequence(self, other, &block)
389
- end
390
- def_sig :seq, Parser
391
- #
392
- # Similar to _seq_. _other_ is auto-boxed if it is not of type Parser.
393
- #
394
- def >> (other)
395
- seq(autobox_parser(other))
396
- end
397
- private
398
- def autobox_parser(val)
399
- return Parsers.value(val) unless val.kind_of? Parser
400
- val
401
- end
402
- def _infix_rest(operator, operand)
403
- Parsers.sequence(operator, operand, &Idn)
404
- end
405
- public
406
- alias ~ not
407
- alias << followed
408
- alias * repeat_
409
- def_sig :plus, Parser
410
- private
411
- def _parse(ctxt)
412
- false
413
- end
414
- end
415
- #
416
- # This module provides all out-of-box parser implementations.
417
- #
418
- module Parsers
419
- extend Signature
420
- #
421
- # A parser that always fails with the given error message.
422
- #
423
- def failure msg
424
- FailureParser.new(msg)
425
- end
426
- #
427
- # A parser that always succeeds with the given return value.
428
- #
429
- def value v
430
- ValueParser.new(v)
431
- end
432
- #
433
- # A parser that calls alternative parsers until one succeed,
434
- # or any failure with input consumption beyond the current look-ahead.
435
- #
436
- def sum(*alts)
437
- # TypeChecker.check_vararg_type Parser, alts, :sum
438
- PlusParser.new(alts)
439
- end
440
- def_sig :sum, [Parser]
441
-
442
- #
443
- # A parser that calls alternative parsers until one succeeds.
444
- #
445
- def alt(*alts)
446
- AltParser.new(alts)
447
- end
448
- def_sig :alt, [Parser]
449
- #
450
- # A parser that succeeds when the given predicate returns true
451
- # (with the current input as the parameter).
452
- # _expected_ is the error message when _pred_ returns false.
453
- #
454
- def satisfies(expected, &pred)
455
- SatisfiesParser.new(pred, expected)
456
- end
457
- #
458
- # A parser that succeeds when the the current input is equal to the given value.
459
- # _expected_ is the error message when _pred_ returns false.
460
- #
461
- def is(v, expected="#{v} expected")
462
- satisfies(expected) {|c|c==v}
463
- end
464
- #
465
- # A parser that succeeds when the the current input is not equal to the given value.
466
- # _expected_ is the error message when _pred_ returns false.
467
- #
468
- def isnt(v, expected="#{v} unexpected")
469
- satisfies(expected) {|c|c!=v}
470
- end
471
- #
472
- # A parser that succeeds when the the current input is among the given values.
473
- #
474
- def among(*vals)
475
- expected="one of [#{vals.join(', ')}] expected"
476
- vals = as_list vals
477
- satisfies(expected) {|c|vals.include? c}
478
- end
479
- #
480
- # A parser that succeeds when the the current input is not among the given values.
481
- #
482
- def not_among(*vals)
483
- expected = "one of [#{vals.join(', ')}] unexpected"
484
- vals = as_list vals
485
- satisfies(expected) {|c|!vals.include? c}
486
- end
487
- #
488
- # A parser that succeeds when the the current input is the given character.
489
- #
490
- def char(c)
491
- if c.kind_of? Fixnum
492
- nm = c.chr
493
- is(c, "'#{nm}' expected").setName(nm)
494
- else
495
- is(c[0], "'#{c}' expected").setName(c)
496
- end
497
- end
498
- #
499
- # A parser that succeeds when the the current input is not the given character.
500
- #
501
- def not_char(c)
502
- if c.kind_of? Fixnum
503
- nm = c.chr
504
- isnt(c, "'#{nm}' unexpected").setName("~#{nm}")
505
- else
506
- isnt(c[0], "'#{c}' unexpected").setName("~#{c}")
507
- end
508
- end
509
-
510
- #
511
- # A parser that succeeds when there's no input available.
512
- #
513
- def eof(expected="EOF expected")
514
- EofParser.new(expected).setName('EOF')
515
- end
516
- #
517
- # A parser that tries to match the current inputs one by one
518
- # with the given values.
519
- # It succeeds only when all given values are matched, in which case all the
520
- # matched inputs are consumed.
521
- #
522
- def are(vals, expected="#{vals} expected")
523
- AreParser.new(vals, expected)
524
- end
525
- #
526
- # A parser that makes sure that the given values don't match
527
- # the current inputs. One input is consumed if it succeeds.
528
- #
529
- def arent(vals, expected="#{vals} unexpected")
530
- are(vals, '').not(expected) >> any
531
- end
532
- #
533
- # A parser that matches the given string.
534
- #
535
- def string(str, msg = "\"#{str}\" expected")
536
- are(str, msg).setName(str)
537
- end
538
- #
539
- # A parser that makes sure that the current input doesn't match a string.
540
- # One character is consumed if it succeeds.
541
- #
542
- def not_string(str, msg="\"#{str}\" unexpected")
543
- string(str).not(msg) >> any
544
- end
545
- alias str string
546
- #
547
- # A parser that sequentially run the given parsers.
548
- # The result of the last parser is used as return value.
549
- # If a block is given, the results of the parsers are passed
550
- # into the block as parameters, and the block return value
551
- # is used as result instead.
552
- #
553
- def sequence(*parsers, &proc)
554
- # TypeChecker.check_vararg_type Parser, parsers, :sequence
555
- SequenceParser.new(parsers, proc)
556
- end
557
- def_sig :sequence, [Parser]
558
- #
559
- # A parser that returns the current input index (starting from 0).
560
- #
561
- def get_index
562
- GetIndexParser.new.setName('get_index')
563
- end
564
- #
565
- # A parser that moves the current input pointer to a certain index.
566
- #
567
- def set_index ind
568
- SetIndexParser.new(ind).setName('set_index')
569
- end
570
- #
571
- # A parser that tries all given alternative parsers
572
- # and picks the one with the longest match.
573
- #
574
- def longest(*parsers)
575
- # TypeChecker.check_vararg_type Parser, parsers, :longest
576
- BestParser.new(parsers, true)
577
- end
578
- def_sig :longest, [Parser]
579
- #
580
- # A parser that tries all given alternative parsers
581
- # and picks the one with the shortest match.
582
- #
583
- def shortest(*parsers)
584
- # TypeChecker.check_vararg_type Parser, parsers, :shortest
585
- BestParser.new(parsers, false)
586
- end
587
- def_sig :shortest, [Parser]
588
- alias shorter shortest
589
- alias longer longest
590
- #
591
- # A parser that consumes one input.
592
- #
593
- def any
594
- AnyParser.new
595
- end
596
- #
597
- # A parser that always fails.
598
- #
599
- def zero
600
- ZeroParser.new
601
- end
602
- #
603
- # A parser that always succeeds.
604
- #
605
- def one
606
- OneParser.new
607
- end
608
- #
609
- # A parser that succeeds if the current input is within a certain range.
610
- #
611
- def range(from, to, msg="#{as_char from}..#{as_char to} expected")
612
- from, to = as_num(from), as_num(to)
613
- satisfies(msg) {|c| c <= to && c >= from}
614
- end
615
- #
616
- # A parser that throws a symbol.
617
- #
618
- def throwp(symbol)
619
- ThrowParser.new(symbol)
620
- end
621
- #
622
- # A parser that succeeds if the current inputs match
623
- # the given regular expression.
624
- # The matched string is consumed and returned as result.
625
- #
626
- def regexp(ptn, expected="/#{ptn.to_s}/ expected")
627
- RegexpParser.new(as_regexp(ptn), expected).setName(expected)
628
- end
629
- #
630
- # A parser that parses a word
631
- # (starting with alpha or underscore, followed by 0 or more alpha, number or underscore).
632
- # and return the matched word as string.
633
- #
634
- def word(expected='word expected')
635
- regexp(/[a-zA-Z_]\w*/, expected)
636
- end
637
- #
638
- # A parser that parses an integer
639
- # and return the matched integer as string.
640
- #
641
- def integer(expected='integer expected')
642
- regexp(/\d+(?!\w)/, expected)
643
- end
644
- #
645
- # A parser that parses a number (integer, or decimal number)
646
- # and return the matched number as string.
647
- #
648
- def number(expected='number expected')
649
- regexp(/\d+(\.\d+)?/, expected)
650
- end
651
- #
652
- # A parser that matches the given string, case insensitively.
653
- #
654
- def string_nocase(str, expected="'#{str}' expected")
655
- StringCaseInsensitiveParser.new(str, expected).setName(str)
656
- end
657
- #
658
- # A parser that succeeds when the current input
659
- # is a token with one of the the given token kinds.
660
- # If a block is given, the token text is passed to the block
661
- # as parameter, and the block return value is used as result.
662
- # Otherwise, the token object is used as result.
663
- #
664
- def token(*kinds, &proc)
665
- expected="#{kinds.join(' or ')} expected"
666
- recognizer = nil
667
- if kinds.length==1
668
- kind = kinds[0]
669
- recognizer = satisfies(expected) do |tok|
670
- tok.respond_to? :kind, :text and kind == tok.kind
671
- end
672
- else
673
- recognizer = satisfies(expected) do |tok|
674
- tok.respond_to? :kind, :text and kinds.include? tok.kind
675
- end
676
- end
677
- recognizer = recognizer.map{|tok|proc.call(tok.text)} if proc
678
- recognizer
679
- end
680
- #
681
- # A parser that parses a white space character.
682
- #
683
- def whitespace(expected="whitespace expected")
684
- satisfies(expected) {|c| Whitespaces.include? c}
685
- end
686
- #
687
- # A parser that parses 1 or more white space characters.
688
- #
689
- def whitespaces(expected="whitespace(s) expected")
690
- whitespace(expected).many_(1)
691
- end
692
- #
693
- # A parser that parses a line started with _start_.
694
- # nil is the result.
695
- #
696
- def comment_line start
697
- string(start) >> not_char(?\n).many_ >> char(?\n).optional >> value(nil)
698
- end
699
- #
700
- # A parser that parses a chunk of text started with _open_
701
- # and ended by _close_.
702
- # nil is the result.
703
- #
704
- def comment_block open, close
705
- string(open) >> not_string(close).many_ >> string(close) >> value(nil)
706
- end
707
- #
708
- # A lazy parser, when executed, calls the given block
709
- # to get a parser object and delegate the call to this lazily
710
- # instantiated parser.
711
- #
712
- def lazy(&block)
713
- LazyParser.new(block)
714
- end
715
- #
716
- # A parser that watches the current parser result without changing it.
717
- # The following assert will succeed:
718
- ##
719
- # char(?a) >> watch{|x|assert_equal(?a, x)}
720
- ##
721
- # watch can also be used as a handy tool to print trace information,
722
- # for example:
723
- ##
724
- # some_parser >> watch {puts "some_parser succeeded."}
725
- #
726
- def watch(&block)
727
- return one unless block
728
- WatchParser.new(block)
729
- end
730
- #
731
- # A parser that watches the current parser result without changing it.
732
- # The following assert will succeed:
733
- ##
734
- # char(?a).repeat(2) >> watchn{|x,y|assert_equal([?a,?a], [x,y])}
735
- ##
736
- # Slightly different from _watch_, _watchn_ expands the current parser result
737
- # before passing it into the associated block.
738
- #
739
- def watchn(&block)
740
- return one unless block
741
- WatchnParser.new(block)
742
- end
743
- #
744
- # A parser that maps current parser result to a new result using
745
- # the given block.
746
- ##
747
- # Different from Parser#map, this method does not need to be combined
748
- # with any Parser object. It is rather an independent Parser object
749
- # that maps the _current_ parser result.
750
- ##
751
- # parser1.map{|x|...} is equivalent to parser1 >> map{|x|...}
752
- #
753
- def map(&block)
754
- return one unless block
755
- MapCurrentParser.new(block)
756
- end
757
- #
758
- # A parser that maps current parser result to a new result using
759
- # the given block. If the current parser result is an array, the array
760
- # elements are expanded and then passed as parameters to the block.
761
- ##
762
- # Different from Parser#mapn, this method does not need to be combined
763
- # with any Parser object. It is rather an independent Parser object
764
- # that maps the _current_ parser result.
765
- ##
766
- # parser1.mapn{|x,y|...} is equivalent to parser1 >> mapn{|x,y|...}
767
- #
768
- def mapn(&block)
769
- return one unless block
770
- MapnCurrentParser.new(block)
771
- end
772
- private
773
- #
774
- # characters considered white space.
775
- #
776
- Whitespaces = " \t\r\n"
777
- def as_regexp ptn
778
- case ptn when String: Regexp.new(ptn) else ptn end
779
- end
780
- def as_char c
781
- case c when String: c else c.chr end
782
- end
783
- def as_num c
784
- case c when String: c[0] else c end
785
- end
786
- def as_list vals
787
- return vals unless vals.length==1
788
- val = vals[0]
789
- return vals unless val.kind_of? String
790
- val
791
- end
792
- extend self
793
- end
794
-
1
+ %w{
2
+ monad misc error context locator token functors parser_monad
3
+ }.each {|lib| require "rparsec/#{lib}"}
4
+ require 'strscan'
5
+
6
+
7
+ #
8
+ # Represents a parser that parses a certain grammar rule.
9
+ #
10
+ class Parser
11
+ include Functors
12
+ include Monad
13
+ extend Signature
14
+ extend DefHelper
15
+ MyMonad = ParserMonad.new
16
+ attr_accessor :name
17
+
18
+ private
19
+
20
+ def initialize
21
+ initMonad(MyMonad, self)
22
+ end
23
+
24
+ def self.init(*vars)
25
+ parser_checker = {}
26
+ vars.each_with_index do |var, i|
27
+ name = var.to_s
28
+ parser_checker[i] = var if name.include?('parser') && !name.include?('parsers')
29
+ end
30
+ define_method(:initialize) do |*params|
31
+ super()
32
+ vars.each_with_index do |var, i|
33
+ param = params[i]
34
+ if parser_checker.include? i
35
+ TypeChecker.check_arg_type Parser, param, self, i
36
+ end
37
+ instance_variable_set("@"+var.to_s, param)
38
+ end
39
+ end
40
+ end
41
+
42
+ def _display_current_input(input, code, index)
43
+ return 'EOF' if input.nil?
44
+ c = input
45
+ case c when Fixnum then "'"<<c<<"'" when Token then c.text else c.to_s end
46
+ end
47
+
48
+ def _add_encountered_error(msg, encountered)
49
+ result = msg.dup
50
+ result << ', ' unless msg.strip.length == 0 || msg =~ /.*(\.|,)\s*$/
51
+ "#{result}#{encountered}"
52
+ end
53
+
54
+ def _add_location_to_error(locator, ctxt, msg, code)
55
+ line, col = locator.locate(ctxt.error.index)
56
+ msg << " at line #{line}, col #{col}."
57
+ end
58
+
59
+ public
60
+
61
+ #
62
+ # parses a string.
63
+ #
64
+ def parse(src)
65
+ ctxt = ParseContext.new(src)
66
+ return ctxt.result if _parse ctxt
67
+ ctxt.prepare_error
68
+ locator = CodeLocator.new(src)
69
+ raise ParserException.new(ctxt.error.index),
70
+ _add_location_to_error(locator, ctxt,
71
+ _add_encountered_error(ctxt.to_msg,
72
+ _display_current_input(ctxt.error.input, src, ctxt.index)), src)
73
+ end
74
+
75
+ #
76
+ # Set name for the parser.
77
+ # self is returned.
78
+ #
79
+ def setName(nm)
80
+ @name = nm
81
+ self
82
+ end
83
+
84
+ #
85
+ # a.map{|x|x+1} will first execute parser a, when it succeeds,
86
+ # the associated block is executed to transform the result to a new value
87
+ # (increment it in this case).
88
+ #
89
+ def map(&block)
90
+ return self unless block
91
+ MapParser.new(self, block)
92
+ end
93
+
94
+ #
95
+ # _self_ is first executed, the parser result is then passed as parameter to the associated block,
96
+ # which evaluates to another Parser object at runtime. This new Parser object is then executed
97
+ # to get the final parser result.
98
+ #
99
+ # Different from _bind_, parser result of _self_ will be expanded first if it is an array.
100
+ #
101
+ def bindn(&block)
102
+ return self unless block
103
+ BoundnParser.new(self, block)
104
+ end
105
+
106
+ #
107
+ # a.mapn{|x,y|x+y} will first execute parser a, when it succeeds,
108
+ # the array result (if any) is expanded and passed as parameters
109
+ # to the associated block. The result of the block is then used
110
+ # as the parsing result.
111
+ #
112
+ def mapn(&block)
113
+ return self unless block
114
+ MapnParser.new(self, block)
115
+ end
116
+
117
+ #
118
+ # Create a new parser that's atomic,
119
+ # meaning that when it fails, input consumption is undone.
120
+ #
121
+ def atomize
122
+ AtomParser.new(self).setName(@name)
123
+ end
124
+
125
+ #
126
+ # Create a new parser that looks at inputs without consuming them.
127
+ #
128
+ def peek
129
+ PeekParser.new(self).setName(@name)
130
+ end
131
+
132
+ #
133
+ # To create a new parser that succeeds only if self fails.
134
+ #
135
+ def not(msg="#{self} unexpected")
136
+ NotParser.new(self, msg)
137
+ end
138
+
139
+ #
140
+ # To create a parser that does "look ahead" for n inputs.
141
+ #
142
+ def lookahead n
143
+ self
144
+ end
145
+
146
+ #
147
+ # To create a parser that fails with a given error message.
148
+ #
149
+ def expect msg
150
+ ExpectParser.new(self, msg)
151
+ end
152
+
153
+ #
154
+ # a.followed b will sequentially run a and b;
155
+ # result of a is preserved as the ultimate return value.
156
+ #
157
+ def followed(other)
158
+ FollowedParser.new(self, other)
159
+ end
160
+ def_sig :followed, Parser
161
+
162
+ #
163
+ # To create a parser that repeats self for a minimum _min_ times,
164
+ # and maximally _max_ times.
165
+ # Only the return value of the last execution is preserved.
166
+ #
167
+ def repeat_(min, max=min)
168
+ return Parsers.failure("min=#{min}, max=#{max}") if min > max
169
+ if(min==max)
170
+ return Parsers.one if max <= 0
171
+ return self if max == 1
172
+ Repeat_Parser.new(self, max)
173
+ else
174
+ Some_Parser.new(self, min, max)
175
+ end
176
+ end
177
+
178
+ #
179
+ # To create a parser that repeats self for a minimum _min_ times,
180
+ # and maximally _max_ times.
181
+ # All return values are collected in an array.
182
+ #
183
+ def repeat(min, max=min)
184
+ return Parsers.failure("min=#{min}, max=#{max}") if min > max
185
+ if(min==max)
186
+ RepeatParser.new(self, max)
187
+ else
188
+ SomeParser.new(self, min, max)
189
+ end
190
+ end
191
+
192
+ #
193
+ # To create a parser that repeats self for at least _least_ times.
194
+ # parser.many_ is equivalent to bnf notation "parser*".
195
+ # Only the return value of the last execution is preserved.
196
+ #
197
+ def many_(least=0)
198
+ Many_Parser.new(self, least)
199
+ end
200
+
201
+ #
202
+ # To create a parser that repeats self for at least _least_ times.
203
+ # All return values are collected in an array.
204
+ #
205
+ def many(least=0)
206
+ ManyParser.new(self, least)
207
+ end
208
+
209
+ #
210
+ # To create a parser that repeats self for at most _max_ times.
211
+ # Only the return value of the last execution is preserved.
212
+ #
213
+ def some_(max)
214
+ repeat_(0, max)
215
+ end
216
+
217
+ #
218
+ # To create a parser that repeats self for at most _max_ times.
219
+ # All return values are collected in an array.
220
+ #
221
+ def some(max)
222
+ repeat(0, max)
223
+ end
224
+
225
+ #
226
+ # To create a parser that repeats self for unlimited times,
227
+ # with the pattern recognized by _delim_ as separator that separates each occurrence.
228
+ # self has to match for at least once.
229
+ # Return values of self are collected in an array.
230
+ #
231
+ def separated1 delim
232
+ rest = delim >> self
233
+ self.bind do |v0|
234
+ result = [v0]
235
+ (rest.map {|v| result << v}).many_ >> value(result)
236
+ end
237
+ end
238
+
239
+ #
240
+ # To create a parser that repeats self for unlimited times,
241
+ # with the pattern recognized by _delim_ as separator that separates each occurrence.
242
+ # Return values of self are collected in an array.
243
+ #
244
+ def separated delim
245
+ separated1(delim).plus value([])
246
+ end
247
+
248
+ #
249
+ # To create a parser that repeats self for unlimited times,
250
+ # with the pattern recognized by _delim_ as separator that separates each occurrence
251
+ # and also possibly ends the pattern.
252
+ # self has to match for at least once.
253
+ # Return values of self are collected in an array.
254
+ #
255
+ def delimited1 delim
256
+ rest = delim >> (self.plus Parsers.throwp(:__end_delimiter__))
257
+ self.bind do |v0|
258
+ result = [v0]
259
+ (rest.map {|v| result << v}).many_.catchp(:__end_delimiter__) >> value(result)
260
+ end
261
+ end
262
+
263
+ #
264
+ # To create a parser that repeats self for unlimited times,
265
+ # with the pattern recognized by _delim_ as separator that separates each occurrence
266
+ # and also possibly ends the pattern.
267
+ # Return values of self are collected in an array.
268
+ #
269
+ def delimited delim
270
+ delimited1(delim).plus value([])
271
+ end
272
+
273
+ #
274
+ # String representation
275
+ #
276
+ def to_s
277
+ return name unless name.nil?
278
+ self.class.to_s
279
+ end
280
+
281
+ #
282
+ # a | b will run b when a fails.
283
+ # b is auto-boxed to Parser when it is not of type Parser.
284
+ #
285
+ def | other
286
+ AltParser.new([self, autobox_parser(other)])
287
+ end
288
+
289
+ #
290
+ # a.optional(default) is equivalent to a.plus(value(default))
291
+ #
292
+ def optional(default=nil)
293
+ self.plus(value(default))
294
+ end
295
+
296
+ #
297
+ # a.catchp(:somesymbol) will catch the :somesymbol thrown by a.
298
+ #
299
+ def catchp(symbol)
300
+ CatchParser.new(symbol, self)
301
+ end
302
+
303
+ #
304
+ # a.fragment will return the string matched by a.
305
+ #
306
+ def fragment
307
+ FragmentParser.new(self)
308
+ end
309
+
310
+ #
311
+ # a.nested b will feed the token array returned by parser a to parser b
312
+ # for a nested parsing.
313
+ #
314
+ def nested(parser)
315
+ NestedParser.new(self, parser)
316
+ end
317
+
318
+ #
319
+ # a.lexeme(delim) will parse _a_ for 0 or more times and ignore all
320
+ # patterns recognized by _delim_.
321
+ # Values returned by _a_ are collected in an array.
322
+ #
323
+ def lexeme(delim = Parsers.whitespaces)
324
+ delim = delim.many_
325
+ delim >> self.delimited(delim)
326
+ end
327
+
328
+ #
329
+ # For prefix unary operator.
330
+ # a.prefix op will run parser _op_ for 0 or more times and eventually run parser _a_
331
+ # for one time.
332
+ # _op_ should return a Proc that accepts one parameter.
333
+ # Proc objects returned by _op_ are then fed with the value returned by _a_
334
+ # from right to left.
335
+ # The final result is returned as return value.
336
+ #
337
+ def prefix(op)
338
+ Parsers.sequence(op.many, self) do |funcs, v|
339
+ funcs.reverse_each {|f|v=f.call(v)}
340
+ v
341
+ end
342
+ end
343
+
344
+ #
345
+ # For postfix unary operator.
346
+ # a.postfix op will run parser _a_ for once and then _op_ for 0 or more times.
347
+ # _op_ should return a Proc that accepts one parameter.
348
+ # Proc objects returned by _op_ are then fed with the value returned by _a_
349
+ # from left to right.
350
+ # The final result is returned as return value.
351
+ #
352
+ def postfix(op)
353
+ Parsers.sequence(self, op.many) do |v, funcs|
354
+ funcs.each{|f|v=f.call(v)}
355
+ v
356
+ end
357
+ end
358
+
359
+ #
360
+ # For non-associative infix binary operator.
361
+ # _op_ has to return a Proc that takes two parameters, which
361
+ # are the operands returned by the _self_ parser.
363
+ #
364
+ def infixn(op)
365
+ bind do |v1|
366
+ bin = Parsers.sequence(op, self) do |f, v2|
367
+ f.call(v1,v2)
368
+ end
369
+ bin | value(v1)
370
+ end
371
+ end
372
+
373
+ #
374
+ # For left-associative infix binary operator.
375
+ # _op_ has to return a Proc that takes two parameters, which
375
+ # are the operands returned by the _self_ parser.
377
+ #
378
+ def infixl(op)
379
+ Parsers.sequence(self, _infix_rest(op, self).many) do |v, rests|
380
+ rests.each do |r|
381
+ f, v1 = *r
382
+ v = f.call(v,v1)
383
+ end
384
+ v
385
+ end
386
+ end
387
+
388
+ #
389
+ # For right-associative infix binary operator.
390
+ # _op_ has to return a Proc that takes two parameters, which
390
+ # are the operands returned by the _self_ parser.
392
+ #
393
+ def infixr(op)
394
+ Parsers.sequence(self, _infix_rest(op, self).many) do |v, rests|
395
+ if rests.empty?
396
+ v
397
+ else
398
+ f, seed = *rests.last
399
+ for i in (0...rests.length-1)
400
+ cur = rests.length-2-i
401
+ f1, v1 = *rests[cur]
402
+ seed = f.call(v1, seed)
403
+ f = f1
404
+ end
405
+ f.call(v, seed)
406
+ end
407
+ end
408
+ end
409
+
410
+ #
411
+ # a.token(:word_token) will return a Token object when _a_ succeeds.
412
+ # The matched string (or the string returned by _a_, if any) is
413
+ # encapsulated in the token, together with the :word_token symbol and
414
+ # the starting index of the match.
415
+ #
416
+ def token(kind)
417
+ TokenParser.new(kind, self)
418
+ end
419
+
420
+ #
421
+ # a.seq b will sequentially run a then b.
422
+ # The result of b is preserved as return value.
423
+ # If a block is associated, values returned by _a_ and _b_
424
+ # are passed into the block and the return value of
425
+ # the block is used as the final result of the parser.
426
+ #
427
+ def seq(other, &block)
428
+ # TypeChecker.check_arg_type Parser, other, :seq
429
+ Parsers.sequence(self, other, &block)
430
+ end
431
+ def_sig :seq, Parser
432
+
433
+ #
434
+ # Similar to _seq_. _other_ is auto-boxed if it is not of type Parser.
435
+ #
436
+ def >> (other)
437
+ seq(autobox_parser(other))
438
+ end
439
+
440
+ private
441
+
442
+ def autobox_parser(val)
443
+ return Parsers.value(val) unless val.kind_of? Parser
444
+ val
445
+ end
446
+
447
+ def _infix_rest(operator, operand)
448
+ Parsers.sequence(operator, operand, &Idn)
449
+ end
450
+
451
+ public
452
+
453
+ alias ~ not
454
+ alias << followed
455
+ alias * repeat_
456
+
457
+ def_sig :plus, Parser
458
+
459
+ private
460
+
461
+ def _parse(ctxt)
462
+ false
463
+ end
464
+ end
465
+ #
466
+ # This module provides all out-of-box parser implementations.
467
+ #
468
+ module Parsers
469
+ extend Signature
470
+
471
+ #
472
+ # A parser that always fails with the given error message.
473
+ #
474
+ def failure msg
475
+ FailureParser.new(msg)
476
+ end
477
+
478
+ #
479
+ # A parser that always succeeds with the given return value.
480
+ #
481
+ def value v
482
+ ValueParser.new(v)
483
+ end
484
+
485
+ #
486
+ # A parser that calls alternative parsers until one succeeds,
487
+ # or any failure with input consumption beyond the current look-ahead.
488
+ #
489
+ def sum(*alts)
490
+ # TypeChecker.check_vararg_type Parser, alts, :sum
491
+ PlusParser.new(alts)
492
+ end
493
+ def_sig :sum, [Parser]
494
+
495
+ #
496
+ # A parser that calls alternative parsers until one succeeds.
497
+ #
498
+ def alt(*alts)
499
+ AltParser.new(alts)
500
+ end
501
+ def_sig :alt, [Parser]
502
+
503
+ #
504
+ # A parser that succeeds when the given predicate returns true
505
+ # (with the current input as the parameter).
506
+ # _expected_ is the error message when _pred_ returns false.
507
+ #
508
+ def satisfies(expected, &pred)
509
+ SatisfiesParser.new(pred, expected)
510
+ end
511
+
512
+ #
513
+ # A parser that succeeds when the current input is equal to the given value.
513
+ # _expected_ is the error message used when the input is not equal to it.
515
+ #
516
+ def is(v, expected="#{v} expected")
517
+ satisfies(expected) {|c|c==v}
518
+ end
519
+
520
+ #
521
+ # A parser that succeeds when the current input is not equal to the given value.
521
+ # _expected_ is the error message used when the input is equal to it.
523
+ #
524
+ def isnt(v, expected="#{v} unexpected")
525
+ satisfies(expected) {|c|c!=v}
526
+ end
527
+
528
+ #
529
+ # A parser that succeeds when the current input is among the given values.
530
+ #
531
+ def among(*vals)
532
+ expected="one of [#{vals.join(', ')}] expected"
533
+ vals = as_list vals
534
+ satisfies(expected) {|c|vals.include? c}
535
+ end
536
+
537
+ #
538
+ # A parser that succeeds when the current input is not among the given values.
539
+ #
540
+ def not_among(*vals)
541
+ expected = "one of [#{vals.join(', ')}] unexpected"
542
+ vals = as_list vals
543
+ satisfies(expected) {|c|!vals.include? c}
544
+ end
545
+
546
+ #
547
+ # A parser that succeeds when the current input is the given character.
548
+ #
549
+ def char(c)
550
+ if c.kind_of? Fixnum
551
+ nm = c.chr
552
+ is(c, "'#{nm}' expected").setName(nm)
553
+ else
554
+ is(c[0], "'#{c}' expected").setName(c)
555
+ end
556
+ end
557
+
558
+ #
559
+ # A parser that succeeds when the current input is not the given character.
560
+ #
561
+ def not_char(c)
562
+ if c.kind_of? Fixnum
563
+ nm = c.chr
564
+ isnt(c, "'#{nm}' unexpected").setName("~#{nm}")
565
+ else
566
+ isnt(c[0], "'#{c}' unexpected").setName("~#{c}")
567
+ end
568
+ end
569
+
570
+ #
571
+ # A parser that succeeds when there's no input available.
572
+ #
573
+ def eof(expected="EOF expected")
574
+ EofParser.new(expected).setName('EOF')
575
+ end
576
+
577
+ #
578
+ # A parser that tries to match the current inputs one by one
579
+ # with the given values.
580
+ # It succeeds only when all given values are matched, in which case all the
581
+ # matched inputs are consumed.
582
+ #
583
+ def are(vals, expected="#{vals} expected")
584
+ AreParser.new(vals, expected)
585
+ end
586
+
587
+ #
588
+ # A parser that makes sure that the given values don't match
589
+ # the current inputs. One input is consumed if it succeeds.
590
+ #
591
+ def arent(vals, expected="#{vals} unexpected")
592
+ are(vals, '').not(expected) >> any
593
+ end
594
+
595
+ #
596
+ # A parser that matches the given string.
597
+ #
598
+ def string(str, msg = "\"#{str}\" expected")
599
+ are(str, msg).setName(str)
600
+ end
601
+
602
+ #
603
+ # A parser that makes sure that the current input doesn't match a string.
604
+ # One character is consumed if it succeeds.
605
+ #
606
+ def not_string(str, msg="\"#{str}\" unexpected")
607
+ string(str).not(msg) >> any
608
+ end
609
+
610
+ alias str string
611
+
612
+ #
613
+ # A parser that sequentially runs the given parsers.
614
+ # The result of the last parser is used as return value.
615
+ # If a block is given, the results of the parsers are passed
616
+ # into the block as parameters, and the block return value
617
+ # is used as result instead.
618
+ #
619
+ def sequence(*parsers, &proc)
620
+ # TypeChecker.check_vararg_type Parser, parsers, :sequence
621
+ SequenceParser.new(parsers, proc)
622
+ end
623
+ def_sig :sequence, [Parser]
624
+
625
+ #
626
+ # A parser that returns the current input index (starting from 0).
627
+ #
628
+ def get_index
629
+ GetIndexParser.new.setName('get_index')
630
+ end
631
+
632
+ #
633
+ # A parser that moves the current input pointer to a certain index.
634
+ #
635
+ def set_index ind
636
+ SetIndexParser.new(ind).setName('set_index')
637
+ end
638
+
639
+ #
640
+ # A parser that tries all given alternative parsers
641
+ # and picks the one with the longest match.
642
+ #
643
+ def longest(*parsers)
644
+ # TypeChecker.check_vararg_type Parser, parsers, :longest
645
+ BestParser.new(parsers, true)
646
+ end
647
+ def_sig :longest, [Parser]
648
+
649
+ #
650
+ # A parser that tries all given alternative parsers
651
+ # and picks the one with the shortest match.
652
+ #
653
+ def shortest(*parsers)
654
+ # TypeChecker.check_vararg_type Parser, parsers, :shortest
655
+ BestParser.new(parsers, false)
656
+ end
657
+ def_sig :shortest, [Parser]
658
+
659
+ alias shorter shortest
660
+ alias longer longest
661
+
662
+ #
663
+ # A parser that consumes one input.
664
+ #
665
+ def any
666
+ AnyParser.new
667
+ end
668
+
669
+ #
670
+ # A parser that always fails.
671
+ #
672
+ def zero
673
+ ZeroParser.new
674
+ end
675
+
676
+ #
677
+ # A parser that always succeeds.
678
+ #
679
+ def one
680
+ OneParser.new
681
+ end
682
+
683
+ #
684
+ # A parser that succeeds if the current input is within a certain range.
685
+ #
686
+ def range(from, to, msg="#{as_char from}..#{as_char to} expected")
687
+ from, to = as_num(from), as_num(to)
688
+ satisfies(msg) {|c| c <= to && c >= from}
689
+ end
690
+
691
+ #
692
+ # A parser that throws a symbol.
693
+ #
694
+ def throwp(symbol)
695
+ ThrowParser.new(symbol)
696
+ end
697
+
698
+ #
699
+ # A parser that succeeds if the current inputs match
700
+ # the given regular expression.
701
+ # The matched string is consumed and returned as result.
702
+ #
703
+ def regexp(ptn, expected="/#{ptn.to_s}/ expected")
704
+ RegexpParser.new(as_regexp(ptn), expected).setName(expected)
705
+ end
706
+
707
+ #
708
+ # A parser that parses a word
709
+ # (starting with alpha or underscore, followed by 0 or more alpha, number or underscore).
710
+ # and return the matched word as string.
711
+ #
712
+ def word(expected='word expected')
713
+ regexp(/[a-zA-Z_]\w*/, expected)
714
+ end
715
+
716
+ #
717
+ # A parser that parses an integer
718
+ # and return the matched integer as string.
719
+ #
720
+ def integer(expected='integer expected')
721
+ regexp(/\d+(?!\w)/, expected)
722
+ end
723
+
724
+ #
725
+ # A parser that parses a number (integer, or decimal number)
726
+ # and return the matched number as string.
727
+ #
728
+ def number(expected='number expected')
729
+ regexp(/\d+(\.\d+)?/, expected)
730
+ end
731
+
732
+ #
733
+ # A parser that matches the given string, case insensitively.
734
+ #
735
+ def string_nocase(str, expected="'#{str}' expected")
736
+ StringCaseInsensitiveParser.new(str, expected).setName(str)
737
+ end
738
+
739
+ #
740
+ # A parser that succeeds when the current input
741
+ # is a token with one of the given token kinds.
742
+ # If a block is given, the token text is passed to the block
743
+ # as parameter, and the block return value is used as result.
744
+ # Otherwise, the token object is used as result.
745
+ #
746
+ def token(*kinds, &proc)
747
+ expected="#{kinds.join(' or ')} expected"
748
+ recognizer = nil
749
+ if kinds.length==1
750
+ kind = kinds[0]
751
+ recognizer = satisfies(expected) do |tok|
752
+ tok.respond_to? :kind, :text and kind == tok.kind
753
+ end
754
+ else
755
+ recognizer = satisfies(expected) do |tok|
756
+ tok.respond_to? :kind, :text and kinds.include? tok.kind
757
+ end
758
+ end
759
+ recognizer = recognizer.map{|tok|proc.call(tok.text)} if proc
760
+ recognizer
761
+ end
762
+
763
+ #
764
+ # A parser that parses a white space character.
765
+ #
766
+ def whitespace(expected="whitespace expected")
767
+ satisfies(expected) {|c| Whitespaces.include? c}
768
+ end
769
+
770
+ #
771
+ # A parser that parses 1 or more white space characters.
772
+ #
773
+ def whitespaces(expected="whitespace(s) expected")
774
+ whitespace(expected).many_(1)
775
+ end
776
+
777
+ #
778
+ # A parser that parses a line started with _start_.
779
+ # nil is the result.
780
+ #
781
+ def comment_line start
782
+ string(start) >> not_char(?\n).many_ >> char(?\n).optional >> value(nil)
783
+ end
784
+
785
+ #
786
+ # A parser that parses a chunk of text started with _open_
787
+ # and ended by _close_.
788
+ # nil is the result.
789
+ #
790
+ def comment_block open, close
791
+ string(open) >> not_string(close).many_ >> string(close) >> value(nil)
792
+ end
793
+
794
+ #
795
+ # A lazy parser, when executed, calls the given block
796
+ # to get a parser object and delegate the call to this lazily
797
+ # instantiated parser.
798
+ #
799
+ def lazy(&block)
800
+ LazyParser.new(block)
801
+ end
802
+
803
+ #
804
+ # A parser that watches the current parser result without changing it.
805
+ # The following assert will succeed:
806
+ ##
807
+ # char(?a) >> watch{|x|assert_equal(?a, x)}
808
+ ##
809
+ # watch can also be used as a handy tool to print trace information,
810
+ # for example:
811
+ ##
812
+ # some_parser >> watch {puts "some_parser succeeded."}
813
+ #
814
+ def watch(&block)
815
+ return one unless block
816
+ WatchParser.new(block)
817
+ end
818
+
819
+ #
820
+ # A parser that watches the current parser result without changing it.
821
+ # The following assert will succeed:
822
+ ##
823
+ # char(?a).repeat(2) >> watchn{|x,y|assert_equal([?a,?a], [x,y])}
824
+ ##
825
+ # Slightly different from _watch_, _watchn_ expands the current parser result
826
+ # before passing it into the associated block.
827
+ #
828
+ def watchn(&block)
829
+ return one unless block
830
+ WatchnParser.new(block)
831
+ end
832
+
833
+ #
834
+ # A parser that maps current parser result to a new result using
835
+ # the given block.
836
+ ##
837
+ # Different from Parser#map, this method does not need to be combined
838
+ # with any Parser object. It is rather an independent Parser object
839
+ # that maps the _current_ parser result.
840
+ ##
841
+ # parser1.map{|x|...} is equivalent to parser1 >> map{|x|...}
842
+ #
843
+ def map(&block)
844
+ return one unless block
845
+ MapCurrentParser.new(block)
846
+ end
847
+
848
+ #
849
+ # A parser that maps current parser result to a new result using
850
+ # the given block. If the current parser result is an array, the array
851
+ # elements are expanded and then passed as parameters to the block.
852
+ ##
853
+ # Different from Parser#mapn, this method does not need to be combined
854
+ # with any Parser object. It is rather an independent Parser object
855
+ # that maps the _current_ parser result.
856
+ ##
857
+ # parser1.mapn{|x,y|...} is equivalent to parser1 >> mapn{|x,y|...}
858
+ #
859
+ def mapn(&block)
860
+ return one unless block
861
+ MapnCurrentParser.new(block)
862
+ end
863
+
864
+ private
865
+
866
+ #
867
+ # characters considered white space.
868
+ #
869
+ Whitespaces = " \t\r\n"
870
+
871
+ def as_regexp ptn
872
+ case ptn when String then Regexp.new(ptn) else ptn end
873
+ end
874
+
875
+ def as_char c
876
+ case c when String then c else c.chr end
877
+ end
878
+
879
+ def as_num c
880
+ case c when String: c[0] else c end
881
+ end
882
+
883
+ def as_list vals
884
+ return vals unless vals.length==1
885
+ val = vals[0]
886
+ return vals unless val.kind_of? String
887
+ val
888
+ end
889
+
890
+ extend self
891
+ end
892
+