rparsec2 1.1.0 → 1.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/rparsec/context.rb +80 -78
- data/lib/rparsec/error.rb +21 -18
- data/lib/rparsec/expressions.rb +75 -163
- data/lib/rparsec/functor_mixin.rb +102 -0
- data/lib/rparsec/functors.rb +151 -255
- data/lib/rparsec/id_monad.rb +13 -11
- data/lib/rparsec/keywords.rb +95 -93
- data/lib/rparsec/locator.rb +27 -25
- data/lib/rparsec/misc.rb +14 -97
- data/lib/rparsec/monad.rb +53 -50
- data/lib/rparsec/operator_table.rb +89 -0
- data/lib/rparsec/operators.rb +85 -81
- data/lib/rparsec/parser.rb +397 -819
- data/lib/rparsec/parser_monad.rb +18 -16
- data/lib/rparsec/parsers.rb +922 -499
- data/lib/rparsec/token.rb +32 -30
- data/lib/rparsec.rb +7 -4
- metadata +11 -3
data/lib/rparsec/parser.rb
CHANGED
@@ -1,893 +1,471 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "rparsec/functors"
|
4
|
+
require "rparsec/monad"
|
5
|
+
require "rparsec/misc"
|
6
|
+
require "rparsec/parser_monad"
|
7
|
+
require "rparsec/context"
|
8
|
+
require "rparsec/error"
|
9
|
+
require "rparsec/token"
|
10
|
+
require "rparsec/locator"
|
4
11
|
|
5
12
|
module RParsec
|
6
13
|
|
7
|
-
#
|
8
|
-
# Represents a parser that parses a certain grammar rule.
|
9
|
-
#
|
10
|
-
class Parser
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
attr_accessor :name
|
17
|
-
|
18
|
-
class << self
|
19
|
-
private
|
14
|
+
#
|
15
|
+
# Represents a parser that parses a certain grammar rule.
|
16
|
+
#
|
17
|
+
class Parser
|
18
|
+
include Functors
|
19
|
+
include Monad
|
20
|
+
extend DefHelper
|
21
|
+
MyMonad = ParserMonad.new
|
22
|
+
attr_accessor :name
|
20
23
|
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
parser_checker
|
26
|
-
end
|
27
|
-
define_method(:initialize) do |*params|
|
28
|
-
super()
|
24
|
+
class << self
|
25
|
+
private
|
26
|
+
|
27
|
+
def init(*vars)
|
28
|
+
parser_checker = {}
|
29
29
|
vars.each_with_index do |var, i|
|
30
|
-
|
31
|
-
if
|
32
|
-
|
30
|
+
name = var.to_s
|
31
|
+
parser_checker[i] = var if name.include?('parser') && !name.include?('parsers')
|
32
|
+
end
|
33
|
+
define_method(:initialize) do |*params|
|
34
|
+
super()
|
35
|
+
vars.each_with_index do |var, i|
|
36
|
+
instance_variable_set("@#{var}", params[i])
|
33
37
|
end
|
34
|
-
instance_variable_set("@" + var.to_s, param)
|
35
38
|
end
|
36
39
|
end
|
37
40
|
end
|
38
|
-
end
|
39
|
-
|
40
|
-
private
|
41
41
|
|
42
|
-
|
43
|
-
initMonad(MyMonad, self)
|
44
|
-
end
|
45
|
-
|
46
|
-
def _display_current_input(input, _code, _index)
|
47
|
-
return 'EOF' if input.nil?
|
48
|
-
c = input
|
49
|
-
case c when Integer then "'" << c << "'" when Token then c.text else c.to_s end
|
50
|
-
end
|
51
|
-
|
52
|
-
def _add_encountered_error(msg, encountered)
|
53
|
-
result = msg.dup
|
54
|
-
result << ', ' unless msg.strip.length == 0 || msg =~ /.*(\.|,)\s*$/
|
55
|
-
"#{result}#{encountered}"
|
56
|
-
end
|
57
|
-
|
58
|
-
def _add_location_to_error(locator, ctxt, msg, _code)
|
59
|
-
line, col = locator.locate(ctxt.error.index)
|
60
|
-
msg << " at line #{line}, col #{col}."
|
61
|
-
end
|
62
|
-
|
63
|
-
public
|
64
|
-
|
65
|
-
#
|
66
|
-
# parses a string.
|
67
|
-
#
|
68
|
-
def parse(src)
|
69
|
-
ctxt = ParseContext.new(src)
|
70
|
-
return ctxt.result if _parse ctxt
|
71
|
-
ctxt.prepare_error
|
72
|
-
locator = CodeLocator.new(src)
|
73
|
-
raise ParserException.new(ctxt.error.index),
|
74
|
-
_add_location_to_error(locator, ctxt,
|
75
|
-
_add_encountered_error(ctxt.to_msg,
|
76
|
-
_display_current_input(ctxt.error.input, src, ctxt.index)), src)
|
77
|
-
end
|
78
|
-
|
79
|
-
#
|
80
|
-
# Set name for the parser.
|
81
|
-
# self is returned.
|
82
|
-
#
|
83
|
-
def setName(nm)
|
84
|
-
@name = nm
|
85
|
-
self
|
86
|
-
end
|
87
|
-
|
88
|
-
#
|
89
|
-
# a.map{|x|x+1} will first execute parser a, when it succeeds,
|
90
|
-
# the associated block is executed to transform the result to a new value
|
91
|
-
# (increment it in this case).
|
92
|
-
#
|
93
|
-
def map(&block)
|
94
|
-
return self unless block
|
95
|
-
MapParser.new(self, block)
|
96
|
-
end
|
97
|
-
|
98
|
-
#
|
99
|
-
# _self_ is first executed, the parser result is then passed as parameter to the associated block,
|
100
|
-
# which evaluates to another Parser object at runtime. This new Parser object is then executed
|
101
|
-
# to get the final parser result.
|
102
|
-
#
|
103
|
-
# Different from _bind_, parser result of _self_ will be expanded first if it is an array.
|
104
|
-
#
|
105
|
-
def bindn(&block)
|
106
|
-
return self unless block
|
107
|
-
BoundnParser.new(self, block)
|
108
|
-
end
|
109
|
-
|
110
|
-
#
|
111
|
-
# a.mapn{|x,y|x+y} will first execute parser a, when it succeeds,
|
112
|
-
# the array result (if any) is expanded and passed as parameters
|
113
|
-
# to the associated block. The result of the block is then used
|
114
|
-
# as the parsing result.
|
115
|
-
#
|
116
|
-
def mapn(&block)
|
117
|
-
return self unless block
|
118
|
-
MapnParser.new(self, block)
|
119
|
-
end
|
120
|
-
|
121
|
-
#
|
122
|
-
# Create a new parser that's atomic.,
|
123
|
-
# meaning that when it fails, input consumption is undone.
|
124
|
-
#
|
125
|
-
def atomize
|
126
|
-
AtomParser.new(self).setName(@name)
|
127
|
-
end
|
128
|
-
|
129
|
-
#
|
130
|
-
# Create a new parser that looks at inputs without consuming them.
|
131
|
-
#
|
132
|
-
def peek
|
133
|
-
PeekParser.new(self).setName(@name)
|
134
|
-
end
|
135
|
-
|
136
|
-
#
|
137
|
-
# To create a new parser that succeeds only if self fails.
|
138
|
-
#
|
139
|
-
def not(msg = "#{self} unexpected")
|
140
|
-
NotParser.new(self, msg)
|
141
|
-
end
|
142
|
-
|
143
|
-
#
|
144
|
-
# To create a parser that does "look ahead" for n inputs.
|
145
|
-
#
|
146
|
-
def lookahead _n
|
147
|
-
self
|
148
|
-
end
|
149
|
-
|
150
|
-
#
|
151
|
-
# To create a parser that fails with a given error message.
|
152
|
-
#
|
153
|
-
def expect msg
|
154
|
-
ExpectParser.new(self, msg)
|
155
|
-
end
|
156
|
-
|
157
|
-
#
|
158
|
-
# a.followed b will sequentially run a and b;
|
159
|
-
# result of a is preserved as the ultimate return value.
|
160
|
-
#
|
161
|
-
def followed(other)
|
162
|
-
FollowedParser.new(self, other)
|
163
|
-
end
|
164
|
-
def_sig :followed, Parser
|
42
|
+
private
|
165
43
|
|
166
|
-
|
167
|
-
|
168
|
-
# and maximally _max_ times.
|
169
|
-
# Only the return value of the last execution is preserved.
|
170
|
-
#
|
171
|
-
def repeat_(min, max = min)
|
172
|
-
return Parsers.failure("min=#{min}, max=#{max}") if min > max
|
173
|
-
if min == max
|
174
|
-
return Parsers.one if max <= 0
|
175
|
-
return self if max == 1
|
176
|
-
Repeat_Parser.new(self, max)
|
177
|
-
else
|
178
|
-
Some_Parser.new(self, min, max)
|
44
|
+
def initialize
|
45
|
+
initMonad(MyMonad, self)
|
179
46
|
end
|
180
|
-
end
|
181
47
|
|
182
|
-
|
183
|
-
|
184
|
-
|
185
|
-
|
186
|
-
#
|
187
|
-
def repeat(min, max = min)
|
188
|
-
return Parsers.failure("min=#{min}, max=#{max}") if min > max
|
189
|
-
if min == max
|
190
|
-
RepeatParser.new(self, max)
|
191
|
-
else
|
192
|
-
SomeParser.new(self, min, max)
|
48
|
+
def _display_current_input(input, _code, _index)
|
49
|
+
return 'EOF' if input.nil?
|
50
|
+
c = input
|
51
|
+
case c when Integer then "'" << c << "'" when Token then c.text else c.to_s end
|
193
52
|
end
|
194
|
-
end
|
195
|
-
|
196
|
-
#
|
197
|
-
# To create a parser that repeats self for at least _least_ times.
|
198
|
-
# parser.many_ is equivalent to bnf notation "parser*".
|
199
|
-
# Only the return value of the last execution is preserved.
|
200
|
-
#
|
201
|
-
def many_(least = 0)
|
202
|
-
Many_Parser.new(self, least)
|
203
|
-
end
|
204
|
-
|
205
|
-
#
|
206
|
-
# To create a parser that repeats self for at least _least_ times.
|
207
|
-
# All return values are collected in an array.
|
208
|
-
#
|
209
|
-
def many(least = 0)
|
210
|
-
ManyParser.new(self, least)
|
211
|
-
end
|
212
|
-
|
213
|
-
#
|
214
|
-
# To create a parser that repeats self for at most _max_ times.
|
215
|
-
# Only the return value of the last execution is preserved.
|
216
|
-
#
|
217
|
-
def some_(max)
|
218
|
-
repeat_(0, max)
|
219
|
-
end
|
220
53
|
|
221
|
-
|
222
|
-
|
223
|
-
|
224
|
-
|
225
|
-
def some(max)
|
226
|
-
repeat(0, max)
|
227
|
-
end
|
228
|
-
|
229
|
-
#
|
230
|
-
# To create a parser that repeats self for unlimited times,
|
231
|
-
# with the pattern recognized by _delim_ as separator that separates each occurrence.
|
232
|
-
# self has to match at least once.
|
233
|
-
# Return values of self are collected in an array.
|
234
|
-
#
|
235
|
-
def separated1 delim
|
236
|
-
rest = delim >> self
|
237
|
-
self.bind do |v0|
|
238
|
-
result = [v0]
|
239
|
-
(rest.map { |v| result << v }).many_ >> value(result)
|
54
|
+
def _add_encountered_error(msg, encountered)
|
55
|
+
result = msg.dup
|
56
|
+
result << ', ' unless msg.strip.length == 0 || msg =~ /.*(\.|,)\s*$/
|
57
|
+
"#{result}#{encountered}"
|
240
58
|
end
|
241
|
-
end
|
242
59
|
|
243
|
-
|
244
|
-
|
245
|
-
|
246
|
-
|
247
|
-
#
|
248
|
-
def separated delim
|
249
|
-
separated1(delim).plus value([])
|
250
|
-
end
|
60
|
+
def _add_location_to_error(locator, ctxt, msg, _code)
|
61
|
+
line, col = locator.locate(ctxt.error.index)
|
62
|
+
msg << " at line #{line}, col #{col}."
|
63
|
+
end
|
251
64
|
|
252
|
-
|
253
|
-
|
254
|
-
|
255
|
-
|
256
|
-
|
257
|
-
|
258
|
-
|
259
|
-
|
260
|
-
|
261
|
-
|
262
|
-
|
263
|
-
|
65
|
+
public
|
66
|
+
|
67
|
+
#
|
68
|
+
# parses a string.
|
69
|
+
#
|
70
|
+
def parse(src)
|
71
|
+
ctxt = ParseContext.new(src)
|
72
|
+
return ctxt.result if _parse ctxt
|
73
|
+
ctxt.prepare_error
|
74
|
+
locator = CodeLocator.new(src)
|
75
|
+
raise ParserException.new(ctxt.error.index),
|
76
|
+
_add_location_to_error(locator, ctxt,
|
77
|
+
_add_encountered_error(ctxt.to_msg,
|
78
|
+
_display_current_input(ctxt.error.input, src, ctxt.index)), src)
|
264
79
|
end
|
265
|
-
end
|
266
80
|
|
267
|
-
|
268
|
-
|
269
|
-
|
270
|
-
|
271
|
-
|
272
|
-
|
273
|
-
|
274
|
-
delimited1(delim).plus value([])
|
275
|
-
end
|
81
|
+
#
|
82
|
+
# Set name for the parser. +self+ is returned.
|
83
|
+
#
|
84
|
+
def set_name(nm)
|
85
|
+
@name = nm
|
86
|
+
self
|
87
|
+
end
|
276
88
|
|
277
|
-
|
278
|
-
# String representation
|
279
|
-
#
|
280
|
-
def to_s
|
281
|
-
return name unless name.nil?
|
282
|
-
self.class.to_s
|
283
|
-
end
|
89
|
+
alias setName set_name
|
284
90
|
|
285
|
-
|
286
|
-
|
287
|
-
|
288
|
-
|
289
|
-
|
290
|
-
|
291
|
-
|
91
|
+
#
|
92
|
+
# <tt>a.map { |x| x + 1 }</tt> will first execute parser +a+, when
|
93
|
+
# it succeeds, the associated block is executed to transform the
|
94
|
+
# result to a new value (increment it in this case).
|
95
|
+
#
|
96
|
+
def map(&block)
|
97
|
+
return self unless block
|
98
|
+
MapParser.new(self, block)
|
99
|
+
end
|
292
100
|
|
293
|
-
|
294
|
-
|
295
|
-
|
296
|
-
|
297
|
-
|
298
|
-
|
101
|
+
#
|
102
|
+
# +self+ is first executed, the parser result is then passed as
|
103
|
+
# parameter to the associated +block+, which evaluates to another
|
104
|
+
# Parser object at runtime. This new Parser object is then
|
105
|
+
# executed to get the final parser result.
|
106
|
+
#
|
107
|
+
# Different from #bind, parser result of +self+ will be expanded
|
108
|
+
# first if it is an array.
|
109
|
+
#
|
110
|
+
def bindn(&block)
|
111
|
+
return self unless block
|
112
|
+
BoundnParser.new(self, block)
|
113
|
+
end
|
299
114
|
|
300
|
-
|
301
|
-
|
302
|
-
|
303
|
-
|
304
|
-
|
305
|
-
|
115
|
+
#
|
116
|
+
# <tt>a.mapn { |x, y| x + y }</tt> will first execute parser +a+,
|
117
|
+
# when it succeeds, the array result (if any) is expanded and
|
118
|
+
# passed as parameters to the associated block. The result of the
|
119
|
+
# block is then used as the parsing result.
|
120
|
+
#
|
121
|
+
def mapn(&block)
|
122
|
+
return self unless block
|
123
|
+
MapnParser.new(self, block)
|
124
|
+
end
|
306
125
|
|
307
|
-
|
308
|
-
|
309
|
-
|
310
|
-
|
311
|
-
|
312
|
-
|
126
|
+
#
|
127
|
+
# Create a new parser that's atomic, meaning that when it fails,
|
128
|
+
# input consumption is undone.
|
129
|
+
#
|
130
|
+
def atomize
|
131
|
+
AtomParser.new(self).setName(@name)
|
132
|
+
end
|
313
133
|
|
314
|
-
|
315
|
-
|
316
|
-
|
317
|
-
|
318
|
-
|
319
|
-
|
320
|
-
end
|
134
|
+
#
|
135
|
+
# Create a new parser that looks at inputs without consuming them.
|
136
|
+
#
|
137
|
+
def peek
|
138
|
+
PeekParser.new(self).setName(@name)
|
139
|
+
end
|
321
140
|
|
322
|
-
|
323
|
-
|
324
|
-
|
325
|
-
|
326
|
-
|
327
|
-
|
328
|
-
delim = delim.many_
|
329
|
-
delim >> self.delimited(delim)
|
330
|
-
end
|
141
|
+
#
|
142
|
+
# To create a new parser that succeeds only if +self+ fails.
|
143
|
+
#
|
144
|
+
def not(msg = "#{self} unexpected")
|
145
|
+
NotParser.new(self, msg)
|
146
|
+
end
|
331
147
|
|
332
|
-
|
333
|
-
|
334
|
-
|
335
|
-
|
336
|
-
|
337
|
-
|
338
|
-
|
339
|
-
# The final result is returned as return value.
|
340
|
-
#
|
341
|
-
def prefix(op)
|
342
|
-
Parsers.sequence(op.many, self) do |funcs, v|
|
343
|
-
funcs.reverse_each { |f| v = f.call(v) }
|
344
|
-
v
|
148
|
+
#
|
149
|
+
# To create a parser that does "look ahead" for _n_ inputs.
|
150
|
+
#
|
151
|
+
# WARNING: Not implemented yet; currently returns +self+ unchanged and ignores _n_.
|
152
|
+
#
|
153
|
+
def lookahead _n # :nodoc:
|
154
|
+
self
|
345
155
|
end
|
346
|
-
end
|
347
156
|
|
348
|
-
|
349
|
-
|
350
|
-
|
351
|
-
|
352
|
-
|
353
|
-
# from left to right.
|
354
|
-
# The final result is returned as return value.
|
355
|
-
#
|
356
|
-
def postfix(op)
|
357
|
-
Parsers.sequence(self, op.many) do |v, funcs|
|
358
|
-
funcs.each { |f| v = f.call(v) }
|
359
|
-
v
|
157
|
+
#
|
158
|
+
# To create a parser that fails with a given error message.
|
159
|
+
#
|
160
|
+
def expect msg
|
161
|
+
ExpectParser.new(self, msg)
|
360
162
|
end
|
361
|
-
end
|
362
163
|
|
363
|
-
|
364
|
-
|
365
|
-
|
366
|
-
|
367
|
-
|
368
|
-
|
369
|
-
bind do |v1|
|
370
|
-
bin = Parsers.sequence(op, self) do |f, v2|
|
371
|
-
f.call(v1, v2)
|
372
|
-
end
|
373
|
-
bin | value(v1)
|
164
|
+
#
|
165
|
+
# <tt>a.followed b</tt> will sequentially run +a+ and +b+; result
|
166
|
+
# of a is preserved as the ultimate return value.
|
167
|
+
#
|
168
|
+
def followed(other)
|
169
|
+
FollowedParser.new(self, other)
|
374
170
|
end
|
375
|
-
end
|
376
171
|
|
377
|
-
|
378
|
-
|
379
|
-
|
380
|
-
|
381
|
-
|
382
|
-
|
383
|
-
|
384
|
-
|
385
|
-
|
386
|
-
|
172
|
+
#
|
173
|
+
# To create a parser that repeats +self+ for a minimum +min+
|
174
|
+
# times, and maximally +max+ times. Only the return value of the
|
175
|
+
# last execution is preserved.
|
176
|
+
#
|
177
|
+
def repeat_(min, max = min)
|
178
|
+
return Parsers.failure("min=#{min}, max=#{max}") if min > max
|
179
|
+
if min == max
|
180
|
+
return Parsers.one if max <= 0
|
181
|
+
return self if max == 1
|
182
|
+
Repeat_Parser.new(self, max)
|
183
|
+
else
|
184
|
+
Some_Parser.new(self, min, max)
|
387
185
|
end
|
388
|
-
v
|
389
186
|
end
|
390
|
-
end
|
391
187
|
|
392
|
-
|
393
|
-
|
394
|
-
|
395
|
-
|
396
|
-
|
397
|
-
|
398
|
-
|
399
|
-
if
|
400
|
-
|
188
|
+
#
|
189
|
+
# To create a parser that repeats +self+ for a minimum +min+
|
190
|
+
# times, and maximally +max+ times. All return values are
|
191
|
+
# collected in an array.
|
192
|
+
#
|
193
|
+
def repeat(min, max = min)
|
194
|
+
return Parsers.failure("min=#{min}, max=#{max}") if min > max
|
195
|
+
if min == max
|
196
|
+
RepeatParser.new(self, max)
|
401
197
|
else
|
402
|
-
|
403
|
-
for i in (0...rests.length - 1)
|
404
|
-
cur = rests.length - 2 - i
|
405
|
-
f1, v1 = *rests[cur]
|
406
|
-
seed = f.call(v1, seed)
|
407
|
-
f = f1
|
408
|
-
end
|
409
|
-
f.call(v, seed)
|
198
|
+
SomeParser.new(self, min, max)
|
410
199
|
end
|
411
200
|
end
|
412
|
-
end
|
413
|
-
|
414
|
-
#
|
415
|
-
# a.token(:word_token) will return a Token object when _a_ succeeds.
|
416
|
-
# The matched string (or the string returned by _a_, if any) is
|
417
|
-
# encapsulated in the token, together with the :word_token symbol and
|
418
|
-
# the starting index of the match.
|
419
|
-
#
|
420
|
-
def token(kind)
|
421
|
-
TokenParser.new(kind, self)
|
422
|
-
end
|
423
|
-
|
424
|
-
#
|
425
|
-
# a.seq b will sequentially run a then b.
|
426
|
-
# The result of b is preserved as return value.
|
427
|
-
# If a block is associated, values returned by _a_ and _b_
|
428
|
-
# are passed into the block and the return value of
|
429
|
-
# the block is used as the final result of the parser.
|
430
|
-
#
|
431
|
-
def seq(other, &block)
|
432
|
-
# TypeChecker.check_arg_type Parser, other, :seq
|
433
|
-
Parsers.sequence(self, other, &block)
|
434
|
-
end
|
435
|
-
def_sig :seq, Parser
|
436
|
-
|
437
|
-
#
|
438
|
-
# Similar to _seq_. _other_ is auto-boxed if it is not of type Parser.
|
439
|
-
#
|
440
|
-
def >>(other)
|
441
|
-
seq(autobox_parser(other))
|
442
|
-
end
|
443
|
-
|
444
|
-
private
|
445
|
-
|
446
|
-
def autobox_parser(val)
|
447
|
-
return Parsers.value(val) unless val.kind_of? Parser
|
448
|
-
val
|
449
|
-
end
|
450
|
-
|
451
|
-
def _infix_rest(operator, operand)
|
452
|
-
Parsers.sequence(operator, operand, &Idn)
|
453
|
-
end
|
454
|
-
|
455
|
-
alias ~ not
|
456
|
-
alias << followed
|
457
|
-
alias * repeat_
|
458
|
-
|
459
|
-
def_sig :plus, Parser
|
460
|
-
|
461
|
-
def _parse(_ctxt)
|
462
|
-
false
|
463
|
-
end
|
464
|
-
end
|
465
|
-
#
|
466
|
-
# This module provides all out-of-box parser implementations.
|
467
|
-
#
|
468
|
-
module Parsers
|
469
|
-
extend Signature
|
470
|
-
|
471
|
-
#
|
472
|
-
# A parser that always fails with the given error message.
|
473
|
-
#
|
474
|
-
def failure msg
|
475
|
-
FailureParser.new(msg)
|
476
|
-
end
|
477
|
-
|
478
|
-
#
|
479
|
-
# A parser that always succeeds with the given return value.
|
480
|
-
#
|
481
|
-
def value v
|
482
|
-
ValueParser.new(v)
|
483
|
-
end
|
484
|
-
|
485
|
-
#
|
486
|
-
# A parser that calls alternative parsers until one succeeds,
|
487
|
-
# or any failure with input consumption beyond the current look-ahead.
|
488
|
-
#
|
489
|
-
def sum(*alts)
|
490
|
-
# TypeChecker.check_vararg_type Parser, alts, :sum
|
491
|
-
PlusParser.new(alts)
|
492
|
-
end
|
493
|
-
def_sig :sum, [Parser]
|
494
|
-
|
495
|
-
#
|
496
|
-
# A parser that calls alternative parsers until one succeeds.
|
497
|
-
#
|
498
|
-
def alt(*alts)
|
499
|
-
AltParser.new(alts)
|
500
|
-
end
|
501
|
-
def_sig :alt, [Parser]
|
502
|
-
|
503
|
-
#
|
504
|
-
# A parser that succeeds when the given predicate returns true
|
505
|
-
# (with the current input as the parameter).
|
506
|
-
# _expected_ is the error message when _pred_ returns false.
|
507
|
-
#
|
508
|
-
def satisfies(expected, &pred)
|
509
|
-
SatisfiesParser.new(pred, expected)
|
510
|
-
end
|
511
201
|
|
512
|
-
|
513
|
-
|
514
|
-
|
515
|
-
|
516
|
-
|
517
|
-
|
518
|
-
|
519
|
-
|
520
|
-
#
|
521
|
-
# A parser that succeeds when the current input is not equal to the given value.
|
522
|
-
# _expected_ is the error message when _pred_ returns false.
|
523
|
-
#
|
524
|
-
def isnt(v, expected = "#{v} unexpected")
|
525
|
-
satisfies(expected) { |c| c != v }
|
526
|
-
end
|
527
|
-
|
528
|
-
#
|
529
|
-
# A parser that succeeds when the current input is among the given values.
|
530
|
-
#
|
531
|
-
def among(*vals)
|
532
|
-
expected = "one of [#{vals.join(', ')}] expected"
|
533
|
-
vals = as_list vals
|
534
|
-
satisfies(expected) { |c| vals.include? c }
|
535
|
-
end
|
536
|
-
|
537
|
-
#
|
538
|
-
# A parser that succeeds when the current input is not among the given values.
|
539
|
-
#
|
540
|
-
def not_among(*vals)
|
541
|
-
expected = "one of [#{vals.join(', ')}] unexpected"
|
542
|
-
vals = as_list vals
|
543
|
-
satisfies(expected) { |c| !vals.include? c }
|
544
|
-
end
|
545
|
-
|
546
|
-
#
|
547
|
-
# A parser that succeeds when the current input is the given character.
|
548
|
-
#
|
549
|
-
def char(c)
|
550
|
-
if c.kind_of? Integer
|
551
|
-
nm = c.chr
|
552
|
-
is(c, "'#{nm}' expected").setName(nm)
|
553
|
-
else
|
554
|
-
is(c[0], "'#{c}' expected").setName(c)
|
202
|
+
#
|
203
|
+
# To create a parser that repeats +self+ for at least +least+
|
204
|
+
# times. <tt>parser.many_</tt> is equivalent to bnf notation
|
205
|
+
# <tt>parser*</tt>. Only the return value of the last execution
|
206
|
+
# is preserved.
|
207
|
+
#
|
208
|
+
def many_(least = 0)
|
209
|
+
Many_Parser.new(self, least)
|
555
210
|
end
|
556
|
-
end
|
557
211
|
|
558
|
-
|
559
|
-
|
560
|
-
|
561
|
-
|
562
|
-
|
563
|
-
|
564
|
-
isnt(c, "'#{nm}' unexpected").setName("~#{nm}")
|
565
|
-
else
|
566
|
-
isnt(c[0], "'#{c}' unexpected").setName("~#{c}")
|
212
|
+
#
|
213
|
+
# To create a parser that repeats +self+ for at least +least+
|
214
|
+
# times. All return values are collected in an array.
|
215
|
+
#
|
216
|
+
def many(least = 0)
|
217
|
+
ManyParser.new(self, least)
|
567
218
|
end
|
568
|
-
end
|
569
|
-
|
570
|
-
#
|
571
|
-
# A parser that succeeds when there's no input available.
|
572
|
-
#
|
573
|
-
def eof(expected = "EOF expected")
|
574
|
-
EofParser.new(expected).setName('EOF')
|
575
|
-
end
|
576
|
-
|
577
|
-
#
|
578
|
-
# A parser that tries to match the current inputs one by one
|
579
|
-
# with the given values.
|
580
|
-
# It succeeds only when all given values are matched, in which case all the
|
581
|
-
# matched inputs are consumed.
|
582
|
-
#
|
583
|
-
def are(vals, expected = "#{vals} expected")
|
584
|
-
AreParser.new(vals, expected)
|
585
|
-
end
|
586
219
|
|
587
|
-
|
588
|
-
|
589
|
-
|
590
|
-
|
591
|
-
|
592
|
-
|
593
|
-
|
594
|
-
|
595
|
-
#
|
596
|
-
# A parser that matches the given string.
|
597
|
-
#
|
598
|
-
def string(str, msg = "\"#{str}\" expected")
|
599
|
-
are(str, msg).setName(str)
|
600
|
-
end
|
601
|
-
|
602
|
-
#
|
603
|
-
# A parser that makes sure that the current input doesn't match a string.
|
604
|
-
# One character is consumed if it succeeds.
|
605
|
-
#
|
606
|
-
def not_string(str, msg = "\"#{str}\" unexpected")
|
607
|
-
string(str).not(msg) >> any
|
608
|
-
end
|
609
|
-
|
610
|
-
alias str string
|
611
|
-
|
612
|
-
#
|
613
|
-
# A parser that sequentially runs the given parsers.
|
614
|
-
# The result of the last parser is used as return value.
|
615
|
-
# If a block is given, the results of the parsers are passed
|
616
|
-
# into the block as parameters, and the block return value
|
617
|
-
# is used as result instead.
|
618
|
-
#
|
619
|
-
def sequence(*parsers, &proc)
|
620
|
-
# TypeChecker.check_vararg_type Parser, parsers, :sequence
|
621
|
-
SequenceParser.new(parsers, proc)
|
622
|
-
end
|
623
|
-
def_sig :sequence, [Parser]
|
624
|
-
|
625
|
-
#
|
626
|
-
# A parser that returns the current input index (starting from 0).
|
627
|
-
#
|
628
|
-
def get_index
|
629
|
-
GetIndexParser.new.setName('get_index')
|
630
|
-
end
|
631
|
-
|
632
|
-
#
|
633
|
-
# A parser that moves the current input pointer to a certain index.
|
634
|
-
#
|
635
|
-
def set_index ind
|
636
|
-
SetIndexParser.new(ind).setName('set_index')
|
637
|
-
end
|
638
|
-
|
639
|
-
#
|
640
|
-
# A parser that tries all given alternative parsers
|
641
|
-
# and picks the one with the longest match.
|
642
|
-
#
|
643
|
-
def longest(*parsers)
|
644
|
-
# TypeChecker.check_vararg_type Parser, parsers, :longest
|
645
|
-
BestParser.new(parsers, true)
|
646
|
-
end
|
647
|
-
def_sig :longest, [Parser]
|
220
|
+
#
|
221
|
+
# To create a parser that repeats +self+ for at most +max+ times.
|
222
|
+
# Only the return value of the last execution is preserved.
|
223
|
+
#
|
224
|
+
def some_(max)
|
225
|
+
repeat_(0, max)
|
226
|
+
end
|
648
227
|
|
649
|
-
|
650
|
-
|
651
|
-
|
652
|
-
|
653
|
-
|
654
|
-
|
655
|
-
|
656
|
-
end
|
657
|
-
def_sig :shortest, [Parser]
|
228
|
+
#
|
229
|
+
# To create a parser that repeats +self+ for at most +max+ times.
|
230
|
+
# All return values are collected in an array.
|
231
|
+
#
|
232
|
+
def some(max)
|
233
|
+
repeat(0, max)
|
234
|
+
end
|
658
235
|
|
659
|
-
|
660
|
-
|
236
|
+
#
|
237
|
+
# To create a parser that repeats +self+ for unlimited times, with
|
238
|
+
# the pattern recognized by +delim+ as separator that separates
|
239
|
+
# each occurrence. +self+ has to match at least once. Return
|
240
|
+
# values of self are collected in an array.
|
241
|
+
#
|
242
|
+
def separated1 delim
|
243
|
+
rest = delim >> self
|
244
|
+
self.bind do |v0|
|
245
|
+
result = [v0]
|
246
|
+
(rest.map { |v| result << v }).many_ >> value(result)
|
247
|
+
end
|
248
|
+
end
|
661
249
|
|
662
|
-
|
663
|
-
|
664
|
-
|
665
|
-
|
666
|
-
|
667
|
-
|
250
|
+
#
|
251
|
+
# To create a parser that repeats +self+ for unlimited times, with
|
252
|
+
# the pattern recognized by +delim+ as separator that separates
|
253
|
+
# each occurrence. Return values of +self+ are collected in an
|
254
|
+
# array.
|
255
|
+
#
|
256
|
+
def separated delim
|
257
|
+
separated1(delim).plus value([])
|
258
|
+
end
|
668
259
|
|
669
|
-
|
670
|
-
|
671
|
-
|
672
|
-
|
673
|
-
|
674
|
-
|
260
|
+
#
|
261
|
+
# To create a parser that repeats +self+ for unlimited times, with
|
262
|
+
# the pattern recognized by +delim+ as separator that separates
|
263
|
+
# each occurrence and also possibly ends the pattern. +self+ has
|
264
|
+
# to match at least once. Return values of +self+ are
|
265
|
+
# collected in an array.
|
266
|
+
#
|
267
|
+
def delimited1 delim
|
268
|
+
rest = delim >> (self.plus Parsers.throwp(:__end_delimiter__))
|
269
|
+
self.bind do |v0|
|
270
|
+
result = [v0]
|
271
|
+
(rest.map { |v| result << v }).many_.catchp(:__end_delimiter__) >> value(result)
|
272
|
+
end
|
273
|
+
end
|
675
274
|
|
676
|
-
|
677
|
-
|
678
|
-
|
679
|
-
|
680
|
-
|
681
|
-
|
275
|
+
#
|
276
|
+
# To create a parser that repeats +self+ for unlimited times, with
|
277
|
+
# the pattern recognized by +delim+ as separator that separates
|
278
|
+
# each occurrence and also possibly ends the pattern. Return
|
279
|
+
# values of +self+ are collected in an array.
|
280
|
+
#
|
281
|
+
def delimited delim
|
282
|
+
delimited1(delim).plus value([])
|
283
|
+
end
|
682
284
|
|
683
|
-
|
684
|
-
|
685
|
-
|
686
|
-
|
687
|
-
|
688
|
-
|
689
|
-
|
285
|
+
#
|
286
|
+
# String representation
|
287
|
+
#
|
288
|
+
def to_s
|
289
|
+
return name unless name.nil?
|
290
|
+
self.class.to_s
|
291
|
+
end
|
690
292
|
|
691
|
-
|
692
|
-
|
693
|
-
|
694
|
-
|
695
|
-
|
696
|
-
|
293
|
+
#
|
294
|
+
# <tt>a | b</tt> will run +b+ when +a+ fails. +b+ is auto-boxed
|
295
|
+
# to Parser when it is not of type Parser.
|
296
|
+
#
|
297
|
+
def | other
|
298
|
+
AltParser.new([self, autobox_parser(other)])
|
299
|
+
end
|
697
300
|
|
698
|
-
|
699
|
-
|
700
|
-
|
701
|
-
|
702
|
-
|
703
|
-
|
704
|
-
|
705
|
-
end
|
301
|
+
#
|
302
|
+
# <tt>a.optional(default)</tt> is equivalent to
|
303
|
+
# <tt>a.plus(value(default))</tt>. See also #plus and #value.
|
304
|
+
#
|
305
|
+
def optional(default = nil)
|
306
|
+
self.plus(value(default))
|
307
|
+
end
|
706
308
|
|
707
|
-
|
708
|
-
|
709
|
-
|
710
|
-
|
711
|
-
|
712
|
-
|
713
|
-
|
714
|
-
end
|
309
|
+
#
|
310
|
+
# <tt>a.catchp(:somesymbol)</tt> will catch the
|
311
|
+
# <tt>:somesymbol</tt> thrown by +a+.
|
312
|
+
#
|
313
|
+
def catchp(symbol)
|
314
|
+
CatchParser.new(symbol, self)
|
315
|
+
end
|
715
316
|
|
716
|
-
|
717
|
-
|
718
|
-
|
719
|
-
|
720
|
-
|
721
|
-
|
722
|
-
end
|
317
|
+
#
|
318
|
+
# <tt>a.fragment</tt> will return the string matched by +a+.
|
319
|
+
#
|
320
|
+
def fragment
|
321
|
+
FragmentParser.new(self)
|
322
|
+
end
|
723
323
|
|
724
|
-
|
725
|
-
|
726
|
-
|
727
|
-
|
728
|
-
|
729
|
-
|
730
|
-
|
324
|
+
#
|
325
|
+
# <tt>a.nested b</tt> will feed the token array returned by parser
|
326
|
+
# +a+ to parser +b+ for a nested parsing.
|
327
|
+
#
|
328
|
+
def nested(parser)
|
329
|
+
NestedParser.new(self, parser)
|
330
|
+
end
|
731
331
|
|
732
|
-
|
733
|
-
|
734
|
-
|
735
|
-
|
736
|
-
|
737
|
-
|
332
|
+
#
|
333
|
+
# <tt>a.lexeme(delim)</tt> will parse +a+ for 0 or more times and
|
334
|
+
# ignore all patterns recognized by +delim+. Values returned by
|
335
|
+
# +a+ are collected in an array.
|
336
|
+
#
|
337
|
+
def lexeme(delim = Parsers.whitespaces)
|
338
|
+
delim = delim.many_
|
339
|
+
delim >> self.delimited(delim)
|
340
|
+
end
|
738
341
|
|
739
|
-
|
740
|
-
|
741
|
-
|
742
|
-
|
743
|
-
|
744
|
-
|
745
|
-
|
746
|
-
|
747
|
-
|
748
|
-
|
749
|
-
|
750
|
-
|
751
|
-
recognizer = satisfies(expected) do |tok|
|
752
|
-
tok.respond_to? :kind, :text and kind == tok.kind
|
753
|
-
end
|
754
|
-
else
|
755
|
-
recognizer = satisfies(expected) do |tok|
|
756
|
-
tok.respond_to? :kind, :text and kinds.include? tok.kind
|
342
|
+
#
|
343
|
+
# For prefix unary operator. <tt>a.prefix op</tt> will run parser
|
344
|
+
# +op+ for 0 or more times and eventually run parser +a+ for one
|
345
|
+
# time. +op+ should return a +Proc+ that accepts one parameter.
|
346
|
+
# +Proc+ objects returned by +op+ are then fed with the value
|
347
|
+
# returned by +a+ from right to left. The final result is
|
348
|
+
# returned as return value.
|
349
|
+
#
|
350
|
+
def prefix(op)
|
351
|
+
Parsers.sequence(op.many, self) do |funcs, v|
|
352
|
+
funcs.reverse_each { |f| v = f.call(v) }
|
353
|
+
v
|
757
354
|
end
|
758
355
|
end
|
759
|
-
recognizer = recognizer.map { |tok| proc.call(tok.text) } if proc
|
760
|
-
recognizer
|
761
|
-
end
|
762
|
-
|
763
|
-
#
|
764
|
-
# A parser that parses a white space character.
|
765
|
-
#
|
766
|
-
def whitespace(expected = "whitespace expected")
|
767
|
-
satisfies(expected) { |c| Whitespaces.include? c }
|
768
|
-
end
|
769
|
-
|
770
|
-
#
|
771
|
-
# A parser that parses 1 or more white space characters.
|
772
|
-
#
|
773
|
-
def whitespaces(expected = "whitespace(s) expected")
|
774
|
-
whitespace(expected).many_(1)
|
775
|
-
end
|
776
356
|
|
777
|
-
|
778
|
-
|
779
|
-
|
780
|
-
|
781
|
-
|
782
|
-
|
783
|
-
|
784
|
-
|
785
|
-
|
786
|
-
|
787
|
-
|
788
|
-
|
789
|
-
|
790
|
-
|
791
|
-
string(open) >> not_string(close).many_ >> string(close) >> value(nil)
|
792
|
-
end
|
357
|
+
#
|
358
|
+
# For postfix unary operator. <tt>a.postfix op</tt> will run
|
359
|
+
# parser +a+ once and then +op+ for 0 or more times. +op+
|
360
|
+
# should return a +Proc+ that accepts one parameter. +Proc+
|
361
|
+
# objects returned by +op+ are then fed with the value returned by
|
362
|
+
# +a+ from left to right. The final result is returned as return
|
363
|
+
# value.
|
364
|
+
#
|
365
|
+
def postfix(op)
|
366
|
+
Parsers.sequence(self, op.many) do |v, funcs|
|
367
|
+
funcs.each { |f| v = f.call(v) }
|
368
|
+
v
|
369
|
+
end
|
370
|
+
end
|
793
371
|
|
794
|
-
|
795
|
-
|
796
|
-
|
797
|
-
|
798
|
-
|
799
|
-
|
800
|
-
|
801
|
-
|
372
|
+
#
|
373
|
+
# For non-associative infix binary operator. +op+ has to return a
|
374
|
+
# +Proc+ that takes two parameters, which are returned by the +self+
|
375
|
+
# parser as operands.
|
376
|
+
#
|
377
|
+
def infixn(op)
|
378
|
+
bind do |v1|
|
379
|
+
bin = Parsers.sequence(op, self) do |f, v2|
|
380
|
+
f.call(v1, v2)
|
381
|
+
end
|
382
|
+
bin | value(v1)
|
383
|
+
end
|
384
|
+
end
|
802
385
|
|
803
|
-
|
804
|
-
|
805
|
-
|
806
|
-
|
807
|
-
|
808
|
-
|
809
|
-
|
810
|
-
|
811
|
-
|
812
|
-
|
813
|
-
|
814
|
-
|
815
|
-
|
816
|
-
|
817
|
-
end
|
386
|
+
#
|
387
|
+
# For left-associative infix binary operator. +op+ has to return
|
388
|
+
# a +Proc+ that takes two parameters, which are returned by the
|
389
|
+
# +self+ parser as operands.
|
390
|
+
#
|
391
|
+
def infixl(op)
|
392
|
+
Parsers.sequence(self, _infix_rest(op, self).many) do |v, rests|
|
393
|
+
rests.each do |r|
|
394
|
+
f, v1 = *r
|
395
|
+
v = f.call(v, v1)
|
396
|
+
end
|
397
|
+
v
|
398
|
+
end
|
399
|
+
end
|
818
400
|
|
819
|
-
|
820
|
-
|
821
|
-
|
822
|
-
|
823
|
-
|
824
|
-
|
825
|
-
|
826
|
-
|
827
|
-
|
828
|
-
|
829
|
-
|
830
|
-
|
831
|
-
|
401
|
+
#
|
402
|
+
# For right-associative infix binary operator. +op+ has to return
|
403
|
+
# a +Proc+ that takes two parameters, which are returned by the
|
404
|
+
# +self+ parser as operands.
|
405
|
+
#
|
406
|
+
def infixr(op)
|
407
|
+
Parsers.sequence(self, _infix_rest(op, self).many) do |v, rests|
|
408
|
+
if rests.empty?
|
409
|
+
v
|
410
|
+
else
|
411
|
+
f, seed = *rests.last
|
412
|
+
for i in (0...rests.length - 1)
|
413
|
+
cur = rests.length - 2 - i
|
414
|
+
f1, v1 = *rests[cur]
|
415
|
+
seed = f.call(v1, seed)
|
416
|
+
f = f1
|
417
|
+
end
|
418
|
+
f.call(v, seed)
|
419
|
+
end
|
420
|
+
end
|
421
|
+
end
|
832
422
|
|
833
|
-
|
834
|
-
|
835
|
-
|
836
|
-
|
837
|
-
|
838
|
-
|
839
|
-
|
840
|
-
|
841
|
-
|
842
|
-
#
|
843
|
-
def map(&block)
|
844
|
-
return one unless block
|
845
|
-
MapCurrentParser.new(block)
|
846
|
-
end
|
423
|
+
#
|
424
|
+
# <tt>a.token(:word_token)</tt> will return a Token object when
|
425
|
+
# +a+ succeeds. The matched string (or the string returned by
|
426
|
+
# +a+, if any) is encapsulated in the token, together with the
|
427
|
+
# <tt>:word_token</tt> symbol and the starting index of the match.
|
428
|
+
#
|
429
|
+
def token(kind)
|
430
|
+
TokenParser.new(kind, self)
|
431
|
+
end
|
847
432
|
|
848
|
-
|
849
|
-
|
850
|
-
|
851
|
-
|
852
|
-
|
853
|
-
|
854
|
-
|
855
|
-
|
856
|
-
|
857
|
-
|
858
|
-
#
|
859
|
-
def mapn(&block)
|
860
|
-
return one unless block
|
861
|
-
MapnCurrentParser.new(block)
|
862
|
-
end
|
433
|
+
#
|
434
|
+
# <tt>a.seq b</tt> will sequentially run +a+ then +b+. The result
|
435
|
+
# of +b+ is preserved as return value. If a +block+ is
|
436
|
+
# associated, values returned by +a+ and +b+ are passed into the
|
437
|
+
# +block+ and the return value of the +block+ is used as the final
|
438
|
+
# result of the parser.
|
439
|
+
#
|
440
|
+
def seq(other, &block)
|
441
|
+
Parsers.sequence(self, other, &block)
|
442
|
+
end
|
863
443
|
|
864
|
-
|
444
|
+
#
|
445
|
+
# Similar to #seq. +other+ is auto-boxed if it is not of type
|
446
|
+
# Parser.
|
447
|
+
#
|
448
|
+
def >>(other)
|
449
|
+
seq(autobox_parser(other))
|
450
|
+
end
|
865
451
|
|
866
|
-
|
867
|
-
# characters considered white space.
|
868
|
-
#
|
869
|
-
Whitespaces = " \t\r\n"
|
452
|
+
private
|
870
453
|
|
871
|
-
|
872
|
-
|
873
|
-
|
454
|
+
def autobox_parser(val)
|
455
|
+
return Parsers.value(val) unless val.kind_of? Parser
|
456
|
+
val
|
457
|
+
end
|
874
458
|
|
875
|
-
|
876
|
-
|
877
|
-
|
459
|
+
def _infix_rest(operator, operand)
|
460
|
+
Parsers.sequence(operator, operand, &Idn)
|
461
|
+
end
|
878
462
|
|
879
|
-
|
880
|
-
|
881
|
-
|
463
|
+
alias ~ not
|
464
|
+
alias << followed
|
465
|
+
alias * repeat_
|
882
466
|
|
883
|
-
|
884
|
-
|
885
|
-
|
886
|
-
return vals unless val.kind_of? String
|
887
|
-
val
|
467
|
+
def _parse(_ctxt)
|
468
|
+
false
|
469
|
+
end
|
888
470
|
end
|
889
|
-
|
890
|
-
extend self
|
891
471
|
end
|
892
|
-
|
893
|
-
end # module
|