parslet 0.9.0
Sign up to get free protection for your applications and to get access to all the features.
- data/Gemfile +7 -0
- data/HISTORY.txt +21 -0
- data/LICENSE +23 -0
- data/README +101 -0
- data/Rakefile +73 -0
- data/lib/parslet.rb +301 -0
- data/lib/parslet/atoms.rb +492 -0
- data/lib/parslet/error_tree.rb +50 -0
- data/lib/parslet/pattern.rb +144 -0
- data/lib/parslet/pattern/binding.rb +40 -0
- data/lib/parslet/transform.rb +118 -0
- metadata +100 -0
@@ -0,0 +1,492 @@
|
|
1
|
+
module Parslet::Atoms
|
2
|
+
module Precedence
  # Precedence levels of the parslet constructs, numbered in ascending order
  # from BASE to OUTER. Base#to_s compares the level it is handed with a
  # parslet's own level to decide whether the printed form needs parentheses.
  BASE       = 1 # everything else
  LOOKAHEAD  = 2 # &SOMETHING
  REPETITION = 3 # 'a'+, 'a'?
  SEQUENCE   = 4 # 'a' 'b'
  ALTERNATE  = 5 # 'a' | 'b'
  OUTER      = 6 # printing is done here.
end
|
11
|
+
|
12
|
+
# Base class for all parslets, handles orchestration of calls and implements
# a lot of the operator and chaining methods.
#
# Subclasses provide #try(io), which #apply calls to do the actual matching,
# and #to_s_inner(prec), which #to_s calls to print the parslet.
#
class Base
  # Parses +io+ from its current position and returns the flattened result.
  # Anything responding to +to_str+ is wrapped in a StringIO first.
  #
  # Raises Parslet::ParseFailed if this parslet does not match, or if it
  # matches but leaves input unconsumed.
  #
  def parse(io)
    if io.respond_to? :to_str
      io = StringIO.new(io)
    end

    result = apply(io)

    # If we haven't consumed the input, then the pattern doesn't match. Try
    # to provide a good error message (even asking down below)
    unless io.eof?
      # Do we know why we stopped matching input? If yes, that's a good
      # error to fail with. Otherwise just report that we cannot consume the
      # input.
      if cause
        raise Parslet::ParseFailed, "Unconsumed input, maybe because of this: #{cause}"
      else
        error(io, "Don't know what to do with #{io.string[io.pos,100]}")
      end
    end

    return flatten(result)
  end

  # Calls #try at the current position of +io+ and returns its (unflattened)
  # result, clearing the stored cause on success. When #try raises
  # Parslet::ParseFailed, +io+ is rewound to where it was before the attempt
  # and the exception is re-raised.
  #
  def apply(io)
    old_pos = io.pos

    begin
      r = try(io)
      @last_cause = nil
      return r
    rescue Parslet::ParseFailed
      # Undo whatever the failed attempt consumed, then let the very same
      # exception travel on.
      io.pos = old_pos
      raise
    end
  end

  # Matches this parslet at least +min+ and at most +max+ times; a nil +max+
  # means no upper bound.
  def repeat(min=0, max=nil)
    Repetition.new(self, min, max)
  end
  # Matches this parslet zero or one time.
  def maybe
    Repetition.new(self, 0, 1, :maybe)
  end
  # Sequence: this parslet followed by +parslet+.
  def >>(parslet)
    Sequence.new(self, parslet)
  end
  # Alternative: this parslet, or else +parslet+.
  def |(parslet)
    Alternative.new(self, parslet)
  end
  # Negative lookahead on this parslet.
  def absnt?
    Lookahead.new(self, false)
  end
  # Positive lookahead on this parslet.
  def prsnt?
    Lookahead.new(self, true)
  end
  # Names the result of this parslet, see Named.
  def as(name)
    Named.new(self, name)
  end

  # Turns the raw result of #apply into its final form. Raw results are
  # either plain values, which are returned unchanged, or arrays of the form
  # [tag, *elements] with tag being :sequence, :maybe or :repetition.
  #
  def flatten(value)
    # Passes through everything that isn't an array of things
    return value unless value.instance_of? Array

    # Extracts the s-expression tag
    tag, *tail = value

    # Merges arrays:
    result = tail.
      map { |e| flatten(e) }            # first flatten each element

    case tag
      when :sequence
        return flatten_sequence(result)
      when :maybe
        return result.first
      when :repetition
        return flatten_repetition(result)
    end

    # Kernel#raise rather than #fail: Lookahead defines its own #fail(io),
    # which would be picked up here and hide this message.
    raise "BUG: Unknown tag #{tag.inspect}."
  end
  # Merges the flattened elements of a sequence pairwise, left to right,
  # starting from an empty string: hashes are merged, strings concatenated,
  # a hash next to an array ends up inside that array. In every other
  # combination a hash on the left wins, otherwise the right element does.
  #
  def flatten_sequence(list)
    list.inject('') { |r, e|        # and then merge flat elements
      case [r, e].map { |o| o.class }
        when [Hash, Hash]           # two keyed subtrees: make one
          warn_about_duplicate_keys(r, e)
          r.merge(e)
        # a keyed tree and an array (push down)
        when [Hash, Array]
          [r] + e
        when [Array, Hash]
          r + [e]
        when [String, String]
          r << e
      else
        if r.instance_of? Hash
          r         # Ignore e, since its not a hash we can merge
        else
          e         # Whatever e is at this point, we keep it
        end
      end
    }
  end
  # Merges the flattened elements of a repetition: only the hashes if there
  # are any, else the concatenation of all arrays if there are any, else one
  # string made of all elements (nil counts as the empty string).
  #
  def flatten_repetition(list)
    if list.any? { |e| e.instance_of?(Hash) }
      # If keyed subtrees are in the array, we'll want to discard all
      # strings inbetween. To keep them, name them.
      return list.select { |e| e.instance_of?(Hash) }
    end

    if list.any? { |e| e.instance_of?(Array) }
      # If any arrays are nested in this array, flatten all arrays to this
      # level.
      return list.
        select { |e| e.instance_of?(Array) }.
        flatten(1)
    end

    # If there are only strings, concatenate them and return that.
    list.inject('') { |s,e| s<<(e||'') }
  end

  # Class-level macro: defines #precedence on the calling class so that it
  # returns +prec+ (one of the Precedence constants).
  def self.precedence(prec)
    define_method(:precedence) { prec }
  end
  precedence Precedence::BASE
  # Prints this parslet, wrapped in parentheses when +outer_prec+ is lower
  # than the parslet's own precedence. +outer_prec+ defaults to
  # Precedence::OUTER, so that a plain #to_s (string interpolation, puts)
  # prints the same as #inspect instead of raising an ArgumentError.
  #
  def to_s(outer_prec=Precedence::OUTER)
    if outer_prec < precedence
      "("+to_s_inner(precedence)+")"
    else
      to_s_inner(precedence)
    end
  end
  def inspect
    to_s(Precedence::OUTER)
  end

  # Cause should return the current best approximation of this parslet
  # of what went wrong with the parse. Not relevant if the parse succeeds,
  # but needed for clever error reports.
  #
  def cause
    @last_cause
  end

  # Error tree returns what went wrong here plus what went wrong inside
  # subexpressions as a tree. The error stored for this node will be equal
  # with #cause. Returns nil when no cause is stored.
  #
  def error_tree
    Parslet::ErrorTree.new(self) if cause?
  end
  # True if an error message is stored for this parslet.
  def cause?
    not @last_cause.nil?
  end
private
  # Report/raise a parse error with the given message, printing the current
  # position as well. Appends 'at line X char Y.' to the message you give.
  # If +pos+ is given, it is used as the real position the error happened,
  # correcting the io's current position.
  #
  # The formatted message is stored as this parslet's cause before
  # Parslet::ParseFailed is raised.
  #
  def error(io, str, pos=nil)
    # Everything up to and including the offending position; the length of
    # its last line is the character number that gets reported.
    pre = io.string[0..(pos||io.pos)]
    lines = Array(pre.lines)

    if lines.empty?
      formatted_cause = str
    else
      pos = lines.last.length
      formatted_cause = "#{str} at line #{lines.count} char #{pos}."
    end

    @last_cause = formatted_cause

    raise Parslet::ParseFailed, formatted_cause, nil
  end
  # Warns (does not raise) when +h1+ and +h2+ share keys, since merging them
  # keeps only the values of +h2+ for those keys.
  def warn_about_duplicate_keys(h1, h2)
    d = h1.keys & h2.keys
    unless d.empty?
      warn "Duplicate subtrees while merging result of \n #{self.inspect}\nonly the values"+
           " of the latter will be kept. (keys: #{d.inspect})"
    end
  end
end
|
203
|
+
|
204
|
+
# Gives the result of the wrapped parslet a name: applying it yields a hash
# with +name+ as its only key and the flattened result of +parslet+ as value.
#
class Named < Base
  attr_reader :parslet, :name

  def initialize(parslet, name)
    @parslet = parslet
    @name    = name
  end

  # Applies the wrapped parslet and wraps what it returns. A failure of the
  # wrapped parslet is not rescued here.
  def apply(io)
    produce_return_value(parslet.apply(io))
  end

  # Printed as name:PARSLET.
  def to_s_inner(prec)
    label = name
    inner = parslet.to_s(prec)

    "#{label}:#{inner}"
  end

  # Errors are those of the wrapped parslet.
  def error_tree
    parslet.error_tree
  end

private

  # Builds the single-entry hash that is the result of a named parslet.
  def produce_return_value(val)
    flattened = flatten(val)

    { name => flattened }
  end
end
|
228
|
+
|
229
|
+
# Positive ('&') or negative ('!') lookahead. Tries +bound_parslet+ and then
# puts the io back to where it was, so a lookahead never consumes input. A
# successful lookahead produces nil.
#
class Lookahead < Base
  attr_reader :positive
  attr_reader :bound_parslet

  def initialize(bound_parslet, positive=true)
    # One class serves both flavours; +positive+ tells them apart.
    @bound_parslet = bound_parslet
    @positive      = positive
  end

  def try(io)
    start = io.pos

    begin
      bound_parslet.apply(io)
    rescue Parslet::ParseFailed
      return fail(io)
    ensure
      # Runs on every path, so the io is rewound before #success gets to
      # look at its position.
      io.pos = start
    end

    success(io)
  end

  # Called when the bound parslet did not match: an error for a positive
  # lookahead, a match (nil) for a negative one.
  def fail(io)
    # TODO: Squash this down to nothing? Return value handling here...
    return nil unless positive

    error(io, "lookahead: #{bound_parslet.inspect} didn't match, but should have")
  end

  # Called when the bound parslet matched: a match (nil) for a positive
  # lookahead, an error for a negative one.
  def success(io)
    return nil if positive  # see above, TODO

    error(
      io,
      "negative lookahead: #{bound_parslet.inspect} matched, but shouldn't have")
  end

  precedence Precedence::LOOKAHEAD

  def to_s_inner(prec)
    prefix = if positive then '&' else '!' end

    "#{prefix}#{bound_parslet.to_s(prec)}"
  end

  # Errors are those of the bound parslet.
  def error_tree
    bound_parslet.error_tree
  end
end
|
280
|
+
|
281
|
+
# Ordered choice between parslets, denoted by '|'. The alternatives are tried
# from left to right and the result of the first one that matches is
# returned.
#
class Alternative < Base
  attr_reader :alternatives
  def initialize(*alternatives)
    @alternatives = alternatives
  end

  # Returns a new Alternative consisting of the alternatives of this one plus
  # +parslet+. The receiver is left untouched: appending in place would
  # change every other grammar fragment that holds on to the same object
  # (x = a | b; y = x | c must not add c to x).
  #
  def |(parslet)
    self.class.new(*(alternatives + [parslet]))
  end

  # Returns the result of the first alternative that applies. Raises
  # Parslet::ParseFailed (through #error) if none of them does.
  #
  def try(io)
    alternatives.each { |a|
      begin
        return a.apply(io)
      rescue Parslet::ParseFailed
        # This alternative doesn't match, go on with the next one.
      end
    }
    # If we reach this point, all alternatives have failed.
    error(io, "Expected one of #{alternatives.inspect}.")
  end

  precedence Precedence::ALTERNATE
  def to_s_inner(prec)
    alternatives.map { |a| a.to_s(prec) }.join(' | ')
  end

  # This node with the error tree of each alternative as children.
  def error_tree
    Parslet::ErrorTree.new(self, *alternatives.
      map { |child| child.error_tree })
  end
end
|
313
|
+
|
314
|
+
# A sequence of parslets, matched from left to right. Denoted by '>>'
#
class Sequence < Base
  attr_reader :parslets
  def initialize(*parslets)
    @parslets = parslets
  end

  # Returns a new Sequence consisting of the parslets of this one followed by
  # +parslet+. The receiver is left untouched: appending in place would
  # change every other grammar fragment that holds on to the same object
  # (x = a >> b; y = x >> c must not add c to x).
  #
  def >>(parslet)
    self.class.new(*(parslets + [parslet]))
  end

  # Applies all parslets in order and returns [:sequence, *results]. If one
  # of them fails, the failure is reported as a failure of the whole
  # sequence (Parslet::ParseFailed through #error).
  #
  def try(io)
    [:sequence]+parslets.map { |p|
      # Save each parslet as potentially offending (raising an error).
      @offending_parslet = p
      p.apply(io)
    }
  rescue Parslet::ParseFailed
    error(io, "Failed to match sequence (#{self.inspect})")
  end

  precedence Precedence::SEQUENCE
  def to_s_inner(prec)
    parslets.map { |p| p.to_s(prec) }.join(' ')
  end

  # This node with the error tree of the parslet that was applied last as
  # its only child.
  def error_tree
    Parslet::ErrorTree.new(self).tap { |t|
      t.children << @offending_parslet.error_tree if @offending_parslet }
  end
end
|
347
|
+
|
348
|
+
# Matches +parslet+ repeatedly: at least +min+ times and, if +max+ is given,
# at most +max+ times. The result is an array starting with +tag+
# (:repetition or :maybe), followed by one entry per match.
#
class Repetition < Base
  attr_reader :min, :max, :parslet

  def initialize(parslet, min, max, tag=:repetition)
    @parslet = parslet
    @min = min
    @max = max
    @tag = tag
  end

  def try(io)
    # The tag goes first, Base#flatten dispatches on it.
    matches = [@tag]
    count = 0

    loop do
      begin
        matches << parslet.apply(io)
      rescue Parslet::ParseFailed
        # No further match. That's fine as long as we have seen enough of
        # them so far.
        error(io, "Expected at least #{min} of #{parslet.inspect}") if count < min
        return matches
      end

      count += 1

      # With an upper bound, stop as soon as it is reached.
      return matches if max && count >= max
    end
  end

  precedence Precedence::REPETITION

  def to_s_inner(prec)
    suffix = (min == 0 && max == 1) ? '?' : "{#{min}, #{max}}"

    parslet.to_s(prec) + suffix
  end

  # The cause of the repetition itself if there is one, that of the repeated
  # parslet otherwise.
  def cause
    super || parslet.cause
  end

  # The repetition shows up as a node of its own only if it has a stored
  # cause; otherwise the tree of the repeated parslet is handed through.
  def error_tree
    inner = parslet.error_tree
    return inner unless cause?

    Parslet::ErrorTree.new(self, inner)
  end
end
|
397
|
+
|
398
|
+
# Matches a special kind of regular expression that only ever matches one
# character at a time. Useful members of this family are: character ranges,
# \w, \d, \r, \n, ...
#
class Re < Base
  attr_reader :match
  def initialize(match)
    @match = match
  end

  # Reads one character from +io+ and returns it if it matches. Raises
  # Parslet::ParseFailed (through #error) at the end of input or if the
  # character doesn't match.
  #
  def try(io)
    r = regexp
    old_pos = io.pos
    s = io.read(1)
    error(io, "Premature end of input") unless s
    # The io has already moved past the character that was read, so the
    # position the read started from is handed to #error explicitly, the
    # same way Str#try reports a mismatch.
    error(io, "Failed to match #{match.inspect[1..-2]}", old_pos) unless s.match(r)
    return s
  end

  # The pattern source as given; [1..-2] cuts off the first and the last
  # character of its #inspect form.
  def to_s_inner(prec)
    match.inspect[1..-2]
  end

private
  # The compiled pattern. Built on first use and kept, since +match+ is only
  # ever set in #initialize and #try runs once per input character.
  def regexp
    @regexp ||= Regexp.new(match, Regexp::MULTILINE)
  end
end
|
420
|
+
|
421
|
+
# Matches a string of characters.
#
class Str < Base
  attr_reader :str

  def initialize(str)
    @str = str
  end

  # Reads as much input as +str+ is long and returns it if it equals +str+.
  # Raises Parslet::ParseFailed (through #error) if the input is too short
  # or differs from +str+.
  #
  def try(io)
    start = io.pos
    chunk = io.read(str.size)

    if !chunk || chunk.size != str.size
      error(io, "Premature end of input")
    end

    if chunk != str
      # Report the mismatch where the read started, not where it ended.
      error(io, "Expected #{str.inspect}, but got #{chunk.inspect}", start)
    end

    chunk
  end

  # Printed in single quotes.
  def to_s_inner(prec)
    "'" + "#{str}" + "'"
  end
end
|
442
|
+
|
443
|
+
# This wraps pieces of parslet definition and gives them a name. The wrapped
# piece is lazily evaluated and cached. This has two purposes:
#
# a) Avoid infinite recursion during evaluation of the definition
#
# b) Be able to print things by their name, not by their sometimes
#    complicated content.
#
# You don't normally use this directly, instead you should generate it by
# using the structuring method Parslet#rule.
#
class Entity < Base
  attr_reader :name, :context, :block

  def initialize(name, context, block)
    super()

    @name, @context, @block = name, context, block
  end

  def try(io)
    parslet.apply(io)
  end

  # The parslet this entity stands for. On first use +block+ is evaluated in
  # +context+ and the outcome is kept; a block that yields nil or false
  # raises NotImplementedError instead.
  #
  def parslet
    @parslet ||= context.instance_eval(&block).tap { |built|
      raise_not_implemented unless built }
  end

  # Entities print as their upcased name.
  def to_s_inner(prec)
    name.to_s.upcase
  end

  # Errors are those of the wrapped parslet.
  def error_tree
    parslet.error_tree
  end

private

  # Raises NotImplementedError with a backtrace from which all frames of
  # this file have been removed.
  def raise_not_implemented
    # blatantly stolen from dependencies.rb in activesupport
    this_file = %r{#{Regexp.escape(__FILE__)}}
    foreign_frames = caller.reject { |frame| frame =~ this_file }

    not_implemented = NotImplementedError.new("rule(#{name.inspect}) { ... } returns nil. Still not implemented, but already used?")
    not_implemented.set_backtrace(foreign_frames)

    raise not_implemented
  end
end
|
491
|
+
end
|
492
|
+
|