parslet 0.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Gemfile +7 -0
- data/HISTORY.txt +21 -0
- data/LICENSE +23 -0
- data/README +101 -0
- data/Rakefile +73 -0
- data/lib/parslet.rb +301 -0
- data/lib/parslet/atoms.rb +492 -0
- data/lib/parslet/error_tree.rb +50 -0
- data/lib/parslet/pattern.rb +144 -0
- data/lib/parslet/pattern/binding.rb +40 -0
- data/lib/parslet/transform.rb +118 -0
- metadata +100 -0
@@ -0,0 +1,492 @@
|
|
1
|
+
module Parslet::Atoms
|
2
|
+
# Numeric binding strength for every kind of parslet. Smaller numbers bind
# tighter; Base#to_s compares these values to decide when a sub-expression
# has to be wrapped in parentheses.
#
module Precedence
  BASE       = 1 # everything else
  LOOKAHEAD  = 2 # &SOMETHING
  REPETITION = 3 # 'a'+, 'a'?
  SEQUENCE   = 4 # 'a' 'b'
  ALTERNATE  = 5 # 'a' | 'b'
  OUTER      = 6 # printing is done here.
end
|
11
|
+
|
12
|
+
# Base class for all parslets, handles orchestration of calls and implements
# a lot of the operator and chaining methods.
#
# Subclasses provide #try(io) (the actual matching) and #to_s_inner(prec)
# (the printable form); everything else is inherited from here.
#
class Base
  # Parses +io+ from its current position and returns the flattened result.
  #
  # io:: either something string-like (responds to #to_str; it is wrapped
  #      in a StringIO) or an object offering #pos, #pos=, #eof? and
  #      #string.
  #
  # Raises Parslet::ParseFailed if the parslet does not match or if input
  # is left over after the match.
  #
  def parse(io)
    io = StringIO.new(io) if io.respond_to? :to_str

    result = apply(io)

    # If we haven't consumed the input, then the pattern doesn't match. Try
    # to provide a good error message (even asking down below)
    unless io.eof?
      # Do we know why we stopped matching input? If yes, that's a good
      # error to fail with. Otherwise just report that we cannot consume the
      # input.
      if cause
        raise Parslet::ParseFailed, "Unconsumed input, maybe because of this: #{cause}"
      else
        error(io, "Don't know what to do with #{io.string[io.pos,100]}")
      end
    end

    flatten(result)
  end

  # Runs #try on +io+. On success the stored failure cause is cleared and
  # the raw (unflattened) result is returned. On Parslet::ParseFailed the
  # io is rewound to where it was before the attempt and the same exception
  # is re-raised.
  #
  def apply(io)
    old_pos = io.pos

    begin
      r = try(io)
      @last_cause = nil
      return r
    rescue Parslet::ParseFailed
      io.pos = old_pos
      raise
    end
  end

  # Matches this parslet at least +min+ and, if +max+ is given, at most
  # +max+ times.
  def repeat(min=0, max=nil)
    Repetition.new(self, min, max)
  end

  # Matches this parslet zero or one time.
  def maybe
    Repetition.new(self, 0, 1, :maybe)
  end

  # Builds a sequence: this parslet followed by +parslet+.
  def >>(parslet)
    Sequence.new(self, parslet)
  end

  # Builds an alternative: this parslet or else +parslet+.
  def |(parslet)
    Alternative.new(self, parslet)
  end

  # Negative lookahead on this parslet.
  def absnt?
    Lookahead.new(self, false)
  end

  # Positive lookahead on this parslet.
  def prsnt?
    Lookahead.new(self, true)
  end

  # Wraps this parslet so that its result is returned as { name => result }.
  def as(name)
    Named.new(self, name)
  end

  # Turns the tagged intermediate tree produced by #apply into the final
  # result. Arrays are expected to look like [tag, *elements] with tag one
  # of :sequence, :maybe or :repetition; anything that is not exactly an
  # Array is returned untouched.
  #
  def flatten(value)
    # Passes through everything that isn't an array of things
    return value unless value.instance_of? Array

    # Extracts the s-expression tag
    tag, *tail = value

    # Merges arrays: first flatten each element
    result = tail.map { |e| flatten(e) }

    case tag
      when :sequence
        return flatten_sequence(result)
      when :maybe
        return result.first
      when :repetition
        return flatten_repetition(result)
    end

    # Deliberately Kernel#raise and not Kernel#fail: Lookahead defines its
    # own #fail(io), which would be invoked here instead.
    raise "BUG: Unknown tag #{tag.inspect}."
  end

  # Merges the already flattened elements of a sequence, left to right,
  # starting from an empty string.
  #
  def flatten_sequence(list)
    # ''.dup: the accumulator is appended to in place below, so it must not
    # be a (possibly frozen) string literal.
    list.inject(''.dup) { |r, e|        # and then merge flat elements
      case [r, e].map { |o| o.class }
        when [Hash, Hash]             # two keyed subtrees: make one
          warn_about_duplicate_keys(r, e)
          r.merge(e)
        # a keyed tree and an array (push down)
        when [Hash, Array]
          [r] + e
        when [Array, Hash]
          r + [e]
        when [String, String]
          r << e
      else
        if r.instance_of? Hash
          r         # Ignore e, since its not a hash we can merge
        else
          e         # Whatever e is at this point, we keep it
        end
      end
    }
  end

  # Merges the already flattened elements of a repetition: hashes win over
  # arrays, arrays win over strings.
  #
  def flatten_repetition(list)
    if list.any? { |e| e.instance_of?(Hash) }
      # If keyed subtrees are in the array, we'll want to discard all
      # strings inbetween. To keep them, name them.
      return list.select { |e| e.instance_of?(Hash) }
    end

    if list.any? { |e| e.instance_of?(Array) }
      # If any arrays are nested in this array, flatten all arrays to this
      # level.
      return list.
        select { |e| e.instance_of?(Array) }.
        flatten(1)
    end

    # If there are only strings, concatenate them and return that. nil
    # elements count as the empty string; the accumulator is a fresh
    # mutable string rather than a literal.
    list.inject(''.dup) { |s,e| s<<(e||'') }
  end

  # Class-level macro: defines the instance method #precedence so that it
  # returns +prec+.
  def self.precedence(prec)
    define_method(:precedence) { prec }
  end
  precedence Precedence::BASE

  # Printable form of this parslet, wrapped in parentheses when the
  # surrounding expression (+outer_prec+) binds tighter than this one.
  # +outer_prec+ defaults to Precedence::OUTER so that plain #to_s calls
  # (string interpolation, puts) work and yield the same text as #inspect.
  #
  def to_s(outer_prec=Precedence::OUTER)
    if outer_prec < precedence
      "("+to_s_inner(precedence)+")"
    else
      to_s_inner(precedence)
    end
  end

  def inspect
    to_s(Precedence::OUTER)
  end

  # Cause should return the current best approximation of this parslet
  # of what went wrong with the parse. Not relevant if the parse succeeds,
  # but needed for clever error reports.
  #
  def cause
    @last_cause
  end

  # Error tree returns what went wrong here plus what went wrong inside
  # subexpressions as a tree. The error stored for this node will be equal
  # with #cause. Returns nil when no cause is stored.
  #
  def error_tree
    Parslet::ErrorTree.new(self) if cause?
  end

  # True if a failure message is currently stored for this parslet.
  def cause?
    !@last_cause.nil?
  end

private
  # Report/raise a parse error with the given message, printing the current
  # position as well. Appends 'at line X char Y.' to the message you give.
  # If +pos+ is given, it is used as the real position the error happened,
  # correcting the io's current position.
  #
  # The message is remembered as this parslet's #cause before
  # Parslet::ParseFailed is raised.
  #
  def error(io, str, pos=nil)
    pre = io.string[0..(pos||io.pos)]
    lines = Array(pre.lines)

    if lines.empty?
      formatted_cause = str
    else
      column = lines.last.length
      formatted_cause = "#{str} at line #{lines.count} char #{column}."
    end

    @last_cause = formatted_cause

    raise Parslet::ParseFailed, formatted_cause, nil
  end

  # Emits a warning when both hashes share keys, since merging them keeps
  # only the values of +h2+ for those keys.
  def warn_about_duplicate_keys(h1, h2)
    d = h1.keys & h2.keys
    unless d.empty?
      warn "Duplicate subtrees while merging result of \n #{self.inspect}\nonly the values"+
        " of the latter will be kept. (keys: #{d.inspect})"
    end
  end
end
|
203
|
+
|
204
|
+
# Gives the result of another parslet a name: whatever the wrapped parslet
# produces is flattened and returned as a one-entry hash { name => result }.
#
class Named < Base
  attr_reader :parslet, :name

  def initialize(parslet, name)
    @parslet = parslet
    @name    = name
  end

  # Overrides #apply itself (there is no #try here): the wrapped parslet is
  # applied and its result is put under +name+.
  def apply(io)
    produce_return_value(parslet.apply(io))
  end

  # Printed as name:inner.
  def to_s_inner(prec)
    inner = parslet.to_s(prec)
    "#{name}:#{inner}"
  end

  # Errors are those of the wrapped parslet.
  def error_tree
    parslet.error_tree
  end

private
  # Flattens +val+ and stores it under this parslet's name.
  def produce_return_value(val)
    flat = flatten(val)
    { name => flat }
  end
end
|
228
|
+
|
229
|
+
# Checks whether the bound parslet would match at the current position
# without consuming any input. With +positive+ set the bound parslet has to
# match, otherwise it must not match.
#
class Lookahead < Base
  attr_reader :positive
  attr_reader :bound_parslet

  def initialize(bound_parslet, positive=true)
    # Positive and negative lookahead share one class; this flag tells them
    # apart.
    @bound_parslet = bound_parslet
    @positive      = positive
  end

  # Applies the bound parslet and then restores the io position, whatever
  # the outcome was. The result is decided by #fail or #success.
  def try(io)
    start = io.pos

    begin
      bound_parslet.apply(io)
    rescue Parslet::ParseFailed
      return fail(io)
    ensure
      # Lookahead never consumes input.
      io.pos = start
    end

    success(io)
  end

  # Called when the bound parslet did not match: a parse error for positive
  # lookahead, nil for negative lookahead.
  def fail(io)
    # TODO: Squash this down to nothing? Return value handling here...
    return nil unless positive

    error(io, "lookahead: #{bound_parslet.inspect} didn't match, but should have")
  end

  # Called when the bound parslet matched: nil for positive lookahead, a
  # parse error for negative lookahead.
  def success(io)
    return nil if positive # see above, TODO

    error(
      io,
      "negative lookahead: #{bound_parslet.inspect} matched, but shouldn't have")
  end

  precedence Precedence::LOOKAHEAD

  # Printed as &inner (positive) or !inner (negative).
  def to_s_inner(prec)
    marker = if positive then '&' else '!' end

    "#{marker}#{bound_parslet.to_s(prec)}"
  end

  # Errors are those of the bound parslet.
  def error_tree
    bound_parslet.error_tree
  end
end
|
280
|
+
|
281
|
+
# Ordered choice between several parslets. Denoted by '|'. The alternatives
# are tried from left to right and the result of the first one that matches
# is returned.
#
class Alternative < Base
  attr_reader :alternatives
  def initialize(*alternatives)
    @alternatives = alternatives
  end

  # Returns a new Alternative consisting of the current alternatives plus
  # +parslet+. The receiver is left untouched, so an alternative stored in
  # a variable or rule can safely be extended in several places.
  #
  def |(parslet)
    self.class.new(*(alternatives + [parslet]))
  end

  # Returns the result of the first alternative that applies. Raises
  # Parslet::ParseFailed (through #error) if none does.
  #
  def try(io)
    alternatives.each { |a|
      begin
        return a.apply(io)
      rescue Parslet::ParseFailed
        # This alternative didn't match; move on to the next one.
      end
    }
    # If we reach this point, all alternatives have failed.
    error(io, "Expected one of #{alternatives.inspect}.")
  end

  precedence Precedence::ALTERNATE
  def to_s_inner(prec)
    alternatives.map { |a| a.to_s(prec) }.join(' | ')
  end

  # One error tree node for this alternative with the error trees of all
  # alternatives as children.
  def error_tree
    Parslet::ErrorTree.new(self, *alternatives.
      map { |child| child.error_tree })
  end
end
|
313
|
+
|
314
|
+
# A sequence of parslets, matched from left to right. Denoted by '>>'
#
class Sequence < Base
  attr_reader :parslets
  def initialize(*parslets)
    @parslets = parslets
  end

  # Returns a new Sequence consisting of the current parslets followed by
  # +parslet+. The receiver is left untouched, so a sequence stored in a
  # variable or rule can safely be extended in several places.
  #
  def >>(parslet)
    self.class.new(*(parslets + [parslet]))
  end

  # Applies all parslets in order and returns [:sequence, *results] (to be
  # merged by Base#flatten). If any element fails, the failure is reported
  # as a failure of the whole sequence.
  #
  def try(io)
    [:sequence]+parslets.map { |p|
      # Save each parslet as potentially offending (raising an error).
      @offending_parslet = p
      p.apply(io)
    }
  rescue Parslet::ParseFailed
    error(io, "Failed to match sequence (#{self.inspect})")
  end

  precedence Precedence::SEQUENCE
  def to_s_inner(prec)
    parslets.map { |p| p.to_s(prec) }.join(' ')
  end

  # One error tree node for this sequence; the error tree of the parslet
  # that was applied last is added as a child.
  def error_tree
    Parslet::ErrorTree.new(self).tap { |t|
      t.children << @offending_parslet.error_tree if @offending_parslet }
  end
end
|
347
|
+
|
348
|
+
# Matches a parslet repeatedly: at least +min+ times and, if +max+ is not
# nil, at most +max+ times. The +tag+ (:repetition or :maybe) is put at the
# head of the result array and selects how Base#flatten merges the matches.
#
class Repetition < Base
  attr_reader :min, :max, :parslet
  def initialize(parslet, min, max, tag=:repetition)
    @parslet = parslet
    @min, @max = min, max
    @tag = tag
  end

  # Applies the parslet until it fails or +max+ matches have been made and
  # returns [tag, *results]. Raises Parslet::ParseFailed (through #error)
  # if fewer than +min+ matches were possible.
  #
  def try(io)
    occ = 0
    result = [@tag]   # initialize the result array with the tag (for flattening)
    loop do
      begin
        pos_before = io.pos
        result << parslet.apply(io)
        occ += 1

        # If we're not greedy (max is defined), check if that has been
        # reached.
        return result if max && occ>=max

        # Without an upper bound, a parslet that matches but consumes no
        # input would match forever at the same position. Stop as soon as
        # the minimum is satisfied. Bounded repetitions are not affected.
        return result if max.nil? && occ>=min && io.pos == pos_before
      rescue Parslet::ParseFailed
        # Greedy matcher has produced a failure. Check if occ (which will
        # contain the number of sucesses) is in {min, max}.
        error(io, "Expected at least #{min} of #{parslet.inspect}") if occ < min
        return result
      end
    end
  end

  precedence Precedence::REPETITION

  # Printed as inner{min, max}, or inner? for the 0..1 case.
  def to_s_inner(prec)
    minmax = "{#{min}, #{max}}"
    minmax = '?' if min == 0 && max == 1

    parslet.to_s(prec) + minmax
  end

  def cause
    # Either the repetition failed or the parslet inside failed to repeat.
    super || parslet.cause
  end

  # If this repetition itself stored a failure, returns a node for it with
  # the inner parslet's error tree as child; otherwise just the inner
  # parslet's error tree.
  def error_tree
    if cause?
      Parslet::ErrorTree.new(self, parslet.error_tree)
    else
      parslet.error_tree
    end
  end
end
|
397
|
+
|
398
|
+
# Matches a special kind of regular expression that only ever matches one
# character at a time. Useful members of this family are: character ranges,
# \w, \d, \r, \n, ...
#
class Re < Base
  attr_reader :match
  def initialize(match)
    @match = match
  end

  # Reads one unit from +io+ (io.read(1)) and returns it if it matches the
  # pattern. Raises Parslet::ParseFailed (through #error) at the end of
  # input or if the pattern does not match.
  #
  def try(io)
    # Compiled on first use and then kept: #try runs once per input
    # character, the pattern never changes.
    r = (@re ||= Regexp.new(match, Regexp::MULTILINE))
    s = io.read(1)
    error(io, "Premature end of input") unless s
    error(io, "Failed to match #{match.inspect[1..-2]}") unless s.match(r)
    return s
  end

  # The pattern without the delimiters added by #inspect.
  def to_s_inner(prec)
    match.inspect[1..-2]
  end
end
|
420
|
+
|
421
|
+
# Matches a string of characters.
#
class Str < Base
  attr_reader :str
  def initialize(str)
    @str = str
  end

  # Reads as much input as the literal is long and returns it if it equals
  # the literal. Raises Parslet::ParseFailed (through #error) if the input
  # is too short or differs; in the latter case the error position is the
  # start of the attempted match.
  #
  # NOTE(review): the amount read is str.size. Whether io.read counts the
  # same unit as String#size for multibyte input is worth confirming.
  #
  def try(io)
    start  = io.pos
    wanted = str.size
    got    = io.read(wanted)

    if got.nil? || got.size != wanted
      error(io, "Premature end of input")
    end

    unless got == str
      error(io, "Expected #{str.inspect}, but got #{got.inspect}", start)
    end

    got
  end

  # The literal in single quotes.
  def to_s_inner(prec)
    "'#{str}'"
  end
end
|
442
|
+
|
443
|
+
# This wraps pieces of parslet definition and gives them a name. The wrapped
# piece is lazily evaluated and cached. This has two purposes:
#
# a) Avoid infinite recursion during evaluation of the definition
#
# b) Be able to print things by their name, not by their sometimes
#    complicated content.
#
# You don't normally use this directly, instead you should generate it by
# using the structuring method Parslet#rule.
#
class Entity < Base
  attr_reader :name, :context, :block

  def initialize(name, context, block)
    super()

    @name, @context, @block = name, context, block
  end

  # Delegates matching to the wrapped parslet.
  def try(io)
    parslet.apply(io)
  end

  # The wrapped parslet. It is built on first access by evaluating the
  # block in +context+ and is cached afterwards. A block that yields nil or
  # false results in a NotImplementedError.
  def parslet
    return @parslet if @parslet

    @parslet = context.instance_eval(&block).tap { |built|
      raise_not_implemented unless built
    }
  end

  # Printed as the upcased name instead of the definition.
  def to_s_inner(prec)
    name.to_s.upcase
  end

  # Errors are those of the wrapped parslet.
  def error_tree
    parslet.error_tree
  end

private
  # Raises NotImplementedError with a backtrace from which all frames of
  # this file have been removed.
  def raise_not_implemented
    this_file = %r{#{Regexp.escape(__FILE__)}}
    # blatantly stolen from dependencies.rb in activesupport
    outside_frames = caller.reject { |frame| frame =~ this_file }

    failure = NotImplementedError.new("rule(#{name.inspect}) { ... } returns nil. Still not implemented, but already used?")
    failure.set_backtrace(outside_frames)

    raise failure
  end
end
|
491
|
+
end
|
492
|
+
|