parslet 0.9.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,492 @@
1
+ module Parslet::Atoms
2
# Numeric precedence levels, numbered in increasing order from BASE (1) up
# to OUTER (6).
module Precedence
  BASE       = 1 # everything else
  LOOKAHEAD  = 2 # &SOMETHING
  REPETITION = 3 # 'a'+, 'a'?
  SEQUENCE   = 4 # 'a' 'b'
  ALTERNATE  = 5 # 'a' | 'b'
  OUTER      = 6 # printing is done here.
end
11
+
12
# Base class for all parslets, handles orchestration of calls and implements
# a lot of the operator and chaining methods.
#
# Subclasses are expected to provide #try(io) and #to_s_inner(prec); both
# are called from here but not defined in this class.
#
class Base
  # Parses +io+ with this parslet and returns the flattened result. Anything
  # that responds to +to_str+ is wrapped in a StringIO first. Raises
  # Parslet::ParseFailed if the input could not be consumed completely.
  #
  def parse(io)
    if io.respond_to? :to_str
      io = StringIO.new(io)
    end

    result = apply(io)

    # If we haven't consumed the input, then the pattern doesn't match. Try
    # to provide a good error message (even asking down below)
    unless io.eof?
      # Do we know why we stopped matching input? If yes, that's a good
      # error to fail with. Otherwise just report that we cannot consume the
      # input.
      if cause
        raise Parslet::ParseFailed, "Unconsumed input, maybe because of this: #{cause}"
      else
        error(io, "Don't know what to do with #{io.string[io.pos,100]}")
      end
    end

    return flatten(result)
  end

  # Calls #try on +io+. On success the stored cause is cleared and the raw
  # (unflattened) result is returned. On Parslet::ParseFailed the io is
  # rewound to where this parslet started and the error is re-raised.
  #
  def apply(io)
    old_pos = io.pos

    begin
      r = try(io)
      @last_cause = nil
      return r
    rescue Parslet::ParseFailed => ex
      io.pos = old_pos; raise ex
    end
  end

  # Matches this parslet at least +min+ and (if given) at most +max+ times.
  def repeat(min=0, max=nil)
    Repetition.new(self, min, max)
  end
  # Matches this parslet zero or one time.
  def maybe
    Repetition.new(self, 0, 1, :maybe)
  end
  # Chains this parslet and +parslet+ into a sequence.
  def >>(parslet)
    Sequence.new(self, parslet)
  end
  # Builds an alternative of this parslet and +parslet+.
  def |(parslet)
    Alternative.new(self, parslet)
  end
  # Negative lookahead on this parslet.
  def absnt?
    Lookahead.new(self, false)
  end
  # Positive lookahead on this parslet.
  def prsnt?
    Lookahead.new(self, true)
  end
  # Names the result of this parslet.
  def as(name)
    Named.new(self, name)
  end

  # Turns the tagged result arrays produced by #try ([:sequence, ...],
  # [:maybe, ...], [:repetition, ...]) into the final result. Values that
  # are not arrays are returned untouched.
  #
  def flatten(value)
    # Passes through everything that isn't an array of things
    return value unless value.instance_of? Array

    # Extracts the s-expression tag
    tag, *tail = value

    # Merges arrays:
    result = tail.
      map { |e| flatten(e) }      # first flatten each element

    case tag
      when :sequence
        return flatten_sequence(result)
      when :maybe
        return result.first
      when :repetition
        return flatten_repetition(result)
    end

    # Uses raise rather than fail: Lookahead defines its own #fail(io), which
    # would be picked up here instead of Kernel#fail.
    raise "BUG: Unknown tag #{tag.inspect}."
  end
  # Merges the already flattened elements of a sequence, pairwise from left
  # to right, starting from an empty string.
  #
  def flatten_sequence(list)
    list.inject('') { |r, e|        # and then merge flat elements
      case [r, e].map { |o| o.class }
        when [Hash, Hash]           # two keyed subtrees: make one
          warn_about_duplicate_keys(r, e)
          r.merge(e)
        # a keyed tree and an array (push down)
        when [Hash, Array]
          [r] + e
        when [Array, Hash]
          r + [e]
        when [String, String]
          # Non-destructive concatenation: r may be a string taken straight
          # from the result tree (see the else branch), so it must not be
          # modified in place.
          r + e
      else
        if r.instance_of? Hash
          r         # Ignore e, since its not a hash we can merge
        else
          e         # Whatever e is at this point, we keep it
        end
      end
    }
  end
  # Merges the already flattened elements of a repetition: hashes win over
  # arrays, arrays win over strings.
  #
  def flatten_repetition(list)
    if list.any? { |e| e.instance_of?(Hash) }
      # If keyed subtrees are in the array, we'll want to discard all
      # strings inbetween. To keep them, name them.
      return list.select { |e| e.instance_of?(Hash) }
    end

    if list.any? { |e| e.instance_of?(Array) }
      # If any arrays are nested in this array, flatten all arrays to this
      # level.
      return list.
        select { |e| e.instance_of?(Array) }.
        flatten(1)
    end

    # If there are only strings, concatenate them and return that.
    list.inject('') { |s,e| s<<(e||'') }
  end

  # Class-level macro: defines #precedence on the calling class so that it
  # returns +prec+.
  def self.precedence(prec)
    define_method(:precedence) { prec }
  end
  precedence Precedence::BASE
  # Prints this parslet, wrapped in parentheses when +outer_prec+ is lower
  # than the parslet's own precedence. +outer_prec+ defaults to
  # Precedence::OUTER so that a plain #to_s call and string interpolation
  # work as well.
  #
  def to_s(outer_prec=Precedence::OUTER)
    if outer_prec < precedence
      "("+to_s_inner(precedence)+")"
    else
      to_s_inner(precedence)
    end
  end
  def inspect
    to_s(Precedence::OUTER)
  end

  # Cause should return the current best approximation of this parslet
  # of what went wrong with the parse. Not relevant if the parse succeeds,
  # but needed for clever error reports.
  #
  def cause
    @last_cause
  end

  # Error tree returns what went wrong here plus what went wrong inside
  # subexpressions as a tree. The error stored for this node will be equal
  # with #cause.
  #
  def error_tree
    Parslet::ErrorTree.new(self) if cause?
  end
  # True if this parslet itself has recorded an error.
  def cause?
    not @last_cause.nil?
  end
private
  # Report/raise a parse error with the given message, printing the current
  # position as well. Appends 'at line X char Y.' to the message you give.
  # If +pos+ is given, it is used as the real position the error happened,
  # correcting the io's current position.
  #
  def error(io, str, pos=nil)
    pre = io.string[0..(pos||io.pos)]
    lines = Array(pre.lines)

    if lines.empty?
      formatted_cause = str
    else
      pos = lines.last.length
      formatted_cause = "#{str} at line #{lines.count} char #{pos}."
    end

    # Remember the message so that #cause can report it later.
    @last_cause = formatted_cause

    raise Parslet::ParseFailed, formatted_cause, nil
  end
  # Emits a warning when +h1+ and +h2+ share keys, since merging them keeps
  # only the values of the latter.
  #
  def warn_about_duplicate_keys(h1, h2)
    d = h1.keys & h2.keys
    unless d.empty?
      warn "Duplicate subtrees while merging result of \n  #{self.inspect}\nonly the values"+
        " of the latter will be kept. (keys: #{d.inspect})"
    end
  end
end
203
+
204
# Wraps a parslet and returns its flattened result as a single-entry hash
# keyed by +name+.
#
class Named < Base
  attr_reader :parslet, :name

  def initialize(parslet, name)
    @parslet = parslet
    @name = name
  end

  # Applies the wrapped parslet and wraps whatever it returns.
  def apply(io)
    produce_return_value(parslet.apply(io))
  end

  def to_s_inner(prec)
    "#{name}:#{parslet.to_s(prec)}"
  end

  # Errors are reported by the wrapped parslet.
  def error_tree
    parslet.error_tree
  end

private
  # Builds the { name => flattened value } hash.
  def produce_return_value(val)
    flattened = flatten(val)
    { name => flattened }
  end
end
228
+
229
# Positive or negative lookahead on a bound parslet. The io position is
# restored after every attempt, whether the bound parslet matched or not.
#
class Lookahead < Base
  attr_reader :positive
  attr_reader :bound_parslet

  def initialize(bound_parslet, positive=true)
    # Model positive and negative lookahead by testing this flag.
    @positive = positive
    @bound_parslet = bound_parslet
  end

  # Applies the bound parslet, rewinds the io and dispatches to #fail or
  # #success depending on the outcome.
  def try(io)
    start = io.pos
    begin
      bound_parslet.apply(io)
    rescue Parslet::ParseFailed
      return fail(io)
    ensure
      # Runs on both paths: lookahead does not move the io position.
      io.pos = start
    end
    success(io)
  end

  # Called when the bound parslet did not match.
  def fail(io)
    # TODO: Squash this down to nothing? Return value handling here...
    return nil unless positive

    error(io, "lookahead: #{bound_parslet.inspect} didn't match, but should have")
  end
  # Called when the bound parslet matched.
  def success(io)
    return nil if positive  # see above, TODO

    error(
      io,
      "negative lookahead: #{bound_parslet.inspect} matched, but shouldn't have")
  end

  precedence Precedence::LOOKAHEAD
  def to_s_inner(prec)
    prefix = if positive then '&' else '!' end

    "#{prefix}#{bound_parslet.to_s(prec)}"
  end

  # Errors are reported by the bound parslet.
  def error_tree
    bound_parslet.error_tree
  end
end
280
+
281
# A list of alternatives, tried from left to right; the result of the first
# one that matches is returned. Denoted by '|'
#
class Alternative < Base
  attr_reader :alternatives
  def initialize(*alternatives)
    @alternatives = alternatives
  end

  # Returns a new alternative with +parslet+ appended. The receiver is left
  # untouched, so an alternative that is referenced from several places is
  # not changed behind their back.
  #
  def |(parslet)
    self.class.new(*(alternatives + [parslet]))
  end

  def try(io)
    alternatives.each { |a|
      begin
        return a.apply(io)
      rescue Parslet::ParseFailed
        # This alternative failed, move on to the next one.
      end
    }
    # If we reach this point, all alternatives have failed.
    error(io, "Expected one of #{alternatives.inspect}.")
  end

  precedence Precedence::ALTERNATE
  def to_s_inner(prec)
    alternatives.map { |a| a.to_s(prec) }.join(' | ')
  end

  # Error tree with one child per alternative.
  def error_tree
    Parslet::ErrorTree.new(self, *alternatives.
      map { |child| child.error_tree })
  end
end
313
+
314
# A sequence of parslets, matched from left to right. Denoted by '>>'
#
class Sequence < Base
  attr_reader :parslets
  def initialize(*parslets)
    @parslets = parslets
  end

  # Returns a new sequence with +parslet+ appended. The receiver is left
  # untouched, so a sequence that is referenced from several places is not
  # changed behind their back.
  #
  def >>(parslet)
    self.class.new(*(parslets + [parslet]))
  end

  # Applies all parslets in order and returns their results tagged with
  # :sequence. Any failure is reported as a failure of the whole sequence.
  #
  def try(io)
    [:sequence]+parslets.map { |p|
      # Save each parslet as potentially offending (raising an error).
      @offending_parslet = p
      p.apply(io)
    }
  rescue Parslet::ParseFailed
    error(io, "Failed to match sequence (#{self.inspect})")
  end

  precedence Precedence::SEQUENCE
  def to_s_inner(prec)
    parslets.map { |p| p.to_s(prec) }.join(' ')
  end

  # Error tree for this sequence with the error tree of the parslet that was
  # applied last (if any) as its child.
  #
  def error_tree
    Parslet::ErrorTree.new(self).tap { |t|
      t.children << @offending_parslet.error_tree if @offending_parslet }
  end
end
347
+
348
# Matches a parslet repeatedly, between +min+ and +max+ times. The tag
# (:repetition by default) is put in front of the result array and is later
# used for flattening.
#
class Repetition < Base
  attr_reader :min, :max, :parslet

  def initialize(parslet, min, max, tag=:repetition)
    @parslet = parslet
    @min = min
    @max = max
    @tag = tag
  end

  # Applies the parslet until it fails or +max+ matches have been collected.
  # Raises through #error if fewer than +min+ matches were found.
  def try(io)
    collected = [@tag]    # the tag comes first (for flattening)
    successes = 0

    loop do
      begin
        collected << parslet.apply(io)
        successes += 1

        # If we're not greedy (max is defined), check if that has been
        # reached. The check happens only after a match.
        return collected if max && successes >= max
      rescue Parslet::ParseFailed
        # Greedy matcher has produced a failure. Check that the number of
        # successes is at least min.
        if successes < min
          error(io, "Expected at least #{min} of #{parslet.inspect}")
        end
        return collected
      end
    end
  end

  precedence Precedence::REPETITION
  def to_s_inner(prec)
    suffix = "{#{min}, #{max}}"
    suffix = '?' if min == 0 && max == 1

    parslet.to_s(prec) + suffix
  end

  def cause
    # Either the repetition failed or the parslet inside failed to repeat.
    super || parslet.cause
  end

  # Wraps the inner error tree if the repetition itself has recorded an
  # error; otherwise hands back the inner error tree directly.
  def error_tree
    return parslet.error_tree unless cause?

    Parslet::ErrorTree.new(self, parslet.error_tree)
  end
end
397
+
398
# Matches a special kind of regular expression that only ever matches one
# character at a time. Useful members of this family are: character ranges,
# \w, \d, \r, \n, ...
#
class Re < Base
  attr_reader :match
  def initialize(match)
    @match = match
  end

  # Reads one character from +io+ and matches it against the pattern.
  # Returns the character; reports an error at end of input or when the
  # character doesn't match.
  #
  def try(io)
    r = compiled_match
    # getc reads a whole character. read(1) would read a single byte and
    # split multibyte characters.
    s = io.getc
    error(io, "Premature end of input") unless s
    # A broken byte sequence can't match anything; checking first keeps the
    # regexp engine from raising on it.
    error(io, "Failed to match #{match.inspect[1..-2]}") \
      unless s.valid_encoding? && s.match(r)
    return s
  end

  def to_s_inner(prec)
    match.inspect[1..-2]
  end

private
  # Compiles the pattern once instead of once per character read.
  def compiled_match
    @compiled_match ||= Regexp.new(match, Regexp::MULTILINE)
  end
end
420
+
421
# Matches a string of characters.
#
class Str < Base
  attr_reader :str
  def initialize(str)
    @str = str
  end

  # Reads as many bytes from +io+ as +str+ occupies and compares them with
  # +str+. Returns the string read; reports an error at end of input or on
  # a mismatch.
  #
  def try(io)
    old_pos = io.pos
    # read counts bytes, not characters, so ask for the byte length.
    s = io.read(str.bytesize)
    error(io, "Premature end of input") unless s && s.bytesize==str.bytesize
    # read(n) hands back a binary string; give it the encoding of +str+ so
    # that non-ASCII content can compare equal.
    s.force_encoding(str.encoding)
    error(io, "Expected #{str.inspect}, but got #{s.inspect}", old_pos) \
      unless s==str
    return s
  end

  def to_s_inner(prec)
    "'#{str}'"
  end
end
442
+
443
# This wraps pieces of parslet definition and gives them a name. The wrapped
# piece is lazily evaluated and cached. This has two purposes:
#
# a) Avoid infinite recursion during evaluation of the definition
#
# b) Be able to print things by their name, not by their sometimes
#    complicated content.
#
# You don't normally use this directly, instead you should generate it by
# using the structuring method Parslet#rule.
#
class Entity < Base
  attr_reader :name, :context, :block

  def initialize(name, context, block)
    super()

    @name = name
    @context = context
    @block = block
  end

  def try(io)
    parslet.apply(io)
  end

  # Evaluates the block in +context+ on first use and caches the result.
  # Raises NotImplementedError if the block returns nothing.
  def parslet
    @parslet ||= context.instance_eval(&block).tap { |built|
      raise_not_implemented unless built
    }
  end

  # Entities print as their upcased name.
  def to_s_inner(prec)
    name.to_s.upcase
  end

  # Errors are reported by the wrapped parslet.
  def error_tree
    parslet.error_tree
  end

private
  # Raises NotImplementedError with a backtrace that leaves out all frames
  # from this file.
  def raise_not_implemented
    this_file = __FILE__
    # blatantly stolen from dependencies.rb in activesupport
    outside_frames = caller.reject { |frame| frame.include?(this_file) }

    exception = NotImplementedError.new("rule(#{name.inspect}) { ... } returns nil. Still not implemented, but already used?")
    exception.set_backtrace(outside_frames)

    raise exception
  end
end
491
+ end
492
+