parslet 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,492 @@
1
+ module Parslet::Atoms
2
# Precedence levels of the parslet operators, used when printing a parslet
# expression. The values are consecutive integers starting at 1; a parslet
# whose level is greater than the level of its surrounding context gets
# wrapped in parentheses when printed.
#
module Precedence
  BASE       = 1 # everything else
  LOOKAHEAD  = 2 # &SOMETHING
  REPETITION = 3 # 'a'+, 'a'?
  SEQUENCE   = 4 # 'a' 'b'
  ALTERNATE  = 5 # 'a' | 'b'
  OUTER      = 6 # printing is done here.
end
11
+
12
# Base class for all parslets, handles orchestration of calls and implements
# a lot of the operator and chaining methods.
#
# Base itself defines neither +try+ nor +to_s_inner+; #apply and #to_s call
# them, so concrete parslets are expected to provide them.
#
class Base
  # Parses +io+ with this parslet and returns the flattened result.
  #
  # Anything responding to +to_str+ is wrapped in a StringIO first. If input
  # is left over after this parslet has been applied, Parslet::ParseFailed is
  # raised.
  #
  def parse(io)
    io = StringIO.new(io) if io.respond_to?(:to_str)

    result = apply(io)

    # If we haven't consumed the input, then the pattern doesn't match. Try
    # to provide a good error message (even asking down below)
    unless io.eof?
      # Do we know why we stopped matching input? If yes, that's a good
      # error to fail with. Otherwise just report that we cannot consume the
      # input.
      if cause
        raise Parslet::ParseFailed, "Unconsumed input, maybe because of this: #{cause}"
      else
        error(io, "Don't know what to do with #{io.string[io.pos,100]}")
      end
    end

    flatten(result)
  end

  # Calls #try on +io+ and returns its (unflattened) result. On success the
  # stored failure cause is cleared. When #try raises Parslet::ParseFailed,
  # the position of +io+ is restored to where it was before the attempt and
  # the exception is re-raised.
  #
  def apply(io)
    old_pos = io.pos

    begin
      r = try(io)
      @last_cause = nil
      return r
    rescue Parslet::ParseFailed => ex
      io.pos = old_pos
      raise ex
    end
  end

  # Matches this parslet between +min+ and +max+ times (no upper bound when
  # +max+ is nil).
  def repeat(min=0, max=nil)
    Repetition.new(self, min, max)
  end

  # Matches this parslet zero or one time.
  def maybe
    Repetition.new(self, 0, 1, :maybe)
  end

  # Sequence: this parslet followed by +parslet+.
  def >>(parslet)
    Sequence.new(self, parslet)
  end

  # Alternative: this parslet or, if it fails, +parslet+.
  def |(parslet)
    Alternative.new(self, parslet)
  end

  # Negative lookahead on this parslet.
  def absnt?
    Lookahead.new(self, false)
  end

  # Positive lookahead on this parslet.
  def prsnt?
    Lookahead.new(self, true)
  end

  # Names the result of this parslet, see Named.
  def as(name)
    Named.new(self, name)
  end

  # Turns a tagged result array (<code>[tag, *elements]</code>) into its
  # final form. Values that are not arrays are passed through untouched.
  #
  def flatten(value)
    # Passes through everything that isn't an array of things
    return value unless value.instance_of? Array

    # Extracts the s-expression tag
    tag, *tail = value

    # First flatten each element, then merge according to the tag.
    result = tail.map { |e| flatten(e) }

    case tag
    when :sequence
      flatten_sequence(result)
    when :maybe
      result.first
    when :repetition
      flatten_repetition(result)
    else
      # Uses raise rather than Kernel#fail: Lookahead defines its own
      # #fail(io), which would be called here instead.
      raise "BUG: Unknown tag #{tag.inspect}."
    end
  end

  # Merges the flattened results of a sequence into one value.
  #
  # nil elements (produced by lookaheads and by unmatched optional parts)
  # carry no information and are dropped first, so that they cannot replace
  # what has been accumulated so far.
  #
  def flatten_sequence(list)
    list.compact.inject('') { |r, e|        # merge flat elements pairwise
      case [r, e].map { |o| o.class }
      when [Hash, Hash]   # two keyed subtrees: make one
        warn_about_duplicate_keys(r, e)
        r.merge(e)
      # a keyed tree and an array (push down)
      when [Hash, Array]
        [r] + e
      when [Array, Hash]
        r + [e]
      when [String, String]
        # Builds a new string so that results handed in by subparslets are
        # never modified in place.
        r + e
      else
        if r.instance_of? Hash
          r     # Ignore e, since its not a hash we can merge
        else
          e     # Whatever e is at this point, we keep it
        end
      end
    }
  end

  # Merges the flattened results of a repetition into one value: an array
  # of hashes, a flattened array, or a single concatenated string.
  #
  def flatten_repetition(list)
    if list.any? { |e| e.instance_of?(Hash) }
      # If keyed subtrees are in the array, we'll want to discard all
      # strings inbetween. To keep them, name them.
      return list.select { |e| e.instance_of?(Hash) }
    end

    if list.any? { |e| e.instance_of?(Array) }
      # If any arrays are nested in this array, flatten all arrays to this
      # level.
      return list.
        select { |e| e.instance_of?(Array) }.
        flatten(1)
    end

    # If there are only strings, concatenate them and return that.
    list.inject('') { |s, e| s << (e || '') }
  end

  # Class-level macro: defines an instance method #precedence that returns
  # +prec+.
  def self.precedence(prec)
    define_method(:precedence) { prec }
  end
  precedence Precedence::BASE

  # Returns the printed form of this parslet, parenthesized when
  # +outer_prec+ is lower than this parslet's own precedence.
  #
  # +outer_prec+ defaults to Precedence::OUTER so that implicit conversions
  # (string interpolation, puts) work and give the same text as #inspect.
  #
  def to_s(outer_prec=Precedence::OUTER)
    if outer_prec < precedence
      "("+to_s_inner(precedence)+")"
    else
      to_s_inner(precedence)
    end
  end

  def inspect
    to_s(Precedence::OUTER)
  end

  # Cause should return the current best approximation of this parslet
  # of what went wrong with the parse. Not relevant if the parse succeeds,
  # but needed for clever error reports.
  #
  def cause
    @last_cause
  end

  # Error tree returns what went wrong here plus what went wrong inside
  # subexpressions as a tree. The error stored for this node will be equal
  # with #cause. Returns nil when no cause is stored.
  #
  def error_tree
    Parslet::ErrorTree.new(self) if cause?
  end

  # True if a failure cause is stored on this parslet.
  def cause?
    !@last_cause.nil?
  end

  private

  # Report/raise a parse error with the given message, printing the current
  # position as well. Appends 'at line X char Y.' to the message you give.
  # If +pos+ is given, it is used as the real position the error happened,
  # correcting the io's current position.
  #
  # The formatted message is also stored as this parslet's cause.
  #
  def error(io, str, pos=nil)
    pre = io.string[0..(pos||io.pos)]
    lines = Array(pre.lines)

    if lines.empty?
      formatted_cause = str
    else
      column = lines.last.length
      formatted_cause = "#{str} at line #{lines.count} char #{column}."
    end

    @last_cause = formatted_cause

    raise Parslet::ParseFailed, formatted_cause, nil
  end

  # Emits a warning if +h1+ and +h2+ share keys, since merging them keeps
  # only the values of +h2+ for those keys.
  #
  def warn_about_duplicate_keys(h1, h2)
    d = h1.keys & h2.keys
    unless d.empty?
      warn "Duplicate subtrees while merging result of \n #{self.inspect}\nonly the values"+
        " of the latter will be kept. (keys: #{d.inspect})"
    end
  end
end
203
+
204
# Wraps another parslet and labels its result: applying a Named parslet
# yields a one-entry hash that maps +name+ to the flattened result of the
# wrapped parslet.
#
class Named < Base
  attr_reader :parslet, :name

  def initialize(parslet, name)
    @parslet = parslet
    @name = name
  end

  # Applies the wrapped parslet to +io+ and wraps what it returns. Failures
  # of the wrapped parslet propagate unchanged.
  def apply(io)
    produce_return_value(parslet.apply(io))
  end

  def to_s_inner(prec)
    inner = parslet.to_s(prec)
    "#{name}:#{inner}"
  end

  # A Named parslet has no error node of its own; it reports the error tree
  # of the parslet it wraps.
  def error_tree
    parslet.error_tree
  end

  private

  # Builds the { name => flattened value } hash returned from #apply.
  def produce_return_value(val)
    flat = flatten(val)
    { name => flat }
  end
end
228
+
229
# Positive or negative lookahead: applies the bound parslet to see whether
# it would match, then always puts the io position back where it was. The
# result of a successful lookahead is nil.
#
class Lookahead < Base
  attr_reader :positive, :bound_parslet

  def initialize(bound_parslet, positive=true)
    @bound_parslet = bound_parslet
    # Positive and negative lookahead are modelled by testing this flag.
    @positive = positive
  end

  def try(io)
    start = io.pos
    begin
      bound_parslet.apply(io)
    rescue Parslet::ParseFailed
      return fail(io)
    ensure
      # Runs on every path: a lookahead never moves the position.
      io.pos = start
    end
    success(io)
  end

  # Called when the bound parslet did not match. That is an error for a
  # positive lookahead and a success (nil) for a negative one.
  def fail(io)
    # TODO: Squash this down to nothing? Return value handling here...
    return nil unless positive

    error(io, "lookahead: #{bound_parslet.inspect} didn't match, but should have")
  end

  # Called when the bound parslet matched. That is a success (nil) for a
  # positive lookahead and an error for a negative one.
  def success(io)
    return nil if positive # see above, TODO

    error(
      io,
      "negative lookahead: #{bound_parslet.inspect} matched, but shouldn't have")
  end

  precedence Precedence::LOOKAHEAD
  def to_s_inner(prec)
    prefix = if positive
      '&'
    else
      '!'
    end

    "#{prefix}#{bound_parslet.to_s(prec)}"
  end

  def error_tree
    bound_parslet.error_tree
  end
end
280
+
281
# Ordered choice between several parslets, denoted by '|'. The alternatives
# are tried from left to right; the result of the first one that matches is
# the result of the whole expression.
#
class Alternative < Base
  attr_reader :alternatives
  def initialize(*alternatives)
    @alternatives = alternatives
  end

  # Returns a new Alternative that has +parslet+ appended as the last
  # choice. The receiver is left untouched, so an alternative that is used
  # in several places of a grammar is not changed by extending it in one of
  # them.
  #
  def |(parslet)
    self.class.new(*(alternatives + [parslet]))
  end

  # Returns the result of the first alternative that applies to +io+.
  # Raises (via #error) when none of them does.
  #
  def try(io)
    alternatives.each { |a|
      begin
        return a.apply(io)
      rescue Parslet::ParseFailed
        # This alternative did not match; go on with the next one.
      end
    }
    # If we reach this point, all alternatives have failed.
    error(io, "Expected one of #{alternatives.inspect}.")
  end

  precedence Precedence::ALTERNATE
  def to_s_inner(prec)
    alternatives.map { |a| a.to_s(prec) }.join(' | ')
  end

  # An error node for this alternative with the error trees of all
  # alternatives as children.
  def error_tree
    Parslet::ErrorTree.new(self, *alternatives.
      map { |child| child.error_tree })
  end
end
313
+
314
# A sequence of parslets, matched from left to right. Denoted by '>>'
#
class Sequence < Base
  attr_reader :parslets
  def initialize(*parslets)
    @parslets = parslets
  end

  # Returns a new Sequence that has +parslet+ appended as the last element.
  # The receiver is left untouched, so a sequence that is used in several
  # places of a grammar is not changed by extending it in one of them.
  #
  def >>(parslet)
    self.class.new(*(parslets + [parslet]))
  end

  # Applies all parslets in order and returns their results as an array
  # tagged with :sequence. A failure of any element is reported as a failure
  # of the whole sequence.
  #
  def try(io)
    [:sequence]+parslets.map { |p|
      # Save each parslet as potentially offending (raising an error).
      @offending_parslet = p
      p.apply(io)
    }
  rescue Parslet::ParseFailed
    error(io, "Failed to match sequence (#{self.inspect})")
  end

  precedence Precedence::SEQUENCE
  def to_s_inner(prec)
    parslets.map { |p| p.to_s(prec) }.join(' ')
  end

  # An error node for this sequence; the error tree of the parslet that was
  # applied last is attached as its child, if there is one.
  def error_tree
    Parslet::ErrorTree.new(self).tap { |t|
      t.children << @offending_parslet.error_tree if @offending_parslet }
  end
end
347
+
348
# Matches the wrapped parslet repeatedly: at least +min+ times and, if +max+
# is given, at most +max+ times. The result is an array that starts with the
# tag handed to the constructor (:repetition by default), followed by the
# results of the individual matches.
#
class Repetition < Base
  attr_reader :min, :max, :parslet

  def initialize(parslet, min, max, tag=:repetition)
    @parslet = parslet
    @min = min
    @max = max
    @tag = tag
  end

  def try(io)
    count = 0
    collected = [@tag] # the tag comes first, it is needed for flattening

    loop do
      begin
        collected << parslet.apply(io)
      rescue Parslet::ParseFailed
        # The parslet stopped matching; count holds the number of successes
        # so far. Too few of them is an error, otherwise we are done.
        error(io, "Expected at least #{min} of #{parslet.inspect}") if count < min
        break
      end

      count += 1

      # With an upper bound, stop as soon as it has been reached.
      break if max && count >= max
    end

    collected
  end

  precedence Precedence::REPETITION
  def to_s_inner(prec)
    suffix = (min == 0 && max == 1) ? '?' : "{#{min}, #{max}}"

    parslet.to_s(prec) + suffix
  end

  # Either the repetition itself failed or the parslet inside failed to
  # repeat; the former takes priority.
  def cause
    super || parslet.cause
  end

  # With an own stored cause, an error node with the inner parslet's tree as
  # child; otherwise just the inner parslet's tree.
  def error_tree
    return parslet.error_tree unless cause?

    Parslet::ErrorTree.new(self, parslet.error_tree)
  end
end
397
+
398
# Matches a special kind of regular expression that only ever matches one
# character at a time. Useful members of this family are: character ranges,
# \w, \d, \r, \n, ...
#
class Re < Base
  attr_reader :match
  def initialize(match)
    @match = match
  end

  # Reads one unit from +io+ (<code>io.read(1)</code>) and returns it if it
  # matches the pattern. Raises (via #error) at the end of the input or when
  # what was read does not match.
  #
  def try(io)
    r = compiled_match
    s = io.read(1)
    error(io, "Premature end of input") unless s
    error(io, "Failed to match #{match.inspect[1..-2]}") unless s.match(r)
    return s
  end

  def to_s_inner(prec)
    match.inspect[1..-2]
  end

  private

  # The Regexp built from +match+. It is compiled on first use and then
  # reused, since #try runs once for every character that is attempted.
  #
  def compiled_match
    @compiled_match ||= Regexp.new(match, Regexp::MULTILINE)
  end
end
420
+
421
# Matches a string of characters.
#
class Str < Base
  attr_reader :str
  def initialize(str)
    @str = str
  end

  # Reads as many bytes from +io+ as +str+ occupies and returns them if they
  # equal +str+. Raises (via #error) when the input ends early or when the
  # text read differs.
  #
  def try(io)
    old_pos = io.pos

    # IO#read takes a length in bytes, not in characters; asking for
    # str.size bytes would cut a literal containing multibyte characters
    # short.
    s = io.read(str.bytesize)
    error(io, "Premature end of input") unless s && s.bytesize==str.bytesize

    # A length-limited read hands back binary data. Tag it with the encoding
    # of the literal so that the comparison below (and the returned value)
    # are text. Guarded for Ruby versions whose strings have no encoding.
    s.force_encoding(str.encoding) if s.respond_to?(:force_encoding)

    error(io, "Expected #{str.inspect}, but got #{s.inspect}", old_pos) \
      unless s==str
    return s
  end

  def to_s_inner(prec)
    "'#{str}'"
  end
end
442
+
443
# This wraps pieces of parslet definition and gives them a name. The wrapped
# piece is lazily evaluated and cached. This has two purposes:
#
# a) Avoid infinite recursion during evaluation of the definition
#
# b) Be able to print things by their name, not by their sometimes
#    complicated content.
#
# You don't normally use this directly, instead you should generate it by
# using the structuring method Parslet#rule.
#
class Entity < Base
  attr_reader :name, :context, :block

  def initialize(name, context, block)
    super()

    @name, @context, @block = name, context, block
  end

  def try(io)
    parslet.apply(io)
  end

  # The parslet this entity stands for. On first use the block is evaluated
  # in +context+ and the outcome is remembered; a block that yields nothing
  # raises NotImplementedError instead.
  def parslet
    return @parslet if @parslet

    @parslet = context.instance_eval(&block).tap do |built|
      raise_not_implemented unless built
    end
  end

  # Entities print as their upper-cased name, not as their definition.
  def to_s_inner(prec)
    name.to_s.upcase
  end

  def error_tree
    parslet.error_tree
  end

  private

  # Raises NotImplementedError with a backtrace from which all frames that
  # mention this file have been removed.
  def raise_not_implemented
    # blatantly stolen from dependencies.rb in activesupport
    this_file = %r{#{Regexp.escape(__FILE__)}}
    trace = caller.reject { |frame| frame =~ this_file }

    raise NotImplementedError,
      "rule(#{name.inspect}) { ... } returns nil. Still not implemented, but already used?",
      trace
  end
end
491
+ end
492
+