parslet 0.9.0 → 0.10.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,40 @@
1
+
2
# Alternative during matching. Holds a list of parslets that are tried one
# after the other, in the order given; the result of the first one that
# matches is returned. Only fails if all alternatives fail.
#
# Example:
#
#   str('a') | str('b')   # matches either 'a' or 'b'
#
class Parslet::Atoms::Alternative < Parslet::Atoms::Base
  attr_reader :alternatives

  # alternatives - the parslets to try, highest priority first.
  def initialize(*alternatives)
    @alternatives = alternatives
  end

  # Returns an Alternative that tries everything this one tries, followed by
  # +parslet+. The alternatives are kept in one flat list rather than a
  # nested tree of Alternative objects.
  #
  # A new object is returned and the receiver is left untouched, so an
  # alternative that is referenced from several places in a grammar is not
  # changed behind the back of its other users.
  def |(parslet)
    self.class.new(*(alternatives + [parslet]))
  end

  # Applies each alternative to +io+ in turn and returns the result of the
  # first one that does not raise Parslet::ParseFailed. Reports an error
  # (via #error) if none of them matches.
  def try(io)
    alternatives.each do |alternative|
      begin
        return alternative.apply(io)
      rescue Parslet::ParseFailed
        # This alternative did not match; move on to the next one.
      end
    end

    # If we reach this point, all alternatives have failed.
    error(io, "Expected one of #{alternatives.inspect}.")
  end

  precedence ALTERNATE
  def to_s_inner(prec)
    alternatives.map { |a| a.to_s(prec) }.join(' | ')
  end

  # Builds a Parslet::ErrorTree for this node from the error trees of all
  # alternatives.
  def error_tree
    Parslet::ErrorTree.new(self, *alternatives.map { |child| child.error_tree })
  end
end
@@ -0,0 +1,196 @@
1
# Base class for all parslets, handles orchestration of calls and implements
# a lot of the operator and chaining methods.
#
# Subclasses are expected to provide #try(io) and #to_s_inner(prec); both
# are called from here but not defined here.
#
class Parslet::Atoms::Base
  include Parslet::Atoms::Precedence

  # Parses +io+ with this parslet and returns the flattened result.
  #
  # io - either something string-like (responds to #to_str; it is wrapped in
  #      a StringIO) or an object that already offers the StringIO methods
  #      used here (#pos, #pos=, #eof?, #string).
  #
  # Raises Parslet::ParseFailed when the parslet does not match or when it
  # matches but leaves input unconsumed.
  def parse(io)
    io = StringIO.new(io) if io.respond_to? :to_str

    result = apply(io)

    # If we haven't consumed the input, then the pattern doesn't match. Try
    # to provide a good error message (even asking down below).
    unless io.eof?
      # Do we know why we stopped matching input? If yes, that's a good
      # error to fail with. Otherwise just report that we cannot consume the
      # input.
      if cause
        raise Parslet::ParseFailed, "Unconsumed input, maybe because of this: #{cause}"
      else
        error(io, "Don't know what to do with #{io.string[io.pos,100]}")
      end
    end

    flatten(result)
  end

  # Runs #try on +io+. On success the stored cause is cleared and the raw
  # (unflattened) result is returned. On Parslet::ParseFailed the io is
  # rewound to where it was before the attempt and the failure is re-raised.
  def apply(io)
    old_pos = io.pos

    begin
      r = try(io)
      @last_cause = nil
      return r
    rescue Parslet::ParseFailed
      io.pos = old_pos
      raise
    end
  end

  def repeat(min=0, max=nil)
    Parslet::Atoms::Repetition.new(self, min, max)
  end
  def maybe
    Parslet::Atoms::Repetition.new(self, 0, 1, :maybe)
  end
  def >>(parslet)
    Parslet::Atoms::Sequence.new(self, parslet)
  end
  def |(parslet)
    Parslet::Atoms::Alternative.new(self, parslet)
  end
  def absnt?
    Parslet::Atoms::Lookahead.new(self, false)
  end
  def prsnt?
    Parslet::Atoms::Lookahead.new(self, true)
  end
  def as(name)
    Parslet::Atoms::Named.new(self, name)
  end

  # Turns a raw, tagged result (an s-expression like [:sequence, ...]) into
  # the final result. Anything that is not exactly an Array is passed
  # through unchanged. Known tags are :sequence, :maybe and :repetition.
  def flatten(value)
    # Passes through everything that isn't an array of things
    return value unless value.instance_of? Array

    # Extracts the s-expression tag
    tag, *tail = value

    # First flatten each element
    result = tail.map { |e| flatten(e) }

    case tag
    when :sequence
      flatten_sequence(result)
    when :maybe
      result.first
    when :repetition
      flatten_repetition(result)
    else
      # Use raise, not fail: a subclass in this code base (Lookahead)
      # defines its own #fail(io), which would otherwise be called here.
      raise "BUG: Unknown tag #{tag.inspect}."
    end
  end

  # Merges the already flattened elements of a sequence, left to right.
  def flatten_sequence(list)
    # The seed is a private, mutable buffer (it is appended to below), so
    # this also works when string literals are frozen.
    list.inject(''.dup) { |r, e|        # and then merge flat elements
      case [r, e].map { |o| o.class }
      when [Hash, Hash]                 # two keyed subtrees: make one
        warn_about_duplicate_keys(r, e)
        r.merge(e)
      # a keyed tree and an array (push down)
      when [Hash, Array]
        [r] + e
      when [Array, Hash]
        r + [e]
      when [String, String]
        r << e
      else
        if r.instance_of? Hash
          r # Ignore e, since it's not a hash we can merge
        else
          # Now e is either nil, in which case we drop it, or something else.
          # If it is something else, it is probably more important than r,
          # since we've checked for important values of r above.
          e||r
        end
      end
    }
  end

  # Merges the already flattened elements of a repetition.
  def flatten_repetition(list)
    if list.any? { |e| e.instance_of?(Hash) }
      # If keyed subtrees are in the array, we'll want to discard all
      # strings in between. To keep them, name them.
      return list.select { |e| e.instance_of?(Hash) }
    end

    if list.any? { |e| e.instance_of?(Array) }
      # If any arrays are nested in this array, flatten all arrays to this
      # level.
      return list.
        select { |e| e.instance_of?(Array) }.
        flatten(1)
    end

    # If there are only strings, concatenate them and return that. nil
    # entries contribute nothing. The buffer is a mutable copy so appending
    # works even when string literals are frozen.
    list.inject(''.dup) { |s, e| s << (e || '') }
  end

  # Class-level macro: defines an instance method #precedence that returns
  # +prec+.
  def self.precedence(prec)
    define_method(:precedence) { prec }
  end
  precedence BASE

  # String form of this parslet; wrapped in parentheses when +outer_prec+ is
  # lower than this parslet's own precedence.
  def to_s(outer_prec)
    if outer_prec < precedence
      "("+to_s_inner(precedence)+")"
    else
      to_s_inner(precedence)
    end
  end
  def inspect
    to_s(OUTER)
  end

  # Cause should return the current best approximation of this parslet
  # of what went wrong with the parse. Not relevant if the parse succeeds,
  # but needed for clever error reports.
  #
  def cause
    @last_cause
  end

  # Error tree returns what went wrong here plus what went wrong inside
  # subexpressions as a tree. The error stored for this node will be equal
  # with #cause. Returns nil when no cause is stored on this node.
  #
  def error_tree
    Parslet::ErrorTree.new(self) if cause?
  end
  def cause?
    !@last_cause.nil?
  end
private
  # Report/raise a parse error with the given message, printing the current
  # position as well. Appends 'at line X char Y.' to the message you give.
  # If +pos+ is given, it is used as the real position the error happened,
  # correcting the io's current position.
  #
  # The formatted message is also remembered as this parslet's cause.
  #
  def error(io, str, pos=nil)
    pre = io.string[0..(pos||io.pos)]
    lines = Array(pre.lines)

    if lines.empty?
      formatted_cause = str
    else
      pos = lines.last.length
      formatted_cause = "#{str} at line #{lines.count} char #{pos}."
    end

    @last_cause = formatted_cause

    raise Parslet::ParseFailed, formatted_cause, nil
  end

  # Emits a warning when both hashes share keys, because the merge in
  # #flatten_sequence keeps only the value from the second hash.
  def warn_about_duplicate_keys(h1, h2)
    d = h1.keys & h2.keys
    unless d.empty?
      warn "Duplicate subtrees while merging result of \n  #{self.inspect}\nonly the values"+
           " of the latter will be kept. (keys: #{d.inspect})"
    end
  end
end
@@ -0,0 +1,48 @@
1
# Wraps a piece of parslet definition and gives it a name. The wrapped
# piece is evaluated lazily, on first use, and the result is cached. This
# serves two purposes:
#
#   a) It avoids infinite recursion while the definition is evaluated.
#
#   b) It allows printing things by their name instead of by their
#      sometimes complicated content.
#
# You don't normally use this directly; instead you should generate it by
# using the structuring method Parslet#rule.
#
class Parslet::Atoms::Entity < Parslet::Atoms::Base
  attr_reader :name, :context, :block

  # name    - name of the rule; shown upcased by #to_s_inner.
  # context - object the block is instance_eval'ed against.
  # block   - block producing the wrapped parslet.
  def initialize(name, context, block)
    super()

    @name, @context, @block = name, context, block
  end

  # Delegates matching to the wrapped parslet.
  def try(io)
    parslet.apply(io)
  end

  # Returns the wrapped parslet, evaluating the block in +context+ the first
  # time it is needed. Raises NotImplementedError when the block yields a
  # false or nil value.
  def parslet
    @parslet ||= context.instance_eval(&block).tap do |built|
      raise_not_implemented unless built
    end
  end

  def to_s_inner(prec)
    name.to_s.upcase
  end

  def error_tree
    parslet.error_tree
  end

private
  # Raises NotImplementedError with a backtrace from which all frames that
  # mention this source file have been removed.
  def raise_not_implemented
    this_file = %r{#{Regexp.escape(__FILE__)}}
    # blatantly stolen from dependencies.rb in activesupport
    trace = caller.reject { |frame| frame =~ this_file }
    message = "rule(#{name.inspect}) { ... } returns nil. Still not implemented, but already used?"

    raise NotImplementedError, message, trace
  end
end
@@ -0,0 +1,57 @@
1
# Positive or negative lookahead. Checks whether the bound parslet matches
# at the current position and then rewinds the input, so nothing is
# consumed either way.
#
# Example:
#
#   str('foo').prsnt?   # matches when the input contains 'foo', but leaves it
#
class Parslet::Atoms::Lookahead < Parslet::Atoms::Base
  attr_reader :positive, :bound_parslet

  # bound_parslet - the parslet to look ahead with.
  # positive      - true for positive lookahead, false for negative.
  def initialize(bound_parslet, positive=true)
    # Positive and negative lookahead are told apart by testing this flag.
    @positive      = positive
    @bound_parslet = bound_parslet
  end

  # Applies the bound parslet and hands the outcome to #success or #fail.
  # The io position is restored in every case.
  def try(io)
    start = io.pos
    begin
      bound_parslet.apply(io)
    rescue Parslet::ParseFailed
      return fail(io)
    ensure
      io.pos = start
    end

    success(io)
  end

  # Called when the bound parslet did not match: an error for positive
  # lookahead, a nil result for negative lookahead.
  def fail(io)
    # TODO: Squash this down to nothing? Return value handling here...
    return nil unless positive

    error(io, "lookahead: #{bound_parslet.inspect} didn't match, but should have")
  end

  # Called when the bound parslet matched: a nil result for positive
  # lookahead (same TODO as in #fail), an error for negative lookahead.
  def success(io)
    return nil if positive

    error(io, "negative lookahead: #{bound_parslet.inspect} matched, but shouldn't have")
  end

  precedence LOOKAHEAD
  def to_s_inner(prec)
    prefix = positive ? '&' : '!'

    "#{prefix}#{bound_parslet.to_s(prec)}"
  end

  def error_tree
    bound_parslet.error_tree
  end
end
@@ -0,0 +1,31 @@
1
# Gives a match a name, which influences how the result tree is built.
#
# Example:
#
#   str('foo')            # will return 'foo',
#   str('foo').as(:foo)   # will return :foo => 'foo'
#
class Parslet::Atoms::Named < Parslet::Atoms::Base
  attr_reader :parslet, :name

  # parslet - the parslet whose result gets named.
  # name    - key under which the result is stored.
  def initialize(parslet, name)
    @parslet = parslet
    @name    = name
  end

  # Applies the wrapped parslet and wraps its flattened result in a
  # single-entry hash keyed by +name+. A failure of the wrapped parslet
  # propagates unchanged.
  def apply(io)
    produce_return_value(parslet.apply(io))
  end

  def to_s_inner(prec)
    inner = parslet.to_s(prec)
    "#{name}:#{inner}"
  end

  def error_tree
    parslet.error_tree
  end

private
  # Builds the { name => value } hash returned from #apply.
  def produce_return_value(val)
    flattened = flatten(val)
    { name => flattened }
  end
end
@@ -0,0 +1,28 @@
1
# Matches a special kind of regular expression that only ever matches one
# character at a time. Useful members of this family are: character ranges,
# \w, \d, \r, \n, ...
#
# Example:
#
#   match('[a-z]')  # matches a-z
#   match('\s')     # like regexps: matches space characters
#
class Parslet::Atoms::Re < Parslet::Atoms::Base
  attr_reader :match

  # match - regular expression source describing a single character.
  def initialize(match)
    @match = match
  end

  # Reads one unit from +io+ (io.read(1)) and returns it if it matches the
  # pattern. Reports an error on end of input or on a mismatch.
  def try(io)
    # Resolve the pattern before touching io, so an invalid pattern is
    # reported without having consumed anything.
    r = regexp
    s = io.read(1)
    error(io, "Premature end of input") unless s
    error(io, "Failed to match #{match.inspect[1..-2]}") unless s.match(r)
    s
  end

  def to_s_inner(prec)
    match.inspect[1..-2]
  end

private
  # The compiled pattern. Compiled on first use and then reused, instead of
  # being rebuilt for every character that is matched.
  # NOTE(review): assumes +match+ is not modified in place after the first
  # use - confirm no caller does that.
  def regexp
    @regexp ||= Regexp.new(match, Regexp::MULTILINE)
  end
end
28
+
@@ -0,0 +1,58 @@
1
+
2
# Matches a parslet repeatedly.
#
# Example:
#
#   str('a').repeat(1,3)  # matches 'a' at least once, but at most three times
#   str('a').maybe        # matches 'a' if it is present in the input (repeat(0,1))
#
class Parslet::Atoms::Repetition < Parslet::Atoms::Base
  attr_reader :min, :max, :parslet

  # parslet - the parslet to repeat.
  # min     - minimum number of matches required.
  # max     - number of matches after which matching stops, or nil for no
  #           upper bound.
  # tag     - tag placed at the head of the result array (used when the
  #           result is flattened).
  def initialize(parslet, min, max, tag=:repetition)
    @parslet = parslet
    @min = min
    @max = max
    @tag = tag
  end

  # Applies the parslet over and over, collecting the results behind the
  # tag. Stops when +max+ matches have been collected or when the parslet
  # fails; in the latter case fewer than +min+ matches is an error.
  def try(io)
    collected = [@tag]   # the tag comes first, the matches follow
    matched = 0

    loop do
      begin
        collected << parslet.apply(io)
        matched += 1

        # With an upper bound given, stop as soon as it has been reached.
        return collected if max && matched >= max
      rescue Parslet::ParseFailed
        # The parslet stopped matching. +matched+ holds the number of
        # successes so far; complain if that is below the minimum.
        error(io, "Expected at least #{min} of #{parslet.inspect}") if matched < min
        return collected
      end
    end
  end

  precedence REPETITION
  def to_s_inner(prec)
    suffix = (min == 0 && max == 1) ? '?' : "{#{min}, #{max}}"

    parslet.to_s(prec) + suffix
  end

  # Either the repetition itself failed or the parslet inside failed to
  # repeat; the repetition's own cause takes priority.
  def cause
    super || parslet.cause
  end

  # Wraps the inner error tree in a node for this repetition when a cause
  # is stored on it; otherwise passes the inner error tree through.
  def error_tree
    return parslet.error_tree unless cause?

    Parslet::ErrorTree.new(self, parslet.error_tree)
  end
end
58
+