parslet 0.9.0 → 0.10.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,40 @@
1
+
2
# Alternative during matching. Holds a list of parslets that are tried one
# after the other, in order. The result of the first alternative that matches
# is returned. Only fails if all alternatives fail.
#
# Example:
#
#   str('a') | str('b')   # matches either 'a' or 'b'
#
class Parslet::Atoms::Alternative < Parslet::Atoms::Base
  attr_reader :alternatives

  # Stores the given parslets, in order, as the list of alternatives.
  def initialize(*alternatives)
    @alternatives = alternatives
  end

  # Returns a new Alternative consisting of this one's alternatives followed
  # by +parslet+. The receiver is left untouched: an alternative that has
  # been assigned to a variable and is then extended in two different places
  # must not change behind the back of either user.
  def |(parslet)
    self.class.new(*(alternatives + [parslet]))
  end

  # Applies each alternative in turn and returns the result of the first one
  # that does not raise Parslet::ParseFailed. Raises (through #error) when
  # none of them matches.
  def try(io)
    alternatives.each do |alternative|
      begin
        return alternative.apply(io)
      rescue Parslet::ParseFailed
        # This alternative did not match; move on to the next one.
      end
    end

    # If we reach this point, all alternatives have failed.
    error(io, "Expected one of #{alternatives.inspect}.")
  end

  precedence ALTERNATE

  # Renders all alternatives separated by ' | '.
  def to_s_inner(prec)
    alternatives.map { |a| a.to_s(prec) }.join(' | ')
  end

  # Builds an error tree node for this parslet with the error trees of all
  # alternatives as children.
  def error_tree
    children = alternatives.map { |child| child.error_tree }
    Parslet::ErrorTree.new(self, *children)
  end
end
@@ -0,0 +1,196 @@
1
# Base class for all parslets, handles orchestration of calls and implements
# a lot of the operator and chaining methods.
#
class Parslet::Atoms::Base
  include Parslet::Atoms::Precedence

  # Parses +io+ completely and returns the flattened result.
  #
  # Anything that responds to #to_str is wrapped in a StringIO first. Raises
  # Parslet::ParseFailed if this parslet does not match or if input is left
  # over after the match.
  def parse(io)
    io = StringIO.new(io) if io.respond_to? :to_str

    result = apply(io)

    # If we haven't consumed the input, then the pattern doesn't match. Try
    # to provide a good error message (even asking down below)
    unless io.eof?
      # Do we know why we stopped matching input? If yes, that's a good
      # error to fail with. Otherwise just report that we cannot consume the
      # input.
      if cause
        raise Parslet::ParseFailed, "Unconsumed input, maybe because of this: #{cause}"
      else
        error(io, "Don't know what to do with #{io.string[io.pos,100]}")
      end
    end

    flatten(result)
  end

  # Runs #try on +io+. On success the stored cause is cleared and the raw
  # (unflattened) result is returned. On Parslet::ParseFailed the io position
  # is rewound to where it was before the attempt and the error is re-raised.
  def apply(io)
    old_pos = io.pos

    begin
      r = try(io)
      @last_cause = nil
      return r
    rescue Parslet::ParseFailed
      io.pos = old_pos
      raise
    end
  end

  # Matches this parslet at least +min+ and at most +max+ times (no upper
  # bound is passed on when +max+ is nil).
  def repeat(min=0, max=nil)
    Parslet::Atoms::Repetition.new(self, min, max)
  end

  # Matches this parslet zero or one time; tagged :maybe for flattening.
  def maybe
    Parslet::Atoms::Repetition.new(self, 0, 1, :maybe)
  end

  # Sequence: this parslet followed by +parslet+.
  def >>(parslet)
    Parslet::Atoms::Sequence.new(self, parslet)
  end

  # Alternative: this parslet or +parslet+.
  def |(parslet)
    Parslet::Atoms::Alternative.new(self, parslet)
  end

  # Negative lookahead on this parslet.
  def absnt?
    Parslet::Atoms::Lookahead.new(self, false)
  end

  # Positive lookahead on this parslet.
  def prsnt?
    Parslet::Atoms::Lookahead.new(self, true)
  end

  # Names the result of this parslet +name+.
  def as(name)
    Parslet::Atoms::Named.new(self, name)
  end

  # Turns a tagged result array (s-expression of the form [tag, *elements])
  # into its final shape. Anything that is not exactly an Array is returned
  # unchanged. Raises a RuntimeError for an unknown tag.
  def flatten(value)
    # Passes through everything that isn't an array of things
    return value unless value.instance_of? Array

    # Extracts the s-expression tag
    tag, *tail = value

    # Merges arrays:
    result = tail.
      map { |e| flatten(e) }                    # first flatten each element

    case tag
      when :sequence
        return flatten_sequence(result)
      when :maybe
        return result.first
      when :repetition
        return flatten_repetition(result)
    end

    # Deliberately +raise+ and not +fail+: subclasses (Lookahead) define
    # their own #fail(io), which would otherwise be called here instead of
    # Kernel#fail.
    raise "BUG: Unknown tag #{tag.inspect}."
  end

  # Merges the already flattened elements of a sequence from left to right,
  # starting with an empty string:
  #
  # * two hashes are merged (later keys win, with a warning on duplicates)
  # * a hash and an array are combined into one array
  # * two strings are concatenated
  # * otherwise a hash on the left is kept as is; in all remaining cases the
  #   right element replaces the left one unless it is nil or false.
  def flatten_sequence(list)
    list.inject('') { |r, e|        # and then merge flat elements
      case [r, e].map { |o| o.class }
        when [Hash, Hash]             # two keyed subtrees: make one
          warn_about_duplicate_keys(r, e)
          r.merge(e)
        # a keyed tree and an array (push down)
        when [Hash, Array]
          [r] + e
        when [Array, Hash]
          r + [e]
        when [String, String]
          r << e
      else
        if r.instance_of? Hash
          r   # Ignore e, since its not a hash we can merge
        else
          # Now e is either nil, in which case we drop it, or something else.
          # If it is something else, it is probably more important than r,
          # since we've checked for important values of r above.
          e||r
        end
      end
    }
  end

  # Merges the already flattened elements of a repetition:
  #
  # * if any element is a hash, only the hashes are kept
  # * else if any element is an array, only the arrays are kept and
  #   flattened by one level
  # * else all elements are concatenated into one string (nil counts as '').
  def flatten_repetition(list)
    if list.any? { |e| e.instance_of?(Hash) }
      # If keyed subtrees are in the array, we'll want to discard all
      # strings inbetween. To keep them, name them.
      return list.select { |e| e.instance_of?(Hash) }
    end

    if list.any? { |e| e.instance_of?(Array) }
      # If any arrays are nested in this array, flatten all arrays to this
      # level.
      return list.
        select { |e| e.instance_of?(Array) }.
        flatten(1)
    end

    # If there are only strings, concatenate them and return that.
    list.inject('') { |s,e| s<<(e||'') }
  end

  # Class-level macro: defines #precedence on the class to return +prec+.
  def self.precedence(prec)
    define_method(:precedence) { prec }
  end
  precedence BASE

  # String representation of this parslet. It is wrapped in parentheses when
  # +outer_prec+ is lower than this parslet's own precedence. +outer_prec+
  # defaults to OUTER so that a plain #to_s (and string interpolation) works
  # and gives the same result as #inspect.
  def to_s(outer_prec=OUTER)
    if outer_prec < precedence
      "("+to_s_inner(precedence)+")"
    else
      to_s_inner(precedence)
    end
  end

  def inspect
    to_s(OUTER)
  end

  # Cause should return the current best approximation of this parslet
  # of what went wrong with the parse. Not relevant if the parse succeeds,
  # but needed for clever error reports.
  #
  def cause
    @last_cause
  end

  # Error tree returns what went wrong here plus what went wrong inside
  # subexpressions as a tree. The error stored for this node will be equal
  # with #cause. Returns nil when no cause is stored.
  #
  def error_tree
    Parslet::ErrorTree.new(self) if cause?
  end

  # True if an error cause has been stored on this parslet itself.
  def cause?
    not @last_cause.nil?
  end

private
  # Report/raise a parse error with the given message, printing the current
  # position as well. Appends 'at line X char Y.' to the message you give.
  # If +pos+ is given, it is used as the real position the error happened,
  # correcting the io's current position.
  #
  # The message is also stored as this parslet's cause. Always raises
  # Parslet::ParseFailed.
  #
  def error(io, str, pos=nil)
    # Input up to and including the character at the error position.
    pre = io.string[0..(pos||io.pos)]
    lines = Array(pre.lines)

    if lines.empty?
      formatted_cause = str
    else
      pos = lines.last.length
      formatted_cause = "#{str} at line #{lines.count} char #{pos}."
    end

    @last_cause = formatted_cause

    raise Parslet::ParseFailed, formatted_cause, nil
  end

  # Warns when +h1+ and +h2+ share keys, since merging them keeps only the
  # values of the latter.
  def warn_about_duplicate_keys(h1, h2)
    d = h1.keys & h2.keys
    unless d.empty?
      warn "Duplicate subtrees while merging result of \n #{self.inspect}\nonly the values"+
           " of the latter will be kept. (keys: #{d.inspect})"
    end
  end
end
@@ -0,0 +1,48 @@
1
# Wraps a piece of parslet definition and gives it a name. The wrapped piece
# is evaluated lazily, on first use, and then cached. This has two purposes:
#
# a) Avoid infinite recursion during evaluation of the definition
#
# b) Be able to print things by their name, not by their sometimes
#    complicated content.
#
# You don't normally use this directly; instead you should generate it by
# using the structuring method Parslet#rule.
#
class Parslet::Atoms::Entity < Parslet::Atoms::Base
  attr_reader :name, :context, :block

  # +block+ is the definition; it is later evaluated in the scope of
  # +context+ (see #parslet).
  def initialize(name, context, block)
    super()

    @name, @context, @block = name, context, block
  end

  # Delegates matching to the wrapped parslet.
  def try(io)
    parslet.apply(io)
  end

  # Returns the wrapped parslet. On first call the block is instance_eval'ed
  # on the context and a truthy result is cached. A nil/false result raises
  # NotImplementedError and nothing is cached.
  def parslet
    return @parslet if @parslet

    definition = context.instance_eval(&block)
    raise_not_implemented unless definition

    @parslet = definition
  end

  # An entity prints as its upcased name, not as its content.
  def to_s_inner(prec)
    name.to_s.upcase
  end

  # The error tree of an entity is that of the wrapped parslet.
  def error_tree
    parslet.error_tree
  end

private
  # Raises NotImplementedError with a backtrace from which all frames that
  # mention this file have been removed.
  def raise_not_implemented
    this_file = %r{#{Regexp.escape(__FILE__)}}
    # blatantly stolen from dependencies.rb in activesupport
    foreign_frames = caller.reject { |frame| frame =~ this_file }

    exception = NotImplementedError.new("rule(#{name.inspect}) { ... } returns nil. Still not implemented, but already used?")
    exception.set_backtrace(foreign_frames)

    raise exception
  end
end
@@ -0,0 +1,57 @@
1
# Either positive or negative lookahead, doesn't consume its input.
#
# Example:
#
#   str('foo').prsnt?   # matches when the input contains 'foo', but leaves it
#
class Parslet::Atoms::Lookahead < Parslet::Atoms::Base
  attr_reader :positive
  attr_reader :bound_parslet

  # +positive+ selects the flavour: true for positive lookahead (the bound
  # parslet must match), false for negative lookahead (it must not match).
  def initialize(bound_parslet, positive=true)
    @bound_parslet = bound_parslet
    @positive = positive
  end

  # Applies the bound parslet and then puts the io position back where it
  # was, whatever the outcome. Dispatches to #success or #fail depending on
  # whether the bound parslet matched.
  def try(io)
    start = io.pos

    begin
      bound_parslet.apply(io)
    rescue Parslet::ParseFailed
      return fail(io)
    ensure
      # Runs on every path out of the begin block: lookahead never consumes.
      io.pos = start
    end

    success(io)
  end

  # Called when the bound parslet did not match. That is an error for a
  # positive lookahead; a negative lookahead yields nil.
  def fail(io)
    # TODO: Squash this down to nothing? Return value handling here...
    return nil unless positive

    error(io, "lookahead: #{bound_parslet.inspect} didn't match, but should have")
  end

  # Called when the bound parslet matched. A positive lookahead yields nil
  # (same TODO as in #fail); for a negative lookahead this is an error.
  def success(io)
    return nil if positive

    error(
      io,
      "negative lookahead: #{bound_parslet.inspect} matched, but shouldn't have")
  end

  precedence LOOKAHEAD

  # Prints as '&' (positive) or '!' (negative) followed by the bound parslet.
  def to_s_inner(prec)
    marker = if positive then '&' else '!' end

    "#{marker}#{bound_parslet.to_s(prec)}"
  end

  # The error tree of a lookahead is that of the bound parslet.
  def error_tree
    bound_parslet.error_tree
  end
end
+ end
@@ -0,0 +1,31 @@
1
# Names a match to influence tree construction.
#
# Example:
#
#   str('foo')            # will return 'foo',
#   str('foo').as(:foo)   # will return :foo => 'foo'
#
class Parslet::Atoms::Named < Parslet::Atoms::Base
  attr_reader :parslet, :name

  def initialize(parslet, name)
    @parslet = parslet
    @name    = name
  end

  # Applies the wrapped parslet and wraps its flattened result in a
  # single-entry hash keyed by the name. Note that this overrides #apply
  # itself rather than #try.
  def apply(io)
    produce_return_value(parslet.apply(io))
  end

  # Prints as 'name:' followed by the wrapped parslet.
  def to_s_inner(prec)
    "#{name}:#{parslet.to_s(prec)}"
  end

  # The error tree of a named parslet is that of the wrapped parslet.
  def error_tree
    parslet.error_tree
  end

private
  # Builds the { name => flattened value } hash returned from #apply.
  def produce_return_value(val)
    flattened = flatten(val)
    { name => flattened }
  end
end
@@ -0,0 +1,28 @@
1
# Matches a special kind of regular expression that only ever matches one
# character at a time. Useful members of this family are: character ranges,
# \w, \d, \r, \n, ...
#
# Example:
#
#   match('[a-z]')  # matches a-z
#   match('\s')     # like regexps: matches space characters
#
class Parslet::Atoms::Re < Parslet::Atoms::Base
  attr_reader :match

  # +match+ is the source of the regular expression, as a string.
  def initialize(match)
    @match = match
  end

  # Reads one character from +io+ and returns it if it matches the pattern.
  # Raises (through #error) at end of input or when the character does not
  # match.
  def try(io)
    # Fetch the pattern before touching the io, so that an invalid pattern
    # raises before any input has been read.
    pattern = re

    s = io.read(1)
    error(io, "Premature end of input") unless s
    error(io, "Failed to match #{match.inspect[1..-2]}") unless s.match(pattern)
    s
  end

  # Prints as the pattern source (the inspected string without its quotes).
  def to_s_inner(prec)
    match.inspect[1..-2]
  end

private
  # The compiled pattern. #try runs once per input character, so the Regexp
  # is built on first use and then reused instead of being recompiled on
  # every call.
  def re
    @re ||= Regexp.new(match, Regexp::MULTILINE)
  end
end
28
+
@@ -0,0 +1,58 @@
1
+
2
# Matches a parslet repeatedly.
#
# Example:
#
#   str('a').repeat(1,3)  # matches 'a' at least once, but at most three times
#   str('a').maybe        # matches 'a' if it is present in the input (repeat(0,1))
#
class Parslet::Atoms::Repetition < Parslet::Atoms::Base
  attr_reader :min, :max, :parslet

  # +tag+ becomes the first element of the result array and tells
  # Base#flatten how to treat it (:repetition or :maybe).
  def initialize(parslet, min, max, tag=:repetition)
    @parslet = parslet
    @min = min
    @max = max
    @tag = tag
  end

  # Applies the parslet over and over, collecting the results behind the
  # tag. Stops at the first failure or once +max+ matches were collected (if
  # +max+ is set). Raises (through #error) when fewer than +min+ matches were
  # found.
  #
  # NOTE(review): with no +max+ and a parslet that can match without
  # consuming input, this loop does not look like it can terminate - confirm.
  def try(io)
    collected = [@tag]    # the tag comes first (for flattening)
    successes = 0

    loop do
      begin
        collected << parslet.apply(io)
      rescue Parslet::ParseFailed
        # Greedy matcher has produced a failure. Check that the number of
        # successes so far satisfies the lower bound.
        error(io, "Expected at least #{min} of #{parslet.inspect}") if successes < min
        return collected
      end

      successes += 1

      # If we're not greedy (max is defined), check if that has been
      # reached.
      return collected if max && successes >= max
    end
  end

  precedence REPETITION

  # Prints as the parslet followed by '?' for {0, 1} and by '{min, max}'
  # otherwise.
  def to_s_inner(prec)
    optional = (min == 0 && max == 1)
    suffix = optional ? '?' : "{#{min}, #{max}}"

    parslet.to_s(prec) + suffix
  end

  # Either the repetition failed or the parslet inside failed to repeat.
  def cause
    super || parslet.cause
  end

  # Wraps the inner error tree in a node for this repetition if the
  # repetition itself has a stored cause; otherwise hands back the inner
  # error tree directly.
  def error_tree
    inner = parslet.error_tree
    return inner unless cause?

    Parslet::ErrorTree.new(self, inner)
  end
end
58
+