parslet 0.9.0 → 0.10.1
Sign up to get free protection for your applications and to get access to all the features.
- data/Gemfile +4 -0
- data/HISTORY.txt +24 -1
- data/README +23 -66
- data/Rakefile +10 -6
- data/lib/parslet.rb +50 -137
- data/lib/parslet/atoms.rb +12 -479
- data/lib/parslet/atoms/alternative.rb +40 -0
- data/lib/parslet/atoms/base.rb +196 -0
- data/lib/parslet/atoms/entity.rb +48 -0
- data/lib/parslet/atoms/lookahead.rb +57 -0
- data/lib/parslet/atoms/named.rb +31 -0
- data/lib/parslet/atoms/re.rb +28 -0
- data/lib/parslet/atoms/repetition.rb +58 -0
- data/lib/parslet/atoms/sequence.rb +37 -0
- data/lib/parslet/atoms/str.rb +26 -0
- data/lib/parslet/error_tree.rb +2 -2
- data/lib/parslet/expression.rb +41 -0
- data/lib/parslet/expression/treetop.rb +53 -0
- data/lib/parslet/parser.rb +17 -0
- data/lib/parslet/pattern.rb +22 -12
- data/lib/parslet/pattern/binding.rb +25 -16
- data/lib/parslet/pattern/context.rb +24 -0
- data/lib/parslet/transform.rb +70 -25
- metadata +37 -8
@@ -0,0 +1,40 @@
|
|
1
|
+
|
2
|
+
# Alternative during matching. Contains a list of parslets that are tried
# one after the other, in order. Only fails if all alternatives fail.
#
# Example:
#
#   str('a') | str('b')   # matches either 'a' or 'b'
#
class Parslet::Atoms::Alternative < Parslet::Atoms::Base
  attr_reader :alternatives

  # Creates an alternative out of the given parslets; they are tried in the
  # order given.
  #
  def initialize(*alternatives)
    @alternatives = alternatives
  end

  # Returns a new Alternative that tries all alternatives of this one and
  # then +parslet+. The receiver is left untouched, so an alternative that is
  # stored somewhere and extended in several places does not silently grow
  # extra branches. Chains like a | b | c still produce one flat list instead
  # of a nested tree of alternatives.
  #
  def |(parslet)
    self.class.new(*(alternatives + [parslet]))
  end

  # Applies each alternative in turn and returns the result of the first one
  # that matches. Raises Parslet::ParseFailed (through #error) if none does.
  #
  def try(io)
    alternatives.each { |alternative|
      begin
        return alternative.apply(io)
      rescue Parslet::ParseFailed
        # This alternative did not match, move on to the next one.
      end
    }
    # If we reach this point, all alternatives have failed.
    error(io, "Expected one of #{alternatives.inspect}.")
  end

  precedence ALTERNATE
  def to_s_inner(prec)
    alternatives.map { |a| a.to_s(prec) }.join(' | ')
  end

  # Error tree node for this alternative, with the error trees of all
  # alternatives as children.
  #
  def error_tree
    Parslet::ErrorTree.new(self, *alternatives.
      map { |child| child.error_tree })
  end
end
|
@@ -0,0 +1,196 @@
|
|
1
|
+
# Base class for all parslets, handles orchestration of calls and implements
# a lot of the operator and chaining methods.
#
class Parslet::Atoms::Base
  include Parslet::Atoms::Precedence

  # Parses +io+ completely and returns the flattened result. Anything that
  # responds to #to_str is wrapped in a StringIO first. Raises
  # Parslet::ParseFailed if the parslet does not match or if input is left
  # over after the match.
  #
  def parse(io)
    if io.respond_to? :to_str
      io = StringIO.new(io)
    end

    result = apply(io)

    # If we haven't consumed the input, then the pattern doesn't match. Try
    # to provide a good error message (even asking down below)
    unless io.eof?
      # Do we know why we stopped matching input? If yes, that's a good
      # error to fail with. Otherwise just report that we cannot consume the
      # input.
      if cause
        raise Parslet::ParseFailed, "Unconsumed input, maybe because of this: #{cause}"
      else
        error(io, "Don't know what to do with #{io.string[io.pos,100]}")
      end
    end

    return flatten(result)
  end

  # Runs #try on +io+. On success the stored cause is cleared and the raw
  # (unflattened) result is returned. On Parslet::ParseFailed the io is
  # rewound to where it was before the attempt and the error is re-raised.
  #
  def apply(io)
    old_pos = io.pos

    begin
      r = try(io)
      @last_cause = nil
      return r
    rescue Parslet::ParseFailed => ex
      io.pos = old_pos; raise ex
    end
  end

  # Wraps this parslet in a Repetition with the given bounds.
  def repeat(min=0, max=nil)
    Parslet::Atoms::Repetition.new(self, min, max)
  end
  # Wraps this parslet in a Repetition(0, 1) that is tagged :maybe.
  def maybe
    Parslet::Atoms::Repetition.new(self, 0, 1, :maybe)
  end
  # Builds a Sequence of this parslet followed by +parslet+.
  def >>(parslet)
    Parslet::Atoms::Sequence.new(self, parslet)
  end
  # Builds an Alternative of this parslet and +parslet+.
  def |(parslet)
    Parslet::Atoms::Alternative.new(self, parslet)
  end
  # Builds a negative Lookahead for this parslet.
  def absnt?
    Parslet::Atoms::Lookahead.new(self, false)
  end
  # Builds a positive Lookahead for this parslet.
  def prsnt?
    Parslet::Atoms::Lookahead.new(self, true)
  end
  # Wraps this parslet in a Named parslet with the given +name+.
  def as(name)
    Parslet::Atoms::Named.new(self, name)
  end

  # Turns a raw result into the value handed back to the caller. Anything
  # that is not exactly an Array is passed through. Arrays are expected to
  # start with a tag (:sequence, :maybe or :repetition) that selects how the
  # remaining, recursively flattened, elements are merged.
  #
  def flatten(value)
    # Passes through everything that isn't an array of things
    return value unless value.instance_of? Array

    # Extracts the s-expression tag
    tag, *tail = value

    # Merges arrays:
    result = tail.
      map { |e| flatten(e) }      # first flatten each element

    case tag
      when :sequence
        return flatten_sequence(result)
      when :maybe
        return result.first
      when :repetition
        return flatten_repetition(result)
    end

    # Explicit raise rather than Kernel#fail: subclasses (Lookahead) define
    # their own #fail(io), which would otherwise be called here.
    raise "BUG: Unknown tag #{tag.inspect}."
  end

  # Merges the flattened elements of a sequence pairwise, starting from an
  # empty string.
  #
  def flatten_sequence(list)
    list.inject('') { |r, e|        # and then merge flat elements
      case [r, e].map { |o| o.class }
        when [Hash, Hash]             # two keyed subtrees: make one
          warn_about_duplicate_keys(r, e)
          r.merge(e)
        # a keyed tree and an array (push down)
        when [Hash, Array]
          [r] + e
        when [Array, Hash]
          r + [e]
        when [String, String]
          r << e
      else
        if r.instance_of? Hash
          r     # Ignore e, since its not a hash we can merge
        else
          # Now e is either nil, in which case we drop it, or something else.
          # If it is something else, it is probably more important than r,
          # since we've checked for important values of r above.
          e||r
        end
      end
    }
  end

  # Merges the flattened elements of a repetition: hashes win over arrays,
  # arrays win over strings.
  #
  def flatten_repetition(list)
    if list.any? { |e| e.instance_of?(Hash) }
      # If keyed subtrees are in the array, we'll want to discard all
      # strings inbetween. To keep them, name them.
      return list.select { |e| e.instance_of?(Hash) }
    end

    if list.any? { |e| e.instance_of?(Array) }
      # If any arrays are nested in this array, flatten all arrays to this
      # level.
      return list.
        select { |e| e.instance_of?(Array) }.
        flatten(1)
    end

    # If there are only strings, concatenate them and return that.
    list.inject('') { |s,e| s<<(e||'') }
  end

  # Class-level macro: defines an instance method #precedence that returns
  # +prec+.
  #
  def self.precedence(prec)
    define_method(:precedence) { prec }
  end
  precedence BASE

  # String representation of this parslet; wrapped in parentheses when
  # +outer_prec+ is lower than this parslet's own precedence. +outer_prec+
  # defaults to OUTER so that a plain #to_s call and string interpolation
  # work instead of raising ArgumentError.
  #
  def to_s(outer_prec=OUTER)
    if outer_prec < precedence
      "("+to_s_inner(precedence)+")"
    else
      to_s_inner(precedence)
    end
  end
  def inspect
    to_s(OUTER)
  end

  # Cause should return the current best approximation of this parslet
  # of what went wrong with the parse. Not relevant if the parse succeeds,
  # but needed for clever error reports.
  #
  def cause
    @last_cause
  end

  # Error tree returns what went wrong here plus what went wrong inside
  # subexpressions as a tree. The error stored for this node will be equal
  # with #cause.
  #
  def error_tree
    Parslet::ErrorTree.new(self) if cause?
  end
  # True if an error has been recorded on this parslet and not been cleared
  # by a later successful #apply.
  def cause?
    !@last_cause.nil?
  end
private
  # Report/raise a parse error with the given message, printing the current
  # position as well. Appends 'at line X char Y.' to the message you give.
  # If +pos+ is given, it is used as the real position the error happened,
  # correcting the io's current position.
  #
  def error(io, str, pos=nil)
    pre = io.string[0..(pos||io.pos)]
    lines = Array(pre.lines)

    if lines.empty?
      formatted_cause = str
    else
      char_pos = lines.last.length
      formatted_cause = "#{str} at line #{lines.count} char #{char_pos}."
    end

    # Remember the message so that #cause / #error_tree can report it later.
    @last_cause = formatted_cause

    raise Parslet::ParseFailed, formatted_cause, nil
  end
  # Prints a warning when both hashes share keys, since merging them keeps
  # only the values of the second hash for those keys.
  #
  def warn_about_duplicate_keys(h1, h2)
    d = h1.keys & h2.keys
    unless d.empty?
      warn "Duplicate subtrees while merging result of \n  #{self.inspect}\nonly the values"+
           " of the latter will be kept. (keys: #{d.inspect})"
    end
  end
end
|
@@ -0,0 +1,48 @@
|
|
1
|
+
# Wraps a piece of a parslet definition and gives it a name. The wrapped
# piece is evaluated lazily (on first use) and then cached. This serves two
# purposes:
#
#   a) Avoid infinite recursion during evaluation of the definition
#
#   b) Be able to print things by their name, not by their sometimes
#      complicated content.
#
# You don't normally use this directly; instead you generate it by using the
# structuring method Parslet#rule.
#
class Parslet::Atoms::Entity < Parslet::Atoms::Base
  attr_reader :name, :context, :block

  def initialize(name, context, block)
    super()

    @name, @context, @block = name, context, block
  end

  # Delegates matching to the wrapped parslet.
  def try(io)
    parslet.apply(io)
  end

  # The wrapped parslet. Produced by evaluating the block in the context on
  # first access; a block that yields nil/false is reported through
  # NotImplementedError.
  def parslet
    @parslet ||= context.instance_eval(&block).tap { |definition|
      raise_not_implemented unless definition
    }
  end

  # Entities print as their upcased name.
  def to_s_inner(prec)
    name.to_s.upcase
  end

  def error_tree
    parslet.error_tree
  end

private
  # Raises NotImplementedError with a backtrace from which all frames that
  # mention this file have been removed.
  def raise_not_implemented
    # blatantly stolen from dependencies.rb in activesupport
    outside_frames = caller.reject { |frame| frame =~ %r{#{Regexp.escape(__FILE__)}} }

    not_implemented = NotImplementedError.new("rule(#{name.inspect}) { ... } returns nil. Still not implemented, but already used?")
    not_implemented.set_backtrace(outside_frames)

    raise not_implemented
  end
end
|
@@ -0,0 +1,57 @@
|
|
1
|
+
# Positive or negative lookahead. Never consumes input: the io position is
# put back to where it was, whatever the outcome.
#
# Example:
#
#   str('foo').prsnt?   # matches when the input contains 'foo', but leaves it
#
class Parslet::Atoms::Lookahead < Parslet::Atoms::Base
  attr_reader :positive
  attr_reader :bound_parslet

  def initialize(bound_parslet, positive=true)
    # The flag tells positive lookahead from negative lookahead.
    @bound_parslet, @positive = bound_parslet, positive
  end

  # Applies the bound parslet, rewinds the io and then reports the outcome
  # through #success or #fail.
  def try(io)
    start = io.pos
    begin
      bound_parslet.apply(io)
    rescue Parslet::ParseFailed
      return fail(io)
    ensure
      io.pos = start
    end
    success(io)
  end

  # Called when the bound parslet did not match: that is an error for a
  # positive lookahead and a (nil) match for a negative one.
  def fail(io)
    # TODO: Squash this down to nothing? Return value handling here...
    return nil unless positive

    error(io, "lookahead: #{bound_parslet.inspect} didn't match, but should have")
  end

  # Called when the bound parslet matched: that is a (nil) match for a
  # positive lookahead and an error for a negative one.
  def success(io)
    return nil if positive   # see #fail, same TODO

    error(
      io,
      "negative lookahead: #{bound_parslet.inspect} matched, but shouldn't have")
  end

  precedence LOOKAHEAD
  def to_s_inner(prec)
    prefix = if positive then '&' else '!' end

    "#{prefix}#{bound_parslet.to_s(prec)}"
  end

  def error_tree
    bound_parslet.error_tree
  end
end
|
@@ -0,0 +1,31 @@
|
|
1
|
+
# Gives a match a name, which influences how the result tree is built.
#
# Example:
#
#   str('foo')            # will return 'foo',
#   str('foo').as(:foo)   # will return :foo => 'foo'
#
class Parslet::Atoms::Named < Parslet::Atoms::Base
  attr_reader :parslet, :name

  def initialize(parslet, name)
    @parslet = parslet
    @name = name
  end

  # Applies the wrapped parslet and wraps its flattened result in a hash
  # keyed by the name.
  def apply(io)
    produce_return_value(parslet.apply(io))
  end

  def to_s_inner(prec)
    "#{name}:#{parslet.to_s(prec)}"
  end

  def error_tree
    parslet.error_tree
  end
private
  # Builds the single-entry hash returned for a named match.
  def produce_return_value(val)
    flattened = flatten(val)
    { name => flattened }
  end
end
|
@@ -0,0 +1,28 @@
|
|
1
|
+
# Matches a special kind of regular expression that only ever matches one
# character at a time. Useful members of this family are: character ranges,
# \w, \d, \r, \n, ...
#
# Example:
#
#   match('[a-z]')  # matches a-z
#   match('\s')     # like regexps: matches space characters
#
class Parslet::Atoms::Re < Parslet::Atoms::Base
  attr_reader :match

  # +match+ is the regular expression source that a single unit of input is
  # tested against.
  #
  def initialize(match)
    @match = match
  end

  # Reads one unit from +io+ (io.read(1)) and returns it if it matches the
  # expression. Raises Parslet::ParseFailed (through #error) at the end of
  # input or if the unit read does not match.
  #
  def try(io)
    # Compile the expression once per parslet instead of once per attempt;
    # this method runs for every single character that is matched. Compiling
    # lazily (and before reading) keeps error timing for an invalid
    # expression the same as compiling on every call.
    r = (@re ||= Regexp.new(match, Regexp::MULTILINE))
    s = io.read(1)
    error(io, "Premature end of input") unless s
    error(io, "Failed to match #{match.inspect[1..-2]}") unless s.match(r)
    return s
  end

  # The expression source as printed by #inspect, without its first and last
  # character.
  #
  def to_s_inner(prec)
    match.inspect[1..-2]
  end
end
|
28
|
+
|
@@ -0,0 +1,58 @@
|
|
1
|
+
|
2
|
+
# Matches a parslet a number of times in a row.
#
# Example:
#
#   str('a').repeat(1,3)  # matches 'a' at least once, but at most three times
#   str('a').maybe        # matches 'a' if it is present in the input (repeat(0,1))
#
class Parslet::Atoms::Repetition < Parslet::Atoms::Base
  attr_reader :min, :max, :parslet

  def initialize(parslet, min, max, tag=:repetition)
    @parslet, @min, @max, @tag = parslet, min, max, tag
  end

  # Applies the wrapped parslet over and over, collecting the results in an
  # array that starts with the tag (which is what flattening looks at).
  # Stops when max is reached or when the parslet fails; a failure before
  # min matches were made is a parse error.
  def try(io)
    matches = 0
    collected = [@tag]
    loop do
      begin
        collected << parslet.apply(io)
        matches += 1

        # With an upper bound we are not greedy: stop once it is reached.
        return collected if max && matches >= max
      rescue Parslet::ParseFailed
        # The greedy matcher hit a failure; matches holds the number of
        # successes so far, which must be at least min.
        error(io, "Expected at least #{min} of #{parslet.inspect}") if matches < min
        return collected
      end
    end
  end

  precedence REPETITION
  def to_s_inner(prec)
    suffix = (min == 0 && max == 1) ? '?' : "{#{min}, #{max}}"

    parslet.to_s(prec) + suffix
  end

  # Either the repetition itself failed or the parslet inside failed to
  # repeat.
  def cause
    super || parslet.cause
  end

  def error_tree
    inner = parslet.error_tree
    return inner unless cause?

    Parslet::ErrorTree.new(self, inner)
  end
end
|
58
|
+
|