parslet 0.9.0 → 0.10.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Gemfile +4 -0
- data/HISTORY.txt +24 -1
- data/README +23 -66
- data/Rakefile +10 -6
- data/lib/parslet.rb +50 -137
- data/lib/parslet/atoms.rb +12 -479
- data/lib/parslet/atoms/alternative.rb +40 -0
- data/lib/parslet/atoms/base.rb +196 -0
- data/lib/parslet/atoms/entity.rb +48 -0
- data/lib/parslet/atoms/lookahead.rb +57 -0
- data/lib/parslet/atoms/named.rb +31 -0
- data/lib/parslet/atoms/re.rb +28 -0
- data/lib/parslet/atoms/repetition.rb +58 -0
- data/lib/parslet/atoms/sequence.rb +37 -0
- data/lib/parslet/atoms/str.rb +26 -0
- data/lib/parslet/error_tree.rb +2 -2
- data/lib/parslet/expression.rb +41 -0
- data/lib/parslet/expression/treetop.rb +53 -0
- data/lib/parslet/parser.rb +17 -0
- data/lib/parslet/pattern.rb +22 -12
- data/lib/parslet/pattern/binding.rb +25 -16
- data/lib/parslet/pattern/context.rb +24 -0
- data/lib/parslet/transform.rb +70 -25
- metadata +37 -8
@@ -0,0 +1,40 @@
|
|
1
|
+
|
2
|
+
# Alternative during matching. Contains a list of parslets that is tried each
|
3
|
+
# one in turn. Only fails if all alternatives fail.
|
4
|
+
#
|
5
|
+
# Example:
|
6
|
+
#
|
7
|
+
# str('a') | str('b') # matches either 'a' or 'b'
|
8
|
+
#
|
9
|
+
class Parslet::Atoms::Alternative < Parslet::Atoms::Base
  # The parslets that #try attempts, in order.
  attr_reader :alternatives

  # Builds an alternative from the given parslets; they are tried in the
  # order given.
  def initialize(*alternatives)
    @alternatives = alternatives
  end

  # Returns a new Alternative that tries everything this one tries, followed
  # by +parslet+.
  #
  # The receiver is deliberately left untouched: appending in place would
  # change every grammar fragment that already holds a reference to this
  # alternative (e.g. ab = a | b; x = ab | c; y = ab | d).
  def |(parslet)
    self.class.new(*(alternatives + [parslet]))
  end

  # Applies each alternative in turn and returns the result of the first one
  # that does not raise Parslet::ParseFailed. If every alternative fails, a
  # parse error is reported through #error.
  def try(io)
    alternatives.each { |a|
      begin
        return a.apply(io)
      rescue Parslet::ParseFailed
        # This alternative did not match; move on to the next one.
      end
    }
    # If we reach this point, all alternatives have failed.
    error(io, "Expected one of #{alternatives.inspect}.")
  end

  precedence ALTERNATE

  # Renders the alternatives separated by ' | '.
  def to_s_inner(prec)
    alternatives.map { |a| a.to_s(prec) }.join(' | ')
  end

  # Builds an error tree node for this alternative whose children are the
  # error trees of all alternatives.
  def error_tree
    Parslet::ErrorTree.new(self, *alternatives.
      map { |child| child.error_tree })
  end
end
|
@@ -0,0 +1,196 @@
|
|
1
|
+
# Base class for all parslets, handles orchestration of calls and implements
|
2
|
+
# a lot of the operator and chaining methods.
|
3
|
+
#
|
4
|
+
class Parslet::Atoms::Base
  include Parslet::Atoms::Precedence

  # Parses +io+ with this parslet and returns the flattened result.
  #
  # Anything that responds to +to_str+ is wrapped in a StringIO first. Raises
  # Parslet::ParseFailed when the parslet fails or when input is left over
  # after it has matched.
  def parse(io)
    io = StringIO.new(io) if io.respond_to? :to_str

    result = apply(io)

    # If we haven't consumed the input, then the pattern doesn't match. Try
    # to provide a good error message (even asking down below)
    unless io.eof?
      # Do we know why we stopped matching input? If yes, that's a good
      # error to fail with. Otherwise just report that we cannot consume the
      # input.
      if cause
        raise Parslet::ParseFailed, "Unconsumed input, maybe because of this: #{cause}"
      else
        error(io, "Don't know what to do with #{io.string[io.pos,100]}")
      end
    end

    return flatten(result)
  end

  # Runs #try at the current position of +io+ and returns its result. On
  # success the stored cause is cleared. On Parslet::ParseFailed the position
  # of +io+ is restored to where it was before the attempt and the exception
  # is raised again.
  def apply(io)
    old_pos = io.pos

    begin
      r = try(io)
      @last_cause = nil
      return r
    rescue Parslet::ParseFailed => ex
      io.pos = old_pos; raise ex
    end
  end

  # Wraps this parslet in a Repetition with the given bounds.
  def repeat(min=0, max=nil)
    Parslet::Atoms::Repetition.new(self, min, max)
  end

  # Wraps this parslet in a Repetition of 0 to 1 occurrences, tagged :maybe.
  def maybe
    Parslet::Atoms::Repetition.new(self, 0, 1, :maybe)
  end

  # Builds a Sequence of this parslet followed by +parslet+.
  def >>(parslet)
    Parslet::Atoms::Sequence.new(self, parslet)
  end

  # Builds an Alternative of this parslet and +parslet+.
  def |(parslet)
    Parslet::Atoms::Alternative.new(self, parslet)
  end

  # Builds a negative Lookahead on this parslet.
  def absnt?
    Parslet::Atoms::Lookahead.new(self, false)
  end

  # Builds a positive Lookahead on this parslet.
  def prsnt?
    Parslet::Atoms::Lookahead.new(self, true)
  end

  # Wraps this parslet in a Named atom with the given +name+.
  def as(name)
    Parslet::Atoms::Named.new(self, name)
  end

  # Turns a tagged intermediate result into its final form. Values that are
  # not arrays pass through unchanged. Arrays are expected to start with one
  # of the tags :sequence, :maybe or :repetition; any other tag raises a
  # RuntimeError.
  def flatten(value)
    # Passes through everything that isn't an array of things
    return value unless value.instance_of? Array

    # Extracts the s-expression tag
    tag, *tail = value

    # Merges arrays:
    result = tail.
      map { |e| flatten(e) } # first flatten each element

    case tag
      when :sequence
        return flatten_sequence(result)
      when :maybe
        return result.first
      when :repetition
        return flatten_repetition(result)
    end

    # Kernel#raise is used instead of +fail+ on purpose: the Lookahead
    # subclass defines its own #fail(io), which would be called here in
    # place of Kernel#fail.
    raise "BUG: Unknown tag #{tag.inspect}."
  end

  # Merges the already flattened elements of a sequence into one value,
  # folding from left to right starting with an empty string.
  def flatten_sequence(list)
    list.inject('') { |r, e| # and then merge flat elements
      case [r, e].map { |o| o.class }
        when [Hash, Hash] # two keyed subtrees: make one
          warn_about_duplicate_keys(r, e)
          r.merge(e)
        # a keyed tree and an array (push down)
        when [Hash, Array]
          [r] + e
        when [Array, Hash]
          r + [e]
        when [String, String]
          r << e
      else
        if r.instance_of? Hash
          r # Ignore e, since it's not a hash we can merge
        else
          # Now e is either nil, in which case we drop it, or something else.
          # If it is something else, it is probably more important than r,
          # since we've checked for important values of r above.
          e||r
        end
      end
    }
  end

  # Merges the already flattened elements of a repetition: hashes win over
  # arrays, arrays win over strings.
  def flatten_repetition(list)
    if list.any? { |e| e.instance_of?(Hash) }
      # If keyed subtrees are in the array, we'll want to discard all
      # strings in between. To keep them, name them.
      return list.select { |e| e.instance_of?(Hash) }
    end

    if list.any? { |e| e.instance_of?(Array) }
      # If any arrays are nested in this array, flatten all arrays to this
      # level.
      return list.
        select { |e| e.instance_of?(Array) }.
        flatten(1)
    end

    # If there are only strings, concatenate them and return that.
    list.inject('') { |s,e| s<<(e||'') }
  end

  # Class-level macro: defines an instance method #precedence that returns
  # +prec+.
  def self.precedence(prec)
    define_method(:precedence) { prec }
  end
  precedence BASE

  # Renders this parslet via #to_s_inner, adding parentheses when
  # +outer_prec+ is lower than this parslet's own precedence.
  def to_s(outer_prec)
    if outer_prec < precedence
      "("+to_s_inner(precedence)+")"
    else
      to_s_inner(precedence)
    end
  end

  # Same as #to_s, rendered at the OUTER precedence level.
  def inspect
    to_s(OUTER)
  end

  # Cause should return the current best approximation of this parslet
  # of what went wrong with the parse. Not relevant if the parse succeeds,
  # but needed for clever error reports.
  #
  def cause
    @last_cause
  end

  # Error tree returns what went wrong here plus what went wrong inside
  # subexpressions as a tree. The error stored for this node will be equal
  # with #cause. Returns nil when no cause is stored.
  #
  def error_tree
    Parslet::ErrorTree.new(self) if cause?
  end

  # True when this parslet has stored a cause for its last failure.
  def cause?
    not @last_cause.nil?
  end

private
  # Report/raise a parse error with the given message, printing the current
  # position as well. Appends 'at line X char Y.' to the message you give.
  # If +pos+ is given, it is used as the real position the error happened,
  # correcting the io's current position.
  #
  # The formatted message is stored as this parslet's cause before
  # Parslet::ParseFailed is raised.
  #
  def error(io, str, pos=nil)
    pre = io.string[0..(pos||io.pos)]
    lines = Array(pre.lines)

    if lines.empty?
      formatted_cause = str
    else
      pos = lines.last.length
      formatted_cause = "#{str} at line #{lines.count} char #{pos}."
    end

    @last_cause = formatted_cause

    raise Parslet::ParseFailed, formatted_cause, nil
  end

  # Emits a warning when +h1+ and +h2+ have keys in common, because merging
  # them keeps only the values of +h2+ for those keys.
  def warn_about_duplicate_keys(h1, h2)
    d = h1.keys & h2.keys
    unless d.empty?
      warn "Duplicate subtrees while merging result of \n #{self.inspect}\nonly the values"+
           " of the latter will be kept. (keys: #{d.inspect})"
    end
  end
end
|
@@ -0,0 +1,48 @@
|
|
1
|
+
# This wraps pieces of parslet definition and gives them a name. The wrapped
|
2
|
+
# piece is lazily evaluated and cached. This has two purposes:
|
3
|
+
#
|
4
|
+
# a) Avoid infinite recursion during evaluation of the definition
|
5
|
+
#
|
6
|
+
# b) Be able to print things by their name, not by their sometimes
|
7
|
+
# complicated content.
|
8
|
+
#
|
9
|
+
# You don't normally use this directly, instead you should generate it by
|
10
|
+
# using the structuring method Parslet#rule.
|
11
|
+
#
|
12
|
+
class Parslet::Atoms::Entity < Parslet::Atoms::Base
  attr_reader :name, :context, :block

  # Stores the rule +name+, the +context+ object the definition is evaluated
  # in, and the +block+ holding the definition. Nothing is evaluated yet.
  def initialize(name, context, block)
    super()

    @name, @context, @block = name, context, block
  end

  # Delegates matching to the wrapped parslet.
  def try(io)
    parslet.apply(io)
  end

  # Returns the wrapped parslet, evaluating the block in the context on first
  # use and caching the result. Raises NotImplementedError if the block
  # yields nil or false.
  def parslet
    @parslet ||= begin
      context.instance_eval(&block).tap do |evaluated|
        raise_not_implemented if !evaluated
      end
    end
  end

  # An entity prints as its upcased name instead of its content.
  def to_s_inner(prec)
    name.to_s.upcase
  end

  # The error tree of an entity is that of the wrapped parslet.
  def error_tree
    parslet.error_tree
  end

private
  # Raises NotImplementedError with a backtrace from which all frames of
  # this file have been removed (technique taken from dependencies.rb in
  # activesupport).
  def raise_not_implemented
    own_file = %r{#{Regexp.escape(__FILE__)}}
    filtered = caller.reject { |line| line =~ own_file }

    failure = NotImplementedError.new("rule(#{name.inspect}) { ... } returns nil. Still not implemented, but already used?")
    failure.set_backtrace(filtered)

    raise failure
  end
end
|
@@ -0,0 +1,57 @@
|
|
1
|
+
# Either positive or negative lookahead, doesn't consume its input.
|
2
|
+
#
|
3
|
+
# Example:
|
4
|
+
#
|
5
|
+
# str('foo').prsnt? # matches when the input contains 'foo', but leaves it
|
6
|
+
#
|
7
|
+
class Parslet::Atoms::Lookahead < Parslet::Atoms::Base
  attr_reader :positive
  attr_reader :bound_parslet

  # +positive+ selects between positive (true) and negative (false)
  # lookahead on +bound_parslet+.
  def initialize(bound_parslet, positive=true)
    @bound_parslet = bound_parslet
    @positive = positive
  end

  # Applies the bound parslet, then always restores the position of +io+ so
  # that no input is consumed. The outcome is handed to #success or #fail.
  def try(io)
    saved = io.pos
    begin
      bound_parslet.apply(io)
    rescue Parslet::ParseFailed
      return fail(io)
    ensure
      io.pos = saved
    end
    success(io)
  end

  # Called when the bound parslet did not match: an error for positive
  # lookahead, nil for negative lookahead.
  def fail(io)
    # TODO: Squash this down to nothing? Return value handling here...
    return nil unless positive

    error(io, "lookahead: #{bound_parslet.inspect} didn't match, but should have")
  end

  # Called when the bound parslet matched: nil for positive lookahead, an
  # error for negative lookahead.
  def success(io)
    return nil if positive # see the TODO in #fail

    error(
      io,
      "negative lookahead: #{bound_parslet.inspect} matched, but shouldn't have")
  end

  precedence LOOKAHEAD

  # Renders as '&' (positive) or '!' (negative) followed by the bound
  # parslet.
  def to_s_inner(prec)
    prefix = if positive then '&' else '!' end

    "#{prefix}#{bound_parslet.to_s(prec)}"
  end

  # The error tree of a lookahead is that of the bound parslet.
  def error_tree
    bound_parslet.error_tree
  end
end
|
@@ -0,0 +1,31 @@
|
|
1
|
+
# Names a match to influence tree construction.
|
2
|
+
#
|
3
|
+
# Example:
|
4
|
+
#
|
5
|
+
# str('foo') # will return 'foo',
|
6
|
+
# str('foo').as(:foo) # will return :foo => 'foo'
|
7
|
+
#
|
8
|
+
class Parslet::Atoms::Named < Parslet::Atoms::Base
  attr_reader :parslet, :name

  # Wraps +parslet+ so that its result is returned under the key +name+.
  def initialize(parslet, name)
    @parslet = parslet
    @name = name
  end

  # Applies the wrapped parslet and returns its flattened result keyed by
  # the name. Failures of the wrapped parslet propagate unchanged.
  def apply(io)
    produce_return_value(parslet.apply(io))
  end

  # Renders as "name:parslet".
  def to_s_inner(prec)
    inner = parslet.to_s(prec)
    "#{name}:#{inner}"
  end

  # The error tree of a named parslet is that of the wrapped parslet.
  def error_tree
    parslet.error_tree
  end

private
  # Builds the single-entry hash that maps the name to the flattened value.
  def produce_return_value(val)
    subtree = {}
    subtree[name] = flatten(val)
    subtree
  end
end
|
@@ -0,0 +1,28 @@
|
|
1
|
+
# Matches a special kind of regular expression that only ever matches one
|
2
|
+
# character at a time. Useful members of this family are: character ranges,
|
3
|
+
# \w, \d, \r, \n, ...
|
4
|
+
#
|
5
|
+
# Example:
|
6
|
+
#
|
7
|
+
# match('[a-z]') # matches a-z
|
8
|
+
# match('\s') # like regexps: matches space characters
|
9
|
+
#
|
10
|
+
class Parslet::Atoms::Re < Parslet::Atoms::Base
  # The regular expression source this atom was built with.
  attr_reader :match

  def initialize(match)
    @match = match
  end

  # Reads one unit of input via io.read(1) and returns it if it matches the
  # expression. Reports a parse error at the end of input or when the unit
  # read does not match.
  #
  # NOTE(review): io.read(1) on a StringIO presumably returns a single byte,
  # so multi-byte characters may not be matched as one unit - confirm.
  def try(io)
    # Fetch the regexp before reading, so that an invalid expression is
    # reported before any input has been consumed.
    r = compiled_match
    s = io.read(1)
    error(io, "Premature end of input") unless s
    error(io, "Failed to match #{match.inspect[1..-2]}") unless s.match(r)
    return s
  end

  # Renders the expression source without its surrounding quotes.
  def to_s_inner(prec)
    match.inspect[1..-2]
  end

private
  # The compiled form of #match. It is built on first use and then reused,
  # since #try is called once per input character tested against this atom.
  def compiled_match
    @compiled_match ||= Regexp.new(match, Regexp::MULTILINE)
  end
end
|
28
|
+
|
@@ -0,0 +1,58 @@
|
|
1
|
+
|
2
|
+
# Matches a parslet repeatedly.
|
3
|
+
#
|
4
|
+
# Example:
|
5
|
+
#
|
6
|
+
# str('a').repeat(1,3) # matches 'a' at least once, but at most three times
|
7
|
+
# str('a').maybe # matches 'a' if it is present in the input (repeat(0,1))
|
8
|
+
#
|
9
|
+
class Parslet::Atoms::Repetition < Parslet::Atoms::Base
  attr_reader :min, :max, :parslet

  # +parslet+ is matched at least +min+ and, unless +max+ is nil, at most
  # +max+ times. +tag+ becomes the first element of the result array and
  # selects how the result is flattened later on.
  def initialize(parslet, min, max, tag=:repetition)
    @parslet = parslet
    @min, @max = min, max
    @tag = tag
  end

  # Applies the parslet repeatedly and returns the tag followed by all
  # results. Reports a parse error when fewer than +min+ occurrences match.
  def try(io)
    occ = 0
    result = [@tag] # initialize the result array with the tag (for flattening)
    loop do
      begin
        pos_before = io.pos
        result << parslet.apply(io)
        occ += 1

        # If we're not greedy (max is defined), check if that has been
        # reached.
        return result if max && occ>=max

        # An unbounded repetition of a parslet that matches without
        # consuming input would never terminate: the same match would
        # succeed at the same position forever. Stop as soon as the minimum
        # is satisfied and no progress was made.
        return result if max.nil? && occ >= min && io.pos == pos_before
      rescue Parslet::ParseFailed
        # Greedy matcher has produced a failure. Check if occ (which will
        # contain the number of successes) is in {min, max}.
        error(io, "Expected at least #{min} of #{parslet.inspect}") if occ < min
        return result
      end
    end
  end

  precedence REPETITION

  # Renders the parslet followed by "{min, max}", or by '?' for a
  # repetition of 0 to 1.
  def to_s_inner(prec)
    minmax = "{#{min}, #{max}}"
    minmax = '?' if min == 0 && max == 1

    parslet.to_s(prec) + minmax
  end

  def cause
    # Either the repetition failed or the parslet inside failed to repeat.
    super || parslet.cause
  end

  # Wraps the error tree of the repeated parslet in a node for this
  # repetition when a cause is present; otherwise returns the error tree of
  # the repeated parslet directly.
  def error_tree
    if cause?
      Parslet::ErrorTree.new(self, parslet.error_tree)
    else
      parslet.error_tree
    end
  end
end
|
58
|
+
|