parslet 1.1.1 → 1.2.0
Sign up to get free protection for your applications and to get access to all the features.
- data/HISTORY.txt +29 -0
- data/README +2 -4
- data/Rakefile +18 -4
- data/example/comments.rb +11 -13
- data/example/documentation.rb +1 -1
- data/example/email_parser.rb +5 -5
- data/example/empty.rb +2 -2
- data/example/erb.rb +6 -3
- data/example/ip_address.rb +2 -2
- data/example/local.rb +34 -0
- data/example/minilisp.rb +2 -2
- data/example/output/comments.out +8 -0
- data/example/output/documentation.err +4 -0
- data/example/output/documentation.out +1 -0
- data/example/output/email_parser.out +2 -0
- data/example/output/empty.err +1 -0
- data/example/output/erb.out +7 -0
- data/example/output/ip_address.out +9 -0
- data/example/output/local.out +3 -0
- data/example/output/minilisp.out +5 -0
- data/example/output/parens.out +8 -0
- data/example/output/readme.out +1 -0
- data/example/output/seasons.out +28 -0
- data/example/output/simple_xml.out +2 -0
- data/example/output/string_parser.out +3 -0
- data/example/parens.rb +1 -3
- data/example/readme.rb +4 -10
- data/example/seasons.rb +2 -1
- data/example/simple_xml.rb +5 -8
- data/example/string_parser.rb +7 -5
- data/lib/parslet.rb +20 -31
- data/lib/parslet/atoms.rb +1 -0
- data/lib/parslet/atoms/base.rb +46 -87
- data/lib/parslet/atoms/dsl.rb +98 -0
- data/lib/parslet/atoms/entity.rb +3 -4
- data/lib/parslet/atoms/lookahead.rb +1 -1
- data/lib/parslet/atoms/re.rb +2 -2
- data/lib/parslet/atoms/str.rb +5 -2
- data/lib/parslet/atoms/transform.rb +75 -0
- data/lib/parslet/atoms/visitor.rb +9 -9
- data/lib/parslet/convenience.rb +3 -3
- data/lib/parslet/export.rb +13 -13
- data/lib/parslet/expression/treetop.rb +2 -2
- data/lib/parslet/parser.rb +55 -1
- data/lib/parslet/rig/rspec.rb +36 -10
- data/lib/parslet/slice.rb +172 -0
- data/lib/parslet/source.rb +72 -83
- data/lib/parslet/source/line_cache.rb +90 -0
- metadata +22 -20
data/example/seasons.rb
CHANGED
data/example/simple_xml.rb
CHANGED
@@ -1,14 +1,12 @@
|
|
1
1
|
# A simple xml parser. It is simple in the respect that it doesn't address
|
2
2
|
# any of the complexities of XML. This is ruby 1.9.
|
3
3
|
|
4
|
-
$:.unshift
|
4
|
+
$:.unshift File.dirname(__FILE__) + "/../lib"
|
5
5
|
|
6
6
|
require 'pp'
|
7
7
|
require 'parslet'
|
8
8
|
|
9
|
-
|
10
|
-
include Parslet
|
11
|
-
|
9
|
+
class XML < Parslet::Parser
|
12
10
|
root :document
|
13
11
|
|
14
12
|
rule(:document) {
|
@@ -23,7 +21,7 @@ module XML
|
|
23
21
|
|
24
22
|
parslet = str('<')
|
25
23
|
parslet = parslet >> str('/') if close
|
26
|
-
parslet = parslet >> (str('>').
|
24
|
+
parslet = parslet >> (str('>').absent? >> match("[a-zA-Z]")).repeat(1).as(:name)
|
27
25
|
parslet = parslet >> str('>')
|
28
26
|
|
29
27
|
parslet
|
@@ -35,8 +33,7 @@ module XML
|
|
35
33
|
end
|
36
34
|
|
37
35
|
def check(xml)
|
38
|
-
|
39
|
-
r=parse(xml)
|
36
|
+
r = XML.new.parse(xml)
|
40
37
|
|
41
38
|
# We'll validate the tree by reducing valid pairs of tags into simply the
|
42
39
|
# string "verified". If the transformation ends on a string, then the
|
@@ -54,4 +51,4 @@ def check(xml)
|
|
54
51
|
end
|
55
52
|
|
56
53
|
pp check("<a><b>some text in the tags</b></a>")
|
57
|
-
pp check("<b><b>some text in the tags</b></a>")
|
54
|
+
pp check("<b><b>some text in the tags</b></a>")
|
data/example/string_parser.rb
CHANGED
@@ -4,7 +4,7 @@
|
|
4
4
|
|
5
5
|
require 'pp'
|
6
6
|
|
7
|
-
$:.unshift
|
7
|
+
$:.unshift File.dirname(__FILE__) + "/../lib"
|
8
8
|
require 'parslet'
|
9
9
|
|
10
10
|
include Parslet
|
@@ -26,7 +26,7 @@ class LiteralsParser < Parslet::Parser
|
|
26
26
|
str('"') >>
|
27
27
|
(
|
28
28
|
(str('\\') >> any) |
|
29
|
-
(str('"').
|
29
|
+
(str('"').absent? >> any)
|
30
30
|
).repeat.as(:string) >>
|
31
31
|
str('"')
|
32
32
|
end
|
@@ -50,8 +50,10 @@ class LiteralsParser < Parslet::Parser
|
|
50
50
|
root :literals
|
51
51
|
end
|
52
52
|
|
53
|
-
|
54
|
-
|
53
|
+
input_name = File.join(File.dirname(__FILE__), 'simple.lit')
|
54
|
+
file = File.read(input_name)
|
55
|
+
|
56
|
+
parsetree = LiteralsParser.new.parse(file)
|
55
57
|
|
56
58
|
class Lit < Struct.new(:text)
|
57
59
|
def to_s
|
@@ -72,4 +74,4 @@ transform = Parslet::Transform.new do
|
|
72
74
|
end
|
73
75
|
|
74
76
|
ast = transform.apply(parsetree)
|
75
|
-
pp ast
|
77
|
+
pp ast
|
data/lib/parslet.rb
CHANGED
@@ -7,7 +7,7 @@
|
|
7
7
|
# root(:a)
|
8
8
|
# end
|
9
9
|
#
|
10
|
-
# pp MyParser.new.parse('aaaa') # => 'aaaa'
|
10
|
+
# pp MyParser.new.parse('aaaa') # => 'aaaa'@0
|
11
11
|
# pp MyParser.new.parse('bbbb') # => Parslet::Atoms::ParseFailed:
|
12
12
|
# # Don't know what to do with bbbb at line 1 char 1.
|
13
13
|
#
|
@@ -64,38 +64,18 @@ module Parslet
|
|
64
64
|
# puts parslet.error_tree
|
65
65
|
# end
|
66
66
|
#
|
67
|
+
# Alternatively, you can just require 'parslet/convenience' and call
|
68
|
+
# the method #parse_with_debug instead of #parse. This method will never
|
69
|
+
# raise; instead it prints error trees to stdout.
|
70
|
+
#
|
71
|
+
# Example:
|
72
|
+
# require 'parslet/convenience'
|
73
|
+
# parslet.parse_with_debug(str)
|
74
|
+
#
|
67
75
|
class ParseFailed < StandardError
|
68
76
|
end
|
69
77
|
|
70
78
|
module ClassMethods
|
71
|
-
# Define the parsers #root function. This is the place where you start
|
72
|
-
# parsing; if you have a rule for 'file' that describes what should be
|
73
|
-
# in a file, this would be your root declaration:
|
74
|
-
#
|
75
|
-
# class Parser
|
76
|
-
# root :file
|
77
|
-
# rule(:file) { ... }
|
78
|
-
# end
|
79
|
-
#
|
80
|
-
# #root declares a 'parse' function that works just like the parse
|
81
|
-
# function that you can call on a simple parslet, taking a string as input
|
82
|
-
# and producing parse output.
|
83
|
-
#
|
84
|
-
# In a way, #root is a shorthand for:
|
85
|
-
#
|
86
|
-
# def parse(str)
|
87
|
-
# your_parser_root.parse(str)
|
88
|
-
# end
|
89
|
-
#
|
90
|
-
def root(name)
|
91
|
-
define_method(:root) do
|
92
|
-
self.send(name)
|
93
|
-
end
|
94
|
-
define_method(:parse) do |str|
|
95
|
-
root.parse(str)
|
96
|
-
end
|
97
|
-
end
|
98
|
-
|
99
79
|
# Define an entity for the parser. This generates a method of the same
|
100
80
|
# name that can be used as part of other patterns. Those methods can be
|
101
81
|
# freely mixed in your parser class with real ruby methods.
|
@@ -116,8 +96,14 @@ module Parslet
|
|
116
96
|
def rule(name, &definition)
|
117
97
|
define_method(name) do
|
118
98
|
@rules ||= {} # <name, rule> memoization
|
119
|
-
@rules[name]
|
120
|
-
|
99
|
+
return @rules[name] if @rules.has_key?(name)
|
100
|
+
|
101
|
+
# Capture the self of the parser class along with the definition.
|
102
|
+
definition_closure = proc {
|
103
|
+
self.instance_eval(&definition)
|
104
|
+
}
|
105
|
+
|
106
|
+
@rules[name] = Atoms::Entity.new(name, &definition_closure)
|
121
107
|
end
|
122
108
|
end
|
123
109
|
end
|
@@ -164,6 +150,8 @@ module Parslet
|
|
164
150
|
# Returns an atom matching any character. It acts like the '.' (dot)
|
165
151
|
# character in regular expressions.
|
166
152
|
#
|
153
|
+
# Example:
|
154
|
+
#
|
167
155
|
# any.parse('a') # => 'a'
|
168
156
|
#
|
169
157
|
def any
|
@@ -227,6 +215,7 @@ module Parslet
|
|
227
215
|
autoload :Expression, 'parslet/expression'
|
228
216
|
end
|
229
217
|
|
218
|
+
require 'parslet/slice'
|
230
219
|
require 'parslet/source'
|
231
220
|
require 'parslet/error_tree'
|
232
221
|
require 'parslet/atoms'
|
data/lib/parslet/atoms.rb
CHANGED
data/lib/parslet/atoms/base.rb
CHANGED
@@ -1,8 +1,11 @@
|
|
1
1
|
# Base class for all parslets, handles orchestration of calls and implements
|
2
2
|
# a lot of the operator and chaining methods.
|
3
3
|
#
|
4
|
+
# Also see Parslet::Atoms::DSL for chaining parslet atoms together.
|
5
|
+
#
|
4
6
|
class Parslet::Atoms::Base
|
5
7
|
include Parslet::Atoms::Precedence
|
8
|
+
include Parslet::Atoms::DSL
|
6
9
|
|
7
10
|
# Internally, all parsing functions return either an instance of Fail
|
8
11
|
# or an instance of Success.
|
@@ -89,84 +92,6 @@ class Parslet::Atoms::Base
|
|
89
92
|
"Atoms::Base doesn't have behaviour, please implement #try(source, context)."
|
90
93
|
end
|
91
94
|
|
92
|
-
# Construct a new atom that repeats the current atom min times at least and
|
93
|
-
# at most max times. max can be nil to indicate that no maximum is present.
|
94
|
-
#
|
95
|
-
# Example:
|
96
|
-
# # match any number of 'a's
|
97
|
-
# str('a').repeat
|
98
|
-
#
|
99
|
-
# # match between 1 and 3 'a's
|
100
|
-
# str('a').repeat(1,3)
|
101
|
-
#
|
102
|
-
def repeat(min=0, max=nil)
|
103
|
-
Parslet::Atoms::Repetition.new(self, min, max)
|
104
|
-
end
|
105
|
-
|
106
|
-
# Returns a new parslet atom that is only maybe present in the input. This
|
107
|
-
# is synonymous to calling #repeat(0,1). Generated tree value will be
|
108
|
-
# either nil (if atom is not present in the input) or the matched subtree.
|
109
|
-
#
|
110
|
-
# Example:
|
111
|
-
# str('foo').maybe
|
112
|
-
#
|
113
|
-
def maybe
|
114
|
-
Parslet::Atoms::Repetition.new(self, 0, 1, :maybe)
|
115
|
-
end
|
116
|
-
|
117
|
-
# Chains two parslet atoms together as a sequence.
|
118
|
-
#
|
119
|
-
# Example:
|
120
|
-
# str('a') >> str('b')
|
121
|
-
#
|
122
|
-
def >>(parslet)
|
123
|
-
Parslet::Atoms::Sequence.new(self, parslet)
|
124
|
-
end
|
125
|
-
|
126
|
-
# Chains two parslet atoms together to express alternation. A match will
|
127
|
-
# always be attempted with the parslet on the left side first. If it doesn't
|
128
|
-
# match, the right side will be tried.
|
129
|
-
#
|
130
|
-
# Example:
|
131
|
-
# # matches either 'a' OR 'b'
|
132
|
-
# str('a') | str('b')
|
133
|
-
#
|
134
|
-
def |(parslet)
|
135
|
-
Parslet::Atoms::Alternative.new(self, parslet)
|
136
|
-
end
|
137
|
-
|
138
|
-
# Tests for absence of a parslet atom in the input stream without consuming
|
139
|
-
# it.
|
140
|
-
#
|
141
|
-
# Example:
|
142
|
-
# # Only proceed the parse if 'a' is absent.
|
143
|
-
# str('a').absnt?
|
144
|
-
#
|
145
|
-
def absnt?
|
146
|
-
Parslet::Atoms::Lookahead.new(self, false)
|
147
|
-
end
|
148
|
-
|
149
|
-
# Tests for presence of a parslet atom in the input stream without consuming
|
150
|
-
# it.
|
151
|
-
#
|
152
|
-
# Example:
|
153
|
-
# # Only proceed the parse if 'a' is present.
|
154
|
-
# str('a').prsnt?
|
155
|
-
#
|
156
|
-
def prsnt?
|
157
|
-
Parslet::Atoms::Lookahead.new(self, true)
|
158
|
-
end
|
159
|
-
|
160
|
-
# Marks a parslet atom as important for the tree output. This must be used
|
161
|
-
# to achieve meaningful output from the #parse method.
|
162
|
-
#
|
163
|
-
# Example:
|
164
|
-
# str('a').as(:b) # will produce {:b => 'a'}
|
165
|
-
#
|
166
|
-
def as(name)
|
167
|
-
Parslet::Atoms::Named.new(self, name)
|
168
|
-
end
|
169
|
-
|
170
95
|
# Takes a mixed value coming out of a parslet and converts it to a return
|
171
96
|
# value for the user by dropping things and merging hashes.
|
172
97
|
#
|
@@ -192,14 +117,24 @@ class Parslet::Atoms::Base
|
|
192
117
|
|
193
118
|
fail "BUG: Unknown tag #{tag.inspect}."
|
194
119
|
end
|
120
|
+
|
121
|
+
# Lisp style fold left where the first element builds the basis for
|
122
|
+
# an inject.
|
123
|
+
#
|
124
|
+
def foldl(list, &block)
|
125
|
+
return '' if list.empty?
|
126
|
+
list[1..-1].inject(list.first, &block)
|
127
|
+
end
|
195
128
|
|
129
|
+
# Flatten results from a sequence of parslets.
|
130
|
+
#
|
196
131
|
def flatten_sequence(list) # :nodoc:
|
197
|
-
list.compact
|
132
|
+
foldl(list.compact) { |r, e| # and then merge flat elements
|
198
133
|
merge_fold(r, e)
|
199
134
|
}
|
200
135
|
end
|
201
136
|
def merge_fold(l, r) # :nodoc:
|
202
|
-
# equal pairs: merge.
|
137
|
+
# equal pairs: merge. ----------------------------------------------------
|
203
138
|
if l.class == r.class
|
204
139
|
if l.is_a?(Hash)
|
205
140
|
warn_about_duplicate_keys(l, r)
|
@@ -209,11 +144,20 @@ class Parslet::Atoms::Base
|
|
209
144
|
end
|
210
145
|
end
|
211
146
|
|
212
|
-
# unequal pairs: hoist to same level.
|
147
|
+
# unequal pairs: hoist to same level. ------------------------------------
|
213
148
|
|
214
|
-
#
|
215
|
-
|
216
|
-
|
149
|
+
# Maybe classes are not equal, but both are stringlike?
|
150
|
+
if l.respond_to?(:to_str) && r.respond_to?(:to_str)
|
151
|
+
# if we're merging a String with a Slice, the slice wins.
|
152
|
+
return r if r.respond_to? :to_slice
|
153
|
+
return l if l.respond_to? :to_slice
|
154
|
+
|
155
|
+
fail "NOTREACHED: What other stringlike classes are there?"
|
156
|
+
end
|
157
|
+
|
158
|
+
# special case: If one of them is a string/slice, the other is more important
|
159
|
+
return l if r.respond_to? :to_str
|
160
|
+
return r if l.respond_to? :to_str
|
217
161
|
|
218
162
|
# otherwise just create an array for one of them to live in
|
219
163
|
return l + [r] if r.class == Hash
|
@@ -222,6 +166,11 @@ class Parslet::Atoms::Base
|
|
222
166
|
fail "Unhandled case when foldr'ing sequence."
|
223
167
|
end
|
224
168
|
|
169
|
+
# Flatten results from a repetition of a single parslet. named indicates
|
170
|
+
# whether the user has named the result or not. If the user has named
|
171
|
+
# the results, we want to leave an empty list alone - otherwise it is
|
172
|
+
# turned into an empty string.
|
173
|
+
#
|
225
174
|
def flatten_repetition(list, named) # :nodoc:
|
226
175
|
if list.any? { |e| e.instance_of?(Hash) }
|
227
176
|
# If keyed subtrees are in the array, we'll want to discard all
|
@@ -241,9 +190,11 @@ class Parslet::Atoms::Base
|
|
241
190
|
return [] if named && list.empty?
|
242
191
|
|
243
192
|
# If there are only strings, concatenate them and return that.
|
244
|
-
list
|
193
|
+
foldl(list) { |s,e| s+e }
|
245
194
|
end
|
246
195
|
|
196
|
+
# Debug printing - in Treetop syntax.
|
197
|
+
#
|
247
198
|
def self.precedence(prec) # :nodoc:
|
248
199
|
define_method(:precedence) { prec }
|
249
200
|
end
|
@@ -272,7 +223,7 @@ class Parslet::Atoms::Base
|
|
272
223
|
|
273
224
|
# Error tree returns what went wrong here plus what went wrong inside
|
274
225
|
# subexpressions as a tree. The error stored for this node will be equal
|
275
|
-
#
|
226
|
+
# to #cause.
|
276
227
|
#
|
277
228
|
def error_tree
|
278
229
|
Parslet::ErrorTree.new(self)
|
@@ -301,10 +252,18 @@ private
|
|
301
252
|
@last_cause.to_s
|
302
253
|
end
|
303
254
|
|
255
|
+
# An internal class that allows delaying the construction of error messages
|
256
|
+
# (as strings) until we really need to print them.
|
257
|
+
#
|
304
258
|
class Cause < Struct.new(:message, :source, :pos)
|
305
259
|
def to_s
|
306
260
|
line, column = source.line_and_column(pos)
|
307
|
-
message
|
261
|
+
# Allow message to be a list of objects. Join them here, since we now
|
262
|
+
# really need it.
|
263
|
+
Array(message).map { |o|
|
264
|
+
o.respond_to?(:to_slice) ?
|
265
|
+
o.str.inspect :
|
266
|
+
o.to_s }.join + " at line #{line} char #{column}."
|
308
267
|
end
|
309
268
|
end
|
310
269
|
|
@@ -0,0 +1,98 @@
|
|
1
|
+
|
2
|
+
# A mixin module that defines operations that can be called on any subclass
|
3
|
+
# of Parslet::Atoms::Base. These operations make parslet atoms chainable and
|
4
|
+
# allow combination of parslet atoms to form bigger parsers.
|
5
|
+
#
|
6
|
+
# Example:
|
7
|
+
#
|
8
|
+
# str('foo') >> str('bar')
|
9
|
+
# str('f').repeat
|
10
|
+
# any.absent? # also called The Epsilon
|
11
|
+
#
|
12
|
+
module Parslet::Atoms::DSL
|
13
|
+
# Construct a new atom that repeats the current atom min times at least and
|
14
|
+
# at most max times. max can be nil to indicate that no maximum is present.
|
15
|
+
#
|
16
|
+
# Example:
|
17
|
+
# # match any number of 'a's
|
18
|
+
# str('a').repeat
|
19
|
+
#
|
20
|
+
# # match between 1 and 3 'a's
|
21
|
+
# str('a').repeat(1,3)
|
22
|
+
#
|
23
|
+
def repeat(min=0, max=nil)
|
24
|
+
Parslet::Atoms::Repetition.new(self, min, max)
|
25
|
+
end
|
26
|
+
|
27
|
+
# Returns a new parslet atom that is only maybe present in the input. This
|
28
|
+
# is synonymous to calling #repeat(0,1). Generated tree value will be
|
29
|
+
# either nil (if atom is not present in the input) or the matched subtree.
|
30
|
+
#
|
31
|
+
# Example:
|
32
|
+
# str('foo').maybe
|
33
|
+
#
|
34
|
+
def maybe
|
35
|
+
Parslet::Atoms::Repetition.new(self, 0, 1, :maybe)
|
36
|
+
end
|
37
|
+
|
38
|
+
# Chains two parslet atoms together as a sequence.
|
39
|
+
#
|
40
|
+
# Example:
|
41
|
+
# str('a') >> str('b')
|
42
|
+
#
|
43
|
+
def >>(parslet)
|
44
|
+
Parslet::Atoms::Sequence.new(self, parslet)
|
45
|
+
end
|
46
|
+
|
47
|
+
# Chains two parslet atoms together to express alternation. A match will
|
48
|
+
# always be attempted with the parslet on the left side first. If it doesn't
|
49
|
+
# match, the right side will be tried.
|
50
|
+
#
|
51
|
+
# Example:
|
52
|
+
# # matches either 'a' OR 'b'
|
53
|
+
# str('a') | str('b')
|
54
|
+
#
|
55
|
+
def |(parslet)
|
56
|
+
Parslet::Atoms::Alternative.new(self, parslet)
|
57
|
+
end
|
58
|
+
|
59
|
+
# Tests for absence of a parslet atom in the input stream without consuming
|
60
|
+
# it.
|
61
|
+
#
|
62
|
+
# Example:
|
63
|
+
# # Only proceed the parse if 'a' is absent.
|
64
|
+
# str('a').absent?
|
65
|
+
#
|
66
|
+
def absent?
|
67
|
+
Parslet::Atoms::Lookahead.new(self, false)
|
68
|
+
end
|
69
|
+
|
70
|
+
# Tests for presence of a parslet atom in the input stream without consuming
|
71
|
+
# it.
|
72
|
+
#
|
73
|
+
# Example:
|
74
|
+
# # Only proceed the parse if 'a' is present.
|
75
|
+
# str('a').present?
|
76
|
+
#
|
77
|
+
def present?
|
78
|
+
Parslet::Atoms::Lookahead.new(self, true)
|
79
|
+
end
|
80
|
+
|
81
|
+
# Alias for present? that will disappear in 2.0 (deprecated)
|
82
|
+
#
|
83
|
+
alias prsnt? present?
|
84
|
+
|
85
|
+
# Alias for absent? that will disappear in 2.0 (deprecated)
|
86
|
+
#
|
87
|
+
alias absnt? absent?
|
88
|
+
|
89
|
+
# Marks a parslet atom as important for the tree output. This must be used
|
90
|
+
# to achieve meaningful output from the #parse method.
|
91
|
+
#
|
92
|
+
# Example:
|
93
|
+
# str('a').as(:b) # will produce {:b => 'a'}
|
94
|
+
#
|
95
|
+
def as(name)
|
96
|
+
Parslet::Atoms::Named.new(self, name)
|
97
|
+
end
|
98
|
+
end
|