parslet 1.1.1 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/HISTORY.txt +29 -0
- data/README +2 -4
- data/Rakefile +18 -4
- data/example/comments.rb +11 -13
- data/example/documentation.rb +1 -1
- data/example/email_parser.rb +5 -5
- data/example/empty.rb +2 -2
- data/example/erb.rb +6 -3
- data/example/ip_address.rb +2 -2
- data/example/local.rb +34 -0
- data/example/minilisp.rb +2 -2
- data/example/output/comments.out +8 -0
- data/example/output/documentation.err +4 -0
- data/example/output/documentation.out +1 -0
- data/example/output/email_parser.out +2 -0
- data/example/output/empty.err +1 -0
- data/example/output/erb.out +7 -0
- data/example/output/ip_address.out +9 -0
- data/example/output/local.out +3 -0
- data/example/output/minilisp.out +5 -0
- data/example/output/parens.out +8 -0
- data/example/output/readme.out +1 -0
- data/example/output/seasons.out +28 -0
- data/example/output/simple_xml.out +2 -0
- data/example/output/string_parser.out +3 -0
- data/example/parens.rb +1 -3
- data/example/readme.rb +4 -10
- data/example/seasons.rb +2 -1
- data/example/simple_xml.rb +5 -8
- data/example/string_parser.rb +7 -5
- data/lib/parslet.rb +20 -31
- data/lib/parslet/atoms.rb +1 -0
- data/lib/parslet/atoms/base.rb +46 -87
- data/lib/parslet/atoms/dsl.rb +98 -0
- data/lib/parslet/atoms/entity.rb +3 -4
- data/lib/parslet/atoms/lookahead.rb +1 -1
- data/lib/parslet/atoms/re.rb +2 -2
- data/lib/parslet/atoms/str.rb +5 -2
- data/lib/parslet/atoms/transform.rb +75 -0
- data/lib/parslet/atoms/visitor.rb +9 -9
- data/lib/parslet/convenience.rb +3 -3
- data/lib/parslet/export.rb +13 -13
- data/lib/parslet/expression/treetop.rb +2 -2
- data/lib/parslet/parser.rb +55 -1
- data/lib/parslet/rig/rspec.rb +36 -10
- data/lib/parslet/slice.rb +172 -0
- data/lib/parslet/source.rb +72 -83
- data/lib/parslet/source/line_cache.rb +90 -0
- metadata +22 -20
data/example/seasons.rb
CHANGED
data/example/simple_xml.rb
CHANGED
@@ -1,14 +1,12 @@
|
|
1
1
|
# A simple xml parser. It is simple in the respect that it doesn't address
|
2
2
|
# any of the complexities of XML. This is ruby 1.9.
|
3
3
|
|
4
|
-
$:.unshift
|
4
|
+
$:.unshift File.dirname(__FILE__) + "/../lib"
|
5
5
|
|
6
6
|
require 'pp'
|
7
7
|
require 'parslet'
|
8
8
|
|
9
|
-
|
10
|
-
include Parslet
|
11
|
-
|
9
|
+
class XML < Parslet::Parser
|
12
10
|
root :document
|
13
11
|
|
14
12
|
rule(:document) {
|
@@ -23,7 +21,7 @@ module XML
|
|
23
21
|
|
24
22
|
parslet = str('<')
|
25
23
|
parslet = parslet >> str('/') if close
|
26
|
-
parslet = parslet >> (str('>').
|
24
|
+
parslet = parslet >> (str('>').absent? >> match("[a-zA-Z]")).repeat(1).as(:name)
|
27
25
|
parslet = parslet >> str('>')
|
28
26
|
|
29
27
|
parslet
|
@@ -35,8 +33,7 @@ module XML
|
|
35
33
|
end
|
36
34
|
|
37
35
|
def check(xml)
|
38
|
-
|
39
|
-
r=parse(xml)
|
36
|
+
r = XML.new.parse(xml)
|
40
37
|
|
41
38
|
# We'll validate the tree by reducing valid pairs of tags into simply the
|
42
39
|
# string "verified". If the transformation ends on a string, then the
|
@@ -54,4 +51,4 @@ def check(xml)
|
|
54
51
|
end
|
55
52
|
|
56
53
|
pp check("<a><b>some text in the tags</b></a>")
|
57
|
-
pp check("<b><b>some text in the tags</b></a>")
|
54
|
+
pp check("<b><b>some text in the tags</b></a>")
|
data/example/string_parser.rb
CHANGED
@@ -4,7 +4,7 @@
|
|
4
4
|
|
5
5
|
require 'pp'
|
6
6
|
|
7
|
-
$:.unshift
|
7
|
+
$:.unshift File.dirname(__FILE__) + "/../lib"
|
8
8
|
require 'parslet'
|
9
9
|
|
10
10
|
include Parslet
|
@@ -26,7 +26,7 @@ class LiteralsParser < Parslet::Parser
|
|
26
26
|
str('"') >>
|
27
27
|
(
|
28
28
|
(str('\\') >> any) |
|
29
|
-
(str('"').
|
29
|
+
(str('"').absent? >> any)
|
30
30
|
).repeat.as(:string) >>
|
31
31
|
str('"')
|
32
32
|
end
|
@@ -50,8 +50,10 @@ class LiteralsParser < Parslet::Parser
|
|
50
50
|
root :literals
|
51
51
|
end
|
52
52
|
|
53
|
-
|
54
|
-
|
53
|
+
input_name = File.join(File.dirname(__FILE__), 'simple.lit')
|
54
|
+
file = File.read(input_name)
|
55
|
+
|
56
|
+
parsetree = LiteralsParser.new.parse(file)
|
55
57
|
|
56
58
|
class Lit < Struct.new(:text)
|
57
59
|
def to_s
|
@@ -72,4 +74,4 @@ transform = Parslet::Transform.new do
|
|
72
74
|
end
|
73
75
|
|
74
76
|
ast = transform.apply(parsetree)
|
75
|
-
pp ast
|
77
|
+
pp ast
|
data/lib/parslet.rb
CHANGED
@@ -7,7 +7,7 @@
|
|
7
7
|
# root(:a)
|
8
8
|
# end
|
9
9
|
#
|
10
|
-
# pp MyParser.new.parse('aaaa') # => 'aaaa'
|
10
|
+
# pp MyParser.new.parse('aaaa') # => 'aaaa'@0
|
11
11
|
# pp MyParser.new.parse('bbbb') # => Parslet::Atoms::ParseFailed:
|
12
12
|
# # Don't know what to do with bbbb at line 1 char 1.
|
13
13
|
#
|
@@ -64,38 +64,18 @@ module Parslet
|
|
64
64
|
# puts parslet.error_tree
|
65
65
|
# end
|
66
66
|
#
|
67
|
+
# Alternatively, you can just require 'parslet/convenience' and call
|
68
|
+
# the method #parse_with_debug instead of #parse. This method will never
|
69
|
+
# raise; it prints error trees to stdout instead.
|
70
|
+
#
|
71
|
+
# Example:
|
72
|
+
# require 'parslet/convenience'
|
73
|
+
# parslet.parse_with_debug(str)
|
74
|
+
#
|
67
75
|
class ParseFailed < StandardError
|
68
76
|
end
|
69
77
|
|
70
78
|
module ClassMethods
|
71
|
-
# Define the parsers #root function. This is the place where you start
|
72
|
-
# parsing; if you have a rule for 'file' that describes what should be
|
73
|
-
# in a file, this would be your root declaration:
|
74
|
-
#
|
75
|
-
# class Parser
|
76
|
-
# root :file
|
77
|
-
# rule(:file) { ... }
|
78
|
-
# end
|
79
|
-
#
|
80
|
-
# #root declares a 'parse' function that works just like the parse
|
81
|
-
# function that you can call on a simple parslet, taking a string as input
|
82
|
-
# and producing parse output.
|
83
|
-
#
|
84
|
-
# In a way, #root is a shorthand for:
|
85
|
-
#
|
86
|
-
# def parse(str)
|
87
|
-
# your_parser_root.parse(str)
|
88
|
-
# end
|
89
|
-
#
|
90
|
-
def root(name)
|
91
|
-
define_method(:root) do
|
92
|
-
self.send(name)
|
93
|
-
end
|
94
|
-
define_method(:parse) do |str|
|
95
|
-
root.parse(str)
|
96
|
-
end
|
97
|
-
end
|
98
|
-
|
99
79
|
# Define an entity for the parser. This generates a method of the same
|
100
80
|
# name that can be used as part of other patterns. Those methods can be
|
101
81
|
# freely mixed in your parser class with real ruby methods.
|
@@ -116,8 +96,14 @@ module Parslet
|
|
116
96
|
def rule(name, &definition)
|
117
97
|
define_method(name) do
|
118
98
|
@rules ||= {} # <name, rule> memoization
|
119
|
-
@rules[name]
|
120
|
-
|
99
|
+
return @rules[name] if @rules.has_key?(name)
|
100
|
+
|
101
|
+
# Capture the self of the parser class along with the definition.
|
102
|
+
definition_closure = proc {
|
103
|
+
self.instance_eval(&definition)
|
104
|
+
}
|
105
|
+
|
106
|
+
@rules[name] = Atoms::Entity.new(name, &definition_closure)
|
121
107
|
end
|
122
108
|
end
|
123
109
|
end
|
@@ -164,6 +150,8 @@ module Parslet
|
|
164
150
|
# Returns an atom matching any character. It acts like the '.' (dot)
|
165
151
|
# character in regular expressions.
|
166
152
|
#
|
153
|
+
# Example:
|
154
|
+
#
|
167
155
|
# any.parse('a') # => 'a'
|
168
156
|
#
|
169
157
|
def any
|
@@ -227,6 +215,7 @@ module Parslet
|
|
227
215
|
autoload :Expression, 'parslet/expression'
|
228
216
|
end
|
229
217
|
|
218
|
+
require 'parslet/slice'
|
230
219
|
require 'parslet/source'
|
231
220
|
require 'parslet/error_tree'
|
232
221
|
require 'parslet/atoms'
|
data/lib/parslet/atoms.rb
CHANGED
data/lib/parslet/atoms/base.rb
CHANGED
@@ -1,8 +1,11 @@
|
|
1
1
|
# Base class for all parslets, handles orchestration of calls and implements
|
2
2
|
# a lot of the operator and chaining methods.
|
3
3
|
#
|
4
|
+
# Also see Parslet::Atoms::DSL for chaining parslet atoms together.
|
5
|
+
#
|
4
6
|
class Parslet::Atoms::Base
|
5
7
|
include Parslet::Atoms::Precedence
|
8
|
+
include Parslet::Atoms::DSL
|
6
9
|
|
7
10
|
# Internally, all parsing functions return either an instance of Fail
|
8
11
|
# or an instance of Success.
|
@@ -89,84 +92,6 @@ class Parslet::Atoms::Base
|
|
89
92
|
"Atoms::Base doesn't have behaviour, please implement #try(source, context)."
|
90
93
|
end
|
91
94
|
|
92
|
-
# Construct a new atom that repeats the current atom min times at least and
|
93
|
-
# at most max times. max can be nil to indicate that no maximum is present.
|
94
|
-
#
|
95
|
-
# Example:
|
96
|
-
# # match any number of 'a's
|
97
|
-
# str('a').repeat
|
98
|
-
#
|
99
|
-
# # match between 1 and 3 'a's
|
100
|
-
# str('a').repeat(1,3)
|
101
|
-
#
|
102
|
-
def repeat(min=0, max=nil)
|
103
|
-
Parslet::Atoms::Repetition.new(self, min, max)
|
104
|
-
end
|
105
|
-
|
106
|
-
# Returns a new parslet atom that is only maybe present in the input. This
|
107
|
-
# is synonymous to calling #repeat(0,1). Generated tree value will be
|
108
|
-
# either nil (if atom is not present in the input) or the matched subtree.
|
109
|
-
#
|
110
|
-
# Example:
|
111
|
-
# str('foo').maybe
|
112
|
-
#
|
113
|
-
def maybe
|
114
|
-
Parslet::Atoms::Repetition.new(self, 0, 1, :maybe)
|
115
|
-
end
|
116
|
-
|
117
|
-
# Chains two parslet atoms together as a sequence.
|
118
|
-
#
|
119
|
-
# Example:
|
120
|
-
# str('a') >> str('b')
|
121
|
-
#
|
122
|
-
def >>(parslet)
|
123
|
-
Parslet::Atoms::Sequence.new(self, parslet)
|
124
|
-
end
|
125
|
-
|
126
|
-
# Chains two parslet atoms together to express alternation. A match will
|
127
|
-
# always be attempted with the parslet on the left side first. If it doesn't
|
128
|
-
# match, the right side will be tried.
|
129
|
-
#
|
130
|
-
# Example:
|
131
|
-
# # matches either 'a' OR 'b'
|
132
|
-
# str('a') | str('b')
|
133
|
-
#
|
134
|
-
def |(parslet)
|
135
|
-
Parslet::Atoms::Alternative.new(self, parslet)
|
136
|
-
end
|
137
|
-
|
138
|
-
# Tests for absence of a parslet atom in the input stream without consuming
|
139
|
-
# it.
|
140
|
-
#
|
141
|
-
# Example:
|
142
|
-
# # Only proceed the parse if 'a' is absent.
|
143
|
-
# str('a').absnt?
|
144
|
-
#
|
145
|
-
def absnt?
|
146
|
-
Parslet::Atoms::Lookahead.new(self, false)
|
147
|
-
end
|
148
|
-
|
149
|
-
# Tests for presence of a parslet atom in the input stream without consuming
|
150
|
-
# it.
|
151
|
-
#
|
152
|
-
# Example:
|
153
|
-
# # Only proceed the parse if 'a' is present.
|
154
|
-
# str('a').prsnt?
|
155
|
-
#
|
156
|
-
def prsnt?
|
157
|
-
Parslet::Atoms::Lookahead.new(self, true)
|
158
|
-
end
|
159
|
-
|
160
|
-
# Marks a parslet atom as important for the tree output. This must be used
|
161
|
-
# to achieve meaningful output from the #parse method.
|
162
|
-
#
|
163
|
-
# Example:
|
164
|
-
# str('a').as(:b) # will produce {:b => 'a'}
|
165
|
-
#
|
166
|
-
def as(name)
|
167
|
-
Parslet::Atoms::Named.new(self, name)
|
168
|
-
end
|
169
|
-
|
170
95
|
# Takes a mixed value coming out of a parslet and converts it to a return
|
171
96
|
# value for the user by dropping things and merging hashes.
|
172
97
|
#
|
@@ -192,14 +117,24 @@ class Parslet::Atoms::Base
|
|
192
117
|
|
193
118
|
fail "BUG: Unknown tag #{tag.inspect}."
|
194
119
|
end
|
120
|
+
|
121
|
+
# Lisp style fold left where the first element builds the basis for
|
122
|
+
# an inject.
|
123
|
+
#
|
124
|
+
def foldl(list, &block)
|
125
|
+
return '' if list.empty?
|
126
|
+
list[1..-1].inject(list.first, &block)
|
127
|
+
end
|
195
128
|
|
129
|
+
# Flatten results from a sequence of parslets.
|
130
|
+
#
|
196
131
|
def flatten_sequence(list) # :nodoc:
|
197
|
-
list.compact
|
132
|
+
foldl(list.compact) { |r, e| # and then merge flat elements
|
198
133
|
merge_fold(r, e)
|
199
134
|
}
|
200
135
|
end
|
201
136
|
def merge_fold(l, r) # :nodoc:
|
202
|
-
# equal pairs: merge.
|
137
|
+
# equal pairs: merge. ----------------------------------------------------
|
203
138
|
if l.class == r.class
|
204
139
|
if l.is_a?(Hash)
|
205
140
|
warn_about_duplicate_keys(l, r)
|
@@ -209,11 +144,20 @@ class Parslet::Atoms::Base
|
|
209
144
|
end
|
210
145
|
end
|
211
146
|
|
212
|
-
# unequal pairs: hoist to same level.
|
147
|
+
# unequal pairs: hoist to same level. ------------------------------------
|
213
148
|
|
214
|
-
#
|
215
|
-
|
216
|
-
|
149
|
+
# Maybe classes are not equal, but both are stringlike?
|
150
|
+
if l.respond_to?(:to_str) && r.respond_to?(:to_str)
|
151
|
+
# if we're merging a String with a Slice, the slice wins.
|
152
|
+
return r if r.respond_to? :to_slice
|
153
|
+
return l if l.respond_to? :to_slice
|
154
|
+
|
155
|
+
fail "NOTREACHED: What other stringlike classes are there?"
|
156
|
+
end
|
157
|
+
|
158
|
+
# special case: If one of them is a string/slice, the other is more important
|
159
|
+
return l if r.respond_to? :to_str
|
160
|
+
return r if l.respond_to? :to_str
|
217
161
|
|
218
162
|
# otherwise just create an array for one of them to live in
|
219
163
|
return l + [r] if r.class == Hash
|
@@ -222,6 +166,11 @@ class Parslet::Atoms::Base
|
|
222
166
|
fail "Unhandled case when foldr'ing sequence."
|
223
167
|
end
|
224
168
|
|
169
|
+
# Flatten results from a repetition of a single parslet. named indicates
|
170
|
+
# whether the user has named the result or not. If the user has named
|
171
|
+
# the results, we want to leave an empty list alone - otherwise it is
|
172
|
+
# turned into an empty string.
|
173
|
+
#
|
225
174
|
def flatten_repetition(list, named) # :nodoc:
|
226
175
|
if list.any? { |e| e.instance_of?(Hash) }
|
227
176
|
# If keyed subtrees are in the array, we'll want to discard all
|
@@ -241,9 +190,11 @@ class Parslet::Atoms::Base
|
|
241
190
|
return [] if named && list.empty?
|
242
191
|
|
243
192
|
# If there are only strings, concatenate them and return that.
|
244
|
-
list
|
193
|
+
foldl(list) { |s,e| s+e }
|
245
194
|
end
|
246
195
|
|
196
|
+
# Debug printing - in Treetop syntax.
|
197
|
+
#
|
247
198
|
def self.precedence(prec) # :nodoc:
|
248
199
|
define_method(:precedence) { prec }
|
249
200
|
end
|
@@ -272,7 +223,7 @@ class Parslet::Atoms::Base
|
|
272
223
|
|
273
224
|
# Error tree returns what went wrong here plus what went wrong inside
|
274
225
|
# subexpressions as a tree. The error stored for this node will be equal
|
275
|
-
#
|
226
|
+
# to #cause.
|
276
227
|
#
|
277
228
|
def error_tree
|
278
229
|
Parslet::ErrorTree.new(self)
|
@@ -301,10 +252,18 @@ private
|
|
301
252
|
@last_cause.to_s
|
302
253
|
end
|
303
254
|
|
255
|
+
# An internal class that allows delaying the construction of error messages
|
256
|
+
# (as strings) until we really need to print them.
|
257
|
+
#
|
304
258
|
class Cause < Struct.new(:message, :source, :pos)
|
305
259
|
def to_s
|
306
260
|
line, column = source.line_and_column(pos)
|
307
|
-
message
|
261
|
+
# Allow message to be a list of objects. Join them here, since we now
|
262
|
+
# really need it.
|
263
|
+
Array(message).map { |o|
|
264
|
+
o.respond_to?(:to_slice) ?
|
265
|
+
o.str.inspect :
|
266
|
+
o.to_s }.join + " at line #{line} char #{column}."
|
308
267
|
end
|
309
268
|
end
|
310
269
|
|
@@ -0,0 +1,98 @@
|
|
1
|
+
|
2
|
+
# A mixin module that defines operations that can be called on any subclass
|
3
|
+
# of Parslet::Atoms::Base. These operations make parslet atoms chainable and
|
4
|
+
# allow combination of parslet atoms to form bigger parsers.
|
5
|
+
#
|
6
|
+
# Example:
|
7
|
+
#
|
8
|
+
# str('foo') >> str('bar')
|
9
|
+
# str('f').repeat
|
10
|
+
# any.absent? # also called The Epsilon
|
11
|
+
#
|
12
|
+
module Parslet::Atoms::DSL
|
13
|
+
# Construct a new atom that repeats the current atom min times at least and
|
14
|
+
# at most max times. max can be nil to indicate that no maximum is present.
|
15
|
+
#
|
16
|
+
# Example:
|
17
|
+
# # match any number of 'a's
|
18
|
+
# str('a').repeat
|
19
|
+
#
|
20
|
+
# # match between 1 and 3 'a's
|
21
|
+
# str('a').repeat(1,3)
|
22
|
+
#
|
23
|
+
def repeat(min=0, max=nil)
|
24
|
+
Parslet::Atoms::Repetition.new(self, min, max)
|
25
|
+
end
|
26
|
+
|
27
|
+
# Returns a new parslet atom that is only maybe present in the input. This
|
28
|
+
# is synonymous to calling #repeat(0,1). Generated tree value will be
|
29
|
+
# either nil (if atom is not present in the input) or the matched subtree.
|
30
|
+
#
|
31
|
+
# Example:
|
32
|
+
# str('foo').maybe
|
33
|
+
#
|
34
|
+
def maybe
|
35
|
+
Parslet::Atoms::Repetition.new(self, 0, 1, :maybe)
|
36
|
+
end
|
37
|
+
|
38
|
+
# Chains two parslet atoms together as a sequence.
|
39
|
+
#
|
40
|
+
# Example:
|
41
|
+
# str('a') >> str('b')
|
42
|
+
#
|
43
|
+
def >>(parslet)
|
44
|
+
Parslet::Atoms::Sequence.new(self, parslet)
|
45
|
+
end
|
46
|
+
|
47
|
+
# Chains two parslet atoms together to express alternation. A match will
|
48
|
+
# always be attempted with the parslet on the left side first. If it doesn't
|
49
|
+
# match, the right side will be tried.
|
50
|
+
#
|
51
|
+
# Example:
|
52
|
+
# # matches either 'a' OR 'b'
|
53
|
+
# str('a') | str('b')
|
54
|
+
#
|
55
|
+
def |(parslet)
|
56
|
+
Parslet::Atoms::Alternative.new(self, parslet)
|
57
|
+
end
|
58
|
+
|
59
|
+
# Tests for absence of a parslet atom in the input stream without consuming
|
60
|
+
# it.
|
61
|
+
#
|
62
|
+
# Example:
|
63
|
+
# # Only proceed the parse if 'a' is absent.
|
64
|
+
# str('a').absent?
|
65
|
+
#
|
66
|
+
def absent?
|
67
|
+
Parslet::Atoms::Lookahead.new(self, false)
|
68
|
+
end
|
69
|
+
|
70
|
+
# Tests for presence of a parslet atom in the input stream without consuming
|
71
|
+
# it.
|
72
|
+
#
|
73
|
+
# Example:
|
74
|
+
# # Only proceed the parse if 'a' is present.
|
75
|
+
# str('a').present?
|
76
|
+
#
|
77
|
+
def present?
|
78
|
+
Parslet::Atoms::Lookahead.new(self, true)
|
79
|
+
end
|
80
|
+
|
81
|
+
# Alias for present? that will disappear in 2.0 (deprecated)
|
82
|
+
#
|
83
|
+
alias prsnt? present?
|
84
|
+
|
85
|
+
# Alias for absent? that will disappear in 2.0 (deprecated)
|
86
|
+
#
|
87
|
+
alias absnt? absent?
|
88
|
+
|
89
|
+
# Marks a parslet atom as important for the tree output. This must be used
|
90
|
+
# to achieve meaningful output from the #parse method.
|
91
|
+
#
|
92
|
+
# Example:
|
93
|
+
# str('a').as(:b) # will produce {:b => 'a'}
|
94
|
+
#
|
95
|
+
def as(name)
|
96
|
+
Parslet::Atoms::Named.new(self, name)
|
97
|
+
end
|
98
|
+
end
|