parslet 0.9.0 → 0.10.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Gemfile +4 -0
- data/HISTORY.txt +24 -1
- data/README +23 -66
- data/Rakefile +10 -6
- data/lib/parslet.rb +50 -137
- data/lib/parslet/atoms.rb +12 -479
- data/lib/parslet/atoms/alternative.rb +40 -0
- data/lib/parslet/atoms/base.rb +196 -0
- data/lib/parslet/atoms/entity.rb +48 -0
- data/lib/parslet/atoms/lookahead.rb +57 -0
- data/lib/parslet/atoms/named.rb +31 -0
- data/lib/parslet/atoms/re.rb +28 -0
- data/lib/parslet/atoms/repetition.rb +58 -0
- data/lib/parslet/atoms/sequence.rb +37 -0
- data/lib/parslet/atoms/str.rb +26 -0
- data/lib/parslet/error_tree.rb +2 -2
- data/lib/parslet/expression.rb +41 -0
- data/lib/parslet/expression/treetop.rb +53 -0
- data/lib/parslet/parser.rb +17 -0
- data/lib/parslet/pattern.rb +22 -12
- data/lib/parslet/pattern/binding.rb +25 -16
- data/lib/parslet/pattern/context.rb +24 -0
- data/lib/parslet/transform.rb +70 -25
- metadata +37 -8
@@ -0,0 +1,37 @@
|
|
1
|
+
# A sequence of parslets, matched from left to right. Denoted by '>>'
#
# Example:
#
#   str('a') >> str('b')  # matches 'a', then 'b'
#
class Parslet::Atoms::Sequence < Parslet::Atoms::Base
  # The parslets that make up this sequence, in match order.
  attr_reader :parslets

  # Builds a sequence from the given parslets (splatted argument list).
  def initialize(*parslets)
    @parslets = parslets
  end

  # Returns a NEW sequence made of this sequence's parslets followed by
  # +parslet+. The receiver is deliberately left untouched: appending in
  # place would make a sequence held in a variable grow every time it is
  # reused as the prefix of a longer expression.
  def >>(parslet)
    self.class.new(*(@parslets + [parslet]))
  end

  # Applies every parslet to +io+ in order and returns [:sequence] followed
  # by the individual results. When one of them fails with
  # Parslet::ParseFailed, the failure is reported through #error (inherited,
  # not shown in this file) with a message naming this sequence.
  def try(io)
    [:sequence]+parslets.map { |p|
      # Save each parslet as potentially offending (raising an error).
      @offending_parslet = p
      p.apply(io)
    }
  rescue Parslet::ParseFailed
    error(io, "Failed to match sequence (#{self.inspect})")
  end

  precedence SEQUENCE

  # Renders the members separated by single spaces; +prec+ is handed on to
  # each member's #to_s.
  def to_s_inner(prec)
    parslets.map { |p| p.to_s(prec) }.join(' ')
  end

  # Error tree for this sequence. The parslet that was being applied last
  # (if any) contributes its own error tree as the only child.
  def error_tree
    Parslet::ErrorTree.new(self).tap { |t|
      t.children << @offending_parslet.error_tree if @offending_parslet }
  end
end
|
@@ -0,0 +1,26 @@
|
|
1
|
+
# Matches a fixed string of characters.
#
# Example:
#
#   str('foo') # matches 'foo'
#
class Parslet::Atoms::Str < Parslet::Atoms::Base
  # The literal this atom matches.
  attr_reader :str

  def initialize(str)
    @str = str
  end

  # Reads as many characters from +io+ as the literal is long and compares
  # them to it. Both failure cases are reported through #error (inherited,
  # not shown in this file); the mismatch report also passes the position
  # recorded before reading. Returns the characters read.
  def try(io)
    start = io.pos
    chunk = io.read(str.size)

    complete = chunk && chunk.size == str.size
    error(io, "Premature end of input") unless complete

    if chunk != str
      error(io, "Expected #{str.inspect}, but got #{chunk.inspect}", start)
    end

    chunk
  end

  # The literal wrapped in single quotes; +prec+ is not used.
  def to_s_inner(prec)
    "'#{str}'"
  end
end
|
26
|
+
|
data/lib/parslet/error_tree.rb
CHANGED
@@ -20,7 +20,6 @@ class Parslet::ErrorTree
|
|
20
20
|
def cause
|
21
21
|
parslet.cause || "Unknown error in #{parslet.inspect}"
|
22
22
|
end
|
23
|
-
alias :to_s :cause
|
24
23
|
|
25
24
|
# Returns an ascii tree representation of the causes of this node and its
|
26
25
|
# children.
|
@@ -30,10 +29,11 @@ class Parslet::ErrorTree
|
|
30
29
|
recursive_ascii_tree(self, io, [true]) }.
|
31
30
|
string
|
32
31
|
end
|
32
|
+
alias to_s ascii_tree
|
33
33
|
private
|
34
34
|
def recursive_ascii_tree(node, stream, curved)
|
35
35
|
append_prefix(stream, curved)
|
36
|
-
stream.puts node
|
36
|
+
stream.puts node.cause
|
37
37
|
|
38
38
|
node.children.each do |child|
|
39
39
|
last_child = (node.children.last == child)
|
@@ -0,0 +1,41 @@
|
|
1
|
+
|
2
|
+
# Allows specifying rules as strings using the exact same grammar that treetop
# does, minus the actions. This is on one hand a good example of a fully fledged
# parser and on the other hand might even turn out really useful.
#
# NOT FINISHED & EXPERIMENTAL
#
class Parslet::Expression
  include Parslet

  autoload :Treetop, 'parslet/expression/treetop'

  # Parses +str+ and stores the resulting parslet.
  #
  # str  - the expression source text.
  # opts - options hash; :type is recorded and defaults to :treetop. The
  #        stored type is not consulted anywhere in this class.
  def initialize(str, opts={})
    @type = opts[:type] || :treetop
    @exp = str
    @parslet = transform(
      parse(str))
  end

  # Transforms the parse tree into a parslet expression.
  #
  # No diagnostic output is produced here: dumping the tree on every
  # construction would pollute the stdout of any program using this class.
  def transform(tree)
    transform = Treetop::Transform.new
    transform.apply(tree)
  end

  # Parses the string and returns a parse tree.
  #
  def parse(str)
    parser = Treetop::Parser.new
    parser.parse(str)
  end

  # Turns this expression into a parslet.
  #
  def to_parslet
    @parslet
  end
end
|
@@ -0,0 +1,53 @@
|
|
1
|
+
# Grammar (Parser) and tree transformation (Transform) used by
# Parslet::Expression for its treetop-style expression strings.
class Parslet::Expression::Treetop
  # Parser for the expression syntax. Alternatives within a rule are tried in
  # the order written, so the longer form always precedes the shorter one.
  class Parser < Parslet::Parser
    root(:expression)

    rule(:expression) {
      alternatives
    }

    # A simple run, optionally followed by '/' and further alternatives.
    rule(:alternatives) {
      simple >> (spaced('/') >> alternatives) |
      simple
    }

    # A run of atoms, each possibly marked optional.
    rule(:simple) {
      perhaps.repeat
    }

    # An atom followed by '?' is tagged :maybe; otherwise a plain atom.
    rule(:perhaps) {
      atom.as(:maybe) >> spaced('?') |
      atom
    }

    # A parenthesized expression (tagged :unwrap) or a string literal.
    rule(:atom) {
      spaced('(') >> expression.as(:unwrap) >> spaced(')') |
      string
    }

    # A single-quoted literal. Inside the quotes, a backslash consumes the
    # character that follows it; any other character is accepted as long as
    # it is not the closing quote. The content is tagged :string.
    rule(:string) {
      str('\'') >>
      (
        (str('\\') >> any) |
        (str("'").absnt? >> any)
      ).repeat.as(:string) >>
      str('\'') >> space?
    }

    # NOTE(review): in a double-quoted Ruby literal "\s" is a single space
    # character, so match receives a space rather than the regexp class \s.
    # Confirm whether tabs and newlines were meant to count as space too.
    rule(:space) { match("\s").repeat(1) }
    rule(:space?) { space.maybe }

    # Matches the literal +str+ followed by optional space. The call
    # str(str) invokes the #str atom constructor with the parameter value.
    def spaced(str)
      str(str) >> space?
    end
  end

  # Turns the tagged parse tree into parslet atoms. The superclass is named
  # directly instead of being reached through Parser's ancestor chain.
  class Transform < Parslet::Transform
    rule(:unwrap => simple(:u)) { u }
    rule(sequence(:s)) { |d| Parslet::Atoms::Sequence.new(*d[:s]) }
    rule(:maybe => simple(:m)) { |d| d[:m].maybe }
    rule(:string => simple(:s)) { |d| str(d[:s]) }
  end

end
|
53
|
+
|
@@ -0,0 +1,17 @@
|
|
1
|
+
|
2
|
+
# Base class to derive your own parsers from. It mixes in Parslet, so the
# parser DSL can be used directly in the subclass body:
#
#   require 'parslet'
#
#   class MyParser < Parslet::Parser
#     rule(:a) { str('a').repeat }
#     root(:a)
#   end
#
#   pp MyParser.new.parse('aaaa')   # => 'aaaa'
#   pp MyParser.new.parse('bbbb')   # => fails with ParseFailed:
#                                   #    Don't know what to do with bbbb at line 1 char 1.
#
class Parslet::Parser
  include Parslet
end
|
data/lib/parslet/pattern.rb
CHANGED
@@ -1,4 +1,3 @@
|
|
1
|
-
|
2
1
|
# Matches trees against expressions. Trees are formed by arrays and hashes
|
3
2
|
# for expressing membership and sequence. The leafs of the tree are other
|
4
3
|
# classes.
|
@@ -20,6 +19,8 @@
|
|
20
19
|
# to match recursively. To do that, please use Parslet::Transform.
|
21
20
|
#
|
22
21
|
class Parslet::Pattern
|
22
|
+
autoload :Context, 'parslet/pattern/context'
|
23
|
+
|
23
24
|
def initialize(pattern)
|
24
25
|
@pattern = pattern
|
25
26
|
end
|
@@ -36,9 +37,11 @@ class Parslet::Pattern
|
|
36
37
|
# end
|
37
38
|
#
|
38
39
|
def each_match(tree, &block) # :yield: subtree
|
40
|
+
raise ArgumentError, "Must pass a block" unless block
|
41
|
+
|
39
42
|
recurse_into(tree) do |subtree|
|
40
43
|
if bindings=match(subtree)
|
41
|
-
|
44
|
+
call_on_match(subtree, bindings, block)
|
42
45
|
end
|
43
46
|
end
|
44
47
|
|
@@ -53,6 +56,23 @@ class Parslet::Pattern
|
|
53
56
|
return bindings if element_match(subtree, @pattern, bindings)
|
54
57
|
end
|
55
58
|
|
59
|
+
# Runs +block+ for a successful match and returns the block's result.
#
# How the block sees the bindings depends on its arity: a block declaring
# exactly one parameter is called with the bindings hash, any other block is
# evaluated inside a fresh Context built from the bindings. Returns nil when
# no block is given. The +tree+ argument is not used here.
#
def call_on_match(tree, bindings, block)
  return nil unless block
  return block.call(bindings) if block.arity == 1

  Context.new(bindings).instance_eval(&block)
end
|
73
|
+
|
74
|
+
# Handles preorder, depth-first recursion through the +expr+ given.
|
75
|
+
#
|
56
76
|
def recurse_into(expr, &block)
|
57
77
|
# p [:attempt_match, expr]
|
58
78
|
block.call(expr)
|
@@ -131,14 +151,4 @@ class Parslet::Pattern
|
|
131
151
|
# Match succeeds
|
132
152
|
return true
|
133
153
|
end
|
134
|
-
|
135
|
-
# Called on a bind variable, returns the variable name without the _
|
136
|
-
#
|
137
|
-
def variable_name(bind_var)
|
138
|
-
str = bind_var.to_s
|
139
|
-
|
140
|
-
if str.size>1
|
141
|
-
str[1..-1].to_sym
|
142
|
-
end
|
143
|
-
end
|
144
154
|
end
|
@@ -2,25 +2,38 @@
|
|
2
2
|
# Used internally for representing a bind placeholder in a Parslet::Transform
|
3
3
|
# pattern. This is the superclass for all bindings.
|
4
4
|
#
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
end
|
10
|
-
|
5
|
+
# It defines the most permissive kind of bind, the one that matches any subtree
|
6
|
+
# whatever it looks like.
|
7
|
+
#
|
8
|
+
class Parslet::Pattern::SubtreeBind < Struct.new(:symbol)
  # Name under which the bound value is stored: the symbol itself.
  def variable_name
    symbol
  end

  # Renders as "<kind>(<symbol>)", where kind is derived from the class name.
  def inspect
    "#{bind_type_name}(#{symbol.inspect})"
  end

  # This bind accepts every subtree.
  def can_bind?(subtree)
    true
  end

private
  # Lowercased part of the class name that precedes 'Bind'.
  def bind_type_name
    kind = self.class.name[/(\w+)Bind/, 1]
    # A missing match should never happen, but since this is for inspection
    # only, fall back to a placeholder instead of raising.
    return 'unknown_bind' unless kind

    kind.downcase
  end
end
|
15
32
|
|
16
33
|
# Binds a symbol to a simple subtree, one that is not either a sequence of
|
17
34
|
# elements or a collection of attributes.
|
18
35
|
#
|
19
|
-
class Parslet::Pattern::SimpleBind < Parslet::Pattern::
|
20
|
-
def inspect
|
21
|
-
"simple(#{symbol.inspect})"
|
22
|
-
end
|
23
|
-
|
36
|
+
class Parslet::Pattern::SimpleBind < Parslet::Pattern::SubtreeBind
|
24
37
|
def can_bind?(subtree)
|
25
38
|
not [Hash, Array].include?(subtree.class)
|
26
39
|
end
|
@@ -28,11 +41,7 @@ end
|
|
28
41
|
|
29
42
|
# Binds a symbol to a sequence of simple leafs ([element1, element2, ...])
|
30
43
|
#
|
31
|
-
class Parslet::Pattern::SequenceBind < Parslet::Pattern::
|
32
|
-
def inspect
|
33
|
-
"sequence(#{symbol.inspect})"
|
34
|
-
end
|
35
|
-
|
44
|
+
class Parslet::Pattern::SequenceBind < Parslet::Pattern::SubtreeBind
|
36
45
|
def can_bind?(subtree)
|
37
46
|
subtree.kind_of?(Array) &&
|
38
47
|
(not subtree.any? { |el| [Hash, Array].include?(el.class) })
|
@@ -0,0 +1,24 @@
|
|
1
|
+
require 'blankslate'
|
2
|
+
|
3
|
+
# Evaluation context for tree transformation blocks. Every key of the
# bindings hash can be read as if it were a local method.
#
# Example:
#
#   ctx = Context.new(:a => :b)
#   ctx.instance_eval do
#     a # => :b
#   end
#
class Parslet::Pattern::Context < BlankSlate
  def initialize(bindings)
    @bindings = bindings
  end

  # Answers argument-less calls whose name is a key of the bindings with the
  # bound value. Everything else is passed on to the superclass.
  def method_missing(sym, *args, &block)
    bound = args.empty? && @bindings.has_key?(sym.to_sym)
    return @bindings[sym] if bound

    super
  end
end
|
data/lib/parslet/transform.rb
CHANGED
@@ -20,36 +20,30 @@ require 'parslet/pattern'
|
|
20
20
|
#
|
21
21
|
# Example:
|
22
22
|
#
|
23
|
-
#
|
24
|
-
#
|
25
|
-
#
|
26
|
-
#
|
27
|
-
#
|
28
|
-
# }
|
29
|
-
#
|
30
|
-
# # Transforms the tree
|
31
|
-
# transform.apply(tree)
|
23
|
+
# class Example < Parslet::Transform
|
24
|
+
# rule(:string => simple(:x)) { # (1)
|
25
|
+
# StringLiteral.new(x)
|
26
|
+
# }
|
27
|
+
# end
|
32
28
|
#
|
33
29
|
# A tree transform (Parslet::Transform) is defined by a set of rules. Each
|
34
30
|
# rule can be defined by calling #rule with the pattern as argument. The block
|
35
31
|
# given will be called every time the rule matches somewhere in the tree given
|
36
32
|
# to #apply. It is passed a Hash containing all the variable bindings of this
|
37
33
|
# pattern match.
|
38
|
-
#
|
39
|
-
# In the above example, (1) illustrates a simple matching rule.
|
40
|
-
# such rules are composed of strings ("foobar"), arrays (["a", "b"]) and
|
41
|
-
# hashes like in the example above.
|
34
|
+
#
|
35
|
+
# In the above example, (1) illustrates a simple matching rule.
|
42
36
|
#
|
43
|
-
# Let's say you want to parse matching parentheses and distill a maximum
|
44
|
-
#
|
37
|
+
# Let's say you want to parse matching parentheses and distill a maximum nest
|
38
|
+
# depth. You would probably write a parser like the one in example/parens.rb;
|
45
39
|
# here's the relevant part:
|
46
40
|
#
|
47
41
|
# rule(:balanced) {
|
48
42
|
# str('(').as(:l) >> balanced.maybe.as(:m) >> str(')').as(:r)
|
49
43
|
# }
|
50
44
|
#
|
51
|
-
# If you now apply this to a string like '(())', you get a intermediate
|
52
|
-
#
|
45
|
+
# If you now apply this to a string like '(())', you get an intermediate parse
|
46
|
+
# tree that looks like this:
|
53
47
|
#
|
54
48
|
# {
|
55
49
|
# :l => "(",
|
@@ -61,20 +55,64 @@ require 'parslet/pattern'
|
|
61
55
|
# This parse tree is good for debugging, but what we would really like to have
|
62
56
|
# is just the nesting depth. This transformation rule will produce that:
|
63
57
|
#
|
64
|
-
#
|
65
|
-
#
|
66
|
-
#
|
67
|
-
# depth.nil? ? 1 : depth+1
|
58
|
+
# rule(:l => '(', :m => simple(:x), :r => ')') {
|
59
|
+
# # innermost :m will contain nil
|
60
|
+
# x.nil? ? 1 : x+1
|
68
61
|
# }
|
69
|
-
# t.apply(tree) # => 2
|
70
62
|
#
|
63
|
+
# = Usage patterns
|
64
|
+
#
|
65
|
+
# There are four ways of using this class. The first one is very much
|
66
|
+
# recommended, followed by the second one for generality. The other ones are
|
67
|
+
# omitted here.
|
68
|
+
#
|
69
|
+
# Recommended usage is as follows:
|
70
|
+
#
|
71
|
+
# class MyTransformator < Parslet::Transform
|
72
|
+
# rule(...) { ... }
|
73
|
+
# rule(...) { ... }
|
74
|
+
# # ...
|
75
|
+
# end
|
76
|
+
# MyTransformator.new.apply(tree)
|
77
|
+
#
|
78
|
+
# Alternatively, you can use the Transform class as follows:
|
79
|
+
#
|
80
|
+
# transform = Parslet::Transform.new do
|
81
|
+
# rule(...) { ... }
|
82
|
+
# end
|
83
|
+
# transform.apply(tree)
|
71
84
|
#
|
72
85
|
class Parslet::Transform
|
73
|
-
|
86
|
+
# FIXME: Maybe only part of it? Or maybe only include into constructor
|
87
|
+
# context?
|
88
|
+
include Parslet
|
89
|
+
|
90
|
+
class << self
  # FIXME: Only do this for subclasses?
  include Parslet

  # Registers a rule on the transform class: +expression+ is wrapped in a
  # Parslet::Pattern and stored together with +block+. Returns the list of
  # rules registered on this class so far.
  #
  def rule(expression, &block)
    (@__transform_rules ||= []) << [Parslet::Pattern.new(expression), block]
  end

  # The rules registered on this class; an empty array when there are none.
  #
  def rules
    return [] unless @__transform_rules

    @__transform_rules
  end
end
|
107
|
+
|
108
|
+
# Starts with an empty list of instance rules. A block, when given, is
# evaluated in the context of the new instance.
def initialize(&block)
  @rules = []

  instance_eval(&block) if block
end
|
76
115
|
|
77
|
-
attr_reader :rules
|
78
116
|
def rule(expression, &block)
|
79
117
|
@rules << [
|
80
118
|
Parslet::Pattern.new(expression),
|
@@ -95,11 +133,18 @@ class Parslet::Transform
|
|
95
133
|
)
|
96
134
|
end
|
97
135
|
|
136
|
+
# All rules that apply to this transform: first the ones declared on the
# class, then the ones added to this instance. Returns a new array.
#
def rules
  class_level = self.class.rules
  class_level + @rules
end
|
142
|
+
|
98
143
|
def transform_elt(elt)
|
99
144
|
rules.each do |pattern, block|
|
100
145
|
if bindings=pattern.match(elt)
|
101
146
|
# Produces transformed value
|
102
|
-
return
|
147
|
+
return pattern.call_on_match(elt, bindings, block)
|
103
148
|
end
|
104
149
|
end
|
105
150
|
|