parslet 0.9.0 → 0.10.1
Sign up to get free protection for your applications and to get access to all the features.
- data/Gemfile +4 -0
- data/HISTORY.txt +24 -1
- data/README +23 -66
- data/Rakefile +10 -6
- data/lib/parslet.rb +50 -137
- data/lib/parslet/atoms.rb +12 -479
- data/lib/parslet/atoms/alternative.rb +40 -0
- data/lib/parslet/atoms/base.rb +196 -0
- data/lib/parslet/atoms/entity.rb +48 -0
- data/lib/parslet/atoms/lookahead.rb +57 -0
- data/lib/parslet/atoms/named.rb +31 -0
- data/lib/parslet/atoms/re.rb +28 -0
- data/lib/parslet/atoms/repetition.rb +58 -0
- data/lib/parslet/atoms/sequence.rb +37 -0
- data/lib/parslet/atoms/str.rb +26 -0
- data/lib/parslet/error_tree.rb +2 -2
- data/lib/parslet/expression.rb +41 -0
- data/lib/parslet/expression/treetop.rb +53 -0
- data/lib/parslet/parser.rb +17 -0
- data/lib/parslet/pattern.rb +22 -12
- data/lib/parslet/pattern/binding.rb +25 -16
- data/lib/parslet/pattern/context.rb +24 -0
- data/lib/parslet/transform.rb +70 -25
- metadata +37 -8
@@ -0,0 +1,37 @@
|
|
1
|
+
# A sequence of parslets, matched from left to right. Denoted by '>>'
|
2
|
+
#
|
3
|
+
# Example:
|
4
|
+
#
|
5
|
+
# str('a') >> str('b') # matches 'a', then 'b'
|
6
|
+
#
|
7
|
+
class Parslet::Atoms::Sequence < Parslet::Atoms::Base
  # The parslets making up this sequence, in the order they are applied.
  attr_reader :parslets

  # Creates a sequence from the given parslets.
  #
  def initialize(*parslets)
    @parslets = parslets
  end

  # Returns a new sequence made of this sequence's parslets followed by
  # +parslet+. The receiver is not modified: a sequence that is stored in a
  # variable can be extended in several different ways without those
  # extensions leaking into each other.
  #
  def >>(parslet)
    self.class.new(*(@parslets + [parslet]))
  end

  # Applies every parslet in order to +io+ and returns the results as an
  # array tagged with :sequence. If a Parslet::ParseFailed is raised while
  # doing so, it is rescued and reported through #error as a failure of the
  # whole sequence.
  #
  def try(io)
    [:sequence] + parslets.map { |p|
      # Remember the parslet currently being applied; if it fails, #error_tree
      # uses it to report where the sequence broke.
      @offending_parslet = p
      p.apply(io)
    }
  rescue Parslet::ParseFailed
    error(io, "Failed to match sequence (#{self.inspect})")
  end

  precedence SEQUENCE

  # Renders the sequence as its parslets' string forms separated by spaces.
  #
  def to_s_inner(prec)
    parslets.map { |p| p.to_s(prec) }.join(' ')
  end

  # Builds the error tree for this sequence. The tree of the parslet that was
  # last recorded by #try (if any) becomes its only child.
  #
  def error_tree
    Parslet::ErrorTree.new(self).tap { |t|
      t.children << @offending_parslet.error_tree if @offending_parslet
    }
  end
end
|
@@ -0,0 +1,26 @@
|
|
1
|
+
# Matches a string of characters.
|
2
|
+
#
|
3
|
+
# Example:
|
4
|
+
#
|
5
|
+
# str('foo') # matches 'foo'
|
6
|
+
#
|
7
|
+
class Parslet::Atoms::Str < Parslet::Atoms::Base
  # The literal string this atom matches.
  attr_reader :str

  def initialize(str)
    @str = str
  end

  # Reads as many characters from +io+ as the expected string is long and
  # compares them against it. Reports an error through #error when the input
  # is too short or when the characters read differ; otherwise returns the
  # characters read.
  #
  def try(io)
    start = io.pos
    chunk = io.read(str.size)

    if !chunk || chunk.size != str.size
      error(io, "Premature end of input")
    end
    if chunk != str
      error(io, "Expected #{str.inspect}, but got #{chunk.inspect}", start)
    end

    chunk
  end

  # Renders the atom as its string wrapped in single quotes.
  #
  def to_s_inner(prec)
    %('#{str}')
  end
end
|
26
|
+
|
data/lib/parslet/error_tree.rb
CHANGED
@@ -20,7 +20,6 @@ class Parslet::ErrorTree
|
|
20
20
|
def cause
|
21
21
|
parslet.cause || "Unknown error in #{parslet.inspect}"
|
22
22
|
end
|
23
|
-
alias :to_s :cause
|
24
23
|
|
25
24
|
# Returns an ascii tree representation of the causes of this node and its
|
26
25
|
# children.
|
@@ -30,10 +29,11 @@ class Parslet::ErrorTree
|
|
30
29
|
recursive_ascii_tree(self, io, [true]) }.
|
31
30
|
string
|
32
31
|
end
|
32
|
+
alias to_s ascii_tree
|
33
33
|
private
|
34
34
|
def recursive_ascii_tree(node, stream, curved)
|
35
35
|
append_prefix(stream, curved)
|
36
|
-
stream.puts node
|
36
|
+
stream.puts node.cause
|
37
37
|
|
38
38
|
node.children.each do |child|
|
39
39
|
last_child = (node.children.last == child)
|
@@ -0,0 +1,41 @@
|
|
1
|
+
|
2
|
+
# Allows specifying rules as strings using the exact same grammar that treetop
|
3
|
+
# does, minus the actions. This is on one hand a good example of a fully fledged
|
4
|
+
# parser and on the other hand might even turn out really useful.
|
5
|
+
#
|
6
|
+
# NOT FINISHED & EXPERIMENTAL
|
7
|
+
#
|
8
|
+
class Parslet::Expression
  include Parslet

  autoload :Treetop, 'parslet/expression/treetop'

  # Parses +str+ and converts the result into a parslet right away.
  #
  # str::  the expression source
  # opts:: options hash; :type is stored and defaults to :treetop
  #
  def initialize(str, opts={})
    @type = opts[:type] || :treetop
    @exp = str
    @parslet = transform(parse(str))
  end

  # Transforms the parse tree into a parslet expression by applying
  # Treetop::Transform to it. Produces no output of its own.
  #
  def transform(tree)
    Treetop::Transform.new.apply(tree)
  end

  # Parses the string with Treetop::Parser and returns a parse tree.
  #
  def parse(str)
    Treetop::Parser.new.parse(str)
  end

  # Turns this expression into a parslet; returns the parslet that was built
  # in the constructor.
  #
  def to_parslet
    @parslet
  end
end
|
@@ -0,0 +1,53 @@
|
|
1
|
+
class Parslet::Expression::Treetop
  # Grammar for the expression syntax: alternatives separated by '/',
  # sequences of atoms, an optional marker '?', parenthesised groups and
  # single-quoted strings.
  #
  class Parser < Parslet::Parser
    root(:expression)

    rule(:expression) {
      alternatives
    }

    # One or more 'simple' expressions separated by '/'.
    rule(:alternatives) {
      simple >> (spaced('/') >> alternatives) |
      simple
    }

    # A run of atoms, each optionally followed by '?'.
    rule(:simple) {
      perhaps.repeat
    }

    rule(:perhaps) {
      atom.as(:maybe) >> spaced('?') |
      atom
    }

    # Either a parenthesised expression or a string literal.
    rule(:atom) {
      spaced('(') >> expression.as(:unwrap) >> spaced(')') |
      string
    }

    # A single-quoted string. Inside it, a backslash followed by any
    # character is consumed as a pair, so an escaped quote does not end the
    # string.
    rule(:string) {
      str('\'') >>
      (
        (str('\\') >> any) |
        (str("'").absnt? >> any)
      ).repeat.as(:string) >>
      str('\'') >> space?
    }

    # NOTE(review): "\s" inside a double-quoted Ruby string is a single space
    # character, not the regexp class \s - so this is handed a plain space.
    # Confirm whether tabs and newlines were meant to count as well.
    rule(:space) { match("\s").repeat(1) }
    rule(:space?) { space.maybe }

    # Matches the literal +str+ followed by optional whitespace.
    #
    def spaced(str)
      str(str) >> space?
    end
  end

  # Turns the tree produced by Parser into parslet atoms. Inherits from
  # Parslet::Transform by its full name, so the superclass does not depend on
  # which modules Parser happens to include.
  #
  class Transform < Parslet::Transform
    rule(:unwrap => simple(:u)) { u }
    rule(sequence(:s)) { |d| Parslet::Atoms::Sequence.new(*d[:s]) }
    rule(:maybe => simple(:m)) { |d| d[:m].maybe }
    rule(:string => simple(:s)) { |d| str(d[:s]) }
  end
end
|
53
|
+
|
@@ -0,0 +1,17 @@
|
|
1
|
+
|
2
|
+
# The base class for all your parsers. Use as follows:
|
3
|
+
#
|
4
|
+
# require 'parslet'
|
5
|
+
#
|
6
|
+
# class MyParser < Parslet::Parser
|
7
|
+
# rule(:a) { str('a').repeat }
|
8
|
+
# root(:a)
|
9
|
+
# end
|
10
|
+
#
|
11
|
+
# pp MyParser.new.parse('aaaa') # => 'aaaa'
|
12
|
+
# pp MyParser.new.parse('bbbb') # => Parslet::ParseFailed:
|
13
|
+
# # Don't know what to do with bbbb at line 1 char 1.
|
14
|
+
#
|
15
|
+
class Parslet::Parser
|
16
|
+
include Parslet
|
17
|
+
end
|
data/lib/parslet/pattern.rb
CHANGED
@@ -1,4 +1,3 @@
|
|
1
|
-
|
2
1
|
# Matches trees against expressions. Trees are formed by arrays and hashes
|
3
2
|
# for expressing membership and sequence. The leaves of the tree are other
|
4
3
|
# classes.
|
@@ -20,6 +19,8 @@
|
|
20
19
|
# to match recursively. To do that, please use Parslet::Transform.
|
21
20
|
#
|
22
21
|
class Parslet::Pattern
|
22
|
+
autoload :Context, 'parslet/pattern/context'
|
23
|
+
|
23
24
|
def initialize(pattern)
|
24
25
|
@pattern = pattern
|
25
26
|
end
|
@@ -36,9 +37,11 @@ class Parslet::Pattern
|
|
36
37
|
# end
|
37
38
|
#
|
38
39
|
def each_match(tree, &block) # :yield: subtree
|
40
|
+
raise ArgumentError, "Must pass a block" unless block
|
41
|
+
|
39
42
|
recurse_into(tree) do |subtree|
|
40
43
|
if bindings=match(subtree)
|
41
|
-
|
44
|
+
call_on_match(subtree, bindings, block)
|
42
45
|
end
|
43
46
|
end
|
44
47
|
|
@@ -53,6 +56,23 @@ class Parslet::Pattern
|
|
53
56
|
return bindings if element_match(subtree, @pattern, bindings)
|
54
57
|
end
|
55
58
|
|
59
|
+
# Executes the block on the bindings obtained by #match, if such a match
|
60
|
+
# can be made. Contains the logic that will switch to instance variables
|
61
|
+
# depending on the arity of the block.
|
62
|
+
#
|
63
|
+
def call_on_match(tree, bindings, block)
|
64
|
+
if block
|
65
|
+
if block.arity == 1
|
66
|
+
return block.call(bindings)
|
67
|
+
else
|
68
|
+
context = Context.new(bindings)
|
69
|
+
return context.instance_eval(&block)
|
70
|
+
end
|
71
|
+
end
|
72
|
+
end
|
73
|
+
|
74
|
+
# Handles preorder, depth-first recursion through the +expr+ given.
|
75
|
+
#
|
56
76
|
def recurse_into(expr, &block)
|
57
77
|
# p [:attempt_match, expr]
|
58
78
|
block.call(expr)
|
@@ -131,14 +151,4 @@ class Parslet::Pattern
|
|
131
151
|
# Match succeeds
|
132
152
|
return true
|
133
153
|
end
|
134
|
-
|
135
|
-
# Called on a bind variable, returns the variable name without the _
|
136
|
-
#
|
137
|
-
def variable_name(bind_var)
|
138
|
-
str = bind_var.to_s
|
139
|
-
|
140
|
-
if str.size>1
|
141
|
-
str[1..-1].to_sym
|
142
|
-
end
|
143
|
-
end
|
144
154
|
end
|
@@ -2,25 +2,38 @@
|
|
2
2
|
# Used internally for representing a bind placeholder in a Parslet::Transform
|
3
3
|
# pattern. This is the superclass for all bindings.
|
4
4
|
#
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
end
|
10
|
-
|
5
|
+
# It defines the most permissive kind of bind, the one that matches any subtree
|
6
|
+
# whatever it looks like.
|
7
|
+
#
|
8
|
+
class Parslet::Pattern::SubtreeBind < Struct.new(:symbol)
|
11
9
|
def variable_name
|
12
10
|
symbol
|
13
11
|
end
|
12
|
+
|
13
|
+
def inspect
|
14
|
+
"#{bind_type_name}(#{symbol.inspect})"
|
15
|
+
end
|
16
|
+
|
17
|
+
def can_bind?(subtree)
|
18
|
+
true
|
19
|
+
end
|
20
|
+
|
21
|
+
private
|
22
|
+
def bind_type_name
|
23
|
+
if md=self.class.name.match(/(\w+)Bind/)
|
24
|
+
md.captures.first.downcase
|
25
|
+
else
|
26
|
+
# This path should never be used, but since this is for inspection only,
|
27
|
+
# let's not raise.
|
28
|
+
'unknown_bind'
|
29
|
+
end
|
30
|
+
end
|
14
31
|
end
|
15
32
|
|
16
33
|
# Binds a symbol to a simple subtree, one that is not either a sequence of
|
17
34
|
# elements or a collection of attributes.
|
18
35
|
#
|
19
|
-
class Parslet::Pattern::SimpleBind < Parslet::Pattern::
|
20
|
-
def inspect
|
21
|
-
"simple(#{symbol.inspect})"
|
22
|
-
end
|
23
|
-
|
36
|
+
class Parslet::Pattern::SimpleBind < Parslet::Pattern::SubtreeBind
|
24
37
|
def can_bind?(subtree)
|
25
38
|
not [Hash, Array].include?(subtree.class)
|
26
39
|
end
|
@@ -28,11 +41,7 @@ end
|
|
28
41
|
|
29
42
|
# Binds a symbol to a sequence of simple leaves ([element1, element2, ...])
|
30
43
|
#
|
31
|
-
class Parslet::Pattern::SequenceBind < Parslet::Pattern::
|
32
|
-
def inspect
|
33
|
-
"sequence(#{symbol.inspect})"
|
34
|
-
end
|
35
|
-
|
44
|
+
class Parslet::Pattern::SequenceBind < Parslet::Pattern::SubtreeBind
|
36
45
|
def can_bind?(subtree)
|
37
46
|
subtree.kind_of?(Array) &&
|
38
47
|
(not subtree.any? { |el| [Hash, Array].include?(el.class) })
|
@@ -0,0 +1,24 @@
|
|
1
|
+
require 'blankslate'
|
2
|
+
|
3
|
+
# Provides a context for tree transformations to run in. The context allows
|
4
|
+
# accessing each of the bindings in the bindings hash as a local method.
|
5
|
+
#
|
6
|
+
# Example:
|
7
|
+
#
|
8
|
+
# ctx = Context.new(:a => :b)
|
9
|
+
# ctx.instance_eval do
|
10
|
+
# a # => :b
|
11
|
+
# end
|
12
|
+
#
|
13
|
+
class Parslet::Pattern::Context < BlankSlate
  # bindings:: hash whose entries are exposed as argument-less methods
  #
  def initialize(bindings)
    @bindings = bindings
  end

  # Answers a call that takes no arguments and whose name is a key of the
  # bindings hash with the bound value. Every other call is handed on to the
  # superclass implementation.
  #
  def method_missing(sym, *args, &block)
    is_binding = args.empty? && @bindings.has_key?(sym.to_sym)
    super unless is_binding

    @bindings[sym]
  end
end
|
data/lib/parslet/transform.rb
CHANGED
@@ -20,36 +20,30 @@ require 'parslet/pattern'
|
|
20
20
|
#
|
21
21
|
# Example:
|
22
22
|
#
|
23
|
-
#
|
24
|
-
#
|
25
|
-
#
|
26
|
-
#
|
27
|
-
#
|
28
|
-
# }
|
29
|
-
#
|
30
|
-
# # Transforms the tree
|
31
|
-
# transform.apply(tree)
|
23
|
+
# class Example < Parslet::Transform
|
24
|
+
# rule(:string => simple(:x)) { # (1)
|
25
|
+
# StringLiteral.new(x)
|
26
|
+
# }
|
27
|
+
# end
|
32
28
|
#
|
33
29
|
# A tree transform (Parslet::Transform) is defined by a set of rules. Each
|
34
30
|
# rule can be defined by calling #rule with the pattern as argument. The block
|
35
31
|
# given will be called every time the rule matches somewhere in the tree given
|
36
32
|
# to #apply. It is passed a Hash containing all the variable bindings of this
|
37
33
|
# pattern match.
|
38
|
-
#
|
39
|
-
# In the above example, (1) illustrates a simple matching rule.
|
40
|
-
# such rules are composed of strings ("foobar"), arrays (["a", "b"]) and
|
41
|
-
# hashes like in the example above.
|
34
|
+
#
|
35
|
+
# In the above example, (1) illustrates a simple matching rule.
|
42
36
|
#
|
43
|
-
# Let's say you want to parse matching parentheses and distill a maximum
|
44
|
-
#
|
37
|
+
# Let's say you want to parse matching parentheses and distill a maximum nest
|
38
|
+
# depth. You would probably write a parser like the one in example/parens.rb;
|
45
39
|
# here's the relevant part:
|
46
40
|
#
|
47
41
|
# rule(:balanced) {
|
48
42
|
# str('(').as(:l) >> balanced.maybe.as(:m) >> str(')').as(:r)
|
49
43
|
# }
|
50
44
|
#
|
51
|
-
# If you now apply this to a string like '(())', you get a intermediate
|
52
|
-
#
|
45
|
+
# If you now apply this to a string like '(())', you get an intermediate parse
|
46
|
+
# tree that looks like this:
|
53
47
|
#
|
54
48
|
# {
|
55
49
|
# :l => "(",
|
@@ -61,20 +55,64 @@ require 'parslet/pattern'
|
|
61
55
|
# This parse tree is good for debugging, but what we would really like to have
|
62
56
|
# is just the nesting depth. This transformation rule will produce that:
|
63
57
|
#
|
64
|
-
#
|
65
|
-
#
|
66
|
-
#
|
67
|
-
# depth.nil? ? 1 : depth+1
|
58
|
+
# rule(:l => '(', :m => simple(:x), :r => ')') {
|
59
|
+
# # innermost :m will contain nil
|
60
|
+
# x.nil? ? 1 : x+1
|
68
61
|
# }
|
69
|
-
# t.apply(tree) # => 2
|
70
62
|
#
|
63
|
+
# = Usage patterns
|
64
|
+
#
|
65
|
+
# There are four ways of using this class. The first one is very much
|
66
|
+
# recommended, followed by the second one for generality. The other ones are
|
67
|
+
# omitted here.
|
68
|
+
#
|
69
|
+
# Recommended usage is as follows:
|
70
|
+
#
|
71
|
+
# class MyTransformator < Parslet::Transform
|
72
|
+
# rule(...) { ... }
|
73
|
+
# rule(...) { ... }
|
74
|
+
# # ...
|
75
|
+
# end
|
76
|
+
# MyTransformator.new.apply(tree)
|
77
|
+
#
|
78
|
+
# Alternatively, you can use the Transform class as follows:
|
79
|
+
#
|
80
|
+
# transform = Parslet::Transform.new do
|
81
|
+
# rule(...) { ... }
|
82
|
+
# end
|
83
|
+
# transform.apply(tree)
|
71
84
|
#
|
72
85
|
class Parslet::Transform
|
73
|
-
|
86
|
+
# FIXME: Maybe only part of it? Or maybe only include into constructor
|
87
|
+
# context?
|
88
|
+
include Parslet
|
89
|
+
|
90
|
+
class << self
|
91
|
+
# FIXME: Only do this for subclasses?
|
92
|
+
include Parslet
|
93
|
+
|
94
|
+
# Define a rule for the transform subclass.
|
95
|
+
#
|
96
|
+
def rule(expression, &block)
|
97
|
+
@__transform_rules ||= []
|
98
|
+
@__transform_rules << [Parslet::Pattern.new(expression), block]
|
99
|
+
end
|
100
|
+
|
101
|
+
# Allows accessing the class' rules
|
102
|
+
#
|
103
|
+
def rules
|
104
|
+
@__transform_rules || []
|
105
|
+
end
|
106
|
+
end
|
107
|
+
|
108
|
+
def initialize(&block)
|
74
109
|
@rules = []
|
110
|
+
|
111
|
+
if block
|
112
|
+
instance_eval(&block)
|
113
|
+
end
|
75
114
|
end
|
76
115
|
|
77
|
-
attr_reader :rules
|
78
116
|
def rule(expression, &block)
|
79
117
|
@rules << [
|
80
118
|
Parslet::Pattern.new(expression),
|
@@ -95,11 +133,18 @@ class Parslet::Transform
|
|
95
133
|
)
|
96
134
|
end
|
97
135
|
|
136
|
+
# Allow easy access to all rules, the ones defined in the instance and the
|
137
|
+
# ones predefined in a subclass definition.
|
138
|
+
#
|
139
|
+
def rules
|
140
|
+
self.class.rules + @rules
|
141
|
+
end
|
142
|
+
|
98
143
|
def transform_elt(elt)
|
99
144
|
rules.each do |pattern, block|
|
100
145
|
if bindings=pattern.match(elt)
|
101
146
|
# Produces transformed value
|
102
|
-
return
|
147
|
+
return pattern.call_on_match(elt, bindings, block)
|
103
148
|
end
|
104
149
|
end
|
105
150
|
|