parslet 0.9.0 → 0.10.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,37 @@
1
+ # A sequence of parslets, matched from left to right. Denoted by '>>'
2
+ #
3
+ # Example:
4
+ #
5
+ # str('a') >> str('b') # matches 'a', then 'b'
6
+ #
7
class Parslet::Atoms::Sequence < Parslet::Atoms::Base
  # The parslets making up this sequence, in matching order.
  attr_reader :parslets

  # Creates a sequence out of the parslets given, kept in argument order.
  def initialize(*parslets)
    @parslets = parslets
  end

  # Returns a new sequence made of this sequence's parslets followed by
  # +parslet+. The receiver is not modified, so one sequence can safely be
  # reused as the common prefix of several longer sequences.
  def >>(parslet)
    self.class.new(*(@parslets + [parslet]))
  end

  # Applies each parslet to +io+ in order and returns an array of the form
  # [:sequence, result1, result2, ...]. A Parslet::ParseFailed raised while
  # applying a parslet is rescued and reported through #error as a failure
  # of the sequence as a whole.
  def try(io)
    results = parslets.map { |parslet|
      # Remember the parslet currently being applied; should it fail,
      # #error_tree reports it as the child of this node.
      @offending_parslet = parslet
      parslet.apply(io)
    }
    [:sequence] + results
  rescue Parslet::ParseFailed
    error(io, "Failed to match sequence (#{self.inspect})")
  end

  # String form: the parslets' own string forms joined by a single space.
  precedence SEQUENCE
  def to_s_inner(prec)
    parslets.map { |parslet| parslet.to_s(prec) }.join(' ')
  end

  # Builds the error tree node for this sequence. The error tree of the
  # parslet that was applied last (if any) is attached as its child.
  def error_tree
    Parslet::ErrorTree.new(self).tap { |tree|
      tree.children << @offending_parslet.error_tree if @offending_parslet }
  end
end
@@ -0,0 +1,26 @@
1
+ # Matches a string of characters.
2
+ #
3
+ # Example:
4
+ #
5
+ # str('foo') # matches 'foo'
6
+ #
7
class Parslet::Atoms::Str < Parslet::Atoms::Base
  # The string this atom expects to find in the input.
  attr_reader :str

  def initialize(str)
    @str = str
  end

  # Reads as many characters from +io+ as the expected string is long and
  # compares them with it. Returns the characters read when they are equal;
  # a short read and a mismatch are both reported through #error.
  def try(io)
    start = io.pos
    wanted = str.size
    chunk = io.read(wanted)

    # The input did not hold enough characters for a full-length read.
    if !chunk || !(chunk.size == wanted)
      error(io, "Premature end of input")
    end

    # Right length, wrong content: report against the position we started at.
    unless chunk == str
      error(io, "Expected #{str.inspect}, but got #{chunk.inspect}", start)
    end

    chunk
  end

  # String form: the expected string wrapped in single quotes.
  def to_s_inner(prec)
    "'#{str}'"
  end
end
26
+
@@ -20,7 +20,6 @@ class Parslet::ErrorTree
20
20
  def cause
21
21
  parslet.cause || "Unknown error in #{parslet.inspect}"
22
22
  end
23
- alias :to_s :cause
24
23
 
25
24
  # Returns an ascii tree representation of the causes of this node and its
26
25
  # children.
@@ -30,10 +29,11 @@ class Parslet::ErrorTree
30
29
  recursive_ascii_tree(self, io, [true]) }.
31
30
  string
32
31
  end
32
+ alias to_s ascii_tree
33
33
  private
34
34
  def recursive_ascii_tree(node, stream, curved)
35
35
  append_prefix(stream, curved)
36
- stream.puts node
36
+ stream.puts node.cause
37
37
 
38
38
  node.children.each do |child|
39
39
  last_child = (node.children.last == child)
@@ -0,0 +1,41 @@
1
+
2
+ # Allows specifying rules as strings using the exact same grammar that treetop
3
+ # does, minus the actions. This is on one hand a good example of a fully fledged
4
+ # parser and on the other hand might even turn out really useful.
5
+ #
6
+ # NOT FINISHED & EXPERIMENTAL
7
+ #
8
class Parslet::Expression
  include Parslet

  autoload :Treetop, 'parslet/expression/treetop'

  # Parses +str+ and immediately transforms the parse tree into a parslet,
  # which is then available through #to_parslet.
  #
  # str  - the expression source text
  # opts - options hash; opts[:type] is stored (default :treetop) but is
  #        not consulted by the code below, which always uses the Treetop
  #        parser and transform.
  #
  def initialize(str, opts={})
    @type = opts[:type] || :treetop
    @exp = str
    @parslet = transform(
      parse(str))
  end

  # Transforms the parse tree into a parslet expression and returns the
  # result of Treetop::Transform#apply. Writes nothing to stdout.
  #
  def transform(tree)
    Treetop::Transform.new.apply(tree)
  end

  # Parses the string with Treetop::Parser and returns the parse tree.
  #
  def parse(str)
    Treetop::Parser.new.parse(str)
  end

  # Turns this expression into a parslet; returns the parslet that was
  # built in the constructor.
  #
  def to_parslet
    @parslet
  end
end
@@ -0,0 +1,53 @@
1
class Parslet::Expression::Treetop
  # Grammar for the expression syntax handled here: alternatives separated
  # by '/', repeated atoms, optional atoms ('?'), parenthesised groups and
  # single-quoted strings.
  class Parser < Parslet::Parser
    root(:expression)

    rule(:expression) do
      alternatives
    end

    # Either "simple / alternatives" or just a simple expression.
    rule(:alternatives) do
      (simple >> (spaced('/') >> alternatives)) |
        simple
    end

    rule(:simple) do
      perhaps.repeat
    end

    # An atom followed by '?' is tagged :maybe; otherwise a bare atom.
    rule(:perhaps) do
      (atom.as(:maybe) >> spaced('?')) |
        atom
    end

    # A parenthesised expression (tagged :unwrap) or a string literal.
    rule(:atom) do
      (spaced('(') >> expression.as(:unwrap) >> spaced(')')) |
        string
    end

    # A single-quoted literal. Inside the quotes, a backslash takes the next
    # character with it; any other character is accepted as long as it is
    # not a single quote. The contents are tagged :string.
    rule(:string) do
      escaped_char = str('\\') >> any
      plain_char   = str("'").absnt? >> any
      contents     = (escaped_char | plain_char).repeat.as(:string)

      str('\'') >> contents >> str('\'') >> space?
    end

    # NOTE(review): "\s" in a double-quoted Ruby literal is a single space
    # character, so this presumably matches blanks only (no tabs or
    # newlines) - confirm whether '\s' was intended.
    rule(:space) { match("\s").repeat(1) }
    rule(:space?) { space.maybe }

    # Matches the literal +str+ followed by optional space.
    def spaced(str)
      str(str) >> space?
    end
  end

  # Turns the tree produced by Parser into parslet atoms.
  class Transform < Parser::Transform
    rule(:unwrap => simple(:u)) { u }
    rule(sequence(:s)) { |bindings| Parslet::Atoms::Sequence.new(*bindings[:s]) }
    rule(:maybe => simple(:m)) { |bindings| bindings[:m].maybe }
    rule(:string => simple(:s)) { |bindings| str(bindings[:s]) }
  end

end
53
+
@@ -0,0 +1,17 @@
1
+
2
# Base class for the parsers you write: subclass it and declare the grammar
# in the class body. Example:
#
#   require 'parslet'
#
#   class MyParser < Parslet::Parser
#     rule(:a) { str('a').repeat }
#     root(:a)
#   end
#
#   pp MyParser.new.parse('aaaa')   # => 'aaaa'
#   pp MyParser.new.parse('bbbb')   # => Parslet::ParseFailed:
#                                   #    Don't know what to do with bbbb at line 1 char 1.
#
class Parslet::Parser
  include Parslet
end
@@ -1,4 +1,3 @@
1
-
2
1
  # Matches trees against expressions. Trees are formed by arrays and hashes
3
2
  # for expressing membership and sequence. The leafs of the tree are other
4
3
  # classes.
@@ -20,6 +19,8 @@
20
19
  # to match recursively. To do that, please use Parslet::Transform.
21
20
  #
22
21
  class Parslet::Pattern
22
+ autoload :Context, 'parslet/pattern/context'
23
+
23
24
  def initialize(pattern)
24
25
  @pattern = pattern
25
26
  end
@@ -36,9 +37,11 @@ class Parslet::Pattern
36
37
  # end
37
38
  #
38
39
  def each_match(tree, &block) # :yield: subtree
40
+ raise ArgumentError, "Must pass a block" unless block
41
+
39
42
  recurse_into(tree) do |subtree|
40
43
  if bindings=match(subtree)
41
- block.call(bindings) if block
44
+ call_on_match(subtree, bindings, block)
42
45
  end
43
46
  end
44
47
 
@@ -53,6 +56,23 @@ class Parslet::Pattern
53
56
  return bindings if element_match(subtree, @pattern, bindings)
54
57
  end
55
58
 
59
+ # Executes the block on the bindings obtained by #match, if such a match
60
+ # can be made. Contains the logic that will switch to instance variables
61
+ # depending on the arity of the block.
62
+ #
63
def call_on_match(tree, bindings, block)
  # Without a block there is nothing to run; the result is nil. The +tree+
  # argument is accepted but not used by this method.
  return nil unless block

  # A block declaring exactly one parameter receives the bindings hash.
  return block.call(bindings) if block.arity == 1

  # Any other block is evaluated inside a Context built from the bindings.
  Context.new(bindings).instance_eval(&block)
end
73
+
74
+ # Handles preorder, depth-first recursion through the +expr+ given.
75
+ #
56
76
  def recurse_into(expr, &block)
57
77
  # p [:attempt_match, expr]
58
78
  block.call(expr)
@@ -131,14 +151,4 @@ class Parslet::Pattern
131
151
  # Match succeeds
132
152
  return true
133
153
  end
134
-
135
- # Called on a bind variable, returns the variable name without the _
136
- #
137
- def variable_name(bind_var)
138
- str = bind_var.to_s
139
-
140
- if str.size>1
141
- str[1..-1].to_sym
142
- end
143
- end
144
154
  end
@@ -2,25 +2,38 @@
2
2
  # Used internally for representing a bind placeholder in a Parslet::Transform
3
3
  # pattern. This is the superclass for all bindings.
4
4
  #
5
- class Parslet::Pattern::Bind
6
- attr_reader :symbol
7
- def initialize(symbol)
8
- @symbol = symbol
9
- end
10
-
5
+ # It defines the most permissive kind of bind, the one that matches any subtree
6
+ # whatever it looks like.
7
+ #
8
class Parslet::Pattern::SubtreeBind < Struct.new(:symbol)
  # Name under which a bound value is stored: the symbol itself.
  def variable_name
    symbol
  end

  # Renders the bind as "<type>(<symbol>)", where the type is derived from
  # the class name by #bind_type_name.
  def inspect
    type = bind_type_name
    "#{type}(#{symbol.inspect})"
  end

  # This kind of bind accepts every subtree.
  def can_bind?(subtree)
    true
  end

  private

  # Lower-cased part of the class name that precedes "Bind".
  def bind_type_name
    prefix = self.class.name[/(\w+)Bind/, 1]

    # Not expected to happen; since this only serves #inspect, fall back to
    # a placeholder instead of raising.
    return 'unknown_bind' unless prefix

    prefix.downcase
  end
end
15
32
 
16
33
  # Binds a symbol to a simple subtree, one that is not either a sequence of
17
34
  # elements or a collection of attributes.
18
35
  #
19
- class Parslet::Pattern::SimpleBind < Parslet::Pattern::Bind
20
- def inspect
21
- "simple(#{symbol.inspect})"
22
- end
23
-
36
+ class Parslet::Pattern::SimpleBind < Parslet::Pattern::SubtreeBind
24
37
  def can_bind?(subtree)
25
38
  not [Hash, Array].include?(subtree.class)
26
39
  end
@@ -28,11 +41,7 @@ end
28
41
 
29
42
  # Binds a symbol to a sequence of simple leafs ([element1, element2, ...])
30
43
  #
31
- class Parslet::Pattern::SequenceBind < Parslet::Pattern::Bind
32
- def inspect
33
- "sequence(#{symbol.inspect})"
34
- end
35
-
44
+ class Parslet::Pattern::SequenceBind < Parslet::Pattern::SubtreeBind
36
45
  def can_bind?(subtree)
37
46
  subtree.kind_of?(Array) &&
38
47
  (not subtree.any? { |el| [Hash, Array].include?(el.class) })
@@ -0,0 +1,24 @@
1
+ require 'blankslate'
2
+
3
+ # Provides a context for tree transformations to run in. The context allows
4
+ # accessing each of the bindings in the bindings hash as local method.
5
+ #
6
+ # Example:
7
+ #
8
+ # ctx = Context.new(:a => :b)
9
+ # ctx.instance_eval do
10
+ # a # => :b
11
+ # end
12
+ #
13
class Parslet::Pattern::Context < BlankSlate
  def initialize(bindings)
    @bindings = bindings
  end

  # Answers argument-less calls whose name is a key of the bindings with
  # the bound value; every other call is handed on to super.
  def method_missing(sym, *args, &block)
    is_binding = args.empty? && @bindings.has_key?(sym.to_sym)
    return super unless is_binding

    @bindings[sym]
  end
end
@@ -20,36 +20,30 @@ require 'parslet/pattern'
20
20
  #
21
21
  # Example:
22
22
  #
23
- # transform = Parslet::Transform.new
24
- # transform.rule(
25
- # :string => simple(:x) # (1)
26
- # ) { |d|
27
- # StringLiteral.new(d[:x]) # (2)
28
- # }
29
- #
30
- # # Transforms the tree
31
- # transform.apply(tree)
23
+ # class Example < Parslet::Transform
24
+ # rule(:string => simple(:x)) { # (1)
25
+ # StringLiteral.new(x)
26
+ # }
27
+ # end
32
28
  #
33
29
  # A tree transform (Parslet::Transform) is defined by a set of rules. Each
34
30
  # rule can be defined by calling #rule with the pattern as argument. The block
35
31
  # given will be called every time the rule matches somewhere in the tree given
36
32
  # to #apply. It is passed a Hash containing all the variable bindings of this
37
33
  # pattern match.
38
- #
39
- # In the above example, (1) illustrates a simple matching rule. In general,
40
- # such rules are composed of strings ("foobar"), arrays (["a", "b"]) and
41
- # hashes like in the example above.
34
+ #
35
+ # In the above example, (1) illustrates a simple matching rule.
42
36
  #
43
- # Let's say you want to parse matching parentheses and distill a maximum
44
- # nest depth. You would probably write a parser like the one in example/parens.rb;
37
+ # Let's say you want to parse matching parentheses and distill a maximum nest
38
+ # depth. You would probably write a parser like the one in example/parens.rb;
45
39
  # here's the relevant part:
46
40
  #
47
41
  # rule(:balanced) {
48
42
  # str('(').as(:l) >> balanced.maybe.as(:m) >> str(')').as(:r)
49
43
  # }
50
44
  #
51
- # If you now apply this to a string like '(())', you get a intermediate
52
- # parse tree that looks like this:
45
+ # If you now apply this to a string like '(())', you get an intermediate parse
46
+ # tree that looks like this:
53
47
  #
54
48
  # {
55
49
  # :l => "(",
@@ -61,20 +55,64 @@ require 'parslet/pattern'
61
55
  # This parse tree is good for debugging, but what we would really like to have
62
56
  # is just the nesting depth. This transformation rule will produce that:
63
57
  #
64
- # t.rule(:l => '(', :m => simple(:x), :r => ')') { |d|
65
- # depth = d[:x]
66
- #
67
- # depth.nil? ? 1 : depth+1
58
+ # rule(:l => '(', :m => simple(:x), :r => ')') {
59
+ # # innermost :m will contain nil
60
+ # x.nil? ? 1 : x+1
68
61
  # }
69
- # t.apply(tree) # => 2
70
62
  #
63
+ # = Usage patterns
64
+ #
65
+ # There are four ways of using this class. The first one is very much
66
+ # recommended, followed by the second one for generality. The other ones are
67
+ # omitted here.
68
+ #
69
+ # Recommended usage is as follows:
70
+ #
71
+ # class MyTransformator < Parslet::Transform
72
+ # rule(...) { ... }
73
+ # rule(...) { ... }
74
+ # # ...
75
+ # end
76
+ # MyTransformator.new.apply(tree)
77
+ #
78
+ # Alternatively, you can use the Transform class as follows:
79
+ #
80
+ # transform = Parslet::Transform.new do
81
+ # rule(...) { ... }
82
+ # end
83
+ # transform.apply(tree)
71
84
  #
72
85
  class Parslet::Transform
73
- def initialize
86
+ # FIXME: Maybe only part of it? Or maybe only include into constructor
87
+ # context?
88
+ include Parslet
89
+
90
class << self
  # FIXME: Only do this for subclasses?
  include Parslet

  # Registers a rule on the transform class: the expression is wrapped in
  # a Parslet::Pattern and stored together with the block.
  #
  def rule(expression, &block)
    registry = (@__transform_rules ||= [])
    registry << [Parslet::Pattern.new(expression), block]
  end

  # Returns the rules registered on this class, or an empty array when
  # none have been registered yet.
  #
  def rules
    return [] unless @__transform_rules

    @__transform_rules
  end
end
107
+
108
def initialize(&block)
  # Rules added to this particular instance start out empty.
  @rules = []

  # An optional block is evaluated with the new object as self.
  instance_eval(&block) if block
end
76
115
 
77
- attr_reader :rules
78
116
  def rule(expression, &block)
79
117
  @rules << [
80
118
  Parslet::Pattern.new(expression),
@@ -95,11 +133,18 @@ class Parslet::Transform
95
133
  )
96
134
  end
97
135
 
136
+ # Allow easy access to all rules, the ones defined in the instance and the
137
+ # ones predefined in a subclass definition.
138
+ #
139
def rules
  # Rules declared on the class come first, followed by this instance's own.
  class_level = self.class.rules
  class_level + @rules
end
142
+
98
143
  def transform_elt(elt)
99
144
  rules.each do |pattern, block|
100
145
  if bindings=pattern.match(elt)
101
146
  # Produces transformed value
102
- return block.call(bindings)
147
+ return pattern.call_on_match(elt, bindings, block)
103
148
  end
104
149
  end
105
150