parslet 1.0.1 → 1.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -9,44 +9,32 @@ class Parslet::Atoms::Lookahead < Parslet::Atoms::Base
9
9
  attr_reader :bound_parslet
10
10
 
11
11
  def initialize(bound_parslet, positive=true) # :nodoc:
12
+ super()
13
+
12
14
  # Model positive and negative lookahead by testing this flag.
13
15
  @positive = positive
14
16
  @bound_parslet = bound_parslet
17
+ @error_msgs = {
18
+ :positive => "lookahead: #{bound_parslet.inspect} didn't match, but should have",
19
+ :negative => "negative lookahead: #{bound_parslet.inspect} matched, but shouldn't have"
20
+ }
15
21
  end
16
22
 
17
- def try(io) # :nodoc:
18
- pos = io.pos
19
-
20
- failed = true
21
- catch(:error) {
22
- bound_parslet.apply(io)
23
- failed = false
24
- }
25
- return failed ? fail(io) : success(io)
23
+ def try(source, context) # :nodoc:
24
+ pos = source.pos
26
25
 
26
+ value = bound_parslet.apply(source, context)
27
+ return success(nil) if positive ^ value.error?
28
+
29
+ return error(source, @error_msgs[:positive]) if positive
30
+ return error(source, @error_msgs[:negative])
31
+
32
+ # This is probably the only parslet that rewinds its input in #try.
33
+ # Lookaheads NEVER consume their input, even on success, that's why.
27
34
  ensure
28
- io.pos = pos
35
+ source.pos = pos
29
36
  end
30
37
 
31
- # TODO Both of these will produce results that could be reduced easily.
32
- # Maybe do some shortcut reducing here?
33
- def fail(io) # :nodoc:
34
- if positive
35
- error(io, "lookahead: #{bound_parslet.inspect} didn't match, but should have")
36
- else
37
- return nil
38
- end
39
- end
40
- def success(io) # :nodoc:
41
- if positive
42
- return nil
43
- else
44
- error(
45
- io,
46
- "negative lookahead: #{bound_parslet.inspect} matched, but shouldn't have")
47
- end
48
- end
49
-
50
38
  precedence LOOKAHEAD
51
39
  def to_s_inner(prec) # :nodoc:
52
40
  char = positive ? '&' : '!'
@@ -8,13 +8,18 @@
8
8
  class Parslet::Atoms::Named < Parslet::Atoms::Base
9
9
  attr_reader :parslet, :name
10
10
  def initialize(parslet, name) # :nodoc:
11
+ super()
12
+
11
13
  @parslet, @name = parslet, name
12
14
  end
13
15
 
14
- def apply(io) # :nodoc:
15
- value = parslet.apply(io)
16
-
17
- produce_return_value value
16
+ def apply(source, context) # :nodoc:
17
+ value = parslet.apply(source, context)
18
+
19
+ return value if value.error?
20
+ success(
21
+ produce_return_value(
22
+ value.result))
18
23
  end
19
24
 
20
25
  def to_s_inner(prec) # :nodoc:
@@ -26,6 +31,6 @@ class Parslet::Atoms::Named < Parslet::Atoms::Base
26
31
  end
27
32
  private
28
33
  def produce_return_value(val) # :nodoc:
29
- { name => flatten(val) }
34
+ { name => flatten(val, true) }
30
35
  end
31
36
  end
@@ -1,6 +1,6 @@
1
1
  # Matches a special kind of regular expression that only ever matches one
2
- # character at a time. Useful members of this family are: character ranges,
3
- # \w, \d, \r, \n, ...
2
+ # character at a time. Useful members of this family are: <code>character
3
+ # ranges, \\w, \\d, \\r, \\n, ...</code>
4
4
  #
5
5
  # Example:
6
6
  #
@@ -10,15 +10,24 @@
10
10
  class Parslet::Atoms::Re < Parslet::Atoms::Base
11
11
  attr_reader :match, :re
12
12
  def initialize(match) # :nodoc:
13
+ super()
14
+
13
15
  @match = match
14
16
  @re = Regexp.new(match, Regexp::MULTILINE)
17
+ @error_msgs = {
18
+ :premature => "Premature end of input",
19
+ :failed => "Failed to match #{match.inspect[1..-2]}"
20
+ }
15
21
  end
16
22
 
17
- def try(io) # :nodoc:
18
- s = io.read(1)
19
- error(io, "Premature end of input") unless s
20
- error(io, "Failed to match #{match.inspect[1..-2]}") unless s.match(re)
21
- return s
23
+ def try(source, context) # :nodoc:
24
+ error_pos = source.pos
25
+ s = source.read(1)
26
+
27
+ return error(source, @error_msgs[:premature], error_pos) unless s
28
+ return error(source, @error_msgs[:failed], error_pos) unless s.match(re)
29
+
30
+ return success(s)
22
31
  end
23
32
 
24
33
  def to_s_inner(prec) # :nodoc:
@@ -9,29 +9,36 @@
9
9
  class Parslet::Atoms::Repetition < Parslet::Atoms::Base
10
10
  attr_reader :min, :max, :parslet
11
11
  def initialize(parslet, min, max, tag=:repetition)
12
+ super()
13
+
12
14
  @parslet = parslet
13
15
  @min, @max = min, max
14
16
  @tag = tag
17
+ @error_msgs = {
18
+ :minrep => "Expected at least #{min} of #{parslet.inspect}"
19
+ }
15
20
  end
16
21
 
17
- def try(io) # :nodoc:
22
+ def try(source, context) # :nodoc:
18
23
  occ = 0
19
24
  result = [@tag] # initialize the result array with the tag (for flattening)
20
- catch(:error) {
21
- result << parslet.apply(io)
25
+ start_pos = source.pos
26
+ loop do
27
+ value = parslet.apply(source, context)
28
+ break if value.error?
29
+
22
30
  occ += 1
31
+ result << value.result
23
32
 
24
33
  # If we're not greedy (max is defined), check if that has been
25
34
  # reached.
26
- return result if max && occ>=max
27
- redo
28
- }
35
+ return success(result) if max && occ>=max
36
+ end
29
37
 
30
38
  # Greedy matcher has produced a failure. Check if occ (which will
31
39
  # contain the number of successes) is in {min, max}.
32
- # p [:repetition, occ, min, max]
33
- error(io, "Expected at least #{min} of #{parslet.inspect}") if occ < min
34
- return result
40
+ return error(source, @error_msgs[:minrep], start_pos) if occ < min
41
+ return success(result)
35
42
  end
36
43
 
37
44
  precedence REPETITION
@@ -7,7 +7,12 @@
7
7
  class Parslet::Atoms::Sequence < Parslet::Atoms::Base
8
8
  attr_reader :parslets
9
9
  def initialize(*parslets)
10
+ super()
11
+
10
12
  @parslets = parslets
13
+ @error_msgs = {
14
+ :failed => "Failed to match sequence (#{self.inspect})"
15
+ }
11
16
  end
12
17
 
13
18
  def >>(parslet) # :nodoc:
@@ -15,16 +20,17 @@ class Parslet::Atoms::Sequence < Parslet::Atoms::Base
15
20
  self
16
21
  end
17
22
 
18
- def try(io) # :nodoc:
19
- catch(:error) {
20
- return [:sequence]+parslets.map { |p|
21
- # Save each parslet as potentially offending (raising an error).
22
- @offending_parslet = p
23
- p.apply(io)
24
- }
25
- }
23
+ def try(source, context) # :nodoc:
24
+ success([:sequence]+parslets.map { |p|
25
+ # Save each parslet as potentially offending (raising an error).
26
+ @offending_parslet = p
27
+
28
+ value = p.apply(source, context)
29
+
30
+ return error(source, @error_msgs[:failed]) if value.error?
26
31
 
27
- error(io, "Failed to match sequence (#{self.inspect})")
32
+ value.result
33
+ })
28
34
  end
29
35
 
30
36
  precedence SEQUENCE
@@ -7,16 +7,26 @@
7
7
  class Parslet::Atoms::Str < Parslet::Atoms::Base
8
8
  attr_reader :str
9
9
  def initialize(str)
10
+ super()
11
+
10
12
  @str = str
13
+ @error_msgs = {
14
+ :premature => "Premature end of input",
15
+ :failed => "Expected #{str.inspect}, but got "
16
+ }
11
17
  end
12
18
 
13
- def try(io) # :nodoc:
14
- old_pos = io.pos
15
- s = io.read(str.size)
16
- error(io, "Premature end of input") unless s && s.size==str.size
17
- error(io, "Expected #{str.inspect}, but got #{s.inspect}", old_pos) \
18
- unless s==str
19
- return s
19
+ def try(source, context) # :nodoc:
20
+ error_pos = source.pos
21
+ s = source.read(str.size)
22
+
23
+ return success(s) if s == str
24
+
25
+ # assert: s != str
26
+
27
+ # Failures:
28
+ return error(source, @error_msgs[:premature]) unless s && s.size==str.size
29
+ return error(source, @error_msgs[:failed]+s.inspect, error_pos)
20
30
  end
21
31
 
22
32
  def to_s_inner(prec) # :nodoc:
@@ -0,0 +1,75 @@
1
+ # Augments all parslet atoms with an accept method that will call back
2
+ # to the visitor given.
3
+
4
+ #
5
+ module Parslet::Atoms
6
+ class Base
7
+ def accept(visitor)
8
+ raise NotImplementedError, "No visit method on #{self.class.name}."
9
+ end
10
+ end
11
+
12
+ class Str
13
+ # Call back the visitor's #str method. See parslet/export for an example.
14
+ #
15
+ def accept(visitor)
16
+ visitor.str(str)
17
+ end
18
+ end
19
+
20
+ class Entity
21
+ # Call back the visitor's #entity method. See parslet/export for an example.
22
+ #
23
+ def accept(visitor)
24
+ visitor.entity(name, context, block)
25
+ end
26
+ end
27
+
28
+ class Named
29
+ # Call back the visitor's #named method. See parslet/export for an example.
30
+ #
31
+ def accept(visitor)
32
+ visitor.named(name, parslet)
33
+ end
34
+ end
35
+
36
+ class Sequence
37
+ # Call back the visitor's #sequence method. See parslet/export for an example.
38
+ #
39
+ def accept(visitor)
40
+ visitor.sequence(parslets)
41
+ end
42
+ end
43
+
44
+ class Repetition
45
+ # Call back the visitor's #repetition method. See parslet/export for an example.
46
+ #
47
+ def accept(visitor)
48
+ visitor.repetition(min, max, parslet)
49
+ end
50
+ end
51
+
52
+ class Alternative
53
+ # Call back the visitor's #alternative method. See parslet/export for an example.
54
+ #
55
+ def accept(visitor)
56
+ visitor.alternative(alternatives)
57
+ end
58
+ end
59
+
60
+ class Lookahead
61
+ # Call back the visitor's #lookahead method. See parslet/export for an example.
62
+ #
63
+ def accept(visitor)
64
+ visitor.lookahead(positive, bound_parslet)
65
+ end
66
+ end
67
+
68
+ class Re
69
+ # Call back the visitor's #re method. See parslet/export for an example.
70
+ #
71
+ def accept(visitor)
72
+ visitor.re(match)
73
+ end
74
+ end
75
+ end
data/lib/parslet/atoms.rb CHANGED
@@ -1,3 +1,6 @@
1
+
2
+ # This is where parslet's name comes from: Small parser atoms.
3
+ #
1
4
  module Parslet::Atoms
2
5
  # The precedence module controls parenthesis during the #inspect printing
3
6
  # of parslets. It is not relevant to other aspects of the parsing.
@@ -12,14 +15,15 @@ module Parslet::Atoms
12
15
  OUTER = (prec+=1) # printing is done here.
13
16
  end
14
17
 
15
- autoload :Base, 'parslet/atoms/base'
16
- autoload :Named, 'parslet/atoms/named'
17
- autoload :Lookahead, 'parslet/atoms/lookahead'
18
- autoload :Alternative, 'parslet/atoms/alternative'
19
- autoload :Sequence, 'parslet/atoms/sequence'
20
- autoload :Repetition, 'parslet/atoms/repetition'
21
- autoload :Re, 'parslet/atoms/re'
22
- autoload :Str, 'parslet/atoms/str'
23
- autoload :Entity, 'parslet/atoms/entity'
18
+ require 'parslet/atoms/context'
19
+ require 'parslet/atoms/base'
20
+ require 'parslet/atoms/named'
21
+ require 'parslet/atoms/lookahead'
22
+ require 'parslet/atoms/alternative'
23
+ require 'parslet/atoms/sequence'
24
+ require 'parslet/atoms/repetition'
25
+ require 'parslet/atoms/re'
26
+ require 'parslet/atoms/str'
27
+ require 'parslet/atoms/entity'
24
28
  end
25
29
 
@@ -0,0 +1,33 @@
1
+ class Parslet::Parser
2
+
3
+ # Packages the common idiom
4
+ #
5
+ # begin
6
+ # tree = parser.parse('something')
7
+ # rescue Parslet::ParseFailed => error
8
+ # puts error
9
+ # puts parser.root.error_tree
10
+ # end
11
+ #
12
+ # into a convenient method.
13
+ #
14
+ # Usage:
15
+ #
16
+ # require 'parslet'
17
+ # require 'parslet/convenience'
18
+ #
19
+ # class FooParser < Parslet::Parser
20
+ # rule(:foo) { str('foo') }
21
+ # root(:foo)
22
+ # end
23
+ #
24
+ # FooParser.new.parse_with_debug('bar')
25
+ #
26
+ def parse_with_debug str
27
+ parse str
28
+ rescue Parslet::ParseFailed => error
29
+ puts error
30
+ puts root.error_tree
31
+ end
32
+
33
+ end
@@ -0,0 +1,162 @@
1
+ # Allows exporting parslet grammars to other lingos.
2
+
3
+ require 'set'
4
+ require 'parslet/atoms/visitor'
5
+
6
+ class Parslet::Parser
7
+ module Visitors
8
+ class Citrus
9
+ attr_reader :context, :output
10
+ def initialize(context)
11
+ @context = context
12
+ end
13
+
14
+ def str(str)
15
+ "\"#{str.inspect[1..-2]}\""
16
+ end
17
+ def re(match)
18
+ match.to_s
19
+ end
20
+
21
+ def entity(name, ctx, block)
22
+ context.deferred(name, [ctx, block])
23
+
24
+ "(#{context.mangle_name(name)})"
25
+ end
26
+ def named(name, parslet)
27
+ parslet.accept(self)
28
+ end
29
+
30
+ def sequence(parslets)
31
+ '(' <<
32
+ parslets.
33
+ map { |el| el.accept(self) }.
34
+ join(' ') <<
35
+ ')'
36
+ end
37
+ def repetition(min, max, parslet)
38
+ parslet.accept(self) << "#{min}*#{max}"
39
+ end
40
+ def alternative(alternatives)
41
+ '(' <<
42
+ alternatives.
43
+ map { |el| el.accept(self) }.
44
+ join(' | ') <<
45
+ ')'
46
+ end
47
+
48
+ def lookahead(positive, bound_parslet)
49
+ (positive ? '&' : '!') <<
50
+ bound_parslet.accept(self)
51
+ end
52
+ end
53
+
54
+ class Treetop < Citrus
55
+ def repetition(min, max, parslet)
56
+ parslet.accept(self) << "#{min}..#{max}"
57
+ end
58
+
59
+ def alternative(alternatives)
60
+ '(' <<
61
+ alternatives.
62
+ map { |el| el.accept(self) }.
63
+ join(' / ') <<
64
+ ')'
65
+ end
66
+ end
67
+ end
68
+
69
+ # A helper class that formats Citrus and Treetop grammars as a string.
70
+ #
71
+ class PrettyPrinter # :nodoc:
72
+ attr_reader :visitor
73
+ def initialize(visitor_klass)
74
+ @visitor = visitor_klass.new(self)
75
+ end
76
+
77
+ # Pretty prints the given parslet using the visitor that has been
78
+ # configured in initialize. Returns the string representation of the
79
+ # Citrus or Treetop grammar.
80
+ #
81
+ def pretty_print(name, parslet) # :nodoc:
82
+ output = "grammar #{name}\n"
83
+
84
+ output << rule('root', parslet)
85
+
86
+ seen = Set.new
87
+ loop do
88
+ # @todo is constantly filled by the visitor (see #deferred). We
89
+ # keep going until it is empty.
90
+ break if @todo.empty?
91
+ name, (context, block) = @todo.shift
92
+
93
+ # Track what rules we've already seen. This breaks loops.
94
+ next if seen.include?(name)
95
+ seen << name
96
+
97
+ output << rule(name, context.instance_eval(&block))
98
+ end
99
+
100
+ output << "end\n"
101
+ end
102
+
103
+ # Formats a rule in either dialect.
104
+ #
105
+ def rule(name, parslet)
106
+ " rule #{mangle_name name}\n" <<
107
+ " " << parslet.accept(visitor) << "\n" <<
108
+ " end\n"
109
+ end
110
+
111
+ # Whenever the visitor encounters a rule in a parslet, it defers the
112
+ # pretty printing of the rule by calling this method.
113
+ #
114
+ def deferred(name, content) # :nodoc:
115
+ @todo ||= []
116
+ @todo << [name, content]
117
+ end
118
+
119
+ # Mangles names so that Citrus and Treetop can live with it. This mostly
120
+ # transforms some of the things that Ruby allows into other patterns. If
121
+ # there is a collision, we will not detect it for now.
122
+ #
123
+ def mangle_name(str) # :nodoc:
124
+ str.to_s.sub(/\?$/, '_p')
125
+ end
126
+ end
127
+
128
+ # Exports the current parser instance as a string in the Citrus dialect.
129
+ #
130
+ # Example:
131
+ #
132
+ # require 'parslet/export'
133
+ # class MyParser < Parslet::Parser
134
+ # root(:expression)
135
+ # rule(:expression) { str('foo') }
136
+ # end
137
+ #
138
+ # MyParser.new.to_citrus # => a citrus grammar as a string
139
+ #
140
+ def to_citrus
141
+ PrettyPrinter.new(Visitors::Citrus).
142
+ pretty_print(self.class.name, root)
143
+ end
144
+
145
+ # Exports the current parser instance as a string in the Treetop dialect.
146
+ #
147
+ # Example:
148
+ #
149
+ # require 'parslet/export'
150
+ # class MyParser < Parslet::Parser
151
+ # root(:expression)
152
+ # rule(:expression) { str('foo') }
153
+ # end
154
+ #
155
+ # MyParser.new.to_treetop # => a treetop grammar as a string
156
+ #
157
+ def to_treetop
158
+ PrettyPrinter.new(Visitors::Treetop).
159
+ pretty_print(self.class.name, root)
160
+ end
161
+ end
162
+
@@ -1,14 +1,14 @@
1
1
 
2
2
  # Allows specifying rules as strings using the exact same grammar that treetop
3
- # does, minus the actions. This is on one hand a good example of a fully fledged
4
- # parser and on the other hand might even turn out really useful.
3
+ # does, minus the actions. This is on one hand a good example of a fully
4
+ # fledged parser and on the other hand might even turn out really useful.
5
5
  #
6
6
  # This can be viewed as an extension to parslet and might even be hosted in
7
7
  # its own gem one fine day.
8
8
  #
9
9
  # NOT FINISHED & EXPERIMENTAL
10
10
  #
11
- class Parslet::Expression # :nodoc:
11
+ class Parslet::Expression
12
12
  include Parslet
13
13
 
14
14
  autoload :Treetop, 'parslet/expression/treetop'
@@ -99,8 +99,8 @@ class Parslet::Pattern
99
99
  end
100
100
 
101
101
  def element_match_hash(tree, exp, bindings)
102
- # Early failure when not all of the hash keys are matched.
103
- return false unless exp.keys == tree.keys
102
+ # Early failure when one hash is bigger than the other
103
+ return false unless exp.size == tree.size
104
104
 
105
105
  # We iterate over expected pattern, since we demand that the keys that
106
106
  # are there should be in tree as well.
@@ -13,12 +13,12 @@ RSpec::Matchers.define(:parse) do |input|
13
13
  failure_message_for_should do |is|
14
14
  "expected " << (@result ?
15
15
  "output of parsing #{input.inspect} with #{is.inspect} to equal #{@as.inspect}, but was #{@result.inspect}" :
16
- "expected #{is.inspect} to be able to parse #{input.inspect}")
16
+ "#{is.inspect} to be able to parse #{input.inspect}")
17
17
  end
18
18
 
19
19
  failure_message_for_should_not do |is|
20
20
  "expected " << (@as ?
21
21
  "output of parsing #{input.inspect} with #{is.inspect} not to equal #{@as.inspect}" :
22
- "expected #{is.inspect} to be able to parse #{input.inspect}")
22
+ "#{is.inspect} to not parse #{input.inspect}, but it did")
23
23
  end
24
24
  end