parslet 1.0.1 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -9,44 +9,32 @@ class Parslet::Atoms::Lookahead < Parslet::Atoms::Base
9
9
  attr_reader :bound_parslet
10
10
 
11
11
  def initialize(bound_parslet, positive=true) # :nodoc:
12
+ super()
13
+
12
14
  # Model positive and negative lookahead by testing this flag.
13
15
  @positive = positive
14
16
  @bound_parslet = bound_parslet
17
+ @error_msgs = {
18
+ :positive => "lookahead: #{bound_parslet.inspect} didn't match, but should have",
19
+ :negative => "negative lookahead: #{bound_parslet.inspect} matched, but shouldn't have"
20
+ }
15
21
  end
16
22
 
17
- def try(io) # :nodoc:
18
- pos = io.pos
19
-
20
- failed = true
21
- catch(:error) {
22
- bound_parslet.apply(io)
23
- failed = false
24
- }
25
- return failed ? fail(io) : success(io)
23
+ def try(source, context) # :nodoc:
24
+ pos = source.pos
26
25
 
26
+ value = bound_parslet.apply(source, context)
27
+ return success(nil) if positive ^ value.error?
28
+
29
+ return error(source, @error_msgs[:positive]) if positive
30
+ return error(source, @error_msgs[:negative])
31
+
32
+ # This is probably the only parslet that rewinds its input in #try.
33
+ # Lookaheads NEVER consume their input, even on success, that's why.
27
34
  ensure
28
- io.pos = pos
35
+ source.pos = pos
29
36
  end
30
37
 
31
- # TODO Both of these will produce results that could be reduced easily.
32
- # Maybe do some shortcut reducing here?
33
- def fail(io) # :nodoc:
34
- if positive
35
- error(io, "lookahead: #{bound_parslet.inspect} didn't match, but should have")
36
- else
37
- return nil
38
- end
39
- end
40
- def success(io) # :nodoc:
41
- if positive
42
- return nil
43
- else
44
- error(
45
- io,
46
- "negative lookahead: #{bound_parslet.inspect} matched, but shouldn't have")
47
- end
48
- end
49
-
50
38
  precedence LOOKAHEAD
51
39
  def to_s_inner(prec) # :nodoc:
52
40
  char = positive ? '&' : '!'
@@ -8,13 +8,18 @@
8
8
  class Parslet::Atoms::Named < Parslet::Atoms::Base
9
9
  attr_reader :parslet, :name
10
10
  def initialize(parslet, name) # :nodoc:
11
+ super()
12
+
11
13
  @parslet, @name = parslet, name
12
14
  end
13
15
 
14
- def apply(io) # :nodoc:
15
- value = parslet.apply(io)
16
-
17
- produce_return_value value
16
+ def apply(source, context) # :nodoc:
17
+ value = parslet.apply(source, context)
18
+
19
+ return value if value.error?
20
+ success(
21
+ produce_return_value(
22
+ value.result))
18
23
  end
19
24
 
20
25
  def to_s_inner(prec) # :nodoc:
@@ -26,6 +31,6 @@ class Parslet::Atoms::Named < Parslet::Atoms::Base
26
31
  end
27
32
  private
28
33
  def produce_return_value(val) # :nodoc:
29
- { name => flatten(val) }
34
+ { name => flatten(val, true) }
30
35
  end
31
36
  end
@@ -1,6 +1,6 @@
1
1
  # Matches a special kind of regular expression that only ever matches one
2
- # character at a time. Useful members of this family are: character ranges,
3
- # \w, \d, \r, \n, ...
2
+ # character at a time. Useful members of this family are: <code>character
3
+ # ranges, \\w, \\d, \\r, \\n, ...</code>
4
4
  #
5
5
  # Example:
6
6
  #
@@ -10,15 +10,24 @@
10
10
  class Parslet::Atoms::Re < Parslet::Atoms::Base
11
11
  attr_reader :match, :re
12
12
  def initialize(match) # :nodoc:
13
+ super()
14
+
13
15
  @match = match
14
16
  @re = Regexp.new(match, Regexp::MULTILINE)
17
+ @error_msgs = {
18
+ :premature => "Premature end of input",
19
+ :failed => "Failed to match #{match.inspect[1..-2]}"
20
+ }
15
21
  end
16
22
 
17
- def try(io) # :nodoc:
18
- s = io.read(1)
19
- error(io, "Premature end of input") unless s
20
- error(io, "Failed to match #{match.inspect[1..-2]}") unless s.match(re)
21
- return s
23
+ def try(source, context) # :nodoc:
24
+ error_pos = source.pos
25
+ s = source.read(1)
26
+
27
+ return error(source, @error_msgs[:premature], error_pos) unless s
28
+ return error(source, @error_msgs[:failed], error_pos) unless s.match(re)
29
+
30
+ return success(s)
22
31
  end
23
32
 
24
33
  def to_s_inner(prec) # :nodoc:
@@ -9,29 +9,36 @@
9
9
  class Parslet::Atoms::Repetition < Parslet::Atoms::Base
10
10
  attr_reader :min, :max, :parslet
11
11
  def initialize(parslet, min, max, tag=:repetition)
12
+ super()
13
+
12
14
  @parslet = parslet
13
15
  @min, @max = min, max
14
16
  @tag = tag
17
+ @error_msgs = {
18
+ :minrep => "Expected at least #{min} of #{parslet.inspect}"
19
+ }
15
20
  end
16
21
 
17
- def try(io) # :nodoc:
22
+ def try(source, context) # :nodoc:
18
23
  occ = 0
19
24
  result = [@tag] # initialize the result array with the tag (for flattening)
20
- catch(:error) {
21
- result << parslet.apply(io)
25
+ start_pos = source.pos
26
+ loop do
27
+ value = parslet.apply(source, context)
28
+ break if value.error?
29
+
22
30
  occ += 1
31
+ result << value.result
23
32
 
24
33
  # If we're not greedy (max is defined), check if that has been
25
34
  # reached.
26
- return result if max && occ>=max
27
- redo
28
- }
35
+ return success(result) if max && occ>=max
36
+ end
29
37
 
30
38
  # Greedy matcher has produced a failure. Check if occ (which will
31
39
  # contain the number of successes) is in {min, max}.
32
- # p [:repetition, occ, min, max]
33
- error(io, "Expected at least #{min} of #{parslet.inspect}") if occ < min
34
- return result
40
+ return error(source, @error_msgs[:minrep], start_pos) if occ < min
41
+ return success(result)
35
42
  end
36
43
 
37
44
  precedence REPETITION
@@ -7,7 +7,12 @@
7
7
  class Parslet::Atoms::Sequence < Parslet::Atoms::Base
8
8
  attr_reader :parslets
9
9
  def initialize(*parslets)
10
+ super()
11
+
10
12
  @parslets = parslets
13
+ @error_msgs = {
14
+ :failed => "Failed to match sequence (#{self.inspect})"
15
+ }
11
16
  end
12
17
 
13
18
  def >>(parslet) # :nodoc:
@@ -15,16 +20,17 @@ class Parslet::Atoms::Sequence < Parslet::Atoms::Base
15
20
  self
16
21
  end
17
22
 
18
- def try(io) # :nodoc:
19
- catch(:error) {
20
- return [:sequence]+parslets.map { |p|
21
- # Save each parslet as potentially offending (raising an error).
22
- @offending_parslet = p
23
- p.apply(io)
24
- }
25
- }
23
+ def try(source, context) # :nodoc:
24
+ success([:sequence]+parslets.map { |p|
25
+ # Save each parslet as potentially offending (raising an error).
26
+ @offending_parslet = p
27
+
28
+ value = p.apply(source, context)
29
+
30
+ return error(source, @error_msgs[:failed]) if value.error?
26
31
 
27
- error(io, "Failed to match sequence (#{self.inspect})")
32
+ value.result
33
+ })
28
34
  end
29
35
 
30
36
  precedence SEQUENCE
@@ -7,16 +7,26 @@
7
7
  class Parslet::Atoms::Str < Parslet::Atoms::Base
8
8
  attr_reader :str
9
9
  def initialize(str)
10
+ super()
11
+
10
12
  @str = str
13
+ @error_msgs = {
14
+ :premature => "Premature end of input",
15
+ :failed => "Expected #{str.inspect}, but got "
16
+ }
11
17
  end
12
18
 
13
- def try(io) # :nodoc:
14
- old_pos = io.pos
15
- s = io.read(str.size)
16
- error(io, "Premature end of input") unless s && s.size==str.size
17
- error(io, "Expected #{str.inspect}, but got #{s.inspect}", old_pos) \
18
- unless s==str
19
- return s
19
+ def try(source, context) # :nodoc:
20
+ error_pos = source.pos
21
+ s = source.read(str.size)
22
+
23
+ return success(s) if s == str
24
+
25
+ # assert: s != str
26
+
27
+ # Failures:
28
+ return error(source, @error_msgs[:premature]) unless s && s.size==str.size
29
+ return error(source, @error_msgs[:failed]+s.inspect, error_pos)
20
30
  end
21
31
 
22
32
  def to_s_inner(prec) # :nodoc:
@@ -0,0 +1,75 @@
1
+ # Augments all parslet atoms with an accept method that will call back
2
+ # to the visitor given.
3
+
4
+ #
5
+ module Parslet::Atoms
6
+ class Base
7
+ def accept(visitor)
8
+ raise NotImplementedError, "No visit method on #{self.class.name}."
9
+ end
10
+ end
11
+
12
+ class Str
13
+ # Call back the visitor's #str method. See parslet/export for an example.
14
+ #
15
+ def accept(visitor)
16
+ visitor.str(str)
17
+ end
18
+ end
19
+
20
+ class Entity
21
+ # Call back the visitor's #entity method. See parslet/export for an example.
22
+ #
23
+ def accept(visitor)
24
+ visitor.entity(name, context, block)
25
+ end
26
+ end
27
+
28
+ class Named
29
+ # Call back the visitor's #named method. See parslet/export for an example.
30
+ #
31
+ def accept(visitor)
32
+ visitor.named(name, parslet)
33
+ end
34
+ end
35
+
36
+ class Sequence
37
+ # Call back the visitor's #sequence method. See parslet/export for an example.
38
+ #
39
+ def accept(visitor)
40
+ visitor.sequence(parslets)
41
+ end
42
+ end
43
+
44
+ class Repetition
45
+ # Call back the visitor's #repetition method. See parslet/export for an example.
46
+ #
47
+ def accept(visitor)
48
+ visitor.repetition(min, max, parslet)
49
+ end
50
+ end
51
+
52
+ class Alternative
53
+ # Call back the visitor's #alternative method. See parslet/export for an example.
54
+ #
55
+ def accept(visitor)
56
+ visitor.alternative(alternatives)
57
+ end
58
+ end
59
+
60
+ class Lookahead
61
+ # Call back the visitor's #lookahead method. See parslet/export for an example.
62
+ #
63
+ def accept(visitor)
64
+ visitor.lookahead(positive, bound_parslet)
65
+ end
66
+ end
67
+
68
+ class Re
69
+ # Call back the visitor's #re method. See parslet/export for an example.
70
+ #
71
+ def accept(visitor)
72
+ visitor.re(match)
73
+ end
74
+ end
75
+ end
data/lib/parslet/atoms.rb CHANGED
@@ -1,3 +1,6 @@
1
+
2
+ # This is where parslet's name comes from: Small parser atoms.
3
+ #
1
4
  module Parslet::Atoms
2
5
  # The precedence module controls parenthesis during the #inspect printing
3
6
  # of parslets. It is not relevant to other aspects of the parsing.
@@ -12,14 +15,15 @@ module Parslet::Atoms
12
15
  OUTER = (prec+=1) # printing is done here.
13
16
  end
14
17
 
15
- autoload :Base, 'parslet/atoms/base'
16
- autoload :Named, 'parslet/atoms/named'
17
- autoload :Lookahead, 'parslet/atoms/lookahead'
18
- autoload :Alternative, 'parslet/atoms/alternative'
19
- autoload :Sequence, 'parslet/atoms/sequence'
20
- autoload :Repetition, 'parslet/atoms/repetition'
21
- autoload :Re, 'parslet/atoms/re'
22
- autoload :Str, 'parslet/atoms/str'
23
- autoload :Entity, 'parslet/atoms/entity'
18
+ require 'parslet/atoms/context'
19
+ require 'parslet/atoms/base'
20
+ require 'parslet/atoms/named'
21
+ require 'parslet/atoms/lookahead'
22
+ require 'parslet/atoms/alternative'
23
+ require 'parslet/atoms/sequence'
24
+ require 'parslet/atoms/repetition'
25
+ require 'parslet/atoms/re'
26
+ require 'parslet/atoms/str'
27
+ require 'parslet/atoms/entity'
24
28
  end
25
29
 
@@ -0,0 +1,33 @@
1
+ class Parslet::Parser
2
+
3
+ # Packages the common idiom
4
+ #
5
+ # begin
6
+ # tree = parser.parse('something')
7
+ # rescue Parslet::ParseFailed => error
8
+ # puts error
9
+ # puts parser.root.error_tree
10
+ # end
11
+ #
12
+ # into a convenient method.
13
+ #
14
+ # Usage:
15
+ #
16
+ # require 'parslet'
17
+ # require 'parslet/convenience'
18
+ #
19
+ # class FooParser < Parslet::Parser
20
+ # rule(:foo) { str('foo') }
21
+ # root(:foo)
22
+ # end
23
+ #
24
+ # FooParser.new.parse_with_debug('bar')
25
+ #
26
+ def parse_with_debug str
27
+ parse str
28
+ rescue Parslet::ParseFailed => error
29
+ puts error
30
+ puts root.error_tree
31
+ end
32
+
33
+ end
@@ -0,0 +1,162 @@
1
+ # Allows exporting parslet grammars to other lingos.
2
+
3
+ require 'set'
4
+ require 'parslet/atoms/visitor'
5
+
6
+ class Parslet::Parser
7
+ module Visitors
8
+ class Citrus
9
+ attr_reader :context, :output
10
+ def initialize(context)
11
+ @context = context
12
+ end
13
+
14
+ def str(str)
15
+ "\"#{str.inspect[1..-2]}\""
16
+ end
17
+ def re(match)
18
+ match.to_s
19
+ end
20
+
21
+ def entity(name, ctx, block)
22
+ context.deferred(name, [ctx, block])
23
+
24
+ "(#{context.mangle_name(name)})"
25
+ end
26
+ def named(name, parslet)
27
+ parslet.accept(self)
28
+ end
29
+
30
+ def sequence(parslets)
31
+ '(' <<
32
+ parslets.
33
+ map { |el| el.accept(self) }.
34
+ join(' ') <<
35
+ ')'
36
+ end
37
+ def repetition(min, max, parslet)
38
+ parslet.accept(self) << "#{min}*#{max}"
39
+ end
40
+ def alternative(alternatives)
41
+ '(' <<
42
+ alternatives.
43
+ map { |el| el.accept(self) }.
44
+ join(' | ') <<
45
+ ')'
46
+ end
47
+
48
+ def lookahead(positive, bound_parslet)
49
+ (positive ? '&' : '!') <<
50
+ bound_parslet.accept(self)
51
+ end
52
+ end
53
+
54
+ class Treetop < Citrus
55
+ def repetition(min, max, parslet)
56
+ parslet.accept(self) << "#{min}..#{max}"
57
+ end
58
+
59
+ def alternative(alternatives)
60
+ '(' <<
61
+ alternatives.
62
+ map { |el| el.accept(self) }.
63
+ join(' / ') <<
64
+ ')'
65
+ end
66
+ end
67
+ end
68
+
69
+ # A helper class that formats Citrus and Treetop grammars as a string.
70
+ #
71
+ class PrettyPrinter # :nodoc:
72
+ attr_reader :visitor
73
+ def initialize(visitor_klass)
74
+ @visitor = visitor_klass.new(self)
75
+ end
76
+
77
+ # Pretty prints the given parslet using the visitor that has been
78
+ # configured in initialize. Returns the string representation of the
79
+ # Citrus or Treetop grammar.
80
+ #
81
+ def pretty_print(name, parslet) # :nodoc:
82
+ output = "grammar #{name}\n"
83
+
84
+ output << rule('root', parslet)
85
+
86
+ seen = Set.new
87
+ loop do
88
+ # @todo is constantly filled by the visitor (see #deferred). We
89
+ # keep going until it is empty.
90
+ break if @todo.empty?
91
+ name, (context, block) = @todo.shift
92
+
93
+ # Track what rules we've already seen. This breaks loops.
94
+ next if seen.include?(name)
95
+ seen << name
96
+
97
+ output << rule(name, context.instance_eval(&block))
98
+ end
99
+
100
+ output << "end\n"
101
+ end
102
+
103
+ # Formats a rule in either dialect.
104
+ #
105
+ def rule(name, parslet)
106
+ " rule #{mangle_name name}\n" <<
107
+ " " << parslet.accept(visitor) << "\n" <<
108
+ " end\n"
109
+ end
110
+
111
+ # Whenever the visitor encounters a rule in a parslet, it defers the
112
+ # pretty printing of the rule by calling this method.
113
+ #
114
+ def deferred(name, content) # :nodoc:
115
+ @todo ||= []
116
+ @todo << [name, content]
117
+ end
118
+
119
+ # Mangles names so that Citrus and Treetop can live with it. This mostly
120
+ # transforms some of the things that Ruby allows into other patterns. If
121
+ # there is a collision, we will not detect it for now.
122
+ #
123
+ def mangle_name(str) # :nodoc:
124
+ str.to_s.sub(/\?$/, '_p')
125
+ end
126
+ end
127
+
128
+ # Exports the current parser instance as a string in the Citrus dialect.
129
+ #
130
+ # Example:
131
+ #
132
+ # require 'parslet/export'
133
+ # class MyParser < Parslet::Parser
134
+ # root(:expression)
135
+ # rule(:expression) { str('foo') }
136
+ # end
137
+ #
138
+ # MyParser.new.to_citrus # => a citrus grammar as a string
139
+ #
140
+ def to_citrus
141
+ PrettyPrinter.new(Visitors::Citrus).
142
+ pretty_print(self.class.name, root)
143
+ end
144
+
145
+ # Exports the current parser instance as a string in the Treetop dialect.
146
+ #
147
+ # Example:
148
+ #
149
+ # require 'parslet/export'
150
+ # class MyParser < Parslet::Parser
151
+ # root(:expression)
152
+ # rule(:expression) { str('foo') }
153
+ # end
154
+ #
155
+ # MyParser.new.to_treetop # => a treetop grammar as a string
156
+ #
157
+ def to_treetop
158
+ PrettyPrinter.new(Visitors::Treetop).
159
+ pretty_print(self.class.name, root)
160
+ end
161
+ end
162
+
@@ -1,14 +1,14 @@
1
1
 
2
2
  # Allows specifying rules as strings using the exact same grammar that treetop
3
- # does, minus the actions. This is on one hand a good example of a fully fledged
4
- # parser and on the other hand might even turn out really useful.
3
+ # does, minus the actions. This is on one hand a good example of a fully
4
+ # fledged parser and on the other hand might even turn out really useful.
5
5
  #
6
6
  # This can be viewed as an extension to parslet and might even be hosted in
7
7
  # its own gem one fine day.
8
8
  #
9
9
  # NOT FINISHED & EXPERIMENTAL
10
10
  #
11
- class Parslet::Expression # :nodoc:
11
+ class Parslet::Expression
12
12
  include Parslet
13
13
 
14
14
  autoload :Treetop, 'parslet/expression/treetop'
@@ -99,8 +99,8 @@ class Parslet::Pattern
99
99
  end
100
100
 
101
101
  def element_match_hash(tree, exp, bindings)
102
- # Early failure when not all of the hash keys are matched.
103
- return false unless exp.keys == tree.keys
102
+ # Early failure when one hash is bigger than the other
103
+ return false unless exp.size == tree.size
104
104
 
105
105
  # We iterate over expected pattern, since we demand that the keys that
106
106
  # are there should be in tree as well.
@@ -13,12 +13,12 @@ RSpec::Matchers.define(:parse) do |input|
13
13
  failure_message_for_should do |is|
14
14
  "expected " << (@result ?
15
15
  "output of parsing #{input.inspect} with #{is.inspect} to equal #{@as.inspect}, but was #{@result.inspect}" :
16
- "expected #{is.inspect} to be able to parse #{input.inspect}")
16
+ "#{is.inspect} to be able to parse #{input.inspect}")
17
17
  end
18
18
 
19
19
  failure_message_for_should_not do |is|
20
20
  "expected " << (@as ?
21
21
  "output of parsing #{input.inspect} with #{is.inspect} not to equal #{@as.inspect}" :
22
- "expected #{is.inspect} to be able to parse #{input.inspect}")
22
+ "#{is.inspect} to not parse #{input.inspect}, but it did")
23
23
  end
24
24
  end