parslet 1.3.0 → 1.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48)
  1. data/HISTORY.txt +38 -1
  2. data/README +33 -21
  3. data/example/deepest_errors.rb +131 -0
  4. data/example/email_parser.rb +2 -6
  5. data/example/ignore.rb +2 -2
  6. data/example/json.rb +0 -3
  7. data/example/modularity.rb +47 -0
  8. data/example/nested_errors.rb +132 -0
  9. data/example/output/deepest_errors.out +54 -0
  10. data/example/output/modularity.out +0 -0
  11. data/example/output/nested_errors.out +54 -0
  12. data/lib/parslet.rb +65 -51
  13. data/lib/parslet/atoms.rb +1 -1
  14. data/lib/parslet/atoms/alternative.rb +11 -12
  15. data/lib/parslet/atoms/base.rb +57 -99
  16. data/lib/parslet/atoms/can_flatten.rb +9 -4
  17. data/lib/parslet/atoms/context.rb +26 -4
  18. data/lib/parslet/atoms/entity.rb +5 -10
  19. data/lib/parslet/atoms/lookahead.rb +11 -7
  20. data/lib/parslet/atoms/named.rb +8 -12
  21. data/lib/parslet/atoms/re.rb +10 -9
  22. data/lib/parslet/atoms/repetition.rb +23 -24
  23. data/lib/parslet/atoms/sequence.rb +10 -16
  24. data/lib/parslet/atoms/str.rb +11 -13
  25. data/lib/parslet/cause.rb +45 -13
  26. data/lib/parslet/convenience.rb +6 -6
  27. data/lib/parslet/error_reporter.rb +7 -0
  28. data/lib/parslet/error_reporter/deepest.rb +95 -0
  29. data/lib/parslet/error_reporter/tree.rb +57 -0
  30. data/lib/parslet/export.rb +4 -4
  31. data/lib/parslet/expression.rb +0 -2
  32. data/lib/parslet/expression/treetop.rb +2 -2
  33. data/lib/parslet/parser.rb +2 -6
  34. data/lib/parslet/pattern.rb +15 -4
  35. data/lib/parslet/pattern/binding.rb +3 -3
  36. data/lib/parslet/rig/rspec.rb +2 -2
  37. data/lib/parslet/slice.rb +0 -6
  38. data/lib/parslet/source.rb +40 -59
  39. data/lib/parslet/source/line_cache.rb +2 -2
  40. data/lib/parslet/transform.rb +13 -7
  41. data/lib/parslet/transform/context.rb +1 -1
  42. metadata +69 -26
  43. data/example/ignore_whitespace.rb +0 -66
  44. data/lib/parslet/bytecode.rb +0 -6
  45. data/lib/parslet/bytecode/compiler.rb +0 -138
  46. data/lib/parslet/bytecode/instructions.rb +0 -358
  47. data/lib/parslet/bytecode/vm.rb +0 -209
  48. data/lib/parslet/error_tree.rb +0 -50
@@ -20,7 +20,7 @@ module Parslet::Atoms
20
20
  # naming something using <code>.as(...)</code>. It changes the folding
21
21
  # semantics of repetition.
22
22
  #
23
- def flatten(value, named=false) # :nodoc:
23
+ def flatten(value, named=false)
24
24
  # Passes through everything that isn't an array of things
25
25
  return value unless value.instance_of? Array
26
26
 
@@ -53,12 +53,15 @@ module Parslet::Atoms
53
53
 
54
54
  # Flatten results from a sequence of parslets.
55
55
  #
56
- def flatten_sequence(list) # :nodoc:
56
+ # @api private
57
+ #
58
+ def flatten_sequence(list)
57
59
  foldl(list.compact) { |r, e| # and then merge flat elements
58
60
  merge_fold(r, e)
59
61
  }
60
62
  end
61
- def merge_fold(l, r) # :nodoc:
63
+ # @api private
64
+ def merge_fold(l, r)
62
65
  # equal pairs: merge. ----------------------------------------------------
63
66
  if l.class == r.class
64
67
  if l.is_a?(Hash)
@@ -96,7 +99,9 @@ module Parslet::Atoms
96
99
  # the results, we want to leave an empty list alone - otherwise it is
97
100
  # turned into an empty string.
98
101
  #
99
- def flatten_repetition(list, named) # :nodoc:
102
+ # @api private
103
+ #
104
+ def flatten_repetition(list, named)
100
105
  if list.any? { |e| e.instance_of?(Hash) }
101
106
  # If keyed subtrees are in the array, we'll want to discard all
102
107
  # strings in between. To keep them, name them.
@@ -3,11 +3,17 @@ module Parslet::Atoms
3
3
  # parslet object to results. This is used for memoization in the packrat
4
4
  # style.
5
5
  #
6
+ # Also, error reporter is stored here and error reporting happens through
7
+ # this class. This makes the reporting pluggable.
8
+ #
6
9
  class Context
7
- def initialize
10
+ # @param reporter [#err, #err_at] Error reporter (leave empty for default
11
+ # reporter)
12
+ def initialize(reporter=Parslet::ErrorReporter::Tree.new)
8
13
  @cache = Hash.new { |h, k| h[k] = {} }
14
+ @reporter = reporter
9
15
  end
10
-
16
+
11
17
  # Caches a parse answer for obj at source.pos. Applying the same parslet
12
18
  # at one position of input always yields the same result, unless the input
13
19
  # has changed.
@@ -16,12 +22,12 @@ module Parslet::Atoms
16
22
  # were consumed by a successful parse. Imitation of such a parse must
17
23
  # advance the input pos by the same amount of bytes.
18
24
  #
19
- def cache(obj, source, &block)
25
+ def try_with_cache(obj, source)
20
26
  beg = source.pos
21
27
 
22
28
  # Not in cache yet? Return early.
23
29
  unless entry = lookup(obj, beg)
24
- result = yield
30
+ result = obj.try(source, self)
25
31
 
26
32
  set obj, beg, [result, source.pos-beg]
27
33
  return result
@@ -36,6 +42,22 @@ module Parslet::Atoms
36
42
  source.pos = beg + advance
37
43
  return result
38
44
  end
45
+
46
+ # Report an error at a given position.
47
+ # @see ErrorReporter
48
+ #
49
+ def err_at(*args)
50
+ return [false, @reporter.err_at(*args)] if @reporter
51
+ return [false, nil]
52
+ end
53
+
54
+ # Report an error.
55
+ # @see ErrorReporter
56
+ #
57
+ def err(*args)
58
+ return [false, @reporter.err(*args)] if @reporter
59
+ return [false, nil]
60
+ end
39
61
 
40
62
  private
41
63
  def lookup(obj, pos)
@@ -10,14 +10,14 @@
10
10
  #
11
11
  class Parslet::Atoms::Entity < Parslet::Atoms::Base
12
12
  attr_reader :name, :block
13
- def initialize(name, &block) # :nodoc:
13
+ def initialize(name, &block)
14
14
  super()
15
15
 
16
16
  @name = name
17
17
  @block = block
18
18
  end
19
19
 
20
- def try(source, context) # :nodoc:
20
+ def try(source, context)
21
21
  parslet.apply(source, context)
22
22
  end
23
23
 
@@ -27,16 +27,11 @@ class Parslet::Atoms::Entity < Parslet::Atoms::Base
27
27
  }
28
28
  end
29
29
 
30
- def to_s_inner(prec) # :nodoc:
30
+ def to_s_inner(prec)
31
31
  name.to_s.upcase
32
- end
33
-
34
- def error_tree # :nodoc:
35
- parslet.error_tree
36
- end
37
-
32
+ end
38
33
  private
39
- def raise_not_implemented # :nodoc:
34
+ def raise_not_implemented
40
35
  trace = caller.reject {|l| l =~ %r{#{Regexp.escape(__FILE__)}}} # blatantly stolen from dependencies.rb in activesupport
41
36
  exception = NotImplementedError.new("rule(#{name.inspect}) { ... } returns nil. Still not implemented, but already used?")
42
37
  exception.set_backtrace(trace)
@@ -8,7 +8,7 @@ class Parslet::Atoms::Lookahead < Parslet::Atoms::Base
8
8
  attr_reader :positive
9
9
  attr_reader :bound_parslet
10
10
 
11
- def initialize(bound_parslet, positive=true) # :nodoc:
11
+ def initialize(bound_parslet, positive=true)
12
12
  super()
13
13
 
14
14
  # Model positive and negative lookahead by testing this flag.
@@ -21,14 +21,18 @@ class Parslet::Atoms::Lookahead < Parslet::Atoms::Base
21
21
  }
22
22
  end
23
23
 
24
- def try(source, context) # :nodoc:
24
+ def try(source, context)
25
25
  pos = source.pos
26
26
 
27
- value = bound_parslet.apply(source, context)
28
- return success(nil) if positive ^ value.error?
27
+ success, value = bound_parslet.apply(source, context)
29
28
 
30
- return error(source, @error_msgs[:positive], pos) if positive
31
- return error(source, @error_msgs[:negative], pos)
29
+ if positive
30
+ return succ(nil) if success
31
+ return context.err_at(self, source, @error_msgs[:positive], pos)
32
+ else
33
+ return succ(nil) unless success
34
+ return context.err_at(self, source, @error_msgs[:negative], pos)
35
+ end
32
36
 
33
37
  # This is probably the only parslet that rewinds its input in #try.
34
38
  # Lookaheads NEVER consume their input, even on success, that's why.
@@ -37,7 +41,7 @@ class Parslet::Atoms::Lookahead < Parslet::Atoms::Base
37
41
  end
38
42
 
39
43
  precedence LOOKAHEAD
40
- def to_s_inner(prec) # :nodoc:
44
+ def to_s_inner(prec)
41
45
  char = positive ? '&' : '!'
42
46
 
43
47
  "#{char}#{bound_parslet.to_s(prec)}"
@@ -7,30 +7,26 @@
7
7
  #
8
8
  class Parslet::Atoms::Named < Parslet::Atoms::Base
9
9
  attr_reader :parslet, :name
10
- def initialize(parslet, name) # :nodoc:
10
+ def initialize(parslet, name)
11
11
  super()
12
12
 
13
13
  @parslet, @name = parslet, name
14
14
  end
15
15
 
16
- def apply(source, context) # :nodoc:
17
- value = parslet.apply(source, context)
16
+ def apply(source, context)
17
+ success, value = result = parslet.apply(source, context)
18
18
 
19
- return value if value.error?
20
- success(
19
+ return result unless success
20
+ succ(
21
21
  produce_return_value(
22
- value.result))
22
+ value))
23
23
  end
24
24
 
25
- def to_s_inner(prec) # :nodoc:
25
+ def to_s_inner(prec)
26
26
  "#{name}:#{parslet.to_s(prec)}"
27
27
  end
28
-
29
- def error_tree # :nodoc:
30
- parslet.error_tree
31
- end
32
28
  private
33
- def produce_return_value(val) # :nodoc:
29
+ def produce_return_value(val)
34
30
  { name => flatten(val, true) }
35
31
  end
36
32
  end
@@ -9,7 +9,7 @@
9
9
  #
10
10
  class Parslet::Atoms::Re < Parslet::Atoms::Base
11
11
  attr_reader :match, :re
12
- def initialize(match) # :nodoc:
12
+ def initialize(match)
13
13
  super()
14
14
 
15
15
  @match = match.to_s
@@ -20,17 +20,18 @@ class Parslet::Atoms::Re < Parslet::Atoms::Base
20
20
  }
21
21
  end
22
22
 
23
- def try(source, context) # :nodoc:
24
- error_pos = source.pos
25
- s = source.read(1)
23
+ def try(source, context)
24
+ return succ(source.consume(1)) if source.matches?(re)
26
25
 
27
- return error(source, @error_msgs[:premature], error_pos) unless s
28
- return error(source, @error_msgs[:failed], error_pos) unless s.match(re)
29
-
30
- return success(s)
26
+ # No string could be read
27
+ return context.err(self, source, @error_msgs[:premature]) \
28
+ if source.eof?
29
+
30
+ # No match
31
+ return context.err(self, source, @error_msgs[:failed])
31
32
  end
32
33
 
33
- def to_s_inner(prec) # :nodoc:
34
+ def to_s_inner(prec)
34
35
  match.inspect[1..-2]
35
36
  end
36
37
  end
@@ -19,46 +19,45 @@ class Parslet::Atoms::Repetition < Parslet::Atoms::Base
19
19
  }
20
20
  end
21
21
 
22
- def try(source, context) # :nodoc:
22
+ def try(source, context)
23
23
  occ = 0
24
- result = [@tag] # initialize the result array with the tag (for flattening)
24
+ accum = [@tag] # initialize the result array with the tag (for flattening)
25
25
  start_pos = source.pos
26
+
27
+ break_on = nil
26
28
  loop do
27
- value = parslet.apply(source, context)
28
- break if value.error?
29
+ success, value = parslet.apply(source, context)
30
+
31
+ break_on = value
32
+ break unless success
29
33
 
30
34
  occ += 1
31
- result << value.result
35
+ accum << value
32
36
 
33
- # If we're not greedy (max is defined), check if that has been
34
- # reached.
35
- return success(result) if max && occ>=max
37
+ # If we're not greedy (max is defined), check if that has been reached.
38
+ return succ(accum) if max && occ>=max
36
39
  end
37
40
 
41
+ # Last attempt to match parslet was a failure, failure reason in break_on.
42
+
38
43
  # Greedy matcher has produced a failure. Check if occ (which will
39
- # contain the number of sucesses) is in {min, max}.
40
- return error(source, @error_msgs[:minrep], start_pos) if occ < min
41
- return success(result)
44
+ # contain the number of successes) is >= min.
45
+ return context.err_at(
46
+ self,
47
+ source,
48
+ @error_msgs[:minrep],
49
+ start_pos,
50
+ [break_on]) if occ < min
51
+
52
+ return succ(accum)
42
53
  end
43
54
 
44
55
  precedence REPETITION
45
- def to_s_inner(prec) # :nodoc:
56
+ def to_s_inner(prec)
46
57
  minmax = "{#{min}, #{max}}"
47
58
  minmax = '?' if min == 0 && max == 1
48
59
 
49
60
  parslet.to_s(prec) + minmax
50
61
  end
51
-
52
- def cause # :nodoc:
53
- # Either the repetition failed or the parslet inside failed to repeat.
54
- super || parslet.cause
55
- end
56
- def error_tree # :nodoc:
57
- if cause?
58
- Parslet::ErrorTree.new(self, parslet.error_tree)
59
- else
60
- parslet.error_tree
61
- end
62
- end
63
62
  end
64
63
 
@@ -15,30 +15,24 @@ class Parslet::Atoms::Sequence < Parslet::Atoms::Base
15
15
  }
16
16
  end
17
17
 
18
- def >>(parslet) # :nodoc:
18
+ def >>(parslet)
19
19
  self.class.new(* @parslets+[parslet])
20
20
  end
21
21
 
22
- def try(source, context) # :nodoc:
23
- success([:sequence]+parslets.map { |p|
24
- # Save each parslet as potentially offending (raising an error).
25
- @offending_parslet = p
22
+ def try(source, context)
23
+ succ([:sequence]+parslets.map { |p|
24
+ success, value = p.apply(source, context)
26
25
 
27
- value = p.apply(source, context)
28
-
29
- return error(source, @error_msgs[:failed]) if value.error?
30
-
31
- value.result
26
+ unless success
27
+ return context.err(self, source, @error_msgs[:failed], [value])
28
+ end
29
+
30
+ value
32
31
  })
33
32
  end
34
33
 
35
34
  precedence SEQUENCE
36
- def to_s_inner(prec) # :nodoc:
35
+ def to_s_inner(prec)
37
36
  parslets.map { |p| p.to_s(prec) }.join(' ')
38
37
  end
39
-
40
- def error_tree # :nodoc:
41
- Parslet::ErrorTree.new(self).tap { |t|
42
- t.children << @offending_parslet.error_tree if @offending_parslet }
43
- end
44
38
  end
@@ -10,29 +10,27 @@ class Parslet::Atoms::Str < Parslet::Atoms::Base
10
10
  super()
11
11
 
12
12
  @str = str.to_s
13
+ @len = str.size
13
14
  @error_msgs = {
14
15
  :premature => "Premature end of input",
15
16
  :failed => "Expected #{str.inspect}, but got "
16
17
  }
17
18
  end
18
19
 
19
- def try(source, context) # :nodoc:
20
- # NOTE: Even though it doesn't look that way, this is the hotspot, the
21
- # contents of parslets inner loop. Changes here affect parslets speed
22
- # enormously.
23
- error_pos = source.pos
24
- s = source.read(str.bytesize)
25
-
26
- return success(s) if s == str
20
+ def try(source, context)
21
+ return succ(source.consume(@len)) if source.matches?(str)
27
22
 
28
- # assert: s != str
29
-
30
23
  # Failures:
31
- return error(source, @error_msgs[:premature]) unless s && s.size==str.size
32
- return error(source, [@error_msgs[:failed], s], error_pos)
24
+ return context.err(self, source, @error_msgs[:premature]) \
25
+ if source.chars_left<@len
26
+
27
+ error_pos = source.pos
28
+ return context.err_at(
29
+ self, source,
30
+ [@error_msgs[:failed], source.consume(@len)], error_pos)
33
31
  end
34
32
 
35
- def to_s_inner(prec) # :nodoc:
33
+ def to_s_inner(prec)
36
34
  "'#{str}'"
37
35
  end
38
36
  end
data/lib/parslet/cause.rb CHANGED
@@ -1,14 +1,48 @@
1
1
  module Parslet
2
- # An internal class that allows delaying the construction of error messages
3
- # (as strings) until we really need to print them.
2
+ # Represents a cause why a parse did fail. A lot of these objects are
3
+ # constructed - not all of the causes turn out to be failures for the whole
4
+ # parse.
4
5
  #
5
- class Cause < Struct.new(:message, :source, :pos) # :nodoc:
6
- # Appends 'at line ... char ...' to the string given. Use +pos+ to
6
+ class Cause
7
+ def initialize(message, source, pos, children)
8
+ @message, @source, @pos, @children =
9
+ message, source, pos, children
10
+ end
11
+
12
+ # @return [String, Array] A string or an array of message pieces that
13
+ # provide failure information. Use #to_s to get a formatted string.
14
+ attr_reader :message
15
+
16
+ # @return [Parslet::Source] Source that was parsed when this error
17
+ # happend. Mainly used for line number information.
18
+ attr_reader :source
19
+
20
+ # Location of the error.
21
+ #
22
+ # @return [Fixnum] Position where the error happened. (character offset)
23
+ attr_reader :pos
24
+
25
+ # When this cause is part of a tree of error causes: child nodes for this
26
+ # node. Very often carries the reasons for this cause.
27
+ #
28
+ # @return [Array<Parslet::Cause>] A list of reasons for this cause.
29
+ def children
30
+ @children ||= []
31
+ end
32
+
33
+ # Appends 'at line LINE char CHAR' to the string given. Use +pos+ to
7
34
  # override the position of the +source+. This method returns an object
8
35
  # that can be turned into a string using #to_s.
9
36
  #
10
- def self.format(source, pos, str)
11
- self.new(str, source, pos)
37
+ # @param source [Parslet::Source] source that was parsed when this error
38
+ # happened
39
+ # @param pos [Fixnum] position of error
40
+ # @param str [String, Array<String>] message parts
41
+ # @param children [Array<Parslet::Cause>] child nodes for this error tree
42
+ # @return [Parslet::Cause] a new instance of {Parslet::Cause}
43
+ #
44
+ def self.format(source, pos, str, children=[])
45
+ self.new(str, source, pos, children)
12
46
  end
13
47
 
14
48
  def to_s
@@ -37,12 +71,9 @@ module Parslet
37
71
  recursive_ascii_tree(self, io, [true]) }.
38
72
  string
39
73
  end
40
-
41
- def children
42
- @children ||= Array.new
43
- end
74
+
44
75
  private
45
- def recursive_ascii_tree(node, stream, curved) # :nodoc:
76
+ def recursive_ascii_tree(node, stream, curved)
46
77
  append_prefix(stream, curved)
47
78
  stream.puts node.to_s
48
79
 
@@ -52,8 +83,9 @@ module Parslet
52
83
  recursive_ascii_tree(child, stream, curved + [last_child])
53
84
  end
54
85
  end
55
- def append_prefix(stream, curved) # :nodoc:
56
- curved[0..-2].each do |c|
86
+ def append_prefix(stream, curved)
87
+ return if curved.size < 2
88
+ curved[1..-2].each do |c|
57
89
  stream.print c ? " " : "| "
58
90
  end
59
91
  stream.print curved.last ? "`- " : "|- "