parslet 1.3.0 → 1.4.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (48) hide show
  1. data/HISTORY.txt +38 -1
  2. data/README +33 -21
  3. data/example/deepest_errors.rb +131 -0
  4. data/example/email_parser.rb +2 -6
  5. data/example/ignore.rb +2 -2
  6. data/example/json.rb +0 -3
  7. data/example/modularity.rb +47 -0
  8. data/example/nested_errors.rb +132 -0
  9. data/example/output/deepest_errors.out +54 -0
  10. data/example/output/modularity.out +0 -0
  11. data/example/output/nested_errors.out +54 -0
  12. data/lib/parslet.rb +65 -51
  13. data/lib/parslet/atoms.rb +1 -1
  14. data/lib/parslet/atoms/alternative.rb +11 -12
  15. data/lib/parslet/atoms/base.rb +57 -99
  16. data/lib/parslet/atoms/can_flatten.rb +9 -4
  17. data/lib/parslet/atoms/context.rb +26 -4
  18. data/lib/parslet/atoms/entity.rb +5 -10
  19. data/lib/parslet/atoms/lookahead.rb +11 -7
  20. data/lib/parslet/atoms/named.rb +8 -12
  21. data/lib/parslet/atoms/re.rb +10 -9
  22. data/lib/parslet/atoms/repetition.rb +23 -24
  23. data/lib/parslet/atoms/sequence.rb +10 -16
  24. data/lib/parslet/atoms/str.rb +11 -13
  25. data/lib/parslet/cause.rb +45 -13
  26. data/lib/parslet/convenience.rb +6 -6
  27. data/lib/parslet/error_reporter.rb +7 -0
  28. data/lib/parslet/error_reporter/deepest.rb +95 -0
  29. data/lib/parslet/error_reporter/tree.rb +57 -0
  30. data/lib/parslet/export.rb +4 -4
  31. data/lib/parslet/expression.rb +0 -2
  32. data/lib/parslet/expression/treetop.rb +2 -2
  33. data/lib/parslet/parser.rb +2 -6
  34. data/lib/parslet/pattern.rb +15 -4
  35. data/lib/parslet/pattern/binding.rb +3 -3
  36. data/lib/parslet/rig/rspec.rb +2 -2
  37. data/lib/parslet/slice.rb +0 -6
  38. data/lib/parslet/source.rb +40 -59
  39. data/lib/parslet/source/line_cache.rb +2 -2
  40. data/lib/parslet/transform.rb +13 -7
  41. data/lib/parslet/transform/context.rb +1 -1
  42. metadata +69 -26
  43. data/example/ignore_whitespace.rb +0 -66
  44. data/lib/parslet/bytecode.rb +0 -6
  45. data/lib/parslet/bytecode/compiler.rb +0 -138
  46. data/lib/parslet/bytecode/instructions.rb +0 -358
  47. data/lib/parslet/bytecode/vm.rb +0 -209
  48. data/lib/parslet/error_tree.rb +0 -50
@@ -20,7 +20,7 @@ module Parslet::Atoms
20
20
  # naming something using <code>.as(...)</code>. It changes the folding
21
21
  # semantics of repetition.
22
22
  #
23
- def flatten(value, named=false) # :nodoc:
23
+ def flatten(value, named=false)
24
24
  # Passes through everything that isn't an array of things
25
25
  return value unless value.instance_of? Array
26
26
 
@@ -53,12 +53,15 @@ module Parslet::Atoms
53
53
 
54
54
  # Flatten results from a sequence of parslets.
55
55
  #
56
- def flatten_sequence(list) # :nodoc:
56
+ # @api private
57
+ #
58
+ def flatten_sequence(list)
57
59
  foldl(list.compact) { |r, e| # and then merge flat elements
58
60
  merge_fold(r, e)
59
61
  }
60
62
  end
61
- def merge_fold(l, r) # :nodoc:
63
+ # @api private
64
+ def merge_fold(l, r)
62
65
  # equal pairs: merge. ----------------------------------------------------
63
66
  if l.class == r.class
64
67
  if l.is_a?(Hash)
@@ -96,7 +99,9 @@ module Parslet::Atoms
96
99
  # the results, we want to leave an empty list alone - otherwise it is
97
100
  # turned into an empty string.
98
101
  #
99
- def flatten_repetition(list, named) # :nodoc:
102
+ # @api private
103
+ #
104
+ def flatten_repetition(list, named)
100
105
  if list.any? { |e| e.instance_of?(Hash) }
101
106
  # If keyed subtrees are in the array, we'll want to discard all
102
107
  # strings inbetween. To keep them, name them.
@@ -3,11 +3,17 @@ module Parslet::Atoms
3
3
  # parslet object to results. This is used for memoization in the packrat
4
4
  # style.
5
5
  #
6
+ # Also, error reporter is stored here and error reporting happens through
7
+ # this class. This makes the reporting pluggable.
8
+ #
6
9
  class Context
7
- def initialize
10
+ # @param reporter [#err, #err_at] Error reporter (leave empty for default
11
+ # reporter)
12
+ def initialize(reporter=Parslet::ErrorReporter::Tree.new)
8
13
  @cache = Hash.new { |h, k| h[k] = {} }
14
+ @reporter = reporter
9
15
  end
10
-
16
+
11
17
  # Caches a parse answer for obj at source.pos. Applying the same parslet
12
18
  # at one position of input always yields the same result, unless the input
13
19
  # has changed.
@@ -16,12 +22,12 @@ module Parslet::Atoms
16
22
  # were consumed by a successful parse. Imitation of such a parse must
17
23
  # advance the input pos by the same amount of bytes.
18
24
  #
19
- def cache(obj, source, &block)
25
+ def try_with_cache(obj, source)
20
26
  beg = source.pos
21
27
 
22
28
  # Not in cache yet? Return early.
23
29
  unless entry = lookup(obj, beg)
24
- result = yield
30
+ result = obj.try(source, self)
25
31
 
26
32
  set obj, beg, [result, source.pos-beg]
27
33
  return result
@@ -36,6 +42,22 @@ module Parslet::Atoms
36
42
  source.pos = beg + advance
37
43
  return result
38
44
  end
45
+
46
+ # Report an error at a given position.
47
+ # @see ErrorReporter
48
+ #
49
+ def err_at(*args)
50
+ return [false, @reporter.err_at(*args)] if @reporter
51
+ return [false, nil]
52
+ end
53
+
54
+ # Report an error.
55
+ # @see ErrorReporter
56
+ #
57
+ def err(*args)
58
+ return [false, @reporter.err(*args)] if @reporter
59
+ return [false, nil]
60
+ end
39
61
 
40
62
  private
41
63
  def lookup(obj, pos)
@@ -10,14 +10,14 @@
10
10
  #
11
11
  class Parslet::Atoms::Entity < Parslet::Atoms::Base
12
12
  attr_reader :name, :block
13
- def initialize(name, &block) # :nodoc:
13
+ def initialize(name, &block)
14
14
  super()
15
15
 
16
16
  @name = name
17
17
  @block = block
18
18
  end
19
19
 
20
- def try(source, context) # :nodoc:
20
+ def try(source, context)
21
21
  parslet.apply(source, context)
22
22
  end
23
23
 
@@ -27,16 +27,11 @@ class Parslet::Atoms::Entity < Parslet::Atoms::Base
27
27
  }
28
28
  end
29
29
 
30
- def to_s_inner(prec) # :nodoc:
30
+ def to_s_inner(prec)
31
31
  name.to_s.upcase
32
- end
33
-
34
- def error_tree # :nodoc:
35
- parslet.error_tree
36
- end
37
-
32
+ end
38
33
  private
39
- def raise_not_implemented # :nodoc:
34
+ def raise_not_implemented
40
35
  trace = caller.reject {|l| l =~ %r{#{Regexp.escape(__FILE__)}}} # blatantly stolen from dependencies.rb in activesupport
41
36
  exception = NotImplementedError.new("rule(#{name.inspect}) { ... } returns nil. Still not implemented, but already used?")
42
37
  exception.set_backtrace(trace)
@@ -8,7 +8,7 @@ class Parslet::Atoms::Lookahead < Parslet::Atoms::Base
8
8
  attr_reader :positive
9
9
  attr_reader :bound_parslet
10
10
 
11
- def initialize(bound_parslet, positive=true) # :nodoc:
11
+ def initialize(bound_parslet, positive=true)
12
12
  super()
13
13
 
14
14
  # Model positive and negative lookahead by testing this flag.
@@ -21,14 +21,18 @@ class Parslet::Atoms::Lookahead < Parslet::Atoms::Base
21
21
  }
22
22
  end
23
23
 
24
- def try(source, context) # :nodoc:
24
+ def try(source, context)
25
25
  pos = source.pos
26
26
 
27
- value = bound_parslet.apply(source, context)
28
- return success(nil) if positive ^ value.error?
27
+ success, value = bound_parslet.apply(source, context)
29
28
 
30
- return error(source, @error_msgs[:positive], pos) if positive
31
- return error(source, @error_msgs[:negative], pos)
29
+ if positive
30
+ return succ(nil) if success
31
+ return context.err_at(self, source, @error_msgs[:positive], pos)
32
+ else
33
+ return succ(nil) unless success
34
+ return context.err_at(self, source, @error_msgs[:negative], pos)
35
+ end
32
36
 
33
37
  # This is probably the only parslet that rewinds its input in #try.
34
38
  # Lookaheads NEVER consume their input, even on success, that's why.
@@ -37,7 +41,7 @@ class Parslet::Atoms::Lookahead < Parslet::Atoms::Base
37
41
  end
38
42
 
39
43
  precedence LOOKAHEAD
40
- def to_s_inner(prec) # :nodoc:
44
+ def to_s_inner(prec)
41
45
  char = positive ? '&' : '!'
42
46
 
43
47
  "#{char}#{bound_parslet.to_s(prec)}"
@@ -7,30 +7,26 @@
7
7
  #
8
8
  class Parslet::Atoms::Named < Parslet::Atoms::Base
9
9
  attr_reader :parslet, :name
10
- def initialize(parslet, name) # :nodoc:
10
+ def initialize(parslet, name)
11
11
  super()
12
12
 
13
13
  @parslet, @name = parslet, name
14
14
  end
15
15
 
16
- def apply(source, context) # :nodoc:
17
- value = parslet.apply(source, context)
16
+ def apply(source, context)
17
+ success, value = result = parslet.apply(source, context)
18
18
 
19
- return value if value.error?
20
- success(
19
+ return result unless success
20
+ succ(
21
21
  produce_return_value(
22
- value.result))
22
+ value))
23
23
  end
24
24
 
25
- def to_s_inner(prec) # :nodoc:
25
+ def to_s_inner(prec)
26
26
  "#{name}:#{parslet.to_s(prec)}"
27
27
  end
28
-
29
- def error_tree # :nodoc:
30
- parslet.error_tree
31
- end
32
28
  private
33
- def produce_return_value(val) # :nodoc:
29
+ def produce_return_value(val)
34
30
  { name => flatten(val, true) }
35
31
  end
36
32
  end
@@ -9,7 +9,7 @@
9
9
  #
10
10
  class Parslet::Atoms::Re < Parslet::Atoms::Base
11
11
  attr_reader :match, :re
12
- def initialize(match) # :nodoc:
12
+ def initialize(match)
13
13
  super()
14
14
 
15
15
  @match = match.to_s
@@ -20,17 +20,18 @@ class Parslet::Atoms::Re < Parslet::Atoms::Base
20
20
  }
21
21
  end
22
22
 
23
- def try(source, context) # :nodoc:
24
- error_pos = source.pos
25
- s = source.read(1)
23
+ def try(source, context)
24
+ return succ(source.consume(1)) if source.matches?(re)
26
25
 
27
- return error(source, @error_msgs[:premature], error_pos) unless s
28
- return error(source, @error_msgs[:failed], error_pos) unless s.match(re)
29
-
30
- return success(s)
26
+ # No string could be read
27
+ return context.err(self, source, @error_msgs[:premature]) \
28
+ if source.eof?
29
+
30
+ # No match
31
+ return context.err(self, source, @error_msgs[:failed])
31
32
  end
32
33
 
33
- def to_s_inner(prec) # :nodoc:
34
+ def to_s_inner(prec)
34
35
  match.inspect[1..-2]
35
36
  end
36
37
  end
@@ -19,46 +19,45 @@ class Parslet::Atoms::Repetition < Parslet::Atoms::Base
19
19
  }
20
20
  end
21
21
 
22
- def try(source, context) # :nodoc:
22
+ def try(source, context)
23
23
  occ = 0
24
- result = [@tag] # initialize the result array with the tag (for flattening)
24
+ accum = [@tag] # initialize the result array with the tag (for flattening)
25
25
  start_pos = source.pos
26
+
27
+ break_on = nil
26
28
  loop do
27
- value = parslet.apply(source, context)
28
- break if value.error?
29
+ success, value = parslet.apply(source, context)
30
+
31
+ break_on = value
32
+ break unless success
29
33
 
30
34
  occ += 1
31
- result << value.result
35
+ accum << value
32
36
 
33
- # If we're not greedy (max is defined), check if that has been
34
- # reached.
35
- return success(result) if max && occ>=max
37
+ # If we're not greedy (max is defined), check if that has been reached.
38
+ return succ(accum) if max && occ>=max
36
39
  end
37
40
 
41
+ # Last attempt to match parslet was a failure, failure reason in break_on.
42
+
38
43
  # Greedy matcher has produced a failure. Check if occ (which will
39
- # contain the number of successes) is in {min, max}.
40
- return error(source, @error_msgs[:minrep], start_pos) if occ < min
41
- return success(result)
44
+ # contain the number of successes) is >= min.
45
+ return context.err_at(
46
+ self,
47
+ source,
48
+ @error_msgs[:minrep],
49
+ start_pos,
50
+ [break_on]) if occ < min
51
+
52
+ return succ(accum)
42
53
  end
43
54
 
44
55
  precedence REPETITION
45
- def to_s_inner(prec) # :nodoc:
56
+ def to_s_inner(prec)
46
57
  minmax = "{#{min}, #{max}}"
47
58
  minmax = '?' if min == 0 && max == 1
48
59
 
49
60
  parslet.to_s(prec) + minmax
50
61
  end
51
-
52
- def cause # :nodoc:
53
- # Either the repetition failed or the parslet inside failed to repeat.
54
- super || parslet.cause
55
- end
56
- def error_tree # :nodoc:
57
- if cause?
58
- Parslet::ErrorTree.new(self, parslet.error_tree)
59
- else
60
- parslet.error_tree
61
- end
62
- end
63
62
  end
64
63
 
@@ -15,30 +15,24 @@ class Parslet::Atoms::Sequence < Parslet::Atoms::Base
15
15
  }
16
16
  end
17
17
 
18
- def >>(parslet) # :nodoc:
18
+ def >>(parslet)
19
19
  self.class.new(* @parslets+[parslet])
20
20
  end
21
21
 
22
- def try(source, context) # :nodoc:
23
- success([:sequence]+parslets.map { |p|
24
- # Save each parslet as potentially offending (raising an error).
25
- @offending_parslet = p
22
+ def try(source, context)
23
+ succ([:sequence]+parslets.map { |p|
24
+ success, value = p.apply(source, context)
26
25
 
27
- value = p.apply(source, context)
28
-
29
- return error(source, @error_msgs[:failed]) if value.error?
30
-
31
- value.result
26
+ unless success
27
+ return context.err(self, source, @error_msgs[:failed], [value])
28
+ end
29
+
30
+ value
32
31
  })
33
32
  end
34
33
 
35
34
  precedence SEQUENCE
36
- def to_s_inner(prec) # :nodoc:
35
+ def to_s_inner(prec)
37
36
  parslets.map { |p| p.to_s(prec) }.join(' ')
38
37
  end
39
-
40
- def error_tree # :nodoc:
41
- Parslet::ErrorTree.new(self).tap { |t|
42
- t.children << @offending_parslet.error_tree if @offending_parslet }
43
- end
44
38
  end
@@ -10,29 +10,27 @@ class Parslet::Atoms::Str < Parslet::Atoms::Base
10
10
  super()
11
11
 
12
12
  @str = str.to_s
13
+ @len = str.size
13
14
  @error_msgs = {
14
15
  :premature => "Premature end of input",
15
16
  :failed => "Expected #{str.inspect}, but got "
16
17
  }
17
18
  end
18
19
 
19
- def try(source, context) # :nodoc:
20
- # NOTE: Even though it doesn't look that way, this is the hotspot, the
21
- # contents of parslets inner loop. Changes here affect parslets speed
22
- # enormously.
23
- error_pos = source.pos
24
- s = source.read(str.bytesize)
25
-
26
- return success(s) if s == str
20
+ def try(source, context)
21
+ return succ(source.consume(@len)) if source.matches?(str)
27
22
 
28
- # assert: s != str
29
-
30
23
  # Failures:
31
- return error(source, @error_msgs[:premature]) unless s && s.size==str.size
32
- return error(source, [@error_msgs[:failed], s], error_pos)
24
+ return context.err(self, source, @error_msgs[:premature]) \
25
+ if source.chars_left<@len
26
+
27
+ error_pos = source.pos
28
+ return context.err_at(
29
+ self, source,
30
+ [@error_msgs[:failed], source.consume(@len)], error_pos)
33
31
  end
34
32
 
35
- def to_s_inner(prec) # :nodoc:
33
+ def to_s_inner(prec)
36
34
  "'#{str}'"
37
35
  end
38
36
  end
data/lib/parslet/cause.rb CHANGED
@@ -1,14 +1,48 @@
1
1
  module Parslet
2
- # An internal class that allows delaying the construction of error messages
3
- # (as strings) until we really need to print them.
2
+ # Represents a cause why a parse did fail. A lot of these objects are
3
+ # constructed - not all of the causes turn out to be failures for the whole
4
+ # parse.
4
5
  #
5
- class Cause < Struct.new(:message, :source, :pos) # :nodoc:
6
- # Appends 'at line ... char ...' to the string given. Use +pos+ to
6
+ class Cause
7
+ def initialize(message, source, pos, children)
8
+ @message, @source, @pos, @children =
9
+ message, source, pos, children
10
+ end
11
+
12
+ # @return [String, Array] A string or an array of message pieces that
13
+ # provide failure information. Use #to_s to get a formatted string.
14
+ attr_reader :message
15
+
16
+ # @return [Parslet::Source] Source that was parsed when this error
17
+ # happened. Mainly used for line number information.
18
+ attr_reader :source
19
+
20
+ # Location of the error.
21
+ #
22
+ # @return [Fixnum] Position where the error happened. (character offset)
23
+ attr_reader :pos
24
+
25
+ # When this cause is part of a tree of error causes: child nodes for this
26
+ # node. Very often carries the reasons for this cause.
27
+ #
28
+ # @return [Array<Parslet::Cause>] A list of reasons for this cause.
29
+ def children
30
+ @children ||= []
31
+ end
32
+
33
+ # Appends 'at line LINE char CHAR' to the string given. Use +pos+ to
7
34
  # override the position of the +source+. This method returns an object
8
35
  # that can be turned into a string using #to_s.
9
36
  #
10
- def self.format(source, pos, str)
11
- self.new(str, source, pos)
37
+ # @param source [Parslet::Source] source that was parsed when this error
38
+ # happened
39
+ # @param pos [Fixnum] position of error
40
+ # @param str [String, Array<String>] message parts
41
+ # @param children [Array<Parslet::Cause>] child nodes for this error tree
42
+ # @return [Parslet::Cause] a new instance of {Parslet::Cause}
43
+ #
44
+ def self.format(source, pos, str, children=[])
45
+ self.new(str, source, pos, children)
12
46
  end
13
47
 
14
48
  def to_s
@@ -37,12 +71,9 @@ module Parslet
37
71
  recursive_ascii_tree(self, io, [true]) }.
38
72
  string
39
73
  end
40
-
41
- def children
42
- @children ||= Array.new
43
- end
74
+
44
75
  private
45
- def recursive_ascii_tree(node, stream, curved) # :nodoc:
76
+ def recursive_ascii_tree(node, stream, curved)
46
77
  append_prefix(stream, curved)
47
78
  stream.puts node.to_s
48
79
 
@@ -52,8 +83,9 @@ module Parslet
52
83
  recursive_ascii_tree(child, stream, curved + [last_child])
53
84
  end
54
85
  end
55
- def append_prefix(stream, curved) # :nodoc:
56
- curved[0..-2].each do |c|
86
+ def append_prefix(stream, curved)
87
+ return if curved.size < 2
88
+ curved[1..-2].each do |c|
57
89
  stream.print c ? " " : "| "
58
90
  end
59
91
  stream.print curved.last ? "`- " : "|- "