RubyGems - parslet - Versions diffs - 1.3.0 → 1.4.0 - Mend

parslet 1.3.0 → 1.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (48)

data/HISTORY.txt +38 -1
data/README +33 -21
data/example/deepest_errors.rb +131 -0
data/example/email_parser.rb +2 -6
data/example/ignore.rb +2 -2
data/example/json.rb +0 -3
data/example/modularity.rb +47 -0
data/example/nested_errors.rb +132 -0
data/example/output/deepest_errors.out +54 -0
data/example/output/modularity.out +0 -0
data/example/output/nested_errors.out +54 -0
data/lib/parslet.rb +65 -51
data/lib/parslet/atoms.rb +1 -1
data/lib/parslet/atoms/alternative.rb +11 -12
data/lib/parslet/atoms/base.rb +57 -99
data/lib/parslet/atoms/can_flatten.rb +9 -4
data/lib/parslet/atoms/context.rb +26 -4
data/lib/parslet/atoms/entity.rb +5 -10
data/lib/parslet/atoms/lookahead.rb +11 -7
data/lib/parslet/atoms/named.rb +8 -12
data/lib/parslet/atoms/re.rb +10 -9
data/lib/parslet/atoms/repetition.rb +23 -24
data/lib/parslet/atoms/sequence.rb +10 -16
data/lib/parslet/atoms/str.rb +11 -13
data/lib/parslet/cause.rb +45 -13
data/lib/parslet/convenience.rb +6 -6
data/lib/parslet/error_reporter.rb +7 -0
data/lib/parslet/error_reporter/deepest.rb +95 -0
data/lib/parslet/error_reporter/tree.rb +57 -0
data/lib/parslet/export.rb +4 -4
data/lib/parslet/expression.rb +0 -2
data/lib/parslet/expression/treetop.rb +2 -2
data/lib/parslet/parser.rb +2 -6
data/lib/parslet/pattern.rb +15 -4
data/lib/parslet/pattern/binding.rb +3 -3
data/lib/parslet/rig/rspec.rb +2 -2
data/lib/parslet/slice.rb +0 -6
data/lib/parslet/source.rb +40 -59
data/lib/parslet/source/line_cache.rb +2 -2
data/lib/parslet/transform.rb +13 -7
data/lib/parslet/transform/context.rb +1 -1
metadata +69 -26
data/example/ignore_whitespace.rb +0 -66
data/lib/parslet/bytecode.rb +0 -6
data/lib/parslet/bytecode/compiler.rb +0 -138
data/lib/parslet/bytecode/instructions.rb +0 -358
data/lib/parslet/bytecode/vm.rb +0 -209
data/lib/parslet/error_tree.rb +0 -50

data/lib/parslet/atoms/can_flatten.rb CHANGED Viewed

@@ -20,7 +20,7 @@ module Parslet::Atoms
     # naming something using <code>.as(...)</code>. It changes the folding
     # semantics of repetition.
     #
-    def flatten(value, named=false) # :nodoc:
+    def flatten(value, named=false)
       # Passes through everything that isn't an array of things
       return value unless value.instance_of? Array
@@ -53,12 +53,15 @@ module Parslet::Atoms
     # Flatten results from a sequence of parslets.
     #
-    def flatten_sequence(list) # :nodoc:
+    # @api private
+    #
+    def flatten_sequence(list)
       foldl(list.compact) { |r, e|        # and then merge flat elements
         merge_fold(r, e)
       }
     end
-    def merge_fold(l, r) # :nodoc:
+    # @api private
+    def merge_fold(l, r)
       # equal pairs: merge. ----------------------------------------------------
       if l.class == r.class
         if l.is_a?(Hash)
@@ -96,7 +99,9 @@ module Parslet::Atoms
     # the results, we want to leave an empty list alone - otherwise it is
     # turned into an empty string.
     #
-    def flatten_repetition(list, named) # :nodoc:
+    # @api private
+    #
+    def flatten_repetition(list, named)
       if list.any? { |e| e.instance_of?(Hash) }
         # If keyed subtrees are in the array, we'll want to discard all
         # strings inbetween. To keep them, name them.

data/lib/parslet/atoms/context.rb CHANGED Viewed

@@ -3,11 +3,17 @@ module Parslet::Atoms
   # parslet object to results. This is used for memoization in the packrat
   # style.
   #
+  # Also, error reporter is stored here and error reporting happens through
+  # this class. This makes the reporting pluggable.
+  #
   class Context
-    def initialize
+    # @param reporter [#err, #err_at] Error reporter (leave empty for default
+    #   reporter)
+    def initialize(reporter=Parslet::ErrorReporter::Tree.new)
       @cache = Hash.new { |h, k| h[k] = {} }
+      @reporter = reporter
     end
     # Caches a parse answer for obj at source.pos. Applying the same parslet
     # at one position of input always yields the same result, unless the input
     # has changed.
@@ -16,12 +22,12 @@ module Parslet::Atoms
     # were consumed by a successful parse. Imitation of such a parse must
     # advance the input pos by the same amount of bytes.
     #
-    def cache(obj, source, &block)
+    def try_with_cache(obj, source)
       beg = source.pos
       # Not in cache yet? Return early.
       unless entry = lookup(obj, beg)
-        result = yield
+        result = obj.try(source, self)
         set obj, beg, [result, source.pos-beg]
         return result
@@ -36,6 +42,22 @@ module Parslet::Atoms
       source.pos = beg + advance
       return result
     end
+    # Report an error at a given position.
+    # @see ErrorReporter
+    #
+    def err_at(*args)
+      return [false, @reporter.err_at(*args)] if @reporter
+      return [false, nil]
+    end
+    # Report an error.
+    # @see ErrorReporter
+    #
+    def err(*args)
+      return [false, @reporter.err(*args)] if @reporter
+      return [false, nil]
+    end
   private
     def lookup(obj, pos)

data/lib/parslet/atoms/entity.rb CHANGED Viewed

@@ -10,14 +10,14 @@
 #
 class Parslet::Atoms::Entity < Parslet::Atoms::Base
   attr_reader :name, :block
-  def initialize(name, &block) # :nodoc:
+  def initialize(name, &block)
     super()
     @name = name
     @block = block
   end
-  def try(source, context) # :nodoc:
+  def try(source, context)
     parslet.apply(source, context)
   end
@@ -27,16 +27,11 @@ class Parslet::Atoms::Entity < Parslet::Atoms::Base
     }
   end
-  def to_s_inner(prec) # :nodoc:
+  def to_s_inner(prec)
     name.to_s.upcase
-  end
-  def error_tree # :nodoc:
-    parslet.error_tree
-  end
+  end
 private
-  def raise_not_implemented # :nodoc:
+  def raise_not_implemented
     trace = caller.reject {|l| l =~ %r{#{Regexp.escape(__FILE__)}}} # blatantly stolen from dependencies.rb in activesupport
     exception = NotImplementedError.new("rule(#{name.inspect}) { ... }  returns nil. Still not implemented, but already used?")
     exception.set_backtrace(trace)

data/lib/parslet/atoms/lookahead.rb CHANGED Viewed

@@ -8,7 +8,7 @@ class Parslet::Atoms::Lookahead < Parslet::Atoms::Base
   attr_reader :positive
   attr_reader :bound_parslet
-  def initialize(bound_parslet, positive=true) # :nodoc:
+  def initialize(bound_parslet, positive=true)
     super()
     # Model positive and negative lookahead by testing this flag.
@@ -21,14 +21,18 @@ class Parslet::Atoms::Lookahead < Parslet::Atoms::Base
     }
   end
-  def try(source, context) # :nodoc:
+  def try(source, context)
     pos = source.pos
-    value = bound_parslet.apply(source, context)
-    return success(nil) if positive ^ value.error?
+    success, value = bound_parslet.apply(source, context)
-    return error(source, @error_msgs[:positive], pos) if positive
-    return error(source, @error_msgs[:negative], pos)
+    if positive
+      return succ(nil) if success
+      return context.err_at(self, source, @error_msgs[:positive], pos)
+    else
+      return succ(nil) unless success
+      return context.err_at(self, source, @error_msgs[:negative], pos)
+    end
   # This is probably the only parslet that rewinds its input in #try.
   # Lookaheads NEVER consume their input, even on success, that's why.
@@ -37,7 +41,7 @@ class Parslet::Atoms::Lookahead < Parslet::Atoms::Base
   end
   precedence LOOKAHEAD
-  def to_s_inner(prec) # :nodoc:
+  def to_s_inner(prec)
     char = positive ? '&' : '!'
     "#{char}#{bound_parslet.to_s(prec)}"

data/lib/parslet/atoms/named.rb CHANGED Viewed

@@ -7,30 +7,26 @@
 #
 class Parslet::Atoms::Named < Parslet::Atoms::Base
   attr_reader :parslet, :name
-  def initialize(parslet, name) # :nodoc:
+  def initialize(parslet, name)
     super()
     @parslet, @name = parslet, name
   end
-  def apply(source, context) # :nodoc:
-    value = parslet.apply(source, context)
+  def apply(source, context)
+    success, value = result = parslet.apply(source, context)
-    return value if value.error?
-    success(
+    return result unless success
+    succ(
       produce_return_value(
-        value.result))
+        value))
   end
-  def to_s_inner(prec) # :nodoc:
+  def to_s_inner(prec)
     "#{name}:#{parslet.to_s(prec)}"
   end
-  def error_tree # :nodoc:
-    parslet.error_tree
-  end
 private
-  def produce_return_value(val) # :nodoc:
+  def produce_return_value(val)
     { name => flatten(val, true) }
   end
 end

data/lib/parslet/atoms/re.rb CHANGED Viewed

@@ -9,7 +9,7 @@
 #
 class Parslet::Atoms::Re < Parslet::Atoms::Base
   attr_reader :match, :re
-  def initialize(match) # :nodoc:
+  def initialize(match)
     super()
     @match = match.to_s
@@ -20,17 +20,18 @@ class Parslet::Atoms::Re < Parslet::Atoms::Base
     }
   end
-  def try(source, context) # :nodoc:
-    error_pos = source.pos
-    s = source.read(1)
+  def try(source, context)
+    return succ(source.consume(1)) if source.matches?(re)
-    return error(source, @error_msgs[:premature], error_pos) unless s
-    return error(source, @error_msgs[:failed], error_pos) unless s.match(re)
-    return success(s)
+    # No string could be read
+    return context.err(self, source, @error_msgs[:premature]) \
+      if source.eof?
+    # No match
+    return context.err(self, source, @error_msgs[:failed])
   end
-  def to_s_inner(prec) # :nodoc:
+  def to_s_inner(prec)
     match.inspect[1..-2]
   end
 end

data/lib/parslet/atoms/repetition.rb CHANGED Viewed

@@ -19,46 +19,45 @@ class Parslet::Atoms::Repetition < Parslet::Atoms::Base
     }
   end
-  def try(source, context) # :nodoc:
+  def try(source, context)
     occ = 0
-    result = [@tag]   # initialize the result array with the tag (for flattening)
+    accum = [@tag]   # initialize the result array with the tag (for flattening)
     start_pos = source.pos
+    break_on = nil
     loop do
-      value = parslet.apply(source, context)
-      break if value.error?
+      success, value = parslet.apply(source, context)
+      break_on = value
+      break unless success
       occ += 1
-      result << value.result
+      accum << value
-      # If we're not greedy (max is defined), check if that has been
-      # reached.
-      return success(result) if max && occ>=max
+      # If we're not greedy (max is defined), check if that has been reached.
+      return succ(accum) if max && occ>=max
     end
+    # Last attempt to match parslet was a failure, failure reason in break_on.
     # Greedy matcher has produced a failure. Check if occ (which will
-    # contain the number of sucesses) is in {min, max}.
-    return error(source, @error_msgs[:minrep], start_pos) if occ < min
-    return success(result)
+    # contain the number of sucesses) is >= min.
+    return context.err_at(
+      self,
+      source,
+      @error_msgs[:minrep],
+      start_pos,
+      [break_on]) if occ < min
+    return succ(accum)
   end
   precedence REPETITION
-  def to_s_inner(prec) # :nodoc:
+  def to_s_inner(prec)
     minmax = "{#{min}, #{max}}"
     minmax = '?' if min == 0 && max == 1
     parslet.to_s(prec) + minmax
   end
-  def cause # :nodoc:
-    # Either the repetition failed or the parslet inside failed to repeat.
-    super || parslet.cause
-  end
-  def error_tree # :nodoc:
-    if cause?
-      Parslet::ErrorTree.new(self, parslet.error_tree)
-    else
-      parslet.error_tree
-    end
-  end
 end

data/lib/parslet/atoms/sequence.rb CHANGED Viewed

@@ -15,30 +15,24 @@ class Parslet::Atoms::Sequence < Parslet::Atoms::Base
     }
   end
-  def >>(parslet) # :nodoc:
+  def >>(parslet)
     self.class.new(* @parslets+[parslet])
   end
-  def try(source, context) # :nodoc:
-    success([:sequence]+parslets.map { |p|
-      # Save each parslet as potentially offending (raising an error).
-      @offending_parslet = p
+  def try(source, context)
+    succ([:sequence]+parslets.map { |p|
+      success, value = p.apply(source, context)
-      value = p.apply(source, context)
-      return error(source, @error_msgs[:failed]) if value.error?
-      value.result
+      unless success
+        return context.err(self, source, @error_msgs[:failed], [value])
+      end
+      value
     })
   end
   precedence SEQUENCE
-  def to_s_inner(prec) # :nodoc:
+  def to_s_inner(prec)
     parslets.map { |p| p.to_s(prec) }.join(' ')
   end
-  def error_tree # :nodoc:
-    Parslet::ErrorTree.new(self).tap { |t|
-      t.children << @offending_parslet.error_tree if @offending_parslet }
-  end
 end

data/lib/parslet/atoms/str.rb CHANGED Viewed

@@ -10,29 +10,27 @@ class Parslet::Atoms::Str < Parslet::Atoms::Base
     super()
     @str = str.to_s
+    @len = str.size
     @error_msgs = {
       :premature  => "Premature end of input",
       :failed     => "Expected #{str.inspect}, but got "
     }
   end
-  def try(source, context) # :nodoc:
-    # NOTE: Even though it doesn't look that way, this is the hotspot, the
-    # contents of parslets inner loop. Changes here affect parslets speed
-    # enormously.
-    error_pos = source.pos
-    s = source.read(str.bytesize)
-    return success(s) if s == str
+  def try(source, context)
+    return succ(source.consume(@len)) if source.matches?(str)
-    # assert: s != str
     # Failures:
-    return error(source, @error_msgs[:premature]) unless s && s.size==str.size
-    return error(source, [@error_msgs[:failed], s], error_pos)
+    return context.err(self, source, @error_msgs[:premature]) \
+      if source.chars_left<@len
+    error_pos = source.pos
+    return context.err_at(
+      self, source,
+      [@error_msgs[:failed], source.consume(@len)], error_pos)
   end
-  def to_s_inner(prec) # :nodoc:
+  def to_s_inner(prec)
     "'#{str}'"
   end
 end

data/lib/parslet/cause.rb CHANGED Viewed

@@ -1,14 +1,48 @@
 module Parslet
-  # An internal class that allows delaying the construction of error messages
-  # (as strings) until we really need to print them.
+  # Represents a cause why a parse did fail. A lot of these objects are
+  # constructed - not all of the causes turn out to be failures for the whole
+  # parse.
   #
-  class Cause < Struct.new(:message, :source, :pos) # :nodoc:
-    # Appends 'at line ... char ...' to the string given. Use +pos+ to
+  class Cause
+    def initialize(message, source, pos, children)
+      @message, @source, @pos, @children =
+        message, source, pos, children
+    end
+    # @return [String, Array] A string or an array of message pieces that
+    #   provide failure information. Use #to_s to get a formatted string.
+    attr_reader :message
+    # @return [Parslet::Source] Source that was parsed when this error
+    #   happend. Mainly used for line number information.
+    attr_reader :source
+    # Location of the error.
+    #
+    # @return [Fixnum] Position where the error happened. (character offset)
+    attr_reader :pos
+    # When this cause is part of a tree of error causes: child nodes for this
+    # node. Very often carries the reasons for this cause.
+    #
+    # @return [Array<Parslet::Cause>] A list of reasons for this cause.
+    def children
+      @children ||= []
+    end
+    # Appends 'at line LINE char CHAR' to the string given. Use +pos+ to
     # override the position of the +source+. This method returns an object
     # that can be turned into a string using #to_s.
     #
-    def self.format(source, pos, str)
-      self.new(str, source, pos)
+    # @param source [Parslet::Source] source that was parsed when this error
+    #   happened
+    # @param pos [Fixnum] position of error
+    # @param str [String, Array<String>] message parts
+    # @param children [Array<Parslet::Cause>] child nodes for this error tree
+    # @return [Parslet::Cause] a new instance of {Parslet::Cause}
+    #
+    def self.format(source, pos, str, children=[])
+      self.new(str, source, pos, children)
     end
     def to_s
@@ -37,12 +71,9 @@ module Parslet
         recursive_ascii_tree(self, io, [true]) }.
         string
     end
-    def children
-      @children ||= Array.new
-    end
   private
-    def recursive_ascii_tree(node, stream, curved) # :nodoc:
+    def recursive_ascii_tree(node, stream, curved)
       append_prefix(stream, curved)
       stream.puts node.to_s
@@ -52,8 +83,9 @@ module Parslet
         recursive_ascii_tree(child, stream, curved + [last_child])
       end
     end
-    def append_prefix(stream, curved) # :nodoc:
-      curved[0..-2].each do |c|
+    def append_prefix(stream, curved)
+      return if curved.size < 2
+      curved[1..-2].each do |c|
         stream.print c ? "   " : "|  "
       end
       stream.print curved.last ? "`- " : "|- "