RubyGems - parslet - Versions diffs - 1.2.3 → 1.3.0 - Mend

parslet 1.2.3 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (28) hide show

data/HISTORY.txt +21 -0
data/README +1 -1
data/example/ignore_whitespace.rb +66 -0
data/example/mathn.rb +44 -0
data/example/output/ignore_whitespace.out +1 -0
data/example/output/ip_address.out +2 -2
data/example/output/mathn.out +4 -0
data/lib/parslet.rb +8 -1
data/lib/parslet/atoms.rb +1 -0
data/lib/parslet/atoms/alternative.rb +1 -1
data/lib/parslet/atoms/base.rb +26 -157
data/lib/parslet/atoms/can_flatten.rb +132 -0
data/lib/parslet/atoms/lookahead.rb +5 -8
data/lib/parslet/atoms/str.rb +1 -1
data/lib/parslet/atoms/visitor.rb +23 -9
data/lib/parslet/bytecode.rb +6 -0
data/lib/parslet/bytecode/compiler.rb +138 -0
data/lib/parslet/bytecode/instructions.rb +358 -0
data/lib/parslet/bytecode/vm.rb +209 -0
data/lib/parslet/cause.rb +62 -0
data/lib/parslet/export.rb +2 -2
data/lib/parslet/rig/rspec.rb +18 -17
data/lib/parslet/source.rb +66 -48
data/lib/parslet/source/line_cache.rb +7 -1
data/lib/parslet/transform/context.rb +15 -7
metadata +57 -16
data/Gemfile +0 -16
data/lib/parslet/atoms/transform.rb +0 -75

data/lib/parslet/atoms/lookahead.rb CHANGED

@@ -14,9 +14,10 @@ class Parslet::Atoms::Lookahead < Parslet::Atoms::Base
     # Model positive and negative lookahead by testing this flag.
     @positive = positive
     @bound_parslet = bound_parslet
     @error_msgs = {
-      :positive => "lookahead: #{bound_parslet.inspect} didn't match, but should have",
-      :negative => "negative lookahead: #{bound_parslet.inspect} matched, but shouldn't have"
+      :positive => ["Input should start with ", bound_parslet],
+      :negative => ["Input should not start with ", bound_parslet]
     }
   end
@@ -26,8 +27,8 @@ class Parslet::Atoms::Lookahead < Parslet::Atoms::Base
     value = bound_parslet.apply(source, context)
     return success(nil) if positive ^ value.error?
-    return error(source, @error_msgs[:positive]) if positive
-    return error(source, @error_msgs[:negative])
+    return error(source, @error_msgs[:positive], pos) if positive
+    return error(source, @error_msgs[:negative], pos)
   # This is probably the only parslet that rewinds its input in #try.
   # Lookaheads NEVER consume their input, even on success, that's why.
@@ -41,8 +42,4 @@ class Parslet::Atoms::Lookahead < Parslet::Atoms::Base
     "#{char}#{bound_parslet.to_s(prec)}"
   end
-  def error_tree # :nodoc:
-    bound_parslet.error_tree
-  end
 end

data/lib/parslet/atoms/str.rb CHANGED

@@ -21,7 +21,7 @@ class Parslet::Atoms::Str < Parslet::Atoms::Base
     # contents of parslets inner loop. Changes here affect parslets speed
     # enormously.
     error_pos = source.pos
-    s = source.read(str.size)
+    s = source.read(str.bytesize)
     return success(s) if s == str

data/lib/parslet/atoms/visitor.rb CHANGED

@@ -10,7 +10,7 @@ module Parslet::Atoms
   end
   class Str
-    # Call back visitors #str method. See parslet/export for an example.
+    # Call back visitors #visit_str method. See parslet/export for an example.
     #
     def accept(visitor)
       visitor.visit_str(str)
@@ -18,7 +18,8 @@ module Parslet::Atoms
   end
   class Entity
-    # Call back visitors #entity method. See parslet/export for an example.
+    # Call back visitors #visit_entity method. See parslet/export for an
+    # example.
     #
     def accept(visitor)
       visitor.visit_entity(name, block)
@@ -26,7 +27,8 @@ module Parslet::Atoms
   end
   class Named
-    # Call back visitors #named method. See parslet/export for an example.
+    # Call back visitors #visit_named method. See parslet/export for an
+    # example.
     #
     def accept(visitor)
       visitor.visit_named(name, parslet)
@@ -34,7 +36,8 @@ module Parslet::Atoms
   end
   class Sequence
-    # Call back visitors #sequence method. See parslet/export for an example.
+    # Call back visitors #visit_sequence method. See parslet/export for an
+    # example.
     #
     def accept(visitor)
       visitor.visit_sequence(parslets)
@@ -42,15 +45,17 @@ module Parslet::Atoms
   end
   class Repetition
-    # Call back visitors #repetition method. See parslet/export for an example.
+    # Call back visitors #visit_repetition method. See parslet/export for an
+    # example.
     #
     def accept(visitor)
-      visitor.visit_repetition(min, max, parslet)
+      visitor.visit_repetition(@tag, min, max, parslet)
     end
   end
   class Alternative
-    # Call back visitors #alternative method. See parslet/export for an example.
+    # Call back visitors #visit_alternative method. See parslet/export for an
+    # example.
     #
     def accept(visitor)
       visitor.visit_alternative(alternatives)
@@ -58,7 +63,8 @@ module Parslet::Atoms
   end
   class Lookahead
-    # Call back visitors #lookahead method. See parslet/export for an example.
+    # Call back visitors #visit_lookahead method. See parslet/export for an
+    # example.
     #
     def accept(visitor)
       visitor.visit_lookahead(positive, bound_parslet)
@@ -66,10 +72,18 @@ module Parslet::Atoms
   end
   class Re
-    # Call back visitors #re method. See parslet/export for an example.
+    # Call back visitors #visit_re method. See parslet/export for an example.
     #
     def accept(visitor)
       visitor.visit_re(match)
     end
   end
 end
+class Parslet::Parser
+  # Call back visitors #visit_parser method.
+  #
+  # The visitor is handed +root+ rather than the parser object itself, so
+  # a visitor that wants to descend into the grammar has to call #accept
+  # on that value.
+  #
+  def accept(visitor)
+    visitor.visit_parser(root)
+  end
+end

data/lib/parslet/bytecode.rb ADDED

@@ -0,0 +1,6 @@
+# Namespace for the bytecode backend. The module body is intentionally
+# empty; its contents come from the files required right after it
+# (instructions, compiler and vm).
+module Parslet::Bytecode
+end
+require 'parslet/bytecode/instructions'
+require 'parslet/bytecode/compiler'
+require 'parslet/bytecode/vm'

data/lib/parslet/bytecode/compiler.rb ADDED

@@ -0,0 +1,138 @@
+require 'parslet/atoms/visitor'
+module Parslet::Bytecode
+  # Visitor that turns a tree of parslet atoms into a flat array of
+  # instructions. Atoms call back into the visit_* methods below through
+  # their #accept method (see parslet/atoms/visitor). The index of an
+  # instruction inside the buffer serves as its address.
+  #
+  class Compiler
+    def initialize
+      # Instructions emitted so far.
+      @buffer = []
+      # Entity name => Block, so that every entity gets exactly one Block.
+      @blocks = Hash.new
+    end
+    # A position in the instruction buffer. Can be created without a value
+    # and resolved later, which is how forward jumps are expressed.
+    #
+    class Address
+      attr_reader :address
+      def initialize(address=nil)
+        @address = address
+      end
+      # Binds this address to whatever #buffer_pointer currently returns.
+      # NOTE: the parameter is named vm, but every call in this file passes
+      # the compiler; the only thing required of it is #buffer_pointer.
+      def resolve(vm)
+        @address = vm.buffer_pointer
+      end
+      def inspect
+        "@#{@address}"
+      end
+      def to_s
+        "@#{address}"
+      end
+    end
+    # Body of an entity, compiled on demand the first time its address is
+    # requested.
+    #
+    class Block
+      def initialize(name, block, compiler)
+        @name = name
+        @block = block
+        @compiler = compiler
+      end
+      # Returns the address of the compiled body. On the first call the
+      # body is appended to the compiler's buffer, followed by a Return
+      # instruction; later calls return the memoized address.
+      def address
+        return @address if @address
+        # Actual compilation:
+        # TODO raise not implemented if the block returns nil (see Entity)
+        # The address is taken before emitting anything so that it points
+        # at the first instruction of the body.
+        @address = @compiler.current_address
+        atom.accept(@compiler)
+        @compiler.add Return.new
+        return @address
+      end
+      # The atom produced by calling the entity's block (memoized).
+      def atom
+        @atom ||= @block.call
+      end
+    end
+    # Compiles atom, appends a Stop instruction and returns the instruction
+    # buffer.
+    def compile(atom)
+      atom.accept(self)
+      add Stop.new
+      @buffer
+    end
+    # Appends one instruction to the buffer.
+    def add(instruction)
+      @buffer << instruction
+    end
+    # A not yet resolved address, to be bound later via Address#resolve.
+    def fwd_address
+      Address.new
+    end
+    # An address pointing at the next instruction that will be added.
+    def current_address
+      Address.new(buffer_pointer)
+    end
+    # Index the next added instruction will get.
+    def buffer_pointer
+      @buffer.size
+    end
+    def visit_str(str)
+      add Match.new(str)
+    end
+    def visit_re(match)
+      # The second argument is the size the Re instruction reads from the
+      # source; it is always 1 here.
+      add Re.new(match, 1)
+    end
+    def visit_sequence(parslets)
+      emit_block do
+        # The Sequence atom is rebuilt only to obtain its #inspect output
+        # for the error message.
+        sequence = Parslet::Atoms::Sequence.new(*parslets)
+        error_msg = "Failed to match sequence (#{sequence.inspect})"
+        end_adr = fwd_address
+        parslets.each_with_index do |atom, idx|
+          atom.accept(self)
+          # idx is the number of results from earlier elements that
+          # CheckSequence pops before jumping to end_adr on failure.
+          add CheckSequence.new(idx, end_adr, error_msg)
+        end
+        add PackSequence.new(parslets.size)
+        # Resolved after PackSequence: a failed sequence skips the packing.
+        end_adr.resolve(self)
+      end
+    end
+    def visit_alternative(alternatives)
+      emit_block do
+        adr_end = fwd_address
+        add EnterFrame.new
+        add PushPos.new
+        alternatives.each_with_index do |alternative, idx|
+          alternative.accept(self)
+          # NOTE(review): idx becomes BranchOnSuccess#pos_ptr, which is
+          # handed to VM#value_at to find the saved position; confirm
+          # against the VM that the index is what it expects.
+          add BranchOnSuccess.new(adr_end, idx)
+        end
+        # Reached only when no alternative branched away; Fail pops one
+        # stacked error per alternative.
+        add Fail.new(["Expected one of ", alternatives.inspect], alternatives.size)
+        adr_end.resolve(self)
+      end
+    end
+    def visit_repetition(tag, min, max, parslet)
+      add SetupRepeat.new(tag)
+      # Repeat jumps back here to attempt the parslet again.
+      start = current_address
+      parslet.accept(self)
+      add Repeat.new(min, max, start, parslet)
+    end
+    def visit_named(name, parslet)
+      parslet.accept(self)
+      add Box.new(name)
+    end
+    def visit_lookahead(positive, parslet)
+      # The position is saved first so CheckAndReset can rewind the source.
+      add PushPos.new
+      parslet.accept(self)
+      add CheckAndReset.new(positive, parslet)
+    end
+    def visit_entity(name, block)
+      # Only a call is emitted here; the entity body is compiled when
+      # Block#address is first asked for.
+      @blocks[name] ||= Block.new(name, block, self)
+      add CallBlock.new(@blocks[name])
+    end
+    def visit_parser(root)
+      root.accept(self)
+    end
+    # Wraps the instructions emitted by the given block between a
+    # CheckCache and a StoreResult instruction. CheckCache receives the
+    # address just past StoreResult, and StoreResult receives the address
+    # of the CheckCache instruction.
+    def emit_block
+      end_adr = fwd_address
+      cache_adr = current_address
+      add CheckCache.new(end_adr)
+      yield
+      add StoreResult.new(cache_adr)
+      end_adr.resolve(self)
+    end
+  end
+end

data/lib/parslet/bytecode/instructions.rb ADDED

@@ -0,0 +1,358 @@
+module Parslet::Bytecode
+  # Matches the string and pushes the result on the stack (looks like the
+  # string, but is really the slice that was matched).
+  #
+  Match = Struct.new(:str) do
+    def initialize(str)
+      super
+      @mismatch_error_prefix = "Expected #{str.inspect}, but got "
+    end
+    def to_s
+      "MATCH #{str.inspect}"
+    end
+    def run(vm)
+      source = vm.source
+      error_pos = source.pos
+      s = source.read(str.bytesize)
+      if s.size != str.size
+        source.pos = error_pos
+        vm.set_error source.error("Premature end of input")
+      else
+        if s == str
+          vm.push(s)
+        else
+          source.pos = error_pos
+          vm.set_error source.error([@mismatch_error_prefix, s])
+        end
+      end
+    end
+  end
+  Re = Struct.new(:re, :size) do
+    def initialize(re, size)
+      super
+      @failure = "Failed to match #{re.inspect[1..-2]}"
+    end
+    def to_s
+      "RE    #{re.inspect}, #{size}"
+    end
+    def run(vm)
+      source = vm.source
+      error_pos = source.pos
+      s = source.read(size)
+      if s.size != size
+        source.pos = error_pos
+        vm.set_error source.error("Premature end of input")
+        return
+      end
+      if !s.match(re)
+        source.pos = error_pos
+        vm.set_error source.error(@failure)
+        return
+      end
+      vm.push s
+    end
+  end
+  # Prepares the stack for a repetition. Pushes three values that Repeat
+  # later pops together with vm.pop(3): the current source position, the
+  # number of occurrences so far and the result accumulator, which starts
+  # out holding only the tag.
+  #
+  SetupRepeat = Struct.new(:tag) do
+    def run(vm)
+      vm.push vm.source.pos
+      vm.push 0       # occurrences
+      vm.push [tag]   # will collect results
+    end
+    def to_s
+      "STPRE #{tag.inspect}"
+    end
+  end
+  # Repeat matching with a minimum of min and a maximum of max times.
+  #
+  Repeat = Struct.new(:min, :max, :adr, :parslet) do
+    def initialize(*args)
+      super
+      @minrep_error = ["Expected at least #{min} of ", parslet]
+    end
+    def to_s
+      "RPEAT #{min || 'n/a'}, #{max || 'n/a'}, #{adr}, #{parslet}"
+    end
+    def run(vm)
+      source = vm.source
+      start_position = source.pos
+      unless vm.success?
+        pos, occurrences, accumulator = vm.pop(3)
+        source.pos = pos
+        # We've encountered an error. Are we still below the minimum number of
+        # matches?
+        if occurrences < min
+          error = source.error(@minrep_error, pos)
+          error.children << vm.error
+          vm.set_error error
+          return
+        end
+        # assert: occurrences >= min
+        # We've matched the minimum number required, so this is a success:
+        vm.clear_error
+        vm.push accumulator
+        return
+      end
+      # assert: vm.success?
+      result = vm.pop
+      pos, occurrences, accumulator = vm.pop(3)
+      accumulator << result
+      occurrences += 1
+      # All went well but we have reached our maximum?
+      if max && occurrences >= max
+        # We're done! Push the result.
+        vm.push accumulator
+        return
+      end
+      # No maximum was set or it was not reached. Continue matching.
+      vm.push vm.source.pos
+      vm.push occurrences
+      vm.push accumulator
+      vm.jump adr
+    end
+  end
+  # Checks if a sequence must be aborted early because of a parse failure.
+  # Cleans up the stack and jumps after the sequence, having set error.
+  #
+  # cleanup_items:: how many stack entries to pop on failure (the compiler
+  #                 passes the index of the element that was just tried)
+  # adr::           address to jump to on failure
+  # error::         message for the error that wraps the element's error
+  #
+  # Does nothing when the vm is in success state.
+  #
+  CheckSequence = Struct.new(:cleanup_items, :adr, :error) do
+    def run(vm)
+      unless vm.success?
+        vm.pop(cleanup_items)
+        cause = vm.source.error(error)
+        # The element's own error becomes a child of the sequence error.
+        cause.children << vm.error
+        vm.set_error cause
+        vm.jump(adr)
+      end
+    end
+    def to_s
+      # Only the first 50 characters of the message are shown.
+      "CHKSQ #{cleanup_items}, #{adr}, #{error[0,50] + "..."}"
+    end
+  end
+  # Packs size stack elements into an array that is prefixed with the
+  # :sequence tag. This will later be converted by #flatten
+  #
+  PackSequence = Struct.new(:size) do
+    def run(vm)
+      source = vm.source
+      fail "Sequence runs into PackSequence with error flag set!" \
+        unless vm.success?
+      elts = vm.pop(size)
+      vm.push [:sequence, *elts]
+    end
+    def to_s
+      "PACK  #{size}"
+    end
+  end
+  # Enters a new stack frame that can be discarded with vm.discard_frame. This
+  # helps in situations where you need to pop a state that you don't know the
+  # size of.
+  #
+  # The instruction carries no operands; it only delegates to
+  # vm.enter_frame.
+  #
+  EnterFrame = Class.new do
+    def run(vm)
+      vm.enter_frame
+    end
+    def to_s
+      "ENTER"
+    end
+  end
+  # Fails at this point with the given error message. Size indicates how many
+  # different alternatives should have generated an error message on the
+  # stack.
+  #
+  # Those size entries are popped and become the children of the new
+  # error. The current stack frame is then discarded and the error is set
+  # on the vm.
+  #
+  Fail = Struct.new(:message, :size) do
+    def run(vm)
+      children = vm.pop(size)
+      error = vm.source.error(message)
+      error.children.replace(children)
+      # Clean up the stack frames:
+      vm.discard_frame
+      vm.set_error error
+    end
+    def to_s
+      "FAIL  #{message}, #{size}"
+    end
+  end
+  # If the vm.success? is true, branches to the given address.
+  #
+  # adr::     address to jump to after a successful alternative
+  # pos_ptr:: argument for vm.value_at, used to look up the source position
+  #           to rewind to before the next alternative is tried
+  #
+  # NOTE(review): how VM#value_at interprets pos_ptr is not visible here;
+  # the compiler passes the index of the alternative.
+  #
+  BranchOnSuccess = Struct.new(:adr, :pos_ptr) do
+    def run(vm)
+      source = vm.source
+      if vm.success?
+        # Stack will look like this:
+        #  (n*) previous failures
+        #  successful match
+        # So we pop the match, discard the failures and push the success
+        # again. This way, it looks like a success should look.
+        value = vm.pop
+        vm.discard_frame
+        vm.push value
+        vm.jump(adr)
+      else
+        # Otherwise, clear the error and try the alternative that comes
+        # right here in the byte code.
+        # We need to reset the source.pos to what it was before starting on
+        # one of several alternatives:
+        source.pos = vm.value_at(pos_ptr)
+        # Push the error as if it were a value. If all branches fail, this can
+        # be used to create a complete error trace. If not, VM#discard_frame
+        # will take care of those.
+        vm.push vm.error
+        vm.clear_error
+      end
+    end
+    def to_s
+      "BRSUC #{adr}, #{pos_ptr}"
+    end
+  end
+  # Boxes a value inside a name tag.
+  #
+  # Consumes: parslet result
+  # Pushes: boxed result
+  #
+  Box = Struct.new(:name) do
+    def run(vm)
+      if vm.success?
+        result = vm.pop
+        vm.push(name => result)
+      end
+    end
+    def to_s
+      "BOX   #{name.inspect}"
+    end
+  end
+  # Pushes the current source pos to the stack.
+  #
+  # Consumes: Nothing
+  # Pushes: the current source.pos
+  #
+  PushPos = Class.new do
+    def run(vm)
+      source = vm.source
+      vm.push source.pos
+    end
+    def to_s
+      "PSHPS"
+    end
+  end
+  # Finishes a lookahead. Expects the stack to hold the source position
+  # saved before the parslet was tried (as per PushPos) and, if the parslet
+  # matched, its result on top of that. Both are removed and the source is
+  # rewound to the saved position, since a lookahead never consumes input.
+  #
+  # If the outcome is the desired one (match for positive, failure for
+  # negative), any error is cleared and nil is pushed as the result (no
+  # capture). Otherwise an error located at the saved position is set.
+  #
+  # Consumes: VM state, source.pos
+  # Pushes: VM.state
+  # Effects: resets source.pos
+  #
+  CheckAndReset = Struct.new(:positive, :parslet) do
+    def run(vm)
+      source = vm.source
+      # A result is only on the stack when the parslet matched.
+      vm.pop if vm.success?
+      # Position from before the parslet was attempted.
+      start_pos = vm.pop
+      source.pos = start_pos
+      if positive ? vm.success? : !vm.success?
+        vm.clear_error
+        vm.push nil
+      else
+        error_msg =
+          if positive
+            ["Input should start with ", parslet]
+          else
+            ["Input should not start with ", parslet]
+          end
+        vm.set_error source.error(error_msg, start_pos)
+      end
+    end
+    def to_s
+      "CHKRS #{positive ? ':&' : ':!'}, #{parslet.inspect}"
+    end
+  end
+  # Compiles the block or 'calls' the subroutine that was compiled earlier.
+  #
+  # block is expected to answer #address and #atom (see Compiler::Block).
+  # Asking for the address is what triggers compilation on first use; note
+  # that #to_s asks for it as well.
+  #
+  CallBlock = Struct.new(:block) do
+    def run(vm)
+      vm.call(block.address)
+    end
+    def to_s
+      "LCALL #{block.address} (was atom<#{block.atom}>)"
+    end
+  end
+  # Ends a compiled block by delegating to vm.call_ret. The compiler emits
+  # it after the body of every entity block.
+  #
+  Return = Class.new do
+    def run(vm)
+      vm.call_ret
+    end
+    def to_s
+      "RETRN"
+    end
+  end
+  # Last instruction of every compiled program (see Compiler#compile);
+  # delegates to vm.stop.
+  #
+  Stop = Class.new do
+    def run(vm)
+      vm.stop
+    end
+    def to_s
+      "STPVM"
+    end
+  end
+  # Caching
+  #
+  # Asks the vm for a cached result via vm.access_cache(skip_adr). If that
+  # returns a true value nothing else happens here; otherwise the current
+  # source position is pushed before the block body runs.
+  # NOTE(review): presumably access_cache itself restores the cached result
+  # and jumps to skip_adr on a hit; confirm in the VM.
+  #
+  CheckCache = Struct.new(:skip_adr) do
+    def run(vm)
+      return if vm.access_cache(skip_adr)
+      vm.push vm.source.pos
+    end
+    def to_s
+      "RETCA #{skip_adr}"
+    end
+  end
+  # Counterpart of CheckCache, emitted at the end of a cached block. Hands
+  # adr (the compiler passes the address of the matching CheckCache
+  # instruction) to vm.store_cache.
+  #
+  StoreResult = Struct.new(:adr) do
+    def run(vm)
+      vm.store_cache(adr)
+    end
+    def to_s
+      "STOCA #{adr}"
+    end
+  end
+end