RubyGems - parslet - Versions diffs - 1.2.3 → 1.3.0 - Mend

parslet 1.2.3 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (28)

data/HISTORY.txt +21 -0
data/README +1 -1
data/example/ignore_whitespace.rb +66 -0
data/example/mathn.rb +44 -0
data/example/output/ignore_whitespace.out +1 -0
data/example/output/ip_address.out +2 -2
data/example/output/mathn.out +4 -0
data/lib/parslet.rb +8 -1
data/lib/parslet/atoms.rb +1 -0
data/lib/parslet/atoms/alternative.rb +1 -1
data/lib/parslet/atoms/base.rb +26 -157
data/lib/parslet/atoms/can_flatten.rb +132 -0
data/lib/parslet/atoms/lookahead.rb +5 -8
data/lib/parslet/atoms/str.rb +1 -1
data/lib/parslet/atoms/visitor.rb +23 -9
data/lib/parslet/bytecode.rb +6 -0
data/lib/parslet/bytecode/compiler.rb +138 -0
data/lib/parslet/bytecode/instructions.rb +358 -0
data/lib/parslet/bytecode/vm.rb +209 -0
data/lib/parslet/cause.rb +62 -0
data/lib/parslet/export.rb +2 -2
data/lib/parslet/rig/rspec.rb +18 -17
data/lib/parslet/source.rb +66 -48
data/lib/parslet/source/line_cache.rb +7 -1
data/lib/parslet/transform/context.rb +15 -7
metadata +57 -16
data/Gemfile +0 -16
data/lib/parslet/atoms/transform.rb +0 -75

data/lib/parslet/bytecode/vm.rb ADDED

@@ -0,0 +1,209 @@
+module Parslet::Bytecode
+  class VM
+    include Parslet::Atoms::CanFlatten
+    def initialize(debug=false)
+      @debug = debug
+    end
+    def debug?
+      @debug
+    end
+    def run(program, io)
+      init(program, io)
+      loop do
+        old_ip = @ip
+        instruction = fetch
+        break unless instruction
+        # Diagnostics
+        printf("executing %5d: %s\n", old_ip, instruction) if debug?
+        # Run the current instruction
+        instruction.run(self)
+        # Diagnostics
+        dump_state(0) if debug?
+        break if @stop
+      end
+      fail "Stack contains too many values." if @values.size>1
+      # In the best case, we have successfully matched and consumed all input.
+      # This is what we want, from now on down it's all error cases.
+      return flatten(@values.last) if success? && source.eof?
+      # Maybe we've matched some, but not all of the input? In parslet's book,
+      # this is an error as well.
+      if success?
+        # assert: not source.eof?
+        current_pos = source.pos
+        source.error(
+          "Don't know what to do with #{source.read(100)}", current_pos).
+          raise(Parslet::UnconsumedInput)
+      end
+      # assert: ! @error.nil?
+      # And maybe we just could not do it for a reason. Raise that.
+      @error.raise
+    rescue => ex
+      dump_state(-1) unless ex.kind_of?(Parslet::ParseFailed)
+      raise
+    end
+    attr_reader :source
+    attr_reader :context
+    def init(program, io)
+      @ip = 0
+      @program = program
+      @source = Parslet::Source.new(io)
+      @context = Parslet::Atoms::Context.new
+      @values = []
+      @calls  = []
+      @frames = []
+      @cache  = {}
+    end
+    def fetch
+      @program.at(@ip).tap { @ip += 1 }
+    end
+    # Dumps the VM state so that the user can track errors down.
+    #
+    def dump_state(ip_offset)
+      return unless debug?
+      puts "\nVM STATE -------------------------------------------- "
+      old_pos = source.pos
+      debug_pos = old_pos - 10
+      source.pos = debug_pos < 0 ? 0 : debug_pos
+      puts "Source: #{source.read(20)}"
+      puts (" "*"Source: ".size) << (" "*(10+(debug_pos<0 ? debug_pos : 0))) << '^'
+      source.pos = old_pos
+      if @error
+        puts "Error register: #{@error}"
+      else
+        puts "Error register: EMPTY"
+      end
+      puts "Program: "
+      for adr in (@ip-5)..(@ip+5)
+        printf("%s%5d: %s\n",
+          adr == @ip+ip_offset ? '->' : '  ',
+          adr,
+          @program.at(adr)) if adr >= 0 && @program.at(adr)
+      end
+      puts "\nStack(#{@values.size}): (last 5, top is top of stack)"
+      @values.last(5).reverse.each_with_index do |v,i|
+        printf("  %5d: %s\n", i, v.inspect)
+      end
+      puts "\nStack Frames(#{@frames.size}): (last 5, top is top of stack)"
+      @frames.last(5).reverse.each_with_index do |v,i|
+        printf("  %5d: trunc stack at %s\n", i, v)
+      end
+      puts "\nCall Stack(#{@calls.size}): (last 5, top is top of stack)"
+      @calls.last(5).reverse.each_with_index do |v,i|
+        printf("  %5d: return to @%s\n", i, v)
+      end
+      puts "---------------------- -------------------------------- "
+    end
+    # --------------------------------------------- interface for instructions
+    def access_cache(skip_adr)
+      key = [source.pos, @ip-1]
+      # Is the given vm state in the cache yet?
+      if @cache[key]
+        # Restore state
+        success, value, advance = @cache[key]
+        if success
+          push value
+        else
+          set_error value
+        end
+        source.pos += advance
+        # Skip to skip_adr
+        jump skip_adr
+        return true
+      end
+      return false
+    end
+    def store_cache(adr)
+      if success?
+        pos, result = pop(2)
+        key = [pos, adr.address]
+        @cache[key] = [true, result, source.pos-pos]
+        push result
+      else
+        pos = pop
+        key = [pos, adr.address]
+        @cache[key] = [false, @error, source.pos-pos]
+      end
+    end
+    def push(value)
+      @values.push value
+    end
+    def pop(n=nil)
+      if n
+        fail "Stack corruption detected, popping too many values (#{n}/#{@values.size})." \
+          if n>@values.size
+        @values.pop(n)
+      else
+        fail "Stack corruption detected, popping too many values. (stack is empty)" \
+          if @values.empty?
+        @values.pop
+      end
+    end
+    def value_at(ptr)
+      @values.at(-ptr-1)
+    end
+    def enter_frame
+      @frames.push @values.size
+    end
+    def discard_frame
+      size = @frames.pop
+      fail "No stack frame." unless size
+      fail "Stack frame larger than the current stack." if size > @values.size
+      @values = @values[0,size]
+    end
+    def jump(address)
+      @ip = address.address
+    end
+    def success?
+      !@error
+    end
+    def call(adr)
+      @calls.push @ip
+      jump(adr)
+    end
+    def call_ret
+      @ip = @calls.pop
+      fail "One pop too many - empty call stack in #call_ret." unless @ip
+    end
+    def set_error(error)
+      @error = error
+    end
+    def clear_error
+      @error = nil
+    end
+    attr_reader :error
+    def stop
+      @stop = true
+    end
+  end
+end

data/lib/parslet/cause.rb ADDED

@@ -0,0 +1,62 @@
+module Parslet
+  # An internal class that allows delaying the construction of error messages
+  # (as strings) until we really need to print them.
+  #
+  class Cause < Struct.new(:message, :source, :pos) # :nodoc:
+    # Appends 'at line ... char ...' to the string given. Use +pos+ to
+    # override the position of the +source+. This method returns an object
+    # that can be turned into a string using #to_s.
+    #
+    def self.format(source, pos, str)
+      self.new(str, source, pos)
+    end
+    def to_s
+      line, column = source.line_and_column(pos)
+      # Allow message to be a list of objects. Join them here, since we now
+      # really need it.
+      Array(message).map { |o|
+        o.respond_to?(:to_slice) ?
+          o.str.inspect :
+          o.to_s }.join + " at line #{line} char #{column}."
+    end
+    # Signals to the outside that the parse has failed. Use this in
+    # conjunction with .format for nice error messages.
+    #
+    def raise(exception_klass=Parslet::ParseFailed)
+      exception = exception_klass.new(self.to_s, self)
+      Kernel.raise exception
+    end
+    # Returns an ascii tree representation of the causes of this node and its
+    # children.
+    #
+    def ascii_tree
+      StringIO.new.tap { |io|
+        recursive_ascii_tree(self, io, [true]) }.
+        string
+    end
+    def children
+      @children ||= Array.new
+    end
+  private
+    def recursive_ascii_tree(node, stream, curved) # :nodoc:
+      append_prefix(stream, curved)
+      stream.puts node.to_s
+      node.children.each do |child|
+        last_child = (node.children.last == child)
+        recursive_ascii_tree(child, stream, curved + [last_child])
+      end
+    end
+    def append_prefix(stream, curved) # :nodoc:
+      curved[0..-2].each do |c|
+        stream.print c ? "   " : "|  "
+      end
+      stream.print curved.last ? "`- " : "|- "
+    end
+  end
+end

data/lib/parslet/export.rb CHANGED

@@ -34,7 +34,7 @@ class Parslet::Parser
           join(' ') <<
         ')'
       end
-      def visit_repetition(min, max, parslet)
+      def visit_repetition(tag, min, max, parslet)
         parslet.accept(self) << "#{min}*#{max}"
       end
       def visit_alternative(alternatives)
@@ -52,7 +52,7 @@ class Parslet::Parser
     end
     class Treetop < Citrus
-      def visit_repetition(min, max, parslet)
+      def visit_repetition(tag, min, max, parslet)
         parslet.accept(self) << "#{min}..#{max}"
       end

data/lib/parslet/rig/rspec.rb CHANGED

@@ -1,40 +1,42 @@
 RSpec::Matchers.define(:parse) do |input, opts|
+  as = block = nil
+  result = trace = nil
   match do |parser|
     begin
-      @result = parser.parse(input)
-      @block ?
-        @block.call(@result) :
-        (@as == @result || @as.nil?)
+      result = parser.parse(input)
+      block ?
+        block.call(result) :
+        (as == result || as.nil?)
     rescue Parslet::ParseFailed
-      @trace = parser.error_tree.ascii_tree if opts && opts[:trace]
+      trace = parser.error_tree.ascii_tree if opts && opts[:trace]
       false
     end
   end
   failure_message_for_should do |is|
-    if @block
+    if block
       "expected output of parsing #{input.inspect}" <<
       " with #{is.inspect} to meet block conditions, but it didn't"
     else
       "expected " <<
-        (@as ?
+        (as ?
           "output of parsing #{input.inspect}"<<
-          " with #{is.inspect} to equal #{@as.inspect}, but was #{@result.inspect}" :
+          " with #{is.inspect} to equal #{as.inspect}, but was #{result.inspect}" :
           "#{is.inspect} to be able to parse #{input.inspect}") <<
-        (@trace ?
-          "\n"+@trace :
+        (trace ?
+          "\n"+trace :
           '')
     end
   end
   failure_message_for_should_not do |is|
-    if @block
+    if block
       "expected output of parsing #{input.inspect} with #{is.inspect} not to meet block conditions, but it did"
     else
       "expected " <<
-        (@as ?
+        (as ?
           "output of parsing #{input.inspect}"<<
-          " with #{is.inspect} not to equal #{@as.inspect}" :
+          " with #{is.inspect} not to equal #{as.inspect}" :
           "#{is.inspect} to not parse #{input.inspect}, but it did")
     end
@@ -42,9 +44,8 @@ RSpec::Matchers.define(:parse) do |input, opts|
   # NOTE: This has a nodoc tag since the rdoc parser puts this into
   # Object, a thing I would never allow.
-  def as(expected_output = nil, &block) # :nodoc:
-    @as = expected_output
-    @block = block
-    self
+  chain :as do |expected_output, &block|
+    as = expected_output
+    block = block
   end
 end

data/lib/parslet/source.rb CHANGED

@@ -3,65 +3,83 @@ require 'stringio'
 require 'parslet/source/line_cache'
-# Wraps the input IO to parslet. The interface defined by this class is
-# smaller than what IO offers, but enhances it with a #column and #line
-# method for the current position.
-#
-class Parslet::Source
-  def initialize(io)
-    if io.respond_to? :to_str
-      io = StringIO.new(io)
-    end
+module Parslet
+  # Wraps the input IO to parslet. The interface defined by this class is
+  # smaller than what IO offers, but enhances it with a #column and #line
+  # method for the current position.
+  #
+  class Source
+    def initialize(io)
+      if io.respond_to? :to_str
+        io = StringIO.new(io)
+      end
-    @io = io
-    @line_cache = LineCache.new
-  end
+      @io = io
+      @line_cache = LineCache.new
+    end
-  # Reads n chars from the input and returns a Range instance.
-  #
-  def read(n)
-    raise ArgumentError, "Cannot read < 1 characters at a time." if n < 1
-    read_slice(n)
-  end
+    # Reads n bytes from the input and returns a Slice instance. If the n
+    # bytes end in the middle of a multibyte representation of a char, that
+    # char is returned fully.
+    #
+    # Example:
+    #   source.read(1)  # always returns at least one valid char
+    #   source.read(7)  # reads 7 bytes, then to the next char boundary.
+    #
+    def read(n)
+      raise ArgumentError, "Cannot read < 1 characters at a time." if n < 1
+      read_slice(n)
+    end
-  def eof?
-    @io.eof?
-  end
-  def pos
-    @io.pos
-  end
-  def pos=(new_pos)
-    @io.pos = new_pos
-  end
+    def eof?
+      @io.eof?
+    end
+    def pos
+      @io.pos
+    end
+    def pos=(new_pos)
+      @io.pos = new_pos
+    end
-  # Returns a <line, column> tuple for the given position. If no position is
-  # given, line/column information is returned for the current position given
-  # by #pos.
-  #
-  def line_and_column(position=nil)
-    @line_cache.line_and_column(position || self.pos)
-  end
+    # Returns a <line, column> tuple for the given position. If no position is
+    # given, line/column information is returned for the current position given
+    # by #pos.
+    #
+    def line_and_column(position=nil)
+      @line_cache.line_and_column(position || self.pos)
+    end
-private
-  def read_slice(needed)
-    start = @io.pos
-    buf = @io.gets(nil, needed)
-    # cache line ends
-    @line_cache.scan_for_line_endings(start, buf)
-    Parslet::Slice.new(buf || '', start, @line_cache)
-  end
+    # Formats an error cause at the current position or at the position given
+    # by error_pos. If error_pos is nil, the current source position will be
+    # the error position.
+    #
+    def error(message, error_pos=nil)
+      real_pos = (error_pos||self.pos)
+      Cause.format(self, real_pos, message)
+    end
-  if RUBY_VERSION !~ /^1.9/
+  private
     def read_slice(needed)
       start = @io.pos
-      buf = @io.read(needed)
+      buf = @io.gets(nil, needed)
       # cache line ends
       @line_cache.scan_for_line_endings(start, buf)
       Parslet::Slice.new(buf || '', start, @line_cache)
     end
+    if RUBY_VERSION !~ /^1.9/
+      def read_slice(needed)
+        start = @io.pos
+        buf = @io.read(needed)
+        # cache line ends
+        @line_cache.scan_for_line_endings(start, buf)
+        Parslet::Slice.new(buf || '', start, @line_cache)
+      end
+    end
   end
-end
+end