RubyGems - parslet - Versions diffs - 1.3.0 → 1.4.0 - Mend

parslet 1.3.0 → 1.4.0

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (48) hide show

data/HISTORY.txt +38 -1
data/README +33 -21
data/example/deepest_errors.rb +131 -0
data/example/email_parser.rb +2 -6
data/example/ignore.rb +2 -2
data/example/json.rb +0 -3
data/example/modularity.rb +47 -0
data/example/nested_errors.rb +132 -0
data/example/output/deepest_errors.out +54 -0
data/example/output/modularity.out +0 -0
data/example/output/nested_errors.out +54 -0
data/lib/parslet.rb +65 -51
data/lib/parslet/atoms.rb +1 -1
data/lib/parslet/atoms/alternative.rb +11 -12
data/lib/parslet/atoms/base.rb +57 -99
data/lib/parslet/atoms/can_flatten.rb +9 -4
data/lib/parslet/atoms/context.rb +26 -4
data/lib/parslet/atoms/entity.rb +5 -10
data/lib/parslet/atoms/lookahead.rb +11 -7
data/lib/parslet/atoms/named.rb +8 -12
data/lib/parslet/atoms/re.rb +10 -9
data/lib/parslet/atoms/repetition.rb +23 -24
data/lib/parslet/atoms/sequence.rb +10 -16
data/lib/parslet/atoms/str.rb +11 -13
data/lib/parslet/cause.rb +45 -13
data/lib/parslet/convenience.rb +6 -6
data/lib/parslet/error_reporter.rb +7 -0
data/lib/parslet/error_reporter/deepest.rb +95 -0
data/lib/parslet/error_reporter/tree.rb +57 -0
data/lib/parslet/export.rb +4 -4
data/lib/parslet/expression.rb +0 -2
data/lib/parslet/expression/treetop.rb +2 -2
data/lib/parslet/parser.rb +2 -6
data/lib/parslet/pattern.rb +15 -4
data/lib/parslet/pattern/binding.rb +3 -3
data/lib/parslet/rig/rspec.rb +2 -2
data/lib/parslet/slice.rb +0 -6
data/lib/parslet/source.rb +40 -59
data/lib/parslet/source/line_cache.rb +2 -2
data/lib/parslet/transform.rb +13 -7
data/lib/parslet/transform/context.rb +1 -1
metadata +69 -26
data/example/ignore_whitespace.rb +0 -66
data/lib/parslet/bytecode.rb +0 -6
data/lib/parslet/bytecode/compiler.rb +0 -138
data/lib/parslet/bytecode/instructions.rb +0 -358
data/lib/parslet/bytecode/vm.rb +0 -209
data/lib/parslet/error_tree.rb +0 -50

data/lib/parslet/bytecode/instructions.rb DELETED Viewed

@@ -1,358 +0,0 @@
-module Parslet::Bytecode
-  # Matches the string and pushes the result on the stack (looks like the
-  # string, but is really the slice that was matched).
-  #
-  Match = Struct.new(:str) do
-    def initialize(str)
-      super
-      @mismatch_error_prefix = "Expected #{str.inspect}, but got "
-    end
-    def to_s
-      "MATCH #{str.inspect}"
-    end
-    def run(vm)
-      source = vm.source
-      error_pos = source.pos
-      s = source.read(str.bytesize)
-      if s.size != str.size
-        source.pos = error_pos
-        vm.set_error source.error("Premature end of input")
-      else
-        if s == str
-          vm.push(s)
-        else
-          source.pos = error_pos
-          vm.set_error source.error([@mismatch_error_prefix, s])
-        end
-      end
-    end
-  end
-  Re = Struct.new(:re, :size) do
-    def initialize(re, size)
-      super
-      @failure = "Failed to match #{re.inspect[1..-2]}"
-    end
-    def to_s
-      "RE    #{re.inspect}, #{size}"
-    end
-    def run(vm)
-      source = vm.source
-      error_pos = source.pos
-      s = source.read(size)
-      if s.size != size
-        source.pos = error_pos
-        vm.set_error source.error("Premature end of input")
-        return
-      end
-      if !s.match(re)
-        source.pos = error_pos
-        vm.set_error source.error(@failure)
-        return
-      end
-      vm.push s
-    end
-  end
-  SetupRepeat = Struct.new(:tag) do
-    def run(vm)
-      vm.push vm.source.pos
-      vm.push 0       # occurrences
-      vm.push [tag]   # will collect results
-    end
-    def to_s
-      "STPRE #{tag.inspect}"
-    end
-  end
-  # Repeat matching with a minimum of min and a maximum of max times.
-  #
-  Repeat = Struct.new(:min, :max, :adr, :parslet) do
-    def initialize(*args)
-      super
-      @minrep_error = ["Expected at least #{min} of ", parslet]
-    end
-    def to_s
-      "RPEAT #{min || 'n/a'}, #{max || 'n/a'}, #{adr}, #{parslet}"
-    end
-    def run(vm)
-      source = vm.source
-      start_position = source.pos
-      unless vm.success?
-        pos, occurrences, accumulator = vm.pop(3)
-        source.pos = pos
-        # We've encountered an error. Are we still below the minimum number of
-        # matches?
-        if occurrences < min
-          error = source.error(@minrep_error, pos)
-          error.children << vm.error
-          vm.set_error error
-          return
-        end
-        # assert: occurrences >= min
-        # We've matched the minimum number required, so this is a success:
-        vm.clear_error
-        vm.push accumulator
-        return
-      end
-      # assert: vm.success?
-      result = vm.pop
-      pos, occurrences, accumulator = vm.pop(3)
-      accumulator << result
-      occurrences += 1
-      # All went well but we have reached our maximum?
-      if max && occurrences >= max
-        # We're done! Push the result.
-        vm.push accumulator
-        return
-      end
-      # No maximum was set or it was not reached. Continue matching.
-      vm.push vm.source.pos
-      vm.push occurrences
-      vm.push accumulator
-      vm.jump adr
-    end
-  end
-  # Checks if a sequence must be aborted early because of a parse failure.
-  # Cleans up the stack and jumps after the sequence, having set error.
-  #
-  CheckSequence = Struct.new(:cleanup_items, :adr, :error) do
-    def run(vm)
-      unless vm.success?
-        vm.pop(cleanup_items)
-        cause = vm.source.error(error)
-        cause.children << vm.error
-        vm.set_error cause
-        vm.jump(adr)
-      end
-    end
-    def to_s
-      "CHKSQ #{cleanup_items}, #{adr}, #{error[0,50] + "..."}"
-    end
-  end
-  # Packs size stack elements into an array that is prefixed with the
-  # :sequence tag. This will later be converted by #flatten
-  #
-  PackSequence = Struct.new(:size) do
-    def run(vm)
-      source = vm.source
-      fail "Sequence runs into PackSequence with error flag set!" \
-        unless vm.success?
-      elts = vm.pop(size)
-      vm.push [:sequence, *elts]
-    end
-    def to_s
-      "PACK  #{size}"
-    end
-  end
-  # Enters a new stack frame that can be discarded with vm.discard_frame. This
-  # helps in situations where you need to pop a state that you don't know the
-  # size of.
-  #
-  EnterFrame = Class.new do
-    def run(vm)
-      vm.enter_frame
-    end
-    def to_s
-      "ENTER"
-    end
-  end
-  # Fails at this point with the given error message. Size indicates how many
-  # different alternatives should have generated an error message on the
-  # stack.
-  #
-  Fail = Struct.new(:message, :size) do
-    def run(vm)
-      children = vm.pop(size)
-      error = vm.source.error(message)
-      error.children.replace(children)
-      # Clean up the stack frames:
-      vm.discard_frame
-      vm.set_error error
-    end
-    def to_s
-      "FAIL  #{message}, #{size}"
-    end
-  end
-  # If the vm.success? is true, branches to the given address.
-  #
-  BranchOnSuccess = Struct.new(:adr, :pos_ptr) do
-    def run(vm)
-      source = vm.source
-      if vm.success?
-        # Stack will look like this:
-        #  (n*) previous failures
-        #  successful match
-        # So we pop the match, discard the failures and push the success
-        # again. This way, it looks like a success should look.
-        value = vm.pop
-        vm.discard_frame
-        vm.push value
-        vm.jump(adr)
-      else
-        # Otherwise, clear the error and try the alternative that comes
-        # right here in the byte code.
-        # We need to reset the source.pos to what it was before starting on
-        # one of several alternatives:
-        source.pos = vm.value_at(pos_ptr)
-        # Push the error as if it were a value. If all branches fail, this can
-        # be used to create a complete error trace. If not, VM#discard_frame
-        # will take care of those.
-        vm.push vm.error
-        vm.clear_error
-      end
-    end
-    def to_s
-      "BRSUC #{adr}, #{pos_ptr}"
-    end
-  end
-  # Boxes a value inside a name tag.
-  #
-  # Consumes: parslet result
-  # Pushes: boxed result
-  #
-  Box = Struct.new(:name) do
-    def run(vm)
-      if vm.success?
-        result = vm.pop
-        vm.push(name => result)
-      end
-    end
-    def to_s
-      "BOX   #{name.inspect}"
-    end
-  end
-  # Pushes the current source pos to the stack.
-  #
-  # Consumes: Nothing
-  # Pushes: the current source.pos
-  #
-  PushPos = Class.new do
-    def run(vm)
-      source = vm.source
-      vm.push source.pos
-    end
-    def to_s
-      "PSHPS"
-    end
-  end
-  # Assumes that the stack contains the result of a parslet and above it
-  # the source position from before parsing that parslet (as per PushPos).
-  # Will remove both and leave the vm in a state that indicates the result
-  # of a lookahead, stack will be nil (no capture) and the error flag will
-  # be set.
-  #
-  # Consumes: VM state, source.pos
-  # Pushes: VM.state
-  # Effects: resets source.pos
-  #
-  CheckAndReset = Struct.new(:positive, :parslet) do
-    def run(vm)
-      source = vm.source
-      vm.pop if vm.success?
-      # Retrieve the parse position from before attempting to match the
-      # parslet.
-      start_pos = vm.pop
-      source.pos = start_pos
-      if positive && vm.success? || !positive && !vm.success?
-        vm.clear_error
-        vm.push nil
-      else
-        error_msg = positive ?
-          ["Input should start with ", parslet] :
-          ["Input should not start with ", parslet]
-        vm.set_error source.error(error_msg, start_pos)
-      end
-    end
-    def to_s
-      "CHKRS #{positive ? ':&' : ':!'}, #{parslet.inspect}"
-    end
-  end
-  # Compiles the block or 'calls' the subroutine that was compiled earlier.
-  #
-  CallBlock = Struct.new(:block) do
-    def run(vm)
-      vm.call(block.address)
-    end
-    def to_s
-      "LCALL #{block.address} (was atom<#{block.atom}>)"
-    end
-  end
-  Return = Class.new do
-    def run(vm)
-      vm.call_ret
-    end
-    def to_s
-      "RETRN"
-    end
-  end
-  Stop = Class.new do
-    def run(vm)
-      vm.stop
-    end
-    def to_s
-      "STPVM"
-    end
-  end
-  # Caching
-  CheckCache = Struct.new(:skip_adr) do
-    def run(vm)
-      return if vm.access_cache(skip_adr)
-      vm.push vm.source.pos
-    end
-    def to_s
-      "RETCA #{skip_adr}"
-    end
-  end
-  StoreResult = Struct.new(:adr) do
-    def run(vm)
-      vm.store_cache(adr)
-    end
-    def to_s
-      "STOCA #{adr}"
-    end
-  end
-end

data/lib/parslet/bytecode/vm.rb DELETED Viewed

@@ -1,209 +0,0 @@
-module Parslet::Bytecode
-  class VM
-    include Parslet::Atoms::CanFlatten
-    def initialize(debug=false)
-      @debug = debug
-    end
-    def debug?
-      @debug
-    end
-    def run(program, io)
-      init(program, io)
-      loop do
-        old_ip = @ip
-        instruction = fetch
-        break unless instruction
-        # Diagnostics
-        printf("executing %5d: %s\n", old_ip, instruction) if debug?
-        # Run the current instruction
-        instruction.run(self)
-        # Diagnostics
-        dump_state(0) if debug?
-        break if @stop
-      end
-      fail "Stack contains too many values." if @values.size>1
-      # In the best case, we have successfully matched and consumed all input.
-      # This is what we want, from now on down it's all error cases.
-      return flatten(@values.last) if success? && source.eof?
-      # Maybe we've matched some, but not all of the input? In parslets books,
-      # this is an error as well.
-      if success?
-        # assert: not source.eof?
-        current_pos = source.pos
-        source.error(
-          "Don't know what to do with #{source.read(100)}", current_pos).
-          raise(Parslet::UnconsumedInput)
-      end
-      # assert: ! @error.nil?
-      # And maybe we just could not do it for a reason. Raise that.
-      @error.raise
-    rescue => ex
-      dump_state(-1) unless ex.kind_of?(Parslet::ParseFailed)
-      raise
-    end
-    attr_reader :source
-    attr_reader :context
-    def init(program, io)
-      @ip = 0
-      @program = program
-      @source = Parslet::Source.new(io)
-      @context = Parslet::Atoms::Context.new
-      @values = []
-      @calls  = []
-      @frames = []
-      @cache  = {}
-    end
-    def fetch
-      @program.at(@ip).tap { @ip += 1 }
-    end
-    # Dumps the VM state so that the user can track errors down.
-    #
-    def dump_state(ip_offset)
-      return unless debug?
-      puts "\nVM STATE -------------------------------------------- "
-      old_pos = source.pos
-      debug_pos = old_pos - 10
-      source.pos = debug_pos < 0 ? 0 : debug_pos
-      puts "Source: #{source.read(20)}"
-      puts (" "*"Source: ".size) << (" "*(10+(debug_pos<0 ? debug_pos : 0))) << '^'
-      source.pos = old_pos
-      if @error
-        puts "Error register: #{@error}"
-      else
-        puts "Error register: EMPTY"
-      end
-      puts "Program: "
-      for adr in (@ip-5)..(@ip+5)
-        printf("%s%5d: %s\n",
-          adr == @ip+ip_offset ? '->' : '  ',
-          adr,
-          @program.at(adr)) if adr >= 0 && @program.at(adr)
-      end
-      puts "\nStack(#{@values.size}): (last 5, top is top of stack)"
-      @values.last(5).reverse.each_with_index do |v,i|
-        printf("  %5d: %s\n", i, v.inspect)
-      end
-      puts "\nStack Frames(#{@frames.size}): (last 5, top is top of stack)"
-      @frames.last(5).reverse.each_with_index do |v,i|
-        printf("  %5d: trunc stack at %s\n", i, v)
-      end
-      puts "\nCall Stack(#{@calls.size}): (last 5, top is top of stack)"
-      @calls.last(5).reverse.each_with_index do |v,i|
-        printf("  %5d: return to @%s\n", i, v)
-      end
-      puts "---------------------- -------------------------------- "
-    end
-    # --------------------------------------------- interface for instructions
-    def access_cache(skip_adr)
-      key = [source.pos, @ip-1]
-      # Is the given vm state in the cache yet?
-      if @cache[key]
-        # Restore state
-        success, value, advance = @cache[key]
-        if success
-          push value
-        else
-          set_error value
-        end
-        source.pos += advance
-        # Skip to skip_adr
-        jump skip_adr
-        return true
-      end
-      return false
-    end
-    def store_cache(adr)
-      if success?
-        pos, result = pop(2)
-        key = [pos, adr.address]
-        @cache[key] = [true, result, source.pos-pos]
-        push result
-      else
-        pos = pop
-        key = [pos, adr.address]
-        @cache[key] = [false, @error, source.pos-pos]
-      end
-    end
-    def push(value)
-      @values.push value
-    end
-    def pop(n=nil)
-      if n
-        fail "Stack corruption detected, popping too many values (#{n}/#{@values.size})." \
-          if n>@values.size
-        @values.pop(n)
-      else
-        fail "Stack corruption detected, popping too many values. (stack is empty)" \
-          if @values.empty?
-        @values.pop
-      end
-    end
-    def value_at(ptr)
-      @values.at(-ptr-1)
-    end
-    def enter_frame
-      @frames.push @values.size
-    end
-    def discard_frame
-      size = @frames.pop
-      fail "No stack frame." unless size
-      fail "Stack frame larger than the current stack." if size > @values.size
-      @values = @values[0,size]
-    end
-    def jump(address)
-      @ip = address.address
-    end
-    def success?
-      !@error
-    end
-    def call(adr)
-      @calls.push @ip
-      jump(adr)
-    end
-    def call_ret
-      @ip = @calls.pop
-      fail "One pop too many - empty call stack in #call_ret." unless @ip
-    end
-    def set_error(error)
-      @error = error
-    end
-    def clear_error
-      @error = nil
-    end
-    attr_reader :error
-    def stop
-      @stop = true
-    end
-  end
-end