RubyGems - parslet - Versions diffs - 1.2.0 → 1.2.1 - Mend

parslet 1.2.0 → 1.2.1

Files changed (14) hide show

data/HISTORY.txt +14 -1
data/README +11 -7
data/example/boolean_algebra.rb +70 -0
data/example/json.rb +131 -0
data/example/output/boolean_algebra.out +4 -0
data/example/output/json.out +5 -0
data/example/output/sentence.out +1 -0
data/example/sentence.rb +36 -0
data/lib/parslet.rb +8 -0
data/lib/parslet/atoms/base.rb +7 -5
data/lib/parslet/convenience.rb +2 -0
data/lib/parslet/slice.rb +32 -76
data/lib/parslet/source.rb +20 -61
metadata +9 -5

data/HISTORY.txt CHANGED Viewed

@@ -2,8 +2,21 @@
   - prsnt? and absnt? are now finally banned into oblivion. Wasting vocals for
     the win.
+= 1.3.0 / ???
+  Next bigger release, features not clear yet. Probably heredoc-parsing.
+= 1.2.1 / 6Jun2011
+  ! FIX: Unconsumed input at the end of a parse now raises
+    Parslet::UnconsumedInput. (see issue 18)
+  ! FIX: Unicode parsing should now work as expected. (see issue 38)
+  ! FIX: Slice#slice returned wrong bits at times (see issue 36).
-= 1.2.0 / ???
+= 1.2.0 / 4Feb2011
   + Parslet::Parser is now also a grammar atom, it can be composed freely with
     other atoms. (str('f') >> MiniLispParser.new >> str('b'))

data/README CHANGED Viewed

@@ -5,10 +5,10 @@ Parslet makes developing complex parsers easy. It does so by
 * providing the best <b>error reporting</b> possible
 * <b>not generating</b> reams of code for you to debug
-Parslet takes the long way around to make <b>your job</b> easier. It allows for
-incremental language construction. Often, you start out small, implementing
-the atoms of your language first; _parslet_ takes pride in making this
-possible.
+Parslet takes the long way around to make <b>your job</b> easier. It allows
+for incremental language construction. Often, you start out small,
+implementing the atoms of your language first; _parslet_ takes pride in making
+this possible.
 Eager to try this out? Please see the associated web site:
 http://kschiess.github.com/parslet
@@ -41,11 +41,15 @@ SYNOPSIS
 COMPATIBILITY
-This library should work with most rubies. I've tested it with MRI 1.8, 1.9,
-rbx-head, jruby. Please report as a bug if you encounter issues.
+This library should work with most rubies. I've tested it with MRI 1.8
+(except 1.8.6), 1.9, rbx-head and jruby. Please file a bug report if you
+encounter issues.
+Note that due to Ruby 1.8 internals, Unicode parsing is not supported on that
+version.
 STATUS
-At version 1.1 - Good basic functionality and lots of plans for extension.
+At version 1.2.1 - See HISTORY.txt for changes.
 (c) 2010 Kaspar Schiess

data/example/boolean_algebra.rb ADDED Viewed

@@ -0,0 +1,70 @@
+$:.unshift File.dirname(__FILE__) + "/../lib"
+require "parslet"
+require "pp"
+# Parses strings like "var1 and (var2 or var3)" respecting operator precedence
+# and parentheses. After that transforms the parse tree into an array of
+# arrays like this:
+#
+# [["1", "2"], ["1", "3"]]
+#
+# The array represents a DNF (disjunctive normal form). Elements of the outer
+# array are connected with the "or" operator, while elements of the inner
+# arrays are joined with "and".
+#
+class Parser < Parslet::Parser
+  rule(:space)  { match[" "].repeat(1) }
+  rule(:space?) { space.maybe }
+  rule(:lparen) { str("(") >> space? }
+  rule(:rparen) { str(")") >> space? }
+  rule(:and_operator) { str("and") >> space? }
+  rule(:or_operator)  { str("or")  >> space? }
+  rule(:var) { str("var") >> match["0-9"].repeat(1).as(:var) >> space? }
+  # The primary rule deals with parentheses.
+  rule(:primary) { lparen >> or_operation >> rparen | var }
+  # Note that the following rules are both right-recursive.
+  rule(:and_operation) {
+    (primary.as(:left) >> and_operator >>
+      and_operation.as(:right)).as(:and) |
+    primary }
+  rule(:or_operation)  {
+    (and_operation.as(:left) >> or_operator >>
+      or_operation.as(:right)).as(:or) |
+    and_operation }
+  # We start at the lowest precedence rule.
+  root(:or_operation)
+end
+class Transformer < Parslet::Transform
+  rule(:var => simple(:var)) { [[String(var)]] }
+  rule(:or => { :left => subtree(:left), :right => subtree(:right) }) do
+    (left + right)
+  end
+  rule(:and => { :left => subtree(:left), :right => subtree(:right) }) do
+     res = []
+     left.each do |l|
+       right.each do |r|
+         res << (l + r)
+       end
+     end
+     res
+  end
+end
+pp tree = Parser.new.parse("var1 and (var2 or var3)")
+# {:and=>
+#   {:left=>{:var=>"1"@3},
+#    :right=>{:or=>{:left=>{:var=>"2"@13}, :right=>{:var=>"3"@21}}}}}
+pp Transformer.new.apply(tree)
+# [["1", "2"], ["1", "3"]]

data/example/json.rb ADDED Viewed

@@ -0,0 +1,131 @@
+$:.unshift File.dirname(__FILE__) + "/../lib"
+#
+# MIT License - (c) 2011 John Mettraux
+#
+require 'rubygems'
+require 'parslet' # gem install parslet
+module MyJson
+  class Parser < Parslet::Parser
+    rule(:spaces) { match('\s').repeat(1) }
+    rule(:spaces?) { spaces.maybe }
+    rule(:comma) { spaces? >> str(',') >> spaces? }
+    rule(:digit) { match('[0-9]') }
+    rule(:number) {
+      (
+        str('-').maybe >> (
+          str('0') | (match('[1-9]') >> digit.repeat)
+        ) >> (
+          str('.') >> digit.repeat(1)
+        ).maybe >> (
+          match('[eE]') >> (str('+') | str('-')).maybe >> digit.repeat(1)
+        ).maybe
+      ).as(:number)
+    }
+    rule(:string) {
+      str('"') >> (
+        str('\\') >> any | str('"').absent? >> any
+      ).repeat.as(:string) >> str('"')
+    }
+    rule(:array) {
+      str('[') >> spaces? >>
+      (value >> (comma >> value).repeat).maybe.as(:array) >>
+      spaces? >> str(']')
+    }
+    rule(:object) {
+      str('{') >> spaces? >>
+      (entry >> (comma >> entry).repeat).maybe.as(:object) >>
+      spaces? >> str('}')
+    }
+    rule(:value) {
+      string | number |
+      object | array |
+      str('true').as(:true) | str('false').as(:false) |
+      str('null').as(:null)
+    }
+    rule(:entry) {
+      (
+         string.as(:key) >> spaces? >>
+         str(':') >> spaces? >>
+         value.as(:val)
+      ).as(:entry)
+    }
+    rule(:attribute) { (entry | value).as(:attribute) }
+    rule(:top) { spaces? >> value >> spaces? }
+    root(:top)
+  end
+  class Transformer < Parslet::Transform
+    class Entry < Struct.new(:key, :val); end
+    rule(:array => subtree(:ar)) {
+      ar.is_a?(Array) ? ar : [ ar ]
+    }
+    rule(:object => subtree(:ob)) {
+      (ob.is_a?(Array) ? ob : [ ob ]).inject({}) { |h, e| h[e.key] = e.val; h }
+    }
+    rule(:entry => { :key => simple(:ke), :val => simple(:va) }) {
+      Entry.new(ke, va)
+    }
+    rule(:string => simple(:st)) {
+      st.to_s
+    }
+    rule(:number => simple(:nb)) {
+      nb.match(/[eE\.]/) ? Float(nb) : Integer(nb)
+    }
+    rule(:null => simple(:nu)) { nil }
+    rule(:true => simple(:tr)) { true }
+    rule(:false => simple(:fa)) { false }
+  end
+  def self.parse(s)
+    parser = Parser.new
+    transformer = Transformer.new
+    tree = parser.parse(s)
+    puts; p tree; puts
+    out = transformer.apply(tree)
+    out
+  rescue Parslet::ParseFailed => e
+    puts e, parser.root.error_tree
+  end
+end
+s = %{
+  [ 1, 2, 3, null,
+    "asdfasdf asdfds", { "a": -1.2 }, { "b": true, "c": false },
+    0.1e24, true, false, [ 1 ] ]
+}
+out = MyJson.parse(s)
+p out; puts
+out == [
+  1, 2, 3, nil,
+  "asdfasdf asdfds", { "a" => -1.2 }, { "b" => true, "c" => false },
+  0.1e24, true, false, [ 1 ]
+] || raise("MyJson is a failure")

data/example/output/boolean_algebra.out ADDED Viewed

@@ -0,0 +1,4 @@
+{:and=>
+  {:left=>{:var=>"1"@3},
+   :right=>{:or=>{:left=>{:var=>"2"@13}, :right=>{:var=>"3"@21}}}}}
+[["1", "2"], ["1", "3"]]

data/example/output/json.out ADDED Viewed

@@ -0,0 +1,5 @@
+{:array=>[{:number=>"1"@5}, {:number=>"2"@8}, {:number=>"3"@11}, {:null=>"null"@14}, {:string=>"asdfasdf asdfds"@25}, {:object=>{:entry=>{:key=>{:string=>"a"@46}, :val=>{:number=>"-1.2"@50}}}}, {:object=>[{:entry=>{:key=>{:string=>"b"@61}, :val=>{:true=>"true"@65}}}, {:entry=>{:key=>{:string=>"c"@72}, :val=>{:false=>"false"@76}}}]}, {:number=>"0.1e24"@89}, {:true=>"true"@97}, {:false=>"false"@103}, {:array=>{:number=>"1"@112}}]}
+[1, 2, 3, nil, "asdfasdf asdfds", {"a"=>-1.2}, {"b"=>true, "c"=>false}, 1.0e+23, true, false, [1]]

data/example/output/sentence.out ADDED Viewed

@@ -0,0 +1 @@

+ ["RubyKaigi2009のテーマは、「変わる／変える」です。", " 前回のRubyKaigi2008のテーマであった「多様性」の言葉の通り、 2008年はRubyそのものに関しても、またRubyの活躍する舞台に関しても、ますます多様化が進みつつあります。", "RubyKaigi2008は、そのような Rubyの生態系をあらためて認識する場となりました。", " しかし、こうした多様化が進む中、異なる者同士が単純に距離を置いたままでは、その違いを認識したところであまり意味がありません。", " 異なる実装、異なる思想、異なる背景といった、様々な多様性を理解しつつ、すり合わせるべきものをすり合わせ、変えていくべきところを変えていくことが、豊かな未来へとつながる道に違いありません。"]

data/example/sentence.rb ADDED Viewed

@@ -0,0 +1,36 @@
+# encoding: UTF-8
+# A small example contributed by John Mettraux (jmettraux) that demonstrates
+# working with Unicode. This only works on Ruby 1.9.
+$:.unshift File.dirname(__FILE__) + "/../lib"
+require 'parslet'
+class Parser < Parslet::Parser
+  rule(:sentence) { (match('[^。]').repeat(1) >> str("。")).as(:sentence) }
+  rule(:sentences) { sentence.repeat }
+  root(:sentences)
+end
+class Transformer < Parslet::Transform
+  rule(:sentence => simple(:sen)) { sen.to_s }
+end
+string =
+  "RubyKaigi2009のテーマは、「変わる／変える」です。 前回の" +
+  "RubyKaigi2008のテーマであった「多様性」の言葉の通り、 " +
+  "2008年はRubyそのものに関しても、またRubyの活躍する舞台に関しても、 " +
+  "ますます多様化が進みつつあります。RubyKaigi2008は、そのような " +
+  "Rubyの生態系をあらためて認識する場となりました。 しかし、" +
+  "こうした多様化が進む中、異なる者同士が単純に距離を 置いたままでは、" +
+  "その違いを認識したところであまり意味がありません。 異なる実装、" +
+  "異なる思想、異なる背景といった、様々な多様性を理解しつつ、 " +
+  "すり合わせるべきものをすり合わせ、変えていくべきところを " +
+  "変えていくことが、豊かな未来へとつながる道に違いありません。"
+parser = Parser.new
+transformer = Transformer.new
+tree = parser.parse(string)
+p transformer.apply(tree)

data/lib/parslet.rb CHANGED Viewed

@@ -75,6 +75,14 @@ module Parslet
   class ParseFailed < StandardError
   end
+  # Raised when the parse operation didn't consume all of its input. In this
+  # case, it makes only limited sense to look at the error tree. Maybe the
+  # parser worked just fine, but didn't account for the characters at the tail
+  # of the input?
+  #
+  class UnconsumedInput < ParseFailed
+  end
   module ClassMethods
     # Define an entity for the parser. This generates a method of the same
     # name that can be used as part of other patterns. Those methods can be

data/lib/parslet/atoms/base.rb CHANGED Viewed

@@ -48,14 +48,16 @@ class Parslet::Atoms::Base
       # error to fail with. Otherwise just report that we cannot consume the
       # input.
       if cause
-        # Don't garnish the real cause; but the exception is different anyway.
-        raise Parslet::ParseFailed,
+        # We're not using #parse_failed here, since it assigns to @last_cause.
+        # Still: We'll raise this differently, since the real cause is different.
+        raise Parslet::UnconsumedInput,
           "Unconsumed input, maybe because of this: #{cause}"
       else
         old_pos = source.pos
         parse_failed(
           format_cause(source,
-            "Don't know what to do with #{source.read(100)}", old_pos))
+            "Don't know what to do with #{source.read(100)}", old_pos),
+          Parslet::UnconsumedInput)
       end
     end
@@ -246,9 +248,9 @@ private
   # Signals to the outside that the parse has failed. Use this in conjunction
   # with #format_cause for nice error messages.
   #
-  def parse_failed(cause)
+  def parse_failed(cause, exception_klass=Parslet::ParseFailed)
     @last_cause = cause
-    raise Parslet::ParseFailed,
+    raise exception_klass,
       @last_cause.to_s
   end

data/lib/parslet/convenience.rb CHANGED Viewed

@@ -25,6 +25,8 @@ class Parslet::Atoms::Base
   #
   def parse_with_debug str
     parse str
+  rescue Parslet::UnconsumedInput => error
+    puts error
   rescue Parslet::ParseFailed => error
     puts error
     puts error_tree

data/lib/parslet/slice.rb CHANGED Viewed

@@ -1,24 +1,24 @@
 # A slice is a small part from the parse input. A slice mainly behaves like
 # any other string, except that it remembers where it came from (offset in
-# original input).
+# original input).
 #
 # Some slices also know what parent slice they are a small part of. This
 # allows the slice to be concatenated to other slices from the same buffer by
-# reslicing it against that original buffer.
+# reslicing it against that original buffer.
 #
 # Why the complexity? Slices allow retaining offset information. This will
 # allow to assign line and column to each small bit of output from the parslet
 # parser. Also, while we keep that information, we might as well try to do
 # something useful with it. Reslicing the same buffers should in theory keep
-# buffer copies and allocations down.
+# buffer copies and allocations down.
 #
 # == Extracting line and column
 #
 # Using the #line_and_column method, you can extract the line and column in
-# the original input where this slice starts.
+# the original input where this slice starts.
 #
-# Example:
+# Example:
 #   slice.line_and_column # => [1, 13]
 #   slice.offset          # => 12
 #
@@ -30,117 +30,73 @@
 # calling #to_s.
 #
 # These omissions are somewhat intentional. Rather than maintaining a full
-# delegation, we opt for a partial emulation that gets the job done.
+# delegation, we opt for a partial emulation that gets the job done.
 #
 # Note also that there are some things that work with strings that will never
 # work when using slices. For instance, you cannot concatenate slices that
-# aren't from the same source or that don't join up:
+# aren't from the same source or that don't join up:
 #
-# Example:
+# Example:
 #   big_slice = 'abcdef'
 #   a = big_slice.slice(0, 2)   # => "ab"@0
 #   b = big_slice.slice(4, 2)   # => "ef"@4
-#
+#
 #   a + b # raises Parslet::InvalidSliceOperation
 #
 # This avoids creating slices with impossible offsets or that are
-# discontinuous.
+# discontinuous.
 #
 class Parslet::Slice
   attr_reader :str, :offset
-  attr_reader :parent
   attr_reader :source
-  def initialize(string, offset, source=nil, parent=nil)
+  def initialize(string, offset, source=nil)
     @str, @offset = string, offset
     @source = source
-    @parent = parent
   end
-  # Compares slices to other slices or strings.
+  # Compares slices to other slices or strings.
   #
   def == other
     str == other
   end
-  # Match regular expressions.
-  #
+  # Match regular expressions.
+  #
   def match(regexp)
     str.match(regexp)
   end
-  # Returns a slice that starts at offset start and that has length characters.
-  # Whenever possible, return parts of the parent buffer that this slice was
-  # cut out of.
-  #
-  def slice(start, length)
-    # NOTE: At a later stage, we might not want to create huge trees of slices.
-    # The fact that the root of the tree creates slices that link to it makes
-    # the tree already rather flat.
-    if parent
-      parent.slice(offset - parent.offset, length)
-    else
-      self.class.new(str.slice(start, length), offset+start, source, self)
-    end
-  end
-  # Returns a slice that starts at file offset start and that has length
-  # characters in it.
-  #
-  def abs_slice(start, length)
-    slice(start-offset, length)
-  end
-  # True if this slice can satisfy an original input request to the
-  # range ofs, len.
+  # Returns the slice's size in characters.
   #
-  def satisfies?(ofs, len)
-    ofs >= offset && (ofs-offset+len-1)<str.size
-  end
   def size
     str.size
   end
+  # Concatenate two slices; it is assumed that the second slice begins
+  # where the first one ends. The offset of the resulting slice is the same
+  # as the one of this slice.
+  #
   def +(other)
-    raise ArgumentError,
-      "Cannot concat something other than a slice to a slice." \
-        unless other.respond_to?(:to_slice)
-    raise Parslet::InvalidSliceOperation,
-      "Cannot join slices that aren't adjacent."+
-      " (#{self.inspect} + #{other.inspect})" \
-        if offset+size != other.offset
-    raise Parslet::InvalidSliceOperation, "Not from the same source." \
-      if source != other.source
-    # If both slices stem from the same bigger buffer, we can reslice that
-    # buffer to (probably) avoid a buffer copy, as long as the strings are
-    # not modified.
-    if parent && parent == other.parent
-      return parent.abs_slice(offset, size+other.size)
-    end
-    self.class.new(str + other.str, offset, source)
+    self.class.new(str + other.to_s, offset, source)
   end
-  # Returns a <line, column> tuple referring to the original input.
+  # Returns a <line, column> tuple referring to the original input.
   #
   def line_and_column
     raise ArgumentError, "No source was given, cannot infer line and column." \
       unless source
     source.line_and_column(self.offset)
   end
   # Conversion operators -----------------------------------------------------
   def to_str
     str
   end
   alias to_s to_str
   def to_slice
     self
   end
@@ -156,7 +112,7 @@ class Parslet::Slice
   def to_f
     str.to_f
   end
   # Inspection & Debugging ---------------------------------------------------
   # Prints the slice as <code>"string"@offset</code>.
@@ -165,7 +121,7 @@ class Parslet::Slice
   end
 end
-# Raised when trying to do an operation on slices that cannot succeed, like
+# Raised when trying to do an operation on slices that cannot succeed, like
 # adding non-adjacent slices. See Parslet::Slice.
 #
 class Parslet::InvalidSliceOperation < StandardError

data/lib/parslet/source.rb CHANGED Viewed

@@ -14,32 +14,25 @@ class Parslet::Source
     end
     @io = io
-    @virtual_position = @io.pos
-    @eof_position = nil
     @line_cache = LineCache.new
-    # Stores an array of <offset, buffer> tuples.
-    @slices = []
   end
   # Reads n chars from the input and returns a Parslet::Slice instance.
   #
   def read(n)
-    slice = read_from_cache(@virtual_position, n)
-    @virtual_position += slice.size
-    slice
+    raise ArgumentError, "Cannot read <= 1 characters at a time." \
+      if n < 1
+    read_slice(n)
   end
   def eof?
-    @eof_position && @virtual_position >= @eof_position
+    @io.eof?
   end
   def pos
-    @virtual_position
+    @io.pos
   end
   def pos=(new_pos)
-    @virtual_position = new_pos
+    @io.pos = new_pos
   end
   # Returns a <line, column> tuple for the given position. If no position is
@@ -51,59 +44,25 @@ class Parslet::Source
   end
 private
-  # Minimal size of a single read
-  MIN_READ_SIZE = 10 * 1024
-  # Number of slices to keep
-  BUFFER_CACHE_SIZE = 10
-  # Reads and returns a piece of the input that contains length chars starting
-  # at offset.
-  #
-  def read_from_cache(offset, length)
-    # Do we already have a buffer that contains the given range?
-    # Return that.
-    slice = @slices.find { |slice|
-      slice.satisfies?(offset, length) }
-    return slice.abs_slice(offset, length) if slice
-    # Read a new buffer: Can the demand be satisfied by sequentially reading
-    # from the current position?
-    needed = offset-@io.pos+length
-    if @io.pos <= offset && needed<MIN_READ_SIZE
-      # read the slice
-      slice = read_slice(needed)
-      return slice.abs_slice(offset, length)
-    end
-    # Otherwise seek and read enough so that we can satisfy the demand.
-    @io.pos = offset
-    slice = read_slice(needed)
-    return slice.abs_slice(offset, length)
-  end
   def read_slice(needed)
     start = @io.pos
-    request = [MIN_READ_SIZE, needed].max
-    buf = @io.read(request)
-    # remember eof position
-    if !buf || buf.size<request
-      @eof_position = @io.pos
-    end
+    buf = @io.gets(nil, needed)
     # cache line ends
     @line_cache.scan_for_line_endings(start, buf)
-    slice = Parslet::Slice.new(buf || '', start, self)
-    # Don't cache empty slices.
-    return slice unless buf
-    # cache the buffer (and eject old entries)
-    @slices << slice
-    @slices.shift if @slices.size > BUFFER_CACHE_SIZE
-    slice
+    Parslet::Slice.new(buf || '', start, self)
+  end
+  if RUBY_VERSION !~ /^1.9/
+    def read_slice(needed)
+      start = @io.pos
+      buf = @io.read(needed)
+      # cache line ends
+      @line_cache.scan_for_line_endings(start, buf)
+      Parslet::Slice.new(buf || '', start, self)
+    end
   end
 end

metadata CHANGED Viewed

@@ -2,7 +2,7 @@
 name: parslet
 version: !ruby/object:Gem::Version
   prerelease:
-  version: 1.2.0
+  version: 1.2.1
 platform: ruby
 authors:
 - Kaspar Schiess
@@ -10,8 +10,7 @@ autorequire:
 bindir: bin
 cert_chain: []
-date: 2011-02-04 00:00:00 +01:00
-default_executable:
+date: 2011-06-05 00:00:00 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: blankslate
@@ -100,14 +99,17 @@ files:
 - lib/parslet/source.rb
 - lib/parslet/transform.rb
 - lib/parslet.rb
+- example/boolean_algebra.rb
 - example/comments.rb
 - example/documentation.rb
 - example/email_parser.rb
 - example/empty.rb
 - example/erb.rb
 - example/ip_address.rb
+- example/json.rb
 - example/local.rb
 - example/minilisp.rb
+- example/output/boolean_algebra.out
 - example/output/comments.out
 - example/output/documentation.err
 - example/output/documentation.out
@@ -115,21 +117,23 @@ files:
 - example/output/empty.err
 - example/output/erb.out
 - example/output/ip_address.out
+- example/output/json.out
 - example/output/local.out
 - example/output/minilisp.out
 - example/output/parens.out
 - example/output/readme.out
 - example/output/seasons.out
+- example/output/sentence.out
 - example/output/simple_xml.out
 - example/output/string_parser.out
 - example/parens.rb
 - example/readme.rb
 - example/seasons.rb
+- example/sentence.rb
 - example/simple.lit
 - example/simple_xml.rb
 - example/string_parser.rb
 - example/test.lit
-has_rdoc: true
 homepage: http://kschiess.github.com/parslet
 licenses: []
@@ -154,7 +158,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
 requirements: []
 rubyforge_project:
-rubygems_version: 1.5.2
+rubygems_version: 1.8.5
 signing_key:
 specification_version: 3
 summary: Parser construction library with great error reporting in Ruby.