RubyGems - parslet - Versions diffs - 1.1.1 → 1.2.0 - Mend

parslet 1.1.1 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (49)

data/HISTORY.txt +29 -0
data/README +2 -4
data/Rakefile +18 -4
data/example/comments.rb +11 -13
data/example/documentation.rb +1 -1
data/example/email_parser.rb +5 -5
data/example/empty.rb +2 -2
data/example/erb.rb +6 -3
data/example/ip_address.rb +2 -2
data/example/local.rb +34 -0
data/example/minilisp.rb +2 -2
data/example/output/comments.out +8 -0
data/example/output/documentation.err +4 -0
data/example/output/documentation.out +1 -0
data/example/output/email_parser.out +2 -0
data/example/output/empty.err +1 -0
data/example/output/erb.out +7 -0
data/example/output/ip_address.out +9 -0
data/example/output/local.out +3 -0
data/example/output/minilisp.out +5 -0
data/example/output/parens.out +8 -0
data/example/output/readme.out +1 -0
data/example/output/seasons.out +28 -0
data/example/output/simple_xml.out +2 -0
data/example/output/string_parser.out +3 -0
data/example/parens.rb +1 -3
data/example/readme.rb +4 -10
data/example/seasons.rb +2 -1
data/example/simple_xml.rb +5 -8
data/example/string_parser.rb +7 -5
data/lib/parslet.rb +20 -31
data/lib/parslet/atoms.rb +1 -0
data/lib/parslet/atoms/base.rb +46 -87
data/lib/parslet/atoms/dsl.rb +98 -0
data/lib/parslet/atoms/entity.rb +3 -4
data/lib/parslet/atoms/lookahead.rb +1 -1
data/lib/parslet/atoms/re.rb +2 -2
data/lib/parslet/atoms/str.rb +5 -2
data/lib/parslet/atoms/transform.rb +75 -0
data/lib/parslet/atoms/visitor.rb +9 -9
data/lib/parslet/convenience.rb +3 -3
data/lib/parslet/export.rb +13 -13
data/lib/parslet/expression/treetop.rb +2 -2
data/lib/parslet/parser.rb +55 -1
data/lib/parslet/rig/rspec.rb +36 -10
data/lib/parslet/slice.rb +172 -0
data/lib/parslet/source.rb +72 -83
data/lib/parslet/source/line_cache.rb +90 -0
metadata +22 -20

data/example/seasons.rb CHANGED Viewed

@@ -1,4 +1,5 @@
-$:.unshift '../lib/'
+$:.unshift File.dirname(__FILE__) + "/../lib"
 require 'parslet'
 require 'pp'

data/example/simple_xml.rb CHANGED Viewed

@@ -1,14 +1,12 @@
 # A simple xml parser. It is simple in the respect that it doesn't address
 # any of the complexities of XML. This is ruby 1.9.
-$:.unshift '../lib'
+$:.unshift File.dirname(__FILE__) + "/../lib"
 require 'pp'
 require 'parslet'
-module XML
-  include Parslet
+class XML < Parslet::Parser
   root :document
   rule(:document) {
@@ -23,7 +21,7 @@ module XML
     parslet = str('<')
     parslet = parslet >> str('/') if close
-    parslet = parslet >> (str('>').absnt? >> match("[a-zA-Z]")).repeat(1).as(:name)
+    parslet = parslet >> (str('>').absent? >> match("[a-zA-Z]")).repeat(1).as(:name)
     parslet = parslet >> str('>')
     parslet
@@ -35,8 +33,7 @@ module XML
 end
 def check(xml)
-  include XML
-  r=parse(xml)
+  r = XML.new.parse(xml)
   # We'll validate the tree by reducing valid pairs of tags into simply the
   # string "verified". If the transformation ends on a string, then the
@@ -54,4 +51,4 @@ def check(xml)
 end
 pp check("<a><b>some text in the tags</b></a>")
-pp check("<b><b>some text in the tags</b></a>")
+pp check("<b><b>some text in the tags</b></a>")

data/example/string_parser.rb CHANGED Viewed

@@ -4,7 +4,7 @@
 require 'pp'
-$:.unshift '../lib/'
+$:.unshift File.dirname(__FILE__) + "/../lib"
 require 'parslet'
 include Parslet
@@ -26,7 +26,7 @@ class LiteralsParser < Parslet::Parser
     str('"') >>
     (
       (str('\\') >> any) |
-      (str('"').absnt? >> any)
+      (str('"').absent? >> any)
     ).repeat.as(:string) >>
     str('"')
   end
@@ -50,8 +50,10 @@ class LiteralsParser < Parslet::Parser
   root :literals
 end
-parsetree = LiteralsParser.new.parse(
-  File.read('simple.lit'))
+input_name = File.join(File.dirname(__FILE__), 'simple.lit')
+file = File.read(input_name)
+parsetree = LiteralsParser.new.parse(file)
 class Lit < Struct.new(:text)
   def to_s
@@ -72,4 +74,4 @@ transform = Parslet::Transform.new do
 end
 ast = transform.apply(parsetree)
-pp ast
+pp ast

data/lib/parslet.rb CHANGED Viewed

@@ -7,7 +7,7 @@
 #     root(:a)
 #   end
 #
-#   pp MyParser.new.parse('aaaa')   # => 'aaaa'
+#   pp MyParser.new.parse('aaaa')   # => 'aaaa'@0
 #   pp MyParser.new.parse('bbbb')   # => Parslet::Atoms::ParseFailed:
 #                                   #    Don't know what to do with bbbb at line 1 char 1.
 #
@@ -64,38 +64,18 @@ module Parslet
   #     puts parslet.error_tree
   #   end
   #
+  # Alternatively, you can just require 'parslet/convenience' and call
+  # the method #parse_with_debug instead of #parse. This method never
+  # raises; it prints error trees to stdout instead.
+  #
+  # Example:
+  #   require 'parslet/convenience'
+  #   parslet.parse_with_debug(str)
+  #
   class ParseFailed < StandardError
   end
   module ClassMethods
-    # Define the parsers #root function. This is the place where you start
-    # parsing; if you have a rule for 'file' that describes what should be
-    # in a file, this would be your root declaration:
-    #
-    #   class Parser
-    #     root :file
-    #     rule(:file) { ... }
-    #   end
-    #
-    # #root declares a 'parse' function that works just like the parse
-    # function that you can call on a simple parslet, taking a string as input
-    # and producing parse output.
-    #
-    # In a way, #root is a shorthand for:
-    #
-    #   def parse(str)
-    #     your_parser_root.parse(str)
-    #   end
-    #
-    def root(name)
-      define_method(:root) do
-        self.send(name)
-      end
-      define_method(:parse) do |str|
-        root.parse(str)
-      end
-    end
     # Define an entity for the parser. This generates a method of the same
     # name that can be used as part of other patterns. Those methods can be
     # freely mixed in your parser class with real ruby methods.
@@ -116,8 +96,14 @@ module Parslet
     def rule(name, &definition)
       define_method(name) do
         @rules ||= {}     # <name, rule> memoization
-        @rules[name] or
-          (@rules[name] = Atoms::Entity.new(name, self, definition))
+        return @rules[name] if @rules.has_key?(name)
+        # Capture the self of the parser class along with the definition.
+        definition_closure = proc {
+          self.instance_eval(&definition)
+        }
+        @rules[name] = Atoms::Entity.new(name, &definition_closure)
       end
     end
   end
@@ -164,6 +150,8 @@ module Parslet
   # Returns an atom matching any character. It acts like the '.' (dot)
   # character in regular expressions.
   #
+  # Example:
+  #
   #   any.parse('a')    # => 'a'
   #
   def any
@@ -227,6 +215,7 @@ module Parslet
   autoload :Expression, 'parslet/expression'
 end
+require 'parslet/slice'
 require 'parslet/source'
 require 'parslet/error_tree'
 require 'parslet/atoms'

data/lib/parslet/atoms.rb CHANGED Viewed

@@ -16,6 +16,7 @@ module Parslet::Atoms
   end
   require 'parslet/atoms/context'
+  require 'parslet/atoms/dsl'
   require 'parslet/atoms/base'
   require 'parslet/atoms/named'
   require 'parslet/atoms/lookahead'

data/lib/parslet/atoms/base.rb CHANGED Viewed

@@ -1,8 +1,11 @@
 # Base class for all parslets, handles orchestration of calls and implements
 # a lot of the operator and chaining methods.
 #
+# Also see Parslet::Atoms::DSL for chaining parslet atoms together.
+#
 class Parslet::Atoms::Base
   include Parslet::Atoms::Precedence
+  include Parslet::Atoms::DSL
   # Internally, all parsing functions return either an instance of Fail
   # or an instance of Success.
@@ -89,84 +92,6 @@ class Parslet::Atoms::Base
       "Atoms::Base doesn't have behaviour, please implement #try(source, context)."
   end
-  # Construct a new atom that repeats the current atom min times at least and
-  # at most max times. max can be nil to indicate that no maximum is present.
-  #
-  # Example:
-  #   # match any number of 'a's
-  #   str('a').repeat
-  #
-  #   # match between 1 and 3 'a's
-  #   str('a').repeat(1,3)
-  #
-  def repeat(min=0, max=nil)
-    Parslet::Atoms::Repetition.new(self, min, max)
-  end
-  # Returns a new parslet atom that is only maybe present in the input. This
-  # is synonymous to calling #repeat(0,1). Generated tree value will be
-  # either nil (if atom is not present in the input) or the matched subtree.
-  #
-  # Example:
-  #   str('foo').maybe
-  #
-  def maybe
-    Parslet::Atoms::Repetition.new(self, 0, 1, :maybe)
-  end
-  # Chains two parslet atoms together as a sequence.
-  #
-  # Example:
-  #   str('a') >> str('b')
-  #
-  def >>(parslet)
-    Parslet::Atoms::Sequence.new(self, parslet)
-  end
-  # Chains two parslet atoms together to express alternation. A match will
-  # always be attempted with the parslet on the left side first. If it doesn't
-  # match, the right side will be tried.
-  #
-  # Example:
-  #   # matches either 'a' OR 'b'
-  #   str('a') | str('b')
-  #
-  def |(parslet)
-    Parslet::Atoms::Alternative.new(self, parslet)
-  end
-  # Tests for absence of a parslet atom in the input stream without consuming
-  # it.
-  #
-  # Example:
-  #   # Only proceed the parse if 'a' is absent.
-  #   str('a').absnt?
-  #
-  def absnt?
-    Parslet::Atoms::Lookahead.new(self, false)
-  end
-  # Tests for presence of a parslet atom in the input stream without consuming
-  # it.
-  #
-  # Example:
-  #   # Only proceed the parse if 'a' is present.
-  #   str('a').prsnt?
-  #
-  def prsnt?
-    Parslet::Atoms::Lookahead.new(self, true)
-  end
-  # Marks a parslet atom as important for the tree output. This must be used
-  # to achieve meaningful output from the #parse method.
-  #
-  # Example:
-  #   str('a').as(:b) # will produce {:b => 'a'}
-  #
-  def as(name)
-    Parslet::Atoms::Named.new(self, name)
-  end
   # Takes a mixed value coming out of a parslet and converts it to a return
   # value for the user by dropping things and merging hashes.
   #
@@ -192,14 +117,24 @@ class Parslet::Atoms::Base
     fail "BUG: Unknown tag #{tag.inspect}."
   end
+  # Lisp style fold left where the first element builds the basis for
+  # an inject.
+  #
+  def foldl(list, &block)
+    return '' if list.empty?
+    list[1..-1].inject(list.first, &block)
+  end
+  # Flatten results from a sequence of parslets.
+  #
   def flatten_sequence(list) # :nodoc:
-    list.compact.inject('') { |r, e|        # and then merge flat elements
+    foldl(list.compact) { |r, e|        # and then merge flat elements
       merge_fold(r, e)
     }
   end
   def merge_fold(l, r) # :nodoc:
-    # equal pairs: merge.
+    # equal pairs: merge. ----------------------------------------------------
     if l.class == r.class
       if l.is_a?(Hash)
         warn_about_duplicate_keys(l, r)
@@ -209,11 +144,20 @@ class Parslet::Atoms::Base
       end
     end
-    # unequal pairs: hoist to same level.
+    # unequal pairs: hoist to same level. ------------------------------------
-    # special case: If one of them is a string, the other is more important
-    return l if r.class == String
-    return r if l.class == String
+    # Maybe classes are not equal, but both are stringlike?
+    if l.respond_to?(:to_str) && r.respond_to?(:to_str)
+      # if we're merging a String with a Slice, the slice wins.
+      return r if r.respond_to? :to_slice
+      return l if l.respond_to? :to_slice
+      fail "NOTREACHED: What other stringlike classes are there?"
+    end
+    # special case: If one of them is a string/slice, the other is more important
+    return l if r.respond_to? :to_str
+    return r if l.respond_to? :to_str
     # otherwise just create an array for one of them to live in
     return l + [r] if r.class == Hash
@@ -222,6 +166,11 @@ class Parslet::Atoms::Base
     fail "Unhandled case when foldr'ing sequence."
   end
+  # Flatten results from a repetition of a single parslet. named indicates
+  # whether the user has named the result or not. If the user has named
+  # the results, we want to leave an empty list alone - otherwise it is
+  # turned into an empty string.
+  #
   def flatten_repetition(list, named) # :nodoc:
     if list.any? { |e| e.instance_of?(Hash) }
       # If keyed subtrees are in the array, we'll want to discard all
@@ -241,9 +190,11 @@ class Parslet::Atoms::Base
     return [] if named && list.empty?
     # If there are only strings, concatenate them and return that.
-    list.inject('') { |s,e| s<<e }
+    foldl(list) { |s,e| s+e }
   end
+  # Debug printing - in Treetop syntax.
+  #
   def self.precedence(prec) # :nodoc:
     define_method(:precedence) { prec }
   end
@@ -272,7 +223,7 @@ class Parslet::Atoms::Base
   # Error tree returns what went wrong here plus what went wrong inside
   # subexpressions as a tree. The error stored for this node will be equal
-  # with #cause.
+  # to #cause.
   #
   def error_tree
     Parslet::ErrorTree.new(self)
@@ -301,10 +252,18 @@ private
       @last_cause.to_s
   end
+  # An internal class that allows delaying the construction of error messages
+  # (as strings) until we really need to print them.
+  #
   class Cause < Struct.new(:message, :source, :pos)
     def to_s
       line, column = source.line_and_column(pos)
-      message + " at line #{line} char #{column}."
+      # Allow message to be a list of objects. Join them here, since we now
+      # really need it.
+      Array(message).map { |o|
+        o.respond_to?(:to_slice) ?
+          o.str.inspect :
+          o.to_s }.join + " at line #{line} char #{column}."
     end
   end

data/lib/parslet/atoms/dsl.rb ADDED Viewed

@@ -0,0 +1,98 @@
+# A mixin module that defines operations that can be called on any subclass
+# of Parslet::Atoms::Base. These operations make parslet atoms chainable and
+# allow combination of parslet atoms to form bigger parsers.
+#
+# Example:
+#
+#   str('foo') >> str('bar')
+#   str('f').repeat
+#   any.absent?               # also called The Epsilon
+#
+module Parslet::Atoms::DSL
+  # Construct a new atom that repeats the current atom min times at least and
+  # at most max times. max can be nil to indicate that no maximum is present.
+  #
+  # Example:
+  #   # match any number of 'a's
+  #   str('a').repeat
+  #
+  #   # match between 1 and 3 'a's
+  #   str('a').repeat(1,3)
+  #
+  def repeat(min=0, max=nil)
+    Parslet::Atoms::Repetition.new(self, min, max)
+  end
+  # Returns a new parslet atom that is only maybe present in the input. This
+  # is synonymous to calling #repeat(0,1). Generated tree value will be
+  # either nil (if atom is not present in the input) or the matched subtree.
+  #
+  # Example:
+  #   str('foo').maybe
+  #
+  def maybe
+    Parslet::Atoms::Repetition.new(self, 0, 1, :maybe)
+  end
+  # Chains two parslet atoms together as a sequence.
+  #
+  # Example:
+  #   str('a') >> str('b')
+  #
+  def >>(parslet)
+    Parslet::Atoms::Sequence.new(self, parslet)
+  end
+  # Chains two parslet atoms together to express alternation. A match will
+  # always be attempted with the parslet on the left side first. If it doesn't
+  # match, the right side will be tried.
+  #
+  # Example:
+  #   # matches either 'a' OR 'b'
+  #   str('a') | str('b')
+  #
+  def |(parslet)
+    Parslet::Atoms::Alternative.new(self, parslet)
+  end
+  # Tests for absence of a parslet atom in the input stream without consuming
+  # it.
+  #
+  # Example:
+  #   # Only proceed the parse if 'a' is absent.
+  #   str('a').absent?
+  #
+  def absent?
+    Parslet::Atoms::Lookahead.new(self, false)
+  end
+  # Tests for presence of a parslet atom in the input stream without consuming
+  # it.
+  #
+  # Example:
+  #   # Only proceed the parse if 'a' is present.
+  #   str('a').present?
+  #
+  def present?
+    Parslet::Atoms::Lookahead.new(self, true)
+  end
+  # Alias for present? that will disappear in 2.0 (deprecated)
+  #
+  alias prsnt? present?
+  # Alias for absent? that will disappear in 2.0 (deprecated)
+  #
+  alias absnt? absent?
+  # Marks a parslet atom as important for the tree output. This must be used
+  # to achieve meaningful output from the #parse method.
+  #
+  # Example:
+  #   str('a').as(:b) # will produce {:b => 'a'}
+  #
+  def as(name)
+    Parslet::Atoms::Named.new(self, name)
+  end
+end