RubyGems - parslet - Versions diffs - 1.0.1 → 1.1.0 - Mend

parslet 1.0.1 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (28) hide show

data/Gemfile +3 -4
data/HISTORY.txt +17 -3
data/README +3 -2
data/Rakefile +12 -55
data/example/email_parser.rb +9 -5
data/example/erb.rb +44 -0
data/example/minilisp.rb +11 -17
data/example/parens.rb +3 -1
data/lib/parslet/atoms/alternative.rb +7 -5
data/lib/parslet/atoms/base.rb +90 -59
data/lib/parslet/atoms/context.rb +48 -0
data/lib/parslet/atoms/entity.rb +2 -2
data/lib/parslet/atoms/lookahead.rb +17 -29
data/lib/parslet/atoms/named.rb +10 -5
data/lib/parslet/atoms/re.rb +16 -7
data/lib/parslet/atoms/repetition.rb +16 -9
data/lib/parslet/atoms/sequence.rb +15 -9
data/lib/parslet/atoms/str.rb +17 -7
data/lib/parslet/atoms/visitor.rb +75 -0
data/lib/parslet/atoms.rb +13 -9
data/lib/parslet/convenience.rb +33 -0
data/lib/parslet/export.rb +162 -0
data/lib/parslet/expression.rb +3 -3
data/lib/parslet/pattern.rb +2 -2
data/lib/parslet/rig/rspec.rb +2 -2
data/lib/parslet/source.rb +112 -0
data/lib/parslet.rb +1 -2
metadata +24 -7

data/Gemfile CHANGED Viewed

@@ -1,7 +1,7 @@
 # A sample Gemfile
 source "http://rubygems.org"
-gem 'blankslate', '>= 2.1.2.3'
+gem 'blankslate', '~> 2'
 group :development do
   gem 'rspec'
@@ -9,7 +9,6 @@ group :development do
   gem 'sdoc'
-  gem 'autotest'
-  gem 'autotest-fsevent'
-  gem 'autotest-growl'
+  gem 'guard'
+  gem 'growl'
 end

data/HISTORY.txt CHANGED Viewed

@@ -1,8 +1,22 @@
-= 1.1.0 / ???
+= 1.1.0 / 2Feb2011
-  + Uses throw/catch internally for an order of magnitude increase in execution
-    speed.
+  + Uses return (fail/success), cached line counts, memoizing of parse results
+    and other tricks internally for at least an order of magnitude increase
+    in execution speed.
+  + str('foo').maybe will now return an empty string again. Use .as(...) to
+    name things and get back [] from #repeat and nil from #maybe.
+  + If you require 'parslet/atoms/visitor', you'll get an accept method on
+    all known Parslet::Atoms.
+  + If you require 'parslet/export', you can call #to_citrus and #to_treetop
+    to produce string versions of your grammar in those dialects.
+  + Requiring 'parslet/convenience' will give you a parse_with_debug on
+    your Parslet::Parser class. This prints some diagnostics on parse failure.
+    (Thanks to Florian Hanke)
 = 1.0.1 / 17Jan2011
   A happy new year!

data/README CHANGED Viewed

@@ -43,10 +43,11 @@ SYNOPSIS
 COMPATIBILITY
-This library should work with both ruby 1.8 and ruby 1.9.
+This library should work with most rubies. I've tested it with MRI 1.8, 1.9,
+rbx-head, jruby. Please report as a bug if you encounter issues.
 STATUS
-one dot oh.
+At version 1.1 - Good basic functionality and lots of plans for extension.
 (c) 2010 Kaspar Schiess

data/Rakefile CHANGED Viewed

@@ -1,65 +1,15 @@
 require "rubygems"
-require "rake/gempackagetask"
 require "rake/rdoctask"
 require 'rspec/core/rake_task'
+require "rake/gempackagetask"
 desc "Run all examples"
 RSpec::Core::RakeTask.new
 task :default => :spec
-# This builds the actual gem. For details of what all these options
-# mean, and other ones you can add, check the documentation here:
-#
-#   http://rubygems.org/read/chapter/20
-#
-spec = Gem::Specification.new do |s|
-  # Change these as appropriate
-  s.name              = "parslet"
-  s.version           = "1.0.1"
-  s.summary           = "Parser construction library with great error reporting in Ruby."
-  s.author            = "Kaspar Schiess"
-  s.email             = "kaspar.schiess@absurd.li"
-  s.homepage          = "http://kschiess.github.com/parslet"
-  s.has_rdoc          = true
-  s.extra_rdoc_files  = %w(README)
-  s.rdoc_options      = %w(--main README)
-  # Add any extra files to include in the gem
-  s.files             = %w(Gemfile HISTORY.txt LICENSE Rakefile README) + Dir.glob("{lib,example}/**/*")
-  s.require_paths     = ["lib"]
-  # If you want to depend on other gems, add them here, along with any
-  # relevant versions
-  s.add_dependency("blankslate", "~> 2.1.2.3")
-  # If your tests use any gems, include them here
-  s.add_development_dependency("rspec")
-  s.add_development_dependency("flexmock")
-end
-# This task actually builds the gem. We also regenerate a static
-# .gemspec file, which is useful if something (i.e. GitHub) will
-# be automatically building a gem for this project. If you're not
-# using GitHub, edit as appropriate.
-#
-# To publish your gem online, install the 'gemcutter' gem; Read more
-# about that here: http://gemcutter.org/pages/gem_docs
-Rake::GemPackageTask.new(spec) do |pkg|
-  pkg.gem_spec = spec
-end
-desc "Build the gemspec file #{spec.name}.gemspec"
-task :gemspec do
-  file = File.dirname(__FILE__) + "/#{spec.name}.gemspec"
-  File.open(file, "w") {|f| f << spec.to_ruby }
-end
-task :package => :gemspec
 require 'sdoc'
 # Generate documentation
@@ -73,7 +23,14 @@ Rake::RDocTask.new do |rdoc|
   rdoc.rdoc_dir = "rdoc"
 end
-desc 'Clear out RDoc and generated packages'
-task :clean => [:clobber_rdoc, :clobber_package] do
-  rm "#{spec.name}.gemspec"
+desc 'Clear out RDoc'
+task :clean => [:clobber_rdoc, :clobber_package]
+# This task actually builds the gem.
+spec = eval(File.read('parslet.gemspec'))
+desc "Generate the gem package."
+Rake::GemPackageTask.new(spec) do |pkg|
+  pkg.gem_spec = spec
 end
+task :gem => :spec

data/example/email_parser.rb CHANGED Viewed

@@ -2,6 +2,7 @@
 # Example contributed by Hal Brodigan (postmodern). Thanks!
+$:.unshift '../lib'
 require 'parslet'
 class EmailParser < Parslet::Parser
@@ -19,11 +20,11 @@ class EmailParser < Parslet::Parser
   }
   rule(:word) { match('[a-z0-9]').repeat(1).as(:word) >> space? }
-  rule(:separator) { space? >> dot.as(:dot) >> space? | space }
+  rule(:separator) { dot.as(:dot) >> space? | space }
   rule(:words) { word >> (separator >> word).repeat }
   rule(:email) {
-    (words >> space? >> at.as(:at) >> space? >> words).as(:email)
+    (words.as(:username) >> space? >> at >> space? >> words).as(:email)
   }
   root(:email)
@@ -31,8 +32,11 @@ end
 class EmailSanitizer < Parslet::Transform
   rule(:dot => simple(:dot), :word => simple(:word)) { ".#{word}" }
-  rule(:at => simple(:at)) { '@' }
   rule(:word => simple(:word)) { word }
+  rule(:username => sequence(:username)) { username.join + "@" }
+  rule(:username => simple(:username)) { username + "@" }
   rule(:email => sequence(:email)) { email.join }
 end
@@ -45,8 +49,8 @@ unless ARGV[0]
 end
 begin
-  puts sanitizer.apply(parser.parse(ARGV[0]))
+  p sanitizer.apply(parser.parse(ARGV[0]))
 rescue Parslet::ParseFailed => error
   puts error
   puts parser.root.error_tree
-end
+end

data/example/erb.rb ADDED Viewed

@@ -0,0 +1,44 @@
+$:.unshift File.dirname(__FILE__) + "/../lib"
+require 'parslet'
+class ErbParser < Parslet::Parser
+  rule(:ruby) { (str('%>').absnt? >> any).repeat.as(:ruby) }
+  rule(:expression) { (str('=') >> ruby).as(:expression) }
+  rule(:comment) { (str('#') >> ruby).as(:comment) }
+  rule(:code) { ruby.as(:code) }
+  rule(:erb) { expression | comment | code }
+  rule(:erb_with_tags) { str('<%') >> erb >> str('%>') }
+  rule(:text) { (str('<%').absnt? >> any).repeat(1) }
+  rule(:text_with_ruby) { (text.as(:text) | erb_with_tags).repeat.as(:text) }
+  root(:text_with_ruby)
+end
+parser = ErbParser.new
+p parser.parse "The value of x is <%= x %>."
+p parser.parse "<% 1 + 2 %>"
+p parser.parse "<%# commented %>"
+evaluator = Parslet::Transform.new do
+  erb_binding = binding
+  rule(:code => { :ruby => simple(:ruby) }) { eval(ruby, erb_binding); '' }
+  rule(:expression => { :ruby => simple(:ruby) }) { eval(ruby, erb_binding) }
+  rule(:comment => { :ruby => simple(:ruby) }) { '' }
+  rule(:text => simple(:text)) { text }
+  rule(:text => sequence(:texts)) { texts.join }
+end
+puts evaluator.apply(parser.parse(<<-ERB
+The <% a = 2 %>not printed result of "a = 2".
+The <%# a = 1 %>not printed non-evaluated comment "a = 1", see the value of a below.
+The <%= 'nicely' %> printed result.
+The <% b = 3 %>value of a is <%= a %>, and b is <%= b %>.
+ERB
+))

data/example/minilisp.rb CHANGED Viewed

@@ -5,12 +5,13 @@ $:.unshift '../lib'
 require 'pp'
 require 'parslet'
+require 'parslet/convenience'
 module MiniLisp
   class Parser < Parslet::Parser
     root :expression
     rule(:expression) {
-      space? >> str('(') >> space? >> body >> str(')')
+      space? >> str('(') >> space? >> body >> str(')') >> space?
     }
     rule(:body) {
@@ -77,24 +78,17 @@ end
 parser = MiniLisp::Parser.new
 transform = MiniLisp::Transform.new
-# Parse stage
-begin
-  result = parser.parse %Q{
-    (define test (lambda ()
-      (begin
-        (display "something")
-        (display 1)
-        (display 3.08))))
-    (test)
-  }
-rescue Parslet::ParseFailed => failure
-  puts failure
-  puts parser.root.error_tree if parser.root.cause
-  exit
-end
+result = parser.parse_with_debug %Q{
+  (define test (lambda ()
+    (begin
+      (display "something")
+      (display 1)
+      (display 3.08))))
+  (test)
+}
 # Transform the result
-pp transform.do(result)
+pp transform.do(result) if result
 # Thereby reducing it to the earlier problem:
 # http://github.com/kschiess/toylisp

data/example/parens.rb CHANGED Viewed

@@ -2,7 +2,9 @@
 # uses '.as(:name)' to construct a tree that can reliably be matched
 # afterwards.
-$:.unshift '../lib'
+$:.unshift File.join(
+  File.dirname(__FILE__),
+  '/../lib')
 require 'pp'
 require 'parslet'

data/lib/parslet/atoms/alternative.rb CHANGED Viewed

@@ -16,7 +16,10 @@ class Parslet::Atoms::Alternative < Parslet::Atoms::Base
   #   str('a') | str('b')
   #
   def initialize(*alternatives)
+    super()
     @alternatives = alternatives
+    @error_msg = "Expected one of #{alternatives.inspect}."
   end
   #---
@@ -28,14 +31,13 @@ class Parslet::Atoms::Alternative < Parslet::Atoms::Base
     self
   end
-  def try(io) # :nodoc:
+  def try(source, context) # :nodoc:
     alternatives.each { |a|
-      catch(:error) {
-        return a.apply(io)
-      }
+      value = a.apply(source, context)
+      return value unless value.error?
     }
     # If we reach this point, all alternatives have failed.
-    error(io, "Expected one of #{alternatives.inspect}.")
+    error(source, @error_msg)
   end
   precedence ALTERNATE

data/lib/parslet/atoms/base.rb CHANGED Viewed

@@ -4,31 +4,43 @@
 class Parslet::Atoms::Base
   include Parslet::Atoms::Precedence
+  # Internally, all parsing functions return either an instance of Fail
+  # or an instance of Success.
+  #
+  class Fail < Struct.new(:message)
+    def error?; true end
+  end
+  # Internally, all parsing functions return either an instance of Fail
+  # or an instance of Success.
+  #
+  class Success < Struct.new(:result)
+    def error?; false end
+  end
   # Given a string or an IO object, this will attempt a parse of its contents
   # and return a result. If the parse fails, a Parslet::ParseFailed exception
   # will be thrown.
   #
   def parse(io)
-    if io.respond_to? :to_str
-      io = StringIO.new(io)
-    end
+    source = Parslet::Source.new(io)
+    context = Parslet::Atoms::Context.new
     result = nil
-    error_message_or_success = catch(:error) {
-      result = apply(io)
-      :success
-    }
+    value = apply(source, context)
     # If we didn't succeed the parse, raise an exception for the user.
     # Stack trace will be off, but the error tree should explain the reason
     # it failed.
-    if error_message_or_success != :success
-      raise Parslet::ParseFailed, error_message_or_success
+    if value.error?
+      raise Parslet::ParseFailed, value.message
     end
+    # assert: value is a success answer
     # If we haven't consumed the input, then the pattern doesn't match. Try
     # to provide a good error message (even asking down below)
-    unless io.eof?
+    unless source.eof?
       # Do we know why we stopped matching input? If yes, that's a good
       # error to fail with. Otherwise just report that we cannot consume the
       # input.
@@ -37,44 +49,42 @@ class Parslet::Atoms::Base
         raise Parslet::ParseFailed,
           "Unconsumed input, maybe because of this: #{cause}"
       else
+        old_pos = source.pos
         parse_failed(
-          format_cause(io, "Don't know what to do with #{io.string[io.pos,100]}"))
+          format_cause(source,
+            "Don't know what to do with #{source.read(100)}", old_pos))
       end
     end
-    return flatten(result)
+    return flatten(value.result)
   end
   #---
   # Calls the #try method of this parslet. In case of a parse error, apply
-  # leaves the io in the state it was before the attempt.
+  # leaves the source in the state it was before the attempt.
   #+++
-  def apply(io) # :nodoc:
-    # p [:start, self, io.string[io.pos, 10]]
+  def apply(source, context) # :nodoc:
+    old_pos = source.pos
-    old_pos = io.pos
-    # p [:try, self, io.string[io.pos, 20]]
-    message = catch(:error) {
-      r = try(io)
-      # p [:return_from, self, r, flatten(r)]
-      # This has just succeeded, so last_cause must be empty
-      @last_cause = nil
-      return r
+    result = context.cache(self, source) {
+      try(source, context)
     }
-    # We only reach this point if the parse has failed. message is not nil.
-    # p [:failing, self, io.string[io.pos, 20]]
+    # This has just succeeded, so last_cause must be empty
+    unless result.error?
+      @last_cause = nil
+      return result
+    end
-    io.pos = old_pos
-    throw :error, message
+    # We only reach this point if the parse has failed. Rewind the input.
+    source.pos = old_pos
+    return result # is instance of Fail
   end
   # Override this in your Atoms::Base subclasses to implement parsing
   # behaviour.
   #
-  def try(io)
+  def try(source, context)
     raise NotImplementedError, "Atoms::Base doesn't have behaviour, please implement #try(io)."
   end
@@ -159,7 +169,7 @@ class Parslet::Atoms::Base
   # Takes a mixed value coming out of a parslet and converts it to a return
   # value for the user by dropping things and merging hashes.
   #
-  def flatten(value) # :nodoc:
+  def flatten(value, named=false) # :nodoc:
     # Passes through everything that isn't an array of things
     return value unless value.instance_of? Array
@@ -174,9 +184,9 @@ class Parslet::Atoms::Base
       when :sequence
         return flatten_sequence(result)
       when :maybe
-        return result.first
+        return named ? result.first : result.first || ''
       when :repetition
-        return flatten_repetition(result)
+        return flatten_repetition(result, named)
     end
     fail "BUG: Unknown tag #{tag.inspect}."
@@ -211,7 +221,7 @@ class Parslet::Atoms::Base
     fail "Unhandled case when foldr'ing sequence."
   end
-  def flatten_repetition(list) # :nodoc:
+  def flatten_repetition(list, named) # :nodoc:
     if list.any? { |e| e.instance_of?(Hash) }
       # If keyed subtrees are in the array, we'll want to discard all
       # strings inbetween. To keep them, name them.
@@ -226,8 +236,11 @@ class Parslet::Atoms::Base
         flatten(1)
     end
+    # Consistent handling of empty lists, when we act on a named result
+    return [] if named && list.empty?
     # If there are only strings, concatenate them and return that.
-    list.inject('') { |s,e| s<<(e||'') }
+    list.inject('') { |s,e| s<<e }
   end
   def self.precedence(prec) # :nodoc:
@@ -250,7 +263,10 @@ class Parslet::Atoms::Base
   # but needed for clever error reports.
   #
   def cause # :nodoc:
-    @last_cause
+    @last_cause && @last_cause.to_s || nil
+  end
+  def cause? # :nodoc:
+    !!@last_cause
   end
   # Error tree returns what went wrong here plus what went wrong inside
@@ -260,34 +276,49 @@ class Parslet::Atoms::Base
   def error_tree
     Parslet::ErrorTree.new(self)
   end
-  def cause? # :nodoc:
-    not @last_cause.nil?
-  end
 private
-  # TODO comments!!!
-  # Report/raise a parse error with the given message, printing the current
-  # position as well. Appends 'at line X char Y.' to the message you give.
-  # If +pos+ is given, it is used as the real position the error happened,
-  # correcting the io's current position.
+  # Produces an instance of Success and returns it.
+  #
+  def success(result)
+    Success.new(result)
+  end
+  # Produces an instance of Fail and returns it.
   #
-  def error(io, str, pos=nil)
-    @last_cause = format_cause(io, str, pos)
-    throw :error, @last_cause
+  def error(source, str, pos=nil)
+    @last_cause = format_cause(source, str, pos)
+    Fail.new(@last_cause)
   end
-  def parse_failed(str)
-    @last_cause = str
+  # Signals to the outside that the parse has failed. Use this in conjunction
+  # with #format_cause for nice error messages.
+  #
+  def parse_failed(cause)
+    @last_cause = cause
     raise Parslet::ParseFailed,
-      @last_cause
+      @last_cause.to_s
   end
-  def format_cause(io, str, pos=nil)
-    pre = io.string[0..(pos||io.pos)]
-    lines = Array(pre.lines)
-    return str if lines.empty?
-    pos   = lines.last.length
-    return "#{str} at line #{lines.count} char #{pos}."
+  class Cause < Struct.new(:message, :source, :pos)
+    def to_s
+      line, column = source.line_and_column(pos)
+      message + " at line #{line} char #{column}."
+    end
+  end
+  # Appends 'at line ... char ...' to the string given. Use +pos+ to override
+  # the position of the +source+. This method returns an object that can
+  # be turned into a string using #to_s.
+  #
+  def format_cause(source, str, pos=nil)
+    real_pos = (pos||source.pos)
+    Cause.new(str, source, real_pos)
   end
+  # That annoying warning 'Duplicate subtrees while merging result' comes
+  # from here. You should add more '.as(...)' names to your intermediary tree.
+  #
   def warn_about_duplicate_keys(h1, h2)
     d = h1.keys & h2.keys
     unless d.empty?

data/lib/parslet/atoms/context.rb ADDED Viewed

@@ -0,0 +1,48 @@
+module Parslet::Atoms
+  # Helper class that implements a transient cache that maps position and
+  # parslet object to results. This is used for memoization in the packrat
+  # style.
+  #
+  class Context
+    def initialize
+      @cache = Hash.new { |h, k| h[k] = {} }
+    end
+    # Caches a parse answer for obj at source.pos. Applying the same parslet
+    # at one position of input always yields the same result, unless the input
+    # has changed.
+    #
+    # We need the entire source here so we can ask for how many characters
+    # were consumed by a successful parse. Imitation of such a parse must
+    # advance the input pos by the same amount of bytes.
+    #
+    def cache(obj, source, &block)
+      beg = source.pos
+      # Not in cache yet? Return early.
+      unless entry = lookup(obj, beg)
+        result = yield
+        set obj, beg, [result, source.pos-beg]
+        return result
+      end
+      # the condition in unless has returned true, so entry is not nil.
+      result, advance = entry
+      # The data we're skipping here has been read before. (since it is in
+      # the cache) PLUS the actual contents are not interesting anymore since
+      # we know obj matches at beg. So skip reading.
+      source.pos = beg + advance
+      return result
+    end
+  private
+    def lookup(obj, pos)
+      @cache[pos][obj]
+    end
+    def set(obj, pos, val)
+      @cache[pos][obj] = val
+    end
+  end
+end

data/lib/parslet/atoms/entity.rb CHANGED Viewed

@@ -18,8 +18,8 @@ class Parslet::Atoms::Entity < Parslet::Atoms::Base
     @block = block
   end
-  def try(io) # :nodoc:
-    parslet.apply(io)
+  def try(source, context) # :nodoc:
+    parslet.apply(source, context)
   end
   def parslet