RubyGems - grammar - Versions diffs - 0.5 → 0.8 - Mend

grammar 0.5 → 0.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (28) hide show

data/benchmark/json.benchmark.rb +355 -0
data/benchmark/json.grammar.rb +56 -0
data/benchmark/json.grammar0_5.rb +57 -0
data/benchmark/json.ll1.rb +155 -0
data/benchmark/json.peggy.rb +174 -0
data/benchmark/json.re.rb +81 -0
data/lib/grammar.rb +212 -639
data/lib/grammar/ruby.rb +606 -0
data/lib/grammar/ruby/code.rb +1030 -0
data/lib/grammar/ruby0.rb +521 -0
data/lib/grammar/ruby2cext.rb +19 -0
data/lib/grammar/rubycall.rb +21 -0
data/test/advanced.rb +105 -0
data/test/atoms.rb +77 -0
data/test/basic.rb +32 -0
data/test/composite.rb +147 -0
data/test/molecules.rb +125 -0
data/test/test_demo.rb +200 -0
data/test/test_ruby.rb +30 -0
data/test/test_ruby0.rb +30 -0
data/test/test_ruby2cext.rb +30 -0
data/test/test_rubycall.rb +30 -0
metadata +45 -28
data/samples/fact.tcl +0 -12
data/samples/infix2postfix.rb +0 -114
data/samples/tcl.rb +0 -163
data/samples/test.infix +0 -4
data/test/test_grammar.rb +0 -274

data/benchmark/json.ll1.rb ADDED

@@ -0,0 +1,155 @@
+# Hand-flattened JSON parser with one character of lookahead (benchmark
+# baseline).  The lookahead character lives in @la and is nil once +io+ is
+# exhausted.  Whitespace skipping is deliberately inlined at every call site
+# instead of calling #ws (see the note above #ws).
+class JSON
+    # Parses exactly one JSON value from +io+ (any object responding to
+    # #getc) and returns it as a Ruby object.  Raises RuntimeError on a
+    # syntax error or when non-whitespace data follows the value.
+    def parse(io)
+        @la = io.getc
+        @la = io.getc while (case @la;when ?\s,?\t,?\n,?\r;true;end)
+        value(out=[], io)
+        @la = io.getc while (case @la;when ?\s,?\t,?\n,?\r;true;end)
+        raise("EOF expected") if @la
+        raise(out.inspect) unless out.length==1
+        out[0]
+    end
+    # Raises a RuntimeError describing what was +expected+ and what was
+    # +found+ (a nil +found+ is reported as EOF).
+    def error(expected, found)
+        raise("expected #{expected}, found #{found ? ("'"<<found<<?\') : 'EOF'}")
+    end
+    # Parses one JSON value starting at the current lookahead and appends the
+    # resulting Ruby object to +out+.  Dispatches on the lookahead character;
+    # anything that is not a string, object, array or keyword is treated as a
+    # number, so unexpected characters end up as an "a digit" error.
+    def value(out, io)
+        if ?\"==(@la)
+            out << string(io)
+        elsif ?\{==(@la)
+            # object: collect alternating keys and values, then build the Hash
+            @la=io.getc
+            @la = io.getc while (case @la;when ?\s,?\t,?\n,?\r;true;end)
+            kv = []
+            unless ?\}==(@la)
+                ?\"==(@la) ? (kv << string(io)) : error("a string", @la)
+                @la = io.getc while (case @la;when ?\s,?\t,?\n,?\r;true;end)
+                ?\:==(@la) ? (@la=io.getc) : error("':'", @la)
+                @la = io.getc while (case @la;when ?\s,?\t,?\n,?\r;true;end)
+                value(kv, io)
+                @la = io.getc while (case @la;when ?\s,?\t,?\n,?\r;true;end)
+                until ?\}==(@la)
+                    ?,==(@la) ? (@la=io.getc) : error("','", @la)
+                    @la = io.getc while (case @la;when ?\s,?\t,?\n,?\r;true;end)
+                    ?\"==(@la) ? (kv << string(io)) : error("a string", @la)
+                    @la = io.getc while (case @la;when ?\s,?\t,?\n,?\r;true;end)
+                    ?\:==(@la) ? (@la=io.getc) : error("':'", @la)
+                    @la = io.getc while (case @la;when ?\s,?\t,?\n,?\r;true;end)
+                    value(kv, io)
+                    @la = io.getc while (case @la;when ?\s,?\t,?\n,?\r;true;end)
+                end
+            end
+            # step past the closing '}'
+            @la = io.getc
+            out << Hash[*kv]
+        elsif ?\[==(@la)
+            # array
+            @la=io.getc
+            @la = io.getc while (case @la;when ?\s,?\t,?\n,?\r;true;end)
+            a = []
+            unless ?\]==(@la)
+                value(a, io)
+                @la = io.getc while (case @la;when ?\s,?\t,?\n,?\r;true;end)
+                until ?\]==(@la)
+                    ?\,==(@la) ? (@la=io.getc) : error("','", @la)
+                    @la = io.getc while (case @la;when ?\s,?\t,?\n,?\r;true;end)
+                    value(a, io)
+                    @la = io.getc while (case @la;when ?\s,?\t,?\n,?\r;true;end)
+                end
+            end
+            # step past the closing ']'
+            @la = io.getc
+            out << a
+        elsif ?t==(@la)
+            @la = io.getc
+            ?r==(@la) ? (@la=io.getc) : error(?r, @la)
+            ?u==(@la) ? (@la=io.getc) : error(?u, @la)
+            ?e==(@la) ? (@la=io.getc) : error(?e, @la)
+            out << true
+        elsif ?f==(@la)
+            @la = io.getc
+            ?a==(@la) ? (@la=io.getc) : error(?a, @la)
+            ?l==(@la) ? (@la=io.getc) : error(?l, @la)
+            ?s==(@la) ? (@la=io.getc) : error(?s, @la)
+            ?e==(@la) ? (@la=io.getc) : error(?e, @la)
+            out << false
+        elsif ?n==(@la)
+            @la = io.getc
+            ?u==(@la) ? (@la=io.getc) : error(?u, @la)
+            ?l==(@la) ? (@la=io.getc) : error(?l, @la)
+            ?l==(@la) ? (@la=io.getc) : error(?l, @la)
+            out << nil
+        else
+            # number: the text is accumulated in n and converted at the end;
+            # a fraction or an exponent makes it a Float, otherwise an Integer
+            n = ""
+            (n<<@la;@la=io.getc) if ?-==(@la)
+            ?0==(@la) ? (n<<@la;@la=io.getc) : digits(n, io)
+            (?.==(@la) ?
+                (n<<@la;@la=io.getc;digits(n, io);exp(n, io);true) :
+                exp(n, io)) ?
+            (out << n.to_f) :
+            (out << n.to_i)
+        end
+    end
+    # Flattening any of the methods below will improve performance further
+    # Skips whitespace at the lookahead (kept for reference; callers inline it).
+    def ws(io)
+        @la = io.getc while (case @la;when ?\s,?\t,?\n,?\r;true;end)
+    end
+    # Appends one or more decimal digits from the input to +out+; raises via
+    # #error when the lookahead is not a digit.
+    def digits(out, io)
+        # @la is nil at end of input, so guard before the range comparison;
+        # otherwise a number that ends exactly at EOF (e.g. "12") would raise
+        # a comparison error instead of parsing.
+        (@la && ?0<=@la && ?9>=@la) ? (out<<@la;@la=io.getc) : error("a digit", @la)
+        while (@la && ?0<=@la && ?9>=@la); (out<<@la;@la=io.getc); end
+    end
+    # Appends an optional exponent part ([eE][+-]digits) to +out+.  Returns
+    # true when an exponent was consumed and nil when there was none.
+    def exp(out, io)
+        (case @la;when ?e,?E;true;end) ? (out<<@la;@la=io.getc) :
+            return
+        (out<<@la;@la=io.getc) if (case @la;when ?-,?+;true;end)
+        digits(out, io)
+        true
+    end
+    # Parses a string literal whose opening quote is the current lookahead
+    # and returns its unescaped contents.
+    def string(io)
+        # we've already verified the starting "
+        @la=io.getc
+        s = ""
+        until ?\"==(@la)
+            if ?\\==(@la)
+                @la = io.getc
+                case @la
+                when ?\",?\\,?\/ then (s<<@la;@la=io.getc)
+                when ?b then (s<<?\b;@la=io.getc)
+                when ?f then (s<<?\f;@la=io.getc)
+                when ?n then (s<<?\n;@la=io.getc)
+                when ?r then (s<<?\r;@la=io.getc)
+                when ?t then (s<<?\t;@la=io.getc)
+                when ?u
+                    @la = io.getc
+                    u = ""
+                    4.times {
+                        case @la
+                        when ?0..?9, ?a..?f, ?A..?F
+                            u<<@la;@la=io.getc
+                        else
+                            error("a hex character", @la)
+                        end
+                    }
+                    # encode the code point as UTF-8 rather than appending a
+                    # raw integer, which is out of range above one byte on
+                    # Ruby 1.8
+                    s << [u.to_i(16)].pack("U")
+                else
+                    error("a valid escape", @la)
+                end
+            else
+                # nil here means the input ended inside the string
+                error("a character", @la) unless @la
+                s<<@la;@la=io.getc
+            end
+        end
+        # step past the closing quote
+        @la = io.getc
+        s
+    end
+end

data/benchmark/json.peggy.rb ADDED

@@ -0,0 +1,174 @@
+# JSON parser built on the Peggy packrat-parser DSL (benchmark baseline).
+# NOTE(review): Peggy::Builder is not visible here; the DSL words used below
+# (seq, one, opt, many, lit) are presumably sequence, ordered choice,
+# optional, repetition and literal/regexp match -- confirm against Peggy.
+class JSON < Peggy::Builder
+  # Source text of each JSON keyword mapped to its Ruby value.
+  KEYWORDS = {"true" => true, "false" => false, "null" => nil}
+  # Escape letter (the character after the backslash) mapped to the control
+  # character it stands for.
+  ESCAPES  = Hash[*%W[b \b f \f n \n r \r t \t]]
+  # Declares the grammar productions: space, value, object, array, string,
+  # string_content, the escape variants, number and keyword.
+  def initialize
+    super
+    self.ignore_productions = [:space]
+    space { lit /\s+/ }
+    value {
+      seq {
+        opt { space }
+        one {
+          string
+          object
+          array
+          keyword
+          number
+        }
+        opt { space }
+      }
+    }
+    object {
+      seq {
+        lit /\{\s*/
+        one {
+          seq {
+            opt { many { seq { string; lit /\s*:/; value; lit /,\s*/ } } }
+                         seq { string; lit /\s*:/; value             }
+            lit "}"
+          }
+          lit "}"
+        }
+      }
+    }
+    array {
+      seq {
+        lit "["
+        one {
+          seq {
+            opt { many { seq { value; lit "," } } }; value; lit "]"
+          }
+          lit "]"
+        }
+      }
+    }
+    string {
+      seq {
+        lit '"'
+        one {
+          lit '"'
+          seq {
+            many {
+              one {
+                seq { string_content; opt { escape         } }
+                seq { escape;         opt { string_content } }
+              }
+            }
+            lit '"'
+          }
+        }
+      }
+    }
+    string_content { lit(/[^\\"]+/) }
+    escape {
+      one {
+        escape_literal
+        escape_sequence
+        escape_unicode
+      }
+    }
+    escape_literal  { lit(%r{\\["\\/]})      }
+    escape_sequence { lit(/\\[bfnrt]/)       }
+    escape_unicode  { lit(/\\u[0-9a-f]{4}/i) }
+    # The fraction and the exponent are independently optional, so that
+    # numbers such as 1e5 (exponent without a fraction) are accepted, as the
+    # other benchmark parsers do.
+    number  { lit(/-?(?:0|[1-9]\d*)(?:\.\d+)?(?:[eE][+-]?\d+)?\b/) }
+    keyword { lit(/\b(?:true|false|null)\b/)                       }
+  end
+  # Converts the parse result that starts at key +from+ (default: the
+  # smallest key in parse_results) into a Ruby object by dispatching to the
+  # to_ruby_<kind> helper named after the first production found there.
+  # NOTE(review): parse_results looks like a Hash of start position =>
+  # {:found_order => [production names], production name => end position};
+  # confirm against Peggy.
+  def to_ruby(from = parse_results.keys.min)
+    kind = parse_results[from][:found_order].first
+    to   = parse_results[from][kind]
+    send("to_ruby_#{kind}", from, to)
+  end
+  private
+  # Builds a Hash from the entries recorded strictly between +from+ and +to+.
+  # A lone :string entry is remembered as the pending key; an entry whose
+  # found_order is [<kind>, :value] supplies the value for that key.
+  def to_ruby_object(from, to)
+    #p parse_results
+    object   = Hash.new
+    skip_to  = nil
+    last_key = nil
+    parse_results.keys.select { |k| k > from and k < to }.sort.each do |key|
+      content = parse_results[key]
+      # entries inside an already converted nested object/array are skipped
+      next if skip_to and key < skip_to
+      next unless content[:found_order]                      and
+                  ( ( content[:found_order].size == 2        and
+                      content[:found_order][1]   == :value ) or
+                    content[:found_order]        == [:string] )
+      if content[:found_order] == [:string]
+        last_key = to_ruby_string(key, content[:string])
+      else
+        case content[:found_order].first
+        when :object
+          object[last_key] = to_ruby_object(key, content[:object])
+          skip_to = content[:object]
+        when :array
+          object[last_key] = to_ruby_array(key, content[:array])
+          skip_to = content[:array]
+        else
+          object[last_key] = to_ruby(key)
+        end
+      end
+    end
+    object
+  end
+  # Builds an Array from the [<kind>, :value] entries recorded strictly
+  # between +from+ and +to+, skipping entries nested inside an element that
+  # has already been converted.
+  def to_ruby_array(from, to)
+    array   = Array.new
+    skip_to = nil
+    parse_results.keys.select { |k| k > from and k < to }.sort.each do |key|
+      content = parse_results[key]
+      next if skip_to and key < skip_to
+      next unless content[:found_order]                and
+                  content[:found_order].size == 2      and
+                  content[:found_order][1]   == :value
+      case content[:found_order].first
+      when :object
+        array << to_ruby_object(key, content[:object])
+        skip_to = content[:object]
+      when :array
+        array << to_ruby_array(key, content[:array])
+        skip_to = content[:array]
+      else
+        array << to_ruby(key)
+      end
+    end
+    array
+  end
+  # Assembles the unescaped string from the string_content and escape
+  # entries recorded strictly between +from+ and +to+.
+  def to_ruby_string(from, to)
+    string = String.new
+    parse_results.keys.select { |k| k > from and k < to }.sort.each do |key|
+      content = parse_results[key]
+      next unless content[:found_order]
+      case content[:found_order].first
+      when :string_content
+        string << source_text[key...content[:string_content]]
+      when :escape_literal
+        # the last character of the match is the escaped character itself
+        string << source_text[content[:escape_literal] - 1, 1]
+      when :escape_sequence
+        string << ESCAPES[source_text[content[:escape_sequence] - 1, 1]]
+      when :escape_unicode
+        # the four hex digits follow the two-character "\u" prefix
+        string << [Integer("0x#{source_text[key + 2, 4]}")].pack("U")
+      end
+    end
+    string
+  end
+  # Converts the number text in source_text[from...to]: a fraction or an
+  # exponent yields a Float, anything else an Integer.
+  def to_ruby_number(from, to)
+    num = source_text[from...to]
+    # Integer() rejects exponent notation, so "1e5" must go through Float()
+    num =~ /[.eE]/ ? Float(num) : Integer(num)
+  end
+  # Looks up the Ruby value for the keyword text in source_text[from...to].
+  def to_ruby_keyword(from, to)
+    KEYWORDS[source_text[from...to]]
+  end
+end

data/benchmark/json.re.rb ADDED

@@ -0,0 +1,81 @@
+# Regexp-driven JSON parser (benchmark baseline).  The input object is sent
+# #scan, #eos?, #getch, #peek, #string and #[], i.e. the StringScanner API.
+class JSON
+  # Parses one JSON value from +input+ and returns it as a Ruby object.
+  # Raises RuntimeError on malformed input or when data follows the value.
+  def parse(input)
+    input.scan(/\s*/)
+    parse_value(out=[], input)
+    input.eos? or error("Unexpected data", input)
+    out[0]
+  end
+  private
+  # Parses the value at the scan position (plus any trailing whitespace) and
+  # appends the resulting Ruby object to +out+.
+  def parse_value(out, input)
+    if input.scan(/"/)
+      parse_string(out, input)
+    elsif input.scan(/\{\s*/)
+      # keys and values are collected alternately, then turned into a Hash
+      kv = []
+      until input.scan(/\}\s*/)
+        kv.empty? or input.scan(/,\s*/) or error("Expected ,", input)
+        input.scan(/"/) or error("Expected string", input)
+        parse_string(kv, input)
+        input.scan(/:\s*/) or error("Expecting object separator", input)
+        parse_value(kv, input)
+      end
+      out << Hash[*kv]
+    elsif input.scan(/\[\s*/)
+      array = []
+      until input.scan(/\]\s*/)
+        array.empty? or input.scan(/,\s*/) or error("Expected ,", input)
+        parse_value(array, input)
+      end
+      out << array
+    elsif input.scan(/true\s*/)
+      out << true
+    elsif input.scan(/false\s*/)
+      out << false
+    elsif input.scan(/null\s*/)
+      out << nil
+    elsif text=input.scan(/-?(?:0|[1-9]\d*)(\.\d+)?([eE][+-]?\d+)?\s*/)
+      # group 1 is the fraction, group 2 the exponent; either makes a Float
+      out << ((input[1]||input[2]) ? text.to_f : text.to_i)
+    else
+      # error takes the scanner as its second argument; without it this
+      # branch raised an arity ArgumentError instead of the message below
+      error("Illegal JSON value", input)
+    end
+  end
+  # Parses the remainder of a string literal (the opening quote has already
+  # been consumed), unescapes it and appends it to +out+.
+  def parse_string(out, input)
+    s = ""
+    while true
+      if text=input.scan(/[^\\"]+/)
+        s.concat(text)
+      elsif input.scan(/\\/)
+        # getch returns nil when the input ends right after the backslash
+        esc = input.getch or error("Unclosed string", input)
+        case (ch=esc[0])
+        when ?b ; s << ?\b
+        when ?f ; s << ?\f
+        when ?n ; s << ?\n
+        when ?r ; s << ?\r
+        when ?t ; s << ?\t
+        when ?u
+            text = input.scan(/[0-9a-fA-F]{4}/) or raise("expected hex*4")
+            # encode the code point as UTF-8 rather than appending a raw
+            # integer, which is out of range above one byte on Ruby 1.8
+            s << [text.to_i(16)].pack("U")
+        else
+            # any other escaped character stands for itself
+            s << ch
+        end
+      else
+        break
+      end
+    end
+    input.scan(/"\s*/) or error("Unclosed string", input)
+    out << s
+  end
+  # Raises a RuntimeError: a fixed end-of-input message when +input+ is
+  # exhausted, otherwise +message+ followed by the unparsed remainder.
+  def error(message, input)
+    if input.eos?
+      raise "Unexpected end of input."
+    else
+      raise "#{message}:  #{input.peek(input.string.length)}"
+    end
+  end
+end

data/lib/grammar.rb CHANGED

@@ -1,692 +1,265 @@
 #!/bin/env ruby
 # = grammar.rb - specify BNF-like grammar directly in Ruby
-# $Id: grammar.rb,v 1.1 2005/10/13 23:58:45 eric_mahurin Exp $
-# Author::  Eric Mahurin (Eric under Mahurin at yahoo dot com)
+# $Id: grammar.rb,v 1.3 2008/09/05 06:01:20 eric_mahurin Exp $
+# Author::  Eric Mahurin (Eric under Mahurin at yahoo period com)
+# Copyright (c) Eric Mahurin 2005-2008
 # License:: Ruby license
 # Home::    http://rubyforge.org/projects/grammar
-# This base class defines common operators to the derived Grammar classes to
-# make specifying the Grammar look similar to BNF.  This base class also serves
-# the purpose of handling recursion in the Grammar.
+# The Grammar class defines operators and methods that allow Grammars to be
+# built in a tree.  The result is similar to BNF seen in other parser
+# generators.  No actual parsing is done by this class.  That is up to an
+# engine.
 class Grammar
-    class << self
-        alias_method(:[],:new)
-        # With several interlocking recursive grammars, this can be used.
-        # For each argument that the block needs, an empty Grammar is
-        # given.  The result of the block should be an Array of the final
-        # grammars for those arguments.
-        def multiple(&block) # :yield: *recursive_grammars
-            grammars = (1..block.arity).map { self.new }
-            grammars.zip(yield(*grammars)) { |g,g1| g << g1 }
-            grammars
-        end
-    end
-    # Creates a Grammar from another +grammar+.  If +grammar+ is not given
-    # and a block is instead, the block is passed +self+ (to handle recursion)
-    # and the resulting grammar from this block will be used.
-    def initialize(grammar=nil,&block) # :yield: +self+
-        @grammar = grammar || block && yield(self)
-    end
-    # Reinitialize with another Grammar.  This will be needed for recursion
-    # unless the block form of new is used.
-    def << (*args)
-        initialize(*args)
-    end
-    # Match to elements at a Cursor while advancing.  When matched, a parse
-    # buffer is returned.  Instead of an empty Array, the seed to this parse buffer
-    # can be given by +buffer+ which should respond to #concat and #<< like Array.
-    # When a mismatch occurs several possibilities exist.  If +lookahead+ and
-    # the Grammar is within its lookahead (defaults one element/token - can be
-    # controlled by #lookahead), the cursor is moved back to where it started and
-    # +false+ is returned.  Otherwise an exception describing the mismatch is
-    # raised.
-    def scan(cursor,buffer=[],lookahead=false)
-        @grammar.scan(cursor,buffer,lookahead)
+    # Create a Grammar from a block.  The block is passed a Grammar engine
+    # which should be used to do any parsing.
+    def initialize(&block) # :yield: engine
+        @block = block
     end
-    # Same as #scan except the +cursor+ is held in place
-    def check(cursor,buffer=[],lookahead=false)
-        cursor.pos { (@grammar||self).scan(cursor,buffer,lookahead) }
+    # Executes the Grammar with an engine.  The engine simply gets passed to
+    # the block (actually a lambda now) contained in the Grammar.
+    def [](engine)
+        @block[engine]
     end
-    def scanner(me,cursor,buffer,lookahead,hold) # :nodoc:
-        hold ?
-            "#{me}.check(#{cursor},#{buffer},#{lookahead})" :
-            "#{me}.scan(#{cursor},#{buffer},#{lookahead})"
+    # Returns the lambda that the Grammar holds.
+    def to_proc
+        @block
     end
-    def leaves # :nodoc:
-        [@grammar||self]
+    # Replaces the contained lambda with one from another Grammar.
+    def <<(gram)
+        @block = gram && gram.to_proc
     end
-    # Creates a new Grammar that matches +self+ or +other+ if that fails.
+    # Grammar that matches +self+ or +other+ if that fails.
     def |(other)
-        Inline.new(self,other) { |us,them,cursor,buffer,lookahead,hold|
-            "(#{us[cursor,buffer,true,hold]} ||
-              #{them[cursor,buffer,lookahead,hold]})"
-        }
+        Grammar { |e| e.alternation(self.to_proc, &other) }
     end
-    # Creates a new Grammar that matches +self+ followed by +other+.
-    # The resulting match list is a concatenation from the match lists
-    # from +self+ and +other+.
+    # Grammar that matches +self+ followed by +other+.
     def +(other)
-        Inline.new(self,other) { |us,them,cursor,buffer,lookahead|
-            "(#{us[cursor,buffer,lookahead,false]} &&
-              #{them[cursor,buffer,false,false]})"
-        }
-    end
-    # Generates a Grammar that matches when +self+ (in-place) and +other+.
-    def &(other)
-        Inline.new(self,other) { |us,them,cursor,buffer,lookahead,hold|
-            "(#{us[cursor,buffer,lookahead,true]} &&
-              #{them[cursor,buffer,lookahead,hold]})"
-        }
-    end
-    # Creates a new Grammar that matches +self+ replicated +multiplier+ times.
-    # +multiplier+ can be a Range to specify a variable multiplier.  The
-    # +multiplier+ just needs to responds to #=== to determine the min and
-    # max iterations.
-    def *(multiplier)
-        Inline.new(self,nil,multiplier) { |us,multiplier,cursor,buffer,lookahead|
-            Inline.var { |n,ret,look| "(
-                #{n} = -1
-                #{ret} = false
-                #{look} = #{lookahead}
-                while true
-                    if #{multiplier}===(#{n}+=1)
-                        if !#{ret}
-                            #{ret} = #{buffer}
-                            #{look} = true
-                        end
-                    else
-                        break(#{ret}) if #{ret}
-                    end
-                    #{us[cursor,buffer,look,false]} or break(#{ret})
-                    #{look} = false if !#{ret}
-                end
-            )" }
-        }
+        Grammar { |e| e.sequence(self.to_proc, &other) }
     end
-    # Creates a new zero-width Grammar that matches +self+.
+    # Zero-width Grammar that matches +self+ (discards results).
     def +@
-        Inline.new(self) { |us,cursor,buffer,lookahead,hold|
-            "(#{us[cursor,'DISCARD',lookahead,true]} && #{buffer})"
-        }
+        Grammar { |e| e.positive(&self) }
     end
-    # Creates a new zero-width Grammar that matches anything but +self+.
+    # Zero-width Grammar that matches anything but +self+ (discards results).
     def -@
-        Inline.new(self) { |us,cursor,buffer,lookahead,hold|
-            "(!#{us[cursor,'DISCARD',true,true]} ? #{buffer} :
-              !#{lookahead}&&raise(Error.new(cursor,'a negative syntatic predicate')))"
-        }
+        Grammar { |e| e.negative(&self) }
     end
-    # Returns a Grammar that as long as what follows doesn't match +self+, it
+    # Grammar that as long as what follows doesn't match +self+, it
     # matches to the next element.  Most useful for a single element Grammar.
     def ~
-        (-self)&ANY
+        -self + ANY
     end
-    # Creates a new Grammar that optionally matches +self+.
+    # Grammar that optionally matches +self+.
     def optional
-        self|NULL
-    end
-    # Matches a list of +self+ (plus possibly other stuff) one or more times.
-    # The arguments are an alternating list of optional terminators and
-    # separators.  Along with #list0 you should be able to describe any
-    # tail recursive grammar.  This is equivalent to this recursive Grammar:
-    #
-    #  Grammar.new { |g| a+(z|b+(y|...g)) }
-    #
-    # where a, b, ... are +self+ and the separators and z, y, ... are the
-    # terminators.
-    #
-    # When a terminator is +nil+, the next item is treated
-    # as optional (i.e. instead of a+(nil|g), a+(g|) is used).
-    #
-    # When there is a missing terminator at the end of +term_sep+ (and it is
-    # non-empty), the list is not allowed to stop at that point.
-    def list1(*term_sep)
-        term_sep.push(nil) if term_sep.empty?
-        term_sep.unshift(self)
-        Inline.new(*term_sep.compact) { |*args|
-            cursor,buffer,lookahead = args.slice!(-3,3)
-            Inline.var { |look,ret|
-                terminated = (term_sep.size&1).nonzero? || term_sep[-1]
-                code = "(
-                #{look} = #{lookahead}
-                #{terminated ? (ret=false;'') : "#{ret} = false"}
-                while true
-                    #{args[j=0][cursor,buffer,look,false]} or break(#{ret})
-                    #{look} = #{terminated ? false : true}
-                    #{terminated ? '' : "#{ret} = #{buffer}"}"
-                1.step(term_sep.size-1,2) { |i|
-                    if term_sep[i]
-                        code << "
-                        #{args[j+=1][cursor,buffer,true,false]} and break(#{buffer})"
-                        if i+1<term_sep.size
-                            code << "
-                            #{args[j+=1][cursor,buffer,false,false]} or break(false)"
-                        end
-                    elsif i+1<term_sep.size
-                        code << "
-                        #{args[j+=1][cursor,buffer,true,false]} or break(#{buffer})"
-                    end
-                }
-                code << "
-                end
-                )"
-            }
-        }
+        self | NULL
     end
-    # Matches a list of +self+ (plus possibly other stuff) zero or more times.
-    # The arguments are an alternating list of optional terminators and
-    # separators.  Along with #list1 you should be able to describe any
-    # tail recursive grammar.  This is equivalent to this recursive Grammar:
-    #
-    #  Grammar.new { |g| x|(a+(z|b+(y|...g))) }
-    #
-    # where a, b, ... are +self+ and the separators and z, y, ..., x are the
-    # terminators.
-    #
-    # When a terminator is +nil+/missing, the next item is treated
-    # as optional.
-    def list0(*term_sep)
-        term_sep.push(nil) if (term_sep.size&1).zero?
-        term_sep.unshift(self)
-        Inline.new(*term_sep.compact) { |*args|
-            cursor,buffer,lookahead = args.slice!(-3,3)
-            Inline.var { |look,ret|
-                code = "("
-                code << "
-                #{look} = #{lookahead}" if term_sep[-1]
-                code << "
-                while true"
-                j = -2
-                -1.step(term_sep.size-3,2) { |i|
-                    if term_sep[i]
-                        code << "
-                        #{args[j+=1][cursor,buffer,true,false]} and break(#{buffer})"
-                        if j.zero?
-                            code << "
-                            #{args[j+=1][cursor,buffer,look,false]} or break(false)
-                            #{look} = false"
-                        else
-                            code << "
-                            #{args[j+=1][cursor,buffer,false,false]} or break(false)"
-                        end
-                    else
-                        j += 1 if j==2
-                        code << "
-                        #{args[j+=1][cursor,buffer,true,false]} or break(#{buffer})"
-                    end
-                }
-                code << "
-                end)"
-            }
-        }
-    end
-    # Creates a new Grammar where the entire grammar is considered a
-    # part of the lookahead (instead of just the first element).
-    def lookahead
-        Inline.new(self) { |us,cursor,buffer,lookahead|
-            Inline.var { |branch| "(
-                #{branch} = #{buffer}.class.new
-                #{cursor}.pos? { begin
-                    #{us[cursor,branch,false]}
-                rescue Error => err
-                    raise(err) if !#{lookahead}
-                end } && #{buffer}.concat(#{branch})
-            )" }
-        }
-    end
-    # Creates a new Grammar where the match list of +self+ is filtered by
-    # some code.
-    # When a +klass+ is given, +klass+.new is used as the buffer to hold what
-    # will be passed to the code.  Otherwise this temporary buffer will come
-    # from buffer.class.new.
-    # If the block needs 1 argument, this temporary buffer will be passed
-    # and the block should return something that will be given to buffer.concat.
-    # If the block needs 2 arguments, the second argument will be the buffer
-    # and the block should do the concatenation.
-    # If there is no block, the temporary buffer is passed to buffer.concat
-    # directly.  Use this to get some isolation.
-    def filter(klass=nil,&code) # :yield: branch[, buffer]
-        if !code
-            if klass
-                Inline.new(self,nil,klass) { |us,klass,cursor,buffer,lookahead,hold|
-                    Inline.var { |branch| "(
-                        #{branch}=#{klass}.new
-                        #{us[cursor,branch,lookahead,hold]} &&
-                          #{buffer}.concat(#{branch})
-                    )"}
-                }
-            else
-                Inline.new(self) { |us,cursor,buffer,lookahead,hold|
-                    Inline.var { |branch| "(
-                        #{branch}=#{buffer}.class.new
-                        #{us[cursor,branch,lookahead,hold]} &&
-                          #{buffer}.concat(#{branch})
-                    )"}
-                }
-            end
-        elsif code.arity>=2
-            if klass
-                Inline.new(self,nil,klass,code) { |us,klass,code,cursor,buffer,lookahead,hold|
-                    Inline.var { |branch| "(
-                        #{branch}=#{klass}.new
-                        #{us[cursor,branch,lookahead,hold]} &&
-                          (#{code}[#{branch},#{buffer}]||
-                            raise(Error.new(cursor,'a filtered '+#{branch}.inspect)))
-                    )"}
-                }
-            else
-                Inline.new(self,nil,code) { |us,code,cursor,buffer,lookahead,hold|
-                    Inline.var { |branch| "(
-                        #{branch}=#{buffer}.class.new
-                        #{us[cursor,branch,lookahead,hold]} &&
-                          (#{code}[#{branch},#{buffer}]||
-                            raise(Error.new(cursor,'a filtered '+#{branch}.inspect)))
-                    )"}
-                }
-            end
+    # Grammar that matches a sequence of zero or more +self+ followed
+    # by an optional terminator (+term+).  If +term+ is given it takes
+    # precedence over matching +self+ items.
+    def repeat0(term=nil)
+        if term
+            Recurse { |g| term | self + g }
         else
-            if klass
-                Inline.new(self,nil,klass,code) { |us,klass,code,cursor,buffer,lookahead,hold|
-                    Inline.var { |branch| "(
-                        #{branch}=#{klass}.new
-                        #{us[cursor,branch,lookahead,hold]} &&
-                          #{buffer}.concat(#{code}[#{branch}]||
-                            raise(Error.new(cursor,'a filtered '+#{branch}.inspect)))
-                    )"}
-                }
-            else
-                Inline.new(self,nil,code) { |us,code,cursor,buffer,lookahead,hold|
-                    Inline.var { |branch| "(
-                        #{branch}=#{buffer}.class.new
-                        #{us[cursor,branch,lookahead,hold]} &&
-                          #{buffer}.concat(#{code}[#{branch}]||
-                            raise(Error.new(cursor,'a filtered '+#{branch}.inspect)))
-                    )"}
-                }
-            end
+            Recurse { |g| g + self | NULL }
         end
     end
-    # Returns a Grammar that discards the match list from +self+
-    def discard
-        Inline.new(self) { |us,cursor,buffer,lookahead,hold|
-            "(#{us[cursor,'DISCARD',lookahead,hold]}&&#{buffer})"
-        }
-    end
-    # Returns a Grammar that groups the match list from +self+.  A temporary
-    # buffer is formed just list #filter, but buffer.<< is used instead of
-    # buffer.concat.
-    def group(klass=nil)
-        if klass
-            Inline.new(self,nil,klass) { |us,klass,cursor,buffer,lookahead,hold|
-                Inline.var { |branch| "(
-                    #{branch}=#{klass}.new
-                    #{us[cursor,branch,lookahead,hold]} &&
-                      #{buffer}<<#{branch}
-                )"}
-            }
+    # Grammar that matches a sequence of one or more +self+ followed
+    # by an optional terminator (+term+).  If +term+ is given it takes
+    # precedence over matching +self+ items.
+    def repeat1(term=nil)
+        if term
+            Recurse { |g| self + (term | g) }
         else
-            Inline.new(self) { |us,cursor,buffer,lookahead,hold|
-                Inline.var { |branch| "(
-                    #{branch}=#{buffer}.class.new
-                    #{us[cursor,branch,lookahead,hold]} &&
-                      #{buffer}<<#{branch}
-                )"}
-            }
+            Recurse { |g| (g | NULL) + self }
         end
     end
-    # A Grammar that can flatten itself (with code strings) to reduce the
-    # amount of method calls needed while parsing.  This is tricky stuff.
-    # Will explain later.
-    class Inline < Grammar
-        def initialize(*objects,&block) # :yield: cursor,buffer,lookahead[,hold]
-            @objects = objects
-            @block = block
-        end
-        Arg_names = %w(cursor buffer lookahead)
-        def scan(cursor,buffer=[],lookahead=false) # :nodoc:
-            (class << self;self;end).class_eval(
-                "def scan(cursor,buffer=[],lookahead=false)\n"+
-                    scanner(*(_leaf_names+Arg_names+[false]))+
-                "\nend"
-            )
-            scan(cursor,buffer,lookahead)
-        end
-        def check(cursor,buffer=[],lookahead=false) # :nodoc:
-            (class << self;self;end).class_eval(
-                "def check(cursor,buffer=[],lookahead=false)\n"+
-                    scanner(*(_leaf_names+Arg_names+[true]))+
-                "\nend"
-            )
-            check(cursor,buffer,lookahead)
-        end
-        def scanner(*leaves_args) # :nodoc:
-            objects = _extractors.map { |e| e[leaves_args] }
-            args = objects+leaves_args
-            if @block.arity<args.size and args.slice!(-1)
-                "#{leaves_args[0]}.pos{#{@block.call(*args)}}"
-            else
-                @block.call(*args)
-            end
-        end
-        def leaves # :nodoc:
-            @_ or begin
-                @_ = []
-                @extractors = []
-                @objects.inject(false) { |leaf,object|
-                    if leaf
-                        @_ << object
-                        @extractors << lambda { |leaves_args|
-                            leaves_args.slice!(0)
-                        }
-                        true
-                    elsif !object
-                        true
-                    elsif false
-                        # enable this code to disable code flattening
-                        @_ << object
-                        @extractors << lambda { |leaves_args|
-                            g = leaves_args.slice!(0)
-                            lambda { |*args|
-                                "#{g}.#{args.slice!(-1) ? 'check' : 'scan'}(#{args.join(',')})"
-                            }
-                        }
-                        false
-                    else
-                        leaves = object.leaves
-                        @_.concat(leaves)
-                        n = leaves.size
-                        @extractors << lambda { |leaves_args|
-                            leaf_names = leaves_args.slice!(0,n)
-                            lambda { |*args| object.scanner(*(leaf_names+args)) }
-                        }
-                        false
+    # Grammar that matches +self+ replicated +multiplier+ times.
+    # +multiplier+ can be a Range to specify a variable multiplier.  The
+    # +multiplier+ just needs to respond to #=== to determine the min and
+    # max iterations.
+    def *(mult)
+        Common { |e|
+            Variables(0) { |i|
+                case mult
+                when Fixnum
+                    start = Check { e[mult].equal?(i << i[] + e[1]) }
+                    inside = Fail()
+                when Range
+                    start = case (range0=mult.begin)
+                    when Fixnum; Check { e[range0].equal?(i << i[] + e[1]) }
+                    else; Check { e[range0] <= (i << i[] + e[1]) }
                     end
-                }
-                remove_instance_variable(:@objects)
-                @_
-            end
-        end
-        def _extractors # :nodoc:
-            @extractors or (leaves;@extractors)
-        end
-        def _leaf_names # :nodoc:
-            (0...leaves.size).map { |i| "@_[#{i}]" }
-        end
-        def inspect # :nodoc:
-            to_s[0..-2].concat(" #{scanner(*(leaves+Arg_names+[false]))}>")
-        end
-        @@symbol = "_0".to_sym
-        # used for generating "local" variable names
-        def self.var(&block)
-            critical0 = Thread.critical
-            Thread.critical = true
-            if block
-                begin
-                    symbol = @@symbol
-                    symbols = []
-                    block.arity.times {
-                        symbols << @@symbol
-                        @@symbol = @@symbol.to_s.succ.to_sym
-                    }
-                    # this better not need other threads - critical section
-                    yield(*symbols)
-                ensure
-                    @@symbol = symbol
-                end
-            else
-                begin
-                    @@symbol
-                ensure
-                    @@symbol = @@symbol.to_s.succ.to_sym
+                    range1 = mult.end
+                    mult.exclude_end? or
+                        range1 = begin;range1.succ;rescue;range1+1;end
+                    inside = case range1
+                    when Fixnum; Check { e.not(e[range1].equal?(i << i[] + e[1])) }
+                    when 1.0/0; NULL
+                    else; Check { e[range1] > (i << i[] + e[1]) }
+                    end
+                else
+                    start = inside = Check { e[mult] === (i << i[] + e[1]) }
                 end
-            end
-        ensure
-            Thread.critical = critical0
-        end
+                tail = Recurse { |l| l + inside + self | NULL }
+                ((mult===0) ? tail : Recurse { |r| self + (start + tail | r) })
+            }
+        }
     end
-    # A Grammar that matches using arbitrary code
-    class Code < Inline
-        def initialize(&code) # :yield: cursor,buffer,lookahead
-            if code.arity<4
-                super(nil,code) { |code,cursor,buffer,lookahead|
-                    "#{code}[#{cursor},#{buffer},#{lookahead}]"
-                }
-            else
-                super(nil,code) { |code,cursor,buffer,lookahead,hold|
-                    "#{code}[#{cursor},#{buffer},#{lookahead},#{hold}]"
-                }
-            end
-        end
+    # Grammar that redirects parsing results of +self+ to a
+    # +buf0+.clone and yields the resulting buffer and possibly the engine
+    # afterwards.
+    def redirect(buf0, &block) # :yield: buf[, engine]
+        Grammar { |e| e.redirect(self.to_proc, buf0, &block) }
     end
-    # Lookup grammar from next token.  Need to doc.
-    class Lookup < Grammar
-        def initialize(lookup)
-            @lookup = lookup
-        end
-        def scan(cursor,buffer=[],lookahead=false) # :nodoc:
-            v = cursor.read1next
-            if grammar = @lookup[v]
-                buffer << v
-                grammar.scan(cursor,buffer,false)
-            else
-                raise(Error.new(cursor,"no grammar for #{v} found in #{@lookup}"))
-            end
-        end
+    # Grammar that discards parsing results of +self+ and afterwards
+    # yields the engine to the optional block which should return something
+    # to be appended to the output.
+    def discard(&block) # :yield: engine
+        Grammar { |e| e.discard(self.to_proc, &block) }
     end
-    class LookupAhead < Grammar
-        def initialize(lookup)
-            @lookup = lookup
-        end
-        def scan(cursor,buffer=[],lookahead=false) # :nodoc:
-            v = cursor.read1after
-            if grammar = @lookup[v]
-                grammar.scan(cursor,buffer,false)
-            elsif lookahead
-                false
-            else
-                raise(Error.new(cursor,"no grammar for #{v} found in #{@lookup}"))
-            end
-        end
+    # Grammar that redirects parsing results of +self+ to a
+    # +buf0+.clone and yields the resulting buffer and possibly the engine
+    # afterwards to an optional block which should return something to be
+    # appended to the output.
+    def group(buf0, &block) # :yield: buf[, engine]
+        block_given? ? redirect(buf0) { |buf, e|
+            e << (block.arity==1 ? yield(buf) : yield(buf, e))
+        } : redirect(buf0) { |buf, e|
+            e << buf
+        }
     end
-    # Grammar that matches to a sequence.  An object responding to #[index]
-    # (i.e. String/Array) is used to represent this sequence.  Each element
-    # returned by #[] should respond to #== to compare each element in the
-    # sequence.
-    class Sequence < Grammar
-        def initialize(value,partial=false)
-            @value = value
-            @partial = partial
-        end
-        def scan(cursor,buffer=[],lookahead=false) # :nodoc:
-            i = cursor.scan(@value,false,false,buffer)
-            if !i
-                if lookahead
-                    false
-                else
-                    raise(Error.new(cursor,@value[0]))
-                end
-            elsif !@partial and i<0
-                raise(Error.new(cursor,@value[-i]))
-            else
-                buffer
-            end
-        end
-        def inspect
-            "#{self.class}.new(#{@value.inspect},#{@partial.inspect})"
-        end
-        def to_s
-            inspect
-        end
+    # not sure if this is needed or wanted right now
+    def backref(&block) # :nodoc: :yield: n[, engine]
+        Grammar { |e| e.backref(self.to_proc, &block) }
     end
-    # Grammar that matches elements until it finds a specific sequence.
-    # Compare to IO#gets.
-    class SequenceUntil < Grammar
-        def initialize(value,allow_eof=false)
-            @value = value
-            @allow_eof = allow_eof
-        end
-        def scan(cursor,buffer=[],lookahead=false) # :nodoc:
-            len,i = cursor.scan_until(@value,false,false,buffer)
-            if !len
-                if lookahead
-                    false
-                else
-                    raise(Error.new(cursor,@value[0]))
-                end
-            elsif !@allow_eof and len.nonzero? and i<=0
-                raise(Error.new(cursor,@value[-i]))
-            else
-                buffer
-            end
-        end
+    # Grammar that matches +self+, but backtracks when it
+    # fails instead of raising an error.
+    def backtrack(len=nil)
+        Grammar { |e| e.backtrack(self.to_proc, len) }
     end
-    # Grammar that matches to a single element.  An object responding to #==
-    # is used to do the matching.
-    class Element < Inline
-        def initialize(value)
-            super(nil,value) { |value,cursor,buffer,lookahead,hold|
-                condition = hold ?
-                    "#{value}==(v=#{cursor}.read1after)" :
-                    "(v=#{cursor}.scan1next(#{value}))"
-                "(#{condition} ? " +
-                    "#{buffer} << v : " +
-                    "!#{lookahead}&&raise(Error.new(#{cursor},#{value})))"
-            }
-        end
+    # Grammar that uses a looped +self+ as a lexer to generate tokens
+    # for +parser+ which sends its results to the output.  +buf0+.clone is used
+    # to hold tokens between the lexer and the parser.
+    def supply(parser, buf0, &block) # :yield: buf[, engine]
+        Grammar { |e|
+            e.supply(self.to_proc, parser.to_proc, buf0, &block)
+        }
     end
-    # Grammar that always fails (with a +message+)
-    class Fail < Inline
-        def initialize(message)
-            super { |cursor,buffer,lookahead|
-                "!#{lookahead}&&raise(Error.new(cursor,#{message.inspect}))"
-            }
-        end
+    # Grammar that uses +self+ as a lexer to generate tokens
+    # for +parser+ which sends its results to the output.  +buf0+.clone is used
+    # to hold tokens between the lexer and the parser.
+    def pipe(parser, buf0, len=nil, &block) # :yield: buf[, engine]
+        Grammar { |e|
+            e.pipe(self.to_proc, parser.to_proc, buf0, len, &block)
+        }
     end
-    # Grammar that matches any single element
-    ANY = Inline.new { |cursor,buffer,lookahead,hold|
-        "((v=#{cursor}.read1#{hold ? 'after' : 'next'}) ? " +
-            "#{buffer} << v : " +
-            "!#{lookahead}&&raise(Error.new(#{cursor},'any element')))"
-    }
-    # Grammar that always passes and matches nothing
-    NULL = Inline.new { |_,buffer,_,_| "#{buffer}" }
-    # Grammar that matches the end-of-file (or end-of-cursor)
-    EOF = Inline.new { |cursor,buffer,_,_|
-        "(!#{cursor}.skip1after&&#{buffer})"
-    }
+    # Include this module somewhere to have access to the methods that build Grammars.
+    module Molecules
-    # Exception class for handling Grammar errors
-    class Error < RuntimeError
-        attr_accessor(:cursor,:expected,:found)
-        def initialize(cursor=nil,expected=nil,found=nil)
-            @cursor = cursor
-            @expected = expected
-            @found = found
+        # Eliminate the need for the ".new".
+        # Would be better if objects were callable so we wouldn't need this.
+        def Grammar(&block)
+            Grammar.new(&block)
         end
-        def to_s
-            err = [super]
-            err << "expected #{@expected.inspect}" if @expected
-            err << "found #{@found.inspect}" if @found
-            begin
-                #err << @cursor.to_s if @cursor
-            rescue
-            end
-            err * ", "
-        end
-    end
-    # :stopdoc:
-    # Parse buffer that throws out everything
-    DISCARD = Class.new {
-        def concat(v);self;end
-        def << (v);self;end
-        define_method(:class) do;self;end # using "def class" messed up rdoc
-        def new;self;end
-    }.new
-    # :startdoc:
-end
+        # Grammar that matches to a single element.  An object responding to #===
+        # is used to do the matching.
+        def Element(pattern)
+            Grammar { |e| e.match(pattern) }
+        end
+        alias_method(:E, :Element)
-class Cursor
-    # A Cursor that gets its data from a producer Thread.  This Thread is
-    # generated from the block given (passed +self+).  The code in this
-    # block is expected to apply the << and concat methods to the argument
-    # given.  The current Thread is the consumer.
-    #
-    # Unfortunately, this Cursor isn't full-featured (yet).  It is not
-    # reversable.  This will one day be reversable #pos*.
-    class Producer < Cursor
-        def initialize(max_size=16,&producer)
-            @buffer = []
-            @size = 0
-            @max_size = max_size
-            @consumer = Thread.current
-            @producer = Thread.new { producer[self] }
-        end
-        def new_data
-            []
-        end
-        def read1next
-            while (Thread.critical=true;@buffer.empty?&&@producer.alive?)
-                Thread.critical = false
-                @producer.run
-            end
-            v = @buffer.shift
-            @size -= 1
-            v
-        ensure
-            Thread.critical = false
-        end
-        def read1after
-            v = read1next
-            unless v.nil?;begin
-                Thread.critical = true
-                @buffer.unshift(v)
-            ensure
-                Thread.critical = false
-            end;end
-            v
-        end
-        def skip1after
-            read1after.nil? ? nil : true
+        # Grammar that matches the elements in +pattern_sequence+.  +Element+
+        # is used for each pattern in +pattern_sequence+.  Starting from index
+        # 0 #[] is used to access +pattern_sequence+ until it returns +nil+.
+        def Chain(pattern_sequence)
+            p = pattern_sequence[0] or return NULL
+            g = E(p)
+            i = 0
+            g += E(p) while p = pattern_sequence[i+=1]
+            g
         end
-        def scan1next(v)
-            v0 = read1next
-            (v0.nil? || v==v0) ? v0 : begin
-                Thread.critical = true
-                @buffer.unshift(v0)
-                nil
-            ensure
-                Thread.critical = false
-            end
+        # Grammar that always fails (with a +message+)
+        def Fail(message=nil)
+            Grammar { |e| e.failure(message) }
+        end
+        # Grammar that shares/uses one or more variables.  Optional initial
+        # values can be given for the variables.  The block should take
+        # variable reference objects (one or more) and should return a
+        # Grammar that uses the variables in action blocks.  Use var#[] to
+        # get the value in a variable reference object and var#<< to set the
+        # value.
+        def Variables(*vals, &block) # :yield: *vars
+            Grammar { |e|
+                e.variables(block.arity) { |*vars|
+                    init = []
+                    vals.each_with_index { |val, i|
+                        init << (vars[i] << e[val,true])
+                    }
+                    init << e[true]
+                    Grammar { e.steps(*init) } + yield(*vars)
+                }
+            }
         end
-        def << (v)
-            while (Thread.critical=true;@size>=@max_size&&@consumer.alive?)
-                Thread.critical = false
-                @consumer.run
-            end
-            @buffer << v
-            @size += 1
-            self
-        ensure
-            Thread.critical = false
+        # Grammar that handles recursion.  +inner+ represents a call back to
+        # the resulting Grammar.  An +inner+ may be given or it will be
+        # automatically generated (as an empty/invalid Grammar).  +inner+
+        # is yielded to the block which should return the resulting Grammar
+        # (and be based on +inner+).  Middle, right, and left recursion should
+        # be handled by the engine properly, but there may be restrictions on
+        # left recursion (i.e. must be the very first thing in the resulting
+        # Grammar).
+        def Recurse(inner=Grammar()) # :yield: inner
+            outer = yield(inner)
+            Grammar { |e| e.recurse(inner, &outer) }
         end
-        def concat(value)
-            i = 0
-            until (v = value[i]).nil?
-                self << v
-            end
-            self
+        # Grammar that fails with a message when the Grammar block doesn't
+        # pass.
+        def Check(message=nil, &block) # :yield: engine
+            Grammar(&block) | Fail(message)
+        end
+        # Grammar that yields an engine to a Grammar block and expects that
+        # the result always passes.
+        def Always(&block) # :yield: engine
+            Grammar { |e| e.always(&block) }
+        end
+        # Grammar that is the result of yielding an engine.  This adds a
+        # convenience so that action blocks don't need to receive the engine.
+        def Common # :yield: engine
+            Grammar { |e| yield(e)[e] }
+        end
+        # Grammar that modifies the output buffer.  The current buffer and
+        # optionally the engine are yielded to a block which should return
+        # what the new output buffer should be.
+        # WARNING: only use this inside of a Grammar where #group or
+        # #redirect has been applied.  It probably won't work as expected in
+        # other places.
+        def Output(&block) # :yield: buf[, engine]
+            Grammar { |e| e.output(&block) }
         end
+        # Zero-width Grammar that always passes and matches nothing
+        NULL = Grammar.new { |e| e[true] }
+        # Zero-width Grammar that matches the end-of-file (or end-of-input)
+        EOF = Grammar.new { |e| e.eof }
+        # Grammar that matches any single element (not EOF)
+        ANY = Grammar.new { |e| e.any }
     end
+    include Molecules
 end