RubyGems - grammar - Versions diffs - 0.5 - Mend

grammar 0.5

Files changed (7) hide show

data/lib/grammar.rb ADDED

@@ -0,0 +1,692 @@
+#!/bin/env ruby
+# = grammar.rb - specify BNF-like grammar directly in Ruby
+# $Id: grammar.rb,v 1.1 2005/10/13 23:58:45 eric_mahurin Exp $
+# Author::  Eric Mahurin (Eric under Mahurin at yahoo dot com)
+# License:: Ruby license
+# Home::    http://rubyforge.org/projects/grammar
+# This base class defines common operators to the derived Grammar classes to
+# make specifying the Grammar look similar to BNF.  This base class also serves
+# the purpose of handling recursion in the Grammar.
+class Grammar
+    class << self
+        alias_method(:[],:new)
+        # With several interlocking recursive grammars, this can be used.
+        # For each argument that the block needs, an empty Grammar is
+        # given.  The result of the block should be an Array of the final
+        # grammars for those arguments.
+        def multiple(&block) # :yield: *recursive_grammars
+            grammars = (1..block.arity).map { self.new }
+            grammars.zip(yield(*grammars)) { |g,g1| g << g1 }
+            grammars
+        end
+    end
+    # Creates a Grammar from another +grammar+.  If +grammar+ is not given
+    # and a block is given instead, the block is passed +self+ (to handle recursion)
+    # and the resulting grammar from this block will be used.
+    def initialize(grammar=nil,&block) # :yield: +self+
+        @grammar = grammar || block && yield(self)
+    end
+    # Reinitialize with another Grammar.  This will be needed for recursion
+    # unless the block form of new is used.
+    def << (*args)
+        # Re-runs #initialize on this instance.  No block is forwarded, so
+        # only an explicit grammar argument has any effect.  The return value
+        # is whatever #initialize returns, not +self+.
+        initialize(*args)
+    end
+    # Match to elements at a Cursor while advancing.  When matched, a parse
+    # buffer is returned.  Instead of an empty Array, the seed to this parse buffer
+    # can be given by +buffer+ which should respond to #concat and #<< like Array.
+    # When a mismatch occurs several possibilities exist.  If +lookahead+ and
+    # the Grammar is within its lookahead (defaults one element/token - can be
+    # controlled by #lookahead), the cursor is moved back to where it started and
+    # +false+ is returned.  Otherwise an exception describing the mismatch is
+    # raised.
+    def scan(cursor,buffer=[],lookahead=false)
+        # Pure delegation to the wrapped grammar.  @grammar is nil until this
+        # Grammar has been given one (constructor argument/block or #<<).
+        @grammar.scan(cursor,buffer,lookahead)
+    end
+    # Same as #scan except the +cursor+ is held in place
+    def check(cursor,buffer=[],lookahead=false)
+        cursor.pos { (@grammar||self).scan(cursor,buffer,lookahead) }
+    end
+    def scanner(me,cursor,buffer,lookahead,hold) # :nodoc:
+        hold ?
+            "#{me}.check(#{cursor},#{buffer},#{lookahead})" :
+            "#{me}.scan(#{cursor},#{buffer},#{lookahead})"
+    end
+    def leaves # :nodoc:
+        # A plain Grammar contributes exactly one leaf: the wrapped grammar
+        # if one is set, otherwise itself.
+        [@grammar||self]
+    end
+    # Creates a new Grammar that matches +self+ or +other+ if that fails.
+    def |(other)
+        # The block returns Ruby source, not a match result: +us+ and +them+
+        # are called to obtain the code for each side.  +self+ is always
+        # generated with lookahead=true; +other+ gets the caller's lookahead.
+        # Both sides receive the caller's +hold+.
+        Inline.new(self,other) { |us,them,cursor,buffer,lookahead,hold|
+            "(#{us[cursor,buffer,true,hold]} ||
+              #{them[cursor,buffer,lookahead,hold]})"
+        }
+    end
+    # Creates a new Grammar that matches +self+ followed by +other+.
+    # The resulting match list is a concatenation from the match lists
+    # from +self+ and +other+.
+    def +(other)
+        # Generates "<self code> && <other code>".  +self+ is generated with
+        # the caller's lookahead; +other+ is generated with lookahead=false.
+        # The block declares no +hold+ parameter; both sides are generated
+        # with hold=false.
+        Inline.new(self,other) { |us,them,cursor,buffer,lookahead|
+            "(#{us[cursor,buffer,lookahead,false]} &&
+              #{them[cursor,buffer,false,false]})"
+        }
+    end
+    # Generates a Grammar that matches when +self+ (in-place) and +other+.
+    def &(other)
+        # Generates "<self code> && <other code>" where +self+ is generated
+        # with hold=true and +other+ with the caller's +hold+.  Both sides
+        # use the caller's lookahead.
+        Inline.new(self,other) { |us,them,cursor,buffer,lookahead,hold|
+            "(#{us[cursor,buffer,lookahead,true]} &&
+              #{them[cursor,buffer,lookahead,hold]})"
+        }
+    end
+    # Creates a new Grammar that matches +self+ replicated +multiplier+ times.
+    # +multiplier+ can be a Range to specify a variable multiplier.  The
+    # +multiplier+ just needs to respond to #=== to determine the min and
+    # max iterations.
+    def *(multiplier)
+        # +multiplier+ follows a nil in the Inline argument list, so it is
+        # kept as a leaf object and the block receives a code string that
+        # refers to it (the block parameter shadows the method argument).
+        # Inline.var supplies three generated variable names used inside
+        # the emitted loop:
+        #   n    - iteration counter, tested with multiplier.===
+        #   ret  - false until the count is first accepted, then the buffer
+        #   look - lookahead flag passed to the next iteration of +self+
+        Inline.new(self,nil,multiplier) { |us,multiplier,cursor,buffer,lookahead|
+            Inline.var { |n,ret,look| "(
+                #{n} = -1
+                #{ret} = false
+                #{look} = #{lookahead}
+                while true
+                    if #{multiplier}===(#{n}+=1)
+                        if !#{ret}
+                            #{ret} = #{buffer}
+                            #{look} = true
+                        end
+                    else
+                        break(#{ret}) if #{ret}
+                    end
+                    #{us[cursor,buffer,look,false]} or break(#{ret})
+                    #{look} = false if !#{ret}
+                end
+            )" }
+        }
+    end
+    # Creates a new zero-width Grammar that matches +self+.
+    def +@
+        # +self+ is generated with hold=true and with the DISCARD buffer, so
+        # nothing it matches is kept; on success the caller's buffer is the
+        # value of the generated expression.
+        Inline.new(self) { |us,cursor,buffer,lookahead,hold|
+            "(#{us[cursor,'DISCARD',lookahead,true]} && #{buffer})"
+        }
+    end
+    # Creates a new zero-width Grammar that matches anything but +self+.
+    def -@
+        Inline.new(self) { |us,cursor,buffer,lookahead,hold|
+            "(!#{us[cursor,'DISCARD',true,true]} ? #{buffer} :
+              !#{lookahead}&&raise(Error.new(cursor,'a negative syntatic predicate')))"
+        }
+    end
+    # Returns a Grammar that matches the next element as long as what follows
+    # doesn't match +self+.  Most useful for a single element Grammar.
+    def ~
+        # Negative predicate on +self+ (unary minus) combined via #& with
+        # ANY, which then consumes one element.
+        (-self)&ANY
+    end
+    # Creates a new Grammar that optionally matches +self+.
+    def optional
+        # Falls back to NULL (always passes, matches nothing) when +self+
+        # does not match.
+        self|NULL
+    end
+    # Matches a list of +self+ (plus possibly other stuff) one or more times.
+    # The arguments are an alternating list of optional terminators and
+    # separators.  Along with #list0 you should be able to describe any
+    # tail recursive grammar.  This is equivalent to this recursive Grammar:
+    #
+    #  Grammar.new { |g| a+(z|b+(y|...g)) }
+    #
+    # where a, b, ... are +self+ and the separators and z, y, ... are the
+    # terminators.
+    #
+    # When a terminator is +nil+, the next item is treated
+    # as optional (i.e. instead of a+(nil|g), a+(g|) is used).
+    #
+    # When there is a missing terminator at the end of +term_sep+ (and it is
+    # non-empty), the list is not allowed to stop at that point.
+    def list1(*term_sep)
+        # After the next two lines term_sep is [self, term, sep, term, ...];
+        # nil entries mark "no terminator here" and are dropped (compact)
+        # before the grammars are handed to Inline.
+        term_sep.push(nil) if term_sep.empty?
+        term_sep.unshift(self)
+        Inline.new(*term_sep.compact) { |*args|
+            # The last three block arguments are the cursor/buffer/lookahead
+            # code strings; what remains in args are the code generators for
+            # the non-nil grammars, in order.
+            cursor,buffer,lookahead = args.slice!(-3,3)
+            Inline.var { |look,ret|
+                # Truthy when term_sep (including self) has an odd number of
+                # entries or its last entry is non-nil.  In that case +ret+
+                # is the literal false instead of a generated variable.
+                terminated = (term_sep.size&1).nonzero? || term_sep[-1]
+                code = "(
+                #{look} = #{lookahead}
+                #{terminated ? (ret=false;'') : "#{ret} = false"}
+                while true
+                    #{args[j=0][cursor,buffer,look,false]} or break(#{ret})
+                    #{look} = #{terminated ? false : true}
+                    #{terminated ? '' : "#{ret} = #{buffer}"}"
+                # Odd indexes of term_sep are terminators, the following even
+                # indexes are separators; j walks the compacted args.
+                1.step(term_sep.size-1,2) { |i|
+                    if term_sep[i]
+                        code << "
+                        #{args[j+=1][cursor,buffer,true,false]} and break(#{buffer})"
+                        if i+1<term_sep.size
+                            code << "
+                            #{args[j+=1][cursor,buffer,false,false]} or break(false)"
+                        end
+                    elsif i+1<term_sep.size
+                        code << "
+                        #{args[j+=1][cursor,buffer,true,false]} or break(#{buffer})"
+                    end
+                }
+                code << "
+                end
+                )"
+            }
+        }
+    end
+    # Matches a list of +self+ (plus possibly other stuff) zero or more times.
+    # The arguments are an alternating list of optional terminators and
+    # separators.  Along with #list1 you should be able to describe any
+    # tail recursive grammar.  This is equivalent to this recursive Grammar:
+    #
+    #  Grammar.new { |g| x|(a+(z|b+(y|...g))) }
+    #
+    # where a, b, ... are +self+ and the separators and z, y, ..., x are the
+    # terminators.
+    #
+    # When a terminator is +nil+/missing, the next item is treated
+    # as optional.
+    def list0(*term_sep)
+        # Pads the argument list to an odd length with nil, then prepends
+        # self.  nil entries are dropped (compact) before the grammars are
+        # handed to Inline.
+        term_sep.push(nil) if (term_sep.size&1).zero?
+        term_sep.unshift(self)
+        Inline.new(*term_sep.compact) { |*args|
+            # The last three block arguments are the cursor/buffer/lookahead
+            # code strings; the rest are code generators for the grammars.
+            cursor,buffer,lookahead = args.slice!(-3,3)
+            Inline.var { |look,ret|
+                code = "("
+                # The lookahead variable is only emitted when the last entry
+                # of term_sep is non-nil.
+                code << "
+                #{look} = #{lookahead}" if term_sep[-1]
+                code << "
+                while true"
+                # i starts at -1, so the first entry examined is the last
+                # element of term_sep; j indexes the compacted args.
+                j = -2
+                -1.step(term_sep.size-3,2) { |i|
+                    if term_sep[i]
+                        code << "
+                        #{args[j+=1][cursor,buffer,true,false]} and break(#{buffer})"
+                        if j.zero?
+                            code << "
+                            #{args[j+=1][cursor,buffer,look,false]} or break(false)
+                            #{look} = false"
+                        else
+                            code << "
+                            #{args[j+=1][cursor,buffer,false,false]} or break(false)"
+                        end
+                    else
+                        # NOTE(review): the j==2 adjustment is not explained
+                        # anywhere in this file - confirm the intended index.
+                        j += 1 if j==2
+                        code << "
+                        #{args[j+=1][cursor,buffer,true,false]} or break(#{buffer})"
+                    end
+                }
+                code << "
+                end)"
+            }
+        }
+    end
+    # Creates a new Grammar where the entire grammar is considered a
+    # part of the lookahead (instead of just the first element).
+    def lookahead
+        # +self+ is scanned into a fresh buffer (+branch+, created with
+        # buffer.class.new) inside cursor.pos?, always with lookahead=false.
+        # An Error raised by that scan is re-raised only when the caller's
+        # lookahead is false.  On success +branch+ is appended to the
+        # caller's buffer with #concat.
+        Inline.new(self) { |us,cursor,buffer,lookahead|
+            Inline.var { |branch| "(
+                #{branch} = #{buffer}.class.new
+                #{cursor}.pos? { begin
+                    #{us[cursor,branch,false]}
+                rescue Error => err
+                    raise(err) if !#{lookahead}
+                end } && #{buffer}.concat(#{branch})
+            )" }
+        }
+    end
+    # Creates a new Grammar where the match list of +self+ is filtered by
+    # some code.
+    # When a +klass+ is given, +klass+.new is used as the buffer to hold what
+    # will be passed to the code.  Otherwise this temporary buffer will come
+    # from buffer.class.new.
+    # If the block needs 1 argument, this temporary buffer will be passed
+    # and the block should return something that will be given to buffer.concat.
+    # If the block needs 2 arguments, the second argument will be the buffer
+    # and the block should do the concatenation.
+    # If there is no block, the temporary buffer is passed to buffer.concat
+    # directly.  Use this to get some isolation.
+    def filter(klass=nil,&code) # :yield: branch[, buffer]
+        # Six variants of the same generated code, selected by whether a
+        # block was given, its arity (>=2 or not) and whether +klass+ was
+        # given.  In every variant +self+ scans into a temporary +branch+
+        # buffer (klass.new or buffer.class.new).  +klass+ and +code+ follow
+        # a nil in the Inline argument list, so they are kept as leaf
+        # objects and referenced from the generated code.
+        if !code
+            # No block: the branch is concatenated onto the buffer as is.
+            if klass
+                Inline.new(self,nil,klass) { |us,klass,cursor,buffer,lookahead,hold|
+                    Inline.var { |branch| "(
+                        #{branch}=#{klass}.new
+                        #{us[cursor,branch,lookahead,hold]} &&
+                          #{buffer}.concat(#{branch})
+                    )"}
+                }
+            else
+                Inline.new(self) { |us,cursor,buffer,lookahead,hold|
+                    Inline.var { |branch| "(
+                        #{branch}=#{buffer}.class.new
+                        #{us[cursor,branch,lookahead,hold]} &&
+                          #{buffer}.concat(#{branch})
+                    )"}
+                }
+            end
+        elsif code.arity>=2
+            # Two-argument block: it receives branch and buffer and does the
+            # concatenation itself; a false/nil result raises an Error.
+            if klass
+                Inline.new(self,nil,klass,code) { |us,klass,code,cursor,buffer,lookahead,hold|
+                    Inline.var { |branch| "(
+                        #{branch}=#{klass}.new
+                        #{us[cursor,branch,lookahead,hold]} &&
+                          (#{code}[#{branch},#{buffer}]||
+                            raise(Error.new(cursor,'a filtered '+#{branch}.inspect)))
+                    )"}
+                }
+            else
+                Inline.new(self,nil,code) { |us,code,cursor,buffer,lookahead,hold|
+                    Inline.var { |branch| "(
+                        #{branch}=#{buffer}.class.new
+                        #{us[cursor,branch,lookahead,hold]} &&
+                          (#{code}[#{branch},#{buffer}]||
+                            raise(Error.new(cursor,'a filtered '+#{branch}.inspect)))
+                    )"}
+                }
+            end
+        else
+            # Block of any other arity: it receives only the branch, and its
+            # result is concatenated onto the buffer; a false/nil result
+            # raises an Error.
+            if klass
+                Inline.new(self,nil,klass,code) { |us,klass,code,cursor,buffer,lookahead,hold|
+                    Inline.var { |branch| "(
+                        #{branch}=#{klass}.new
+                        #{us[cursor,branch,lookahead,hold]} &&
+                          #{buffer}.concat(#{code}[#{branch}]||
+                            raise(Error.new(cursor,'a filtered '+#{branch}.inspect)))
+                    )"}
+                }
+            else
+                Inline.new(self,nil,code) { |us,code,cursor,buffer,lookahead,hold|
+                    Inline.var { |branch| "(
+                        #{branch}=#{buffer}.class.new
+                        #{us[cursor,branch,lookahead,hold]} &&
+                          #{buffer}.concat(#{code}[#{branch}]||
+                            raise(Error.new(cursor,'a filtered '+#{branch}.inspect)))
+                    )"}
+                }
+            end
+        end
+    end
+    # Returns a Grammar that discards the match list from +self+
+    def discard
+        # +self+ is generated with the DISCARD buffer in place of the
+        # caller's; on success the caller's (untouched) buffer is the value
+        # of the generated expression.
+        Inline.new(self) { |us,cursor,buffer,lookahead,hold|
+            "(#{us[cursor,'DISCARD',lookahead,hold]}&&#{buffer})"
+        }
+    end
+    # Returns a Grammar that groups the match list from +self+.  A temporary
+    # buffer is formed just like #filter, but buffer.<< is used instead of
+    # buffer.concat.
+    def group(klass=nil)
+        # Both variants scan +self+ into a temporary +branch+ buffer and, on
+        # success, append that buffer as ONE element (buffer << branch).
+        # They differ only in how the branch is created: klass.new when
+        # +klass+ is given, buffer.class.new otherwise.
+        if klass
+            Inline.new(self,nil,klass) { |us,klass,cursor,buffer,lookahead,hold|
+                Inline.var { |branch| "(
+                    #{branch}=#{klass}.new
+                    #{us[cursor,branch,lookahead,hold]} &&
+                      #{buffer}<<#{branch}
+                )"}
+            }
+        else
+            Inline.new(self) { |us,cursor,buffer,lookahead,hold|
+                Inline.var { |branch| "(
+                    #{branch}=#{buffer}.class.new
+                    #{us[cursor,branch,lookahead,hold]} &&
+                      #{buffer}<<#{branch}
+                )"}
+            }
+        end
+    end
+    # A Grammar that can flatten itself (with code strings) to reduce the
+    # amount of method calls needed while parsing.  This is tricky stuff.
+    # Will explain later.
+    class Inline < Grammar
+        # +objects+ are the grammars/objects this Inline is built from; a nil
+        # entry marks every following object as a "leaf" (see #leaves).
+        # +block+ produces the Ruby source for this grammar when called with
+        # code generators/names for the objects followed by the cursor,
+        # buffer, lookahead (and optionally hold) arguments.
+        def initialize(*objects,&block) # :yield: cursor,buffer,lookahead[,hold]
+            @objects = objects
+            @block = block
+        end
+        # Parameter names used in the generated #scan/#check definitions.
+        Arg_names = %w(cursor buffer lookahead)
+        # First call: defines a singleton #scan on this instance from the
+        # generated source (hold=false), then calls it.  The final call
+        # below therefore dispatches to the freshly defined method.
+        def scan(cursor,buffer=[],lookahead=false) # :nodoc:
+            (class << self;self;end).class_eval(
+                "def scan(cursor,buffer=[],lookahead=false)\n"+
+                    scanner(*(_leaf_names+Arg_names+[false]))+
+                "\nend"
+            )
+            scan(cursor,buffer,lookahead)
+        end
+        # Same scheme as #scan, but the source is generated with hold=true.
+        def check(cursor,buffer=[],lookahead=false) # :nodoc:
+            (class << self;self;end).class_eval(
+                "def check(cursor,buffer=[],lookahead=false)\n"+
+                    scanner(*(_leaf_names+Arg_names+[true]))+
+                "\nend"
+            )
+            check(cursor,buffer,lookahead)
+        end
+        # Returns the generated source for this grammar.  +leaves_args+ is
+        # the list of leaf names followed by cursor, buffer, lookahead and
+        # hold.  Each extractor consumes its share of the leaf names from
+        # the front of +leaves_args+.
+        def scanner(*leaves_args) # :nodoc:
+            objects = _extractors.map { |e| e[leaves_args] }
+            args = objects+leaves_args
+            # When the block takes fewer parameters than there are args, the
+            # trailing hold flag is removed here and, if it was set, the
+            # generated code is wrapped in "<cursor>.pos{...}" instead.
+            if @block.arity<args.size and args.slice!(-1)
+                "#{leaves_args[0]}.pos{#{@block.call(*args)}}"
+            else
+                @block.call(*args)
+            end
+        end
+        # Returns (and memoizes in @_) the flat list of leaf objects, and
+        # builds @extractors as a side effect.  @objects is removed once
+        # the list has been built.
+        def leaves # :nodoc:
+            @_ or begin
+                @_ = []
+                @extractors = []
+                # The inject memo is the "leaf mode" flag: false until a nil
+                # object is seen, true from then on.
+                @objects.inject(false) { |leaf,object|
+                    if leaf
+                        # Leaf mode: keep the object itself; its extractor
+                        # yields the single name that refers to it.
+                        @_ << object
+                        @extractors << lambda { |leaves_args|
+                            leaves_args.slice!(0)
+                        }
+                        true
+                    elsif !object
+                        true
+                    elsif false
+                        # enable this code to disable code flattening
+                        @_ << object
+                        @extractors << lambda { |leaves_args|
+                            g = leaves_args.slice!(0)
+                            lambda { |*args|
+                                "#{g}.#{args.slice!(-1) ? 'check' : 'scan'}(#{args.join(',')})"
+                            }
+                        }
+                        false
+                    else
+                        # Grammar object: absorb its leaves and produce a
+                        # code generator that delegates to its #scanner with
+                        # the matching slice of leaf names.
+                        leaves = object.leaves
+                        @_.concat(leaves)
+                        n = leaves.size
+                        @extractors << lambda { |leaves_args|
+                            leaf_names = leaves_args.slice!(0,n)
+                            lambda { |*args| object.scanner(*(leaf_names+args)) }
+                        }
+                        false
+                    end
+                }
+                remove_instance_variable(:@objects)
+                @_
+            end
+        end
+        # Extractors built by #leaves (which is triggered here if needed).
+        def _extractors # :nodoc:
+            @extractors or (leaves;@extractors)
+        end
+        # Source expressions ("@_[0]", "@_[1]", ...) naming each leaf.
+        def _leaf_names # :nodoc:
+            (0...leaves.size).map { |i| "@_[#{i}]" }
+        end
+        # Appends the generated source to the default #to_s text.  The leaf
+        # objects themselves (not their names) are passed to #scanner here.
+        def inspect # :nodoc:
+            to_s[0..-2].concat(" #{scanner(*(leaves+Arg_names+[false]))}>")
+        end
+        @@symbol = "_0".to_sym
+        # used for generating "local" variable names
+        # With a block: yields as many fresh symbols as the block's arity,
+        # then restores @@symbol afterwards.  Without a block: returns the
+        # current symbol and advances @@symbol to its successor.
+        # Thread.critical is set for the duration and restored on exit.
+        def self.var(&block)
+            critical0 = Thread.critical
+            Thread.critical = true
+            if block
+                begin
+                    symbol = @@symbol
+                    symbols = []
+                    block.arity.times {
+                        symbols << @@symbol
+                        @@symbol = @@symbol.to_s.succ.to_sym
+                    }
+                    # this better not need other threads - critical section
+                    yield(*symbols)
+                ensure
+                    @@symbol = symbol
+                end
+            else
+                begin
+                    @@symbol
+                ensure
+                    @@symbol = @@symbol.to_s.succ.to_sym
+                end
+            end
+        ensure
+            Thread.critical = critical0
+        end
+    end
+    # A Grammar that matches using arbitrary code
+    class Code < Inline
+        # +code+ follows a nil in the argument list given to Inline, so it is
+        # kept as a leaf object and called from the generated source.  A
+        # block with arity below 4 is called without +hold+ (and the
+        # generating block omits it too); otherwise +hold+ is passed as a
+        # fourth argument.
+        def initialize(&code) # :yield: cursor,buffer,lookahead
+            if code.arity<4
+                super(nil,code) { |code,cursor,buffer,lookahead|
+                    "#{code}[#{cursor},#{buffer},#{lookahead}]"
+                }
+            else
+                super(nil,code) { |code,cursor,buffer,lookahead,hold|
+                    "#{code}[#{cursor},#{buffer},#{lookahead},#{hold}]"
+                }
+            end
+        end
+    end
+    # Lookup grammar from next token.  Need to doc.
+    class Lookup < Grammar
+        def initialize(lookup)
+            @lookup = lookup
+        end
+        def scan(cursor,buffer=[],lookahead=false) # :nodoc:
+            v = cursor.read1next
+            if grammar = @lookup[v]
+                buffer << v
+                grammar.scan(cursor,buffer,false)
+            else
+                raise(Error.new(cursor,"no grammar for #{v} found in #{@lookup}"))
+            end
+        end
+    end
+    class LookupAhead < Grammar
+        def initialize(lookup)
+            @lookup = lookup
+        end
+        def scan(cursor,buffer=[],lookahead=false) # :nodoc:
+            v = cursor.read1after
+            if grammar = @lookup[v]
+                grammar.scan(cursor,buffer,false)
+            elsif lookahead
+                false
+            else
+                raise(Error.new(cursor,"no grammar for #{v} found in #{@lookup}"))
+            end
+        end
+    end
+    # Grammar that matches to a sequence.  An object responding to #[index]
+    # (i.e. String/Array) is used to represent this sequence.  Each element
+    # returned by #[] should respond to #== to compare each element in the
+    # sequence.
+    class Sequence < Grammar
+        def initialize(value,partial=false)
+            @value = value
+            @partial = partial
+        end
+        def scan(cursor,buffer=[],lookahead=false) # :nodoc:
+            i = cursor.scan(@value,false,false,buffer)
+            if !i
+                if lookahead
+                    false
+                else
+                    raise(Error.new(cursor,@value[0]))
+                end
+            elsif !@partial and i<0
+                raise(Error.new(cursor,@value[-i]))
+            else
+                buffer
+            end
+        end
+        def inspect
+            "#{self.class}.new(#{@value.inspect},#{@partial.inspect})"
+        end
+        def to_s
+            inspect
+        end
+    end
+    # Grammar that matches elements until it finds a specific sequence.
+    # Compare to IO#gets.
+    class SequenceUntil < Grammar
+        def initialize(value,allow_eof=false)
+            @value = value
+            @allow_eof = allow_eof
+        end
+        def scan(cursor,buffer=[],lookahead=false) # :nodoc:
+            len,i = cursor.scan_until(@value,false,false,buffer)
+            if !len
+                if lookahead
+                    false
+                else
+                    raise(Error.new(cursor,@value[0]))
+                end
+            elsif !@allow_eof and len.nonzero? and i<=0
+                raise(Error.new(cursor,@value[-i]))
+            else
+                buffer
+            end
+        end
+    end
+    # Grammar that matches to a single element.  An object responding to #==
+    # is used to do the matching.
+    class Element < Inline
+        # +value+ follows a nil in the Inline argument list, so it is kept
+        # as a leaf object and referenced from the generated code.
+        def initialize(value)
+            super(nil,value) { |value,cursor,buffer,lookahead,hold|
+                # With hold the element is fetched with read1after and
+                # compared with value.==; without hold the cursor's
+                # scan1next(value) does the comparison.
+                condition = hold ?
+                    "#{value}==(v=#{cursor}.read1after)" :
+                    "(v=#{cursor}.scan1next(#{value}))"
+                # Match: append the element to the buffer.  Mismatch: false
+                # under lookahead, otherwise raise an Error.
+                "(#{condition} ? " +
+                    "#{buffer} << v : " +
+                    "!#{lookahead}&&raise(Error.new(#{cursor},#{value})))"
+            }
+        end
+    end
+    # Grammar that always fails (with a +message+)
+    class Fail < Inline
+        def initialize(message)
+            super { |cursor,buffer,lookahead|
+                "!#{lookahead}&&raise(Error.new(cursor,#{message.inspect}))"
+            }
+        end
+    end
+    # Grammar that matches any single element
+    # With hold the element is fetched with read1after, otherwise with
+    # read1next.  A false/nil element counts as a mismatch: false under
+    # lookahead, otherwise an Error is raised.
+    ANY = Inline.new { |cursor,buffer,lookahead,hold|
+        "((v=#{cursor}.read1#{hold ? 'after' : 'next'}) ? " +
+            "#{buffer} << v : " +
+            "!#{lookahead}&&raise(Error.new(#{cursor},'any element')))"
+    }
+    # Grammar that always passes and matches nothing
+    # (the generated code is just the buffer expression)
+    NULL = Inline.new { |_,buffer,_,_| "#{buffer}" }
+    # Grammar that matches the end-of-file (or end-of-cursor)
+    # It passes when cursor.skip1after returns false/nil and never raises
+    # on its own.
+    EOF = Inline.new { |cursor,buffer,_,_|
+        "(!#{cursor}.skip1after&&#{buffer})"
+    }
+    # Exception class for handling Grammar errors
+    class Error < RuntimeError
+        attr_accessor(:cursor,:expected,:found)
+        def initialize(cursor=nil,expected=nil,found=nil)
+            @cursor = cursor
+            @expected = expected
+            @found = found
+        end
+        def to_s
+            err = [super]
+            err << "expected #{@expected.inspect}" if @expected
+            err << "found #{@found.inspect}" if @found
+            begin
+                #err << @cursor.to_s if @cursor
+            rescue
+            end
+            err * ", "
+        end
+    end
+    # :stopdoc:
+    # Parse buffer that throws out everything
+    # Singleton instance of an anonymous class.  #concat and #<< ignore
+    # their argument and return self, and both #class and #new return self
+    # as well, so code doing buffer.class.new on DISCARD gets DISCARD back.
+    DISCARD = Class.new {
+        def concat(v);self;end
+        def << (v);self;end
+        define_method(:class) do;self;end # using "def class" messed up rdoc
+        def new;self;end
+    }.new
+    # :startdoc:
+end
+class Cursor
+    # A Cursor that gets its data from a producer Thread.  This Thread is
+    # generated from the block given (passed +self+).  The code in this
+    # block is expected to apply the << and concat methods to the argument
+    # given.  The current Thread is the consumer.
+    #
+    # Unfortunately, this Cursor isn't full-featured (yet).  It is not
+    # reversible.  This will one day be reversible (#pos*).
+    class Producer < Cursor
+        def initialize(max_size=16,&producer)
+            @buffer = []
+            @size = 0
+            @max_size = max_size
+            @consumer = Thread.current
+            @producer = Thread.new { producer[self] }
+        end
+        def new_data
+            []
+        end
+        def read1next
+            while (Thread.critical=true;@buffer.empty?&&@producer.alive?)
+                Thread.critical = false
+                @producer.run
+            end
+            v = @buffer.shift
+            @size -= 1
+            v
+        ensure
+            Thread.critical = false
+        end
+        def read1after
+            v = read1next
+            unless v.nil?;begin
+                Thread.critical = true
+                @buffer.unshift(v)
+            ensure
+                Thread.critical = false
+            end;end
+            v
+        end
+        def skip1after
+            read1after.nil? ? nil : true
+        end
+        def scan1next(v)
+            v0 = read1next
+            (v0.nil? || v==v0) ? v0 : begin
+                Thread.critical = true
+                @buffer.unshift(v0)
+                nil
+            ensure
+                Thread.critical = false
+            end
+        end
+        def << (v)
+            while (Thread.critical=true;@size>=@max_size&&@consumer.alive?)
+                Thread.critical = false
+                @consumer.run
+            end
+            @buffer << v
+            @size += 1
+            self
+        ensure
+            Thread.critical = false
+        end
+        def concat(value)
+            i = 0
+            until (v = value[i]).nil?
+                self << v
+            end
+            self
+        end
+    end
+end

data/samples/fact.tcl ADDED

@@ -0,0 +1,12 @@
+proc factorial a {
+    # Recursive factorial of the argument a
+    # The product, subtract and sum commands are not defined in this file
+    set ret 1
+    # factorial(0) == 1
+    if $a {
+        set ret [product $a [factorial \
+            [subtract $a 1]]]
+    }
+    sum $ret
+}
+set x::y(1) 6
+concat "factorial(\"$x::y(1)\") = " [factorial $x::y(1)] "\n"

data/samples/infix2postfix.rb ADDED

@@ -0,0 +1,114 @@
+#!/bin/env ruby
+require 'rubygems'
+require 'cursor/io'
+require 'cursor/indexed'
+require 'grammar'
+require 'duck'
+require 'set'
+class Expression
+# Builds and returns the lexer Grammar: a list0 of token alternatives
+# (spacing, identifier, number, string, any other element) terminated by
+# Grammar::EOF.
+def self.lexer
+    # Whitespace is matched and discarded.
+    space = Grammar::Element[Set[?\ ,?\t,?\n].
+        duck!(:==,:include?,:to_s,:inspect)]
+    spacing = space.discard.list1
+    alpha = Grammar::Element[(Set[?_]+(?a..?z)+(?A..?Z)).
+        duck!(:==,:include?,:to_s,:inspect)]
+    alphanum = Grammar::Element[(Set[?_]+(?0..?9)+(?a..?z)+(?A..?Z)).
+        duck!(:==,:include?,:to_s,:inspect)]
+    # Identifiers are collected into a String and emitted as a Symbol.
+    identifier = (alpha+alphanum.list0).
+        filter(String) { |iden,buf| buf << iden.to_sym }
+    int = Grammar::Element[(?0..?9).duck!(:==,:===)].list1
+    # A number is an integer part plus an optional fraction/exponent part,
+    # each grouped into its own String.  It is emitted via to_i when the
+    # second group is empty, via to_f otherwise.
+    number =
+        (int.group(String) +
+        ((Grammar::Element[?.]+int).optional +
+          ((Grammar::Element[?e]|Grammar::Element[?E])+
+           (Grammar::Element[?+]|Grammar::Element[?-]).optional+int).optional).
+           group(String)).
+        filter(Array) { |num,buf|
+            buf << (num[1].empty? ? num.to_s.to_i : num.to_s.to_f)
+        }
+    hex = Grammar::Element[(
+        Set[]+(?0..?9)+(?a..?f)+(?A..?F)
+    ).duck!(:==,:include?,:to_s,:inspect)]
+    octal = Grammar::Element[(?0..?7).duck!(:==,:===)]
+    # Backslash escapes inside string literals; the hex and octal forms are
+    # turned into characters by eval-ing an equivalent Ruby string literal.
+    backslashed = Grammar::Element[?\\].discard+(
+        Grammar::Element[?a].filter { "\a" } |
+        Grammar::Element[?b].filter { "\b" } |
+        Grammar::Element[?f].filter { "\f" } |
+        Grammar::Element[?n].filter { "\n" } |
+        Grammar::Element[?r].filter { "\r" } |
+        Grammar::Element[?t].filter { "\t" } |
+        Grammar::Element[?v].filter { "\v" } |
+        Grammar::Element[?x].discard+hex.list1.filter { |n|
+            eval(%Q("\\x#{n}"))
+        } |
+        octal.list1.filter { |n|
+            eval(%Q("\\#{n}"))
+        } |
+        Grammar::ANY
+    )
+    character =
+        backslashed |
+        Grammar::ANY
+    # A string is everything between double quotes, grouped into a String;
+    # the quotes themselves are discarded.
+    string = Grammar::Element[?\"].discard +
+        character.list1(Grammar::Element[?\"].discard).group(String)
+    # Any other single element becomes a Symbol token.
+    other = Grammar::ANY.filter(String) { |op,buf| buf << op.to_sym }
+    (
+        spacing |
+        identifier |
+        number |
+        string |
+        other
+    ).list0(Grammar::EOF)
+end
+# Builds and returns the parser Grammar over the lexer's tokens.  The
+# filters on +product+ and +sum+ move a pending operator behind its
+# operand in the output buffer.
+def self.parser
+    # Token classes, matched by type through the duck-typed == operator.
+    integer = Grammar::Element[Integer.duck!(:==,:===)]
+    float = Grammar::Element[Float.duck!(:==,:===)]
+    string = Grammar::Element[String.duck!(:==,:===)]
+    # An identifier is a Symbol whose text starts with "_" or a letter.
+    identifier = Grammar::Element[lambda { |v|
+        Symbol===v && ( /\A[_a-zA-Z]/=~v.to_s )
+    }.duck!(:==)]
+    # The block receives the (still empty) grammar so that +primary+ can
+    # refer to +expression+ recursively; its value (+sum+) becomes the
+    # grammar.
+    expression = Grammar.new { |expression|
+        primary =
+            integer |
+            float |
+            string |
+            identifier |
+            Grammar::Element[:"("].discard+expression+Grammar::Element[:")"].discard
+        product_op = Grammar::Element[:"*"]|Grammar::Element[:"/"]
+        # Pop the last buffer entry (the operator, if any), append the
+        # operand, then put the operator back after it.
+        product = primary.filter(Array) { |x,buf|
+            op = buf.pop
+            buf.concat(x)
+            buf << op if op
+            buf
+        }.list1(nil,product_op)
+        sum_op = Grammar::Element[:"+"]|Grammar::Element[:"-"]
+        # Same reordering one level up, for + and -.
+        sum = product.filter(Array) { |x,buf|
+            op = buf.pop
+            buf.concat(x)
+            buf << op if op
+            buf
+        }.list1(nil,sum_op)
+    }
+end
+end
+if __FILE__==$0
+    lexer = Expression.lexer
+    token_buffer = Cursor::Producer.new { |buffer|
+        lexer.scan($stdin.to_cursor,buffer)
+    }
+    parser = Expression.parser
+    result = parser.scan(token_buffer)
+    p result
+    result
+end

data/samples/tcl.rb ADDED

@@ -0,0 +1,163 @@
+#!/bin/env ruby
+require 'rubygems'
+require 'cursor/io'
+require 'cursor/indexed'
+require 'grammar'
+require 'duck'
+require 'set'
+class Tcl
+# Sets up empty variable and procedure tables and builds the grammar that
+# doubles as the interpreter for a small Tcl subset; the finished grammar is
+# stored in @interpreter (see #interpret).
+def initialize
+    @variables = {}
+    @procs = {}
+    # --- character classes ---
+    space = Grammar::Element[Set[?\ ,?\t].duck!(:==,:include?)].discard
+    newline = Grammar::Element[?\n]
+    command_separator = newline|Grammar::Element[?;]
+    hex = Grammar::Element[(
+        Set[]+(?0..?9)+(?a..?f)+(?A..?F)
+    ).duck!(:==,:include?)]
+    octal = Grammar::Element[(?0..?7).duck!(:==,:===)]
+    alphanum = Grammar::Element[(
+        Set[?_]+(?0..?9)+(?a..?z)+(?A..?Z)
+    ).duck!(:==,:include?)]
+    # --- backslash substitution ---
+    # The numeric escapes are decoded by eval'ing a Ruby string literal built
+    # from the matched digits; the digits come only from the hex/octal matchers.
+    backslashed = Grammar::Element[?\\].discard+(
+        Grammar::Element[?a].filter { "\a" } |
+        Grammar::Element[?b].filter { "\b" } |
+        Grammar::Element[?f].filter { "\f" } |
+        Grammar::Element[?n].filter { "\n" } |
+        Grammar::Element[?r].filter { "\r" } |
+        Grammar::Element[?t].filter { "\t" } |
+        Grammar::Element[?v].filter { "\v" } |
+        Grammar::Element[?x].discard+hex.list1.filter { |n|
+            eval(%Q("\\x#{n}"))
+        } |
+        # NOTE(review): unlike the \x branch above, this block is handed to
+        # list1 rather than to filter -- confirm list1 actually uses a block,
+        # otherwise the \u digits are never converted.
+        Grammar::Element[?u].discard+hex.list1 { |n| # don't know what to do with unicode
+            eval(%Q("\\x#{n}"))
+        } |
+        octal.list1.filter { |n|
+            eval(%Q("\\#{n}"))
+        } |
+        (newline.discard+space.list0).filter { " " } |
+        Grammar::ANY
+    )
+    # --- {braced} words: recursive so that nested braces stay balanced ---
+    braced_element = Grammar.new { |braced_element|
+        Grammar::Element[?\{]+braced_element.list0(Grammar::Element[?\}]) |
+        Grammar::Element[?\\].discard+(
+            (newline.discard+space.list0).filter { " " } |
+            Grammar::NULL.filter { "\\" }
+        ) |
+        newline |
+        Grammar::ANY
+    }
+    braced = Grammar::Element[?\{].discard+braced_element.list0(Grammar::Element[?\}].discard)
+    # Forward declaration: +element+ is referenced by the rules below and only
+    # given its definition by the << on the last line of this method.
+    element = Grammar.new
+    # --- $variable substitution ---
+    # The range multiplier has no upper bound (1.0/0 is Infinity): two or more ':'
+    varname = (
+        alphanum |
+        Grammar::Element[?\:]*(2..+1.0/0)
+    ).list1
+    index = Grammar::Element[?\(]+element.list1(Grammar::Element[?\)])
+    # The matched name is looked up in @variables; the result is always a String
+    variable = (Grammar::Element[?\$].discard + (
+        varname.group(String) + index.group(String).optional |
+        braced.group(String)
+    )).filter { |var| @variables[var.to_s.to_sym].to_s }
+    quoted = Grammar::Element[?\"].discard+element.list1(Grammar::Element[?\"].discard)
+    comment = (Grammar::Element[?\#]+Grammar::ANY.list0(newline)).discard
+    # --- commands ---
+    # commander[terminator] builds a grammar for a list of commands ending at
+    # +terminator+ (EOF for a whole script, ']' for command substitution).
+    # NOTE(review): the unary + on a grammar presumably turns it into a
+    # lookahead that matches without consuming -- confirm in Grammar.
+    commander = lambda { |terminator|
+        word_terminator = space.list1|+command_separator|+terminator
+        word = ((braced|quoted)+word_terminator|element.list1(word_terminator)).
+            group(String).filter {|x,t| t.concat(x)}
+        # A non-empty command is executed by send: its first word names the
+        # method to call on this object and the remaining words are arguments.
+        # SECURITY NOTE(review): send can reach any method, private Kernel
+        # methods included, so the script chooses what Ruby code runs --
+        # only interpret trusted scripts.
+        command = space.list0 + (
+            comment |
+            word.list0(command_separator.discard|+terminator).
+                filter(Array) { |com,ret|
+                    com.empty? ? ret : ret.replace(send(*com).to_s)
+                }
+        )
+        command.list0(terminator.discard)
+    }
+    bracketed = Grammar::Element[?[].discard+commander[Grammar::Element[?]]]
+    @interpreter = commander[Grammar::EOF]
+    # Close the recursion: give the forward-declared +element+ its definition
+    element << (backslashed | bracketed | variable | newline | Grammar::ANY)
+end
+def interpret(cursor)
+    @interpreter.scan(cursor,"",false)
+end
+def method_missing(name,*args)
+    vars,body = *@procs[name]
+    return "<#{name}#{args.inspect}>" if !body
+    variables = @variables
+    @variables = {}
+    vars.zip(args).each { |var,val| @variables[var.to_sym] = val }
+    ret = interpret(body.to_cursor)
+    @variables = variables
+    ret
+end
+def proc(name,args,body)
+    @procs[name.to_sym] = [args,body]
+    ""
+end
+def set(name,value)
+    @variables[name.to_sym] = value
+end
+def if(condition,body)
+    # should really use expr to get condition
+    unless %w(0 false no off).include?(condition.to_s)
+        interpret(body.to_cursor)
+    end
+    # need to handle elsif and else
+    ""
+end
+# Tcl-visible +sum+ command: adds up all arguments, starting from 0.
+# Each argument is converted to a number by handing it to Kernel#eval.
+# SECURITY NOTE(review): the arguments presumably come straight from the
+# interpreted script, so eval runs whatever Ruby they contain (with access to
+# this method's locals) -- consider a strict numeric parser.
+def sum(*values)
+    values.inject(0) { |sum, v| sum + eval(v) }
+end
+# Tcl-visible +product+ command: multiplies all arguments, starting from 1
+# (the block's accumulator is named +sum+ but holds the running product).
+# Each argument is converted to a number by handing it to Kernel#eval.
+# SECURITY NOTE(review): the arguments presumably come straight from the
+# interpreted script, so eval runs whatever Ruby they contain -- consider a
+# strict numeric parser.
+def product(*values)
+    values.inject(1) { |sum, v| sum * eval(v) }
+end
+# Tcl-visible +subtract+ command: returns a - b after evaluating both
+# arguments with Kernel#eval.
+# SECURITY NOTE(review): eval executes arbitrary Ruby found in the arguments,
+# which presumably come from the interpreted script.
+def subtract(a,b)
+    eval(a)-eval(b)
+end
+# Tcl-visible +divide+ command: returns a / b after evaluating both arguments
+# with Kernel#eval; the kind of division follows from the evaluated operands.
+# SECURITY NOTE(review): eval executes arbitrary Ruby found in the arguments,
+# which presumably come from the interpreted script.
+def divide(a,b)
+    eval(a)/eval(b)
+end
+def puts(str)
+    $stdout.print(str,"\n")
+    ""
+end
+def concat(*args)
+    args.inject("") { |concatenated, arg| concatenated.concat(arg) }
+end
+end
+if $0==__FILE__
+    result = Tcl.new.interpret($stdin.to_cursor)
+    p result
+    result
+end

data/samples/test.infix ADDED

@@ -0,0 +1,4 @@
+"one\n" *
+(2+3.0  /4e0*(5E+0+6.0e-0)-07)+
+(" eight\t"*nine)

data/test/test_grammar.rb ADDED

@@ -0,0 +1,274 @@
+#!/bin/env ruby
+require 'rubygems'
+require 'test/unit'
+require 'test/unit/collector'
+require 'cursor/indexed'
+require 'grammar'
+module Test
+  module Unit
+    # Reopens Test::Unit's AutoRunner to add command line options for
+    # randomized test runs and to seed the random number generator.
+    class AutoRunner
+      # Keep the original implementation reachable before overriding it
+      alias_method(:_options_,:options)
+      # Extends the stock option parser with -i/--iterations and -s/--seed;
+      # the parsed values are published through globals.
+      def options
+        @options = _options_
+        @options.on('-i', '--iterations=NUMBER', Float,
+               "Randomly run tests for a number of iterations.") do |iterations|
+            $random_iterations = iterations
+          end
+        @options.on('-s', '--seed=NUMBER', Integer,
+               "Random seed.") do |seed|
+            # nonzero? yields nil for 0, so a seed of 0 means "choose one"
+            $random_seed = seed.nonzero?
+          end
+        @options
+      end
+      alias_method(:_run_,:run)
+      # Publishes the output level, fixes the random seed and then delegates
+      # to the original run.
+      def run
+        $output_level = @output_level
+        # srand returns the previous seed: the first call installs a fresh
+        # seed and the second call hands that fresh seed back so that it can
+        # be reported and reused.
+        $random_seed ||= (srand;srand)
+        srand($random_seed)
+        _run_
+      end
+    end
+    # Reopens Test::Unit's Collector so that, when an iteration factor was
+    # given, each collected suite runs randomly chosen tests instead of every
+    # test once in order.
+    module Collector
+      alias_method(:_add_suite_,:add_suite)
+      # Adds +suite+ through the original add_suite and returns +destination+.
+      # If $random_iterations is set, +each+ is redefined on the suite's own
+      # tests collection only (via its singleton class).
+      def add_suite(destination, suite)
+        _add_suite_(destination, suite)
+        if $random_iterations
+          (class << suite.tests;self;end).class_eval {
+            # Yields ($random_iterations * number of tests) randomly picked
+            # tests; a test may be picked several times or not at all.
+            def each
+              n = size
+              ($random_iterations*n).to_i.times {
+                yield(slice(rand(n)))
+              }
+            end
+          }
+        end
+        destination
+      end
+    end
+    class TestSuite
+      def run(result, &progress_block)
+        yield(STARTED, name)
+        catch(:stop_suite) {
+          @tests.each { |test|
+            catch(:invalid_test) {
+              test.run(result, &progress_block)
+            }
+          }
+        }
+        yield(FINISHED, name)
+      end
+    end
+    class RandomTestCase < TestCase
+      def self.suite
+        suite = super
+        puts("random_seed: #{$random_seed}") if !suite.tests.empty? and $output_level>=UI::NORMAL
+        suite
+      end
+      undef_method(:default_test) # so that RandomTestCase is empty
+      def teardown
+        if not passed?
+          puts("\nrandom_seed: #{$random_seed}")
+          throw(:stop_suite)
+        end
+      end
+    end
+  end
+end
+class Grammar
+class Test < ::Test::Unit::RandomTestCase
+class Grammars
+    include ::Test::Unit::Assertions
+    def initialize
+        @grammar = []
+        @match = []
+        @parsed = []
+        @mismatch = []
+        @partial_match = []
+    end
+    def get
+        i = rand(@grammar.size.nonzero? || throw(:invalid_test))
+        return @grammar[i],@match[i],@parsed[i],@mismatch[i],@partial_match[i]
+    end
+    def add(grammar,match,parsed,mismatch=nil,partial_match=nil)
+        puts("#{grammar.inspect} #{match.inspect} #{parsed.inspect} #{mismatch.inspect} #{partial_match.inspect}") if
+            $output_level>=::Test::Unit::UI::VERBOSE
+        match.size.times { |i|
+            assert_equal(parsed[i],grammar.scan(match[i].to_cursor))
+        }
+        if mismatch
+            assert_raise(Grammar::Error){p grammar.scan(mismatch.to_cursor)}
+            assert_equal(false,grammar.scan(mismatch.to_cursor,[],true))
+        end
+        if partial_match
+            assert_raise(Grammar::Error){grammar.scan(partial_match.to_cursor)}
+        end
+        @grammar << grammar
+        @match << match
+        @parsed << parsed
+        @mismatch << mismatch
+        @partial_match << partial_match
+        nil
+    end
+end
+def self.suite
+    suite = super
+    self.plant
+    suite
+end
+# Replaces the shared grammar pool with a new, empty Grammars instance.  The
+# pool is a class variable because the test_* instance methods read and
+# extend it.
+def self.plant
+    @@grammars = Grammars.new
+end
+def test_Sequence
+    partial = rand(2)==1
+    value = ["a","bc"][rand(2)]
+    match = value.dup
+    if partial
+        match = [match,match[0,1+rand(value.size)]]
+    else
+        match = [match]
+    end
+    parsed = match.map{|s|s.unpack("C*")}
+    @@grammars.add(Grammar::Sequence.new(value,partial),match,parsed,"")
+end
+def test_Element
+    value = [?a,?b][rand(2)]
+    match = ["" << value]
+    parsed = [[value]]
+    @@grammars.add(Grammar::Element.new(value),match,parsed,"")
+end
+def test_NULL
+    @@grammars.add(Grammar::NULL,[""],[[]])
+end
+# Combines two random grammars from the pool with | (alternation) and
+# registers the result.  The attempt is abandoned with :invalid_test when the
+# chosen pair cannot be checked unambiguously.
+def test_or
+    grammar1,match1,parsed1,mismatch1,partial1 = @@grammars.get
+    grammar2,match2,parsed2,mismatch2,partial2 = @@grammars.get
+    # pick one sample input (and its expected parse) per grammar
+    i = rand(match1.size)
+    match1 = match1[i]
+    parsed1 = parsed1[i]
+    i = rand(match2.size)
+    match2 = match2[i]
+    parsed2 = parsed2[i]
+    begin
+        # match2 shouldn't match grammar1
+        grammar1.scan(match2.to_cursor,[],true) and throw(:invalid_test)
+    rescue Grammar::Error
+        throw(:invalid_test) # partial match
+    end
+    # mismatch/partial samples are only carried over when both grammars
+    # share the same one
+    @@grammars.add(
+        grammar1|grammar2,
+        [match1,match2],
+        [parsed1,parsed2],
+        mismatch1==mismatch2 ? mismatch1 : nil,
+        partial1==partial2 ? partial1 : nil
+    )
+end
+# Combines two random grammars from the pool with + (sequence) and registers
+# the result.  The attempt is abandoned with :invalid_test when grammar1 would
+# not stop exactly at the end of its own sample.
+def test_plus
+    grammar1,match1,parsed1,mismatch1,partial1 = @@grammars.get
+    grammar2,match2,parsed2,mismatch2,partial2 = @@grammars.get
+    # pick one sample input (and its expected parse) per grammar
+    i = rand(match1.size)
+    match1 = match1[i]
+    parsed1 = parsed1[i]
+    i = rand(match2.size)
+    match2 = match2[i]
+    parsed2 = parsed2[i]
+    # grammar1 shouldn't eat into match2
+    begin
+        grammar1.scan((match1+match2).to_cursor)==parsed1 or
+        throw(:invalid_test)
+    rescue Grammar::Error
+        throw(:invalid_test)
+    end
+    # The sequence mismatches wherever grammar1 does.  A partial match is
+    # either grammar1's own, or a full match1 followed by an input that
+    # grammar2 rejects.
+    @@grammars.add(
+        grammar1+grammar2,
+        [match1+match2],
+        [parsed1+parsed2],
+        mismatch1,
+        partial1 || (mismatch2 && match1+mismatch2)
+    )
+end
+# Exercises recursion through Grammar#<<: builds
+#   grammar := grammar1 (grammar | NULL) grammar2
+# from two random pool grammars and registers it with a one-level and a
+# three-level nested sample.
+def test_Grammar
+    grammar1,match1,parsed1,mismatch1,partial1 = @@grammars.get
+    grammar2,match2,parsed2,mismatch2,partial2 = @@grammars.get
+    # pick one sample input (and its expected parse) per grammar
+    i = rand(match1.size)
+    match1 = match1[i]
+    parsed1 = parsed1[i]
+    i = rand(match2.size)
+    match2 = match2[i]
+    parsed2 = parsed2[i]
+    # an empty match1 is not usable for this test
+    !match1.empty? or throw(:invalid_test)
+    begin
+        # each grammar must stop exactly after its own sample whatever sample
+        # follows, otherwise the nested inputs below would be ambiguous
+        (grammar1.scan((match1+match1).to_cursor)==parsed1 and
+        grammar1.scan((match1+match2).to_cursor)==parsed1 and
+        grammar2.scan((match2+match2).to_cursor)==parsed2) or
+        throw(:invalid_test)
+        # and grammar1 must not accept what grammar2 matches
+        grammar1.scan(match2.to_cursor,[],true) and throw(:invalid_test)
+    rescue Grammar::Error
+        throw(:invalid_test)
+    end
+    # create the grammar empty, then define it in terms of itself
+    grammar = Grammar.new
+    grammar << grammar1+(grammar|Grammar::NULL)+grammar2
+    @@grammars.add(
+        grammar,
+        [match1+match2,match1+match1+match1+match2+match2+match2],
+        [parsed1+parsed2,parsed1+parsed1+parsed1+parsed2+parsed2+parsed2],
+        mismatch1,
+        partial1
+    )
+end
+# Repeats a random pool grammar with * and registers the result.  The
+# multiplier is either a fixed count or a range (min..max) whose upper bound
+# may be infinite.
+def test_times
+    grammar1,match1,parsed1,mismatch1,partial1 = @@grammars.get
+    # a grammar that accepts the empty input is not usable for this test
+    begin
+        grammar1.scan("".to_cursor,[],true) and throw(:invalid_test)
+    rescue Grammar::Error
+        throw(:invalid_test)
+    end
+    min = rand(2)
+    diff = rand(4)
+    match = ""
+    parsed = []
+    match0 = nil
+    parsed0 = nil
+    # Build an input from between min and min+diff random samples of grammar1
+    (min+rand(diff+1)).times {
+        i = rand(match1.size)
+        match.concat(match1[i])
+        parsed.concat(parsed1[i])
+        if match0
+            # the previous sample must still parse on its own when followed
+            # by the current one (grammar1 must not read into the next piece)
+            begin
+                grammar1.scan((match0+match1[i]).to_cursor)==parsed0 or
+                throw(:invalid_test)
+            rescue Grammar::Error
+                throw(:invalid_test)
+            end
+        end
+        match0 = match1[i]
+        parsed0 = parsed1[i]
+    }
+    # a diff of 3 stands for "no upper bound"
+    diff = 1.0/0.0 if diff==3
+    multiplier = (diff.zero? && rand(2).zero?) ? min : (min..(min+diff))
+    # With min == 0 the repetition also accepts empty input, so no mismatch or
+    # partial sample is given.
+    # NOTE(review): min comes from rand(2), so min>1 is never true here and
+    # the expression after || always evaluates to false.
+    @@grammars.add(
+        grammar1*multiplier,
+        [match],
+        [parsed],
+        min.zero? ? nil : mismatch1,
+        min.zero? ? nil : (partial1 || (min>1 && match1[rand(match1.size)]))
+    )
+end
+end
+end

metadata ADDED

@@ -0,0 +1,54 @@
+--- !ruby/object:Gem::Specification
+rubygems_version: 0.8.10
+specification_version: 1
+name: grammar
+version: !ruby/object:Gem::Version
+  version: "0.5"
+date: 2005-10-13
+summary: BNF-like grammar specified directly in ruby
+require_paths:
+  - lib
+email: Eric under Mahurin at yahoo dot com
+homepage: http://rubyforge.org/projects/grammar/
+rubyforge_project: grammar
+description:
+autorequire:
+default_executable:
+bindir:
+has_rdoc: true
+required_ruby_version: !ruby/object:Gem::Version::Requirement
+  requirements:
+    -
+      - ">"
+      - !ruby/object:Gem::Version
+        version: 0.0.0
+  version:
+platform: ruby
+authors:
+  - Eric Mahurin
+files:
+  - lib/grammar.rb
+  - test/test_grammar.rb
+  - samples/fact.tcl
+  - samples/infix2postfix.rb
+  - samples/tcl.rb
+  - samples/test.infix
+  - samples/CVS
+test_files:
+  - test/test_grammar.rb
+rdoc_options: []
+extra_rdoc_files: []
+executables: []
+extensions: []
+requirements: []
+dependencies:
+  - !ruby/object:Gem::Dependency
+    name: cursor
+    version_requirement:
+    version_requirements: !ruby/object:Gem::Version::Requirement
+      requirements:
+        -
+          - ">="
+          - !ruby/object:Gem::Version
+            version: "0.9"
+      version: