RubyGems - Spectre - Versions diffs - 0.0.1 - Mend

Spectre 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (38) hide show

data/CHANGELOG +1 -0
data/LICENSE +23 -0
data/README +20 -0
data/Rakefile +112 -0
data/lib/spectre/base.rb +44 -0
data/lib/spectre/base/closure.rb +96 -0
data/lib/spectre/base/directive.rb +148 -0
data/lib/spectre/base/grammar.rb +269 -0
data/lib/spectre/base/inputiterator.rb +276 -0
data/lib/spectre/base/node.rb +393 -0
data/lib/spectre/base/operators.rb +342 -0
data/lib/spectre/base/parser.rb +110 -0
data/lib/spectre/generic.rb +115 -0
data/lib/spectre/generic/directives.rb +246 -0
data/lib/spectre/generic/negations.rb +68 -0
data/lib/spectre/generic/primitives.rb +172 -0
data/lib/spectre/generic/semanticaction.rb +43 -0
data/lib/spectre/string.rb +57 -0
data/lib/spectre/string/additionals.rb +80 -0
data/lib/spectre/string/directives.rb +51 -0
data/lib/spectre/string/inputiterator.rb +57 -0
data/lib/spectre/string/primitives.rb +400 -0
data/test/base/closure_tests.rb +108 -0
data/test/base/grammar_tests.rb +97 -0
data/test/base/operator_tests.rb +335 -0
data/test/base/semanticaction_tests.rb +53 -0
data/test/generic/directive_tests.rb +224 -0
data/test/generic/negation_tests.rb +146 -0
data/test/generic/primitive_tests.rb +99 -0
data/test/string/POD2Parser_tests.rb +93 -0
data/test/string/additional_tests.rb +43 -0
data/test/string/directive_tests.rb +32 -0
data/test/string/primitive_tests.rb +173 -0
data/test/tests.rb +33 -0
data/test/tutorial/funnymath_tests.rb +57 -0
data/test/tutorial/html_tests.rb +171 -0
data/test/tutorial/skipping_tests.rb +60 -0
metadata +109 -0

data/lib/spectre/base/grammar.rb ADDED

@@ -0,0 +1,269 @@
+# This is Spectre, a parser framework inspired by Boost.Spirit,
+# which can be found at http://spirit.sourceforge.net/.
+#
+# If you want to find out more or need a tutorial, go to
+# http://spectre.rubyforge.org/
+# You'll find a nice wiki there!
+#
+# Author::      Fabian Streitel (karottenreibe)
+# Copyright::   Copyright (c) 2009 Fabian Streitel
+# License::     Boost Software License 1.0
+#               For further information regarding this license, you can go to
+#               http://www.boost.org/LICENSE_1_0.txt
+#               or read the file LICENSE distributed with this software.
+# Homepage::    http://spectre.rubyforge.org/
+# Git repo::    http://rubyforge.org/scm/?group_id=7618
+#
+# Keeps the Grammar class that can hold recursive Parsers.
+#
+require 'rubygems'
+require 'metaid'
+require 'spectre/base/parser'
+require 'spectre/base/node'
+module Spectre
+    ##
+    # Provides lazy evaluation of the Parser name, so you can use it recursively and before
+    # specifying it.
+    #
+    class SymParser
+        include Parser
+        ##
+        # Initializes the SymParser to look for +sym+.
+        #
+        def initialize sym
+            @sym = sym
+        end
+        def scan is
+            n = @node.find @sym
+            raise "rule #{@sym.inspect} not found." unless n
+            n.parent = @node
+            n.parse is
+        end
+        def inspect
+            "[<#{@sym.inspect}>]"
+        end
+    end
+    ##
+    # Provides the functionality of dynamically defining rules inside Grammars.
+    # Must be mixed into a Parser.
+    #
+    module DynVarMixin
+        ##
+        # Closes over the specified +node+ with a newly constructed Closure.
+        #
+        def close node
+            node = node.to_p
+            node.closure = Closure.new
+            node
+        end
+        ##
+        # Stores the Parsers referenced by +symbol+ => +parser+ from the +hash+ so they can be used
+        # later on.
+        #
+        def rule hash
+            hash.each do |getter,node|
+                @node.symbols[getter] = node.to_p
+            end
+        end
+        ##
+        # Sets the +parser+ with which the Grammar should start parsing.
+        # Does auto-conversion on +parser+.
+        #
+        def start_with parser
+            @start_rule = parser.to_p
+        end
+    end
+    ##
+    # If mixed into a class, it defines shortcut methods for all registered Parsers.
+    # Used by the Spectre standard parsers, e.g. +char('k')+ will be a shortcut for
+    # +CharParser.new('k')+.
+    # See std/std.rb for more details.
+    #
+    module ShortcutsMixin
+        class << self
+            ##
+            # For each +name+ => +klass+ in +hsh+: Register the Parser +klass+ with the +name+ inside
+            # the Grammar class, so that a new Parser of that +klass+ can be chained into a Grammar
+            # simply by calling
+            #   +name _arguments_+
+            # inside the Grammar class definition.
+            # +name+ must be a Symbol.
+            #
+            def register_shortcut hsh
+                hsh.each do |meth,klass|
+                    raise "class Grammar already has a singleton method named '#{meth.to_s}'" if
+                        Grammar.singleton_methods.include? meth.to_s
+                    block = lambda do |*args|
+                        parser = klass.new *args
+                        parser.to_p
+                    end
+                    # define it for the grammars
+                    self.class_def meth, &block
+                end
+            end
+        end
+    end
+    ##
+    # Provides an +inspect+ method for Grammar-like classes.
+    #
+    module GrammarInspectMixin
+        def inspect
+            "[Grammar:" + ( @bound ?
+                "#{@node.symbols.inject(''){ |memo,(s,n)| memo + "{#{s.inspect} => #{n.inspect}} " }[0..-2]}" :
+                "unbound" ) +
+                "]"
+        end
+    end
+    ##
+    # Chains several Parsers together to form a reusable unit and allows for recursion in
+    # Parser definition.
+    #
+    # To define a Grammar, you may use the generator methods to create Parsers and chain
+    # them together. You will then have to store the created top-level Parser in the Grammar
+    # by calling +start_with(parser)+.
+    #
+    # To do so, you first have to call +Grammar.new+, passing it a block that describes the
+    # Grammar's behaviour.
+    # The returned Grammar object can be supplied with arguments at runtime
+    # in order to customize its behaviour, e.g.:
+    #
+    #     mayor = Grammar.new do |city, klass|
+    #         start_with 'Mayor ' >> ( ~blank ).+ >> ", class #{klass}" >>
+    #             ' from ' >> city.to_p
+    #     end
+    #
+    #
+    # The thus created Grammar has to be bound to some arguments before
+    # it can actually be used to parse anything:
+    #
+    #     mayor.bind( AnycharParser.new.+, 'A' )
+    #
+    # It will now parse any Mayor from any city of class 'A'.
+    # You can of course rebind the Grammar anytime (except during parsing):
+    # + mayor.bind( 'Boston', AnycharParser.new )+
+    # Now it will parse any Mayor of any class from 'Boston'.
+    #
+    # The only exception to the binding rule is a dynamic Grammar that takes
+    # no arguments. Such a Grammar will be bound right at instantiation time.
+    # Rebinding will have no effect whatsoever.
+    # NOTE: Due to an existing Ruby bug, you have to define such a Grammar with an empty
+    # argument block:
+    #
+    #
+    #   chunky = Grammar.new do ||
+    #       rule :bacon => ...
+    #   end
+    #
+    #
+    # Otherwise it will not be automatically bound. This will change, as soon as bug #574
+    # is fixed (http://redmine.ruby-lang.org/issues/show/574).
+    #
+    # You may at any time store a parser inside a _rule_, like this:
+    #
+    #
+    #   towns_folk = Grammar.new do ||
+    #       start_with :person % ( 'from '.to_p >> :town >> ', ' )
+    #       rule :person => :name >> blank.+ >> :name,
+    #            :town => :name
+    #       rule :name => ( ~blank ).+
+    #   end
+    #
+    #
+    # As you can also see here, the rules are evaluated lazily, thus enabling
+    # you to use parsers recursively and before they have actually been defined.
+    #
+    # NOTE: As tempting as it may be, do NOT use instance variables to store Parsers,
+    # because if you use the Parser more than once, the backtrace of the first
+    # invocation of that Parser will be lost as soon as it is invoked a second time.
+    # Also the use of Closures will be broken.
+    # Storing the parsers as is described above will circumvent this problem by dupping
+    # the Parser each time it is invoked.
+    #
+    # If you'd like to provide Grammar-like functionality in your own class(es), you can
+    # receive some from the mixins DynVarMixin, ShortcutsMixin and GrammarInspectMixin.
+    #
+    class Grammar < Node
+        # if we don't preserve the backtrack method, we'll run into an endless loop
+        alias_method :node_backtrack, :backtrack
+        include Parser
+        alias_method :backtrack, :node_backtrack
+        include DynVarMixin
+        include ShortcutsMixin
+        include GrammarInspectMixin
+        ##
+        # Defines a new Grammar.
+        # The passed +block+ will be executed once the returned Grammar's
+        # +bind+ method is called. All of +bind+'s parameters will be
+        # passed to the block.
+        #
+        # If the +block+ takes no arguments, it will be bound at instantiation time.
+        # NOTE: Ruby bug 574 (http://redmine.ruby-lang.org/issues/show/574)
+        #
+        # See Grammar for an example.
+        #
+        def initialize &block
+            @dynamic = block
+            @bound = false
+            # nice little trick: we are node and parser in one
+            # but for upwards compatibility, we will act as if it weren't so
+            # from here on
+            super(self)
+            self.bind if block.arity == 0
+        end
+        def to_p; self; end
+        ##
+        # Binds a Grammar to a set of values supplied in the +args+.
+        # Executes the block given to +#initialize+.
+        #
+        def bind *args
+            self.instance_exec *args, &@dynamic
+            @bound = true
+            self
+        end
+        ##
+        # Parses the InputIterator +iter+ with the Parsers defined in the Grammar.
+        # The Grammar must have been bound before doing so.
+        #
+        def scan iter
+            raise "a dynamic Grammar must be bound to a value" unless @bound
+            raise "you need to set a start rule" unless @start_rule
+            n = @start_rule.dup
+            # sort into tree
+            n.parent = @node
+            @node.left = n
+            # start parsing
+            create_match iter, n.parse(iter)
+        end
+    end
+    ##
+    # The SymParser shortcut is +sym+.
+    # The registration happens after the Grammar class definition because
+    # +register_shortcut+ refers to the Grammar constant.
+    ShortcutsMixin.register_shortcut :sym => SymParser
+end

data/lib/spectre/base/inputiterator.rb ADDED

@@ -0,0 +1,276 @@
+# This is Spectre, a parser framework inspired by Boost.Spirit,
+# which can be found at http://spirit.sourceforge.net/.
+#
+# If you want to find out more or need a tutorial, go to
+# http://spectre.rubyforge.org/
+# You'll find a nice wiki there!
+#
+# Author::      Fabian Streitel (karottenreibe)
+# Copyright::   Copyright (c) 2009 Fabian Streitel
+# License::     Boost Software License 1.0
+#               For further information regarding this license, you can go to
+#               http://www.boost.org/LICENSE_1_0.txt
+#               or read the file LICENSE distributed with this software.
+# Homepage::    http://spectre.rubyforge.org/
+# Git repo::    http://rubyforge.org/scm/?group_id=7618
+#
+# Keeps the InputIterator class.
+#
+module Spectre
+    ##
+    # Used to access the input stream.
+    # The standard implementation works with Integers on Array or String-like structures and
+    # is a forward iterator. Jumping to a position behind the current one is only possible via
+    # a call to +#to+.
+    # To actually be able to use the InputIterator, you have to subclass it and implement the
+    # +#concat+ and +#empty+ methods.
+    # When implementing iterators for non-array-like data, you will also have to reimplement
+    # +#get+ and +#valid?+.
+    # When implementing iterators which do not rely on 0 based Integers, you will also have to
+    # reimplement +#\++, +#\+@+, +#-+, +#skip!+ and +#to+.
+    #
+    # = The Input =
+    #
+    # The input the InputIterator will traverse must have some properties, regardless of the Parsers
+    # used on it. It must be
+    # - comparable, i.e. it has to supply the standard comparison operators <, >, ==, != etc.
+    # - non-atomic, i.e. you must be able to split the input into pieces.
+    #
+    class InputIterator
+        ##
+        # The input this iterator works on.
+        # The default is an array-like structure.
+        attr_accessor :input
+        ##
+        # A skipper object that is called for every retrieved token from the input.
+        # It is required to return either +nil+, which causes the token to be processed
+        # normally, or an Integer > 1, which describes how many tokens should be skipped
+        # from and including the current one.
+        #
+        # The transformation is invoked via it's +#call+ method, thus effectively enabling the
+        # use of lambda blocks as transformations.
+        # The object will be passed the token to process and the InputIterator as parameters.
+        #
+        # NOTE:
+        # - The skipper should be set to +:default+ if the default is to be used
+        # - The skipper should be +nil+ if it is required to let all tokens pass as valid
+        # - Parsers may choose to ignore the skipper, e.g. the StringParser, which parses literal
+        #   Strings, must ignore any (white space) skippers in order to function correctly
+        # - The skipper must _not_ modify the InputIterator
+        attr_accessor :skipper
+        ##
+        # A transformation object that is called for every retrieved token from the input, unless the
+        # skipper instructed the iterator not to process the token.
+        # It may modify that token and is expected to return that modified token. The Parses will then be
+        # supplied with that token instead of the original one.
+        #
+        # The transformation is invoked via it's +#call+ method, thus effectively enabling the
+        # use of lambda blocks as transformations.
+        # The object will be passed the token to process and the InputIterator as parameters.
+        #
+        # NOTE:
+        # - The transformation should be set to +:default+ if the default is to be used
+        # - The transformation should be +nil+ if it is required to leave all tokens unmodified
+        # - Parsers should never ignore the transformation
+        # - The transformation must _not_ modify the InputIterator
+        attr_accessor :transformation
+        ##
+        # The position this iterator is currently at.
+        # The default is an Integer.
+        attr_accessor :pos
+        ##
+        # Initializes the iterator to a +pos+ition on an +input+.
+        #
+        def initialize input, pos = 0
+            @pos, @input, @transformation, @skipper = pos, input, :default, :default
+        end
+        ##
+        # Copies the position and input reference from the +other+ iterator to initialize
+        # this one.
+        #
+        def initialize_copy other
+            @pos, @input = other.pos, other.input
+        end
+        ##
+        # Returns the token at the current position and advances by one token.
+        #
+        def +@
+            token,len = self.internal_get(@pos..@pos)
+            @pos += len
+            token
+        end
+        ##
+        # Returns the next n tokens from (and including) the current position and advances
+        # by n tokens.
+        #
+        def + n
+            tokens,len = self.internal_get(@pos..@pos+n-1)
+            @pos += len
+            tokens
+        end
+        ##
+        # Sets the iterator to point to address +n+. Will _not_ return the token at that position but
+        # the modified iterator instead..
+        #
+        def to n
+            @pos = n
+            self
+        end
+        ##
+        # Calculates the distance between the positions this iterator and the other +iter+ point to.
+        # If they point to the same location, the distance will be 0, if this iterator points behind
+        # +iter+, the distance will be positive, else negative.
+        #
+        # NOTE: This method must be used to correctly calculate Match length in Parsers.
+        #
+        def - iter
+            @pos - iter.pos
+        end
+        ##
+        # Whether or not this iterator points to a valid location in the input.
+        # This must take the skip parser into consideration.
+        #
+        def valid?
+            return false if @pos >= @input.length
+            pos = @pos
+            while skip = skipper.call(@input[pos..pos], self)
+                pos += skip
+            end
+            return false if pos >= @input.length
+            true
+        end
+        ##
+        # If there are any skippable tokens from (and including) the current position, a call to this
+        # method will cause the InputIterator to advance over them to the next non-skippable token.
+        #
+        def skip!
+            copy = self.dup
+            while dist = skipper.call(@input[@pos..@pos], copy)
+                @pos += dist
+            end
+        end
+        ##
+        # Returns the default skipper for this InputIterator class.
+        # The default implementation simply returns +nil+.
+        #
+        def default_skipper
+            nil
+        end
+        ##
+        # Returns the default transformation for this InputIterator class.
+        # The default implementation simply returns +nil+.
+        #
+        def default_transformation
+            nil
+        end
+        ##
+        # Returns the transformation or an empty transformation or the +#default_transformation+ if
+        # the transformation is set to +nil+.
+        #
+        def transformation
+            @transformation == :default ?
+                default_transformation || lambda { |token,iter| token } :
+                @transformation || lambda { |token,iter| token }
+        end
+        ##
+        # Returns the skipper or an empty skipper or the +#default_skipper+ if the skipper is set
+        # to +:default+.
+        #
+        def skipper
+            @skipper == :default ?
+                default_skipper || lambda { nil } :
+                @skipper || lambda { nil }
+        end
+        ##
+        # Returns all of the input that has not yet been parsed, while ignoring the skipper.
+        #
+        def rest
+            @input[@pos..-1]
+        end
+        ##
+        # Retrieves the tokens from within the specified +range+, with the transformation applied.
+        #
+        def get *args
+            self.internal_get(*args)[0]
+        end
+        ##
+        # Executes the given +block+ with the skipper being set to +nil+.
+        #
+        def ignore_skipper &block
+            return unless block_given?
+            bak, @skipper = @skipper, nil
+            yield self
+            @skipper = bak
+        end
+        ##
+        # Concatenates the two values and returns the result. The values will be of the
+        # same type as the input. Must be able to handle nil as a value as well.
+        # Must be implemented by a subclass.
+        #
+        def concat val1, val2
+            nil
+        end
+        ##
+        # Returns an empty object of the input type, e.g. an empty String or an empty Array.
+        # Must be implemented by a subclass.
+        #
+        def empty
+            nil
+        end
+        protected
+        ##
+        # Retrieves the tokens from within the specified +range+, with the transformation applied.
+        # Returns +[tokens,len]+, where +len+ is the length of the retrieved input, including skipped
+        # tokens.
+        # NOTE: This method is inteded for iterator internal use only. Use +#get+ instead.
+        #
+        def internal_get range = (@pos..@pos)
+            buf = self.empty
+            pos = range.first
+            while buf.length < range.count and pos < @input.length
+                skip = skipper.call @input[pos..pos], self
+                if skip
+                    pos += skip
+                else
+                    buf = self.concat buf, transformation.call(@input[pos..pos], self)
+                    pos += 1
+                end
+            end
+            [buf, pos - range.first]
+        end
+    end
+end