RubyGems - dhaka - Versions diffs - 0.0.5 → 0.0.6 - Mend

dhaka 0.0.5 → 0.0.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (23)

data/lib/dhaka.rb +6 -2
data/lib/evaluator/evaluator.rb +32 -19
data/lib/grammar/closure_hash.rb +2 -1
data/lib/grammar/grammar.rb +52 -25
data/lib/grammar/grammar_symbol.rb +4 -0
data/lib/grammar/precedence.rb +1 -1
data/lib/parser/action.rb +4 -3
data/lib/parser/channel.rb +4 -3
data/lib/parser/compiled_parser.rb +2 -0
data/lib/parser/item.rb +2 -1
data/lib/parser/parse_result.rb +13 -8
data/lib/parser/parse_tree.rb +22 -16
data/lib/parser/parser.rb +29 -27
data/lib/parser/parser_methods.rb +2 -0
data/lib/parser/parser_run.rb +1 -1
data/lib/parser/parser_state.rb +2 -2
data/lib/parser/token.rb +2 -0
data/lib/tokenizer/tokenizer.rb +52 -33
data/test/arithmetic_precedence_tokenizer.rb +8 -4
data/test/arithmetic_tokenizer.rb +8 -4
data/test/bracket_tokenizer.rb +1 -1
data/test/parser_test.rb +11 -1
metadata +3 -3

data/lib/dhaka.rb CHANGED

@@ -21,6 +21,12 @@
 # WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 #++
+# An introduction to Dhaka and annotated examples can be found at the project homepage http://dhaka.rubyforge.org
+#
+# Further examples can be found in the test suites included with the gem.
+module Dhaka
+end
 require File.dirname(__FILE__)+'/grammar/grammar_symbol'
 require File.dirname(__FILE__)+'/grammar/production'
 require File.dirname(__FILE__)+'/grammar/closure_hash'
@@ -41,5 +47,3 @@ require File.dirname(__FILE__)+'/parser/compiled_parser'
 require File.dirname(__FILE__)+'/tokenizer/tokenizer'
 require File.dirname(__FILE__)+'/evaluator/evaluator'

data/lib/evaluator/evaluator.rb CHANGED

@@ -1,21 +1,43 @@
 module Dhaka
+  # This is the abstract base evaluator class. It is not directly instantiated.
+  # When defining an evaluator for a specific grammar, we subclass it. e.g. for FooGrammar
+  # we create a FooEvaluator that subclasses Evaluator. Note that FooEvaluator may not
+  # be further subclassed.
+  #
+  # An evaluation rule for a given production named +bar+ is defined by calling +for_bar+ with
+  # a block that performs the evaluation. For detailed examples, see the evaluators in the
+  # test suite.
   class Evaluator
+    # Instantiates a new evaluator with the syntax tree of a parsed expression. Only subclasses
+    # of Evaluator are directly instantiated.
     def initialize(syntax_tree)
       @syntax_tree = syntax_tree
       @node_stack = []
     end
+    # Returns the evaluation result.
     def result
       evaluate(@syntax_tree)
     end
+    private
     def child_nodes
       @node_stack[-1]
     end
-    private
+    def evaluate node
+      return node if (ParseTreeLeafNode === node)
+      @node_stack << node.child_nodes.collect {|child_node| evaluate(child_node)}
+      proc = self.class.actions[node.production.name]
+      result = self.instance_eval(&proc)
+      @node_stack.pop
+      result
+    end
     def self.inherited(evaluator)
       class << evaluator
         attr_accessor :grammar, :actions
@@ -29,34 +51,25 @@ module Dhaka
       check_definitions
     end
+    def self.method_missing(method_name, &blk)
+      if method_name.to_s =~ /^for_*/
+        rule_name = method_name.to_s[4..-1]
+        self.for_rule_named(rule_name, &blk)
+      end
+    end
     def self.check_definitions
       non_trivial_productions_with_rules_undefined = self.grammar.productions.select {|production| production.expansion.size != 1}.collect {|production| production.name} - self.actions.keys
       raise EvaluatorDefinitionError.new(non_trivial_productions_with_rules_undefined) unless non_trivial_productions_with_rules_undefined.empty?
     end
-    def evaluate node
-      return node if (ParseTreeLeafNode === node)
-      @node_stack << node.child_nodes.collect {|child_node| evaluate(child_node)}
-      proc = self.class.actions[node.production.name]
-      result = self.instance_eval(&proc)
-      @node_stack.pop
-      result
-    end
     def self.for_rule_named(name, &blk)
       self.actions[name] = blk
     end
-    def self.method_missing(method_name, &blk)
-      if method_name.to_s =~ /^for_*/
-        rule_name = method_name.to_s[4..-1]
-        self.for_rule_named(rule_name, &blk)
-      end
-    end
   end
-  class EvaluatorDefinitionError < StandardError
+  class EvaluatorDefinitionError < StandardError #:nodoc:
     def initialize(non_trivial_productions_with_rules_undefined)
       @non_trivial_productions_with_rules_undefined = non_trivial_productions_with_rules_undefined
     end

data/lib/grammar/closure_hash.rb CHANGED

@@ -1,6 +1,7 @@
 #!/usr/bin/env ruby
 module Dhaka
-  class ClosureHash < Hash
+  # A subclass of Hash with a dirty flag
+  class ClosureHash < Hash #:nodoc:
     attr_accessor :dirty
     def initialize(&block)
       super(&block)

data/lib/grammar/grammar.rb CHANGED

@@ -2,15 +2,24 @@
 require 'set'
 module Dhaka
+  # Reserved name for the start symbol for all grammars.
   START_SYMBOL_NAME = "_Start_"
-  END_SYMBOL_NAME = "_End_"
+  END_SYMBOL_NAME = "_End_" #:nodoc:
+  # Productions for specific grammar symbols are defined in the context of this class.
   class ProductionBuilder
+    # +symbol+ is the grammar symbol that productions are being defined for.
     def initialize(grammar, symbol)
       @grammar = grammar
       @symbol = symbol
     end
+    # Creates a new production for +symbol+ with an expansion of +expansion+. The options hash can include
+    # a directive <tt>:prec</tt>, the value of which is a grammar symbol name. The precedence of the production is then
+    # set to the precedence of the grammar symbol corresponding to that name.
+    #
+    # See the arithmetic precedence grammar in the test suites for an example.
     def method_missing(production_name, expansion, options = {})
       expansion_symbols = expansion.collect {|name| @grammar.symbols[name]}
       if precedence_symbol_name = options[:prec]
@@ -25,12 +34,23 @@ module Dhaka
     end
   end
+  # The precedence builder defines three methods, +left+, +right+ and +nonassoc+. These accept arrays of grammar
+  # symbols all of which have the same precedence level and associativity. This works almost exactly like Yacc.
+  #
+  # See the arithmetic precedence grammar in the test suites for an example.
   class PrecedenceBuilder
-    def initialize(grammar)
+    def initialize(grammar) #:nodoc:
       @grammar = grammar
       @precedence_level = 0
     end
-    def method_missing(associativity, symbol_names)
+    [:left, :right, :nonassoc].each do |associativity|
+      define_method(associativity) do |symbols|
+        assign_precedences associativity, symbols
+      end
+    end
+    private
+    def assign_precedences(associativity, symbol_names)
       symbol_names.each do |symbol_name|
         symbol = @grammar.symbols[symbol_name]
         symbol.precedence = Precedence.new(@precedence_level, associativity)
@@ -39,8 +59,34 @@ module Dhaka
     end
   end
+  # This class is subclassed when specifying a grammar. Note that subclasses of this class may not be further subclassed.
   class Grammar
+    # Used for defining the productions for the symbol with name +symbol+. The block +blk+ is
+    # evaluated in the context of a ProductionBuilder.
+    def self.for_symbol symbol, &blk
+      symbol = symbols[symbol]
+      symbol.non_terminal = true
+      ProductionBuilder.new(self, symbol).instance_eval(&blk)
+    end
+    # Used for defining the precedences and associativities of symbols. The block +blk+ is
+    # evaluated in the context of a PrecedenceBuilder.
+    def self.precedences &blk
+      PrecedenceBuilder.new(self).instance_eval(&blk)
+    end
+    # Returns the grammar symbol identified by +name+
+    def self.symbol_for_name(name)
+      if symbols.has_key? name
+        symbols[name]
+      else
+        raise "No symbol with name #{name} found"
+      end
+    end
+    private
     def self.inherited(grammar)
       class << grammar
         attr_accessor :symbols, :productions_by_symbol, :productions_by_name, :start_symbol, :end_symbol, :__first_cache
@@ -53,13 +99,10 @@ module Dhaka
       grammar.__first_cache = {}
     end
-    def self.for_symbol symbol, &blk
-      symbol = symbols[symbol]
-      symbol.non_terminal = true
-      ProductionBuilder.new(self, symbol).instance_eval(&blk)
+    def self.productions_for_symbol(symbol)
+      productions_by_symbol[symbol]
     end
     def self.productions
       productions_by_name.values
     end
@@ -68,17 +111,6 @@ module Dhaka
       productions_by_name[name]
     end
-    def self.productions_for_symbol(symbol)
-      productions_by_symbol[symbol]
-    end
-    def self.symbol_for_name(name)
-      if symbols.has_key? name
-        symbols[name]
-      else
-        raise "No symbol with name #{name} found"
-      end
-    end
     def self.terminal_symbols
       symbols.values.select {|symbol| symbol.terminal}
@@ -102,10 +134,6 @@ module Dhaka
       return channels, result
     end
-    def self.precedences &blk
-      PrecedenceBuilder.new(self).instance_eval(&blk)
-    end
     def self.first(given_symbol)
       cached_result = self.__first_cache[given_symbol]
       return cached_result if cached_result
@@ -146,7 +174,6 @@ module Dhaka
       end
       return closure_hash
     end
   end
 end

data/lib/grammar/grammar_symbol.rb CHANGED

@@ -1,5 +1,9 @@
 #!/usr/bin/env ruby
 module Dhaka
+  # Each grammar symbol is uniquely identified by a string name. The name of a symbol can
+  # be anything and need not correspond to its character representation. For example, an ampersand in the
+  # character stream could be tokenized as a symbol with a name 'whatever'. In general, it's best to choose
+  # symbol names that are descriptive.
   class GrammarSymbol
     attr_reader :name
     attr_accessor :non_terminal, :nullable, :precedence, :associativity

data/lib/grammar/precedence.rb CHANGED

@@ -1,5 +1,5 @@
 module Dhaka
-  class Precedence
+  class Precedence #:nodoc:
     include Comparable
     attr_reader :precedence_level, :associativity
     def initialize(precedence_level, associativity)

data/lib/parser/action.rb CHANGED

@@ -1,9 +1,10 @@
 module Dhaka
-  class Action
+  # Encapsulates code for Parser actions.
+  class Action #:nodoc:
     attr_reader :action_code
   end
-  class ShiftAction < Action
+  class ShiftAction < Action #:nodoc:
     attr_reader :destination_state
     def initialize destination_state
       @destination_state = destination_state
@@ -20,7 +21,7 @@ module Dhaka
     end
   end
-  class ReduceAction < Action
+  class ReduceAction < Action #:nodoc:
     attr_reader :production
     def initialize(production)
       @production = production

data/lib/parser/channel.rb CHANGED

@@ -1,6 +1,7 @@
 #!/usr/bin/env ruby
 module Dhaka
-  class Channel
+  # Represents channels for pumping of lookaheads between items
+  class Channel #:nodoc:
     attr_reader :start_item, :end_item
     def initialize(grammar, start_item, end_item)
       @grammar = grammar
@@ -23,7 +24,7 @@ module Dhaka
     end
   end
-  class SpontaneousChannel < Channel
+  class SpontaneousChannel < Channel #:nodoc:
     def to_s
       "Spontaneous " + super.to_s
     end
@@ -40,7 +41,7 @@ module Dhaka
     end
   end
-  class PassiveChannel < Channel
+  class PassiveChannel < Channel #:nodoc:
     def to_s
       "Passive " + super.to_s
     end

data/lib/parser/compiled_parser.rb CHANGED

@@ -1,6 +1,8 @@
 module Dhaka
+  # This is the superclass of all compiled Parsers. It is only used by generated code.
   class CompiledParser
+    private
     def self.inherited(compiled_parser)
       class << compiled_parser
         attr_accessor :states, :grammar, :start_state_id

data/lib/parser/item.rb CHANGED

@@ -1,6 +1,7 @@
 #!/usr/bin/env ruby
 module Dhaka
-  class Item
+  # Represents parser state items
+  class Item #:nodoc:
     attr_reader :production, :next_item_index, :lookaheadset
     def initialize(production, next_item_index)
       @production = production

data/lib/parser/parse_result.rb CHANGED

@@ -1,21 +1,26 @@
 module Dhaka
+  # Returned on successful parsing of the input token stream.
   class ParseSuccessResult
-    attr_accessor :syntax_tree
-    def initialize(syntax_tree)
+    # Contains the parse result.
+    attr_accessor :syntax_tree
+    def initialize(syntax_tree) #:nodoc:
       @syntax_tree = syntax_tree
     end
-    def has_error?
+    # This is false.
+    def has_error?
       false
     end
   end
+  # Returned on unsuccessful parsing of the input token stream.
   class ParseErrorResult
-    attr_reader :bad_token_index
-    def initialize(bad_token_index)
+    # The index of the token that caused the parse error.
+    attr_reader :bad_token_index
+    def initialize(bad_token_index) #:nodoc:
       @bad_token_index = bad_token_index
     end
-    def has_error?
+    # This is true.
+    def has_error?
       true
     end
   end

data/lib/parser/parse_tree.rb CHANGED

@@ -1,20 +1,18 @@
 module Dhaka
-  class ParseTreeCompositeNode
+  # These are composite nodes of the syntax tree returned by the successful parsing of a token stream.
+  class ParseTreeCompositeNode
     attr_reader :production, :child_nodes
-    def initialize(production)
+    def initialize(production) #:nodoc:
       @production = production
       @child_nodes = []
     end
-    def linearize
+    def linearize #:nodoc:
       child_nodes.collect {|child_node| child_node.linearize}.flatten + [production.name]
     end
-    def to_s
+    def to_s #:nodoc:
       "CompositeNode: #{production.symbol} --> [#{child_nodes.join(", ")}]"
     end
-    def dot_name
-      "Node#{object_id}"
-    end
+    # Returns the dot representation of the syntax tree.
     def to_dot
       result = []
       result << ["digraph x {", "node [fontsize=\"10\" shape=box size=\"5\"]"] if head_node?
@@ -28,31 +26,39 @@ module Dhaka
       result.join("\n")
     end
-    def head_node?
+    def head_node? #:nodoc:
       production.symbol.name == START_SYMBOL_NAME
     end
+    def dot_name #:nodoc:
+      "Node#{object_id}"
+    end
   end
+  # These are leaf nodes of syntax trees. They contain tokens.
   class ParseTreeLeafNode
     attr_reader :token
-    def initialize(token)
+    def initialize(token) #:nodoc:
       @token = token
     end
-    def linearize
+    def linearize #:nodoc:
       []
     end
-    def to_s
+    def to_s #:nodoc:
       "LeafNode: #{token}"
     end
-    def dot_name
-      "Node#{object_id}"
-    end
+    # Returns the dot representation of this node.
     def to_dot
       label = "#{token}#{' : '+token.value.to_s if token.value}"
       "#{dot_name} [label=\"#{label}\"]"
     end
-    def head_node?
+    def head_node? #:nodoc:
       false
     end
+    def dot_name #:nodoc:
+      "Node#{object_id}"
+    end
   end
 end

data/lib/parser/parser.rb CHANGED

@@ -7,6 +7,9 @@ module Dhaka
     include ParserMethods
     attr_reader :grammar, :start_state
+    # Creates a new parser from the given grammar. Messages are logged by default to STDOUT
+    # and the log level is WARN. Shift-reduce conflicts are reported at WARN and reduce-reduce conflicts
+    # at ERROR. You may pass in your own logger. Logging at DEBUG shows a lot of progress output.
     def initialize(grammar, logger = nil)
       if logger
         @logger = logger
@@ -34,20 +37,7 @@ module Dhaka
       initialize_states
     end
-    def initialize_states
-      start_productions = @grammar.productions_for_symbol(@grammar.start_symbol)
-      raise NoStartProductionsError.new(@grammar) if start_productions.empty?
-      start_items = ItemSet.new(start_productions.collect {|production| Item.new(production, 0)})
-      start_items.each {|start_item| start_item.lookaheadset << @grammar.end_symbol}
-      @start_state = @states[start_items]
-      @logger.debug("Pumping #{@channels.size} channels...")
-      pump_channels
-      @logger.debug("Generating shift actions...")
-      generate_shift_actions
-      @logger.debug("Generating reduce actions...")
-      generate_reduce_actions
-    end
+    # Returns the Ruby source of the generated parser compiled as +parser_class_name+. This can be written out to a file.
     def compile_to_ruby_source_as parser_class_name
       result = "class #{parser_class_name} < Dhaka::CompiledParser\n\n"
       result << "  self.grammar = #{@grammar.name}\n\n"
@@ -59,6 +49,9 @@ module Dhaka
       result
     end
+    # Returns the dot representation of the parser. If <tt>:hide_lookaheads</tt> is set to true in the
+    # options hash, lookaheads are not written out to the parser states, which is helpful when there are dozens
+    # of lookahead symbols for every item in every state.
     def to_dot(options = {})
       result = ["digraph x {", "node [fontsize=\"10\" shape=box size=\"5\"]"]
       result += states.collect { |state| state.to_dot(options) }
@@ -70,10 +63,28 @@ module Dhaka
       result << ['}']
       result.join("\n")
     end
+    private :start_state
+    private
     def states
       @states.values
     end
+    def initialize_states
+      start_productions = @grammar.productions_for_symbol(@grammar.start_symbol)
+      raise NoStartProductionsError.new(@grammar) if start_productions.empty?
+      start_items = ItemSet.new(start_productions.collect {|production| Item.new(production, 0)})
+      start_items.each {|start_item| start_item.lookaheadset << @grammar.end_symbol}
+      @start_state = @states[start_items]
+      @logger.debug("Pumping #{@channels.size} channels...")
+      pump_channels
+      @logger.debug("Generating shift actions...")
+      generate_shift_actions
+      @logger.debug("Generating reduce actions...")
+      generate_reduce_actions
+    end
     def generate_shift_actions
       @states.values.each do |state|
         @transitions[state].keys.each { |symbol|
@@ -157,21 +168,12 @@ module Dhaka
   end
-  class ParserReduceReduceConflictError < StandardError
-    def initialize(message)
-      @message = message
-    end
-    def to_s
-      @message
-    end
-  end
+  # Raised when trying to create a Parser for a grammar that has no productions for the start symbol
   class NoStartProductionsError < StandardError
-    def initialize(grammar)
+    def initialize(grammar) #:nodoc:
       @grammar = grammar
     end
-    def to_s
+    def to_s #:nodoc:
       "No start productions defined for #{@grammar.name}"
     end
   end

data/lib/parser/parser_methods.rb CHANGED

@@ -1,6 +1,8 @@
 #!/usr/bin/env ruby
 module Dhaka
+  # This module is included both in Parser and CompiledParser.
   module ParserMethods
+    # +token_stream+ is an Enumerable of Token-s. Returns either a ParseSuccessResult or a ParseErrorResult.
     def parse token_stream
       parser_run = ParserRun.new(grammar, start_state, token_stream)
       parser_run.run

data/lib/parser/parser_run.rb CHANGED

@@ -1,5 +1,5 @@
 module Dhaka
-  class ParserRun
+  class ParserRun #:nodoc:
     def initialize(grammar, start_state, token_stream)
       @grammar = grammar

data/lib/parser/parser_state.rb CHANGED

@@ -1,7 +1,7 @@
 #!/usr/bin/env ruby
 require 'set'
 module Dhaka
-  class ParserState
+  class ParserState #:nodoc:
     attr_accessor :items, :actions, :id
@@ -55,7 +55,7 @@ module Dhaka
   end
-  class ItemSet < Set
+  class ItemSet < Set #:nodoc:
     def hash
       self.collect{|item| item.hash}.inject{|result, hashcode| result ^ hashcode}
     end

data/lib/parser/token.rb CHANGED

@@ -1,4 +1,6 @@
 module Dhaka
+  # Represents a portion of the input character stream that is mapped by the tokenizer
+  # to a symbol in the grammar.
   class Token
     attr_accessor :grammar_symbol, :value
     def initialize(grammar_symbol, value)

data/lib/tokenizer/tokenizer.rb CHANGED

@@ -1,4 +1,10 @@
 module Dhaka
+  # Reserved constant used to identify the idle state of the tokenizer.
+  TOKENIZER_IDLE_STATE = :idle_state
+  # Raised when the tokenizer encounters a character that has no corresponding action in
+  # its current state.
   class UnrecognizedInputCharacterException < StandardError
     attr_reader :input, :char_index
     def initialize(input, char_index)
@@ -10,6 +16,8 @@ module Dhaka
     end
   end
+  # A tokenizer state encapsulates actions that should be performed upon
+  # encountering each permissible character for that state.
   class TokenizerState
     attr_reader :actions
@@ -17,72 +25,83 @@ module Dhaka
       @actions = {}
     end
+    # Define the action (+blk+) to be performed when encountering any of +characters+ in the token stream.
     def for_characters(characters, &blk)
       characters.each do |character|
         actions[character] = blk
       end
     end
-    def for_character(character, &blk)
-      actions[character[0]] = blk
-    end
+    alias for_character for_characters
-    def to_s
+    def to_s #:nodoc:
       actions.inspect
     end
   end
+  # This class contains a DSL for specifying tokenizers. Subclass it to implement tokenizers for specific grammars.
+  # Subclasses of this class may not be further subclassed.
+  #
+  # Tokenizers are state machines that are specified pretty much by hand. Each state of a tokenizer is identified
+  # by a Ruby symbol. The constant Dhaka::TOKENIZER_IDLE_STATE is reserved for the idle state of the tokenizer (the one
+  # that it starts in).
   class Tokenizer
-    def self.inherited(tokenizer)
-      class << tokenizer
-        attr_accessor :states
-      end
-      tokenizer.states = Hash.new {|hash, key| hash[key] = TokenizerState.new}
-    end
+    # Define the action for the state named +state_name+.
     def self.for_state(state_name, &blk)
       states[state_name].instance_eval(&blk)
     end
+    # Tokenizes a string +input+ and returns an array of Token-s.
     def self.tokenize(input)
-      TokenizerRun.new(self, input).run
+      self.new(input).run
     end
-  end
-  class TokenizerRun
+    # A slot that can be used to accumulate characters when processing multi-character tokens.
     attr_accessor :accumulator
+    # The tokens shifted so far.
     attr_reader :tokens
-    def initialize(tokenizer, input)
-      @tokenizer = tokenizer
+    def initialize(input) #:nodoc:
       @input = input
-      @current_state = tokenizer.states[:idle_state]
+      @current_state = self.class.states[TOKENIZER_IDLE_STATE]
       @curr_char_index = 0
       @tokens = []
     end
-    def run
-      while curr_char
-        blk = @current_state.actions[curr_char]
-        raise UnrecognizedInputCharacterException.new(@input, @curr_char_index) unless blk
-        instance_eval(&blk)
-      end
-      tokens
-    end
+    # The character currently being processed.
     def curr_char
       @input[@curr_char_index] and @input[@curr_char_index].chr
     end
+    # Advance to the next character.
     def advance
       @curr_char_index += 1
     end
+    # Change the active state of the tokenizer to the state identified by the symbol +state_name+.
     def switch_to state_name
-      @current_state = @tokenizer.states[state_name]
+      @current_state = self.class.states[state_name]
+    end
+    def run #:nodoc:
+      while curr_char
+        blk = @current_state.actions[curr_char]
+        raise UnrecognizedInputCharacterException.new(@input, @curr_char_index) unless blk
+        instance_eval(&blk)
+      end
+      tokens
+    end
+    private
+    def self.inherited(tokenizer)
+      class << tokenizer
+        attr_accessor :states
+      end
+      tokenizer.states = Hash.new {|hash, key| hash[key] = TokenizerState.new}
     end
   end
 end

data/test/arithmetic_precedence_tokenizer.rb CHANGED

@@ -13,7 +13,7 @@ class ArithmeticPrecedenceTokenizer < Dhaka::Tokenizer
   all_characters = digits + parenths + operators + functions + arg_separator + whitespace
-  for_state :idle_state do
+  for_state Dhaka::TOKENIZER_IDLE_STATE do
     for_characters(all_characters - (digits + whitespace)) do
       tokens << Dhaka::Token.new(ArithmeticPrecedenceGrammar.symbol_for_name(curr_char), nil)
       advance
@@ -29,15 +29,19 @@ class ArithmeticPrecedenceTokenizer < Dhaka::Tokenizer
   for_state :get_integer_literal do
     for_characters all_characters - digits do
-      tokens << Dhaka::Token.new(ArithmeticPrecedenceGrammar.symbol_for_name('n'), accumulator.to_i)
-      switch_to :idle_state
+      tokens << integer_literal_token(accumulator.to_i)
+      switch_to Dhaka::TOKENIZER_IDLE_STATE
     end
     for_characters digits do
       self.accumulator += curr_char
       advance
-      tokens << Dhaka::Token.new(ArithmeticPrecedenceGrammar.symbol_for_name('n'), accumulator.to_i) unless curr_char
+      tokens << integer_literal_token(accumulator.to_i) unless curr_char
     end
   end
+  def integer_literal_token(value)
+    Dhaka::Token.new(ArithmeticPrecedenceGrammar.symbol_for_name('n'), value)
+  end
 end

data/test/arithmetic_tokenizer.rb CHANGED

@@ -13,7 +13,7 @@ class ArithmeticTokenizer < Dhaka::Tokenizer
   all_characters = digits + parenths + operators + functions + arg_separator + whitespace
-  for_state :idle_state do
+  for_state Dhaka::TOKENIZER_IDLE_STATE do
     for_characters(all_characters - (digits + whitespace)) do
       tokens << Dhaka::Token.new(ArithmeticGrammar.symbol_for_name(curr_char), nil)
       advance
@@ -29,15 +29,19 @@ class ArithmeticTokenizer < Dhaka::Tokenizer
   for_state :get_integer_literal do
     for_characters all_characters - digits do
-      tokens << Dhaka::Token.new(ArithmeticGrammar.symbol_for_name('n'), accumulator.to_i)
-      switch_to :idle_state
+      tokens << integer_literal_token(accumulator.to_i)
+      switch_to Dhaka::TOKENIZER_IDLE_STATE
     end
     for_characters digits do
       self.accumulator += curr_char
       advance
-      tokens << Dhaka::Token.new(ArithmeticGrammar.symbol_for_name('n'), accumulator.to_i) unless curr_char
+      tokens << integer_literal_token(accumulator.to_i) unless curr_char
     end
   end
+  def integer_literal_token(value)
+    Dhaka::Token.new(ArithmeticGrammar.symbol_for_name('n'), value)
+  end
 end

data/test/bracket_tokenizer.rb CHANGED

@@ -6,7 +6,7 @@ class BracketTokenizer < Dhaka::Tokenizer
   all_characters = ['(', '[', '{', 'B', '}', ']', ')']
-  for_state :idle_state do
+  for_state Dhaka::TOKENIZER_IDLE_STATE do
     for_characters(all_characters) do
       tokens << Dhaka::Token.new(BracketGrammar.symbol_for_name(curr_char), nil)
       advance

data/test/parser_test.rb CHANGED

@@ -42,12 +42,17 @@ class ParserTest < Test::Unit::TestCase
                           'E ::= E -> - T [)-]'])
     expected_states[10] = Set.new(['T ::= ( E ) -> [#)-]'])
     expected_states[11] = Set.new(['_Start_ ::= S # -> [_End_]'])
-    actual_states = Set.new(@parser.states.collect {|state| Set.new(state.items.values.collect {|item| item.to_s})})
+    actual_states = Set.new(@parser.send('states').collect {|state| Set.new(state.items.values.collect {|item| item.to_s})})
     #write_parser(@parser)
     expected_states.values.each do |state|
       assert set_finder(state, actual_states), "expected #{state.to_a}"
     end
   end
+  def test_parser_can_be_exported_to_dot_format
+    dot_representation = @parser.to_dot
+  end
   def build_tokens(token_symbol_names, grammar)
     token_symbol_names.collect {|symbol_name| Dhaka::Token.new(grammar.symbol_for_name(symbol_name), nil)}
   end
@@ -71,6 +76,11 @@ class ParserTest < Test::Unit::TestCase
        "start"], syntax_tree.linearize
   end
+  def test_syntax_trees_can_be_exported_to_dot_format
+    syntax_tree = @parser.parse(build_tokens(['(','n','-','(','n','-','n',')',')','-','n','#'], @grammar)).syntax_tree
+    syntax_tree.to_dot
+  end
   def get_linearized_parse_result(input, parser)
     parser.parse(build_tokens(input, parser.grammar)).syntax_tree.linearize
   end

metadata CHANGED

@@ -3,8 +3,8 @@ rubygems_version: 0.9.0
 specification_version: 1
 name: dhaka
 version: !ruby/object:Gem::Version
-  version: 0.0.5
-date: 2006-12-15 00:00:00 -05:00
+  version: 0.0.6
+date: 2007-01-07 00:00:00 -05:00
 summary: An LALR1 parser generator written in Ruby
 require_paths:
 - lib
@@ -15,7 +15,7 @@ description:
 autorequire: dhaka
 default_executable:
 bindir: bin
-has_rdoc: false
+has_rdoc: true
 required_ruby_version: !ruby/object:Gem::Version::Requirement
   requirements:
   - - ">"