foreverman-dhaka 2.2.1

Files changed (84)
  1. data/Rakefile +64 -0
  2. data/lib/dhaka.rb +62 -0
  3. data/lib/dhaka/dot/dot.rb +29 -0
  4. data/lib/dhaka/evaluator/evaluator.rb +133 -0
  5. data/lib/dhaka/grammar/closure_hash.rb +15 -0
  6. data/lib/dhaka/grammar/grammar.rb +236 -0
  7. data/lib/dhaka/grammar/grammar_symbol.rb +27 -0
  8. data/lib/dhaka/grammar/precedence.rb +19 -0
  9. data/lib/dhaka/grammar/production.rb +36 -0
  10. data/lib/dhaka/lexer/accept_actions.rb +36 -0
  11. data/lib/dhaka/lexer/alphabet.rb +21 -0
  12. data/lib/dhaka/lexer/compiled_lexer.rb +46 -0
  13. data/lib/dhaka/lexer/dfa.rb +121 -0
  14. data/lib/dhaka/lexer/lexeme.rb +32 -0
  15. data/lib/dhaka/lexer/lexer.rb +70 -0
  16. data/lib/dhaka/lexer/lexer_run.rb +78 -0
  17. data/lib/dhaka/lexer/regex_grammar.rb +393 -0
  18. data/lib/dhaka/lexer/regex_parser.rb +2010 -0
  19. data/lib/dhaka/lexer/regex_tokenizer.rb +14 -0
  20. data/lib/dhaka/lexer/specification.rb +96 -0
  21. data/lib/dhaka/lexer/state.rb +68 -0
  22. data/lib/dhaka/lexer/state_machine.rb +37 -0
  23. data/lib/dhaka/parser/action.rb +55 -0
  24. data/lib/dhaka/parser/channel.rb +58 -0
  25. data/lib/dhaka/parser/compiled_parser.rb +51 -0
  26. data/lib/dhaka/parser/conflict.rb +54 -0
  27. data/lib/dhaka/parser/item.rb +43 -0
  28. data/lib/dhaka/parser/parse_result.rb +50 -0
  29. data/lib/dhaka/parser/parse_tree.rb +66 -0
  30. data/lib/dhaka/parser/parser.rb +165 -0
  31. data/lib/dhaka/parser/parser_methods.rb +11 -0
  32. data/lib/dhaka/parser/parser_run.rb +39 -0
  33. data/lib/dhaka/parser/parser_state.rb +74 -0
  34. data/lib/dhaka/parser/token.rb +22 -0
  35. data/lib/dhaka/runtime.rb +51 -0
  36. data/lib/dhaka/tokenizer/tokenizer.rb +190 -0
  37. data/test/all_tests.rb +5 -0
  38. data/test/arithmetic/arithmetic_evaluator.rb +64 -0
  39. data/test/arithmetic/arithmetic_evaluator_test.rb +43 -0
  40. data/test/arithmetic/arithmetic_grammar.rb +41 -0
  41. data/test/arithmetic/arithmetic_grammar_test.rb +9 -0
  42. data/test/arithmetic/arithmetic_test_methods.rb +9 -0
  43. data/test/arithmetic/arithmetic_tokenizer.rb +39 -0
  44. data/test/arithmetic/arithmetic_tokenizer_test.rb +38 -0
  45. data/test/arithmetic_precedence/arithmetic_precedence_evaluator.rb +43 -0
  46. data/test/arithmetic_precedence/arithmetic_precedence_grammar.rb +24 -0
  47. data/test/arithmetic_precedence/arithmetic_precedence_grammar_test.rb +30 -0
  48. data/test/arithmetic_precedence/arithmetic_precedence_lexer_specification.rb +23 -0
  49. data/test/arithmetic_precedence/arithmetic_precedence_parser_test.rb +33 -0
  50. data/test/brackets/bracket_grammar.rb +23 -0
  51. data/test/brackets/bracket_tokenizer.rb +22 -0
  52. data/test/brackets/brackets_test.rb +28 -0
  53. data/test/chittagong/chittagong_driver.rb +46 -0
  54. data/test/chittagong/chittagong_driver_test.rb +276 -0
  55. data/test/chittagong/chittagong_evaluator.rb +284 -0
  56. data/test/chittagong/chittagong_evaluator_test.rb +38 -0
  57. data/test/chittagong/chittagong_grammar.rb +104 -0
  58. data/test/chittagong/chittagong_lexer.rb +109 -0
  59. data/test/chittagong/chittagong_lexer_specification.rb +37 -0
  60. data/test/chittagong/chittagong_lexer_test.rb +58 -0
  61. data/test/chittagong/chittagong_parser.rb +879 -0
  62. data/test/chittagong/chittagong_parser_test.rb +55 -0
  63. data/test/chittagong/chittagong_test.rb +170 -0
  64. data/test/core/another_lalr_but_not_slr_grammar.rb +20 -0
  65. data/test/core/compiled_parser_test.rb +44 -0
  66. data/test/core/dfa_test.rb +170 -0
  67. data/test/core/evaluator_test.rb +22 -0
  68. data/test/core/grammar_test.rb +83 -0
  69. data/test/core/lalr_but_not_slr_grammar.rb +19 -0
  70. data/test/core/lexer_test.rb +139 -0
  71. data/test/core/malformed_grammar.rb +7 -0
  72. data/test/core/malformed_grammar_test.rb +8 -0
  73. data/test/core/nullable_grammar.rb +21 -0
  74. data/test/core/parse_result_test.rb +44 -0
  75. data/test/core/parser_state_test.rb +24 -0
  76. data/test/core/parser_test.rb +131 -0
  77. data/test/core/precedence_grammar.rb +17 -0
  78. data/test/core/precedence_grammar_test.rb +9 -0
  79. data/test/core/rr_conflict_grammar.rb +21 -0
  80. data/test/core/simple_grammar.rb +22 -0
  81. data/test/core/sr_conflict_grammar.rb +16 -0
  82. data/test/dhaka_test_helper.rb +18 -0
  83. data/test/fake_logger.rb +17 -0
  84. metadata +137 -0
data/lib/dhaka/parser/parse_result.rb
@@ -0,0 +1,50 @@
+module Dhaka
+  # Returned on successful parsing of the input token stream.
+  class ParseSuccessResult < DelegateClass(ParseTreeCompositeNode)
+    # Contains the parse result.
+    attr_accessor :parse_tree
+
+    def initialize(parse_tree) #:nodoc:
+      super
+      @parse_tree = parse_tree
+    end
+
+    # This is false.
+    def has_error?
+      false
+    end
+
+    # Returns the dot representation of the parse tree
+    def to_dot
+      Dot::Digraph.new(:fontsize => 10, :shape => :box, :size => 5) do |g|
+        parse_tree.to_dot(g)
+      end.to_dot
+    end
+
+    # Deprecated. Use the +parse_tree+ accessor.
+    alias syntax_tree parse_tree
+  end
+
+  # Returned on unsuccessful parsing of the input token stream.
+  class ParseErrorResult
+    attr_reader :unexpected_token, :parser_state
+
+    def initialize(unexpected_token, parser_state) #:nodoc:
+      @unexpected_token = unexpected_token
+      @parser_state = parser_state
+    end
+
+    # This is true.
+    def has_error?
+      true
+    end
+
+    def inspect #:nodoc:
+      "<Dhaka::ParseErrorResult unexpected_token=#{unexpected_token.inspect}>"
+    end
+  end
+end
+
+
+
+
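The two result classes share only the has_error? predicate. A minimal consumption sketch (MyParser and tokens are hypothetical stand-ins for a generated parser and its token stream; every call used below appears in the code above or in parser/token.rb):

    result = MyParser.parse(tokens)
    if result.has_error?
      # ParseErrorResult carries the offending token and the state the parser was in
      puts "Unexpected token #{result.unexpected_token} at position #{result.unexpected_token.input_position}"
    else
      # ParseSuccessResult delegates to the root ParseTreeCompositeNode and can emit Graphviz dot
      File.open('parse_tree.dot', 'w') {|file| file << result.to_dot}
    end
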
data/lib/dhaka/parser/parse_tree.rb
@@ -0,0 +1,66 @@
+module Dhaka
+  # These are composite nodes of the syntax tree returned by the successful parsing of a token stream.
+  class ParseTreeCompositeNode
+    attr_reader :production, :child_nodes
+
+    def initialize(production) #:nodoc:
+      @production = production
+      @child_nodes = []
+    end
+    def linearize #:nodoc:
+      child_nodes.collect {|child_node| child_node.linearize}.flatten + [self]
+    end
+
+    def tokens
+      child_nodes.collect{|child_node| child_node.tokens}.flatten
+    end
+
+    def to_s #:nodoc:
+      "CompositeNode: #{production.symbol} --> [#{child_nodes.join(", ")}]"
+    end
+
+    # Returns the dot representation of this node.
+    def to_dot graph
+      graph.node(self, :label => production)
+      child_nodes.each do |child|
+        graph.edge(self, child)
+        child.to_dot(graph)
+      end
+    end
+
+    def head_node? #:nodoc:
+      production.symbol.name == START_SYMBOL_NAME
+    end
+
+  end
+
+  # These are leaf nodes of syntax trees. They contain tokens.
+  class ParseTreeLeafNode
+    attr_reader :token
+
+    def initialize(token) #:nodoc:
+      @token = token
+    end
+
+    def linearize #:nodoc:
+      []
+    end
+
+    def tokens
+      [token]
+    end
+
+    def to_s #:nodoc:
+      "LeafNode: #{token}"
+    end
+
+    # Returns the dot representation of this node.
+    def to_dot(graph)
+      graph.node(self, :label => token)
+    end
+
+    def head_node? #:nodoc:
+      false
+    end
+  end
+end
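A rough sketch of the node API above, assuming result is a ParseSuccessResult (which delegates every call to its root ParseTreeCompositeNode):

    result.tokens.each {|token| puts token}   # leaf tokens, left to right
    result.linearize.each do |node|           # composite nodes only, children before their parents
      puts node.production
    end
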
data/lib/dhaka/parser/parser.rb
@@ -0,0 +1,165 @@
+module Dhaka
+  # The parser generator. To generate a parser from a grammar specification +ArithmeticPrecedenceGrammar+, one would
+  # write:
+  #   parser = Dhaka::Parser.new(ArithmeticPrecedenceGrammar)
+  #
+  # To compile this parser to Ruby source as +ArithmeticPrecedenceParser+:
+  #   parser.compile_to_ruby_source_as(:ArithmeticPrecedenceParser)
+  # which returns a string of Ruby code.
+  class Parser
+    include ParserMethods
+    attr_reader :grammar
+
+    # Creates a new parser from the given grammar. Messages are logged by default to STDOUT
+    # and the log level is WARN. Shift-reduce conflicts are reported at WARN and reduce-reduce conflicts
+    # at ERROR. You may pass in your own logger. Logging at DEBUG shows a lot of progress output.
+    def initialize(grammar, logger = nil)
+      @shift_actions = Hash.new {|hash, state| hash[state] = ShiftAction.new(state)}
+      @reduce_actions = Hash.new {|hash, production| hash[production] = ReduceAction.new(production)}
+      @logger = logger || default_logger
+      @transitions = Hash.new {|hash, state| hash[state] = {}}
+      @grammar = grammar
+      @channels = Hash.new {|hash, start_item| hash[start_item] = []}
+      @states = Hash.new do |hash, kernel|
+        closure, channels = grammar.closure(kernel)
+        channels.each do |start_item, channel_set|
+          @channels[start_item].concat channel_set.to_a
+        end
+        new_state = ParserState.new(self, closure)
+        hash[kernel] = new_state
+        @logger.debug("Created #{new_state.unique_name}.")
+        new_state.transition_items.each do |symbol, items|
+          destination_kernel = ItemSet.new(items.collect{|item| item.next_item})
+          destination_state = hash[destination_kernel]
+          items.each {|item| @channels[item] << grammar.passive_channel(item, destination_state.items[item.next_item])}
+          @transitions[new_state][symbol] = destination_state
+        end
+        new_state
+      end
+      initialize_states
+    end
+
+    # Returns the Ruby source of the generated parser compiled as +parser_class_name+. This can be written out to a file.
+    def compile_to_ruby_source_as parser_class_name
+      result = "class #{parser_class_name} < Dhaka::CompiledParser\n\n"
+      result << " self.grammar = #{grammar.name}\n\n"
+      result << " start_with #{start_state.id}\n\n"
+      states.each do |state|
+        result << "#{state.compile_to_ruby_source}\n\n"
+      end
+      result << "end"
+      result
+    end
+
+    # Returns the dot representation of the parser. If <tt>:hide_lookaheads</tt> is set to true in the
+    # options hash, lookaheads are not written out to the parser states, which is helpful when there are dozens
+    # of lookahead symbols for every item in every state.
+    def to_dot(options = {})
+      Dot::Digraph.new(:fontsize => 10, :shape => :box, :size => 5) do |g|
+        states.each do |state|
+          g.node(state, :label => state.items.values.collect{|item| item.to_s(options)}.join("\n"))
+          @transitions[state].each do |symbol, dest_state|
+            g.edge(state, dest_state, :label => symbol.name)
+          end
+        end
+      end.to_dot
+    end
+
+    def inspect
+      "<Dhaka::Parser grammar : #{grammar}>"
+    end
+
+    private
+    attr_reader :start_state
+
+    def states
+      @states.values
+    end
+
+    def default_logger
+      logger = Logger.new(STDOUT)
+      logger.level = Logger::WARN
+      logger.formatter = ParserLogOutputFormatter.new
+      logger
+    end
+
+    def initialize_states
+      start_productions = grammar.productions_for_symbol(grammar.start_symbol)
+      raise NoStartProductionsError.new(grammar) if start_productions.empty?
+      start_items = ItemSet.new(start_productions.collect {|production| Item.new(production, 0)})
+      start_items.each {|start_item| start_item.lookaheadset << grammar.end_symbol}
+      @start_state = @states[start_items]
+      @logger.debug("Pumping #{@channels.keys.size} dirty items...")
+      pump_channels @channels.keys
+      @logger.debug("Generating shift actions...")
+      generate_shift_actions
+      @logger.debug("Generating reduce actions...")
+      generate_reduce_actions
+    end
+
+    def generate_shift_actions
+      @states.values.each do |state|
+        @transitions[state].keys.each do |symbol|
+          state.actions[symbol.name] = @shift_actions[@transitions[state][symbol]]
+        end
+      end
+    end
+
+    def generate_reduce_actions
+      @states.values.each do |state|
+        state.items.values.select{ |item| !item.next_symbol }.each do |item|
+          create_reduction_actions_for_item_and_state item, state
+        end
+      end
+    end
+
+    def create_reduction_actions_for_item_and_state item, state
+      item.lookaheadset.each do |lookahead|
+        new_action = @reduce_actions[item.production]
+        if existing_action = state.actions[lookahead.name]
+          if ReduceAction === existing_action
+            message = ReduceReduceConflict.new(state, lookahead, new_action).resolve
+            @logger.error(message)
+          else
+            message = ShiftReduceConflict.new(state, lookahead, new_action).resolve
+            @logger.warn(message)
+          end
+        else
+          state.actions[lookahead.name] = new_action
+        end
+      end
+    end
+
+    def pump_channels dirty_items
+      loop do
+        new_dirty_items = Set.new
+        dirty_items.each do |dirty_item|
+          @channels[dirty_item].each do |channel|
+            new_dirty_items << channel.end_item if channel.pump
+          end
+        end
+        break if new_dirty_items.empty?
+        @logger.debug("#{new_dirty_items.size} dirty items...")
+        dirty_items = new_dirty_items
+      end
+    end
+  end
+
+  # Raised when trying to create a Parser for a grammar that has no productions for the start symbol
+  class NoStartProductionsError < StandardError
+    def initialize(grammar) #:nodoc:
+      @grammar = grammar
+    end
+    def to_s #:nodoc:
+      "No start productions defined for #{@grammar.name}"
+    end
+  end
+
+  class ParserLogOutputFormatter < Logger::Formatter #:nodoc:
+    def call(severity, time, progname, msg)
+      "\n%s -- %s: %s\n" % [ severity, progname, msg2str(msg)]
+    end
+  end
+
+end
+
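A sketch of driving the generator end to end, assuming a grammar class such as the ArithmeticPrecedenceGrammar mentioned in the class documentation above (the output file names are arbitrary):

    parser = Dhaka::Parser.new(ArithmeticPrecedenceGrammar)

    # Persist the generated parser so the LALR table construction happens only once.
    File.open('arithmetic_precedence_parser.rb', 'w') do |file|
      file << parser.compile_to_ruby_source_as(:ArithmeticPrecedenceParser)
    end

    # Optionally dump the automaton for inspection with Graphviz, omitting lookaheads.
    File.open('parser.dot', 'w') {|file| file << parser.to_dot(:hide_lookaheads => true)}
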
data/lib/dhaka/parser/parser_methods.rb
@@ -0,0 +1,11 @@
+#!/usr/bin/env ruby
+module Dhaka
+  # This module is included both in Parser and CompiledParser.
+  module ParserMethods
+    # +token_stream+ is an Enumerable of Token-s. Returns either a ParseSuccessResult or a ParseErrorResult.
+    def parse token_stream
+      parser_run = ParserRun.new(grammar, start_state, token_stream)
+      parser_run.run
+    end
+  end
+end
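Because the module is mixed into both Parser and CompiledParser, parse is called the same way on a freshly built parser or on compiled output; the token stream is typically produced by a tokenizer. A sketch only, using the ArithmeticGrammar and ArithmeticTokenizer classes from the bundled test suite and assuming the class-level tokenize entry point those tests rely on:

    parser = Dhaka::Parser.new(ArithmeticGrammar)                   # or a compiled Dhaka::CompiledParser subclass
    result = parser.parse(ArithmeticTokenizer.tokenize("2*(3+4)"))  # any Enumerable of Dhaka::Token works
    puts result.parse_tree unless result.has_error?
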
data/lib/dhaka/parser/parser_run.rb
@@ -0,0 +1,39 @@
+module Dhaka
+  class ParserRun #:nodoc:
+
+    def initialize(grammar, start_state, token_stream)
+      @grammar = grammar
+      @node_stack = []
+      @state_stack = [start_state]
+      @token_stream = token_stream
+      @symbol_queue = []
+    end
+
+    def run
+      tokenize_result = token_stream.each do |token|
+        @current_token = token
+        @symbol_queue << @current_token.symbol_name
+        error = execute_actions
+        return error if error
+        node_stack << ParseTreeLeafNode.new(@current_token)
+        state_stack.last
+      end
+      return tokenize_result if TokenizerErrorResult === tokenize_result
+      ParseSuccessResult.new(node_stack.first) if node_stack.first.head_node?
+    end
+
+    private
+
+    attr_reader :state_stack, :token_stream, :node_stack
+
+    def execute_actions
+      while symbol_name = @symbol_queue.pop
+        action = state_stack.last.actions[symbol_name]
+        return ParseErrorResult.new(@current_token, state_stack.last) unless action
+        instance_eval(&action.action_code)
+      end
+      nil
+    end
+
+  end
+end
data/lib/dhaka/parser/parser_state.rb
@@ -0,0 +1,74 @@
+module Dhaka
+  class ParserState #:nodoc:
+    attr_accessor :items, :actions, :id
+
+    @@state_id = 0
+
+    def self.next_state_id
+      result = @@state_id
+      @@state_id += 1
+      result
+    end
+
+    def initialize(parser, items, id=nil)
+      @parser = parser
+      @items = items
+      @actions = {}
+      @id = id || ParserState.next_state_id
+    end
+
+    def transition_items
+      result = Hash.new {|h, k| h[k] = ItemSet.new()}
+      items.values.each do |item|
+        result[item.next_symbol] << item if item.next_symbol
+      end
+      result
+    end
+
+    def unique_name
+      "State#{id}"
+    end
+
+    def compile_to_ruby_source
+      result = " at_state(#{id}) {\n"
+
+      symbol_names_by_action = Hash.new {|hash, key| hash[key] = []}
+      actions.each do |symbol_name, action|
+        symbol_names_by_action[action] << symbol_name
+      end
+
+      symbol_names_by_action.keys.each do |action|
+        symbol_names = symbol_names_by_action[action].collect {|symbol_name| "#{symbol_name.inspect}"}.join(', ')
+        result << " for_symbols(#{symbol_names}) { #{action.compile_to_ruby_source} }\n"
+      end
+
+      result << " }"
+      result
+    end
+
+    def for_symbols *symbol_names, &blk
+      symbol_names.each do |symbol_name|
+        actions[symbol_name] = @parser.instance_eval(&blk)
+      end
+    end
+
+    alias :for_symbol :for_symbols
+
+    def to_s(options = {})
+      items.values.collect{|item| item.to_s(options)}.join("\n")
+    end
+
+  end
+
+  class ItemSet < Set #:nodoc:
+    def hash
+      result = 5381
+      each { |item| result ^= item.hash }
+      result
+    end
+
+    def eql? other
+      self == other
+    end
+  end
+end
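ItemSet overrides hash and eql? so that two kernels containing equal items act as the same Hash key; this is what lets the @states Hash in Parser#initialize memoize parser states by kernel. A small sketch (item_a and item_b stand for arbitrary Dhaka::Item instances):

    kernel_one = Dhaka::ItemSet.new([item_a, item_b])
    kernel_two = Dhaka::ItemSet.new([item_b, item_a])
    kernel_one.hash == kernel_two.hash   #=> true, order-independent XOR of the member hashes
    kernel_one.eql?(kernel_two)          #=> true, so a Hash lookup finds the existing ParserState
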
data/lib/dhaka/parser/token.rb
@@ -0,0 +1,22 @@
+module Dhaka
+  # Represents a portion of the input character stream that is mapped by the tokenizer
+  # to a symbol in the grammar. The attribute +input_position+ contains the start index position of the original
+  # string input that this token came from. It can be used to report errors by indicating the specific portion
+  # of the input where the error occurred.
+  class Token
+    attr_accessor :symbol_name, :value, :input_position
+    def initialize(symbol_name, value, input_position)
+      @symbol_name = symbol_name
+      @value = value
+      @input_position = input_position
+    end
+
+    def to_s #:nodoc:
+      value ? "#{symbol_name} : #{value}" : "#{symbol_name}"
+    end
+
+    def == other
+      symbol_name == other.symbol_name && value == other.value
+    end
+  end
+end
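A brief sketch: equality deliberately ignores input_position, which exists only so errors can point back into the source string (the symbol name below is arbitrary):

    first  = Dhaka::Token.new('numeric_literal', '42', 0)
    second = Dhaka::Token.new('numeric_literal', '42', 17)
    first == second   #=> true, position is not part of equality
    first.to_s        #=> "numeric_literal : 42"
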
data/lib/dhaka/runtime.rb
@@ -0,0 +1,51 @@
+#--
+# Copyright (c) 2006, 2007 Mushfeq Khan
+#
+# Permission is hereby granted, free of charge, to any person obtaining
+# a copy of this software and associated documentation files (the
+# "Software"), to deal in the Software without restriction, including
+# without limitation the rights to use, copy, modify, merge, publish,
+# distribute, sublicense, and/or sell copies of the Software, and to
+# permit persons to whom the Software is furnished to do so, subject to
+# the following conditions:
+#
+# The above copyright notice and this permission notice shall be
+# included in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+#++
+
+require 'set'
+require 'logger'
+require 'delegate'
+
+%w[
+  grammar/grammar_symbol
+  grammar/production
+  grammar/grammar
+  grammar/precedence
+  parser/parse_tree
+  parser/parse_result
+  parser/parser_methods
+  parser/parser_state
+  parser/token
+  parser/action
+  parser/parser_run
+  parser/compiled_parser
+  tokenizer/tokenizer
+  evaluator/evaluator
+  lexer/accept_actions
+  lexer/alphabet
+  lexer/state_machine
+  lexer/state
+  lexer/specification
+  lexer/lexeme
+  lexer/lexer_run
+  lexer/compiled_lexer
+].each {|path| require File.join(File.dirname(__FILE__), path)}
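The require list above omits the generator pieces (parser/parser, parser/item, parser/channel, parser/conflict, dot/dot, lexer/dfa, lexer/regex_*), so this file appears to load only what parsers and lexers compiled ahead of time need. A hedged usage sketch, assuming the gem's lib directory is on the load path and that a compiled parser file was written out earlier with compile_to_ruby_source_as:

    # Load just the runtime support, not the LALR/DFA generators.
    require 'dhaka/runtime'
    require 'arithmetic_precedence_parser'   # hypothetical file produced by compile_to_ruby_source_as
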