dhaka 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/dhaka.rb +44 -0
- data/lib/evaluator/evaluator.rb +70 -0
- data/lib/grammar/closure_hash.rb +13 -0
- data/lib/grammar/grammar.rb +129 -0
- data/lib/grammar/grammar_symbol.rb +19 -0
- data/lib/grammar/production.rb +14 -0
- data/lib/parser/action.rb +51 -0
- data/lib/parser/channel.rb +51 -0
- data/lib/parser/compiled_parser.rb +35 -0
- data/lib/parser/item.rb +37 -0
- data/lib/parser/parse_result.rb +26 -0
- data/lib/parser/parse_tree.rb +34 -0
- data/lib/parser/parser.rb +125 -0
- data/lib/parser/parser_methods.rb +10 -0
- data/lib/parser/parser_run.rb +35 -0
- data/lib/parser/parser_state.rb +66 -0
- data/lib/parser/token.rb +15 -0
- data/lib/tokenizer/tokenizer.rb +88 -0
- data/test/all_tests.rb +11 -0
- data/test/arithmetic_evaluator.rb +70 -0
- data/test/arithmetic_evaluator_test.rb +55 -0
- data/test/arithmetic_grammar.rb +38 -0
- data/test/arithmetic_grammar_test.rb +11 -0
- data/test/arithmetic_test_methods.rb +11 -0
- data/test/arithmetic_tokenizer.rb +43 -0
- data/test/arithmetic_tokenizer_test.rb +32 -0
- data/test/bracket_grammar.rb +25 -0
- data/test/bracket_tokenizer.rb +17 -0
- data/test/brackets_test.rb +20 -0
- data/test/compiled_arithmetic_parser.rb +252 -0
- data/test/compiled_parser_test.rb +71 -0
- data/test/evaluator_test.rb +8 -0
- data/test/grammar_test.rb +70 -0
- data/test/incomplete_arithmetic_evaluator.rb +60 -0
- data/test/lalr_but_not_slr_grammar.rb +17 -0
- data/test/malformed_grammar.rb +9 -0
- data/test/malformed_grammar_test.rb +9 -0
- data/test/nullable_grammar.rb +18 -0
- data/test/parser_test.rb +168 -0
- data/test/rr_conflict_grammar.rb +23 -0
- data/test/simple_grammar.rb +24 -0
- data/test/sr_conflict_grammar.rb +16 -0
- metadata +87 -0
@@ -0,0 +1,26 @@
|
|
1
|
+
module Dhaka
  # Returned by a parser run that consumed the entire token stream.
  # Wraps the root node of the resulting syntax tree.
  class ParseSuccessResult
    attr_accessor :syntax_tree

    # syntax_tree:: the root parse-tree node produced by the run
    def initialize(syntax_tree)
      self.syntax_tree = syntax_tree
    end

    # A successful result never reports an error.
    def has_error?
      false
    end
  end

  # Returned when the parser encounters a token for which the current
  # state has no action. Records where in the stream parsing failed.
  class ParseErrorResult
    attr_reader :bad_token_index

    # bad_token_index:: index of the offending token in the token stream
    def initialize(bad_token_index)
      @bad_token_index = bad_token_index
    end

    # An error result always reports an error.
    def has_error?
      true
    end
  end
end
|
23
|
+
|
24
|
+
|
25
|
+
|
26
|
+
|
@@ -0,0 +1,34 @@
|
|
1
|
+
module Dhaka
  # Interior node of a parse tree: records one production application,
  # with children for each element of the production's right-hand side.
  class ParseTreeCompositeNode
    attr_reader :production, :child_nodes

    def initialize(production)
      @production = production
      @child_nodes = []
    end

    # Post-order flattening: all descendants' production names first,
    # followed by this node's own production name.
    def linearize
      child_nodes.map { |child| child.linearize }.flatten + [production.name]
    end

    def to_s
      "CompositeNode: #{production.symbol} --> [#{child_nodes.join(", ")}]"
    end

    # True only for the node built from the grammar's start production.
    def head_node?
      production.symbol.name == START_SYMBOL_NAME
    end
  end

  # Leaf of a parse tree: wraps a single token from the input stream.
  class ParseTreeLeafNode
    attr_reader :token

    def initialize(token)
      @token = token
    end

    # Leaves contribute nothing to the linearized production sequence.
    def linearize
      []
    end

    def to_s
      "LeafNode: #{token}"
    end

    # A leaf can never be the head of the tree.
    def head_node?
      false
    end
  end
end
|
@@ -0,0 +1,125 @@
|
|
1
|
+
#!/usr/bin/env ruby
require 'set'
module Dhaka
  # Builds an LALR(1) parse table from a Grammar: constructs the item-set
  # automaton lazily, propagates lookaheads through channels, then fills
  # in shift and reduce actions for every state.
  class Parser
    include ParserMethods
    attr_reader :grammar, :start_state

    def initialize(grammar)
      # state -> {grammar symbol -> destination state}
      @transitions = Hash.new {|hash, state| hash[state] = {}}
      @grammar = grammar
      @channels = []
      # Memoizing hash: looking up a kernel item-set lazily creates the
      # corresponding state and, recursively, every state reachable from it.
      @states = Hash.new do |hash, kernel|
        channels, closure = @grammar.closure(kernel)
        @channels += channels.to_a
        new_state = ParserState.new(self, closure)
        # Registered before recursing so cycles in the automaton terminate.
        hash[kernel] = new_state
        new_state.transition_items.each do |symbol, items|
          destination_kernel = ItemSet.new(items.collect{|item| item.next_item})
          destination_state = hash[destination_kernel]
          items.each { |item| @channels << @grammar.passive_channel(item, destination_state.items[item.next_item]) }
          @transitions[new_state][symbol] = destination_state
        end
        new_state
      end
      initialize_states
    end

    # Seeds the automaton from the grammar's start productions (with the
    # end symbol as lookahead), then computes lookaheads and actions.
    def initialize_states
      start_productions = @grammar.productions_for_symbol(@grammar.start_symbol)
      raise NoStartProductionsError.new(@grammar) if start_productions.empty?
      start_items = ItemSet.new(start_productions.collect {|production| Item.new(production, 0)})
      start_items.each {|start_item| start_item.lookaheadset << @grammar.end_symbol}
      @start_state = @states[start_items]
      pump_channels
      generate_shift_actions
      generate_reduce_actions
    end

    # Renders the whole parse table as Ruby source defining a
    # CompiledParser subclass named +parser_class_name+.
    def compile_to_ruby_source_as parser_class_name
      result = "class #{parser_class_name} < Dhaka::CompiledParser\n\n"
      result << " self.grammar = #{@grammar.name}\n\n"
      result << " start_with #{start_state.id}\n\n"
      states.each do |state|
        result << "#{state.compile_to_ruby_source}\n\n"
      end
      result << "end"
      result
    end

    # Graphviz "dot" rendering of the automaton: one box per state, one
    # labelled edge per transition.
    def to_dot
      result = ["digraph x {", "node [fontsize=\"10\" shape=box size=\"5\"]"]
      result += states.collect { |state| state.to_dot }
      states.each { |state|
        @transitions[state].each { |symbol, dest_state|
          result << "#{state.dot_name} -> #{dest_state.dot_name} [label=\"#{symbol.name}\"]"
        }
      }
      result << ['}']
      result.join("\n")
    end
    def states
      @states.values
    end

    # Installs a shift action for every outgoing transition of every
    # state, keyed by the transition symbol's name.
    def generate_shift_actions
      @states.values.each do |state|
        @transitions[state].keys.each { |symbol|
          state.actions[symbol.name] = ShiftAction.new(@transitions[state][symbol])
        }
      end
    end

    # Installs a reduce action for every completed item (dot at the end).
    def generate_reduce_actions
      @states.values.each do |state|
        state.items.values.select{ |item| !item.next_symbol }.each do |item|
          create_reduction_actions_for_item_and_state item, state
        end
      end
    end

    # Adds a reduction on each lookahead of +item+. Any lookahead that
    # already has an action is a shift/reduce or reduce/reduce conflict.
    def create_reduction_actions_for_item_and_state item, state
      item.lookaheadset.each do |lookahead|
        existing_action = state.actions[lookahead.name]
        new_action = ReduceAction.new(item.production)
        if existing_action
          raise ParserConflictError.new(state, existing_action, new_action)
        else
          state.actions[lookahead.name] = new_action
        end
      end
    end


    # Pumps every lookahead channel until a full pass changes nothing.
    # NOTE(review): `pumped || channel.pump` short-circuits the rest of a
    # pass once one channel pumps; the fixpoint is still reached, just
    # over more while-iterations than `channel.pump || pumped` would take.
    def pump_channels
      while true
        break unless @channels.inject(false) do |pumped, channel|
          pumped || channel.pump
        end
      end
    end

  end


  # Raised when two actions compete for the same state/lookahead slot.
  class ParserConflictError < StandardError
    def initialize(state, existing_action, new_action)
      @state = state
      @existing_action = existing_action
      @new_action = new_action
    end
    def to_s
      "Conflict in state #{@state}\n Existing: #{@existing_action}\n New: #{@new_action}"
    end
  end

  # Raised when the grammar defines no productions for its start symbol.
  class NoStartProductionsError < StandardError
    def initialize(grammar)
      @grammar = grammar
    end
    def to_s
      "No start productions defined for #{@grammar.name}"
    end
  end
end
|
125
|
+
|
@@ -0,0 +1,35 @@
|
|
1
|
+
module Dhaka
  # Drives one parse of a token stream against a parse table: maintains
  # the state stack and the growing forest of parse-tree nodes.
  class ParserRun
    attr_reader :state_stack, :token_stream, :node_stack
    def initialize(grammar, start_state, token_stream)
      @grammar = grammar
      @node_stack = []
      @state_stack = [start_state]
      @token_stream = token_stream
      @current_token_index = 0
    end
    # Token under the cursor, or nil once the stream is exhausted.
    def current_token
      @token_stream[@current_token_index]
    end
    # Shifts: pushes a leaf node for the current token and moves the cursor.
    def advance
      node_stack << ParseTreeLeafNode.new(current_token)
      @current_token_index += 1
    end
    # Main loop: executes the table action for each token, returning a
    # ParseErrorResult on the first failure, otherwise a success result
    # wrapping node_stack[0].
    # NOTE(review): assumes the stream's final token drives the accept
    # reduction so the stack ends with the head node — confirm against
    # how callers terminate the token stream (e.g. an end-of-input token).
    def run
      while current_token
        error = execute_action current_token.grammar_symbol.name
        return error if error
        self.advance
      end
      ParseSuccessResult.new(node_stack[0])
    end
    # Looks up and runs the action for +symbol_name+ in the topmost
    # state. The action block runs in the context of this run (it can
    # manipulate state_stack/node_stack) and returns a list of symbol
    # names to process next (e.g. goto symbols after a reduction), which
    # are handled recursively. Returns nil on success, an error result
    # when no action exists.
    def execute_action symbol_name
      action = state_stack[-1].actions[symbol_name]
      return ParseErrorResult.new(@current_token_index) unless action
      self.instance_eval(&action.action_code).each do |symbol_name|
        execute_action symbol_name
      end
      nil
    end
  end
end
|
@@ -0,0 +1,66 @@
|
|
1
|
+
#!/usr/bin/env ruby
require 'set'
module Dhaka
  # One state of the LALR automaton: a set of items plus the parse
  # actions (shift/reduce) keyed by grammar-symbol name.
  class ParserState

    attr_accessor :items, :actions, :id

    # Monotonically increasing id shared by all states.
    # NOTE(review): a class variable is shared across subclasses;
    # acceptable while ParserState is not subclassed.
    @@state_id = 0

    # Hands out the next unique auto-generated state id.
    def self.next_state_id
      result = @@state_id
      @@state_id += 1
      result
    end

    # parser:: the owning parser (receiver for for_symbol DSL blocks)
    # items::  hash of items making up this state's closure
    # id::     explicit id (used by compiled parsers); auto-assigned when nil
    def initialize(parser, items, id=nil)
      @parser = parser
      @items = items
      @actions = {}
      @id = id || ParserState.next_state_id
    end

    # Groups this state's items by the symbol following the dot; items
    # whose dot is at the end (no next symbol) are omitted.
    def transition_items
      result = Hash.new {|h, k| h[k] = ItemSet.new()}
      @items.values.each do |item|
        (result[item.next_symbol] << item) if item.next_symbol
      end
      result
    end

    def dot_name
      self.to_s
    end

    # Graphviz node declaration for this state, labelled with its items.
    def to_dot
      label = self.items.values.join('\n')
      "#{dot_name} [label=\"#{label}\"]"
    end

    # Emits the at_state/for_symbol DSL source consumed by CompiledParser.
    def compile_to_ruby_source
      result = " at_state(#{@id}) {\n"
      actions.each do |symbol_name, action|
        result << " for_symbol('#{symbol_name}') { #{action.compile_to_ruby_source} }\n"
      end
      result << " }"
      result
    end

    # DSL hook: registers the action produced by evaluating +blk+ in the
    # context of the owning parser.
    def for_symbol symbol_name, &blk
      actions[symbol_name] = @parser.instance_eval(&blk)
    end

    def to_s
      "State#{id}"
    end

  end

  # A Set of parser items usable as a Hash key: equality is set equality
  # and the hash value is order-independent.
  class ItemSet < Set
    def hash
      # XOR-fold the member hashes. Seeding inject with 0 keeps the value
      # identical for non-empty sets (0 ^ h == h) and fixes the empty-set
      # case, where a seedless inject returned nil — not a valid value
      # for Object#hash, and a TypeError when used as a Hash key.
      self.collect{|item| item.hash}.inject(0){|result, hashcode| result ^ hashcode}
    end
    def eql? other
      self == other
    end
  end
end
|
data/lib/parser/token.rb
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
module Dhaka
  # A single unit of tokenizer output: pairs a grammar symbol with the
  # literal value read from the input.
  class Token
    attr_accessor :grammar_symbol, :value

    def initialize(grammar_symbol, value)
      @grammar_symbol = grammar_symbol
      @value = value
    end

    # Tokens render as the name of their grammar symbol.
    def to_s
      grammar_symbol.name.to_s
    end

    # Two tokens are equal when both the symbol and the value match.
    def == other
      grammar_symbol == other.grammar_symbol && value == other.value
    end
  end
end
|
@@ -0,0 +1,88 @@
|
|
1
|
+
module Dhaka
  # Raised when the tokenizer reaches a character for which the current
  # state has no registered action.
  class UnrecognizedInputCharacterException < StandardError
    attr_reader :input, :char_index
    def initialize(input, char_index)
      @input = input
      @char_index = char_index
    end
    def to_s
      "Unrecognized character #{input[char_index].chr} encountered while tokenizing:\n #{input}"
    end
  end

  # One state of the tokenizer's state machine: maps characters to the
  # blocks to execute when that character is seen.
  class TokenizerState
    attr_reader :actions

    def initialize
      @actions = {}
    end

    # Registers +blk+ for every character in the +characters+ collection.
    def for_characters(characters, &blk)
      characters.each do |character|
        actions[character] = blk
      end
    end

    # Registers +blk+ for a single character.
    # NOTE(review): keys by character[0] — a Fixnum char code on Ruby
    # 1.8, a one-character String on 1.9+. TokenizerRun#run looks up by
    # String (via String#chr), so the two registration styles only agree
    # on 1.9+ — confirm the target Ruby version.
    def for_character(character, &blk)
      actions[character[0]] = blk
    end

    def to_s
      actions.inspect
    end

  end

  # Class-level DSL for declaring character-driven tokenizers; subclasses
  # declare states with for_state and tokenize input with tokenize.
  class Tokenizer

    # Gives each subclass its own states hash, auto-creating a
    # TokenizerState the first time a state name is referenced.
    def self.inherited(tokenizer)
      class << tokenizer
        attr_accessor :states
      end
      tokenizer.states = Hash.new {|hash, key| hash[key] = TokenizerState.new}
    end

    # DSL entry point: evaluates +blk+ in the context of the named state.
    def self.for_state(state_name, &blk)
      states[state_name].instance_eval(&blk)
    end

    # Tokenizes +input+, returning the accumulated token array.
    def self.tokenize(input)
      TokenizerRun.new(self, input).run
    end
  end

  # One pass of a tokenizer class over a single input string.
  class TokenizerRun

    attr_accessor :accumulator
    attr_reader :tokens
    def initialize(tokenizer, input)
      @tokenizer = tokenizer
      @input = input
      @current_state = tokenizer.states[:idle_state] # every run starts in :idle_state
      @curr_char_index = 0
      @tokens = []
    end

    # Drives the machine: runs the current state's action for each
    # character in the context of this run (actions call advance /
    # switch_to and append to tokens themselves). Raises when no action
    # matches; returns the token array when input is exhausted.
    def run
      while curr_char
        blk = @current_state.actions[curr_char]
        raise UnrecognizedInputCharacterException.new(@input, @curr_char_index) unless blk
        instance_eval(&blk)
      end
      tokens
    end

    # Current character as a one-character String, or nil at end of input.
    def curr_char
      @input[@curr_char_index] and @input[@curr_char_index].chr
    end

    # Moves the cursor to the next input character.
    def advance
      @curr_char_index += 1
    end

    # Switches the machine into the named state.
    def switch_to state_name
      @current_state = @tokenizer.states[state_name]
    end

  end
end
|
data/test/all_tests.rb
ADDED
@@ -0,0 +1,11 @@
|
|
1
|
+
#!/usr/bin/env ruby
# Suite runner: requiring each test file registers its Test::Unit test
# cases, which test/unit autoruns at process exit.
require 'test/unit'

require 'grammar_test'
require 'parser_test'
require 'arithmetic_evaluator_test'
require 'compiled_parser_test'
require 'evaluator_test'
require 'arithmetic_tokenizer_test'
require 'malformed_grammar_test'
require 'brackets_test'
@@ -0,0 +1,70 @@
|
|
1
|
+
require File.dirname(__FILE__)+'/../lib/dhaka'
require 'arithmetic_grammar'

# Evaluator for ArithmeticGrammar parse trees: computes numeric results
# and dispatches function calls to the min/max callables supplied at
# construction time.
class ArithmeticEvaluator < Dhaka::Evaluator

  self.grammar = ArithmeticGrammar

  # Each for_* block corresponds to a grammar production; child_nodes
  # holds the already-evaluated results of its right-hand-side symbols.
  define_evaluation_rules do

    for_subtraction do
      child_nodes[0] - child_nodes[2]
    end

    for_addition do
      child_nodes[0] + child_nodes[2]
    end

    # to_f so integer operands divide without truncating.
    for_division do
      child_nodes[0].to_f/child_nodes[2]
    end

    for_multiplication do
      child_nodes[0] * child_nodes[2]
    end

    # Literal leaves evaluate to the token's value.
    for_getting_literals do
      child_nodes[0].token.value
    end

    for_start_production do
      child_nodes[0]
    end

    # (expr) evaluates to the inner expression (index 1 skips the paren).
    for_unpacking_parenthetized_expression do
      child_nodes[1]
    end

    for_empty_args do
      []
    end

    # Applies an evaluated function to its evaluated argument list.
    for_evaluating_function do
      child_nodes[0].call child_nodes[2]
    end

    for_concatenating_args do
      [child_nodes[0]]+child_nodes[2]
    end

    for_single_args do
      [child_nodes[0]]
    end

    for_min_function do
      @min_function
    end

    for_max_function do
      @max_function
    end

  end

  # min_function/max_function:: callables invoked by the function-call
  # production (e.g. lambdas taking the evaluated argument array).
  def initialize(syntax_tree, min_function, max_function)
    @min_function = min_function
    @max_function = max_function
    super(syntax_tree)
  end

end
|