foreverman-dhaka 2.2.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (84) hide show
  1. data/Rakefile +64 -0
  2. data/lib/dhaka.rb +62 -0
  3. data/lib/dhaka/dot/dot.rb +29 -0
  4. data/lib/dhaka/evaluator/evaluator.rb +133 -0
  5. data/lib/dhaka/grammar/closure_hash.rb +15 -0
  6. data/lib/dhaka/grammar/grammar.rb +236 -0
  7. data/lib/dhaka/grammar/grammar_symbol.rb +27 -0
  8. data/lib/dhaka/grammar/precedence.rb +19 -0
  9. data/lib/dhaka/grammar/production.rb +36 -0
  10. data/lib/dhaka/lexer/accept_actions.rb +36 -0
  11. data/lib/dhaka/lexer/alphabet.rb +21 -0
  12. data/lib/dhaka/lexer/compiled_lexer.rb +46 -0
  13. data/lib/dhaka/lexer/dfa.rb +121 -0
  14. data/lib/dhaka/lexer/lexeme.rb +32 -0
  15. data/lib/dhaka/lexer/lexer.rb +70 -0
  16. data/lib/dhaka/lexer/lexer_run.rb +78 -0
  17. data/lib/dhaka/lexer/regex_grammar.rb +393 -0
  18. data/lib/dhaka/lexer/regex_parser.rb +2010 -0
  19. data/lib/dhaka/lexer/regex_tokenizer.rb +14 -0
  20. data/lib/dhaka/lexer/specification.rb +96 -0
  21. data/lib/dhaka/lexer/state.rb +68 -0
  22. data/lib/dhaka/lexer/state_machine.rb +37 -0
  23. data/lib/dhaka/parser/action.rb +55 -0
  24. data/lib/dhaka/parser/channel.rb +58 -0
  25. data/lib/dhaka/parser/compiled_parser.rb +51 -0
  26. data/lib/dhaka/parser/conflict.rb +54 -0
  27. data/lib/dhaka/parser/item.rb +43 -0
  28. data/lib/dhaka/parser/parse_result.rb +50 -0
  29. data/lib/dhaka/parser/parse_tree.rb +66 -0
  30. data/lib/dhaka/parser/parser.rb +165 -0
  31. data/lib/dhaka/parser/parser_methods.rb +11 -0
  32. data/lib/dhaka/parser/parser_run.rb +39 -0
  33. data/lib/dhaka/parser/parser_state.rb +74 -0
  34. data/lib/dhaka/parser/token.rb +22 -0
  35. data/lib/dhaka/runtime.rb +51 -0
  36. data/lib/dhaka/tokenizer/tokenizer.rb +190 -0
  37. data/test/all_tests.rb +5 -0
  38. data/test/arithmetic/arithmetic_evaluator.rb +64 -0
  39. data/test/arithmetic/arithmetic_evaluator_test.rb +43 -0
  40. data/test/arithmetic/arithmetic_grammar.rb +41 -0
  41. data/test/arithmetic/arithmetic_grammar_test.rb +9 -0
  42. data/test/arithmetic/arithmetic_test_methods.rb +9 -0
  43. data/test/arithmetic/arithmetic_tokenizer.rb +39 -0
  44. data/test/arithmetic/arithmetic_tokenizer_test.rb +38 -0
  45. data/test/arithmetic_precedence/arithmetic_precedence_evaluator.rb +43 -0
  46. data/test/arithmetic_precedence/arithmetic_precedence_grammar.rb +24 -0
  47. data/test/arithmetic_precedence/arithmetic_precedence_grammar_test.rb +30 -0
  48. data/test/arithmetic_precedence/arithmetic_precedence_lexer_specification.rb +23 -0
  49. data/test/arithmetic_precedence/arithmetic_precedence_parser_test.rb +33 -0
  50. data/test/brackets/bracket_grammar.rb +23 -0
  51. data/test/brackets/bracket_tokenizer.rb +22 -0
  52. data/test/brackets/brackets_test.rb +28 -0
  53. data/test/chittagong/chittagong_driver.rb +46 -0
  54. data/test/chittagong/chittagong_driver_test.rb +276 -0
  55. data/test/chittagong/chittagong_evaluator.rb +284 -0
  56. data/test/chittagong/chittagong_evaluator_test.rb +38 -0
  57. data/test/chittagong/chittagong_grammar.rb +104 -0
  58. data/test/chittagong/chittagong_lexer.rb +109 -0
  59. data/test/chittagong/chittagong_lexer_specification.rb +37 -0
  60. data/test/chittagong/chittagong_lexer_test.rb +58 -0
  61. data/test/chittagong/chittagong_parser.rb +879 -0
  62. data/test/chittagong/chittagong_parser_test.rb +55 -0
  63. data/test/chittagong/chittagong_test.rb +170 -0
  64. data/test/core/another_lalr_but_not_slr_grammar.rb +20 -0
  65. data/test/core/compiled_parser_test.rb +44 -0
  66. data/test/core/dfa_test.rb +170 -0
  67. data/test/core/evaluator_test.rb +22 -0
  68. data/test/core/grammar_test.rb +83 -0
  69. data/test/core/lalr_but_not_slr_grammar.rb +19 -0
  70. data/test/core/lexer_test.rb +139 -0
  71. data/test/core/malformed_grammar.rb +7 -0
  72. data/test/core/malformed_grammar_test.rb +8 -0
  73. data/test/core/nullable_grammar.rb +21 -0
  74. data/test/core/parse_result_test.rb +44 -0
  75. data/test/core/parser_state_test.rb +24 -0
  76. data/test/core/parser_test.rb +131 -0
  77. data/test/core/precedence_grammar.rb +17 -0
  78. data/test/core/precedence_grammar_test.rb +9 -0
  79. data/test/core/rr_conflict_grammar.rb +21 -0
  80. data/test/core/simple_grammar.rb +22 -0
  81. data/test/core/sr_conflict_grammar.rb +16 -0
  82. data/test/dhaka_test_helper.rb +18 -0
  83. data/test/fake_logger.rb +17 -0
  84. metadata +137 -0
module Dhaka
  # Reserved symbol identifying the idle state every tokenizer starts in.
  TOKENIZER_IDLE_STATE = :idle_state

  # Result returned when tokenizing succeeds. Includes Enumerable over the
  # token stream, so it can be handed straight to a parser.
  class TokenizerSuccessResult
    include Enumerable

    def initialize(tokens)
      @tokens = tokens
    end

    # Always false for a success result.
    def has_error?
      false
    end

    # Iterates over the shifted tokens in order.
    def each(&block)
      @tokens.each(&block)
    end
  end

  # Result returned when tokenizing stops at an unexpected character.
  class TokenizerErrorResult
    # Index of the character in the input that caused the failure.
    attr_reader :unexpected_char_index

    def initialize(unexpected_char_index)
      @unexpected_char_index = unexpected_char_index
    end

    # Always true for an error result.
    def has_error?
      true
    end
  end

  # One state of the tokenizer state machine. Holds the action to run for
  # each permissible character, plus an optional fallback action.
  class TokenizerState
    attr_reader :actions, :default_action

    def initialize
      @actions = {}
    end

    # Registers +blk+ as the action for every character in +characters+.
    def for_characters(characters, &blk)
      characters.each { |character| actions[character] = blk }
    end

    alias for_character for_characters

    # Registers +blk+ as the action for characters with no explicit entry
    # in +actions+.
    def for_default(&blk)
      @default_action = blk
    end

    def to_s #:nodoc:
      actions.inspect
    end
  end

  # Abstract superclass providing a DSL for hand-coding tokenizers.
  # Subclass it to implement a tokenizer for a specific grammar.
  #
  # A tokenizer is a state machine. Each state is identified by a Ruby
  # symbol; Dhaka::TOKENIZER_IDLE_STATE names the state the machine starts
  # in. A subclass declares its states with +for_state+; inside each state,
  # +for_characters+ (or its alias +for_character+) attaches an action to a
  # set of characters. Actions run with the Tokenizer instance as +self+,
  # so they can call +create_token+, +advance+, +switch_to+, +curr_char+
  # and +curr_token+. See ArithmeticTokenizer in the test suite for a
  # worked example: the idle state emits single-character tokens for
  # operators and parentheses, shifts to :get_integer_literal on a digit
  # (accumulating the literal's value on the token at the top of the
  # stack), shifts back to idle on the next non-digit, and consumes
  # whitespace as a delimiter without emitting a token.
  #
  # When the lexical structure is too complicated to hand-code comfortably,
  # write a LexerSpecification with regular expressions and build a Lexer
  # from that instead.
  class Tokenizer
    class << self
      # Declares the actions for the state named +state_name+ by evaluating
      # +blk+ against that state.
      def for_state(state_name, &blk)
        states[state_name].instance_eval(&blk)
      end

      # Tokenizes the string +input+, returning a TokenizerErrorResult on
      # failure and a TokenizerSuccessResult on success.
      def tokenize(input)
        new(input).run
      end

      private
      # Gives each subclass its own lazily-populated table of states.
      def inherited(tokenizer)
        class << tokenizer
          attr_accessor :states, :grammar
        end
        tokenizer.states = Hash.new { |hash, key| hash[key] = TokenizerState.new }
      end
    end

    # The tokens shifted so far.
    attr_reader :tokens

    def initialize(input) #:nodoc:
      @input = input
      @current_state = self.class.states[TOKENIZER_IDLE_STATE]
      @curr_char_index = 0
      @tokens = []
    end

    # The character currently being processed, or nil past end of input.
    def curr_char
      current = @input[@curr_char_index]
      current && current.chr
    end

    # Moves on to the next character.
    def advance
      @curr_char_index += 1
    end

    def inspect
      # NOTE(review): +grammar+ is defined as an accessor on the subclass
      # singleton (see +inherited+), not as an instance method — confirm
      # this call resolves for tokenizer instances.
      "<Dhaka::Tokenizer grammar : #{grammar}>"
    end

    # The token currently on top of the stack.
    def curr_token
      tokens.last
    end

    # Pushes a new token for +symbol_name+ carrying +value+, recording the
    # current input position.
    def create_token(symbol_name, value)
      tokens << Dhaka::Token.new(symbol_name, value, @curr_char_index)
    end

    # Makes the state identified by the symbol +state_name+ the active one.
    def switch_to state_name
      @current_state = self.class.states[state_name]
    end

    # Drives the state machine over the input. Returns a
    # TokenizerErrorResult as soon as a character has no applicable action;
    # otherwise appends the end-of-input token and returns a
    # TokenizerSuccessResult.
    def run #:nodoc:
      while curr_char
        action = @current_state.actions[curr_char] || @current_state.default_action
        return TokenizerErrorResult.new(@curr_char_index) unless action
        instance_eval(&action)
      end
      tokens << Dhaka::Token.new(Dhaka::END_SYMBOL_NAME, nil, nil)
      TokenizerSuccessResult.new(tokens)
    end
  end
end
#!/usr/bin/env ruby
# Discovers every file ending in "test.rb" beneath the working directory,
# echoes its path, and requires it relative to this script's directory.
base_dir = File.dirname(__FILE__)
Dir['**/*test.rb'].each do |test_file|
  puts test_file
  require File.join(base_dir, test_file)
end
require File.dirname(__FILE__) + '/arithmetic_grammar'

# Evaluates parse trees produced from ArithmeticGrammar. Literal terms are
# read off their tokens; the function names map to the max/min callables
# supplied to the constructor, which are applied to evaluated argument
# lists.
class ArithmeticEvaluator < Dhaka::Evaluator

  self.grammar = ArithmeticGrammar

  define_evaluation_rules do

    for_subtraction do
      left  = evaluate(child_nodes[0])
      right = evaluate(child_nodes[2])
      left - right
    end

    for_addition do
      left  = evaluate(child_nodes[0])
      right = evaluate(child_nodes[2])
      left + right
    end

    for_division do
      left  = evaluate(child_nodes[0])
      right = evaluate(child_nodes[2])
      left.to_f / right
    end

    for_multiplication do
      left  = evaluate(child_nodes[0])
      right = evaluate(child_nodes[2])
      left * right
    end

    # A literal's value comes straight from its token.
    for_getting_literals { child_nodes[0].token.value }

    # "( E )" evaluates to its inner expression.
    for_unpacking_parenthetized_expression { evaluate(child_nodes[1]) }

    # No arguments at all.
    for_empty_args { [] }

    # "FunctionName ( Args )" — apply the resolved callable to the
    # evaluated argument list.
    for_evaluating_function do
      evaluate(child_nodes[0]).call evaluate(child_nodes[2])
    end

    # "E , Args" — prepend the evaluated head onto the rest.
    for_concatenating_args do
      [evaluate(child_nodes[0])] + evaluate(child_nodes[2])
    end

    # A lone argument becomes a one-element list.
    for_single_args { [evaluate(child_nodes[0])] }

    for_min_function { @min_function }

    for_max_function { @max_function }

  end

  # +min_function+ and +max_function+ are callables taking an array of
  # evaluated arguments.
  def initialize(min_function, max_function)
    @min_function = min_function
    @max_function = max_function
  end

end
require File.dirname(__FILE__) + '/../dhaka_test_helper'
require File.dirname(__FILE__) + '/arithmetic_evaluator'
require File.dirname(__FILE__) + '/arithmetic_test_methods'
eval(Dhaka::Parser.new(ArithmeticGrammar).compile_to_ruby_source_as(:CompiledArithmeticParser))

# Drives ArithmeticEvaluator end-to-end: each token stream is parsed by
# the compiled arithmetic parser and the resulting tree is evaluated.
class TestArithmeticEvaluator < Test::Unit::TestCase
  include ArithmeticTestMethods

  def setup
    @min_func = Proc.new { |args| args.inject { |min, elem| elem < min ? elem : min } }
    @max_func = Proc.new { |args| args.inject { |max, elem| elem > max ? elem : max } }
  end

  def test_results_simple_arithmetic_given_tokens_and_parse_tree_1
    assert_evaluates(-2, [token('n', 2), token('-', nil), token('n', 4)])
  end

  def test_results_simple_arithmetic_given_tokens_and_parse_tree_2
    assert_evaluates(1.25, [token('n', 2), token('-', nil), token('(', nil), token('n', 3), token('/', nil), token('n', 4), token(')', nil)])
  end

  def test_results_simple_arithmetic_given_tokens_and_parse_tree_3
    assert_evaluates(3.5, [token('n', 2), token('+', nil), token('(', nil), token('n', 3), token('/', nil), token('(', nil), token('n', 7), token('-', nil), token('n', 5), token(')', nil), token(')', nil)])
  end

  def test_results_simple_arithmetic_given_tokens_and_parse_tree_4
    assert_evaluates(6, [token('n', 2), token('+', nil), token('h', nil), token('(', nil), token('n', 3), token(',', nil), token('n', 4), token(')', nil)])
  end

  def test_results_simple_arithmetic_given_tokens_and_parse_tree_5
    assert_evaluates(5, [token('n', 2), token('+', nil), token('l', nil), token('(', nil), token('n', 3), token(',', nil), token('n', 4), token(')', nil)])
  end

  private

  # Appends the end-of-input token, parses the stream, evaluates the tree,
  # and asserts the result equals +expected+.
  def assert_evaluates(expected, token_stream)
    token_stream << token(Dhaka::END_SYMBOL_NAME, nil)
    parse_tree = parse(token_stream)
    assert_equal expected, ArithmeticEvaluator.new(@min_func, @max_func).evaluate(parse_tree)
  end
end
# Grammar for arithmetic expressions with integer literals ('n'),
# parenthesized subexpressions, the four binary operators, and calls to
# the single-letter functions 'h' (max) and 'l' (min) taking a
# comma-separated, possibly empty, argument list.
class ArithmeticGrammar < Dhaka::Grammar

  for_symbol(Dhaka::START_SYMBOL_NAME) do
    expression %w[E]
  end

  # Additive level.
  for_symbol('E') do
    subtraction %w[E - T]
    addition %w[E + T]
    term %w[T]
  end

  # Multiplicative level.
  for_symbol('T') do
    factor %w[F]
    division %w[T / F]
    multiplication %w[T * F]
  end

  # Factors: literals, parenthesized expressions, and function calls.
  for_symbol('F') do
    getting_literals %w[n]
    unpacking_parenthetized_expression %w[( E )]
    function %w[Function]
  end

  for_symbol('Function') do
    evaluating_function %w[FunctionName ( Args )]
  end

  for_symbol('FunctionName') do
    max_function %w[h]
    min_function %w[l]
  end

  # Argument lists may be empty (Args is nullable).
  for_symbol('Args') do
    empty_args %w[]
    single_args %w[E]
    concatenating_args %w[E , Args]
  end

end
require File.dirname(__FILE__) + '/../dhaka_test_helper'
require File.dirname(__FILE__) + '/arithmetic_grammar'

# Checks FIRST-set computation on a grammar with a nullable non-terminal
# (Args can derive the empty string).
class ArithmeticGrammarTest < Test::Unit::TestCase
  def test_first_with_nullable_non_terminals
    grammar = ArithmeticGrammar
    first_names = grammar.first(grammar.symbol_for_name('Args')).map { |symbol| symbol.name }
    assert_equal(Set.new(['(', 'n', 'h', 'l']), Set.new(first_names))
  end
end
# Helpers shared by the arithmetic tests: parsing a token stream with the
# pre-compiled parser and constructing position-less tokens.
module ArithmeticTestMethods
  # Parses +token_stream+ with the compiled arithmetic parser.
  def parse(token_stream)
    CompiledArithmeticParser.parse(token_stream)
  end

  # Builds a token carrying +value+ with no input-position information.
  def token(symbol_name, value)
    Dhaka::Token.new(symbol_name, value, nil)
  end
end
require File.dirname(__FILE__) + '/arithmetic_grammar'

# Hand-coded tokenizer for arithmetic expressions. Runs of digits are
# accumulated into multi-character 'n' tokens via the :get_integer_literal
# state; every other recognized character becomes a single-character
# token, and spaces act purely as delimiters.
class ArithmeticTokenizer < Dhaka::Tokenizer

  digit_chars     = ('0'..'9').to_a
  bracket_chars   = %w[( )]
  operator_chars  = %w[- + / *]
  function_chars  = %w[h l]
  separator_chars = %w[,]
  space_chars     = [' ']

  recognized_chars = digit_chars + bracket_chars + operator_chars + function_chars + separator_chars + space_chars

  for_state Dhaka::TOKENIZER_IDLE_STATE do
    # Operators, brackets, functions and separators: one token per char.
    for_characters(recognized_chars - (digit_chars + space_chars)) do
      create_token(curr_char, nil)
      advance
    end
    # A digit starts a number token and shifts into literal accumulation.
    for_characters digit_chars do
      create_token('n', '')
      switch_to :get_integer_literal
    end
    # Whitespace delimits tokens but is not emitted.
    for_character space_chars do
      advance
    end
  end

  for_state :get_integer_literal do
    # Any non-digit ends the literal; hand the char back to the idle state.
    for_characters recognized_chars - digit_chars do
      switch_to Dhaka::TOKENIZER_IDLE_STATE
    end
    # Keep appending digits onto the current number token's value.
    for_characters digit_chars do
      curr_token.value << curr_char
      advance
    end
  end

end
require File.dirname(__FILE__) + '/../dhaka_test_helper'
require File.dirname(__FILE__) + "/arithmetic_tokenizer"

# Exercises ArithmeticTokenizer on empty, simple, nested, multi-digit and
# malformed inputs.
class TestArithmeticTokenizer < Test::Unit::TestCase
  def test_returns_end_of_input_token_for_empty_input
    assert_equal([token(Dhaka::END_SYMBOL_NAME, nil)], ArithmeticTokenizer.tokenize([]).to_a)
  end

  def test_tokenizes_given_a_string_input
    expected = [token('n', 2), token('-', nil), token('n', 4), token(Dhaka::END_SYMBOL_NAME, nil)]
    assert_equal(expected, ArithmeticTokenizer.tokenize('2 - 4').to_a)
  end

  def test_a_longer_input
    expected = [token('n', 2), token('+', nil), token('(', nil), token('n', 3), token('/', nil),
                token('(', nil), token('n', 7), token('-', nil), token('n', 5), token(')', nil),
                token(')', nil), token(Dhaka::END_SYMBOL_NAME, nil)]
    assert_equal(expected, ArithmeticTokenizer.tokenize('2+(3 / (7 - 5))').to_a)
  end

  def test_another_input_with_multi_digit_numbers
    expected = [token('n', 2034), token('+', nil), token('(', nil), token('n', 3433), token('/', nil),
                token('(', nil), token('n', 7), token('-', nil), token('n', 5), token(')', nil),
                token(')', nil), token(Dhaka::END_SYMBOL_NAME, nil)]
    assert_equal(expected, ArithmeticTokenizer.tokenize('2034 +(3433 / (7 - 5))').to_a)
  end

  def test_an_input_with_unrecognized_characters
    result = ArithmeticTokenizer.tokenize('2+(3 / (7 -& 5))')
    assert(result.has_error?)
    assert_equal(11, result.unexpected_char_index)
  end

  def test_another_input_with_illegal_characters
    result = ArithmeticTokenizer.tokenize('2034 +(34b3 / (7 - 5))')
    assert(result.has_error?)
    assert_equal(9, result.unexpected_char_index)
  end

  # Builds an expected token; values are stringified because the tokenizer
  # accumulates literal values as strings.
  def token(symbol_name, value)
    Dhaka::Token.new(symbol_name, value ? value.to_s : nil, nil)
  end
end