dhaka 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43)
  1. data/lib/dhaka.rb +44 -0
  2. data/lib/evaluator/evaluator.rb +70 -0
  3. data/lib/grammar/closure_hash.rb +13 -0
  4. data/lib/grammar/grammar.rb +129 -0
  5. data/lib/grammar/grammar_symbol.rb +19 -0
  6. data/lib/grammar/production.rb +14 -0
  7. data/lib/parser/action.rb +51 -0
  8. data/lib/parser/channel.rb +51 -0
  9. data/lib/parser/compiled_parser.rb +35 -0
  10. data/lib/parser/item.rb +37 -0
  11. data/lib/parser/parse_result.rb +26 -0
  12. data/lib/parser/parse_tree.rb +34 -0
  13. data/lib/parser/parser.rb +125 -0
  14. data/lib/parser/parser_methods.rb +10 -0
  15. data/lib/parser/parser_run.rb +35 -0
  16. data/lib/parser/parser_state.rb +66 -0
  17. data/lib/parser/token.rb +15 -0
  18. data/lib/tokenizer/tokenizer.rb +88 -0
  19. data/test/all_tests.rb +11 -0
  20. data/test/arithmetic_evaluator.rb +70 -0
  21. data/test/arithmetic_evaluator_test.rb +55 -0
  22. data/test/arithmetic_grammar.rb +38 -0
  23. data/test/arithmetic_grammar_test.rb +11 -0
  24. data/test/arithmetic_test_methods.rb +11 -0
  25. data/test/arithmetic_tokenizer.rb +43 -0
  26. data/test/arithmetic_tokenizer_test.rb +32 -0
  27. data/test/bracket_grammar.rb +25 -0
  28. data/test/bracket_tokenizer.rb +17 -0
  29. data/test/brackets_test.rb +20 -0
  30. data/test/compiled_arithmetic_parser.rb +252 -0
  31. data/test/compiled_parser_test.rb +71 -0
  32. data/test/evaluator_test.rb +8 -0
  33. data/test/grammar_test.rb +70 -0
  34. data/test/incomplete_arithmetic_evaluator.rb +60 -0
  35. data/test/lalr_but_not_slr_grammar.rb +17 -0
  36. data/test/malformed_grammar.rb +9 -0
  37. data/test/malformed_grammar_test.rb +9 -0
  38. data/test/nullable_grammar.rb +18 -0
  39. data/test/parser_test.rb +168 -0
  40. data/test/rr_conflict_grammar.rb +23 -0
  41. data/test/simple_grammar.rb +24 -0
  42. data/test/sr_conflict_grammar.rb +16 -0
  43. metadata +87 -0
data/lib/parser/parse_result.rb
@@ -0,0 +1,26 @@
+ module Dhaka
+   class ParseSuccessResult
+     attr_accessor :syntax_tree
+     def initialize(syntax_tree)
+       @syntax_tree = syntax_tree
+     end
+
+     def has_error?
+       false
+     end
+   end
+   class ParseErrorResult
+     attr_reader :bad_token_index
+     def initialize(bad_token_index)
+       @bad_token_index = bad_token_index
+     end
+
+     def has_error?
+       true
+     end
+   end
+ end
+
+
+
+
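The two result classes above are what a parse ultimately returns. A minimal sketch of how a caller might branch on them (parser and tokens are placeholders here; building them is shown further down in this diff):

    result = parser.parse(tokens)
    if result.has_error?
      # ParseErrorResult carries the index of the offending token
      puts "Parse error at token ##{result.bad_token_index}"
    else
      # ParseSuccessResult wraps the finished parse tree
      tree = result.syntax_tree
    end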
data/lib/parser/parse_tree.rb
@@ -0,0 +1,34 @@
+ module Dhaka
+   class ParseTreeCompositeNode
+     attr_reader :production, :child_nodes
+     def initialize(production)
+       @production = production
+       @child_nodes = []
+     end
+     def linearize
+       child_nodes.collect {|child_node| child_node.linearize}.flatten + [production.name]
+     end
+     def to_s
+       "CompositeNode: #{production.symbol} --> [#{child_nodes.join(", ")}]"
+     end
+     def head_node?
+       production.symbol.name == START_SYMBOL_NAME
+     end
+   end
+
+   class ParseTreeLeafNode
+     attr_reader :token
+     def initialize(token)
+       @token = token
+     end
+     def linearize
+       []
+     end
+     def to_s
+       "LeafNode: #{token}"
+     end
+     def head_node?
+       false
+     end
+   end
+ end
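For orientation: linearize walks a tree bottom-up and returns production names in the order the reductions were applied, which is what the evaluator later replays. An illustrative call (the production names shown are invented for the example):

    tree = parse_result.syntax_tree
    tree.linearize   # => e.g. ["literal", "literal", "addition", "start"]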
data/lib/parser/parser.rb
@@ -0,0 +1,125 @@
+ #!/usr/bin/env ruby
+ require 'set'
+ module Dhaka
+   class Parser
+     include ParserMethods
+     attr_reader :grammar, :start_state
+
+     def initialize(grammar)
+       @transitions = Hash.new {|hash, state| hash[state] = {}}
+       @grammar = grammar
+       @channels = []
+       @states = Hash.new do |hash, kernel|
+         channels, closure = @grammar.closure(kernel)
+         @channels += channels.to_a
+         new_state = ParserState.new(self, closure)
+         hash[kernel] = new_state
+         new_state.transition_items.each do |symbol, items|
+           destination_kernel = ItemSet.new(items.collect{|item| item.next_item})
+           destination_state = hash[destination_kernel]
+           items.each { |item| @channels << @grammar.passive_channel(item, destination_state.items[item.next_item]) }
+           @transitions[new_state][symbol] = destination_state
+         end
+         new_state
+       end
+       initialize_states
+     end
+
+     def initialize_states
+       start_productions = @grammar.productions_for_symbol(@grammar.start_symbol)
+       raise NoStartProductionsError.new(@grammar) if start_productions.empty?
+       start_items = ItemSet.new(start_productions.collect {|production| Item.new(production, 0)})
+       start_items.each {|start_item| start_item.lookaheadset << @grammar.end_symbol}
+       @start_state = @states[start_items]
+       pump_channels
+       generate_shift_actions
+       generate_reduce_actions
+     end
+
+     def compile_to_ruby_source_as parser_class_name
+       result = "class #{parser_class_name} < Dhaka::CompiledParser\n\n"
+       result << " self.grammar = #{@grammar.name}\n\n"
+       result << " start_with #{start_state.id}\n\n"
+       states.each do |state|
+         result << "#{state.compile_to_ruby_source}\n\n"
+       end
+       result << "end"
+       result
+     end
+
+     def to_dot
+       result = ["digraph x {", "node [fontsize=\"10\" shape=box size=\"5\"]"]
+       result += states.collect { |state| state.to_dot }
+       states.each { |state|
+         @transitions[state].each { |symbol, dest_state|
+           result << "#{state.dot_name} -> #{dest_state.dot_name} [label=\"#{symbol.name}\"]"
+         }
+       }
+       result << ['}']
+       result.join("\n")
+     end
+     def states
+       @states.values
+     end
+
+     def generate_shift_actions
+       @states.values.each do |state|
+         @transitions[state].keys.each { |symbol|
+           state.actions[symbol.name] = ShiftAction.new(@transitions[state][symbol])
+         }
+       end
+     end
+
+     def generate_reduce_actions
+       @states.values.each do |state|
+         state.items.values.select{ |item| !item.next_symbol }.each do |item|
+           create_reduction_actions_for_item_and_state item, state
+         end
+       end
+     end
+
+     def create_reduction_actions_for_item_and_state item, state
+       item.lookaheadset.each do |lookahead|
+         existing_action = state.actions[lookahead.name]
+         new_action = ReduceAction.new(item.production)
+         if existing_action
+           raise ParserConflictError.new(state, existing_action, new_action)
+         else
+           state.actions[lookahead.name] = new_action
+         end
+       end
+     end
+
+
+     def pump_channels
+       while true
+         break unless @channels.inject(false) do |pumped, channel|
+           pumped || channel.pump
+         end
+       end
+     end
+
+   end
+
+
+   class ParserConflictError < StandardError
+     def initialize(state, existing_action, new_action)
+       @state = state
+       @existing_action = existing_action
+       @new_action = new_action
+     end
+     def to_s
+       "Conflict in state #{@state}\n Existing: #{@existing_action}\n New: #{@new_action}"
+     end
+   end
+
+   class NoStartProductionsError < StandardError
+     def initialize(grammar)
+       @grammar = grammar
+     end
+     def to_s
+       "No start productions defined for #{@grammar.name}"
+     end
+   end
+ end
+
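A rough sketch of driving the Parser class above, assuming a grammar class such as ArithmeticGrammar (defined in data/test/arithmetic_grammar.rb, not shown in this excerpt) is already loaded:

    parser = Dhaka::Parser.new(ArithmeticGrammar)

    # Serialize the generated tables to Ruby source, the technique behind
    # data/test/compiled_arithmetic_parser.rb
    File.open('compiled_arithmetic_parser.rb', 'w') do |file|
      file << parser.compile_to_ruby_source_as(:CompiledArithmeticParser)
    end

    # Or render the state machine for Graphviz
    puts parser.to_dot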
data/lib/parser/parser_methods.rb
@@ -0,0 +1,10 @@
+ #!/usr/bin/env ruby
+ module Dhaka
+   module ParserMethods
+     def parse token_stream
+       return nil if token_stream.empty?
+       parser_run = ParserRun.new(grammar, start_state, token_stream+[Token.new(@grammar.end_symbol, nil)])
+       parser_run.run
+     end
+   end
+ end
data/lib/parser/parser_run.rb
@@ -0,0 +1,35 @@
+ module Dhaka
+   class ParserRun
+     attr_reader :state_stack, :token_stream, :node_stack
+     def initialize(grammar, start_state, token_stream)
+       @grammar = grammar
+       @node_stack = []
+       @state_stack = [start_state]
+       @token_stream = token_stream
+       @current_token_index = 0
+     end
+     def current_token
+       @token_stream[@current_token_index]
+     end
+     def advance
+       node_stack << ParseTreeLeafNode.new(current_token)
+       @current_token_index += 1
+     end
+     def run
+       while current_token
+         error = execute_action current_token.grammar_symbol.name
+         return error if error
+         self.advance
+       end
+       ParseSuccessResult.new(node_stack[0])
+     end
+     def execute_action symbol_name
+       action = state_stack[-1].actions[symbol_name]
+       return ParseErrorResult.new(@current_token_index) unless action
+       self.instance_eval(&action.action_code).each do |symbol_name|
+         execute_action symbol_name
+       end
+       nil
+     end
+   end
+ end
data/lib/parser/parser_state.rb
@@ -0,0 +1,66 @@
+ #!/usr/bin/env ruby
+ require 'set'
+ module Dhaka
+   class ParserState
+
+     attr_accessor :items, :actions, :id
+
+     @@state_id = 0
+
+     def self.next_state_id
+       result = @@state_id
+       @@state_id += 1
+       result
+     end
+
+     def initialize(parser, items, id=nil)
+       @parser = parser
+       @items = items
+       @actions = {}
+       @id = id ? id : ParserState.next_state_id
+     end
+
+     def transition_items
+       result = Hash.new {|h, k| h[k] = ItemSet.new()}
+       for item in @items.values
+         (result[item.next_symbol] << item) if item.next_symbol
+       end
+       result
+     end
+
+     def dot_name
+       self.to_s
+     end
+
+     def to_dot
+       label = self.items.values.join('\n')
+       "#{dot_name} [label=\"#{label}\"]"
+     end
+     def compile_to_ruby_source
+       result = " at_state(#{@id}) {\n"
+       actions.each do |symbol_name, action|
+         result << " for_symbol('#{symbol_name}') { #{action.compile_to_ruby_source} }\n"
+       end
+       result << " }"
+       result
+     end
+
+     def for_symbol symbol_name, &blk
+       actions[symbol_name] = @parser.instance_eval(&blk)
+     end
+
+     def to_s
+       "State#{id}"
+     end
+
+   end
+
+   class ItemSet < Set
+     def hash
+       self.collect{|item| item.hash}.inject{|result, hashcode| result ^ hashcode}
+     end
+     def eql? other
+       self == other
+     end
+   end
+ end
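The hash and eql? overrides on ItemSet exist so that two sets with the same items collide to the same Hash key regardless of insertion order, which is what the memoizing @states Hash in Parser#initialize relies on. An illustrative check (plain symbols stand in for Item objects):

    a = Dhaka::ItemSet.new([:item_x, :item_y])
    b = Dhaka::ItemSet.new([:item_y, :item_x])
    a.hash == b.hash   # => true; XOR of member hashes is order-independent
    a.eql?(b)          # => true, so both select the same ParserState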
data/lib/parser/token.rb
@@ -0,0 +1,15 @@
+ module Dhaka
+   class Token
+     attr_accessor :grammar_symbol, :value
+     def initialize(grammar_symbol, value)
+       @grammar_symbol = grammar_symbol
+       @value = value
+     end
+     def to_s
+       "#{@grammar_symbol.name}"
+     end
+     def == other
+       (grammar_symbol == other.grammar_symbol) && (value == other.value)
+     end
+   end
+ end
data/lib/tokenizer/tokenizer.rb
@@ -0,0 +1,88 @@
+ module Dhaka
+   class UnrecognizedInputCharacterException < StandardError
+     attr_reader :input, :char_index
+     def initialize(input, char_index)
+       @input = input
+       @char_index = char_index
+     end
+     def to_s
+       "Unrecognized character #{input[char_index].chr} encountered while tokenizing:\n #{input}"
+     end
+   end
+
+   class TokenizerState
+     attr_reader :actions
+
+     def initialize
+       @actions = {}
+     end
+
+     def for_characters(characters, &blk)
+       characters.each do |character|
+         actions[character] = blk
+       end
+     end
+
+     def for_character(character, &blk)
+       actions[character[0]] = blk
+     end
+
+     def to_s
+       actions.inspect
+     end
+
+   end
+
+   class Tokenizer
+
+     def self.inherited(tokenizer)
+       class << tokenizer
+         attr_accessor :states
+       end
+       tokenizer.states = Hash.new {|hash, key| hash[key] = TokenizerState.new}
+     end
+
+     def self.for_state(state_name, &blk)
+       states[state_name].instance_eval(&blk)
+     end
+
+     def self.tokenize(input)
+       TokenizerRun.new(self, input).run
+     end
+   end
+
+   class TokenizerRun
+
+     attr_accessor :accumulator
+     attr_reader :tokens
+     def initialize(tokenizer, input)
+       @tokenizer = tokenizer
+       @input = input
+       @current_state = tokenizer.states[:idle_state]
+       @curr_char_index = 0
+       @tokens = []
+     end
+
+     def run
+       while curr_char
+         blk = @current_state.actions[curr_char]
+         raise UnrecognizedInputCharacterException.new(@input, @curr_char_index) unless blk
+         instance_eval(&blk)
+       end
+       tokens
+     end
+
+     def curr_char
+       @input[@curr_char_index] and @input[@curr_char_index].chr
+     end
+
+     def advance
+       @curr_char_index += 1
+     end
+
+     def switch_to state_name
+       @current_state = @tokenizer.states[state_name]
+     end
+
+   end
+ end
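A hypothetical subclass sketch showing the tokenizer DSL above in use; the gem's real example is data/test/arithmetic_tokenizer.rb. DIGIT_SYMBOL stands in for a grammar symbol object that is not defined in this diff, and the single-character hash keys assume a Ruby where String#[] returns one-character strings:

    class DigitTokenizer < Dhaka::Tokenizer
      for_state :idle_state do
        for_characters(('0'..'9').to_a) do
          # action blocks are instance_eval'd on the TokenizerRun
          tokens << Dhaka::Token.new(DIGIT_SYMBOL, curr_char.to_i)
          advance
        end
        for_character ' ' do
          advance  # skip whitespace without emitting a token
        end
      end
    end

    DigitTokenizer.tokenize("1 2 3")  # => an array of three Tokens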
data/test/all_tests.rb
@@ -0,0 +1,11 @@
+ #!/usr/bin/env ruby
+ require 'test/unit'
+
+ require 'grammar_test'
+ require 'parser_test'
+ require 'arithmetic_evaluator_test'
+ require 'compiled_parser_test'
+ require 'evaluator_test'
+ require 'arithmetic_tokenizer_test'
+ require 'malformed_grammar_test'
+ require 'brackets_test'
data/test/arithmetic_evaluator.rb
@@ -0,0 +1,70 @@
+ require File.dirname(__FILE__)+'/../lib/dhaka'
+ require 'arithmetic_grammar'
+
+ class ArithmeticEvaluator < Dhaka::Evaluator
+
+   self.grammar = ArithmeticGrammar
+
+   define_evaluation_rules do
+
+     for_subtraction do
+       child_nodes[0] - child_nodes[2]
+     end
+
+     for_addition do
+       child_nodes[0] + child_nodes[2]
+     end
+
+     for_division do
+       child_nodes[0].to_f/child_nodes[2]
+     end
+
+     for_multiplication do
+       child_nodes[0] * child_nodes[2]
+     end
+
+     for_getting_literals do
+       child_nodes[0].token.value
+     end
+
+     for_start_production do
+       child_nodes[0]
+     end
+
+     for_unpacking_parenthetized_expression do
+       child_nodes[1]
+     end
+
+     for_empty_args do
+       []
+     end
+
+     for_evaluating_function do
+       child_nodes[0].call child_nodes[2]
+     end
+
+     for_concatenating_args do
+       [child_nodes[0]]+child_nodes[2]
+     end
+
+     for_single_args do
+       [child_nodes[0]]
+     end
+
+     for_min_function do
+       @min_function
+     end
+
+     for_max_function do
+       @max_function
+     end
+
+   end
+
+   def initialize(syntax_tree, min_function, max_function)
+     @min_function = min_function
+     @max_function = max_function
+     super(syntax_tree)
+   end
+
+ end
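Putting the pieces in this diff together, roughly. ArithmeticTokenizer and ArithmeticGrammar come from the test files listed above; the input string is illustrative, and reading the computed value back off the evaluator depends on data/lib/evaluator/evaluator.rb, which is not part of this excerpt, so it is omitted:

    tokens = ArithmeticTokenizer.tokenize("(2+3)*4")
    result = Dhaka::Parser.new(ArithmeticGrammar).parse(tokens)
    unless result.has_error?
      evaluator = ArithmeticEvaluator.new(result.syntax_tree,
                                          lambda { |args| args.min },   # min_function
                                          lambda { |args| args.max })   # max_function
    end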