dhaka 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (43) hide show
  1. data/lib/dhaka.rb +44 -0
  2. data/lib/evaluator/evaluator.rb +70 -0
  3. data/lib/grammar/closure_hash.rb +13 -0
  4. data/lib/grammar/grammar.rb +129 -0
  5. data/lib/grammar/grammar_symbol.rb +19 -0
  6. data/lib/grammar/production.rb +14 -0
  7. data/lib/parser/action.rb +51 -0
  8. data/lib/parser/channel.rb +51 -0
  9. data/lib/parser/compiled_parser.rb +35 -0
  10. data/lib/parser/item.rb +37 -0
  11. data/lib/parser/parse_result.rb +26 -0
  12. data/lib/parser/parse_tree.rb +34 -0
  13. data/lib/parser/parser.rb +125 -0
  14. data/lib/parser/parser_methods.rb +10 -0
  15. data/lib/parser/parser_run.rb +35 -0
  16. data/lib/parser/parser_state.rb +66 -0
  17. data/lib/parser/token.rb +15 -0
  18. data/lib/tokenizer/tokenizer.rb +88 -0
  19. data/test/all_tests.rb +11 -0
  20. data/test/arithmetic_evaluator.rb +70 -0
  21. data/test/arithmetic_evaluator_test.rb +55 -0
  22. data/test/arithmetic_grammar.rb +38 -0
  23. data/test/arithmetic_grammar_test.rb +11 -0
  24. data/test/arithmetic_test_methods.rb +11 -0
  25. data/test/arithmetic_tokenizer.rb +43 -0
  26. data/test/arithmetic_tokenizer_test.rb +32 -0
  27. data/test/bracket_grammar.rb +25 -0
  28. data/test/bracket_tokenizer.rb +17 -0
  29. data/test/brackets_test.rb +20 -0
  30. data/test/compiled_arithmetic_parser.rb +252 -0
  31. data/test/compiled_parser_test.rb +71 -0
  32. data/test/evaluator_test.rb +8 -0
  33. data/test/grammar_test.rb +70 -0
  34. data/test/incomplete_arithmetic_evaluator.rb +60 -0
  35. data/test/lalr_but_not_slr_grammar.rb +17 -0
  36. data/test/malformed_grammar.rb +9 -0
  37. data/test/malformed_grammar_test.rb +9 -0
  38. data/test/nullable_grammar.rb +18 -0
  39. data/test/parser_test.rb +168 -0
  40. data/test/rr_conflict_grammar.rb +23 -0
  41. data/test/simple_grammar.rb +24 -0
  42. data/test/sr_conflict_grammar.rb +16 -0
  43. metadata +87 -0
@@ -0,0 +1,26 @@
1
module Dhaka
  # Result object for a parse that ran to completion.
  class ParseSuccessResult
    # The syntax tree handed to the constructor; readable and writable.
    attr_accessor :syntax_tree

    # syntax_tree:: the tree produced by the parse.
    def initialize(syntax_tree)
      @syntax_tree = syntax_tree
    end

    # Always false: this result type represents success.
    def has_error?
      false
    end
  end

  # Result object for a parse that was aborted.
  class ParseErrorResult
    # The token index handed to the constructor; read-only.
    attr_reader :bad_token_index

    # bad_token_index:: index of the token at which the parse stopped.
    def initialize(bad_token_index)
      @bad_token_index = bad_token_index
    end

    # Always true: this result type represents failure.
    def has_error?
      true
    end
  end
end
23
+
24
+
25
+
26
+
@@ -0,0 +1,34 @@
1
module Dhaka
  # Interior node of a parse tree: one production plus the nodes it was
  # built from.
  class ParseTreeCompositeNode
    attr_reader :production, :child_nodes

    # production:: the production this node stands for. Children start out
    #              empty and are added by the caller through +child_nodes+.
    def initialize(production)
      @production = production
      @child_nodes = []
    end

    # Returns the production names of this subtree as a flat array: all
    # names contributed by the children first, this node's own name last.
    def linearize
      names_below = child_nodes.map { |node| node.linearize }.flatten
      names_below + [production.name]
    end

    def to_s
      children = child_nodes.join(", ")
      "CompositeNode: #{production.symbol} --> [#{children}]"
    end

    # True when this node's production belongs to the start symbol.
    # START_SYMBOL_NAME is defined elsewhere in the Dhaka namespace.
    def head_node?
      production.symbol.name == START_SYMBOL_NAME
    end
  end

  # Terminal node of a parse tree; wraps a single token.
  class ParseTreeLeafNode
    attr_reader :token

    def initialize(token)
      @token = token
    end

    # Leaves carry no production, so they contribute no names.
    def linearize
      []
    end

    def to_s
      "LeafNode: #{token}"
    end

    # A leaf is never the head of the tree.
    def head_node?
      false
    end
  end
end
@@ -0,0 +1,125 @@
1
+ #!/usr/bin/env ruby
2
+ require 'set'
3
module Dhaka
  # Builds the parser states and the per-state action tables for a grammar.
  #
  # States are created lazily by the default block of the @states hash,
  # which is keyed by kernel item set. Creating one state recursively
  # creates every state reachable from it.
  class Parser
    include ParserMethods
    attr_reader :grammar, :start_state

    # grammar:: the grammar to build a parser for. It must respond to
    #           closure, passive_channel, productions_for_symbol,
    #           start_symbol, end_symbol and name.
    #
    # Raises NoStartProductionsError when the start symbol has no
    # productions and ParserConflictError when a reduce action collides
    # with an action already registered for the same lookahead.
    def initialize(grammar)
      @transitions = Hash.new { |hash, state| hash[state] = {} }
      @grammar = grammar
      @channels = []
      @states = Hash.new do |hash, kernel|
        channels, closure = @grammar.closure(kernel)
        @channels += channels.to_a
        new_state = ParserState.new(self, closure)
        # Register the state before following its transitions so that a
        # transition leading back to this kernel finds it instead of
        # recursing forever.
        hash[kernel] = new_state
        new_state.transition_items.each do |symbol, items|
          destination_kernel = ItemSet.new(items.collect { |item| item.next_item })
          destination_state = hash[destination_kernel]
          items.each do |item|
            @channels << @grammar.passive_channel(item, destination_state.items[item.next_item])
          end
          @transitions[new_state][symbol] = destination_state
        end
        new_state
      end
      initialize_states
    end

    # Seeds the start state from the start symbol's productions (each with
    # the grammar's end symbol as lookahead), then pumps the channels and
    # fills in the shift and reduce actions.
    def initialize_states
      start_productions = @grammar.productions_for_symbol(@grammar.start_symbol)
      raise NoStartProductionsError.new(@grammar) if start_productions.empty?
      start_items = ItemSet.new(start_productions.collect { |production| Item.new(production, 0) })
      start_items.each { |start_item| start_item.lookaheadset << @grammar.end_symbol }
      @start_state = @states[start_items]
      pump_channels
      generate_shift_actions
      generate_reduce_actions
    end

    # Returns Ruby source for a Dhaka::CompiledParser subclass named
    # +parser_class_name+ that declares the grammar, the start state id and
    # one block per state.
    def compile_to_ruby_source_as(parser_class_name)
      result = "class #{parser_class_name} < Dhaka::CompiledParser\n\n"
      result << " self.grammar = #{@grammar.name}\n\n"
      result << " start_with #{start_state.id}\n\n"
      states.each do |state|
        result << "#{state.compile_to_ruby_source}\n\n"
      end
      result << "end"
      result
    end

    # Returns a Graphviz "digraph" description of the states and the
    # transitions between them.
    def to_dot
      result = ["digraph x {", "node [fontsize=\"10\" shape=box size=\"5\"]"]
      result += states.collect { |state| state.to_dot }
      states.each do |state|
        @transitions[state].each do |symbol, dest_state|
          result << "#{state.dot_name} -> #{dest_state.dot_name} [label=\"#{symbol.name}\"]"
        end
      end
      # Append the closing brace as a String; the original pushed a nested
      # Array here and relied on join flattening it.
      result << '}'
      result.join("\n")
    end

    # All states created so far.
    def states
      @states.values
    end

    # Registers a ShiftAction for every recorded transition, keyed by the
    # name of the transition symbol.
    def generate_shift_actions
      states.each do |state|
        @transitions[state].each do |symbol, destination_state|
          state.actions[symbol.name] = ShiftAction.new(destination_state)
        end
      end
    end

    # Registers reduce actions for every item that has no next symbol.
    def generate_reduce_actions
      states.each do |state|
        completed_items = state.items.values.select { |item| !item.next_symbol }
        completed_items.each do |item|
          create_reduction_actions_for_item_and_state(item, state)
        end
      end
    end

    # Registers a ReduceAction in +state+ for each lookahead of +item+.
    #
    # Raises ParserConflictError if the state already has an action for one
    # of those lookaheads.
    def create_reduction_actions_for_item_and_state(item, state)
      item.lookaheadset.each do |lookahead|
        existing_action = state.actions[lookahead.name]
        new_action = ReduceAction.new(item.production)
        raise ParserConflictError.new(state, existing_action, new_action) if existing_action
        state.actions[lookahead.name] = new_action
      end
    end

    # Pumps every channel repeatedly until a full pass over all channels
    # reports no change.
    #
    # The original folded with `pumped || channel.pump`, which stopped
    # calling pump for the rest of a pass as soon as one channel reported a
    # change, so each pass made progress on a single channel only. Every
    # channel is now pumped on every pass.
    # NOTE(review): assumes Channel#pump only ever adds information and
    # returns a truthy value exactly when it changed something, so the
    # order of pumping cannot affect the final result - confirm against
    # channel.rb.
    def pump_channels
      loop do
        changed = false
        @channels.each { |channel| changed = true if channel.pump }
        break unless changed
      end
    end
  end

  # Raised while building reduce actions when a state already has an action
  # for the lookahead in question.
  class ParserConflictError < StandardError
    def initialize(state, existing_action, new_action)
      @state = state
      @existing_action = existing_action
      @new_action = new_action
    end

    def to_s
      "Conflict in state #{@state}\n Existing: #{@existing_action}\n New: #{@new_action}"
    end
  end

  # Raised when the grammar defines no productions for its start symbol.
  class NoStartProductionsError < StandardError
    def initialize(grammar)
      @grammar = grammar
    end

    def to_s
      "No start productions defined for #{@grammar.name}"
    end
  end
end
125
+
@@ -0,0 +1,10 @@
1
+ #!/usr/bin/env ruby
2
module Dhaka
  # Mixin that supplies the +parse+ entry point. The host object must
  # respond to +grammar+ and +start_state+.
  module ParserMethods
    # Parses +token_stream+ and returns whatever ParserRun#run returns.
    #
    # token_stream:: an array of tokens. It is not modified; a copy with an
    #                end-of-input token appended is handed to the run.
    #
    # Returns nil when +token_stream+ is empty.
    #
    # The grammar is read through the +grammar+ accessor only. The original
    # mixed +grammar+ and +@grammar+ on the same line, which silently
    # required the host to store its grammar in that exact instance
    # variable.
    def parse(token_stream)
      return nil if token_stream.empty?

      end_token = Token.new(grammar.end_symbol, nil)
      ParserRun.new(grammar, start_state, token_stream + [end_token]).run
    end
  end
end
@@ -0,0 +1,35 @@
1
module Dhaka
  # One run of a parser over a token stream. Holds the state stack, the
  # node stack and the position in the stream. Action code is evaluated
  # with instance_eval, so it sees this object as self.
  class ParserRun
    attr_reader :state_stack, :token_stream, :node_stack

    # grammar::      stored on the run; not used by the code in this class.
    # start_state::  initial content of the state stack.
    # token_stream:: tokens to consume, each responding to +grammar_symbol+.
    def initialize(grammar, start_state, token_stream)
      @grammar = grammar
      @node_stack = []
      @state_stack = [start_state]
      @token_stream = token_stream
      @current_token_index = 0
    end

    # The token at the current position, or nil past the end of the stream.
    def current_token
      @token_stream[@current_token_index]
    end

    # Pushes a leaf node for the current token and moves to the next one.
    def advance
      node_stack << ParseTreeLeafNode.new(current_token)
      @current_token_index += 1
    end

    # Consumes the stream token by token.
    #
    # Returns a ParseErrorResult as soon as an action lookup fails,
    # otherwise a ParseSuccessResult wrapping the bottom of the node stack.
    def run
      while current_token
        error = execute_action(current_token.grammar_symbol.name)
        return error if error
        advance
      end
      ParseSuccessResult.new(node_stack[0])
    end

    # Looks up the action registered for +symbol_name+ in the state on top
    # of the stack and runs its code. The code must return a list of symbol
    # names; each is executed in turn.
    #
    # Returns nil on success, or a ParseErrorResult carrying the current
    # token index when no action exists for this symbol or for any
    # follow-up symbol.
    #
    # The original ignored the result of the recursive calls, so an error
    # detected while processing a follow-up symbol was lost and the run
    # carried on as if nothing had happened.
    # NOTE(review): assumes an accepting action returns no follow-up
    # symbols (action.rb is not visible here) - confirm.
    def execute_action(symbol_name)
      action = state_stack[-1].actions[symbol_name]
      return ParseErrorResult.new(@current_token_index) unless action

      follow_up_symbols = instance_eval(&action.action_code)
      follow_up_symbols.each do |next_symbol_name|
        error = execute_action(next_symbol_name)
        return error if error
      end
      nil
    end
  end
end
@@ -0,0 +1,66 @@
1
+ #!/usr/bin/env ruby
2
+ require 'set'
3
module Dhaka
  # One parser state: its items, the action registered for each symbol
  # name, and a numeric id.
  class ParserState
    attr_accessor :items, :actions, :id

    # Counter behind next_state_id. Shared by every parser built in the
    # same process, so ids are unique process-wide rather than per parser.
    @@state_id = 0

    # Returns the next unused state id and advances the counter.
    def self.next_state_id
      result = @@state_id
      @@state_id += 1
      result
    end

    # parser:: object that for_symbol blocks are evaluated against.
    # items::  the state's items; must respond to +values+.
    # id::     explicit id; a fresh one is allocated when omitted.
    def initialize(parser, items, id = nil)
      @parser = parser
      @items = items
      @actions = {}
      @id = id || ParserState.next_state_id
    end

    # Groups the items that still have a next symbol by that symbol.
    #
    # Returns a Hash from symbol to ItemSet. The hash creates an empty
    # ItemSet on first access to a missing key.
    def transition_items
      result = Hash.new { |hash, symbol| hash[symbol] = ItemSet.new }
      @items.values.each do |item|
        symbol = item.next_symbol
        result[symbol] << item if symbol
      end
      result
    end

    # Node name used for this state in Graphviz output.
    def dot_name
      to_s
    end

    # Graphviz node declaration for this state. The items are joined with
    # a literal backslash-n (single-quoted on purpose) so that the line
    # break is left for Graphviz to interpret inside the label.
    def to_dot
      label = items.values.join('\n')
      "#{dot_name} [label=\"#{label}\"]"
    end

    # Returns the at_state block of Ruby source describing this state's
    # actions, one for_symbol line per action.
    def compile_to_ruby_source
      result = " at_state(#{@id}) {\n"
      actions.each do |symbol_name, action|
        result << " for_symbol('#{symbol_name}') { #{action.compile_to_ruby_source} }\n"
      end
      result << " }"
      result
    end

    # Evaluates the block against the owning parser and stores its result
    # as the action for +symbol_name+.
    def for_symbol(symbol_name, &blk)
      actions[symbol_name] = @parser.instance_eval(&blk)
    end

    def to_s
      "State#{id}"
    end
  end

  # A Set whose hash and eql? depend on its contents, so that two sets with
  # equal members address the same Hash entry.
  class ItemSet < Set
    # XOR of the members' hash codes, so it does not depend on member order.
    # Seeded with 0 so that an empty set hashes to 0; the unseeded inject
    # used before returned nil for an empty set, which is not a usable hash
    # code. Non-empty sets hash exactly as before (x ^ 0 == x).
    def hash
      inject(0) { |result, item| result ^ item.hash }
    end

    def eql?(other)
      self == other
    end
  end
end
@@ -0,0 +1,15 @@
1
module Dhaka
  # A token: a grammar symbol paired with a value.
  class Token
    attr_accessor :grammar_symbol, :value

    def initialize(grammar_symbol, value)
      @grammar_symbol = grammar_symbol
      @value = value
    end

    # The name of the token's grammar symbol.
    def to_s
      "#{@grammar_symbol.name}"
    end

    # Two tokens are equal when both their grammar symbols and their values
    # are equal.
    #
    # Returns false for nil or any object that does not expose
    # +grammar_symbol+ and +value+; the original raised NoMethodError in
    # that case.
    def ==(other)
      return false unless other.respond_to?(:grammar_symbol) && other.respond_to?(:value)

      (grammar_symbol == other.grammar_symbol) && (value == other.value)
    end
  end
end
@@ -0,0 +1,88 @@
1
module Dhaka
  # Raised by TokenizerRun#run when the current state has no action for the
  # character at the current position.
  class UnrecognizedInputCharacterException < StandardError
    attr_reader :input, :char_index

    # input::      the full string being tokenized.
    # char_index:: position of the offending character within +input+.
    def initialize(input, char_index)
      @input = input
      @char_index = char_index
    end

    def to_s
      offending = input[char_index].chr
      "Unrecognized character #{offending} encountered while tokenizing:\n #{input}"
    end
  end

  # One tokenizer state: a table from character to the block to run when
  # that character is seen.
  class TokenizerState
    attr_reader :actions

    def initialize
      @actions = {}
    end

    # Registers the same block for every element of +characters+.
    def for_characters(characters, &blk)
      characters.each { |character| actions[character] = blk }
    end

    # Registers the block under the first element of +character+.
    def for_character(character, &blk)
      key = character[0]
      actions[key] = blk
    end

    def to_s
      actions.inspect
    end
  end

  # Base class for tokenizers. Subclasses describe their states with
  # for_state and are run with tokenize.
  class Tokenizer
    # Gives each direct subclass its own +states+ accessor, backed by a
    # hash that creates a fresh TokenizerState on first access to a name.
    def self.inherited(tokenizer)
      tokenizer.singleton_class.class_eval { attr_accessor :states }
      tokenizer.states = Hash.new { |hash, key| hash[key] = TokenizerState.new }
    end

    # Evaluates the block against the state called +state_name+, so that
    # for_character / for_characters inside it register on that state.
    def self.for_state(state_name, &blk)
      state = states[state_name]
      state.instance_eval(&blk)
    end

    # Tokenizes +input+ and returns the collected tokens.
    def self.tokenize(input)
      run = TokenizerRun.new(self, input)
      run.run
    end
  end

  # A single pass of a tokenizer over an input string. State action blocks
  # are evaluated with this object as self, so they can use +tokens+,
  # +accumulator+, +curr_char+, +advance+ and +switch_to+.
  class TokenizerRun
    attr_accessor :accumulator
    attr_reader :tokens

    # Starts in the tokenizer's :idle_state at position 0.
    def initialize(tokenizer, input)
      @tokenizer = tokenizer
      @input = input
      @current_state = tokenizer.states[:idle_state]
      @curr_char_index = 0
      @tokens = []
    end

    # Runs the action for each current character until the input is
    # exhausted. Actions are responsible for calling +advance+.
    #
    # Raises UnrecognizedInputCharacterException when the current state has
    # no action for the current character.
    def run
      while (character = curr_char)
        handler = @current_state.actions[character]
        raise UnrecognizedInputCharacterException.new(@input, @curr_char_index) unless handler
        instance_eval(&handler)
      end
      tokens
    end

    # The character at the current position, or nil past the end.
    def curr_char
      raw = @input[@curr_char_index]
      raw && raw.chr
    end

    # Moves one position forward in the input.
    def advance
      @curr_char_index += 1
    end

    # Makes the state called +state_name+ the current one.
    def switch_to(state_name)
      @current_state = @tokenizer.states[state_name]
    end
  end
end
@@ -0,0 +1,11 @@
1
+ #!/usr/bin/env ruby
2
+ require 'test/unit'
3
+
4
+ require 'grammar_test'
5
+ require 'parser_test'
6
+ require 'arithmetic_evaluator_test'
7
+ require 'compiled_parser_test'
8
+ require 'evaluator_test'
9
+ require 'arithmetic_tokenizer_test'
10
+ require 'malformed_grammar_test'
11
+ require 'brackets_test'
@@ -0,0 +1,70 @@
1
+ require File.dirname(__FILE__)+'/../lib/dhaka'
2
+ require 'arithmetic_grammar'
3
+
4
# Evaluation rules for ArithmeticGrammar. Each for_* block gives the value
# of one kind of node in terms of +child_nodes+. The two callables supplied
# to the constructor are what the min and max function rules return.
class ArithmeticEvaluator < Dhaka::Evaluator

  self.grammar = ArithmeticGrammar

  define_evaluation_rules do

    # Binary operators: operands are children 0 and 2.
    for_subtraction { child_nodes[0] - child_nodes[2] }

    for_addition { child_nodes[0] + child_nodes[2] }

    # The left operand is converted to a float before dividing.
    for_division { child_nodes[0].to_f / child_nodes[2] }

    for_multiplication { child_nodes[0] * child_nodes[2] }

    # A literal evaluates to the value carried by its token.
    for_getting_literals { child_nodes[0].token.value }

    for_start_production { child_nodes[0] }

    # The wrapped expression is child 1.
    for_unpacking_parenthetized_expression { child_nodes[1] }

    # Argument lists are built up as plain arrays.
    for_empty_args { [] }

    # Child 0 is the callable, child 2 the argument list passed to it.
    for_evaluating_function { child_nodes[0].call(child_nodes[2]) }

    for_concatenating_args { [child_nodes[0]] + child_nodes[2] }

    for_single_args { [child_nodes[0]] }

    for_min_function { @min_function }

    for_max_function { @max_function }

  end

  # syntax_tree::  passed on to Dhaka::Evaluator.
  # min_function:: object returned by the min function rule.
  # max_function:: object returned by the max function rule.
  #
  # Both functions are stored before calling super, as in the original.
  def initialize(syntax_tree, min_function, max_function)
    @min_function = min_function
    @max_function = max_function
    super(syntax_tree)
  end

end