dhaka 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43) hide show
  1. data/lib/dhaka.rb +44 -0
  2. data/lib/evaluator/evaluator.rb +70 -0
  3. data/lib/grammar/closure_hash.rb +13 -0
  4. data/lib/grammar/grammar.rb +129 -0
  5. data/lib/grammar/grammar_symbol.rb +19 -0
  6. data/lib/grammar/production.rb +14 -0
  7. data/lib/parser/action.rb +51 -0
  8. data/lib/parser/channel.rb +51 -0
  9. data/lib/parser/compiled_parser.rb +35 -0
  10. data/lib/parser/item.rb +37 -0
  11. data/lib/parser/parse_result.rb +26 -0
  12. data/lib/parser/parse_tree.rb +34 -0
  13. data/lib/parser/parser.rb +125 -0
  14. data/lib/parser/parser_methods.rb +10 -0
  15. data/lib/parser/parser_run.rb +35 -0
  16. data/lib/parser/parser_state.rb +66 -0
  17. data/lib/parser/token.rb +15 -0
  18. data/lib/tokenizer/tokenizer.rb +88 -0
  19. data/test/all_tests.rb +11 -0
  20. data/test/arithmetic_evaluator.rb +70 -0
  21. data/test/arithmetic_evaluator_test.rb +55 -0
  22. data/test/arithmetic_grammar.rb +38 -0
  23. data/test/arithmetic_grammar_test.rb +11 -0
  24. data/test/arithmetic_test_methods.rb +11 -0
  25. data/test/arithmetic_tokenizer.rb +43 -0
  26. data/test/arithmetic_tokenizer_test.rb +32 -0
  27. data/test/bracket_grammar.rb +25 -0
  28. data/test/bracket_tokenizer.rb +17 -0
  29. data/test/brackets_test.rb +20 -0
  30. data/test/compiled_arithmetic_parser.rb +252 -0
  31. data/test/compiled_parser_test.rb +71 -0
  32. data/test/evaluator_test.rb +8 -0
  33. data/test/grammar_test.rb +70 -0
  34. data/test/incomplete_arithmetic_evaluator.rb +60 -0
  35. data/test/lalr_but_not_slr_grammar.rb +17 -0
  36. data/test/malformed_grammar.rb +9 -0
  37. data/test/malformed_grammar_test.rb +9 -0
  38. data/test/nullable_grammar.rb +18 -0
  39. data/test/parser_test.rb +168 -0
  40. data/test/rr_conflict_grammar.rb +23 -0
  41. data/test/simple_grammar.rb +24 -0
  42. data/test/sr_conflict_grammar.rb +16 -0
  43. metadata +87 -0
@@ -0,0 +1,44 @@
1
+ #--
2
+ # Copyright (c) 2006 Mushfeq Khan
3
+ #
4
+ # Permission is hereby granted, free of charge, to any person obtaining
5
+ # a copy of this software and associated documentation files (the
6
+ # "Software"), to deal in the Software without restriction, including
7
+ # without limitation the rights to use, copy, modify, merge, publish,
8
+ # distribute, sublicense, and/or sell copies of the Software, and to
9
+ # permit persons to whom the Software is furnished to do so, subject to
10
+ # the following conditions:
11
+ #
12
+ # The above copyright notice and this permission notice shall be
13
+ # included in all copies or substantial portions of the Software.
14
+ #
15
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16
+ # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17
+ # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
18
+ # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
19
+ # LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
20
+ # OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
21
+ # WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
22
+ #++
23
+
24
+ require 'grammar/grammar_symbol'
25
+ require 'grammar/production'
26
+ require 'grammar/closure_hash'
27
+ require 'grammar/grammar'
28
+
29
+ require 'parser/parse_result'
30
+ require 'parser/item'
31
+ require 'parser/channel'
32
+ require 'parser/parser_methods'
33
+ require 'parser/parse_tree'
34
+ require 'parser/parser_state'
35
+ require 'parser/token'
36
+ require 'parser/action'
37
+ require 'parser/parser_run'
38
+ require 'parser/parser'
39
+ require 'parser/compiled_parser'
40
+
41
+ require 'tokenizer/tokenizer'
42
+ require 'evaluator/evaluator'
43
+
44
+
@@ -0,0 +1,70 @@
1
+ module Dhaka
2
# Base class for evaluators that compute a value by walking a parse tree.
#
# A subclass assigns its grammar through the class-level +grammar+ accessor
# (installed by +inherited+) and registers one action block per production
# inside +define_evaluation_rules+. Actions are looked up by
# <tt>node.production.name</tt> and run with +instance_eval+ on the evaluator,
# so an action can call +child_nodes+ to obtain the evaluated children of the
# node currently being visited.
class Evaluator
  # Class-level DSL calls of the form +for_<rule name>+ register an action
  # for <rule name>. The rule name must be non-empty.
  RULE_METHOD_PATTERN = /\Afor_(.+)\z/

  # syntax_tree:: root node of the tree to evaluate.
  def initialize(syntax_tree)
    @syntax_tree = syntax_tree
    @node_stack = []
  end

  # Evaluates the whole tree and returns the value computed for its root.
  def result
    evaluate(@syntax_tree)
  end

  # Returns the evaluated children of the node currently being visited (the
  # top of the node stack). Leaf nodes are returned as they are.
  def child_nodes
    current_node = @node_stack.last
    current_node.child_nodes.map { |child| evaluate(child) }
  end

  # Gives every subclass its own +grammar+ and +actions+ accessors.
  def self.inherited(evaluator)
    super
    class << evaluator
      attr_accessor :grammar, :actions
    end
  end

  # Resets the action table, runs the given block (which is expected to
  # register actions) and then verifies the definitions.
  #
  # Productions without a registered action fall back to a default action
  # that returns the first evaluated child. The default is never stored in
  # the table, so +actions.keys+ lists only explicitly registered rules.
  #
  # Raises EvaluatorDefinitionError (see +check_definitions+).
  def self.define_evaluation_rules
    default_action = Proc.new { child_nodes[0] }
    self.actions = Hash.new { |_hash, _key| default_action }
    yield
    check_definitions
  end

  # Raises EvaluatorDefinitionError listing every production whose expansion
  # does not have exactly one symbol and that has no registered action.
  def self.check_definitions
    non_trivial_names = grammar.productions.
      select { |production| production.expansion.size != 1 }.
      map { |production| production.name }
    missing = non_trivial_names - actions.keys
    raise EvaluatorDefinitionError.new(missing) unless missing.empty?
  end

  # Registers +blk+ as the action for the production called +name+.
  def self.for_rule_named(name, &blk)
    actions[name] = blk
  end

  # Turns <tt>for_<rule name> { ... }</tt> into
  # <tt>for_rule_named('<rule name>') { ... }</tt>.
  #
  # Anything else (a name that is not +for_+ followed by a rule name, or a
  # call that passes arguments) is handed to +super+ and therefore raises
  # NoMethodError instead of being silently ignored.
  def self.method_missing(method_name, *args, &blk)
    rule_name = method_name.to_s[RULE_METHOD_PATTERN, 1]
    return super unless rule_name && args.empty?
    for_rule_named(rule_name, &blk)
  end

  # Keeps +respond_to?+ consistent with +method_missing+.
  def self.respond_to_missing?(method_name, include_private = false)
    !!(method_name.to_s =~ RULE_METHOD_PATTERN) || super
  end

  private

  # Returns leaf nodes unchanged; for any other node runs the action
  # registered for the node's production and returns its value.
  def evaluate(node)
    return node if ParseTreeLeafNode === node
    @node_stack << node
    begin
      action = self.class.actions[node.production.name]
      instance_eval(&action)
    ensure
      # Keep the stack balanced even when an action raises.
      @node_stack.pop
    end
  end
end
59
+
60
# Raised when an evaluator leaves non-trivial productions without an
# evaluation rule.
class EvaluatorDefinitionError < StandardError
  # The production names that were passed to the constructor.
  attr_reader :non_trivial_productions_with_rules_undefined

  # non_trivial_productions_with_rules_undefined:: list of production names
  # that have no evaluation rule.
  def initialize(non_trivial_productions_with_rules_undefined)
    @non_trivial_productions_with_rules_undefined = non_trivial_productions_with_rules_undefined
    super()
  end

  # Returns a heading line followed by one production name per line.
  # The text is built by interpolation rather than by appending to a string
  # literal, so it also works when string literals are frozen.
  def to_s
    "The following non-trivial productions do not have any evaluation rules defined:\n" \
      "#{@non_trivial_productions_with_rules_undefined.join("\n")}"
  end
end
70
+ end
@@ -0,0 +1,13 @@
1
+ #!/usr/bin/env ruby
2
+ module Dhaka
3
# Hash with a +dirty+ flag that callers can set and clear; the flag starts
# out false.
class ClosureHash < Hash
  attr_accessor :dirty

  # Accepts the same optional default block as Hash.new.
  def initialize(&block)
    super(&block)
    @dirty = false
  end

  # Stores every member of +set+ under itself (member => member).
  # Entries are written directly, so the default block is not invoked.
  def load_set(set)
    set.each do |member|
      store(member, member)
    end
  end
end
13
+ end
@@ -0,0 +1,129 @@
1
+ #!/usr/bin/env ruby
2
+ require 'set'
3
+ module Dhaka
4
+
5
# Names used for the start and end symbols that are created for every
# grammar. Frozen so the shared strings cannot be modified in place.
START_SYMBOL_NAME = "_Start_".freeze
END_SYMBOL_NAME = "_End_".freeze
7
+
8
# Receiver for the body of a grammar's for_symbol block: every call of the
# form <tt>production_name [symbol names...]</tt> made on it defines one
# production for the symbol the builder was created with.
class ProductionBuilder
  # grammar:: object exposing +symbols+, +productions_by_symbol+ and
  #           +productions_by_name+.
  # symbol::  the symbol that the defined productions expand.
  def initialize(grammar, symbol)
    @grammar = grammar
    @symbol = symbol
  end

  # Defines a production named after the called method.
  #
  # expansion:: list of symbol names; each is resolved through
  #             <tt>@grammar.symbols</tt>.
  #
  # The head symbol is marked nullable only when this expansion is empty.
  # The production is registered under its head symbol and under its name
  # (a later production with the same name replaces the by-name entry) and
  # is returned.
  #
  # Calls that do not have this shape (no expansion list, extra arguments)
  # go to +super+ and raise NoMethodError. This matters for the implicit
  # conversion probes Ruby sends, such as +to_ary+ from Array#flatten, which
  # only tolerate NoMethodError.
  def method_missing(production_name, expansion = nil, *extra)
    return super unless expansion.respond_to?(:collect) && extra.empty?

    expansion_symbols = expansion.collect { |name| @grammar.symbols[name] }
    production = Production.new(@symbol, expansion_symbols, production_name.to_s)
    @symbol.nullable = true if expansion_symbols.empty?
    @grammar.productions_by_symbol[production.symbol] << production
    @grammar.productions_by_name[production.name] = production
  end
end
22
+
23
# Base class for grammar definitions. All state lives on the subclass
# itself: +inherited+ gives each subclass its own symbol table, production
# tables, start/end symbols and FIRST-set cache.
class Grammar
  # Installs the class-level accessors on the new grammar and initialises
  # them. Symbols are created on first lookup by name; the end symbol is
  # created before the start symbol.
  def self.inherited(grammar)
    super
    class << grammar
      attr_accessor :symbols, :productions_by_symbol, :productions_by_name, :start_symbol, :end_symbol, :__first_cache
    end
    grammar.symbols = Hash.new { |hash, name| hash[name] = GrammarSymbol.new(name) }
    grammar.productions_by_symbol = Hash.new { |hash, name| hash[name] = Set.new([]) }
    grammar.productions_by_name = {}
    grammar.end_symbol = grammar.symbols[END_SYMBOL_NAME]
    grammar.start_symbol = grammar.symbols[START_SYMBOL_NAME]
    grammar.__first_cache = {}
  end

  # Marks the named symbol as a non-terminal and evaluates +blk+ on a
  # ProductionBuilder for it. Returns the value of the block.
  #
  # Cached FIRST sets are discarded afterwards, because new productions (or
  # a symbol turning into a non-terminal) can change them.
  def self.for_symbol(symbol, &blk)
    symbol = symbols[symbol]
    symbol.non_terminal = true
    begin
      ProductionBuilder.new(self, symbol).instance_eval(&blk)
    ensure
      __first_cache.clear
    end
  end

  # All productions registered by name.
  def self.productions
    productions_by_name.values
  end

  # The production registered under +name+, or nil.
  def self.production_named(name)
    productions_by_name[name]
  end

  # The set of productions whose head is +symbol+.
  def self.productions_for_symbol(symbol)
    productions_by_symbol[symbol]
  end

  # Returns the existing symbol called +name+.
  # Raises RuntimeError when there is none; unlike <tt>symbols[name]</tt>
  # it never creates a symbol.
  def self.symbol_for_name(name)
    raise "No symbol with name #{name} found" unless symbols.has_key?(name)
    symbols[name]
  end

  # Symbols that have not been marked as non-terminals.
  def self.terminal_symbols
    symbols.values.select { |symbol| symbol.terminal }
  end

  # Symbols that have been marked as non-terminals.
  def self.non_terminal_symbols
    symbols.values.select { |symbol| symbol.non_terminal }
  end

  # Computes the closure of the items in +kernel+.
  #
  # Whenever an item's next symbol is a non-terminal, the initial item of
  # each of that symbol's productions is added, and a spontaneous channel is
  # recorded from the item to the added item.
  #
  # Returns <tt>[channels, closure_hash]</tt>.
  def self.closure(kernel)
    channels = Set.new

    result = compute_closure(kernel) do |hash, item|
      next_symbol = item.next_symbol
      next unless next_symbol && next_symbol.non_terminal
      productions_by_symbol[next_symbol].each do |production|
        # hash[...] adds the initial item if it is not there yet.
        channels << spontaneous_channel(item, hash[Item.new(production, 0)])
      end
    end

    return channels, result
  end

  # Returns the set of terminal symbols reachable from +given_symbol+ by
  # repeatedly following, in each production of a reached non-terminal, the
  # expansion symbols up to and including the first one that is not
  # nullable. Results are cached per symbol until +for_symbol+ is called
  # again.
  def self.first(given_symbol)
    cached_result = __first_cache[given_symbol]
    return cached_result if cached_result

    reachable = compute_closure([given_symbol]) do |hash, symbol|
      next unless symbol.non_terminal
      productions_by_symbol[symbol].each do |production|
        production.expansion.each do |next_symbol|
          hash[next_symbol] # looking the symbol up adds it when missing
          break unless next_symbol.nullable
        end
      end
    end

    __first_cache[given_symbol] = reachable.values.select { |symbol| symbol.terminal }.to_set
  end

  # Builds a SpontaneousChannel between two items of this grammar.
  def self.spontaneous_channel(start_item, end_item)
    SpontaneousChannel.new(self, start_item, end_item)
  end

  # Builds a PassiveChannel between two items of this grammar.
  def self.passive_channel(start_item, end_item)
    PassiveChannel.new(self, start_item, end_item)
  end

  # Fixed-point helper. Loads +initial+ into a ClosureHash and yields
  # <tt>(closure_hash, element)</tt> for every key, pass after pass, until a
  # full pass adds nothing. The block adds an element by looking it up
  # (<tt>closure_hash[element]</tt>): the default block stores missing
  # elements and raises the dirty flag.
  #
  # Returns the ClosureHash.
  def self.compute_closure(initial)
    closure_hash = ClosureHash.new do |hash, item|
      hash.dirty = true
      hash[item] = item
    end
    closure_hash.load_set(initial)
    loop do
      # Iterate over a snapshot of the keys; the block may add more.
      closure_hash.keys.each { |element| yield closure_hash, element }
      break unless closure_hash.dirty
      closure_hash.dirty = false
    end
    closure_hash
  end
end
129
+ end
@@ -0,0 +1,19 @@
1
+ #!/usr/bin/env ruby
2
+ module Dhaka
3
# A named grammar symbol. A symbol counts as terminal until +non_terminal+
# is set to a true value; +nullable+ is a plain flag set by callers.
class GrammarSymbol
  attr_reader :name
  attr_accessor :non_terminal, :nullable

  def initialize(name)
    @name = name
  end

  # True unless the symbol has been marked as a non-terminal.
  def terminal
    !non_terminal
  end

  # The symbol's name.
  def to_s
    name
  end

  # Orders symbols by name. Returns nil (the conventional "not comparable"
  # result) when +other+ has no name, instead of raising NoMethodError.
  def <=>(other)
    return nil unless other.respond_to?(:name)
    name <=> other.name
  end
end
19
+ end
@@ -0,0 +1,14 @@
1
+ #!/usr/bin/env ruby
2
+ module Dhaka
3
# A named production: +symbol+ expands to the list +expansion+.
class Production
  attr_reader :symbol
  attr_reader :expansion
  attr_reader :name

  def initialize(symbol, expansion, name)
    @name = name
    @symbol = symbol
    @expansion = expansion
  end

  # Renders as "<name> <symbol> ::= <expansion joined by spaces>".
  def to_s
    right_hand_side = @expansion.join(' ')
    format('%s %s ::= %s', @name, @symbol, right_hand_side)
  end
end
14
+ end
@@ -0,0 +1,51 @@
1
+ module Dhaka
2
# Common base of parser actions. +action_code+ is a Proc meant to be
# evaluated in the context of an object that provides the stacks it uses.
class Action
  attr_reader :action_code
end

# Action that pushes a destination state onto +state_stack+.
class ShiftAction < Action
  attr_reader :destination_state

  def initialize(destination_state)
    @destination_state = destination_state
    # The Proc pushes the state and returns an empty list.
    @action_code = Proc.new do
      state_stack << destination_state
      []
    end
  end

  # Source text that recreates this action in a compiled parser.
  def compile_to_ruby_source
    format('shift_to %s', @destination_state.id)
  end

  def to_s
    "Shift to #{@destination_state}"
  end
end

# Action that reduces the top of the stacks with a production.
class ReduceAction < Action
  attr_reader :production

  def initialize(production)
    @production = production
    # The Proc pops one state and one node per expansion symbol, collects the
    # popped nodes (in their original order) under a new composite node and
    # pushes that node. It returns [] for a head node, otherwise the pair
    # [name of the production's symbol, name of the current token's symbol].
    @action_code = Proc.new do
      composite_node = ParseTreeCompositeNode.new(production)

      production.expansion.size.times do
        state_stack.pop
        composite_node.child_nodes.unshift(node_stack.pop)
      end

      node_stack << composite_node

      if composite_node.head_node?
        []
      else
        [production.symbol.name, current_token.grammar_symbol.name]
      end
    end
  end

  # Source text that recreates this action in a compiled parser.
  def compile_to_ruby_source
    format("reduce_with '%s'", @production.name)
  end

  def to_s
    "Reduce with #{production}"
  end
end
51
+ end
@@ -0,0 +1,51 @@
1
+ #!/usr/bin/env ruby
2
+ module Dhaka
3
# A directed link between two items along which lookahead symbols are
# copied into the end item's lookahead set.
class Channel
  attr_reader :start_item, :end_item

  def initialize(grammar, start_item, end_item)
    @grammar, @start_item, @end_item = grammar, start_item, end_item
  end

  # Adds the members of +cargo+ that the end item's lookahead set does not
  # contain yet. Returns true when something was added, false otherwise.
  def propagate(cargo)
    target = @end_item.lookaheadset
    additions = cargo - target
    target.merge(additions)
    !additions.empty?
  end

  def to_s
    "Channel from #{@start_item} to #{@end_item}"
  end

  # Channels are interchangeable as hash keys when both of their items are;
  # the channel's class is not part of the comparison.
  def eql?(other)
    @start_item.eql?(other.start_item) && @end_item.eql?(other.end_item)
  end

  def hash
    start_hash = @start_item.hash
    end_hash = @end_item.hash
    start_hash ^ end_hash
  end
end

# Channel whose cargo is computed from the symbols that follow the start
# item's next symbol in its production.
class SpontaneousChannel < Channel
  def to_s
    "Spontaneous #{super}"
  end

  # Collects the grammar's FIRST set of each symbol after the start item's
  # next symbol, stopping at (and including) the first one that is not
  # nullable. Only when all of those symbols are nullable (or there are
  # none) is the start item's own lookahead set added. Propagates the
  # collected set and returns whether the end item changed.
  def pump
    cargo = Set.new
    expansion = @start_item.production.expansion
    trailing_symbols = expansion[(@start_item.next_item_index + 1)..-1] || []
    trailing_symbols.each do |follow_symbol|
      cargo |= @grammar.first(follow_symbol)
      return propagate(cargo) unless follow_symbol.nullable
    end
    propagate(cargo | @start_item.lookaheadset)
  end
end

# Channel that copies the start item's lookahead set unchanged.
class PassiveChannel < Channel
  def to_s
    "Passive #{super}"
  end

  # Propagates the start item's lookahead set and returns whether the end
  # item changed.
  def pump
    propagate(@start_item.lookaheadset)
  end
end
51
+ end
@@ -0,0 +1,35 @@
1
+ module Dhaka
2
# Base class for parsers whose states are declared directly in Ruby source.
# Each subclass gets its own +states+ table, +grammar+ and +start_state_id+.
class CompiledParser
  # Installs the class-level accessors on the new parser class. Its state
  # table creates a ParserState for an id the first time that id is looked
  # up.
  def self.inherited(compiled_parser)
    class << compiled_parser
      attr_accessor :states, :grammar, :start_state_id
    end
    compiled_parser.states = Hash.new do |table, state_id|
      table[state_id] = ParserState.new(compiled_parser, {}, state_id)
    end
  end

  # Evaluates +blk+ in the context of the state registered under +x+.
  def self.at_state(x, &blk)
    states[x].instance_eval(&blk)
  end

  # The state registered under +start_state_id+.
  def self.start_state
    states[start_state_id]
  end

  # Records which state id the parser starts in.
  def self.start_with(start_state_id)
    self.start_state_id = start_state_id
  end

  # Builds a ReduceAction for the grammar's production of the given name.
  def self.reduce_with(production_name)
    production = grammar.production_named(production_name)
    ReduceAction.new(production)
  end

  # Builds a ShiftAction to the state registered under +state_id+.
  def self.shift_to(state_id)
    destination = states[state_id]
    ShiftAction.new(destination)
  end

  extend ParserMethods
end
34
+
35
+ end
@@ -0,0 +1,37 @@
1
+ #!/usr/bin/env ruby
2
+ module Dhaka
3
# A production together with a position inside its expansion and a set of
# lookahead symbols. Identity as a hash key is the production and the
# position only; the lookahead set is not part of it.
class Item
  attr_reader :production, :next_item_index, :lookaheadset

  def initialize(production, next_item_index)
    @production = production
    @next_item_index = next_item_index
    @lookaheadset = Set.new
  end

  # The expansion symbol at the item's position, or nil when the position
  # is at the end of the expansion.
  def next_symbol
    @production.expansion[@next_item_index]
  end

  # A new item for the same production, one position further, with an empty
  # lookahead set.
  def next_item
    Item.new(@production, @next_item_index + 1)
  end

  # Renders as "<head> ::= <expansion with '->' at the position> [<lookahead>]".
  # The lookahead symbols are sorted and joined with spaces explicitly;
  # interpolating the array would print its +inspect+ form on Ruby 1.9 and
  # later.
  def to_s
    symbol_names = @production.expansion.map { |symbol| symbol.name }
    marker_position = [@next_item_index, symbol_names.size].min
    symbol_names.insert(marker_position, '->')
    lookahead_repr = @lookaheadset.sort.join(' ')
    "#{@production.symbol} ::= #{symbol_names.join(' ')} [#{lookahead_repr}]"
  end

  # True when +other+ is an Item with the same production and position.
  # Anything that is not an Item is simply not equal.
  def eql?(other)
    other.is_a?(Item) &&
      @production == other.production &&
      @next_item_index == other.next_item_index
  end

  def hash
    @production.hash ^ @next_item_index.hash
  end
end
37
+ end