dhaka 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (43) hide show
  1. data/lib/dhaka.rb +44 -0
  2. data/lib/evaluator/evaluator.rb +70 -0
  3. data/lib/grammar/closure_hash.rb +13 -0
  4. data/lib/grammar/grammar.rb +129 -0
  5. data/lib/grammar/grammar_symbol.rb +19 -0
  6. data/lib/grammar/production.rb +14 -0
  7. data/lib/parser/action.rb +51 -0
  8. data/lib/parser/channel.rb +51 -0
  9. data/lib/parser/compiled_parser.rb +35 -0
  10. data/lib/parser/item.rb +37 -0
  11. data/lib/parser/parse_result.rb +26 -0
  12. data/lib/parser/parse_tree.rb +34 -0
  13. data/lib/parser/parser.rb +125 -0
  14. data/lib/parser/parser_methods.rb +10 -0
  15. data/lib/parser/parser_run.rb +35 -0
  16. data/lib/parser/parser_state.rb +66 -0
  17. data/lib/parser/token.rb +15 -0
  18. data/lib/tokenizer/tokenizer.rb +88 -0
  19. data/test/all_tests.rb +11 -0
  20. data/test/arithmetic_evaluator.rb +70 -0
  21. data/test/arithmetic_evaluator_test.rb +55 -0
  22. data/test/arithmetic_grammar.rb +38 -0
  23. data/test/arithmetic_grammar_test.rb +11 -0
  24. data/test/arithmetic_test_methods.rb +11 -0
  25. data/test/arithmetic_tokenizer.rb +43 -0
  26. data/test/arithmetic_tokenizer_test.rb +32 -0
  27. data/test/bracket_grammar.rb +25 -0
  28. data/test/bracket_tokenizer.rb +17 -0
  29. data/test/brackets_test.rb +20 -0
  30. data/test/compiled_arithmetic_parser.rb +252 -0
  31. data/test/compiled_parser_test.rb +71 -0
  32. data/test/evaluator_test.rb +8 -0
  33. data/test/grammar_test.rb +70 -0
  34. data/test/incomplete_arithmetic_evaluator.rb +60 -0
  35. data/test/lalr_but_not_slr_grammar.rb +17 -0
  36. data/test/malformed_grammar.rb +9 -0
  37. data/test/malformed_grammar_test.rb +9 -0
  38. data/test/nullable_grammar.rb +18 -0
  39. data/test/parser_test.rb +168 -0
  40. data/test/rr_conflict_grammar.rb +23 -0
  41. data/test/simple_grammar.rb +24 -0
  42. data/test/sr_conflict_grammar.rb +16 -0
  43. metadata +87 -0
@@ -0,0 +1,44 @@
1
+ #--
2
+ # Copyright (c) 2006 Mushfeq Khan
3
+ #
4
+ # Permission is hereby granted, free of charge, to any person obtaining
5
+ # a copy of this software and associated documentation files (the
6
+ # "Software"), to deal in the Software without restriction, including
7
+ # without limitation the rights to use, copy, modify, merge, publish,
8
+ # distribute, sublicense, and/or sell copies of the Software, and to
9
+ # permit persons to whom the Software is furnished to do so, subject to
10
+ # the following conditions:
11
+ #
12
+ # The above copyright notice and this permission notice shall be
13
+ # included in all copies or substantial portions of the Software.
14
+ #
15
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16
+ # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17
+ # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
18
+ # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
19
+ # LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
20
+ # OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
21
+ # WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
22
+ #++
23
+
24
+ require 'grammar/grammar_symbol'
25
+ require 'grammar/production'
26
+ require 'grammar/closure_hash'
27
+ require 'grammar/grammar'
28
+
29
+ require 'parser/parse_result'
30
+ require 'parser/item'
31
+ require 'parser/channel'
32
+ require 'parser/parser_methods'
33
+ require 'parser/parse_tree'
34
+ require 'parser/parser_state'
35
+ require 'parser/token'
36
+ require 'parser/action'
37
+ require 'parser/parser_run'
38
+ require 'parser/parser'
39
+ require 'parser/compiled_parser'
40
+
41
+ require 'tokenizer/tokenizer'
42
+ require 'evaluator/evaluator'
43
+
44
+
@@ -0,0 +1,70 @@
1
+ module Dhaka
2
# Base class for parse-tree evaluators.
#
# A subclass assigns its +grammar+, then declares one action block per
# production inside +define_evaluation_rules+, using either
# +for_rule_named(name) { ... }+ or the +for_<name> { ... }+ shorthand.
# Actions are run with +instance_eval+ on the evaluator instance, so they can
# call +child_nodes+ to obtain the evaluated children of the current node.
class Evaluator

  # Subclass hook: gives every subclass its own class-level +grammar+ and
  # +actions+ accessors.
  def self.inherited(evaluator)
    class << evaluator
      attr_accessor :grammar, :actions
    end
  end

  # Resets +actions+ to a hash whose default action returns the first
  # evaluated child, yields so the caller can register rules, and then
  # verifies the definitions with +check_definitions+.
  def self.define_evaluation_rules
    default_action = Proc.new { child_nodes[0] }
    self.actions = Hash.new { default_action }
    yield
    check_definitions
  end

  # Raises EvaluatorDefinitionError listing every production whose expansion
  # does not have exactly one symbol and that has no explicitly registered
  # action.
  def self.check_definitions
    non_trivial_names = grammar.productions.
      select { |production| production.expansion.size != 1 }.
      collect { |production| production.name }
    missing = non_trivial_names - actions.keys
    raise EvaluatorDefinitionError.new(missing) unless missing.empty?
  end

  # Registers +blk+ as the action for the production called +name+.
  def self.for_rule_named(name, &blk)
    actions[name] = blk
  end

  # Supports the +for_<production_name> { ... }+ shorthand. Any other unknown
  # method is passed on to +super+, so typos raise NoMethodError instead of
  # silently registering a bogus rule or returning nil.
  def self.method_missing(method_name, *args, &blk)
    rule_name = rule_name_for(method_name)
    return super unless rule_name
    for_rule_named(rule_name, &blk)
  end

  # Keeps +respond_to?+ consistent with +method_missing+.
  def self.respond_to_missing?(method_name, include_private = false)
    !rule_name_for(method_name).nil? || super
  end

  # Returns the production name encoded in a +for_<name>+ method name, or nil
  # when +method_name+ does not have that shape.
  def self.rule_name_for(method_name)
    match = /\Afor_(.+)\z/m.match(method_name.to_s)
    match && match[1]
  end
  private_class_method :rule_name_for

  # +syntax_tree+ is the root node that +result+ will evaluate.
  def initialize(syntax_tree)
    @syntax_tree = syntax_tree
    @node_stack = []
  end

  # Evaluates the whole syntax tree and returns the value produced for its
  # root node.
  def result
    evaluate(@syntax_tree)
  end

  # Evaluated children of the node currently being processed (the top of the
  # node stack). Intended to be called from inside action blocks.
  def child_nodes
    current_node = @node_stack.last
    current_node.child_nodes.collect { |child| evaluate(child) }
  end

  private

  # Leaf nodes evaluate to themselves. For any other node the action
  # registered under its production name is run in the context of this
  # evaluator.
  def evaluate(node)
    return node if ParseTreeLeafNode === node
    @node_stack << node
    begin
      instance_eval(&self.class.actions[node.production.name])
    ensure
      # Always unwind, so a raising action cannot leave a stale node behind.
      @node_stack.pop
    end
  end

end
59
+
60
# Raised when an evaluator leaves non-trivial productions without an
# evaluation rule. Carries the names of the offending productions.
class EvaluatorDefinitionError < StandardError
  # +non_trivial_productions_with_rules_undefined+ is the list of production
  # names reported by +to_s+.
  def initialize(non_trivial_productions_with_rules_undefined)
    @non_trivial_productions_with_rules_undefined = non_trivial_productions_with_rules_undefined
  end

  # A fixed heading followed by the missing production names, one per line.
  def to_s
    heading = "The following non-trivial productions do not have any evaluation rules defined:\n"
    heading + @non_trivial_productions_with_rules_undefined.join("\n")
  end
end
70
+ end
@@ -0,0 +1,13 @@
1
+ #!/usr/bin/env ruby
2
+ module Dhaka
3
# A Hash with a +dirty+ flag. The flag starts out false; the hash itself
# never sets it, so it is up to the default block (or the caller) to do so.
class ClosureHash < Hash
  attr_accessor :dirty

  # Forwards the optional default block to Hash and clears the dirty flag.
  def initialize(&block)
    super(&block)
    @dirty = false
  end

  # Stores every member of +set+ as both key and value. Uses plain
  # assignment, so the default block is not triggered.
  def load_set(set)
    set.each { |member| store(member, member) }
  end
end
13
+ end
@@ -0,0 +1,129 @@
1
+ #!/usr/bin/env ruby
2
+ require 'set'
3
+ module Dhaka
4
+
5
# Names under which Grammar.inherited registers the start and end symbols of
# every grammar. Frozen because the same String objects become the symbols'
# names and must not be mutated in place.
START_SYMBOL_NAME = "_Start_".freeze
END_SYMBOL_NAME = "_End_".freeze
7
+
8
# DSL receiver used by Grammar.for_symbol: every unknown method call made on
# a builder defines a production for the builder's symbol.
class ProductionBuilder
  # +grammar+ receives the productions; +symbol+ is their left-hand side.
  def initialize(grammar, symbol)
    @grammar = grammar
    @symbol = symbol
  end

  # Treats the method name as the production name and +expansion+ as the list
  # of symbol names on the right-hand side. Registers the new production with
  # the grammar (by symbol and by name) and returns it. An empty expansion
  # marks the symbol as nullable.
  def method_missing(production_name, expansion)
    members = expansion.map { |name| @grammar.symbols[name] }
    built = Production.new(@symbol, members, production_name.to_s)
    @symbol.nullable = true if members.empty?
    @grammar.productions_by_symbol[built.symbol] << built
    @grammar.productions_by_name[built.name] = built
  end
end
22
+
23
# Base class for grammar definitions. All state lives at class level on the
# subclass: a symbol table, productions indexed by symbol and by name, the
# start/end symbols and a cache for +first+.
class Grammar

  # Subclass hook: installs the class-level accessors on the new grammar and
  # initialises them. The end symbol is registered before the start symbol.
  def self.inherited(grammar)
    class << grammar
      attr_accessor :symbols, :productions_by_symbol, :productions_by_name,
                    :start_symbol, :end_symbol, :__first_cache
    end
    # Unknown names are turned into new symbols on first lookup.
    grammar.symbols = Hash.new { |table, name| table[name] = GrammarSymbol.new(name) }
    grammar.productions_by_symbol = Hash.new { |table, symbol| table[symbol] = Set.new }
    grammar.productions_by_name = {}
    grammar.end_symbol = grammar.symbols[END_SYMBOL_NAME]
    grammar.start_symbol = grammar.symbols[START_SYMBOL_NAME]
    grammar.__first_cache = {}
  end

  # Marks the symbol called +symbol+ as a non-terminal and evaluates +blk+
  # against a ProductionBuilder for it.
  def self.for_symbol(symbol, &blk)
    target = symbols[symbol]
    target.non_terminal = true
    ProductionBuilder.new(self, target).instance_eval(&blk)
  end

  # All productions registered by name.
  def self.productions
    productions_by_name.values
  end

  # The production registered under +name+ (nil when there is none).
  def self.production_named(name)
    productions_by_name[name]
  end

  # The productions registered for +symbol+.
  def self.productions_for_symbol(symbol)
    productions_by_symbol[symbol]
  end

  # Looks up an existing symbol; unlike +symbols[name]+ it raises instead of
  # creating the symbol when it is unknown.
  def self.symbol_for_name(name)
    raise "No symbol with name #{name} found" unless symbols.has_key?(name)
    symbols[name]
  end

  # Symbols that have not been marked as non-terminals.
  def self.terminal_symbols
    symbols.values.select { |candidate| candidate.terminal }
  end

  # Symbols that have been marked as non-terminals.
  def self.non_terminal_symbols
    symbols.values.select { |candidate| candidate.non_terminal }
  end

  # Computes the closure of the items in +kernel+. Returns a two-element
  # array: the set of spontaneous channels created along the way, and the
  # closure hash of items.
  def self.closure(kernel)
    channels = Set.new

    items = compute_closure(kernel) do |hash, item|
      upcoming = item.next_symbol
      next unless upcoming && upcoming.non_terminal
      productions_by_symbol[upcoming].each do |production|
        # hash[...] hands back the item already in the closure, adding it first if needed.
        channels << spontaneous_channel(item, hash[Item.new(production, 0)])
      end
    end

    [channels, items]
  end

  # Set of terminal symbols found by following, from +given_symbol+, the
  # leading symbols of each production up to and including the first
  # non-nullable one. Results are cached per symbol.
  def self.first(given_symbol)
    memoized = __first_cache[given_symbol]
    return memoized if memoized

    reachable = compute_closure([given_symbol]) do |hash, symbol|
      next unless symbol.non_terminal
      productions_by_symbol[symbol].each do |production|
        production.expansion.each do |member|
          hash[member] # touching the key adds it to the closure
          break unless member.nullable
        end
      end
    end

    terminals = reachable.values.select { |symbol| symbol.terminal }.to_set
    __first_cache[given_symbol] = terminals
    terminals
  end

  # Builds a SpontaneousChannel between two items for this grammar.
  def self.spontaneous_channel(start_item, end_item)
    SpontaneousChannel.new(self, start_item, end_item)
  end

  # Builds a PassiveChannel between two items for this grammar.
  def self.passive_channel(start_item, end_item)
    PassiveChannel.new(self, start_item, end_item)
  end

  # Generic fixed-point helper. Seeds a ClosureHash with +initial+, then
  # repeatedly yields (hash, element) for every key until a full pass adds
  # nothing new. The block adds elements by reading +hash[new_element]+.
  def self.compute_closure(initial)
    closure_hash = ClosureHash.new do |hash, item|
      hash.dirty = true
      hash[item] = item
    end
    closure_hash.load_set(initial)

    settled = false
    until settled
      closure_hash.keys.each { |element| yield closure_hash, element }
      settled = !closure_hash.dirty
      closure_hash.dirty = false
    end

    closure_hash
  end

end
129
+ end
@@ -0,0 +1,19 @@
1
+ #!/usr/bin/env ruby
2
+ module Dhaka
3
# A named grammar symbol. A symbol counts as a terminal until +non_terminal+
# is set to a truthy value.
class GrammarSymbol
  attr_accessor :non_terminal, :nullable
  attr_reader :name

  def initialize(name)
    @name = name
  end

  # True unless the symbol has been flagged as a non-terminal.
  def terminal
    non_terminal ? false : true
  end

  # The symbol's name.
  def to_s
    @name
  end

  # Orders symbols by name.
  def <=>(other)
    name <=> other.name
  end
end
19
+ end
@@ -0,0 +1,14 @@
1
+ #!/usr/bin/env ruby
2
+ module Dhaka
3
# A named production: +symbol+ is the left-hand side and +expansion+ the list
# of symbols on the right-hand side.
class Production
  attr_reader :symbol, :expansion, :name

  def initialize(symbol, expansion, name)
    @symbol = symbol
    @expansion = expansion
    @name = name
  end

  # Renders as "<name> <symbol> ::= <expansion symbols separated by spaces>".
  def to_s
    right_hand_side = @expansion.join(' ')
    format('%s %s ::= %s', @name, @symbol, right_hand_side)
  end
end
14
+ end
@@ -0,0 +1,51 @@
1
+ module Dhaka
2
# Common base for parser actions. +action_code+ is a Proc that subclasses
# build in their constructors. It calls +state_stack+, +node_stack+ and
# +current_token+ without a receiver, so it is presumably meant to be
# instance_eval'ed on an object providing them (caller not visible here).
class Action
  attr_reader :action_code
end

# Pushes a destination state onto the state stack.
class ShiftAction < Action
  attr_reader :destination_state

  def initialize(destination_state)
    @destination_state = destination_state
    @action_code = Proc.new do
      state_stack << destination_state
      []
    end
  end

  # Source fragment that recreates this action by state id.
  def compile_to_ruby_source
    "shift_to #{@destination_state.id}"
  end

  def to_s
    "Shift to #{@destination_state}"
  end
end

# Replaces the nodes matched by a production with one composite node.
class ReduceAction < Action
  attr_reader :production

  def initialize(production)
    @production = production
    @action_code = Proc.new do
      reduced = ParseTreeCompositeNode.new(production)

      # Pop one state and one node per expansion symbol; unshift keeps the
      # children in their original left-to-right order.
      production.expansion.size.times do
        state_stack.pop
        reduced.child_nodes.unshift(node_stack.pop)
      end

      node_stack << reduced

      if reduced.head_node?
        []
      else
        [production.symbol.name, current_token.grammar_symbol.name]
      end
    end
  end

  # Source fragment that recreates this action by production name.
  def compile_to_ruby_source
    "reduce_with '#{@production.name}'"
  end

  def to_s
    "Reduce with #{production}"
  end
end
51
+ end
@@ -0,0 +1,51 @@
1
+ #!/usr/bin/env ruby
2
+ module Dhaka
3
# A directed link between two items along which lookahead symbols flow from
# +start_item+ to +end_item+. Subclasses supply +pump+.
class Channel
  attr_reader :start_item, :end_item

  def initialize(grammar, start_item, end_item)
    @grammar = grammar
    @start_item = start_item
    @end_item = end_item
  end

  # Adds the members of +cargo+ that the end item's lookahead set lacks.
  # Returns true when at least one symbol was new, false otherwise.
  def propagate(cargo)
    destination = @end_item.lookaheadset
    fresh = cargo - destination
    destination.merge(fresh)
    !fresh.empty?
  end

  def to_s
    "Channel from #{@start_item} to #{@end_item}"
  end

  # Channels are equal when both endpoints are; the channel class itself is
  # not compared.
  def eql?(other)
    @start_item.eql?(other.start_item) && @end_item.eql?(other.end_item)
  end

  def hash
    @start_item.hash ^ @end_item.hash
  end
end

# Channel whose cargo is derived from what follows the start item's next
# symbol.
class SpontaneousChannel < Channel
  def to_s
    "Spontaneous #{super}"
  end

  # Collects FIRST sets of the symbols after the start item's next symbol,
  # stopping at (and including) the first non-nullable one. If every such
  # symbol is nullable (or there are none), the start item's own lookaheads
  # are added too. Returns what +propagate+ returns.
  def pump
    cargo = Set.new
    trailing = @start_item.production.expansion.drop(@start_item.next_item_index + 1)
    trailing.each do |follow_symbol|
      cargo.merge(@grammar.first(follow_symbol))
      return propagate(cargo) unless follow_symbol.nullable
    end
    cargo.merge(@start_item.lookaheadset)
    propagate(cargo)
  end
end

# Channel that forwards the start item's lookaheads unchanged.
class PassiveChannel < Channel
  def to_s
    "Passive #{super}"
  end

  def pump
    propagate(@start_item.lookaheadset)
  end
end
51
+ end
@@ -0,0 +1,35 @@
1
+ module Dhaka
2
# Base class for parsers whose state table is declared in source through the
# class-level DSL below (+start_with+, +at_state+, +shift_to+,
# +reduce_with+).
class CompiledParser

  # Subclass hook: installs class-level +states+, +grammar+ and
  # +start_state_id+ accessors and a state table that creates an empty
  # ParserState the first time an id is looked up.
  def self.inherited(compiled_parser)
    class << compiled_parser
      attr_accessor :states, :grammar, :start_state_id
    end
    compiled_parser.states = Hash.new do |table, state_id|
      table[state_id] = ParserState.new(compiled_parser, {}, state_id)
    end
  end

  # Evaluates +blk+ in the context of the state with id +x+.
  def self.at_state(x, &blk)
    states[x].instance_eval(&blk)
  end

  # The state registered under +start_state_id+.
  def self.start_state
    states[start_state_id]
  end

  # Records the id of the start state.
  def self.start_with(start_state_id)
    self.start_state_id = start_state_id
  end

  # Builds a ReduceAction for the grammar production called +production_name+.
  def self.reduce_with(production_name)
    ReduceAction.new(grammar.production_named(production_name))
  end

  # Builds a ShiftAction targeting the state with id +state_id+.
  def self.shift_to(state_id)
    ShiftAction.new(states[state_id])
  end

  # Adds the methods of ParserMethods (defined elsewhere) at class level.
  extend ParserMethods

end
34
+
35
+ end
@@ -0,0 +1,37 @@
1
+ #!/usr/bin/env ruby
2
+ module Dhaka
3
# A production paired with a position inside its expansion and a set of
# lookahead symbols. Equality and hashing ignore the lookahead set.
class Item
  attr_reader :production, :next_item_index, :lookaheadset

  def initialize(production, next_item_index)
    @production = production
    @next_item_index = next_item_index
    @lookaheadset = Set.new
  end

  # The expansion symbol at the current position, or nil when the position is
  # at (or past) the end of the expansion.
  def next_symbol
    return nil unless @next_item_index < @production.expansion.size
    @production.expansion[@next_item_index]
  end

  # A new item for the same production, one position further along, with an
  # empty lookahead set.
  def next_item
    Item.new(@production, @next_item_index + 1)
  end

  # "<symbol> ::= <expansion with '->' at the position> [<sorted lookaheads>]"
  def to_s
    names = @production.expansion.map { |symbol| symbol.name }
    # A position at or past the end puts the marker last.
    marker_at = [@next_item_index, names.size].min
    names.insert(marker_at, '->')
    "#{@production.symbol} ::= #{names.join(' ')} [#{@lookaheadset.to_a.sort}]"
  end

  def eql?(other)
    @production == other.production && @next_item_index == other.next_item_index
  end

  def hash
    @production.hash ^ @next_item_index.hash
  end
end
37
+ end