dhaka 2.0.0 → 2.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (68) hide show
  1. data/lib/dhaka.rb +24 -22
  2. data/lib/evaluator/evaluator.rb +42 -44
  3. data/lib/grammar/closure_hash.rb +4 -3
  4. data/lib/grammar/grammar.rb +113 -110
  5. data/lib/grammar/grammar_symbol.rb +6 -3
  6. data/lib/grammar/precedence.rb +3 -2
  7. data/lib/grammar/production.rb +5 -6
  8. data/lib/parser/action.rb +16 -11
  9. data/lib/parser/channel.rb +22 -16
  10. data/lib/parser/compiled_parser.rb +28 -22
  11. data/lib/parser/conflict.rb +54 -0
  12. data/lib/parser/item.rb +19 -19
  13. data/lib/parser/parse_result.rb +16 -1
  14. data/lib/parser/parse_tree.rb +15 -9
  15. data/lib/parser/parser.rb +51 -80
  16. data/lib/parser/parser_run.rb +6 -6
  17. data/lib/parser/parser_state.rb +16 -18
  18. data/lib/parser/token.rb +6 -4
  19. data/lib/tokenizer/tokenizer.rb +34 -31
  20. data/test/all_tests.rb +4 -18
  21. data/test/another_lalr_but_not_slr_grammar.rb +9 -5
  22. data/test/{arithmetic_evaluator.rb → arithmetic/arithmetic_evaluator.rb} +1 -2
  23. data/test/{arithmetic_evaluator_test.rb → arithmetic/arithmetic_evaluator_test.rb} +9 -20
  24. data/test/arithmetic/arithmetic_grammar.rb +41 -0
  25. data/test/{arithmetic_grammar_test.rb → arithmetic/arithmetic_grammar_test.rb} +2 -4
  26. data/test/{arithmetic_test_methods.rb → arithmetic/arithmetic_test_methods.rb} +1 -3
  27. data/test/{arithmetic_tokenizer.rb → arithmetic/arithmetic_tokenizer.rb} +8 -10
  28. data/test/{arithmetic_tokenizer_test.rb → arithmetic/arithmetic_tokenizer_test.rb} +4 -2
  29. data/test/{arithmetic_precedence_evaluator.rb → arithmetic_precedence/arithmetic_precedence_evaluator.rb} +1 -2
  30. data/test/arithmetic_precedence/arithmetic_precedence_grammar.rb +24 -0
  31. data/test/{arithmetic_precedence_grammar_test.rb → arithmetic_precedence/arithmetic_precedence_grammar_test.rb} +2 -3
  32. data/test/arithmetic_precedence/arithmetic_precedence_parser_test.rb +31 -0
  33. data/test/{arithmetic_precedence_tokenizer.rb → arithmetic_precedence/arithmetic_precedence_tokenizer.rb} +8 -10
  34. data/test/brackets/bracket_grammar.rb +23 -0
  35. data/test/{bracket_tokenizer.rb → brackets/bracket_tokenizer.rb} +2 -4
  36. data/test/{brackets_test.rb → brackets/brackets_test.rb} +3 -4
  37. data/test/chittagong/chittagong_driver.rb +47 -0
  38. data/test/{chittagong_driver_test.rb → chittagong/chittagong_driver_test.rb} +66 -58
  39. data/test/{chittagong_evaluator.rb → chittagong/chittagong_evaluator.rb} +28 -13
  40. data/test/{chittagong_evaluator_test.rb → chittagong/chittagong_evaluator_test.rb} +6 -10
  41. data/test/chittagong/chittagong_grammar.rb +110 -0
  42. data/test/{chittagong_parser_test.rb → chittagong/chittagong_parser_test.rb} +5 -7
  43. data/test/{chittagong_test.rb → chittagong/chittagong_test.rb} +27 -36
  44. data/test/{chittagong_tokenizer.rb → chittagong/chittagong_tokenizer.rb} +17 -17
  45. data/test/{chittagong_tokenizer_test.rb → chittagong/chittagong_tokenizer_test.rb} +2 -3
  46. data/test/compiled_parser_test.rb +9 -42
  47. data/test/dhaka_test_helper.rb +17 -0
  48. data/test/evaluator_test.rb +18 -3
  49. data/test/grammar_test.rb +10 -15
  50. data/test/lalr_but_not_slr_grammar.rb +10 -8
  51. data/test/malformed_grammar.rb +2 -4
  52. data/test/malformed_grammar_test.rb +2 -3
  53. data/test/nullable_grammar.rb +11 -8
  54. data/test/parse_result_test.rb +44 -0
  55. data/test/parser_state_test.rb +36 -0
  56. data/test/parser_test.rb +53 -103
  57. data/test/precedence_grammar.rb +6 -6
  58. data/test/precedence_grammar_test.rb +2 -3
  59. data/test/rr_conflict_grammar.rb +5 -7
  60. data/test/simple_grammar.rb +6 -8
  61. data/test/sr_conflict_grammar.rb +6 -6
  62. metadata +30 -26
  63. data/test/arithmetic_grammar.rb +0 -35
  64. data/test/arithmetic_precedence_grammar.rb +0 -24
  65. data/test/arithmetic_precedence_parser_test.rb +0 -33
  66. data/test/bracket_grammar.rb +0 -25
  67. data/test/chittagong_grammar.rb +0 -104
  68. data/test/incomplete_arithmetic_evaluator.rb +0 -60
@@ -21,26 +21,28 @@
21
21
  # WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
22
22
  #++
23
23
 
24
- module Dhaka
25
- end
24
+ require 'set'
25
+ require 'logger'
26
+ require 'delegate'
26
27
 
27
- require File.dirname(__FILE__)+'/grammar/grammar_symbol'
28
- require File.dirname(__FILE__)+'/grammar/production'
29
- require File.dirname(__FILE__)+'/grammar/closure_hash'
30
- require File.dirname(__FILE__)+'/grammar/grammar'
31
- require File.dirname(__FILE__)+'/grammar/precedence'
32
-
33
- require File.dirname(__FILE__)+'/parser/parse_result'
34
- require File.dirname(__FILE__)+'/parser/item'
35
- require File.dirname(__FILE__)+'/parser/channel'
36
- require File.dirname(__FILE__)+'/parser/parser_methods'
37
- require File.dirname(__FILE__)+'/parser/parse_tree'
38
- require File.dirname(__FILE__)+'/parser/parser_state'
39
- require File.dirname(__FILE__)+'/parser/token'
40
- require File.dirname(__FILE__)+'/parser/action'
41
- require File.dirname(__FILE__)+'/parser/parser_run'
42
- require File.dirname(__FILE__)+'/parser/parser'
43
- require File.dirname(__FILE__)+'/parser/compiled_parser'
44
-
45
- require File.dirname(__FILE__)+'/tokenizer/tokenizer'
46
- require File.dirname(__FILE__)+'/evaluator/evaluator'
28
+ %w[
29
+ grammar/grammar_symbol
30
+ grammar/production
31
+ grammar/closure_hash
32
+ grammar/grammar
33
+ grammar/precedence
34
+ parser/parse_tree
35
+ parser/parse_result
36
+ parser/item
37
+ parser/channel
38
+ parser/parser_methods
39
+ parser/parser_state
40
+ parser/conflict
41
+ parser/token
42
+ parser/action
43
+ parser/parser_run
44
+ parser/parser
45
+ parser/compiled_parser
46
+ tokenizer/tokenizer
47
+ evaluator/evaluator
48
+ ].each {|path| require File.join(File.dirname(__FILE__), path)}
@@ -1,5 +1,4 @@
1
1
  module Dhaka
2
-
3
2
  # This is the abstract base evaluator class. It is not directly instantiated.
4
3
  # When defining an evaluator for a specific grammar, we subclass it. e.g. for FooGrammar
5
4
  # we create a FooEvaluator that subclasses Evaluator. Note that FooEvaluator may not
@@ -55,61 +54,60 @@ module Dhaka
55
54
  # end
56
55
  #
57
56
  # end
58
-
59
-
60
57
  class Evaluator
58
+ class << self
59
+ def inherited(evaluator)
60
+ class << evaluator
61
+ attr_accessor :grammar, :actions
62
+ end
63
+ end
64
+
65
+ def method_missing(method_name, *args, &blk)
66
+ name = method_name.to_s
67
+ if name =~ /^for_(.+)$/
68
+ rule_name = $1
69
+ actions << rule_name
70
+ send(:define_method, rule_name, &blk)
71
+ else
72
+ super
73
+ end
74
+ end
75
+
76
+ # Evaluation rules are defined within a block passed to this method.
77
+ def define_evaluation_rules
78
+ self.actions = []
79
+ yield
80
+ check_definitions
81
+ end
82
+
83
+ private
61
84
 
62
- # Instantiates a new evaluator with the syntax tree of a parsed expression. Only subclasses
63
- # of Evaluator are directly instantiated.
85
+ def check_definitions
86
+ filter = lambda {|productions| productions.map {|production| production.name} - actions}
87
+ pass_through_productions_without_rules = filter[grammar.productions.select {|production| production.expansion.size == 1}]
88
+ pass_through_productions_without_rules.each do |rule_name|
89
+ send(:define_method, rule_name) do
90
+ evaluate(child_nodes.first)
91
+ end
92
+ end
93
+ non_trivial_productions_with_rules_undefined = filter[grammar.productions.select {|production| production.expansion.size != 1}]
94
+ raise EvaluatorDefinitionError.new(non_trivial_productions_with_rules_undefined) unless non_trivial_productions_with_rules_undefined.empty?
95
+ end
96
+ end
64
97
 
65
98
  # Evaluate a syntax tree node.
66
99
  def evaluate node
67
100
  @node_stack ||= []
68
101
  @node_stack << node.child_nodes
69
- result = self.send(node.production.name)
102
+ result = send(node.production.name)
70
103
  @node_stack.pop
71
104
  result
72
105
  end
73
106
 
74
- # Performs the pass-through calculations for nodes with only one child_node for which an
75
- # evaluation rule is not explicitly defined. Will probably be deprecated in future versions.
76
- def method_missing(method_name)
77
- evaluate(child_nodes[0])
78
- end
79
-
80
107
  # Returns the array of child nodes of the node being currently evaluated.
81
108
  def child_nodes
82
- @node_stack[-1]
109
+ @node_stack.last
83
110
  end
84
-
85
- # Evaluation rules are defined within a block passed to this method.
86
- def self.define_evaluation_rules
87
- self.actions = []
88
- yield
89
- check_definitions
90
- end
91
-
92
- private
93
-
94
- def self.inherited(evaluator)
95
- class << evaluator
96
- attr_accessor :grammar, :actions
97
- end
98
- end
99
-
100
- def self.method_missing(method_name, &blk)
101
- if method_name.to_s =~ /^for_*/
102
- rule_name = method_name.to_s[4..-1]
103
- self.actions << rule_name
104
- self.send(:define_method, rule_name, &blk)
105
- end
106
- end
107
-
108
- def self.check_definitions
109
- non_trivial_productions_with_rules_undefined = self.grammar.productions.select {|production| production.expansion.size != 1}.collect {|production| production.name} - self.actions
110
- raise EvaluatorDefinitionError.new(non_trivial_productions_with_rules_undefined) unless non_trivial_productions_with_rules_undefined.empty?
111
- end
112
-
113
111
  end
114
112
 
115
113
  class EvaluatorDefinitionError < StandardError #:nodoc:
@@ -118,8 +116,8 @@ module Dhaka
118
116
  end
119
117
 
120
118
  def to_s
121
- result = "The following non-trivial productions do not have any evaluation rules defined:\n"
122
- result << (@non_trivial_productions_with_rules_undefined).join("\n")
119
+ result = "The following non-trivial productions do not have any evaluation rules defined:\n"
120
+ result << @non_trivial_productions_with_rules_undefined.join("\n")
123
121
  end
124
122
  end
125
123
  end
@@ -1,12 +1,13 @@
1
- #!/usr/bin/env ruby
2
1
  module Dhaka
3
2
  # A subclass of Hash with a dirty flag
4
3
  class ClosureHash < Hash #:nodoc:
5
4
  attr_accessor :dirty
6
- def initialize(&block)
7
- super(&block)
5
+
6
+ def initialize
7
+ super
8
8
  @dirty = false
9
9
  end
10
+
10
11
  def load_set(set)
11
12
  set.each {|item| self[item] = item}
12
13
  end
@@ -1,10 +1,8 @@
1
- #!/usr/bin/env ruby
2
- require 'set'
3
1
  module Dhaka
4
2
 
5
3
  # Reserved name for the start symbol for all grammars.
6
4
  START_SYMBOL_NAME = "_Start_"
7
- END_SYMBOL_NAME = "_End_" #:nodoc:
5
+ END_SYMBOL_NAME = "_End_" #:nodoc:
8
6
 
9
7
  # Productions for specific grammar symbols are defined in the context of this class.
10
8
  class ProductionBuilder
@@ -12,7 +10,7 @@ module Dhaka
12
10
  # +symbol+ is the grammar symbol that productions are being defined for.
13
11
  def initialize(grammar, symbol)
14
12
  @grammar = grammar
15
- @symbol = symbol
13
+ @symbol = symbol
16
14
  end
17
15
 
18
16
  # Creates a new production for +symbol+ with an expansion of +expansion+. The options hash can include
@@ -22,11 +20,13 @@ module Dhaka
22
20
  # See the arithmetic precedence grammar in the test suites for an example.
23
21
  def method_missing(production_name, expansion, options = {})
24
22
  expansion_symbols = expansion.collect {|name| @grammar.symbols[name]}
23
+ production_args = [@symbol, expansion_symbols, production_name.to_s]
25
24
  if precedence_symbol_name = options[:prec]
26
- production = Production.new(@symbol, expansion_symbols, production_name.to_s, @grammar.symbol_for_name(precedence_symbol_name).precedence)
27
- else
28
- production = Production.new(@symbol, expansion_symbols, production_name.to_s)
25
+ production_args << @grammar.symbol_for_name(precedence_symbol_name).precedence
29
26
  end
27
+
28
+ production = Production.new(*production_args)
29
+
30
30
  @symbol.nullable = true if expansion_symbols.empty?
31
31
  @grammar.productions_by_symbol[production.symbol] << production
32
32
  raise "Duplicate production named #{production.name}" if @grammar.productions_by_name[production.name]
@@ -40,9 +40,10 @@ module Dhaka
40
40
  # See the arithmetic precedence grammar in the test suites for an example.
41
41
  class PrecedenceBuilder
42
42
  def initialize(grammar) #:nodoc:
43
- @grammar = grammar
43
+ @grammar = grammar
44
44
  @precedence_level = 0
45
45
  end
46
+
46
47
  [:left, :right, :nonassoc].each do |associativity|
47
48
  define_method(associativity) do |symbols|
48
49
  assign_precedences associativity, symbols
@@ -52,7 +53,7 @@ module Dhaka
52
53
  private
53
54
  def assign_precedences(associativity, symbol_names)
54
55
  symbol_names.each do |symbol_name|
55
- symbol = @grammar.symbols[symbol_name]
56
+ symbol = @grammar.symbols[symbol_name]
56
57
  symbol.precedence = Precedence.new(@precedence_level, associativity)
57
58
  end
58
59
  @precedence_level += 1
@@ -91,119 +92,121 @@ module Dhaka
91
92
  # end
92
93
  #
93
94
  class Grammar
94
-
95
- # Used for defining the productions for the symbol with name +symbol+. The block +blk+ is
96
- # evaluated in the context of a ProductionBuilder.
97
- def self.for_symbol symbol, &blk
98
- symbol = symbols[symbol]
99
- symbol.non_terminal = true
100
- ProductionBuilder.new(self, symbol).instance_eval(&blk)
101
- end
102
-
103
- # Used for defining the precedences and associativities of symbols. The block +blk+ is
104
- # evaluated in the context of a PrecedenceBuilder.
105
- def self.precedences &blk
106
- PrecedenceBuilder.new(self).instance_eval(&blk)
107
- end
108
-
109
- # Returns the grammar symbol identified by +name+
110
- def self.symbol_for_name(name)
111
- if symbols.has_key? name
112
- symbols[name]
113
- else
114
- raise "No symbol with name #{name} found"
95
+ class << self
96
+ # Used for defining the productions for the symbol with name +symbol+. The block +blk+ is
97
+ # evaluated in the context of a ProductionBuilder.
98
+ def for_symbol symbol, &blk
99
+ symbol = symbols[symbol]
100
+ symbol.non_terminal = true
101
+ ProductionBuilder.new(self, symbol).instance_eval(&blk)
115
102
  end
116
- end
117
103
 
118
- private
119
-
120
- def self.inherited(grammar)
121
- class << grammar
122
- attr_accessor :symbols, :productions_by_symbol, :productions_by_name, :start_symbol, :end_symbol, :__first_cache
104
+ # Used for defining the precedences and associativities of symbols. The block +blk+ is
105
+ # evaluated in the context of a PrecedenceBuilder.
106
+ def precedences &blk
107
+ PrecedenceBuilder.new(self).instance_eval(&blk)
123
108
  end
124
- grammar.symbols = Hash.new {|hash, name| hash[name] = GrammarSymbol.new(name)}
125
- grammar.productions_by_symbol = Hash.new {|hash, name| hash[name] = Set.new([])}
126
- grammar.productions_by_name = {}
127
- grammar.end_symbol = grammar.symbols[END_SYMBOL_NAME]
128
- grammar.start_symbol = grammar.symbols[START_SYMBOL_NAME]
129
- grammar.__first_cache = {}
130
- end
131
-
132
- def self.productions_for_symbol(symbol)
133
- productions_by_symbol[symbol]
134
- end
135
-
136
- def self.productions
137
- productions_by_name.values
138
- end
139
-
140
- def self.production_named(name)
141
- productions_by_name[name]
142
- end
143
-
144
-
145
- def self.terminal_symbols
146
- symbols.values.select {|symbol| symbol.terminal}
147
- end
148
-
149
- def self.non_terminal_symbols
150
- symbols.values.select {|symbol| symbol.non_terminal}
151
- end
152
-
153
- def self.closure(kernel)
154
- channels = Set.new
155
109
 
156
- result = compute_closure(kernel) do |hash, item|
157
- if item.next_symbol and item.next_symbol.non_terminal
158
- productions_by_symbol[item.next_symbol].each do |production|
159
- channels << spontaneous_channel(item, hash[Item.new(production, 0)])
160
- end
110
+ # Returns the grammar symbol identified by +name+
111
+ def symbol_for_name(name)
112
+ if symbols.has_key? name
113
+ symbols[name]
114
+ else
115
+ raise "No symbol with name #{name} found"
161
116
  end
162
117
  end
118
+
119
+ def productions
120
+ productions_by_name.values
121
+ end
122
+
123
+ def productions_for_symbol(symbol)
124
+ productions_by_symbol[symbol]
125
+ end
126
+
127
+ def closure(kernel)
128
+ channels = Set.new
129
+
130
+ result = compute_closure(kernel) do |hash, item|
131
+ if item.next_symbol and item.next_symbol.non_terminal
132
+ productions_by_symbol[item.next_symbol].each do |production|
133
+ channels << spontaneous_channel(item, hash[Item.new(production, 0)])
134
+ end
135
+ end
136
+ end
163
137
 
164
- return channels, result
165
- end
138
+ [channels, result]
139
+ end
140
+
141
+ def passive_channel(start_item, end_item)
142
+ PassiveChannel.new(self, start_item, end_item)
143
+ end
144
+
145
+ def first(given_symbol)
146
+ cached_result = __first_cache[given_symbol]
147
+ return cached_result if cached_result
148
+ result = compute_closure([given_symbol]) do |hash, symbol|
149
+ productions_by_symbol[symbol].each do |production|
150
+ symbol_index = 0
151
+ while next_symbol = production.expansion[symbol_index]
152
+ hash[next_symbol]
153
+ break unless next_symbol.nullable
154
+ symbol_index += 1
155
+ end
156
+ end if symbol.non_terminal
157
+ end.values.select {|symbol| symbol.terminal}.to_set
158
+ __first_cache[given_symbol] = result
159
+ result
160
+ end
161
+
162
+ def production_named(name)
163
+ productions_by_name[name]
164
+ end
165
+
166
+ def terminal_symbols
167
+ symbols.values.select {|symbol| symbol.terminal}
168
+ end
166
169
 
167
- def self.first(given_symbol)
168
- cached_result = self.__first_cache[given_symbol]
169
- return cached_result if cached_result
170
- result = compute_closure([given_symbol]) do |hash, symbol|
171
- productions_by_symbol[symbol].each do |production|
172
- symbol_index = 0
173
- while next_symbol = production.expansion[symbol_index]
174
- hash[next_symbol]
175
- break if !next_symbol.nullable
176
- symbol_index += 1
177
- end
178
- end if symbol.non_terminal
179
- end.values.select {|symbol| symbol.terminal}.to_set
180
- self.__first_cache[given_symbol] = result
181
- result
182
- end
170
+ def non_terminal_symbols
171
+ symbols.values.select {|symbol| symbol.non_terminal}
172
+ end
173
+
174
+ private
175
+
176
+ def inherited(grammar)
177
+ class << grammar
178
+ attr_accessor :symbols, :productions_by_symbol, :productions_by_name, :start_symbol, :end_symbol, :__first_cache
179
+ end
180
+ grammar.symbols = Hash.new {|hash, name| hash[name] = GrammarSymbol.new(name)}
181
+ grammar.productions_by_symbol = Hash.new {|hash, name| hash[name] = Set.new([])}
182
+ grammar.productions_by_name = {}
183
+ grammar.end_symbol = grammar.symbols[END_SYMBOL_NAME]
184
+ grammar.start_symbol = grammar.symbols[START_SYMBOL_NAME]
185
+ grammar.__first_cache = {}
186
+ end
183
187
 
184
- def self.spontaneous_channel(start_item, end_item)
185
- SpontaneousChannel.new(self, start_item, end_item)
186
- end
187
-
188
- def self.passive_channel(start_item, end_item)
189
- PassiveChannel.new(self, start_item, end_item)
190
- end
188
+ def spontaneous_channel(start_item, end_item)
189
+ SpontaneousChannel.new(self, start_item, end_item)
190
+ end
191
191
 
192
- def self.compute_closure(initial)
193
- closure_hash = ClosureHash.new do |hash, item|
194
- hash.dirty = true
195
- hash[item] = item
196
- end
197
- closure_hash.load_set(initial)
198
- while true
199
- closure_hash.keys.each do |element|
200
- yield closure_hash, element
192
+ def compute_closure(initial)
193
+ closure_hash = ClosureHash.new do |hash, item|
194
+ hash.dirty = true
195
+ hash[item] = item
196
+ end
197
+
198
+ closure_hash.load_set(initial)
199
+
200
+ loop do
201
+ closure_hash.keys.each do |element|
202
+ yield closure_hash, element
203
+ end
204
+ break unless closure_hash.dirty
205
+ closure_hash.dirty = false
206
+ end
207
+ closure_hash
201
208
  end
202
- break if !closure_hash.dirty
203
- closure_hash.dirty = false
204
209
  end
205
- return closure_hash
206
- end
207
210
  end
208
211
 
209
212
  end