dhaka 2.0.0 → 2.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68)
  1. data/lib/dhaka.rb +24 -22
  2. data/lib/evaluator/evaluator.rb +42 -44
  3. data/lib/grammar/closure_hash.rb +4 -3
  4. data/lib/grammar/grammar.rb +113 -110
  5. data/lib/grammar/grammar_symbol.rb +6 -3
  6. data/lib/grammar/precedence.rb +3 -2
  7. data/lib/grammar/production.rb +5 -6
  8. data/lib/parser/action.rb +16 -11
  9. data/lib/parser/channel.rb +22 -16
  10. data/lib/parser/compiled_parser.rb +28 -22
  11. data/lib/parser/conflict.rb +54 -0
  12. data/lib/parser/item.rb +19 -19
  13. data/lib/parser/parse_result.rb +16 -1
  14. data/lib/parser/parse_tree.rb +15 -9
  15. data/lib/parser/parser.rb +51 -80
  16. data/lib/parser/parser_run.rb +6 -6
  17. data/lib/parser/parser_state.rb +16 -18
  18. data/lib/parser/token.rb +6 -4
  19. data/lib/tokenizer/tokenizer.rb +34 -31
  20. data/test/all_tests.rb +4 -18
  21. data/test/another_lalr_but_not_slr_grammar.rb +9 -5
  22. data/test/{arithmetic_evaluator.rb → arithmetic/arithmetic_evaluator.rb} +1 -2
  23. data/test/{arithmetic_evaluator_test.rb → arithmetic/arithmetic_evaluator_test.rb} +9 -20
  24. data/test/arithmetic/arithmetic_grammar.rb +41 -0
  25. data/test/{arithmetic_grammar_test.rb → arithmetic/arithmetic_grammar_test.rb} +2 -4
  26. data/test/{arithmetic_test_methods.rb → arithmetic/arithmetic_test_methods.rb} +1 -3
  27. data/test/{arithmetic_tokenizer.rb → arithmetic/arithmetic_tokenizer.rb} +8 -10
  28. data/test/{arithmetic_tokenizer_test.rb → arithmetic/arithmetic_tokenizer_test.rb} +4 -2
  29. data/test/{arithmetic_precedence_evaluator.rb → arithmetic_precedence/arithmetic_precedence_evaluator.rb} +1 -2
  30. data/test/arithmetic_precedence/arithmetic_precedence_grammar.rb +24 -0
  31. data/test/{arithmetic_precedence_grammar_test.rb → arithmetic_precedence/arithmetic_precedence_grammar_test.rb} +2 -3
  32. data/test/arithmetic_precedence/arithmetic_precedence_parser_test.rb +31 -0
  33. data/test/{arithmetic_precedence_tokenizer.rb → arithmetic_precedence/arithmetic_precedence_tokenizer.rb} +8 -10
  34. data/test/brackets/bracket_grammar.rb +23 -0
  35. data/test/{bracket_tokenizer.rb → brackets/bracket_tokenizer.rb} +2 -4
  36. data/test/{brackets_test.rb → brackets/brackets_test.rb} +3 -4
  37. data/test/chittagong/chittagong_driver.rb +47 -0
  38. data/test/{chittagong_driver_test.rb → chittagong/chittagong_driver_test.rb} +66 -58
  39. data/test/{chittagong_evaluator.rb → chittagong/chittagong_evaluator.rb} +28 -13
  40. data/test/{chittagong_evaluator_test.rb → chittagong/chittagong_evaluator_test.rb} +6 -10
  41. data/test/chittagong/chittagong_grammar.rb +110 -0
  42. data/test/{chittagong_parser_test.rb → chittagong/chittagong_parser_test.rb} +5 -7
  43. data/test/{chittagong_test.rb → chittagong/chittagong_test.rb} +27 -36
  44. data/test/{chittagong_tokenizer.rb → chittagong/chittagong_tokenizer.rb} +17 -17
  45. data/test/{chittagong_tokenizer_test.rb → chittagong/chittagong_tokenizer_test.rb} +2 -3
  46. data/test/compiled_parser_test.rb +9 -42
  47. data/test/dhaka_test_helper.rb +17 -0
  48. data/test/evaluator_test.rb +18 -3
  49. data/test/grammar_test.rb +10 -15
  50. data/test/lalr_but_not_slr_grammar.rb +10 -8
  51. data/test/malformed_grammar.rb +2 -4
  52. data/test/malformed_grammar_test.rb +2 -3
  53. data/test/nullable_grammar.rb +11 -8
  54. data/test/parse_result_test.rb +44 -0
  55. data/test/parser_state_test.rb +36 -0
  56. data/test/parser_test.rb +53 -103
  57. data/test/precedence_grammar.rb +6 -6
  58. data/test/precedence_grammar_test.rb +2 -3
  59. data/test/rr_conflict_grammar.rb +5 -7
  60. data/test/simple_grammar.rb +6 -8
  61. data/test/sr_conflict_grammar.rb +6 -6
  62. metadata +30 -26
  63. data/test/arithmetic_grammar.rb +0 -35
  64. data/test/arithmetic_precedence_grammar.rb +0 -24
  65. data/test/arithmetic_precedence_parser_test.rb +0 -33
  66. data/test/bracket_grammar.rb +0 -25
  67. data/test/chittagong_grammar.rb +0 -104
  68. data/test/incomplete_arithmetic_evaluator.rb +0 -60
@@ -21,26 +21,28 @@
21
21
  # WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
22
22
  #++
23
23
 
24
- module Dhaka
25
- end
24
+ require 'set'
25
+ require 'logger'
26
+ require 'delegate'
26
27
 
27
- require File.dirname(__FILE__)+'/grammar/grammar_symbol'
28
- require File.dirname(__FILE__)+'/grammar/production'
29
- require File.dirname(__FILE__)+'/grammar/closure_hash'
30
- require File.dirname(__FILE__)+'/grammar/grammar'
31
- require File.dirname(__FILE__)+'/grammar/precedence'
32
-
33
- require File.dirname(__FILE__)+'/parser/parse_result'
34
- require File.dirname(__FILE__)+'/parser/item'
35
- require File.dirname(__FILE__)+'/parser/channel'
36
- require File.dirname(__FILE__)+'/parser/parser_methods'
37
- require File.dirname(__FILE__)+'/parser/parse_tree'
38
- require File.dirname(__FILE__)+'/parser/parser_state'
39
- require File.dirname(__FILE__)+'/parser/token'
40
- require File.dirname(__FILE__)+'/parser/action'
41
- require File.dirname(__FILE__)+'/parser/parser_run'
42
- require File.dirname(__FILE__)+'/parser/parser'
43
- require File.dirname(__FILE__)+'/parser/compiled_parser'
44
-
45
- require File.dirname(__FILE__)+'/tokenizer/tokenizer'
46
- require File.dirname(__FILE__)+'/evaluator/evaluator'
28
+ %w[
29
+ grammar/grammar_symbol
30
+ grammar/production
31
+ grammar/closure_hash
32
+ grammar/grammar
33
+ grammar/precedence
34
+ parser/parse_tree
35
+ parser/parse_result
36
+ parser/item
37
+ parser/channel
38
+ parser/parser_methods
39
+ parser/parser_state
40
+ parser/conflict
41
+ parser/token
42
+ parser/action
43
+ parser/parser_run
44
+ parser/parser
45
+ parser/compiled_parser
46
+ tokenizer/tokenizer
47
+ evaluator/evaluator
48
+ ].each {|path| require File.join(File.dirname(__FILE__), path)}
@@ -1,5 +1,4 @@
1
1
  module Dhaka
2
-
3
2
  # This is the abstract base evaluator class. It is not directly instantiated.
4
3
  # When defining an evaluator for a specific grammar, we subclass it. e.g. for FooGrammar
5
4
  # we create a FooEvaluator that subclasses Evaluator. Note that FooEvaluator may not
@@ -55,61 +54,60 @@ module Dhaka
55
54
  # end
56
55
  #
57
56
  # end
58
-
59
-
60
57
  class Evaluator
58
+ class << self
59
+ def inherited(evaluator)
60
+ class << evaluator
61
+ attr_accessor :grammar, :actions
62
+ end
63
+ end
64
+
65
+ def method_missing(method_name, *args, &blk)
66
+ name = method_name.to_s
67
+ if name =~ /^for_(.+)$/
68
+ rule_name = $1
69
+ actions << rule_name
70
+ send(:define_method, rule_name, &blk)
71
+ else
72
+ super
73
+ end
74
+ end
75
+
76
+ # Evaluation rules are defined within a block passed to this method.
77
+ def define_evaluation_rules
78
+ self.actions = []
79
+ yield
80
+ check_definitions
81
+ end
82
+
83
+ private
61
84
 
62
- # Instantiates a new evaluator with the syntax tree of a parsed expression. Only subclasses
63
- # of Evaluator are directly instantiated.
85
+ def check_definitions
86
+ filter = lambda {|productions| productions.map {|production| production.name} - actions}
87
+ pass_through_productions_without_rules = filter[grammar.productions.select {|production| production.expansion.size == 1}]
88
+ pass_through_productions_without_rules.each do |rule_name|
89
+ send(:define_method, rule_name) do
90
+ evaluate(child_nodes.first)
91
+ end
92
+ end
93
+ non_trivial_productions_with_rules_undefined = filter[grammar.productions.select {|production| production.expansion.size != 1}]
94
+ raise EvaluatorDefinitionError.new(non_trivial_productions_with_rules_undefined) unless non_trivial_productions_with_rules_undefined.empty?
95
+ end
96
+ end
64
97
 
65
98
  # Evaluate a syntax tree node.
66
99
  def evaluate node
67
100
  @node_stack ||= []
68
101
  @node_stack << node.child_nodes
69
- result = self.send(node.production.name)
102
+ result = send(node.production.name)
70
103
  @node_stack.pop
71
104
  result
72
105
  end
73
106
 
74
- # Performs the pass-through calculations for nodes with only one child_node for which an
75
- # evaluation rule is not explicitly defined. Will probably be deprecated in future versions.
76
- def method_missing(method_name)
77
- evaluate(child_nodes[0])
78
- end
79
-
80
107
  # Returns the array of child nodes of the node being currently evaluated.
81
108
  def child_nodes
82
- @node_stack[-1]
109
+ @node_stack.last
83
110
  end
84
-
85
- # Evaluation rules are defined within a block passed to this method.
86
- def self.define_evaluation_rules
87
- self.actions = []
88
- yield
89
- check_definitions
90
- end
91
-
92
- private
93
-
94
- def self.inherited(evaluator)
95
- class << evaluator
96
- attr_accessor :grammar, :actions
97
- end
98
- end
99
-
100
- def self.method_missing(method_name, &blk)
101
- if method_name.to_s =~ /^for_*/
102
- rule_name = method_name.to_s[4..-1]
103
- self.actions << rule_name
104
- self.send(:define_method, rule_name, &blk)
105
- end
106
- end
107
-
108
- def self.check_definitions
109
- non_trivial_productions_with_rules_undefined = self.grammar.productions.select {|production| production.expansion.size != 1}.collect {|production| production.name} - self.actions
110
- raise EvaluatorDefinitionError.new(non_trivial_productions_with_rules_undefined) unless non_trivial_productions_with_rules_undefined.empty?
111
- end
112
-
113
111
  end
114
112
 
115
113
  class EvaluatorDefinitionError < StandardError #:nodoc:
@@ -118,8 +116,8 @@ module Dhaka
118
116
  end
119
117
 
120
118
  def to_s
121
- result = "The following non-trivial productions do not have any evaluation rules defined:\n"
122
- result << (@non_trivial_productions_with_rules_undefined).join("\n")
119
+ result = "The following non-trivial productions do not have any evaluation rules defined:\n"
120
+ result << @non_trivial_productions_with_rules_undefined.join("\n")
123
121
  end
124
122
  end
125
123
  end
@@ -1,12 +1,13 @@
1
- #!/usr/bin/env ruby
2
1
  module Dhaka
3
2
  # A subclass of Hash with a dirty flag
4
3
  class ClosureHash < Hash #:nodoc:
5
4
  attr_accessor :dirty
6
- def initialize(&block)
7
- super(&block)
5
+
6
+ def initialize
7
+ super
8
8
  @dirty = false
9
9
  end
10
+
10
11
  def load_set(set)
11
12
  set.each {|item| self[item] = item}
12
13
  end
@@ -1,10 +1,8 @@
1
- #!/usr/bin/env ruby
2
- require 'set'
3
1
  module Dhaka
4
2
 
5
3
  # Reserved name for the start symbol for all grammars.
6
4
  START_SYMBOL_NAME = "_Start_"
7
- END_SYMBOL_NAME = "_End_" #:nodoc:
5
+ END_SYMBOL_NAME = "_End_" #:nodoc:
8
6
 
9
7
  # Productions for specific grammar symbols are defined in the context of this class.
10
8
  class ProductionBuilder
@@ -12,7 +10,7 @@ module Dhaka
12
10
  # +symbol+ is the grammar symbol that productions are being defined for.
13
11
  def initialize(grammar, symbol)
14
12
  @grammar = grammar
15
- @symbol = symbol
13
+ @symbol = symbol
16
14
  end
17
15
 
18
16
  # Creates a new production for +symbol+ with an expansion of +expansion+. The options hash can include
@@ -22,11 +20,13 @@ module Dhaka
22
20
  # See the arithmetic precedence grammar in the test suites for an example.
23
21
  def method_missing(production_name, expansion, options = {})
24
22
  expansion_symbols = expansion.collect {|name| @grammar.symbols[name]}
23
+ production_args = [@symbol, expansion_symbols, production_name.to_s]
25
24
  if precedence_symbol_name = options[:prec]
26
- production = Production.new(@symbol, expansion_symbols, production_name.to_s, @grammar.symbol_for_name(precedence_symbol_name).precedence)
27
- else
28
- production = Production.new(@symbol, expansion_symbols, production_name.to_s)
25
+ production_args << @grammar.symbol_for_name(precedence_symbol_name).precedence
29
26
  end
27
+
28
+ production = Production.new(*production_args)
29
+
30
30
  @symbol.nullable = true if expansion_symbols.empty?
31
31
  @grammar.productions_by_symbol[production.symbol] << production
32
32
  raise "Duplicate production named #{production.name}" if @grammar.productions_by_name[production.name]
@@ -40,9 +40,10 @@ module Dhaka
40
40
  # See the arithmetic precedence grammar in the test suites for an example.
41
41
  class PrecedenceBuilder
42
42
  def initialize(grammar) #:nodoc:
43
- @grammar = grammar
43
+ @grammar = grammar
44
44
  @precedence_level = 0
45
45
  end
46
+
46
47
  [:left, :right, :nonassoc].each do |associativity|
47
48
  define_method(associativity) do |symbols|
48
49
  assign_precedences associativity, symbols
@@ -52,7 +53,7 @@ module Dhaka
52
53
  private
53
54
  def assign_precedences(associativity, symbol_names)
54
55
  symbol_names.each do |symbol_name|
55
- symbol = @grammar.symbols[symbol_name]
56
+ symbol = @grammar.symbols[symbol_name]
56
57
  symbol.precedence = Precedence.new(@precedence_level, associativity)
57
58
  end
58
59
  @precedence_level += 1
@@ -91,119 +92,121 @@ module Dhaka
91
92
  # end
92
93
  #
93
94
  class Grammar
94
-
95
- # Used for defining the productions for the symbol with name +symbol+. The block +blk+ is
96
- # evaluated in the context of a ProductionBuilder.
97
- def self.for_symbol symbol, &blk
98
- symbol = symbols[symbol]
99
- symbol.non_terminal = true
100
- ProductionBuilder.new(self, symbol).instance_eval(&blk)
101
- end
102
-
103
- # Used for defining the precedences and associativities of symbols. The block +blk+ is
104
- # evaluated in the context of a PrecedenceBuilder.
105
- def self.precedences &blk
106
- PrecedenceBuilder.new(self).instance_eval(&blk)
107
- end
108
-
109
- # Returns the grammar symbol identified by +name+
110
- def self.symbol_for_name(name)
111
- if symbols.has_key? name
112
- symbols[name]
113
- else
114
- raise "No symbol with name #{name} found"
95
+ class << self
96
+ # Used for defining the productions for the symbol with name +symbol+. The block +blk+ is
97
+ # evaluated in the context of a ProductionBuilder.
98
+ def for_symbol symbol, &blk
99
+ symbol = symbols[symbol]
100
+ symbol.non_terminal = true
101
+ ProductionBuilder.new(self, symbol).instance_eval(&blk)
115
102
  end
116
- end
117
103
 
118
- private
119
-
120
- def self.inherited(grammar)
121
- class << grammar
122
- attr_accessor :symbols, :productions_by_symbol, :productions_by_name, :start_symbol, :end_symbol, :__first_cache
104
+ # Used for defining the precedences and associativities of symbols. The block +blk+ is
105
+ # evaluated in the context of a PrecedenceBuilder.
106
+ def precedences &blk
107
+ PrecedenceBuilder.new(self).instance_eval(&blk)
123
108
  end
124
- grammar.symbols = Hash.new {|hash, name| hash[name] = GrammarSymbol.new(name)}
125
- grammar.productions_by_symbol = Hash.new {|hash, name| hash[name] = Set.new([])}
126
- grammar.productions_by_name = {}
127
- grammar.end_symbol = grammar.symbols[END_SYMBOL_NAME]
128
- grammar.start_symbol = grammar.symbols[START_SYMBOL_NAME]
129
- grammar.__first_cache = {}
130
- end
131
-
132
- def self.productions_for_symbol(symbol)
133
- productions_by_symbol[symbol]
134
- end
135
-
136
- def self.productions
137
- productions_by_name.values
138
- end
139
-
140
- def self.production_named(name)
141
- productions_by_name[name]
142
- end
143
-
144
-
145
- def self.terminal_symbols
146
- symbols.values.select {|symbol| symbol.terminal}
147
- end
148
-
149
- def self.non_terminal_symbols
150
- symbols.values.select {|symbol| symbol.non_terminal}
151
- end
152
-
153
- def self.closure(kernel)
154
- channels = Set.new
155
109
 
156
- result = compute_closure(kernel) do |hash, item|
157
- if item.next_symbol and item.next_symbol.non_terminal
158
- productions_by_symbol[item.next_symbol].each do |production|
159
- channels << spontaneous_channel(item, hash[Item.new(production, 0)])
160
- end
110
+ # Returns the grammar symbol identified by +name+
111
+ def symbol_for_name(name)
112
+ if symbols.has_key? name
113
+ symbols[name]
114
+ else
115
+ raise "No symbol with name #{name} found"
161
116
  end
162
117
  end
118
+
119
+ def productions
120
+ productions_by_name.values
121
+ end
122
+
123
+ def productions_for_symbol(symbol)
124
+ productions_by_symbol[symbol]
125
+ end
126
+
127
+ def closure(kernel)
128
+ channels = Set.new
129
+
130
+ result = compute_closure(kernel) do |hash, item|
131
+ if item.next_symbol and item.next_symbol.non_terminal
132
+ productions_by_symbol[item.next_symbol].each do |production|
133
+ channels << spontaneous_channel(item, hash[Item.new(production, 0)])
134
+ end
135
+ end
136
+ end
163
137
 
164
- return channels, result
165
- end
138
+ [channels, result]
139
+ end
140
+
141
+ def passive_channel(start_item, end_item)
142
+ PassiveChannel.new(self, start_item, end_item)
143
+ end
144
+
145
+ def first(given_symbol)
146
+ cached_result = __first_cache[given_symbol]
147
+ return cached_result if cached_result
148
+ result = compute_closure([given_symbol]) do |hash, symbol|
149
+ productions_by_symbol[symbol].each do |production|
150
+ symbol_index = 0
151
+ while next_symbol = production.expansion[symbol_index]
152
+ hash[next_symbol]
153
+ break unless next_symbol.nullable
154
+ symbol_index += 1
155
+ end
156
+ end if symbol.non_terminal
157
+ end.values.select {|symbol| symbol.terminal}.to_set
158
+ __first_cache[given_symbol] = result
159
+ result
160
+ end
161
+
162
+ def production_named(name)
163
+ productions_by_name[name]
164
+ end
165
+
166
+ def terminal_symbols
167
+ symbols.values.select {|symbol| symbol.terminal}
168
+ end
166
169
 
167
- def self.first(given_symbol)
168
- cached_result = self.__first_cache[given_symbol]
169
- return cached_result if cached_result
170
- result = compute_closure([given_symbol]) do |hash, symbol|
171
- productions_by_symbol[symbol].each do |production|
172
- symbol_index = 0
173
- while next_symbol = production.expansion[symbol_index]
174
- hash[next_symbol]
175
- break if !next_symbol.nullable
176
- symbol_index += 1
177
- end
178
- end if symbol.non_terminal
179
- end.values.select {|symbol| symbol.terminal}.to_set
180
- self.__first_cache[given_symbol] = result
181
- result
182
- end
170
+ def non_terminal_symbols
171
+ symbols.values.select {|symbol| symbol.non_terminal}
172
+ end
173
+
174
+ private
175
+
176
+ def inherited(grammar)
177
+ class << grammar
178
+ attr_accessor :symbols, :productions_by_symbol, :productions_by_name, :start_symbol, :end_symbol, :__first_cache
179
+ end
180
+ grammar.symbols = Hash.new {|hash, name| hash[name] = GrammarSymbol.new(name)}
181
+ grammar.productions_by_symbol = Hash.new {|hash, name| hash[name] = Set.new([])}
182
+ grammar.productions_by_name = {}
183
+ grammar.end_symbol = grammar.symbols[END_SYMBOL_NAME]
184
+ grammar.start_symbol = grammar.symbols[START_SYMBOL_NAME]
185
+ grammar.__first_cache = {}
186
+ end
183
187
 
184
- def self.spontaneous_channel(start_item, end_item)
185
- SpontaneousChannel.new(self, start_item, end_item)
186
- end
187
-
188
- def self.passive_channel(start_item, end_item)
189
- PassiveChannel.new(self, start_item, end_item)
190
- end
188
+ def spontaneous_channel(start_item, end_item)
189
+ SpontaneousChannel.new(self, start_item, end_item)
190
+ end
191
191
 
192
- def self.compute_closure(initial)
193
- closure_hash = ClosureHash.new do |hash, item|
194
- hash.dirty = true
195
- hash[item] = item
196
- end
197
- closure_hash.load_set(initial)
198
- while true
199
- closure_hash.keys.each do |element|
200
- yield closure_hash, element
192
+ def compute_closure(initial)
193
+ closure_hash = ClosureHash.new do |hash, item|
194
+ hash.dirty = true
195
+ hash[item] = item
196
+ end
197
+
198
+ closure_hash.load_set(initial)
199
+
200
+ loop do
201
+ closure_hash.keys.each do |element|
202
+ yield closure_hash, element
203
+ end
204
+ break unless closure_hash.dirty
205
+ closure_hash.dirty = false
206
+ end
207
+ closure_hash
201
208
  end
202
- break if !closure_hash.dirty
203
- closure_hash.dirty = false
204
209
  end
205
- return closure_hash
206
- end
207
210
  end
208
211
 
209
212
  end