dhaka 2.0.0 → 2.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/dhaka.rb +24 -22
- data/lib/evaluator/evaluator.rb +42 -44
- data/lib/grammar/closure_hash.rb +4 -3
- data/lib/grammar/grammar.rb +113 -110
- data/lib/grammar/grammar_symbol.rb +6 -3
- data/lib/grammar/precedence.rb +3 -2
- data/lib/grammar/production.rb +5 -6
- data/lib/parser/action.rb +16 -11
- data/lib/parser/channel.rb +22 -16
- data/lib/parser/compiled_parser.rb +28 -22
- data/lib/parser/conflict.rb +54 -0
- data/lib/parser/item.rb +19 -19
- data/lib/parser/parse_result.rb +16 -1
- data/lib/parser/parse_tree.rb +15 -9
- data/lib/parser/parser.rb +51 -80
- data/lib/parser/parser_run.rb +6 -6
- data/lib/parser/parser_state.rb +16 -18
- data/lib/parser/token.rb +6 -4
- data/lib/tokenizer/tokenizer.rb +34 -31
- data/test/all_tests.rb +4 -18
- data/test/another_lalr_but_not_slr_grammar.rb +9 -5
- data/test/{arithmetic_evaluator.rb → arithmetic/arithmetic_evaluator.rb} +1 -2
- data/test/{arithmetic_evaluator_test.rb → arithmetic/arithmetic_evaluator_test.rb} +9 -20
- data/test/arithmetic/arithmetic_grammar.rb +41 -0
- data/test/{arithmetic_grammar_test.rb → arithmetic/arithmetic_grammar_test.rb} +2 -4
- data/test/{arithmetic_test_methods.rb → arithmetic/arithmetic_test_methods.rb} +1 -3
- data/test/{arithmetic_tokenizer.rb → arithmetic/arithmetic_tokenizer.rb} +8 -10
- data/test/{arithmetic_tokenizer_test.rb → arithmetic/arithmetic_tokenizer_test.rb} +4 -2
- data/test/{arithmetic_precedence_evaluator.rb → arithmetic_precedence/arithmetic_precedence_evaluator.rb} +1 -2
- data/test/arithmetic_precedence/arithmetic_precedence_grammar.rb +24 -0
- data/test/{arithmetic_precedence_grammar_test.rb → arithmetic_precedence/arithmetic_precedence_grammar_test.rb} +2 -3
- data/test/arithmetic_precedence/arithmetic_precedence_parser_test.rb +31 -0
- data/test/{arithmetic_precedence_tokenizer.rb → arithmetic_precedence/arithmetic_precedence_tokenizer.rb} +8 -10
- data/test/brackets/bracket_grammar.rb +23 -0
- data/test/{bracket_tokenizer.rb → brackets/bracket_tokenizer.rb} +2 -4
- data/test/{brackets_test.rb → brackets/brackets_test.rb} +3 -4
- data/test/chittagong/chittagong_driver.rb +47 -0
- data/test/{chittagong_driver_test.rb → chittagong/chittagong_driver_test.rb} +66 -58
- data/test/{chittagong_evaluator.rb → chittagong/chittagong_evaluator.rb} +28 -13
- data/test/{chittagong_evaluator_test.rb → chittagong/chittagong_evaluator_test.rb} +6 -10
- data/test/chittagong/chittagong_grammar.rb +110 -0
- data/test/{chittagong_parser_test.rb → chittagong/chittagong_parser_test.rb} +5 -7
- data/test/{chittagong_test.rb → chittagong/chittagong_test.rb} +27 -36
- data/test/{chittagong_tokenizer.rb → chittagong/chittagong_tokenizer.rb} +17 -17
- data/test/{chittagong_tokenizer_test.rb → chittagong/chittagong_tokenizer_test.rb} +2 -3
- data/test/compiled_parser_test.rb +9 -42
- data/test/dhaka_test_helper.rb +17 -0
- data/test/evaluator_test.rb +18 -3
- data/test/grammar_test.rb +10 -15
- data/test/lalr_but_not_slr_grammar.rb +10 -8
- data/test/malformed_grammar.rb +2 -4
- data/test/malformed_grammar_test.rb +2 -3
- data/test/nullable_grammar.rb +11 -8
- data/test/parse_result_test.rb +44 -0
- data/test/parser_state_test.rb +36 -0
- data/test/parser_test.rb +53 -103
- data/test/precedence_grammar.rb +6 -6
- data/test/precedence_grammar_test.rb +2 -3
- data/test/rr_conflict_grammar.rb +5 -7
- data/test/simple_grammar.rb +6 -8
- data/test/sr_conflict_grammar.rb +6 -6
- metadata +30 -26
- data/test/arithmetic_grammar.rb +0 -35
- data/test/arithmetic_precedence_grammar.rb +0 -24
- data/test/arithmetic_precedence_parser_test.rb +0 -33
- data/test/bracket_grammar.rb +0 -25
- data/test/chittagong_grammar.rb +0 -104
- data/test/incomplete_arithmetic_evaluator.rb +0 -60
data/lib/dhaka.rb
CHANGED
@@ -21,26 +21,28 @@
|
|
21
21
|
# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
22
22
|
#++
|
23
23
|
|
24
|
-
|
25
|
-
|
24
|
+
require 'set'
|
25
|
+
require 'logger'
|
26
|
+
require 'delegate'
|
26
27
|
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
28
|
+
%w[
|
29
|
+
grammar/grammar_symbol
|
30
|
+
grammar/production
|
31
|
+
grammar/closure_hash
|
32
|
+
grammar/grammar
|
33
|
+
grammar/precedence
|
34
|
+
parser/parse_tree
|
35
|
+
parser/parse_result
|
36
|
+
parser/item
|
37
|
+
parser/channel
|
38
|
+
parser/parser_methods
|
39
|
+
parser/parser_state
|
40
|
+
parser/conflict
|
41
|
+
parser/token
|
42
|
+
parser/action
|
43
|
+
parser/parser_run
|
44
|
+
parser/parser
|
45
|
+
parser/compiled_parser
|
46
|
+
tokenizer/tokenizer
|
47
|
+
evaluator/evaluator
|
48
|
+
].each {|path| require File.join(File.dirname(__FILE__), path)}
|
data/lib/evaluator/evaluator.rb
CHANGED
@@ -1,5 +1,4 @@
|
|
1
1
|
module Dhaka
|
2
|
-
|
3
2
|
# This is the abstract base evaluator class. It is not directly instantiated.
|
4
3
|
# When defining an evaluator for a specific grammar, we subclass it. e.g. for FooGrammar
|
5
4
|
# we create a FooEvaluator that subclasses Evaluator. Note that FooEvaluator may not
|
@@ -55,61 +54,60 @@ module Dhaka
|
|
55
54
|
# end
|
56
55
|
#
|
57
56
|
# end
|
58
|
-
|
59
|
-
|
60
57
|
class Evaluator
|
58
|
+
class << self
|
59
|
+
def inherited(evaluator)
|
60
|
+
class << evaluator
|
61
|
+
attr_accessor :grammar, :actions
|
62
|
+
end
|
63
|
+
end
|
64
|
+
|
65
|
+
def method_missing(method_name, *args, &blk)
|
66
|
+
name = method_name.to_s
|
67
|
+
if name =~ /^for_(.+)$/
|
68
|
+
rule_name = $1
|
69
|
+
actions << rule_name
|
70
|
+
send(:define_method, rule_name, &blk)
|
71
|
+
else
|
72
|
+
super
|
73
|
+
end
|
74
|
+
end
|
75
|
+
|
76
|
+
# Evaluation rules are defined within a block passed to this method.
|
77
|
+
def define_evaluation_rules
|
78
|
+
self.actions = []
|
79
|
+
yield
|
80
|
+
check_definitions
|
81
|
+
end
|
82
|
+
|
83
|
+
private
|
61
84
|
|
62
|
-
|
63
|
-
|
85
|
+
def check_definitions
|
86
|
+
filter = lambda {|productions| productions.map {|production| production.name} - actions}
|
87
|
+
pass_through_productions_without_rules = filter[grammar.productions.select {|production| production.expansion.size == 1}]
|
88
|
+
pass_through_productions_without_rules.each do |rule_name|
|
89
|
+
send(:define_method, rule_name) do
|
90
|
+
evaluate(child_nodes.first)
|
91
|
+
end
|
92
|
+
end
|
93
|
+
non_trivial_productions_with_rules_undefined = filter[grammar.productions.select {|production| production.expansion.size != 1}]
|
94
|
+
raise EvaluatorDefinitionError.new(non_trivial_productions_with_rules_undefined) unless non_trivial_productions_with_rules_undefined.empty?
|
95
|
+
end
|
96
|
+
end
|
64
97
|
|
65
98
|
# Evaluate a syntax tree node.
|
66
99
|
def evaluate node
|
67
100
|
@node_stack ||= []
|
68
101
|
@node_stack << node.child_nodes
|
69
|
-
result
|
102
|
+
result = send(node.production.name)
|
70
103
|
@node_stack.pop
|
71
104
|
result
|
72
105
|
end
|
73
106
|
|
74
|
-
# Performs the pass-through calculations for nodes with only one child_node for which an
|
75
|
-
# evaluation rule is not explicitly defined. Will probably be deprecated in future versions.
|
76
|
-
def method_missing(method_name)
|
77
|
-
evaluate(child_nodes[0])
|
78
|
-
end
|
79
|
-
|
80
107
|
# Returns the array of child nodes of the node being currently evaluated.
|
81
108
|
def child_nodes
|
82
|
-
@node_stack
|
109
|
+
@node_stack.last
|
83
110
|
end
|
84
|
-
|
85
|
-
# Evaluation rules are defined within a block passed to this method.
|
86
|
-
def self.define_evaluation_rules
|
87
|
-
self.actions = []
|
88
|
-
yield
|
89
|
-
check_definitions
|
90
|
-
end
|
91
|
-
|
92
|
-
private
|
93
|
-
|
94
|
-
def self.inherited(evaluator)
|
95
|
-
class << evaluator
|
96
|
-
attr_accessor :grammar, :actions
|
97
|
-
end
|
98
|
-
end
|
99
|
-
|
100
|
-
def self.method_missing(method_name, &blk)
|
101
|
-
if method_name.to_s =~ /^for_*/
|
102
|
-
rule_name = method_name.to_s[4..-1]
|
103
|
-
self.actions << rule_name
|
104
|
-
self.send(:define_method, rule_name, &blk)
|
105
|
-
end
|
106
|
-
end
|
107
|
-
|
108
|
-
def self.check_definitions
|
109
|
-
non_trivial_productions_with_rules_undefined = self.grammar.productions.select {|production| production.expansion.size != 1}.collect {|production| production.name} - self.actions
|
110
|
-
raise EvaluatorDefinitionError.new(non_trivial_productions_with_rules_undefined) unless non_trivial_productions_with_rules_undefined.empty?
|
111
|
-
end
|
112
|
-
|
113
111
|
end
|
114
112
|
|
115
113
|
class EvaluatorDefinitionError < StandardError #:nodoc:
|
@@ -118,8 +116,8 @@ module Dhaka
|
|
118
116
|
end
|
119
117
|
|
120
118
|
def to_s
|
121
|
-
|
122
|
-
|
119
|
+
result = "The following non-trivial productions do not have any evaluation rules defined:\n"
|
120
|
+
result << @non_trivial_productions_with_rules_undefined.join("\n")
|
123
121
|
end
|
124
122
|
end
|
125
123
|
end
|
data/lib/grammar/closure_hash.rb
CHANGED
@@ -1,12 +1,13 @@
|
|
1
|
-
#!/usr/bin/env ruby
|
2
1
|
module Dhaka
|
3
2
|
# A subclass of Hash with a dirty flag
|
4
3
|
class ClosureHash < Hash #:nodoc:
|
5
4
|
attr_accessor :dirty
|
6
|
-
|
7
|
-
|
5
|
+
|
6
|
+
def initialize
|
7
|
+
super
|
8
8
|
@dirty = false
|
9
9
|
end
|
10
|
+
|
10
11
|
def load_set(set)
|
11
12
|
set.each {|item| self[item] = item}
|
12
13
|
end
|
data/lib/grammar/grammar.rb
CHANGED
@@ -1,10 +1,8 @@
|
|
1
|
-
#!/usr/bin/env ruby
|
2
|
-
require 'set'
|
3
1
|
module Dhaka
|
4
2
|
|
5
3
|
# Reserved name for the start symbol for all grammars.
|
6
4
|
START_SYMBOL_NAME = "_Start_"
|
7
|
-
END_SYMBOL_NAME
|
5
|
+
END_SYMBOL_NAME = "_End_" #:nodoc:
|
8
6
|
|
9
7
|
# Productions for specific grammar symbols are defined in the context of this class.
|
10
8
|
class ProductionBuilder
|
@@ -12,7 +10,7 @@ module Dhaka
|
|
12
10
|
# +symbol+ is the grammar symbol that productions are being defined for.
|
13
11
|
def initialize(grammar, symbol)
|
14
12
|
@grammar = grammar
|
15
|
-
@symbol
|
13
|
+
@symbol = symbol
|
16
14
|
end
|
17
15
|
|
18
16
|
# Creates a new production for +symbol+ with an expansion of +expansion+. The options hash can include
|
@@ -22,11 +20,13 @@ module Dhaka
|
|
22
20
|
# See the arithmetic precedence grammar in the test suites for an example.
|
23
21
|
def method_missing(production_name, expansion, options = {})
|
24
22
|
expansion_symbols = expansion.collect {|name| @grammar.symbols[name]}
|
23
|
+
production_args = [@symbol, expansion_symbols, production_name.to_s]
|
25
24
|
if precedence_symbol_name = options[:prec]
|
26
|
-
|
27
|
-
else
|
28
|
-
production = Production.new(@symbol, expansion_symbols, production_name.to_s)
|
25
|
+
production_args << @grammar.symbol_for_name(precedence_symbol_name).precedence
|
29
26
|
end
|
27
|
+
|
28
|
+
production = Production.new(*production_args)
|
29
|
+
|
30
30
|
@symbol.nullable = true if expansion_symbols.empty?
|
31
31
|
@grammar.productions_by_symbol[production.symbol] << production
|
32
32
|
raise "Duplicate production named #{production.name}" if @grammar.productions_by_name[production.name]
|
@@ -40,9 +40,10 @@ module Dhaka
|
|
40
40
|
# See the arithmetic precedence grammar in the test suites for an example.
|
41
41
|
class PrecedenceBuilder
|
42
42
|
def initialize(grammar) #:nodoc:
|
43
|
-
@grammar
|
43
|
+
@grammar = grammar
|
44
44
|
@precedence_level = 0
|
45
45
|
end
|
46
|
+
|
46
47
|
[:left, :right, :nonassoc].each do |associativity|
|
47
48
|
define_method(associativity) do |symbols|
|
48
49
|
assign_precedences associativity, symbols
|
@@ -52,7 +53,7 @@ module Dhaka
|
|
52
53
|
private
|
53
54
|
def assign_precedences(associativity, symbol_names)
|
54
55
|
symbol_names.each do |symbol_name|
|
55
|
-
symbol
|
56
|
+
symbol = @grammar.symbols[symbol_name]
|
56
57
|
symbol.precedence = Precedence.new(@precedence_level, associativity)
|
57
58
|
end
|
58
59
|
@precedence_level += 1
|
@@ -91,119 +92,121 @@ module Dhaka
|
|
91
92
|
# end
|
92
93
|
#
|
93
94
|
class Grammar
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
end
|
102
|
-
|
103
|
-
# Used for defining the precedences and associativities of symbols. The block +blk+ is
|
104
|
-
# evaluated in the context of a PrecedenceBuilder.
|
105
|
-
def self.precedences &blk
|
106
|
-
PrecedenceBuilder.new(self).instance_eval(&blk)
|
107
|
-
end
|
108
|
-
|
109
|
-
# Returns the grammar symbol identified by +name+
|
110
|
-
def self.symbol_for_name(name)
|
111
|
-
if symbols.has_key? name
|
112
|
-
symbols[name]
|
113
|
-
else
|
114
|
-
raise "No symbol with name #{name} found"
|
95
|
+
class << self
|
96
|
+
# Used for defining the productions for the symbol with name +symbol+. The block +blk+ is
|
97
|
+
# evaluated in the context of a ProductionBuilder.
|
98
|
+
def for_symbol symbol, &blk
|
99
|
+
symbol = symbols[symbol]
|
100
|
+
symbol.non_terminal = true
|
101
|
+
ProductionBuilder.new(self, symbol).instance_eval(&blk)
|
115
102
|
end
|
116
|
-
end
|
117
103
|
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
attr_accessor :symbols, :productions_by_symbol, :productions_by_name, :start_symbol, :end_symbol, :__first_cache
|
104
|
+
# Used for defining the precedences and associativities of symbols. The block +blk+ is
|
105
|
+
# evaluated in the context of a PrecedenceBuilder.
|
106
|
+
def precedences &blk
|
107
|
+
PrecedenceBuilder.new(self).instance_eval(&blk)
|
123
108
|
end
|
124
|
-
grammar.symbols = Hash.new {|hash, name| hash[name] = GrammarSymbol.new(name)}
|
125
|
-
grammar.productions_by_symbol = Hash.new {|hash, name| hash[name] = Set.new([])}
|
126
|
-
grammar.productions_by_name = {}
|
127
|
-
grammar.end_symbol = grammar.symbols[END_SYMBOL_NAME]
|
128
|
-
grammar.start_symbol = grammar.symbols[START_SYMBOL_NAME]
|
129
|
-
grammar.__first_cache = {}
|
130
|
-
end
|
131
|
-
|
132
|
-
def self.productions_for_symbol(symbol)
|
133
|
-
productions_by_symbol[symbol]
|
134
|
-
end
|
135
|
-
|
136
|
-
def self.productions
|
137
|
-
productions_by_name.values
|
138
|
-
end
|
139
|
-
|
140
|
-
def self.production_named(name)
|
141
|
-
productions_by_name[name]
|
142
|
-
end
|
143
|
-
|
144
|
-
|
145
|
-
def self.terminal_symbols
|
146
|
-
symbols.values.select {|symbol| symbol.terminal}
|
147
|
-
end
|
148
|
-
|
149
|
-
def self.non_terminal_symbols
|
150
|
-
symbols.values.select {|symbol| symbol.non_terminal}
|
151
|
-
end
|
152
|
-
|
153
|
-
def self.closure(kernel)
|
154
|
-
channels = Set.new
|
155
109
|
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
110
|
+
# Returns the grammar symbol identified by +name+
|
111
|
+
def symbol_for_name(name)
|
112
|
+
if symbols.has_key? name
|
113
|
+
symbols[name]
|
114
|
+
else
|
115
|
+
raise "No symbol with name #{name} found"
|
161
116
|
end
|
162
117
|
end
|
118
|
+
|
119
|
+
def productions
|
120
|
+
productions_by_name.values
|
121
|
+
end
|
122
|
+
|
123
|
+
def productions_for_symbol(symbol)
|
124
|
+
productions_by_symbol[symbol]
|
125
|
+
end
|
126
|
+
|
127
|
+
def closure(kernel)
|
128
|
+
channels = Set.new
|
129
|
+
|
130
|
+
result = compute_closure(kernel) do |hash, item|
|
131
|
+
if item.next_symbol and item.next_symbol.non_terminal
|
132
|
+
productions_by_symbol[item.next_symbol].each do |production|
|
133
|
+
channels << spontaneous_channel(item, hash[Item.new(production, 0)])
|
134
|
+
end
|
135
|
+
end
|
136
|
+
end
|
163
137
|
|
164
|
-
|
165
|
-
|
138
|
+
[channels, result]
|
139
|
+
end
|
140
|
+
|
141
|
+
def passive_channel(start_item, end_item)
|
142
|
+
PassiveChannel.new(self, start_item, end_item)
|
143
|
+
end
|
144
|
+
|
145
|
+
def first(given_symbol)
|
146
|
+
cached_result = __first_cache[given_symbol]
|
147
|
+
return cached_result if cached_result
|
148
|
+
result = compute_closure([given_symbol]) do |hash, symbol|
|
149
|
+
productions_by_symbol[symbol].each do |production|
|
150
|
+
symbol_index = 0
|
151
|
+
while next_symbol = production.expansion[symbol_index]
|
152
|
+
hash[next_symbol]
|
153
|
+
break unless next_symbol.nullable
|
154
|
+
symbol_index += 1
|
155
|
+
end
|
156
|
+
end if symbol.non_terminal
|
157
|
+
end.values.select {|symbol| symbol.terminal}.to_set
|
158
|
+
__first_cache[given_symbol] = result
|
159
|
+
result
|
160
|
+
end
|
161
|
+
|
162
|
+
def production_named(name)
|
163
|
+
productions_by_name[name]
|
164
|
+
end
|
165
|
+
|
166
|
+
def terminal_symbols
|
167
|
+
symbols.values.select {|symbol| symbol.terminal}
|
168
|
+
end
|
166
169
|
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
|
170
|
+
def non_terminal_symbols
|
171
|
+
symbols.values.select {|symbol| symbol.non_terminal}
|
172
|
+
end
|
173
|
+
|
174
|
+
private
|
175
|
+
|
176
|
+
def inherited(grammar)
|
177
|
+
class << grammar
|
178
|
+
attr_accessor :symbols, :productions_by_symbol, :productions_by_name, :start_symbol, :end_symbol, :__first_cache
|
179
|
+
end
|
180
|
+
grammar.symbols = Hash.new {|hash, name| hash[name] = GrammarSymbol.new(name)}
|
181
|
+
grammar.productions_by_symbol = Hash.new {|hash, name| hash[name] = Set.new([])}
|
182
|
+
grammar.productions_by_name = {}
|
183
|
+
grammar.end_symbol = grammar.symbols[END_SYMBOL_NAME]
|
184
|
+
grammar.start_symbol = grammar.symbols[START_SYMBOL_NAME]
|
185
|
+
grammar.__first_cache = {}
|
186
|
+
end
|
183
187
|
|
184
|
-
|
185
|
-
|
186
|
-
|
187
|
-
|
188
|
-
def self.passive_channel(start_item, end_item)
|
189
|
-
PassiveChannel.new(self, start_item, end_item)
|
190
|
-
end
|
188
|
+
def spontaneous_channel(start_item, end_item)
|
189
|
+
SpontaneousChannel.new(self, start_item, end_item)
|
190
|
+
end
|
191
191
|
|
192
|
-
|
193
|
-
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
|
199
|
-
|
200
|
-
|
192
|
+
def compute_closure(initial)
|
193
|
+
closure_hash = ClosureHash.new do |hash, item|
|
194
|
+
hash.dirty = true
|
195
|
+
hash[item] = item
|
196
|
+
end
|
197
|
+
|
198
|
+
closure_hash.load_set(initial)
|
199
|
+
|
200
|
+
loop do
|
201
|
+
closure_hash.keys.each do |element|
|
202
|
+
yield closure_hash, element
|
203
|
+
end
|
204
|
+
break unless closure_hash.dirty
|
205
|
+
closure_hash.dirty = false
|
206
|
+
end
|
207
|
+
closure_hash
|
201
208
|
end
|
202
|
-
break if !closure_hash.dirty
|
203
|
-
closure_hash.dirty = false
|
204
209
|
end
|
205
|
-
return closure_hash
|
206
|
-
end
|
207
210
|
end
|
208
211
|
|
209
212
|
end
|