dhaka 2.0.0 → 2.0.1
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/dhaka.rb +24 -22
- data/lib/evaluator/evaluator.rb +42 -44
- data/lib/grammar/closure_hash.rb +4 -3
- data/lib/grammar/grammar.rb +113 -110
- data/lib/grammar/grammar_symbol.rb +6 -3
- data/lib/grammar/precedence.rb +3 -2
- data/lib/grammar/production.rb +5 -6
- data/lib/parser/action.rb +16 -11
- data/lib/parser/channel.rb +22 -16
- data/lib/parser/compiled_parser.rb +28 -22
- data/lib/parser/conflict.rb +54 -0
- data/lib/parser/item.rb +19 -19
- data/lib/parser/parse_result.rb +16 -1
- data/lib/parser/parse_tree.rb +15 -9
- data/lib/parser/parser.rb +51 -80
- data/lib/parser/parser_run.rb +6 -6
- data/lib/parser/parser_state.rb +16 -18
- data/lib/parser/token.rb +6 -4
- data/lib/tokenizer/tokenizer.rb +34 -31
- data/test/all_tests.rb +4 -18
- data/test/another_lalr_but_not_slr_grammar.rb +9 -5
- data/test/{arithmetic_evaluator.rb → arithmetic/arithmetic_evaluator.rb} +1 -2
- data/test/{arithmetic_evaluator_test.rb → arithmetic/arithmetic_evaluator_test.rb} +9 -20
- data/test/arithmetic/arithmetic_grammar.rb +41 -0
- data/test/{arithmetic_grammar_test.rb → arithmetic/arithmetic_grammar_test.rb} +2 -4
- data/test/{arithmetic_test_methods.rb → arithmetic/arithmetic_test_methods.rb} +1 -3
- data/test/{arithmetic_tokenizer.rb → arithmetic/arithmetic_tokenizer.rb} +8 -10
- data/test/{arithmetic_tokenizer_test.rb → arithmetic/arithmetic_tokenizer_test.rb} +4 -2
- data/test/{arithmetic_precedence_evaluator.rb → arithmetic_precedence/arithmetic_precedence_evaluator.rb} +1 -2
- data/test/arithmetic_precedence/arithmetic_precedence_grammar.rb +24 -0
- data/test/{arithmetic_precedence_grammar_test.rb → arithmetic_precedence/arithmetic_precedence_grammar_test.rb} +2 -3
- data/test/arithmetic_precedence/arithmetic_precedence_parser_test.rb +31 -0
- data/test/{arithmetic_precedence_tokenizer.rb → arithmetic_precedence/arithmetic_precedence_tokenizer.rb} +8 -10
- data/test/brackets/bracket_grammar.rb +23 -0
- data/test/{bracket_tokenizer.rb → brackets/bracket_tokenizer.rb} +2 -4
- data/test/{brackets_test.rb → brackets/brackets_test.rb} +3 -4
- data/test/chittagong/chittagong_driver.rb +47 -0
- data/test/{chittagong_driver_test.rb → chittagong/chittagong_driver_test.rb} +66 -58
- data/test/{chittagong_evaluator.rb → chittagong/chittagong_evaluator.rb} +28 -13
- data/test/{chittagong_evaluator_test.rb → chittagong/chittagong_evaluator_test.rb} +6 -10
- data/test/chittagong/chittagong_grammar.rb +110 -0
- data/test/{chittagong_parser_test.rb → chittagong/chittagong_parser_test.rb} +5 -7
- data/test/{chittagong_test.rb → chittagong/chittagong_test.rb} +27 -36
- data/test/{chittagong_tokenizer.rb → chittagong/chittagong_tokenizer.rb} +17 -17
- data/test/{chittagong_tokenizer_test.rb → chittagong/chittagong_tokenizer_test.rb} +2 -3
- data/test/compiled_parser_test.rb +9 -42
- data/test/dhaka_test_helper.rb +17 -0
- data/test/evaluator_test.rb +18 -3
- data/test/grammar_test.rb +10 -15
- data/test/lalr_but_not_slr_grammar.rb +10 -8
- data/test/malformed_grammar.rb +2 -4
- data/test/malformed_grammar_test.rb +2 -3
- data/test/nullable_grammar.rb +11 -8
- data/test/parse_result_test.rb +44 -0
- data/test/parser_state_test.rb +36 -0
- data/test/parser_test.rb +53 -103
- data/test/precedence_grammar.rb +6 -6
- data/test/precedence_grammar_test.rb +2 -3
- data/test/rr_conflict_grammar.rb +5 -7
- data/test/simple_grammar.rb +6 -8
- data/test/sr_conflict_grammar.rb +6 -6
- metadata +30 -26
- data/test/arithmetic_grammar.rb +0 -35
- data/test/arithmetic_precedence_grammar.rb +0 -24
- data/test/arithmetic_precedence_parser_test.rb +0 -33
- data/test/bracket_grammar.rb +0 -25
- data/test/chittagong_grammar.rb +0 -104
- data/test/incomplete_arithmetic_evaluator.rb +0 -60
data/lib/dhaka.rb
CHANGED
@@ -21,26 +21,28 @@
|
|
21
21
|
# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
22
22
|
#++
|
23
23
|
|
24
|
-
|
25
|
-
|
24
|
+
require 'set'
|
25
|
+
require 'logger'
|
26
|
+
require 'delegate'
|
26
27
|
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
28
|
+
%w[
|
29
|
+
grammar/grammar_symbol
|
30
|
+
grammar/production
|
31
|
+
grammar/closure_hash
|
32
|
+
grammar/grammar
|
33
|
+
grammar/precedence
|
34
|
+
parser/parse_tree
|
35
|
+
parser/parse_result
|
36
|
+
parser/item
|
37
|
+
parser/channel
|
38
|
+
parser/parser_methods
|
39
|
+
parser/parser_state
|
40
|
+
parser/conflict
|
41
|
+
parser/token
|
42
|
+
parser/action
|
43
|
+
parser/parser_run
|
44
|
+
parser/parser
|
45
|
+
parser/compiled_parser
|
46
|
+
tokenizer/tokenizer
|
47
|
+
evaluator/evaluator
|
48
|
+
].each {|path| require File.join(File.dirname(__FILE__), path)}
|
data/lib/evaluator/evaluator.rb
CHANGED
@@ -1,5 +1,4 @@
|
|
1
1
|
module Dhaka
|
2
|
-
|
3
2
|
# This is the abstract base evaluator class. It is not directly instantiated.
|
4
3
|
# When defining an evaluator for a specific grammar, we subclass it. e.g. for FooGrammar
|
5
4
|
# we create a FooEvaluator that subclasses Evaluator. Note that FooEvaluator may not
|
@@ -55,61 +54,60 @@ module Dhaka
|
|
55
54
|
# end
|
56
55
|
#
|
57
56
|
# end
|
58
|
-
|
59
|
-
|
60
57
|
class Evaluator
|
58
|
+
class << self
|
59
|
+
def inherited(evaluator)
|
60
|
+
class << evaluator
|
61
|
+
attr_accessor :grammar, :actions
|
62
|
+
end
|
63
|
+
end
|
64
|
+
|
65
|
+
def method_missing(method_name, *args, &blk)
|
66
|
+
name = method_name.to_s
|
67
|
+
if name =~ /^for_(.+)$/
|
68
|
+
rule_name = $1
|
69
|
+
actions << rule_name
|
70
|
+
send(:define_method, rule_name, &blk)
|
71
|
+
else
|
72
|
+
super
|
73
|
+
end
|
74
|
+
end
|
75
|
+
|
76
|
+
# Evaluation rules are defined within a block passed to this method.
|
77
|
+
def define_evaluation_rules
|
78
|
+
self.actions = []
|
79
|
+
yield
|
80
|
+
check_definitions
|
81
|
+
end
|
82
|
+
|
83
|
+
private
|
61
84
|
|
62
|
-
|
63
|
-
|
85
|
+
def check_definitions
|
86
|
+
filter = lambda {|productions| productions.map {|production| production.name} - actions}
|
87
|
+
pass_through_productions_without_rules = filter[grammar.productions.select {|production| production.expansion.size == 1}]
|
88
|
+
pass_through_productions_without_rules.each do |rule_name|
|
89
|
+
send(:define_method, rule_name) do
|
90
|
+
evaluate(child_nodes.first)
|
91
|
+
end
|
92
|
+
end
|
93
|
+
non_trivial_productions_with_rules_undefined = filter[grammar.productions.select {|production| production.expansion.size != 1}]
|
94
|
+
raise EvaluatorDefinitionError.new(non_trivial_productions_with_rules_undefined) unless non_trivial_productions_with_rules_undefined.empty?
|
95
|
+
end
|
96
|
+
end
|
64
97
|
|
65
98
|
# Evaluate a syntax tree node.
|
66
99
|
def evaluate node
|
67
100
|
@node_stack ||= []
|
68
101
|
@node_stack << node.child_nodes
|
69
|
-
result
|
102
|
+
result = send(node.production.name)
|
70
103
|
@node_stack.pop
|
71
104
|
result
|
72
105
|
end
|
73
106
|
|
74
|
-
# Performs the pass-through calculations for nodes with only one child_node for which an
|
75
|
-
# evaluation rule is not explicitly defined. Will probably be deprecated in future versions.
|
76
|
-
def method_missing(method_name)
|
77
|
-
evaluate(child_nodes[0])
|
78
|
-
end
|
79
|
-
|
80
107
|
# Returns the array of child nodes of the node being currently evaluated.
|
81
108
|
def child_nodes
|
82
|
-
@node_stack
|
109
|
+
@node_stack.last
|
83
110
|
end
|
84
|
-
|
85
|
-
# Evaluation rules are defined within a block passed to this method.
|
86
|
-
def self.define_evaluation_rules
|
87
|
-
self.actions = []
|
88
|
-
yield
|
89
|
-
check_definitions
|
90
|
-
end
|
91
|
-
|
92
|
-
private
|
93
|
-
|
94
|
-
def self.inherited(evaluator)
|
95
|
-
class << evaluator
|
96
|
-
attr_accessor :grammar, :actions
|
97
|
-
end
|
98
|
-
end
|
99
|
-
|
100
|
-
def self.method_missing(method_name, &blk)
|
101
|
-
if method_name.to_s =~ /^for_*/
|
102
|
-
rule_name = method_name.to_s[4..-1]
|
103
|
-
self.actions << rule_name
|
104
|
-
self.send(:define_method, rule_name, &blk)
|
105
|
-
end
|
106
|
-
end
|
107
|
-
|
108
|
-
def self.check_definitions
|
109
|
-
non_trivial_productions_with_rules_undefined = self.grammar.productions.select {|production| production.expansion.size != 1}.collect {|production| production.name} - self.actions
|
110
|
-
raise EvaluatorDefinitionError.new(non_trivial_productions_with_rules_undefined) unless non_trivial_productions_with_rules_undefined.empty?
|
111
|
-
end
|
112
|
-
|
113
111
|
end
|
114
112
|
|
115
113
|
class EvaluatorDefinitionError < StandardError #:nodoc:
|
@@ -118,8 +116,8 @@ module Dhaka
|
|
118
116
|
end
|
119
117
|
|
120
118
|
def to_s
|
121
|
-
|
122
|
-
|
119
|
+
result = "The following non-trivial productions do not have any evaluation rules defined:\n"
|
120
|
+
result << @non_trivial_productions_with_rules_undefined.join("\n")
|
123
121
|
end
|
124
122
|
end
|
125
123
|
end
|
data/lib/grammar/closure_hash.rb
CHANGED
@@ -1,12 +1,13 @@
|
|
1
|
-
#!/usr/bin/env ruby
|
2
1
|
module Dhaka
|
3
2
|
# A subclass of Hash with a dirty flag
|
4
3
|
class ClosureHash < Hash #:nodoc:
|
5
4
|
attr_accessor :dirty
|
6
|
-
|
7
|
-
|
5
|
+
|
6
|
+
def initialize
|
7
|
+
super
|
8
8
|
@dirty = false
|
9
9
|
end
|
10
|
+
|
10
11
|
def load_set(set)
|
11
12
|
set.each {|item| self[item] = item}
|
12
13
|
end
|
data/lib/grammar/grammar.rb
CHANGED
@@ -1,10 +1,8 @@
|
|
1
|
-
#!/usr/bin/env ruby
|
2
|
-
require 'set'
|
3
1
|
module Dhaka
|
4
2
|
|
5
3
|
# Reserved name for the start symbol for all grammars.
|
6
4
|
START_SYMBOL_NAME = "_Start_"
|
7
|
-
END_SYMBOL_NAME
|
5
|
+
END_SYMBOL_NAME = "_End_" #:nodoc:
|
8
6
|
|
9
7
|
# Productions for specific grammar symbols are defined in the context of this class.
|
10
8
|
class ProductionBuilder
|
@@ -12,7 +10,7 @@ module Dhaka
|
|
12
10
|
# +symbol+ is the grammar symbol that productions are being defined for.
|
13
11
|
def initialize(grammar, symbol)
|
14
12
|
@grammar = grammar
|
15
|
-
@symbol
|
13
|
+
@symbol = symbol
|
16
14
|
end
|
17
15
|
|
18
16
|
# Creates a new production for +symbol+ with an expansion of +expansion+. The options hash can include
|
@@ -22,11 +20,13 @@ module Dhaka
|
|
22
20
|
# See the arithmetic precedence grammar in the test suites for an example.
|
23
21
|
def method_missing(production_name, expansion, options = {})
|
24
22
|
expansion_symbols = expansion.collect {|name| @grammar.symbols[name]}
|
23
|
+
production_args = [@symbol, expansion_symbols, production_name.to_s]
|
25
24
|
if precedence_symbol_name = options[:prec]
|
26
|
-
|
27
|
-
else
|
28
|
-
production = Production.new(@symbol, expansion_symbols, production_name.to_s)
|
25
|
+
production_args << @grammar.symbol_for_name(precedence_symbol_name).precedence
|
29
26
|
end
|
27
|
+
|
28
|
+
production = Production.new(*production_args)
|
29
|
+
|
30
30
|
@symbol.nullable = true if expansion_symbols.empty?
|
31
31
|
@grammar.productions_by_symbol[production.symbol] << production
|
32
32
|
raise "Duplicate production named #{production.name}" if @grammar.productions_by_name[production.name]
|
@@ -40,9 +40,10 @@ module Dhaka
|
|
40
40
|
# See the arithmetic precedence grammar in the test suites for an example.
|
41
41
|
class PrecedenceBuilder
|
42
42
|
def initialize(grammar) #:nodoc:
|
43
|
-
@grammar
|
43
|
+
@grammar = grammar
|
44
44
|
@precedence_level = 0
|
45
45
|
end
|
46
|
+
|
46
47
|
[:left, :right, :nonassoc].each do |associativity|
|
47
48
|
define_method(associativity) do |symbols|
|
48
49
|
assign_precedences associativity, symbols
|
@@ -52,7 +53,7 @@ module Dhaka
|
|
52
53
|
private
|
53
54
|
def assign_precedences(associativity, symbol_names)
|
54
55
|
symbol_names.each do |symbol_name|
|
55
|
-
symbol
|
56
|
+
symbol = @grammar.symbols[symbol_name]
|
56
57
|
symbol.precedence = Precedence.new(@precedence_level, associativity)
|
57
58
|
end
|
58
59
|
@precedence_level += 1
|
@@ -91,119 +92,121 @@ module Dhaka
|
|
91
92
|
# end
|
92
93
|
#
|
93
94
|
class Grammar
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
end
|
102
|
-
|
103
|
-
# Used for defining the precedences and associativities of symbols. The block +blk+ is
|
104
|
-
# evaluated in the context of a PrecedenceBuilder.
|
105
|
-
def self.precedences &blk
|
106
|
-
PrecedenceBuilder.new(self).instance_eval(&blk)
|
107
|
-
end
|
108
|
-
|
109
|
-
# Returns the grammar symbol identified by +name+
|
110
|
-
def self.symbol_for_name(name)
|
111
|
-
if symbols.has_key? name
|
112
|
-
symbols[name]
|
113
|
-
else
|
114
|
-
raise "No symbol with name #{name} found"
|
95
|
+
class << self
|
96
|
+
# Used for defining the productions for the symbol with name +symbol+. The block +blk+ is
|
97
|
+
# evaluated in the context of a ProductionBuilder.
|
98
|
+
def for_symbol symbol, &blk
|
99
|
+
symbol = symbols[symbol]
|
100
|
+
symbol.non_terminal = true
|
101
|
+
ProductionBuilder.new(self, symbol).instance_eval(&blk)
|
115
102
|
end
|
116
|
-
end
|
117
103
|
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
attr_accessor :symbols, :productions_by_symbol, :productions_by_name, :start_symbol, :end_symbol, :__first_cache
|
104
|
+
# Used for defining the precedences and associativities of symbols. The block +blk+ is
|
105
|
+
# evaluated in the context of a PrecedenceBuilder.
|
106
|
+
def precedences &blk
|
107
|
+
PrecedenceBuilder.new(self).instance_eval(&blk)
|
123
108
|
end
|
124
|
-
grammar.symbols = Hash.new {|hash, name| hash[name] = GrammarSymbol.new(name)}
|
125
|
-
grammar.productions_by_symbol = Hash.new {|hash, name| hash[name] = Set.new([])}
|
126
|
-
grammar.productions_by_name = {}
|
127
|
-
grammar.end_symbol = grammar.symbols[END_SYMBOL_NAME]
|
128
|
-
grammar.start_symbol = grammar.symbols[START_SYMBOL_NAME]
|
129
|
-
grammar.__first_cache = {}
|
130
|
-
end
|
131
|
-
|
132
|
-
def self.productions_for_symbol(symbol)
|
133
|
-
productions_by_symbol[symbol]
|
134
|
-
end
|
135
|
-
|
136
|
-
def self.productions
|
137
|
-
productions_by_name.values
|
138
|
-
end
|
139
|
-
|
140
|
-
def self.production_named(name)
|
141
|
-
productions_by_name[name]
|
142
|
-
end
|
143
|
-
|
144
|
-
|
145
|
-
def self.terminal_symbols
|
146
|
-
symbols.values.select {|symbol| symbol.terminal}
|
147
|
-
end
|
148
|
-
|
149
|
-
def self.non_terminal_symbols
|
150
|
-
symbols.values.select {|symbol| symbol.non_terminal}
|
151
|
-
end
|
152
|
-
|
153
|
-
def self.closure(kernel)
|
154
|
-
channels = Set.new
|
155
109
|
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
110
|
+
# Returns the grammar symbol identified by +name+
|
111
|
+
def symbol_for_name(name)
|
112
|
+
if symbols.has_key? name
|
113
|
+
symbols[name]
|
114
|
+
else
|
115
|
+
raise "No symbol with name #{name} found"
|
161
116
|
end
|
162
117
|
end
|
118
|
+
|
119
|
+
def productions
|
120
|
+
productions_by_name.values
|
121
|
+
end
|
122
|
+
|
123
|
+
def productions_for_symbol(symbol)
|
124
|
+
productions_by_symbol[symbol]
|
125
|
+
end
|
126
|
+
|
127
|
+
def closure(kernel)
|
128
|
+
channels = Set.new
|
129
|
+
|
130
|
+
result = compute_closure(kernel) do |hash, item|
|
131
|
+
if item.next_symbol and item.next_symbol.non_terminal
|
132
|
+
productions_by_symbol[item.next_symbol].each do |production|
|
133
|
+
channels << spontaneous_channel(item, hash[Item.new(production, 0)])
|
134
|
+
end
|
135
|
+
end
|
136
|
+
end
|
163
137
|
|
164
|
-
|
165
|
-
|
138
|
+
[channels, result]
|
139
|
+
end
|
140
|
+
|
141
|
+
def passive_channel(start_item, end_item)
|
142
|
+
PassiveChannel.new(self, start_item, end_item)
|
143
|
+
end
|
144
|
+
|
145
|
+
def first(given_symbol)
|
146
|
+
cached_result = __first_cache[given_symbol]
|
147
|
+
return cached_result if cached_result
|
148
|
+
result = compute_closure([given_symbol]) do |hash, symbol|
|
149
|
+
productions_by_symbol[symbol].each do |production|
|
150
|
+
symbol_index = 0
|
151
|
+
while next_symbol = production.expansion[symbol_index]
|
152
|
+
hash[next_symbol]
|
153
|
+
break unless next_symbol.nullable
|
154
|
+
symbol_index += 1
|
155
|
+
end
|
156
|
+
end if symbol.non_terminal
|
157
|
+
end.values.select {|symbol| symbol.terminal}.to_set
|
158
|
+
__first_cache[given_symbol] = result
|
159
|
+
result
|
160
|
+
end
|
161
|
+
|
162
|
+
def production_named(name)
|
163
|
+
productions_by_name[name]
|
164
|
+
end
|
165
|
+
|
166
|
+
def terminal_symbols
|
167
|
+
symbols.values.select {|symbol| symbol.terminal}
|
168
|
+
end
|
166
169
|
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
|
170
|
+
def non_terminal_symbols
|
171
|
+
symbols.values.select {|symbol| symbol.non_terminal}
|
172
|
+
end
|
173
|
+
|
174
|
+
private
|
175
|
+
|
176
|
+
def inherited(grammar)
|
177
|
+
class << grammar
|
178
|
+
attr_accessor :symbols, :productions_by_symbol, :productions_by_name, :start_symbol, :end_symbol, :__first_cache
|
179
|
+
end
|
180
|
+
grammar.symbols = Hash.new {|hash, name| hash[name] = GrammarSymbol.new(name)}
|
181
|
+
grammar.productions_by_symbol = Hash.new {|hash, name| hash[name] = Set.new([])}
|
182
|
+
grammar.productions_by_name = {}
|
183
|
+
grammar.end_symbol = grammar.symbols[END_SYMBOL_NAME]
|
184
|
+
grammar.start_symbol = grammar.symbols[START_SYMBOL_NAME]
|
185
|
+
grammar.__first_cache = {}
|
186
|
+
end
|
183
187
|
|
184
|
-
|
185
|
-
|
186
|
-
|
187
|
-
|
188
|
-
def self.passive_channel(start_item, end_item)
|
189
|
-
PassiveChannel.new(self, start_item, end_item)
|
190
|
-
end
|
188
|
+
def spontaneous_channel(start_item, end_item)
|
189
|
+
SpontaneousChannel.new(self, start_item, end_item)
|
190
|
+
end
|
191
191
|
|
192
|
-
|
193
|
-
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
|
199
|
-
|
200
|
-
|
192
|
+
def compute_closure(initial)
|
193
|
+
closure_hash = ClosureHash.new do |hash, item|
|
194
|
+
hash.dirty = true
|
195
|
+
hash[item] = item
|
196
|
+
end
|
197
|
+
|
198
|
+
closure_hash.load_set(initial)
|
199
|
+
|
200
|
+
loop do
|
201
|
+
closure_hash.keys.each do |element|
|
202
|
+
yield closure_hash, element
|
203
|
+
end
|
204
|
+
break unless closure_hash.dirty
|
205
|
+
closure_hash.dirty = false
|
206
|
+
end
|
207
|
+
closure_hash
|
201
208
|
end
|
202
|
-
break if !closure_hash.dirty
|
203
|
-
closure_hash.dirty = false
|
204
209
|
end
|
205
|
-
return closure_hash
|
206
|
-
end
|
207
210
|
end
|
208
211
|
|
209
212
|
end
|