dhaka 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/dhaka.rb +44 -0
- data/lib/evaluator/evaluator.rb +70 -0
- data/lib/grammar/closure_hash.rb +13 -0
- data/lib/grammar/grammar.rb +129 -0
- data/lib/grammar/grammar_symbol.rb +19 -0
- data/lib/grammar/production.rb +14 -0
- data/lib/parser/action.rb +51 -0
- data/lib/parser/channel.rb +51 -0
- data/lib/parser/compiled_parser.rb +35 -0
- data/lib/parser/item.rb +37 -0
- data/lib/parser/parse_result.rb +26 -0
- data/lib/parser/parse_tree.rb +34 -0
- data/lib/parser/parser.rb +125 -0
- data/lib/parser/parser_methods.rb +10 -0
- data/lib/parser/parser_run.rb +35 -0
- data/lib/parser/parser_state.rb +66 -0
- data/lib/parser/token.rb +15 -0
- data/lib/tokenizer/tokenizer.rb +88 -0
- data/test/all_tests.rb +11 -0
- data/test/arithmetic_evaluator.rb +70 -0
- data/test/arithmetic_evaluator_test.rb +55 -0
- data/test/arithmetic_grammar.rb +38 -0
- data/test/arithmetic_grammar_test.rb +11 -0
- data/test/arithmetic_test_methods.rb +11 -0
- data/test/arithmetic_tokenizer.rb +43 -0
- data/test/arithmetic_tokenizer_test.rb +32 -0
- data/test/bracket_grammar.rb +25 -0
- data/test/bracket_tokenizer.rb +17 -0
- data/test/brackets_test.rb +20 -0
- data/test/compiled_arithmetic_parser.rb +252 -0
- data/test/compiled_parser_test.rb +71 -0
- data/test/evaluator_test.rb +8 -0
- data/test/grammar_test.rb +70 -0
- data/test/incomplete_arithmetic_evaluator.rb +60 -0
- data/test/lalr_but_not_slr_grammar.rb +17 -0
- data/test/malformed_grammar.rb +9 -0
- data/test/malformed_grammar_test.rb +9 -0
- data/test/nullable_grammar.rb +18 -0
- data/test/parser_test.rb +168 -0
- data/test/rr_conflict_grammar.rb +23 -0
- data/test/simple_grammar.rb +24 -0
- data/test/sr_conflict_grammar.rb +16 -0
- metadata +87 -0
data/lib/dhaka.rb
ADDED
@@ -0,0 +1,44 @@
|
|
1
|
+
#--
|
2
|
+
# Copyright (c) 2006 Mushfeq Khan
|
3
|
+
#
|
4
|
+
# Permission is hereby granted, free of charge, to any person obtaining
|
5
|
+
# a copy of this software and associated documentation files (the
|
6
|
+
# "Software"), to deal in the Software without restriction, including
|
7
|
+
# without limitation the rights to use, copy, modify, merge, publish,
|
8
|
+
# distribute, sublicense, and/or sell copies of the Software, and to
|
9
|
+
# permit persons to whom the Software is furnished to do so, subject to
|
10
|
+
# the following conditions:
|
11
|
+
#
|
12
|
+
# The above copyright notice and this permission notice shall be
|
13
|
+
# included in all copies or substantial portions of the Software.
|
14
|
+
#
|
15
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
16
|
+
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
17
|
+
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
18
|
+
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
19
|
+
# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
20
|
+
# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
21
|
+
# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
22
|
+
#++
|
23
|
+
|
24
|
+
require 'grammar/grammar_symbol'
|
25
|
+
require 'grammar/production'
|
26
|
+
require 'grammar/closure_hash'
|
27
|
+
require 'grammar/grammar'
|
28
|
+
|
29
|
+
require 'parser/parse_result'
|
30
|
+
require 'parser/item'
|
31
|
+
require 'parser/channel'
|
32
|
+
require 'parser/parser_methods'
|
33
|
+
require 'parser/parse_tree'
|
34
|
+
require 'parser/parser_state'
|
35
|
+
require 'parser/token'
|
36
|
+
require 'parser/action'
|
37
|
+
require 'parser/parser_run'
|
38
|
+
require 'parser/parser'
|
39
|
+
require 'parser/compiled_parser'
|
40
|
+
|
41
|
+
require 'tokenizer/tokenizer'
|
42
|
+
require 'evaluator/evaluator'
|
43
|
+
|
44
|
+
|
@@ -0,0 +1,70 @@
|
|
1
|
+
module Dhaka
  # Base class for evaluators that walk a syntax tree and compute a value for
  # it. Subclasses set +grammar+ and declare one action per production inside
  # +define_evaluation_rules+, using either +for_rule_named+ or the
  # +for_<production name>+ shorthand.
  class Evaluator

    # syntax_tree:: root node of the tree that +result+ will evaluate.
    def initialize(syntax_tree)
      @syntax_tree = syntax_tree
      @node_stack = []
    end

    # Evaluates the tree given to the constructor and returns the value
    # produced for its root node.
    def result
      evaluate(@syntax_tree)
    end

    # Returns the evaluated children of the node currently being processed.
    # Intended to be called from inside an action block. Every call
    # re-evaluates all children of the current node.
    def child_nodes
      current_node = @node_stack[-1]
      current_node.child_nodes.map { |child| evaluate(child) }
    end

    # Gives every subclass its own class-level +grammar+ and +actions+
    # accessors.
    def self.inherited(evaluator)
      super
      class << evaluator
        attr_accessor :grammar, :actions
      end
    end

    # Installs a fresh action table, runs the given block (which is expected
    # to register actions) and then verifies the definitions.
    #
    # Productions without an explicit action fall back to returning their
    # first evaluated child.
    #
    # Raises EvaluatorDefinitionError if +check_definitions+ finds
    # productions that need an explicit action but have none.
    def self.define_evaluation_rules
      default_action = Proc.new { child_nodes[0] }
      self.actions = Hash.new { |hash, key| default_action }
      yield
      check_definitions
    end

    # Raises EvaluatorDefinitionError listing every production whose
    # expansion does not have exactly one symbol and for which no action has
    # been registered.
    def self.check_definitions
      non_trivial_names = grammar.productions.
        select { |production| production.expansion.size != 1 }.
        collect { |production| production.name }
      undefined_names = non_trivial_names - actions.keys
      raise EvaluatorDefinitionError.new(undefined_names) unless undefined_names.empty?
    end

    # Registers +blk+ as the action for the production called +name+.
    def self.for_rule_named(name, &blk)
      actions[name] = blk
    end

    # Implements the +for_<production name>+ shorthand for +for_rule_named+.
    #
    # Only names of the form +for_+ followed by at least one character are
    # treated as rule definitions. Anything else is passed to +super+ so that
    # misspelled class-method calls raise NoMethodError instead of being
    # silently ignored.
    def self.method_missing(method_name, *args, &blk)
      rule_name = method_name.to_s[/\Afor_(.+)\z/, 1]
      if rule_name
        for_rule_named(rule_name, &blk)
      else
        super
      end
    end

    # Keeps +respond_to?+ in agreement with +method_missing+.
    def self.respond_to_missing?(method_name, include_private = false)
      method_name.to_s =~ /\Afor_.+\z/ ? true : super
    end

    private

    # Returns leaf nodes unchanged. For any other node, runs the action
    # registered for the node's production in the context of this evaluator
    # and returns the action's value.
    def evaluate(node)
      return node if ParseTreeLeafNode === node
      @node_stack << node
      begin
        action = self.class.actions[node.production.name]
        instance_eval(&action)
      ensure
        # Restore the stack even when the action raises, so the evaluator is
        # not left pointing at a stale node.
        @node_stack.pop
      end
    end

  end

  # Raised when an evaluator leaves productions without an evaluation rule.
  class EvaluatorDefinitionError < StandardError
    # non_trivial_productions_with_rules_undefined:: names of the productions
    # that are missing an evaluation rule.
    def initialize(non_trivial_productions_with_rules_undefined)
      @non_trivial_productions_with_rules_undefined = non_trivial_productions_with_rules_undefined
    end

    # Returns a header line followed by one production name per line.
    def to_s
      result = "The following non-trivial productions do not have any evaluation rules defined:\n"
      result << (@non_trivial_productions_with_rules_undefined).join("\n")
    end
  end
end
|
@@ -0,0 +1,129 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
require 'set'
|
3
|
+
module Dhaka

  START_SYMBOL_NAME = "_Start_"
  END_SYMBOL_NAME = "_End_"

  # Receiver of the block given to Grammar.for_symbol. Every unknown method
  # called on it is treated as the definition of a production named after
  # the method, for the symbol the builder was created with.
  class ProductionBuilder
    # grammar:: the grammar class whose tables are filled in.
    # symbol:: the symbol that the defined productions belong to.
    def initialize(grammar, symbol)
      @grammar = grammar
      @symbol = symbol
    end

    # Creates the production +production_name+ whose expansion is the list of
    # symbol names in +expansion+, and registers it with the grammar both by
    # symbol and by name. An empty expansion marks the symbol as nullable.
    def method_missing(production_name, expansion)
      symbols_in_expansion = expansion.map { |symbol_name| @grammar.symbols[symbol_name] }
      new_production = Production.new(@symbol, symbols_in_expansion, production_name.to_s)
      @symbol.nullable = true if symbols_in_expansion.empty?
      @grammar.productions_by_symbol[new_production.symbol] << new_production
      @grammar.productions_by_name[new_production.name] = new_production
    end
  end

  # Base class for grammar definitions. All state lives on the subclass
  # itself, in class-level accessors that are installed when it is created.
  class Grammar

    # Installs the class-level tables on a new grammar subclass and creates
    # its end and start symbols (in that order).
    def self.inherited(grammar)
      class << grammar
        attr_accessor :symbols, :productions_by_symbol, :productions_by_name, :start_symbol, :end_symbol, :__first_cache
      end
      # Looking up an unknown name creates and remembers a symbol for it.
      grammar.symbols = Hash.new { |table, name| table[name] = GrammarSymbol.new(name) }
      grammar.productions_by_symbol = Hash.new { |table, name| table[name] = Set.new }
      grammar.productions_by_name = {}
      grammar.end_symbol = grammar.symbols[END_SYMBOL_NAME]
      grammar.start_symbol = grammar.symbols[START_SYMBOL_NAME]
      grammar.__first_cache = {}
    end

    # Marks the symbol called +symbol+ as a non-terminal and evaluates +blk+
    # against a ProductionBuilder for it.
    def self.for_symbol(symbol, &blk)
      grammar_symbol = symbols[symbol]
      grammar_symbol.non_terminal = true
      ProductionBuilder.new(self, grammar_symbol).instance_eval(&blk)
    end

    # All productions registered so far.
    def self.productions
      productions_by_name.values
    end

    # The production registered under +name+, or nil.
    def self.production_named(name)
      productions_by_name[name]
    end

    # The set of productions registered for +symbol+.
    def self.productions_for_symbol(symbol)
      productions_by_symbol[symbol]
    end

    # Returns the existing symbol called +name+. Unlike +symbols[name]+ this
    # never creates a symbol; it raises when the name is unknown.
    def self.symbol_for_name(name)
      raise "No symbol with name #{name} found" unless symbols.has_key?(name)
      symbols[name]
    end

    # Symbols that have not been marked as non-terminals.
    def self.terminal_symbols
      symbols.values.select(&:terminal)
    end

    # Symbols that have been marked as non-terminals.
    def self.non_terminal_symbols
      symbols.values.select(&:non_terminal)
    end

    # Expands the items in +kernel+ to their closure. Returns a two-element
    # array: the set of spontaneous channels created during the expansion,
    # and the closure hash of items.
    def self.closure(kernel)
      channels = Set.new

      items = compute_closure(kernel) do |hash, item|
        upcoming = item.next_symbol
        next unless upcoming and upcoming.non_terminal
        productions_by_symbol[upcoming].each do |production|
          # Reading hash[...] adds the item to the closure if it is new.
          channels << spontaneous_channel(item, hash[Item.new(production, 0)])
        end
      end

      [channels, items]
    end

    # Returns the set of terminal symbols reachable from +given_symbol+ by
    # following the leading symbols of productions, continuing past a symbol
    # only when it is nullable. Results are cached per symbol.
    def self.first(given_symbol)
      cached = __first_cache[given_symbol]
      return cached if cached

      reachable = compute_closure([given_symbol]) do |hash, symbol|
        next unless symbol.non_terminal
        productions_by_symbol[symbol].each do |production|
          production.expansion.each do |expansion_symbol|
            hash[expansion_symbol] # lookup registers the symbol in the closure
            break unless expansion_symbol.nullable
          end
        end
      end

      __first_cache[given_symbol] = reachable.values.select(&:terminal).to_set
    end

    # Builds a SpontaneousChannel between two items of this grammar.
    def self.spontaneous_channel(start_item, end_item)
      SpontaneousChannel.new(self, start_item, end_item)
    end

    # Builds a PassiveChannel between two items of this grammar.
    def self.passive_channel(start_item, end_item)
      PassiveChannel.new(self, start_item, end_item)
    end

    # Generic fixed-point driver. Loads +initial+ into a ClosureHash and
    # repeatedly yields (hash, element) for every element until a full pass
    # adds nothing new. The block adds elements by reading missing keys.
    def self.compute_closure(initial)
      closure_hash = ClosureHash.new do |hash, item|
        hash.dirty = true
        hash[item] = item
      end
      closure_hash.load_set(initial)
      while true
        # Iterate over a snapshot of the keys: the block may add new entries.
        closure_hash.keys.each { |element| yield closure_hash, element }
        break unless closure_hash.dirty
        closure_hash.dirty = false
      end
      closure_hash
    end

  end
end
|
@@ -0,0 +1,19 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
module Dhaka
  # A named grammar symbol carrying two settable flags: whether it is a
  # non-terminal and whether it is nullable. Both flags start out unset (nil).
  class GrammarSymbol
    attr_reader :name
    attr_accessor :non_terminal, :nullable

    # name:: the symbol's name.
    def initialize(name)
      @name = name
    end

    # True unless the symbol has been flagged as a non-terminal.
    def terminal
      not non_terminal
    end

    # The symbol's name.
    def to_s
      @name
    end

    # Orders symbols by name.
    def <=>(other)
      name <=> other.name
    end
  end
end
|
@@ -0,0 +1,14 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
module Dhaka
  # A named production: the symbol being defined plus the list of symbols it
  # expands to.
  class Production
    attr_reader :symbol, :expansion, :name

    # symbol:: the symbol this production defines.
    # expansion:: the list of symbols the production expands to.
    # name:: the production's name.
    def initialize(symbol, expansion, name)
      @symbol, @expansion, @name = symbol, expansion, name
    end

    # Renders the production as "<name> <symbol> ::= <expansion joined by spaces>".
    def to_s
      right_hand_side = @expansion.join(' ')
      "#{@name} #{@symbol} ::= #{right_hand_side}"
    end
  end
end
|
@@ -0,0 +1,51 @@
|
|
1
|
+
module Dhaka
  # Common base of the parser actions. Subclasses store a Proc in
  # @action_code that performs the action.
  class Action
    attr_reader :action_code
  end

  # Action that pushes a destination state.
  class ShiftAction < Action
    attr_reader :destination_state

    # destination_state:: the state to move to.
    def initialize(destination_state)
      @destination_state = destination_state
      # The Proc captures the +destination_state+ local. +state_stack+ is not
      # defined in this file; presumably the Proc is instance_eval'd against
      # the object running the parse -- confirm against the caller.
      @action_code = Proc.new do
        state_stack << destination_state
        []
      end
    end

    # Source text that recreates this action inside a compiled parser.
    def compile_to_ruby_source
      "shift_to #{@destination_state.id}"
    end

    def to_s
      "Shift to #{@destination_state}"
    end
  end

  # Action that replaces the nodes matching a production's expansion with a
  # single composite node.
  class ReduceAction < Action
    attr_reader :production

    # production:: the production to reduce with.
    def initialize(production)
      @production = production
      # As in ShiftAction, the Proc uses the +production+ local (not the
      # instance variable) and relies on +state_stack+, +node_stack+ and
      # +current_token+ being provided by whatever evaluates it.
      @action_code = Proc.new do
        reduced_node = ParseTreeCompositeNode.new(production)

        # Pop one state and one node per expansion symbol; unshifting keeps
        # the children in their original left-to-right order.
        production.expansion.size.times do
          state_stack.pop
          reduced_node.child_nodes.unshift(node_stack.pop)
        end

        node_stack << reduced_node

        if reduced_node.head_node?
          []
        else
          [production.symbol.name, current_token.grammar_symbol.name]
        end
      end
    end

    # Source text that recreates this action inside a compiled parser.
    def compile_to_ruby_source
      "reduce_with '#{@production.name}'"
    end

    def to_s
      "Reduce with #{@production}"
    end
  end
end
|
@@ -0,0 +1,51 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
module Dhaka
  # A directed link between two items along which lookahead symbols are
  # carried from the start item towards the end item.
  class Channel
    attr_reader :start_item, :end_item

    # grammar:: grammar used by subclasses to look up first sets.
    # start_item:: item the channel leaves from.
    # end_item:: item whose lookahead set the channel feeds.
    def initialize(grammar, start_item, end_item)
      @grammar = grammar
      @start_item = start_item
      @end_item = end_item
    end

    # Adds the symbols in +cargo+ to the end item's lookahead set. Returns
    # true when at least one of them was not already present.
    def propagate(cargo)
      lookaheads = @end_item.lookaheadset
      additions = cargo - lookaheads
      lookaheads.merge(additions)
      !additions.empty?
    end

    def to_s
      "Channel from #{@start_item} to #{@end_item}"
    end

    # Channels are interchangeable as hash keys / set members when both of
    # their items are.
    def eql?(other)
      @start_item.eql?(other.start_item) && @end_item.eql?(other.end_item)
    end

    def hash
      @start_item.hash ^ @end_item.hash
    end
  end

  # Channel whose cargo is derived from the symbols that follow the start
  # item's next symbol in its production.
  class SpontaneousChannel < Channel
    def to_s
      "Spontaneous #{super}"
    end

    # Collects the first sets of the symbols after the start item's next
    # symbol, stopping at the first one that is not nullable. When every
    # following symbol is nullable (or there is none), the start item's own
    # lookaheads are added as well. Returns what +propagate+ returns.
    def pump
      expansion = @start_item.production.expansion
      position = @start_item.next_item_index + 1
      cargo = Set.new
      while (follow_symbol = expansion[position])
        cargo |= @grammar.first(follow_symbol)
        return propagate(cargo) unless follow_symbol.nullable
        position += 1
      end
      propagate(cargo | @start_item.lookaheadset)
    end
  end

  # Channel that simply forwards the start item's lookaheads.
  class PassiveChannel < Channel
    def to_s
      "Passive #{super}"
    end

    # Propagates the start item's lookahead set to the end item.
    def pump
      propagate(@start_item.lookaheadset)
    end
  end
end
|
@@ -0,0 +1,35 @@
|
|
1
|
+
module Dhaka
  # Base class for parsers whose states are declared directly in Ruby source
  # via the class-level +at_state+, +start_with+, +shift_to+ and
  # +reduce_with+ calls. Each subclass keeps its own state table.
  class CompiledParser

    class << self
      # Gives a new subclass its own +states+, +grammar+ and +start_state_id+
      # accessors, plus a state table that creates a ParserState the first
      # time a state id is looked up.
      def inherited(compiled_parser)
        class << compiled_parser
          attr_accessor :states, :grammar, :start_state_id
        end
        compiled_parser.states = Hash.new do |table, state_id|
          table[state_id] = ParserState.new(compiled_parser, {}, state_id)
        end
      end

      # Evaluates +blk+ in the context of the state with id +x+.
      def at_state(x, &blk)
        states[x].instance_eval(&blk)
      end

      # The state registered under the start state id.
      def start_state
        states[start_state_id]
      end

      # Records which state id the parser starts in.
      def start_with(start_state_id)
        self.start_state_id = start_state_id
      end

      # Builds a ReduceAction for the grammar's production +production_name+.
      def reduce_with(production_name)
        ReduceAction.new(grammar.production_named(production_name))
      end

      # Builds a ShiftAction that moves to the state with id +state_id+.
      def shift_to(state_id)
        ShiftAction.new(states[state_id])
      end
    end

    extend ParserMethods

  end

end
|
data/lib/parser/item.rb
ADDED
@@ -0,0 +1,37 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
module Dhaka
  # A production together with a position inside its expansion and a set of
  # lookahead symbols. Two items are interchangeable as hash keys when they
  # share the same production and position; the lookahead set is not part of
  # their identity.
  class Item
    attr_reader :production, :next_item_index, :lookaheadset

    # production:: the production this item refers to.
    # next_item_index:: index into the production's expansion of the next
    # symbol.
    def initialize(production, next_item_index)
      @production = production
      @next_item_index = next_item_index
      @lookaheadset = Set.new
    end

    # The expansion symbol at the current position, or nil when the position
    # is at the end of the expansion.
    def next_symbol
      @production.expansion[@next_item_index]
    end

    # A new item for the same production, one position further along. The
    # new item starts with an empty lookahead set.
    def next_item
      Item.new(@production, @next_item_index + 1)
    end

    # Renders the item as "<symbol> ::= <expansion with '->' at the current
    # position> [<sorted lookaheads>]".
    def to_s
      expansion_symbols = @production.expansion.collect { |symbol| symbol.name }
      # Clamp so that a position at or past the end appends the marker.
      marker_position = [@next_item_index, expansion_symbols.size].min
      expansion_symbols.insert(marker_position, '->')
      # Join explicitly: interpolating the sorted array only yields the
      # concatenated symbol names on Ruby 1.8; later versions print the
      # array's inspect form instead.
      lookaheads = @lookaheadset.sort.join
      "#{@production.symbol} ::= #{expansion_symbols.join(' ')} [#{lookaheads}]"
    end

    def eql?(other)
      @production == other.production && @next_item_index == other.next_item_index
    end

    def hash
      @production.hash ^ @next_item_index.hash
    end
  end
end
|