dhaka 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/dhaka.rb +44 -0
- data/lib/evaluator/evaluator.rb +70 -0
- data/lib/grammar/closure_hash.rb +13 -0
- data/lib/grammar/grammar.rb +129 -0
- data/lib/grammar/grammar_symbol.rb +19 -0
- data/lib/grammar/production.rb +14 -0
- data/lib/parser/action.rb +51 -0
- data/lib/parser/channel.rb +51 -0
- data/lib/parser/compiled_parser.rb +35 -0
- data/lib/parser/item.rb +37 -0
- data/lib/parser/parse_result.rb +26 -0
- data/lib/parser/parse_tree.rb +34 -0
- data/lib/parser/parser.rb +125 -0
- data/lib/parser/parser_methods.rb +10 -0
- data/lib/parser/parser_run.rb +35 -0
- data/lib/parser/parser_state.rb +66 -0
- data/lib/parser/token.rb +15 -0
- data/lib/tokenizer/tokenizer.rb +88 -0
- data/test/all_tests.rb +11 -0
- data/test/arithmetic_evaluator.rb +70 -0
- data/test/arithmetic_evaluator_test.rb +55 -0
- data/test/arithmetic_grammar.rb +38 -0
- data/test/arithmetic_grammar_test.rb +11 -0
- data/test/arithmetic_test_methods.rb +11 -0
- data/test/arithmetic_tokenizer.rb +43 -0
- data/test/arithmetic_tokenizer_test.rb +32 -0
- data/test/bracket_grammar.rb +25 -0
- data/test/bracket_tokenizer.rb +17 -0
- data/test/brackets_test.rb +20 -0
- data/test/compiled_arithmetic_parser.rb +252 -0
- data/test/compiled_parser_test.rb +71 -0
- data/test/evaluator_test.rb +8 -0
- data/test/grammar_test.rb +70 -0
- data/test/incomplete_arithmetic_evaluator.rb +60 -0
- data/test/lalr_but_not_slr_grammar.rb +17 -0
- data/test/malformed_grammar.rb +9 -0
- data/test/malformed_grammar_test.rb +9 -0
- data/test/nullable_grammar.rb +18 -0
- data/test/parser_test.rb +168 -0
- data/test/rr_conflict_grammar.rb +23 -0
- data/test/simple_grammar.rb +24 -0
- data/test/sr_conflict_grammar.rb +16 -0
- metadata +87 -0
@@ -0,0 +1,26 @@
|
|
1
|
+
module Dhaka
|
2
|
+
# Result object for a parse that finished without reporting an error.
# It carries the syntax tree handed to it at construction time.
class ParseSuccessResult
  # The syntax tree for the parsed input (readable and writable).
  attr_accessor :syntax_tree

  # syntax_tree:: the tree to expose through #syntax_tree.
  def initialize(syntax_tree)
    @syntax_tree = syntax_tree
  end

  # A success result never carries an error, so this is always false.
  def has_error?
    false
  end
end
|
12
|
+
# Result object for a parse that stopped on a token it could not handle.
class ParseErrorResult
  # The index that was supplied when this error was created (read-only).
  attr_reader :bad_token_index

  # bad_token_index:: the token-stream index to report through #bad_token_index.
  def initialize(bad_token_index)
    @bad_token_index = bad_token_index
  end

  # An error result always reports an error, so this is always true.
  def has_error?
    true
  end
end
|
22
|
+
end
|
23
|
+
|
24
|
+
|
25
|
+
|
26
|
+
|
@@ -0,0 +1,34 @@
|
|
1
|
+
module Dhaka
|
2
|
+
# Interior node of a parse tree: one production plus the child nodes that
# were collected for it.
class ParseTreeCompositeNode
  attr_reader :production, :child_nodes

  # production:: the production this node stands for. Children start out
  #              empty and are added by whoever builds the tree.
  def initialize(production)
    @child_nodes = []
    @production = production
  end

  # Returns a flat array made of every child's linearization (in child
  # order) followed by this node's own production name.
  def linearize
    names = child_nodes.map { |node| node.linearize }.flatten
    names << production.name
  end

  # Human-readable rendering: the production's symbol followed by the
  # comma-separated children.
  def to_s
    children = child_nodes.join(", ")
    "CompositeNode: #{production.symbol} --> [#{children}]"
  end

  # True when this node's production symbol is named START_SYMBOL_NAME.
  def head_node?
    production.symbol.name == START_SYMBOL_NAME
  end
end
|
18
|
+
|
19
|
+
# Terminal node of a parse tree; it simply wraps a token.
class ParseTreeLeafNode
  attr_reader :token

  # token:: the token this leaf wraps.
  def initialize(token)
    @token = token
  end

  # Leaves contribute nothing to a linearization.
  def linearize
    []
  end

  # Human-readable rendering that embeds the token's own string form.
  def to_s
    "LeafNode: #{token}"
  end

  # A leaf is never the head node.
  def head_node?
    false
  end
end
|
34
|
+
end
|
@@ -0,0 +1,125 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
require 'set'
|
3
|
+
module Dhaka
|
4
|
+
# Builds the parser states, transitions and actions for a grammar.
#
# States live in a hash keyed by kernel item set. Looking up an unseen
# kernel runs the hash's default block, which creates the state and
# (recursively) every state reachable from it.
class Parser
  include ParserMethods
  attr_reader :grammar, :start_state

  # grammar:: the grammar to build a parser for. Construction immediately
  #           builds all states and actions (see #initialize_states).
  def initialize(grammar)
    @grammar = grammar
    @channels = []
    @transitions = Hash.new { |table, state| table[state] = {} }
    @states = Hash.new do |known_states, kernel|
      channels, closure = @grammar.closure(kernel)
      @channels.concat(channels.to_a)
      state = ParserState.new(self, closure)
      # Register the state before following its transitions so that a
      # recursive lookup of this same kernel finds it instead of rebuilding.
      known_states[kernel] = state
      state.transition_items.each do |symbol, items|
        target_kernel = ItemSet.new(items.collect { |item| item.next_item })
        target_state = known_states[target_kernel]
        items.each do |item|
          @channels << @grammar.passive_channel(item, target_state.items[item.next_item])
        end
        @transitions[state][symbol] = target_state
      end
      state
    end
    initialize_states
  end

  # Seeds the start state from the grammar's start productions, then pumps
  # the channels and fills in shift and reduce actions.
  #
  # Raises NoStartProductionsError when the start symbol has no productions.
  def initialize_states
    start_productions = @grammar.productions_for_symbol(@grammar.start_symbol)
    raise NoStartProductionsError.new(@grammar) if start_productions.empty?

    start_items = ItemSet.new(start_productions.collect { |production| Item.new(production, 0) })
    start_items.each { |start_item| start_item.lookaheadset << @grammar.end_symbol }
    @start_state = @states[start_items]
    pump_channels
    generate_shift_actions
    generate_reduce_actions
  end

  # Returns Ruby source text for a Dhaka::CompiledParser subclass named
  # +parser_class_name+: a header, one section per state, and a closing "end".
  def compile_to_ruby_source_as(parser_class_name)
    sections = [
      "class #{parser_class_name} < Dhaka::CompiledParser",
      " self.grammar = #{@grammar.name}",
      " start_with #{start_state.id}"
    ]
    states.each { |state| sections << "#{state.compile_to_ruby_source}" }
    sections << "end"
    sections.join("\n\n")
  end

  # Returns a Graphviz "digraph" description with one node per state and one
  # labelled edge per transition.
  def to_dot
    lines = ["digraph x {", "node [fontsize=\"10\" shape=box size=\"5\"]"]
    lines.concat(states.collect { |state| state.to_dot })
    states.each do |state|
      @transitions[state].each do |symbol, dest_state|
        lines << "#{state.dot_name} -> #{dest_state.dot_name} [label=\"#{symbol.name}\"]"
      end
    end
    lines << '}'
    lines.join("\n")
  end

  # All states built so far, as an array.
  def states
    @states.values
  end

  # Registers a ShiftAction, keyed by symbol name, for every recorded
  # transition of every state.
  def generate_shift_actions
    states.each do |state|
      @transitions[state].each do |symbol, destination|
        state.actions[symbol.name] = ShiftAction.new(destination)
      end
    end
  end

  # Registers reduce actions for every item that has no next symbol.
  def generate_reduce_actions
    states.each do |state|
      finished_items = state.items.values.reject { |item| item.next_symbol }
      finished_items.each do |item|
        create_reduction_actions_for_item_and_state(item, state)
      end
    end
  end

  # Adds a ReduceAction for +item+'s production under each of the item's
  # lookahead symbols.
  #
  # Raises ParserConflictError when the state already has an action
  # registered under that lookahead's name.
  def create_reduction_actions_for_item_and_state(item, state)
    item.lookaheadset.each do |lookahead|
      existing_action = state.actions[lookahead.name]
      new_action = ReduceAction.new(item.production)
      raise ParserConflictError.new(state, existing_action, new_action) if existing_action

      state.actions[lookahead.name] = new_action
    end
  end

  # Repeats passes over the channels until a pass finds no channel whose
  # #pump returns a truthy value. Each pass stops at the first such channel.
  def pump_channels
    loop do
      break unless @channels.any? { |channel| channel.pump }
    end
  end
end
|
103
|
+
|
104
|
+
|
105
|
+
# Raised when two actions compete for the same symbol in one parser state.
class ParserConflictError < StandardError
  # state::           the state in which the clash was found
  # existing_action:: the action already registered
  # new_action::      the action that could not be registered
  def initialize(state, existing_action, new_action)
    @state, @existing_action, @new_action = state, existing_action, new_action
  end

  # Multi-line description naming the state and both actions.
  def to_s
    ["Conflict in state #{@state}", " Existing: #{@existing_action}", " New: #{@new_action}"].join("\n")
  end
end
|
115
|
+
|
116
|
+
# Raised when a grammar's start symbol has no productions.
class NoStartProductionsError < StandardError
  # grammar:: the offending grammar; its #name is used in the message.
  def initialize(grammar)
    @grammar = grammar
  end

  # One-line description that names the grammar.
  def to_s
    grammar_name = @grammar.name
    "No start productions defined for #{grammar_name}"
  end
end
|
124
|
+
end
|
125
|
+
|
@@ -0,0 +1,35 @@
|
|
1
|
+
module Dhaka
|
2
|
+
# Drives a single parse over a token stream, keeping a stack of parser
# states and a stack of parse-tree nodes.
class ParserRun
  attr_reader :state_stack, :token_stream, :node_stack

  # grammar::      the grammar being parsed (stored, not otherwise used here)
  # start_state::  the state the run begins in
  # token_stream:: indexable collection of tokens; each token is asked for
  #                grammar_symbol.name
  def initialize(grammar, start_state, token_stream)
    @grammar = grammar
    @node_stack = []
    @state_stack = [start_state]
    @token_stream = token_stream
    @current_token_index = 0
  end

  # The token at the current position, or nil once the stream is exhausted.
  def current_token
    @token_stream[@current_token_index]
  end

  # Pushes the current token onto the node stack as a leaf and moves on to
  # the next token.
  def advance
    node_stack << ParseTreeLeafNode.new(current_token)
    @current_token_index += 1
  end

  # Processes every token in turn.
  #
  # Returns the first ParseErrorResult produced by #execute_action, or a
  # ParseSuccessResult wrapping the bottom of the node stack when all
  # tokens were consumed.
  def run
    while current_token
      error = execute_action(current_token.grammar_symbol.name)
      return error if error

      advance
    end
    ParseSuccessResult.new(node_stack[0])
  end

  # Looks up the action registered for +symbol_name+ in the state on top of
  # the state stack and evaluates its code in the context of this run. The
  # action code returns the symbol names to process next; each is executed
  # in order.
  #
  # Returns nil on success, or a ParseErrorResult (carrying the current
  # token index) as soon as the state on top of the stack has no action for
  # a symbol. Errors from the follow-up symbols are propagated; previously
  # they were discarded, which let #run advance past a token that could not
  # be handled.
  def execute_action(symbol_name)
    action = state_stack[-1].actions[symbol_name]
    return ParseErrorResult.new(@current_token_index) unless action

    instance_eval(&action.action_code).each do |next_symbol_name|
      error = execute_action(next_symbol_name)
      return error if error
    end
    nil
  end
end
|
35
|
+
end
|
@@ -0,0 +1,66 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
require 'set'
|
3
|
+
module Dhaka
|
4
|
+
# One state of the parser: a collection of items, a table of actions keyed
# by symbol name, and a numeric id.
class ParserState
  attr_accessor :items, :actions, :id

  # Counter shared by all states; supplies ids when none is given.
  @@state_id = 0

  # Returns the next unused id and bumps the shared counter.
  def self.next_state_id
    issued = @@state_id
    @@state_id = issued + 1
    issued
  end

  # parser:: object in whose context #for_symbol blocks are evaluated
  # items::  the state's items; must respond to #values
  # id::     optional explicit id; a fresh one is allocated when omitted
  def initialize(parser, items, id=nil)
    @parser = parser
    @items = items
    @actions = {}
    @id = id || ParserState.next_state_id
  end

  # Groups the items that still have a next symbol by that symbol.
  # Returns a hash of symbol => ItemSet; items with no next symbol are left out.
  def transition_items
    grouped = Hash.new { |table, symbol| table[symbol] = ItemSet.new }
    @items.values.each do |item|
      symbol = item.next_symbol
      grouped[symbol] << item if symbol
    end
    grouped
  end

  # Node name used in Graphviz output; same as #to_s.
  def dot_name
    to_s
  end

  # Graphviz node declaration whose label lists the items, separated by a
  # literal backslash-n sequence.
  def to_dot
    label = items.values.join('\n')
    "#{dot_name} [label=\"#{label}\"]"
  end

  # Ruby source for an at_state block holding one for_symbol line per action.
  def compile_to_ruby_source
    body = actions.map do |symbol_name, action|
      " for_symbol('#{symbol_name}') { #{action.compile_to_ruby_source} }\n"
    end
    " at_state(#{@id}) {\n" + body.join + " }"
  end

  # Evaluates the block in the context of the parser given at construction
  # and stores the result as the action for +symbol_name+.
  def for_symbol(symbol_name, &blk)
    actions[symbol_name] = @parser.instance_eval(&blk)
  end

  # "State" followed by the id.
  def to_s
    "State#{id}"
  end
end
|
57
|
+
|
58
|
+
# A Set whose hash code and eql? are based on its members, so that sets
# with the same members can be used interchangeably as Hash keys.
class ItemSet < Set
  # XOR of the members' hash codes. The fold is seeded with 0 so an empty
  # set yields 0 rather than nil (an unseeded inject returns nil on an empty
  # collection, which is not a usable hash code). Results for non-empty sets
  # are unchanged because x ^ 0 == x.
  def hash
    inject(0) { |combined, item| combined ^ item.hash }
  end

  # Two item sets are eql? exactly when they are ==.
  def eql?(other)
    self == other
  end
end
|
66
|
+
end
|
data/lib/parser/token.rb
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
module Dhaka
|
2
|
+
# A token: a grammar symbol paired with a value.
class Token
  attr_accessor :grammar_symbol, :value

  # grammar_symbol:: the symbol this token is an instance of; #to_s asks it
  #                  for its name
  # value::          the value carried by the token
  def initialize(grammar_symbol, value)
    @grammar_symbol = grammar_symbol
    @value = value
  end

  # The name of the token's grammar symbol.
  def to_s
    "#{@grammar_symbol.name}"
  end

  # Two tokens are equal when both their grammar symbols and their values
  # are equal. Objects that do not expose grammar_symbol and value (nil,
  # strings, ...) compare as not equal instead of raising NoMethodError.
  def ==(other)
    return false unless other.respond_to?(:grammar_symbol) && other.respond_to?(:value)

    (grammar_symbol == other.grammar_symbol) && (value == other.value)
  end
end
|
15
|
+
end
|
@@ -0,0 +1,88 @@
|
|
1
|
+
module Dhaka
|
2
|
+
# Raised by a tokenizer run when the current state has no action for the
# character at the current position.
class UnrecognizedInputCharacterException < StandardError
  attr_reader :input, :char_index

  # input::      the text being tokenized
  # char_index:: position in +input+ of the character that had no action
  def initialize(input, char_index)
    @input, @char_index = input, char_index
  end

  # Names the offending character and echoes the whole input on a second line.
  def to_s
    offender = input[char_index].chr
    "Unrecognized character #{offender} encountered while tokenizing:\n #{input}"
  end
end
|
12
|
+
|
13
|
+
# One state of a tokenizer: a table mapping characters to the block that
# should run when that character is seen.
class TokenizerState
  attr_reader :actions

  def initialize
    @actions = {}
  end

  # Registers the same block for every element yielded by characters.each.
  def for_characters(characters, &handler)
    characters.each { |character| actions[character] = handler }
  end

  # Registers the block under character[0].
  def for_character(character, &handler)
    key = character[0]
    actions[key] = handler
  end

  # The inspect form of the action table.
  def to_s
    actions.inspect
  end
end
|
35
|
+
|
36
|
+
# Base class for tokenizers. Each subclass gets its own table of states and
# describes them with for_state.
class Tokenizer
  # Hook run when a subclass is created: gives the subclass a class-level
  # +states+ accessor and initializes it to a hash that creates a
  # TokenizerState the first time a state name is looked up.
  def self.inherited(tokenizer)
    tokenizer.singleton_class.class_eval { attr_accessor :states }
    tokenizer.states = Hash.new { |table, state_name| table[state_name] = TokenizerState.new }
  end

  # Evaluates the block in the context of the named state, creating the
  # state on first use.
  def self.for_state(state_name, &blk)
    target_state = states[state_name]
    target_state.instance_eval(&blk)
  end

  # Runs a new TokenizerRun over +input+ and returns whatever the run returns.
  def self.tokenize(input)
    tokenizer_run = TokenizerRun.new(self, input)
    tokenizer_run.run
  end
end
|
53
|
+
|
54
|
+
# A single pass of a tokenizer over an input. State actions are evaluated
# in the context of the run, so they can call #advance, #switch_to,
# #curr_char, #tokens and the #accumulator accessor.
class TokenizerRun
  attr_accessor :accumulator
  attr_reader :tokens

  # tokenizer:: object whose #states maps state names to states; the run
  #             starts in states[:idle_state]
  # input::     the text to tokenize
  def initialize(tokenizer, input)
    @tokenizer = tokenizer
    @input = input
    @current_state = tokenizer.states[:idle_state]
    @curr_char_index = 0
    @tokens = []
  end

  # Repeatedly runs the current state's action for the current character
  # until no character is left, then returns the collected tokens. The
  # actions themselves are responsible for calling #advance.
  #
  # Raises UnrecognizedInputCharacterException when the current state has
  # no action for the current character.
  def run
    while (character = curr_char)
      handler = @current_state.actions[character]
      raise UnrecognizedInputCharacterException.new(@input, @curr_char_index) unless handler

      instance_eval(&handler)
    end
    tokens
  end

  # The character at the current position, or nil past the end of input.
  def curr_char
    raw = @input[@curr_char_index]
    raw && raw.chr
  end

  # Moves to the next input position.
  def advance
    @curr_char_index += 1
  end

  # Makes the named state the current one.
  def switch_to(state_name)
    @current_state = @tokenizer.states[state_name]
  end
end
|
88
|
+
end
|
data/test/all_tests.rb
ADDED
@@ -0,0 +1,11 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
require 'test/unit'
|
3
|
+
|
4
|
+
require 'grammar_test'
|
5
|
+
require 'parser_test'
|
6
|
+
require 'arithmetic_evaluator_test'
|
7
|
+
require 'compiled_parser_test'
|
8
|
+
require 'evaluator_test'
|
9
|
+
require 'arithmetic_tokenizer_test'
|
10
|
+
require 'malformed_grammar_test'
|
11
|
+
require 'brackets_test'
|
@@ -0,0 +1,70 @@
|
|
1
|
+
require File.dirname(__FILE__)+'/../lib/dhaka'
|
2
|
+
require 'arithmetic_grammar'
|
3
|
+
|
4
|
+
# Test evaluator for ArithmeticGrammar. Each for_* rule below computes the
# value of a node from its child_nodes; the rule names presumably match
# production names declared in ArithmeticGrammar (not visible here — confirm).
class ArithmeticEvaluator < Dhaka::Evaluator
  self.grammar = ArithmeticGrammar

  define_evaluation_rules do
    # Binary operators: operands are children 0 and 2.
    for_subtraction { child_nodes[0] - child_nodes[2] }

    for_addition { child_nodes[0] + child_nodes[2] }

    # Left operand is converted with to_f before dividing.
    for_division { child_nodes[0].to_f/child_nodes[2] }

    for_multiplication { child_nodes[0] * child_nodes[2] }

    # A literal evaluates to the value of its first child's token.
    for_getting_literals { child_nodes[0].token.value }

    for_start_production { child_nodes[0] }

    # The wrapped expression is child 1.
    for_unpacking_parenthetized_expression { child_nodes[1] }

    for_empty_args { [] }

    # Child 0 is the callable, child 2 its argument list.
    for_evaluating_function { child_nodes[0].call child_nodes[2] }

    for_concatenating_args { [child_nodes[0]]+child_nodes[2] }

    for_single_args { [child_nodes[0]] }

    # The function objects are the ones supplied to the constructor.
    for_min_function { @min_function }

    for_max_function { @max_function }
  end

  # syntax_tree::  passed on to the superclass constructor
  # min_function:: object returned by the min-function rule
  # max_function:: object returned by the max-function rule
  #
  # The functions are stored before calling super so they are already set
  # when the superclass constructor runs.
  def initialize(syntax_tree, min_function, max_function)
    @min_function, @max_function = min_function, max_function
    super(syntax_tree)
  end
end
|