aurum 0.1.0 → 0.1.1
Sign up to get free protection for your applications and to get access to all the features.
- data/example/expression/expression.rb +8 -2
- data/example/expression/lisp.rb +26 -0
- data/lib/aurum/engine.rb +15 -13
- data/lib/aurum/grammar.rb +37 -45
- data/lib/aurum/lexical_table_generator.rb +7 -1
- data/lib/aurum/parsing_table_generator.rb +254 -235
- data/test/engine/lexer_test.rb +7 -0
- data/test/grammar_definition/character_class_definition_test.rb +1 -1
- data/test/grammar_definition/grammar_definition_test.rb +4 -3
- data/test/lexical_table_generator/pattern_test.rb +6 -0
- data/test/lexical_table_generator/table_generator_test.rb +1 -1
- data/test/parsing_table_generator/augmented_grammar_test.rb +4 -4
- data/test/parsing_table_generator/lalr_n_computation_test.rb +6 -3
- data/test/parsing_table_generator/lr_0_automata_test.rb +7 -4
- data/test/parsing_table_generator/lr_item_test.rb +6 -12
- data/test/parsing_table_generator/parsing_table_state_test.rb +5 -5
- data/test/test_helper.rb +2 -2
- metadata +16 -12
data/example/expression/expression.rb
CHANGED
@@ -19,11 +19,17 @@ class ExpressionGrammar < Aurum::Grammar
|
|
19
19
|
expression expression, '/', expression {expression.value = expression1.value / expression2.value}
|
20
20
|
expression '(', expression, ')'
|
21
21
|
expression _number {expression.value = _number.value.to_i}
|
22
|
-
|
22
|
+
expression '+', _number {expression.value = _number.value.to_i}
|
23
|
+
expression '-', _number {expression.value = -_number.value.to_i}
|
24
|
+
end
|
23
25
|
end
|
24
26
|
|
25
27
|
puts ExpressionGrammar.parse_expression('1 + 2').value
|
28
|
+
puts ExpressionGrammar.parse_expression('(1 + 2)').value
|
29
|
+
puts ExpressionGrammar.parse_expression('-1 - 2').value
|
26
30
|
puts ExpressionGrammar.parse_expression('1 + 2 * 3').value
|
27
31
|
puts ExpressionGrammar.parse_expression('1 * 2 + 3').value
|
28
32
|
puts ExpressionGrammar.parse_expression('1 * (2 + 3)').value
|
29
|
-
puts ExpressionGrammar.parse_expression('1 + (2 + 3) * 4').value
|
33
|
+
puts ExpressionGrammar.parse_expression('1 + (2 + 3) * 4').value
|
34
|
+
puts ExpressionGrammar.parse_expression('1 * - 5').value
|
35
|
+
puts ExpressionGrammar.parse_expression('(1+3) - - 5').value
|
data/example/expression/lisp.rb
ADDED
@@ -0,0 +1,26 @@
|
|
1
|
+
$:.unshift(File.dirname(__FILE__) + '/../../lib')
|
2
|
+
require 'aurum'
|
3
|
+
|
4
|
+
class LispGrammar < Aurum::Grammar
|
5
|
+
tokens do
|
6
|
+
ignore string(' ').one_or_more
|
7
|
+
_number range(?0, ?9).one_or_more
|
8
|
+
end
|
9
|
+
|
10
|
+
productions do
|
11
|
+
expression '(', tuple, ')'
|
12
|
+
tuple '+', atom, atom {tuple.value = atom1.value + atom2.value}
|
13
|
+
tuple tuple, atom {tuple.value = tuple1.value + atom.value}
|
14
|
+
atom _number {atom.value = _number.value.to_i}
|
15
|
+
atom expression
|
16
|
+
end
|
17
|
+
end
|
18
|
+
|
19
|
+
puts LispGrammar.parse_expression('(+ 1 3 4) ').value
|
20
|
+
#puts LispGrammar.parse_expression('-1 - 2').value
|
21
|
+
#puts LispGrammar.parse_expression('1 + 2 * 3').value
|
22
|
+
#puts LispGrammar.parse_expression('1 * 2 + 3').value
|
23
|
+
#puts LispGrammar.parse_expression('1 * (2 + 3)').value
|
24
|
+
#puts LispGrammar.parse_expression('1 + (2 + 3) * 4').value
|
25
|
+
#puts LispGrammar.parse_expression('1 * - 5').value
|
26
|
+
#puts LispGrammar.parse_expression('(1+3) - - 5').value
|
data/lib/aurum/engine.rb
CHANGED
@@ -27,7 +27,7 @@ module Aurum
|
|
27
27
|
end
|
28
28
|
|
29
29
|
Symbol.class_eval {attr_accessor :value}
|
30
|
-
|
30
|
+
Unknown = Symbol.new '$unknown', true
|
31
31
|
class Lexer
|
32
32
|
def initialize table, accepts, lexical_states, input
|
33
33
|
@table, @accepts, @lexical_states, @input = table, accepts, lexical_states, input
|
@@ -44,11 +44,12 @@ module Aurum
|
|
44
44
|
state, char = next_state, @input.get_char
|
45
45
|
next_state = goto state, char
|
46
46
|
end
|
47
|
-
@input.pushback char
|
48
|
-
|
47
|
+
@input.pushback char
|
48
|
+
return Unknown unless actions = @accepts[state]
|
49
|
+
if actions.first == IgnoreAction
|
49
50
|
lexeme = ''
|
50
51
|
else
|
51
|
-
|
52
|
+
actions.first.execute self, lexeme
|
52
53
|
end
|
53
54
|
end
|
54
55
|
@recognized.value = lexeme unless @recognized.value
|
@@ -71,23 +72,24 @@ module Aurum
|
|
71
72
|
|
72
73
|
def recognize token
|
73
74
|
@recognized = Aurum::Symbol.new token, true
|
74
|
-
end
|
75
|
+
end
|
75
76
|
end
|
76
77
|
|
77
78
|
RecognizeTokenAction.class_eval do
|
78
|
-
def execute lexer
|
79
|
-
lexer.recognize token
|
79
|
+
def execute lexer, lexeme
|
80
|
+
recognized = lexer.recognize token
|
81
|
+
action.call recognized, lexeme if action
|
80
82
|
end
|
81
83
|
end
|
82
84
|
|
83
85
|
ChangeStateAction.class_eval do
|
84
|
-
def execute lexer
|
86
|
+
def execute lexer, lexeme
|
85
87
|
lexer.shift_to state
|
86
88
|
end
|
87
89
|
end
|
88
90
|
|
89
91
|
RecognizeTokenAndChangeStateAction.class_eval do
|
90
|
-
def execute lexer
|
92
|
+
def execute lexer, lexeme
|
91
93
|
lexer.recognize token
|
92
94
|
lexer.shift_to state
|
93
95
|
end
|
@@ -118,13 +120,13 @@ module Aurum
|
|
118
120
|
lookahead = lexer.next_symbol
|
119
121
|
end
|
120
122
|
state_stack.slice! -handle.symbols.length..-1
|
121
|
-
symbols = symbol_stack.slice! -handle.symbols.length..-1
|
123
|
+
symbols = symbol_stack.slice! -handle.symbols.length..-1
|
122
124
|
handle.nonterminal == Aurum::START and return value_stack.pop
|
123
125
|
if handle.action
|
124
126
|
context = {handle.nonterminal.name => [SemanticAttributes.new]}
|
125
|
-
handle.symbols.each_with_index do |symbol, index|
|
127
|
+
handle.symbols.reverse.each_with_index do |symbol, index|
|
126
128
|
context[symbol.name] = [] unless context.has_key? symbol.name
|
127
|
-
context[symbol.name] << (symbol.is_terminal ? symbols[index] : value_stack.pop)
|
129
|
+
context[symbol.name] << (symbol.is_terminal ? symbols[-index-1] : value_stack.pop)
|
128
130
|
end
|
129
131
|
SemanticActionContext.new(context).instance_eval &handle.action
|
130
132
|
value_stack.push context[handle.nonterminal.name][0] if context[handle.nonterminal.name]
|
@@ -152,7 +154,7 @@ module Aurum
|
|
152
154
|
def method_missing name, *args
|
153
155
|
name_string = name.to_s
|
154
156
|
index = name_string =~ /\d+/ ? name_string.slice!(/\d+/).to_i : 0
|
155
|
-
@hash[name_string][index] and return @hash[name_string][index]
|
157
|
+
@hash[name_string][-index] and return @hash[name_string][-index]
|
156
158
|
SemanticAttributes.new
|
157
159
|
end
|
158
160
|
end
|
data/lib/aurum/grammar.rb
CHANGED
@@ -4,39 +4,8 @@ module Aurum
|
|
4
4
|
class << nil
|
5
5
|
attr_accessor :action
|
6
6
|
end
|
7
|
-
|
8
|
-
|
9
|
-
instance_methods.each { |m| undef_method m unless m =~ /^__/ || m == 'new' || m=='instance_eval'}
|
10
|
-
|
11
|
-
def method_missing name, *args, &block
|
12
|
-
name_string = name.to_s
|
13
|
-
symbol =
|
14
|
-
case name_string
|
15
|
-
when /^[a-z]/
|
16
|
-
Aurum::Symbol.new name_string, false
|
17
|
-
when '_'
|
18
|
-
nil
|
19
|
-
when /_.+/
|
20
|
-
Aurum::Symbol.new name_string, true
|
21
|
-
end
|
22
|
-
symbol.action = block if block_given?
|
23
|
-
(args.empty? || name_string == '_') and return symbol
|
24
|
-
symbols = args.map do |sym|
|
25
|
-
case sym
|
26
|
-
when String
|
27
|
-
Aurum::Symbol.new "$literal_#{sym}", true
|
28
|
-
when Aurum::Symbol
|
29
|
-
sym
|
30
|
-
end
|
31
|
-
end
|
32
|
-
action = symbols.last.action
|
33
|
-
@definition = {} unless @definition
|
34
|
-
@definition[symbol] = [].to_set unless @definition.has_key? symbol
|
35
|
-
production = Aurum::Production.new symbol, symbols.compact
|
36
|
-
production.action = action if action
|
37
|
-
@definition[symbol] << production
|
38
|
-
end
|
39
|
-
end
|
7
|
+
|
8
|
+
Aurum::RecognizeTokenAction.class_eval {attr_accessor :action}
|
40
9
|
|
41
10
|
class CharacterClassDefinition
|
42
11
|
instance_methods.each { |m| undef_method m unless m =~ /^__/ || m == 'new' || m=='instance_eval'}
|
@@ -52,24 +21,25 @@ module Aurum
|
|
52
21
|
CharacterSet::Interval.new(a, b).to_char_set
|
53
22
|
end
|
54
23
|
|
55
|
-
def
|
24
|
+
def enum a
|
56
25
|
result = CharacterSet.new
|
57
26
|
result << a
|
58
27
|
result
|
59
28
|
end
|
60
|
-
|
29
|
+
|
61
30
|
def method_missing name, char_set, &block
|
62
31
|
@definitions[name] = char_set unless @definitions.has_key? name
|
63
32
|
end
|
64
33
|
end
|
65
34
|
|
66
35
|
class LexicalSpecification
|
36
|
+
alias __extend extend
|
67
37
|
instance_methods.each { |m| undef_method m unless m =~ /^__/ || m == 'new' || m=='instance_eval'}
|
68
38
|
attr_reader :definitions, :character_classes
|
69
39
|
|
70
40
|
def initialize definition = {:initial => {}}, state = :initial
|
71
41
|
@definitions, @character_classes = definition, CharacterClassDefinition.new
|
72
|
-
@lexical_definition = @definitions[state]
|
42
|
+
@lexical_definition, @patterns = @definitions[state], {}
|
73
43
|
end
|
74
44
|
|
75
45
|
def range a, b
|
@@ -80,8 +50,12 @@ module Aurum
|
|
80
50
|
Pattern.from_string literal
|
81
51
|
end
|
82
52
|
|
83
|
-
def
|
84
|
-
Pattern.
|
53
|
+
def enum literal
|
54
|
+
Pattern.from_enum literal
|
55
|
+
end
|
56
|
+
|
57
|
+
def cat *patterns
|
58
|
+
Pattern.concat *(patterns.collect {|x| x.kind_of?(Pattern) ? x : Pattern.from_string(x.to_s)})
|
85
59
|
end
|
86
60
|
|
87
61
|
def shift_to state, *patterns, &config
|
@@ -109,8 +83,21 @@ module Aurum
|
|
109
83
|
__create_pattern RecognizeTokenAndChangeStateAction.new(token.to_s, state), *patterns
|
110
84
|
end
|
111
85
|
|
112
|
-
def method_missing name, *patterns, &action
|
113
|
-
|
86
|
+
def method_missing name, *patterns, &action
|
87
|
+
return __get_pattern(name) if patterns.empty?
|
88
|
+
pattern = Pattern.concat *(patterns.collect {|x| x.kind_of?(Pattern) ? x : Pattern.from_string(x.to_s)})
|
89
|
+
@patterns[name] = pattern
|
90
|
+
if name.to_s =~ /^_/
|
91
|
+
recognize_action = RecognizeTokenAction.new(name.to_s)
|
92
|
+
recognize_action.action = action
|
93
|
+
@lexical_definition[pattern] = recognize_action
|
94
|
+
end
|
95
|
+
pattern
|
96
|
+
end
|
97
|
+
|
98
|
+
def __get_pattern name
|
99
|
+
return @patterns[name] if @patterns.has_key? name
|
100
|
+
Pattern.from_char_set @character_classes.definitions[name]
|
114
101
|
end
|
115
102
|
|
116
103
|
def __create_pattern action, *patterns
|
@@ -150,10 +137,12 @@ module Aurum
|
|
150
137
|
end
|
151
138
|
|
152
139
|
def __associativity direction, *operators
|
140
|
+
symbols = []
|
153
141
|
for operator in operators
|
154
|
-
symbol = __get_symbol
|
155
|
-
@associativities[direction] << symbol unless @associativities[direction].include? symbol
|
156
|
-
end
|
142
|
+
symbols << (symbol = __get_symbol(operator))
|
143
|
+
@associativities[direction] << symbol unless @associativities[direction].include? symbol
|
144
|
+
end
|
145
|
+
@precedences << symbols.uniq
|
157
146
|
end
|
158
147
|
end
|
159
148
|
|
@@ -190,8 +179,8 @@ module Aurum
|
|
190
179
|
|
191
180
|
class Grammar
|
192
181
|
def self.character_classes &block
|
193
|
-
@lexical_sepcification = LexicalSpecification.new unless @lexical_sepcification
|
194
|
-
@lexical_sepcification.character_classes.instance_eval &block
|
182
|
+
@lexical_sepcification = LexicalSpecification.new unless @lexical_sepcification
|
183
|
+
@lexical_sepcification.character_classes.instance_eval &block
|
195
184
|
end
|
196
185
|
|
197
186
|
def self.tokens &block
|
@@ -210,6 +199,7 @@ module Aurum
|
|
210
199
|
end
|
211
200
|
|
212
201
|
def self.lexer input
|
202
|
+
@productions = ProductionDefinition.new unless @productions
|
213
203
|
@lexical_sepcification.definitions[:initial].merge!(@productions.__literals)
|
214
204
|
generator = Aurum::LexicalTableGenerator.new @lexical_sepcification.definitions
|
215
205
|
table, accepts = generator.lexical_table
|
@@ -217,6 +207,8 @@ module Aurum
|
|
217
207
|
end
|
218
208
|
|
219
209
|
def self.start_from start_symbol
|
210
|
+
@productions = ProductionDefinition.new unless @productions
|
211
|
+
@precedences = OperatorPrecedenceDefinition.new unless @precedences
|
220
212
|
generator = Aurum::ParsingTableGenerator.new @productions.__definitions, @precedences.precedences, @precedences.associativities
|
221
213
|
productions = generator.start_from(Aurum::Symbol.new(start_symbol.to_s, false)).productions
|
222
214
|
table, lookeahead_level = generator.parsing_table
|
data/lib/aurum/lexical_table_generator.rb
CHANGED
@@ -12,7 +12,7 @@ module Aurum
|
|
12
12
|
end
|
13
13
|
|
14
14
|
def lexical_table
|
15
|
-
construct_automata
|
15
|
+
construct_automata
|
16
16
|
make_initial_partitions
|
17
17
|
refine_partitions
|
18
18
|
@partitions.size < @lexical_automata.table.size ? construct_minimize_automata : [@lexical_automata.table, @accept_states]
|
@@ -104,6 +104,12 @@ module Aurum
|
|
104
104
|
automata.connect 0, set, 1
|
105
105
|
new automata, 1
|
106
106
|
end
|
107
|
+
|
108
|
+
def self.from_enum enum_literal
|
109
|
+
automata = Automata.new enum_literal.length + 2
|
110
|
+
enum_literal.each_byte {|byte| automata.connect 0, CharacterSet::Interval.new(byte).to_char_set, 1}
|
111
|
+
new automata, 1
|
112
|
+
end
|
107
113
|
|
108
114
|
def self.concat *patterns
|
109
115
|
automata, index = Automata.new, 0
|
data/lib/aurum/parsing_table_generator.rb
CHANGED
@@ -1,11 +1,16 @@
|
|
1
1
|
require 'set'
|
2
|
+
require 'logger'
|
2
3
|
|
3
4
|
module Aurum
|
4
5
|
Symbol, Production= Struct.new(:name, :is_terminal), Struct.new(:nonterminal, :symbols)
|
5
|
-
ShiftAction, ReduceAction = Struct.new(:state, :is_lookahead_shift), Struct.new(:handle, :is_read_reduce)
|
6
|
+
ShiftAction, ReduceAction = Struct.new(:state, :is_lookahead_shift), Struct.new(:handle, :is_read_reduce)
|
7
|
+
Production.class_eval {attr_accessor :index}
|
6
8
|
|
7
9
|
START, EOF = Symbol.new('$start', false), Symbol.new('$eof', true)
|
8
10
|
|
11
|
+
Log = Logger.new(STDOUT)
|
12
|
+
Log.level = Logger::INFO
|
13
|
+
|
9
14
|
class ParsingTableGenerator
|
10
15
|
attr_reader :symbols, :productions
|
11
16
|
|
@@ -17,19 +22,20 @@ module Aurum
|
|
17
22
|
end
|
18
23
|
|
19
24
|
def start_from start
|
20
|
-
|
21
|
-
@symbols, @productions, @nullables, @first_sets = [], [@start_production], [], {START => []}
|
22
|
-
find_all_used_symbols_and_productions start
|
25
|
+
initialize_augmented_grammar start
|
23
26
|
compute_nullable_symbols
|
24
27
|
compute_first_sets
|
25
28
|
self
|
26
29
|
end
|
27
30
|
|
28
31
|
def parsing_table
|
32
|
+
Log.debug 'Start constructing LR(0) automata.'
|
29
33
|
construct_LR0_automata
|
30
|
-
|
34
|
+
Log.debug "Finished, #{@states.size} LR(0) states constructed."
|
35
|
+
Log.debug "#{@inconsistents.size} inconsistent states found."
|
36
|
+
if @inconsistents.size > 0
|
31
37
|
compute_LALR_1_lookahead
|
32
|
-
compute_LALR_n_lookahead
|
38
|
+
compute_LALR_n_lookahead unless @conflicts.empty?
|
33
39
|
end
|
34
40
|
parsing_table = []
|
35
41
|
for state in @states do
|
@@ -40,67 +46,102 @@ module Aurum
|
|
40
46
|
return parsing_table, @lookahead_level
|
41
47
|
end
|
42
48
|
|
43
|
-
private
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
49
|
+
private
|
50
|
+
# BEGIN utils methods
|
51
|
+
def fixed_point
|
52
|
+
1 until !yield false
|
53
|
+
end
|
54
|
+
|
55
|
+
def working_list unvisited
|
56
|
+
yield unvisited, unvisited.pop while !unvisited.empty?
|
57
|
+
end
|
58
|
+
|
59
|
+
def mark_working_list uvisited, result, item
|
60
|
+
unless result.include? item
|
61
|
+
result << item
|
62
|
+
uvisited << item
|
63
|
+
end
|
64
|
+
end
|
65
|
+
# END utils methods
|
66
|
+
|
67
|
+
# BEGIN augmented grammar
|
68
|
+
def initialize_augmented_grammar start
|
69
|
+
@symbols, @productions = [start], []
|
70
|
+
@start_production = add_production Production.new(START, [start])
|
71
|
+
@nullables, @first_sets, @closures = [].to_set, {START => []}, {}
|
72
|
+
working_list [start] do |unvisited, visiting|
|
73
|
+
@first_sets[visiting], @closures[visiting] = visiting.is_terminal ? [visiting] : [], []
|
74
|
+
for production in @definition[visiting]
|
75
|
+
add_production(production).symbols.each {|symbol| mark_working_list unvisited, @symbols, symbol}
|
76
|
+
@closures[visiting] += closure([LRItem.new(production, 0)])
|
77
|
+
end unless visiting.is_terminal
|
78
|
+
end
|
79
|
+
end
|
80
|
+
|
81
|
+
def add_production production
|
82
|
+
production.index = @productions.size
|
83
|
+
(@productions << production).last
|
84
|
+
end
|
58
85
|
|
59
86
|
def compute_nullable_symbols
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
end unless nullable? production.nonterminal
|
67
|
-
end
|
68
|
-
end while changed
|
87
|
+
fixed_point do |changed|
|
88
|
+
@productions.each do |production|
|
89
|
+
changed |= @nullables.add? production.nonterminal if nullable? production.symbols
|
90
|
+
end
|
91
|
+
changed
|
92
|
+
end
|
69
93
|
end
|
70
94
|
|
71
95
|
def compute_first_sets
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
96
|
+
fixed_point do |changed|
|
97
|
+
for production in @productions do
|
98
|
+
set = @first_sets[production.nonterminal]
|
99
|
+
for symbol in production.symbols do
|
100
|
+
changed |= set.size != set.replace(set | @first_sets[symbol]).size
|
101
|
+
break unless nullable? [symbol]
|
102
|
+
end
|
103
|
+
end
|
104
|
+
changed
|
105
|
+
end
|
82
106
|
end
|
83
107
|
|
84
|
-
def nullable?
|
85
|
-
|
108
|
+
def nullable? symbols
|
109
|
+
return true if symbols.empty?
|
110
|
+
symbols.all?{|symbol| @nullables.include? symbol }
|
86
111
|
end
|
87
|
-
|
112
|
+
|
113
|
+
def closure items
|
114
|
+
result = items.dup
|
115
|
+
working_list items.dup do |unvisited, visiting|
|
116
|
+
if @closures[visiting.dot_symbol]
|
117
|
+
result |= @closures[visiting.dot_symbol]
|
118
|
+
else
|
119
|
+
@definition[visiting.dot_symbol].each {|x| mark_working_list unvisited, result, LRItem.new(x, 0)}
|
120
|
+
end unless visiting.is_handle || visiting.dot_symbol.is_terminal
|
121
|
+
end
|
122
|
+
result
|
123
|
+
end
|
124
|
+
# END augmented grammar
|
125
|
+
|
126
|
+
# BEGIN LR(0) automata construction
|
88
127
|
def construct_LR0_automata
|
89
128
|
@lookahead_level = 0
|
90
129
|
start_state = State.new closure([LRItem.new(@start_production, 0)])
|
91
|
-
@states
|
92
|
-
|
93
|
-
|
94
|
-
|
130
|
+
@states = [start_state]
|
131
|
+
@inconsistents = start_state.inconsistent? ? [start_state] : []
|
132
|
+
working_list [start_state] do |unvisited, visiting|
|
133
|
+
for item in visiting.non_handles
|
95
134
|
symbol = item.dot_symbol
|
96
135
|
new_state = goto visiting, symbol
|
97
136
|
if (read_reduce = new_state.read_reduce)
|
98
|
-
|
137
|
+
visiting[symbol] << ReduceAction.new(read_reduce.index, true)
|
99
138
|
else
|
100
139
|
if index = @states.index(new_state)
|
101
140
|
new_state = @states[index]
|
102
141
|
else
|
103
|
-
|
142
|
+
@states << new_state
|
143
|
+
@inconsistents << new_state if new_state.inconsistent?
|
144
|
+
unvisited << new_state
|
104
145
|
index = @states.length - 1
|
105
146
|
end
|
106
147
|
visiting[symbol] << ShiftAction.new(index, false)
|
@@ -109,101 +150,124 @@ module Aurum
|
|
109
150
|
end
|
110
151
|
end
|
111
152
|
end
|
153
|
+
|
154
|
+
def goto items, symbol
|
155
|
+
result = []
|
156
|
+
for item in items
|
157
|
+
result << LRItem.new(item.production, item.position + 1) if symbol == item.dot_symbol
|
158
|
+
end
|
159
|
+
State.new closure(result)
|
160
|
+
end
|
161
|
+
# END LR(0) automata construction
|
112
162
|
|
163
|
+
# BEGIN lookahead computation
|
113
164
|
def compute_LALR_1_lookahead
|
114
|
-
|
165
|
+
@lookahead_level, @conflicts = 1, []
|
115
166
|
@lookahead_config_stack, @lookahead_indicitor, @lookahead_result = [], {}, {}
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
end
|
129
|
-
end
|
130
|
-
end
|
131
|
-
end
|
167
|
+
handle_each @inconsistents do |state, handle|
|
168
|
+
lookahead_each(state, handle.production) do |production, lookahead|
|
169
|
+
if state.only_shift?(lookahead) && @precedence_table.operator?(lookahead)
|
170
|
+
if @precedence_table.compare(production.operator, lookahead) >= 0
|
171
|
+
state[lookahead].clear
|
172
|
+
state[lookahead] << ReduceAction.new(production.index, false)
|
173
|
+
end
|
174
|
+
else
|
175
|
+
state[lookahead] << ReduceAction.new(production.index, false)
|
176
|
+
end
|
177
|
+
end
|
178
|
+
@conflicts << state if state.conflict?
|
132
179
|
end
|
133
180
|
end
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
181
|
+
|
182
|
+
def handle_each states
|
183
|
+
states.each {|state| state.handles.each {|handle| yield state, handle}}
|
184
|
+
end
|
185
|
+
|
186
|
+
def lookahead_each state, production
|
187
|
+
for predsucceor in state.predsucceors production.symbols.reverse
|
188
|
+
config = Configuration.new predsucceor, production.nonterminal
|
189
|
+
(@lookahead_indicitor[config] ? @lookahead_result[config] : compute_follow_set(config)).each {|x| yield production, x}
|
190
|
+
end
|
191
|
+
end
|
192
|
+
|
193
|
+
def compute_follow_set config
|
194
|
+
state, nonterminal = config.state, config.symbol
|
195
|
+
if START == nonterminal
|
196
|
+
@lookahead_indicitor[config] = 65535
|
197
|
+
@lookahead_result[config] = [EOF]
|
198
|
+
else
|
199
|
+
@lookahead_config_stack.push config
|
200
|
+
@lookahead_indicitor[config] = (d = @lookahead_config_stack.length)
|
201
|
+
@lookahead_result[config] = read_set state, nonterminal
|
202
|
+
each_included_by state, nonterminal do |new_config|
|
203
|
+
compute_follow_set new_config unless @lookahead_indicitor[new_config]
|
204
|
+
@lookahead_indicitor[config] = [@lookahead_indicitor[config], @lookahead_indicitor[new_config]].min
|
205
|
+
@lookahead_result[config] |= @lookahead_result[new_config]
|
206
|
+
end
|
207
|
+
connected = nil
|
208
|
+
until connected == config
|
209
|
+
connected = @lookahead_config_stack.pop
|
210
|
+
@lookahead_result[connected] = @lookahead_result[config].dup
|
211
|
+
@lookahead_indicitor[connected] = 65535
|
212
|
+
end if @lookahead_indicitor[config] == d
|
156
213
|
end
|
214
|
+
@lookahead_result[config]
|
157
215
|
end
|
158
|
-
|
159
|
-
def
|
160
|
-
result
|
161
|
-
|
162
|
-
visiting = unmarked.pop
|
163
|
-
for production in @definition[visiting.dot_symbol] do
|
164
|
-
item = LRItem.new production, 0
|
165
|
-
[result, unmarked].each {|x| x << item} unless result.include? item
|
166
|
-
end unless visiting.handle? || visiting.dot_symbol.is_terminal
|
167
|
-
end
|
216
|
+
|
217
|
+
def read_set state, symbol
|
218
|
+
result = []
|
219
|
+
each_read_by(state, symbol) {|q, y| result |= @first_sets[y] }
|
168
220
|
result
|
169
221
|
end
|
170
222
|
|
171
|
-
def
|
172
|
-
|
173
|
-
|
174
|
-
|
223
|
+
def each_read_by state, symbol
|
224
|
+
index = state.goto symbol
|
225
|
+
for item in @states[index]
|
226
|
+
for symbol in item.second_part
|
227
|
+
yield state, symbol
|
228
|
+
nullable? [symbol] or break
|
229
|
+
end
|
230
|
+
end if index
|
175
231
|
end
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
|
184
|
-
|
185
|
-
|
186
|
-
|
187
|
-
|
188
|
-
|
189
|
-
|
190
|
-
@
|
191
|
-
|
192
|
-
|
193
|
-
compute_follow_set new_config unless @lookahead_indicitor[new_config]
|
194
|
-
@lookahead_indicitor[config] = [@lookahead_indicitor[config], @lookahead_indicitor[new_config]].min
|
195
|
-
@lookahead_result[config] |= @lookahead_result[new_config]
|
232
|
+
|
233
|
+
def each_included_by state, nonterminal
|
234
|
+
for item in state
|
235
|
+
if item.dot_symbol == nonterminal && nullable?(item.second_part[1..-1])
|
236
|
+
first_part = item.production.symbols.slice 0, item.position
|
237
|
+
predsucceors = state.predsucceors first_part.reverse
|
238
|
+
predsucceors.each {|s| yield Configuration.new(s, item.production.nonterminal) }
|
239
|
+
end
|
240
|
+
end
|
241
|
+
end
|
242
|
+
|
243
|
+
def compute_LALR_n_lookahead
|
244
|
+
@stack_seen = []
|
245
|
+
@conflicts.each do |state|
|
246
|
+
@current_lookahead_level = 1
|
247
|
+
for lookahead, actions in state.conflicted_actions do
|
248
|
+
resolve_conficts state, lookahead, sources_of(state, lookahead, actions)
|
196
249
|
end
|
197
|
-
|
198
|
-
until connected == config
|
199
|
-
connected = @lookahead_config_stack.pop
|
200
|
-
@lookahead_result[connected] = @lookahead_result[config].dup
|
201
|
-
@lookahead_indicitor[connected] = 65535
|
202
|
-
end if @lookahead_indicitor[config] == d
|
250
|
+
@lookahead_level = [@current_lookahead_level, @lookahead_level].max
|
203
251
|
end
|
204
|
-
@lookahead_result[config]
|
205
252
|
end
|
206
|
-
|
253
|
+
|
254
|
+
def sources_of state, lookahead, actions
|
255
|
+
sources = {}
|
256
|
+
for action in actions do
|
257
|
+
if action.kind_of? ShiftAction
|
258
|
+
sources[action] = [[state]].to_set
|
259
|
+
else
|
260
|
+
handle = @productions[action.handle]
|
261
|
+
sources[action] = action.is_read_reduce ? [[state]].to_set : [].to_set
|
262
|
+
for predsucceor in state.predsucceors handle.symbols.reverse
|
263
|
+
@follow_sources_visited = []
|
264
|
+
sources[action].merge follow_sources([predsucceor], handle.nonterminal, lookahead)
|
265
|
+
end
|
266
|
+
end
|
267
|
+
end
|
268
|
+
sources
|
269
|
+
end
|
270
|
+
|
207
271
|
def follow_sources stack, nonterminal, lookahead
|
208
272
|
top = stack.last
|
209
273
|
if stack.length == 1
|
@@ -217,15 +281,17 @@ module Aurum
|
|
217
281
|
stacks = [stack + [q]].to_set if q.direct_read.include?(lookahead)
|
218
282
|
end
|
219
283
|
each_read_by(top, nonterminal) {|q, y| stacks |= follow_sources stack+[q], y, lookahead unless y.is_terminal}
|
220
|
-
|
221
|
-
|
222
|
-
|
223
|
-
|
224
|
-
|
225
|
-
|
226
|
-
|
227
|
-
|
228
|
-
|
284
|
+
top.kernels.each do |item|
|
285
|
+
if !item.is_start && item.dot_symbol == nonterminal
|
286
|
+
c = item.production.nonterminal
|
287
|
+
if item.position < stack.length
|
288
|
+
stacks |= follow_sources stack.slice(0..-item.position-1), c, lookahead
|
289
|
+
else
|
290
|
+
first_part = item.production.symbols.slice 0..-stack.length-1
|
291
|
+
stack[0].predsucceors(first_part).reverse.each {|q| stacks |= follow_sources [q], c, lookahead }
|
292
|
+
end
|
293
|
+
end
|
294
|
+
end
|
229
295
|
stacks
|
230
296
|
end
|
231
297
|
|
@@ -237,7 +303,7 @@ module Aurum
|
|
237
303
|
for stk in stacks
|
238
304
|
raise 'not LALR(n)' if @stack_seen.include? stk
|
239
305
|
@stack_seen << stk
|
240
|
-
for a in next_lookaheads stk, lookahead
|
306
|
+
for a in next_lookaheads stk, lookahead
|
241
307
|
lookahead_state[a] << action
|
242
308
|
end
|
243
309
|
end
|
@@ -246,7 +312,7 @@ module Aurum
|
|
246
312
|
new_sources = {}
|
247
313
|
for action in actions do
|
248
314
|
new_sources[action] = [].to_set
|
249
|
-
for stk in
|
315
|
+
for stk in sources[action] do
|
250
316
|
@follow_sources_visited = []
|
251
317
|
new_sources[action] |= follow_sources stk, lookahead, next_lookahead
|
252
318
|
end
|
@@ -259,53 +325,28 @@ module Aurum
|
|
259
325
|
EOF == lookahead and return [EOF]
|
260
326
|
top = stack.last
|
261
327
|
lookaheads = read_set top, lookahead
|
262
|
-
|
263
|
-
|
264
|
-
|
265
|
-
|
266
|
-
|
267
|
-
|
268
|
-
|
269
|
-
|
328
|
+
top.kernels.each do |item|
|
329
|
+
if !item.is_start && item.dot_symbol == lookahead
|
330
|
+
c = item.production.nonterminal
|
331
|
+
if item.position < stack.length
|
332
|
+
lookaheads |= next_lookaheads stack.slice(0..-item.position-1), c
|
333
|
+
else
|
334
|
+
first_part = item.production.symbols.slice 0..-stack.length-1
|
335
|
+
stack[0].predsucceors(first_part).reverse.each {|q| lookaheads |= compute_follow_set Configuration.new(q, c) }
|
336
|
+
end
|
337
|
+
end
|
270
338
|
end
|
271
339
|
lookaheads
|
272
340
|
end
|
273
341
|
|
274
|
-
def read_set state, symbol
|
275
|
-
result = []
|
276
|
-
each_read_by(state, symbol) {|q, y| result |= @first_sets[y] }
|
277
|
-
result
|
278
|
-
end
|
279
|
-
|
280
|
-
def each_read_by state, symbol
|
281
|
-
index = state.goto symbol
|
282
|
-
for item in @states[index]
|
283
|
-
for symbol in item.second_part
|
284
|
-
yield state, symbol
|
285
|
-
nullable? symbol or break
|
286
|
-
end
|
287
|
-
end if index
|
288
|
-
end
|
289
|
-
|
290
|
-
def each_included_by state, nonterminal
|
291
|
-
for item in state
|
292
|
-
symbols = item.production.symbols
|
293
|
-
symbols.reverse.each_with_index do |symbol, index|
|
294
|
-
first_part = symbols.slice 0, symbols.length - index - 1
|
295
|
-
state.predsucceors(first_part.reverse).each {|s| yield s, item.production.nonterminal} if nonterminal == symbol
|
296
|
-
nullable? symbol or break
|
297
|
-
end
|
298
|
-
end if state
|
299
|
-
end
|
300
|
-
|
301
342
|
def default_action state
|
302
343
|
if !state.empty?
|
303
344
|
handle = nil
|
304
345
|
for x in state
|
305
346
|
p = x.production
|
306
|
-
handle = x if x.
|
347
|
+
handle = x if x.is_handle && p.nonterminal != START && (!handle || handle.production.symbols.length > p.symbols.length)
|
307
348
|
end
|
308
|
-
default_action = handle ? ReduceAction.new(
|
349
|
+
default_action = handle ? ReduceAction.new(handle.production.index, false) : nil
|
309
350
|
else
|
310
351
|
candidates = state.actions.values.inject [] do |candidates, actions|
|
311
352
|
candidates |= actions.find_all {|x| x.kind_of?(Aurum::ReduceAction) && !x.is_read_reduce }
|
@@ -346,30 +387,31 @@ module Aurum
|
|
346
387
|
end
|
347
388
|
|
348
389
|
class State < Array
|
349
|
-
|
390
|
+
attr_reader :actions, :handles, :non_handles, :kernels, :read_reduce, :direct_read
|
350
391
|
def initialize elements
|
351
392
|
super elements
|
352
|
-
|
393
|
+
@actions, @predsucceors, @read_reduce = {}, [], nil
|
394
|
+
@handles, @non_handles, @kernels, @direct_read = [], [], [], [].to_set
|
395
|
+
for item in elements
|
396
|
+
(item.is_handle ? @handles : @non_handles) << item
|
397
|
+
@kernels << item if item.is_kernel
|
398
|
+
@direct_read << item.dot_symbol if item.dot_symbol
|
399
|
+
end
|
400
|
+
@read_reduce = first.production if size == 1 && first.is_handle
|
353
401
|
end
|
354
402
|
|
355
403
|
def [] symbol
|
356
404
|
@actions[symbol] = Set.new([]) unless @actions.has_key? symbol
|
357
405
|
@actions[symbol]
|
358
406
|
end
|
359
|
-
|
360
|
-
|
361
|
-
|
362
|
-
|
363
|
-
|
364
|
-
|
365
|
-
|
366
|
-
|
367
|
-
true
|
368
|
-
end
|
369
|
-
|
370
|
-
def conflicted?
|
371
|
-
!consistent? && @actions.any? {|symbol, actions| actions.length > 1}
|
372
|
-
end
|
407
|
+
|
408
|
+
def inconsistent?
|
409
|
+
@handles.size > 1 || (@handles.size == 1 && @kernels.size != 1)
|
410
|
+
end
|
411
|
+
|
412
|
+
def conflict?
|
413
|
+
inconsistent? && @actions.any? {|symbol, actions| actions.length > 1}
|
414
|
+
end
|
373
415
|
|
374
416
|
def conflicted_actions
|
375
417
|
@actions.find_all {|symbol, actions| actions.length > 1}
|
@@ -379,10 +421,6 @@ module Aurum
|
|
379
421
|
!self[symbol].empty? && @actions[symbol].all? {|x| x.kind_of? ShiftAction}
|
380
422
|
end
|
381
423
|
|
382
|
-
def read_reduce
|
383
|
-
length == 1 && first.handle? ? first.production : nil
|
384
|
-
end
|
385
|
-
|
386
424
|
def goto symbol
|
387
425
|
shift = self[symbol].find {|x| x.kind_of? Aurum::ShiftAction }
|
388
426
|
shift.state if shift
|
@@ -392,54 +430,35 @@ module Aurum
|
|
392
430
|
symbols or return @predsucceors
|
393
431
|
result = [self]
|
394
432
|
for symbol in symbols
|
395
|
-
|
396
|
-
|
397
|
-
|
433
|
+
new_result = []
|
434
|
+
for x in result
|
435
|
+
new_result |= x.predsucceors.find_all {|predsucceor| predsucceor.any? {|item| item.dot_symbol == symbol}}
|
436
|
+
end
|
398
437
|
result.replace new_result
|
399
438
|
end
|
400
439
|
result
|
401
440
|
end
|
402
|
-
|
403
|
-
def direct_read
|
404
|
-
inject [] do |result, item|
|
405
|
-
item.dot_symbol ? result | [item.dot_symbol] : result
|
406
|
-
end
|
407
|
-
end
|
408
|
-
|
441
|
+
|
409
442
|
def == other
|
410
|
-
|
411
|
-
|
412
|
-
|
413
|
-
all? {|x| other.include? x}
|
443
|
+
return false unless other.kind_of?(State) && (@kernels.size == other.kernels.size)
|
444
|
+
return true if equal? other
|
445
|
+
@kernels.all? {|x| other.kernels.include? x}
|
414
446
|
end
|
415
447
|
end
|
416
448
|
|
417
449
|
LRItem, Configuration = Struct.new(:production, :position), Struct.new(:state, :symbol)
|
418
450
|
|
419
451
|
LRItem.class_eval do
|
420
|
-
|
421
|
-
|
422
|
-
|
423
|
-
|
424
|
-
|
425
|
-
|
426
|
-
|
427
|
-
|
428
|
-
|
429
|
-
|
430
|
-
end
|
431
|
-
|
432
|
-
def kernel?
|
433
|
-
handle? || position != 0
|
434
|
-
end
|
435
|
-
|
436
|
-
def first_part
|
437
|
-
production.symbols.slice(0, position)
|
438
|
-
end
|
439
|
-
|
440
|
-
def second_part
|
441
|
-
handle? ? [] : production.symbols.slice(position..-1)
|
442
|
-
end
|
452
|
+
attr_reader :dot_symbol, :second_part, :is_handle, :is_kernel, :is_start
|
453
|
+
|
454
|
+
def initialize production, position
|
455
|
+
super production, position
|
456
|
+
@dot_symbol = production.symbols[position]
|
457
|
+
@is_handle = position >= production.symbols.length
|
458
|
+
@is_kernel = @is_handle || (position != 0)
|
459
|
+
@is_start = production.nonterminal == START
|
460
|
+
@second_part = @is_handle ? [] : production.symbols.slice(position..-1)
|
461
|
+
end
|
443
462
|
end
|
444
463
|
end
|
445
464
|
end
|
data/test/engine/lexer_test.rb
CHANGED
@@ -43,6 +43,13 @@ class LexerTest < Test::Unit::TestCase
|
|
43
43
|
lexer = create_lexer specification, ''
|
44
44
|
assert_equal terminal('$eof'), lexer.next_symbol
|
45
45
|
end
|
46
|
+
|
47
|
+
def test_should_return_unknown_if_not_recognized
|
48
|
+
specification = {:initial => {PATTERN_A => Aurum::RecognizeTokenAction.new('tokenA'),
|
49
|
+
PATTERN_B => Aurum::IgnoreAction}}
|
50
|
+
lexer = create_lexer specification, 'whatever'
|
51
|
+
assert_equal terminal('$unknown'), lexer.next_symbol
|
52
|
+
end
|
46
53
|
|
47
54
|
def create_lexer specification, input
|
48
55
|
generator = Aurum::LexicalTableGenerator.new specification
|
@@ -8,7 +8,7 @@ class CharacterClassDefinitionTest < Test::Unit::TestCase
|
|
8
8
|
|
9
9
|
def test_should_add_char_class_to_definition
|
10
10
|
@character_class.instance_eval do
|
11
|
-
alpha range(?a, ?z) +
|
11
|
+
alpha range(?a, ?z) + enum('ABCDEF')
|
12
12
|
end
|
13
13
|
assert_equal 1, @character_class.definitions.size
|
14
14
|
alpha = @character_class.definitions[:alpha]
|
@@ -1,6 +1,6 @@
|
|
1
1
|
$:.unshift(File.dirname(__FILE__) + '/../')
|
2
2
|
require 'test_helper'
|
3
|
-
|
3
|
+
=begin
|
4
4
|
class ExpressionGrammar < Aurum::Grammar
|
5
5
|
character_classes do
|
6
6
|
number range(?0, ?9)
|
@@ -8,7 +8,7 @@ class ExpressionGrammar < Aurum::Grammar
|
|
8
8
|
|
9
9
|
tokens do
|
10
10
|
ignore string(' ').one_or_more
|
11
|
-
_number
|
11
|
+
_number number.one_or_more
|
12
12
|
end
|
13
13
|
|
14
14
|
precedences do
|
@@ -51,4 +51,5 @@ class GrammarDefinitionTest < Test::Unit::TestCase
|
|
51
51
|
assert_equal token, symbol
|
52
52
|
assert_equal lexeme, symbol.value
|
53
53
|
end
|
54
|
-
end
|
54
|
+
end
|
55
|
+
=end
|
@@ -85,6 +85,12 @@ class PatternTest < Test::Unit::TestCase
|
|
85
85
|
assert match?('comments */', pattern)
|
86
86
|
assert !match?('everything', pattern)
|
87
87
|
end
|
88
|
+
|
89
|
+
def test_should_match_every_char_in_enum_literal
|
90
|
+
pattern = Aurum::Pattern.from_enum('*/')
|
91
|
+
assert match?('*', pattern)
|
92
|
+
assert match?('/', pattern)
|
93
|
+
end
|
88
94
|
|
89
95
|
def match? expected_string, pattern
|
90
96
|
states = closure pattern.automata.table, [0]
|
@@ -112,7 +112,7 @@ class LexicalTableGeneratorTest < Test::Unit::TestCase
|
|
112
112
|
assert recognize?(:state_a, 'pattern_b')
|
113
113
|
assert !recognize?(:all, 'pattern_b')
|
114
114
|
end
|
115
|
-
|
115
|
+
|
116
116
|
def recognize? lexical_state, source
|
117
117
|
begin
|
118
118
|
lexical_state = - @lexical_states.index(lexical_state) - 1
|
@@ -7,7 +7,7 @@ Aurum::ParsingTableGenerator.class_eval do
|
|
7
7
|
end
|
8
8
|
|
9
9
|
class AugmentedGrammarTest < Test::Unit::TestCase
|
10
|
-
def
|
10
|
+
def test_should_find_all_used_symbols
|
11
11
|
generator = parser_generator E=>[production(E, T)], T=>[production(T, F)], F=>[production(F, ID)]
|
12
12
|
generator.start_from E
|
13
13
|
assert_equal [E, T, F, ID], generator.symbols
|
@@ -15,7 +15,7 @@ class AugmentedGrammarTest < Test::Unit::TestCase
|
|
15
15
|
assert_equal [T, F, ID], generator.symbols
|
16
16
|
end
|
17
17
|
|
18
|
-
def
|
18
|
+
def test_should_find_all_used_productions
|
19
19
|
generator = parser_generator E=>[production(E, T)], T=>[production(T, F)], F=>[production(F, ID)]
|
20
20
|
generator.start_from E
|
21
21
|
assert_equal [production(START, E), production(E, T), production(T, F), production(F, ID)].to_set, generator.productions.to_set
|
@@ -31,13 +31,13 @@ class AugmentedGrammarTest < Test::Unit::TestCase
|
|
31
31
|
assert_equal [T].to_set, generator.nullables.to_set
|
32
32
|
end
|
33
33
|
|
34
|
-
def
|
34
|
+
def test_first_set_should_contain_terminals_left_depends_on_nt_dirctly
|
35
35
|
generator = parser_generator E=>[production(E, T, ID), production(E, T, T, T, terminal('other'))], T=>[production(T)]
|
36
36
|
generator.start_from E
|
37
37
|
assert_equal [ID, terminal('other')].to_set, generator.first_sets[E].to_set
|
38
38
|
end
|
39
39
|
|
40
|
-
def
|
40
|
+
def test_should_contain_fist_set_of_nt_which_left_depends_on_nt_dirctly
|
41
41
|
generator = parser_generator E=>[production(E, T, ID), production(E, T, T, T, terminal('other'))], T=>[production(T)], F=>[production(F, T, E)]
|
42
42
|
generator.start_from F
|
43
43
|
assert_equal generator.first_sets[F].to_set, generator.first_sets[E].to_set
|
@@ -11,7 +11,7 @@ class LALRLookaheadComputationTest < Test::Unit::TestCase
|
|
11
11
|
generator = parser_generator EXPRESSION_GRAMMAR_LALR1
|
12
12
|
generator.start_from E
|
13
13
|
generator.construct_LR0_automata
|
14
|
-
states = generator.states.find_all {|x|
|
14
|
+
states = generator.states.find_all {|x| x.inconsistent?}
|
15
15
|
generator.compute_LALR_1_lookahead
|
16
16
|
assert_equal [reduce(0)].to_set, states[0][terminal('$eof')]
|
17
17
|
assert_equal [reduce(2)].to_set, states[1][terminal('+')]
|
@@ -25,10 +25,10 @@ class LALRLookaheadComputationTest < Test::Unit::TestCase
|
|
25
25
|
generator.start_from BNF
|
26
26
|
generator.construct_LR0_automata
|
27
27
|
generator.compute_LALR_1_lookahead
|
28
|
-
conflicted_state = (generator.states.find_all {|x| x.conflicted? })[0]
|
28
|
+
conflicted_state = (generator.states.find_all {|x| x.conflict? })[0]
|
29
29
|
generator.compute_LALR_n_lookahead
|
30
30
|
assert_equal [lookahead_shift(generator.states.length - 1)].to_set, conflicted_state[terminal('s')]
|
31
|
-
assert !conflicted_state.conflicted?
|
31
|
+
assert !conflicted_state.conflict?
|
32
32
|
end
|
33
33
|
|
34
34
|
def test_should_add_reduce_action_to_lookahead_state
|
@@ -62,6 +62,9 @@ class LALRLookaheadComputationTest < Test::Unit::TestCase
|
|
62
62
|
assert_equal 1, level
|
63
63
|
generator = parser_generator IF_GRAMMAR_LALR2
|
64
64
|
table, level = generator.start_from(STATEMENT).parsing_table
|
65
|
+
assert_equal 2, level
|
66
|
+
generator = parser_generator BNF_GRAMMAR_LALR2
|
67
|
+
table, level = generator.start_from(BNF).parsing_table
|
65
68
|
assert_equal 2, level
|
66
69
|
end
|
67
70
|
|
@@ -6,7 +6,7 @@ Aurum::ParsingTableGenerator.class_eval do
|
|
6
6
|
public :closure, :goto, :read_set, :construct_LR0_automata
|
7
7
|
end
|
8
8
|
|
9
|
-
class LR0AutomataTest < Test::Unit::TestCase
|
9
|
+
class LR0AutomataTest < Test::Unit::TestCase
|
10
10
|
def test_closure_should_contain_items_themselves
|
11
11
|
generator = parser_generator E=>[production(E, ID)]
|
12
12
|
assert_equal [LR_item(0, E, ID)], generator.closure([LR_item(0, E, ID)])
|
@@ -14,17 +14,20 @@ class LR0AutomataTest < Test::Unit::TestCase
|
|
14
14
|
|
15
15
|
def test_closure_should_contain_all_right_most_lr_items_of_dot_symbol
|
16
16
|
generator = parser_generator E=>[production(E, T)], T=>[production(T, ID), production(T, terminal('other'))]
|
17
|
+
generator.start_from E
|
17
18
|
closure = generator.closure [LR_item(0, E, T)]
|
18
19
|
[LR_item(0, T, ID), LR_item(0, T, terminal('other'))].each {|x| assert closure.include?(x)}
|
19
20
|
end
|
20
21
|
|
21
22
|
def test_should_return_goto_items_if_expected_symbol_given
|
22
23
|
generator = parser_generator E=>[production(E, T)], T=>[production(T, ID), production(T, terminal('other'))]
|
24
|
+
generator.start_from E
|
23
25
|
assert_equal [LR_item(1, E, T)], generator.goto([LR_item(0, E, T)], T)
|
24
26
|
end
|
25
27
|
|
26
28
|
def test_goto_items_should_be_closured_if_expected_symbol_given
|
27
29
|
generator = parser_generator E=>[production(E, T, T)], T=>[production(T, ID), production(T, terminal('other'))]
|
30
|
+
generator.start_from E
|
28
31
|
goto = generator.goto [LR_item(0, E, T, T)], T
|
29
32
|
[LR_item(0, T, ID), LR_item(0, T, terminal('other'))].each {|x| assert goto.include?(x)}
|
30
33
|
end
|
@@ -39,7 +42,7 @@ class LR0AutomataTest < Test::Unit::TestCase
|
|
39
42
|
assert [LR_item(1, T, terminal('+'), T)], states[1]
|
40
43
|
assert [LR_item(2, T, terminal('+'), T), LR_item(0, T, ID)], states[2]
|
41
44
|
end
|
42
|
-
|
45
|
+
|
43
46
|
def test_should_add_shift_action_to_states
|
44
47
|
generator = parser_generator SIMPLE_GRAMMAR_LR0
|
45
48
|
generator.start_from E
|
@@ -54,7 +57,7 @@ class LR0AutomataTest < Test::Unit::TestCase
|
|
54
57
|
generator.start_from E
|
55
58
|
generator.construct_LR0_automata
|
56
59
|
states = generator.states
|
57
|
-
|
60
|
+
assert_equal [read_reduce(0)].to_set, states[0][E]
|
58
61
|
assert_equal [read_reduce(2)].to_set, states[0][ID]
|
59
62
|
assert_equal [read_reduce(2)].to_set, states[2][ID]
|
60
63
|
assert_equal [read_reduce(1)].to_set, states[2][T]
|
@@ -87,5 +90,5 @@ class LR0AutomataTest < Test::Unit::TestCase
|
|
87
90
|
generator.construct_LR0_automata
|
88
91
|
states = generator.states
|
89
92
|
assert_equal [terminal('id'), terminal('(')].to_set, generator.read_set(states[2], terminal('*')).to_set
|
90
|
-
end
|
93
|
+
end
|
91
94
|
end
|
@@ -8,23 +8,17 @@ class LRItemTest < Test::Unit::TestCase
|
|
8
8
|
end
|
9
9
|
|
10
10
|
def test_should_be_handle_if_at_right_most_position
|
11
|
-
assert LR_item(2, E, T, ID).handle?
|
12
|
-
assert !LR_item(1, E, T, ID).handle?
|
13
|
-
assert LR_item(0, E).handle?
|
11
|
+
assert LR_item(2, E, T, ID).is_handle
|
12
|
+
assert !LR_item(1, E, T, ID).is_handle
|
13
|
+
assert LR_item(0, E).is_handle
|
14
14
|
end
|
15
15
|
|
16
16
|
def test_should_be_kernel_if_not_at_left_most_position
|
17
|
-
assert LR_item(2, E, T, ID).kernel?
|
18
|
-
assert LR_item(1, E, T, ID).kernel?
|
19
|
-
assert !LR_item(0, E, T, ID).kernel?
|
17
|
+
assert LR_item(2, E, T, ID).is_kernel
|
18
|
+
assert LR_item(1, E, T, ID).is_kernel
|
19
|
+
assert !LR_item(0, E, T, ID).is_kernel
|
20
20
|
end
|
21
21
|
|
22
|
-
def test_should_return_zero_to_position_as_first_part
|
23
|
-
assert_equal [], LR_item(0, E, T, ID).first_part
|
24
|
-
assert_equal [T], LR_item(1, E, T, ID).first_part
|
25
|
-
assert_equal [T, ID], LR_item(2, E, T, ID).first_part
|
26
|
-
end
|
27
|
-
|
28
22
|
def test_should_return_position_to_end_as_second_part
|
29
23
|
assert_equal [T, ID], LR_item(0, E, T, ID).second_part
|
30
24
|
assert_equal [ID], LR_item(1, E, T, ID).second_part
|
@@ -9,7 +9,7 @@ class ParsingTableStateTest < Test::Unit::TestCase
|
|
9
9
|
end
|
10
10
|
|
11
11
|
def test_should_be_read_reducable_if_contains_one_and_only_one_handle
|
12
|
-
assert parsing_table_state(LR_item(1, E,
|
12
|
+
assert parsing_table_state(LR_item(1, E, ID)).read_reduce
|
13
13
|
assert !parsing_table_state(LR_item(1, E, T, T)).read_reduce
|
14
14
|
assert !parsing_table_state(LR_item(1, E, T, T), LR_item(1, E, T)).read_reduce
|
15
15
|
end
|
@@ -22,18 +22,18 @@ class ParsingTableStateTest < Test::Unit::TestCase
|
|
22
22
|
end
|
23
23
|
|
24
24
|
def test_should_be_consistent_if_contains_one_and_only_one_handle
|
25
|
-
assert parsing_table_state(LR_item(1, E, T)).consistent?
|
25
|
+
assert !parsing_table_state(LR_item(1, E, T)).inconsistent?
|
26
26
|
end
|
27
27
|
|
28
28
|
def test_should_be_consistent_if_contains_no_handle
|
29
|
-
assert parsing_table_state(LR_item(1, E, T, ID), LR_item(1, E, F, ID), LR_item(1, E, ID, F)).consistent?
|
29
|
+
assert !parsing_table_state(LR_item(1, E, T, ID), LR_item(1, E, F, ID), LR_item(1, E, ID, F)).inconsistent?
|
30
30
|
end
|
31
31
|
|
32
32
|
def test_should_be_inconsistent_if_contains_handle_and_other_kernal_items
|
33
|
-
assert
|
33
|
+
assert parsing_table_state(LR_item(1, E, T), LR_item(1, E, T, ID)).inconsistent?
|
34
34
|
end
|
35
35
|
|
36
36
|
def test_should_return_all_direct_read_symbols
|
37
|
-
|
37
|
+
assert_equal [T].to_set, parsing_table_state(LR_item(1, E, T, T), LR_item(1, E, T)).direct_read
|
38
38
|
end
|
39
39
|
end
|
data/test/test_helper.rb
CHANGED
metadata
CHANGED
@@ -3,8 +3,8 @@ rubygems_version: 0.9.2
|
|
3
3
|
specification_version: 1
|
4
4
|
name: aurum
|
5
5
|
version: !ruby/object:Gem::Version
|
6
|
-
version: 0.1.0
|
7
|
-
date: 2007-05-
|
6
|
+
version: 0.1.1
|
7
|
+
date: 2007-05-26 00:00:00 +10:00
|
8
8
|
summary: Aurum is a LALR(n) parser generator written in Ruby.
|
9
9
|
require_paths:
|
10
10
|
- lib
|
@@ -31,37 +31,41 @@ authors:
|
|
31
31
|
files:
|
32
32
|
- example/expression
|
33
33
|
- example/expression/expression.rb
|
34
|
+
- example/expression/lisp.rb
|
34
35
|
- lib/aurum
|
35
36
|
- lib/aurum/lexical_table_generator.rb
|
36
37
|
- lib/aurum/parsing_table_generator.rb
|
37
|
-
- lib/aurum/grammar.rb
|
38
38
|
- lib/aurum/engine.rb
|
39
|
+
- lib/aurum/grammar.rb
|
40
|
+
- lib/grammars
|
39
41
|
- lib/aurum.rb
|
40
42
|
- test/parsing_table_generator
|
41
43
|
- test/parsing_table_generator/augmented_grammar_test.rb
|
42
|
-
- test/parsing_table_generator/
|
44
|
+
- test/parsing_table_generator/precedence_table_test.rb
|
43
45
|
- test/parsing_table_generator/production_test.rb
|
46
|
+
- test/parsing_table_generator/lr_0_automata_test.rb
|
47
|
+
- test/parsing_table_generator/lalr_n_computation_test.rb
|
44
48
|
- test/parsing_table_generator/parsing_table_state_test.rb
|
45
49
|
- test/parsing_table_generator/lr_item_test.rb
|
46
|
-
- test/
|
47
|
-
- test/
|
50
|
+
- test/engine
|
51
|
+
- test/engine/lexer_test.rb
|
52
|
+
- test/engine/semantic_attributes_test.rb
|
53
|
+
- test/grammars
|
54
|
+
- test/grammars/ruby
|
48
55
|
- test/grammar_definition
|
49
56
|
- test/grammar_definition/grammar_definition_test.rb
|
57
|
+
- test/grammar_definition/production_definition_test.rb
|
58
|
+
- test/grammar_definition/operator_precedence_definition_test.rb
|
50
59
|
- test/grammar_definition/character_class_definition_test.rb
|
51
60
|
- test/grammar_definition/lexical_definition_test.rb
|
52
|
-
- test/grammar_definition/operator_precedence_definition_test.rb
|
53
|
-
- test/grammar_definition/production_definition_test.rb
|
54
61
|
- test/lexical_table_generator
|
55
62
|
- test/lexical_table_generator/interval_test.rb
|
63
|
+
- test/lexical_table_generator/subset_determinizer_test.rb
|
56
64
|
- test/lexical_table_generator/character_set_test.rb
|
57
65
|
- test/lexical_table_generator/automata_test.rb
|
58
66
|
- test/lexical_table_generator/pattern_test.rb
|
59
|
-
- test/lexical_table_generator/subset_determinizer_test.rb
|
60
67
|
- test/lexical_table_generator/table_generator_test.rb
|
61
68
|
- test/test_helper.rb
|
62
|
-
- test/engine
|
63
|
-
- test/engine/semantic_attributes_test.rb
|
64
|
-
- test/engine/lexer_test.rb
|
65
69
|
test_files: []
|
66
70
|
|
67
71
|
rdoc_options: []
|