aurum 0.1.0 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/example/expression/expression.rb +8 -2
- data/example/expression/lisp.rb +26 -0
- data/lib/aurum/engine.rb +15 -13
- data/lib/aurum/grammar.rb +37 -45
- data/lib/aurum/lexical_table_generator.rb +7 -1
- data/lib/aurum/parsing_table_generator.rb +254 -235
- data/test/engine/lexer_test.rb +7 -0
- data/test/grammar_definition/character_class_definition_test.rb +1 -1
- data/test/grammar_definition/grammar_definition_test.rb +4 -3
- data/test/lexical_table_generator/pattern_test.rb +6 -0
- data/test/lexical_table_generator/table_generator_test.rb +1 -1
- data/test/parsing_table_generator/augmented_grammar_test.rb +4 -4
- data/test/parsing_table_generator/lalr_n_computation_test.rb +6 -3
- data/test/parsing_table_generator/lr_0_automata_test.rb +7 -4
- data/test/parsing_table_generator/lr_item_test.rb +6 -12
- data/test/parsing_table_generator/parsing_table_state_test.rb +5 -5
- data/test/test_helper.rb +2 -2
- metadata +16 -12
data/example/expression/expression.rb CHANGED
@@ -19,11 +19,17 @@ class ExpressionGrammar < Aurum::Grammar
  expression expression, '/', expression {expression.value = expression1.value / expression2.value}
  expression '(', expression, ')'
  expression _number {expression.value = _number.value.to_i}
-
+ expression '+', _number {expression.value = _number.value.to_i}
+ expression '-', _number {expression.value = -_number.value.to_i}
+ end
  end

  puts ExpressionGrammar.parse_expression('1 + 2').value
+ puts ExpressionGrammar.parse_expression('(1 + 2)').value
+ puts ExpressionGrammar.parse_expression('-1 - 2').value
  puts ExpressionGrammar.parse_expression('1 + 2 * 3').value
  puts ExpressionGrammar.parse_expression('1 * 2 + 3').value
  puts ExpressionGrammar.parse_expression('1 * (2 + 3)').value
- puts ExpressionGrammar.parse_expression('1 + (2 + 3) * 4').value
+ puts ExpressionGrammar.parse_expression('1 + (2 + 3) * 4').value
+ puts ExpressionGrammar.parse_expression('1 * - 5').value
+ puts ExpressionGrammar.parse_expression('(1+3) - - 5').value
data/example/expression/lisp.rb ADDED
@@ -0,0 +1,26 @@
+ $:.unshift(File.dirname(__FILE__) + '/../../lib')
+ require 'aurum'
+
+ class LispGrammar < Aurum::Grammar
+ tokens do
+ ignore string(' ').one_or_more
+ _number range(?0, ?9).one_or_more
+ end
+
+ productions do
+ expression '(', tuple, ')'
+ tuple '+', atom, atom {tuple.value = atom1.value + atom2.value}
+ tuple tuple, atom {tuple.value = tuple1.value + atom.value}
+ atom _number {atom.value = _number.value.to_i}
+ atom expression
+ end
+ end
+
+ puts LispGrammar.parse_expression('(+ 1 3 4) ').value
+ #puts LispGrammar.parse_expression('-1 - 2').value
+ #puts LispGrammar.parse_expression('1 + 2 * 3').value
+ #puts LispGrammar.parse_expression('1 * 2 + 3').value
+ #puts LispGrammar.parse_expression('1 * (2 + 3)').value
+ #puts LispGrammar.parse_expression('1 + (2 + 3) * 4').value
+ #puts LispGrammar.parse_expression('1 * - 5').value
+ #puts LispGrammar.parse_expression('(1+3) - - 5').value
data/lib/aurum/engine.rb
CHANGED
@@ -27,7 +27,7 @@ module Aurum
  end

  Symbol.class_eval {attr_accessor :value}
-
+ Unknown = Symbol.new '$unknown', true
  class Lexer
  def initialize table, accepts, lexical_states, input
  @table, @accepts, @lexical_states, @input = table, accepts, lexical_states, input
@@ -44,11 +44,12 @@ module Aurum
  state, char = next_state, @input.get_char
  next_state = goto state, char
  end
- @input.pushback char
-
+ @input.pushback char
+ return Unknown unless actions = @accepts[state]
+ if actions.first == IgnoreAction
  lexeme = ''
  else
-
+ actions.first.execute self, lexeme
  end
  end
  @recognized.value = lexeme unless @recognized.value
@@ -71,23 +72,24 @@ module Aurum

  def recognize token
  @recognized = Aurum::Symbol.new token, true
- end
+ end
  end

  RecognizeTokenAction.class_eval do
- def execute lexer
- lexer.recognize token
+ def execute lexer, lexeme
+ recognized = lexer.recognize token
+ action.call recognized, lexeme if action
  end
  end

  ChangeStateAction.class_eval do
- def execute lexer
+ def execute lexer, lexeme
  lexer.shift_to state
  end
  end

  RecognizeTokenAndChangeStateAction.class_eval do
- def execute lexer
+ def execute lexer, lexeme
  lexer.recognize token
  lexer.shift_to state
  end
@@ -118,13 +120,13 @@ module Aurum
  lookahead = lexer.next_symbol
  end
  state_stack.slice! -handle.symbols.length..-1
- symbols = symbol_stack.slice! -handle.symbols.length..-1
+ symbols = symbol_stack.slice! -handle.symbols.length..-1
  handle.nonterminal == Aurum::START and return value_stack.pop
  if handle.action
  context = {handle.nonterminal.name => [SemanticAttributes.new]}
- handle.symbols.each_with_index do |symbol, index|
+ handle.symbols.reverse.each_with_index do |symbol, index|
  context[symbol.name] = [] unless context.has_key? symbol.name
- context[symbol.name] << (symbol.is_terminal ? symbols[index] : value_stack.pop)
+ context[symbol.name] << (symbol.is_terminal ? symbols[-index-1] : value_stack.pop)
  end
  SemanticActionContext.new(context).instance_eval &handle.action
  value_stack.push context[handle.nonterminal.name][0] if context[handle.nonterminal.name]
@@ -152,7 +154,7 @@ module Aurum
  def method_missing name, *args
  name_string = name.to_s
  index = name_string =~ /\d+/ ? name_string.slice!(/\d+/).to_i : 0
- @hash[name_string][index] and return @hash[name_string][index]
+ @hash[name_string][-index] and return @hash[name_string][-index]
  SemanticAttributes.new
  end
  end
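Note: the engine change above introduces a sentinel terminal, '$unknown', which Lexer#next_symbol now returns when no accept action exists for the current state, instead of failing. A standalone sketch of the same error-handling idea in plain Ruby (a toy scanner, not the gem's API):

  # Toy scanner illustrating the sentinel-token pattern: when nothing matches,
  # hand back a marker token instead of raising, and let the caller report it.
  UNKNOWN = :'$unknown'

  def next_token(input)
    case input
    when /\A\d+/ then [:number, $&]
    when /\A\s+/ then [:blank, $&]
    else [UNKNOWN, input[0]]          # no rule matched: return the sentinel
    end
  end

  p next_token('42 + 1')   # => [:number, "42"]
  p next_token('@oops')    # => [:"$unknown", "@"]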
data/lib/aurum/grammar.rb
CHANGED
@@ -4,39 +4,8 @@ module Aurum
  class << nil
  attr_accessor :action
  end
-
-
- instance_methods.each { |m| undef_method m unless m =~ /^__/ || m == 'new' || m=='instance_eval'}
-
- def method_missing name, *args, &block
- name_string = name.to_s
- symbol =
- case name_string
- when /^[a-z]/
- Aurum::Symbol.new name_string, false
- when '_'
- nil
- when /_.+/
- Aurum::Symbol.new name_string, true
- end
- symbol.action = block if block_given?
- (args.empty? || name_string == '_') and return symbol
- symbols = args.map do |sym|
- case sym
- when String
- Aurum::Symbol.new "$literal_#{sym}", true
- when Aurum::Symbol
- sym
- end
- end
- action = symbols.last.action
- @definition = {} unless @definition
- @definition[symbol] = [].to_set unless @definition.has_key? symbol
- production = Aurum::Production.new symbol, symbols.compact
- production.action = action if action
- @definition[symbol] << production
- end
- end
+
+ Aurum::RecognizeTokenAction.class_eval {attr_accessor :action}

  class CharacterClassDefinition
  instance_methods.each { |m| undef_method m unless m =~ /^__/ || m == 'new' || m=='instance_eval'}
@@ -52,24 +21,25 @@ module Aurum
  CharacterSet::Interval.new(a, b).to_char_set
  end

- def
+ def enum a
  result = CharacterSet.new
  result << a
  result
  end
-
+
  def method_missing name, char_set, &block
  @definitions[name] = char_set unless @definitions.has_key? name
  end
  end

  class LexicalSpecification
+ alias __extend extend
  instance_methods.each { |m| undef_method m unless m =~ /^__/ || m == 'new' || m=='instance_eval'}
  attr_reader :definitions, :character_classes

  def initialize definition = {:initial => {}}, state = :initial
  @definitions, @character_classes = definition, CharacterClassDefinition.new
- @lexical_definition = @definitions[state]
+ @lexical_definition, @patterns = @definitions[state], {}
  end

  def range a, b
@@ -80,8 +50,12 @@ module Aurum
  Pattern.from_string literal
  end

- def
- Pattern.
+ def enum literal
+ Pattern.from_enum literal
+ end
+
+ def cat *patterns
+ Pattern.concat *(patterns.collect {|x| x.kind_of?(Pattern) ? x : Pattern.from_string(x.to_s)})
  end

  def shift_to state, *patterns, &config
@@ -109,8 +83,21 @@ module Aurum
  __create_pattern RecognizeTokenAndChangeStateAction.new(token.to_s, state), *patterns
  end

- def method_missing name, *patterns, &action
-
+ def method_missing name, *patterns, &action
+ return __get_pattern(name) if patterns.empty?
+ pattern = Pattern.concat *(patterns.collect {|x| x.kind_of?(Pattern) ? x : Pattern.from_string(x.to_s)})
+ @patterns[name] = pattern
+ if name.to_s =~ /^_/
+ recognize_action = RecognizeTokenAction.new(name.to_s)
+ recognize_action.action = action
+ @lexical_definition[pattern] = recognize_action
+ end
+ pattern
+ end
+
+ def __get_pattern name
+ return @patterns[name] if @patterns.has_key? name
+ Pattern.from_char_set @character_classes.definitions[name]
  end

  def __create_pattern action, *patterns
@@ -150,10 +137,12 @@ module Aurum
  end

  def __associativity direction, *operators
+ symbols = []
  for operator in operators
- symbol = __get_symbol
- @associativities[direction] << symbol unless @associativities[direction].include? symbol
- end
+ symbols << (symbol = __get_symbol(operator))
+ @associativities[direction] << symbol unless @associativities[direction].include? symbol
+ end
+ @precedences << symbols.uniq
  end
  end

@@ -190,8 +179,8 @@ module Aurum

  class Grammar
  def self.character_classes &block
- @lexical_sepcification = LexicalSpecification.new unless @lexical_sepcification
- @lexical_sepcification.character_classes.instance_eval &block
+ @lexical_sepcification = LexicalSpecification.new unless @lexical_sepcification
+ @lexical_sepcification.character_classes.instance_eval &block
  end

  def self.tokens &block
@@ -210,6 +199,7 @@ module Aurum
  end

  def self.lexer input
+ @productions = ProductionDefinition.new unless @productions
  @lexical_sepcification.definitions[:initial].merge!(@productions.__literals)
  generator = Aurum::LexicalTableGenerator.new @lexical_sepcification.definitions
  table, accepts = generator.lexical_table
@@ -217,6 +207,8 @@ module Aurum
  end

  def self.start_from start_symbol
+ @productions = ProductionDefinition.new unless @productions
+ @precedences = OperatorPrecedenceDefinition.new unless @precedences
  generator = Aurum::ParsingTableGenerator.new @productions.__definitions, @precedences.precedences, @precedences.associativities
  productions = generator.start_from(Aurum::Symbol.new(start_symbol.to_s, false)).productions
  table, lookeahead_level = generator.parsing_table
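Note: the grammar.rb changes extend the lexical DSL with `enum` (any single character of a literal) and `cat` (concatenation of sub-patterns), and let a tokens-block entry name a pattern for reuse. A sketch of a tokens block using them, based only on the examples and tests elsewhere in this diff (the grammar and token names here are made up):

  class SketchGrammar < Aurum::Grammar
    character_classes do
      digit range(?0, ?9)
      hex   range(?0, ?9) + enum('abcdefABCDEF')  # enum: any one character of the literal
    end
    tokens do
      ignore  string(' ').one_or_more
      _number digit.one_or_more                   # a leading underscore defines a token
      _hex    cat('0x', hex.one_or_more)          # cat: concatenate literal and pattern
    end
  end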
data/lib/aurum/lexical_table_generator.rb CHANGED
@@ -12,7 +12,7 @@ module Aurum
  end

  def lexical_table
- construct_automata
+ construct_automata
  make_initial_partitions
  refine_partitions
  @partitions.size < @lexical_automata.table.size ? construct_minimize_automata : [@lexical_automata.table, @accept_states]
@@ -104,6 +104,12 @@ module Aurum
  automata.connect 0, set, 1
  new automata, 1
  end
+
+ def self.from_enum enum_literal
+ automata = Automata.new enum_literal.length + 2
+ enum_literal.each_byte {|byte| automata.connect 0, CharacterSet::Interval.new(byte).to_char_set, 1}
+ new automata, 1
+ end

  def self.concat *patterns
  automata, index = Automata.new, 0
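Note: the new Pattern.from_enum wires every byte of the literal as its own edge from the start state to the single accepting state, so the resulting pattern behaves like a character class rather than a string match. A standalone comparison in plain Ruby (regular expressions stand in for the gem's automata; this is only an analogy):

  # from_enum('*/') ~ "any one of '*' or '/'"; from_string('*/') ~ "the sequence '*/'".
  enum_like   = /\A[#{Regexp.escape('*/')}]\z/
  string_like = /\A#{Regexp.escape('*/')}\z/

  p enum_like.match?('*')     # => true
  p enum_like.match?('/')     # => true
  p string_like.match?('*')   # => false
  p string_like.match?('*/')  # => true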
data/lib/aurum/parsing_table_generator.rb CHANGED
@@ -1,11 +1,16 @@
  require 'set'
+ require 'logger'

  module Aurum
  Symbol, Production= Struct.new(:name, :is_terminal), Struct.new(:nonterminal, :symbols)
- ShiftAction, ReduceAction = Struct.new(:state, :is_lookahead_shift), Struct.new(:handle, :is_read_reduce)
+ ShiftAction, ReduceAction = Struct.new(:state, :is_lookahead_shift), Struct.new(:handle, :is_read_reduce)
+ Production.class_eval {attr_accessor :index}

  START, EOF = Symbol.new('$start', false), Symbol.new('$eof', true)

+ Log = Logger.new(STDOUT)
+ Log.level = Logger::INFO
+
  class ParsingTableGenerator
  attr_reader :symbols, :productions

@@ -17,19 +22,20 @@ module Aurum
  end

  def start_from start
-
- @symbols, @productions, @nullables, @first_sets = [], [@start_production], [], {START => []}
- find_all_used_symbols_and_productions start
+ initialize_augmented_grammar start
  compute_nullable_symbols
  compute_first_sets
  self
  end

  def parsing_table
+ Log.debug 'Start constructing LR(0) automata.'
  construct_LR0_automata
-
+ Log.debug "Finished, #{@states.size} LR(0) states constructed."
+ Log.debug "#{@inconsistents.size} inconsistent states found."
+ if @inconsistents.size > 0
  compute_LALR_1_lookahead
- compute_LALR_n_lookahead
+ compute_LALR_n_lookahead unless @conflicts.empty?
  end
  parsing_table = []
  for state in @states do
@@ -40,67 +46,102 @@ module Aurum
  return parsing_table, @lookahead_level
  end

- private
- [14 removed lines whose content is not shown in the source view]
+ private
+ # BEGIN utils methods
+ def fixed_point
+ 1 until !yield false
+ end
+
+ def working_list unvisited
+ yield unvisited, unvisited.pop while !unvisited.empty?
+ end
+
+ def mark_working_list uvisited, result, item
+ unless result.include? item
+ result << item
+ uvisited << item
+ end
+ end
+ # END utils methods
+
+ # BEGIN augmented grammar
+ def initialize_augmented_grammar start
+ @symbols, @productions = [start], []
+ @start_production = add_production Production.new(START, [start])
+ @nullables, @first_sets, @closures = [].to_set, {START => []}, {}
+ working_list [start] do |unvisited, visiting|
+ @first_sets[visiting], @closures[visiting] = visiting.is_terminal ? [visiting] : [], []
+ for production in @definition[visiting]
+ add_production(production).symbols.each {|symbol| mark_working_list unvisited, @symbols, symbol}
+ @closures[visiting] += closure([LRItem.new(production, 0)])
+ end unless visiting.is_terminal
+ end
+ end
+
+ def add_production production
+ production.index = @productions.size
+ (@productions << production).last
+ end

  def compute_nullable_symbols
- [6 removed lines whose content is not shown in the source view]
- end unless nullable? production.nonterminal
- end
- end while changed
+ fixed_point do |changed|
+ @productions.each do |production|
+ changed |= @nullables.add? production.nonterminal if nullable? production.symbols
+ end
+ changed
+ end
  end

  def compute_first_sets
- [10 removed lines whose content is not shown in the source view]
+ fixed_point do |changed|
+ for production in @productions do
+ set = @first_sets[production.nonterminal]
+ for symbol in production.symbols do
+ changed |= set.size != set.replace(set | @first_sets[symbol]).size
+ break unless nullable? [symbol]
+ end
+ end
+ changed
+ end
  end

- def nullable?
-
+ def nullable? symbols
+ return true if symbols.empty?
+ symbols.all?{|symbol| @nullables.include? symbol }
  end
-
+
+ def closure items
+ result = items.dup
+ working_list items.dup do |unvisited, visiting|
+ if @closures[visiting.dot_symbol]
+ result |= @closures[visiting.dot_symbol]
+ else
+ @definition[visiting.dot_symbol].each {|x| mark_working_list unvisited, result, LRItem.new(x, 0)}
+ end unless visiting.is_handle || visiting.dot_symbol.is_terminal
+ end
+ result
+ end
+ # END augmented grammar
+
+ # BEGIN LR(0) automata construction
  def construct_LR0_automata
  @lookahead_level = 0
  start_state = State.new closure([LRItem.new(@start_production, 0)])
- @states
- [3 removed lines whose content is not shown in the source view]
+ @states = [start_state]
+ @inconsistents = start_state.inconsistent? ? [start_state] : []
+ working_list [start_state] do |unvisited, visiting|
+ for item in visiting.non_handles
  symbol = item.dot_symbol
  new_state = goto visiting, symbol
  if (read_reduce = new_state.read_reduce)
-
+ visiting[symbol] << ReduceAction.new(read_reduce.index, true)
  else
  if index = @states.index(new_state)
  new_state = @states[index]
  else
-
+ @states << new_state
+ @inconsistents << new_state if new_state.inconsistent?
+ unvisited << new_state
  index = @states.length - 1
  end
  visiting[symbol] << ShiftAction.new(index, false)
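Note: the rewritten generator computes nullable symbols and FIRST sets with the small fixed_point helper introduced above: keep re-running the block until a full pass makes no change. A standalone sketch of the same idea on a toy grammar in plain Ruby (not the gem's data structures):

  # Grammar: E -> T '+' E | T ;  T -> (empty).  Iterate until nothing new is learned.
  rules = { 'E' => [['T', '+', 'E'], ['T']], 'T' => [[]] }

  nullable = {}
  changed = true
  while changed
    changed = false
    rules.each do |lhs, bodies|
      next if nullable[lhs]
      if bodies.any? { |body| body.all? { |sym| nullable[sym] } }
        nullable[lhs] = true
        changed = true
      end
    end
  end

  p nullable   # => {"T"=>true, "E"=>true}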
@@ -109,101 +150,124 @@ module Aurum
  end
  end
  end
+
+ def goto items, symbol
+ result = []
+ for item in items
+ result << LRItem.new(item.production, item.position + 1) if symbol == item.dot_symbol
+ end
+ State.new closure(result)
+ end
+ # END LR(0) automata construction

+ # BEGIN lookahead computation
  def compute_LALR_1_lookahead
-
+ @lookahead_level, @conflicts = 1, []
  @lookahead_config_stack, @lookahead_indicitor, @lookahead_result = [], {}, {}
- [12 removed lines whose content is not shown in the source view]
- end
- end
- end
- end
+ handle_each @inconsistents do |state, handle|
+ lookahead_each(state, handle.production) do |production, lookahead|
+ if state.only_shift?(lookahead) && @precedence_table.operator?(lookahead)
+ if @precedence_table.compare(production.operator, lookahead) >= 0
+ state[lookahead].clear
+ state[lookahead] << ReduceAction.new(production.index, false)
+ end
+ else
+ state[lookahead] << ReduceAction.new(production.index, false)
+ end
+ end
+ @conflicts << state if state.conflict?
  end
  end
- [22 removed lines whose content is not shown in the source view]
+
+ def handle_each states
+ states.each {|state| state.handles.each {|handle| yield state, handle}}
+ end
+
+ def lookahead_each state, production
+ for predsucceor in state.predsucceors production.symbols.reverse
+ config = Configuration.new predsucceor, production.nonterminal
+ (@lookahead_indicitor[config] ? @lookahead_result[config] : compute_follow_set(config)).each {|x| yield production, x}
+ end
+ end
+
+ def compute_follow_set config
+ state, nonterminal = config.state, config.symbol
+ if START == nonterminal
+ @lookahead_indicitor[config] = 65535
+ @lookahead_result[config] = [EOF]
+ else
+ @lookahead_config_stack.push config
+ @lookahead_indicitor[config] = (d = @lookahead_config_stack.length)
+ @lookahead_result[config] = read_set state, nonterminal
+ each_included_by state, nonterminal do |new_config|
+ compute_follow_set new_config unless @lookahead_indicitor[new_config]
+ @lookahead_indicitor[config] = [@lookahead_indicitor[config], @lookahead_indicitor[new_config]].min
+ @lookahead_result[config] |= @lookahead_result[new_config]
+ end
+ connected = nil
+ until connected == config
+ connected = @lookahead_config_stack.pop
+ @lookahead_result[connected] = @lookahead_result[config].dup
+ @lookahead_indicitor[connected] = 65535
+ end if @lookahead_indicitor[config] == d
  end
+ @lookahead_result[config]
  end
-
- def
- result
-
- visiting = unmarked.pop
- for production in @definition[visiting.dot_symbol] do
- item = LRItem.new production, 0
- [result, unmarked].each {|x| x << item} unless result.include? item
- end unless visiting.handle? || visiting.dot_symbol.is_terminal
- end
+
+ def read_set state, symbol
+ result = []
+ each_read_by(state, symbol) {|q, y| result |= @first_sets[y] }
  result
  end

- def
- [3 removed lines whose content is not shown in the source view]
+ def each_read_by state, symbol
+ index = state.goto symbol
+ for item in @states[index]
+ for symbol in item.second_part
+ yield state, symbol
+ nullable? [symbol] or break
+ end
+ end if index
  end
- [14 removed lines whose content is not shown in the source view]
- @
-
-
- compute_follow_set new_config unless @lookahead_indicitor[new_config]
- @lookahead_indicitor[config] = [@lookahead_indicitor[config], @lookahead_indicitor[new_config]].min
- @lookahead_result[config] |= @lookahead_result[new_config]
+
+ def each_included_by state, nonterminal
+ for item in state
+ if item.dot_symbol == nonterminal && nullable?(item.second_part[1..-1])
+ first_part = item.production.symbols.slice 0, item.position
+ predsucceors = state.predsucceors first_part.reverse
+ predsucceors.each {|s| yield Configuration.new(s, item.production.nonterminal) }
+ end
+ end
+ end
+
+ def compute_LALR_n_lookahead
+ @stack_seen = []
+ @conflicts.each do |state|
+ @current_lookahead_level = 1
+ for lookahead, actions in state.conflicted_actions do
+ resolve_conficts state, lookahead, sources_of(state, lookahead, actions)
  end
-
- until connected == config
- connected = @lookahead_config_stack.pop
- @lookahead_result[connected] = @lookahead_result[config].dup
- @lookahead_indicitor[connected] = 65535
- end if @lookahead_indicitor[config] == d
+ @lookahead_level = [@current_lookahead_level, @lookahead_level].max
  end
- @lookahead_result[config]
  end
-
+
+ def sources_of state, lookahead, actions
+ sources = {}
+ for action in actions do
+ if action.kind_of? ShiftAction
+ sources[action] = [[state]].to_set
+ else
+ handle = @productions[action.handle]
+ sources[action] = action.is_read_reduce ? [[state]].to_set : [].to_set
+ for predsucceor in state.predsucceors handle.symbols.reverse
+ @follow_sources_visited = []
+ sources[action].merge follow_sources([predsucceor], handle.nonterminal, lookahead)
+ end
+ end
+ end
+ sources
+ end
+
  def follow_sources stack, nonterminal, lookahead
  top = stack.last
  if stack.length == 1
@@ -217,15 +281,17 @@ module Aurum
  stacks = [stack + [q]].to_set if q.direct_read.include?(lookahead)
  end
  each_read_by(top, nonterminal) {|q, y| stacks |= follow_sources stack+[q], y, lookahead unless y.is_terminal}
- [9 removed lines whose content is not shown in the source view]
+ top.kernels.each do |item|
+ if !item.is_start && item.dot_symbol == nonterminal
+ c = item.production.nonterminal
+ if item.position < stack.length
+ stacks |= follow_sources stack.slice(0..-item.position-1), c, lookahead
+ else
+ first_part = item.production.symbols.slice 0..-stack.length-1
+ stack[0].predsucceors(first_part).reverse.each {|q| stacks |= follow_sources [q], c, lookahead }
+ end
+ end
+ end
  stacks
  end

@@ -237,7 +303,7 @@ module Aurum
  for stk in stacks
  raise 'not LALR(n)' if @stack_seen.include? stk
  @stack_seen << stk
- for a in next_lookaheads stk, lookahead
+ for a in next_lookaheads stk, lookahead
  lookahead_state[a] << action
  end
  end
@@ -246,7 +312,7 @@ module Aurum
  new_sources = {}
  for action in actions do
  new_sources[action] = [].to_set
- for stk in
+ for stk in sources[action] do
  @follow_sources_visited = []
  new_sources[action] |= follow_sources stk, lookahead, next_lookahead
  end
@@ -259,53 +325,28 @@ module Aurum
  EOF == lookahead and return [EOF]
  top = stack.last
  lookaheads = read_set top, lookahead
- [8 removed lines whose content is not shown in the source view]
+ top.kernels.each do |item|
+ if !item.is_start && item.dot_symbol == lookahead
+ c = item.production.nonterminal
+ if item.position < stack.length
+ lookaheads |= next_lookaheads stack.slice(0..-item.position-1), c
+ else
+ first_part = item.production.symbols.slice 0..-stack.length-1
+ stack[0].predsucceors(first_part).reverse.each {|q| lookaheads |= compute_follow_set Configuration.new(q, c) }
+ end
+ end
  end
  lookaheads
  end

- def read_set state, symbol
- result = []
- each_read_by(state, symbol) {|q, y| result |= @first_sets[y] }
- result
- end
-
- def each_read_by state, symbol
- index = state.goto symbol
- for item in @states[index]
- for symbol in item.second_part
- yield state, symbol
- nullable? symbol or break
- end
- end if index
- end
-
- def each_included_by state, nonterminal
- for item in state
- symbols = item.production.symbols
- symbols.reverse.each_with_index do |symbol, index|
- first_part = symbols.slice 0, symbols.length - index - 1
- state.predsucceors(first_part.reverse).each {|s| yield s, item.production.nonterminal} if nonterminal == symbol
- nullable? symbol or break
- end
- end if state
- end
-
  def default_action state
  if !state.empty?
  handle = nil
  for x in state
  p = x.production
- handle = x if x.
+ handle = x if x.is_handle && p.nonterminal != START && (!handle || handle.production.symbols.length > p.symbols.length)
  end
- default_action = handle ? ReduceAction.new(
+ default_action = handle ? ReduceAction.new(handle.production.index, false) : nil
  else
  candidates = state.actions.values.inject [] do |candidates, actions|
  candidates |= actions.find_all {|x| x.kind_of?(Aurum::ReduceAction) && !x.is_read_reduce }
@@ -346,30 +387,31 @@ module Aurum
  end

  class State < Array
-
+ attr_reader :actions, :handles, :non_handles, :kernels, :read_reduce, :direct_read
  def initialize elements
  super elements
-
+ @actions, @predsucceors, @read_reduce = {}, [], nil
+ @handles, @non_handles, @kernels, @direct_read = [], [], [], [].to_set
+ for item in elements
+ (item.is_handle ? @handles : @non_handles) << item
+ @kernels << item if item.is_kernel
+ @direct_read << item.dot_symbol if item.dot_symbol
+ end
+ @read_reduce = first.production if size == 1 && first.is_handle
  end

  def [] symbol
  @actions[symbol] = Set.new([]) unless @actions.has_key? symbol
  @actions[symbol]
  end
- [8 removed lines whose content is not shown in the source view]
- true
- end
-
- def conflicted?
- !consistent? && @actions.any? {|symbol, actions| actions.length > 1}
- end
+
+ def inconsistent?
+ @handles.size > 1 || (@handles.size == 1 && @kernels.size != 1)
+ end
+
+ def conflict?
+ inconsistent? && @actions.any? {|symbol, actions| actions.length > 1}
+ end

  def conflicted_actions
  @actions.find_all {|symbol, actions| actions.length > 1}
@@ -379,10 +421,6 @@ module Aurum
  !self[symbol].empty? && @actions[symbol].all? {|x| x.kind_of? ShiftAction}
  end

- def read_reduce
- length == 1 && first.handle? ? first.production : nil
- end
-
  def goto symbol
  shift = self[symbol].find {|x| x.kind_of? Aurum::ShiftAction }
  shift.state if shift
@@ -392,54 +430,35 @@ module Aurum
  symbols or return @predsucceors
  result = [self]
  for symbol in symbols
- [3 removed lines whose content is not shown in the source view]
+ new_result = []
+ for x in result
+ new_result |= x.predsucceors.find_all {|predsucceor| predsucceor.any? {|item| item.dot_symbol == symbol}}
+ end
  result.replace new_result
  end
  result
  end
-
- def direct_read
- inject [] do |result, item|
- item.dot_symbol ? result | [item.dot_symbol] : result
- end
- end
-
+
  def == other
- [3 removed lines whose content is not shown in the source view]
- all? {|x| other.include? x}
+ return false unless other.kind_of?(State) && (@kernels.size == other.kernels.size)
+ return true if equal? other
+ @kernels.all? {|x| other.kernels.include? x}
  end
  end

  LRItem, Configuration = Struct.new(:production, :position), Struct.new(:state, :symbol)

  LRItem.class_eval do
- [10 removed lines whose content is not shown in the source view]
- end
-
- def kernel?
- handle? || position != 0
- end
-
- def first_part
- production.symbols.slice(0, position)
- end
-
- def second_part
- handle? ? [] : production.symbols.slice(position..-1)
- end
+ attr_reader :dot_symbol, :second_part, :is_handle, :is_kernel, :is_start
+
+ def initialize production, position
+ super production, position
+ @dot_symbol = production.symbols[position]
+ @is_handle = position >= production.symbols.length
+ @is_kernel = @is_handle || (position != 0)
+ @is_start = production.nonterminal == START
+ @second_part = @is_handle ? [] : production.symbols.slice(position..-1)
+ end
  end
  end
  end
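Note: the closure and goto operations this file now builds LR(0) states from can be written compactly over a toy grammar; a minimal sketch in plain Ruby (an item is [lhs, rhs, dot], not the gem's LRItem struct):

  RULES = { 'E' => [['E', '+', 'T'], ['T']], 'T' => [['id']] }

  # closure: expand every item whose dot sits before a nonterminal.
  def closure(items)
    result = items.dup
    work = items.dup
    until work.empty?
      _lhs, rhs, dot = work.pop
      sym = rhs[dot]
      next unless RULES.key?(sym)        # dot at the end or before a terminal
      RULES[sym].each do |body|
        item = [sym, body, 0]
        next if result.include?(item)
        result << item
        work << item
      end
    end
    result
  end

  # goto: advance the dot over `symbol`, then close the resulting kernel.
  def goto(items, symbol)
    kernel = items.select { |_, rhs, dot| rhs[dot] == symbol }
                  .map    { |lhs, rhs, dot| [lhs, rhs, dot + 1] }
    closure(kernel)
  end

  start = closure([['E', ['E', '+', 'T'], 0]])
  p start.size          # => 3
  p goto(start, 'T')    # => [["E", ["T"], 1]]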
data/test/engine/lexer_test.rb
CHANGED
@@ -43,6 +43,13 @@ class LexerTest < Test::Unit::TestCase
  lexer = create_lexer specification, ''
  assert_equal terminal('$eof'), lexer.next_symbol
  end
+
+ def test_should_return_unknown_if_not_recognized
+ specification = {:initial => {PATTERN_A => Aurum::RecognizeTokenAction.new('tokenA'),
+ PATTERN_B => Aurum::IgnoreAction}}
+ lexer = create_lexer specification, 'whatever'
+ assert_equal terminal('$unknown'), lexer.next_symbol
+ end

  def create_lexer specification, input
  generator = Aurum::LexicalTableGenerator.new specification
data/test/grammar_definition/character_class_definition_test.rb CHANGED
@@ -8,7 +8,7 @@ class CharacterClassDefinitionTest < Test::Unit::TestCase

  def test_should_add_char_class_to_definition
  @character_class.instance_eval do
- alpha range(?a, ?z) +
+ alpha range(?a, ?z) + enum('ABCDEF')
  end
  assert_equal 1, @character_class.definitions.size
  alpha = @character_class.definitions[:alpha]

data/test/grammar_definition/grammar_definition_test.rb CHANGED
@@ -1,6 +1,6 @@
  $:.unshift(File.dirname(__FILE__) + '/../')
  require 'test_helper'
-
+ =begin
  class ExpressionGrammar < Aurum::Grammar
  character_classes do
  number range(?0, ?9)
@@ -8,7 +8,7 @@ class ExpressionGrammar < Aurum::Grammar

  tokens do
  ignore string(' ').one_or_more
- _number
+ _number number.one_or_more
  end

  precedences do
@@ -51,4 +51,5 @@ class GrammarDefinitionTest < Test::Unit::TestCase
  assert_equal token, symbol
  assert_equal lexeme, symbol.value
  end
- end
+ end
+ =end

data/test/lexical_table_generator/pattern_test.rb CHANGED
@@ -85,6 +85,12 @@ class PatternTest < Test::Unit::TestCase
  assert match?('comments */', pattern)
  assert !match?('everything', pattern)
  end
+
+ def test_should_match_every_char_in_enum_literal
+ pattern = Aurum::Pattern.from_enum('*/')
+ assert match?('*', pattern)
+ assert match?('/', pattern)
+ end

  def match? expected_string, pattern
  states = closure pattern.automata.table, [0]

data/test/lexical_table_generator/table_generator_test.rb CHANGED
@@ -112,7 +112,7 @@ class LexicalTableGeneratorTest < Test::Unit::TestCase
  assert recognize?(:state_a, 'pattern_b')
  assert !recognize?(:all, 'pattern_b')
  end
-
+
  def recognize? lexical_state, source
  begin
  lexical_state = - @lexical_states.index(lexical_state) - 1

data/test/parsing_table_generator/augmented_grammar_test.rb CHANGED
@@ -7,7 +7,7 @@ Aurum::ParsingTableGenerator.class_eval do
  end

  class AugmentedGrammarTest < Test::Unit::TestCase
- def
+ def test_should_find_all_used_symbols
  generator = parser_generator E=>[production(E, T)], T=>[production(T, F)], F=>[production(F, ID)]
  generator.start_from E
  assert_equal [E, T, F, ID], generator.symbols
@@ -15,7 +15,7 @@ class AugmentedGrammarTest < Test::Unit::TestCase
  assert_equal [T, F, ID], generator.symbols
  end

- def
+ def test_should_find_all_used_productions
  generator = parser_generator E=>[production(E, T)], T=>[production(T, F)], F=>[production(F, ID)]
  generator.start_from E
  assert_equal [production(START, E), production(E, T), production(T, F), production(F, ID)].to_set, generator.productions.to_set
@@ -31,13 +31,13 @@ class AugmentedGrammarTest < Test::Unit::TestCase
  assert_equal [T].to_set, generator.nullables.to_set
  end

- def
+ def test_first_set_should_contain_terminals_left_depends_on_nt_dirctly
  generator = parser_generator E=>[production(E, T, ID), production(E, T, T, T, terminal('other'))], T=>[production(T)]
  generator.start_from E
  assert_equal [ID, terminal('other')].to_set, generator.first_sets[E].to_set
  end

- def
+ def test_should_contain_fist_set_of_nt_which_left_depends_on_nt_dirctly
  generator = parser_generator E=>[production(E, T, ID), production(E, T, T, T, terminal('other'))], T=>[production(T)], F=>[production(F, T, E)]
  generator.start_from F
  assert_equal generator.first_sets[F].to_set, generator.first_sets[E].to_set

data/test/parsing_table_generator/lalr_n_computation_test.rb CHANGED
@@ -11,7 +11,7 @@ class LALRLookaheadComputationTest < Test::Unit::TestCase
  generator = parser_generator EXPRESSION_GRAMMAR_LALR1
  generator.start_from E
  generator.construct_LR0_automata
- states = generator.states.find_all {|x|
+ states = generator.states.find_all {|x| x.inconsistent?}
  generator.compute_LALR_1_lookahead
  assert_equal [reduce(0)].to_set, states[0][terminal('$eof')]
  assert_equal [reduce(2)].to_set, states[1][terminal('+')]
@@ -25,10 +25,10 @@ class LALRLookaheadComputationTest < Test::Unit::TestCase
  generator.start_from BNF
  generator.construct_LR0_automata
  generator.compute_LALR_1_lookahead
- conflicted_state = (generator.states.find_all {|x| x.
+ conflicted_state = (generator.states.find_all {|x| x.conflict? })[0]
  generator.compute_LALR_n_lookahead
  assert_equal [lookahead_shift(generator.states.length - 1)].to_set, conflicted_state[terminal('s')]
- assert !conflicted_state.
+ assert !conflicted_state.conflict?
  end

  def test_should_add_reduce_action_to_lookahead_state
@@ -62,6 +62,9 @@ class LALRLookaheadComputationTest < Test::Unit::TestCase
  assert_equal 1, level
  generator = parser_generator IF_GRAMMAR_LALR2
  table, level = generator.start_from(STATEMENT).parsing_table
+ assert_equal 2, level
+ generator = parser_generator BNF_GRAMMAR_LALR2
+ table, level = generator.start_from(BNF).parsing_table
  assert_equal 2, level
  end

data/test/parsing_table_generator/lr_0_automata_test.rb CHANGED
@@ -6,7 +6,7 @@ Aurum::ParsingTableGenerator.class_eval do
  public :closure, :goto, :read_set, :construct_LR0_automata
  end

- class LR0AutomataTest < Test::Unit::TestCase
+ class LR0AutomataTest < Test::Unit::TestCase
  def test_closure_should_contain_items_themselves
  generator = parser_generator E=>[production(E, ID)]
  assert_equal [LR_item(0, E, ID)], generator.closure([LR_item(0, E, ID)])
@@ -14,17 +14,20 @@ class LR0AutomataTest < Test::Unit::TestCase

  def test_closure_should_contain_all_right_most_lr_items_of_dot_symbol
  generator = parser_generator E=>[production(E, T)], T=>[production(T, ID), production(T, terminal('other'))]
+ generator.start_from E
  closure = generator.closure [LR_item(0, E, T)]
  [LR_item(0, T, ID), LR_item(0, T, terminal('other'))].each {|x| assert closure.include?(x)}
  end

  def test_should_return_goto_items_if_expected_symbol_given
  generator = parser_generator E=>[production(E, T)], T=>[production(T, ID), production(T, terminal('other'))]
+ generator.start_from E
  assert_equal [LR_item(1, E, T)], generator.goto([LR_item(0, E, T)], T)
  end

  def test_goto_items_should_be_closured_if_expected_symbol_given
  generator = parser_generator E=>[production(E, T, T)], T=>[production(T, ID), production(T, terminal('other'))]
+ generator.start_from E
  goto = generator.goto [LR_item(0, E, T, T)], T
  [LR_item(0, T, ID), LR_item(0, T, terminal('other'))].each {|x| assert goto.include?(x)}
  end
@@ -39,7 +42,7 @@ class LR0AutomataTest < Test::Unit::TestCase
  assert [LR_item(1, T, terminal('+'), T)], states[1]
  assert [LR_item(2, T, terminal('+'), T), LR_item(0, T, ID)], states[2]
  end
-
+
  def test_should_add_shift_action_to_states
  generator = parser_generator SIMPLE_GRAMMAR_LR0
  generator.start_from E
@@ -54,7 +57,7 @@ class LR0AutomataTest < Test::Unit::TestCase
  generator.start_from E
  generator.construct_LR0_automata
  states = generator.states
-
+ assert_equal [read_reduce(0)].to_set, states[0][E]
  assert_equal [read_reduce(2)].to_set, states[0][ID]
  assert_equal [read_reduce(2)].to_set, states[2][ID]
  assert_equal [read_reduce(1)].to_set, states[2][T]
@@ -87,5 +90,5 @@ class LR0AutomataTest < Test::Unit::TestCase
  generator.construct_LR0_automata
  states = generator.states
  assert_equal [terminal('id'), terminal('(')].to_set, generator.read_set(states[2], terminal('*')).to_set
- end
+ end
  end

data/test/parsing_table_generator/lr_item_test.rb CHANGED
@@ -8,23 +8,17 @@ class LRItemTest < Test::Unit::TestCase
  end

  def test_should_be_handle_if_at_right_most_position
- assert LR_item(2, E, T, ID).
- assert !LR_item(1, E, T, ID).
- assert LR_item(0, E).
+ assert LR_item(2, E, T, ID).is_handle
+ assert !LR_item(1, E, T, ID).is_handle
+ assert LR_item(0, E).is_handle
  end

  def test_should_be_kernel_if_not_at_left_most_position
- assert LR_item(2, E, T, ID).
- assert LR_item(1, E, T, ID).
- assert !LR_item(0, E, T, ID).
+ assert LR_item(2, E, T, ID).is_kernel
+ assert LR_item(1, E, T, ID).is_kernel
+ assert !LR_item(0, E, T, ID).is_kernel
  end

- def test_should_return_zero_to_position_as_first_part
- assert_equal [], LR_item(0, E, T, ID).first_part
- assert_equal [T], LR_item(1, E, T, ID).first_part
- assert_equal [T, ID], LR_item(2, E, T, ID).first_part
- end
-
  def test_should_return_position_to_end_as_second_part
  assert_equal [T, ID], LR_item(0, E, T, ID).second_part
  assert_equal [ID], LR_item(1, E, T, ID).second_part

data/test/parsing_table_generator/parsing_table_state_test.rb CHANGED
@@ -9,7 +9,7 @@ class ParsingTableStateTest < Test::Unit::TestCase
  end

  def test_should_be_read_reducable_if_contains_one_and_only_one_handle
- assert parsing_table_state(LR_item(1, E,
+ assert parsing_table_state(LR_item(1, E, ID)).read_reduce
  assert !parsing_table_state(LR_item(1, E, T, T)).read_reduce
  assert !parsing_table_state(LR_item(1, E, T, T), LR_item(1, E, T)).read_reduce
  end
@@ -22,18 +22,18 @@ class ParsingTableStateTest < Test::Unit::TestCase
  end

  def test_should_be_consistent_if_contains_one_and_only_one_handle
- assert parsing_table_state(LR_item(1, E, T)).
+ assert !parsing_table_state(LR_item(1, E, T)).inconsistent?
  end

  def test_should_be_consistent_if_contains_no_handle
- assert parsing_table_state(LR_item(1, E, T, ID), LR_item(1, E, F, ID), LR_item(1, E, ID, F)).
+ assert !parsing_table_state(LR_item(1, E, T, ID), LR_item(1, E, F, ID), LR_item(1, E, ID, F)).inconsistent?
  end

  def test_should_be_inconsistent_if_contains_handle_and_other_kernal_items
- assert
+ assert parsing_table_state(LR_item(1, E, T), LR_item(1, E, T, ID)).inconsistent?
  end

  def test_should_return_all_direct_read_symbols
-
+ assert_equal [T].to_set, parsing_table_state(LR_item(1, E, T, T), LR_item(1, E, T)).direct_read
  end
  end
data/test/test_helper.rb
CHANGED
metadata
CHANGED
@@ -3,8 +3,8 @@ rubygems_version: 0.9.2
  specification_version: 1
  name: aurum
  version: !ruby/object:Gem::Version
- version: 0.1.
- date: 2007-05-
+ version: 0.1.1
+ date: 2007-05-26 00:00:00 +10:00
  summary: Aurum is a LALR(n) parser generator written in Ruby.
  require_paths:
  - lib
@@ -31,37 +31,41 @@ authors:
  files:
  - example/expression
  - example/expression/expression.rb
+ - example/expression/lisp.rb
  - lib/aurum
  - lib/aurum/lexical_table_generator.rb
  - lib/aurum/parsing_table_generator.rb
- - lib/aurum/grammar.rb
  - lib/aurum/engine.rb
+ - lib/aurum/grammar.rb
+ - lib/grammars
  - lib/aurum.rb
  - test/parsing_table_generator
  - test/parsing_table_generator/augmented_grammar_test.rb
- - test/parsing_table_generator/
+ - test/parsing_table_generator/precedence_table_test.rb
  - test/parsing_table_generator/production_test.rb
+ - test/parsing_table_generator/lr_0_automata_test.rb
+ - test/parsing_table_generator/lalr_n_computation_test.rb
  - test/parsing_table_generator/parsing_table_state_test.rb
  - test/parsing_table_generator/lr_item_test.rb
- - test/
- - test/
+ - test/engine
+ - test/engine/lexer_test.rb
+ - test/engine/semantic_attributes_test.rb
+ - test/grammars
+ - test/grammars/ruby
  - test/grammar_definition
  - test/grammar_definition/grammar_definition_test.rb
+ - test/grammar_definition/production_definition_test.rb
+ - test/grammar_definition/operator_precedence_definition_test.rb
  - test/grammar_definition/character_class_definition_test.rb
  - test/grammar_definition/lexical_definition_test.rb
- - test/grammar_definition/operator_precedence_definition_test.rb
- - test/grammar_definition/production_definition_test.rb
  - test/lexical_table_generator
  - test/lexical_table_generator/interval_test.rb
+ - test/lexical_table_generator/subset_determinizer_test.rb
  - test/lexical_table_generator/character_set_test.rb
  - test/lexical_table_generator/automata_test.rb
  - test/lexical_table_generator/pattern_test.rb
- - test/lexical_table_generator/subset_determinizer_test.rb
  - test/lexical_table_generator/table_generator_test.rb
  - test/test_helper.rb
- - test/engine
- - test/engine/semantic_attributes_test.rb
- - test/engine/lexer_test.rb
  test_files: []

  rdoc_options: []