aurum 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/example/expression/expression.rb +29 -0
- data/lib/aurum.rb +10 -0
- data/lib/aurum/engine.rb +173 -0
- data/lib/aurum/grammar.rb +234 -0
- data/lib/aurum/lexical_table_generator.rb +423 -0
- data/lib/aurum/parsing_table_generator.rb +445 -0
- data/test/engine/lexer_test.rb +52 -0
- data/test/engine/semantic_attributes_test.rb +15 -0
- data/test/grammar_definition/character_class_definition_test.rb +28 -0
- data/test/grammar_definition/grammar_definition_test.rb +54 -0
- data/test/grammar_definition/lexical_definition_test.rb +56 -0
- data/test/grammar_definition/operator_precedence_definition_test.rb +35 -0
- data/test/grammar_definition/production_definition_test.rb +60 -0
- data/test/lexical_table_generator/automata_test.rb +74 -0
- data/test/lexical_table_generator/character_set_test.rb +73 -0
- data/test/lexical_table_generator/interval_test.rb +36 -0
- data/test/lexical_table_generator/pattern_test.rb +109 -0
- data/test/lexical_table_generator/subset_determinizer_test.rb +19 -0
- data/test/lexical_table_generator/table_generator_test.rb +126 -0
- data/test/parsing_table_generator/augmented_grammar_test.rb +45 -0
- data/test/parsing_table_generator/lalr_n_computation_test.rb +89 -0
- data/test/parsing_table_generator/lr_0_automata_test.rb +91 -0
- data/test/parsing_table_generator/lr_item_test.rb +33 -0
- data/test/parsing_table_generator/parsing_table_state_test.rb +39 -0
- data/test/parsing_table_generator/precedence_table_test.rb +28 -0
- data/test/parsing_table_generator/production_test.rb +9 -0
- data/test/test_helper.rb +103 -0
- metadata +78 -0
@@ -0,0 +1,29 @@
|
|
1
|
+
$:.unshift(File.dirname(__FILE__) + '/../../lib')
|
2
|
+
require 'aurum'
|
3
|
+
|
4
|
+
# Example grammar: integer arithmetic whose value is computed by the
# semantic actions attached to each production.
class ExpressionGrammar < Aurum::Grammar
  # Lexical rules: one rule is registered as ignored, the other is
  # registered under the token name `_number`.
  tokens do
    ignore(string(' ').one_or_more)
    _number(range(?0, ?9).one_or_more)
  end

  # Two operator groups, declared in this order.
  precedences do
    operator('*', '/')
    operator('+', '-')
  end

  # Each `{ ... }` block is attached to the last symbol of its production;
  # inside it, `expression` is the result and `expression1`/`expression2`
  # are the right-hand-side occurrences.
  productions do
    expression(expression, '+', expression {
      expression.value = expression1.value + expression2.value
    })
    expression(expression, '-', expression {
      expression.value = expression1.value - expression2.value
    })
    expression(expression, '*', expression {
      expression.value = expression1.value * expression2.value
    })
    expression(expression, '/', expression {
      expression.value = expression1.value / expression2.value
    })
    expression('(', expression, ')')
    expression(_number {
      expression.value = _number.value.to_i
    })
  end
end
|
24
|
+
|
25
|
+
# Parse each sample input starting from `expression` and print the computed value.
[
  '1 + 2',
  '1 + 2 * 3',
  '1 * 2 + 3',
  '1 * (2 + 3)',
  '1 + (2 + 3) * 4'
].each do |source|
  puts ExpressionGrammar.parse_expression(source).value
end
|
data/lib/aurum.rb
ADDED
@@ -0,0 +1,10 @@
|
|
1
|
+
require 'aurum/lexical_table_generator'
|
2
|
+
require 'aurum/parsing_table_generator'
|
3
|
+
require 'aurum/engine'
|
4
|
+
require 'aurum/grammar'
|
5
|
+
|
6
|
+
Enumerable.class_eval do
  # Yields every element that satisfies +condition+.
  #
  # condition:: either a Proc, called with the element, or any other object
  #             whose +to_s+ is evaluated as Ruby code for each element with
  #             the element bound to the local name +x+.
  def grep_each(condition)
    if condition.kind_of?(Proc)
      each { |x| yield x if condition.call(x) }
    else
      # NOTE(review): eval of arbitrary text; never pass untrusted input here.
      # The block parameter must stay named `x` because the evaluated code refers to it.
      each { |x| yield x if eval(condition.to_s) }
    end
  end
end
|
data/lib/aurum/engine.rb
ADDED
@@ -0,0 +1,173 @@
|
|
1
|
+
module Aurum
|
2
|
+
IO.class_eval do
  # Returns the next character code from the stream, or nil at end of input.
  #
  # A code previously handed to #pushback is returned first, unchanged.
  # Otherwise one byte is read; a byte above 128 is combined with the byte
  # that follows it as `lead * 128 + trail`.
  def get_char
    if @pushback_char
      # Already a complete code: returning it as-is avoids combining a
      # pushed-back multi-byte code with yet another byte.
      char, @pushback_char = @pushback_char, nil
      return char
    end
    # getbyte always yields an Integer; getc only does so on Ruby 1.8.
    lead = respond_to?(:getbyte) ? getbyte : getc
    # nil means end of input; the lexer's goto treats nil as "no transition".
    return lead unless lead && lead > 128
    trail = respond_to?(:getbyte) ? getbyte : getc
    trail ? lead * 128 + trail : lead
  end

  # Remembers +char+ so that the next #get_char returns it again.
  # Only one code is remembered; pushing back nil clears the slot.
  def pushback(char)
    @pushback_char = char
  end
end
|
12
|
+
|
13
|
+
String.class_eval do
  # Moves this string's read cursor forward by one and returns self[cursor]
  # (nil once the cursor is past the end). The cursor starts at -1.
  def get_char
    @get_char_index ||= -1
    @get_char_index += 1
    self[@get_char_index]
  end

  # True when the cursor already sits on (or beyond) the last index.
  def eof?
    @get_char_index ||= -1
    length - 1 <= @get_char_index
  end

  # Steps the cursor back by one; +char+ itself is not used.
  def pushback(char)
    @get_char_index -= 1
  end
end
|
28
|
+
|
29
|
+
# Gives Symbol (as resolved inside this module; presumably Aurum::Symbol, confirm)
# a readable and writable `value`; Lexer#next_symbol stores the matched lexeme there.
Symbol.class_eval {attr_accessor :value}
|
30
|
+
|
31
|
+
# Table-driven scanner.
#
# table::          indexed by state; each entry is a collection of transitions
#                  responding to +symbols+ and +destination+.
# accepts::        indexed by state; the first element of an entry is the
#                  action run when scanning stops in that state.
# lexical_states:: ordered names of the lexical states.
# input::          responds to +get_char+, +pushback+ and +eof?+.
class Lexer
  def initialize(table, accepts, lexical_states, input)
    @table, @accepts, @lexical_states, @input = table, accepts, lexical_states, input
    shift_to :initial
  end

  # Returns the next recognized symbol, or Aurum::EOF once the input is
  # exhausted. A symbol handed to #pushback is returned first.
  # Raises RuntimeError when scanning stops in a state without an action.
  def next_symbol
    @recognized, lexeme, @pushback_symbol = @pushback_symbol, '', nil
    until @recognized
      # Checked on every pass (and only when nothing was pushed back) so that
      # a pushed-back symbol survives end of input and trailing ignored text
      # ends with EOF instead of another scan.
      return Aurum::EOF if @input.eof?
      state, next_state, char = nil, @start_state, nil
      while next_state
        lexeme << char if char
        state, char = next_state, @input.get_char
        next_state = goto state, char
      end
      # The character that stopped the scan belongs to the next token.
      @input.pushback char
      actions = state ? @accepts[state] : nil
      action = actions && actions.first
      raise "Lexical error: no rule matches the input after #{lexeme.inspect}" unless action
      if action == IgnoreAction
        lexeme = ''
      else
        action.execute self
      end
    end
    @recognized.value = lexeme unless @recognized.value
    @recognized
  end

  # Makes +symbol+ the result of the next call to #next_symbol.
  def pushback(symbol)
    @pushback_symbol = symbol
  end

  # Destination of the first transition out of +state+ whose symbols include
  # +input+; nil when there is none or when +input+ is nil.
  def goto(state, input)
    return nil unless input
    transition = @table[state].find { |tran| tran.symbols.include?(input) }
    transition ? transition.destination : nil
  end

  # Selects the start state of lexical state +state+: it is reached from
  # state 0 on the negative code -(index of the state) - 1.
  def shift_to(state)
    @start_state = goto 0, -@lexical_states.index(state) - 1
  end

  # Records +token+ as the terminal recognized by the current scan.
  def recognize(token)
    @recognized = Aurum::Symbol.new token, true
  end
end
|
76
|
+
|
77
|
+
# Running a token action reports the action's token to the lexer.
RecognizeTokenAction.class_eval do
  def execute(lexer)
    lexer.recognize(token)
  end
end

# Running a state-change action switches the lexer to the action's state.
ChangeStateAction.class_eval do
  def execute(lexer)
    lexer.shift_to(state)
  end
end

# Running the combined action reports the token first, then switches state.
RecognizeTokenAndChangeStateAction.class_eval do
  def execute(lexer)
    lexer.recognize(token)
    lexer.shift_to(state)
  end
end
|
95
|
+
|
96
|
+
# Table-driven shift/reduce parser.
#
# productions::   indexed by the +handle+ of a ReduceAction; each entry
#                 responds to +nonterminal+, +symbols+ and +action+.
# parsing_table:: indexed by state; each row maps a symbol to an action.
class Parser
  def initialize(productions, parsing_table)
    @productions, @parsing_table = productions, parsing_table
  end

  # Parses the symbols produced by +lexer+ and returns the value left on the
  # value stack when the production for Aurum::START is reduced.
  def parse(lexer)
    lookahead, state_stack, symbol_stack, value_stack = lexer.next_symbol, [0], [], []
    lookahead_shift = 0
    while true
      state = @parsing_table[state_stack.last]
      action = state[lookahead]
      if action.kind_of? ShiftAction
        state_stack.push action.state
        if action.is_lookahead_shift
          # Only the state is recorded; it is discarded again at the next reduce.
          lookahead_shift += 1
        else
          symbol_stack.push lookahead
          lookahead = lexer.next_symbol
        end
      elsif action.kind_of? ReduceAction
        handle = @productions[action.handle]
        lookahead_shift.times { state_stack.pop }
        lookahead_shift = 0
        if action.is_read_reduce
          state_stack.push state
          symbol_stack.push lookahead
          lookahead = lexer.next_symbol
        end
        length = handle.symbols.length
        symbols = []
        # -0..-1 is 0..-1 and would slice away the whole stack, so an empty
        # production must not touch the stacks at all.
        unless length.zero?
          state_stack.slice!(-length..-1)
          symbols = symbol_stack.slice!(-length..-1)
        end
        return value_stack.pop if handle.nonterminal == Aurum::START
        if handle.action
          result_name = handle.nonterminal.name
          context = {result_name => [SemanticAttributes.new]}
          # Values were pushed left to right, so the top of the stack belongs
          # to the rightmost nonterminal: pop them all, then restore the order.
          nonterminals = handle.symbols.reject { |symbol| symbol.is_terminal }
          values = nonterminals.map { value_stack.pop }.reverse
          handle.symbols.each_with_index do |symbol, index|
            context[symbol.name] ||= []
            context[symbol.name] << (symbol.is_terminal ? symbols[index] : values.shift)
          end
          SemanticActionContext.new(context).instance_eval(&handle.action)
          value_stack.push context[result_name][0] if context[result_name]
        end
        goto = @parsing_table[state_stack.last][handle.nonterminal]
        if goto.kind_of? ShiftAction
          state_stack.push goto.state
          symbol_stack.push nil
        else
          # No shift on the nonterminal: feed it back as the lookahead and
          # keep the real lookahead for later.
          lexer.pushback lookahead
          lookahead = handle.nonterminal
        end
      else
        # NOTE(review): error_recover is not defined in this class as shown;
        # unless it is provided elsewhere this call raises NameError.
        error_recover
      end
    end
  end

  # Receiver used to run a semantic action. Every message is treated as a
  # symbol name, optionally followed by digits selecting an occurrence
  # (`expression1` -> entry 1 stored under "expression"; no digits -> entry 0).
  class SemanticActionContext
    # String comparison works whether instance_methods yields Strings
    # (Ruby 1.8) or Symbols (1.9+), so instance_eval is never undefined.
    instance_methods.each do |m|
      method_name = m.to_s
      undef_method m unless method_name =~ /^__/ || method_name == 'new' || method_name == 'instance_eval'
    end

    def initialize(hash)
      @hash = hash
    end

    # Returns the stored entry, or a fresh SemanticAttributes when that
    # occurrence holds nothing.
    def method_missing(name, *args)
      name_string = name.to_s
      index = name_string[/\d+/].to_i
      key = name_string.sub(/\d+/, '')
      @hash[key][index] and return @hash[key][index]
      SemanticAttributes.new
    end
  end

  # Open-ended attribute bag: `bag.foo = 1` stores under "foo" and
  # `bag.foo` reads it back (nil when never set).
  class SemanticAttributes
    instance_methods.each do |m|
      method_name = m.to_s
      undef_method m unless method_name =~ /^__/ || method_name == 'new'
    end

    def initialize
      @hash = {}
    end

    def method_missing(name, *args)
      name_string = name.to_s
      # A trailing "=" marks a setter; the regexp test is independent of
      # whether String#[] returns a byte or a String.
      return @hash[name_string] unless name_string =~ /=\z/
      @hash[name_string.slice(0..-2)] = args.first
    end
  end
end
|
173
|
+
end
|
@@ -0,0 +1,234 @@
|
|
1
|
+
module Aurum
|
2
|
+
# Gives Symbol (as resolved inside this module; presumably Aurum::Symbol, confirm)
# a readable and writable `action`; the definition DSLs below store a production's block there.
Symbol.class_eval { attr_accessor :action }
|
3
|
+
|
4
|
+
# `nil` stands in for the empty symbol `_` in the definition DSLs, so it
# needs the same `action` accessor as a real symbol. nil's singleton class
# is NilClass itself.
NilClass.class_eval { attr_accessor :action }
|
7
|
+
|
8
|
+
# Mixin that turns unknown method calls into grammar symbols and productions.
#
# * a name starting with a lower-case letter is a nonterminal,
# * `_` alone is the empty symbol (nil),
# * any other name containing `_` followed by more characters is a terminal.
#
# Called without arguments the symbol is returned (carrying the given block
# as its action). Called with arguments a production `name -> args` is
# recorded in @definition; String arguments become `$literal_...` terminals
# and the production's action is the one carried by the last argument.
module GrammarDefinition
  # Names are compared as Strings so the filter behaves the same whether
  # instance_methods yields Strings (Ruby 1.8) or Symbols (1.9+).
  instance_methods.each do |m|
    method_name = m.to_s
    undef_method m unless method_name =~ /^__/ || method_name == 'new' || method_name == 'instance_eval'
  end

  def method_missing(name, *args, &block)
    name_string = name.to_s
    symbol =
      case name_string
      when /^[a-z]/ then Aurum::Symbol.new name_string, false
      when '_' then nil
      when /_.+/ then Aurum::Symbol.new name_string, true
      end
    symbol.action = block if block_given?
    return symbol if args.empty? || name_string == '_'
    symbols = args.map do |sym|
      case sym
      when String then Aurum::Symbol.new "$literal_#{sym}", true
      when Aurum::Symbol then sym
      end
    end
    last_symbol = symbols.last
    action = last_symbol.action
    # An action given to `_` lives on nil itself; clear it once consumed so
    # it cannot leak into a later production that ends in a bare `_`.
    last_symbol.action = nil if action && last_symbol.nil?
    @definition = {} unless @definition
    @definition[symbol] = [].to_set unless @definition.has_key? symbol
    production = Aurum::Production.new symbol, symbols.compact
    production.action = action if action
    @definition[symbol] << production
  end
end
|
40
|
+
|
41
|
+
# DSL receiver for named character classes: `name char_set` stores the set
# under +name+ (the first definition of a name wins).
class CharacterClassDefinition
  # Names are compared as Strings so instance_eval survives whether
  # instance_methods yields Strings (Ruby 1.8) or Symbols (1.9+).
  instance_methods.each do |m|
    method_name = m.to_s
    undef_method m unless method_name =~ /^__/ || method_name == 'new' || method_name == 'instance_eval'
  end
  attr_reader :definitions

  def initialize
    @definitions = {}
  end

  # Character set covering the interval a..b. Either bound may be a String,
  # in which case its first byte is used.
  def range(a, b)
    # unpack gives the byte value on every Ruby version; String#[] returns a
    # one-character String on 1.9+.
    a = a.unpack('C').first if a.kind_of? String
    b = b.unpack('C').first if b.kind_of? String
    CharacterSet::Interval.new(a, b).to_char_set
  end

  # Character set built by appending +a+ to an empty CharacterSet.
  def string(a)
    result = CharacterSet.new
    result << a
    result
  end

  # Any other message names a character class; the block is not used.
  def method_missing(name, char_set, &block)
    @definitions[name] = char_set unless @definitions.has_key? name
  end
end
|
65
|
+
|
66
|
+
# DSL receiver for lexical rules. Rules are stored per lexical state in
# +definitions+ as `pattern => action`; an unknown message `name patterns...`
# registers a rule recognizing the token +name+.
class LexicalSpecification
  # Names are compared as Strings so instance_eval survives whether
  # instance_methods yields Strings (Ruby 1.8) or Symbols (1.9+).
  instance_methods.each do |m|
    method_name = m.to_s
    undef_method m unless method_name =~ /^__/ || method_name == 'new' || method_name == 'instance_eval'
  end
  attr_reader :definitions, :character_classes

  # definition:: hash of lexical state => rules, shared between nested specifications.
  # state::      the lexical state whose rules this instance adds to.
  def initialize(definition = {:initial => {}}, state = :initial)
    @definitions, @character_classes = definition, CharacterClassDefinition.new
    @lexical_definition = @definitions[state]
  end

  # Pattern matching one character in a..b. As in
  # CharacterClassDefinition#range, a String bound stands for its first byte.
  def range(a, b)
    a = a.unpack('C').first if a.kind_of? String
    b = b.unpack('C').first if b.kind_of? String
    Pattern.from_char_set CharacterSet::Interval.new(a, b).to_char_set
  end

  # Pattern for the text +literal+.
  def string(literal)
    Pattern.from_string literal
  end

  # Pattern for the character class stored under +name+.
  def the(name)
    Pattern.from_char_set @character_classes.definitions[name]
  end

  # Registers a rule switching to lexical state +state+; an optional block
  # defines rules inside that state.
  def shift_to(state, *patterns, &config)
    pattern = __create_pattern ChangeStateAction.new(state), *patterns
    within state, &config if block_given?
    pattern
  end

  # Registers a rule whose action is the given block.
  def match(*patterns, &action)
    __create_pattern UserDefinedAction.new(action), *patterns
  end

  # Registers a rule whose match is ignored.
  def ignore(*patterns)
    __create_pattern IgnoreAction, *patterns
  end

  # Evaluates the block once for each listed state, creating the state's
  # rule table on first use.
  def within(*states, &config)
    states.each do |state|
      @definitions[state] = {} unless @definitions[state]
      LexicalSpecification.new(@definitions, state).instance_eval(&config)
    end
  end

  # Registers a rule that recognizes +token+ and then switches to +state+.
  def recognize_and_shift_to(token, state, *patterns)
    __create_pattern RecognizeTokenAndChangeStateAction.new(token.to_s, state), *patterns
  end

  # Any other message registers a rule recognizing a token of that name;
  # the block is not used.
  def method_missing(name, *patterns, &action)
    __create_pattern RecognizeTokenAction.new(name.to_s), *patterns
  end

  # Concatenates +patterns+ (non-patterns are converted through their to_s)
  # and stores the result with +action+ in the current state's rules.
  def __create_pattern(action, *patterns)
    parts = patterns.map { |x| x.kind_of?(Pattern) ? x : Pattern.from_string(x.to_s) }
    pattern = Pattern.concat(*parts)
    @lexical_definition[pattern] = action
    pattern
  end
end
|
122
|
+
|
123
|
+
# DSL receiver for operator precedence and associativity.
#
# precedences::     one array of symbols per `operator` call, in call order.
# associativities:: {:left => [...], :right => [...]} without duplicates.
class OperatorPrecedenceDefinition
  # Names are compared as Strings so instance_eval survives whether
  # instance_methods yields Strings (Ruby 1.8) or Symbols (1.9+).
  instance_methods.each do |m|
    method_name = m.to_s
    undef_method m unless method_name =~ /^__/ || method_name == 'new' || method_name == 'instance_eval'
  end
  attr_reader :precedences, :associativities

  def initialize
    @precedences, @associativities = [], {:right => [], :left => []}
  end

  # Appends one group of operators (duplicates within the group removed).
  def operator(*operators)
    group = operators.map { |x| __get_symbol x }
    @precedences << group.uniq
  end

  # Marks the operators as left-associative.
  def left(*operators)
    __associativity :left, *operators
  end

  # Marks the operators as right-associative.
  def right(*operators)
    __associativity :right, *operators
  end

  # A bare name inside the DSL stands for the terminal of that name.
  def method_missing(name)
    Aurum::Symbol.new name.to_s, true
  end

  # Symbols pass through; anything else becomes a `$literal_...` terminal.
  def __get_symbol(operator)
    operator.kind_of?(Aurum::Symbol) ? operator : Aurum::Symbol.new("$literal_#{operator}", true)
  end

  # Adds each operator to the list for +direction+ unless already present.
  def __associativity(direction, *operators)
    operators.each do |entry|
      symbol = __get_symbol entry
      @associativities[direction] << symbol unless @associativities[direction].include? symbol
    end
  end
end
|
159
|
+
|
160
|
+
# DSL receiver for productions.
#
# * a name starting with a lower-case letter is a nonterminal,
# * `_` alone is the empty symbol (nil),
# * any other name containing `_` followed by more characters is a terminal.
#
# `name` without arguments returns the symbol (carrying the given block as
# its action); `name a, b, ...` records the production `name -> a b ...` in
# +__definitions+. String arguments become `$literal_...` terminals and are
# also registered in +__literals+ as lexical rules. The production's action
# is the one carried by its last argument.
class ProductionDefinition
  # Names are compared as Strings so instance_eval survives whether
  # instance_methods yields Strings (Ruby 1.8) or Symbols (1.9+).
  instance_methods.each do |m|
    method_name = m.to_s
    undef_method m unless method_name =~ /^__/ || method_name == 'new' || method_name == 'instance_eval'
  end
  attr_reader :__definitions, :__literals

  def initialize
    @__definitions, @__literals = {}, {}
  end

  def method_missing(name, *args, &block)
    name_string = name.to_s
    symbol = case name_string
             when /^[a-z]/ then Aurum::Symbol.new name_string, false
             when '_' then nil
             when /_.+/ then Aurum::Symbol.new name_string, true
             end
    symbol.action = block if block_given?
    return symbol if args.empty? || name_string == '_'
    symbols = args.map do |sym|
      if sym.kind_of? String
        @__literals[Pattern.from_string(sym)] = RecognizeTokenAction.new "$literal_#{sym}"
        sym = Aurum::Symbol.new("$literal_#{sym}", true)
      end
      sym
    end
    last_symbol = symbols.last
    action = last_symbol.action
    # An action given to `_` lives on nil itself; clear it once consumed so
    # it cannot leak into a later production that ends in a bare `_`.
    last_symbol.action = nil if action && last_symbol.nil?
    @__definitions[symbol] = [].to_set unless @__definitions.has_key? symbol
    production = Aurum::Production.new symbol, symbols.compact
    production.action = action if action
    @__definitions[symbol] << production
  end
end
|
190
|
+
|
191
|
+
# Base class for grammars written with the class-level DSL
# (`character_classes`, `tokens`, `precedences`, `productions`).
# `SomeGrammar.parse_<nonterminal>(input)` parses +input+ starting from
# that nonterminal.
class Grammar
  # Evaluates the block against the character-class definitions.
  def self.character_classes(&block)
    @lexical_sepcification = LexicalSpecification.new unless @lexical_sepcification
    @lexical_sepcification.character_classes.instance_eval(&block)
  end

  # Evaluates the block against the lexical specification.
  def self.tokens(&block)
    @lexical_sepcification = LexicalSpecification.new unless @lexical_sepcification
    @lexical_sepcification.instance_eval(&block)
  end

  # Evaluates the block against the operator precedence definition.
  def self.precedences(&block)
    @precedences = OperatorPrecedenceDefinition.new unless @precedences
    @precedences.instance_eval(&block)
  end

  # Evaluates the block against the production definition.
  def self.productions(&block)
    @productions = ProductionDefinition.new unless @productions
    @productions.instance_eval(&block)
  end

  # Builds a Lexer over +input+. String literals used in productions are
  # merged into the :initial lexical state first.
  def self.lexer(input)
    # Sections that were never declared are treated as empty instead of
    # failing on nil.
    @lexical_sepcification = LexicalSpecification.new unless @lexical_sepcification
    @productions = ProductionDefinition.new unless @productions
    @lexical_sepcification.definitions[:initial].merge!(@productions.__literals)
    generator = Aurum::LexicalTableGenerator.new @lexical_sepcification.definitions
    table, accepts = generator.lexical_table
    Aurum::Lexer.new table, accepts, generator.lexical_states, input
  end

  # Builds a Parser whose start symbol is the nonterminal named +start_symbol+.
  def self.start_from(start_symbol)
    @productions = ProductionDefinition.new unless @productions
    @precedences = OperatorPrecedenceDefinition.new unless @precedences
    generator = Aurum::ParsingTableGenerator.new @productions.__definitions, @precedences.precedences, @precedences.associativities
    productions = generator.start_from(Aurum::Symbol.new(start_symbol.to_s, false)).productions
    table, _lookahead_level = generator.parsing_table
    Aurum::Parser.new productions, table
  end

  # Handles `parse_<nonterminal>(input)`. Every other unknown message goes
  # to the default handler, so a mistyped class method raises NoMethodError
  # instead of silently returning nil.
  def self.method_missing(name, *args, &block)
    name_string = name.to_s
    return super unless name_string =~ /^parse_/
    unless args.length == 1
      raise ArgumentError, "wrong number of arguments (#{args.length} for 1)"
    end
    start_nonterminal = name_string.split(/^parse_/).last
    start_from(start_nonterminal).parse lexer(args.first)
  end

  # Keeps respond_to? in step with method_missing (consulted on Ruby 1.9.2+).
  def self.respond_to_missing?(name, include_private = false)
    name.to_s =~ /^parse_/ ? true : super
  end
end
|
234
|
+
end
|