aurum 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/example/expression/expression.rb +29 -0
- data/lib/aurum.rb +10 -0
- data/lib/aurum/engine.rb +173 -0
- data/lib/aurum/grammar.rb +234 -0
- data/lib/aurum/lexical_table_generator.rb +423 -0
- data/lib/aurum/parsing_table_generator.rb +445 -0
- data/test/engine/lexer_test.rb +52 -0
- data/test/engine/semantic_attributes_test.rb +15 -0
- data/test/grammar_definition/character_class_definition_test.rb +28 -0
- data/test/grammar_definition/grammar_definition_test.rb +54 -0
- data/test/grammar_definition/lexical_definition_test.rb +56 -0
- data/test/grammar_definition/operator_precedence_definition_test.rb +35 -0
- data/test/grammar_definition/production_definition_test.rb +60 -0
- data/test/lexical_table_generator/automata_test.rb +74 -0
- data/test/lexical_table_generator/character_set_test.rb +73 -0
- data/test/lexical_table_generator/interval_test.rb +36 -0
- data/test/lexical_table_generator/pattern_test.rb +109 -0
- data/test/lexical_table_generator/subset_determinizer_test.rb +19 -0
- data/test/lexical_table_generator/table_generator_test.rb +126 -0
- data/test/parsing_table_generator/augmented_grammar_test.rb +45 -0
- data/test/parsing_table_generator/lalr_n_computation_test.rb +89 -0
- data/test/parsing_table_generator/lr_0_automata_test.rb +91 -0
- data/test/parsing_table_generator/lr_item_test.rb +33 -0
- data/test/parsing_table_generator/parsing_table_state_test.rb +39 -0
- data/test/parsing_table_generator/precedence_table_test.rb +28 -0
- data/test/parsing_table_generator/production_test.rb +9 -0
- data/test/test_helper.rb +103 -0
- metadata +78 -0
@@ -0,0 +1,29 @@
|
|
1
|
+
$:.unshift(File.dirname(__FILE__) + '/../../lib')
|
2
|
+
require 'aurum'
|
3
|
+
|
4
|
+
# Example grammar for integer arithmetic with +, -, *, / and parentheses.
#
# Inside each DSL block, unknown method names are turned into grammar symbols
# by the receiving definition object, so `expression` and `_number` below are
# DSL calls rather than local variables.
class ExpressionGrammar < Aurum::Grammar
  # Lexical rules: runs of spaces are registered with the ignore action and
  # runs of the characters 0..9 are registered as the `_number` token.
  tokens do
    ignore(string(' ').one_or_more)
    _number(range(?0, ?9).one_or_more)
  end

  # Each `operator` call records one group of operators, in declaration order.
  # NOTE(review): presumably earlier groups bind tighter - confirm against the
  # parsing table generator.
  precedences do
    operator('*', '/')
    operator('+', '-')
  end

  # Productions. The braces are deliberate: a `{ ... }` block binds to the
  # last symbol of a production, which is where the definition object looks
  # for the semantic action (a `do ... end` block would bind to the outer call).
  productions do
    expression(expression, '+', expression { expression.value = expression1.value + expression2.value })
    expression(expression, '-', expression { expression.value = expression1.value - expression2.value })
    expression(expression, '*', expression { expression.value = expression1.value * expression2.value })
    expression(expression, '/', expression { expression.value = expression1.value / expression2.value })
    expression('(', expression, ')')
    expression(_number { expression.value = _number.value.to_i })
  end
end
|
24
|
+
|
25
|
+
# Parse each sample expression and print the value computed by the grammar's
# semantic actions, one result per line.
[
  '1 + 2',
  '1 + 2 * 3',
  '1 * 2 + 3',
  '1 * (2 + 3)',
  '1 + (2 + 3) * 4'
].each do |source|
  puts ExpressionGrammar.parse_expression(source).value
end
|
data/lib/aurum.rb
ADDED
@@ -0,0 +1,10 @@
|
|
1
|
+
require 'aurum/lexical_table_generator'
|
2
|
+
require 'aurum/parsing_table_generator'
|
3
|
+
require 'aurum/engine'
|
4
|
+
require 'aurum/grammar'
|
5
|
+
|
6
|
+
Enumerable.class_eval do
  # Yields every element for which +condition+ holds.
  #
  # condition:: either a callable (anything responding to +call+, e.g. a Proc,
  #             lambda or Method object), which is invoked with the element, or
  #             any other object, whose +to_s+ is evaluated as Ruby source with
  #             the current element available as the local variable +x+.
  #
  # Returns whatever +each+ returns.
  def grep_each condition
    if condition.respond_to?(:call)
      each { |x| yield x if condition.call(x) }
    else
      # NOTE(review): this evaluates caller-supplied text as Ruby code; never
      # pass untrusted input here. The block parameter must stay named `x`
      # because condition strings refer to it.
      each { |x| yield x if eval(condition.to_s) }
    end
  end
end
|
data/lib/aurum/engine.rb
ADDED
@@ -0,0 +1,173 @@
|
|
1
|
+
module Aurum
|
2
|
+
IO.class_eval do
  # Reads the next character code from the stream.
  #
  # A value handed to #pushback is returned first, exactly as it was pushed.
  # Otherwise one byte is read; a byte greater than 128 is combined with the
  # following byte as <tt>lead * 128 + trail</tt>.
  #
  # Returns an Integer, or nil at end of input.
  def get_char
    # A pushed-back value has already been decoded; decoding it again would
    # multiply it by 128 a second time and swallow an extra byte.
    if @pushback_char
      char, @pushback_char = @pushback_char, nil
      return char
    end
    # getc returns a one-character String on Ruby >= 1.9, so prefer getbyte
    # where it exists to keep working with integer codes.
    reader = respond_to?(:getbyte) ? :getbyte : :getc
    lead = __send__(reader)
    return lead unless lead && lead > 128
    trail = __send__(reader)
    # A lead byte at the very end of the stream has nothing to combine with.
    trail ? lead * 128 + trail : lead
  end

  # Stores +char+ so that the next #get_char returns it. Only one value is
  # remembered; pushing nil clears the slot.
  def pushback char
    @pushback_char = char
  end
end
|
12
|
+
|
13
|
+
String.class_eval do
  # Returns the next byte of the string as an Integer and advances the read
  # cursor, or returns nil once the cursor is past the end. The cursor is kept
  # in an instance variable on the string itself, so the receiver must not be
  # frozen.
  def get_char
    @get_char_index = -1 unless @get_char_index
    @get_char_index += 1
    # String#[] returns a one-character String on Ruby >= 1.9; getbyte keeps
    # the integer result the lexer tables are indexed by.
    respond_to?(:getbyte) ? getbyte(@get_char_index) : self[@get_char_index]
  end

  # True once the cursor has reached the last byte (or the string is empty).
  def eof?
    @get_char_index = -1 unless @get_char_index
    size = respond_to?(:bytesize) ? bytesize : length
    @get_char_index >= (size - 1)
  end

  # Moves the cursor back by one position so the last byte is read again.
  # +char+ is ignored; it exists to mirror IO#pushback.
  def pushback char
    @get_char_index = -1 unless @get_char_index
    # Never step in front of the start: index -2 would make the next read
    # return the last byte of the string.
    @get_char_index -= 1 if @get_char_index > -1
  end
end
|
28
|
+
|
29
|
+
Symbol.class_eval {attr_accessor :value}
|
30
|
+
|
31
|
+
# Table-driven lexer.
#
# table::          maps a state to its transitions; each transition responds
#                  to +symbols+ (tested with include?) and +destination+.
# accepts::        maps a state to its accept actions; only the first is used.
# lexical_states:: list of lexical state names; the entry at index i is
#                  entered from state 0 through the pseudo input -(i + 1).
# input::          character source responding to get_char, pushback and eof?.
class Lexer
  def initialize table, accepts, lexical_states, input
    @table, @accepts, @lexical_states, @input = table, accepts, lexical_states, input
    shift_to :initial
  end

  # Returns the next recognized symbol, or Aurum::EOF when the input is
  # exhausted. A symbol previously handed to #pushback is returned first.
  # Raises RuntimeError when the input cannot be matched by any pattern.
  def next_symbol
    @recognized, lexeme, @pushback_symbol = @pushback_symbol, '', nil
    until @recognized
      # Checked inside the loop so that a pushed-back symbol is not dropped
      # at end of input, and so that trailing ignored text ends cleanly
      # instead of being scanned again.
      return Aurum::EOF if @input.eof?
      state, next_state, char = nil, @start_state, nil
      while next_state
        lexeme << char if char
        state, char = next_state, @input.get_char
        next_state = goto state, char
      end
      # The character that stopped the scan belongs to the next token.
      @input.pushback char
      accepted = @accepts[state]
      action = accepted && accepted.first
      unless action
        raise "unrecognized input #{char.inspect} after #{lexeme.inspect}"
      end
      if action == IgnoreAction
        lexeme = ''
      else
        action.execute self
      end
    end
    @recognized.value = lexeme unless @recognized.value
    @recognized
  end

  # Makes +symbol+ the result of the next #next_symbol call.
  def pushback symbol
    @pushback_symbol = symbol
  end

  # Returns the destination of the transition leaving +state+ on +input+,
  # or nil when +input+ is nil or no transition matches.
  def goto state, input
    return nil unless input
    transition = @table[state].find { |tran| tran.symbols.include?(input) }
    transition ? transition.destination : nil
  end

  # Switches the start state used for subsequent tokens to lexical +state+.
  def shift_to state
    @start_state = goto 0, -@lexical_states.index(state) - 1
  end

  # Called by accept actions to record +token+ as the recognized terminal.
  def recognize token
    @recognized = Aurum::Symbol.new token, true
  end
end
|
76
|
+
|
77
|
+
# Lexer-side behaviour for the accept actions. Each action class gains an
# +execute+ method that the lexer calls with itself as the argument.

RecognizeTokenAction.class_eval do
  # Reports this action's token to the lexer as the recognized terminal.
  def execute(lexer)
    lexer.recognize(token)
  end
end

ChangeStateAction.class_eval do
  # Switches the lexer to this action's lexical state.
  def execute(lexer)
    lexer.shift_to(state)
  end
end

RecognizeTokenAndChangeStateAction.class_eval do
  # Reports the token first, then switches the lexical state.
  def execute(lexer)
    lexer.recognize(token)
    lexer.shift_to(state)
  end
end
|
95
|
+
|
96
|
+
class Parser
|
97
|
+
# Keeps the production list (indexed by the handle of a reduce action) and
# the parsing table (indexed by state number) for later use by #parse.
def initialize(productions, parsing_table)
  @productions = productions
  @parsing_table = parsing_table
end
|
100
|
+
|
101
|
+
# Runs the table-driven shift/reduce loop over the symbols supplied by
# +lexer+ (which must respond to next_symbol and pushback).
#
# Returns the value popped from the value stack when the production for
# Aurum::START is reduced. When the table has no action for the current
# lookahead, delegates to +error_recover+ if such a method exists and raises
# RuntimeError otherwise.
def parse lexer
  lookahead, state_stack, symbol_stack, value_stack = lexer.next_symbol, [0], [], []
  lookahead_shift = 0
  while true
    state = @parsing_table[state_stack.last]
    action = state[lookahead]
    if action.kind_of? ShiftAction
      state_stack.push action.state
      if action.is_lookahead_shift
        # Lookahead shifts only move through states; the symbol is not
        # consumed and the extra states are dropped at the next reduce.
        lookahead_shift += 1
      else
        symbol_stack.push lookahead
        lookahead = lexer.next_symbol
      end
    elsif action.kind_of? ReduceAction
      handle = @productions[action.handle]
      lookahead_shift.times { state_stack.pop }
      lookahead_shift = 0
      if action.is_read_reduce
        # Consume the lookahead as part of the handle; the state entry
        # pushed here is only a placeholder removed by the slice below.
        state_stack.push state
        symbol_stack.push lookahead
        lookahead = lexer.next_symbol
      end
      # Remove exactly `arity` entries. A range of -arity..-1 would become
      # 0..-1 for an empty production and wipe both stacks.
      arity = handle.symbols.length
      state_stack.slice!(state_stack.length - arity, arity)
      symbols = symbol_stack.slice!(symbol_stack.length - arity, arity)
      return value_stack.pop if handle.nonterminal == Aurum::START
      if handle.action
        context = {handle.nonterminal.name => [SemanticAttributes.new]}
        # Child values were pushed left to right, so pop them right to left
        # and then bind them in source order; otherwise the first child
        # would receive the value of the last one.
        values = []
        handle.symbols.reverse.each do |symbol|
          values.unshift(symbol.is_terminal ? nil : value_stack.pop)
        end
        handle.symbols.each_with_index do |symbol, index|
          context[symbol.name] = [] unless context.has_key? symbol.name
          context[symbol.name] << (symbol.is_terminal ? symbols[index] : values[index])
        end
        SemanticActionContext.new(context).instance_eval(&handle.action)
        value_stack.push context[handle.nonterminal.name][0] if context[handle.nonterminal.name]
      end
      goto = @parsing_table[state_stack.last][handle.nonterminal]
      if goto.kind_of? ShiftAction
        state_stack.push goto.state
        symbol_stack.push nil
      else
        # No plain goto: feed the nonterminal back through the main loop as
        # the lookahead and let the lexer return the real one afterwards.
        lexer.pushback lookahead
        lookahead = handle.nonterminal
      end
    else
      # The table has no entry for this lookahead. No error_recover method
      # is defined alongside this parser, so fail with a readable message
      # unless one has been supplied elsewhere.
      unless respond_to?(:error_recover, true)
        raise "syntax error: unexpected #{lookahead.inspect} in parser state #{state_stack.last.inspect}"
      end
      error_recover
    end
  end
end
|
145
|
+
|
146
|
+
# Evaluation context for semantic actions. Any method call is treated as a
# lookup by symbol name: +foo+ is entry 0 of the list stored under "foo" and
# +foo2+ is entry 2.
class SemanticActionContext
  # Strip inherited methods so symbol names cannot collide with them. Method
  # names are Strings on Ruby 1.8 and Symbols later, so compare via to_s;
  # comparing Symbols to Strings used to remove instance_eval as well.
  # object_id is kept because undefining it only produces a warning.
  instance_methods.each do |m|
    method_name = m.to_s
    undef_method m unless method_name =~ /^__/ || method_name == 'instance_eval' || method_name == 'object_id'
  end

  # hash:: maps a symbol name (String) to an Array of attribute objects.
  def initialize hash
    @hash = hash
  end

  # Returns the attribute object for +name+; the first run of digits in the
  # name selects the index (default 0). Unknown names and indexes yield a
  # fresh SemanticAttributes instead of failing.
  def method_missing name, *args
    name_string = name.to_s
    index = name_string[/\d+/].to_i
    entries = @hash[name_string.sub(/\d+/, '')]
    found = entries && entries[index]
    found || SemanticAttributes.new
  end
end
|
159
|
+
|
160
|
+
# Open-ended attribute bag: +obj.foo = v+ stores v under "foo" and +obj.foo+
# reads it back (nil when never assigned).
class SemanticAttributes
  # Strip inherited methods so attribute names cannot collide with them.
  # object_id is kept because undefining it only produces a warning.
  instance_methods.each do |m|
    method_name = m.to_s
    undef_method m unless method_name =~ /^__/ || method_name == 'object_id'
  end

  def initialize
    @hash = {}
  end

  # Names ending in "=" store the first argument; any other name reads.
  def method_missing name, *args
    name_string = name.to_s
    # Compare a one-character substring rather than a character code:
    # String#[] with a single index returns a String on Ruby >= 1.9, so the
    # old `== 61` test never matched there and setters were treated as reads.
    return @hash[name_string] unless name_string[-1, 1] == '='
    @hash[name_string.slice(0..-2)] = args.first
  end
end
|
172
|
+
end
|
173
|
+
end
|
@@ -0,0 +1,234 @@
|
|
1
|
+
module Aurum
|
2
|
+
Symbol.class_eval { attr_accessor :action }
|
3
|
+
|
4
|
+
# Gives nil an `action` reader and writer. The grammar DSL represents the
# empty symbol `_` as nil and reads `action` from the last symbol of every
# production, so nil has to answer that message too.
::NilClass.class_eval do
  attr_accessor :action
end
|
7
|
+
|
8
|
+
# Mixin form of the production DSL. Unknown method names become grammar
# symbols; a call with arguments additionally records a production for the
# named symbol in @definition.
module GrammarDefinition
  instance_methods.each do |m|
    undef_method m unless m =~ /^__/ || m == 'new' || m == 'instance_eval'
  end

  # Naming rules: a leading lowercase letter gives a nonterminal, `_` alone
  # is the empty symbol (nil), and any other name containing `_` followed by
  # more text gives a terminal.
  #
  # Without arguments (or for `_`) the symbol is returned. With arguments a
  # production `name -> args` is added to @definition, String arguments
  # become "$literal_..." terminals, and the block attached to the last
  # argument, if any, becomes the production's action.
  def method_missing(name, *args, &block)
    label = name.to_s
    head =
      if label =~ /^[a-z]/
        Aurum::Symbol.new(label, false)
      elsif label == '_'
        nil
      elsif label =~ /_.+/
        Aurum::Symbol.new(label, true)
      end
    head.action = block if block_given?
    return head if args.empty? || label == '_'

    body = args.map do |item|
      if item.kind_of?(String)
        Aurum::Symbol.new("$literal_#{item}", true)
      elsif item.kind_of?(Aurum::Symbol)
        item
      end
    end
    trailing_action = body.last.action
    @definition ||= {}
    @definition[head] = [].to_set unless @definition.has_key?(head)
    rule = Aurum::Production.new(head, body.compact)
    rule.action = trailing_action if trailing_action
    @definition[head] << rule
  end
end
|
40
|
+
|
41
|
+
# DSL receiver for the `character_classes` section: `name char_set` stores a
# named character set, built with the #range and #string helpers.
class CharacterClassDefinition
  # Strip inherited methods so class names cannot collide with them. Method
  # names are Strings on Ruby 1.8 and Symbols later, so compare via to_s;
  # comparing Symbols to Strings used to remove instance_eval as well.
  # object_id is kept because undefining it only produces a warning.
  instance_methods.each do |m|
    method_name = m.to_s
    undef_method m unless method_name =~ /^__/ || method_name == 'instance_eval' || method_name == 'object_id'
  end
  attr_reader :definitions

  def initialize
    @definitions = {}
  end

  # Character set covering a..b. Bounds may be integer codes or Strings, in
  # which case the first byte of the string is used.
  def range a, b
    # unpack('C') gives the first byte as an Integer on every Ruby version;
    # String#[] with an index returns a String on Ruby >= 1.9.
    a = a.unpack('C').first if a.kind_of? String
    b = b.unpack('C').first if b.kind_of? String
    CharacterSet::Interval.new(a, b).to_char_set
  end

  # New character set with +a+ appended to it.
  def string a
    result = CharacterSet.new
    result << a
    result
  end

  # `name char_set` registers a character class; the first definition of a
  # name wins and later ones are ignored.
  def method_missing name, char_set, &block
    @definitions[name] = char_set unless @definitions.has_key? name
  end
end
|
65
|
+
|
66
|
+
# DSL receiver for the `tokens` section. Patterns are stored per lexical
# state in +definitions+ as pattern => action; unknown method names declare
# tokens.
class LexicalSpecification
  # Strip inherited methods so token names cannot collide with them. Method
  # names are Strings on Ruby 1.8 and Symbols later, so compare via to_s;
  # comparing Symbols to Strings used to remove instance_eval as well.
  # object_id is kept because undefining it only produces a warning.
  instance_methods.each do |m|
    method_name = m.to_s
    undef_method m unless method_name =~ /^__/ || method_name == 'instance_eval' || method_name == 'object_id'
  end
  attr_reader :definitions, :character_classes

  # definition::        hash of lexical state => (pattern => action).
  # state::             the lexical state this instance adds patterns to.
  # character_classes:: existing CharacterClassDefinition to share; a new
  #                     one is created when omitted.
  def initialize definition = {:initial => {}}, state = :initial, character_classes = nil
    @definitions = definition
    @character_classes = character_classes || CharacterClassDefinition.new
    @lexical_definition = @definitions[state]
  end

  # Pattern matching one character in a..b. Bounds may be integer codes or
  # Strings (first byte used), as in CharacterClassDefinition#range; `?0`
  # literals are Strings on Ruby >= 1.9.
  def range a, b
    a = a.unpack('C').first if a.kind_of? String
    b = b.unpack('C').first if b.kind_of? String
    Pattern.from_char_set CharacterSet::Interval.new(a, b).to_char_set
  end

  # Pattern matching +literal+.
  def string literal
    Pattern.from_string literal
  end

  # Pattern for the character class registered under +name+.
  def the name
    Pattern.from_char_set @character_classes.definitions[name]
  end

  # Registers patterns that switch the lexer to +state+; an optional block
  # declares the patterns valid inside that state.
  def shift_to state, *patterns, &config
    pattern = __create_pattern ChangeStateAction.new(state), *patterns
    within state, &config if block_given?
    pattern
  end

  # Registers patterns handled by a user-supplied block.
  def match *patterns, &action
    __create_pattern UserDefinedAction.new(action), *patterns
  end

  # Registers patterns whose text is discarded.
  def ignore *patterns
    __create_pattern IgnoreAction, *patterns
  end

  # Evaluates +config+ once per given state with a specification bound to
  # that state.
  def within *states, &config
    states.each do |state|
      @definitions[state] = {} unless @definitions[state]
      # Share the character classes: a nested specification built without
      # them starts empty, so `the` could not resolve any class in the block.
      LexicalSpecification.new(@definitions, state, @character_classes).instance_eval(&config)
    end
  end

  # Registers patterns that produce +token+ and then switch to +state+.
  def recognize_and_shift_to token, state, *patterns
    __create_pattern RecognizeTokenAndChangeStateAction.new(token.to_s, state), *patterns
  end

  # `name pattern, ...` declares a token called +name+.
  def method_missing name, *patterns, &action
    __create_pattern RecognizeTokenAction.new(name.to_s), *patterns
  end

  # Concatenates +patterns+ (non-Pattern arguments become string patterns),
  # stores the result with +action+ for the current state and returns it.
  def __create_pattern action, *patterns
    pattern = Pattern.concat(*patterns.map { |x| x.kind_of?(Pattern) ? x : Pattern.from_string(x.to_s) })
    @lexical_definition[pattern] = action
    pattern
  end
end
|
122
|
+
|
123
|
+
# DSL receiver for the `precedences` section. Collects operator groups in
# declaration order and the operators declared left or right associative.
class OperatorPrecedenceDefinition
  # Strip inherited methods so terminal names cannot collide with them.
  # Method names are Strings on Ruby 1.8 and Symbols later, so compare via
  # to_s; comparing Symbols to Strings used to remove instance_eval, after
  # which `instance_eval { ... }` fell into method_missing and the block was
  # silently skipped. object_id is kept because undefining it only produces
  # a warning.
  instance_methods.each do |m|
    method_name = m.to_s
    undef_method m unless method_name =~ /^__/ || method_name == 'instance_eval' || method_name == 'object_id'
  end
  attr_reader :precedences, :associativities

  def initialize
    @precedences, @associativities = [], {:right => [], :left => []}
  end

  # Appends one group of operators (duplicates removed) to +precedences+.
  def operator *operators
    @precedences << operators.map { |x| __get_symbol x }.uniq
  end

  # Declares +operators+ left associative.
  def left *operators
    __associativity :left, *operators
  end

  # Declares +operators+ right associative.
  def right *operators
    __associativity :right, *operators
  end

  # Any other bare name is taken as the name of a terminal symbol.
  def method_missing name
    Aurum::Symbol.new name.to_s, true
  end

  # Returns +operator+ itself when it already is a symbol, otherwise the
  # "$literal_..." terminal for it.
  def __get_symbol operator
    operator.kind_of?(Aurum::Symbol) ? operator : Aurum::Symbol.new("$literal_#{operator}", true)
  end

  # Adds each operator to the list for +direction+ unless already present.
  def __associativity direction, *operators
    operators.each do |operator|
      symbol = __get_symbol operator
      @associativities[direction] << symbol unless @associativities[direction].include? symbol
    end
  end
end
|
159
|
+
|
160
|
+
# DSL receiver for the `productions` section. Unknown method names become
# grammar symbols; a call with arguments records a production.
class ProductionDefinition
  # Strip inherited methods so symbol names cannot collide with them. Method
  # names are Strings on Ruby 1.8 and Symbols later, so compare via to_s;
  # comparing Symbols to Strings used to remove instance_eval as well.
  # object_id is kept because undefining it only produces a warning.
  instance_methods.each do |m|
    method_name = m.to_s
    undef_method m unless method_name =~ /^__/ || method_name == 'instance_eval' || method_name == 'object_id'
  end
  attr_reader :__definitions, :__literals

  def initialize
    @__definitions, @__literals, @__epsilon_action = {}, {}, nil
  end

  # Naming rules: a leading lowercase letter gives a nonterminal, `_` alone
  # is the empty symbol (nil), and any other name containing `_` followed by
  # more text gives a terminal.
  #
  # Without arguments the symbol is returned, carrying the block (if given)
  # as its action. With arguments a production `name -> args` is added to
  # +__definitions+; String arguments become "$literal_..." terminals and
  # are registered in +__literals+; the action attached to the last argument
  # becomes the production's action.
  def method_missing name, *args, &block
    name_string = name.to_s
    if name_string == '_'
      # Remember an action written on the empty symbol on this definition
      # object. Storing it on nil itself leaked it into every later
      # production ending in `_` and fails on Rubies where nil is frozen.
      @__epsilon_action = block if block_given?
      return nil
    end
    symbol = case name_string
             when /^[a-z]/ then Aurum::Symbol.new name_string, false
             when /_.+/ then Aurum::Symbol.new name_string, true
             end
    symbol.action = block if symbol && block_given?
    return symbol if args.empty?
    symbols = args.map do |sym|
      if sym.kind_of? String
        @__literals[Pattern.from_string(sym)] = RecognizeTokenAction.new "$literal_#{sym}"
        sym = Aurum::Symbol.new("$literal_#{sym}", true)
      end
      sym
    end
    last = symbols.last
    action = last.nil? ? @__epsilon_action : last.action
    # The pending epsilon action belongs to this production only.
    @__epsilon_action = nil
    @__definitions[symbol] = [].to_set unless @__definitions.has_key? symbol
    production = Aurum::Production.new symbol, symbols.compact
    production.action = action if action
    @__definitions[symbol] << production
  end
end
|
190
|
+
|
191
|
+
# Base class for grammars written with the DSL. Subclasses describe
# themselves with character_classes / tokens / precedences / productions and
# are run through the generated class method parse_<nonterminal>(input).
class Grammar
  # Evaluates +block+ against the character class definitions.
  def self.character_classes &block
    __lexical_specification.character_classes.instance_eval(&block)
  end

  # Evaluates +block+ against the lexical specification.
  def self.tokens &block
    __lexical_specification.instance_eval(&block)
  end

  # Evaluates +block+ against the operator precedence definition.
  def self.precedences &block
    __precedences.instance_eval(&block)
  end

  # Evaluates +block+ against the production definition.
  def self.productions &block
    __productions.instance_eval(&block)
  end

  # Builds a lexer over +input+ from the declared tokens plus the string
  # literals used inside productions.
  def self.lexer input
    __lexical_specification.definitions[:initial].merge!(__productions.__literals)
    generator = Aurum::LexicalTableGenerator.new __lexical_specification.definitions
    table, accepts = generator.lexical_table
    Aurum::Lexer.new table, accepts, generator.lexical_states, input
  end

  # Builds a parser whose start symbol is the nonterminal +start_symbol+.
  def self.start_from start_symbol
    generator = Aurum::ParsingTableGenerator.new __productions.__definitions, __precedences.precedences, __precedences.associativities
    productions = generator.start_from(Aurum::Symbol.new(start_symbol.to_s, false)).productions
    # parsing_table also returns a second value, which is not needed here.
    table, _lookahead_level = generator.parsing_table
    Aurum::Parser.new productions, table
  end

  # parse_<nonterminal>(input) parses +input+ starting from that
  # nonterminal. Every other unknown class method is passed up so that it
  # raises NoMethodError instead of quietly returning nil.
  def self.method_missing name, *args, &block
    name_string = name.to_s
    return super unless name_string =~ /^parse_./
    unless args.length == 1
      raise ArgumentError, "wrong number of arguments (#{args.length} for 1)"
    end
    start_nonterminal = name_string.sub(/^parse_/, '')
    start_from(start_nonterminal).parse lexer(args.first)
  end

  # Keeps respond_to? in step with method_missing.
  def self.respond_to_missing? name, include_private = false
    name.to_s =~ /^parse_./ ? true : super
  end

  # Lazily created definition objects. They live in class-level instance
  # variables, so every subclass gets its own. A section that was never
  # declared is therefore empty rather than nil. The instance variable keeps
  # its historical spelling.
  def self.__lexical_specification
    @lexical_sepcification ||= LexicalSpecification.new
  end

  def self.__precedences
    @precedences ||= OperatorPrecedenceDefinition.new
  end

  def self.__productions
    @productions ||= ProductionDefinition.new
  end

  private_class_method :__lexical_specification, :__precedences, :__productions
end
|
234
|
+
end
|