aurum 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (28) hide show
  1. data/example/expression/expression.rb +29 -0
  2. data/lib/aurum.rb +10 -0
  3. data/lib/aurum/engine.rb +173 -0
  4. data/lib/aurum/grammar.rb +234 -0
  5. data/lib/aurum/lexical_table_generator.rb +423 -0
  6. data/lib/aurum/parsing_table_generator.rb +445 -0
  7. data/test/engine/lexer_test.rb +52 -0
  8. data/test/engine/semantic_attributes_test.rb +15 -0
  9. data/test/grammar_definition/character_class_definition_test.rb +28 -0
  10. data/test/grammar_definition/grammar_definition_test.rb +54 -0
  11. data/test/grammar_definition/lexical_definition_test.rb +56 -0
  12. data/test/grammar_definition/operator_precedence_definition_test.rb +35 -0
  13. data/test/grammar_definition/production_definition_test.rb +60 -0
  14. data/test/lexical_table_generator/automata_test.rb +74 -0
  15. data/test/lexical_table_generator/character_set_test.rb +73 -0
  16. data/test/lexical_table_generator/interval_test.rb +36 -0
  17. data/test/lexical_table_generator/pattern_test.rb +109 -0
  18. data/test/lexical_table_generator/subset_determinizer_test.rb +19 -0
  19. data/test/lexical_table_generator/table_generator_test.rb +126 -0
  20. data/test/parsing_table_generator/augmented_grammar_test.rb +45 -0
  21. data/test/parsing_table_generator/lalr_n_computation_test.rb +89 -0
  22. data/test/parsing_table_generator/lr_0_automata_test.rb +91 -0
  23. data/test/parsing_table_generator/lr_item_test.rb +33 -0
  24. data/test/parsing_table_generator/parsing_table_state_test.rb +39 -0
  25. data/test/parsing_table_generator/precedence_table_test.rb +28 -0
  26. data/test/parsing_table_generator/production_test.rb +9 -0
  27. data/test/test_helper.rb +103 -0
  28. metadata +78 -0
@@ -0,0 +1,29 @@
1
+ $:.unshift(File.dirname(__FILE__) + '/../../lib')
2
+ require 'aurum'
3
+
4
# Sample arithmetic grammar written with the Aurum DSL: a token section,
# an operator-precedence section and the productions with their actions.
class ExpressionGrammar < Aurum::Grammar
  # Lexical rules: runs of spaces are ignored, runs of digits form _number.
  tokens do
    ignore(string(' ').one_or_more)
    _number(range(?0, ?9).one_or_more)
  end

  # Each `operator` call declares one group of operators.
  precedences do
    operator('*', '/')
    operator('+', '-')
  end

  # The brace block is attached to the LAST symbol of each production; the
  # DSL reads the production's action from there. Inside an action,
  # `expression` is the left-hand side and `expression1`/`expression2` are
  # the right-hand-side occurrences.
  productions do
    expression(expression, '+', expression {expression.value = expression1.value + expression2.value})
    expression(expression, '-', expression {expression.value = expression1.value - expression2.value})
    expression(expression, '*', expression {expression.value = expression1.value * expression2.value})
    expression(expression, '/', expression {expression.value = expression1.value / expression2.value})
    expression('(', expression, ')')
    expression(_number {expression.value = _number.value.to_i})
  end
end
24
+
25
# Parse every sample input starting from `expression` and print the value
# computed by the semantic actions.
[
  '1 + 2',
  '1 + 2 * 3',
  '1 * 2 + 3',
  '1 * (2 + 3)',
  '1 + (2 + 3) * 4'
].each do |source|
  puts ExpressionGrammar.parse_expression(source).value
end
data/lib/aurum.rb ADDED
@@ -0,0 +1,10 @@
1
+ require 'aurum/lexical_table_generator'
2
+ require 'aurum/parsing_table_generator'
3
+ require 'aurum/engine'
4
+ require 'aurum/grammar'
5
+
6
Enumerable.class_eval do
  # Yields every element for which +condition+ holds.
  #
  # condition - a Proc called with the element, or any other object whose
  #             #to_s is evaluated as Ruby code with the element bound to +x+.
  #
  # NOTE(review): the non-Proc branch runs `eval` on the condition text, so
  # it must never be fed untrusted input.
  #
  # Returns whatever #each returns.
  def grep_each(condition)
    if condition.kind_of?(Proc)
      each { |x| yield x if condition.call(x) }
    else
      each { |x| yield x if eval(condition.to_s) }
    end
  end
end
@@ -0,0 +1,173 @@
1
+ module Aurum
2
IO.class_eval do
  # Returns the next character code from the stream, or nil at end of input.
  #
  # A character handed back through #pushback is returned first, exactly as
  # it was given (it is not decoded a second time). Otherwise one byte is
  # read; a byte above 128 is combined with the following byte as
  # `lead * 128 + trail`, the same formula the original code used.
  #
  # Bytes are read with #getbyte so the result is an Integer on every Ruby
  # version (IO#getc returns a String on Ruby >= 1.9).
  #
  # NOTE(review): the built-in IO#eof? knows nothing about the pushed-back
  # character, so callers that test eof? first may miss it.
  def get_char
    pending, @pushback_char = @pushback_char, nil
    return pending if pending
    lead = getbyte
    return lead if lead.nil? || lead <= 128
    trail = getbyte
    # A lead byte at the very end of the stream has nothing to pair with.
    trail ? lead * 128 + trail : lead
  end

  # Stores +char+ so that the next #get_char returns it (nil clears it).
  def pushback char
    @pushback_char = char
  end
end
12
+
13
String.class_eval do
  # Moves the read cursor one position forward and returns what String#[]
  # yields there (nil once the cursor is past the end). The cursor starts
  # at -1, so the first call reads index 0.
  def get_char
    @get_char_index ||= -1
    @get_char_index += 1
    self[@get_char_index]
  end

  # True when the cursor is on, or beyond, the last index of the string.
  def eof?
    @get_char_index ||= -1
    last_index = length - 1
    @get_char_index >= last_index
  end

  # Steps the cursor back by one; the argument itself is not inspected.
  def pushback char
    @get_char_index -= 1
  end
end
28
+
29
# Adds a read/write `value` attribute to whatever `Symbol` resolves to in
# this scope (presumably Aurum::Symbol once the generators are loaded --
# TODO confirm).
Symbol.class_eval do
  attr_accessor :value
end
30
+
31
# Table-driven scanner. +table[state]+ is a collection of transitions that
# respond to #symbols and #destination; +accepts[state].first+ is the action
# run when scanning stops in +state+.
class Lexer
  # table          - transition table, indexed by state.
  # accepts        - accept actions, indexed by state.
  # lexical_states - list of lexical state names; a name's position selects
  #                  its start state (see #shift_to).
  # input          - object responding to #get_char, #eof? and #pushback.
  def initialize table, accepts, lexical_states, input
    @table, @accepts, @lexical_states, @input = table, accepts, lexical_states, input
    shift_to :initial
  end

  # Returns the next recognized symbol, or Aurum::EOF once input is exhausted.
  #
  # A symbol handed back through #pushback is returned first and unchanged,
  # even when the input is already at its end (it used to be dropped in
  # favour of EOF). The end-of-input check is repeated before every scan, so
  # trailing ignored text ends with EOF instead of a crash.
  #
  # Raises RuntimeError when scanning stops in a state with no accept action.
  def next_symbol
    if @pushback_symbol
      @recognized, @pushback_symbol = @pushback_symbol, nil
      return @recognized
    end
    @recognized, lexeme = nil, ''
    until @recognized
      return Aurum::EOF if @input.eof?
      next_state, char = @start_state, nil
      while next_state
        lexeme << char if char
        state, char = next_state, @input.get_char
        next_state = goto state, char
      end
      # The character that stopped the scan belongs to the next token.
      @input.pushback char
      actions = @accepts[state]
      action = actions && actions.first
      raise "lexical error: no token matches at #{char.inspect}" unless action
      if action == IgnoreAction
        lexeme = ''
      else
        action.execute self
      end
    end
    @recognized.value = lexeme unless @recognized.value
    @recognized
  end

  # Makes +symbol+ the result of the next #next_symbol call.
  def pushback symbol
    @pushback_symbol = symbol
  end

  # Returns the destination of the first transition of +state+ whose symbols
  # include +input+, or nil when there is none (or +input+ is nil).
  def goto state, input
    return nil unless input
    next_state = @table[state].find {|tran| tran.symbols.include?(input)}
    next_state ? next_state.destination : nil
  end

  # Switches to the lexical state named +state+: its start state is the
  # target of the transition from state 0 on the code -(index + 1), where
  # index is the name's position in the lexical state list.
  def shift_to state
    @start_state = goto 0, -@lexical_states.index(state)-1
  end

  # Records a terminal Aurum::Symbol named +token+ as the recognized symbol.
  def recognize token
    @recognized = Aurum::Symbol.new token, true
  end
end
76
+
77
# Lets the lexer run this action: the lexer is asked to recognize the
# action's own `token`.
RecognizeTokenAction.class_eval do
  def execute(lexer)
    lexer.recognize(token)
  end
end
82
+
83
# Lets the lexer run this action: the lexer is switched to the action's
# own lexical `state`.
ChangeStateAction.class_eval do
  def execute(lexer)
    lexer.shift_to(state)
  end
end
88
+
89
# Lets the lexer run this action: first the action's `token` is recognized,
# then the lexer is switched to the action's `state`.
RecognizeTokenAndChangeStateAction.class_eval do
  def execute(lexer)
    lexer.recognize(token)
    lexer.shift_to(state)
  end
end
95
+
96
# Table-driven shift/reduce parser that also runs the semantic actions
# attached to productions.
class Parser
  # productions   - collection indexed by ReduceAction#handle; each entry
  #                 responds to #nonterminal, #symbols and #action.
  # parsing_table - collection indexed by state; each state maps a symbol
  #                 to a ShiftAction or ReduceAction.
  def initialize productions, parsing_table
    @productions, @parsing_table = productions, parsing_table
  end

  # Parses the symbols produced by +lexer+ (which must respond to
  # #next_symbol and #pushback) and returns the value left on the value
  # stack when the Aurum::START production is reduced.
  #
  # Raises RuntimeError when the current state has no action for the
  # lookahead symbol.
  def parse lexer
    lookahead, state_stack, symbol_stack, value_stack = lexer.next_symbol, [0], [], []
    lookahead_shift = 0
    while true
      state = @parsing_table[state_stack.last]
      action = state[lookahead]
      if action.kind_of? ShiftAction
        state_stack.push action.state
        if action.is_lookahead_shift
          # Lookahead shifts only push a state; they are undone on reduce.
          lookahead_shift += 1
        else
          symbol_stack.push lookahead
          lookahead = lexer.next_symbol
        end
      elsif action.kind_of? ReduceAction
        handle = @productions[action.handle]
        lookahead_shift.times { state_stack.pop }
        lookahead_shift = 0
        if action.is_read_reduce
          state_stack.push state
          symbol_stack.push lookahead
          lookahead = lexer.next_symbol
        end
        # Array#pop(n) removes nothing for an empty production; the former
        # `slice!(-n..-1)` became `slice!(0..-1)` and wiped both stacks.
        handle_length = handle.symbols.length
        state_stack.pop handle_length
        symbols = symbol_stack.pop handle_length
        handle.nonterminal == Aurum::START and return value_stack.pop
        if handle.action
          # Values were pushed in source order, so the top of the stack
          # belongs to the right-most nonterminal: pop right to left, then
          # restore source order so name1/name2 match the production.
          child_values = handle.symbols.reverse.map do |symbol|
            symbol.is_terminal ? nil : value_stack.pop
          end.reverse
          context = {handle.nonterminal.name => [SemanticAttributes.new]}
          handle.symbols.each_with_index do |symbol, index|
            context[symbol.name] = [] unless context.has_key? symbol.name
            context[symbol.name] << (symbol.is_terminal ? symbols[index] : child_values[index])
          end
          SemanticActionContext.new(context).instance_eval(&handle.action)
          value_stack.push context[handle.nonterminal.name][0] if context[handle.nonterminal.name]
        end
        goto = @parsing_table[state_stack.last][handle.nonterminal]
        if goto.kind_of? ShiftAction
          state_stack.push goto.state
          symbol_stack.push nil
        else
          # No plain goto: feed the nonterminal back in as the lookahead
          # and let the lexer return the real lookahead afterwards.
          lexer.pushback lookahead
          lookahead = handle.nonterminal
        end
      else
        error_recover lookahead
      end
    end
  end

  # Receiver for semantic action blocks. Any name used inside an action is
  # looked up in the hash given at construction: `name` is entry 0 of the
  # list stored under "name", `name1` is entry 1, and so on.
  class SemanticActionContext
    # Method names are Strings on Ruby 1.8 and Symbols later; compare their
    # text so instance_eval is kept on every version.
    instance_methods.each do |m|
      method_name = m.to_s
      undef_method m unless method_name =~ /^__/ || method_name == 'new' || method_name == 'instance_eval'
    end

    def initialize hash
      @hash = hash
    end

    # Returns the requested entry, or a fresh SemanticAttributes when the
    # name or the index is unknown.
    def method_missing name, *args
      name_string = name.to_s
      digits = name_string[/\d+/]
      index = digits ? digits.to_i : 0
      values = @hash[name_string.sub(/\d+/, '')]
      (values && values[index]) or SemanticAttributes.new
    end
  end

  # Open-ended attribute bag: `obj.foo = x` stores x under "foo" and
  # `obj.foo` reads it back (nil when never set).
  class SemanticAttributes
    instance_methods.each do |m|
      method_name = m.to_s
      undef_method m unless method_name =~ /^__/ || method_name == 'new'
    end

    def initialize
      @hash = {}
    end

    def method_missing name, *args
      name_string = name.to_s
      # `[-1, 1]` yields a one-character String on every Ruby version; the
      # old `[-1] == 61` test only worked where String#[] returns a byte.
      return @hash[name_string] unless name_string[-1, 1] == '='
      @hash[name_string[0..-2]] = args.first
    end
  end

  private

  # Called when the table has no action for +lookahead+. No recovery
  # strategy exists, so the failure is reported.
  def error_recover lookahead = nil
    raise "parse error: unexpected #{lookahead.inspect}"
  end
end
173
+ end
@@ -0,0 +1,234 @@
1
+ module Aurum
2
# Adds a read/write `action` attribute to whatever `Symbol` resolves to in
# this scope (presumably Aurum::Symbol -- TODO confirm). The production DSL
# stores the block given to a symbol there.
Symbol.class_eval do
  attr_accessor :action
end
3
+
4
# Gives nil an `action` accessor as well, so DSL code such as
# `symbols.last.action` also works when the element is nil.
NilClass.class_eval do
  attr_accessor :action
end
7
+
8
# Mixin that turns unknown method calls into grammar symbols and
# productions, collected in @definition.
module GrammarDefinition
  instance_methods.each do |m|
    undef_method m unless m =~ /^__/ || m == 'new' || m == 'instance_eval'
  end

  # Called without arguments: returns the symbol for +name+ -- a
  # nonterminal when the name starts with a lowercase letter, nil for `_`,
  # a terminal when the name contains `_` followed by more text. A given
  # block is stored as the symbol's action.
  #
  # Called with arguments: records the production `name -> args`. String
  # arguments become "$literal_..." terminals and the action is taken from
  # the last argument. Returns the set of productions stored for the symbol.
  def method_missing name, *args, &block
    name_string = name.to_s
    symbol =
      if name_string =~ /^[a-z]/
        Aurum::Symbol.new(name_string, false)
      elsif name_string == '_'
        nil
      elsif name_string =~ /_.+/
        Aurum::Symbol.new(name_string, true)
      end
    symbol.action = block if block_given?
    return symbol if args.empty? || name_string == '_'

    symbols = args.map do |sym|
      if sym.kind_of?(String)
        Aurum::Symbol.new("$literal_#{sym}", true)
      elsif sym.kind_of?(Aurum::Symbol)
        sym
      end
    end
    action = symbols.last.action
    @definition ||= {}
    @definition[symbol] = [].to_set unless @definition.has_key? symbol
    production = Aurum::Production.new(symbol, symbols.compact)
    production.action = action if action
    @definition[symbol] << production
  end
end
40
+
41
# DSL receiver for `character_classes` blocks: `name char_set` stores the
# set under +name+ (the first definition of a name wins).
class CharacterClassDefinition
  # Strip inherited methods so almost any name reaches method_missing.
  # Method names are Strings on Ruby 1.8 and Symbols later; compare their
  # text so instance_eval is kept on every version.
  instance_methods.each do |m|
    method_name = m.to_s
    undef_method m unless method_name =~ /^__/ || method_name == 'new' || method_name == 'instance_eval'
  end
  attr_reader :definitions

  def initialize
    @definitions = {}
  end

  # Character set for the interval a..b. String bounds are replaced by
  # their first element (`a[0]`) before the interval is built.
  def range a, b
    a = a[0] if a.kind_of? String
    b = b[0] if b.kind_of? String
    CharacterSet::Interval.new(a, b).to_char_set
  end

  # New character set with +a+ appended to it.
  def string a
    result = CharacterSet.new
    result << a
    result
  end

  # Stores +char_set+ under +name+ unless that name is already defined.
  def method_missing name, char_set, &block
    @definitions[name] = char_set unless @definitions.has_key? name
  end
end
65
+
66
# DSL receiver for `tokens` blocks. Patterns are collected per lexical
# state in +definitions+ (state name => {pattern => action}).
class LexicalSpecification
  # Strip inherited methods so almost any name reaches method_missing.
  # Method names are Strings on Ruby 1.8 and Symbols later; compare their
  # text so instance_eval is kept on every version.
  instance_methods.each do |m|
    method_name = m.to_s
    undef_method m unless method_name =~ /^__/ || method_name == 'new' || method_name == 'instance_eval'
  end
  attr_reader :definitions, :character_classes

  # definition        - hash of per-state pattern tables, shared between
  #                     nested specifications.
  # state             - lexical state whose table receives new patterns.
  # character_classes - optional CharacterClassDefinition to reuse; a new
  #                     one is created when omitted.
  def initialize definition = {:initial => {}}, state = :initial, character_classes = nil
    @definitions = definition
    @character_classes = character_classes || CharacterClassDefinition.new
    @lexical_definition = @definitions[state]
  end

  # Pattern matching any character of the interval a..b.
  def range a, b
    Pattern.from_char_set CharacterSet::Interval.new(a, b).to_char_set
  end

  # Pattern matching +literal+.
  def string literal
    Pattern.from_string literal
  end

  # Pattern for the character class declared under +name+.
  def the name
    Pattern.from_char_set @character_classes.definitions[name]
  end

  # Registers +patterns+ with an action that switches to +state+; a given
  # block is evaluated as the definition of that state.
  def shift_to state, *patterns, &config
    pattern = __create_pattern ChangeStateAction.new(state), *patterns
    within state, &config if block_given?
    pattern
  end

  # Registers +patterns+ with a user-defined action wrapping the block.
  def match *patterns, &action
    __create_pattern UserDefinedAction.new(action), *patterns
  end

  # Registers +patterns+ as ignored input.
  def ignore *patterns
    __create_pattern IgnoreAction, *patterns
  end

  # Evaluates +config+ once for each of +states+, with new patterns going
  # to that state's table. The nested specification shares this one's
  # character classes so `the` keeps working inside the block.
  def within *states, &config
    states.each do |state|
      @definitions[state] = {} unless @definitions[state]
      LexicalSpecification.new(@definitions, state, @character_classes).instance_eval(&config)
    end
  end

  # Registers +patterns+ with an action that recognizes +token+ and then
  # switches to +state+.
  def recognize_and_shift_to token, state, *patterns
    __create_pattern RecognizeTokenAndChangeStateAction.new(token.to_s, state), *patterns
  end

  # Any other name declares a token: +patterns+ recognize the token +name+.
  def method_missing name, *patterns, &action
    __create_pattern RecognizeTokenAction.new(name.to_s), *patterns
  end

  # Concatenates +patterns+ (non-Pattern items are converted from their
  # #to_s text), stores the result with +action+ in the current state's
  # table and returns it.
  def __create_pattern action, *patterns
    pattern = Pattern.concat(*patterns.collect {|x| x.kind_of?(Pattern) ? x : Pattern.from_string(x.to_s)})
    @lexical_definition[pattern] = action
    pattern
  end
end
122
+
123
# DSL receiver for `precedences` blocks. Collects operator groups in
# +precedences+ and the operators declared left or right associative in
# +associativities+.
class OperatorPrecedenceDefinition
  # Strip inherited methods so almost any name reaches method_missing.
  # Method names are Strings on Ruby 1.8 and Symbols later; compare their
  # text so instance_eval is kept on every version.
  instance_methods.each do |m|
    method_name = m.to_s
    undef_method m unless method_name =~ /^__/ || method_name == 'new' || method_name == 'instance_eval'
  end
  attr_reader :precedences, :associativities

  def initialize
    @precedences, @associativities = [], {:right => [], :left => []}
  end

  # Appends one group of operators (duplicates removed) to +precedences+.
  def operator *operators
    @precedences << operators.collect {|x| __get_symbol x}.uniq
  end

  # Declares +operators+ as left associative.
  def left *operators
    __associativity :left, *operators
  end

  # Declares +operators+ as right associative.
  def right *operators
    __associativity :right, *operators
  end

  # Any other bare name denotes the terminal symbol of that name.
  def method_missing name
    Aurum::Symbol.new name.to_s, true
  end

  # Returns +operator+ itself when it is already a symbol, otherwise the
  # "$literal_..." terminal for it.
  def __get_symbol operator
    operator.kind_of?(Aurum::Symbol) ? operator : Aurum::Symbol.new("$literal_#{operator}", true)
  end

  # Adds each operator's symbol to the list for +direction+, skipping
  # symbols that are already listed.
  def __associativity direction, *operators
    operators.each do |operator|
      symbol = __get_symbol operator
      @associativities[direction] << symbol unless @associativities[direction].include? symbol
    end
  end
end
159
+
160
# DSL receiver for `productions` blocks. Collects productions per
# left-hand-side symbol in +__definitions+ and the string literals used in
# them (pattern => recognize action) in +__literals+.
class ProductionDefinition
  # Strip inherited methods so almost any name reaches method_missing.
  # Method names are Strings on Ruby 1.8 and Symbols later; compare their
  # text so instance_eval is kept on every version.
  instance_methods.each do |m|
    method_name = m.to_s
    undef_method m unless method_name =~ /^__/ || method_name == 'new' || method_name == 'instance_eval'
  end
  attr_reader :__definitions, :__literals

  def initialize
    @__definitions, @__literals = {}, {}
  end

  # Called without arguments: returns the symbol for +name+ -- a
  # nonterminal when the name starts with a lowercase letter, nil for `_`,
  # a terminal when the name contains `_` followed by more text. A given
  # block is stored as the symbol's action.
  #
  # Called with arguments: records the production `name -> args`. String
  # arguments are registered in +__literals+ and replaced by their
  # "$literal_..." terminal; the production's action is the action of the
  # last argument. Returns the set of productions stored for the symbol.
  def method_missing name, *args, &block
    name_string = name.to_s
    symbol = case name_string
             when /^[a-z]/ then Aurum::Symbol.new name_string, false
             when '_' then nil
             when /_.+/ then Aurum::Symbol.new name_string, true
             end
    symbol.action = block if block_given?
    (args.empty? || name_string == '_') and return symbol
    symbols = args.map do |sym|
      if sym.kind_of? String
        @__literals[Pattern.from_string(sym)] = RecognizeTokenAction.new "$literal_#{sym}"
        sym = Aurum::Symbol.new("$literal_#{sym}", true)
      end
      sym
    end
    @__definitions[symbol] = [].to_set unless @__definitions.has_key? symbol
    production = Aurum::Production.new symbol, symbols.compact
    # The last element may be nil (the `_` placeholder); the file gives nil
    # an `action` accessor elsewhere so this lookup still works.
    production.action = symbols.last.action if symbols.last.action
    @__definitions[symbol] << production
  end
end
190
+
191
# Base class for grammars. Subclasses describe themselves with the
# class-level `character_classes`, `tokens`, `precedences` and `productions`
# blocks, then call `parse_<start symbol>(input)`.
#
# Every section is optional: a section that was never declared is treated
# as empty instead of raising on nil.
class Grammar
  # Evaluates +block+ against the character class definitions.
  def self.character_classes &block
    @lexical_sepcification ||= LexicalSpecification.new
    @lexical_sepcification.character_classes.instance_eval(&block)
  end

  # Evaluates +block+ against the lexical specification.
  def self.tokens &block
    @lexical_sepcification ||= LexicalSpecification.new
    @lexical_sepcification.instance_eval(&block)
  end

  # Evaluates +block+ against the operator precedence definition.
  def self.precedences &block
    @precedences ||= OperatorPrecedenceDefinition.new
    @precedences.instance_eval(&block)
  end

  # Evaluates +block+ against the production definition.
  def self.productions &block
    @productions ||= ProductionDefinition.new
    @productions.instance_eval(&block)
  end

  # Builds a lexer over +input+. The string literals used in productions
  # are merged into the :initial lexical state first.
  def self.lexer input
    @lexical_sepcification ||= LexicalSpecification.new
    @productions ||= ProductionDefinition.new
    @lexical_sepcification.definitions[:initial].merge!(@productions.__literals)
    generator = Aurum::LexicalTableGenerator.new @lexical_sepcification.definitions
    table, accepts = generator.lexical_table
    Aurum::Lexer.new table, accepts, generator.lexical_states, input
  end

  # Builds a parser whose start symbol is the nonterminal +start_symbol+.
  def self.start_from start_symbol
    @productions ||= ProductionDefinition.new
    @precedences ||= OperatorPrecedenceDefinition.new
    generator = Aurum::ParsingTableGenerator.new @productions.__definitions, @precedences.precedences, @precedences.associativities
    productions = generator.start_from(Aurum::Symbol.new(start_symbol.to_s, false)).productions
    # parsing_table returns a pair; only the table itself is needed here.
    table, _lookahead_level = generator.parsing_table
    Aurum::Parser.new productions, table
  end

  # `parse_<name>(input)` parses +input+ starting from the nonterminal
  # <name>. Any other unknown class method is passed on to the default
  # handling (NoMethodError) instead of silently returning nil.
  def self.method_missing name, *args
    name_string = name.to_s
    return super unless name_string =~ /^parse_/
    unless args.length == 1
      raise ArgumentError, "wrong number of arguments (#{args.length} for 1)"
    end
    start_from(name_string.sub(/^parse_/, '')).parse lexer(args.first)
  end

  # Keeps respond_to? in step with method_missing.
  def self.respond_to_missing? name, include_private = false
    name.to_s =~ /^parse_/ ? true : super
  end
end
234
+ end