aurum 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (28)
  1. data/example/expression/expression.rb +29 -0
  2. data/lib/aurum.rb +10 -0
  3. data/lib/aurum/engine.rb +173 -0
  4. data/lib/aurum/grammar.rb +234 -0
  5. data/lib/aurum/lexical_table_generator.rb +423 -0
  6. data/lib/aurum/parsing_table_generator.rb +445 -0
  7. data/test/engine/lexer_test.rb +52 -0
  8. data/test/engine/semantic_attributes_test.rb +15 -0
  9. data/test/grammar_definition/character_class_definition_test.rb +28 -0
  10. data/test/grammar_definition/grammar_definition_test.rb +54 -0
  11. data/test/grammar_definition/lexical_definition_test.rb +56 -0
  12. data/test/grammar_definition/operator_precedence_definition_test.rb +35 -0
  13. data/test/grammar_definition/production_definition_test.rb +60 -0
  14. data/test/lexical_table_generator/automata_test.rb +74 -0
  15. data/test/lexical_table_generator/character_set_test.rb +73 -0
  16. data/test/lexical_table_generator/interval_test.rb +36 -0
  17. data/test/lexical_table_generator/pattern_test.rb +109 -0
  18. data/test/lexical_table_generator/subset_determinizer_test.rb +19 -0
  19. data/test/lexical_table_generator/table_generator_test.rb +126 -0
  20. data/test/parsing_table_generator/augmented_grammar_test.rb +45 -0
  21. data/test/parsing_table_generator/lalr_n_computation_test.rb +89 -0
  22. data/test/parsing_table_generator/lr_0_automata_test.rb +91 -0
  23. data/test/parsing_table_generator/lr_item_test.rb +33 -0
  24. data/test/parsing_table_generator/parsing_table_state_test.rb +39 -0
  25. data/test/parsing_table_generator/precedence_table_test.rb +28 -0
  26. data/test/parsing_table_generator/production_test.rb +9 -0
  27. data/test/test_helper.rb +103 -0
  28. metadata +78 -0
@@ -0,0 +1,29 @@
1
+ $:.unshift(File.dirname(__FILE__) + '/../../lib')
2
+ require 'aurum'
3
+
4
# Example grammar: arithmetic over integers with '+', '-', '*', '/' and
# parenthesised sub-expressions, declared through the Aurum::Grammar DSL.
class ExpressionGrammar < Aurum::Grammar
  # Lexical rules: runs of spaces are ignored, runs of digits become _number.
  tokens do
    ignore(string(' ').one_or_more)
    _number(range(?0, ?9).one_or_more)
  end

  # Two operator groups, declared in this order.
  precedences do
    operator('*', '/')
    operator('+', '-')
  end

  # Each brace block is attached to the LAST symbol of its production; inside
  # it, `expression` is the result and `expression1`/`expression2` are the
  # operands.
  productions do
    expression(expression, '+', expression { expression.value = expression1.value + expression2.value })
    expression(expression, '-', expression { expression.value = expression1.value - expression2.value })
    expression(expression, '*', expression { expression.value = expression1.value * expression2.value })
    expression(expression, '/', expression { expression.value = expression1.value / expression2.value })
    expression('(', expression, ')')
    expression(_number { expression.value = _number.value.to_i })
  end
end
24
+
25
# Parse a few sample inputs and print the computed value of each one.
[
  '1 + 2',
  '1 + 2 * 3',
  '1 * 2 + 3',
  '1 * (2 + 3)',
  '1 + (2 + 3) * 4'
].each do |source|
  puts ExpressionGrammar.parse_expression(source).value
end
data/lib/aurum.rb ADDED
@@ -0,0 +1,10 @@
1
+ require 'aurum/lexical_table_generator'
2
+ require 'aurum/parsing_table_generator'
3
+ require 'aurum/engine'
4
+ require 'aurum/grammar'
5
+
6
Enumerable.class_eval do
  # Yields every element that satisfies +condition+.
  #
  # condition - either a callable (anything responding to +call+; it receives
  #             the element), or an object whose +to_s+ is a Ruby expression.
  #             The expression is evaluated once per element with the element
  #             bound to the local variable +x+.
  #
  # Returns whatever the receiver's +each+ returns.
  def grep_each condition
    if condition.respond_to?(:call)
      each { |x| yield x if condition.call(x) }
    else
      # SECURITY: the expression is passed to eval unchanged, so it must never
      # come from untrusted input.
      expression = condition.to_s
      # The block parameter has to stay named `x`: the expression refers to it.
      each { |x| yield x if eval(expression) }
    end
  end
end
@@ -0,0 +1,173 @@
1
+ module Aurum
2
# Character-level reading with a one-slot pushback for IO streams.
IO.class_eval do
  # Returns the next character code, or nil at end of input.
  #
  # A value handed to #pushback is returned first, exactly as it was pushed
  # back (it is NOT combined with further input again). Otherwise one byte is
  # read; a byte greater than 128 is combined with the following byte as
  # `lead * 128 + trail`. If the stream ends right after such a lead byte, the
  # lead byte is returned on its own.
  #
  # getbyte is used instead of getc so that an Integer is produced on every
  # Ruby version (getc yields a String from Ruby 1.9 on).
  def get_char
    pending, @pushback_char = @pushback_char, nil
    return pending if pending
    lead = getbyte
    return lead if lead.nil? || lead <= 128
    trail = getbyte
    trail ? lead * 128 + trail : lead
  end

  # Remembers +char+ so that the next #get_char returns it. Only one value is
  # kept; pushing back nil is a no-op.
  def pushback char
    @pushback_char = char
  end
end
12
+
13
# Lets a String be consumed like an input stream. The read position lives in
# the instance variable @get_char_index of the string itself (-1 means that
# nothing has been read yet), so #get_char and #pushback need an unfrozen
# string.
String.class_eval do
  # Advances the read position and returns the byte found there as an Integer,
  # or nil once the position is past the end of the string. getbyte keeps the
  # result an Integer on every Ruby version, matching IO#get_char.
  def get_char
    @get_char_index = (@get_char_index || -1) + 1
    getbyte(@get_char_index)
  end

  # True when the last byte has been consumed (always true for an empty
  # string). Does not modify the string.
  def eof?
    (@get_char_index || -1) >= bytesize - 1
  end

  # Steps the read position back by one. +char+ is ignored; the byte is simply
  # read again from the string. The position never goes below "nothing read",
  # so calling this before any read, or more often than #get_char, is harmless.
  def pushback char
    @get_char_index = [(@get_char_index || -1) - 1, -1].max
  end
end
28
+
29
# Adds a read/write +value+ attribute to Symbol.
# NOTE(review): inside module Aurum this presumably resolves to Aurum::Symbol
# (the lexer assigns `value` on Aurum::Symbol instances) - confirm that
# Aurum::Symbol is defined before this file is loaded.
Symbol.class_eval do
  attr_accessor :value
end
30
+
31
# Table-driven lexer.
#
# table          - maps a state to its transitions; each transition responds
#                  to +symbols+ (tested with include?) and +destination+.
# accepts        - maps a state to its accept actions; only the first one is
#                  used.
# lexical_states - list of lexical state names; the position of a name selects
#                  the start state (see #shift_to).
# input          - character source responding to get_char, pushback and eof?.
class Lexer
  def initialize table, accepts, lexical_states, input
    @table, @accepts, @lexical_states, @input = table, accepts, lexical_states, input
    shift_to :initial
  end

  # Returns the next recognized symbol, or Aurum::EOF when the input is
  # exhausted.
  #
  # A symbol handed to #pushback is returned first, even when the input itself
  # is already at its end (it used to be dropped in that case). Input matched
  # by an IgnoreAction is skipped; if nothing but ignored input remains,
  # Aurum::EOF is returned.
  #
  # Raises RuntimeError when scanning stops in a state without accept action.
  def next_symbol
    pending, @pushback_symbol = @pushback_symbol, nil
    # A pushed-back end-of-input marker is answered like a fresh EOF.
    pending = nil if pending == Aurum::EOF && @input.eof?
    @recognized, lexeme = pending, ''
    until @recognized
      return Aurum::EOF if @input.eof?
      next_state, char = @start_state, nil
      # Follow transitions as long as possible; `state` ends up as the last
      # state reached and `char` as the first character that did not fit.
      while next_state
        lexeme << char if char
        state, char = next_state, @input.get_char
        next_state = goto state, char
      end
      @input.pushback char
      action = (@accepts[state] || []).first
      raise "lexical error: no token matches near #{lexeme.inspect}" unless action
      if action == IgnoreAction
        lexeme = ''
      else
        action.execute self
      end
    end
    @recognized.value = lexeme unless @recognized.value
    @recognized
  end

  # Stores +symbol+ so that the next #next_symbol call returns it.
  def pushback symbol
    @pushback_symbol = symbol
  end

  # Destination of the first transition out of +state+ whose symbols include
  # +input+; nil when +input+ is nil or no transition matches.
  def goto state, input
    return nil unless input
    next_state = @table[state].find {|tran| tran.symbols.include?(input)}
    next_state ? next_state.destination : nil
  end

  # Switches to lexical state +state+: its start state is reached from table
  # state 0 over the pseudo input -(index of +state+ in lexical_states) - 1.
  def shift_to state
    @start_state = goto 0, -@lexical_states.index(state)-1
  end

  # Called by accept actions: marks +token+ as the recognized terminal.
  def recognize token
    @recognized = Aurum::Symbol.new token, true
  end
end
76
+
77
# Lexer callback for RecognizeTokenAction: report this action's token to the
# lexer as the recognized terminal.
RecognizeTokenAction.class_eval do
  def execute(lexer)
    lexer.recognize(token)
  end
end
82
+
83
# Lexer callback for ChangeStateAction: switch the lexer to this action's
# lexical state. No token is recognized.
ChangeStateAction.class_eval do
  def execute(lexer)
    lexer.shift_to(state)
  end
end
88
+
89
# Lexer callback for RecognizeTokenAndChangeStateAction: first report the
# token, then switch the lexer to the action's lexical state.
RecognizeTokenAndChangeStateAction.class_eval do
  def execute(lexer)
    lexer.recognize(token)
    lexer.shift_to(state)
  end
end
95
+
96
# Table-driven shift/reduce parser.
#
# productions   - indexable collection of productions; each responds to
#                 +nonterminal+, +symbols+ and +action+.
# parsing_table - maps a state number to a row, which maps a grammar symbol to
#                 a ShiftAction or ReduceAction.
class Parser
  def initialize productions, parsing_table
    @productions, @parsing_table = productions, parsing_table
  end

  # Parses the symbols delivered by +lexer+ (anything responding to
  # next_symbol and pushback) and returns the value left on the value stack
  # when the production for Aurum::START is reduced.
  #
  # NOTE(review): when the table has no action for the current lookahead,
  # `error_recover` is called; no such method is defined in this class as seen
  # here - confirm it is provided elsewhere.
  def parse lexer
    lookahead, state_stack, symbol_stack, value_stack = lexer.next_symbol, [0], [], []
    lookahead_shift = 0
    while true
      state = @parsing_table[state_stack.last]
      action = state[lookahead]
      if action.kind_of? ShiftAction
        state_stack.push action.state
        if action.is_lookahead_shift
          # Only the state changes; the lookahead is neither stored nor consumed.
          lookahead_shift += 1
        else
          symbol_stack.push lookahead
          lookahead = lexer.next_symbol
        end
      elsif action.kind_of? ReduceAction
        handle = @productions[action.handle]
        # Drop the states that were entered by lookahead shifts.
        lookahead_shift.times { state_stack.pop }
        lookahead_shift = 0
        if action.is_read_reduce
          # The lookahead is the last symbol of the handle: consume it. The
          # state entry is a placeholder that is removed with the handle below.
          state_stack.push state
          symbol_stack.push lookahead
          lookahead = lexer.next_symbol
        end
        length = handle.symbols.length
        pop_handle state_stack, length
        symbols = pop_handle symbol_stack, length
        handle.nonterminal == Aurum::START and return value_stack.pop
        run_semantic_action handle, symbols, value_stack if handle.action
        goto = @parsing_table[state_stack.last][handle.nonterminal]
        if goto.kind_of? ShiftAction
          state_stack.push goto.state
          symbol_stack.push nil
        else
          # No plain goto: feed the nonterminal through the table as the next
          # lookahead and keep the real lookahead for later.
          lexer.pushback lookahead
          lookahead = handle.nonterminal
        end
      else
        error_recover
      end
    end
  end

  # Evaluation context of a semantic action. Inside the action block a bare
  # name such as `expression` or `expression2` is looked up in the hash given
  # to the constructor: the digits form the index (0 when absent), the rest is
  # the key.
  class SemanticActionContext
    # Strip inherited methods so that grammar symbol names reach
    # method_missing. to_s makes the filter work whether instance_methods
    # yields strings (Ruby 1.8) or symbols (later versions).
    instance_methods.each { |m| undef_method m unless m.to_s =~ /^__/ || %w[new instance_eval].include?(m.to_s) }
    def initialize hash
      @hash = hash
    end

    # Returns the entry for the given name and index, or a fresh
    # SemanticAttributes when there is none (including unknown names).
    def method_missing name, *args
      name_string = name.to_s
      index = name_string[/\d+/].to_i
      key = name_string.sub(/\d+/, '')
      found = (@hash[key] || [])[index]
      found || SemanticAttributes.new
    end
  end

  # Open-ended attribute bag: `attrs.foo = 1` stores a value, `attrs.foo`
  # reads it back (nil when never set).
  class SemanticAttributes
    instance_methods.each { |m| undef_method m unless m.to_s =~ /^__/ || m.to_s == 'new' }
    def initialize
      @hash = {}
    end

    def method_missing name, *args
      name_string = name.to_s
      # A trailing '=' marks an assignment; [-1, 1] yields a String on every
      # Ruby version.
      return @hash[name_string] unless name_string[-1, 1] == '='
      @hash[name_string[0..-2]] = args.first
    end
  end

  private

  # Removes the topmost +count+ entries from +stack+ and returns them in stack
  # order. A count of zero (empty production) removes nothing; the range
  # -0..-1 would otherwise cover the whole stack.
  def pop_handle stack, count
    return [] if count.zero?
    stack.slice!(-count..-1)
  end

  # Runs the action of +handle+ and pushes the attributes of its nonterminal
  # onto +value_stack+.
  #
  # The context maps every symbol name to a list: for the nonterminal of the
  # production, index 0 is the fresh result; the symbols of the right-hand side
  # follow from left to right (terminals contribute their entry of +symbols+,
  # nonterminals the value popped from +value_stack+).
  def run_semantic_action handle, symbols, value_stack
    result_name = handle.nonterminal.name
    context = {result_name => [SemanticAttributes.new]}
    nonterminal_count = handle.symbols.select { |symbol| !symbol.is_terminal }.length
    # The rightmost operand lies on top of the stack: pop them all, then
    # restore left-to-right order so that name1 is the first occurrence.
    operands = Array.new(nonterminal_count) { value_stack.pop }.reverse
    handle.symbols.each_with_index do |symbol, index|
      context[symbol.name] = [] unless context.has_key? symbol.name
      context[symbol.name] << (symbol.is_terminal ? symbols[index] : operands.shift)
    end
    SemanticActionContext.new(context).instance_eval(&handle.action)
    value_stack.push context[result_name][0]
  end
end
173
+ end
@@ -0,0 +1,234 @@
1
+ module Aurum
2
# Adds a read/write +action+ attribute to Symbol.
# NOTE(review): inside module Aurum this presumably resolves to Aurum::Symbol -
# confirm that Aurum::Symbol is defined before this file is loaded.
Symbol.class_eval do
  attr_accessor :action
end

# nil gets the same accessor (its singleton class is NilClass), so code that
# reads or writes `action` on a symbol also works when that symbol is nil.
NilClass.class_eval do
  attr_accessor :action
end
7
+
8
# Mixin that turns unknown method calls into grammar symbols and productions.
module GrammarDefinition
  instance_methods.each { |m| undef_method m unless m =~ /^__/ || m == 'new' || m == 'instance_eval' }

  # Name handling:
  #   starts with a lowercase letter -> nonterminal symbol
  #   exactly '_'                    -> nil
  #   contains '_' plus more text    -> terminal symbol
  #
  # A block is stored as the +action+ of that symbol. Called without arguments
  # (or as '_') the symbol is returned. Called with arguments, a production
  # `name -> arguments` is added to @definition: String arguments become
  # "$literal_..." terminals, nils are dropped, and the action of the last
  # argument becomes the action of the production.
  def method_missing name, *args, &block
    label = name.to_s
    head =
      if label =~ /^[a-z]/
        Aurum::Symbol.new label, false
      elsif label == '_'
        nil
      elsif label =~ /_.+/
        Aurum::Symbol.new label, true
      end
    head.action = block if block_given?
    return head if args.empty? || label == '_'

    body = args.map do |arg|
      if String === arg
        Aurum::Symbol.new "$literal_#{arg}", true
      elsif Aurum::Symbol === arg
        arg
      end
    end
    trailing_action = body.last.action
    @definition ||= {}
    @definition[head] = [].to_set unless @definition.has_key? head
    rule = Aurum::Production.new head, body.compact
    rule.action = trailing_action if trailing_action
    @definition[head] << rule
  end
end
40
+
41
# DSL object collecting named character classes: inside instance_eval,
# `name char_set` registers +char_set+ under +name+.
class CharacterClassDefinition
  # Strip inherited methods so that class names reach method_missing. to_s
  # makes the filter work whether instance_methods yields strings (Ruby 1.8)
  # or symbols (later versions); without it instance_eval itself is removed.
  instance_methods.each { |m| undef_method m unless m.to_s =~ /^__/ || %w[new instance_eval].include?(m.to_s) }
  attr_reader :definitions

  def initialize
    @definitions = {}
  end

  # Character set covering the interval from +a+ to +b+. Each bound may be a
  # character code or a String, of which the first byte is used.
  def range a, b
    CharacterSet::Interval.new(__char_code(a), __char_code(b)).to_char_set
  end

  # Character set built by appending +a+ to an empty CharacterSet.
  def string a
    result = CharacterSet.new
    result << a
    result
  end

  # Registers +char_set+ under +name+; the first definition of a name wins.
  def method_missing name, char_set, &block
    @definitions[name] = char_set unless @definitions.has_key? name
  end

  # First byte of a String as an Integer on every Ruby version (String#[]
  # returns a String from Ruby 1.9 on); other values pass through unchanged.
  def __char_code char
    char.kind_of?(String) ? char.unpack('C').first : char
  end
end
65
+
66
# DSL object collecting lexical rules per lexical state.
#
# +definitions+ maps a state name to a hash {pattern => action}. Inside
# instance_eval, an unknown method call `token_name pattern...` registers a
# RecognizeTokenAction for that token name.
class LexicalSpecification
  # Strip inherited methods so that token names reach method_missing. to_s
  # makes the filter work whether instance_methods yields strings (Ruby 1.8)
  # or symbols (later versions); without it instance_eval itself is removed.
  instance_methods.each { |m| undef_method m unless m.to_s =~ /^__/ || %w[new instance_eval].include?(m.to_s) }
  attr_reader :definitions, :character_classes

  # definition        - shared hash of all states' rules.
  # state             - the state whose rules this object adds to.
  # character_classes - character class table to use; a new, empty
  #                     CharacterClassDefinition when omitted.
  def initialize definition = {:initial => {}}, state = :initial, character_classes = nil
    @definitions = definition
    @character_classes = character_classes || CharacterClassDefinition.new
    @lexical_definition = @definitions[state]
  end

  # Pattern matching one character of the interval from +a+ to +b+. As in
  # CharacterClassDefinition#range, a String bound stands for its first byte.
  def range a, b
    Pattern.from_char_set CharacterSet::Interval.new(__char_code(a), __char_code(b)).to_char_set
  end

  # Pattern matching +literal+.
  def string literal
    Pattern.from_string literal
  end

  # Pattern for the character class registered under +name+.
  def the name
    Pattern.from_char_set @character_classes.definitions[name]
  end

  # Registers +patterns+ with a ChangeStateAction to +state+; a block, if
  # given, defines rules inside +state+ (see #within).
  def shift_to state, *patterns, &config
    pattern = __create_pattern ChangeStateAction.new(state), *patterns
    within state, &config if block_given?
    pattern
  end

  # Registers +patterns+ with a UserDefinedAction wrapping the block.
  def match *patterns, &action
    __create_pattern UserDefinedAction.new(action), *patterns
  end

  # Registers +patterns+ as input to be skipped.
  def ignore *patterns
    __create_pattern IgnoreAction, *patterns
  end

  # Evaluates the block once per state in +states+, adding rules to that
  # state. The nested specification shares this object's character classes so
  # that `the` keeps working inside the block.
  def within *states, &config
    states.each do |state|
      @definitions[state] = {} unless @definitions[state]
      LexicalSpecification.new(@definitions, state, @character_classes).instance_eval(&config)
    end
  end

  # Registers +patterns+ with an action that recognizes +token+ and then
  # switches to +state+.
  def recognize_and_shift_to token, state, *patterns
    __create_pattern RecognizeTokenAndChangeStateAction.new(token.to_s, state), *patterns
  end

  # `token_name pattern...` registers the patterns as token +name+. A block is
  # accepted but not used.
  def method_missing name, *patterns, &action
    __create_pattern RecognizeTokenAction.new(name.to_s), *patterns
  end

  # Concatenates +patterns+ (non-Pattern values are turned into literal
  # patterns via to_s), stores the result with +action+ in the current state
  # and returns it.
  def __create_pattern action, *patterns
    pattern = Pattern.concat(*patterns.collect { |x| x.kind_of?(Pattern) ? x : Pattern.from_string(x.to_s) })
    @lexical_definition[pattern] = action
    pattern
  end

  # First byte of a String as an Integer; other values pass through unchanged.
  def __char_code char
    char.kind_of?(String) ? char.unpack('C').first : char
  end
end
122
+
123
# DSL object collecting operator precedence groups and associativities.
#
# precedences     - list of groups, one per `operator` call, in call order.
# associativities - {:left => [...], :right => [...]} without duplicates.
class OperatorPrecedenceDefinition
  # Strip inherited methods so that token names reach method_missing. to_s
  # makes the filter work whether instance_methods yields strings (Ruby 1.8)
  # or symbols (later versions); without it instance_eval itself is removed.
  instance_methods.each { |m| undef_method m unless m.to_s =~ /^__/ || %w[new instance_eval].include?(m.to_s) }
  attr_reader :precedences, :associativities

  def initialize
    @precedences, @associativities = [], {:right => [], :left => []}
  end

  # Appends one precedence group made of +operators+ (duplicates removed).
  def operator *operators
    operators = operators.collect {|x| __get_symbol x}
    @precedences << operators.uniq
  end

  # Declares +operators+ as left-associative.
  def left *operators
    __associativity :left, *operators
  end

  # Declares +operators+ as right-associative.
  def right *operators
    __associativity :right, *operators
  end

  # A bare name inside the DSL block denotes the terminal of that name.
  def method_missing name
    Aurum::Symbol.new name.to_s, true
  end

  # Aurum::Symbol values pass through; anything else becomes the terminal
  # "$literal_<operator>".
  def __get_symbol operator
    operator.kind_of?(Aurum::Symbol) ? operator : Aurum::Symbol.new("$literal_#{operator}", true)
  end

  # Adds each operator to the list for +direction+ unless already present.
  def __associativity direction, *operators
    operators.each do |operator|
      symbol = __get_symbol operator
      @associativities[direction] << symbol unless @associativities[direction].include? symbol
    end
  end
end
159
+
160
# DSL object collecting productions.
#
# __definitions - maps a nonterminal to the set of its productions.
# __literals    - maps the pattern of every string literal used in a
#                 production to a RecognizeTokenAction for "$literal_<text>".
class ProductionDefinition
  # Strip inherited methods so that symbol names reach method_missing. to_s
  # makes the filter work whether instance_methods yields strings (Ruby 1.8)
  # or symbols (later versions); without it instance_eval itself is removed.
  instance_methods.each { |m| undef_method m unless m.to_s =~ /^__/ || %w[new instance_eval].include?(m.to_s) }
  attr_reader :__definitions, :__literals

  def initialize
    @__definitions, @__literals = {}, {}
  end

  # Name handling:
  #   starts with a lowercase letter -> nonterminal symbol
  #   exactly '_'                    -> nil (stands for "no symbol")
  #   contains '_' plus more text    -> terminal symbol
  #
  # A block is stored as the +action+ of that symbol. Called without arguments
  # (or as '_') the symbol is returned. Called with arguments, a production
  # `name -> arguments` is recorded: String arguments become "$literal_..."
  # terminals (and are registered in __literals), nils are dropped, and the
  # action of the last argument becomes the action of the production.
  def method_missing name, *args, &block
    name_string = name.to_s
    symbol = case name_string
             when /^[a-z]/ then Aurum::Symbol.new name_string, false
             when '_' then nil
             when /_.+/ then Aurum::Symbol.new name_string, true
             end
    # NOTE(review): for '_' this stores the block on nil itself, which relies
    # on nil having an `action` accessor and being modifiable.
    symbol.action = block if block_given?
    (args.empty? || name_string == '_') and return symbol
    symbols = args.map do |sym|
      if sym.kind_of? String
        @__literals[Pattern.from_string(sym)] = RecognizeTokenAction.new "$literal_#{sym}"
        sym = Aurum::Symbol.new("$literal_#{sym}", true)
      end
      sym
    end
    tail = symbols.last
    action = tail.action
    # An action attached to `_` lives on nil; clear it once consumed so that it
    # does not leak into later productions that end in `_`.
    tail.action = nil if tail.nil? && action
    @__definitions[symbol] = [].to_set unless @__definitions.has_key? symbol
    production = Aurum::Production.new symbol, symbols.compact
    production.action = action if action
    @__definitions[symbol] << production
  end
end
190
+
191
# Base class for grammars. Subclasses describe themselves through the class
# level DSL (character_classes, tokens, precedences, productions) and are used
# via `parse_<start symbol>(input)`.
#
# Every DSL section is created on first use, so a grammar may leave out the
# sections it does not need.
class Grammar
  # Evaluates the block against the character class table of the grammar.
  def self.character_classes &block
    __lexical_specification.character_classes.instance_eval(&block)
  end

  # Evaluates the block against the lexical specification of the grammar.
  def self.tokens &block
    __lexical_specification.instance_eval(&block)
  end

  # Evaluates the block against the operator precedence definition.
  def self.precedences &block
    __precedences.instance_eval(&block)
  end

  # Evaluates the block against the production definition.
  def self.productions &block
    __productions.instance_eval(&block)
  end

  # Builds a lexer reading +input+. String literals used in productions are
  # merged into the :initial lexical state first.
  def self.lexer input
    specification = __lexical_specification
    specification.definitions[:initial].merge!(__productions.__literals)
    generator = Aurum::LexicalTableGenerator.new specification.definitions
    table, accepts = generator.lexical_table
    Aurum::Lexer.new table, accepts, generator.lexical_states, input
  end

  # Builds a parser whose start symbol is the nonterminal named +start_symbol+.
  def self.start_from start_symbol
    precedences = __precedences
    generator = Aurum::ParsingTableGenerator.new __productions.__definitions, precedences.precedences, precedences.associativities
    productions = generator.start_from(Aurum::Symbol.new(start_symbol.to_s, false)).productions
    # parsing_table returns a second element as well, which is not needed here.
    table, _lookahead_level = generator.parsing_table
    Aurum::Parser.new productions, table
  end

  # `parse_<name>(input)` parses +input+ starting from nonterminal <name>.
  # Every other unknown class method is reported as usual (NoMethodError)
  # instead of silently returning nil.
  #
  # Raises ArgumentError unless exactly one argument is given to parse_<name>.
  def self.method_missing name, *args, &block
    name_string = name.to_s
    return super unless name_string =~ /^parse_/
    unless args.length == 1
      raise ArgumentError, "wrong number of arguments (#{args.length} for 1)"
    end
    start_from(name_string.sub(/^parse_/, '')).parse lexer(args.first)
  end

  # Keeps respond_to? in line with method_missing.
  def self.respond_to_missing? name, include_private = false
    name.to_s =~ /^parse_/ ? true : super
  end

  # Lexical specification of this grammar class, created on first use.
  def self.__lexical_specification
    @lexical_specification ||= LexicalSpecification.new
  end

  # Operator precedence definition of this grammar class, created on first use.
  def self.__precedences
    @precedences ||= OperatorPrecedenceDefinition.new
  end

  # Production definition of this grammar class, created on first use.
  def self.__productions
    @productions ||= ProductionDefinition.new
  end

  private_class_method :__lexical_specification, :__precedences, :__productions
end
234
+ end