aurum 0.1.1 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (77) hide show
  1. data/Rakefile +29 -0
  2. data/examples/dangling_else/grammar.rb +23 -0
  3. data/examples/expression/grammar.rb +28 -0
  4. data/examples/smalltalk/grammar.rb +151 -0
  5. data/examples/smalltalk/interpreter.rb +70 -0
  6. data/examples/yacc/grammar.rb +72 -0
  7. data/lib/aurum.rb +1 -9
  8. data/lib/aurum/engine.rb +39 -175
  9. data/lib/aurum/engine/parsing_facility.rb +107 -0
  10. data/lib/aurum/engine/tokenization_facility.rb +86 -0
  11. data/lib/aurum/grammar.rb +52 -219
  12. data/lib/aurum/grammar/automata.rb +194 -0
  13. data/lib/aurum/grammar/builder/augmented_grammar.rb +83 -0
  14. data/lib/aurum/grammar/builder/dot_logger.rb +66 -0
  15. data/lib/aurum/grammar/builder/lexical_table_builder.rb +55 -0
  16. data/lib/aurum/grammar/builder/parsing_table_builder.rb +238 -0
  17. data/lib/aurum/grammar/builder/set_of_items.rb +190 -0
  18. data/lib/aurum/grammar/compiled_tables.rb +20 -0
  19. data/lib/aurum/grammar/dsl/lexical_definition.rb +94 -0
  20. data/lib/aurum/grammar/dsl/syntax_definition.rb +79 -0
  21. data/lib/aurum/grammar/lexical_rules.rb +224 -0
  22. data/lib/aurum/grammar/metalang/grammar.rb +47 -0
  23. data/lib/aurum/grammar/syntax_rules.rb +95 -0
  24. data/spec/builder/dsl_definition/aurum_grammar_spec.rb +33 -0
  25. data/spec/engine/lexer_spec.rb +59 -0
  26. data/spec/engine/parser_spec.rb +90 -0
  27. data/spec/examples/dangling_else_example.rb +30 -0
  28. data/spec/examples/expression_example.rb +48 -0
  29. data/spec/examples/smalltalk_example.rb +50 -0
  30. data/spec/examples/yacc_spec.rb +30 -0
  31. data/spec/grammar/builder/lexical_table/automata_spec.rb +55 -0
  32. data/spec/grammar/builder/lexical_table/builder_spec.rb +78 -0
  33. data/spec/grammar/builder/lexical_table/character_set_spec.rb +100 -0
  34. data/spec/grammar/builder/lexical_table/pattern_spec.rb +11 -0
  35. data/spec/grammar/builder/lexical_table/regular_expression.rb +40 -0
  36. data/spec/grammar/builder/parsing_table/augmented_grammar_spec.rb +36 -0
  37. data/spec/grammar/builder/parsing_table/builder_spec.rb +152 -0
  38. data/spec/grammar/builder/parsing_table/digraph_traverser_spec.rb +42 -0
  39. data/spec/grammar/builder/parsing_table/item_spec.rb +51 -0
  40. data/spec/grammar/builder/parsing_table/sources_spec.rb +66 -0
  41. data/spec/grammar/builder/parsing_table/state_spec.rb +82 -0
  42. data/spec/grammar/dsl/character_classes_builder_spec.rb +50 -0
  43. data/spec/grammar/dsl/lexical_rules_builder_spec.rb +181 -0
  44. data/spec/grammar/dsl/precedence_builder_spec.rb +64 -0
  45. data/spec/grammar/dsl/productions_builder_spec.rb +78 -0
  46. data/spec/grammar/metalang/metalang_spec.rb +0 -0
  47. data/spec/grammar/precedence_spec.rb +42 -0
  48. data/spec/grammar/syntax_rules_spec.rb +31 -0
  49. data/spec/parser_matcher.rb +69 -0
  50. data/spec/pattern_matcher.rb +123 -0
  51. data/spec/spec_helper.rb +133 -0
  52. metadata +70 -36
  53. data/example/expression/expression.rb +0 -35
  54. data/example/expression/lisp.rb +0 -26
  55. data/lib/aurum/lexical_table_generator.rb +0 -429
  56. data/lib/aurum/parsing_table_generator.rb +0 -464
  57. data/test/engine/lexer_test.rb +0 -59
  58. data/test/engine/semantic_attributes_test.rb +0 -15
  59. data/test/grammar_definition/character_class_definition_test.rb +0 -28
  60. data/test/grammar_definition/grammar_definition_test.rb +0 -55
  61. data/test/grammar_definition/lexical_definition_test.rb +0 -56
  62. data/test/grammar_definition/operator_precedence_definition_test.rb +0 -35
  63. data/test/grammar_definition/production_definition_test.rb +0 -60
  64. data/test/lexical_table_generator/automata_test.rb +0 -74
  65. data/test/lexical_table_generator/character_set_test.rb +0 -73
  66. data/test/lexical_table_generator/interval_test.rb +0 -36
  67. data/test/lexical_table_generator/pattern_test.rb +0 -115
  68. data/test/lexical_table_generator/subset_determinizer_test.rb +0 -19
  69. data/test/lexical_table_generator/table_generator_test.rb +0 -126
  70. data/test/parsing_table_generator/augmented_grammar_test.rb +0 -45
  71. data/test/parsing_table_generator/lalr_n_computation_test.rb +0 -92
  72. data/test/parsing_table_generator/lr_0_automata_test.rb +0 -94
  73. data/test/parsing_table_generator/lr_item_test.rb +0 -27
  74. data/test/parsing_table_generator/parsing_table_state_test.rb +0 -39
  75. data/test/parsing_table_generator/precedence_table_test.rb +0 -28
  76. data/test/parsing_table_generator/production_test.rb +0 -9
  77. data/test/test_helper.rb +0 -103
@@ -0,0 +1,107 @@
1
+ module Aurum
2
+ module Engine
3
+ module BasicParsingCapability
4
# Table-driven parse loop. Reads symbols from +lexer+, looks up the action for
# (current state, lookahead) in parsing_table.actions and shifts or reduces
# until a reduction whose nonterminal is Aurum::Builder::StartSymbol, at which
# point the top of the value stack is popped and returned.
+ def parse lexer
5
+ @lookahead, @lookaheads = lexer.next_symbol, []
6
+ @state_stack, @value_stack = [0], []
7
+ loop do
8
+ action = parsing_table.actions[current_state][@lookahead.to_s]
# NOTE(review): if error_recovery returns normally, action is still nil and the
# following line raises NoMethodError -- confirm error_recovery never returns here.
9
+ error_recovery unless action
10
+ if action.shift_action?
11
+ @state_stack.push(action.value)
12
+ if action.lookahead_shift?
# Lookahead shifts are buffered so pushback_lookaheads can undo them later.
13
+ @lookaheads << @lookahead
14
+ else
# A lookahead that is a String is not pushed as a value.
15
+ @value_stack.push(@lookahead) unless @lookahead.is_a?(String)
16
+ end
17
+ @lookahead = lexer.next_symbol
18
+ elsif action.reduce_action?
19
+ pushback_lookaheads(lexer) unless @lookaheads.empty?
20
+ prepare_read_reduce(lexer) if action.read_reduce?
21
+ handle = action.value
22
+ return @value_stack.pop if handle.nonterminal == Aurum::Builder::StartSymbol
23
+ nonterminal = reduce_to(handle, lexer)
# NOTE(review): no nil check on the goto entry; a missing table entry raises
# NoMethodError on the next line.
24
+ goto = parsing_table.actions[current_state][nonterminal]
25
+ if goto.shift_action?
26
+ @state_stack.push(goto.value)
27
+ elsif goto.reduce_action?
# The current lookahead is handed back to the lexer and the nonterminal just
# produced becomes the lookahead for the next iteration.
28
+ lexer.pushback(@lookahead)
29
+ @lookahead = nonterminal
30
+ end
31
+ end
32
+ end
33
+ end
34
+
35
+ private
36
# Returns the state on top of the state stack.
+ def current_state
37
+ @state_stack.last
38
+ end
39
+
40
# Undoes the buffered lookahead shifts: removes one state per buffered symbol,
# returns the current lookahead to +lexer+, makes the first buffered symbol the
# lookahead again and pushes the remaining ones back to +lexer+ last-to-first.
+ def pushback_lookaheads lexer
41
+ @state_stack.slice!(-@lookaheads.size, @lookaheads.size)
42
+ lexer.pushback(@lookahead)
43
+ @lookahead = @lookaheads.shift
44
+ lexer.pushback(@lookaheads.pop) until @lookaheads.empty?
45
+ end
46
+
47
# Performs the "read" half of a read-reduce action: pushes a :placeholder entry
# on the state stack, pushes the lookahead on the value stack unless it is a
# String, and fetches the next symbol from +lexer+.
# NOTE(review): the placeholder is presumably removed by the slice! in
# reduce_to right afterwards -- confirm.
+ def prepare_read_reduce lexer
48
+ @state_stack.push(:placeholder)
49
+ @value_stack.push(@lookahead) unless @lookahead.is_a?(String)
50
+ @lookahead = lexer.next_symbol
51
+ end
52
+
53
# Reduces by +handle+: removes one state per right-hand-side symbol from the
# state stack and, when a semantic action is registered under handle.name,
# replaces the same number of entries on the value stack with the action's
# result.
#
# handle - object responding to nonterminal, symbols and name.
# lexer  - forwarded unchanged to execute_semantic_action.
#
# Returns the nonterminal's name when it responds to +name+, otherwise the
# nonterminal itself.
def reduce_to(handle, lexer)
  nonterminal = handle.nonterminal
  length = handle.symbols.length
  @state_stack.slice!(-length, length)
  if semantic_actions.has_key?(handle.name)
    # slice! runs before push, so the action's result replaces the popped values.
    @value_stack.push(execute_semantic_action(lexer, handle, @value_stack.slice!(-length, length)))
  end
  # Explicit capability check instead of `nonterminal.name rescue nonterminal`,
  # which also swallowed any unrelated StandardError raised by #name.
  nonterminal.respond_to?(:name) ? nonterminal.name : nonterminal
end
61
+ end
62
+
63
+ module SemanticActionExecutable
64
# Runs the semantic action registered under handle.name.
#
# Builds a context hash keyed by symbol name. The left-hand-side nonterminal
# maps to a list starting with a fresh Attributes object; every right-hand-side
# symbol maps to the list of its values, ordered from the last occurrence to
# the first. The action block is evaluated inside a SemanticContext wrapping
# that hash.
#
# lexer  - the lexer driving the current parse; handed to the SemanticContext.
# handle - the production being reduced (responds to nonterminal, symbols,
#          name and optionally mappings).
# values - one value per right-hand-side symbol, in source order.
#
# Returns the first entry recorded for the left-hand-side nonterminal.
def execute_semantic_action(lexer, handle, values)
  nonterminal = handle.nonterminal
  # respond_to? replaces the `... rescue ...` modifiers, which hid every
  # StandardError raised while reading the name or the mappings.
  nonterminal_name = nonterminal.respond_to?(:name) ? nonterminal.name : nonterminal
  context = { nonterminal_name => [Attributes.new] }
  handle.symbols.reverse.each_with_index do |symbol, index|
    name = symbol.respond_to?(:name) ? symbol.name : symbol
    context[name] = [] unless context.has_key?(name)
    context[name] << values[-index - 1]
  end
  mappings = handle.respond_to?(:mappings) ? (handle.mappings || {}) : {}
  action = semantic_actions[handle.name]
  # Pass the lexer we were given; the original read @lexer and ignored the
  # parameter, handing nil to the context whenever the host object had no
  # such instance variable.
  SemanticContext.new(lexer, context, mappings).instance_eval(&action)
  context[nonterminal_name] ? context[nonterminal_name][0] : nil
end
77
+
78
# Blank-slate object in which semantic action blocks are instance_eval'd.
# Every bare identifier used inside an action arrives in method_missing and is
# resolved against the context hash built for the production being reduced.
class SemanticContext
  # Strip inherited methods so grammar symbol names reach method_missing.
  # instance_methods yields Symbols on Ruby >= 1.9, so names are compared as
  # strings; the original `m == 'instance_eval'` was never true there, which
  # removed instance_eval and made it impossible to run an action.
  instance_methods.each do |m|
    method_name = m.to_s
    undef_method m unless method_name.start_with?('__') || method_name == 'instance_eval'
  end

  # lexer    - stored in @lexer (not read by this class itself).
  # context  - Hash mapping a symbol name to its list of values.
  # mappings - Hash mapping an alias to the symbol name it stands for.
  def initialize(lexer, context, mappings)
    @lexer, @context, @mappings = lexer, context, mappings
  end

  # Resolves +name+ to a value from the context.
  #
  # The first run of digits in the name is removed and used as index (0 when
  # there is none); the remaining name is translated through the mappings when
  # it is an alias. Returns the entry at position -index of that name's list,
  # or a fresh Attributes when the name is unknown or the entry is nil/false.
  def method_missing(name)
    name_string = name.to_s
    digits = name_string[/\d+/]
    index = digits ? digits.to_i : 0
    # Build a new string rather than slice!-ing the one returned by Symbol#to_s.
    name_string = name_string.sub(/\d+/, '') if digits
    name_string = @mappings[name_string] if @mappings.has_key?(name_string)
    values = @context[name_string]
    # Unknown names used to fail with NoMethodError on nil; they now take the
    # same fallback as a missing index.
    found = values && values[-index]
    found || Attributes.new
  end
end
92
+
93
# Open-ended attribute bag used as the value of a nonterminal: `attrs.foo = 1`
# stores under "foo", `attrs.foo` reads it back (nil when never set).
class Attributes
  # Strip inherited methods so any attribute name reaches method_missing.
  instance_methods.each { |m| undef_method m unless m.to_s.start_with?('__') }

  def initialize
    @attributes = {}
  end

  # Writer calls (an identifier followed by "=") store args.first under the
  # name without the trailing "="; every other call is a read.
  #
  # The writer test requires a letter, digit or underscore before the "=".
  # The original /=$/ also matched ==, !=, <=, >= and []=, which arrive here
  # because those methods are undefined above; a comparison then stored its
  # operand under a bogus key and returned it, making `attrs == x` truthy.
  def method_missing(name, *args)
    name_string = name.to_s
    return @attributes[name_string] unless name_string =~ /[[:alnum:]_]=\z/
    @attributes[name_string.slice(0..-2)] = args.first
  end
end
105
+ end
106
+ end
107
+ end
@@ -0,0 +1,86 @@
1
+ module Aurum
2
+ module Engine
3
# A lexical token: the terminal it was recognized as and the matched text.
# The Struct is assigned directly instead of being subclassed, so no anonymous
# intermediate class is created and re-evaluating the file cannot raise a
# superclass mismatch.
Token = Struct.new(:terminal, :value) do
  # Returns the terminal as a String. A String terminal is returned as-is;
  # anything else is converted so to_s always honours its String contract.
  def to_s
    terminal.to_s
  end
end

# Token returned by the tokenizer when the input is exhausted.
EOF = Token.new('$eof', '')
9
+
10
+ module BasicTokenizationCapability
11
+ attr_reader :line, :column
12
# Returns the next token.
#
# Symbols queued with pushback are returned first. Otherwise the lexical table
# is walked from the state on top of @states, remembering the action of the
# most recent state that had one, until no transition exists. Returns EOF when
# the input is exhausted and nothing has been consumed, and a '$unknown' token
# when no state with an action was reached. Loops until an action yields a
# token other than '$ignored'.
+ def next_symbol
13
+ return @push_back.pop unless @push_back.empty?
14
+ @recognized, @recognized_lexeme = nil, ''
15
+ until @recognized
16
+ @lexeme, next_state, char = '', @states.last, nil
17
+ lookaheads, action = [], nil
18
+ while next_state
19
+ if char
20
+ lookaheads << char
# NOTE(review): the column is advanced per consumed character but is not
# rewound when characters are pushed back below, and @line is not updated in
# this method -- confirm that is intended.
21
+ @column += 1
22
+ end
23
+ return EOF if @input.eof? && @recognized_lexeme.empty? && @lexeme.empty? && lookaheads.empty?
24
+ state, char = next_state, @input.get_char
25
+ if lexical_table.actions[state]
# This state has an action: remember it and move the characters buffered since
# the previous such state into the lexeme.
26
+ action = lexical_table.actions[state]
27
+ while (c = lookaheads.shift)
28
+ @lexeme << c
29
+ end
30
+ end
31
+ next_state = goto(state, char)
32
+ end
# The character that had no transition is returned to the input.
33
+ @input.pushback char
34
+ unless action
# This aliases (does not copy) @recognized_lexeme, so << below mutates it.
35
+ unrecognized_lexeme = @recognized_lexeme
36
+ lookaheads.each {|lookahead| unrecognized_lexeme << lookahead}
37
+ return Token.new('$unknown', unrecognized_lexeme)
38
+ end
# Characters read past the last state with an action go back to the input,
# last-read first.
39
+ while (c = lookaheads.pop)
40
+ @input.pushback c
41
+ end
42
+ @recognized_lexeme += @lexeme unless action.token == '$ignored'
43
+ shift_to(action.state) if action.state
44
+ @recognized = Token.new(action.token, @recognized_lexeme) if action.token && action.token != '$ignored'
# The user-supplied action block runs with this object as self.
45
+ instance_eval(&action.action) if action.action
46
+ end
47
+ @recognized
48
+ end
49
+
50
# Queues +symbol+ so that a later next_symbol call returns it before reading
# more input; the most recently pushed symbol is returned first.
+ def pushback symbol
51
+ @push_back.push(symbol)
52
+ end
53
+
54
+ private
55
# Enters lexical state +state+: takes its index i in
# lexical_table.lexical_states and pushes onto @states the destination of the
# transition from table state 0 on the input value -(i + 1).
# NOTE(review): index returns nil for an unknown state, so the unary minus
# raises NoMethodError; goto can also return nil, which would then be pushed.
+ def shift_to state
56
+ @states.push(goto(0, - lexical_table.lexical_states.index(state) - 1))
57
+ end
58
+
59
# Returns the destination of the first transition out of +state+ whose
# character_set includes +input+, or nil when +input+ is nil/false or no
# transition matches. The local called next_state holds the matching
# transition, not a state.
+ def goto state, input
60
+ return nil unless input
61
+ next_state = lexical_table.states[state].find {|tran| tran.character_set.include?(input)}
62
+ next_state ? next_state.destination : nil
63
+ end
64
+ end
65
+
66
# Read cursor over a String that allows stepping back over characters that
# were already read.
class PushbackString
  # string - the text to read from.
  def initialize(string)
    @string = string
    @get_char_index = -1
  end

  # Advances the cursor and returns the element of the string at the new
  # position, or nil once the cursor is past the end. The cursor keeps
  # advancing past the end so that every read can be undone by one pushback.
  def get_char
    @string[@get_char_index = cursor + 1]
  end

  # True when no unread character is left.
  def eof?
    cursor >= (@string.length - 1)
  end

  # Steps the cursor back by one position; +char+ itself is ignored.
  # A pushback with nothing read is a no-op. Previously it either raised on the
  # still-unset index or moved the cursor to -2, so the next get_char returned
  # the last character of the string.
  def pushback(char)
    @get_char_index = cursor - 1 if cursor >= 0
  end

  private

  # Index of the most recently read character (-1 when nothing has been read).
  # Initialised lazily as well, so subclasses that override initialize without
  # calling super keep working.
  def cursor
    @get_char_index ||= -1
  end
end
85
+ end
86
+ end
@@ -1,226 +1,59 @@
1
- module Aurum
2
- Symbol.class_eval { attr_accessor :action }
3
-
4
- class << nil
5
- attr_accessor :action
6
- end
7
-
8
- Aurum::RecognizeTokenAction.class_eval {attr_accessor :action}
1
+ require 'set'
9
2
 
10
- class CharacterClassDefinition
11
- instance_methods.each { |m| undef_method m unless m =~ /^__/ || m == 'new' || m=='instance_eval'}
12
- attr_reader :definitions
13
-
14
- def initialize
15
- @definitions = {}
16
- end
3
+ require File.join(File.dirname(__FILE__), 'grammar/syntax_rules')
4
+ require File.join(File.dirname(__FILE__), 'grammar/lexical_rules')
5
+ require File.join(File.dirname(__FILE__), 'grammar/compiled_tables')
6
+ require File.join(File.dirname(__FILE__), 'grammar/dsl/syntax_definition')
7
+ require File.join(File.dirname(__FILE__), 'grammar/dsl/lexical_definition')
8
+ require File.join(File.dirname(__FILE__), 'grammar/builder/parsing_table_builder')
9
+ require File.join(File.dirname(__FILE__), 'grammar/builder/lexical_table_builder')
17
10
 
18
- def range a, b
19
- a = a[0] if a.kind_of? String
20
- b = b[0] if b.kind_of? String
21
- CharacterSet::Interval.new(a, b).to_char_set
11
+ module Aurum
12
+ class Grammar
13
+ def Grammar.include_grammar(grammar, mapping)
14
+ end
15
+
16
+ def Grammar.inherited(grammar)
17
+ grammar.instance_eval do
18
+ @lexical_rules, @syntax_rules, @semantic_actions = LexicalRules.new, SyntaxRules.new, {}
19
+ @character_classes_builder = DSL::CharacterClassesBuilder.new
20
+ @lexical_rules_builder = DSL::LexicalRulesBuilder.new(@lexical_rules, @character_classes_builder)
21
+ @precedeces_builder = DSL::PrecedencesBuilder.new(@syntax_rules)
22
+ @productions_builder = DSL::ProductionsBuilder.new(@syntax_rules, @semantic_actions)
23
+ @reserve_word_states = ['all']
24
+
25
+ def character_classes &definition
26
+ @character_classes_builder.instance_eval(&definition) if block_given?
22
27
  end
23
-
24
- def enum a
25
- result = CharacterSet.new
26
- result << a
27
- result
28
+
29
+ def tokens &definition
30
+ @lexical_rules_builder.instance_eval(&definition) if block_given?
31
+ end
32
+
33
+ def reserve_words_in *states
34
+ @reserve_word_states = states
35
+ end
36
+
37
+ def precedences &definition
38
+ @precedeces_builder.instance_eval(&definition) if block_given?
28
39
  end
29
-
30
- def method_missing name, char_set, &block
31
- @definitions[name] = char_set unless @definitions.has_key? name
40
+
41
+ def syntax_rules &definition
42
+ @productions_builder.instance_eval(&definition) if block_given?
43
+ end
44
+ alias productions syntax_rules
45
+
46
+
47
+ def start_from start_symbol
48
+ start_symbol = Grammar.nonterminal(start_symbol.to_s)
49
+ @syntax_rules.assign_operator_precedence_to_symbols
50
+ [Builder.build_parsing_table(@syntax_rules, start_symbol), @semantic_actions]
32
51
  end
52
+
53
+ def lexical_table
54
+ Builder.build_lexical_table(@lexical_rules, @reserve_word_states, @syntax_rules.literals)
55
+ end
56
+ end
33
57
  end
34
-
35
- class LexicalSpecification
36
- alias __extend extend
37
- instance_methods.each { |m| undef_method m unless m =~ /^__/ || m == 'new' || m=='instance_eval'}
38
- attr_reader :definitions, :character_classes
39
-
40
- def initialize definition = {:initial => {}}, state = :initial
41
- @definitions, @character_classes = definition, CharacterClassDefinition.new
42
- @lexical_definition, @patterns = @definitions[state], {}
43
- end
44
-
45
- def range a, b
46
- Pattern.from_char_set CharacterSet::Interval.new(a, b).to_char_set
47
- end
48
-
49
- def string literal
50
- Pattern.from_string literal
51
- end
52
-
53
- def enum literal
54
- Pattern.from_enum literal
55
- end
56
-
57
- def cat *patterns
58
- Pattern.concat *(patterns.collect {|x| x.kind_of?(Pattern) ? x : Pattern.from_string(x.to_s)})
59
- end
60
-
61
- def shift_to state, *patterns, &config
62
- pattern = __create_pattern ChangeStateAction.new(state), *patterns
63
- within state, &config if block_given?
64
- pattern
65
- end
66
-
67
- def match *patterns, &action
68
- __create_pattern UserDefinedAction.new(action), *patterns
69
- end
70
-
71
- def ignore *patterns
72
- __create_pattern IgnoreAction, *patterns
73
- end
74
-
75
- def within *states, &config
76
- for state in states
77
- @definitions[state] = {} unless @definitions[state]
78
- LexicalSpecification.new(@definitions, state).instance_eval &config
79
- end
80
- end
81
-
82
- def recognize_and_shift_to token, state, *patterns
83
- __create_pattern RecognizeTokenAndChangeStateAction.new(token.to_s, state), *patterns
84
- end
85
-
86
- def method_missing name, *patterns, &action
87
- return __get_pattern(name) if patterns.empty?
88
- pattern = Pattern.concat *(patterns.collect {|x| x.kind_of?(Pattern) ? x : Pattern.from_string(x.to_s)})
89
- @patterns[name] = pattern
90
- if name.to_s =~ /^_/
91
- recognize_action = RecognizeTokenAction.new(name.to_s)
92
- recognize_action.action = action
93
- @lexical_definition[pattern] = recognize_action
94
- end
95
- pattern
96
- end
97
-
98
- def __get_pattern name
99
- return @patterns[name] if @patterns.has_key? name
100
- Pattern.from_char_set @character_classes.definitions[name]
101
- end
102
-
103
- def __create_pattern action, *patterns
104
- pattern = Pattern.concat *(patterns.collect {|x| x.kind_of?(Pattern) ? x : Pattern.from_string(x.to_s)})
105
- @lexical_definition[pattern] = action
106
- pattern
107
- end
108
- end
109
-
110
- class OperatorPrecedenceDefinition
111
- instance_methods.each { |m| undef_method m unless m =~ /^__/ || m == 'new' || m=='instance_eval'}
112
- attr_reader :precedences, :associativities
113
-
114
- def initialize
115
- @precedences, @associativities = [], {:right => [], :left => []}
116
- end
117
-
118
- def operator *operators
119
- operators = operators.collect {|x| __get_symbol x}
120
- @precedences << operators.uniq
121
- end
122
-
123
- def left *operators
124
- __associativity :left, *operators
125
- end
126
-
127
- def right *operators
128
- __associativity :right, *operators
129
- end
130
-
131
- def method_missing name
132
- Aurum::Symbol.new name.to_s, true
133
- end
134
-
135
- def __get_symbol operator
136
- operator.kind_of?(Aurum::Symbol) ? operator : Aurum::Symbol.new("$literal_#{operator}", true)
137
- end
138
-
139
- def __associativity direction, *operators
140
- symbols = []
141
- for operator in operators
142
- symbols << (symbol = __get_symbol(operator))
143
- @associativities[direction] << symbol unless @associativities[direction].include? symbol
144
- end
145
- @precedences << symbols.uniq
146
- end
147
- end
148
-
149
- class ProductionDefinition
150
- instance_methods.each { |m| undef_method m unless m =~ /^__/ || m == 'new' || m=='instance_eval'}
151
- attr_reader :__definitions, :__literals
152
-
153
- def initialize
154
- @__definitions, @__literals = {}, {}
155
- end
156
-
157
- def method_missing name, *args, &block
158
- name_string = name.to_s
159
- symbol = case name_string
160
- when /^[a-z]/ then Aurum::Symbol.new name_string, false
161
- when '_' then nil
162
- when /_.+/ then Aurum::Symbol.new name_string, true
163
- end
164
- symbol.action = block if block_given?
165
- (args.empty? || name_string == '_') and return symbol
166
- symbols = args.map do |sym|
167
- if sym.kind_of? String
168
- @__literals[Pattern.from_string(sym)] = RecognizeTokenAction.new "$literal_#{sym}"
169
- sym = Aurum::Symbol.new("$literal_#{sym}", true)
170
- end
171
- sym
172
- end
173
- @__definitions[symbol] = [].to_set unless @__definitions.has_key? symbol
174
- production = Aurum::Production.new symbol, symbols.compact
175
- production.action = symbols.last.action if symbols.last.action
176
- @__definitions[symbol] << production
177
- end
178
- end
179
-
180
- class Grammar
181
- def self.character_classes &block
182
- @lexical_sepcification = LexicalSpecification.new unless @lexical_sepcification
183
- @lexical_sepcification.character_classes.instance_eval &block
184
- end
185
-
186
- def self.tokens &block
187
- @lexical_sepcification = LexicalSpecification.new unless @lexical_sepcification
188
- @lexical_sepcification.instance_eval &block
189
- end
190
-
191
- def self.precedences &block
192
- @precedences = OperatorPrecedenceDefinition.new unless @precedences
193
- @precedences.instance_eval &block
194
- end
195
-
196
- def self.productions &block
197
- @productions = ProductionDefinition.new unless @productions
198
- @productions.instance_eval &block
199
- end
200
-
201
- def self.lexer input
202
- @productions = ProductionDefinition.new unless @productions
203
- @lexical_sepcification.definitions[:initial].merge!(@productions.__literals)
204
- generator = Aurum::LexicalTableGenerator.new @lexical_sepcification.definitions
205
- table, accepts = generator.lexical_table
206
- Aurum::Lexer.new table, accepts, generator.lexical_states, input
207
- end
208
-
209
- def self.start_from start_symbol
210
- @productions = ProductionDefinition.new unless @productions
211
- @precedences = OperatorPrecedenceDefinition.new unless @precedences
212
- generator = Aurum::ParsingTableGenerator.new @productions.__definitions, @precedences.precedences, @precedences.associativities
213
- productions = generator.start_from(Aurum::Symbol.new(start_symbol.to_s, false)).productions
214
- table, lookeahead_level = generator.parsing_table
215
- Aurum::Parser.new productions, table
216
- end
217
-
218
- def self.method_missing name, input
219
- name_string = name.to_s
220
- if name_string =~ /^parse_/
221
- start_nonterminal = name_string.split(/^parse_/).last
222
- self.start_from(start_nonterminal).parse self.lexer(input)
223
- end
224
- end
225
- end
58
+ end
226
59
  end