aurum 0.1.1 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (77) hide show
  1. data/Rakefile +29 -0
  2. data/examples/dangling_else/grammar.rb +23 -0
  3. data/examples/expression/grammar.rb +28 -0
  4. data/examples/smalltalk/grammar.rb +151 -0
  5. data/examples/smalltalk/interpreter.rb +70 -0
  6. data/examples/yacc/grammar.rb +72 -0
  7. data/lib/aurum.rb +1 -9
  8. data/lib/aurum/engine.rb +39 -175
  9. data/lib/aurum/engine/parsing_facility.rb +107 -0
  10. data/lib/aurum/engine/tokenization_facility.rb +86 -0
  11. data/lib/aurum/grammar.rb +52 -219
  12. data/lib/aurum/grammar/automata.rb +194 -0
  13. data/lib/aurum/grammar/builder/augmented_grammar.rb +83 -0
  14. data/lib/aurum/grammar/builder/dot_logger.rb +66 -0
  15. data/lib/aurum/grammar/builder/lexical_table_builder.rb +55 -0
  16. data/lib/aurum/grammar/builder/parsing_table_builder.rb +238 -0
  17. data/lib/aurum/grammar/builder/set_of_items.rb +190 -0
  18. data/lib/aurum/grammar/compiled_tables.rb +20 -0
  19. data/lib/aurum/grammar/dsl/lexical_definition.rb +94 -0
  20. data/lib/aurum/grammar/dsl/syntax_definition.rb +79 -0
  21. data/lib/aurum/grammar/lexical_rules.rb +224 -0
  22. data/lib/aurum/grammar/metalang/grammar.rb +47 -0
  23. data/lib/aurum/grammar/syntax_rules.rb +95 -0
  24. data/spec/builder/dsl_definition/aurum_grammar_spec.rb +33 -0
  25. data/spec/engine/lexer_spec.rb +59 -0
  26. data/spec/engine/parser_spec.rb +90 -0
  27. data/spec/examples/dangling_else_example.rb +30 -0
  28. data/spec/examples/expression_example.rb +48 -0
  29. data/spec/examples/smalltalk_example.rb +50 -0
  30. data/spec/examples/yacc_spec.rb +30 -0
  31. data/spec/grammar/builder/lexical_table/automata_spec.rb +55 -0
  32. data/spec/grammar/builder/lexical_table/builder_spec.rb +78 -0
  33. data/spec/grammar/builder/lexical_table/character_set_spec.rb +100 -0
  34. data/spec/grammar/builder/lexical_table/pattern_spec.rb +11 -0
  35. data/spec/grammar/builder/lexical_table/regular_expression.rb +40 -0
  36. data/spec/grammar/builder/parsing_table/augmented_grammar_spec.rb +36 -0
  37. data/spec/grammar/builder/parsing_table/builder_spec.rb +152 -0
  38. data/spec/grammar/builder/parsing_table/digraph_traverser_spec.rb +42 -0
  39. data/spec/grammar/builder/parsing_table/item_spec.rb +51 -0
  40. data/spec/grammar/builder/parsing_table/sources_spec.rb +66 -0
  41. data/spec/grammar/builder/parsing_table/state_spec.rb +82 -0
  42. data/spec/grammar/dsl/character_classes_builder_spec.rb +50 -0
  43. data/spec/grammar/dsl/lexical_rules_builder_spec.rb +181 -0
  44. data/spec/grammar/dsl/precedence_builder_spec.rb +64 -0
  45. data/spec/grammar/dsl/productions_builder_spec.rb +78 -0
  46. data/spec/grammar/metalang/metalang_spec.rb +0 -0
  47. data/spec/grammar/precedence_spec.rb +42 -0
  48. data/spec/grammar/syntax_rules_spec.rb +31 -0
  49. data/spec/parser_matcher.rb +69 -0
  50. data/spec/pattern_matcher.rb +123 -0
  51. data/spec/spec_helper.rb +133 -0
  52. metadata +70 -36
  53. data/example/expression/expression.rb +0 -35
  54. data/example/expression/lisp.rb +0 -26
  55. data/lib/aurum/lexical_table_generator.rb +0 -429
  56. data/lib/aurum/parsing_table_generator.rb +0 -464
  57. data/test/engine/lexer_test.rb +0 -59
  58. data/test/engine/semantic_attributes_test.rb +0 -15
  59. data/test/grammar_definition/character_class_definition_test.rb +0 -28
  60. data/test/grammar_definition/grammar_definition_test.rb +0 -55
  61. data/test/grammar_definition/lexical_definition_test.rb +0 -56
  62. data/test/grammar_definition/operator_precedence_definition_test.rb +0 -35
  63. data/test/grammar_definition/production_definition_test.rb +0 -60
  64. data/test/lexical_table_generator/automata_test.rb +0 -74
  65. data/test/lexical_table_generator/character_set_test.rb +0 -73
  66. data/test/lexical_table_generator/interval_test.rb +0 -36
  67. data/test/lexical_table_generator/pattern_test.rb +0 -115
  68. data/test/lexical_table_generator/subset_determinizer_test.rb +0 -19
  69. data/test/lexical_table_generator/table_generator_test.rb +0 -126
  70. data/test/parsing_table_generator/augmented_grammar_test.rb +0 -45
  71. data/test/parsing_table_generator/lalr_n_computation_test.rb +0 -92
  72. data/test/parsing_table_generator/lr_0_automata_test.rb +0 -94
  73. data/test/parsing_table_generator/lr_item_test.rb +0 -27
  74. data/test/parsing_table_generator/parsing_table_state_test.rb +0 -39
  75. data/test/parsing_table_generator/precedence_table_test.rb +0 -28
  76. data/test/parsing_table_generator/production_test.rb +0 -9
  77. data/test/test_helper.rb +0 -103
@@ -1,464 +0,0 @@
1
- require 'set'
2
- require 'logger'
3
-
4
- module Aurum
5
# Grammar value objects.
# NOTE: when evaluated inside module Aurum, `Symbol` names this struct and
# shadows Ruby's core ::Symbol for code resolved in that scope.
Symbol = Struct.new(:name, :is_terminal)
Production = Struct.new(:nonterminal, :symbols)

# Parsing-table entries: a shift carries a state index, a reduce carries a
# production index (see how they are constructed in ParsingTableGenerator).
ShiftAction = Struct.new(:state, :is_lookahead_shift)
ReduceAction = Struct.new(:handle, :is_read_reduce)

# Each production also records its position in the generator's production list.
Production.class_eval { attr_accessor :index }

# Synthetic start nonterminal and end-of-input terminal.
START = Symbol.new('$start', false)
EOF = Symbol.new('$eof', true)

# Shared logger writing to STDOUT; the INFO threshold hides debug messages.
Log = Logger.new(STDOUT)
Log.level = Logger::INFO
13
-
14
- class ParsingTableGenerator
15
attr_reader :symbols, :productions

# Associativities used when the caller passes none: no operator on either side.
DEFAULT_ASSOCIATIVITIES = {:left => [], :right => []}

# Stores the grammar definition and builds the operator precedence table.
#
# definition      - object indexed by a nonterminal (`@definition[symbol]`
#                   elsewhere in this class) to obtain its productions.
# precedences     - list of terminal groups handed to PrecedenceTable.
# associativities - hash with :left and :right terminal lists.
def initialize(definition, precedences = [], associativities = DEFAULT_ASSOCIATIVITIES)
  @precedence_table = PrecedenceTable.new(precedences, associativities)
  @definition = definition
end
23
-
24
# Prepares the generator for the given start symbol: builds the augmented
# grammar, then the nullable set, then the FIRST sets.
# Returns self so that calls can be chained.
def start_from(start)
  tap do
    initialize_augmented_grammar(start)
    compute_nullable_symbols
    compute_first_sets
  end
end
30
-
31
# Builds the parsing table.
#
# Constructs the LR(0) automaton, computes LALR(1) lookahead when inconsistent
# states exist, and falls back to LALR(n) when conflicts remain.
#
# Returns a two-element array: the table (one Hash per state mapping a symbol
# to a single action, with the state's default action as the Hash default)
# and the lookahead level that was needed.
def parsing_table
  Log.debug 'Start constructing LR(0) automata.'
  construct_LR0_automata
  Log.debug "Finished, #{@states.size} LR(0) states constructed."
  Log.debug "#{@inconsistents.size} inconsistent states found."
  unless @inconsistents.empty?
    compute_LALR_1_lookahead
    compute_LALR_n_lookahead unless @conflicts.empty?
  end
  table = @states.map do |state|
    row = Hash.new(default_action(state))
    # Each entry is a set of candidate actions; the table keeps the first one.
    state.actions.each { |symbol, candidates| row[symbol] = candidates.to_a.first }
    row
  end
  [table, @lookahead_level]
end
48
-
49
- private
50
- # BEGIN utils methods
51
# Calls the block (always passing false as the initial "changed" flag) again
# and again until it returns a falsy value. Returns nil.
def fixed_point
  changed = true
  changed = yield(false) while changed
  nil
end
54
-
55
# Drains a work list: pops the last element and yields the list together with
# that element, until the list is empty. The block may push further work.
def working_list(unvisited)
  until unvisited.empty?
    current = unvisited.pop
    yield unvisited, current
  end
end
58
-
59
# Records +item+ in +result+ and schedules it on the +uvisited+ work list,
# unless +result+ already contains it (in which case nothing happens).
def mark_working_list(uvisited, result, item)
  return if result.include?(item)
  result << item
  uvisited << item
end
65
- # END utils methods
66
-
67
- # BEGIN augmented grammar
68
# Builds the augmented grammar reachable from +start+.
#
# Adds the production START -> start, then walks every symbol reachable from
# +start+, registering its productions, an initial FIRST set (the symbol itself
# for terminals, empty otherwise) and the closure items of its productions.
def initialize_augmented_grammar(start)
  @symbols = [start]
  @productions = []
  @start_production = add_production(Production.new(START, [start]))
  @nullables = Set.new
  @first_sets = {START => []}
  @closures = {}
  working_list([start]) do |pending, current|
    @first_sets[current] = current.is_terminal ? [current] : []
    # Registered before the productions are expanded, so closure() already
    # sees an entry for this symbol while it is being processed.
    @closures[current] = []
    next if current.is_terminal
    @definition[current].each do |rule|
      add_production(rule).symbols.each { |sym| mark_working_list(pending, @symbols, sym) }
      @closures[current] += closure([LRItem.new(rule, 0)])
    end
  end
end
80
-
81
# Appends +production+ to the production list, assigning it the index it
# occupies there. Returns the production.
def add_production(production)
  production.index = @productions.size
  @productions << production
  production
end
85
-
86
# Computes the set of nullable nonterminals by iterating to a fixed point:
# a nonterminal becomes nullable once one of its productions consists only of
# nullable symbols (or is empty).
def compute_nullable_symbols
  fixed_point do |changed|
    @productions.each do |rule|
      next unless nullable?(rule.symbols)
      # Set#add? returns nil when the element was already present.
      changed = true if @nullables.add?(rule.nonterminal)
    end
    changed
  end
end
94
-
95
# Computes FIRST sets by iterating to a fixed point: for every production the
# FIRST set of each leading symbol is merged into the nonterminal's set,
# stopping at the first symbol that is not nullable.
def compute_first_sets
  fixed_point do |changed|
    @productions.each do |rule|
      firsts = @first_sets[rule.nonterminal]
      rule.symbols.each do |sym|
        size_before = firsts.size
        firsts.replace(firsts | @first_sets[sym])
        changed = true if firsts.size != size_before
        break unless nullable?([sym])
      end
    end
    changed
  end
end
107
-
108
# True when every symbol in +symbols+ is in the nullable set; an empty list is
# nullable (all? over an empty collection is true).
def nullable?(symbols)
  symbols.all? { |sym| @nullables.include?(sym) }
end
112
-
113
# Returns the closure of +items+: the items themselves plus, for every item
# whose dot precedes a nonterminal, the initial items of that nonterminal's
# productions. A closure already stored in @closures for the nonterminal is
# merged in directly instead of being re-expanded.
def closure(items)
  collected = items.dup
  working_list(items.dup) do |pending, item|
    next if item.is_handle || item.dot_symbol.is_terminal
    cached = @closures[item.dot_symbol]
    if cached
      collected |= cached
    else
      @definition[item.dot_symbol].each do |rule|
        mark_working_list(pending, collected, LRItem.new(rule, 0))
      end
    end
  end
  collected
end
124
- # END augmented grammar
125
-
126
- # BEGIN LR(0) automata construction
127
# Builds the LR(0) automaton starting from the closure of the start production.
#
# For each non-handle item of a state the successor state is computed. When the
# successor consists of one completed item, a read-reduce action is recorded
# instead of a new state; otherwise the successor is registered (or matched to
# an equal existing state), a shift is recorded, and the current state is
# remembered as a predecessor of the successor.
def construct_LR0_automata
  @lookahead_level = 0
  initial = State.new(closure([LRItem.new(@start_production, 0)]))
  @states = [initial]
  @inconsistents = initial.inconsistent? ? [initial] : []
  working_list([initial]) do |pending, current|
    current.non_handles.each do |item|
      symbol = item.dot_symbol
      target = goto(current, symbol)
      reduced = target.read_reduce
      if reduced
        current[symbol] << ReduceAction.new(reduced.index, true)
        next
      end
      position = @states.index(target)
      if position
        target = @states[position]
      else
        @states << target
        @inconsistents << target if target.inconsistent?
        pending << target
        position = @states.length - 1
      end
      current[symbol] << ShiftAction.new(position, false)
      target.predsucceors << current
    end
  end
end
153
-
154
# Returns the state reached from +items+ over +symbol+: every item whose dot
# precedes +symbol+ is advanced by one position and the result is closed.
def goto(items, symbol)
  advanced = []
  items.each do |item|
    next unless symbol == item.dot_symbol
    advanced << LRItem.new(item.production, item.position + 1)
  end
  State.new(closure(advanced))
end
161
- # END LR(0) automata construction
162
-
163
- # BEGIN lookahead computation
164
# Computes LALR(1) lookahead for every handle of every inconsistent state and
# records the corresponding reduce actions.
#
# A shift/reduce choice is settled by operator precedence only when the state
# has nothing but shifts on the lookahead AND both the lookahead and the
# production's operator (its right-most terminal) have a declared precedence.
# In every other case the reduce is added next to the existing actions, and the
# state is collected in @conflicts when more than one action remains.
#
# Fix: the original compared precedences even when the production had no
# operator or an operator without a declared precedence; PrecedenceTable#compare
# then returned nil and `nil >= 0` raised NoMethodError.
def compute_LALR_1_lookahead
  @lookahead_level, @conflicts = 1, []
  @lookahead_config_stack, @lookahead_indicitor, @lookahead_result = [], {}, {}
  handle_each @inconsistents do |state, handle|
    lookahead_each(state, handle.production) do |production, lookahead|
      operator = production.operator
      by_precedence = state.only_shift?(lookahead) &&
                      @precedence_table.operator?(lookahead) &&
                      @precedence_table.operator?(operator)
      if by_precedence
        # Reduce wins on higher precedence or on equal precedence that is not
        # right-associative; otherwise the shift is kept untouched.
        if @precedence_table.compare(operator, lookahead) >= 0
          state[lookahead].clear
          state[lookahead] << ReduceAction.new(production.index, false)
        end
      else
        state[lookahead] << ReduceAction.new(production.index, false)
      end
    end
    @conflicts << state if state.conflict?
  end
end
181
-
182
# Yields every (state, handle) pair: each state in +states+ combined with each
# of its completed items.
def handle_each(states)
  states.each do |state|
    state.handles.each do |handle|
      yield state, handle
    end
  end
end
185
-
186
# Yields (production, lookahead) for every lookahead symbol of +production+ in
# +state+. The lookaheads come from the follow sets of the configurations
# formed by the states reached when walking back over the production's symbols;
# already computed follow sets are reused.
def lookahead_each(state, production)
  state.predsucceors(production.symbols.reverse).each do |predsucceor|
    config = Configuration.new(predsucceor, production.nonterminal)
    lookaheads = if @lookahead_indicitor[config]
                   @lookahead_result[config]
                 else
                   compute_follow_set(config)
                 end
    lookaheads.each { |lookahead| yield production, lookahead }
  end
end
192
-
193
# Computes (and memoizes in @lookahead_result) the follow set of a
# (state, nonterminal) configuration.
#
# The traversal keeps a stack of configurations in progress. Each one is
# numbered with its stack depth; follow sets of configurations reported by
# each_included_by are merged in and the smallest number seen is propagated.
# When a configuration still carries its own depth afterwards, everything above
# it on the stack is popped, given the same result and marked finished with
# 65535. NOTE(review): this looks like the DeRemer/Pennello digraph traversal -
# confirm before relying on that name.
def compute_follow_set(config)
  state = config.state
  nonterminal = config.symbol
  if START == nonterminal
    @lookahead_indicitor[config] = 65535
    @lookahead_result[config] = [EOF]
    return @lookahead_result[config]
  end
  @lookahead_config_stack.push(config)
  depth = @lookahead_config_stack.length
  @lookahead_indicitor[config] = depth
  @lookahead_result[config] = read_set(state, nonterminal)
  each_included_by(state, nonterminal) do |included|
    compute_follow_set(included) unless @lookahead_indicitor[included]
    @lookahead_indicitor[config] = [@lookahead_indicitor[config], @lookahead_indicitor[included]].min
    @lookahead_result[config] |= @lookahead_result[included]
  end
  if @lookahead_indicitor[config] == depth
    loop do
      member = @lookahead_config_stack.pop
      @lookahead_result[member] = @lookahead_result[config].dup
      @lookahead_indicitor[member] = 65535
      break if member == config
    end
  end
  @lookahead_result[config]
end
216
-
217
# Returns the union of the FIRST sets of all symbols that each_read_by reports
# for +state+ and +symbol+.
def read_set(state, symbol)
  firsts = []
  each_read_by(state, symbol) do |_origin, read_symbol|
    firsts |= @first_sets[read_symbol]
  end
  firsts
end
222
-
223
# Yields (state, read symbol) for the symbols readable after moving from
# +state+ over +symbol+: for every item of the target state, the symbols after
# the dot are yielded in order, stopping after the first non-nullable one.
# Does nothing when +state+ has no shift on +symbol+.
def each_read_by(state, symbol)
  index = state.goto(symbol)
  return unless index
  @states[index].each do |item|
    item.second_part.each do |following|
      yield state, following
      break unless nullable?([following])
    end
  end
end
232
-
233
# Yields the configurations whose follow set is included in the follow set of
# (+state+, +nonterminal+): for every item with the dot before +nonterminal+ and
# a nullable remainder, one configuration per state reached by walking back
# over the symbols in front of the dot, paired with the item's nonterminal.
def each_included_by(state, nonterminal)
  state.each do |item|
    next unless item.dot_symbol == nonterminal && nullable?(item.second_part[1..-1])
    prefix = item.production.symbols.slice(0, item.position)
    state.predsucceors(prefix.reverse).each do |origin|
      yield Configuration.new(origin, item.production.nonterminal)
    end
  end
end
242
-
243
# Resolves the conflicts left after LALR(1) by adding lookahead states, and
# raises @lookahead_level to the largest level counted for any state.
def compute_LALR_n_lookahead
  @stack_seen = []
  @conflicts.each do |state|
    @current_lookahead_level = 1
    state.conflicted_actions.each do |lookahead, actions|
      resolve_conficts(state, lookahead, sources_of(state, lookahead, actions))
    end
    @lookahead_level = [@current_lookahead_level, @lookahead_level].max
  end
end
253
-
254
# Maps every conflicting action to the set of state stacks ("sources") that
# explain why +lookahead+ selects it in +state+.
#
# A shift (and a read-reduce) is explained by the stack holding only +state+;
# a reduce additionally collects follow_sources from each state reached by
# walking back over the reduced production's symbols.
def sources_of(state, lookahead, actions)
  actions.each_with_object({}) do |action, sources|
    if action.kind_of?(ShiftAction)
      sources[action] = Set.new([[state]])
      next
    end
    handle = @productions[action.handle]
    sources[action] = action.is_read_reduce ? Set.new([[state]]) : Set.new
    state.predsucceors(handle.symbols.reverse).each do |predsucceor|
      # The visited list is reset for every starting state.
      @follow_sources_visited = []
      sources[action].merge(follow_sources([predsucceor], handle.nonterminal, lookahead))
    end
  end
end
270
-
271
# Collects the state stacks from which +lookahead+ can be read after
# +nonterminal+ has been recognized on top of +stack+. Returns a Set of stacks
# (an empty Array when a single-state configuration was already visited).
def follow_sources(stack, nonterminal, lookahead)
  top = stack.last
  if stack.length == 1
    config = Configuration.new(top, nonterminal)
    return [] if @follow_sources_visited.include?(config)
    @follow_sources_visited |= [config]
  end
  stacks = Set.new
  q_index = top.goto(nonterminal)
  if q_index
    q = @states[q_index]
    # The lookahead is read directly in the state entered over the nonterminal.
    stacks = Set.new([stack + [q]]) if q.direct_read.include?(lookahead)
  end
  each_read_by(top, nonterminal) do |origin, read_symbol|
    next if read_symbol.is_terminal
    stacks |= follow_sources(stack + [origin], read_symbol, lookahead)
  end
  top.kernels.each do |item|
    next if item.is_start || item.dot_symbol != nonterminal
    owner = item.production.nonterminal
    if item.position < stack.length
      stacks |= follow_sources(stack.slice(0..-item.position - 1), owner, lookahead)
    else
      prefix = item.production.symbols.slice(0..-stack.length - 1)
      # NOTE(review): other callers pass the symbols reversed to predsucceors;
      # here the resulting states are reversed instead - confirm this is intended.
      stack[0].predsucceors(prefix).reverse.each do |origin|
        stacks |= follow_sources([origin], owner, lookahead)
      end
    end
  end
  stacks
end
297
-
298
# Resolves the conflict that +state+ has on +lookahead+ by adding a lookahead
# state that decides with one more input symbol.
#
# state     - the state whose actions on +lookahead+ conflict.
# lookahead - the symbol on which the conflict occurs.
# sources   - Hash mapping each conflicting action to its Set of state stacks.
#
# The conflicting entry is replaced by a lookahead shift into the new state,
# whose entries are filled from the next lookaheads of every source stack.
# Conflicts remaining in the new state are resolved recursively.
# Raises RuntimeError ('not LALR(n)') when a stack is encountered twice.
#
# Fix: the recursive call used to sit inside the per-action loop, so it ran
# once per action with a partially built source map; the second run revisited
# stacks already recorded in @stack_seen and raised 'not LALR(n)'. It now runs
# once per conflicting lookahead, after the sources of all actions are known.
def resolve_conficts(state, lookahead, sources)
  @current_lookahead_level += 1
  @states << (lookahead_state = State.new([]))
  state[lookahead].replace [ShiftAction.new(@states.length - 1, true)]
  sources.each do |action, stacks|
    stacks.each do |stk|
      raise 'not LALR(n)' if @stack_seen.include?(stk)
      @stack_seen << stk
      next_lookaheads(stk, lookahead).each { |a| lookahead_state[a] << action }
    end
  end
  lookahead_state.conflicted_actions.each do |next_lookahead, actions|
    new_sources = {}
    actions.each do |action|
      new_sources[action] = Set.new
      sources[action].each do |stk|
        @follow_sources_visited = []
        new_sources[action] |= follow_sources(stk, lookahead, next_lookahead)
      end
    end
    resolve_conficts(lookahead_state, next_lookahead, new_sources)
  end
end
323
-
324
# Returns the symbols that may follow +lookahead+ when the parser is in the
# configuration described by +stack+. EOF is only ever followed by EOF.
def next_lookaheads(stack, lookahead)
  return [EOF] if EOF == lookahead
  top = stack.last
  lookaheads = read_set(top, lookahead)
  top.kernels.each do |item|
    next if item.is_start || item.dot_symbol != lookahead
    owner = item.production.nonterminal
    if item.position < stack.length
      lookaheads |= next_lookaheads(stack.slice(0..-item.position - 1), owner)
    else
      prefix = item.production.symbols.slice(0..-stack.length - 1)
      # NOTE(review): other callers pass the symbols reversed to predsucceors;
      # here the resulting states are reversed instead - confirm this is intended.
      stack[0].predsucceors(prefix).reverse.each do |origin|
        lookaheads |= compute_follow_set(Configuration.new(origin, owner))
      end
    end
  end
  lookaheads
end
341
-
342
# Picks the action a state falls back to for symbols without an explicit entry.
#
# For a state with items: a reduce by the shortest completed production that is
# not the START production (the first one wins on ties), or nil if none exists.
# For an item-less lookahead state: the non-read-reduce reduce action, among
# all its entries, whose production has the fewest symbols, or nil.
#
# Fix: the lookahead-state branch indexed @productions with the action object
# itself and called a non-existent `handle` on the production, which raised as
# soon as two candidates had to be compared. The production is now looked up
# through the action's production index and measured by its symbol count.
def default_action(state)
  unless state.empty?
    reducible = state.select { |item| item.is_handle && item.production.nonterminal != START }
    handle = reducible.min_by { |item| item.production.symbols.length }
    return handle ? ReduceAction.new(handle.production.index, false) : nil
  end
  candidates = state.actions.values.inject([]) do |found, actions|
    found | actions.to_a.select { |action| action.kind_of?(ReduceAction) && !action.is_read_reduce }
  end
  candidates.min_by { |action| @productions[action.handle].symbols.length }
end
358
-
359
# Operator precedence and associativity lookup used to settle shift/reduce
# choices.
class PrecedenceTable
  # precedences     - list of terminal groups; groups listed earlier receive a
  #                   higher level than groups listed later.
  # associativities - hash whose :right entry lists right-associative terminals.
  def initialize(precedences, associativities)
    @associativities = associativities
    @precedence_table = {}
    precedences.reverse.each_with_index do |terminals, level|
      terminals.each { |terminal| @precedence_table[terminal] = level }
    end
  end

  # True when +symbol+ has a declared precedence.
  def operator?(symbol)
    @precedence_table.key?(symbol)
  end

  # Compares the operator of the production being reduced with the lookahead.
  # Different levels compare with <=>. On equal levels the result is -1 when
  # both terminals are right-associative and 1 otherwise.
  def compare(current, lookahead)
    current_level = @precedence_table[current]
    lookahead_level = @precedence_table[lookahead]
    return current_level <=> lookahead_level unless current_level == lookahead_level
    right = @associativities[:right]
    right.include?(current) && right.include?(lookahead) ? -1 : 1
  end
end
381
-
382
Production.class_eval do
  attr_accessor :action

  # The operator of a production is its right-most terminal symbol, or nil when
  # the production contains no terminal.
  def operator
    symbols.reverse_each.find { |symbol| symbol.is_terminal }
  end
end
388
-
389
# A parser state: an Array of items plus the actions recorded per symbol and
# the states from which this state was entered.
class State < Array
  attr_reader :actions, :handles, :non_handles, :kernels, :read_reduce, :direct_read

  # Classifies +elements+ into handles (completed items), non-handles and
  # kernel items, and collects the symbols that appear directly after a dot.
  # A state made of a single completed item remembers that item's production
  # as its read-reduce production.
  def initialize(elements)
    super(elements)
    @actions = {}
    @predsucceors = []
    @read_reduce = nil
    @handles, @non_handles = elements.partition { |item| item.is_handle }
    @kernels = elements.select { |item| item.is_kernel }
    @direct_read = Set.new
    elements.each { |item| @direct_read << item.dot_symbol if item.dot_symbol }
    @read_reduce = first.production if size == 1 && first.is_handle
  end

  # Returns the action set for +symbol+, creating an empty one on first access.
  # Note that this replaces Array#[] for State instances.
  def [](symbol)
    @actions.fetch(symbol) { @actions[symbol] = Set.new([]) }
  end

  # A state needs lookahead when it has several handles, or one handle that is
  # not its only kernel item.
  def inconsistent?
    return true if @handles.size > 1
    @handles.size == 1 && @kernels.size != 1
  end

  # True for an inconsistent state with more than one action on some symbol.
  def conflict?
    inconsistent? && @actions.any? { |_symbol, candidates| candidates.length > 1 }
  end

  # Returns the [symbol, actions] pairs that hold more than one action.
  def conflicted_actions
    @actions.each_with_object([]) do |(symbol, candidates), found|
      found << [symbol, candidates] if candidates.length > 1
    end
  end

  # True when there is at least one action on +symbol+ and all are shifts.
  def only_shift?(symbol)
    candidates = self[symbol]
    return false if candidates.empty?
    candidates.all? { |action| action.kind_of?(ShiftAction) }
  end

  # Returns the target state index of the shift on +symbol+, or nil.
  def goto(symbol)
    shift = self[symbol].find { |action| action.kind_of?(Aurum::ShiftAction) }
    shift ? shift.state : nil
  end

  # Without an argument: the list of direct predecessor states.
  # With +symbols+: the states reached by stepping back once per symbol, each
  # step keeping only predecessors that have an item with the dot before that
  # symbol.
  def predsucceors(symbols = nil)
    return @predsucceors unless symbols
    symbols.inject([self]) do |frontier, symbol|
      frontier.inject([]) do |found, state|
        found | state.predsucceors.select do |candidate|
          candidate.any? { |item| item.dot_symbol == symbol }
        end
      end
    end
  end

  # Two states are equal when they contain the same kernel items.
  def ==(other)
    return false unless other.kind_of?(State) && @kernels.size == other.kernels.size
    return true if equal?(other)
    @kernels.all? { |item| other.kernels.include?(item) }
  end
end
448
-
449
# A (state, symbol) pair used as a key while computing lookahead.
Configuration = Struct.new(:state, :symbol)

# An LR item: a production together with the dot position inside it.
# The derived attributes are computed once at construction time; equality and
# hashing come from Struct and depend on production and position only.
LRItem = Struct.new(:production, :position) do
  attr_reader :dot_symbol, :second_part, :is_handle, :is_kernel, :is_start

  def initialize(production, position)
    super(production, position)
    symbols = production.symbols
    @dot_symbol = symbols[position]               # nil once the dot is at the end
    @is_handle = position >= symbols.length       # completed item
    @is_kernel = @is_handle || (position != 0)
    @is_start = production.nonterminal == START
    @second_part = @is_handle ? [] : symbols.slice(position..-1)
  end
end
463
- end
464
- end