aurum 0.1.1 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (77) hide show
  1. data/Rakefile +29 -0
  2. data/examples/dangling_else/grammar.rb +23 -0
  3. data/examples/expression/grammar.rb +28 -0
  4. data/examples/smalltalk/grammar.rb +151 -0
  5. data/examples/smalltalk/interpreter.rb +70 -0
  6. data/examples/yacc/grammar.rb +72 -0
  7. data/lib/aurum.rb +1 -9
  8. data/lib/aurum/engine.rb +39 -175
  9. data/lib/aurum/engine/parsing_facility.rb +107 -0
  10. data/lib/aurum/engine/tokenization_facility.rb +86 -0
  11. data/lib/aurum/grammar.rb +52 -219
  12. data/lib/aurum/grammar/automata.rb +194 -0
  13. data/lib/aurum/grammar/builder/augmented_grammar.rb +83 -0
  14. data/lib/aurum/grammar/builder/dot_logger.rb +66 -0
  15. data/lib/aurum/grammar/builder/lexical_table_builder.rb +55 -0
  16. data/lib/aurum/grammar/builder/parsing_table_builder.rb +238 -0
  17. data/lib/aurum/grammar/builder/set_of_items.rb +190 -0
  18. data/lib/aurum/grammar/compiled_tables.rb +20 -0
  19. data/lib/aurum/grammar/dsl/lexical_definition.rb +94 -0
  20. data/lib/aurum/grammar/dsl/syntax_definition.rb +79 -0
  21. data/lib/aurum/grammar/lexical_rules.rb +224 -0
  22. data/lib/aurum/grammar/metalang/grammar.rb +47 -0
  23. data/lib/aurum/grammar/syntax_rules.rb +95 -0
  24. data/spec/builder/dsl_definition/aurum_grammar_spec.rb +33 -0
  25. data/spec/engine/lexer_spec.rb +59 -0
  26. data/spec/engine/parser_spec.rb +90 -0
  27. data/spec/examples/dangling_else_example.rb +30 -0
  28. data/spec/examples/expression_example.rb +48 -0
  29. data/spec/examples/smalltalk_example.rb +50 -0
  30. data/spec/examples/yacc_spec.rb +30 -0
  31. data/spec/grammar/builder/lexical_table/automata_spec.rb +55 -0
  32. data/spec/grammar/builder/lexical_table/builder_spec.rb +78 -0
  33. data/spec/grammar/builder/lexical_table/character_set_spec.rb +100 -0
  34. data/spec/grammar/builder/lexical_table/pattern_spec.rb +11 -0
  35. data/spec/grammar/builder/lexical_table/regular_expression.rb +40 -0
  36. data/spec/grammar/builder/parsing_table/augmented_grammar_spec.rb +36 -0
  37. data/spec/grammar/builder/parsing_table/builder_spec.rb +152 -0
  38. data/spec/grammar/builder/parsing_table/digraph_traverser_spec.rb +42 -0
  39. data/spec/grammar/builder/parsing_table/item_spec.rb +51 -0
  40. data/spec/grammar/builder/parsing_table/sources_spec.rb +66 -0
  41. data/spec/grammar/builder/parsing_table/state_spec.rb +82 -0
  42. data/spec/grammar/dsl/character_classes_builder_spec.rb +50 -0
  43. data/spec/grammar/dsl/lexical_rules_builder_spec.rb +181 -0
  44. data/spec/grammar/dsl/precedence_builder_spec.rb +64 -0
  45. data/spec/grammar/dsl/productions_builder_spec.rb +78 -0
  46. data/spec/grammar/metalang/metalang_spec.rb +0 -0
  47. data/spec/grammar/precedence_spec.rb +42 -0
  48. data/spec/grammar/syntax_rules_spec.rb +31 -0
  49. data/spec/parser_matcher.rb +69 -0
  50. data/spec/pattern_matcher.rb +123 -0
  51. data/spec/spec_helper.rb +133 -0
  52. metadata +70 -36
  53. data/example/expression/expression.rb +0 -35
  54. data/example/expression/lisp.rb +0 -26
  55. data/lib/aurum/lexical_table_generator.rb +0 -429
  56. data/lib/aurum/parsing_table_generator.rb +0 -464
  57. data/test/engine/lexer_test.rb +0 -59
  58. data/test/engine/semantic_attributes_test.rb +0 -15
  59. data/test/grammar_definition/character_class_definition_test.rb +0 -28
  60. data/test/grammar_definition/grammar_definition_test.rb +0 -55
  61. data/test/grammar_definition/lexical_definition_test.rb +0 -56
  62. data/test/grammar_definition/operator_precedence_definition_test.rb +0 -35
  63. data/test/grammar_definition/production_definition_test.rb +0 -60
  64. data/test/lexical_table_generator/automata_test.rb +0 -74
  65. data/test/lexical_table_generator/character_set_test.rb +0 -73
  66. data/test/lexical_table_generator/interval_test.rb +0 -36
  67. data/test/lexical_table_generator/pattern_test.rb +0 -115
  68. data/test/lexical_table_generator/subset_determinizer_test.rb +0 -19
  69. data/test/lexical_table_generator/table_generator_test.rb +0 -126
  70. data/test/parsing_table_generator/augmented_grammar_test.rb +0 -45
  71. data/test/parsing_table_generator/lalr_n_computation_test.rb +0 -92
  72. data/test/parsing_table_generator/lr_0_automata_test.rb +0 -94
  73. data/test/parsing_table_generator/lr_item_test.rb +0 -27
  74. data/test/parsing_table_generator/parsing_table_state_test.rb +0 -39
  75. data/test/parsing_table_generator/precedence_table_test.rb +0 -28
  76. data/test/parsing_table_generator/production_test.rb +0 -9
  77. data/test/test_helper.rb +0 -103
metadata CHANGED
@@ -1,10 +1,10 @@
1
1
  --- !ruby/object:Gem::Specification
2
- rubygems_version: 0.9.2
2
+ rubygems_version: 0.9.4
3
3
  specification_version: 1
4
4
  name: aurum
5
5
  version: !ruby/object:Gem::Version
6
- version: 0.1.1
7
- date: 2007-05-26 00:00:00 +10:00
6
+ version: 0.2.0
7
+ date: 2007-10-11 00:00:00 +10:00
8
8
  summary: Aurum is a LALR(n) parser generator written in Ruby.
9
9
  require_paths:
10
10
  - lib
@@ -29,43 +29,77 @@ post_install_message:
29
29
  authors:
30
30
  - Vincent Xu
31
31
  files:
32
- - example/expression
33
- - example/expression/expression.rb
34
- - example/expression/lisp.rb
32
+ - examples/dangling_else
33
+ - examples/dangling_else/grammar.rb
34
+ - examples/expression
35
+ - examples/expression/grammar.rb
36
+ - examples/smalltalk
37
+ - examples/smalltalk/grammar.rb
38
+ - examples/smalltalk/interpreter.rb
39
+ - examples/yacc
40
+ - examples/yacc/grammar.rb
35
41
  - lib/aurum
36
- - lib/aurum/lexical_table_generator.rb
37
- - lib/aurum/parsing_table_generator.rb
42
+ - lib/aurum/engine
43
+ - lib/aurum/engine/parsing_facility.rb
44
+ - lib/aurum/engine/tokenization_facility.rb
38
45
  - lib/aurum/engine.rb
46
+ - lib/aurum/grammar
47
+ - lib/aurum/grammar/automata.rb
48
+ - lib/aurum/grammar/builder
49
+ - lib/aurum/grammar/builder/augmented_grammar.rb
50
+ - lib/aurum/grammar/builder/dot_logger.rb
51
+ - lib/aurum/grammar/builder/lexical_table_builder.rb
52
+ - lib/aurum/grammar/builder/parsing_table_builder.rb
53
+ - lib/aurum/grammar/builder/set_of_items.rb
54
+ - lib/aurum/grammar/compiled_tables.rb
55
+ - lib/aurum/grammar/dsl
56
+ - lib/aurum/grammar/dsl/lexical_definition.rb
57
+ - lib/aurum/grammar/dsl/syntax_definition.rb
58
+ - lib/aurum/grammar/lexical_rules.rb
59
+ - lib/aurum/grammar/metalang
60
+ - lib/aurum/grammar/metalang/grammar.rb
61
+ - lib/aurum/grammar/syntax_rules.rb
39
62
  - lib/aurum/grammar.rb
40
- - lib/grammars
41
63
  - lib/aurum.rb
42
- - test/parsing_table_generator
43
- - test/parsing_table_generator/augmented_grammar_test.rb
44
- - test/parsing_table_generator/precedence_table_test.rb
45
- - test/parsing_table_generator/production_test.rb
46
- - test/parsing_table_generator/lr_0_automata_test.rb
47
- - test/parsing_table_generator/lalr_n_computation_test.rb
48
- - test/parsing_table_generator/parsing_table_state_test.rb
49
- - test/parsing_table_generator/lr_item_test.rb
50
- - test/engine
51
- - test/engine/lexer_test.rb
52
- - test/engine/semantic_attributes_test.rb
53
- - test/grammars
54
- - test/grammars/ruby
55
- - test/grammar_definition
56
- - test/grammar_definition/grammar_definition_test.rb
57
- - test/grammar_definition/production_definition_test.rb
58
- - test/grammar_definition/operator_precedence_definition_test.rb
59
- - test/grammar_definition/character_class_definition_test.rb
60
- - test/grammar_definition/lexical_definition_test.rb
61
- - test/lexical_table_generator
62
- - test/lexical_table_generator/interval_test.rb
63
- - test/lexical_table_generator/subset_determinizer_test.rb
64
- - test/lexical_table_generator/character_set_test.rb
65
- - test/lexical_table_generator/automata_test.rb
66
- - test/lexical_table_generator/pattern_test.rb
67
- - test/lexical_table_generator/table_generator_test.rb
68
- - test/test_helper.rb
64
+ - spec/builder
65
+ - spec/builder/dsl_definition
66
+ - spec/builder/dsl_definition/aurum_grammar_spec.rb
67
+ - spec/engine
68
+ - spec/engine/lexer_spec.rb
69
+ - spec/engine/parser_spec.rb
70
+ - spec/examples
71
+ - spec/examples/dangling_else_example.rb
72
+ - spec/examples/expression_example.rb
73
+ - spec/examples/smalltalk_example.rb
74
+ - spec/examples/yacc_spec.rb
75
+ - spec/grammar
76
+ - spec/grammar/builder
77
+ - spec/grammar/builder/lexical_table
78
+ - spec/grammar/builder/lexical_table/automata_spec.rb
79
+ - spec/grammar/builder/lexical_table/builder_spec.rb
80
+ - spec/grammar/builder/lexical_table/character_set_spec.rb
81
+ - spec/grammar/builder/lexical_table/pattern_spec.rb
82
+ - spec/grammar/builder/lexical_table/regular_expression.rb
83
+ - spec/grammar/builder/parsing_table
84
+ - spec/grammar/builder/parsing_table/augmented_grammar_spec.rb
85
+ - spec/grammar/builder/parsing_table/builder_spec.rb
86
+ - spec/grammar/builder/parsing_table/digraph_traverser_spec.rb
87
+ - spec/grammar/builder/parsing_table/item_spec.rb
88
+ - spec/grammar/builder/parsing_table/sources_spec.rb
89
+ - spec/grammar/builder/parsing_table/state_spec.rb
90
+ - spec/grammar/dsl
91
+ - spec/grammar/dsl/character_classes_builder_spec.rb
92
+ - spec/grammar/dsl/lexical_rules_builder_spec.rb
93
+ - spec/grammar/dsl/precedence_builder_spec.rb
94
+ - spec/grammar/dsl/productions_builder_spec.rb
95
+ - spec/grammar/metalang
96
+ - spec/grammar/metalang/metalang_spec.rb
97
+ - spec/grammar/precedence_spec.rb
98
+ - spec/grammar/syntax_rules_spec.rb
99
+ - spec/parser_matcher.rb
100
+ - spec/pattern_matcher.rb
101
+ - spec/spec_helper.rb
102
+ - Rakefile
69
103
  test_files: []
70
104
 
71
105
  rdoc_options: []
@@ -1,35 +0,0 @@
1
- $:.unshift(File.dirname(__FILE__) + '/../../lib')
2
- require 'aurum'
3
-
4
- class ExpressionGrammar < Aurum::Grammar
5
- tokens do
6
- ignore string(' ').one_or_more
7
- _number range(?0, ?9).one_or_more
8
- end
9
-
10
- precedences do
11
- operator '*', '/'
12
- operator '+', '-'
13
- end
14
-
15
- productions do
16
- expression expression, '+', expression {expression.value = expression1.value + expression2.value}
17
- expression expression, '-', expression {expression.value = expression1.value - expression2.value}
18
- expression expression, '*', expression {expression.value = expression1.value * expression2.value}
19
- expression expression, '/', expression {expression.value = expression1.value / expression2.value}
20
- expression '(', expression, ')'
21
- expression _number {expression.value = _number.value.to_i}
22
- expression '+', _number {expression.value = _number.value.to_i}
23
- expression '-', _number {expression.value = -_number.value.to_i}
24
- end
25
- end
26
-
27
- puts ExpressionGrammar.parse_expression('1 + 2').value
28
- puts ExpressionGrammar.parse_expression('(1 + 2)').value
29
- puts ExpressionGrammar.parse_expression('-1 - 2').value
30
- puts ExpressionGrammar.parse_expression('1 + 2 * 3').value
31
- puts ExpressionGrammar.parse_expression('1 * 2 + 3').value
32
- puts ExpressionGrammar.parse_expression('1 * (2 + 3)').value
33
- puts ExpressionGrammar.parse_expression('1 + (2 + 3) * 4').value
34
- puts ExpressionGrammar.parse_expression('1 * - 5').value
35
- puts ExpressionGrammar.parse_expression('(1+3) - - 5').value
@@ -1,26 +0,0 @@
1
- $:.unshift(File.dirname(__FILE__) + '/../../lib')
2
- require 'aurum'
3
-
4
- class LispGrammar < Aurum::Grammar
5
- tokens do
6
- ignore string(' ').one_or_more
7
- _number range(?0, ?9).one_or_more
8
- end
9
-
10
- productions do
11
- expression '(', tuple, ')'
12
- tuple '+', atom, atom {tuple.value = atom1.value + atom2.value}
13
- tuple tuple, atom {tuple.value = tuple1.value + atom.value}
14
- atom _number {atom.value = _number.value.to_i}
15
- atom expression
16
- end
17
- end
18
-
19
- puts LispGrammar.parse_expression('(+ 1 3 4) ').value
20
- #puts LispGrammar.parse_expression('-1 - 2').value
21
- #puts LispGrammar.parse_expression('1 + 2 * 3').value
22
- #puts LispGrammar.parse_expression('1 * 2 + 3').value
23
- #puts LispGrammar.parse_expression('1 * (2 + 3)').value
24
- #puts LispGrammar.parse_expression('1 + (2 + 3) * 4').value
25
- #puts LispGrammar.parse_expression('1 * - 5').value
26
- #puts LispGrammar.parse_expression('(1+3) - - 5').value
@@ -1,429 +0,0 @@
1
- module Aurum
2
- RecognizeTokenAction, ChangeStateAction, UserDefinedAction = Struct.new(:token), Struct.new(:state), Struct.new(:action)
3
- RecognizeTokenAndChangeStateAction = Struct.new :token, :state
4
- IgnoreAction = RecognizeTokenAction.new '$ignore'
5
-
6
- class LexicalTableGenerator
7
- attr_reader :lexical_states
8
- def initialize specification
9
- @specification, @accept_states = specification, {}
10
- @lexical_states = @specification.keys - [:all]
11
- @patterns_for_all = specification[:all] ? specification[:all] : {}
12
- end
13
-
14
- def lexical_table
15
- construct_automata
16
- make_initial_partitions
17
- refine_partitions
18
- @partitions.size < @lexical_automata.table.size ? construct_minimize_automata : [@lexical_automata.table, @accept_states]
19
- end
20
-
21
- private
22
- def construct_automata
23
- automata, accepts = Automata.new(1), {}, {}
24
- @lexical_states.each_with_index do |lexcial_state, index|
25
- lexical_state_start = automata.new_state
26
- automata.connect 0, CharacterSet::Interval.new(-index - 1).to_char_set, lexical_state_start
27
- @patterns_for_all.merge(@specification[lexcial_state]).each do |pattern, action|
28
- pattern_start = automata.merge! pattern.automata
29
- automata.connect lexical_state_start, Epsilon, pattern_start
30
- accepts[pattern_start + pattern.accept] = action
31
- end
32
- end
33
- @lexical_automata, deterministic_accepts = automata.determinize accepts.keys
34
- deterministic_accepts.each {|d, n| @accept_states[d] = n.inject([]){|r, x| r << accepts[x]}}
35
- end
36
-
37
- def make_initial_partitions
38
- partitions = {}
39
- @accept_states.each do |state, action|
40
- partitions[action] = [] unless partitions.has_key? action
41
- partitions[action] << state
42
- end
43
- @partitions = [[0], @lexical_automata.all_states - @accept_states.keys - [0]] + partitions.values
44
- @partitions.delete []
45
- end
46
-
47
- def refine_partitions
48
- reverse_automata, working_list = @lexical_automata.reverse, @partitions.dup
49
- until working_list.empty?
50
- reverse_automata.alphabet(working_list.pop) do |ia, symbols|
51
- @partitions.grep_each 'x.size > 1' do |r|
52
- r1, r2 = r & ia, r - ia
53
- unless r2.empty? || r2 == r
54
- replace @partitions, r => [r1, r2]
55
- if working_list.include? r
56
- replace working_list, r => [r1, r2]
57
- else
58
- working_list << (r1.size <= r2.size ? r1 : r2)
59
- end
60
- working_list.uniq!
61
- end
62
- end
63
- end
64
- end
65
- end
66
-
67
- def construct_minimize_automata
68
- automata, accepts = Automata.new(@partitions.size), {}
69
- choose_representatives do |representative, index|
70
- @lexical_automata.table[representative].each do |transition|
71
- automata.connect index, transition.symbols, partition_contains(transition.destination)
72
- end
73
- accepts[index] = @accept_states[representative] if @accept_states.has_key? representative
74
- end
75
- return automata.table, accepts
76
- end
77
-
78
- def choose_representatives
79
- @partitions.each_with_index {|partition, index| yield partition.first, index}
80
- end
81
-
82
- def partition_contains state
83
- @partitions.each_with_index {|partition, index| return index if partition.include? state}
84
- end
85
-
86
- def replace array, replacements
87
- replacements.each do |old, new|
88
- array.delete old
89
- new.each {|x| array << x}
90
- end
91
- end
92
- end
93
-
94
- class Pattern
95
- attr_reader :automata, :accept
96
- def self.from_string literal
97
- automata, index = Automata.new(literal.length + 1), 0
98
- literal.each_byte {|byte|automata.connect index, CharacterSet::Interval.new(byte).to_char_set, (index += 1)}
99
- new automata, index
100
- end
101
-
102
- def self.from_char_set set
103
- automata = Automata.new 2
104
- automata.connect 0, set, 1
105
- new automata, 1
106
- end
107
-
108
- def self.from_enum enum_literal
109
- automata = Automata.new enum_literal.length + 2
110
- enum_literal.each_byte {|byte| automata.connect 0, CharacterSet::Interval.new(byte).to_char_set, 1}
111
- new automata, 1
112
- end
113
-
114
- def self.concat *patterns
115
- automata, index = Automata.new, 0
116
- patterns.each do |pattern|
117
- index = automata.connect(index, Epsilon, automata.merge!(pattern.automata)) + pattern.accept
118
- end
119
- new automata, index
120
- end
121
-
122
- def initialize automata, accept
123
- @automata, @accept = automata, accept
124
- end
125
-
126
- def kleene
127
- kleene_automata = @automata.dup
128
- kleene_automata.connect 0, Epsilon, @accept
129
- kleene_automata.connect @accept, Epsilon, 0
130
- Pattern.new kleene_automata, @accept
131
- end
132
- alias :zero_or_more :kleene
133
-
134
- def iterate
135
- iterate_automata = @automata.dup
136
- iterate_automata.connect @accept, Epsilon, 0
137
- Pattern.new iterate_automata, @accept
138
- end
139
- alias :one_or_more :iterate
140
-
141
- def opt
142
- opt_automata = @automata.dup
143
- opt_automata.connect 0, Epsilon, @accept
144
- Pattern.new opt_automata, @accept
145
- end
146
- alias :zero_or_one :opt
147
-
148
- def negate
149
- deterministic, accepts = automata.determinize [@accept]
150
- sink = deterministic.new_state
151
- deterministic.connect sink, CharacterSet.any, sink
152
- sink.times do |state|
153
- joint = CharacterSet.any
154
- deterministic.table[state].each {|tran| joint.delete tran.symbols}
155
- deterministic.connect state, joint, sink unless joint.empty?
156
- end
157
- accept = deterministic.new_state
158
- accept.times {|state| deterministic.connect state, Epsilon, accept unless accepts.include? state }
159
- Pattern.new deterministic, accept
160
- end
161
- alias :not :negate
162
-
163
- def [] least, most = least
164
- Pattern.concat *([self] * least + [self.opt] * (most-least))
165
- end
166
-
167
- def | other
168
- automata = Automata.new 2
169
- [self, other].each do |pattern|
170
- automata.connect automata.connect(0, Epsilon, automata.merge!(pattern.automata)) + pattern.accept, Epsilon, 1
171
- end
172
- Pattern.new automata, 1
173
- end
174
-
175
- def ~
176
- any = Pattern.from_char_set(CharacterSet.any).kleene
177
- return Pattern.concat(Pattern.concat(any, self, any).negate, self)
178
- end
179
- end
180
-
181
- class Automata
182
- attr_reader :table
183
- Transition = Struct.new(:symbols, :destination)
184
-
185
- def initialize(table=[])
186
- case table
187
- when Array
188
- @table = table
189
- when Fixnum
190
- @table = []
191
- table.times {@table << []}
192
- end
193
- end
194
-
195
- def connect start, symbols, destination
196
- @table[start] << Transition.new(symbols, destination)
197
- destination
198
- end
199
-
200
- def merge! other
201
- start = @table.length
202
- other_table = other.instance_eval{@table}
203
- other_table.each do |trans|
204
- @table << []
205
- trans.each {|tran| @table.last << Transition.new(tran.symbols, tran.destination + start)}
206
- end
207
- start
208
- end
209
-
210
- def reverse
211
- reverse = []
212
- @table.length.times {reverse << []}
213
- @table.each_with_index do |trans, index|
214
- trans.each {|tran| reverse[tran.destination] << Transition.new(tran.symbols, index)}
215
- end
216
- Automata.new reverse
217
- end
218
-
219
- def dup
220
- dup_table = []
221
- @table.each {|x| dup_table << x.dup}
222
- Automata.new dup_table
223
- end
224
-
225
- def alphabet states
226
- points = states.inject([]) do |result, state|
227
- @table[state].inject(result){|r, s|r += s.symbols.to_points s.destination}
228
- end
229
- points.sort! do |x, y|
230
- x.char == y.char ? (x.is_start ? (y.is_start ? 0 : -1) : (y.is_start ? 1 : 0)) : (x.char < y.char ? -1 : 1)
231
- end
232
- reachable_states = []
233
- points.each_with_index do |point, index|
234
- if point.is_start
235
- reachable_states << point.destination
236
- else
237
- reachable_states.delete point.destination
238
- next if reachable_states.empty?
239
- end
240
- symbols = range(point, points[index + 1])
241
- yield reachable_states.uniq, symbols if symbols
242
- end
243
- end
244
-
245
- def determinize accepts
246
- SubsetDeterminizer.new(self, accepts).determinize
247
- end
248
-
249
- def new_state
250
- @table << []
251
- @table.length - 1
252
- end
253
-
254
- def all_states
255
- (0..table.length - 1).to_a
256
- end
257
-
258
- private
259
- def range point_a, point_b
260
- start_point = point_a.is_start ? point_a.char : (point_a.char + 1)
261
- end_point = point_b.is_start ? point_b.char - 1 : point_b.char
262
- start_point > end_point ? nil : CharacterSet::Interval.new(start_point, end_point).to_char_set
263
- end
264
- end
265
-
266
- class SubsetDeterminizer
267
- def initialize nondeterministic, accepts
268
- @unmarked, @dstates, @accepts = [], [], accepts
269
- @nondeterministic, @deterministic, @accept_states = nondeterministic, Automata.new, {}
270
- unmark closure([0])
271
- end
272
-
273
- def determinize
274
- until @unmarked.empty?
275
- start = @unmarked.pop
276
- @nondeterministic.alphabet(@dstates[start]) do |states, symbols|
277
- destination_state = closure(states)
278
- destination = unmark destination_state unless destination = @dstates.index(destination_state)
279
- @deterministic.connect start, symbols, destination
280
- end
281
- end
282
- return @deterministic, @accept_states
283
- end
284
- private
285
- def unmark states
286
- @dstates << states
287
- @unmarked.push @deterministic.new_state
288
- accepts = states.find_all {|x| @accepts.include? x}
289
- @accept_states[@unmarked.last] = accepts unless accepts.empty?
290
- @unmarked.last
291
- end
292
-
293
- def closure states
294
- closure, unvisited = states.dup, states.dup
295
- until unvisited.empty? do
296
- @nondeterministic.table[unvisited.pop].each do |tran|
297
- if tran.symbols == Epsilon && !closure.include?(tran.destination)
298
- closure << tran.destination
299
- unvisited << tran.destination
300
- end
301
- end
302
- end
303
- closure.sort!
304
- end
305
- end
306
-
307
- class CharacterSet
308
- attr_reader :intervals
309
- def self.any
310
- Interval.new(0, 65535).to_char_set
311
- end
312
-
313
- def + other
314
- result = self.dup
315
- if (other.kind_of? CharacterSet)
316
- for interval in other.intervals
317
- result.add_interval interval.first, interval.last
318
- end
319
- else
320
- other.to_s.each_byte do |byte|
321
- result.add_interval byte
322
- end
323
- end
324
- result
325
- end
326
-
327
- def - other
328
- result = self.dup
329
- if (other.kind_of? CharacterSet)
330
- for interval in other.intervals
331
- result.delete_interval interval.first, interval.last
332
- end
333
- else
334
- other.to_s.each_byte do |byte|
335
- result.delete_interval byte
336
- end
337
- end
338
- result
339
- end
340
-
341
- def initialize *intervals
342
- @intervals = intervals
343
- end
344
-
345
- def << obj
346
- obj.kind_of?(Range) ? add_interval(obj.first, obj.last) : obj.to_s.each_byte {|x| add_interval x}
347
- end
348
-
349
- def delete obj
350
- case obj
351
- when Range
352
- delete_interval obj.first, obj.last
353
- when Aurum::CharacterSet
354
- obj.intervals.each {|interval| delete_interval interval.first, interval.last}
355
- else
356
- obj.to_s.each_byte {|x| delete_interval x}
357
- end
358
- end
359
-
360
- def include? char
361
- @intervals.any? {|x| x.include? char}
362
- end
363
-
364
- def empty?
365
- return @intervals.empty?
366
- end
367
-
368
- def to_points destination
369
- @intervals.inject [] do |points, interval|
370
- points << Point.new(interval.first, true, destination)
371
- points << Point.new(interval.last, false, destination)
372
- end
373
- end
374
-
375
- def dup
376
- intervals = []
377
- for interval in @intervals
378
- intervals << interval.dup
379
- end
380
- CharacterSet.new *intervals
381
- end
382
-
383
- protected
384
- def add_interval first, last = first
385
- interval = Interval.new first, last
386
- @intervals << interval unless @intervals.any? {|x| x.merge! interval}
387
- end
388
-
389
- def delete_interval first, last = first
390
- interval = Interval.new first, last
391
- return unless to_be_replaced = @intervals.find {|x| x.include?(interval.first) || x.include?(interval.last)}
392
- @intervals.delete to_be_replaced
393
- add_new_interval to_be_replaced.first, interval.first - 1
394
- add_new_interval interval.last + 1, to_be_replaced.last
395
- end
396
-
397
- def add_new_interval first, last
398
- @intervals << Interval.new(first, last) if first <= last
399
- end
400
-
401
- Interval, Point = Struct.new(:first, :last), Struct.new(:char, :is_start, :destination)
402
-
403
- Interval.class_eval do
404
- def initialize first, last = first
405
- super first, last
406
- end
407
-
408
- def include? char
409
- char = char[0] if char.kind_of? String
410
- self.first <= char && char <= self.last
411
- end
412
-
413
- def merge! other
414
- if include?(other.first) || include?(other.last) || other.first - self.last == 1 || self.first - other.last == 1
415
- self.first = [self.first, other.first].min
416
- self.last = [self.last, other.last].max
417
- return true;
418
- end
419
- false
420
- end
421
-
422
- def to_char_set
423
- CharacterSet.new self
424
- end
425
- end
426
- end
427
-
428
- Epsilon = CharacterSet.new
429
- end