aurum 0.1.1 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Rakefile +29 -0
- data/examples/dangling_else/grammar.rb +23 -0
- data/examples/expression/grammar.rb +28 -0
- data/examples/smalltalk/grammar.rb +151 -0
- data/examples/smalltalk/interpreter.rb +70 -0
- data/examples/yacc/grammar.rb +72 -0
- data/lib/aurum.rb +1 -9
- data/lib/aurum/engine.rb +39 -175
- data/lib/aurum/engine/parsing_facility.rb +107 -0
- data/lib/aurum/engine/tokenization_facility.rb +86 -0
- data/lib/aurum/grammar.rb +52 -219
- data/lib/aurum/grammar/automata.rb +194 -0
- data/lib/aurum/grammar/builder/augmented_grammar.rb +83 -0
- data/lib/aurum/grammar/builder/dot_logger.rb +66 -0
- data/lib/aurum/grammar/builder/lexical_table_builder.rb +55 -0
- data/lib/aurum/grammar/builder/parsing_table_builder.rb +238 -0
- data/lib/aurum/grammar/builder/set_of_items.rb +190 -0
- data/lib/aurum/grammar/compiled_tables.rb +20 -0
- data/lib/aurum/grammar/dsl/lexical_definition.rb +94 -0
- data/lib/aurum/grammar/dsl/syntax_definition.rb +79 -0
- data/lib/aurum/grammar/lexical_rules.rb +224 -0
- data/lib/aurum/grammar/metalang/grammar.rb +47 -0
- data/lib/aurum/grammar/syntax_rules.rb +95 -0
- data/spec/builder/dsl_definition/aurum_grammar_spec.rb +33 -0
- data/spec/engine/lexer_spec.rb +59 -0
- data/spec/engine/parser_spec.rb +90 -0
- data/spec/examples/dangling_else_example.rb +30 -0
- data/spec/examples/expression_example.rb +48 -0
- data/spec/examples/smalltalk_example.rb +50 -0
- data/spec/examples/yacc_spec.rb +30 -0
- data/spec/grammar/builder/lexical_table/automata_spec.rb +55 -0
- data/spec/grammar/builder/lexical_table/builder_spec.rb +78 -0
- data/spec/grammar/builder/lexical_table/character_set_spec.rb +100 -0
- data/spec/grammar/builder/lexical_table/pattern_spec.rb +11 -0
- data/spec/grammar/builder/lexical_table/regular_expression.rb +40 -0
- data/spec/grammar/builder/parsing_table/augmented_grammar_spec.rb +36 -0
- data/spec/grammar/builder/parsing_table/builder_spec.rb +152 -0
- data/spec/grammar/builder/parsing_table/digraph_traverser_spec.rb +42 -0
- data/spec/grammar/builder/parsing_table/item_spec.rb +51 -0
- data/spec/grammar/builder/parsing_table/sources_spec.rb +66 -0
- data/spec/grammar/builder/parsing_table/state_spec.rb +82 -0
- data/spec/grammar/dsl/character_classes_builder_spec.rb +50 -0
- data/spec/grammar/dsl/lexical_rules_builder_spec.rb +181 -0
- data/spec/grammar/dsl/precedence_builder_spec.rb +64 -0
- data/spec/grammar/dsl/productions_builder_spec.rb +78 -0
- data/spec/grammar/metalang/metalang_spec.rb +0 -0
- data/spec/grammar/precedence_spec.rb +42 -0
- data/spec/grammar/syntax_rules_spec.rb +31 -0
- data/spec/parser_matcher.rb +69 -0
- data/spec/pattern_matcher.rb +123 -0
- data/spec/spec_helper.rb +133 -0
- metadata +70 -36
- data/example/expression/expression.rb +0 -35
- data/example/expression/lisp.rb +0 -26
- data/lib/aurum/lexical_table_generator.rb +0 -429
- data/lib/aurum/parsing_table_generator.rb +0 -464
- data/test/engine/lexer_test.rb +0 -59
- data/test/engine/semantic_attributes_test.rb +0 -15
- data/test/grammar_definition/character_class_definition_test.rb +0 -28
- data/test/grammar_definition/grammar_definition_test.rb +0 -55
- data/test/grammar_definition/lexical_definition_test.rb +0 -56
- data/test/grammar_definition/operator_precedence_definition_test.rb +0 -35
- data/test/grammar_definition/production_definition_test.rb +0 -60
- data/test/lexical_table_generator/automata_test.rb +0 -74
- data/test/lexical_table_generator/character_set_test.rb +0 -73
- data/test/lexical_table_generator/interval_test.rb +0 -36
- data/test/lexical_table_generator/pattern_test.rb +0 -115
- data/test/lexical_table_generator/subset_determinizer_test.rb +0 -19
- data/test/lexical_table_generator/table_generator_test.rb +0 -126
- data/test/parsing_table_generator/augmented_grammar_test.rb +0 -45
- data/test/parsing_table_generator/lalr_n_computation_test.rb +0 -92
- data/test/parsing_table_generator/lr_0_automata_test.rb +0 -94
- data/test/parsing_table_generator/lr_item_test.rb +0 -27
- data/test/parsing_table_generator/parsing_table_state_test.rb +0 -39
- data/test/parsing_table_generator/precedence_table_test.rb +0 -28
- data/test/parsing_table_generator/production_test.rb +0 -9
- data/test/test_helper.rb +0 -103
metadata
CHANGED
@@ -1,10 +1,10 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
|
-
rubygems_version: 0.9.
|
2
|
+
rubygems_version: 0.9.4
|
3
3
|
specification_version: 1
|
4
4
|
name: aurum
|
5
5
|
version: !ruby/object:Gem::Version
|
6
|
-
version: 0.1.1
|
7
|
-
date: 2007-
|
6
|
+
version: 0.2.0
|
7
|
+
date: 2007-10-11 00:00:00 +10:00
|
8
8
|
summary: Aurum is a LALR(n) parser generator written in Ruby.
|
9
9
|
require_paths:
|
10
10
|
- lib
|
@@ -29,43 +29,77 @@ post_install_message:
|
|
29
29
|
authors:
|
30
30
|
- Vincent Xu
|
31
31
|
files:
|
32
|
-
-
|
33
|
-
-
|
34
|
-
-
|
32
|
+
- examples/dangling_else
|
33
|
+
- examples/dangling_else/grammar.rb
|
34
|
+
- examples/expression
|
35
|
+
- examples/expression/grammar.rb
|
36
|
+
- examples/smalltalk
|
37
|
+
- examples/smalltalk/grammar.rb
|
38
|
+
- examples/smalltalk/interpreter.rb
|
39
|
+
- examples/yacc
|
40
|
+
- examples/yacc/grammar.rb
|
35
41
|
- lib/aurum
|
36
|
-
- lib/aurum/
|
37
|
-
- lib/aurum/
|
42
|
+
- lib/aurum/engine
|
43
|
+
- lib/aurum/engine/parsing_facility.rb
|
44
|
+
- lib/aurum/engine/tokenization_facility.rb
|
38
45
|
- lib/aurum/engine.rb
|
46
|
+
- lib/aurum/grammar
|
47
|
+
- lib/aurum/grammar/automata.rb
|
48
|
+
- lib/aurum/grammar/builder
|
49
|
+
- lib/aurum/grammar/builder/augmented_grammar.rb
|
50
|
+
- lib/aurum/grammar/builder/dot_logger.rb
|
51
|
+
- lib/aurum/grammar/builder/lexical_table_builder.rb
|
52
|
+
- lib/aurum/grammar/builder/parsing_table_builder.rb
|
53
|
+
- lib/aurum/grammar/builder/set_of_items.rb
|
54
|
+
- lib/aurum/grammar/compiled_tables.rb
|
55
|
+
- lib/aurum/grammar/dsl
|
56
|
+
- lib/aurum/grammar/dsl/lexical_definition.rb
|
57
|
+
- lib/aurum/grammar/dsl/syntax_definition.rb
|
58
|
+
- lib/aurum/grammar/lexical_rules.rb
|
59
|
+
- lib/aurum/grammar/metalang
|
60
|
+
- lib/aurum/grammar/metalang/grammar.rb
|
61
|
+
- lib/aurum/grammar/syntax_rules.rb
|
39
62
|
- lib/aurum/grammar.rb
|
40
|
-
- lib/grammars
|
41
63
|
- lib/aurum.rb
|
42
|
-
-
|
43
|
-
-
|
44
|
-
-
|
45
|
-
-
|
46
|
-
-
|
47
|
-
-
|
48
|
-
-
|
49
|
-
-
|
50
|
-
-
|
51
|
-
-
|
52
|
-
-
|
53
|
-
-
|
54
|
-
-
|
55
|
-
-
|
56
|
-
-
|
57
|
-
-
|
58
|
-
-
|
59
|
-
-
|
60
|
-
-
|
61
|
-
-
|
62
|
-
-
|
63
|
-
-
|
64
|
-
-
|
65
|
-
-
|
66
|
-
-
|
67
|
-
-
|
68
|
-
-
|
64
|
+
- spec/builder
|
65
|
+
- spec/builder/dsl_definition
|
66
|
+
- spec/builder/dsl_definition/aurum_grammar_spec.rb
|
67
|
+
- spec/engine
|
68
|
+
- spec/engine/lexer_spec.rb
|
69
|
+
- spec/engine/parser_spec.rb
|
70
|
+
- spec/examples
|
71
|
+
- spec/examples/dangling_else_example.rb
|
72
|
+
- spec/examples/expression_example.rb
|
73
|
+
- spec/examples/smalltalk_example.rb
|
74
|
+
- spec/examples/yacc_spec.rb
|
75
|
+
- spec/grammar
|
76
|
+
- spec/grammar/builder
|
77
|
+
- spec/grammar/builder/lexical_table
|
78
|
+
- spec/grammar/builder/lexical_table/automata_spec.rb
|
79
|
+
- spec/grammar/builder/lexical_table/builder_spec.rb
|
80
|
+
- spec/grammar/builder/lexical_table/character_set_spec.rb
|
81
|
+
- spec/grammar/builder/lexical_table/pattern_spec.rb
|
82
|
+
- spec/grammar/builder/lexical_table/regular_expression.rb
|
83
|
+
- spec/grammar/builder/parsing_table
|
84
|
+
- spec/grammar/builder/parsing_table/augmented_grammar_spec.rb
|
85
|
+
- spec/grammar/builder/parsing_table/builder_spec.rb
|
86
|
+
- spec/grammar/builder/parsing_table/digraph_traverser_spec.rb
|
87
|
+
- spec/grammar/builder/parsing_table/item_spec.rb
|
88
|
+
- spec/grammar/builder/parsing_table/sources_spec.rb
|
89
|
+
- spec/grammar/builder/parsing_table/state_spec.rb
|
90
|
+
- spec/grammar/dsl
|
91
|
+
- spec/grammar/dsl/character_classes_builder_spec.rb
|
92
|
+
- spec/grammar/dsl/lexical_rules_builder_spec.rb
|
93
|
+
- spec/grammar/dsl/precedence_builder_spec.rb
|
94
|
+
- spec/grammar/dsl/productions_builder_spec.rb
|
95
|
+
- spec/grammar/metalang
|
96
|
+
- spec/grammar/metalang/metalang_spec.rb
|
97
|
+
- spec/grammar/precedence_spec.rb
|
98
|
+
- spec/grammar/syntax_rules_spec.rb
|
99
|
+
- spec/parser_matcher.rb
|
100
|
+
- spec/pattern_matcher.rb
|
101
|
+
- spec/spec_helper.rb
|
102
|
+
- Rakefile
|
69
103
|
test_files: []
|
70
104
|
|
71
105
|
rdoc_options: []
|
data/example/expression/expression.rb
DELETED
@@ -1,35 +0,0 @@
|
|
1
|
-
$:.unshift(File.dirname(__FILE__) + '/../../lib')
|
2
|
-
require 'aurum'
|
3
|
-
|
4
|
-
class ExpressionGrammar < Aurum::Grammar
|
5
|
-
tokens do
|
6
|
-
ignore string(' ').one_or_more
|
7
|
-
_number range(?0, ?9).one_or_more
|
8
|
-
end
|
9
|
-
|
10
|
-
precedences do
|
11
|
-
operator '*', '/'
|
12
|
-
operator '+', '-'
|
13
|
-
end
|
14
|
-
|
15
|
-
productions do
|
16
|
-
expression expression, '+', expression {expression.value = expression1.value + expression2.value}
|
17
|
-
expression expression, '-', expression {expression.value = expression1.value - expression2.value}
|
18
|
-
expression expression, '*', expression {expression.value = expression1.value * expression2.value}
|
19
|
-
expression expression, '/', expression {expression.value = expression1.value / expression2.value}
|
20
|
-
expression '(', expression, ')'
|
21
|
-
expression _number {expression.value = _number.value.to_i}
|
22
|
-
expression '+', _number {expression.value = _number.value.to_i}
|
23
|
-
expression '-', _number {expression.value = -_number.value.to_i}
|
24
|
-
end
|
25
|
-
end
|
26
|
-
|
27
|
-
puts ExpressionGrammar.parse_expression('1 + 2').value
|
28
|
-
puts ExpressionGrammar.parse_expression('(1 + 2)').value
|
29
|
-
puts ExpressionGrammar.parse_expression('-1 - 2').value
|
30
|
-
puts ExpressionGrammar.parse_expression('1 + 2 * 3').value
|
31
|
-
puts ExpressionGrammar.parse_expression('1 * 2 + 3').value
|
32
|
-
puts ExpressionGrammar.parse_expression('1 * (2 + 3)').value
|
33
|
-
puts ExpressionGrammar.parse_expression('1 + (2 + 3) * 4').value
|
34
|
-
puts ExpressionGrammar.parse_expression('1 * - 5').value
|
35
|
-
puts ExpressionGrammar.parse_expression('(1+3) - - 5').value
|
data/example/expression/lisp.rb
DELETED
@@ -1,26 +0,0 @@
|
|
1
|
-
$:.unshift(File.dirname(__FILE__) + '/../../lib')
|
2
|
-
require 'aurum'
|
3
|
-
|
4
|
-
class LispGrammar < Aurum::Grammar
|
5
|
-
tokens do
|
6
|
-
ignore string(' ').one_or_more
|
7
|
-
_number range(?0, ?9).one_or_more
|
8
|
-
end
|
9
|
-
|
10
|
-
productions do
|
11
|
-
expression '(', tuple, ')'
|
12
|
-
tuple '+', atom, atom {tuple.value = atom1.value + atom2.value}
|
13
|
-
tuple tuple, atom {tuple.value = tuple1.value + atom.value}
|
14
|
-
atom _number {atom.value = _number.value.to_i}
|
15
|
-
atom expression
|
16
|
-
end
|
17
|
-
end
|
18
|
-
|
19
|
-
puts LispGrammar.parse_expression('(+ 1 3 4) ').value
|
20
|
-
#puts LispGrammar.parse_expression('-1 - 2').value
|
21
|
-
#puts LispGrammar.parse_expression('1 + 2 * 3').value
|
22
|
-
#puts LispGrammar.parse_expression('1 * 2 + 3').value
|
23
|
-
#puts LispGrammar.parse_expression('1 * (2 + 3)').value
|
24
|
-
#puts LispGrammar.parse_expression('1 + (2 + 3) * 4').value
|
25
|
-
#puts LispGrammar.parse_expression('1 * - 5').value
|
26
|
-
#puts LispGrammar.parse_expression('(1+3) - - 5').value
|
data/lib/aurum/lexical_table_generator.rb
DELETED
@@ -1,429 +0,0 @@
|
|
1
|
-
module Aurum
|
2
|
-
RecognizeTokenAction, ChangeStateAction, UserDefinedAction = Struct.new(:token), Struct.new(:state), Struct.new(:action)
|
3
|
-
RecognizeTokenAndChangeStateAction = Struct.new :token, :state
|
4
|
-
IgnoreAction = RecognizeTokenAction.new '$ignore'
|
5
|
-
|
6
|
-
class LexicalTableGenerator
|
7
|
-
attr_reader :lexical_states
|
8
|
-
def initialize specification
|
9
|
-
@specification, @accept_states = specification, {}
|
10
|
-
@lexical_states = @specification.keys - [:all]
|
11
|
-
@patterns_for_all = specification[:all] ? specification[:all] : {}
|
12
|
-
end
|
13
|
-
|
14
|
-
def lexical_table
|
15
|
-
construct_automata
|
16
|
-
make_initial_partitions
|
17
|
-
refine_partitions
|
18
|
-
@partitions.size < @lexical_automata.table.size ? construct_minimize_automata : [@lexical_automata.table, @accept_states]
|
19
|
-
end
|
20
|
-
|
21
|
-
private
|
22
|
-
def construct_automata
|
23
|
-
automata, accepts = Automata.new(1), {}, {}
|
24
|
-
@lexical_states.each_with_index do |lexcial_state, index|
|
25
|
-
lexical_state_start = automata.new_state
|
26
|
-
automata.connect 0, CharacterSet::Interval.new(-index - 1).to_char_set, lexical_state_start
|
27
|
-
@patterns_for_all.merge(@specification[lexcial_state]).each do |pattern, action|
|
28
|
-
pattern_start = automata.merge! pattern.automata
|
29
|
-
automata.connect lexical_state_start, Epsilon, pattern_start
|
30
|
-
accepts[pattern_start + pattern.accept] = action
|
31
|
-
end
|
32
|
-
end
|
33
|
-
@lexical_automata, deterministic_accepts = automata.determinize accepts.keys
|
34
|
-
deterministic_accepts.each {|d, n| @accept_states[d] = n.inject([]){|r, x| r << accepts[x]}}
|
35
|
-
end
|
36
|
-
|
37
|
-
def make_initial_partitions
|
38
|
-
partitions = {}
|
39
|
-
@accept_states.each do |state, action|
|
40
|
-
partitions[action] = [] unless partitions.has_key? action
|
41
|
-
partitions[action] << state
|
42
|
-
end
|
43
|
-
@partitions = [[0], @lexical_automata.all_states - @accept_states.keys - [0]] + partitions.values
|
44
|
-
@partitions.delete []
|
45
|
-
end
|
46
|
-
|
47
|
-
def refine_partitions
|
48
|
-
reverse_automata, working_list = @lexical_automata.reverse, @partitions.dup
|
49
|
-
until working_list.empty?
|
50
|
-
reverse_automata.alphabet(working_list.pop) do |ia, symbols|
|
51
|
-
@partitions.grep_each 'x.size > 1' do |r|
|
52
|
-
r1, r2 = r & ia, r - ia
|
53
|
-
unless r2.empty? || r2 == r
|
54
|
-
replace @partitions, r => [r1, r2]
|
55
|
-
if working_list.include? r
|
56
|
-
replace working_list, r => [r1, r2]
|
57
|
-
else
|
58
|
-
working_list << (r1.size <= r2.size ? r1 : r2)
|
59
|
-
end
|
60
|
-
working_list.uniq!
|
61
|
-
end
|
62
|
-
end
|
63
|
-
end
|
64
|
-
end
|
65
|
-
end
|
66
|
-
|
67
|
-
def construct_minimize_automata
|
68
|
-
automata, accepts = Automata.new(@partitions.size), {}
|
69
|
-
choose_representatives do |representative, index|
|
70
|
-
@lexical_automata.table[representative].each do |transition|
|
71
|
-
automata.connect index, transition.symbols, partition_contains(transition.destination)
|
72
|
-
end
|
73
|
-
accepts[index] = @accept_states[representative] if @accept_states.has_key? representative
|
74
|
-
end
|
75
|
-
return automata.table, accepts
|
76
|
-
end
|
77
|
-
|
78
|
-
def choose_representatives
|
79
|
-
@partitions.each_with_index {|partition, index| yield partition.first, index}
|
80
|
-
end
|
81
|
-
|
82
|
-
def partition_contains state
|
83
|
-
@partitions.each_with_index {|partition, index| return index if partition.include? state}
|
84
|
-
end
|
85
|
-
|
86
|
-
def replace array, replacements
|
87
|
-
replacements.each do |old, new|
|
88
|
-
array.delete old
|
89
|
-
new.each {|x| array << x}
|
90
|
-
end
|
91
|
-
end
|
92
|
-
end
|
93
|
-
|
94
|
-
class Pattern
|
95
|
-
attr_reader :automata, :accept
|
96
|
-
def self.from_string literal
|
97
|
-
automata, index = Automata.new(literal.length + 1), 0
|
98
|
-
literal.each_byte {|byte|automata.connect index, CharacterSet::Interval.new(byte).to_char_set, (index += 1)}
|
99
|
-
new automata, index
|
100
|
-
end
|
101
|
-
|
102
|
-
def self.from_char_set set
|
103
|
-
automata = Automata.new 2
|
104
|
-
automata.connect 0, set, 1
|
105
|
-
new automata, 1
|
106
|
-
end
|
107
|
-
|
108
|
-
def self.from_enum enum_literal
|
109
|
-
automata = Automata.new enum_literal.length + 2
|
110
|
-
enum_literal.each_byte {|byte| automata.connect 0, CharacterSet::Interval.new(byte).to_char_set, 1}
|
111
|
-
new automata, 1
|
112
|
-
end
|
113
|
-
|
114
|
-
def self.concat *patterns
|
115
|
-
automata, index = Automata.new, 0
|
116
|
-
patterns.each do |pattern|
|
117
|
-
index = automata.connect(index, Epsilon, automata.merge!(pattern.automata)) + pattern.accept
|
118
|
-
end
|
119
|
-
new automata, index
|
120
|
-
end
|
121
|
-
|
122
|
-
def initialize automata, accept
|
123
|
-
@automata, @accept = automata, accept
|
124
|
-
end
|
125
|
-
|
126
|
-
def kleene
|
127
|
-
kleene_automata = @automata.dup
|
128
|
-
kleene_automata.connect 0, Epsilon, @accept
|
129
|
-
kleene_automata.connect @accept, Epsilon, 0
|
130
|
-
Pattern.new kleene_automata, @accept
|
131
|
-
end
|
132
|
-
alias :zero_or_more :kleene
|
133
|
-
|
134
|
-
def iterate
|
135
|
-
iterate_automata = @automata.dup
|
136
|
-
iterate_automata.connect @accept, Epsilon, 0
|
137
|
-
Pattern.new iterate_automata, @accept
|
138
|
-
end
|
139
|
-
alias :one_or_more :iterate
|
140
|
-
|
141
|
-
def opt
|
142
|
-
opt_automata = @automata.dup
|
143
|
-
opt_automata.connect 0, Epsilon, @accept
|
144
|
-
Pattern.new opt_automata, @accept
|
145
|
-
end
|
146
|
-
alias :zero_or_one :opt
|
147
|
-
|
148
|
-
def negate
|
149
|
-
deterministic, accepts = automata.determinize [@accept]
|
150
|
-
sink = deterministic.new_state
|
151
|
-
deterministic.connect sink, CharacterSet.any, sink
|
152
|
-
sink.times do |state|
|
153
|
-
joint = CharacterSet.any
|
154
|
-
deterministic.table[state].each {|tran| joint.delete tran.symbols}
|
155
|
-
deterministic.connect state, joint, sink unless joint.empty?
|
156
|
-
end
|
157
|
-
accept = deterministic.new_state
|
158
|
-
accept.times {|state| deterministic.connect state, Epsilon, accept unless accepts.include? state }
|
159
|
-
Pattern.new deterministic, accept
|
160
|
-
end
|
161
|
-
alias :not :negate
|
162
|
-
|
163
|
-
def [] least, most = least
|
164
|
-
Pattern.concat *([self] * least + [self.opt] * (most-least))
|
165
|
-
end
|
166
|
-
|
167
|
-
def | other
|
168
|
-
automata = Automata.new 2
|
169
|
-
[self, other].each do |pattern|
|
170
|
-
automata.connect automata.connect(0, Epsilon, automata.merge!(pattern.automata)) + pattern.accept, Epsilon, 1
|
171
|
-
end
|
172
|
-
Pattern.new automata, 1
|
173
|
-
end
|
174
|
-
|
175
|
-
def ~
|
176
|
-
any = Pattern.from_char_set(CharacterSet.any).kleene
|
177
|
-
return Pattern.concat(Pattern.concat(any, self, any).negate, self)
|
178
|
-
end
|
179
|
-
end
|
180
|
-
|
181
|
-
class Automata
|
182
|
-
attr_reader :table
|
183
|
-
Transition = Struct.new(:symbols, :destination)
|
184
|
-
|
185
|
-
def initialize(table=[])
|
186
|
-
case table
|
187
|
-
when Array
|
188
|
-
@table = table
|
189
|
-
when Fixnum
|
190
|
-
@table = []
|
191
|
-
table.times {@table << []}
|
192
|
-
end
|
193
|
-
end
|
194
|
-
|
195
|
-
def connect start, symbols, destination
|
196
|
-
@table[start] << Transition.new(symbols, destination)
|
197
|
-
destination
|
198
|
-
end
|
199
|
-
|
200
|
-
def merge! other
|
201
|
-
start = @table.length
|
202
|
-
other_table = other.instance_eval{@table}
|
203
|
-
other_table.each do |trans|
|
204
|
-
@table << []
|
205
|
-
trans.each {|tran| @table.last << Transition.new(tran.symbols, tran.destination + start)}
|
206
|
-
end
|
207
|
-
start
|
208
|
-
end
|
209
|
-
|
210
|
-
def reverse
|
211
|
-
reverse = []
|
212
|
-
@table.length.times {reverse << []}
|
213
|
-
@table.each_with_index do |trans, index|
|
214
|
-
trans.each {|tran| reverse[tran.destination] << Transition.new(tran.symbols, index)}
|
215
|
-
end
|
216
|
-
Automata.new reverse
|
217
|
-
end
|
218
|
-
|
219
|
-
def dup
|
220
|
-
dup_table = []
|
221
|
-
@table.each {|x| dup_table << x.dup}
|
222
|
-
Automata.new dup_table
|
223
|
-
end
|
224
|
-
|
225
|
-
def alphabet states
|
226
|
-
points = states.inject([]) do |result, state|
|
227
|
-
@table[state].inject(result){|r, s|r += s.symbols.to_points s.destination}
|
228
|
-
end
|
229
|
-
points.sort! do |x, y|
|
230
|
-
x.char == y.char ? (x.is_start ? (y.is_start ? 0 : -1) : (y.is_start ? 1 : 0)) : (x.char < y.char ? -1 : 1)
|
231
|
-
end
|
232
|
-
reachable_states = []
|
233
|
-
points.each_with_index do |point, index|
|
234
|
-
if point.is_start
|
235
|
-
reachable_states << point.destination
|
236
|
-
else
|
237
|
-
reachable_states.delete point.destination
|
238
|
-
next if reachable_states.empty?
|
239
|
-
end
|
240
|
-
symbols = range(point, points[index + 1])
|
241
|
-
yield reachable_states.uniq, symbols if symbols
|
242
|
-
end
|
243
|
-
end
|
244
|
-
|
245
|
-
def determinize accepts
|
246
|
-
SubsetDeterminizer.new(self, accepts).determinize
|
247
|
-
end
|
248
|
-
|
249
|
-
def new_state
|
250
|
-
@table << []
|
251
|
-
@table.length - 1
|
252
|
-
end
|
253
|
-
|
254
|
-
def all_states
|
255
|
-
(0..table.length - 1).to_a
|
256
|
-
end
|
257
|
-
|
258
|
-
private
|
259
|
-
def range point_a, point_b
|
260
|
-
start_point = point_a.is_start ? point_a.char : (point_a.char + 1)
|
261
|
-
end_point = point_b.is_start ? point_b.char - 1 : point_b.char
|
262
|
-
start_point > end_point ? nil : CharacterSet::Interval.new(start_point, end_point).to_char_set
|
263
|
-
end
|
264
|
-
end
|
265
|
-
|
266
|
-
class SubsetDeterminizer
|
267
|
-
def initialize nondeterministic, accepts
|
268
|
-
@unmarked, @dstates, @accepts = [], [], accepts
|
269
|
-
@nondeterministic, @deterministic, @accept_states = nondeterministic, Automata.new, {}
|
270
|
-
unmark closure([0])
|
271
|
-
end
|
272
|
-
|
273
|
-
def determinize
|
274
|
-
until @unmarked.empty?
|
275
|
-
start = @unmarked.pop
|
276
|
-
@nondeterministic.alphabet(@dstates[start]) do |states, symbols|
|
277
|
-
destination_state = closure(states)
|
278
|
-
destination = unmark destination_state unless destination = @dstates.index(destination_state)
|
279
|
-
@deterministic.connect start, symbols, destination
|
280
|
-
end
|
281
|
-
end
|
282
|
-
return @deterministic, @accept_states
|
283
|
-
end
|
284
|
-
private
|
285
|
-
def unmark states
|
286
|
-
@dstates << states
|
287
|
-
@unmarked.push @deterministic.new_state
|
288
|
-
accepts = states.find_all {|x| @accepts.include? x}
|
289
|
-
@accept_states[@unmarked.last] = accepts unless accepts.empty?
|
290
|
-
@unmarked.last
|
291
|
-
end
|
292
|
-
|
293
|
-
def closure states
|
294
|
-
closure, unvisited = states.dup, states.dup
|
295
|
-
until unvisited.empty? do
|
296
|
-
@nondeterministic.table[unvisited.pop].each do |tran|
|
297
|
-
if tran.symbols == Epsilon && !closure.include?(tran.destination)
|
298
|
-
closure << tran.destination
|
299
|
-
unvisited << tran.destination
|
300
|
-
end
|
301
|
-
end
|
302
|
-
end
|
303
|
-
closure.sort!
|
304
|
-
end
|
305
|
-
end
|
306
|
-
|
307
|
-
class CharacterSet
|
308
|
-
attr_reader :intervals
|
309
|
-
def self.any
|
310
|
-
Interval.new(0, 65535).to_char_set
|
311
|
-
end
|
312
|
-
|
313
|
-
def + other
|
314
|
-
result = self.dup
|
315
|
-
if (other.kind_of? CharacterSet)
|
316
|
-
for interval in other.intervals
|
317
|
-
result.add_interval interval.first, interval.last
|
318
|
-
end
|
319
|
-
else
|
320
|
-
other.to_s.each_byte do |byte|
|
321
|
-
result.add_interval byte
|
322
|
-
end
|
323
|
-
end
|
324
|
-
result
|
325
|
-
end
|
326
|
-
|
327
|
-
def - other
|
328
|
-
result = self.dup
|
329
|
-
if (other.kind_of? CharacterSet)
|
330
|
-
for interval in other.intervals
|
331
|
-
result.delete_interval interval.first, interval.last
|
332
|
-
end
|
333
|
-
else
|
334
|
-
other.to_s.each_byte do |byte|
|
335
|
-
result.delete_interval byte
|
336
|
-
end
|
337
|
-
end
|
338
|
-
result
|
339
|
-
end
|
340
|
-
|
341
|
-
def initialize *intervals
|
342
|
-
@intervals = intervals
|
343
|
-
end
|
344
|
-
|
345
|
-
def << obj
|
346
|
-
obj.kind_of?(Range) ? add_interval(obj.first, obj.last) : obj.to_s.each_byte {|x| add_interval x}
|
347
|
-
end
|
348
|
-
|
349
|
-
def delete obj
|
350
|
-
case obj
|
351
|
-
when Range
|
352
|
-
delete_interval obj.first, obj.last
|
353
|
-
when Aurum::CharacterSet
|
354
|
-
obj.intervals.each {|interval| delete_interval interval.first, interval.last}
|
355
|
-
else
|
356
|
-
obj.to_s.each_byte {|x| delete_interval x}
|
357
|
-
end
|
358
|
-
end
|
359
|
-
|
360
|
-
def include? char
|
361
|
-
@intervals.any? {|x| x.include? char}
|
362
|
-
end
|
363
|
-
|
364
|
-
def empty?
|
365
|
-
return @intervals.empty?
|
366
|
-
end
|
367
|
-
|
368
|
-
def to_points destination
|
369
|
-
@intervals.inject [] do |points, interval|
|
370
|
-
points << Point.new(interval.first, true, destination)
|
371
|
-
points << Point.new(interval.last, false, destination)
|
372
|
-
end
|
373
|
-
end
|
374
|
-
|
375
|
-
def dup
|
376
|
-
intervals = []
|
377
|
-
for interval in @intervals
|
378
|
-
intervals << interval.dup
|
379
|
-
end
|
380
|
-
CharacterSet.new *intervals
|
381
|
-
end
|
382
|
-
|
383
|
-
protected
|
384
|
-
def add_interval first, last = first
|
385
|
-
interval = Interval.new first, last
|
386
|
-
@intervals << interval unless @intervals.any? {|x| x.merge! interval}
|
387
|
-
end
|
388
|
-
|
389
|
-
def delete_interval first, last = first
|
390
|
-
interval = Interval.new first, last
|
391
|
-
return unless to_be_replaced = @intervals.find {|x| x.include?(interval.first) || x.include?(interval.last)}
|
392
|
-
@intervals.delete to_be_replaced
|
393
|
-
add_new_interval to_be_replaced.first, interval.first - 1
|
394
|
-
add_new_interval interval.last + 1, to_be_replaced.last
|
395
|
-
end
|
396
|
-
|
397
|
-
def add_new_interval first, last
|
398
|
-
@intervals << Interval.new(first, last) if first <= last
|
399
|
-
end
|
400
|
-
|
401
|
-
Interval, Point = Struct.new(:first, :last), Struct.new(:char, :is_start, :destination)
|
402
|
-
|
403
|
-
Interval.class_eval do
|
404
|
-
def initialize first, last = first
|
405
|
-
super first, last
|
406
|
-
end
|
407
|
-
|
408
|
-
def include? char
|
409
|
-
char = char[0] if char.kind_of? String
|
410
|
-
self.first <= char && char <= self.last
|
411
|
-
end
|
412
|
-
|
413
|
-
def merge! other
|
414
|
-
if include?(other.first) || include?(other.last) || other.first - self.last == 1 || self.first - other.last == 1
|
415
|
-
self.first = [self.first, other.first].min
|
416
|
-
self.last = [self.last, other.last].max
|
417
|
-
return true;
|
418
|
-
end
|
419
|
-
false
|
420
|
-
end
|
421
|
-
|
422
|
-
def to_char_set
|
423
|
-
CharacterSet.new self
|
424
|
-
end
|
425
|
-
end
|
426
|
-
end
|
427
|
-
|
428
|
-
Epsilon = CharacterSet.new
|
429
|
-
end
|