stamina 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (80) hide show
  1. data/.gemtest +0 -0
  2. data/CHANGELOG.md +22 -0
  3. data/Gemfile +2 -0
  4. data/Gemfile.lock +33 -0
  5. data/LICENCE.md +22 -0
  6. data/Manifest.txt +16 -0
  7. data/README.md +78 -0
  8. data/Rakefile +23 -0
  9. data/bin/adl2dot +12 -0
  10. data/bin/classify +12 -0
  11. data/bin/redblue +12 -0
  12. data/bin/rpni +12 -0
  13. data/example/adl/automaton.adl +49 -0
  14. data/example/adl/sample.adl +53 -0
  15. data/example/basic/characteristic_sample.adl +32 -0
  16. data/example/basic/target.adl +9 -0
  17. data/example/competition/31_test.adl +1500 -0
  18. data/example/competition/31_training.adl +1759 -0
  19. data/lib/stamina.rb +19 -0
  20. data/lib/stamina/adl.rb +298 -0
  21. data/lib/stamina/automaton.rb +1237 -0
  22. data/lib/stamina/automaton/walking.rb +336 -0
  23. data/lib/stamina/classifier.rb +37 -0
  24. data/lib/stamina/command/adl2dot_command.rb +73 -0
  25. data/lib/stamina/command/classify_command.rb +57 -0
  26. data/lib/stamina/command/redblue_command.rb +58 -0
  27. data/lib/stamina/command/rpni_command.rb +58 -0
  28. data/lib/stamina/command/stamina_command.rb +79 -0
  29. data/lib/stamina/errors.rb +20 -0
  30. data/lib/stamina/induction/commons.rb +170 -0
  31. data/lib/stamina/induction/redblue.rb +264 -0
  32. data/lib/stamina/induction/rpni.rb +188 -0
  33. data/lib/stamina/induction/union_find.rb +377 -0
  34. data/lib/stamina/input_string.rb +123 -0
  35. data/lib/stamina/loader.rb +0 -0
  36. data/lib/stamina/markable.rb +42 -0
  37. data/lib/stamina/sample.rb +190 -0
  38. data/lib/stamina/version.rb +14 -0
  39. data/stamina.gemspec +190 -0
  40. data/stamina.noespec +35 -0
  41. data/tasks/debug_mail.rake +78 -0
  42. data/tasks/debug_mail.txt +13 -0
  43. data/tasks/gem.rake +68 -0
  44. data/tasks/spec_test.rake +79 -0
  45. data/tasks/unit_test.rake +77 -0
  46. data/tasks/yard.rake +51 -0
  47. data/test/stamina/adl_test.rb +491 -0
  48. data/test/stamina/automaton_additional_test.rb +190 -0
  49. data/test/stamina/automaton_classifier_test.rb +155 -0
  50. data/test/stamina/automaton_test.rb +1092 -0
  51. data/test/stamina/automaton_to_dot_test.rb +64 -0
  52. data/test/stamina/automaton_walking_test.rb +206 -0
  53. data/test/stamina/exit.rb +3 -0
  54. data/test/stamina/induction/induction_test.rb +70 -0
  55. data/test/stamina/induction/redblue_mergesamestatebug_expected.adl +19 -0
  56. data/test/stamina/induction/redblue_mergesamestatebug_pta.dot +64 -0
  57. data/test/stamina/induction/redblue_mergesamestatebug_sample.adl +9 -0
  58. data/test/stamina/induction/redblue_test.rb +83 -0
  59. data/test/stamina/induction/redblue_universal_expected.adl +4 -0
  60. data/test/stamina/induction/redblue_universal_sample.adl +5 -0
  61. data/test/stamina/induction/rpni_inria_expected.adl +7 -0
  62. data/test/stamina/induction/rpni_inria_sample.adl +9 -0
  63. data/test/stamina/induction/rpni_test.rb +129 -0
  64. data/test/stamina/induction/rpni_test_pta.dot +22 -0
  65. data/test/stamina/induction/rpni_universal_expected.adl +4 -0
  66. data/test/stamina/induction/rpni_universal_sample.adl +4 -0
  67. data/test/stamina/induction/union_find_test.rb +124 -0
  68. data/test/stamina/input_string_test.rb +323 -0
  69. data/test/stamina/markable_test.rb +70 -0
  70. data/test/stamina/randdfa.adl +66 -0
  71. data/test/stamina/sample.adl +4 -0
  72. data/test/stamina/sample_classify_test.rb +149 -0
  73. data/test/stamina/sample_test.rb +218 -0
  74. data/test/stamina/small_dfa.dot +16 -0
  75. data/test/stamina/small_dfa.gif +0 -0
  76. data/test/stamina/small_nfa.dot +18 -0
  77. data/test/stamina/small_nfa.gif +0 -0
  78. data/test/stamina/stamina_test.rb +69 -0
  79. data/test/test_all.rb +7 -0
  80. metadata +279 -0
@@ -0,0 +1,19 @@
1
+ module Stamina
2
+
3
+ end
4
+ require 'stamina/version'
5
+ require 'stamina/loader'
6
+ require 'set'
7
+ require 'enumerator'
8
+ require 'stringio'
9
+ require 'stamina/errors'
10
+ require 'stamina/markable'
11
+ require 'stamina/adl'
12
+ require 'stamina/sample'
13
+ require 'stamina/input_string'
14
+ require 'stamina/classifier'
15
+ require 'stamina/automaton'
16
+ require 'stamina/induction/union_find'
17
+ require 'stamina/induction/commons'
18
+ require "stamina/induction/rpni"
19
+ require "stamina/induction/redblue"
@@ -0,0 +1,298 @@
1
+ module Stamina
2
+ #
3
+ # Automaton Description Language module. This module provides parsing and
4
+ # printing methods for automata and samples. Documentation of the file format
5
+ # used for an automaton is given in parse_automaton; file format for samples is
6
+ # documented in parse_sample.
7
+ #
8
+ # Methods of this module are not intended to be included by a class but invoked
9
+ # on the module instead:
10
+ #
11
+ # begin
12
+ # dfa = Stamina::ADL.parse_automaton_file("my_automaton.adl")
13
+ # rescue ADL::ParseError => ex
14
+ # puts "Oops, the ADL automaton file seems corrupted..."
15
+ # end
16
+ #
17
+ # == Detailed API
18
+ module ADL
19
+
20
+ #################################################################################
21
+ # Automaton Section #
22
+ #################################################################################
23
+
#
# Parses a given automaton description and returns an Automaton instance.
#
# Arguments:
# - descr: the ADL description, as accepted by ADL::to_io.
#
# Raises:
# - ArgumentError when _descr_ is rejected by ADL::to_io.
# - ADL::ParseError if the ADL automaton format is not respected. Line numbers
#   reported in error messages are physical line numbers of the input (comment
#   and blank lines are counted).
#
# ADL provides a really simple grammar to describe automata. Here is a succinct
# example (full documentation of the ADL automaton grammar can be found in
# the self-documenting example/adl/automaton.adl file).
#
#   # Some header comments: tool which has generated this automaton,
#   # maybe a date or other tool options ...
#   # here: 'this automaton accepts the a(ba)* regular language'
#   2 2
#   0 true false
#   1 false true
#   0 1 a
#   1 0 b
#
def self.parse_automaton(descr)
  automaton = nil
  ADL::to_io(descr) do |io|
    state_count, edge_count = nil, nil
    state_read, edge_read = 0, 0
    states = {}          # state identifier (String) => created state
    mode = :header       # :header -> :states -> :edges -> :end

    automaton = Automaton.new do |fa|
      line_number = 0
      io.each_line do |l|
        # Count every physical line *before* skipping anything, so that error
        # messages point to the right place in the input.
        line_number += 1

        # Strip end-of-line comments, then surrounding blanks.
        index = l.index('#')
        l = l[0,index] if index
        l = l.strip
        next if l.empty?

        case mode
        when :header
          # looking for |state_count edge_count|
          raise(ADL::ParseError,
                "Parse error line #{line_number}: 'state_count edge_count' expected, "\
                "'#{l}' found.") unless /^(\d+)\s+(\d+)$/ =~ l
          state_count, edge_count = $1.to_i, $2.to_i
          mode = :states

        when :states
          # looking for |number initial accepting|
          raise(ADL::ParseError,
                "Parse error line #{line_number}: state definition expected, "\
                "'#{l}' found.") unless /^(\S+)\s+(true|false)\s+(true|false)$/ =~ l
          id, initial, accepting = $1, $2, $3
          initial, accepting = ("true"==initial), ("true"==accepting)

          state = fa.add_state(:initial => initial, :accepting => accepting)
          state[:name]=id.to_s
          states[id] = state

          state_read += 1
          mode = (edge_count==0 ? :end : :edges) if state_read==state_count

        when :edges
          # looking for |source target symbol|
          raise(ADL::ParseError,
                "Parse error line #{line_number}: edge definition expected, "\
                "'#{l}' found.") unless /^(\S+)\s+(\S+)\s+(\S+)$/ =~ l
          source, target, symbol = $1, $2, $3
          raise(ADL::ParseError,
                "Parse error line #{line_number}: no such state #{source}") \
                unless states[source]
          raise(ADL::ParseError,
                "Parse error line #{line_number}: no such state #{target}") \
                unless states[target]

          fa.connect(states[source], states[target], {:symbol => symbol})

          edge_read += 1
          mode = :end if edge_read==edge_count

        when :end
          raise(ADL::ParseError,
                "Parse error line #{line_number}: trailing data found '#{l}'")

        end # case mode
      end

      # Announced counts must match what has actually been read.
      raise(ADL::ParseError, "Parse error: #{state_count} states annouced, "\
            "#{state_read} found.") if state_count != state_read
      raise(ADL::ParseError, "Parse error: #{edge_count} edges annouced, "\
            "#{edge_read} found.") if edge_count != edge_read

    end # Automaton.new
  end
  return automaton
end # def self.parse_automaton
121
+
#
# Parses the automaton stored in the file located at path _f_ and returns
# the resulting automaton.
#
# Equivalent to:
#   File.open(f, 'r') do |io|
#     Stamina::ADL.parse_automaton(io)
#   end
#
def self.parse_automaton_file(f)
  # File.open with a block closes the file and returns the block's value.
  File.open(f) { |io| ADL::parse_automaton(io) }
end
137
+
#
# Prints an automaton to a buffer (any object responding to <code><<</code>)
# in ADL format: a 'state_count edge_count' header, one line per state, then
# one line per edge. Returns the buffer itself.
#
def self.print_automaton(fa, buffer="")
  header = "#{fa.state_count} #{fa.edge_count}"
  buffer << header << "\n"
  fa.states.each do |state|
    line = "#{state.index} #{state.initial?} #{state.accepting?}"
    buffer << line << "\n"
  end
  fa.edges.each do |edge|
    line = "#{edge.source.index} #{edge.target.index} #{edge.symbol}"
    buffer << line << "\n"
  end
  buffer
end
152
+
#
# Prints an automaton, in ADL format, to the file whose path is _file_.
# The file is opened for writing.
#
# Equivalent to:
#   File.open(file, 'w') do |io|
#     print_automaton(fa, io)
#   end
#
def self.print_automaton_to_file(fa, file)
  File.open(file, 'w') { |out| print_automaton(fa, out) }
end
166
+
167
+ #################################################################################
168
+ # String and Sample Section #
169
+ #################################################################################
170
+
#
# Parses an input string _str_ and returns an InputString instance. Format of
# input strings is documented in parse_sample. _str_ is required to be a ruby
# String.
#
# The first space-separated token is the label: '+' (positive), '-' (negative)
# or '?' (unlabeled); remaining tokens are the symbols of the string.
#
# Raises:
# - ADL::ParseError if the string does not start with one of these labels.
#
def self.parse_string(str)
  label, *symbols = str.split(' ')
  positive = case label
             when '+' then true
             when '-' then false
             when '?' then nil
             else
               raise ADL::ParseError, "Invalid string format #{str}", caller
             end
  InputString.new symbols, positive, false
end
195
+
#
# Parses the sample provided by _descr_.
#
# - When _sample_ is nil and a block is given, each parsed InputString is
#   yielded to the block and nil is returned.
# - Otherwise parsed strings are appended with <code><<</code> to _sample_,
#   a fresh Sample instance being created when _sample_ is nil. The filled
#   sample is returned. Note that a non-nil _sample_ takes precedence over a
#   block: the block is then never called.
#
# ADL provides a really simple grammar to describe samples (here is a succinct
# example, the full documentation of the sample grammar can be found in the
# self-documenting example/adl/sample.adl file):
#
#   #
#   # Some header comments: tool which has generated this sample,
#   # maybe a date or other tool options ...
#   # here: 'this sample is characteristic for the a(ba)* regular language'
#   #
#   # Positive, Negative, Unlabeled strings begin with +, -, ?, respectively
#   # Empty lines and lines beginning with # are simply ignored.
#   #
#   -
#   + a
#   - a b
#   + a b a
#
# Raises:
# - ArgumentError when _descr_ is rejected by ADL::to_io.
# - ADL::ParseError if the ADL sample format is not respected.
# - InconsistencyError if the sample is not consistent (see Sample)
#
def self.parse_sample(descr, sample=nil)
  # Decided once: strings go to the block only when no sample was supplied.
  to_block = (sample.nil? and block_given?)
  sample = Sample.new if (sample.nil? and not to_block)

  ADL::to_io(descr) do |io|
    io.each_line do |line|
      line = line.strip
      next if line.empty? or line[0,1]=='#'
      parsed = parse_string(line)
      if to_block
        yield parsed
      else
        sample << parsed
      end
    end
  end
  sample
end
239
+
#
# Parses the sample file located at path _f_ and returns the result of
# parse_sample for it; _sample_ is forwarded as is.
#
# Shortcut for:
#   File.open(f) do |file|
#     sample = ADL::parse_sample(file, sample)
#   end
#
def self.parse_sample_file(f, sample=nil)
  # File.open with a block returns the block's value, i.e. the parsed sample.
  File.open(f) { |io| ADL::parse_sample(io, sample) }
end
254
+
#
# Prints a sample in ADL format on a buffer, one string per line.
#
# Arguments:
# - sample: an object responding to each, yielding InputString instances
#   (more precisely, objects whose to_s gives their ADL representation).
# - buffer: an object responding to <code><<</code>; defaults to a new String.
#
# Returns the buffer itself (like print_automaton), so that the text printed
# on the default buffer is actually available to the caller.
#
def self.print_sample(sample, buffer="")
  sample.each do |str|
    buffer << str.to_s << "\n"
  end
  buffer
end
265
+
#
# Prints a sample, in ADL format, in the file whose path is _file_.
# The file is opened for writing.
#
# Shortcut for:
#   File.open(file, 'w') do |io|
#     print_sample(sample, io)
#   end
#
def self.print_sample_in_file(sample, file)
  File.open(file, 'w') { |out| print_sample(sample, out) }
end
279
+
280
+ ### private section ##########################################################
281
+ private
282
+
#
# Yields an IO-like object for a parsable argument, or raises an ArgumentError.
#
# Arguments:
# - descr: an IO or StringIO instance (yielded as is), or a String (wrapped
#   in a StringIO before being yielded).
#
# Returns the value of the block.
#
# Raises:
# - ArgumentError when _descr_ is of any other class.
#
# Note: a bare +private+ does not apply to methods defined with
# <code>def self.</code>, so this method is publicly callable.
#
def self.to_io(descr)
  case descr
  when IO, StringIO
    # StringIO is not a subclass of IO but offers the same reading interface.
    yield descr
  when String
    yield StringIO.new(descr)
  else
    raise ArgumentError, "IO instance expected, #{descr.class} received", caller
  end
end
296
+
297
+ end # module ADL
298
+ end # module Stamina
@@ -0,0 +1,1237 @@
1
+ module Stamina
2
+
3
+ #
4
+ # Automaton data-structure.
5
+ #
6
+ # == Examples
7
+ # The following example uses a lot of useful DRY shortcuts, so, if it does not
8
+ # fit your needs, then read on!
9
+ #
10
+ # # Building an automaton for the regular language a(ba)*
11
+ # fa = Automaton.new do
12
+ # add_state(:initial => true)
13
+ # add_state(:accepting => true)
14
+ # connect(0,1,'a')
15
+ # connect(1,0,'b')
16
+ # end
17
+ #
18
+ # # It accepts 'a b a b a', rejects 'a b' as well as ''
19
+ # puts fa.accepts?('? a b a b a') # prints true
20
+ # puts fa.accepts?('? a b') # prints false
21
+ # puts fa.rejects?('?') # prints true
22
+ #
23
+ # == Four things you need to know
24
+ # 1. Automaton, State and Edge classes implement a Markable design pattern, that
25
+ # is, you can read and write any key/value pair you want on them using the []
26
+ # and []= operators. Note that the following keys are used by Stamina itself,
27
+ # with the obvious semantics (for automata and transducers):
28
+ # - <tt>:initial</tt>, <tt>:accepting</tt>, <tt>:error</tt> on State;
29
+ # expected to be _true_ or _false_ (_nil_ and omitted are considered as false).
30
+ # Shortcuts for querying and setting these attributes are provided by State.
31
+ # - <tt>:symbol</tt> on Edge, with shortcuts as well on Edge.
32
+ # The convention is to use _nil_ for the epsilon symbol (aka non observable)
33
+ # on non deterministic automata.
34
+ # The following keys are reserved for future extensions:
35
+ # - <tt>:output</tt> on State and Edge.
36
+ # - <tt>:short_prefix</tt> on State.
37
+ # See also the "About states and edges" subsection of the design choices.
38
+ # 2. Why using State methods State#step and State#delta ? The Automaton class includes
39
+ # the Walking module by default, which is much more powerful !
40
+ # 3. The constructor of this class executes the argument block (between <tt>do</tt>
41
+ # and <tt>end</tt>) with instance_eval by default. You won't be able to invoke
42
+ # the methods defined in the scope of your block in such a case. See new
43
+ # for details.
44
+ # 4. This class has not been designed with efficiency in mind. If you experience
45
+ # performance problems, read the "About Automaton modifications" sub section
46
+ # of the design choices.
47
+ #
48
+ # == Design choices
49
+ # This section fully details the design choices that have been made for the
50
+ # implementation of the Automaton data structure used by Stamina. It is provided
51
+ # because Automaton is one of the core classes of Stamina, that probably all
52
+ # users (and contributors) will use. Automaton usage is really user-friendly,
53
+ # so <b>you are normally not required</b> to read this section in the first
54
+ # place! Read it only if it is of interest to you, or if you experience unexpected
55
+ # results.
56
+ #
57
+ # === One Automaton class only
58
+ # One class only implements all kinds of automata: deterministic, non-deterministic,
59
+ # transducers, prefix-tree-acceptors, etc. The Markable design pattern on states and
60
+ # edges should allow you to make anything you could find useful with this class.
61
+ #
62
+ # === Adjacency-list graph
63
+ # This class implements an automaton using an adjacency-list graph structure.
64
+ # The automaton has state and edge array lists and exposes them through the
65
+ # _states_ and _edges_ accessors. In order to let users enjoy the enumerability
66
+ # of Ruby's arrays while allowing automata to be modified, these arrays are
67
+ # externally modifiable. However, <b>users are not expected to modify them!</b>
68
+ # and future versions of Stamina will certainly remove this ability.
69
+ #
70
+ # === Indices exposed
71
+ # State and Edge indices in these arrays are exposed by this class. Unless stated
72
+ # explicitly, all methods taking state or edge arguments support indices as well.
73
+ # Moreover, ith_state, ith_states, ith_edge and ith_edges methods provide powerful
74
+ # access to states and edges by indices. All these methods are robust to invalid
75
+ # indices (and raise an IndexError if incorrectly invoked) but do not allow
76
+ # negative indexing (unlike ruby arrays).
77
+ #
78
+ # States and edges know their index in the corresponding array and expose them
79
+ # through the (read-only) _index_ accessor. These indices are always valid;
80
+ # without deletion of states or edges in the automaton, they are guaranteed not
81
+ # to change. Indices saved in your own variables must be considered deprecated
82
+ # each time you perform a deletion ! That's the only rule to respect if you plan
83
+ # to use indices.
84
+ #
85
+ # Indices exposition may seem a strange choice and could be interpreted as
86
+ # breaking OOP's best practice. You are not required to use them but, as will
87
+ # quickly appear, using them is really powerful and leads to beautiful code!
88
+ # If you don't remove any state or edge, this class guarantees that indices
89
+ # are assigned in the same order as invocations of add_state and add_edge (as
90
+ # well as their plural forms and aliases).
91
+ #
92
+ # === About states and edges
93
+ # Edges know their source and target states, which are exposed through the
94
+ # _source_ and _target_ (read-only) accessors (also aliased as _from_ and _to_).
95
+ # States keep their incoming and outgoing edges in arrays, which are accessible
96
+ # (in fact, a copy) using State#in_edges and State#out_edges. If you use them
97
+ # for walking the automaton in a somewhat standard way, consider using the Walking
98
+ # module instead!
99
+ #
100
+ # Common attributes of states and edges are installed using the Markable pattern
101
+ # itself:
102
+ # - <tt>:initial</tt>, <tt>:accepting</tt> and <tt>:error</tt> on states. These
103
+ # attributes are expected to be _true_ or _false_ (_nil_ and omitted are also
104
+ # supported and both considered as false).
105
+ # - <tt>:symbol</tt> on edges. Any object you want as long as it responds to the
106
+ # <tt><=></tt> operator. Also, the convention is to use _nil_ for the epsilon
107
+ # symbol (aka non observable) on non deterministic automata.
108
+ #
109
+ # In addition, useful shortcuts are available:
110
+ # - <tt>s.initial?</tt> is a shortcut for <tt>s[:initial]</tt> if _s_ is a State
111
+ # - <tt>s.initial!</tt> is a shortcut for <tt>s[:initial]=true</tt> if _s_ is a State
112
+ # - Similar shortcuts are available for :accepting and :error
113
+ # - <tt>e.symbol</tt> is a shortcut for <tt>e[:symbol]</tt> if _e_ is an Edge
114
+ # - <tt>e.symbol='a'</tt> is a shortcut for <tt>e[:symbol]='a'</tt> if _e_ is an Edge
115
+ #
116
+ # Following keys should be considered reserved by Stamina for future extensions:
117
+ # - <tt>:output</tt> on State and Edge.
118
+ # - <tt>:short_prefix</tt> on State.
119
+ #
120
+ # === About Automaton modifications
121
+ # This class has not been implemented with efficiency in mind. In particular, we expect
122
+ # the vast majority of Stamina core algorithms considering automata as immutable values.
123
+ # For this reason, the Automaton class does not handle modifications really efficiently.
124
+ #
125
+ # So, if you experience performance problems, consider what follows:
126
+ # 1. Why updating an automaton ? Building a fresh one is much more clean and efficient !
127
+ # This is particularly true for removals.
128
+ # 2. If you can create multiples states or edges at once, consider the plural form
129
+ # of the modification methods: add_n_states and drop_states. Those methods are
130
+ # optimized for multiple updates.
131
+ #
132
+ # == Detailed API
133
+ class Automaton
134
+ include Stamina::Markable
135
+
#
# Automaton state.
#
# A state knows its parent automaton, its index in the automaton state list,
# its user data and its incoming and outgoing edges.
#
class State
  include Stamina::Markable
  attr_reader :automaton, :index

  #
  # Creates a state.
  #
  # Arguments:
  # - automaton: parent automaton of the state.
  # - index: index of the state in the state list.
  # - data: user data attached to this state (a shallow copy is kept).
  #
  def initialize(automaton, index, data)
    @automaton = automaton
    @index = index
    @data = data.dup
    @out_edges = []
    @in_edges = []
    @epsilon_closure = nil   # lazily computed, see epsilon_closure
  end

  ### public read-only section ###############################################
  public

  #
  # Returns the :initial mark when it is set to a true value, false otherwise.
  #
  def initial?() @data[:initial] || false end

  #
  # Sets this state as an initial state.
  #
  def initial!() @data[:initial] = true end

  #
  # Returns the :accepting mark when it is set to a true value, false otherwise.
  #
  def accepting?() @data[:accepting] || false end

  #
  # Sets this state as an accepting state.
  #
  def accepting!() @data[:accepting] = true end

  #
  # Returns the :error mark when it is set to a true value, false otherwise.
  #
  def error?() @data[:error] || false end

  #
  # Sets this state as an error state.
  #
  def error!() @data[:error] = true end

  #
  # Returns true if this state is deterministic, false otherwise. A state is
  # deterministic when all its outgoing edges carry distinct symbols and none
  # of them carries the epsilon symbol (nil).
  #
  def deterministic?
    outs = out_symbols
    (outs.size==@out_edges.size) and not(outs.include?(nil))
  end

  #
  # Returns an array containing all incoming edges of the state. Edges are
  # sorted (with Edge#<=>) if _sorted_ is set to true.
  #
  # Returned array is a copy and may be modified.
  #
  def in_edges(sorted=false)
    sorted ? @in_edges.sort : @in_edges.dup
  end

  #
  # Returns an array containing all outgoing edges of the state. Edges are
  # sorted (with Edge#<=>) if _sorted_ is set to true.
  #
  # Returned array is a copy and may be modified.
  #
  def out_edges(sorted=false)
    sorted ? @out_edges.sort : @out_edges.dup
  end

  #
  # Returns an array with the different symbols appearing on incoming edges.
  # Returned array does not contain duplicates. Symbols are sorted with the
  # automaton's symbols_comparator if _sorted_ is set to true.
  #
  # Returned array may be modified.
  #
  def in_symbols(sorted=false)
    symbols = @in_edges.collect{|e| e.symbol}.uniq
    sorted ? symbols.sort(&automaton.symbols_comparator) : symbols
  end

  #
  # Returns an array with the different symbols appearing on outgoing edges.
  # Returned array does not contain duplicates. Symbols are sorted with the
  # automaton's symbols_comparator if _sorted_ is set to true.
  #
  # Returned array may be modified.
  #
  def out_symbols(sorted=false)
    symbols = @out_edges.collect{|e| e.symbol}.uniq
    sorted ? symbols.sort(&automaton.symbols_comparator) : symbols
  end

  #
  # Returns an array with adjacent states (in or out edge), without
  # duplicates.
  #
  # Returned array may be modified.
  #
  def adjacent_states()
    (in_adjacent_states+out_adjacent_states).uniq
  end

  #
  # Returns an array with adjacent states along an incoming edge (without
  # duplicates).
  #
  # Returned array may be modified.
  #
  def in_adjacent_states()
    @in_edges.collect{|e| e.source}.uniq
  end

  #
  # Returns an array with adjacent states along an outgoing edge (without
  # duplicates).
  #
  # Returned array may be modified.
  #
  def out_adjacent_states()
    @out_edges.collect{|e| e.target}.uniq
  end

  #
  # Returns reachable states from this one with an input _symbol_. Returned
  # array does not contain duplicates and may be modified. This method is not
  # epsilon symbol aware.
  #
  def step(symbol)
    # uniq: parallel edges with the same symbol and target must yield the
    # target once only, as documented.
    @out_edges.select{|e| e.symbol==symbol}.collect{|e| e.target}.uniq
  end

  #
  # Returns the state reached from this one with an input _symbol_, or nil if
  # no such state. This method is not epsilon symbol aware. Moreover it is
  # expected to be used on deterministic states only. If the state is not
  # deterministic, the target of the first matching outgoing edge is returned.
  #
  def dfa_step(symbol)
    edge = @out_edges.find{|e| e.symbol==symbol}
    edge && edge.target
  end

  #
  # Computes the epsilon closure of this state. Epsilon closure is the set of
  # all states reached from this one with a <tt>eps*</tt> input (sequence of
  # zero or more epsilon symbols). The current state is always contained in
  # the epsilon closure. Returns an unsorted array without duplicates; this
  # array is frozen and cached until the state or its edges change.
  #
  def epsilon_closure()
    @epsilon_closure ||= compute_epsilon_closure(Set.new).to_a.freeze
  end

  #
  # Internal implementation of epsilon_closure. _result_ is expected to be
  # a Set instance, is modified and is the returned value.
  #
  def compute_epsilon_closure(result)
    result << self
    step(nil).each do |t|
      # already visited states are skipped, which also guards against cycles
      t.compute_epsilon_closure(result) unless result.include?(t)
    end
    result
  end

  #
  # Computes an array representing the set of states that can be reached from
  # this state with a given input _symbol_. Returned array does not contain
  # duplicates and may be modified. No particular ordering of states in the
  # array is guaranteed.
  #
  # This method is epsilon symbol aware (represented with nil) on non
  # deterministic automata, meaning that it actually computes the set of
  # reachable states through strings respecting the <tt>eps* symbol eps*</tt>
  # regular expression, where eps is the epsilon symbol.
  #
  def delta(symbol)
    if automaton.deterministic?
      target = dfa_delta(symbol)
      target.nil? ? [] : [target]
    else
      # 1) first compute epsilon closure of self
      at_epsilon = epsilon_closure

      # 2) now, look where we can go from there
      at_epsilon_then_symbol = at_epsilon.collect do |s|
        s.step(symbol)
      end.flatten.uniq

      # 3) look where we can go from there using epsilon
      at_epsilon_then_symbol.collect do |s|
        s.epsilon_closure
      end.flatten.uniq
    end
  end

  #
  # Returns the target state that can be reached from this state with _symbol_
  # input. Returns nil if no such state exists, or if _symbol_ is nil.
  #
  # This method is expected to be used on deterministic automata. Unlike delta,
  # it returns a State instance (or nil), not an array of states. When used on
  # non deterministic automata, it returns a state immediately reachable from
  # this state with _symbol_ input, or nil if no such state exists. This
  # method is not epsilon aware.
  #
  def dfa_delta(symbol)
    return nil if symbol.nil?
    edge = @out_edges.find{|e| e.symbol==symbol}
    edge && edge.target
  end

  #
  # Provides comparator of states, based on the index in the automaton state
  # list. This method returns nil unless _o_ is a State from the same
  # automaton as self.
  #
  def <=>(o)
    return nil unless State===o
    return nil unless automaton===o.automaton
    index <=> o.index
  end

  # Returns a string representation: 's' followed by the state index.
  def inspect
    # interpolation builds a fresh string instead of mutating a literal
    "s#{@index}"
  end

  # Returns a string representation: 's' followed by the state index.
  def to_s
    "s#{@index}"
  end

  ### protected write section ################################################
  protected

  # Changes the index of this state in the state list. This method is only
  # expected to be used by the automaton itself.
  def index=(i) @index=i end

  #
  # Change-notification hook: drops the cached epsilon closure and forwards
  # the message to the automaton. (Presumably fired by Markable when a user
  # data is changed -- to be confirmed against that module.)
  #
  def state_changed(what, description)
    @epsilon_closure = nil
    @automaton.send(:state_changed, what, description)
  end

  # Adds an incoming edge to the state.
  def add_incoming_edge(edge)
    @epsilon_closure = nil
    @in_edges << edge
  end

  # Adds an outgoing edge to the state.
  def add_outgoing_edge(edge)
    @epsilon_closure = nil
    @out_edges << edge
  end

  # Removes an incoming edge from the state.
  def drop_incoming_edge(edge)
    @epsilon_closure = nil
    @in_edges.delete(edge)
  end

  # Removes an outgoing edge from the state.
  def drop_outgoing_edge(edge)
    @epsilon_closure = nil
    @out_edges.delete(edge)
  end

  protected :compute_epsilon_closure
end
434
+
435
+ #
436
+ # Automaton edge.
437
+ #
438
+ class Edge
439
+ include Stamina::Markable
440
+ attr_reader :automaton, :index, :from, :to
441
+
442
+ #
443
+ # Creates an edge.
444
+ #
445
+ # Arguments:
446
+ # - automaton: parent automaton of the edge.
447
+ # - index: index of the edge in the edge list.
448
+ # - data: user data attached to this edge.
449
+ # - from: source state of the edge.
450
+ # - to: target state of the edge.
451
+ #
452
+ def initialize(automaton, index, data, from, to)
453
+ @automaton, @index = automaton, index
454
+ @data = data
455
+ @from, @to = from, to
456
+ end
457
+
458
+ # Returns edge symbol.
459
+ def symbol() @data[:symbol] end
460
+
461
+ # Sets edge symbol.
462
+ def symbol=(symbol) @data[:symbol]=symbol end
463
+
464
+ alias :source :from
465
+ alias :target :to
466
+
467
+ #
468
+ # Provides comparator of edges, based on the index in the automaton edge
469
+ # list. This method returns nil unless _o_ is an Edge from the same
470
+ # automaton as self.
471
+ # Once again, this method has nothing to do with equality, it looks at an
472
+ # index and ID only.
473
+ #
474
+ def <=>(o)
475
+ return nil unless Edge===o
476
+ return nil unless automaton===o.automaton
477
+ return index <=> o.index
478
+ end
479
+
480
+ # Returns a string representation
481
+ def inspect
482
+ 'e' << @index.to_s
483
+ end
484
+
485
+ # Returns a string representation
486
+ def to_s
487
+ 'e' << @index.to_s
488
+ end
489
+
490
+ ### protected write section ################################################
491
+ protected
492
+
493
+ # Changes the index of this edge in the edge list. This method is only
494
+ # expected to be used by the automaton itself.
495
+ def index=(i) @index=i end
496
+
497
+ #
498
+ # Fired by Loaded when a user data is changed. The message is forwarded to
499
+ # the automaton.
500
+ #
501
+ def state_changed(what, infos)
502
+ @automaton.send(:state_changed, what, infos)
503
+ end
504
+
505
+ end
506
+
507
+ ### Automaton class ##########################################################
508
+ public
509
+
510
+ # State list and edge list of the automaton
511
+ attr_reader :states, :edges
512
+
513
+ #
514
+ # Creates an empty automaton and executes the block passed as argument. The _onself_
515
+ # argument dictates the way _block_ is executed:
516
+ # - when set to false, the block is executed traditionally (i.e. using yield).
517
+ # In this case, methods invocations must be performed on the automaton object
518
+ # passed as block argument.
519
+ # - when set to _true_ (by default) the block is executed in the context of the
520
+ # automaton itself (i.e. with instance_eval), allowing call of its methods
521
+ # without prefixing them by the automaton variable. The automaton still
522
+ # passes itself as first block argument. Note that in this case, you won't be
523
+ # able to invoke a method defined in the scope of your block.
524
+ #
525
+ # Example:
526
+ # # The DRY way to do:
527
+ # Automaton.new do |automaton| # automaton will not be used here, but it is passed
528
+ # add_state(:initial => true)
529
+ # add_state(:accepting => true)
530
+ # connect(0, 1, 'a')
531
+ # connect(1, 0, 'b')
532
+ #
533
+ # # method_in_caller_scope() # commented because not allowed here !!
534
+ # end
535
+ #
536
+ # # The other way:
537
+ # Automaton.new(false) do |automaton| # automaton MUST be used here
538
+ # automaton.add_state(:initial => true)
539
+ # automaton.add_state(:accepting => true)
540
+ # automaton.connect(0, 1, 'a')
541
+ # automaton.connect(1, 0, 'b')
542
+ #
543
+ # method_in_caller_scope() # allowed in this variant !!
544
+ # end
545
+ #
546
+ def initialize(onself=true, &block) # :yields: automaton
547
+ @states = []
548
+ @edges = []
549
+ @initials = nil
550
+ @alphabet = nil
551
+ @deterministic = nil
552
+
553
+ # if there's a block, execute it now!
554
+ if block_given?
555
+ if onself
556
+ if RUBY_VERSION >= "1.9.0"
557
+ instance_exec(self, &block)
558
+ else
559
+ instance_eval(&block)
560
+ end
561
+ else
562
+ block.call(self)
563
+ end
564
+ end
565
+ end
566
+
567
+ ### public read-only section #################################################
568
+ public
569
+
570
+ #
571
+ # Returns a symbols comparator taking epsilon symbols into account. Comparator
572
+ # is provided as Proc instance which is a lambda function.
573
+ #
574
+ def symbols_comparator
575
+ @symbols_comparator ||= Kernel.lambda do |a,b|
576
+ if a==b then 0
577
+ elsif a.nil? then -1
578
+ elsif b.nil? then 1
579
+ else a <=> b
580
+ end
581
+ end
582
+ end
583
+
584
+ # Returns the number of states
585
+ def state_count() @states.size end
586
+
587
+ # Returns the number of edges
588
+ def edge_count() @edges.size end
589
+
590
+ #
591
+ # Returns the i-th state of the state list.
592
+ #
593
+ # Raises:
594
+ # - ArgumentError unless i is an Integer
595
+ # - ArgumentError if i is not in [0..state_count)
596
+ #
597
+ def ith_state(i)
598
+ raise(ArgumentError, "Integer expected, #{i} found.", caller)\
599
+ unless Integer === i
600
+ raise(ArgumentError, "Invalid state index #{i}", caller)\
601
+ unless i>=0 and i<state_count
602
+ @states[i]
603
+ end
604
+
605
+ #
606
+ # Returns the state associated with the supplied state name; raises an ArgumentError if no such state can be found.
607
+ #
608
+ def get_state(name)
609
+ raise(ArgumentError, "String expected, #{name} found.", caller)\
610
+ unless String === name
611
+ result = states.find do |s|
612
+ name == s[:name]
613
+ end
614
+ raise(ArgumentError, "State #{name} was not found", caller)\
615
+ if result.nil?
616
+ result
617
+ end
618
+
619
+ #
620
+ # Returns the i-th states of the state list.
621
+ #
622
+ # Raises:
623
+ # - ArgumentError unless all _i_ are integers
624
+ # - ArgumentError unless all _i_ are in [0..state_count)
625
+ #
626
+ def ith_states(*i)
627
+ i.collect{|j| ith_state(j)}
628
+ end
629
+
630
+ #
631
+ # Returns the i-th edge of the edge list.
632
+ #
633
+ # Raises:
634
+ # - ArgumentError unless i is an Integer
635
+ # - ArgumentError if i is not in [0..edge_count)
636
+ #
637
+ def ith_edge(i)
638
+ raise(ArgumentError, "Integer expected, #{i} found.", caller)\
639
+ unless Integer === i
640
+ raise(ArgumentError, "Invalid edge index #{i}", caller)\
641
+ unless i>=0 and i<edge_count
642
+ @edges[i]
643
+ end
644
+
645
+ #
646
+ # Returns the i-th edges of the edge list.
647
+ #
648
+ # Raises:
649
+ # - ArgumentError unless all _i_ are integers
650
+ # - ArgumentError unless all _i_ are in [0..edge_count)
651
+ #
652
+ def ith_edges(*i)
653
+ i.collect{|j| ith_edge(j)}
654
+ end
655
+
656
+ #
657
+ # Calls block for each state of the automaton state list. States are
658
+ # enumerated in index order.
659
+ #
660
+ def each_state() @states.each {|s| yield s if block_given?} end
661
+
662
+ #
663
+ # Calls block for each edge of the automaton edge list. Edges are
664
+ # enumerated in index order.
665
+ #
666
+ def each_edge() @edges.each {|e| yield e if block_given?} end
667
+
668
+ #
669
+ # Returns an array with incoming edges of _state_. Edges are sorted by symbols
670
+ # if _sorted_ is set to true. If two incoming edges have same symbol, no
671
+ # order is guaranteed between them. Returned array may be modified.
672
+ #
673
+ # If _state_ is an Integer, this method returns the incoming edges of the
674
+ # state'th state in the state list.
675
+ #
676
+ # Raises:
677
+ # - IndexError if state is an Integer and state<0 or state>=state_count.
678
+ # - ArgumentError if _state_ is not a valid state for this automaton.
679
+ #
680
+ def in_edges(state, sorted=false) to_state(state).in_edges(sorted) end
681
+
682
+ #
683
+ # Returns an array with outgoing edges of _state_. Edges are sorted by symbols
684
+ # if _sorted_ is set to true. If two outgoing edges have the same symbol, no
685
+ # order is guaranteed between them. Returned array may be modified.
686
+ #
687
+ # If _state_ is an Integer, this method returns the outgoing edges of the
688
+ # state'th state in the state list.
689
+ #
690
+ # Raises:
691
+ # - IndexError if state is an Integer and state<0 or state>=state_count.
692
+ # - ArgumentError if state is not a valid state (not a state or not from this
693
+ # automaton)
694
+ #
695
+ def out_edges(state, sorted=false) to_state(state).out_edges(sorted) end
696
+
697
+ #
698
+ # Returns an array with the different symbols appearing on incoming edges of
699
+ # _state_. Returned array does not contain duplicates and may be modified;
700
+ # it is sorted if _sorted_ is set to true.
701
+ #
702
+ # If _state_ is an Integer, this method returns the incoming symbols of the
703
+ # state'th state in the state list.
704
+ #
705
+ # Raises:
706
+ # - IndexError if state is an Integer and state<0 or state>=state_count.
707
+ # - ArgumentError if _state_ is not a valid state for this automaton.
708
+ #
709
+ def in_symbols(state, sorted=false) to_state(state).in_symbols(sorted) end
710
+
711
+ #
712
+ # Returns an array with the different symbols appearing on outgoing edges of
713
+ # _state_. Returned array does not contain duplicates and may be modified;
714
+ # it is sorted if _sorted_ is set to true.
715
+ #
716
+ # If _state_ is an Integer, this method returns the outgoing symbols of the
717
+ # state'th state in the state list.
718
+ #
719
+ # Raises:
720
+ # - IndexError if state is an Integer and state<0 or state>=state_count.
721
+ # - ArgumentError if state is not a valid state (not a state or not from this
722
+ # automaton)
723
+ #
724
+ def out_symbols(state, sorted=false) to_state(state).out_symbols(sorted) end
725
+
726
+ #
727
+ # Returns an array with adjacent states (along incoming and outgoing edges)
728
+ # of _state_. Returned array does not contain duplicates; it may be modified.
729
+ #
730
+ # If _state_ is an Integer, this method returns the adjacent states of the
731
+ # state'th state in the state list.
732
+ #
733
+ # Raises:
734
+ # - IndexError if state is an Integer and state<0 or state>=state_count.
735
+ # - ArgumentError if state is not a valid state (not a state or not from this
736
+ # automaton)
737
+ #
738
+ def adjacent_states(state) to_state(state).adjacent_states() end
739
+
740
+ #
741
+ # Returns an array with adjacent states (along incoming edges) of _state_.
742
+ # Returned array does not contain duplicates; it may be modified.
743
+ #
744
+ # If _state_ is an Integer, this method returns the incoming adjacent states
745
+ # of the state'th state in the state list.
746
+ #
747
+ # Raises:
748
+ # - IndexError if state is an Integer and state<0 or state>=state_count.
749
+ # - ArgumentError if state is not a valid state (not a state or not from this
750
+ # automaton)
751
+ #
752
+ def in_adjacent_states(state) to_state(state).in_adjacent_states() end
753
+
754
+ #
755
+ # Returns an array with adjacent states (along outgoing edges) of _state_.
756
+ # Returned array does not contain duplicates; it may be modified.
757
+ #
758
+ # If _state_ is an Integer, this method returns the outgoing adjacent states
759
+ # of the state'th state in the state list.
760
+ #
761
+ # Raises:
762
+ # - IndexError if state is an Integer and state<0 or state>=state_count.
763
+ # - ArgumentError if state is not a valid state (not a state or not from this
764
+ # automaton)
765
+ #
766
+ def out_adjacent_states(state) to_state(state).out_adjacent_states() end
767
+
768
+ #
769
+ # Collects all initial states of this Automaton and returns them. Returned array
770
+ # does not contain duplicates and may be modified.
771
+ #
772
+ # This method is epsilon symbol aware (represented with nil) on
773
+ # non-deterministic automata, meaning that it actually computes the set of
774
+ # reachable states from an initial state through strings respecting the
775
+ # <tt>eps*</tt> regular expression, where eps is the epsilon symbol.
776
+ #
777
+ def initial_states
778
+ @initials = compute_initial_states if @initials.nil? or @initials.empty?
779
+ @initials
780
+ end
781
+
782
+ #
783
+ # Returns the initial state of the automaton. This method is expected to be used
784
+ # on deterministic automata only. Unlike initial_states, it returns one State
785
+ # instance instead of an Array.
786
+ #
787
+ # When used with a non deterministic automaton, it returns one of the states
788
+ # tagged as initial. Which one is returned must be considered a non
789
+ # deterministic choice. This method is not epsilon symbol aware.
790
+ #
791
+ def initial_state
792
+ initial_states[0]
793
+ end
794
+
795
+ # Internal implementation of initial_states.
796
+ def compute_initial_states()
797
+ initials = @states.select {|s| s.initial?}
798
+ initials.collect{|s| s.epsilon_closure}.flatten.uniq
799
+ end
800
+
801
+ ### public write section #####################################################
802
+ public
803
+
804
+ #
805
+ # Adds a new state.
806
+ #
807
+ # Arguments:
808
+ # - data: user-data to attach to the state (see Automaton documentation).
809
+ #
810
+ # Raises:
811
+ # - ArgumentError if _data_ is not a valid state data.
812
+ #
813
+ def add_state(data={})
814
+ data = to_valid_state_data(data)
815
+
816
+ # create new state, add it to state-list
817
+ state = State.new(self, state_count, data)
818
+ @states << state
819
+
820
+ # let the automaton know that something has changed
821
+ state_changed(:state_added, state)
822
+
823
+ # return created state
824
+ state
825
+ end
826
+ alias :create_state :add_state
827
+
828
+ #
829
+ # Adds _n_ new states in the automaton. Created states are returned as an
830
+ # ordered array (order of states according to their index in state list).
831
+ #
832
+ # _data_ is duplicated for each created state.
833
+ #
834
+ def add_n_states(n, data={})
835
+ created = []
836
+ n.times do |i|
837
+ created << add_state(data.dup)
838
+ end
839
+ created
840
+ end
841
+ alias :create_n_states :add_n_states
842
+
843
+ #
844
+ # Adds a new edge, connecting _from_ and _to_ states of the automaton.
845
+ #
846
+ # Arguments:
847
+ # - from: either a State or a valid state index (Integer).
848
+ # - to: either a State or a valid state index (Integer).
849
+ # - data: user data to attach to the created edge (see Automaton documentation).
850
+ #
851
+ # Raises:
852
+ # - IndexError if _from_ is an Integer but not in [0..state_count)
853
+ # - IndexError if _to_ is an Integer but not in [0..state_count)
854
+ # - ArgumentError if _from_ is not a valid state for this automaton.
855
+ # - ArgumentError if _to_ is not a valid state for this automaton.
856
+ # - ArgumentError if _data_ is not a valid edge data.
857
+ #
858
+ def add_edge(from, to, data)
859
+ from, to, data = to_state(from), to_state(to), to_valid_edge_data(data)
860
+
861
+ # create edge, install it, add it to edge-list
862
+ edge = Edge.new(self, edge_count, data, from, to)
863
+ @edges << edge
864
+ from.send(:add_outgoing_edge, edge)
865
+ to.send(:add_incoming_edge, edge)
866
+
867
+ # let automaton know that something has changed
868
+ state_changed(:edge_added, edge)
869
+
870
+ # return created edge
871
+ edge
872
+ end
873
+ alias :create_edge :add_edge
874
+ alias :connect :add_edge
875
+
876
+ # Adds all states and transitions (as copies) from a different automaton.
877
+ # Returns the initial state of the added part. In order to ensure that names of
878
+ # the new states do not clash with names of existing states, state names may have
879
+ # to be removed from added states; this is the case if _clear_names_ is set to true.
880
+ # None of the added states are made initial.
881
+ def add_automaton(what,clear_names=true)
882
+ map_what_self = {}
883
+ what.states.each do |state|
884
+ map_what_self[state]=add_state(state.data)
885
+ map_what_self[state][:name]=nil if clear_names
886
+ map_what_self[state][:initial]=false
887
+ end
888
+ what.edges.each do |edge|
889
+ add_edge(map_what_self[edge.from],map_what_self[edge.to],edge.data)
890
+ end
891
+ map_what_self[what.initial_state]
892
+ end
893
+
894
+ # Constructs a replica of this automaton and returns a copy.
895
+ # This copy can be modified in whatever way without affecting the original
896
+ # automaton.
897
+ def dup
898
+ Automaton.new(false) do |fa|
899
+ initial = fa.add_automaton(self,false)
900
+ initial[:initial] = true unless initial.nil?
901
+ end
902
+ end
903
+
904
+ #
905
+ # Drops a state of the automaton, as well as all connected edges to that state.
906
+ # If _state_ is an integer, the state-th state of the state list is removed.
907
+ # This method returns the automaton itself.
908
+ #
909
+ # Raises:
910
+ # - ArgumentError if _state_ is an Integer but not in [0..state_count)
910
+ # - ArgumentError if _state_ is not a valid state for this automaton.
912
+ #
913
+ def drop_state(state)
914
+ state = to_state(state)
915
+ # remove edges first: drop_edges ensures that edge list is coherent
916
+ drop_edges(*(state.in_edges + state.out_edges).uniq)
917
+
918
+ # remove state now and renumber
919
+ @states.delete_at(state.index)
920
+ state.index.upto(state_count-1) do |i|
921
+ @states[i].send(:index=, i)
922
+ end
923
+ state.send(:index=, -1)
924
+
925
+ state_changed(:state_dropped, state)
926
+ self
927
+ end
928
+ alias :delete_state :drop_state
929
+
930
+ #
931
+ # Drops all states passed as parameter as well as all their connected edges.
932
+ # Arguments may be state instances, as well as valid state indices. Duplicates
933
+ # are even supported. If some state argument is not valid, this method raises
934
+ # an error and leaves the automaton unchanged.
935
+ #
936
+ # Raises:
937
+ # - ArgumentError if one state in _states_ is not a valid state of this
938
+ # automaton.
939
+ #
940
+ def drop_states(*states)
941
+ # check states first
942
+ states = states.collect{|s| to_state(s)}.uniq.sort
943
+ edges = states.collect{|s| (s.in_edges + s.out_edges).uniq}.flatten.uniq.sort
944
+
945
+ # Remove all edges, we do not use drop_edges to avoid spending too much
946
+ # time reindexing edges. Moreover, we can do it that way because we take
947
+ # edges in reverse indexing order (has been sorted previously)
948
+ until edges.empty?
949
+ edge = edges.pop
950
+ edge.source.send(:drop_outgoing_edge,edge)
951
+ edge.target.send(:drop_incoming_edge,edge)
952
+ @edges.delete_at(edge.index)
953
+ edge.send(:index=, -1)
954
+ state_changed(:edge_dropped, edge)
955
+ end
956
+
957
+ # Remove all states, same kind of hack is used
958
+ until states.empty?
959
+ state = states.pop
960
+ @states.delete_at(state.index)
961
+ state.send(:index=, -1)
962
+ state_changed(:state_dropped, state)
963
+ end
964
+
965
+ # sanitize state and edge lists
966
+ @states.each_with_index {|s,i| s.send(:index=,i)}
967
+ @edges.each_with_index {|e,i| e.send(:index=,i)}
968
+
969
+ self
970
+ end
971
+
972
+ #
973
+ # Drops an edge in the automaton. If _edge_ is an integer, the edge-th edge
974
+ # of the edge list is removed. This method returns the automaton itself.
975
+ #
976
+ # Raises:
977
+ # - IndexError if _edge_ is an Integer but not in [0..edge_count)
978
+ # - ArgumentError if _edge_ is not a valid edge for this automaton.
979
+ #
980
+ def drop_edge(edge)
981
+ edge = to_edge(edge)
982
+ @edges.delete_at(edge.index)
983
+ edge.from.send(:drop_outgoing_edge,edge)
984
+ edge.to.send(:drop_incoming_edge,edge)
985
+ edge.index.upto(edge_count-1) do |i|
986
+ @edges[i].send(:index=, i)
987
+ end
988
+ edge.send(:index=,-1)
989
+ state_changed(:edge_dropped, edge)
990
+ self
991
+ end
992
+ alias :delete_edge :drop_edge
993
+
994
+ #
995
+ # Drops all edges passed as parameters. Arguments may be edge objects,
996
+ # as well as valid edge indices. Duplicates are even supported. If some edge
997
+ # argument is not valid, this method raises an error and leaves the automaton
998
+ # unchanged.
999
+ #
1000
+ # Raises:
1001
+ # - ArgumentError if one edge in _edges_ is not a valid edge of this automaton.
1002
+ #
1003
+ def drop_edges(*edges)
1004
+ # check edges first
1005
+ edges = edges.collect{|e| to_edge(e)}.uniq
1006
+
1007
+ # remove all edges
1008
+ edges.each do |e|
1009
+ @edges.delete(e)
1010
+ e.from.send(:drop_outgoing_edge,e)
1011
+ e.to.send(:drop_incoming_edge,e)
1012
+ e.send(:index=, -1)
1013
+ state_changed(:edge_dropped, e)
1014
+ end
1015
+ @edges.each_with_index do |e,i|
1016
+ e.send(:index=,i)
1017
+ end
1018
+
1019
+ self
1020
+ end
1021
+ alias :delete_edges :drop_edges
1022
+
1023
+ ### protected section ########################################################
1024
+ protected
1025
+
1026
+ #
1027
+ # Converts a _state_ argument to a valid State of this automaton.
1028
+ # There are three ways to refer to a state, by position in the internal
1029
+ # collection of states, using an instance of State and using a name of a
1030
+ # state (represented with a String).
1031
+ #
1032
+ # Raises:
1033
+ # - ArgumentError if state is an Integer and state<0 or state>=state_count.
1034
+ # - ArgumentError if state is not a valid state (not a state or not from this
1035
+ # automaton)
1036
+ #
1037
+ def to_state(state)
1038
+ case state
1039
+ when State
1040
+ return state if state.automaton==self and state==@states[state.index]
1041
+ raise ArgumentError, "Not a state of this automaton", caller
1042
+ when Integer
1043
+ return ith_state(state)
1044
+ when String
1045
+ result = get_state(state)
1046
+ return result unless result.nil?
1047
+ end
1048
+ raise ArgumentError, "Invalid state argument #{state}", caller
1049
+ end
1050
+
1051
+ #
1052
+ # Converts an _edge_ argument to a valid Edge of this automaton.
1053
+ #
1054
+ # Raises:
1055
+ # - ArgumentError if _edge_ is an Integer but not in [0..edge_count)
1056
+ # - ArgumentError if _edge_ is not a valid edge (not an edge or not from this
1057
+ # automaton)
1058
+ #
1059
+ def to_edge(edge)
1060
+ case edge
1061
+ when Edge
1062
+ return edge if edge.automaton==self and edge==@edges[edge.index]
1063
+ raise ArgumentError, "Not an edge of this automaton", caller
1064
+ when Integer
1065
+ return ith_edge(edge)
1066
+ end
1067
+ raise ArgumentError, "Invalid edge argument #{edge}", caller
1068
+ end
1069
+
1070
+ #
1071
+ # Checks if a given user-data contains enough information to be attached to
1072
+ # a given state. Returns the data if ok.
1073
+ #
1074
+ # Raises:
1075
+ # - ArgumentError if data is not considered a valid state data.
1076
+ #
1077
+ def to_valid_state_data(data)
1078
+ raise(ArgumentError,
1079
+ "User data should be an Hash", caller) unless Hash===data
1080
+ data
1081
+ end
1082
+
1083
+ #
1084
+ # Checks if a given user-data contains enough information to be attached to
1085
+ # a given edge. Returns the data if ok.
1086
+ #
1087
+ # Raises:
1088
+ # - ArgumentError if data is not considered a valid edge data.
1089
+ #
1090
+ def to_valid_edge_data(data)
1091
+ return {:symbol => data} if data.nil? or data.is_a?(String)
1092
+ raise(ArgumentError,
1093
+ "User data should be an Hash", caller) unless Hash===data
1094
+ raise(ArgumentError,
1095
+ "User data should contain a :symbol attribute.",
1096
+ caller) unless data.has_key?(:symbol)
1097
+ raise(ArgumentError,
1098
+ "Edge :symbol attribute cannot be an array.",
1099
+ caller) if Array===data[:symbol]
1100
+ data
1101
+ end
1102
+
1103
+ ### public sections with useful utilities ####################################
1104
+ public
1105
+
1106
+ # Returns true if the automaton is deterministic, false otherwise
1107
+ def deterministic?
1108
+ @deterministic = @states.reject{|s| s.deterministic?}.empty? if @deterministic.nil?
1109
+ @deterministic
1110
+ end
1111
+
1112
+ ### public & protected sections about alphabet ###############################
1113
+ protected
1114
+
1115
+ # Deduces the alphabet from the automaton edges.
1116
+ def deduce_alphabet
1117
+ edges.collect{|e| e.symbol}.uniq.compact.sort
1118
+ end
1119
+
1120
+ public
1121
+
1122
+ # Returns the alphabet of the automaton.
1123
+ def alphabet
1124
+ @alphabet || deduce_alphabet
1125
+ end
1126
+
1127
+ # Sets the alphabet of the automaton. _alph_ is expected to be an array without
1127
+ # nil or duplicates. This method raises an ArgumentError otherwise. Such an
1129
+ # error is also raised if a symbol used on the automaton edges is not included
1130
+ # in _alph_.
1131
+ def alphabet=(alph)
1132
+ raise ArgumentError, "Invalid alphabet" unless alph.uniq.compact.size==alph.size
1133
+ raise ArgumentError, "Invalid alphabet" unless deduce_alphabet.reject{|s| alph.include?(s)}.empty?
1134
+ @alphabet = alph.sort
1135
+ end
1136
+
1137
+ ### public section about dot utilities #######################################
1138
+ protected
1139
+
1140
+ #
1141
+ # Converts a hash of attributes (typically automaton, state or edge attributes)
1142
+ # to a <code>[...]</code> dot string. Braces are part of the output.
1143
+ #
1144
+ def attributes2dot(attrs)
1145
+ buffer = ""
1146
+ attrs.keys.sort{|k1,k2| k1.to_s <=> k2.to_s}.each do |key|
1147
+ buffer << " " unless buffer.empty?
1148
+ value = attrs[key].to_s.gsub('"','\"')
1149
+ buffer << "#{key}=\"#{value}\""
1150
+ end
1151
+ buffer
1152
+ end
1153
+
1154
+ public
1155
+
1156
+ #
1157
+ # Generates a dot output from an automaton. The rewriter block takes
1158
+ # two arguments: the first one is a Markable instance (graph, state or
1159
+ # edge), the second one indicates which kind of element is passed (through
1160
+ # :automaton, :state or :edge symbol). The rewriter is expected to return a
1161
+ # hash-like object providing dot attributes for the element.
1162
+ #
1163
+ # When no rewriter is provided, a default one is used, providing
1164
+ # the following behavior:
1165
+ # - on :automaton
1166
+ #
1167
+ # {:rankdir => "LR"}
1168
+ #
1169
+ # - on :state
1170
+ #
1171
+ # {:shape => "doublecircle/circle" (following accepting?),
1172
+ # :style => "filled",
1173
+ # :fillcolor => "green/red/white" (if initial?/error?/else, respectively)}
1174
+ #
1175
+ # - on :edge
1176
+ #
1177
+ # {:label => "#{edge.symbol}"}
1178
+ #
1179
+ def to_dot(&rewriter)
1180
+ unless rewriter
1181
+ to_dot do |elm, kind|
1182
+ case kind
1183
+ when :automaton
1184
+ {:rankdir => "LR"}
1185
+ when :state
1186
+ {:shape => (elm.accepting? ? "doublecircle" : "circle"),
1187
+ :style => "filled",
1188
+ :color => "black",
1189
+ :fillcolor => (elm.initial? ? "green" : (elm.error? ? "red" : "white"))}
1190
+ when :edge
1191
+ {:label => elm.symbol.nil? ? '' : elm.symbol.to_s}
1192
+ end
1193
+ end
1194
+ else
1195
+ buffer = "digraph G {\n"
1196
+ attrs = attributes2dot(rewriter.call(self, :automaton))
1197
+ buffer << " graph [#{attrs}];\n"
1198
+ states.each do |s|
1199
+ attrs = attributes2dot(rewriter.call(s, :state))
1200
+ buffer << " #{s.index} [#{attrs}];\n"
1201
+ end
1202
+ edges.each do |e|
1203
+ attrs = attributes2dot(rewriter.call(e, :edge))
1204
+ buffer << " #{e.source.index} -> #{e.target.index} [#{attrs}];\n"
1205
+ end
1206
+ buffer << "}\n"
1207
+ end
1208
+ end
1209
+
1210
+ ### public section about reordering ##########################################
1211
+ public
1212
+
1213
+ # Uses a comparator block to reorder the state list.
1214
+ def order_states(&block)
1215
+ raise ArgumentError, "A comparator block must be given" unless block_given?
1216
+ raise ArgumentError, "A comparator block of arity 2 must be given" unless block.arity==2
1217
+ @states.sort!(&block)
1218
+ @states.each_with_index{|s,i| s.send(:index=, i)}
1219
+ self
1220
+ end
1221
+
1222
+ ### protected section about changes ##########################################
1223
+ protected
1224
+
1225
+ #
1226
+ # Fired by write methods when an automaton change occurs.
1227
+ #
1228
+ def state_changed(what, infos)
1229
+ @initials = nil
1230
+ @deterministic = nil
1231
+ end
1232
+
1233
+ protected :compute_initial_states
1234
+ end # class Automaton
1235
+
1236
+ end # module Stamina
1237
+ require 'stamina/automaton/walking'