stamina 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (80) hide show
  1. data/.gemtest +0 -0
  2. data/CHANGELOG.md +22 -0
  3. data/Gemfile +2 -0
  4. data/Gemfile.lock +33 -0
  5. data/LICENCE.md +22 -0
  6. data/Manifest.txt +16 -0
  7. data/README.md +78 -0
  8. data/Rakefile +23 -0
  9. data/bin/adl2dot +12 -0
  10. data/bin/classify +12 -0
  11. data/bin/redblue +12 -0
  12. data/bin/rpni +12 -0
  13. data/example/adl/automaton.adl +49 -0
  14. data/example/adl/sample.adl +53 -0
  15. data/example/basic/characteristic_sample.adl +32 -0
  16. data/example/basic/target.adl +9 -0
  17. data/example/competition/31_test.adl +1500 -0
  18. data/example/competition/31_training.adl +1759 -0
  19. data/lib/stamina.rb +19 -0
  20. data/lib/stamina/adl.rb +298 -0
  21. data/lib/stamina/automaton.rb +1237 -0
  22. data/lib/stamina/automaton/walking.rb +336 -0
  23. data/lib/stamina/classifier.rb +37 -0
  24. data/lib/stamina/command/adl2dot_command.rb +73 -0
  25. data/lib/stamina/command/classify_command.rb +57 -0
  26. data/lib/stamina/command/redblue_command.rb +58 -0
  27. data/lib/stamina/command/rpni_command.rb +58 -0
  28. data/lib/stamina/command/stamina_command.rb +79 -0
  29. data/lib/stamina/errors.rb +20 -0
  30. data/lib/stamina/induction/commons.rb +170 -0
  31. data/lib/stamina/induction/redblue.rb +264 -0
  32. data/lib/stamina/induction/rpni.rb +188 -0
  33. data/lib/stamina/induction/union_find.rb +377 -0
  34. data/lib/stamina/input_string.rb +123 -0
  35. data/lib/stamina/loader.rb +0 -0
  36. data/lib/stamina/markable.rb +42 -0
  37. data/lib/stamina/sample.rb +190 -0
  38. data/lib/stamina/version.rb +14 -0
  39. data/stamina.gemspec +190 -0
  40. data/stamina.noespec +35 -0
  41. data/tasks/debug_mail.rake +78 -0
  42. data/tasks/debug_mail.txt +13 -0
  43. data/tasks/gem.rake +68 -0
  44. data/tasks/spec_test.rake +79 -0
  45. data/tasks/unit_test.rake +77 -0
  46. data/tasks/yard.rake +51 -0
  47. data/test/stamina/adl_test.rb +491 -0
  48. data/test/stamina/automaton_additional_test.rb +190 -0
  49. data/test/stamina/automaton_classifier_test.rb +155 -0
  50. data/test/stamina/automaton_test.rb +1092 -0
  51. data/test/stamina/automaton_to_dot_test.rb +64 -0
  52. data/test/stamina/automaton_walking_test.rb +206 -0
  53. data/test/stamina/exit.rb +3 -0
  54. data/test/stamina/induction/induction_test.rb +70 -0
  55. data/test/stamina/induction/redblue_mergesamestatebug_expected.adl +19 -0
  56. data/test/stamina/induction/redblue_mergesamestatebug_pta.dot +64 -0
  57. data/test/stamina/induction/redblue_mergesamestatebug_sample.adl +9 -0
  58. data/test/stamina/induction/redblue_test.rb +83 -0
  59. data/test/stamina/induction/redblue_universal_expected.adl +4 -0
  60. data/test/stamina/induction/redblue_universal_sample.adl +5 -0
  61. data/test/stamina/induction/rpni_inria_expected.adl +7 -0
  62. data/test/stamina/induction/rpni_inria_sample.adl +9 -0
  63. data/test/stamina/induction/rpni_test.rb +129 -0
  64. data/test/stamina/induction/rpni_test_pta.dot +22 -0
  65. data/test/stamina/induction/rpni_universal_expected.adl +4 -0
  66. data/test/stamina/induction/rpni_universal_sample.adl +4 -0
  67. data/test/stamina/induction/union_find_test.rb +124 -0
  68. data/test/stamina/input_string_test.rb +323 -0
  69. data/test/stamina/markable_test.rb +70 -0
  70. data/test/stamina/randdfa.adl +66 -0
  71. data/test/stamina/sample.adl +4 -0
  72. data/test/stamina/sample_classify_test.rb +149 -0
  73. data/test/stamina/sample_test.rb +218 -0
  74. data/test/stamina/small_dfa.dot +16 -0
  75. data/test/stamina/small_dfa.gif +0 -0
  76. data/test/stamina/small_nfa.dot +18 -0
  77. data/test/stamina/small_nfa.gif +0 -0
  78. data/test/stamina/stamina_test.rb +69 -0
  79. data/test/test_all.rb +7 -0
  80. metadata +279 -0
@@ -0,0 +1,19 @@
1
+ module Stamina
2
+
3
+ end
4
+ require 'stamina/version'
5
+ require 'stamina/loader'
6
+ require 'set'
7
+ require 'enumerator'
8
+ require 'stringio'
9
+ require 'stamina/errors'
10
+ require 'stamina/markable'
11
+ require 'stamina/adl'
12
+ require 'stamina/sample'
13
+ require 'stamina/input_string'
14
+ require 'stamina/classifier'
15
+ require 'stamina/automaton'
16
+ require 'stamina/induction/union_find'
17
+ require 'stamina/induction/commons'
18
+ require "stamina/induction/rpni"
19
+ require "stamina/induction/redblue"
@@ -0,0 +1,298 @@
1
+ module Stamina
2
+ #
3
+ # Automaton Description Language module. This module provides parsing and
4
+ # printing methods for automata and samples. Documentation of the file format
5
+ # used for an automaton is given in parse_automaton; file format for samples is
6
+ # documented in parse_sample.
7
+ #
8
+ # Methods of this module are not intended to be included by a class but invoked
9
+ # on the module instead:
10
+ #
11
+ # begin
12
+ # dfa = Stamina::ADL.parse_automaton_file("my_automaton.adl")
13
+ # rescue ADL::ParseError => ex
14
+ # puts "Oops, the ADL automaton file seems corrupted..."
15
+ # end
16
+ #
17
+ # == Detailed API
18
+ module ADL
19
+
20
+ #################################################################################
21
+ # Automaton Section #
22
+ #################################################################################
23
+
24
+ #
25
+ # Parses a given automaton description and returns an Automaton instance.
26
+ #
27
+ # Raises:
28
+ # - ArgumentError unless _descr_ is an IO object or a String.
29
+ # - ADL::ParseError if the ADL automaton format is not respected.
30
+ #
31
+ # ADL provides a really simple grammar to describe automata. Here is a succinct
32
+ # example (full documentation of the ADL automaton grammar can be found in
33
+ # the self-documenting example/adl/automaton.adl file).
34
+ #
35
+ # # Some header comments: tool which has generated this automaton,
36
+ # # maybe a date or other tool options ...
37
+ # # here: 'this automaton accepts the a(ba)* regular language'
38
+ # 2 2
39
+ # 0 true false
40
+ # 1 false true
41
+ # 0 1 a
42
+ # 1 0 b
43
+ #
44
def self.parse_automaton(descr)
  # Parses an ADL automaton description and returns the resulting Automaton.
  #
  # _descr_ is handed to ADL::to_io, so it must be an input that helper accepts.
  # The input is read line by line through four successive modes:
  #   :header - "state_count edge_count"
  #   :states - "id initial accepting" (state_count lines)
  #   :edges  - "source target symbol" (edge_count lines)
  #   :end    - no further significant line is allowed
  # Everything following a '#' on a line is a comment; blank lines are skipped.
  #
  # Raises ADL::ParseError on a malformed line, on an edge referring to an
  # unknown state id, on trailing data, or when the announced state/edge
  # counts differ from what was actually read.
  automaton = nil
  ADL::to_io(descr) do |io|
    state_count, edge_count = nil, nil
    state_read, edge_read = 0, 0
    states = {}            # state id as read from the input => created state
    mode = :header

    automaton = Automaton.new do |fa|
      # Count every physical line (blank and comment-only ones included) so
      # that line numbers reported in error messages match the input text.
      line_number = 0
      io.each_line do |l|
        line_number += 1

        # strip the trailing comment first, then surrounding whitespace
        index = l.index('#')
        l = l[0,index] if index
        l = l.strip
        next if l.empty?

        case mode
        when :header
          # looking for |state_count edge_count|
          unless /^(\d+)\s+(\d+)$/ =~ l
            raise(ADL::ParseError,
                  "Parse error line #{line_number}: 'state_count edge_count' expected, "\
                  "'#{l}' found.")
          end
          state_count, edge_count = $1.to_i, $2.to_i
          mode = :states

        when :states
          # looking for |number initial accepting|
          unless /^(\S+)\s+(true|false)\s+(true|false)$/ =~ l
            raise(ADL::ParseError,
                  "Parse error line #{line_number}: state definition expected, "\
                  "'#{l}' found.")
          end
          id, initial, accepting = $1, $2, $3
          initial, accepting = ("true"==initial), ("true"==accepting)

          state = fa.add_state(:initial => initial, :accepting => accepting)
          state[:name]=id.to_s
          states[id] = state

          state_read += 1
          # all announced states read: move on to edges (or straight to the end)
          mode = (edge_count==0 ? :end : :edges) if state_read==state_count

        when :edges
          # looking for |source target symbol|
          unless /^(\S+)\s+(\S+)\s+(\S+)$/ =~ l
            raise(ADL::ParseError,
                  "Parse error line #{line_number}: edge definition expected, "\
                  "'#{l}' found.")
          end
          source, target, symbol = $1, $2, $3
          unless states[source]
            raise(ADL::ParseError,
                  "Parse error line #{line_number}: no such state #{source}")
          end
          unless states[target]
            raise(ADL::ParseError,
                  "Parse error line #{line_number}: no such state #{target}")
          end

          fa.connect(states[source], states[target], {:symbol => symbol})

          edge_read += 1
          mode = :end if edge_read==edge_count

        when :end
          raise(ADL::ParseError,
                "Parse error line #{line_number}: trailing data found '#{l}'")

        end # case mode
      end

      # announced counts must match what was actually found
      raise(ADL::ParseError, "Parse error: #{state_count} states annouced, "\
                             "#{state_read} found.") if state_count != state_read
      raise(ADL::ParseError, "Parse error: #{edge_count} edges annouced, "\
                             "#{edge_read} found.") if edge_count != edge_read

    end # Automaton.new
  end
  return automaton
end # def self.parse
121
+
122
+ #
123
+ # Parses an automaton file _f_.
124
+ #
125
+ # Shortcut for:
126
+ # File.open(f, 'r') do |io|
127
+ # Stamina::ADL.parse_automaton(io)
128
+ # end
129
+ #
130
def self.parse_automaton_file(f)
  # Opens the file at path _f_ for reading and returns the automaton parsed
  # from it; the block form of File.open closes the file afterwards.
  File.open(f) { |file| ADL::parse_automaton(file) }
end
137
+
138
+ #
139
+ # Prints an automaton to a buffer (responding to <code><<</code>) in ADL
140
+ # format. Returns the buffer itself.
141
+ #
142
def self.print_automaton(fa, buffer="")
  # Serializes _fa_ in ADL format by appending to _buffer_ (anything that
  # responds to <<) and returns that same buffer.

  # header line: "<state count> <edge count>"
  buffer << "#{fa.state_count} #{fa.edge_count}" << "\n"

  # one line per state: "<index> <initial?> <accepting?>"
  fa.states.each do |state|
    buffer << "#{state.index} #{state.initial?} #{state.accepting?}" << "\n"
  end

  # one line per edge: "<source index> <target index> <symbol>"
  fa.edges.each do |edge|
    from, to = edge.source.index, edge.target.index
    buffer << "#{from} #{to} #{edge.symbol}" << "\n"
  end

  buffer
end
152
+
153
+ #
154
+ # Prints an automaton to a file whose path is provided.
155
+ #
156
+ # Shortcut for:
157
+ # File.open(file, 'w') do |io|
158
+ # print_automaton(fa, io)
159
+ # end
160
+ #
161
def self.print_automaton_to_file(fa, file)
  # Writes _fa_ in ADL format to the file at path _file_ (opened in 'w' mode,
  # closed automatically when the block ends).
  File.open(file, 'w') { |out| print_automaton(fa, out) }
end
166
+
167
+ #################################################################################
168
+ # String and Sample Section #
169
+ #################################################################################
170
+
171
+ #
172
+ # Parses an input string _str_ and returns a InputString instance. Format of
173
+ # input strings is documented in parse_sample. _str_ is required to be a ruby
174
+ # String.
175
+ #
176
+ # Raises:
177
+ # - ADL::ParseError if the ADL string format is not respected.
178
+ #
179
def self.parse_string(str)
  # First whitespace-separated token is the label, the rest are the symbols.
  label, *symbols = str.split(' ')

  # '+' => positive, '-' => negative, '?' => unlabeled (nil); anything else
  # (including an empty string) is a format error.
  positive =
    case label
    when '+' then true
    when '-' then false
    when '?' then nil
    else raise ADL::ParseError, "Invalid string format #{str}", caller
    end

  InputString.new symbols, positive, false
end
195
+
196
+ #
197
+ # Parses the sample provided by _descr_. When a block is provided and _sample_ is
198
+ # nil, yields each parsed InputString to the block. Otherwise, fills the sample
199
+ # (any object responding to <code><<</code>) with the strings, creating a fresh new
200
+ # one (as a Sample instance) if sample is nil and no block is given.
201
+ #
202
+ # ADL provides a really simple grammar to describe samples (here is a succinct
203
+ # example, the full documentation of the sample grammar can be found in the
204
+ # self-documenting example/adl/sample.adl file):
205
+ #
206
+ # #
207
+ # # Some header comments: tool which has generated this sample,
208
+ # # maybe a date or other tool options ...
209
+ # # here: 'this sample is caracteristic for the a(ba)* regular language'
210
+ # #
211
+ # # Positive, Negative, Unlabeled strings begin with +, -, ?, respectively
212
+ # # Empty lines and lines beginning with # are simply ignored.
213
+ # #
214
+ # -
215
+ # + a
216
+ # - a b
217
+ # + a b a
218
+ #
219
+ # Raises:
220
+ # - ArgumentError unless _descr_ argument is an IO object or a String.
221
+ # - ADL::ParseError if the ADL sample format is not respected.
222
+ # - InconsistencyError if the sample is not consistent (see Sample)
223
+ #
224
def self.parse_sample(descr, sample=nil)
  # Strings are yielded to the block only when a block is given and no
  # sample was passed; otherwise they are appended to _sample_ (a fresh
  # Sample when none was passed and there is no block).
  yielding = sample.nil? && block_given?
  sample = Sample.new if sample.nil? && !yielding

  ADL::to_io(descr) do |io|
    io.each_line do |raw|
      line = raw.strip
      # skip blank lines and comment lines
      next if line.empty? || line[0,1] == '#'

      parsed = parse_string(line)
      if yielding
        yield parsed
      else
        sample << parsed
      end
    end
  end

  # nil in yielding mode, the filled sample otherwise
  sample
end
239
+
240
+ #
241
+ # Parses a sample file _f_.
242
+ #
243
+ # Shortcut for:
244
+ # File.open(f) do |file|
245
+ # sample = ADL::parse_sample(file, sample)
246
+ # end
247
+ #
248
def self.parse_sample_file(f, sample=nil)
  # Opens the file at path _f_ and returns whatever ADL::parse_sample returns
  # for it. A block given to this method is not forwarded.
  File.open(f) { |file| ADL::parse_sample(file, sample) }
end
254
+
255
+ #
256
+ # Prints a sample in ADL format on a buffer. Sample argument is expected to be
257
+ # an object responding to each, yielding InputString instances. Buffer is expected
258
+ # to be an object responding to <code><<</code>.
259
+ #
260
def self.print_sample(sample, buffer="")
  # Appends one line per string of _sample_ (its to_s form followed by a
  # newline) to _buffer_ and returns the buffer, like print_automaton does.
  # Without this return value the text written to the default buffer would
  # be unreachable by the caller.
  #
  # _sample_ only needs to respond to each; _buffer_ only needs to respond
  # to <<.
  sample.each do |str|
    buffer << str.to_s << "\n"
  end
  buffer
end
265
+
266
+ #
267
+ # Prints a sample in a file.
268
+ #
269
+ # Shortcut for:
270
+ # File.open(file, 'w') do |io|
271
+ # print_sample(sample, io)
272
+ # end
273
+ #
274
def self.print_sample_in_file(sample, file)
  # Writes _sample_ in ADL format to the file at path _file_ (opened in 'w'
  # mode, closed automatically when the block ends).
  File.open(file, 'w') { |out| print_sample(sample, out) }
end
279
+
280
+ ### private section ##########################################################
281
+ private
282
+
283
+ #
284
+ # Converts a parsable argument to an IO object or raises an ArgumentError.
285
+ #
286
def self.to_io(descr)
  # Yields an IO-like object for _descr_:
  # - an IO or a StringIO is yielded as is (StringIO is not a subclass of IO,
  #   so it is listed explicitly);
  # - a String is wrapped in a fresh StringIO.
  # Any other argument raises an ArgumentError.
  #
  # Note: a bare `private` keyword does not apply to methods defined with
  # `def self.`, so this helper stays callable as ADL::to_io, which the
  # parse methods rely on.
  case descr
  when IO, StringIO
    yield descr
  when String
    yield StringIO.new(descr)
  else
    raise ArgumentError, "IO instance expected, #{descr.class} received", caller
  end
end
296
+
297
+ end # module ADL
298
+ end # module Stamina
@@ -0,0 +1,1237 @@
1
+ module Stamina
2
+
3
+ #
4
+ # Automaton data-structure.
5
+ #
6
+ # == Examples
7
+ # The following example uses a lot of useful DRY shortcuts, so, if it does not
8
+ # fit you needs then, read on!):
9
+ #
10
+ # # Building an automaton for the regular language a(ba)*
11
+ # fa = Automaton.new do
12
+ # add_state(:initial => true)
13
+ # add_state(:accepting => true)
14
+ # connect(0,1,'a')
15
+ # connect(1,0,'b')
16
+ # end
17
+ #
18
+ # # It accepts 'a b a b a', rejects 'a b' as well as ''
19
+ # puts fa.accepts?('? a b a b a') # prints true
20
+ # puts fa.accepts?('? a b') # prints false
21
+ # puts fa.rejects?('?') # prints true
22
+ #
23
+ # == Four things you need to know
24
+ # 1. Automaton, State and Edge classes implement a Markable design pattern, that
25
+ # is, you can read and write any key/value pair you want on them using the []
26
+ # and []= operators. Note that the following keys are used by Stamina itself,
27
+ # with the obvious semantics (for automata and transducers):
28
+ # - <tt>:initial</tt>, <tt>:accepting</tt>, <tt>:error</tt> on State;
29
+ # expected to be _true_ or _false_ (_nil_ and omitted are considered as false).
30
+ # Shortcuts for querying and setting these attributes are provided by State.
31
+ # - <tt>:symbol</tt> on Edge, with shortcuts as well on Edge.
32
+ # The convention is to use _nil_ for the epsilon symbol (aka non observable)
33
+ # on non deterministic automata.
34
+ # The following keys are reserved for future extensions:
35
+ # - <tt>:output</tt> on State and Edge.
36
+ # - <tt>:short_prefix</tt> on State.
37
+ # See also the "About states and edges" subsection of the design choices.
38
+ # 2. Why using State methods State#step and State#delta ? The Automaton class includes
39
+ # the Walking module by default, which is much more powerful !
40
+ # 3. The constructor of this class executes the argument block (between <tt>do</tt>
41
+ # and <tt>end</tt>) with instance_eval by default. You won't be able to invoke
42
+ # the methods defined in the scope of your block in such a case. See new
43
+ # for details.
44
+ # 4. This class has not been designed with efficiency in mind. If you experience
45
+ # performance problems, read the "About Automaton modifications" sub section
46
+ # of the design choices.
47
+ #
48
+ # == Design choices
49
+ # This section fully details the design choices that have been made for the
50
+ # implementation of the Automaton data structure used by Stamina. It is provided
51
+ # because Automaton is one of the core classes of Stamina, that probably all
52
+ # users (and contributors) will use. Automaton usage is really user-friendly,
53
+ # so <b>you are normally not required</b> to read this section in the first
54
+ # place! Read it only if it is of interest to you, or if you experience unexpected
55
+ # results.
56
+ #
57
+ # === One Automaton class only
58
+ # One class only implements all kinds of automata: deterministic, non-deterministic,
59
+ # transducers, prefix-tree-acceptors, etc. The Markable design pattern on states and
60
+ # edges should allow you to make anything you could find useful with this class.
61
+ #
62
+ # === Adjacency-list graph
63
+ # This class implements an automaton using an adjacency-list graph structure.
64
+ # The automaton has state and edge array lists and exposes them through the
65
+ # _states_ and _edges_ accessors. In order to let users enjoy the enumerability
66
+ # of Ruby's arrays while allowing automata to be modified, these arrays are
67
+ # externally modifiable. However, <b>users are not expected to modify them!</b>
68
+ # and future versions of Stamina will certainly remove this ability.
69
+ #
70
+ # === Indices exposed
71
+ # State and Edge indices in these arrays are exposed by this class. Unless stated
72
+ # explicitly, all methods taking state or edge arguments support indices as well.
73
+ # Moreover, ith_state, ith_states, ith_edge and ith_edges methods provide powerful
74
+ # access to states and edges by indices. All these methods are robust to invalid
75
+ # indices (and raise an IndexError if incorrectly invoked) but do not allow
76
+ # negative indexing (unlike ruby arrays).
77
+ #
78
+ # States and edges know their index in the corresponding array and expose them
79
+ # through the (read-only) _index_ accessor. These indices are always valid;
80
+ # without deletion of states or edges in the automaton, they are guaranteed not
81
+ # to change. Indices saved in your own variables must be considered deprecated
82
+ # each time you perform a deletion ! That's the only rule to respect if you plan
83
+ # to use indices.
84
+ #
85
+ # Indices exposition may seem a strange choice and could be interpreted as
86
+ # breaking OOP's best practice. You are not required to use them but, as will
87
+ # quickly appear, using them is really powerful and leads to beautiful code!
88
+ # If you don't remove any state or edge, this class guarantees that indices
89
+ # are assigned in the same order as invocations of add_state and add_edge (as
90
+ # well as their plural forms and aliases).
91
+ #
92
+ # === About states and edges
93
+ # Edges know their source and target states, which are exposed through the
94
+ # _source_ and _target_ (read-only) accessors (also aliased as _from_ and _to_).
95
+ # States keep their incoming and outgoing edges in arrays, which are accessible
96
+ # (in fact, a copy) using State#in_edges and State#out_edges. If you use them
97
+ # for walking the automaton in a somewhat standard way, consider using the Walking
98
+ # module instead!
99
+ #
100
+ # Common attributes of states and edges are installed using the Markable pattern
101
+ # itself:
102
+ # - <tt>:initial</tt>, <tt>:accepting</tt> and <tt>:error</tt> on states. These
103
+ # attributes are expected to be _true_ or _false_ (_nil_ and omitted are also
104
+ # supported and both considered as false).
105
+ # - <tt>:symbol</tt> on edges. Any object you want as long as it responds to the
106
+ # <tt><=></tt> operator. Also, the convention is to use _nil_ for the epsilon
107
+ # symbol (aka non observable) on non deterministic automata.
108
+ #
109
+ # In addition, useful shortcuts are available:
110
+ # - <tt>s.initial?</tt> is a shortcut for <tt>s[:initial]</tt> if _s_ is a State
111
+ # - <tt>s.initial!</tt> is a shortcut for <tt>s[:initial]=true</tt> if _s_ is a State
112
+ # - Similar shortcuts are available for :accepting and :error
113
+ # - <tt>e.symbol</tt> is a shortcut for <tt>e[:symbol]</tt> if _e_ is an Edge
114
+ # - <tt>e.symbol='a'</tt> is a shortcut for <tt>e[:symbol]='a'</tt> if _e_ is an Edge
115
+ #
116
+ # Following keys should be considered reserved by Stamina for future extensions:
117
+ # - <tt>:output</tt> on State and Edge.
118
+ # - <tt>:short_prefix</tt> on State.
119
+ #
120
+ # === About Automaton modifications
121
+ # This class has not been implemented with efficiency in mind. In particular, we expect
122
+ # the vast majority of Stamina core algorithms considering automata as immutable values.
123
+ # For this reason, the Automaton class does not handle modifications really efficiently.
124
+ #
125
+ # So, if you experience performance problems, consider what follows:
126
+ # 1. Why updating an automaton ? Building a fresh one is much more clean and efficient !
127
+ # This is particularly true for removals.
128
+ # 2. If you can create multiples states or edges at once, consider the plural form
129
+ # of the modification methods: add_n_states and drop_states. Those methods are
130
+ # optimized for multiple updates.
131
+ #
132
+ # == Detailed API
133
+ class Automaton
134
+ include Stamina::Markable
135
+
136
+ #
137
+ # Automaton state.
138
+ #
139
+ class State
140
+ include Stamina::Markable
141
+ attr_reader :automaton, :index
142
+
143
+ #
144
+ # Creates a state.
145
+ #
146
+ # Arguments:
147
+ # - automaton: parent automaton of the state.
148
+ # - index: index of the state in the state list.
149
+ # - data: user data attached to this state.
150
+ #
151
def initialize(automaton, index, data)
  # owner and position in the owner's state list
  @automaton, @index = automaton, index
  # work on a shallow copy of the caller's data
  @data = data.dup
  # adjacency lists start empty; no epsilon closure cached yet
  @in_edges, @out_edges = [], []
  @epsilon_closure = nil
end
159
+
160
+ ### public read-only section ###############################################
161
+ public
162
+
163
+ #
164
+ # Returns true if this state is an initial state, false otherwise.
165
+ #
166
def initial?
  # the stored :initial mark when truthy, false otherwise (nil/missing => false)
  @data[:initial] || false
end
167
+
168
+ #
169
+ # Sets this state as an initial state.
170
+ #
171
def initial!
  # writes the :initial mark directly into the state's data
  @data[:initial] = true
end
172
+
173
+ #
174
+ # Returns true if this state is an accepting state, false otherwise.
175
+ #
176
def accepting?
  # the stored :accepting mark when truthy, false otherwise (nil/missing => false)
  @data[:accepting] || false
end
177
+
178
+ #
179
+ # Sets this state as an accepting state.
180
+ #
181
def accepting!
  # writes the :accepting mark directly into the state's data
  @data[:accepting] = true
end
182
+
183
+ #
184
+ # Returns true if this state is an error state, false otherwise.
185
+ #
186
def error?
  # the stored :error mark when truthy, false otherwise (nil/missing => false)
  @data[:error] || false
end
187
+
188
+ #
189
+ # Sets this state as an error state.
190
+ #
191
def error!
  # writes the :error mark directly into the state's data
  @data[:error] = true
end
192
+
193
+ #
194
+ # Returns true if this state is deterministic, false otherwise.
195
+ #
196
def deterministic?
  symbols = out_symbols
  # an epsilon (nil) outgoing edge makes the state non deterministic
  return false if symbols.include?(nil)
  # as many distinct symbols as outgoing edges: no symbol is used twice
  symbols.size == @out_edges.size
end
200
+
201
+ #
202
+ # Returns an array containing all incoming edges of the state. Edges are
203
+ # sorted if _sorted_ is set to true. If two incoming edges have same symbol
204
+ # no order is guaranteed between them.
205
+ #
206
+ # Returned array may be modified.
207
+ #
208
def in_edges(sorted=false)
  # either way the caller receives a new array, never the internal list
  return @in_edges.sort if sorted
  @in_edges.dup
end
211
+
212
+ #
213
+ # Returns an array containing all outgoing edges of the state. Edges are
214
+ # sorted if _sorted_ is set to true. If two outgoing edges have same symbol
215
+ # no order is guaranteed between them.
216
+ #
217
+ # Returned array may be modified.
218
+ #
219
def out_edges(sorted=false)
  # either way the caller receives a new array, never the internal list
  return @out_edges.sort if sorted
  @out_edges.dup
end
222
+
223
+ #
224
+ # Returns an array with the different symbols appearing on incoming edges.
225
+ # Returned array does not contain duplicates. Symbols are sorted in the
226
+ # array if _sorted_ is set to true.
227
+ #
228
+ # Returned array may be modified.
229
+ #
230
def in_symbols(sorted=false)
  # distinct symbols, in order of first appearance on incoming edges
  distinct = @in_edges.map { |edge| edge.symbol }.uniq
  return distinct unless sorted
  # ordering is delegated to the automaton's symbols comparator
  distinct.sort(&automaton.symbols_comparator)
end
234
+
235
+ #
236
+ # Returns an array with the different symbols appearing on outgoing edges.
237
+ # Returned array does not contain duplicates. Symbols are sorted in the
238
+ # array if _sorted_ is set to true.
239
+ #
240
+ # Returned array may be modified.
241
+ #
242
def out_symbols(sorted=false)
  # distinct symbols, in order of first appearance on outgoing edges
  distinct = @out_edges.map { |edge| edge.symbol }.uniq
  return distinct unless sorted
  # ordering is delegated to the automaton's symbols comparator
  distinct.sort(&automaton.symbols_comparator)
end
246
+
247
+ #
248
+ # Returns an array with adjacent states (in or out edge).
249
+ #
250
+ # Returned array may be modified.
251
+ #
252
def adjacent_states
  # set union: predecessors first, then successors not already listed
  in_adjacent_states | out_adjacent_states
end
255
+
256
+ #
257
+ # Returns an array with adjacent states along an incoming edge (without
258
+ # duplicates).
259
+ #
260
+ # Returned array may be modified.
261
+ #
262
def in_adjacent_states
  # source state of every incoming edge, each listed once
  sources = @in_edges.map { |edge| edge.source }
  sources.uniq
end
265
+
266
+ #
267
+ # Returns an array with adjacent states along an outgoing edge (without
268
+ # duplicates).
269
+ #
270
+ # Returned array may be modified.
271
+ #
272
def out_adjacent_states
  # target state of every outgoing edge, each listed once
  targets = @out_edges.map { |edge| edge.target }
  targets.uniq
end
275
+
276
+ #
277
+ # Returns reachable states from this one with an input _symbol_. Returned
278
+ # array does not contain duplicates and may be modified. This method is not
279
+ # epsilon symbol aware.
280
+ #
281
def step(symbol)
  # Targets of the outgoing edges labelled exactly with _symbol_ (nil selects
  # epsilon edges; no epsilon closure is computed here).
  #
  # Parallel edges carrying the same symbol towards the same target would
  # otherwise list that target several times, so duplicates are removed to
  # honour the documented "no duplicates" contract. A fresh array is
  # returned; the caller may modify it.
  @out_edges.select{|e| e.symbol==symbol}.collect{|e| e.target}.uniq
end
284
+
285
+ #
286
+ # Returns the state reached from this one with an input _symbol_, or nil if
287
+ # no such state. This method is not epsilon symbol aware. Moreover it is
288
+ # expected to be used on deterministic states only. If the state is not
289
+ # deterministic, the method returns one reachable state if such a state
290
+ # exists; which one is returned must be considered non deterministic.
291
+ #
292
def dfa_step(symbol)
  # first outgoing edge carrying _symbol_, if any
  edge = @out_edges.find { |e| e.symbol == symbol }
  edge ? edge.target : nil
end
296
+
297
+ #
298
+ # Computes the epsilon closure of this state. Epsilon closure is the set of
299
+ # all states reached from this one with a <tt>eps*</tt> input (sequence of
300
+ # zero or more epsilon symbols). The current state is always contained in
301
+ # the epsilon closure. Returns an unsorted array without duplicates; this
302
+ # array may not be modified.
303
+ #
304
def epsilon_closure
  # serve the cached closure when one is available
  return @epsilon_closure if @epsilon_closure
  # otherwise compute it once and keep it as a frozen array
  @epsilon_closure = compute_epsilon_closure(Set.new).to_a.freeze
end
307
+
308
+ #
309
+ # Internal implementation of epsilon_closure. _result_ is expected to be
310
+ # a Set instance, is modified and is the returned value.
311
+ #
312
def compute_epsilon_closure(result)
  # Depth-first accumulation of the epsilon closure into _result_ (a Set).
  # _result_ doubles as the visited set, which keeps the recursion finite on
  # epsilon cycles. The same set is returned.
  #
  # (The former `raise if result.nil?` could never fire: a nil _result_
  # already fails on the `<<` below.)
  result << self
  step(nil).each do |t|
    t.compute_epsilon_closure(result) unless result.include?(t)
  end
  result
end
320
+
321
+ #
322
+ # Computes an array representing the set of states that can be reached from
323
+ # this state with a given input _symbol_. Returned array does not contain
324
+ # duplicates and may be modified. No particular ordering of states in the
325
+ # array is guaranteed.
326
+ #
327
+ # This method is epsilon symbol aware (represented with nil) on non
328
+ # deterministic automata, meaning that it actually computes the set of
329
+ # reachable states through strings respecting the <tt>eps* symbol eps*</tt>
330
+ # regular expression, where eps is the epsilon symbol.
331
+ #
332
def delta(symbol)
  unless automaton.deterministic?
    # eps* : every state reachable before consuming the symbol
    before = epsilon_closure

    # symbol : one step on _symbol_ from each of those states
    after_symbol = before.collect { |s| s.step(symbol) }.flatten.uniq

    # eps* : extend with what epsilon moves reach afterwards
    return after_symbol.collect { |s| s.epsilon_closure }.flatten.uniq
  end

  # deterministic automaton: at most one target, wrapped in an array
  target = dfa_delta(symbol)
  target.nil? ? [] : [target]
end
354
+
355
+ #
356
+ # Returns the target state that can be reached from this state with _symbol_
357
+ # input. Returns nil if no such state exists.
358
+ #
359
+ # This method is expected to be used on deterministic automata. Unlike delta,
360
+ # it returns a State instance (or nil), not an array of states. When used on
361
+ # non deterministic automata, it returns a state immediately reachable from
362
+ # this state with _symbol_ input, or nil if no such state exists. This
363
+ # method is not epsilon aware.
364
+ #
365
def dfa_delta(symbol)
  # the epsilon symbol never matches here
  return nil if symbol.nil?
  # first outgoing edge carrying _symbol_, if any
  edge = @out_edges.find { |e| e.symbol == symbol }
  edge ? edge.target : nil
end
370
+
371
+ #
372
+ # Provides comparator of states, based on the index in the automaton state
373
+ # list. This method returns nil unless _o_ is a State from the same
374
+ # automaton as self.
375
+ #
376
def <=>(o)
  # only states of one and the same automaton are comparable
  comparable = State===o && automaton===o.automaton
  return nil unless comparable
  index <=> o.index
end
381
+
382
+ # Returns a string representation
383
+ def inspect
384
+ 's' << @index.to_s
385
+ end
386
+
387
+ # Returns a string representation
388
+ def to_s
389
+ 's' << @index.to_s
390
+ end
391
+
392
+ ### protected write section ################################################
393
+ protected
394
+
395
+ # Changes the index of this state in the state list. This method is only
396
+ # expected to be used by the automaton itself.
397
def index=(i)
  # plain writer for the position in the state list
  @index = i
end
398
+
399
+ #
400
+ # Fired by Loaded when a user data is changed. The message is forwarded to
401
+ # the automaton.
402
+ #
403
def state_changed(what, description)
  # drop the cached epsilon closure before notifying
  @epsilon_closure = nil
  # relay the notification to the owning automaton; send is used so the
  # call works whatever the visibility of its state_changed method
  @automaton.send(:state_changed, what, description)
end
407
+
408
+ # Adds an incoming edge to the state.
409
def add_incoming_edge(edge)
  # the cached epsilon closure is discarded on every edge change
  @epsilon_closure = nil
  @in_edges.push(edge)
end
413
+
414
+ # Adds an outgoing edge to the state.
415
def add_outgoing_edge(edge)
  # the cached epsilon closure is discarded on every edge change
  @epsilon_closure = nil
  @out_edges.push(edge)
end
419
+
420
+ # Drops an incoming edge from the state.
421
def drop_incoming_edge(edge)
  # the cached epsilon closure is discarded on every edge change
  @epsilon_closure = nil
  # Array#delete: returns the removed edge, or nil when it was not listed
  removed = @in_edges.delete(edge)
  removed
end
425
+
426
+ # Drops an outgoing edge from the state.
427
def drop_outgoing_edge(edge)
  # the cached epsilon closure is discarded on every edge change
  @epsilon_closure = nil
  # Array#delete: returns the removed edge, or nil when it was not listed
  removed = @out_edges.delete(edge)
  removed
end
431
+
432
+ protected :compute_epsilon_closure
433
+ end
434
+
435
+ #
436
+ # Automaton edge.
437
+ #
438
+ class Edge
439
+ include Stamina::Markable
440
+ attr_reader :automaton, :index, :from, :to
441
+
442
+ #
443
+ # Creates an edge.
444
+ #
445
+ # Arguments:
446
+ # - automaton: parent automaton of the edge.
447
+ # - index: index of the edge in the edge list.
448
+ # - data: user data attached to this edge.
449
+ # - from: source state of the edge.
450
+ # - to: target state of the edge.
451
+ #
452
def initialize(automaton, index, data, from, to)
  # owner and position in the owner's edge list
  @automaton = automaton
  @index = index
  # user data is kept by reference (no copy is made here)
  @data = data
  # end points of the edge
  @from = from
  @to = to
end
457
+
458
+ # Returns edge symbol.
459
def symbol
  # read straight from the edge data
  @data[:symbol]
end
460
+
461
+ # Sets edge symbol.
462
def symbol=(symbol)
  # written straight into the edge data
  @data[:symbol] = symbol
end
463
+
464
+ alias :source :from
465
+ alias :target :to
466
+
467
+ #
468
+ # Provides comparator of edges, based on the index in the automaton edge
469
+ # list. This method returns nil unless _o_ is an Edge from the same
470
+ # automaton than self.
471
+ # Once again, this method has nothing to do with equality, it looks at an
472
+ # index and ID only.
473
+ #
474
+ def <=>(o)
475
+ return nil unless Edge===o
476
+ return nil unless automaton===o.automaton
477
+ return index <=> o.index
478
+ end
479
+
480
+ # Returns a string representation
481
+ def inspect
482
+ 'e' << @index.to_s
483
+ end
484
+
485
+ # Returns a string representation
486
+ def to_s
487
+ 'e' << @index.to_s
488
+ end
489
+
490
+ ### protected write section ################################################
491
+ protected
492
+
493
+ # Changes the index of this edge in the edge list. This method is only
494
+ # expected to be used by the automaton itself.
495
+ def index=(i) @index=i end
496
+
497
+ #
498
+ # Fired by Loaded when a user data is changed. The message if forwarded to
499
+ # the automaton.
500
+ #
501
+ def state_changed(what, infos)
502
+ @automaton.send(:state_changed, what, infos)
503
+ end
504
+
505
+ end
506
+
507
+ ### Automaton class ##########################################################
508
+ public
509
+
510
+ # State list and edge list of the automaton
511
+ attr_reader :states, :edges
512
+
513
+ #
514
+ # Creates an empty automaton and executes the block passed as argument. The _onself_
515
+ # argument dictates the way _block_ is executed:
516
+ # - when set to false, the block is executed traditionnally (i.e. using yield).
517
+ # In this case, methods invocations must be performed on the automaton object
518
+ # passed as block argument.
519
+ # - when set to _true_ (by default) the block is executed in the context of the
520
+ # automaton itself (i.e. with instance_eval), allowing call of its methods
521
+ # without prefixing them by the automaton variable. The automaton still
522
+ # passes itself as first block argument. Note that in this case, you won't be
523
+ # able to invoke a method defined in the scope of your block.
524
+ #
525
+ # Example:
526
+ # # The DRY way to do:
527
+ # Automaton.new do |automaton| # automaton will not be used here, but it is passed
528
+ # add_state(:initial => true)
529
+ # add_state(:accepting => true)
530
+ # connect(0, 1, 'a')
531
+ # connect(1, 0, 'b')
532
+ #
533
+ # # method_in_caller_scope() # commented because not allowed here !!
534
+ # end
535
+ #
536
+ # # The other way:
537
+ # Automaton.new(false) do |automaton| # automaton MUST be used here
538
+ # automaton.add_state(:initial => true)
539
+ # automaton.add_state(:accepting => true)
540
+ # automaton.connect(0, 1, 'a')
541
+ # automaton.connect(1, 0, 'b')
542
+ #
543
+ # method_in_caller_scope() # allowed in this variant !!
544
+ # end
545
+ #
546
# Builds an empty automaton (no state, no edge, no cached value) and, when a
# block is given, runs it: through block.call(self) when _onself_ is false,
# otherwise in the context of the automaton itself (which is still passed
# as block argument on Ruby >= 1.9).
def initialize(onself=true, &block) # :yields: automaton
  @states, @edges = [], []
  @initials = nil
  @alphabet = nil
  @deterministic = nil

  # without a block there is nothing left to do
  return unless block_given?

  if !onself
    block.call(self)
  elsif RUBY_VERSION >= "1.9.0"
    instance_exec(self, &block)
  else
    instance_eval(&block)
  end
end
566
+
567
+ ### public read-only section #################################################
568
+ public
569
+
570
+ #
571
+ # Returns a symbols comparator taking epsilon symbols into account. Comparator
572
+ # is provided as Proc instance which is a lambda function.
573
+ #
574
# Returns (and memoizes) a lambda comparing two edge symbols: equal symbols
# give 0, a nil (epsilon) symbol sorts before anything else, and any other
# pair is compared with <=>.
def symbols_comparator
  @symbols_comparator ||= Kernel.lambda do |a, b|
    next 0 if a == b
    next -1 if a.nil?
    next 1 if b.nil?
    a <=> b
  end
end
583
+
584
+ # Returns the number of states
585
# Number of states currently held in the state list.
def state_count
  @states.length
end
586
+
587
+ # Returns the number of edges
588
# Number of edges currently held in the edge list.
def edge_count
  @edges.length
end
589
+
590
+ #
591
+ # Returns the i-th state of the state list.
592
+ #
593
+ # Raises:
594
+ # - ArgumentError unless i is an Integer
595
+ # - ArgumentError if i is not in [0..state_count)
596
+ #
597
# Returns the state stored at position _i_ of the state list.
# Raises ArgumentError when _i_ is not an Integer, or when it lies outside
# [0..state_count).
def ith_state(i)
  unless Integer === i
    raise(ArgumentError, "Integer expected, #{i} found.", caller)
  end
  if i < 0 || i >= state_count
    raise(ArgumentError, "Invalid state index #{i}", caller)
  end
  @states[i]
end
604
+
605
+ #
606
+ # Returns state associated with the supplied state name, throws an exception if no such state can be found.
607
+ #
608
# Looks up the first state whose :name mark equals _name_.
# Raises ArgumentError when _name_ is not a String or when no state carries
# that name.
def get_state(name)
  unless String === name
    raise(ArgumentError, "String expected, #{name} found.", caller)
  end
  found = states.detect { |candidate| name == candidate[:name] }
  if found.nil?
    raise(ArgumentError, "State #{name} was not found", caller)
  end
  found
end
618
+
619
+ #
620
+ # Returns the i-th states of the state list.
621
+ #
622
+ # Raises:
623
+ # - ArgumentError unless all _i_ are integers
624
+ # - ArgumentError unless all _i_ are in [0..state_count)
625
+ #
626
# Maps every given index to its state through ith_state, keeping the
# argument order (errors raised by ith_state propagate).
def ith_states(*i)
  i.map { |position| ith_state(position) }
end
629
+
630
+ #
631
+ # Returns the i-th edge of the edge list.
632
+ #
633
+ # Raises:
634
+ # - ArgumentError unless i is an Integer
635
+ # - ArgumentError if i is not in [0..edge_count)
636
+ #
637
# Returns the edge stored at position _i_ of the edge list.
# Raises ArgumentError when _i_ is not an Integer, or when it lies outside
# [0..edge_count).
def ith_edge(i)
  unless Integer === i
    raise(ArgumentError, "Integer expected, #{i} found.", caller)
  end
  if i < 0 || i >= edge_count
    raise(ArgumentError, "Invalid edge index #{i}", caller)
  end
  @edges[i]
end
644
+
645
+ #
646
+ # Returns the i-th edges of the edge list.
647
+ #
648
+ # Raises:
649
+ # - ArgumentError unless all _i_ are integers
650
+ # - ArgumentError unless all _i_ are in [0..edge_count)
651
+ #
652
# Maps every given index to its edge through ith_edge, keeping the
# argument order (errors raised by ith_edge propagate).
def ith_edges(*i)
  i.map { |position| ith_edge(position) }
end
655
+
656
+ #
657
+ # Calls block for each state of the automaton state list. States are
658
+ # enumerated in index order.
659
+ #
660
# Yields every state of the state list, in list order. Without a block the
# list is still walked but nothing is yielded. Returns the state list.
def each_state
  @states.each do |state|
    next unless block_given?
    yield state
  end
end
661
+
662
+ #
663
+ # Calls block for each edge of the automaton edge list. Edges are
664
+ # enumerated in index order.
665
+ #
666
# Yields every edge of the edge list, in list order. Without a block the
# list is still walked but nothing is yielded. Returns the edge list.
def each_edge
  @edges.each do |edge|
    next unless block_given?
    yield edge
  end
end
667
+
668
+ #
669
+ # Returns an array with incoming edges of _state_. Edges are sorted by symbols
670
+ # if _sorted_ is set to true. If two incoming edges have same symbol, no
671
+ # order is guaranteed between them. Returned array may be modified.
672
+ #
673
+ # If _state_ is an Integer, this method returns the incoming edges of the
674
+ # state'th state in the state list.
675
+ #
676
+ # Raises:
677
+ # - IndexError if state is an Integer and state<0 or state>=state_count.
678
+ # - ArgumentError if _state_ is not a valid state for this automaton.
679
+ #
680
# Resolves _state_ with to_state, then delegates to that state's own
# in_edges, forwarding the _sorted_ flag.
def in_edges(state, sorted=false)
  resolved = to_state(state)
  resolved.in_edges(sorted)
end
681
+
682
+ #
683
+ # Returns an array with outgoing edges of _state_. Edges are sorted by symbols
684
+ # if _sorted_ is set to true. If two incoming edges have same symbol, no
685
+ # order is guaranteed between them. Returned array may be modified.
686
+ #
687
+ # If _state_ is an Integer, this method returns the outgoing edges of the
688
+ # state'th state in the state list.
689
+ #
690
+ # Raises:
691
+ # - IndexError if state is an Integer and state<0 or state>=state_count.
692
+ # - ArgumentError if state is not a valid state (not a state or not from this
693
+ # automaton)
694
+ #
695
# Resolves _state_ with to_state, then delegates to that state's own
# out_edges, forwarding the _sorted_ flag.
def out_edges(state, sorted=false)
  resolved = to_state(state)
  resolved.out_edges(sorted)
end
696
+
697
+ #
698
+ # Returns an array with the different symbols appearing on incoming edges of
699
+ # _state_. Returned array does not contain duplicates and may be modified;
700
+ # it is sorted if _sorted_ is set to true.
701
+ #
702
+ # If _state_ is an Integer, this method returns the incoming symbols of the
703
+ # state'th state in the state list.
704
+ #
705
+ # Raises:
706
+ # - IndexError if state is an Integer and state<0 or state>=state_count.
707
+ # - ArgumentError if _state_ is not a valid state for this automaton.
708
+ #
709
# Resolves _state_ with to_state, then delegates to that state's own
# in_symbols, forwarding the _sorted_ flag.
def in_symbols(state, sorted=false)
  resolved = to_state(state)
  resolved.in_symbols(sorted)
end
710
+
711
+ #
712
+ # Returns an array with the different symbols appearing on outgoing edges of
713
+ # _state_. Returned array does not contain duplicates and may be modified;
714
+ # it is sorted if _sorted_ is set to true.
715
+ #
716
+ # If _state_ is an Integer, this method returns the outgoing symbols of the
717
+ # state'th state in the state list.
718
+ #
719
+ # Raises:
720
+ # - IndexError if state is an Integer and state<0 or state>=state_count.
721
+ # - ArgumentError if state is not a valid state (not a state or not from this
722
+ # automaton)
723
+ #
724
# Resolves _state_ with to_state, then delegates to that state's own
# out_symbols, forwarding the _sorted_ flag.
def out_symbols(state, sorted=false)
  resolved = to_state(state)
  resolved.out_symbols(sorted)
end
725
+
726
+ #
727
+ # Returns an array with adjacent states (along incoming and outgoing edges)
728
+ # of _state_. Returned array does not contain duplicates; it may be modified.
729
+ #
730
+ # If _state_ is an Integer, this method returns the adjacent states of the
731
+ # state'th state in the state list.
732
+ #
733
+ # Raises:
734
+ # - IndexError if state is an Integer and state<0 or state>=state_count.
735
+ # - ArgumentError if state is not a valid state (not a state or not from this
736
+ # automaton)
737
+ #
738
# Resolves _state_ with to_state, then delegates to that state's own
# adjacent_states.
def adjacent_states(state)
  resolved = to_state(state)
  resolved.adjacent_states()
end
739
+
740
+ #
741
+ # Returns an array with adjacent states (along incoming edges) of _state_.
742
+ # Returned array does not contain duplicates; it may be modified.
743
+ #
744
+ # If _state_ is an Integer, this method returns the incoming adjacent states
745
+ # of the state'th state in the state list.
746
+ #
747
+ # Raises:
748
+ # - IndexError if state is an Integer and state<0 or state>=state_count.
749
+ # - ArgumentError if state is not a valid state (not a state or not from this
750
+ # automaton)
751
+ #
752
# Resolves _state_ with to_state, then delegates to that state's own
# in_adjacent_states.
def in_adjacent_states(state)
  resolved = to_state(state)
  resolved.in_adjacent_states()
end
753
+
754
+ #
755
+ # Returns an array with adjacent states (along outgoing edges) of _state_.
756
+ # Returned array does not contain duplicates; it may be modified.
757
+ #
758
+ # If _state_ is an Integer, this method returns the outgoing adjacent states
759
+ # of the state'th state in the state list.
760
+ #
761
+ # Raises:
762
+ # - IndexError if state is an Integer and state<0 or state>=state_count.
763
+ # - ArgumentError if state is not a valid state (not a state or not from this
764
+ # automaton)
765
+ #
766
# Resolves _state_ with to_state, then delegates to that state's own
# out_adjacent_states.
def out_adjacent_states(state)
  resolved = to_state(state)
  resolved.out_adjacent_states()
end
767
+
768
+ #
769
+ # Collects all initial states of this Automaton and returns it. Returned array
770
+ # does not contain duplicates and may be modified.
771
+ #
772
+ # This method is epsilon symbol aware (represented with nil) on
773
+ # non-deterministic automata, meaning that it actually computes the set of
774
+ # reachable states from an initial state through strings respecting the
775
+ # <tt>eps*</tt> regular expression, where eps is the epsilon symbol.
776
+ #
777
# Returns the cached initial states, computing them first through
# compute_initial_states when the cache is unset or empty.
def initial_states
  if @initials.nil? or @initials.empty?
    @initials = compute_initial_states
  end
  @initials
end
781
+
782
+ #
783
+ # Returns the initial state of the automaton. This method is expected to be used
784
+ # on deterministic automata only. Unlike initial_states, it returns one State
785
+ # instance instead of an Array.
786
+ #
787
+ # When used with a non deterministic automaton, it returns one of the states
788
+ # tagged as initial. Which one is returned must be considered a non
789
+ # deterministic choice. This method is not epsilon symbol aware.
790
+ #
791
# Returns the first element of initial_states (nil when there is none).
def initial_state
  initial_states.first
end
794
+
795
+ # Internal implementation of initial_states.
796
# Internal implementation of initial_states: gathers the epsilon closure of
# every state flagged initial? and merges them without duplicates.
def compute_initial_states()
  closures = []
  @states.each do |state|
    closures << state.epsilon_closure if state.initial?
  end
  closures.flatten.uniq
end
800
+
801
+ ### public write section #####################################################
802
+ public
803
+
804
+ #
805
+ # Adds a new state.
806
+ #
807
+ # Arguments:
808
+ # - data: user-data to attach to the state (see Automaton documentation).
809
+ #
810
+ # Raises:
811
+ # - ArgumentError if _data_ is not a valid state data.
812
+ #
813
# Validates _data_, wraps it in a new State placed at the end of the state
# list, signals the change through state_changed and returns the new state.
def add_state(data={})
  checked = to_valid_state_data(data)
  created = State.new(self, state_count, checked)
  @states.push(created)

  # cached values must be recomputed after a structural change
  state_changed(:state_added, created)
  created
end
826
+ alias :create_state :add_state
827
+
828
+ #
829
+ # Adds _n_ new states in the automaton. Created states are returned as an
830
+ # ordered array (order of states according to their index in state list).
831
+ #
832
+ # _data_ is duplicated for each created state.
833
+ #
834
# Calls add_state _n_ times, each time with a fresh duplicate of _data_, and
# returns the created states in creation order.
def add_n_states(n, data={})
  n.times.map { add_state(data.dup) }
end
841
+ alias :create_n_states :add_n_states
842
+
843
+ #
844
+ # Adds a new edge, connecting _from_ and _to_ states of the automaton.
845
+ #
846
+ # Arguments:
847
+ # - from: either a State or a valid state index (Integer).
848
+ # - to: either a State or a valid state index (Integer).
849
+ # - data: user data to attach to the created edge (see Automaton documentation).
850
+ #
851
+ # Raises:
852
+ # - IndexError if _from_ is an Integer but not in [0..state_count)
853
+ # - IndexError if _to_ is an Integer but not in [0..state_count)
854
+ # - ArgumentError if _from_ is not a valid state for this automaton.
855
+ # - ArgumentError if _to_ is not a valid state for this automaton.
856
+ # - ArgumentError if _data_ is not a valid edge data.
857
+ #
858
# Resolves both endpoints with to_state, validates _data_, creates the Edge
# at the end of the edge list, registers it on its source and target
# states, signals the change through state_changed and returns the edge.
def add_edge(from, to, data)
  source = to_state(from)
  target = to_state(to)
  payload = to_valid_edge_data(data)

  created = Edge.new(self, edge_count, payload, source, target)
  @edges.push(created)
  source.send(:add_outgoing_edge, created)
  target.send(:add_incoming_edge, created)

  # cached values must be recomputed after a structural change
  state_changed(:edge_added, created)
  created
end
873
+ alias :create_edge :add_edge
874
+ alias :connect :add_edge
875
+
876
+ # Adds all states and transitions (as copies) from a different automaton.
877
+ # Returns the initial state of the added part. In order to ensure that names of
878
+ # the new states do not clash with names of existing states, state names may have
879
+ # to be removed from added states; this is the case if _clear_names_ is set to true.
880
+ # None of the added states are made initial.
881
# Copies every state and every edge of _what_ into this automaton and
# returns the copy of what.initial_state (nil when there is none).
# Each copied state gets :initial set to false, and :name set to nil when
# _clear_names_ is true.
def add_automaton(what,clear_names=true)
  copies = {}
  what.states.each do |original|
    copy = add_state(original.data)
    copies[original] = copy
    copy[:name] = nil if clear_names
    copy[:initial] = false
  end
  what.edges.each do |edge|
    add_edge(copies[edge.from], copies[edge.to], edge.data)
  end
  copies[what.initial_state]
end
893
+
894
+ # Constructs a replica of this automaton and returns a copy.
895
+ # This copy can be modified in whatever way without affecting the original
896
+ # automaton.
897
# Builds a new Automaton, copies this one into it with add_automaton
# (state names kept) and flags the copy of the initial state, when there is
# one, as initial. Returns the new automaton.
def dup
  Automaton.new(false) do |replica|
    start = replica.add_automaton(self, false)
    start[:initial] = true unless start.nil?
  end
end
903
+
904
+ #
905
+ # Drops a state of the automaton, as well as all connected edges to that state.
906
+ # If _state_ is an integer, the state-th state of the state list is removed.
907
+ # This method returns the automaton itself.
908
+ #
909
+ # Raises:
910
+ # - ArgumentError if _state_ is an Integer but not in [0..state_count)
910
+ # - ArgumentError if _state_ is not a valid state for this automaton.
912
+ #
913
# Removes _state_ (resolved with to_state) from the automaton: its incoming
# and outgoing edges are dropped first, then the state is taken out of the
# state list, the following states are renumbered and the dropped state
# gets index -1. Returns the automaton itself.
def drop_state(state)
  state = to_state(state)

  # edges go first; drop_edges keeps the edge list coherent
  connected = (state.in_edges + state.out_edges).uniq
  drop_edges(*connected)

  # take the state out, then shift the index of every state that followed
  position = state.index
  @states.delete_at(position)
  (position...state_count).each do |i|
    @states[i].send(:index=, i)
  end
  state.send(:index=, -1)

  state_changed(:state_dropped, state)
  self
end
928
+ alias :delete_state :drop_state
929
+
930
+ #
931
+ # Drops all states passed as parameter as well as all their connected edges.
932
+ # Arguments may be state instances, as well as valid state indices. Duplicates
933
+ # are even supported. This method has no effect on the automaton and raises
934
+ # an error if some state argument is not valid.
935
+ #
936
+ # Raises:
937
+ # - ArgumentError if one state in _states_ is not a valid state of this
938
+ # automaton.
939
+ #
940
# Removes all given states (each resolved with to_state, duplicates
# ignored) together with every edge touching them, then renumbers the
# remaining states and edges. Returns the automaton itself.
def drop_states(*states)
  # resolve everything up front so that an invalid argument raises before
  # anything is modified
  doomed_states = states.collect{|s| to_state(s)}.uniq.sort
  doomed_edges = doomed_states.collect{|s| (s.in_edges + s.out_edges).uniq}.flatten.uniq.sort

  # Highest index first: deleting by index then never shifts an element that
  # is still waiting to be deleted, so no intermediate renumbering is needed.
  doomed_edges.reverse_each do |edge|
    edge.source.send(:drop_outgoing_edge,edge)
    edge.target.send(:drop_incoming_edge,edge)
    @edges.delete_at(edge.index)
    edge.send(:index=, -1)
    state_changed(:edge_dropped, edge)
  end

  # same trick for the states
  doomed_states.reverse_each do |state|
    @states.delete_at(state.index)
    state.send(:index=, -1)
    state_changed(:state_dropped, state)
  end

  # renumber what is left
  @states.each_with_index {|s,i| s.send(:index=,i)}
  @edges.each_with_index {|e,i| e.send(:index=,i)}

  self
end
971
+
972
+ #
973
+ # Drops an edge in the automaton. If _edge_ is an integer, the edge-th edge
974
+ # of the edge list is removed. This method returns the automaton itself.
975
+ #
976
+ # Raises:
977
+ # - IndexError if _edge_ is an Integer but not in [0..edge_count)
978
+ # - ArgumentError if _edge_ is not a valid edge for this automaton.
979
+ #
980
# Removes _edge_ (resolved with to_edge) from the edge list, unregisters it
# from its source and target states, renumbers the edges that followed it
# and gives the dropped edge index -1. Returns the automaton itself.
def drop_edge(edge)
  edge = to_edge(edge)
  position = edge.index
  @edges.delete_at(position)
  edge.from.send(:drop_outgoing_edge,edge)
  edge.to.send(:drop_incoming_edge,edge)

  # every edge that followed the removed one moves one slot down
  (position...edge_count).each do |i|
    @edges[i].send(:index=, i)
  end
  edge.send(:index=,-1)

  state_changed(:edge_dropped, edge)
  self
end
992
+ alias :delete_edge :drop_edge
993
+
994
+ #
995
+ # Drops all edges passed as parameters. Arguments may be edge objects,
996
+ # as well as valid edge indices. Duplicates are even supported. This method
997
+ # has no effect on the automaton and raises an error if some edge argument
998
+ # is not valid.
999
+ #
1000
+ # Raises:
1001
+ # - ArgumentError if one edge in _edges_ is not a valid edge of this automaton.
1002
+ #
1003
# Removes all given edges (each resolved with to_edge, duplicates ignored):
# every one is taken out of the edge list, unregistered from its endpoint
# states, given index -1 and reported through state_changed. Remaining
# edges are renumbered at the end. Returns the automaton itself.
def drop_edges(*edges)
  # resolve everything up front so that an invalid argument raises before
  # anything is modified
  doomed = edges.map { |candidate| to_edge(candidate) }.uniq

  doomed.each do |edge|
    @edges.delete(edge)
    edge.from.send(:drop_outgoing_edge, edge)
    edge.to.send(:drop_incoming_edge, edge)
    edge.send(:index=, -1)
    state_changed(:edge_dropped, edge)
  end

  # renumber what is left
  @edges.each_with_index { |edge, position| edge.send(:index=, position) }

  self
end
1021
+ alias :delete_edges :drop_edges
1022
+
1023
+ ### protected section ########################################################
1024
+ protected
1025
+
1026
+ #
1027
+ # Converts a _state_ argument to a valid State of this automaton.
1028
+ # There are three ways to refer to a state, by position in the internal
1029
+ # collection of states, using an instance of State and using a name of a
1030
+ # state (represented with a String).
1031
+ #
1032
+ # Raises:
1033
+ # - ArgumentError if state is an Integer and state<0 or state>=state_count.
1034
+ # - ArgumentError if state is not a valid state (not a state or not from this
1035
+ # automaton)
1036
+ #
1037
# Resolves _state_ to a State of this automaton. A State instance is
# accepted when it belongs to this automaton and sits at its own index in
# the state list; an Integer goes through ith_state; a String goes through
# get_state. Anything else raises ArgumentError.
def to_state(state)
  if State === state
    return state if state.automaton==self and state==@states[state.index]
    raise ArgumentError, "Not a state of this automaton", caller
  elsif Integer === state
    return ith_state(state)
  elsif String === state
    named = get_state(state)
    return named unless named.nil?
  end
  raise ArgumentError, "Invalid state argument #{state}", caller
end
1050
+
1051
+ #
1052
+ # Converts an _edge_ argument to a valid Edge of this automaton.
1053
+ #
1054
+ # Raises:
1055
+ # - ArgumentError if _edge_ is an Integer but not in [0..edge_count)
1056
+ # - ArgumentError if _edge_ is not a valid edge (not a edge or not from this
1057
+ # automaton)
1058
+ #
1059
# Resolves _edge_ to an Edge of this automaton. An Edge instance is
# accepted when it belongs to this automaton and sits at its own index in
# the edge list; an Integer goes through ith_edge. Anything else raises
# ArgumentError.
def to_edge(edge)
  if Edge === edge
    return edge if edge.automaton==self and edge==@edges[edge.index]
    raise ArgumentError, "Not an edge of this automaton", caller
  elsif Integer === edge
    return ith_edge(edge)
  end
  raise ArgumentError, "Invalid edge argument #{edge}", caller
end
1069
+
1070
+ #
1071
+ # Checks if a given user-data contains enough information to be attached to
1072
+ # a given state. Returns the data if ok.
1073
+ #
1074
+ # Raises:
1075
+ # - ArgumentError if data is not considered a valid state data.
1076
+ #
1077
# Returns _data_ unchanged when it is a Hash, raises ArgumentError
# otherwise.
def to_valid_state_data(data)
  return data if Hash === data
  raise(ArgumentError, "User data should be an Hash", caller)
end
1082
+
1083
+ #
1084
+ # Checks if a given user-data contains enough information to be attached to
1085
+ # a given edge. Returns the data if ok.
1086
+ #
1087
+ # Raises:
1088
+ # - ArgumentError if data is not considered a valid edge data.
1089
+ #
1090
# Normalizes edge user data. nil and String values are wrapped as
# {:symbol => value}; a Hash is returned unchanged provided it has a
# :symbol key whose value is not an Array. Anything else raises
# ArgumentError.
def to_valid_edge_data(data)
  return {:symbol => data} if data.nil? or data.is_a?(String)

  unless Hash === data
    raise(ArgumentError, "User data should be an Hash", caller)
  end
  unless data.has_key?(:symbol)
    raise(ArgumentError, "User data should contain a :symbol attribute.", caller)
  end
  if Array === data[:symbol]
    raise(ArgumentError, "Edge :symbol attribute cannot be an array.", caller)
  end
  data
end
1102
+
1103
+ ### public sections with useful utilities ####################################
1104
+ public
1105
+
1106
+ # Returns true if the automaton is deterministic, false otherwise
1107
# Tells whether every state answers true to deterministic?. The answer is
# cached in @deterministic until it is reset to nil.
def deterministic?
  if @deterministic.nil?
    offenders = @states.select { |s| !s.deterministic? }
    @deterministic = offenders.empty?
  end
  @deterministic
end
1111
+
1112
+ ### public & protected sections about alphabet ###############################
1113
+ protected
1114
+
1115
+ # Deduces the alphabet from the automaton edges.
1116
# Collects the symbols found on the edges, without duplicates and without
# nil, in sorted order.
def deduce_alphabet
  symbols = edges.collect { |edge| edge.symbol }
  symbols.uniq.compact.sort
end
1119
+
1120
+ public
1121
+
1122
+ # Returns the alphabet of the automaton.
1123
# Returns the alphabet set through alphabet= when there is one, otherwise
# the alphabet deduced from the edges.
def alphabet
  return @alphabet if @alphabet
  deduce_alphabet
end
1126
+
1127
+ # Sets the alphabet of the automaton. _alph_ is expected to be an array without
1128
+ # nil nor duplicated. This method raises an ArgumentError otherwise. Such an
1129
+ # error is also raised if a symbol used on the automaton edges is not included
1130
+ # in _alph_.
1131
# Stores a sorted copy of _alph_ as the alphabet. Raises ArgumentError when
# _alph_ contains nil or duplicates, or when a symbol deduced from the edges
# is missing from it.
def alphabet=(alph)
  well_formed = (alph.uniq.compact.size == alph.size)
  raise ArgumentError, "Invalid alphabet" unless well_formed

  missing = deduce_alphabet.reject { |symbol| alph.include?(symbol) }
  raise ArgumentError, "Invalid alphabet" unless missing.empty?

  @alphabet = alph.sort
end
1136
+
1137
+ ### public section about dot utilities #######################################
1138
+ protected
1139
+
1140
+ #
1141
+ # Converts a hash of attributes (typically automaton, state or edge attributes)
1142
+ # to a dot attribute list (<code>key="value" ...</code>). The enclosing brackets are not part of the output.
1143
+ #
1144
# Renders _attrs_ as space-separated key="value" pairs, keys ordered by
# their string form and double quotes inside values escaped with a
# backslash.
def attributes2dot(attrs)
  ordered_keys = attrs.keys.sort{|k1,k2| k1.to_s <=> k2.to_s}
  pairs = ordered_keys.map do |key|
    escaped = attrs[key].to_s.gsub('"','\"')
    "#{key}=\"#{escaped}\""
  end
  pairs.join(" ")
end
1153
+
1154
+ public
1155
+
1156
+ #
1157
+ # Generates a dot output from an automaton. The rewriter block takes
1158
+ # two arguments: the first one is a Markable instance (graph, state or
1159
+ # edge), the second one indicates which kind of element is passed (through
1160
+ # :automaton, :state or :edge symbol). The rewriter is expected to return a
1161
+ # hash-like object providing dot attributes for the element.
1162
+ #
1163
+ # When no rewriter is provided, a default one is used by default, providing
1164
+ # the following behavior:
1165
+ # - on :automaton
1166
+ #
1167
+ # {:rankdir => "LR"}
1168
+ #
1169
+ # - on :state
1170
+ #
1171
+ # {:shape => "doublecircle/circle" (following accepting?),
1172
+ # :style => "filled",
1173
+ # :fillcolor => "green/red/white" (if initial?/error?/else, respectively)}
1174
+ #
1175
+ # - on edge
1176
+ #
1177
+ # {:label => "#{edge.symbol}"}
1178
+ #
1179
# Renders the automaton as a dot digraph. The rewriter block is called with
# (element, kind), kind being :automaton, :state or :edge, and must return
# the attributes of that element; they are rendered with attributes2dot.
# Without a block, the method calls itself with a default rewriter.
def to_dot(&rewriter)
  if rewriter
    out = "digraph G {\n"
    attrs = attributes2dot(rewriter.call(self, :automaton))
    out << " graph [#{attrs}];\n"
    states.each do |state|
      attrs = attributes2dot(rewriter.call(state, :state))
      out << " #{state.index} [#{attrs}];\n"
    end
    edges.each do |edge|
      attrs = attributes2dot(rewriter.call(edge, :edge))
      out << " #{edge.source.index} -> #{edge.target.index} [#{attrs}];\n"
    end
    out << "}\n"
  else
    # default look: left-to-right graph, accepting states double-circled,
    # fill colour telling initial / error / other states apart
    to_dot do |elm, kind|
      case kind
      when :automaton
        {:rankdir => "LR"}
      when :state
        fill = if elm.initial? then "green"
               elsif elm.error? then "red"
               else "white"
               end
        {:shape => (elm.accepting? ? "doublecircle" : "circle"),
         :style => "filled",
         :color => "black",
         :fillcolor => fill}
      when :edge
        {:label => elm.symbol.nil? ? '' : elm.symbol.to_s}
      end
    end
  end
end
1209
+
1210
+ ### public section about reordering ##########################################
1211
+ public
1212
+
1213
+ # Uses a comparator block to reorder the state list.
1214
# Sorts the state list in place with the given two-argument comparator
# block, then renumbers the states to match their new position. Returns the
# automaton itself. Raises ArgumentError without a block, or when the block
# arity is not 2.
def order_states(&block)
  raise ArgumentError, "A comparator block must be given" if block.nil?
  unless block.arity == 2
    raise ArgumentError, "A comparator block of arity 2 must be given"
  end
  @states.sort!(&block)
  @states.each_with_index do |state, position|
    state.send(:index=, position)
  end
  self
end
1221
+
1222
+ ### protected section about changes ##########################################
1223
+ protected
1224
+
1225
+ #
1226
+ # Fired by write methods when an automaton change occurs.
1227
+ #
1228
# Change hook of the automaton: whatever the change, forget the cached
# initial states and the cached determinism flag. Both arguments are
# currently ignored.
def state_changed(what, infos)
  @initials = @deterministic = nil
end
1232
+
1233
+ protected :compute_initial_states
1234
+ end # class Automaton
1235
+
1236
+ end # module Stamina
1237
+ require 'stamina/automaton/walking'