stamina-induction 0.5.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (39) hide show
  1. data/CHANGELOG.md +78 -0
  2. data/LICENCE.md +22 -0
  3. data/lib/stamina-induction/stamina-induction.rb +1 -0
  4. data/lib/stamina-induction/stamina/abbadingo.rb +2 -0
  5. data/lib/stamina-induction/stamina/abbadingo/random_dfa.rb +55 -0
  6. data/lib/stamina-induction/stamina/abbadingo/random_sample.rb +146 -0
  7. data/lib/stamina-induction/stamina/classifier.rb +55 -0
  8. data/lib/stamina-induction/stamina/command.rb +6 -0
  9. data/lib/stamina-induction/stamina/command/abbadingo_dfa.rb +80 -0
  10. data/lib/stamina-induction/stamina/command/abbadingo_samples.rb +39 -0
  11. data/lib/stamina-induction/stamina/command/classify.rb +47 -0
  12. data/lib/stamina-induction/stamina/command/infer.rb +140 -0
  13. data/lib/stamina-induction/stamina/command/metrics.rb +50 -0
  14. data/lib/stamina-induction/stamina/command/score.rb +34 -0
  15. data/lib/stamina-induction/stamina/dsl.rb +2 -0
  16. data/lib/stamina-induction/stamina/dsl/induction.rb +29 -0
  17. data/lib/stamina-induction/stamina/dsl/reg_lang.rb +69 -0
  18. data/lib/stamina-induction/stamina/induction.rb +13 -0
  19. data/lib/stamina-induction/stamina/induction/blue_fringe.rb +265 -0
  20. data/lib/stamina-induction/stamina/induction/commons.rb +156 -0
  21. data/lib/stamina-induction/stamina/induction/rpni.rb +186 -0
  22. data/lib/stamina-induction/stamina/induction/union_find.rb +377 -0
  23. data/lib/stamina-induction/stamina/input_string.rb +123 -0
  24. data/lib/stamina-induction/stamina/reg_lang.rb +226 -0
  25. data/lib/stamina-induction/stamina/reg_lang/canonical_info.rb +181 -0
  26. data/lib/stamina-induction/stamina/reg_lang/parser.rb +10 -0
  27. data/lib/stamina-induction/stamina/reg_lang/parser/alternative.rb +19 -0
  28. data/lib/stamina-induction/stamina/reg_lang/parser/node.rb +22 -0
  29. data/lib/stamina-induction/stamina/reg_lang/parser/parenthesized.rb +12 -0
  30. data/lib/stamina-induction/stamina/reg_lang/parser/parser.citrus +49 -0
  31. data/lib/stamina-induction/stamina/reg_lang/parser/plus.rb +14 -0
  32. data/lib/stamina-induction/stamina/reg_lang/parser/question.rb +17 -0
  33. data/lib/stamina-induction/stamina/reg_lang/parser/regexp.rb +12 -0
  34. data/lib/stamina-induction/stamina/reg_lang/parser/sequence.rb +15 -0
  35. data/lib/stamina-induction/stamina/reg_lang/parser/star.rb +15 -0
  36. data/lib/stamina-induction/stamina/reg_lang/parser/symbol.rb +14 -0
  37. data/lib/stamina-induction/stamina/sample.rb +309 -0
  38. data/lib/stamina-induction/stamina/scoring.rb +213 -0
  39. metadata +106 -0
@@ -0,0 +1,156 @@
1
+ module Stamina
2
+ module Induction
3
+
4
+ #
5
+ # Defines common utilities used by rpni and blue_fringe. About acronyms:
6
+ # - _pta_ stands for Prefix Tree Acceptor
7
+ # - _ufds_ stands for Union-Find Data Structure
8
+ #
9
+ # Methods pta2ufds and sample2ufds are simply conversion methods used when the induction
10
+ # algorithm starts (executed on a sample, it first builds a PTA then converts it to a union
11
+ # find). Method ufds2dfa is used when the algorithm ends, to convert refined union find to
12
+ # a dfa.
13
+ #
14
+ # The merge_user_data method is probably the most important as it actually computes
15
+ # the merging of two states and builds information about merging for determinization.
16
+ #
17
module Commons

  # Options used by including algorithms for every key the caller does not
  # override. Note that :verbose_io holds the value $stderr had when this
  # file was loaded.
  DEFAULT_OPTIONS = {
    :verbose    => false,
    :verbose_io => $stderr
  }

  # Additional options of the algorithm
  attr_reader :options

  #
  # Is the verbose mode on ?
  #
  # The answer is computed once from <code>options[:verbose]</code> and cached
  # whatever its value. (A plain <code>||=</code> never caches a false answer
  # and silently recomputes it on every call.)
  #
  def verbose?
    @verbose = !!options[:verbose] unless defined?(@verbose)
    @verbose
  end

  #
  # Returns the stream on which verbose messages are written: the :verbose_io
  # option when set, the current $stderr otherwise. The result is cached.
  #
  def verbose_io
    @verbose_io ||= options[:verbose_io] || $stderr
  end

  #
  # Displays an information message (when verbose). The message and a trailing
  # newline are appended with <code><<</code>; the sink is flushed only when it
  # knows how to, so that simple sinks such as an Array or a String may be used.
  #
  def info(msg)
    return unless verbose?
    io = verbose_io
    io << msg << "\n"
    io.flush if io.respond_to?(:flush)
  end

  #
  # Factors and returns a UnionFind data structure from a PTA, keeping natural order
  # of its states for union-find elements. The resulting UnionFind contains a Hash as
  # mergeable user data, presenting the following keys:
  # - :initial, :accepting and :error flags of each state
  # - :master indicating the index of the state in the PTA
  # - :delta a delta function through a Hash {symbol => state_index}
  #
  # In this version, other user data attached to PTA states is lost during the
  # conversion.
  #
  def pta2ufds(pta)
    Stamina::Induction::UnionFind.new(pta.state_count) do |index|
      state = pta.ith_state(index)
      data = {:initial   => state.initial?,
              :accepting => state.accepting?,
              :error     => state.error?,
              :master    => index,
              :delta     => {}}
      # one delta entry per outgoing edge: symbol => index of the target state
      state.out_edges.each { |edge| data[:delta][edge.symbol] = edge.target.index }
      data
    end
  end

  #
  # Converts a Sample to an (augmented) prefix tree acceptor by delegating to
  # <code>sample.to_pta</code>.
  #
  def sample2pta(sample)
    sample.to_pta
  end

  #
  # Converts a Sample instance to a 'ready to refine' union find data structure.
  # This method is simply a shortcut for <code>pta2ufds(sample2pta(sample))</code>.
  #
  def sample2ufds(sample)
    pta2ufds(sample2pta(sample))
  end

  #
  # Computes the quotient automaton from a refined UnionFind data structure.
  #
  # One state is created per group, named after the group's :master index.
  # The :master, :count and :delta keys are not copied to the state, and the
  # :error flag of every created state is forced to false. Edges are then
  # added by resolving each :delta target to the leader of its group.
  #
  def ufds2dfa(ufds)
    Automaton.new(false) do |fa|
      groups = ufds.mergeable_datas

      # first pass: one automaton state per group
      groups.each do |data|
        state_data = data.reject { |key, _| [:master, :count, :delta].include?(key) }
        state_data[:name]  = data[:master].to_s
        state_data[:error] = false
        fa.add_state(state_data)
      end

      # second pass: edges, once every state exists
      groups.each do |data|
        source = fa.get_state(data[:master].to_s)
        data[:delta].each_pair do |symbol, target_index|
          target = fa.get_state(ufds.find(target_index).to_s)
          fa.connect(source, target, symbol)
        end
      end
    end
  end

  #
  # Merges two user data hashes _d1_ and _d2_ according to rules defined
  # below. Also fills the _determinization_ array with pairs of state indices
  # that are reached from d1 and d2 through the same symbol and should be
  # merged for determinization. This method does NOT ensure that those pairs
  # correspond to distinct states according to the union find. In other
  # words state indices in these pairs do not necessarily correspond to master
  # states (see UnionFind for this term).
  #
  # Returns the resulting data if the merge is successful (does not lead to
  # merging an error state with an accepting one), nil otherwise. On failure
  # the _determinization_ array is left untouched.
  #
  # The merging procedure for the different hash keys is as follows:
  # - result[:initial] = d1[:initial] or d2[:initial]
  # - result[:accepting] = d1[:accepting] or d2[:accepting]
  # - result[:error] = d1[:error] or d2[:error]
  # - result[:master] = min(d1[:master], d2[:master])
  # - result[:delta] = merging of delta hashes, keeping smaller target index
  #   on key collisions.
  #
  def merge_user_data(d1, d2, determinization)
    # we compute flags first
    new_data = {:initial   => d1[:initial] || d2[:initial],
                :accepting => d1[:accepting] || d2[:accepting],
                :error     => d1[:error] || d2[:error],
                :master    => [d1[:master], d2[:master]].min}

    # merge failure if accepting and error states are merged
    return nil if new_data[:accepting] && new_data[:error]

    # we recompute the delta function of the resulting state, recording each
    # symbol collision as a pair to be merged later for determinization
    new_data[:delta] = d1[:delta].merge(d2[:delta]) do |_symbol, t1, t2|
      determinization << [t1, t2]
      [t1, t2].min
    end

    new_data
  end

end # module Commons
154
+
155
+ end # module Induction
156
+ end # module Stamina
@@ -0,0 +1,186 @@
1
+ module Stamina
2
+ module Induction
3
+
4
+ #
5
+ # Implementation of the standard Regular Positive and Negative Induction (RPNI)
6
+ # algorithm. From a given sample, containing positive and negative strings, RPNI
7
+ # computes the smallest deterministic automaton compatible with the sample.
8
+ #
9
+ # See J. Oncina and P. Garcia, Inferring Regular Languages in Polynomial Update
10
+ # Time, In N. Perez de la Blanca, A. Sanfeliu and E. Vidal, editors, Pattern
11
+ # Recognition and Image Analysis, volume 1 of Series in Machines Perception and
12
+ # Artificial Intelligence, pages 49-61, World Scientific, 1992.
13
+ #
14
+ # Example:
15
+ # # sample typically comes from an ADL file
16
+ # sample = Stamina::ADL.parse_sample_file('sample.adl')
17
+ #
18
+ # # let RPNI build the smallest dfa
19
+ # dfa = Stamina::Induction::RPNI.execute(sample, {:verbose => true})
20
+ #
21
+ # Remarks:
22
+ # - Constructor and instance methods of this class are public but not intended
23
+ # to be used directly. They are left public for testing purposes only.
24
+ # - This class intensively uses the Stamina::Induction::UnionFind class and
25
+ # methods defined in the Stamina::Induction::Commons module which are worth
26
+ # reading to understand the algorithm implementation.
27
+ #
28
+ class RPNI
29
+ include Stamina::Induction::Commons
30
+
31
+ # Union-find data structure used internally
32
+ attr_reader :ufds
33
+
34
+ # Creates an algorithm instance with given options.
35
+ def initialize(options={})
36
+ raise ArgumentError, "Invalid options #{options.inspect}" unless options.is_a?(Hash)
37
+ @options = DEFAULT_OPTIONS.merge(options)
38
+ end
39
+
40
+ #
41
+ # Merges a state of rank j with a state of lower rank i. This merge method
42
+ # includes merging for determinization.
43
+ #
44
+ # Preconditions:
45
+ # - States denoted by i and j are expected leader states (non merged ones)
46
+ # - States denoted by i and j are expected to be different
47
+ #
48
+ # Postconditions:
49
+ # - Union find is refined, states i and j having been merged, as well as all
50
+ # state pairs that need to be merged to ensure the deterministic property
51
+ # of the quotient automaton.
52
+ # - If the resulting quotient automaton is consistent with the negative sample,
53
+ # this method returns true and the refined union-find correctly encodes the
54
+ # quotient automaton. Otherwise, the method returns false and the union-find
55
+ # information must be considered inaccurate.
56
+ #
57
+ def merge_and_determinize(i, j)
58
+ # Make the union (keep additional merges to be performed in determinization)
59
+ # and recompute the user data attached to the new state group (new_data)
60
+ determinization = []
61
+ @ufds.union(i, j) do |d1, d2|
62
+ new_data = merge_user_data(d1, d2, determinization)
63
+ return false unless new_data
64
+ new_data
65
+ end
66
+
67
+ # Merge for determinization
68
+ determinization.each do |pair|
69
+ # we take the leader states of the pair to merge
70
+ pair = pair.collect{|i| @ufds.find(i)}
71
+ # do nothing if already the same leader state
72
+ next if pair[0]==pair[1]
73
+ # otherwise recurse or fail
74
+ return false unless merge_and_determinize(pair[0], pair[1])
75
+ end
76
+
77
+ # Everything seems ok!
78
+ true
79
+ end
80
+
81
+ #
82
+ # Makes a complete merge (including determinization), or simply do nothing if
83
+ # it leads accepting a negative string.
84
+ #
85
+ # Preconditions:
86
+ # - States denoted by i and j are expected leader states (non merged ones)
87
+ # - States denoted by i and j are expected to be different
88
+ #
89
+ # Postconditions:
90
+ # - Union find is refined, states i and j having been merged, as well as all
91
+ # state pairs that need to be merged to ensure the deterministic property
92
+ # of the quotient automaton.
93
+ # - If the resulting quotient automaton is consistent with the negative sample,
94
+ # this method returns true and the refined union-find correctly encodes the
95
+ # quotient automaton. Otherwise, the union find has not been changed.
96
+ #
97
+ def successfull_merge_or_nothing(i,j)
98
+ # try a merge and determinize inside a transaction on the ufds
99
+ @ufds.transactional do
100
+ merge_and_determinize(i, j)
101
+ end
102
+ end
103
+
104
+ #
105
+ # Main method of the algorithm. Refines the union find passed as first argument
106
+ # by merging well chosen state pairs. Returns the refined union find.
107
+ #
108
+ # Preconditions:
109
+ # - The union find _ufds_ is correctly initialized (contains :initial, :accepting,
110
+ # and :error boolean flags as well as a :delta sub hash)
111
+ #
112
+ # Postconditions:
113
+ # - The union find has been refined. It encodes a quotient automaton (of the PTA
114
+ # it comes from) such that all positive and negative strings of the underlying
115
+ # sample are correctly classified by it.
116
+ #
117
+ def main(ufds)
118
+ @ufds = ufds
119
+ info("Starting RPNI (#{@ufds.size} states)")
120
+ # First loop, iterating all PTA states
121
+ (1...@ufds.size).each do |i|
122
+ # we ignore those that have been previously merged
123
+ next if @ufds.slave?(i)
124
+ # second loop: states of lower rank, with ignore
125
+ (0...i).each do |j|
126
+ next if @ufds.slave?(j)
127
+ # try to merge this pair, including determinization
128
+ # simply break the loop if it works!
129
+ success = successfull_merge_or_nothing(i,j)
130
+ if success
131
+ info("#{i} and #{j} successfully merged")
132
+ break
133
+ end
134
+ end # j loop
135
+ end # i loop
136
+ @ufds
137
+ end
138
+
139
+ #
140
+ # Build the smallest DFA compatible with the sample given as input.
141
+ #
142
+ # Preconditions:
143
+ # - The sample is consistent (does not contains the same string both labeled as
144
+ # positive and negative) and contains at least one string.
145
+ #
146
+ # Postconditions:
147
+ # - The returned DFA is the smallest DFA that correctly labels the learning sample
148
+ # given as input.
149
+ #
150
+ # Remarks:
151
+ # - This instance version of RPNI.execute is not intended to be used directly and
152
+ # is mainly provided for testing purposes. Please use the class variant of this
153
+ # method if possible.
154
+ #
155
+ def execute(sample)
156
+ # create union-find
157
+ info("Creating PTA and UnionFind structure")
158
+ ufds = sample2ufds(sample)
159
+ # refine it
160
+ ufds = main(ufds)
161
+ # compute and return quotient automaton
162
+ ufds2dfa(ufds)
163
+ end
164
+
165
+ #
166
+ # Build the smallest DFA compatible with the sample given as input.
167
+ #
168
+ # Options (the _options_ hash):
169
+ # - :verbose can be set to true to trace algorithm execution on standard output.
170
+ #
171
+ # Preconditions:
172
+ # - The sample is consistent (does not contains the same string both labeled as
173
+ # positive and negative) and contains at least one string.
174
+ #
175
+ # Postconditions:
176
+ # - The returned DFA is the smallest DFA that correctly labels the learning sample
177
+ # given as input.
178
+ #
179
+ def self.execute(sample, options={})
180
+ RPNI.new(options).execute(sample)
181
+ end
182
+
183
+ end # class RPNI
184
+
185
+ end # module Induction
186
+ end # module Stamina
@@ -0,0 +1,377 @@
1
+ module Stamina
2
+ module Induction
3
+
4
+ #
5
+ # Implements an UnionFind data structure dedicated to state merging induction algorithms.
6
+ # For this purpose, this union-find handles mergeable user data as well as transactional
7
+ # support. See Stamina::Induction::Commons about the usage of this class (and mergeable
8
+ # user data in particular) by induction algorithms.
9
+ #
10
+ # == Example (probably easier than a long explanation)
11
+ #
12
+ # # create a union-find for 10 elements
13
+ # ufds = Stamina::Induction::UnionFind.new(10) do |index|
14
+ # # each element will be associated with a hash with data of interest:
15
+ # # smallest element, greatest element and concatenation of names
16
+ # {:smallest => index, :greatest => index, :names => index.to_s}
17
+ # end
18
+ #
19
+ # # each element is its own leader
20
+ # puts (0...10).all?{|s| ufds.leader?(s)} -> true
21
+ #
22
+ # # and their respective group number are the element indices themselve
23
+ # puts ufds.to_a -> [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
24
+ #
25
+ # # now, let merge 4 with 0
26
+ # ufds.union(0, 4) do |d0, d4|
27
+ # {:smallest => d0[:smallest] < d4[:smallest] ? d0[:smallest] : d4[:smallest],
28
+ # :greatest => d0[:greatest] > d4[:greatest] ? d0[:greatest] : d4[:greatest],
29
+ # :names => d0[:names] + " " + d4[:names]}
30
+ # end
31
+ #
32
+ # # let see what happens on group numbers
33
+ # puts ufds.to_a -> [0, 1, 2, 3, 0, 5, 6, 7, 8, 9]
34
+ #
35
+ # # let now have a look on mergeable_data of the group of 0 (same result for 4)
36
+ # puts ufds.mergeable_data(0).inspect -> {:smallest => 0, :greatest => 4, :names => "0 4"}
37
+ #
38
+ # == Basic Union Find API
39
+ #
40
+ # A UnionFind data structure typically allows encoding a partition of elements (a
41
+ # partition is a collection of disjoint sets - aka a collection of groups). Basically,
42
+ # this class represents elements by successive indices (from 0 to size, the latter being
43
+ # excluded). The partitioning information is kept in a array, associating a group number
44
+ # to each element. This group number is simply the index of the least element in the
45
+ # group (which means that group numbers are not necessarily consecutive). For example,
46
+ # the following arrays maps to the associated partitions:
47
+ #
48
+ # [0, 1, 2, 3, 4, 5] -> {{0}, {1}, {2}, {3}, {4}, {5}}
49
+ # [0, 0, 0, 0, 0, 0] -> {{0, 1, 2, 3, 4, 5}}
50
+ # [0, 1, 1, 0, 4, 4] -> {{0, 3}, {1, 2}, {4, 5}}
51
+ #
52
+ # The API of this basic union-find data structure is composed of the following
53
+ # methods:
54
+ # - new(size) (class method): builds an initial partition information over _size_
55
+ # elements. This initial partition keeps each element in its own group.
56
+ # - find(i): returns the group number of the i-th element
57
+ # - union(i, j): merge the group of the i-th element with the group of the j-th
58
+ # element. Note that i and j are elements, NOT group numbers.
59
+ #
60
+ # As we use least elements as group numbers, it is also interesting to know if a
61
+ # given element is that least element (aka leader element of the group) or not:
62
+ #
63
+ # - leader?(i): returns true if i is the group number of the i-th element, false
64
+ # otherwise. In other words, returns true if find(i)==i
65
+ # - slave?(i): the negation of leader?(i).
66
+ #
67
+ # == Handling User Data
68
+ #
69
+ # Even if this class represents elements by indices, it also allows keeping user
70
+ # data attached to each group. For this:
71
+ #
72
+ # - an initial user data is attached to each element at construction time by
73
+ # yielding a block (passing the element index as first argument and expecting
74
+ # user data as block return value).
75
+ # - the union(i, j) method allows a block to be given. It passes user data of i's
76
+ # and j's groups as arguments and expects the block to compute and return the
77
+ # merged user data for the new group.
78
+ # - mergeable_data(i) returns the current user data associated to the group of
79
+ # the i-th element.
80
+ # - mergeable_datas returns an array with user data attached to each group.
81
+ #
82
+ # Please note that user data are considered immutable values, and should never be
83
+ # changed... Only new ones can be created at union time. To ensure this good usage,
84
+ # user data are frozen by this class at creation time and union time.
85
+ #
86
+ # == Transactional support
87
+ #
88
+ # The main aim of this UnionFind is to make the implementation of the induction algorithms
89
+ # Stamina::Induction::RPNI and Stamina::Induction::BlueFringe (sufficiently) efficient,
90
+ # simple and readable. These algorithms rely on a trial-and-error strategy and must be
91
+ # able to revert the changes they have made during their last try. The transaction
92
+ # support implemented by this data structure helps them achieving this goal. For this
93
+ # we provide the following methods:
94
+ #
95
+ # - save_point: ensures that the internal state of the UnionFind can be restored if
96
+ # rollback is invoked later.
97
+ # - commit: informs the UnionFind that changes that have been made since the last
98
+ # invocation of save_point will not be reconsidered.
99
+ # - rollback: restores the internal state of the UnionFind that has been saved when
100
+ # save_point has been called.
101
+ #
102
+ # Please note that this class does not support sub-transactions.
103
+ #
104
+ class UnionFind
105
+
106
+ #
107
+ # An element of the union find, keeping the index of its leader element as well as
108
+ # mergeable user data. This class is not intended to be used by external users of the
109
+ # UnionFind data structure.
110
+ #
111
+ class Node
112
+
113
+ # Index of the parent element (on the way to the leader)
114
+ attr_accessor :parent
115
+
116
+ # Attached user data
117
+ attr_accessor :data
118
+
119
+ #
120
+ # Creates a default Node instance with a specific parent index and attached
121
+ # user data.
122
+ #
123
+ def initialize(parent, data)
124
+ @parent = parent
125
+ @data = data
126
+ end
127
+
128
+ #
129
+ # Duplicates this node, ensuring that future changes will not affect the copy.
130
+ # Please note that the user data itself is not duplicated and is not expected
131
+ # to change. This property (not changing user data) is respected by the RPNI
132
+ # and BlueFringe classes as implemented in this library.
133
+ #
134
+ def dup
135
+ Node.new(@parent, @data)
136
+ end
137
+
138
+ end # class Node
139
+
140
+ #
141
+ # Number of elements in this union find
142
+ #
143
+ attr_reader :size
144
+
145
+ #
146
+ # (protected) Accessor on elements array, provided for duplication
147
+ #
148
+ attr_writer :elements
149
+
150
+ #
151
+ # Creates a default union find of a given size. Each element is initially in its own
152
+ # group. User data attached to each group is obtained by yielding a block, passing
153
+ # element index as first argument.
154
+ #
155
+ # Precondition:
156
+ # - size is expected to be strictly positive
157
+ #
158
+ def initialize(size)
159
+ @size = size
160
+ @elements = (0...size).collect do |i|
161
+ Node.new(i, block_given? ? yield(i).freeze : nil)
162
+ end
163
+ @changed = nil
164
+ end
165
+
166
+ # Union Find API ###########################################################
167
+
168
+ #
169
+ # Finds the group number of the i-th element (the group number is the least
170
+ # element of the group, aka _leader_).
171
+ #
172
+ # Preconditions:
173
+ # - i is a valid element: 0 <= i < size
174
+ #
175
+ # Postconditions:
176
+ # - returned value _found_ is such that <code>find(found)==found</code>
177
+ # - the union find data structure is not modified (no compression implemented).
178
+ #
179
+ def find(i)
180
+ while @elements[i].parent != i
181
+ i = @elements[i].parent
182
+ end
183
+ i
184
+ end
185
+
186
+ #
187
+ # Merges groups of the i-th element and j-th element, yielding a block to compute
188
+ # the merging of user data attached to their respective groups before merging.
189
+ #
190
+ # Preconditions:
191
+ # - This method allows i and j not to be leaders, but any element.
192
+ # - i and j are expected to be valid elements (0 <= i <= size, same for j)
193
+ #
194
+ # Postconditions:
195
+ # - groups of i and j have been merged. All elements of the two subgroups have
196
+ # the group number defined as <code>min(find(i),find(j))</code> (before
197
+ # merging)
198
+ # - if a block is provided, the user data attached to the new group is computed by
199
+ # yielding the block, passing mergable_data(i) and mergable_data(j) as arguments.
200
+ # The block is ecpected to return the merged data that will be kept for the new
201
+ # group.
202
+ # - If a transaction is pending, all required information is saved to restore
203
+ # the union-find structure if the transaction is rollbacked later.
204
+ #
205
+ def union(i, j)
206
+ i, j = find(i), find(j)
207
+ reversed = false
208
+ i, j, reversed = j, i, true if j<i
209
+
210
+ # Save i and j if in transaction and not already saved
211
+ if @changed
212
+ @changed[i] = @elements[i].dup unless @changed.has_key?(i)
213
+ @changed[j] = @elements[j].dup unless @changed.has_key?(j)
214
+ end
215
+
216
+ # Make the changes now
217
+ @elements[j].parent = i
218
+ if block_given?
219
+ d1, d2 = @elements[i].data, @elements[j].data
220
+ d1, d2 = d2, d1 if reversed
221
+ @elements[i].data = yield(d1, d2).freeze
222
+ else
223
+ nil
224
+ end
225
+ end
226
+
227
+ #
228
+ # Checks if an element is the leader of its group.
229
+ #
230
+ # Preconditions:
231
+ # - i is a valid element: 0 <= i < size
232
+ #
233
+ # Postconditions:
234
+ # - true if find(i)==i, false otherwise.
235
+ #
236
+ def leader?(i)
237
+ @elements[i].parent==i
238
+ end
239
+
240
+ #
241
+ # Checks if an element is a slave in its group (negation of leader?).
242
+ #
243
+ # Preconditions:
244
+ # - i is a valid element: 0 <= i < size
245
+ #
246
+ # Postconditions:
247
+ # - false if find(i)==i, true otherwise.
248
+ #
249
+ def slave?(i)
250
+ @elements[i].parent != i
251
+ end
252
+
253
+ # UserData API #############################################################
254
+
255
+ #
256
+ # Returns the mergeable data of each group in an array. No order of the
257
+ # groups is ensured by this method.
258
+ #
259
+ def mergeable_datas
260
+ indices = (0...size).select {|i| leader?(i)}
261
+ indices.collect{|i| @elements[i].data}
262
+ end
263
+
264
+ #
265
+ # Returns the mergeable data attached to the group of the i-th element.
266
+ #
267
+ # Preconditions:
268
+ # - This method allows i not to be leader, but any element.
269
+ # - i is a valid element: 0 <= i < size
270
+ #
271
+ def mergeable_data(i)
272
+ @elements[find(i)].data
273
+ end
274
+
275
+ # Transactional API ########################################################
276
+
277
+ #
278
+ # Makes a save point now. Internally ensures that future changes will be
279
+ # tracked and that a later rollback will restore the union find to the
280
+ # internal state it had before this call. This method should not be called
281
+ # if a transaction is already pending.
282
+ #
283
+ def save_point
284
+ @changed = {}
285
+ end
286
+
287
+ #
288
+ # Terminates the pending transaction by commiting all changes that have been
289
+ # done since the last save_point call. This method should not be called if no
290
+ # transaction is pending.
291
+ #
292
+ def commit
293
+ @changed = nil
294
+ end
295
+
296
+ #
297
+ # Rollbacks all changes that have been done since the last save_point call.
298
+ # This method will certainly fail if no transaction is pending.
299
+ #
300
+ def rollback
301
+ @changed.each_pair do |index, node|
302
+ @elements[index] = node
303
+ end
304
+ @changed = nil
305
+ end
306
+
307
+ #
308
+ # Makes a save point, yields the block. If it returns false or nil, rollbacks
309
+ # the transaction otherwise commits it. This method is a nice shortcut for
310
+ # the following piece of code
311
+ #
312
+ # ufds.save_point
313
+ # if try_something
314
+ # ufds.commit
315
+ # else
316
+ # ufds.rollback
317
+ # end
318
+ #
319
+ # which can also be expressed as:
320
+ #
321
+ # ufds.transactional do
322
+ # try_something
323
+ # end
324
+ #
325
+ # This method returns the value returned by the block
326
+ #
327
+ def transactional
328
+ save_point
329
+ returned = yield
330
+ if returned.nil? or returned == false
331
+ rollback
332
+ else
333
+ commit
334
+ end
335
+ returned
336
+ end
337
+
338
+ # Common utilities #########################################################
339
+
340
+ #
341
+ # Duplicates this data-structure, ensuring that no change on self or on the
342
+ # copy is shared. Please note that user datas themselve are not duplicated as
343
+ # they are considered immutable values (and freezed at construction and union).
344
+ #
345
+ def dup
346
+ copy = UnionFind.new(size)
347
+ copy.elements = @elements.collect{|e| e.dup}
348
+ copy
349
+ end
350
+
351
+ #
352
+ # Returns the partitioning information as as array with the group number of
353
+ # each element.
354
+ #
355
+ def to_a
356
+ (0...size).collect{|i| find(i)}
357
+ end
358
+
359
+ #
360
+ # Returns a string representation of this union find information.
361
+ #
362
+ def to_s
363
+ @elements.to_s
364
+ end
365
+
366
+ #
367
+ # Returns a string representation of this union find information.
368
+ #
369
+ def inspect
370
+ @elements.to_s
371
+ end
372
+
373
+ protected :elements=
374
+ end # class UnionFind
375
+
376
+ end # module Induction
377
+ end # module Stamina