stamina-induction 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39) hide show
  1. data/CHANGELOG.md +78 -0
  2. data/LICENCE.md +22 -0
  3. data/lib/stamina-induction/stamina-induction.rb +1 -0
  4. data/lib/stamina-induction/stamina/abbadingo.rb +2 -0
  5. data/lib/stamina-induction/stamina/abbadingo/random_dfa.rb +55 -0
  6. data/lib/stamina-induction/stamina/abbadingo/random_sample.rb +146 -0
  7. data/lib/stamina-induction/stamina/classifier.rb +55 -0
  8. data/lib/stamina-induction/stamina/command.rb +6 -0
  9. data/lib/stamina-induction/stamina/command/abbadingo_dfa.rb +80 -0
  10. data/lib/stamina-induction/stamina/command/abbadingo_samples.rb +39 -0
  11. data/lib/stamina-induction/stamina/command/classify.rb +47 -0
  12. data/lib/stamina-induction/stamina/command/infer.rb +140 -0
  13. data/lib/stamina-induction/stamina/command/metrics.rb +50 -0
  14. data/lib/stamina-induction/stamina/command/score.rb +34 -0
  15. data/lib/stamina-induction/stamina/dsl.rb +2 -0
  16. data/lib/stamina-induction/stamina/dsl/induction.rb +29 -0
  17. data/lib/stamina-induction/stamina/dsl/reg_lang.rb +69 -0
  18. data/lib/stamina-induction/stamina/induction.rb +13 -0
  19. data/lib/stamina-induction/stamina/induction/blue_fringe.rb +265 -0
  20. data/lib/stamina-induction/stamina/induction/commons.rb +156 -0
  21. data/lib/stamina-induction/stamina/induction/rpni.rb +186 -0
  22. data/lib/stamina-induction/stamina/induction/union_find.rb +377 -0
  23. data/lib/stamina-induction/stamina/input_string.rb +123 -0
  24. data/lib/stamina-induction/stamina/reg_lang.rb +226 -0
  25. data/lib/stamina-induction/stamina/reg_lang/canonical_info.rb +181 -0
  26. data/lib/stamina-induction/stamina/reg_lang/parser.rb +10 -0
  27. data/lib/stamina-induction/stamina/reg_lang/parser/alternative.rb +19 -0
  28. data/lib/stamina-induction/stamina/reg_lang/parser/node.rb +22 -0
  29. data/lib/stamina-induction/stamina/reg_lang/parser/parenthesized.rb +12 -0
  30. data/lib/stamina-induction/stamina/reg_lang/parser/parser.citrus +49 -0
  31. data/lib/stamina-induction/stamina/reg_lang/parser/plus.rb +14 -0
  32. data/lib/stamina-induction/stamina/reg_lang/parser/question.rb +17 -0
  33. data/lib/stamina-induction/stamina/reg_lang/parser/regexp.rb +12 -0
  34. data/lib/stamina-induction/stamina/reg_lang/parser/sequence.rb +15 -0
  35. data/lib/stamina-induction/stamina/reg_lang/parser/star.rb +15 -0
  36. data/lib/stamina-induction/stamina/reg_lang/parser/symbol.rb +14 -0
  37. data/lib/stamina-induction/stamina/sample.rb +309 -0
  38. data/lib/stamina-induction/stamina/scoring.rb +213 -0
  39. metadata +106 -0
@@ -0,0 +1,156 @@
1
+ module Stamina
2
+ module Induction
3
+
4
+ #
5
+ # Defines common utilities used by rpni and blue_fringe. About acronyms:
6
+ # - _pta_ stands for Prefix Tree Acceptor
7
+ # - _ufds_ stands for Union-Find Data Structure
8
+ #
9
+ # Methods pta2ufds and sample2ufds are simply conversion methods used when the induction
10
+ # algorithm starts (executed on a sample, it first builds a pta then converts it to a union
11
+ # find). Method ufds2dfa is used when the algorithm ends, to convert refined union find to
12
+ # a dfa.
13
+ #
14
+ # The merge_user_data method is probably the most important as it actually computes
15
+ # the merging of two states and builds information about merging for determinization.
16
+ #
17
module Commons

  # Options used when the caller does not override them.
  DEFAULT_OPTIONS = {
    :verbose    => false,
    :verbose_io => $stderr
  }

  # Additional options of the algorithm
  attr_reader :options

  #
  # Is the verbose mode on?
  #
  # The flag is read once from options[:verbose] and cached. The cache is
  # guarded with defined? because `||=` never retains a false value and would
  # recompute the flag on every call when verbose mode is off.
  #
  def verbose?
    @verbose = !!options[:verbose] unless defined?(@verbose)
    @verbose
  end

  #
  # Returns the IO object on which verbose messages are written, that is
  # options[:verbose_io] or $stderr when that option is missing.
  #
  def verbose_io
    @verbose_io ||= options[:verbose_io] || $stderr
  end

  #
  # Displays an information message followed by a newline on verbose_io and
  # flushes it. Does nothing (and returns nil) when verbose mode is off.
  #
  def info(msg)
    return unless verbose?
    verbose_io << msg << "\n"
    verbose_io.flush
  end

  #
  # Factors and returns a UnionFind data structure from a PTA, keeping natural order
  # of its states for union-find elements. The resulting UnionFind contains a Hash as
  # mergeable user data, presenting the following keys:
  # - :initial, :accepting and :error flags of each state
  # - :master indicating the index of the state in the PTA
  # - :delta a delta function through a Hash {symbol => state_index}
  #
  # In this version, other user data attached to PTA states is lost during the
  # conversion.
  #
  def pta2ufds(pta)
    Stamina::Induction::UnionFind.new(pta.state_count) do |i|
      state = pta.ith_state(i)
      data = {
        :initial   => state.initial?,
        :accepting => state.accepting?,
        :error     => state.error?,
        :master    => i,
        :delta     => {}
      }
      # one delta entry per outgoing edge: symbol => index of the target state
      state.out_edges.each {|edge| data[:delta][edge.symbol] = edge.target.index}
      data
    end
  end

  #
  # Converts a Sample to an (augmented) prefix tree acceptor by delegating to
  # <code>sample.to_pta</code>. Induction algorithms expect the states of that
  # PTA to be in lexical order (according to the <code><=></code> operator
  # defined on symbols) and states reached by negative strings to be tagged as
  # non accepting and error; both properties are the responsibility of to_pta.
  #
  def sample2pta(sample)
    sample.to_pta
  end

  #
  # Converts a Sample instance to a 'ready to refine' union find data structure.
  # This method is simply a shortcut for <code>pta2ufds(sample2pta(sample))</code>.
  #
  def sample2ufds(sample)
    pta2ufds(sample2pta(sample))
  end

  #
  # Computes the quotient automaton from a refined UnionFind data structure.
  #
  # One state is created per group, named after the group's :master index.
  # The :master, :count and :delta keys are dropped from the state data and
  # the :error flag is forced to false; every other key of the mergeable data
  # (typically :initial and :accepting) is passed to add_state as is.
  #
  def ufds2dfa(ufds)
    Automaton.new(false) do |fa|
      mergeable_datas = ufds.mergeable_datas

      # first pass: create one state per group
      mergeable_datas.each do |data|
        state_data = data.reject {|key, _value| [:master, :count, :delta].include?(key)}
        state_data[:name]  = data[:master].to_s
        state_data[:error] = false
        fa.add_state(state_data)
      end

      # second pass: connect states, resolving each target to its group leader
      mergeable_datas.each do |data|
        source = fa.get_state(data[:master].to_s)
        data[:delta].each_pair do |symbol, target|
          target = fa.get_state(ufds.find(target).to_s)
          fa.connect(source, target, symbol)
        end
      end
    end
  end

  #
  # Merges two user data hashes _d1_ and _d2_ according to rules defined
  # below. Also fills a _determinization_ array with pairs of state indices
  # that are reached from d1 and d2 through the same symbol and should be
  # merged for determinization. This method does NOT ensure that those pairs
  # correspond to distinct states according to the union find. In other
  # words state indices in these pairs do not necessarily correspond to master
  # states (see UnionFind for this term).
  #
  # Returns the resulting data if the merge is successful (does not lead to
  # merging an error state with an accepting one), nil otherwise.
  #
  # The merging procedure for the different hash keys is as follows:
  # - result[:initial]   = d1[:initial] or d2[:initial]
  # - result[:accepting] = d1[:accepting] or d2[:accepting]
  # - result[:error]     = d1[:error] or d2[:error]
  # - result[:master]    = min(d1[:master], d2[:master])
  # - result[:delta]     = merging of delta hashes, keeping smaller target index
  #   on key collisions.
  #
  def merge_user_data(d1, d2, determinization)
    # we compute flags first
    new_data = {
      :initial   => d1[:initial]   || d2[:initial],
      :accepting => d1[:accepting] || d2[:accepting],
      :error     => d1[:error]     || d2[:error],
      :master    => d1[:master] < d2[:master] ? d1[:master] : d2[:master]
    }

    # merge failure if accepting and error states are merged
    return nil if new_data[:accepting] && new_data[:error]

    # we recompute the delta function of the resulting state
    # keeping merging for determinization as pairs in _determinization_
    new_data[:delta] = d1[:delta].merge(d2[:delta]) do |_symbol, t1, t2|
      determinization << [t1, t2]
      t1 < t2 ? t1 : t2
    end

    # returns merged data
    new_data
  end

end # module Commons
154
+
155
+ end # module Induction
156
+ end # module Stamina
@@ -0,0 +1,186 @@
1
+ module Stamina
2
+ module Induction
3
+
4
+ #
5
+ # Implementation of the standard Regular Positive and Negative Induction (RPNI)
6
+ # algorithm. From a given sample, containing positive and negative strings, RPNI
7
+ # computes the smallest deterministic automaton compatible with the sample.
8
+ #
9
+ # See J. Oncina and P. Garcia, Inferring Regular Languages in Polynomial Update
10
+ # Time, In N. Perez de la Blanca, A. Sanfeliu and E. Vidal, editors, Pattern
11
+ # Recognition and Image Analysis, volume 1 of Series in Machines Perception and
12
+ # Artificial Intelligence, pages 49-61, World Scientific, 1992.
13
+ #
14
+ # Example:
15
+ # # sample typically comes from an ADL file
16
+ # sample = Stamina::ADL.parse_sample_file('sample.adl')
17
+ #
18
+ # # let RPNI build the smallest dfa
19
+ # dfa = Stamina::Induction::RPNI.execute(sample, {:verbose => true})
20
+ #
21
+ # Remarks:
22
+ # - Constructor and instance methods of this class are public but not intended
23
+ # to be used directly. They are left public for testing purposes only.
24
+ # - This class intensively uses the Stamina::Induction::UnionFind class and
25
+ # methods defined in the Stamina::Induction::Commons module which are worth
26
+ # reading to understand the algorithm implementation.
27
+ #
28
class RPNI
  include Stamina::Induction::Commons

  # Union-find data structure used internally
  attr_reader :ufds

  #
  # Creates an algorithm instance with given options, merged on top of
  # DEFAULT_OPTIONS.
  #
  # Raises an ArgumentError if _options_ is not a Hash.
  #
  def initialize(options={})
    raise ArgumentError, "Invalid options #{options.inspect}" unless options.is_a?(Hash)
    @options = DEFAULT_OPTIONS.merge(options)
  end

  #
  # Merges a state of rank j with a state of lower rank i. This merge method
  # includes merging for determinization.
  #
  # Preconditions:
  # - States denoted by i and j are expected leader states (non merged ones)
  # - States denoted by i and j are expected to be different
  #
  # Postconditions:
  # - Union find is refined, states i and j having been merged, as well as all
  #   state pairs that need to be merged to ensure the deterministic property
  #   of the quotient automaton.
  # - If the resulting quotient automaton is consistent with the negative sample,
  #   this method returns true and the refined union-find correctly encodes the
  #   quotient automaton. Otherwise, the method returns false and the union-find
  #   information must be considered inaccurate.
  #
  def merge_and_determinize(i, j)
    # Make the union (keep additional merges to be performed in determinization)
    # and recompute the user data attached to the new state group
    determinization = []
    @ufds.union(i, j) do |d1, d2|
      merged = merge_user_data(d1, d2, determinization)
      # incompatible states: leave merge_and_determinize itself with false
      return false unless merged
      merged
    end

    # Merge for determinization
    determinization.each do |pair|
      # we take the leader states of the pair to merge (the block parameter
      # is deliberately not named i, to avoid shadowing the method argument)
      leaders = pair.collect {|index| @ufds.find(index)}
      # do nothing if already the same leader state
      next if leaders[0] == leaders[1]
      # otherwise recurse or fail
      return false unless merge_and_determinize(leaders[0], leaders[1])
    end

    # Everything seems ok!
    true
  end

  #
  # Makes a complete merge (including determinization), or simply does nothing if
  # it leads to accepting a negative string.
  #
  # Preconditions:
  # - States denoted by i and j are expected leader states (non merged ones)
  # - States denoted by i and j are expected to be different
  #
  # Postconditions:
  # - Union find is refined, states i and j having been merged, as well as all
  #   state pairs that need to be merged to ensure the deterministic property
  #   of the quotient automaton.
  # - If the resulting quotient automaton is consistent with the negative sample,
  #   this method returns true and the refined union-find correctly encodes the
  #   quotient automaton. Otherwise, the union find has not been changed.
  #
  def successfull_merge_or_nothing(i, j)
    # try a merge and determinize inside a transaction on the ufds
    @ufds.transactional do
      merge_and_determinize(i, j)
    end
  end

  #
  # Main method of the algorithm. Refines the union find passed as first argument
  # by merging well chosen state pairs. Returns the refined union find.
  #
  # Preconditions:
  # - The union find _ufds_ is correctly initialized (contains :initial, :accepting,
  #   and :error boolean flags as well as a :delta sub hash)
  #
  # Postconditions:
  # - The union find has been refined. It encodes a quotient automaton (of the PTA
  #   it comes from) such that all positive and negative strings of the underlying
  #   sample are correctly classified by it.
  #
  def main(ufds)
    @ufds = ufds
    info("Starting RPNI (#{@ufds.size} states)")
    # First loop, iterating all PTA states
    (1...@ufds.size).each do |i|
      # we ignore those that have been previously merged
      next if @ufds.slave?(i)
      # second loop: states of lower rank, ignoring merged ones as well
      (0...i).each do |j|
        next if @ufds.slave?(j)
        # try to merge this pair, including determinization;
        # go to the next candidate if it fails, stop the loop if it works
        next unless successfull_merge_or_nothing(i, j)
        info("#{i} and #{j} successfully merged")
        break
      end # j loop
    end # i loop
    @ufds
  end

  #
  # Build the smallest DFA compatible with the sample given as input.
  #
  # Preconditions:
  # - The sample is consistent (does not contain the same string both labeled as
  #   positive and negative) and contains at least one string.
  #
  # Postconditions:
  # - The returned DFA is the smallest DFA that correctly labels the learning sample
  #   given as input.
  #
  # Remarks:
  # - This instance version of RPNI.execute is not intended to be used directly and
  #   is mainly provided for testing purposes. Please use the class variant of this
  #   method if possible.
  #
  def execute(sample)
    # create union-find
    info("Creating PTA and UnionFind structure")
    ufds = sample2ufds(sample)
    # refine it
    ufds = main(ufds)
    # compute and return quotient automaton
    ufds2dfa(ufds)
  end

  #
  # Build the smallest DFA compatible with the sample given as input.
  #
  # Options (the _options_ hash):
  # - :verbose can be set to true to trace algorithm execution.
  # - :verbose_io is the IO receiving the trace ($stderr by default).
  #
  # Preconditions:
  # - The sample is consistent (does not contain the same string both labeled as
  #   positive and negative) and contains at least one string.
  #
  # Postconditions:
  # - The returned DFA is the smallest DFA that correctly labels the learning sample
  #   given as input.
  #
  def self.execute(sample, options={})
    new(options).execute(sample)
  end

end # class RPNI
184
+
185
+ end # module Induction
186
+ end # module Stamina
@@ -0,0 +1,377 @@
1
+ module Stamina
2
+ module Induction
3
+
4
+ #
5
+ # Implements an UnionFind data structure dedicated to state merging induction algorithms.
6
+ # For this purpose, this union-find handles mergeable user data as well as transactional
7
+ # support. See Stamina::Induction::Commons about the usage of this class (and mergeable
8
+ # user data in particular) by induction algorithms.
9
+ #
10
+ # == Example (probably easier than a long explanation)
11
+ #
12
+ # # create a union-find for 10 elements
13
+ # ufds = Stamina::Induction::UnionFind.new(10) do |index|
14
+ # # each element will be associated with a hash with data of interest:
15
+ # # smallest element, greatest element and concatenation of names
16
+ # {:smallest => index, :greatest => index, :names => index.to_s}
17
+ # end
18
+ #
19
+ # # each element is its own leader
20
+ # puts (0...10).all?{|s| ufds.leader?(s)} -> true
21
+ #
22
+ # # and their respective group numbers are the element indices themselves
23
+ # puts ufds.to_a -> [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
24
+ #
25
+ # # now, let merge 4 with 0
26
+ # ufds.union(0, 4) do |d0, d4|
27
+ # {:smallest => d0[:smallest] < d4[:smallest] ? d0[:smallest] : d4[:smallest],
28
+ # :greatest => d0[:greatest] > d4[:greatest] ? d0[:greatest] : d4[:greatest],
29
+ # :names => d0[:names] + " " + d4[:names]}
30
+ # end
31
+ #
32
+ # # let see what happens on group numbers
33
+ # puts ufds.to_a -> [0, 1, 2, 3, 0, 5, 6, 7, 8, 9]
34
+ #
35
+ # # let now have a look on mergeable_data of the group of 0 (same result for 4)
36
+ # puts ufds.mergeable_data(0).inspect -> {:smallest => 0, :greatest => 4, :names => "0 4"}
37
+ #
38
+ # == Basic Union Find API
39
+ #
40
+ # A UnionFind data structure typically allows encoding a partition of elements (a
41
+ # partition is a collection of disjoint sets - aka a collection of groups). Basically,
42
+ # this class represents elements by successive indices (from 0 to size, the latter being
43
+ # excluded). The partitioning information is kept in an array, associating a group number
44
+ # to each element. This group number is simply the index of the least element in the
45
+ # group (which means that group numbers are not necessarily consecutive). For example,
46
+ # the following arrays map to the associated partitions:
47
+ #
48
+ # [0, 1, 2, 3, 4, 5] -> {{0}, {1}, {2}, {3}, {4}, {5}}
49
+ # [0, 0, 0, 0, 0, 0] -> {{0, 1, 2, 3, 4, 5}}
50
+ # [0, 1, 1, 0, 4, 4] -> {{0, 3}, {1, 2}, {4, 5}}
51
+ #
52
+ # The API of this basic union-find data structure is composed of the following
53
+ # methods:
54
+ # - new(size) (class method): builds an initial partition information over _size_
55
+ # elements. This initial partition keeps each element in its own group.
56
+ # - find(i): returns the group number of the i-th element
57
+ # - union(i, j): merge the group of the i-th element with the group of the j-th
58
+ # element. Note that i and j are elements, NOT group numbers.
59
+ #
60
+ # As we use least elements as group numbers, it is also interesting to know if a
61
+ # given element is that least element (aka leader element of the group) or not:
62
+ #
63
+ # - leader?(i): returns true if i is the group number of the i-th element, false
64
+ # otherwise. In other words, returns true if find(i)==i
65
+ # - slave?(i): the negation of leader?(i).
66
+ #
67
+ # == Handling User Data
68
+ #
69
+ # Even if this class represents elements by indices, it also allows keeping user
70
+ # data attached to each group. For this:
71
+ #
72
+ # - an initial user data is attached to each element at construction time by
73
+ # yielding a block (passing the element index as first argument and expecting
74
+ # user data as block return value).
75
+ # - the union(i, j) method allows a block to be given. It passes user data of i's
76
+ # and j's groups as arguments and expects the block to compute and return the
77
+ # merged user data for the new group.
78
+ # - mergeable_data(i) returns the current user data associated to the group of
79
+ # the i-th element.
80
+ # - mergeable_datas returns an array with user data attached to each group.
81
+ #
82
+ # Please note that user data are considered immutable values, and should never be
83
+ # changed... Only new ones can be created at union time. To ensure this good usage,
84
+ # user data are frozen by this class at creation time and union time.
85
+ #
86
+ # == Transactional support
87
+ #
88
+ # The main aim of this UnionFind is to make the implementation of the induction algorithms
89
+ # Stamina::Induction::RPNI and Stamina::Induction::BlueFringe (sufficiently) efficient,
90
+ # simple and readable. These algorithms rely on a trial-and-error strategy and must be
91
+ # able to revert the changes they have made during their last try. The transaction
92
+ # support implemented by this data structure helps them achieve this goal. For this
93
+ # we provide the following methods:
94
+ #
95
+ # - save_point: ensures that the internal state of the UnionFind can be restored if
96
+ # rollback is invoked later.
97
+ # - commit: informs the UnionFind that changes that have been made since the last
98
+ # invocation of save_point will not be reconsidered.
99
+ # - rollback: restores the internal state of the UnionFind that has been saved when
100
+ # save_point has been called.
101
+ #
102
+ # Please note that this class does not support sub-transactions.
103
+ #
104
class UnionFind

  #
  # An element of the union find, keeping the index of its parent element as well as
  # mergeable user data. This class is not intended to be used by external users of the
  # UnionFind data structure.
  #
  class Node

    # Index of the parent element (on the way to the leader)
    attr_accessor :parent

    # Attached user data
    attr_accessor :data

    #
    # Creates a Node instance with a specific parent index and attached
    # user data.
    #
    def initialize(parent, data)
      @parent = parent
      @data = data
    end

    #
    # Duplicates this node, ensuring that future changes will not affect the copy.
    # Please note that the user data itself is not duplicated and is not expected
    # to change.
    #
    def dup
      Node.new(@parent, @data)
    end

  end # class Node

  #
  # Number of elements in this union find
  #
  attr_reader :size

  #
  # (protected) Writer on elements array, provided for duplication
  #
  attr_writer :elements

  #
  # Creates a default union find of a given size. Each element is initially in its own
  # group. User data attached to each group is obtained by yielding a block, passing
  # element index as first argument; the returned value is frozen. Without a block,
  # user data is nil.
  #
  # Precondition:
  # - size is expected to be strictly positive
  #
  def initialize(size)
    @size = size
    @elements = (0...size).collect do |i|
      Node.new(i, block_given? ? yield(i).freeze : nil)
    end
    # nil when no transaction is pending, otherwise a Hash {index => saved Node}
    @changed = nil
  end

  # Union Find API ###########################################################

  #
  # Finds the group number of the i-th element (the group number is the least
  # element of the group, aka _leader_).
  #
  # Preconditions:
  # - i is a valid element: 0 <= i < size
  #
  # Postconditions:
  # - returned value _found_ is such that <code>find(found)==found</code>
  # - the union find data structure is not modified (no compression implemented).
  #
  def find(i)
    i = @elements[i].parent while @elements[i].parent != i
    i
  end

  #
  # Merges groups of the i-th element and j-th element, yielding a block to compute
  # the merging of user data attached to their respective groups.
  #
  # Preconditions:
  # - This method allows i and j not to be leaders, but any element.
  # - i and j are expected to be valid elements (0 <= i < size, same for j)
  #
  # Postconditions:
  # - groups of i and j have been merged. All elements of the two subgroups have
  #   the group number defined as <code>min(find(i),find(j))</code> (before
  #   merging)
  # - if a block is provided, the user data attached to the new group is computed by
  #   yielding the block, passing mergeable_data(i) and mergeable_data(j) as arguments
  #   (in the order of the arguments given to union). The block is expected to return
  #   the merged data that will be kept (frozen) for the new group.
  # - if i and j already belong to the same group, nothing changes and the block
  #   is not yielded: merging a group with itself must not recompute its data.
  # - If a transaction is pending, all required information is saved to restore
  #   the union-find structure if the transaction is rolled back later.
  #
  # Returns the user data of the resulting group when a block is given, nil
  # otherwise.
  #
  def union(i, j)
    i, j = find(i), find(j)

    # Same group already: no-op
    return(block_given? ? @elements[i].data : nil) if i == j

    # Keep the least leader in i, remembering if arguments have been swapped
    reversed = j < i
    i, j = j, i if reversed

    # Save i and j if in transaction and not already saved
    if @changed
      @changed[i] = @elements[i].dup unless @changed.key?(i)
      @changed[j] = @elements[j].dup unless @changed.key?(j)
    end

    # Make the changes now. The parent link is set before yielding, so that
    # a block leaving early still lets a pending transaction roll it back.
    @elements[j].parent = i
    return nil unless block_given?

    d1, d2 = @elements[i].data, @elements[j].data
    # present user data in the order requested by the caller
    d1, d2 = d2, d1 if reversed
    @elements[i].data = yield(d1, d2).freeze
  end

  #
  # Checks if an element is the leader of its group.
  #
  # Preconditions:
  # - i is a valid element: 0 <= i < size
  #
  # Postconditions:
  # - true if find(i)==i, false otherwise.
  #
  def leader?(i)
    @elements[i].parent == i
  end

  #
  # Checks if an element is a slave in its group (negation of leader?).
  #
  # Preconditions:
  # - i is a valid element: 0 <= i < size
  #
  # Postconditions:
  # - false if find(i)==i, true otherwise.
  #
  def slave?(i)
    @elements[i].parent != i
  end

  # UserData API #############################################################

  #
  # Returns the mergeable data of each group in an array (one entry per
  # leader element). Callers should not rely on the order of the groups.
  #
  def mergeable_datas
    leaders = (0...size).select {|i| leader?(i)}
    leaders.collect {|i| @elements[i].data}
  end

  #
  # Returns the mergeable data attached to the group of the i-th element.
  #
  # Preconditions:
  # - This method allows i not to be leader, but any element.
  # - i is a valid element: 0 <= i < size
  #
  def mergeable_data(i)
    @elements[find(i)].data
  end

  # Transactional API ########################################################

  #
  # Makes a save point now. Internally ensures that future changes will be
  # tracked and that a later rollback will restore the union find to the
  # internal state it had before this call. This method should not be called
  # if a transaction is already pending.
  #
  def save_point
    @changed = {}
  end

  #
  # Terminates the pending transaction by committing all changes that have been
  # done since the last save_point call. This method should not be called if no
  # transaction is pending.
  #
  def commit
    @changed = nil
  end

  #
  # Rolls back all changes that have been done since the last save_point call.
  # This method will certainly fail if no transaction is pending.
  #
  def rollback
    @changed.each_pair do |index, node|
      @elements[index] = node
    end
    @changed = nil
  end

  #
  # Makes a save point, yields the block. If it returns false or nil, rolls
  # back the transaction, otherwise commits it. This method is a nice shortcut
  # for the following piece of code
  #
  #   ufds.save_point
  #   if try_something
  #     ufds.commit
  #   else
  #     ufds.rollback
  #   end
  #
  # which can also be expressed as:
  #
  #   ufds.transactional do
  #     try_something
  #   end
  #
  # If the block raises a StandardError, the transaction is rolled back before
  # the error is re-raised, so that a failure never leaves a transaction
  # pending nor the structure half-modified.
  #
  # This method returns the value returned by the block
  #
  def transactional
    save_point
    begin
      returned = yield
    rescue StandardError
      # the block may already have closed the transaction by itself
      rollback if @changed
      raise
    end
    if returned
      commit
    else
      rollback
    end
    returned
  end

  # Common utilities #########################################################

  #
  # Duplicates this data-structure, ensuring that no change on self or on the
  # copy is shared. Please note that user data themselves are not duplicated as
  # they are considered immutable values (and frozen at construction and union).
  #
  def dup
    copy = UnionFind.new(size)
    copy.elements = @elements.collect {|e| e.dup}
    copy
  end

  #
  # Returns the partitioning information as an array with the group number of
  # each element.
  #
  def to_a
    (0...size).collect {|i| find(i)}
  end

  #
  # Returns a string representation of this union find information.
  #
  def to_s
    @elements.to_s
  end

  #
  # Returns a string representation of this union find information.
  #
  def inspect
    @elements.to_s
  end

  protected :elements=
end # class UnionFind
375
+
376
+ end # module Induction
377
+ end # module Stamina