stamina-induction 0.5.0
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGELOG.md +78 -0
- data/LICENCE.md +22 -0
- data/lib/stamina-induction/stamina-induction.rb +1 -0
- data/lib/stamina-induction/stamina/abbadingo.rb +2 -0
- data/lib/stamina-induction/stamina/abbadingo/random_dfa.rb +55 -0
- data/lib/stamina-induction/stamina/abbadingo/random_sample.rb +146 -0
- data/lib/stamina-induction/stamina/classifier.rb +55 -0
- data/lib/stamina-induction/stamina/command.rb +6 -0
- data/lib/stamina-induction/stamina/command/abbadingo_dfa.rb +80 -0
- data/lib/stamina-induction/stamina/command/abbadingo_samples.rb +39 -0
- data/lib/stamina-induction/stamina/command/classify.rb +47 -0
- data/lib/stamina-induction/stamina/command/infer.rb +140 -0
- data/lib/stamina-induction/stamina/command/metrics.rb +50 -0
- data/lib/stamina-induction/stamina/command/score.rb +34 -0
- data/lib/stamina-induction/stamina/dsl.rb +2 -0
- data/lib/stamina-induction/stamina/dsl/induction.rb +29 -0
- data/lib/stamina-induction/stamina/dsl/reg_lang.rb +69 -0
- data/lib/stamina-induction/stamina/induction.rb +13 -0
- data/lib/stamina-induction/stamina/induction/blue_fringe.rb +265 -0
- data/lib/stamina-induction/stamina/induction/commons.rb +156 -0
- data/lib/stamina-induction/stamina/induction/rpni.rb +186 -0
- data/lib/stamina-induction/stamina/induction/union_find.rb +377 -0
- data/lib/stamina-induction/stamina/input_string.rb +123 -0
- data/lib/stamina-induction/stamina/reg_lang.rb +226 -0
- data/lib/stamina-induction/stamina/reg_lang/canonical_info.rb +181 -0
- data/lib/stamina-induction/stamina/reg_lang/parser.rb +10 -0
- data/lib/stamina-induction/stamina/reg_lang/parser/alternative.rb +19 -0
- data/lib/stamina-induction/stamina/reg_lang/parser/node.rb +22 -0
- data/lib/stamina-induction/stamina/reg_lang/parser/parenthesized.rb +12 -0
- data/lib/stamina-induction/stamina/reg_lang/parser/parser.citrus +49 -0
- data/lib/stamina-induction/stamina/reg_lang/parser/plus.rb +14 -0
- data/lib/stamina-induction/stamina/reg_lang/parser/question.rb +17 -0
- data/lib/stamina-induction/stamina/reg_lang/parser/regexp.rb +12 -0
- data/lib/stamina-induction/stamina/reg_lang/parser/sequence.rb +15 -0
- data/lib/stamina-induction/stamina/reg_lang/parser/star.rb +15 -0
- data/lib/stamina-induction/stamina/reg_lang/parser/symbol.rb +14 -0
- data/lib/stamina-induction/stamina/sample.rb +309 -0
- data/lib/stamina-induction/stamina/scoring.rb +213 -0
- metadata +106 -0
@@ -0,0 +1,156 @@
|
|
1
|
+
module Stamina
|
2
|
+
module Induction
|
3
|
+
|
4
|
+
#
|
5
|
+
# Defines common utilities used by rpni and blue_fringe. About acronyms:
|
6
|
+
# - _pta_ stands for Prefix Tree Acceptor
|
7
|
+
# - _ufds_ stands for Union-Find Data Structure
|
8
|
+
#
|
9
|
+
# Methods pta2ufds and sample2ufds are simply conversion methods used when the induction
|
10
|
+
# algorithm starts (executed on a sample, it first builds a pta then converts it to a union
|
11
|
+
# find). Method ufds2dfa is used when the algorithm ends, to convert refined union find to
|
12
|
+
# a dfa.
|
13
|
+
#
|
14
|
+
# The merge_user_data method is probably the most important as it actually computes
|
15
|
+
# the merging of two states and build information about merging for determinization.
|
16
|
+
#
|
17
|
+
module Commons

  # Options applied when the caller does not override them.
  DEFAULT_OPTIONS = {
    :verbose    => false,
    :verbose_io => $stderr
  }

  # Additional options of the algorithm
  attr_reader :options

  #
  # Is the verbose mode on ?
  #
  # The flag is read from <code>options[:verbose]</code> on first call and then
  # cached, whatever its value (a plain <code>||=</code> would recompute it on
  # every call when the flag is false).
  #
  def verbose?
    @verbose = !!options[:verbose] if @verbose.nil?
    @verbose
  end

  #
  # Returns the IO object on which information messages are written, that is
  # <code>options[:verbose_io]</code>, falling back to $stderr.
  #
  def verbose_io
    @verbose_io ||= options[:verbose_io] || $stderr
  end

  #
  # Displays an information message followed by a newline on verbose_io, then
  # flushes it. Does nothing unless the verbose mode is on.
  #
  def info(msg)
    return unless verbose?
    verbose_io << msg << "\n"
    verbose_io.flush
  end

  #
  # Builds and returns a UnionFind data structure from a PTA, keeping natural order
  # of its states for union-find elements. The resulting UnionFind contains a Hash as
  # mergeable user data, presenting the following keys:
  # - :initial, :accepting and :error flags of each state
  # - :master indicating the index of the state in the PTA
  # - :delta a delta function through a Hash {symbol => state_index}
  #
  # In this version, other user data attached to PTA states is lost during the
  # conversion.
  #
  def pta2ufds(pta)
    Stamina::Induction::UnionFind.new(pta.state_count) do |index|
      state = pta.ith_state(index)
      # outgoing transitions, indexed by symbol
      delta = {}
      state.out_edges.each { |edge| delta[edge.symbol] = edge.target.index }
      {:initial   => state.initial?,
       :accepting => state.accepting?,
       :error     => state.error?,
       :master    => index,
       :delta     => delta}
    end
  end

  #
  # Converts a Sample to an (augmented) prefix tree acceptor. This method simply
  # delegates to <code>sample.to_pta</code>, which is expected to ensure that the
  # states of the PTA are in lexical order, according to the <code><=></code>
  # operator defined on symbols, and to tag states reached by negative strings as
  # non accepting and error.
  #
  def sample2pta(sample)
    sample.to_pta
  end

  #
  # Converts a Sample instance to a 'ready to refine' union find data structure.
  # This method is simply a shortcut for <code>pta2ufds(sample2pta(sample))</code>.
  #
  def sample2ufds(sample)
    pta2ufds(sample2pta(sample))
  end

  #
  # Computes the quotient automaton from a refined UnionFind data structure.
  #
  # One state is created per group of the union find. Its data is the user data
  # of the group without the :master, :count and :delta keys; its :name is the
  # group's :master index (as a String) and its :error flag is forced to false.
  # Edges are then created from the :delta hash of each group, targets being
  # resolved to their group leader.
  #
  def ufds2dfa(ufds)
    Automaton.new(false) do |fa|
      groups = ufds.mergeable_datas

      # first pass: one automaton state per group
      groups.each do |data|
        state_data = data.reject { |key, _| [:master, :count, :delta].include?(key) }
        state_data[:name]  = data[:master].to_s
        state_data[:error] = false
        fa.add_state(state_data)
      end

      # second pass: connect states (all of them exist at this point)
      groups.each do |data|
        source = fa.get_state(data[:master].to_s)
        data[:delta].each_pair do |symbol, target_index|
          target = fa.get_state(ufds.find(target_index).to_s)
          fa.connect(source, target, symbol)
        end
      end
    end
  end

  #
  # Merges two user data hashes _d1_ and _d2_ according to rules defined
  # below. Also fills a _determinization_ array with pairs of state indices
  # that are reached from d1 and d2 through the same symbol and should be
  # merged for determinization. This method does NOT ensure that those pairs
  # correspond to distinct states according to the union find. In other
  # words state indices in these pairs do not necessarily correspond to master
  # states (see UnionFind for this term).
  #
  # Returns the resulting data if the merge is successful (does not lead to
  # merging an error state with an accepting one), nil otherwise.
  #
  # The merging procedure for the different hash keys is as follows:
  # - result[:initial] = d1[:initial] or d2[:initial]
  # - result[:accepting] = d1[:accepting] or d2[:accepting]
  # - result[:error] = d1[:error] or d2[:error]
  # - result[:master] = min(d1[:master], d2[:master])
  # - result[:delta] = merging of delta hashes, keeping smaller target index
  #   on key collisions.
  #
  # Any other key of d1 and d2 is not propagated to the result.
  #
  def merge_user_data(d1, d2, determinization)
    # we compute flags first
    new_data = {:initial   => d1[:initial] || d2[:initial],
                :accepting => d1[:accepting] || d2[:accepting],
                :error     => d1[:error] || d2[:error],
                :master    => [d1[:master], d2[:master]].min}

    # merge failure if accepting and error states are merged
    return nil if new_data[:accepting] && new_data[:error]

    # we recompute the delta function of the resulting state
    # keeping merging for determinization as pairs in _determinization_
    new_data[:delta] = d1[:delta].merge(d2[:delta]) do |_symbol, t1, t2|
      determinization << [t1, t2]
      [t1, t2].min
    end

    # returns merged data
    new_data
  end

end # module Commons
|
154
|
+
|
155
|
+
end # module Induction
|
156
|
+
end # module Stamina
|
@@ -0,0 +1,186 @@
|
|
1
|
+
module Stamina
|
2
|
+
module Induction
|
3
|
+
|
4
|
+
#
|
5
|
+
# Implementation of the standard Regular Positive and Negative Induction (RPNI)
|
6
|
+
# algorithm. From a given sample, containing positive and negative strings, RPNI
|
7
|
+
# computes the smallest deterministic automaton compatible with the sample.
|
8
|
+
#
|
9
|
+
# See J. Oncina and P. Garcia, Inferring Regular Languages in Polynomial Update
|
10
|
+
# Time, In N. Perez de la Blanca, A. Sanfeliu and E. Vidal, editors, Pattern
|
11
|
+
# Recognition and Image Analysis, volume 1 of Series in Machines Perception and
|
12
|
+
# Artificial Intelligence, pages 49-61, World Scientific, 1992.
|
13
|
+
#
|
14
|
+
# Example:
|
15
|
+
# # sample typically comes from an ADL file
|
16
|
+
# sample = Stamina::ADL.parse_sample_file('sample.adl')
|
17
|
+
#
|
18
|
+
# # let RPNI build the smallest dfa
|
19
|
+
# dfa = Stamina::Induction::RPNI.execute(sample, {:verbose => true})
|
20
|
+
#
|
21
|
+
# Remarks:
|
22
|
+
# - Constructor and instance methods of this class are public but not intended
|
23
|
+
# to be used directly. They are left public for testing purposes only.
|
24
|
+
# - This class intensively uses the Stamina::Induction::UnionFind class and
|
25
|
+
# methods defined in the Stamina::Induction::Commons module which are worth
|
26
|
+
# reading to understand the algorithm implementation.
|
27
|
+
#
|
28
|
+
class RPNI
  include Stamina::Induction::Commons

  # Union-find data structure used internally
  attr_reader :ufds

  #
  # Creates an algorithm instance with given options, which are merged over
  # DEFAULT_OPTIONS.
  #
  # Raises an ArgumentError if _options_ is not a Hash.
  #
  def initialize(options={})
    raise ArgumentError, "Invalid options #{options.inspect}" unless options.is_a?(Hash)
    @options = DEFAULT_OPTIONS.merge(options)
  end

  #
  # Merges a state of rank j with a state of lower rank i. This merge method
  # includes merging for determinization.
  #
  # Preconditions:
  # - States denoted by i and j are expected leader states (non merged ones)
  # - States denoted by i and j are expected to be different
  #
  # Postconditions:
  # - Union find is refined, states i and j having been merged, as well as all
  #   state pairs that need to be merged to ensure the deterministic property
  #   of the quotient automaton.
  # - If the resulting quotient automaton is consistent with the negative sample,
  #   this method returns true and the refined union-find correctly encodes the
  #   quotient automaton. Otherwise, the method returns false and the union-find
  #   information must be considered inaccurate.
  #
  def merge_and_determinize(i, j)
    # Make the union (keep additional merges to be performed in determinization)
    # and recompute the user data attached to the new state group
    determinization = []
    @ufds.union(i, j) do |d1, d2|
      merged = merge_user_data(d1, d2, determinization)
      # non-local return: leaves merge_and_determinize itself on incompatible states
      return false unless merged
      merged
    end

    # Merge for determinization
    determinization.each do |pair|
      # we take the leader states of the pair to merge
      left, right = pair.collect { |index| @ufds.find(index) }
      # do nothing if already the same leader state
      next if left == right
      # otherwise recurse or fail
      return false unless merge_and_determinize(left, right)
    end

    # Everything seems ok!
    true
  end

  #
  # Makes a complete merge (including determinization), or simply does nothing if
  # it leads to accepting a negative string.
  #
  # Preconditions:
  # - States denoted by i and j are expected leader states (non merged ones)
  # - States denoted by i and j are expected to be different
  #
  # Postconditions:
  # - Union find is refined, states i and j having been merged, as well as all
  #   state pairs that need to be merged to ensure the deterministic property
  #   of the quotient automaton.
  # - If the resulting quotient automaton is consistent with the negative sample,
  #   this method returns true and the refined union-find correctly encodes the
  #   quotient automaton. Otherwise, the union find has not been changed.
  #
  def successfull_merge_or_nothing(i,j)
    # try a merge and determinize inside a transaction on the ufds
    @ufds.transactional { merge_and_determinize(i, j) }
  end

  #
  # Main method of the algorithm. Refines the union find passed as first argument
  # by merging well chosen state pairs. Returns the refined union find.
  #
  # Preconditions:
  # - The union find _ufds_ is correctly initialized (contains :initial, :accepting,
  #   and :error boolean flags as well as a :delta sub hash)
  #
  # Postconditions:
  # - The union find has been refined. It encodes a quotient automaton (of the PTA
  #   it comes from) such that all positive and negative strings of the underlying
  #   sample are correctly classified by it.
  #
  def main(ufds)
    @ufds = ufds
    info("Starting RPNI (#{@ufds.size} states)")
    # First loop, iterating all PTA states
    (1...@ufds.size).each do |i|
      # we ignore those that have been previously merged
      next if @ufds.slave?(i)
      # second loop: states of lower rank, ignoring already merged ones as well
      (0...i).each do |j|
        next if @ufds.slave?(j)
        # try to merge this pair, including determinization
        next unless successfull_merge_or_nothing(i,j)
        # simply break the loop if it works!
        info("#{i} and #{j} successfully merged")
        break
      end # j loop
    end # i loop
    @ufds
  end

  #
  # Build the smallest DFA compatible with the sample given as input.
  #
  # Preconditions:
  # - The sample is consistent (does not contain the same string both labeled as
  #   positive and negative) and contains at least one string.
  #
  # Postconditions:
  # - The returned DFA is the smallest DFA that correctly labels the learning sample
  #   given as input.
  #
  # Remarks:
  # - This instance version of RPNI.execute is not intended to be used directly and
  #   is mainly provided for testing purposes. Please use the class variant of this
  #   method if possible.
  #
  def execute(sample)
    # create union-find
    info("Creating PTA and UnionFind structure")
    initial = sample2ufds(sample)
    # refine it
    refined = main(initial)
    # compute and return quotient automaton
    ufds2dfa(refined)
  end

  #
  # Build the smallest DFA compatible with the sample given as input.
  #
  # Options (the _options_ hash):
  # - :verbose can be set to true to trace algorithm execution.
  # - :verbose_io is the IO object on which the trace is written ($stderr by
  #   default).
  #
  # Preconditions:
  # - The sample is consistent (does not contain the same string both labeled as
  #   positive and negative) and contains at least one string.
  #
  # Postconditions:
  # - The returned DFA is the smallest DFA that correctly labels the learning sample
  #   given as input.
  #
  def self.execute(sample, options={})
    new(options).execute(sample)
  end

end # class RPNI
|
184
|
+
|
185
|
+
end # module Induction
|
186
|
+
end # module Stamina
|
@@ -0,0 +1,377 @@
|
|
1
|
+
module Stamina
|
2
|
+
module Induction
|
3
|
+
|
4
|
+
#
|
5
|
+
# Implements an UnionFind data structure dedicated to state merging induction algorithms.
|
6
|
+
# For this purpose, this union-find handles mergeable user data as well as transactional
|
7
|
+
# support. See Stamina::Induction::Commons about the usage of this class (and mergeable
|
8
|
+
# user data in particular) by induction algorithms.
|
9
|
+
#
|
10
|
+
# == Example (probably easier than a long explanation)
|
11
|
+
#
|
12
|
+
# # create a union-find for 10 elements
|
13
|
+
# ufds = Stamina::Induction::UnionFind.new(10) do |index|
|
14
|
+
# # each element will be associated with a hash with data of interest:
|
15
|
+
# # smallest element, greatest element and concatenation of names
|
16
|
+
# {:smallest => index, :greatest => index, :names => index.to_s}
|
17
|
+
# end
|
18
|
+
#
|
19
|
+
# # each element is its own leader
|
20
|
+
# puts (0...10).all?{|s| ufds.leader?(s)} -> true
|
21
|
+
#
|
22
|
+
# # and their respective group number are the element indices themselve
|
23
|
+
# puts ufds.to_a -> [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
|
24
|
+
#
|
25
|
+
# # now, let merge 4 with 0
|
26
|
+
# ufds.union(0, 4) do |d0, d4|
|
27
|
+
# {:smallest => d0[:smallest] < d4[:smallest] ? d0[:smallest] : d4[:smallest],
|
28
|
+
# :greatest => d0[:greatest] > d4[:greatest] ? d0[:greatest] : d4[:greatest],
|
29
|
+
# :names => d0[:names] + " " + d4[:names]}
|
30
|
+
# end
|
31
|
+
#
|
32
|
+
# # let see what happens on group numbers
|
33
|
+
# puts ufds.to_a -> [0, 1, 2, 3, 0, 5, 6, 7, 8, 9]
|
34
|
+
#
|
35
|
+
# # let now have a look on mergeable_data of the group of 0 (same result for 4)
|
36
|
+
# puts ufds.mergeable_data(0).inspect -> {:smallest => 0, :greatest => 4, :names => "0 4"}
|
37
|
+
#
|
38
|
+
# == Basic Union Find API
|
39
|
+
#
|
40
|
+
# A UnionFind data structure typically allows encoding a partition of elements (a
|
41
|
+
# partition is a collection of disjoint sets - aka a collection of groups). Basically,
|
42
|
+
# this class represents elements by successive indices (from 0 to size, the later being
|
43
|
+
# excluded). The partitioning information is kept in a array, associating a group number
|
44
|
+
# to each element. This group number is simply the index of the least element in the
|
45
|
+
# group (which means that group numbers are not necessarily consecutive). For example,
|
46
|
+
# the following arrays maps to the associated partitions:
|
47
|
+
#
|
48
|
+
# [0, 1, 2, 3, 4, 5] -> {{0}, {1}, {2}, {3}, {4}, {5}}
|
49
|
+
# [0, 0, 0, 0, 0, 0] -> {{0, 1, 2, 3, 4, 5}}
|
50
|
+
# [0, 1, 1, 0, 4, 4] -> {{0, 3}, {1, 2}, {4, 5}}
|
51
|
+
#
|
52
|
+
# The API of this basic union-find data structure is composed of the following
|
53
|
+
# methods:
|
54
|
+
# - new(size) (class method): builds an initial partition information over _size_
|
55
|
+
# elements. This initial partition keeps each element in its own group.
|
56
|
+
# - find(i): returns the group number of the i-th element
|
57
|
+
# - union(i, j): merge the group of the i-th element with the group of the j-th
|
58
|
+
# element. Note that i and j are elements, NOT group numbers.
|
59
|
+
#
|
60
|
+
# As we use least elements as group numbers, it is also interesting to know if a
|
61
|
+
# given element is that least element (aka leader element of the group) or not:
|
62
|
+
#
|
63
|
+
# - leader?(i): returns true if i is the group number of the i-th element, false
|
64
|
+
# otherwise. In other words, returns true if find(i)==i
|
65
|
+
# - slave?(i): the negation of leader?(i).
|
66
|
+
#
|
67
|
+
# == Handling User Data
|
68
|
+
#
|
69
|
+
# Even if this class represents elements by indices, it also allows keeping user
|
70
|
+
# data attached to each group. For this:
|
71
|
+
#
|
72
|
+
# - an initial user data is attached to each element at construction time by
|
73
|
+
# yielding a block (passing the element index as first argument and expecting
|
74
|
+
# user data as block return value).
|
75
|
+
# - the union(i, j) method allows a block to be given. It passes user data of i's
|
76
|
+
# and j's groups as arguments and expects the block to compute and return the
|
77
|
+
# merged user data for the new group.
|
78
|
+
# - mergeable_data(i) returns the current user data associated to the group of
|
79
|
+
# the i-th element.
|
80
|
+
# - mergeable_datas returns an array with user data attached to each group.
|
81
|
+
#
|
82
|
+
# Please note that user data are considered immutable values, and should never be
|
83
|
+
# changed... Only new ones can be created at union time. To ensure this good usage,
|
84
|
+
# user data are frozen by this class at creation time and union time.
|
85
|
+
#
|
86
|
+
# == Transactional support
|
87
|
+
#
|
88
|
+
# The main aim of this UnionFind is to make the implementation induction algorithms
|
89
|
+
# Stamina::Induction::RPNI and Stamina::Induction::BlueFringe (sufficiently) efficient,
|
90
|
+
# simple and readable. These algorithms rely on a trial-and-error strategy and must be
|
91
|
+
# able to revert the changes they have made during their last try. The transaction
|
92
|
+
# support implemented by this data structure helps them achieving this goal. For this
|
93
|
+
# we provide the following methods:
|
94
|
+
#
|
95
|
+
# - save_point: ensures that the internal state of the UnionFind can be restored if
|
96
|
+
# rollback is invoked later.
|
97
|
+
# - commit: informs the UnionFind that changes that have been made since the last
|
98
|
+
# invocation of save_point will not be reconsidered.
|
99
|
+
# - rollback: restores the internal state of the UnionFind that has been saved when
|
100
|
+
# save_point has been called.
|
101
|
+
#
|
102
|
+
# Please note that this class does not support sub-transactions.
|
103
|
+
#
|
104
|
+
class UnionFind

  #
  # An element of the union find, keeping the index of its parent element as well as
  # mergeable user data. This class is not intended to be used by external users of the
  # UnionFind data structure.
  #
  class Node

    # Index of the parent element (on the way to the leader)
    attr_accessor :parent

    # Attached user data
    attr_accessor :data

    #
    # Creates a default Node instance with a specific parent index and attached
    # user data.
    #
    def initialize(parent, data)
      @parent = parent
      @data = data
    end

    #
    # Duplicates this node, ensuring that future changes will not affect the copy.
    # Please note that the user data itself is not duplicated and is not expected
    # to change.
    #
    def dup
      Node.new(@parent, @data)
    end

  end # class Node

  #
  # Number of elements in this union find
  #
  attr_reader :size

  #
  # (protected) Accessor on elements array, provided for duplication
  #
  attr_writer :elements
  protected :elements=

  #
  # Creates a default union find of a given size. Each element is initially in its own
  # group. User data attached to each group is obtained by yielding a block, passing
  # element index as first argument; the returned value is frozen. Without a block,
  # user data is nil.
  #
  # Precondition:
  # - size is expected to be strictly positive
  #
  def initialize(size)
    @size = size
    @elements = (0...size).collect do |i|
      Node.new(i, block_given? ? yield(i).freeze : nil)
    end
    # Hash {index => saved Node} while a transaction is pending, nil otherwise
    @changed = nil
  end

  # Union Find API ###########################################################

  #
  # Finds the group number of the i-th element (the group number is the least
  # element of the group, aka _leader_).
  #
  # Preconditions:
  # - i is a valid element: 0 <= i < size
  #
  # Postconditions:
  # - returned value _found_ is such that <code>find(found)==found</code>
  # - the union find data structure is not modified (no compression implemented).
  #
  def find(i)
    i = @elements[i].parent while @elements[i].parent != i
    i
  end

  #
  # Merges groups of the i-th element and j-th element, yielding a block to compute
  # the merging of user data attached to their respective groups.
  #
  # Preconditions:
  # - This method allows i and j not to be leaders, but any element.
  # - i and j are expected to be valid elements (0 <= i < size, same for j)
  #
  # Postconditions:
  # - groups of i and j have been merged. All elements of the two subgroups have
  #   the group number defined as <code>min(find(i),find(j))</code> (before
  #   merging)
  # - if a block is provided, the user data attached to the new group is computed by
  #   yielding the block, passing mergeable_data(i) and mergeable_data(j) as arguments
  #   (in that order). The block is expected to return the merged data that will be
  #   kept (frozen) for the new group.
  # - if i and j already belong to the same group, nothing is changed and the block
  #   is not yielded (merging the data of a group with itself would corrupt it).
  # - If a transaction is pending, all required information is saved to restore
  #   the union-find structure if the transaction is rolled back later.
  #
  # Returns the user data of the resulting group when a block is given, nil
  # otherwise.
  #
  def union(i, j)
    i, j = find(i), find(j)

    # Same group already: nothing to merge
    return (block_given? ? @elements[i].data : nil) if i == j

    # Keep the least leader in i; remember the swap to yield data in caller order
    reversed = j < i
    i, j = j, i if reversed

    # Save i and j if in transaction and not already saved
    if @changed
      @changed[i] = @elements[i].dup unless @changed.has_key?(i)
      @changed[j] = @elements[j].dup unless @changed.has_key?(j)
    end

    # Make the changes now
    @elements[j].parent = i
    return nil unless block_given?

    d1, d2 = @elements[i].data, @elements[j].data
    d1, d2 = d2, d1 if reversed
    @elements[i].data = yield(d1, d2).freeze
  end

  #
  # Checks if an element is the leader of its group.
  #
  # Preconditions:
  # - i is a valid element: 0 <= i < size
  #
  # Postconditions:
  # - true if find(i)==i, false otherwise.
  #
  def leader?(i)
    @elements[i].parent == i
  end

  #
  # Checks if an element is a slave in its group (negation of leader?).
  #
  # Preconditions:
  # - i is a valid element: 0 <= i < size
  #
  # Postconditions:
  # - false if find(i)==i, true otherwise.
  #
  def slave?(i)
    !leader?(i)
  end

  # UserData API #############################################################

  #
  # Returns the mergeable data of each group in an array, one entry per leader
  # element.
  #
  def mergeable_datas
    (0...size).select { |i| leader?(i) }.collect { |i| @elements[i].data }
  end

  #
  # Returns the mergeable data attached to the group of the i-th element.
  #
  # Preconditions:
  # - This method allows i not to be leader, but any element.
  # - i is a valid element: 0 <= i < size
  #
  def mergeable_data(i)
    @elements[find(i)].data
  end

  # Transactional API ########################################################

  #
  # Makes a save point now. Internally ensures that future changes will be
  # tracked and that a later rollback will restore the union find to the
  # internal state it had before this call. This method should not be called
  # if a transaction is already pending (tracked changes would be forgotten).
  #
  def save_point
    @changed = {}
  end

  #
  # Terminates the pending transaction by committing all changes that have been
  # done since the last save_point call. This method should not be called if no
  # transaction is pending.
  #
  def commit
    @changed = nil
  end

  #
  # Rolls back all changes that have been done since the last save_point call.
  # This method will certainly fail if no transaction is pending.
  #
  def rollback
    @changed.each_pair do |index, node|
      @elements[index] = node
    end
    @changed = nil
  end

  #
  # Makes a save point, yields the block. If it returns false or nil, rolls back
  # the transaction otherwise commits it. This method is a nice shortcut for
  # the following piece of code
  #
  #   ufds.save_point
  #   if try_something
  #     ufds.commit
  #   else
  #     ufds.rollback
  #   end
  #
  # which can also be expressed as:
  #
  #   ufds.transactional do
  #     try_something
  #   end
  #
  # This method returns the value returned by the block. If the block raises an
  # error, the transaction is rolled back (instead of being left pending with
  # partial changes applied) and the error is re-raised.
  #
  def transactional
    save_point
    begin
      outcome = yield
    rescue StandardError
      # the block may have terminated the transaction itself before failing
      rollback if @changed
      raise
    end
    if outcome
      commit
    else
      rollback
    end
    outcome
  end

  # Common utilities #########################################################

  #
  # Duplicates this data-structure, ensuring that no change on self or on the
  # copy is shared. Please note that user datas themselves are not duplicated as
  # they are considered immutable values (and frozen at construction and union).
  # A pending transaction, if any, is not propagated to the copy.
  #
  def dup
    copy = UnionFind.new(size)
    copy.elements = @elements.collect { |node| node.dup }
    copy
  end

  #
  # Returns the partitioning information as an array with the group number of
  # each element.
  #
  def to_a
    (0...size).collect { |i| find(i) }
  end

  #
  # Returns a string representation of this union find information.
  #
  def to_s
    @elements.to_s
  end

  #
  # Returns a string representation of this union find information (same as to_s).
  #
  def inspect
    to_s
  end

end # class UnionFind
|
375
|
+
|
376
|
+
end # module Induction
|
377
|
+
end # module Stamina
|