riskfolio-lib 7.2.0__cp313-cp313-macosx_10_13_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- riskfolio/__init__.py +14 -0
- riskfolio/external/__init__.py +10 -0
- riskfolio/external/cppfunctions.py +376 -0
- riskfolio/external/functions.cpython-313-darwin.so +0 -0
- riskfolio/src/AuxFunctions.py +1488 -0
- riskfolio/src/ConstraintsFunctions.py +2210 -0
- riskfolio/src/DBHT.py +1089 -0
- riskfolio/src/GerberStatistic.py +240 -0
- riskfolio/src/HCPortfolio.py +1102 -0
- riskfolio/src/OwaWeights.py +433 -0
- riskfolio/src/ParamsEstimation.py +1989 -0
- riskfolio/src/PlotFunctions.py +5052 -0
- riskfolio/src/Portfolio.py +6164 -0
- riskfolio/src/Reports.py +692 -0
- riskfolio/src/RiskFunctions.py +3195 -0
- riskfolio/src/__init__.py +20 -0
- riskfolio/version.py +4 -0
- riskfolio_lib-7.2.0.dist-info/LICENSE.txt +27 -0
- riskfolio_lib-7.2.0.dist-info/METADATA +386 -0
- riskfolio_lib-7.2.0.dist-info/RECORD +22 -0
- riskfolio_lib-7.2.0.dist-info/WHEEL +6 -0
- riskfolio_lib-7.2.0.dist-info/top_level.txt +1 -0
riskfolio/src/DBHT.py
ADDED
|
@@ -0,0 +1,1089 @@
|
|
|
1
|
+
"""""" #
|
|
2
|
+
|
|
3
|
+
"""
|
|
4
|
+
Copyright (c) 2020-2026, Dany Cajas
|
|
5
|
+
All rights reserved.
|
|
6
|
+
This work is licensed under BSD 3-Clause "New" or "Revised" License.
|
|
7
|
+
License available at https://github.com/dcajasn/Riskfolio-Lib/blob/master/LICENSE.txt
|
|
8
|
+
|
|
9
|
+
This work is based on the code of Tomaso Aste available at
|
|
10
|
+
https://www.mathworks.com/matlabcentral/fileexchange/46750-dbht
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
import numpy as np
|
|
14
|
+
import scipy.sparse as sp
|
|
15
|
+
from scipy.spatial.distance import squareform
|
|
16
|
+
from scipy.cluster.hierarchy import from_mlab_linkage, optimal_leaf_ordering
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
__all__ = [
|
|
20
|
+
"DBHTs",
|
|
21
|
+
"j_LoGo",
|
|
22
|
+
"PMFG_T2s",
|
|
23
|
+
"distance_wei",
|
|
24
|
+
"CliqHierarchyTree2s",
|
|
25
|
+
"BuildHierarchy",
|
|
26
|
+
"FindDisjoint",
|
|
27
|
+
"AdjCliq",
|
|
28
|
+
"BubbleHierarchy",
|
|
29
|
+
"clique3",
|
|
30
|
+
"breadth",
|
|
31
|
+
"BubbleCluster8s",
|
|
32
|
+
"DirectHb",
|
|
33
|
+
"HierarchyConstruct4s",
|
|
34
|
+
"LinkageFunction",
|
|
35
|
+
"BubbleMember",
|
|
36
|
+
"DendroConstruct",
|
|
37
|
+
]
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def DBHTs(D, S, leaf_order=True):
    r"""
    Perform Direct Bubble Hierarchical Tree (DBHT) clustering, a deterministic
    technique which only requires a similarity matrix S, and related
    dissimilarity matrix D. For more information see "Hierarchical information
    clustering by means of topologically embedded graphs." :cite:`d-Song`.
    This version makes extensive use of graph-theoretic filtering technique
    called Triangulated Maximally Filtered Graph (TMFG).

    Parameters
    ----------
    D : nd-array
        N x N dissimilarity matrix - e.g. a distance: D=pdist(data,'euclidean')
        and then D=squareform(D).

    S : nd-array
        N x N similarity matrix (non-negative)- e.g. correlation
        coefficient+1: S = 2-D**2/2 or another possible choice can be S =
        exp(-D).

    leaf_order : bool, optional
        If True, the linkage matrix is passed through scipy's
        ``optimal_leaf_ordering`` together with the condensed form of D
        before being returned. The default is True.

    Returns
    -------
    T8 : nd-array
        N x 1 cluster membership vector.
    Rpm : nd-array
        N x N adjacency matrix of Planar Maximally Filtered
        Graph (PMFG).
    Adjv : nd-array
        Bubble cluster membership matrix from BubbleCluster8.
    Dpm : nd-array
        N x N shortest path length matrix of PMFG
    Mv : nd-array
        N x Nb bubble membership matrix. Nb(n,bi)=1 indicates vertex n
        is a vertex of bubble bi.
    Z : nd-array
        Linkage matrix using DBHT hierarchy.
    """

    # Filter the similarity matrix; only the weighted adjacency is needed here.
    (Rpm, _, _, _, _) = PMFG_T2s(S)

    # Same sparsity pattern as Rpm, but carrying dissimilarities as edge lengths.
    Apm = Rpm.copy()
    kept = Apm != 0
    Apm[kept] = D[kept].copy()
    (Dpm, _) = distance_wei(Apm)

    (_, Hb, Mb, CliqList, _) = CliqHierarchyTree2s(Rpm, method1="uniqueroot")
    Mb = Mb[0 : CliqList.shape[0], :]

    # Vertex/bubble membership: a vertex belongs to a bubble when it appears in
    # any 3-clique of that bubble. Allocated once instead of growing by hstack.
    n_vertices = Rpm.shape[0]
    n_bubbles = Mb.shape[1]
    Mv = np.zeros((n_vertices, n_bubbles))
    for i in range(n_bubbles):
        members = np.int32(np.unique(CliqList[Mb[:, i] != 0, :]))
        Mv[members, i] = 1

    (Adjv, T8) = BubbleCluster8s(Rpm, Dpm, Hb, Mb, Mv, CliqList)
    Z = HierarchyConstruct4s(Rpm, Dpm, T8, Adjv, Mv)

    if leaf_order:
        Z = optimal_leaf_ordering(Z, squareform(D))

    return (T8, Rpm, Adjv, Dpm, Mv, Z)
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
def j_LoGo(S, separators, cliques):
    r"""
    computes sparse inverse covariance, J, from a clique tree made of cliques
    and separators. For more information see: :cite:`d-jLogo`.


    Parameters
    ----------
    S : ndarray
        It is the complete covariance matrix.
    separators : nd-array or dict
        It is the list of separators. Either a 2-D array with one separator
        per row, or a dict whose values are the separators.
    cliques : nd-array or dict
        It is the list of cliques. Either a 2-D array with one clique per
        row, or a dict whose values are the cliques.

    Returns
    -------
    JLogo : nd-array
        Inverse covariance.

    Notes
    -----
    separators and cliques can be the outputs of TMFG function

    """
    N = S.shape[0]

    # Normalise both inputs to a plain list of index vectors. Previously a
    # dict input left the working variable unbound and raised a NameError.
    if isinstance(separators, dict):
        separator_sets = list(separators.values())
    else:
        separator_sets = [separators[i, :] for i in range(len(separators))]

    if isinstance(cliques, dict):
        clique_sets = list(cliques.values())
    else:
        clique_sets = [cliques[i, :] for i in range(len(cliques))]

    Jlogo = np.zeros((N, N))

    # Add the inverse of every clique sub-matrix ...
    for members in clique_sets:
        v = np.int32(members)
        block = np.ix_(v, v)
        Jlogo[block] = Jlogo[block] + np.linalg.inv(S[block])

    # ... and subtract the inverse of every separator sub-matrix.
    for members in separator_sets:
        v = np.int32(members)
        block = np.ix_(v, v)
        Jlogo[block] = Jlogo[block] - np.linalg.inv(S[block])

    return Jlogo
|
|
146
|
+
|
|
147
|
+
|
|
148
|
+
def PMFG_T2s(W, nargout=3):
    r"""
    Computes a Triangulated Maximally Filtered Graph (TMFG) :cite:`d-Massara`
    starting from a tetrahedron and inserting recursively vertices inside
    existing triangles (T2 move) in order to approximate a maximal planar
    graph with the largest total weight - non negative weights.

    Parameters
    ----------
    W : nd-array
        An N x N matrix of non-negative weights.
    nargout : int, optional
        Number of results, Possible values are 3, 4 and 5. A 5-tuple is
        always returned; the entries that were not requested are None.

    Returns
    -------
    A : nd-array
        Adjacency matrix of the PMFG (with weights)
    tri : nd-array
        Matrix of triangles (triangular faces) of size 2N - 4 x 3
    separators : nd-array
        Matrix of 3-cliques that are not triangular faces (all 3-cliques are
        given by: [tri;separators]).
    clique4 : nd-array, optional
        List of all 4-cliques (None unless nargout > 3).
    cliqueTree : nd-array, optional
        4-cliques tree structure (adjacency matrix) (None unless
        nargout > 4).

    """
    N = W.shape[0]
    # Both conditions are only reported; the computation is still attempted.
    if N < 9:
        print("W Matrix too small \n")
    if np.any(W < 0):
        print("W Matrix has negative elements! \n")

    A = np.zeros((N, N))  # initialize adjacency matrix
    in_v = -1 * np.ones(N, dtype=np.int32)  # initialize list of inserted vertices
    tri = np.zeros((2 * N - 4, 3))  # initialize list of triangles
    separators = np.zeros((N - 4, 3))  # initialize list of 3-cliques (non face-triangles)

    # rank vertices by strength (sum of their above-average weights)
    s = np.sum(W * (W > np.mean(W)), axis=1)
    j = np.int32(np.argsort(s)[::-1].reshape(-1))

    # the four strongest vertices form the starting tetrahedron
    in_v[0:4] = j[0:4]
    ou_v = np.setdiff1d(np.arange(0, N), in_v)  # list of vertices not inserted yet
    for t, face in enumerate(([0, 1, 2], [1, 2, 3], [0, 1, 3], [0, 2, 3])):
        tri[t, :] = in_v[face]
    # edges are stored in one direction only; A is symmetrised at the end
    for a, b in ((0, 1), (0, 2), (0, 3), (1, 2), (1, 3), (2, 3)):
        A[in_v[a], in_v[b]] = 1

    # build initial gain table: gain[v, t] is the total weight between the
    # outside vertex v and the three corners of triangle t
    gain = np.zeros((N, 2 * N - 4))
    for t in range(4):
        gain[ou_v, t] = np.sum(W[np.ix_(ou_v, np.int32(tri[t, :]))], axis=1)

    kk = 3  # index of the last triangle stored so far
    for k in range(4, N):
        # find best vertex to add in a triangle
        if len(ou_v) == 1:  # special case for the last vertex
            ve = ou_v[0]
            v = 0
            tr = np.argmax(gain[ou_v, :])
        else:
            gij = np.max(gain[ou_v, :], axis=0)
            v = np.argmax(gain[ou_v, :], axis=0)
            # rounding makes the choice insensitive to tiny float differences
            tr = np.argmax(np.round(gij, 6).flatten())
            ve = ou_v[v[tr]]
            v = v[tr]

        # update vertex lists
        ou_v = ou_v[np.delete(np.arange(len(ou_v)), v)]
        in_v[k] = ve
        # update adjacency matrix
        A[np.ix_([ve], np.int32(tri[tr, :]))] = 1
        # update 3-clique list
        separators[k - 4, :] = tri[tr, :]
        # update triangle list replacing 1 and adding 2 triangles
        tri[kk + 1, :] = np.hstack((tri[tr, [0, 2]], ve))  # add
        tri[kk + 2, :] = np.hstack((tri[tr, [1, 2]], ve))  # add
        tri[tr, :] = np.hstack((tri[tr, [0, 1]], ve))  # replace
        # update gain table
        gain[ve, :] = 0
        for t in (tr, kk + 1, kk + 2):
            gain[ou_v, t] = np.sum(W[np.ix_(ou_v, np.int32(tri[t, :]))], axis=1)
        # update number of triangles
        kk = kk + 2
        if np.mod(k, 1000) == 0:
            # the format string is now actually applied to the percentage
            print("PMFG T2: %0.2f per-cent done\n" % (k / N * 100))

    # keep the weights of the selected edges, in both directions
    A = W * ((A + A.T) == 1)

    if nargout > 3:
        cliques = np.vstack(
            (in_v[0:4].reshape(1, -1), np.hstack((separators, in_v[4:].reshape(-1, 1))))
        )
    else:
        cliques = None

    # computes 4-clique tree (note this may include incomplete cliques!)
    if nargout > 4:
        n_cliques = cliques.shape[0]
        cliqueTree = np.zeros((n_cliques, n_cliques))
        for i in range(0, n_cliques):
            # 1-D counter; np.zeros(n, 1) would pass 1 as dtype and raise
            ss = np.zeros(n_cliques)
            # NOTE(review): only the first three columns are compared and the
            # threshold is 2 -- confirm against the reference implementation.
            for col in range(0, 3):
                ss = ss + np.sum((cliques[i, col] == cliques), axis=1)

            cliqueTree[i, ss == 2] = 1
    else:
        cliqueTree = None

    return (A, tri, separators, cliques, cliqueTree)
|
|
270
|
+
|
|
271
|
+
|
|
272
|
+
def distance_wei(L):
    r"""
    The distance matrix contains lengths of shortest paths between all
    pairs of nodes. An entry (u,v) represents the length of shortest path
    from node u to node v. The average shortest path length is the
    characteristic path length of the network.

    Parameters
    ----------
    L : nd-array
        Directed/undirected connection-length matrix.

    Returns
    -------
    D : nd-array
        Distance (shortest weighted path) matrix
    B : nd-array
        Number of edges in shortest weighted path matrix

    Notes
    -----
    The input matrix must be a connection-length matrix, typically
    obtained via a mapping from weight to length. For instance, in a
    weighted correlation network higher correlations are more naturally
    interpreted as shorter distances and the input matrix should
    consequently be some inverse of the connectivity matrix.
    The number of edges in shortest weighted paths may in general
    exceed the number of edges in shortest binary paths (i.e. shortest
    paths computed on the binarized connectivity matrix), because shortest
    weighted paths have the minimal weighted distance, but not necessarily
    the minimal number of edges.

    Lengths between disconnected nodes are set to Inf.
    Lengths on the main diagonal are set to 0.

    Algorithm\: Dijkstra's algorithm.

    Mika Rubinov, UNSW/U Cambridge, 2007-2012.
    Rick Betzel and Andrea Avena, IU, 2012
    Modification history \:
    2007: original (MR)
    2009-08-04: min() function vectorized (MR)
    2012: added number of edges in shortest path as additional output (RB/AA)
    2013: variable names changed for consistency with other functions (MR)

    """

    n = len(L)
    D = np.full((n, n), np.inf)
    np.fill_diagonal(D, 0)  # distance matrix
    B = np.zeros((n, n))  # number of edges matrix
    for u in range(0, n):
        S = np.ones(n, dtype=bool)  # distance permanence (true is temporary)
        L1 = L.copy()
        V = np.array([u])
        while True:
            S[V] = False  # distance u->V is now permanent
            L1[:, V] = 0  # no in-edges as already shortest
            for v in V.tolist():
                T = np.flatnonzero(L1[v, :])  # neighbours of shortest nodes
                # row 0: current best length u->T, row 1: length of u->v->T
                candidates = np.vstack((D[u, T], D[u, v] + L1[v, T]))
                d = np.min(candidates, axis=0)
                wi = np.argmin(candidates, axis=0)
                D[u, T] = d  # smallest of old/new path lengths
                # argmin is 0-based: row 1 means the path through v won
                ind = T[wi == 1]
                B[u, ind] = B[u, v] + 1  # increment no. of edges in lengthened paths

            if D[u, S].size == 0:
                # all nodes reached
                break
            minD = np.min(D[u, S])
            if np.isinf(minD):
                # some nodes cannot be reached
                break
            V = np.flatnonzero(D[u, :] == minD)

    return (D, B)
|
|
361
|
+
|
|
362
|
+
|
|
363
|
+
def CliqHierarchyTree2s(Apm, method1):
    r"""
    ClqHierarchyTree2 looks for 3-cliques of a maximal planar graph, then
    construct hierarchy of the cliques with the definition of 'inside' a
    clique to be a subgraph with smaller size, when the entire graph is
    made disjoint by removing the clique :cite:`d-Song2`.

    Parameters
    ----------
    Apm : nd-array
        N x N Adjacency matrix of a maximal planar graph.

    method1 : str
        Choose between 'uniqueroot' and 'equalroot'. Assigns connections
        between final root cliques. Uses Voronoi tesselation between tiling
        triangles.

    Returns
    -------
    H1 : nd-array
        Nc x Nc adjacency matrix for 3-clique hierarchical tree where Nc is
        the number of 3-cliques.
    H2 : nd-array
        Nb x Nb adjacency matrix for bubble hierarchical tree where Nb is the
        number of bubbles.
    Mb : nd-array
        Nc x Nb matrix bubble membership matrix. Mb(n,bi)=1 indicates that
        3-clique n belongs to bi bubble.
    CliqList : nd-array
        Nc x 3 matrix. Each row vector lists three vertices consisting a
        3-clique in the maximal planar graph.
    Sb : nd-array
        Nc x 1 vector. Non-zero entries mark separating 3-cliques; the value
        stored is the size of the smaller side of the split.

    Raises
    ------
    ValueError
        If method1 is neither 'uniqueroot' nor 'equalroot'.
    """
    method = method1.lower()
    if method not in ("uniqueroot", "equalroot"):
        # Fail early with a clear message instead of an unbound-variable error.
        raise ValueError("method1 must be 'uniqueroot' or 'equalroot'")

    N = Apm.shape[0]
    # Binarise the adjacency matrix.
    if sp.issparse(Apm) != 1:
        A = 1.0 * sp.csr_matrix(Apm != 0).toarray()
    else:
        A = 1.0 * (Apm != 0)

    (_, _, clique) = clique3(A)

    Nc = clique.shape[0]
    M = np.zeros((N, Nc))
    CliqList = clique.copy()
    Sb = np.zeros(Nc)
    del clique
    for n in range(0, Nc):
        (T, _) = FindDisjoint(A, CliqList[n, :])
        labels = np.ravel(T)
        indx0 = np.argwhere(labels == 0)  # the clique's own vertices
        indx1 = np.argwhere(labels == 1)
        indx2 = np.argwhere(labels == 2)
        # "inside" is the smaller of the two sides, plus the clique itself
        if len(indx1) > len(indx2):
            indx_s = np.vstack((indx2, indx0))
        else:
            indx_s = np.vstack((indx1, indx0))

        if indx_s.shape[0] == 0:
            Sb[n] = 0
        else:
            Sb[n] = len(indx_s) - 3  # inside vertices, excluding the clique

        M[indx_s, n] = 1

    Pred = BuildHierarchy(M)
    Root = np.argwhere(Pred == -1)

    if method == "uniqueroot":
        if len(Root) > 1:
            # attach all roots to one extra, artificial root node
            Pred = np.append(Pred[:], -1)
            Pred[Root] = len(Pred) - 1

        H = np.zeros((Nc + 1, Nc + 1))
        for n in range(0, len(Pred)):
            if Pred[n] != -1:
                H[n, np.int32(Pred[n])] = 1

        H = H + H.T
    else:  # "equalroot"
        if len(Root) > 1:
            Adj = AdjCliq(A, CliqList, Root)
        else:
            # With at most one root there is nothing to connect; previously
            # Adj was left unbound here and the addition below failed.
            Adj = np.zeros((Nc, Nc))
        H = np.zeros((Nc, Nc))
        for n in range(0, len(Pred)):
            if Pred[n] != -1:
                H[n, np.int32(Pred[n])] = 1
        if Pred.shape[0] != 0:
            H = H + H.T
            H = H + Adj
        else:
            H = np.empty((0, 0))

    H1 = H.copy()

    if H1.shape[0] != 0:
        (H2, Mb) = BubbleHierarchy(Pred, Sb, A, CliqList)
    else:
        H2 = np.empty((0, 0))
        Mb = np.empty((0, 0))

    H2 = 1.0 * (H2 != 0)
    Mb = Mb[0 : CliqList.shape[0], :]

    return (H1, H2, Mb, CliqList, Sb)
|
|
478
|
+
|
|
479
|
+
|
|
480
|
+
def BuildHierarchy(M):
    r"""
    Derive a predecessor vector from a membership matrix.

    Column n of M marks with 1 the rows that belong to set n. For every
    column the candidate parents are the other columns that contain all of
    its rows; the candidate with the fewest rows is chosen.

    Parameters
    ----------
    M : nd-array
        Membership matrix with one column per set.

    Returns
    -------
    Pred : nd-array
        Vector with one entry per column of M holding the index of the
        chosen parent column, or -1 when the column has no parent. An empty
        array is returned as soon as a column has several equally small
        candidate parents.
    """
    n_sets = M.shape[1]
    Pred = -1 * np.ones(n_sets)
    for n in range(0, n_sets):
        Children = np.flatnonzero(M[:, n] == 1)
        # how many of this set's rows each column contains
        ChildrenSum = np.sum(M[Children, :], axis=0)
        Parents = np.flatnonzero(np.ravel(ChildrenSum) == len(Children))
        Parents = Parents[Parents != n]
        if Parents.shape[0] == 0:
            continue  # no enclosing set: entry stays -1

        ParentSum = np.sum(M[:, Parents], axis=0)
        smallest = np.flatnonzero(ParentSum == np.min(ParentSum))
        if len(smallest) != 1:
            # ambiguous parent: signal failure with an empty vector
            return np.empty(0)
        # index with a scalar so a plain number is stored, not a (1, 1) array
        Pred[n] = Parents[smallest[0]]

    return Pred
|
|
500
|
+
|
|
501
|
+
|
|
502
|
+
def FindDisjoint(Adj, Cliq):
    r"""
    Label the vertices of a graph after removing a 3-clique.

    All edges touching the clique are removed and a breadth-first search is
    started from the first vertex outside the clique.

    Parameters
    ----------
    Adj : nd-array
        N x N adjacency matrix.
    Cliq : nd-array
        The three vertices of the clique.

    Returns
    -------
    T : nd-array
        Vector of length N: 0 for the clique vertices, 2 for the vertices
        reached by the search (including its start vertex) and 1 for the
        remaining vertices.
    IndxNot : nd-array
        Column vector with the indices of the vertices outside the clique.
    """
    N = Adj.shape[0]
    cliq_idx = np.int32(Cliq)
    T = np.zeros(N)
    IndxTotal = np.arange(0, N)
    # np.logical_and takes two operands (a third positional argument is the
    # output buffer), so the three conditions are chained explicitly.
    outside = (
        (IndxTotal != Cliq[0]) & (IndxTotal != Cliq[1]) & (IndxTotal != Cliq[2])
    )
    IndxNot = np.argwhere(outside)
    if IndxNot.shape[0] == 0:
        # the graph is the clique itself: nothing to label
        return (T, IndxNot)

    # cut the clique out of the graph
    Temp = Adj.copy()
    Temp[cliq_idx, :] = 0
    Temp[:, cliq_idx] = 0

    (d, _) = breadth(Temp, IndxNot[0])
    d[np.isinf(d)] = -1
    d[IndxNot[0]] = 0  # the search may overwrite the start vertex's distance

    T[d == -1] = 1
    T[d != -1] = 2
    T[cliq_idx] = 0

    return (T, IndxNot)
|
|
524
|
+
|
|
525
|
+
|
|
526
|
+
def AdjCliq(A, CliqList, CliqRoot):
    r"""
    Build the Nc x Nc adjacency matrix between root cliques.

    Parameters
    ----------
    A : nd-array
        N x N adjacency matrix; only its size is used.
    CliqList : nd-array
        Nc x 3 matrix of 3-cliques.
    CliqRoot : nd-array
        Indices (rows of CliqList) of the root cliques.

    Returns
    -------
    Adj : nd-array
        Nc x Nc symmetric matrix.

    Notes
    -----
    NOTE(review): the only write into ``Adj`` stores 0 in an already-zero
    matrix, and the indicator values are concatenated into one flat vector
    whose sum is a single number, so as written the result is all zeros.
    Confirm the intended adjacency rule against the reference implementation.
    """
    n_cliques = CliqList.shape[0]
    n_vertices = A.shape[0]
    cliq_int = np.int32(CliqList.copy())
    root_int = np.int32(np.ravel(CliqRoot))

    Adj = np.zeros((n_cliques, n_cliques))
    Indicator = np.zeros((n_vertices, 1))
    for n, root in enumerate(root_int):
        # flag the vertices of this root (flags accumulate across iterations)
        Indicator[cliq_int[root, :]] = 1
        # flags of the 1st, 2nd and 3rd vertex of every root, laid end to end
        Indi = np.hstack(
            [Indicator[cliq_int[root_int, col], 0] for col in range(3)]
        )
        hit = np.sum(Indi.T, axis=0) == 1
        adjacent = root_int[hit]
        Adj[adjacent, n] = 0

    Adj = Adj + Adj.T

    return Adj
|
|
548
|
+
|
|
549
|
+
|
|
550
|
+
def BubbleHierarchy(Pred, Sb, A, CliqList):
    r"""
    Group 3-cliques into bubbles and build the bubble adjacency matrix.

    Starting from the root cliques (entries of Pred equal to -1), each root
    and its direct children form one bubble; children with a non-zero entry
    in Sb become the roots of the next round.

    Parameters
    ----------
    Pred : nd-array
        Predecessor vector of the clique hierarchy (-1 marks a root).
    Sb : nd-array
        Vector whose non-zero entries mark the cliques that are expanded
        further.
    A : nd-array
        Not used by this function.
    CliqList : nd-array
        Not used by this function.

    Returns
    -------
    H : nd-array
        Nb x Nb matrix with zero diagonal; entry (i, j) is non-zero when
        bubbles i and j share at least one clique.
    Mb : nd-array
        Nc x Nb membership matrix; Mb[c, b] = 1 when clique c belongs to
        bubble b.
    """
    Nc = Pred.shape[0]
    Root = np.flatnonzero(Pred == -1)
    CliqCount = np.zeros(Nc)  # 1 once a clique has been placed in a bubble
    CliqCount[Root] = 1
    columns = []  # one membership column per bubble, stacked once at the end

    if len(Root) > 1:
        # several roots together form the first bubble
        first = np.zeros(Nc)
        first[Root] = 1
        columns.append(first)

    while np.sum(CliqCount) < Nc:
        NxtRoot = []

        for root in Root:
            DirectChild = np.flatnonzero(Pred == root)
            member = np.zeros(Nc)
            member[np.append(DirectChild, np.int32(root))] = 1
            columns.append(member)
            CliqCount[DirectChild] = 1

            NxtRoot.extend(child for child in DirectChild if Sb[child] != 0)

        Root = np.unique(np.array(NxtRoot, dtype=np.int64))
        if Root.size == 0:
            # Nothing left to expand: stop instead of looping forever when
            # some cliques can never be reached.
            break

    Mb = np.column_stack(columns) if columns else np.empty((Nc, 0))

    Nb = Mb.shape[1]
    H = np.zeros((Nb, Nb))

    for n in range(0, Nb):
        # cliques of bubble n, counted per bubble
        JointSum = np.sum(Mb[Mb[:, n] == 1, :], axis=0)
        H[n, JointSum >= 1] = 1

    H = H + H.T
    H = H - np.diag(np.diag(H))

    return (H, Mb)
|
|
598
|
+
|
|
599
|
+
|
|
600
|
+
def clique3(A):
    r"""
    Computes the list of 3-cliques.

    Parameters
    ----------
    A : nd-array
        N x N adjacency matrix. The diagonal is ignored and every non-zero
        entry counts as an edge.

    Returns
    -------
    K3 : dict
        For the n-th row of E, the vertices adjacent to both of its
        endpoints.
    E : nd-array
        Two-column matrix of the edges (i, j), taken from the upper
        triangle, whose endpoints have at least one common neighbour.
    clique : nd-array
        Nc x 3 matrix. Each row vector contains the list of vertices for
        a 3-clique. Rows are unique, sorted within the row and ordered
        lexicographically.
    """

    A = A - np.diag(np.diag(A))
    A = 1.0 * (A != 0)
    A2 = A @ A  # A2[i, j] counts the common neighbours of i and j
    P = (1.0 * (A2 != 0)) * (1.0 * (A != 0))

    P = sp.csr_matrix(np.triu(P))

    (r, c, _) = sp.find(P != 0)
    E = np.hstack((r.reshape(-1, 1), c.reshape(-1, 1)))

    K3 = {}
    triples = []
    for n in range(0, len(r)):
        common = np.flatnonzero(A[r[n], :] * A[c[n], :] != 0)
        K3[n] = common
        # every common neighbour closes a triangle on this edge
        for w in common:
            triples.append(sorted((r[n], c[n], w)))

    if triples:
        # np.unique removes the repeats (each triangle is found once per
        # edge) and returns the rows in lexicographic order
        clique = np.unique(np.array(triples, dtype=float), axis=0)
    else:
        clique = np.zeros((0, 3))

    return (K3, E, clique)
|
|
659
|
+
|
|
660
|
+
|
|
661
|
+
def breadth(CIJ, source):
    r"""
    Implementation of breadth-first search.

    Parameters
    ----------
    CIJ : nd-array
        Binary (directed/undirected) connection matrix
    source : nd-array
        Source vertex

    Returns
    -------
    distance : nd-array
        Distance between 'source' and i'th vertex (Inf for vertices that are
        not reached).
    branch : nd-array
        Vertex that precedes i in the breadth-first search tree (-1 for source
        vertex)

    Notes
    -----
    Breadth-first search tree does not contain all paths (or all shortest
    paths), but allows the determination of at least one path with minimum
    distance. The entire graph is explored, starting from source vertex
    'source'.

    If the source can be reached again from one of its neighbours, its own
    distance is overwritten with the length of that closed walk; callers that
    need 0 for the source reset it afterwards.

    Olaf Sporns, Indiana University, 2002/2007/2008
    """

    N = CIJ.shape[0]
    # colors: white, gray, black
    white = 0
    gray = 1
    black = 2
    # initialize colors
    color = np.zeros(N)
    # initialize distances
    distance = np.inf * np.ones(N)
    # initialize branches
    branch = np.zeros(N)
    # start on vertex 'source'
    color[source] = gray
    distance[source] = 0
    branch[source] = -1
    # FIFO queue as a list plus a read position, so dequeuing is O(1)
    queue = np.array(source).reshape(-1).tolist()
    head = 0
    # keep going until the entire graph is explored
    while head < len(queue):
        u = queue[head]
        head += 1
        (_, ns, _) = sp.find(CIJ[u, :])
        for v in ns:
            # this allows the 'source' distance to itself to be recorded
            if distance[v] == 0:
                distance[v] = distance[u] + 1
            if color[v] == white:
                color[v] = gray
                distance[v] = distance[u] + 1
                branch[v] = u
                queue.append(v)

        color[u] = black

    return (distance, branch)
|
|
725
|
+
|
|
726
|
+
|
|
727
|
+
def BubbleCluster8s(Rpm, Dpm, Hb, Mb, Mv, CliqList):
    r"""
    Obtains non-discrete and discrete clusterings from the bubble topology of
    PMFG.

    Parameters
    ----------
    Rpm : nd-array
        N x N sparse weighted adjacency matrix of PMFG.
    Dpm : nd-array
        N x N shortest path lengths matrix of PMFG
    Hb : nd-array
        Undirected bubble tree of PMFG
    Mb : nd-array
        Nc x Nb bubble membership matrix for 3-cliques. Mb(n,bi)=1 indicates
        that 3-clique n belongs to bi bubble.
    Mv : nd-array
        N x Nb bubble membership matrix for vertices.
    CliqList : nd-array
        Nc x 3 matrix of list of 3-cliques. Each row vector contains the list
        of vertices for a particular 3-clique.

    Returns
    -------
    Adjv : nd-array
        N x Nk cluster membership matrix for vertices for non-discrete
        clustering via the bubble topology. Adjv(n,k)=1 indicates cluster
        membership of vertex n to kth non-discrete cluster. Empty (0 x 0)
        when there is at most one converging bubble.
    Tc : nd-array
        N x 1 cluster membership vector. Tc(n)=k indicates cluster membership
        of vertex n to kth discrete cluster.
    """

    # Assign directions on the bubble tree
    (Hc, Sep) = DirectHb(Rpm, Hb, Mb, Mv, CliqList)
    N = Rpm.shape[0]  # Number of vertices in the PMFG
    (_, indx, _) = sp.find(Sep == 1)  # Look for the converging bubbles
    n_conv = len(indx)

    if n_conv <= 1:
        # at most one converging bubble: all vertices share a single cluster
        return (np.empty((0, 0)), np.ones(N))

    # Non-discrete membership: a vertex joins the cluster of a converging
    # bubble when it lies in any bubble found by the search started there.
    Adjv = np.zeros((Mv.shape[0], n_conv))
    for k, bubble in enumerate(indx):
        (d, _) = breadth(Hc.T, bubble)
        d[np.isinf(d)] = -1
        d[bubble] = 0
        (rows, _, _) = sp.find(Mv[:, d != -1] != 0)
        Adjv[np.unique(rows), k] = 1

    Tc = -1 * np.ones(N)  # discrete membership, -1 means "not assigned yet"
    Bubv = Mv[:, indx]  # vertices of the converging bubbles
    per_vertex = np.sum(Bubv.T, axis=0).T
    (_, cv, _) = sp.find(per_vertex == 1)  # vertices in exactly one converging bubble
    (_, uv, _) = sp.find(per_vertex > 1)  # vertices in more than one converging bubble

    # Membership matrix for vertices inside the converging bubbles
    Mdjv = np.zeros((N, n_conv))
    Mdjv[cv, :] = Bubv[cv, :].copy()

    # number of edges in each converging bubble (does not depend on the vertex)
    all_cont = (3 * (np.sum(Bubv, axis=0) - 2)).reshape(-1, 1)
    for vertex in uv:
        # sum of edge weights linking the vertex to each converging bubble
        v_cont = np.sum(Rpm[:, vertex].reshape(-1, 1) * Bubv, axis=0).reshape(-1, 1)
        imx = np.argmax(v_cont / all_cont)
        Mdjv[vertex, imx] = 1  # Pick the most strongly associated converging bubble

    # Discrete membership of the vertices inside the converging bubbles
    (members, labels, _) = sp.find(1 * (Mdjv != 0))
    Tc[members] = labels

    # Mean shortest-path distance from every vertex to each converging
    # bubble; clusters the vertex cannot belong to are ruled out with Inf.
    Udjv = Dpm @ (Mdjv @ np.diag(1 / np.sum(1 * (Mdjv != 0), axis=0)))
    Udjv[Adjv == 0] = np.inf
    unassigned = np.sum(Mdjv, axis=1) == 0
    imn = np.argmin(Udjv[unassigned, :], axis=1)
    # remaining vertices go to the closest converging bubble
    Tc[Tc == -1] = imn

    return (Adjv, Tc)
|
|
830
|
+
|
|
831
|
+
|
|
832
|
+
def DirectHb(Rpm, Hb, Mb, Mv, CliqList):
    r"""
    Orient the edges of the bubble tree of a maximal planar graph, which
    yields the Directed Bubble Hierarchical Tree (DBHT).

    Each edge of ``Hb`` whose two bubbles share a 3-clique is removed in
    turn. The bubbles are then split into a "left" side (still reached from
    bubble 0 according to ``breadth``) and a "right" side (the rest), and
    the edge is pointed towards the side to which the vertices of the shared
    3-clique are more strongly connected in ``Rpm``. Ties go to the right
    side.

    Parameters
    ----------
    Rpm : nd-array
        N x N weighted adjacency matrix of PMFG.
    Hb : nd-array
        Undirected bubble tree of PMFG. Only its non-zero pattern is used.
    Mb : nd-array
        Nc x Nb bubble membership matrix for 3-cliques. Mb(n,bi)=1 indicates
        that 3-clique n belongs to bi bubble.
    Mv : nd-array
        N x Nb bubble membership matrix for vertices.
    CliqList : nd-array
        Nc x 3 matrix of list of 3-cliques. Each row vector contains the list
        of vertices for a particular 3-clique.

    Returns
    -------
    Hc : nd-array
        Nb x Nb directed adjacency matrix of DBHT. A non-zero Hc(i,j)
        indicates a directed edge from bubble i to bubble j; the stored
        value is the summed ``Rpm`` weight of the winning side.
    Sep : nd-array
        Vector with one entry per bubble: 1 where the row of ``Hc`` sums to
        zero, overwritten by 2 where the column of ``Hc`` sums to zero and
        the bubble has more than one neighbour in ``Hb``, and 0 elsewhere.
    """

    adjacency = 1 * (Hb != 0)
    (rows, cols, _) = sp.find(sp.triu(sp.csr_matrix(adjacency)) != 0)

    # One row per bubble-tree edge: [bubble a, bubble b, shared 3-clique].
    CliqEdge = np.empty((0, 3))
    for bubble_a, bubble_b in zip(rows, cols):
        shared = np.argwhere(
            np.logical_and(Mb[:, bubble_a] != 0, Mb[:, bubble_b] != 0)
        )
        if shared.shape[0] == 0:
            continue
        edge_row = np.hstack(
            (bubble_a.reshape(1, -1), bubble_b.reshape(1, -1), shared)
        )
        CliqEdge = np.vstack((CliqEdge, edge_row))
    CliqEdge = np.int32(CliqEdge)

    kb = np.sum(adjacency != 0, axis=0)  # number of neighbours of each bubble
    Hc = np.zeros((Mv.shape[1], Mv.shape[1]))

    for edge in CliqEdge:
        # Cut the edge so the bubble tree falls apart into two sides.
        pruned = adjacency.copy()
        pruned[edge[0], edge[1]] = 0
        pruned[edge[1], edge[0]] = 0

        # NOTE(review): `breadth` lives elsewhere in this module; it is
        # presumed to return distances from the root bubble 0, with inf for
        # bubbles that cannot be reached -- confirm.
        (d, _) = breadth(pruned, np.array([0]))
        d[np.isinf(d)] = -1
        d[0] = 0
        reached = d != -1

        ends = edge[0:2]
        bleft = ends[reached[ends]]
        bright = ends[~reached[ends]]

        # Vertices of the separating 3-clique, and the remaining vertices
        # that sit in the bubbles of each side.
        vo = np.int32(CliqList[edge[2], :])
        vleft = np.setdiff1d(np.argwhere(Mv[:, reached] != 0)[:, 0], vo)
        vright = np.setdiff1d(np.argwhere(Mv[:, ~reached] != 0)[:, 0], vo)

        left = np.sum(Rpm[np.ix_(vo, vleft)])
        right = np.sum(Rpm[np.ix_(vo, vright)])
        if left > right:
            Hc[np.ix_(bright, bleft)] = left
        else:
            Hc[np.ix_(bleft, bright)] = right

    Sep = np.double(np.sum(Hc.T, axis=0) == 0)
    Sep[np.logical_and(np.sum(Hc, axis=0) == 0, kb > 1)] = 2

    return (Hc, Sep)
|
|
908
|
+
|
|
909
|
+
|
|
910
|
+
def HierarchyConstruct4s(Rpm, Dpm, Tc, Adjv, Mv):
    r"""
    Build the intra- and inter-cluster hierarchy from the bubble structure
    of a maximal planar graph, namely Planar Maximally Filtered Graph
    (PMFG).

    Merging happens in three stages: vertices assigned to the same bubble
    inside a cluster, then the bubbles of that cluster, and finally the
    clusters themselves. Every merge appends one row to the linkage matrix.

    Parameters
    ----------
    Rpm : nd-array
        N x N Weighted adjacency matrix of PMFG. Only forwarded to
        ``BubbleMember``.
    Dpm : nd-array
        N x N shortest path length matrix of PMFG.
    Tc : nd-array
        N x 1 cluster membership vector from DBHT clustering. Tc(n)=z_i
        indicate cluster of nth vertex (zero-based labels).
    Adjv : nd-array
        Bubble cluster membership matrix from BubbleCluster8s. Not used by
        this function.
    Mv : nd-array
        Bubble membership of vertices from BubbleCluster8s.

    Returns
    -------
    Z : nd-array
        Linkage matrix obtained by passing the MATLAB-style merge list
        through ``scipy.cluster.hierarchy.from_mlab_linkage``. ``None`` is
        returned, after printing a message, if more than one label is left
        once all merges are done.
    """

    n_vertices = Dpm.shape[0]
    cluster_ids = np.int32(np.unique(Tc))
    labels = np.arange(0, n_vertices)

    # Indicator matrix: E[v, k] = 1 when vertex v belongs to cluster k.
    E = sp.csr_matrix(
        (np.ones(n_vertices), (np.arange(0, n_vertices), np.int32(Tc))),
        shape=(n_vertices, np.int32(np.max(Tc) + 1)),
    ).toarray()
    Z = np.array(np.empty((0, 3)))

    def merge_members(members, n_merges, labels, Z, remaining):
        # Run `n_merges` linkage steps restricted to the vertices in
        # `members`. Each step is recorded in Z at height 1 / remaining,
        # and `remaining` is decreased by one afterwards.
        sub_dist = Dpm[np.ix_(members, members)]
        sub_labels = labels[members]
        for _ in range(n_merges):
            (pair, _) = LinkageFunction(sub_dist, sub_labels)
            joined = np.logical_or(sub_labels == pair[0], sub_labels == pair[1])
            # Both groups receive a brand-new common label.
            sub_labels[joined] = np.max(labels, axis=0) + 1
            new_labels = labels.copy()
            new_labels[members] = sub_labels
            Z = DendroConstruct(Z, labels, new_labels, 1 / remaining)
            remaining = remaining - 1
            labels = new_labels
        return (labels, Z, remaining)

    # Intra-cluster hierarchy construction
    for cluster in cluster_ids:
        # Bubbles which coincide with this cluster.
        Mc = E[:, cluster].reshape(-1, 1) * Mv
        # Assign each vertex of the cluster to one specific bubble.
        Mvv = BubbleMember(Dpm, Rpm, Mv, Mc)
        # Bubbles which actually received vertices of the cluster.
        (_, Bub, _) = sp.find(np.sum(Mvv, axis=0) > 0)
        # Number of merges still to be recorded for this cluster.
        nc = np.sum(Tc == cluster, axis=0) - 1

        # Linkage within each bubble.
        for bubble in Bub:
            (_, V, _) = sp.find(Mvv[:, bubble] != 0)
            if len(V) > 1:
                (labels, Z, nc) = merge_members(V, len(V) - 1, labels, Z, nc)

        # Linkage between the bubbles of the cluster.
        (_, V, _) = sp.find(E[:, cluster] != 0)
        (labels, Z, nc) = merge_members(V, len(Bub) - 1, labels, Z, nc)

    # Inter-cluster hierarchy construction
    merged_labels = labels.copy()
    heights = np.ones(len(labels))
    for _ in range(len(cluster_ids) - 1):
        (pair, _) = LinkageFunction(Dpm, labels)
        first = labels == pair[0]
        second = labels == pair[1]
        both = np.logical_or(first, second)
        merged_labels[both] = np.max(labels, axis=0) + 1
        # The height of the merge is the sum of the heights of its parts.
        height = np.unique(heights[first]) + np.unique(heights[second])
        heights[both] = height
        Z = DendroConstruct(Z, labels, merged_labels, height)
        labels = merged_labels.copy()

    # Labels are zero-based here; from_mlab_linkage expects one-based ones.
    Z[:, 0:2] = Z[:, 0:2] + 1
    Z = from_mlab_linkage(Z)

    if len(np.unique(merged_labels)) > 1:
        print("Something Wrong in Merging. Check the codes.")
        return None

    return Z
|
|
1038
|
+
|
|
1039
|
+
|
|
1040
|
+
def LinkageFunction(d, labelvec):
    r"""
    Find the pair of groups whose union has the smallest largest non-zero
    distance.

    Parameters
    ----------
    d : nd-array
        Square distance matrix with one row and column per entry of
        ``labelvec``.
    labelvec : nd-array
        Group label of each item.

    Returns
    -------
    PairLink : nd-array
        The two labels of the selected pair (as floats, smaller first).
        The first pair in label order wins ties.
    dvu : float
        Score of the selected pair: the largest non-zero entry of ``d``
        restricted to the members of both groups, or 0 when that block has
        no non-zero entry.
    """
    labels = np.unique(labelvec)
    masks = [(labelvec == label).reshape(-1) for label in labels]
    # All label pairs (i < j), in the same order as two nested loops.
    (first, second) = np.triu_indices(len(labels), k=1)

    links = np.empty((len(first), 3))
    for row, (i, j) in enumerate(zip(first, second)):
        members = np.ravel(np.logical_or(masks[i], masks[j]))
        block = d[np.ix_(members, members)]
        nonzero = block[block != 0]
        if nonzero.shape[0] == 0:
            score = 0
        else:
            score = np.max(nonzero, axis=0)
        links[row] = (labels[i], labels[j], score)

    dvu = np.min(links[:, 2], axis=0)
    best = np.argmin(links[:, 2], axis=0)

    return (links[best, 0:2], dvu)
|
|
1062
|
+
|
|
1063
|
+
|
|
1064
|
+
def BubbleMember(Dpm, Rpm, Mv, Mc):
    r"""
    Assign every vertex flagged in ``Mc`` to exactly one bubble.

    Parameters
    ----------
    Dpm : nd-array
        Not used by this function.
    Rpm : nd-array
        N x N weighted adjacency matrix.
    Mv : nd-array
        N x Nb bubble membership matrix for vertices.
    Mc : nd-array
        N x Nb candidate membership matrix; the non-zero entries of a row
        are the bubbles its vertex may be assigned to.

    Returns
    -------
    Mvv : nd-array
        N x Nb matrix. Rows of ``Mc`` that sum to 1 are copied unchanged.
        A row that sums to more than 1 gets a single 1, in the candidate
        bubble with the largest ratio between the weight linking the vertex
        to the bubble's members and the total weight inside the bubble.
        All other rows are zero.
    """
    Mvv = np.zeros((Mv.shape[0], Mv.shape[1]))

    bubbles_per_vertex = np.sum(Mc.T, axis=0)
    single = np.flatnonzero(bubbles_per_vertex == 1)
    shared = np.flatnonzero(bubbles_per_vertex > 1)

    Mvv[single, :] = Mc[single, :]

    for vertex in shared:
        candidates = np.flatnonzero(Mc[vertex, :] != 0)
        members = Mv[:, candidates]
        # Weight between the vertex and the members of each candidate.
        attached = np.sum(Rpm[:, vertex].reshape(-1, 1) * members, axis=0)
        # Total weight inside each candidate; halved because the quadratic
        # form visits each pair of vertices twice.
        total = np.diag(members.T @ Rpm @ members) / 2
        best = np.argmax(attached / total, axis=0)
        Mvv[vertex, candidates[best]] = 1

    return Mvv
|
|
1079
|
+
|
|
1080
|
+
|
|
1081
|
+
def DendroConstruct(Zi, LabelVec1, LabelVec2, LinkageDist):
    r"""
    Append to a linkage matrix the merge that turns one label vector into
    the next.

    Parameters
    ----------
    Zi : nd-array
        Linkage matrix built so far, one row per merge.
    LabelVec1 : nd-array
        Label vector before the merge.
    LabelVec2 : nd-array
        Label vector after the merge.
    LinkageDist : float or nd-array
        Height stored in the new row.

    Returns
    -------
    Z : nd-array
        ``Zi`` with one extra row holding the two merged labels (ascending)
        followed by ``LinkageDist``. If the entries that changed do not hold
        exactly two distinct labels of ``LabelVec1``, a message is printed
        and ``None`` is returned.
    """
    changed = np.logical_not(LabelVec1.T == LabelVec2.T)
    merged = np.unique(LabelVec1[changed])  # np.unique already sorts
    if len(merged) != 2:
        print("Check the codes")
        return None

    new_row = np.hstack((merged, LinkageDist))
    return np.vstack((Zi, new_row))
|