scikit-network 0.31.0__cp39-cp39-win_amd64.whl → 0.32.1__cp39-cp39-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of scikit-network might be problematic. Click here for more details.
- {scikit_network-0.31.0.dist-info → scikit_network-0.32.1.dist-info}/AUTHORS.rst +3 -0
- {scikit_network-0.31.0.dist-info → scikit_network-0.32.1.dist-info}/METADATA +19 -3
- {scikit_network-0.31.0.dist-info → scikit_network-0.32.1.dist-info}/RECORD +112 -105
- {scikit_network-0.31.0.dist-info → scikit_network-0.32.1.dist-info}/WHEEL +1 -1
- sknetwork/__init__.py +1 -1
- sknetwork/classification/base.py +1 -1
- sknetwork/classification/base_rank.py +3 -3
- sknetwork/classification/diffusion.py +21 -13
- sknetwork/classification/knn.py +19 -13
- sknetwork/classification/metrics.py +1 -1
- sknetwork/classification/pagerank.py +12 -8
- sknetwork/classification/propagation.py +22 -15
- sknetwork/classification/tests/test_diffusion.py +10 -0
- sknetwork/classification/vote.cp39-win_amd64.pyd +0 -0
- sknetwork/classification/vote.cpp +14549 -8668
- sknetwork/clustering/__init__.py +3 -1
- sknetwork/clustering/base.py +1 -1
- sknetwork/clustering/kcenters.py +253 -0
- sknetwork/clustering/leiden.py +241 -0
- sknetwork/clustering/leiden_core.cp39-win_amd64.pyd +0 -0
- sknetwork/clustering/leiden_core.cpp +31564 -0
- sknetwork/clustering/leiden_core.pyx +124 -0
- sknetwork/clustering/louvain.py +118 -83
- sknetwork/clustering/louvain_core.cp39-win_amd64.pyd +0 -0
- sknetwork/clustering/louvain_core.cpp +21876 -16332
- sknetwork/clustering/louvain_core.pyx +86 -94
- sknetwork/clustering/postprocess.py +2 -2
- sknetwork/clustering/propagation_clustering.py +4 -4
- sknetwork/clustering/tests/test_API.py +7 -3
- sknetwork/clustering/tests/test_kcenters.py +92 -0
- sknetwork/clustering/tests/test_leiden.py +34 -0
- sknetwork/clustering/tests/test_louvain.py +2 -3
- sknetwork/data/load.py +2 -4
- sknetwork/data/parse.py +41 -20
- sknetwork/data/tests/test_parse.py +9 -12
- sknetwork/embedding/__init__.py +0 -1
- sknetwork/embedding/base.py +20 -19
- sknetwork/embedding/force_atlas.py +3 -2
- sknetwork/embedding/louvain_embedding.py +1 -1
- sknetwork/embedding/random_projection.py +5 -3
- sknetwork/embedding/spectral.py +0 -73
- sknetwork/embedding/tests/test_API.py +4 -28
- sknetwork/embedding/tests/test_louvain_embedding.py +4 -9
- sknetwork/embedding/tests/test_spectral.py +2 -5
- sknetwork/embedding/tests/test_svd.py +1 -1
- sknetwork/gnn/base_layer.py +3 -3
- sknetwork/gnn/gnn_classifier.py +40 -86
- sknetwork/gnn/layer.py +1 -1
- sknetwork/gnn/loss.py +1 -1
- sknetwork/gnn/optimizer.py +4 -3
- sknetwork/gnn/tests/test_base_layer.py +4 -4
- sknetwork/gnn/tests/test_gnn_classifier.py +12 -39
- sknetwork/gnn/utils.py +8 -8
- sknetwork/hierarchy/base.py +27 -0
- sknetwork/hierarchy/louvain_hierarchy.py +45 -41
- sknetwork/hierarchy/paris.cp39-win_amd64.pyd +0 -0
- sknetwork/hierarchy/paris.cpp +27521 -20771
- sknetwork/hierarchy/paris.pyx +7 -7
- sknetwork/hierarchy/postprocess.py +16 -16
- sknetwork/hierarchy/tests/test_algos.py +5 -0
- sknetwork/linalg/__init__.py +1 -1
- sknetwork/linalg/diteration.cp39-win_amd64.pyd +0 -0
- sknetwork/linalg/diteration.cpp +13916 -8050
- sknetwork/linalg/{normalization.py → normalizer.py} +17 -14
- sknetwork/linalg/operators.py +1 -1
- sknetwork/linalg/ppr_solver.py +1 -1
- sknetwork/linalg/push.cp39-win_amd64.pyd +0 -0
- sknetwork/linalg/push.cpp +23187 -16973
- sknetwork/linalg/tests/test_normalization.py +3 -7
- sknetwork/linalg/tests/test_operators.py +2 -6
- sknetwork/linalg/tests/test_ppr.py +1 -1
- sknetwork/linkpred/base.py +12 -1
- sknetwork/linkpred/nn.py +6 -6
- sknetwork/path/distances.py +11 -4
- sknetwork/path/shortest_path.py +1 -1
- sknetwork/path/tests/test_distances.py +7 -0
- sknetwork/path/tests/test_search.py +2 -2
- sknetwork/ranking/base.py +11 -6
- sknetwork/ranking/betweenness.cp39-win_amd64.pyd +0 -0
- sknetwork/ranking/betweenness.cpp +5256 -2190
- sknetwork/ranking/pagerank.py +13 -12
- sknetwork/ranking/tests/test_API.py +0 -2
- sknetwork/ranking/tests/test_betweenness.py +1 -1
- sknetwork/ranking/tests/test_pagerank.py +11 -5
- sknetwork/regression/base.py +18 -1
- sknetwork/regression/diffusion.py +24 -10
- sknetwork/regression/tests/test_diffusion.py +8 -0
- sknetwork/topology/__init__.py +3 -1
- sknetwork/topology/cliques.cp39-win_amd64.pyd +0 -0
- sknetwork/topology/cliques.cpp +23528 -16848
- sknetwork/topology/core.cp39-win_amd64.pyd +0 -0
- sknetwork/topology/core.cpp +22849 -16581
- sknetwork/topology/cycles.py +243 -0
- sknetwork/topology/minheap.cp39-win_amd64.pyd +0 -0
- sknetwork/topology/minheap.cpp +19495 -13469
- sknetwork/topology/structure.py +2 -42
- sknetwork/topology/tests/test_cycles.py +65 -0
- sknetwork/topology/tests/test_structure.py +2 -16
- sknetwork/topology/triangles.cp39-win_amd64.pyd +0 -0
- sknetwork/topology/triangles.cpp +5283 -1397
- sknetwork/topology/triangles.pyx +7 -4
- sknetwork/topology/weisfeiler_lehman_core.cp39-win_amd64.pyd +0 -0
- sknetwork/topology/weisfeiler_lehman_core.cpp +14781 -8915
- sknetwork/utils/format.py +1 -1
- sknetwork/utils/membership.py +2 -2
- sknetwork/visualization/__init__.py +2 -2
- sknetwork/visualization/dendrograms.py +55 -7
- sknetwork/visualization/graphs.py +261 -44
- sknetwork/visualization/tests/test_dendrograms.py +9 -9
- sknetwork/visualization/tests/test_graphs.py +63 -57
- sknetwork/embedding/louvain_hierarchy.py +0 -142
- sknetwork/embedding/tests/test_louvain_hierarchy.py +0 -19
- {scikit_network-0.31.0.dist-info → scikit_network-0.32.1.dist-info}/LICENSE +0 -0
- {scikit_network-0.31.0.dist-info → scikit_network-0.32.1.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,124 @@
|
|
|
1
|
+
# distutils: language=c++
|
|
2
|
+
# cython: language_level=3
|
|
3
|
+
from libcpp.set cimport set
|
|
4
|
+
from libc.stdlib cimport rand
|
|
5
|
+
|
|
6
|
+
cimport cython
|
|
7
|
+
|
|
8
|
+
ctypedef fused int_or_long:
|
|
9
|
+
int
|
|
10
|
+
long
|
|
11
|
+
|
|
12
|
+
@cython.boundscheck(False)
@cython.wraparound(False)
def optimize_refine_core(int_or_long[:] labels, int_or_long[:] labels_refined, int_or_long[:] indices,
                         int_or_long[:] indptr, float[:] data, float[:] out_weights, float[:] in_weights,
                         float[:] out_cluster_weights, float[:] in_cluster_weights, float[:] cluster_weights,
                         float[:] self_loops, float resolution):  # pragma: no cover
    """Refine clusters while maximizing modularity.

    Nodes are visited in index order. A node may only move to the refined cluster of a neighbor
    that shares its label in the initial partition. Passes are repeated until a full pass moves no node.

    ``labels_refined``, ``out_cluster_weights``, ``in_cluster_weights`` and ``cluster_weights``
    are modified in place.

    Parameters
    ----------
    labels :
        Labels (initial partition).
    labels_refined :
        Refined labels.
    indices :
        CSR format index array of the normalized adjacency matrix.
    indptr :
        CSR format index pointer array of the normalized adjacency matrix.
    data :
        CSR format data array of the normalized adjacency matrix.
    out_weights :
        Out-weights of nodes (sum to 1).
    in_weights :
        In-weights of nodes (sum to 1).
    out_cluster_weights :
        Out-weights of clusters (sum to 1).
    in_cluster_weights :
        In-weights of clusters (sum to 1).
    cluster_weights :
        Weights of clusters (initialized to 0). Used as a scratch accumulator for the weight
        between the current node and each neighboring refined cluster.
    self_loops :
        Weights of self loops.
    resolution :
        Resolution parameter (positive).

    Returns
    -------
    labels_refined :
        Refined labels.
    """
    cdef int_or_long n
    cdef int_or_long label
    cdef int_or_long label_refined
    cdef int_or_long label_target
    cdef int_or_long i
    cdef int_or_long j
    cdef int_or_long start
    cdef int_or_long end
    # typed counter for the random draw, so the selection loop stays in C
    cdef long k

    cdef bint increase = True
    cdef float delta
    cdef float delta_local
    cdef float in_weight
    cdef float out_weight

    cdef set[int_or_long] label_set
    cdef set[int_or_long] label_target_set

    n = labels.shape[0]
    while increase:
        increase = False

        for i in range(n):
            label_set.clear()
            label = labels[i]
            label_refined = labels_refined[i]
            start = indptr[i]
            end = indptr[i + 1]

            # neighboring clusters (restricted to neighbors with the same initial label)
            for j in range(start, end):
                if labels[indices[j]] == label:
                    label_target = labels_refined[indices[j]]
                    label_set.insert(label_target)
                    cluster_weights[label_target] += data[j]
            # the node's own refined cluster is not a candidate target
            label_set.erase(label_refined)

            if not label_set.empty():
                out_weight = out_weights[i]
                in_weight = in_weights[i]

                # node leaving the current cluster
                delta = 2 * (cluster_weights[label_refined] - self_loops[i])
                delta -= resolution * out_weight * (in_cluster_weights[label_refined] - in_weight)
                delta -= resolution * in_weight * (out_cluster_weights[label_refined] - out_weight)

                # keep every target cluster with a strictly positive gain
                label_target_set.clear()
                for label_target in label_set:
                    delta_local = 2 * cluster_weights[label_target]
                    delta_local -= resolution * out_weight * in_cluster_weights[label_target]
                    delta_local -= resolution * in_weight * out_cluster_weights[label_target]
                    delta_local -= delta
                    if delta_local > 0:
                        label_target_set.insert(label_target)
                    # reset the scratch accumulator for the next node
                    cluster_weights[label_target] = 0

                if not label_target_set.empty():
                    increase = True
                    # Random choice among improving targets. k is decremented before the test,
                    # so k = 0 never triggers the break and selects the last element instead;
                    # every candidate is therefore still selected for exactly one value of k.
                    # NOTE(review): rand() is not seeded in this function - confirm that the
                    # draw is meant to be independent of the caller's random state.
                    k = rand() % label_target_set.size()
                    for label_target in label_target_set:
                        k -= 1
                        if k == 0:
                            break
                    labels_refined[i] = label_target
                    # update weights
                    out_cluster_weights[label_refined] -= out_weight
                    in_cluster_weights[label_refined] -= in_weight
                    out_cluster_weights[label_target] += out_weight
                    in_cluster_weights[label_target] += in_weight
            # the node's own refined cluster was erased from label_set, so the loop above
            # did not clear its accumulator; reset it for every node
            cluster_weights[label_refined] = 0

    return labels_refined
|
sknetwork/clustering/louvain.py
CHANGED
|
@@ -12,7 +12,7 @@ import numpy as np
|
|
|
12
12
|
from scipy import sparse
|
|
13
13
|
|
|
14
14
|
from sknetwork.clustering.base import BaseClustering
|
|
15
|
-
from sknetwork.clustering.louvain_core import
|
|
15
|
+
from sknetwork.clustering.louvain_core import optimize_core
|
|
16
16
|
from sknetwork.clustering.postprocess import reindex_labels
|
|
17
17
|
from sknetwork.utils.check import check_random_state, get_probs
|
|
18
18
|
from sknetwork.utils.format import check_format, get_adjacency, directed2undirected
|
|
@@ -30,11 +30,11 @@ class Louvain(BaseClustering, Log):
|
|
|
30
30
|
resolution :
|
|
31
31
|
Resolution parameter.
|
|
32
32
|
modularity : str
|
|
33
|
-
|
|
33
|
+
Type of modularity to maximize. Can be ``'Dugue'``, ``'Newman'`` or ``'Potts'`` (default = ``'dugue'``).
|
|
34
34
|
tol_optimization :
|
|
35
|
-
Minimum increase in
|
|
35
|
+
Minimum increase in modularity to enter a new optimization pass in the local search.
|
|
36
36
|
tol_aggregation :
|
|
37
|
-
Minimum increase in
|
|
37
|
+
Minimum increase in modularity to enter a new aggregation pass.
|
|
38
38
|
n_aggregations :
|
|
39
39
|
Maximum number of aggregations.
|
|
40
40
|
A negative value is interpreted as no limit.
|
|
@@ -91,6 +91,7 @@ class Louvain(BaseClustering, Log):
|
|
|
91
91
|
<https://arxiv.org/pdf/0707.1616>`_
|
|
92
92
|
Physical Review E, 76(6).
|
|
93
93
|
"""
|
|
94
|
+
|
|
94
95
|
def __init__(self, resolution: float = 1, modularity: str = 'dugue', tol_optimization: float = 1e-3,
|
|
95
96
|
tol_aggregation: float = 1e-3, n_aggregations: int = -1, shuffle_nodes: bool = False,
|
|
96
97
|
sort_clusters: bool = True, return_probs: bool = True, return_aggregate: bool = True,
|
|
@@ -102,73 +103,76 @@ class Louvain(BaseClustering, Log):
|
|
|
102
103
|
self.labels_ = None
|
|
103
104
|
self.resolution = resolution
|
|
104
105
|
self.modularity = modularity.lower()
|
|
105
|
-
self.
|
|
106
|
+
self.tol_optimization = tol_optimization
|
|
106
107
|
self.tol_aggregation = tol_aggregation
|
|
107
108
|
self.n_aggregations = n_aggregations
|
|
108
109
|
self.shuffle_nodes = shuffle_nodes
|
|
109
110
|
self.random_state = check_random_state(random_state)
|
|
110
111
|
self.bipartite = None
|
|
111
112
|
|
|
112
|
-
def _optimize(self,
|
|
113
|
-
"""One
|
|
113
|
+
def _optimize(self, labels, adjacency, out_weights, in_weights):
    """Run one optimization pass of the Louvain algorithm through the compiled core.

    Parameters
    ----------
    labels :
        Labels of nodes.
    adjacency :
        Adjacency matrix (its ``indices``, ``indptr``, ``data`` and diagonal are read).
    out_weights :
        Out-weights of nodes.
    in_weights :
        In-weights of nodes.

    Returns
    -------
    labels :
        Labels of nodes after optimization.
    increase :
        Gain in modularity after optimization.
    """
    float_type = np.float32

    # node-level inputs, converted to the dtypes handed to the compiled routine
    node_labels = labels.astype(np.int32)
    edge_weights = adjacency.data.astype(float_type)
    node_out = out_weights.astype(float_type)
    node_in = in_weights.astype(float_type)
    diagonal = adjacency.diagonal().astype(float_type)

    # cluster-level weights start as copies of the node weights;
    # the per-cluster accumulator starts at zero
    cluster_out = node_out.copy()
    cluster_in = node_in.copy()
    accumulator = np.zeros_like(cluster_out, dtype=float_type)

    return optimize_core(node_labels, adjacency.indices, adjacency.indptr, edge_weights, node_out, node_in,
                         cluster_out, cluster_in, accumulator, diagonal, self.resolution,
                         self.tol_optimization)
|
|
143
146
|
|
|
144
147
|
@staticmethod
def _aggregate(labels, adjacency, out_weights, in_weights):
    """Merge all nodes sharing a label into a single node.

    Parameters
    ----------
    labels :
        Labels of nodes.
    adjacency :
        Adjacency matrix.
    out_weights :
        Out-weights of nodes.
    in_weights :
        In-weights of nodes.

    Returns
    -------
    Aggregate graph (adjacency matrix, out-weights, in-weights).
    """
    membership = get_membership(labels)
    membership_t = membership.T
    # project the adjacency on both sides with the membership matrix
    aggregate_adjacency = membership_t.tocsr().dot(adjacency.dot(membership))
    # cluster weights are the membership-projected node weights
    aggregate_out = membership_t.dot(out_weights)
    aggregate_in = membership_t.dot(in_weights)
    return aggregate_adjacency, aggregate_out, aggregate_in
|
|
167
171
|
|
|
168
|
-
def
|
|
169
|
-
"""
|
|
172
|
+
def _pre_processing(self, input_matrix, force_bipartite):
|
|
173
|
+
"""Pre-processing for Louvain.
|
|
170
174
|
|
|
171
|
-
|
|
175
|
+
Parameters
|
|
172
176
|
----------
|
|
173
177
|
input_matrix :
|
|
174
178
|
Adjacency matrix or biadjacency matrix of the graph.
|
|
@@ -177,63 +181,64 @@ class Louvain(BaseClustering, Log):
|
|
|
177
181
|
|
|
178
182
|
Returns
|
|
179
183
|
-------
|
|
180
|
-
|
|
184
|
+
adjacency :
|
|
185
|
+
Adjacency matrix.
|
|
186
|
+
out_weights, in_weights :
|
|
187
|
+
Node weights.
|
|
188
|
+
membership :
|
|
189
|
+
Membership matrix (labels).
|
|
190
|
+
index :
|
|
191
|
+
Index of nodes.
|
|
181
192
|
"""
|
|
182
193
|
self._init_vars()
|
|
194
|
+
|
|
195
|
+
# adjacency matrix
|
|
183
196
|
input_matrix = check_format(input_matrix)
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
else:
|
|
188
|
-
adjacency, self.bipartite = get_adjacency(input_matrix, force_bipartite=force_bipartite)
|
|
197
|
+
force_directed = self.modularity == 'dugue'
|
|
198
|
+
adjacency, self.bipartite = get_adjacency(input_matrix, force_directed=force_directed,
|
|
199
|
+
force_bipartite=force_bipartite)
|
|
189
200
|
|
|
201
|
+
# shuffling
|
|
190
202
|
n = adjacency.shape[0]
|
|
191
|
-
|
|
192
203
|
index = np.arange(n)
|
|
193
204
|
if self.shuffle_nodes:
|
|
194
205
|
index = self.random_state.permutation(index)
|
|
195
206
|
adjacency = adjacency[index][:, index]
|
|
196
207
|
|
|
208
|
+
# node weights
|
|
197
209
|
if self.modularity == 'potts':
|
|
198
|
-
|
|
199
|
-
|
|
210
|
+
out_weights = get_probs('uniform', adjacency)
|
|
211
|
+
in_weights = out_weights.copy()
|
|
200
212
|
elif self.modularity == 'newman':
|
|
201
|
-
|
|
202
|
-
|
|
213
|
+
out_weights = get_probs('degree', adjacency)
|
|
214
|
+
in_weights = out_weights.copy()
|
|
203
215
|
elif self.modularity == 'dugue':
|
|
204
|
-
|
|
205
|
-
|
|
216
|
+
out_weights = get_probs('degree', adjacency)
|
|
217
|
+
in_weights = get_probs('degree', adjacency.T)
|
|
206
218
|
else:
|
|
207
219
|
raise ValueError('Unknown modularity function.')
|
|
208
220
|
|
|
209
|
-
|
|
221
|
+
# normalized, symmetric adjacency matrix (sums to 1)
|
|
222
|
+
adjacency = directed2undirected(adjacency)
|
|
223
|
+
adjacency = adjacency / adjacency.data.sum()
|
|
210
224
|
|
|
225
|
+
# cluster membership
|
|
211
226
|
membership = sparse.identity(n, format='csr')
|
|
212
|
-
increase = True
|
|
213
|
-
count_aggregations = 0
|
|
214
|
-
self.print_log("Starting with", n, "nodes.")
|
|
215
|
-
while increase:
|
|
216
|
-
count_aggregations += 1
|
|
217
|
-
|
|
218
|
-
labels_cluster, pass_increase = self._optimize(adjacency_cluster, probs_out, probs_in)
|
|
219
|
-
_, labels_cluster = np.unique(labels_cluster, return_inverse=True)
|
|
220
|
-
|
|
221
|
-
if pass_increase <= self.tol_aggregation:
|
|
222
|
-
increase = False
|
|
223
|
-
else:
|
|
224
|
-
membership_cluster = get_membership(labels_cluster)
|
|
225
|
-
membership = membership.dot(membership_cluster)
|
|
226
|
-
adjacency_cluster, probs_out, probs_in = self._aggregate(adjacency_cluster, probs_out, probs_in,
|
|
227
|
-
membership_cluster)
|
|
228
|
-
|
|
229
|
-
n = adjacency_cluster.shape[0]
|
|
230
|
-
if n == 1:
|
|
231
|
-
break
|
|
232
|
-
self.print_log("Aggregation", count_aggregations, "completed with", n, "clusters and ",
|
|
233
|
-
pass_increase, "increment.")
|
|
234
|
-
if count_aggregations == self.n_aggregations:
|
|
235
|
-
break
|
|
236
227
|
|
|
228
|
+
return adjacency, out_weights, in_weights, membership, index
|
|
229
|
+
|
|
230
|
+
def _post_processing(self, input_matrix, membership, index):
|
|
231
|
+
"""Post-processing for Louvain.
|
|
232
|
+
|
|
233
|
+
Parameters
|
|
234
|
+
----------
|
|
235
|
+
input_matrix :
|
|
236
|
+
Adjacency matrix or biadjacency matrix of the graph.
|
|
237
|
+
membership :
|
|
238
|
+
Membership matrix (labels).
|
|
239
|
+
index :
|
|
240
|
+
Index of nodes.
|
|
241
|
+
"""
|
|
237
242
|
if self.sort_clusters:
|
|
238
243
|
labels = reindex_labels(membership.indices)
|
|
239
244
|
else:
|
|
@@ -242,10 +247,40 @@ class Louvain(BaseClustering, Log):
|
|
|
242
247
|
reverse = np.empty(index.size, index.dtype)
|
|
243
248
|
reverse[index] = np.arange(index.size)
|
|
244
249
|
labels = labels[reverse]
|
|
245
|
-
|
|
246
250
|
self.labels_ = labels
|
|
247
251
|
if self.bipartite:
|
|
248
252
|
self._split_vars(input_matrix.shape)
|
|
249
253
|
self._secondary_outputs(input_matrix)
|
|
250
254
|
|
|
255
|
+
def fit(self, input_matrix: Union[sparse.csr_matrix, np.ndarray], force_bipartite: bool = False) -> 'Louvain':
    """Fit algorithm to data.

    Alternates optimization and aggregation passes until a stopping condition is met,
    then stores the labels through the post-processing step.

    Parameters
    ----------
    input_matrix :
        Adjacency matrix or biadjacency matrix of the graph.
    force_bipartite :
        If ``True``, force the input matrix to be considered as a biadjacency matrix even if square.

    Returns
    -------
    self : :class:`Louvain`
    """
    adjacency, out_weights, in_weights, membership, index = self._pre_processing(input_matrix, force_bipartite)
    n_clusters = adjacency.shape[0]
    n_passes = 0
    while True:
        n_passes += 1
        # every pass starts with each (aggregated) node in its own cluster
        labels, increase = self._optimize(np.arange(n_clusters), adjacency, out_weights, in_weights)
        # relabel clusters with consecutive integers
        labels = np.unique(labels, return_inverse=True)[1]
        adjacency, out_weights, in_weights = self._aggregate(labels, adjacency, out_weights, in_weights)
        # compose memberships so that columns keep referring to the original nodes
        membership = membership.dot(get_membership(labels))
        n_clusters = adjacency.shape[0]
        self.print_log("Aggregation:", n_passes, " Clusters:", n_clusters, " Increase:", increase)
        # stop on a single cluster, a gain within tolerance, or the aggregation limit
        if n_clusters == 1 or increase <= self.tol_aggregation or n_passes == self.n_aggregations:
            break
    self._post_processing(input_matrix, membership, index)
    return self
|
|
Binary file
|