scikit-network 0.30.0__cp310-cp310-win_amd64.whl → 0.32.1__cp310-cp310-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of scikit-network might be problematic. Click here for more details.
- {scikit_network-0.30.0.dist-info → scikit_network-0.32.1.dist-info}/AUTHORS.rst +3 -0
- {scikit_network-0.30.0.dist-info → scikit_network-0.32.1.dist-info}/METADATA +31 -3
- scikit_network-0.32.1.dist-info/RECORD +228 -0
- {scikit_network-0.30.0.dist-info → scikit_network-0.32.1.dist-info}/WHEEL +1 -1
- sknetwork/__init__.py +1 -1
- sknetwork/base.py +67 -0
- sknetwork/classification/base.py +24 -24
- sknetwork/classification/base_rank.py +17 -25
- sknetwork/classification/diffusion.py +35 -35
- sknetwork/classification/knn.py +24 -21
- sknetwork/classification/metrics.py +1 -1
- sknetwork/classification/pagerank.py +10 -10
- sknetwork/classification/propagation.py +23 -20
- sknetwork/classification/tests/test_diffusion.py +13 -3
- sknetwork/classification/vote.cp310-win_amd64.pyd +0 -0
- sknetwork/classification/vote.cpp +14482 -10351
- sknetwork/classification/vote.pyx +1 -3
- sknetwork/clustering/__init__.py +3 -1
- sknetwork/clustering/base.py +36 -40
- sknetwork/clustering/kcenters.py +253 -0
- sknetwork/clustering/leiden.py +241 -0
- sknetwork/clustering/leiden_core.cp310-win_amd64.pyd +0 -0
- sknetwork/clustering/leiden_core.cpp +31564 -0
- sknetwork/clustering/leiden_core.pyx +124 -0
- sknetwork/clustering/louvain.py +133 -102
- sknetwork/clustering/louvain_core.cp310-win_amd64.pyd +0 -0
- sknetwork/clustering/louvain_core.cpp +22457 -18792
- sknetwork/clustering/louvain_core.pyx +86 -96
- sknetwork/clustering/postprocess.py +2 -2
- sknetwork/clustering/propagation_clustering.py +15 -19
- sknetwork/clustering/tests/test_API.py +8 -4
- sknetwork/clustering/tests/test_kcenters.py +92 -0
- sknetwork/clustering/tests/test_leiden.py +34 -0
- sknetwork/clustering/tests/test_louvain.py +3 -4
- sknetwork/data/__init__.py +2 -1
- sknetwork/data/base.py +28 -0
- sknetwork/data/load.py +38 -37
- sknetwork/data/models.py +18 -18
- sknetwork/data/parse.py +54 -33
- sknetwork/data/test_graphs.py +2 -2
- sknetwork/data/tests/test_API.py +1 -1
- sknetwork/data/tests/test_base.py +14 -0
- sknetwork/data/tests/test_load.py +1 -1
- sknetwork/data/tests/test_parse.py +9 -12
- sknetwork/data/tests/test_test_graphs.py +1 -2
- sknetwork/data/toy_graphs.py +18 -18
- sknetwork/embedding/__init__.py +0 -1
- sknetwork/embedding/base.py +21 -20
- sknetwork/embedding/force_atlas.py +3 -2
- sknetwork/embedding/louvain_embedding.py +2 -2
- sknetwork/embedding/random_projection.py +5 -3
- sknetwork/embedding/spectral.py +0 -73
- sknetwork/embedding/tests/test_API.py +4 -28
- sknetwork/embedding/tests/test_louvain_embedding.py +4 -9
- sknetwork/embedding/tests/test_random_projection.py +2 -2
- sknetwork/embedding/tests/test_spectral.py +5 -8
- sknetwork/embedding/tests/test_svd.py +1 -1
- sknetwork/gnn/base.py +4 -4
- sknetwork/gnn/base_layer.py +3 -3
- sknetwork/gnn/gnn_classifier.py +45 -89
- sknetwork/gnn/layer.py +1 -1
- sknetwork/gnn/loss.py +1 -1
- sknetwork/gnn/optimizer.py +4 -3
- sknetwork/gnn/tests/test_base_layer.py +4 -4
- sknetwork/gnn/tests/test_gnn_classifier.py +12 -35
- sknetwork/gnn/utils.py +8 -8
- sknetwork/hierarchy/base.py +29 -2
- sknetwork/hierarchy/louvain_hierarchy.py +45 -41
- sknetwork/hierarchy/paris.cp310-win_amd64.pyd +0 -0
- sknetwork/hierarchy/paris.cpp +27369 -22852
- sknetwork/hierarchy/paris.pyx +7 -9
- sknetwork/hierarchy/postprocess.py +16 -16
- sknetwork/hierarchy/tests/test_API.py +1 -1
- sknetwork/hierarchy/tests/test_algos.py +5 -0
- sknetwork/hierarchy/tests/test_metrics.py +1 -1
- sknetwork/linalg/__init__.py +1 -1
- sknetwork/linalg/diteration.cp310-win_amd64.pyd +0 -0
- sknetwork/linalg/diteration.cpp +13474 -9454
- sknetwork/linalg/diteration.pyx +0 -2
- sknetwork/linalg/eig_solver.py +1 -1
- sknetwork/linalg/{normalization.py → normalizer.py} +18 -15
- sknetwork/linalg/operators.py +1 -1
- sknetwork/linalg/ppr_solver.py +1 -1
- sknetwork/linalg/push.cp310-win_amd64.pyd +0 -0
- sknetwork/linalg/push.cpp +22993 -18807
- sknetwork/linalg/push.pyx +0 -2
- sknetwork/linalg/svd_solver.py +1 -1
- sknetwork/linalg/tests/test_normalization.py +3 -7
- sknetwork/linalg/tests/test_operators.py +4 -8
- sknetwork/linalg/tests/test_ppr.py +1 -1
- sknetwork/linkpred/base.py +13 -2
- sknetwork/linkpred/nn.py +6 -6
- sknetwork/log.py +19 -0
- sknetwork/path/__init__.py +4 -3
- sknetwork/path/dag.py +54 -0
- sknetwork/path/distances.py +98 -0
- sknetwork/path/search.py +13 -47
- sknetwork/path/shortest_path.py +37 -162
- sknetwork/path/tests/test_dag.py +37 -0
- sknetwork/path/tests/test_distances.py +62 -0
- sknetwork/path/tests/test_search.py +26 -11
- sknetwork/path/tests/test_shortest_path.py +31 -36
- sknetwork/ranking/__init__.py +0 -1
- sknetwork/ranking/base.py +13 -8
- sknetwork/ranking/betweenness.cp310-win_amd64.pyd +0 -0
- sknetwork/ranking/betweenness.cpp +5709 -3017
- sknetwork/ranking/betweenness.pyx +0 -2
- sknetwork/ranking/closeness.py +7 -10
- sknetwork/ranking/pagerank.py +14 -14
- sknetwork/ranking/postprocess.py +12 -3
- sknetwork/ranking/tests/test_API.py +2 -4
- sknetwork/ranking/tests/test_betweenness.py +3 -3
- sknetwork/ranking/tests/test_closeness.py +3 -7
- sknetwork/ranking/tests/test_pagerank.py +11 -5
- sknetwork/ranking/tests/test_postprocess.py +5 -0
- sknetwork/regression/base.py +19 -2
- sknetwork/regression/diffusion.py +24 -10
- sknetwork/regression/tests/test_diffusion.py +8 -0
- sknetwork/test_base.py +35 -0
- sknetwork/test_log.py +15 -0
- sknetwork/topology/__init__.py +7 -8
- sknetwork/topology/cliques.cp310-win_amd64.pyd +0 -0
- sknetwork/topology/{kcliques.cpp → cliques.cpp} +23412 -20276
- sknetwork/topology/cliques.pyx +149 -0
- sknetwork/topology/core.cp310-win_amd64.pyd +0 -0
- sknetwork/topology/{kcore.cpp → core.cpp} +21732 -18867
- sknetwork/topology/core.pyx +90 -0
- sknetwork/topology/cycles.py +243 -0
- sknetwork/topology/minheap.cp310-win_amd64.pyd +0 -0
- sknetwork/{utils → topology}/minheap.cpp +19452 -15368
- sknetwork/{utils → topology}/minheap.pxd +1 -3
- sknetwork/{utils → topology}/minheap.pyx +1 -3
- sknetwork/topology/structure.py +3 -43
- sknetwork/topology/tests/test_cliques.py +11 -11
- sknetwork/topology/tests/test_core.py +19 -0
- sknetwork/topology/tests/test_cycles.py +65 -0
- sknetwork/topology/tests/test_structure.py +2 -16
- sknetwork/topology/tests/test_triangles.py +11 -15
- sknetwork/topology/tests/test_wl.py +72 -0
- sknetwork/topology/triangles.cp310-win_amd64.pyd +0 -0
- sknetwork/topology/triangles.cpp +5056 -2696
- sknetwork/topology/triangles.pyx +74 -89
- sknetwork/topology/weisfeiler_lehman.py +56 -86
- sknetwork/topology/weisfeiler_lehman_core.cp310-win_amd64.pyd +0 -0
- sknetwork/topology/weisfeiler_lehman_core.cpp +14727 -10622
- sknetwork/topology/weisfeiler_lehman_core.pyx +0 -2
- sknetwork/utils/__init__.py +1 -31
- sknetwork/utils/check.py +2 -2
- sknetwork/utils/format.py +5 -3
- sknetwork/utils/membership.py +2 -2
- sknetwork/utils/tests/test_check.py +3 -3
- sknetwork/utils/tests/test_format.py +3 -1
- sknetwork/utils/values.py +1 -1
- sknetwork/visualization/__init__.py +2 -2
- sknetwork/visualization/dendrograms.py +55 -7
- sknetwork/visualization/graphs.py +292 -72
- sknetwork/visualization/tests/test_dendrograms.py +9 -9
- sknetwork/visualization/tests/test_graphs.py +71 -62
- scikit_network-0.30.0.dist-info/RECORD +0 -227
- sknetwork/embedding/louvain_hierarchy.py +0 -142
- sknetwork/embedding/tests/test_louvain_hierarchy.py +0 -19
- sknetwork/path/metrics.py +0 -148
- sknetwork/path/tests/test_metrics.py +0 -29
- sknetwork/ranking/harmonic.py +0 -82
- sknetwork/topology/dag.py +0 -74
- sknetwork/topology/dag_core.cp310-win_amd64.pyd +0 -0
- sknetwork/topology/dag_core.cpp +0 -23350
- sknetwork/topology/dag_core.pyx +0 -38
- sknetwork/topology/kcliques.cp310-win_amd64.pyd +0 -0
- sknetwork/topology/kcliques.pyx +0 -193
- sknetwork/topology/kcore.cp310-win_amd64.pyd +0 -0
- sknetwork/topology/kcore.pyx +0 -120
- sknetwork/topology/tests/test_cores.py +0 -21
- sknetwork/topology/tests/test_dag.py +0 -26
- sknetwork/topology/tests/test_wl_coloring.py +0 -49
- sknetwork/topology/tests/test_wl_kernel.py +0 -31
- sknetwork/utils/base.py +0 -35
- sknetwork/utils/minheap.cp310-win_amd64.pyd +0 -0
- sknetwork/utils/simplex.py +0 -140
- sknetwork/utils/tests/test_base.py +0 -28
- sknetwork/utils/tests/test_bunch.py +0 -16
- sknetwork/utils/tests/test_projection_simplex.py +0 -33
- sknetwork/utils/tests/test_verbose.py +0 -15
- sknetwork/utils/verbose.py +0 -37
- {scikit_network-0.30.0.dist-info → scikit_network-0.32.1.dist-info}/LICENSE +0 -0
- {scikit_network-0.30.0.dist-info → scikit_network-0.32.1.dist-info}/top_level.txt +0 -0
- /sknetwork/{utils → data}/timeout.py +0 -0
|
@@ -0,0 +1,124 @@
|
|
|
1
|
+
# distutils: language=c++
|
|
2
|
+
# cython: language_level=3
|
|
3
|
+
from libcpp.set cimport set
|
|
4
|
+
from libc.stdlib cimport rand
|
|
5
|
+
|
|
6
|
+
cimport cython
|
|
7
|
+
|
|
8
|
+
ctypedef fused int_or_long:
|
|
9
|
+
int
|
|
10
|
+
long
|
|
11
|
+
|
|
12
|
+
@cython.boundscheck(False)
|
|
13
|
+
@cython.wraparound(False)
|
|
14
|
+
def optimize_refine_core(int_or_long[:] labels, int_or_long[:] labels_refined, int_or_long[:] indices,
|
|
15
|
+
int_or_long[:] indptr, float[:] data, float[:] out_weights, float[:] in_weights, float[:] out_cluster_weights,
|
|
16
|
+
float[:] in_cluster_weights, float[:] cluster_weights, float[:] self_loops, float resolution): # pragma: no cover
|
|
17
|
+
"""Refine clusters while maximizing modularity.
|
|
18
|
+
|
|
19
|
+
Parameters
|
|
20
|
+
----------
|
|
21
|
+
labels :
|
|
22
|
+
Labels (initial partition).
|
|
23
|
+
labels_refined :
|
|
24
|
+
Refined labels.
|
|
25
|
+
indices :
|
|
26
|
+
CSR format index array of the normalized adjacency matrix.
|
|
27
|
+
indptr :
|
|
28
|
+
CSR format index pointer array of the normalized adjacency matrix.
|
|
29
|
+
data :
|
|
30
|
+
CSR format data array of the normalized adjacency matrix.
|
|
31
|
+
out_weights :
|
|
32
|
+
Out-weights of nodes (sum to 1).
|
|
33
|
+
in_weights :
|
|
34
|
+
In-weights of nodes (sum to 1).
|
|
35
|
+
out_cluster_weights :
|
|
36
|
+
Out-weights of clusters (sum to 1).
|
|
37
|
+
in_cluster_weights :
|
|
38
|
+
In-weights of clusters (sum to 1).
|
|
39
|
+
cluster_weights :
|
|
40
|
+
Weights of clusters (initialized to 0).
|
|
41
|
+
self_loops :
|
|
42
|
+
Weights of self loops.
|
|
43
|
+
resolution :
|
|
44
|
+
Resolution parameter (positive).
|
|
45
|
+
|
|
46
|
+
Returns
|
|
47
|
+
-------
|
|
48
|
+
labels_refined :
|
|
49
|
+
Refined labels.
|
|
50
|
+
"""
|
|
51
|
+
cdef int_or_long n
|
|
52
|
+
cdef int_or_long label
|
|
53
|
+
cdef int_or_long label_refined
|
|
54
|
+
cdef int_or_long label_target
|
|
55
|
+
cdef int_or_long label_best
|
|
56
|
+
cdef int_or_long i
|
|
57
|
+
cdef int_or_long j
|
|
58
|
+
cdef int_or_long start
|
|
59
|
+
cdef int_or_long end
|
|
60
|
+
|
|
61
|
+
cdef float increase = 1
|
|
62
|
+
cdef float delta
|
|
63
|
+
cdef float delta_local
|
|
64
|
+
cdef float delta_best
|
|
65
|
+
cdef float in_weight
|
|
66
|
+
cdef float out_weight
|
|
67
|
+
|
|
68
|
+
cdef set[int_or_long] label_set
|
|
69
|
+
cdef set[int_or_long] label_target_set
|
|
70
|
+
|
|
71
|
+
n = labels.shape[0]
|
|
72
|
+
while increase:
|
|
73
|
+
increase = 0
|
|
74
|
+
|
|
75
|
+
for i in range(n):
|
|
76
|
+
label_set = ()
|
|
77
|
+
label = labels[i]
|
|
78
|
+
label_refined = labels_refined[i]
|
|
79
|
+
start = indptr[i]
|
|
80
|
+
end = indptr[i+1]
|
|
81
|
+
|
|
82
|
+
# neighboring clusters
|
|
83
|
+
for j in range(start, end):
|
|
84
|
+
if labels[indices[j]] == label:
|
|
85
|
+
label_target = labels_refined[indices[j]]
|
|
86
|
+
label_set.insert(label_target)
|
|
87
|
+
cluster_weights[label_target] += data[j]
|
|
88
|
+
label_set.erase(label_refined)
|
|
89
|
+
|
|
90
|
+
if not label_set.empty():
|
|
91
|
+
out_weight = out_weights[i]
|
|
92
|
+
in_weight = in_weights[i]
|
|
93
|
+
|
|
94
|
+
# node leaving the current cluster
|
|
95
|
+
delta = 2 * (cluster_weights[label_refined] - self_loops[i])
|
|
96
|
+
delta -= resolution * out_weight * (in_cluster_weights[label_refined] - in_weight)
|
|
97
|
+
delta -= resolution * in_weight * (out_cluster_weights[label_refined] - out_weight)
|
|
98
|
+
|
|
99
|
+
label_target_set = ()
|
|
100
|
+
for label_target in label_set:
|
|
101
|
+
delta_local = 2 * cluster_weights[label_target]
|
|
102
|
+
delta_local -= resolution * out_weight * in_cluster_weights[label_target]
|
|
103
|
+
delta_local -= resolution * in_weight * out_cluster_weights[label_target]
|
|
104
|
+
delta_local -= delta
|
|
105
|
+
if delta_local > 0:
|
|
106
|
+
label_target_set.insert(label_target)
|
|
107
|
+
cluster_weights[label_target] = 0
|
|
108
|
+
|
|
109
|
+
if not label_target_set.empty():
|
|
110
|
+
increase = 1
|
|
111
|
+
k = rand() % label_target_set.size()
|
|
112
|
+
for label_target in label_target_set:
|
|
113
|
+
k -= 1
|
|
114
|
+
if k == 0:
|
|
115
|
+
break
|
|
116
|
+
labels_refined[i] = label_target
|
|
117
|
+
# update weights
|
|
118
|
+
out_cluster_weights[label_refined] -= out_weight
|
|
119
|
+
in_cluster_weights[label_refined] -= in_weight
|
|
120
|
+
out_cluster_weights[label_target] += out_weight
|
|
121
|
+
in_cluster_weights[label_target] += in_weight
|
|
122
|
+
cluster_weights[label_refined] = 0
|
|
123
|
+
|
|
124
|
+
return labels_refined
|
sknetwork/clustering/louvain.py
CHANGED
|
@@ -12,15 +12,15 @@ import numpy as np
|
|
|
12
12
|
from scipy import sparse
|
|
13
13
|
|
|
14
14
|
from sknetwork.clustering.base import BaseClustering
|
|
15
|
-
from sknetwork.clustering.louvain_core import
|
|
15
|
+
from sknetwork.clustering.louvain_core import optimize_core
|
|
16
16
|
from sknetwork.clustering.postprocess import reindex_labels
|
|
17
17
|
from sknetwork.utils.check import check_random_state, get_probs
|
|
18
18
|
from sknetwork.utils.format import check_format, get_adjacency, directed2undirected
|
|
19
19
|
from sknetwork.utils.membership import get_membership
|
|
20
|
-
from sknetwork.
|
|
20
|
+
from sknetwork.log import Log
|
|
21
21
|
|
|
22
22
|
|
|
23
|
-
class Louvain(BaseClustering,
|
|
23
|
+
class Louvain(BaseClustering, Log):
|
|
24
24
|
"""Louvain algorithm for clustering graphs by maximization of modularity.
|
|
25
25
|
|
|
26
26
|
For bipartite graphs, the algorithm maximizes Barber's modularity by default.
|
|
@@ -30,11 +30,11 @@ class Louvain(BaseClustering, VerboseMixin):
|
|
|
30
30
|
resolution :
|
|
31
31
|
Resolution parameter.
|
|
32
32
|
modularity : str
|
|
33
|
-
|
|
33
|
+
Type of modularity to maximize. Can be ``'Dugue'``, ``'Newman'`` or ``'Potts'`` (default = ``'dugue'``).
|
|
34
34
|
tol_optimization :
|
|
35
|
-
Minimum increase in
|
|
35
|
+
Minimum increase in modularity to enter a new optimization pass in the local search.
|
|
36
36
|
tol_aggregation :
|
|
37
|
-
Minimum increase in
|
|
37
|
+
Minimum increase in modularity to enter a new aggregation pass.
|
|
38
38
|
n_aggregations :
|
|
39
39
|
Maximum number of aggregations.
|
|
40
40
|
A negative value is interpreted as no limit.
|
|
@@ -42,8 +42,8 @@ class Louvain(BaseClustering, VerboseMixin):
|
|
|
42
42
|
Enables node shuffling before optimization.
|
|
43
43
|
sort_clusters :
|
|
44
44
|
If ``True``, sort labels in decreasing order of cluster size.
|
|
45
|
-
|
|
46
|
-
If ``True``, return the
|
|
45
|
+
return_probs :
|
|
46
|
+
If ``True``, return the probability distribution over clusters (soft clustering).
|
|
47
47
|
return_aggregate :
|
|
48
48
|
If ``True``, return the adjacency matrix of the graph between clusters.
|
|
49
49
|
random_state :
|
|
@@ -53,18 +53,14 @@ class Louvain(BaseClustering, VerboseMixin):
|
|
|
53
53
|
|
|
54
54
|
Attributes
|
|
55
55
|
----------
|
|
56
|
-
labels_ : np.ndarray
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
labels_col_ : np.ndarray
|
|
61
|
-
Labels of
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
membership_row_ : sparse.csr_matrix
|
|
65
|
-
Membership matrix of the rows (for bipartite graphs).
|
|
66
|
-
membership_col_ : sparse.csr_matrix
|
|
67
|
-
Membership matrix of the columns (for bipartite graphs).
|
|
56
|
+
labels_ : np.ndarray, shape (n_labels,)
|
|
57
|
+
Label of each node.
|
|
58
|
+
probs_ : sparse.csr_matrix, shape (n_row, n_labels)
|
|
59
|
+
Probability distribution over labels.
|
|
60
|
+
labels_row_, labels_col_ : np.ndarray
|
|
61
|
+
Labels of rows and columns, for bipartite graphs.
|
|
62
|
+
probs_row_, probs_col_ : sparse.csr_matrix, shape (n_row, n_labels)
|
|
63
|
+
Probability distributions over labels for rows and columns (for bipartite graphs).
|
|
68
64
|
aggregate_ : sparse.csr_matrix
|
|
69
65
|
Aggregate adjacency matrix or biadjacency matrix between clusters.
|
|
70
66
|
|
|
@@ -95,84 +91,88 @@ class Louvain(BaseClustering, VerboseMixin):
|
|
|
95
91
|
<https://arxiv.org/pdf/0707.1616>`_
|
|
96
92
|
Physical Review E, 76(6).
|
|
97
93
|
"""
|
|
94
|
+
|
|
98
95
|
def __init__(self, resolution: float = 1, modularity: str = 'dugue', tol_optimization: float = 1e-3,
|
|
99
96
|
tol_aggregation: float = 1e-3, n_aggregations: int = -1, shuffle_nodes: bool = False,
|
|
100
|
-
sort_clusters: bool = True,
|
|
97
|
+
sort_clusters: bool = True, return_probs: bool = True, return_aggregate: bool = True,
|
|
101
98
|
random_state: Optional[Union[np.random.RandomState, int]] = None, verbose: bool = False):
|
|
102
|
-
super(Louvain, self).__init__(sort_clusters=sort_clusters,
|
|
99
|
+
super(Louvain, self).__init__(sort_clusters=sort_clusters, return_probs=return_probs,
|
|
103
100
|
return_aggregate=return_aggregate)
|
|
104
|
-
|
|
101
|
+
Log.__init__(self, verbose)
|
|
105
102
|
|
|
106
103
|
self.labels_ = None
|
|
107
104
|
self.resolution = resolution
|
|
108
105
|
self.modularity = modularity.lower()
|
|
109
|
-
self.
|
|
106
|
+
self.tol_optimization = tol_optimization
|
|
110
107
|
self.tol_aggregation = tol_aggregation
|
|
111
108
|
self.n_aggregations = n_aggregations
|
|
112
109
|
self.shuffle_nodes = shuffle_nodes
|
|
113
110
|
self.random_state = check_random_state(random_state)
|
|
114
111
|
self.bipartite = None
|
|
115
112
|
|
|
116
|
-
def _optimize(self,
|
|
117
|
-
"""One
|
|
113
|
+
def _optimize(self, labels, adjacency, out_weights, in_weights):
|
|
114
|
+
"""One optimization pass of the Louvain algorithm.
|
|
118
115
|
|
|
119
116
|
Parameters
|
|
120
117
|
----------
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
118
|
+
labels :
|
|
119
|
+
Labels of nodes.
|
|
120
|
+
adjacency :
|
|
121
|
+
Adjacency matrix.
|
|
122
|
+
out_weights :
|
|
123
|
+
Out-weights of nodes.
|
|
124
|
+
in_weights :
|
|
125
|
+
In-weights of nodes
|
|
127
126
|
|
|
128
127
|
Returns
|
|
129
128
|
-------
|
|
130
129
|
labels :
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
130
|
+
Labels of nodes after optimization.
|
|
131
|
+
increase :
|
|
132
|
+
Gain in modularity after optimization.
|
|
134
133
|
"""
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
134
|
+
labels = labels.astype(np.int32)
|
|
135
|
+
indices = adjacency.indices
|
|
136
|
+
indptr = adjacency.indptr
|
|
137
|
+
data = adjacency.data.astype(np.float32)
|
|
138
|
+
out_weights = out_weights.astype(np.float32)
|
|
139
|
+
in_weights = in_weights.astype(np.float32)
|
|
140
|
+
out_cluster_weights = out_weights.copy()
|
|
141
|
+
in_cluster_weights = in_weights.copy()
|
|
142
|
+
cluster_weights = np.zeros_like(out_cluster_weights).astype(np.float32)
|
|
140
143
|
self_loops = adjacency.diagonal().astype(np.float32)
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
indices: np.ndarray = adjacency.indices
|
|
144
|
-
data: np.ndarray = adjacency.data.astype(np.float32)
|
|
145
|
-
|
|
146
|
-
return fit_core(self.resolution, self.tol, node_probs_ou, node_probs_in, self_loops, data, indices, indptr)
|
|
144
|
+
return optimize_core(labels, indices, indptr, data, out_weights, in_weights, out_cluster_weights,
|
|
145
|
+
in_cluster_weights, cluster_weights, self_loops, self.resolution, self.tol_optimization)
|
|
147
146
|
|
|
148
147
|
@staticmethod
|
|
149
|
-
def _aggregate(
|
|
148
|
+
def _aggregate(labels, adjacency, out_weights, in_weights):
|
|
150
149
|
"""Aggregate nodes belonging to the same cluster.
|
|
151
150
|
|
|
152
151
|
Parameters
|
|
153
152
|
----------
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
153
|
+
labels :
|
|
154
|
+
Labels of nodes.
|
|
155
|
+
adjacency :
|
|
156
|
+
Adjacency matrix.
|
|
157
|
+
out_weights :
|
|
158
|
+
Out-weights of nodes.
|
|
159
|
+
in_weights :
|
|
160
|
+
In-weights of nodes.
|
|
162
161
|
|
|
163
162
|
Returns
|
|
164
163
|
-------
|
|
165
|
-
Aggregate graph.
|
|
164
|
+
Aggregate graph (adjacency matrix, out-weights, in-weights).
|
|
166
165
|
"""
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
166
|
+
membership = get_membership(labels)
|
|
167
|
+
adjacency_ = membership.T.tocsr().dot(adjacency.dot(membership))
|
|
168
|
+
out_weights_ = membership.T.dot(out_weights)
|
|
169
|
+
in_weights_ = membership.T.dot(in_weights)
|
|
170
|
+
return adjacency_, out_weights_, in_weights_
|
|
171
171
|
|
|
172
|
-
def
|
|
173
|
-
"""
|
|
172
|
+
def _pre_processing(self, input_matrix, force_bipartite):
|
|
173
|
+
"""Pre-processing for Louvain.
|
|
174
174
|
|
|
175
|
-
|
|
175
|
+
Parameters
|
|
176
176
|
----------
|
|
177
177
|
input_matrix :
|
|
178
178
|
Adjacency matrix or biadjacency matrix of the graph.
|
|
@@ -181,63 +181,64 @@ class Louvain(BaseClustering, VerboseMixin):
|
|
|
181
181
|
|
|
182
182
|
Returns
|
|
183
183
|
-------
|
|
184
|
-
|
|
184
|
+
adjacency :
|
|
185
|
+
Adjacency matrix.
|
|
186
|
+
out_weights, in_weights :
|
|
187
|
+
Node weights.
|
|
188
|
+
membership :
|
|
189
|
+
Membership matrix (labels).
|
|
190
|
+
index :
|
|
191
|
+
Index of nodes.
|
|
185
192
|
"""
|
|
186
193
|
self._init_vars()
|
|
194
|
+
|
|
195
|
+
# adjacency matrix
|
|
187
196
|
input_matrix = check_format(input_matrix)
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
else:
|
|
192
|
-
adjacency, self.bipartite = get_adjacency(input_matrix, force_bipartite=force_bipartite)
|
|
197
|
+
force_directed = self.modularity == 'dugue'
|
|
198
|
+
adjacency, self.bipartite = get_adjacency(input_matrix, force_directed=force_directed,
|
|
199
|
+
force_bipartite=force_bipartite)
|
|
193
200
|
|
|
201
|
+
# shuffling
|
|
194
202
|
n = adjacency.shape[0]
|
|
195
|
-
|
|
196
203
|
index = np.arange(n)
|
|
197
204
|
if self.shuffle_nodes:
|
|
198
205
|
index = self.random_state.permutation(index)
|
|
199
206
|
adjacency = adjacency[index][:, index]
|
|
200
207
|
|
|
208
|
+
# node weights
|
|
201
209
|
if self.modularity == 'potts':
|
|
202
|
-
|
|
203
|
-
|
|
210
|
+
out_weights = get_probs('uniform', adjacency)
|
|
211
|
+
in_weights = out_weights.copy()
|
|
204
212
|
elif self.modularity == 'newman':
|
|
205
|
-
|
|
206
|
-
|
|
213
|
+
out_weights = get_probs('degree', adjacency)
|
|
214
|
+
in_weights = out_weights.copy()
|
|
207
215
|
elif self.modularity == 'dugue':
|
|
208
|
-
|
|
209
|
-
|
|
216
|
+
out_weights = get_probs('degree', adjacency)
|
|
217
|
+
in_weights = get_probs('degree', adjacency.T)
|
|
210
218
|
else:
|
|
211
219
|
raise ValueError('Unknown modularity function.')
|
|
212
220
|
|
|
213
|
-
|
|
221
|
+
# normalized, symmetric adjacency matrix (sums to 1)
|
|
222
|
+
adjacency = directed2undirected(adjacency)
|
|
223
|
+
adjacency = adjacency / adjacency.data.sum()
|
|
214
224
|
|
|
225
|
+
# cluster membership
|
|
215
226
|
membership = sparse.identity(n, format='csr')
|
|
216
|
-
increase = True
|
|
217
|
-
count_aggregations = 0
|
|
218
|
-
self.log.print("Starting with", n, "nodes.")
|
|
219
|
-
while increase:
|
|
220
|
-
count_aggregations += 1
|
|
221
|
-
|
|
222
|
-
labels_cluster, pass_increase = self._optimize(adjacency_cluster, probs_out, probs_in)
|
|
223
|
-
_, labels_cluster = np.unique(labels_cluster, return_inverse=True)
|
|
224
|
-
|
|
225
|
-
if pass_increase <= self.tol_aggregation:
|
|
226
|
-
increase = False
|
|
227
|
-
else:
|
|
228
|
-
membership_cluster = get_membership(labels_cluster)
|
|
229
|
-
membership = membership.dot(membership_cluster)
|
|
230
|
-
adjacency_cluster, probs_out, probs_in = self._aggregate(adjacency_cluster, probs_out, probs_in,
|
|
231
|
-
membership_cluster)
|
|
232
|
-
|
|
233
|
-
n = adjacency_cluster.shape[0]
|
|
234
|
-
if n == 1:
|
|
235
|
-
break
|
|
236
|
-
self.log.print("Aggregation", count_aggregations, "completed with", n, "clusters and ",
|
|
237
|
-
pass_increase, "increment.")
|
|
238
|
-
if count_aggregations == self.n_aggregations:
|
|
239
|
-
break
|
|
240
227
|
|
|
228
|
+
return adjacency, out_weights, in_weights, membership, index
|
|
229
|
+
|
|
230
|
+
def _post_processing(self, input_matrix, membership, index):
|
|
231
|
+
"""Post-processing for Louvain.
|
|
232
|
+
|
|
233
|
+
Parameters
|
|
234
|
+
----------
|
|
235
|
+
input_matrix :
|
|
236
|
+
Adjacency matrix or biadjacency matrix of the graph.
|
|
237
|
+
membership :
|
|
238
|
+
Membership matrix (labels).
|
|
239
|
+
index :
|
|
240
|
+
Index of nodes.
|
|
241
|
+
"""
|
|
241
242
|
if self.sort_clusters:
|
|
242
243
|
labels = reindex_labels(membership.indices)
|
|
243
244
|
else:
|
|
@@ -246,10 +247,40 @@ class Louvain(BaseClustering, VerboseMixin):
|
|
|
246
247
|
reverse = np.empty(index.size, index.dtype)
|
|
247
248
|
reverse[index] = np.arange(index.size)
|
|
248
249
|
labels = labels[reverse]
|
|
249
|
-
|
|
250
250
|
self.labels_ = labels
|
|
251
251
|
if self.bipartite:
|
|
252
252
|
self._split_vars(input_matrix.shape)
|
|
253
253
|
self._secondary_outputs(input_matrix)
|
|
254
254
|
|
|
255
|
+
def fit(self, input_matrix: Union[sparse.csr_matrix, np.ndarray], force_bipartite: bool = False) -> 'Louvain':
|
|
256
|
+
"""Fit algorithm to data.
|
|
257
|
+
|
|
258
|
+
Parameters
|
|
259
|
+
----------
|
|
260
|
+
input_matrix :
|
|
261
|
+
Adjacency matrix or biadjacency matrix of the graph.
|
|
262
|
+
force_bipartite :
|
|
263
|
+
If ``True``, force the input matrix to be considered as a biadjacency matrix even if square.
|
|
264
|
+
|
|
265
|
+
Returns
|
|
266
|
+
-------
|
|
267
|
+
self : :class:`Louvain`
|
|
268
|
+
"""
|
|
269
|
+
adjacency, out_weights, in_weights, membership, index = self._pre_processing(input_matrix, force_bipartite)
|
|
270
|
+
n = adjacency.shape[0]
|
|
271
|
+
count = 0
|
|
272
|
+
stop = False
|
|
273
|
+
while not stop:
|
|
274
|
+
count += 1
|
|
275
|
+
labels = np.arange(n)
|
|
276
|
+
labels, increase = self._optimize(labels, adjacency, out_weights, in_weights)
|
|
277
|
+
_, labels = np.unique(labels, return_inverse=True)
|
|
278
|
+
adjacency, out_weights, in_weights = self._aggregate(labels, adjacency, out_weights, in_weights)
|
|
279
|
+
membership = membership.dot(get_membership(labels))
|
|
280
|
+
n = adjacency.shape[0]
|
|
281
|
+
stop = n == 1
|
|
282
|
+
stop |= increase <= self.tol_aggregation
|
|
283
|
+
stop |= count == self.n_aggregations
|
|
284
|
+
self.print_log("Aggregation:", count, " Clusters:", n, " Increase:", increase)
|
|
285
|
+
self._post_processing(input_matrix, membership, index)
|
|
255
286
|
return self
|
|
Binary file
|