scikit-network 0.33.3__cp312-cp312-macosx_10_13_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of scikit-network might be problematic. Click here for more details.
- scikit_network-0.33.3.dist-info/METADATA +122 -0
- scikit_network-0.33.3.dist-info/RECORD +228 -0
- scikit_network-0.33.3.dist-info/WHEEL +6 -0
- scikit_network-0.33.3.dist-info/licenses/AUTHORS.rst +43 -0
- scikit_network-0.33.3.dist-info/licenses/LICENSE +34 -0
- scikit_network-0.33.3.dist-info/top_level.txt +1 -0
- sknetwork/__init__.py +21 -0
- sknetwork/base.py +67 -0
- sknetwork/classification/__init__.py +8 -0
- sknetwork/classification/base.py +142 -0
- sknetwork/classification/base_rank.py +133 -0
- sknetwork/classification/diffusion.py +134 -0
- sknetwork/classification/knn.py +139 -0
- sknetwork/classification/metrics.py +205 -0
- sknetwork/classification/pagerank.py +66 -0
- sknetwork/classification/propagation.py +152 -0
- sknetwork/classification/tests/__init__.py +1 -0
- sknetwork/classification/tests/test_API.py +30 -0
- sknetwork/classification/tests/test_diffusion.py +77 -0
- sknetwork/classification/tests/test_knn.py +23 -0
- sknetwork/classification/tests/test_metrics.py +53 -0
- sknetwork/classification/tests/test_pagerank.py +20 -0
- sknetwork/classification/tests/test_propagation.py +24 -0
- sknetwork/classification/vote.cpp +27581 -0
- sknetwork/classification/vote.cpython-312-darwin.so +0 -0
- sknetwork/classification/vote.pyx +56 -0
- sknetwork/clustering/__init__.py +8 -0
- sknetwork/clustering/base.py +172 -0
- sknetwork/clustering/kcenters.py +253 -0
- sknetwork/clustering/leiden.py +242 -0
- sknetwork/clustering/leiden_core.cpp +31572 -0
- sknetwork/clustering/leiden_core.cpython-312-darwin.so +0 -0
- sknetwork/clustering/leiden_core.pyx +124 -0
- sknetwork/clustering/louvain.py +286 -0
- sknetwork/clustering/louvain_core.cpp +31217 -0
- sknetwork/clustering/louvain_core.cpython-312-darwin.so +0 -0
- sknetwork/clustering/louvain_core.pyx +124 -0
- sknetwork/clustering/metrics.py +91 -0
- sknetwork/clustering/postprocess.py +66 -0
- sknetwork/clustering/propagation_clustering.py +104 -0
- sknetwork/clustering/tests/__init__.py +1 -0
- sknetwork/clustering/tests/test_API.py +38 -0
- sknetwork/clustering/tests/test_kcenters.py +60 -0
- sknetwork/clustering/tests/test_leiden.py +34 -0
- sknetwork/clustering/tests/test_louvain.py +135 -0
- sknetwork/clustering/tests/test_metrics.py +50 -0
- sknetwork/clustering/tests/test_postprocess.py +39 -0
- sknetwork/data/__init__.py +6 -0
- sknetwork/data/base.py +33 -0
- sknetwork/data/load.py +406 -0
- sknetwork/data/models.py +459 -0
- sknetwork/data/parse.py +644 -0
- sknetwork/data/test_graphs.py +84 -0
- sknetwork/data/tests/__init__.py +1 -0
- sknetwork/data/tests/test_API.py +30 -0
- sknetwork/data/tests/test_base.py +14 -0
- sknetwork/data/tests/test_load.py +95 -0
- sknetwork/data/tests/test_models.py +52 -0
- sknetwork/data/tests/test_parse.py +250 -0
- sknetwork/data/tests/test_test_graphs.py +29 -0
- sknetwork/data/tests/test_toy_graphs.py +68 -0
- sknetwork/data/timeout.py +38 -0
- sknetwork/data/toy_graphs.py +611 -0
- sknetwork/embedding/__init__.py +8 -0
- sknetwork/embedding/base.py +94 -0
- sknetwork/embedding/force_atlas.py +198 -0
- sknetwork/embedding/louvain_embedding.py +148 -0
- sknetwork/embedding/random_projection.py +135 -0
- sknetwork/embedding/spectral.py +141 -0
- sknetwork/embedding/spring.py +198 -0
- sknetwork/embedding/svd.py +359 -0
- sknetwork/embedding/tests/__init__.py +1 -0
- sknetwork/embedding/tests/test_API.py +49 -0
- sknetwork/embedding/tests/test_force_atlas.py +35 -0
- sknetwork/embedding/tests/test_louvain_embedding.py +33 -0
- sknetwork/embedding/tests/test_random_projection.py +28 -0
- sknetwork/embedding/tests/test_spectral.py +81 -0
- sknetwork/embedding/tests/test_spring.py +50 -0
- sknetwork/embedding/tests/test_svd.py +43 -0
- sknetwork/gnn/__init__.py +10 -0
- sknetwork/gnn/activation.py +117 -0
- sknetwork/gnn/base.py +181 -0
- sknetwork/gnn/base_activation.py +90 -0
- sknetwork/gnn/base_layer.py +109 -0
- sknetwork/gnn/gnn_classifier.py +305 -0
- sknetwork/gnn/layer.py +153 -0
- sknetwork/gnn/loss.py +180 -0
- sknetwork/gnn/neighbor_sampler.py +65 -0
- sknetwork/gnn/optimizer.py +164 -0
- sknetwork/gnn/tests/__init__.py +1 -0
- sknetwork/gnn/tests/test_activation.py +56 -0
- sknetwork/gnn/tests/test_base.py +75 -0
- sknetwork/gnn/tests/test_base_layer.py +37 -0
- sknetwork/gnn/tests/test_gnn_classifier.py +130 -0
- sknetwork/gnn/tests/test_layers.py +80 -0
- sknetwork/gnn/tests/test_loss.py +33 -0
- sknetwork/gnn/tests/test_neigh_sampler.py +23 -0
- sknetwork/gnn/tests/test_optimizer.py +43 -0
- sknetwork/gnn/tests/test_utils.py +41 -0
- sknetwork/gnn/utils.py +127 -0
- sknetwork/hierarchy/__init__.py +6 -0
- sknetwork/hierarchy/base.py +96 -0
- sknetwork/hierarchy/louvain_hierarchy.py +272 -0
- sknetwork/hierarchy/metrics.py +234 -0
- sknetwork/hierarchy/paris.cpp +37865 -0
- sknetwork/hierarchy/paris.cpython-312-darwin.so +0 -0
- sknetwork/hierarchy/paris.pyx +316 -0
- sknetwork/hierarchy/postprocess.py +350 -0
- sknetwork/hierarchy/tests/__init__.py +1 -0
- sknetwork/hierarchy/tests/test_API.py +24 -0
- sknetwork/hierarchy/tests/test_algos.py +34 -0
- sknetwork/hierarchy/tests/test_metrics.py +62 -0
- sknetwork/hierarchy/tests/test_postprocess.py +57 -0
- sknetwork/linalg/__init__.py +9 -0
- sknetwork/linalg/basics.py +37 -0
- sknetwork/linalg/diteration.cpp +27397 -0
- sknetwork/linalg/diteration.cpython-312-darwin.so +0 -0
- sknetwork/linalg/diteration.pyx +47 -0
- sknetwork/linalg/eig_solver.py +93 -0
- sknetwork/linalg/laplacian.py +15 -0
- sknetwork/linalg/normalizer.py +86 -0
- sknetwork/linalg/operators.py +225 -0
- sknetwork/linalg/polynome.py +76 -0
- sknetwork/linalg/ppr_solver.py +170 -0
- sknetwork/linalg/push.cpp +31069 -0
- sknetwork/linalg/push.cpython-312-darwin.so +0 -0
- sknetwork/linalg/push.pyx +71 -0
- sknetwork/linalg/sparse_lowrank.py +142 -0
- sknetwork/linalg/svd_solver.py +91 -0
- sknetwork/linalg/tests/__init__.py +1 -0
- sknetwork/linalg/tests/test_eig.py +44 -0
- sknetwork/linalg/tests/test_laplacian.py +18 -0
- sknetwork/linalg/tests/test_normalization.py +34 -0
- sknetwork/linalg/tests/test_operators.py +66 -0
- sknetwork/linalg/tests/test_polynome.py +38 -0
- sknetwork/linalg/tests/test_ppr.py +50 -0
- sknetwork/linalg/tests/test_sparse_lowrank.py +61 -0
- sknetwork/linalg/tests/test_svd.py +38 -0
- sknetwork/linkpred/__init__.py +2 -0
- sknetwork/linkpred/base.py +46 -0
- sknetwork/linkpred/nn.py +126 -0
- sknetwork/linkpred/tests/__init__.py +1 -0
- sknetwork/linkpred/tests/test_nn.py +27 -0
- sknetwork/log.py +19 -0
- sknetwork/path/__init__.py +5 -0
- sknetwork/path/dag.py +54 -0
- sknetwork/path/distances.py +98 -0
- sknetwork/path/search.py +31 -0
- sknetwork/path/shortest_path.py +61 -0
- sknetwork/path/tests/__init__.py +1 -0
- sknetwork/path/tests/test_dag.py +37 -0
- sknetwork/path/tests/test_distances.py +62 -0
- sknetwork/path/tests/test_search.py +40 -0
- sknetwork/path/tests/test_shortest_path.py +40 -0
- sknetwork/ranking/__init__.py +8 -0
- sknetwork/ranking/base.py +61 -0
- sknetwork/ranking/betweenness.cpp +9704 -0
- sknetwork/ranking/betweenness.cpython-312-darwin.so +0 -0
- sknetwork/ranking/betweenness.pyx +97 -0
- sknetwork/ranking/closeness.py +92 -0
- sknetwork/ranking/hits.py +94 -0
- sknetwork/ranking/katz.py +83 -0
- sknetwork/ranking/pagerank.py +110 -0
- sknetwork/ranking/postprocess.py +37 -0
- sknetwork/ranking/tests/__init__.py +1 -0
- sknetwork/ranking/tests/test_API.py +32 -0
- sknetwork/ranking/tests/test_betweenness.py +38 -0
- sknetwork/ranking/tests/test_closeness.py +30 -0
- sknetwork/ranking/tests/test_hits.py +20 -0
- sknetwork/ranking/tests/test_pagerank.py +62 -0
- sknetwork/ranking/tests/test_postprocess.py +26 -0
- sknetwork/regression/__init__.py +4 -0
- sknetwork/regression/base.py +61 -0
- sknetwork/regression/diffusion.py +210 -0
- sknetwork/regression/tests/__init__.py +1 -0
- sknetwork/regression/tests/test_API.py +32 -0
- sknetwork/regression/tests/test_diffusion.py +56 -0
- sknetwork/sknetwork.py +3 -0
- sknetwork/test_base.py +35 -0
- sknetwork/test_log.py +15 -0
- sknetwork/topology/__init__.py +8 -0
- sknetwork/topology/cliques.cpp +32562 -0
- sknetwork/topology/cliques.cpython-312-darwin.so +0 -0
- sknetwork/topology/cliques.pyx +149 -0
- sknetwork/topology/core.cpp +30648 -0
- sknetwork/topology/core.cpython-312-darwin.so +0 -0
- sknetwork/topology/core.pyx +90 -0
- sknetwork/topology/cycles.py +243 -0
- sknetwork/topology/minheap.cpp +27329 -0
- sknetwork/topology/minheap.cpython-312-darwin.so +0 -0
- sknetwork/topology/minheap.pxd +20 -0
- sknetwork/topology/minheap.pyx +109 -0
- sknetwork/topology/structure.py +194 -0
- sknetwork/topology/tests/__init__.py +1 -0
- sknetwork/topology/tests/test_cliques.py +28 -0
- sknetwork/topology/tests/test_core.py +19 -0
- sknetwork/topology/tests/test_cycles.py +65 -0
- sknetwork/topology/tests/test_structure.py +85 -0
- sknetwork/topology/tests/test_triangles.py +38 -0
- sknetwork/topology/tests/test_wl.py +72 -0
- sknetwork/topology/triangles.cpp +8891 -0
- sknetwork/topology/triangles.cpython-312-darwin.so +0 -0
- sknetwork/topology/triangles.pyx +151 -0
- sknetwork/topology/weisfeiler_lehman.py +133 -0
- sknetwork/topology/weisfeiler_lehman_core.cpp +27632 -0
- sknetwork/topology/weisfeiler_lehman_core.cpython-312-darwin.so +0 -0
- sknetwork/topology/weisfeiler_lehman_core.pyx +114 -0
- sknetwork/utils/__init__.py +7 -0
- sknetwork/utils/check.py +355 -0
- sknetwork/utils/format.py +221 -0
- sknetwork/utils/membership.py +82 -0
- sknetwork/utils/neighbors.py +115 -0
- sknetwork/utils/tests/__init__.py +1 -0
- sknetwork/utils/tests/test_check.py +190 -0
- sknetwork/utils/tests/test_format.py +63 -0
- sknetwork/utils/tests/test_membership.py +24 -0
- sknetwork/utils/tests/test_neighbors.py +41 -0
- sknetwork/utils/tests/test_tfidf.py +18 -0
- sknetwork/utils/tests/test_values.py +66 -0
- sknetwork/utils/tfidf.py +37 -0
- sknetwork/utils/values.py +76 -0
- sknetwork/visualization/__init__.py +4 -0
- sknetwork/visualization/colors.py +34 -0
- sknetwork/visualization/dendrograms.py +277 -0
- sknetwork/visualization/graphs.py +1039 -0
- sknetwork/visualization/tests/__init__.py +1 -0
- sknetwork/visualization/tests/test_dendrograms.py +53 -0
- sknetwork/visualization/tests/test_graphs.py +176 -0
|
Binary file
|
|
@@ -0,0 +1,124 @@
|
|
|
1
|
+
# distutils: language=c++
|
|
2
|
+
# cython: language_level=3
|
|
3
|
+
from libcpp.set cimport set
|
|
4
|
+
from libc.stdlib cimport rand
|
|
5
|
+
|
|
6
|
+
cimport cython
|
|
7
|
+
|
|
8
|
+
# Fused integer type: functions declared with ``int_or_long`` arguments are specialized
# for both C ``int`` and C ``long`` index/label arrays.
ctypedef fused int_or_long:
    int
    long
|
|
11
|
+
|
|
12
|
+
# Bounds checking and negative-index wraparound are disabled for this function, so every
# label / index value read below must be a valid non-negative position in the arrays it indexes.
@cython.boundscheck(False)
@cython.wraparound(False)
def optimize_refine_core(int_or_long[:] labels, int_or_long[:] labels_refined, int_or_long[:] indices,
        int_or_long[:] indptr, float[:] data, float[:] out_weights, float[:] in_weights, float[:] out_cluster_weights,
        float[:] in_cluster_weights, float[:] cluster_weights, float[:] self_loops, float resolution): # pragma: no cover
    """Refine clusters while maximizing modularity.

    Nodes are swept repeatedly; a node may only move to the refined cluster of a neighbor that
    shares its label in ``labels``. Sweeps stop once a full pass over the nodes moves no node.

    Parameters
    ----------
    labels :
        Labels (initial partition). Read only; restricts which neighbors are considered.
    labels_refined :
        Refined labels. Modified in place and returned.
    indices :
        CSR format index array of the normalized adjacency matrix.
    indptr :
        CSR format index pointer array of the normalized adjacency matrix.
    data :
        CSR format data array of the normalized adjacency matrix.
    out_weights :
        Out-weights of nodes (sum to 1).
    in_weights :
        In-weights of nodes (sum to 1).
    out_cluster_weights :
        Out-weights of clusters (sum to 1). Updated in place whenever a node moves.
    in_cluster_weights :
        In-weights of clusters (sum to 1). Updated in place whenever a node moves.
    cluster_weights :
        Weights of clusters (initialized to 0). Used as a scratch buffer indexed by refined label;
        every entry written for a node is reset to 0 before the next node is processed.
    self_loops :
        Weights of self loops.
    resolution :
        Resolution parameter (positive).

    Returns
    -------
    labels_refined :
        Refined labels.
    """
    cdef int_or_long n
    cdef int_or_long label
    cdef int_or_long label_refined
    cdef int_or_long label_target
    # NOTE(review): label_best is declared but never used in this function.
    cdef int_or_long label_best
    cdef int_or_long i
    cdef int_or_long j
    cdef int_or_long start
    cdef int_or_long end

    # Used as a boolean flag: non-zero while the last sweep moved at least one node.
    cdef float increase = 1
    cdef float delta
    cdef float delta_local
    # NOTE(review): delta_best is declared but never used in this function.
    cdef float delta_best
    cdef float in_weight
    cdef float out_weight

    cdef set[int_or_long] label_set
    cdef set[int_or_long] label_target_set

    n = labels.shape[0]
    while increase:
        increase = 0

        for i in range(n):
            # reset the set of candidate refined clusters for node i
            label_set = ()
            label = labels[i]
            label_refined = labels_refined[i]
            start = indptr[i]
            end = indptr[i+1]

            # neighboring clusters
            # Only neighbors carrying the same initial label as node i contribute; the edge weight
            # towards each of their refined clusters is accumulated in the scratch buffer.
            for j in range(start, end):
                if labels[indices[j]] == label:
                    label_target = labels_refined[indices[j]]
                    label_set.insert(label_target)
                    cluster_weights[label_target] += data[j]
            # the node's own refined cluster is not a move target
            label_set.erase(label_refined)

            if not label_set.empty():
                out_weight = out_weights[i]
                in_weight = in_weights[i]

                # node leaving the current cluster
                delta = 2 * (cluster_weights[label_refined] - self_loops[i])
                delta -= resolution * out_weight * (in_cluster_weights[label_refined] - in_weight)
                delta -= resolution * in_weight * (out_cluster_weights[label_refined] - out_weight)

                # keep every neighboring refined cluster whose gain, net of the cost of leaving
                # the current cluster, is strictly positive
                label_target_set = ()
                for label_target in label_set:
                    delta_local = 2 * cluster_weights[label_target]
                    delta_local -= resolution * out_weight * in_cluster_weights[label_target]
                    delta_local -= resolution * in_weight * out_cluster_weights[label_target]
                    delta_local -= delta
                    if delta_local > 0:
                        label_target_set.insert(label_target)
                    # reset the scratch entry for this cluster
                    cluster_weights[label_target] = 0

                if not label_target_set.empty():
                    increase = 1
                    # Pick one improving cluster with libc rand(). The countdown stops on the k-th
                    # element for k >= 1; for k == 0 the loop runs to completion and label_target
                    # keeps the last element of the set.
                    # NOTE(review): k has no cdef declaration, so it is handled as a Python object.
                    k = rand() % label_target_set.size()
                    for label_target in label_target_set:
                        k -= 1
                        if k == 0:
                            break
                    labels_refined[i] = label_target
                    # update weights
                    out_cluster_weights[label_refined] -= out_weight
                    in_cluster_weights[label_refined] -= in_weight
                    out_cluster_weights[label_target] += out_weight
                    in_cluster_weights[label_target] += in_weight
            # reset the scratch entry of the node's previous refined cluster (erased from label_set
            # above, so not reset by the candidate loop)
            cluster_weights[label_refined] = 0

    return labels_refined
|
|
@@ -0,0 +1,286 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
# -*- coding: utf-8 -*-
|
|
3
|
+
"""
|
|
4
|
+
Created in November 2018
|
|
5
|
+
@author: Nathan de Lara <nathan.delara@polytechnique.org>
|
|
6
|
+
@author: Quentin Lutz <qlutz@enst.fr>
|
|
7
|
+
@author: Thomas Bonald <bonald@enst.fr>
|
|
8
|
+
"""
|
|
9
|
+
from typing import Union, Optional
|
|
10
|
+
|
|
11
|
+
import numpy as np
|
|
12
|
+
from scipy import sparse
|
|
13
|
+
|
|
14
|
+
from sknetwork.clustering.base import BaseClustering
|
|
15
|
+
from sknetwork.clustering.louvain_core import optimize_core
|
|
16
|
+
from sknetwork.clustering.postprocess import reindex_labels
|
|
17
|
+
from sknetwork.utils.check import check_format, check_random_state, get_probs
|
|
18
|
+
from sknetwork.utils.format import get_adjacency, directed2undirected
|
|
19
|
+
from sknetwork.utils.membership import get_membership
|
|
20
|
+
from sknetwork.log import Log
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class Louvain(BaseClustering, Log):
    """Graph clustering by modularity maximization with the Louvain algorithm.

    When the input is a bipartite graph, Barber's modularity is maximized by default.

    Parameters
    ----------
    resolution :
        Resolution parameter.
    modularity : str
        Which modularity to maximize: ``'Dugue'``, ``'Newman'`` or ``'Potts'``
        (case-insensitive, default = ``'dugue'``).
    tol_optimization :
        Minimum increase in modularity to enter a new optimization pass in the local search.
    tol_aggregation :
        Minimum increase in modularity to enter a new aggregation pass.
    n_aggregations :
        Maximum number of aggregations.
        A negative value is interpreted as no limit.
    shuffle_nodes :
        If ``True``, nodes are randomly permuted before optimization.
    sort_clusters :
        If ``True``, sort labels in decreasing order of cluster size.
    return_probs :
        If ``True``, return the probability distribution over clusters (soft clustering).
    return_aggregate :
        If ``True``, return the adjacency matrix of the graph between clusters.
    random_state :
        Random number generator or random seed. If None, numpy.random is used.
    verbose :
        Verbose mode.

    Attributes
    ----------
    labels_ : np.ndarray
        Label of each node.
    probs_ : sparse.csr_matrix, shape (n_row, n_labels)
        Probability distribution over labels.
    labels_row_, labels_col_ : np.ndarray
        Labels of rows and columns, for bipartite graphs.
    probs_row_, probs_col_ : sparse.csr_matrix, shape (n_row, n_labels)
        Probability distributions over labels for rows and columns (for bipartite graphs).
    aggregate_ : sparse.csr_matrix
        Aggregate adjacency matrix or biadjacency matrix between clusters.

    Example
    -------
    >>> from sknetwork.clustering import Louvain
    >>> from sknetwork.data import karate_club
    >>> louvain = Louvain()
    >>> adjacency = karate_club()
    >>> labels = louvain.fit_predict(adjacency)
    >>> len(set(labels))
    4

    References
    ----------
    * Blondel, V. D., Guillaume, J. L., Lambiotte, R., & Lefebvre, E. (2008).
      `Fast unfolding of communities in large networks.
      <https://arxiv.org/abs/0803.0476>`_
      Journal of statistical mechanics: theory and experiment, 2008.

    * Dugué, N., & Perez, A. (2015).
      `Directed Louvain: maximizing modularity in directed networks
      <https://hal.archives-ouvertes.fr/hal-01231784/document>`_
      (Doctoral dissertation, Université d'Orléans).

    * Barber, M. J. (2007).
      `Modularity and community detection in bipartite networks
      <https://arxiv.org/pdf/0707.1616>`_
      Physical Review E, 76(6).
    """

    def __init__(self, resolution: float = 1, modularity: str = 'dugue', tol_optimization: float = 1e-3,
                 tol_aggregation: float = 1e-3, n_aggregations: int = -1, shuffle_nodes: bool = False,
                 sort_clusters: bool = True, return_probs: bool = True, return_aggregate: bool = True,
                 random_state: Optional[Union[np.random.RandomState, int]] = None, verbose: bool = False):
        super().__init__(sort_clusters=sort_clusters, return_probs=return_probs,
                         return_aggregate=return_aggregate)
        Log.__init__(self, verbose)

        self.labels_ = None
        self.resolution = resolution
        # the modularity name is stored lower-cased so later comparisons are case-insensitive
        self.modularity = modularity.lower()
        self.tol_optimization, self.tol_aggregation = tol_optimization, tol_aggregation
        self.n_aggregations = n_aggregations
        self.shuffle_nodes = shuffle_nodes
        self.random_state = check_random_state(random_state)
        self.bipartite = None

    def _optimize(self, labels, adjacency, out_weights, in_weights):
        """Run one optimization pass of the Louvain algorithm.

        Parameters
        ----------
        labels :
            Labels of nodes.
        adjacency :
            Adjacency matrix.
        out_weights :
            Out-weights of nodes.
        in_weights :
            In-weights of nodes

        Returns
        -------
        labels :
            Labels of nodes after optimization.
        increase :
            Gain in modularity after optimization.
        """
        # the compiled core works on 64-bit integer indices and 32-bit float weights
        node_out = out_weights.astype(np.float32)
        node_in = in_weights.astype(np.float32)
        # cluster weights start as copies of the node weights
        cluster_out = node_out.copy()
        cluster_in = node_in.copy()
        scratch = np.zeros_like(cluster_out).astype(np.float32)
        return optimize_core(
            labels.astype(np.int64),
            adjacency.indices.astype(np.int64),
            adjacency.indptr.astype(np.int64),
            adjacency.data.astype(np.float32),
            node_out,
            node_in,
            cluster_out,
            cluster_in,
            scratch,
            adjacency.diagonal().astype(np.float32),
            self.resolution,
            self.tol_optimization,
        )

    @staticmethod
    def _aggregate(labels, adjacency, out_weights, in_weights):
        """Merge the nodes of each cluster into a single node.

        Parameters
        ----------
        labels :
            Labels of nodes.
        adjacency :
            Adjacency matrix.
        out_weights :
            Out-weights of nodes.
        in_weights :
            In-weights of nodes.

        Returns
        -------
        Aggregate graph (adjacency matrix, out-weights, in-weights).
        """
        membership = get_membership(labels)
        membership_t = membership.T
        aggregate_adjacency = membership_t.tocsr().dot(adjacency.dot(membership))
        return aggregate_adjacency, membership_t.dot(out_weights), membership_t.dot(in_weights)

    def _pre_processing(self, input_matrix, force_bipartite):
        """Prepare the inputs of the Louvain iterations.

        Parameters
        ----------
        input_matrix :
            Adjacency matrix or biadjacency matrix of the graph.
        force_bipartite :
            If ``True``, force the input matrix to be considered as a biadjacency matrix even if square.

        Returns
        -------
        adjacency :
            Adjacency matrix.
        out_weights, in_weights :
            Node weights.
        membership :
            Membership matrix (labels).
        index :
            Index of nodes.
        """
        self._init_vars()

        # adjacency matrix (Dugue modularity requires the directed form)
        adjacency, self.bipartite = get_adjacency(input_matrix, force_directed=(self.modularity == 'dugue'),
                                                  force_bipartite=force_bipartite)

        # optional random permutation of the nodes
        n_nodes = adjacency.shape[0]
        index = np.arange(n_nodes)
        if self.shuffle_nodes:
            index = self.random_state.permutation(index)
            adjacency = adjacency[index][:, index]

        # node weights
        if self.modularity == 'dugue':
            out_weights = get_probs('degree', adjacency)
            in_weights = get_probs('degree', adjacency.T)
        elif self.modularity in ('potts', 'newman'):
            distribution = 'uniform' if self.modularity == 'potts' else 'degree'
            out_weights = get_probs(distribution, adjacency)
            in_weights = out_weights.copy()
        else:
            raise ValueError('Unknown modularity function.')

        # symmetric adjacency matrix, rescaled so that its entries sum to 1
        adjacency = directed2undirected(adjacency)
        adjacency = adjacency / adjacency.data.sum()

        # initially, each node is its own cluster
        membership = sparse.identity(n_nodes, format='csr')

        return adjacency, out_weights, in_weights, membership, index

    def _post_processing(self, input_matrix, membership, index):
        """Turn the final membership matrix into the fitted attributes.

        Parameters
        ----------
        input_matrix :
            Adjacency matrix or biadjacency matrix of the graph.
        membership :
            Membership matrix (labels).
        index :
            Index of nodes.
        """
        labels = membership.indices
        if self.sort_clusters:
            labels = reindex_labels(labels)
        if self.shuffle_nodes:
            # invert the permutation applied during pre-processing
            inverse = np.empty(index.size, index.dtype)
            inverse[index] = np.arange(index.size)
            labels = labels[inverse]
        self.labels_ = labels
        if self.bipartite:
            self._split_vars(input_matrix.shape)
        self._secondary_outputs(input_matrix)

    def fit(self, input_matrix: Union[sparse.csr_matrix, np.ndarray], force_bipartite: bool = False) -> 'Louvain':
        """Fit algorithm to data.

        Parameters
        ----------
        input_matrix :
            Adjacency matrix or biadjacency matrix of the graph.
        force_bipartite :
            If ``True``, force the input matrix to be considered as a biadjacency matrix even if square.

        Returns
        -------
        self : :class:`Louvain`
        """
        input_matrix = check_format(input_matrix)
        adjacency, out_weights, in_weights, membership, index = self._pre_processing(input_matrix, force_bipartite)
        n = adjacency.shape[0]
        count = 0
        while True:
            count += 1
            # every aggregation round starts from singleton clusters
            labels, increase = self._optimize(np.arange(n), adjacency, out_weights, in_weights)
            # relabel clusters with consecutive integers
            labels = np.unique(labels, return_inverse=True)[1]
            adjacency, out_weights, in_weights = self._aggregate(labels, adjacency, out_weights, in_weights)
            membership = membership.dot(get_membership(labels))
            n = adjacency.shape[0]
            self.print_log("Aggregation:", count, " Clusters:", n, " Increase:", increase)
            # stop on a single cluster, an insufficient gain, or the aggregation limit
            if n == 1 or increase <= self.tol_aggregation or count == self.n_aggregations:
                break
        self._post_processing(input_matrix, membership, index)
        return self
|