scikit-network 0.28.3__cp39-cp39-macosx_12_0_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of scikit-network might be problematic. Click here for more details.
- scikit_network-0.28.3.dist-info/AUTHORS.rst +41 -0
- scikit_network-0.28.3.dist-info/LICENSE +34 -0
- scikit_network-0.28.3.dist-info/METADATA +457 -0
- scikit_network-0.28.3.dist-info/RECORD +240 -0
- scikit_network-0.28.3.dist-info/WHEEL +5 -0
- scikit_network-0.28.3.dist-info/top_level.txt +1 -0
- sknetwork/__init__.py +21 -0
- sknetwork/classification/__init__.py +8 -0
- sknetwork/classification/base.py +84 -0
- sknetwork/classification/base_rank.py +143 -0
- sknetwork/classification/diffusion.py +134 -0
- sknetwork/classification/knn.py +162 -0
- sknetwork/classification/metrics.py +205 -0
- sknetwork/classification/pagerank.py +66 -0
- sknetwork/classification/propagation.py +152 -0
- sknetwork/classification/tests/__init__.py +1 -0
- sknetwork/classification/tests/test_API.py +35 -0
- sknetwork/classification/tests/test_diffusion.py +37 -0
- sknetwork/classification/tests/test_knn.py +24 -0
- sknetwork/classification/tests/test_metrics.py +53 -0
- sknetwork/classification/tests/test_pagerank.py +20 -0
- sknetwork/classification/tests/test_propagation.py +24 -0
- sknetwork/classification/vote.cpython-39-darwin.so +0 -0
- sknetwork/classification/vote.pyx +58 -0
- sknetwork/clustering/__init__.py +7 -0
- sknetwork/clustering/base.py +102 -0
- sknetwork/clustering/kmeans.py +142 -0
- sknetwork/clustering/louvain.py +255 -0
- sknetwork/clustering/louvain_core.cpython-39-darwin.so +0 -0
- sknetwork/clustering/louvain_core.pyx +134 -0
- sknetwork/clustering/metrics.py +91 -0
- sknetwork/clustering/postprocess.py +66 -0
- sknetwork/clustering/propagation_clustering.py +108 -0
- sknetwork/clustering/tests/__init__.py +1 -0
- sknetwork/clustering/tests/test_API.py +37 -0
- sknetwork/clustering/tests/test_kmeans.py +47 -0
- sknetwork/clustering/tests/test_louvain.py +104 -0
- sknetwork/clustering/tests/test_metrics.py +50 -0
- sknetwork/clustering/tests/test_post_processing.py +23 -0
- sknetwork/clustering/tests/test_postprocess.py +39 -0
- sknetwork/data/__init__.py +5 -0
- sknetwork/data/load.py +408 -0
- sknetwork/data/models.py +459 -0
- sknetwork/data/parse.py +621 -0
- sknetwork/data/test_graphs.py +84 -0
- sknetwork/data/tests/__init__.py +1 -0
- sknetwork/data/tests/test_API.py +30 -0
- sknetwork/data/tests/test_load.py +95 -0
- sknetwork/data/tests/test_models.py +52 -0
- sknetwork/data/tests/test_parse.py +253 -0
- sknetwork/data/tests/test_test_graphs.py +30 -0
- sknetwork/data/tests/test_toy_graphs.py +68 -0
- sknetwork/data/toy_graphs.py +619 -0
- sknetwork/embedding/__init__.py +10 -0
- sknetwork/embedding/base.py +90 -0
- sknetwork/embedding/force_atlas.py +197 -0
- sknetwork/embedding/louvain_embedding.py +174 -0
- sknetwork/embedding/louvain_hierarchy.py +142 -0
- sknetwork/embedding/metrics.py +66 -0
- sknetwork/embedding/random_projection.py +133 -0
- sknetwork/embedding/spectral.py +214 -0
- sknetwork/embedding/spring.py +198 -0
- sknetwork/embedding/svd.py +363 -0
- sknetwork/embedding/tests/__init__.py +1 -0
- sknetwork/embedding/tests/test_API.py +73 -0
- sknetwork/embedding/tests/test_force_atlas.py +35 -0
- sknetwork/embedding/tests/test_louvain_embedding.py +33 -0
- sknetwork/embedding/tests/test_louvain_hierarchy.py +19 -0
- sknetwork/embedding/tests/test_metrics.py +29 -0
- sknetwork/embedding/tests/test_random_projection.py +28 -0
- sknetwork/embedding/tests/test_spectral.py +84 -0
- sknetwork/embedding/tests/test_spring.py +50 -0
- sknetwork/embedding/tests/test_svd.py +37 -0
- sknetwork/flow/__init__.py +3 -0
- sknetwork/flow/flow.py +73 -0
- sknetwork/flow/tests/__init__.py +1 -0
- sknetwork/flow/tests/test_flow.py +17 -0
- sknetwork/flow/tests/test_utils.py +69 -0
- sknetwork/flow/utils.py +91 -0
- sknetwork/gnn/__init__.py +10 -0
- sknetwork/gnn/activation.py +117 -0
- sknetwork/gnn/base.py +155 -0
- sknetwork/gnn/base_activation.py +89 -0
- sknetwork/gnn/base_layer.py +109 -0
- sknetwork/gnn/gnn_classifier.py +381 -0
- sknetwork/gnn/layer.py +153 -0
- sknetwork/gnn/layers.py +127 -0
- sknetwork/gnn/loss.py +180 -0
- sknetwork/gnn/neighbor_sampler.py +65 -0
- sknetwork/gnn/optimizer.py +163 -0
- sknetwork/gnn/tests/__init__.py +1 -0
- sknetwork/gnn/tests/test_activation.py +56 -0
- sknetwork/gnn/tests/test_base.py +79 -0
- sknetwork/gnn/tests/test_base_layer.py +37 -0
- sknetwork/gnn/tests/test_gnn_classifier.py +192 -0
- sknetwork/gnn/tests/test_layers.py +80 -0
- sknetwork/gnn/tests/test_loss.py +33 -0
- sknetwork/gnn/tests/test_neigh_sampler.py +23 -0
- sknetwork/gnn/tests/test_optimizer.py +43 -0
- sknetwork/gnn/tests/test_utils.py +93 -0
- sknetwork/gnn/utils.py +219 -0
- sknetwork/hierarchy/__init__.py +7 -0
- sknetwork/hierarchy/base.py +69 -0
- sknetwork/hierarchy/louvain_hierarchy.py +264 -0
- sknetwork/hierarchy/metrics.py +234 -0
- sknetwork/hierarchy/paris.cpython-39-darwin.so +0 -0
- sknetwork/hierarchy/paris.pyx +317 -0
- sknetwork/hierarchy/postprocess.py +350 -0
- sknetwork/hierarchy/tests/__init__.py +1 -0
- sknetwork/hierarchy/tests/test_API.py +25 -0
- sknetwork/hierarchy/tests/test_algos.py +29 -0
- sknetwork/hierarchy/tests/test_metrics.py +62 -0
- sknetwork/hierarchy/tests/test_postprocess.py +57 -0
- sknetwork/hierarchy/tests/test_ward.py +25 -0
- sknetwork/hierarchy/ward.py +94 -0
- sknetwork/linalg/__init__.py +9 -0
- sknetwork/linalg/basics.py +37 -0
- sknetwork/linalg/diteration.cpython-39-darwin.so +0 -0
- sknetwork/linalg/diteration.pyx +49 -0
- sknetwork/linalg/eig_solver.py +93 -0
- sknetwork/linalg/laplacian.py +15 -0
- sknetwork/linalg/normalization.py +66 -0
- sknetwork/linalg/operators.py +225 -0
- sknetwork/linalg/polynome.py +76 -0
- sknetwork/linalg/ppr_solver.py +170 -0
- sknetwork/linalg/push.cpython-39-darwin.so +0 -0
- sknetwork/linalg/push.pyx +73 -0
- sknetwork/linalg/sparse_lowrank.py +142 -0
- sknetwork/linalg/svd_solver.py +91 -0
- sknetwork/linalg/tests/__init__.py +1 -0
- sknetwork/linalg/tests/test_eig.py +44 -0
- sknetwork/linalg/tests/test_laplacian.py +18 -0
- sknetwork/linalg/tests/test_normalization.py +38 -0
- sknetwork/linalg/tests/test_operators.py +70 -0
- sknetwork/linalg/tests/test_polynome.py +38 -0
- sknetwork/linalg/tests/test_ppr.py +50 -0
- sknetwork/linalg/tests/test_sparse_lowrank.py +61 -0
- sknetwork/linalg/tests/test_svd.py +38 -0
- sknetwork/linkpred/__init__.py +4 -0
- sknetwork/linkpred/base.py +80 -0
- sknetwork/linkpred/first_order.py +508 -0
- sknetwork/linkpred/first_order_core.cpython-39-darwin.so +0 -0
- sknetwork/linkpred/first_order_core.pyx +315 -0
- sknetwork/linkpred/postprocessing.py +98 -0
- sknetwork/linkpred/tests/__init__.py +1 -0
- sknetwork/linkpred/tests/test_API.py +49 -0
- sknetwork/linkpred/tests/test_postprocessing.py +21 -0
- sknetwork/path/__init__.py +4 -0
- sknetwork/path/metrics.py +148 -0
- sknetwork/path/search.py +65 -0
- sknetwork/path/shortest_path.py +186 -0
- sknetwork/path/tests/__init__.py +1 -0
- sknetwork/path/tests/test_metrics.py +29 -0
- sknetwork/path/tests/test_search.py +25 -0
- sknetwork/path/tests/test_shortest_path.py +45 -0
- sknetwork/ranking/__init__.py +9 -0
- sknetwork/ranking/base.py +56 -0
- sknetwork/ranking/betweenness.cpython-39-darwin.so +0 -0
- sknetwork/ranking/betweenness.pyx +99 -0
- sknetwork/ranking/closeness.py +95 -0
- sknetwork/ranking/harmonic.py +82 -0
- sknetwork/ranking/hits.py +94 -0
- sknetwork/ranking/katz.py +81 -0
- sknetwork/ranking/pagerank.py +107 -0
- sknetwork/ranking/postprocess.py +25 -0
- sknetwork/ranking/tests/__init__.py +1 -0
- sknetwork/ranking/tests/test_API.py +34 -0
- sknetwork/ranking/tests/test_betweenness.py +38 -0
- sknetwork/ranking/tests/test_closeness.py +34 -0
- sknetwork/ranking/tests/test_hits.py +20 -0
- sknetwork/ranking/tests/test_pagerank.py +69 -0
- sknetwork/regression/__init__.py +4 -0
- sknetwork/regression/base.py +56 -0
- sknetwork/regression/diffusion.py +190 -0
- sknetwork/regression/tests/__init__.py +1 -0
- sknetwork/regression/tests/test_API.py +34 -0
- sknetwork/regression/tests/test_diffusion.py +48 -0
- sknetwork/sknetwork.py +3 -0
- sknetwork/topology/__init__.py +9 -0
- sknetwork/topology/dag.py +74 -0
- sknetwork/topology/dag_core.cpython-39-darwin.so +0 -0
- sknetwork/topology/dag_core.pyx +38 -0
- sknetwork/topology/kcliques.cpython-39-darwin.so +0 -0
- sknetwork/topology/kcliques.pyx +193 -0
- sknetwork/topology/kcore.cpython-39-darwin.so +0 -0
- sknetwork/topology/kcore.pyx +120 -0
- sknetwork/topology/structure.py +234 -0
- sknetwork/topology/tests/__init__.py +1 -0
- sknetwork/topology/tests/test_cliques.py +28 -0
- sknetwork/topology/tests/test_cores.py +21 -0
- sknetwork/topology/tests/test_dag.py +26 -0
- sknetwork/topology/tests/test_structure.py +99 -0
- sknetwork/topology/tests/test_triangles.py +42 -0
- sknetwork/topology/tests/test_wl_coloring.py +49 -0
- sknetwork/topology/tests/test_wl_kernel.py +31 -0
- sknetwork/topology/triangles.cpython-39-darwin.so +0 -0
- sknetwork/topology/triangles.pyx +166 -0
- sknetwork/topology/weisfeiler_lehman.py +163 -0
- sknetwork/topology/weisfeiler_lehman_core.cpython-39-darwin.so +0 -0
- sknetwork/topology/weisfeiler_lehman_core.pyx +116 -0
- sknetwork/utils/__init__.py +40 -0
- sknetwork/utils/base.py +35 -0
- sknetwork/utils/check.py +354 -0
- sknetwork/utils/co_neighbor.py +71 -0
- sknetwork/utils/format.py +219 -0
- sknetwork/utils/kmeans.py +89 -0
- sknetwork/utils/knn.py +166 -0
- sknetwork/utils/knn1d.cpython-39-darwin.so +0 -0
- sknetwork/utils/knn1d.pyx +80 -0
- sknetwork/utils/membership.py +82 -0
- sknetwork/utils/minheap.cpython-39-darwin.so +0 -0
- sknetwork/utils/minheap.pxd +22 -0
- sknetwork/utils/minheap.pyx +111 -0
- sknetwork/utils/neighbors.py +115 -0
- sknetwork/utils/seeds.py +75 -0
- sknetwork/utils/simplex.py +140 -0
- sknetwork/utils/tests/__init__.py +1 -0
- sknetwork/utils/tests/test_base.py +28 -0
- sknetwork/utils/tests/test_bunch.py +16 -0
- sknetwork/utils/tests/test_check.py +190 -0
- sknetwork/utils/tests/test_co_neighbor.py +43 -0
- sknetwork/utils/tests/test_format.py +61 -0
- sknetwork/utils/tests/test_kmeans.py +21 -0
- sknetwork/utils/tests/test_knn.py +32 -0
- sknetwork/utils/tests/test_membership.py +24 -0
- sknetwork/utils/tests/test_neighbors.py +41 -0
- sknetwork/utils/tests/test_projection_simplex.py +33 -0
- sknetwork/utils/tests/test_seeds.py +67 -0
- sknetwork/utils/tests/test_verbose.py +15 -0
- sknetwork/utils/tests/test_ward.py +20 -0
- sknetwork/utils/timeout.py +38 -0
- sknetwork/utils/verbose.py +37 -0
- sknetwork/utils/ward.py +60 -0
- sknetwork/visualization/__init__.py +4 -0
- sknetwork/visualization/colors.py +34 -0
- sknetwork/visualization/dendrograms.py +229 -0
- sknetwork/visualization/graphs.py +819 -0
- sknetwork/visualization/tests/__init__.py +1 -0
- sknetwork/visualization/tests/test_dendrograms.py +53 -0
- sknetwork/visualization/tests/test_graphs.py +167 -0
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
# -*- coding: utf-8 -*-
|
|
3
|
+
"""Tests for classification metrics"""
|
|
4
|
+
|
|
5
|
+
import unittest
|
|
6
|
+
|
|
7
|
+
from sknetwork.classification.metrics import *
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class TestMetrics(unittest.TestCase):
    """Unit tests for the classification metric helpers."""

    def setUp(self) -> None:
        """Build one ground-truth vector and two prediction vectors."""
        truth = [0, 1, 1, 2, 2, -1]
        partial_prediction = [0, -1, 1, 2, 0, 0]
        disjoint_prediction = [-1, -1, -1, -1, -1, 0]
        self.labels_true = np.array(truth)
        self.labels_pred1 = np.array(partial_prediction)
        # No position has both a non-negative true label and a non-negative prediction.
        self.labels_pred2 = np.array(disjoint_prediction)

    def test_accuracy(self):
        """Accuracy is 0.75 for the first prediction; the second raises ValueError."""
        score = get_accuracy_score(self.labels_true, self.labels_pred1)
        self.assertEqual(score, 0.75)
        with self.assertRaises(ValueError):
            get_accuracy_score(self.labels_true, self.labels_pred2)

    def test_confusion(self):
        """The confusion matrix holds 4 samples, 3 of them on the diagonal."""
        matrix = get_confusion_matrix(self.labels_true, self.labels_pred1)
        total = matrix.data.sum()
        correct = matrix.diagonal().sum()
        self.assertEqual(total, 4)
        self.assertEqual(correct, 3)
        # NOTE(review): this error check exercises get_accuracy_score, not
        # get_confusion_matrix -- possibly a copy-paste from test_accuracy; confirm.
        with self.assertRaises(ValueError):
            get_accuracy_score(self.labels_true, self.labels_pred2)

    def test_f1_score(self):
        """F1 on two-label input is about 0.67; the three-label fixture raises ValueError."""
        truth = np.array([0, 0, 1])
        prediction = np.array([0, 1, 1])
        score = get_f1_score(truth, prediction)
        self.assertAlmostEqual(score, 0.67, 2)
        with self.assertRaises(ValueError):
            get_f1_score(self.labels_true, self.labels_pred1)

    def test_f1_scores(self):
        """Check the smallest per-label F1, precision and recall values."""
        scores = get_f1_scores(self.labels_true, self.labels_pred1)
        self.assertAlmostEqual(min(scores), 0.67, 2)

        detailed = get_f1_scores(self.labels_true, self.labels_pred1, True)
        scores, precisions, recalls = detailed
        self.assertAlmostEqual(min(scores), 0.67, 2)
        self.assertAlmostEqual(min(precisions), 0.5, 2)
        self.assertAlmostEqual(min(recalls), 0.5, 2)

        with self.assertRaises(ValueError):
            get_f1_scores(self.labels_true, self.labels_pred2)

    def test_average_f1_score(self):
        """Check the default, 'micro' and 'weighted' averages, and a rejected call."""
        default_average = get_average_f1_score(self.labels_true, self.labels_pred1)
        self.assertAlmostEqual(default_average, 0.78, 2)

        micro_average = get_average_f1_score(self.labels_true, self.labels_pred1, average='micro')
        self.assertEqual(micro_average, 0.75)

        weighted_average = get_average_f1_score(self.labels_true, self.labels_pred1, average='weighted')
        self.assertEqual(weighted_average, 0.80)

        with self.assertRaises(ValueError):
            get_average_f1_score(self.labels_true, self.labels_pred2, 'toto')
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
# -*- coding: utf-8 -*-
|
|
3
|
+
"""Tests for PageRankClassifier"""
|
|
4
|
+
|
|
5
|
+
import unittest
|
|
6
|
+
|
|
7
|
+
from sknetwork.classification import PageRankClassifier
|
|
8
|
+
from sknetwork.data.test_graphs import *
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class TestPageRankClassifier(unittest.TestCase):
    """Unit tests for PageRankClassifier."""

    def test_solvers(self):
        """The 'lanczos' and 'bicgstab' solvers predict the same labels as 'piteration'."""
        graph = test_graph()
        known_labels = {0: 0, 1: 1}

        # Labels from the 'piteration' solver serve as the reference.
        expected = PageRankClassifier(solver='piteration').fit_predict(graph, known_labels)
        for solver_name in ['lanczos', 'bicgstab']:
            classifier = PageRankClassifier(solver=solver_name)
            predicted = classifier.fit_predict(graph, known_labels)
            self.assertTrue((expected == predicted).all())
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
# -*- coding: utf-8 -*-
|
|
3
|
+
"""Tests for label propagation"""
|
|
4
|
+
|
|
5
|
+
import unittest
|
|
6
|
+
|
|
7
|
+
from sknetwork.classification import Propagation
|
|
8
|
+
from sknetwork.data.test_graphs import *
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class TestLabelPropagation(unittest.TestCase):
    """Unit tests for the Propagation classifier."""

    def test_algo(self):
        """Predicted labels have one entry per row of the input, for each configuration."""
        graphs = [test_graph(), test_digraph(), test_bigraph()]
        for adjacency in graphs:
            n = adjacency.shape[0]
            expected_shape = (n,)
            seeds = {0: 0, 1: 1}

            # Unweighted propagation with a fixed number of iterations.
            unweighted = Propagation(n_iter=3, weighted=False)
            predicted = unweighted.fit_predict(adjacency, seeds)
            self.assertEqual(predicted.shape, expected_shape)

            # Default settings with each supported node ordering.
            for order in ['random', 'decreasing', 'increasing']:
                ordered = Propagation(node_order=order)
                predicted = ordered.fit_predict(adjacency, seeds)
                self.assertEqual(predicted.shape, expected_shape)
|
|
Binary file
|
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
# distutils: language = c++
|
|
2
|
+
# cython: language_level=3
|
|
3
|
+
# cython: linetrace=True
|
|
4
|
+
# distutils: define_macros=CYTHON_TRACE_NOGIL=1
|
|
5
|
+
"""
|
|
6
|
+
Created on April, 2020
|
|
7
|
+
@author: Nathan de Lara <nathan.delara@polytechnique.org>
|
|
8
|
+
"""
|
|
9
|
+
from libcpp.set cimport set
|
|
10
|
+
from libcpp.vector cimport vector
|
|
11
|
+
|
|
12
|
+
cimport cython
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
@cython.boundscheck(False)
@cython.wraparound(False)
def vote_update(int[:] indptr, int[:] indices, float[:] data, int[:] labels, int[:] index):
    """One pass of label updates over the graph by majority vote among neighbors.

    Parameters
    ----------
    indptr, indices, data :
        CSR arrays of the graph: the neighbors of node ``i`` are ``indices[indptr[i]:indptr[i + 1]]``
        and the matching edge weights are ``data[indptr[i]:indptr[i + 1]]``.
    labels :
        Current label of each node; negative values are ignored in the vote.
        Updated in place. Non-negative labels must be smaller than ``labels.shape[0]``,
        as they index the vote accumulator.
    index :
        Nodes to update, in the order in which they are visited.

    Returns
    -------
    labels :
        The same buffer as the input ``labels``, after the update.
    """
    cdef int i
    cdef int ii
    cdef int j
    cdef int jj
    cdef int n_indices = index.shape[0]
    cdef int label
    cdef int label_neigh_size
    cdef float best_score

    cdef vector[int] labels_neigh
    cdef vector[float] votes_neigh, votes
    cdef set[int] labels_unique = ()

    # One vote accumulator per possible label value.
    cdef int n = labels.shape[0]
    for i in range(n):
        votes.push_back(0)

    for ii in range(n_indices):
        i = index[ii]
        # Both buffers are per-node: reset them together so that position k of
        # votes_neigh always holds the weight of the k-th neighbor of node i.
        labels_neigh.clear()
        votes_neigh.clear()
        for j in range(indptr[i], indptr[i + 1]):
            jj = indices[j]
            labels_neigh.push_back(labels[jj])
            # The weight is stored at the edge position j, not at the neighbor id jj.
            votes_neigh.push_back(data[j])

        labels_unique.clear()
        label_neigh_size = labels_neigh.size()
        for jj in range(label_neigh_size):
            label = labels_neigh[jj]
            if label >= 0:
                labels_unique.insert(label)
                votes[label] += votes_neigh[jj]

        best_score = -1
        for label in labels_unique:
            if votes[label] > best_score:
                labels[i] = label
                best_score = votes[label]
            # Reset the accumulator for the next node.
            votes[label] = 0
    return labels
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
"""clustering module"""
|
|
2
|
+
from sknetwork.clustering.base import BaseClustering
|
|
3
|
+
from sknetwork.clustering.kmeans import KMeans
|
|
4
|
+
from sknetwork.clustering.louvain import Louvain
|
|
5
|
+
from sknetwork.clustering.metrics import get_modularity
|
|
6
|
+
from sknetwork.clustering.postprocess import reindex_labels, aggregate_graph
|
|
7
|
+
from sknetwork.clustering.propagation_clustering import PropagationClustering
|
|
@@ -0,0 +1,102 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
# -*- coding: utf-8 -*-
|
|
3
|
+
"""
|
|
4
|
+
Created on Nov, 2019
|
|
5
|
+
@author: Nathan de Lara <nathan.delara@polytechnique.org>
|
|
6
|
+
"""
|
|
7
|
+
from abc import ABC
|
|
8
|
+
|
|
9
|
+
import numpy as np
|
|
10
|
+
from scipy import sparse
|
|
11
|
+
|
|
12
|
+
from sknetwork.linalg.normalization import normalize
|
|
13
|
+
from sknetwork.utils.base import Algorithm
|
|
14
|
+
from sknetwork.utils.membership import get_membership
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class BaseClustering(Algorithm, ABC):
    """Common base for clustering algorithms.

    Parameters
    ----------
    sort_clusters :
        Flag stored on the instance (not used by this base class itself).
    return_membership :
        If ``True``, ``_secondary_outputs`` computes the membership matrices.
    return_aggregate :
        If ``True``, ``_secondary_outputs`` computes the aggregate matrix.

    Attributes
    ----------
    labels_ : np.ndarray
        Labels of the nodes (rows for bipartite graphs)
    labels_row_ : np.ndarray
        Labels of the rows (for bipartite graphs).
    labels_col_ : np.ndarray
        Labels of the columns (for bipartite graphs, in case of co-clustering).
    membership_ : sparse.csr_matrix
        Membership matrix of the nodes, shape (n_nodes, n_clusters).
    membership_row_ : sparse.csr_matrix
        Membership matrix of the rows (for bipartite graphs).
    membership_col_ : sparse.csr_matrix
        Membership matrix of the columns (for bipartite graphs, in case of co-clustering).
    aggregate_ : sparse.csr_matrix
        Aggregate adjacency matrix or biadjacency matrix between clusters.
    """
    def __init__(self, sort_clusters: bool = True, return_membership: bool = False, return_aggregate: bool = False):
        self.sort_clusters = sort_clusters
        self.return_membership = return_membership
        self.return_aggregate = return_aggregate
        self._init_vars()

    def fit_transform(self, *args, **kwargs) -> np.ndarray:
        """Run ``fit`` with the given arguments and return the resulting labels.

        Returns
        -------
        labels : np.ndarray
            Labels.
        """
        self.fit(*args, **kwargs)
        return self.labels_

    def _init_vars(self):
        """Reset every fitted attribute (and the ``bipartite`` flag) to ``None``."""
        fitted_names = ('labels_', 'labels_row_', 'labels_col_',
                        'membership_', 'membership_row_', 'membership_col_',
                        'aggregate_', 'bipartite')
        for attribute in fitted_names:
            setattr(self, attribute, None)
        return self

    def _split_vars(self, shape):
        """Split ``labels_`` into row labels (first ``shape[0]`` entries) and column labels (the rest)."""
        n_row = shape[0]
        stacked = self.labels_
        row_part = stacked[:n_row]
        self.labels_row_ = row_part
        self.labels_col_ = stacked[n_row:]
        # labels_ now refers to the row labels only.
        self.labels_ = row_part
        return self

    def _secondary_outputs(self, input_matrix: sparse.csr_matrix):
        """Derive the membership and aggregate matrices from the labels, when requested."""
        want_membership = self.return_membership
        want_aggregate = self.return_aggregate
        if not (want_membership or want_aggregate):
            return self

        matrix = input_matrix.astype(float)

        if not self.bipartite:
            indicator = get_membership(self.labels_)
            if want_membership:
                self.membership_ = normalize(matrix.dot(indicator))
            if want_aggregate:
                self.aggregate_ = sparse.csr_matrix(indicator.T.dot(matrix.dot(indicator)))
            return self

        if self.labels_col_ is None:
            # Only rows are labelled: column memberships are derived from the row ones.
            n_labels = max(self.labels_) + 1
            indicator_row = get_membership(self.labels_, n_labels=n_labels)
            indicator_col = normalize(matrix.T.dot(indicator_row))
        else:
            # Rows and columns share the same label space.
            n_labels = max(max(self.labels_row_), max(self.labels_col_)) + 1
            indicator_row = get_membership(self.labels_row_, n_labels=n_labels)
            indicator_col = get_membership(self.labels_col_, n_labels=n_labels)

        if want_membership:
            self.membership_row_ = normalize(matrix.dot(indicator_col))
            self.membership_col_ = normalize(matrix.T.dot(indicator_row))
            self.membership_ = self.membership_row_
        if want_aggregate:
            row_aggregate = sparse.csr_matrix(indicator_row.T.dot(matrix))
            self.aggregate_ = row_aggregate.dot(indicator_col)

        return self
|
|
@@ -0,0 +1,142 @@
|
|
|
1
|
+
|
|
2
|
+
#!/usr/bin/env python3
|
|
3
|
+
# -*- coding: utf-8 -*-
|
|
4
|
+
"""
|
|
5
|
+
Created on October 2019
|
|
6
|
+
@author: Nathan de Lara <nathan.delara@polytechnique.org>
|
|
7
|
+
@author: Thomas Bonald <bonald@enst.fr>
|
|
8
|
+
"""
|
|
9
|
+
from typing import Union, Tuple
|
|
10
|
+
|
|
11
|
+
import numpy as np
|
|
12
|
+
from scipy import sparse
|
|
13
|
+
|
|
14
|
+
from sknetwork.clustering.base import BaseClustering
|
|
15
|
+
from sknetwork.clustering.postprocess import reindex_labels
|
|
16
|
+
from sknetwork.embedding.base import BaseEmbedding
|
|
17
|
+
from sknetwork.embedding.spectral import Spectral
|
|
18
|
+
from sknetwork.utils.format import is_square
|
|
19
|
+
from sknetwork.utils.check import check_n_clusters, check_format
|
|
20
|
+
from sknetwork.utils.kmeans import KMeansDense
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def get_embedding(input_matrix: Union[sparse.csr_matrix, np.ndarray], method: BaseEmbedding,
                  co_embedding: bool = False) -> Tuple[np.ndarray, bool]:
    """Return the embedding of the input_matrix.

    Parameters
    ----------
    input_matrix :
        Adjacency matrix or biadjacency matrix of the graph.
    method :
        Embedding method. It is fitted in place on ``input_matrix``.
    co_embedding : bool
        If ``True``, co-embedding of rows and columns: the row embedding and the column embedding
        are stacked vertically. Otherwise, the embedding of the nodes (``method.embedding_``) is returned.

    Returns
    -------
    embedding : np.ndarray
        Embedding of the nodes, or of the rows followed by the columns if ``co_embedding`` is ``True``.
    bipartite : bool
        ``True`` if the input matrix is not square or if ``co_embedding`` is ``True``.
    """
    bipartite = (not is_square(input_matrix)) or co_embedding
    if co_embedding:
        try:
            method.fit(input_matrix, force_bipartite=True)
        except TypeError:
            # The fit method of this embedding does not accept ``force_bipartite``.
            # Only this case triggers the fallback: any other error (or an interruption)
            # must propagate instead of being silently retried.
            method.fit(input_matrix)
        embedding = np.vstack((method.embedding_row_, method.embedding_col_))
    else:
        method.fit(input_matrix)
        embedding = method.embedding_
    return embedding, bipartite
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
class KMeans(BaseClustering):
    """K-means clustering applied in the embedding space.

    Parameters
    ----------
    n_clusters :
        Number of desired clusters (default = 2).
    embedding_method :
        Embedding method (default = Spectral embedding in dimension 10).
        If ``None``, a new ``Spectral(10)`` instance is created for this object.
    co_cluster :
        If ``True``, co-cluster rows and columns, considered as different nodes (default = ``False``).
    sort_clusters :
        If ``True``, sort labels in decreasing order of cluster size.
    return_membership :
        If ``True``, return the membership matrix of nodes to each cluster (soft clustering).
    return_aggregate :
        If ``True``, return the adjacency matrix of the graph between clusters.

    Attributes
    ----------
    labels_ : np.ndarray
        Labels of the nodes.
    labels_row_ : np.ndarray
        Labels of the rows (for bipartite graphs).
    labels_col_ : np.ndarray
        Labels of the columns (for bipartite graphs).
    membership_ : sparse.csr_matrix
        Membership matrix of the nodes, shape (n_nodes, n_clusters).
    membership_row_ : sparse.csr_matrix
        Membership matrix of the rows (for bipartite graphs).
    membership_col_ : sparse.csr_matrix
        Membership matrix of the columns (for bipartite graphs).
    aggregate_ : sparse.csr_matrix
        Aggregate adjacency matrix or biadjacency matrix between clusters.

    Example
    -------
    >>> from sknetwork.clustering import KMeans
    >>> from sknetwork.data import karate_club
    >>> kmeans = KMeans(n_clusters=3)
    >>> adjacency = karate_club()
    >>> labels = kmeans.fit_transform(adjacency)
    >>> len(set(labels))
    3
    """
    def __init__(self, n_clusters: int = 2, embedding_method: Union[BaseEmbedding, None] = None,
                 co_cluster: bool = False, sort_clusters: bool = True, return_membership: bool = True,
                 return_aggregate: bool = True):
        super(KMeans, self).__init__(sort_clusters=sort_clusters, return_membership=return_membership,
                                     return_aggregate=return_aggregate)
        self.n_clusters = n_clusters
        # A default written as ``Spectral(10)`` in the signature would be evaluated once and
        # shared (and re-fitted) by every KMeans instance; build a fresh one per instance instead.
        if embedding_method is None:
            embedding_method = Spectral(10)
        self.embedding_method = embedding_method
        self.co_cluster = co_cluster
        self.bipartite = None

    def fit(self, input_matrix: Union[sparse.csr_matrix, np.ndarray]) -> 'KMeans':
        """Apply embedding method followed by K-means.

        Parameters
        ----------
        input_matrix :
            Adjacency matrix or biadjacency matrix of the graph.

        Returns
        -------
        self: :class:`KMeans`
        """
        self._init_vars()

        # input
        input_matrix = check_format(input_matrix)
        if self.co_cluster:
            # rows and columns are clustered together: the number of samples is n_row + n_col
            check_n_clusters(self.n_clusters, np.sum(input_matrix.shape))
        else:
            check_n_clusters(self.n_clusters, input_matrix.shape[0])

        # embedding
        embedding, self.bipartite = get_embedding(input_matrix, self.embedding_method, self.co_cluster)

        # clustering
        kmeans = KMeansDense(self.n_clusters)
        kmeans.fit(embedding)

        # sort
        if self.sort_clusters:
            labels = reindex_labels(kmeans.labels_)
        else:
            labels = kmeans.labels_

        # output
        self.labels_ = labels
        if self.co_cluster:
            # labels cover rows then columns: split them back
            self._split_vars(input_matrix.shape)
        self._secondary_outputs(input_matrix)

        return self
|
|
@@ -0,0 +1,255 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
# -*- coding: utf-8 -*-
|
|
3
|
+
"""
|
|
4
|
+
Created in November 2018
|
|
5
|
+
@author: Nathan de Lara <nathan.delara@polytechnique.org>
|
|
6
|
+
@author: Quentin Lutz <qlutz@enst.fr>
|
|
7
|
+
@author: Thomas Bonald <bonald@enst.fr>
|
|
8
|
+
"""
|
|
9
|
+
from typing import Union, Optional
|
|
10
|
+
|
|
11
|
+
import numpy as np
|
|
12
|
+
from scipy import sparse
|
|
13
|
+
|
|
14
|
+
from sknetwork.clustering.base import BaseClustering
|
|
15
|
+
from sknetwork.clustering.louvain_core import fit_core
|
|
16
|
+
from sknetwork.clustering.postprocess import reindex_labels
|
|
17
|
+
from sknetwork.utils.check import check_random_state, get_probs
|
|
18
|
+
from sknetwork.utils.format import check_format, get_adjacency, directed2undirected
|
|
19
|
+
from sknetwork.utils.membership import get_membership
|
|
20
|
+
from sknetwork.utils.verbose import VerboseMixin
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class Louvain(BaseClustering, VerboseMixin):
|
|
24
|
+
"""Louvain algorithm for clustering graphs by maximization of modularity.
|
|
25
|
+
|
|
26
|
+
For bipartite graphs, the algorithm maximizes Barber's modularity by default.
|
|
27
|
+
|
|
28
|
+
Parameters
|
|
29
|
+
----------
|
|
30
|
+
resolution :
|
|
31
|
+
Resolution parameter.
|
|
32
|
+
modularity : str
|
|
33
|
+
Which objective function to maximize. Can be ``'Dugue'``, ``'Newman'`` or ``'Potts'`` (default = ``'dugue'``).
|
|
34
|
+
tol_optimization :
|
|
35
|
+
Minimum increase in the objective function to enter a new optimization pass.
|
|
36
|
+
tol_aggregation :
|
|
37
|
+
Minimum increase in the objective function to enter a new aggregation pass.
|
|
38
|
+
n_aggregations :
|
|
39
|
+
Maximum number of aggregations.
|
|
40
|
+
A negative value is interpreted as no limit.
|
|
41
|
+
shuffle_nodes :
|
|
42
|
+
Enables node shuffling before optimization.
|
|
43
|
+
sort_clusters :
|
|
44
|
+
If ``True``, sort labels in decreasing order of cluster size.
|
|
45
|
+
return_membership :
|
|
46
|
+
If ``True``, return the membership matrix of nodes to each cluster (soft clustering).
|
|
47
|
+
return_aggregate :
|
|
48
|
+
If ``True``, return the adjacency matrix of the graph between clusters.
|
|
49
|
+
random_state :
|
|
50
|
+
Random number generator or random seed. If None, numpy.random is used.
|
|
51
|
+
verbose :
|
|
52
|
+
Verbose mode.
|
|
53
|
+
|
|
54
|
+
Attributes
|
|
55
|
+
----------
|
|
56
|
+
labels_ : np.ndarray
|
|
57
|
+
Labels of the nodes.
|
|
58
|
+
labels_row_ : np.ndarray
|
|
59
|
+
Labels of the rows (for bipartite graphs).
|
|
60
|
+
labels_col_ : np.ndarray
|
|
61
|
+
Labels of the columns (for bipartite graphs).
|
|
62
|
+
membership_ : sparse.csr_matrix
|
|
63
|
+
Membership matrix of the nodes, shape (n_nodes, n_clusters).
|
|
64
|
+
membership_row_ : sparse.csr_matrix
|
|
65
|
+
Membership matrix of the rows (for bipartite graphs).
|
|
66
|
+
membership_col_ : sparse.csr_matrix
|
|
67
|
+
Membership matrix of the columns (for bipartite graphs).
|
|
68
|
+
aggregate_ : sparse.csr_matrix
|
|
69
|
+
Aggregate adjacency matrix or biadjacency matrix between clusters.
|
|
70
|
+
|
|
71
|
+
Example
|
|
72
|
+
-------
|
|
73
|
+
>>> from sknetwork.clustering import Louvain
|
|
74
|
+
>>> from sknetwork.data import karate_club
|
|
75
|
+
>>> louvain = Louvain()
|
|
76
|
+
>>> adjacency = karate_club()
|
|
77
|
+
>>> labels = louvain.fit_transform(adjacency)
|
|
78
|
+
>>> len(set(labels))
|
|
79
|
+
4
|
|
80
|
+
|
|
81
|
+
References
|
|
82
|
+
----------
|
|
83
|
+
* Blondel, V. D., Guillaume, J. L., Lambiotte, R., & Lefebvre, E. (2008).
|
|
84
|
+
`Fast unfolding of communities in large networks.
|
|
85
|
+
<https://arxiv.org/abs/0803.0476>`_
|
|
86
|
+
Journal of statistical mechanics: theory and experiment, 2008.
|
|
87
|
+
|
|
88
|
+
* Dugué, N., & Perez, A. (2015).
|
|
89
|
+
`Directed Louvain: maximizing modularity in directed networks
|
|
90
|
+
<https://hal.archives-ouvertes.fr/hal-01231784/document>`_
|
|
91
|
+
(Doctoral dissertation, Université d'Orléans).
|
|
92
|
+
|
|
93
|
+
* Barber, M. J. (2007).
|
|
94
|
+
`Modularity and community detection in bipartite networks
|
|
95
|
+
<https://arxiv.org/pdf/0707.1616>`_
|
|
96
|
+
Physical Review E, 76(6).
|
|
97
|
+
"""
|
|
98
|
+
def __init__(self, resolution: float = 1, modularity: str = 'dugue', tol_optimization: float = 1e-3,
             tol_aggregation: float = 1e-3, n_aggregations: int = -1, shuffle_nodes: bool = False,
             sort_clusters: bool = True, return_membership: bool = True, return_aggregate: bool = True,
             random_state: Optional[Union[np.random.RandomState, int]] = None, verbose: bool = False):
    # The parent class receives the output options; the verbose mixin is initialised
    # explicitly with the verbosity flag.
    super(Louvain, self).__init__(sort_clusters=sort_clusters, return_membership=return_membership,
                                  return_aggregate=return_aggregate)
    VerboseMixin.__init__(self, verbose)

    self.labels_ = None
    self.resolution = resolution
    # Lower-cased once here so that fit() can compare against 'dugue', 'newman' and 'potts'.
    self.modularity = modularity.lower()
    # NOTE: stored under the shorter name ``tol``; this is the value _optimize() hands to fit_core.
    self.tol = tol_optimization
    self.tol_aggregation = tol_aggregation
    # fit() stops once the aggregation count equals this value; the default -1 is never reached.
    self.n_aggregations = n_aggregations
    self.shuffle_nodes = shuffle_nodes
    # Seed / generator / None is converted by check_random_state; fit() calls ``.permutation`` on the result.
    self.random_state = check_random_state(random_state)
    # Filled in by fit() from the second value returned by get_adjacency.
    self.bipartite = None
|
|
115
|
+
|
|
116
|
+
def _optimize(self, adjacency_norm, probs_ou, probs_in):
    """Run one local optimization pass of the Louvain algorithm.

    Parameters
    ----------
    adjacency_norm :
        Adjacency matrix of the current (possibly aggregated) graph. In :meth:`fit` it is the
        adjacency divided by its total weight.
    probs_ou :
        Out-weights of the nodes, as produced by ``get_probs`` in :meth:`fit`.
    probs_in :
        In-weights of the nodes, as produced by ``get_probs`` in :meth:`fit`.

    Returns
    -------
    labels :
        The communities of each node after optimization.
    pass_increase :
        The increase in modularity gained after optimization.
    """
    # Everything handed to fit_core is cast to float32
    # (presumably the dtype the compiled core is typed for -- TODO confirm in louvain_core.pyx).
    node_probs_in = probs_in.astype(np.float32)
    node_probs_ou = probs_ou.astype(np.float32)

    # Halve the output of directed2undirected
    # (presumably A + A^T, so the total weight is preserved -- TODO confirm).
    adjacency = 0.5 * directed2undirected(adjacency_norm)

    self_loops = adjacency.diagonal().astype(np.float32)

    # Raw CSR buffers of the adjacency matrix.
    indptr: np.ndarray = adjacency.indptr
    indices: np.ndarray = adjacency.indices
    data: np.ndarray = adjacency.data.astype(np.float32)

    return fit_core(self.resolution, self.tol, node_probs_ou, node_probs_in, self_loops, data, indices, indptr)
|
|
147
|
+
|
|
148
|
+
@staticmethod
def _aggregate(adjacency_norm, probs_out, probs_in, membership: Union[sparse.csr_matrix, np.ndarray]):
    """Aggregate nodes belonging to the same cluster.

    Parameters
    ----------
    adjacency_norm :
        Adjacency matrix of the current graph (sparse).
    probs_out :
        Out-weights of the nodes.
    probs_in :
        In-weights of the nodes.
    membership :
        Membership matrix (rows are nodes, columns are clusters). A dense array is accepted
        and converted to CSR.

    Returns
    -------
    adjacency_norm :
        Adjacency matrix between clusters, in CSR format.
    probs_out :
        Out-weights summed per cluster.
    probs_in :
        In-weights summed per cluster.
    """
    # With a dense membership the triple product below would be a dense ndarray and the
    # final ``.tocsr()`` would raise AttributeError, so normalise to sparse first.
    if not sparse.issparse(membership):
        membership = sparse.csr_matrix(membership)

    membership_t = membership.T
    # M^T . A . M : total weight between every pair of clusters.
    adjacency_norm = (membership_t.dot(adjacency_norm.dot(membership))).tocsr()
    probs_in = np.array(membership_t.dot(probs_in).T)
    probs_out = np.array(membership_t.dot(probs_out).T)
    return adjacency_norm, probs_out, probs_in
|
|
171
|
+
|
|
172
|
+
def fit(self, input_matrix: Union[sparse.csr_matrix, np.ndarray], force_bipartite: bool = False) -> 'Louvain':
    """Fit algorithm to data.

    Parameters
    ----------
    input_matrix :
        Adjacency matrix or biadjacency matrix of the graph.
    force_bipartite :
        If ``True``, force the input matrix to be considered as a biadjacency matrix even if square.

    Returns
    -------
    self: :class:`Louvain`

    Raises
    ------
    ValueError
        If ``self.modularity`` is not one of ``'potts'``, ``'newman'``, ``'dugue'``.
    """
    self._init_vars()
    input_matrix = check_format(input_matrix)
    # The 'dugue' modularity asks get_adjacency for a directed treatment of the graph.
    if self.modularity == 'dugue':
        adjacency, self.bipartite = get_adjacency(input_matrix, force_directed=True,
                                                  force_bipartite=force_bipartite)
    else:
        adjacency, self.bipartite = get_adjacency(input_matrix, force_bipartite=force_bipartite)

    n = adjacency.shape[0]

    # Optionally visit the nodes in random order: rows and columns get the same permutation.
    index = np.arange(n)
    if self.shuffle_nodes:
        index = self.random_state.permutation(index)
        adjacency = adjacency[index][:, index]

    # Node weights used by the selected modularity function.
    if self.modularity == 'potts':
        probs_out = get_probs('uniform', adjacency)
        probs_in = probs_out.copy()
    elif self.modularity == 'newman':
        probs_out = get_probs('degree', adjacency)
        probs_in = probs_out.copy()
    elif self.modularity == 'dugue':
        probs_out = get_probs('degree', adjacency)
        probs_in = get_probs('degree', adjacency.T)
    else:
        raise ValueError('Unknown modularity function.')

    # Normalise the edge weights so that they sum to 1.
    adjacency_cluster = adjacency / adjacency.data.sum()

    # Start from the identity membership: every node is its own cluster.
    membership = sparse.identity(n, format='csr')
    increase = True
    count_aggregations = 0
    self.log.print("Starting with", n, "nodes.")
    while increase:
        count_aggregations += 1

        labels_cluster, pass_increase = self._optimize(adjacency_cluster, probs_out, probs_in)
        # Relabel the clusters as consecutive integers 0..k-1.
        _, labels_cluster = np.unique(labels_cluster, return_inverse=True)

        if pass_increase <= self.tol_aggregation:
            # Gain too small: keep the current membership and leave after this iteration.
            increase = False
        else:
            # Compose the new clustering with the previous ones, then collapse the graph.
            membership_cluster = get_membership(labels_cluster)
            membership = membership.dot(membership_cluster)
            adjacency_cluster, probs_out, probs_in = self._aggregate(adjacency_cluster, probs_out, probs_in,
                                                                     membership_cluster)

            n = adjacency_cluster.shape[0]
            if n == 1:
                # Everything merged into a single cluster: nothing left to optimise.
                break
        self.log.print("Aggregation", count_aggregations, "completed with", n, "clusters and ",
                       pass_increase, "increment.")
        if count_aggregations == self.n_aggregations:
            break

    # The column indices of the membership matrix are read as the node labels
    # (assumes exactly one stored entry per row -- TODO confirm against get_membership).
    if self.sort_clusters:
        labels = reindex_labels(membership.indices)
    else:
        labels = membership.indices
    if self.shuffle_nodes:
        # Invert the permutation so that labels follow the original node order.
        reverse = np.empty(index.size, index.dtype)
        reverse[index] = np.arange(index.size)
        labels = labels[reverse]

    self.labels_ = labels
    if self.bipartite:
        self._split_vars(input_matrix.shape)
    self._secondary_outputs(input_matrix)

    return self
|
|
Binary file
|