scikit-network 0.28.3__cp39-cp39-macosx_12_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of scikit-network might be problematic. Click here for more details.

Files changed (240) hide show
  1. scikit_network-0.28.3.dist-info/AUTHORS.rst +41 -0
  2. scikit_network-0.28.3.dist-info/LICENSE +34 -0
  3. scikit_network-0.28.3.dist-info/METADATA +457 -0
  4. scikit_network-0.28.3.dist-info/RECORD +240 -0
  5. scikit_network-0.28.3.dist-info/WHEEL +5 -0
  6. scikit_network-0.28.3.dist-info/top_level.txt +1 -0
  7. sknetwork/__init__.py +21 -0
  8. sknetwork/classification/__init__.py +8 -0
  9. sknetwork/classification/base.py +84 -0
  10. sknetwork/classification/base_rank.py +143 -0
  11. sknetwork/classification/diffusion.py +134 -0
  12. sknetwork/classification/knn.py +162 -0
  13. sknetwork/classification/metrics.py +205 -0
  14. sknetwork/classification/pagerank.py +66 -0
  15. sknetwork/classification/propagation.py +152 -0
  16. sknetwork/classification/tests/__init__.py +1 -0
  17. sknetwork/classification/tests/test_API.py +35 -0
  18. sknetwork/classification/tests/test_diffusion.py +37 -0
  19. sknetwork/classification/tests/test_knn.py +24 -0
  20. sknetwork/classification/tests/test_metrics.py +53 -0
  21. sknetwork/classification/tests/test_pagerank.py +20 -0
  22. sknetwork/classification/tests/test_propagation.py +24 -0
  23. sknetwork/classification/vote.cpython-39-darwin.so +0 -0
  24. sknetwork/classification/vote.pyx +58 -0
  25. sknetwork/clustering/__init__.py +7 -0
  26. sknetwork/clustering/base.py +102 -0
  27. sknetwork/clustering/kmeans.py +142 -0
  28. sknetwork/clustering/louvain.py +255 -0
  29. sknetwork/clustering/louvain_core.cpython-39-darwin.so +0 -0
  30. sknetwork/clustering/louvain_core.pyx +134 -0
  31. sknetwork/clustering/metrics.py +91 -0
  32. sknetwork/clustering/postprocess.py +66 -0
  33. sknetwork/clustering/propagation_clustering.py +108 -0
  34. sknetwork/clustering/tests/__init__.py +1 -0
  35. sknetwork/clustering/tests/test_API.py +37 -0
  36. sknetwork/clustering/tests/test_kmeans.py +47 -0
  37. sknetwork/clustering/tests/test_louvain.py +104 -0
  38. sknetwork/clustering/tests/test_metrics.py +50 -0
  39. sknetwork/clustering/tests/test_post_processing.py +23 -0
  40. sknetwork/clustering/tests/test_postprocess.py +39 -0
  41. sknetwork/data/__init__.py +5 -0
  42. sknetwork/data/load.py +408 -0
  43. sknetwork/data/models.py +459 -0
  44. sknetwork/data/parse.py +621 -0
  45. sknetwork/data/test_graphs.py +84 -0
  46. sknetwork/data/tests/__init__.py +1 -0
  47. sknetwork/data/tests/test_API.py +30 -0
  48. sknetwork/data/tests/test_load.py +95 -0
  49. sknetwork/data/tests/test_models.py +52 -0
  50. sknetwork/data/tests/test_parse.py +253 -0
  51. sknetwork/data/tests/test_test_graphs.py +30 -0
  52. sknetwork/data/tests/test_toy_graphs.py +68 -0
  53. sknetwork/data/toy_graphs.py +619 -0
  54. sknetwork/embedding/__init__.py +10 -0
  55. sknetwork/embedding/base.py +90 -0
  56. sknetwork/embedding/force_atlas.py +197 -0
  57. sknetwork/embedding/louvain_embedding.py +174 -0
  58. sknetwork/embedding/louvain_hierarchy.py +142 -0
  59. sknetwork/embedding/metrics.py +66 -0
  60. sknetwork/embedding/random_projection.py +133 -0
  61. sknetwork/embedding/spectral.py +214 -0
  62. sknetwork/embedding/spring.py +198 -0
  63. sknetwork/embedding/svd.py +363 -0
  64. sknetwork/embedding/tests/__init__.py +1 -0
  65. sknetwork/embedding/tests/test_API.py +73 -0
  66. sknetwork/embedding/tests/test_force_atlas.py +35 -0
  67. sknetwork/embedding/tests/test_louvain_embedding.py +33 -0
  68. sknetwork/embedding/tests/test_louvain_hierarchy.py +19 -0
  69. sknetwork/embedding/tests/test_metrics.py +29 -0
  70. sknetwork/embedding/tests/test_random_projection.py +28 -0
  71. sknetwork/embedding/tests/test_spectral.py +84 -0
  72. sknetwork/embedding/tests/test_spring.py +50 -0
  73. sknetwork/embedding/tests/test_svd.py +37 -0
  74. sknetwork/flow/__init__.py +3 -0
  75. sknetwork/flow/flow.py +73 -0
  76. sknetwork/flow/tests/__init__.py +1 -0
  77. sknetwork/flow/tests/test_flow.py +17 -0
  78. sknetwork/flow/tests/test_utils.py +69 -0
  79. sknetwork/flow/utils.py +91 -0
  80. sknetwork/gnn/__init__.py +10 -0
  81. sknetwork/gnn/activation.py +117 -0
  82. sknetwork/gnn/base.py +155 -0
  83. sknetwork/gnn/base_activation.py +89 -0
  84. sknetwork/gnn/base_layer.py +109 -0
  85. sknetwork/gnn/gnn_classifier.py +381 -0
  86. sknetwork/gnn/layer.py +153 -0
  87. sknetwork/gnn/layers.py +127 -0
  88. sknetwork/gnn/loss.py +180 -0
  89. sknetwork/gnn/neighbor_sampler.py +65 -0
  90. sknetwork/gnn/optimizer.py +163 -0
  91. sknetwork/gnn/tests/__init__.py +1 -0
  92. sknetwork/gnn/tests/test_activation.py +56 -0
  93. sknetwork/gnn/tests/test_base.py +79 -0
  94. sknetwork/gnn/tests/test_base_layer.py +37 -0
  95. sknetwork/gnn/tests/test_gnn_classifier.py +192 -0
  96. sknetwork/gnn/tests/test_layers.py +80 -0
  97. sknetwork/gnn/tests/test_loss.py +33 -0
  98. sknetwork/gnn/tests/test_neigh_sampler.py +23 -0
  99. sknetwork/gnn/tests/test_optimizer.py +43 -0
  100. sknetwork/gnn/tests/test_utils.py +93 -0
  101. sknetwork/gnn/utils.py +219 -0
  102. sknetwork/hierarchy/__init__.py +7 -0
  103. sknetwork/hierarchy/base.py +69 -0
  104. sknetwork/hierarchy/louvain_hierarchy.py +264 -0
  105. sknetwork/hierarchy/metrics.py +234 -0
  106. sknetwork/hierarchy/paris.cpython-39-darwin.so +0 -0
  107. sknetwork/hierarchy/paris.pyx +317 -0
  108. sknetwork/hierarchy/postprocess.py +350 -0
  109. sknetwork/hierarchy/tests/__init__.py +1 -0
  110. sknetwork/hierarchy/tests/test_API.py +25 -0
  111. sknetwork/hierarchy/tests/test_algos.py +29 -0
  112. sknetwork/hierarchy/tests/test_metrics.py +62 -0
  113. sknetwork/hierarchy/tests/test_postprocess.py +57 -0
  114. sknetwork/hierarchy/tests/test_ward.py +25 -0
  115. sknetwork/hierarchy/ward.py +94 -0
  116. sknetwork/linalg/__init__.py +9 -0
  117. sknetwork/linalg/basics.py +37 -0
  118. sknetwork/linalg/diteration.cpython-39-darwin.so +0 -0
  119. sknetwork/linalg/diteration.pyx +49 -0
  120. sknetwork/linalg/eig_solver.py +93 -0
  121. sknetwork/linalg/laplacian.py +15 -0
  122. sknetwork/linalg/normalization.py +66 -0
  123. sknetwork/linalg/operators.py +225 -0
  124. sknetwork/linalg/polynome.py +76 -0
  125. sknetwork/linalg/ppr_solver.py +170 -0
  126. sknetwork/linalg/push.cpython-39-darwin.so +0 -0
  127. sknetwork/linalg/push.pyx +73 -0
  128. sknetwork/linalg/sparse_lowrank.py +142 -0
  129. sknetwork/linalg/svd_solver.py +91 -0
  130. sknetwork/linalg/tests/__init__.py +1 -0
  131. sknetwork/linalg/tests/test_eig.py +44 -0
  132. sknetwork/linalg/tests/test_laplacian.py +18 -0
  133. sknetwork/linalg/tests/test_normalization.py +38 -0
  134. sknetwork/linalg/tests/test_operators.py +70 -0
  135. sknetwork/linalg/tests/test_polynome.py +38 -0
  136. sknetwork/linalg/tests/test_ppr.py +50 -0
  137. sknetwork/linalg/tests/test_sparse_lowrank.py +61 -0
  138. sknetwork/linalg/tests/test_svd.py +38 -0
  139. sknetwork/linkpred/__init__.py +4 -0
  140. sknetwork/linkpred/base.py +80 -0
  141. sknetwork/linkpred/first_order.py +508 -0
  142. sknetwork/linkpred/first_order_core.cpython-39-darwin.so +0 -0
  143. sknetwork/linkpred/first_order_core.pyx +315 -0
  144. sknetwork/linkpred/postprocessing.py +98 -0
  145. sknetwork/linkpred/tests/__init__.py +1 -0
  146. sknetwork/linkpred/tests/test_API.py +49 -0
  147. sknetwork/linkpred/tests/test_postprocessing.py +21 -0
  148. sknetwork/path/__init__.py +4 -0
  149. sknetwork/path/metrics.py +148 -0
  150. sknetwork/path/search.py +65 -0
  151. sknetwork/path/shortest_path.py +186 -0
  152. sknetwork/path/tests/__init__.py +1 -0
  153. sknetwork/path/tests/test_metrics.py +29 -0
  154. sknetwork/path/tests/test_search.py +25 -0
  155. sknetwork/path/tests/test_shortest_path.py +45 -0
  156. sknetwork/ranking/__init__.py +9 -0
  157. sknetwork/ranking/base.py +56 -0
  158. sknetwork/ranking/betweenness.cpython-39-darwin.so +0 -0
  159. sknetwork/ranking/betweenness.pyx +99 -0
  160. sknetwork/ranking/closeness.py +95 -0
  161. sknetwork/ranking/harmonic.py +82 -0
  162. sknetwork/ranking/hits.py +94 -0
  163. sknetwork/ranking/katz.py +81 -0
  164. sknetwork/ranking/pagerank.py +107 -0
  165. sknetwork/ranking/postprocess.py +25 -0
  166. sknetwork/ranking/tests/__init__.py +1 -0
  167. sknetwork/ranking/tests/test_API.py +34 -0
  168. sknetwork/ranking/tests/test_betweenness.py +38 -0
  169. sknetwork/ranking/tests/test_closeness.py +34 -0
  170. sknetwork/ranking/tests/test_hits.py +20 -0
  171. sknetwork/ranking/tests/test_pagerank.py +69 -0
  172. sknetwork/regression/__init__.py +4 -0
  173. sknetwork/regression/base.py +56 -0
  174. sknetwork/regression/diffusion.py +190 -0
  175. sknetwork/regression/tests/__init__.py +1 -0
  176. sknetwork/regression/tests/test_API.py +34 -0
  177. sknetwork/regression/tests/test_diffusion.py +48 -0
  178. sknetwork/sknetwork.py +3 -0
  179. sknetwork/topology/__init__.py +9 -0
  180. sknetwork/topology/dag.py +74 -0
  181. sknetwork/topology/dag_core.cpython-39-darwin.so +0 -0
  182. sknetwork/topology/dag_core.pyx +38 -0
  183. sknetwork/topology/kcliques.cpython-39-darwin.so +0 -0
  184. sknetwork/topology/kcliques.pyx +193 -0
  185. sknetwork/topology/kcore.cpython-39-darwin.so +0 -0
  186. sknetwork/topology/kcore.pyx +120 -0
  187. sknetwork/topology/structure.py +234 -0
  188. sknetwork/topology/tests/__init__.py +1 -0
  189. sknetwork/topology/tests/test_cliques.py +28 -0
  190. sknetwork/topology/tests/test_cores.py +21 -0
  191. sknetwork/topology/tests/test_dag.py +26 -0
  192. sknetwork/topology/tests/test_structure.py +99 -0
  193. sknetwork/topology/tests/test_triangles.py +42 -0
  194. sknetwork/topology/tests/test_wl_coloring.py +49 -0
  195. sknetwork/topology/tests/test_wl_kernel.py +31 -0
  196. sknetwork/topology/triangles.cpython-39-darwin.so +0 -0
  197. sknetwork/topology/triangles.pyx +166 -0
  198. sknetwork/topology/weisfeiler_lehman.py +163 -0
  199. sknetwork/topology/weisfeiler_lehman_core.cpython-39-darwin.so +0 -0
  200. sknetwork/topology/weisfeiler_lehman_core.pyx +116 -0
  201. sknetwork/utils/__init__.py +40 -0
  202. sknetwork/utils/base.py +35 -0
  203. sknetwork/utils/check.py +354 -0
  204. sknetwork/utils/co_neighbor.py +71 -0
  205. sknetwork/utils/format.py +219 -0
  206. sknetwork/utils/kmeans.py +89 -0
  207. sknetwork/utils/knn.py +166 -0
  208. sknetwork/utils/knn1d.cpython-39-darwin.so +0 -0
  209. sknetwork/utils/knn1d.pyx +80 -0
  210. sknetwork/utils/membership.py +82 -0
  211. sknetwork/utils/minheap.cpython-39-darwin.so +0 -0
  212. sknetwork/utils/minheap.pxd +22 -0
  213. sknetwork/utils/minheap.pyx +111 -0
  214. sknetwork/utils/neighbors.py +115 -0
  215. sknetwork/utils/seeds.py +75 -0
  216. sknetwork/utils/simplex.py +140 -0
  217. sknetwork/utils/tests/__init__.py +1 -0
  218. sknetwork/utils/tests/test_base.py +28 -0
  219. sknetwork/utils/tests/test_bunch.py +16 -0
  220. sknetwork/utils/tests/test_check.py +190 -0
  221. sknetwork/utils/tests/test_co_neighbor.py +43 -0
  222. sknetwork/utils/tests/test_format.py +61 -0
  223. sknetwork/utils/tests/test_kmeans.py +21 -0
  224. sknetwork/utils/tests/test_knn.py +32 -0
  225. sknetwork/utils/tests/test_membership.py +24 -0
  226. sknetwork/utils/tests/test_neighbors.py +41 -0
  227. sknetwork/utils/tests/test_projection_simplex.py +33 -0
  228. sknetwork/utils/tests/test_seeds.py +67 -0
  229. sknetwork/utils/tests/test_verbose.py +15 -0
  230. sknetwork/utils/tests/test_ward.py +20 -0
  231. sknetwork/utils/timeout.py +38 -0
  232. sknetwork/utils/verbose.py +37 -0
  233. sknetwork/utils/ward.py +60 -0
  234. sknetwork/visualization/__init__.py +4 -0
  235. sknetwork/visualization/colors.py +34 -0
  236. sknetwork/visualization/dendrograms.py +229 -0
  237. sknetwork/visualization/graphs.py +819 -0
  238. sknetwork/visualization/tests/__init__.py +1 -0
  239. sknetwork/visualization/tests/test_dendrograms.py +53 -0
  240. sknetwork/visualization/tests/test_graphs.py +167 -0
@@ -0,0 +1,53 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ """Tests for classification metrics"""
4
+
5
+ import unittest
6
+
7
+ from sknetwork.classification.metrics import *
8
+
9
+
10
class TestMetrics(unittest.TestCase):
    """Unit tests for the classification metrics."""

    def setUp(self) -> None:
        # The expected values below (4 counted samples, 3 of them correct) are
        # consistent with samples carrying a negative label being ignored.
        self.labels_true = np.array([0, 1, 1, 2, 2, -1])
        self.labels_pred1 = np.array([0, -1, 1, 2, 0, 0])
        # No sample has both a non-negative true label and a non-negative prediction.
        self.labels_pred2 = np.array([-1, -1, -1, -1, -1, 0])

    def test_accuracy(self):
        """Accuracy on the fixture, and failure when no sample can be compared."""
        self.assertAlmostEqual(get_accuracy_score(self.labels_true, self.labels_pred1), 0.75)
        with self.assertRaises(ValueError):
            get_accuracy_score(self.labels_true, self.labels_pred2)

    def test_confusion(self):
        """Content of the confusion matrix, and failure when no sample can be compared."""
        confusion = get_confusion_matrix(self.labels_true, self.labels_pred1)
        self.assertEqual(confusion.data.sum(), 4)
        self.assertEqual(confusion.diagonal().sum(), 3)
        with self.assertRaises(ValueError):
            # Exercise the function under test (the original called get_accuracy_score here,
            # duplicating test_accuracy).
            get_confusion_matrix(self.labels_true, self.labels_pred2)

    def test_f1_score(self):
        """F1 score on a small two-class example; a three-class input must be rejected."""
        f1_score = get_f1_score(np.array([0, 0, 1]), np.array([0, 1, 1]))
        self.assertAlmostEqual(f1_score, 0.67, 2)
        with self.assertRaises(ValueError):
            get_f1_score(self.labels_true, self.labels_pred1)

    def test_f1_scores(self):
        """Per-label F1 scores, optionally returned with precisions and recalls."""
        f1_scores = get_f1_scores(self.labels_true, self.labels_pred1)
        self.assertAlmostEqual(min(f1_scores), 0.67, 2)
        f1_scores, precisions, recalls = get_f1_scores(self.labels_true, self.labels_pred1, True)
        self.assertAlmostEqual(min(f1_scores), 0.67, 2)
        self.assertAlmostEqual(min(precisions), 0.5, 2)
        self.assertAlmostEqual(min(recalls), 0.5, 2)
        with self.assertRaises(ValueError):
            get_f1_scores(self.labels_true, self.labels_pred2)

    def test_average_f1_score(self):
        """Averaged F1 score for the default, 'micro' and 'weighted' averages."""
        f1_score = get_average_f1_score(self.labels_true, self.labels_pred1)
        self.assertAlmostEqual(f1_score, 0.78, 2)
        f1_score = get_average_f1_score(self.labels_true, self.labels_pred1, average='micro')
        # Scores are floats: compare up to rounding error rather than exactly.
        self.assertAlmostEqual(f1_score, 0.75)
        f1_score = get_average_f1_score(self.labels_true, self.labels_pred1, average='weighted')
        self.assertAlmostEqual(f1_score, 0.80)
        with self.assertRaises(ValueError):
            # NOTE(review): labels_pred2 presumably fails on its own, before the invalid
            # average 'toto' is looked at - confirm whether labels_pred1 was intended.
            get_average_f1_score(self.labels_true, self.labels_pred2, 'toto')
@@ -0,0 +1,20 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ """Tests for PageRankClassifier"""
4
+
5
+ import unittest
6
+
7
+ from sknetwork.classification import PageRankClassifier
8
+ from sknetwork.data.test_graphs import *
9
+
10
+
11
class TestPageRankClassifier(unittest.TestCase):
    """Consistency checks between the solvers of PageRankClassifier."""

    def test_solvers(self):
        """Each alternative solver must predict the same labels as 'piteration'."""
        graph = test_graph()
        known_labels = {0: 0, 1: 1}

        def predict(solver_name):
            # Fresh classifier for every solver, fitted on the same graph and seeds.
            return PageRankClassifier(solver=solver_name).fit_predict(graph, known_labels)

        expected = predict('piteration')
        for solver_name in ('lanczos', 'bicgstab'):
            predicted = predict(solver_name)
            self.assertTrue((expected == predicted).all())
@@ -0,0 +1,24 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ """Tests for label propagation"""
4
+
5
+ import unittest
6
+
7
+ from sknetwork.classification import Propagation
8
+ from sknetwork.data.test_graphs import *
9
+
10
+
11
class TestLabelPropagation(unittest.TestCase):
    """Shape checks for the labels predicted by Propagation."""

    def test_algo(self):
        """One label per row is returned for every test graph and every node order."""
        graphs = (test_graph(), test_digraph(), test_bigraph())
        for graph in graphs:
            expected_shape = (graph.shape[0],)
            seeds = {0: 0, 1: 1}

            # Unweighted propagation with a fixed number of iterations.
            predicted = Propagation(n_iter=3, weighted=False).fit_predict(graph, seeds)
            self.assertEqual(predicted.shape, expected_shape)

            # Default settings, for each supported node order.
            for node_order in ('random', 'decreasing', 'increasing'):
                predicted = Propagation(node_order=node_order).fit_predict(graph, seeds)
                self.assertEqual(predicted.shape, expected_shape)
@@ -0,0 +1,58 @@
1
+ # distutils: language = c++
2
+ # cython: language_level=3
3
+ # cython: linetrace=True
4
+ # distutils: define_macros=CYTHON_TRACE_NOGIL=1
5
+ """
6
+ Created on April, 2020
7
+ @author: Nathan de Lara <nathan.delara@polytechnique.org>
8
+ """
9
+ from libcpp.set cimport set
10
+ from libcpp.vector cimport vector
11
+
12
+ cimport cython
13
+
14
+
15
@cython.boundscheck(False)
@cython.wraparound(False)
def vote_update(int[:] indptr, int[:] indices, float[:] data, int[:] labels, int[:] index):
    """One pass of label updates over the graph by majority vote among neighbors.

    Parameters
    ----------
    indptr, indices, data :
        Arrays read as a CSR structure: the neighbors of node ``i`` are
        ``indices[indptr[i]:indptr[i + 1]]`` and the weights of the corresponding edges are
        the entries of ``data`` at the same positions.
    labels :
        Current label of each node, updated in place. Negative labels do not vote.
        Non-negative labels must be smaller than the number of nodes, as they are used
        to index the vote counters.
    index :
        Nodes to update, in processing order.

    Returns
    -------
    labels :
        The updated labels (same buffer as the input).
    """
    cdef int i
    cdef int ii
    cdef int j
    cdef int jj
    cdef int n_indices = index.shape[0]
    cdef int label
    cdef int label_neigh_size
    cdef float best_score

    cdef vector[int] labels_neigh
    cdef vector[float] votes_neigh, votes
    cdef set[int] labels_unique

    # One vote counter per possible label, reset after each node.
    cdef int n = labels.shape[0]
    for i in range(n):
        votes.push_back(0)

    for ii in range(n_indices):
        i = index[ii]
        # Both per-node buffers must be emptied: otherwise the weights of the
        # first processed node would be reused for all the following ones.
        labels_neigh.clear()
        votes_neigh.clear()
        for j in range(indptr[i], indptr[i + 1]):
            jj = indices[j]
            labels_neigh.push_back(labels[jj])
            # The weight of an edge is stored at the edge position j, not at the neighbor index jj.
            votes_neigh.push_back(data[j])

        labels_unique.clear()
        label_neigh_size = labels_neigh.size()
        for jj in range(label_neigh_size):
            label = labels_neigh[jj]
            if label >= 0:
                labels_unique.insert(label)
                votes[label] += votes_neigh[jj]

        # Labels are visited in increasing order and the comparison is strict,
        # so ties go to the smallest label.
        best_score = -1
        for label in labels_unique:
            if votes[label] > best_score:
                labels[i] = label
                best_score = votes[label]
            # Reset the counter for the next node.
            votes[label] = 0
    return labels
@@ -0,0 +1,7 @@
1
+ """clustering module"""
2
+ from sknetwork.clustering.base import BaseClustering
3
+ from sknetwork.clustering.kmeans import KMeans
4
+ from sknetwork.clustering.louvain import Louvain
5
+ from sknetwork.clustering.metrics import get_modularity
6
+ from sknetwork.clustering.postprocess import reindex_labels, aggregate_graph
7
+ from sknetwork.clustering.propagation_clustering import PropagationClustering
@@ -0,0 +1,102 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ Created on Nov, 2019
5
+ @author: Nathan de Lara <nathan.delara@polytechnique.org>
6
+ """
7
+ from abc import ABC
8
+
9
+ import numpy as np
10
+ from scipy import sparse
11
+
12
+ from sknetwork.linalg.normalization import normalize
13
+ from sknetwork.utils.base import Algorithm
14
+ from sknetwork.utils.membership import get_membership
15
+
16
+
17
class BaseClustering(Algorithm, ABC):
    """Common base of the clustering algorithms.

    Attributes
    ----------
    labels_ : np.ndarray
        Labels of the nodes (rows for bipartite graphs)
    labels_row_ : np.ndarray
        Labels of the rows (for bipartite graphs).
    labels_col_ : np.ndarray
        Labels of the columns (for bipartite graphs, in case of co-clustering).
    membership_ : sparse.csr_matrix
        Membership matrix of the nodes, shape (n_nodes, n_clusters).
    membership_row_ : sparse.csr_matrix
        Membership matrix of the rows (for bipartite graphs).
    membership_col_ : sparse.csr_matrix
        Membership matrix of the columns (for bipartite graphs, in case of co-clustering).
    aggregate_ : sparse.csr_matrix
        Aggregate adjacency matrix or biadjacency matrix between clusters.
    """
    def __init__(self, sort_clusters: bool = True, return_membership: bool = False, return_aggregate: bool = False):
        self.sort_clusters = sort_clusters
        self.return_membership = return_membership
        self.return_aggregate = return_aggregate
        self._init_vars()

    def fit_transform(self, *args, **kwargs) -> np.ndarray:
        """Run ``fit`` with the given arguments, then return the resulting labels.

        Returns
        -------
        labels : np.ndarray
            Labels.
        """
        self.fit(*args, **kwargs)
        return self.labels_

    def _init_vars(self):
        """Reset all outputs (and the bipartite flag) to ``None``."""
        output_names = ('labels_', 'labels_row_', 'labels_col_',
                        'membership_', 'membership_row_', 'membership_col_',
                        'aggregate_', 'bipartite')
        for output_name in output_names:
            setattr(self, output_name, None)
        return self

    def _split_vars(self, shape):
        """Cut labels_ into row labels and column labels; labels_ then refers to the rows."""
        n_row = shape[0]
        all_labels = self.labels_
        self.labels_row_, self.labels_col_ = all_labels[:n_row], all_labels[n_row:]
        self.labels_ = self.labels_row_
        return self

    def _secondary_outputs(self, input_matrix: sparse.csr_matrix):
        """Derive the membership and aggregate matrices from the labels, when requested."""
        if not (self.return_membership or self.return_aggregate):
            return self

        input_matrix = input_matrix.astype(float)

        if not self.bipartite:
            membership = get_membership(self.labels_)
            if self.return_membership:
                self.membership_ = normalize(input_matrix.dot(membership))
            if self.return_aggregate:
                self.aggregate_ = sparse.csr_matrix(membership.T.dot(input_matrix.dot(membership)))
            return self

        # Bipartite case: rows and columns get their own membership matrix.
        if self.labels_col_ is None:
            # Only the rows are labelled; the column membership is obtained through the graph.
            n_labels = max(self.labels_) + 1
            membership_row = get_membership(self.labels_, n_labels=n_labels)
            membership_col = normalize(input_matrix.T.dot(membership_row))
        else:
            n_labels = max(max(self.labels_row_), max(self.labels_col_)) + 1
            membership_row = get_membership(self.labels_row_, n_labels=n_labels)
            membership_col = get_membership(self.labels_col_, n_labels=n_labels)

        if self.return_membership:
            self.membership_row_ = normalize(input_matrix.dot(membership_col))
            self.membership_col_ = normalize(input_matrix.T.dot(membership_row))
            self.membership_ = self.membership_row_

        if self.return_aggregate:
            row_aggregate = sparse.csr_matrix(membership_row.T.dot(input_matrix))
            self.aggregate_ = row_aggregate.dot(membership_col)

        return self
@@ -0,0 +1,142 @@
1
+
2
+ #!/usr/bin/env python3
3
+ # -*- coding: utf-8 -*-
4
+ """
5
+ Created on October 2019
6
+ @author: Nathan de Lara <nathan.delara@polytechnique.org>
7
+ @author: Thomas Bonald <bonald@enst.fr>
8
+ """
9
+ from typing import Union, Tuple
10
+
11
+ import numpy as np
12
+ from scipy import sparse
13
+
14
+ from sknetwork.clustering.base import BaseClustering
15
+ from sknetwork.clustering.postprocess import reindex_labels
16
+ from sknetwork.embedding.base import BaseEmbedding
17
+ from sknetwork.embedding.spectral import Spectral
18
+ from sknetwork.utils.format import is_square
19
+ from sknetwork.utils.check import check_n_clusters, check_format
20
+ from sknetwork.utils.kmeans import KMeansDense
21
+
22
+
23
def get_embedding(input_matrix: Union[sparse.csr_matrix, np.ndarray], method: BaseEmbedding,
                  co_embedding: bool = False) -> Tuple[np.ndarray, bool]:
    """Return the embedding of the input matrix.

    Parameters
    ----------
    input_matrix :
        Adjacency matrix or biadjacency matrix of the graph.
    method :
        Embedding method.
    co_embedding : bool
        If ``True``, co-embedding of rows and columns: the row embedding and the column embedding
        of the method are stacked vertically, rows first.
        Otherwise, the attribute ``embedding_`` of the fitted method is returned.

    Returns
    -------
    embedding : np.ndarray
        Embedding computed by the method.
    bipartite : bool
        ``True`` if the input matrix is not square or if ``co_embedding`` is ``True``.
    """
    bipartite = (not is_square(input_matrix)) or co_embedding
    if co_embedding:
        try:
            method.fit(input_matrix, force_bipartite=True)
        except Exception:
            # Best-effort fallback (e.g., for a method whose ``fit`` has no ``force_bipartite``
            # option). Unlike a bare ``except``, this lets KeyboardInterrupt and SystemExit through.
            method.fit(input_matrix)
        embedding = np.vstack((method.embedding_row_, method.embedding_col_))
    else:
        method.fit(input_matrix)
        embedding = method.embedding_
    return embedding, bipartite
47
+
48
+
49
class KMeans(BaseClustering):
    """K-means clustering applied in the embedding space.

    Parameters
    ----------
    n_clusters :
        Number of desired clusters (default = 2).
    embedding_method :
        Embedding method (default = Spectral embedding in dimension 10).
        If ``None``, a new instance of the default embedding is created for this object.
    co_cluster :
        If ``True``, co-cluster rows and columns, considered as different nodes (default = ``False``).
    sort_clusters :
        If ``True``, sort labels in decreasing order of cluster size.
    return_membership :
        If ``True``, return the membership matrix of nodes to each cluster (soft clustering).
    return_aggregate :
        If ``True``, return the adjacency matrix of the graph between clusters.

    Attributes
    ----------
    labels_ : np.ndarray
        Labels of the nodes.
    labels_row_ : np.ndarray
        Labels of the rows (for bipartite graphs).
    labels_col_ : np.ndarray
        Labels of the columns (for bipartite graphs).
    membership_ : sparse.csr_matrix
        Membership matrix of the nodes, shape (n_nodes, n_clusters).
    membership_row_ : sparse.csr_matrix
        Membership matrix of the rows (for bipartite graphs).
    membership_col_ : sparse.csr_matrix
        Membership matrix of the columns (for bipartite graphs).
    aggregate_ : sparse.csr_matrix
        Aggregate adjacency matrix or biadjacency matrix between clusters.

    Example
    -------
    >>> from sknetwork.clustering import KMeans
    >>> from sknetwork.data import karate_club
    >>> kmeans = KMeans(n_clusters=3)
    >>> adjacency = karate_club()
    >>> labels = kmeans.fit_transform(adjacency)
    >>> len(set(labels))
    3
    """
    def __init__(self, n_clusters: int = 2, embedding_method: Union[BaseEmbedding, None] = None,
                 co_cluster: bool = False, sort_clusters: bool = True, return_membership: bool = True,
                 return_aggregate: bool = True):
        # The parent constructor also resets all outputs, including ``bipartite``.
        super(KMeans, self).__init__(sort_clusters=sort_clusters, return_membership=return_membership,
                                     return_aggregate=return_aggregate)
        self.n_clusters = n_clusters
        # The default embedding is built here rather than in the signature: a default written as
        # ``Spectral(10)`` is evaluated only once and would be shared (and refitted) by all instances.
        self.embedding_method = Spectral(10) if embedding_method is None else embedding_method
        self.co_cluster = co_cluster

    def fit(self, input_matrix: Union[sparse.csr_matrix, np.ndarray]) -> 'KMeans':
        """Apply embedding method followed by K-means.

        Parameters
        ----------
        input_matrix :
            Adjacency matrix or biadjacency matrix of the graph.

        Returns
        -------
        self: :class:`KMeans`
        """
        self._init_vars()

        # input
        input_matrix = check_format(input_matrix)
        if self.co_cluster:
            # rows and columns are clustered together
            check_n_clusters(self.n_clusters, np.sum(input_matrix.shape))
        else:
            check_n_clusters(self.n_clusters, input_matrix.shape[0])

        # embedding
        embedding, self.bipartite = get_embedding(input_matrix, self.embedding_method, self.co_cluster)

        # clustering
        kmeans = KMeansDense(self.n_clusters)
        kmeans.fit(embedding)

        # sort
        if self.sort_clusters:
            labels = reindex_labels(kmeans.labels_)
        else:
            labels = kmeans.labels_

        # output
        self.labels_ = labels
        if self.co_cluster:
            # the labels of the rows come first, followed by those of the columns
            self._split_vars(input_matrix.shape)
        self._secondary_outputs(input_matrix)

        return self
@@ -0,0 +1,255 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ Created in November 2018
5
+ @author: Nathan de Lara <nathan.delara@polytechnique.org>
6
+ @author: Quentin Lutz <qlutz@enst.fr>
7
+ @author: Thomas Bonald <bonald@enst.fr>
8
+ """
9
+ from typing import Union, Optional
10
+
11
+ import numpy as np
12
+ from scipy import sparse
13
+
14
+ from sknetwork.clustering.base import BaseClustering
15
+ from sknetwork.clustering.louvain_core import fit_core
16
+ from sknetwork.clustering.postprocess import reindex_labels
17
+ from sknetwork.utils.check import check_random_state, get_probs
18
+ from sknetwork.utils.format import check_format, get_adjacency, directed2undirected
19
+ from sknetwork.utils.membership import get_membership
20
+ from sknetwork.utils.verbose import VerboseMixin
21
+
22
+
23
class Louvain(BaseClustering, VerboseMixin):
    """Clustering of graphs by the Louvain algorithm (greedy maximization of modularity).

    For bipartite graphs, the algorithm maximizes Barber's modularity by default.

    Parameters
    ----------
    resolution :
        Resolution parameter.
    modularity : str
        Objective function to maximize, one of ``'Dugue'``, ``'Newman'`` or ``'Potts'``
        (case-insensitive, default = ``'dugue'``).
    tol_optimization :
        Minimum increase in the objective function to enter a new optimization pass.
    tol_aggregation :
        Minimum increase in the objective function to enter a new aggregation pass.
    n_aggregations :
        Maximum number of aggregations.
        A negative value is interpreted as no limit.
    shuffle_nodes :
        If ``True``, nodes are shuffled before optimization.
    sort_clusters :
        If ``True``, sort labels in decreasing order of cluster size.
    return_membership :
        If ``True``, return the membership matrix of nodes to each cluster (soft clustering).
    return_aggregate :
        If ``True``, return the adjacency matrix of the graph between clusters.
    random_state :
        Random number generator or random seed. If None, numpy.random is used.
    verbose :
        Verbose mode.

    Attributes
    ----------
    labels_ : np.ndarray
        Labels of the nodes.
    labels_row_ : np.ndarray
        Labels of the rows (for bipartite graphs).
    labels_col_ : np.ndarray
        Labels of the columns (for bipartite graphs).
    membership_ : sparse.csr_matrix
        Membership matrix of the nodes, shape (n_nodes, n_clusters).
    membership_row_ : sparse.csr_matrix
        Membership matrix of the rows (for bipartite graphs).
    membership_col_ : sparse.csr_matrix
        Membership matrix of the columns (for bipartite graphs).
    aggregate_ : sparse.csr_matrix
        Aggregate adjacency matrix or biadjacency matrix between clusters.

    Example
    -------
    >>> from sknetwork.clustering import Louvain
    >>> from sknetwork.data import karate_club
    >>> louvain = Louvain()
    >>> adjacency = karate_club()
    >>> labels = louvain.fit_transform(adjacency)
    >>> len(set(labels))
    4

    References
    ----------
    * Blondel, V. D., Guillaume, J. L., Lambiotte, R., & Lefebvre, E. (2008).
      `Fast unfolding of communities in large networks.
      <https://arxiv.org/abs/0803.0476>`_
      Journal of statistical mechanics: theory and experiment, 2008.

    * Dugué, N., & Perez, A. (2015).
      `Directed Louvain: maximizing modularity in directed networks
      <https://hal.archives-ouvertes.fr/hal-01231784/document>`_
      (Doctoral dissertation, Université d'Orléans).

    * Barber, M. J. (2007).
      `Modularity and community detection in bipartite networks
      <https://arxiv.org/pdf/0707.1616>`_
      Physical Review E, 76(6).
    """
    def __init__(self, resolution: float = 1, modularity: str = 'dugue', tol_optimization: float = 1e-3,
                 tol_aggregation: float = 1e-3, n_aggregations: int = -1, shuffle_nodes: bool = False,
                 sort_clusters: bool = True, return_membership: bool = True, return_aggregate: bool = True,
                 random_state: Optional[Union[np.random.RandomState, int]] = None, verbose: bool = False):
        super().__init__(sort_clusters=sort_clusters, return_membership=return_membership,
                         return_aggregate=return_aggregate)
        VerboseMixin.__init__(self, verbose)

        self.labels_ = None
        self.bipartite = None

        # Objective function (name stored in lower case).
        self.resolution = resolution
        self.modularity = modularity.lower()

        # Stopping criteria; the optimization tolerance is stored under the name ``tol``.
        self.tol = tol_optimization
        self.tol_aggregation = tol_aggregation
        self.n_aggregations = n_aggregations

        # Node ordering.
        self.shuffle_nodes = shuffle_nodes
        self.random_state = check_random_state(random_state)

    def _optimize(self, adjacency_norm, probs_ou, probs_in):
        """Run one local optimization pass of the Louvain algorithm.

        The adjacency is symmetrized and halved, then handed to the compiled core together with
        the node weights, all cast to ``float32``.

        Parameters
        ----------
        adjacency_norm :
            Adjacency matrix of the current graph (``fit`` passes the adjacency divided by its total weight).
        probs_ou :
            Out-weights of the nodes (``fit`` passes the output of ``get_probs`` on the adjacency).
        probs_in :
            In-weights of the nodes (``fit`` passes the output of ``get_probs``, on the transpose for
            ``'dugue'``).

        Returns
        -------
        labels :
            Cluster of each node after optimization.
        pass_increase :
            Increase of the objective function obtained by the pass.
        """
        symmetric = 0.5 * directed2undirected(adjacency_norm)

        weights_out = probs_ou.astype(np.float32)
        weights_in = probs_in.astype(np.float32)
        diagonal = symmetric.diagonal().astype(np.float32)
        edge_weights: np.ndarray = symmetric.data.astype(np.float32)
        neighbors: np.ndarray = symmetric.indices
        row_pointers: np.ndarray = symmetric.indptr

        return fit_core(self.resolution, self.tol, weights_out, weights_in, diagonal, edge_weights, neighbors,
                        row_pointers)

    @staticmethod
    def _aggregate(adjacency_norm, probs_out, probs_in, membership: Union[sparse.csr_matrix, np.ndarray]):
        """Merge the nodes of each cluster into a single node.

        Parameters
        ----------
        adjacency_norm :
            Adjacency matrix of the current graph.
        probs_out :
            Out-weights of the nodes.
        probs_in :
            In-weights of the nodes.
        membership :
            Membership matrix (rows).

        Returns
        -------
        adjacency_norm :
            Adjacency matrix of the aggregate graph, in CSR format.
        probs_out :
            Out-weights summed over each cluster.
        probs_in :
            In-weights summed over each cluster.
        """
        membership_t = membership.T
        aggregate = membership_t.dot(adjacency_norm.dot(membership)).tocsr()
        cluster_probs_out = np.array(membership_t.dot(probs_out).T)
        cluster_probs_in = np.array(membership_t.dot(probs_in).T)
        return aggregate, cluster_probs_out, cluster_probs_in

    def fit(self, input_matrix: Union[sparse.csr_matrix, np.ndarray], force_bipartite: bool = False) -> 'Louvain':
        """Fit algorithm to data.

        Parameters
        ----------
        input_matrix :
            Adjacency matrix or biadjacency matrix of the graph.
        force_bipartite :
            If ``True``, force the input matrix to be considered as a biadjacency matrix even if square.

        Returns
        -------
        self: :class:`Louvain`

        Raises
        ------
        ValueError
            If the modularity is not ``'dugue'``, ``'newman'`` or ``'potts'``.
        """
        self._init_vars()
        input_matrix = check_format(input_matrix)

        # The graph is forced to be directed for Dugue's modularity only.
        options = {'force_bipartite': force_bipartite}
        if self.modularity == 'dugue':
            options['force_directed'] = True
        adjacency, self.bipartite = get_adjacency(input_matrix, **options)

        n = adjacency.shape[0]

        # Optional random relabelling of the nodes; the permutation is undone on the final labels.
        index = np.arange(n)
        if self.shuffle_nodes:
            index = self.random_state.permutation(index)
            adjacency = adjacency[index][:, index]

        # Node weights of the null model.
        if self.modularity == 'dugue':
            probs_out = get_probs('degree', adjacency)
            probs_in = get_probs('degree', adjacency.T)
        elif self.modularity in ('potts', 'newman'):
            distribution = 'uniform' if self.modularity == 'potts' else 'degree'
            probs_out = get_probs(distribution, adjacency)
            probs_in = probs_out.copy()
        else:
            raise ValueError('Unknown modularity function.')

        # Work on the adjacency normalized by its total weight.
        adjacency_cluster = adjacency / adjacency.data.sum()

        membership = sparse.identity(n, format='csr')
        count_aggregations = 0
        self.log.print("Starting with", n, "nodes.")
        while True:
            count_aggregations += 1

            labels_cluster, pass_increase = self._optimize(adjacency_cluster, probs_out, probs_in)
            _, labels_cluster = np.unique(labels_cluster, return_inverse=True)

            # The graph is aggregated only if the pass improved the objective by more than the tolerance.
            stop = pass_increase <= self.tol_aggregation
            if not stop:
                membership_cluster = get_membership(labels_cluster)
                membership = membership.dot(membership_cluster)
                adjacency_cluster, probs_out, probs_in = self._aggregate(adjacency_cluster, probs_out, probs_in,
                                                                         membership_cluster)

            n_clusters = adjacency_cluster.shape[0]
            if n_clusters == 1:
                break
            self.log.print("Aggregation", count_aggregations, "completed with", n_clusters, "clusters and ",
                           pass_increase, "increment.")
            if stop or count_aggregations == self.n_aggregations:
                break

        # Each row of the membership matrix holds a single entry, whose column is the label of the node.
        labels = membership.indices
        if self.sort_clusters:
            labels = reindex_labels(labels)
        if self.shuffle_nodes:
            # Inverse permutation, to map the labels back to the original node order.
            inverse = np.empty(index.size, index.dtype)
            inverse[index] = np.arange(index.size)
            labels = labels[inverse]

        self.labels_ = labels
        if self.bipartite:
            self._split_vars(input_matrix.shape)
        self._secondary_outputs(input_matrix)

        return self