scikit-network 0.28.3__cp39-cp39-macosx_12_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of scikit-network might be problematic. Click here for more details.

Files changed (240) hide show
  1. scikit_network-0.28.3.dist-info/AUTHORS.rst +41 -0
  2. scikit_network-0.28.3.dist-info/LICENSE +34 -0
  3. scikit_network-0.28.3.dist-info/METADATA +457 -0
  4. scikit_network-0.28.3.dist-info/RECORD +240 -0
  5. scikit_network-0.28.3.dist-info/WHEEL +5 -0
  6. scikit_network-0.28.3.dist-info/top_level.txt +1 -0
  7. sknetwork/__init__.py +21 -0
  8. sknetwork/classification/__init__.py +8 -0
  9. sknetwork/classification/base.py +84 -0
  10. sknetwork/classification/base_rank.py +143 -0
  11. sknetwork/classification/diffusion.py +134 -0
  12. sknetwork/classification/knn.py +162 -0
  13. sknetwork/classification/metrics.py +205 -0
  14. sknetwork/classification/pagerank.py +66 -0
  15. sknetwork/classification/propagation.py +152 -0
  16. sknetwork/classification/tests/__init__.py +1 -0
  17. sknetwork/classification/tests/test_API.py +35 -0
  18. sknetwork/classification/tests/test_diffusion.py +37 -0
  19. sknetwork/classification/tests/test_knn.py +24 -0
  20. sknetwork/classification/tests/test_metrics.py +53 -0
  21. sknetwork/classification/tests/test_pagerank.py +20 -0
  22. sknetwork/classification/tests/test_propagation.py +24 -0
  23. sknetwork/classification/vote.cpython-39-darwin.so +0 -0
  24. sknetwork/classification/vote.pyx +58 -0
  25. sknetwork/clustering/__init__.py +7 -0
  26. sknetwork/clustering/base.py +102 -0
  27. sknetwork/clustering/kmeans.py +142 -0
  28. sknetwork/clustering/louvain.py +255 -0
  29. sknetwork/clustering/louvain_core.cpython-39-darwin.so +0 -0
  30. sknetwork/clustering/louvain_core.pyx +134 -0
  31. sknetwork/clustering/metrics.py +91 -0
  32. sknetwork/clustering/postprocess.py +66 -0
  33. sknetwork/clustering/propagation_clustering.py +108 -0
  34. sknetwork/clustering/tests/__init__.py +1 -0
  35. sknetwork/clustering/tests/test_API.py +37 -0
  36. sknetwork/clustering/tests/test_kmeans.py +47 -0
  37. sknetwork/clustering/tests/test_louvain.py +104 -0
  38. sknetwork/clustering/tests/test_metrics.py +50 -0
  39. sknetwork/clustering/tests/test_post_processing.py +23 -0
  40. sknetwork/clustering/tests/test_postprocess.py +39 -0
  41. sknetwork/data/__init__.py +5 -0
  42. sknetwork/data/load.py +408 -0
  43. sknetwork/data/models.py +459 -0
  44. sknetwork/data/parse.py +621 -0
  45. sknetwork/data/test_graphs.py +84 -0
  46. sknetwork/data/tests/__init__.py +1 -0
  47. sknetwork/data/tests/test_API.py +30 -0
  48. sknetwork/data/tests/test_load.py +95 -0
  49. sknetwork/data/tests/test_models.py +52 -0
  50. sknetwork/data/tests/test_parse.py +253 -0
  51. sknetwork/data/tests/test_test_graphs.py +30 -0
  52. sknetwork/data/tests/test_toy_graphs.py +68 -0
  53. sknetwork/data/toy_graphs.py +619 -0
  54. sknetwork/embedding/__init__.py +10 -0
  55. sknetwork/embedding/base.py +90 -0
  56. sknetwork/embedding/force_atlas.py +197 -0
  57. sknetwork/embedding/louvain_embedding.py +174 -0
  58. sknetwork/embedding/louvain_hierarchy.py +142 -0
  59. sknetwork/embedding/metrics.py +66 -0
  60. sknetwork/embedding/random_projection.py +133 -0
  61. sknetwork/embedding/spectral.py +214 -0
  62. sknetwork/embedding/spring.py +198 -0
  63. sknetwork/embedding/svd.py +363 -0
  64. sknetwork/embedding/tests/__init__.py +1 -0
  65. sknetwork/embedding/tests/test_API.py +73 -0
  66. sknetwork/embedding/tests/test_force_atlas.py +35 -0
  67. sknetwork/embedding/tests/test_louvain_embedding.py +33 -0
  68. sknetwork/embedding/tests/test_louvain_hierarchy.py +19 -0
  69. sknetwork/embedding/tests/test_metrics.py +29 -0
  70. sknetwork/embedding/tests/test_random_projection.py +28 -0
  71. sknetwork/embedding/tests/test_spectral.py +84 -0
  72. sknetwork/embedding/tests/test_spring.py +50 -0
  73. sknetwork/embedding/tests/test_svd.py +37 -0
  74. sknetwork/flow/__init__.py +3 -0
  75. sknetwork/flow/flow.py +73 -0
  76. sknetwork/flow/tests/__init__.py +1 -0
  77. sknetwork/flow/tests/test_flow.py +17 -0
  78. sknetwork/flow/tests/test_utils.py +69 -0
  79. sknetwork/flow/utils.py +91 -0
  80. sknetwork/gnn/__init__.py +10 -0
  81. sknetwork/gnn/activation.py +117 -0
  82. sknetwork/gnn/base.py +155 -0
  83. sknetwork/gnn/base_activation.py +89 -0
  84. sknetwork/gnn/base_layer.py +109 -0
  85. sknetwork/gnn/gnn_classifier.py +381 -0
  86. sknetwork/gnn/layer.py +153 -0
  87. sknetwork/gnn/layers.py +127 -0
  88. sknetwork/gnn/loss.py +180 -0
  89. sknetwork/gnn/neighbor_sampler.py +65 -0
  90. sknetwork/gnn/optimizer.py +163 -0
  91. sknetwork/gnn/tests/__init__.py +1 -0
  92. sknetwork/gnn/tests/test_activation.py +56 -0
  93. sknetwork/gnn/tests/test_base.py +79 -0
  94. sknetwork/gnn/tests/test_base_layer.py +37 -0
  95. sknetwork/gnn/tests/test_gnn_classifier.py +192 -0
  96. sknetwork/gnn/tests/test_layers.py +80 -0
  97. sknetwork/gnn/tests/test_loss.py +33 -0
  98. sknetwork/gnn/tests/test_neigh_sampler.py +23 -0
  99. sknetwork/gnn/tests/test_optimizer.py +43 -0
  100. sknetwork/gnn/tests/test_utils.py +93 -0
  101. sknetwork/gnn/utils.py +219 -0
  102. sknetwork/hierarchy/__init__.py +7 -0
  103. sknetwork/hierarchy/base.py +69 -0
  104. sknetwork/hierarchy/louvain_hierarchy.py +264 -0
  105. sknetwork/hierarchy/metrics.py +234 -0
  106. sknetwork/hierarchy/paris.cpython-39-darwin.so +0 -0
  107. sknetwork/hierarchy/paris.pyx +317 -0
  108. sknetwork/hierarchy/postprocess.py +350 -0
  109. sknetwork/hierarchy/tests/__init__.py +1 -0
  110. sknetwork/hierarchy/tests/test_API.py +25 -0
  111. sknetwork/hierarchy/tests/test_algos.py +29 -0
  112. sknetwork/hierarchy/tests/test_metrics.py +62 -0
  113. sknetwork/hierarchy/tests/test_postprocess.py +57 -0
  114. sknetwork/hierarchy/tests/test_ward.py +25 -0
  115. sknetwork/hierarchy/ward.py +94 -0
  116. sknetwork/linalg/__init__.py +9 -0
  117. sknetwork/linalg/basics.py +37 -0
  118. sknetwork/linalg/diteration.cpython-39-darwin.so +0 -0
  119. sknetwork/linalg/diteration.pyx +49 -0
  120. sknetwork/linalg/eig_solver.py +93 -0
  121. sknetwork/linalg/laplacian.py +15 -0
  122. sknetwork/linalg/normalization.py +66 -0
  123. sknetwork/linalg/operators.py +225 -0
  124. sknetwork/linalg/polynome.py +76 -0
  125. sknetwork/linalg/ppr_solver.py +170 -0
  126. sknetwork/linalg/push.cpython-39-darwin.so +0 -0
  127. sknetwork/linalg/push.pyx +73 -0
  128. sknetwork/linalg/sparse_lowrank.py +142 -0
  129. sknetwork/linalg/svd_solver.py +91 -0
  130. sknetwork/linalg/tests/__init__.py +1 -0
  131. sknetwork/linalg/tests/test_eig.py +44 -0
  132. sknetwork/linalg/tests/test_laplacian.py +18 -0
  133. sknetwork/linalg/tests/test_normalization.py +38 -0
  134. sknetwork/linalg/tests/test_operators.py +70 -0
  135. sknetwork/linalg/tests/test_polynome.py +38 -0
  136. sknetwork/linalg/tests/test_ppr.py +50 -0
  137. sknetwork/linalg/tests/test_sparse_lowrank.py +61 -0
  138. sknetwork/linalg/tests/test_svd.py +38 -0
  139. sknetwork/linkpred/__init__.py +4 -0
  140. sknetwork/linkpred/base.py +80 -0
  141. sknetwork/linkpred/first_order.py +508 -0
  142. sknetwork/linkpred/first_order_core.cpython-39-darwin.so +0 -0
  143. sknetwork/linkpred/first_order_core.pyx +315 -0
  144. sknetwork/linkpred/postprocessing.py +98 -0
  145. sknetwork/linkpred/tests/__init__.py +1 -0
  146. sknetwork/linkpred/tests/test_API.py +49 -0
  147. sknetwork/linkpred/tests/test_postprocessing.py +21 -0
  148. sknetwork/path/__init__.py +4 -0
  149. sknetwork/path/metrics.py +148 -0
  150. sknetwork/path/search.py +65 -0
  151. sknetwork/path/shortest_path.py +186 -0
  152. sknetwork/path/tests/__init__.py +1 -0
  153. sknetwork/path/tests/test_metrics.py +29 -0
  154. sknetwork/path/tests/test_search.py +25 -0
  155. sknetwork/path/tests/test_shortest_path.py +45 -0
  156. sknetwork/ranking/__init__.py +9 -0
  157. sknetwork/ranking/base.py +56 -0
  158. sknetwork/ranking/betweenness.cpython-39-darwin.so +0 -0
  159. sknetwork/ranking/betweenness.pyx +99 -0
  160. sknetwork/ranking/closeness.py +95 -0
  161. sknetwork/ranking/harmonic.py +82 -0
  162. sknetwork/ranking/hits.py +94 -0
  163. sknetwork/ranking/katz.py +81 -0
  164. sknetwork/ranking/pagerank.py +107 -0
  165. sknetwork/ranking/postprocess.py +25 -0
  166. sknetwork/ranking/tests/__init__.py +1 -0
  167. sknetwork/ranking/tests/test_API.py +34 -0
  168. sknetwork/ranking/tests/test_betweenness.py +38 -0
  169. sknetwork/ranking/tests/test_closeness.py +34 -0
  170. sknetwork/ranking/tests/test_hits.py +20 -0
  171. sknetwork/ranking/tests/test_pagerank.py +69 -0
  172. sknetwork/regression/__init__.py +4 -0
  173. sknetwork/regression/base.py +56 -0
  174. sknetwork/regression/diffusion.py +190 -0
  175. sknetwork/regression/tests/__init__.py +1 -0
  176. sknetwork/regression/tests/test_API.py +34 -0
  177. sknetwork/regression/tests/test_diffusion.py +48 -0
  178. sknetwork/sknetwork.py +3 -0
  179. sknetwork/topology/__init__.py +9 -0
  180. sknetwork/topology/dag.py +74 -0
  181. sknetwork/topology/dag_core.cpython-39-darwin.so +0 -0
  182. sknetwork/topology/dag_core.pyx +38 -0
  183. sknetwork/topology/kcliques.cpython-39-darwin.so +0 -0
  184. sknetwork/topology/kcliques.pyx +193 -0
  185. sknetwork/topology/kcore.cpython-39-darwin.so +0 -0
  186. sknetwork/topology/kcore.pyx +120 -0
  187. sknetwork/topology/structure.py +234 -0
  188. sknetwork/topology/tests/__init__.py +1 -0
  189. sknetwork/topology/tests/test_cliques.py +28 -0
  190. sknetwork/topology/tests/test_cores.py +21 -0
  191. sknetwork/topology/tests/test_dag.py +26 -0
  192. sknetwork/topology/tests/test_structure.py +99 -0
  193. sknetwork/topology/tests/test_triangles.py +42 -0
  194. sknetwork/topology/tests/test_wl_coloring.py +49 -0
  195. sknetwork/topology/tests/test_wl_kernel.py +31 -0
  196. sknetwork/topology/triangles.cpython-39-darwin.so +0 -0
  197. sknetwork/topology/triangles.pyx +166 -0
  198. sknetwork/topology/weisfeiler_lehman.py +163 -0
  199. sknetwork/topology/weisfeiler_lehman_core.cpython-39-darwin.so +0 -0
  200. sknetwork/topology/weisfeiler_lehman_core.pyx +116 -0
  201. sknetwork/utils/__init__.py +40 -0
  202. sknetwork/utils/base.py +35 -0
  203. sknetwork/utils/check.py +354 -0
  204. sknetwork/utils/co_neighbor.py +71 -0
  205. sknetwork/utils/format.py +219 -0
  206. sknetwork/utils/kmeans.py +89 -0
  207. sknetwork/utils/knn.py +166 -0
  208. sknetwork/utils/knn1d.cpython-39-darwin.so +0 -0
  209. sknetwork/utils/knn1d.pyx +80 -0
  210. sknetwork/utils/membership.py +82 -0
  211. sknetwork/utils/minheap.cpython-39-darwin.so +0 -0
  212. sknetwork/utils/minheap.pxd +22 -0
  213. sknetwork/utils/minheap.pyx +111 -0
  214. sknetwork/utils/neighbors.py +115 -0
  215. sknetwork/utils/seeds.py +75 -0
  216. sknetwork/utils/simplex.py +140 -0
  217. sknetwork/utils/tests/__init__.py +1 -0
  218. sknetwork/utils/tests/test_base.py +28 -0
  219. sknetwork/utils/tests/test_bunch.py +16 -0
  220. sknetwork/utils/tests/test_check.py +190 -0
  221. sknetwork/utils/tests/test_co_neighbor.py +43 -0
  222. sknetwork/utils/tests/test_format.py +61 -0
  223. sknetwork/utils/tests/test_kmeans.py +21 -0
  224. sknetwork/utils/tests/test_knn.py +32 -0
  225. sknetwork/utils/tests/test_membership.py +24 -0
  226. sknetwork/utils/tests/test_neighbors.py +41 -0
  227. sknetwork/utils/tests/test_projection_simplex.py +33 -0
  228. sknetwork/utils/tests/test_seeds.py +67 -0
  229. sknetwork/utils/tests/test_verbose.py +15 -0
  230. sknetwork/utils/tests/test_ward.py +20 -0
  231. sknetwork/utils/timeout.py +38 -0
  232. sknetwork/utils/verbose.py +37 -0
  233. sknetwork/utils/ward.py +60 -0
  234. sknetwork/visualization/__init__.py +4 -0
  235. sknetwork/visualization/colors.py +34 -0
  236. sknetwork/visualization/dendrograms.py +229 -0
  237. sknetwork/visualization/graphs.py +819 -0
  238. sknetwork/visualization/tests/__init__.py +1 -0
  239. sknetwork/visualization/tests/test_dendrograms.py +53 -0
  240. sknetwork/visualization/tests/test_graphs.py +167 -0
@@ -0,0 +1,134 @@
1
+ # distutils: language = c++
2
+ # cython: language_level=3
3
+ # cython: linetrace=True
4
+ # distutils: define_macros=CYTHON_TRACE_NOGIL=1
5
+ from libcpp.set cimport set
6
+ from libcpp.vector cimport vector
7
+ cimport cython
8
+
9
+ ctypedef fused int_or_long:
10
+ int
11
+ long
12
+
13
@cython.boundscheck(False)
@cython.wraparound(False)
def fit_core(float resolution, float tol, float[:] ou_node_probs, float[:] in_node_probs, float[:] self_loops,
             float[:] data, int_or_long[:] indices, int_or_long[:] indptr):  # pragma: no cover
    """Fit the clusters to the objective function.

    Every node starts in its own cluster. Nodes are then visited in index order; each node
    is moved to the neighboring cluster with the largest strictly positive gain, if any.
    Full passes over the nodes are repeated as long as the gain accumulated during the
    last pass exceeds ``tol``.

    Parameters
    ----------
    resolution :
        Resolution parameter (positive).
    tol :
        Minimum increase in modularity to enter a new optimization pass.
    ou_node_probs :
        Distribution of node weights based on their out-edges (sums to 1).
    in_node_probs :
        Distribution of node weights based on their in-edges (sums to 1).
    self_loops :
        Weights of self loops.
    data :
        CSR format data array of the normalized adjacency matrix.
    indices :
        CSR format index array of the normalized adjacency matrix.
    indptr :
        CSR format index pointer array of the normalized adjacency matrix.

    Returns
    -------
    labels :
        Cluster index of each node.
    total_increase :
        Score of the clustering (total increase in modularity).
    """
    # Number of nodes: indptr has one more entry than there are rows.
    cdef int_or_long n = indptr.shape[0] - 1
    # Flag (0 / 1) telling whether another pass over the nodes is required.
    cdef int_or_long increase = 1
    cdef int_or_long cluster
    cdef int_or_long cluster_best
    cdef int_or_long cluster_node
    cdef int_or_long i
    cdef int_or_long j
    cdef int_or_long j1
    cdef int_or_long j2
    cdef int_or_long label

    cdef float increase_total = 0
    cdef float increase_pass
    cdef float delta
    cdef float delta_best
    cdef float delta_exit
    cdef float delta_local
    cdef float node_prob_in
    cdef float node_prob_ou
    cdef float ratio_in
    cdef float ratio_ou

    cdef vector[int_or_long] labels
    # Scratch buffer indexed by cluster: weight of the edges from the current node
    # to each cluster. Entries touched for a node are reset to 0 before the next node.
    cdef vector[float] neighbor_clusters_weights
    cdef vector[float] ou_clusters_weights
    cdef vector[float] in_clusters_weights
    cdef set[int_or_long] unique_clusters = ()

    # Initial state: node i alone in cluster i, so cluster weights equal node weights.
    for i in range(n):
        labels.push_back(i)
        neighbor_clusters_weights.push_back(0.)
        ou_clusters_weights.push_back(ou_node_probs[i])
        in_clusters_weights.push_back(in_node_probs[i])

    while increase == 1:
        increase = 0
        increase_pass = 0

        for i in range(n):
            unique_clusters.clear()
            cluster_node = labels[i]
            j1 = indptr[i]
            j2 = indptr[i + 1]

            # Accumulate, per cluster, the weight of the edges leaving node i.
            for j in range(j1, j2):
                label = labels[indices[j]]
                neighbor_clusters_weights[label] += data[j]
                unique_clusters.insert(label)

            # Only clusters other than the current one are candidates for a move.
            unique_clusters.erase(cluster_node)

            if not unique_clusters.empty():
                node_prob_ou = ou_node_probs[i]
                node_prob_in = in_node_probs[i]
                ratio_ou = resolution * node_prob_ou
                ratio_in = resolution * node_prob_in

                # Score of the current cluster with node i itself removed
                # (self loop and the node's own weights are subtracted).
                delta_exit = 2 * (neighbor_clusters_weights[cluster_node] - self_loops[i])
                delta_exit -= ratio_ou * (in_clusters_weights[cluster_node] - node_prob_in)
                delta_exit -= ratio_in * (ou_clusters_weights[cluster_node] - node_prob_ou)

                delta_best = 0
                cluster_best = cluster_node

                for cluster in unique_clusters:
                    # Score of the candidate cluster for node i.
                    delta = 2 * neighbor_clusters_weights[cluster]
                    delta -= ratio_ou * in_clusters_weights[cluster]
                    delta -= ratio_in * ou_clusters_weights[cluster]

                    # Gain of moving from the current cluster to the candidate.
                    delta_local = delta - delta_exit
                    if delta_local > delta_best:
                        delta_best = delta_local
                        cluster_best = cluster

                    # Reset the scratch entry for the next node.
                    neighbor_clusters_weights[cluster] = 0

                if delta_best > 0:
                    # Move node i and update the weights of both clusters.
                    increase_pass += delta_best
                    ou_clusters_weights[cluster_node] -= node_prob_ou
                    in_clusters_weights[cluster_node] -= node_prob_in
                    ou_clusters_weights[cluster_best] += node_prob_ou
                    in_clusters_weights[cluster_best] += node_prob_in
                    labels[i] = cluster_best

            # The former cluster was erased from unique_clusters above, so its
            # scratch entry is reset separately.
            neighbor_clusters_weights[cluster_node] = 0

        increase_total += increase_pass
        if increase_pass > tol:
            increase = 1
    return labels, increase_total
@@ -0,0 +1,91 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ Created in July 2018
5
+ @author: Nathan de Lara <nathan.delara@polytechnique.org>
6
+ @author: Thomas Bonald <bonald@enst.fr>
7
+ """
8
+ from typing import Optional, Union, Tuple
9
+
10
+ import numpy as np
11
+ from scipy import sparse
12
+
13
+ from sknetwork.utils.check import get_probs
14
+ from sknetwork.utils.format import get_adjacency
15
+ from sknetwork.utils.membership import get_membership
16
+
17
+
18
def get_modularity(input_matrix: Union[sparse.csr_matrix, np.ndarray], labels: np.ndarray,
                   labels_col: Optional[np.ndarray] = None, weights: str = 'degree',
                   resolution: float = 1, return_all: bool = False) -> Union[float, Tuple[float, float, float]]:
    """Modularity of a clustering.

    The modularity of a clustering is

    :math:`Q = \\dfrac{1}{w} \\sum_{i,j}\\left(A_{ij} - \\gamma \\dfrac{w_iw_j}{w}\\right)\\delta_{c_i,c_j}`
    for graphs,

    :math:`Q = \\dfrac{1}{w} \\sum_{i,j}\\left(A_{ij} - \\gamma \\dfrac{w^+_iw^-_j}{w}\\right)\\delta_{c_i,c_j}`
    for directed graphs,

    where

    * :math:`c_i` is the cluster of node :math:`i`,\n
    * :math:`w_i` is the weight of node :math:`i`,\n
    * :math:`w^+_i, w^-_i` are the out-weight, in-weight of node :math:`i` (for directed graphs),\n
    * :math:`w = 1^TA1` is the total weight,\n
    * :math:`\\delta` is the Kronecker symbol,\n
    * :math:`\\gamma \\ge 0` is the resolution parameter.

    The returned value is ``fit - resolution * diversity``, where ``fit`` is the share of
    the total weight lying inside clusters and ``diversity`` is the corresponding term
    computed from the node weight distributions.

    Parameters
    ----------
    input_matrix :
        Adjacency matrix or biadjacency matrix of the graph.
    labels :
        Labels of nodes.
    labels_col :
        Labels of column nodes (for bipartite graphs).
    weights :
        Weighting of nodes (``'degree'`` (default) or ``'uniform'``).
    resolution:
        Resolution parameter (default = 1).
    return_all:
        If ``True``, return modularity, fit, diversity.

    Returns
    -------
    modularity : float
    fit: float, optional
    diversity: float, optional

    Raises
    ------
    ValueError
        If the graph is bipartite and ``labels_col`` is missing, or if the number of
        labels does not match the number of nodes.

    Example
    -------
    >>> from sknetwork.clustering import get_modularity
    >>> from sknetwork.data import house
    >>> adjacency = house()
    >>> labels = np.array([0, 0, 1, 1, 0])
    >>> np.round(get_modularity(adjacency, labels), 2)
    0.11
    """
    adjacency, bipartite = get_adjacency(input_matrix.astype(float))

    if bipartite:
        if labels_col is None:
            raise ValueError('For bipartite graphs, you must specify the labels of both rows and columns.')
        # Rows first, then columns, to match the node order of the bipartite adjacency.
        labels = np.hstack((labels, labels_col))

    n_nodes = adjacency.shape[0]
    if len(labels) != n_nodes:
        raise ValueError('Dimension mismatch between labels and input matrix.')

    probs_row = get_probs(weights, adjacency)
    probs_col = get_probs(weights, adjacency.T)
    membership = get_membership(labels).astype(float)
    membership_t = membership.T

    # Share of the total edge weight that falls inside clusters.
    intra_weight = membership_t.dot(adjacency.dot(membership)).diagonal().sum()
    fit = intra_weight / adjacency.data.sum()

    # Per-cluster products of column and row weight masses, summed over clusters.
    div = membership_t.dot(probs_col).dot(membership_t.dot(probs_row))

    mod = fit - resolution * div
    return (mod, fit, div) if return_all else mod
@@ -0,0 +1,66 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ Created on July 10, 2019
5
+ @author: Nathan de Lara <nathan.delara@polytechnique.org>
6
+ @author: Thomas Bonald <bonald@enst.fr>
7
+ """
8
+ from typing import Optional
9
+
10
+ import numpy as np
11
+ from scipy import sparse
12
+
13
+ from sknetwork.utils.membership import get_membership
14
+
15
+
16
def reindex_labels(labels: np.ndarray) -> np.ndarray:
    """Relabel clusters so that label 0 is the largest cluster, 1 the next largest, etc.

    Parameters
    ----------
    labels :
        Label of each node.

    Returns
    -------
    new_labels : np.ndarray
        New label of each node.

    Example
    -------
    >>> from sknetwork.clustering import reindex_labels
    >>> labels = np.array([0, 1, 1])
    >>> reindex_labels(labels)
    array([1, 0, 0])
    """
    unique_out = np.unique(labels, return_inverse=True, return_counts=True)
    inverse, sizes = unique_out[1], unique_out[2]
    # Cluster positions sorted by decreasing size.
    by_size = np.argsort(-sizes)
    # by_size is a permutation, so the first-occurrence indices of its sorted values
    # give, for each old cluster position, its rank in the size ordering.
    rank = np.unique(by_size, return_index=True)[1]
    return rank[inverse]
38
+
39
+
40
def aggregate_graph(input_matrix: sparse.csr_matrix, labels: Optional[np.ndarray] = None,
                    labels_row: Optional[np.ndarray] = None, labels_col: Optional[np.ndarray] = None) \
        -> sparse.csr_matrix:
    """Aggregate graph per label. All nodes with the same label become a single node.
    Negative labels are ignored (corresponding nodes are not discarded).

    Parameters
    ----------
    input_matrix: sparse matrix
        Adjacency or biadjacency matrix of the graph.
    labels: np.ndarray
        Labels of nodes.
    labels_row: np.ndarray
        Labels of rows (for bipartite graphs). Alias for labels; takes precedence
        over ``labels`` when both are given.
    labels_col: np.ndarray
        Labels of columns (for bipartite graphs). When omitted, the row membership
        is used for the columns as well.

    Returns
    -------
    aggregate_matrix :
        Product ``membership_row.T @ input_matrix @ membership_col``.
    """
    row_labels = labels if labels_row is None else labels_row
    membership_row = get_membership(row_labels)
    membership_col = membership_row if labels_col is None else get_membership(labels_col)
    return membership_row.T.dot(input_matrix).dot(membership_col)
@@ -0,0 +1,108 @@
1
+ #!/usr/bin/env python3
2
+ # coding: utf-8
3
+ """
4
+ Created on May, 2020
5
+ @author: Thomas Bonald <tbonald@enst.fr>
6
+ """
7
+ from typing import Union
8
+
9
+ import numpy as np
10
+ from scipy import sparse
11
+
12
+ from sknetwork.classification.propagation import Propagation
13
+ from sknetwork.clustering.base import BaseClustering
14
+ from sknetwork.utils.format import check_format, get_adjacency
15
+
16
+
17
class PropagationClustering(BaseClustering, Propagation):
    """Clustering by label propagation.

    Combines the label propagation of :class:`Propagation` with the clustering
    outputs handled by :class:`BaseClustering`.

    Parameters
    ----------
    n_iter : int
        Maximum number of iterations (-1 for infinity). Default is 5.
    node_order : str
        * `'random'`: node labels are updated in random order.
        * `'increasing'`: node labels are updated by increasing order of weight.
        * `'decreasing'`: node labels are updated by decreasing order of weight (default).
        * Otherwise, node labels are updated by index order.
    weighted : bool
        If ``True``, the vote of each neighbor is proportional to the edge weight.
        Otherwise, all votes have weight 1.
    sort_clusters :
        If ``True``, sort labels in decreasing order of cluster size.
    return_membership :
        If ``True``, return the membership matrix of nodes to each cluster (soft clustering).
    return_aggregate :
        If ``True``, return the aggregate adjacency matrix or biadjacency matrix between clusters.

    Attributes
    ----------
    labels_ : np.ndarray
        Labels of the nodes.
    labels_row_ : np.ndarray
        Labels of the rows (for bipartite graphs).
    labels_col_ : np.ndarray
        Labels of the columns (for bipartite graphs).
    membership_ : sparse.csr_matrix
        Membership matrix of the nodes, shape (n_nodes, n_clusters).
    membership_row_ : sparse.csr_matrix
        Membership matrix of the rows (for bipartite graphs).
    membership_col_ : sparse.csr_matrix
        Membership matrix of the columns (for bipartite graphs).
    aggregate_ : sparse.csr_matrix
        Aggregate adjacency matrix or biadjacency matrix between clusters.

    Example
    -------
    >>> from sknetwork.clustering import PropagationClustering
    >>> from sknetwork.data import karate_club
    >>> propagation = PropagationClustering()
    >>> graph = karate_club(metadata=True)
    >>> adjacency = graph.adjacency
    >>> labels = propagation.fit_transform(adjacency)
    >>> len(set(labels))
    2

    References
    ----------
    Raghavan, U. N., Albert, R., & Kumara, S. (2007).
    `Near linear time algorithm to detect community structures in large-scale networks.
    <https://arxiv.org/pdf/0709.2938.pdf>`_
    Physical review E, 76(3), 036106.
    """
    def __init__(self, n_iter: int = 5, node_order: str = 'decreasing', weighted: bool = True,
                 sort_clusters: bool = True, return_membership: bool = True, return_aggregate: bool = True):
        # Both parents are initialized explicitly because they take different arguments.
        Propagation.__init__(self, n_iter, node_order, weighted)
        BaseClustering.__init__(self, sort_clusters, return_membership, return_aggregate)
        # Set to True by fit() when the input is treated as a bipartite graph.
        self.bipartite = None

    def fit(self, input_matrix: Union[sparse.csr_matrix, np.ndarray]) -> 'PropagationClustering':
        """Clustering by label propagation.

        Parameters
        ----------
        input_matrix :
            Adjacency matrix or biadjacency matrix of the graph.

        Returns
        -------
        self: :class:`PropagationClustering`
        """
        # Defined in a base class not shown here; presumably resets fitted attributes.
        self._init_vars()

        # input
        input_matrix = check_format(input_matrix)
        adjacency, bipartite = get_adjacency(input_matrix)

        # propagation
        Propagation.fit(self, adjacency)

        # output
        # Map the propagated labels onto consecutive integers starting at 0.
        _, self.labels_ = np.unique(self.labels_, return_inverse=True)
        if bipartite:
            # Presumably splits the node-level outputs into row and column parts
            # using the shape of the biadjacency matrix; verify in BaseClustering.
            self._split_vars(input_matrix.shape)
            # NOTE(review): this flag is never set back to False in this method; whether
            # _init_vars() resets it between fits is not visible here.
            self.bipartite = True
        self._secondary_outputs(input_matrix)

        return self
@@ -0,0 +1 @@
1
+ """tests for clustering"""
@@ -0,0 +1,37 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ """Tests for clustering API"""
4
+ import unittest
5
+
6
+ from sknetwork.clustering import *
7
+ from sknetwork.data import house
8
+ from sknetwork.data.test_graphs import *
9
+ from sknetwork.embedding.svd import GSVD
10
+
11
+
12
class TestClusteringAPI(unittest.TestCase):
    """Checks of the output shapes shared by all clustering algorithms."""

    def _check_labels_and_aggregate(self, algo, adjacency, n_nodes):
        """Fit ``algo`` and check the shapes of the labels and of the aggregate graph."""
        labels = algo.fit_transform(adjacency)
        n_clusters = len(set(labels))
        self.assertEqual(labels.shape, (n_nodes,))
        self.assertEqual(algo.aggregate_.shape, (n_clusters, n_clusters))

    def test_regular(self):
        algos = [
            Louvain(return_aggregate=True),
            KMeans(embedding_method=GSVD(3), return_aggregate=True),
            PropagationClustering(return_aggregate=True),
        ]
        for algo in algos:
            for adjacency in [test_graph(), test_digraph(), test_graph_disconnect()]:
                n_nodes = adjacency.shape[0]
                # Same checks on the weighted matrix and on its boolean version.
                self._check_labels_and_aggregate(algo, adjacency, n_nodes)
                self._check_labels_and_aggregate(algo, adjacency.astype(bool), n_nodes)

    def test_bipartite(self):
        biadjacency = test_bigraph()
        n_row, n_col = biadjacency.shape
        algos = [
            Louvain(return_aggregate=True),
            KMeans(embedding_method=GSVD(3), co_cluster=True, return_aggregate=True),
            PropagationClustering(return_aggregate=True),
        ]
        for algo in algos:
            algo.fit_transform(biadjacency)
            self.assertEqual(algo.labels_row_.shape, (n_row,))
            self.assertEqual(algo.labels_col_.shape, (n_col,))
@@ -0,0 +1,47 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ Created in October 2019
5
+ @author: Nathan de Lara <nathan.delara@polytechnique.org>
6
+ """
7
+
8
+ import unittest
9
+
10
+ from sknetwork.clustering import KMeans
11
+ from sknetwork.data.test_graphs import *
12
+ from sknetwork.embedding import GSVD, Spectral
13
+
14
+
15
class TestKMeans(unittest.TestCase):
    """Tests of KMeans clustering on graphs and bipartite graphs."""

    def _check_bipartite_shapes(self, algo, n_row, n_col, n_clusters):
        """Check the shapes of the membership matrices and of the aggregate graph."""
        self.assertEqual(algo.membership_.shape, (n_row, n_clusters))
        self.assertEqual(algo.membership_row_.shape, (n_row, n_clusters))
        self.assertEqual(algo.membership_col_.shape, (n_col, n_clusters))
        self.assertEqual(algo.aggregate_.shape, (n_clusters, n_clusters))

    def test_undirected(self):
        n_clusters = 3
        algo = KMeans(n_clusters, GSVD(2))
        algo_options = KMeans(n_clusters, Spectral(3), co_cluster=True, sort_clusters=False)
        graphs = [test_graph(), test_graph_disconnect(), test_digraph()]
        for adjacency in graphs:
            n_nodes = adjacency.shape[0]

            found = algo.fit_transform(adjacency)
            self.assertEqual(len(set(found)), n_clusters)
            self.assertEqual(algo.membership_.shape, (n_nodes, n_clusters))
            self.assertEqual(algo.aggregate_.shape, (n_clusters, n_clusters))

            found = algo_options.fit_transform(adjacency)
            self.assertEqual(len(set(found)), n_clusters)

    def test_bipartite(self):
        algo = KMeans(3, GSVD(2))
        algo_options = KMeans(4, Spectral(3), co_cluster=True, sort_clusters=False)
        bigraphs = [test_bigraph(), test_bigraph_disconnect()]
        for biadjacency in bigraphs:
            n_row, n_col = biadjacency.shape

            # Default options: only rows are clustered.
            algo.fit(biadjacency)
            self.assertEqual(len(algo.labels_), n_row)
            self._check_bipartite_shapes(algo, n_row, n_col, 3)

            # Co-clustering: rows and columns share the same 4 clusters.
            algo_options.fit(biadjacency)
            labels = np.hstack((algo_options.labels_row_, algo_options.labels_col_))
            self.assertEqual(len(set(labels)), 4)
            self._check_bipartite_shapes(algo_options, n_row, n_col, 4)
@@ -0,0 +1,104 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ """Tests for Louvain"""
4
+ import unittest
5
+
6
+ from sknetwork.clustering import Louvain
7
+ from sknetwork.data import karate_club, star_wars
8
+ from sknetwork.data.test_graphs import *
9
+ from sknetwork.utils import bipartite2undirected
10
+
11
+
12
class TestLouvainClustering(unittest.TestCase):
    """Tests of the Louvain clustering algorithm."""

    def _check_options(self, adjacency):
        """Run the option checks on ``adjacency``.

        Returns the labels produced by the ``return_aggregate=True`` run.
        """
        # resolution
        found = Louvain(resolution=2).fit_transform(adjacency)
        self.assertEqual(len(set(found)), 7)

        # tolerance
        found = Louvain(resolution=2, tol_aggregation=0.1).fit_transform(adjacency)
        self.assertEqual(len(set(found)), 12)

        # shuffling
        found = Louvain(resolution=2, shuffle_nodes=True, random_state=42).fit_transform(adjacency)
        self.assertEqual(len(set(found)), 7)

        # aggregate graph
        louvain = Louvain(return_aggregate=True)
        found = louvain.fit_transform(adjacency)
        n_clusters = len(set(found))
        self.assertEqual(louvain.aggregate_.shape, (n_clusters, n_clusters))

        # single aggregation without cluster sorting (only checks that fit runs)
        Louvain(n_aggregations=1, sort_clusters=False).fit(adjacency)

        return found

    def test_disconnected(self):
        adjacency = test_graph_disconnect()
        n_nodes = adjacency.shape[0]
        found = Louvain().fit_transform(adjacency)
        self.assertEqual(len(found), n_nodes)

    def test_modularity(self):
        adjacency = karate_club()
        louvain_d, louvain_n = (Louvain(modularity=name) for name in ('dugue', 'newman'))
        labels_d = louvain_d.fit_transform(adjacency)
        labels_n = louvain_n.fit_transform(adjacency)
        self.assertTrue((labels_d == labels_n).all())

        Louvain(modularity='potts').fit_transform(adjacency)

    def test_bilouvain(self):
        biadjacency = star_wars()
        adjacency = bipartite2undirected(biadjacency)
        louvain = Louvain(modularity='newman')
        from_graph = louvain.fit_transform(adjacency)
        louvain.fit(biadjacency)
        from_bigraph = np.concatenate((louvain.labels_row_, louvain.labels_col_))
        self.assertTrue((from_graph == from_bigraph).all())

    def test_options(self):
        self._check_options(karate_club())

    def test_options_with_64_bit(self):
        adjacency = karate_club()
        # force 64-bit index
        adjacency.indices = adjacency.indices.astype(np.int64)
        adjacency.indptr = adjacency.indptr.astype(np.int64)

        found = self._check_options(adjacency)

        # check if labels are 64-bit
        self.assertEqual(found.dtype, np.int64)

    def test_invalid(self):
        louvain = Louvain(modularity='toto')
        adjacency = karate_club()
        with self.assertRaises(ValueError):
            louvain.fit(adjacency)
@@ -0,0 +1,50 @@
1
+ # -*- coding: utf-8 -*-
2
+ # tests for metrics.py
3
+ """Tests for clustering metrics"""
4
+ import unittest
5
+
6
+ import numpy as np
7
+
8
+ from sknetwork.clustering import get_modularity, Louvain
9
+ from sknetwork.data import star_wars, karate_club
10
+ from sknetwork.data.test_graphs import test_graph
11
+
12
+
13
class TestClusteringMetrics(unittest.TestCase):
    """Tests of the clustering metrics."""

    def setUp(self):
        """Basic graph for tests"""
        self.adjacency = test_graph()
        n_nodes = self.adjacency.shape[0]
        # Node 0 alone in cluster 1, all other nodes in cluster 0.
        two_clusters = np.zeros(n_nodes)
        two_clusters[0] = 1
        self.labels = two_clusters.astype(int)
        # Trivial clustering: every node in cluster 0.
        self.unique_cluster = np.zeros(n_nodes, dtype=int)

    def test_api(self):
        for metric in (get_modularity,):
            _, fit, div = metric(self.adjacency, self.labels, return_all=True)
            mod = metric(self.adjacency, self.labels, return_all=False)
            self.assertAlmostEqual(fit - div, mod)
            self.assertAlmostEqual(metric(self.adjacency, self.unique_cluster), 0.)

            # Fewer labels than nodes must be rejected.
            with self.assertRaises(ValueError):
                metric(self.adjacency, self.labels[:3])

    def test_modularity(self):
        adjacency = karate_club()
        found = Louvain().fit_transform(adjacency)
        self.assertAlmostEqual(get_modularity(adjacency, found), 0.42, 2)

    def test_bimodularity(self):
        biadjacency = star_wars()
        labels_row = np.array([0, 0, 1, 1])
        labels_col = np.array([0, 1, 0])
        score = get_modularity(biadjacency, labels_row, labels_col)
        self.assertAlmostEqual(score, 0.12, 2)

        # Missing column labels, then truncated row labels, then truncated column labels.
        with self.assertRaises(ValueError):
            get_modularity(biadjacency, labels_row)
        with self.assertRaises(ValueError):
            get_modularity(biadjacency, labels_row[:2], labels_col)
        with self.assertRaises(ValueError):
            get_modularity(biadjacency, labels_row, labels_col[:2])
@@ -0,0 +1,23 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ """Tests for clustering post-processing"""
4
+
5
+ import unittest
6
+
7
+ import numpy as np
8
+
9
+ from sknetwork.clustering import reindex_labels
10
+
11
+
12
class TestClusteringPostProcessing(unittest.TestCase):
    """Tests of the clustering post-processing helpers."""

    def test_reindex_clusters(self):
        """Clusters are relabelled by decreasing size, whatever the input label values."""
        truth = np.array([1, 1, 2, 0, 0, 0])

        # Consecutive input labels.
        labels = np.array([0, 0, 1, 2, 2, 2])
        output = reindex_labels(labels)
        self.assertTrue(np.array_equal(truth, output))

        # Non-consecutive input labels (gap between 2 and 5). reindex_labels takes
        # only ``labels``; passing the former ``consecutive`` keyword raises TypeError.
        labels = np.array([0, 0, 5, 2, 2, 2])
        output = reindex_labels(labels)
        self.assertTrue(np.array_equal(truth, output))