scikit-network 0.33.3__cp313-cp313-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of scikit-network might be problematic. Click here for more details.

Files changed (228) hide show
  1. scikit_network-0.33.3.dist-info/METADATA +122 -0
  2. scikit_network-0.33.3.dist-info/RECORD +228 -0
  3. scikit_network-0.33.3.dist-info/WHEEL +5 -0
  4. scikit_network-0.33.3.dist-info/licenses/AUTHORS.rst +43 -0
  5. scikit_network-0.33.3.dist-info/licenses/LICENSE +34 -0
  6. scikit_network-0.33.3.dist-info/top_level.txt +1 -0
  7. sknetwork/__init__.py +21 -0
  8. sknetwork/base.py +67 -0
  9. sknetwork/classification/__init__.py +8 -0
  10. sknetwork/classification/base.py +142 -0
  11. sknetwork/classification/base_rank.py +133 -0
  12. sknetwork/classification/diffusion.py +134 -0
  13. sknetwork/classification/knn.py +139 -0
  14. sknetwork/classification/metrics.py +205 -0
  15. sknetwork/classification/pagerank.py +66 -0
  16. sknetwork/classification/propagation.py +152 -0
  17. sknetwork/classification/tests/__init__.py +1 -0
  18. sknetwork/classification/tests/test_API.py +30 -0
  19. sknetwork/classification/tests/test_diffusion.py +77 -0
  20. sknetwork/classification/tests/test_knn.py +23 -0
  21. sknetwork/classification/tests/test_metrics.py +53 -0
  22. sknetwork/classification/tests/test_pagerank.py +20 -0
  23. sknetwork/classification/tests/test_propagation.py +24 -0
  24. sknetwork/classification/vote.cp313-win_amd64.pyd +0 -0
  25. sknetwork/classification/vote.cpp +27584 -0
  26. sknetwork/classification/vote.pyx +56 -0
  27. sknetwork/clustering/__init__.py +8 -0
  28. sknetwork/clustering/base.py +172 -0
  29. sknetwork/clustering/kcenters.py +253 -0
  30. sknetwork/clustering/leiden.py +242 -0
  31. sknetwork/clustering/leiden_core.cp313-win_amd64.pyd +0 -0
  32. sknetwork/clustering/leiden_core.cpp +31575 -0
  33. sknetwork/clustering/leiden_core.pyx +124 -0
  34. sknetwork/clustering/louvain.py +286 -0
  35. sknetwork/clustering/louvain_core.cp313-win_amd64.pyd +0 -0
  36. sknetwork/clustering/louvain_core.cpp +31220 -0
  37. sknetwork/clustering/louvain_core.pyx +124 -0
  38. sknetwork/clustering/metrics.py +91 -0
  39. sknetwork/clustering/postprocess.py +66 -0
  40. sknetwork/clustering/propagation_clustering.py +104 -0
  41. sknetwork/clustering/tests/__init__.py +1 -0
  42. sknetwork/clustering/tests/test_API.py +38 -0
  43. sknetwork/clustering/tests/test_kcenters.py +60 -0
  44. sknetwork/clustering/tests/test_leiden.py +34 -0
  45. sknetwork/clustering/tests/test_louvain.py +135 -0
  46. sknetwork/clustering/tests/test_metrics.py +50 -0
  47. sknetwork/clustering/tests/test_postprocess.py +39 -0
  48. sknetwork/data/__init__.py +6 -0
  49. sknetwork/data/base.py +33 -0
  50. sknetwork/data/load.py +406 -0
  51. sknetwork/data/models.py +459 -0
  52. sknetwork/data/parse.py +644 -0
  53. sknetwork/data/test_graphs.py +84 -0
  54. sknetwork/data/tests/__init__.py +1 -0
  55. sknetwork/data/tests/test_API.py +30 -0
  56. sknetwork/data/tests/test_base.py +14 -0
  57. sknetwork/data/tests/test_load.py +95 -0
  58. sknetwork/data/tests/test_models.py +52 -0
  59. sknetwork/data/tests/test_parse.py +250 -0
  60. sknetwork/data/tests/test_test_graphs.py +29 -0
  61. sknetwork/data/tests/test_toy_graphs.py +68 -0
  62. sknetwork/data/timeout.py +38 -0
  63. sknetwork/data/toy_graphs.py +611 -0
  64. sknetwork/embedding/__init__.py +8 -0
  65. sknetwork/embedding/base.py +94 -0
  66. sknetwork/embedding/force_atlas.py +198 -0
  67. sknetwork/embedding/louvain_embedding.py +148 -0
  68. sknetwork/embedding/random_projection.py +135 -0
  69. sknetwork/embedding/spectral.py +141 -0
  70. sknetwork/embedding/spring.py +198 -0
  71. sknetwork/embedding/svd.py +359 -0
  72. sknetwork/embedding/tests/__init__.py +1 -0
  73. sknetwork/embedding/tests/test_API.py +49 -0
  74. sknetwork/embedding/tests/test_force_atlas.py +35 -0
  75. sknetwork/embedding/tests/test_louvain_embedding.py +33 -0
  76. sknetwork/embedding/tests/test_random_projection.py +28 -0
  77. sknetwork/embedding/tests/test_spectral.py +81 -0
  78. sknetwork/embedding/tests/test_spring.py +50 -0
  79. sknetwork/embedding/tests/test_svd.py +43 -0
  80. sknetwork/gnn/__init__.py +10 -0
  81. sknetwork/gnn/activation.py +117 -0
  82. sknetwork/gnn/base.py +181 -0
  83. sknetwork/gnn/base_activation.py +90 -0
  84. sknetwork/gnn/base_layer.py +109 -0
  85. sknetwork/gnn/gnn_classifier.py +305 -0
  86. sknetwork/gnn/layer.py +153 -0
  87. sknetwork/gnn/loss.py +180 -0
  88. sknetwork/gnn/neighbor_sampler.py +65 -0
  89. sknetwork/gnn/optimizer.py +164 -0
  90. sknetwork/gnn/tests/__init__.py +1 -0
  91. sknetwork/gnn/tests/test_activation.py +56 -0
  92. sknetwork/gnn/tests/test_base.py +75 -0
  93. sknetwork/gnn/tests/test_base_layer.py +37 -0
  94. sknetwork/gnn/tests/test_gnn_classifier.py +130 -0
  95. sknetwork/gnn/tests/test_layers.py +80 -0
  96. sknetwork/gnn/tests/test_loss.py +33 -0
  97. sknetwork/gnn/tests/test_neigh_sampler.py +23 -0
  98. sknetwork/gnn/tests/test_optimizer.py +43 -0
  99. sknetwork/gnn/tests/test_utils.py +41 -0
  100. sknetwork/gnn/utils.py +127 -0
  101. sknetwork/hierarchy/__init__.py +6 -0
  102. sknetwork/hierarchy/base.py +96 -0
  103. sknetwork/hierarchy/louvain_hierarchy.py +272 -0
  104. sknetwork/hierarchy/metrics.py +234 -0
  105. sknetwork/hierarchy/paris.cp313-win_amd64.pyd +0 -0
  106. sknetwork/hierarchy/paris.cpp +37868 -0
  107. sknetwork/hierarchy/paris.pyx +316 -0
  108. sknetwork/hierarchy/postprocess.py +350 -0
  109. sknetwork/hierarchy/tests/__init__.py +1 -0
  110. sknetwork/hierarchy/tests/test_API.py +24 -0
  111. sknetwork/hierarchy/tests/test_algos.py +34 -0
  112. sknetwork/hierarchy/tests/test_metrics.py +62 -0
  113. sknetwork/hierarchy/tests/test_postprocess.py +57 -0
  114. sknetwork/linalg/__init__.py +9 -0
  115. sknetwork/linalg/basics.py +37 -0
  116. sknetwork/linalg/diteration.cp313-win_amd64.pyd +0 -0
  117. sknetwork/linalg/diteration.cpp +27400 -0
  118. sknetwork/linalg/diteration.pyx +47 -0
  119. sknetwork/linalg/eig_solver.py +93 -0
  120. sknetwork/linalg/laplacian.py +15 -0
  121. sknetwork/linalg/normalizer.py +86 -0
  122. sknetwork/linalg/operators.py +225 -0
  123. sknetwork/linalg/polynome.py +76 -0
  124. sknetwork/linalg/ppr_solver.py +170 -0
  125. sknetwork/linalg/push.cp313-win_amd64.pyd +0 -0
  126. sknetwork/linalg/push.cpp +31072 -0
  127. sknetwork/linalg/push.pyx +71 -0
  128. sknetwork/linalg/sparse_lowrank.py +142 -0
  129. sknetwork/linalg/svd_solver.py +91 -0
  130. sknetwork/linalg/tests/__init__.py +1 -0
  131. sknetwork/linalg/tests/test_eig.py +44 -0
  132. sknetwork/linalg/tests/test_laplacian.py +18 -0
  133. sknetwork/linalg/tests/test_normalization.py +34 -0
  134. sknetwork/linalg/tests/test_operators.py +66 -0
  135. sknetwork/linalg/tests/test_polynome.py +38 -0
  136. sknetwork/linalg/tests/test_ppr.py +50 -0
  137. sknetwork/linalg/tests/test_sparse_lowrank.py +61 -0
  138. sknetwork/linalg/tests/test_svd.py +38 -0
  139. sknetwork/linkpred/__init__.py +2 -0
  140. sknetwork/linkpred/base.py +46 -0
  141. sknetwork/linkpred/nn.py +126 -0
  142. sknetwork/linkpred/tests/__init__.py +1 -0
  143. sknetwork/linkpred/tests/test_nn.py +27 -0
  144. sknetwork/log.py +19 -0
  145. sknetwork/path/__init__.py +5 -0
  146. sknetwork/path/dag.py +54 -0
  147. sknetwork/path/distances.py +98 -0
  148. sknetwork/path/search.py +31 -0
  149. sknetwork/path/shortest_path.py +61 -0
  150. sknetwork/path/tests/__init__.py +1 -0
  151. sknetwork/path/tests/test_dag.py +37 -0
  152. sknetwork/path/tests/test_distances.py +62 -0
  153. sknetwork/path/tests/test_search.py +40 -0
  154. sknetwork/path/tests/test_shortest_path.py +40 -0
  155. sknetwork/ranking/__init__.py +8 -0
  156. sknetwork/ranking/base.py +61 -0
  157. sknetwork/ranking/betweenness.cp313-win_amd64.pyd +0 -0
  158. sknetwork/ranking/betweenness.cpp +9707 -0
  159. sknetwork/ranking/betweenness.pyx +97 -0
  160. sknetwork/ranking/closeness.py +92 -0
  161. sknetwork/ranking/hits.py +94 -0
  162. sknetwork/ranking/katz.py +83 -0
  163. sknetwork/ranking/pagerank.py +110 -0
  164. sknetwork/ranking/postprocess.py +37 -0
  165. sknetwork/ranking/tests/__init__.py +1 -0
  166. sknetwork/ranking/tests/test_API.py +32 -0
  167. sknetwork/ranking/tests/test_betweenness.py +38 -0
  168. sknetwork/ranking/tests/test_closeness.py +30 -0
  169. sknetwork/ranking/tests/test_hits.py +20 -0
  170. sknetwork/ranking/tests/test_pagerank.py +62 -0
  171. sknetwork/ranking/tests/test_postprocess.py +26 -0
  172. sknetwork/regression/__init__.py +4 -0
  173. sknetwork/regression/base.py +61 -0
  174. sknetwork/regression/diffusion.py +210 -0
  175. sknetwork/regression/tests/__init__.py +1 -0
  176. sknetwork/regression/tests/test_API.py +32 -0
  177. sknetwork/regression/tests/test_diffusion.py +56 -0
  178. sknetwork/sknetwork.py +3 -0
  179. sknetwork/test_base.py +35 -0
  180. sknetwork/test_log.py +15 -0
  181. sknetwork/topology/__init__.py +8 -0
  182. sknetwork/topology/cliques.cp313-win_amd64.pyd +0 -0
  183. sknetwork/topology/cliques.cpp +32565 -0
  184. sknetwork/topology/cliques.pyx +149 -0
  185. sknetwork/topology/core.cp313-win_amd64.pyd +0 -0
  186. sknetwork/topology/core.cpp +30651 -0
  187. sknetwork/topology/core.pyx +90 -0
  188. sknetwork/topology/cycles.py +243 -0
  189. sknetwork/topology/minheap.cp313-win_amd64.pyd +0 -0
  190. sknetwork/topology/minheap.cpp +27332 -0
  191. sknetwork/topology/minheap.pxd +20 -0
  192. sknetwork/topology/minheap.pyx +109 -0
  193. sknetwork/topology/structure.py +194 -0
  194. sknetwork/topology/tests/__init__.py +1 -0
  195. sknetwork/topology/tests/test_cliques.py +28 -0
  196. sknetwork/topology/tests/test_core.py +19 -0
  197. sknetwork/topology/tests/test_cycles.py +65 -0
  198. sknetwork/topology/tests/test_structure.py +85 -0
  199. sknetwork/topology/tests/test_triangles.py +38 -0
  200. sknetwork/topology/tests/test_wl.py +72 -0
  201. sknetwork/topology/triangles.cp313-win_amd64.pyd +0 -0
  202. sknetwork/topology/triangles.cpp +8894 -0
  203. sknetwork/topology/triangles.pyx +151 -0
  204. sknetwork/topology/weisfeiler_lehman.py +133 -0
  205. sknetwork/topology/weisfeiler_lehman_core.cp313-win_amd64.pyd +0 -0
  206. sknetwork/topology/weisfeiler_lehman_core.cpp +27635 -0
  207. sknetwork/topology/weisfeiler_lehman_core.pyx +114 -0
  208. sknetwork/utils/__init__.py +7 -0
  209. sknetwork/utils/check.py +355 -0
  210. sknetwork/utils/format.py +221 -0
  211. sknetwork/utils/membership.py +82 -0
  212. sknetwork/utils/neighbors.py +115 -0
  213. sknetwork/utils/tests/__init__.py +1 -0
  214. sknetwork/utils/tests/test_check.py +190 -0
  215. sknetwork/utils/tests/test_format.py +63 -0
  216. sknetwork/utils/tests/test_membership.py +24 -0
  217. sknetwork/utils/tests/test_neighbors.py +41 -0
  218. sknetwork/utils/tests/test_tfidf.py +18 -0
  219. sknetwork/utils/tests/test_values.py +66 -0
  220. sknetwork/utils/tfidf.py +37 -0
  221. sknetwork/utils/values.py +76 -0
  222. sknetwork/visualization/__init__.py +4 -0
  223. sknetwork/visualization/colors.py +34 -0
  224. sknetwork/visualization/dendrograms.py +277 -0
  225. sknetwork/visualization/graphs.py +1039 -0
  226. sknetwork/visualization/tests/__init__.py +1 -0
  227. sknetwork/visualization/tests/test_dendrograms.py +53 -0
  228. sknetwork/visualization/tests/test_graphs.py +176 -0
@@ -0,0 +1,124 @@
1
+ # distutils: language=c++
2
+ # cython: language_level=3
3
+ from libcpp.set cimport set
4
+ cimport cython
5
+
6
# Fused integer type: lets optimize_core accept index/label memoryviews
# whose element type is either C ``int`` or C ``long``.
ctypedef fused int_or_long:
    int
    long
9
+
10
@cython.boundscheck(False)
@cython.wraparound(False)
def optimize_core(int_or_long[:] labels, int_or_long[:] indices, int_or_long[:] indptr, float[:] data,
                  float[:] out_weights, float[:] in_weights, float[:] out_cluster_weights, float[:] in_cluster_weights,
                  float[:] cluster_weights, float[:] self_loops, float resolution, float tol_optimization): # pragma: no cover
    """Find clusters maximizing modularity.

    Nodes are visited in index order; each node is moved to the neighboring cluster
    giving the largest strictly positive gain, if any. Passes over all nodes are repeated
    until the gain accumulated in a pass is at most ``tol_optimization``.

    ``labels``, ``out_cluster_weights`` and ``in_cluster_weights`` are updated in place.
    ``cluster_weights`` is used as a scratch buffer: every entry written while processing
    a node is reset to 0 before moving on to the next node.

    Parameters
    ----------
    labels :
        Initial labels.
    indices :
        CSR format index array of the normalized adjacency matrix.
    indptr :
        CSR format index pointer array of the normalized adjacency matrix.
    data :
        CSR format data array of the normalized adjacency matrix.
    out_weights :
        Out-weights of nodes (sum to 1).
    in_weights :
        In-weights of nodes (sum to 1).
    out_cluster_weights :
        Out-weights of clusters (sum to 1).
    in_cluster_weights :
        In-weights of clusters (sum to 1).
    cluster_weights :
        Weights of clusters (initialized to 0).
    self_loops :
        Weights of self loops.
    resolution :
        Resolution parameter (positive).
    tol_optimization :
        Minimum increase in modularity to enter a new optimization pass.

    Returns
    -------
    labels :
        Labels of nodes.
    increase :
        Increase in modularity.
    """
    cdef int_or_long n
    cdef int_or_long stop = 0
    cdef int_or_long label
    cdef int_or_long label_target
    cdef int_or_long label_best
    cdef int_or_long i
    cdef int_or_long j
    cdef int_or_long start
    cdef int_or_long end

    cdef float increase = 0
    cdef float increase_pass
    cdef float delta
    cdef float delta_local
    cdef float delta_best
    cdef float in_weight
    cdef float out_weight

    # labels of the clusters adjacent to the node being processed (cleared for each node)
    cdef set[int_or_long] label_set = ()

    n = labels.shape[0]
    while not stop:
        # gain accumulated over the current pass
        increase_pass = 0

        for i in range(n):
            label_set.clear()
            label = labels[i]
            # neighbors of node i are indices[start:end], with edge weights data[start:end]
            start = indptr[i]
            end = indptr[i+1]

            # neighboring clusters
            for j in range(start, end):
                label_target = labels[indices[j]]
                label_set.insert(label_target)
                # accumulate the weight of the edges from node i to each adjacent cluster
                cluster_weights[label_target] += data[j]
            # the current cluster is not a candidate target
            label_set.erase(label)

            if not label_set.empty():
                out_weight = out_weights[i]
                in_weight = in_weights[i]

                # node leaving the current cluster
                delta = 2 * (cluster_weights[label] - self_loops[i])
                delta -= resolution * out_weight * (in_cluster_weights[label] - in_weight)
                delta -= resolution * in_weight * (out_cluster_weights[label] - out_weight)

                # only strictly positive gains trigger a move
                delta_best = 0
                label_best = label

                for label_target in label_set:
                    # gain of joining label_target, net of the cost of leaving the current cluster
                    delta_local = 2 * cluster_weights[label_target]
                    delta_local -= resolution * out_weight * in_cluster_weights[label_target]
                    delta_local -= resolution * in_weight * out_cluster_weights[label_target]
                    delta_local -= delta
                    if delta_local > delta_best:
                        delta_best = delta_local
                        label_best = label_target
                    # reset the scratch entry for the next node
                    cluster_weights[label_target] = 0

                if label_best != label:
                    increase_pass += delta_best
                    labels[i] = label_best
                    # update weights
                    out_cluster_weights[label] -= out_weight
                    in_cluster_weights[label] -= in_weight
                    out_cluster_weights[label_best] += out_weight
                    in_cluster_weights[label_best] += in_weight

            # the current cluster was erased from label_set, so its scratch entry is reset here
            cluster_weights[label] = 0

        increase += increase_pass
        # stop as soon as a full pass does not improve by more than the tolerance
        stop = increase_pass <= tol_optimization

    return labels, increase
@@ -0,0 +1,91 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ Created in July 2018
5
+ @author: Nathan de Lara <nathan.delara@polytechnique.org>
6
+ @author: Thomas Bonald <bonald@enst.fr>
7
+ """
8
+ from typing import Optional, Union, Tuple
9
+
10
+ import numpy as np
11
+ from scipy import sparse
12
+
13
+ from sknetwork.utils.check import get_probs
14
+ from sknetwork.utils.format import get_adjacency
15
+ from sknetwork.utils.membership import get_membership
16
+
17
+
18
def get_modularity(input_matrix: Union[sparse.csr_matrix, np.ndarray], labels: np.ndarray,
                   labels_col: Optional[np.ndarray] = None, weights: str = 'degree',
                   resolution: float = 1, return_all: bool = False) -> Union[float, Tuple[float, float, float]]:
    """Modularity of a clustering.

    The modularity of a clustering is

    :math:`Q = \\dfrac{1}{w} \\sum_{i,j}\\left(A_{ij} - \\gamma \\dfrac{w_iw_j}{w}\\right)\\delta_{c_i,c_j}`
    for graphs,

    :math:`Q = \\dfrac{1}{w} \\sum_{i,j}\\left(A_{ij} - \\gamma \\dfrac{w^+_iw^-_j}{w}\\right)\\delta_{c_i,c_j}`
    for directed graphs,

    where

    * :math:`c_i` is the cluster of node :math:`i`,\n
    * :math:`w_i` is the weight of node :math:`i`,\n
    * :math:`w^+_i, w^-_i` are the out-weight, in-weight of node :math:`i` (for directed graphs),\n
    * :math:`w = 1^TA1` is the total weight,\n
    * :math:`\\delta` is the Kronecker symbol,\n
    * :math:`\\gamma \\ge 0` is the resolution parameter.

    The modularity is returned as ``fit - resolution * diversity``, where the fit is the
    fraction of the total weight that lies inside clusters and the diversity is the
    corresponding term computed from the node weight distributions.

    Parameters
    ----------
    input_matrix :
        Adjacency matrix or biadjacency matrix of the graph.
    labels :
        Labels of nodes (labels of rows for bipartite graphs).
    labels_col :
        Labels of column nodes (required for bipartite graphs).
    weights :
        Weighting of nodes (``'degree'`` (default) or ``'uniform'``).
    resolution :
        Resolution parameter (default = 1).
    return_all :
        If ``True``, return modularity, fit, diversity.

    Returns
    -------
    modularity : float
    fit : float, optional
    diversity : float, optional

    Raises
    ------
    ValueError
        If the graph is bipartite and ``labels_col`` is missing, or if the number of labels
        does not match the number of nodes.

    Example
    -------
    >>> from sknetwork.clustering import get_modularity
    >>> from sknetwork.data import house
    >>> adjacency = house()
    >>> labels = np.array([0, 0, 1, 1, 0])
    >>> float(np.round(get_modularity(adjacency, labels), 2))
    0.11
    """
    adjacency, is_bipartite = get_adjacency(input_matrix.astype(float))

    if is_bipartite:
        if labels_col is None:
            raise ValueError('For bipartite graphs, you must specify the labels of both rows and columns.')
        # rows first, then columns
        labels = np.hstack((labels, labels_col))

    n_nodes = adjacency.shape[0]
    if len(labels) != n_nodes:
        raise ValueError('Dimension mismatch between labels and input matrix.')

    out_probs = get_probs(weights, adjacency)
    in_probs = get_probs(weights, adjacency.T)
    membership = get_membership(labels).astype(float)
    membership_t = membership.T

    # weight inside clusters, as a fraction of the total weight
    intra_weight = membership_t.dot(adjacency.dot(membership)).diagonal().sum()
    fit = intra_weight / adjacency.data.sum()
    diversity = membership_t.dot(in_probs).dot(membership_t.dot(out_probs))
    modularity = fit - resolution * diversity

    if not return_all:
        return modularity
    return modularity, fit, diversity
@@ -0,0 +1,66 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ Created on July 10, 2019
5
+ @author: Nathan de Lara <nathan.delara@polytechnique.org>
6
+ @author: Thomas Bonald <bonald@enst.fr>
7
+ """
8
+ from typing import Optional
9
+
10
+ import numpy as np
11
+ from scipy import sparse
12
+
13
+ from sknetwork.utils.membership import get_membership
14
+
15
+
16
def reindex_labels(labels: np.ndarray) -> np.ndarray:
    """Reindex clusters in decreasing order of size.

    The largest cluster gets label 0, the second largest gets label 1, and so on.
    Clusters of equal size are ranked by increasing original label, so that the
    result does not depend on the sorting algorithm used internally.

    Parameters
    ----------
    labels :
        Label of each node.

    Returns
    -------
    new_labels : np.ndarray
        New label of each node.

    Example
    -------
    >>> from sknetwork.clustering import reindex_labels
    >>> labels = np.array([0, 1, 1])
    >>> reindex_labels(labels)
    array([1, 0, 0])
    """
    _, index, counts = np.unique(labels, return_inverse=True, return_counts=True)
    # Stable sort: ties between clusters of equal size are broken deterministically.
    order = np.argsort(-counts, kind='stable')
    # Invert the permutation: new_index[k] is the rank of the k-th unique label.
    new_index = np.empty_like(order)
    new_index[order] = np.arange(len(order))
    return new_index[index]
38
+
39
+
40
def aggregate_graph(input_matrix: sparse.csr_matrix, labels: Optional[np.ndarray] = None,
                    labels_row: Optional[np.ndarray] = None, labels_col: Optional[np.ndarray] = None) \
        -> sparse.csr_matrix:
    """Aggregate graph per label. All nodes with the same label become a single node.
    Negative labels are ignored (corresponding nodes are discarded).

    Parameters
    ----------
    input_matrix: sparse matrix
        Adjacency or biadjacency matrix of the graph.
    labels: np.ndarray
        Labels of nodes.
    labels_row: np.ndarray
        Labels of rows (for bipartite graphs). Alias for labels; takes precedence when both are given.
    labels_col: np.ndarray
        Labels of columns (for bipartite graphs). If not specified, the row labels are used.

    Returns
    -------
    aggregate_matrix: sparse.csr_matrix
        Aggregate matrix, with one row per row label and one column per column label.
    """
    row_labels = labels if labels_row is None else labels_row
    membership_row = get_membership(row_labels)
    # without column labels, rows and columns share the same membership matrix
    membership_col = membership_row if labels_col is None else get_membership(labels_col)
    return membership_row.T.dot(input_matrix).dot(membership_col).tocsr()
@@ -0,0 +1,104 @@
1
+ #!/usr/bin/env python3
2
+ # coding: utf-8
3
+ """
4
+ Created on May, 2020
5
+ @author: Thomas Bonald <tbonald@enst.fr>
6
+ """
7
+ from typing import Union
8
+
9
+ import numpy as np
10
+ from scipy import sparse
11
+
12
+ from sknetwork.classification.propagation import Propagation
13
+ from sknetwork.clustering.base import BaseClustering
14
+ from sknetwork.utils.format import check_format, get_adjacency
15
+
16
+
17
class PropagationClustering(BaseClustering, Propagation):
    """Clustering by label propagation.

    The propagation itself is delegated to :class:`Propagation`; this class relabels the
    result with consecutive integers and produces the clustering outputs.

    Parameters
    ----------
    n_iter : int
        Maximum number of iterations (-1 for infinity). Default = 5.
    node_order : str
        * `'random'`: node labels are updated in random order.
        * `'increasing'`: node labels are updated by increasing order of weight.
        * `'decreasing'`: node labels are updated by decreasing order of weight (default).
        * Otherwise, node labels are updated by index order.
    weighted : bool
        If ``True``, the vote of each neighbor is proportional to the edge weight.
        Otherwise, all votes have weight 1.
    sort_clusters : bool
        If ``True``, sort labels in decreasing order of cluster size.
    return_probs : bool
        If ``True``, return the probability distribution over clusters (soft clustering).
    return_aggregate : bool
        If ``True``, return the aggregate adjacency matrix or biadjacency matrix between clusters.

    Attributes
    ----------
    labels_ : np.ndarray, shape (n_nodes,)
        Label of each node.
    probs_ : sparse.csr_matrix, shape (n_row, n_labels)
        Probability distribution over labels.
    labels_row_, labels_col_ : np.ndarray
        Labels of rows and columns, for bipartite graphs.
    probs_row_, probs_col_ : sparse.csr_matrix, shape (n_row, n_labels)
        Probability distributions over labels for rows and columns (for bipartite graphs).
    aggregate_ : sparse.csr_matrix
        Aggregate adjacency matrix or biadjacency matrix between clusters.

    Example
    -------
    >>> from sknetwork.clustering import PropagationClustering
    >>> from sknetwork.data import karate_club
    >>> propagation = PropagationClustering()
    >>> graph = karate_club(metadata=True)
    >>> adjacency = graph.adjacency
    >>> labels = propagation.fit_predict(adjacency)
    >>> len(set(labels))
    2

    References
    ----------
    Raghavan, U. N., Albert, R., & Kumara, S. (2007).
    `Near linear time algorithm to detect community structures in large-scale networks.
    <https://arxiv.org/pdf/0709.2938.pdf>`_
    Physical review E, 76(3), 036106.
    """
    def __init__(self, n_iter: int = 5, node_order: str = 'decreasing', weighted: bool = True,
                 sort_clusters: bool = True, return_probs: bool = True, return_aggregate: bool = True):
        # Both base classes are initialized explicitly, each with its own subset of the parameters.
        Propagation.__init__(self, n_iter, node_order, weighted)
        BaseClustering.__init__(self, sort_clusters, return_probs, return_aggregate)
        # Set to True by fit() when the input is handled as a bipartite graph.
        self.bipartite = None

    def fit(self, input_matrix: Union[sparse.csr_matrix, np.ndarray]) -> 'PropagationClustering':
        """Clustering by label propagation.

        Parameters
        ----------
        input_matrix : sparse.csr_matrix, np.ndarray
            Adjacency matrix or biadjacency matrix of the graph.

        Returns
        -------
        self: :class:`PropagationClustering`
        """
        # NOTE(review): defined in a base class; presumably resets the fitted attributes - confirm.
        self._init_vars()

        # input
        input_matrix = check_format(input_matrix)
        adjacency, bipartite = get_adjacency(input_matrix)

        # propagation
        Propagation.fit(self, adjacency)

        # output
        # map the labels found by the propagation to consecutive integers 0, 1, 2, ...
        _, self.labels_ = np.unique(self.labels_, return_inverse=True)
        if bipartite:
            # NOTE(review): presumably splits the outputs into row and column parts - confirm in base class.
            self._split_vars(input_matrix.shape)
            self.bipartite = True
        # NOTE(review): presumably computes the optional outputs (probs_, aggregate_) - confirm in base class.
        self._secondary_outputs(input_matrix)

        return self
@@ -0,0 +1 @@
1
+ """tests for clustering"""
@@ -0,0 +1,38 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ """Tests for clustering API"""
4
+ import unittest
5
+
6
+ from sknetwork.clustering import *
7
+ from sknetwork.data.test_graphs import *
8
+
9
+
10
class TestClusteringAPI(unittest.TestCase):
    """Checks of the API shared by the clustering algorithms."""

    def setUp(self):
        algo_classes = (Louvain, Leiden, PropagationClustering)
        self.algos = [algo_class(return_aggregate=True) for algo_class in algo_classes]

    def _fit_and_check(self, algo, matrix, n_nodes):
        """Fit the algorithm, check the shapes of labels and aggregate, return the number of clusters."""
        labels = algo.fit_predict(matrix)
        n_clusters = len(set(labels))
        self.assertEqual(labels.shape, (n_nodes,))
        self.assertEqual(algo.aggregate_.shape, (n_clusters, n_clusters))
        return n_clusters

    def test_regular(self):
        for algo in self.algos:
            for graph in [test_graph(), test_digraph(), test_disconnected_graph()]:
                n_nodes = graph.shape[0]
                # weighted input
                self._fit_and_check(algo, graph, n_nodes)
                # boolean input
                graph_bool = graph.astype(bool)
                n_clusters = self._fit_and_check(algo, graph_bool, n_nodes)
                membership = algo.fit_transform(graph_bool)
                self.assertEqual(membership.shape, (n_nodes, n_clusters))

    def test_bipartite(self):
        bigraph = test_bigraph()
        expected_row_shape = (bigraph.shape[0],)
        expected_col_shape = (bigraph.shape[1],)
        for algo in self.algos:
            algo.fit(bigraph)
            self.assertEqual(algo.labels_row_.shape, expected_row_shape)
            self.assertEqual(algo.labels_col_.shape, expected_col_shape)
@@ -0,0 +1,60 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ """Tests for KCenters"""
4
+ import unittest
5
+
6
+ from sknetwork.clustering import KCenters
7
+ from sknetwork.data.test_graphs import *
8
+
9
+
10
class TestKCentersClustering(unittest.TestCase):
    """Tests for the KCenters clustering algorithm."""

    def test_kcenters(self):
        # Undirected graph, then directed graph
        cases = (
            (2, test_graph, {}),
            (3, test_digraph, {'directed': True}),
        )
        for n_clusters, build_graph, options in cases:
            graph = build_graph()
            n_nodes = graph.shape[0]
            algo = KCenters(n_clusters=n_clusters, **options)
            labels = algo.fit_predict(graph)
            self.assertEqual(len(labels), n_nodes)
            self.assertEqual(len(set(labels)), n_clusters)

        # Bipartite graph
        n_clusters = 2
        bigraph = test_bigraph()
        n_row, n_col = bigraph.shape
        algo = KCenters(n_clusters=n_clusters)
        algo.fit(bigraph)
        labels = algo.labels_
        self.assertEqual(len(algo.labels_row_), n_row)
        self.assertEqual(len(algo.labels_col_), n_col)
        self.assertEqual(len(set(labels)), n_clusters)

    def test_kcenters_error(self):
        graph = test_graph()
        bigraph = test_bigraph()

        # Each case must raise a ValueError at fit time:
        # invalid n_clusters, invalid n_init, invalid center_position.
        invalid_cases = (
            ({'n_clusters': 1}, graph),
            ({'n_clusters': 2, 'n_init': 0}, graph),
            ({'n_clusters': 2, 'center_position': "other"}, bigraph),
        )
        for options, matrix in invalid_cases:
            algo = KCenters(**options)
            with self.assertRaises(ValueError):
                algo.fit(matrix)
@@ -0,0 +1,34 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ """Tests for Leiden"""
4
+ import unittest
5
+
6
+ from sknetwork.clustering import Leiden
7
+ from sknetwork.data.test_graphs import *
8
+ from sknetwork.utils import bipartite2undirected
9
+
10
+
11
class TestLeidenClustering(unittest.TestCase):
    """Tests for the Leiden clustering algorithm."""

    def test_disconnected(self):
        graph = test_disconnected_graph()
        n_nodes = graph.shape[0]
        self.assertEqual(len(Leiden().fit_predict(graph)), n_nodes)

    def test_modularity(self):
        graph = test_graph()
        # both modularity variants must give the same clustering on this graph
        algos = [Leiden(modularity=name) for name in ('dugue', 'newman')]
        labels_dugue, labels_newman = [algo.fit_predict(graph) for algo in algos]
        self.assertTrue((labels_dugue == labels_newman).all())

    def test_bipartite(self):
        bigraph = test_bigraph()
        graph = bipartite2undirected(bigraph)
        algo = Leiden(modularity='newman')
        # clustering of the undirected version of the bipartite graph...
        labels_undirected = algo.fit_predict(graph)
        # ...must match the row and column labels found on the biadjacency matrix
        algo.fit(bigraph)
        labels_bipartite = np.concatenate((algo.labels_row_, algo.labels_col_))
        self.assertTrue((labels_undirected == labels_bipartite).all())
@@ -0,0 +1,135 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ """Tests for Louvain"""
4
+ import unittest
5
+
6
+ from sknetwork.clustering import Louvain
7
+ from sknetwork.data import karate_club, star_wars
8
+ from sknetwork.data.test_graphs import *
9
+ from sknetwork.utils import bipartite2undirected
10
+
11
+
12
class TestLouvainClustering(unittest.TestCase):
    """Tests for the Louvain clustering algorithm."""

    def _check_options(self, adjacency):
        """Run the option checks shared by the 32-bit and 64-bit tests; return the labels of the last fit_predict."""
        # resolution alone, then with tolerance, then with shuffling
        for options in ({}, {'tol_aggregation': 0.1}, {'shuffle_nodes': True, 'random_state': 42}):
            louvain = Louvain(resolution=2, **options)
            labels = louvain.fit_predict(adjacency)
            self.assertEqual(len(set(labels)), 7)

        # aggregate graph
        louvain = Louvain(return_aggregate=True)
        labels = louvain.fit_predict(adjacency)
        n_labels = len(set(labels))
        self.assertEqual(louvain.aggregate_.shape, (n_labels, n_labels))

        # single aggregation, unsorted clusters (must simply run)
        Louvain(n_aggregations=1, sort_clusters=False).fit(adjacency)

        return labels

    def test_disconnected(self):
        adjacency = test_disconnected_graph()
        n = adjacency.shape[0]
        labels = Louvain().fit_predict(adjacency)
        self.assertEqual(len(labels), n)

    def test_format(self):
        # dense input
        adjacency = test_graph()
        n = adjacency.shape[0]
        labels = Louvain().fit_predict(adjacency.toarray())
        self.assertEqual(len(labels), n)

    def test_modularity(self):
        adjacency = karate_club()
        louvain_d = Louvain(modularity='dugue')
        louvain_n = Louvain(modularity='newman')
        labels_d = louvain_d.fit_predict(adjacency)
        labels_n = louvain_n.fit_predict(adjacency)
        self.assertTrue((labels_d == labels_n).all())
        # the Potts variant must simply run
        louvain_p = Louvain(modularity='potts')
        louvain_p.fit_predict(adjacency)

    def test_bilouvain(self):
        biadjacency = star_wars()
        adjacency = bipartite2undirected(biadjacency)
        louvain = Louvain(modularity='newman')
        labels1 = louvain.fit_predict(adjacency)
        louvain.fit(biadjacency)
        labels2 = np.concatenate((louvain.labels_row_, louvain.labels_col_))
        self.assertTrue((labels1 == labels2).all())

    def test_options(self):
        adjacency = karate_club()
        self._check_options(adjacency)

    def test_options_with_64_bit(self):
        adjacency = karate_club()
        # force 64-bit index
        adjacency.indices = adjacency.indices.astype(np.int64)
        adjacency.indptr = adjacency.indptr.astype(np.int64)

        labels = self._check_options(adjacency)

        # check if labels are 64-bit
        self.assertEqual(labels.dtype, np.int64)

    def test_predict(self):
        adjacency = karate_club()
        n_nodes = adjacency.shape[0]
        louvain = Louvain()
        labels = louvain.fit_predict(adjacency)
        self.assertEqual(len(labels), n_nodes)
        probs = louvain.fit_predict_proba(adjacency)
        self.assertEqual(probs.shape[0], n_nodes)
        membership = louvain.fit_transform(adjacency)
        self.assertEqual(membership.shape[0], n_nodes)

        biadjacency = star_wars()
        n_row, n_col = biadjacency.shape
        louvain.fit(biadjacency)
        labels = louvain.predict()
        self.assertEqual(len(labels), n_row)
        labels = louvain.predict(columns=True)
        self.assertEqual(len(labels), n_col)
        probs = louvain.predict_proba()
        self.assertEqual(probs.shape[0], n_row)
        # column probabilities: predict_proba, not predict
        probs = louvain.predict_proba(columns=True)
        self.assertEqual(probs.shape[0], n_col)
        membership = louvain.transform()
        self.assertEqual(membership.shape[0], n_row)
        membership = louvain.transform(columns=True)
        self.assertEqual(membership.shape[0], n_col)

    def test_invalid(self):
        adjacency = karate_club()
        louvain = Louvain(modularity='toto')
        with self.assertRaises(ValueError):
            louvain.fit(adjacency)