scikit-network 0.28.3__cp39-cp39-macosx_12_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of scikit-network might be problematic. Click here for more details.

Files changed (240)
  1. scikit_network-0.28.3.dist-info/AUTHORS.rst +41 -0
  2. scikit_network-0.28.3.dist-info/LICENSE +34 -0
  3. scikit_network-0.28.3.dist-info/METADATA +457 -0
  4. scikit_network-0.28.3.dist-info/RECORD +240 -0
  5. scikit_network-0.28.3.dist-info/WHEEL +5 -0
  6. scikit_network-0.28.3.dist-info/top_level.txt +1 -0
  7. sknetwork/__init__.py +21 -0
  8. sknetwork/classification/__init__.py +8 -0
  9. sknetwork/classification/base.py +84 -0
  10. sknetwork/classification/base_rank.py +143 -0
  11. sknetwork/classification/diffusion.py +134 -0
  12. sknetwork/classification/knn.py +162 -0
  13. sknetwork/classification/metrics.py +205 -0
  14. sknetwork/classification/pagerank.py +66 -0
  15. sknetwork/classification/propagation.py +152 -0
  16. sknetwork/classification/tests/__init__.py +1 -0
  17. sknetwork/classification/tests/test_API.py +35 -0
  18. sknetwork/classification/tests/test_diffusion.py +37 -0
  19. sknetwork/classification/tests/test_knn.py +24 -0
  20. sknetwork/classification/tests/test_metrics.py +53 -0
  21. sknetwork/classification/tests/test_pagerank.py +20 -0
  22. sknetwork/classification/tests/test_propagation.py +24 -0
  23. sknetwork/classification/vote.cpython-39-darwin.so +0 -0
  24. sknetwork/classification/vote.pyx +58 -0
  25. sknetwork/clustering/__init__.py +7 -0
  26. sknetwork/clustering/base.py +102 -0
  27. sknetwork/clustering/kmeans.py +142 -0
  28. sknetwork/clustering/louvain.py +255 -0
  29. sknetwork/clustering/louvain_core.cpython-39-darwin.so +0 -0
  30. sknetwork/clustering/louvain_core.pyx +134 -0
  31. sknetwork/clustering/metrics.py +91 -0
  32. sknetwork/clustering/postprocess.py +66 -0
  33. sknetwork/clustering/propagation_clustering.py +108 -0
  34. sknetwork/clustering/tests/__init__.py +1 -0
  35. sknetwork/clustering/tests/test_API.py +37 -0
  36. sknetwork/clustering/tests/test_kmeans.py +47 -0
  37. sknetwork/clustering/tests/test_louvain.py +104 -0
  38. sknetwork/clustering/tests/test_metrics.py +50 -0
  39. sknetwork/clustering/tests/test_post_processing.py +23 -0
  40. sknetwork/clustering/tests/test_postprocess.py +39 -0
  41. sknetwork/data/__init__.py +5 -0
  42. sknetwork/data/load.py +408 -0
  43. sknetwork/data/models.py +459 -0
  44. sknetwork/data/parse.py +621 -0
  45. sknetwork/data/test_graphs.py +84 -0
  46. sknetwork/data/tests/__init__.py +1 -0
  47. sknetwork/data/tests/test_API.py +30 -0
  48. sknetwork/data/tests/test_load.py +95 -0
  49. sknetwork/data/tests/test_models.py +52 -0
  50. sknetwork/data/tests/test_parse.py +253 -0
  51. sknetwork/data/tests/test_test_graphs.py +30 -0
  52. sknetwork/data/tests/test_toy_graphs.py +68 -0
  53. sknetwork/data/toy_graphs.py +619 -0
  54. sknetwork/embedding/__init__.py +10 -0
  55. sknetwork/embedding/base.py +90 -0
  56. sknetwork/embedding/force_atlas.py +197 -0
  57. sknetwork/embedding/louvain_embedding.py +174 -0
  58. sknetwork/embedding/louvain_hierarchy.py +142 -0
  59. sknetwork/embedding/metrics.py +66 -0
  60. sknetwork/embedding/random_projection.py +133 -0
  61. sknetwork/embedding/spectral.py +214 -0
  62. sknetwork/embedding/spring.py +198 -0
  63. sknetwork/embedding/svd.py +363 -0
  64. sknetwork/embedding/tests/__init__.py +1 -0
  65. sknetwork/embedding/tests/test_API.py +73 -0
  66. sknetwork/embedding/tests/test_force_atlas.py +35 -0
  67. sknetwork/embedding/tests/test_louvain_embedding.py +33 -0
  68. sknetwork/embedding/tests/test_louvain_hierarchy.py +19 -0
  69. sknetwork/embedding/tests/test_metrics.py +29 -0
  70. sknetwork/embedding/tests/test_random_projection.py +28 -0
  71. sknetwork/embedding/tests/test_spectral.py +84 -0
  72. sknetwork/embedding/tests/test_spring.py +50 -0
  73. sknetwork/embedding/tests/test_svd.py +37 -0
  74. sknetwork/flow/__init__.py +3 -0
  75. sknetwork/flow/flow.py +73 -0
  76. sknetwork/flow/tests/__init__.py +1 -0
  77. sknetwork/flow/tests/test_flow.py +17 -0
  78. sknetwork/flow/tests/test_utils.py +69 -0
  79. sknetwork/flow/utils.py +91 -0
  80. sknetwork/gnn/__init__.py +10 -0
  81. sknetwork/gnn/activation.py +117 -0
  82. sknetwork/gnn/base.py +155 -0
  83. sknetwork/gnn/base_activation.py +89 -0
  84. sknetwork/gnn/base_layer.py +109 -0
  85. sknetwork/gnn/gnn_classifier.py +381 -0
  86. sknetwork/gnn/layer.py +153 -0
  87. sknetwork/gnn/layers.py +127 -0
  88. sknetwork/gnn/loss.py +180 -0
  89. sknetwork/gnn/neighbor_sampler.py +65 -0
  90. sknetwork/gnn/optimizer.py +163 -0
  91. sknetwork/gnn/tests/__init__.py +1 -0
  92. sknetwork/gnn/tests/test_activation.py +56 -0
  93. sknetwork/gnn/tests/test_base.py +79 -0
  94. sknetwork/gnn/tests/test_base_layer.py +37 -0
  95. sknetwork/gnn/tests/test_gnn_classifier.py +192 -0
  96. sknetwork/gnn/tests/test_layers.py +80 -0
  97. sknetwork/gnn/tests/test_loss.py +33 -0
  98. sknetwork/gnn/tests/test_neigh_sampler.py +23 -0
  99. sknetwork/gnn/tests/test_optimizer.py +43 -0
  100. sknetwork/gnn/tests/test_utils.py +93 -0
  101. sknetwork/gnn/utils.py +219 -0
  102. sknetwork/hierarchy/__init__.py +7 -0
  103. sknetwork/hierarchy/base.py +69 -0
  104. sknetwork/hierarchy/louvain_hierarchy.py +264 -0
  105. sknetwork/hierarchy/metrics.py +234 -0
  106. sknetwork/hierarchy/paris.cpython-39-darwin.so +0 -0
  107. sknetwork/hierarchy/paris.pyx +317 -0
  108. sknetwork/hierarchy/postprocess.py +350 -0
  109. sknetwork/hierarchy/tests/__init__.py +1 -0
  110. sknetwork/hierarchy/tests/test_API.py +25 -0
  111. sknetwork/hierarchy/tests/test_algos.py +29 -0
  112. sknetwork/hierarchy/tests/test_metrics.py +62 -0
  113. sknetwork/hierarchy/tests/test_postprocess.py +57 -0
  114. sknetwork/hierarchy/tests/test_ward.py +25 -0
  115. sknetwork/hierarchy/ward.py +94 -0
  116. sknetwork/linalg/__init__.py +9 -0
  117. sknetwork/linalg/basics.py +37 -0
  118. sknetwork/linalg/diteration.cpython-39-darwin.so +0 -0
  119. sknetwork/linalg/diteration.pyx +49 -0
  120. sknetwork/linalg/eig_solver.py +93 -0
  121. sknetwork/linalg/laplacian.py +15 -0
  122. sknetwork/linalg/normalization.py +66 -0
  123. sknetwork/linalg/operators.py +225 -0
  124. sknetwork/linalg/polynome.py +76 -0
  125. sknetwork/linalg/ppr_solver.py +170 -0
  126. sknetwork/linalg/push.cpython-39-darwin.so +0 -0
  127. sknetwork/linalg/push.pyx +73 -0
  128. sknetwork/linalg/sparse_lowrank.py +142 -0
  129. sknetwork/linalg/svd_solver.py +91 -0
  130. sknetwork/linalg/tests/__init__.py +1 -0
  131. sknetwork/linalg/tests/test_eig.py +44 -0
  132. sknetwork/linalg/tests/test_laplacian.py +18 -0
  133. sknetwork/linalg/tests/test_normalization.py +38 -0
  134. sknetwork/linalg/tests/test_operators.py +70 -0
  135. sknetwork/linalg/tests/test_polynome.py +38 -0
  136. sknetwork/linalg/tests/test_ppr.py +50 -0
  137. sknetwork/linalg/tests/test_sparse_lowrank.py +61 -0
  138. sknetwork/linalg/tests/test_svd.py +38 -0
  139. sknetwork/linkpred/__init__.py +4 -0
  140. sknetwork/linkpred/base.py +80 -0
  141. sknetwork/linkpred/first_order.py +508 -0
  142. sknetwork/linkpred/first_order_core.cpython-39-darwin.so +0 -0
  143. sknetwork/linkpred/first_order_core.pyx +315 -0
  144. sknetwork/linkpred/postprocessing.py +98 -0
  145. sknetwork/linkpred/tests/__init__.py +1 -0
  146. sknetwork/linkpred/tests/test_API.py +49 -0
  147. sknetwork/linkpred/tests/test_postprocessing.py +21 -0
  148. sknetwork/path/__init__.py +4 -0
  149. sknetwork/path/metrics.py +148 -0
  150. sknetwork/path/search.py +65 -0
  151. sknetwork/path/shortest_path.py +186 -0
  152. sknetwork/path/tests/__init__.py +1 -0
  153. sknetwork/path/tests/test_metrics.py +29 -0
  154. sknetwork/path/tests/test_search.py +25 -0
  155. sknetwork/path/tests/test_shortest_path.py +45 -0
  156. sknetwork/ranking/__init__.py +9 -0
  157. sknetwork/ranking/base.py +56 -0
  158. sknetwork/ranking/betweenness.cpython-39-darwin.so +0 -0
  159. sknetwork/ranking/betweenness.pyx +99 -0
  160. sknetwork/ranking/closeness.py +95 -0
  161. sknetwork/ranking/harmonic.py +82 -0
  162. sknetwork/ranking/hits.py +94 -0
  163. sknetwork/ranking/katz.py +81 -0
  164. sknetwork/ranking/pagerank.py +107 -0
  165. sknetwork/ranking/postprocess.py +25 -0
  166. sknetwork/ranking/tests/__init__.py +1 -0
  167. sknetwork/ranking/tests/test_API.py +34 -0
  168. sknetwork/ranking/tests/test_betweenness.py +38 -0
  169. sknetwork/ranking/tests/test_closeness.py +34 -0
  170. sknetwork/ranking/tests/test_hits.py +20 -0
  171. sknetwork/ranking/tests/test_pagerank.py +69 -0
  172. sknetwork/regression/__init__.py +4 -0
  173. sknetwork/regression/base.py +56 -0
  174. sknetwork/regression/diffusion.py +190 -0
  175. sknetwork/regression/tests/__init__.py +1 -0
  176. sknetwork/regression/tests/test_API.py +34 -0
  177. sknetwork/regression/tests/test_diffusion.py +48 -0
  178. sknetwork/sknetwork.py +3 -0
  179. sknetwork/topology/__init__.py +9 -0
  180. sknetwork/topology/dag.py +74 -0
  181. sknetwork/topology/dag_core.cpython-39-darwin.so +0 -0
  182. sknetwork/topology/dag_core.pyx +38 -0
  183. sknetwork/topology/kcliques.cpython-39-darwin.so +0 -0
  184. sknetwork/topology/kcliques.pyx +193 -0
  185. sknetwork/topology/kcore.cpython-39-darwin.so +0 -0
  186. sknetwork/topology/kcore.pyx +120 -0
  187. sknetwork/topology/structure.py +234 -0
  188. sknetwork/topology/tests/__init__.py +1 -0
  189. sknetwork/topology/tests/test_cliques.py +28 -0
  190. sknetwork/topology/tests/test_cores.py +21 -0
  191. sknetwork/topology/tests/test_dag.py +26 -0
  192. sknetwork/topology/tests/test_structure.py +99 -0
  193. sknetwork/topology/tests/test_triangles.py +42 -0
  194. sknetwork/topology/tests/test_wl_coloring.py +49 -0
  195. sknetwork/topology/tests/test_wl_kernel.py +31 -0
  196. sknetwork/topology/triangles.cpython-39-darwin.so +0 -0
  197. sknetwork/topology/triangles.pyx +166 -0
  198. sknetwork/topology/weisfeiler_lehman.py +163 -0
  199. sknetwork/topology/weisfeiler_lehman_core.cpython-39-darwin.so +0 -0
  200. sknetwork/topology/weisfeiler_lehman_core.pyx +116 -0
  201. sknetwork/utils/__init__.py +40 -0
  202. sknetwork/utils/base.py +35 -0
  203. sknetwork/utils/check.py +354 -0
  204. sknetwork/utils/co_neighbor.py +71 -0
  205. sknetwork/utils/format.py +219 -0
  206. sknetwork/utils/kmeans.py +89 -0
  207. sknetwork/utils/knn.py +166 -0
  208. sknetwork/utils/knn1d.cpython-39-darwin.so +0 -0
  209. sknetwork/utils/knn1d.pyx +80 -0
  210. sknetwork/utils/membership.py +82 -0
  211. sknetwork/utils/minheap.cpython-39-darwin.so +0 -0
  212. sknetwork/utils/minheap.pxd +22 -0
  213. sknetwork/utils/minheap.pyx +111 -0
  214. sknetwork/utils/neighbors.py +115 -0
  215. sknetwork/utils/seeds.py +75 -0
  216. sknetwork/utils/simplex.py +140 -0
  217. sknetwork/utils/tests/__init__.py +1 -0
  218. sknetwork/utils/tests/test_base.py +28 -0
  219. sknetwork/utils/tests/test_bunch.py +16 -0
  220. sknetwork/utils/tests/test_check.py +190 -0
  221. sknetwork/utils/tests/test_co_neighbor.py +43 -0
  222. sknetwork/utils/tests/test_format.py +61 -0
  223. sknetwork/utils/tests/test_kmeans.py +21 -0
  224. sknetwork/utils/tests/test_knn.py +32 -0
  225. sknetwork/utils/tests/test_membership.py +24 -0
  226. sknetwork/utils/tests/test_neighbors.py +41 -0
  227. sknetwork/utils/tests/test_projection_simplex.py +33 -0
  228. sknetwork/utils/tests/test_seeds.py +67 -0
  229. sknetwork/utils/tests/test_verbose.py +15 -0
  230. sknetwork/utils/tests/test_ward.py +20 -0
  231. sknetwork/utils/timeout.py +38 -0
  232. sknetwork/utils/verbose.py +37 -0
  233. sknetwork/utils/ward.py +60 -0
  234. sknetwork/visualization/__init__.py +4 -0
  235. sknetwork/visualization/colors.py +34 -0
  236. sknetwork/visualization/dendrograms.py +229 -0
  237. sknetwork/visualization/graphs.py +819 -0
  238. sknetwork/visualization/tests/__init__.py +1 -0
  239. sknetwork/visualization/tests/test_dendrograms.py +53 -0
  240. sknetwork/visualization/tests/test_graphs.py +167 -0
sknetwork/hierarchy/postprocess.py
@@ -0,0 +1,350 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ Created on June 2019
5
+ @author: Thomas Bonald <bonald@enst.fr>
6
+ @author: Bertrand Charpentier <bertrand.charpentier@live.fr>
7
+ @author: Quentin Lutz <qlutz@enst.fr>
8
+ """
9
+
10
+ import copy
11
+ from collections import defaultdict
12
+ from typing import Optional, Union, Tuple
13
+
14
+ import numpy as np
15
+
16
+ from sknetwork.utils.check import check_n_clusters, check_dendrogram
17
+
18
+
19
def reorder_dendrogram(dendrogram: np.ndarray) -> np.ndarray:
    """Return a copy of the dendrogram with merges sorted by non-decreasing height.

    Merges of equal height are ordered by the largest node index they involve.
    Internal node indices are renumbered so that they match the new row order.
    """
    n_leaves = dendrogram.shape[0] + 1

    # np.lexsort treats the LAST key as the primary one: height first, then largest child index.
    sort_keys = np.zeros((2, n_leaves - 1), float)
    sort_keys[1] = dendrogram[:, 2]
    sort_keys[0] = np.max(dendrogram[:, :2], axis=1)
    permutation = np.lexsort(sort_keys)

    reordered = dendrogram[permutation]

    # relabel[old_index] -> new_index; leaves keep their index, internal nodes follow the new order.
    relabel = np.arange(2 * n_leaves - 1)
    relabel[n_leaves + permutation] = np.arange(n_leaves, 2 * n_leaves - 1)
    for column in (0, 1):
        reordered[:, column] = relabel[reordered[:, column].astype(int)]
    return reordered
32
+
33
+
34
def get_labels(dendrogram: np.ndarray, cluster: dict, sort_clusters: bool, return_dendrogram: bool):
    """Turn a mapping of clusters into a label vector, optionally with the aggregated dendrogram.

    Parameters
    ----------
    dendrogram :
        Dendrogram whose rows are ``(i, j, height, size)``.
    cluster :
        Dictionary whose values are the lists of nodes of each cluster.
    sort_clusters :
        If ``True``, label clusters in decreasing order of size.
    return_dendrogram :
        If ``True``, also return the dendrogram whose leaves are the clusters.

    Returns
    -------
    labels : np.ndarray
        Label of each node.
    dendrogram_new : np.ndarray
        Only if ``return_dendrogram`` is ``True``: merges between clusters, up to the root.
    """
    n_nodes = len(dendrogram) + 1
    groups = list(cluster.values())
    if sort_clusters:
        group_sizes = np.array([len(members) for members in groups])
        groups = [groups[k] for k in np.argsort(-group_sizes)]

    labels = np.zeros(n_nodes, dtype=int)
    for rank, members in enumerate(groups):
        labels[members] = rank

    if not return_dendrogram:
        return labels

    # node (leaf or internal) -> index of the cluster (or aggregated node) that contains it
    node_to_group = dict(enumerate(labels))
    group_size = {rank: len(members) for rank, members in enumerate(groups)}
    aggregated = []
    next_node = len(labels)
    next_group = len(groups)
    for left, right, height, _ in dendrogram:
        group_left = node_to_group.pop(int(left))
        group_right = node_to_group.pop(int(right))
        if group_left == group_right:
            # merge inside a single cluster: nothing to record in the aggregated dendrogram
            node_to_group[next_node] = group_left
        else:
            merged_size = group_size.pop(group_left) + group_size.pop(group_right)
            group_size[next_group] = merged_size
            node_to_group[next_node] = next_group
            aggregated.append([group_left, group_right, height, merged_size])
            next_group += 1
        next_node += 1
    return labels, np.array(aggregated)
69
+
70
+
71
def cut_straight(dendrogram: np.ndarray, n_clusters: Optional[int] = None, threshold: Optional[float] = None,
                 sort_clusters: bool = True, return_dendrogram: bool = False) \
        -> Union[np.ndarray, Tuple[np.ndarray, np.ndarray]]:
    """Cut a dendrogram and return the corresponding clustering.

    Parameters
    ----------
    dendrogram:
        Dendrogram.
    n_clusters :
        Number of clusters (optional).
        The number of clusters can be larger than n_clusters in case of equal heights in the dendrogram.
    threshold :
        Threshold on height (optional).
        If both n_clusters and threshold are ``None``, n_clusters is set to 2.
    sort_clusters :
        If ``True``, sorts clusters in decreasing order of size.
    return_dendrogram :
        If ``True``, returns the dendrogram formed by the clusters up to the root.

    Returns
    -------
    labels : np.ndarray
        Cluster of each node.
    dendrogram_aggregate : np.ndarray
        Dendrogram starting from clusters (leaves = clusters).

    Example
    -------
    >>> from sknetwork.hierarchy import cut_straight
    >>> dendrogram = np.array([[0, 1, 0, 2], [2, 3, 1, 3]])
    >>> cut_straight(dendrogram)
    array([0, 0, 1])
    """
    check_dendrogram(dendrogram)
    n = dendrogram.shape[0] + 1

    if return_dendrogram:
        height = dendrogram[:, 2]
        # Reorder only when some merge is lower than the one before it,
        # i.e. when the heights are NOT already in non-decreasing order.
        if np.any(height[1:] < height[:-1]):
            dendrogram = reorder_dendrogram(dendrogram)

    cluster = {i: [i] for i in range(n)}
    if n_clusters is None:
        # Default to 2 clusters (capped by the number of nodes) unless a threshold drives the cut.
        n_clusters = min(2, n) if threshold is None else n
    else:
        check_n_clusters(n_clusters, n, n_min=1)

    sorted_heights = np.sort(dendrogram[:, 2])
    rank = n - n_clusters
    # There are only n - 1 merges: asking for a single cluster means every merge is accepted.
    cut = sorted_heights[rank] if rank < len(sorted_heights) else np.inf
    if threshold is not None:
        cut = max(cut, threshold)

    for t in range(n - 1):
        i = int(dendrogram[t][0])
        j = int(dendrogram[t][1])
        # A merge is applied only if it is strictly below the cut and both children are still clusters.
        if dendrogram[t][2] < cut and i in cluster and j in cluster:
            cluster[n + t] = cluster.pop(i) + cluster.pop(j)

    return get_labels(dendrogram, cluster, sort_clusters, return_dendrogram)
130
+
131
+
132
def cut_balanced(dendrogram: np.ndarray, max_cluster_size: int = 20, sort_clusters: bool = True,
                 return_dendrogram: bool = False) -> Union[np.ndarray, Tuple[np.ndarray, np.ndarray]]:
    """Cut a dendrogram under a constraint on the cluster size and return the clustering.

    Parameters
    ----------
    dendrogram:
        Dendrogram
    max_cluster_size :
        Maximum size of each cluster.
    sort_clusters :
        If ``True``, sort labels in decreasing order of cluster size.
    return_dendrogram :
        If ``True``, returns the dendrogram formed by the clusters up to the root.

    Returns
    -------
    labels : np.ndarray
        Label of each node.
    dendrogram_aggregate : np.ndarray
        Dendrogram starting from clusters (leaves = clusters).

    Raises
    ------
    ValueError
        If ``max_cluster_size`` is lower than 2 or larger than the number of nodes.

    Example
    -------
    >>> from sknetwork.hierarchy import cut_balanced
    >>> dendrogram = np.array([[0, 1, 0, 2], [2, 3, 1, 3]])
    >>> cut_balanced(dendrogram, 2)
    array([0, 0, 1])
    """
    check_dendrogram(dendrogram)
    n_nodes = dendrogram.shape[0] + 1
    if max_cluster_size > n_nodes or max_cluster_size < 2:
        raise ValueError("The maximum cluster size must be between 2 and the number of nodes.")

    members = {node: [node] for node in range(n_nodes)}
    for step in range(n_nodes - 1):
        merge = dendrogram[step]
        left, right = int(merge[0]), int(merge[1])
        # Both children must still be whole clusters, i.e. no earlier merge below them was refused.
        both_alive = left in members and right in members
        if both_alive and len(members[left]) + len(members[right]) <= max_cluster_size:
            members[n_nodes + step] = members.pop(left) + members.pop(right)

    return get_labels(dendrogram, members, sort_clusters, return_dendrogram)
173
+
174
+
175
def aggregate_dendrogram(dendrogram: np.ndarray, n_clusters: int = 2, return_counts: bool = False) \
        -> Union[np.ndarray, Tuple[np.ndarray, np.ndarray]]:
    """Aggregate a dendrogram in order to get a certain number of leaves.
    The leaves in the output dendrogram correspond to subtrees in the input one.

    Only the last ``n_clusters - 1`` rows of the input are kept, so the rows are expected
    to be ordered from the first merge to the root.

    Parameters
    ----------
    dendrogram:
        The input to aggregate.
    n_clusters:
        Number of clusters (or leaves) to keep.
    return_counts
        If ``True``, returns an array of counts corresponding to the sizes of the merged subtrees.
        The sum of the counts is equal to the number of samples in the input dendrogram.

    Returns
    -------
    new_dendrogram:
        Aggregated dendrogram. The nodes are reindexed from 0.
    counts:
        Size of the subtrees corresponding to each leaf in new_dendrogram.
    """
    n_nodes: int = dendrogram.shape[0] + 1
    check_n_clusters(n_clusters, n_nodes, n_min=1)

    new_dendrogram = dendrogram[n_nodes - n_clusters:].copy()
    # Sorted distinct node indices of the kept merges; the position in this array is the new index.
    node_indices = np.unique(new_dendrogram[:, :2])
    new_dendrogram[:, :2] = np.searchsorted(node_indices, new_dendrogram[:, :2])

    if not return_counts:
        return new_dendrogram

    if new_dendrogram.shape[0] == 0:
        # No merge is kept: the single remaining leaf is the whole input tree.
        return new_dendrogram, np.array([n_nodes], dtype=int)

    leaves = node_indices[:n_clusters].astype(int)
    # A leaf that is an original node (index < n_nodes) has size 1. Only internal nodes
    # have a row in the input dendrogram, at position index - n_nodes.
    counts = np.ones(len(leaves), dtype=int)
    is_internal = leaves >= n_nodes
    counts[is_internal] = dendrogram[leaves[is_internal] - n_nodes, 3]
    return new_dendrogram, counts
216
+
217
+
218
def get_index(tree):
    """Return the largest leaf label found in a tree given as nested lists.

    Parameters
    ----------
    tree:
        Either a leaf label or a (possibly nested) list of sub-trees.

    Returns
    -------
    index:
        ``tree`` itself if it is not a list, otherwise the maximum label over all its leaves.
    """
    # isinstance (rather than an exact type comparison) also accepts list subclasses as trees.
    if not isinstance(tree, list):
        return tree
    return np.max([get_index(subtree) for subtree in tree])
235
+
236
+
237
def get_dendrogram(tree, dendrogram=None, index=None, depth=0, size=None, copy_tree=False):
    """Get dendrogram from tree.

    The tree is consumed in place: each sub-tree whose children are all leaves is replaced
    by a single new node. Use ``copy_tree=True`` to leave the caller's tree untouched.

    Parameters
    ----------
    tree :
        The initial tree, as nested lists whose leaves are one-element lists.
    dendrogram :
        Intermediary dendrogram for recursive use
    index :
        Intermediary index for recursive use
    depth :
        Current depth for recursive use
    size :
        Current leaf count for recursive use
    copy_tree :
        If ``True``, ensure the passed tree remains unchanged.

    Returns
    -------
    dendrogram :
        List of merges ``[i, j, height, size]`` where the height is minus the depth of the merge.
    index :
        Last index in use once the tree has been processed.
    """
    if copy_tree:
        # Work on a private copy, forwarding every other argument unchanged.
        return get_dendrogram(copy.deepcopy(tree), dendrogram, index, depth, size)

    if dendrogram is None:
        dendrogram = []
    if index is None:
        index = get_index(tree)
    if size is None:
        # Original leaves have size 1; sizes of created nodes are stored explicitly.
        size = defaultdict(lambda: 1)

    if len(tree) <= 1:
        return dendrogram, index

    lengths = np.array([len(t) for t in tree])
    if np.max(lengths) == 1:
        # All children are leaves (or collapsed sub-trees): merge them all at the current depth.
        i = tree.pop()[0]
        j = tree.pop()[0]
        s = size[i] + size[j]
        dendrogram.append([i, j, float(-depth), s])
        index += 1
        while len(tree):
            child = tree.pop()[0]
            # A remaining child may itself be a collapsed sub-tree, so add its real size.
            s += size[child]
            dendrogram.append([index, child, float(-depth), s])
            index += 1
        size[index] = s
        # The sub-tree is now represented by the single node just created.
        tree.append(index)
        return dendrogram, index

    # Collapse the first child that is still a proper sub-tree, then process this level again.
    i = np.argwhere(lengths > 1).ravel()[0]
    dendrogram_, index_ = get_dendrogram(tree[i], None, index, depth + 1, size)
    dendrogram += dendrogram_
    return get_dendrogram(tree, dendrogram, index_, depth, size)
294
+
295
+
296
def split_dendrogram(dendrogram: np.ndarray, shape: tuple):
    """Split the dendrogram of a bipartite graph into 2 dendrograms, one for each part.

    Parameters
    ----------
    dendrogram :
        Dendrogram of the bipartite graph.
    shape :
        Shape of the biadjacency matrix.

    Returns
    -------
    dendrogram_row :
        Dendrogram for the rows.
    dendrogram_col :
        Dendrogram for the columns.
    """
    n_row, n_col = shape
    n_total = n_row + n_col

    # State is kept per side (0 = rows, 1 = columns), keyed by node index in the full dendrogram:
    # ids[side][node] is the node's index in the side's own dendrogram,
    # sizes[side][node] is the number of leaves of that side below the node.
    ids = ({k: k for k in range(n_row)}, {n_row + k: k for k in range(n_col)})
    sizes = ({k: 1 for k in range(n_row)}, {n_row + k: 1 for k in range(n_col)})
    merges = ([], [])
    next_id = [n_row, n_col]

    for step in range(n_total - 1):
        left = dendrogram[step, 0]
        right = dendrogram[step, 1]
        parent = n_total + step

        for side in (0, 1):
            side_ids, side_sizes = ids[side], sizes[side]
            has_left = left in side_ids
            has_right = right in side_ids
            if has_left and has_right:
                # Both children contain nodes of this side: record a real merge.
                side_sizes[parent] = side_sizes.pop(left) + side_sizes.pop(right)
                side_ids[parent] = next_id[side]
                merges[side].append([side_ids.pop(left), side_ids.pop(right),
                                     dendrogram[step, 2], side_sizes[parent]])
                next_id[side] += 1
            elif has_left or has_right:
                # Only one child contains nodes of this side: the parent simply inherits it.
                child = left if has_left else right
                side_sizes[parent] = side_sizes.pop(child)
                side_ids[parent] = side_ids.pop(child)

    return np.array(merges[0]), np.array(merges[1])
sknetwork/hierarchy/tests/__init__.py
@@ -0,0 +1 @@
1
+ """tests for hierarchy"""
sknetwork/hierarchy/tests/test_API.py
@@ -0,0 +1,25 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ """Tests for hierarchy API"""
4
+ import unittest
5
+
6
+ from sknetwork.data.test_graphs import *
7
+ from sknetwork.embedding import GSVD
8
+ from sknetwork.hierarchy import *
9
+
10
+
11
class TestHierarchyAPI(unittest.TestCase):
    """Shape checks on the dendrograms returned through the common hierarchy API."""

    def test_undirected(self):
        """Each algorithm returns one merge per node but one on the undirected test graph."""
        adjacency = test_graph()
        n_nodes = adjacency.shape[0]

        algorithms = [Paris(), Ward(GSVD(3)), LouvainIteration()]
        for algo in algorithms:
            dendrogram = algo.fit_predict(adjacency)
            self.assertTupleEqual(dendrogram.shape, (n_nodes - 1, 4))

    def test_disconnected(self):
        """Each algorithm still returns a full dendrogram on the disconnected test graph."""
        adjacency = test_graph_disconnect()
        algorithms = [Paris(), Ward(GSVD(3)), LouvainIteration()]
        for algo in algorithms:
            dendrogram = algo.fit_transform(adjacency)
            self.assertEqual(dendrogram.shape, (9, 4))
sknetwork/hierarchy/tests/test_algos.py
@@ -0,0 +1,29 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ Created in March 2020
5
+ @author: Quentin Lutz <qlutz@enst.fr>
6
+ @author: Thomas Bonald <tbonald@enst.fr>
7
+ """
8
+
9
+ import unittest
10
+
11
+ from sknetwork.data.test_graphs import *
12
+ from sknetwork.hierarchy import LouvainIteration, LouvainHierarchy, Paris
13
+
14
+
15
class TestLouvainHierarchy(unittest.TestCase):
    """Shape checks for the Louvain-based and Paris hierarchies on every kind of test graph."""

    def test(self):
        """Run each algorithm, with default and custom options, on each test graph."""
        algorithms = [
            LouvainIteration(),
            LouvainIteration(resolution=2, depth=1),
            LouvainHierarchy(),
            LouvainHierarchy(tol_aggregation=0.1),
            Paris(),
            Paris(weights='uniform', reorder=False),
        ]
        for algo in algorithms:
            for input_matrix in [test_graph(), test_digraph(), test_bigraph()]:
                n_row = input_matrix.shape[0]
                dendrogram = algo.fit_predict(input_matrix)
                self.assertEqual(dendrogram.shape, (n_row - 1, 4))
                if algo.bipartite:
                    # the full dendrogram covers rows and columns together
                    self.assertEqual(algo.dendrogram_full_.shape, (sum(input_matrix.shape) - 1, 4))
sknetwork/hierarchy/tests/test_metrics.py
@@ -0,0 +1,62 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ Created on March 2019
5
+ @author: Thomas Bonald <bonald@enst.fr>
6
+ """
7
+
8
+ import unittest
9
+
10
+ from sknetwork.data.test_graphs import *
11
+ from sknetwork.data import cyclic_graph
12
+ from sknetwork.hierarchy import Paris, LouvainIteration, dasgupta_cost, dasgupta_score, tree_sampling_divergence
13
+
14
+
15
+ # noinspection PyMissingOrEmptyDocstring
16
class TestMetrics(unittest.TestCase):
    """Regression values for the Dasgupta and tree-sampling-divergence metrics."""

    def setUp(self):
        self.paris = Paris()
        self.louvain_hierarchy = LouvainIteration()

    def test_undirected(self):
        # small cycle
        cycle = cyclic_graph(3)
        dendrogram = self.paris.fit_predict(cycle)
        self.assertAlmostEqual(dasgupta_cost(cycle, dendrogram), 2.666, 2)
        self.assertAlmostEqual(dasgupta_score(cycle, dendrogram), 0.111, 2)
        self.assertAlmostEqual(tree_sampling_divergence(cycle, dendrogram), 0.0632, 3)
        self.assertAlmostEqual(tree_sampling_divergence(cycle, dendrogram, normalized=False), 0.0256, 3)

        # test graph, Paris
        adjacency = test_graph()
        dendrogram = self.paris.fit_transform(adjacency)
        self.assertAlmostEqual(dasgupta_cost(adjacency, dendrogram), 4.26, 2)
        self.assertAlmostEqual(dasgupta_score(adjacency, dendrogram), 0.573, 2)
        self.assertAlmostEqual(tree_sampling_divergence(adjacency, dendrogram), 0.304, 2)

        # test graph, Louvain iteration
        dendrogram = self.louvain_hierarchy.fit_transform(adjacency)
        self.assertAlmostEqual(dasgupta_cost(adjacency, dendrogram), 4.43, 2)
        self.assertAlmostEqual(dasgupta_score(adjacency, dendrogram), 0.555, 2)
        self.assertAlmostEqual(tree_sampling_divergence(adjacency, dendrogram), 0.286, 2)

    def test_directed(self):
        adjacency = test_digraph()

        dendrogram = self.paris.fit_transform(adjacency)
        self.assertAlmostEqual(dasgupta_score(adjacency, dendrogram), 0.566, 2)
        self.assertAlmostEqual(tree_sampling_divergence(adjacency, dendrogram), 0.318, 2)

        dendrogram = self.louvain_hierarchy.fit_transform(adjacency)
        self.assertAlmostEqual(dasgupta_score(adjacency, dendrogram), 0.55, 2)
        self.assertAlmostEqual(tree_sampling_divergence(adjacency, dendrogram), 0.313, 2)

    def test_disconnected(self):
        adjacency = test_graph_disconnect()

        dendrogram = self.paris.fit_transform(adjacency)
        self.assertAlmostEqual(dasgupta_score(adjacency, dendrogram), 0.682, 2)
        self.assertAlmostEqual(tree_sampling_divergence(adjacency, dendrogram), 0.464, 2)

        dendrogram = self.louvain_hierarchy.fit_transform(adjacency)
        self.assertAlmostEqual(dasgupta_score(adjacency, dendrogram), 0.670, 2)
        self.assertAlmostEqual(tree_sampling_divergence(adjacency, dendrogram), 0.594, 2)

    def test_options(self):
        adjacency = test_graph()
        dendrogram = self.paris.fit_transform(adjacency)
        self.assertAlmostEqual(dasgupta_score(adjacency, dendrogram, weights='degree'), 0.573, 2)
        self.assertAlmostEqual(tree_sampling_divergence(adjacency, dendrogram, weights='uniform'), 0.271, 2)
        self.assertAlmostEqual(tree_sampling_divergence(adjacency, dendrogram, normalized=False), 0.367, 2)
sknetwork/hierarchy/tests/test_postprocess.py
@@ -0,0 +1,57 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ Created on March 2019
5
+ @author: Thomas Bonald <bonald@enst.fr>
6
+ @author: Quentin Lutz <qlutz@enst.fr>
7
+ """
8
+
9
+ import unittest
10
+
11
+ from sknetwork.data import karate_club
12
+ from sknetwork.hierarchy import Paris, cut_straight, cut_balanced, aggregate_dendrogram
13
+
14
+
15
+ # noinspection PyMissingOrEmptyDocstring
16
class TestCuts(unittest.TestCase):
    """Checks on the number of clusters produced when cutting the karate-club dendrogram."""

    def setUp(self):
        self.adjacency = karate_club()
        self.dendrogram = Paris().fit_transform(self.adjacency)

    def _n_labels(self, labels):
        """Number of distinct labels."""
        return len(set(labels))

    def test_cuts(self):
        labels = cut_straight(self.dendrogram)
        self.assertEqual(self._n_labels(labels), 2)
        labels = cut_straight(self.dendrogram, n_clusters=5)
        self.assertEqual(self._n_labels(labels), 5)
        labels = cut_balanced(self.dendrogram, 2)
        self.assertEqual(self._n_labels(labels), 21)
        labels, new_dendrogram = cut_balanced(self.dendrogram, max_cluster_size=4, return_dendrogram=True)
        self.assertEqual(self._n_labels(labels), 12)
        self.assertTupleEqual(new_dendrogram.shape, (11, 4))
        # same cut on a dendrogram that has not been reordered
        unordered = Paris(reorder=False).fit_predict(self.adjacency)
        labels = cut_balanced(unordered, 4)
        self.assertEqual(self._n_labels(labels), 12)

    def test_options(self):
        labels = cut_straight(self.dendrogram, threshold=0.5)
        self.assertEqual(self._n_labels(labels), 7)
        labels = cut_straight(self.dendrogram, n_clusters=3, threshold=0.5)
        self.assertEqual(self._n_labels(labels), 3)
        labels = cut_straight(self.dendrogram, sort_clusters=False)
        self.assertEqual(self._n_labels(labels), 2)
        labels = cut_balanced(self.dendrogram, max_cluster_size=2, sort_clusters=False)
        self.assertEqual(self._n_labels(labels), 21)
        labels = cut_balanced(self.dendrogram, max_cluster_size=10)
        self.assertEqual(self._n_labels(labels), 5)

    def test_aggregation(self):
        aggregated = aggregate_dendrogram(self.dendrogram, n_clusters=3)
        self.assertEqual(len(aggregated), 2)

        aggregated, counts = aggregate_dendrogram(self.dendrogram, n_clusters=3, return_counts=True)
        self.assertEqual(len(aggregated), 2)
        self.assertEqual(len(counts), 3)
57
+
@@ -0,0 +1,25 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ Created on October 2019
5
+ @author: Nathan de Lara <nathan.delara@polytechnique.org>
6
+ """
7
+
8
+ import unittest
9
+
10
+ from sknetwork.data.test_graphs import test_graph, test_digraph, test_bigraph
11
+ from sknetwork.embedding import Spectral
12
+ from sknetwork.hierarchy import Ward
13
+
14
+
15
class TestWard(unittest.TestCase):
    """Tests of the shape of the dendrograms returned by Ward."""

    def test_options(self):
        """Run Ward, with default options and with co-clustering, on the three test graphs."""
        default_ward = Ward()
        co_cluster_ward = Ward(embedding_method=Spectral(3), co_cluster=True)
        for algo in (default_ward, co_cluster_ward):
            # the test graphs are rebuilt for each algorithm
            for input_matrix in (test_graph(), test_digraph(), test_bigraph()):
                dendrogram = algo.fit_predict(input_matrix)
                n_row = input_matrix.shape[0]
                self.assertEqual(dendrogram.shape, (n_row - 1, 4))
                if not algo.co_cluster:
                    continue
                # with co-clustering, the full dendrogram covers rows and columns together
                n_total = sum(input_matrix.shape)
                self.assertEqual(algo.dendrogram_full_.shape, (n_total - 1, 4))
@@ -0,0 +1,94 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ Created on October 2019
5
+ @author: Nathan de Lara <nathan.delara@polytechnique.org>
6
+ @author: Thomas Bonald <bonald@enst.fr>
7
+ """
8
+
9
+ from typing import Union
10
+
11
+ import numpy as np
12
+ from scipy import sparse
13
+
14
+ from sknetwork.embedding import BaseEmbedding, Spectral
15
+ from sknetwork.clustering.kmeans import get_embedding
16
+ from sknetwork.hierarchy.base import BaseHierarchy
17
+ from sknetwork.utils.check import check_format
18
+ from sknetwork.utils.ward import WardDense
19
+
20
+
21
class Ward(BaseHierarchy):
    """Hierarchical clustering by the Ward method.

    Parameters
    ----------
    embedding_method :
        Embedding method. If ``None`` (default), a Spectral embedding in dimension 10 is created
        for this instance.
    co_cluster :
        If ``True``, co-cluster rows and columns, considered as different nodes (default = ``False``).

    Attributes
    ----------
    dendrogram_ :
        Dendrogram of the graph.
    dendrogram_row_ :
        Dendrogram for the rows, for bipartite graphs.
    dendrogram_col_ :
        Dendrogram for the columns, for bipartite graphs.
    dendrogram_full_ :
        Dendrogram for both rows and columns, indexed in this order, for bipartite graphs.

    Examples
    --------
    >>> from sknetwork.hierarchy import Ward
    >>> from sknetwork.data import karate_club
    >>> ward = Ward()
    >>> adjacency = karate_club()
    >>> dendrogram = ward.fit_transform(adjacency)
    >>> dendrogram.shape
    (33, 4)

    References
    ----------
    * Ward, J. H., Jr. (1963). Hierarchical grouping to optimize an objective function.
      Journal of the American Statistical Association.

    * Murtagh, F., & Contreras, P. (2012). Algorithms for hierarchical clustering: an overview.
      Wiley Interdisciplinary Reviews: Data Mining and Knowledge Discovery.
    """
    def __init__(self, embedding_method: Union[BaseEmbedding, None] = None, co_cluster: bool = False):
        super(Ward, self).__init__()
        # The default embedding is built here rather than in the signature: a default written in
        # the signature is evaluated once, so a single embedding object would be shared by every
        # Ward created without this argument.
        if embedding_method is None:
            embedding_method = Spectral(10)
        self.embedding_method = embedding_method
        self.co_cluster = co_cluster
        # set by fit, from the second value returned by get_embedding
        self.bipartite = None

    def fit(self, input_matrix: Union[sparse.csr_matrix, np.ndarray]) -> 'Ward':
        """Applies embedding method followed by the Ward algorithm.

        Parameters
        ----------
        input_matrix :
            Adjacency matrix or biadjacency matrix of the graph.

        Returns
        -------
        self: :class:`Ward`
        """
        self._init_vars()

        # input
        # NOTE(review): the value returned by check_format is not used; input_matrix is passed on
        # as given. Presumably the call is only meant to validate the input -- confirm.
        check_format(input_matrix)

        # embedding
        embedding, self.bipartite = get_embedding(input_matrix, self.embedding_method, self.co_cluster)

        # clustering
        ward = WardDense()
        self.dendrogram_ = ward.fit_transform(embedding)

        # output
        if self.co_cluster:
            self._split_vars(input_matrix.shape)

        return self
@@ -0,0 +1,9 @@
1
+ """Module of linear algebra."""
2
+ from sknetwork.linalg.basics import safe_sparse_dot
3
+ from sknetwork.linalg.eig_solver import EigSolver, LanczosEig
4
+ from sknetwork.linalg.laplacian import get_laplacian
5
+ from sknetwork.linalg.normalization import diag_pinv, normalize
6
+ from sknetwork.linalg.operators import Regularizer, Laplacian, Normalizer, CoNeighbor
7
+ from sknetwork.linalg.polynome import Polynome
8
+ from sknetwork.linalg.sparse_lowrank import SparseLR
9
+ from sknetwork.linalg.svd_solver import SVDSolver, LanczosSVD