scikit-network 0.33.3__cp312-cp312-macosx_10_13_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of scikit-network might be problematic. Click here for more details.

Files changed (228) hide show
  1. scikit_network-0.33.3.dist-info/METADATA +122 -0
  2. scikit_network-0.33.3.dist-info/RECORD +228 -0
  3. scikit_network-0.33.3.dist-info/WHEEL +6 -0
  4. scikit_network-0.33.3.dist-info/licenses/AUTHORS.rst +43 -0
  5. scikit_network-0.33.3.dist-info/licenses/LICENSE +34 -0
  6. scikit_network-0.33.3.dist-info/top_level.txt +1 -0
  7. sknetwork/__init__.py +21 -0
  8. sknetwork/base.py +67 -0
  9. sknetwork/classification/__init__.py +8 -0
  10. sknetwork/classification/base.py +142 -0
  11. sknetwork/classification/base_rank.py +133 -0
  12. sknetwork/classification/diffusion.py +134 -0
  13. sknetwork/classification/knn.py +139 -0
  14. sknetwork/classification/metrics.py +205 -0
  15. sknetwork/classification/pagerank.py +66 -0
  16. sknetwork/classification/propagation.py +152 -0
  17. sknetwork/classification/tests/__init__.py +1 -0
  18. sknetwork/classification/tests/test_API.py +30 -0
  19. sknetwork/classification/tests/test_diffusion.py +77 -0
  20. sknetwork/classification/tests/test_knn.py +23 -0
  21. sknetwork/classification/tests/test_metrics.py +53 -0
  22. sknetwork/classification/tests/test_pagerank.py +20 -0
  23. sknetwork/classification/tests/test_propagation.py +24 -0
  24. sknetwork/classification/vote.cpp +27581 -0
  25. sknetwork/classification/vote.cpython-312-darwin.so +0 -0
  26. sknetwork/classification/vote.pyx +56 -0
  27. sknetwork/clustering/__init__.py +8 -0
  28. sknetwork/clustering/base.py +172 -0
  29. sknetwork/clustering/kcenters.py +253 -0
  30. sknetwork/clustering/leiden.py +242 -0
  31. sknetwork/clustering/leiden_core.cpp +31572 -0
  32. sknetwork/clustering/leiden_core.cpython-312-darwin.so +0 -0
  33. sknetwork/clustering/leiden_core.pyx +124 -0
  34. sknetwork/clustering/louvain.py +286 -0
  35. sknetwork/clustering/louvain_core.cpp +31217 -0
  36. sknetwork/clustering/louvain_core.cpython-312-darwin.so +0 -0
  37. sknetwork/clustering/louvain_core.pyx +124 -0
  38. sknetwork/clustering/metrics.py +91 -0
  39. sknetwork/clustering/postprocess.py +66 -0
  40. sknetwork/clustering/propagation_clustering.py +104 -0
  41. sknetwork/clustering/tests/__init__.py +1 -0
  42. sknetwork/clustering/tests/test_API.py +38 -0
  43. sknetwork/clustering/tests/test_kcenters.py +60 -0
  44. sknetwork/clustering/tests/test_leiden.py +34 -0
  45. sknetwork/clustering/tests/test_louvain.py +135 -0
  46. sknetwork/clustering/tests/test_metrics.py +50 -0
  47. sknetwork/clustering/tests/test_postprocess.py +39 -0
  48. sknetwork/data/__init__.py +6 -0
  49. sknetwork/data/base.py +33 -0
  50. sknetwork/data/load.py +406 -0
  51. sknetwork/data/models.py +459 -0
  52. sknetwork/data/parse.py +644 -0
  53. sknetwork/data/test_graphs.py +84 -0
  54. sknetwork/data/tests/__init__.py +1 -0
  55. sknetwork/data/tests/test_API.py +30 -0
  56. sknetwork/data/tests/test_base.py +14 -0
  57. sknetwork/data/tests/test_load.py +95 -0
  58. sknetwork/data/tests/test_models.py +52 -0
  59. sknetwork/data/tests/test_parse.py +250 -0
  60. sknetwork/data/tests/test_test_graphs.py +29 -0
  61. sknetwork/data/tests/test_toy_graphs.py +68 -0
  62. sknetwork/data/timeout.py +38 -0
  63. sknetwork/data/toy_graphs.py +611 -0
  64. sknetwork/embedding/__init__.py +8 -0
  65. sknetwork/embedding/base.py +94 -0
  66. sknetwork/embedding/force_atlas.py +198 -0
  67. sknetwork/embedding/louvain_embedding.py +148 -0
  68. sknetwork/embedding/random_projection.py +135 -0
  69. sknetwork/embedding/spectral.py +141 -0
  70. sknetwork/embedding/spring.py +198 -0
  71. sknetwork/embedding/svd.py +359 -0
  72. sknetwork/embedding/tests/__init__.py +1 -0
  73. sknetwork/embedding/tests/test_API.py +49 -0
  74. sknetwork/embedding/tests/test_force_atlas.py +35 -0
  75. sknetwork/embedding/tests/test_louvain_embedding.py +33 -0
  76. sknetwork/embedding/tests/test_random_projection.py +28 -0
  77. sknetwork/embedding/tests/test_spectral.py +81 -0
  78. sknetwork/embedding/tests/test_spring.py +50 -0
  79. sknetwork/embedding/tests/test_svd.py +43 -0
  80. sknetwork/gnn/__init__.py +10 -0
  81. sknetwork/gnn/activation.py +117 -0
  82. sknetwork/gnn/base.py +181 -0
  83. sknetwork/gnn/base_activation.py +90 -0
  84. sknetwork/gnn/base_layer.py +109 -0
  85. sknetwork/gnn/gnn_classifier.py +305 -0
  86. sknetwork/gnn/layer.py +153 -0
  87. sknetwork/gnn/loss.py +180 -0
  88. sknetwork/gnn/neighbor_sampler.py +65 -0
  89. sknetwork/gnn/optimizer.py +164 -0
  90. sknetwork/gnn/tests/__init__.py +1 -0
  91. sknetwork/gnn/tests/test_activation.py +56 -0
  92. sknetwork/gnn/tests/test_base.py +75 -0
  93. sknetwork/gnn/tests/test_base_layer.py +37 -0
  94. sknetwork/gnn/tests/test_gnn_classifier.py +130 -0
  95. sknetwork/gnn/tests/test_layers.py +80 -0
  96. sknetwork/gnn/tests/test_loss.py +33 -0
  97. sknetwork/gnn/tests/test_neigh_sampler.py +23 -0
  98. sknetwork/gnn/tests/test_optimizer.py +43 -0
  99. sknetwork/gnn/tests/test_utils.py +41 -0
  100. sknetwork/gnn/utils.py +127 -0
  101. sknetwork/hierarchy/__init__.py +6 -0
  102. sknetwork/hierarchy/base.py +96 -0
  103. sknetwork/hierarchy/louvain_hierarchy.py +272 -0
  104. sknetwork/hierarchy/metrics.py +234 -0
  105. sknetwork/hierarchy/paris.cpp +37865 -0
  106. sknetwork/hierarchy/paris.cpython-312-darwin.so +0 -0
  107. sknetwork/hierarchy/paris.pyx +316 -0
  108. sknetwork/hierarchy/postprocess.py +350 -0
  109. sknetwork/hierarchy/tests/__init__.py +1 -0
  110. sknetwork/hierarchy/tests/test_API.py +24 -0
  111. sknetwork/hierarchy/tests/test_algos.py +34 -0
  112. sknetwork/hierarchy/tests/test_metrics.py +62 -0
  113. sknetwork/hierarchy/tests/test_postprocess.py +57 -0
  114. sknetwork/linalg/__init__.py +9 -0
  115. sknetwork/linalg/basics.py +37 -0
  116. sknetwork/linalg/diteration.cpp +27397 -0
  117. sknetwork/linalg/diteration.cpython-312-darwin.so +0 -0
  118. sknetwork/linalg/diteration.pyx +47 -0
  119. sknetwork/linalg/eig_solver.py +93 -0
  120. sknetwork/linalg/laplacian.py +15 -0
  121. sknetwork/linalg/normalizer.py +86 -0
  122. sknetwork/linalg/operators.py +225 -0
  123. sknetwork/linalg/polynome.py +76 -0
  124. sknetwork/linalg/ppr_solver.py +170 -0
  125. sknetwork/linalg/push.cpp +31069 -0
  126. sknetwork/linalg/push.cpython-312-darwin.so +0 -0
  127. sknetwork/linalg/push.pyx +71 -0
  128. sknetwork/linalg/sparse_lowrank.py +142 -0
  129. sknetwork/linalg/svd_solver.py +91 -0
  130. sknetwork/linalg/tests/__init__.py +1 -0
  131. sknetwork/linalg/tests/test_eig.py +44 -0
  132. sknetwork/linalg/tests/test_laplacian.py +18 -0
  133. sknetwork/linalg/tests/test_normalization.py +34 -0
  134. sknetwork/linalg/tests/test_operators.py +66 -0
  135. sknetwork/linalg/tests/test_polynome.py +38 -0
  136. sknetwork/linalg/tests/test_ppr.py +50 -0
  137. sknetwork/linalg/tests/test_sparse_lowrank.py +61 -0
  138. sknetwork/linalg/tests/test_svd.py +38 -0
  139. sknetwork/linkpred/__init__.py +2 -0
  140. sknetwork/linkpred/base.py +46 -0
  141. sknetwork/linkpred/nn.py +126 -0
  142. sknetwork/linkpred/tests/__init__.py +1 -0
  143. sknetwork/linkpred/tests/test_nn.py +27 -0
  144. sknetwork/log.py +19 -0
  145. sknetwork/path/__init__.py +5 -0
  146. sknetwork/path/dag.py +54 -0
  147. sknetwork/path/distances.py +98 -0
  148. sknetwork/path/search.py +31 -0
  149. sknetwork/path/shortest_path.py +61 -0
  150. sknetwork/path/tests/__init__.py +1 -0
  151. sknetwork/path/tests/test_dag.py +37 -0
  152. sknetwork/path/tests/test_distances.py +62 -0
  153. sknetwork/path/tests/test_search.py +40 -0
  154. sknetwork/path/tests/test_shortest_path.py +40 -0
  155. sknetwork/ranking/__init__.py +8 -0
  156. sknetwork/ranking/base.py +61 -0
  157. sknetwork/ranking/betweenness.cpp +9704 -0
  158. sknetwork/ranking/betweenness.cpython-312-darwin.so +0 -0
  159. sknetwork/ranking/betweenness.pyx +97 -0
  160. sknetwork/ranking/closeness.py +92 -0
  161. sknetwork/ranking/hits.py +94 -0
  162. sknetwork/ranking/katz.py +83 -0
  163. sknetwork/ranking/pagerank.py +110 -0
  164. sknetwork/ranking/postprocess.py +37 -0
  165. sknetwork/ranking/tests/__init__.py +1 -0
  166. sknetwork/ranking/tests/test_API.py +32 -0
  167. sknetwork/ranking/tests/test_betweenness.py +38 -0
  168. sknetwork/ranking/tests/test_closeness.py +30 -0
  169. sknetwork/ranking/tests/test_hits.py +20 -0
  170. sknetwork/ranking/tests/test_pagerank.py +62 -0
  171. sknetwork/ranking/tests/test_postprocess.py +26 -0
  172. sknetwork/regression/__init__.py +4 -0
  173. sknetwork/regression/base.py +61 -0
  174. sknetwork/regression/diffusion.py +210 -0
  175. sknetwork/regression/tests/__init__.py +1 -0
  176. sknetwork/regression/tests/test_API.py +32 -0
  177. sknetwork/regression/tests/test_diffusion.py +56 -0
  178. sknetwork/sknetwork.py +3 -0
  179. sknetwork/test_base.py +35 -0
  180. sknetwork/test_log.py +15 -0
  181. sknetwork/topology/__init__.py +8 -0
  182. sknetwork/topology/cliques.cpp +32562 -0
  183. sknetwork/topology/cliques.cpython-312-darwin.so +0 -0
  184. sknetwork/topology/cliques.pyx +149 -0
  185. sknetwork/topology/core.cpp +30648 -0
  186. sknetwork/topology/core.cpython-312-darwin.so +0 -0
  187. sknetwork/topology/core.pyx +90 -0
  188. sknetwork/topology/cycles.py +243 -0
  189. sknetwork/topology/minheap.cpp +27329 -0
  190. sknetwork/topology/minheap.cpython-312-darwin.so +0 -0
  191. sknetwork/topology/minheap.pxd +20 -0
  192. sknetwork/topology/minheap.pyx +109 -0
  193. sknetwork/topology/structure.py +194 -0
  194. sknetwork/topology/tests/__init__.py +1 -0
  195. sknetwork/topology/tests/test_cliques.py +28 -0
  196. sknetwork/topology/tests/test_core.py +19 -0
  197. sknetwork/topology/tests/test_cycles.py +65 -0
  198. sknetwork/topology/tests/test_structure.py +85 -0
  199. sknetwork/topology/tests/test_triangles.py +38 -0
  200. sknetwork/topology/tests/test_wl.py +72 -0
  201. sknetwork/topology/triangles.cpp +8891 -0
  202. sknetwork/topology/triangles.cpython-312-darwin.so +0 -0
  203. sknetwork/topology/triangles.pyx +151 -0
  204. sknetwork/topology/weisfeiler_lehman.py +133 -0
  205. sknetwork/topology/weisfeiler_lehman_core.cpp +27632 -0
  206. sknetwork/topology/weisfeiler_lehman_core.cpython-312-darwin.so +0 -0
  207. sknetwork/topology/weisfeiler_lehman_core.pyx +114 -0
  208. sknetwork/utils/__init__.py +7 -0
  209. sknetwork/utils/check.py +355 -0
  210. sknetwork/utils/format.py +221 -0
  211. sknetwork/utils/membership.py +82 -0
  212. sknetwork/utils/neighbors.py +115 -0
  213. sknetwork/utils/tests/__init__.py +1 -0
  214. sknetwork/utils/tests/test_check.py +190 -0
  215. sknetwork/utils/tests/test_format.py +63 -0
  216. sknetwork/utils/tests/test_membership.py +24 -0
  217. sknetwork/utils/tests/test_neighbors.py +41 -0
  218. sknetwork/utils/tests/test_tfidf.py +18 -0
  219. sknetwork/utils/tests/test_values.py +66 -0
  220. sknetwork/utils/tfidf.py +37 -0
  221. sknetwork/utils/values.py +76 -0
  222. sknetwork/visualization/__init__.py +4 -0
  223. sknetwork/visualization/colors.py +34 -0
  224. sknetwork/visualization/dendrograms.py +277 -0
  225. sknetwork/visualization/graphs.py +1039 -0
  226. sknetwork/visualization/tests/__init__.py +1 -0
  227. sknetwork/visualization/tests/test_dendrograms.py +53 -0
  228. sknetwork/visualization/tests/test_graphs.py +176 -0
@@ -0,0 +1,316 @@
1
+ # distutils: language = c++
2
+ # cython: language_level=3
3
+ """
4
+ Created on March 2019
5
+ @author: Thomas Bonald <bonald@enst.fr>
6
+ @author: Bertrand Charpentier <bertrand.charpentier@live.fr>
7
+ @author: Quentin Lutz <qlutz@enst.fr>
8
+ """
9
+ import numpy as np
10
+ cimport numpy as np
11
+
12
+ cimport cython
13
+
14
+ from libcpp.vector cimport vector
15
+
16
+ from typing import Union
17
+
18
+ from scipy import sparse
19
+
20
+ from sknetwork.hierarchy.base import BaseHierarchy
21
+ from sknetwork.hierarchy.postprocess import reorder_dendrogram
22
+ from sknetwork.utils.format import check_format, get_adjacency, directed2undirected
23
+ from sknetwork.utils.check import get_probs, is_symmetric
24
+
25
+
26
cdef class AggregateGraph:
    """A class of graphs suitable for aggregation. Each node represents a cluster.

    The graph is stored as a dictionary of dictionaries of edge weights, normalized
    at construction so that the edge weights sum to 1. Merging two nodes creates a
    new node whose index is ``next_cluster`` and removes the two merged nodes.

    Parameters
    ----------
    out_weights :
        Out-weights (sums to 1).
    in_weights :
        In-weights (sums to 1).
    data :
        CSR format data array of the normalized adjacency matrix.
    indices :
        CSR format index array of the normalized adjacency matrix.
    indptr :
        CSR format index pointer array of the normalized adjacency matrix.

    Attributes
    ----------
    neighbors : dict[dict]
        Dictionary of dictionary of edge weights.
    next_cluster : int
        Index of the next cluster (resulting from aggregation).
    cluster_sizes : dict
        Dictionary of cluster sizes.
    cluster_out_weights : dict
        Dictionary of cluster out-weights (sums to 1).
    cluster_in_weights : dict
        Dictionary of cluster in-weights (sums to 1).
    """
    cdef public int next_cluster
    cdef public dict neighbors
    # NOTE(review): ``tmp`` is declared but never read or written in this class.
    cdef public dict tmp
    cdef dict cluster_sizes
    cdef public dict cluster_out_weights
    cdef public dict cluster_in_weights

    def __init__(self, double[:] out_weights, double[:] in_weights, double[:] data, int[:] indices,
                 int[:] indptr):
        # number of nodes, deduced from the CSR index pointer
        cdef int n = indptr.shape[0] - 1
        # NOTE(review): ``float`` is a C single-precision float while ``data`` is double;
        # ``double`` may have been intended -- confirm before changing.
        cdef float total_weight = np.sum(data)
        cdef int i
        cdef int j

        # the first aggregated cluster gets the first index after the original nodes
        self.next_cluster = n
        self.neighbors = {}
        for i in range(n):
            # normalize so that the sum of edge weights is equal to 1
            self.neighbors[i] = {}
            for j in range(indptr[i], indptr[i + 1]):
                self.neighbors[i][indices[j]] = data[j] / total_weight

        # every original node starts as a cluster of size 1 carrying its own weights
        cluster_sizes = {}
        cluster_out_weights = {}
        cluster_in_weights = {}
        for i in range(n):
            cluster_sizes[i] = 1
            cluster_out_weights[i] = out_weights[i]
            cluster_in_weights[i] = in_weights[i]
        self.cluster_sizes = cluster_sizes
        self.cluster_out_weights = cluster_out_weights
        self.cluster_in_weights = cluster_in_weights

    cdef float similarity(self, int node1, int node2):
        """Similarity of two nodes.

        Computed as twice the weight of the edge ``node1 -> node2`` divided by
        ``out(node1) * in(node2) + out(node2) * in(node1)``.

        Parameters
        ----------
        node1, node2 :
            Nodes. ``node2`` must be a key of ``self.neighbors[node1]`` whenever the
            denominator is positive, since the edge weight is looked up directly.

        Returns
        -------
        sim: float
            Similarity; minus infinity when the denominator is not positive.
        """
        cdef float sim = -float("inf")
        cdef float a = self.cluster_out_weights[node1] * self.cluster_in_weights[node2]
        cdef float b = self.cluster_out_weights[node2] * self.cluster_in_weights[node1]
        cdef float den = a + b

        if den > 0:
            sim = 2 * self.neighbors[node1][node2] / den
        return sim

    @cython.boundscheck(False)
    @cython.wraparound(False)
    cpdef AggregateGraph merge(self, int node1, int node2):
        """Merges two nodes.

        The merged node is stored under index ``self.next_cluster``; the entries of
        ``node1`` and ``node2`` are removed from the neighbor, size and weight
        dictionaries, and ``next_cluster`` is incremented.

        Parameters
        ----------
        node1, node2 :
            The two nodes to merge.

        Returns
        -------
        self: :class:`AggregateGraph`
            The aggregate graph (without self-loop).
        """
        cdef int new_node = self.next_cluster
        self.neighbors[new_node] = {}
        # accumulator for the weights internal to the merged pair
        self.neighbors[new_node][new_node] = 0
        # ``-`` binds tighter than ``&``: nodes adjacent to both, excluding the pair itself
        cdef set common_neighbors = set(self.neighbors[node1].keys()) & set(self.neighbors[node2].keys()) - {node1, node2}
        for node in common_neighbors:
            # edges towards a shared neighbor are summed, in both directions
            self.neighbors[new_node][node] = self.neighbors[node1].pop(node) + self.neighbors[node2].pop(node)
            self.neighbors[node][new_node] = self.neighbors[node].pop(node1) + self.neighbors[node].pop(node2)
        for node in {node1, node2}:
            # remaining neighbors belong to exactly one of the two merged nodes:
            # their edges are moved to the new node as they are
            for neighbor in set(self.neighbors[node].keys()) - {node1, node2}:
                self.neighbors[new_node][neighbor] = self.neighbors[node].pop(neighbor)
                self.neighbors[neighbor][new_node] = self.neighbors[neighbor].pop(node)
            # edges inside the pair (including self-loops) go to the new node's self-loop
            for other_node in {node1, node2}:
                if other_node in self.neighbors[node]:
                    self.neighbors[new_node][new_node] += self.neighbors[node][other_node]
            del self.neighbors[node]
        self.cluster_sizes[new_node] = self.cluster_sizes.pop(node1) + self.cluster_sizes.pop(node2)
        self.cluster_out_weights[new_node] = self.cluster_out_weights.pop(node1) + self.cluster_out_weights.pop(node2)
        self.cluster_in_weights[new_node] = self.cluster_in_weights.pop(node1) + self.cluster_in_weights.pop(node2)
        self.next_cluster += 1
        return self
145
+
146
+
147
class Paris(BaseHierarchy):
    """Agglomerative clustering algorithm that performs greedy merge of nodes based on their similarity.

    The similarity between nodes :math:`i,j` is :math:`\\dfrac{A_{ij}}{w_i w_j}` where

    * :math:`A_{ij}` is the weight of edge :math:`i,j`,
    * :math:`w_i, w_j` are the weights of nodes :math:`i,j`

    If the input matrix :math:`B` is a biadjacency matrix (i.e., rectangular), the algorithm is applied
    to the corresponding adjacency matrix :math:`A = \\begin{bmatrix} 0 & B \\\\ B^T & 0 \\end{bmatrix}`

    Parameters
    ----------
    weights : str
        Weights of nodes.
        ``'degree'`` (default) or ``'uniform'``.
    reorder : bool
        If ``True`` (default), reorder the dendrogram in non-decreasing order of height.

    Attributes
    ----------
    dendrogram_ : np.ndarray
        Dendrogram of the graph.
    dendrogram_row_ : np.ndarray
        Dendrogram for the rows, for bipartite graphs.
    dendrogram_col_ : np.ndarray
        Dendrogram for the columns, for bipartite graphs.
    dendrogram_full_ : np.ndarray
        Dendrogram for both rows and columns, indexed in this order, for bipartite graphs.

    Examples
    --------
    >>> from sknetwork.hierarchy import Paris
    >>> from sknetwork.data import house
    >>> paris = Paris()
    >>> adjacency = house()
    >>> dendrogram = paris.fit_predict(adjacency)
    >>> np.round(dendrogram, 2)
    array([[3.  , 2.  , 0.17, 2.  ],
           [1.  , 0.  , 0.25, 2.  ],
           [6.  , 4.  , 0.31, 3.  ],
           [7.  , 5.  , 0.67, 5.  ]])

    Notes
    -----
    Each row of the dendrogram = :math:`i, j`, distance, size of cluster :math:`i + j`.


    See Also
    --------
    scipy.cluster.hierarchy.linkage

    References
    ----------
    T. Bonald, B. Charpentier, A. Galland, A. Hollocou (2018).
    `Hierarchical Graph Clustering using Node Pair Sampling.
    <https://arxiv.org/abs/1806.01664>`_
    Workshop on Mining and Learning with Graphs.
    """
    def __init__(self, weights: str = 'degree', reorder: bool = True):
        super(Paris, self).__init__()
        self.dendrogram_ = None
        self.weights = weights
        self.reorder = reorder
        # set by ``fit`` from the result of ``get_adjacency``
        self.bipartite = None

    @cython.boundscheck(False)
    @cython.wraparound(False)
    def fit(self, input_matrix: Union[sparse.csr_matrix, np.ndarray], force_bipartite: bool = False) -> 'Paris':
        """Agglomerative clustering using the nearest neighbor chain.

        Parameters
        ----------
        input_matrix : sparse.csr_matrix, np.ndarray
            Adjacency matrix or biadjacency matrix of the graph.
        force_bipartite :
            If ``True``, force the input matrix to be considered as a biadjacency matrix.

        Returns
        -------
        self: :class:`Paris`

        Raises
        ------
        ValueError
            If the (possibly bipartite-expanded) adjacency matrix has at most one node.
        """
        self._init_vars()

        # input
        adjacency, self.bipartite = get_adjacency(input_matrix, force_bipartite=force_bipartite)

        # node weights are computed on the matrix as given, before any symmetrization
        weights = self.weights
        out_weights = get_probs(weights, adjacency)
        in_weights = get_probs(weights, adjacency.T)

        if not is_symmetric(adjacency):
            adjacency = directed2undirected(adjacency)

        # nodes whose out- and in-weights are both zero receive a unit self-loop
        null_weights = (out_weights + in_weights) == 0
        if any(null_weights):
            adjacency += sparse.diags(null_weights.astype(int))

        if adjacency.shape[0] <= 1:
            raise ValueError('The graph must contain at least two nodes.')

        # agglomerative clustering
        aggregate_graph = AggregateGraph(out_weights, in_weights, adjacency.data.astype(float),
                                         adjacency.indices, adjacency.indptr)

        # (node, size) of each cluster left without any neighbor other than itself
        cdef vector[(int, int)] connected_components
        dendrogram = []
        cdef int node
        cdef int next_node
        cdef int cluster_size
        cdef int next_cluster_size
        cdef int neighbor
        cdef int nearest_neighbor
        cdef int nearest_neighbor_last
        cdef vector[int] chain
        cdef float sim
        cdef float max_sim

        while len(aggregate_graph.cluster_sizes):
            # pick the first remaining cluster as the start of a new chain
            for node in aggregate_graph.cluster_sizes:
                break
            chain.clear()
            chain.push_back(node)
            while chain.size():
                node = chain[chain.size() - 1]
                chain.pop_back()
                if set(aggregate_graph.neighbors[node].keys()) - {node}:
                    # most similar neighbor; ties are broken in favor of the smallest index
                    max_sim = -float("inf")
                    for neighbor in set(aggregate_graph.neighbors[node].keys()) - {node}:
                        sim = aggregate_graph.similarity(node, neighbor)
                        if sim > max_sim:
                            nearest_neighbor = neighbor
                            max_sim = sim
                        elif sim == max_sim:
                            nearest_neighbor = min(neighbor, nearest_neighbor)
                    if chain.size():
                        nearest_neighbor_last = chain[chain.size() - 1]
                        chain.pop_back()
                        if nearest_neighbor_last == nearest_neighbor:
                            # the nearest neighbor is the previous node of the chain: merge;
                            # the recorded height is the inverse of the similarity
                            size = aggregate_graph.cluster_sizes[node] + aggregate_graph.cluster_sizes[nearest_neighbor]
                            dendrogram.append([node, nearest_neighbor, 1. / max_sim, size])
                            aggregate_graph.merge(node, nearest_neighbor)
                        else:
                            # otherwise restore the chain and extend it with the nearest neighbor
                            chain.push_back(nearest_neighbor_last)
                            chain.push_back(node)
                            chain.push_back(nearest_neighbor)
                    else:
                        chain.push_back(node)
                        chain.push_back(nearest_neighbor)
                else:
                    # no neighbor other than itself: set the cluster aside as a component
                    connected_components.push_back((node, aggregate_graph.cluster_sizes[node]))
                    del aggregate_graph.cluster_sizes[node]

        # join the components one after the other, at infinite height
        node, cluster_size = connected_components[connected_components.size() - 1]
        connected_components.pop_back()
        for next_node, next_cluster_size in connected_components:
            cluster_size += next_cluster_size
            dendrogram.append([node, next_node, float("inf"), cluster_size])
            node = aggregate_graph.next_cluster
            aggregate_graph.next_cluster += 1

        dendrogram = np.array(dendrogram)
        if self.reorder:
            dendrogram = reorder_dendrogram(dendrogram)

        self.dendrogram_ = dendrogram
        if self.bipartite:
            self._split_vars(input_matrix.shape)

        return self
@@ -0,0 +1,350 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ Created on June 2019
5
+ @author: Thomas Bonald <bonald@enst.fr>
6
+ @author: Bertrand Charpentier <bertrand.charpentier@live.fr>
7
+ @author: Quentin Lutz <qlutz@enst.fr>
8
+ """
9
+
10
+ import copy
11
+ from collections import defaultdict
12
+ from typing import Optional, Union, Tuple
13
+
14
+ import numpy as np
15
+
16
+ from sknetwork.utils.check import check_n_clusters, check_dendrogram
17
+
18
+
19
def reorder_dendrogram(dendrogram: np.ndarray) -> np.ndarray:
    """Return a copy of the dendrogram with merges sorted by non-decreasing height.

    Rows are sorted by height (third column); ties are broken by the largest of
    the two merged indices. Indices of internal nodes are renumbered so that the
    row at position ``t`` of the output creates node ``n + t``, where ``n`` is the
    number of leaves. The input array is left unchanged.
    """
    n_merges = dendrogram.shape[0]
    n_leaves = n_merges + 1

    # lexsort uses the LAST row as primary key: height first, then largest index
    sort_keys = np.zeros((2, n_merges), float)
    sort_keys[0] = np.max(dendrogram[:, :2], axis=1)
    sort_keys[1] = dendrogram[:, 2]
    permutation = np.lexsort(sort_keys)

    # fancy indexing copies, so the caller's array is not modified below
    reordered = dendrogram[permutation]

    # old internal node n + permutation[k] becomes new internal node n + k
    relabel = np.arange(2 * n_leaves - 1)
    relabel[n_leaves + permutation] = np.arange(n_leaves, 2 * n_leaves - 1)
    for column in (0, 1):
        reordered[:, column] = relabel[reordered[:, column].astype(int)]
    return reordered
32
+
33
+
34
def get_labels(dendrogram: np.ndarray, cluster: dict, sort_clusters: bool, return_dendrogram: bool):
    """Convert a dictionary of clusters into an array of labels.

    Parameters
    ----------
    dendrogram :
        Dendrogram from which the clusters were extracted.
    cluster :
        Dictionary whose values are the lists of nodes of each cluster.
    sort_clusters :
        If ``True``, labels are assigned in decreasing order of cluster size.
    return_dendrogram :
        If ``True``, also return the dendrogram whose leaves are the clusters.

    Returns
    -------
    labels :
        Label of each node.
    dendrogram_new :
        Dendrogram over the clusters (only if ``return_dendrogram`` is ``True``).
    """
    n_nodes = len(dendrogram) + 1
    groups = list(cluster.values())
    if sort_clusters:
        group_sizes = np.array([len(members) for members in groups])
        ranking = np.argsort(-group_sizes)
        groups = [groups[k] for k in ranking]

    labels = np.zeros(n_nodes, dtype=int)
    for label, members in enumerate(groups):
        labels[members] = label

    if not return_dendrogram:
        return labels

    # label currently attached to each live node of the input dendrogram
    label_of = dict(enumerate(labels))
    size_of = {k: len(members) for k, members in enumerate(groups)}
    merges = []
    next_node = len(labels)
    next_label = len(groups)
    for left, right, height, _ in dendrogram:
        left_label = label_of.pop(int(left))
        right_label = label_of.pop(int(right))
        if left_label == right_label:
            # merge internal to one cluster: the new node simply inherits the label
            label_of[next_node] = left_label
        else:
            merged_size = size_of.pop(left_label) + size_of.pop(right_label)
            size_of[next_label] = merged_size
            label_of[next_node] = next_label
            merges.append([left_label, right_label, height, merged_size])
            next_label += 1
        next_node += 1
    return labels, np.array(merges)
69
+
70
+
71
def cut_straight(dendrogram: np.ndarray, n_clusters: Optional[int] = None, threshold: Optional[float] = None,
                 sort_clusters: bool = True, return_dendrogram: bool = False) \
        -> Union[np.ndarray, Tuple[np.ndarray, np.ndarray]]:
    """Cut a dendrogram and return the corresponding clustering.

    Parameters
    ----------
    dendrogram : np.ndarray
        Dendrogram.
    n_clusters : int
        Number of clusters (optional).
        The number of clusters can be larger than n_clusters in case of equal heights in the dendrogram.
    threshold : float
        Threshold on height (optional).
        If both n_clusters and threshold are ``None``, n_clusters is set to 2.
    sort_clusters : bool
        If ``True``, sorts clusters in decreasing order of size.
    return_dendrogram : bool
        If ``True``, returns the dendrogram formed by the clusters up to the root.

    Returns
    -------
    labels : np.ndarray
        Cluster of each node.
    dendrogram_aggregate : np.ndarray
        Dendrogram starting from clusters (leaves = clusters).

    Example
    -------
    >>> from sknetwork.hierarchy import cut_straight
    >>> dendrogram = np.array([[0, 1, 0, 2], [2, 3, 1, 3]])
    >>> cut_straight(dendrogram)
    array([0, 0, 1])
    """
    check_dendrogram(dendrogram)
    n = dendrogram.shape[0] + 1

    if return_dendrogram:
        # the aggregated dendrogram is built by scanning merges in order of height
        height = dendrogram[:, 2]
        if not np.all(height[:-1] <= height[1:]):
            dendrogram = reorder_dendrogram(dendrogram)

    cluster = {i: [i] for i in range(n)}
    if n_clusters is None:
        n_clusters = 2 if threshold is None else n
    else:
        check_n_clusters(n_clusters, n, n_min=1)

    # A single cluster means that every merge is applied. There are only n - 1
    # heights, so position n - n_clusters = n - 1 does not exist in that case.
    merge_all = n_clusters == 1
    if merge_all:
        cut = np.inf
    else:
        cut = np.sort(dendrogram[:, 2])[n - n_clusters]
    if threshold is not None:
        cut = max(cut, threshold)

    for t in range(n - 1):
        i = int(dendrogram[t][0])
        j = int(dendrogram[t][1])
        # merge_all also accepts merges of infinite height, which ``< cut`` would reject
        if (merge_all or dendrogram[t][2] < cut) and i in cluster and j in cluster:
            cluster[n + t] = cluster.pop(i) + cluster.pop(j)

    return get_labels(dendrogram, cluster, sort_clusters, return_dendrogram)
130
+
131
+
132
def cut_balanced(dendrogram: np.ndarray, max_cluster_size: int = 20, sort_clusters: bool = True,
                 return_dendrogram: bool = False) -> Union[np.ndarray, Tuple[np.ndarray, np.ndarray]]:
    """Cut a dendrogram under a constraint on the cluster size and return the clustering.

    Merges are scanned in the order of the dendrogram; a merge is applied only if
    both sides are still available as clusters and the resulting cluster does not
    exceed the maximum size.

    Parameters
    ----------
    dendrogram : np.ndarray
        Dendrogram
    max_cluster_size : int
        Maximum size of each cluster.
    sort_clusters : bool
        If ``True``, sort labels in decreasing order of cluster size.
    return_dendrogram : bool
        If ``True``, returns the dendrogram formed by the clusters up to the root.

    Returns
    -------
    labels : np.ndarray
        Label of each node.
    dendrogram_aggregate : np.ndarray
        Dendrogram starting from clusters (leaves = clusters).

    Raises
    ------
    ValueError
        If ``max_cluster_size`` is lower than 2 or larger than the number of nodes.

    Example
    -------
    >>> from sknetwork.hierarchy import cut_balanced
    >>> dendrogram = np.array([[0, 1, 0, 2], [2, 3, 1, 3]])
    >>> cut_balanced(dendrogram, 2)
    array([0, 0, 1])
    """
    check_dendrogram(dendrogram)
    n_nodes = dendrogram.shape[0] + 1
    if max_cluster_size > n_nodes or max_cluster_size < 2:
        raise ValueError("The maximum cluster size must be between 2 and the number of nodes.")

    members = {node: [node] for node in range(n_nodes)}
    for step in range(n_nodes - 1):
        left = int(dendrogram[step][0])
        right = int(dendrogram[step][1])
        # a side that is missing belongs to a merge that was refused earlier
        if left not in members or right not in members:
            continue
        if len(members[left]) + len(members[right]) > max_cluster_size:
            continue
        members[n_nodes + step] = members.pop(left) + members.pop(right)

    return get_labels(dendrogram, members, sort_clusters, return_dendrogram)
173
+
174
+
175
def aggregate_dendrogram(dendrogram: np.ndarray, n_clusters: int = 2, return_counts: bool = False) \
        -> Union[np.ndarray, Tuple[np.ndarray, np.ndarray]]:
    """Aggregate a dendrogram in order to get a certain number of leaves.
    The leaves in the output dendrogram correspond to subtrees in the input one.

    Only the last ``n_clusters - 1`` rows of the input are kept, so the input is
    expected to list its merges from the bottom of the tree to the root.

    Parameters
    ----------
    dendrogram : np.ndarray
        The input to aggregate.
    n_clusters : int
        Number of clusters (or leaves) to keep.
    return_counts : bool
        If ``True``, returns an array of counts corresponding to the sizes of the merged subtrees.
        The sum of the counts is equal to the number of samples in the input dendrogram.

    Returns
    -------
    new_dendrogram : np.ndarray
        Aggregated dendrogram. The nodes are reindexed from 0.
    counts : np.ndarray
        Size of the subtrees corresponding to each leaf in new_dendrogram.
    """
    n_nodes: int = dendrogram.shape[0] + 1
    check_n_clusters(n_clusters, n_nodes, n_min=1)

    new_dendrogram = dendrogram[n_nodes - n_clusters:].copy()
    # nodes referenced by the kept merges, in increasing order: the first
    # n_clusters of them are the new leaves, the others are kept internal nodes
    node_indices = np.array(sorted(set(new_dendrogram[:, 0]).union(set(new_dendrogram[:, 1]))))
    new_index = {ix: i for i, ix in enumerate(node_indices)}

    for j in range(2):
        for i in range(new_dendrogram.shape[0]):
            new_dendrogram[i, j] = new_index[new_dendrogram[i, j]]

    if not return_counts:
        return new_dendrogram

    leaves = node_indices[:n_clusters].astype(int)
    # A leaf that is an original node (index < n_nodes) is a subtree of size 1.
    # Only internal nodes have a row in the dendrogram; indexing with the negative
    # offset of an original node would wrap around and read an unrelated row.
    counts = np.ones(len(leaves), dtype=int)
    is_internal = leaves >= n_nodes
    counts[is_internal] = dendrogram[leaves[is_internal] - n_nodes, 3]
    return new_dendrogram, counts
216
+
217
+
218
def get_index(tree):
    """Return the largest leaf value found in a tree given as nested lists.

    Parameters
    ----------
    tree :
        Either a leaf (any object that is not a list) or a list of sub-trees.

    Returns
    -------
    index :
        The leaf itself if ``tree`` is not a list, otherwise the maximum over
        all leaves of the tree.
    """
    # isinstance (rather than an exact type comparison) so that list subclasses
    # are traversed as trees instead of being returned as if they were leaves
    if not isinstance(tree, list):
        return tree
    return np.max([get_index(subtree) for subtree in tree])
235
+
236
+
237
def get_dendrogram(tree, dendrogram=None, index=None, depth=0, size=None, copy_tree=False):
    """Get dendrogram from tree.

    The tree is a list of sub-trees, each leaf being a list with a single index.
    Sub-trees are collapsed first (from left to right); the resulting elements are
    then merged one after the other, starting from the end of the list, at height
    ``-depth``. Unless ``copy_tree`` is ``True``, the tree is consumed in place and
    ends up as a list containing only the index of its root.

    Parameters
    ----------
    tree :
        The initial tree
    dendrogram :
        Intermediary dendrogram for recursive use
    index :
        Intermediary index for recursive use
    depth :
        Current depth for recursive use
    size :
        Current leaf count for recursive use
    copy_tree :
        If ``True``, ensure the passed tree remains unchanged.

    Returns
    -------
    dendrogram :
        The list of merges, each of the form ``[i, j, height, size]``.
    index :
        Index of the last node created (or the initial index if nothing was merged).
    """
    if copy_tree:
        # work on a private copy; all the other arguments are kept as given
        tree = copy.deepcopy(tree)
    if dendrogram is None:
        dendrogram = []
    if index is None:
        index = get_index(tree)
    if size is None:
        # nodes without a recorded size are leaves
        size = defaultdict(lambda: 1)

    if len(tree) <= 1:
        return dendrogram, index

    # collapse every sub-tree into a single node; after the call, the sub-tree
    # is a one-element list holding the index of its root
    for subtree in tree:
        if len(subtree) > 1:
            sub_dendrogram, index = get_dendrogram(subtree, None, index, depth + 1, size)
            dendrogram += sub_dendrogram

    # merge all remaining elements, two at first and then one at a time
    first = tree.pop()[0]
    second = tree.pop()[0]
    merged_size = size[first] + size[second]
    dendrogram.append([first, second, float(-depth), merged_size])
    index += 1
    while tree:
        other = tree.pop()[0]
        # the element may be a collapsed sub-tree: add its actual size, not 1
        merged_size += size[other]
        dendrogram.append([index, other, float(-depth), merged_size])
        index += 1
    size[index] = merged_size
    tree.append(index)
    return dendrogram, index
294
+
295
+
296
def split_dendrogram(dendrogram: np.ndarray, shape: tuple):
    """Split the dendrogram of a bipartite graph into 2 dendrograms, one for each part.

    Rows of the bipartite graph are nodes ``0 .. n1 - 1`` and columns are nodes
    ``n1 .. n1 + n2 - 1`` of the input dendrogram. A merge is reported in the
    dendrogram of one part only when both of its sides contain nodes of that part.

    Parameters
    ----------
    dendrogram :
        Dendrogram of the bipartite graph.
    shape :
        Shape of the biadjacency matrix.

    Returns
    -------
    dendrogram_row :
        Dendrogram for the rows.
    dendrogram_col :
        Dendrogram for the columns.
    """
    n_row, n_col = shape
    n_total = n_row + n_col

    def project(left, right, merged, height, ids, sizes, merges, fresh_id):
        """Apply one merge to one part; return the next free index of that part."""
        if left in ids and right in ids:
            # both sides contain nodes of this part: a genuine merge
            sizes[merged] = sizes.pop(left) + sizes.pop(right)
            ids[merged] = fresh_id
            merges.append([ids.pop(left), ids.pop(right), height, sizes[merged]])
            return fresh_id + 1
        if left in ids:
            survivor = left
        elif right in ids:
            survivor = right
        else:
            return fresh_id
        # only one side contains nodes of this part: carry it over unchanged
        sizes[merged] = sizes.pop(survivor)
        ids[merged] = ids.pop(survivor)
        return fresh_id

    row_merges, col_merges = [], []
    next_row_id, next_col_id = n_row, n_col
    row_sizes = {k: 1 for k in range(n_row)}
    col_sizes = {k + n_row: 1 for k in range(n_col)}
    row_ids = {k: k for k in range(n_row)}
    col_ids = {k + n_row: k for k in range(n_col)}

    for step in range(n_total - 1):
        left = dendrogram[step, 0]
        right = dendrogram[step, 1]
        merged = n_total + step
        next_row_id = project(left, right, merged, dendrogram[step, 2],
                              row_ids, row_sizes, row_merges, next_row_id)
        next_col_id = project(left, right, merged, dendrogram[step, 2],
                              col_ids, col_sizes, col_merges, next_col_id)

    return np.array(row_merges), np.array(col_merges)
@@ -0,0 +1 @@
1
+ """tests for hierarchy"""
@@ -0,0 +1,24 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ """Tests for hierarchy API"""
4
+ import unittest
5
+
6
+ from sknetwork.data.test_graphs import *
7
+ from sknetwork.hierarchy import *
8
+
9
+
10
class TestHierarchyAPI(unittest.TestCase):
    """Checks of the dendrogram shape returned by the hierarchy algorithms."""

    def test_undirected(self):
        """The dendrogram of a graph with n nodes has n - 1 rows and 4 columns."""
        adjacency = test_graph()
        expected_shape = (adjacency.shape[0] - 1, 4)

        for algo in [Paris(), LouvainIteration()]:
            self.assertTupleEqual(algo.fit_predict(adjacency).shape, expected_shape)

    def test_disconnected(self):
        """A disconnected graph still yields a full dendrogram."""
        adjacency = test_disconnected_graph()
        for algo in [Paris(), LouvainIteration()]:
            self.assertEqual(algo.fit_transform(adjacency).shape, (9, 4))
24
+ self.assertEqual(dendrogram.shape, (9, 4))