scikit-network 0.33.3__cp313-cp313-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of scikit-network might be problematic. Click here for more details.

Files changed (228) hide show
  1. scikit_network-0.33.3.dist-info/METADATA +122 -0
  2. scikit_network-0.33.3.dist-info/RECORD +228 -0
  3. scikit_network-0.33.3.dist-info/WHEEL +5 -0
  4. scikit_network-0.33.3.dist-info/licenses/AUTHORS.rst +43 -0
  5. scikit_network-0.33.3.dist-info/licenses/LICENSE +34 -0
  6. scikit_network-0.33.3.dist-info/top_level.txt +1 -0
  7. sknetwork/__init__.py +21 -0
  8. sknetwork/base.py +67 -0
  9. sknetwork/classification/__init__.py +8 -0
  10. sknetwork/classification/base.py +142 -0
  11. sknetwork/classification/base_rank.py +133 -0
  12. sknetwork/classification/diffusion.py +134 -0
  13. sknetwork/classification/knn.py +139 -0
  14. sknetwork/classification/metrics.py +205 -0
  15. sknetwork/classification/pagerank.py +66 -0
  16. sknetwork/classification/propagation.py +152 -0
  17. sknetwork/classification/tests/__init__.py +1 -0
  18. sknetwork/classification/tests/test_API.py +30 -0
  19. sknetwork/classification/tests/test_diffusion.py +77 -0
  20. sknetwork/classification/tests/test_knn.py +23 -0
  21. sknetwork/classification/tests/test_metrics.py +53 -0
  22. sknetwork/classification/tests/test_pagerank.py +20 -0
  23. sknetwork/classification/tests/test_propagation.py +24 -0
  24. sknetwork/classification/vote.cp313-win_amd64.pyd +0 -0
  25. sknetwork/classification/vote.cpp +27584 -0
  26. sknetwork/classification/vote.pyx +56 -0
  27. sknetwork/clustering/__init__.py +8 -0
  28. sknetwork/clustering/base.py +172 -0
  29. sknetwork/clustering/kcenters.py +253 -0
  30. sknetwork/clustering/leiden.py +242 -0
  31. sknetwork/clustering/leiden_core.cp313-win_amd64.pyd +0 -0
  32. sknetwork/clustering/leiden_core.cpp +31575 -0
  33. sknetwork/clustering/leiden_core.pyx +124 -0
  34. sknetwork/clustering/louvain.py +286 -0
  35. sknetwork/clustering/louvain_core.cp313-win_amd64.pyd +0 -0
  36. sknetwork/clustering/louvain_core.cpp +31220 -0
  37. sknetwork/clustering/louvain_core.pyx +124 -0
  38. sknetwork/clustering/metrics.py +91 -0
  39. sknetwork/clustering/postprocess.py +66 -0
  40. sknetwork/clustering/propagation_clustering.py +104 -0
  41. sknetwork/clustering/tests/__init__.py +1 -0
  42. sknetwork/clustering/tests/test_API.py +38 -0
  43. sknetwork/clustering/tests/test_kcenters.py +60 -0
  44. sknetwork/clustering/tests/test_leiden.py +34 -0
  45. sknetwork/clustering/tests/test_louvain.py +135 -0
  46. sknetwork/clustering/tests/test_metrics.py +50 -0
  47. sknetwork/clustering/tests/test_postprocess.py +39 -0
  48. sknetwork/data/__init__.py +6 -0
  49. sknetwork/data/base.py +33 -0
  50. sknetwork/data/load.py +406 -0
  51. sknetwork/data/models.py +459 -0
  52. sknetwork/data/parse.py +644 -0
  53. sknetwork/data/test_graphs.py +84 -0
  54. sknetwork/data/tests/__init__.py +1 -0
  55. sknetwork/data/tests/test_API.py +30 -0
  56. sknetwork/data/tests/test_base.py +14 -0
  57. sknetwork/data/tests/test_load.py +95 -0
  58. sknetwork/data/tests/test_models.py +52 -0
  59. sknetwork/data/tests/test_parse.py +250 -0
  60. sknetwork/data/tests/test_test_graphs.py +29 -0
  61. sknetwork/data/tests/test_toy_graphs.py +68 -0
  62. sknetwork/data/timeout.py +38 -0
  63. sknetwork/data/toy_graphs.py +611 -0
  64. sknetwork/embedding/__init__.py +8 -0
  65. sknetwork/embedding/base.py +94 -0
  66. sknetwork/embedding/force_atlas.py +198 -0
  67. sknetwork/embedding/louvain_embedding.py +148 -0
  68. sknetwork/embedding/random_projection.py +135 -0
  69. sknetwork/embedding/spectral.py +141 -0
  70. sknetwork/embedding/spring.py +198 -0
  71. sknetwork/embedding/svd.py +359 -0
  72. sknetwork/embedding/tests/__init__.py +1 -0
  73. sknetwork/embedding/tests/test_API.py +49 -0
  74. sknetwork/embedding/tests/test_force_atlas.py +35 -0
  75. sknetwork/embedding/tests/test_louvain_embedding.py +33 -0
  76. sknetwork/embedding/tests/test_random_projection.py +28 -0
  77. sknetwork/embedding/tests/test_spectral.py +81 -0
  78. sknetwork/embedding/tests/test_spring.py +50 -0
  79. sknetwork/embedding/tests/test_svd.py +43 -0
  80. sknetwork/gnn/__init__.py +10 -0
  81. sknetwork/gnn/activation.py +117 -0
  82. sknetwork/gnn/base.py +181 -0
  83. sknetwork/gnn/base_activation.py +90 -0
  84. sknetwork/gnn/base_layer.py +109 -0
  85. sknetwork/gnn/gnn_classifier.py +305 -0
  86. sknetwork/gnn/layer.py +153 -0
  87. sknetwork/gnn/loss.py +180 -0
  88. sknetwork/gnn/neighbor_sampler.py +65 -0
  89. sknetwork/gnn/optimizer.py +164 -0
  90. sknetwork/gnn/tests/__init__.py +1 -0
  91. sknetwork/gnn/tests/test_activation.py +56 -0
  92. sknetwork/gnn/tests/test_base.py +75 -0
  93. sknetwork/gnn/tests/test_base_layer.py +37 -0
  94. sknetwork/gnn/tests/test_gnn_classifier.py +130 -0
  95. sknetwork/gnn/tests/test_layers.py +80 -0
  96. sknetwork/gnn/tests/test_loss.py +33 -0
  97. sknetwork/gnn/tests/test_neigh_sampler.py +23 -0
  98. sknetwork/gnn/tests/test_optimizer.py +43 -0
  99. sknetwork/gnn/tests/test_utils.py +41 -0
  100. sknetwork/gnn/utils.py +127 -0
  101. sknetwork/hierarchy/__init__.py +6 -0
  102. sknetwork/hierarchy/base.py +96 -0
  103. sknetwork/hierarchy/louvain_hierarchy.py +272 -0
  104. sknetwork/hierarchy/metrics.py +234 -0
  105. sknetwork/hierarchy/paris.cp313-win_amd64.pyd +0 -0
  106. sknetwork/hierarchy/paris.cpp +37868 -0
  107. sknetwork/hierarchy/paris.pyx +316 -0
  108. sknetwork/hierarchy/postprocess.py +350 -0
  109. sknetwork/hierarchy/tests/__init__.py +1 -0
  110. sknetwork/hierarchy/tests/test_API.py +24 -0
  111. sknetwork/hierarchy/tests/test_algos.py +34 -0
  112. sknetwork/hierarchy/tests/test_metrics.py +62 -0
  113. sknetwork/hierarchy/tests/test_postprocess.py +57 -0
  114. sknetwork/linalg/__init__.py +9 -0
  115. sknetwork/linalg/basics.py +37 -0
  116. sknetwork/linalg/diteration.cp313-win_amd64.pyd +0 -0
  117. sknetwork/linalg/diteration.cpp +27400 -0
  118. sknetwork/linalg/diteration.pyx +47 -0
  119. sknetwork/linalg/eig_solver.py +93 -0
  120. sknetwork/linalg/laplacian.py +15 -0
  121. sknetwork/linalg/normalizer.py +86 -0
  122. sknetwork/linalg/operators.py +225 -0
  123. sknetwork/linalg/polynome.py +76 -0
  124. sknetwork/linalg/ppr_solver.py +170 -0
  125. sknetwork/linalg/push.cp313-win_amd64.pyd +0 -0
  126. sknetwork/linalg/push.cpp +31072 -0
  127. sknetwork/linalg/push.pyx +71 -0
  128. sknetwork/linalg/sparse_lowrank.py +142 -0
  129. sknetwork/linalg/svd_solver.py +91 -0
  130. sknetwork/linalg/tests/__init__.py +1 -0
  131. sknetwork/linalg/tests/test_eig.py +44 -0
  132. sknetwork/linalg/tests/test_laplacian.py +18 -0
  133. sknetwork/linalg/tests/test_normalization.py +34 -0
  134. sknetwork/linalg/tests/test_operators.py +66 -0
  135. sknetwork/linalg/tests/test_polynome.py +38 -0
  136. sknetwork/linalg/tests/test_ppr.py +50 -0
  137. sknetwork/linalg/tests/test_sparse_lowrank.py +61 -0
  138. sknetwork/linalg/tests/test_svd.py +38 -0
  139. sknetwork/linkpred/__init__.py +2 -0
  140. sknetwork/linkpred/base.py +46 -0
  141. sknetwork/linkpred/nn.py +126 -0
  142. sknetwork/linkpred/tests/__init__.py +1 -0
  143. sknetwork/linkpred/tests/test_nn.py +27 -0
  144. sknetwork/log.py +19 -0
  145. sknetwork/path/__init__.py +5 -0
  146. sknetwork/path/dag.py +54 -0
  147. sknetwork/path/distances.py +98 -0
  148. sknetwork/path/search.py +31 -0
  149. sknetwork/path/shortest_path.py +61 -0
  150. sknetwork/path/tests/__init__.py +1 -0
  151. sknetwork/path/tests/test_dag.py +37 -0
  152. sknetwork/path/tests/test_distances.py +62 -0
  153. sknetwork/path/tests/test_search.py +40 -0
  154. sknetwork/path/tests/test_shortest_path.py +40 -0
  155. sknetwork/ranking/__init__.py +8 -0
  156. sknetwork/ranking/base.py +61 -0
  157. sknetwork/ranking/betweenness.cp313-win_amd64.pyd +0 -0
  158. sknetwork/ranking/betweenness.cpp +9707 -0
  159. sknetwork/ranking/betweenness.pyx +97 -0
  160. sknetwork/ranking/closeness.py +92 -0
  161. sknetwork/ranking/hits.py +94 -0
  162. sknetwork/ranking/katz.py +83 -0
  163. sknetwork/ranking/pagerank.py +110 -0
  164. sknetwork/ranking/postprocess.py +37 -0
  165. sknetwork/ranking/tests/__init__.py +1 -0
  166. sknetwork/ranking/tests/test_API.py +32 -0
  167. sknetwork/ranking/tests/test_betweenness.py +38 -0
  168. sknetwork/ranking/tests/test_closeness.py +30 -0
  169. sknetwork/ranking/tests/test_hits.py +20 -0
  170. sknetwork/ranking/tests/test_pagerank.py +62 -0
  171. sknetwork/ranking/tests/test_postprocess.py +26 -0
  172. sknetwork/regression/__init__.py +4 -0
  173. sknetwork/regression/base.py +61 -0
  174. sknetwork/regression/diffusion.py +210 -0
  175. sknetwork/regression/tests/__init__.py +1 -0
  176. sknetwork/regression/tests/test_API.py +32 -0
  177. sknetwork/regression/tests/test_diffusion.py +56 -0
  178. sknetwork/sknetwork.py +3 -0
  179. sknetwork/test_base.py +35 -0
  180. sknetwork/test_log.py +15 -0
  181. sknetwork/topology/__init__.py +8 -0
  182. sknetwork/topology/cliques.cp313-win_amd64.pyd +0 -0
  183. sknetwork/topology/cliques.cpp +32565 -0
  184. sknetwork/topology/cliques.pyx +149 -0
  185. sknetwork/topology/core.cp313-win_amd64.pyd +0 -0
  186. sknetwork/topology/core.cpp +30651 -0
  187. sknetwork/topology/core.pyx +90 -0
  188. sknetwork/topology/cycles.py +243 -0
  189. sknetwork/topology/minheap.cp313-win_amd64.pyd +0 -0
  190. sknetwork/topology/minheap.cpp +27332 -0
  191. sknetwork/topology/minheap.pxd +20 -0
  192. sknetwork/topology/minheap.pyx +109 -0
  193. sknetwork/topology/structure.py +194 -0
  194. sknetwork/topology/tests/__init__.py +1 -0
  195. sknetwork/topology/tests/test_cliques.py +28 -0
  196. sknetwork/topology/tests/test_core.py +19 -0
  197. sknetwork/topology/tests/test_cycles.py +65 -0
  198. sknetwork/topology/tests/test_structure.py +85 -0
  199. sknetwork/topology/tests/test_triangles.py +38 -0
  200. sknetwork/topology/tests/test_wl.py +72 -0
  201. sknetwork/topology/triangles.cp313-win_amd64.pyd +0 -0
  202. sknetwork/topology/triangles.cpp +8894 -0
  203. sknetwork/topology/triangles.pyx +151 -0
  204. sknetwork/topology/weisfeiler_lehman.py +133 -0
  205. sknetwork/topology/weisfeiler_lehman_core.cp313-win_amd64.pyd +0 -0
  206. sknetwork/topology/weisfeiler_lehman_core.cpp +27635 -0
  207. sknetwork/topology/weisfeiler_lehman_core.pyx +114 -0
  208. sknetwork/utils/__init__.py +7 -0
  209. sknetwork/utils/check.py +355 -0
  210. sknetwork/utils/format.py +221 -0
  211. sknetwork/utils/membership.py +82 -0
  212. sknetwork/utils/neighbors.py +115 -0
  213. sknetwork/utils/tests/__init__.py +1 -0
  214. sknetwork/utils/tests/test_check.py +190 -0
  215. sknetwork/utils/tests/test_format.py +63 -0
  216. sknetwork/utils/tests/test_membership.py +24 -0
  217. sknetwork/utils/tests/test_neighbors.py +41 -0
  218. sknetwork/utils/tests/test_tfidf.py +18 -0
  219. sknetwork/utils/tests/test_values.py +66 -0
  220. sknetwork/utils/tfidf.py +37 -0
  221. sknetwork/utils/values.py +76 -0
  222. sknetwork/visualization/__init__.py +4 -0
  223. sknetwork/visualization/colors.py +34 -0
  224. sknetwork/visualization/dendrograms.py +277 -0
  225. sknetwork/visualization/graphs.py +1039 -0
  226. sknetwork/visualization/tests/__init__.py +1 -0
  227. sknetwork/visualization/tests/test_dendrograms.py +53 -0
  228. sknetwork/visualization/tests/test_graphs.py +176 -0
@@ -0,0 +1,272 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ Created in March 2020
5
+ @author: Quentin Lutz <qlutz@enst.fr>
6
+ @author: Thomas Bonald <tbonald@enst.fr>
7
+ """
8
+ from typing import Optional, Union
9
+
10
+ import numpy as np
11
+ from scipy import sparse
12
+
13
+ from sknetwork.clustering.louvain import Louvain
14
+ from sknetwork.hierarchy.base import BaseHierarchy
15
+ from sknetwork.hierarchy.postprocess import get_dendrogram, reorder_dendrogram
16
+ from sknetwork.utils.check import check_format
17
+ from sknetwork.utils.format import get_adjacency
18
+
19
+
20
class LouvainIteration(BaseHierarchy):
    """Hierarchical clustering by successive instances of Louvain (top-down).

    Louvain is applied to the graph, then again to the subgraph induced by each cluster found,
    and so on, until the depth budget is exhausted, the subgraph has no edge, or Louvain returns
    a single cluster.

    Parameters
    ----------
    depth : int
        Depth of the tree.
        A negative value is interpreted as no limit (return a tree of maximum depth).
    resolution : float
        Resolution parameter.
    tol_optimization : float
        Minimum increase in the objective function to enter a new optimization pass.
    tol_aggregation : float
        Minimum increase in the objective function to enter a new aggregation pass.
    n_aggregations : int
        Maximum number of aggregations.
        A negative value is interpreted as no limit.
    shuffle_nodes : bool
        If ``True``, shuffle nodes before optimization.
    random_state : int or np.random.RandomState, optional
        Random number generator or random seed. If ``None``, numpy.random is used.
    verbose : bool
        Verbose mode.

    Attributes
    ----------
    dendrogram_ : np.ndarray
        Dendrogram of the graph.
    dendrogram_row_ : np.ndarray
        Dendrogram for the rows, for bipartite graphs.
    dendrogram_col_ : np.ndarray
        Dendrogram for the columns, for bipartite graphs.
    dendrogram_full_ : np.ndarray
        Dendrogram for both rows and columns, indexed in this order, for bipartite graphs.

    Example
    -------
    >>> from sknetwork.hierarchy import LouvainIteration
    >>> from sknetwork.data import house
    >>> louvain = LouvainIteration()
    >>> adjacency = house()
    >>> louvain.fit_predict(adjacency)
    array([[3., 2., 1., 2.],
           [4., 1., 1., 2.],
           [6., 0., 1., 3.],
           [5., 7., 2., 5.]])

    Notes
    -----
    Each row of the dendrogram = merge nodes, distance, size of cluster.

    See Also
    --------
    scipy.cluster.hierarchy.dendrogram
    sknetwork.clustering.Louvain
    """

    def __init__(self, depth: int = 3, resolution: float = 1, tol_optimization: float = 1e-3,
                 tol_aggregation: float = 1e-3, n_aggregations: int = -1, shuffle_nodes: bool = False,
                 random_state: Optional[Union[np.random.RandomState, int]] = None, verbose: bool = False):
        super().__init__()

        self.dendrogram_ = None
        self.depth = depth
        # A single Louvain instance is reused for every level of the recursion.
        self._clustering_method = Louvain(
            resolution=resolution,
            tol_optimization=tol_optimization,
            tol_aggregation=tol_aggregation,
            n_aggregations=n_aggregations,
            shuffle_nodes=shuffle_nodes,
            random_state=random_state,
            verbose=verbose,
        )
        self.bipartite = None

    def _recursive_louvain(self, adjacency: Union[sparse.csr_matrix, np.ndarray], depth: int,
                           nodes: Optional[np.ndarray] = None):
        """Recursive function for fit.

        Parameters
        ----------
        adjacency : sparse.csr_matrix, np.ndarray
            Adjacency matrix of the graph.
        depth : int
            Depth of the recursion (remaining budget; decremented at each level).
        nodes : np.ndarray
            The indices of the current nodes in the original graph.
            If ``None``, all nodes ``0, ..., n - 1`` are used.

        Returns
        -------
        tree: recursive list of list of nodes.
        """
        n_nodes = adjacency.shape[0]
        if nodes is None:
            nodes = np.arange(n_nodes)

        # Only cluster when there is at least one edge and the depth budget is not zero;
        # otherwise put every node in the same cluster, which ends the recursion below.
        if adjacency.nnz and depth:
            labels = self._clustering_method.fit_predict(adjacency)
        else:
            labels = np.zeros(n_nodes)
        clusters = np.unique(labels)

        if len(clusters) == 1:
            # No split: the current nodes become the leaves of this branch.
            if len(nodes) > 1:
                return [[node] for node in nodes]
            return [nodes[0]]

        subtrees = []
        for cluster in clusters:
            in_cluster = (labels == cluster)
            sub_nodes = nodes[in_cluster]
            sub_adjacency = adjacency[in_cluster, :][:, in_cluster]
            subtrees.append(self._recursive_louvain(sub_adjacency, depth - 1, sub_nodes))
        return subtrees

    def fit(self, input_matrix: Union[sparse.csr_matrix, np.ndarray], force_bipartite: bool = False) \
            -> 'LouvainIteration':
        """Fit algorithm to data.

        Parameters
        ----------
        input_matrix : sparse.csr_matrix, np.ndarray
            Adjacency matrix or biadjacency matrix of the graph.
        force_bipartite :
            If ``True``, force the input matrix to be considered as a biadjacency matrix.

        Returns
        -------
        self: :class:`LouvainIteration`
        """
        self._init_vars()
        adjacency, self.bipartite = get_adjacency(input_matrix, force_bipartite=force_bipartite)

        tree = self._recursive_louvain(adjacency, self.depth)
        dendrogram, _ = get_dendrogram(tree)
        dendrogram = np.array(dendrogram)

        # Shift the third column (distances) so that its smallest value is 1.
        offset = 1 - min(dendrogram[:, 2])
        dendrogram[:, 2] += offset
        self.dendrogram_ = reorder_dendrogram(dendrogram)

        if self.bipartite:
            self._split_vars(input_matrix.shape)
        return self
156
+
157
+
158
class LouvainHierarchy(BaseHierarchy):
    """Hierarchical clustering by Louvain (bottom-up).

    Each level corresponds to an aggregation step of the Louvain algorithm.

    Parameters
    ----------
    resolution : float
        Resolution parameter.
    tol_optimization : float
        Minimum increase in the objective function to enter a new optimization pass.
    tol_aggregation : float
        Minimum increase in the objective function to enter a new aggregation pass.
    shuffle_nodes : bool
        If ``True``, shuffle nodes before optimization.
    random_state : int or np.random.RandomState, optional
        Random number generator or random seed. If ``None``, numpy.random is used.
    verbose : bool
        Verbose mode.

    Attributes
    ----------
    dendrogram_ : np.ndarray
        Dendrogram of the graph.
    dendrogram_row_ : np.ndarray
        Dendrogram for the rows, for bipartite graphs.
    dendrogram_col_ : np.ndarray
        Dendrogram for the columns, for bipartite graphs.
    dendrogram_full_ : np.ndarray
        Dendrogram for both rows and columns, indexed in this order, for bipartite graphs.

    Example
    -------
    >>> from sknetwork.hierarchy import LouvainHierarchy
    >>> from sknetwork.data import house
    >>> louvain = LouvainHierarchy()
    >>> adjacency = house()
    >>> louvain.fit_predict(adjacency)
    array([[3., 2., 1., 2.],
           [4., 1., 1., 2.],
           [6., 0., 1., 3.],
           [5., 7., 2., 5.]])

    Notes
    -----
    Each row of the dendrogram = merge nodes, distance, size of cluster.

    See Also
    --------
    scipy.cluster.hierarchy.dendrogram
    sknetwork.clustering.Louvain
    """

    def __init__(self, resolution: float = 1, tol_optimization: float = 1e-3,
                 tol_aggregation: float = 1e-3, shuffle_nodes: bool = False,
                 random_state: Optional[Union[np.random.RandomState, int]] = None, verbose: bool = False):
        super().__init__()

        self.dendrogram_ = None
        # n_aggregations=1: each call to Louvain performs a single aggregation step,
        # so that every step can be recorded as one level of the hierarchy.
        self._clustering_method = Louvain(
            resolution=resolution,
            tol_optimization=tol_optimization,
            tol_aggregation=tol_aggregation,
            n_aggregations=1,
            shuffle_nodes=shuffle_nodes,
            random_state=random_state,
            verbose=verbose,
        )
        self.bipartite = None

    def _get_hierarchy(self, adjacency: Union[sparse.csr_matrix, np.ndarray]):
        """Get the hierarchy from Louvain.

        Louvain is applied repeatedly, each time to the aggregate graph produced by the previous
        call, until a call no longer changes the number of clusters.

        Parameters
        ----------
        adjacency : sparse.csr_matrix, np.ndarray
            Adjacency matrix of the graph.

        Returns
        -------
        tree: recursive list of list of nodes
        """
        louvain = self._clustering_method

        tree = [[node] for node in range(adjacency.shape[0])]
        labels = louvain.fit_predict(adjacency)
        labels_unique = np.unique(labels)

        while True:
            # Group the current subtrees by cluster label; a cluster with a single
            # subtree is replaced by that subtree to avoid a redundant nesting level.
            grouped = [[tree[node] for node in np.flatnonzero(labels == label)] for label in labels_unique]
            tree = [group[0] if len(group) == 1 else group for group in grouped]

            # Next level: cluster the aggregate graph of the previous call.
            labels = louvain.fit_predict(louvain.aggregate_)
            new_labels_unique = np.unique(labels)
            if len(labels_unique) == len(new_labels_unique):
                # Same number of clusters as before: nothing was merged, stop.
                break
            labels_unique = new_labels_unique
        return tree

    def fit(self, input_matrix: Union[sparse.csr_matrix, np.ndarray], force_bipartite: bool = False) \
            -> 'LouvainHierarchy':
        """Fit algorithm to data.

        Parameters
        ----------
        input_matrix : sparse.csr_matrix, np.ndarray
            Adjacency matrix or biadjacency matrix of the graph.
        force_bipartite :
            If ``True``, force the input matrix to be considered as a biadjacency matrix.

        Returns
        -------
        self: :class:`LouvainHierarchy`
        """
        self._init_vars()
        adjacency, self.bipartite = get_adjacency(input_matrix, force_bipartite=force_bipartite)

        tree = self._get_hierarchy(adjacency)
        dendrogram, _ = get_dendrogram(tree)
        dendrogram = np.array(dendrogram)

        # Shift the third column (distances) so that its smallest value is 1.
        offset = 1 - min(dendrogram[:, 2])
        dendrogram[:, 2] += offset
        self.dendrogram_ = reorder_dendrogram(dendrogram)

        if self.bipartite:
            self._split_vars(input_matrix.shape)
        return self
@@ -0,0 +1,234 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ Created in March 2019
5
+ @author: Thomas Bonald <bonald@enst.fr>
6
+ """
7
+ import numpy as np
8
+ from scipy import sparse
9
+ from sknetwork.hierarchy.paris import AggregateGraph
10
+
11
+ from sknetwork.utils.check import check_format, get_probs, check_square
12
+ from sknetwork.utils.check import check_min_size, check_min_nnz
13
+ from sknetwork.utils.format import directed2undirected
14
+
15
+
16
def _instantiate_vars(adjacency: sparse.csr_matrix, weights: str = 'uniform'):
    """Initialize standard variables for metrics.

    Parameters
    ----------
    adjacency :
        Adjacency matrix of the graph.
    weights :
        Weights of nodes, passed to ``get_probs``.

    Returns
    -------
    aggregate_graph : AggregateGraph
        Aggregate graph built from the node weights and the symmetrized adjacency matrix.
    weights_row : np.ndarray
        Output of ``get_probs`` for the adjacency matrix.
    weights_col : np.ndarray
        Output of ``get_probs`` for the transposed adjacency matrix.
    """
    out_weights = get_probs(weights, adjacency)
    in_weights = get_probs(weights, adjacency.T)

    undirected = directed2undirected(adjacency)
    # AggregateGraph receives the edge weights as floats, with the CSR index arrays.
    edge_weights = undirected.data.astype(float)
    graph = AggregateGraph(out_weights, in_weights, edge_weights, undirected.indices, undirected.indptr)
    return graph, out_weights, in_weights
24
+
25
+
26
def get_sampling_distributions(adjacency: sparse.csr_matrix, dendrogram: np.ndarray, weights: str = 'uniform'):
    """Get sampling distributions over each internal node of the tree.

    The first ``n - 1`` rows of the dendrogram are replayed as merges on an aggregate graph;
    the quantities attached to each merge are read just before the merge is applied.

    Parameters
    ----------
    adjacency :
        Adjacency matrix of the graph.
    dendrogram :
        Dendrogram.
    weights :
        Weights of nodes.
        ``'degree'`` or ``'uniform'`` (default).

    Returns
    -------
    edge_sampling: np.ndarray
        Edge sampling distribution.
    node_sampling: np.ndarray
        Node sampling distribution.
    cluster_weights: np.ndarray
        Cluster weights.
    """
    n = adjacency.shape[0]
    aggregate_graph, _, _ = _instantiate_vars(adjacency, weights)

    n_merges = n - 1
    cluster_weight = np.zeros(n_merges)
    edge_sampling = np.zeros(n_merges)
    node_sampling = np.zeros(n_merges)

    for t in range(n_merges):
        merge = dendrogram[t]
        i = int(merge[0])
        j = int(merge[1])

        # State of the aggregate graph before merge t (read-only here).
        neighbors = aggregate_graph.neighbors
        out_weights = aggregate_graph.cluster_out_weights
        in_weights = aggregate_graph.cluster_in_weights

        if j in neighbors[i]:
            edge_sampling[t] += 2 * neighbors[i][j]
        node_sampling[t] += out_weights[i] * in_weights[j] + \
            out_weights[j] * in_weights[i]
        cluster_weight[t] = out_weights[i] + out_weights[j] + \
            in_weights[i] + in_weights[j]

        for node in {i, j}:
            # Indices below n are original nodes: add their self-loop contribution.
            if node >= n:
                continue
            node_sampling[t] += out_weights[node] * in_weights[node]
            if node in neighbors[node]:
                edge_sampling[t] += neighbors[node][node]

        aggregate_graph.merge(i, j)

    return edge_sampling, node_sampling, cluster_weight / 2
69
+
70
+
71
def dasgupta_cost(adjacency: sparse.csr_matrix, dendrogram: np.ndarray, weights: str = 'uniform',
                  normalized: bool = False) -> float:
    """Dasgupta's cost of a hierarchy.

    Expected size (weights = ``'uniform'``) or expected volume (weights = ``'degree'``) of the cluster induced by
    random edge sampling (closest ancestor of the two nodes in the hierarchy).

    Parameters
    ----------
    adjacency :
        Adjacency matrix of the graph.
    dendrogram :
        Dendrogram.
    weights :
        Weights of nodes.
        ``'degree'`` or ``'uniform'`` (default).
    normalized :
        If ``True``, normalized cost (between 0 and 1).

    Returns
    -------
    cost : float
        Cost.

    Example
    -------
    >>> from sknetwork.hierarchy import dasgupta_cost, Paris
    >>> from sknetwork.data import house
    >>> paris = Paris()
    >>> adjacency = house()
    >>> dendrogram = paris.fit_transform(adjacency)
    >>> cost = dasgupta_cost(adjacency, dendrogram)
    >>> float(np.round(cost, 2))
    3.33

    References
    ----------
    Dasgupta, S. (2016). A cost function for similarity-based hierarchical clustering.
    Proceedings of ACM symposium on Theory of Computing.
    """
    adjacency = check_format(adjacency)
    check_square(adjacency)

    n = adjacency.shape[0]
    check_min_size(n, 2)

    edge_sampling, _, cluster_weight = get_sampling_distributions(adjacency, dendrogram, weights)
    cost = edge_sampling.dot(cluster_weight)

    if not normalized:
        # Undo the normalization: scale by the total edge weight for degree weights,
        # or by the number of nodes for any other weights.
        if weights == 'degree':
            cost *= adjacency.data.sum()
        else:
            cost *= n

    return cost
127
+
128
+
129
def dasgupta_score(adjacency: sparse.csr_matrix, dendrogram: np.ndarray, weights: str = 'uniform') -> float:
    """Dasgupta's score of a hierarchy (quality metric, between 0 and 1).

    Defined as 1 - normalized Dasgupta's cost.

    Parameters
    ----------
    adjacency :
        Adjacency matrix of the graph.
    dendrogram :
        Dendrogram.
    weights :
        Weights of nodes.
        ``'degree'`` or ``'uniform'`` (default).

    Returns
    -------
    score : float
        Score.

    Example
    -------
    >>> from sknetwork.hierarchy import dasgupta_score, Paris
    >>> from sknetwork.data import house
    >>> paris = Paris()
    >>> adjacency = house()
    >>> dendrogram = paris.fit_transform(adjacency)
    >>> score = dasgupta_score(adjacency, dendrogram)
    >>> float(np.round(score, 2))
    0.33

    References
    ----------
    Dasgupta, S. (2016). A cost function for similarity-based hierarchical clustering.
    Proceedings of ACM symposium on Theory of Computing.
    """
    normalized_cost = dasgupta_cost(adjacency, dendrogram, weights, normalized=True)
    return 1 - normalized_cost
166
+
167
+
168
def tree_sampling_divergence(adjacency: sparse.csr_matrix, dendrogram: np.ndarray, weights: str = 'degree',
                             normalized: bool = True) -> float:
    """Tree sampling divergence of a hierarchy (quality metric).

    Parameters
    ----------
    adjacency :
        Adjacency matrix of the graph.
    dendrogram :
        Dendrogram.
    weights :
        Weights of nodes.
        ``'degree'`` (default) or ``'uniform'``.
    normalized :
        If ``True``, normalized score (between 0 and 1).

    Returns
    -------
    score : float
        Score.

    Example
    -------
    >>> from sknetwork.hierarchy import tree_sampling_divergence, Paris
    >>> from sknetwork.data import house
    >>> paris = Paris()
    >>> adjacency = house()
    >>> dendrogram = paris.fit_transform(adjacency)
    >>> score = tree_sampling_divergence(adjacency, dendrogram)
    >>> float(np.round(score, 2))
    0.05

    References
    ----------
    Charpentier, B. & Bonald, T. (2019).
    `Tree Sampling Divergence: An Information-Theoretic Metric for
    Hierarchical Graph Clustering.
    <https://hal.telecom-paristech.fr/hal-02144394/document>`_
    Proceedings of IJCAI.
    """
    adjacency = check_format(adjacency)
    check_square(adjacency)
    check_min_nnz(adjacency.nnz, 1)
    adjacency = adjacency.astype(float)
    n = adjacency.shape[0]
    check_min_size(n, 2)

    # Rescale the edge weights so that they sum to 1.
    adjacency.data /= adjacency.data.sum()
    edge_sampling, node_sampling, _ = get_sampling_distributions(adjacency, dendrogram, weights)

    # Restrict to entries with non-zero edge sampling so the logarithm stays finite.
    support = np.flatnonzero(edge_sampling)
    score = edge_sampling[support].dot(np.log(edge_sampling[support] / node_sampling[support]))
    if not normalized:
        return score

    weights_row = get_probs(weights, adjacency)
    weights_col = get_probs(weights, adjacency.T)

    # Diagonal matrices holding the inverse of the stored node weights.
    out_diag = sparse.diags(weights_row, shape=(n, n), format='csr')
    out_diag.data = 1 / out_diag.data
    in_diag = sparse.diags(weights_col, shape=(n, n), format='csr')
    in_diag.data = 1 / in_diag.data
    sampling_ratio = out_diag.dot(adjacency.dot(in_diag))

    # Same sparsity pattern with unit entries: yields the edge weights on the
    # same support as sampling_ratio.
    out_diag.data = np.ones(len(out_diag.data))
    in_diag.data = np.ones(len(in_diag.data))
    edge_weights = out_diag.dot(adjacency.dot(in_diag))

    mutual_information = edge_weights.data.dot(np.log(sampling_ratio.data))
    if mutual_information > 0:
        score = score / mutual_information
    return score