scikit-network 0.33.4__cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (229) hide show
  1. scikit_network-0.33.4.dist-info/METADATA +122 -0
  2. scikit_network-0.33.4.dist-info/RECORD +229 -0
  3. scikit_network-0.33.4.dist-info/WHEEL +6 -0
  4. scikit_network-0.33.4.dist-info/licenses/AUTHORS.rst +43 -0
  5. scikit_network-0.33.4.dist-info/licenses/LICENSE +34 -0
  6. scikit_network-0.33.4.dist-info/top_level.txt +1 -0
  7. scikit_network.libs/libgomp-a34b3233.so.1.0.0 +0 -0
  8. sknetwork/__init__.py +21 -0
  9. sknetwork/base.py +67 -0
  10. sknetwork/classification/__init__.py +8 -0
  11. sknetwork/classification/base.py +138 -0
  12. sknetwork/classification/base_rank.py +129 -0
  13. sknetwork/classification/diffusion.py +127 -0
  14. sknetwork/classification/knn.py +131 -0
  15. sknetwork/classification/metrics.py +205 -0
  16. sknetwork/classification/pagerank.py +58 -0
  17. sknetwork/classification/propagation.py +144 -0
  18. sknetwork/classification/tests/__init__.py +1 -0
  19. sknetwork/classification/tests/test_API.py +30 -0
  20. sknetwork/classification/tests/test_diffusion.py +77 -0
  21. sknetwork/classification/tests/test_knn.py +23 -0
  22. sknetwork/classification/tests/test_metrics.py +53 -0
  23. sknetwork/classification/tests/test_pagerank.py +20 -0
  24. sknetwork/classification/tests/test_propagation.py +24 -0
  25. sknetwork/classification/vote.cpp +27593 -0
  26. sknetwork/classification/vote.cpython-312-x86_64-linux-gnu.so +0 -0
  27. sknetwork/classification/vote.pyx +56 -0
  28. sknetwork/clustering/__init__.py +8 -0
  29. sknetwork/clustering/base.py +168 -0
  30. sknetwork/clustering/kcenters.py +251 -0
  31. sknetwork/clustering/leiden.py +238 -0
  32. sknetwork/clustering/leiden_core.cpp +31928 -0
  33. sknetwork/clustering/leiden_core.cpython-312-x86_64-linux-gnu.so +0 -0
  34. sknetwork/clustering/leiden_core.pyx +124 -0
  35. sknetwork/clustering/louvain.py +282 -0
  36. sknetwork/clustering/louvain_core.cpp +31573 -0
  37. sknetwork/clustering/louvain_core.cpython-312-x86_64-linux-gnu.so +0 -0
  38. sknetwork/clustering/louvain_core.pyx +124 -0
  39. sknetwork/clustering/metrics.py +91 -0
  40. sknetwork/clustering/postprocess.py +66 -0
  41. sknetwork/clustering/propagation_clustering.py +100 -0
  42. sknetwork/clustering/tests/__init__.py +1 -0
  43. sknetwork/clustering/tests/test_API.py +38 -0
  44. sknetwork/clustering/tests/test_kcenters.py +60 -0
  45. sknetwork/clustering/tests/test_leiden.py +34 -0
  46. sknetwork/clustering/tests/test_louvain.py +135 -0
  47. sknetwork/clustering/tests/test_metrics.py +50 -0
  48. sknetwork/clustering/tests/test_postprocess.py +39 -0
  49. sknetwork/data/__init__.py +6 -0
  50. sknetwork/data/base.py +33 -0
  51. sknetwork/data/load.py +292 -0
  52. sknetwork/data/models.py +459 -0
  53. sknetwork/data/parse.py +644 -0
  54. sknetwork/data/test_graphs.py +93 -0
  55. sknetwork/data/tests/__init__.py +1 -0
  56. sknetwork/data/tests/test_API.py +30 -0
  57. sknetwork/data/tests/test_base.py +14 -0
  58. sknetwork/data/tests/test_load.py +61 -0
  59. sknetwork/data/tests/test_models.py +52 -0
  60. sknetwork/data/tests/test_parse.py +250 -0
  61. sknetwork/data/tests/test_test_graphs.py +29 -0
  62. sknetwork/data/tests/test_toy_graphs.py +68 -0
  63. sknetwork/data/timeout.py +38 -0
  64. sknetwork/data/toy_graphs.py +611 -0
  65. sknetwork/embedding/__init__.py +8 -0
  66. sknetwork/embedding/base.py +90 -0
  67. sknetwork/embedding/force_atlas.py +198 -0
  68. sknetwork/embedding/louvain_embedding.py +142 -0
  69. sknetwork/embedding/random_projection.py +131 -0
  70. sknetwork/embedding/spectral.py +137 -0
  71. sknetwork/embedding/spring.py +198 -0
  72. sknetwork/embedding/svd.py +351 -0
  73. sknetwork/embedding/tests/__init__.py +1 -0
  74. sknetwork/embedding/tests/test_API.py +49 -0
  75. sknetwork/embedding/tests/test_force_atlas.py +35 -0
  76. sknetwork/embedding/tests/test_louvain_embedding.py +33 -0
  77. sknetwork/embedding/tests/test_random_projection.py +28 -0
  78. sknetwork/embedding/tests/test_spectral.py +81 -0
  79. sknetwork/embedding/tests/test_spring.py +50 -0
  80. sknetwork/embedding/tests/test_svd.py +43 -0
  81. sknetwork/gnn/__init__.py +10 -0
  82. sknetwork/gnn/activation.py +117 -0
  83. sknetwork/gnn/base.py +181 -0
  84. sknetwork/gnn/base_activation.py +90 -0
  85. sknetwork/gnn/base_layer.py +109 -0
  86. sknetwork/gnn/gnn_classifier.py +305 -0
  87. sknetwork/gnn/layer.py +153 -0
  88. sknetwork/gnn/loss.py +180 -0
  89. sknetwork/gnn/neighbor_sampler.py +65 -0
  90. sknetwork/gnn/optimizer.py +164 -0
  91. sknetwork/gnn/tests/__init__.py +1 -0
  92. sknetwork/gnn/tests/test_activation.py +56 -0
  93. sknetwork/gnn/tests/test_base.py +75 -0
  94. sknetwork/gnn/tests/test_base_layer.py +37 -0
  95. sknetwork/gnn/tests/test_gnn_classifier.py +130 -0
  96. sknetwork/gnn/tests/test_layers.py +80 -0
  97. sknetwork/gnn/tests/test_loss.py +33 -0
  98. sknetwork/gnn/tests/test_neigh_sampler.py +23 -0
  99. sknetwork/gnn/tests/test_optimizer.py +43 -0
  100. sknetwork/gnn/tests/test_utils.py +41 -0
  101. sknetwork/gnn/utils.py +127 -0
  102. sknetwork/hierarchy/__init__.py +6 -0
  103. sknetwork/hierarchy/base.py +90 -0
  104. sknetwork/hierarchy/louvain_hierarchy.py +260 -0
  105. sknetwork/hierarchy/metrics.py +234 -0
  106. sknetwork/hierarchy/paris.cpp +37877 -0
  107. sknetwork/hierarchy/paris.cpython-312-x86_64-linux-gnu.so +0 -0
  108. sknetwork/hierarchy/paris.pyx +310 -0
  109. sknetwork/hierarchy/postprocess.py +350 -0
  110. sknetwork/hierarchy/tests/__init__.py +1 -0
  111. sknetwork/hierarchy/tests/test_API.py +24 -0
  112. sknetwork/hierarchy/tests/test_algos.py +34 -0
  113. sknetwork/hierarchy/tests/test_metrics.py +62 -0
  114. sknetwork/hierarchy/tests/test_postprocess.py +57 -0
  115. sknetwork/linalg/__init__.py +9 -0
  116. sknetwork/linalg/basics.py +37 -0
  117. sknetwork/linalg/diteration.cpp +27409 -0
  118. sknetwork/linalg/diteration.cpython-312-x86_64-linux-gnu.so +0 -0
  119. sknetwork/linalg/diteration.pyx +47 -0
  120. sknetwork/linalg/eig_solver.py +93 -0
  121. sknetwork/linalg/laplacian.py +15 -0
  122. sknetwork/linalg/normalizer.py +86 -0
  123. sknetwork/linalg/operators.py +225 -0
  124. sknetwork/linalg/polynome.py +76 -0
  125. sknetwork/linalg/ppr_solver.py +170 -0
  126. sknetwork/linalg/push.cpp +31081 -0
  127. sknetwork/linalg/push.cpython-312-x86_64-linux-gnu.so +0 -0
  128. sknetwork/linalg/push.pyx +71 -0
  129. sknetwork/linalg/sparse_lowrank.py +142 -0
  130. sknetwork/linalg/svd_solver.py +91 -0
  131. sknetwork/linalg/tests/__init__.py +1 -0
  132. sknetwork/linalg/tests/test_eig.py +44 -0
  133. sknetwork/linalg/tests/test_laplacian.py +18 -0
  134. sknetwork/linalg/tests/test_normalization.py +34 -0
  135. sknetwork/linalg/tests/test_operators.py +66 -0
  136. sknetwork/linalg/tests/test_polynome.py +38 -0
  137. sknetwork/linalg/tests/test_ppr.py +50 -0
  138. sknetwork/linalg/tests/test_sparse_lowrank.py +61 -0
  139. sknetwork/linalg/tests/test_svd.py +38 -0
  140. sknetwork/linkpred/__init__.py +2 -0
  141. sknetwork/linkpred/base.py +46 -0
  142. sknetwork/linkpred/nn.py +126 -0
  143. sknetwork/linkpred/tests/__init__.py +1 -0
  144. sknetwork/linkpred/tests/test_nn.py +26 -0
  145. sknetwork/log.py +19 -0
  146. sknetwork/path/__init__.py +5 -0
  147. sknetwork/path/dag.py +54 -0
  148. sknetwork/path/distances.py +98 -0
  149. sknetwork/path/search.py +31 -0
  150. sknetwork/path/shortest_path.py +61 -0
  151. sknetwork/path/tests/__init__.py +1 -0
  152. sknetwork/path/tests/test_dag.py +37 -0
  153. sknetwork/path/tests/test_distances.py +62 -0
  154. sknetwork/path/tests/test_search.py +40 -0
  155. sknetwork/path/tests/test_shortest_path.py +40 -0
  156. sknetwork/ranking/__init__.py +8 -0
  157. sknetwork/ranking/base.py +57 -0
  158. sknetwork/ranking/betweenness.cpp +9716 -0
  159. sknetwork/ranking/betweenness.cpython-312-x86_64-linux-gnu.so +0 -0
  160. sknetwork/ranking/betweenness.pyx +97 -0
  161. sknetwork/ranking/closeness.py +92 -0
  162. sknetwork/ranking/hits.py +90 -0
  163. sknetwork/ranking/katz.py +79 -0
  164. sknetwork/ranking/pagerank.py +106 -0
  165. sknetwork/ranking/postprocess.py +37 -0
  166. sknetwork/ranking/tests/__init__.py +1 -0
  167. sknetwork/ranking/tests/test_API.py +32 -0
  168. sknetwork/ranking/tests/test_betweenness.py +38 -0
  169. sknetwork/ranking/tests/test_closeness.py +30 -0
  170. sknetwork/ranking/tests/test_hits.py +20 -0
  171. sknetwork/ranking/tests/test_pagerank.py +62 -0
  172. sknetwork/ranking/tests/test_postprocess.py +26 -0
  173. sknetwork/regression/__init__.py +4 -0
  174. sknetwork/regression/base.py +57 -0
  175. sknetwork/regression/diffusion.py +204 -0
  176. sknetwork/regression/tests/__init__.py +1 -0
  177. sknetwork/regression/tests/test_API.py +32 -0
  178. sknetwork/regression/tests/test_diffusion.py +56 -0
  179. sknetwork/sknetwork.py +3 -0
  180. sknetwork/test_base.py +35 -0
  181. sknetwork/test_log.py +15 -0
  182. sknetwork/topology/__init__.py +8 -0
  183. sknetwork/topology/cliques.cpp +32574 -0
  184. sknetwork/topology/cliques.cpython-312-x86_64-linux-gnu.so +0 -0
  185. sknetwork/topology/cliques.pyx +149 -0
  186. sknetwork/topology/core.cpp +30660 -0
  187. sknetwork/topology/core.cpython-312-x86_64-linux-gnu.so +0 -0
  188. sknetwork/topology/core.pyx +90 -0
  189. sknetwork/topology/cycles.py +243 -0
  190. sknetwork/topology/minheap.cpp +27341 -0
  191. sknetwork/topology/minheap.cpython-312-x86_64-linux-gnu.so +0 -0
  192. sknetwork/topology/minheap.pxd +20 -0
  193. sknetwork/topology/minheap.pyx +109 -0
  194. sknetwork/topology/structure.py +194 -0
  195. sknetwork/topology/tests/__init__.py +1 -0
  196. sknetwork/topology/tests/test_cliques.py +28 -0
  197. sknetwork/topology/tests/test_core.py +19 -0
  198. sknetwork/topology/tests/test_cycles.py +65 -0
  199. sknetwork/topology/tests/test_structure.py +85 -0
  200. sknetwork/topology/tests/test_triangles.py +38 -0
  201. sknetwork/topology/tests/test_wl.py +72 -0
  202. sknetwork/topology/triangles.cpp +8903 -0
  203. sknetwork/topology/triangles.cpython-312-x86_64-linux-gnu.so +0 -0
  204. sknetwork/topology/triangles.pyx +151 -0
  205. sknetwork/topology/weisfeiler_lehman.py +133 -0
  206. sknetwork/topology/weisfeiler_lehman_core.cpp +27644 -0
  207. sknetwork/topology/weisfeiler_lehman_core.cpython-312-x86_64-linux-gnu.so +0 -0
  208. sknetwork/topology/weisfeiler_lehman_core.pyx +114 -0
  209. sknetwork/utils/__init__.py +7 -0
  210. sknetwork/utils/check.py +355 -0
  211. sknetwork/utils/format.py +221 -0
  212. sknetwork/utils/membership.py +82 -0
  213. sknetwork/utils/neighbors.py +115 -0
  214. sknetwork/utils/tests/__init__.py +1 -0
  215. sknetwork/utils/tests/test_check.py +190 -0
  216. sknetwork/utils/tests/test_format.py +63 -0
  217. sknetwork/utils/tests/test_membership.py +24 -0
  218. sknetwork/utils/tests/test_neighbors.py +41 -0
  219. sknetwork/utils/tests/test_tfidf.py +18 -0
  220. sknetwork/utils/tests/test_values.py +66 -0
  221. sknetwork/utils/tfidf.py +37 -0
  222. sknetwork/utils/values.py +76 -0
  223. sknetwork/visualization/__init__.py +4 -0
  224. sknetwork/visualization/colors.py +34 -0
  225. sknetwork/visualization/dendrograms.py +277 -0
  226. sknetwork/visualization/graphs.py +1039 -0
  227. sknetwork/visualization/tests/__init__.py +1 -0
  228. sknetwork/visualization/tests/test_dendrograms.py +53 -0
  229. sknetwork/visualization/tests/test_graphs.py +176 -0
@@ -0,0 +1,260 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ Created in March 2020
5
+ @author: Quentin Lutz <qlutz@enst.fr>
6
+ @author: Thomas Bonald <tbonald@enst.fr>
7
+ """
8
+ from typing import Optional, Union
9
+
10
+ import numpy as np
11
+ from scipy import sparse
12
+
13
+ from sknetwork.clustering.louvain import Louvain
14
+ from sknetwork.hierarchy.base import BaseHierarchy
15
+ from sknetwork.hierarchy.postprocess import get_dendrogram, reorder_dendrogram
16
+ from sknetwork.utils.check import check_format
17
+ from sknetwork.utils.format import get_adjacency
18
+
19
+
20
class LouvainIteration(BaseHierarchy):
    r"""Hierarchical clustering by successive instances of Louvain (top-down).

    The graph is first clustered by Louvain; each cluster is then clustered again on its
    induced subgraph, recursively, until the requested depth is reached or a subgraph can
    no longer be split.

    Parameters
    ----------
    depth : int
        Depth of the tree.
        A negative value is interpreted as no limit (return a tree of maximum depth).
    resolution : float
        Resolution parameter.
    tol_optimization : float
        Minimum increase in the objective function to enter a new optimization pass.
    tol_aggregation : float
        Minimum increase in the objective function to enter a new aggregation pass.
    n_aggregations : int
        Maximum number of aggregations.
        A negative value is interpreted as no limit.
    shuffle_nodes : bool
        If ``True``, shuffle nodes before optimization.
    random_state : np.random.RandomState or int, optional
        Random number generator or random seed. If ``None``, numpy.random is used.
    verbose : bool
        Verbose mode.

    Attributes
    ----------
    dendrogram\_ : np.ndarray
        Dendrogram of the graph.

    Example
    -------
    >>> from sknetwork.hierarchy import LouvainIteration
    >>> from sknetwork.data import house
    >>> louvain = LouvainIteration()
    >>> adjacency = house()
    >>> louvain.fit_predict(adjacency)
    array([[3., 2., 1., 2.],
           [4., 1., 1., 2.],
           [6., 0., 1., 3.],
           [5., 7., 2., 5.]])

    Notes
    -----
    Each row of the dendrogram = merge nodes, distance, size of cluster.

    See Also
    --------
    scipy.cluster.hierarchy.dendrogram
    sknetwork.clustering.Louvain
    """

    def __init__(self, depth: int = 3, resolution: float = 1, tol_optimization: float = 1e-3,
                 tol_aggregation: float = 1e-3, n_aggregations: int = -1, shuffle_nodes: bool = False,
                 random_state: Optional[Union[np.random.RandomState, int]] = None, verbose: bool = False):
        super().__init__()

        self.dendrogram_ = None
        self.depth = depth
        # A single Louvain instance is reused for every level of the recursion.
        self._clustering_method = Louvain(resolution=resolution, tol_optimization=tol_optimization,
                                          tol_aggregation=tol_aggregation, n_aggregations=n_aggregations,
                                          shuffle_nodes=shuffle_nodes, random_state=random_state, verbose=verbose)
        self.bipartite = None

    def _recursive_louvain(self, adjacency: Union[sparse.csr_matrix, np.ndarray], depth: int,
                           nodes: Optional[np.ndarray] = None):
        """Recursive function for fit.

        Parameters
        ----------
        adjacency : sparse.csr_matrix, np.ndarray
            Adjacency matrix of the graph.
        depth : int
            Remaining depth of the recursion; ``0`` stops the recursion, a negative value never does.
        nodes : np.ndarray
            The indices of the current nodes in the original graph.
            If ``None``, all nodes of ``adjacency`` are used (top-level call).

        Returns
        -------
        tree: recursive list of list of nodes.
        """
        n = adjacency.shape[0]
        if nodes is None:
            nodes = np.arange(n)

        # Only cluster when there is at least one edge and some depth left;
        # otherwise put every node in the same (single) cluster.
        if adjacency.nnz and depth:
            labels = self._clustering_method.fit_predict(adjacency)
        else:
            labels = np.zeros(n)

        clusters = np.unique(labels)

        # A single cluster cannot be split further: return its nodes as leaves.
        if len(clusters) == 1:
            if len(nodes) > 1:
                return [[node] for node in nodes]
            return [nodes[0]]

        tree = []
        for cluster in clusters:
            mask = (labels == cluster)
            nodes_cluster = nodes[mask]
            # Subgraph induced by the nodes of the cluster.
            adjacency_cluster = adjacency[mask, :][:, mask]
            tree.append(self._recursive_louvain(adjacency_cluster, depth - 1, nodes_cluster))
        return tree

    def fit(self, input_matrix: Union[sparse.csr_matrix, np.ndarray], force_bipartite: bool = False) \
            -> 'LouvainIteration':
        """Fit algorithm to data.

        Parameters
        ----------
        input_matrix : sparse.csr_matrix, np.ndarray
            Adjacency matrix or biadjacency matrix of the graph.
        force_bipartite : bool
            If ``True``, force the input matrix to be considered as a biadjacency matrix.

        Returns
        -------
        self: :class:`LouvainIteration`
        """
        self._init_vars()
        adjacency, self.bipartite = get_adjacency(input_matrix, force_bipartite=force_bipartite)
        tree = self._recursive_louvain(adjacency, self.depth)
        dendrogram, _ = get_dendrogram(tree)
        dendrogram = np.array(dendrogram)
        # Shift the third column (distances) so that its minimum is 1.
        dendrogram[:, 2] += 1 - dendrogram[:, 2].min()
        self.dendrogram_ = reorder_dendrogram(dendrogram)
        if self.bipartite:
            self._split_vars(input_matrix.shape)
        return self
150
+
151
+
152
class LouvainHierarchy(BaseHierarchy):
    r"""Hierarchical clustering by Louvain (bottom-up).

    Each level corresponds to an aggregation step of the Louvain algorithm.

    Parameters
    ----------
    resolution : float
        Resolution parameter.
    tol_optimization : float
        Minimum increase in the objective function to enter a new optimization pass.
    tol_aggregation : float
        Minimum increase in the objective function to enter a new aggregation pass.
    shuffle_nodes : bool
        If ``True``, shuffle nodes before optimization.
    random_state : np.random.RandomState or int, optional
        Random number generator or random seed. If ``None``, numpy.random is used.
    verbose : bool
        Verbose mode.

    Attributes
    ----------
    dendrogram\_ : np.ndarray
        Dendrogram of the graph.

    Example
    -------
    >>> from sknetwork.hierarchy import LouvainHierarchy
    >>> from sknetwork.data import house
    >>> louvain = LouvainHierarchy()
    >>> adjacency = house()
    >>> louvain.fit_predict(adjacency)
    array([[3., 2., 1., 2.],
           [4., 1., 1., 2.],
           [6., 0., 1., 3.],
           [5., 7., 2., 5.]])

    Notes
    -----
    Each row of the dendrogram = merge nodes, distance, size of cluster.

    See Also
    --------
    scipy.cluster.hierarchy.dendrogram
    sknetwork.clustering.Louvain
    """

    def __init__(self, resolution: float = 1, tol_optimization: float = 1e-3,
                 tol_aggregation: float = 1e-3, shuffle_nodes: bool = False,
                 random_state: Optional[Union[np.random.RandomState, int]] = None, verbose: bool = False):
        super().__init__()

        self.dendrogram_ = None
        # n_aggregations=1: each call to Louvain performs one aggregation step,
        # so that successive calls produce the successive levels of the hierarchy.
        self._clustering_method = Louvain(resolution=resolution, tol_optimization=tol_optimization,
                                          tol_aggregation=tol_aggregation, n_aggregations=1,
                                          shuffle_nodes=shuffle_nodes, random_state=random_state, verbose=verbose)
        self.bipartite = None

    def _get_hierarchy(self, adjacency: Union[sparse.csr_matrix, np.ndarray]):
        """Get the hierarchy from Louvain.

        Louvain is applied repeatedly, each time on the aggregate graph of the previous
        call, until the number of clusters no longer changes.

        Parameters
        ----------
        adjacency : sparse.csr_matrix, np.ndarray
            Adjacency matrix of the graph.

        Returns
        -------
        tree: recursive list of list of nodes
        """
        tree = [[node] for node in range(adjacency.shape[0])]
        labels = self._clustering_method.fit_predict(adjacency)
        labels_unique = np.unique(labels)
        while True:
            # Group the current subtrees by label; a cluster made of a single subtree is not nested further.
            tree = [[tree[node] for node in np.flatnonzero(labels == label)] for label in labels_unique]
            tree = [cluster[0] if len(cluster) == 1 else cluster for cluster in tree]
            # NOTE(review): assumes node k of the aggregate graph corresponds to the k-th unique label,
            # so that labels of the next level index the entries of ``tree`` - confirm against Louvain.
            aggregate = self._clustering_method.aggregate_
            labels = self._clustering_method.fit_predict(aggregate)
            new_labels_unique = np.unique(labels)
            # Stop as soon as a new pass does not change the number of clusters.
            if len(new_labels_unique) == len(labels_unique):
                break
            labels_unique = new_labels_unique
        return tree

    def fit(self, input_matrix: Union[sparse.csr_matrix, np.ndarray], force_bipartite: bool = False) \
            -> 'LouvainHierarchy':
        """Fit algorithm to data.

        Parameters
        ----------
        input_matrix : sparse.csr_matrix, np.ndarray
            Adjacency matrix or biadjacency matrix of the graph.
        force_bipartite : bool
            If ``True``, force the input matrix to be considered as a biadjacency matrix.

        Returns
        -------
        self: :class:`LouvainHierarchy`
        """
        self._init_vars()
        adjacency, self.bipartite = get_adjacency(input_matrix, force_bipartite=force_bipartite)
        tree = self._get_hierarchy(adjacency)
        dendrogram, _ = get_dendrogram(tree)
        dendrogram = np.array(dendrogram)
        # Shift the third column (distances) so that its minimum is 1.
        dendrogram[:, 2] += 1 - dendrogram[:, 2].min()
        self.dendrogram_ = reorder_dendrogram(dendrogram)
        if self.bipartite:
            self._split_vars(input_matrix.shape)
        return self
@@ -0,0 +1,234 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ Created on March 2019
5
+ @author: Thomas Bonald <bonald@enst.fr>
6
+ """
7
+ import numpy as np
8
+ from scipy import sparse
9
+ from sknetwork.hierarchy.paris import AggregateGraph
10
+
11
+ from sknetwork.utils.check import check_format, get_probs, check_square
12
+ from sknetwork.utils.check import check_min_size, check_min_nnz
13
+ from sknetwork.utils.format import directed2undirected
14
+
15
+
16
def _instantiate_vars(adjacency: sparse.csr_matrix, weights: str = 'uniform'):
    """Build the aggregate graph and the node weights used by the metrics.

    Parameters
    ----------
    adjacency :
        Adjacency matrix of the graph.
    weights :
        Weights of nodes, passed to ``get_probs``.

    Returns
    -------
    aggregate_graph : AggregateGraph
        Aggregate graph built from the symmetrized adjacency matrix.
    weights_row : np.ndarray
        Weights computed from ``adjacency``.
    weights_col : np.ndarray
        Weights computed from the transpose of ``adjacency``.
    """
    out_probs = get_probs(weights, adjacency)
    in_probs = get_probs(weights, adjacency.T)
    undirected = directed2undirected(adjacency)
    # The aggregate graph is built from the raw CSR arrays of the symmetrized graph.
    graph = AggregateGraph(
        out_probs,
        in_probs,
        undirected.data.astype(float),
        undirected.indices,
        undirected.indptr,
    )
    return graph, out_probs, in_probs
24
+
25
+
26
def get_sampling_distributions(adjacency: sparse.csr_matrix, dendrogram: np.ndarray, weights: str = 'uniform'):
    """Get sampling distributions over each internal node of the tree.

    The dendrogram is replayed merge by merge on an aggregate graph; one value per merge
    is recorded in each of the returned arrays.

    Parameters
    ----------
    adjacency :
        Adjacency matrix of the graph.
    dendrogram :
        Dendrogram.
    weights :
        Weights of nodes.
        ``'degree'`` or ``'uniform'`` (default).

    Returns
    -------
    edge_sampling: np.ndarray
        Edge sampling distribution.
    node_sampling: np.ndarray
        Node sampling distribution.
    cluster_weights: np.ndarray
        Cluster weights.
    """
    n_nodes = adjacency.shape[0]
    n_merges = n_nodes - 1
    graph, _, _ = _instantiate_vars(adjacency, weights)
    cluster_weight = np.zeros(n_merges)
    edge_sampling = np.zeros(n_merges)
    node_sampling = np.zeros(n_merges)

    for step in range(n_merges):
        first = int(dendrogram[step][0])
        second = int(dendrogram[step][1])

        # Weight of the edges between the two merged clusters (counted in both directions).
        if second in graph.neighbors[first]:
            edge_sampling[step] += 2 * graph.neighbors[first][second]

        # Probability of sampling one node in each cluster, in either order.
        cross_weight = graph.cluster_out_weights[first] * graph.cluster_in_weights[second] + \
            graph.cluster_out_weights[second] * graph.cluster_in_weights[first]
        node_sampling[step] += cross_weight

        cluster_weight[step] = graph.cluster_out_weights[first] + graph.cluster_out_weights[second] + \
            graph.cluster_in_weights[first] + graph.cluster_in_weights[second]

        for member in {first, second}:
            # Only indices below n (presumably original nodes rather than merged clusters)
            # contribute their self-loop here.
            if member >= n_nodes:
                continue
            node_sampling[step] += graph.cluster_out_weights[member] * graph.cluster_in_weights[member]
            if member in graph.neighbors[member]:
                edge_sampling[step] += graph.neighbors[member][member]

        # Merge only after all quantities of this step have been read.
        graph.merge(first, second)

    # Out- and in-weights were summed above, hence the division by 2.
    return edge_sampling, node_sampling, cluster_weight / 2
69
+
70
+
71
def dasgupta_cost(adjacency: sparse.csr_matrix, dendrogram: np.ndarray, weights: str = 'uniform',
                  normalized: bool = False) -> float:
    """Dasgupta's cost of a hierarchy.

    Expected size (weights = ``'uniform'``) or expected volume (weights = ``'degree'``) of the cluster induced by
    random edge sampling (closest ancestor of the two nodes in the hierarchy).

    Parameters
    ----------
    adjacency :
        Adjacency matrix of the graph.
    dendrogram :
        Dendrogram.
    weights :
        Weights of nodes.
        ``'degree'`` or ``'uniform'`` (default).
    normalized :
        If ``True``, normalized cost (between 0 and 1).

    Returns
    -------
    cost : float
        Cost.

    Example
    -------
    >>> from sknetwork.hierarchy import dasgupta_cost, Paris
    >>> from sknetwork.data import house
    >>> paris = Paris()
    >>> adjacency = house()
    >>> dendrogram = paris.fit_transform(adjacency)
    >>> cost = dasgupta_cost(adjacency, dendrogram)
    >>> float(np.round(cost, 2))
    3.33

    References
    ----------
    Dasgupta, S. (2016). A cost function for similarity-based hierarchical clustering.
    Proceedings of ACM symposium on Theory of Computing.
    """
    adjacency = check_format(adjacency)
    check_square(adjacency)

    n = adjacency.shape[0]
    check_min_size(n, 2)

    edge_sampling, _, cluster_weight = get_sampling_distributions(adjacency, dendrogram, weights)
    # Sum over the merges of (edge sampling value) x (weight of the merged cluster).
    cost = edge_sampling.dot(cluster_weight)

    if not normalized:
        # Rescale by the total edge weight (degree weights) or by the number of nodes (uniform weights).
        if weights == 'degree':
            cost *= adjacency.data.sum()
        else:
            cost *= n

    return cost
127
+
128
+
129
def dasgupta_score(adjacency: sparse.csr_matrix, dendrogram: np.ndarray, weights: str = 'uniform') -> float:
    """Dasgupta's score of a hierarchy (quality metric, between 0 and 1).

    Defined as 1 - normalized Dasgupta's cost.

    Parameters
    ----------
    adjacency :
        Adjacency matrix of the graph.
    dendrogram :
        Dendrogram.
    weights :
        Weights of nodes.
        ``'degree'`` or ``'uniform'`` (default).

    Returns
    -------
    score : float
        Score.

    Example
    -------
    >>> from sknetwork.hierarchy import dasgupta_score, Paris
    >>> from sknetwork.data import house
    >>> paris = Paris()
    >>> adjacency = house()
    >>> dendrogram = paris.fit_transform(adjacency)
    >>> score = dasgupta_score(adjacency, dendrogram)
    >>> float(np.round(score, 2))
    0.33

    References
    ----------
    Dasgupta, S. (2016). A cost function for similarity-based hierarchical clustering.
    Proceedings of ACM symposium on Theory of Computing.
    """
    normalized_cost = dasgupta_cost(adjacency, dendrogram, weights, normalized=True)
    return 1 - normalized_cost
166
+
167
+
168
def tree_sampling_divergence(adjacency: sparse.csr_matrix, dendrogram: np.ndarray, weights: str = 'degree',
                             normalized: bool = True) -> float:
    """Tree sampling divergence of a hierarchy (quality metric).

    Parameters
    ----------
    adjacency :
        Adjacency matrix of the graph.
    dendrogram :
        Dendrogram.
    weights :
        Weights of nodes.
        ``'degree'`` (default) or ``'uniform'``.
    normalized :
        If ``True``, normalized score (between 0 and 1).

    Returns
    -------
    score : float
        Score.

    Example
    -------
    >>> from sknetwork.hierarchy import tree_sampling_divergence, Paris
    >>> from sknetwork.data import house
    >>> paris = Paris()
    >>> adjacency = house()
    >>> dendrogram = paris.fit_transform(adjacency)
    >>> score = tree_sampling_divergence(adjacency, dendrogram)
    >>> float(np.round(score, 2))
    0.05

    References
    ----------
    Charpentier, B. & Bonald, T. (2019).
    `Tree Sampling Divergence: An Information-Theoretic Metric for
    Hierarchical Graph Clustering.
    <https://hal.telecom-paristech.fr/hal-02144394/document>`_
    Proceedings of IJCAI.
    """
    adjacency = check_format(adjacency)
    check_square(adjacency)
    check_min_nnz(adjacency.nnz, 1)
    # astype returns a copy, so the in-place normalization below does not modify the caller's matrix.
    adjacency = adjacency.astype(float)
    n = adjacency.shape[0]
    check_min_size(n, 2)

    # Normalize edge weights so that they sum to 1.
    adjacency.data /= adjacency.data.sum()
    edge_sampling, node_sampling, _ = get_sampling_distributions(adjacency, dendrogram, weights)

    # Restrict the sum to merges with non-zero edge sampling value (avoids log(0)).
    support = np.flatnonzero(edge_sampling)
    edge_probs = edge_sampling[support]
    score = edge_probs.dot(np.log(edge_probs / node_sampling[support]))
    if not normalized:
        return score

    weights_row = get_probs(weights, adjacency)
    weights_col = get_probs(weights, adjacency.T)
    row_scaling = sparse.diags(weights_row, shape=(n, n), format='csr')
    row_scaling.data = 1 / row_scaling.data
    col_scaling = sparse.diags(weights_col, shape=(n, n), format='csr')
    col_scaling.data = 1 / col_scaling.data
    # Entry (i, j): edge weight divided by the product of the weights of i and j.
    sampling_ratio = row_scaling.dot(adjacency.dot(col_scaling))
    # Same product with unit scalings: yields the edge weights through the same sequence of sparse
    # products, presumably so that its data array lines up entry by entry with sampling_ratio.data.
    row_scaling.data = np.ones(len(row_scaling.data))
    col_scaling.data = np.ones(len(col_scaling.data))
    edge_weights = row_scaling.dot(adjacency.dot(col_scaling))
    mutual_information = edge_weights.data.dot(np.log(sampling_ratio.data))
    # Leave the score unnormalized when the normalizing constant is not positive.
    if mutual_information > 0:
        return score / mutual_information
    return score