scikit-network 0.28.3__cp39-cp39-macosx_12_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of scikit-network might be problematic. Click here for more details.

Files changed (240) hide show
  1. scikit_network-0.28.3.dist-info/AUTHORS.rst +41 -0
  2. scikit_network-0.28.3.dist-info/LICENSE +34 -0
  3. scikit_network-0.28.3.dist-info/METADATA +457 -0
  4. scikit_network-0.28.3.dist-info/RECORD +240 -0
  5. scikit_network-0.28.3.dist-info/WHEEL +5 -0
  6. scikit_network-0.28.3.dist-info/top_level.txt +1 -0
  7. sknetwork/__init__.py +21 -0
  8. sknetwork/classification/__init__.py +8 -0
  9. sknetwork/classification/base.py +84 -0
  10. sknetwork/classification/base_rank.py +143 -0
  11. sknetwork/classification/diffusion.py +134 -0
  12. sknetwork/classification/knn.py +162 -0
  13. sknetwork/classification/metrics.py +205 -0
  14. sknetwork/classification/pagerank.py +66 -0
  15. sknetwork/classification/propagation.py +152 -0
  16. sknetwork/classification/tests/__init__.py +1 -0
  17. sknetwork/classification/tests/test_API.py +35 -0
  18. sknetwork/classification/tests/test_diffusion.py +37 -0
  19. sknetwork/classification/tests/test_knn.py +24 -0
  20. sknetwork/classification/tests/test_metrics.py +53 -0
  21. sknetwork/classification/tests/test_pagerank.py +20 -0
  22. sknetwork/classification/tests/test_propagation.py +24 -0
  23. sknetwork/classification/vote.cpython-39-darwin.so +0 -0
  24. sknetwork/classification/vote.pyx +58 -0
  25. sknetwork/clustering/__init__.py +7 -0
  26. sknetwork/clustering/base.py +102 -0
  27. sknetwork/clustering/kmeans.py +142 -0
  28. sknetwork/clustering/louvain.py +255 -0
  29. sknetwork/clustering/louvain_core.cpython-39-darwin.so +0 -0
  30. sknetwork/clustering/louvain_core.pyx +134 -0
  31. sknetwork/clustering/metrics.py +91 -0
  32. sknetwork/clustering/postprocess.py +66 -0
  33. sknetwork/clustering/propagation_clustering.py +108 -0
  34. sknetwork/clustering/tests/__init__.py +1 -0
  35. sknetwork/clustering/tests/test_API.py +37 -0
  36. sknetwork/clustering/tests/test_kmeans.py +47 -0
  37. sknetwork/clustering/tests/test_louvain.py +104 -0
  38. sknetwork/clustering/tests/test_metrics.py +50 -0
  39. sknetwork/clustering/tests/test_post_processing.py +23 -0
  40. sknetwork/clustering/tests/test_postprocess.py +39 -0
  41. sknetwork/data/__init__.py +5 -0
  42. sknetwork/data/load.py +408 -0
  43. sknetwork/data/models.py +459 -0
  44. sknetwork/data/parse.py +621 -0
  45. sknetwork/data/test_graphs.py +84 -0
  46. sknetwork/data/tests/__init__.py +1 -0
  47. sknetwork/data/tests/test_API.py +30 -0
  48. sknetwork/data/tests/test_load.py +95 -0
  49. sknetwork/data/tests/test_models.py +52 -0
  50. sknetwork/data/tests/test_parse.py +253 -0
  51. sknetwork/data/tests/test_test_graphs.py +30 -0
  52. sknetwork/data/tests/test_toy_graphs.py +68 -0
  53. sknetwork/data/toy_graphs.py +619 -0
  54. sknetwork/embedding/__init__.py +10 -0
  55. sknetwork/embedding/base.py +90 -0
  56. sknetwork/embedding/force_atlas.py +197 -0
  57. sknetwork/embedding/louvain_embedding.py +174 -0
  58. sknetwork/embedding/louvain_hierarchy.py +142 -0
  59. sknetwork/embedding/metrics.py +66 -0
  60. sknetwork/embedding/random_projection.py +133 -0
  61. sknetwork/embedding/spectral.py +214 -0
  62. sknetwork/embedding/spring.py +198 -0
  63. sknetwork/embedding/svd.py +363 -0
  64. sknetwork/embedding/tests/__init__.py +1 -0
  65. sknetwork/embedding/tests/test_API.py +73 -0
  66. sknetwork/embedding/tests/test_force_atlas.py +35 -0
  67. sknetwork/embedding/tests/test_louvain_embedding.py +33 -0
  68. sknetwork/embedding/tests/test_louvain_hierarchy.py +19 -0
  69. sknetwork/embedding/tests/test_metrics.py +29 -0
  70. sknetwork/embedding/tests/test_random_projection.py +28 -0
  71. sknetwork/embedding/tests/test_spectral.py +84 -0
  72. sknetwork/embedding/tests/test_spring.py +50 -0
  73. sknetwork/embedding/tests/test_svd.py +37 -0
  74. sknetwork/flow/__init__.py +3 -0
  75. sknetwork/flow/flow.py +73 -0
  76. sknetwork/flow/tests/__init__.py +1 -0
  77. sknetwork/flow/tests/test_flow.py +17 -0
  78. sknetwork/flow/tests/test_utils.py +69 -0
  79. sknetwork/flow/utils.py +91 -0
  80. sknetwork/gnn/__init__.py +10 -0
  81. sknetwork/gnn/activation.py +117 -0
  82. sknetwork/gnn/base.py +155 -0
  83. sknetwork/gnn/base_activation.py +89 -0
  84. sknetwork/gnn/base_layer.py +109 -0
  85. sknetwork/gnn/gnn_classifier.py +381 -0
  86. sknetwork/gnn/layer.py +153 -0
  87. sknetwork/gnn/layers.py +127 -0
  88. sknetwork/gnn/loss.py +180 -0
  89. sknetwork/gnn/neighbor_sampler.py +65 -0
  90. sknetwork/gnn/optimizer.py +163 -0
  91. sknetwork/gnn/tests/__init__.py +1 -0
  92. sknetwork/gnn/tests/test_activation.py +56 -0
  93. sknetwork/gnn/tests/test_base.py +79 -0
  94. sknetwork/gnn/tests/test_base_layer.py +37 -0
  95. sknetwork/gnn/tests/test_gnn_classifier.py +192 -0
  96. sknetwork/gnn/tests/test_layers.py +80 -0
  97. sknetwork/gnn/tests/test_loss.py +33 -0
  98. sknetwork/gnn/tests/test_neigh_sampler.py +23 -0
  99. sknetwork/gnn/tests/test_optimizer.py +43 -0
  100. sknetwork/gnn/tests/test_utils.py +93 -0
  101. sknetwork/gnn/utils.py +219 -0
  102. sknetwork/hierarchy/__init__.py +7 -0
  103. sknetwork/hierarchy/base.py +69 -0
  104. sknetwork/hierarchy/louvain_hierarchy.py +264 -0
  105. sknetwork/hierarchy/metrics.py +234 -0
  106. sknetwork/hierarchy/paris.cpython-39-darwin.so +0 -0
  107. sknetwork/hierarchy/paris.pyx +317 -0
  108. sknetwork/hierarchy/postprocess.py +350 -0
  109. sknetwork/hierarchy/tests/__init__.py +1 -0
  110. sknetwork/hierarchy/tests/test_API.py +25 -0
  111. sknetwork/hierarchy/tests/test_algos.py +29 -0
  112. sknetwork/hierarchy/tests/test_metrics.py +62 -0
  113. sknetwork/hierarchy/tests/test_postprocess.py +57 -0
  114. sknetwork/hierarchy/tests/test_ward.py +25 -0
  115. sknetwork/hierarchy/ward.py +94 -0
  116. sknetwork/linalg/__init__.py +9 -0
  117. sknetwork/linalg/basics.py +37 -0
  118. sknetwork/linalg/diteration.cpython-39-darwin.so +0 -0
  119. sknetwork/linalg/diteration.pyx +49 -0
  120. sknetwork/linalg/eig_solver.py +93 -0
  121. sknetwork/linalg/laplacian.py +15 -0
  122. sknetwork/linalg/normalization.py +66 -0
  123. sknetwork/linalg/operators.py +225 -0
  124. sknetwork/linalg/polynome.py +76 -0
  125. sknetwork/linalg/ppr_solver.py +170 -0
  126. sknetwork/linalg/push.cpython-39-darwin.so +0 -0
  127. sknetwork/linalg/push.pyx +73 -0
  128. sknetwork/linalg/sparse_lowrank.py +142 -0
  129. sknetwork/linalg/svd_solver.py +91 -0
  130. sknetwork/linalg/tests/__init__.py +1 -0
  131. sknetwork/linalg/tests/test_eig.py +44 -0
  132. sknetwork/linalg/tests/test_laplacian.py +18 -0
  133. sknetwork/linalg/tests/test_normalization.py +38 -0
  134. sknetwork/linalg/tests/test_operators.py +70 -0
  135. sknetwork/linalg/tests/test_polynome.py +38 -0
  136. sknetwork/linalg/tests/test_ppr.py +50 -0
  137. sknetwork/linalg/tests/test_sparse_lowrank.py +61 -0
  138. sknetwork/linalg/tests/test_svd.py +38 -0
  139. sknetwork/linkpred/__init__.py +4 -0
  140. sknetwork/linkpred/base.py +80 -0
  141. sknetwork/linkpred/first_order.py +508 -0
  142. sknetwork/linkpred/first_order_core.cpython-39-darwin.so +0 -0
  143. sknetwork/linkpred/first_order_core.pyx +315 -0
  144. sknetwork/linkpred/postprocessing.py +98 -0
  145. sknetwork/linkpred/tests/__init__.py +1 -0
  146. sknetwork/linkpred/tests/test_API.py +49 -0
  147. sknetwork/linkpred/tests/test_postprocessing.py +21 -0
  148. sknetwork/path/__init__.py +4 -0
  149. sknetwork/path/metrics.py +148 -0
  150. sknetwork/path/search.py +65 -0
  151. sknetwork/path/shortest_path.py +186 -0
  152. sknetwork/path/tests/__init__.py +1 -0
  153. sknetwork/path/tests/test_metrics.py +29 -0
  154. sknetwork/path/tests/test_search.py +25 -0
  155. sknetwork/path/tests/test_shortest_path.py +45 -0
  156. sknetwork/ranking/__init__.py +9 -0
  157. sknetwork/ranking/base.py +56 -0
  158. sknetwork/ranking/betweenness.cpython-39-darwin.so +0 -0
  159. sknetwork/ranking/betweenness.pyx +99 -0
  160. sknetwork/ranking/closeness.py +95 -0
  161. sknetwork/ranking/harmonic.py +82 -0
  162. sknetwork/ranking/hits.py +94 -0
  163. sknetwork/ranking/katz.py +81 -0
  164. sknetwork/ranking/pagerank.py +107 -0
  165. sknetwork/ranking/postprocess.py +25 -0
  166. sknetwork/ranking/tests/__init__.py +1 -0
  167. sknetwork/ranking/tests/test_API.py +34 -0
  168. sknetwork/ranking/tests/test_betweenness.py +38 -0
  169. sknetwork/ranking/tests/test_closeness.py +34 -0
  170. sknetwork/ranking/tests/test_hits.py +20 -0
  171. sknetwork/ranking/tests/test_pagerank.py +69 -0
  172. sknetwork/regression/__init__.py +4 -0
  173. sknetwork/regression/base.py +56 -0
  174. sknetwork/regression/diffusion.py +190 -0
  175. sknetwork/regression/tests/__init__.py +1 -0
  176. sknetwork/regression/tests/test_API.py +34 -0
  177. sknetwork/regression/tests/test_diffusion.py +48 -0
  178. sknetwork/sknetwork.py +3 -0
  179. sknetwork/topology/__init__.py +9 -0
  180. sknetwork/topology/dag.py +74 -0
  181. sknetwork/topology/dag_core.cpython-39-darwin.so +0 -0
  182. sknetwork/topology/dag_core.pyx +38 -0
  183. sknetwork/topology/kcliques.cpython-39-darwin.so +0 -0
  184. sknetwork/topology/kcliques.pyx +193 -0
  185. sknetwork/topology/kcore.cpython-39-darwin.so +0 -0
  186. sknetwork/topology/kcore.pyx +120 -0
  187. sknetwork/topology/structure.py +234 -0
  188. sknetwork/topology/tests/__init__.py +1 -0
  189. sknetwork/topology/tests/test_cliques.py +28 -0
  190. sknetwork/topology/tests/test_cores.py +21 -0
  191. sknetwork/topology/tests/test_dag.py +26 -0
  192. sknetwork/topology/tests/test_structure.py +99 -0
  193. sknetwork/topology/tests/test_triangles.py +42 -0
  194. sknetwork/topology/tests/test_wl_coloring.py +49 -0
  195. sknetwork/topology/tests/test_wl_kernel.py +31 -0
  196. sknetwork/topology/triangles.cpython-39-darwin.so +0 -0
  197. sknetwork/topology/triangles.pyx +166 -0
  198. sknetwork/topology/weisfeiler_lehman.py +163 -0
  199. sknetwork/topology/weisfeiler_lehman_core.cpython-39-darwin.so +0 -0
  200. sknetwork/topology/weisfeiler_lehman_core.pyx +116 -0
  201. sknetwork/utils/__init__.py +40 -0
  202. sknetwork/utils/base.py +35 -0
  203. sknetwork/utils/check.py +354 -0
  204. sknetwork/utils/co_neighbor.py +71 -0
  205. sknetwork/utils/format.py +219 -0
  206. sknetwork/utils/kmeans.py +89 -0
  207. sknetwork/utils/knn.py +166 -0
  208. sknetwork/utils/knn1d.cpython-39-darwin.so +0 -0
  209. sknetwork/utils/knn1d.pyx +80 -0
  210. sknetwork/utils/membership.py +82 -0
  211. sknetwork/utils/minheap.cpython-39-darwin.so +0 -0
  212. sknetwork/utils/minheap.pxd +22 -0
  213. sknetwork/utils/minheap.pyx +111 -0
  214. sknetwork/utils/neighbors.py +115 -0
  215. sknetwork/utils/seeds.py +75 -0
  216. sknetwork/utils/simplex.py +140 -0
  217. sknetwork/utils/tests/__init__.py +1 -0
  218. sknetwork/utils/tests/test_base.py +28 -0
  219. sknetwork/utils/tests/test_bunch.py +16 -0
  220. sknetwork/utils/tests/test_check.py +190 -0
  221. sknetwork/utils/tests/test_co_neighbor.py +43 -0
  222. sknetwork/utils/tests/test_format.py +61 -0
  223. sknetwork/utils/tests/test_kmeans.py +21 -0
  224. sknetwork/utils/tests/test_knn.py +32 -0
  225. sknetwork/utils/tests/test_membership.py +24 -0
  226. sknetwork/utils/tests/test_neighbors.py +41 -0
  227. sknetwork/utils/tests/test_projection_simplex.py +33 -0
  228. sknetwork/utils/tests/test_seeds.py +67 -0
  229. sknetwork/utils/tests/test_verbose.py +15 -0
  230. sknetwork/utils/tests/test_ward.py +20 -0
  231. sknetwork/utils/timeout.py +38 -0
  232. sknetwork/utils/verbose.py +37 -0
  233. sknetwork/utils/ward.py +60 -0
  234. sknetwork/visualization/__init__.py +4 -0
  235. sknetwork/visualization/colors.py +34 -0
  236. sknetwork/visualization/dendrograms.py +229 -0
  237. sknetwork/visualization/graphs.py +819 -0
  238. sknetwork/visualization/tests/__init__.py +1 -0
  239. sknetwork/visualization/tests/test_dendrograms.py +53 -0
  240. sknetwork/visualization/tests/test_graphs.py +167 -0
sknetwork/gnn/utils.py ADDED
@@ -0,0 +1,219 @@
1
+ #!/usr/bin/env python3
2
+ # coding: utf-8
3
+ """
4
+ Created on Thu Apr 21 2022
5
+ @author: Simon Delarue <sdelarue@enst.fr>
6
+ """
7
+ import inspect
8
+ from typing import Union, Optional, Tuple
9
+ import warnings
10
+
11
+ import numpy as np
12
+
13
+ from sknetwork.gnn.base_activation import BaseActivation, BaseLoss
14
+ from sknetwork.gnn.base_layer import BaseLayer
15
+ from sknetwork.gnn.layer import get_layer
16
+ from sknetwork.gnn.loss import BinaryCrossEntropy, CrossEntropy
17
+ from sknetwork.utils.check import check_is_proba, check_boolean, check_labels
18
+
19
+
20
def filter_mask(mask: np.ndarray, proportion: Optional[float]):
    """Filter a boolean mask so that the proportion of ones does not exceed some target.

    Each ``True`` entry is kept independently with probability
    ``proportion * len(mask) / n_ones`` (entries are always kept when this ratio is at least 1),
    so the target is met in expectation, not exactly.

    Parameters
    ----------
    mask : np.ndarray
        Boolean mask. It is never modified; the result is a new array.
    proportion : float
        Target proportion of ones. If ``None`` or ``0``, an all-``False`` mask is returned.

    Returns
    -------
    mask_filter : np.ndarray
        New boolean mask, with the same shape as ``mask``.
    """
    # Work on a boolean copy: the caller's array must not be altered as a side effect,
    # and boolean indexing below requires a bool dtype.
    mask = np.array(mask, dtype=bool)
    n_ones = int(np.count_nonzero(mask))
    if not n_ones:
        # Nothing to filter.
        return mask
    if not proportion:
        return np.zeros_like(mask, dtype=bool)
    ratio = proportion * len(mask) / n_ones
    # Random thinning of the entries that are currently set.
    mask[mask] = np.random.random(n_ones) <= ratio
    return mask
42
+
43
+
44
+ def check_existing_masks(labels: np.ndarray, train_mask: Optional[np.ndarray] = None,
45
+ val_mask: Optional[np.ndarray] = None, test_mask: Optional[np.ndarray] = None,
46
+ train_size: Optional[float] = None, val_size: Optional[float] = None,
47
+ test_size: Optional[float] = None) -> Tuple:
48
+ """Check mask parameters and return mask boolean arrays.
49
+
50
+ Parameters
51
+ ----------
52
+ labels: np.ndarray
53
+ Label vector of length :math:`n`, with :math:`n` the number of nodes in `adjacency`. Nodes with a negative
54
+ label (e.g. `-1`) are removed from the training and validation masks.
55
+ train_mask, val_mask, test_mask: np.ndarray
56
+ Boolean arrays indicating whether nodes are in training/validation/test set.
57
+ train_size, val_size, test_size: float
58
+ Proportion of the nodes in the training/validation/test set (between 0 and 1).
59
+ Only used when the corresponding masks are ``None``.
60
+
61
+ Returns
62
+ -------
63
+ Tuple containing:
64
+ * ``True`` if a training mask is provided, ``False`` otherwise;
65
+ * training, validation and test masks w.r.t. values in `labels` (the validation mask is ``None`` when no training mask is provided).
66
+ """
67
+ _, _ = check_labels(labels)
68
+
69
+ is_negative_labels = labels < 0
70
+
71
+ if train_mask is not None:
72
+ check_boolean(train_mask)
73
+ train_mask_filtered = np.logical_and(train_mask, ~is_negative_labels)
74
+ check_mask_similarity(train_mask, train_mask_filtered)
75
+ train_mask = train_mask_filtered
76
+ if test_mask is not None:
77
+ check_boolean(test_mask)
78
+ if val_mask is not None:
79
+ check_boolean(val_mask)
80
+ val_mask_filtered = np.logical_and(val_mask, ~is_negative_labels)
81
+ check_mask_similarity(val_mask, val_mask_filtered)
82
+ val_mask = val_mask_filtered
83
+ # NOTE(review): this only detects nodes present in all three masks; pairwise overlaps are not reported.
+ if (train_mask & val_mask & test_mask).any():
84
+ raise ValueError('Masks are overlapping. Please change masks.')
85
+ else:
86
+ val_mask = np.logical_and(~train_mask, ~test_mask)
87
+ val_mask = np.logical_and(val_mask, ~is_negative_labels)
88
+ else:
89
+ if val_mask is None:
90
+ val_mask = filter_mask(~train_mask, val_size)
91
+ val_mask = np.logical_and(val_mask, ~is_negative_labels)
92
+ test_mask = np.logical_and(~train_mask, ~val_mask)
93
+ return True, train_mask, val_mask, test_mask
94
+ else:
95
+ if train_size is None and test_size is None:
96
+ raise ValueError('Either mask parameters or size parameters should be different from None.')
97
+ for size in [train_size, test_size, val_size]:
98
+ if size is not None:
99
+ check_is_proba(size)
100
+ return False, ~is_negative_labels, None, is_negative_labels
101
+
102
+
103
def check_mask_similarity(mask_1: np.ndarray, mask_2: np.ndarray):
    """Emit a warning if two mask arrays differ in at least one position.

    Parameters
    ----------
    mask_1, mask_2 : np.ndarray
        Masks to compare (any array-like of the same shape is accepted).
    """
    # Vectorized comparison: the builtin ``any`` iterates in Python and fails on
    # plain lists, for which ``!=`` returns a single bool instead of an array.
    if np.any(np.asarray(mask_1) != np.asarray(mask_2)):
        warnings.warn('Nodes with label "-1" are considered in the train set or the validation set.')
107
+
108
+
109
def check_early_stopping(early_stopping: bool, val_mask: np.ndarray, patience: int):
    """Decide whether early stopping can be used.

    Early stopping is disabled (``False`` is returned) when there is no validation mask,
    no patience value, or when the validation mask has no ``True`` entry.
    Otherwise the ``early_stopping`` argument is returned unchanged.
    """
    if val_mask is None:
        return False
    if patience is None:
        return False
    if not any(val_mask):
        # Empty validation set: nothing to monitor.
        return False
    return early_stopping
115
+
116
+
117
def check_normalizations(normalizations: Union[str, list]):
    """Validate one normalization name, or a list of them (case-insensitive).

    Raises
    ------
    ValueError
        If a name is not one of ``'left'``, ``'right'`` or ``'both'``.
    """
    known = ['left', 'right', 'both']
    # Treat the single-value case as a one-element list so that both go through the same check.
    candidates = normalizations if isinstance(normalizations, list) else [normalizations]
    for candidate in candidates:
        if candidate.lower() not in known:
            raise ValueError("Normalization must be 'left', 'right' or 'both'.")
126
+
127
+
128
def check_output(n_channels: int, labels: np.ndarray):
    """Check that the output dimension of the GNN is compatible with the labels.

    Parameters
    ----------
    n_channels : int
        Number of output channels.
    labels : np.ndarray
        Vector of labels; negative entries are ignored.

    Raises
    ------
    ValueError
        If there are more than 2 distinct non-negative labels and more labels than output channels.
    """
    n_labels = np.unique(labels[labels >= 0]).size
    too_small = n_labels > 2 and n_labels > n_channels
    if too_small:
        raise ValueError("The dimension of the output is too small for the number of labels. "
                         "Please check the `dims` parameter of your GNN or the `labels` parameter.")
142
+
143
+
144
def check_param(param, length):
    """Return ``param`` as a list with one entry per layer.

    A non-list value is repeated ``length`` times; a list is returned as is
    after checking that it has exactly ``length`` entries.

    Raises
    ------
    ValueError
        If ``param`` is a list whose length differs from ``length``.
    """
    if isinstance(param, list):
        if len(param) != length:
            raise ValueError('The number of parameters must be equal to the number of layers.')
        return param
    return [param] * length
152
+
153
+
154
def check_loss(layer: BaseLayer):
    """Check that the activation of a layer is a loss function and return it.

    If the loss is a :class:`CrossEntropy` and the layer has a single output channel,
    the activation of the layer is replaced in place by a :class:`BinaryCrossEntropy`.

    Parameters
    ----------
    layer : BaseLayer
        Layer to check (expected to be the last layer of the GNN).

    Returns
    -------
    The loss of the layer, i.e., ``layer.activation`` after the possible replacement.

    Raises
    ------
    ValueError
        If the activation of the layer is not an instance of :class:`BaseLoss`.
    """
    if not isinstance(layer.activation, BaseLoss):
        raise ValueError('No loss specified for the last layer.')
    if isinstance(layer.activation, CrossEntropy) and layer.out_channels == 1:
        # A single output channel can only encode a binary decision.
        layer.activation = BinaryCrossEntropy()
    return layer.activation
162
+
163
+
164
def get_layers(dims: Union[int, list], layer_types: Union[str, BaseLayer, list],
               activations: Union[str, BaseActivation, list], use_bias: Union[bool, list],
               normalizations: Union[str, list], self_embeddings: Union[bool, list], sample_sizes: Union[int, list],
               loss: Union[str, BaseLoss]) -> list:
    """Build the list of layers of a GNN.

    Every parameter other than ``dims`` and ``loss`` may be given either as a single value,
    applied to all layers, or as a list with one value per layer.

    Parameters
    ----------
    dims :
        Dimensions of layers (in forward direction).
    layer_types :
        Layer types.
    activations :
        Activation functions.
    use_bias :
        ``True`` if a bias vector is added.
    normalizations :
        Normalizations of adjacency matrix.
    self_embeddings :
        ``True`` if self embeddings are added. Allowed input are booleans and lists.
    sample_sizes
        Size of neighborhood sampled for each node.
    loss :
        Loss function, passed to the last layer only.

    Returns
    -------
    list
        List of layers.
    """
    check_normalizations(normalizations)

    if not isinstance(dims, list):
        dims = [dims]
    n_layers = len(dims)

    # Keyword name expected by ``get_layer`` -> per-layer values (insertion order matters
    # only for readability; values are passed by keyword).
    per_layer = {
        'layer': check_param(layer_types, n_layers),
        'out_channels': dims,
        'activation': check_param(activations, n_layers),
        'use_bias': check_param(use_bias, n_layers),
        'normalization': check_param(normalizations, n_layers),
        'self_embeddings': check_param(self_embeddings, n_layers),
        'sample_size': check_param(sample_sizes, n_layers),
    }

    layers = []
    last = n_layers - 1
    for index in range(n_layers):
        kwargs = {name: values[index] for name, values in per_layer.items()}
        if index == last:
            # Only the output layer carries the loss.
            kwargs['loss'] = loss
        layers.append(get_layer(**kwargs))

    return layers
sknetwork/hierarchy/__init__.py ADDED
@@ -0,0 +1,7 @@
1
+ """hierarchy module"""
2
+ from sknetwork.hierarchy.paris import Paris
3
+ from sknetwork.hierarchy.base import BaseHierarchy
4
+ from sknetwork.hierarchy.louvain_hierarchy import LouvainIteration, LouvainHierarchy
5
+ from sknetwork.hierarchy.metrics import dasgupta_cost, dasgupta_score, tree_sampling_divergence
6
+ from sknetwork.hierarchy.postprocess import cut_straight, cut_balanced, aggregate_dendrogram, reorder_dendrogram
7
+ from sknetwork.hierarchy.ward import Ward
sknetwork/hierarchy/base.py ADDED
@@ -0,0 +1,69 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ Created on November 2019
5
+ @author: Nathan de Lara <nathan.delara@polytechnique.org>
6
+ """
7
+ from abc import ABC
8
+
9
+ import numpy as np
10
+
11
+ from sknetwork.hierarchy.postprocess import split_dendrogram
12
+ from sknetwork.utils.base import Algorithm
13
+
14
+
15
class BaseHierarchy(Algorithm, ABC):
    """Common base of the hierarchical clustering algorithms.

    Attributes
    ----------
    dendrogram_ :
        Dendrogram of the graph.
    dendrogram_row_ :
        Dendrogram for the rows, for bipartite graphs.
    dendrogram_col_ :
        Dendrogram for the columns, for bipartite graphs.
    dendrogram_full_ :
        Dendrogram for both rows and columns, indexed in this order, for bipartite graphs.
    """

    def __init__(self):
        self._init_vars()

    def _init_vars(self):
        """Reset all dendrogram attributes to ``None``."""
        for name in ('dendrogram_', 'dendrogram_row_', 'dendrogram_col_', 'dendrogram_full_'):
            setattr(self, name, None)

    def _split_vars(self, shape):
        """Split the current dendrogram into its row and column parts.

        The current dendrogram is kept as ``dendrogram_full_``; ``dendrogram_`` and
        ``dendrogram_row_`` both receive the row part and ``dendrogram_col_`` the column part.
        """
        full = self.dendrogram_
        rows, cols = split_dendrogram(full, shape)
        self.dendrogram_full_ = full
        self.dendrogram_ = self.dendrogram_row_ = rows
        self.dendrogram_col_ = cols
        return self

    def fit_predict(self, *args, **kwargs) -> np.ndarray:
        """Fit the algorithm and return the dendrogram. Takes the same parameters as ``fit``.

        Returns
        -------
        dendrogram : np.ndarray
            Dendrogram.
        """
        self.fit(*args, **kwargs)
        return self.dendrogram_

    def fit_transform(self, *args, **kwargs) -> np.ndarray:
        """Fit the algorithm and return the dendrogram (same result as ``fit_predict``).
        Takes the same parameters as ``fit``.

        Returns
        -------
        dendrogram : np.ndarray
            Dendrogram.
        """
        self.fit(*args, **kwargs)
        return self.dendrogram_
sknetwork/hierarchy/louvain_hierarchy.py ADDED
@@ -0,0 +1,264 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ Created in March 2020
5
+ @author: Quentin Lutz <qlutz@enst.fr>
6
+ @author: Thomas Bonald <tbonald@enst.fr>
7
+ """
8
+ from typing import Optional, Union
9
+
10
+ import numpy as np
11
+ from scipy import sparse
12
+
13
+ from sknetwork.clustering.louvain import Louvain
14
+ from sknetwork.hierarchy.base import BaseHierarchy
15
+ from sknetwork.hierarchy.postprocess import get_dendrogram, reorder_dendrogram
16
+ from sknetwork.utils.check import check_format
17
+ from sknetwork.utils.format import get_adjacency
18
+
19
+
20
class LouvainIteration(BaseHierarchy):
    """Top-down hierarchical clustering: Louvain is applied recursively to each cluster.

    Parameters
    ----------
    depth :
        Depth of the tree.
        A negative value is interpreted as no limit (return a tree of maximum depth).
    resolution :
        Resolution parameter.
    tol_optimization :
        Minimum increase in the objective function to enter a new optimization pass.
    tol_aggregation :
        Minimum increase in the objective function to enter a new aggregation pass.
    n_aggregations :
        Maximum number of aggregations.
        A negative value is interpreted as no limit.
    shuffle_nodes :
        Enables node shuffling before optimization.
    random_state :
        Random number generator or random seed. If ``None``, numpy.random is used.
    verbose :
        Verbose mode.

    Attributes
    ----------
    dendrogram_ :
        Dendrogram of the graph.
    dendrogram_row_ :
        Dendrogram for the rows, for bipartite graphs.
    dendrogram_col_ :
        Dendrogram for the columns, for bipartite graphs.
    dendrogram_full_ :
        Dendrogram for both rows and columns, indexed in this order, for bipartite graphs.

    Example
    -------
    >>> from sknetwork.hierarchy import LouvainIteration
    >>> from sknetwork.data import house
    >>> louvain = LouvainIteration()
    >>> adjacency = house()
    >>> louvain.fit_predict(adjacency)
    array([[3., 2., 0., 2.],
           [4., 1., 0., 2.],
           [6., 0., 0., 3.],
           [5., 7., 1., 5.]])

    Notes
    -----
    Each row of the dendrogram = merge nodes, distance, size of cluster.

    See Also
    --------
    scipy.cluster.hierarchy.dendrogram
    """

    def __init__(self, depth: int = 3, resolution: float = 1, tol_optimization: float = 1e-3,
                 tol_aggregation: float = 1e-3, n_aggregations: int = -1, shuffle_nodes: bool = False,
                 random_state: Optional[Union[np.random.RandomState, int]] = None, verbose: bool = False):
        super().__init__()

        louvain_params = dict(resolution=resolution, tol_optimization=tol_optimization,
                              tol_aggregation=tol_aggregation, n_aggregations=n_aggregations,
                              shuffle_nodes=shuffle_nodes, random_state=random_state, verbose=verbose)
        self.dendrogram_ = None
        self.depth = depth
        self._clustering_method = Louvain(**louvain_params)
        self.bipartite = None

    def _recursive_louvain(self, adjacency: Union[sparse.csr_matrix, np.ndarray], depth: int,
                           nodes: Optional[np.ndarray] = None):
        """Cluster the graph, then recurse on the subgraph induced by each cluster.

        Parameters
        ----------
        adjacency :
            Adjacency matrix of the graph.
        depth :
            Remaining depth of the recursion (no clustering is done when it is 0).
        nodes :
            The indices of the current nodes in the original graph.

        Returns
        -------
        tree: recursive list of list of nodes.
        """
        n_nodes = adjacency.shape[0]
        if nodes is None:
            nodes = np.arange(n_nodes)

        # Clustering is only attempted if there are edges left and depth is non-zero;
        # otherwise all nodes are put in a single cluster.
        if adjacency.nnz and depth:
            labels = self._clustering_method.fit_transform(adjacency)
        else:
            labels = np.zeros(n_nodes)

        clusters = np.unique(labels)

        if len(clusters) != 1:
            subtrees = []
            for cluster in clusters:
                in_cluster = (labels == cluster)
                sub_nodes = nodes[in_cluster]
                sub_adjacency = adjacency[in_cluster, :][:, in_cluster]
                subtrees.append(self._recursive_louvain(sub_adjacency, depth - 1, sub_nodes))
            return subtrees

        # Single cluster: recursion stops here and the nodes become leaves.
        if len(nodes) > 1:
            return [[node] for node in nodes]
        return [nodes[0]]

    def fit(self, input_matrix: Union[sparse.csr_matrix, np.ndarray]) -> 'LouvainIteration':
        """Fit algorithm to data.

        Parameters
        ----------
        input_matrix :
            Adjacency matrix or biadjacency matrix of the graph.

        Returns
        -------
        self: :class:`LouvainIteration`
        """
        self._init_vars()
        input_matrix = check_format(input_matrix)
        adjacency, self.bipartite = get_adjacency(input_matrix)

        tree = self._recursive_louvain(adjacency, self.depth)
        dendrogram, _ = get_dendrogram(tree)
        dendrogram = np.array(dendrogram)
        # Shift the third column so that its smallest value is 0.
        offset = min(dendrogram[:, 2])
        dendrogram[:, 2] -= offset
        self.dendrogram_ = reorder_dendrogram(dendrogram)

        if self.bipartite:
            self._split_vars(input_matrix.shape)
        return self
153
+
154
+
155
class LouvainHierarchy(BaseHierarchy):
    """Hierarchical clustering by Louvain (bottom-up).

    The underlying Louvain instance is created with ``n_aggregations=1``; it is applied
    repeatedly to its own aggregate graph until the number of clusters stops changing.

    Parameters
    ----------
    resolution :
        Resolution parameter.
    tol_optimization :
        Minimum increase in the objective function to enter a new optimization pass.
    tol_aggregation :
        Minimum increase in the objective function to enter a new aggregation pass.
    shuffle_nodes :
        Enables node shuffling before optimization.
    random_state :
        Random number generator or random seed. If ``None``, numpy.random is used.
    verbose :
        Verbose mode.

    Attributes
    ----------
    dendrogram_ :
        Dendrogram of the graph.
    dendrogram_row_ :
        Dendrogram for the rows, for bipartite graphs.
    dendrogram_col_ :
        Dendrogram for the columns, for bipartite graphs.
    dendrogram_full_ :
        Dendrogram for both rows and columns, indexed in this order, for bipartite graphs.

    Example
    -------
    >>> from sknetwork.hierarchy import LouvainHierarchy
    >>> from sknetwork.data import house
    >>> louvain = LouvainHierarchy()
    >>> adjacency = house()
    >>> louvain.fit_predict(adjacency)
    array([[3., 2., 0., 2.],
           [4., 1., 0., 2.],
           [6., 0., 0., 3.],
           [5., 7., 1., 5.]])

    Notes
    -----
    Each row of the dendrogram = merge nodes, distance, size of cluster.

    See Also
    --------
    scipy.cluster.hierarchy.dendrogram
    """

    def __init__(self, resolution: float = 1, tol_optimization: float = 1e-3,
                 tol_aggregation: float = 1e-3, shuffle_nodes: bool = False,
                 random_state: Optional[Union[np.random.RandomState, int]] = None, verbose: bool = False):
        super(LouvainHierarchy, self).__init__()

        self.dendrogram_ = None
        # A single aggregation per call: each call yields one level of the hierarchy.
        self._clustering_method = Louvain(resolution=resolution, tol_optimization=tol_optimization,
                                          tol_aggregation=tol_aggregation, n_aggregations=1,
                                          shuffle_nodes=shuffle_nodes, random_state=random_state, verbose=verbose)
        self.bipartite = None

    def _get_hierarchy(self, adjacency: Union[sparse.csr_matrix, np.ndarray]):
        """Get the hierarchy from Louvain.

        Parameters
        ----------
        adjacency :
            Adjacency matrix of the graph.

        Returns
        -------
        tree: recursive list of list of nodes
        """
        tree = [[node] for node in range(adjacency.shape[0])]
        labels = self._clustering_method.fit_transform(adjacency)
        labels_unique = np.unique(labels)
        while True:
            # Group the current subtrees by cluster; singleton groups are unwrapped.
            tree = [[tree[node] for node in np.flatnonzero(labels == label)] for label in labels_unique]
            tree = [cluster[0] if len(cluster) == 1 else cluster for cluster in tree]
            # Next level: cluster the aggregate graph produced by the previous fit.
            aggregate = self._clustering_method.aggregate_
            labels = self._clustering_method.fit_transform(aggregate)
            new_labels_unique = np.unique(labels)
            if len(new_labels_unique) == len(labels_unique):
                # The number of clusters did not change: the hierarchy is complete.
                break
            labels_unique = new_labels_unique
        return tree

    def fit(self, input_matrix: Union[sparse.csr_matrix, np.ndarray]) -> 'LouvainHierarchy':
        """Fit algorithm to data.

        Parameters
        ----------
        input_matrix :
            Adjacency matrix or biadjacency matrix of the graph.

        Returns
        -------
        self: :class:`LouvainHierarchy`
        """
        self._init_vars()
        input_matrix = check_format(input_matrix)
        adjacency, self.bipartite = get_adjacency(input_matrix)
        tree = self._get_hierarchy(adjacency)
        dendrogram, _ = get_dendrogram(tree)
        dendrogram = np.array(dendrogram)
        # Shift the third column so that its smallest value is 0.
        dendrogram[:, 2] -= min(dendrogram[:, 2])
        self.dendrogram_ = reorder_dendrogram(dendrogram)
        if self.bipartite:
            self._split_vars(input_matrix.shape)
        return self