scikit-network 0.30.0__cp310-cp310-win_amd64.whl → 0.32.1__cp310-cp310-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of scikit-network might be problematic. Click here for more details.

Files changed (187)
  1. {scikit_network-0.30.0.dist-info → scikit_network-0.32.1.dist-info}/AUTHORS.rst +3 -0
  2. {scikit_network-0.30.0.dist-info → scikit_network-0.32.1.dist-info}/METADATA +31 -3
  3. scikit_network-0.32.1.dist-info/RECORD +228 -0
  4. {scikit_network-0.30.0.dist-info → scikit_network-0.32.1.dist-info}/WHEEL +1 -1
  5. sknetwork/__init__.py +1 -1
  6. sknetwork/base.py +67 -0
  7. sknetwork/classification/base.py +24 -24
  8. sknetwork/classification/base_rank.py +17 -25
  9. sknetwork/classification/diffusion.py +35 -35
  10. sknetwork/classification/knn.py +24 -21
  11. sknetwork/classification/metrics.py +1 -1
  12. sknetwork/classification/pagerank.py +10 -10
  13. sknetwork/classification/propagation.py +23 -20
  14. sknetwork/classification/tests/test_diffusion.py +13 -3
  15. sknetwork/classification/vote.cp310-win_amd64.pyd +0 -0
  16. sknetwork/classification/vote.cpp +14482 -10351
  17. sknetwork/classification/vote.pyx +1 -3
  18. sknetwork/clustering/__init__.py +3 -1
  19. sknetwork/clustering/base.py +36 -40
  20. sknetwork/clustering/kcenters.py +253 -0
  21. sknetwork/clustering/leiden.py +241 -0
  22. sknetwork/clustering/leiden_core.cp310-win_amd64.pyd +0 -0
  23. sknetwork/clustering/leiden_core.cpp +31564 -0
  24. sknetwork/clustering/leiden_core.pyx +124 -0
  25. sknetwork/clustering/louvain.py +133 -102
  26. sknetwork/clustering/louvain_core.cp310-win_amd64.pyd +0 -0
  27. sknetwork/clustering/louvain_core.cpp +22457 -18792
  28. sknetwork/clustering/louvain_core.pyx +86 -96
  29. sknetwork/clustering/postprocess.py +2 -2
  30. sknetwork/clustering/propagation_clustering.py +15 -19
  31. sknetwork/clustering/tests/test_API.py +8 -4
  32. sknetwork/clustering/tests/test_kcenters.py +92 -0
  33. sknetwork/clustering/tests/test_leiden.py +34 -0
  34. sknetwork/clustering/tests/test_louvain.py +3 -4
  35. sknetwork/data/__init__.py +2 -1
  36. sknetwork/data/base.py +28 -0
  37. sknetwork/data/load.py +38 -37
  38. sknetwork/data/models.py +18 -18
  39. sknetwork/data/parse.py +54 -33
  40. sknetwork/data/test_graphs.py +2 -2
  41. sknetwork/data/tests/test_API.py +1 -1
  42. sknetwork/data/tests/test_base.py +14 -0
  43. sknetwork/data/tests/test_load.py +1 -1
  44. sknetwork/data/tests/test_parse.py +9 -12
  45. sknetwork/data/tests/test_test_graphs.py +1 -2
  46. sknetwork/data/toy_graphs.py +18 -18
  47. sknetwork/embedding/__init__.py +0 -1
  48. sknetwork/embedding/base.py +21 -20
  49. sknetwork/embedding/force_atlas.py +3 -2
  50. sknetwork/embedding/louvain_embedding.py +2 -2
  51. sknetwork/embedding/random_projection.py +5 -3
  52. sknetwork/embedding/spectral.py +0 -73
  53. sknetwork/embedding/tests/test_API.py +4 -28
  54. sknetwork/embedding/tests/test_louvain_embedding.py +4 -9
  55. sknetwork/embedding/tests/test_random_projection.py +2 -2
  56. sknetwork/embedding/tests/test_spectral.py +5 -8
  57. sknetwork/embedding/tests/test_svd.py +1 -1
  58. sknetwork/gnn/base.py +4 -4
  59. sknetwork/gnn/base_layer.py +3 -3
  60. sknetwork/gnn/gnn_classifier.py +45 -89
  61. sknetwork/gnn/layer.py +1 -1
  62. sknetwork/gnn/loss.py +1 -1
  63. sknetwork/gnn/optimizer.py +4 -3
  64. sknetwork/gnn/tests/test_base_layer.py +4 -4
  65. sknetwork/gnn/tests/test_gnn_classifier.py +12 -35
  66. sknetwork/gnn/utils.py +8 -8
  67. sknetwork/hierarchy/base.py +29 -2
  68. sknetwork/hierarchy/louvain_hierarchy.py +45 -41
  69. sknetwork/hierarchy/paris.cp310-win_amd64.pyd +0 -0
  70. sknetwork/hierarchy/paris.cpp +27369 -22852
  71. sknetwork/hierarchy/paris.pyx +7 -9
  72. sknetwork/hierarchy/postprocess.py +16 -16
  73. sknetwork/hierarchy/tests/test_API.py +1 -1
  74. sknetwork/hierarchy/tests/test_algos.py +5 -0
  75. sknetwork/hierarchy/tests/test_metrics.py +1 -1
  76. sknetwork/linalg/__init__.py +1 -1
  77. sknetwork/linalg/diteration.cp310-win_amd64.pyd +0 -0
  78. sknetwork/linalg/diteration.cpp +13474 -9454
  79. sknetwork/linalg/diteration.pyx +0 -2
  80. sknetwork/linalg/eig_solver.py +1 -1
  81. sknetwork/linalg/{normalization.py → normalizer.py} +18 -15
  82. sknetwork/linalg/operators.py +1 -1
  83. sknetwork/linalg/ppr_solver.py +1 -1
  84. sknetwork/linalg/push.cp310-win_amd64.pyd +0 -0
  85. sknetwork/linalg/push.cpp +22993 -18807
  86. sknetwork/linalg/push.pyx +0 -2
  87. sknetwork/linalg/svd_solver.py +1 -1
  88. sknetwork/linalg/tests/test_normalization.py +3 -7
  89. sknetwork/linalg/tests/test_operators.py +4 -8
  90. sknetwork/linalg/tests/test_ppr.py +1 -1
  91. sknetwork/linkpred/base.py +13 -2
  92. sknetwork/linkpred/nn.py +6 -6
  93. sknetwork/log.py +19 -0
  94. sknetwork/path/__init__.py +4 -3
  95. sknetwork/path/dag.py +54 -0
  96. sknetwork/path/distances.py +98 -0
  97. sknetwork/path/search.py +13 -47
  98. sknetwork/path/shortest_path.py +37 -162
  99. sknetwork/path/tests/test_dag.py +37 -0
  100. sknetwork/path/tests/test_distances.py +62 -0
  101. sknetwork/path/tests/test_search.py +26 -11
  102. sknetwork/path/tests/test_shortest_path.py +31 -36
  103. sknetwork/ranking/__init__.py +0 -1
  104. sknetwork/ranking/base.py +13 -8
  105. sknetwork/ranking/betweenness.cp310-win_amd64.pyd +0 -0
  106. sknetwork/ranking/betweenness.cpp +5709 -3017
  107. sknetwork/ranking/betweenness.pyx +0 -2
  108. sknetwork/ranking/closeness.py +7 -10
  109. sknetwork/ranking/pagerank.py +14 -14
  110. sknetwork/ranking/postprocess.py +12 -3
  111. sknetwork/ranking/tests/test_API.py +2 -4
  112. sknetwork/ranking/tests/test_betweenness.py +3 -3
  113. sknetwork/ranking/tests/test_closeness.py +3 -7
  114. sknetwork/ranking/tests/test_pagerank.py +11 -5
  115. sknetwork/ranking/tests/test_postprocess.py +5 -0
  116. sknetwork/regression/base.py +19 -2
  117. sknetwork/regression/diffusion.py +24 -10
  118. sknetwork/regression/tests/test_diffusion.py +8 -0
  119. sknetwork/test_base.py +35 -0
  120. sknetwork/test_log.py +15 -0
  121. sknetwork/topology/__init__.py +7 -8
  122. sknetwork/topology/cliques.cp310-win_amd64.pyd +0 -0
  123. sknetwork/topology/{kcliques.cpp → cliques.cpp} +23412 -20276
  124. sknetwork/topology/cliques.pyx +149 -0
  125. sknetwork/topology/core.cp310-win_amd64.pyd +0 -0
  126. sknetwork/topology/{kcore.cpp → core.cpp} +21732 -18867
  127. sknetwork/topology/core.pyx +90 -0
  128. sknetwork/topology/cycles.py +243 -0
  129. sknetwork/topology/minheap.cp310-win_amd64.pyd +0 -0
  130. sknetwork/{utils → topology}/minheap.cpp +19452 -15368
  131. sknetwork/{utils → topology}/minheap.pxd +1 -3
  132. sknetwork/{utils → topology}/minheap.pyx +1 -3
  133. sknetwork/topology/structure.py +3 -43
  134. sknetwork/topology/tests/test_cliques.py +11 -11
  135. sknetwork/topology/tests/test_core.py +19 -0
  136. sknetwork/topology/tests/test_cycles.py +65 -0
  137. sknetwork/topology/tests/test_structure.py +2 -16
  138. sknetwork/topology/tests/test_triangles.py +11 -15
  139. sknetwork/topology/tests/test_wl.py +72 -0
  140. sknetwork/topology/triangles.cp310-win_amd64.pyd +0 -0
  141. sknetwork/topology/triangles.cpp +5056 -2696
  142. sknetwork/topology/triangles.pyx +74 -89
  143. sknetwork/topology/weisfeiler_lehman.py +56 -86
  144. sknetwork/topology/weisfeiler_lehman_core.cp310-win_amd64.pyd +0 -0
  145. sknetwork/topology/weisfeiler_lehman_core.cpp +14727 -10622
  146. sknetwork/topology/weisfeiler_lehman_core.pyx +0 -2
  147. sknetwork/utils/__init__.py +1 -31
  148. sknetwork/utils/check.py +2 -2
  149. sknetwork/utils/format.py +5 -3
  150. sknetwork/utils/membership.py +2 -2
  151. sknetwork/utils/tests/test_check.py +3 -3
  152. sknetwork/utils/tests/test_format.py +3 -1
  153. sknetwork/utils/values.py +1 -1
  154. sknetwork/visualization/__init__.py +2 -2
  155. sknetwork/visualization/dendrograms.py +55 -7
  156. sknetwork/visualization/graphs.py +292 -72
  157. sknetwork/visualization/tests/test_dendrograms.py +9 -9
  158. sknetwork/visualization/tests/test_graphs.py +71 -62
  159. scikit_network-0.30.0.dist-info/RECORD +0 -227
  160. sknetwork/embedding/louvain_hierarchy.py +0 -142
  161. sknetwork/embedding/tests/test_louvain_hierarchy.py +0 -19
  162. sknetwork/path/metrics.py +0 -148
  163. sknetwork/path/tests/test_metrics.py +0 -29
  164. sknetwork/ranking/harmonic.py +0 -82
  165. sknetwork/topology/dag.py +0 -74
  166. sknetwork/topology/dag_core.cp310-win_amd64.pyd +0 -0
  167. sknetwork/topology/dag_core.cpp +0 -23350
  168. sknetwork/topology/dag_core.pyx +0 -38
  169. sknetwork/topology/kcliques.cp310-win_amd64.pyd +0 -0
  170. sknetwork/topology/kcliques.pyx +0 -193
  171. sknetwork/topology/kcore.cp310-win_amd64.pyd +0 -0
  172. sknetwork/topology/kcore.pyx +0 -120
  173. sknetwork/topology/tests/test_cores.py +0 -21
  174. sknetwork/topology/tests/test_dag.py +0 -26
  175. sknetwork/topology/tests/test_wl_coloring.py +0 -49
  176. sknetwork/topology/tests/test_wl_kernel.py +0 -31
  177. sknetwork/utils/base.py +0 -35
  178. sknetwork/utils/minheap.cp310-win_amd64.pyd +0 -0
  179. sknetwork/utils/simplex.py +0 -140
  180. sknetwork/utils/tests/test_base.py +0 -28
  181. sknetwork/utils/tests/test_bunch.py +0 -16
  182. sknetwork/utils/tests/test_projection_simplex.py +0 -33
  183. sknetwork/utils/tests/test_verbose.py +0 -15
  184. sknetwork/utils/verbose.py +0 -37
  185. {scikit_network-0.30.0.dist-info → scikit_network-0.32.1.dist-info}/LICENSE +0 -0
  186. {scikit_network-0.30.0.dist-info → scikit_network-0.32.1.dist-info}/top_level.txt +0 -0
  187. /sknetwork/{utils → data}/timeout.py +0 -0
@@ -4,7 +4,7 @@
4
4
  Created in April 2022
5
5
  @author: Simon Delarue <sdelarue@enst.fr>
6
6
  """
7
- from typing import Optional, Union
7
+ from typing import Iterable, Optional, Union
8
8
  from collections import defaultdict
9
9
 
10
10
  import numpy as np
@@ -26,35 +26,37 @@ class GNNClassifier(BaseGNN):
26
26
 
27
27
  Parameters
28
28
  ----------
29
- dims : list or int
30
- Dimensions of the output of each layer (in forward direction).
29
+ dims : iterable or int
30
+ Dimension of the output of each layer (in forward direction).
31
31
  If an integer, dimension of the output layer (no hidden layer).
32
32
  Optional if ``layers`` is specified.
33
- layer_types : list or str
33
+ layer_types : iterable or str
34
34
  Layer types (in forward direction).
35
- If a string, use the same type of layer for all layers.
35
+ If a string, the same type is used at each layer.
36
36
  Can be ``'Conv'``, graph convolutional layer (default) or ``'Sage'`` (GraphSage).
37
- activations : list or str
37
+ activations : iterable or str
38
38
  Activation functions (in forward direction).
39
- If a string, use the same activation function for all layers.
39
+ If a string, the same activation function is used at each layer.
40
40
  Can be either ``'Identity'``, ``'Relu'``, ``'Sigmoid'`` or ``'Softmax'`` (default = ``'Relu'``).
41
- use_bias : list or bool
42
- Whether to use a bias term at each layer.
43
- If ``True``, use a bias term at all layers.
44
- normalizations : list or str
45
- Normalization of the adjacency matrix for message passing.
46
- If a string, use the same normalization for all layers.
47
- Can be either `'left'`` (left normalization by the degrees), ``'right'`` (right normalization by the degrees),
41
+ use_bias : iterable or bool
42
+ Whether to add a bias term at each layer (in forward direction).
43
+ If ``True``, use a bias term at each layer.
44
+ normalizations : iterable or str
45
+ Normalizations of the adjacency matrix for message passing (in forward direction).
46
+ If a string, the same type of normalization is used at each layer.
47
+ Can be either ``'left'`` (left normalization by the degrees), ``'right'`` (right normalization by the degrees),
48
48
  ``'both'`` (symmetric normalization by the square root of degrees, default) or ``None`` (no normalization).
49
- self_embeddings : list or str
50
- Whether to add a self embeddings to each node of the graph for message passing.
51
- If ``True``, add self-embeddings at all layers.
52
- sample_sizes : list or int
53
- Size of neighborhood sampled for each node. Used only for ``'Sage'`` layer type.
49
+ self_embeddings : iterable or bool
50
+ Whether to add the embedding to each node for message passing (in forward direction).
51
+ If ``True``, add a self-embedding at each layer.
52
+ sample_sizes : iterable or int
53
+ Sizes of neighborhood sampled for each node (in forward direction).
54
+ If an integer, the same sampling size is used at each layer.
55
+ Used only for ``'Sage'`` layer type.
54
56
  loss : str (default = ``'CrossEntropy'``) or BaseLoss
55
- Loss function name or custom loss.
56
- layers : list or None
57
- Custom layers. If used, previous parameters are ignored.
57
+ Name of loss function or custom loss function.
58
+ layers : iterable or None
59
+ Custom layers (in forward direction). If used, previous parameters are ignored.
58
60
  optimizer : str or optimizer
59
61
  * ``'Adam'``, stochastic gradient-based optimizer (default).
60
62
  * ``'GD'``, gradient descent.
@@ -72,7 +74,7 @@ class GNNClassifier(BaseGNN):
72
74
  ----------
73
75
  conv2, ..., conv1: :class:'GCNConv'
74
76
  Graph convolutional layers.
75
- output_ : array
77
+ output_ : np.ndarray
76
78
  Output of the GNN.
77
79
  labels_: np.ndarray
78
80
  Predicted node labels.
@@ -95,11 +97,11 @@ class GNNClassifier(BaseGNN):
95
97
  0.88
96
98
  """
97
99
 
98
- def __init__(self, dims: Optional[Union[int, list]] = None, layer_types: Union[str, list] = 'Conv',
99
- activations: Union[str, list] = 'ReLu', use_bias: Union[bool, list] = True,
100
- normalizations: Union[str, list] = 'both', self_embeddings: Union[bool, list] = True,
100
+ def __init__(self, dims: Optional[Union[int, Iterable]] = None, layer_types: Union[str, Iterable] = 'Conv',
101
+ activations: Union[str, Iterable] = 'ReLu', use_bias: Union[bool, list] = True,
102
+ normalizations: Union[str, Iterable] = 'both', self_embeddings: Union[bool, Iterable] = True,
101
103
  sample_sizes: Union[int, list] = 25, loss: Union[BaseLoss, str] = 'CrossEntropy',
102
- layers: Optional[list] = None, optimizer: Union[BaseOptimizer, str] = 'Adam',
104
+ layers: Optional[Iterable] = None, optimizer: Union[BaseOptimizer, str] = 'Adam',
103
105
  learning_rate: float = 0.01, early_stopping: bool = True, patience: int = 10, verbose: bool = False):
104
106
  super(GNNClassifier, self).__init__(loss, optimizer, learning_rate, verbose)
105
107
  if layers is not None:
@@ -159,7 +161,7 @@ class GNNClassifier(BaseGNN):
159
161
 
160
162
  def fit(self, adjacency: Union[sparse.csr_matrix, np.ndarray], features: Union[sparse.csr_matrix, np.ndarray],
161
163
  labels: np.ndarray, n_epochs: int = 100, validation: float = 0, reinit: bool = False,
162
- random_state: Optional[int] = None, history: bool = False) -> 'GNNClassifier':
164
+ random_state: Optional[int] = None) -> 'GNNClassifier':
163
165
  """ Fit model to data and store trained parameters.
164
166
 
165
167
  Parameters
@@ -169,8 +171,8 @@ class GNNClassifier(BaseGNN):
169
171
  features : sparse.csr_matrix, np.ndarray
170
172
  Input feature of shape :math:`(n, d)` with :math:`n` the number of nodes in the graph and :math:`d`
171
173
  the size of feature space.
172
- labels :
173
- Known labels (dictionary or vector of int). Negative values ignored.
174
+ labels : dict, np.ndarray
175
+ Known labels. Negative values ignored.
174
176
  n_epochs : int (default = 100)
175
177
  Number of epochs (iterations over the whole graph).
176
178
  validation : float
@@ -179,18 +181,17 @@ class GNNClassifier(BaseGNN):
179
181
  If ``True``, reinit the trainable parameters of the GNN (weights and biases).
180
182
  random_state : int
181
183
  Random seed, used for reproducible results across multiple runs.
182
- history : bool (default = ``False``)
183
- If ``True``, save training history.
184
184
  """
185
185
  if reinit:
186
186
  for layer in self.layers:
187
187
  layer.weights_initialized = False
188
+ self.history_ = defaultdict(list)
188
189
 
189
190
  if random_state is not None:
190
191
  np.random.seed(random_state)
191
192
 
192
- check_format(adjacency)
193
- check_format(features)
193
+ check_format(adjacency, allow_empty=True)
194
+ check_format(features, allow_empty=True)
194
195
 
195
196
  labels = get_values(adjacency.shape, labels)
196
197
  labels = labels.astype(int)
@@ -199,7 +200,7 @@ class GNNClassifier(BaseGNN):
199
200
  check_output(self.layers[-1].out_channels, labels)
200
201
 
201
202
  self.train_mask = labels >= 0
202
- if 0 < validation < 1:
203
+ if self.val_mask is None and 0 < validation < 1:
203
204
  mask = np.random.random(size=len(labels)) < validation
204
205
  self.val_mask = self.train_mask & mask
205
206
  self.train_mask &= ~mask
@@ -237,28 +238,26 @@ class GNNClassifier(BaseGNN):
237
238
  self.optimizer.step(self)
238
239
 
239
240
  # Save results
240
- if history:
241
- self.history_['embedding'].append(self.layers[-1].embedding)
242
- self.history_['loss'].append(loss_value)
243
- self.history_['train_accuracy'].append(train_accuracy)
244
- if val_accuracy is not None:
245
- self.history_['val_accuracy'].append(val_accuracy)
241
+ self.history_['loss'].append(loss_value)
242
+ self.history_['train_accuracy'].append(train_accuracy)
243
+ if val_accuracy is not None:
244
+ self.history_['val_accuracy'].append(val_accuracy)
246
245
 
247
246
  if n_epochs > 10 and epoch % int(n_epochs / 10) == 0:
248
247
  if val_accuracy is not None:
249
- self.log.print(
248
+ self.print_log(
250
249
  f'In epoch {epoch:>3}, loss: {loss_value:.3f}, train accuracy: {train_accuracy:.3f}, '
251
250
  f'val accuracy: {val_accuracy:.3f}')
252
251
  else:
253
- self.log.print(
252
+ self.print_log(
254
253
  f'In epoch {epoch:>3}, loss: {loss_value:.3f}, train accuracy: {train_accuracy:.3f}')
255
254
  elif n_epochs <= 10:
256
255
  if val_accuracy is not None:
257
- self.log.print(
256
+ self.print_log(
258
257
  f'In epoch {epoch:>3}, loss: {loss_value:.3f}, train accuracy: {train_accuracy:.3f}, '
259
258
  f'val accuracy: {val_accuracy:.3f}')
260
259
  else:
261
- self.log.print(
260
+ self.print_log(
262
261
  f'In epoch {epoch:>3}, loss: {loss_value:.3f}, train accuracy: {train_accuracy:.3f}')
263
262
 
264
263
  # Early stopping
@@ -269,7 +268,7 @@ class GNNClassifier(BaseGNN):
269
268
  else:
270
269
  count += 1
271
270
  if count >= self.patience:
272
- self.log.print('Early stopping.')
271
+ self.print_log('Early stopping.')
273
272
  break
274
273
 
275
274
  output = self.forward(adjacencies, features)
@@ -304,46 +303,3 @@ class GNNClassifier(BaseGNN):
304
303
  adjacencies.append(adjacency)
305
304
 
306
305
  return adjacencies
307
-
308
- def predict(self, adjacency_vectors: Union[sparse.csr_matrix, np.ndarray] = None,
309
- feature_vectors: Union[sparse.csr_matrix, np.ndarray] = None) -> np.ndarray:
310
- """Predict labels for new nodes. If called without parameters, labels are returned for all nodes.
311
-
312
- Parameters
313
- ----------
314
- adjacency_vectors : np.ndarray
315
- Square adjacency matrix. Array of shape (n, n).
316
- feature_vectors : np.ndarray
317
- Features row vectors. Array of shape (n, n_feat). The number of features n_feat must match with the one
318
- used during training.
319
-
320
- Returns
321
- -------
322
- labels : np.ndarray
323
- Label of each node of the graph.
324
- """
325
- self._check_fitted()
326
-
327
- if adjacency_vectors is None and feature_vectors is None:
328
- return self.labels_
329
- elif adjacency_vectors is None:
330
- adjacency_vectors = sparse.identity(feature_vectors.shape[0], format='csr')
331
-
332
- check_square(adjacency_vectors)
333
- check_nonnegative(adjacency_vectors)
334
- feature_vectors = check_format(feature_vectors)
335
-
336
- n_row, n_col = adjacency_vectors.shape
337
- feat_row, feat_col = feature_vectors.shape
338
-
339
- if n_col != feat_row:
340
- raise ValueError(f'Dimension mismatch: dim0={n_col} != dim1={feat_row}.')
341
- elif feat_col != self.layers[0].weight.shape[0]:
342
- raise ValueError(f'Dimension mismatch: current number of features is {feat_col} whereas GNN has been '
343
- f'trained with '
344
- f'{self.layers[0].weight.shape[0]} features.')
345
-
346
- h = self.forward(adjacency_vectors, feature_vectors)
347
- labels = self._compute_predictions(h)
348
-
349
- return labels
sknetwork/gnn/layer.py CHANGED
@@ -1,7 +1,7 @@
1
1
  #!/usr/bin/env python3
2
2
  # coding: utf-8
3
3
  """
4
- Created on Thu Apr 21 2022
4
+ Created in April 2022
5
5
  @author: Simon Delarue <sdelarue@enst.fr>
6
6
  """
7
7
  from typing import Optional, Union
sknetwork/gnn/loss.py CHANGED
@@ -53,7 +53,7 @@ class CrossEntropy(BaseLoss, Softmax):
53
53
  probs = Softmax.output(signal)
54
54
 
55
55
  # for numerical stability
56
- eps = 1e-15
56
+ eps = 1e-10
57
57
  probs = np.clip(probs, eps, 1 - eps)
58
58
 
59
59
  value = -np.log(probs[np.arange(n), labels]).sum()
@@ -130,9 +130,10 @@ class ADAM(BaseOptimizer):
130
130
  layer.weight = \
131
131
  layer.weight - (self.learning_rate * m_derivative_weight_corr) / (np.sqrt(v_derivative_weight_corr)
132
132
  + self.eps)
133
- layer.bias = \
134
- layer.bias - (self.learning_rate * m_derivative_bias_corr) / (np.sqrt(v_derivative_bias_corr)
135
- + self.eps)
133
+ if layer.use_bias:
134
+ layer.bias = \
135
+ layer.bias - (self.learning_rate * m_derivative_bias_corr) / (np.sqrt(v_derivative_bias_corr)
136
+ + self.eps)
136
137
 
137
138
 
138
139
  def get_optimizer(optimizer: Union[BaseOptimizer, str] = 'Adam', learning_rate: float = 0.01) -> BaseOptimizer:
@@ -27,11 +27,11 @@ class TestBaseLayer(unittest.TestCase):
27
27
  def test_base_layer_initialize_weights(self):
28
28
  self.base_layer._initialize_weights(10)
29
29
  self.assertTrue(self.base_layer.weight.shape == (10, len(self.labels)))
30
- self.assertTrue(all(self.base_layer.bias[0] == np.zeros((len(self.labels), 1)).T[0]))
30
+ self.assertTrue(self.base_layer.bias.shape == (1, len(self.labels)))
31
31
  self.assertTrue(self.base_layer.weights_initialized)
32
32
 
33
33
  def test_base_layer_repr(self):
34
34
  self.assertTrue(self.base_layer.__repr__().startswith(" BaseLayer(layer_type: Conv, out_channels: 10"))
35
- sagelayer = BaseLayer(layer_type='sageconv', out_channels=len(self.labels))
36
- self.assertTrue('sample_size' in sagelayer.__repr__())
37
- self.assertTrue('sageconv' in sagelayer.__repr__())
35
+ sage_layer = BaseLayer(layer_type='sageconv', out_channels=len(self.labels))
36
+ self.assertTrue('sample_size' in sage_layer.__repr__())
37
+ self.assertTrue('sageconv' in sage_layer.__repr__())
@@ -44,6 +44,14 @@ class TestGNNClassifier(unittest.TestCase):
44
44
  self.assertTrue(len(y_pred) == self.n)
45
45
  self.assertTrue(embedding.shape == (self.n, 2))
46
46
 
47
+ def test_gnn_classifier_no_bias(self):
48
+ gnn = GNNClassifier([3, 2], 'Conv', 'Softmax', use_bias=[True, False])
49
+ labels_pred = gnn.fit_predict(self.adjacency, self.features, self.labels)
50
+ embedding = gnn.embedding_
51
+ self.assertTrue(len(labels_pred) == self.n)
52
+ self.assertTrue(embedding.shape == (self.n, 2))
53
+ self.assertTrue(gnn.layers[1].bias is None)
54
+
47
55
  def test_gnn_classifier_optimizer(self):
48
56
  optimizers = ['GD', 'Adam']
49
57
  for optimizer in optimizers:
@@ -88,23 +96,20 @@ class TestGNNClassifier(unittest.TestCase):
88
96
  def test_gnn_classifier_early_stopping(self):
89
97
  gnn = GNNClassifier(2, patience=2)
90
98
  labels = {0: 0, 1: 1}
91
- _ = gnn.fit_predict(self.adjacency, self.features, labels, n_epochs=100, history=True, validation=0.5,
99
+ _ = gnn.fit_predict(self.adjacency, self.features, labels, n_epochs=100, validation=0.5,
92
100
  random_state=42)
93
101
  self.assertTrue(len(gnn.history_['val_accuracy']) < 100)
94
102
 
95
103
  gnn = GNNClassifier(2, early_stopping=False)
96
- _ = gnn.fit_predict(self.adjacency, self.features, labels, n_epochs=100, history=True, validation=0.5,
104
+ _ = gnn.fit_predict(self.adjacency, self.features, labels, n_epochs=100, validation=0.5,
97
105
  random_state=42)
98
106
  self.assertTrue(len(gnn.history_['val_accuracy']) == 100)
99
107
 
100
108
  def test_gnn_classifier_reinit(self):
101
109
  gnn = GNNClassifier([4, 2])
102
- gnn.fit(self.adjacency, self.features, self.labels, reinit=False)
103
- weights = [layer.weight for layer in gnn.layers]
104
- biases = [layer.bias for layer in gnn.layers]
110
+ gnn.fit(self.adjacency, self.features, self.labels)
105
111
  gnn.fit(self.adjacency, self.features, self.labels, n_epochs=1, reinit=True)
106
- self.assertTrue(all([np.all(weight != layer.weight) for weight, layer in zip(weights, gnn.layers)]))
107
- self.assertTrue(all([np.all(bias != layer.bias) for bias, layer in zip(biases, gnn.layers)]))
112
+ self.assertTrue(gnn.embedding_.shape == (self.n, 2))
108
113
 
109
114
  def test_gnn_classifier_sageconv(self):
110
115
  gnn = GNNClassifier([4, 2], ['SAGEConv', 'SAGEConv'], sample_sizes=[5, 3])
@@ -119,34 +124,6 @@ class TestGNNClassifier(unittest.TestCase):
119
124
  self.assertTrue(all(labels_pred == gnn.labels_))
120
125
  self.assertTrue(all(labels_pred == labels_pred_))
121
126
 
122
- # Predict same nodes
123
- labels_pred_ = gnn.predict(self.adjacency, self.features)
124
- self.assertTrue(all(labels_pred_ == gnn.labels_))
125
-
126
- # Incorrect shapes
127
- new_n = sparse.csr_matrix(np.random.randint(2, size=self.features.shape[1]))
128
- new_feat = sparse.csr_matrix(np.random.randint(3, size=self.features.shape[1]))
129
- with self.assertRaises(ValueError):
130
- gnn.predict(new_n, self.features)
131
- with self.assertRaises(ValueError):
132
- gnn.predict(self.adjacency, new_feat)
133
-
134
- new_feat = sparse.csr_matrix(np.random.rand(self.adjacency.shape[0], self.features.shape[1] - 1))
135
- with self.assertRaises(ValueError):
136
- gnn.predict(self.adjacency, new_feat)
137
-
138
- # Predict new graph
139
- n = 4
140
- n_feat = self.features.shape[1]
141
- adjacency = sparse.csr_matrix(np.random.randint(2, size=(n, n)))
142
- features = sparse.csr_matrix(np.random.randint(2, size=(n, n_feat)))
143
- labels_pred = gnn.predict(adjacency, features)
144
- self.assertTrue(len(labels_pred) == n)
145
-
146
- # No adj matrix
147
- labels_pred = gnn.predict(None, features)
148
- self.assertTrue(len(labels_pred) == features.shape[0])
149
-
150
127
  def test_gnn_classifier_predict_proba(self):
151
128
  gnn = GNNClassifier([4, 2])
152
129
  probs = gnn.fit_predict_proba(self.adjacency, self.features, self.labels)
sknetwork/gnn/utils.py CHANGED
@@ -1,10 +1,10 @@
1
1
  #!/usr/bin/env python3
2
2
  # coding: utf-8
3
3
  """
4
- Created on Thu Apr 21 2022
4
+ Created in April 2022
5
5
  @author: Simon Delarue <sdelarue@enst.fr>
6
6
  """
7
- from typing import Union
7
+ from typing import Iterable, Union
8
8
 
9
9
  import numpy as np
10
10
 
@@ -22,7 +22,7 @@ def check_early_stopping(early_stopping: bool, val_mask: np.ndarray, patience: i
22
22
  return early_stopping
23
23
 
24
24
 
25
- def check_normalizations(normalizations: Union[str, list]):
25
+ def check_normalizations(normalizations: Union[str, Iterable]):
26
26
  """Check if normalization is known."""
27
27
  available_norms = ['left', 'right', 'both']
28
28
  if isinstance(normalizations, list):
@@ -69,10 +69,10 @@ def check_loss(layer: BaseLayer):
69
69
  return layer.activation
70
70
 
71
71
 
72
- def get_layers(dims: Union[int, list], layer_types: Union[str, BaseLayer, list],
73
- activations: Union[str, BaseActivation, list], use_bias: Union[bool, list],
74
- normalizations: Union[str, list], self_embeddings: Union[bool, list], sample_sizes: Union[int, list],
75
- loss: Union[str, BaseLoss]) -> list:
72
+ def get_layers(dims: Union[int, Iterable], layer_types: Union[str, BaseLayer, Iterable],
73
+ activations: Union[str, BaseActivation, list], use_bias: Union[bool, Iterable],
74
+ normalizations: Union[str, Iterable], self_embeddings: Union[bool, Iterable],
75
+ sample_sizes: Union[int, Iterable], loss: Union[str, BaseLoss]) -> list:
76
76
  """Get the list of layers.
77
77
 
78
78
  Parameters
@@ -101,7 +101,7 @@ def get_layers(dims: Union[int, list], layer_types: Union[str, BaseLayer, list],
101
101
  """
102
102
  check_normalizations(normalizations)
103
103
 
104
- if not isinstance(dims, list):
104
+ if isinstance(dims, int):
105
105
  dims = [dims]
106
106
  n_layers = len(dims)
107
107
 
@@ -1,7 +1,7 @@
1
1
  #!/usr/bin/env python3
2
2
  # -*- coding: utf-8 -*-
3
3
  """
4
- Created on November 2019
4
+ Created in November 2019
5
5
  @author: Nathan de Lara <nathan.delara@polytechnique.org>
6
6
  """
7
7
  from abc import ABC
@@ -9,7 +9,7 @@ from abc import ABC
9
9
  import numpy as np
10
10
 
11
11
  from sknetwork.hierarchy.postprocess import split_dendrogram
12
- from sknetwork.utils.base import Algorithm
12
+ from sknetwork.base import Algorithm
13
13
 
14
14
 
15
15
  class BaseHierarchy(Algorithm, ABC):
@@ -29,6 +29,33 @@ class BaseHierarchy(Algorithm, ABC):
29
29
  def __init__(self):
30
30
  self._init_vars()
31
31
 
32
+ def predict(self, columns: bool = False) -> np.ndarray:
33
+ """Return the dendrogram predicted by the algorithm.
34
+
35
+ Parameters
36
+ ----------
37
+ columns : bool
38
+ If ``True``, return the prediction for columns.
39
+
40
+ Returns
41
+ -------
42
+ dendrogram : np.ndarray
43
+ Dendrogram.
44
+ """
45
+ if columns:
46
+ return self.dendrogram_col_
47
+ return self.dendrogram_
48
+
49
+ def transform(self) -> np.ndarray:
50
+ """Return the dendrogram predicted by the algorithm.
51
+
52
+ Returns
53
+ -------
54
+ dendrogram : np.ndarray
55
+ Dendrogram.
56
+ """
57
+ return self.dendrogram_
58
+
32
59
  def fit_predict(self, *args, **kwargs) -> np.ndarray:
33
60
  """Fit algorithm to data and return the dendrogram. Same parameters as the ``fit`` method.
34
61
 
@@ -22,34 +22,34 @@ class LouvainIteration(BaseHierarchy):
22
22
 
23
23
  Parameters
24
24
  ----------
25
- depth :
25
+ depth : int
26
26
  Depth of the tree.
27
27
  A negative value is interpreted as no limit (return a tree of maximum depth).
28
- resolution :
28
+ resolution : float
29
29
  Resolution parameter.
30
- tol_optimization :
30
+ tol_optimization : float
31
31
  Minimum increase in the objective function to enter a new optimization pass.
32
- tol_aggregation :
32
+ tol_aggregation : float
33
33
  Minimum increase in the objective function to enter a new aggregation pass.
34
- n_aggregations :
34
+ n_aggregations : int
35
35
  Maximum number of aggregations.
36
36
  A negative value is interpreted as no limit.
37
- shuffle_nodes :
38
- Enables node shuffling before optimization.
39
- random_state :
37
+ shuffle_nodes : bool
38
+ If ``True``, shuffle nodes before optimization.
39
+ random_state : int
40
40
  Random number generator or random seed. If ``None``, numpy.random is used.
41
- verbose :
41
+ verbose : bool
42
42
  Verbose mode.
43
43
 
44
44
  Attributes
45
45
  ----------
46
- dendrogram_ :
46
+ dendrogram_ : np.ndarray
47
47
  Dendrogram of the graph.
48
- dendrogram_row_ :
48
+ dendrogram_row_ : np.ndarray
49
49
  Dendrogram for the rows, for bipartite graphs.
50
- dendrogram_col_ :
50
+ dendrogram_col_ : np.ndarray
51
51
  Dendrogram for the columns, for bipartite graphs.
52
- dendrogram_full_ :
52
+ dendrogram_full_ : np.ndarray
53
53
  Dendrogram for both rows and columns, indexed in this order, for bipartite graphs.
54
54
 
55
55
  Example
@@ -59,10 +59,10 @@ class LouvainIteration(BaseHierarchy):
59
59
  >>> louvain = LouvainIteration()
60
60
  >>> adjacency = house()
61
61
  >>> louvain.fit_predict(adjacency)
62
- array([[3., 2., 0., 2.],
63
- [4., 1., 0., 2.],
64
- [6., 0., 0., 3.],
65
- [5., 7., 1., 5.]])
62
+ array([[3., 2., 1., 2.],
63
+ [4., 1., 1., 2.],
64
+ [6., 0., 1., 3.],
65
+ [5., 7., 2., 5.]])
66
66
 
67
67
  Notes
68
68
  -----
@@ -71,6 +71,7 @@ class LouvainIteration(BaseHierarchy):
71
71
  See Also
72
72
  --------
73
73
  scipy.cluster.hierarchy.dendrogram
74
+ sknetwork.clustering.Louvain
74
75
  """
75
76
 
76
77
  def __init__(self, depth: int = 3, resolution: float = 1, tol_optimization: float = 1e-3,
@@ -91,11 +92,11 @@ class LouvainIteration(BaseHierarchy):
91
92
 
92
93
  Parameters
93
94
  ----------
94
- adjacency :
95
+ adjacency : sparse.csr_matrix, np.ndarray
95
96
  Adjacency matrix of the graph.
96
- depth :
97
+ depth : int
97
98
  Depth of the recursion.
98
- nodes :
99
+ nodes : np.ndarray
99
100
  The indices of the current nodes in the original graph.
100
101
 
101
102
  Returns
@@ -132,7 +133,7 @@ class LouvainIteration(BaseHierarchy):
132
133
 
133
134
  Parameters
134
135
  ----------
135
- input_matrix :
136
+ input_matrix : sparse.csr_matrix, np.ndarray
136
137
  Adjacency matrix or biadjacency matrix of the graph.
137
138
 
138
139
  Returns
@@ -145,7 +146,7 @@ class LouvainIteration(BaseHierarchy):
145
146
  tree = self._recursive_louvain(adjacency, self.depth)
146
147
  dendrogram, _ = get_dendrogram(tree)
147
148
  dendrogram = np.array(dendrogram)
148
- dendrogram[:, 2] -= min(dendrogram[:, 2])
149
+ dendrogram[:, 2] += 1 - min(dendrogram[:, 2])
149
150
  self.dendrogram_ = reorder_dendrogram(dendrogram)
150
151
  if self.bipartite:
151
152
  self._split_vars(input_matrix.shape)
@@ -155,30 +156,32 @@ class LouvainIteration(BaseHierarchy):
155
156
  class LouvainHierarchy(BaseHierarchy):
156
157
  """Hierarchical clustering by Louvain (bottom-up).
157
158
 
159
+ Each level corresponds to an aggregation step of the Louvain algorithm.
160
+
158
161
  Parameters
159
162
  ----------
160
- resolution :
163
+ resolution : float
161
164
  Resolution parameter.
162
- tol_optimization :
165
+ tol_optimization : float
163
166
  Minimum increase in the objective function to enter a new optimization pass.
164
- tol_aggregation :
167
+ tol_aggregation : float
165
168
  Minimum increase in the objective function to enter a new aggregation pass.
166
- shuffle_nodes :
167
- Enables node shuffling before optimization.
168
- random_state :
169
+ shuffle_nodes : bool
170
+ If ``True``, shuffle nodes before optimization.
171
+ random_state : int
169
172
  Random number generator or random seed. If ``None``, numpy.random is used.
170
- verbose :
173
+ verbose : bool
171
174
  Verbose mode.
172
175
 
173
176
  Attributes
174
177
  ----------
175
- dendrogram_ :
178
+ dendrogram_ : np.ndarray
176
179
  Dendrogram of the graph.
177
- dendrogram_row_ :
180
+ dendrogram_row_ : np.ndarray
178
181
  Dendrogram for the rows, for bipartite graphs.
179
- dendrogram_col_ :
182
+ dendrogram_col_ : np.ndarray
180
183
  Dendrogram for the columns, for bipartite graphs.
181
- dendrogram_full_ :
184
+ dendrogram_full_ : np.ndarray
182
185
  Dendrogram for both rows and columns, indexed in this order, for bipartite graphs.
183
186
 
184
187
  Example
@@ -188,10 +191,10 @@ class LouvainHierarchy(BaseHierarchy):
188
191
  >>> louvain = LouvainHierarchy()
189
192
  >>> adjacency = house()
190
193
  >>> louvain.fit_predict(adjacency)
191
- array([[3., 2., 0., 2.],
192
- [4., 1., 0., 2.],
193
- [6., 0., 0., 3.],
194
- [5., 7., 1., 5.]])
194
+ array([[3., 2., 1., 2.],
195
+ [4., 1., 1., 2.],
196
+ [6., 0., 1., 3.],
197
+ [5., 7., 2., 5.]])
195
198
 
196
199
  Notes
197
200
  -----
@@ -200,6 +203,7 @@ class LouvainHierarchy(BaseHierarchy):
200
203
  See Also
201
204
  --------
202
205
  scipy.cluster.hierarchy.dendrogram
206
+ sknetwork.clustering.Louvain
203
207
  """
204
208
 
205
209
  def __init__(self, resolution: float = 1, tol_optimization: float = 1e-3,
@@ -218,7 +222,7 @@ class LouvainHierarchy(BaseHierarchy):
218
222
 
219
223
  Parameters
220
224
  ----------
221
- adjacency :
225
+ adjacency : sparse.csr_matrix, np.ndarray
222
226
  Adjacency matrix of the graph.
223
227
 
224
228
  Returns
@@ -244,12 +248,12 @@ class LouvainHierarchy(BaseHierarchy):
244
248
 
245
249
  Parameters
246
250
  ----------
247
- input_matrix :
251
+ input_matrix : sparse.csr_matrix, np.ndarray
248
252
  Adjacency matrix or biadjacency matrix of the graph.
249
253
 
250
254
  Returns
251
255
  -------
252
- self: :class:`LouvainIteration`
256
+ self: :class:`LouvainHierarchy`
253
257
  """
254
258
  self._init_vars()
255
259
  input_matrix = check_format(input_matrix)
@@ -257,7 +261,7 @@ class LouvainHierarchy(BaseHierarchy):
257
261
  tree = self._get_hierarchy(adjacency)
258
262
  dendrogram, _ = get_dendrogram(tree)
259
263
  dendrogram = np.array(dendrogram)
260
- dendrogram[:, 2] -= min(dendrogram[:, 2])
264
+ dendrogram[:, 2] += 1 - min(dendrogram[:, 2])
261
265
  self.dendrogram_ = reorder_dendrogram(dendrogram)
262
266
  if self.bipartite:
263
267
  self._split_vars(input_matrix.shape)