scikit-network 0.28.3__cp39-cp39-macosx_12_0_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of scikit-network might be problematic. Click here for more details.
- scikit_network-0.28.3.dist-info/AUTHORS.rst +41 -0
- scikit_network-0.28.3.dist-info/LICENSE +34 -0
- scikit_network-0.28.3.dist-info/METADATA +457 -0
- scikit_network-0.28.3.dist-info/RECORD +240 -0
- scikit_network-0.28.3.dist-info/WHEEL +5 -0
- scikit_network-0.28.3.dist-info/top_level.txt +1 -0
- sknetwork/__init__.py +21 -0
- sknetwork/classification/__init__.py +8 -0
- sknetwork/classification/base.py +84 -0
- sknetwork/classification/base_rank.py +143 -0
- sknetwork/classification/diffusion.py +134 -0
- sknetwork/classification/knn.py +162 -0
- sknetwork/classification/metrics.py +205 -0
- sknetwork/classification/pagerank.py +66 -0
- sknetwork/classification/propagation.py +152 -0
- sknetwork/classification/tests/__init__.py +1 -0
- sknetwork/classification/tests/test_API.py +35 -0
- sknetwork/classification/tests/test_diffusion.py +37 -0
- sknetwork/classification/tests/test_knn.py +24 -0
- sknetwork/classification/tests/test_metrics.py +53 -0
- sknetwork/classification/tests/test_pagerank.py +20 -0
- sknetwork/classification/tests/test_propagation.py +24 -0
- sknetwork/classification/vote.cpython-39-darwin.so +0 -0
- sknetwork/classification/vote.pyx +58 -0
- sknetwork/clustering/__init__.py +7 -0
- sknetwork/clustering/base.py +102 -0
- sknetwork/clustering/kmeans.py +142 -0
- sknetwork/clustering/louvain.py +255 -0
- sknetwork/clustering/louvain_core.cpython-39-darwin.so +0 -0
- sknetwork/clustering/louvain_core.pyx +134 -0
- sknetwork/clustering/metrics.py +91 -0
- sknetwork/clustering/postprocess.py +66 -0
- sknetwork/clustering/propagation_clustering.py +108 -0
- sknetwork/clustering/tests/__init__.py +1 -0
- sknetwork/clustering/tests/test_API.py +37 -0
- sknetwork/clustering/tests/test_kmeans.py +47 -0
- sknetwork/clustering/tests/test_louvain.py +104 -0
- sknetwork/clustering/tests/test_metrics.py +50 -0
- sknetwork/clustering/tests/test_post_processing.py +23 -0
- sknetwork/clustering/tests/test_postprocess.py +39 -0
- sknetwork/data/__init__.py +5 -0
- sknetwork/data/load.py +408 -0
- sknetwork/data/models.py +459 -0
- sknetwork/data/parse.py +621 -0
- sknetwork/data/test_graphs.py +84 -0
- sknetwork/data/tests/__init__.py +1 -0
- sknetwork/data/tests/test_API.py +30 -0
- sknetwork/data/tests/test_load.py +95 -0
- sknetwork/data/tests/test_models.py +52 -0
- sknetwork/data/tests/test_parse.py +253 -0
- sknetwork/data/tests/test_test_graphs.py +30 -0
- sknetwork/data/tests/test_toy_graphs.py +68 -0
- sknetwork/data/toy_graphs.py +619 -0
- sknetwork/embedding/__init__.py +10 -0
- sknetwork/embedding/base.py +90 -0
- sknetwork/embedding/force_atlas.py +197 -0
- sknetwork/embedding/louvain_embedding.py +174 -0
- sknetwork/embedding/louvain_hierarchy.py +142 -0
- sknetwork/embedding/metrics.py +66 -0
- sknetwork/embedding/random_projection.py +133 -0
- sknetwork/embedding/spectral.py +214 -0
- sknetwork/embedding/spring.py +198 -0
- sknetwork/embedding/svd.py +363 -0
- sknetwork/embedding/tests/__init__.py +1 -0
- sknetwork/embedding/tests/test_API.py +73 -0
- sknetwork/embedding/tests/test_force_atlas.py +35 -0
- sknetwork/embedding/tests/test_louvain_embedding.py +33 -0
- sknetwork/embedding/tests/test_louvain_hierarchy.py +19 -0
- sknetwork/embedding/tests/test_metrics.py +29 -0
- sknetwork/embedding/tests/test_random_projection.py +28 -0
- sknetwork/embedding/tests/test_spectral.py +84 -0
- sknetwork/embedding/tests/test_spring.py +50 -0
- sknetwork/embedding/tests/test_svd.py +37 -0
- sknetwork/flow/__init__.py +3 -0
- sknetwork/flow/flow.py +73 -0
- sknetwork/flow/tests/__init__.py +1 -0
- sknetwork/flow/tests/test_flow.py +17 -0
- sknetwork/flow/tests/test_utils.py +69 -0
- sknetwork/flow/utils.py +91 -0
- sknetwork/gnn/__init__.py +10 -0
- sknetwork/gnn/activation.py +117 -0
- sknetwork/gnn/base.py +155 -0
- sknetwork/gnn/base_activation.py +89 -0
- sknetwork/gnn/base_layer.py +109 -0
- sknetwork/gnn/gnn_classifier.py +381 -0
- sknetwork/gnn/layer.py +153 -0
- sknetwork/gnn/layers.py +127 -0
- sknetwork/gnn/loss.py +180 -0
- sknetwork/gnn/neighbor_sampler.py +65 -0
- sknetwork/gnn/optimizer.py +163 -0
- sknetwork/gnn/tests/__init__.py +1 -0
- sknetwork/gnn/tests/test_activation.py +56 -0
- sknetwork/gnn/tests/test_base.py +79 -0
- sknetwork/gnn/tests/test_base_layer.py +37 -0
- sknetwork/gnn/tests/test_gnn_classifier.py +192 -0
- sknetwork/gnn/tests/test_layers.py +80 -0
- sknetwork/gnn/tests/test_loss.py +33 -0
- sknetwork/gnn/tests/test_neigh_sampler.py +23 -0
- sknetwork/gnn/tests/test_optimizer.py +43 -0
- sknetwork/gnn/tests/test_utils.py +93 -0
- sknetwork/gnn/utils.py +219 -0
- sknetwork/hierarchy/__init__.py +7 -0
- sknetwork/hierarchy/base.py +69 -0
- sknetwork/hierarchy/louvain_hierarchy.py +264 -0
- sknetwork/hierarchy/metrics.py +234 -0
- sknetwork/hierarchy/paris.cpython-39-darwin.so +0 -0
- sknetwork/hierarchy/paris.pyx +317 -0
- sknetwork/hierarchy/postprocess.py +350 -0
- sknetwork/hierarchy/tests/__init__.py +1 -0
- sknetwork/hierarchy/tests/test_API.py +25 -0
- sknetwork/hierarchy/tests/test_algos.py +29 -0
- sknetwork/hierarchy/tests/test_metrics.py +62 -0
- sknetwork/hierarchy/tests/test_postprocess.py +57 -0
- sknetwork/hierarchy/tests/test_ward.py +25 -0
- sknetwork/hierarchy/ward.py +94 -0
- sknetwork/linalg/__init__.py +9 -0
- sknetwork/linalg/basics.py +37 -0
- sknetwork/linalg/diteration.cpython-39-darwin.so +0 -0
- sknetwork/linalg/diteration.pyx +49 -0
- sknetwork/linalg/eig_solver.py +93 -0
- sknetwork/linalg/laplacian.py +15 -0
- sknetwork/linalg/normalization.py +66 -0
- sknetwork/linalg/operators.py +225 -0
- sknetwork/linalg/polynome.py +76 -0
- sknetwork/linalg/ppr_solver.py +170 -0
- sknetwork/linalg/push.cpython-39-darwin.so +0 -0
- sknetwork/linalg/push.pyx +73 -0
- sknetwork/linalg/sparse_lowrank.py +142 -0
- sknetwork/linalg/svd_solver.py +91 -0
- sknetwork/linalg/tests/__init__.py +1 -0
- sknetwork/linalg/tests/test_eig.py +44 -0
- sknetwork/linalg/tests/test_laplacian.py +18 -0
- sknetwork/linalg/tests/test_normalization.py +38 -0
- sknetwork/linalg/tests/test_operators.py +70 -0
- sknetwork/linalg/tests/test_polynome.py +38 -0
- sknetwork/linalg/tests/test_ppr.py +50 -0
- sknetwork/linalg/tests/test_sparse_lowrank.py +61 -0
- sknetwork/linalg/tests/test_svd.py +38 -0
- sknetwork/linkpred/__init__.py +4 -0
- sknetwork/linkpred/base.py +80 -0
- sknetwork/linkpred/first_order.py +508 -0
- sknetwork/linkpred/first_order_core.cpython-39-darwin.so +0 -0
- sknetwork/linkpred/first_order_core.pyx +315 -0
- sknetwork/linkpred/postprocessing.py +98 -0
- sknetwork/linkpred/tests/__init__.py +1 -0
- sknetwork/linkpred/tests/test_API.py +49 -0
- sknetwork/linkpred/tests/test_postprocessing.py +21 -0
- sknetwork/path/__init__.py +4 -0
- sknetwork/path/metrics.py +148 -0
- sknetwork/path/search.py +65 -0
- sknetwork/path/shortest_path.py +186 -0
- sknetwork/path/tests/__init__.py +1 -0
- sknetwork/path/tests/test_metrics.py +29 -0
- sknetwork/path/tests/test_search.py +25 -0
- sknetwork/path/tests/test_shortest_path.py +45 -0
- sknetwork/ranking/__init__.py +9 -0
- sknetwork/ranking/base.py +56 -0
- sknetwork/ranking/betweenness.cpython-39-darwin.so +0 -0
- sknetwork/ranking/betweenness.pyx +99 -0
- sknetwork/ranking/closeness.py +95 -0
- sknetwork/ranking/harmonic.py +82 -0
- sknetwork/ranking/hits.py +94 -0
- sknetwork/ranking/katz.py +81 -0
- sknetwork/ranking/pagerank.py +107 -0
- sknetwork/ranking/postprocess.py +25 -0
- sknetwork/ranking/tests/__init__.py +1 -0
- sknetwork/ranking/tests/test_API.py +34 -0
- sknetwork/ranking/tests/test_betweenness.py +38 -0
- sknetwork/ranking/tests/test_closeness.py +34 -0
- sknetwork/ranking/tests/test_hits.py +20 -0
- sknetwork/ranking/tests/test_pagerank.py +69 -0
- sknetwork/regression/__init__.py +4 -0
- sknetwork/regression/base.py +56 -0
- sknetwork/regression/diffusion.py +190 -0
- sknetwork/regression/tests/__init__.py +1 -0
- sknetwork/regression/tests/test_API.py +34 -0
- sknetwork/regression/tests/test_diffusion.py +48 -0
- sknetwork/sknetwork.py +3 -0
- sknetwork/topology/__init__.py +9 -0
- sknetwork/topology/dag.py +74 -0
- sknetwork/topology/dag_core.cpython-39-darwin.so +0 -0
- sknetwork/topology/dag_core.pyx +38 -0
- sknetwork/topology/kcliques.cpython-39-darwin.so +0 -0
- sknetwork/topology/kcliques.pyx +193 -0
- sknetwork/topology/kcore.cpython-39-darwin.so +0 -0
- sknetwork/topology/kcore.pyx +120 -0
- sknetwork/topology/structure.py +234 -0
- sknetwork/topology/tests/__init__.py +1 -0
- sknetwork/topology/tests/test_cliques.py +28 -0
- sknetwork/topology/tests/test_cores.py +21 -0
- sknetwork/topology/tests/test_dag.py +26 -0
- sknetwork/topology/tests/test_structure.py +99 -0
- sknetwork/topology/tests/test_triangles.py +42 -0
- sknetwork/topology/tests/test_wl_coloring.py +49 -0
- sknetwork/topology/tests/test_wl_kernel.py +31 -0
- sknetwork/topology/triangles.cpython-39-darwin.so +0 -0
- sknetwork/topology/triangles.pyx +166 -0
- sknetwork/topology/weisfeiler_lehman.py +163 -0
- sknetwork/topology/weisfeiler_lehman_core.cpython-39-darwin.so +0 -0
- sknetwork/topology/weisfeiler_lehman_core.pyx +116 -0
- sknetwork/utils/__init__.py +40 -0
- sknetwork/utils/base.py +35 -0
- sknetwork/utils/check.py +354 -0
- sknetwork/utils/co_neighbor.py +71 -0
- sknetwork/utils/format.py +219 -0
- sknetwork/utils/kmeans.py +89 -0
- sknetwork/utils/knn.py +166 -0
- sknetwork/utils/knn1d.cpython-39-darwin.so +0 -0
- sknetwork/utils/knn1d.pyx +80 -0
- sknetwork/utils/membership.py +82 -0
- sknetwork/utils/minheap.cpython-39-darwin.so +0 -0
- sknetwork/utils/minheap.pxd +22 -0
- sknetwork/utils/minheap.pyx +111 -0
- sknetwork/utils/neighbors.py +115 -0
- sknetwork/utils/seeds.py +75 -0
- sknetwork/utils/simplex.py +140 -0
- sknetwork/utils/tests/__init__.py +1 -0
- sknetwork/utils/tests/test_base.py +28 -0
- sknetwork/utils/tests/test_bunch.py +16 -0
- sknetwork/utils/tests/test_check.py +190 -0
- sknetwork/utils/tests/test_co_neighbor.py +43 -0
- sknetwork/utils/tests/test_format.py +61 -0
- sknetwork/utils/tests/test_kmeans.py +21 -0
- sknetwork/utils/tests/test_knn.py +32 -0
- sknetwork/utils/tests/test_membership.py +24 -0
- sknetwork/utils/tests/test_neighbors.py +41 -0
- sknetwork/utils/tests/test_projection_simplex.py +33 -0
- sknetwork/utils/tests/test_seeds.py +67 -0
- sknetwork/utils/tests/test_verbose.py +15 -0
- sknetwork/utils/tests/test_ward.py +20 -0
- sknetwork/utils/timeout.py +38 -0
- sknetwork/utils/verbose.py +37 -0
- sknetwork/utils/ward.py +60 -0
- sknetwork/visualization/__init__.py +4 -0
- sknetwork/visualization/colors.py +34 -0
- sknetwork/visualization/dendrograms.py +229 -0
- sknetwork/visualization/graphs.py +819 -0
- sknetwork/visualization/tests/__init__.py +1 -0
- sknetwork/visualization/tests/test_dendrograms.py +53 -0
- sknetwork/visualization/tests/test_graphs.py +167 -0
sknetwork/gnn/utils.py
ADDED
|
@@ -0,0 +1,219 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
# coding: utf-8
|
|
3
|
+
"""
|
|
4
|
+
Created on Thu Apr 21 2022
|
|
5
|
+
@author: Simon Delarue <sdelarue@enst.fr>
|
|
6
|
+
"""
|
|
7
|
+
import inspect
|
|
8
|
+
from typing import Union, Optional, Tuple
|
|
9
|
+
import warnings
|
|
10
|
+
|
|
11
|
+
import numpy as np
|
|
12
|
+
|
|
13
|
+
from sknetwork.gnn.base_activation import BaseActivation, BaseLoss
|
|
14
|
+
from sknetwork.gnn.base_layer import BaseLayer
|
|
15
|
+
from sknetwork.gnn.layer import get_layer
|
|
16
|
+
from sknetwork.gnn.loss import BinaryCrossEntropy, CrossEntropy
|
|
17
|
+
from sknetwork.utils.check import check_is_proba, check_boolean, check_labels
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def filter_mask(mask: np.ndarray, proportion: Optional[float]):
    """Randomly filter a boolean mask so that the proportion of ones matches some target (in expectation).

    Each ``True`` entry is kept independently with probability ``proportion * len(mask) / n_ones``,
    where ``n_ones`` is the number of ``True`` entries; all of them are kept when this ratio is at least 1.

    Parameters
    ----------
    mask : np.ndarray
        Boolean mask. It is modified in place when ``proportion`` is non-zero and the mask
        contains at least one ``True`` entry.
    proportion : float
        Target proportion of ones, relative to the length of the mask.
        If ``None`` or 0, the returned mask has no ``True`` entry.

    Returns
    -------
    mask_filter : np.ndarray
        New boolean mask
    """
    # Vectorized count; the built-in ``sum`` would iterate over the array in Python.
    n_ones = int(np.count_nonzero(mask))
    if n_ones:
        if proportion:
            ratio = proportion * len(mask) / n_ones
            # Only the entries that are currently True are redrawn.
            mask[mask] = np.random.random(n_ones) <= ratio
        else:
            mask = np.zeros_like(mask, dtype=bool)
    return mask
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def check_existing_masks(labels: np.ndarray, train_mask: Optional[np.ndarray] = None,
                         val_mask: Optional[np.ndarray] = None, test_mask: Optional[np.ndarray] = None,
                         train_size: Optional[float] = None, val_size: Optional[float] = None,
                         test_size: Optional[float] = None) -> Tuple:
    """Check mask parameters and return mask boolean arrays.

    Parameters
    ----------
    labels: np.ndarray
        Label vectors of length :math:`n`, with :math:`n` the number of nodes in `adjacency`. Labels set to `-1`
        will not be considered for training steps.
    train_mask, val_mask, test_mask: np.ndarray
        Boolean arrays indicating whether nodes are in training/validation/test set.
    train_size, val_size, test_size: float
        Proportion of the nodes in the training/validation/test set (between 0 and 1).
        Only used when the corresponding masks are ``None``.

    Returns
    -------
    Tuple containing:
        * ``True`` if training mask is provided
        * training, validation and test masks w.r.t values in `labels`.
          When no training mask is provided, these are respectively the mask of non-negative labels,
          ``None`` and the mask of negative labels.

    Raises
    ------
    ValueError
        If the three masks are provided and any two of them overlap, or if ``train_mask``,
        ``train_size`` and ``test_size`` are all ``None``.
    """
    _, _ = check_labels(labels)

    is_negative_labels = labels < 0

    if train_mask is not None:
        check_boolean(train_mask)
        # Nodes with negative labels are removed from the training set (with a warning).
        train_mask_filtered = np.logical_and(train_mask, ~is_negative_labels)
        check_mask_similarity(train_mask, train_mask_filtered)
        train_mask = train_mask_filtered
        if test_mask is not None:
            check_boolean(test_mask)
            if val_mask is not None:
                check_boolean(val_mask)
                val_mask_filtered = np.logical_and(val_mask, ~is_negative_labels)
                check_mask_similarity(val_mask, val_mask_filtered)
                val_mask = val_mask_filtered
                # Pairwise test: a node shared by only two of the masks is also an overlap.
                overlap = (train_mask & val_mask) | (train_mask & test_mask) | (val_mask & test_mask)
                if overlap.any():
                    raise ValueError('Masks are overlapping. Please change masks.')
            else:
                # Validation set = labeled nodes that are neither in the training set nor in the test set.
                val_mask = np.logical_and(~train_mask, ~test_mask)
                val_mask = np.logical_and(val_mask, ~is_negative_labels)
        else:
            if val_mask is None:
                val_mask = filter_mask(~train_mask, val_size)
            val_mask = np.logical_and(val_mask, ~is_negative_labels)
            # Test set = all remaining nodes.
            test_mask = np.logical_and(~train_mask, ~val_mask)
        return True, train_mask, val_mask, test_mask
    else:
        if train_size is None and test_size is None:
            raise ValueError('Either mask parameters or size parameters should be different from None.')
        for size in [train_size, test_size, val_size]:
            if size is not None:
                check_is_proba(size)
        return False, ~is_negative_labels, None, is_negative_labels
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
def check_mask_similarity(mask_1: np.ndarray, mask_2: np.ndarray):
    """Emit a warning if two mask arrays are different.

    Parameters
    ----------
    mask_1, mask_2 : np.ndarray
        Masks to compare entry by entry.
    """
    # Vectorized reduction; the built-in ``any`` would iterate over the array in Python.
    if np.any(mask_1 != mask_2):
        warnings.warn('Nodes with label "-1" are considered in the train set or the validation set.')
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
def check_early_stopping(early_stopping: bool, val_mask: Optional[np.ndarray], patience: Optional[int]):
    """Check early stopping parameters.

    Parameters
    ----------
    early_stopping : bool
        Requested value for early stopping.
    val_mask : np.ndarray
        Boolean mask of the validation set (possibly ``None``).
    patience : int
        Patience parameter (possibly ``None``).

    Returns
    -------
    ``False`` if ``val_mask`` is ``None``, ``patience`` is ``None`` or ``val_mask`` has no ``True`` entry;
    otherwise ``early_stopping``, unchanged.
    """
    # Vectorized reduction; the built-in ``any`` would iterate over the array in Python.
    if val_mask is None or patience is None or not np.any(val_mask):
        return False
    return early_stopping
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
def check_normalizations(normalizations: Union[str, list]):
    """Validate one normalization name or a list of them (case-insensitive).

    Parameters
    ----------
    normalizations :
        Normalization name, or list of normalization names.

    Raises
    ------
    ValueError
        If a name is not ``'left'``, ``'right'`` or ``'both'``.
    """
    candidates = normalizations if isinstance(normalizations, list) else [normalizations]
    if any(candidate.lower() not in ('left', 'right', 'both') for candidate in candidates):
        raise ValueError("Normalization must be 'left', 'right' or 'both'.")
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
def check_output(n_channels: int, labels: np.ndarray):
    """Check that the output dimension of the GNN is compatible with the labels.

    Parameters
    ----------
    n_channels : int
        Number of output channels
    labels : np.ndarray
        Vector of labels; negative entries are ignored.

    Raises
    ------
    ValueError
        If there are more than 2 distinct non-negative labels and more such labels than output channels.
    """
    known_labels = labels[labels >= 0]
    n_labels = len(np.unique(known_labels))
    if n_labels <= 2 or n_labels <= n_channels:
        return
    raise ValueError("The dimension of the output is too small for the number of labels. "
                     "Please check the `dims` parameter of your GNN or the `labels` parameter.")
|
|
142
|
+
|
|
143
|
+
|
|
144
|
+
def check_param(param, length):
    """Return a parameter as a list with one entry per layer.

    A list is returned unchanged after a length check; any other value is repeated ``length`` times.

    Raises
    ------
    ValueError
        If ``param`` is a list whose length differs from ``length``.
    """
    if isinstance(param, list):
        if len(param) != length:
            raise ValueError('The number of parameters must be equal to the number of layers.')
        return param
    return [param] * length
|
|
152
|
+
|
|
153
|
+
|
|
154
|
+
def check_loss(layer: BaseLayer):
    """Check that the activation of a layer is a loss function and return it.

    If the activation is a :class:`CrossEntropy` and the layer has a single output channel,
    the activation of the layer is replaced (in place) by a :class:`BinaryCrossEntropy`.

    Parameters
    ----------
    layer : BaseLayer
        Layer to check (the error message refers to it as the last layer).

    Returns
    -------
    Activation of the layer, possibly replaced as described above.

    Raises
    ------
    ValueError
        If the activation of the layer is not an instance of :class:`BaseLoss`.
    """
    if not isinstance(layer.activation, BaseLoss):
        raise ValueError('No loss specified for the last layer.')
    if isinstance(layer.activation, CrossEntropy) and layer.out_channels == 1:
        layer.activation = BinaryCrossEntropy()
    return layer.activation
|
|
162
|
+
|
|
163
|
+
|
|
164
|
+
def get_layers(dims: Union[int, list], layer_types: Union[str, BaseLayer, list],
               activations: Union[str, BaseActivation, list], use_bias: Union[bool, list],
               normalizations: Union[str, list], self_embeddings: Union[bool, list], sample_sizes: Union[int, list],
               loss: Union[str, BaseLoss]) -> list:
    """Build the list of layers from per-layer parameters.

    Any parameter that is not a list is shared by all layers. The loss is passed to the last layer only.

    Parameters
    ----------
    dims :
        Dimensions of layers (in forward direction).
    layer_types :
        Layer types.
    activations :
        Activation functions.
    use_bias :
        ``True`` if a bias vector is added.
    normalizations :
        Normalizations of adjacency matrix.
    self_embeddings :
        ``True`` if self embeddings are added. Allowed input are booleans and lists.
    sample_sizes
        Size of neighborhood sampled for each node.
    loss :
        Loss function.

    Returns
    -------
    list
        List of layers.
    """
    check_normalizations(normalizations)

    dims = dims if isinstance(dims, list) else [dims]
    n_layers = len(dims)

    # One entry per layer for each parameter.
    layer_types = check_param(layer_types, n_layers)
    activations = check_param(activations, n_layers)
    use_bias = check_param(use_bias, n_layers)
    normalizations = check_param(normalizations, n_layers)
    self_embeddings = check_param(self_embeddings, n_layers)
    sample_sizes = check_param(sample_sizes, n_layers)

    per_layer = zip(layer_types, dims, activations, use_bias, normalizations, self_embeddings, sample_sizes)
    last = n_layers - 1
    layers = []
    for index, (layer_type, dim, activation, bias, normalization, self_embedding, sample_size) in enumerate(per_layer):
        kwargs = {'layer': layer_type, 'out_channels': dim, 'activation': activation, 'use_bias': bias,
                  'normalization': normalization, 'self_embeddings': self_embedding, 'sample_size': sample_size}
        if index == last:
            kwargs['loss'] = loss
        layers.append(get_layer(**kwargs))

    return layers
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
"""hierarchy module"""
|
|
2
|
+
from sknetwork.hierarchy.paris import Paris
|
|
3
|
+
from sknetwork.hierarchy.base import BaseHierarchy
|
|
4
|
+
from sknetwork.hierarchy.louvain_hierarchy import LouvainIteration, LouvainHierarchy
|
|
5
|
+
from sknetwork.hierarchy.metrics import dasgupta_cost, dasgupta_score, tree_sampling_divergence
|
|
6
|
+
from sknetwork.hierarchy.postprocess import cut_straight, cut_balanced, aggregate_dendrogram, reorder_dendrogram
|
|
7
|
+
from sknetwork.hierarchy.ward import Ward
|
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
# -*- coding: utf-8 -*-
|
|
3
|
+
"""
|
|
4
|
+
Created on November 2019
|
|
5
|
+
@author: Nathan de Lara <nathan.delara@polytechnique.org>
|
|
6
|
+
"""
|
|
7
|
+
from abc import ABC
|
|
8
|
+
|
|
9
|
+
import numpy as np
|
|
10
|
+
|
|
11
|
+
from sknetwork.hierarchy.postprocess import split_dendrogram
|
|
12
|
+
from sknetwork.utils.base import Algorithm
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class BaseHierarchy(Algorithm, ABC):
    """Common base of the hierarchical clustering algorithms.

    Attributes
    ----------
    dendrogram_ :
        Dendrogram of the graph.
    dendrogram_row_ :
        Dendrogram for the rows, for bipartite graphs.
    dendrogram_col_ :
        Dendrogram for the columns, for bipartite graphs.
    dendrogram_full_ :
        Dendrogram for both rows and columns, indexed in this order, for bipartite graphs.
    """

    def __init__(self):
        self._init_vars()

    def fit_predict(self, *args, **kwargs) -> np.ndarray:
        """Fit the algorithm and return the dendrogram. Takes the same parameters as the ``fit`` method.

        Returns
        -------
        dendrogram : np.ndarray
            Dendrogram.
        """
        self.fit(*args, **kwargs)
        dendrogram = self.dendrogram_
        return dendrogram

    def fit_transform(self, *args, **kwargs) -> np.ndarray:
        """Fit the algorithm and return the dendrogram. Alias for ``fit_predict``.
        Takes the same parameters as the ``fit`` method.

        Returns
        -------
        dendrogram : np.ndarray
            Dendrogram.
        """
        self.fit(*args, **kwargs)
        dendrogram = self.dendrogram_
        return dendrogram

    def _init_vars(self):
        """Reset all dendrogram attributes to ``None``."""
        self.dendrogram_ = self.dendrogram_row_ = self.dendrogram_col_ = self.dendrogram_full_ = None

    def _split_vars(self, shape):
        """Split the dendrogram into row and column dendrograms.

        The current dendrogram is kept as ``dendrogram_full_``, while ``dendrogram_`` and
        ``dendrogram_row_`` are both set to the row dendrogram.
        """
        rows, cols = split_dendrogram(self.dendrogram_, shape)
        self.dendrogram_full_ = self.dendrogram_
        self.dendrogram_ = self.dendrogram_row_ = rows
        self.dendrogram_col_ = cols
        return self
|
|
@@ -0,0 +1,264 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
# -*- coding: utf-8 -*-
|
|
3
|
+
"""
|
|
4
|
+
Created in March 2020
|
|
5
|
+
@author: Quentin Lutz <qlutz@enst.fr>
|
|
6
|
+
@author: Thomas Bonald <tbonald@enst.fr>
|
|
7
|
+
"""
|
|
8
|
+
from typing import Optional, Union
|
|
9
|
+
|
|
10
|
+
import numpy as np
|
|
11
|
+
from scipy import sparse
|
|
12
|
+
|
|
13
|
+
from sknetwork.clustering.louvain import Louvain
|
|
14
|
+
from sknetwork.hierarchy.base import BaseHierarchy
|
|
15
|
+
from sknetwork.hierarchy.postprocess import get_dendrogram, reorder_dendrogram
|
|
16
|
+
from sknetwork.utils.check import check_format
|
|
17
|
+
from sknetwork.utils.format import get_adjacency
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class LouvainIteration(BaseHierarchy):
    """Hierarchical clustering by successive instances of Louvain (top-down).

    Parameters
    ----------
    depth :
        Depth of the tree.
        A negative value is interpreted as no limit (return a tree of maximum depth).
    resolution :
        Resolution parameter.
    tol_optimization :
        Minimum increase in the objective function to enter a new optimization pass.
    tol_aggregation :
        Minimum increase in the objective function to enter a new aggregation pass.
    n_aggregations :
        Maximum number of aggregations.
        A negative value is interpreted as no limit.
    shuffle_nodes :
        Enables node shuffling before optimization.
    random_state :
        Random number generator or random seed. If ``None``, numpy.random is used.
    verbose :
        Verbose mode.

    Attributes
    ----------
    dendrogram_ :
        Dendrogram of the graph.
    dendrogram_row_ :
        Dendrogram for the rows, for bipartite graphs.
    dendrogram_col_ :
        Dendrogram for the columns, for bipartite graphs.
    dendrogram_full_ :
        Dendrogram for both rows and columns, indexed in this order, for bipartite graphs.

    Example
    -------
    >>> from sknetwork.hierarchy import LouvainIteration
    >>> from sknetwork.data import house
    >>> louvain = LouvainIteration()
    >>> adjacency = house()
    >>> louvain.fit_predict(adjacency)
    array([[3., 2., 0., 2.],
           [4., 1., 0., 2.],
           [6., 0., 0., 3.],
           [5., 7., 1., 5.]])

    Notes
    -----
    Each row of the dendrogram = merge nodes, distance, size of cluster.

    See Also
    --------
    scipy.cluster.hierarchy.dendrogram
    """

    def __init__(self, depth: int = 3, resolution: float = 1, tol_optimization: float = 1e-3,
                 tol_aggregation: float = 1e-3, n_aggregations: int = -1, shuffle_nodes: bool = False,
                 random_state: Optional[Union[np.random.RandomState, int]] = None, verbose: bool = False):
        super(LouvainIteration, self).__init__()

        louvain_params = dict(resolution=resolution, tol_optimization=tol_optimization,
                              tol_aggregation=tol_aggregation, n_aggregations=n_aggregations,
                              shuffle_nodes=shuffle_nodes, random_state=random_state, verbose=verbose)
        self.dendrogram_ = None
        self.depth = depth
        self._clustering_method = Louvain(**louvain_params)
        self.bipartite = None

    def _recursive_louvain(self, adjacency: Union[sparse.csr_matrix, np.ndarray], depth: int,
                           nodes: Optional[np.ndarray] = None):
        """Cluster the graph, then recurse on each cluster.

        Parameters
        ----------
        adjacency :
            Adjacency matrix of the graph.
        depth :
            Depth of the recursion.
        nodes :
            The indices of the current nodes in the original graph.

        Returns
        -------
        tree: recursive list of list of nodes.
        """
        n_nodes = adjacency.shape[0]
        if nodes is None:
            nodes = np.arange(n_nodes)

        # No clustering without edges or once the depth budget reaches 0: a single cluster.
        if adjacency.nnz and depth:
            labels = self._clustering_method.fit_transform(adjacency)
        else:
            labels = np.zeros(n_nodes)
        clusters = np.unique(labels)

        if len(clusters) == 1:
            # Leaf of the recursion: one singleton per node, or the node itself if alone.
            return [[node] for node in nodes] if len(nodes) > 1 else [nodes[0]]

        tree = []
        for cluster in clusters:
            members = (labels == cluster)
            subgraph = adjacency[members, :][:, members]
            tree.append(self._recursive_louvain(subgraph, depth - 1, nodes[members]))
        return tree

    def fit(self, input_matrix: Union[sparse.csr_matrix, np.ndarray]) -> 'LouvainIteration':
        """Fit algorithm to data.

        Parameters
        ----------
        input_matrix :
            Adjacency matrix or biadjacency matrix of the graph.

        Returns
        -------
        self: :class:`LouvainIteration`
        """
        self._init_vars()
        input_matrix = check_format(input_matrix)
        adjacency, self.bipartite = get_adjacency(input_matrix)

        tree = self._recursive_louvain(adjacency, self.depth)
        dendrogram = np.array(get_dendrogram(tree)[0])
        # Shift the third column so that its smallest value is 0.
        dendrogram[:, 2] -= dendrogram[:, 2].min()
        self.dendrogram_ = reorder_dendrogram(dendrogram)

        if self.bipartite:
            self._split_vars(input_matrix.shape)
        return self
|
|
153
|
+
|
|
154
|
+
|
|
155
|
+
class LouvainHierarchy(BaseHierarchy):
    """Hierarchical clustering by Louvain (bottom-up).

    Parameters
    ----------
    resolution :
        Resolution parameter.
    tol_optimization :
        Minimum increase in the objective function to enter a new optimization pass.
    tol_aggregation :
        Minimum increase in the objective function to enter a new aggregation pass.
    shuffle_nodes :
        Enables node shuffling before optimization.
    random_state :
        Random number generator or random seed. If ``None``, numpy.random is used.
    verbose :
        Verbose mode.

    Attributes
    ----------
    dendrogram_ :
        Dendrogram of the graph.
    dendrogram_row_ :
        Dendrogram for the rows, for bipartite graphs.
    dendrogram_col_ :
        Dendrogram for the columns, for bipartite graphs.
    dendrogram_full_ :
        Dendrogram for both rows and columns, indexed in this order, for bipartite graphs.

    Example
    -------
    >>> from sknetwork.hierarchy import LouvainHierarchy
    >>> from sknetwork.data import house
    >>> louvain = LouvainHierarchy()
    >>> adjacency = house()
    >>> louvain.fit_predict(adjacency)
    array([[3., 2., 0., 2.],
           [4., 1., 0., 2.],
           [6., 0., 0., 3.],
           [5., 7., 1., 5.]])

    Notes
    -----
    Each row of the dendrogram = merge nodes, distance, size of cluster.

    See Also
    --------
    scipy.cluster.hierarchy.dendrogram
    """

    def __init__(self, resolution: float = 1, tol_optimization: float = 1e-3,
                 tol_aggregation: float = 1e-3, shuffle_nodes: bool = False,
                 random_state: Optional[Union[np.random.RandomState, int]] = None, verbose: bool = False):
        super(LouvainHierarchy, self).__init__()

        self.dendrogram_ = None
        # A single aggregation per call: each call to Louvain gives one level of the hierarchy.
        self._clustering_method = Louvain(resolution=resolution, tol_optimization=tol_optimization,
                                          tol_aggregation=tol_aggregation, n_aggregations=1,
                                          shuffle_nodes=shuffle_nodes, random_state=random_state, verbose=verbose)
        self.bipartite = None

    def _get_hierarchy(self, adjacency: Union[sparse.csr_matrix, np.ndarray]):
        """Get the hierarchy from Louvain.

        Louvain is applied to the graph, then repeatedly to the aggregate graph of the previous call
        (attribute ``aggregate_`` of the clustering method), until the number of clusters stops changing.

        Parameters
        ----------
        adjacency :
            Adjacency matrix of the graph.

        Returns
        -------
        tree: recursive list of list of nodes
        """
        tree = [[node] for node in range(adjacency.shape[0])]
        labels = self._clustering_method.fit_transform(adjacency)
        labels_unique = np.unique(labels)
        while True:
            # Group the current subtrees by cluster; a cluster with a single subtree is not nested further.
            tree = [[tree[node] for node in np.flatnonzero(labels == label)] for label in labels_unique]
            tree = [cluster[0] if len(cluster) == 1 else cluster for cluster in tree]
            aggregate = self._clustering_method.aggregate_
            labels = self._clustering_method.fit_transform(aggregate)
            new_labels_unique = np.unique(labels)
            if len(labels_unique) == len(new_labels_unique):
                break
            labels_unique = new_labels_unique
        return tree

    def fit(self, input_matrix: Union[sparse.csr_matrix, np.ndarray]) -> 'LouvainHierarchy':
        """Fit algorithm to data.

        Parameters
        ----------
        input_matrix :
            Adjacency matrix or biadjacency matrix of the graph.

        Returns
        -------
        self: :class:`LouvainHierarchy`
        """
        self._init_vars()
        input_matrix = check_format(input_matrix)
        adjacency, self.bipartite = get_adjacency(input_matrix)
        tree = self._get_hierarchy(adjacency)
        dendrogram, _ = get_dendrogram(tree)
        dendrogram = np.array(dendrogram)
        # Shift the third column so that its smallest value is 0.
        dendrogram[:, 2] -= np.min(dendrogram[:, 2])
        self.dendrogram_ = reorder_dendrogram(dendrogram)
        if self.bipartite:
            self._split_vars(input_matrix.shape)
        return self
|