scikit-network 0.33.3 (scikit_network-0.33.3-cp313-cp313-win_amd64.whl)
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release. This version of scikit-network might be problematic.
- scikit_network-0.33.3.dist-info/METADATA +122 -0
- scikit_network-0.33.3.dist-info/RECORD +228 -0
- scikit_network-0.33.3.dist-info/WHEEL +5 -0
- scikit_network-0.33.3.dist-info/licenses/AUTHORS.rst +43 -0
- scikit_network-0.33.3.dist-info/licenses/LICENSE +34 -0
- scikit_network-0.33.3.dist-info/top_level.txt +1 -0
- sknetwork/__init__.py +21 -0
- sknetwork/base.py +67 -0
- sknetwork/classification/__init__.py +8 -0
- sknetwork/classification/base.py +142 -0
- sknetwork/classification/base_rank.py +133 -0
- sknetwork/classification/diffusion.py +134 -0
- sknetwork/classification/knn.py +139 -0
- sknetwork/classification/metrics.py +205 -0
- sknetwork/classification/pagerank.py +66 -0
- sknetwork/classification/propagation.py +152 -0
- sknetwork/classification/tests/__init__.py +1 -0
- sknetwork/classification/tests/test_API.py +30 -0
- sknetwork/classification/tests/test_diffusion.py +77 -0
- sknetwork/classification/tests/test_knn.py +23 -0
- sknetwork/classification/tests/test_metrics.py +53 -0
- sknetwork/classification/tests/test_pagerank.py +20 -0
- sknetwork/classification/tests/test_propagation.py +24 -0
- sknetwork/classification/vote.cp313-win_amd64.pyd +0 -0
- sknetwork/classification/vote.cpp +27584 -0
- sknetwork/classification/vote.pyx +56 -0
- sknetwork/clustering/__init__.py +8 -0
- sknetwork/clustering/base.py +172 -0
- sknetwork/clustering/kcenters.py +253 -0
- sknetwork/clustering/leiden.py +242 -0
- sknetwork/clustering/leiden_core.cp313-win_amd64.pyd +0 -0
- sknetwork/clustering/leiden_core.cpp +31575 -0
- sknetwork/clustering/leiden_core.pyx +124 -0
- sknetwork/clustering/louvain.py +286 -0
- sknetwork/clustering/louvain_core.cp313-win_amd64.pyd +0 -0
- sknetwork/clustering/louvain_core.cpp +31220 -0
- sknetwork/clustering/louvain_core.pyx +124 -0
- sknetwork/clustering/metrics.py +91 -0
- sknetwork/clustering/postprocess.py +66 -0
- sknetwork/clustering/propagation_clustering.py +104 -0
- sknetwork/clustering/tests/__init__.py +1 -0
- sknetwork/clustering/tests/test_API.py +38 -0
- sknetwork/clustering/tests/test_kcenters.py +60 -0
- sknetwork/clustering/tests/test_leiden.py +34 -0
- sknetwork/clustering/tests/test_louvain.py +135 -0
- sknetwork/clustering/tests/test_metrics.py +50 -0
- sknetwork/clustering/tests/test_postprocess.py +39 -0
- sknetwork/data/__init__.py +6 -0
- sknetwork/data/base.py +33 -0
- sknetwork/data/load.py +406 -0
- sknetwork/data/models.py +459 -0
- sknetwork/data/parse.py +644 -0
- sknetwork/data/test_graphs.py +84 -0
- sknetwork/data/tests/__init__.py +1 -0
- sknetwork/data/tests/test_API.py +30 -0
- sknetwork/data/tests/test_base.py +14 -0
- sknetwork/data/tests/test_load.py +95 -0
- sknetwork/data/tests/test_models.py +52 -0
- sknetwork/data/tests/test_parse.py +250 -0
- sknetwork/data/tests/test_test_graphs.py +29 -0
- sknetwork/data/tests/test_toy_graphs.py +68 -0
- sknetwork/data/timeout.py +38 -0
- sknetwork/data/toy_graphs.py +611 -0
- sknetwork/embedding/__init__.py +8 -0
- sknetwork/embedding/base.py +94 -0
- sknetwork/embedding/force_atlas.py +198 -0
- sknetwork/embedding/louvain_embedding.py +148 -0
- sknetwork/embedding/random_projection.py +135 -0
- sknetwork/embedding/spectral.py +141 -0
- sknetwork/embedding/spring.py +198 -0
- sknetwork/embedding/svd.py +359 -0
- sknetwork/embedding/tests/__init__.py +1 -0
- sknetwork/embedding/tests/test_API.py +49 -0
- sknetwork/embedding/tests/test_force_atlas.py +35 -0
- sknetwork/embedding/tests/test_louvain_embedding.py +33 -0
- sknetwork/embedding/tests/test_random_projection.py +28 -0
- sknetwork/embedding/tests/test_spectral.py +81 -0
- sknetwork/embedding/tests/test_spring.py +50 -0
- sknetwork/embedding/tests/test_svd.py +43 -0
- sknetwork/gnn/__init__.py +10 -0
- sknetwork/gnn/activation.py +117 -0
- sknetwork/gnn/base.py +181 -0
- sknetwork/gnn/base_activation.py +90 -0
- sknetwork/gnn/base_layer.py +109 -0
- sknetwork/gnn/gnn_classifier.py +305 -0
- sknetwork/gnn/layer.py +153 -0
- sknetwork/gnn/loss.py +180 -0
- sknetwork/gnn/neighbor_sampler.py +65 -0
- sknetwork/gnn/optimizer.py +164 -0
- sknetwork/gnn/tests/__init__.py +1 -0
- sknetwork/gnn/tests/test_activation.py +56 -0
- sknetwork/gnn/tests/test_base.py +75 -0
- sknetwork/gnn/tests/test_base_layer.py +37 -0
- sknetwork/gnn/tests/test_gnn_classifier.py +130 -0
- sknetwork/gnn/tests/test_layers.py +80 -0
- sknetwork/gnn/tests/test_loss.py +33 -0
- sknetwork/gnn/tests/test_neigh_sampler.py +23 -0
- sknetwork/gnn/tests/test_optimizer.py +43 -0
- sknetwork/gnn/tests/test_utils.py +41 -0
- sknetwork/gnn/utils.py +127 -0
- sknetwork/hierarchy/__init__.py +6 -0
- sknetwork/hierarchy/base.py +96 -0
- sknetwork/hierarchy/louvain_hierarchy.py +272 -0
- sknetwork/hierarchy/metrics.py +234 -0
- sknetwork/hierarchy/paris.cp313-win_amd64.pyd +0 -0
- sknetwork/hierarchy/paris.cpp +37868 -0
- sknetwork/hierarchy/paris.pyx +316 -0
- sknetwork/hierarchy/postprocess.py +350 -0
- sknetwork/hierarchy/tests/__init__.py +1 -0
- sknetwork/hierarchy/tests/test_API.py +24 -0
- sknetwork/hierarchy/tests/test_algos.py +34 -0
- sknetwork/hierarchy/tests/test_metrics.py +62 -0
- sknetwork/hierarchy/tests/test_postprocess.py +57 -0
- sknetwork/linalg/__init__.py +9 -0
- sknetwork/linalg/basics.py +37 -0
- sknetwork/linalg/diteration.cp313-win_amd64.pyd +0 -0
- sknetwork/linalg/diteration.cpp +27400 -0
- sknetwork/linalg/diteration.pyx +47 -0
- sknetwork/linalg/eig_solver.py +93 -0
- sknetwork/linalg/laplacian.py +15 -0
- sknetwork/linalg/normalizer.py +86 -0
- sknetwork/linalg/operators.py +225 -0
- sknetwork/linalg/polynome.py +76 -0
- sknetwork/linalg/ppr_solver.py +170 -0
- sknetwork/linalg/push.cp313-win_amd64.pyd +0 -0
- sknetwork/linalg/push.cpp +31072 -0
- sknetwork/linalg/push.pyx +71 -0
- sknetwork/linalg/sparse_lowrank.py +142 -0
- sknetwork/linalg/svd_solver.py +91 -0
- sknetwork/linalg/tests/__init__.py +1 -0
- sknetwork/linalg/tests/test_eig.py +44 -0
- sknetwork/linalg/tests/test_laplacian.py +18 -0
- sknetwork/linalg/tests/test_normalization.py +34 -0
- sknetwork/linalg/tests/test_operators.py +66 -0
- sknetwork/linalg/tests/test_polynome.py +38 -0
- sknetwork/linalg/tests/test_ppr.py +50 -0
- sknetwork/linalg/tests/test_sparse_lowrank.py +61 -0
- sknetwork/linalg/tests/test_svd.py +38 -0
- sknetwork/linkpred/__init__.py +2 -0
- sknetwork/linkpred/base.py +46 -0
- sknetwork/linkpred/nn.py +126 -0
- sknetwork/linkpred/tests/__init__.py +1 -0
- sknetwork/linkpred/tests/test_nn.py +27 -0
- sknetwork/log.py +19 -0
- sknetwork/path/__init__.py +5 -0
- sknetwork/path/dag.py +54 -0
- sknetwork/path/distances.py +98 -0
- sknetwork/path/search.py +31 -0
- sknetwork/path/shortest_path.py +61 -0
- sknetwork/path/tests/__init__.py +1 -0
- sknetwork/path/tests/test_dag.py +37 -0
- sknetwork/path/tests/test_distances.py +62 -0
- sknetwork/path/tests/test_search.py +40 -0
- sknetwork/path/tests/test_shortest_path.py +40 -0
- sknetwork/ranking/__init__.py +8 -0
- sknetwork/ranking/base.py +61 -0
- sknetwork/ranking/betweenness.cp313-win_amd64.pyd +0 -0
- sknetwork/ranking/betweenness.cpp +9707 -0
- sknetwork/ranking/betweenness.pyx +97 -0
- sknetwork/ranking/closeness.py +92 -0
- sknetwork/ranking/hits.py +94 -0
- sknetwork/ranking/katz.py +83 -0
- sknetwork/ranking/pagerank.py +110 -0
- sknetwork/ranking/postprocess.py +37 -0
- sknetwork/ranking/tests/__init__.py +1 -0
- sknetwork/ranking/tests/test_API.py +32 -0
- sknetwork/ranking/tests/test_betweenness.py +38 -0
- sknetwork/ranking/tests/test_closeness.py +30 -0
- sknetwork/ranking/tests/test_hits.py +20 -0
- sknetwork/ranking/tests/test_pagerank.py +62 -0
- sknetwork/ranking/tests/test_postprocess.py +26 -0
- sknetwork/regression/__init__.py +4 -0
- sknetwork/regression/base.py +61 -0
- sknetwork/regression/diffusion.py +210 -0
- sknetwork/regression/tests/__init__.py +1 -0
- sknetwork/regression/tests/test_API.py +32 -0
- sknetwork/regression/tests/test_diffusion.py +56 -0
- sknetwork/sknetwork.py +3 -0
- sknetwork/test_base.py +35 -0
- sknetwork/test_log.py +15 -0
- sknetwork/topology/__init__.py +8 -0
- sknetwork/topology/cliques.cp313-win_amd64.pyd +0 -0
- sknetwork/topology/cliques.cpp +32565 -0
- sknetwork/topology/cliques.pyx +149 -0
- sknetwork/topology/core.cp313-win_amd64.pyd +0 -0
- sknetwork/topology/core.cpp +30651 -0
- sknetwork/topology/core.pyx +90 -0
- sknetwork/topology/cycles.py +243 -0
- sknetwork/topology/minheap.cp313-win_amd64.pyd +0 -0
- sknetwork/topology/minheap.cpp +27332 -0
- sknetwork/topology/minheap.pxd +20 -0
- sknetwork/topology/minheap.pyx +109 -0
- sknetwork/topology/structure.py +194 -0
- sknetwork/topology/tests/__init__.py +1 -0
- sknetwork/topology/tests/test_cliques.py +28 -0
- sknetwork/topology/tests/test_core.py +19 -0
- sknetwork/topology/tests/test_cycles.py +65 -0
- sknetwork/topology/tests/test_structure.py +85 -0
- sknetwork/topology/tests/test_triangles.py +38 -0
- sknetwork/topology/tests/test_wl.py +72 -0
- sknetwork/topology/triangles.cp313-win_amd64.pyd +0 -0
- sknetwork/topology/triangles.cpp +8894 -0
- sknetwork/topology/triangles.pyx +151 -0
- sknetwork/topology/weisfeiler_lehman.py +133 -0
- sknetwork/topology/weisfeiler_lehman_core.cp313-win_amd64.pyd +0 -0
- sknetwork/topology/weisfeiler_lehman_core.cpp +27635 -0
- sknetwork/topology/weisfeiler_lehman_core.pyx +114 -0
- sknetwork/utils/__init__.py +7 -0
- sknetwork/utils/check.py +355 -0
- sknetwork/utils/format.py +221 -0
- sknetwork/utils/membership.py +82 -0
- sknetwork/utils/neighbors.py +115 -0
- sknetwork/utils/tests/__init__.py +1 -0
- sknetwork/utils/tests/test_check.py +190 -0
- sknetwork/utils/tests/test_format.py +63 -0
- sknetwork/utils/tests/test_membership.py +24 -0
- sknetwork/utils/tests/test_neighbors.py +41 -0
- sknetwork/utils/tests/test_tfidf.py +18 -0
- sknetwork/utils/tests/test_values.py +66 -0
- sknetwork/utils/tfidf.py +37 -0
- sknetwork/utils/values.py +76 -0
- sknetwork/visualization/__init__.py +4 -0
- sknetwork/visualization/colors.py +34 -0
- sknetwork/visualization/dendrograms.py +277 -0
- sknetwork/visualization/graphs.py +1039 -0
- sknetwork/visualization/tests/__init__.py +1 -0
- sknetwork/visualization/tests/test_dendrograms.py +53 -0
- sknetwork/visualization/tests/test_graphs.py +176 -0
sknetwork/gnn/loss.py
ADDED
@@ -0,0 +1,180 @@
#!/usr/bin/env python3
# coding: utf-8
"""
Created in April 2022
@author: Simon Delarue <sdelarue@enst.fr>
@author: Thomas Bonald <bonald@enst.fr>
"""

from typing import Union

import numpy as np

from sknetwork.gnn.base_activation import BaseLoss
from sknetwork.gnn.activation import Sigmoid, Softmax


class CrossEntropy(BaseLoss, Softmax):
    """Cross entropy loss with softmax activation.

    For a single sample with value :math:`x` and true label :math:`y`, the cross-entropy loss
    is:

    :math:`-\\sum_i 1_{\\{y=i\\}} \\log (p_i)`

    with

    :math:`p_i = e^{x_i} / \\sum_j e^{x_j}`.

    For :math:`n` samples, return the average loss.
    """
    def __init__(self):
        super(CrossEntropy, self).__init__()
        self.name = 'Cross entropy'

    @staticmethod
    def loss(signal: np.ndarray, labels: np.ndarray) -> float:
        """Get loss value.

        Parameters
        ----------
        signal : np.ndarray, shape (n_samples, n_channels)
            Input signal (before activation).
            The number of channels must be at least 2.
        labels : np.ndarray, shape (n_samples)
            True labels.

        Returns
        -------
        value : float
            Loss value.
        """
        n = len(labels)
        probs = Softmax.output(signal)

        # for numerical stability
        eps = 1e-10
        probs = np.clip(probs, eps, 1 - eps)

        value = -np.log(probs[np.arange(n), labels]).sum()

        return value / n

    @staticmethod
    def loss_gradient(signal: np.ndarray, labels: np.ndarray) -> np.ndarray:
        """Get the gradient of the loss function (including activation).

        Parameters
        ----------
        signal : np.ndarray, shape (n_samples, n_channels)
            Input signal (before activation).
        labels : np.ndarray, shape (n_samples)
            True labels.
        Returns
        -------
        gradient: float
            Gradient of the loss function.
        """
        probs = Softmax.output(signal)
        one_hot_encoding = np.zeros_like(probs)
        one_hot_encoding[np.arange(len(labels)), labels] = 1
        gradient = probs - one_hot_encoding

        return gradient


class BinaryCrossEntropy(BaseLoss, Sigmoid):
    """Binary cross entropy loss with sigmoid activation.

    For a single sample with true label :math:`y` and predicted probability :math:`p`, the binary cross-entropy loss
    is:

    :math:`-y \\log (p) - (1-y) \\log (1 - p).`

    For :math:`n` samples, return the average loss.
    """
    def __init__(self):
        super(BinaryCrossEntropy, self).__init__()
        self.name = 'Binary cross entropy'

    @staticmethod
    def loss(signal: np.ndarray, labels: np.ndarray) -> float:
        """Get loss value.

        Parameters
        ----------
        signal : np.ndarray, shape (n_samples, n_channels)
            Input signal (before activation).
            The number of channels must be at least 2.
        labels : np.ndarray, shape (n_samples)
            True labels.

        Returns
        -------
        value : float
            Loss value.
        """
        probs = Sigmoid.output(signal)
        n = len(labels)

        # for numerical stability
        eps = 1e-15
        probs = np.clip(probs, eps, 1 - eps)

        if probs.shape[1] == 1:
            # binary labels
            value = -np.log(probs[labels > 0]).sum()
            value -= np.log((1 - probs)[labels == 0]).sum()
        else:
            # general case
            value = -np.log(1 - probs)
            value[np.arange(n), labels] = -np.log(probs[np.arange(n), labels])
            value = value.sum()

        return value / n

    @staticmethod
    def loss_gradient(signal: np.ndarray, labels: np.ndarray) -> np.ndarray:
        """Get the gradient of the loss function (including activation).

        Parameters
        ----------
        signal : np.ndarray, shape (n_samples, n_channels)
            Input signal (before activation).
        labels : np.ndarray, shape (n_samples)
            True labels.
        Returns
        -------
        gradient: float
            Gradient of the loss function.
        """
        probs = Sigmoid.output(signal)
        gradient = (probs.T - labels).T

        return gradient


def get_loss(loss: Union[BaseLoss, str] = 'CrossEntropyLoss') -> BaseLoss:
    """Instantiate loss function according to parameters.

    Parameters
    ----------
    loss : str or loss function.
        Which loss function to use. Can be ``'CrossEntropy'`` or ``'BinaryCrossEntropy'`` or custom loss.

    Returns
    -------
    Loss function object.
    """
    if issubclass(type(loss), BaseLoss):
        return loss
    elif type(loss) == str:
        loss = loss.lower().replace(' ', '')
        if loss in ['crossentropy', 'ce']:
            return CrossEntropy()
        elif loss in ['binarycrossentropy', 'bce']:
            return BinaryCrossEntropy()
        else:
            raise ValueError("Loss must be either \"CrossEntropy\" or \"BinaryCrossEntropy\".")
    else:
        raise TypeError("Loss must be either an \"BaseLoss\" object or a string.")
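
For reference, a minimal usage sketch of this module (the toy arrays below are illustrative, not from the package; this assumes the module is importable as sknetwork.gnn.loss, as the file path above indicates):

import numpy as np

from sknetwork.gnn.loss import get_loss

# Two samples, three classes: raw scores before activation.
signal = np.array([[2.0, 1.0, 0.1],
                   [0.5, 2.5, 0.3]])
labels = np.array([0, 1])

loss = get_loss('CrossEntropy')                # resolves the string to CrossEntropy()
value = loss.loss(signal, labels)              # average of -log softmax(signal)[i, labels[i]]
gradient = loss.loss_gradient(signal, labels)  # softmax(signal) - one-hot(labels), shape (2, 3)

The fused loss-plus-activation design is what makes the gradient so simple: for softmax followed by cross-entropy, the gradient with respect to the pre-activation signal is exactly the predicted probabilities minus the one-hot labels.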

sknetwork/gnn/neighbor_sampler.py
ADDED
@@ -0,0 +1,65 @@
#!/usr/bin/env python3
# coding: utf-8
"""
@author: Simon Delarue <sdelarue@enst.fr>
"""
from typing import Union

import numpy as np
from scipy import sparse

from sknetwork.utils import get_degrees


class UniformNeighborSampler:
    """Neighbor node sampler.

    Uniformly sample nodes over neighborhood.

    Parameters
    ----------
    sample_size : int
        Size of neighborhood sampled for each node.
    """
    def __init__(self, sample_size: int):
        self.sample_size = sample_size

    def _sample_indexes(self, size: int) -> np.ndarray:
        """Randomly chose indexes without replacement.

        Parameters
        ----------
        size : int
            Highest index available. This index is used if lower than a threshold.

        Returns
        -------
        Array of sampled indexes.
        """
        return np.random.choice(size, size=min(size, self.sample_size), replace=False)

    def __call__(self, adjacency: Union[sparse.csr_matrix, np.ndarray]) -> sparse.csr_matrix:
        """Apply node sampling on each node and return filtered adjacency matrix.

        Parameters
        ----------
        adjacency
            Adjacency matrix of the graph.

        Returns
        -------
        Filtered adjacency matrix using node sampling.
        """
        n_row, _ = adjacency.shape
        sampled_adjacency = adjacency.copy()

        degrees = get_degrees(adjacency)
        neighbor_samples = list(map(self._sample_indexes, degrees))

        for i, neighbors in enumerate(neighbor_samples):
            sampled_adjacency.data[sampled_adjacency.indptr[i]:sampled_adjacency.indptr[i + 1]] = np.zeros(degrees[i])
            sampled_adjacency.data[sampled_adjacency.indptr[i]:sampled_adjacency.indptr[i + 1]][neighbors] = 1

        sampled_adjacency.eliminate_zeros()

        return sampled_adjacency
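
A short sketch of how this sampler might be used (the 4-node graph is made up for illustration):

import numpy as np
from scipy import sparse

from sknetwork.gnn.neighbor_sampler import UniformNeighborSampler

# Toy directed graph with 4 nodes in CSR format.
row = np.array([0, 0, 0, 1, 2, 3])
col = np.array([1, 2, 3, 0, 0, 0])
adjacency = sparse.csr_matrix((np.ones(6), (row, col)), shape=(4, 4))

sampler = UniformNeighborSampler(sample_size=2)
sampled = sampler(adjacency)
# Each row of `sampled` keeps at most sample_size neighbors, chosen
# uniformly without replacement; dropped entries are zeroed out and
# removed by eliminate_zeros().

Note that the sampler works row by row on the CSR data array, so the edges it keeps are rewritten with weight 1 regardless of their original values.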

sknetwork/gnn/optimizer.py
ADDED
@@ -0,0 +1,164 @@
from __future__ import annotations

"""
Created on Thu Apr 21 2022
@author: Simon Delarue <sdelarue@enst.fr>
"""
from typing import Union, TYPE_CHECKING

import numpy as np

if TYPE_CHECKING:
    from sknetwork.gnn.base import BaseGNN


class BaseOptimizer:
    """Base class for optimizers.

    Parameters
    ----------
    learning_rate: float (default = 0.01)
        Learning rate for updating weights.
    """

    def __init__(self, learning_rate):
        self.learning_rate = learning_rate

    def step(self, gnn: BaseGNN):
        """Update model parameters according to gradient values.

        Parameters
        ----------
        gnn: BaseGNNClassifier
            Model containing parameters to update.
        """


class GD(BaseOptimizer):
    """Gradient Descent optimizer.

    Parameters
    ----------
    learning_rate: float (default = 0.01)
        Learning rate for updating weights.
    """

    def __init__(self, learning_rate: float = 0.01):
        super(GD, self).__init__(learning_rate)

    def step(self, gnn: BaseGNN):
        """Update model parameters according to gradient values.

        Parameters
        ----------
        gnn: BaseGNNClassifier
            Model containing parameters to update.
        """
        for idx, layer in enumerate(gnn.layers):
            layer.weight = layer.weight - self.learning_rate * gnn.derivative_weight[idx]
            layer.bias = layer.bias - self.learning_rate * gnn.derivative_bias[idx]


class ADAM(BaseOptimizer):
    """Adam optimizer.

    Parameters
    ----------
    learning_rate: float (default = 0.01)
        Learning rate for updating weights.
    beta1, beta2: float
        Coefficients used for computing running averages of gradients.
    eps: float (default = 1e-8)
        Term added to the denominator to improve stability.

    References
    ----------
    Kingma, D. P., & Ba, J. (2014).
    `Adam: A method for stochastic optimization.
    <https://arxiv.org/pdf/1412.6980.pdf>`_
    3rd International Conference for Learning Representation.
    """

    def __init__(self, learning_rate: float = 0.01, beta1: float = 0.9, beta2: float = 0.999,
                 eps: float = 1e-8):
        super(ADAM, self).__init__(learning_rate)
        self.beta1 = beta1
        self.beta2 = beta2
        self.eps = eps
        self.m_derivative_weight, self.v_derivative_weight = [], []
        self.m_derivative_bias, self.v_derivative_bias = [], []
        self.t = 0

    def step(self, gnn: BaseGNN):
        """Update model parameters according to gradient values and parameters.

        Parameters
        ----------
        gnn: `BaseGNNClassifier`
            Model containing parameters to update.
        """
        if self.t == 0:
            self.m_derivative_weight, self.v_derivative_weight = \
                [np.zeros(x.shape) for x in gnn.derivative_weight], [np.zeros(x.shape) for x in gnn.derivative_weight]
            self.m_derivative_bias, self.v_derivative_bias = \
                [np.zeros(x.shape) for x in gnn.derivative_bias], [np.zeros(x.shape) for x in gnn.derivative_bias]

        for idx, layer in enumerate(gnn.layers):
            self.t += 1

            # Moving averages
            self.m_derivative_weight[idx] = \
                self.beta1 * self.m_derivative_weight[idx] + (1 - self.beta1) * gnn.derivative_weight[idx]
            self.m_derivative_bias[idx] = \
                self.beta1 * self.m_derivative_bias[idx] + (1 - self.beta1) * gnn.derivative_bias[idx]

            self.v_derivative_weight[idx] = \
                self.beta2 * self.v_derivative_weight[idx] + (1 - self.beta2) * (gnn.derivative_weight[idx] ** 2)
            self.v_derivative_bias[idx] = \
                self.beta2 * self.v_derivative_bias[idx] + (1 - self.beta2) * (gnn.derivative_bias[idx] ** 2)

            # Correcting moving averages
            denom_1 = (1 - self.beta1 ** self.t)
            denom_2 = (1 - self.beta2 ** self.t)

            m_derivative_weight_corr = self.m_derivative_weight[idx] / denom_1
            m_derivative_bias_corr = self.m_derivative_bias[idx] / denom_1
            v_derivative_weight_corr = self.v_derivative_weight[idx] / denom_2
            v_derivative_bias_corr = self.v_derivative_bias[idx] / denom_2

            # Parameters update
            layer.weight = \
                layer.weight - (self.learning_rate * m_derivative_weight_corr) / (np.sqrt(v_derivative_weight_corr)
                                                                                  + self.eps)
            if layer.use_bias:
                layer.bias = \
                    layer.bias - (self.learning_rate * m_derivative_bias_corr) / (np.sqrt(v_derivative_bias_corr)
                                                                                  + self.eps)


def get_optimizer(optimizer: Union[BaseOptimizer, str] = 'Adam', learning_rate: float = 0.01) -> BaseOptimizer:
    """Instantiate optimizer according to parameters.

    Parameters
    ----------
    optimizer : str or optimizer
        Which optimizer to use. Can be ``'Adam'`` or ``'GD'`` or custom optimizer.
    learning_rate: float
        Learning rate.

    Returns
    -------
    Optimizer object
    """
    if issubclass(type(optimizer), BaseOptimizer):
        return optimizer
    elif type(optimizer) == str:
        optimizer = optimizer.lower()
        if optimizer == 'adam':
            return ADAM(learning_rate=learning_rate)
        elif optimizer in ['gd', 'gradient']:
            return GD(learning_rate=learning_rate)
        else:
            raise ValueError("Optimizer must be either \"Adam\" or \"GD\" (Gradient Descent).")
    else:
        raise TypeError("Optimizer must be either an \"BaseOptimizer\" object or a string.")
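
The update implemented in ADAM.step is the standard bias-corrected rule of Kingma & Ba. The following standalone numpy sketch (adam_step is a hypothetical helper written for this page, not part of the package) shows the same arithmetic on a single parameter array:

import numpy as np

def adam_step(w, grad, m, v, t, lr=0.01, beta1=0.9, beta2=0.999, eps=1e-8):
    """One bias-corrected Adam update of parameter array w."""
    m = beta1 * m + (1 - beta1) * grad       # running average of gradients
    v = beta2 * v + (1 - beta2) * grad ** 2  # running average of squared gradients
    m_hat = m / (1 - beta1 ** t)             # correct the bias from zero initialization
    v_hat = v / (1 - beta2 ** t)
    w = w - lr * m_hat / (np.sqrt(v_hat) + eps)
    return w, m, v

w, m, v = np.ones((2, 2)), np.zeros((2, 2)), np.zeros((2, 2))
w, m, v = adam_step(w, grad=np.full((2, 2), 0.5), m=m, v=v, t=1)

As in the package code, get_optimizer('adam') or get_optimizer('gd') resolves a string to the corresponding optimizer, and any BaseOptimizer subclass can be passed directly.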

sknetwork/gnn/tests/__init__.py
ADDED
@@ -0,0 +1 @@
"""tests for gnn"""

sknetwork/gnn/tests/test_activation.py
ADDED
@@ -0,0 +1,56 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""tests for activation"""

import unittest

from sknetwork.gnn.activation import *


class TestActivation(unittest.TestCase):

    def test_get_activation(self):
        self.assertTrue(isinstance(get_activation('Identity'), BaseActivation))
        self.assertTrue(isinstance(get_activation('Relu'), ReLu))
        self.assertTrue(isinstance(get_activation('Sigmoid'), Sigmoid))
        self.assertTrue(isinstance(get_activation('Softmax'), Softmax))
        with self.assertRaises(ValueError):
            get_activation('foo')

        base_act = BaseActivation()
        self.assertTrue(base_act == get_activation(base_act))
        with self.assertRaises(TypeError):
            get_activation(0)

    def test_activation_identity(self):
        activation = get_activation('Identity')
        signal = np.arange(5)
        self.assertTrue((activation.output(signal) == signal).all())
        direction = np.arange(5)
        self.assertTrue((activation.gradient(signal, direction) == direction).all())

    def test_activation_relu(self):
        activation = get_activation('ReLu')
        signal = np.linspace(-2, 2, 5)
        self.assertTrue((activation.output(signal) == [0., 0., 0., 1., 2.]).all())
        direction = np.arange(5)
        self.assertTrue((activation.gradient(signal, direction) == direction * (signal > 0)).all())

    def test_activation_sigmoid(self):
        activation = get_activation('Sigmoid')
        signal = np.array([-np.inf, -1.5, 0, 1.5, np.inf])
        self.assertTrue(np.allclose(activation.output(signal), np.array([0., 0.18242552, 0.5, 0.81757448, 1.])))
        signal = np.array([[-1000, 1000, 1000]])
        direction = np.arange(3)
        self.assertTrue(np.allclose(activation.output(signal), np.array([[0., 1., 1.]])))
        self.assertTrue(np.allclose(activation.gradient(signal, direction), np.array([[0., 0., 0.]])))

    def test_activation_softmax(self):
        activation = get_activation('Softmax')
        signal = np.array([[-1, 0, 3, 5]])
        output = activation.output(signal)
        self.assertTrue(np.allclose(output, np.array([[0.0021657, 0.00588697, 0.11824302, 0.87370431]])))
        signal = np.array([[-1000, 1000, 1000]])
        direction = np.arange(3)
        self.assertTrue(np.allclose(activation.output(signal), np.array([[0., 0.5, 0.5]])))
        self.assertTrue(np.allclose(activation.gradient(signal, direction), np.array([[0., -0.25, 0.25]])))
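
The expected constants in these assertions follow directly from the definitions of the activations; for instance (a quick standalone check, not part of the package):

import numpy as np

# sigmoid(-1.5) = 1 / (1 + e^1.5) ~ 0.18242552, as asserted above.
print(1 / (1 + np.exp(1.5)))

# softmax([-1, 0, 3, 5]): exponentials normalized to sum to 1,
# ~ [0.0021657, 0.00588697, 0.11824302, 0.87370431].
x = np.array([-1., 0., 3., 5.])
print(np.exp(x) / np.exp(x).sum())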

sknetwork/gnn/tests/test_base.py
ADDED
@@ -0,0 +1,75 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""tests for base gnn"""

import unittest

import numpy as np

from sknetwork.data.test_graphs import test_graph
from sknetwork.gnn.base import BaseGNN
from sknetwork.gnn.gnn_classifier import GNNClassifier
from sknetwork.gnn.layer import Convolution
from sknetwork.gnn.optimizer import ADAM


class TestBaseGNN(unittest.TestCase):

    def setUp(self) -> None:
        """Test graph for tests."""
        self.adjacency = test_graph()
        self.n = self.adjacency.shape[0]
        self.features = self.adjacency
        self.labels = np.array(4 * [0, 1] + 2 * [-1])

    def test_base_gnn_fit(self):
        gnn = BaseGNN()
        with self.assertRaises(NotImplementedError):
            gnn.fit(self.adjacency, self.features, self.labels, test_size=0.2)

    def test_gnn_fit_transform(self):
        gnn = GNNClassifier(dims=2, layer_types='Conv', activations='Relu', optimizer='GD', verbose=False)
        embedding = gnn.fit_transform(self.adjacency, self.features, labels=self.labels, n_epochs=1)
        self.assertTrue(len(embedding) == self.n)
        self.assertTrue(embedding.shape == (self.n, 2))

    def test_gnn_custom_optimizer(self):
        gnn = GNNClassifier(dims=2, layer_types='Conv', activations='Relu', optimizer=ADAM(beta1=0.5), verbose=False)
        embedding = gnn.fit_transform(self.adjacency, self.features, labels=self.labels, n_epochs=1)
        self.assertTrue(len(embedding) == self.n)
        self.assertTrue(embedding.shape == (self.n, 2))

    def test_gnn_custom_layers(self):
        gnn = GNNClassifier(layers=[Convolution('Conv', 2, loss='CrossEntropy')], optimizer=ADAM(beta1=0.5))
        embedding = gnn.fit_transform(self.adjacency, self.features, labels=self.labels, n_epochs=1)
        self.assertTrue(len(embedding) == self.n)
        self.assertTrue(embedding.shape == (self.n, 2))

        gnn = GNNClassifier(layers=[Convolution('SAGEConv', 2, sample_size=5, loss='CrossEntropy')],
                            optimizer=ADAM(beta1=0.5),)
        embedding = gnn.fit_transform(self.adjacency, self.features, labels=self.labels, n_epochs=1)
        self.assertTrue(len(embedding) == self.n)
        self.assertTrue(embedding.shape == (self.n, 2))

    def test_gnn_custom(self):
        gnn = GNNClassifier(dims=[20, 8, 2], layer_types='conv',
                            activations=['Relu', 'Sigmoid', 'Softmax'], optimizer='Adam')
        self.assertTrue(isinstance(gnn, GNNClassifier))
        self.assertTrue(gnn.layers[-1].activation.name == 'Cross entropy')
        y_pred = gnn.fit_predict(self.adjacency, self.features, labels=self.labels, n_epochs=1)
        self.assertTrue(len(y_pred) == self.n)

    def test_check_fitted(self):
        gnn = BaseGNN()
        with self.assertRaises(ValueError):
            gnn._check_fitted()
        gnn = GNNClassifier(dims=2, layer_types='conv', activations='Relu', optimizer='GD')
        gnn.fit_transform(self.adjacency, self.features, labels=self.labels, n_epochs=1)
        fit_gnn = gnn._check_fitted()
        self.assertTrue(isinstance(fit_gnn, GNNClassifier))
        self.assertTrue(fit_gnn.embedding_ is not None)

    def test_base_gnn_repr(self):
        gnn = GNNClassifier(dims=[8, 2], layer_types='conv', activations=['Relu', 'Softmax'], optimizer='Adam')
        self.assertTrue(gnn.__repr__().startswith("GNNClassifier"))

sknetwork/gnn/tests/test_base_layer.py
ADDED
@@ -0,0 +1,37 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""tests for base layer gnn"""

import unittest

import numpy as np

from sknetwork.data.test_graphs import test_graph
from sknetwork.gnn.base_layer import BaseLayer


class TestBaseLayer(unittest.TestCase):

    def setUp(self) -> None:
        """Test graph for tests."""
        self.adjacency = test_graph()
        self.n = self.adjacency.shape[0]
        self.features = self.adjacency
        self.labels = np.array([0]*5 + [1]*5)
        self.base_layer = BaseLayer('Conv', len(self.labels))

    def test_base_layer_init(self):
        with self.assertRaises(NotImplementedError):
            self.base_layer.forward(self.adjacency, self.features)

    def test_base_layer_initialize_weights(self):
        self.base_layer._initialize_weights(10)
        self.assertTrue(self.base_layer.weight.shape == (10, len(self.labels)))
        self.assertTrue(self.base_layer.bias.shape == (1, len(self.labels)))
        self.assertTrue(self.base_layer.weights_initialized)

    def test_base_layer_repr(self):
        self.assertTrue(self.base_layer.__repr__().startswith(" BaseLayer(layer_type: Conv, out_channels: 10"))
        sage_layer = BaseLayer(layer_type='sageconv', out_channels=len(self.labels))
        self.assertTrue('sample_size' in sage_layer.__repr__())
        self.assertTrue('sageconv' in sage_layer.__repr__())