scikit-network 0.31.0__cp39-cp39-win_amd64.whl → 0.32.1__cp39-cp39-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of scikit-network might be problematic. Click here for more details.
- {scikit_network-0.31.0.dist-info → scikit_network-0.32.1.dist-info}/AUTHORS.rst +3 -0
- {scikit_network-0.31.0.dist-info → scikit_network-0.32.1.dist-info}/METADATA +19 -3
- {scikit_network-0.31.0.dist-info → scikit_network-0.32.1.dist-info}/RECORD +112 -105
- {scikit_network-0.31.0.dist-info → scikit_network-0.32.1.dist-info}/WHEEL +1 -1
- sknetwork/__init__.py +1 -1
- sknetwork/classification/base.py +1 -1
- sknetwork/classification/base_rank.py +3 -3
- sknetwork/classification/diffusion.py +21 -13
- sknetwork/classification/knn.py +19 -13
- sknetwork/classification/metrics.py +1 -1
- sknetwork/classification/pagerank.py +12 -8
- sknetwork/classification/propagation.py +22 -15
- sknetwork/classification/tests/test_diffusion.py +10 -0
- sknetwork/classification/vote.cp39-win_amd64.pyd +0 -0
- sknetwork/classification/vote.cpp +14549 -8668
- sknetwork/clustering/__init__.py +3 -1
- sknetwork/clustering/base.py +1 -1
- sknetwork/clustering/kcenters.py +253 -0
- sknetwork/clustering/leiden.py +241 -0
- sknetwork/clustering/leiden_core.cp39-win_amd64.pyd +0 -0
- sknetwork/clustering/leiden_core.cpp +31564 -0
- sknetwork/clustering/leiden_core.pyx +124 -0
- sknetwork/clustering/louvain.py +118 -83
- sknetwork/clustering/louvain_core.cp39-win_amd64.pyd +0 -0
- sknetwork/clustering/louvain_core.cpp +21876 -16332
- sknetwork/clustering/louvain_core.pyx +86 -94
- sknetwork/clustering/postprocess.py +2 -2
- sknetwork/clustering/propagation_clustering.py +4 -4
- sknetwork/clustering/tests/test_API.py +7 -3
- sknetwork/clustering/tests/test_kcenters.py +92 -0
- sknetwork/clustering/tests/test_leiden.py +34 -0
- sknetwork/clustering/tests/test_louvain.py +2 -3
- sknetwork/data/load.py +2 -4
- sknetwork/data/parse.py +41 -20
- sknetwork/data/tests/test_parse.py +9 -12
- sknetwork/embedding/__init__.py +0 -1
- sknetwork/embedding/base.py +20 -19
- sknetwork/embedding/force_atlas.py +3 -2
- sknetwork/embedding/louvain_embedding.py +1 -1
- sknetwork/embedding/random_projection.py +5 -3
- sknetwork/embedding/spectral.py +0 -73
- sknetwork/embedding/tests/test_API.py +4 -28
- sknetwork/embedding/tests/test_louvain_embedding.py +4 -9
- sknetwork/embedding/tests/test_spectral.py +2 -5
- sknetwork/embedding/tests/test_svd.py +1 -1
- sknetwork/gnn/base_layer.py +3 -3
- sknetwork/gnn/gnn_classifier.py +40 -86
- sknetwork/gnn/layer.py +1 -1
- sknetwork/gnn/loss.py +1 -1
- sknetwork/gnn/optimizer.py +4 -3
- sknetwork/gnn/tests/test_base_layer.py +4 -4
- sknetwork/gnn/tests/test_gnn_classifier.py +12 -39
- sknetwork/gnn/utils.py +8 -8
- sknetwork/hierarchy/base.py +27 -0
- sknetwork/hierarchy/louvain_hierarchy.py +45 -41
- sknetwork/hierarchy/paris.cp39-win_amd64.pyd +0 -0
- sknetwork/hierarchy/paris.cpp +27521 -20771
- sknetwork/hierarchy/paris.pyx +7 -7
- sknetwork/hierarchy/postprocess.py +16 -16
- sknetwork/hierarchy/tests/test_algos.py +5 -0
- sknetwork/linalg/__init__.py +1 -1
- sknetwork/linalg/diteration.cp39-win_amd64.pyd +0 -0
- sknetwork/linalg/diteration.cpp +13916 -8050
- sknetwork/linalg/{normalization.py → normalizer.py} +17 -14
- sknetwork/linalg/operators.py +1 -1
- sknetwork/linalg/ppr_solver.py +1 -1
- sknetwork/linalg/push.cp39-win_amd64.pyd +0 -0
- sknetwork/linalg/push.cpp +23187 -16973
- sknetwork/linalg/tests/test_normalization.py +3 -7
- sknetwork/linalg/tests/test_operators.py +2 -6
- sknetwork/linalg/tests/test_ppr.py +1 -1
- sknetwork/linkpred/base.py +12 -1
- sknetwork/linkpred/nn.py +6 -6
- sknetwork/path/distances.py +11 -4
- sknetwork/path/shortest_path.py +1 -1
- sknetwork/path/tests/test_distances.py +7 -0
- sknetwork/path/tests/test_search.py +2 -2
- sknetwork/ranking/base.py +11 -6
- sknetwork/ranking/betweenness.cp39-win_amd64.pyd +0 -0
- sknetwork/ranking/betweenness.cpp +5256 -2190
- sknetwork/ranking/pagerank.py +13 -12
- sknetwork/ranking/tests/test_API.py +0 -2
- sknetwork/ranking/tests/test_betweenness.py +1 -1
- sknetwork/ranking/tests/test_pagerank.py +11 -5
- sknetwork/regression/base.py +18 -1
- sknetwork/regression/diffusion.py +24 -10
- sknetwork/regression/tests/test_diffusion.py +8 -0
- sknetwork/topology/__init__.py +3 -1
- sknetwork/topology/cliques.cp39-win_amd64.pyd +0 -0
- sknetwork/topology/cliques.cpp +23528 -16848
- sknetwork/topology/core.cp39-win_amd64.pyd +0 -0
- sknetwork/topology/core.cpp +22849 -16581
- sknetwork/topology/cycles.py +243 -0
- sknetwork/topology/minheap.cp39-win_amd64.pyd +0 -0
- sknetwork/topology/minheap.cpp +19495 -13469
- sknetwork/topology/structure.py +2 -42
- sknetwork/topology/tests/test_cycles.py +65 -0
- sknetwork/topology/tests/test_structure.py +2 -16
- sknetwork/topology/triangles.cp39-win_amd64.pyd +0 -0
- sknetwork/topology/triangles.cpp +5283 -1397
- sknetwork/topology/triangles.pyx +7 -4
- sknetwork/topology/weisfeiler_lehman_core.cp39-win_amd64.pyd +0 -0
- sknetwork/topology/weisfeiler_lehman_core.cpp +14781 -8915
- sknetwork/utils/format.py +1 -1
- sknetwork/utils/membership.py +2 -2
- sknetwork/visualization/__init__.py +2 -2
- sknetwork/visualization/dendrograms.py +55 -7
- sknetwork/visualization/graphs.py +261 -44
- sknetwork/visualization/tests/test_dendrograms.py +9 -9
- sknetwork/visualization/tests/test_graphs.py +63 -57
- sknetwork/embedding/louvain_hierarchy.py +0 -142
- sknetwork/embedding/tests/test_louvain_hierarchy.py +0 -19
- {scikit_network-0.31.0.dist-info → scikit_network-0.32.1.dist-info}/LICENSE +0 -0
- {scikit_network-0.31.0.dist-info → scikit_network-0.32.1.dist-info}/top_level.txt +0 -0
|
@@ -20,6 +20,10 @@ class TestParser(unittest.TestCase):
|
|
|
20
20
|
self.assertTrue((adjacency.indices == [2, 3, 0, 1, 5, 4]).all())
|
|
21
21
|
self.assertTrue((adjacency.indptr == [0, 1, 2, 3, 4, 5, 6]).all())
|
|
22
22
|
self.assertTrue((adjacency.data == [1, 1, 1, 1, 1, 1]).all())
|
|
23
|
+
adjacency = parse.from_csv(self.stub_data_1, shape=(7, 7))
|
|
24
|
+
self.assertTrue((adjacency.shape == (7, 7)))
|
|
25
|
+
biadjacency = parse.from_csv(self.stub_data_1, bipartite=True, shape=(7, 9))
|
|
26
|
+
self.assertTrue((biadjacency.shape == (7, 9)))
|
|
23
27
|
remove(self.stub_data_1)
|
|
24
28
|
|
|
25
29
|
def test_labeled_weighted(self):
|
|
@@ -33,13 +37,14 @@ class TestParser(unittest.TestCase):
|
|
|
33
37
|
self.assertTrue((adjacency.indptr == [0, 1, 2, 3, 4, 5, 6]).all())
|
|
34
38
|
self.assertTrue((adjacency.data == [1, 6, 5, 6, 1, 5]).all())
|
|
35
39
|
self.assertTrue((names == [' b', ' d', ' e', 'a', 'c', 'f']).all())
|
|
40
|
+
|
|
36
41
|
remove(self.stub_data_2)
|
|
37
42
|
|
|
38
43
|
def test_auto_reindex(self):
|
|
39
44
|
self.stub_data_4 = 'stub_4.txt'
|
|
40
45
|
with open(self.stub_data_4, "w") as text_file:
|
|
41
46
|
text_file.write('%stub\n14 31\n42 50\n0 12')
|
|
42
|
-
graph = parse.from_csv(self.stub_data_4)
|
|
47
|
+
graph = parse.from_csv(self.stub_data_4, reindex=True)
|
|
43
48
|
adjacency = graph.adjacency
|
|
44
49
|
names = graph.names
|
|
45
50
|
self.assertTrue((adjacency.data == [1, 1, 1, 1, 1, 1]).all())
|
|
@@ -164,23 +169,15 @@ class TestParser(unittest.TestCase):
|
|
|
164
169
|
self.stub_data_9 = 'stub_9.txt'
|
|
165
170
|
with open(self.stub_data_9, "w") as text_file:
|
|
166
171
|
text_file.write('#stub\n1 3\n4 5\n0 3')
|
|
167
|
-
graph = parse.from_csv(self.stub_data_9, bipartite=True)
|
|
172
|
+
graph = parse.from_csv(self.stub_data_9, bipartite=True, reindex=True)
|
|
168
173
|
biadjacency = graph.biadjacency
|
|
169
174
|
self.assertTrue((biadjacency.indices == [0, 0, 1]).all())
|
|
170
175
|
self.assertTrue((biadjacency.indptr == [0, 1, 2, 3]).all())
|
|
171
176
|
self.assertTrue((biadjacency.data == [1, 1, 1]).all())
|
|
177
|
+
biadjacency = parse.from_csv(self.stub_data_9, bipartite=True)
|
|
178
|
+
self.assertTrue(biadjacency.shape == (5, 6))
|
|
172
179
|
remove(self.stub_data_9)
|
|
173
180
|
|
|
174
|
-
def test_csv_adjacency_bipartite(self):
|
|
175
|
-
self.stub_data_10 = 'stub_10.txt'
|
|
176
|
-
with open(self.stub_data_10, "w") as text_file:
|
|
177
|
-
text_file.write('%stub\n3\n3\n0')
|
|
178
|
-
graph = parse.from_csv(self.stub_data_10, bipartite=True)
|
|
179
|
-
biadjacency = graph.biadjacency
|
|
180
|
-
self.assertTupleEqual(biadjacency.shape, (3, 2))
|
|
181
|
-
self.assertTrue((biadjacency.data == [1, 1, 1]).all())
|
|
182
|
-
remove(self.stub_data_10)
|
|
183
|
-
|
|
184
181
|
def test_edge_list(self):
|
|
185
182
|
edge_list_1 = [('Alice', 'Bob'), ('Carol', 'Alice')]
|
|
186
183
|
graph = parse.from_edge_list(edge_list_1)
|
sknetwork/embedding/__init__.py
CHANGED
|
@@ -2,7 +2,6 @@
|
|
|
2
2
|
from sknetwork.embedding.base import BaseEmbedding
|
|
3
3
|
from sknetwork.embedding.force_atlas import ForceAtlas
|
|
4
4
|
from sknetwork.embedding.louvain_embedding import LouvainEmbedding
|
|
5
|
-
from sknetwork.embedding.louvain_hierarchy import LouvainNE
|
|
6
5
|
from sknetwork.embedding.random_projection import RandomProjection
|
|
7
6
|
from sknetwork.embedding.spectral import Spectral
|
|
8
7
|
from sknetwork.embedding.spring import Spring
|
sknetwork/embedding/base.py
CHANGED
|
@@ -1,11 +1,11 @@
|
|
|
1
1
|
#!/usr/bin/env python3
|
|
2
2
|
# -*- coding: utf-8 -*-
|
|
3
3
|
"""
|
|
4
|
-
Created
|
|
4
|
+
Created in November 2019
|
|
5
5
|
@author: Nathan de Lara <nathan.delara@polytechnique.org>
|
|
6
6
|
"""
|
|
7
7
|
from abc import ABC
|
|
8
|
-
from typing import Union
|
|
8
|
+
from typing import Optional, Union
|
|
9
9
|
|
|
10
10
|
import numpy as np
|
|
11
11
|
from scipy import sparse
|
|
@@ -26,10 +26,19 @@ class BaseEmbedding(Algorithm, ABC):
|
|
|
26
26
|
embedding_col_ : array, shape = (n_col, n_components)
|
|
27
27
|
Embedding of the columns, for bipartite graphs.
|
|
28
28
|
"""
|
|
29
|
-
|
|
30
29
|
def __init__(self):
|
|
31
30
|
self._init_vars()
|
|
32
31
|
|
|
32
|
+
def transform(self) -> np.ndarray:
|
|
33
|
+
"""Return the embedding.
|
|
34
|
+
|
|
35
|
+
Returns
|
|
36
|
+
-------
|
|
37
|
+
embedding : np.ndarray
|
|
38
|
+
Embedding.
|
|
39
|
+
"""
|
|
40
|
+
return self.embedding_
|
|
41
|
+
|
|
33
42
|
def fit_transform(self, *args, **kwargs) -> np.ndarray:
|
|
34
43
|
"""Fit to data and return the embedding. Same parameters as the ``fit`` method.
|
|
35
44
|
|
|
@@ -41,30 +50,22 @@ class BaseEmbedding(Algorithm, ABC):
|
|
|
41
50
|
self.fit(*args, **kwargs)
|
|
42
51
|
return self.embedding_
|
|
43
52
|
|
|
44
|
-
def predict(self,
|
|
45
|
-
"""
|
|
46
|
-
|
|
47
|
-
Each new node is defined by its adjacency row vector.
|
|
53
|
+
def predict(self, columns: bool = False) -> np.ndarray:
|
|
54
|
+
"""Return the embedding of nodes.
|
|
48
55
|
|
|
49
56
|
Parameters
|
|
50
57
|
----------
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
Array of shape (n_col,) (single vector) or (n_vectors, n_col)
|
|
58
|
+
columns : bool
|
|
59
|
+
If ``True``, return the prediction for columns.
|
|
54
60
|
|
|
55
61
|
Returns
|
|
56
62
|
-------
|
|
57
|
-
|
|
63
|
+
embedding_ : np.ndarray
|
|
58
64
|
Embedding of the nodes.
|
|
59
65
|
"""
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
if self.embedding_ is None:
|
|
64
|
-
raise ValueError("This embedding instance is not fitted yet."
|
|
65
|
-
" Call 'fit' with appropriate arguments before using this method.")
|
|
66
|
-
else:
|
|
67
|
-
return self
|
|
66
|
+
if columns:
|
|
67
|
+
return self.embedding_col_
|
|
68
|
+
return self.embedding_
|
|
68
69
|
|
|
69
70
|
@staticmethod
|
|
70
71
|
def _get_regularization(regularization: float, adjacency: sparse.csr_matrix) -> float:
|
|
@@ -77,6 +77,7 @@ class ForceAtlas(BaseEmbedding):
|
|
|
77
77
|
self.tolerance = tolerance
|
|
78
78
|
self.speed = speed
|
|
79
79
|
self.speed_max = speed_max
|
|
80
|
+
self.embedding_ = None
|
|
80
81
|
|
|
81
82
|
def fit(self, adjacency: Union[sparse.csr_matrix, np.ndarray], pos_init: Optional[np.ndarray] = None,
|
|
82
83
|
n_iter: Optional[int] = None) -> 'ForceAtlas':
|
|
@@ -155,7 +156,7 @@ class ForceAtlas(BaseEmbedding):
|
|
|
155
156
|
if tree is None:
|
|
156
157
|
neighbors = np.arange(n)
|
|
157
158
|
else:
|
|
158
|
-
neighbors = tree.query_ball_point(position[i], self.approx_radius)
|
|
159
|
+
neighbors = tree.query_ball_point(position[i], self.approx_radius, p=2)
|
|
159
160
|
|
|
160
161
|
grad: np.ndarray = (position[i] - position[neighbors]) # shape (n_neigh, n_components)
|
|
161
162
|
distance: np.ndarray = np.linalg.norm(grad, axis=1) # shape (n_neigh,)
|
|
@@ -191,7 +192,7 @@ class ForceAtlas(BaseEmbedding):
|
|
|
191
192
|
|
|
192
193
|
position += delta # calculating displacement and final position of points after iteration
|
|
193
194
|
if (swing_vector < 1).all():
|
|
194
|
-
break # if the swing of all nodes is zero, then convergence is reached
|
|
195
|
+
break # if the swing of all nodes is zero, then convergence is reached.
|
|
195
196
|
|
|
196
197
|
self.embedding_ = position
|
|
197
198
|
return self
|
|
@@ -12,7 +12,7 @@ from scipy import sparse
|
|
|
12
12
|
|
|
13
13
|
from sknetwork.clustering.louvain import Louvain
|
|
14
14
|
from sknetwork.embedding.base import BaseEmbedding
|
|
15
|
-
from sknetwork.linalg.
|
|
15
|
+
from sknetwork.linalg.normalizer import normalize
|
|
16
16
|
from sknetwork.utils.check import check_random_state, check_adjacency_vector, check_nonnegative, is_square
|
|
17
17
|
from sknetwork.utils.membership import get_membership
|
|
18
18
|
|
|
@@ -1,9 +1,10 @@
|
|
|
1
1
|
#!/usr/bin/env python3
|
|
2
2
|
# coding: utf-8
|
|
3
3
|
"""
|
|
4
|
-
Created
|
|
4
|
+
Created in January 2021
|
|
5
5
|
@author: Thomas Bonald <bonald@enst.fr>
|
|
6
6
|
"""
|
|
7
|
+
from abc import ABC
|
|
7
8
|
from typing import Union
|
|
8
9
|
|
|
9
10
|
import numpy as np
|
|
@@ -15,7 +16,7 @@ from sknetwork.utils.check import check_format, check_random_state
|
|
|
15
16
|
from sknetwork.utils.format import get_adjacency
|
|
16
17
|
|
|
17
18
|
|
|
18
|
-
class RandomProjection(BaseEmbedding):
|
|
19
|
+
class RandomProjection(BaseEmbedding, ABC):
|
|
19
20
|
"""Embedding of graphs based the random projection of the adjacency matrix:
|
|
20
21
|
|
|
21
22
|
:math:`(I + \\alpha A +... + (\\alpha A)^K)G`
|
|
@@ -71,6 +72,7 @@ class RandomProjection(BaseEmbedding):
|
|
|
71
72
|
regularization: float = -1, normalized: bool = True, random_state: int = None):
|
|
72
73
|
super(RandomProjection, self).__init__()
|
|
73
74
|
|
|
75
|
+
self.embedding_ = None
|
|
74
76
|
self.n_components = n_components
|
|
75
77
|
self.alpha = alpha
|
|
76
78
|
self.n_iter = n_iter
|
|
@@ -87,7 +89,7 @@ class RandomProjection(BaseEmbedding):
|
|
|
87
89
|
|
|
88
90
|
Parameters
|
|
89
91
|
----------
|
|
90
|
-
input_matrix :
|
|
92
|
+
input_matrix : sparse.csr_matrix, np.ndarray
|
|
91
93
|
Adjacency matrix or biadjacency matrix of the graph.
|
|
92
94
|
force_bipartite : bool (default = ``False``)
|
|
93
95
|
If ``True``, force the input matrix to be considered as a biadjacency matrix.
|
sknetwork/embedding/spectral.py
CHANGED
|
@@ -139,76 +139,3 @@ class Spectral(BaseEmbedding):
|
|
|
139
139
|
self._split_vars(input_matrix.shape)
|
|
140
140
|
|
|
141
141
|
return self
|
|
142
|
-
|
|
143
|
-
def predict(self, adjacency_vectors: Union[sparse.csr_matrix, np.ndarray]) -> np.ndarray:
|
|
144
|
-
"""Predict the embedding of new nodes, when possible (otherwise return 0).
|
|
145
|
-
|
|
146
|
-
Each new node is defined by its adjacency row vector.
|
|
147
|
-
|
|
148
|
-
Parameters
|
|
149
|
-
----------
|
|
150
|
-
adjacency_vectors :
|
|
151
|
-
Adjacency vectors of nodes.
|
|
152
|
-
Array of shape (n_col,) (single vector) or (n_vectors, n_col)
|
|
153
|
-
|
|
154
|
-
Returns
|
|
155
|
-
-------
|
|
156
|
-
embedding_vectors : np.ndarray
|
|
157
|
-
Embedding of the nodes.
|
|
158
|
-
|
|
159
|
-
Example
|
|
160
|
-
-------
|
|
161
|
-
>>> from sknetwork.embedding import Spectral
|
|
162
|
-
>>> from sknetwork.data import karate_club
|
|
163
|
-
>>> spectral = Spectral(n_components=3)
|
|
164
|
-
>>> adjacency = karate_club()
|
|
165
|
-
>>> adjacency_vector = np.arange(34) < 5
|
|
166
|
-
>>> _ = spectral.fit(adjacency)
|
|
167
|
-
>>> len(spectral.predict(adjacency_vector))
|
|
168
|
-
3
|
|
169
|
-
"""
|
|
170
|
-
self._check_fitted()
|
|
171
|
-
|
|
172
|
-
# input
|
|
173
|
-
if self.bipartite:
|
|
174
|
-
n = len(self.embedding_col_)
|
|
175
|
-
else:
|
|
176
|
-
n = len(self.embedding_)
|
|
177
|
-
adjacency_vectors = check_adjacency_vector(adjacency_vectors, n)
|
|
178
|
-
check_nonnegative(adjacency_vectors)
|
|
179
|
-
|
|
180
|
-
if self.bipartite:
|
|
181
|
-
shape = (adjacency_vectors.shape[0], self.embedding_row_.shape[0])
|
|
182
|
-
adjacency_vectors = sparse.csr_matrix(adjacency_vectors)
|
|
183
|
-
adjacency_vectors = sparse.hstack([sparse.csr_matrix(shape), adjacency_vectors], format='csr')
|
|
184
|
-
eigenvectors = self.eigenvectors_
|
|
185
|
-
eigenvalues = self.eigenvalues_
|
|
186
|
-
|
|
187
|
-
# regularization
|
|
188
|
-
if self.regularized:
|
|
189
|
-
regularization = np.abs(self.regularization)
|
|
190
|
-
else:
|
|
191
|
-
regularization = 0
|
|
192
|
-
normalizer = Normalizer(adjacency_vectors, regularization)
|
|
193
|
-
|
|
194
|
-
# prediction
|
|
195
|
-
embedding_vectors = normalizer.dot(eigenvectors)
|
|
196
|
-
normalized_laplacian = self.decomposition == 'rw'
|
|
197
|
-
if normalized_laplacian:
|
|
198
|
-
norm_vect = eigenvalues.copy()
|
|
199
|
-
norm_vect[norm_vect == 0] = 1
|
|
200
|
-
embedding_vectors /= norm_vect
|
|
201
|
-
else:
|
|
202
|
-
norm_matrix = sparse.csr_matrix(1 - np.outer(normalizer.norm_diag.data, eigenvalues))
|
|
203
|
-
norm_matrix.data = 1 / norm_matrix.data
|
|
204
|
-
embedding_vectors *= norm_matrix.toarray()
|
|
205
|
-
|
|
206
|
-
# normalization
|
|
207
|
-
if self.normalized:
|
|
208
|
-
embedding_vectors = normalize(embedding_vectors, p=2)
|
|
209
|
-
|
|
210
|
-
# shape
|
|
211
|
-
if len(embedding_vectors) == 1:
|
|
212
|
-
embedding_vectors = embedding_vectors.ravel()
|
|
213
|
-
|
|
214
|
-
return embedding_vectors
|
|
@@ -13,7 +13,6 @@ class TestEmbeddings(unittest.TestCase):
|
|
|
13
13
|
def setUp(self):
|
|
14
14
|
"""Algorithms by input types."""
|
|
15
15
|
self.methods = [Spectral(), GSVD(), SVD()]
|
|
16
|
-
self.bimethods = [GSVD(), SVD()]
|
|
17
16
|
|
|
18
17
|
def test_undirected(self):
|
|
19
18
|
adjacency = test_graph()
|
|
@@ -22,44 +21,21 @@ class TestEmbeddings(unittest.TestCase):
|
|
|
22
21
|
method = Spring()
|
|
23
22
|
embedding = method.fit_transform(adjacency)
|
|
24
23
|
self.assertEqual(embedding.shape, (n, 2))
|
|
25
|
-
pred1 = method.predict(adjacency[0])
|
|
26
|
-
pred2 = method.predict(adjacency[0].toarray())
|
|
27
|
-
self.assertEqual(pred1.shape, (2,))
|
|
28
|
-
self.assertAlmostEqual(np.linalg.norm(pred1 - pred2), 0)
|
|
29
24
|
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
self.assertTupleEqual(pred1.shape, (n, 2))
|
|
33
|
-
self.assertAlmostEqual(np.linalg.norm(pred1 - pred2), 0)
|
|
34
|
-
|
|
35
|
-
def test_bimethods(self):
|
|
25
|
+
embedding = method.transform()
|
|
26
|
+
self.assertEqual(embedding.shape, (n, 2))
|
|
36
27
|
|
|
28
|
+
def test_bipartite(self):
|
|
37
29
|
for adjacency in [test_digraph(), test_bigraph()]:
|
|
38
30
|
n_row, n_col = adjacency.shape
|
|
39
31
|
|
|
40
|
-
for method in self.
|
|
32
|
+
for method in self.methods:
|
|
41
33
|
method.fit(adjacency)
|
|
42
34
|
|
|
43
35
|
self.assertEqual(method.embedding_.shape, (n_row, 2))
|
|
44
36
|
self.assertEqual(method.embedding_row_.shape, (n_row, 2))
|
|
45
37
|
self.assertEqual(method.embedding_col_.shape, (n_col, 2))
|
|
46
38
|
|
|
47
|
-
ref = method.embedding_[0]
|
|
48
|
-
pred1 = method.predict(adjacency[0])
|
|
49
|
-
pred2 = method.predict(adjacency[0].toarray())
|
|
50
|
-
|
|
51
|
-
self.assertEqual(pred1.shape, (2,))
|
|
52
|
-
self.assertAlmostEqual(np.linalg.norm(pred1 - pred2), 0)
|
|
53
|
-
self.assertAlmostEqual(np.linalg.norm(pred1 - ref), 0)
|
|
54
|
-
|
|
55
|
-
ref = method.embedding_
|
|
56
|
-
pred1 = method.predict(adjacency)
|
|
57
|
-
pred2 = method.predict(adjacency.toarray())
|
|
58
|
-
|
|
59
|
-
self.assertTupleEqual(pred1.shape, (n_row, 2))
|
|
60
|
-
self.assertAlmostEqual(np.linalg.norm(pred1 - pred2), 0)
|
|
61
|
-
self.assertAlmostEqual(np.linalg.norm(pred1 - ref), 0)
|
|
62
|
-
|
|
63
39
|
def test_disconnected(self):
|
|
64
40
|
n = 10
|
|
65
41
|
adjacency = np.eye(n)
|
|
@@ -12,22 +12,17 @@ from sknetwork.embedding import LouvainEmbedding
|
|
|
12
12
|
class TestLouvainEmbedding(unittest.TestCase):
|
|
13
13
|
|
|
14
14
|
def test_predict(self):
|
|
15
|
+
adjacency = test_graph()
|
|
15
16
|
louvain = LouvainEmbedding()
|
|
16
17
|
louvain.fit(test_graph())
|
|
17
18
|
self.assertEqual(louvain.embedding_.shape[0], 10)
|
|
18
|
-
louvain.fit(
|
|
19
|
+
louvain.fit(adjacency, force_bipartite=True)
|
|
19
20
|
self.assertEqual(louvain.embedding_.shape[0], 10)
|
|
20
21
|
|
|
21
22
|
for method in ['remove', 'merge', 'keep']:
|
|
22
23
|
louvain = LouvainEmbedding(isolated_nodes=method)
|
|
23
|
-
louvain.
|
|
24
|
-
|
|
25
|
-
self.assertEqual(embedding_vector.shape[0], 1)
|
|
24
|
+
embedding = louvain.fit_transform(adjacency)
|
|
25
|
+
self.assertEqual(embedding.shape[0], adjacency.shape[0])
|
|
26
26
|
|
|
27
|
-
for method in ['remove', 'merge', 'keep']:
|
|
28
|
-
bilouvain = LouvainEmbedding(isolated_nodes=method)
|
|
29
|
-
bilouvain.fit(test_bigraph())
|
|
30
|
-
embedding_vector = bilouvain.predict(np.array([1, 0, 0, 0, 1, 1, 0, 1]))
|
|
31
|
-
self.assertEqual(embedding_vector.shape[0], 1)
|
|
32
27
|
|
|
33
28
|
|
|
@@ -22,24 +22,21 @@ class TestEmbeddings(unittest.TestCase):
|
|
|
22
22
|
if not is_weakly_connected(adjacency):
|
|
23
23
|
weights += 1
|
|
24
24
|
self.assertAlmostEqual(np.linalg.norm(embedding.T.dot(weights)), 0)
|
|
25
|
-
self.assertAlmostEqual(np.linalg.norm(embedding[1:4] - spectral.predict(adjacency[1:4])), 0)
|
|
26
25
|
# Laplacian
|
|
27
26
|
spectral = Spectral(3, decomposition='laplacian', normalized=False)
|
|
28
27
|
embedding = spectral.fit_transform(adjacency)
|
|
29
28
|
self.assertAlmostEqual(np.linalg.norm(embedding.sum(axis=0)), 0)
|
|
30
|
-
self.assertAlmostEqual(np.linalg.norm(embedding[1:4] - spectral.predict(adjacency[1:4])), 0)
|
|
31
29
|
|
|
32
30
|
def test_directed(self):
|
|
33
31
|
for adjacency in [test_digraph(), test_digraph().astype(bool)]:
|
|
34
32
|
# random walk
|
|
35
33
|
spectral = Spectral(3, normalized=False)
|
|
36
34
|
embedding = spectral.fit_transform(adjacency)
|
|
37
|
-
self.assertAlmostEqual(
|
|
35
|
+
self.assertAlmostEqual(embedding.shape[0], adjacency.shape[0])
|
|
38
36
|
# Laplacian
|
|
39
37
|
spectral = Spectral(3, decomposition='laplacian', normalized=False)
|
|
40
|
-
|
|
38
|
+
spectral.fit(adjacency)
|
|
41
39
|
self.assertAlmostEqual(np.linalg.norm(spectral.eigenvectors_.sum(axis=0)), 0)
|
|
42
|
-
self.assertAlmostEqual(np.linalg.norm(embedding[6:8] - spectral.predict(adjacency[6:8])), 0)
|
|
43
40
|
|
|
44
41
|
def test_regularization(self):
|
|
45
42
|
for adjacency in [test_graph(), test_disconnected_graph()]:
|
|
@@ -26,7 +26,7 @@ class TestSVD(unittest.TestCase):
|
|
|
26
26
|
|
|
27
27
|
gsvd = GSVD(n_components=1, regularization=0.1, solver='lanczos')
|
|
28
28
|
gsvd.fit(biadjacency)
|
|
29
|
-
|
|
29
|
+
self.assertEqual(gsvd.embedding_row_.shape, (n_row, 1))
|
|
30
30
|
|
|
31
31
|
pca = PCA(n_components=min_dim, solver='lanczos')
|
|
32
32
|
pca.fit(biadjacency)
|
sknetwork/gnn/base_layer.py
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
#!/usr/bin/env python3
|
|
2
2
|
# -*- coding: utf-8 -*-
|
|
3
3
|
"""
|
|
4
|
-
Created
|
|
4
|
+
Created in July 2022
|
|
5
5
|
@author: Simon Delarue <sdelarue@enst.fr>
|
|
6
6
|
"""
|
|
7
7
|
from typing import Optional, Union
|
|
@@ -73,10 +73,10 @@ class BaseLayer:
|
|
|
73
73
|
in_channels: int
|
|
74
74
|
Number of input channels.
|
|
75
75
|
"""
|
|
76
|
-
#
|
|
76
|
+
# He initialization
|
|
77
77
|
self.weight = np.random.randn(in_channels, self.out_channels) * np.sqrt(2 / self.out_channels)
|
|
78
78
|
if self.use_bias:
|
|
79
|
-
self.bias = np.zeros((self.out_channels
|
|
79
|
+
self.bias = np.zeros((1, self.out_channels))
|
|
80
80
|
self.weights_initialized = True
|
|
81
81
|
|
|
82
82
|
def forward(self, *args, **kwargs):
|
sknetwork/gnn/gnn_classifier.py
CHANGED
|
@@ -4,7 +4,7 @@
|
|
|
4
4
|
Created in April 2022
|
|
5
5
|
@author: Simon Delarue <sdelarue@enst.fr>
|
|
6
6
|
"""
|
|
7
|
-
from typing import Optional, Union
|
|
7
|
+
from typing import Iterable, Optional, Union
|
|
8
8
|
from collections import defaultdict
|
|
9
9
|
|
|
10
10
|
import numpy as np
|
|
@@ -26,35 +26,37 @@ class GNNClassifier(BaseGNN):
|
|
|
26
26
|
|
|
27
27
|
Parameters
|
|
28
28
|
----------
|
|
29
|
-
dims :
|
|
30
|
-
|
|
29
|
+
dims : iterable or int
|
|
30
|
+
Dimension of the output of each layer (in forward direction).
|
|
31
31
|
If an integer, dimension of the output layer (no hidden layer).
|
|
32
32
|
Optional if ``layers`` is specified.
|
|
33
|
-
layer_types :
|
|
33
|
+
layer_types : iterable or str
|
|
34
34
|
Layer types (in forward direction).
|
|
35
|
-
If a string,
|
|
35
|
+
If a string, the same type is used at each layer.
|
|
36
36
|
Can be ``'Conv'``, graph convolutional layer (default) or ``'Sage'`` (GraphSage).
|
|
37
|
-
activations :
|
|
37
|
+
activations : iterable or str
|
|
38
38
|
Activation functions (in forward direction).
|
|
39
|
-
If a string,
|
|
39
|
+
If a string, the same activation function is used at each layer.
|
|
40
40
|
Can be either ``'Identity'``, ``'Relu'``, ``'Sigmoid'`` or ``'Softmax'`` (default = ``'Relu'``).
|
|
41
|
-
use_bias :
|
|
42
|
-
Whether to
|
|
43
|
-
If ``True``, use a bias term at
|
|
44
|
-
normalizations :
|
|
45
|
-
|
|
46
|
-
If a string,
|
|
47
|
-
Can be either
|
|
41
|
+
use_bias : iterable or bool
|
|
42
|
+
Whether to add a bias term at each layer (in forward direction).
|
|
43
|
+
If ``True``, use a bias term at each layer.
|
|
44
|
+
normalizations : iterable or str
|
|
45
|
+
Normalizations of the adjacency matrix for message passing (in forward direction).
|
|
46
|
+
If a string, the same type of normalization is used at each layer.
|
|
47
|
+
Can be either ``'left'`` (left normalization by the degrees), ``'right'`` (right normalization by the degrees),
|
|
48
48
|
``'both'`` (symmetric normalization by the square root of degrees, default) or ``None`` (no normalization).
|
|
49
|
-
self_embeddings :
|
|
50
|
-
Whether to add
|
|
51
|
-
If ``True``, add self-
|
|
52
|
-
sample_sizes :
|
|
53
|
-
|
|
49
|
+
self_embeddings : iterable or str
|
|
50
|
+
Whether to add the embedding to each node for message passing (in forward direction).
|
|
51
|
+
If ``True``, add a self-embedding at each layer.
|
|
52
|
+
sample_sizes : iterable or int
|
|
53
|
+
Sizes of neighborhood sampled for each node (in forward direction).
|
|
54
|
+
If an integer, the same sampling size is used at each layer.
|
|
55
|
+
Used only for ``'Sage'`` layer type.
|
|
54
56
|
loss : str (default = ``'CrossEntropy'``) or BaseLoss
|
|
55
|
-
|
|
56
|
-
layers :
|
|
57
|
-
Custom layers. If used, previous parameters are ignored.
|
|
57
|
+
Name of loss function or custom loss function.
|
|
58
|
+
layers : iterable or None
|
|
59
|
+
Custom layers (in forward directions). If used, previous parameters are ignored.
|
|
58
60
|
optimizer : str or optimizer
|
|
59
61
|
* ``'Adam'``, stochastic gradient-based optimizer (default).
|
|
60
62
|
* ``'GD'``, gradient descent.
|
|
@@ -72,7 +74,7 @@ class GNNClassifier(BaseGNN):
|
|
|
72
74
|
----------
|
|
73
75
|
conv2, ..., conv1: :class:'GCNConv'
|
|
74
76
|
Graph convolutional layers.
|
|
75
|
-
output_ :
|
|
77
|
+
output_ : np.ndarray
|
|
76
78
|
Output of the GNN.
|
|
77
79
|
labels_: np.ndarray
|
|
78
80
|
Predicted node labels.
|
|
@@ -95,11 +97,11 @@ class GNNClassifier(BaseGNN):
|
|
|
95
97
|
0.88
|
|
96
98
|
"""
|
|
97
99
|
|
|
98
|
-
def __init__(self, dims: Optional[Union[int,
|
|
99
|
-
activations: Union[str,
|
|
100
|
-
normalizations: Union[str,
|
|
100
|
+
def __init__(self, dims: Optional[Union[int, Iterable]] = None, layer_types: Union[str, Iterable] = 'Conv',
|
|
101
|
+
activations: Union[str, Iterable] = 'ReLu', use_bias: Union[bool, list] = True,
|
|
102
|
+
normalizations: Union[str, Iterable] = 'both', self_embeddings: Union[bool, Iterable] = True,
|
|
101
103
|
sample_sizes: Union[int, list] = 25, loss: Union[BaseLoss, str] = 'CrossEntropy',
|
|
102
|
-
layers: Optional[
|
|
104
|
+
layers: Optional[Iterable] = None, optimizer: Union[BaseOptimizer, str] = 'Adam',
|
|
103
105
|
learning_rate: float = 0.01, early_stopping: bool = True, patience: int = 10, verbose: bool = False):
|
|
104
106
|
super(GNNClassifier, self).__init__(loss, optimizer, learning_rate, verbose)
|
|
105
107
|
if layers is not None:
|
|
@@ -159,7 +161,7 @@ class GNNClassifier(BaseGNN):
|
|
|
159
161
|
|
|
160
162
|
def fit(self, adjacency: Union[sparse.csr_matrix, np.ndarray], features: Union[sparse.csr_matrix, np.ndarray],
|
|
161
163
|
labels: np.ndarray, n_epochs: int = 100, validation: float = 0, reinit: bool = False,
|
|
162
|
-
random_state: Optional[int] = None
|
|
164
|
+
random_state: Optional[int] = None) -> 'GNNClassifier':
|
|
163
165
|
""" Fit model to data and store trained parameters.
|
|
164
166
|
|
|
165
167
|
Parameters
|
|
@@ -169,8 +171,8 @@ class GNNClassifier(BaseGNN):
|
|
|
169
171
|
features : sparse.csr_matrix, np.ndarray
|
|
170
172
|
Input feature of shape :math:`(n, d)` with :math:`n` the number of nodes in the graph and :math:`d`
|
|
171
173
|
the size of feature space.
|
|
172
|
-
labels :
|
|
173
|
-
Known labels
|
|
174
|
+
labels : dict, np.ndarray
|
|
175
|
+
Known labels. Negative values ignored.
|
|
174
176
|
n_epochs : int (default = 100)
|
|
175
177
|
Number of epochs (iterations over the whole graph).
|
|
176
178
|
validation : float
|
|
@@ -179,18 +181,17 @@ class GNNClassifier(BaseGNN):
|
|
|
179
181
|
If ``True``, reinit the trainable parameters of the GNN (weights and biases).
|
|
180
182
|
random_state : int
|
|
181
183
|
Random seed, used for reproducible results across multiple runs.
|
|
182
|
-
history : bool (default = ``False``)
|
|
183
|
-
If ``True``, save training history.
|
|
184
184
|
"""
|
|
185
185
|
if reinit:
|
|
186
186
|
for layer in self.layers:
|
|
187
187
|
layer.weights_initialized = False
|
|
188
|
+
self.history_ = defaultdict(list)
|
|
188
189
|
|
|
189
190
|
if random_state is not None:
|
|
190
191
|
np.random.seed(random_state)
|
|
191
192
|
|
|
192
|
-
check_format(adjacency)
|
|
193
|
-
check_format(features)
|
|
193
|
+
check_format(adjacency, allow_empty=True)
|
|
194
|
+
check_format(features, allow_empty=True)
|
|
194
195
|
|
|
195
196
|
labels = get_values(adjacency.shape, labels)
|
|
196
197
|
labels = labels.astype(int)
|
|
@@ -199,7 +200,7 @@ class GNNClassifier(BaseGNN):
|
|
|
199
200
|
check_output(self.layers[-1].out_channels, labels)
|
|
200
201
|
|
|
201
202
|
self.train_mask = labels >= 0
|
|
202
|
-
if 0 < validation < 1:
|
|
203
|
+
if self.val_mask is None and 0 < validation < 1:
|
|
203
204
|
mask = np.random.random(size=len(labels)) < validation
|
|
204
205
|
self.val_mask = self.train_mask & mask
|
|
205
206
|
self.train_mask &= ~mask
|
|
@@ -237,12 +238,10 @@ class GNNClassifier(BaseGNN):
|
|
|
237
238
|
self.optimizer.step(self)
|
|
238
239
|
|
|
239
240
|
# Save results
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
self.history_['
|
|
244
|
-
if val_accuracy is not None:
|
|
245
|
-
self.history_['val_accuracy'].append(val_accuracy)
|
|
241
|
+
self.history_['loss'].append(loss_value)
|
|
242
|
+
self.history_['train_accuracy'].append(train_accuracy)
|
|
243
|
+
if val_accuracy is not None:
|
|
244
|
+
self.history_['val_accuracy'].append(val_accuracy)
|
|
246
245
|
|
|
247
246
|
if n_epochs > 10 and epoch % int(n_epochs / 10) == 0:
|
|
248
247
|
if val_accuracy is not None:
|
|
@@ -304,48 +303,3 @@ class GNNClassifier(BaseGNN):
|
|
|
304
303
|
adjacencies.append(adjacency)
|
|
305
304
|
|
|
306
305
|
return adjacencies
|
|
307
|
-
|
|
308
|
-
def predict(self, adjacency_vectors: Union[sparse.csr_matrix, np.ndarray] = None,
|
|
309
|
-
feature_vectors: Union[sparse.csr_matrix, np.ndarray] = None) -> np.ndarray:
|
|
310
|
-
"""Predict labels for new nodes. If called without parameters, labels are returned for all nodes.
|
|
311
|
-
|
|
312
|
-
Parameters
|
|
313
|
-
----------
|
|
314
|
-
adjacency_vectors : np.ndarray
|
|
315
|
-
Square adjacency matrix. Array of shape (n, n).
|
|
316
|
-
feature_vectors : np.ndarray
|
|
317
|
-
Features row vectors. Array of shape (n, n_feat). The number of features n_feat must match with the one
|
|
318
|
-
used during training.
|
|
319
|
-
|
|
320
|
-
Returns
|
|
321
|
-
-------
|
|
322
|
-
labels : np.ndarray
|
|
323
|
-
Label of each node of the graph.
|
|
324
|
-
"""
|
|
325
|
-
self._check_fitted()
|
|
326
|
-
|
|
327
|
-
if adjacency_vectors is None and feature_vectors is None:
|
|
328
|
-
return self.labels_
|
|
329
|
-
elif adjacency_vectors is not None and feature_vectors is None:
|
|
330
|
-
raise ValueError('Missing value: feature matrix is missing.')
|
|
331
|
-
elif adjacency_vectors is None:
|
|
332
|
-
adjacency_vectors = sparse.identity(feature_vectors.shape[0], format='csr')
|
|
333
|
-
|
|
334
|
-
check_square(adjacency_vectors)
|
|
335
|
-
check_nonnegative(adjacency_vectors)
|
|
336
|
-
feature_vectors = check_format(feature_vectors)
|
|
337
|
-
|
|
338
|
-
n_row, n_col = adjacency_vectors.shape
|
|
339
|
-
feat_row, feat_col = feature_vectors.shape
|
|
340
|
-
|
|
341
|
-
if n_col != feat_row:
|
|
342
|
-
raise ValueError(f'Dimension mismatch: dim0={n_col} != dim1={feat_row}.')
|
|
343
|
-
elif feat_col != self.layers[0].weight.shape[0]:
|
|
344
|
-
raise ValueError(f'Dimension mismatch: current number of features is {feat_col} whereas GNN has been '
|
|
345
|
-
f'trained with '
|
|
346
|
-
f'{self.layers[0].weight.shape[0]} features.')
|
|
347
|
-
|
|
348
|
-
h = self.forward(adjacency_vectors, feature_vectors)
|
|
349
|
-
labels = self._compute_predictions(h)
|
|
350
|
-
|
|
351
|
-
return labels
|
sknetwork/gnn/layer.py
CHANGED
sknetwork/gnn/loss.py
CHANGED