scikit-network 0.31.0__cp310-cp310-win_amd64.whl → 0.32.1__cp310-cp310-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of scikit-network might be problematic. Click here for more details.

Files changed (114)
  1. {scikit_network-0.31.0.dist-info → scikit_network-0.32.1.dist-info}/AUTHORS.rst +3 -0
  2. {scikit_network-0.31.0.dist-info → scikit_network-0.32.1.dist-info}/METADATA +19 -3
  3. {scikit_network-0.31.0.dist-info → scikit_network-0.32.1.dist-info}/RECORD +112 -105
  4. {scikit_network-0.31.0.dist-info → scikit_network-0.32.1.dist-info}/WHEEL +1 -1
  5. sknetwork/__init__.py +1 -1
  6. sknetwork/classification/base.py +1 -1
  7. sknetwork/classification/base_rank.py +3 -3
  8. sknetwork/classification/diffusion.py +21 -13
  9. sknetwork/classification/knn.py +19 -13
  10. sknetwork/classification/metrics.py +1 -1
  11. sknetwork/classification/pagerank.py +12 -8
  12. sknetwork/classification/propagation.py +22 -15
  13. sknetwork/classification/tests/test_diffusion.py +10 -0
  14. sknetwork/classification/vote.cp310-win_amd64.pyd +0 -0
  15. sknetwork/classification/vote.cpp +14549 -8668
  16. sknetwork/clustering/__init__.py +3 -1
  17. sknetwork/clustering/base.py +1 -1
  18. sknetwork/clustering/kcenters.py +253 -0
  19. sknetwork/clustering/leiden.py +241 -0
  20. sknetwork/clustering/leiden_core.cp310-win_amd64.pyd +0 -0
  21. sknetwork/clustering/leiden_core.cpp +31564 -0
  22. sknetwork/clustering/leiden_core.pyx +124 -0
  23. sknetwork/clustering/louvain.py +118 -83
  24. sknetwork/clustering/louvain_core.cp310-win_amd64.pyd +0 -0
  25. sknetwork/clustering/louvain_core.cpp +21876 -16332
  26. sknetwork/clustering/louvain_core.pyx +86 -94
  27. sknetwork/clustering/postprocess.py +2 -2
  28. sknetwork/clustering/propagation_clustering.py +4 -4
  29. sknetwork/clustering/tests/test_API.py +7 -3
  30. sknetwork/clustering/tests/test_kcenters.py +92 -0
  31. sknetwork/clustering/tests/test_leiden.py +34 -0
  32. sknetwork/clustering/tests/test_louvain.py +2 -3
  33. sknetwork/data/load.py +2 -4
  34. sknetwork/data/parse.py +41 -20
  35. sknetwork/data/tests/test_parse.py +9 -12
  36. sknetwork/embedding/__init__.py +0 -1
  37. sknetwork/embedding/base.py +20 -19
  38. sknetwork/embedding/force_atlas.py +3 -2
  39. sknetwork/embedding/louvain_embedding.py +1 -1
  40. sknetwork/embedding/random_projection.py +5 -3
  41. sknetwork/embedding/spectral.py +0 -73
  42. sknetwork/embedding/tests/test_API.py +4 -28
  43. sknetwork/embedding/tests/test_louvain_embedding.py +4 -9
  44. sknetwork/embedding/tests/test_spectral.py +2 -5
  45. sknetwork/embedding/tests/test_svd.py +1 -1
  46. sknetwork/gnn/base_layer.py +3 -3
  47. sknetwork/gnn/gnn_classifier.py +40 -86
  48. sknetwork/gnn/layer.py +1 -1
  49. sknetwork/gnn/loss.py +1 -1
  50. sknetwork/gnn/optimizer.py +4 -3
  51. sknetwork/gnn/tests/test_base_layer.py +4 -4
  52. sknetwork/gnn/tests/test_gnn_classifier.py +12 -39
  53. sknetwork/gnn/utils.py +8 -8
  54. sknetwork/hierarchy/base.py +27 -0
  55. sknetwork/hierarchy/louvain_hierarchy.py +45 -41
  56. sknetwork/hierarchy/paris.cp310-win_amd64.pyd +0 -0
  57. sknetwork/hierarchy/paris.cpp +27521 -20771
  58. sknetwork/hierarchy/paris.pyx +7 -7
  59. sknetwork/hierarchy/postprocess.py +16 -16
  60. sknetwork/hierarchy/tests/test_algos.py +5 -0
  61. sknetwork/linalg/__init__.py +1 -1
  62. sknetwork/linalg/diteration.cp310-win_amd64.pyd +0 -0
  63. sknetwork/linalg/diteration.cpp +13916 -8050
  64. sknetwork/linalg/{normalization.py → normalizer.py} +17 -14
  65. sknetwork/linalg/operators.py +1 -1
  66. sknetwork/linalg/ppr_solver.py +1 -1
  67. sknetwork/linalg/push.cp310-win_amd64.pyd +0 -0
  68. sknetwork/linalg/push.cpp +23187 -16973
  69. sknetwork/linalg/tests/test_normalization.py +3 -7
  70. sknetwork/linalg/tests/test_operators.py +2 -6
  71. sknetwork/linalg/tests/test_ppr.py +1 -1
  72. sknetwork/linkpred/base.py +12 -1
  73. sknetwork/linkpred/nn.py +6 -6
  74. sknetwork/path/distances.py +11 -4
  75. sknetwork/path/shortest_path.py +1 -1
  76. sknetwork/path/tests/test_distances.py +7 -0
  77. sknetwork/path/tests/test_search.py +2 -2
  78. sknetwork/ranking/base.py +11 -6
  79. sknetwork/ranking/betweenness.cp310-win_amd64.pyd +0 -0
  80. sknetwork/ranking/betweenness.cpp +5256 -2190
  81. sknetwork/ranking/pagerank.py +13 -12
  82. sknetwork/ranking/tests/test_API.py +0 -2
  83. sknetwork/ranking/tests/test_betweenness.py +1 -1
  84. sknetwork/ranking/tests/test_pagerank.py +11 -5
  85. sknetwork/regression/base.py +18 -1
  86. sknetwork/regression/diffusion.py +24 -10
  87. sknetwork/regression/tests/test_diffusion.py +8 -0
  88. sknetwork/topology/__init__.py +3 -1
  89. sknetwork/topology/cliques.cp310-win_amd64.pyd +0 -0
  90. sknetwork/topology/cliques.cpp +23528 -16848
  91. sknetwork/topology/core.cp310-win_amd64.pyd +0 -0
  92. sknetwork/topology/core.cpp +22849 -16581
  93. sknetwork/topology/cycles.py +243 -0
  94. sknetwork/topology/minheap.cp310-win_amd64.pyd +0 -0
  95. sknetwork/topology/minheap.cpp +19495 -13469
  96. sknetwork/topology/structure.py +2 -42
  97. sknetwork/topology/tests/test_cycles.py +65 -0
  98. sknetwork/topology/tests/test_structure.py +2 -16
  99. sknetwork/topology/triangles.cp310-win_amd64.pyd +0 -0
  100. sknetwork/topology/triangles.cpp +5283 -1397
  101. sknetwork/topology/triangles.pyx +7 -4
  102. sknetwork/topology/weisfeiler_lehman_core.cp310-win_amd64.pyd +0 -0
  103. sknetwork/topology/weisfeiler_lehman_core.cpp +14781 -8915
  104. sknetwork/utils/format.py +1 -1
  105. sknetwork/utils/membership.py +2 -2
  106. sknetwork/visualization/__init__.py +2 -2
  107. sknetwork/visualization/dendrograms.py +55 -7
  108. sknetwork/visualization/graphs.py +261 -44
  109. sknetwork/visualization/tests/test_dendrograms.py +9 -9
  110. sknetwork/visualization/tests/test_graphs.py +63 -57
  111. sknetwork/embedding/louvain_hierarchy.py +0 -142
  112. sknetwork/embedding/tests/test_louvain_hierarchy.py +0 -19
  113. {scikit_network-0.31.0.dist-info → scikit_network-0.32.1.dist-info}/LICENSE +0 -0
  114. {scikit_network-0.31.0.dist-info → scikit_network-0.32.1.dist-info}/top_level.txt +0 -0
@@ -20,6 +20,10 @@ class TestParser(unittest.TestCase):
20
20
  self.assertTrue((adjacency.indices == [2, 3, 0, 1, 5, 4]).all())
21
21
  self.assertTrue((adjacency.indptr == [0, 1, 2, 3, 4, 5, 6]).all())
22
22
  self.assertTrue((adjacency.data == [1, 1, 1, 1, 1, 1]).all())
23
+ adjacency = parse.from_csv(self.stub_data_1, shape=(7, 7))
24
+ self.assertTrue((adjacency.shape == (7, 7)))
25
+ biadjacency = parse.from_csv(self.stub_data_1, bipartite=True, shape=(7, 9))
26
+ self.assertTrue((biadjacency.shape == (7, 9)))
23
27
  remove(self.stub_data_1)
24
28
 
25
29
  def test_labeled_weighted(self):
@@ -33,13 +37,14 @@ class TestParser(unittest.TestCase):
33
37
  self.assertTrue((adjacency.indptr == [0, 1, 2, 3, 4, 5, 6]).all())
34
38
  self.assertTrue((adjacency.data == [1, 6, 5, 6, 1, 5]).all())
35
39
  self.assertTrue((names == [' b', ' d', ' e', 'a', 'c', 'f']).all())
40
+
36
41
  remove(self.stub_data_2)
37
42
 
38
43
  def test_auto_reindex(self):
39
44
  self.stub_data_4 = 'stub_4.txt'
40
45
  with open(self.stub_data_4, "w") as text_file:
41
46
  text_file.write('%stub\n14 31\n42 50\n0 12')
42
- graph = parse.from_csv(self.stub_data_4)
47
+ graph = parse.from_csv(self.stub_data_4, reindex=True)
43
48
  adjacency = graph.adjacency
44
49
  names = graph.names
45
50
  self.assertTrue((adjacency.data == [1, 1, 1, 1, 1, 1]).all())
@@ -164,23 +169,15 @@ class TestParser(unittest.TestCase):
164
169
  self.stub_data_9 = 'stub_9.txt'
165
170
  with open(self.stub_data_9, "w") as text_file:
166
171
  text_file.write('#stub\n1 3\n4 5\n0 3')
167
- graph = parse.from_csv(self.stub_data_9, bipartite=True)
172
+ graph = parse.from_csv(self.stub_data_9, bipartite=True, reindex=True)
168
173
  biadjacency = graph.biadjacency
169
174
  self.assertTrue((biadjacency.indices == [0, 0, 1]).all())
170
175
  self.assertTrue((biadjacency.indptr == [0, 1, 2, 3]).all())
171
176
  self.assertTrue((biadjacency.data == [1, 1, 1]).all())
177
+ biadjacency = parse.from_csv(self.stub_data_9, bipartite=True)
178
+ self.assertTrue(biadjacency.shape == (5, 6))
172
179
  remove(self.stub_data_9)
173
180
 
174
- def test_csv_adjacency_bipartite(self):
175
- self.stub_data_10 = 'stub_10.txt'
176
- with open(self.stub_data_10, "w") as text_file:
177
- text_file.write('%stub\n3\n3\n0')
178
- graph = parse.from_csv(self.stub_data_10, bipartite=True)
179
- biadjacency = graph.biadjacency
180
- self.assertTupleEqual(biadjacency.shape, (3, 2))
181
- self.assertTrue((biadjacency.data == [1, 1, 1]).all())
182
- remove(self.stub_data_10)
183
-
184
181
  def test_edge_list(self):
185
182
  edge_list_1 = [('Alice', 'Bob'), ('Carol', 'Alice')]
186
183
  graph = parse.from_edge_list(edge_list_1)
@@ -2,7 +2,6 @@
2
2
  from sknetwork.embedding.base import BaseEmbedding
3
3
  from sknetwork.embedding.force_atlas import ForceAtlas
4
4
  from sknetwork.embedding.louvain_embedding import LouvainEmbedding
5
- from sknetwork.embedding.louvain_hierarchy import LouvainNE
6
5
  from sknetwork.embedding.random_projection import RandomProjection
7
6
  from sknetwork.embedding.spectral import Spectral
8
7
  from sknetwork.embedding.spring import Spring
@@ -1,11 +1,11 @@
1
1
  #!/usr/bin/env python3
2
2
  # -*- coding: utf-8 -*-
3
3
  """
4
- Created on Nov, 2019
4
+ Created in November 2019
5
5
  @author: Nathan de Lara <nathan.delara@polytechnique.org>
6
6
  """
7
7
  from abc import ABC
8
- from typing import Union
8
+ from typing import Optional, Union
9
9
 
10
10
  import numpy as np
11
11
  from scipy import sparse
@@ -26,10 +26,19 @@ class BaseEmbedding(Algorithm, ABC):
26
26
  embedding_col_ : array, shape = (n_col, n_components)
27
27
  Embedding of the columns, for bipartite graphs.
28
28
  """
29
-
30
29
  def __init__(self):
31
30
  self._init_vars()
32
31
 
32
+ def transform(self) -> np.ndarray:
33
+ """Return the embedding.
34
+
35
+ Returns
36
+ -------
37
+ embedding : np.ndarray
38
+ Embedding.
39
+ """
40
+ return self.embedding_
41
+
33
42
  def fit_transform(self, *args, **kwargs) -> np.ndarray:
34
43
  """Fit to data and return the embedding. Same parameters as the ``fit`` method.
35
44
 
@@ -41,30 +50,22 @@ class BaseEmbedding(Algorithm, ABC):
41
50
  self.fit(*args, **kwargs)
42
51
  return self.embedding_
43
52
 
44
- def predict(self, adjacency_vectors: Union[sparse.csr_matrix, np.ndarray]) -> np.ndarray:
45
- """Predict the embedding of new nodes.
46
-
47
- Each new node is defined by its adjacency row vector.
53
+ def predict(self, columns: bool = False) -> np.ndarray:
54
+ """Return the embedding of nodes.
48
55
 
49
56
  Parameters
50
57
  ----------
51
- adjacency_vectors :
52
- Adjacency vectors of nodes.
53
- Array of shape (n_col,) (single vector) or (n_vectors, n_col)
58
+ columns : bool
59
+ If ``True``, return the prediction for columns.
54
60
 
55
61
  Returns
56
62
  -------
57
- embedding_vectors : np.ndarray
63
+ embedding_ : np.ndarray
58
64
  Embedding of the nodes.
59
65
  """
60
- raise NotImplementedError
61
-
62
- def _check_fitted(self):
63
- if self.embedding_ is None:
64
- raise ValueError("This embedding instance is not fitted yet."
65
- " Call 'fit' with appropriate arguments before using this method.")
66
- else:
67
- return self
66
+ if columns:
67
+ return self.embedding_col_
68
+ return self.embedding_
68
69
 
69
70
  @staticmethod
70
71
  def _get_regularization(regularization: float, adjacency: sparse.csr_matrix) -> float:
@@ -77,6 +77,7 @@ class ForceAtlas(BaseEmbedding):
77
77
  self.tolerance = tolerance
78
78
  self.speed = speed
79
79
  self.speed_max = speed_max
80
+ self.embedding_ = None
80
81
 
81
82
  def fit(self, adjacency: Union[sparse.csr_matrix, np.ndarray], pos_init: Optional[np.ndarray] = None,
82
83
  n_iter: Optional[int] = None) -> 'ForceAtlas':
@@ -155,7 +156,7 @@ class ForceAtlas(BaseEmbedding):
155
156
  if tree is None:
156
157
  neighbors = np.arange(n)
157
158
  else:
158
- neighbors = tree.query_ball_point(position[i], self.approx_radius)
159
+ neighbors = tree.query_ball_point(position[i], self.approx_radius, p=2)
159
160
 
160
161
  grad: np.ndarray = (position[i] - position[neighbors]) # shape (n_neigh, n_components)
161
162
  distance: np.ndarray = np.linalg.norm(grad, axis=1) # shape (n_neigh,)
@@ -191,7 +192,7 @@ class ForceAtlas(BaseEmbedding):
191
192
 
192
193
  position += delta # calculating displacement and final position of points after iteration
193
194
  if (swing_vector < 1).all():
194
- break # if the swing of all nodes is zero, then convergence is reached and we break.
195
+ break # if the swing of all nodes is zero, then convergence is reached.
195
196
 
196
197
  self.embedding_ = position
197
198
  return self
@@ -12,7 +12,7 @@ from scipy import sparse
12
12
 
13
13
  from sknetwork.clustering.louvain import Louvain
14
14
  from sknetwork.embedding.base import BaseEmbedding
15
- from sknetwork.linalg.normalization import normalize
15
+ from sknetwork.linalg.normalizer import normalize
16
16
  from sknetwork.utils.check import check_random_state, check_adjacency_vector, check_nonnegative, is_square
17
17
  from sknetwork.utils.membership import get_membership
18
18
 
@@ -1,9 +1,10 @@
1
1
  #!/usr/bin/env python3
2
2
  # coding: utf-8
3
3
  """
4
- Created on January, 15 2021
4
+ Created in January 2021
5
5
  @author: Thomas Bonald <bonald@enst.fr>
6
6
  """
7
+ from abc import ABC
7
8
  from typing import Union
8
9
 
9
10
  import numpy as np
@@ -15,7 +16,7 @@ from sknetwork.utils.check import check_format, check_random_state
15
16
  from sknetwork.utils.format import get_adjacency
16
17
 
17
18
 
18
- class RandomProjection(BaseEmbedding):
19
+ class RandomProjection(BaseEmbedding, ABC):
19
20
  """Embedding of graphs based the random projection of the adjacency matrix:
20
21
 
21
22
  :math:`(I + \\alpha A +... + (\\alpha A)^K)G`
@@ -71,6 +72,7 @@ class RandomProjection(BaseEmbedding):
71
72
  regularization: float = -1, normalized: bool = True, random_state: int = None):
72
73
  super(RandomProjection, self).__init__()
73
74
 
75
+ self.embedding_ = None
74
76
  self.n_components = n_components
75
77
  self.alpha = alpha
76
78
  self.n_iter = n_iter
@@ -87,7 +89,7 @@ class RandomProjection(BaseEmbedding):
87
89
 
88
90
  Parameters
89
91
  ----------
90
- input_matrix :
92
+ input_matrix : sparse.csr_matrix, np.ndarray
91
93
  Adjacency matrix or biadjacency matrix of the graph.
92
94
  force_bipartite : bool (default = ``False``)
93
95
  If ``True``, force the input matrix to be considered as a biadjacency matrix.
@@ -139,76 +139,3 @@ class Spectral(BaseEmbedding):
139
139
  self._split_vars(input_matrix.shape)
140
140
 
141
141
  return self
142
-
143
- def predict(self, adjacency_vectors: Union[sparse.csr_matrix, np.ndarray]) -> np.ndarray:
144
- """Predict the embedding of new nodes, when possible (otherwise return 0).
145
-
146
- Each new node is defined by its adjacency row vector.
147
-
148
- Parameters
149
- ----------
150
- adjacency_vectors :
151
- Adjacency vectors of nodes.
152
- Array of shape (n_col,) (single vector) or (n_vectors, n_col)
153
-
154
- Returns
155
- -------
156
- embedding_vectors : np.ndarray
157
- Embedding of the nodes.
158
-
159
- Example
160
- -------
161
- >>> from sknetwork.embedding import Spectral
162
- >>> from sknetwork.data import karate_club
163
- >>> spectral = Spectral(n_components=3)
164
- >>> adjacency = karate_club()
165
- >>> adjacency_vector = np.arange(34) < 5
166
- >>> _ = spectral.fit(adjacency)
167
- >>> len(spectral.predict(adjacency_vector))
168
- 3
169
- """
170
- self._check_fitted()
171
-
172
- # input
173
- if self.bipartite:
174
- n = len(self.embedding_col_)
175
- else:
176
- n = len(self.embedding_)
177
- adjacency_vectors = check_adjacency_vector(adjacency_vectors, n)
178
- check_nonnegative(adjacency_vectors)
179
-
180
- if self.bipartite:
181
- shape = (adjacency_vectors.shape[0], self.embedding_row_.shape[0])
182
- adjacency_vectors = sparse.csr_matrix(adjacency_vectors)
183
- adjacency_vectors = sparse.hstack([sparse.csr_matrix(shape), adjacency_vectors], format='csr')
184
- eigenvectors = self.eigenvectors_
185
- eigenvalues = self.eigenvalues_
186
-
187
- # regularization
188
- if self.regularized:
189
- regularization = np.abs(self.regularization)
190
- else:
191
- regularization = 0
192
- normalizer = Normalizer(adjacency_vectors, regularization)
193
-
194
- # prediction
195
- embedding_vectors = normalizer.dot(eigenvectors)
196
- normalized_laplacian = self.decomposition == 'rw'
197
- if normalized_laplacian:
198
- norm_vect = eigenvalues.copy()
199
- norm_vect[norm_vect == 0] = 1
200
- embedding_vectors /= norm_vect
201
- else:
202
- norm_matrix = sparse.csr_matrix(1 - np.outer(normalizer.norm_diag.data, eigenvalues))
203
- norm_matrix.data = 1 / norm_matrix.data
204
- embedding_vectors *= norm_matrix.toarray()
205
-
206
- # normalization
207
- if self.normalized:
208
- embedding_vectors = normalize(embedding_vectors, p=2)
209
-
210
- # shape
211
- if len(embedding_vectors) == 1:
212
- embedding_vectors = embedding_vectors.ravel()
213
-
214
- return embedding_vectors
@@ -13,7 +13,6 @@ class TestEmbeddings(unittest.TestCase):
13
13
  def setUp(self):
14
14
  """Algorithms by input types."""
15
15
  self.methods = [Spectral(), GSVD(), SVD()]
16
- self.bimethods = [GSVD(), SVD()]
17
16
 
18
17
  def test_undirected(self):
19
18
  adjacency = test_graph()
@@ -22,44 +21,21 @@ class TestEmbeddings(unittest.TestCase):
22
21
  method = Spring()
23
22
  embedding = method.fit_transform(adjacency)
24
23
  self.assertEqual(embedding.shape, (n, 2))
25
- pred1 = method.predict(adjacency[0])
26
- pred2 = method.predict(adjacency[0].toarray())
27
- self.assertEqual(pred1.shape, (2,))
28
- self.assertAlmostEqual(np.linalg.norm(pred1 - pred2), 0)
29
24
 
30
- pred1 = method.predict(adjacency)
31
- pred2 = method.predict(adjacency.toarray())
32
- self.assertTupleEqual(pred1.shape, (n, 2))
33
- self.assertAlmostEqual(np.linalg.norm(pred1 - pred2), 0)
34
-
35
- def test_bimethods(self):
25
+ embedding = method.transform()
26
+ self.assertEqual(embedding.shape, (n, 2))
36
27
 
28
+ def test_bipartite(self):
37
29
  for adjacency in [test_digraph(), test_bigraph()]:
38
30
  n_row, n_col = adjacency.shape
39
31
 
40
- for method in self.bimethods:
32
+ for method in self.methods:
41
33
  method.fit(adjacency)
42
34
 
43
35
  self.assertEqual(method.embedding_.shape, (n_row, 2))
44
36
  self.assertEqual(method.embedding_row_.shape, (n_row, 2))
45
37
  self.assertEqual(method.embedding_col_.shape, (n_col, 2))
46
38
 
47
- ref = method.embedding_[0]
48
- pred1 = method.predict(adjacency[0])
49
- pred2 = method.predict(adjacency[0].toarray())
50
-
51
- self.assertEqual(pred1.shape, (2,))
52
- self.assertAlmostEqual(np.linalg.norm(pred1 - pred2), 0)
53
- self.assertAlmostEqual(np.linalg.norm(pred1 - ref), 0)
54
-
55
- ref = method.embedding_
56
- pred1 = method.predict(adjacency)
57
- pred2 = method.predict(adjacency.toarray())
58
-
59
- self.assertTupleEqual(pred1.shape, (n_row, 2))
60
- self.assertAlmostEqual(np.linalg.norm(pred1 - pred2), 0)
61
- self.assertAlmostEqual(np.linalg.norm(pred1 - ref), 0)
62
-
63
39
  def test_disconnected(self):
64
40
  n = 10
65
41
  adjacency = np.eye(n)
@@ -12,22 +12,17 @@ from sknetwork.embedding import LouvainEmbedding
12
12
  class TestLouvainEmbedding(unittest.TestCase):
13
13
 
14
14
  def test_predict(self):
15
+ adjacency = test_graph()
15
16
  louvain = LouvainEmbedding()
16
17
  louvain.fit(test_graph())
17
18
  self.assertEqual(louvain.embedding_.shape[0], 10)
18
- louvain.fit(test_graph(), force_bipartite=True)
19
+ louvain.fit(adjacency, force_bipartite=True)
19
20
  self.assertEqual(louvain.embedding_.shape[0], 10)
20
21
 
21
22
  for method in ['remove', 'merge', 'keep']:
22
23
  louvain = LouvainEmbedding(isolated_nodes=method)
23
- louvain.fit(test_graph())
24
- embedding_vector = louvain.predict(np.array([1, 0, 0, 0, 1, 1, 0, 0, 0, 1]))
25
- self.assertEqual(embedding_vector.shape[0], 1)
24
+ embedding = louvain.fit_transform(adjacency)
25
+ self.assertEqual(embedding.shape[0], adjacency.shape[0])
26
26
 
27
- for method in ['remove', 'merge', 'keep']:
28
- bilouvain = LouvainEmbedding(isolated_nodes=method)
29
- bilouvain.fit(test_bigraph())
30
- embedding_vector = bilouvain.predict(np.array([1, 0, 0, 0, 1, 1, 0, 1]))
31
- self.assertEqual(embedding_vector.shape[0], 1)
32
27
 
33
28
 
@@ -22,24 +22,21 @@ class TestEmbeddings(unittest.TestCase):
22
22
  if not is_weakly_connected(adjacency):
23
23
  weights += 1
24
24
  self.assertAlmostEqual(np.linalg.norm(embedding.T.dot(weights)), 0)
25
- self.assertAlmostEqual(np.linalg.norm(embedding[1:4] - spectral.predict(adjacency[1:4])), 0)
26
25
  # Laplacian
27
26
  spectral = Spectral(3, decomposition='laplacian', normalized=False)
28
27
  embedding = spectral.fit_transform(adjacency)
29
28
  self.assertAlmostEqual(np.linalg.norm(embedding.sum(axis=0)), 0)
30
- self.assertAlmostEqual(np.linalg.norm(embedding[1:4] - spectral.predict(adjacency[1:4])), 0)
31
29
 
32
30
  def test_directed(self):
33
31
  for adjacency in [test_digraph(), test_digraph().astype(bool)]:
34
32
  # random walk
35
33
  spectral = Spectral(3, normalized=False)
36
34
  embedding = spectral.fit_transform(adjacency)
37
- self.assertAlmostEqual(np.linalg.norm(embedding[6:8] - spectral.predict(adjacency[6:8])), 0)
35
+ self.assertAlmostEqual(embedding.shape[0], adjacency.shape[0])
38
36
  # Laplacian
39
37
  spectral = Spectral(3, decomposition='laplacian', normalized=False)
40
- embedding = spectral.fit_transform(adjacency)
38
+ spectral.fit(adjacency)
41
39
  self.assertAlmostEqual(np.linalg.norm(spectral.eigenvectors_.sum(axis=0)), 0)
42
- self.assertAlmostEqual(np.linalg.norm(embedding[6:8] - spectral.predict(adjacency[6:8])), 0)
43
40
 
44
41
  def test_regularization(self):
45
42
  for adjacency in [test_graph(), test_disconnected_graph()]:
@@ -26,7 +26,7 @@ class TestSVD(unittest.TestCase):
26
26
 
27
27
  gsvd = GSVD(n_components=1, regularization=0.1, solver='lanczos')
28
28
  gsvd.fit(biadjacency)
29
- gsvd.predict(np.random.rand(n_col))
29
+ self.assertEqual(gsvd.embedding_row_.shape, (n_row, 1))
30
30
 
31
31
  pca = PCA(n_components=min_dim, solver='lanczos')
32
32
  pca.fit(biadjacency)
@@ -1,7 +1,7 @@
1
1
  #!/usr/bin/env python3
2
2
  # -*- coding: utf-8 -*-
3
3
  """
4
- Created on July 2022
4
+ Created in July 2022
5
5
  @author: Simon Delarue <sdelarue@enst.fr>
6
6
  """
7
7
  from typing import Optional, Union
@@ -73,10 +73,10 @@ class BaseLayer:
73
73
  in_channels: int
74
74
  Number of input channels.
75
75
  """
76
- # Trainable parameters with He initialization
76
+ # He initialization
77
77
  self.weight = np.random.randn(in_channels, self.out_channels) * np.sqrt(2 / self.out_channels)
78
78
  if self.use_bias:
79
- self.bias = np.zeros((self.out_channels, 1)).T
79
+ self.bias = np.zeros((1, self.out_channels))
80
80
  self.weights_initialized = True
81
81
 
82
82
  def forward(self, *args, **kwargs):
@@ -4,7 +4,7 @@
4
4
  Created in April 2022
5
5
  @author: Simon Delarue <sdelarue@enst.fr>
6
6
  """
7
- from typing import Optional, Union
7
+ from typing import Iterable, Optional, Union
8
8
  from collections import defaultdict
9
9
 
10
10
  import numpy as np
@@ -26,35 +26,37 @@ class GNNClassifier(BaseGNN):
26
26
 
27
27
  Parameters
28
28
  ----------
29
- dims : list or int
30
- Dimensions of the output of each layer (in forward direction).
29
+ dims : iterable or int
30
+ Dimension of the output of each layer (in forward direction).
31
31
  If an integer, dimension of the output layer (no hidden layer).
32
32
  Optional if ``layers`` is specified.
33
- layer_types : list or str
33
+ layer_types : iterable or str
34
34
  Layer types (in forward direction).
35
- If a string, use the same type of layer for all layers.
35
+ If a string, the same type is used at each layer.
36
36
  Can be ``'Conv'``, graph convolutional layer (default) or ``'Sage'`` (GraphSage).
37
- activations : list or str
37
+ activations : iterable or str
38
38
  Activation functions (in forward direction).
39
- If a string, use the same activation function for all layers.
39
+ If a string, the same activation function is used at each layer.
40
40
  Can be either ``'Identity'``, ``'Relu'``, ``'Sigmoid'`` or ``'Softmax'`` (default = ``'Relu'``).
41
- use_bias : list or bool
42
- Whether to use a bias term at each layer.
43
- If ``True``, use a bias term at all layers.
44
- normalizations : list or str
45
- Normalization of the adjacency matrix for message passing.
46
- If a string, use the same normalization for all layers.
47
- Can be either `'left'`` (left normalization by the degrees), ``'right'`` (right normalization by the degrees),
41
+ use_bias : iterable or bool
42
+ Whether to add a bias term at each layer (in forward direction).
43
+ If ``True``, use a bias term at each layer.
44
+ normalizations : iterable or str
45
+ Normalizations of the adjacency matrix for message passing (in forward direction).
46
+ If a string, the same type of normalization is used at each layer.
47
+ Can be either ``'left'`` (left normalization by the degrees), ``'right'`` (right normalization by the degrees),
48
48
  ``'both'`` (symmetric normalization by the square root of degrees, default) or ``None`` (no normalization).
49
- self_embeddings : list or str
50
- Whether to add a self embeddings to each node of the graph for message passing.
51
- If ``True``, add self-embeddings at all layers.
52
- sample_sizes : list or int
53
- Size of neighborhood sampled for each node. Used only for ``'Sage'`` layer type.
49
+ self_embeddings : iterable or bool
50
+ Whether to add the embedding to each node for message passing (in forward direction).
51
+ If ``True``, add a self-embedding at each layer.
52
+ sample_sizes : iterable or int
53
+ Sizes of neighborhood sampled for each node (in forward direction).
54
+ If an integer, the same sampling size is used at each layer.
55
+ Used only for ``'Sage'`` layer type.
54
56
  loss : str (default = ``'CrossEntropy'``) or BaseLoss
55
- Loss function name or custom loss.
56
- layers : list or None
57
- Custom layers. If used, previous parameters are ignored.
57
+ Name of loss function or custom loss function.
58
+ layers : iterable or None
59
+ Custom layers (in forward direction). If used, previous parameters are ignored.
58
60
  optimizer : str or optimizer
59
61
  * ``'Adam'``, stochastic gradient-based optimizer (default).
60
62
  * ``'GD'``, gradient descent.
@@ -72,7 +74,7 @@ class GNNClassifier(BaseGNN):
72
74
  ----------
73
75
  conv2, ..., conv1: :class:'GCNConv'
74
76
  Graph convolutional layers.
75
- output_ : array
77
+ output_ : np.ndarray
76
78
  Output of the GNN.
77
79
  labels_: np.ndarray
78
80
  Predicted node labels.
@@ -95,11 +97,11 @@ class GNNClassifier(BaseGNN):
95
97
  0.88
96
98
  """
97
99
 
98
- def __init__(self, dims: Optional[Union[int, list]] = None, layer_types: Union[str, list] = 'Conv',
99
- activations: Union[str, list] = 'ReLu', use_bias: Union[bool, list] = True,
100
- normalizations: Union[str, list] = 'both', self_embeddings: Union[bool, list] = True,
100
+ def __init__(self, dims: Optional[Union[int, Iterable]] = None, layer_types: Union[str, Iterable] = 'Conv',
101
+ activations: Union[str, Iterable] = 'ReLu', use_bias: Union[bool, list] = True,
102
+ normalizations: Union[str, Iterable] = 'both', self_embeddings: Union[bool, Iterable] = True,
101
103
  sample_sizes: Union[int, list] = 25, loss: Union[BaseLoss, str] = 'CrossEntropy',
102
- layers: Optional[list] = None, optimizer: Union[BaseOptimizer, str] = 'Adam',
104
+ layers: Optional[Iterable] = None, optimizer: Union[BaseOptimizer, str] = 'Adam',
103
105
  learning_rate: float = 0.01, early_stopping: bool = True, patience: int = 10, verbose: bool = False):
104
106
  super(GNNClassifier, self).__init__(loss, optimizer, learning_rate, verbose)
105
107
  if layers is not None:
@@ -159,7 +161,7 @@ class GNNClassifier(BaseGNN):
159
161
 
160
162
  def fit(self, adjacency: Union[sparse.csr_matrix, np.ndarray], features: Union[sparse.csr_matrix, np.ndarray],
161
163
  labels: np.ndarray, n_epochs: int = 100, validation: float = 0, reinit: bool = False,
162
- random_state: Optional[int] = None, history: bool = False) -> 'GNNClassifier':
164
+ random_state: Optional[int] = None) -> 'GNNClassifier':
163
165
  """ Fit model to data and store trained parameters.
164
166
 
165
167
  Parameters
@@ -169,8 +171,8 @@ class GNNClassifier(BaseGNN):
169
171
  features : sparse.csr_matrix, np.ndarray
170
172
  Input feature of shape :math:`(n, d)` with :math:`n` the number of nodes in the graph and :math:`d`
171
173
  the size of feature space.
172
- labels :
173
- Known labels (dictionary or vector of int). Negative values ignored.
174
+ labels : dict, np.ndarray
175
+ Known labels. Negative values ignored.
174
176
  n_epochs : int (default = 100)
175
177
  Number of epochs (iterations over the whole graph).
176
178
  validation : float
@@ -179,18 +181,17 @@ class GNNClassifier(BaseGNN):
179
181
  If ``True``, reinit the trainable parameters of the GNN (weights and biases).
180
182
  random_state : int
181
183
  Random seed, used for reproducible results across multiple runs.
182
- history : bool (default = ``False``)
183
- If ``True``, save training history.
184
184
  """
185
185
  if reinit:
186
186
  for layer in self.layers:
187
187
  layer.weights_initialized = False
188
+ self.history_ = defaultdict(list)
188
189
 
189
190
  if random_state is not None:
190
191
  np.random.seed(random_state)
191
192
 
192
- check_format(adjacency)
193
- check_format(features)
193
+ check_format(adjacency, allow_empty=True)
194
+ check_format(features, allow_empty=True)
194
195
 
195
196
  labels = get_values(adjacency.shape, labels)
196
197
  labels = labels.astype(int)
@@ -199,7 +200,7 @@ class GNNClassifier(BaseGNN):
199
200
  check_output(self.layers[-1].out_channels, labels)
200
201
 
201
202
  self.train_mask = labels >= 0
202
- if 0 < validation < 1:
203
+ if self.val_mask is None and 0 < validation < 1:
203
204
  mask = np.random.random(size=len(labels)) < validation
204
205
  self.val_mask = self.train_mask & mask
205
206
  self.train_mask &= ~mask
@@ -237,12 +238,10 @@ class GNNClassifier(BaseGNN):
237
238
  self.optimizer.step(self)
238
239
 
239
240
  # Save results
240
- if history:
241
- self.history_['embedding'].append(self.layers[-1].embedding)
242
- self.history_['loss'].append(loss_value)
243
- self.history_['train_accuracy'].append(train_accuracy)
244
- if val_accuracy is not None:
245
- self.history_['val_accuracy'].append(val_accuracy)
241
+ self.history_['loss'].append(loss_value)
242
+ self.history_['train_accuracy'].append(train_accuracy)
243
+ if val_accuracy is not None:
244
+ self.history_['val_accuracy'].append(val_accuracy)
246
245
 
247
246
  if n_epochs > 10 and epoch % int(n_epochs / 10) == 0:
248
247
  if val_accuracy is not None:
@@ -304,48 +303,3 @@ class GNNClassifier(BaseGNN):
304
303
  adjacencies.append(adjacency)
305
304
 
306
305
  return adjacencies
307
-
308
- def predict(self, adjacency_vectors: Union[sparse.csr_matrix, np.ndarray] = None,
309
- feature_vectors: Union[sparse.csr_matrix, np.ndarray] = None) -> np.ndarray:
310
- """Predict labels for new nodes. If called without parameters, labels are returned for all nodes.
311
-
312
- Parameters
313
- ----------
314
- adjacency_vectors : np.ndarray
315
- Square adjacency matrix. Array of shape (n, n).
316
- feature_vectors : np.ndarray
317
- Features row vectors. Array of shape (n, n_feat). The number of features n_feat must match with the one
318
- used during training.
319
-
320
- Returns
321
- -------
322
- labels : np.ndarray
323
- Label of each node of the graph.
324
- """
325
- self._check_fitted()
326
-
327
- if adjacency_vectors is None and feature_vectors is None:
328
- return self.labels_
329
- elif adjacency_vectors is not None and feature_vectors is None:
330
- raise ValueError('Missing value: feature matrix is missing.')
331
- elif adjacency_vectors is None:
332
- adjacency_vectors = sparse.identity(feature_vectors.shape[0], format='csr')
333
-
334
- check_square(adjacency_vectors)
335
- check_nonnegative(adjacency_vectors)
336
- feature_vectors = check_format(feature_vectors)
337
-
338
- n_row, n_col = adjacency_vectors.shape
339
- feat_row, feat_col = feature_vectors.shape
340
-
341
- if n_col != feat_row:
342
- raise ValueError(f'Dimension mismatch: dim0={n_col} != dim1={feat_row}.')
343
- elif feat_col != self.layers[0].weight.shape[0]:
344
- raise ValueError(f'Dimension mismatch: current number of features is {feat_col} whereas GNN has been '
345
- f'trained with '
346
- f'{self.layers[0].weight.shape[0]} features.')
347
-
348
- h = self.forward(adjacency_vectors, feature_vectors)
349
- labels = self._compute_predictions(h)
350
-
351
- return labels
sknetwork/gnn/layer.py CHANGED
@@ -1,7 +1,7 @@
1
1
  #!/usr/bin/env python3
2
2
  # coding: utf-8
3
3
  """
4
- Created on Thu Apr 21 2022
4
+ Created in April 2022
5
5
  @author: Simon Delarue <sdelarue@enst.fr>
6
6
  """
7
7
  from typing import Optional, Union
sknetwork/gnn/loss.py CHANGED
@@ -53,7 +53,7 @@ class CrossEntropy(BaseLoss, Softmax):
53
53
  probs = Softmax.output(signal)
54
54
 
55
55
  # for numerical stability
56
- eps = 1e-15
56
+ eps = 1e-10
57
57
  probs = np.clip(probs, eps, 1 - eps)
58
58
 
59
59
  value = -np.log(probs[np.arange(n), labels]).sum()