scikit-network 0.31.0__cp39-cp39-win_amd64.whl → 0.33.0__cp39-cp39-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of scikit-network might be problematic; further details are linked from the original page.

Files changed (126)
  1. {scikit_network-0.31.0.dist-info → scikit_network-0.33.0.dist-info}/AUTHORS.rst +3 -1
  2. {scikit_network-0.31.0.dist-info → scikit_network-0.33.0.dist-info}/METADATA +27 -5
  3. scikit_network-0.33.0.dist-info/RECORD +228 -0
  4. {scikit_network-0.31.0.dist-info → scikit_network-0.33.0.dist-info}/WHEEL +1 -1
  5. sknetwork/__init__.py +1 -1
  6. sknetwork/classification/base.py +1 -1
  7. sknetwork/classification/base_rank.py +3 -3
  8. sknetwork/classification/diffusion.py +25 -16
  9. sknetwork/classification/knn.py +23 -16
  10. sknetwork/classification/metrics.py +4 -4
  11. sknetwork/classification/pagerank.py +12 -8
  12. sknetwork/classification/propagation.py +25 -17
  13. sknetwork/classification/tests/test_diffusion.py +10 -0
  14. sknetwork/classification/vote.cp39-win_amd64.pyd +0 -0
  15. sknetwork/classification/vote.cpp +14549 -8668
  16. sknetwork/clustering/__init__.py +3 -1
  17. sknetwork/clustering/base.py +1 -1
  18. sknetwork/clustering/kcenters.py +253 -0
  19. sknetwork/clustering/leiden.py +242 -0
  20. sknetwork/clustering/leiden_core.cp39-win_amd64.pyd +0 -0
  21. sknetwork/clustering/leiden_core.cpp +31564 -0
  22. sknetwork/clustering/leiden_core.pyx +124 -0
  23. sknetwork/clustering/louvain.py +118 -83
  24. sknetwork/clustering/louvain_core.cp39-win_amd64.pyd +0 -0
  25. sknetwork/clustering/louvain_core.cpp +21876 -16332
  26. sknetwork/clustering/louvain_core.pyx +86 -94
  27. sknetwork/clustering/postprocess.py +2 -2
  28. sknetwork/clustering/propagation_clustering.py +4 -4
  29. sknetwork/clustering/tests/test_API.py +7 -3
  30. sknetwork/clustering/tests/test_kcenters.py +60 -0
  31. sknetwork/clustering/tests/test_leiden.py +34 -0
  32. sknetwork/clustering/tests/test_louvain.py +2 -3
  33. sknetwork/data/__init__.py +1 -1
  34. sknetwork/data/base.py +7 -2
  35. sknetwork/data/load.py +20 -25
  36. sknetwork/data/models.py +15 -15
  37. sknetwork/data/parse.py +57 -34
  38. sknetwork/data/tests/test_API.py +3 -3
  39. sknetwork/data/tests/test_base.py +2 -2
  40. sknetwork/data/tests/test_parse.py +9 -12
  41. sknetwork/data/tests/test_toy_graphs.py +33 -33
  42. sknetwork/data/toy_graphs.py +35 -43
  43. sknetwork/embedding/__init__.py +0 -1
  44. sknetwork/embedding/base.py +23 -19
  45. sknetwork/embedding/force_atlas.py +3 -2
  46. sknetwork/embedding/louvain_embedding.py +1 -27
  47. sknetwork/embedding/random_projection.py +5 -3
  48. sknetwork/embedding/spectral.py +0 -73
  49. sknetwork/embedding/svd.py +0 -4
  50. sknetwork/embedding/tests/test_API.py +4 -28
  51. sknetwork/embedding/tests/test_louvain_embedding.py +13 -13
  52. sknetwork/embedding/tests/test_spectral.py +2 -5
  53. sknetwork/embedding/tests/test_svd.py +7 -1
  54. sknetwork/gnn/base_layer.py +3 -3
  55. sknetwork/gnn/gnn_classifier.py +41 -87
  56. sknetwork/gnn/layer.py +1 -1
  57. sknetwork/gnn/loss.py +1 -1
  58. sknetwork/gnn/optimizer.py +4 -3
  59. sknetwork/gnn/tests/test_base_layer.py +4 -4
  60. sknetwork/gnn/tests/test_gnn_classifier.py +12 -39
  61. sknetwork/gnn/utils.py +8 -8
  62. sknetwork/hierarchy/base.py +27 -0
  63. sknetwork/hierarchy/louvain_hierarchy.py +55 -47
  64. sknetwork/hierarchy/paris.cp39-win_amd64.pyd +0 -0
  65. sknetwork/hierarchy/paris.cpp +27667 -20915
  66. sknetwork/hierarchy/paris.pyx +11 -10
  67. sknetwork/hierarchy/postprocess.py +16 -16
  68. sknetwork/hierarchy/tests/test_algos.py +5 -0
  69. sknetwork/hierarchy/tests/test_metrics.py +4 -4
  70. sknetwork/linalg/__init__.py +1 -1
  71. sknetwork/linalg/diteration.cp39-win_amd64.pyd +0 -0
  72. sknetwork/linalg/diteration.cpp +13916 -8050
  73. sknetwork/linalg/{normalization.py → normalizer.py} +17 -14
  74. sknetwork/linalg/operators.py +1 -1
  75. sknetwork/linalg/ppr_solver.py +1 -1
  76. sknetwork/linalg/push.cp39-win_amd64.pyd +0 -0
  77. sknetwork/linalg/push.cpp +23187 -16973
  78. sknetwork/linalg/tests/test_normalization.py +3 -7
  79. sknetwork/linalg/tests/test_operators.py +2 -6
  80. sknetwork/linalg/tests/test_ppr.py +1 -1
  81. sknetwork/linkpred/base.py +12 -1
  82. sknetwork/linkpred/nn.py +6 -6
  83. sknetwork/path/distances.py +11 -4
  84. sknetwork/path/shortest_path.py +1 -1
  85. sknetwork/path/tests/test_distances.py +7 -0
  86. sknetwork/path/tests/test_search.py +2 -2
  87. sknetwork/ranking/base.py +11 -6
  88. sknetwork/ranking/betweenness.cp39-win_amd64.pyd +0 -0
  89. sknetwork/ranking/betweenness.cpp +5256 -2190
  90. sknetwork/ranking/pagerank.py +13 -12
  91. sknetwork/ranking/tests/test_API.py +0 -2
  92. sknetwork/ranking/tests/test_betweenness.py +1 -1
  93. sknetwork/ranking/tests/test_pagerank.py +11 -5
  94. sknetwork/regression/base.py +18 -1
  95. sknetwork/regression/diffusion.py +30 -14
  96. sknetwork/regression/tests/test_diffusion.py +8 -0
  97. sknetwork/topology/__init__.py +3 -1
  98. sknetwork/topology/cliques.cp39-win_amd64.pyd +0 -0
  99. sknetwork/topology/cliques.cpp +23528 -16848
  100. sknetwork/topology/core.cp39-win_amd64.pyd +0 -0
  101. sknetwork/topology/core.cpp +22849 -16581
  102. sknetwork/topology/cycles.py +243 -0
  103. sknetwork/topology/minheap.cp39-win_amd64.pyd +0 -0
  104. sknetwork/topology/minheap.cpp +19495 -13469
  105. sknetwork/topology/structure.py +2 -42
  106. sknetwork/topology/tests/test_cycles.py +65 -0
  107. sknetwork/topology/tests/test_structure.py +2 -16
  108. sknetwork/topology/triangles.cp39-win_amd64.pyd +0 -0
  109. sknetwork/topology/triangles.cpp +5283 -1397
  110. sknetwork/topology/triangles.pyx +7 -4
  111. sknetwork/topology/weisfeiler_lehman_core.cp39-win_amd64.pyd +0 -0
  112. sknetwork/topology/weisfeiler_lehman_core.cpp +14781 -8915
  113. sknetwork/utils/__init__.py +1 -1
  114. sknetwork/utils/format.py +1 -1
  115. sknetwork/utils/membership.py +2 -2
  116. sknetwork/utils/values.py +5 -3
  117. sknetwork/visualization/__init__.py +2 -2
  118. sknetwork/visualization/dendrograms.py +55 -7
  119. sknetwork/visualization/graphs.py +261 -44
  120. sknetwork/visualization/tests/test_dendrograms.py +9 -9
  121. sknetwork/visualization/tests/test_graphs.py +63 -57
  122. scikit_network-0.31.0.dist-info/RECORD +0 -221
  123. sknetwork/embedding/louvain_hierarchy.py +0 -142
  124. sknetwork/embedding/tests/test_louvain_hierarchy.py +0 -19
  125. {scikit_network-0.31.0.dist-info → scikit_network-0.33.0.dist-info}/LICENSE +0 -0
  126. {scikit_network-0.31.0.dist-info → scikit_network-0.33.0.dist-info}/top_level.txt +0 -0
sknetwork/data/toy_graphs.py +35 -43
@@ -11,10 +11,10 @@ from typing import Union
11
11
  import numpy as np
12
12
  from scipy import sparse
13
13
 
14
- from sknetwork.data.base import Bunch
14
+ from sknetwork.data.base import Dataset
15
15
 
16
16
 
17
- def house(metadata: bool = False) -> Union[sparse.csr_matrix, Bunch]:
17
+ def house(metadata: bool = False) -> Union[sparse.csr_matrix, Dataset]:
18
18
  """House graph.
19
19
 
20
20
  * Undirected graph
@@ -46,7 +46,7 @@ def house(metadata: bool = False) -> Union[sparse.csr_matrix, Bunch]:
46
46
  if metadata:
47
47
  x = np.array([0, -1, -1, 1, 1])
48
48
  y = np.array([2, 1, -1, -1, 1])
49
- graph = Bunch()
49
+ graph = Dataset()
50
50
  graph.adjacency = adjacency
51
51
  graph.position = np.vstack((x, y)).T
52
52
  graph.name = 'house'
@@ -55,7 +55,7 @@ def house(metadata: bool = False) -> Union[sparse.csr_matrix, Bunch]:
55
55
  return adjacency
56
56
 
57
57
 
58
- def bow_tie(metadata: bool = False) -> Union[sparse.csr_matrix, Bunch]:
58
+ def bow_tie(metadata: bool = False) -> Union[sparse.csr_matrix, Dataset]:
59
59
  """Bow tie graph.
60
60
 
61
61
  * Undirected graph
@@ -86,7 +86,7 @@ def bow_tie(metadata: bool = False) -> Union[sparse.csr_matrix, Bunch]:
86
86
  if metadata:
87
87
  x = np.array([0, -1, 1, -1, 1])
88
88
  y = np.array([0, 1, 1, -1, -1])
89
- graph = Bunch()
89
+ graph = Dataset()
90
90
  graph.adjacency = adjacency
91
91
  graph.position = np.vstack((x, y)).T
92
92
  graph.name = 'bow_tie'
@@ -95,7 +95,7 @@ def bow_tie(metadata: bool = False) -> Union[sparse.csr_matrix, Bunch]:
95
95
  return adjacency
96
96
 
97
97
 
98
- def karate_club(metadata: bool = False) -> Union[sparse.csr_matrix, Bunch]:
98
+ def karate_club(metadata: bool = False) -> Union[sparse.csr_matrix, Dataset]:
99
99
  """Karate club graph.
100
100
 
101
101
  * Undirected graph
@@ -150,7 +150,7 @@ def karate_club(metadata: bool = False) -> Union[sparse.csr_matrix, Bunch]:
150
150
  [-0.33, -0.15, -0.01, -0.28, -0.64, -0.75, -0.76, -0.25, 0.09, 0.23, -0.62, -0.4, -0.53, -0.07,
151
151
  0.55, 0.64, -1., -0.42, 0.6, -0.01, 0.45, -0.34, 0.61, 0.41, 0.14, 0.28, 0.68, 0.21,
152
152
  0.12, 0.54, 0.19, 0.09, 0.38, 0.33])
153
- graph = Bunch()
153
+ graph = Dataset()
154
154
  graph.adjacency = adjacency
155
155
  graph.labels = labels
156
156
  graph.position = np.vstack((x, y)).T
@@ -160,7 +160,7 @@ def karate_club(metadata: bool = False) -> Union[sparse.csr_matrix, Bunch]:
160
160
  return adjacency
161
161
 
162
162
 
163
- def miserables(metadata: bool = False) -> Union[sparse.csr_matrix, Bunch]:
163
+ def miserables(metadata: bool = False) -> Union[sparse.csr_matrix, Dataset]:
164
164
  """Co-occurrence graph of the characters in the novel Les miserables by Victor Hugo.
165
165
 
166
166
  * Undirected graph
@@ -257,7 +257,7 @@ def miserables(metadata: bool = False) -> Union[sparse.csr_matrix, Bunch]:
257
257
  0.05, 0.12, 0.82, 0.44, 0.06, -0.2, -0.4, -0.28, -0.68, -0.79, -0.4, -0.07, -0.51, -0.17, -0.03,
258
258
  -0.09, -0.14, -0.04, -0.04, -0.07, -0.06, -0.11, -0.06, -0.35, 0.24, 0.19, 0.22, 0.29, -0.2,
259
259
  0.06, 0.14, 0.3, -0.1])
260
- graph = Bunch()
260
+ graph = Dataset()
261
261
  graph.adjacency = adjacency
262
262
  graph.names = np.array(names)
263
263
  graph.position = np.vstack((x, y)).T
@@ -267,7 +267,7 @@ def miserables(metadata: bool = False) -> Union[sparse.csr_matrix, Bunch]:
267
267
  return adjacency
268
268
 
269
269
 
270
- def painters(metadata: bool = False) -> Union[sparse.csr_matrix, Bunch]:
270
+ def painters(metadata: bool = False) -> Union[sparse.csr_matrix, Dataset]:
271
271
  """Graph of links between some famous painters on Wikipedia.
272
272
 
273
273
  * Directed graph
@@ -312,7 +312,7 @@ def painters(metadata: bool = False) -> Union[sparse.csr_matrix, Bunch]:
312
312
  y = np.array(
313
313
  [0.53, 0.19, -0.71, 0.44, -0.48, -0.65, 0.69, -0.11, 0.01,
314
314
  -1., 0.49, 0.28, 0.06, 0.27])
315
- graph = Bunch()
315
+ graph = Dataset()
316
316
  graph.adjacency = adjacency
317
317
  graph.names = names
318
318
  graph.position = np.stack((x, y)).T
@@ -322,7 +322,7 @@ def painters(metadata: bool = False) -> Union[sparse.csr_matrix, Bunch]:
322
322
  return adjacency
323
323
 
324
324
 
325
- def hourglass(metadata: bool = False) -> Union[sparse.csr_matrix, Bunch]:
325
+ def hourglass(metadata: bool = False) -> Union[sparse.csr_matrix, Dataset]:
326
326
  """Hourglass graph.
327
327
 
328
328
  * Bipartite graph
@@ -342,14 +342,14 @@ def hourglass(metadata: bool = False) -> Union[sparse.csr_matrix, Bunch]:
342
342
  """
343
343
  biadjacency = sparse.csr_matrix(np.ones((2, 2), dtype=bool))
344
344
  if metadata:
345
- graph = Bunch()
345
+ graph = Dataset()
346
346
  graph.biadjacency = biadjacency
347
347
  return graph
348
348
  else:
349
349
  return biadjacency
350
350
 
351
351
 
352
- def star_wars(metadata: bool = False) -> Union[sparse.csr_matrix, Bunch]:
352
+ def star_wars(metadata: bool = False) -> Union[sparse.csr_matrix, Dataset]:
353
353
  """Bipartite graph connecting some Star Wars villains to the movies in which they appear.
354
354
 
355
355
  * Bipartite graph
@@ -380,7 +380,7 @@ def star_wars(metadata: bool = False) -> Union[sparse.csr_matrix, Bunch]:
380
380
  if metadata:
381
381
  villains = np.array(['Jabba', 'Greedo', 'Vader', 'Boba'])
382
382
  movies = np.array(['A New Hope', 'The Empire Strikes Back', 'Return Of The Jedi'])
383
- graph = Bunch()
383
+ graph = Dataset()
384
384
  graph.biadjacency = biadjacency
385
385
  graph.names = villains
386
386
  graph.names_row = villains
@@ -391,14 +391,12 @@ def star_wars(metadata: bool = False) -> Union[sparse.csr_matrix, Bunch]:
391
391
  return biadjacency
392
392
 
393
393
 
394
- def movie_actor(metadata: bool = False) -> Union[sparse.csr_matrix, Bunch]:
394
+ def movie_actor(metadata: bool = False) -> Union[sparse.csr_matrix, Dataset]:
395
395
  """Bipartite graph connecting movies to some actors starring in them.
396
396
 
397
397
  * Bipartite graph
398
- * 31 nodes (15 movies, 16 actors), 42 edges
399
- * 9 labels (rows)
398
+ * 32 nodes (15 movies, 17 actors), 43 edges
400
399
  * Names of movies (rows) and actors (columns)
401
- * Names of movies production company (rows)
402
400
 
403
401
  Parameters
404
402
  ----------
@@ -407,53 +405,47 @@ def movie_actor(metadata: bool = False) -> Union[sparse.csr_matrix, Bunch]:
407
405
 
408
406
  Returns
409
407
  -------
410
- biadjacency or graph : Union[sparse.csr_matrix, Dataset]
411
- Biadjacency matrix or graph with metadata (names).
408
+ biadjacency or dataset : Union[sparse.csr_matrix, Dataset]
409
+ Biadjacency matrix or dataset with metadata (names of movies and actors).
412
410
 
413
411
  Example
414
412
  -------
415
413
  >>> from sknetwork.data import movie_actor
416
414
  >>> biadjacency = movie_actor()
417
415
  >>> biadjacency.shape
418
- (15, 16)
416
+ (15, 17)
419
417
  """
420
418
  row = np.array(
421
419
  [0, 0, 0, 1, 1, 1, 2, 2, 2, 2, 3, 3, 4, 4, 5, 5, 6,
422
- 6, 6, 7, 7, 8, 8, 8, 8, 8, 9, 9, 9, 10, 10, 10, 11, 11, 11,
420
+ 6, 6, 7, 7, 8, 8, 8, 8, 8, 9, 9, 9, 10, 10, 10, 10, 11, 11, 11,
423
421
  12, 12, 12, 13, 13, 14, 14])
424
422
  col = np.array(
425
423
  [0, 1, 2, 1, 2, 3, 3, 4, 5, 8, 4, 6, 0, 6, 4, 7, 4,
426
- 7, 8, 3, 8, 9, 10, 11, 12, 15, 0, 11, 12, 9, 10, 13, 5, 9, 13,
427
- 1, 9, 15, 12, 14, 11, 14])
428
- biadjacency = sparse.csr_matrix((np.ones(len(row), dtype=bool), (row, col)), shape=(15, 16))
424
+ 7, 8, 3, 8, 9, 10, 11, 12, 15, 0, 11, 12, 9, 10, 13, 16, 5, 9, 13,
425
+ 1, 9, 15, 12, 14, 14, 16])
426
+ biadjacency = sparse.csr_matrix((np.ones(len(row), dtype=bool), (row, col)), shape=(15, 17))
429
427
 
430
428
  if metadata:
431
429
  movies = np.array(
432
430
  ['Inception', 'The Dark Knight Rises', 'The Big Short', 'Drive', 'The Great Gatsby', 'La La Land',
433
431
  'Crazy Stupid Love', 'Vice', 'The Grand Budapest Hotel', 'Aviator', '007 Spectre', 'Inglourious Basterds',
434
- 'Midnight In Paris', 'Murder on the Orient Express', 'Fantastic Beasts 2'])
432
+ 'Midnight In Paris', 'Murder on the Orient Express', "Pirates of the Caribbean: At World's End"])
435
433
  actors = np.array(
436
434
  ['Leonardo DiCaprio', 'Marion Cotillard', 'Joseph Gordon Lewitt', 'Christian Bale', 'Ryan Gosling',
437
435
  'Brad Pitt', 'Carey Mulligan', 'Emma Stone', 'Steve Carell', 'Lea Seydoux', 'Ralph Fiennes', 'Jude Law',
438
- 'Willem Dafoe', 'Christophe Waltz', 'Johnny Depp', 'Owen Wilson'])
439
- graph = Bunch()
440
- graph.biadjacency = biadjacency
441
- graph.names = movies
442
- graph.names_row = movies
443
- graph.names_col = actors
444
- graph.labels = np.array([0, 0, 1, 2, 3, 2, 4, 1, 5, 0, 6, 5, 7, 8, 0])
445
- graph.labels_name = np.array(['Warner Bros', 'Plan B Entertainment', 'Marc Platt Productions', 'Bazmark Films',
446
- 'Carousel Productions', 'Babelsberg Studios', 'MGM', 'Gravier Productions',
447
- 'Genre Films'])
448
- graph.labels_row = graph.labels
449
- graph.labels_row_name = graph.labels_name
450
- graph.name = 'movie_actor'
451
- return graph
436
+ 'Willem Dafoe', 'Christophe Waltz', 'Johnny Depp', 'Owen Wilson', 'Naomie Harris'])
437
+ dataset = Dataset()
438
+ dataset.biadjacency = biadjacency
439
+ dataset.names = movies
440
+ dataset.names_row = movies
441
+ dataset.names_col = actors
442
+ dataset.name = 'movie_actor'
443
+ return dataset
452
444
  else:
453
445
  return biadjacency
454
446
 
455
447
 
456
- def art_philo_science(metadata: bool = False) -> Union[sparse.csr_matrix, Bunch]:
448
+ def art_philo_science(metadata: bool = False) -> Union[sparse.csr_matrix, Dataset]:
457
449
  """Wikipedia links between 30 articles (10 artists, 10 philosophers, 10 scientists).
458
450
 
459
451
  * Directed graph
@@ -605,7 +597,7 @@ def art_philo_science(metadata: bool = False) -> Union[sparse.csr_matrix, Bunch]
605
597
  words = np.array(
606
598
  ['contribution', 'theory', 'invention', 'time', 'modern',
607
599
  'century', 'study', 'logic', 'school', 'author', 'compose'])
608
- graph = Bunch()
600
+ graph = Dataset()
609
601
  graph.adjacency = adjacency
610
602
  graph.names = names
611
603
  graph.position = position
sknetwork/embedding/__init__.py +0 -1
@@ -2,7 +2,6 @@
2
2
  from sknetwork.embedding.base import BaseEmbedding
3
3
  from sknetwork.embedding.force_atlas import ForceAtlas
4
4
  from sknetwork.embedding.louvain_embedding import LouvainEmbedding
5
- from sknetwork.embedding.louvain_hierarchy import LouvainNE
6
5
  from sknetwork.embedding.random_projection import RandomProjection
7
6
  from sknetwork.embedding.spectral import Spectral
8
7
  from sknetwork.embedding.spring import Spring
sknetwork/embedding/base.py +23 -19
@@ -1,11 +1,11 @@
1
1
  #!/usr/bin/env python3
2
2
  # -*- coding: utf-8 -*-
3
3
  """
4
- Created on Nov, 2019
4
+ Created in November 2019
5
5
  @author: Nathan de Lara <nathan.delara@polytechnique.org>
6
6
  """
7
7
  from abc import ABC
8
- from typing import Union
8
+ from typing import Optional, Union
9
9
 
10
10
  import numpy as np
11
11
  from scipy import sparse
@@ -26,10 +26,19 @@ class BaseEmbedding(Algorithm, ABC):
26
26
  embedding_col_ : array, shape = (n_col, n_components)
27
27
  Embedding of the columns, for bipartite graphs.
28
28
  """
29
-
30
29
  def __init__(self):
31
30
  self._init_vars()
32
31
 
32
+ def transform(self) -> np.ndarray:
33
+ """Return the embedding.
34
+
35
+ Returns
36
+ -------
37
+ embedding : np.ndarray
38
+ Embedding.
39
+ """
40
+ return self.embedding_
41
+
33
42
  def fit_transform(self, *args, **kwargs) -> np.ndarray:
34
43
  """Fit to data and return the embedding. Same parameters as the ``fit`` method.
35
44
 
@@ -41,30 +50,22 @@ class BaseEmbedding(Algorithm, ABC):
41
50
  self.fit(*args, **kwargs)
42
51
  return self.embedding_
43
52
 
44
- def predict(self, adjacency_vectors: Union[sparse.csr_matrix, np.ndarray]) -> np.ndarray:
45
- """Predict the embedding of new nodes.
46
-
47
- Each new node is defined by its adjacency row vector.
53
+ def predict(self, columns: bool = False) -> np.ndarray:
54
+ """Return the embedding of nodes.
48
55
 
49
56
  Parameters
50
57
  ----------
51
- adjacency_vectors :
52
- Adjacency vectors of nodes.
53
- Array of shape (n_col,) (single vector) or (n_vectors, n_col)
58
+ columns : bool
59
+ If ``True``, return the prediction for columns.
54
60
 
55
61
  Returns
56
62
  -------
57
- embedding_vectors : np.ndarray
63
+ embedding_ : np.ndarray
58
64
  Embedding of the nodes.
59
65
  """
60
- raise NotImplementedError
61
-
62
- def _check_fitted(self):
63
- if self.embedding_ is None:
64
- raise ValueError("This embedding instance is not fitted yet."
65
- " Call 'fit' with appropriate arguments before using this method.")
66
- else:
67
- return self
66
+ if columns:
67
+ return self.embedding_col_
68
+ return self.embedding_
68
69
 
69
70
  @staticmethod
70
71
  def _get_regularization(regularization: float, adjacency: sparse.csr_matrix) -> float:
@@ -81,6 +82,9 @@ class BaseEmbedding(Algorithm, ABC):
81
82
  self.embedding_row_ = None
82
83
  self.embedding_col_ = None
83
84
 
85
+ def _check_fitted(self):
86
+ return self.embedding_ is not None
87
+
84
88
  def _split_vars(self, shape):
85
89
  """Split labels_ into labels_row_ and labels_col_"""
86
90
  n_row = shape[0]
sknetwork/embedding/force_atlas.py +3 -2
@@ -77,6 +77,7 @@ class ForceAtlas(BaseEmbedding):
77
77
  self.tolerance = tolerance
78
78
  self.speed = speed
79
79
  self.speed_max = speed_max
80
+ self.embedding_ = None
80
81
 
81
82
  def fit(self, adjacency: Union[sparse.csr_matrix, np.ndarray], pos_init: Optional[np.ndarray] = None,
82
83
  n_iter: Optional[int] = None) -> 'ForceAtlas':
@@ -155,7 +156,7 @@ class ForceAtlas(BaseEmbedding):
155
156
  if tree is None:
156
157
  neighbors = np.arange(n)
157
158
  else:
158
- neighbors = tree.query_ball_point(position[i], self.approx_radius)
159
+ neighbors = tree.query_ball_point(position[i], self.approx_radius, p=2)
159
160
 
160
161
  grad: np.ndarray = (position[i] - position[neighbors]) # shape (n_neigh, n_components)
161
162
  distance: np.ndarray = np.linalg.norm(grad, axis=1) # shape (n_neigh,)
@@ -191,7 +192,7 @@ class ForceAtlas(BaseEmbedding):
191
192
 
192
193
  position += delta # calculating displacement and final position of points after iteration
193
194
  if (swing_vector < 1).all():
194
- break # if the swing of all nodes is zero, then convergence is reached and we break.
195
+ break # if the swing of all nodes is zero, then convergence is reached.
195
196
 
196
197
  self.embedding_ = position
197
198
  return self
sknetwork/embedding/louvain_embedding.py +1 -27
@@ -12,7 +12,7 @@ from scipy import sparse
12
12
 
13
13
  from sknetwork.clustering.louvain import Louvain
14
14
  from sknetwork.embedding.base import BaseEmbedding
15
- from sknetwork.linalg.normalization import normalize
15
+ from sknetwork.linalg.normalizer import normalize
16
16
  from sknetwork.utils.check import check_random_state, check_adjacency_vector, check_nonnegative, is_square
17
17
  from sknetwork.utils.membership import get_membership
18
18
 
@@ -146,29 +146,3 @@ class LouvainEmbedding(BaseEmbedding):
146
146
  self.embedding_col_ = embedding_col.toarray()
147
147
 
148
148
  return self
149
-
150
- def predict(self, adjacency_vectors: Union[sparse.csr_matrix, np.ndarray]) -> np.ndarray:
151
- """Predict the embedding of new rows, defined by their adjacency vectors.
152
-
153
- Parameters
154
- ----------
155
- adjacency_vectors :
156
- Adjacency row vectors.
157
- Array of shape (n_col,) (single vector) or (n_vectors, n_col)
158
-
159
- Returns
160
- -------
161
- embedding_vectors : np.ndarray
162
- Embedding of the nodes.
163
- """
164
- self._check_fitted()
165
- if self.embedding_col_ is not None:
166
- n = len(self.embedding_col_)
167
- else:
168
- n = len(self.embedding_)
169
-
170
- adjacency_vectors = check_adjacency_vector(adjacency_vectors, n)
171
- check_nonnegative(adjacency_vectors)
172
- membership = get_membership(self.labels_)
173
-
174
- return normalize(adjacency_vectors).dot(membership)
sknetwork/embedding/random_projection.py +5 -3
@@ -1,9 +1,10 @@
1
1
  #!/usr/bin/env python3
2
2
  # coding: utf-8
3
3
  """
4
- Created on January, 15 2021
4
+ Created in January 2021
5
5
  @author: Thomas Bonald <bonald@enst.fr>
6
6
  """
7
+ from abc import ABC
7
8
  from typing import Union
8
9
 
9
10
  import numpy as np
@@ -15,7 +16,7 @@ from sknetwork.utils.check import check_format, check_random_state
15
16
  from sknetwork.utils.format import get_adjacency
16
17
 
17
18
 
18
- class RandomProjection(BaseEmbedding):
19
+ class RandomProjection(BaseEmbedding, ABC):
19
20
  """Embedding of graphs based the random projection of the adjacency matrix:
20
21
 
21
22
  :math:`(I + \\alpha A +... + (\\alpha A)^K)G`
@@ -71,6 +72,7 @@ class RandomProjection(BaseEmbedding):
71
72
  regularization: float = -1, normalized: bool = True, random_state: int = None):
72
73
  super(RandomProjection, self).__init__()
73
74
 
75
+ self.embedding_ = None
74
76
  self.n_components = n_components
75
77
  self.alpha = alpha
76
78
  self.n_iter = n_iter
@@ -87,7 +89,7 @@ class RandomProjection(BaseEmbedding):
87
89
 
88
90
  Parameters
89
91
  ----------
90
- input_matrix :
92
+ input_matrix : sparse.csr_matrix, np.ndarray
91
93
  Adjacency matrix or biadjacency matrix of the graph.
92
94
  force_bipartite : bool (default = ``False``)
93
95
  If ``True``, force the input matrix to be considered as a biadjacency matrix.
sknetwork/embedding/spectral.py +0 -73
@@ -139,76 +139,3 @@ class Spectral(BaseEmbedding):
139
139
  self._split_vars(input_matrix.shape)
140
140
 
141
141
  return self
142
-
143
- def predict(self, adjacency_vectors: Union[sparse.csr_matrix, np.ndarray]) -> np.ndarray:
144
- """Predict the embedding of new nodes, when possible (otherwise return 0).
145
-
146
- Each new node is defined by its adjacency row vector.
147
-
148
- Parameters
149
- ----------
150
- adjacency_vectors :
151
- Adjacency vectors of nodes.
152
- Array of shape (n_col,) (single vector) or (n_vectors, n_col)
153
-
154
- Returns
155
- -------
156
- embedding_vectors : np.ndarray
157
- Embedding of the nodes.
158
-
159
- Example
160
- -------
161
- >>> from sknetwork.embedding import Spectral
162
- >>> from sknetwork.data import karate_club
163
- >>> spectral = Spectral(n_components=3)
164
- >>> adjacency = karate_club()
165
- >>> adjacency_vector = np.arange(34) < 5
166
- >>> _ = spectral.fit(adjacency)
167
- >>> len(spectral.predict(adjacency_vector))
168
- 3
169
- """
170
- self._check_fitted()
171
-
172
- # input
173
- if self.bipartite:
174
- n = len(self.embedding_col_)
175
- else:
176
- n = len(self.embedding_)
177
- adjacency_vectors = check_adjacency_vector(adjacency_vectors, n)
178
- check_nonnegative(adjacency_vectors)
179
-
180
- if self.bipartite:
181
- shape = (adjacency_vectors.shape[0], self.embedding_row_.shape[0])
182
- adjacency_vectors = sparse.csr_matrix(adjacency_vectors)
183
- adjacency_vectors = sparse.hstack([sparse.csr_matrix(shape), adjacency_vectors], format='csr')
184
- eigenvectors = self.eigenvectors_
185
- eigenvalues = self.eigenvalues_
186
-
187
- # regularization
188
- if self.regularized:
189
- regularization = np.abs(self.regularization)
190
- else:
191
- regularization = 0
192
- normalizer = Normalizer(adjacency_vectors, regularization)
193
-
194
- # prediction
195
- embedding_vectors = normalizer.dot(eigenvectors)
196
- normalized_laplacian = self.decomposition == 'rw'
197
- if normalized_laplacian:
198
- norm_vect = eigenvalues.copy()
199
- norm_vect[norm_vect == 0] = 1
200
- embedding_vectors /= norm_vect
201
- else:
202
- norm_matrix = sparse.csr_matrix(1 - np.outer(normalizer.norm_diag.data, eigenvalues))
203
- norm_matrix.data = 1 / norm_matrix.data
204
- embedding_vectors *= norm_matrix.toarray()
205
-
206
- # normalization
207
- if self.normalized:
208
- embedding_vectors = normalize(embedding_vectors, p=2)
209
-
210
- # shape
211
- if len(embedding_vectors) == 1:
212
- embedding_vectors = embedding_vectors.ravel()
213
-
214
- return embedding_vectors
sknetwork/embedding/svd.py +0 -4
@@ -277,10 +277,6 @@ class SVD(GSVD):
277
277
  factor_singular=factor_singular, factor_row=0., factor_col=0., normalized=normalized,
278
278
  solver=solver)
279
279
 
280
- @staticmethod
281
- def _check_adj_vector(adjacency_vectors: np.ndarray):
282
- return
283
-
284
280
 
285
281
  class PCA(SVD):
286
282
  """Graph embedding by Principal Component Analysis of the adjacency or biadjacency matrix.
sknetwork/embedding/tests/test_API.py +4 -28
@@ -13,7 +13,6 @@ class TestEmbeddings(unittest.TestCase):
13
13
  def setUp(self):
14
14
  """Algorithms by input types."""
15
15
  self.methods = [Spectral(), GSVD(), SVD()]
16
- self.bimethods = [GSVD(), SVD()]
17
16
 
18
17
  def test_undirected(self):
19
18
  adjacency = test_graph()
@@ -22,44 +21,21 @@ class TestEmbeddings(unittest.TestCase):
22
21
  method = Spring()
23
22
  embedding = method.fit_transform(adjacency)
24
23
  self.assertEqual(embedding.shape, (n, 2))
25
- pred1 = method.predict(adjacency[0])
26
- pred2 = method.predict(adjacency[0].toarray())
27
- self.assertEqual(pred1.shape, (2,))
28
- self.assertAlmostEqual(np.linalg.norm(pred1 - pred2), 0)
29
24
 
30
- pred1 = method.predict(adjacency)
31
- pred2 = method.predict(adjacency.toarray())
32
- self.assertTupleEqual(pred1.shape, (n, 2))
33
- self.assertAlmostEqual(np.linalg.norm(pred1 - pred2), 0)
34
-
35
- def test_bimethods(self):
25
+ embedding = method.transform()
26
+ self.assertEqual(embedding.shape, (n, 2))
36
27
 
28
+ def test_bipartite(self):
37
29
  for adjacency in [test_digraph(), test_bigraph()]:
38
30
  n_row, n_col = adjacency.shape
39
31
 
40
- for method in self.bimethods:
32
+ for method in self.methods:
41
33
  method.fit(adjacency)
42
34
 
43
35
  self.assertEqual(method.embedding_.shape, (n_row, 2))
44
36
  self.assertEqual(method.embedding_row_.shape, (n_row, 2))
45
37
  self.assertEqual(method.embedding_col_.shape, (n_col, 2))
46
38
 
47
- ref = method.embedding_[0]
48
- pred1 = method.predict(adjacency[0])
49
- pred2 = method.predict(adjacency[0].toarray())
50
-
51
- self.assertEqual(pred1.shape, (2,))
52
- self.assertAlmostEqual(np.linalg.norm(pred1 - pred2), 0)
53
- self.assertAlmostEqual(np.linalg.norm(pred1 - ref), 0)
54
-
55
- ref = method.embedding_
56
- pred1 = method.predict(adjacency)
57
- pred2 = method.predict(adjacency.toarray())
58
-
59
- self.assertTupleEqual(pred1.shape, (n_row, 2))
60
- self.assertAlmostEqual(np.linalg.norm(pred1 - pred2), 0)
61
- self.assertAlmostEqual(np.linalg.norm(pred1 - ref), 0)
62
-
63
39
  def test_disconnected(self):
64
40
  n = 10
65
41
  adjacency = np.eye(n)
sknetwork/embedding/tests/test_louvain_embedding.py +13 -13
@@ -12,22 +12,22 @@ from sknetwork.embedding import LouvainEmbedding
12
12
  class TestLouvainEmbedding(unittest.TestCase):
13
13
 
14
14
  def test_predict(self):
15
+ adjacency = test_graph()
16
+ adjacency_vector = np.zeros(10, dtype=int)
17
+ adjacency_vector[:5] = 1
15
18
  louvain = LouvainEmbedding()
16
- louvain.fit(test_graph())
19
+ louvain.fit(adjacency)
17
20
  self.assertEqual(louvain.embedding_.shape[0], 10)
18
- louvain.fit(test_graph(), force_bipartite=True)
21
+ louvain.fit(adjacency, force_bipartite=True)
19
22
  self.assertEqual(louvain.embedding_.shape[0], 10)
20
23
 
21
- for method in ['remove', 'merge', 'keep']:
22
- louvain = LouvainEmbedding(isolated_nodes=method)
23
- louvain.fit(test_graph())
24
- embedding_vector = louvain.predict(np.array([1, 0, 0, 0, 1, 1, 0, 0, 0, 1]))
25
- self.assertEqual(embedding_vector.shape[0], 1)
24
+ # bipartite
25
+ biadjacency = test_bigraph()
26
+ louvain.fit(biadjacency)
27
+ self.assertEqual(louvain.embedding_row_.shape[0], 6)
28
+ self.assertEqual(louvain.embedding_col_.shape[0], 8)
26
29
 
27
30
  for method in ['remove', 'merge', 'keep']:
28
- bilouvain = LouvainEmbedding(isolated_nodes=method)
29
- bilouvain.fit(test_bigraph())
30
- embedding_vector = bilouvain.predict(np.array([1, 0, 0, 0, 1, 1, 0, 1]))
31
- self.assertEqual(embedding_vector.shape[0], 1)
32
-
33
-
31
+ louvain = LouvainEmbedding(isolated_nodes=method)
32
+ embedding = louvain.fit_transform(adjacency)
33
+ self.assertEqual(embedding.shape[0], adjacency.shape[0])
sknetwork/embedding/tests/test_spectral.py +2 -5
@@ -22,24 +22,21 @@ class TestEmbeddings(unittest.TestCase):
22
22
  if not is_weakly_connected(adjacency):
23
23
  weights += 1
24
24
  self.assertAlmostEqual(np.linalg.norm(embedding.T.dot(weights)), 0)
25
- self.assertAlmostEqual(np.linalg.norm(embedding[1:4] - spectral.predict(adjacency[1:4])), 0)
26
25
  # Laplacian
27
26
  spectral = Spectral(3, decomposition='laplacian', normalized=False)
28
27
  embedding = spectral.fit_transform(adjacency)
29
28
  self.assertAlmostEqual(np.linalg.norm(embedding.sum(axis=0)), 0)
30
- self.assertAlmostEqual(np.linalg.norm(embedding[1:4] - spectral.predict(adjacency[1:4])), 0)
31
29
 
32
30
  def test_directed(self):
33
31
  for adjacency in [test_digraph(), test_digraph().astype(bool)]:
34
32
  # random walk
35
33
  spectral = Spectral(3, normalized=False)
36
34
  embedding = spectral.fit_transform(adjacency)
37
- self.assertAlmostEqual(np.linalg.norm(embedding[6:8] - spectral.predict(adjacency[6:8])), 0)
35
+ self.assertAlmostEqual(embedding.shape[0], adjacency.shape[0])
38
36
  # Laplacian
39
37
  spectral = Spectral(3, decomposition='laplacian', normalized=False)
40
- embedding = spectral.fit_transform(adjacency)
38
+ spectral.fit(adjacency)
41
39
  self.assertAlmostEqual(np.linalg.norm(spectral.eigenvectors_.sum(axis=0)), 0)
42
- self.assertAlmostEqual(np.linalg.norm(embedding[6:8] - spectral.predict(adjacency[6:8])), 0)
43
40
 
44
41
  def test_regularization(self):
45
42
  for adjacency in [test_graph(), test_disconnected_graph()]:
sknetwork/embedding/tests/test_svd.py +7 -1
@@ -24,13 +24,19 @@ class TestSVD(unittest.TestCase):
24
24
  self.assertEqual(gsvd.embedding_row_.shape, (n_row, min_dim))
25
25
  self.assertEqual(gsvd.embedding_col_.shape, (n_col, min_dim))
26
26
 
27
+ embedding = gsvd.predict(np.array([0, 1, 1]))
28
+ self.assertEqual(embedding.shape, (min_dim,))
29
+
27
30
  gsvd = GSVD(n_components=1, regularization=0.1, solver='lanczos')
28
31
  gsvd.fit(biadjacency)
29
- gsvd.predict(np.random.rand(n_col))
32
+ self.assertEqual(gsvd.embedding_row_.shape, (n_row, 1))
30
33
 
31
34
  pca = PCA(n_components=min_dim, solver='lanczos')
32
35
  pca.fit(biadjacency)
33
36
  self.assertEqual(pca.embedding_row_.shape, (n_row, min_dim))
37
+ pca = PCA(n_components=min_dim, solver=LanczosSVD())
38
+ pca.fit(biadjacency)
39
+ self.assertEqual(pca.embedding_row_.shape, (n_row, min_dim))
34
40
 
35
41
  svd = SVD(n_components=min_dim, solver=LanczosSVD())
36
42
  svd.fit(biadjacency)