scikit-network 0.31.0__cp39-cp39-win_amd64.whl → 0.33.0__cp39-cp39-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of scikit-network might be problematic. Click here for more details.
- {scikit_network-0.31.0.dist-info → scikit_network-0.33.0.dist-info}/AUTHORS.rst +3 -1
- {scikit_network-0.31.0.dist-info → scikit_network-0.33.0.dist-info}/METADATA +27 -5
- scikit_network-0.33.0.dist-info/RECORD +228 -0
- {scikit_network-0.31.0.dist-info → scikit_network-0.33.0.dist-info}/WHEEL +1 -1
- sknetwork/__init__.py +1 -1
- sknetwork/classification/base.py +1 -1
- sknetwork/classification/base_rank.py +3 -3
- sknetwork/classification/diffusion.py +25 -16
- sknetwork/classification/knn.py +23 -16
- sknetwork/classification/metrics.py +4 -4
- sknetwork/classification/pagerank.py +12 -8
- sknetwork/classification/propagation.py +25 -17
- sknetwork/classification/tests/test_diffusion.py +10 -0
- sknetwork/classification/vote.cp39-win_amd64.pyd +0 -0
- sknetwork/classification/vote.cpp +14549 -8668
- sknetwork/clustering/__init__.py +3 -1
- sknetwork/clustering/base.py +1 -1
- sknetwork/clustering/kcenters.py +253 -0
- sknetwork/clustering/leiden.py +242 -0
- sknetwork/clustering/leiden_core.cp39-win_amd64.pyd +0 -0
- sknetwork/clustering/leiden_core.cpp +31564 -0
- sknetwork/clustering/leiden_core.pyx +124 -0
- sknetwork/clustering/louvain.py +118 -83
- sknetwork/clustering/louvain_core.cp39-win_amd64.pyd +0 -0
- sknetwork/clustering/louvain_core.cpp +21876 -16332
- sknetwork/clustering/louvain_core.pyx +86 -94
- sknetwork/clustering/postprocess.py +2 -2
- sknetwork/clustering/propagation_clustering.py +4 -4
- sknetwork/clustering/tests/test_API.py +7 -3
- sknetwork/clustering/tests/test_kcenters.py +60 -0
- sknetwork/clustering/tests/test_leiden.py +34 -0
- sknetwork/clustering/tests/test_louvain.py +2 -3
- sknetwork/data/__init__.py +1 -1
- sknetwork/data/base.py +7 -2
- sknetwork/data/load.py +20 -25
- sknetwork/data/models.py +15 -15
- sknetwork/data/parse.py +57 -34
- sknetwork/data/tests/test_API.py +3 -3
- sknetwork/data/tests/test_base.py +2 -2
- sknetwork/data/tests/test_parse.py +9 -12
- sknetwork/data/tests/test_toy_graphs.py +33 -33
- sknetwork/data/toy_graphs.py +35 -43
- sknetwork/embedding/__init__.py +0 -1
- sknetwork/embedding/base.py +23 -19
- sknetwork/embedding/force_atlas.py +3 -2
- sknetwork/embedding/louvain_embedding.py +1 -27
- sknetwork/embedding/random_projection.py +5 -3
- sknetwork/embedding/spectral.py +0 -73
- sknetwork/embedding/svd.py +0 -4
- sknetwork/embedding/tests/test_API.py +4 -28
- sknetwork/embedding/tests/test_louvain_embedding.py +13 -13
- sknetwork/embedding/tests/test_spectral.py +2 -5
- sknetwork/embedding/tests/test_svd.py +7 -1
- sknetwork/gnn/base_layer.py +3 -3
- sknetwork/gnn/gnn_classifier.py +41 -87
- sknetwork/gnn/layer.py +1 -1
- sknetwork/gnn/loss.py +1 -1
- sknetwork/gnn/optimizer.py +4 -3
- sknetwork/gnn/tests/test_base_layer.py +4 -4
- sknetwork/gnn/tests/test_gnn_classifier.py +12 -39
- sknetwork/gnn/utils.py +8 -8
- sknetwork/hierarchy/base.py +27 -0
- sknetwork/hierarchy/louvain_hierarchy.py +55 -47
- sknetwork/hierarchy/paris.cp39-win_amd64.pyd +0 -0
- sknetwork/hierarchy/paris.cpp +27667 -20915
- sknetwork/hierarchy/paris.pyx +11 -10
- sknetwork/hierarchy/postprocess.py +16 -16
- sknetwork/hierarchy/tests/test_algos.py +5 -0
- sknetwork/hierarchy/tests/test_metrics.py +4 -4
- sknetwork/linalg/__init__.py +1 -1
- sknetwork/linalg/diteration.cp39-win_amd64.pyd +0 -0
- sknetwork/linalg/diteration.cpp +13916 -8050
- sknetwork/linalg/{normalization.py → normalizer.py} +17 -14
- sknetwork/linalg/operators.py +1 -1
- sknetwork/linalg/ppr_solver.py +1 -1
- sknetwork/linalg/push.cp39-win_amd64.pyd +0 -0
- sknetwork/linalg/push.cpp +23187 -16973
- sknetwork/linalg/tests/test_normalization.py +3 -7
- sknetwork/linalg/tests/test_operators.py +2 -6
- sknetwork/linalg/tests/test_ppr.py +1 -1
- sknetwork/linkpred/base.py +12 -1
- sknetwork/linkpred/nn.py +6 -6
- sknetwork/path/distances.py +11 -4
- sknetwork/path/shortest_path.py +1 -1
- sknetwork/path/tests/test_distances.py +7 -0
- sknetwork/path/tests/test_search.py +2 -2
- sknetwork/ranking/base.py +11 -6
- sknetwork/ranking/betweenness.cp39-win_amd64.pyd +0 -0
- sknetwork/ranking/betweenness.cpp +5256 -2190
- sknetwork/ranking/pagerank.py +13 -12
- sknetwork/ranking/tests/test_API.py +0 -2
- sknetwork/ranking/tests/test_betweenness.py +1 -1
- sknetwork/ranking/tests/test_pagerank.py +11 -5
- sknetwork/regression/base.py +18 -1
- sknetwork/regression/diffusion.py +30 -14
- sknetwork/regression/tests/test_diffusion.py +8 -0
- sknetwork/topology/__init__.py +3 -1
- sknetwork/topology/cliques.cp39-win_amd64.pyd +0 -0
- sknetwork/topology/cliques.cpp +23528 -16848
- sknetwork/topology/core.cp39-win_amd64.pyd +0 -0
- sknetwork/topology/core.cpp +22849 -16581
- sknetwork/topology/cycles.py +243 -0
- sknetwork/topology/minheap.cp39-win_amd64.pyd +0 -0
- sknetwork/topology/minheap.cpp +19495 -13469
- sknetwork/topology/structure.py +2 -42
- sknetwork/topology/tests/test_cycles.py +65 -0
- sknetwork/topology/tests/test_structure.py +2 -16
- sknetwork/topology/triangles.cp39-win_amd64.pyd +0 -0
- sknetwork/topology/triangles.cpp +5283 -1397
- sknetwork/topology/triangles.pyx +7 -4
- sknetwork/topology/weisfeiler_lehman_core.cp39-win_amd64.pyd +0 -0
- sknetwork/topology/weisfeiler_lehman_core.cpp +14781 -8915
- sknetwork/utils/__init__.py +1 -1
- sknetwork/utils/format.py +1 -1
- sknetwork/utils/membership.py +2 -2
- sknetwork/utils/values.py +5 -3
- sknetwork/visualization/__init__.py +2 -2
- sknetwork/visualization/dendrograms.py +55 -7
- sknetwork/visualization/graphs.py +261 -44
- sknetwork/visualization/tests/test_dendrograms.py +9 -9
- sknetwork/visualization/tests/test_graphs.py +63 -57
- scikit_network-0.31.0.dist-info/RECORD +0 -221
- sknetwork/embedding/louvain_hierarchy.py +0 -142
- sknetwork/embedding/tests/test_louvain_hierarchy.py +0 -19
- {scikit_network-0.31.0.dist-info → scikit_network-0.33.0.dist-info}/LICENSE +0 -0
- {scikit_network-0.31.0.dist-info → scikit_network-0.33.0.dist-info}/top_level.txt +0 -0
sknetwork/data/toy_graphs.py
CHANGED
|
@@ -11,10 +11,10 @@ from typing import Union
|
|
|
11
11
|
import numpy as np
|
|
12
12
|
from scipy import sparse
|
|
13
13
|
|
|
14
|
-
from sknetwork.data.base import Bunch
|
|
14
|
+
from sknetwork.data.base import Dataset
|
|
15
15
|
|
|
16
16
|
|
|
17
|
-
def house(metadata: bool = False) -> Union[sparse.csr_matrix, Bunch]:
|
|
17
|
+
def house(metadata: bool = False) -> Union[sparse.csr_matrix, Dataset]:
|
|
18
18
|
"""House graph.
|
|
19
19
|
|
|
20
20
|
* Undirected graph
|
|
@@ -46,7 +46,7 @@ def house(metadata: bool = False) -> Union[sparse.csr_matrix, Bunch]:
|
|
|
46
46
|
if metadata:
|
|
47
47
|
x = np.array([0, -1, -1, 1, 1])
|
|
48
48
|
y = np.array([2, 1, -1, -1, 1])
|
|
49
|
-
graph = Bunch()
|
|
49
|
+
graph = Dataset()
|
|
50
50
|
graph.adjacency = adjacency
|
|
51
51
|
graph.position = np.vstack((x, y)).T
|
|
52
52
|
graph.name = 'house'
|
|
@@ -55,7 +55,7 @@ def house(metadata: bool = False) -> Union[sparse.csr_matrix, Bunch]:
|
|
|
55
55
|
return adjacency
|
|
56
56
|
|
|
57
57
|
|
|
58
|
-
def bow_tie(metadata: bool = False) -> Union[sparse.csr_matrix, Bunch]:
|
|
58
|
+
def bow_tie(metadata: bool = False) -> Union[sparse.csr_matrix, Dataset]:
|
|
59
59
|
"""Bow tie graph.
|
|
60
60
|
|
|
61
61
|
* Undirected graph
|
|
@@ -86,7 +86,7 @@ def bow_tie(metadata: bool = False) -> Union[sparse.csr_matrix, Bunch]:
|
|
|
86
86
|
if metadata:
|
|
87
87
|
x = np.array([0, -1, 1, -1, 1])
|
|
88
88
|
y = np.array([0, 1, 1, -1, -1])
|
|
89
|
-
graph = Bunch()
|
|
89
|
+
graph = Dataset()
|
|
90
90
|
graph.adjacency = adjacency
|
|
91
91
|
graph.position = np.vstack((x, y)).T
|
|
92
92
|
graph.name = 'bow_tie'
|
|
@@ -95,7 +95,7 @@ def bow_tie(metadata: bool = False) -> Union[sparse.csr_matrix, Bunch]:
|
|
|
95
95
|
return adjacency
|
|
96
96
|
|
|
97
97
|
|
|
98
|
-
def karate_club(metadata: bool = False) -> Union[sparse.csr_matrix, Bunch]:
|
|
98
|
+
def karate_club(metadata: bool = False) -> Union[sparse.csr_matrix, Dataset]:
|
|
99
99
|
"""Karate club graph.
|
|
100
100
|
|
|
101
101
|
* Undirected graph
|
|
@@ -150,7 +150,7 @@ def karate_club(metadata: bool = False) -> Union[sparse.csr_matrix, Bunch]:
|
|
|
150
150
|
[-0.33, -0.15, -0.01, -0.28, -0.64, -0.75, -0.76, -0.25, 0.09, 0.23, -0.62, -0.4, -0.53, -0.07,
|
|
151
151
|
0.55, 0.64, -1., -0.42, 0.6, -0.01, 0.45, -0.34, 0.61, 0.41, 0.14, 0.28, 0.68, 0.21,
|
|
152
152
|
0.12, 0.54, 0.19, 0.09, 0.38, 0.33])
|
|
153
|
-
graph = Bunch()
|
|
153
|
+
graph = Dataset()
|
|
154
154
|
graph.adjacency = adjacency
|
|
155
155
|
graph.labels = labels
|
|
156
156
|
graph.position = np.vstack((x, y)).T
|
|
@@ -160,7 +160,7 @@ def karate_club(metadata: bool = False) -> Union[sparse.csr_matrix, Bunch]:
|
|
|
160
160
|
return adjacency
|
|
161
161
|
|
|
162
162
|
|
|
163
|
-
def miserables(metadata: bool = False) -> Union[sparse.csr_matrix, Bunch]:
|
|
163
|
+
def miserables(metadata: bool = False) -> Union[sparse.csr_matrix, Dataset]:
|
|
164
164
|
"""Co-occurrence graph of the characters in the novel Les miserables by Victor Hugo.
|
|
165
165
|
|
|
166
166
|
* Undirected graph
|
|
@@ -257,7 +257,7 @@ def miserables(metadata: bool = False) -> Union[sparse.csr_matrix, Bunch]:
|
|
|
257
257
|
0.05, 0.12, 0.82, 0.44, 0.06, -0.2, -0.4, -0.28, -0.68, -0.79, -0.4, -0.07, -0.51, -0.17, -0.03,
|
|
258
258
|
-0.09, -0.14, -0.04, -0.04, -0.07, -0.06, -0.11, -0.06, -0.35, 0.24, 0.19, 0.22, 0.29, -0.2,
|
|
259
259
|
0.06, 0.14, 0.3, -0.1])
|
|
260
|
-
graph = Bunch()
|
|
260
|
+
graph = Dataset()
|
|
261
261
|
graph.adjacency = adjacency
|
|
262
262
|
graph.names = np.array(names)
|
|
263
263
|
graph.position = np.vstack((x, y)).T
|
|
@@ -267,7 +267,7 @@ def miserables(metadata: bool = False) -> Union[sparse.csr_matrix, Bunch]:
|
|
|
267
267
|
return adjacency
|
|
268
268
|
|
|
269
269
|
|
|
270
|
-
def painters(metadata: bool = False) -> Union[sparse.csr_matrix, Bunch]:
|
|
270
|
+
def painters(metadata: bool = False) -> Union[sparse.csr_matrix, Dataset]:
|
|
271
271
|
"""Graph of links between some famous painters on Wikipedia.
|
|
272
272
|
|
|
273
273
|
* Directed graph
|
|
@@ -312,7 +312,7 @@ def painters(metadata: bool = False) -> Union[sparse.csr_matrix, Bunch]:
|
|
|
312
312
|
y = np.array(
|
|
313
313
|
[0.53, 0.19, -0.71, 0.44, -0.48, -0.65, 0.69, -0.11, 0.01,
|
|
314
314
|
-1., 0.49, 0.28, 0.06, 0.27])
|
|
315
|
-
graph = Bunch()
|
|
315
|
+
graph = Dataset()
|
|
316
316
|
graph.adjacency = adjacency
|
|
317
317
|
graph.names = names
|
|
318
318
|
graph.position = np.stack((x, y)).T
|
|
@@ -322,7 +322,7 @@ def painters(metadata: bool = False) -> Union[sparse.csr_matrix, Bunch]:
|
|
|
322
322
|
return adjacency
|
|
323
323
|
|
|
324
324
|
|
|
325
|
-
def hourglass(metadata: bool = False) -> Union[sparse.csr_matrix, Bunch]:
|
|
325
|
+
def hourglass(metadata: bool = False) -> Union[sparse.csr_matrix, Dataset]:
|
|
326
326
|
"""Hourglass graph.
|
|
327
327
|
|
|
328
328
|
* Bipartite graph
|
|
@@ -342,14 +342,14 @@ def hourglass(metadata: bool = False) -> Union[sparse.csr_matrix, Bunch]:
|
|
|
342
342
|
"""
|
|
343
343
|
biadjacency = sparse.csr_matrix(np.ones((2, 2), dtype=bool))
|
|
344
344
|
if metadata:
|
|
345
|
-
graph = Bunch()
|
|
345
|
+
graph = Dataset()
|
|
346
346
|
graph.biadjacency = biadjacency
|
|
347
347
|
return graph
|
|
348
348
|
else:
|
|
349
349
|
return biadjacency
|
|
350
350
|
|
|
351
351
|
|
|
352
|
-
def star_wars(metadata: bool = False) -> Union[sparse.csr_matrix, Bunch]:
|
|
352
|
+
def star_wars(metadata: bool = False) -> Union[sparse.csr_matrix, Dataset]:
|
|
353
353
|
"""Bipartite graph connecting some Star Wars villains to the movies in which they appear.
|
|
354
354
|
|
|
355
355
|
* Bipartite graph
|
|
@@ -380,7 +380,7 @@ def star_wars(metadata: bool = False) -> Union[sparse.csr_matrix, Bunch]:
|
|
|
380
380
|
if metadata:
|
|
381
381
|
villains = np.array(['Jabba', 'Greedo', 'Vader', 'Boba'])
|
|
382
382
|
movies = np.array(['A New Hope', 'The Empire Strikes Back', 'Return Of The Jedi'])
|
|
383
|
-
graph = Bunch()
|
|
383
|
+
graph = Dataset()
|
|
384
384
|
graph.biadjacency = biadjacency
|
|
385
385
|
graph.names = villains
|
|
386
386
|
graph.names_row = villains
|
|
@@ -391,14 +391,12 @@ def star_wars(metadata: bool = False) -> Union[sparse.csr_matrix, Bunch]:
|
|
|
391
391
|
return biadjacency
|
|
392
392
|
|
|
393
393
|
|
|
394
|
-
def movie_actor(metadata: bool = False) -> Union[sparse.csr_matrix, Bunch]:
|
|
394
|
+
def movie_actor(metadata: bool = False) -> Union[sparse.csr_matrix, Dataset]:
|
|
395
395
|
"""Bipartite graph connecting movies to some actors starring in them.
|
|
396
396
|
|
|
397
397
|
* Bipartite graph
|
|
398
|
-
*
|
|
399
|
-
* 9 labels (rows)
|
|
398
|
+
* 32 nodes (15 movies, 17 actors), 43 edges
|
|
400
399
|
* Names of movies (rows) and actors (columns)
|
|
401
|
-
* Names of movies production company (rows)
|
|
402
400
|
|
|
403
401
|
Parameters
|
|
404
402
|
----------
|
|
@@ -407,53 +405,47 @@ def movie_actor(metadata: bool = False) -> Union[sparse.csr_matrix, Bunch]:
|
|
|
407
405
|
|
|
408
406
|
Returns
|
|
409
407
|
-------
|
|
410
|
-
biadjacency or
|
|
411
|
-
Biadjacency matrix or
|
|
408
|
+
biadjacency or dataset : Union[sparse.csr_matrix, Dataset]
|
|
409
|
+
Biadjacency matrix or dataset with metadata (names of movies and actors).
|
|
412
410
|
|
|
413
411
|
Example
|
|
414
412
|
-------
|
|
415
413
|
>>> from sknetwork.data import movie_actor
|
|
416
414
|
>>> biadjacency = movie_actor()
|
|
417
415
|
>>> biadjacency.shape
|
|
418
|
-
(15, 16)
|
|
416
|
+
(15, 17)
|
|
419
417
|
"""
|
|
420
418
|
row = np.array(
|
|
421
419
|
[0, 0, 0, 1, 1, 1, 2, 2, 2, 2, 3, 3, 4, 4, 5, 5, 6,
|
|
422
|
-
6, 6, 7, 7, 8, 8, 8, 8, 8, 9, 9, 9, 10, 10, 10, 11, 11, 11,
|
|
420
|
+
6, 6, 7, 7, 8, 8, 8, 8, 8, 9, 9, 9, 10, 10, 10, 10, 11, 11, 11,
|
|
423
421
|
12, 12, 12, 13, 13, 14, 14])
|
|
424
422
|
col = np.array(
|
|
425
423
|
[0, 1, 2, 1, 2, 3, 3, 4, 5, 8, 4, 6, 0, 6, 4, 7, 4,
|
|
426
|
-
7, 8, 3, 8, 9, 10, 11, 12, 15, 0, 11, 12, 9, 10, 13, 5, 9, 13,
|
|
427
|
-
1, 9, 15, 12, 14,
|
|
428
|
-
biadjacency = sparse.csr_matrix((np.ones(len(row), dtype=bool), (row, col)), shape=(15, 16))
|
|
424
|
+
7, 8, 3, 8, 9, 10, 11, 12, 15, 0, 11, 12, 9, 10, 13, 16, 5, 9, 13,
|
|
425
|
+
1, 9, 15, 12, 14, 14, 16])
|
|
426
|
+
biadjacency = sparse.csr_matrix((np.ones(len(row), dtype=bool), (row, col)), shape=(15, 17))
|
|
429
427
|
|
|
430
428
|
if metadata:
|
|
431
429
|
movies = np.array(
|
|
432
430
|
['Inception', 'The Dark Knight Rises', 'The Big Short', 'Drive', 'The Great Gatsby', 'La La Land',
|
|
433
431
|
'Crazy Stupid Love', 'Vice', 'The Grand Budapest Hotel', 'Aviator', '007 Spectre', 'Inglourious Basterds',
|
|
434
|
-
'Midnight In Paris', 'Murder on the Orient Express',
|
|
432
|
+
'Midnight In Paris', 'Murder on the Orient Express', "Pirates of the Caribbean: At World's End"])
|
|
435
433
|
actors = np.array(
|
|
436
434
|
['Leonardo DiCaprio', 'Marion Cotillard', 'Joseph Gordon Lewitt', 'Christian Bale', 'Ryan Gosling',
|
|
437
435
|
'Brad Pitt', 'Carey Mulligan', 'Emma Stone', 'Steve Carell', 'Lea Seydoux', 'Ralph Fiennes', 'Jude Law',
|
|
438
|
-
'Willem Dafoe', 'Christophe Waltz', 'Johnny Depp', 'Owen Wilson'])
|
|
439
|
-
|
|
440
|
-
|
|
441
|
-
|
|
442
|
-
|
|
443
|
-
|
|
444
|
-
|
|
445
|
-
|
|
446
|
-
'Carousel Productions', 'Babelsberg Studios', 'MGM', 'Gravier Productions',
|
|
447
|
-
'Genre Films'])
|
|
448
|
-
graph.labels_row = graph.labels
|
|
449
|
-
graph.labels_row_name = graph.labels_name
|
|
450
|
-
graph.name = 'movie_actor'
|
|
451
|
-
return graph
|
|
436
|
+
'Willem Dafoe', 'Christophe Waltz', 'Johnny Depp', 'Owen Wilson', 'Naomie Harris'])
|
|
437
|
+
dataset = Dataset()
|
|
438
|
+
dataset.biadjacency = biadjacency
|
|
439
|
+
dataset.names = movies
|
|
440
|
+
dataset.names_row = movies
|
|
441
|
+
dataset.names_col = actors
|
|
442
|
+
dataset.name = 'movie_actor'
|
|
443
|
+
return dataset
|
|
452
444
|
else:
|
|
453
445
|
return biadjacency
|
|
454
446
|
|
|
455
447
|
|
|
456
|
-
def art_philo_science(metadata: bool = False) -> Union[sparse.csr_matrix, Bunch]:
|
|
448
|
+
def art_philo_science(metadata: bool = False) -> Union[sparse.csr_matrix, Dataset]:
|
|
457
449
|
"""Wikipedia links between 30 articles (10 artists, 10 philosophers, 10 scientists).
|
|
458
450
|
|
|
459
451
|
* Directed graph
|
|
@@ -605,7 +597,7 @@ def art_philo_science(metadata: bool = False) -> Union[sparse.csr_matrix, Bunch]
|
|
|
605
597
|
words = np.array(
|
|
606
598
|
['contribution', 'theory', 'invention', 'time', 'modern',
|
|
607
599
|
'century', 'study', 'logic', 'school', 'author', 'compose'])
|
|
608
|
-
graph = Bunch()
|
|
600
|
+
graph = Dataset()
|
|
609
601
|
graph.adjacency = adjacency
|
|
610
602
|
graph.names = names
|
|
611
603
|
graph.position = position
|
sknetwork/embedding/__init__.py
CHANGED
|
@@ -2,7 +2,6 @@
|
|
|
2
2
|
from sknetwork.embedding.base import BaseEmbedding
|
|
3
3
|
from sknetwork.embedding.force_atlas import ForceAtlas
|
|
4
4
|
from sknetwork.embedding.louvain_embedding import LouvainEmbedding
|
|
5
|
-
from sknetwork.embedding.louvain_hierarchy import LouvainNE
|
|
6
5
|
from sknetwork.embedding.random_projection import RandomProjection
|
|
7
6
|
from sknetwork.embedding.spectral import Spectral
|
|
8
7
|
from sknetwork.embedding.spring import Spring
|
sknetwork/embedding/base.py
CHANGED
|
@@ -1,11 +1,11 @@
|
|
|
1
1
|
#!/usr/bin/env python3
|
|
2
2
|
# -*- coding: utf-8 -*-
|
|
3
3
|
"""
|
|
4
|
-
Created
|
|
4
|
+
Created in November 2019
|
|
5
5
|
@author: Nathan de Lara <nathan.delara@polytechnique.org>
|
|
6
6
|
"""
|
|
7
7
|
from abc import ABC
|
|
8
|
-
from typing import Union
|
|
8
|
+
from typing import Optional, Union
|
|
9
9
|
|
|
10
10
|
import numpy as np
|
|
11
11
|
from scipy import sparse
|
|
@@ -26,10 +26,19 @@ class BaseEmbedding(Algorithm, ABC):
|
|
|
26
26
|
embedding_col_ : array, shape = (n_col, n_components)
|
|
27
27
|
Embedding of the columns, for bipartite graphs.
|
|
28
28
|
"""
|
|
29
|
-
|
|
30
29
|
def __init__(self):
|
|
31
30
|
self._init_vars()
|
|
32
31
|
|
|
32
|
+
def transform(self) -> np.ndarray:
|
|
33
|
+
"""Return the embedding.
|
|
34
|
+
|
|
35
|
+
Returns
|
|
36
|
+
-------
|
|
37
|
+
embedding : np.ndarray
|
|
38
|
+
Embedding.
|
|
39
|
+
"""
|
|
40
|
+
return self.embedding_
|
|
41
|
+
|
|
33
42
|
def fit_transform(self, *args, **kwargs) -> np.ndarray:
|
|
34
43
|
"""Fit to data and return the embedding. Same parameters as the ``fit`` method.
|
|
35
44
|
|
|
@@ -41,30 +50,22 @@ class BaseEmbedding(Algorithm, ABC):
|
|
|
41
50
|
self.fit(*args, **kwargs)
|
|
42
51
|
return self.embedding_
|
|
43
52
|
|
|
44
|
-
def predict(self,
|
|
45
|
-
"""
|
|
46
|
-
|
|
47
|
-
Each new node is defined by its adjacency row vector.
|
|
53
|
+
def predict(self, columns: bool = False) -> np.ndarray:
|
|
54
|
+
"""Return the embedding of nodes.
|
|
48
55
|
|
|
49
56
|
Parameters
|
|
50
57
|
----------
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
Array of shape (n_col,) (single vector) or (n_vectors, n_col)
|
|
58
|
+
columns : bool
|
|
59
|
+
If ``True``, return the prediction for columns.
|
|
54
60
|
|
|
55
61
|
Returns
|
|
56
62
|
-------
|
|
57
|
-
|
|
63
|
+
embedding_ : np.ndarray
|
|
58
64
|
Embedding of the nodes.
|
|
59
65
|
"""
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
if self.embedding_ is None:
|
|
64
|
-
raise ValueError("This embedding instance is not fitted yet."
|
|
65
|
-
" Call 'fit' with appropriate arguments before using this method.")
|
|
66
|
-
else:
|
|
67
|
-
return self
|
|
66
|
+
if columns:
|
|
67
|
+
return self.embedding_col_
|
|
68
|
+
return self.embedding_
|
|
68
69
|
|
|
69
70
|
@staticmethod
|
|
70
71
|
def _get_regularization(regularization: float, adjacency: sparse.csr_matrix) -> float:
|
|
@@ -81,6 +82,9 @@ class BaseEmbedding(Algorithm, ABC):
|
|
|
81
82
|
self.embedding_row_ = None
|
|
82
83
|
self.embedding_col_ = None
|
|
83
84
|
|
|
85
|
+
def _check_fitted(self):
|
|
86
|
+
return self.embedding_ is not None
|
|
87
|
+
|
|
84
88
|
def _split_vars(self, shape):
|
|
85
89
|
"""Split labels_ into labels_row_ and labels_col_"""
|
|
86
90
|
n_row = shape[0]
|
|
@@ -77,6 +77,7 @@ class ForceAtlas(BaseEmbedding):
|
|
|
77
77
|
self.tolerance = tolerance
|
|
78
78
|
self.speed = speed
|
|
79
79
|
self.speed_max = speed_max
|
|
80
|
+
self.embedding_ = None
|
|
80
81
|
|
|
81
82
|
def fit(self, adjacency: Union[sparse.csr_matrix, np.ndarray], pos_init: Optional[np.ndarray] = None,
|
|
82
83
|
n_iter: Optional[int] = None) -> 'ForceAtlas':
|
|
@@ -155,7 +156,7 @@ class ForceAtlas(BaseEmbedding):
|
|
|
155
156
|
if tree is None:
|
|
156
157
|
neighbors = np.arange(n)
|
|
157
158
|
else:
|
|
158
|
-
neighbors = tree.query_ball_point(position[i], self.approx_radius)
|
|
159
|
+
neighbors = tree.query_ball_point(position[i], self.approx_radius, p=2)
|
|
159
160
|
|
|
160
161
|
grad: np.ndarray = (position[i] - position[neighbors]) # shape (n_neigh, n_components)
|
|
161
162
|
distance: np.ndarray = np.linalg.norm(grad, axis=1) # shape (n_neigh,)
|
|
@@ -191,7 +192,7 @@ class ForceAtlas(BaseEmbedding):
|
|
|
191
192
|
|
|
192
193
|
position += delta # calculating displacement and final position of points after iteration
|
|
193
194
|
if (swing_vector < 1).all():
|
|
194
|
-
break # if the swing of all nodes is zero, then convergence is reached
|
|
195
|
+
break # if the swing of all nodes is zero, then convergence is reached.
|
|
195
196
|
|
|
196
197
|
self.embedding_ = position
|
|
197
198
|
return self
|
|
@@ -12,7 +12,7 @@ from scipy import sparse
|
|
|
12
12
|
|
|
13
13
|
from sknetwork.clustering.louvain import Louvain
|
|
14
14
|
from sknetwork.embedding.base import BaseEmbedding
|
|
15
|
-
from sknetwork.linalg.normalization import normalize
|
|
15
|
+
from sknetwork.linalg.normalizer import normalize
|
|
16
16
|
from sknetwork.utils.check import check_random_state, check_adjacency_vector, check_nonnegative, is_square
|
|
17
17
|
from sknetwork.utils.membership import get_membership
|
|
18
18
|
|
|
@@ -146,29 +146,3 @@ class LouvainEmbedding(BaseEmbedding):
|
|
|
146
146
|
self.embedding_col_ = embedding_col.toarray()
|
|
147
147
|
|
|
148
148
|
return self
|
|
149
|
-
|
|
150
|
-
def predict(self, adjacency_vectors: Union[sparse.csr_matrix, np.ndarray]) -> np.ndarray:
|
|
151
|
-
"""Predict the embedding of new rows, defined by their adjacency vectors.
|
|
152
|
-
|
|
153
|
-
Parameters
|
|
154
|
-
----------
|
|
155
|
-
adjacency_vectors :
|
|
156
|
-
Adjacency row vectors.
|
|
157
|
-
Array of shape (n_col,) (single vector) or (n_vectors, n_col)
|
|
158
|
-
|
|
159
|
-
Returns
|
|
160
|
-
-------
|
|
161
|
-
embedding_vectors : np.ndarray
|
|
162
|
-
Embedding of the nodes.
|
|
163
|
-
"""
|
|
164
|
-
self._check_fitted()
|
|
165
|
-
if self.embedding_col_ is not None:
|
|
166
|
-
n = len(self.embedding_col_)
|
|
167
|
-
else:
|
|
168
|
-
n = len(self.embedding_)
|
|
169
|
-
|
|
170
|
-
adjacency_vectors = check_adjacency_vector(adjacency_vectors, n)
|
|
171
|
-
check_nonnegative(adjacency_vectors)
|
|
172
|
-
membership = get_membership(self.labels_)
|
|
173
|
-
|
|
174
|
-
return normalize(adjacency_vectors).dot(membership)
|
|
@@ -1,9 +1,10 @@
|
|
|
1
1
|
#!/usr/bin/env python3
|
|
2
2
|
# coding: utf-8
|
|
3
3
|
"""
|
|
4
|
-
Created
|
|
4
|
+
Created in January 2021
|
|
5
5
|
@author: Thomas Bonald <bonald@enst.fr>
|
|
6
6
|
"""
|
|
7
|
+
from abc import ABC
|
|
7
8
|
from typing import Union
|
|
8
9
|
|
|
9
10
|
import numpy as np
|
|
@@ -15,7 +16,7 @@ from sknetwork.utils.check import check_format, check_random_state
|
|
|
15
16
|
from sknetwork.utils.format import get_adjacency
|
|
16
17
|
|
|
17
18
|
|
|
18
|
-
class RandomProjection(BaseEmbedding):
|
|
19
|
+
class RandomProjection(BaseEmbedding, ABC):
|
|
19
20
|
"""Embedding of graphs based the random projection of the adjacency matrix:
|
|
20
21
|
|
|
21
22
|
:math:`(I + \\alpha A +... + (\\alpha A)^K)G`
|
|
@@ -71,6 +72,7 @@ class RandomProjection(BaseEmbedding):
|
|
|
71
72
|
regularization: float = -1, normalized: bool = True, random_state: int = None):
|
|
72
73
|
super(RandomProjection, self).__init__()
|
|
73
74
|
|
|
75
|
+
self.embedding_ = None
|
|
74
76
|
self.n_components = n_components
|
|
75
77
|
self.alpha = alpha
|
|
76
78
|
self.n_iter = n_iter
|
|
@@ -87,7 +89,7 @@ class RandomProjection(BaseEmbedding):
|
|
|
87
89
|
|
|
88
90
|
Parameters
|
|
89
91
|
----------
|
|
90
|
-
input_matrix :
|
|
92
|
+
input_matrix : sparse.csr_matrix, np.ndarray
|
|
91
93
|
Adjacency matrix or biadjacency matrix of the graph.
|
|
92
94
|
force_bipartite : bool (default = ``False``)
|
|
93
95
|
If ``True``, force the input matrix to be considered as a biadjacency matrix.
|
sknetwork/embedding/spectral.py
CHANGED
|
@@ -139,76 +139,3 @@ class Spectral(BaseEmbedding):
|
|
|
139
139
|
self._split_vars(input_matrix.shape)
|
|
140
140
|
|
|
141
141
|
return self
|
|
142
|
-
|
|
143
|
-
def predict(self, adjacency_vectors: Union[sparse.csr_matrix, np.ndarray]) -> np.ndarray:
|
|
144
|
-
"""Predict the embedding of new nodes, when possible (otherwise return 0).
|
|
145
|
-
|
|
146
|
-
Each new node is defined by its adjacency row vector.
|
|
147
|
-
|
|
148
|
-
Parameters
|
|
149
|
-
----------
|
|
150
|
-
adjacency_vectors :
|
|
151
|
-
Adjacency vectors of nodes.
|
|
152
|
-
Array of shape (n_col,) (single vector) or (n_vectors, n_col)
|
|
153
|
-
|
|
154
|
-
Returns
|
|
155
|
-
-------
|
|
156
|
-
embedding_vectors : np.ndarray
|
|
157
|
-
Embedding of the nodes.
|
|
158
|
-
|
|
159
|
-
Example
|
|
160
|
-
-------
|
|
161
|
-
>>> from sknetwork.embedding import Spectral
|
|
162
|
-
>>> from sknetwork.data import karate_club
|
|
163
|
-
>>> spectral = Spectral(n_components=3)
|
|
164
|
-
>>> adjacency = karate_club()
|
|
165
|
-
>>> adjacency_vector = np.arange(34) < 5
|
|
166
|
-
>>> _ = spectral.fit(adjacency)
|
|
167
|
-
>>> len(spectral.predict(adjacency_vector))
|
|
168
|
-
3
|
|
169
|
-
"""
|
|
170
|
-
self._check_fitted()
|
|
171
|
-
|
|
172
|
-
# input
|
|
173
|
-
if self.bipartite:
|
|
174
|
-
n = len(self.embedding_col_)
|
|
175
|
-
else:
|
|
176
|
-
n = len(self.embedding_)
|
|
177
|
-
adjacency_vectors = check_adjacency_vector(adjacency_vectors, n)
|
|
178
|
-
check_nonnegative(adjacency_vectors)
|
|
179
|
-
|
|
180
|
-
if self.bipartite:
|
|
181
|
-
shape = (adjacency_vectors.shape[0], self.embedding_row_.shape[0])
|
|
182
|
-
adjacency_vectors = sparse.csr_matrix(adjacency_vectors)
|
|
183
|
-
adjacency_vectors = sparse.hstack([sparse.csr_matrix(shape), adjacency_vectors], format='csr')
|
|
184
|
-
eigenvectors = self.eigenvectors_
|
|
185
|
-
eigenvalues = self.eigenvalues_
|
|
186
|
-
|
|
187
|
-
# regularization
|
|
188
|
-
if self.regularized:
|
|
189
|
-
regularization = np.abs(self.regularization)
|
|
190
|
-
else:
|
|
191
|
-
regularization = 0
|
|
192
|
-
normalizer = Normalizer(adjacency_vectors, regularization)
|
|
193
|
-
|
|
194
|
-
# prediction
|
|
195
|
-
embedding_vectors = normalizer.dot(eigenvectors)
|
|
196
|
-
normalized_laplacian = self.decomposition == 'rw'
|
|
197
|
-
if normalized_laplacian:
|
|
198
|
-
norm_vect = eigenvalues.copy()
|
|
199
|
-
norm_vect[norm_vect == 0] = 1
|
|
200
|
-
embedding_vectors /= norm_vect
|
|
201
|
-
else:
|
|
202
|
-
norm_matrix = sparse.csr_matrix(1 - np.outer(normalizer.norm_diag.data, eigenvalues))
|
|
203
|
-
norm_matrix.data = 1 / norm_matrix.data
|
|
204
|
-
embedding_vectors *= norm_matrix.toarray()
|
|
205
|
-
|
|
206
|
-
# normalization
|
|
207
|
-
if self.normalized:
|
|
208
|
-
embedding_vectors = normalize(embedding_vectors, p=2)
|
|
209
|
-
|
|
210
|
-
# shape
|
|
211
|
-
if len(embedding_vectors) == 1:
|
|
212
|
-
embedding_vectors = embedding_vectors.ravel()
|
|
213
|
-
|
|
214
|
-
return embedding_vectors
|
sknetwork/embedding/svd.py
CHANGED
|
@@ -277,10 +277,6 @@ class SVD(GSVD):
|
|
|
277
277
|
factor_singular=factor_singular, factor_row=0., factor_col=0., normalized=normalized,
|
|
278
278
|
solver=solver)
|
|
279
279
|
|
|
280
|
-
@staticmethod
|
|
281
|
-
def _check_adj_vector(adjacency_vectors: np.ndarray):
|
|
282
|
-
return
|
|
283
|
-
|
|
284
280
|
|
|
285
281
|
class PCA(SVD):
|
|
286
282
|
"""Graph embedding by Principal Component Analysis of the adjacency or biadjacency matrix.
|
|
@@ -13,7 +13,6 @@ class TestEmbeddings(unittest.TestCase):
|
|
|
13
13
|
def setUp(self):
|
|
14
14
|
"""Algorithms by input types."""
|
|
15
15
|
self.methods = [Spectral(), GSVD(), SVD()]
|
|
16
|
-
self.bimethods = [GSVD(), SVD()]
|
|
17
16
|
|
|
18
17
|
def test_undirected(self):
|
|
19
18
|
adjacency = test_graph()
|
|
@@ -22,44 +21,21 @@ class TestEmbeddings(unittest.TestCase):
|
|
|
22
21
|
method = Spring()
|
|
23
22
|
embedding = method.fit_transform(adjacency)
|
|
24
23
|
self.assertEqual(embedding.shape, (n, 2))
|
|
25
|
-
pred1 = method.predict(adjacency[0])
|
|
26
|
-
pred2 = method.predict(adjacency[0].toarray())
|
|
27
|
-
self.assertEqual(pred1.shape, (2,))
|
|
28
|
-
self.assertAlmostEqual(np.linalg.norm(pred1 - pred2), 0)
|
|
29
24
|
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
self.assertTupleEqual(pred1.shape, (n, 2))
|
|
33
|
-
self.assertAlmostEqual(np.linalg.norm(pred1 - pred2), 0)
|
|
34
|
-
|
|
35
|
-
def test_bimethods(self):
|
|
25
|
+
embedding = method.transform()
|
|
26
|
+
self.assertEqual(embedding.shape, (n, 2))
|
|
36
27
|
|
|
28
|
+
def test_bipartite(self):
|
|
37
29
|
for adjacency in [test_digraph(), test_bigraph()]:
|
|
38
30
|
n_row, n_col = adjacency.shape
|
|
39
31
|
|
|
40
|
-
for method in self.bimethods:
|
|
32
|
+
for method in self.methods:
|
|
41
33
|
method.fit(adjacency)
|
|
42
34
|
|
|
43
35
|
self.assertEqual(method.embedding_.shape, (n_row, 2))
|
|
44
36
|
self.assertEqual(method.embedding_row_.shape, (n_row, 2))
|
|
45
37
|
self.assertEqual(method.embedding_col_.shape, (n_col, 2))
|
|
46
38
|
|
|
47
|
-
ref = method.embedding_[0]
|
|
48
|
-
pred1 = method.predict(adjacency[0])
|
|
49
|
-
pred2 = method.predict(adjacency[0].toarray())
|
|
50
|
-
|
|
51
|
-
self.assertEqual(pred1.shape, (2,))
|
|
52
|
-
self.assertAlmostEqual(np.linalg.norm(pred1 - pred2), 0)
|
|
53
|
-
self.assertAlmostEqual(np.linalg.norm(pred1 - ref), 0)
|
|
54
|
-
|
|
55
|
-
ref = method.embedding_
|
|
56
|
-
pred1 = method.predict(adjacency)
|
|
57
|
-
pred2 = method.predict(adjacency.toarray())
|
|
58
|
-
|
|
59
|
-
self.assertTupleEqual(pred1.shape, (n_row, 2))
|
|
60
|
-
self.assertAlmostEqual(np.linalg.norm(pred1 - pred2), 0)
|
|
61
|
-
self.assertAlmostEqual(np.linalg.norm(pred1 - ref), 0)
|
|
62
|
-
|
|
63
39
|
def test_disconnected(self):
|
|
64
40
|
n = 10
|
|
65
41
|
adjacency = np.eye(n)
|
|
@@ -12,22 +12,22 @@ from sknetwork.embedding import LouvainEmbedding
|
|
|
12
12
|
class TestLouvainEmbedding(unittest.TestCase):
|
|
13
13
|
|
|
14
14
|
def test_predict(self):
|
|
15
|
+
adjacency = test_graph()
|
|
16
|
+
adjacency_vector = np.zeros(10, dtype=int)
|
|
17
|
+
adjacency_vector[:5] = 1
|
|
15
18
|
louvain = LouvainEmbedding()
|
|
16
|
-
louvain.fit(
|
|
19
|
+
louvain.fit(adjacency)
|
|
17
20
|
self.assertEqual(louvain.embedding_.shape[0], 10)
|
|
18
|
-
louvain.fit(
|
|
21
|
+
louvain.fit(adjacency, force_bipartite=True)
|
|
19
22
|
self.assertEqual(louvain.embedding_.shape[0], 10)
|
|
20
23
|
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
24
|
+
# bipartite
|
|
25
|
+
biadjacency = test_bigraph()
|
|
26
|
+
louvain.fit(biadjacency)
|
|
27
|
+
self.assertEqual(louvain.embedding_row_.shape[0], 6)
|
|
28
|
+
self.assertEqual(louvain.embedding_col_.shape[0], 8)
|
|
26
29
|
|
|
27
30
|
for method in ['remove', 'merge', 'keep']:
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
self.assertEqual(embedding_vector.shape[0], 1)
|
|
32
|
-
|
|
33
|
-
|
|
31
|
+
louvain = LouvainEmbedding(isolated_nodes=method)
|
|
32
|
+
embedding = louvain.fit_transform(adjacency)
|
|
33
|
+
self.assertEqual(embedding.shape[0], adjacency.shape[0])
|
|
@@ -22,24 +22,21 @@ class TestEmbeddings(unittest.TestCase):
|
|
|
22
22
|
if not is_weakly_connected(adjacency):
|
|
23
23
|
weights += 1
|
|
24
24
|
self.assertAlmostEqual(np.linalg.norm(embedding.T.dot(weights)), 0)
|
|
25
|
-
self.assertAlmostEqual(np.linalg.norm(embedding[1:4] - spectral.predict(adjacency[1:4])), 0)
|
|
26
25
|
# Laplacian
|
|
27
26
|
spectral = Spectral(3, decomposition='laplacian', normalized=False)
|
|
28
27
|
embedding = spectral.fit_transform(adjacency)
|
|
29
28
|
self.assertAlmostEqual(np.linalg.norm(embedding.sum(axis=0)), 0)
|
|
30
|
-
self.assertAlmostEqual(np.linalg.norm(embedding[1:4] - spectral.predict(adjacency[1:4])), 0)
|
|
31
29
|
|
|
32
30
|
def test_directed(self):
|
|
33
31
|
for adjacency in [test_digraph(), test_digraph().astype(bool)]:
|
|
34
32
|
# random walk
|
|
35
33
|
spectral = Spectral(3, normalized=False)
|
|
36
34
|
embedding = spectral.fit_transform(adjacency)
|
|
37
|
-
self.assertAlmostEqual(
|
|
35
|
+
self.assertAlmostEqual(embedding.shape[0], adjacency.shape[0])
|
|
38
36
|
# Laplacian
|
|
39
37
|
spectral = Spectral(3, decomposition='laplacian', normalized=False)
|
|
40
|
-
|
|
38
|
+
spectral.fit(adjacency)
|
|
41
39
|
self.assertAlmostEqual(np.linalg.norm(spectral.eigenvectors_.sum(axis=0)), 0)
|
|
42
|
-
self.assertAlmostEqual(np.linalg.norm(embedding[6:8] - spectral.predict(adjacency[6:8])), 0)
|
|
43
40
|
|
|
44
41
|
def test_regularization(self):
|
|
45
42
|
for adjacency in [test_graph(), test_disconnected_graph()]:
|
|
@@ -24,13 +24,19 @@ class TestSVD(unittest.TestCase):
|
|
|
24
24
|
self.assertEqual(gsvd.embedding_row_.shape, (n_row, min_dim))
|
|
25
25
|
self.assertEqual(gsvd.embedding_col_.shape, (n_col, min_dim))
|
|
26
26
|
|
|
27
|
+
embedding = gsvd.predict(np.array([0, 1, 1]))
|
|
28
|
+
self.assertEqual(embedding.shape, (min_dim,))
|
|
29
|
+
|
|
27
30
|
gsvd = GSVD(n_components=1, regularization=0.1, solver='lanczos')
|
|
28
31
|
gsvd.fit(biadjacency)
|
|
29
|
-
|
|
32
|
+
self.assertEqual(gsvd.embedding_row_.shape, (n_row, 1))
|
|
30
33
|
|
|
31
34
|
pca = PCA(n_components=min_dim, solver='lanczos')
|
|
32
35
|
pca.fit(biadjacency)
|
|
33
36
|
self.assertEqual(pca.embedding_row_.shape, (n_row, min_dim))
|
|
37
|
+
pca = PCA(n_components=min_dim, solver=LanczosSVD())
|
|
38
|
+
pca.fit(biadjacency)
|
|
39
|
+
self.assertEqual(pca.embedding_row_.shape, (n_row, min_dim))
|
|
34
40
|
|
|
35
41
|
svd = SVD(n_components=min_dim, solver=LanczosSVD())
|
|
36
42
|
svd.fit(biadjacency)
|