scikit-network 0.33.3__cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of scikit-network might be problematic. Click here for more details.
- scikit_network-0.33.3.dist-info/METADATA +122 -0
- scikit_network-0.33.3.dist-info/RECORD +229 -0
- scikit_network-0.33.3.dist-info/WHEEL +6 -0
- scikit_network-0.33.3.dist-info/licenses/AUTHORS.rst +43 -0
- scikit_network-0.33.3.dist-info/licenses/LICENSE +34 -0
- scikit_network-0.33.3.dist-info/top_level.txt +1 -0
- scikit_network.libs/libgomp-d22c30c5.so.1.0.0 +0 -0
- sknetwork/__init__.py +21 -0
- sknetwork/base.py +67 -0
- sknetwork/classification/__init__.py +8 -0
- sknetwork/classification/base.py +142 -0
- sknetwork/classification/base_rank.py +133 -0
- sknetwork/classification/diffusion.py +134 -0
- sknetwork/classification/knn.py +139 -0
- sknetwork/classification/metrics.py +205 -0
- sknetwork/classification/pagerank.py +66 -0
- sknetwork/classification/propagation.py +152 -0
- sknetwork/classification/tests/__init__.py +1 -0
- sknetwork/classification/tests/test_API.py +30 -0
- sknetwork/classification/tests/test_diffusion.py +77 -0
- sknetwork/classification/tests/test_knn.py +23 -0
- sknetwork/classification/tests/test_metrics.py +53 -0
- sknetwork/classification/tests/test_pagerank.py +20 -0
- sknetwork/classification/tests/test_propagation.py +24 -0
- sknetwork/classification/vote.cpp +27587 -0
- sknetwork/classification/vote.cpython-39-aarch64-linux-gnu.so +0 -0
- sknetwork/classification/vote.pyx +56 -0
- sknetwork/clustering/__init__.py +8 -0
- sknetwork/clustering/base.py +172 -0
- sknetwork/clustering/kcenters.py +253 -0
- sknetwork/clustering/leiden.py +242 -0
- sknetwork/clustering/leiden_core.cpp +31578 -0
- sknetwork/clustering/leiden_core.cpython-39-aarch64-linux-gnu.so +0 -0
- sknetwork/clustering/leiden_core.pyx +124 -0
- sknetwork/clustering/louvain.py +286 -0
- sknetwork/clustering/louvain_core.cpp +31223 -0
- sknetwork/clustering/louvain_core.cpython-39-aarch64-linux-gnu.so +0 -0
- sknetwork/clustering/louvain_core.pyx +124 -0
- sknetwork/clustering/metrics.py +91 -0
- sknetwork/clustering/postprocess.py +66 -0
- sknetwork/clustering/propagation_clustering.py +104 -0
- sknetwork/clustering/tests/__init__.py +1 -0
- sknetwork/clustering/tests/test_API.py +38 -0
- sknetwork/clustering/tests/test_kcenters.py +60 -0
- sknetwork/clustering/tests/test_leiden.py +34 -0
- sknetwork/clustering/tests/test_louvain.py +135 -0
- sknetwork/clustering/tests/test_metrics.py +50 -0
- sknetwork/clustering/tests/test_postprocess.py +39 -0
- sknetwork/data/__init__.py +6 -0
- sknetwork/data/base.py +33 -0
- sknetwork/data/load.py +406 -0
- sknetwork/data/models.py +459 -0
- sknetwork/data/parse.py +644 -0
- sknetwork/data/test_graphs.py +84 -0
- sknetwork/data/tests/__init__.py +1 -0
- sknetwork/data/tests/test_API.py +30 -0
- sknetwork/data/tests/test_base.py +14 -0
- sknetwork/data/tests/test_load.py +95 -0
- sknetwork/data/tests/test_models.py +52 -0
- sknetwork/data/tests/test_parse.py +250 -0
- sknetwork/data/tests/test_test_graphs.py +29 -0
- sknetwork/data/tests/test_toy_graphs.py +68 -0
- sknetwork/data/timeout.py +38 -0
- sknetwork/data/toy_graphs.py +611 -0
- sknetwork/embedding/__init__.py +8 -0
- sknetwork/embedding/base.py +94 -0
- sknetwork/embedding/force_atlas.py +198 -0
- sknetwork/embedding/louvain_embedding.py +148 -0
- sknetwork/embedding/random_projection.py +135 -0
- sknetwork/embedding/spectral.py +141 -0
- sknetwork/embedding/spring.py +198 -0
- sknetwork/embedding/svd.py +359 -0
- sknetwork/embedding/tests/__init__.py +1 -0
- sknetwork/embedding/tests/test_API.py +49 -0
- sknetwork/embedding/tests/test_force_atlas.py +35 -0
- sknetwork/embedding/tests/test_louvain_embedding.py +33 -0
- sknetwork/embedding/tests/test_random_projection.py +28 -0
- sknetwork/embedding/tests/test_spectral.py +81 -0
- sknetwork/embedding/tests/test_spring.py +50 -0
- sknetwork/embedding/tests/test_svd.py +43 -0
- sknetwork/gnn/__init__.py +10 -0
- sknetwork/gnn/activation.py +117 -0
- sknetwork/gnn/base.py +181 -0
- sknetwork/gnn/base_activation.py +90 -0
- sknetwork/gnn/base_layer.py +109 -0
- sknetwork/gnn/gnn_classifier.py +305 -0
- sknetwork/gnn/layer.py +153 -0
- sknetwork/gnn/loss.py +180 -0
- sknetwork/gnn/neighbor_sampler.py +65 -0
- sknetwork/gnn/optimizer.py +164 -0
- sknetwork/gnn/tests/__init__.py +1 -0
- sknetwork/gnn/tests/test_activation.py +56 -0
- sknetwork/gnn/tests/test_base.py +75 -0
- sknetwork/gnn/tests/test_base_layer.py +37 -0
- sknetwork/gnn/tests/test_gnn_classifier.py +130 -0
- sknetwork/gnn/tests/test_layers.py +80 -0
- sknetwork/gnn/tests/test_loss.py +33 -0
- sknetwork/gnn/tests/test_neigh_sampler.py +23 -0
- sknetwork/gnn/tests/test_optimizer.py +43 -0
- sknetwork/gnn/tests/test_utils.py +41 -0
- sknetwork/gnn/utils.py +127 -0
- sknetwork/hierarchy/__init__.py +6 -0
- sknetwork/hierarchy/base.py +96 -0
- sknetwork/hierarchy/louvain_hierarchy.py +272 -0
- sknetwork/hierarchy/metrics.py +234 -0
- sknetwork/hierarchy/paris.cpp +37889 -0
- sknetwork/hierarchy/paris.cpython-39-aarch64-linux-gnu.so +0 -0
- sknetwork/hierarchy/paris.pyx +316 -0
- sknetwork/hierarchy/postprocess.py +350 -0
- sknetwork/hierarchy/tests/__init__.py +1 -0
- sknetwork/hierarchy/tests/test_API.py +24 -0
- sknetwork/hierarchy/tests/test_algos.py +34 -0
- sknetwork/hierarchy/tests/test_metrics.py +62 -0
- sknetwork/hierarchy/tests/test_postprocess.py +57 -0
- sknetwork/linalg/__init__.py +9 -0
- sknetwork/linalg/basics.py +37 -0
- sknetwork/linalg/diteration.cpp +27403 -0
- sknetwork/linalg/diteration.cpython-39-aarch64-linux-gnu.so +0 -0
- sknetwork/linalg/diteration.pyx +47 -0
- sknetwork/linalg/eig_solver.py +93 -0
- sknetwork/linalg/laplacian.py +15 -0
- sknetwork/linalg/normalizer.py +86 -0
- sknetwork/linalg/operators.py +225 -0
- sknetwork/linalg/polynome.py +76 -0
- sknetwork/linalg/ppr_solver.py +170 -0
- sknetwork/linalg/push.cpp +31093 -0
- sknetwork/linalg/push.cpython-39-aarch64-linux-gnu.so +0 -0
- sknetwork/linalg/push.pyx +71 -0
- sknetwork/linalg/sparse_lowrank.py +142 -0
- sknetwork/linalg/svd_solver.py +91 -0
- sknetwork/linalg/tests/__init__.py +1 -0
- sknetwork/linalg/tests/test_eig.py +44 -0
- sknetwork/linalg/tests/test_laplacian.py +18 -0
- sknetwork/linalg/tests/test_normalization.py +34 -0
- sknetwork/linalg/tests/test_operators.py +66 -0
- sknetwork/linalg/tests/test_polynome.py +38 -0
- sknetwork/linalg/tests/test_ppr.py +50 -0
- sknetwork/linalg/tests/test_sparse_lowrank.py +61 -0
- sknetwork/linalg/tests/test_svd.py +38 -0
- sknetwork/linkpred/__init__.py +2 -0
- sknetwork/linkpred/base.py +46 -0
- sknetwork/linkpred/nn.py +126 -0
- sknetwork/linkpred/tests/__init__.py +1 -0
- sknetwork/linkpred/tests/test_nn.py +27 -0
- sknetwork/log.py +19 -0
- sknetwork/path/__init__.py +5 -0
- sknetwork/path/dag.py +54 -0
- sknetwork/path/distances.py +98 -0
- sknetwork/path/search.py +31 -0
- sknetwork/path/shortest_path.py +61 -0
- sknetwork/path/tests/__init__.py +1 -0
- sknetwork/path/tests/test_dag.py +37 -0
- sknetwork/path/tests/test_distances.py +62 -0
- sknetwork/path/tests/test_search.py +40 -0
- sknetwork/path/tests/test_shortest_path.py +40 -0
- sknetwork/ranking/__init__.py +8 -0
- sknetwork/ranking/base.py +61 -0
- sknetwork/ranking/betweenness.cpp +9710 -0
- sknetwork/ranking/betweenness.cpython-39-aarch64-linux-gnu.so +0 -0
- sknetwork/ranking/betweenness.pyx +97 -0
- sknetwork/ranking/closeness.py +92 -0
- sknetwork/ranking/hits.py +94 -0
- sknetwork/ranking/katz.py +83 -0
- sknetwork/ranking/pagerank.py +110 -0
- sknetwork/ranking/postprocess.py +37 -0
- sknetwork/ranking/tests/__init__.py +1 -0
- sknetwork/ranking/tests/test_API.py +32 -0
- sknetwork/ranking/tests/test_betweenness.py +38 -0
- sknetwork/ranking/tests/test_closeness.py +30 -0
- sknetwork/ranking/tests/test_hits.py +20 -0
- sknetwork/ranking/tests/test_pagerank.py +62 -0
- sknetwork/ranking/tests/test_postprocess.py +26 -0
- sknetwork/regression/__init__.py +4 -0
- sknetwork/regression/base.py +61 -0
- sknetwork/regression/diffusion.py +210 -0
- sknetwork/regression/tests/__init__.py +1 -0
- sknetwork/regression/tests/test_API.py +32 -0
- sknetwork/regression/tests/test_diffusion.py +56 -0
- sknetwork/sknetwork.py +3 -0
- sknetwork/test_base.py +35 -0
- sknetwork/test_log.py +15 -0
- sknetwork/topology/__init__.py +8 -0
- sknetwork/topology/cliques.cpp +32586 -0
- sknetwork/topology/cliques.cpython-39-aarch64-linux-gnu.so +0 -0
- sknetwork/topology/cliques.pyx +149 -0
- sknetwork/topology/core.cpp +30672 -0
- sknetwork/topology/core.cpython-39-aarch64-linux-gnu.so +0 -0
- sknetwork/topology/core.pyx +90 -0
- sknetwork/topology/cycles.py +243 -0
- sknetwork/topology/minheap.cpp +27335 -0
- sknetwork/topology/minheap.cpython-39-aarch64-linux-gnu.so +0 -0
- sknetwork/topology/minheap.pxd +20 -0
- sknetwork/topology/minheap.pyx +109 -0
- sknetwork/topology/structure.py +194 -0
- sknetwork/topology/tests/__init__.py +1 -0
- sknetwork/topology/tests/test_cliques.py +28 -0
- sknetwork/topology/tests/test_core.py +19 -0
- sknetwork/topology/tests/test_cycles.py +65 -0
- sknetwork/topology/tests/test_structure.py +85 -0
- sknetwork/topology/tests/test_triangles.py +38 -0
- sknetwork/topology/tests/test_wl.py +72 -0
- sknetwork/topology/triangles.cpp +8897 -0
- sknetwork/topology/triangles.cpython-39-aarch64-linux-gnu.so +0 -0
- sknetwork/topology/triangles.pyx +151 -0
- sknetwork/topology/weisfeiler_lehman.py +133 -0
- sknetwork/topology/weisfeiler_lehman_core.cpp +27638 -0
- sknetwork/topology/weisfeiler_lehman_core.cpython-39-aarch64-linux-gnu.so +0 -0
- sknetwork/topology/weisfeiler_lehman_core.pyx +114 -0
- sknetwork/utils/__init__.py +7 -0
- sknetwork/utils/check.py +355 -0
- sknetwork/utils/format.py +221 -0
- sknetwork/utils/membership.py +82 -0
- sknetwork/utils/neighbors.py +115 -0
- sknetwork/utils/tests/__init__.py +1 -0
- sknetwork/utils/tests/test_check.py +190 -0
- sknetwork/utils/tests/test_format.py +63 -0
- sknetwork/utils/tests/test_membership.py +24 -0
- sknetwork/utils/tests/test_neighbors.py +41 -0
- sknetwork/utils/tests/test_tfidf.py +18 -0
- sknetwork/utils/tests/test_values.py +66 -0
- sknetwork/utils/tfidf.py +37 -0
- sknetwork/utils/values.py +76 -0
- sknetwork/visualization/__init__.py +4 -0
- sknetwork/visualization/colors.py +34 -0
- sknetwork/visualization/dendrograms.py +277 -0
- sknetwork/visualization/graphs.py +1039 -0
- sknetwork/visualization/tests/__init__.py +1 -0
- sknetwork/visualization/tests/test_dendrograms.py +53 -0
- sknetwork/visualization/tests/test_graphs.py +176 -0
|
@@ -0,0 +1,94 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
# -*- coding: utf-8 -*-
|
|
3
|
+
"""
|
|
4
|
+
Created in November 2019
|
|
5
|
+
@author: Nathan de Lara <nathan.delara@polytechnique.org>
|
|
6
|
+
"""
|
|
7
|
+
from abc import ABC
|
|
8
|
+
from typing import Optional, Union
|
|
9
|
+
|
|
10
|
+
import numpy as np
|
|
11
|
+
from scipy import sparse
|
|
12
|
+
|
|
13
|
+
from sknetwork.topology.structure import is_connected
|
|
14
|
+
from sknetwork.base import Algorithm
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class BaseEmbedding(Algorithm, ABC):
    """Common interface of the embedding algorithms.

    Attributes
    ----------
    embedding_ : array, shape = (n, n_components)
        Embedding of the nodes.
    embedding_row_ : array, shape = (n_row, n_components)
        Embedding of the rows, for bipartite graphs.
    embedding_col_ : array, shape = (n_col, n_components)
        Embedding of the columns, for bipartite graphs.
    """
    def __init__(self):
        self._init_vars()

    def transform(self) -> np.ndarray:
        """Give access to the embedding stored on the object.

        Returns
        -------
        embedding : np.ndarray
            Current value of ``embedding_``.
        """
        return self.embedding_

    def fit_transform(self, *args, **kwargs) -> np.ndarray:
        """Call ``fit`` with the given arguments, then return the embedding.

        All positional and keyword arguments are forwarded to ``fit``.

        Returns
        -------
        embedding : np.ndarray
            Value of ``embedding_`` after fitting.
        """
        self.fit(*args, **kwargs)
        return self.embedding_

    def predict(self, columns: bool = False) -> np.ndarray:
        """Return the embedding of the nodes (or of the columns).

        Parameters
        ----------
        columns : bool
            If ``True``, return ``embedding_col_`` instead of ``embedding_``.

        Returns
        -------
        embedding_ : np.ndarray
            The selected embedding.
        """
        return self.embedding_col_ if columns else self.embedding_

    @staticmethod
    def _get_regularization(regularization: float, adjacency: sparse.csr_matrix) -> float:
        """Resolve the effective regularization from the graph connectivity.

        A value that is not negative is returned unchanged. A negative value yields 0 when the graph
        is strongly connected and its absolute value otherwise.
        """
        if not regularization < 0:
            return regularization
        if is_connected(adjacency, connection='strong'):
            return 0
        return np.abs(regularization)

    def _init_vars(self):
        """Reset the three embedding attributes to ``None``."""
        self.embedding_ = None
        self.embedding_row_ = None
        self.embedding_col_ = None

    def _check_fitted(self):
        """Tell whether an embedding is available, i.e., ``embedding_`` is not ``None``."""
        return self.embedding_ is not None

    def _split_vars(self, shape):
        """Split ``embedding_`` into ``embedding_row_`` and ``embedding_col_``.

        The first ``shape[0]`` rows of ``embedding_`` become the row embedding, the remaining ones
        the column embedding; ``embedding_`` is then rebound to the row embedding.
        """
        n_row = shape[0]
        rows, cols = self.embedding_[:n_row], self.embedding_[n_row:]
        self.embedding_row_ = rows
        self.embedding_col_ = cols
        self.embedding_ = rows
        return self
|
|
@@ -0,0 +1,198 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
# coding: utf-8
|
|
3
|
+
"""
|
|
4
|
+
Created on Jun 2020
|
|
5
|
+
@author: Victor Manach <victor.manach@telecom-paris.fr>
|
|
6
|
+
@author: Rémi Jaylet <remi.jaylet@telecom-paris.fr>
|
|
7
|
+
"""
|
|
8
|
+
from typing import Optional, Union
|
|
9
|
+
|
|
10
|
+
import numpy as np
|
|
11
|
+
from scipy import sparse
|
|
12
|
+
from scipy.spatial import cKDTree
|
|
13
|
+
|
|
14
|
+
from sknetwork.embedding.base import BaseEmbedding
|
|
15
|
+
from sknetwork.utils.check import check_format, is_symmetric, check_square
|
|
16
|
+
from sknetwork.utils.format import directed2undirected
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class ForceAtlas(BaseEmbedding):
    """Force Atlas layout for displaying graphs.

    Parameters
    ----------
    n_components : int
        Dimension of the graph layout.
    n_iter : int
        Number of iterations to update positions.
    approx_radius : float
        If a positive value is provided, only the nodes within this distance of a given node are used to compute
        the repulsive force.
    lin_log : bool
        If ``True``, use lin-log mode.
    gravity_factor : float
        Gravity force scaling constant.
    repulsive_factor : float
        Repulsive force scaling constant.
    tolerance : float
        Tolerance defined in the swinging constant.
        NOTE(review): this value is stored but ``fit`` does not read it; ``fit`` picks its own tolerance
        from the number of nodes.
    speed : float
        Speed constant.
    speed_max : float
        Constant used to impose a constraint on speed.

    Attributes
    ----------
    embedding_ : np.ndarray
        Layout in given dimension.

    Example
    -------
    >>> from sknetwork.embedding.force_atlas import ForceAtlas
    >>> from sknetwork.data import karate_club
    >>> force_atlas = ForceAtlas()
    >>> adjacency = karate_club()
    >>> embedding = force_atlas.fit_transform(adjacency)
    >>> embedding.shape
    (34, 2)

    References
    ----------
    Jacomy M., Venturini T., Heymann S., Bastian M. (2014).
    `ForceAtlas2, a Continuous Graph Layout Algorithm for Handy Network Visualization Designed for the Gephi Software.
    <https://journals.plos.org/plosone/article?id=10.1371/journal.pone.0098679>`_
    Plos One.
    """
    def __init__(self, n_components: int = 2, n_iter: int = 50, approx_radius: float = -1, lin_log: bool = False,
                 gravity_factor: float = 0.01, repulsive_factor: float = 0.1, tolerance: float = 0.1,
                 speed: float = 0.1, speed_max: float = 10):
        super(ForceAtlas, self).__init__()
        self.n_components = n_components
        self.n_iter = n_iter
        self.approx_radius = approx_radius
        self.lin_log = lin_log
        self.gravity_factor = gravity_factor
        self.repulsive_factor = repulsive_factor
        self.tolerance = tolerance
        self.speed = speed
        self.speed_max = speed_max
        self.embedding_ = None

    def fit(self, adjacency: Union[sparse.csr_matrix, np.ndarray], pos_init: Optional[np.ndarray] = None,
            n_iter: Optional[int] = None) -> 'ForceAtlas':
        """Compute layout.

        Parameters
        ----------
        adjacency :
            Adjacency matrix of the graph, treated as undirected.
        pos_init :
            Position to start with. Random if not provided.
            The array is copied, so the caller's data is left untouched.
        n_iter : int
            Number of iterations to update positions.
            If ``None``, use the value of self.n_iter.

        Returns
        -------
        self: :class:`ForceAtlas`

        Raises
        ------
        ValueError
            If ``pos_init`` does not have shape ``(n, n_components)``.
        """
        # verify the format of the adjacency matrix
        adjacency = check_format(adjacency)
        check_square(adjacency)
        if not is_symmetric(adjacency):
            adjacency = directed2undirected(adjacency)
        n = adjacency.shape[0]

        # setting of the tolerance according to the size of the graph
        if n < 5000:
            tolerance = 0.1
        elif n < 50000:  # pragma: no cover
            tolerance = 1
        else:  # pragma: no cover
            tolerance = 10

        if n_iter is None:
            n_iter = self.n_iter

        # initial position of the nodes of the graph
        if pos_init is None:
            position: np.ndarray = np.random.randn(n, self.n_components)
        elif pos_init.shape != (n, self.n_components):
            raise ValueError('The initial position does not have valid dimensions.')
        else:
            # Work on a float copy: the in-place updates below must neither modify the caller's array
            # nor fail on an integer-typed initial position.
            position = np.array(pos_init, dtype=float)

        # compute the vector with the degree of each node
        degree: np.ndarray = adjacency.dot(np.ones(adjacency.shape[1])) + 1

        # initialization of variation of position of nodes
        resultants = np.zeros(n)
        delta: np.ndarray = np.zeros((n, self.n_components))
        swing_vector: np.ndarray = np.zeros(n)
        global_speed = 1

        # without a radius, every node repels every node; this index set does not change
        all_nodes = np.arange(n)

        for _ in range(n_iter):
            delta *= 0
            global_swing = 0
            global_traction = 0

            if self.approx_radius > 0:
                tree = cKDTree(position)
            else:
                tree = None

            for i in range(n):

                # attraction
                indices = adjacency.indices[adjacency.indptr[i]:adjacency.indptr[i + 1]]
                attraction = position[i] - position[indices]

                if self.lin_log:
                    attraction = np.sign(attraction) * np.log(1 + np.abs(10 * attraction))
                attraction = attraction.sum(axis=0)

                # repulsion
                if tree is None:
                    neighbors = all_nodes
                else:
                    neighbors = tree.query_ball_point(position[i], self.approx_radius, p=2)

                grad: np.ndarray = (position[i] - position[neighbors])  # shape (n_neigh, n_components)
                distance: np.ndarray = np.linalg.norm(grad, axis=1)  # shape (n_neigh,)
                distance = np.where(distance < 0.01, 0.01, distance)  # floor to avoid huge forces
                repulsion = grad * (degree[neighbors] / distance)[:, np.newaxis]

                repulsion *= self.repulsive_factor * degree[i]
                repulsion = repulsion.sum(axis=0)

                # gravity
                gravity = self.gravity_factor * degree[i] * grad
                gravity = gravity.sum(axis=0)

                # forces resultant applied on node i for traction, swing and speed computation
                force = repulsion - attraction - gravity
                resultant_new: float = np.linalg.norm(force)
                resultant_old: float = resultants[i]

                swing_node: float = np.abs(resultant_new - resultant_old)  # force variation applied on node i
                swing_vector[i] = swing_node
                global_swing += (degree[i] + 1) * swing_node

                traction: float = np.abs(resultant_new + resultant_old) / 2  # traction force applied on node i
                global_traction += (degree[i] + 1) * traction

                node_speed = self.speed * global_speed / (1 + global_speed * np.sqrt(swing_node))
                # A zero resultant means there is no cap to apply (and nothing to divide by).
                if resultant_new > 0 and node_speed > self.speed_max / resultant_new:  # pragma: no cover
                    node_speed = self.speed_max / resultant_new

                delta[i] = node_speed * force
                resultants[i] = resultant_new
                # Keep the previous speed when no swing has been accumulated yet: dividing by zero would
                # turn the speed into nan/inf and contaminate the positions.
                if global_swing > 0:
                    global_speed = tolerance * global_traction / global_swing

            position += delta  # calculating displacement and final position of points after iteration
            if (swing_vector < 1).all():
                break  # stop as soon as the swing of every node is below 1

        self.embedding_ = position
        return self
|
|
@@ -0,0 +1,148 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
# coding: utf-8
|
|
3
|
+
"""
|
|
4
|
+
Created in September 2020
|
|
5
|
+
@author: Quentin Lutz <qlutz@enst.fr>
|
|
6
|
+
@author: Thomas Bonald <bonald@enst.fr>
|
|
7
|
+
"""
|
|
8
|
+
from typing import Optional, Union
|
|
9
|
+
|
|
10
|
+
import numpy as np
|
|
11
|
+
from scipy import sparse
|
|
12
|
+
|
|
13
|
+
from sknetwork.clustering.louvain import Louvain
|
|
14
|
+
from sknetwork.embedding.base import BaseEmbedding
|
|
15
|
+
from sknetwork.linalg.normalizer import normalize
|
|
16
|
+
from sknetwork.utils.check import check_random_state, check_adjacency_vector, check_nonnegative, is_square
|
|
17
|
+
from sknetwork.utils.membership import get_membership
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def reindex_labels(labels: np.ndarray, labels_secondary: Optional[np.ndarray] = None, which: str = 'remove'):
    """Reindex labels, removing or merging labels of count 1.

    Parameters
    ----------
    labels : np.ndarray
        Non-negative integer labels. Labels occurring at least twice are renumbered 0, 1, ... in
        increasing order of their original value.
    labels_secondary : np.ndarray, optional
        Second set of labels sharing the same label space. Labels kept in ``labels`` receive the same
        new index; all other labels are mapped to -1, whatever the value of ``which``.
    which : str
        What to do with labels occurring once in ``labels``: ``'remove'`` maps them to -1,
        ``'merge'`` maps them all to one extra label (the number of kept labels),
        any other value leaves ``labels`` unchanged.

    Returns
    -------
    labels : np.ndarray
        Reindexed labels.
    labels_secondary : np.ndarray or None
        Reindexed secondary labels, ``None`` if not provided.
    """
    labels_unique, counts = np.unique(labels, return_counts=True)
    n_labels = max(labels_unique) + 1
    labels_keep = labels_unique[counts > 1]
    n_keep = len(labels_keep)
    if which == 'remove':
        label_index = -np.ones(n_labels, dtype='int')
        label_index[labels_keep] = np.arange(n_keep)
    elif which == 'merge':
        label_index = n_keep * np.ones(n_labels, dtype='int')
        label_index[labels_keep] = np.arange(n_keep)
    else:
        label_index = np.arange(n_labels)
    labels = label_index[labels]
    if labels_secondary is not None:
        # The lookup table is indexed both by the kept primary labels and by the secondary labels,
        # so it must cover the larger of the two ranges (a table sized on the secondary labels only
        # overflows as soon as a kept label exceeds the largest secondary label).
        n_labels_secondary = max(n_labels, max(labels_secondary) + 1)
        label_index = -np.ones(n_labels_secondary, dtype='int')
        label_index[labels_keep] = np.arange(n_keep)
        labels_secondary = label_index[labels_secondary]
    return labels, labels_secondary
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
class LouvainEmbedding(BaseEmbedding):
    """Embedding of graphs induced by Louvain clustering. Each component of the embedding corresponds
    to a cluster obtained by Louvain.

    Parameters
    ----------
    resolution : float
        Resolution parameter.
    modularity : str
        Which objective function to maximize. Can be ``'Dugue'``, ``'Newman'`` or ``'Potts'``
        (stored in lower case).
    tol_optimization :
        Minimum increase in the objective function to enter a new optimization pass.
    tol_aggregation :
        Minimum increase in the objective function to enter a new aggregation pass.
    n_aggregations :
        Maximum number of aggregations.
        A negative value is interpreted as no limit.
    shuffle_nodes :
        Enables node shuffling before optimization.
    random_state :
        Random number generator or random seed. If ``None``, numpy.random is used.
    isolated_nodes : str
        What to do with isolated column nodes. Can be ``'remove'`` (default), ``'merge'`` or ``'keep'``.

    Attributes
    ----------
    embedding_ : array, shape = (n, n_components)
        Embedding of the nodes.
    embedding_row_ : array, shape = (n_row, n_components)
        Embedding of the rows, for bipartite graphs.
    embedding_col_ : array, shape = (n_col, n_components)
        Embedding of the columns, for bipartite graphs.
    labels_ : np.ndarray
        Reindexed labels used to build ``embedding_``.
    labels_row_ : np.ndarray
        Labels of the rows (used to build the embedding of the columns).
        ``None`` if the input matrix is square.
    labels_col_ : np.ndarray
        Labels of the columns (used to build the embedding of the rows).
        ``None`` if the input matrix is square.

    Example
    -------
    >>> from sknetwork.embedding import LouvainEmbedding
    >>> from sknetwork.data import house
    >>> louvain = LouvainEmbedding()
    >>> adjacency = house()
    >>> embedding = louvain.fit_transform(adjacency)
    >>> embedding.shape
    (5, 2)
    """
    def __init__(self, resolution: float = 1, modularity: str = 'Dugue', tol_optimization: float = 1e-3,
                 tol_aggregation: float = 1e-3, n_aggregations: int = -1, shuffle_nodes: bool = False,
                 random_state: Optional[Union[np.random.RandomState, int]] = None, isolated_nodes: str = 'remove'):
        super(LouvainEmbedding, self).__init__()
        self.resolution = resolution
        self.modularity = modularity.lower()
        self.tol_optimization = tol_optimization
        self.tol_aggregation = tol_aggregation
        self.n_aggregations = n_aggregations
        self.shuffle_nodes = shuffle_nodes
        self.random_state = check_random_state(random_state)
        self.isolated_nodes = isolated_nodes

        self.labels_ = None
        # documented attributes: make sure they exist even before fitting
        self.labels_row_ = None
        self.labels_col_ = None
        self.embedding_ = None
        self.embedding_row_ = None
        self.embedding_col_ = None

    def fit(self, input_matrix: sparse.csr_matrix, force_bipartite: bool = False):
        """Embedding of graphs from the clustering obtained with Louvain.

        Parameters
        ----------
        input_matrix :
            Adjacency matrix or biadjacency matrix of the graph.
        force_bipartite : bool (default = ``False``)
            If ``True``, force the input matrix to be considered as a biadjacency matrix.
            This flag is forwarded to the Louvain clustering.

        Returns
        -------
        self: :class:`LouvainEmbedding`
        """
        louvain = Louvain(resolution=self.resolution, modularity=self.modularity,
                          tol_optimization=self.tol_optimization, tol_aggregation=self.tol_aggregation,
                          n_aggregations=self.n_aggregations, shuffle_nodes=self.shuffle_nodes, sort_clusters=False,
                          return_probs=True, return_aggregate=True, random_state=self.random_state)
        louvain.fit(input_matrix, force_bipartite=force_bipartite)

        # isolated nodes
        # NOTE(review): the choice below depends on the shape only; for a square matrix fitted with
        # force_bipartite=True it still reads louvain.labels_ -- confirm whether labels_col_ is intended.
        if is_square(input_matrix):
            labels = louvain.labels_
            labels_secondary = None
        else:
            labels = louvain.labels_col_
            labels_secondary = louvain.labels_row_

        self.labels_, labels_row = reindex_labels(labels, labels_secondary, self.isolated_nodes)

        # expose the labels announced in the class documentation
        if labels_row is None:
            self.labels_row_ = None
            self.labels_col_ = None
        else:
            self.labels_row_ = labels_row
            self.labels_col_ = self.labels_

        # embedding
        probs = normalize(input_matrix)
        embedding_ = probs.dot(get_membership(self.labels_))
        self.embedding_ = embedding_.toarray()

        if labels_row is not None:
            # columns are embedded through the labels of the rows they are linked to
            probs = normalize(input_matrix.T)
            embedding_col = probs.dot(get_membership(labels_row))
            self.embedding_row_ = self.embedding_
            self.embedding_col_ = embedding_col.toarray()

        return self
|
|
@@ -0,0 +1,135 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
# coding: utf-8
|
|
3
|
+
"""
|
|
4
|
+
Created in January 2021
|
|
5
|
+
@author: Thomas Bonald <bonald@enst.fr>
|
|
6
|
+
"""
|
|
7
|
+
from abc import ABC
|
|
8
|
+
from typing import Union
|
|
9
|
+
|
|
10
|
+
import numpy as np
|
|
11
|
+
from scipy import sparse
|
|
12
|
+
|
|
13
|
+
from sknetwork.embedding.base import BaseEmbedding
|
|
14
|
+
from sknetwork.linalg import Regularizer, Normalizer, normalize
|
|
15
|
+
from sknetwork.utils.check import check_format, check_random_state
|
|
16
|
+
from sknetwork.utils.format import get_adjacency
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class RandomProjection(BaseEmbedding, ABC):
    """Graph embedding obtained by random projection of the adjacency matrix:

    :math:`(I + \\alpha A +... + (\\alpha A)^K)G`

    where :math:`A` is the adjacency matrix, :math:`G` is a random Gaussian matrix,
    :math:`\\alpha` is some smoothing factor and :math:`K` some non-negative integer.

    Parameters
    ----------
    n_components : int (default = 2)
        Dimension of the embedding space.
    alpha : float (default = 0.5)
        Smoothing parameter.
    n_iter : int (default = 3)
        Number of power iterations of the adjacency matrix.
    random_walk : bool (default = ``False``)
        If ``True``, use the transition matrix of the random walk, :math:`P = D^{-1}A`, instead of the adjacency matrix.
    regularization : float (default = ``-1``)
        Regularization factor :math:`\\alpha` so that the matrix is :math:`A + \\alpha \\frac{11^T}{n}`.
        If negative, regularization is applied only if the graph is disconnected (and then equal to the absolute value
        of the parameter).
    normalized : bool (default = ``True``)
        If ``True``, normalize the embedding so that each vector has norm 1 in the embedding space, i.e.,
        each vector lies on the unit sphere.
    random_state : int, optional
        Seed used by the random number generator.

    Attributes
    ----------
    embedding_ : array, shape = (n, n_components)
        Embedding of the nodes.
    embedding_row_ : array, shape = (n_row, n_components)
        Embedding of the rows, for bipartite graphs.
    embedding_col_ : array, shape = (n_col, n_components)
        Embedding of the columns, for bipartite graphs.

    Example
    -------
    >>> from sknetwork.embedding import RandomProjection
    >>> from sknetwork.data import karate_club
    >>> projection = RandomProjection()
    >>> adjacency = karate_club()
    >>> embedding = projection.fit_transform(adjacency)
    >>> embedding.shape
    (34, 2)

    References
    ----------
    Zhang, Z., Cui, P., Li, H., Wang, X., & Zhu, W. (2018).
    Billion-scale network embedding with iterative random projection, ICDM.
    """
    def __init__(self, n_components: int = 2, alpha: float = 0.5, n_iter: int = 3, random_walk: bool = False,
                 regularization: float = -1, normalized: bool = True, random_state: int = None):
        super(RandomProjection, self).__init__()

        # hyper-parameters
        self.n_components = n_components
        self.alpha = alpha
        self.n_iter = n_iter
        self.random_walk = random_walk
        self.regularization = regularization
        self.normalized = normalized
        self.random_state = random_state

        # state filled in by ``fit``
        self.embedding_ = None
        self.bipartite = None
        self.regularized = None

    def fit(self, input_matrix: Union[sparse.csr_matrix, np.ndarray], force_bipartite: bool = False) \
            -> 'RandomProjection':
        """Compute the embedding of the graph.

        Parameters
        ----------
        input_matrix : sparse.csr_matrix, np.ndarray
            Adjacency matrix or biadjacency matrix of the graph.
        force_bipartite : bool (default = ``False``)
            If ``True``, force the input matrix to be considered as a biadjacency matrix.

        Returns
        -------
        self: :class:`RandomProjection`
        """
        # input
        input_matrix = check_format(input_matrix)
        adjacency, self.bipartite = get_adjacency(input_matrix, force_bipartite=force_bipartite)
        n_nodes = adjacency.shape[0]

        # effective regularization, depending on the connectivity of the graph
        reg = self._get_regularization(self.regularization, adjacency)
        self.regularized = reg > 0

        # linear operator applied at each power iteration
        operator_class = Normalizer if self.random_walk else Regularizer
        operator = operator_class(adjacency, reg)

        # Gaussian matrix, replaced by the Q factor of its QR decomposition
        generator = check_random_state(self.random_state)
        basis, _ = np.linalg.qr(generator.normal(size=(n_nodes, self.n_components)))

        # accumulate the successive terms (alpha * operator)^t applied to the basis
        term = basis
        projection = term.copy()
        for _ in range(self.n_iter):
            term = self.alpha * operator.dot(term)
            projection += term

        # output, normalized if required
        self.embedding_ = normalize(projection, p=2) if self.normalized else projection
        if self.bipartite:
            self._split_vars(input_matrix.shape)
        return self
|