scikit-network 0.28.3__cp39-cp39-macosx_12_0_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of scikit-network might be problematic. Click here for more details.
- scikit_network-0.28.3.dist-info/AUTHORS.rst +41 -0
- scikit_network-0.28.3.dist-info/LICENSE +34 -0
- scikit_network-0.28.3.dist-info/METADATA +457 -0
- scikit_network-0.28.3.dist-info/RECORD +240 -0
- scikit_network-0.28.3.dist-info/WHEEL +5 -0
- scikit_network-0.28.3.dist-info/top_level.txt +1 -0
- sknetwork/__init__.py +21 -0
- sknetwork/classification/__init__.py +8 -0
- sknetwork/classification/base.py +84 -0
- sknetwork/classification/base_rank.py +143 -0
- sknetwork/classification/diffusion.py +134 -0
- sknetwork/classification/knn.py +162 -0
- sknetwork/classification/metrics.py +205 -0
- sknetwork/classification/pagerank.py +66 -0
- sknetwork/classification/propagation.py +152 -0
- sknetwork/classification/tests/__init__.py +1 -0
- sknetwork/classification/tests/test_API.py +35 -0
- sknetwork/classification/tests/test_diffusion.py +37 -0
- sknetwork/classification/tests/test_knn.py +24 -0
- sknetwork/classification/tests/test_metrics.py +53 -0
- sknetwork/classification/tests/test_pagerank.py +20 -0
- sknetwork/classification/tests/test_propagation.py +24 -0
- sknetwork/classification/vote.cpython-39-darwin.so +0 -0
- sknetwork/classification/vote.pyx +58 -0
- sknetwork/clustering/__init__.py +7 -0
- sknetwork/clustering/base.py +102 -0
- sknetwork/clustering/kmeans.py +142 -0
- sknetwork/clustering/louvain.py +255 -0
- sknetwork/clustering/louvain_core.cpython-39-darwin.so +0 -0
- sknetwork/clustering/louvain_core.pyx +134 -0
- sknetwork/clustering/metrics.py +91 -0
- sknetwork/clustering/postprocess.py +66 -0
- sknetwork/clustering/propagation_clustering.py +108 -0
- sknetwork/clustering/tests/__init__.py +1 -0
- sknetwork/clustering/tests/test_API.py +37 -0
- sknetwork/clustering/tests/test_kmeans.py +47 -0
- sknetwork/clustering/tests/test_louvain.py +104 -0
- sknetwork/clustering/tests/test_metrics.py +50 -0
- sknetwork/clustering/tests/test_post_processing.py +23 -0
- sknetwork/clustering/tests/test_postprocess.py +39 -0
- sknetwork/data/__init__.py +5 -0
- sknetwork/data/load.py +408 -0
- sknetwork/data/models.py +459 -0
- sknetwork/data/parse.py +621 -0
- sknetwork/data/test_graphs.py +84 -0
- sknetwork/data/tests/__init__.py +1 -0
- sknetwork/data/tests/test_API.py +30 -0
- sknetwork/data/tests/test_load.py +95 -0
- sknetwork/data/tests/test_models.py +52 -0
- sknetwork/data/tests/test_parse.py +253 -0
- sknetwork/data/tests/test_test_graphs.py +30 -0
- sknetwork/data/tests/test_toy_graphs.py +68 -0
- sknetwork/data/toy_graphs.py +619 -0
- sknetwork/embedding/__init__.py +10 -0
- sknetwork/embedding/base.py +90 -0
- sknetwork/embedding/force_atlas.py +197 -0
- sknetwork/embedding/louvain_embedding.py +174 -0
- sknetwork/embedding/louvain_hierarchy.py +142 -0
- sknetwork/embedding/metrics.py +66 -0
- sknetwork/embedding/random_projection.py +133 -0
- sknetwork/embedding/spectral.py +214 -0
- sknetwork/embedding/spring.py +198 -0
- sknetwork/embedding/svd.py +363 -0
- sknetwork/embedding/tests/__init__.py +1 -0
- sknetwork/embedding/tests/test_API.py +73 -0
- sknetwork/embedding/tests/test_force_atlas.py +35 -0
- sknetwork/embedding/tests/test_louvain_embedding.py +33 -0
- sknetwork/embedding/tests/test_louvain_hierarchy.py +19 -0
- sknetwork/embedding/tests/test_metrics.py +29 -0
- sknetwork/embedding/tests/test_random_projection.py +28 -0
- sknetwork/embedding/tests/test_spectral.py +84 -0
- sknetwork/embedding/tests/test_spring.py +50 -0
- sknetwork/embedding/tests/test_svd.py +37 -0
- sknetwork/flow/__init__.py +3 -0
- sknetwork/flow/flow.py +73 -0
- sknetwork/flow/tests/__init__.py +1 -0
- sknetwork/flow/tests/test_flow.py +17 -0
- sknetwork/flow/tests/test_utils.py +69 -0
- sknetwork/flow/utils.py +91 -0
- sknetwork/gnn/__init__.py +10 -0
- sknetwork/gnn/activation.py +117 -0
- sknetwork/gnn/base.py +155 -0
- sknetwork/gnn/base_activation.py +89 -0
- sknetwork/gnn/base_layer.py +109 -0
- sknetwork/gnn/gnn_classifier.py +381 -0
- sknetwork/gnn/layer.py +153 -0
- sknetwork/gnn/layers.py +127 -0
- sknetwork/gnn/loss.py +180 -0
- sknetwork/gnn/neighbor_sampler.py +65 -0
- sknetwork/gnn/optimizer.py +163 -0
- sknetwork/gnn/tests/__init__.py +1 -0
- sknetwork/gnn/tests/test_activation.py +56 -0
- sknetwork/gnn/tests/test_base.py +79 -0
- sknetwork/gnn/tests/test_base_layer.py +37 -0
- sknetwork/gnn/tests/test_gnn_classifier.py +192 -0
- sknetwork/gnn/tests/test_layers.py +80 -0
- sknetwork/gnn/tests/test_loss.py +33 -0
- sknetwork/gnn/tests/test_neigh_sampler.py +23 -0
- sknetwork/gnn/tests/test_optimizer.py +43 -0
- sknetwork/gnn/tests/test_utils.py +93 -0
- sknetwork/gnn/utils.py +219 -0
- sknetwork/hierarchy/__init__.py +7 -0
- sknetwork/hierarchy/base.py +69 -0
- sknetwork/hierarchy/louvain_hierarchy.py +264 -0
- sknetwork/hierarchy/metrics.py +234 -0
- sknetwork/hierarchy/paris.cpython-39-darwin.so +0 -0
- sknetwork/hierarchy/paris.pyx +317 -0
- sknetwork/hierarchy/postprocess.py +350 -0
- sknetwork/hierarchy/tests/__init__.py +1 -0
- sknetwork/hierarchy/tests/test_API.py +25 -0
- sknetwork/hierarchy/tests/test_algos.py +29 -0
- sknetwork/hierarchy/tests/test_metrics.py +62 -0
- sknetwork/hierarchy/tests/test_postprocess.py +57 -0
- sknetwork/hierarchy/tests/test_ward.py +25 -0
- sknetwork/hierarchy/ward.py +94 -0
- sknetwork/linalg/__init__.py +9 -0
- sknetwork/linalg/basics.py +37 -0
- sknetwork/linalg/diteration.cpython-39-darwin.so +0 -0
- sknetwork/linalg/diteration.pyx +49 -0
- sknetwork/linalg/eig_solver.py +93 -0
- sknetwork/linalg/laplacian.py +15 -0
- sknetwork/linalg/normalization.py +66 -0
- sknetwork/linalg/operators.py +225 -0
- sknetwork/linalg/polynome.py +76 -0
- sknetwork/linalg/ppr_solver.py +170 -0
- sknetwork/linalg/push.cpython-39-darwin.so +0 -0
- sknetwork/linalg/push.pyx +73 -0
- sknetwork/linalg/sparse_lowrank.py +142 -0
- sknetwork/linalg/svd_solver.py +91 -0
- sknetwork/linalg/tests/__init__.py +1 -0
- sknetwork/linalg/tests/test_eig.py +44 -0
- sknetwork/linalg/tests/test_laplacian.py +18 -0
- sknetwork/linalg/tests/test_normalization.py +38 -0
- sknetwork/linalg/tests/test_operators.py +70 -0
- sknetwork/linalg/tests/test_polynome.py +38 -0
- sknetwork/linalg/tests/test_ppr.py +50 -0
- sknetwork/linalg/tests/test_sparse_lowrank.py +61 -0
- sknetwork/linalg/tests/test_svd.py +38 -0
- sknetwork/linkpred/__init__.py +4 -0
- sknetwork/linkpred/base.py +80 -0
- sknetwork/linkpred/first_order.py +508 -0
- sknetwork/linkpred/first_order_core.cpython-39-darwin.so +0 -0
- sknetwork/linkpred/first_order_core.pyx +315 -0
- sknetwork/linkpred/postprocessing.py +98 -0
- sknetwork/linkpred/tests/__init__.py +1 -0
- sknetwork/linkpred/tests/test_API.py +49 -0
- sknetwork/linkpred/tests/test_postprocessing.py +21 -0
- sknetwork/path/__init__.py +4 -0
- sknetwork/path/metrics.py +148 -0
- sknetwork/path/search.py +65 -0
- sknetwork/path/shortest_path.py +186 -0
- sknetwork/path/tests/__init__.py +1 -0
- sknetwork/path/tests/test_metrics.py +29 -0
- sknetwork/path/tests/test_search.py +25 -0
- sknetwork/path/tests/test_shortest_path.py +45 -0
- sknetwork/ranking/__init__.py +9 -0
- sknetwork/ranking/base.py +56 -0
- sknetwork/ranking/betweenness.cpython-39-darwin.so +0 -0
- sknetwork/ranking/betweenness.pyx +99 -0
- sknetwork/ranking/closeness.py +95 -0
- sknetwork/ranking/harmonic.py +82 -0
- sknetwork/ranking/hits.py +94 -0
- sknetwork/ranking/katz.py +81 -0
- sknetwork/ranking/pagerank.py +107 -0
- sknetwork/ranking/postprocess.py +25 -0
- sknetwork/ranking/tests/__init__.py +1 -0
- sknetwork/ranking/tests/test_API.py +34 -0
- sknetwork/ranking/tests/test_betweenness.py +38 -0
- sknetwork/ranking/tests/test_closeness.py +34 -0
- sknetwork/ranking/tests/test_hits.py +20 -0
- sknetwork/ranking/tests/test_pagerank.py +69 -0
- sknetwork/regression/__init__.py +4 -0
- sknetwork/regression/base.py +56 -0
- sknetwork/regression/diffusion.py +190 -0
- sknetwork/regression/tests/__init__.py +1 -0
- sknetwork/regression/tests/test_API.py +34 -0
- sknetwork/regression/tests/test_diffusion.py +48 -0
- sknetwork/sknetwork.py +3 -0
- sknetwork/topology/__init__.py +9 -0
- sknetwork/topology/dag.py +74 -0
- sknetwork/topology/dag_core.cpython-39-darwin.so +0 -0
- sknetwork/topology/dag_core.pyx +38 -0
- sknetwork/topology/kcliques.cpython-39-darwin.so +0 -0
- sknetwork/topology/kcliques.pyx +193 -0
- sknetwork/topology/kcore.cpython-39-darwin.so +0 -0
- sknetwork/topology/kcore.pyx +120 -0
- sknetwork/topology/structure.py +234 -0
- sknetwork/topology/tests/__init__.py +1 -0
- sknetwork/topology/tests/test_cliques.py +28 -0
- sknetwork/topology/tests/test_cores.py +21 -0
- sknetwork/topology/tests/test_dag.py +26 -0
- sknetwork/topology/tests/test_structure.py +99 -0
- sknetwork/topology/tests/test_triangles.py +42 -0
- sknetwork/topology/tests/test_wl_coloring.py +49 -0
- sknetwork/topology/tests/test_wl_kernel.py +31 -0
- sknetwork/topology/triangles.cpython-39-darwin.so +0 -0
- sknetwork/topology/triangles.pyx +166 -0
- sknetwork/topology/weisfeiler_lehman.py +163 -0
- sknetwork/topology/weisfeiler_lehman_core.cpython-39-darwin.so +0 -0
- sknetwork/topology/weisfeiler_lehman_core.pyx +116 -0
- sknetwork/utils/__init__.py +40 -0
- sknetwork/utils/base.py +35 -0
- sknetwork/utils/check.py +354 -0
- sknetwork/utils/co_neighbor.py +71 -0
- sknetwork/utils/format.py +219 -0
- sknetwork/utils/kmeans.py +89 -0
- sknetwork/utils/knn.py +166 -0
- sknetwork/utils/knn1d.cpython-39-darwin.so +0 -0
- sknetwork/utils/knn1d.pyx +80 -0
- sknetwork/utils/membership.py +82 -0
- sknetwork/utils/minheap.cpython-39-darwin.so +0 -0
- sknetwork/utils/minheap.pxd +22 -0
- sknetwork/utils/minheap.pyx +111 -0
- sknetwork/utils/neighbors.py +115 -0
- sknetwork/utils/seeds.py +75 -0
- sknetwork/utils/simplex.py +140 -0
- sknetwork/utils/tests/__init__.py +1 -0
- sknetwork/utils/tests/test_base.py +28 -0
- sknetwork/utils/tests/test_bunch.py +16 -0
- sknetwork/utils/tests/test_check.py +190 -0
- sknetwork/utils/tests/test_co_neighbor.py +43 -0
- sknetwork/utils/tests/test_format.py +61 -0
- sknetwork/utils/tests/test_kmeans.py +21 -0
- sknetwork/utils/tests/test_knn.py +32 -0
- sknetwork/utils/tests/test_membership.py +24 -0
- sknetwork/utils/tests/test_neighbors.py +41 -0
- sknetwork/utils/tests/test_projection_simplex.py +33 -0
- sknetwork/utils/tests/test_seeds.py +67 -0
- sknetwork/utils/tests/test_verbose.py +15 -0
- sknetwork/utils/tests/test_ward.py +20 -0
- sknetwork/utils/timeout.py +38 -0
- sknetwork/utils/verbose.py +37 -0
- sknetwork/utils/ward.py +60 -0
- sknetwork/visualization/__init__.py +4 -0
- sknetwork/visualization/colors.py +34 -0
- sknetwork/visualization/dendrograms.py +229 -0
- sknetwork/visualization/graphs.py +819 -0
- sknetwork/visualization/tests/__init__.py +1 -0
- sknetwork/visualization/tests/test_dendrograms.py +53 -0
- sknetwork/visualization/tests/test_graphs.py +167 -0
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
# -*- coding: utf-8 -*-
|
|
3
|
+
"""
|
|
4
|
+
Created on Nov, 2019
|
|
5
|
+
@author: Nathan de Lara <nathan.delara@polytechnique.org>
|
|
6
|
+
"""
|
|
7
|
+
from abc import ABC
|
|
8
|
+
from typing import Union
|
|
9
|
+
|
|
10
|
+
import numpy as np
|
|
11
|
+
from scipy import sparse
|
|
12
|
+
|
|
13
|
+
from sknetwork.topology.structure import is_connected
|
|
14
|
+
from sknetwork.utils.base import Algorithm
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class BaseEmbedding(Algorithm, ABC):
    """Common base class of the embedding algorithms.

    Attributes
    ----------
    embedding_ : array, shape = (n, n_components)
        Embedding of the nodes.
    embedding_row_ : array, shape = (n_row, n_components)
        Embedding of the rows, for bipartite graphs.
    embedding_col_ : array, shape = (n_col, n_components)
        Embedding of the columns, for bipartite graphs.
    """

    def __init__(self):
        self._init_vars()

    def _init_vars(self):
        """Reset the three embedding attributes to ``None`` (unfitted state)."""
        self.embedding_ = None
        self.embedding_row_ = None
        self.embedding_col_ = None

    def fit_transform(self, *args, **kwargs) -> np.ndarray:
        """Fit to data and return the embedding. Same parameters as the ``fit`` method.

        Returns
        -------
        embedding : np.ndarray
            Embedding (the ``embedding_`` attribute after fitting).
        """
        self.fit(*args, **kwargs)
        return self.embedding_

    def predict(self, adjacency_vectors: Union[sparse.csr_matrix, np.ndarray]) -> np.ndarray:
        """Predict the embedding of new nodes.

        Each new node is defined by its adjacency row vector.
        The base class provides no implementation; subclasses supporting prediction override it.

        Parameters
        ----------
        adjacency_vectors :
            Adjacency vectors of nodes.
            Array of shape (n_col,) (single vector) or (n_vectors, n_col)

        Returns
        -------
        embedding_vectors : np.ndarray
            Embedding of the nodes.

        Raises
        ------
        NotImplementedError
            Always, in this base class.
        """
        raise NotImplementedError

    def _check_fitted(self):
        """Return ``self`` if an embedding is available, raise ``ValueError`` otherwise."""
        if self.embedding_ is not None:
            return self
        message = ("This embedding instance is not fitted yet."
                   " Call 'fit' with appropriate arguments before using this method.")
        raise ValueError(message)

    @staticmethod
    def _get_regularization(regularization: float, adjacency: sparse.csr_matrix) -> float:
        """Set proper regularization depending on graph connectivity.

        A value that is not negative is returned unchanged. A negative value means
        "regularize only if needed": it yields 0 for a strongly connected graph and
        its absolute value otherwise.
        """
        if not regularization < 0:
            return regularization
        # Negative input: connectivity decides whether any regularization is applied.
        strongly_connected = is_connected(adjacency, connection='strong')
        return 0 if strongly_connected else np.abs(regularization)

    def _split_vars(self, shape):
        """Split embedding_ into embedding_row_ and embedding_col_.

        The first ``shape[0]`` rows of ``embedding_`` become ``embedding_row_``, the
        remaining rows become ``embedding_col_``, and ``embedding_`` is then set to
        the row embedding.
        """
        n_row = shape[0]
        stacked = self.embedding_
        rows, cols = stacked[:n_row], stacked[n_row:]
        self.embedding_row_ = rows
        self.embedding_col_ = cols
        self.embedding_ = rows
        return self
|
|
@@ -0,0 +1,197 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
# coding: utf-8
|
|
3
|
+
"""
|
|
4
|
+
Created on Jun 2020
|
|
5
|
+
@author: Victor Manach <victor.manach@telecom-paris.fr>
|
|
6
|
+
@author: Rémi Jaylet <remi.jaylet@telecom-paris.fr>
|
|
7
|
+
"""
|
|
8
|
+
from typing import Optional, Union
|
|
9
|
+
|
|
10
|
+
import numpy as np
|
|
11
|
+
from scipy import sparse
|
|
12
|
+
from scipy.spatial import cKDTree
|
|
13
|
+
|
|
14
|
+
from sknetwork.embedding.base import BaseEmbedding
|
|
15
|
+
from sknetwork.utils.check import check_format, is_symmetric, check_square
|
|
16
|
+
from sknetwork.utils.format import directed2undirected
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class ForceAtlas(BaseEmbedding):
    """Force Atlas layout for displaying graphs.

    Parameters
    ----------
    n_components : int
        Dimension of the graph layout.
    n_iter : int
        Default number of iterations to update positions
        (used by ``fit`` when its own ``n_iter`` argument is ``None``).
    approx_radius : float
        If a positive value is provided, only the nodes within this distance of a given node are used to compute
        the repulsive force.
    lin_log : bool
        If ``True``, use lin-log mode.
    gravity_factor : float
        Gravity force scaling constant.
    repulsive_factor : float
        Repulsive force scaling constant.
    tolerance : float
        Tolerance defined in the swinging constant.
        NOTE(review): the value is stored on the instance but ``fit`` does not read it;
        ``fit`` picks its tolerance (0.1, 1 or 10) from the number of nodes.
    speed : float
        Speed constant.
    speed_max : float
        Constant used to impose constraint on speed.

    Attributes
    ----------
    embedding_ : np.ndarray
        Layout in given dimension.

    Example
    -------
    >>> from sknetwork.embedding.force_atlas import ForceAtlas
    >>> from sknetwork.data import karate_club
    >>> force_atlas = ForceAtlas()
    >>> adjacency = karate_club()
    >>> embedding = force_atlas.fit_transform(adjacency)
    >>> embedding.shape
    (34, 2)

    References
    ----------
    Jacomy M., Venturini T., Heymann S., Bastian M. (2014).
    `ForceAtlas2, a Continuous Graph Layout Algorithm for Handy Network Visualization Designed for the Gephi Software.
    <https://journals.plos.org/plosone/article?id=10.1371/journal.pone.0098679>`_
    Plos One.
    """
    def __init__(self, n_components: int = 2, n_iter: int = 50, approx_radius: float = -1, lin_log: bool = False,
                 gravity_factor: float = 0.01, repulsive_factor: float = 0.1, tolerance: float = 0.1,
                 speed: float = 0.1, speed_max: float = 10):
        super(ForceAtlas, self).__init__()
        self.n_components = n_components
        self.n_iter = n_iter
        self.approx_radius = approx_radius
        self.lin_log = lin_log
        self.gravity_factor = gravity_factor
        self.repulsive_factor = repulsive_factor
        self.tolerance = tolerance
        self.speed = speed
        self.speed_max = speed_max

    def fit(self, adjacency: Union[sparse.csr_matrix, np.ndarray], pos_init: Optional[np.ndarray] = None,
            n_iter: Optional[int] = None) -> 'ForceAtlas':
        """Compute layout.

        Parameters
        ----------
        adjacency :
            Adjacency matrix of the graph, treated as undirected.
        pos_init :
            Position to start with. Random if not provided.
            The array is copied; it is not modified by this method.
        n_iter : int
            Number of iterations to update positions.
            If ``None``, use the value of self.n_iter.

        Returns
        -------
        self: :class:`ForceAtlas`

        Raises
        ------
        ValueError
            If ``pos_init`` does not have shape ``(n, n_components)``.
        """
        # verify the format of the adjacency matrix
        adjacency = check_format(adjacency)
        check_square(adjacency)
        if not is_symmetric(adjacency):
            adjacency = directed2undirected(adjacency)
        n = adjacency.shape[0]

        # setting of the tolerance according to the size of the graph
        if n < 5000:
            tolerance = 0.1
        elif 5000 <= n < 50000:  # pragma: no cover
            tolerance = 1
        else:  # pragma: no cover
            tolerance = 10

        if n_iter is None:
            n_iter = self.n_iter

        # initial position of the nodes of the graph
        if pos_init is None:
            position: np.ndarray = np.random.randn(n, self.n_components)
        else:
            if pos_init.shape != (n, self.n_components):
                raise ValueError('The initial position does not have valid dimensions.')
            # Positions are updated in place below, so work on a private copy: the caller's
            # array must stay untouched, and a non-floating input (e.g. integer coordinates)
            # must be promoted to float so that the in-place update is possible.
            if np.issubdtype(pos_init.dtype, np.floating):
                position = pos_init.copy()
            else:
                position = pos_init.astype(float)

        # compute the vector with the degree of each node
        degree: np.ndarray = adjacency.dot(np.ones(adjacency.shape[1])) + 1

        # initialization of variation of position of nodes
        resultants = np.zeros(n)
        delta: np.ndarray = np.zeros((n, self.n_components))
        swing_vector: np.ndarray = np.zeros(n)
        global_speed = 1

        for _ in range(n_iter):
            delta *= 0
            global_swing = 0
            global_traction = 0

            # spatial index used to restrict repulsion to nearby nodes, rebuilt at each iteration
            if self.approx_radius > 0:
                tree = cKDTree(position)
            else:
                tree = None

            for i in range(n):

                # attraction
                indices = adjacency.indices[adjacency.indptr[i]:adjacency.indptr[i + 1]]
                attraction = position[i] - position[indices]

                if self.lin_log:
                    attraction = np.sign(attraction) * np.log(1 + np.abs(10 * attraction))
                attraction = attraction.sum(axis=0)

                # repulsion
                if tree is None:
                    neighbors = np.arange(n)
                else:
                    neighbors = tree.query_ball_point(position[i], self.approx_radius)

                grad: np.ndarray = (position[i] - position[neighbors])  # shape (n_neigh, n_components)
                distance: np.ndarray = np.linalg.norm(grad, axis=1)  # shape (n_neigh,)
                # clip small distances to avoid exploding forces (and the zero distance of node i to itself)
                distance = np.where(distance < 0.01, 0.01, distance)
                repulsion = grad * (degree[neighbors] / distance)[:, np.newaxis]

                repulsion *= self.repulsive_factor * degree[i]
                repulsion = repulsion.sum(axis=0)

                # gravity
                gravity = self.gravity_factor * degree[i] * grad
                gravity = gravity.sum(axis=0)

                # forces resultant applied on node i for traction, swing and speed computation
                force = repulsion - attraction - gravity
                resultant_new: float = np.linalg.norm(force)
                resultant_old: float = resultants[i]

                swing_node: float = np.abs(resultant_new - resultant_old)  # force variation applied on node i
                swing_vector[i] = swing_node
                global_swing += (degree[i] + 1) * swing_node

                traction: float = np.abs(resultant_new + resultant_old) / 2  # traction force applied on node i
                global_traction += (degree[i] + 1) * traction

                node_speed = self.speed * global_speed / (1 + global_speed * np.sqrt(swing_node))
                if node_speed > self.speed_max / resultant_new:  # pragma: no cover
                    node_speed = self.speed_max / resultant_new

                delta[i] = node_speed * force
                resultants[i] = resultant_new
                global_speed = tolerance * global_traction / global_swing

            position += delta  # calculating displacement and final position of points after iteration
            if (swing_vector < 1).all():
                break  # if the swing of all nodes is below 1, convergence is considered reached and we break.

        self.embedding_ = position
        return self
|
|
@@ -0,0 +1,174 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
# coding: utf-8
|
|
3
|
+
"""
|
|
4
|
+
Created in September 2020
|
|
5
|
+
@author: Quentin Lutz <qlutz@enst.fr>
|
|
6
|
+
@author: Thomas Bonald <bonald@enst.fr>
|
|
7
|
+
"""
|
|
8
|
+
from typing import Optional, Union
|
|
9
|
+
|
|
10
|
+
import numpy as np
|
|
11
|
+
from scipy import sparse
|
|
12
|
+
|
|
13
|
+
from sknetwork.clustering.louvain import Louvain
|
|
14
|
+
from sknetwork.embedding.base import BaseEmbedding
|
|
15
|
+
from sknetwork.linalg.normalization import normalize
|
|
16
|
+
from sknetwork.utils.check import check_random_state, check_adjacency_vector, check_nonnegative, is_square
|
|
17
|
+
from sknetwork.utils.membership import get_membership
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def reindex_labels(labels: np.ndarray, labels_secondary: Optional[np.ndarray] = None, which: str = 'remove'):
    """Reindex labels, removing or merging labels of count 1.

    Labels occurring more than once in ``labels`` are "kept" and renumbered
    ``0, 1, ...`` in increasing order of their original value.

    Parameters
    ----------
    labels :
        Primary labels (non-negative integers, at least one entry).
    labels_secondary :
        Optional second array of labels expressed in the same label space as ``labels``.
        Kept labels get the same new index as in ``labels``; every other label becomes -1,
        whatever the value of ``which``.
    which :
        What to do with primary labels of count 1:
        ``'remove'`` maps them to -1, ``'merge'`` maps them all to one extra label
        (equal to the number of kept labels); any other value leaves ``labels`` unchanged.

    Returns
    -------
    labels : np.ndarray
        Reindexed primary labels.
    labels_secondary : np.ndarray or None
        Reindexed secondary labels (``None`` if not provided).
    """
    labels_unique, counts = np.unique(labels, return_counts=True)
    n_labels = max(labels_unique) + 1
    labels_keep = labels_unique[counts > 1]
    new_index = np.arange(len(labels_keep))

    # lookup table: original label -> new label
    if which == 'remove':
        label_index = -np.ones(n_labels, dtype='int')
        label_index[labels_keep] = new_index
    elif which == 'merge':
        label_index = len(labels_keep) * np.ones(n_labels, dtype='int')
        label_index[labels_keep] = new_index
    else:
        label_index = np.arange(n_labels)
    labels = label_index[labels]

    if labels_secondary is not None:
        # The table is written at the kept *primary* labels and read at the secondary labels,
        # so it must be large enough for both. Sizing it from the secondary labels alone
        # raised an IndexError whenever a kept primary label exceeded the largest secondary label.
        n_labels_secondary = max(max(np.unique(labels_secondary)) + 1, n_labels)
        label_index = -np.ones(n_labels_secondary, dtype='int')
        label_index[labels_keep] = new_index
        labels_secondary = label_index[labels_secondary]
    return labels, labels_secondary
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
class LouvainEmbedding(BaseEmbedding):
    """Embedding of graphs induced by Louvain clustering. Each component of the embedding corresponds
    to a cluster obtained by Louvain.

    Parameters
    ----------
    resolution : float
        Resolution parameter.
    modularity : str
        Which objective function to maximize. Can be ``'Dugue'``, ``'Newman'`` or ``'Potts'``.
        Stored in lower case.
    tol_optimization :
        Minimum increase in the objective function to enter a new optimization pass.
    tol_aggregation :
        Minimum increase in the objective function to enter a new aggregation pass.
    n_aggregations :
        Maximum number of aggregations.
        A negative value is interpreted as no limit.
    shuffle_nodes :
        Enables node shuffling before optimization.
    random_state :
        Random number generator or random seed. If ``None``, numpy.random is used.
    isolated_nodes : str
        What to do with isolated column nodes. Can be ``'remove'`` (default), ``'merge'`` or ``'keep'``.

    Attributes
    ----------
    embedding_ : array, shape = (n, n_components)
        Embedding of the nodes.
    embedding_row_ : array, shape = (n_row, n_components)
        Embedding of the rows, for bipartite graphs.
    embedding_col_ : array, shape = (n_col, n_components)
        Embedding of the columns, for bipartite graphs.
    labels_row_ : np.ndarray
        Labels of the rows (used to build the embedding of the columns).
    labels_col_ : np.ndarray
        Labels of the columns (used to build the embedding of the rows).

    Example
    -------
    >>> from sknetwork.embedding import LouvainEmbedding
    >>> from sknetwork.data import house
    >>> louvain = LouvainEmbedding()
    >>> adjacency = house()
    >>> embedding = louvain.fit_transform(adjacency)
    >>> embedding.shape
    (5, 2)
    """
    def __init__(self, resolution: float = 1, modularity: str = 'Dugue', tol_optimization: float = 1e-3,
                 tol_aggregation: float = 1e-3, n_aggregations: int = -1, shuffle_nodes: bool = False,
                 random_state: Optional[Union[np.random.RandomState, int]] = None, isolated_nodes: str = 'remove'):
        super(LouvainEmbedding, self).__init__()

        # clustering parameters, forwarded to Louvain at fit time
        self.resolution = resolution
        self.modularity = modularity.lower()
        self.tol_optimization = tol_optimization
        self.tol_aggregation = tol_aggregation
        self.n_aggregations = n_aggregations
        self.shuffle_nodes = shuffle_nodes
        self.random_state = check_random_state(random_state)
        self.isolated_nodes = isolated_nodes

        # results, available after fit
        self.labels_ = None
        self.embedding_ = None
        self.embedding_row_ = None
        self.embedding_col_ = None

    def fit(self, input_matrix: sparse.csr_matrix, force_bipartite: bool = False):
        """Embedding of graphs from the clustering obtained with Louvain.

        Parameters
        ----------
        input_matrix :
            Adjacency matrix or biadjacency matrix of the graph.
        force_bipartite : bool (default = ``False``)
            If ``True``, force the input matrix to be considered as a biadjacency matrix.

        Returns
        -------
        self: :class:`LouvainEmbedding`
        """
        clustering = Louvain(resolution=self.resolution, modularity=self.modularity,
                             tol_optimization=self.tol_optimization, tol_aggregation=self.tol_aggregation,
                             n_aggregations=self.n_aggregations, shuffle_nodes=self.shuffle_nodes,
                             sort_clusters=False, return_membership=True, return_aggregate=True,
                             random_state=self.random_state)
        clustering.fit(input_matrix, force_bipartite=force_bipartite)

        # isolated nodes: square input uses the node labels only,
        # otherwise column labels are primary and row labels secondary
        if is_square(input_matrix):
            primary, secondary = clustering.labels_, None
        else:
            primary, secondary = clustering.labels_col_, clustering.labels_row_

        self.labels_, labels_row = reindex_labels(primary, secondary, self.isolated_nodes)

        # embedding: normalized rows of the input matrix times the label membership matrix
        membership = get_membership(self.labels_)
        self.embedding_ = normalize(input_matrix).dot(membership).toarray()

        if labels_row is not None:
            # bipartite case: columns are embedded through the labels of the rows
            membership_row = get_membership(labels_row)
            embedding_col = normalize(input_matrix.T).dot(membership_row)
            self.embedding_row_ = self.embedding_
            self.embedding_col_ = embedding_col.toarray()

        return self

    def predict(self, adjacency_vectors: Union[sparse.csr_matrix, np.ndarray]) -> np.ndarray:
        """Predict the embedding of new rows, defined by their adjacency vectors.

        Parameters
        ----------
        adjacency_vectors :
            Adjacency row vectors.
            Array of shape (n_col,) (single vector) or (n_vectors, n_col)

        Returns
        -------
        embedding_vectors : np.ndarray
            Embedding of the nodes.
        """
        self._check_fitted()

        # expected vector length: number of columns if a column embedding exists, else number of nodes
        fitted = self.embedding_ if self.embedding_col_ is None else self.embedding_col_
        n = len(fitted)

        adjacency_vectors = check_adjacency_vector(adjacency_vectors, n)
        check_nonnegative(adjacency_vectors)

        membership = get_membership(self.labels_)
        return normalize(adjacency_vectors).dot(membership)
|
|
@@ -0,0 +1,142 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
# coding: utf-8
|
|
3
|
+
"""
|
|
4
|
+
Created on Dec 2020
|
|
5
|
+
@author: Quentin Lutz <qlutz@enst.fr>
|
|
6
|
+
"""
|
|
7
|
+
from typing import Optional, Union
|
|
8
|
+
|
|
9
|
+
import numpy as np
|
|
10
|
+
from scipy import sparse
|
|
11
|
+
|
|
12
|
+
from sknetwork.utils.check import check_format, check_random_state
|
|
13
|
+
from sknetwork.utils.format import get_adjacency
|
|
14
|
+
from sknetwork.clustering.louvain import Louvain
|
|
15
|
+
from sknetwork.embedding.base import BaseEmbedding
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class LouvainNE(BaseEmbedding):
    """Graph embedding built from recursive Louvain clustering, with a random shift added per level.

    At each level of the recursion the current (sub)graph is clustered; every
    cluster receives one random vector, scaled by ``scale ** depth``, which is
    added to the embedding of all its nodes. Each cluster is then processed
    recursively.

    Parameters
    ----------
    n_components : int
        Dimension of the embedding.
    scale : float
        Dilution factor to be applied on the random vector to be added at each iteration of the clustering method.
    resolution :
        Resolution parameter.
    tol_optimization :
        Minimum increase in the objective function to enter a new optimization pass.
    tol_aggregation :
        Minimum increase in the objective function to enter a new aggregation pass.
    n_aggregations :
        Maximum number of aggregations.
        A negative value is interpreted as no limit.
    shuffle_nodes :
        Enables node shuffling before optimization.
    random_state :
        Random number generator or random seed. If None, numpy.random is used.
    verbose :
        Forwarded as the ``verbose`` argument of the underlying Louvain clustering.

    Attributes
    ----------
    embedding_ : array, shape = (n, n_components)
        Embedding of the nodes.
    embedding_row_ : array, shape = (n_row, n_components)
        Embedding of the rows, for bipartite graphs.
    embedding_col_ : array, shape = (n_col, n_components)
        Embedding of the columns, for bipartite graphs.

    Example
    -------
    >>> from sknetwork.embedding import LouvainNE
    >>> from sknetwork.data import karate_club
    >>> louvain = LouvainNE(n_components=3)
    >>> adjacency = karate_club()
    >>> embedding = louvain.fit_transform(adjacency)
    >>> embedding.shape
    (34, 3)

    References
    ----------
    Bhowmick, A. K., Meneni, K., Danisch, M., Guillaume, J. L., & Mitra, B. (2020, January).
    `LouvainNE: Hierarchical Louvain Method for High Quality and Scalable Network Embedding.
    <https://hal.archives-ouvertes.fr/hal-02999888/document>`_
    In Proceedings of the 13th International Conference on Web Search and Data Mining (pp. 43-51).
    """

    def __init__(self, n_components: int = 2, scale: float = .1, resolution: float = 1, tol_optimization: float = 1e-3,
                 tol_aggregation: float = 1e-3, n_aggregations: int = -1, shuffle_nodes: bool = False,
                 random_state: Optional[Union[np.random.RandomState, int]] = None, verbose: bool = False):
        super().__init__()

        self.n_components = n_components
        self.scale = scale
        # The clustering step is delegated to a Louvain instance configured once here.
        self._clustering_method = Louvain(
            resolution=resolution,
            tol_optimization=tol_optimization,
            tol_aggregation=tol_aggregation,
            n_aggregations=n_aggregations,
            shuffle_nodes=shuffle_nodes,
            random_state=random_state,
            verbose=verbose,
        )
        self.random_state = check_random_state(random_state)
        self.bipartite = None

    def _recursive_louvain(self, adjacency: Union[sparse.csr_matrix, np.ndarray], depth: int,
                           nodes: Optional[np.ndarray] = None):
        """Cluster the graph, shift each cluster's embedding and recurse; updates ``embedding_`` in place.

        Parameters
        ----------
        adjacency :
            Adjacency matrix of the graph.
        depth :
            Depth of the recursion.
        nodes :
            The indices of the current nodes in the original graph.
        """
        n_nodes = adjacency.shape[0]
        if nodes is None:
            nodes = np.arange(n_nodes)

        # A graph without stored entries is not clustered: every node gets label 0.
        if adjacency.nnz:
            labels = self._clustering_method.fit_transform(adjacency)
        else:
            labels = np.zeros(n_nodes)

        clusters = np.unique(labels)
        if len(clusters) == 1:
            # A single cluster cannot be refined further: stop the recursion.
            return

        # One random vector per cluster (stored as columns), shrunk with the depth.
        shifts = (self.scale ** depth) * self.random_state.rand(self.n_components, len(clusters))

        for position, cluster in enumerate(clusters):
            in_cluster = (labels == cluster)
            cluster_nodes = nodes[in_cluster]
            self.embedding_[cluster_nodes, :] += shifts[:, position]

            # Column selector in CSR form: row i holds a single 1 when node i
            # belongs to the cluster, placed at the node's rank inside the cluster.
            n_total = len(in_cluster)
            row_pointers = np.zeros(n_total + 1, dtype=int)
            row_pointers[1:] = np.cumsum(in_cluster)
            n_kept = row_pointers[-1]
            selector = sparse.csr_matrix(
                (np.ones(n_kept), np.arange(n_kept, dtype=int), row_pointers),
                shape=(n_total, n_kept),
            )

            # Keep the cluster's rows, then its columns through the selector.
            sub_adjacency = adjacency[in_cluster, :].dot(selector)
            self._recursive_louvain(sub_adjacency, depth + 1, cluster_nodes)

    def fit(self, input_matrix: Union[sparse.csr_matrix, np.ndarray], force_bipartite: bool = False):
        """Compute the embedding from recursive Louvain clusterings of the graph.

        Parameters
        ----------
        input_matrix :
            Adjacency matrix or biadjacency matrix of the graph.
        force_bipartite :
            If ``True``, force the input matrix to be considered as a biadjacency matrix even if square.

        Returns
        -------
        self: :class:`LouvainNE`
        """
        # Input validation and conversion to an adjacency matrix.
        input_matrix = check_format(input_matrix)
        adjacency, self.bipartite = get_adjacency(input_matrix, force_bipartite=force_bipartite)

        # Start from a zero embedding; the recursion accumulates the shifts.
        n_nodes = adjacency.shape[0]
        self.embedding_ = np.zeros((n_nodes, self.n_components))
        self._recursive_louvain(adjacency, 0)

        if self.bipartite:
            self._split_vars(input_matrix.shape)
        return self
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
# -*- coding: utf-8 -*-
|
|
3
|
+
"""
|
|
4
|
+
Created in November 2018
|
|
5
|
+
@author: Thomas Bonald <thomas.bonald@telecom-paris.fr>
|
|
6
|
+
@author: Nathan De Lara <nathan.delara@polytechnique.org>
|
|
7
|
+
"""
|
|
8
|
+
import numpy as np
|
|
9
|
+
|
|
10
|
+
from sknetwork.linalg import normalize
|
|
11
|
+
from sknetwork.utils.check import check_format, check_square
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def get_cosine_similarity(input_matrix, embedding: np.ndarray, embedding_col=None):
    """Average cosine similarity of an embedding :math:`x` defined by:

    :math:`Q = \\sum_{ij}\\dfrac{A_{ij}}{w}\\cos(x_i, x_j)`

    where :math:`w = 1^TA1` is the total weight of the graph.

    For bipartite graphs with column embedding :math:`y`, the metric is

    :math:`Q = \\sum_{ij} \\dfrac{B_{ij}}{w} \\cos(x_i, y_j)`

    where :math:`w = 1^TB1` is the total weight of the graph.

    Each edge contributes its cosine similarity weighted by the edge weight,
    so that the result is a weighted average over the edges.

    Parameters
    ----------
    input_matrix :
        Adjacency matrix or biadjacency matrix of the graph.
    embedding :
        Embedding of the nodes.
    embedding_col :
        Embedding of the columns (for bipartite graphs).
        If ``None``, the input matrix must be square and ``embedding`` is used for the columns as well.

    Returns
    -------
    cosine_similarity : float

    Example
    -------
    >>> from sknetwork.embedding import get_cosine_similarity
    >>> from sknetwork.data import karate_club
    >>> graph = karate_club(metadata=True)
    >>> adjacency = graph.adjacency
    >>> embedding = graph.position
    >>> np.round(get_cosine_similarity(adjacency, embedding), 2)
    0.7
    """
    input_matrix = check_format(input_matrix)
    total_weight = input_matrix.data.sum()

    embedding_row_norm = None
    if embedding_col is None:
        # Without a column embedding, rows and columns index the same nodes.
        check_square(input_matrix)
        embedding_row_norm = normalize(embedding, p=2)
        embedding_col_norm = embedding_row_norm
    else:
        embedding_row_norm = normalize(embedding, p=2)
        embedding_col_norm = normalize(embedding_col, p=2)

    input_matrix_coo = input_matrix.tocoo()
    row = input_matrix_coo.row
    col = input_matrix_coo.col
    weights = input_matrix_coo.data

    # Cosine similarity of each edge: dot product of the unit-norm end-point vectors.
    edge_cosines = np.multiply(embedding_row_norm[row], embedding_col_norm[col]).sum(axis=1)

    # Weight every edge by A_ij (resp. B_ij), as in the formula above; summing the
    # raw cosines would only be correct for unweighted graphs.
    cosine_similarity = np.dot(weights, edge_cosines) / total_weight

    return cosine_similarity
|