scikit_network-0.33.0-cp312-cp312-macosx_10_9_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of scikit-network might be problematic.
- scikit_network-0.33.0.dist-info/AUTHORS.rst +43 -0
- scikit_network-0.33.0.dist-info/LICENSE +34 -0
- scikit_network-0.33.0.dist-info/METADATA +517 -0
- scikit_network-0.33.0.dist-info/RECORD +216 -0
- scikit_network-0.33.0.dist-info/WHEEL +5 -0
- scikit_network-0.33.0.dist-info/top_level.txt +1 -0
- sknetwork/__init__.py +21 -0
- sknetwork/base.py +67 -0
- sknetwork/classification/__init__.py +8 -0
- sknetwork/classification/base.py +142 -0
- sknetwork/classification/base_rank.py +133 -0
- sknetwork/classification/diffusion.py +134 -0
- sknetwork/classification/knn.py +139 -0
- sknetwork/classification/metrics.py +205 -0
- sknetwork/classification/pagerank.py +66 -0
- sknetwork/classification/propagation.py +152 -0
- sknetwork/classification/tests/__init__.py +1 -0
- sknetwork/classification/tests/test_API.py +30 -0
- sknetwork/classification/tests/test_diffusion.py +77 -0
- sknetwork/classification/tests/test_knn.py +23 -0
- sknetwork/classification/tests/test_metrics.py +53 -0
- sknetwork/classification/tests/test_pagerank.py +20 -0
- sknetwork/classification/tests/test_propagation.py +24 -0
- sknetwork/classification/vote.cpython-312-darwin.so +0 -0
- sknetwork/classification/vote.pyx +56 -0
- sknetwork/clustering/__init__.py +8 -0
- sknetwork/clustering/base.py +172 -0
- sknetwork/clustering/kcenters.py +253 -0
- sknetwork/clustering/leiden.py +242 -0
- sknetwork/clustering/leiden_core.cpython-312-darwin.so +0 -0
- sknetwork/clustering/leiden_core.pyx +124 -0
- sknetwork/clustering/louvain.py +286 -0
- sknetwork/clustering/louvain_core.cpython-312-darwin.so +0 -0
- sknetwork/clustering/louvain_core.pyx +124 -0
- sknetwork/clustering/metrics.py +91 -0
- sknetwork/clustering/postprocess.py +66 -0
- sknetwork/clustering/propagation_clustering.py +104 -0
- sknetwork/clustering/tests/__init__.py +1 -0
- sknetwork/clustering/tests/test_API.py +38 -0
- sknetwork/clustering/tests/test_kcenters.py +60 -0
- sknetwork/clustering/tests/test_leiden.py +34 -0
- sknetwork/clustering/tests/test_louvain.py +129 -0
- sknetwork/clustering/tests/test_metrics.py +50 -0
- sknetwork/clustering/tests/test_postprocess.py +39 -0
- sknetwork/data/__init__.py +6 -0
- sknetwork/data/base.py +33 -0
- sknetwork/data/load.py +406 -0
- sknetwork/data/models.py +459 -0
- sknetwork/data/parse.py +644 -0
- sknetwork/data/test_graphs.py +84 -0
- sknetwork/data/tests/__init__.py +1 -0
- sknetwork/data/tests/test_API.py +30 -0
- sknetwork/data/tests/test_base.py +14 -0
- sknetwork/data/tests/test_load.py +95 -0
- sknetwork/data/tests/test_models.py +52 -0
- sknetwork/data/tests/test_parse.py +250 -0
- sknetwork/data/tests/test_test_graphs.py +29 -0
- sknetwork/data/tests/test_toy_graphs.py +68 -0
- sknetwork/data/timeout.py +38 -0
- sknetwork/data/toy_graphs.py +611 -0
- sknetwork/embedding/__init__.py +8 -0
- sknetwork/embedding/base.py +94 -0
- sknetwork/embedding/force_atlas.py +198 -0
- sknetwork/embedding/louvain_embedding.py +148 -0
- sknetwork/embedding/random_projection.py +135 -0
- sknetwork/embedding/spectral.py +141 -0
- sknetwork/embedding/spring.py +198 -0
- sknetwork/embedding/svd.py +359 -0
- sknetwork/embedding/tests/__init__.py +1 -0
- sknetwork/embedding/tests/test_API.py +49 -0
- sknetwork/embedding/tests/test_force_atlas.py +35 -0
- sknetwork/embedding/tests/test_louvain_embedding.py +33 -0
- sknetwork/embedding/tests/test_random_projection.py +28 -0
- sknetwork/embedding/tests/test_spectral.py +81 -0
- sknetwork/embedding/tests/test_spring.py +50 -0
- sknetwork/embedding/tests/test_svd.py +43 -0
- sknetwork/gnn/__init__.py +10 -0
- sknetwork/gnn/activation.py +117 -0
- sknetwork/gnn/base.py +181 -0
- sknetwork/gnn/base_activation.py +89 -0
- sknetwork/gnn/base_layer.py +109 -0
- sknetwork/gnn/gnn_classifier.py +305 -0
- sknetwork/gnn/layer.py +153 -0
- sknetwork/gnn/loss.py +180 -0
- sknetwork/gnn/neighbor_sampler.py +65 -0
- sknetwork/gnn/optimizer.py +164 -0
- sknetwork/gnn/tests/__init__.py +1 -0
- sknetwork/gnn/tests/test_activation.py +56 -0
- sknetwork/gnn/tests/test_base.py +75 -0
- sknetwork/gnn/tests/test_base_layer.py +37 -0
- sknetwork/gnn/tests/test_gnn_classifier.py +130 -0
- sknetwork/gnn/tests/test_layers.py +80 -0
- sknetwork/gnn/tests/test_loss.py +33 -0
- sknetwork/gnn/tests/test_neigh_sampler.py +23 -0
- sknetwork/gnn/tests/test_optimizer.py +43 -0
- sknetwork/gnn/tests/test_utils.py +41 -0
- sknetwork/gnn/utils.py +127 -0
- sknetwork/hierarchy/__init__.py +6 -0
- sknetwork/hierarchy/base.py +96 -0
- sknetwork/hierarchy/louvain_hierarchy.py +272 -0
- sknetwork/hierarchy/metrics.py +234 -0
- sknetwork/hierarchy/paris.cpython-312-darwin.so +0 -0
- sknetwork/hierarchy/paris.pyx +316 -0
- sknetwork/hierarchy/postprocess.py +350 -0
- sknetwork/hierarchy/tests/__init__.py +1 -0
- sknetwork/hierarchy/tests/test_API.py +24 -0
- sknetwork/hierarchy/tests/test_algos.py +34 -0
- sknetwork/hierarchy/tests/test_metrics.py +62 -0
- sknetwork/hierarchy/tests/test_postprocess.py +57 -0
- sknetwork/linalg/__init__.py +9 -0
- sknetwork/linalg/basics.py +37 -0
- sknetwork/linalg/diteration.cpython-312-darwin.so +0 -0
- sknetwork/linalg/diteration.pyx +47 -0
- sknetwork/linalg/eig_solver.py +93 -0
- sknetwork/linalg/laplacian.py +15 -0
- sknetwork/linalg/normalizer.py +86 -0
- sknetwork/linalg/operators.py +225 -0
- sknetwork/linalg/polynome.py +76 -0
- sknetwork/linalg/ppr_solver.py +170 -0
- sknetwork/linalg/push.cpython-312-darwin.so +0 -0
- sknetwork/linalg/push.pyx +71 -0
- sknetwork/linalg/sparse_lowrank.py +142 -0
- sknetwork/linalg/svd_solver.py +91 -0
- sknetwork/linalg/tests/__init__.py +1 -0
- sknetwork/linalg/tests/test_eig.py +44 -0
- sknetwork/linalg/tests/test_laplacian.py +18 -0
- sknetwork/linalg/tests/test_normalization.py +34 -0
- sknetwork/linalg/tests/test_operators.py +66 -0
- sknetwork/linalg/tests/test_polynome.py +38 -0
- sknetwork/linalg/tests/test_ppr.py +50 -0
- sknetwork/linalg/tests/test_sparse_lowrank.py +61 -0
- sknetwork/linalg/tests/test_svd.py +38 -0
- sknetwork/linkpred/__init__.py +2 -0
- sknetwork/linkpred/base.py +46 -0
- sknetwork/linkpred/nn.py +126 -0
- sknetwork/linkpred/tests/__init__.py +1 -0
- sknetwork/linkpred/tests/test_nn.py +27 -0
- sknetwork/log.py +19 -0
- sknetwork/path/__init__.py +5 -0
- sknetwork/path/dag.py +54 -0
- sknetwork/path/distances.py +98 -0
- sknetwork/path/search.py +31 -0
- sknetwork/path/shortest_path.py +61 -0
- sknetwork/path/tests/__init__.py +1 -0
- sknetwork/path/tests/test_dag.py +37 -0
- sknetwork/path/tests/test_distances.py +62 -0
- sknetwork/path/tests/test_search.py +40 -0
- sknetwork/path/tests/test_shortest_path.py +40 -0
- sknetwork/ranking/__init__.py +8 -0
- sknetwork/ranking/base.py +61 -0
- sknetwork/ranking/betweenness.cpython-312-darwin.so +0 -0
- sknetwork/ranking/betweenness.pyx +97 -0
- sknetwork/ranking/closeness.py +92 -0
- sknetwork/ranking/hits.py +94 -0
- sknetwork/ranking/katz.py +83 -0
- sknetwork/ranking/pagerank.py +110 -0
- sknetwork/ranking/postprocess.py +37 -0
- sknetwork/ranking/tests/__init__.py +1 -0
- sknetwork/ranking/tests/test_API.py +32 -0
- sknetwork/ranking/tests/test_betweenness.py +38 -0
- sknetwork/ranking/tests/test_closeness.py +30 -0
- sknetwork/ranking/tests/test_hits.py +20 -0
- sknetwork/ranking/tests/test_pagerank.py +62 -0
- sknetwork/ranking/tests/test_postprocess.py +26 -0
- sknetwork/regression/__init__.py +4 -0
- sknetwork/regression/base.py +61 -0
- sknetwork/regression/diffusion.py +210 -0
- sknetwork/regression/tests/__init__.py +1 -0
- sknetwork/regression/tests/test_API.py +32 -0
- sknetwork/regression/tests/test_diffusion.py +56 -0
- sknetwork/sknetwork.py +3 -0
- sknetwork/test_base.py +35 -0
- sknetwork/test_log.py +15 -0
- sknetwork/topology/__init__.py +8 -0
- sknetwork/topology/cliques.cpython-312-darwin.so +0 -0
- sknetwork/topology/cliques.pyx +149 -0
- sknetwork/topology/core.cpython-312-darwin.so +0 -0
- sknetwork/topology/core.pyx +90 -0
- sknetwork/topology/cycles.py +243 -0
- sknetwork/topology/minheap.cpython-312-darwin.so +0 -0
- sknetwork/topology/minheap.pxd +20 -0
- sknetwork/topology/minheap.pyx +109 -0
- sknetwork/topology/structure.py +194 -0
- sknetwork/topology/tests/__init__.py +1 -0
- sknetwork/topology/tests/test_cliques.py +28 -0
- sknetwork/topology/tests/test_core.py +19 -0
- sknetwork/topology/tests/test_cycles.py +65 -0
- sknetwork/topology/tests/test_structure.py +85 -0
- sknetwork/topology/tests/test_triangles.py +38 -0
- sknetwork/topology/tests/test_wl.py +72 -0
- sknetwork/topology/triangles.cpython-312-darwin.so +0 -0
- sknetwork/topology/triangles.pyx +151 -0
- sknetwork/topology/weisfeiler_lehman.py +133 -0
- sknetwork/topology/weisfeiler_lehman_core.cpython-312-darwin.so +0 -0
- sknetwork/topology/weisfeiler_lehman_core.pyx +114 -0
- sknetwork/utils/__init__.py +7 -0
- sknetwork/utils/check.py +355 -0
- sknetwork/utils/format.py +221 -0
- sknetwork/utils/membership.py +82 -0
- sknetwork/utils/neighbors.py +115 -0
- sknetwork/utils/tests/__init__.py +1 -0
- sknetwork/utils/tests/test_check.py +190 -0
- sknetwork/utils/tests/test_format.py +63 -0
- sknetwork/utils/tests/test_membership.py +24 -0
- sknetwork/utils/tests/test_neighbors.py +41 -0
- sknetwork/utils/tests/test_tfidf.py +18 -0
- sknetwork/utils/tests/test_values.py +66 -0
- sknetwork/utils/tfidf.py +37 -0
- sknetwork/utils/values.py +76 -0
- sknetwork/visualization/__init__.py +4 -0
- sknetwork/visualization/colors.py +34 -0
- sknetwork/visualization/dendrograms.py +277 -0
- sknetwork/visualization/graphs.py +1039 -0
- sknetwork/visualization/tests/__init__.py +1 -0
- sknetwork/visualization/tests/test_dendrograms.py +53 -0
- sknetwork/visualization/tests/test_graphs.py +176 -0

sknetwork/classification/diffusion.py
@@ -0,0 +1,134 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Created in July 2022
+@author: Thomas Bonald <thomas.bonald@telecom-paris.fr>
+"""
+from typing import Optional, Union
+
+import numpy as np
+from scipy import sparse
+
+from sknetwork.classification.base import BaseClassifier
+from sknetwork.path.distances import get_distances
+from sknetwork.linalg.normalizer import normalize
+from sknetwork.utils.format import get_adjacency_values
+from sknetwork.utils.membership import get_membership
+from sknetwork.utils.neighbors import get_degrees
+
+
+class DiffusionClassifier(BaseClassifier):
+    """Node classification by heat diffusion.
+
+    For each label, the temperature of a node corresponds to its probability to have this label.
+
+    Parameters
+    ----------
+    n_iter : int
+        Number of iterations of the diffusion (discrete time).
+    centering : bool
+        If ``True``, center the temperature of each label to its mean before classification (default).
+    scale : float
+        Multiplicative factor applied to temperatures before softmax (default = 5).
+        Used only when centering is ``True``.
+
+    Attributes
+    ----------
+    labels_ : np.ndarray, shape (n_labels,)
+        Labels of nodes.
+    probs_ : sparse.csr_matrix, shape (n_row, n_labels)
+        Probability distribution over labels.
+    labels_row_ : np.ndarray
+        Labels of rows, for bipartite graphs.
+    labels_col_ : np.ndarray
+        Labels of columns, for bipartite graphs.
+    probs_row_ : sparse.csr_matrix, shape (n_row, n_labels)
+        Probability distributions over labels of rows, for bipartite graphs.
+    probs_col_ : sparse.csr_matrix, shape (n_col, n_labels)
+        Probability distributions over labels of columns, for bipartite graphs.
+    Example
+    -------
+    >>> from sknetwork.data import karate_club
+    >>> diffusion = DiffusionClassifier()
+    >>> graph = karate_club(metadata=True)
+    >>> adjacency = graph.adjacency
+    >>> labels_true = graph.labels
+    >>> labels = {0: labels_true[0], 33: labels_true[33]}
+    >>> labels_pred = diffusion.fit_predict(adjacency, labels)
+    >>> round(np.mean(labels_pred == labels_true), 2)
+    0.97
+
+    References
+    ----------
+    Zhu, X., Lafferty, J., & Rosenfeld, R. (2005). `Semi-supervised learning with graphs`
+    (Doctoral dissertation, Carnegie Mellon University, language technologies institute, school of computer science).
+    """
+    def __init__(self, n_iter: int = 10, centering: bool = True, scale: float = 5):
+        super(DiffusionClassifier, self).__init__()
+
+        if n_iter <= 0:
+            raise ValueError('The number of iterations must be positive.')
+        else:
+            self.n_iter = n_iter
+        self.centering = centering
+        self.scale = scale
+
+    def fit(self, input_matrix: Union[sparse.csr_matrix, np.ndarray],
+            labels: Optional[Union[dict, list, np.ndarray]] = None,
+            labels_row: Optional[Union[dict, list, np.ndarray]] = None,
+            labels_col: Optional[Union[dict, list, np.ndarray]] = None, force_bipartite: bool = False) \
+            -> 'DiffusionClassifier':
+        """Compute the solution to the Dirichlet problem (temperatures at equilibrium).
+
+        Parameters
+        ----------
+        input_matrix : sparse.csr_matrix, np.ndarray
+            Adjacency matrix or biadjacency matrix of the graph.
+        labels : dict, np.ndarray
+            Known labels (dictionary or vector of int). Negative values ignored.
+        labels_row : dict, np.ndarray
+            Labels of rows for bipartite graphs. Negative values ignored.
+        labels_col : dict, np.ndarray
+            Labels of columns for bipartite graphs. Negative values ignored.
+        force_bipartite : bool
+            If ``True``, consider the input matrix as a biadjacency matrix (default = ``False``).
+
+        Returns
+        -------
+        self: :class:`DiffusionClassifier`
+        """
+        adjacency, values, self.bipartite = get_adjacency_values(input_matrix, force_bipartite=force_bipartite,
+                                                                 values=labels,
+                                                                 values_row=labels_row,
+                                                                 values_col=labels_col)
+        labels = values.astype(int)
+        if (labels < 0).all():
+            raise ValueError('At least one node must be given a non-negative label.')
+        labels_reindex = labels.copy()
+        labels_unique, inverse = np.unique(labels[labels >= 0], return_inverse=True)
+        labels_reindex[labels >= 0] = inverse
+        temperatures = get_membership(labels_reindex).toarray()
+        temperatures_seeds = temperatures[labels >= 0]
+        temperatures[labels < 0] = 0.5
+        diffusion = normalize(adjacency)
+        for i in range(self.n_iter):
+            temperatures = diffusion.dot(temperatures)
+            temperatures[labels >= 0] = temperatures_seeds
+        if self.centering:
+            temperatures -= temperatures.mean(axis=0)
+        labels_ = labels_unique[temperatures.argmax(axis=1)]
+
+        # softmax
+        if self.centering:
+            temperatures = np.exp(self.scale * temperatures)
+
+        # set label -1 to nodes not reached by diffusion
+        distances = get_distances(adjacency, source=np.flatnonzero(labels >= 0))
+        labels_[distances < 0] = -1
+        temperatures[distances < 0] = 0
+
+        self.labels_ = labels_
+        self.probs_ = sparse.csr_matrix(normalize(temperatures))
+        self._split_vars(input_matrix.shape)
+
+        return self
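
The class above ships with a doctest on the karate_club graph. The following standalone sketch (not part of the diff) reproduces it and spells out the constructor parameters; it assumes the wheel is installed and that DiffusionClassifier is exported from sknetwork.classification.

import numpy as np
from sknetwork.classification import DiffusionClassifier
from sknetwork.data import karate_club

graph = karate_club(metadata=True)
adjacency = graph.adjacency                              # sparse CSR adjacency matrix (34 nodes)
labels_true = graph.labels
labels_seed = {0: labels_true[0], 33: labels_true[33]}   # two labeled nodes, rest unknown

# n_iter sets the number of discrete diffusion steps; centering and scale shape the
# temperatures before the softmax, as documented in the class docstring.
classifier = DiffusionClassifier(n_iter=10, centering=True, scale=5)
labels_pred = classifier.fit_predict(adjacency, labels_seed)

print(round(float(np.mean(labels_pred == labels_true)), 2))  # ~0.97 per the docstring
print(classifier.probs_.shape)                               # (34, n_labels)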

sknetwork/classification/knn.py
@@ -0,0 +1,139 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Created in November 2019
+@author: Nathan de Lara <nathan.delara@polytechnique.org>
+@author: Thomas Bonald <tbonald@enst.fr>
+"""
+from typing import Optional, Union
+
+import numpy as np
+from scipy import sparse
+
+from sknetwork.classification.base import BaseClassifier
+from sknetwork.embedding.base import BaseEmbedding
+from sknetwork.linalg.normalizer import get_norms, normalize
+from sknetwork.utils.check import check_n_neighbors
+from sknetwork.utils.format import get_adjacency_values
+
+
+class NNClassifier(BaseClassifier):
+    """Node classification by K-nearest neighbors in the embedding space.
+
+    Parameters
+    ----------
+    n_neighbors : int
+        Number of nearest neighbors.
+    embedding_method : :class:`BaseEmbedding`
+        Embedding method used to represent nodes in vector space.
+        If ``None`` (default), use identity.
+    normalize : bool
+        If ``True``, apply normalization so that all vectors have norm 1 in the embedding space.
+
+    Attributes
+    ----------
+    labels_ : np.ndarray, shape (n_labels,)
+        Labels of nodes.
+    probs_ : sparse.csr_matrix, shape (n_row, n_labels)
+        Probability distribution over labels.
+    labels_row_ : np.ndarray
+        Labels of rows, for bipartite graphs.
+    labels_col_ : np.ndarray
+        Labels of columns, for bipartite graphs.
+    probs_row_ : sparse.csr_matrix, shape (n_row, n_labels)
+        Probability distributions over labels of rows, for bipartite graphs.
+    probs_col_ : sparse.csr_matrix, shape (n_col, n_labels)
+        Probability distributions over labels of columns, for bipartite graphs.
+
+    Example
+    -------
+    >>> from sknetwork.classification import NNClassifier
+    >>> from sknetwork.data import karate_club
+    >>> classifier = NNClassifier(n_neighbors=1)
+    >>> graph = karate_club(metadata=True)
+    >>> adjacency = graph.adjacency
+    >>> labels_true = graph.labels
+    >>> labels = {0: labels_true[0], 33: labels_true[33]}
+    >>> labels_pred = classifier.fit_predict(adjacency, labels)
+    >>> round(np.mean(labels_pred == labels_true), 2)
+    0.82
+    """
+    def __init__(self, n_neighbors: int = 3, embedding_method: Optional[BaseEmbedding] = None, normalize: bool = True):
+        super(NNClassifier, self).__init__()
+        self.n_neighbors = n_neighbors
+        self.embedding_method = embedding_method
+        self.normalize = normalize
+
+    @staticmethod
+    def _instantiate_vars(labels: np.ndarray):
+        index_train = np.flatnonzero(labels >= 0)
+        index_test = np.flatnonzero(labels < 0)
+        return index_train, index_test
+
+    def _fit_core(self, embedding, labels, index_train, index_test):
+        n_neighbors = check_n_neighbors(self.n_neighbors, len(index_train))
+
+        norms_train = get_norms(embedding[index_train], p=2)
+        neighbors = []
+        for i in index_test:
+            vector = embedding[i]
+            if sparse.issparse(vector):
+                vector = vector.toarray().ravel()
+            distances = norms_train**2 - 2 * embedding[index_train].dot(vector) + np.sum(vector**2)
+            neighbors += list(index_train[np.argpartition(distances, n_neighbors)[:n_neighbors]])
+        labels_neighbor = labels[neighbors]
+
+        # membership matrix
+        row = list(np.repeat(index_test, n_neighbors))
+        col = list(labels_neighbor)
+        data = list(np.ones_like(labels_neighbor))
+
+        row += list(index_train)
+        col += list(labels[index_train])
+        data += list(np.ones_like(index_train))
+
+        probs = normalize(sparse.csr_matrix((data, (row, col)), shape=(len(labels), np.max(labels) + 1)))
+        labels = np.argmax(probs.toarray(), axis=1)
+
+        return probs, labels
+
+    def fit(self, input_matrix: Union[sparse.csr_matrix, np.ndarray], labels: Union[np.ndarray, list, dict] = None,
+            labels_row: Union[np.ndarray, list, dict] = None,
+            labels_col: Union[np.ndarray, list, dict] = None) -> 'NNClassifier':
+        """Node classification by k-nearest neighbors in the embedding space.
+
+        Parameters
+        ----------
+        input_matrix : sparse.csr_matrix, np.ndarray
+            Adjacency matrix or biadjacency matrix of the graph.
+        labels : np.ndarray, dict
+            Known labels. Negative values ignored.
+        labels_row : np.ndarray, dict
+            Known labels of rows, for bipartite graphs.
+        labels_col : np.ndarray, dict
+            Known labels of columns, for bipartite graphs.
+
+        Returns
+        -------
+        self: :class:`KNN`
+        """
+        adjacency, labels, self.bipartite = get_adjacency_values(input_matrix, values=labels, values_row=labels_row,
+                                                                 values_col=labels_col)
+        labels = labels.astype(int)
+        index_seed, index_remain = self._instantiate_vars(labels)
+
+        if self.embedding_method is None:
+            embedding = adjacency
+        else:
+            embedding = self.embedding_method.fit_transform(adjacency)
+
+        if self.normalize:
+            embedding = normalize(embedding, p=2)
+
+        probs, labels = self._fit_core(embedding, labels, index_seed, index_remain)
+
+        self.labels_ = labels
+        self.probs_ = probs
+        self._split_vars(input_matrix.shape)
+
+        return self
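
A minimal sketch (not part of the diff) of the class above combined with an explicit embedding. It assumes Spectral is exported from sknetwork.embedding (the module sknetwork/embedding/spectral.py is listed in this wheel).

import numpy as np
from sknetwork.classification import NNClassifier
from sknetwork.data import karate_club
from sknetwork.embedding import Spectral

graph = karate_club(metadata=True)
adjacency = graph.adjacency
labels_true = graph.labels
labels_seed = {0: labels_true[0], 33: labels_true[33]}

# With embedding_method=None, the rows of the adjacency matrix are used as vectors;
# here nodes are first embedded in 2 dimensions, then classified by a 3-nearest-neighbor vote.
classifier = NNClassifier(n_neighbors=3, embedding_method=Spectral(2), normalize=True)
labels_pred = classifier.fit_predict(adjacency, labels_seed)
print(round(float(np.mean(labels_pred == labels_true)), 2))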

sknetwork/classification/metrics.py
@@ -0,0 +1,205 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Created in July 2020
+@author: Nathan de Lara <nathan.delara@polytechnique.org>
+@author: Thomas Bonald <thomas.bonald@telecom-paris.fr>
+"""
+from typing import Union, Tuple
+
+import numpy as np
+from scipy import sparse
+
+from sknetwork.utils.check import check_vector_format
+
+
+def get_accuracy_score(labels_true: np.ndarray, labels_pred: np.ndarray) -> float:
+    """Return the proportion of correctly labeled samples.
+    Negative labels ignored.
+
+    Parameters
+    ----------
+    labels_true : np.ndarray
+        True labels.
+    labels_pred : np.ndarray
+        Predicted labels
+
+    Returns
+    -------
+    accuracy : float
+        A score between 0 and 1.
+
+    Examples
+    --------
+    >>> import numpy as np
+    >>> labels_true = np.array([0, 0, 1, 1])
+    >>> labels_pred = np.array([0, 0, 0, 1])
+    >>> round(get_accuracy_score(labels_true, labels_pred), 2)
+    0.75
+    """
+    check_vector_format(labels_true, labels_pred)
+    mask = (labels_true >= 0) & (labels_pred >= 0)
+    if np.sum(mask):
+        return np.mean(labels_true[mask] == labels_pred[mask])
+    else:
+        raise ValueError('No sample with both true non-negative label and predicted non-negative label.')
+
+
+def get_confusion_matrix(labels_true: np.ndarray, labels_pred: np.ndarray) -> sparse.csr_matrix:
+    """Return the confusion matrix in sparse format (true labels on rows, predicted labels on columns).
+    Negative labels ignored.
+
+    Parameters
+    ----------
+    labels_true : np.ndarray
+        True labels.
+    labels_pred : np.ndarray
+        Predicted labels
+
+    Returns
+    -------
+    confusion matrix : sparse.csr_matrix
+        Confusion matrix.
+
+    Examples
+    --------
+    >>> import numpy as np
+    >>> labels_true = np.array([0, 0, 1, 1])
+    >>> labels_pred = np.array([0, 0, 0, 1])
+    >>> get_confusion_matrix(labels_true, labels_pred).toarray()
+    array([[2, 0],
+           [1, 1]])
+    """
+    check_vector_format(labels_true, labels_pred)
+    mask = (labels_true >= 0) & (labels_pred >= 0)
+    if np.sum(mask):
+        n_labels = max(max(labels_true), max(labels_pred)) + 1
+        row = labels_true[mask]
+        col = labels_pred[mask]
+        data = np.ones(np.sum(mask), dtype=int)
+        return sparse.csr_matrix((data, (row, col)), shape=(n_labels, n_labels))
+    else:
+        raise ValueError('No sample with both true non-negative label and predicted non-negative label.')
+
+
+def get_f1_score(labels_true: np.ndarray, labels_pred: np.ndarray, return_precision_recall: bool = False) \
+        -> Union[float, Tuple[float, float, float]]:
+    """Return the f1 score of binary classification.
+    Negative labels ignored.
+
+    Parameters
+    ----------
+    labels_true : np.ndarray
+        True labels.
+    labels_pred : np.ndarray
+        Predicted labels
+    return_precision_recall : bool
+        If ``True``, also return precision and recall.
+
+    Returns
+    -------
+    score, [precision, recall] : np.ndarray
+        F1 score (between 0 and 1). Optionally, also return precision and recall.
+    Examples
+    --------
+    >>> import numpy as np
+    >>> labels_true = np.array([0, 0, 1, 1])
+    >>> labels_pred = np.array([0, 0, 0, 1])
+    >>> round(get_f1_score(labels_true, labels_pred), 2)
+    0.67
+    """
+    values = set(labels_true[labels_true >= 0]) | set(labels_pred[labels_pred >= 0])
+    if values != {0, 1}:
+        raise ValueError('Labels must be binary. '
+                         'Check get_f1_scores or get_average_f1_score for multi-label classification.')
+    if return_precision_recall:
+        f1_scores, precisions, recalls = get_f1_scores(labels_true, labels_pred, True)
+        return f1_scores[1], precisions[1], recalls[1]
+    else:
+        f1_scores = get_f1_scores(labels_true, labels_pred, False)
+        return f1_scores[1]
+
+
+def get_f1_scores(labels_true: np.ndarray, labels_pred: np.ndarray, return_precision_recall: bool = False) \
+        -> Union[np.ndarray, Tuple[np.ndarray, np.ndarray, np.ndarray]]:
+    """Return the f1 scores of multi-label classification (one per label).
+    Negative labels ignored.
+
+    Parameters
+    ----------
+    labels_true : np.ndarray
+        True labels.
+    labels_pred : np.ndarray
+        Predicted labels
+    return_precision_recall : bool
+        If ``True``, also return precisions and recalls.
+
+    Returns
+    -------
+    scores, [precisions, recalls] : np.ndarray
+        F1 scores (between 0 and 1). Optionally, also return F1 precisions and recalls.
+    Examples
+    --------
+    >>> import numpy as np
+    >>> labels_true = np.array([0, 0, 1, 1])
+    >>> labels_pred = np.array([0, 0, 0, 1])
+    >>> np.round(get_f1_scores(labels_true, labels_pred), 2)
+    array([0.8 , 0.67])
+    """
+    confusion = get_confusion_matrix(labels_true, labels_pred)
+    n_labels = confusion.shape[0]
+    counts_correct = confusion.diagonal()
+    counts_true = confusion.dot(np.ones(n_labels))
+    counts_pred = confusion.T.dot(np.ones(n_labels))
+    mask = counts_true > 0
+    recalls = np.zeros(n_labels)
+    recalls[mask] = counts_correct[mask] / counts_true[mask]
+    precisions = np.zeros(n_labels)
+    mask = counts_pred > 0
+    precisions[mask] = counts_correct[mask] / counts_pred[mask]
+    f1_scores = np.zeros(n_labels)
+    mask = (precisions > 0) & (recalls > 0)
+    f1_scores[mask] = 2 / (1 / precisions[mask] + 1 / recalls[mask])
+    if return_precision_recall:
+        return f1_scores, precisions, recalls
+    else:
+        return f1_scores
+
+
+def get_average_f1_score(labels_true: np.ndarray, labels_pred: np.ndarray, average: str = 'macro') -> float:
+    """Return the average f1 score of multi-label classification.
+    Negative labels ignored.
+
+    Parameters
+    ----------
+    labels_true : np.ndarray
+        True labels.
+    labels_pred : np.ndarray
+        Predicted labels
+    average : str
+        Averaging method. Can be either ``'macro'`` (default), ``'micro'`` or ``'weighted'``.
+
+    Returns
+    -------
+    score : float
+        Average F1 score (between 0 and 1).
+    Examples
+    --------
+    >>> import numpy as np
+    >>> labels_true = np.array([0, 0, 1, 1])
+    >>> labels_pred = np.array([0, 0, 0, 1])
+    >>> round(get_average_f1_score(labels_true, labels_pred), 2)
+    0.73
+    """
+    if average == 'micro':
+        # micro averaging = accuracy
+        return get_accuracy_score(labels_true, labels_pred)
+    else:
+        f1_scores = get_f1_scores(labels_true, labels_pred)
+        if average == 'macro':
+            return np.mean(f1_scores)
+        elif average == 'weighted':
+            labels_unique, counts = np.unique(labels_true[labels_true >= 0], return_counts=True)
+            return np.sum(f1_scores[labels_unique] * counts) / np.sum(counts)
+        else:
+            raise ValueError('Check the ``average`` parameter.')
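
A short sketch (not part of the diff) tying the helpers above together on the docstring example; imports go through sknetwork.classification.metrics, the module added by this hunk.

import numpy as np
from sknetwork.classification.metrics import (get_accuracy_score, get_average_f1_score,
                                              get_confusion_matrix, get_f1_scores)

labels_true = np.array([0, 0, 1, 1])
labels_pred = np.array([0, 0, 0, 1])

print(get_confusion_matrix(labels_true, labels_pred).toarray())   # [[2 0] [1 1]]
print(np.round(get_f1_scores(labels_true, labels_pred), 2))       # [0.8  0.67]

# 'macro' is the unweighted mean of the per-label F1 scores: (0.8 + 0.67) / 2 ~ 0.73;
# 'micro' reduces to accuracy, exactly as the code above implements it.
print(round(get_average_f1_score(labels_true, labels_pred, average='macro'), 2))   # 0.73
print(round(get_average_f1_score(labels_true, labels_pred, average='micro'), 2))   # 0.75
print(round(get_accuracy_score(labels_true, labels_pred), 2))                      # 0.75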

sknetwork/classification/pagerank.py
@@ -0,0 +1,66 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Created in March 2020
+@author: Nathan de Lara <nathan.delara@polytechnique.org>
+"""
+from typing import Optional
+
+import numpy as np
+
+from sknetwork.classification.base_rank import RankClassifier
+from sknetwork.ranking.pagerank import PageRank
+
+
+class PageRankClassifier(RankClassifier):
+    """Node classification by multiple personalized PageRanks.
+
+    Parameters
+    ----------
+    damping_factor: float
+        Probability to continue the random walk.
+    solver : str
+        Which solver to use: 'piteration', 'diteration', 'bicgstab', 'lanczos'.
+    n_iter : int
+        Number of iterations for some solvers such as ``'piteration'`` or ``'diteration'``.
+    tol : float
+        Tolerance for the convergence of some solvers such as ``'bicgstab'`` or ``'lanczos'``.
+
+    Attributes
+    ----------
+    labels_ : np.ndarray, shape (n_labels,)
+        Labels of nodes.
+    probs_ : sparse.csr_matrix, shape (n_row, n_labels)
+        Probability distribution over labels.
+    labels_row_ : np.ndarray
+        Labels of rows, for bipartite graphs.
+    labels_col_ : np.ndarray
+        Labels of columns, for bipartite graphs.
+    probs_row_ : sparse.csr_matrix, shape (n_row, n_labels)
+        Probability distributions over labels of rows, for bipartite graphs.
+    probs_col_ : sparse.csr_matrix, shape (n_col, n_labels)
+        Probability distributions over labels of columns, for bipartite graphs.
+
+    Example
+    -------
+    >>> from sknetwork.classification import PageRankClassifier
+    >>> from sknetwork.data import karate_club
+    >>> pagerank = PageRankClassifier()
+    >>> graph = karate_club(metadata=True)
+    >>> adjacency = graph.adjacency
+    >>> labels_true = graph.labels
+    >>> labels = {0: labels_true[0], 33: labels_true[33]}
+    >>> labels_pred = pagerank.fit_predict(adjacency, labels)
+    >>> np.round(np.mean(labels_pred == labels_true), 2)
+    0.97
+
+    References
+    ----------
+    Lin, F., & Cohen, W. W. (2010). `Semi-supervised classification of network data using very few labels.
+    <https://lti.cs.cmu.edu/sites/default/files/research/reports/2009/cmulti09017.pdf>`_
+    In IEEE International Conference on Advances in Social Networks Analysis and Mining.
+    """
+    def __init__(self, damping_factor: float = 0.85, solver: str = 'piteration', n_iter: int = 10, tol: float = 0.,
+                 n_jobs: Optional[int] = None, verbose: bool = False):
+        algorithm = PageRank(damping_factor, solver, n_iter, tol)
+        super(PageRankClassifier, self).__init__(algorithm, n_jobs, verbose)
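
A minimal sketch (not part of the diff) of the wrapper above; since PageRankClassifier runs one personalized PageRank per label, its parameters mirror those of PageRank.

import numpy as np
from sknetwork.classification import PageRankClassifier
from sknetwork.data import karate_club

graph = karate_club(metadata=True)
adjacency = graph.adjacency
labels_true = graph.labels
labels_seed = {0: labels_true[0], 33: labels_true[33]}

classifier = PageRankClassifier(damping_factor=0.85, solver='piteration', n_iter=10)
labels_pred = classifier.fit_predict(adjacency, labels_seed)
print(round(float(np.mean(labels_pred == labels_true)), 2))  # ~0.97 per the docstring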

sknetwork/classification/propagation.py
@@ -0,0 +1,152 @@
+#!/usr/bin/env python3
+# coding: utf-8
+"""
+Created in April 2020
+@author: Thomas Bonald <tbonald@enst.fr>
+"""
+
+from typing import Union
+
+import numpy as np
+from scipy import sparse
+
+from sknetwork.classification.base import BaseClassifier
+from sknetwork.classification.vote import vote_update
+from sknetwork.linalg.normalizer import normalize
+from sknetwork.utils.format import get_adjacency_values
+from sknetwork.utils.membership import get_membership
+
+
+class Propagation(BaseClassifier):
+    """Node classification by label propagation.
+
+    Parameters
+    ----------
+    n_iter : float
+        Maximum number of iterations (-1 for infinity).
+    node_order : str
+        * ``'random'``: node labels are updated in random order.
+        * ``'increasing'``: node labels are updated by increasing order of (in-) weight.
+        * ``'decreasing'``: node labels are updated by decreasing order of (in-) weight.
+        * Otherwise, node labels are updated by index order.
+    weighted : bool
+        If ``True``, the vote of each neighbor is proportional to the edge weight.
+        Otherwise, all votes have weight 1.
+
+    Attributes
+    ----------
+    labels_ : np.ndarray, shape (n_labels,)
+        Labels of nodes.
+    probs_ : sparse.csr_matrix, shape (n_row, n_labels)
+        Probability distribution over labels.
+    labels_row_ : np.ndarray
+        Labels of rows, for bipartite graphs.
+    labels_col_ : np.ndarray
+        Labels of columns, for bipartite graphs.
+    probs_row_ : sparse.csr_matrix, shape (n_row, n_labels)
+        Probability distributions over labels of rows, for bipartite graphs.
+    probs_col_ : sparse.csr_matrix, shape (n_col, n_labels)
+        Probability distributions over labels of columns, for bipartite graphs.
+
+    Example
+    -------
+    >>> from sknetwork.classification import Propagation
+    >>> from sknetwork.data import karate_club
+    >>> propagation = Propagation()
+    >>> graph = karate_club(metadata=True)
+    >>> adjacency = graph.adjacency
+    >>> labels_true = graph.labels
+    >>> labels = {0: labels_true[0], 33: labels_true[33]}
+    >>> labels_pred = propagation.fit_predict(adjacency, labels)
+    >>> np.round(np.mean(labels_pred == labels_true), 2)
+    0.94
+
+    References
+    ----------
+    Raghavan, U. N., Albert, R., & Kumara, S. (2007).
+    `Near linear time algorithm to detect community structures in large-scale networks.
+    <https://arxiv.org/pdf/0709.2938.pdf>`_
+    Physical review E, 76(3), 036106.
+    """
+    def __init__(self, n_iter: float = -1, node_order: str = None, weighted: bool = True):
+        super(Propagation, self).__init__()
+
+        if n_iter < 0:
+            self.n_iter = np.inf
+        else:
+            self.n_iter = n_iter
+        self.node_order = node_order
+        self.weighted = weighted
+
+    @staticmethod
+    def _instantiate_vars(labels: np.ndarray):
+        """Instantiate variables for label propagation."""
+        n = len(labels)
+        if len(set(labels)) == n:
+            index_seed = np.arange(n)
+            index_remain = np.arange(n)
+        else:
+            index_seed = np.argwhere(labels >= 0).ravel()
+            index_remain = np.argwhere(labels < 0).ravel()
+            labels = labels[index_seed]
+        return index_seed.astype(np.int32), index_remain.astype(np.int32), labels.astype(np.int32)
+
+    def fit(self, input_matrix: Union[sparse.csr_matrix, np.ndarray], labels: Union[np.ndarray, list, dict] = None,
+            labels_row: Union[np.ndarray, list, dict] = None,
+            labels_col: Union[np.ndarray, list, dict] = None) -> 'Propagation':
+        """Node classification by label propagation.
+
+        Parameters
+        ----------
+        input_matrix : sparse.csr_matrix, np.ndarray
+            Adjacency matrix or biadjacency matrix of the graph.
+        labels : array, list or dict
+            Known labels. Negative values ignored.
+        labels_row : array, list or dict
+            Known labels of rows, for bipartite graphs.
+        labels_col : array, list or dict
+            Known labels of columns, for bipartite graphs.
+
+        Returns
+        -------
+        self: :class:`Propagation`
+        """
+        adjacency, seeds, self.bipartite = get_adjacency_values(input_matrix, values=labels, values_row=labels_row,
+                                                                values_col=labels_col, which='labels')
+        n = adjacency.shape[0]
+        index_seed, index_remain, labels_seed = self._instantiate_vars(seeds)
+
+        if self.node_order == 'random':
+            np.random.shuffle(index_remain)
+        elif self.node_order == 'decreasing':
+            index = np.argsort(-adjacency.T.dot(np.ones(n))).astype(np.int32)
+            index_remain = index[index_remain]
+        elif self.node_order == 'increasing':
+            index = np.argsort(adjacency.T.dot(np.ones(n))).astype(np.int32)
+            index_remain = index[index_remain]
+
+        labels = -np.ones(n, dtype=np.int32)
+        labels[index_seed] = labels_seed
+        labels_remain = np.zeros_like(index_remain, dtype=np.int32)
+
+        indptr = adjacency.indptr.astype(np.int32)
+        indices = adjacency.indices.astype(np.int32)
+        if self.weighted:
+            data = adjacency.data.astype(np.float32)
+        else:
+            data = np.ones(n, dtype=np.float32)
+
+        t = 0
+        while t < self.n_iter and not np.array_equal(labels_remain, labels[index_remain]):
+            t += 1
+            labels_remain = labels[index_remain].copy()
+            labels = np.asarray(vote_update(indptr, indices, data, labels, index_remain))
+
+        probs = get_membership(labels)
+        probs = normalize(adjacency.dot(probs))
+
+        self.labels_ = labels
+        self.probs_ = probs
+        self._split_vars(input_matrix.shape)
+
+        return self
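
A minimal sketch (not part of the diff) of label propagation with a non-default update order; node_order and weighted are the two knobs documented in the class above.

import numpy as np
from sknetwork.classification import Propagation
from sknetwork.data import karate_club

graph = karate_club(metadata=True)
adjacency = graph.adjacency
labels_true = graph.labels
labels_seed = {0: labels_true[0], 33: labels_true[33]}

# Update unlabeled nodes by decreasing (in-)weight; n_iter=-1 iterates until labels stabilize.
propagation = Propagation(n_iter=-1, node_order='decreasing')
labels_pred = propagation.fit_predict(adjacency, labels_seed)
print(round(float(np.mean(labels_pred == labels_true)), 2))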