scikit-network 0.33.3__cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of scikit-network might be problematic. Click here for more details.
- scikit_network-0.33.3.dist-info/METADATA +122 -0
- scikit_network-0.33.3.dist-info/RECORD +229 -0
- scikit_network-0.33.3.dist-info/WHEEL +6 -0
- scikit_network-0.33.3.dist-info/licenses/AUTHORS.rst +43 -0
- scikit_network-0.33.3.dist-info/licenses/LICENSE +34 -0
- scikit_network-0.33.3.dist-info/top_level.txt +1 -0
- scikit_network.libs/libgomp-d22c30c5.so.1.0.0 +0 -0
- sknetwork/__init__.py +21 -0
- sknetwork/base.py +67 -0
- sknetwork/classification/__init__.py +8 -0
- sknetwork/classification/base.py +142 -0
- sknetwork/classification/base_rank.py +133 -0
- sknetwork/classification/diffusion.py +134 -0
- sknetwork/classification/knn.py +139 -0
- sknetwork/classification/metrics.py +205 -0
- sknetwork/classification/pagerank.py +66 -0
- sknetwork/classification/propagation.py +152 -0
- sknetwork/classification/tests/__init__.py +1 -0
- sknetwork/classification/tests/test_API.py +30 -0
- sknetwork/classification/tests/test_diffusion.py +77 -0
- sknetwork/classification/tests/test_knn.py +23 -0
- sknetwork/classification/tests/test_metrics.py +53 -0
- sknetwork/classification/tests/test_pagerank.py +20 -0
- sknetwork/classification/tests/test_propagation.py +24 -0
- sknetwork/classification/vote.cpp +27587 -0
- sknetwork/classification/vote.cpython-313-aarch64-linux-gnu.so +0 -0
- sknetwork/classification/vote.pyx +56 -0
- sknetwork/clustering/__init__.py +8 -0
- sknetwork/clustering/base.py +172 -0
- sknetwork/clustering/kcenters.py +253 -0
- sknetwork/clustering/leiden.py +242 -0
- sknetwork/clustering/leiden_core.cpp +31578 -0
- sknetwork/clustering/leiden_core.cpython-313-aarch64-linux-gnu.so +0 -0
- sknetwork/clustering/leiden_core.pyx +124 -0
- sknetwork/clustering/louvain.py +286 -0
- sknetwork/clustering/louvain_core.cpp +31223 -0
- sknetwork/clustering/louvain_core.cpython-313-aarch64-linux-gnu.so +0 -0
- sknetwork/clustering/louvain_core.pyx +124 -0
- sknetwork/clustering/metrics.py +91 -0
- sknetwork/clustering/postprocess.py +66 -0
- sknetwork/clustering/propagation_clustering.py +104 -0
- sknetwork/clustering/tests/__init__.py +1 -0
- sknetwork/clustering/tests/test_API.py +38 -0
- sknetwork/clustering/tests/test_kcenters.py +60 -0
- sknetwork/clustering/tests/test_leiden.py +34 -0
- sknetwork/clustering/tests/test_louvain.py +135 -0
- sknetwork/clustering/tests/test_metrics.py +50 -0
- sknetwork/clustering/tests/test_postprocess.py +39 -0
- sknetwork/data/__init__.py +6 -0
- sknetwork/data/base.py +33 -0
- sknetwork/data/load.py +406 -0
- sknetwork/data/models.py +459 -0
- sknetwork/data/parse.py +644 -0
- sknetwork/data/test_graphs.py +84 -0
- sknetwork/data/tests/__init__.py +1 -0
- sknetwork/data/tests/test_API.py +30 -0
- sknetwork/data/tests/test_base.py +14 -0
- sknetwork/data/tests/test_load.py +95 -0
- sknetwork/data/tests/test_models.py +52 -0
- sknetwork/data/tests/test_parse.py +250 -0
- sknetwork/data/tests/test_test_graphs.py +29 -0
- sknetwork/data/tests/test_toy_graphs.py +68 -0
- sknetwork/data/timeout.py +38 -0
- sknetwork/data/toy_graphs.py +611 -0
- sknetwork/embedding/__init__.py +8 -0
- sknetwork/embedding/base.py +94 -0
- sknetwork/embedding/force_atlas.py +198 -0
- sknetwork/embedding/louvain_embedding.py +148 -0
- sknetwork/embedding/random_projection.py +135 -0
- sknetwork/embedding/spectral.py +141 -0
- sknetwork/embedding/spring.py +198 -0
- sknetwork/embedding/svd.py +359 -0
- sknetwork/embedding/tests/__init__.py +1 -0
- sknetwork/embedding/tests/test_API.py +49 -0
- sknetwork/embedding/tests/test_force_atlas.py +35 -0
- sknetwork/embedding/tests/test_louvain_embedding.py +33 -0
- sknetwork/embedding/tests/test_random_projection.py +28 -0
- sknetwork/embedding/tests/test_spectral.py +81 -0
- sknetwork/embedding/tests/test_spring.py +50 -0
- sknetwork/embedding/tests/test_svd.py +43 -0
- sknetwork/gnn/__init__.py +10 -0
- sknetwork/gnn/activation.py +117 -0
- sknetwork/gnn/base.py +181 -0
- sknetwork/gnn/base_activation.py +90 -0
- sknetwork/gnn/base_layer.py +109 -0
- sknetwork/gnn/gnn_classifier.py +305 -0
- sknetwork/gnn/layer.py +153 -0
- sknetwork/gnn/loss.py +180 -0
- sknetwork/gnn/neighbor_sampler.py +65 -0
- sknetwork/gnn/optimizer.py +164 -0
- sknetwork/gnn/tests/__init__.py +1 -0
- sknetwork/gnn/tests/test_activation.py +56 -0
- sknetwork/gnn/tests/test_base.py +75 -0
- sknetwork/gnn/tests/test_base_layer.py +37 -0
- sknetwork/gnn/tests/test_gnn_classifier.py +130 -0
- sknetwork/gnn/tests/test_layers.py +80 -0
- sknetwork/gnn/tests/test_loss.py +33 -0
- sknetwork/gnn/tests/test_neigh_sampler.py +23 -0
- sknetwork/gnn/tests/test_optimizer.py +43 -0
- sknetwork/gnn/tests/test_utils.py +41 -0
- sknetwork/gnn/utils.py +127 -0
- sknetwork/hierarchy/__init__.py +6 -0
- sknetwork/hierarchy/base.py +96 -0
- sknetwork/hierarchy/louvain_hierarchy.py +272 -0
- sknetwork/hierarchy/metrics.py +234 -0
- sknetwork/hierarchy/paris.cpp +37871 -0
- sknetwork/hierarchy/paris.cpython-313-aarch64-linux-gnu.so +0 -0
- sknetwork/hierarchy/paris.pyx +316 -0
- sknetwork/hierarchy/postprocess.py +350 -0
- sknetwork/hierarchy/tests/__init__.py +1 -0
- sknetwork/hierarchy/tests/test_API.py +24 -0
- sknetwork/hierarchy/tests/test_algos.py +34 -0
- sknetwork/hierarchy/tests/test_metrics.py +62 -0
- sknetwork/hierarchy/tests/test_postprocess.py +57 -0
- sknetwork/linalg/__init__.py +9 -0
- sknetwork/linalg/basics.py +37 -0
- sknetwork/linalg/diteration.cpp +27403 -0
- sknetwork/linalg/diteration.cpython-313-aarch64-linux-gnu.so +0 -0
- sknetwork/linalg/diteration.pyx +47 -0
- sknetwork/linalg/eig_solver.py +93 -0
- sknetwork/linalg/laplacian.py +15 -0
- sknetwork/linalg/normalizer.py +86 -0
- sknetwork/linalg/operators.py +225 -0
- sknetwork/linalg/polynome.py +76 -0
- sknetwork/linalg/ppr_solver.py +170 -0
- sknetwork/linalg/push.cpp +31075 -0
- sknetwork/linalg/push.cpython-313-aarch64-linux-gnu.so +0 -0
- sknetwork/linalg/push.pyx +71 -0
- sknetwork/linalg/sparse_lowrank.py +142 -0
- sknetwork/linalg/svd_solver.py +91 -0
- sknetwork/linalg/tests/__init__.py +1 -0
- sknetwork/linalg/tests/test_eig.py +44 -0
- sknetwork/linalg/tests/test_laplacian.py +18 -0
- sknetwork/linalg/tests/test_normalization.py +34 -0
- sknetwork/linalg/tests/test_operators.py +66 -0
- sknetwork/linalg/tests/test_polynome.py +38 -0
- sknetwork/linalg/tests/test_ppr.py +50 -0
- sknetwork/linalg/tests/test_sparse_lowrank.py +61 -0
- sknetwork/linalg/tests/test_svd.py +38 -0
- sknetwork/linkpred/__init__.py +2 -0
- sknetwork/linkpred/base.py +46 -0
- sknetwork/linkpred/nn.py +126 -0
- sknetwork/linkpred/tests/__init__.py +1 -0
- sknetwork/linkpred/tests/test_nn.py +27 -0
- sknetwork/log.py +19 -0
- sknetwork/path/__init__.py +5 -0
- sknetwork/path/dag.py +54 -0
- sknetwork/path/distances.py +98 -0
- sknetwork/path/search.py +31 -0
- sknetwork/path/shortest_path.py +61 -0
- sknetwork/path/tests/__init__.py +1 -0
- sknetwork/path/tests/test_dag.py +37 -0
- sknetwork/path/tests/test_distances.py +62 -0
- sknetwork/path/tests/test_search.py +40 -0
- sknetwork/path/tests/test_shortest_path.py +40 -0
- sknetwork/ranking/__init__.py +8 -0
- sknetwork/ranking/base.py +61 -0
- sknetwork/ranking/betweenness.cpp +9710 -0
- sknetwork/ranking/betweenness.cpython-313-aarch64-linux-gnu.so +0 -0
- sknetwork/ranking/betweenness.pyx +97 -0
- sknetwork/ranking/closeness.py +92 -0
- sknetwork/ranking/hits.py +94 -0
- sknetwork/ranking/katz.py +83 -0
- sknetwork/ranking/pagerank.py +110 -0
- sknetwork/ranking/postprocess.py +37 -0
- sknetwork/ranking/tests/__init__.py +1 -0
- sknetwork/ranking/tests/test_API.py +32 -0
- sknetwork/ranking/tests/test_betweenness.py +38 -0
- sknetwork/ranking/tests/test_closeness.py +30 -0
- sknetwork/ranking/tests/test_hits.py +20 -0
- sknetwork/ranking/tests/test_pagerank.py +62 -0
- sknetwork/ranking/tests/test_postprocess.py +26 -0
- sknetwork/regression/__init__.py +4 -0
- sknetwork/regression/base.py +61 -0
- sknetwork/regression/diffusion.py +210 -0
- sknetwork/regression/tests/__init__.py +1 -0
- sknetwork/regression/tests/test_API.py +32 -0
- sknetwork/regression/tests/test_diffusion.py +56 -0
- sknetwork/sknetwork.py +3 -0
- sknetwork/test_base.py +35 -0
- sknetwork/test_log.py +15 -0
- sknetwork/topology/__init__.py +8 -0
- sknetwork/topology/cliques.cpp +32568 -0
- sknetwork/topology/cliques.cpython-313-aarch64-linux-gnu.so +0 -0
- sknetwork/topology/cliques.pyx +149 -0
- sknetwork/topology/core.cpp +30654 -0
- sknetwork/topology/core.cpython-313-aarch64-linux-gnu.so +0 -0
- sknetwork/topology/core.pyx +90 -0
- sknetwork/topology/cycles.py +243 -0
- sknetwork/topology/minheap.cpp +27335 -0
- sknetwork/topology/minheap.cpython-313-aarch64-linux-gnu.so +0 -0
- sknetwork/topology/minheap.pxd +20 -0
- sknetwork/topology/minheap.pyx +109 -0
- sknetwork/topology/structure.py +194 -0
- sknetwork/topology/tests/__init__.py +1 -0
- sknetwork/topology/tests/test_cliques.py +28 -0
- sknetwork/topology/tests/test_core.py +19 -0
- sknetwork/topology/tests/test_cycles.py +65 -0
- sknetwork/topology/tests/test_structure.py +85 -0
- sknetwork/topology/tests/test_triangles.py +38 -0
- sknetwork/topology/tests/test_wl.py +72 -0
- sknetwork/topology/triangles.cpp +8897 -0
- sknetwork/topology/triangles.cpython-313-aarch64-linux-gnu.so +0 -0
- sknetwork/topology/triangles.pyx +151 -0
- sknetwork/topology/weisfeiler_lehman.py +133 -0
- sknetwork/topology/weisfeiler_lehman_core.cpp +27638 -0
- sknetwork/topology/weisfeiler_lehman_core.cpython-313-aarch64-linux-gnu.so +0 -0
- sknetwork/topology/weisfeiler_lehman_core.pyx +114 -0
- sknetwork/utils/__init__.py +7 -0
- sknetwork/utils/check.py +355 -0
- sknetwork/utils/format.py +221 -0
- sknetwork/utils/membership.py +82 -0
- sknetwork/utils/neighbors.py +115 -0
- sknetwork/utils/tests/__init__.py +1 -0
- sknetwork/utils/tests/test_check.py +190 -0
- sknetwork/utils/tests/test_format.py +63 -0
- sknetwork/utils/tests/test_membership.py +24 -0
- sknetwork/utils/tests/test_neighbors.py +41 -0
- sknetwork/utils/tests/test_tfidf.py +18 -0
- sknetwork/utils/tests/test_values.py +66 -0
- sknetwork/utils/tfidf.py +37 -0
- sknetwork/utils/values.py +76 -0
- sknetwork/visualization/__init__.py +4 -0
- sknetwork/visualization/colors.py +34 -0
- sknetwork/visualization/dendrograms.py +277 -0
- sknetwork/visualization/graphs.py +1039 -0
- sknetwork/visualization/tests/__init__.py +1 -0
- sknetwork/visualization/tests/test_dendrograms.py +53 -0
- sknetwork/visualization/tests/test_graphs.py +176 -0
|
@@ -0,0 +1,142 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
# -*- coding: utf-8 -*-
|
|
3
|
+
"""
|
|
4
|
+
Created in November 2019
|
|
5
|
+
@author: Nathan de Lara <nathan.delara@polytechnique.org>
|
|
6
|
+
"""
|
|
7
|
+
from abc import ABC
|
|
8
|
+
|
|
9
|
+
import numpy as np
|
|
10
|
+
from scipy import sparse
|
|
11
|
+
|
|
12
|
+
from sknetwork.base import Algorithm
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class BaseClassifier(Algorithm, ABC):
    """Common interface of node classifiers.

    Subclasses are expected to provide a ``fit`` method that fills the attributes below.

    Attributes
    ----------
    bipartite : bool
        ``True`` if the fitted graph is bipartite.
    labels_ : np.ndarray
        Label of each node (one entry per node; restricted to rows for bipartite graphs).
    probs_ : sparse.csr_matrix, shape (n_row, n_labels)
        Probability distribution over labels (soft classification).
    labels_row_ , labels_col_ : np.ndarray
        Labels of rows and columns (for bipartite graphs).
    probs_row_, probs_col_ : sparse.csr_matrix, shapes (n_row, n_labels) and (n_col, n_labels)
        Probability distributions over labels for rows and columns (for bipartite graphs).
    """

    def __init__(self):
        # Every fitted attribute starts unset; ``fit`` of the subclass fills them.
        self.bipartite = None
        self.labels_ = None
        self.probs_ = None
        self.labels_row_ = None
        self.labels_col_ = None
        self.probs_row_ = None
        self.probs_col_ = None

    def predict(self, columns: bool = False) -> np.ndarray:
        """Give the labels found by the last call to ``fit``.

        Parameters
        ----------
        columns : bool
            If ``True``, return the labels of columns instead of those of rows.

        Returns
        -------
        labels : np.ndarray
            Labels.
        """
        return self.labels_col_ if columns else self.labels_

    def fit_predict(self, *args, **kwargs) -> np.ndarray:
        """Run ``fit`` with the given arguments, then return the labels.

        Returns
        -------
        labels : np.ndarray
            Labels.
        """
        self.fit(*args, **kwargs)
        return self.predict()

    def predict_proba(self, columns=False) -> np.ndarray:
        """Give the probability distribution over labels as a dense array.

        Parameters
        ----------
        columns : bool
            If ``True``, return the distribution of columns instead of that of rows.

        Returns
        -------
        probs : np.ndarray
            Probability distribution over labels.
        """
        probs = self.probs_col_ if columns else self.probs_
        return probs.toarray()

    def fit_predict_proba(self, *args, **kwargs) -> np.ndarray:
        """Run ``fit`` with the given arguments, then return the dense probability distribution.

        Returns
        -------
        probs : np.ndarray
            Probability of each label.
        """
        self.fit(*args, **kwargs)
        return self.predict_proba()

    def transform(self, columns=False) -> sparse.csr_matrix:
        """Give the probability distribution over labels in sparse format.

        Parameters
        ----------
        columns : bool
            If ``True``, return the distribution of columns instead of that of rows.

        Returns
        -------
        probs : sparse.csr_matrix
            Probability distribution over labels.
        """
        return self.probs_col_ if columns else self.probs_

    def fit_transform(self, *args, **kwargs) -> sparse.csr_matrix:
        """Run ``fit`` with the given arguments, then return the sparse probability distribution.

        Returns
        -------
        probs : sparse.csr_matrix
            Probability distribution over labels.
        """
        self.fit(*args, **kwargs)
        return self.transform()

    def _split_vars(self, shape: tuple):
        """Derive the row / column attributes from ``labels_`` and ``probs_``.

        For a bipartite graph the first ``shape[0]`` entries describe rows and the
        remaining ones describe columns; ``labels_`` and ``probs_`` are then
        restricted to rows. Otherwise the row and column attributes are simply
        aliases of ``labels_`` and ``probs_``.
        """
        if not self.bipartite:
            self.labels_row_ = self.labels_col_ = self.labels_
            self.probs_row_ = self.probs_col_ = self.probs_
            return self

        n_row = shape[0]
        all_labels = self.labels_
        self.labels_row_, self.labels_col_ = all_labels[:n_row], all_labels[n_row:]
        self.labels_ = self.labels_row_
        all_probs = self.probs_
        self.probs_row_, self.probs_col_ = all_probs[:n_row], all_probs[n_row:]
        self.probs_ = self.probs_row_
        return self
|
|
@@ -0,0 +1,133 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
# -*- coding: utf-8 -*-
|
|
3
|
+
"""
|
|
4
|
+
Created in March 2020
|
|
5
|
+
@author: Nathan de Lara <nathan.delara@polytechnique.org>
|
|
6
|
+
"""
|
|
7
|
+
from functools import partial
|
|
8
|
+
from multiprocessing import Pool
|
|
9
|
+
from typing import Union, Optional
|
|
10
|
+
|
|
11
|
+
import numpy as np
|
|
12
|
+
from scipy import sparse
|
|
13
|
+
|
|
14
|
+
from sknetwork.classification.base import BaseClassifier
|
|
15
|
+
from sknetwork.linalg.normalizer import normalize
|
|
16
|
+
from sknetwork.ranking.base import BaseRanking
|
|
17
|
+
from sknetwork.utils.check import check_labels, check_n_jobs
|
|
18
|
+
from sknetwork.utils.format import get_adjacency_values
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class RankClassifier(BaseClassifier):
    """Generic class for ranking based classifiers.

    One ranking is computed per label (one-vs-all seeds); each node receives the
    label with the highest score.

    Parameters
    ----------
    algorithm :
        Which ranking algorithm to use.
    n_jobs :
        If positive, number of parallel jobs allowed (-1 means maximum number).
        If ``None``, no parallel computations are made.
    verbose :
        Verbosity flag. It is only stored on the instance by this class.

    Attributes
    ----------
    bipartite : bool
        If ``True``, the fitted graph is bipartite.
    labels_ : np.ndarray
        Label of each node (one entry per node; restricted to rows after fitting).
    probs_ : sparse.csr_matrix, shape (n_row, n_labels)
        Probability distribution over labels.
    labels_row_, labels_col_ : np.ndarray
        Labels of rows and columns, for bipartite graphs.
    probs_row_, probs_col_ : sparse.csr_matrix, shapes (n_row, n_labels) and (n_col, n_labels)
        Probability distributions over labels for rows and columns (for bipartite graphs).
    """
    def __init__(self, algorithm: BaseRanking, n_jobs: Optional[int] = None, verbose: bool = False):
        super(RankClassifier, self).__init__()

        self.algorithm = algorithm
        self.n_jobs = check_n_jobs(n_jobs)
        self.verbose = verbose

    @staticmethod
    def _process_labels(labels: np.ndarray) -> list:
        """Make one-vs-all binary labels from labels.

        Parameters
        ----------
        labels
            Vector of labels.

        Returns
        -------
        List of binary vectors (1 where the node has the label, 0 elsewhere),
        one per label returned by ``check_labels``.
        """
        labels_unique, _ = check_labels(labels)
        return [np.array(labels == label).astype(int) for label in labels_unique]

    @staticmethod
    def _process_scores(scores: np.ndarray) -> np.ndarray:
        """Post-processing of the scores.

        The default implementation returns the scores unchanged.

        Parameters
        ----------
        scores
            Matrix of scores, shape number of nodes x number of labels.

        Returns
        -------
        scores : np.ndarray
        """
        return scores

    def _split_vars(self, shape):
        """Split the vector of labels and the matrix of probabilities into rows and columns.

        NOTE(review): unlike ``BaseClassifier._split_vars``, the split is applied
        whether or not the graph is bipartite, so the column attributes are empty
        when there is one entry per row only. Kept as is for backward compatibility.
        """
        n_row = shape[0]
        self.labels_row_ = self.labels_[:n_row]
        self.labels_col_ = self.labels_[n_row:]
        self.labels_ = self.labels_row_
        self.probs_row_ = self.probs_[:n_row]
        self.probs_col_ = self.probs_[n_row:]
        self.probs_ = self.probs_row_
        return self

    def fit(self, input_matrix: Union[sparse.csr_matrix, np.ndarray], labels: Union[np.ndarray, dict] = None,
            labels_row: Union[np.ndarray, dict] = None, labels_col: Union[np.ndarray, dict] = None) -> 'RankClassifier':
        """Fit algorithm to data.

        Parameters
        ----------
        input_matrix :
            Adjacency matrix or biadjacency matrix of the graph.
        labels :
            Known labels (dictionary or array; negative values ignored).
        labels_row, labels_col :
            Known labels on rows and columns (for bipartite graphs).

        Returns
        -------
        self: :class:`RankClassifier`
        """
        # Record the bipartite flag on the instance, as the other classifiers do.
        adjacency, seeds_labels, self.bipartite = get_adjacency_values(input_matrix, values=labels,
                                                                       values_row=labels_row,
                                                                       values_col=labels_col)
        seeds_labels = seeds_labels.astype(int)
        labels_unique, _ = check_labels(seeds_labels)
        seeds_all = self._process_labels(seeds_labels)

        # One ranking per label, each seeded with the nodes carrying that label.
        local_function = partial(self.algorithm.fit_predict, adjacency)
        with Pool(self.n_jobs) as pool:
            scores = np.array(pool.map(local_function, seeds_all))
        scores = scores.T  # nodes x labels

        scores = self._process_scores(scores)
        scores = normalize(scores)

        # Column j of the scores refers to label labels_unique[j]: map the column
        # indices to the actual label values while building the matrix, so that
        # every index is valid for the declared shape.
        nonzero = sparse.coo_matrix(scores)
        n_nodes = adjacency.shape[0]
        n_labels = np.max(seeds_labels) + 1
        self.probs_ = sparse.csr_matrix((nonzero.data, (nonzero.row, labels_unique[nonzero.col])),
                                        shape=(n_nodes, n_labels))
        self.labels_ = labels_unique[np.argmax(scores, axis=1)]
        self._split_vars(input_matrix.shape)

        return self
|
|
@@ -0,0 +1,134 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
# -*- coding: utf-8 -*-
|
|
3
|
+
"""
|
|
4
|
+
Created in July 2022
|
|
5
|
+
@author: Thomas Bonald <thomas.bonald@telecom-paris.fr>
|
|
6
|
+
"""
|
|
7
|
+
from typing import Optional, Union
|
|
8
|
+
|
|
9
|
+
import numpy as np
|
|
10
|
+
from scipy import sparse
|
|
11
|
+
|
|
12
|
+
from sknetwork.classification.base import BaseClassifier
|
|
13
|
+
from sknetwork.path.distances import get_distances
|
|
14
|
+
from sknetwork.linalg.normalizer import normalize
|
|
15
|
+
from sknetwork.utils.format import get_adjacency_values
|
|
16
|
+
from sknetwork.utils.membership import get_membership
|
|
17
|
+
from sknetwork.utils.neighbors import get_degrees
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class DiffusionClassifier(BaseClassifier):
    """Node classification by heat diffusion.

    For each label, the temperature of a node corresponds to its probability to have this label.

    Parameters
    ----------
    n_iter : int
        Number of iterations of the diffusion (discrete time).
    centering : bool
        If ``True``, center the temperature of each label to its mean before classification (default).
    scale : float
        Multiplicative factor applied to temperatures before softmax (default = 5).
        Used only when centering is ``True``.

    Attributes
    ----------
    labels_ : np.ndarray
        Labels of nodes (one entry per node; -1 for nodes not reached by the diffusion).
    probs_ : sparse.csr_matrix, shape (n_row, n_labels)
        Probability distribution over labels.
    labels_row_ : np.ndarray
        Labels of rows, for bipartite graphs.
    labels_col_ : np.ndarray
        Labels of columns, for bipartite graphs.
    probs_row_ : sparse.csr_matrix, shape (n_row, n_labels)
        Probability distributions over labels of rows, for bipartite graphs.
    probs_col_ : sparse.csr_matrix, shape (n_col, n_labels)
        Probability distributions over labels of columns, for bipartite graphs.
    Example
    -------
    >>> from sknetwork.data import karate_club
    >>> diffusion = DiffusionClassifier()
    >>> graph = karate_club(metadata=True)
    >>> adjacency = graph.adjacency
    >>> labels_true = graph.labels
    >>> labels = {0: labels_true[0], 33: labels_true[33]}
    >>> labels_pred = diffusion.fit_predict(adjacency, labels)
    >>> float(round(np.mean(labels_pred == labels_true), 2))
    0.97

    References
    ----------
    Zhu, X., Lafferty, J., & Rosenfeld, R. (2005). `Semi-supervised learning with graphs`
    (Doctoral dissertation, Carnegie Mellon University, language technologies institute, school of computer science).
    """
    def __init__(self, n_iter: int = 10, centering: bool = True, scale: float = 5):
        super(DiffusionClassifier, self).__init__()

        if n_iter <= 0:
            raise ValueError('The number of iterations must be positive.')
        self.n_iter = n_iter
        self.centering = centering
        self.scale = scale

    def fit(self, input_matrix: Union[sparse.csr_matrix, np.ndarray],
            labels: Optional[Union[dict, list, np.ndarray]] = None,
            labels_row: Optional[Union[dict, list, np.ndarray]] = None,
            labels_col: Optional[Union[dict, list, np.ndarray]] = None, force_bipartite: bool = False) \
            -> 'DiffusionClassifier':
        """Diffuse the known labels through the graph and classify every node.

        Seeds (nodes with a non-negative label) keep their temperature fixed
        during the ``n_iter`` diffusion steps.

        Parameters
        ----------
        input_matrix : sparse.csr_matrix, np.ndarray
            Adjacency matrix or biadjacency matrix of the graph.
        labels : dict, np.ndarray
            Known labels (dictionary or vector of int). Negative values ignored.
        labels_row : dict, np.ndarray
            Labels of rows for bipartite graphs. Negative values ignored.
        labels_col : dict, np.ndarray
            Labels of columns for bipartite graphs. Negative values ignored.
        force_bipartite : bool
            If ``True``, consider the input matrix as a biadjacency matrix (default = ``False``).

        Returns
        -------
        self: :class:`DiffusionClassifier`

        Raises
        ------
        ValueError
            If no node has a non-negative label.
        """
        adjacency, values, self.bipartite = get_adjacency_values(input_matrix, force_bipartite=force_bipartite,
                                                                 values=labels,
                                                                 values_row=labels_row,
                                                                 values_col=labels_col)
        labels = values.astype(int)
        is_seed = labels >= 0
        if not is_seed.any():
            raise ValueError('At least one node must be given a non-negative label.')

        # Map the known labels to consecutive indices 0, 1, ... (one column each).
        labels_unique, inverse = np.unique(labels[is_seed], return_inverse=True)
        labels_reindex = labels.copy()
        labels_reindex[is_seed] = inverse

        # Initial temperatures: membership for seeds, 0.5 for all other nodes.
        temperatures = get_membership(labels_reindex).toarray()
        temperatures_seeds = temperatures[is_seed]
        temperatures[~is_seed] = 0.5

        diffusion = normalize(adjacency)
        for _ in range(self.n_iter):
            temperatures = diffusion.dot(temperatures)
            # seeds act as boundary conditions: reset them after each step
            temperatures[is_seed] = temperatures_seeds

        centered = self.centering
        if centered:
            temperatures -= temperatures.mean(axis=0)
        # labels are chosen before the softmax is applied
        labels_ = labels_unique[temperatures.argmax(axis=1)]

        # softmax
        if centered:
            temperatures = np.exp(self.scale * temperatures)

        # set label -1 to nodes not reached by diffusion
        unreached = get_distances(adjacency, source=np.flatnonzero(is_seed)) < 0
        labels_[unreached] = -1
        temperatures[unreached] = 0

        self.labels_ = labels_
        self.probs_ = sparse.csr_matrix(normalize(temperatures))
        self._split_vars(input_matrix.shape)

        return self
|
|
@@ -0,0 +1,139 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
# -*- coding: utf-8 -*-
|
|
3
|
+
"""
|
|
4
|
+
Created in November 2019
|
|
5
|
+
@author: Nathan de Lara <nathan.delara@polytechnique.org>
|
|
6
|
+
@author: Thomas Bonald <tbonald@enst.fr>
|
|
7
|
+
"""
|
|
8
|
+
from typing import Optional, Union
|
|
9
|
+
|
|
10
|
+
import numpy as np
|
|
11
|
+
from scipy import sparse
|
|
12
|
+
|
|
13
|
+
from sknetwork.classification.base import BaseClassifier
|
|
14
|
+
from sknetwork.embedding.base import BaseEmbedding
|
|
15
|
+
from sknetwork.linalg.normalizer import get_norms, normalize
|
|
16
|
+
from sknetwork.utils.check import check_n_neighbors
|
|
17
|
+
from sknetwork.utils.format import get_adjacency_values
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class NNClassifier(BaseClassifier):
    """Node classification by K-nearest neighbors in the embedding space.

    Parameters
    ----------
    n_neighbors : int
        Number of nearest neighbors.
    embedding_method : :class:`BaseEmbedding`
        Embedding method used to represent nodes in vector space.
        If ``None`` (default), use identity.
    normalize : bool
        If ``True``, apply normalization so that all vectors have norm 1 in the embedding space.

    Attributes
    ----------
    labels_ : np.ndarray
        Labels of nodes (one entry per node).
    probs_ : sparse.csr_matrix, shape (n_row, n_labels)
        Probability distribution over labels.
    labels_row_ : np.ndarray
        Labels of rows, for bipartite graphs.
    labels_col_ : np.ndarray
        Labels of columns, for bipartite graphs.
    probs_row_ : sparse.csr_matrix, shape (n_row, n_labels)
        Probability distributions over labels of rows, for bipartite graphs.
    probs_col_ : sparse.csr_matrix, shape (n_col, n_labels)
        Probability distributions over labels of columns, for bipartite graphs.

    Example
    -------
    >>> from sknetwork.classification import NNClassifier
    >>> from sknetwork.data import karate_club
    >>> classifier = NNClassifier(n_neighbors=1)
    >>> graph = karate_club(metadata=True)
    >>> adjacency = graph.adjacency
    >>> labels_true = graph.labels
    >>> labels = {0: labels_true[0], 33: labels_true[33]}
    >>> labels_pred = classifier.fit_predict(adjacency, labels)
    >>> float(round(np.mean(labels_pred == labels_true), 2))
    0.82
    """
    def __init__(self, n_neighbors: int = 3, embedding_method: Optional[BaseEmbedding] = None, normalize: bool = True):
        super(NNClassifier, self).__init__()
        self.n_neighbors = n_neighbors
        self.embedding_method = embedding_method
        self.normalize = normalize

    @staticmethod
    def _instantiate_vars(labels: np.ndarray):
        """Return the indices of labeled nodes (label >= 0) and of unlabeled nodes (label < 0)."""
        index_train = np.flatnonzero(labels >= 0)
        index_test = np.flatnonzero(labels < 0)
        return index_train, index_test

    def _fit_core(self, embedding, labels, index_train, index_test):
        """Label each test node by a vote among its nearest labeled nodes.

        Parameters
        ----------
        embedding :
            Vector representation of nodes (sparse matrix or dense array, one row per node).
        labels :
            Vector of labels (negative for unlabeled nodes).
        index_train, index_test :
            Indices of labeled and unlabeled nodes.

        Returns
        -------
        probs : sparse matrix
            Normalized membership matrix, shape (number of nodes, maximum label + 1).
        labels : np.ndarray
            Most likely label of each node.
        """
        n_train = len(index_train)
        # presumably check_n_neighbors already bounds the value by the number of seeds;
        # the extra bound keeps the selection below valid whatever it returns
        n_neighbors = min(check_n_neighbors(self.n_neighbors, n_train), n_train)
        # np.argpartition requires kth to be strictly lower than the array size
        kth = min(n_neighbors, n_train - 1)

        # loop invariants: vectors of labeled nodes and their squared norms
        embedding_train = embedding[index_train]
        norms_train = get_norms(embedding_train, p=2)
        squared_norms_train = norms_train**2

        neighbors = []
        for i in index_test:
            vector = embedding[i]
            if sparse.issparse(vector):
                vector = vector.toarray().ravel()
            # squared Euclidean distances ||u||^2 - 2 u.v + ||v||^2 to all labeled nodes
            distances = squared_norms_train - 2 * embedding_train.dot(vector) + np.sum(vector**2)
            neighbors += list(index_train[np.argpartition(distances, kth)[:n_neighbors]])
        labels_neighbor = labels[np.asarray(neighbors, dtype=int)]

        # membership matrix: one vote per neighbor for test nodes, own label for labeled nodes
        row = np.concatenate((np.repeat(index_test, n_neighbors), index_train))
        col = np.concatenate((labels_neighbor, labels[index_train]))
        data = np.ones(len(row), dtype=int)

        probs = normalize(sparse.csr_matrix((data, (row, col)), shape=(len(labels), np.max(labels) + 1)))
        labels = np.argmax(probs.toarray(), axis=1)

        return probs, labels

    def fit(self, input_matrix: Union[sparse.csr_matrix, np.ndarray], labels: Union[np.ndarray, list, dict] = None,
            labels_row: Union[np.ndarray, list, dict] = None,
            labels_col: Union[np.ndarray, list, dict] = None) -> 'NNClassifier':
        """Node classification by k-nearest neighbors in the embedding space.

        Parameters
        ----------
        input_matrix : sparse.csr_matrix, np.ndarray
            Adjacency matrix or biadjacency matrix of the graph.
        labels : np.ndarray, dict
            Known labels. Negative values ignored.
        labels_row : np.ndarray, dict
            Known labels of rows, for bipartite graphs.
        labels_col : np.ndarray, dict
            Known labels of columns, for bipartite graphs.

        Returns
        -------
        self: :class:`NNClassifier`

        Raises
        ------
        ValueError
            If no node has a non-negative label.
        """
        adjacency, labels, self.bipartite = get_adjacency_values(input_matrix, values=labels, values_row=labels_row,
                                                                 values_col=labels_col)
        labels = labels.astype(int)
        index_seed, index_remain = self._instantiate_vars(labels)
        # fail early with an explicit message instead of an obscure indexing error later
        if len(index_seed) == 0:
            raise ValueError('At least one node must be given a non-negative label.')

        if self.embedding_method is None:
            embedding = adjacency
        else:
            embedding = self.embedding_method.fit_transform(adjacency)

        if self.normalize:
            embedding = normalize(embedding, p=2)

        probs, labels = self._fit_core(embedding, labels, index_seed, index_remain)

        self.labels_ = labels
        self.probs_ = probs
        self._split_vars(input_matrix.shape)

        return self
|