scikit-network 0.30.0__cp310-cp310-win_amd64.whl → 0.32.1__cp310-cp310-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of scikit-network might be problematic. Click here for more details.
- {scikit_network-0.30.0.dist-info → scikit_network-0.32.1.dist-info}/AUTHORS.rst +3 -0
- {scikit_network-0.30.0.dist-info → scikit_network-0.32.1.dist-info}/METADATA +31 -3
- scikit_network-0.32.1.dist-info/RECORD +228 -0
- {scikit_network-0.30.0.dist-info → scikit_network-0.32.1.dist-info}/WHEEL +1 -1
- sknetwork/__init__.py +1 -1
- sknetwork/base.py +67 -0
- sknetwork/classification/base.py +24 -24
- sknetwork/classification/base_rank.py +17 -25
- sknetwork/classification/diffusion.py +35 -35
- sknetwork/classification/knn.py +24 -21
- sknetwork/classification/metrics.py +1 -1
- sknetwork/classification/pagerank.py +10 -10
- sknetwork/classification/propagation.py +23 -20
- sknetwork/classification/tests/test_diffusion.py +13 -3
- sknetwork/classification/vote.cp310-win_amd64.pyd +0 -0
- sknetwork/classification/vote.cpp +14482 -10351
- sknetwork/classification/vote.pyx +1 -3
- sknetwork/clustering/__init__.py +3 -1
- sknetwork/clustering/base.py +36 -40
- sknetwork/clustering/kcenters.py +253 -0
- sknetwork/clustering/leiden.py +241 -0
- sknetwork/clustering/leiden_core.cp310-win_amd64.pyd +0 -0
- sknetwork/clustering/leiden_core.cpp +31564 -0
- sknetwork/clustering/leiden_core.pyx +124 -0
- sknetwork/clustering/louvain.py +133 -102
- sknetwork/clustering/louvain_core.cp310-win_amd64.pyd +0 -0
- sknetwork/clustering/louvain_core.cpp +22457 -18792
- sknetwork/clustering/louvain_core.pyx +86 -96
- sknetwork/clustering/postprocess.py +2 -2
- sknetwork/clustering/propagation_clustering.py +15 -19
- sknetwork/clustering/tests/test_API.py +8 -4
- sknetwork/clustering/tests/test_kcenters.py +92 -0
- sknetwork/clustering/tests/test_leiden.py +34 -0
- sknetwork/clustering/tests/test_louvain.py +3 -4
- sknetwork/data/__init__.py +2 -1
- sknetwork/data/base.py +28 -0
- sknetwork/data/load.py +38 -37
- sknetwork/data/models.py +18 -18
- sknetwork/data/parse.py +54 -33
- sknetwork/data/test_graphs.py +2 -2
- sknetwork/data/tests/test_API.py +1 -1
- sknetwork/data/tests/test_base.py +14 -0
- sknetwork/data/tests/test_load.py +1 -1
- sknetwork/data/tests/test_parse.py +9 -12
- sknetwork/data/tests/test_test_graphs.py +1 -2
- sknetwork/data/toy_graphs.py +18 -18
- sknetwork/embedding/__init__.py +0 -1
- sknetwork/embedding/base.py +21 -20
- sknetwork/embedding/force_atlas.py +3 -2
- sknetwork/embedding/louvain_embedding.py +2 -2
- sknetwork/embedding/random_projection.py +5 -3
- sknetwork/embedding/spectral.py +0 -73
- sknetwork/embedding/tests/test_API.py +4 -28
- sknetwork/embedding/tests/test_louvain_embedding.py +4 -9
- sknetwork/embedding/tests/test_random_projection.py +2 -2
- sknetwork/embedding/tests/test_spectral.py +5 -8
- sknetwork/embedding/tests/test_svd.py +1 -1
- sknetwork/gnn/base.py +4 -4
- sknetwork/gnn/base_layer.py +3 -3
- sknetwork/gnn/gnn_classifier.py +45 -89
- sknetwork/gnn/layer.py +1 -1
- sknetwork/gnn/loss.py +1 -1
- sknetwork/gnn/optimizer.py +4 -3
- sknetwork/gnn/tests/test_base_layer.py +4 -4
- sknetwork/gnn/tests/test_gnn_classifier.py +12 -35
- sknetwork/gnn/utils.py +8 -8
- sknetwork/hierarchy/base.py +29 -2
- sknetwork/hierarchy/louvain_hierarchy.py +45 -41
- sknetwork/hierarchy/paris.cp310-win_amd64.pyd +0 -0
- sknetwork/hierarchy/paris.cpp +27369 -22852
- sknetwork/hierarchy/paris.pyx +7 -9
- sknetwork/hierarchy/postprocess.py +16 -16
- sknetwork/hierarchy/tests/test_API.py +1 -1
- sknetwork/hierarchy/tests/test_algos.py +5 -0
- sknetwork/hierarchy/tests/test_metrics.py +1 -1
- sknetwork/linalg/__init__.py +1 -1
- sknetwork/linalg/diteration.cp310-win_amd64.pyd +0 -0
- sknetwork/linalg/diteration.cpp +13474 -9454
- sknetwork/linalg/diteration.pyx +0 -2
- sknetwork/linalg/eig_solver.py +1 -1
- sknetwork/linalg/{normalization.py → normalizer.py} +18 -15
- sknetwork/linalg/operators.py +1 -1
- sknetwork/linalg/ppr_solver.py +1 -1
- sknetwork/linalg/push.cp310-win_amd64.pyd +0 -0
- sknetwork/linalg/push.cpp +22993 -18807
- sknetwork/linalg/push.pyx +0 -2
- sknetwork/linalg/svd_solver.py +1 -1
- sknetwork/linalg/tests/test_normalization.py +3 -7
- sknetwork/linalg/tests/test_operators.py +4 -8
- sknetwork/linalg/tests/test_ppr.py +1 -1
- sknetwork/linkpred/base.py +13 -2
- sknetwork/linkpred/nn.py +6 -6
- sknetwork/log.py +19 -0
- sknetwork/path/__init__.py +4 -3
- sknetwork/path/dag.py +54 -0
- sknetwork/path/distances.py +98 -0
- sknetwork/path/search.py +13 -47
- sknetwork/path/shortest_path.py +37 -162
- sknetwork/path/tests/test_dag.py +37 -0
- sknetwork/path/tests/test_distances.py +62 -0
- sknetwork/path/tests/test_search.py +26 -11
- sknetwork/path/tests/test_shortest_path.py +31 -36
- sknetwork/ranking/__init__.py +0 -1
- sknetwork/ranking/base.py +13 -8
- sknetwork/ranking/betweenness.cp310-win_amd64.pyd +0 -0
- sknetwork/ranking/betweenness.cpp +5709 -3017
- sknetwork/ranking/betweenness.pyx +0 -2
- sknetwork/ranking/closeness.py +7 -10
- sknetwork/ranking/pagerank.py +14 -14
- sknetwork/ranking/postprocess.py +12 -3
- sknetwork/ranking/tests/test_API.py +2 -4
- sknetwork/ranking/tests/test_betweenness.py +3 -3
- sknetwork/ranking/tests/test_closeness.py +3 -7
- sknetwork/ranking/tests/test_pagerank.py +11 -5
- sknetwork/ranking/tests/test_postprocess.py +5 -0
- sknetwork/regression/base.py +19 -2
- sknetwork/regression/diffusion.py +24 -10
- sknetwork/regression/tests/test_diffusion.py +8 -0
- sknetwork/test_base.py +35 -0
- sknetwork/test_log.py +15 -0
- sknetwork/topology/__init__.py +7 -8
- sknetwork/topology/cliques.cp310-win_amd64.pyd +0 -0
- sknetwork/topology/{kcliques.cpp → cliques.cpp} +23412 -20276
- sknetwork/topology/cliques.pyx +149 -0
- sknetwork/topology/core.cp310-win_amd64.pyd +0 -0
- sknetwork/topology/{kcore.cpp → core.cpp} +21732 -18867
- sknetwork/topology/core.pyx +90 -0
- sknetwork/topology/cycles.py +243 -0
- sknetwork/topology/minheap.cp310-win_amd64.pyd +0 -0
- sknetwork/{utils → topology}/minheap.cpp +19452 -15368
- sknetwork/{utils → topology}/minheap.pxd +1 -3
- sknetwork/{utils → topology}/minheap.pyx +1 -3
- sknetwork/topology/structure.py +3 -43
- sknetwork/topology/tests/test_cliques.py +11 -11
- sknetwork/topology/tests/test_core.py +19 -0
- sknetwork/topology/tests/test_cycles.py +65 -0
- sknetwork/topology/tests/test_structure.py +2 -16
- sknetwork/topology/tests/test_triangles.py +11 -15
- sknetwork/topology/tests/test_wl.py +72 -0
- sknetwork/topology/triangles.cp310-win_amd64.pyd +0 -0
- sknetwork/topology/triangles.cpp +5056 -2696
- sknetwork/topology/triangles.pyx +74 -89
- sknetwork/topology/weisfeiler_lehman.py +56 -86
- sknetwork/topology/weisfeiler_lehman_core.cp310-win_amd64.pyd +0 -0
- sknetwork/topology/weisfeiler_lehman_core.cpp +14727 -10622
- sknetwork/topology/weisfeiler_lehman_core.pyx +0 -2
- sknetwork/utils/__init__.py +1 -31
- sknetwork/utils/check.py +2 -2
- sknetwork/utils/format.py +5 -3
- sknetwork/utils/membership.py +2 -2
- sknetwork/utils/tests/test_check.py +3 -3
- sknetwork/utils/tests/test_format.py +3 -1
- sknetwork/utils/values.py +1 -1
- sknetwork/visualization/__init__.py +2 -2
- sknetwork/visualization/dendrograms.py +55 -7
- sknetwork/visualization/graphs.py +292 -72
- sknetwork/visualization/tests/test_dendrograms.py +9 -9
- sknetwork/visualization/tests/test_graphs.py +71 -62
- scikit_network-0.30.0.dist-info/RECORD +0 -227
- sknetwork/embedding/louvain_hierarchy.py +0 -142
- sknetwork/embedding/tests/test_louvain_hierarchy.py +0 -19
- sknetwork/path/metrics.py +0 -148
- sknetwork/path/tests/test_metrics.py +0 -29
- sknetwork/ranking/harmonic.py +0 -82
- sknetwork/topology/dag.py +0 -74
- sknetwork/topology/dag_core.cp310-win_amd64.pyd +0 -0
- sknetwork/topology/dag_core.cpp +0 -23350
- sknetwork/topology/dag_core.pyx +0 -38
- sknetwork/topology/kcliques.cp310-win_amd64.pyd +0 -0
- sknetwork/topology/kcliques.pyx +0 -193
- sknetwork/topology/kcore.cp310-win_amd64.pyd +0 -0
- sknetwork/topology/kcore.pyx +0 -120
- sknetwork/topology/tests/test_cores.py +0 -21
- sknetwork/topology/tests/test_dag.py +0 -26
- sknetwork/topology/tests/test_wl_coloring.py +0 -49
- sknetwork/topology/tests/test_wl_kernel.py +0 -31
- sknetwork/utils/base.py +0 -35
- sknetwork/utils/minheap.cp310-win_amd64.pyd +0 -0
- sknetwork/utils/simplex.py +0 -140
- sknetwork/utils/tests/test_base.py +0 -28
- sknetwork/utils/tests/test_bunch.py +0 -16
- sknetwork/utils/tests/test_projection_simplex.py +0 -33
- sknetwork/utils/tests/test_verbose.py +0 -15
- sknetwork/utils/verbose.py +0 -37
- {scikit_network-0.30.0.dist-info → scikit_network-0.32.1.dist-info}/LICENSE +0 -0
- {scikit_network-0.30.0.dist-info → scikit_network-0.32.1.dist-info}/top_level.txt +0 -0
- /sknetwork/{utils → data}/timeout.py +0 -0
|
@@ -10,7 +10,8 @@ import numpy as np
|
|
|
10
10
|
from scipy import sparse
|
|
11
11
|
|
|
12
12
|
from sknetwork.classification.base import BaseClassifier
|
|
13
|
-
from sknetwork.
|
|
13
|
+
from sknetwork.path.distances import get_distances
|
|
14
|
+
from sknetwork.linalg.normalizer import normalize
|
|
14
15
|
from sknetwork.utils.format import get_adjacency_values
|
|
15
16
|
from sknetwork.utils.membership import get_membership
|
|
16
17
|
from sknetwork.utils.neighbors import get_degrees
|
|
@@ -27,25 +28,24 @@ class DiffusionClassifier(BaseClassifier):
|
|
|
27
28
|
Number of iterations of the diffusion (discrete time).
|
|
28
29
|
centering : bool
|
|
29
30
|
If ``True``, center the temperature of each label to its mean before classification (default).
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
31
|
+
scale : float
|
|
32
|
+
Multiplicative factor applied to temperatures before softmax (default = 5).
|
|
33
|
+
Used only when centering is ``True``.
|
|
33
34
|
|
|
34
35
|
Attributes
|
|
35
36
|
----------
|
|
36
37
|
labels_ : np.ndarray, shape (n_labels,)
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
38
|
+
Labels of nodes.
|
|
39
|
+
probs_ : sparse.csr_matrix, shape (n_row, n_labels)
|
|
40
|
+
Probability distribution over labels.
|
|
40
41
|
labels_row_ : np.ndarray
|
|
41
42
|
Labels of rows, for bipartite graphs.
|
|
42
43
|
labels_col_ : np.ndarray
|
|
43
44
|
Labels of columns, for bipartite graphs.
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
45
|
+
probs_row_ : sparse.csr_matrix, shape (n_row, n_labels)
|
|
46
|
+
Probability distributions over labels of rows, for bipartite graphs.
|
|
47
|
+
probs_col_ : sparse.csr_matrix, shape (n_col, n_labels)
|
|
48
|
+
Probability distributions over labels of columns, for bipartite graphs.
|
|
49
49
|
Example
|
|
50
50
|
-------
|
|
51
51
|
>>> from sknetwork.data import karate_club
|
|
@@ -63,7 +63,7 @@ class DiffusionClassifier(BaseClassifier):
|
|
|
63
63
|
Zhu, X., Lafferty, J., & Rosenfeld, R. (2005). `Semi-supervised learning with graphs`
|
|
64
64
|
(Doctoral dissertation, Carnegie Mellon University, language technologies institute, school of computer science).
|
|
65
65
|
"""
|
|
66
|
-
def __init__(self, n_iter: int = 10, centering: bool = True,
|
|
66
|
+
def __init__(self, n_iter: int = 10, centering: bool = True, scale: float = 5):
|
|
67
67
|
super(DiffusionClassifier, self).__init__()
|
|
68
68
|
|
|
69
69
|
if n_iter <= 0:
|
|
@@ -71,7 +71,7 @@ class DiffusionClassifier(BaseClassifier):
|
|
|
71
71
|
else:
|
|
72
72
|
self.n_iter = n_iter
|
|
73
73
|
self.centering = centering
|
|
74
|
-
self.
|
|
74
|
+
self.scale = scale
|
|
75
75
|
|
|
76
76
|
def fit(self, input_matrix: Union[sparse.csr_matrix, np.ndarray],
|
|
77
77
|
labels: Optional[Union[dict, np.ndarray]] = None, labels_row: Optional[Union[dict, np.ndarray]] = None,
|
|
@@ -81,13 +81,15 @@ class DiffusionClassifier(BaseClassifier):
|
|
|
81
81
|
|
|
82
82
|
Parameters
|
|
83
83
|
----------
|
|
84
|
-
input_matrix :
|
|
84
|
+
input_matrix : sparse.csr_matrix, np.ndarray
|
|
85
85
|
Adjacency matrix or biadjacency matrix of the graph.
|
|
86
|
-
labels :
|
|
86
|
+
labels : dict, np.ndarray
|
|
87
87
|
Known labels (dictionary or vector of int). Negative values ignored.
|
|
88
|
-
labels_row,
|
|
89
|
-
Labels of rows
|
|
90
|
-
|
|
88
|
+
labels_row : dict, np.ndarray
|
|
89
|
+
Labels of rows for bipartite graphs. Negative values ignored.
|
|
90
|
+
labels_col : dict, np.ndarray
|
|
91
|
+
Labels of columns for bipartite graphs. Negative values ignored.
|
|
92
|
+
force_bipartite : bool
|
|
91
93
|
If ``True``, consider the input matrix as a biadjacency matrix (default = ``False``).
|
|
92
94
|
|
|
93
95
|
Returns
|
|
@@ -101,33 +103,31 @@ class DiffusionClassifier(BaseClassifier):
|
|
|
101
103
|
labels = values.astype(int)
|
|
102
104
|
if (labels < 0).all():
|
|
103
105
|
raise ValueError('At least one node must be given a non-negative label.')
|
|
104
|
-
|
|
106
|
+
labels_reindex = labels.copy()
|
|
107
|
+
labels_unique, inverse = np.unique(labels[labels >= 0], return_inverse=True)
|
|
108
|
+
labels_reindex[labels >= 0] = inverse
|
|
109
|
+
temperatures = get_membership(labels_reindex).toarray()
|
|
105
110
|
temperatures_seeds = temperatures[labels >= 0]
|
|
106
|
-
|
|
107
|
-
temperatures[labels < 0] = 1 / n_labels
|
|
111
|
+
temperatures[labels < 0] = 0.5
|
|
108
112
|
diffusion = normalize(adjacency)
|
|
109
113
|
for i in range(self.n_iter):
|
|
110
114
|
temperatures = diffusion.dot(temperatures)
|
|
111
115
|
temperatures[labels >= 0] = temperatures_seeds
|
|
112
|
-
|
|
113
|
-
self.membership_ = sparse.csr_matrix(temperatures)
|
|
114
|
-
|
|
115
116
|
if self.centering:
|
|
116
117
|
temperatures -= temperatures.mean(axis=0)
|
|
118
|
+
labels_ = labels_unique[temperatures.argmax(axis=1)]
|
|
117
119
|
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
120
|
+
# softmax
|
|
121
|
+
if self.centering:
|
|
122
|
+
temperatures = np.exp(self.scale * temperatures)
|
|
121
123
|
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
top_temperatures = temperatures
|
|
127
|
-
differences = np.abs(top_temperatures[:, 0] - top_temperatures[:, 1])
|
|
128
|
-
labels_[differences <= self.threshold] = -1
|
|
124
|
+
# set label -1 to nodes not reached by diffusion
|
|
125
|
+
distances = get_distances(adjacency, source=np.flatnonzero(labels >= 0))
|
|
126
|
+
labels_[distances < 0] = -1
|
|
127
|
+
temperatures[distances < 0] = 0
|
|
129
128
|
|
|
130
129
|
self.labels_ = labels_
|
|
130
|
+
self.probs_ = sparse.csr_matrix(normalize(temperatures))
|
|
131
131
|
self._split_vars(input_matrix.shape)
|
|
132
132
|
|
|
133
133
|
return self
|
sknetwork/classification/knn.py
CHANGED
|
@@ -12,7 +12,7 @@ from scipy import sparse
|
|
|
12
12
|
|
|
13
13
|
from sknetwork.classification.base import BaseClassifier
|
|
14
14
|
from sknetwork.embedding.base import BaseEmbedding
|
|
15
|
-
from sknetwork.linalg.
|
|
15
|
+
from sknetwork.linalg.normalizer import get_norms, normalize
|
|
16
16
|
from sknetwork.utils.check import check_n_neighbors
|
|
17
17
|
from sknetwork.utils.format import get_adjacency_values
|
|
18
18
|
|
|
@@ -22,28 +22,29 @@ class NNClassifier(BaseClassifier):
|
|
|
22
22
|
|
|
23
23
|
Parameters
|
|
24
24
|
----------
|
|
25
|
-
n_neighbors :
|
|
25
|
+
n_neighbors : int
|
|
26
26
|
Number of nearest neighbors.
|
|
27
|
-
embedding_method :
|
|
27
|
+
embedding_method : :class:`BaseEmbedding`
|
|
28
28
|
Embedding method used to represent nodes in vector space.
|
|
29
29
|
If ``None`` (default), use identity.
|
|
30
|
-
normalize :
|
|
30
|
+
normalize : bool
|
|
31
31
|
If ``True``, apply normalization so that all vectors have norm 1 in the embedding space.
|
|
32
32
|
|
|
33
33
|
Attributes
|
|
34
34
|
----------
|
|
35
35
|
labels_ : np.ndarray, shape (n_labels,)
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
36
|
+
Labels of nodes.
|
|
37
|
+
probs_ : sparse.csr_matrix, shape (n_row, n_labels)
|
|
38
|
+
Probability distribution over labels.
|
|
39
39
|
labels_row_ : np.ndarray
|
|
40
40
|
Labels of rows, for bipartite graphs.
|
|
41
41
|
labels_col_ : np.ndarray
|
|
42
42
|
Labels of columns, for bipartite graphs.
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
43
|
+
probs_row_ : sparse.csr_matrix, shape (n_row, n_labels)
|
|
44
|
+
Probability distributions over labels of rows, for bipartite graphs.
|
|
45
|
+
probs_col_ : sparse.csr_matrix, shape (n_col, n_labels)
|
|
46
|
+
Probability distributions over labels of columns, for bipartite graphs.
|
|
47
|
+
|
|
47
48
|
Example
|
|
48
49
|
-------
|
|
49
50
|
>>> from sknetwork.classification import NNClassifier
|
|
@@ -91,10 +92,10 @@ class NNClassifier(BaseClassifier):
|
|
|
91
92
|
col += list(labels[index_train])
|
|
92
93
|
data += list(np.ones_like(index_train))
|
|
93
94
|
|
|
94
|
-
|
|
95
|
-
labels = np.argmax(
|
|
95
|
+
probs = normalize(sparse.csr_matrix((data, (row, col)), shape=(len(labels), np.max(labels) + 1)))
|
|
96
|
+
labels = np.argmax(probs.toarray(), axis=1)
|
|
96
97
|
|
|
97
|
-
return
|
|
98
|
+
return probs, labels
|
|
98
99
|
|
|
99
100
|
def fit(self, input_matrix: Union[sparse.csr_matrix, np.ndarray], labels: Union[np.ndarray, dict] = None,
|
|
100
101
|
labels_row: Union[np.ndarray, dict] = None, labels_col: Union[np.ndarray, dict] = None) -> 'NNClassifier':
|
|
@@ -102,12 +103,14 @@ class NNClassifier(BaseClassifier):
|
|
|
102
103
|
|
|
103
104
|
Parameters
|
|
104
105
|
----------
|
|
105
|
-
input_matrix :
|
|
106
|
+
input_matrix : sparse.csr_matrix, np.ndarray
|
|
106
107
|
Adjacency matrix or biadjacency matrix of the graph.
|
|
107
|
-
labels :
|
|
108
|
-
Known labels
|
|
109
|
-
labels_row,
|
|
110
|
-
|
|
108
|
+
labels : np.ndarray, dict
|
|
109
|
+
Known labels. Negative values ignored.
|
|
110
|
+
labels_row : np.ndarray, dict
|
|
111
|
+
Known labels of rows, for bipartite graphs.
|
|
112
|
+
labels_col : np.ndarray, dict
|
|
113
|
+
Known labels of columns, for bipartite graphs.
|
|
111
114
|
|
|
112
115
|
Returns
|
|
113
116
|
-------
|
|
@@ -126,10 +129,10 @@ class NNClassifier(BaseClassifier):
|
|
|
126
129
|
if self.normalize:
|
|
127
130
|
embedding = normalize(embedding, p=2)
|
|
128
131
|
|
|
129
|
-
|
|
132
|
+
probs, labels = self._fit_core(embedding, labels, index_seed, index_remain)
|
|
130
133
|
|
|
131
134
|
self.labels_ = labels
|
|
132
|
-
self.
|
|
135
|
+
self.probs_ = probs
|
|
133
136
|
self._split_vars(input_matrix.shape)
|
|
134
137
|
|
|
135
138
|
return self
|
|
@@ -158,7 +158,7 @@ def get_f1_scores(labels_true: np.ndarray, labels_pred: np.ndarray, return_preci
|
|
|
158
158
|
mask = counts_pred > 0
|
|
159
159
|
precisions[mask] = counts_correct[mask] / counts_pred[mask]
|
|
160
160
|
f1_scores = np.zeros(n_labels)
|
|
161
|
-
mask = (
|
|
161
|
+
mask = (precisions > 0) & (recalls > 0)
|
|
162
162
|
f1_scores[mask] = 2 / (1 / precisions[mask] + 1 / recalls[mask])
|
|
163
163
|
if return_precision_recall:
|
|
164
164
|
return f1_scores, precisions, recalls
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
#!/usr/bin/env python3
|
|
2
2
|
# -*- coding: utf-8 -*-
|
|
3
3
|
"""
|
|
4
|
-
Created
|
|
4
|
+
Created in March 2020
|
|
5
5
|
@author: Nathan de Lara <nathan.delara@polytechnique.org>
|
|
6
6
|
"""
|
|
7
7
|
from typing import Optional
|
|
@@ -17,9 +17,9 @@ class PageRankClassifier(RankClassifier):
|
|
|
17
17
|
|
|
18
18
|
Parameters
|
|
19
19
|
----------
|
|
20
|
-
damping_factor:
|
|
20
|
+
damping_factor : float
|
|
21
21
|
Probability to continue the random walk.
|
|
22
|
-
solver :
|
|
22
|
+
solver : str
|
|
23
23
|
Which solver to use: 'piteration', 'diteration', 'bicgstab', 'lanczos'.
|
|
24
24
|
n_iter : int
|
|
25
25
|
Number of iterations for some solvers such as ``'piteration'`` or ``'diteration'``.
|
|
@@ -29,17 +29,17 @@ class PageRankClassifier(RankClassifier):
|
|
|
29
29
|
Attributes
|
|
30
30
|
----------
|
|
31
31
|
labels_ : np.ndarray, shape (n_labels,)
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
32
|
+
Labels of nodes.
|
|
33
|
+
probs_ : sparse.csr_matrix, shape (n_row, n_labels)
|
|
34
|
+
Probability distribution over labels.
|
|
35
35
|
labels_row_ : np.ndarray
|
|
36
36
|
Labels of rows, for bipartite graphs.
|
|
37
37
|
labels_col_ : np.ndarray
|
|
38
38
|
Labels of columns, for bipartite graphs.
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
39
|
+
probs_row_ : sparse.csr_matrix, shape (n_row, n_labels)
|
|
40
|
+
Probability distributions over labels of rows, for bipartite graphs.
|
|
41
|
+
probs_col_ : sparse.csr_matrix, shape (n_col, n_labels)
|
|
42
|
+
Probability distributions over labels of columns, for bipartite graphs.
|
|
43
43
|
|
|
44
44
|
Example
|
|
45
45
|
-------
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
#!/usr/bin/env python3
|
|
2
2
|
# coding: utf-8
|
|
3
3
|
"""
|
|
4
|
-
Created
|
|
4
|
+
Created in April 2020
|
|
5
5
|
@author: Thomas Bonald <tbonald@enst.fr>
|
|
6
6
|
"""
|
|
7
7
|
|
|
@@ -12,7 +12,7 @@ from scipy import sparse
|
|
|
12
12
|
|
|
13
13
|
from sknetwork.classification.base import BaseClassifier
|
|
14
14
|
from sknetwork.classification.vote import vote_update
|
|
15
|
-
from sknetwork.linalg.
|
|
15
|
+
from sknetwork.linalg.normalizer import normalize
|
|
16
16
|
from sknetwork.utils.format import get_adjacency_values
|
|
17
17
|
from sknetwork.utils.membership import get_membership
|
|
18
18
|
|
|
@@ -25,9 +25,9 @@ class Propagation(BaseClassifier):
|
|
|
25
25
|
n_iter : float
|
|
26
26
|
Maximum number of iterations (-1 for infinity).
|
|
27
27
|
node_order : str
|
|
28
|
-
*
|
|
29
|
-
*
|
|
30
|
-
*
|
|
28
|
+
* ``'random'``: node labels are updated in random order.
|
|
29
|
+
* ``'increasing'``: node labels are updated by increasing order of (in-) weight.
|
|
30
|
+
* ``'decreasing'``: node labels are updated by decreasing order of (in-) weight.
|
|
31
31
|
* Otherwise, node labels are updated by index order.
|
|
32
32
|
weighted : bool
|
|
33
33
|
If ``True``, the vote of each neighbor is proportional to the edge weight.
|
|
@@ -36,17 +36,17 @@ class Propagation(BaseClassifier):
|
|
|
36
36
|
Attributes
|
|
37
37
|
----------
|
|
38
38
|
labels_ : np.ndarray, shape (n_labels,)
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
39
|
+
Labels of nodes.
|
|
40
|
+
probs_ : sparse.csr_matrix, shape (n_row, n_labels)
|
|
41
|
+
Probability distribution over labels.
|
|
42
42
|
labels_row_ : np.ndarray
|
|
43
43
|
Labels of rows, for bipartite graphs.
|
|
44
44
|
labels_col_ : np.ndarray
|
|
45
45
|
Labels of columns, for bipartite graphs.
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
46
|
+
probs_row_ : sparse.csr_matrix, shape (n_row, n_labels)
|
|
47
|
+
Probability distributions over labels of rows, for bipartite graphs.
|
|
48
|
+
probs_col_ : sparse.csr_matrix, shape (n_col, n_labels)
|
|
49
|
+
Probability distributions over labels of columns, for bipartite graphs.
|
|
50
50
|
|
|
51
51
|
Example
|
|
52
52
|
-------
|
|
@@ -97,12 +97,15 @@ class Propagation(BaseClassifier):
|
|
|
97
97
|
|
|
98
98
|
Parameters
|
|
99
99
|
----------
|
|
100
|
-
input_matrix :
|
|
100
|
+
input_matrix : sparse.csr_matrix, np.ndarray
|
|
101
101
|
Adjacency matrix or biadjacency matrix of the graph.
|
|
102
|
-
labels :
|
|
103
|
-
Known labels
|
|
104
|
-
labels_row,
|
|
105
|
-
|
|
102
|
+
labels : np.ndarray, dict
|
|
103
|
+
Known labels. Negative values ignored.
|
|
104
|
+
labels_row : np.ndarray, dict
|
|
105
|
+
Known labels of rows, for bipartite graphs.
|
|
106
|
+
labels_col : np.ndarray, dict
|
|
107
|
+
Known labels of columns, for bipartite graphs.
|
|
108
|
+
|
|
106
109
|
Returns
|
|
107
110
|
-------
|
|
108
111
|
self: :class:`Propagation`
|
|
@@ -138,11 +141,11 @@ class Propagation(BaseClassifier):
|
|
|
138
141
|
labels_remain = labels[index_remain].copy()
|
|
139
142
|
labels = np.asarray(vote_update(indptr, indices, data, labels, index_remain))
|
|
140
143
|
|
|
141
|
-
|
|
142
|
-
|
|
144
|
+
probs = get_membership(labels)
|
|
145
|
+
probs = normalize(adjacency.dot(probs))
|
|
143
146
|
|
|
144
147
|
self.labels_ = labels
|
|
145
|
-
self.
|
|
148
|
+
self.probs_ = probs
|
|
146
149
|
self._split_vars(input_matrix.shape)
|
|
147
150
|
|
|
148
151
|
return self
|
|
@@ -23,9 +23,9 @@ class TestDiffusionClassifier(unittest.TestCase):
|
|
|
23
23
|
self.assertTrue(len(algo.labels_) == n_nodes)
|
|
24
24
|
with self.assertRaises(ValueError):
|
|
25
25
|
DiffusionClassifier(n_iter=0)
|
|
26
|
-
algo = DiffusionClassifier(centering=
|
|
27
|
-
algo.
|
|
28
|
-
self.assertTrue(max(
|
|
26
|
+
algo = DiffusionClassifier(centering=True, scale=10)
|
|
27
|
+
probs = algo.fit_predict_proba(adjacency, labels=labels)[:, 1]
|
|
28
|
+
self.assertTrue(max(probs) > 0.99)
|
|
29
29
|
|
|
30
30
|
def test_bipartite(self):
|
|
31
31
|
biadjacency = test_bigraph()
|
|
@@ -36,6 +36,7 @@ class TestDiffusionClassifier(unittest.TestCase):
|
|
|
36
36
|
algo.fit(biadjacency, labels_row=labels_row, labels_col=labels_col)
|
|
37
37
|
self.assertTrue(len(algo.labels_row_) == n_row)
|
|
38
38
|
self.assertTrue(len(algo.labels_col_) == n_col)
|
|
39
|
+
self.assertTrue(all(algo.labels_col_ == algo.predict(columns=True)))
|
|
39
40
|
|
|
40
41
|
def test_predict(self):
|
|
41
42
|
adjacency = test_graph()
|
|
@@ -65,3 +66,12 @@ class TestDiffusionClassifier(unittest.TestCase):
|
|
|
65
66
|
self.assertTrue(membership.shape == (n_row, 2))
|
|
66
67
|
membership = algo.transform(columns=True)
|
|
67
68
|
self.assertTrue(membership.shape == (n_col, 2))
|
|
69
|
+
|
|
70
|
+
def test_reindex_label(self):
|
|
71
|
+
adjacency = test_graph()
|
|
72
|
+
n_nodes = adjacency.shape[0]
|
|
73
|
+
labels = {0: 0, 1: 2, 2: 3}
|
|
74
|
+
algo = DiffusionClassifier()
|
|
75
|
+
labels_pred = algo.fit_predict(adjacency, labels=labels)
|
|
76
|
+
self.assertTrue(len(labels_pred) == n_nodes)
|
|
77
|
+
self.assertTrue(set(list(labels_pred)) == {0, 2, 3})
|
|
Binary file
|