scikit-network 0.33.0__cp312-cp312-macosx_10_9_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of scikit-network might be problematic. Click here for more details.
- scikit_network-0.33.0.dist-info/AUTHORS.rst +43 -0
- scikit_network-0.33.0.dist-info/LICENSE +34 -0
- scikit_network-0.33.0.dist-info/METADATA +517 -0
- scikit_network-0.33.0.dist-info/RECORD +216 -0
- scikit_network-0.33.0.dist-info/WHEEL +5 -0
- scikit_network-0.33.0.dist-info/top_level.txt +1 -0
- sknetwork/__init__.py +21 -0
- sknetwork/base.py +67 -0
- sknetwork/classification/__init__.py +8 -0
- sknetwork/classification/base.py +142 -0
- sknetwork/classification/base_rank.py +133 -0
- sknetwork/classification/diffusion.py +134 -0
- sknetwork/classification/knn.py +139 -0
- sknetwork/classification/metrics.py +205 -0
- sknetwork/classification/pagerank.py +66 -0
- sknetwork/classification/propagation.py +152 -0
- sknetwork/classification/tests/__init__.py +1 -0
- sknetwork/classification/tests/test_API.py +30 -0
- sknetwork/classification/tests/test_diffusion.py +77 -0
- sknetwork/classification/tests/test_knn.py +23 -0
- sknetwork/classification/tests/test_metrics.py +53 -0
- sknetwork/classification/tests/test_pagerank.py +20 -0
- sknetwork/classification/tests/test_propagation.py +24 -0
- sknetwork/classification/vote.cpython-312-darwin.so +0 -0
- sknetwork/classification/vote.pyx +56 -0
- sknetwork/clustering/__init__.py +8 -0
- sknetwork/clustering/base.py +172 -0
- sknetwork/clustering/kcenters.py +253 -0
- sknetwork/clustering/leiden.py +242 -0
- sknetwork/clustering/leiden_core.cpython-312-darwin.so +0 -0
- sknetwork/clustering/leiden_core.pyx +124 -0
- sknetwork/clustering/louvain.py +286 -0
- sknetwork/clustering/louvain_core.cpython-312-darwin.so +0 -0
- sknetwork/clustering/louvain_core.pyx +124 -0
- sknetwork/clustering/metrics.py +91 -0
- sknetwork/clustering/postprocess.py +66 -0
- sknetwork/clustering/propagation_clustering.py +104 -0
- sknetwork/clustering/tests/__init__.py +1 -0
- sknetwork/clustering/tests/test_API.py +38 -0
- sknetwork/clustering/tests/test_kcenters.py +60 -0
- sknetwork/clustering/tests/test_leiden.py +34 -0
- sknetwork/clustering/tests/test_louvain.py +129 -0
- sknetwork/clustering/tests/test_metrics.py +50 -0
- sknetwork/clustering/tests/test_postprocess.py +39 -0
- sknetwork/data/__init__.py +6 -0
- sknetwork/data/base.py +33 -0
- sknetwork/data/load.py +406 -0
- sknetwork/data/models.py +459 -0
- sknetwork/data/parse.py +644 -0
- sknetwork/data/test_graphs.py +84 -0
- sknetwork/data/tests/__init__.py +1 -0
- sknetwork/data/tests/test_API.py +30 -0
- sknetwork/data/tests/test_base.py +14 -0
- sknetwork/data/tests/test_load.py +95 -0
- sknetwork/data/tests/test_models.py +52 -0
- sknetwork/data/tests/test_parse.py +250 -0
- sknetwork/data/tests/test_test_graphs.py +29 -0
- sknetwork/data/tests/test_toy_graphs.py +68 -0
- sknetwork/data/timeout.py +38 -0
- sknetwork/data/toy_graphs.py +611 -0
- sknetwork/embedding/__init__.py +8 -0
- sknetwork/embedding/base.py +94 -0
- sknetwork/embedding/force_atlas.py +198 -0
- sknetwork/embedding/louvain_embedding.py +148 -0
- sknetwork/embedding/random_projection.py +135 -0
- sknetwork/embedding/spectral.py +141 -0
- sknetwork/embedding/spring.py +198 -0
- sknetwork/embedding/svd.py +359 -0
- sknetwork/embedding/tests/__init__.py +1 -0
- sknetwork/embedding/tests/test_API.py +49 -0
- sknetwork/embedding/tests/test_force_atlas.py +35 -0
- sknetwork/embedding/tests/test_louvain_embedding.py +33 -0
- sknetwork/embedding/tests/test_random_projection.py +28 -0
- sknetwork/embedding/tests/test_spectral.py +81 -0
- sknetwork/embedding/tests/test_spring.py +50 -0
- sknetwork/embedding/tests/test_svd.py +43 -0
- sknetwork/gnn/__init__.py +10 -0
- sknetwork/gnn/activation.py +117 -0
- sknetwork/gnn/base.py +181 -0
- sknetwork/gnn/base_activation.py +89 -0
- sknetwork/gnn/base_layer.py +109 -0
- sknetwork/gnn/gnn_classifier.py +305 -0
- sknetwork/gnn/layer.py +153 -0
- sknetwork/gnn/loss.py +180 -0
- sknetwork/gnn/neighbor_sampler.py +65 -0
- sknetwork/gnn/optimizer.py +164 -0
- sknetwork/gnn/tests/__init__.py +1 -0
- sknetwork/gnn/tests/test_activation.py +56 -0
- sknetwork/gnn/tests/test_base.py +75 -0
- sknetwork/gnn/tests/test_base_layer.py +37 -0
- sknetwork/gnn/tests/test_gnn_classifier.py +130 -0
- sknetwork/gnn/tests/test_layers.py +80 -0
- sknetwork/gnn/tests/test_loss.py +33 -0
- sknetwork/gnn/tests/test_neigh_sampler.py +23 -0
- sknetwork/gnn/tests/test_optimizer.py +43 -0
- sknetwork/gnn/tests/test_utils.py +41 -0
- sknetwork/gnn/utils.py +127 -0
- sknetwork/hierarchy/__init__.py +6 -0
- sknetwork/hierarchy/base.py +96 -0
- sknetwork/hierarchy/louvain_hierarchy.py +272 -0
- sknetwork/hierarchy/metrics.py +234 -0
- sknetwork/hierarchy/paris.cpython-312-darwin.so +0 -0
- sknetwork/hierarchy/paris.pyx +316 -0
- sknetwork/hierarchy/postprocess.py +350 -0
- sknetwork/hierarchy/tests/__init__.py +1 -0
- sknetwork/hierarchy/tests/test_API.py +24 -0
- sknetwork/hierarchy/tests/test_algos.py +34 -0
- sknetwork/hierarchy/tests/test_metrics.py +62 -0
- sknetwork/hierarchy/tests/test_postprocess.py +57 -0
- sknetwork/linalg/__init__.py +9 -0
- sknetwork/linalg/basics.py +37 -0
- sknetwork/linalg/diteration.cpython-312-darwin.so +0 -0
- sknetwork/linalg/diteration.pyx +47 -0
- sknetwork/linalg/eig_solver.py +93 -0
- sknetwork/linalg/laplacian.py +15 -0
- sknetwork/linalg/normalizer.py +86 -0
- sknetwork/linalg/operators.py +225 -0
- sknetwork/linalg/polynome.py +76 -0
- sknetwork/linalg/ppr_solver.py +170 -0
- sknetwork/linalg/push.cpython-312-darwin.so +0 -0
- sknetwork/linalg/push.pyx +71 -0
- sknetwork/linalg/sparse_lowrank.py +142 -0
- sknetwork/linalg/svd_solver.py +91 -0
- sknetwork/linalg/tests/__init__.py +1 -0
- sknetwork/linalg/tests/test_eig.py +44 -0
- sknetwork/linalg/tests/test_laplacian.py +18 -0
- sknetwork/linalg/tests/test_normalization.py +34 -0
- sknetwork/linalg/tests/test_operators.py +66 -0
- sknetwork/linalg/tests/test_polynome.py +38 -0
- sknetwork/linalg/tests/test_ppr.py +50 -0
- sknetwork/linalg/tests/test_sparse_lowrank.py +61 -0
- sknetwork/linalg/tests/test_svd.py +38 -0
- sknetwork/linkpred/__init__.py +2 -0
- sknetwork/linkpred/base.py +46 -0
- sknetwork/linkpred/nn.py +126 -0
- sknetwork/linkpred/tests/__init__.py +1 -0
- sknetwork/linkpred/tests/test_nn.py +27 -0
- sknetwork/log.py +19 -0
- sknetwork/path/__init__.py +5 -0
- sknetwork/path/dag.py +54 -0
- sknetwork/path/distances.py +98 -0
- sknetwork/path/search.py +31 -0
- sknetwork/path/shortest_path.py +61 -0
- sknetwork/path/tests/__init__.py +1 -0
- sknetwork/path/tests/test_dag.py +37 -0
- sknetwork/path/tests/test_distances.py +62 -0
- sknetwork/path/tests/test_search.py +40 -0
- sknetwork/path/tests/test_shortest_path.py +40 -0
- sknetwork/ranking/__init__.py +8 -0
- sknetwork/ranking/base.py +61 -0
- sknetwork/ranking/betweenness.cpython-312-darwin.so +0 -0
- sknetwork/ranking/betweenness.pyx +97 -0
- sknetwork/ranking/closeness.py +92 -0
- sknetwork/ranking/hits.py +94 -0
- sknetwork/ranking/katz.py +83 -0
- sknetwork/ranking/pagerank.py +110 -0
- sknetwork/ranking/postprocess.py +37 -0
- sknetwork/ranking/tests/__init__.py +1 -0
- sknetwork/ranking/tests/test_API.py +32 -0
- sknetwork/ranking/tests/test_betweenness.py +38 -0
- sknetwork/ranking/tests/test_closeness.py +30 -0
- sknetwork/ranking/tests/test_hits.py +20 -0
- sknetwork/ranking/tests/test_pagerank.py +62 -0
- sknetwork/ranking/tests/test_postprocess.py +26 -0
- sknetwork/regression/__init__.py +4 -0
- sknetwork/regression/base.py +61 -0
- sknetwork/regression/diffusion.py +210 -0
- sknetwork/regression/tests/__init__.py +1 -0
- sknetwork/regression/tests/test_API.py +32 -0
- sknetwork/regression/tests/test_diffusion.py +56 -0
- sknetwork/sknetwork.py +3 -0
- sknetwork/test_base.py +35 -0
- sknetwork/test_log.py +15 -0
- sknetwork/topology/__init__.py +8 -0
- sknetwork/topology/cliques.cpython-312-darwin.so +0 -0
- sknetwork/topology/cliques.pyx +149 -0
- sknetwork/topology/core.cpython-312-darwin.so +0 -0
- sknetwork/topology/core.pyx +90 -0
- sknetwork/topology/cycles.py +243 -0
- sknetwork/topology/minheap.cpython-312-darwin.so +0 -0
- sknetwork/topology/minheap.pxd +20 -0
- sknetwork/topology/minheap.pyx +109 -0
- sknetwork/topology/structure.py +194 -0
- sknetwork/topology/tests/__init__.py +1 -0
- sknetwork/topology/tests/test_cliques.py +28 -0
- sknetwork/topology/tests/test_core.py +19 -0
- sknetwork/topology/tests/test_cycles.py +65 -0
- sknetwork/topology/tests/test_structure.py +85 -0
- sknetwork/topology/tests/test_triangles.py +38 -0
- sknetwork/topology/tests/test_wl.py +72 -0
- sknetwork/topology/triangles.cpython-312-darwin.so +0 -0
- sknetwork/topology/triangles.pyx +151 -0
- sknetwork/topology/weisfeiler_lehman.py +133 -0
- sknetwork/topology/weisfeiler_lehman_core.cpython-312-darwin.so +0 -0
- sknetwork/topology/weisfeiler_lehman_core.pyx +114 -0
- sknetwork/utils/__init__.py +7 -0
- sknetwork/utils/check.py +355 -0
- sknetwork/utils/format.py +221 -0
- sknetwork/utils/membership.py +82 -0
- sknetwork/utils/neighbors.py +115 -0
- sknetwork/utils/tests/__init__.py +1 -0
- sknetwork/utils/tests/test_check.py +190 -0
- sknetwork/utils/tests/test_format.py +63 -0
- sknetwork/utils/tests/test_membership.py +24 -0
- sknetwork/utils/tests/test_neighbors.py +41 -0
- sknetwork/utils/tests/test_tfidf.py +18 -0
- sknetwork/utils/tests/test_values.py +66 -0
- sknetwork/utils/tfidf.py +37 -0
- sknetwork/utils/values.py +76 -0
- sknetwork/visualization/__init__.py +4 -0
- sknetwork/visualization/colors.py +34 -0
- sknetwork/visualization/dendrograms.py +277 -0
- sknetwork/visualization/graphs.py +1039 -0
- sknetwork/visualization/tests/__init__.py +1 -0
- sknetwork/visualization/tests/test_dendrograms.py +53 -0
- sknetwork/visualization/tests/test_graphs.py +176 -0
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""tests for classification"""
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
# -*- coding: utf-8 -*-
|
|
3
|
+
"""Tests for classification API"""
|
|
4
|
+
|
|
5
|
+
import unittest
|
|
6
|
+
|
|
7
|
+
from sknetwork.classification import *
|
|
8
|
+
from sknetwork.data.test_graphs import *
|
|
9
|
+
from sknetwork.embedding import LouvainEmbedding
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class TestClassificationAPI(unittest.TestCase):
    """Checks of the API shared by all classifiers."""

    def test_undirected(self):
        """Array and dict label inputs must yield the same predictions.

        Despite its name, the test runs on both the undirected and the
        directed test graph.
        """
        for adjacency in (test_graph(), test_digraph()):
            n_nodes = adjacency.shape[0]

            # Same two labeled nodes in both formats (node 0 -> 0, node 1 -> 1);
            # every other entry of the array is -1.
            labels_as_array = -np.ones(n_nodes)
            labels_as_array[:2] = np.arange(2)
            labels_as_dict = {0: 0, 1: 1}

            classifiers = [
                PageRankClassifier(),
                DiffusionClassifier(),
                NNClassifier(embedding_method=LouvainEmbedding(), n_neighbors=1),
                Propagation(),
            ]

            for classifier in classifiers:
                from_array = classifier.fit_predict(adjacency, labels_as_array)
                from_dict = classifier.fit_predict(adjacency, labels_as_dict)
                self.assertTrue((from_array == from_dict).all())
                self.assertEqual(from_dict.shape, (n_nodes,))

                membership = classifier.fit_transform(adjacency, labels_as_array)
                self.assertTupleEqual(membership.shape, (n_nodes, 2))
|
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
# -*- coding: utf-8 -*-
|
|
3
|
+
"""Tests for DiffusionClassifier"""
|
|
4
|
+
|
|
5
|
+
import unittest
|
|
6
|
+
|
|
7
|
+
from sknetwork.classification import DiffusionClassifier
|
|
8
|
+
from sknetwork.data.test_graphs import *
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class TestDiffusionClassifier(unittest.TestCase):
    """Unit tests for ``DiffusionClassifier``."""

    def test_graph(self):
        """Fit on the test graph and digraph; check parameter validation and probabilities."""
        adjacency = test_graph()
        n_nodes = adjacency.shape[0]
        labels = {0: 0, 1: 1}

        classifier = DiffusionClassifier()
        classifier.fit(adjacency, labels=labels)
        self.assertEqual(len(classifier.labels_), n_nodes)

        # NOTE(review): n_nodes is still the size of test_graph(); this presumably
        # relies on test_digraph() having the same number of nodes - confirm.
        adjacency = test_digraph()
        classifier = DiffusionClassifier(centering=False)
        classifier.fit(adjacency, labels=labels)
        self.assertEqual(len(classifier.labels_), n_nodes)

        with self.assertRaises(ValueError):
            DiffusionClassifier(n_iter=0)

        classifier = DiffusionClassifier(centering=True, scale=10)
        probs_label_1 = classifier.fit_predict_proba(adjacency, labels=labels)[:, 1]
        self.assertGreater(max(probs_label_1), 0.99)

    def test_bipartite(self):
        """Fit on the bipartite test graph with labels on rows and columns."""
        biadjacency = test_bigraph()
        n_row, n_col = biadjacency.shape
        labels_row = {0: 0, 1: 1}
        labels_col = {5: 1}

        classifier = DiffusionClassifier()
        classifier.fit(biadjacency, labels_row=labels_row, labels_col=labels_col)
        self.assertEqual(len(classifier.labels_row_), n_row)
        self.assertEqual(len(classifier.labels_col_), n_col)
        self.assertTrue(all(classifier.labels_col_ == classifier.predict(columns=True)))

    def test_predict(self):
        """Check the output sizes of the predict / predict_proba / transform variants."""
        adjacency = test_graph()
        n_nodes = adjacency.shape[0]
        labels = {0: 0, 1: 1}

        classifier = DiffusionClassifier()
        predicted = classifier.fit_predict(adjacency, labels=labels)
        self.assertEqual(len(predicted), n_nodes)
        probs = classifier.fit_predict_proba(adjacency, labels=labels)
        self.assertEqual(probs.shape, (n_nodes, 2))
        membership = classifier.fit_transform(adjacency, labels=labels)
        self.assertEqual(membership.shape, (n_nodes, 2))

        # Same checks on the bipartite graph, for rows and then for columns.
        biadjacency = test_bigraph()
        n_row, n_col = biadjacency.shape
        labels_row = {0: 0, 1: 1}

        classifier = DiffusionClassifier()
        predicted = classifier.fit_predict(biadjacency, labels_row=labels_row)
        self.assertEqual(len(predicted), n_row)
        predicted = classifier.predict(columns=True)
        self.assertEqual(len(predicted), n_col)

        probs = classifier.fit_predict_proba(biadjacency, labels_row=labels_row)
        self.assertEqual(probs.shape, (n_row, 2))
        probs = classifier.predict_proba(columns=True)
        self.assertEqual(probs.shape, (n_col, 2))

        membership = classifier.fit_transform(biadjacency, labels_row=labels_row)
        self.assertEqual(membership.shape, (n_row, 2))
        membership = classifier.transform(columns=True)
        self.assertEqual(membership.shape, (n_col, 2))

    def test_reindex_label(self):
        """Non-consecutive label values (0, 2, 3) must be kept in the predictions."""
        adjacency = test_graph()
        n_nodes = adjacency.shape[0]
        labels = {0: 0, 1: 2, 2: 3}

        classifier = DiffusionClassifier()
        predicted = classifier.fit_predict(adjacency, labels=labels)
        self.assertEqual(len(predicted), n_nodes)
        self.assertEqual(set(predicted), {0, 2, 3})
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
# -*- coding: utf-8 -*-
|
|
3
|
+
"""Tests for KNN"""
|
|
4
|
+
import unittest
|
|
5
|
+
|
|
6
|
+
from sknetwork.classification import NNClassifier
|
|
7
|
+
from sknetwork.data.test_graphs import *
|
|
8
|
+
from sknetwork.embedding import Spectral
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class TestKNNClassifier(unittest.TestCase):
    """Unit tests for ``NNClassifier``."""

    def test_classification(self):
        """Both labels must appear in the predictions, for each graph and configuration."""
        labels = {0: 0, 1: 1}
        for adjacency in (test_graph(), test_digraph(), test_bigraph()):
            # Configuration 1: only the number of neighbors is set.
            classifier = NNClassifier(n_neighbors=1)
            predicted = classifier.fit_predict(adjacency, labels)
            self.assertEqual(len(set(predicted)), 2)

            # Configuration 2: explicit spectral embedding, normalization disabled.
            classifier = NNClassifier(n_neighbors=1, embedding_method=Spectral(2), normalize=False)
            predicted = classifier.fit_predict(adjacency, labels)
            self.assertEqual(len(set(predicted)), 2)
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
# -*- coding: utf-8 -*-
|
|
3
|
+
"""Tests for classification metrics"""
|
|
4
|
+
|
|
5
|
+
import unittest
|
|
6
|
+
|
|
7
|
+
from sknetwork.classification.metrics import *
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class TestMetrics(unittest.TestCase):
    """Unit tests for the classification metrics."""

    def setUp(self) -> None:
        """Create the label vectors shared by the tests."""
        self.labels_true = np.array([0, 1, 1, 2, 2, -1])
        self.labels_pred1 = np.array([0, -1, 1, 2, 0, 0])
        # No position where both labels_true and labels_pred2 are non-negative.
        self.labels_pred2 = np.array([-1, -1, -1, -1, -1, 0])

    def test_accuracy(self):
        """Accuracy on labels_pred1, and ValueError on labels_pred2."""
        accuracy = get_accuracy_score(self.labels_true, self.labels_pred1)
        self.assertEqual(accuracy, 0.75)
        with self.assertRaises(ValueError):
            get_accuracy_score(self.labels_true, self.labels_pred2)

    def test_confusion(self):
        """Total and diagonal counts of the confusion matrix."""
        confusion = get_confusion_matrix(self.labels_true, self.labels_pred1)
        self.assertEqual(confusion.data.sum(), 4)
        self.assertEqual(confusion.diagonal().sum(), 3)
        # NOTE(review): the error case below calls get_accuracy_score, not
        # get_confusion_matrix - looks like a copy-paste from test_accuracy; confirm.
        with self.assertRaises(ValueError):
            get_accuracy_score(self.labels_true, self.labels_pred2)

    def test_f1_score(self):
        """F1 score on a binary example; ValueError on the three-label vectors."""
        score = get_f1_score(np.array([0, 0, 1]), np.array([0, 1, 1]))
        self.assertAlmostEqual(score, 0.67, 2)
        with self.assertRaises(ValueError):
            get_f1_score(self.labels_true, self.labels_pred1)

    def test_f1_scores(self):
        """F1 scores, optionally returned with precisions and recalls."""
        scores = get_f1_scores(self.labels_true, self.labels_pred1)
        self.assertAlmostEqual(min(scores), 0.67, 2)

        scores, precisions, recalls = get_f1_scores(self.labels_true, self.labels_pred1, True)
        self.assertAlmostEqual(min(scores), 0.67, 2)
        self.assertAlmostEqual(min(precisions), 0.5, 2)
        self.assertAlmostEqual(min(recalls), 0.5, 2)

        with self.assertRaises(ValueError):
            get_f1_scores(self.labels_true, self.labels_pred2)

    def test_average_f1_score(self):
        """Average F1 score for the default, 'micro' and 'weighted' modes."""
        score = get_average_f1_score(self.labels_true, self.labels_pred1)
        self.assertAlmostEqual(score, 0.78, 2)

        score = get_average_f1_score(self.labels_true, self.labels_pred1, average='micro')
        self.assertEqual(score, 0.75)

        score = get_average_f1_score(self.labels_true, self.labels_pred1, average='weighted')
        self.assertEqual(score, 0.80)

        with self.assertRaises(ValueError):
            get_average_f1_score(self.labels_true, self.labels_pred2, 'toto')
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
# -*- coding: utf-8 -*-
|
|
3
|
+
"""Tests for PageRankClassifier"""
|
|
4
|
+
|
|
5
|
+
import unittest
|
|
6
|
+
|
|
7
|
+
from sknetwork.classification import PageRankClassifier
|
|
8
|
+
from sknetwork.data.test_graphs import *
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class TestPageRankClassifier(unittest.TestCase):
    """Unit tests for ``PageRankClassifier``."""

    def test_solvers(self):
        """The 'lanczos' and 'bicgstab' solvers must predict the same labels as 'piteration'."""
        adjacency = test_graph()
        labels = {0: 0, 1: 1}

        expected = PageRankClassifier(solver='piteration').fit_predict(adjacency, labels)
        for solver in ('lanczos', 'bicgstab'):
            predicted = PageRankClassifier(solver=solver).fit_predict(adjacency, labels)
            self.assertTrue((expected == predicted).all())
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
# -*- coding: utf-8 -*-
|
|
3
|
+
"""Tests for label propagation"""
|
|
4
|
+
|
|
5
|
+
import unittest
|
|
6
|
+
|
|
7
|
+
from sknetwork.classification import Propagation
|
|
8
|
+
from sknetwork.data.test_graphs import *
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class TestLabelPropagation(unittest.TestCase):
    """Unit tests for ``Propagation``."""

    def test_algo(self):
        """Predictions must have one entry per row, for every graph and node order."""
        labels = {0: 0, 1: 1}
        for adjacency in (test_graph(), test_digraph(), test_bigraph()):
            n_rows = adjacency.shape[0]

            # Fixed number of iterations, unweighted votes.
            classifier = Propagation(n_iter=3, weighted=False)
            predicted = classifier.fit_predict(adjacency, labels)
            self.assertEqual(predicted.shape, (n_rows,))

            # Each supported node ordering, other parameters left at their defaults.
            for node_order in ('random', 'decreasing', 'increasing'):
                classifier = Propagation(node_order=node_order)
                predicted = classifier.fit_predict(adjacency, labels)
                self.assertEqual(predicted.shape, (n_rows,))
|
|
Binary file
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
# distutils: language = c++
|
|
2
|
+
# cython: language_level=3
|
|
3
|
+
"""
|
|
4
|
+
Created in April 2020
|
|
5
|
+
@author: Nathan de Lara <nathan.delara@polytechnique.org>
|
|
6
|
+
"""
|
|
7
|
+
from libcpp.set cimport set
|
|
8
|
+
from libcpp.vector cimport vector
|
|
9
|
+
|
|
10
|
+
cimport cython
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
@cython.boundscheck(False)
@cython.wraparound(False)
def vote_update(int[:] indptr, int[:] indices, float[:] data, int[:] labels, int[:] index):
    """One pass of label updates over the graph by weighted majority vote among neighbors.

    Parameters
    ----------
    indptr, indices, data :
        Arrays of the adjacency matrix in CSR layout: the neighbors of node ``i`` are
        ``indices[indptr[i]:indptr[i + 1]]`` and ``data[j]`` is the weight of the edge
        stored at position ``j``.
    labels :
        Current label of each node. Negative labels do not take part in the vote.
        Modified in place.
    index :
        Nodes to update, processed in this order.

    Returns
    -------
    labels :
        The input ``labels`` buffer after the updates.
    """
    cdef int i
    cdef int ii
    cdef int j
    cdef int label
    cdef int n_indices = index.shape[0]
    cdef int n = labels.shape[0]
    cdef float best_score

    cdef vector[float] votes
    cdef set[int] labels_unique

    # One accumulator per possible label value; every non-negative label must
    # therefore be smaller than the number of nodes.
    for i in range(n):
        votes.push_back(0)

    for ii in range(n_indices):
        i = index[ii]
        labels_unique.clear()
        for j in range(indptr[i], indptr[i + 1]):
            label = labels[indices[j]]
            if label >= 0:
                labels_unique.insert(label)
                # The weight is indexed by the edge position j, not by the neighbor id.
                votes[label] += data[j]

        best_score = -1
        for label in labels_unique:
            if votes[label] > best_score:
                labels[i] = label
                best_score = votes[label]
            # Reset the accumulator so that the next node starts from zero.
            votes[label] = 0
    return labels
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
"""clustering module"""
|
|
2
|
+
from sknetwork.clustering.base import BaseClustering
|
|
3
|
+
from sknetwork.clustering.louvain import Louvain
|
|
4
|
+
from sknetwork.clustering.leiden import Leiden
|
|
5
|
+
from sknetwork.clustering.propagation_clustering import PropagationClustering
|
|
6
|
+
from sknetwork.clustering.metrics import get_modularity
|
|
7
|
+
from sknetwork.clustering.postprocess import reindex_labels, aggregate_graph
|
|
8
|
+
from sknetwork.clustering.kcenters import KCenters
|
|
@@ -0,0 +1,172 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
# -*- coding: utf-8 -*-
|
|
3
|
+
"""
|
|
4
|
+
Created on Nov, 2019
|
|
5
|
+
@author: Nathan de Lara <nathan.delara@polytechnique.org>
|
|
6
|
+
"""
|
|
7
|
+
from abc import ABC
|
|
8
|
+
|
|
9
|
+
import numpy as np
|
|
10
|
+
from scipy import sparse
|
|
11
|
+
|
|
12
|
+
from sknetwork.linalg.normalizer import normalize
|
|
13
|
+
from sknetwork.base import Algorithm
|
|
14
|
+
from sknetwork.utils.membership import get_membership
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class BaseClustering(Algorithm, ABC):
    """Base class for clustering algorithms.

    Parameters
    ----------
    sort_clusters : bool
        Stored as is; not used by the methods of this base class.
    return_probs : bool
        If ``True``, ``_secondary_outputs`` computes the probability distributions over labels.
    return_aggregate : bool
        If ``True``, ``_secondary_outputs`` computes the aggregate matrix between clusters.

    Attributes
    ----------
    labels_ : np.ndarray, shape (n_row,)
        Label of each node (of each row for bipartite graphs).
    probs_ : sparse.csr_matrix, shape (n_row, n_labels)
        Probability distribution over labels.
    labels_row_, labels_col_ : np.ndarray
        Labels of rows and columns, for bipartite graphs.
    probs_row_, probs_col_ : sparse.csr_matrix, shapes (n_row, n_labels) and (n_col, n_labels)
        Probability distributions over labels for rows and columns (for bipartite graphs).
    aggregate_ : sparse.csr_matrix
        Aggregate adjacency matrix or biadjacency matrix between clusters.
    """
    def __init__(self, sort_clusters: bool = True, return_probs: bool = False, return_aggregate: bool = False):
        self.sort_clusters = sort_clusters
        self.return_probs = return_probs
        self.return_aggregate = return_aggregate
        self._init_vars()

    def predict(self, columns=False) -> np.ndarray:
        """Return the labels predicted by the algorithm.

        Parameters
        ----------
        columns : bool
            If ``True``, return the prediction for columns.

        Returns
        -------
        labels : np.ndarray
            Labels.
        """
        return self.labels_col_ if columns else self.labels_

    def fit_predict(self, *args, **kwargs) -> np.ndarray:
        """Fit algorithm to the data and return the labels. Same parameters as the ``fit`` method.

        Returns
        -------
        labels : np.ndarray
            Labels.
        """
        self.fit(*args, **kwargs)
        return self.predict()

    def predict_proba(self, columns=False) -> np.ndarray:
        """Return the probability distribution over labels as predicted by the algorithm.

        The probabilities are only set by ``_secondary_outputs`` when ``return_probs``
        is ``True``; they are ``None`` otherwise.

        Parameters
        ----------
        columns : bool
            If ``True``, return the prediction for columns.

        Returns
        -------
        probs : np.ndarray
            Probability distribution over labels (dense copy of the sparse matrix).
        """
        probs = self.probs_col_ if columns else self.probs_
        return probs.toarray()

    def fit_predict_proba(self, *args, **kwargs) -> np.ndarray:
        """Fit algorithm to the data and return the probability distribution over labels.
        Same parameters as the ``fit`` method.

        Returns
        -------
        probs : np.ndarray
            Probability of each label.
        """
        self.fit(*args, **kwargs)
        return self.predict_proba()

    def transform(self, columns=False) -> sparse.csr_matrix:
        """Return the probability distribution over labels in sparse format.

        Parameters
        ----------
        columns : bool
            If ``True``, return the prediction for columns.

        Returns
        -------
        probs : sparse.csr_matrix
            Probability distribution over labels.
        """
        return self.probs_col_ if columns else self.probs_

    def fit_transform(self, *args, **kwargs) -> sparse.csr_matrix:
        """Fit algorithm to the data and return the membership matrix. Same parameters as the ``fit`` method.

        Returns
        -------
        membership : sparse.csr_matrix
            Membership matrix (distribution over clusters), as returned by ``transform``.
        """
        self.fit(*args, **kwargs)
        return self.transform()

    def _init_vars(self):
        """Reset all output attributes (and the ``bipartite`` flag) to ``None``."""
        self.labels_ = None
        self.labels_row_ = None
        self.labels_col_ = None
        self.probs_ = None
        self.probs_row_ = None
        self.probs_col_ = None
        self.aggregate_ = None
        self.bipartite = None
        return self

    def _split_vars(self, shape):
        """Split labels_ into labels_row_ and labels_col_.

        The first ``shape[0]`` entries of ``labels_`` become ``labels_row_``, the
        remaining ones ``labels_col_``; ``labels_`` is then set to ``labels_row_``.
        """
        n_row = shape[0]
        self.labels_row_ = self.labels_[:n_row]
        self.labels_col_ = self.labels_[n_row:]
        self.labels_ = self.labels_row_
        return self

    def _secondary_outputs(self, input_matrix: sparse.csr_matrix):
        """Compute the probabilities and / or the aggregate matrix from the labels.

        Nothing is computed unless ``return_probs`` or ``return_aggregate`` is set.
        """
        if not (self.return_probs or self.return_aggregate):
            return self

        input_matrix = input_matrix.astype(float)

        if not self.bipartite:
            membership = get_membership(self.labels_)
            if self.return_probs:
                self.probs_ = normalize(input_matrix.dot(membership))
            if self.return_aggregate:
                self.aggregate_ = sparse.csr_matrix(membership.T.dot(input_matrix.dot(membership)))
            return self

        if self.labels_col_ is None:
            # Only rows are labeled: the column memberships are derived from the rows.
            n_labels = max(self.labels_) + 1
            membership_row = get_membership(self.labels_, n_labels=n_labels)
            membership_col = normalize(input_matrix.T.dot(membership_row))
        else:
            # Rows and columns use the same number of labels so that the matrices align.
            n_labels = max(max(self.labels_row_), max(self.labels_col_)) + 1
            membership_row = get_membership(self.labels_row_, n_labels=n_labels)
            membership_col = get_membership(self.labels_col_, n_labels=n_labels)

        if self.return_probs:
            self.probs_row_ = normalize(input_matrix.dot(membership_col))
            self.probs_col_ = normalize(input_matrix.T.dot(membership_row))
            self.probs_ = self.probs_row_
        if self.return_aggregate:
            aggregate = sparse.csr_matrix(membership_row.T.dot(input_matrix))
            self.aggregate_ = aggregate.dot(membership_col)

        return self
|