scikit-network 0.30.0__cp38-cp38-win_amd64.whl → 0.32.1__cp38-cp38-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of scikit-network might be problematic. Click here for more details.
- {scikit_network-0.30.0.dist-info → scikit_network-0.32.1.dist-info}/AUTHORS.rst +3 -0
- {scikit_network-0.30.0.dist-info → scikit_network-0.32.1.dist-info}/METADATA +31 -3
- scikit_network-0.32.1.dist-info/RECORD +228 -0
- {scikit_network-0.30.0.dist-info → scikit_network-0.32.1.dist-info}/WHEEL +1 -1
- sknetwork/__init__.py +1 -1
- sknetwork/base.py +67 -0
- sknetwork/classification/base.py +24 -24
- sknetwork/classification/base_rank.py +17 -25
- sknetwork/classification/diffusion.py +35 -35
- sknetwork/classification/knn.py +24 -21
- sknetwork/classification/metrics.py +1 -1
- sknetwork/classification/pagerank.py +10 -10
- sknetwork/classification/propagation.py +23 -20
- sknetwork/classification/tests/test_diffusion.py +13 -3
- sknetwork/classification/vote.cp38-win_amd64.pyd +0 -0
- sknetwork/classification/vote.cpp +14482 -10351
- sknetwork/classification/vote.pyx +1 -3
- sknetwork/clustering/__init__.py +3 -1
- sknetwork/clustering/base.py +36 -40
- sknetwork/clustering/kcenters.py +253 -0
- sknetwork/clustering/leiden.py +241 -0
- sknetwork/clustering/leiden_core.cp38-win_amd64.pyd +0 -0
- sknetwork/clustering/leiden_core.cpp +31564 -0
- sknetwork/clustering/leiden_core.pyx +124 -0
- sknetwork/clustering/louvain.py +133 -102
- sknetwork/clustering/louvain_core.cp38-win_amd64.pyd +0 -0
- sknetwork/clustering/louvain_core.cpp +22457 -18792
- sknetwork/clustering/louvain_core.pyx +86 -96
- sknetwork/clustering/postprocess.py +2 -2
- sknetwork/clustering/propagation_clustering.py +15 -19
- sknetwork/clustering/tests/test_API.py +8 -4
- sknetwork/clustering/tests/test_kcenters.py +92 -0
- sknetwork/clustering/tests/test_leiden.py +34 -0
- sknetwork/clustering/tests/test_louvain.py +3 -4
- sknetwork/data/__init__.py +2 -1
- sknetwork/data/base.py +28 -0
- sknetwork/data/load.py +38 -37
- sknetwork/data/models.py +18 -18
- sknetwork/data/parse.py +54 -33
- sknetwork/data/test_graphs.py +2 -2
- sknetwork/data/tests/test_API.py +1 -1
- sknetwork/data/tests/test_base.py +14 -0
- sknetwork/data/tests/test_load.py +1 -1
- sknetwork/data/tests/test_parse.py +9 -12
- sknetwork/data/tests/test_test_graphs.py +1 -2
- sknetwork/data/toy_graphs.py +18 -18
- sknetwork/embedding/__init__.py +0 -1
- sknetwork/embedding/base.py +21 -20
- sknetwork/embedding/force_atlas.py +3 -2
- sknetwork/embedding/louvain_embedding.py +2 -2
- sknetwork/embedding/random_projection.py +5 -3
- sknetwork/embedding/spectral.py +0 -73
- sknetwork/embedding/tests/test_API.py +4 -28
- sknetwork/embedding/tests/test_louvain_embedding.py +4 -9
- sknetwork/embedding/tests/test_random_projection.py +2 -2
- sknetwork/embedding/tests/test_spectral.py +5 -8
- sknetwork/embedding/tests/test_svd.py +1 -1
- sknetwork/gnn/base.py +4 -4
- sknetwork/gnn/base_layer.py +3 -3
- sknetwork/gnn/gnn_classifier.py +45 -89
- sknetwork/gnn/layer.py +1 -1
- sknetwork/gnn/loss.py +1 -1
- sknetwork/gnn/optimizer.py +4 -3
- sknetwork/gnn/tests/test_base_layer.py +4 -4
- sknetwork/gnn/tests/test_gnn_classifier.py +12 -35
- sknetwork/gnn/utils.py +8 -8
- sknetwork/hierarchy/base.py +29 -2
- sknetwork/hierarchy/louvain_hierarchy.py +45 -41
- sknetwork/hierarchy/paris.cp38-win_amd64.pyd +0 -0
- sknetwork/hierarchy/paris.cpp +27371 -22844
- sknetwork/hierarchy/paris.pyx +7 -9
- sknetwork/hierarchy/postprocess.py +16 -16
- sknetwork/hierarchy/tests/test_API.py +1 -1
- sknetwork/hierarchy/tests/test_algos.py +5 -0
- sknetwork/hierarchy/tests/test_metrics.py +1 -1
- sknetwork/linalg/__init__.py +1 -1
- sknetwork/linalg/diteration.cp38-win_amd64.pyd +0 -0
- sknetwork/linalg/diteration.cpp +13474 -9454
- sknetwork/linalg/diteration.pyx +0 -2
- sknetwork/linalg/eig_solver.py +1 -1
- sknetwork/linalg/{normalization.py → normalizer.py} +18 -15
- sknetwork/linalg/operators.py +1 -1
- sknetwork/linalg/ppr_solver.py +1 -1
- sknetwork/linalg/push.cp38-win_amd64.pyd +0 -0
- sknetwork/linalg/push.cpp +23003 -18807
- sknetwork/linalg/push.pyx +0 -2
- sknetwork/linalg/svd_solver.py +1 -1
- sknetwork/linalg/tests/test_normalization.py +3 -7
- sknetwork/linalg/tests/test_operators.py +4 -8
- sknetwork/linalg/tests/test_ppr.py +1 -1
- sknetwork/linkpred/base.py +13 -2
- sknetwork/linkpred/nn.py +6 -6
- sknetwork/log.py +19 -0
- sknetwork/path/__init__.py +4 -3
- sknetwork/path/dag.py +54 -0
- sknetwork/path/distances.py +98 -0
- sknetwork/path/search.py +13 -47
- sknetwork/path/shortest_path.py +37 -162
- sknetwork/path/tests/test_dag.py +37 -0
- sknetwork/path/tests/test_distances.py +62 -0
- sknetwork/path/tests/test_search.py +26 -11
- sknetwork/path/tests/test_shortest_path.py +31 -36
- sknetwork/ranking/__init__.py +0 -1
- sknetwork/ranking/base.py +13 -8
- sknetwork/ranking/betweenness.cp38-win_amd64.pyd +0 -0
- sknetwork/ranking/betweenness.cpp +5709 -3017
- sknetwork/ranking/betweenness.pyx +0 -2
- sknetwork/ranking/closeness.py +7 -10
- sknetwork/ranking/pagerank.py +14 -14
- sknetwork/ranking/postprocess.py +12 -3
- sknetwork/ranking/tests/test_API.py +2 -4
- sknetwork/ranking/tests/test_betweenness.py +3 -3
- sknetwork/ranking/tests/test_closeness.py +3 -7
- sknetwork/ranking/tests/test_pagerank.py +11 -5
- sknetwork/ranking/tests/test_postprocess.py +5 -0
- sknetwork/regression/base.py +19 -2
- sknetwork/regression/diffusion.py +24 -10
- sknetwork/regression/tests/test_diffusion.py +8 -0
- sknetwork/test_base.py +35 -0
- sknetwork/test_log.py +15 -0
- sknetwork/topology/__init__.py +7 -8
- sknetwork/topology/cliques.cp38-win_amd64.pyd +0 -0
- sknetwork/topology/{kcliques.cpp → cliques.cpp} +23423 -20277
- sknetwork/topology/cliques.pyx +149 -0
- sknetwork/topology/core.cp38-win_amd64.pyd +0 -0
- sknetwork/topology/{kcore.cpp → core.cpp} +21637 -18762
- sknetwork/topology/core.pyx +90 -0
- sknetwork/topology/cycles.py +243 -0
- sknetwork/topology/minheap.cp38-win_amd64.pyd +0 -0
- sknetwork/{utils → topology}/minheap.cpp +19452 -15368
- sknetwork/{utils → topology}/minheap.pxd +1 -3
- sknetwork/{utils → topology}/minheap.pyx +1 -3
- sknetwork/topology/structure.py +3 -43
- sknetwork/topology/tests/test_cliques.py +11 -11
- sknetwork/topology/tests/test_core.py +19 -0
- sknetwork/topology/tests/test_cycles.py +65 -0
- sknetwork/topology/tests/test_structure.py +2 -16
- sknetwork/topology/tests/test_triangles.py +11 -15
- sknetwork/topology/tests/test_wl.py +72 -0
- sknetwork/topology/triangles.cp38-win_amd64.pyd +0 -0
- sknetwork/topology/triangles.cpp +5056 -2696
- sknetwork/topology/triangles.pyx +74 -89
- sknetwork/topology/weisfeiler_lehman.py +56 -86
- sknetwork/topology/weisfeiler_lehman_core.cp38-win_amd64.pyd +0 -0
- sknetwork/topology/weisfeiler_lehman_core.cpp +14727 -10622
- sknetwork/topology/weisfeiler_lehman_core.pyx +0 -2
- sknetwork/utils/__init__.py +1 -31
- sknetwork/utils/check.py +2 -2
- sknetwork/utils/format.py +5 -3
- sknetwork/utils/membership.py +2 -2
- sknetwork/utils/tests/test_check.py +3 -3
- sknetwork/utils/tests/test_format.py +3 -1
- sknetwork/utils/values.py +1 -1
- sknetwork/visualization/__init__.py +2 -2
- sknetwork/visualization/dendrograms.py +55 -7
- sknetwork/visualization/graphs.py +292 -72
- sknetwork/visualization/tests/test_dendrograms.py +9 -9
- sknetwork/visualization/tests/test_graphs.py +71 -62
- scikit_network-0.30.0.dist-info/RECORD +0 -227
- sknetwork/embedding/louvain_hierarchy.py +0 -142
- sknetwork/embedding/tests/test_louvain_hierarchy.py +0 -19
- sknetwork/path/metrics.py +0 -148
- sknetwork/path/tests/test_metrics.py +0 -29
- sknetwork/ranking/harmonic.py +0 -82
- sknetwork/topology/dag.py +0 -74
- sknetwork/topology/dag_core.cp38-win_amd64.pyd +0 -0
- sknetwork/topology/dag_core.cpp +0 -23350
- sknetwork/topology/dag_core.pyx +0 -38
- sknetwork/topology/kcliques.cp38-win_amd64.pyd +0 -0
- sknetwork/topology/kcliques.pyx +0 -193
- sknetwork/topology/kcore.cp38-win_amd64.pyd +0 -0
- sknetwork/topology/kcore.pyx +0 -120
- sknetwork/topology/tests/test_cores.py +0 -21
- sknetwork/topology/tests/test_dag.py +0 -26
- sknetwork/topology/tests/test_wl_coloring.py +0 -49
- sknetwork/topology/tests/test_wl_kernel.py +0 -31
- sknetwork/utils/base.py +0 -35
- sknetwork/utils/minheap.cp38-win_amd64.pyd +0 -0
- sknetwork/utils/simplex.py +0 -140
- sknetwork/utils/tests/test_base.py +0 -28
- sknetwork/utils/tests/test_bunch.py +0 -16
- sknetwork/utils/tests/test_projection_simplex.py +0 -33
- sknetwork/utils/tests/test_verbose.py +0 -15
- sknetwork/utils/verbose.py +0 -37
- {scikit_network-0.30.0.dist-info → scikit_network-0.32.1.dist-info}/LICENSE +0 -0
- {scikit_network-0.30.0.dist-info → scikit_network-0.32.1.dist-info}/top_level.txt +0 -0
- /sknetwork/{utils → data}/timeout.py +0 -0
sknetwork/ranking/closeness.py
CHANGED
|
@@ -16,20 +16,15 @@ from sknetwork.utils.check import check_format, check_square, check_connected
|
|
|
16
16
|
|
|
17
17
|
|
|
18
18
|
class Closeness(BaseRanking):
|
|
19
|
-
"""
|
|
19
|
+
"""Ranking by closeness centrality of each node in a connected graph, corresponding to the average length of the
|
|
20
20
|
shortest paths from that node to all the other ones.
|
|
21
21
|
|
|
22
|
-
For a directed graph, the closeness centrality is computed in terms of outgoing paths.
|
|
23
|
-
|
|
24
22
|
Parameters
|
|
25
23
|
----------
|
|
26
24
|
method :
|
|
27
25
|
Denotes if the results should be exact or approximate.
|
|
28
26
|
tol: float
|
|
29
27
|
If ``method=='approximate'``, the allowed tolerance on each score entry.
|
|
30
|
-
n_jobs:
|
|
31
|
-
If an integer value is given, denotes the number of workers to use (-1 means the maximum number will be used).
|
|
32
|
-
If ``None``, no parallel computations are made.
|
|
33
28
|
|
|
34
29
|
Attributes
|
|
35
30
|
----------
|
|
@@ -55,12 +50,11 @@ class Closeness(BaseRanking):
|
|
|
55
50
|
Society for Industrial and Applied Mathematics.
|
|
56
51
|
"""
|
|
57
52
|
|
|
58
|
-
def __init__(self, method: str = 'exact', tol: float = 1e-1
|
|
53
|
+
def __init__(self, method: str = 'exact', tol: float = 1e-1):
|
|
59
54
|
super(Closeness, self).__init__()
|
|
60
55
|
|
|
61
56
|
self.method = method
|
|
62
57
|
self.tol = tol
|
|
63
|
-
self.n_jobs = n_jobs
|
|
64
58
|
|
|
65
59
|
def fit(self, adjacency: Union[sparse.csr_matrix, np.ndarray]) -> 'Closeness':
|
|
66
60
|
"""Closeness centrality for connected graphs.
|
|
@@ -88,8 +82,11 @@ class Closeness(BaseRanking):
|
|
|
88
82
|
else:
|
|
89
83
|
raise ValueError("Method should be either 'exact' or 'approximate'.")
|
|
90
84
|
|
|
91
|
-
|
|
85
|
+
distances = np.array([get_distances(adjacency, source=source) for source in sources])
|
|
92
86
|
|
|
93
|
-
|
|
87
|
+
distances_min = np.min(distances, axis=1)
|
|
88
|
+
scores = (n - 1) / n / np.mean(distances, axis=1)
|
|
89
|
+
scores[distances_min < 0] = 0
|
|
90
|
+
self.scores_ = scores
|
|
94
91
|
|
|
95
92
|
return self
|
sknetwork/ranking/pagerank.py
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
#!/usr/bin/env python3
|
|
2
2
|
# -*- coding: utf-8 -*-
|
|
3
3
|
"""
|
|
4
|
-
Created
|
|
4
|
+
Created in May 2019
|
|
5
5
|
@author: Nathan de Lara <nathan.delara@polytechnique.org>
|
|
6
6
|
@author: Thomas Bonald <bonald@enst.fr>
|
|
7
7
|
"""
|
|
@@ -9,16 +9,14 @@ from typing import Union, Optional
|
|
|
9
9
|
|
|
10
10
|
import numpy as np
|
|
11
11
|
from scipy import sparse
|
|
12
|
-
from scipy.sparse.linalg import LinearOperator
|
|
13
12
|
|
|
14
13
|
from sknetwork.linalg.ppr_solver import get_pagerank
|
|
15
14
|
from sknetwork.ranking.base import BaseRanking
|
|
16
15
|
from sknetwork.utils.check import check_damping_factor
|
|
17
16
|
from sknetwork.utils.format import get_adjacency_values
|
|
18
|
-
from sknetwork.utils.verbose import VerboseMixin
|
|
19
17
|
|
|
20
18
|
|
|
21
|
-
class PageRank(BaseRanking
|
|
19
|
+
class PageRank(BaseRanking):
|
|
22
20
|
"""PageRank of each node, corresponding to its frequency of visit by a random walk.
|
|
23
21
|
|
|
24
22
|
The random walk restarts with some fixed probability. The restart distribution can be personalized by the user.
|
|
@@ -74,24 +72,26 @@ class PageRank(BaseRanking, VerboseMixin):
|
|
|
74
72
|
self.tol = tol
|
|
75
73
|
self.bipartite = None
|
|
76
74
|
|
|
77
|
-
def fit(self, input_matrix: Union[sparse.csr_matrix, np.ndarray
|
|
75
|
+
def fit(self, input_matrix: Union[sparse.csr_matrix, np.ndarray],
|
|
78
76
|
weights: Optional[Union[dict, np.ndarray]] = None, weights_row: Optional[Union[dict, np.ndarray]] = None,
|
|
79
77
|
weights_col: Optional[Union[dict, np.ndarray]] = None, force_bipartite: bool = False) -> 'PageRank':
|
|
80
|
-
"""
|
|
78
|
+
"""Compute the pagerank of each node.
|
|
81
79
|
|
|
82
80
|
Parameters
|
|
83
81
|
----------
|
|
84
|
-
input_matrix :
|
|
82
|
+
input_matrix : sparse.csr_matrix, np.ndarray
|
|
85
83
|
Adjacency matrix or biadjacency matrix of the graph.
|
|
86
|
-
weights :
|
|
87
|
-
|
|
88
|
-
Restart distribution as a vector or a dict (node: weight).
|
|
84
|
+
weights : np.ndarray, dict
|
|
85
|
+
Weights of the restart distribution for Personalized PageRank.
|
|
89
86
|
If ``None``, the uniform distribution is used (no personalization, default).
|
|
90
|
-
weights_row,
|
|
91
|
-
|
|
92
|
-
|
|
87
|
+
weights_row : np.ndarray, dict
|
|
88
|
+
Weights on rows of the restart distribution for Personalized PageRank.
|
|
89
|
+
Used for bipartite graphs.
|
|
93
90
|
If both weights_row and weights_col are ``None`` (default), the uniform distribution on rows is used.
|
|
94
|
-
|
|
91
|
+
weights_col : np.ndarray, dict
|
|
92
|
+
Weights on columns of the restart distribution for Personalized PageRank.
|
|
93
|
+
Used for bipartite graphs.
|
|
94
|
+
force_bipartite : bool
|
|
95
95
|
If ``True``, consider the input matrix as the biadjacency matrix of a bipartite graph.
|
|
96
96
|
Returns
|
|
97
97
|
-------
|
sknetwork/ranking/postprocess.py
CHANGED
|
@@ -7,7 +7,7 @@ Created on May 2019
|
|
|
7
7
|
import numpy as np
|
|
8
8
|
|
|
9
9
|
|
|
10
|
-
def top_k(scores: np.ndarray, k: int = 1):
|
|
10
|
+
def top_k(scores: np.ndarray, k: int = 1, sort: bool = True):
|
|
11
11
|
"""Return the indices of the k elements of highest values.
|
|
12
12
|
|
|
13
13
|
Parameters
|
|
@@ -16,13 +16,22 @@ def top_k(scores: np.ndarray, k: int = 1):
|
|
|
16
16
|
Array of values.
|
|
17
17
|
k : int
|
|
18
18
|
Number of elements to return.
|
|
19
|
+
sort : bool
|
|
20
|
+
If ``True``, sort the indices in decreasing order of value (element of highest value first).
|
|
19
21
|
|
|
20
22
|
Examples
|
|
21
23
|
--------
|
|
22
24
|
>>> top_k([1, 3, 2], k=2)
|
|
23
25
|
array([1, 2])
|
|
24
26
|
"""
|
|
27
|
+
scores = np.array(scores)
|
|
25
28
|
if k >= len(scores):
|
|
26
|
-
|
|
29
|
+
if sort:
|
|
30
|
+
index = np.argsort(-scores)
|
|
31
|
+
else:
|
|
32
|
+
index = np.arange(scores)
|
|
27
33
|
else:
|
|
28
|
-
|
|
34
|
+
index = np.argpartition(-scores, k)[:k]
|
|
35
|
+
if sort:
|
|
36
|
+
index = index[np.argsort(-scores[index])]
|
|
37
|
+
return index
|
|
@@ -3,22 +3,20 @@
|
|
|
3
3
|
"""tests for ranking API"""
|
|
4
4
|
import unittest
|
|
5
5
|
|
|
6
|
-
from sknetwork.data.test_graphs import
|
|
6
|
+
from sknetwork.data.test_graphs import *
|
|
7
7
|
from sknetwork.ranking import *
|
|
8
8
|
|
|
9
9
|
|
|
10
10
|
class TestPageRank(unittest.TestCase):
|
|
11
11
|
|
|
12
12
|
def test_basic(self):
|
|
13
|
-
methods = [PageRank(), Closeness(), HITS(),
|
|
13
|
+
methods = [PageRank(), Closeness(), HITS(), Katz()]
|
|
14
14
|
for adjacency in [test_graph(), test_digraph()]:
|
|
15
15
|
n = adjacency.shape[0]
|
|
16
16
|
for method in methods:
|
|
17
17
|
score = method.fit_predict(adjacency)
|
|
18
18
|
self.assertEqual(score.shape, (n, ))
|
|
19
19
|
self.assertTrue(min(score) >= 0)
|
|
20
|
-
score = method.fit_transform(adjacency)
|
|
21
|
-
self.assertEqual(score.shape, (n,))
|
|
22
20
|
|
|
23
21
|
def test_bipartite(self):
|
|
24
22
|
biadjacency = test_bigraph()
|
|
@@ -6,7 +6,7 @@ import unittest
|
|
|
6
6
|
import numpy as np
|
|
7
7
|
|
|
8
8
|
from sknetwork.ranking.betweenness import Betweenness
|
|
9
|
-
from sknetwork.data.test_graphs import test_graph,
|
|
9
|
+
from sknetwork.data.test_graphs import test_graph, test_disconnected_graph
|
|
10
10
|
from sknetwork.data.toy_graphs import bow_tie, star_wars
|
|
11
11
|
|
|
12
12
|
|
|
@@ -25,7 +25,7 @@ class TestBetweenness(unittest.TestCase):
|
|
|
25
25
|
self.assertEqual(np.sum(scores > 0), 1)
|
|
26
26
|
|
|
27
27
|
def test_disconnected(self):
|
|
28
|
-
adjacency =
|
|
28
|
+
adjacency = test_disconnected_graph()
|
|
29
29
|
betweenness = Betweenness()
|
|
30
30
|
with self.assertRaises(ValueError):
|
|
31
31
|
betweenness.fit(adjacency)
|
|
@@ -35,4 +35,4 @@ class TestBetweenness(unittest.TestCase):
|
|
|
35
35
|
betweenness = Betweenness()
|
|
36
36
|
|
|
37
37
|
with self.assertRaises(ValueError):
|
|
38
|
-
betweenness.
|
|
38
|
+
betweenness.fit_predict(adjacency)
|
|
@@ -20,15 +20,11 @@ class TestDiffusion(unittest.TestCase):
|
|
|
20
20
|
n = adjacency.shape[0]
|
|
21
21
|
|
|
22
22
|
closeness = Closeness(method='approximate')
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
scores2 = closeness.fit_predict(adjacency)
|
|
26
|
-
|
|
27
|
-
self.assertEqual(scores1.shape, (n,))
|
|
28
|
-
self.assertAlmostEqual(np.linalg.norm(scores1 - scores2), 0)
|
|
23
|
+
scores = closeness.fit_predict(adjacency)
|
|
24
|
+
self.assertEqual(scores.shape, (n,))
|
|
29
25
|
|
|
30
26
|
def test_disconnected(self):
|
|
31
|
-
adjacency =
|
|
27
|
+
adjacency = test_disconnected_graph()
|
|
32
28
|
closeness = Closeness()
|
|
33
29
|
with self.assertRaises(ValueError):
|
|
34
30
|
closeness.fit(adjacency)
|
|
@@ -7,7 +7,7 @@ import unittest
|
|
|
7
7
|
import numpy as np
|
|
8
8
|
|
|
9
9
|
from sknetwork.data.models import cyclic_digraph
|
|
10
|
-
from sknetwork.data.test_graphs import test_bigraph
|
|
10
|
+
from sknetwork.data.test_graphs import test_graph, test_digraph, test_bigraph
|
|
11
11
|
from sknetwork.ranking.pagerank import PageRank
|
|
12
12
|
|
|
13
13
|
|
|
@@ -37,8 +37,8 @@ class TestPageRank(unittest.TestCase):
|
|
|
37
37
|
seeds_array[0] = 1.
|
|
38
38
|
seeds_dict = {0: 1}
|
|
39
39
|
|
|
40
|
-
scores1 = pagerank.
|
|
41
|
-
scores2 = pagerank.
|
|
40
|
+
scores1 = pagerank.fit_predict(self.adjacency, seeds_array)
|
|
41
|
+
scores2 = pagerank.fit_predict(self.adjacency, seeds_dict)
|
|
42
42
|
self.assertAlmostEqual(np.linalg.norm(scores1 - scores2), 0.)
|
|
43
43
|
|
|
44
44
|
def test_input(self):
|
|
@@ -48,9 +48,15 @@ class TestPageRank(unittest.TestCase):
|
|
|
48
48
|
|
|
49
49
|
def test_damping(self):
|
|
50
50
|
pagerank = PageRank(damping_factor=0.99)
|
|
51
|
-
scores = pagerank.
|
|
51
|
+
scores = pagerank.fit_predict(self.adjacency)
|
|
52
52
|
self.assertAlmostEqual(np.linalg.norm(scores - self.truth), 0.)
|
|
53
53
|
|
|
54
54
|
pagerank = PageRank(damping_factor=0.01)
|
|
55
|
-
scores = pagerank.
|
|
55
|
+
scores = pagerank.fit_predict(self.adjacency)
|
|
56
56
|
self.assertAlmostEqual(np.linalg.norm(scores - self.truth), 0.)
|
|
57
|
+
|
|
58
|
+
def test_bigraph(self):
|
|
59
|
+
pagerank = PageRank()
|
|
60
|
+
for adjacency in [test_graph(), test_digraph(), test_bigraph()]:
|
|
61
|
+
pagerank.fit(adjacency, weights_col={0: 1})
|
|
62
|
+
self.assertAlmostEqual(np.linalg.norm(pagerank.scores_col_ - pagerank.predict(columns=True)), 0.)
|
|
@@ -19,3 +19,8 @@ class TestPostprocessing(unittest.TestCase):
|
|
|
19
19
|
self.assertTrue(len(index) == 10)
|
|
20
20
|
index = top_k(scores, 20)
|
|
21
21
|
self.assertTrue(len(index) == 10)
|
|
22
|
+
scores = [3, 1, 6, 2]
|
|
23
|
+
index = top_k(scores, 2)
|
|
24
|
+
self.assertTrue(set(index) == {0, 2})
|
|
25
|
+
index = top_k(scores, 2, sort=True)
|
|
26
|
+
self.assertTrue(list(index) == [2, 0])
|
sknetwork/regression/base.py
CHANGED
|
@@ -8,7 +8,7 @@ from abc import ABC
|
|
|
8
8
|
|
|
9
9
|
import numpy as np
|
|
10
10
|
|
|
11
|
-
from sknetwork.
|
|
11
|
+
from sknetwork.base import Algorithm
|
|
12
12
|
|
|
13
13
|
|
|
14
14
|
class BaseRegressor(Algorithm, ABC):
|
|
@@ -26,8 +26,25 @@ class BaseRegressor(Algorithm, ABC):
|
|
|
26
26
|
def __init__(self):
|
|
27
27
|
self.values_ = None
|
|
28
28
|
|
|
29
|
+
def predict(self, columns: bool = False) -> np.ndarray:
|
|
30
|
+
"""Return the values predicted by the algorithm.
|
|
31
|
+
|
|
32
|
+
Parameters
|
|
33
|
+
----------
|
|
34
|
+
columns : bool
|
|
35
|
+
If ``True``, return the prediction for columns.
|
|
36
|
+
|
|
37
|
+
Returns
|
|
38
|
+
-------
|
|
39
|
+
values : np.ndarray
|
|
40
|
+
Values.
|
|
41
|
+
"""
|
|
42
|
+
if columns:
|
|
43
|
+
return self.values_col_
|
|
44
|
+
return self.values_
|
|
45
|
+
|
|
29
46
|
def fit_predict(self, *args, **kwargs) -> np.ndarray:
|
|
30
|
-
"""Fit algorithm to data and return the
|
|
47
|
+
"""Fit algorithm to data and return the values. Same parameters as the ``fit`` method.
|
|
31
48
|
|
|
32
49
|
Returns
|
|
33
50
|
-------
|
|
@@ -10,9 +10,9 @@ from typing import Union, Optional, Tuple
|
|
|
10
10
|
import numpy as np
|
|
11
11
|
from scipy import sparse
|
|
12
12
|
|
|
13
|
-
from sknetwork.linalg.
|
|
13
|
+
from sknetwork.linalg.normalizer import normalize
|
|
14
14
|
from sknetwork.regression.base import BaseRegressor
|
|
15
|
-
from sknetwork.utils
|
|
15
|
+
from sknetwork.utils import get_adjacency_values, get_degrees
|
|
16
16
|
|
|
17
17
|
|
|
18
18
|
def init_temperatures(seeds: np.ndarray, init: Optional[float]) -> Tuple[np.ndarray, np.ndarray]:
|
|
@@ -30,6 +30,12 @@ def init_temperatures(seeds: np.ndarray, init: Optional[float]) -> Tuple[np.ndar
|
|
|
30
30
|
class Diffusion(BaseRegressor):
|
|
31
31
|
"""Regression by diffusion along the edges, given the temperatures of some seed nodes (heat equation).
|
|
32
32
|
|
|
33
|
+
The row vector of temperatures :math:`T` evolves like:
|
|
34
|
+
|
|
35
|
+
:math:`T \\gets (1-\\alpha) T + \\alpha PT`
|
|
36
|
+
|
|
37
|
+
where :math:`\\alpha` is the damping factor and :math:`P` is the transition matrix of the random walk in the graph.
|
|
38
|
+
|
|
33
39
|
All values are updated, including those of seed nodes (free diffusion).
|
|
34
40
|
See ``Dirichlet`` for diffusion with boundary constraints.
|
|
35
41
|
|
|
@@ -37,6 +43,8 @@ class Diffusion(BaseRegressor):
|
|
|
37
43
|
----------
|
|
38
44
|
n_iter : int
|
|
39
45
|
Number of iterations of the diffusion (must be positive).
|
|
46
|
+
damping_factor : float
|
|
47
|
+
Damping factor.
|
|
40
48
|
|
|
41
49
|
Attributes
|
|
42
50
|
----------
|
|
@@ -49,24 +57,25 @@ class Diffusion(BaseRegressor):
|
|
|
49
57
|
Example
|
|
50
58
|
-------
|
|
51
59
|
>>> from sknetwork.data import house
|
|
52
|
-
>>> diffusion = Diffusion(n_iter=
|
|
60
|
+
>>> diffusion = Diffusion(n_iter=1)
|
|
53
61
|
>>> adjacency = house()
|
|
54
62
|
>>> values = {0: 1, 2: 0}
|
|
55
63
|
>>> values_pred = diffusion.fit_predict(adjacency, values)
|
|
56
|
-
>>> np.round(values_pred,
|
|
57
|
-
array([0.
|
|
64
|
+
>>> np.round(values_pred, 1)
|
|
65
|
+
array([0.8, 0.5, 0.2, 0.4, 0.6])
|
|
58
66
|
|
|
59
67
|
References
|
|
60
68
|
----------
|
|
61
69
|
Chung, F. (2007). The heat kernel as the pagerank of a graph. Proceedings of the National Academy of Sciences.
|
|
62
70
|
"""
|
|
63
|
-
def __init__(self, n_iter: int = 3):
|
|
71
|
+
def __init__(self, n_iter: int = 3, damping_factor: float = 0.5):
|
|
64
72
|
super(Diffusion, self).__init__()
|
|
65
73
|
|
|
66
74
|
if n_iter <= 0:
|
|
67
75
|
raise ValueError('The number of iterations must be positive.')
|
|
68
76
|
else:
|
|
69
77
|
self.n_iter = n_iter
|
|
78
|
+
self.damping_factor = damping_factor
|
|
70
79
|
self.bipartite = None
|
|
71
80
|
|
|
72
81
|
def fit(self, input_matrix: Union[sparse.csr_matrix, np.ndarray],
|
|
@@ -98,7 +107,13 @@ class Diffusion(BaseRegressor):
|
|
|
98
107
|
values_row=values_row,
|
|
99
108
|
values_col=values_col)
|
|
100
109
|
values, _ = init_temperatures(values, init)
|
|
101
|
-
diffusion = normalize(adjacency)
|
|
110
|
+
diffusion = normalize(adjacency.T.tocsr())
|
|
111
|
+
degrees = get_degrees(diffusion)
|
|
112
|
+
diag = sparse.diags((degrees == 0).astype(int)).tocsr()
|
|
113
|
+
diffusion += diag
|
|
114
|
+
|
|
115
|
+
diffusion = (1 - self.damping_factor) * sparse.identity(len(degrees)).tocsr() + self.damping_factor * diffusion
|
|
116
|
+
|
|
102
117
|
for i in range(self.n_iter):
|
|
103
118
|
values = diffusion.dot(values)
|
|
104
119
|
|
|
@@ -110,10 +125,9 @@ class Diffusion(BaseRegressor):
|
|
|
110
125
|
|
|
111
126
|
|
|
112
127
|
class Dirichlet(BaseRegressor):
|
|
113
|
-
"""Regression by the Dirichlet problem
|
|
114
|
-
(heat diffusion with boundary constraints).
|
|
128
|
+
"""Regression by the Dirichlet problem (heat diffusion with boundary constraints).
|
|
115
129
|
|
|
116
|
-
|
|
130
|
+
The temperatures of some seed nodes are fixed. The temperatures of other nodes are computed.
|
|
117
131
|
|
|
118
132
|
Parameters
|
|
119
133
|
----------
|
|
@@ -14,6 +14,13 @@ class TestDiffusion(unittest.TestCase):
|
|
|
14
14
|
def setUp(self):
|
|
15
15
|
self.algos = [Diffusion(), Dirichlet()]
|
|
16
16
|
|
|
17
|
+
def test_predict(self):
|
|
18
|
+
adjacency = test_graph()
|
|
19
|
+
for algo in self.algos:
|
|
20
|
+
values = algo.fit_predict(adjacency, {0: 0, 1: 1, 2: 0.5})
|
|
21
|
+
values_ = algo.predict()
|
|
22
|
+
self.assertAlmostEqual(np.linalg.norm(values - values_), 0)
|
|
23
|
+
|
|
17
24
|
def test_no_iter(self):
|
|
18
25
|
with self.assertRaises(ValueError):
|
|
19
26
|
Diffusion(n_iter=-1)
|
|
@@ -35,6 +42,7 @@ class TestDiffusion(unittest.TestCase):
|
|
|
35
42
|
self.assertTrue(np.all(values <= 1) and np.all(values >= 0))
|
|
36
43
|
values = algo.fit_predict(biadjacency, values_row={0: 0.1}, values_col={1: 2}, init=0.3)
|
|
37
44
|
self.assertTrue(np.all(values <= 2) and np.all(values >= 0.1))
|
|
45
|
+
self.assertAlmostEqual(np.linalg.norm(algo.values_col_ - algo.predict(columns=True)), 0)
|
|
38
46
|
|
|
39
47
|
def test_initial_state(self):
|
|
40
48
|
for adjacency in [test_graph(), test_digraph()]:
|
sknetwork/test_base.py
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
# -*- coding: utf-8 -*-
|
|
3
|
+
"""tests for base.py"""
|
|
4
|
+
import unittest
|
|
5
|
+
|
|
6
|
+
from sknetwork.base import Algorithm
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class TestBase(unittest.TestCase):
|
|
10
|
+
|
|
11
|
+
def setUp(self):
|
|
12
|
+
class NewAlgo(Algorithm):
|
|
13
|
+
"""Docstring"""
|
|
14
|
+
def __init__(self, param: int, name: str):
|
|
15
|
+
self.param = param
|
|
16
|
+
self.name = name
|
|
17
|
+
|
|
18
|
+
def fit(self):
|
|
19
|
+
"""Docstring"""
|
|
20
|
+
pass
|
|
21
|
+
self.algo = NewAlgo(1, 'abc')
|
|
22
|
+
|
|
23
|
+
def test_repr(self):
|
|
24
|
+
self.assertEqual(repr(self.algo), "NewAlgo(param=1, name='abc')")
|
|
25
|
+
|
|
26
|
+
def test_get_params(self):
|
|
27
|
+
self.assertEqual(len(self.algo.get_params()), 2)
|
|
28
|
+
|
|
29
|
+
def test_set_params(self):
|
|
30
|
+
self.algo.set_params({'param': 3})
|
|
31
|
+
self.assertEqual(self.algo.param, 3)
|
|
32
|
+
|
|
33
|
+
def test_fit(self):
|
|
34
|
+
stub = Algorithm()
|
|
35
|
+
self.assertRaises(NotImplementedError, stub.fit, None)
|
sknetwork/test_log.py
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
# -*- coding: utf-8 -*-
|
|
3
|
+
"""tests for verbose.py"""
|
|
4
|
+
|
|
5
|
+
import unittest
|
|
6
|
+
|
|
7
|
+
from sknetwork.log import Log
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class TestVerbose(unittest.TestCase):
|
|
11
|
+
|
|
12
|
+
def test_prints(self):
|
|
13
|
+
logger = Log(verbose=True)
|
|
14
|
+
logger.print_log('Hello', 42)
|
|
15
|
+
self.assertEqual(str(logger.log), 'Hello 42\n')
|
sknetwork/topology/__init__.py
CHANGED
|
@@ -1,9 +1,8 @@
|
|
|
1
1
|
"""Module on topology."""
|
|
2
|
-
from sknetwork.topology.
|
|
3
|
-
from sknetwork.topology.
|
|
4
|
-
from sknetwork.topology.triangles import
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
from sknetwork.topology.
|
|
8
|
-
|
|
9
|
-
from sknetwork.topology.weisfeiler_lehman import WeisfeilerLehman, are_isomorphic
|
|
2
|
+
from sknetwork.topology.cliques import count_cliques
|
|
3
|
+
from sknetwork.topology.core import get_core_decomposition
|
|
4
|
+
from sknetwork.topology.triangles import count_triangles, get_clustering_coefficient
|
|
5
|
+
from sknetwork.topology.structure import is_connected, is_bipartite, is_symmetric, get_connected_components, \
|
|
6
|
+
get_largest_connected_component
|
|
7
|
+
from sknetwork.topology.cycles import is_acyclic, get_cycles, break_cycles
|
|
8
|
+
from sknetwork.topology.weisfeiler_lehman import color_weisfeiler_lehman, are_isomorphic
|
|
Binary file
|