scikit-network 0.33.0__cp312-cp312-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of scikit-network might be problematic. Click here for more details.

Files changed (216) hide show
  1. scikit_network-0.33.0.dist-info/AUTHORS.rst +43 -0
  2. scikit_network-0.33.0.dist-info/LICENSE +34 -0
  3. scikit_network-0.33.0.dist-info/METADATA +517 -0
  4. scikit_network-0.33.0.dist-info/RECORD +216 -0
  5. scikit_network-0.33.0.dist-info/WHEEL +5 -0
  6. scikit_network-0.33.0.dist-info/top_level.txt +1 -0
  7. sknetwork/__init__.py +21 -0
  8. sknetwork/base.py +67 -0
  9. sknetwork/classification/__init__.py +8 -0
  10. sknetwork/classification/base.py +142 -0
  11. sknetwork/classification/base_rank.py +133 -0
  12. sknetwork/classification/diffusion.py +134 -0
  13. sknetwork/classification/knn.py +139 -0
  14. sknetwork/classification/metrics.py +205 -0
  15. sknetwork/classification/pagerank.py +66 -0
  16. sknetwork/classification/propagation.py +152 -0
  17. sknetwork/classification/tests/__init__.py +1 -0
  18. sknetwork/classification/tests/test_API.py +30 -0
  19. sknetwork/classification/tests/test_diffusion.py +77 -0
  20. sknetwork/classification/tests/test_knn.py +23 -0
  21. sknetwork/classification/tests/test_metrics.py +53 -0
  22. sknetwork/classification/tests/test_pagerank.py +20 -0
  23. sknetwork/classification/tests/test_propagation.py +24 -0
  24. sknetwork/classification/vote.cpython-312-darwin.so +0 -0
  25. sknetwork/classification/vote.pyx +56 -0
  26. sknetwork/clustering/__init__.py +8 -0
  27. sknetwork/clustering/base.py +172 -0
  28. sknetwork/clustering/kcenters.py +253 -0
  29. sknetwork/clustering/leiden.py +242 -0
  30. sknetwork/clustering/leiden_core.cpython-312-darwin.so +0 -0
  31. sknetwork/clustering/leiden_core.pyx +124 -0
  32. sknetwork/clustering/louvain.py +286 -0
  33. sknetwork/clustering/louvain_core.cpython-312-darwin.so +0 -0
  34. sknetwork/clustering/louvain_core.pyx +124 -0
  35. sknetwork/clustering/metrics.py +91 -0
  36. sknetwork/clustering/postprocess.py +66 -0
  37. sknetwork/clustering/propagation_clustering.py +104 -0
  38. sknetwork/clustering/tests/__init__.py +1 -0
  39. sknetwork/clustering/tests/test_API.py +38 -0
  40. sknetwork/clustering/tests/test_kcenters.py +60 -0
  41. sknetwork/clustering/tests/test_leiden.py +34 -0
  42. sknetwork/clustering/tests/test_louvain.py +129 -0
  43. sknetwork/clustering/tests/test_metrics.py +50 -0
  44. sknetwork/clustering/tests/test_postprocess.py +39 -0
  45. sknetwork/data/__init__.py +6 -0
  46. sknetwork/data/base.py +33 -0
  47. sknetwork/data/load.py +406 -0
  48. sknetwork/data/models.py +459 -0
  49. sknetwork/data/parse.py +644 -0
  50. sknetwork/data/test_graphs.py +84 -0
  51. sknetwork/data/tests/__init__.py +1 -0
  52. sknetwork/data/tests/test_API.py +30 -0
  53. sknetwork/data/tests/test_base.py +14 -0
  54. sknetwork/data/tests/test_load.py +95 -0
  55. sknetwork/data/tests/test_models.py +52 -0
  56. sknetwork/data/tests/test_parse.py +250 -0
  57. sknetwork/data/tests/test_test_graphs.py +29 -0
  58. sknetwork/data/tests/test_toy_graphs.py +68 -0
  59. sknetwork/data/timeout.py +38 -0
  60. sknetwork/data/toy_graphs.py +611 -0
  61. sknetwork/embedding/__init__.py +8 -0
  62. sknetwork/embedding/base.py +94 -0
  63. sknetwork/embedding/force_atlas.py +198 -0
  64. sknetwork/embedding/louvain_embedding.py +148 -0
  65. sknetwork/embedding/random_projection.py +135 -0
  66. sknetwork/embedding/spectral.py +141 -0
  67. sknetwork/embedding/spring.py +198 -0
  68. sknetwork/embedding/svd.py +359 -0
  69. sknetwork/embedding/tests/__init__.py +1 -0
  70. sknetwork/embedding/tests/test_API.py +49 -0
  71. sknetwork/embedding/tests/test_force_atlas.py +35 -0
  72. sknetwork/embedding/tests/test_louvain_embedding.py +33 -0
  73. sknetwork/embedding/tests/test_random_projection.py +28 -0
  74. sknetwork/embedding/tests/test_spectral.py +81 -0
  75. sknetwork/embedding/tests/test_spring.py +50 -0
  76. sknetwork/embedding/tests/test_svd.py +43 -0
  77. sknetwork/gnn/__init__.py +10 -0
  78. sknetwork/gnn/activation.py +117 -0
  79. sknetwork/gnn/base.py +181 -0
  80. sknetwork/gnn/base_activation.py +89 -0
  81. sknetwork/gnn/base_layer.py +109 -0
  82. sknetwork/gnn/gnn_classifier.py +305 -0
  83. sknetwork/gnn/layer.py +153 -0
  84. sknetwork/gnn/loss.py +180 -0
  85. sknetwork/gnn/neighbor_sampler.py +65 -0
  86. sknetwork/gnn/optimizer.py +164 -0
  87. sknetwork/gnn/tests/__init__.py +1 -0
  88. sknetwork/gnn/tests/test_activation.py +56 -0
  89. sknetwork/gnn/tests/test_base.py +75 -0
  90. sknetwork/gnn/tests/test_base_layer.py +37 -0
  91. sknetwork/gnn/tests/test_gnn_classifier.py +130 -0
  92. sknetwork/gnn/tests/test_layers.py +80 -0
  93. sknetwork/gnn/tests/test_loss.py +33 -0
  94. sknetwork/gnn/tests/test_neigh_sampler.py +23 -0
  95. sknetwork/gnn/tests/test_optimizer.py +43 -0
  96. sknetwork/gnn/tests/test_utils.py +41 -0
  97. sknetwork/gnn/utils.py +127 -0
  98. sknetwork/hierarchy/__init__.py +6 -0
  99. sknetwork/hierarchy/base.py +96 -0
  100. sknetwork/hierarchy/louvain_hierarchy.py +272 -0
  101. sknetwork/hierarchy/metrics.py +234 -0
  102. sknetwork/hierarchy/paris.cpython-312-darwin.so +0 -0
  103. sknetwork/hierarchy/paris.pyx +316 -0
  104. sknetwork/hierarchy/postprocess.py +350 -0
  105. sknetwork/hierarchy/tests/__init__.py +1 -0
  106. sknetwork/hierarchy/tests/test_API.py +24 -0
  107. sknetwork/hierarchy/tests/test_algos.py +34 -0
  108. sknetwork/hierarchy/tests/test_metrics.py +62 -0
  109. sknetwork/hierarchy/tests/test_postprocess.py +57 -0
  110. sknetwork/linalg/__init__.py +9 -0
  111. sknetwork/linalg/basics.py +37 -0
  112. sknetwork/linalg/diteration.cpython-312-darwin.so +0 -0
  113. sknetwork/linalg/diteration.pyx +47 -0
  114. sknetwork/linalg/eig_solver.py +93 -0
  115. sknetwork/linalg/laplacian.py +15 -0
  116. sknetwork/linalg/normalizer.py +86 -0
  117. sknetwork/linalg/operators.py +225 -0
  118. sknetwork/linalg/polynome.py +76 -0
  119. sknetwork/linalg/ppr_solver.py +170 -0
  120. sknetwork/linalg/push.cpython-312-darwin.so +0 -0
  121. sknetwork/linalg/push.pyx +71 -0
  122. sknetwork/linalg/sparse_lowrank.py +142 -0
  123. sknetwork/linalg/svd_solver.py +91 -0
  124. sknetwork/linalg/tests/__init__.py +1 -0
  125. sknetwork/linalg/tests/test_eig.py +44 -0
  126. sknetwork/linalg/tests/test_laplacian.py +18 -0
  127. sknetwork/linalg/tests/test_normalization.py +34 -0
  128. sknetwork/linalg/tests/test_operators.py +66 -0
  129. sknetwork/linalg/tests/test_polynome.py +38 -0
  130. sknetwork/linalg/tests/test_ppr.py +50 -0
  131. sknetwork/linalg/tests/test_sparse_lowrank.py +61 -0
  132. sknetwork/linalg/tests/test_svd.py +38 -0
  133. sknetwork/linkpred/__init__.py +2 -0
  134. sknetwork/linkpred/base.py +46 -0
  135. sknetwork/linkpred/nn.py +126 -0
  136. sknetwork/linkpred/tests/__init__.py +1 -0
  137. sknetwork/linkpred/tests/test_nn.py +27 -0
  138. sknetwork/log.py +19 -0
  139. sknetwork/path/__init__.py +5 -0
  140. sknetwork/path/dag.py +54 -0
  141. sknetwork/path/distances.py +98 -0
  142. sknetwork/path/search.py +31 -0
  143. sknetwork/path/shortest_path.py +61 -0
  144. sknetwork/path/tests/__init__.py +1 -0
  145. sknetwork/path/tests/test_dag.py +37 -0
  146. sknetwork/path/tests/test_distances.py +62 -0
  147. sknetwork/path/tests/test_search.py +40 -0
  148. sknetwork/path/tests/test_shortest_path.py +40 -0
  149. sknetwork/ranking/__init__.py +8 -0
  150. sknetwork/ranking/base.py +61 -0
  151. sknetwork/ranking/betweenness.cpython-312-darwin.so +0 -0
  152. sknetwork/ranking/betweenness.pyx +97 -0
  153. sknetwork/ranking/closeness.py +92 -0
  154. sknetwork/ranking/hits.py +94 -0
  155. sknetwork/ranking/katz.py +83 -0
  156. sknetwork/ranking/pagerank.py +110 -0
  157. sknetwork/ranking/postprocess.py +37 -0
  158. sknetwork/ranking/tests/__init__.py +1 -0
  159. sknetwork/ranking/tests/test_API.py +32 -0
  160. sknetwork/ranking/tests/test_betweenness.py +38 -0
  161. sknetwork/ranking/tests/test_closeness.py +30 -0
  162. sknetwork/ranking/tests/test_hits.py +20 -0
  163. sknetwork/ranking/tests/test_pagerank.py +62 -0
  164. sknetwork/ranking/tests/test_postprocess.py +26 -0
  165. sknetwork/regression/__init__.py +4 -0
  166. sknetwork/regression/base.py +61 -0
  167. sknetwork/regression/diffusion.py +210 -0
  168. sknetwork/regression/tests/__init__.py +1 -0
  169. sknetwork/regression/tests/test_API.py +32 -0
  170. sknetwork/regression/tests/test_diffusion.py +56 -0
  171. sknetwork/sknetwork.py +3 -0
  172. sknetwork/test_base.py +35 -0
  173. sknetwork/test_log.py +15 -0
  174. sknetwork/topology/__init__.py +8 -0
  175. sknetwork/topology/cliques.cpython-312-darwin.so +0 -0
  176. sknetwork/topology/cliques.pyx +149 -0
  177. sknetwork/topology/core.cpython-312-darwin.so +0 -0
  178. sknetwork/topology/core.pyx +90 -0
  179. sknetwork/topology/cycles.py +243 -0
  180. sknetwork/topology/minheap.cpython-312-darwin.so +0 -0
  181. sknetwork/topology/minheap.pxd +20 -0
  182. sknetwork/topology/minheap.pyx +109 -0
  183. sknetwork/topology/structure.py +194 -0
  184. sknetwork/topology/tests/__init__.py +1 -0
  185. sknetwork/topology/tests/test_cliques.py +28 -0
  186. sknetwork/topology/tests/test_core.py +19 -0
  187. sknetwork/topology/tests/test_cycles.py +65 -0
  188. sknetwork/topology/tests/test_structure.py +85 -0
  189. sknetwork/topology/tests/test_triangles.py +38 -0
  190. sknetwork/topology/tests/test_wl.py +72 -0
  191. sknetwork/topology/triangles.cpython-312-darwin.so +0 -0
  192. sknetwork/topology/triangles.pyx +151 -0
  193. sknetwork/topology/weisfeiler_lehman.py +133 -0
  194. sknetwork/topology/weisfeiler_lehman_core.cpython-312-darwin.so +0 -0
  195. sknetwork/topology/weisfeiler_lehman_core.pyx +114 -0
  196. sknetwork/utils/__init__.py +7 -0
  197. sknetwork/utils/check.py +355 -0
  198. sknetwork/utils/format.py +221 -0
  199. sknetwork/utils/membership.py +82 -0
  200. sknetwork/utils/neighbors.py +115 -0
  201. sknetwork/utils/tests/__init__.py +1 -0
  202. sknetwork/utils/tests/test_check.py +190 -0
  203. sknetwork/utils/tests/test_format.py +63 -0
  204. sknetwork/utils/tests/test_membership.py +24 -0
  205. sknetwork/utils/tests/test_neighbors.py +41 -0
  206. sknetwork/utils/tests/test_tfidf.py +18 -0
  207. sknetwork/utils/tests/test_values.py +66 -0
  208. sknetwork/utils/tfidf.py +37 -0
  209. sknetwork/utils/values.py +76 -0
  210. sknetwork/visualization/__init__.py +4 -0
  211. sknetwork/visualization/colors.py +34 -0
  212. sknetwork/visualization/dendrograms.py +277 -0
  213. sknetwork/visualization/graphs.py +1039 -0
  214. sknetwork/visualization/tests/__init__.py +1 -0
  215. sknetwork/visualization/tests/test_dendrograms.py +53 -0
  216. sknetwork/visualization/tests/test_graphs.py +176 -0
@@ -0,0 +1,110 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ Created in May 2019
5
+ @author: Nathan de Lara <nathan.delara@polytechnique.org>
6
+ @author: Thomas Bonald <bonald@enst.fr>
7
+ """
8
+ from typing import Union, Optional
9
+
10
+ import numpy as np
11
+ from scipy import sparse
12
+
13
+ from sknetwork.linalg.ppr_solver import get_pagerank
14
+ from sknetwork.ranking.base import BaseRanking
15
+ from sknetwork.utils.check import check_damping_factor
16
+ from sknetwork.utils.format import get_adjacency_values
17
+
18
+
19
class PageRank(BaseRanking):
    """Rank nodes by PageRank, i.e., by their frequency of visit by a random walk.

    The random walk restarts with a fixed probability. The restart distribution can be personalized by the user,
    which gives the variant known as Personalized PageRank.

    Parameters
    ----------
    damping_factor : float
        Probability to continue the random walk.
    solver : str
        * ``'piteration'``, use power iteration for a given number of iterations.
        * ``'diteration'``, use asynchronous parallel diffusion for a given number of iterations.
        * ``'lanczos'``, use eigensolver with a given tolerance.
        * ``'bicgstab'``, use Biconjugate Gradient Stabilized method for a given tolerance.
        * ``'RH'``, use a Ruffini-Horner polynomial evaluation.
        * ``'push'``, use push-based algorithm for a given tolerance
    n_iter : int
        Number of iterations for some solvers.
    tol : float
        Tolerance for the convergence of some solvers.

    Attributes
    ----------
    scores_ : np.ndarray
        PageRank score of each node.
    scores_row_: np.ndarray
        Scores of rows, for bipartite graphs.
    scores_col_: np.ndarray
        Scores of columns, for bipartite graphs.

    Example
    -------
    >>> from sknetwork.ranking import PageRank
    >>> from sknetwork.data import house
    >>> pagerank = PageRank()
    >>> adjacency = house()
    >>> weights = {0: 1}
    >>> scores = pagerank.fit_predict(adjacency, weights)
    >>> np.round(scores, 2)
    array([0.29, 0.24, 0.12, 0.12, 0.24])

    References
    ----------
    Page, L., Brin, S., Motwani, R., & Winograd, T. (1999). The PageRank citation ranking: Bringing order to the web.
    Stanford InfoLab.
    """
    def __init__(self, damping_factor: float = 0.85, solver: str = 'piteration', n_iter: int = 10, tol: float = 1e-6):
        super().__init__()
        # The damping factor is validated before any parameter is stored on the instance.
        check_damping_factor(damping_factor)
        self.damping_factor = damping_factor
        self.solver = solver
        self.n_iter = n_iter
        self.tol = tol
        # Set by ``fit`` from the output of ``get_adjacency_values``.
        self.bipartite = None

    def fit(self, input_matrix: Union[sparse.csr_matrix, np.ndarray],
            weights: Optional[Union[dict, np.ndarray]] = None, weights_row: Optional[Union[dict, np.ndarray]] = None,
            weights_col: Optional[Union[dict, np.ndarray]] = None, force_bipartite: bool = False) -> 'PageRank':
        """Compute the PageRank score of each node.

        Parameters
        ----------
        input_matrix : sparse.csr_matrix, np.ndarray
            Adjacency matrix or biadjacency matrix of the graph.
        weights : np.ndarray, dict
            Weights of the restart distribution for Personalized PageRank.
            If ``None``, the uniform distribution is used (no personalization, default).
        weights_row : np.ndarray, dict
            Weights on rows of the restart distribution for Personalized PageRank.
            Used for bipartite graphs.
            If both weights_row and weights_col are ``None`` (default), the uniform distribution on rows is used.
        weights_col : np.ndarray, dict
            Weights on columns of the restart distribution for Personalized PageRank.
            Used for bipartite graphs.
        force_bipartite : bool
            If ``True``, consider the input matrix as the biadjacency matrix of a bipartite graph.

        Returns
        -------
        self: :class:`PageRank`
        """
        graph, restart, self.bipartite = get_adjacency_values(
            input_matrix,
            force_bipartite=force_bipartite,
            values=weights,
            values_row=weights_row,
            values_col=weights_col,
            default_value=0,
            which='probs',
        )
        self.scores_ = get_pagerank(
            graph,
            restart,
            damping_factor=self.damping_factor,
            n_iter=self.n_iter,
            solver=self.solver,
            tol=self.tol,
        )
        if self.bipartite:
            # Separate the scores of rows and columns, using the shape of the original input.
            self._split_vars(input_matrix.shape)
        return self
@@ -0,0 +1,37 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ Created in May 2019
5
+ @author: Nathan de Lara <nathan.delara@polytechnique.org>
6
+ """
7
+ import numpy as np
8
+
9
+
10
def top_k(scores: np.ndarray, k: int = 1, sort: bool = True) -> np.ndarray:
    """Return the indices of the k elements of highest values.

    Parameters
    ----------
    scores : np.ndarray
        Array of values.
    k : int
        Number of elements to return. If ``k`` is at least the number of elements, all indices are returned.
    sort : bool
        If ``True``, sort the indices in decreasing order of value (element of highest value first).
        If ``False``, the order of the returned indices is not specified when ``k`` is lower than the number
        of elements, and is the natural order ``0, 1, ..., n - 1`` otherwise.

    Returns
    -------
    index : np.ndarray
        Indices of the selected elements.

    Examples
    --------
    >>> top_k([1, 3, 2], k=2)
    array([1, 2])
    """
    scores = np.array(scores)
    n_scores = len(scores)
    if k >= n_scores:
        # All elements are requested: no partial selection is needed.
        if sort:
            index = np.argsort(-scores)
        else:
            # np.arange expects a length here, not the array of scores itself.
            index = np.arange(n_scores)
    else:
        # Partial selection on negated scores puts the k highest values first (in arbitrary order).
        index = np.argpartition(-scores, k)[:k]
        if sort:
            index = index[np.argsort(-scores[index])]
    return index
@@ -0,0 +1 @@
1
+ """tests for ranking"""
@@ -0,0 +1,32 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ """tests for ranking API"""
4
+ import unittest
5
+
6
+ from sknetwork.data.test_graphs import *
7
+ from sknetwork.ranking import *
8
+
9
+
10
class TestPageRank(unittest.TestCase):
    """API checks shared by several ranking algorithms."""

    def test_basic(self):
        """Each algorithm returns one non-negative score per node on the undirected and directed test graphs."""
        algorithms = [PageRank(), Closeness(), HITS(), Katz()]
        for graph in (test_graph(), test_digraph()):
            n_nodes = graph.shape[0]
            for algorithm in algorithms:
                scores = algorithm.fit_predict(graph)
                self.assertEqual(scores.shape, (n_nodes, ))
                self.assertTrue(min(scores) >= 0)

    def test_bipartite(self):
        """Row and column scores match the dimensions of the biadjacency matrix."""
        biadjacency = test_bigraph()
        n_row, n_col = biadjacency.shape

        for algorithm in [PageRank(), HITS(), Katz()]:
            algorithm.fit(biadjacency)
            row_scores = algorithm.scores_row_
            col_scores = algorithm.scores_col_

            self.assertEqual(row_scores.shape, (n_row,))
            self.assertEqual(col_scores.shape, (n_col,))
@@ -0,0 +1,38 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ """tests for betweenness.py"""
4
+
5
+ import unittest
6
+ import numpy as np
7
+
8
+ from sknetwork.ranking.betweenness import Betweenness
9
+ from sknetwork.data.test_graphs import test_graph, test_disconnected_graph
10
+ from sknetwork.data.toy_graphs import bow_tie, star_wars
11
+
12
+
13
class TestBetweenness(unittest.TestCase):
    """Tests for the betweenness ranking."""

    def test_basic(self):
        """One score is returned per node of the test graph."""
        graph = test_graph()
        scores = Betweenness().fit_predict(graph)
        self.assertEqual(len(scores), graph.shape[0])

    def test_bowtie(self):
        """Exactly one node of the bow tie graph has a positive score."""
        graph = bow_tie()
        scores = Betweenness().fit_predict(graph)
        n_positive = np.sum(scores > 0)
        self.assertEqual(n_positive, 1)

    def test_disconnected(self):
        """Fitting on the disconnected test graph raises a ValueError."""
        graph = test_disconnected_graph()
        algorithm = Betweenness()
        with self.assertRaises(ValueError):
            algorithm.fit(graph)

    def test_bipartite(self):
        """Fitting on the star wars graph raises a ValueError."""
        graph = star_wars()
        algorithm = Betweenness()

        with self.assertRaises(ValueError):
            algorithm.fit_predict(graph)
@@ -0,0 +1,30 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ """tests for closeness.py"""
4
+
5
+ import unittest
6
+
7
+ from sknetwork.data.test_graphs import *
8
+ from sknetwork.ranking.closeness import Closeness
9
+
10
+
11
class TestDiffusion(unittest.TestCase):
    """Tests for the closeness ranking."""

    def test_params(self):
        """An unknown method name raises a ValueError."""
        with self.assertRaises(ValueError):
            graph = test_graph()
            algorithm = Closeness(method='toto')
            algorithm.fit(graph)

    def test_parallel(self):
        """The approximate method returns one score per node."""
        graph = test_graph()
        n_nodes = graph.shape[0]

        scores = Closeness(method='approximate').fit_predict(graph)
        self.assertEqual(scores.shape, (n_nodes,))

    def test_disconnected(self):
        """Fitting on the disconnected test graph raises a ValueError."""
        graph = test_disconnected_graph()
        algorithm = Closeness()
        with self.assertRaises(ValueError):
            algorithm.fit(graph)
@@ -0,0 +1,20 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ """Tests for hits.py"""
4
+
5
+ import unittest
6
+
7
+ from sknetwork.data.test_graphs import test_bigraph
8
+ from sknetwork.ranking import HITS
9
+
10
+
11
class TestHITS(unittest.TestCase):
    """Tests for the HITS ranking."""

    def test_keywords(self):
        """Row and column scores match the dimensions of the biadjacency matrix."""
        biadjacency = test_bigraph()
        n_row, n_col = biadjacency.shape

        algorithm = HITS()
        algorithm.fit(biadjacency)
        row_shape = algorithm.scores_row_.shape
        self.assertEqual(row_shape, (n_row,))
        col_shape = algorithm.scores_col_.shape
        self.assertEqual(col_shape, (n_col,))
@@ -0,0 +1,62 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ """tests for pagerank.py"""
4
+
5
+ import unittest
6
+
7
+ import numpy as np
8
+
9
+ from sknetwork.data.models import cyclic_digraph
10
+ from sknetwork.data.test_graphs import test_graph, test_digraph, test_bigraph
11
+ from sknetwork.ranking.pagerank import PageRank
12
+
13
+
14
class TestPageRank(unittest.TestCase):
    """Tests for the PageRank ranking."""

    def setUp(self) -> None:
        """Cycle graph for tests, with the uniform vector as reference scores."""
        self.n = 5
        self.adjacency = cyclic_digraph(self.n)
        self.truth = np.ones(self.n) / self.n

    def test_params(self):
        """An out-of-range damping factor is rejected at construction."""
        with self.assertRaises(ValueError):
            PageRank(damping_factor=1789)

    def test_solvers(self):
        """The listed solvers recover the reference scores; an unknown solver raises a ValueError."""
        for solver in ['piteration', 'lanczos', 'bicgstab', 'RH']:
            ranker = PageRank(solver=solver)
            scores = ranker.fit_predict(self.adjacency)
            error = np.linalg.norm(scores - self.truth)
            self.assertAlmostEqual(0, error)
        with self.assertRaises(ValueError):
            PageRank(solver='toto').fit_predict(self.adjacency)

    def test_seeding(self):
        """Weights given as an array or as a dictionary lead to the same scores."""
        ranker = PageRank()
        weights_array = np.zeros(self.n)
        weights_array[0] = 1.
        weights_dict = {0: 1}

        scores_from_array = ranker.fit_predict(self.adjacency, weights_array)
        scores_from_dict = ranker.fit_predict(self.adjacency, weights_dict)
        gap = np.linalg.norm(scores_from_array - scores_from_dict)
        self.assertAlmostEqual(gap, 0.)

    def test_input(self):
        """With ``force_bipartite=True``, there are as many returned scores as column scores."""
        ranker = PageRank()
        scores = ranker.fit_predict(self.adjacency, force_bipartite=True)
        self.assertEqual(len(scores), len(ranker.scores_col_))

    def test_damping(self):
        """The reference scores are recovered for both a high and a low damping factor."""
        ranker = PageRank(damping_factor=0.99)
        scores = ranker.fit_predict(self.adjacency)
        error = np.linalg.norm(scores - self.truth)
        self.assertAlmostEqual(error, 0.)

        ranker = PageRank(damping_factor=0.01)
        scores = ranker.fit_predict(self.adjacency)
        error = np.linalg.norm(scores - self.truth)
        self.assertAlmostEqual(error, 0.)

    def test_bigraph(self):
        """After fitting with column weights, ``predict(columns=True)`` agrees with ``scores_col_``."""
        ranker = PageRank()
        for graph in (test_graph(), test_digraph(), test_bigraph()):
            ranker.fit(graph, weights_col={0: 1})
            gap = np.linalg.norm(ranker.scores_col_ - ranker.predict(columns=True))
            self.assertAlmostEqual(gap, 0.)
@@ -0,0 +1,26 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ """tests for postprocessing"""
4
+
5
+ import unittest
6
+
7
+ import numpy as np
8
+
9
+ from sknetwork.ranking.postprocess import top_k
10
+
11
+
12
class TestPostprocessing(unittest.TestCase):
    """Tests for the post-processing of scores."""

    def test_top_k(self):
        """``top_k`` selects the highest values, caps k at the number of scores and sorts on request."""
        values = np.arange(10)
        selected = top_k(values, 3)
        self.assertEqual(set(selected), {7, 8, 9})
        # k equal to, then larger than, the number of scores: all indices are returned.
        selected = top_k(values, 10)
        self.assertEqual(len(selected), 10)
        selected = top_k(values, 20)
        self.assertEqual(len(selected), 10)
        # Plain lists are accepted as well.
        values = [3, 1, 6, 2]
        selected = top_k(values, 2)
        self.assertEqual(set(selected), {0, 2})
        selected = top_k(values, 2, sort=True)
        self.assertEqual(list(selected), [2, 0])
@@ -0,0 +1,4 @@
1
+ """regression module"""
2
+ from sknetwork.regression.base import BaseRegressor
3
+ from sknetwork.regression.diffusion import Diffusion, Dirichlet
4
+
@@ -0,0 +1,61 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ Created in April 2022
5
+ @author: Thomas Bonald <bonald@enst.fr>
6
+ """
7
+ from abc import ABC
8
+
9
+ import numpy as np
10
+
11
+ from sknetwork.base import Algorithm
12
+
13
+
14
class BaseRegressor(Algorithm, ABC):
    """Common base of the regression algorithms.

    Attributes
    ----------
    values_ : np.ndarray
        Value of each node.
    values_row_: np.ndarray
        Values of rows, for bipartite graphs.
    values_col_: np.ndarray
        Values of columns, for bipartite graphs.
    """
    def __init__(self):
        self.values_ = None

    def predict(self, columns: bool = False) -> np.ndarray:
        """Return the values predicted by the algorithm.

        Parameters
        ----------
        columns : bool
            If ``True``, return the prediction for columns.

        Returns
        -------
        values : np.ndarray
            Values.
        """
        return self.values_col_ if columns else self.values_

    def fit_predict(self, *args, **kwargs) -> np.ndarray:
        """Fit the algorithm to the data, then return the values. Same parameters as the ``fit`` method.

        Returns
        -------
        values : np.ndarray
            Values.
        """
        self.fit(*args, **kwargs)
        return self.values_

    def _split_vars(self, shape):
        """Split ``values_`` into row and column values; ``values_`` then refers to the row values."""
        n_row = shape[0]
        stacked = self.values_
        # The first n_row entries are the rows, the remaining ones are the columns.
        self.values_row_ = stacked[:n_row]
        self.values_col_ = stacked[n_row:]
        self.values_ = self.values_row_
@@ -0,0 +1,210 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ Created in July 2019
5
+ @author: Nathan de Lara <nathan.delara@polytechnique.org>
6
+ @author: Thomas Bonald <thomas.bonald@telecom-paris.fr>
7
+ """
8
+ from typing import Union, Optional, Tuple
9
+
10
+ import numpy as np
11
+ from scipy import sparse
12
+
13
+ from sknetwork.linalg.normalizer import normalize
14
+ from sknetwork.regression.base import BaseRegressor
15
+ from sknetwork.utils import get_adjacency_values, get_degrees
16
+
17
+
18
def init_temperatures(seeds: np.ndarray, init: Optional[float]) -> Tuple[np.ndarray, np.ndarray]:
    """Build the initial temperatures from the seeds.

    Non-negative entries of ``seeds`` are kept as temperatures. All other nodes start at ``init`` or,
    if ``init`` is ``None``, at the mean of the non-negative entries.

    Returns
    -------
    temperatures : np.ndarray
        Initial temperature of each node.
    border : np.ndarray
        Boolean mask of the nodes whose temperature is given by ``seeds``.
    """
    border = (seeds >= 0)
    if init is None:
        default = seeds[border].mean()
    else:
        default = init
    temperatures = np.full(len(seeds), default, dtype=float)
    temperatures[border] = seeds[border]
    return temperatures, border
28
+
29
+
30
class Diffusion(BaseRegressor):
    """Regression by diffusion along the edges, given the temperatures of some seed nodes (heat equation).

    The row vector of temperatures :math:`T` evolves like:

    :math:`T \\gets (1-\\alpha) T + \\alpha PT`

    where :math:`\\alpha` is the damping factor and :math:`P` is the transition matrix of the random walk in the graph.

    All values are updated, including those of seed nodes (free diffusion).
    See ``Dirichlet`` for diffusion with boundary constraints.

    Parameters
    ----------
    n_iter : int
        Number of iterations of the diffusion (must be positive).
    damping_factor : float
        Damping factor.

    Attributes
    ----------
    values_ : np.ndarray
        Value of each node (= temperature).
    values_row_: np.ndarray
        Values of rows, for bipartite graphs.
    values_col_: np.ndarray
        Values of columns, for bipartite graphs.

    Example
    -------
    >>> from sknetwork.data import house
    >>> diffusion = Diffusion(n_iter=1)
    >>> adjacency = house()
    >>> values = {0: 1, 2: 0}
    >>> values_pred = diffusion.fit_predict(adjacency, values)
    >>> np.round(values_pred, 1)
    array([0.8, 0.5, 0.2, 0.4, 0.6])

    References
    ----------
    Chung, F. (2007). The heat kernel as the pagerank of a graph. Proceedings of the National Academy of Sciences.
    """
    def __init__(self, n_iter: int = 3, damping_factor: float = 0.5):
        super().__init__()

        if n_iter <= 0:
            raise ValueError('The number of iterations must be positive.')
        self.n_iter = n_iter
        self.damping_factor = damping_factor
        # Set by ``fit`` from the output of ``get_adjacency_values``.
        self.bipartite = None

    def fit(self, input_matrix: Union[sparse.csr_matrix, np.ndarray],
            values: Optional[Union[dict, list, np.ndarray]] = None,
            values_row: Optional[Union[dict, list, np.ndarray]] = None,
            values_col: Optional[Union[dict, list, np.ndarray]] = None, init: Optional[float] = None,
            force_bipartite: bool = False) -> 'Diffusion':
        """Run the diffusion for ``n_iter`` iterations and store the resulting temperatures.

        Parameters
        ----------
        input_matrix :
            Adjacency matrix or biadjacency matrix of the graph.
        values :
            Temperatures of nodes in initial state (dictionary or vector). Negative temperatures ignored.
        values_row, values_col :
            Temperatures of rows and columns for bipartite graphs. Negative temperatures ignored.
        init :
            Temperature of nodes in initial state.
            If ``None``, use the average temperature of seed nodes (default).
        force_bipartite :
            If ``True``, consider the input matrix as a biadjacency matrix (default = ``False``).

        Returns
        -------
        self: :class:`Diffusion`
        """
        adjacency, seeds, self.bipartite = get_adjacency_values(
            input_matrix,
            force_bipartite=force_bipartite,
            values=values,
            values_row=values_row,
            values_col=values_col,
        )
        temperatures, _ = init_temperatures(seeds, init)

        transition = normalize(adjacency.T.tocsr())
        degrees = get_degrees(transition)
        # Add a unit diagonal entry wherever the degree is zero
        # (presumably so that nodes without neighbors keep their value; confirm against ``get_degrees``).
        transition += sparse.diags((degrees == 0).astype(int)).tocsr()

        identity = sparse.identity(len(degrees)).tocsr()
        operator = (1 - self.damping_factor) * identity + self.damping_factor * transition

        for _ in range(self.n_iter):
            temperatures = operator.dot(temperatures)

        self.values_ = temperatures
        if self.bipartite:
            # Separate the values of rows and columns, using the shape of the original input.
            self._split_vars(input_matrix.shape)

        return self
126
+
127
+
128
class Dirichlet(BaseRegressor):
    """Regression by the Dirichlet problem (heat diffusion with boundary constraints).

    The temperatures of some seed nodes are fixed. The temperatures of other nodes are computed.

    Parameters
    ----------
    n_iter : int
        Number of iterations of the diffusion (must be positive).

    Attributes
    ----------
    values_ : np.ndarray
        Value of each node (= temperature).
    values_row_: np.ndarray
        Values of rows, for bipartite graphs.
    values_col_: np.ndarray
        Values of columns, for bipartite graphs.

    Example
    -------
    >>> from sknetwork.regression import Dirichlet
    >>> from sknetwork.data import house
    >>> dirichlet = Dirichlet()
    >>> adjacency = house()
    >>> values = {0: 1, 2: 0}
    >>> values_pred = dirichlet.fit_predict(adjacency, values)
    >>> np.round(values_pred, 2)
    array([1.  , 0.54, 0.  , 0.31, 0.62])

    References
    ----------
    Chung, F. (2007). The heat kernel as the pagerank of a graph. Proceedings of the National Academy of Sciences.
    """
    def __init__(self, n_iter: int = 10):
        super().__init__()

        if n_iter <= 0:
            raise ValueError('The number of iterations must be positive.')
        self.n_iter = n_iter
        # Set by ``fit`` from the output of ``get_adjacency_values``.
        self.bipartite = None

    def fit(self, input_matrix: Union[sparse.csr_matrix, np.ndarray],
            values: Optional[Union[dict, list, np.ndarray]] = None,
            values_row: Optional[Union[dict, list, np.ndarray]] = None,
            values_col: Optional[Union[dict, list, np.ndarray]] = None, init: Optional[float] = None,
            force_bipartite: bool = False) -> 'Dirichlet':
        """Iterate the diffusion with fixed seed temperatures and store the resulting temperatures.

        Parameters
        ----------
        input_matrix :
            Adjacency matrix or biadjacency matrix of the graph.
        values :
            Temperatures of nodes (dictionary or vector). Negative temperatures ignored.
        values_row, values_col :
            Temperatures of rows and columns for bipartite graphs. Negative temperatures ignored.
        init :
            Temperature of nodes in initial state.
            If ``None``, use the average temperature of seed nodes (default).
        force_bipartite :
            If ``True``, consider the input matrix as a biadjacency matrix (default = ``False``).

        Returns
        -------
        self: :class:`Dirichlet`
        """
        adjacency, seeds, self.bipartite = get_adjacency_values(
            input_matrix,
            force_bipartite=force_bipartite,
            values=values,
            values_row=values_row,
            values_col=values_col,
        )
        temperatures, border = init_temperatures(seeds, init)
        # Temperatures imposed on the seed nodes; restored after every diffusion step.
        fixed = temperatures[border]

        current = temperatures.copy()
        transition = normalize(adjacency)
        for _ in range(self.n_iter):
            current = transition.dot(current)
            current[border] = fixed

        self.values_ = current
        if self.bipartite:
            # Separate the values of rows and columns, using the shape of the original input.
            self._split_vars(input_matrix.shape)

        return self
@@ -0,0 +1 @@
1
+ """tests for regression"""
@@ -0,0 +1,32 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ """tests for regression API"""
4
+ import unittest
5
+
6
+ from sknetwork.data.test_graphs import test_bigraph, test_graph, test_digraph
7
+ from sknetwork.regression import *
8
+
9
+
10
class TestAPI(unittest.TestCase):
    """API checks shared by the regression algorithms."""

    def test_basic(self):
        """Each algorithm returns one non-negative value per node on the undirected and directed test graphs."""
        algorithms = [Diffusion(), Dirichlet()]
        for graph in (test_graph(), test_digraph()):
            n_nodes = graph.shape[0]
            for algorithm in algorithms:
                predicted = algorithm.fit_predict(graph)
                self.assertEqual(predicted.shape, (n_nodes, ))
                self.assertTrue(min(predicted) >= 0)

    def test_bipartite(self):
        """Row and column values match the dimensions of the biadjacency matrix."""
        biadjacency = test_bigraph()
        n_row, n_col = biadjacency.shape

        for algorithm in [Diffusion(), Dirichlet()]:
            algorithm.fit(biadjacency)
            row_values = algorithm.values_row_
            col_values = algorithm.values_col_

            self.assertEqual(row_values.shape, (n_row,))
            self.assertEqual(col_values.shape, (n_col,))