scikit-network 0.33.4__cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (229) hide show
  1. scikit_network-0.33.4.dist-info/METADATA +122 -0
  2. scikit_network-0.33.4.dist-info/RECORD +229 -0
  3. scikit_network-0.33.4.dist-info/WHEEL +6 -0
  4. scikit_network-0.33.4.dist-info/licenses/AUTHORS.rst +43 -0
  5. scikit_network-0.33.4.dist-info/licenses/LICENSE +34 -0
  6. scikit_network-0.33.4.dist-info/top_level.txt +1 -0
  7. scikit_network.libs/libgomp-a34b3233.so.1.0.0 +0 -0
  8. sknetwork/__init__.py +21 -0
  9. sknetwork/base.py +67 -0
  10. sknetwork/classification/__init__.py +8 -0
  11. sknetwork/classification/base.py +138 -0
  12. sknetwork/classification/base_rank.py +129 -0
  13. sknetwork/classification/diffusion.py +127 -0
  14. sknetwork/classification/knn.py +131 -0
  15. sknetwork/classification/metrics.py +205 -0
  16. sknetwork/classification/pagerank.py +58 -0
  17. sknetwork/classification/propagation.py +144 -0
  18. sknetwork/classification/tests/__init__.py +1 -0
  19. sknetwork/classification/tests/test_API.py +30 -0
  20. sknetwork/classification/tests/test_diffusion.py +77 -0
  21. sknetwork/classification/tests/test_knn.py +23 -0
  22. sknetwork/classification/tests/test_metrics.py +53 -0
  23. sknetwork/classification/tests/test_pagerank.py +20 -0
  24. sknetwork/classification/tests/test_propagation.py +24 -0
  25. sknetwork/classification/vote.cpp +27593 -0
  26. sknetwork/classification/vote.cpython-312-x86_64-linux-gnu.so +0 -0
  27. sknetwork/classification/vote.pyx +56 -0
  28. sknetwork/clustering/__init__.py +8 -0
  29. sknetwork/clustering/base.py +168 -0
  30. sknetwork/clustering/kcenters.py +251 -0
  31. sknetwork/clustering/leiden.py +238 -0
  32. sknetwork/clustering/leiden_core.cpp +31928 -0
  33. sknetwork/clustering/leiden_core.cpython-312-x86_64-linux-gnu.so +0 -0
  34. sknetwork/clustering/leiden_core.pyx +124 -0
  35. sknetwork/clustering/louvain.py +282 -0
  36. sknetwork/clustering/louvain_core.cpp +31573 -0
  37. sknetwork/clustering/louvain_core.cpython-312-x86_64-linux-gnu.so +0 -0
  38. sknetwork/clustering/louvain_core.pyx +124 -0
  39. sknetwork/clustering/metrics.py +91 -0
  40. sknetwork/clustering/postprocess.py +66 -0
  41. sknetwork/clustering/propagation_clustering.py +100 -0
  42. sknetwork/clustering/tests/__init__.py +1 -0
  43. sknetwork/clustering/tests/test_API.py +38 -0
  44. sknetwork/clustering/tests/test_kcenters.py +60 -0
  45. sknetwork/clustering/tests/test_leiden.py +34 -0
  46. sknetwork/clustering/tests/test_louvain.py +135 -0
  47. sknetwork/clustering/tests/test_metrics.py +50 -0
  48. sknetwork/clustering/tests/test_postprocess.py +39 -0
  49. sknetwork/data/__init__.py +6 -0
  50. sknetwork/data/base.py +33 -0
  51. sknetwork/data/load.py +292 -0
  52. sknetwork/data/models.py +459 -0
  53. sknetwork/data/parse.py +644 -0
  54. sknetwork/data/test_graphs.py +93 -0
  55. sknetwork/data/tests/__init__.py +1 -0
  56. sknetwork/data/tests/test_API.py +30 -0
  57. sknetwork/data/tests/test_base.py +14 -0
  58. sknetwork/data/tests/test_load.py +61 -0
  59. sknetwork/data/tests/test_models.py +52 -0
  60. sknetwork/data/tests/test_parse.py +250 -0
  61. sknetwork/data/tests/test_test_graphs.py +29 -0
  62. sknetwork/data/tests/test_toy_graphs.py +68 -0
  63. sknetwork/data/timeout.py +38 -0
  64. sknetwork/data/toy_graphs.py +611 -0
  65. sknetwork/embedding/__init__.py +8 -0
  66. sknetwork/embedding/base.py +90 -0
  67. sknetwork/embedding/force_atlas.py +198 -0
  68. sknetwork/embedding/louvain_embedding.py +142 -0
  69. sknetwork/embedding/random_projection.py +131 -0
  70. sknetwork/embedding/spectral.py +137 -0
  71. sknetwork/embedding/spring.py +198 -0
  72. sknetwork/embedding/svd.py +351 -0
  73. sknetwork/embedding/tests/__init__.py +1 -0
  74. sknetwork/embedding/tests/test_API.py +49 -0
  75. sknetwork/embedding/tests/test_force_atlas.py +35 -0
  76. sknetwork/embedding/tests/test_louvain_embedding.py +33 -0
  77. sknetwork/embedding/tests/test_random_projection.py +28 -0
  78. sknetwork/embedding/tests/test_spectral.py +81 -0
  79. sknetwork/embedding/tests/test_spring.py +50 -0
  80. sknetwork/embedding/tests/test_svd.py +43 -0
  81. sknetwork/gnn/__init__.py +10 -0
  82. sknetwork/gnn/activation.py +117 -0
  83. sknetwork/gnn/base.py +181 -0
  84. sknetwork/gnn/base_activation.py +90 -0
  85. sknetwork/gnn/base_layer.py +109 -0
  86. sknetwork/gnn/gnn_classifier.py +305 -0
  87. sknetwork/gnn/layer.py +153 -0
  88. sknetwork/gnn/loss.py +180 -0
  89. sknetwork/gnn/neighbor_sampler.py +65 -0
  90. sknetwork/gnn/optimizer.py +164 -0
  91. sknetwork/gnn/tests/__init__.py +1 -0
  92. sknetwork/gnn/tests/test_activation.py +56 -0
  93. sknetwork/gnn/tests/test_base.py +75 -0
  94. sknetwork/gnn/tests/test_base_layer.py +37 -0
  95. sknetwork/gnn/tests/test_gnn_classifier.py +130 -0
  96. sknetwork/gnn/tests/test_layers.py +80 -0
  97. sknetwork/gnn/tests/test_loss.py +33 -0
  98. sknetwork/gnn/tests/test_neigh_sampler.py +23 -0
  99. sknetwork/gnn/tests/test_optimizer.py +43 -0
  100. sknetwork/gnn/tests/test_utils.py +41 -0
  101. sknetwork/gnn/utils.py +127 -0
  102. sknetwork/hierarchy/__init__.py +6 -0
  103. sknetwork/hierarchy/base.py +90 -0
  104. sknetwork/hierarchy/louvain_hierarchy.py +260 -0
  105. sknetwork/hierarchy/metrics.py +234 -0
  106. sknetwork/hierarchy/paris.cpp +37877 -0
  107. sknetwork/hierarchy/paris.cpython-312-x86_64-linux-gnu.so +0 -0
  108. sknetwork/hierarchy/paris.pyx +310 -0
  109. sknetwork/hierarchy/postprocess.py +350 -0
  110. sknetwork/hierarchy/tests/__init__.py +1 -0
  111. sknetwork/hierarchy/tests/test_API.py +24 -0
  112. sknetwork/hierarchy/tests/test_algos.py +34 -0
  113. sknetwork/hierarchy/tests/test_metrics.py +62 -0
  114. sknetwork/hierarchy/tests/test_postprocess.py +57 -0
  115. sknetwork/linalg/__init__.py +9 -0
  116. sknetwork/linalg/basics.py +37 -0
  117. sknetwork/linalg/diteration.cpp +27409 -0
  118. sknetwork/linalg/diteration.cpython-312-x86_64-linux-gnu.so +0 -0
  119. sknetwork/linalg/diteration.pyx +47 -0
  120. sknetwork/linalg/eig_solver.py +93 -0
  121. sknetwork/linalg/laplacian.py +15 -0
  122. sknetwork/linalg/normalizer.py +86 -0
  123. sknetwork/linalg/operators.py +225 -0
  124. sknetwork/linalg/polynome.py +76 -0
  125. sknetwork/linalg/ppr_solver.py +170 -0
  126. sknetwork/linalg/push.cpp +31081 -0
  127. sknetwork/linalg/push.cpython-312-x86_64-linux-gnu.so +0 -0
  128. sknetwork/linalg/push.pyx +71 -0
  129. sknetwork/linalg/sparse_lowrank.py +142 -0
  130. sknetwork/linalg/svd_solver.py +91 -0
  131. sknetwork/linalg/tests/__init__.py +1 -0
  132. sknetwork/linalg/tests/test_eig.py +44 -0
  133. sknetwork/linalg/tests/test_laplacian.py +18 -0
  134. sknetwork/linalg/tests/test_normalization.py +34 -0
  135. sknetwork/linalg/tests/test_operators.py +66 -0
  136. sknetwork/linalg/tests/test_polynome.py +38 -0
  137. sknetwork/linalg/tests/test_ppr.py +50 -0
  138. sknetwork/linalg/tests/test_sparse_lowrank.py +61 -0
  139. sknetwork/linalg/tests/test_svd.py +38 -0
  140. sknetwork/linkpred/__init__.py +2 -0
  141. sknetwork/linkpred/base.py +46 -0
  142. sknetwork/linkpred/nn.py +126 -0
  143. sknetwork/linkpred/tests/__init__.py +1 -0
  144. sknetwork/linkpred/tests/test_nn.py +26 -0
  145. sknetwork/log.py +19 -0
  146. sknetwork/path/__init__.py +5 -0
  147. sknetwork/path/dag.py +54 -0
  148. sknetwork/path/distances.py +98 -0
  149. sknetwork/path/search.py +31 -0
  150. sknetwork/path/shortest_path.py +61 -0
  151. sknetwork/path/tests/__init__.py +1 -0
  152. sknetwork/path/tests/test_dag.py +37 -0
  153. sknetwork/path/tests/test_distances.py +62 -0
  154. sknetwork/path/tests/test_search.py +40 -0
  155. sknetwork/path/tests/test_shortest_path.py +40 -0
  156. sknetwork/ranking/__init__.py +8 -0
  157. sknetwork/ranking/base.py +57 -0
  158. sknetwork/ranking/betweenness.cpp +9716 -0
  159. sknetwork/ranking/betweenness.cpython-312-x86_64-linux-gnu.so +0 -0
  160. sknetwork/ranking/betweenness.pyx +97 -0
  161. sknetwork/ranking/closeness.py +92 -0
  162. sknetwork/ranking/hits.py +90 -0
  163. sknetwork/ranking/katz.py +79 -0
  164. sknetwork/ranking/pagerank.py +106 -0
  165. sknetwork/ranking/postprocess.py +37 -0
  166. sknetwork/ranking/tests/__init__.py +1 -0
  167. sknetwork/ranking/tests/test_API.py +32 -0
  168. sknetwork/ranking/tests/test_betweenness.py +38 -0
  169. sknetwork/ranking/tests/test_closeness.py +30 -0
  170. sknetwork/ranking/tests/test_hits.py +20 -0
  171. sknetwork/ranking/tests/test_pagerank.py +62 -0
  172. sknetwork/ranking/tests/test_postprocess.py +26 -0
  173. sknetwork/regression/__init__.py +4 -0
  174. sknetwork/regression/base.py +57 -0
  175. sknetwork/regression/diffusion.py +204 -0
  176. sknetwork/regression/tests/__init__.py +1 -0
  177. sknetwork/regression/tests/test_API.py +32 -0
  178. sknetwork/regression/tests/test_diffusion.py +56 -0
  179. sknetwork/sknetwork.py +3 -0
  180. sknetwork/test_base.py +35 -0
  181. sknetwork/test_log.py +15 -0
  182. sknetwork/topology/__init__.py +8 -0
  183. sknetwork/topology/cliques.cpp +32574 -0
  184. sknetwork/topology/cliques.cpython-312-x86_64-linux-gnu.so +0 -0
  185. sknetwork/topology/cliques.pyx +149 -0
  186. sknetwork/topology/core.cpp +30660 -0
  187. sknetwork/topology/core.cpython-312-x86_64-linux-gnu.so +0 -0
  188. sknetwork/topology/core.pyx +90 -0
  189. sknetwork/topology/cycles.py +243 -0
  190. sknetwork/topology/minheap.cpp +27341 -0
  191. sknetwork/topology/minheap.cpython-312-x86_64-linux-gnu.so +0 -0
  192. sknetwork/topology/minheap.pxd +20 -0
  193. sknetwork/topology/minheap.pyx +109 -0
  194. sknetwork/topology/structure.py +194 -0
  195. sknetwork/topology/tests/__init__.py +1 -0
  196. sknetwork/topology/tests/test_cliques.py +28 -0
  197. sknetwork/topology/tests/test_core.py +19 -0
  198. sknetwork/topology/tests/test_cycles.py +65 -0
  199. sknetwork/topology/tests/test_structure.py +85 -0
  200. sknetwork/topology/tests/test_triangles.py +38 -0
  201. sknetwork/topology/tests/test_wl.py +72 -0
  202. sknetwork/topology/triangles.cpp +8903 -0
  203. sknetwork/topology/triangles.cpython-312-x86_64-linux-gnu.so +0 -0
  204. sknetwork/topology/triangles.pyx +151 -0
  205. sknetwork/topology/weisfeiler_lehman.py +133 -0
  206. sknetwork/topology/weisfeiler_lehman_core.cpp +27644 -0
  207. sknetwork/topology/weisfeiler_lehman_core.cpython-312-x86_64-linux-gnu.so +0 -0
  208. sknetwork/topology/weisfeiler_lehman_core.pyx +114 -0
  209. sknetwork/utils/__init__.py +7 -0
  210. sknetwork/utils/check.py +355 -0
  211. sknetwork/utils/format.py +221 -0
  212. sknetwork/utils/membership.py +82 -0
  213. sknetwork/utils/neighbors.py +115 -0
  214. sknetwork/utils/tests/__init__.py +1 -0
  215. sknetwork/utils/tests/test_check.py +190 -0
  216. sknetwork/utils/tests/test_format.py +63 -0
  217. sknetwork/utils/tests/test_membership.py +24 -0
  218. sknetwork/utils/tests/test_neighbors.py +41 -0
  219. sknetwork/utils/tests/test_tfidf.py +18 -0
  220. sknetwork/utils/tests/test_values.py +66 -0
  221. sknetwork/utils/tfidf.py +37 -0
  222. sknetwork/utils/values.py +76 -0
  223. sknetwork/visualization/__init__.py +4 -0
  224. sknetwork/visualization/colors.py +34 -0
  225. sknetwork/visualization/dendrograms.py +277 -0
  226. sknetwork/visualization/graphs.py +1039 -0
  227. sknetwork/visualization/tests/__init__.py +1 -0
  228. sknetwork/visualization/tests/test_dendrograms.py +53 -0
  229. sknetwork/visualization/tests/test_graphs.py +176 -0
@@ -0,0 +1,97 @@
1
+ # distutils: language = c++
2
+ # cython: language_level=3
3
+ """
4
+ Created on September 17 2020
5
+ @author: Tiphaine Viard <tiphaine.viard@telecom-paris.fr>
6
+ """
7
+ from typing import Union
8
+ import numpy as np
9
+ from scipy import sparse
10
+
11
+ from sknetwork.ranking.base import BaseRanking
12
+ from sknetwork.utils.check import check_format, check_square, check_connected
13
+
14
+ from libcpp.vector cimport vector
15
+ from libcpp.queue cimport queue
16
+
17
class Betweenness(BaseRanking):
    """Betweenness centrality, based on Brandes' algorithm.

    Parameters
    ----------
    normalized : bool
        Stored as ``normalized_``.
        NOTE(review): this flag is never read by :meth:`fit`; the returned scores are always unnormalized.

    Attributes
    ----------
    scores_ : np.ndarray
        Betweenness centrality value of each node

    Example
    -------
    >>> from sknetwork.ranking import Betweenness
    >>> from sknetwork.data.toy_graphs import bow_tie
    >>> betweenness = Betweenness()
    >>> adjacency = bow_tie()
    >>> scores = betweenness.fit_transform(adjacency)
    >>> scores
    array([4., 0., 0., 0., 0.])

    References
    ----------
    Brandes, Ulrik (2001). A faster algorithm for betweenness centrality. Journal of Mathematical Sociology.
    """

    def __init__(self, normalized: bool = False):
        super(Betweenness, self).__init__()
        self.normalized_ = normalized

    def fit(self, adjacency: Union[sparse.csr_matrix, np.ndarray]) -> 'Betweenness':
        """Compute the betweenness centrality of each node.

        For every source node, a breadth-first search counts the shortest paths to all other nodes,
        then the visited nodes are unwound in reverse order to accumulate the dependencies.

        Parameters
        ----------
        adjacency :
            Adjacency matrix of the graph. It goes through ``check_format``, ``check_square`` and
            ``check_connected`` first (presumably raising if the graph is not square / not connected).

        Returns
        -------
        self: :class:`Betweenness`
        """
        adjacency = check_format(adjacency)
        check_square(adjacency)
        check_connected(adjacency)

        cdef int source
        cdef vector[ vector[int] ] preds
        cdef vector[int] sigma
        cdef vector[int] dists
        cdef int i
        cdef int j
        cdef vector[float] delta

        cdef int n = adjacency.shape[0]
        self.scores_ = np.zeros(n)
        cdef vector[int] seen  # Used as a stack: nodes in order of BFS discovery
        cdef queue[int] bfs_queue

        for source in range(n):
            # preds[j]: predecessors of j on shortest paths from source
            preds = [[] for _ in range(n)]
            # sigma[j]: number of shortest paths from source to j
            sigma = np.zeros(n)
            sigma[source] = 1
            # dists[j]: BFS distance from source to j (-1 = not reached yet)
            dists = -np.ones(n, dtype=int)
            dists[source] = 0
            bfs_queue.push(source)

            while bfs_queue.size() != 0:
                i = bfs_queue.front()
                bfs_queue.pop()

                seen.push_back(i)
                neighbors = adjacency.indices[adjacency.indptr[i]:adjacency.indptr[i + 1]]
                for j in neighbors:
                    if dists[j] < 0:  # j found for the first time?
                        dists[j] = dists[i] + 1
                        bfs_queue.push(j)
                    if dists[j] == dists[i] + 1:  # shortest path to j via i?
                        sigma[j] += sigma[i]
                        preds[j].push_back(i)

            # Now backtrack to compute betweenness scores
            delta = np.zeros(n)
            # Query the C++ container directly (same style as bfs_queue above) instead of
            # going through Python's len() on every iteration.
            while seen.size() != 0:
                j = seen.back()
                seen.pop_back()
                for i in preds[j]:
                    delta[i] += sigma[i] / sigma[j] * (1 + delta[j])
                if j != source:
                    self.scores_[j] += delta[j]

        # Undirected graph, divide all values by two
        self.scores_ = 1 / 2 * self.scores_

        return self
@@ -0,0 +1,92 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ Created on November 12 2019
5
+ @author: Quentin Lutz <qlutz@enst.fr>
6
+ """
7
+ from math import log
8
+ from typing import Union, Optional
9
+
10
+ import numpy as np
11
+ from scipy import sparse
12
+
13
+ from sknetwork.path.shortest_path import get_distances
14
+ from sknetwork.ranking.base import BaseRanking
15
+ from sknetwork.utils.check import check_format, check_square, check_connected
16
+
17
+
18
class Closeness(BaseRanking):
    """Closeness centrality of the nodes of a connected graph.

    The score of a node is ``(n - 1) / n`` divided by the mean of its shortest-path distances
    to all nodes, where ``n`` is the number of nodes.

    Parameters
    ----------
    method : str
        Either ``'exact'`` (every node is used as a source) or ``'approximate'``
        (a random subset of nodes is used as sources).
    tol : float
        If ``method=='approximate'``, the allowed tolerance on each score entry.
        The number of sampled sources is ``min(int(log(n) / tol ** 2), n)``.

    Attributes
    ----------
    scores_ : np.ndarray
        Closeness centrality of each node.

    Example
    -------
    >>> from sknetwork.ranking import Closeness
    >>> from sknetwork.data import cyclic_digraph
    >>> closeness = Closeness()
    >>> adjacency = cyclic_digraph(3)
    >>> scores = closeness.fit_predict(adjacency)
    >>> np.round(scores, 2)
    array([0.67, 0.67, 0.67])

    References
    ----------
    Eppstein, D., & Wang, J. (2001, January).
    `Fast approximation of centrality.
    <http://jgaa.info/accepted/2004/EppsteinWang2004.8.1.pdf>`_
    In Proceedings of the twelfth annual ACM-SIAM symposium on Discrete algorithms (pp. 228-229).
    Society for Industrial and Applied Mathematics.
    """

    def __init__(self, method: str = 'exact', tol: float = 1e-1):
        super(Closeness, self).__init__()

        self.method = method
        self.tol = tol

    def fit(self, adjacency: Union[sparse.csr_matrix, np.ndarray]) -> 'Closeness':
        """Closeness centrality for connected graphs.

        Parameters
        ----------
        adjacency :
            Adjacency matrix of the graph.

        Returns
        -------
        self: :class:`Closeness`

        Raises
        ------
        ValueError
            If ``method`` is neither ``'exact'`` nor ``'approximate'``.
        """
        adjacency = check_format(adjacency)
        check_square(adjacency)
        check_connected(adjacency)
        n_nodes = adjacency.shape[0]

        if self.method not in ('exact', 'approximate'):
            raise ValueError("Method should be either 'exact' or 'approximate'.")

        if self.method == 'exact':
            sources = np.arange(n_nodes)
        else:
            n_samples = min(int(log(n_nodes) / self.tol ** 2), n_nodes)
            sources = np.random.choice(np.arange(n_nodes), n_samples, replace=False)

        # One row of distances per source node.
        # NOTE(review): in 'approximate' mode with fewer sources than nodes, the score array below
        # has one entry per sampled source rather than one per node -- confirm this is intended.
        distances = np.array([get_distances(adjacency, source=node) for node in sources])

        scale = (n_nodes - 1) / n_nodes
        scores = scale / distances.mean(axis=1)
        # A negative distance in a row zeroes the score of the corresponding source.
        has_negative = distances.min(axis=1) < 0
        scores[has_negative] = 0
        self.scores_ = scores

        return self
@@ -0,0 +1,90 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ Created on Oct 07 2019
5
+ @author: Nathan de Lara <nathan.delara@polytechnique.org>
6
+ """
7
+
8
+ from typing import Union
9
+
10
+ import numpy as np
11
+ from scipy import sparse
12
+
13
+ from sknetwork.linalg import SVDSolver, LanczosSVD
14
+ from sknetwork.ranking.base import BaseRanking
15
+ from sknetwork.utils.check import check_format
16
+
17
+
18
class HITS(BaseRanking):
    """Hub and authority scores of each node.
    For bipartite graphs, the hub score is computed on rows and the authority score on columns.

    Parameters
    ----------
    solver : ``'lanczos'`` (default, Lanczos algorithm) or :class:`SVDSolver` (custom solver)
        Which solver to use. Any string selects the Lanczos solver.

    Attributes
    ----------
    scores_ : np.ndarray
        Hub score of each node.
    scores_row_ : np.ndarray
        Hub score of each row (same array as ``scores_``).
    scores_col_ : np.ndarray
        Authority score of each column.

    Example
    -------
    >>> from sknetwork.ranking import HITS
    >>> from sknetwork.data import star_wars
    >>> hits = HITS()
    >>> biadjacency = star_wars()
    >>> scores = hits.fit_predict(biadjacency)
    >>> np.round(scores, 2)
    array([0.5 , 0.23, 0.69, 0.46])

    References
    ----------
    Kleinberg, J. M. (1999). Authoritative sources in a hyperlinked environment.
    Journal of the ACM, 46(5), 604-632.
    """
    def __init__(self, solver: Union[str, SVDSolver] = 'lanczos'):
        super(HITS, self).__init__()

        # isinstance (rather than an exact type comparison) also accepts str subclasses.
        if isinstance(solver, str):
            self.solver: SVDSolver = LanczosSVD()
        else:
            self.solver = solver

    @staticmethod
    def _orient(vector: np.ndarray) -> np.ndarray:
        """Flip the vector so that its majority sign is positive, then clip negative entries to 0."""
        n_pos, n_neg = (vector > 0).sum(), (vector < 0).sum()
        if n_pos > n_neg:
            return np.clip(vector, a_min=0., a_max=None)
        return np.clip(-vector, a_min=0., a_max=None)

    def fit(self, adjacency: Union[sparse.csr_matrix, np.ndarray]) -> 'HITS':
        """Compute HITS algorithm with a spectral method.

        The solver is fitted with a single component; the left singular vector gives the hub scores
        and the right singular vector gives the authority scores.

        Parameters
        ----------
        adjacency :
            Adjacency or biadjacency matrix of the graph.

        Returns
        -------
        self: :class:`HITS`
        """
        adjacency = check_format(adjacency)

        self.solver.fit(adjacency, 1)
        hubs: np.ndarray = self.solver.singular_vectors_left_.reshape(-1)
        authorities: np.ndarray = self.solver.singular_vectors_right_.reshape(-1)

        # Singular vectors are defined up to sign: orient each one independently.
        hubs = self._orient(hubs)
        authorities = self._orient(authorities)

        self.scores_row_ = hubs
        self.scores_col_ = authorities
        self.scores_ = hubs

        return self
@@ -0,0 +1,79 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ Created on May 2020
5
+ @author: Nathan de Lara <nathan.delara@polytechnique.org>
6
+ """
7
+ from typing import Union
8
+
9
+ import numpy as np
10
+ from scipy import sparse
11
+ from scipy.sparse.linalg import LinearOperator
12
+
13
+ from sknetwork.linalg.polynome import Polynome
14
+ from sknetwork.ranking.base import BaseRanking
15
+ from sknetwork.utils.check import check_format
16
+ from sknetwork.utils.format import get_adjacency
17
+
18
+
19
class Katz(BaseRanking):
    """Katz centrality, defined by:

    :math:`\\sum_{k=1}^K\\alpha^k(A^k)^T\\mathbf{1}`

    where :math:`A` is the adjacency matrix, :math:`\\alpha` is the damping factor and :math:`K` is the path length.

    Parameters
    ----------
    damping_factor : float
        Damping factor for path contributions.
    path_length : int
        Maximum length of the paths.

    Attributes
    ----------
    scores_ : np.ndarray
        Score of each node.

    Examples
    --------
    >>> from sknetwork.ranking import Katz
    >>> from sknetwork.data.toy_graphs import house
    >>> adjacency = house()
    >>> katz = Katz()
    >>> scores = katz.fit_predict(adjacency)
    >>> np.round(scores, 2)
    array([6.5 , 8.25, 5.62, 5.62, 8.25])

    References
    ----------
    Katz, L. (1953). `A new status index derived from sociometric analysis
    <https://link.springer.com/content/pdf/10.1007/BF02289026.pdf>`_. Psychometrika, 18(1), 39-43.
    """
    def __init__(self, damping_factor: float = 0.5, path_length: int = 4):
        super(Katz, self).__init__()
        self.damping_factor = damping_factor
        self.path_length = path_length
        self.bipartite = None

    def fit(self, input_matrix: Union[sparse.csr_matrix, np.ndarray, LinearOperator]) -> 'Katz':
        """Katz centrality.

        Parameters
        ----------
        input_matrix :
            Adjacency matrix or biadjacency matrix of the graph.

        Returns
        -------
        self: :class:`Katz`
        """
        input_matrix = check_format(input_matrix)
        adjacency, self.bipartite = get_adjacency(input_matrix)

        # Coefficients alpha^0 .. alpha^K, with the constant term removed (the sum starts at k=1).
        powers = np.arange(self.path_length + 1)
        coefs = self.damping_factor ** powers
        coefs[0] = 0.

        # Unweighted (boolean) transposed adjacency, evaluated as a polynomial applied to the all-ones vector.
        transposed = adjacency.T.astype(bool).tocsr()
        ones = np.ones(adjacency.shape[0])
        self.scores_ = Polynome(transposed, coefs).dot(ones)

        if self.bipartite:
            self._split_vars(input_matrix.shape)
        return self
@@ -0,0 +1,106 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ Created in May 2019
5
+ @author: Nathan de Lara <nathan.delara@polytechnique.org>
6
+ @author: Thomas Bonald <bonald@enst.fr>
7
+ """
8
+ from typing import Union, Optional
9
+
10
+ import numpy as np
11
+ from scipy import sparse
12
+
13
+ from sknetwork.linalg.ppr_solver import get_pagerank
14
+ from sknetwork.ranking.base import BaseRanking
15
+ from sknetwork.utils.check import check_damping_factor
16
+ from sknetwork.utils.format import get_adjacency_values
17
+
18
+
19
class PageRank(BaseRanking):
    """PageRank of each node, corresponding to its frequency of visit by a random walk.

    The random walk restarts with some fixed probability. The restart distribution can be personalized by the user.
    This variant is known as Personalized PageRank.

    Parameters
    ----------
    damping_factor : float
        Probability to continue the random walk.
    solver : str
        * ``'piteration'``, use power iteration for a given number of iterations.
        * ``'diteration'``, use asynchronous parallel diffusion for a given number of iterations.
        * ``'lanczos'``, use eigensolver with a given tolerance.
        * ``'bicgstab'``, use Biconjugate Gradient Stabilized method for a given tolerance.
        * ``'RH'``, use a Ruffini-Horner polynomial evaluation.
        * ``'push'``, use push-based algorithm for a given tolerance
    n_iter : int
        Number of iterations for some solvers.
    tol : float
        Tolerance for the convergence of some solvers.

    Attributes
    ----------
    scores_ : np.ndarray
        PageRank score of each node.

    Example
    -------
    >>> from sknetwork.ranking import PageRank
    >>> from sknetwork.data import house
    >>> pagerank = PageRank()
    >>> adjacency = house()
    >>> weights = {0: 1}
    >>> scores = pagerank.fit_predict(adjacency, weights)
    >>> np.round(scores, 2)
    array([0.29, 0.24, 0.12, 0.12, 0.24])

    References
    ----------
    Page, L., Brin, S., Motwani, R., & Winograd, T. (1999). The PageRank citation ranking: Bringing order to the web.
    Stanford InfoLab.
    """
    def __init__(self, damping_factor: float = 0.85, solver: str = 'piteration', n_iter: int = 10, tol: float = 1e-6):
        super(PageRank, self).__init__()
        # Validate the damping factor at construction time.
        check_damping_factor(damping_factor)
        self.damping_factor = damping_factor
        self.solver = solver
        self.n_iter = n_iter
        self.tol = tol
        self.bipartite = None

    def fit(self, input_matrix: Union[sparse.csr_matrix, np.ndarray],
            weights: Optional[Union[dict, np.ndarray]] = None, weights_row: Optional[Union[dict, np.ndarray]] = None,
            weights_col: Optional[Union[dict, np.ndarray]] = None, force_bipartite: bool = False) -> 'PageRank':
        """Compute the pagerank of each node.

        Parameters
        ----------
        input_matrix : sparse.csr_matrix, np.ndarray
            Adjacency matrix or biadjacency matrix of the graph.
        weights : np.ndarray, dict
            Weights of the restart distribution for Personalized PageRank.
            If ``None``, the uniform distribution is used (no personalization, default).
        weights_row : np.ndarray, dict
            Weights on rows of the restart distribution for Personalized PageRank.
            Used for bipartite graphs.
            If both weights_row and weights_col are ``None`` (default), the uniform distribution on rows is used.
        weights_col : np.ndarray, dict
            Weights on columns of the restart distribution for Personalized PageRank.
            Used for bipartite graphs.
        force_bipartite : bool
            If ``True``, consider the input matrix as the biadjacency matrix of a bipartite graph.

        Returns
        -------
        self: :class:`PageRank`
        """
        # Build the adjacency matrix and the restart values from the inputs.
        adjacency, values, self.bipartite = get_adjacency_values(input_matrix, force_bipartite=force_bipartite,
                                                                 values=weights,
                                                                 values_row=weights_row,
                                                                 values_col=weights_col,
                                                                 default_value=0,
                                                                 which='probs')
        self.scores_ = get_pagerank(adjacency, values, damping_factor=self.damping_factor, n_iter=self.n_iter,
                                    solver=self.solver, tol=self.tol)
        if self.bipartite:
            # Split the scores between rows and columns of the input matrix.
            self._split_vars(input_matrix.shape)
        return self
@@ -0,0 +1,37 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ Created on May 2019
5
+ @author: Nathan de Lara <nathan.delara@polytechnique.org>
6
+ """
7
+ import numpy as np
8
+
9
+
10
def top_k(scores: np.ndarray, k: int = 1, sort: bool = True):
    """Return the indices of the k elements of highest values.

    Parameters
    ----------
    scores : np.ndarray
        Array of values.
    k : int
        Number of elements to return. If ``k`` is at least the number of elements, all indices are returned.
    sort : bool
        If ``True``, sort the indices in decreasing order of value (element of highest value first).

    Returns
    -------
    index : np.ndarray
        Indices of the selected elements.

    Examples
    --------
    >>> top_k([1, 3, 2], k=2)
    array([1, 2])
    """
    scores = np.asarray(scores)
    n_scores = len(scores)
    if k >= n_scores:
        if sort:
            return np.argsort(-scores)
        # All indices, in their original order (np.arange needs the length, not the array).
        return np.arange(n_scores)
    # Partial selection of the k largest values, then optional ordering of those k only.
    index = np.argpartition(-scores, k)[:k]
    if sort:
        index = index[np.argsort(-scores[index])]
    return index
@@ -0,0 +1 @@
1
+ """tests for ranking"""
@@ -0,0 +1,32 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ """tests for ranking API"""
4
+ import unittest
5
+
6
+ from sknetwork.data.test_graphs import *
7
+ from sknetwork.ranking import *
8
+
9
+
10
class TestPageRank(unittest.TestCase):
    """Shared API checks for the ranking algorithms."""

    def test_basic(self):
        """Each algorithm returns one non-negative score per node on graphs and digraphs."""
        algorithms = [PageRank(), Closeness(), HITS(), Katz()]
        for adjacency in [test_graph(), test_digraph()]:
            expected_shape = (adjacency.shape[0], )
            for algorithm in algorithms:
                scores = algorithm.fit_predict(adjacency)
                self.assertEqual(scores.shape, expected_shape)
                self.assertTrue(min(scores) >= 0)

    def test_bipartite(self):
        """Row and column scores match the dimensions of the biadjacency matrix."""
        biadjacency = test_bigraph()
        n_row, n_col = biadjacency.shape

        for algorithm in [PageRank(), HITS(), Katz()]:
            algorithm.fit(biadjacency)
            row_scores = algorithm.scores_row_
            col_scores = algorithm.scores_col_

            self.assertEqual(row_scores.shape, (n_row,))
            self.assertEqual(col_scores.shape, (n_col,))
@@ -0,0 +1,38 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ """tests for betweenness.py"""
4
+
5
+ import unittest
6
+ import numpy as np
7
+
8
+ from sknetwork.ranking.betweenness import Betweenness
9
+ from sknetwork.data.test_graphs import test_graph, test_disconnected_graph
10
+ from sknetwork.data.toy_graphs import bow_tie, star_wars
11
+
12
+
13
class TestBetweenness(unittest.TestCase):
    """Tests for the Betweenness ranking."""

    def test_basic(self):
        """One score is returned per node."""
        graph = test_graph()
        ranking = Betweenness()
        scores = ranking.fit_predict(graph)
        self.assertEqual(len(scores), graph.shape[0])

    def test_bowtie(self):
        """Exactly one node of the bow tie has a positive score."""
        graph = bow_tie()
        ranking = Betweenness()
        scores = ranking.fit_predict(graph)
        n_positive = np.sum(scores > 0)
        self.assertEqual(n_positive, 1)

    def test_disconnected(self):
        """Fitting on a disconnected graph raises ValueError."""
        graph = test_disconnected_graph()
        ranking = Betweenness()
        self.assertRaises(ValueError, ranking.fit, graph)

    def test_bipartite(self):
        """Fitting on the star_wars biadjacency matrix raises ValueError."""
        biadjacency = star_wars()
        ranking = Betweenness()

        self.assertRaises(ValueError, ranking.fit_predict, biadjacency)
@@ -0,0 +1,30 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ """tests for closeness.py"""
4
+
5
+ import unittest
6
+
7
+ from sknetwork.data.test_graphs import *
8
+ from sknetwork.ranking.closeness import Closeness
9
+
10
+
11
class TestDiffusion(unittest.TestCase):
    """Tests for the Closeness ranking."""

    def test_params(self):
        """An unknown method name raises ValueError at fit time."""
        with self.assertRaises(ValueError):
            graph = test_graph()
            ranking = Closeness(method='toto')
            ranking.fit(graph)

    def test_parallel(self):
        """The approximate method returns one score per node on the test graph."""
        graph = test_graph()
        expected_shape = (graph.shape[0],)

        scores = Closeness(method='approximate').fit_predict(graph)
        self.assertEqual(scores.shape, expected_shape)

    def test_disconnected(self):
        """Fitting on a disconnected graph raises ValueError."""
        graph = test_disconnected_graph()
        ranking = Closeness()
        self.assertRaises(ValueError, ranking.fit, graph)
@@ -0,0 +1,20 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ """Tests for hits.py"""
4
+
5
+ import unittest
6
+
7
+ from sknetwork.data.test_graphs import test_bigraph
8
+ from sknetwork.ranking import HITS
9
+
10
+
11
class TestHITS(unittest.TestCase):
    """Tests for the HITS ranking."""

    def test_keywords(self):
        """Hub and authority scores match the rows and columns of the biadjacency matrix."""
        biadjacency = test_bigraph()
        expected_rows = (biadjacency.shape[0],)
        expected_cols = (biadjacency.shape[1],)

        ranking = HITS()
        ranking.fit(biadjacency)
        self.assertEqual(ranking.scores_row_.shape, expected_rows)
        self.assertEqual(ranking.scores_col_.shape, expected_cols)
@@ -0,0 +1,62 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ """tests for pagerank.py"""
4
+
5
+ import unittest
6
+
7
+ import numpy as np
8
+
9
+ from sknetwork.data.models import cyclic_digraph
10
+ from sknetwork.data.test_graphs import test_graph, test_digraph, test_bigraph
11
+ from sknetwork.ranking.pagerank import PageRank
12
+
13
+
14
class TestPageRank(unittest.TestCase):
    """Tests for the PageRank ranking."""

    def setUp(self) -> None:
        """Cycle graph for tests."""
        self.n = 5
        self.adjacency = cyclic_digraph(self.n)
        # Expected scores on the cycle: the uniform distribution.
        self.truth = np.ones(self.n) / self.n

    def test_params(self):
        """An out-of-range damping factor is rejected at construction."""
        self.assertRaises(ValueError, PageRank, damping_factor=1789)

    def test_solvers(self):
        """Each listed solver recovers the uniform scores; an unknown solver raises ValueError."""
        for name in ['piteration', 'lanczos', 'bicgstab', 'RH']:
            scores = PageRank(solver=name).fit_predict(self.adjacency)
            error = np.linalg.norm(scores - self.truth)
            self.assertAlmostEqual(0, error)
        with self.assertRaises(ValueError):
            PageRank(solver='toto').fit_predict(self.adjacency)

    def test_seeding(self):
        """Array and dict restart weights give the same scores."""
        ranking = PageRank()
        weights_array = np.zeros(self.n)
        weights_array[0] = 1.
        weights_dict = {0: 1}

        from_array = ranking.fit_predict(self.adjacency, weights_array)
        from_dict = ranking.fit_predict(self.adjacency, weights_dict)
        self.assertAlmostEqual(np.linalg.norm(from_array - from_dict), 0.)

    def test_input(self):
        """With force_bipartite, the column scores have the same length as the returned scores."""
        ranking = PageRank()
        scores = ranking.fit_predict(self.adjacency, force_bipartite=True)
        self.assertEqual(len(scores), len(ranking.scores_col_))

    def test_damping(self):
        """The scores on the cycle stay uniform for both a high and a low damping factor."""
        high = PageRank(damping_factor=0.99)
        scores = high.fit_predict(self.adjacency)
        self.assertAlmostEqual(np.linalg.norm(scores - self.truth), 0.)

        low = PageRank(damping_factor=0.01)
        scores = low.fit_predict(self.adjacency)
        self.assertAlmostEqual(np.linalg.norm(scores - self.truth), 0.)

    def test_bigraph(self):
        """predict(columns=True) returns the stored column scores."""
        ranking = PageRank()
        for matrix in [test_graph(), test_digraph(), test_bigraph()]:
            ranking.fit(matrix, weights_col={0: 1})
            gap = np.linalg.norm(ranking.scores_col_ - ranking.predict(columns=True))
            self.assertAlmostEqual(gap, 0.)