scikit-network 0.33.3__cp313-cp313-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of scikit-network might be problematic. Click here for more details.

Files changed (228) hide show
  1. scikit_network-0.33.3.dist-info/METADATA +122 -0
  2. scikit_network-0.33.3.dist-info/RECORD +228 -0
  3. scikit_network-0.33.3.dist-info/WHEEL +5 -0
  4. scikit_network-0.33.3.dist-info/licenses/AUTHORS.rst +43 -0
  5. scikit_network-0.33.3.dist-info/licenses/LICENSE +34 -0
  6. scikit_network-0.33.3.dist-info/top_level.txt +1 -0
  7. sknetwork/__init__.py +21 -0
  8. sknetwork/base.py +67 -0
  9. sknetwork/classification/__init__.py +8 -0
  10. sknetwork/classification/base.py +142 -0
  11. sknetwork/classification/base_rank.py +133 -0
  12. sknetwork/classification/diffusion.py +134 -0
  13. sknetwork/classification/knn.py +139 -0
  14. sknetwork/classification/metrics.py +205 -0
  15. sknetwork/classification/pagerank.py +66 -0
  16. sknetwork/classification/propagation.py +152 -0
  17. sknetwork/classification/tests/__init__.py +1 -0
  18. sknetwork/classification/tests/test_API.py +30 -0
  19. sknetwork/classification/tests/test_diffusion.py +77 -0
  20. sknetwork/classification/tests/test_knn.py +23 -0
  21. sknetwork/classification/tests/test_metrics.py +53 -0
  22. sknetwork/classification/tests/test_pagerank.py +20 -0
  23. sknetwork/classification/tests/test_propagation.py +24 -0
  24. sknetwork/classification/vote.cp313-win_amd64.pyd +0 -0
  25. sknetwork/classification/vote.cpp +27584 -0
  26. sknetwork/classification/vote.pyx +56 -0
  27. sknetwork/clustering/__init__.py +8 -0
  28. sknetwork/clustering/base.py +172 -0
  29. sknetwork/clustering/kcenters.py +253 -0
  30. sknetwork/clustering/leiden.py +242 -0
  31. sknetwork/clustering/leiden_core.cp313-win_amd64.pyd +0 -0
  32. sknetwork/clustering/leiden_core.cpp +31575 -0
  33. sknetwork/clustering/leiden_core.pyx +124 -0
  34. sknetwork/clustering/louvain.py +286 -0
  35. sknetwork/clustering/louvain_core.cp313-win_amd64.pyd +0 -0
  36. sknetwork/clustering/louvain_core.cpp +31220 -0
  37. sknetwork/clustering/louvain_core.pyx +124 -0
  38. sknetwork/clustering/metrics.py +91 -0
  39. sknetwork/clustering/postprocess.py +66 -0
  40. sknetwork/clustering/propagation_clustering.py +104 -0
  41. sknetwork/clustering/tests/__init__.py +1 -0
  42. sknetwork/clustering/tests/test_API.py +38 -0
  43. sknetwork/clustering/tests/test_kcenters.py +60 -0
  44. sknetwork/clustering/tests/test_leiden.py +34 -0
  45. sknetwork/clustering/tests/test_louvain.py +135 -0
  46. sknetwork/clustering/tests/test_metrics.py +50 -0
  47. sknetwork/clustering/tests/test_postprocess.py +39 -0
  48. sknetwork/data/__init__.py +6 -0
  49. sknetwork/data/base.py +33 -0
  50. sknetwork/data/load.py +406 -0
  51. sknetwork/data/models.py +459 -0
  52. sknetwork/data/parse.py +644 -0
  53. sknetwork/data/test_graphs.py +84 -0
  54. sknetwork/data/tests/__init__.py +1 -0
  55. sknetwork/data/tests/test_API.py +30 -0
  56. sknetwork/data/tests/test_base.py +14 -0
  57. sknetwork/data/tests/test_load.py +95 -0
  58. sknetwork/data/tests/test_models.py +52 -0
  59. sknetwork/data/tests/test_parse.py +250 -0
  60. sknetwork/data/tests/test_test_graphs.py +29 -0
  61. sknetwork/data/tests/test_toy_graphs.py +68 -0
  62. sknetwork/data/timeout.py +38 -0
  63. sknetwork/data/toy_graphs.py +611 -0
  64. sknetwork/embedding/__init__.py +8 -0
  65. sknetwork/embedding/base.py +94 -0
  66. sknetwork/embedding/force_atlas.py +198 -0
  67. sknetwork/embedding/louvain_embedding.py +148 -0
  68. sknetwork/embedding/random_projection.py +135 -0
  69. sknetwork/embedding/spectral.py +141 -0
  70. sknetwork/embedding/spring.py +198 -0
  71. sknetwork/embedding/svd.py +359 -0
  72. sknetwork/embedding/tests/__init__.py +1 -0
  73. sknetwork/embedding/tests/test_API.py +49 -0
  74. sknetwork/embedding/tests/test_force_atlas.py +35 -0
  75. sknetwork/embedding/tests/test_louvain_embedding.py +33 -0
  76. sknetwork/embedding/tests/test_random_projection.py +28 -0
  77. sknetwork/embedding/tests/test_spectral.py +81 -0
  78. sknetwork/embedding/tests/test_spring.py +50 -0
  79. sknetwork/embedding/tests/test_svd.py +43 -0
  80. sknetwork/gnn/__init__.py +10 -0
  81. sknetwork/gnn/activation.py +117 -0
  82. sknetwork/gnn/base.py +181 -0
  83. sknetwork/gnn/base_activation.py +90 -0
  84. sknetwork/gnn/base_layer.py +109 -0
  85. sknetwork/gnn/gnn_classifier.py +305 -0
  86. sknetwork/gnn/layer.py +153 -0
  87. sknetwork/gnn/loss.py +180 -0
  88. sknetwork/gnn/neighbor_sampler.py +65 -0
  89. sknetwork/gnn/optimizer.py +164 -0
  90. sknetwork/gnn/tests/__init__.py +1 -0
  91. sknetwork/gnn/tests/test_activation.py +56 -0
  92. sknetwork/gnn/tests/test_base.py +75 -0
  93. sknetwork/gnn/tests/test_base_layer.py +37 -0
  94. sknetwork/gnn/tests/test_gnn_classifier.py +130 -0
  95. sknetwork/gnn/tests/test_layers.py +80 -0
  96. sknetwork/gnn/tests/test_loss.py +33 -0
  97. sknetwork/gnn/tests/test_neigh_sampler.py +23 -0
  98. sknetwork/gnn/tests/test_optimizer.py +43 -0
  99. sknetwork/gnn/tests/test_utils.py +41 -0
  100. sknetwork/gnn/utils.py +127 -0
  101. sknetwork/hierarchy/__init__.py +6 -0
  102. sknetwork/hierarchy/base.py +96 -0
  103. sknetwork/hierarchy/louvain_hierarchy.py +272 -0
  104. sknetwork/hierarchy/metrics.py +234 -0
  105. sknetwork/hierarchy/paris.cp313-win_amd64.pyd +0 -0
  106. sknetwork/hierarchy/paris.cpp +37868 -0
  107. sknetwork/hierarchy/paris.pyx +316 -0
  108. sknetwork/hierarchy/postprocess.py +350 -0
  109. sknetwork/hierarchy/tests/__init__.py +1 -0
  110. sknetwork/hierarchy/tests/test_API.py +24 -0
  111. sknetwork/hierarchy/tests/test_algos.py +34 -0
  112. sknetwork/hierarchy/tests/test_metrics.py +62 -0
  113. sknetwork/hierarchy/tests/test_postprocess.py +57 -0
  114. sknetwork/linalg/__init__.py +9 -0
  115. sknetwork/linalg/basics.py +37 -0
  116. sknetwork/linalg/diteration.cp313-win_amd64.pyd +0 -0
  117. sknetwork/linalg/diteration.cpp +27400 -0
  118. sknetwork/linalg/diteration.pyx +47 -0
  119. sknetwork/linalg/eig_solver.py +93 -0
  120. sknetwork/linalg/laplacian.py +15 -0
  121. sknetwork/linalg/normalizer.py +86 -0
  122. sknetwork/linalg/operators.py +225 -0
  123. sknetwork/linalg/polynome.py +76 -0
  124. sknetwork/linalg/ppr_solver.py +170 -0
  125. sknetwork/linalg/push.cp313-win_amd64.pyd +0 -0
  126. sknetwork/linalg/push.cpp +31072 -0
  127. sknetwork/linalg/push.pyx +71 -0
  128. sknetwork/linalg/sparse_lowrank.py +142 -0
  129. sknetwork/linalg/svd_solver.py +91 -0
  130. sknetwork/linalg/tests/__init__.py +1 -0
  131. sknetwork/linalg/tests/test_eig.py +44 -0
  132. sknetwork/linalg/tests/test_laplacian.py +18 -0
  133. sknetwork/linalg/tests/test_normalization.py +34 -0
  134. sknetwork/linalg/tests/test_operators.py +66 -0
  135. sknetwork/linalg/tests/test_polynome.py +38 -0
  136. sknetwork/linalg/tests/test_ppr.py +50 -0
  137. sknetwork/linalg/tests/test_sparse_lowrank.py +61 -0
  138. sknetwork/linalg/tests/test_svd.py +38 -0
  139. sknetwork/linkpred/__init__.py +2 -0
  140. sknetwork/linkpred/base.py +46 -0
  141. sknetwork/linkpred/nn.py +126 -0
  142. sknetwork/linkpred/tests/__init__.py +1 -0
  143. sknetwork/linkpred/tests/test_nn.py +27 -0
  144. sknetwork/log.py +19 -0
  145. sknetwork/path/__init__.py +5 -0
  146. sknetwork/path/dag.py +54 -0
  147. sknetwork/path/distances.py +98 -0
  148. sknetwork/path/search.py +31 -0
  149. sknetwork/path/shortest_path.py +61 -0
  150. sknetwork/path/tests/__init__.py +1 -0
  151. sknetwork/path/tests/test_dag.py +37 -0
  152. sknetwork/path/tests/test_distances.py +62 -0
  153. sknetwork/path/tests/test_search.py +40 -0
  154. sknetwork/path/tests/test_shortest_path.py +40 -0
  155. sknetwork/ranking/__init__.py +8 -0
  156. sknetwork/ranking/base.py +61 -0
  157. sknetwork/ranking/betweenness.cp313-win_amd64.pyd +0 -0
  158. sknetwork/ranking/betweenness.cpp +9707 -0
  159. sknetwork/ranking/betweenness.pyx +97 -0
  160. sknetwork/ranking/closeness.py +92 -0
  161. sknetwork/ranking/hits.py +94 -0
  162. sknetwork/ranking/katz.py +83 -0
  163. sknetwork/ranking/pagerank.py +110 -0
  164. sknetwork/ranking/postprocess.py +37 -0
  165. sknetwork/ranking/tests/__init__.py +1 -0
  166. sknetwork/ranking/tests/test_API.py +32 -0
  167. sknetwork/ranking/tests/test_betweenness.py +38 -0
  168. sknetwork/ranking/tests/test_closeness.py +30 -0
  169. sknetwork/ranking/tests/test_hits.py +20 -0
  170. sknetwork/ranking/tests/test_pagerank.py +62 -0
  171. sknetwork/ranking/tests/test_postprocess.py +26 -0
  172. sknetwork/regression/__init__.py +4 -0
  173. sknetwork/regression/base.py +61 -0
  174. sknetwork/regression/diffusion.py +210 -0
  175. sknetwork/regression/tests/__init__.py +1 -0
  176. sknetwork/regression/tests/test_API.py +32 -0
  177. sknetwork/regression/tests/test_diffusion.py +56 -0
  178. sknetwork/sknetwork.py +3 -0
  179. sknetwork/test_base.py +35 -0
  180. sknetwork/test_log.py +15 -0
  181. sknetwork/topology/__init__.py +8 -0
  182. sknetwork/topology/cliques.cp313-win_amd64.pyd +0 -0
  183. sknetwork/topology/cliques.cpp +32565 -0
  184. sknetwork/topology/cliques.pyx +149 -0
  185. sknetwork/topology/core.cp313-win_amd64.pyd +0 -0
  186. sknetwork/topology/core.cpp +30651 -0
  187. sknetwork/topology/core.pyx +90 -0
  188. sknetwork/topology/cycles.py +243 -0
  189. sknetwork/topology/minheap.cp313-win_amd64.pyd +0 -0
  190. sknetwork/topology/minheap.cpp +27332 -0
  191. sknetwork/topology/minheap.pxd +20 -0
  192. sknetwork/topology/minheap.pyx +109 -0
  193. sknetwork/topology/structure.py +194 -0
  194. sknetwork/topology/tests/__init__.py +1 -0
  195. sknetwork/topology/tests/test_cliques.py +28 -0
  196. sknetwork/topology/tests/test_core.py +19 -0
  197. sknetwork/topology/tests/test_cycles.py +65 -0
  198. sknetwork/topology/tests/test_structure.py +85 -0
  199. sknetwork/topology/tests/test_triangles.py +38 -0
  200. sknetwork/topology/tests/test_wl.py +72 -0
  201. sknetwork/topology/triangles.cp313-win_amd64.pyd +0 -0
  202. sknetwork/topology/triangles.cpp +8894 -0
  203. sknetwork/topology/triangles.pyx +151 -0
  204. sknetwork/topology/weisfeiler_lehman.py +133 -0
  205. sknetwork/topology/weisfeiler_lehman_core.cp313-win_amd64.pyd +0 -0
  206. sknetwork/topology/weisfeiler_lehman_core.cpp +27635 -0
  207. sknetwork/topology/weisfeiler_lehman_core.pyx +114 -0
  208. sknetwork/utils/__init__.py +7 -0
  209. sknetwork/utils/check.py +355 -0
  210. sknetwork/utils/format.py +221 -0
  211. sknetwork/utils/membership.py +82 -0
  212. sknetwork/utils/neighbors.py +115 -0
  213. sknetwork/utils/tests/__init__.py +1 -0
  214. sknetwork/utils/tests/test_check.py +190 -0
  215. sknetwork/utils/tests/test_format.py +63 -0
  216. sknetwork/utils/tests/test_membership.py +24 -0
  217. sknetwork/utils/tests/test_neighbors.py +41 -0
  218. sknetwork/utils/tests/test_tfidf.py +18 -0
  219. sknetwork/utils/tests/test_values.py +66 -0
  220. sknetwork/utils/tfidf.py +37 -0
  221. sknetwork/utils/values.py +76 -0
  222. sknetwork/visualization/__init__.py +4 -0
  223. sknetwork/visualization/colors.py +34 -0
  224. sknetwork/visualization/dendrograms.py +277 -0
  225. sknetwork/visualization/graphs.py +1039 -0
  226. sknetwork/visualization/tests/__init__.py +1 -0
  227. sknetwork/visualization/tests/test_dendrograms.py +53 -0
  228. sknetwork/visualization/tests/test_graphs.py +176 -0
@@ -0,0 +1,97 @@
1
+ # distutils: language = c++
2
+ # cython: language_level=3
3
+ """
4
+ Created on September 17 2020
5
+ @author: Tiphaine Viard <tiphaine.viard@telecom-paris.fr>
6
+ """
7
+ from typing import Union
8
+ import numpy as np
9
+ from scipy import sparse
10
+
11
+ from sknetwork.ranking.base import BaseRanking
12
+ from sknetwork.utils.check import check_format, check_square, check_connected
13
+
14
+ from libcpp.vector cimport vector
15
+ from libcpp.queue cimport queue
16
+
17
class Betweenness(BaseRanking):
    """Betweenness centrality, based on Brandes' algorithm.

    Parameters
    ----------
    normalized : bool
        Stored as ``normalized_``. The flag is not read by :meth:`fit`, so the scores are
        never rescaled by this class.

    Attributes
    ----------
    scores_ : np.ndarray
        Betweenness centrality value of each node

    Example
    -------
    >>> from sknetwork.ranking import Betweenness
    >>> from sknetwork.data.toy_graphs import bow_tie
    >>> betweenness = Betweenness()
    >>> adjacency = bow_tie()
    >>> scores = betweenness.fit_transform(adjacency)
    >>> scores
    array([4., 0., 0., 0., 0.])

    References
    ----------
    Brandes, Ulrik (2001). A faster algorithm for betweenness centrality. Journal of Mathematical Sociology.
    """

    def __init__(self, normalized: bool = False):
        super(Betweenness, self).__init__()
        self.normalized_ = normalized

    def fit(self, adjacency: Union[sparse.csr_matrix, np.ndarray]) -> 'Betweenness':
        """Compute the betweenness centrality of each node.

        One breadth-first search is run from every node to count shortest paths, then the
        dependencies are accumulated by visiting nodes in reverse order of discovery.
        Edge weights are not used: only ``adjacency.indices`` and ``adjacency.indptr`` are read.

        Parameters
        ----------
        adjacency :
            Adjacency matrix of the graph. It is validated with ``check_format``,
            ``check_square`` and ``check_connected`` before any computation.

        Returns
        -------
        self: :class:`Betweenness`
        """
        adjacency = check_format(adjacency)
        check_square(adjacency)
        check_connected(adjacency)

        cdef int source
        cdef vector[ vector[int] ] preds
        cdef vector[int] sigma
        cdef vector[int] dists
        cdef int i
        cdef int j
        cdef vector[float] delta

        cdef int n = adjacency.shape[0]
        self.scores_ = np.zeros(n)
        cdef vector[int] seen  # Vector used as a stack (push_back / back / pop_back)
        cdef queue[int] bfs_queue

        for source in range(n):
            # preds[j]: predecessors of j on shortest paths from source
            preds = [[] for _ in range(n)]
            # sigma[j]: number of shortest paths from source to j
            sigma = np.zeros(n)
            sigma[source] = 1
            # dists[j]: BFS distance from source, -1 while j is undiscovered
            dists = -np.ones(n, dtype=int)
            dists[source] = 0
            bfs_queue.push(source)

            while bfs_queue.size() != 0:
                i = bfs_queue.front()
                bfs_queue.pop()

                seen.push_back(i)
                neighbors = adjacency.indices[adjacency.indptr[i]:adjacency.indptr[i + 1]]
                for j in neighbors:
                    if dists[j] < 0:  # j found for the first time?
                        dists[j] = dists[i] + 1
                        bfs_queue.push(j)
                    if dists[j] == dists[i] + 1:  # shortest path to j via i?
                        sigma[j] += sigma[i]
                        preds[j].push_back(i)

            # Now backtrack to compute betweenness scores
            delta = np.zeros(n)
            # Test emptiness on the C++ vector itself, as done for bfs_queue above,
            # instead of going through Python's len() at every iteration.
            while seen.size() != 0:
                j = seen.back()
                seen.pop_back()
                for i in preds[j]:
                    delta[i] += sigma[i] / sigma[j] * (1 + delta[j])
                if j != source:
                    self.scores_[j] += delta[j]

        # Undirected graph, divide all values by two
        self.scores_ = 1 / 2 * self.scores_

        return self
@@ -0,0 +1,92 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ Created on November 12 2019
5
+ @author: Quentin Lutz <qlutz@enst.fr>
6
+ """
7
+ from math import log
8
+ from typing import Union, Optional
9
+
10
+ import numpy as np
11
+ from scipy import sparse
12
+
13
+ from sknetwork.path.shortest_path import get_distances
14
+ from sknetwork.ranking.base import BaseRanking
15
+ from sknetwork.utils.check import check_format, check_square, check_connected
16
+
17
+
18
class Closeness(BaseRanking):
    """Ranking by closeness centrality of each node in a connected graph, corresponding to the inverse of the
    average length of the shortest paths between that node and all the other ones.

    Parameters
    ----------
    method :
        Denotes if the results should be exact (``'exact'``) or approximate (``'approximate'``).
    tol: float
        If ``method=='approximate'``, the allowed tolerance on each score entry.
        It sets the number of sampled sources to ``log(n) / tol ** 2`` (at least 1, at most ``n``).

    Attributes
    ----------
    scores_ : np.ndarray
        Closeness centrality of each node.

    Example
    -------
    >>> from sknetwork.ranking import Closeness
    >>> from sknetwork.data import cyclic_digraph
    >>> closeness = Closeness()
    >>> adjacency = cyclic_digraph(3)
    >>> scores = closeness.fit_predict(adjacency)
    >>> np.round(scores, 2)
    array([0.67, 0.67, 0.67])

    References
    ----------
    Eppstein, D., & Wang, J. (2001, January).
    `Fast approximation of centrality.
    <http://jgaa.info/accepted/2004/EppsteinWang2004.8.1.pdf>`_
    In Proceedings of the twelfth annual ACM-SIAM symposium on Discrete algorithms (pp. 228-229).
    Society for Industrial and Applied Mathematics.
    """

    def __init__(self, method: str = 'exact', tol: float = 1e-1):
        super(Closeness, self).__init__()

        self.method = method
        self.tol = tol

    def fit(self, adjacency: Union[sparse.csr_matrix, np.ndarray]) -> 'Closeness':
        """Closeness centrality for connected graphs.

        With ``method='exact'``, distances are computed from every node and the score of a node is
        ``(n - 1)`` divided by the sum of the distances from that node.

        With ``method='approximate'``, distances are computed from a random sample of sources only and
        the score of each node is estimated from its distances to the sampled sources, so that
        ``scores_`` always has one entry per node, indexed by node.

        Parameters
        ----------
        adjacency :
            Adjacency matrix of the graph.

        Returns
        -------
        self: :class:`Closeness`

        Raises
        ------
        ValueError
            If ``method`` is neither ``'exact'`` nor ``'approximate'``.
        """
        adjacency = check_format(adjacency)
        check_square(adjacency)
        check_connected(adjacency)
        n = adjacency.shape[0]

        if self.method == 'exact':
            sources = np.arange(n)
            # Row k holds the distances from node k: average over targets.
            axis = 1
        elif self.method == 'approximate':
            # At least one source, otherwise the distance matrix is empty.
            n_sources = min(max(int(log(n) / self.tol ** 2), 1), n)
            sources = np.random.choice(np.arange(n), n_sources, replace=False)
            # Rows follow the random order of the sampled sources, so per-row scores would not be
            # aligned with node indices. Average over sources for each node instead.
            axis = 0
        else:
            raise ValueError("Method should be either 'exact' or 'approximate'.")

        distances = np.array([get_distances(adjacency, source=source) for source in sources])

        distances_min = np.min(distances, axis=axis)
        scores = (n - 1) / n / np.mean(distances, axis=axis)
        # A negative distance is treated as "not reachable": the score is set to 0.
        scores[distances_min < 0] = 0
        self.scores_ = scores

        return self
@@ -0,0 +1,94 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ Created on Oct 07 2019
5
+ @author: Nathan de Lara <nathan.delara@polytechnique.org>
6
+ """
7
+
8
+ from typing import Union
9
+
10
+ import numpy as np
11
+ from scipy import sparse
12
+
13
+ from sknetwork.linalg import SVDSolver, LanczosSVD
14
+ from sknetwork.ranking.base import BaseRanking
15
+ from sknetwork.utils.check import check_format
16
+
17
+
18
class HITS(BaseRanking):
    """Hub and authority scores of each node.
    For bipartite graphs, the hub score is computed on rows and the authority score on columns.

    Parameters
    ----------
    solver : ``'lanczos'`` (default, Lanczos algorithm) or :class:`SVDSolver` (custom solver)
        Which solver to use. Any string selects the Lanczos solver.

    Attributes
    ----------
    scores_ : np.ndarray
        Hub score of each node.
    scores_row_ : np.ndarray
        Hub score of each row, for bipartite graphs.
    scores_col_ : np.ndarray
        Authority score of each column, for bipartite graphs.

    Example
    -------
    >>> from sknetwork.ranking import HITS
    >>> from sknetwork.data import star_wars
    >>> hits = HITS()
    >>> biadjacency = star_wars()
    >>> scores = hits.fit_predict(biadjacency)
    >>> np.round(scores, 2)
    array([0.5 , 0.23, 0.69, 0.46])

    References
    ----------
    Kleinberg, J. M. (1999). Authoritative sources in a hyperlinked environment.
    Journal of the ACM, 46(5), 604-632.
    """
    def __init__(self, solver: Union[str, SVDSolver] = 'lanczos'):
        super(HITS, self).__init__()

        # isinstance also accepts subclasses of str, unlike an exact type comparison.
        if isinstance(solver, str):
            self.solver: SVDSolver = LanczosSVD()
        else:
            self.solver = solver

    @staticmethod
    def _orient(vector: np.ndarray) -> np.ndarray:
        """Flip the sign of a singular vector so that most entries are positive, then clip at 0.

        The vector is negated unless it has strictly more positive than negative entries.
        """
        n_pos, n_neg = (vector > 0).sum(), (vector < 0).sum()
        if n_pos > n_neg:
            return np.clip(vector, a_min=0., a_max=None)
        return np.clip(-vector, a_min=0., a_max=None)

    def fit(self, adjacency: Union[sparse.csr_matrix, np.ndarray]) -> 'HITS':
        """Compute HITS algorithm with a spectral method.

        The solver is fitted with a single component; hubs are read from its left singular
        vector and authorities from its right singular vector.

        Parameters
        ----------
        adjacency :
            Adjacency or biadjacency matrix of the graph.

        Returns
        -------
        self: :class:`HITS`
        """
        adjacency = check_format(adjacency)

        self.solver.fit(adjacency, 1)
        hubs: np.ndarray = self.solver.singular_vectors_left_.reshape(-1)
        authorities: np.ndarray = self.solver.singular_vectors_right_.reshape(-1)

        # Singular vectors are defined up to a sign: orient each one independently.
        hubs = self._orient(hubs)
        authorities = self._orient(authorities)

        self.scores_row_ = hubs
        self.scores_col_ = authorities
        self.scores_ = hubs

        return self
@@ -0,0 +1,83 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ Created on May 2020
5
+ @author: Nathan de Lara <nathan.delara@polytechnique.org>
6
+ """
7
+ from typing import Union
8
+
9
+ import numpy as np
10
+ from scipy import sparse
11
+ from scipy.sparse.linalg import LinearOperator
12
+
13
+ from sknetwork.linalg.polynome import Polynome
14
+ from sknetwork.ranking.base import BaseRanking
15
+ from sknetwork.utils.check import check_format
16
+ from sknetwork.utils.format import get_adjacency
17
+
18
+
19
class Katz(BaseRanking):
    """Katz centrality, defined by:

    :math:`\\sum_{k=1}^K\\alpha^k(A^k)^T\\mathbf{1}`

    where :math:`A` is the adjacency matrix, :math:`\\alpha` is the damping factor and :math:`K` is the path length.

    Parameters
    ----------
    damping_factor : float
        Damping factor for path contributions.
    path_length : int
        Maximum length of the paths.

    Attributes
    ----------
    scores_ : np.ndarray
        Score of each node.
    scores_row_: np.ndarray
        Scores of rows, for bipartite graphs.
    scores_col_: np.ndarray
        Scores of columns, for bipartite graphs.

    Examples
    --------
    >>> from sknetwork.data.toy_graphs import house
    >>> adjacency = house()
    >>> katz = Katz()
    >>> scores = katz.fit_predict(adjacency)
    >>> np.round(scores, 2)
    array([6.5 , 8.25, 5.62, 5.62, 8.25])

    References
    ----------
    Katz, L. (1953). `A new status index derived from sociometric analysis
    <https://link.springer.com/content/pdf/10.1007/BF02289026.pdf>`_. Psychometrika, 18(1), 39-43.
    """
    def __init__(self, damping_factor: float = 0.5, path_length: int = 4):
        super().__init__()
        self.damping_factor = damping_factor
        self.path_length = path_length
        self.bipartite = None

    def fit(self, input_matrix: Union[sparse.csr_matrix, np.ndarray, LinearOperator]) -> 'Katz':
        """Katz centrality.

        The scores are obtained by applying a polynomial of the transposed, binarized adjacency
        matrix to the all-ones vector. Edge weights are discarded by the cast to ``bool``.

        Parameters
        ----------
        input_matrix :
            Adjacency matrix or biadjacency matrix of the graph.

        Returns
        -------
        self: :class:`Katz`
        """
        input_matrix = check_format(input_matrix)
        adjacency, self.bipartite = get_adjacency(input_matrix)
        n_nodes = adjacency.shape[0]

        # Coefficient of degree k is damping_factor ** k; the constant term is removed
        # because the sum starts at k = 1.
        degrees = np.arange(self.path_length + 1)
        coefs = self.damping_factor ** degrees
        coefs[0] = 0.

        binary_transpose = adjacency.T.astype(bool).tocsr()
        ones = np.ones(n_nodes)
        self.scores_ = Polynome(binary_transpose, coefs).dot(ones)

        if self.bipartite:
            self._split_vars(input_matrix.shape)
        return self
@@ -0,0 +1,110 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ Created in May 2019
5
+ @author: Nathan de Lara <nathan.delara@polytechnique.org>
6
+ @author: Thomas Bonald <bonald@enst.fr>
7
+ """
8
+ from typing import Union, Optional
9
+
10
+ import numpy as np
11
+ from scipy import sparse
12
+
13
+ from sknetwork.linalg.ppr_solver import get_pagerank
14
+ from sknetwork.ranking.base import BaseRanking
15
+ from sknetwork.utils.check import check_damping_factor
16
+ from sknetwork.utils.format import get_adjacency_values
17
+
18
+
19
class PageRank(BaseRanking):
    """PageRank of each node, corresponding to its frequency of visit by a random walk.

    The random walk restarts with some fixed probability. The restart distribution can be personalized by the user.
    This variant is known as Personalized PageRank.

    Parameters
    ----------
    damping_factor : float
        Probability to continue the random walk.
    solver : str
        * ``'piteration'``, use power iteration for a given number of iterations.
        * ``'diteration'``, use asynchronous parallel diffusion for a given number of iterations.
        * ``'lanczos'``, use eigensolver with a given tolerance.
        * ``'bicgstab'``, use Biconjugate Gradient Stabilized method for a given tolerance.
        * ``'RH'``, use a Ruffini-Horner polynomial evaluation.
        * ``'push'``, use push-based algorithm for a given tolerance
    n_iter : int
        Number of iterations for some solvers.
    tol : float
        Tolerance for the convergence of some solvers.

    Attributes
    ----------
    scores_ : np.ndarray
        PageRank score of each node.
    scores_row_: np.ndarray
        Scores of rows, for bipartite graphs.
    scores_col_: np.ndarray
        Scores of columns, for bipartite graphs.

    Example
    -------
    >>> from sknetwork.ranking import PageRank
    >>> from sknetwork.data import house
    >>> pagerank = PageRank()
    >>> adjacency = house()
    >>> weights = {0: 1}
    >>> scores = pagerank.fit_predict(adjacency, weights)
    >>> np.round(scores, 2)
    array([0.29, 0.24, 0.12, 0.12, 0.24])

    References
    ----------
    Page, L., Brin, S., Motwani, R., & Winograd, T. (1999). The PageRank citation ranking: Bringing order to the web.
    Stanford InfoLab.
    """
    def __init__(self, damping_factor: float = 0.85, solver: str = 'piteration', n_iter: int = 10, tol: float = 1e-6):
        super().__init__()
        # The damping factor is validated before being stored.
        check_damping_factor(damping_factor)
        self.damping_factor = damping_factor
        self.solver = solver
        self.n_iter = n_iter
        self.tol = tol
        self.bipartite = None

    def fit(self, input_matrix: Union[sparse.csr_matrix, np.ndarray],
            weights: Optional[Union[dict, np.ndarray]] = None, weights_row: Optional[Union[dict, np.ndarray]] = None,
            weights_col: Optional[Union[dict, np.ndarray]] = None, force_bipartite: bool = False) -> 'PageRank':
        """Compute the pagerank of each node.

        Parameters
        ----------
        input_matrix : sparse.csr_matrix, np.ndarray
            Adjacency matrix or biadjacency matrix of the graph.
        weights : np.ndarray, dict
            Weights of the restart distribution for Personalized PageRank.
            If ``None``, the uniform distribution is used (no personalization, default).
        weights_row : np.ndarray, dict
            Weights on rows of the restart distribution for Personalized PageRank.
            Used for bipartite graphs.
            If both weights_row and weights_col are ``None`` (default), the uniform distribution on rows is used.
        weights_col : np.ndarray, dict
            Weights on columns of the restart distribution for Personalized PageRank.
            Used for bipartite graphs.
        force_bipartite : bool
            If ``True``, consider the input matrix as the biadjacency matrix of a bipartite graph.

        Returns
        -------
        self: :class:`PageRank`
        """
        # Build the adjacency matrix and the restart distribution from the user-supplied weights.
        prepared = get_adjacency_values(input_matrix, force_bipartite=force_bipartite,
                                        values=weights,
                                        values_row=weights_row,
                                        values_col=weights_col,
                                        default_value=0,
                                        which='probs')
        adjacency, values, self.bipartite = prepared

        self.scores_ = get_pagerank(adjacency, values, damping_factor=self.damping_factor, n_iter=self.n_iter,
                                    solver=self.solver, tol=self.tol)

        # For bipartite graphs, split the scores into row and column parts.
        if self.bipartite:
            self._split_vars(input_matrix.shape)
        return self
@@ -0,0 +1,37 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ Created on May 2019
5
+ @author: Nathan de Lara <nathan.delara@polytechnique.org>
6
+ """
7
+ import numpy as np
8
+
9
+
10
def top_k(scores: np.ndarray, k: int = 1, sort: bool = True):
    """Return the indices of the k elements of highest values.

    Parameters
    ----------
    scores : np.ndarray
        Array of values.
    k : int
        Number of elements to return. If ``k`` is not smaller than the number of values,
        the indices of all elements are returned.
    sort : bool
        If ``True``, sort the indices in decreasing order of value (element of highest value first).

    Returns
    -------
    index : np.ndarray
        Indices of the selected elements.

    Examples
    --------
    >>> top_k([1, 3, 2], k=2)
    array([1, 2])
    """
    scores = np.array(scores)
    n_scores = len(scores)
    if k >= n_scores:
        # All elements are selected; argpartition cannot be used since k is out of range.
        if sort:
            index = np.argsort(-scores)
        else:
            # np.arange needs the number of elements, not the array of scores.
            index = np.arange(n_scores)
    else:
        # Partial selection of the k largest values, in arbitrary order.
        index = np.argpartition(-scores, k)[:k]
        if sort:
            index = index[np.argsort(-scores[index])]
    return index
@@ -0,0 +1 @@
1
+ """tests for ranking"""
@@ -0,0 +1,32 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ """tests for ranking API"""
4
+ import unittest
5
+
6
+ from sknetwork.data.test_graphs import *
7
+ from sknetwork.ranking import *
8
+
9
+
10
class TestPageRank(unittest.TestCase):
    """Checks shared by the ranking algorithms: output shapes and non-negative scores."""

    def test_basic(self):
        """Each algorithm returns one non-negative score per node on graphs and digraphs."""
        algorithms = [PageRank(), Closeness(), HITS(), Katz()]
        for graph in (test_graph(), test_digraph()):
            n_nodes = graph.shape[0]
            for algorithm in algorithms:
                scores = algorithm.fit_predict(graph)
                self.assertEqual(scores.shape, (n_nodes, ))
                self.assertTrue(min(scores) >= 0)

    def test_bipartite(self):
        """Row and column scores match the dimensions of the biadjacency matrix."""
        biadjacency = test_bigraph()
        n_row, n_col = biadjacency.shape

        for algorithm in [PageRank(), HITS(), Katz()]:
            algorithm.fit(biadjacency)
            row_scores = algorithm.scores_row_
            col_scores = algorithm.scores_col_

            self.assertEqual(row_scores.shape, (n_row,))
            self.assertEqual(col_scores.shape, (n_col,))
@@ -0,0 +1,38 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ """tests for betweenness.py"""
4
+
5
+ import unittest
6
+ import numpy as np
7
+
8
+ from sknetwork.ranking.betweenness import Betweenness
9
+ from sknetwork.data.test_graphs import test_graph, test_disconnected_graph
10
+ from sknetwork.data.toy_graphs import bow_tie, star_wars
11
+
12
+
13
class TestBetweenness(unittest.TestCase):
    """Tests for the Betweenness ranking algorithm."""

    def test_basic(self):
        """One score is returned per node."""
        graph = test_graph()
        scores = Betweenness().fit_predict(graph)
        self.assertEqual(len(scores), graph.shape[0])

    def test_bowtie(self):
        """Exactly one node of the bow tie graph has a positive score."""
        graph = bow_tie()
        scores = Betweenness().fit_predict(graph)
        n_positive = np.sum(scores > 0)
        self.assertEqual(n_positive, 1)

    def test_disconnected(self):
        """Fitting on a disconnected graph raises a ValueError."""
        graph = test_disconnected_graph()
        algorithm = Betweenness()
        with self.assertRaises(ValueError):
            algorithm.fit(graph)

    def test_bipartite(self):
        """Fitting on the star_wars toy graph raises a ValueError."""
        graph = star_wars()
        algorithm = Betweenness()

        with self.assertRaises(ValueError):
            algorithm.fit_predict(graph)
@@ -0,0 +1,30 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ """tests for closeness.py"""
4
+
5
+ import unittest
6
+
7
+ from sknetwork.data.test_graphs import *
8
+ from sknetwork.ranking.closeness import Closeness
9
+
10
+
11
class TestDiffusion(unittest.TestCase):
    """Tests for the Closeness ranking algorithm."""

    def test_params(self):
        """An unknown method name raises a ValueError when fitting."""
        # Build the graph outside the assertRaises block, so that only an error raised
        # by Closeness itself can make the test pass.
        adjacency = test_graph()
        with self.assertRaises(ValueError):
            Closeness(method='toto').fit(adjacency)

    def test_parallel(self):
        """The approximate method returns one score per node."""
        adjacency = test_graph()
        n = adjacency.shape[0]

        closeness = Closeness(method='approximate')
        scores = closeness.fit_predict(adjacency)
        self.assertEqual(scores.shape, (n,))

    def test_disconnected(self):
        """Fitting on a disconnected graph raises a ValueError."""
        adjacency = test_disconnected_graph()
        closeness = Closeness()
        with self.assertRaises(ValueError):
            closeness.fit(adjacency)
@@ -0,0 +1,20 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ """Tests for hits.py"""
4
+
5
+ import unittest
6
+
7
+ from sknetwork.data.test_graphs import test_bigraph
8
+ from sknetwork.ranking import HITS
9
+
10
+
11
class TestHITS(unittest.TestCase):
    """Tests for the HITS ranking algorithm."""

    def test_keywords(self):
        """Hub and authority scores match the dimensions of the biadjacency matrix."""
        biadjacency = test_bigraph()
        expected_rows, expected_cols = biadjacency.shape

        algorithm = HITS()
        algorithm.fit(biadjacency)

        self.assertEqual(algorithm.scores_row_.shape, (expected_rows,))
        self.assertEqual(algorithm.scores_col_.shape, (expected_cols,))
+ self.assertEqual(hits.scores_col_.shape, (n_col,))