scikit-network 0.33.3__cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of scikit-network might be problematic. Click here for more details.

Files changed (229) hide show
  1. scikit_network-0.33.3.dist-info/METADATA +122 -0
  2. scikit_network-0.33.3.dist-info/RECORD +229 -0
  3. scikit_network-0.33.3.dist-info/WHEEL +6 -0
  4. scikit_network-0.33.3.dist-info/licenses/AUTHORS.rst +43 -0
  5. scikit_network-0.33.3.dist-info/licenses/LICENSE +34 -0
  6. scikit_network-0.33.3.dist-info/top_level.txt +1 -0
  7. scikit_network.libs/libgomp-d22c30c5.so.1.0.0 +0 -0
  8. sknetwork/__init__.py +21 -0
  9. sknetwork/base.py +67 -0
  10. sknetwork/classification/__init__.py +8 -0
  11. sknetwork/classification/base.py +142 -0
  12. sknetwork/classification/base_rank.py +133 -0
  13. sknetwork/classification/diffusion.py +134 -0
  14. sknetwork/classification/knn.py +139 -0
  15. sknetwork/classification/metrics.py +205 -0
  16. sknetwork/classification/pagerank.py +66 -0
  17. sknetwork/classification/propagation.py +152 -0
  18. sknetwork/classification/tests/__init__.py +1 -0
  19. sknetwork/classification/tests/test_API.py +30 -0
  20. sknetwork/classification/tests/test_diffusion.py +77 -0
  21. sknetwork/classification/tests/test_knn.py +23 -0
  22. sknetwork/classification/tests/test_metrics.py +53 -0
  23. sknetwork/classification/tests/test_pagerank.py +20 -0
  24. sknetwork/classification/tests/test_propagation.py +24 -0
  25. sknetwork/classification/vote.cpp +27587 -0
  26. sknetwork/classification/vote.cpython-313-aarch64-linux-gnu.so +0 -0
  27. sknetwork/classification/vote.pyx +56 -0
  28. sknetwork/clustering/__init__.py +8 -0
  29. sknetwork/clustering/base.py +172 -0
  30. sknetwork/clustering/kcenters.py +253 -0
  31. sknetwork/clustering/leiden.py +242 -0
  32. sknetwork/clustering/leiden_core.cpp +31578 -0
  33. sknetwork/clustering/leiden_core.cpython-313-aarch64-linux-gnu.so +0 -0
  34. sknetwork/clustering/leiden_core.pyx +124 -0
  35. sknetwork/clustering/louvain.py +286 -0
  36. sknetwork/clustering/louvain_core.cpp +31223 -0
  37. sknetwork/clustering/louvain_core.cpython-313-aarch64-linux-gnu.so +0 -0
  38. sknetwork/clustering/louvain_core.pyx +124 -0
  39. sknetwork/clustering/metrics.py +91 -0
  40. sknetwork/clustering/postprocess.py +66 -0
  41. sknetwork/clustering/propagation_clustering.py +104 -0
  42. sknetwork/clustering/tests/__init__.py +1 -0
  43. sknetwork/clustering/tests/test_API.py +38 -0
  44. sknetwork/clustering/tests/test_kcenters.py +60 -0
  45. sknetwork/clustering/tests/test_leiden.py +34 -0
  46. sknetwork/clustering/tests/test_louvain.py +135 -0
  47. sknetwork/clustering/tests/test_metrics.py +50 -0
  48. sknetwork/clustering/tests/test_postprocess.py +39 -0
  49. sknetwork/data/__init__.py +6 -0
  50. sknetwork/data/base.py +33 -0
  51. sknetwork/data/load.py +406 -0
  52. sknetwork/data/models.py +459 -0
  53. sknetwork/data/parse.py +644 -0
  54. sknetwork/data/test_graphs.py +84 -0
  55. sknetwork/data/tests/__init__.py +1 -0
  56. sknetwork/data/tests/test_API.py +30 -0
  57. sknetwork/data/tests/test_base.py +14 -0
  58. sknetwork/data/tests/test_load.py +95 -0
  59. sknetwork/data/tests/test_models.py +52 -0
  60. sknetwork/data/tests/test_parse.py +250 -0
  61. sknetwork/data/tests/test_test_graphs.py +29 -0
  62. sknetwork/data/tests/test_toy_graphs.py +68 -0
  63. sknetwork/data/timeout.py +38 -0
  64. sknetwork/data/toy_graphs.py +611 -0
  65. sknetwork/embedding/__init__.py +8 -0
  66. sknetwork/embedding/base.py +94 -0
  67. sknetwork/embedding/force_atlas.py +198 -0
  68. sknetwork/embedding/louvain_embedding.py +148 -0
  69. sknetwork/embedding/random_projection.py +135 -0
  70. sknetwork/embedding/spectral.py +141 -0
  71. sknetwork/embedding/spring.py +198 -0
  72. sknetwork/embedding/svd.py +359 -0
  73. sknetwork/embedding/tests/__init__.py +1 -0
  74. sknetwork/embedding/tests/test_API.py +49 -0
  75. sknetwork/embedding/tests/test_force_atlas.py +35 -0
  76. sknetwork/embedding/tests/test_louvain_embedding.py +33 -0
  77. sknetwork/embedding/tests/test_random_projection.py +28 -0
  78. sknetwork/embedding/tests/test_spectral.py +81 -0
  79. sknetwork/embedding/tests/test_spring.py +50 -0
  80. sknetwork/embedding/tests/test_svd.py +43 -0
  81. sknetwork/gnn/__init__.py +10 -0
  82. sknetwork/gnn/activation.py +117 -0
  83. sknetwork/gnn/base.py +181 -0
  84. sknetwork/gnn/base_activation.py +90 -0
  85. sknetwork/gnn/base_layer.py +109 -0
  86. sknetwork/gnn/gnn_classifier.py +305 -0
  87. sknetwork/gnn/layer.py +153 -0
  88. sknetwork/gnn/loss.py +180 -0
  89. sknetwork/gnn/neighbor_sampler.py +65 -0
  90. sknetwork/gnn/optimizer.py +164 -0
  91. sknetwork/gnn/tests/__init__.py +1 -0
  92. sknetwork/gnn/tests/test_activation.py +56 -0
  93. sknetwork/gnn/tests/test_base.py +75 -0
  94. sknetwork/gnn/tests/test_base_layer.py +37 -0
  95. sknetwork/gnn/tests/test_gnn_classifier.py +130 -0
  96. sknetwork/gnn/tests/test_layers.py +80 -0
  97. sknetwork/gnn/tests/test_loss.py +33 -0
  98. sknetwork/gnn/tests/test_neigh_sampler.py +23 -0
  99. sknetwork/gnn/tests/test_optimizer.py +43 -0
  100. sknetwork/gnn/tests/test_utils.py +41 -0
  101. sknetwork/gnn/utils.py +127 -0
  102. sknetwork/hierarchy/__init__.py +6 -0
  103. sknetwork/hierarchy/base.py +96 -0
  104. sknetwork/hierarchy/louvain_hierarchy.py +272 -0
  105. sknetwork/hierarchy/metrics.py +234 -0
  106. sknetwork/hierarchy/paris.cpp +37871 -0
  107. sknetwork/hierarchy/paris.cpython-313-aarch64-linux-gnu.so +0 -0
  108. sknetwork/hierarchy/paris.pyx +316 -0
  109. sknetwork/hierarchy/postprocess.py +350 -0
  110. sknetwork/hierarchy/tests/__init__.py +1 -0
  111. sknetwork/hierarchy/tests/test_API.py +24 -0
  112. sknetwork/hierarchy/tests/test_algos.py +34 -0
  113. sknetwork/hierarchy/tests/test_metrics.py +62 -0
  114. sknetwork/hierarchy/tests/test_postprocess.py +57 -0
  115. sknetwork/linalg/__init__.py +9 -0
  116. sknetwork/linalg/basics.py +37 -0
  117. sknetwork/linalg/diteration.cpp +27403 -0
  118. sknetwork/linalg/diteration.cpython-313-aarch64-linux-gnu.so +0 -0
  119. sknetwork/linalg/diteration.pyx +47 -0
  120. sknetwork/linalg/eig_solver.py +93 -0
  121. sknetwork/linalg/laplacian.py +15 -0
  122. sknetwork/linalg/normalizer.py +86 -0
  123. sknetwork/linalg/operators.py +225 -0
  124. sknetwork/linalg/polynome.py +76 -0
  125. sknetwork/linalg/ppr_solver.py +170 -0
  126. sknetwork/linalg/push.cpp +31075 -0
  127. sknetwork/linalg/push.cpython-313-aarch64-linux-gnu.so +0 -0
  128. sknetwork/linalg/push.pyx +71 -0
  129. sknetwork/linalg/sparse_lowrank.py +142 -0
  130. sknetwork/linalg/svd_solver.py +91 -0
  131. sknetwork/linalg/tests/__init__.py +1 -0
  132. sknetwork/linalg/tests/test_eig.py +44 -0
  133. sknetwork/linalg/tests/test_laplacian.py +18 -0
  134. sknetwork/linalg/tests/test_normalization.py +34 -0
  135. sknetwork/linalg/tests/test_operators.py +66 -0
  136. sknetwork/linalg/tests/test_polynome.py +38 -0
  137. sknetwork/linalg/tests/test_ppr.py +50 -0
  138. sknetwork/linalg/tests/test_sparse_lowrank.py +61 -0
  139. sknetwork/linalg/tests/test_svd.py +38 -0
  140. sknetwork/linkpred/__init__.py +2 -0
  141. sknetwork/linkpred/base.py +46 -0
  142. sknetwork/linkpred/nn.py +126 -0
  143. sknetwork/linkpred/tests/__init__.py +1 -0
  144. sknetwork/linkpred/tests/test_nn.py +27 -0
  145. sknetwork/log.py +19 -0
  146. sknetwork/path/__init__.py +5 -0
  147. sknetwork/path/dag.py +54 -0
  148. sknetwork/path/distances.py +98 -0
  149. sknetwork/path/search.py +31 -0
  150. sknetwork/path/shortest_path.py +61 -0
  151. sknetwork/path/tests/__init__.py +1 -0
  152. sknetwork/path/tests/test_dag.py +37 -0
  153. sknetwork/path/tests/test_distances.py +62 -0
  154. sknetwork/path/tests/test_search.py +40 -0
  155. sknetwork/path/tests/test_shortest_path.py +40 -0
  156. sknetwork/ranking/__init__.py +8 -0
  157. sknetwork/ranking/base.py +61 -0
  158. sknetwork/ranking/betweenness.cpp +9710 -0
  159. sknetwork/ranking/betweenness.cpython-313-aarch64-linux-gnu.so +0 -0
  160. sknetwork/ranking/betweenness.pyx +97 -0
  161. sknetwork/ranking/closeness.py +92 -0
  162. sknetwork/ranking/hits.py +94 -0
  163. sknetwork/ranking/katz.py +83 -0
  164. sknetwork/ranking/pagerank.py +110 -0
  165. sknetwork/ranking/postprocess.py +37 -0
  166. sknetwork/ranking/tests/__init__.py +1 -0
  167. sknetwork/ranking/tests/test_API.py +32 -0
  168. sknetwork/ranking/tests/test_betweenness.py +38 -0
  169. sknetwork/ranking/tests/test_closeness.py +30 -0
  170. sknetwork/ranking/tests/test_hits.py +20 -0
  171. sknetwork/ranking/tests/test_pagerank.py +62 -0
  172. sknetwork/ranking/tests/test_postprocess.py +26 -0
  173. sknetwork/regression/__init__.py +4 -0
  174. sknetwork/regression/base.py +61 -0
  175. sknetwork/regression/diffusion.py +210 -0
  176. sknetwork/regression/tests/__init__.py +1 -0
  177. sknetwork/regression/tests/test_API.py +32 -0
  178. sknetwork/regression/tests/test_diffusion.py +56 -0
  179. sknetwork/sknetwork.py +3 -0
  180. sknetwork/test_base.py +35 -0
  181. sknetwork/test_log.py +15 -0
  182. sknetwork/topology/__init__.py +8 -0
  183. sknetwork/topology/cliques.cpp +32568 -0
  184. sknetwork/topology/cliques.cpython-313-aarch64-linux-gnu.so +0 -0
  185. sknetwork/topology/cliques.pyx +149 -0
  186. sknetwork/topology/core.cpp +30654 -0
  187. sknetwork/topology/core.cpython-313-aarch64-linux-gnu.so +0 -0
  188. sknetwork/topology/core.pyx +90 -0
  189. sknetwork/topology/cycles.py +243 -0
  190. sknetwork/topology/minheap.cpp +27335 -0
  191. sknetwork/topology/minheap.cpython-313-aarch64-linux-gnu.so +0 -0
  192. sknetwork/topology/minheap.pxd +20 -0
  193. sknetwork/topology/minheap.pyx +109 -0
  194. sknetwork/topology/structure.py +194 -0
  195. sknetwork/topology/tests/__init__.py +1 -0
  196. sknetwork/topology/tests/test_cliques.py +28 -0
  197. sknetwork/topology/tests/test_core.py +19 -0
  198. sknetwork/topology/tests/test_cycles.py +65 -0
  199. sknetwork/topology/tests/test_structure.py +85 -0
  200. sknetwork/topology/tests/test_triangles.py +38 -0
  201. sknetwork/topology/tests/test_wl.py +72 -0
  202. sknetwork/topology/triangles.cpp +8897 -0
  203. sknetwork/topology/triangles.cpython-313-aarch64-linux-gnu.so +0 -0
  204. sknetwork/topology/triangles.pyx +151 -0
  205. sknetwork/topology/weisfeiler_lehman.py +133 -0
  206. sknetwork/topology/weisfeiler_lehman_core.cpp +27638 -0
  207. sknetwork/topology/weisfeiler_lehman_core.cpython-313-aarch64-linux-gnu.so +0 -0
  208. sknetwork/topology/weisfeiler_lehman_core.pyx +114 -0
  209. sknetwork/utils/__init__.py +7 -0
  210. sknetwork/utils/check.py +355 -0
  211. sknetwork/utils/format.py +221 -0
  212. sknetwork/utils/membership.py +82 -0
  213. sknetwork/utils/neighbors.py +115 -0
  214. sknetwork/utils/tests/__init__.py +1 -0
  215. sknetwork/utils/tests/test_check.py +190 -0
  216. sknetwork/utils/tests/test_format.py +63 -0
  217. sknetwork/utils/tests/test_membership.py +24 -0
  218. sknetwork/utils/tests/test_neighbors.py +41 -0
  219. sknetwork/utils/tests/test_tfidf.py +18 -0
  220. sknetwork/utils/tests/test_values.py +66 -0
  221. sknetwork/utils/tfidf.py +37 -0
  222. sknetwork/utils/values.py +76 -0
  223. sknetwork/visualization/__init__.py +4 -0
  224. sknetwork/visualization/colors.py +34 -0
  225. sknetwork/visualization/dendrograms.py +277 -0
  226. sknetwork/visualization/graphs.py +1039 -0
  227. sknetwork/visualization/tests/__init__.py +1 -0
  228. sknetwork/visualization/tests/test_dendrograms.py +53 -0
  229. sknetwork/visualization/tests/test_graphs.py +176 -0
@@ -0,0 +1,114 @@
1
+ # distutils: language = c++
2
+ # cython: language_level=3
3
+ """
4
+ Created on July 1, 2020
5
+ @author: Pierre Pebereau <pierre.pebereau@telecom-paris.fr>
6
+ @author: Alexis Barreaux <alexis.barreaux@telecom-paris.fr>
7
+ """
8
+ from libcpp.vector cimport vector
9
+ from libc.math cimport pow
10
+ cimport cython
11
+
12
+ ctypedef (int, double, int) ctuple
13
+
14
+ cdef extern from "<algorithm>" namespace "std":
15
+ # fixed sort as per https://stackoverflow.com/questions/57584909/unable-to-use-cdef-function-in-stdsort-as-comparison-function
16
+ void sort(...)
17
+ void csort "sort"(...)
18
+
19
cdef bint is_lower(ctuple a, ctuple b) :
    """Strict lexicographic order on triplets, using only their first two fields.

    Parameters
    ----------
    a:
        First triplet.
    b:
        Second triplet.

    Returns
    -------
    ``True`` if a < b, and ``False`` otherwise. The third field is ignored.
    """
    # Different first fields decide the order on their own.
    if a[0] != b[0]:
        return a[0] < b[0]
    # Equal first fields: the second field breaks the tie.
    return a[1] < b[1]
41
+
42
+
43
@cython.boundscheck(False)
@cython.wraparound(False)
def weisfeiler_lehman_coloring(int[:] indptr, int[:] indices, int[:] labels, double [:] powers, int max_iter):
    """Weisfeiler-Lehman coloring.

    At each iteration, every node gets a hash equal to the sum of ``powers[label]`` over its
    neighbors. Nodes are then sorted by (current label, hash) and relabeled with consecutive
    integers starting from 0, so that nodes sharing both label and hash (up to a tolerance of
    1e-10 on the hash) receive the same new label. ``labels`` is modified in place.

    Parameters
    ----------
    indptr :
        Indptr of the CSR.
    indices :
        Indices of the CSR.
    labels : int[:]
        Labels to be changed.
    powers : double [:]
        Powers being used as hash and put in a memory view to limit several identical calculations.
        It is indexed by label value.
    max_iter : int
        Maximum number of iterations.

    Returns
    -------
    labels : int[:]
        Colors of the nodes.
    has_changed : bint
        True if the last iteration still modified a label (``True`` as well if no iteration was run
        on a non-empty graph). False otherwise.
    """
    cdef int n = indptr.shape[0] -1
    cdef int iteration = 0
    cdef int i, j, j1, j2, jj, label

    cdef double epsilon = pow(10, -10)

    cdef vector[ctuple] new_labels
    cdef ctuple tuple_ref, tuple_new
    cdef double hash_ref, hash_new
    cdef int label_ref, label_new

    cdef bint has_changed = True

    # Empty graph: nothing to color. Without this guard, ``new_labels[0]`` below would read an
    # empty vector (undefined behavior, since bounds checking is disabled).
    if n <= 0:
        return labels, False

    while iteration < max_iter and has_changed:
        new_labels.clear()
        has_changed = False

        # Build one (current label, neighborhood hash, node) triplet per node.
        for i in range(n):
            hash_ref = 0
            j1 = indptr[i]
            j2 = indptr[i + 1]

            for jj in range(j1, j2):
                j = indices[jj]
                hash_ref += powers[labels[j]]

            new_labels.push_back((labels[i], hash_ref, i))

        # Sort by (label, hash) so that nodes of the same new class are contiguous.
        csort(new_labels.begin(), new_labels.end(), is_lower)

        # The first node in sorted order always gets label 0.
        label = 0
        tuple_new = new_labels[0]
        labels[tuple_new[2]] = label

        for j in range(1, n):
            tuple_ref = tuple_new
            tuple_new = new_labels[j]
            label_ref, hash_ref, _ = tuple_ref
            label_new, hash_new, i = tuple_new

            # A new class starts whenever the label or the hash differs from the previous triplet.
            if abs(hash_new - hash_ref) > epsilon or label_new != label_ref :
                label += 1

            if labels[i] != label:
                has_changed = True

            labels[i] = label
        iteration += 1

    return labels, has_changed
@@ -0,0 +1,7 @@
1
+ """utils module"""
2
+ from sknetwork.data import *
3
+ from sknetwork.utils.check import is_symmetric
4
+ from sknetwork.utils.format import *
5
+ from sknetwork.utils.membership import get_membership, from_membership
6
+ from sknetwork.utils.neighbors import get_neighbors, get_degrees, get_weights
7
+ from sknetwork.utils.tfidf import get_tfidf
@@ -0,0 +1,355 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ Created in April 2019
5
+ @author: Nathan de Lara <nathan.delara@polytechnique.org>
6
+ """
7
+ import warnings
8
+ from typing import Union, Optional
9
+
10
+ import numpy as np
11
+ from scipy import sparse
12
+
13
+
14
def has_nonnegative_entries(input_matrix: Union[sparse.csr_matrix, np.ndarray]) -> bool:
    """Tell whether all entries of the input are greater than or equal to zero.

    When the type of the input is exactly ``sparse.csr_matrix``, only its stored values
    (``data``) are inspected; any other input is compared to zero as a whole.
    """
    is_csr = type(input_matrix) == sparse.csr_matrix
    values = input_matrix.data if is_csr else input_matrix
    return np.all(values >= 0)
20
+
21
+
22
def is_weakly_connected(adjacency: sparse.csr_matrix) -> bool:
    """Tell whether the graph has exactly one weakly connected component.

    Parameters
    ----------
    adjacency:
        Adjacency matrix of the graph.

    Returns
    -------
    ``True`` if a single weakly connected component is found.
    """
    # The graph is treated as directed only when its adjacency matrix is not symmetric.
    directed = not is_symmetric(adjacency)
    # With ``return_labels=False``, only the number of components is returned.
    n_components = sparse.csgraph.connected_components(adjacency, directed, 'weak', False)
    return n_components == 1
31
+
32
+
33
def check_connected(adjacency: sparse.csr_matrix):
    """Check that a graph is weakly connected.

    Raises
    ------
    ValueError
        If the graph is not weakly connected.
    """
    if not is_weakly_connected(adjacency):
        raise ValueError('The graph is expected to be connected.')
39
+
40
+
41
def check_nonnegative(input_matrix: Union[sparse.csr_matrix, np.ndarray]):
    """Check that the array has non-negative entries.

    Raises
    ------
    ValueError
        If at least one entry is negative.
    """
    all_nonnegative = has_nonnegative_entries(input_matrix)
    if all_nonnegative:
        return
    raise ValueError('Only nonnegative values are expected.')
45
+
46
+
47
def has_positive_entries(input_matrix: np.ndarray) -> bool:
    """Tell whether all entries of a dense array are strictly positive.

    Raises
    ------
    TypeError
        If the type of the input is not exactly ``np.ndarray``.
    """
    if type(input_matrix) == np.ndarray:
        return np.all(input_matrix > 0)
    raise TypeError('Entry must be a dense NumPy array.')
53
+
54
+
55
def check_positive(input_matrix: Union[sparse.csr_matrix, np.ndarray]):
    """Check that the array has positive entries.

    Raises
    ------
    ValueError
        If ``has_positive_entries`` reports that some entry is not positive.
    """
    all_positive = has_positive_entries(input_matrix)
    if all_positive:
        return
    raise ValueError('Only positive values are expected.')
59
+
60
+
61
def is_proba_array(input_matrix: np.ndarray) -> bool:
    """Tell whether each row of the array is a probability distribution.

    A 1-dimensional array is treated as a single distribution; for a 2-dimensional array,
    every row must have non-negative entries summing to 1.

    Raises
    ------
    TypeError
        If the array has neither one nor two dimensions.
    """
    n_dim = len(input_matrix.shape)
    if n_dim not in (1, 2):
        raise TypeError('Entry must be one or two-dimensional array.')
    if n_dim == 1:
        return has_nonnegative_entries(input_matrix) and np.isclose(input_matrix.sum(), 1)
    n_row, n_col = input_matrix.shape
    # Deviation of each row sum from 1; its norm must vanish.
    err = input_matrix.dot(np.ones(n_col)) - np.ones(n_row)
    return has_nonnegative_entries(input_matrix) and np.isclose(np.linalg.norm(err), 0)
71
+
72
+
73
def is_square(input_matrix: Union[sparse.csr_matrix, np.ndarray]) -> bool:
    """Tell whether the matrix has as many rows as columns."""
    n_row, n_col = input_matrix.shape[0], input_matrix.shape[1]
    return n_row == n_col
76
+
77
+
78
def check_square(input_matrix: Union[sparse.csr_matrix, np.ndarray]):
    """Check that a matrix is square.

    Raises
    ------
    ValueError
        If the numbers of rows and columns differ.
    """
    if not is_square(input_matrix):
        raise ValueError('The adjacency matrix is expected to be square.')
84
+
85
+
86
def is_symmetric(input_matrix: sparse.csr_matrix) -> bool:
    """Tell whether the matrix is equal to its transpose."""
    # The matrix is symmetric when its difference with its transpose stores no entry.
    difference = sparse.csr_matrix(input_matrix - input_matrix.T)
    return difference.nnz == 0
89
+
90
+
91
def check_symmetry(input_matrix: sparse.csr_matrix):
    """Check that a matrix is symmetric.

    Raises
    ------
    ValueError
        If the matrix is not symmetric.
    """
    symmetric = is_symmetric(input_matrix)
    if symmetric:
        return
    raise ValueError('The input matrix is expected to be symmetric.')
95
+
96
+
97
def make_weights(distribution: str, adjacency: sparse.csr_matrix) -> np.ndarray:
    """Build the array of node weights for a named distribution.

    Parameters
    ----------
    distribution:
        Distribution for node sampling. Only ``'degree'`` or ``'uniform'`` are accepted
        (case-insensitive).
    adjacency:
        The adjacency matrix of the neighbors.

    Returns
    -------
    node_weights: np.ndarray
        Weights of nodes: row sums of the adjacency for ``'degree'``, ones for ``'uniform'``.

    Raises
    ------
    ValueError
        If the distribution is unknown.
    """
    n_nodes = adjacency.shape[0]
    kind = distribution.lower()
    if kind == 'uniform':
        return np.ones(n_nodes)
    if kind == 'degree':
        return adjacency.dot(np.ones(adjacency.shape[1]))
    raise ValueError('Unknown distribution of node weights.')
121
+
122
+
123
def check_format(input_matrix: Union[sparse.csr_matrix, sparse.csc_matrix, sparse.coo_matrix, sparse.lil_matrix,
                                     np.ndarray], allow_empty: bool = False) -> sparse.csr_matrix:
    """Validate the type of a matrix and convert it to Scipy CSR format.

    Parameters
    ----------
    input_matrix:
        NumPy array or Scipy sparse matrix (CSR, CSC, COO or LIL).
    allow_empty:
        If ``False``, a matrix without any stored entry is rejected.

    Returns
    -------
    The input converted to ``sparse.csr_matrix``.

    Raises
    ------
    TypeError
        If the type of the input is not one of the accepted formats.
    ValueError
        If the matrix has no stored entry and ``allow_empty`` is ``False``.
    """
    accepted = (sparse.csr_matrix, sparse.csc_matrix, sparse.coo_matrix, sparse.lil_matrix, np.ndarray)
    # Exact type match: subclasses of the accepted formats are rejected.
    if not any(type(input_matrix) is fmt for fmt in accepted):
        raise TypeError('The input matrix must be in Scipy sparse format or Numpy ndarray format.')
    csr = sparse.csr_matrix(input_matrix)
    if csr.nnz == 0 and not allow_empty:
        raise ValueError('The input matrix is empty.')
    return csr
135
+
136
+
137
def check_is_proba(entry: Union[float, int], name: str = None):
    """Check whether the number is non-negative and less than or equal to 1.

    Parameters
    ----------
    entry:
        Number to check. Python floats and ints as well as NumPy real scalars
        (e.g. ``np.float64``) are accepted; booleans are rejected.
    name:
        Name used in the error messages (``'Probabilities'`` if ``None``).

    Raises
    ------
    TypeError
        If the entry is not a real number.
    ValueError
        If the entry is not in [0, 1].
    """
    if name is None:
        name = 'Probabilities'
    # isinstance (rather than an exact type match) lets NumPy scalars through;
    # bool is a subclass of int and is excluded explicitly, as it was rejected before.
    is_number = isinstance(entry, (float, int, np.floating, np.integer)) and not isinstance(entry, bool)
    if not is_number:
        raise TypeError('{} must be floats (or ints if 0 or 1).'.format(name))
    if entry < 0 or entry > 1:
        raise ValueError('{} must be between 0 and 1.'.format(name))
145
+
146
+
147
def check_damping_factor(damping_factor: float):
    """Check that the damping factor lies in the half-open interval [0, 1[.

    Raises
    ------
    ValueError
        If the damping factor is negative, or greater than or equal to 1.
    """
    too_large = damping_factor >= 1
    too_small = damping_factor < 0
    if too_large or too_small:
        raise ValueError('A damping factor must have a value in [0, 1[.')
151
+
152
+
153
def check_weights(weights: Union['str', np.ndarray], adjacency: Union[sparse.csr_matrix, sparse.csc_matrix],
                  positive_entries: bool = False) -> np.ndarray:
    """Validate node weights against an adjacency matrix and return them as an array.

    Parameters
    ----------
    weights:
        Probabilities for node sampling in the null model. ``'degree'``, ``'uniform'`` or custom weights.
    adjacency:
        The adjacency matrix of the graph.
    positive_entries:
        If ``True``, the weights must all be positive; if ``False``, the weights must be non-negative.

    Returns
    -------
    node_weights: np.ndarray
        Valid weights of nodes (not normalized).

    Raises
    ------
    TypeError
        If the weights are neither a string nor a NumPy array.
    ValueError
        If the number of weights differs from the number of rows of the adjacency, or if the
        weights violate the sign constraints, or if their sum is not positive.
    """
    n_nodes = adjacency.shape[0]
    if type(weights) == str:
        node_weights_vec = make_weights(weights, adjacency)
    elif type(weights) == np.ndarray:
        if len(weights) != n_nodes:
            raise ValueError('The number of node weights must match the number of nodes.')
        node_weights_vec = weights
    else:
        raise TypeError(
            'Node weights must be a known distribution ("degree" or "uniform" string) or a custom NumPy array.')

    if positive_entries and not has_positive_entries(node_weights_vec):
        raise ValueError('All weights must be positive.')
    # Reached in every non-raising case, including when positivity was required and satisfied.
    if np.any(node_weights_vec < 0) or node_weights_vec.sum() <= 0:
        raise ValueError('Node weights must be non-negative with positive sum.')

    return node_weights_vec
190
+
191
+
192
def get_probs(weights: Union['str', np.ndarray], adjacency: Union[sparse.csr_matrix, sparse.csc_matrix],
              positive_entries: bool = False) -> np.ndarray:
    """Validate node weights with ``check_weights`` and normalize them.

    Returns
    -------
    The validated weights divided by their sum.
    """
    valid_weights = check_weights(weights, adjacency, positive_entries)
    total = np.sum(valid_weights)
    return valid_weights / total
199
+
200
+
201
def check_random_state(random_state: Optional[Union[np.random.RandomState, int]]):
    """Return a NumPy ``RandomState`` built from a seed, an existing random state or ``None``.

    Parameters
    ----------
    random_state:
        ``None`` (a new ``RandomState`` is created without explicit seed), an integer seed
        (Python ``int`` or NumPy integer scalar), or a ``RandomState`` instance (returned as is).

    Returns
    -------
    random_state : np.random.RandomState

    Raises
    ------
    TypeError
        If the argument is neither ``None``, an integer nor a ``RandomState``.
    """
    if random_state is None:
        return np.random.RandomState()
    if isinstance(random_state, np.random.RandomState):
        return random_state
    # NumPy integer scalars are not instances of ``int`` and used to be rejected.
    # bool is a subclass of int and stays rejected, as before.
    if isinstance(random_state, (int, np.integer)) and not isinstance(random_state, bool):
        return np.random.RandomState(random_state)
    raise TypeError('To specify a random state, pass the seed (as an int) or a NumPy random state object.')
211
+
212
+
213
def check_n_neighbors(n_neighbors: int, n_seeds: int):
    """Cap the number of neighbors so that it is lower than the number of labeled samples.

    Returns
    -------
    ``n_neighbors`` if it is lower than ``n_seeds``; otherwise ``n_seeds - 1``, after a warning.
    """
    if n_neighbors < n_seeds:
        return n_neighbors
    warnings.warn(Warning("The number of neighbors must be lower than the number of nodes with known labels. "
                          "Changed accordingly."))
    return n_seeds - 1
220
+
221
+
222
def check_labels(labels: np.ndarray):
    """Extract the distinct labels of the seeds for semi-supervised algorithms.

    Negative entries are ignored.

    Returns
    -------
    classes : np.ndarray
        Sorted distinct non-negative labels.
    n_classes : int
        Number of distinct non-negative labels.

    Raises
    ------
    ValueError
        If fewer than two distinct non-negative labels are present.
    """
    seed_labels = labels[labels >= 0]
    classes: np.ndarray = np.unique(seed_labels)
    n_classes: int = len(classes)

    if n_classes >= 2:
        return classes, n_classes
    raise ValueError('There must be at least two distinct labels.')
232
+
233
+
234
def check_n_jobs(n_jobs: Optional[int] = None):
    """Parse the ``n_jobs`` parameter for multiprocessing.

    Returns
    -------
    ``1`` if ``n_jobs`` is ``None``, ``None`` if ``n_jobs`` is ``-1``, and ``n_jobs`` itself otherwise.
    """
    if n_jobs is None:
        return 1
    if n_jobs == -1:
        return None
    return n_jobs
242
+
243
+
244
def check_adjacency_vector(adjacency_vectors: Union[sparse.csr_matrix, np.ndarray],
                           n: Optional[int] = None) -> sparse.csr_matrix:
    """Check the format of new samples for predict methods.

    Parameters
    ----------
    adjacency_vectors:
        Adjacency vectors of the new samples.
    n:
        Expected number of columns (no check on the shape if ``None``).

    Returns
    -------
    The adjacency vectors as returned by ``check_format``.

    Raises
    ------
    ValueError
        If ``n`` is given and differs from the number of columns.
    """
    vectors = check_format(adjacency_vectors)
    if n is None or vectors.shape[1] == n:
        return vectors
    raise ValueError('The adjacency vector must be of length equal to the number nodes in the graph.')
251
+
252
+
253
def check_n_clusters(n_clusters: int, n_row: int, n_min: int = 0):
    """Check that the number of clusters lies between ``n_min`` and ``n_row``.

    Raises
    ------
    ValueError
        If ``n_clusters`` exceeds ``n_row`` or is lower than ``n_min``.
    """
    if n_clusters > n_row:
        raise ValueError('The number of clusters exceeds the number of rows.')
    if n_clusters >= n_min:
        return
    raise ValueError('The number of clusters must be at least {}.'.format(n_min))
261
+
262
+
263
def check_min_size(n_row, n_min):
    """Check that an adjacency has the required number of rows.

    Raises
    ------
    ValueError
        If ``n_row`` is lower than ``n_min``.
    """
    if n_row >= n_min:
        return
    raise ValueError('The graph must contain at least {} nodes.'.format(n_min))
269
+
270
+
271
def check_dendrogram(dendrogram):
    """Check that a dendrogram is a 2-dimensional array with 4 columns.

    Raises
    ------
    ValueError
        If the shape is incorrect.
    """
    valid_shape = dendrogram.ndim == 2 and dendrogram.shape[1] == 4
    if valid_shape:
        return
    raise ValueError("Dendrogram has incorrect shape.")
277
+
278
+
279
def check_min_nnz(nnz, n_min):
    """Check that an adjacency has the required number of edges.

    Raises
    ------
    ValueError
        If ``nnz`` is lower than ``n_min``.
    """
    if nnz >= n_min:
        return
    raise ValueError('The graph must contain at least {} edge(s).'.format(n_min))
285
+
286
+
287
def check_n_components(n_components, n_min) -> int:
    """Cap the number of components.

    Returns
    -------
    ``n_components`` if it does not exceed ``n_min``; otherwise ``n_min``, after a warning.
    """
    if n_components <= n_min:
        return n_components
    warnings.warn(Warning("The dimension of the embedding cannot exceed {}. Changed accordingly.".format(n_min)))
    return n_min
294
+
295
+
296
def check_scaling(scaling: float, adjacency: sparse.csr_matrix, regularize: bool):
    """Check the scaling factor.

    Parameters
    ----------
    scaling:
        Scaling factor; must be non-negative.
    adjacency:
        Adjacency matrix of the graph (only inspected if ``scaling`` is non-zero and
        ``regularize`` is ``False``).
    regularize:
        Whether regularization is applied.

    Raises
    ------
    ValueError
        If ``scaling`` is negative, or if it is positive without regularization on a graph
        that is not weakly connected.
    """
    if scaling < 0:
        raise ValueError("The 'scaling' parameter must be non-negative.")

    # The connectivity test is evaluated last so that it is skipped whenever possible.
    if scaling and (not regularize) and not is_weakly_connected(adjacency):
        raise ValueError("Positive 'scaling' is valid only if the graph is connected or with regularization. "
                         "Call 'fit' either with 'scaling' = 0 or positive 'regularization'.")
304
+
305
+
306
def has_boolean_entries(input_matrix: np.ndarray) -> bool:
    """Tell whether a dense array has boolean dtype.

    Raises
    ------
    TypeError
        If the type of the input is not exactly ``np.ndarray``.
    """
    if type(input_matrix) == np.ndarray:
        return input_matrix.dtype == 'bool'
    raise TypeError('Entry must be a dense NumPy array.')
312
+
313
+
314
def check_boolean(input_matrix: np.ndarray):
    """Check that the array has boolean entries.

    Raises
    ------
    ValueError
        If the array does not have boolean dtype.
    """
    is_boolean = has_boolean_entries(input_matrix)
    if is_boolean:
        return
    raise ValueError('Only boolean values are expected.')
318
+
319
+
320
def check_vector_format(vector_1: np.ndarray, vector_2: np.ndarray):
    """Check that the inputs are vectors of the same length.

    Raises
    ------
    ValueError
        If one of the arrays has more than one dimension, or if the lengths differ.
    """
    n_dim_1, n_dim_2 = len(vector_1.shape), len(vector_2.shape)
    if not (n_dim_1 <= 1 and n_dim_2 <= 1):
        raise ValueError('The arrays must be 1-dimensional.')
    length_1, length_2 = vector_1.shape[0], vector_2.shape[0]
    if length_1 != length_2:
        raise ValueError('The arrays do not have the same length.')
326
+
327
+
328
def has_self_loops(input_matrix: sparse.csr_matrix) -> bool:
    """Tell whether every node has a self loop, i.e. every diagonal entry is non-zero."""
    diagonal_mask = input_matrix.diagonal().astype(bool)
    return all(diagonal_mask)
331
+
332
+
333
def add_self_loops(adjacency: sparse.csr_matrix) -> sparse.csr_matrix:
    """Add self loops to an adjacency matrix.

    Ones are added on the main diagonal. For a non-square matrix, the diagonal is
    built from an identity of size ``n_row`` resized to the shape of the input.

    Parameters
    ----------
    adjacency : sparse.csr_matrix
        Adjacency matrix of the graph.

    Returns
    -------
    sparse.csr_matrix
        Adjacency matrix of the graph with self loops.
    """
    n_row, n_col = adjacency.shape

    if not is_square(adjacency):
        # Identity on the rows, resized to match the rectangular shape.
        rectangular_identity = sparse.eye(n_row)
        rectangular_identity.resize(n_row, n_col)
        adjacency += rectangular_identity
        return adjacency

    adjacency = sparse.diags(np.ones(n_col), format='csr') + adjacency
    return adjacency
@@ -0,0 +1,221 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ Created in April 2019
5
+ @author: Nathan de Lara <nathan.delara@polytechnique.org>
6
+ """
7
+ from typing import Union, Tuple, Optional
8
+
9
+ import numpy as np
10
+ from scipy import sparse
11
+
12
+ from sknetwork.linalg.sparse_lowrank import SparseLR
13
+ from sknetwork.utils.check import check_format, is_square, is_symmetric
14
+ from sknetwork.utils.values import stack_values, get_values
15
+
16
+
17
def check_csr_or_slr(adjacency):
    """Ensure the input is a scipy CSR matrix or a SparseLR object.

    The comparison is made on the exact type of the input, so instances of
    subclasses are rejected as well.

    Raises
    ------
    TypeError
        If the type of the input is neither ``sparse.csr_matrix`` nor ``SparseLR``.
    """
    accepted_types = (sparse.csr_matrix, SparseLR)
    if type(adjacency) in accepted_types:
        return
    raise TypeError('Input must be a scipy CSR matrix or a SparseLR object.')
21
+
22
+
23
def directed2undirected(adjacency: Union[sparse.csr_matrix, SparseLR],
                        weighted: bool = True) -> Union[sparse.csr_matrix, SparseLR]:
    """Adjacency matrix of the undirected graph associated with some directed graph.

    The new adjacency matrix becomes either:

    :math:`A+A^T` (default)

    or

    :math:`\\max(A,A^T) > 0` (binary)

    If the initial adjacency matrix :math:`A` is binary, bidirectional edges have weight 2
    (first method, default) or 1 (second method).

    Parameters
    ----------
    adjacency :
        Adjacency matrix.
    weighted :
        If ``True``, return the sum of the weights in both directions of each edge.

    Returns
    -------
    new_adjacency :
        New adjacency matrix (same format as input).

    Raises
    ------
    TypeError
        If the input is neither a scipy CSR matrix nor a SparseLR object.
    ValueError
        If ``weighted=False`` is requested for a SparseLR object.
    """
    if type(adjacency) == sparse.csr_matrix:
        if weighted:
            # Any floating-point dtype (not only float64) must stay floating:
            # casting e.g. float32 weights to int would silently truncate them.
            # Other dtypes (bool, integers) are cast to int so that weights add up.
            if np.issubdtype(adjacency.data.dtype, np.floating):
                data_type = float
            else:
                data_type = int
            new_adjacency = adjacency.astype(data_type) + adjacency.T
        else:
            new_adjacency = (adjacency + adjacency.T).astype(bool)
        # Bind the CSR result explicitly so the sorted matrix is the one returned.
        new_adjacency = new_adjacency.tocsr()
        new_adjacency.sort_indices()
        return new_adjacency

    # Anything that is not a plain CSR matrix must be a SparseLR object.
    check_csr_or_slr(adjacency)
    if not weighted:
        raise ValueError('This function only works with ``weighted=True`` for SparseLR objects.')
    # Transposing a low-rank term (x, y) amounts to swapping its two factors.
    new_tuples = [(y, x) for (x, y) in adjacency.low_rank_tuples]
    return SparseLR(directed2undirected(adjacency.sparse_mat), adjacency.low_rank_tuples + new_tuples)
69
+
70
+
71
def bipartite2directed(biadjacency: Union[sparse.csr_matrix, SparseLR]) -> Union[sparse.csr_matrix, SparseLR]:
    """Build the adjacency matrix of the directed graph induced by a bipartite graph,
    with edges going from the first part (rows) to the second part (columns).

    The returned adjacency matrix is:

    :math:`A = \\begin{bmatrix} 0 & B \\\\ 0 & 0 \\end{bmatrix}`

    where :math:`B` is the biadjacency matrix.

    Parameters
    ----------
    biadjacency :
        Biadjacency matrix of the graph.

    Returns
    -------
    adjacency :
        Adjacency matrix (same format as input).

    Raises
    ------
    TypeError
        Raised by ``check_csr_or_slr`` when the input type is not supported.
    """
    check_csr_or_slr(biadjacency)
    n_row, n_col = biadjacency.shape

    if type(biadjacency) != sparse.csr_matrix:
        # SparseLR input: convert the sparse part recursively, then pad every
        # low-rank factor with zeros up to length n_row + n_col.
        sparse_part = bipartite2directed(biadjacency.sparse_mat)
        padded_tuples = []
        for x, y in biadjacency.low_rank_tuples:
            padded_x = np.hstack((x, np.zeros(n_col)))
            padded_y = np.hstack((np.zeros(n_row), y))
            padded_tuples.append((padded_x, padded_y))
        return SparseLR(sparse_part, padded_tuples)

    # NOTE(review): the all-zero block is created with scipy's default dtype, so the
    # dtype of the stacked result may differ from the input's — confirm this is intended.
    lower_left = sparse.csr_matrix((n_col, n_row))
    blocks = [[None, biadjacency], [lower_left, None]]
    adjacency = sparse.bmat(blocks, format='csr')
    adjacency.sort_indices()
    return adjacency
101
+
102
+
103
def bipartite2undirected(biadjacency: Union[sparse.csr_matrix, SparseLR]) -> Union[sparse.csr_matrix, SparseLR]:
    """Build the adjacency matrix of a bipartite graph from its biadjacency matrix.

    The returned adjacency matrix is:

    :math:`A = \\begin{bmatrix} 0 & B \\\\ B^T & 0 \\end{bmatrix}`

    where :math:`B` is the biadjacency matrix of the bipartite graph.

    Parameters
    ----------
    biadjacency :
        Biadjacency matrix of the graph.

    Returns
    -------
    adjacency :
        Adjacency matrix (same format as input).

    Raises
    ------
    TypeError
        Raised by ``check_csr_or_slr`` when the input type is not supported.
    """
    check_csr_or_slr(biadjacency)

    if type(biadjacency) != sparse.csr_matrix:
        # SparseLR input: each low-rank term (x, y) yields two terms on the padded
        # vectors, one for the upper-right block and its mirror for the lower-left block.
        n_row, n_col = biadjacency.shape
        padded_tuples = []
        for x, y in biadjacency.low_rank_tuples:
            upper_term = (np.hstack((x, np.zeros(n_col))), np.hstack((np.zeros(n_row), y)))
            lower_term = (np.hstack((np.zeros(n_row), y)), np.hstack((x, np.zeros(n_col))))
            padded_tuples.extend([upper_term, lower_term])
        return SparseLR(bipartite2undirected(biadjacency.sparse_mat), padded_tuples)

    blocks = [[None, biadjacency], [biadjacency.T, None]]
    adjacency = sparse.bmat(blocks, format='csr')
    adjacency.sort_indices()
    return adjacency
134
+
135
+
136
def get_adjacency(input_matrix: Union[sparse.csr_matrix, np.ndarray], allow_directed: bool = True,
                  force_bipartite: bool = False, force_directed: bool = False, allow_empty: bool = False)\
        -> Tuple[sparse.csr_matrix, bool]:
    """Check the input matrix and return a proper adjacency matrix.

    Parameters
    ----------
    input_matrix :
        Adjacency matrix or biadjacency matrix of the graph.
    allow_directed :
        If ``True`` (default), allow the graph to be directed.
    force_bipartite : bool
        If ``True``, return the adjacency matrix of a bipartite graph.
        Otherwise (default), do it only if the input matrix is not square or not symmetric
        with ``allow_directed=False``.
    force_directed :
        If ``True`` return :math:`A = \\begin{bmatrix} 0 & B \\\\ 0 & 0 \\end{bmatrix}`.
        Otherwise (default), return :math:`A = \\begin{bmatrix} 0 & B \\\\ B^T & 0 \\end{bmatrix}`.
        Only used when the graph is treated as bipartite.
    allow_empty :
        If ``True``, allow the input matrix to be empty (forwarded to ``check_format``).

    Returns
    -------
    adjacency :
        The checked input matrix, or the adjacency matrix built from it when the
        graph is treated as bipartite.
    bipartite :
        ``True`` if the input has been treated as a biadjacency matrix.
    """
    input_matrix = check_format(input_matrix, allow_empty=allow_empty)

    # Decide whether the input is a biadjacency matrix; the checks run in the same
    # order as a short-circuit ``or`` chain, so is_symmetric is only evaluated last.
    if force_bipartite:
        bipartite = True
    elif not is_square(input_matrix):
        bipartite = True
    elif allow_directed:
        bipartite = False
    else:
        bipartite = not is_symmetric(input_matrix)

    if not bipartite:
        return input_matrix, bipartite

    to_adjacency = bipartite2directed if force_directed else bipartite2undirected
    return to_adjacency(input_matrix), bipartite
168
+
169
+
170
def get_adjacency_values(input_matrix: Union[sparse.csr_matrix, np.ndarray], allow_directed: bool = True,
                         force_bipartite: bool = False, force_directed: bool = False,
                         values: Optional[Union[dict, np.ndarray]] = None,
                         values_row: Optional[Union[dict, np.ndarray]] = None,
                         values_col: Optional[Union[dict, np.ndarray]] = None,
                         default_value: float = -1,
                         which: Optional[str] = None) \
        -> Tuple[sparse.csr_matrix, np.ndarray, bool]:
    """Check the input matrix and return a proper adjacency matrix and vector of values.

    Parameters
    ----------
    input_matrix :
        Adjacency matrix or biadjacency matrix of the graph.
    allow_directed :
        If ``True`` (default), allow the graph to be directed.
    force_bipartite : bool
        If ``True``, return the adjacency matrix of a bipartite graph.
        Otherwise (default), do it only if the input matrix is not square or not symmetric
        with ``allow_directed=False``. Forced to ``True`` as soon as ``values_row`` or
        ``values_col`` is given.
    force_directed :
        If ``True`` return :math:`A = \\begin{bmatrix} 0 & B \\\\ 0 & 0 \\end{bmatrix}`.
        Otherwise (default), return :math:`A = \\begin{bmatrix} 0 & B \\\\ B^T & 0 \\end{bmatrix}`.
    values :
        Values of nodes (dictionary or vector). Negative values ignored.
    values_row, values_col :
        Values of rows and columns for bipartite graphs. Negative values ignored.
        Only used when ``values`` is ``None``.
    default_value :
        Default value of nodes (default = -1).
    which :
        Which values.
        If ``'probs'``, the values are divided by their sum when this sum is positive.
        If ``'labels'``, return the values, or distinct integer values
        (``0, 1, ..., n - 1``) if all non-negative values are the same.

    Returns
    -------
    adjacency :
        Adjacency matrix, as returned by ``get_adjacency``.
    values :
        Vector of values.
    bipartite :
        ``True`` if the input has been treated as a biadjacency matrix.
    """
    input_matrix = check_format(input_matrix)

    # Values given separately for rows and columns only make sense for a bipartite graph.
    side_values_given = not (values_row is None and values_col is None)
    if side_values_given:
        force_bipartite = True

    adjacency, bipartite = get_adjacency(input_matrix, allow_directed=allow_directed,
                                         force_bipartite=force_bipartite, force_directed=force_directed)

    shape = input_matrix.shape
    if not bipartite:
        values = get_values(shape, values, default_value=default_value)
    elif values is None:
        values = stack_values(shape, values_row, values_col, default_value=default_value)
    else:
        values = stack_values(shape, values, default_value=default_value)

    if which == 'probs':
        total = values.sum()
        if total > 0:
            # In-place division, kept as is: it requires an array whose dtype supports
            # true division (presumably floats from the helpers above — TODO confirm).
            values /= total
    elif which == 'labels':
        distinct_labels = set(values[values >= 0])
        if len(distinct_labels) == 1:
            values = np.arange(len(values))

    return adjacency, values, bipartite