scikit-network 0.28.3__cp39-cp39-macosx_12_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of scikit-network might be problematic. Click here for more details.

Files changed (240) hide show
  1. scikit_network-0.28.3.dist-info/AUTHORS.rst +41 -0
  2. scikit_network-0.28.3.dist-info/LICENSE +34 -0
  3. scikit_network-0.28.3.dist-info/METADATA +457 -0
  4. scikit_network-0.28.3.dist-info/RECORD +240 -0
  5. scikit_network-0.28.3.dist-info/WHEEL +5 -0
  6. scikit_network-0.28.3.dist-info/top_level.txt +1 -0
  7. sknetwork/__init__.py +21 -0
  8. sknetwork/classification/__init__.py +8 -0
  9. sknetwork/classification/base.py +84 -0
  10. sknetwork/classification/base_rank.py +143 -0
  11. sknetwork/classification/diffusion.py +134 -0
  12. sknetwork/classification/knn.py +162 -0
  13. sknetwork/classification/metrics.py +205 -0
  14. sknetwork/classification/pagerank.py +66 -0
  15. sknetwork/classification/propagation.py +152 -0
  16. sknetwork/classification/tests/__init__.py +1 -0
  17. sknetwork/classification/tests/test_API.py +35 -0
  18. sknetwork/classification/tests/test_diffusion.py +37 -0
  19. sknetwork/classification/tests/test_knn.py +24 -0
  20. sknetwork/classification/tests/test_metrics.py +53 -0
  21. sknetwork/classification/tests/test_pagerank.py +20 -0
  22. sknetwork/classification/tests/test_propagation.py +24 -0
  23. sknetwork/classification/vote.cpython-39-darwin.so +0 -0
  24. sknetwork/classification/vote.pyx +58 -0
  25. sknetwork/clustering/__init__.py +7 -0
  26. sknetwork/clustering/base.py +102 -0
  27. sknetwork/clustering/kmeans.py +142 -0
  28. sknetwork/clustering/louvain.py +255 -0
  29. sknetwork/clustering/louvain_core.cpython-39-darwin.so +0 -0
  30. sknetwork/clustering/louvain_core.pyx +134 -0
  31. sknetwork/clustering/metrics.py +91 -0
  32. sknetwork/clustering/postprocess.py +66 -0
  33. sknetwork/clustering/propagation_clustering.py +108 -0
  34. sknetwork/clustering/tests/__init__.py +1 -0
  35. sknetwork/clustering/tests/test_API.py +37 -0
  36. sknetwork/clustering/tests/test_kmeans.py +47 -0
  37. sknetwork/clustering/tests/test_louvain.py +104 -0
  38. sknetwork/clustering/tests/test_metrics.py +50 -0
  39. sknetwork/clustering/tests/test_post_processing.py +23 -0
  40. sknetwork/clustering/tests/test_postprocess.py +39 -0
  41. sknetwork/data/__init__.py +5 -0
  42. sknetwork/data/load.py +408 -0
  43. sknetwork/data/models.py +459 -0
  44. sknetwork/data/parse.py +621 -0
  45. sknetwork/data/test_graphs.py +84 -0
  46. sknetwork/data/tests/__init__.py +1 -0
  47. sknetwork/data/tests/test_API.py +30 -0
  48. sknetwork/data/tests/test_load.py +95 -0
  49. sknetwork/data/tests/test_models.py +52 -0
  50. sknetwork/data/tests/test_parse.py +253 -0
  51. sknetwork/data/tests/test_test_graphs.py +30 -0
  52. sknetwork/data/tests/test_toy_graphs.py +68 -0
  53. sknetwork/data/toy_graphs.py +619 -0
  54. sknetwork/embedding/__init__.py +10 -0
  55. sknetwork/embedding/base.py +90 -0
  56. sknetwork/embedding/force_atlas.py +197 -0
  57. sknetwork/embedding/louvain_embedding.py +174 -0
  58. sknetwork/embedding/louvain_hierarchy.py +142 -0
  59. sknetwork/embedding/metrics.py +66 -0
  60. sknetwork/embedding/random_projection.py +133 -0
  61. sknetwork/embedding/spectral.py +214 -0
  62. sknetwork/embedding/spring.py +198 -0
  63. sknetwork/embedding/svd.py +363 -0
  64. sknetwork/embedding/tests/__init__.py +1 -0
  65. sknetwork/embedding/tests/test_API.py +73 -0
  66. sknetwork/embedding/tests/test_force_atlas.py +35 -0
  67. sknetwork/embedding/tests/test_louvain_embedding.py +33 -0
  68. sknetwork/embedding/tests/test_louvain_hierarchy.py +19 -0
  69. sknetwork/embedding/tests/test_metrics.py +29 -0
  70. sknetwork/embedding/tests/test_random_projection.py +28 -0
  71. sknetwork/embedding/tests/test_spectral.py +84 -0
  72. sknetwork/embedding/tests/test_spring.py +50 -0
  73. sknetwork/embedding/tests/test_svd.py +37 -0
  74. sknetwork/flow/__init__.py +3 -0
  75. sknetwork/flow/flow.py +73 -0
  76. sknetwork/flow/tests/__init__.py +1 -0
  77. sknetwork/flow/tests/test_flow.py +17 -0
  78. sknetwork/flow/tests/test_utils.py +69 -0
  79. sknetwork/flow/utils.py +91 -0
  80. sknetwork/gnn/__init__.py +10 -0
  81. sknetwork/gnn/activation.py +117 -0
  82. sknetwork/gnn/base.py +155 -0
  83. sknetwork/gnn/base_activation.py +89 -0
  84. sknetwork/gnn/base_layer.py +109 -0
  85. sknetwork/gnn/gnn_classifier.py +381 -0
  86. sknetwork/gnn/layer.py +153 -0
  87. sknetwork/gnn/layers.py +127 -0
  88. sknetwork/gnn/loss.py +180 -0
  89. sknetwork/gnn/neighbor_sampler.py +65 -0
  90. sknetwork/gnn/optimizer.py +163 -0
  91. sknetwork/gnn/tests/__init__.py +1 -0
  92. sknetwork/gnn/tests/test_activation.py +56 -0
  93. sknetwork/gnn/tests/test_base.py +79 -0
  94. sknetwork/gnn/tests/test_base_layer.py +37 -0
  95. sknetwork/gnn/tests/test_gnn_classifier.py +192 -0
  96. sknetwork/gnn/tests/test_layers.py +80 -0
  97. sknetwork/gnn/tests/test_loss.py +33 -0
  98. sknetwork/gnn/tests/test_neigh_sampler.py +23 -0
  99. sknetwork/gnn/tests/test_optimizer.py +43 -0
  100. sknetwork/gnn/tests/test_utils.py +93 -0
  101. sknetwork/gnn/utils.py +219 -0
  102. sknetwork/hierarchy/__init__.py +7 -0
  103. sknetwork/hierarchy/base.py +69 -0
  104. sknetwork/hierarchy/louvain_hierarchy.py +264 -0
  105. sknetwork/hierarchy/metrics.py +234 -0
  106. sknetwork/hierarchy/paris.cpython-39-darwin.so +0 -0
  107. sknetwork/hierarchy/paris.pyx +317 -0
  108. sknetwork/hierarchy/postprocess.py +350 -0
  109. sknetwork/hierarchy/tests/__init__.py +1 -0
  110. sknetwork/hierarchy/tests/test_API.py +25 -0
  111. sknetwork/hierarchy/tests/test_algos.py +29 -0
  112. sknetwork/hierarchy/tests/test_metrics.py +62 -0
  113. sknetwork/hierarchy/tests/test_postprocess.py +57 -0
  114. sknetwork/hierarchy/tests/test_ward.py +25 -0
  115. sknetwork/hierarchy/ward.py +94 -0
  116. sknetwork/linalg/__init__.py +9 -0
  117. sknetwork/linalg/basics.py +37 -0
  118. sknetwork/linalg/diteration.cpython-39-darwin.so +0 -0
  119. sknetwork/linalg/diteration.pyx +49 -0
  120. sknetwork/linalg/eig_solver.py +93 -0
  121. sknetwork/linalg/laplacian.py +15 -0
  122. sknetwork/linalg/normalization.py +66 -0
  123. sknetwork/linalg/operators.py +225 -0
  124. sknetwork/linalg/polynome.py +76 -0
  125. sknetwork/linalg/ppr_solver.py +170 -0
  126. sknetwork/linalg/push.cpython-39-darwin.so +0 -0
  127. sknetwork/linalg/push.pyx +73 -0
  128. sknetwork/linalg/sparse_lowrank.py +142 -0
  129. sknetwork/linalg/svd_solver.py +91 -0
  130. sknetwork/linalg/tests/__init__.py +1 -0
  131. sknetwork/linalg/tests/test_eig.py +44 -0
  132. sknetwork/linalg/tests/test_laplacian.py +18 -0
  133. sknetwork/linalg/tests/test_normalization.py +38 -0
  134. sknetwork/linalg/tests/test_operators.py +70 -0
  135. sknetwork/linalg/tests/test_polynome.py +38 -0
  136. sknetwork/linalg/tests/test_ppr.py +50 -0
  137. sknetwork/linalg/tests/test_sparse_lowrank.py +61 -0
  138. sknetwork/linalg/tests/test_svd.py +38 -0
  139. sknetwork/linkpred/__init__.py +4 -0
  140. sknetwork/linkpred/base.py +80 -0
  141. sknetwork/linkpred/first_order.py +508 -0
  142. sknetwork/linkpred/first_order_core.cpython-39-darwin.so +0 -0
  143. sknetwork/linkpred/first_order_core.pyx +315 -0
  144. sknetwork/linkpred/postprocessing.py +98 -0
  145. sknetwork/linkpred/tests/__init__.py +1 -0
  146. sknetwork/linkpred/tests/test_API.py +49 -0
  147. sknetwork/linkpred/tests/test_postprocessing.py +21 -0
  148. sknetwork/path/__init__.py +4 -0
  149. sknetwork/path/metrics.py +148 -0
  150. sknetwork/path/search.py +65 -0
  151. sknetwork/path/shortest_path.py +186 -0
  152. sknetwork/path/tests/__init__.py +1 -0
  153. sknetwork/path/tests/test_metrics.py +29 -0
  154. sknetwork/path/tests/test_search.py +25 -0
  155. sknetwork/path/tests/test_shortest_path.py +45 -0
  156. sknetwork/ranking/__init__.py +9 -0
  157. sknetwork/ranking/base.py +56 -0
  158. sknetwork/ranking/betweenness.cpython-39-darwin.so +0 -0
  159. sknetwork/ranking/betweenness.pyx +99 -0
  160. sknetwork/ranking/closeness.py +95 -0
  161. sknetwork/ranking/harmonic.py +82 -0
  162. sknetwork/ranking/hits.py +94 -0
  163. sknetwork/ranking/katz.py +81 -0
  164. sknetwork/ranking/pagerank.py +107 -0
  165. sknetwork/ranking/postprocess.py +25 -0
  166. sknetwork/ranking/tests/__init__.py +1 -0
  167. sknetwork/ranking/tests/test_API.py +34 -0
  168. sknetwork/ranking/tests/test_betweenness.py +38 -0
  169. sknetwork/ranking/tests/test_closeness.py +34 -0
  170. sknetwork/ranking/tests/test_hits.py +20 -0
  171. sknetwork/ranking/tests/test_pagerank.py +69 -0
  172. sknetwork/regression/__init__.py +4 -0
  173. sknetwork/regression/base.py +56 -0
  174. sknetwork/regression/diffusion.py +190 -0
  175. sknetwork/regression/tests/__init__.py +1 -0
  176. sknetwork/regression/tests/test_API.py +34 -0
  177. sknetwork/regression/tests/test_diffusion.py +48 -0
  178. sknetwork/sknetwork.py +3 -0
  179. sknetwork/topology/__init__.py +9 -0
  180. sknetwork/topology/dag.py +74 -0
  181. sknetwork/topology/dag_core.cpython-39-darwin.so +0 -0
  182. sknetwork/topology/dag_core.pyx +38 -0
  183. sknetwork/topology/kcliques.cpython-39-darwin.so +0 -0
  184. sknetwork/topology/kcliques.pyx +193 -0
  185. sknetwork/topology/kcore.cpython-39-darwin.so +0 -0
  186. sknetwork/topology/kcore.pyx +120 -0
  187. sknetwork/topology/structure.py +234 -0
  188. sknetwork/topology/tests/__init__.py +1 -0
  189. sknetwork/topology/tests/test_cliques.py +28 -0
  190. sknetwork/topology/tests/test_cores.py +21 -0
  191. sknetwork/topology/tests/test_dag.py +26 -0
  192. sknetwork/topology/tests/test_structure.py +99 -0
  193. sknetwork/topology/tests/test_triangles.py +42 -0
  194. sknetwork/topology/tests/test_wl_coloring.py +49 -0
  195. sknetwork/topology/tests/test_wl_kernel.py +31 -0
  196. sknetwork/topology/triangles.cpython-39-darwin.so +0 -0
  197. sknetwork/topology/triangles.pyx +166 -0
  198. sknetwork/topology/weisfeiler_lehman.py +163 -0
  199. sknetwork/topology/weisfeiler_lehman_core.cpython-39-darwin.so +0 -0
  200. sknetwork/topology/weisfeiler_lehman_core.pyx +116 -0
  201. sknetwork/utils/__init__.py +40 -0
  202. sknetwork/utils/base.py +35 -0
  203. sknetwork/utils/check.py +354 -0
  204. sknetwork/utils/co_neighbor.py +71 -0
  205. sknetwork/utils/format.py +219 -0
  206. sknetwork/utils/kmeans.py +89 -0
  207. sknetwork/utils/knn.py +166 -0
  208. sknetwork/utils/knn1d.cpython-39-darwin.so +0 -0
  209. sknetwork/utils/knn1d.pyx +80 -0
  210. sknetwork/utils/membership.py +82 -0
  211. sknetwork/utils/minheap.cpython-39-darwin.so +0 -0
  212. sknetwork/utils/minheap.pxd +22 -0
  213. sknetwork/utils/minheap.pyx +111 -0
  214. sknetwork/utils/neighbors.py +115 -0
  215. sknetwork/utils/seeds.py +75 -0
  216. sknetwork/utils/simplex.py +140 -0
  217. sknetwork/utils/tests/__init__.py +1 -0
  218. sknetwork/utils/tests/test_base.py +28 -0
  219. sknetwork/utils/tests/test_bunch.py +16 -0
  220. sknetwork/utils/tests/test_check.py +190 -0
  221. sknetwork/utils/tests/test_co_neighbor.py +43 -0
  222. sknetwork/utils/tests/test_format.py +61 -0
  223. sknetwork/utils/tests/test_kmeans.py +21 -0
  224. sknetwork/utils/tests/test_knn.py +32 -0
  225. sknetwork/utils/tests/test_membership.py +24 -0
  226. sknetwork/utils/tests/test_neighbors.py +41 -0
  227. sknetwork/utils/tests/test_projection_simplex.py +33 -0
  228. sknetwork/utils/tests/test_seeds.py +67 -0
  229. sknetwork/utils/tests/test_verbose.py +15 -0
  230. sknetwork/utils/tests/test_ward.py +20 -0
  231. sknetwork/utils/timeout.py +38 -0
  232. sknetwork/utils/verbose.py +37 -0
  233. sknetwork/utils/ward.py +60 -0
  234. sknetwork/visualization/__init__.py +4 -0
  235. sknetwork/visualization/colors.py +34 -0
  236. sknetwork/visualization/dendrograms.py +229 -0
  237. sknetwork/visualization/graphs.py +819 -0
  238. sknetwork/visualization/tests/__init__.py +1 -0
  239. sknetwork/visualization/tests/test_dendrograms.py +53 -0
  240. sknetwork/visualization/tests/test_graphs.py +167 -0
@@ -0,0 +1,354 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ Created in April 2019
5
+ @author: Nathan de Lara <nathan.delara@polytechnique.org>
6
+ """
7
+ import warnings
8
+ from typing import Union, Optional
9
+
10
+ import numpy as np
11
+ from scipy import sparse
12
+
13
+
14
def has_nonnegative_entries(input_matrix: Union[sparse.csr_matrix, np.ndarray]) -> bool:
    """True if the array has non-negative entries.

    Parameters
    ----------
    input_matrix:
        Dense NumPy array or SciPy sparse matrix (any sparse format).

    Returns
    -------
    bool
        ``True`` if no entry is negative (also ``True`` when there is no entry at all).
    """
    if sparse.issparse(input_matrix):
        # Implicit zeros are non-negative, so only stored values need to be inspected.
        # Converting to CSR yields a flat ``data`` array whatever the input sparse format.
        return bool(np.all(input_matrix.tocsr().data >= 0))
    return bool(np.all(input_matrix >= 0))
20
+
21
+
22
def is_weakly_connected(adjacency: sparse.csr_matrix) -> bool:
    """Check whether a graph is weakly connected.

    Parameters
    ----------
    adjacency:
        Adjacency matrix of the graph.

    Returns
    -------
    bool
        ``True`` if the graph has exactly one weakly connected component.
    """
    # A symmetric adjacency is handed to SciPy as an undirected graph.
    directed = not is_symmetric(adjacency)
    n_components = sparse.csgraph.connected_components(adjacency, directed=directed, connection='weak',
                                                       return_labels=False)
    return n_components == 1
31
+
32
+
33
def check_connected(adjacency: sparse.csr_matrix):
    """Check that a graph is weakly connected and raise an error otherwise.

    Parameters
    ----------
    adjacency:
        Adjacency matrix of the graph.

    Raises
    ------
    ValueError
        If the graph is not weakly connected.
    """
    if not is_weakly_connected(adjacency):
        raise ValueError('The graph is expected to be connected.')
39
+
40
+
41
def check_nonnegative(input_matrix: Union[sparse.csr_matrix, np.ndarray]):
    """Check that the array has non-negative entries and raise an error otherwise.

    Parameters
    ----------
    input_matrix:
        Array or sparse matrix to validate.

    Raises
    ------
    ValueError
        If at least one entry is negative.
    """
    if has_nonnegative_entries(input_matrix):
        return
    raise ValueError('Only nonnegative values are expected.')
45
+
46
+
47
def has_positive_entries(input_matrix: np.ndarray) -> bool:
    """True if the array has positive entries.

    Parameters
    ----------
    input_matrix:
        Dense NumPy array (subclasses of ``np.ndarray`` are accepted).

    Returns
    -------
    bool
        ``True`` if every entry is strictly positive.

    Raises
    ------
    TypeError
        If the input is not a NumPy array.
    """
    # isinstance (rather than an exact type comparison) so that ndarray subclasses are not rejected.
    if not isinstance(input_matrix, np.ndarray):
        raise TypeError('Entry must be a dense NumPy array.')
    return bool(np.all(input_matrix > 0))
53
+
54
+
55
def check_positive(input_matrix: Union[sparse.csr_matrix, np.ndarray]):
    """Check that the array has positive entries and raise an error otherwise.

    Parameters
    ----------
    input_matrix:
        Array to validate.

    Raises
    ------
    ValueError
        If at least one entry is not positive.
    """
    if has_positive_entries(input_matrix):
        return
    raise ValueError('Only positive values are expected.')
59
+
60
+
61
def is_proba_array(input_matrix: np.ndarray) -> bool:
    """True if each line of the array has non-negative entries which sum to 1.

    Parameters
    ----------
    input_matrix:
        One-dimensional array (single distribution) or two-dimensional array (one distribution per row).

    Raises
    ------
    TypeError
        If the array is neither one nor two-dimensional.
    """
    n_dim = len(input_matrix.shape)
    if n_dim not in (1, 2):
        raise TypeError('Entry must be one or two-dimensional array.')

    if n_dim == 1:
        return has_nonnegative_entries(input_matrix) and np.isclose(input_matrix.sum(), 1)

    # Two-dimensional case: the norm of the row-sum deviations from 1 must be close to 0.
    n_row, n_col = input_matrix.shape
    row_sum_error = input_matrix.dot(np.ones(n_col)) - np.ones(n_row)
    return has_nonnegative_entries(input_matrix) and np.isclose(np.linalg.norm(row_sum_error), 0)
71
+
72
+
73
def is_square(input_matrix: Union[sparse.csr_matrix, np.ndarray]) -> bool:
    """True if the matrix has as many rows as columns."""
    shape = input_matrix.shape
    n_row = shape[0]
    n_col = shape[1]
    return n_row == n_col
76
+
77
+
78
def check_square(input_matrix: Union[sparse.csr_matrix, np.ndarray]):
    """Check that a matrix is square and raise an error otherwise.

    Parameters
    ----------
    input_matrix:
        Matrix to validate.

    Raises
    ------
    ValueError
        If the matrix is not square.
    """
    if not is_square(input_matrix):
        raise ValueError('The adjacency matrix is expected to be square.')
84
+
85
+
86
def is_symmetric(input_matrix: sparse.csr_matrix) -> bool:
    """True if the matrix is symmetric.

    Parameters
    ----------
    input_matrix:
        Matrix to test.

    Returns
    -------
    bool
        ``True`` if the matrix equals its transpose; ``False`` for a rectangular matrix.
    """
    shape = input_matrix.shape
    # A rectangular matrix cannot be symmetric; answer directly instead of letting
    # the subtraction below fail on inconsistent shapes.
    if len(shape) == 2 and shape[0] != shape[1]:
        return False
    return sparse.csr_matrix(input_matrix - input_matrix.T).nnz == 0
89
+
90
+
91
def check_symmetry(input_matrix: sparse.csr_matrix):
    """Check that a matrix is symmetric and raise an error otherwise.

    Parameters
    ----------
    input_matrix:
        Matrix to validate.

    Raises
    ------
    ValueError
        If the matrix is not symmetric.
    """
    if is_symmetric(input_matrix):
        return
    raise ValueError('The input matrix is expected to be symmetric.')
95
+
96
+
97
def make_weights(distribution: str, adjacency: sparse.csr_matrix) -> np.ndarray:
    """Build an array of node weights from a matrix and the name of a distribution.

    Parameters
    ----------
    distribution:
        Distribution for node sampling. Only ``'degree'`` or ``'uniform'`` are accepted (case-insensitive).
    adjacency:
        The adjacency matrix of the neighbors.

    Returns
    -------
    node_weights: np.ndarray
        Weights of nodes: row sums of the adjacency for ``'degree'``, all ones for ``'uniform'``.

    Raises
    ------
    ValueError
        If the distribution name is not recognized.
    """
    n_nodes = adjacency.shape[0]
    kind = distribution.lower()
    if kind == 'uniform':
        return np.ones(n_nodes)
    if kind == 'degree':
        # Multiplying by the all-ones vector sums each row of the adjacency.
        return adjacency.dot(np.ones(adjacency.shape[1]))
    raise ValueError('Unknown distribution of node weights.')
121
+
122
+
123
def check_format(input_matrix: Union[sparse.csr_matrix, sparse.csc_matrix, sparse.coo_matrix, sparse.lil_matrix,
                                     np.ndarray]) -> sparse.csr_matrix:
    """Check whether the matrix is a NumPy array or a Scipy sparse matrix and return
    the corresponding Scipy CSR matrix.

    Parameters
    ----------
    input_matrix:
        Dense NumPy array or SciPy sparse matrix (any sparse format).

    Returns
    -------
    sparse.csr_matrix
        The input converted to CSR format.

    Raises
    ------
    TypeError
        If the input is neither a SciPy sparse matrix nor a NumPy array.
    ValueError
        If the converted matrix has no stored entry.
    """
    # Accept every SciPy sparse format (not only CSR/CSC/COO/LIL) and ndarray subclasses,
    # in line with what the error message below promises.
    if not (sparse.issparse(input_matrix) or isinstance(input_matrix, np.ndarray)):
        raise TypeError('The input matrix must be in Scipy sparse format or Numpy ndarray format.')
    input_matrix = sparse.csr_matrix(input_matrix)
    if input_matrix.nnz == 0:
        raise ValueError('The input matrix is empty.')
    return input_matrix
135
+
136
+
137
def check_is_proba(entry: Union[float, int], name: Optional[str] = None):
    """Check whether the number is non-negative and less than or equal to 1.

    Parameters
    ----------
    entry:
        Value to validate. Python and NumPy real scalars are accepted; booleans are not.
    name:
        Name used in the error messages (``'Probabilities'`` if ``None``).

    Raises
    ------
    TypeError
        If the entry is not a real number.
    ValueError
        If the entry is outside [0, 1] (this includes NaN).
    """
    if name is None:
        name = 'Probabilities'
    # NumPy scalars (e.g. np.float64 produced by array arithmetic) are valid numbers too.
    is_number = isinstance(entry, (int, float, np.integer, np.floating))
    if isinstance(entry, bool) or not is_number:
        raise TypeError('{} must be floats (or ints if 0 or 1).'.format(name))
    # Written as a positive range test so that NaN, which fails every comparison, is rejected.
    if not 0 <= entry <= 1:
        raise ValueError('{} must be between 0 and 1.'.format(name))
145
+
146
+
147
def check_damping_factor(damping_factor: float):
    """Check if the damping factor has a valid value.

    Parameters
    ----------
    damping_factor:
        Value to validate.

    Raises
    ------
    ValueError
        If the damping factor is not in [0, 1[ (this includes NaN).
    """
    # Positive range test: NaN fails every comparison and is therefore rejected as well.
    if not 0 <= damping_factor < 1:
        raise ValueError('A damping factor must have a value in [0, 1[.')
151
+
152
+
153
def check_weights(weights: Union['str', np.ndarray], adjacency: Union[sparse.csr_matrix, sparse.csc_matrix],
                  positive_entries: bool = False) -> np.ndarray:
    """Validate node weights against an adjacency matrix and return them as an array.

    Parameters
    ----------
    weights:
        Probabilities for node sampling in the null model. ``'degree'``, ``'uniform'`` or custom weights.
    adjacency:
        The adjacency matrix of the graph.
    positive_entries:
        If true, the weights must all be positive, if False, the weights must be non-negative.

    Returns
    -------
    node_weights: np.ndarray
        Valid weights of nodes (not normalized).

    Raises
    ------
    TypeError
        If the weights are neither a string nor a NumPy array.
    ValueError
        If the number of weights differs from the number of rows, or if the values are invalid.
    """
    n_nodes = adjacency.shape[0]

    if type(weights) is str:
        node_weights = make_weights(weights, adjacency)
    elif type(weights) is np.ndarray:
        if len(weights) != n_nodes:
            raise ValueError('The number of node weights must match the number of nodes.')
        node_weights = weights
    else:
        raise TypeError(
            'Node weights must be a known distribution ("degree" or "uniform" string) or a custom NumPy array.')

    if positive_entries and not has_positive_entries(node_weights):
        raise ValueError('All weights must be positive.')

    # Reached in every non-failing case, including when the positivity test above succeeded.
    has_negative = np.any(node_weights < 0)
    if has_negative or node_weights.sum() <= 0:
        raise ValueError('Node weights must be non-negative with positive sum.')

    return node_weights
190
+
191
+
192
def get_probs(weights: Union['str', np.ndarray], adjacency: Union[sparse.csr_matrix, sparse.csc_matrix],
              positive_entries: bool = False) -> np.ndarray:
    """Validate the weights for the adjacency and return them normalized to sum to 1.

    Parameters
    ----------
    weights:
        ``'degree'``, ``'uniform'`` or custom weights.
    adjacency:
        The adjacency matrix of the graph.
    positive_entries:
        Passed to :func:`check_weights`.

    Returns
    -------
    np.ndarray
        Validated weights divided by their sum.
    """
    node_weights = check_weights(weights, adjacency, positive_entries)
    total = np.sum(node_weights)
    return node_weights / total
199
+
200
+
201
def check_random_state(random_state: Optional[Union[np.random.RandomState, int]]):
    """Turn a seed or a NumPy random state into a NumPy random state.

    Parameters
    ----------
    random_state:
        ``None``, an integer seed (Python or NumPy integer) or a ``np.random.RandomState`` object.

    Returns
    -------
    np.random.RandomState
        A new unseeded random state if ``None``, a new random state built from the seed if an integer,
        the object itself if it is already a random state.

    Raises
    ------
    TypeError
        If the argument is of any other type.
    """
    if random_state is None:
        return np.random.RandomState()
    if isinstance(random_state, np.random.RandomState):
        return random_state
    # NumPy integers (e.g. seeds drawn from another generator) are valid seeds; booleans are not.
    if isinstance(random_state, (int, np.integer)) and not isinstance(random_state, bool):
        return np.random.RandomState(random_state)
    raise TypeError('To specify a random state, pass the seed (as an int) or a NumPy random state object.')
211
+
212
+
213
def check_n_neighbors(n_neighbors: int, n_seeds: int):
    """Cap the number of neighbors at the number of labeled samples.

    Parameters
    ----------
    n_neighbors:
        Requested number of neighbors.
    n_seeds:
        Number of seeds (labeled samples).

    Returns
    -------
    int
        ``n_neighbors``, or ``n_seeds`` (with a warning) if ``n_neighbors`` exceeds it.
    """
    if not n_neighbors > n_seeds:
        return n_neighbors
    warnings.warn(Warning("The number of neighbors cannot exceed the number of seeds. Changed accordingly."))
    return n_seeds
219
+
220
+
221
def check_labels(labels: np.ndarray):
    """Check labels of the seeds for semi-supervised algorithms.

    Parameters
    ----------
    labels:
        Array of labels; negative values are ignored.

    Returns
    -------
    classes: np.ndarray
        Sorted distinct non-negative labels.
    n_classes: int
        Number of distinct non-negative labels.

    Raises
    ------
    ValueError
        If there are fewer than two distinct non-negative labels.
    """
    seed_labels = labels[labels >= 0]
    classes: np.ndarray = np.unique(seed_labels)
    n_classes: int = len(classes)

    if n_classes >= 2:
        return classes, n_classes
    raise ValueError('There must be at least two distinct labels.')
231
+
232
+
233
def check_n_jobs(n_jobs: Optional[int] = None):
    """Parse the ``n_jobs`` parameter for multiprocessing.

    Parameters
    ----------
    n_jobs:
        Requested number of jobs.

    Returns
    -------
    Optional[int]
        ``1`` if ``n_jobs`` is ``None``, ``None`` if ``n_jobs`` is ``-1``, ``n_jobs`` unchanged otherwise.
    """
    if n_jobs is None:
        return 1
    return None if n_jobs == -1 else n_jobs
241
+
242
+
243
def check_adjacency_vector(adjacency_vectors: Union[sparse.csr_matrix, np.ndarray],
                           n: Optional[int] = None) -> sparse.csr_matrix:
    """Check the format of new samples for predict methods.

    Parameters
    ----------
    adjacency_vectors:
        Adjacency vectors of the new samples.
    n:
        Expected number of columns; no length check if ``None``.

    Returns
    -------
    sparse.csr_matrix
        Adjacency vectors as returned by :func:`check_format`.

    Raises
    ------
    ValueError
        If ``n`` is given and differs from the number of columns.
    """
    vectors = check_format(adjacency_vectors)
    if n is None:
        return vectors
    if vectors.shape[1] != n:
        raise ValueError('The adjacency vector must be of length equal to the number nodes in the graph.')
    return vectors
250
+
251
+
252
def check_n_clusters(n_clusters: int, n_row: int, n_min: int = 0):
    """Check that the number of clusters lies between ``n_min`` and the number of rows.

    Parameters
    ----------
    n_clusters:
        Number of clusters.
    n_row:
        Number of rows (upper bound, inclusive).
    n_min:
        Lower bound (inclusive).

    Raises
    ------
    ValueError
        If ``n_clusters`` is greater than ``n_row`` or less than ``n_min``.
    """
    too_many = n_clusters > n_row
    if too_many:
        raise ValueError('The number of clusters exceeds the number of rows.')
    too_few = n_clusters < n_min
    if too_few:
        raise ValueError('The number of clusters must be at least {}.'.format(n_min))
    return None
260
+
261
+
262
def check_min_size(n_row, n_min):
    """Check that an adjacency has the required number of rows and raise an error otherwise.

    Parameters
    ----------
    n_row:
        Number of rows (nodes).
    n_min:
        Minimum number of rows required.

    Raises
    ------
    ValueError
        If ``n_row`` is less than ``n_min``.
    """
    if not n_row < n_min:
        return None
    raise ValueError('The graph must contain at least {} nodes.'.format(n_min))
268
+
269
+
270
def check_dendrogram(dendrogram):
    """Check the shape of a dendrogram.

    Parameters
    ----------
    dendrogram:
        Array expected to be two-dimensional with 4 columns.

    Raises
    ------
    ValueError
        If the array is not two-dimensional or does not have 4 columns.
    """
    well_formed = dendrogram.ndim == 2 and dendrogram.shape[1] == 4
    if well_formed:
        return None
    raise ValueError("Dendrogram has incorrect shape.")
276
+
277
+
278
def check_min_nnz(nnz, n_min):
    """Check that an adjacency has the required number of edges and raise an error otherwise.

    Parameters
    ----------
    nnz:
        Number of non-zero entries (edges).
    n_min:
        Minimum number of edges required.

    Raises
    ------
    ValueError
        If ``nnz`` is less than ``n_min``.
    """
    if not nnz < n_min:
        return None
    raise ValueError('The graph must contain at least {} edge(s).'.format(n_min))
284
+
285
+
286
def check_n_components(n_components, n_min) -> int:
    """Cap the number of components at a given bound.

    Parameters
    ----------
    n_components:
        Requested number of components.
    n_min:
        Upper bound on the number of components (the parameter name is kept for compatibility).

    Returns
    -------
    int
        ``n_components``, or ``n_min`` (with a warning) if ``n_components`` exceeds it.
    """
    if not n_components > n_min:
        return n_components
    message = "The dimension of the embedding cannot exceed {}. Changed accordingly.".format(n_min)
    warnings.warn(Warning(message))
    return n_min
293
+
294
+
295
def check_scaling(scaling: float, adjacency: sparse.csr_matrix, regularize: bool):
    """Check the scaling factor.

    Parameters
    ----------
    scaling:
        Scaling factor; must be non-negative.
    adjacency:
        Adjacency matrix of the graph (only inspected when ``scaling`` is positive and ``regularize`` is false).
    regularize:
        Whether regularization is applied.

    Raises
    ------
    ValueError
        If ``scaling`` is negative, or if it is positive without regularization on a graph
        that is not weakly connected.
    """
    if scaling < 0:
        raise ValueError("The 'scaling' parameter must be non-negative.")

    # The connectivity test is evaluated last so that it is skipped whenever it is not needed.
    if scaling and (not regularize) and not is_weakly_connected(adjacency):
        # A separating space is needed between the two concatenated sentences.
        raise ValueError("Positive 'scaling' is valid only if the graph is connected or with regularization. "
                         "Call 'fit' either with 'scaling' = 0 or positive 'regularization'.")
303
+
304
+
305
def has_boolean_entries(input_matrix: np.ndarray) -> bool:
    """True if the array has boolean entries.

    Parameters
    ----------
    input_matrix:
        Dense NumPy array (subclasses of ``np.ndarray`` are accepted).

    Returns
    -------
    bool
        ``True`` if the dtype of the array is boolean.

    Raises
    ------
    TypeError
        If the input is not a NumPy array.
    """
    # isinstance (rather than an exact type comparison) so that ndarray subclasses are not rejected.
    if not isinstance(input_matrix, np.ndarray):
        raise TypeError('Entry must be a dense NumPy array.')
    return input_matrix.dtype == 'bool'
311
+
312
+
313
def check_boolean(input_matrix: np.ndarray):
    """Check that the array has boolean entries and raise an error otherwise.

    Parameters
    ----------
    input_matrix:
        Array to validate.

    Raises
    ------
    ValueError
        If the entries are not boolean.
    """
    if has_boolean_entries(input_matrix):
        return
    raise ValueError('Only boolean values are expected.')
317
+
318
+
319
def check_vector_format(vector_1: np.ndarray, vector_2: np.ndarray):
    """Check that the inputs are vectors of same length.

    Parameters
    ----------
    vector_1, vector_2:
        Arrays to compare.

    Raises
    ------
    ValueError
        If one of the arrays has more than one dimension, or if the lengths differ.
    """
    if any(len(vector.shape) > 1 for vector in (vector_1, vector_2)):
        raise ValueError('The arrays must be 1-dimensional.')
    length_1 = vector_1.shape[0]
    length_2 = vector_2.shape[0]
    if length_1 != length_2:
        raise ValueError('The arrays do not have the same length.')
325
+
326
+
327
def has_self_loops(input_matrix: sparse.csr_matrix) -> bool:
    """True if each node has a self loop, i.e. no diagonal entry is zero."""
    diagonal = input_matrix.diagonal()
    return bool(diagonal.astype(bool).all())
330
+
331
+
332
def add_self_loops(adjacency: sparse.csr_matrix) -> sparse.csr_matrix:
    """Add self loops to adjacency matrix.

    Parameters
    ----------
    adjacency : sparse.csr_matrix
        Adjacency matrix of the graph.

    Returns
    -------
    sparse.csr_matrix
        Adjacency matrix of the graph with 1 added to each entry of the main diagonal.
    """
    n_row, n_col = adjacency.shape

    if not is_square(adjacency):
        # Rectangular case: start from a square identity and resize it to the shape of the adjacency.
        identity = sparse.eye(n_row)
        identity.resize(n_row, n_col)
        adjacency += identity
        return adjacency

    return sparse.diags(np.ones(n_col), format='csr') + adjacency
@@ -0,0 +1,71 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ Created on October 2019
5
+ @author: Nathan de Lara <nathan.delara@polytechnique.org>
6
+ """
7
+ from typing import Union
8
+
9
+ import numpy as np
10
+ from scipy import sparse
11
+
12
+ from sknetwork.embedding.svd import SVD, GSVD
13
+ from sknetwork.linalg.normalization import normalize
14
+ from sknetwork.utils.check import check_format
15
+ from sknetwork.utils.knn import KNNDense
16
+
17
+
18
def co_neighbor_graph(adjacency: Union[sparse.csr_matrix, np.ndarray], normalized: bool = True, method='knn',
                      n_neighbors: int = 5, n_components: int = 8) -> sparse.csr_matrix:
    """Compute the co-neighborhood adjacency.

    * Graphs
    * Digraphs
    * Bigraphs

    :math:`\\tilde{A} = AF^{-1}A^T`,

    where F is a weight matrix.

    Parameters
    ----------
    adjacency:
        Adjacency of the input graph.
    normalized:
        If ``True``, F is the diagonal in-degree matrix :math:`F = \\text{diag}(A^T1)`.
        Otherwise, F is the identity matrix.
    method:
        Either ``'exact'`` or ``'knn'`` (case-insensitive). If ``'exact'``, the output is computed with
        matrix multiplication; the density can be much higher than in the input graph and this can
        trigger Memory errors.
        If ``'knn'``, the co-neighborhood is approximated through KNNDense-search in an embedding space
        (GSVD if ``normalized``, SVD otherwise).
    n_neighbors:
        Number of neighbors for the KNNDense search. Only useful if ``method='knn'``.
    n_components:
        Dimension of the embedding space. Only useful if ``method='knn'``.

    Returns
    -------
    adjacency : sparse.csr_matrix
        Adjacency of the co-neighborhood.

    Raises
    ------
    ValueError
        If ``method`` is neither ``'exact'`` nor ``'knn'``.
    """
    adjacency = check_format(adjacency).astype(float)
    method = method.lower()
    if method not in ('exact', 'knn'):
        raise ValueError('method must be "exact" or "knn".')

    if method == 'exact':
        transposed = adjacency.T
        forward = normalize(transposed).tocsr() if normalized else transposed
        return adjacency.dot(forward)

    # method == 'knn': embed the nodes, then link each one to its nearest neighbors in the embedding.
    embedder_class = GSVD if normalized else SVD
    algo = embedder_class(n_components, regularization=None)
    embedding = algo.fit_transform(adjacency)
    knn = KNNDense(n_neighbors, undirected=True)
    knn.fit(embedding)
    return knn.adjacency_
@@ -0,0 +1,219 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ Created in April 2019
5
+ @author: Nathan de Lara <nathan.delara@polytechnique.org>
6
+ """
7
+ from typing import Union, Tuple, Optional
8
+
9
+ import numpy as np
10
+ from scipy import sparse
11
+
12
+ from sknetwork.linalg.sparse_lowrank import SparseLR
13
+ from sknetwork.utils.check import check_format, is_square, is_symmetric
14
+ from sknetwork.utils.seeds import stack_seeds, get_seeds
15
+
16
+
17
def check_csr_or_slr(adjacency):
    """Raise a ``TypeError`` unless the input is a scipy CSR matrix or a SparseLR object.

    The comparison is made on the exact type of the input, so instances of subclasses are rejected.
    """
    if type(adjacency) in (sparse.csr_matrix, SparseLR):
        return
    raise TypeError('Input must be a scipy CSR matrix or a SparseLR object.')
21
+
22
+
23
def directed2undirected(adjacency: Union[sparse.csr_matrix, SparseLR],
                        weighted: bool = True) -> Union[sparse.csr_matrix, SparseLR]:
    """Adjacency matrix of the undirected graph associated with some directed graph.

    The new adjacency matrix becomes either:

    :math:`A+A^T` (default)

    or

    :math:`\\max(A,A^T)`

    If the initial adjacency matrix :math:`A` is binary, bidirectional edges have weight 2
    (first method, default) or 1 (second method).

    Parameters
    ----------
    adjacency :
        Adjacency matrix (scipy CSR matrix or SparseLR object).
    weighted :
        If ``True``, return the sum of the weights in both directions of each edge.
        Otherwise, return a boolean matrix. Only ``True`` is supported for SparseLR objects.

    Returns
    -------
    new_adjacency :
        New adjacency matrix (same format as input). For a CSR input with ``weighted=True``,
        the data type is ``float`` if the input has a floating-point data type and ``int`` otherwise.

    Raises
    ------
    TypeError
        If the input is neither a scipy CSR matrix nor a SparseLR object.
    ValueError
        If the input is a SparseLR object and ``weighted`` is ``False``.
    """
    if type(adjacency) is sparse.csr_matrix:
        if weighted:
            # Any floating-point dtype (not only float64) must stay floating point:
            # casting e.g. float32 weights to int would silently truncate them.
            data_type = float if np.issubdtype(adjacency.dtype, np.floating) else int
            new_adjacency = adjacency.astype(data_type) + adjacency.T
        else:
            new_adjacency = (adjacency + adjacency.T).astype(bool)
        # Rebind before sorting so that the returned object is the one whose indices are sorted,
        # whether or not ``tocsr`` returns the same object.
        new_adjacency = new_adjacency.tocsr()
        new_adjacency.sort_indices()
        return new_adjacency

    # Anything that is not exactly a CSR matrix must be a SparseLR object.
    check_csr_or_slr(adjacency)
    if not weighted:
        raise ValueError('This function only works with ``weighted=True`` for SparseLR objects.')
    # The transpose of a low-rank term x y^T is y x^T.
    new_tuples = [(y, x) for (x, y) in adjacency.low_rank_tuples]
    return SparseLR(directed2undirected(adjacency.sparse_mat), adjacency.low_rank_tuples + new_tuples)
69
+
70
+
71
def bipartite2directed(biadjacency: Union[sparse.csr_matrix, SparseLR]) -> Union[sparse.csr_matrix, SparseLR]:
    """Adjacency matrix of the directed graph associated with a bipartite graph
    (with edges from one part to the other).

    The returned adjacency matrix is:

    :math:`A = \\begin{bmatrix} 0 & B \\\\ 0 & 0 \\end{bmatrix}`

    where :math:`B` is the biadjacency matrix.

    Parameters
    ----------
    biadjacency :
        Biadjacency matrix of the graph (scipy CSR matrix or SparseLR object).

    Returns
    -------
    adjacency :
        Adjacency matrix (same format as input).

    Raises
    ------
    TypeError
        If the input is neither a scipy CSR matrix nor a SparseLR object.
    """
    check_csr_or_slr(biadjacency)
    n_row, n_col = biadjacency.shape

    if type(biadjacency) != sparse.csr_matrix:
        # SparseLR input: zero-pad each low-rank pair so that x y^T lands in the top-right block.
        padded_tuples = []
        for x, y in biadjacency.low_rank_tuples:
            left = np.hstack((x, np.zeros(n_col)))
            right = np.hstack((np.zeros(n_row), y))
            padded_tuples.append((left, right))
        return SparseLR(bipartite2directed(biadjacency.sparse_mat), padded_tuples)

    # CSR input: the explicit empty bottom-left block fixes the shape of the block matrix.
    empty_block = sparse.csr_matrix((n_col, n_row))
    blocks = [[None, biadjacency], [empty_block, None]]
    adjacency = sparse.bmat(blocks, format='csr')
    adjacency.sort_indices()
    return adjacency
101
+
102
+
103
def bipartite2undirected(biadjacency: Union[sparse.csr_matrix, SparseLR]) -> Union[sparse.csr_matrix, SparseLR]:
    """Adjacency matrix of a bigraph defined by its biadjacency matrix.

    The returned adjacency matrix is:

    :math:`A = \\begin{bmatrix} 0 & B \\\\ B^T & 0 \\end{bmatrix}`

    where :math:`B` is the biadjacency matrix of the bipartite graph.

    Parameters
    ----------
    biadjacency:
        Biadjacency matrix of the graph (scipy CSR matrix or SparseLR object).

    Returns
    -------
    adjacency :
        Adjacency matrix (same format as input).

    Raises
    ------
    TypeError
        If the input is neither a scipy CSR matrix nor a SparseLR object.
    """
    check_csr_or_slr(biadjacency)

    if type(biadjacency) != sparse.csr_matrix:
        # SparseLR input: each low-rank pair yields one term for the B block and one for the B^T block.
        n_row, n_col = biadjacency.shape
        symmetric_tuples = []
        for x, y in biadjacency.low_rank_tuples:
            upper = (np.hstack((x, np.zeros(n_col))), np.hstack((np.zeros(n_row), y)))
            lower = (np.hstack((np.zeros(n_row), y)), np.hstack((x, np.zeros(n_col))))
            symmetric_tuples.extend([upper, lower])
        return SparseLR(bipartite2undirected(biadjacency.sparse_mat), symmetric_tuples)

    blocks = [[None, biadjacency], [biadjacency.T, None]]
    adjacency = sparse.bmat(blocks, format='csr')
    adjacency.sort_indices()
    return adjacency
134
+
135
+
136
def get_adjacency(input_matrix: Union[sparse.csr_matrix, np.ndarray], allow_directed: bool = True,
                  force_bipartite: bool = False, force_directed: bool = False)\
        -> Tuple[sparse.csr_matrix, bool]:
    """Check the input matrix and return a proper adjacency matrix.

    Parameters
    ----------
    input_matrix :
        Adjacency matrix or biadjacency matrix of the graph.
    allow_directed :
        If ``True`` (default), allow the graph to be directed.
    force_bipartite : bool
        If ``True``, return the adjacency matrix of a bipartite graph.
        Otherwise (default), do it only if the input matrix is not square or not symmetric
        with ``allow_directed=False``.
    force_directed :
        Only used when the graph is handled as bipartite.
        If ``True`` return :math:`A = \\begin{bmatrix} 0 & B \\\\ 0 & 0 \\end{bmatrix}`.
        Otherwise (default), return :math:`A = \\begin{bmatrix} 0 & B \\\\ B^T & 0 \\end{bmatrix}`.

    Returns
    -------
    adjacency :
        The checked input matrix itself, or the adjacency matrix built from it when the graph
        is handled as bipartite.
    bipartite :
        ``True`` if the input matrix has been handled as a biadjacency matrix.
    """
    input_matrix = check_format(input_matrix)

    # The input is kept as an adjacency matrix only if nothing forces the bipartite view.
    keep_as_adjacency = (not force_bipartite
                         and is_square(input_matrix)
                         and (allow_directed or is_symmetric(input_matrix)))
    if keep_as_adjacency:
        return input_matrix, False

    convert = bipartite2directed if force_directed else bipartite2undirected
    return convert(input_matrix), True
166
+
167
+
168
def get_adjacency_seeds(input_matrix: Union[sparse.csr_matrix, np.ndarray], allow_directed: bool = True,
                        force_bipartite: bool = False, force_directed: bool = False,
                        seeds: Optional[Union[dict, np.ndarray]] = None,
                        seeds_row: Optional[Union[dict, np.ndarray]] = None,
                        seeds_col: Optional[Union[dict, np.ndarray]] = None,
                        default_value: float = -1,
                        which: Optional[str] = None) \
        -> Tuple[sparse.csr_matrix, np.ndarray, bool]:
    """Check the input matrix and return a proper adjacency matrix with seeds.

    Parameters
    ----------
    input_matrix :
        Adjacency matrix or biadjacency matrix of the graph.
    allow_directed :
        If ``True`` (default), allow the graph to be directed.
    force_bipartite : bool
        If ``True``, return the adjacency matrix of a bipartite graph.
        Otherwise (default), do it only if the input matrix is not square or not symmetric
        with ``allow_directed=False``. Always treated as ``True`` if ``seeds_row`` or ``seeds_col``
        is given.
    force_directed :
        If ``True`` return :math:`A = \\begin{bmatrix} 0 & B \\\\ 0 & 0 \\end{bmatrix}`.
        Otherwise (default), return :math:`A = \\begin{bmatrix} 0 & B \\\\ B^T & 0 \\end{bmatrix}`.
    seeds :
        Values of seed nodes in initial state (dictionary or vector). Negative values ignored.
    seeds_row, seeds_col :
        Values of rows and columns for bipartite graphs. Negative values ignored.
        Only used if the graph is handled as bipartite and ``seeds`` is ``None``.
    default_value :
        Value of non-seed nodes (default = -1).
    which :
        Which seed values.
        If ``'probs'``, the seed vector is divided by its sum when this sum is positive.
        If ``'labels'``, the seed vector is replaced by distinct integer values (one per node)
        when all its non-negative values are equal.

    Returns
    -------
    adjacency :
        Adjacency matrix, as returned by ``get_adjacency``.
    seeds :
        Vector of seed values.
    bipartite :
        ``True`` if the input matrix has been handled as a biadjacency matrix.
    """
    input_matrix = check_format(input_matrix)

    # Seeds given per part only make sense on a bipartite graph.
    part_seeds_given = not (seeds_row is None and seeds_col is None)
    adjacency, bipartite = get_adjacency(input_matrix, allow_directed=allow_directed,
                                         force_bipartite=True if part_seeds_given else force_bipartite,
                                         force_directed=force_directed)

    shape = input_matrix.shape
    if not bipartite:
        seeds = get_seeds(shape, seeds, default_value=default_value)
    elif seeds is None:
        seeds = stack_seeds(shape, seeds_row, seeds_col, default_value=default_value)
    else:
        seeds = stack_seeds(shape, seeds, default_value=default_value)

    if which == 'probs':
        total = seeds.sum()
        if total > 0:
            # In-place division, as the seed vector is normalized where it stands.
            seeds /= total
    elif which == 'labels':
        distinct_values = set(seeds[seeds >= 0])
        if len(distinct_values) == 1:
            seeds = np.arange(len(seeds))
    return adjacency, seeds, bipartite