scikit-network 0.28.3__cp39-cp39-macosx_12_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of scikit-network might be problematic. Click here for more details.

Files changed (240) hide show
  1. scikit_network-0.28.3.dist-info/AUTHORS.rst +41 -0
  2. scikit_network-0.28.3.dist-info/LICENSE +34 -0
  3. scikit_network-0.28.3.dist-info/METADATA +457 -0
  4. scikit_network-0.28.3.dist-info/RECORD +240 -0
  5. scikit_network-0.28.3.dist-info/WHEEL +5 -0
  6. scikit_network-0.28.3.dist-info/top_level.txt +1 -0
  7. sknetwork/__init__.py +21 -0
  8. sknetwork/classification/__init__.py +8 -0
  9. sknetwork/classification/base.py +84 -0
  10. sknetwork/classification/base_rank.py +143 -0
  11. sknetwork/classification/diffusion.py +134 -0
  12. sknetwork/classification/knn.py +162 -0
  13. sknetwork/classification/metrics.py +205 -0
  14. sknetwork/classification/pagerank.py +66 -0
  15. sknetwork/classification/propagation.py +152 -0
  16. sknetwork/classification/tests/__init__.py +1 -0
  17. sknetwork/classification/tests/test_API.py +35 -0
  18. sknetwork/classification/tests/test_diffusion.py +37 -0
  19. sknetwork/classification/tests/test_knn.py +24 -0
  20. sknetwork/classification/tests/test_metrics.py +53 -0
  21. sknetwork/classification/tests/test_pagerank.py +20 -0
  22. sknetwork/classification/tests/test_propagation.py +24 -0
  23. sknetwork/classification/vote.cpython-39-darwin.so +0 -0
  24. sknetwork/classification/vote.pyx +58 -0
  25. sknetwork/clustering/__init__.py +7 -0
  26. sknetwork/clustering/base.py +102 -0
  27. sknetwork/clustering/kmeans.py +142 -0
  28. sknetwork/clustering/louvain.py +255 -0
  29. sknetwork/clustering/louvain_core.cpython-39-darwin.so +0 -0
  30. sknetwork/clustering/louvain_core.pyx +134 -0
  31. sknetwork/clustering/metrics.py +91 -0
  32. sknetwork/clustering/postprocess.py +66 -0
  33. sknetwork/clustering/propagation_clustering.py +108 -0
  34. sknetwork/clustering/tests/__init__.py +1 -0
  35. sknetwork/clustering/tests/test_API.py +37 -0
  36. sknetwork/clustering/tests/test_kmeans.py +47 -0
  37. sknetwork/clustering/tests/test_louvain.py +104 -0
  38. sknetwork/clustering/tests/test_metrics.py +50 -0
  39. sknetwork/clustering/tests/test_post_processing.py +23 -0
  40. sknetwork/clustering/tests/test_postprocess.py +39 -0
  41. sknetwork/data/__init__.py +5 -0
  42. sknetwork/data/load.py +408 -0
  43. sknetwork/data/models.py +459 -0
  44. sknetwork/data/parse.py +621 -0
  45. sknetwork/data/test_graphs.py +84 -0
  46. sknetwork/data/tests/__init__.py +1 -0
  47. sknetwork/data/tests/test_API.py +30 -0
  48. sknetwork/data/tests/test_load.py +95 -0
  49. sknetwork/data/tests/test_models.py +52 -0
  50. sknetwork/data/tests/test_parse.py +253 -0
  51. sknetwork/data/tests/test_test_graphs.py +30 -0
  52. sknetwork/data/tests/test_toy_graphs.py +68 -0
  53. sknetwork/data/toy_graphs.py +619 -0
  54. sknetwork/embedding/__init__.py +10 -0
  55. sknetwork/embedding/base.py +90 -0
  56. sknetwork/embedding/force_atlas.py +197 -0
  57. sknetwork/embedding/louvain_embedding.py +174 -0
  58. sknetwork/embedding/louvain_hierarchy.py +142 -0
  59. sknetwork/embedding/metrics.py +66 -0
  60. sknetwork/embedding/random_projection.py +133 -0
  61. sknetwork/embedding/spectral.py +214 -0
  62. sknetwork/embedding/spring.py +198 -0
  63. sknetwork/embedding/svd.py +363 -0
  64. sknetwork/embedding/tests/__init__.py +1 -0
  65. sknetwork/embedding/tests/test_API.py +73 -0
  66. sknetwork/embedding/tests/test_force_atlas.py +35 -0
  67. sknetwork/embedding/tests/test_louvain_embedding.py +33 -0
  68. sknetwork/embedding/tests/test_louvain_hierarchy.py +19 -0
  69. sknetwork/embedding/tests/test_metrics.py +29 -0
  70. sknetwork/embedding/tests/test_random_projection.py +28 -0
  71. sknetwork/embedding/tests/test_spectral.py +84 -0
  72. sknetwork/embedding/tests/test_spring.py +50 -0
  73. sknetwork/embedding/tests/test_svd.py +37 -0
  74. sknetwork/flow/__init__.py +3 -0
  75. sknetwork/flow/flow.py +73 -0
  76. sknetwork/flow/tests/__init__.py +1 -0
  77. sknetwork/flow/tests/test_flow.py +17 -0
  78. sknetwork/flow/tests/test_utils.py +69 -0
  79. sknetwork/flow/utils.py +91 -0
  80. sknetwork/gnn/__init__.py +10 -0
  81. sknetwork/gnn/activation.py +117 -0
  82. sknetwork/gnn/base.py +155 -0
  83. sknetwork/gnn/base_activation.py +89 -0
  84. sknetwork/gnn/base_layer.py +109 -0
  85. sknetwork/gnn/gnn_classifier.py +381 -0
  86. sknetwork/gnn/layer.py +153 -0
  87. sknetwork/gnn/layers.py +127 -0
  88. sknetwork/gnn/loss.py +180 -0
  89. sknetwork/gnn/neighbor_sampler.py +65 -0
  90. sknetwork/gnn/optimizer.py +163 -0
  91. sknetwork/gnn/tests/__init__.py +1 -0
  92. sknetwork/gnn/tests/test_activation.py +56 -0
  93. sknetwork/gnn/tests/test_base.py +79 -0
  94. sknetwork/gnn/tests/test_base_layer.py +37 -0
  95. sknetwork/gnn/tests/test_gnn_classifier.py +192 -0
  96. sknetwork/gnn/tests/test_layers.py +80 -0
  97. sknetwork/gnn/tests/test_loss.py +33 -0
  98. sknetwork/gnn/tests/test_neigh_sampler.py +23 -0
  99. sknetwork/gnn/tests/test_optimizer.py +43 -0
  100. sknetwork/gnn/tests/test_utils.py +93 -0
  101. sknetwork/gnn/utils.py +219 -0
  102. sknetwork/hierarchy/__init__.py +7 -0
  103. sknetwork/hierarchy/base.py +69 -0
  104. sknetwork/hierarchy/louvain_hierarchy.py +264 -0
  105. sknetwork/hierarchy/metrics.py +234 -0
  106. sknetwork/hierarchy/paris.cpython-39-darwin.so +0 -0
  107. sknetwork/hierarchy/paris.pyx +317 -0
  108. sknetwork/hierarchy/postprocess.py +350 -0
  109. sknetwork/hierarchy/tests/__init__.py +1 -0
  110. sknetwork/hierarchy/tests/test_API.py +25 -0
  111. sknetwork/hierarchy/tests/test_algos.py +29 -0
  112. sknetwork/hierarchy/tests/test_metrics.py +62 -0
  113. sknetwork/hierarchy/tests/test_postprocess.py +57 -0
  114. sknetwork/hierarchy/tests/test_ward.py +25 -0
  115. sknetwork/hierarchy/ward.py +94 -0
  116. sknetwork/linalg/__init__.py +9 -0
  117. sknetwork/linalg/basics.py +37 -0
  118. sknetwork/linalg/diteration.cpython-39-darwin.so +0 -0
  119. sknetwork/linalg/diteration.pyx +49 -0
  120. sknetwork/linalg/eig_solver.py +93 -0
  121. sknetwork/linalg/laplacian.py +15 -0
  122. sknetwork/linalg/normalization.py +66 -0
  123. sknetwork/linalg/operators.py +225 -0
  124. sknetwork/linalg/polynome.py +76 -0
  125. sknetwork/linalg/ppr_solver.py +170 -0
  126. sknetwork/linalg/push.cpython-39-darwin.so +0 -0
  127. sknetwork/linalg/push.pyx +73 -0
  128. sknetwork/linalg/sparse_lowrank.py +142 -0
  129. sknetwork/linalg/svd_solver.py +91 -0
  130. sknetwork/linalg/tests/__init__.py +1 -0
  131. sknetwork/linalg/tests/test_eig.py +44 -0
  132. sknetwork/linalg/tests/test_laplacian.py +18 -0
  133. sknetwork/linalg/tests/test_normalization.py +38 -0
  134. sknetwork/linalg/tests/test_operators.py +70 -0
  135. sknetwork/linalg/tests/test_polynome.py +38 -0
  136. sknetwork/linalg/tests/test_ppr.py +50 -0
  137. sknetwork/linalg/tests/test_sparse_lowrank.py +61 -0
  138. sknetwork/linalg/tests/test_svd.py +38 -0
  139. sknetwork/linkpred/__init__.py +4 -0
  140. sknetwork/linkpred/base.py +80 -0
  141. sknetwork/linkpred/first_order.py +508 -0
  142. sknetwork/linkpred/first_order_core.cpython-39-darwin.so +0 -0
  143. sknetwork/linkpred/first_order_core.pyx +315 -0
  144. sknetwork/linkpred/postprocessing.py +98 -0
  145. sknetwork/linkpred/tests/__init__.py +1 -0
  146. sknetwork/linkpred/tests/test_API.py +49 -0
  147. sknetwork/linkpred/tests/test_postprocessing.py +21 -0
  148. sknetwork/path/__init__.py +4 -0
  149. sknetwork/path/metrics.py +148 -0
  150. sknetwork/path/search.py +65 -0
  151. sknetwork/path/shortest_path.py +186 -0
  152. sknetwork/path/tests/__init__.py +1 -0
  153. sknetwork/path/tests/test_metrics.py +29 -0
  154. sknetwork/path/tests/test_search.py +25 -0
  155. sknetwork/path/tests/test_shortest_path.py +45 -0
  156. sknetwork/ranking/__init__.py +9 -0
  157. sknetwork/ranking/base.py +56 -0
  158. sknetwork/ranking/betweenness.cpython-39-darwin.so +0 -0
  159. sknetwork/ranking/betweenness.pyx +99 -0
  160. sknetwork/ranking/closeness.py +95 -0
  161. sknetwork/ranking/harmonic.py +82 -0
  162. sknetwork/ranking/hits.py +94 -0
  163. sknetwork/ranking/katz.py +81 -0
  164. sknetwork/ranking/pagerank.py +107 -0
  165. sknetwork/ranking/postprocess.py +25 -0
  166. sknetwork/ranking/tests/__init__.py +1 -0
  167. sknetwork/ranking/tests/test_API.py +34 -0
  168. sknetwork/ranking/tests/test_betweenness.py +38 -0
  169. sknetwork/ranking/tests/test_closeness.py +34 -0
  170. sknetwork/ranking/tests/test_hits.py +20 -0
  171. sknetwork/ranking/tests/test_pagerank.py +69 -0
  172. sknetwork/regression/__init__.py +4 -0
  173. sknetwork/regression/base.py +56 -0
  174. sknetwork/regression/diffusion.py +190 -0
  175. sknetwork/regression/tests/__init__.py +1 -0
  176. sknetwork/regression/tests/test_API.py +34 -0
  177. sknetwork/regression/tests/test_diffusion.py +48 -0
  178. sknetwork/sknetwork.py +3 -0
  179. sknetwork/topology/__init__.py +9 -0
  180. sknetwork/topology/dag.py +74 -0
  181. sknetwork/topology/dag_core.cpython-39-darwin.so +0 -0
  182. sknetwork/topology/dag_core.pyx +38 -0
  183. sknetwork/topology/kcliques.cpython-39-darwin.so +0 -0
  184. sknetwork/topology/kcliques.pyx +193 -0
  185. sknetwork/topology/kcore.cpython-39-darwin.so +0 -0
  186. sknetwork/topology/kcore.pyx +120 -0
  187. sknetwork/topology/structure.py +234 -0
  188. sknetwork/topology/tests/__init__.py +1 -0
  189. sknetwork/topology/tests/test_cliques.py +28 -0
  190. sknetwork/topology/tests/test_cores.py +21 -0
  191. sknetwork/topology/tests/test_dag.py +26 -0
  192. sknetwork/topology/tests/test_structure.py +99 -0
  193. sknetwork/topology/tests/test_triangles.py +42 -0
  194. sknetwork/topology/tests/test_wl_coloring.py +49 -0
  195. sknetwork/topology/tests/test_wl_kernel.py +31 -0
  196. sknetwork/topology/triangles.cpython-39-darwin.so +0 -0
  197. sknetwork/topology/triangles.pyx +166 -0
  198. sknetwork/topology/weisfeiler_lehman.py +163 -0
  199. sknetwork/topology/weisfeiler_lehman_core.cpython-39-darwin.so +0 -0
  200. sknetwork/topology/weisfeiler_lehman_core.pyx +116 -0
  201. sknetwork/utils/__init__.py +40 -0
  202. sknetwork/utils/base.py +35 -0
  203. sknetwork/utils/check.py +354 -0
  204. sknetwork/utils/co_neighbor.py +71 -0
  205. sknetwork/utils/format.py +219 -0
  206. sknetwork/utils/kmeans.py +89 -0
  207. sknetwork/utils/knn.py +166 -0
  208. sknetwork/utils/knn1d.cpython-39-darwin.so +0 -0
  209. sknetwork/utils/knn1d.pyx +80 -0
  210. sknetwork/utils/membership.py +82 -0
  211. sknetwork/utils/minheap.cpython-39-darwin.so +0 -0
  212. sknetwork/utils/minheap.pxd +22 -0
  213. sknetwork/utils/minheap.pyx +111 -0
  214. sknetwork/utils/neighbors.py +115 -0
  215. sknetwork/utils/seeds.py +75 -0
  216. sknetwork/utils/simplex.py +140 -0
  217. sknetwork/utils/tests/__init__.py +1 -0
  218. sknetwork/utils/tests/test_base.py +28 -0
  219. sknetwork/utils/tests/test_bunch.py +16 -0
  220. sknetwork/utils/tests/test_check.py +190 -0
  221. sknetwork/utils/tests/test_co_neighbor.py +43 -0
  222. sknetwork/utils/tests/test_format.py +61 -0
  223. sknetwork/utils/tests/test_kmeans.py +21 -0
  224. sknetwork/utils/tests/test_knn.py +32 -0
  225. sknetwork/utils/tests/test_membership.py +24 -0
  226. sknetwork/utils/tests/test_neighbors.py +41 -0
  227. sknetwork/utils/tests/test_projection_simplex.py +33 -0
  228. sknetwork/utils/tests/test_seeds.py +67 -0
  229. sknetwork/utils/tests/test_verbose.py +15 -0
  230. sknetwork/utils/tests/test_ward.py +20 -0
  231. sknetwork/utils/timeout.py +38 -0
  232. sknetwork/utils/verbose.py +37 -0
  233. sknetwork/utils/ward.py +60 -0
  234. sknetwork/visualization/__init__.py +4 -0
  235. sknetwork/visualization/colors.py +34 -0
  236. sknetwork/visualization/dendrograms.py +229 -0
  237. sknetwork/visualization/graphs.py +819 -0
  238. sknetwork/visualization/tests/__init__.py +1 -0
  239. sknetwork/visualization/tests/test_dendrograms.py +53 -0
  240. sknetwork/visualization/tests/test_graphs.py +167 -0
@@ -0,0 +1,315 @@
1
+ # distutils: language = c++
2
+ # cython: language_level=3
3
+ # cython: linetrace=True
4
+ # distutils: define_macros=CYTHON_TRACE_NOGIL=1
5
+ """
6
+ Created on July, 2020
7
+ @author: Nathan de Lara <nathan.delara@polytechnique.org>
8
+ """
9
+ from libc.math cimport log, sqrt
10
+ from libcpp.vector cimport vector
11
+
12
+ ctypedef float (*int2float)(int)
13
+ ctypedef float (*vectors2float)(vector[int], vector[int])
14
+
15
+
16
+ cdef float inv(int a):
17
+ """Inverse of an integer, 1 / a, used as the weight of the resource allocation index. NOTE(review): a == 0 is not guarded here; confirm callers never pass 0."""
18
+ return 1 / a
19
+
20
+
21
+ cdef float inv_log(int a):
22
+ """Inverse of the natural logarithm, 1 / log(a), used as the weight of the Adamic Adar index. NOTE(review): log(1) == 0 gives a zero denominator; confirm callers never pass a == 1."""
23
+ return 1 / log(a)
24
+
25
+
26
+ cdef vector[int] vector_intersection(vector[int] a, vector[int] b):
27
+ """Common elements of two sorted vectors, collected in increasing order by advancing one index per vector. Each element is assumed unique in each vector."""
28
+ cdef vector[int] intersection
29
+ cdef int e_a, e_b
30
+ cdef int ix_a = 0
31
+ cdef int ix_b = 0
32
+ cdef int size_a = a.size()
33
+ cdef int size_b = b.size()
34
+
35
+ while ix_a < size_a and ix_b < size_b:
36
+ e_a = a[ix_a]
37
+ e_b = b[ix_b]
38
+
39
+ if e_a < e_b:
40
+ ix_a += 1
41
+ elif e_b < e_a:
42
+ ix_b += 1
43
+ else:
44
+ intersection.push_back(e_a)
45
+ ix_a += 1
46
+ ix_b += 1
47
+
48
+ return intersection
49
+
50
+
51
+ cdef float size_intersection(vector[int] a, vector[int] b):
52
+ """Number of elements common to two sorted vectors, returned as a float so that the function matches the vectors2float signature."""
53
+ return vector_intersection(a, b).size()
54
+
55
+
56
cdef float jaccard(vector[int] a, vector[int] b):
    """Jaccard coefficient: size of the intersection divided by size of the union.

    Both vectors are expected to be sorted with unique elements, as required
    by the intersection helper. Returns 0 when both vectors are empty, because
    the union is then empty and the ratio would divide by zero.
    """
    cdef float size_inter = size_intersection(a, b)
    cdef float size_union = a.size() + b.size() - size_inter

    # An empty union only happens when both vectors are empty.
    if size_union == 0:
        return 0
    return size_inter / size_union
61
+
62
+
63
cdef float salton(vector[int] a, vector[int] b):
    """Salton coefficient: size of the intersection divided by sqrt(|a| * |b|).

    Returns 0 when either vector is empty, because the denominator is then
    zero and the ratio would divide by zero.
    """
    cdef float size_inter = size_intersection(a, b)
    # Kept in double precision, like the value returned by sqrt.
    cdef double norm = sqrt(a.size() * b.size())

    if norm == 0:
        return 0
    return size_inter / norm
67
+
68
+
69
cdef float sorensen(vector[int] a, vector[int] b):
    """Sorensen coefficient: twice the size of the intersection divided by |a| + |b|.

    Returns 0 when both vectors are empty, because the denominator is then
    zero and the ratio would divide by zero.
    """
    cdef float size_inter = size_intersection(a, b)
    cdef size_t total_size = a.size() + b.size()

    if total_size == 0:
        return 0
    return 2 * size_inter / total_size
73
+
74
+
75
cdef float hub_promoted(vector[int] a, vector[int] b):
    """Hub promoted coefficient: size of the intersection divided by min(|a|, |b|).

    Returns 0 when either vector is empty, because the denominator is then
    zero and the ratio would divide by zero.
    """
    cdef float size_inter = size_intersection(a, b)
    cdef size_t smallest = min(a.size(), b.size())

    if smallest == 0:
        return 0
    return size_inter / smallest
79
+
80
+
81
cdef float hub_depressed(vector[int] a, vector[int] b):
    """Hub depressed coefficient: size of the intersection divided by max(|a|, |b|).

    Returns 0 when both vectors are empty, because the denominator is then
    zero and the ratio would divide by zero.
    """
    cdef float size_inter = size_intersection(a, b)
    cdef size_t largest = max(a.size(), b.size())

    if largest == 0:
        return 0
    return size_inter / largest
85
+
86
+
87
+ cdef vector[int] neighbors(int[:] indptr, int[:] indices, int node):
88
+ """Neighbors of a given node, copied from indices[indptr[node]:indptr[node + 1]] into a new vector."""
89
+ cdef int j1 = indptr[node]
90
+ cdef int j2 = indptr[node + 1]
91
+ cdef int j
92
+ cdef vector[int] neigh = ()
93
+
94
+ for j in range(j1, j2):
95
+ neigh.push_back(indices[j])
96
+
97
+ return neigh
98
+
99
+
100
+ cdef vector[float] predict_node_core(int[:] indptr, int[:] indices, int source, int[:] targets,
101
+ vectors2float weight_func):
102
+ """Scores obtained by applying weight_func to the neighbors of a single source and the neighbors of each target.
103
+
104
+ Parameters
105
+ ----------
106
+ indptr :
107
+ indptr array of the adjacency matrix
108
+ indices :
109
+ indices array of the adjacency matrix
110
+ source :
111
+ source index
112
+ targets :
113
+ array of target indices
114
+ weight_func :
115
+ scoring function called as weight_func(neighbors of source, neighbors of target)
116
+
117
+ Returns
118
+ -------
119
+ scores :
120
+ vector of node pair scores, one per target and in the order of targets
121
+ """
122
+ cdef int target, i
123
+ cdef int n_targets = targets.shape[0]
124
+ cdef vector[float] preds
125
+
126
+ cdef vector[int] neigh_s = neighbors(indptr, indices, source)
127
+ cdef vector[int] neigh_t
128
+ for i in range(n_targets):
129
+ target = targets[i]
130
+ neigh_t = neighbors(indptr, indices, target)
131
+ preds.push_back(weight_func(neigh_s, neigh_t))
132
+
133
+ return preds
134
+
135
+
136
cdef vector[float] predict_edges_core(int[:] indptr, int[:] indices, int[:, :] edges,
                                      vectors2float weight_func):
    """Scores obtained by applying weight_func to the neighbors of both ends of each edge.

    Parameters
    ----------
    indptr :
        indptr array of the adjacency matrix
    indices :
        indices array of the adjacency matrix
    edges:
        array of node pairs to be scored, one (source, target) pair per row
    weight_func :
        scoring function called as weight_func(neighbors of source, neighbors of target)

    Returns
    -------
    scores :
        vector of node pair scores, one per row of edges and in the same order
    """
    cdef vector[float] preds
    # Typed explicitly, as in the single-source variant, so the neighbor
    # lists stay C++ vectors whatever the type inference settings are.
    cdef vector[int] neigh_s, neigh_t
    cdef int source, target, i
    cdef int n_edges = edges.shape[0]

    for i in range(n_edges):
        source, target = edges[i, 0], edges[i, 1]
        neigh_s = neighbors(indptr, indices, source)
        neigh_t = neighbors(indptr, indices, target)
        preds.push_back(weight_func(neigh_s, neigh_t))

    return preds
168
+
169
+ def common_neighbors_node_core(int[:] indptr, int[:] indices, int source, int[:] targets):
170
+ """Number of common neighbors between source and each node of targets (Python entry point for predict_node_core with size_intersection)."""
171
+ return predict_node_core(indptr, indices, source, targets, size_intersection)
172
+
173
+ def common_neighbors_edges_core(int[:] indptr, int[:] indices, int[:, :] edges):
174
+ """Number of common neighbors for each node pair of edges (Python entry point for predict_edges_core with size_intersection)."""
175
+ return predict_edges_core(indptr, indices, edges, size_intersection)
176
+
177
+ def jaccard_node_core(int[:] indptr, int[:] indices, int source, int[:] targets):
178
+ """Jaccard coefficient of the neighborhoods of source and each node of targets."""
179
+ return predict_node_core(indptr, indices, source, targets, jaccard)
180
+
181
+ def jaccard_edges_core(int[:] indptr, int[:] indices, int[:, :] edges):
182
+ """Jaccard coefficient of the neighborhoods of each node pair of edges."""
183
+ return predict_edges_core(indptr, indices, edges, jaccard)
184
+
185
+ def salton_node_core(int[:] indptr, int[:] indices, int source, int[:] targets):
186
+ """Salton coefficient of the neighborhoods of source and each node of targets."""
187
+ return predict_node_core(indptr, indices, source, targets, salton)
188
+
189
+ def salton_edges_core(int[:] indptr, int[:] indices, int[:, :] edges):
190
+ """Salton coefficient of the neighborhoods of each node pair of edges."""
191
+ return predict_edges_core(indptr, indices, edges, salton)
192
+
193
+ def sorensen_node_core(int[:] indptr, int[:] indices, int source, int[:] targets):
194
+ """Sorensen coefficient of the neighborhoods of source and each node of targets."""
195
+ return predict_node_core(indptr, indices, source, targets, sorensen)
196
+
197
+ def sorensen_edges_core(int[:] indptr, int[:] indices, int[:, :] edges):
198
+ """Sorensen coefficient of the neighborhoods of each node pair of edges."""
199
+ return predict_edges_core(indptr, indices, edges, sorensen)
200
+
201
+ def hub_promoted_node_core(int[:] indptr, int[:] indices, int source, int[:] targets):
202
+ """Hub promoted coefficient of the neighborhoods of source and each node of targets."""
203
+ return predict_node_core(indptr, indices, source, targets, hub_promoted)
204
+
205
+ def hub_promoted_edges_core(int[:] indptr, int[:] indices, int[:, :] edges):
206
+ """Hub promoted coefficient of the neighborhoods of each node pair of edges."""
207
+ return predict_edges_core(indptr, indices, edges, hub_promoted)
208
+
209
+ def hub_depressed_node_core(int[:] indptr, int[:] indices, int source, int[:] targets):
210
+ """Hub depressed coefficient of the neighborhoods of source and each node of targets."""
211
+ return predict_node_core(indptr, indices, source, targets, hub_depressed)
212
+
213
+ def hub_depressed_edges_core(int[:] indptr, int[:] indices, int[:, :] edges):
214
+ """Hub depressed coefficient of the neighborhoods of each node pair of edges."""
215
+ return predict_edges_core(indptr, indices, edges, hub_depressed)
216
+
217
+ cdef vector[float] predict_node_weighted_core(int[:] indptr, int[:] indices, int source, int[:] targets,
218
+ int2float weight_func):
219
+ """Scores obtained by summing weight_func(degree) over the common neighbors of a single source and each target.
220
+
221
+ Parameters
222
+ ----------
223
+ indptr :
224
+ indptr array of the adjacency matrix
225
+ indices :
226
+ indices array of the adjacency matrix
227
+ source :
228
+ source index
229
+ targets :
230
+ array of target indices
231
+ weight_func :
232
+ function applied to the degree indptr[j+1] - indptr[j] of each common neighbor j
233
+
234
+ Returns
235
+ -------
236
+ scores :
237
+ vector of node pair scores, one per target and in the order of targets
238
+ """
239
+ cdef int target, i, j
240
+ cdef int n_targets = targets.shape[0]
241
+ cdef float weight
242
+ cdef vector[int] intersection
243
+ cdef vector[float] preds
244
+
245
+ cdef vector[int] neigh_s = neighbors(indptr, indices, source)
246
+ cdef vector[int] neigh_t
247
+ for i in range(n_targets):
248
+ target = targets[i]
249
+ neigh_t = neighbors(indptr, indices, target)
250
+ intersection = vector_intersection(neigh_s, neigh_t)
251
+
252
+ weight = 0
253
+ for j in intersection:
254
+ weight += weight_func(indptr[j+1] - indptr[j])
255
+ preds.push_back(weight)
256
+
257
+ return preds
258
+
259
+
260
cdef vector[float] predict_edges_weighted_core(int[:] indptr, int[:] indices, int[:, :] edges,
                                               int2float weight_func):
    """Scores obtained by summing weight_func(degree) over the common neighbors of both ends of each edge.

    Parameters
    ----------
    indptr :
        indptr array of the adjacency matrix
    indices :
        indices array of the adjacency matrix
    edges:
        array of node pairs to be scored, one (source, target) pair per row
    weight_func :
        function applied to the degree indptr[j+1] - indptr[j] of each common neighbor j

    Returns
    -------
    scores :
        vector of node pair scores, one per row of edges and in the same order
    """
    cdef vector[float] preds
    # j and the neighbor lists are typed explicitly, as in the single-source
    # variant, so the inner loop indexes indptr with a C integer.
    cdef int source, target, i, j
    cdef float weight
    cdef vector[int] neigh_s, neigh_t
    cdef vector[int] intersection
    cdef int n_edges = edges.shape[0]

    for i in range(n_edges):
        # Direct two-index access avoids building a temporary row slice.
        source, target = edges[i, 0], edges[i, 1]
        neigh_s = neighbors(indptr, indices, source)
        neigh_t = neighbors(indptr, indices, target)

        intersection = vector_intersection(neigh_s, neigh_t)

        weight = 0
        for j in intersection:
            weight += weight_func(indptr[j+1] - indptr[j])
        preds.push_back(weight)

    return preds
299
+
300
+
301
+ def adamic_adar_node_core(int[:] indptr, int[:] indices, int source, int[:] targets):
302
+ """Adamic Adar index between source and each node of targets: sum of 1 / log(degree) over their common neighbors."""
303
+ return predict_node_weighted_core(indptr, indices, source, targets, inv_log)
304
+
305
+ def adamic_adar_edges_core(int[:] indptr, int[:] indices, int[:, :] edges):
306
+ """Adamic Adar index of each node pair of edges: sum of 1 / log(degree) over their common neighbors."""
307
+ return predict_edges_weighted_core(indptr, indices, edges, inv_log)
308
+
309
+ def resource_allocation_node_core(int[:] indptr, int[:] indices, int source, int[:] targets):
310
+ """Resource Allocation index between source and each node of targets: sum of 1 / degree over their common neighbors."""
311
+ return predict_node_weighted_core(indptr, indices, source, targets, inv)
312
+
313
+ def resource_allocation_edges_core(int[:] indptr, int[:] indices, int[:, :] edges):
314
+ """Resource Allocation index of each node pair of edges: sum of 1 / degree over their common neighbors."""
315
+ return predict_edges_weighted_core(indptr, indices, edges, inv)
@@ -0,0 +1,98 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ Created on July, 2020
5
+ @author: Nathan de Lara <nathan.delara@polytechnique.org>
6
+ """
7
+ from typing import Union, Iterable, Tuple
8
+
9
+ import numpy as np
10
+ from scipy import sparse
11
+
12
+
13
def is_edge(adjacency: sparse.csr_matrix, query: Union[int, Iterable, Tuple]) -> Union[bool, np.ndarray]:
    """Given a query, return whether each edge is actually in the adjacency.

    Parameters
    ----------
    adjacency :
        Adjacency matrix of the graph.
    query : int, Iterable or Tuple
        * If int i, queries (i, j) for all j.
        * If Iterable of integers, return queries (i, j) for i in query, for all j.
        * If tuple (i, j), queries (i, j).
        * If list of tuples or array of shape (n_queries, 2), queries (i, j) for each line in query.

    Returns
    -------
    labels_true : Union[bool, np.ndarray]
        For each element in the query, returns ``True`` if the edge exists in the adjacency and ``False`` otherwise.

    Raises
    ------
    ValueError
        If the query is none of the supported forms.

    Examples
    --------
    >>> from sknetwork.data import house
    >>> adjacency = house()
    >>> is_edge(adjacency, 0)
    array([False,  True, False, False,  True])
    >>> is_edge(adjacency, [0, 1])
    array([[False,  True, False, False,  True],
           [ True, False,  True, False,  True]])
    >>> is_edge(adjacency, (0, 1))
    True
    >>> is_edge(adjacency, [(0, 1), (0, 2)])
    array([ True, False])
    """
    # Single node: one boolean per column of its adjacency row.
    if np.issubdtype(type(query), np.integer):
        return adjacency[query].toarray().astype(bool).ravel()

    # Single pair: look the target up among the stored neighbors of the source.
    if isinstance(query, tuple):
        source, target = query
        neighbors = adjacency.indices[adjacency.indptr[source]:adjacency.indptr[source + 1]]
        return bool(np.isin(target, neighbors, assume_unique=True))

    if isinstance(query, list):
        query = np.array(query)
    if not isinstance(query, np.ndarray):
        raise ValueError("Query not understood.")

    # Several nodes: one boolean row per queried node.
    if query.ndim == 1:
        return adjacency[query].toarray().astype(bool)

    # Several pairs: one boolean per row. The explicit dtype keeps the result
    # boolean even when the query has no rows.
    if query.ndim == 2 and query.shape[1] == 2:
        return np.array([is_edge(adjacency, (source, target)) for source, target in query], dtype=bool)

    raise ValueError("Query not understood.")
65
+
66
+
67
def whitened_sigmoid(scores: np.ndarray):
    """Turn scores into probabilities with a sigmoid applied after standardization:

    :math:`\\dfrac{1}{1 + \\exp(-(x - \\mu)/\\sigma)}`,

    where :math:`\\mu` and :math:`\\sigma` are respectively the mean and standard deviation of x.

    Parameters
    ----------
    scores : np.ndarray
        The input array

    Returns
    -------
    probas : np.ndarray
        Array with entries between 0 and 1. When the standard deviation is not positive,
        an array of ones with the shape and dtype of the input.

    Examples
    --------
    >>> probas = whitened_sigmoid(np.array([1, 5, 0.25]))
    >>> probas.round(2)
    array([0.37, 0.8 , 0.29])
    >>> probas = whitened_sigmoid(np.array([2, 2, 2]))
    >>> probas
    array([1, 1, 1])
    """
    center = scores.mean()
    spread = scores.std()

    # Without a positive spread the scores cannot be standardized.
    if not spread > 0:
        return np.ones_like(scores)

    centered = scores - center
    return 1 / (1 + np.exp(-centered / spread))
@@ -0,0 +1 @@
1
+ """tests for link prediction"""
@@ -0,0 +1,49 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ """tests for link prediction API"""
4
+ import unittest
5
+ from abc import ABC
6
+ from typing import Iterable
7
+
8
+ import numpy as np
9
+
10
+ from sknetwork.data import house
11
+ from sknetwork.linkpred.base import BaseLinkPred
12
+
13
+
14
class Dummy(BaseLinkPred, ABC):
    """Minimal link prediction algorithm used by the tests of this module."""

    def __init__(self):
        super().__init__()

    def fit(self, *args, **kwargs):
        """Ignore every argument and return the instance itself."""
        return self

    def _predict_base(self, source: int, targets: Iterable):
        """Give the constant score 1 to every target."""
        n_targets = sum(1 for _ in targets)
        return np.array([1] * n_targets)
25
+
26
+
27
class TestLinkPred(unittest.TestCase):
    """Tests of the prediction interface of link prediction algorithms."""

    def test_base(self):
        """The base class raises NotImplementedError for node and pair queries."""
        algo = BaseLinkPred()
        for query in (0, (0, 1)):
            with self.assertRaises(NotImplementedError):
                algo.predict(query)

    def test_query(self):
        """Unsupported queries raise ValueError; a list of pairs yields one score per pair."""
        adjacency = house()
        algo = Dummy().fit(adjacency)

        for unsupported in ('toto', np.ones((2, 3))):
            with self.assertRaises(ValueError):
                algo.predict(unsupported)

        pairs = [(0, 1), (2, 3)]
        scores = algo.predict(pairs)
        self.assertEqual(scores.shape, (len(pairs),))
@@ -0,0 +1,21 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ """tests for link prediction postprocessing"""
4
+ import unittest
5
+
6
+ import numpy as np
7
+
8
+ from sknetwork.data import house
9
+ from sknetwork.linkpred import is_edge
10
+
11
+
12
class TestLinkPredPostProcessing(unittest.TestCase):
    """Tests of the link prediction post-processing helpers."""

    def test_signature(self):
        """is_edge rejects a string query and a 3-dimensional array query."""
        adjacency = house()

        unsupported_queries = ('toto', np.ones((2, 2, 2)))
        for query in unsupported_queries:
            with self.assertRaises(ValueError):
                is_edge(adjacency, query)
@@ -0,0 +1,4 @@
1
+ """Path module"""
2
+ from sknetwork.path.metrics import get_diameter, get_eccentricity, get_radius
3
+ from sknetwork.path.search import breadth_first_search, depth_first_search
4
+ from sknetwork.path.shortest_path import get_distances, get_shortest_path
@@ -0,0 +1,148 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ Created on May, 2020
5
+ @author: Nathan de Lara <nathan.delara@polytechnique.org>
6
+ """
7
+ from typing import Union, Optional
8
+
9
+ import numpy as np
10
+ from scipy import sparse
11
+
12
+ from sknetwork.path.shortest_path import get_distances
13
+
14
+
15
def get_diameter(adjacency: Union[sparse.csr_matrix, np.ndarray],
                 n_sources: Optional[Union[int, float]] = None,
                 unweighted: bool = False, n_jobs: Optional[int] = None) -> int:
    """Lower bound on the diameter of a graph which is the length of the longest
    shortest path between two nodes.

    Parameters
    ----------
    adjacency :
        Adjacency matrix of the graph.
    n_sources :
        Number of node sources to use for approximation.

        * If None, compute exact diameter.
        * If int, sample n_sources source nodes at random.
        * If float, sample (n_sources * n) source nodes at random.
    unweighted:
        Whether or not the graph is unweighted.
    n_jobs :
        If an integer value is given, denotes the number of workers to use (-1
        means the maximum number will be used).
        If ``None``, no parallel computations are made.

    Returns
    -------
    diameter : int

    Raises
    ------
    ValueError
        If n_sources is an integer larger than the number of nodes, a float
        not smaller than 1, or of any other type.

    Examples
    --------
    >>> from sknetwork.data import house
    >>> adjacency = house()
    >>> d_exact = get_diameter(adjacency)
    >>> d_exact
    2
    >>> d_approx = get_diameter(adjacency, 2)
    >>> d_approx <= d_exact
    True
    >>> d_approx = get_diameter(adjacency, 0.5)
    >>> d_approx <= d_exact
    True

    Notes
    -----
    This is a basic implementation that computes distances between nodes and
    returns the maximum.
    """
    n = adjacency.shape[0]
    if n_sources is None or n_sources == n:
        sources = np.arange(n)
    else:
        # A fraction is first converted to a number of nodes.
        if np.issubdtype(type(n_sources), np.floating) and n_sources < 1.:
            n_sources = int(n_sources * n)
        if np.issubdtype(type(n_sources), np.integer) and n_sources <= n:
            sources = np.random.choice(n, n_sources, replace=False)
        else:
            # Adjacent literals are concatenated as-is, so each one carries
            # its own trailing space.
            raise ValueError("n_sources must be either None, an integer smaller "
                             "than the number of nodes or a float "
                             "smaller than 1.")
    dists = get_distances(adjacency, sources, method='D', return_predecessors=False,
                          unweighted=unweighted, n_jobs=n_jobs).astype(int)
    return dists.max()
77
+
78
+
79
def get_radius(adjacency: Union[sparse.csr_matrix, np.ndarray],
               n_sources: Optional[Union[int, float]] = None,
               unweighted: bool = False, n_jobs: Optional[int] = None) -> int:
    """Compute the radius of the graph, which is the minimum eccentricity of
    its nodes.

    Parameters
    ----------
    adjacency :
        Adjacency matrix of the graph.
    n_sources :
        Number of node sources to use for approximation.

        * If None, use the default sources of ``get_distances`` (exact radius).
        * If int, sample n_sources source nodes at random.
        * If float, sample (n_sources * n) source nodes at random.
    unweighted:
        Whether the graph is unweighted.
    n_jobs :
        If an integer value is given, denotes the number of workers to use (-1
        means the maximum number will be used).
        If ``None``, no parallel computations are made.

    Returns
    -------
    radius : int

    Raises
    ------
    ValueError
        If n_sources is not None and does not describe between 1 and n nodes.

    Notes
    -----
    This is a basic implementation that computes distances from the source
    nodes and returns the minimum of their eccentricities. When only a sample
    of sources is used, the result is an upper bound on the radius.
    """
    sources = None
    if n_sources is not None:
        n = adjacency.shape[0]
        # A fraction is first converted to a number of nodes.
        if np.issubdtype(type(n_sources), np.floating) and n_sources < 1.:
            n_sources = int(n_sources * n)
        if not (np.issubdtype(type(n_sources), np.integer) and 0 < n_sources <= n):
            raise ValueError("n_sources must be either None, a positive integer not larger "
                             "than the number of nodes or a float "
                             "smaller than 1.")
        # n_sources is a count: draw that many distinct node indices rather
        # than passing the count itself as a node index.
        sources = np.random.choice(n, n_sources, replace=False)

    dists = get_distances(adjacency, sources=sources, method='D',
                          return_predecessors=False,
                          unweighted=unweighted, n_jobs=n_jobs).astype(int)
    # NOTE(review): get_distances presumably returns a 1-D array for a single
    # source; atleast_2d keeps one row per source in that case. Confirm.
    eccentricities = np.atleast_2d(dists).max(axis=1)

    return eccentricities.min()
120
+
121
+
122
def get_eccentricity(adjacency: Union[sparse.csr_matrix, np.ndarray], node: int,
                     unweighted: bool = False,
                     n_jobs: Optional[int] = None) -> int:
    """Eccentricity of a node u, i.e. the largest of the shortest-path
    distances computed from u.

    Parameters
    ----------
    adjacency :
        Adjacency matrix of the graph.
    node:
        The node whose eccentricity is computed.
    unweighted:
        Whether or not the graph is unweighted.
    n_jobs :
        If an integer value is given, denotes the number of workers to use (-1
        means the maximum number will be used).
        If ``None``, no parallel computations are made.

    Returns
    -------
    eccentricity : int
    """
    distances = get_distances(adjacency, node, method='D', return_predecessors=False,
                              unweighted=unweighted, n_jobs=n_jobs)
    # Distances are cast to integers before taking the maximum.
    return distances.astype(int).max()