scikit-network 0.28.3 (cp39-cp39-macosx_12_0_arm64.whl)
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of scikit-network might be problematic.
- scikit_network-0.28.3.dist-info/AUTHORS.rst +41 -0
- scikit_network-0.28.3.dist-info/LICENSE +34 -0
- scikit_network-0.28.3.dist-info/METADATA +457 -0
- scikit_network-0.28.3.dist-info/RECORD +240 -0
- scikit_network-0.28.3.dist-info/WHEEL +5 -0
- scikit_network-0.28.3.dist-info/top_level.txt +1 -0
- sknetwork/__init__.py +21 -0
- sknetwork/classification/__init__.py +8 -0
- sknetwork/classification/base.py +84 -0
- sknetwork/classification/base_rank.py +143 -0
- sknetwork/classification/diffusion.py +134 -0
- sknetwork/classification/knn.py +162 -0
- sknetwork/classification/metrics.py +205 -0
- sknetwork/classification/pagerank.py +66 -0
- sknetwork/classification/propagation.py +152 -0
- sknetwork/classification/tests/__init__.py +1 -0
- sknetwork/classification/tests/test_API.py +35 -0
- sknetwork/classification/tests/test_diffusion.py +37 -0
- sknetwork/classification/tests/test_knn.py +24 -0
- sknetwork/classification/tests/test_metrics.py +53 -0
- sknetwork/classification/tests/test_pagerank.py +20 -0
- sknetwork/classification/tests/test_propagation.py +24 -0
- sknetwork/classification/vote.cpython-39-darwin.so +0 -0
- sknetwork/classification/vote.pyx +58 -0
- sknetwork/clustering/__init__.py +7 -0
- sknetwork/clustering/base.py +102 -0
- sknetwork/clustering/kmeans.py +142 -0
- sknetwork/clustering/louvain.py +255 -0
- sknetwork/clustering/louvain_core.cpython-39-darwin.so +0 -0
- sknetwork/clustering/louvain_core.pyx +134 -0
- sknetwork/clustering/metrics.py +91 -0
- sknetwork/clustering/postprocess.py +66 -0
- sknetwork/clustering/propagation_clustering.py +108 -0
- sknetwork/clustering/tests/__init__.py +1 -0
- sknetwork/clustering/tests/test_API.py +37 -0
- sknetwork/clustering/tests/test_kmeans.py +47 -0
- sknetwork/clustering/tests/test_louvain.py +104 -0
- sknetwork/clustering/tests/test_metrics.py +50 -0
- sknetwork/clustering/tests/test_post_processing.py +23 -0
- sknetwork/clustering/tests/test_postprocess.py +39 -0
- sknetwork/data/__init__.py +5 -0
- sknetwork/data/load.py +408 -0
- sknetwork/data/models.py +459 -0
- sknetwork/data/parse.py +621 -0
- sknetwork/data/test_graphs.py +84 -0
- sknetwork/data/tests/__init__.py +1 -0
- sknetwork/data/tests/test_API.py +30 -0
- sknetwork/data/tests/test_load.py +95 -0
- sknetwork/data/tests/test_models.py +52 -0
- sknetwork/data/tests/test_parse.py +253 -0
- sknetwork/data/tests/test_test_graphs.py +30 -0
- sknetwork/data/tests/test_toy_graphs.py +68 -0
- sknetwork/data/toy_graphs.py +619 -0
- sknetwork/embedding/__init__.py +10 -0
- sknetwork/embedding/base.py +90 -0
- sknetwork/embedding/force_atlas.py +197 -0
- sknetwork/embedding/louvain_embedding.py +174 -0
- sknetwork/embedding/louvain_hierarchy.py +142 -0
- sknetwork/embedding/metrics.py +66 -0
- sknetwork/embedding/random_projection.py +133 -0
- sknetwork/embedding/spectral.py +214 -0
- sknetwork/embedding/spring.py +198 -0
- sknetwork/embedding/svd.py +363 -0
- sknetwork/embedding/tests/__init__.py +1 -0
- sknetwork/embedding/tests/test_API.py +73 -0
- sknetwork/embedding/tests/test_force_atlas.py +35 -0
- sknetwork/embedding/tests/test_louvain_embedding.py +33 -0
- sknetwork/embedding/tests/test_louvain_hierarchy.py +19 -0
- sknetwork/embedding/tests/test_metrics.py +29 -0
- sknetwork/embedding/tests/test_random_projection.py +28 -0
- sknetwork/embedding/tests/test_spectral.py +84 -0
- sknetwork/embedding/tests/test_spring.py +50 -0
- sknetwork/embedding/tests/test_svd.py +37 -0
- sknetwork/flow/__init__.py +3 -0
- sknetwork/flow/flow.py +73 -0
- sknetwork/flow/tests/__init__.py +1 -0
- sknetwork/flow/tests/test_flow.py +17 -0
- sknetwork/flow/tests/test_utils.py +69 -0
- sknetwork/flow/utils.py +91 -0
- sknetwork/gnn/__init__.py +10 -0
- sknetwork/gnn/activation.py +117 -0
- sknetwork/gnn/base.py +155 -0
- sknetwork/gnn/base_activation.py +89 -0
- sknetwork/gnn/base_layer.py +109 -0
- sknetwork/gnn/gnn_classifier.py +381 -0
- sknetwork/gnn/layer.py +153 -0
- sknetwork/gnn/layers.py +127 -0
- sknetwork/gnn/loss.py +180 -0
- sknetwork/gnn/neighbor_sampler.py +65 -0
- sknetwork/gnn/optimizer.py +163 -0
- sknetwork/gnn/tests/__init__.py +1 -0
- sknetwork/gnn/tests/test_activation.py +56 -0
- sknetwork/gnn/tests/test_base.py +79 -0
- sknetwork/gnn/tests/test_base_layer.py +37 -0
- sknetwork/gnn/tests/test_gnn_classifier.py +192 -0
- sknetwork/gnn/tests/test_layers.py +80 -0
- sknetwork/gnn/tests/test_loss.py +33 -0
- sknetwork/gnn/tests/test_neigh_sampler.py +23 -0
- sknetwork/gnn/tests/test_optimizer.py +43 -0
- sknetwork/gnn/tests/test_utils.py +93 -0
- sknetwork/gnn/utils.py +219 -0
- sknetwork/hierarchy/__init__.py +7 -0
- sknetwork/hierarchy/base.py +69 -0
- sknetwork/hierarchy/louvain_hierarchy.py +264 -0
- sknetwork/hierarchy/metrics.py +234 -0
- sknetwork/hierarchy/paris.cpython-39-darwin.so +0 -0
- sknetwork/hierarchy/paris.pyx +317 -0
- sknetwork/hierarchy/postprocess.py +350 -0
- sknetwork/hierarchy/tests/__init__.py +1 -0
- sknetwork/hierarchy/tests/test_API.py +25 -0
- sknetwork/hierarchy/tests/test_algos.py +29 -0
- sknetwork/hierarchy/tests/test_metrics.py +62 -0
- sknetwork/hierarchy/tests/test_postprocess.py +57 -0
- sknetwork/hierarchy/tests/test_ward.py +25 -0
- sknetwork/hierarchy/ward.py +94 -0
- sknetwork/linalg/__init__.py +9 -0
- sknetwork/linalg/basics.py +37 -0
- sknetwork/linalg/diteration.cpython-39-darwin.so +0 -0
- sknetwork/linalg/diteration.pyx +49 -0
- sknetwork/linalg/eig_solver.py +93 -0
- sknetwork/linalg/laplacian.py +15 -0
- sknetwork/linalg/normalization.py +66 -0
- sknetwork/linalg/operators.py +225 -0
- sknetwork/linalg/polynome.py +76 -0
- sknetwork/linalg/ppr_solver.py +170 -0
- sknetwork/linalg/push.cpython-39-darwin.so +0 -0
- sknetwork/linalg/push.pyx +73 -0
- sknetwork/linalg/sparse_lowrank.py +142 -0
- sknetwork/linalg/svd_solver.py +91 -0
- sknetwork/linalg/tests/__init__.py +1 -0
- sknetwork/linalg/tests/test_eig.py +44 -0
- sknetwork/linalg/tests/test_laplacian.py +18 -0
- sknetwork/linalg/tests/test_normalization.py +38 -0
- sknetwork/linalg/tests/test_operators.py +70 -0
- sknetwork/linalg/tests/test_polynome.py +38 -0
- sknetwork/linalg/tests/test_ppr.py +50 -0
- sknetwork/linalg/tests/test_sparse_lowrank.py +61 -0
- sknetwork/linalg/tests/test_svd.py +38 -0
- sknetwork/linkpred/__init__.py +4 -0
- sknetwork/linkpred/base.py +80 -0
- sknetwork/linkpred/first_order.py +508 -0
- sknetwork/linkpred/first_order_core.cpython-39-darwin.so +0 -0
- sknetwork/linkpred/first_order_core.pyx +315 -0
- sknetwork/linkpred/postprocessing.py +98 -0
- sknetwork/linkpred/tests/__init__.py +1 -0
- sknetwork/linkpred/tests/test_API.py +49 -0
- sknetwork/linkpred/tests/test_postprocessing.py +21 -0
- sknetwork/path/__init__.py +4 -0
- sknetwork/path/metrics.py +148 -0
- sknetwork/path/search.py +65 -0
- sknetwork/path/shortest_path.py +186 -0
- sknetwork/path/tests/__init__.py +1 -0
- sknetwork/path/tests/test_metrics.py +29 -0
- sknetwork/path/tests/test_search.py +25 -0
- sknetwork/path/tests/test_shortest_path.py +45 -0
- sknetwork/ranking/__init__.py +9 -0
- sknetwork/ranking/base.py +56 -0
- sknetwork/ranking/betweenness.cpython-39-darwin.so +0 -0
- sknetwork/ranking/betweenness.pyx +99 -0
- sknetwork/ranking/closeness.py +95 -0
- sknetwork/ranking/harmonic.py +82 -0
- sknetwork/ranking/hits.py +94 -0
- sknetwork/ranking/katz.py +81 -0
- sknetwork/ranking/pagerank.py +107 -0
- sknetwork/ranking/postprocess.py +25 -0
- sknetwork/ranking/tests/__init__.py +1 -0
- sknetwork/ranking/tests/test_API.py +34 -0
- sknetwork/ranking/tests/test_betweenness.py +38 -0
- sknetwork/ranking/tests/test_closeness.py +34 -0
- sknetwork/ranking/tests/test_hits.py +20 -0
- sknetwork/ranking/tests/test_pagerank.py +69 -0
- sknetwork/regression/__init__.py +4 -0
- sknetwork/regression/base.py +56 -0
- sknetwork/regression/diffusion.py +190 -0
- sknetwork/regression/tests/__init__.py +1 -0
- sknetwork/regression/tests/test_API.py +34 -0
- sknetwork/regression/tests/test_diffusion.py +48 -0
- sknetwork/sknetwork.py +3 -0
- sknetwork/topology/__init__.py +9 -0
- sknetwork/topology/dag.py +74 -0
- sknetwork/topology/dag_core.cpython-39-darwin.so +0 -0
- sknetwork/topology/dag_core.pyx +38 -0
- sknetwork/topology/kcliques.cpython-39-darwin.so +0 -0
- sknetwork/topology/kcliques.pyx +193 -0
- sknetwork/topology/kcore.cpython-39-darwin.so +0 -0
- sknetwork/topology/kcore.pyx +120 -0
- sknetwork/topology/structure.py +234 -0
- sknetwork/topology/tests/__init__.py +1 -0
- sknetwork/topology/tests/test_cliques.py +28 -0
- sknetwork/topology/tests/test_cores.py +21 -0
- sknetwork/topology/tests/test_dag.py +26 -0
- sknetwork/topology/tests/test_structure.py +99 -0
- sknetwork/topology/tests/test_triangles.py +42 -0
- sknetwork/topology/tests/test_wl_coloring.py +49 -0
- sknetwork/topology/tests/test_wl_kernel.py +31 -0
- sknetwork/topology/triangles.cpython-39-darwin.so +0 -0
- sknetwork/topology/triangles.pyx +166 -0
- sknetwork/topology/weisfeiler_lehman.py +163 -0
- sknetwork/topology/weisfeiler_lehman_core.cpython-39-darwin.so +0 -0
- sknetwork/topology/weisfeiler_lehman_core.pyx +116 -0
- sknetwork/utils/__init__.py +40 -0
- sknetwork/utils/base.py +35 -0
- sknetwork/utils/check.py +354 -0
- sknetwork/utils/co_neighbor.py +71 -0
- sknetwork/utils/format.py +219 -0
- sknetwork/utils/kmeans.py +89 -0
- sknetwork/utils/knn.py +166 -0
- sknetwork/utils/knn1d.cpython-39-darwin.so +0 -0
- sknetwork/utils/knn1d.pyx +80 -0
- sknetwork/utils/membership.py +82 -0
- sknetwork/utils/minheap.cpython-39-darwin.so +0 -0
- sknetwork/utils/minheap.pxd +22 -0
- sknetwork/utils/minheap.pyx +111 -0
- sknetwork/utils/neighbors.py +115 -0
- sknetwork/utils/seeds.py +75 -0
- sknetwork/utils/simplex.py +140 -0
- sknetwork/utils/tests/__init__.py +1 -0
- sknetwork/utils/tests/test_base.py +28 -0
- sknetwork/utils/tests/test_bunch.py +16 -0
- sknetwork/utils/tests/test_check.py +190 -0
- sknetwork/utils/tests/test_co_neighbor.py +43 -0
- sknetwork/utils/tests/test_format.py +61 -0
- sknetwork/utils/tests/test_kmeans.py +21 -0
- sknetwork/utils/tests/test_knn.py +32 -0
- sknetwork/utils/tests/test_membership.py +24 -0
- sknetwork/utils/tests/test_neighbors.py +41 -0
- sknetwork/utils/tests/test_projection_simplex.py +33 -0
- sknetwork/utils/tests/test_seeds.py +67 -0
- sknetwork/utils/tests/test_verbose.py +15 -0
- sknetwork/utils/tests/test_ward.py +20 -0
- sknetwork/utils/timeout.py +38 -0
- sknetwork/utils/verbose.py +37 -0
- sknetwork/utils/ward.py +60 -0
- sknetwork/visualization/__init__.py +4 -0
- sknetwork/visualization/colors.py +34 -0
- sknetwork/visualization/dendrograms.py +229 -0
- sknetwork/visualization/graphs.py +819 -0
- sknetwork/visualization/tests/__init__.py +1 -0
- sknetwork/visualization/tests/test_dendrograms.py +53 -0
- sknetwork/visualization/tests/test_graphs.py +167 -0

sknetwork/linkpred/first_order_core.pyx
@@ -0,0 +1,315 @@
+# distutils: language = c++
+# cython: language_level=3
+# cython: linetrace=True
+# distutils: define_macros=CYTHON_TRACE_NOGIL=1
+"""
+Created on July, 2020
+@author: Nathan de Lara <nathan.delara@polytechnique.org>
+"""
+from libc.math cimport log, sqrt
+from libcpp.vector cimport vector
+
+ctypedef float (*int2float)(int)
+ctypedef float (*vectors2float)(vector[int], vector[int])
+
+
+cdef float inv(int a):
+    """Inverse function"""
+    return 1 / a
+
+
+cdef float inv_log(int a):
+    """Inverse of log function"""
+    return 1 / log(a)
+
+
+cdef vector[int] vector_intersection(vector[int] a, vector[int] b):
+    """Common elements in two sorted vectors. Each element is assumed unique in each vector."""
+    cdef vector[int] intersection
+    cdef int e_a, e_b
+    cdef int ix_a = 0
+    cdef int ix_b = 0
+    cdef int size_a = a.size()
+    cdef int size_b = b.size()
+
+    while ix_a < size_a and ix_b < size_b:
+        e_a = a[ix_a]
+        e_b = b[ix_b]
+
+        if e_a < e_b:
+            ix_a += 1
+        elif e_b < e_a:
+            ix_b += 1
+        else:
+            intersection.push_back(e_a)
+            ix_a += 1
+            ix_b += 1
+
+    return intersection
+
+
+cdef float size_intersection(vector[int] a, vector[int] b):
+    """Size of the intersection of two vectors"""
+    return vector_intersection(a, b).size()
+
+
+cdef float jaccard(vector[int] a, vector[int] b):
+    """Jaccard coefficient"""
+    cdef float size_inter = size_intersection(a, b)
+    cdef float size_union = a.size() + b.size() - size_inter
+    return size_inter / size_union
+
+
+cdef float salton(vector[int] a, vector[int] b):
+    """Salton coefficient"""
+    cdef float size_inter = size_intersection(a, b)
+    return size_inter / sqrt(a.size() * b.size())
+
+
+cdef float sorensen(vector[int] a, vector[int] b):
+    """Sorensen coefficient"""
+    cdef float size_inter = size_intersection(a, b)
+    return 2 * size_inter / (a.size() + b.size())
+
+
+cdef float hub_promoted(vector[int] a, vector[int] b):
+    """Hub promoted coefficient"""
+    cdef float size_inter = size_intersection(a, b)
+    return size_inter / min(a.size(), b.size())
+
+
+cdef float hub_depressed(vector[int] a, vector[int] b):
+    """Hub depressed coefficient"""
+    cdef float size_inter = size_intersection(a, b)
+    return size_inter / max(a.size(), b.size())
+
+
+cdef vector[int] neighbors(int[:] indptr, int[:] indices, int node):
+    """Neighbors of a given node"""
+    cdef int j1 = indptr[node]
+    cdef int j2 = indptr[node + 1]
+    cdef int j
+    cdef vector[int] neigh = ()
+
+    for j in range(j1, j2):
+        neigh.push_back(indices[j])
+
+    return neigh
+
+
+cdef vector[float] predict_node_core(int[:] indptr, int[:] indices, int source, int[:] targets,
+                                     vectors2float weight_func):
+    """Scores based on global information about common neighbors for a single source.
+
+    Parameters
+    ----------
+    indptr :
+        indptr array of the adjacency matrix
+    indices :
+        indices array of the adjacency matrix
+    source :
+        source index
+    targets :
+        array of target indices
+    weight_func :
+        scoring function to be used
+
+    Returns
+    -------
+    scores :
+        vector of node pair scores
+    """
+    cdef int target, i
+    cdef int n_targets = targets.shape[0]
+    cdef vector[float] preds
+
+    cdef vector[int] neigh_s = neighbors(indptr, indices, source)
+    cdef vector[int] neigh_t
+    for i in range(n_targets):
+        target = targets[i]
+        neigh_t = neighbors(indptr, indices, target)
+        preds.push_back(weight_func(neigh_s, neigh_t))
+
+    return preds
+
+
+cdef vector[float] predict_edges_core(int[:] indptr, int[:] indices, int[:, :] edges,
+                                      vectors2float weight_func):
+    """Scores based on global information about common neighbors for a list of edges.
+
+    Parameters
+    ----------
+    indptr :
+        indptr array of the adjacency matrix
+    indices :
+        indices array of the adjacency matrix
+    edges:
+        array of node pairs to be scored
+    weight_func :
+        scoring function to be used
+
+    Returns
+    -------
+    scores :
+        vector of node pair scores
+    """
+
+    cdef vector[float] preds
+    cdef int source, target, i
+
+    cdef int n_edges = edges.shape[0]
+    for i in range(n_edges):
+        source, target = edges[i, 0], edges[i, 1]
+        neigh_s = neighbors(indptr, indices, source)
+        neigh_t = neighbors(indptr, indices, target)
+        preds.push_back(weight_func(neigh_s, neigh_t))
+
+    return preds
+
+
+def common_neighbors_node_core(int[:] indptr, int[:] indices, int source, int[:] targets):
+    """Number of common neighbors"""
+    return predict_node_core(indptr, indices, source, targets, size_intersection)
+
+def common_neighbors_edges_core(int[:] indptr, int[:] indices, int[:, :] edges):
+    """Number of common neighbors"""
+    return predict_edges_core(indptr, indices, edges, size_intersection)
+
+def jaccard_node_core(int[:] indptr, int[:] indices, int source, int[:] targets):
+    """Jaccard coefficient of common neighbors"""
+    return predict_node_core(indptr, indices, source, targets, jaccard)
+
+def jaccard_edges_core(int[:] indptr, int[:] indices, int[:, :] edges):
+    """Jaccard coefficient of common neighbors"""
+    return predict_edges_core(indptr, indices, edges, jaccard)
+
+def salton_node_core(int[:] indptr, int[:] indices, int source, int[:] targets):
+    """Salton coefficient of common neighbors"""
+    return predict_node_core(indptr, indices, source, targets, salton)
+
+def salton_edges_core(int[:] indptr, int[:] indices, int[:, :] edges):
+    """Salton coefficient of common neighbors"""
+    return predict_edges_core(indptr, indices, edges, salton)
+
+def sorensen_node_core(int[:] indptr, int[:] indices, int source, int[:] targets):
+    """Sorensen coefficient of common neighbors"""
+    return predict_node_core(indptr, indices, source, targets, sorensen)
+
+def sorensen_edges_core(int[:] indptr, int[:] indices, int[:, :] edges):
+    """Sorensen coefficient of common neighbors"""
+    return predict_edges_core(indptr, indices, edges, sorensen)
+
+def hub_promoted_node_core(int[:] indptr, int[:] indices, int source, int[:] targets):
+    """Hub promoted coefficient of common neighbors"""
+    return predict_node_core(indptr, indices, source, targets, hub_promoted)
+
+def hub_promoted_edges_core(int[:] indptr, int[:] indices, int[:, :] edges):
+    """Hub promoted coefficient of common neighbors"""
+    return predict_edges_core(indptr, indices, edges, hub_promoted)
+
+def hub_depressed_node_core(int[:] indptr, int[:] indices, int source, int[:] targets):
+    """Hub depressed coefficient of common neighbors"""
+    return predict_node_core(indptr, indices, source, targets, hub_depressed)
+
+def hub_depressed_edges_core(int[:] indptr, int[:] indices, int[:, :] edges):
+    """Hub depressed coefficient of common neighbors"""
+    return predict_edges_core(indptr, indices, edges, hub_depressed)
+
+cdef vector[float] predict_node_weighted_core(int[:] indptr, int[:] indices, int source, int[:] targets,
+                                              int2float weight_func):
+    """Scores based on the degrees of common neighbors for a single source.
+
+    Parameters
+    ----------
+    indptr :
+        indptr array of the adjacency matrix
+    indices :
+        indices array of the adjacency matrix
+    source :
+        source index
+    targets :
+        array of target indices
+    weight_func :
+        scoring function to be used
+
+    Returns
+    -------
+    scores :
+        vector of node pair scores
+    """
+    cdef int target, i, j
+    cdef int n_targets = targets.shape[0]
+    cdef float weight
+    cdef vector[int] intersection
+    cdef vector[float] preds
+
+    cdef vector[int] neigh_s = neighbors(indptr, indices, source)
+    cdef vector[int] neigh_t
+    for i in range(n_targets):
+        target = targets[i]
+        neigh_t = neighbors(indptr, indices, target)
+        intersection = vector_intersection(neigh_s, neigh_t)
+
+        weight = 0
+        for j in intersection:
+            weight += weight_func(indptr[j+1] - indptr[j])
+        preds.push_back(weight)
+
+    return preds
+
+
+cdef vector[float] predict_edges_weighted_core(int[:] indptr, int[:] indices, int[:, :] edges,
+                                               int2float weight_func):
+    """Scores based on the degrees of common neighbors for a list of edges.
+
+    Parameters
+    ----------
+    indptr :
+        indptr array of the adjacency matrix
+    indices :
+        indices array of the adjacency matrix
+    edges:
+        array of node pairs to be scored
+    weight_func :
+        scoring function to be used
+
+    Returns
+    -------
+    scores :
+        vector of node pair scores
+    """
+    cdef vector[float] preds
+    cdef int source, target, i
+    cdef float weight
+    cdef vector[int] intersection
+
+    cdef int n_edges = edges.shape[0]
+    for i in range(n_edges):
+        source, target = edges[i][0], edges[i][1]
+        neigh_s = neighbors(indptr, indices, source)
+        neigh_t = neighbors(indptr, indices, target)
+
+        intersection = vector_intersection(neigh_s, neigh_t)
+
+        weight = 0
+        for j in intersection:
+            weight += weight_func(indptr[j+1] - indptr[j])
+        preds.push_back(weight)
+
+    return preds
+
+
+def adamic_adar_node_core(int[:] indptr, int[:] indices, int source, int[:] targets):
+    """Adamic Adar index"""
+    return predict_node_weighted_core(indptr, indices, source, targets, inv_log)
+
+def adamic_adar_edges_core(int[:] indptr, int[:] indices, int[:, :] edges):
+    """Adamic Adar index"""
+    return predict_edges_weighted_core(indptr, indices, edges, inv_log)
+
+def resource_allocation_node_core(int[:] indptr, int[:] indices, int source, int[:] targets):
+    """Resource Allocation index"""
+    return predict_node_weighted_core(indptr, indices, source, targets, inv)
+
+def resource_allocation_edges_core(int[:] indptr, int[:] indices, int[:, :] edges):
+    """Resource Allocation index"""
+    return predict_edges_weighted_core(indptr, indices, edges, inv)
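
The compiled module above dispatches a per-pair scoring function over the CSR arrays (indptr, indices) of the adjacency matrix. As a reading aid only, not part of the package, here is a minimal NumPy sketch of what two of these first-order indices compute, assuming a SciPy CSR adjacency; the function names below are hypothetical.

# Hypothetical NumPy sketch (not part of the package) of two first-order indices.
import numpy as np
from scipy import sparse


def csr_neighbors(indptr: np.ndarray, indices: np.ndarray, node: int) -> np.ndarray:
    """Sorted neighbor indices of `node` in a CSR adjacency."""
    return indices[indptr[node]:indptr[node + 1]]


def jaccard_score(adjacency: sparse.csr_matrix, source: int, target: int) -> float:
    """Jaccard coefficient of common neighbors, in the spirit of the `jaccard` cdef above."""
    a = csr_neighbors(adjacency.indptr, adjacency.indices, source)
    b = csr_neighbors(adjacency.indptr, adjacency.indices, target)
    inter = np.intersect1d(a, b, assume_unique=True).size
    union = a.size + b.size - inter
    return inter / union if union else 0.0


def adamic_adar_score(adjacency: sparse.csr_matrix, source: int, target: int) -> float:
    """Sum of 1/log(degree) over common neighbors, as in predict_node_weighted_core with inv_log."""
    a = csr_neighbors(adjacency.indptr, adjacency.indices, source)
    b = csr_neighbors(adjacency.indptr, adjacency.indices, target)
    common = np.intersect1d(a, b, assume_unique=True)
    # Degrees read directly from indptr, mirroring weight_func(indptr[j+1] - indptr[j]).
    degrees = adjacency.indptr[common + 1] - adjacency.indptr[common]
    return float(np.sum(1.0 / np.log(degrees)))

The compiled version avoids building per-pair Python objects: it walks the two sorted index slices once (vector_intersection) and reads degrees straight from indptr, which is what keeps the node-wise and edge-wise loops cheap.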

sknetwork/linkpred/postprocessing.py
@@ -0,0 +1,98 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Created on July, 2020
+@author: Nathan de Lara <nathan.delara@polytechnique.org>
+"""
+from typing import Union, Iterable, Tuple
+
+import numpy as np
+from scipy import sparse
+
+
+def is_edge(adjacency: sparse.csr_matrix, query: Union[int, Iterable, Tuple]) -> Union[bool, np.ndarray]:
+    """Given a query, return whether each edge is actually in the adjacency.
+
+    Parameters
+    ----------
+    adjacency :
+        Adjacency matrix of the graph.
+    query : int, Iterable or Tuple
+        * If int i, queries (i, j) for all j.
+        * If Iterable of integers, return queries (i, j) for i in query, for all j.
+        * If tuple (i, j), queries (i, j).
+        * If list of tuples or array of shape (n_queries, 2), queries (i, j) for each line in query.
+
+    Returns
+    -------
+    labels_true : Union[bool, np.ndarray]
+        For each element in the query, returns ``True`` if the edge exists in the adjacency and ``False`` otherwise.
+
+    Examples
+    --------
+    >>> from sknetwork.data import house
+    >>> adjacency = house()
+    >>> is_edge(adjacency, 0)
+    array([False, True, False, False, True])
+    >>> is_edge(adjacency, [0, 1])
+    array([[False, True, False, False, True],
+           [ True, False, True, False, True]])
+    >>> is_edge(adjacency, (0, 1))
+    True
+    >>> is_edge(adjacency, [(0, 1), (0, 2)])
+    array([ True, False])
+    """
+    if np.issubdtype(type(query), np.integer):
+        return adjacency[query].toarray().astype(bool).ravel()
+    if isinstance(query, Tuple):
+        source, target = query
+        neighbors = adjacency.indices[adjacency.indptr[source]:adjacency.indptr[source + 1]]
+        return bool(np.isin(target, neighbors, assume_unique=True))
+    if isinstance(query, list):
+        query = np.array(query)
+    if isinstance(query, np.ndarray):
+        if query.ndim == 1:
+            return adjacency[query].toarray().astype(bool)
+        elif query.ndim == 2 and query.shape[1] == 2:
+            y_true = []
+            for edge in query:
+                y_true.append(is_edge(adjacency, (edge[0], edge[1])))
+            return np.array(y_true)
+        else:
+            raise ValueError("Query not understood.")
+    else:
+        raise ValueError("Query not understood.")
+
+
+def whitened_sigmoid(scores: np.ndarray):
+    """Map the entries of a score array to probabilities through
+
+    :math:`\\dfrac{1}{1 + \\exp(-(x - \\mu)/\\sigma)}`,
+
+    where :math:`\\mu` and :math:`\\sigma` are respectively the mean and standard deviation of x.
+
+    Parameters
+    ----------
+    scores : np.ndarray
+        The input array
+
+    Returns
+    -------
+    probas : np.ndarray
+        Array with entries between 0 and 1.
+
+    Examples
+    --------
+    >>> probas = whitened_sigmoid(np.array([1, 5, 0.25]))
+    >>> probas.round(2)
+    array([0.37, 0.8 , 0.29])
+    >>> probas = whitened_sigmoid(np.array([2, 2, 2]))
+    >>> probas
+    array([1, 1, 1])
+    """
+    mu = scores.mean()
+    sigma = scores.std()
+    if sigma > 0:
+        return 1 / (1 + np.exp(-(scores - mu) / sigma))
+    else:
+        return np.ones_like(scores)
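
whitened_sigmoid standardizes the scores before applying the logistic function, so each output probability depends only on how far the score sits from the mean, in units of standard deviation, with a fallback to all ones when the scores are constant. A minimal standalone sketch of that computation (NumPy only, not importing the package), reproducing the doctest values:

# Standalone sketch of the whitening + sigmoid step used by whitened_sigmoid.
import numpy as np

scores = np.array([1.0, 5.0, 0.25])
mu, sigma = scores.mean(), scores.std()          # empirical mean and standard deviation
probas = 1.0 / (1.0 + np.exp(-(scores - mu) / sigma))
print(probas.round(2))                           # [0.37 0.8  0.29], matching the doctest above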

sknetwork/linkpred/tests/__init__.py
@@ -0,0 +1 @@
+"""tests for link prediction"""

sknetwork/linkpred/tests/test_API.py
@@ -0,0 +1,49 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""tests for link prediction API"""
+import unittest
+from abc import ABC
+from typing import Iterable
+
+import numpy as np
+
+from sknetwork.data import house
+from sknetwork.linkpred.base import BaseLinkPred
+
+
+class Dummy(BaseLinkPred, ABC):
+    """Dummy algorithm for testing purposes"""
+    def __init__(self):
+        super(Dummy, self).__init__()
+
+    def fit(self, *args, **kwargs):
+        """Dummy fit method"""
+        return self
+
+    def _predict_base(self, source: int, targets: Iterable):
+        return np.array([1 for _ in targets])
+
+
+class TestLinkPred(unittest.TestCase):
+
+    def test_base(self):
+        algo = BaseLinkPred()
+        query = 0
+        with self.assertRaises(NotImplementedError):
+            algo.predict(query)
+        query = (0, 1)
+        with self.assertRaises(NotImplementedError):
+            algo.predict(query)
+
+    def test_query(self):
+        adjacency = house()
+        algo = Dummy().fit(adjacency)
+        query = 'toto'
+        with self.assertRaises(ValueError):
+            algo.predict(query)
+        query = np.ones((2, 3))
+        with self.assertRaises(ValueError):
+            algo.predict(query)
+        query = [(0, 1), (2, 3)]
+        pred = algo.predict(query)
+        self.assertEqual(pred.shape, (len(query),))
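
These tests pin down the query protocol of BaseLinkPred.predict: an integer scores one source against every node, a pair returns a single score, a list or (n_queries, 2) array returns one score per pair, and anything else raises ValueError. The following is a hypothetical sketch of such a dispatcher, written only to illustrate the protocol the tests exercise; it is not the package's base.py.

# Hypothetical dispatcher mirroring the query protocol exercised by test_query.
import numpy as np


def dispatch_predict(predict_base, query, n_nodes: int):
    """Route a query to per-(source, targets) scoring, as the tests expect."""
    if isinstance(query, (int, np.integer)):
        # Single source: score it against every node.
        return np.asarray(predict_base(query, np.arange(n_nodes)))
    if isinstance(query, tuple) and len(query) == 2:
        # Single (source, target) pair: return a scalar score.
        return predict_base(query[0], [query[1]])[0]
    query = np.asarray(query)
    if query.ndim == 2 and query.shape[1] == 2:
        # List of pairs or array of shape (n_queries, 2): one score per pair.
        return np.array([predict_base(s, [t])[0] for s, t in query])
    raise ValueError("Query not understood.")


# With a dummy scorer that always returns 1, a list of two pairs yields shape (2,).
scores = dispatch_predict(lambda s, ts: np.array([1 for _ in ts]), [(0, 1), (2, 3)], n_nodes=5)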

sknetwork/linkpred/tests/test_postprocessing.py
@@ -0,0 +1,21 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""tests for link prediction postprocessing"""
+import unittest
+
+import numpy as np
+
+from sknetwork.data import house
+from sknetwork.linkpred import is_edge
+
+
+class TestLinkPredPostProcessing(unittest.TestCase):
+
+    def test_signature(self):
+        adjacency = house()
+
+        with self.assertRaises(ValueError):
+            is_edge(adjacency, 'toto')
+
+        with self.assertRaises(ValueError):
+            is_edge(adjacency, np.ones(8).reshape((2, 2, 2)))

sknetwork/path/metrics.py
@@ -0,0 +1,148 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Created on May, 2020
+@author: Nathan de Lara <nathan.delara@polytechnique.org>
+"""
+from typing import Union, Optional
+
+import numpy as np
+from scipy import sparse
+
+from sknetwork.path.shortest_path import get_distances
+
+
+def get_diameter(adjacency: Union[sparse.csr_matrix, np.ndarray],
+                 n_sources: Optional[Union[int, float]] = None,
+                 unweighted: bool = False, n_jobs: Optional[int] = None) -> int:
+    """Lower bound on the diameter of a graph, which is the length of the longest
+    shortest path between two nodes.
+
+
+    Parameters
+    ----------
+    adjacency :
+        Adjacency matrix of the graph.
+    n_sources :
+        Number of source nodes to use for approximation.
+
+        * If None, compute exact diameter.
+        * If int, sample n_sources source nodes at random.
+        * If float, sample (n_sources * n) source nodes at random.
+    unweighted:
+        Whether or not the graph is unweighted.
+    n_jobs :
+        If an integer value is given, denotes the number of workers to use (-1
+        means the maximum number will be used).
+        If ``None``, no parallel computations are made.
+
+    Returns
+    -------
+    diameter : int
+
+    Examples
+    --------
+    >>> from sknetwork.data import house
+    >>> adjacency = house()
+    >>> d_exact = get_diameter(adjacency)
+    >>> d_exact
+    2
+    >>> d_approx = get_diameter(adjacency, 2)
+    >>> d_approx <= d_exact
+    True
+    >>> d_approx = get_diameter(adjacency, 0.5)
+    >>> d_approx <= d_exact
+    True
+
+    Notes
+    -----
+    This is a basic implementation that computes distances between nodes and
+    returns the maximum.
+    """
+    n = adjacency.shape[0]
+    if n_sources is None or n_sources == n:
+        sources = np.arange(n)
+    else:
+        if np.issubdtype(type(n_sources), np.floating) and n_sources < 1.:
+            n_sources = int(n_sources * n)
+        if np.issubdtype(type(n_sources), np.integer) and n_sources <= n:
+            sources = np.random.choice(n, n_sources, replace=False)
+        else:
+            raise ValueError("n_sources must be either None, an integer smaller "
+                             "than the number of nodes or a float "
+                             "smaller than 1.")
+    dists = get_distances(adjacency, sources, method='D', return_predecessors=False,
+                          unweighted=unweighted, n_jobs=n_jobs).astype(int)
+    return dists.max()
+
+
+def get_radius(adjacency: Union[sparse.csr_matrix, np.ndarray],
+               n_sources: Optional[Union[int, float]] = None,
+               unweighted: bool = False, n_jobs: Optional[int] = None) -> int:
+    """Computes the radius of the graph. The radius of the graph is the
+    minimum eccentricity of its nodes.
+
+    Parameters
+    ----------
+    adjacency :
+        Adjacency matrix of the graph.
+    n_sources :
+        Number of source nodes to use for approximation.
+
+        * If None, compute exact radius.
+        * If int, sample n_sources source nodes at random.
+        * If float, sample (n_sources * n) source nodes at random.
+    unweighted:
+        Whether the graph is unweighted.
+    n_jobs :
+        If an integer value is given, denotes the number of workers to use (-1
+        means the maximum number will be used).
+        If ``None``, no parallel computations are made.
+
+    Returns
+    -------
+    radius : int
+
+    Notes
+    -----
+    This is a basic implementation that computes distances between nodes and
+    returns the minimum eccentricity.
+    """
+
+    # Get the distances from each source node.
+    dists = get_distances(adjacency, sources=n_sources, method='D',
+                          return_predecessors=False,
+                          unweighted=unweighted, n_jobs=n_jobs).astype(int)
+    # Get the eccentricities of each node.
+    eccentricities = dists.max(axis=1)
+
+    return eccentricities.min()
+
+
+def get_eccentricity(adjacency: Union[sparse.csr_matrix, np.ndarray], node: int,
+                     unweighted: bool = False,
+                     n_jobs: Optional[int] = None) -> int:
+    """Computes the eccentricity of a node. The eccentricity of a node, u, is the
+    maximum length of the shortest paths from u to the other nodes in the graph.
+
+    Parameters
+    ----------
+    adjacency :
+        Adjacency matrix of the graph.
+    node:
+        The node to compute the eccentricity for.
+    unweighted:
+        Whether or not the graph is unweighted.
+    n_jobs :
+        If an integer value is given, denotes the number of workers to use (-1
+        means the maximum number will be used).
+        If ``None``, no parallel computations are made.
+
+    Returns
+    -------
+    eccentricity : int
+    """
+
+    dists = get_distances(adjacency, node, method='D', return_predecessors=False,
+                          unweighted=unweighted, n_jobs=n_jobs).astype(int)
+    return dists.max()
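
get_diameter, get_radius and get_eccentricity all reduce a Dijkstra distance matrix: the maximum distance gives the diameter, the minimum per-node eccentricity gives the radius, and the maximum of a single row gives a node's eccentricity. A short sketch of the same reductions using scipy.sparse.csgraph directly (an assumption: the package routes this through its own get_distances wrapper instead):

# Sketch of the reductions behind these metrics, using SciPy's shortest_path directly.
import numpy as np
from scipy.sparse.csgraph import shortest_path
from sknetwork.data import house

adjacency = house()
# All-pairs distances with Dijkstra, as method='D' does above.
dists = shortest_path(adjacency, method='D', unweighted=True)

eccentricities = dists.max(axis=1)    # worst-case distance from each node
print(int(eccentricities.max()))      # diameter: 2, matching the get_diameter doctest
print(int(eccentricities.min()))      # radius: minimum eccentricity over nodes
print(int(dists[0].max()))            # eccentricity of node 0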