scikit-network 0.30.0__cp39-cp39-win_amd64.whl → 0.32.1__cp39-cp39-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of scikit-network might be problematic. Click here for more details.
- {scikit_network-0.30.0.dist-info → scikit_network-0.32.1.dist-info}/AUTHORS.rst +3 -0
- {scikit_network-0.30.0.dist-info → scikit_network-0.32.1.dist-info}/METADATA +31 -3
- scikit_network-0.32.1.dist-info/RECORD +228 -0
- {scikit_network-0.30.0.dist-info → scikit_network-0.32.1.dist-info}/WHEEL +1 -1
- sknetwork/__init__.py +1 -1
- sknetwork/base.py +67 -0
- sknetwork/classification/base.py +24 -24
- sknetwork/classification/base_rank.py +17 -25
- sknetwork/classification/diffusion.py +35 -35
- sknetwork/classification/knn.py +24 -21
- sknetwork/classification/metrics.py +1 -1
- sknetwork/classification/pagerank.py +10 -10
- sknetwork/classification/propagation.py +23 -20
- sknetwork/classification/tests/test_diffusion.py +13 -3
- sknetwork/classification/vote.cp39-win_amd64.pyd +0 -0
- sknetwork/classification/vote.cpp +14482 -10351
- sknetwork/classification/vote.pyx +1 -3
- sknetwork/clustering/__init__.py +3 -1
- sknetwork/clustering/base.py +36 -40
- sknetwork/clustering/kcenters.py +253 -0
- sknetwork/clustering/leiden.py +241 -0
- sknetwork/clustering/leiden_core.cp39-win_amd64.pyd +0 -0
- sknetwork/clustering/leiden_core.cpp +31564 -0
- sknetwork/clustering/leiden_core.pyx +124 -0
- sknetwork/clustering/louvain.py +133 -102
- sknetwork/clustering/louvain_core.cp39-win_amd64.pyd +0 -0
- sknetwork/clustering/louvain_core.cpp +22457 -18792
- sknetwork/clustering/louvain_core.pyx +86 -96
- sknetwork/clustering/postprocess.py +2 -2
- sknetwork/clustering/propagation_clustering.py +15 -19
- sknetwork/clustering/tests/test_API.py +8 -4
- sknetwork/clustering/tests/test_kcenters.py +92 -0
- sknetwork/clustering/tests/test_leiden.py +34 -0
- sknetwork/clustering/tests/test_louvain.py +3 -4
- sknetwork/data/__init__.py +2 -1
- sknetwork/data/base.py +28 -0
- sknetwork/data/load.py +38 -37
- sknetwork/data/models.py +18 -18
- sknetwork/data/parse.py +54 -33
- sknetwork/data/test_graphs.py +2 -2
- sknetwork/data/tests/test_API.py +1 -1
- sknetwork/data/tests/test_base.py +14 -0
- sknetwork/data/tests/test_load.py +1 -1
- sknetwork/data/tests/test_parse.py +9 -12
- sknetwork/data/tests/test_test_graphs.py +1 -2
- sknetwork/data/toy_graphs.py +18 -18
- sknetwork/embedding/__init__.py +0 -1
- sknetwork/embedding/base.py +21 -20
- sknetwork/embedding/force_atlas.py +3 -2
- sknetwork/embedding/louvain_embedding.py +2 -2
- sknetwork/embedding/random_projection.py +5 -3
- sknetwork/embedding/spectral.py +0 -73
- sknetwork/embedding/tests/test_API.py +4 -28
- sknetwork/embedding/tests/test_louvain_embedding.py +4 -9
- sknetwork/embedding/tests/test_random_projection.py +2 -2
- sknetwork/embedding/tests/test_spectral.py +5 -8
- sknetwork/embedding/tests/test_svd.py +1 -1
- sknetwork/gnn/base.py +4 -4
- sknetwork/gnn/base_layer.py +3 -3
- sknetwork/gnn/gnn_classifier.py +45 -89
- sknetwork/gnn/layer.py +1 -1
- sknetwork/gnn/loss.py +1 -1
- sknetwork/gnn/optimizer.py +4 -3
- sknetwork/gnn/tests/test_base_layer.py +4 -4
- sknetwork/gnn/tests/test_gnn_classifier.py +12 -35
- sknetwork/gnn/utils.py +8 -8
- sknetwork/hierarchy/base.py +29 -2
- sknetwork/hierarchy/louvain_hierarchy.py +45 -41
- sknetwork/hierarchy/paris.cp39-win_amd64.pyd +0 -0
- sknetwork/hierarchy/paris.cpp +27369 -22852
- sknetwork/hierarchy/paris.pyx +7 -9
- sknetwork/hierarchy/postprocess.py +16 -16
- sknetwork/hierarchy/tests/test_API.py +1 -1
- sknetwork/hierarchy/tests/test_algos.py +5 -0
- sknetwork/hierarchy/tests/test_metrics.py +1 -1
- sknetwork/linalg/__init__.py +1 -1
- sknetwork/linalg/diteration.cp39-win_amd64.pyd +0 -0
- sknetwork/linalg/diteration.cpp +13474 -9454
- sknetwork/linalg/diteration.pyx +0 -2
- sknetwork/linalg/eig_solver.py +1 -1
- sknetwork/linalg/{normalization.py → normalizer.py} +18 -15
- sknetwork/linalg/operators.py +1 -1
- sknetwork/linalg/ppr_solver.py +1 -1
- sknetwork/linalg/push.cp39-win_amd64.pyd +0 -0
- sknetwork/linalg/push.cpp +22993 -18807
- sknetwork/linalg/push.pyx +0 -2
- sknetwork/linalg/svd_solver.py +1 -1
- sknetwork/linalg/tests/test_normalization.py +3 -7
- sknetwork/linalg/tests/test_operators.py +4 -8
- sknetwork/linalg/tests/test_ppr.py +1 -1
- sknetwork/linkpred/base.py +13 -2
- sknetwork/linkpred/nn.py +6 -6
- sknetwork/log.py +19 -0
- sknetwork/path/__init__.py +4 -3
- sknetwork/path/dag.py +54 -0
- sknetwork/path/distances.py +98 -0
- sknetwork/path/search.py +13 -47
- sknetwork/path/shortest_path.py +37 -162
- sknetwork/path/tests/test_dag.py +37 -0
- sknetwork/path/tests/test_distances.py +62 -0
- sknetwork/path/tests/test_search.py +26 -11
- sknetwork/path/tests/test_shortest_path.py +31 -36
- sknetwork/ranking/__init__.py +0 -1
- sknetwork/ranking/base.py +13 -8
- sknetwork/ranking/betweenness.cp39-win_amd64.pyd +0 -0
- sknetwork/ranking/betweenness.cpp +5709 -3017
- sknetwork/ranking/betweenness.pyx +0 -2
- sknetwork/ranking/closeness.py +7 -10
- sknetwork/ranking/pagerank.py +14 -14
- sknetwork/ranking/postprocess.py +12 -3
- sknetwork/ranking/tests/test_API.py +2 -4
- sknetwork/ranking/tests/test_betweenness.py +3 -3
- sknetwork/ranking/tests/test_closeness.py +3 -7
- sknetwork/ranking/tests/test_pagerank.py +11 -5
- sknetwork/ranking/tests/test_postprocess.py +5 -0
- sknetwork/regression/base.py +19 -2
- sknetwork/regression/diffusion.py +24 -10
- sknetwork/regression/tests/test_diffusion.py +8 -0
- sknetwork/test_base.py +35 -0
- sknetwork/test_log.py +15 -0
- sknetwork/topology/__init__.py +7 -8
- sknetwork/topology/cliques.cp39-win_amd64.pyd +0 -0
- sknetwork/topology/{kcliques.cpp → cliques.cpp} +23412 -20276
- sknetwork/topology/cliques.pyx +149 -0
- sknetwork/topology/core.cp39-win_amd64.pyd +0 -0
- sknetwork/topology/{kcore.cpp → core.cpp} +21732 -18867
- sknetwork/topology/core.pyx +90 -0
- sknetwork/topology/cycles.py +243 -0
- sknetwork/topology/minheap.cp39-win_amd64.pyd +0 -0
- sknetwork/{utils → topology}/minheap.cpp +19452 -15368
- sknetwork/{utils → topology}/minheap.pxd +1 -3
- sknetwork/{utils → topology}/minheap.pyx +1 -3
- sknetwork/topology/structure.py +3 -43
- sknetwork/topology/tests/test_cliques.py +11 -11
- sknetwork/topology/tests/test_core.py +19 -0
- sknetwork/topology/tests/test_cycles.py +65 -0
- sknetwork/topology/tests/test_structure.py +2 -16
- sknetwork/topology/tests/test_triangles.py +11 -15
- sknetwork/topology/tests/test_wl.py +72 -0
- sknetwork/topology/triangles.cp39-win_amd64.pyd +0 -0
- sknetwork/topology/triangles.cpp +5056 -2696
- sknetwork/topology/triangles.pyx +74 -89
- sknetwork/topology/weisfeiler_lehman.py +56 -86
- sknetwork/topology/weisfeiler_lehman_core.cp39-win_amd64.pyd +0 -0
- sknetwork/topology/weisfeiler_lehman_core.cpp +14727 -10622
- sknetwork/topology/weisfeiler_lehman_core.pyx +0 -2
- sknetwork/utils/__init__.py +1 -31
- sknetwork/utils/check.py +2 -2
- sknetwork/utils/format.py +5 -3
- sknetwork/utils/membership.py +2 -2
- sknetwork/utils/tests/test_check.py +3 -3
- sknetwork/utils/tests/test_format.py +3 -1
- sknetwork/utils/values.py +1 -1
- sknetwork/visualization/__init__.py +2 -2
- sknetwork/visualization/dendrograms.py +55 -7
- sknetwork/visualization/graphs.py +292 -72
- sknetwork/visualization/tests/test_dendrograms.py +9 -9
- sknetwork/visualization/tests/test_graphs.py +71 -62
- scikit_network-0.30.0.dist-info/RECORD +0 -227
- sknetwork/embedding/louvain_hierarchy.py +0 -142
- sknetwork/embedding/tests/test_louvain_hierarchy.py +0 -19
- sknetwork/path/metrics.py +0 -148
- sknetwork/path/tests/test_metrics.py +0 -29
- sknetwork/ranking/harmonic.py +0 -82
- sknetwork/topology/dag.py +0 -74
- sknetwork/topology/dag_core.cp39-win_amd64.pyd +0 -0
- sknetwork/topology/dag_core.cpp +0 -23350
- sknetwork/topology/dag_core.pyx +0 -38
- sknetwork/topology/kcliques.cp39-win_amd64.pyd +0 -0
- sknetwork/topology/kcliques.pyx +0 -193
- sknetwork/topology/kcore.cp39-win_amd64.pyd +0 -0
- sknetwork/topology/kcore.pyx +0 -120
- sknetwork/topology/tests/test_cores.py +0 -21
- sknetwork/topology/tests/test_dag.py +0 -26
- sknetwork/topology/tests/test_wl_coloring.py +0 -49
- sknetwork/topology/tests/test_wl_kernel.py +0 -31
- sknetwork/utils/base.py +0 -35
- sknetwork/utils/minheap.cp39-win_amd64.pyd +0 -0
- sknetwork/utils/simplex.py +0 -140
- sknetwork/utils/tests/test_base.py +0 -28
- sknetwork/utils/tests/test_bunch.py +0 -16
- sknetwork/utils/tests/test_projection_simplex.py +0 -33
- sknetwork/utils/tests/test_verbose.py +0 -15
- sknetwork/utils/verbose.py +0 -37
- {scikit_network-0.30.0.dist-info → scikit_network-0.32.1.dist-info}/LICENSE +0 -0
- {scikit_network-0.30.0.dist-info → scikit_network-0.32.1.dist-info}/top_level.txt +0 -0
- /sknetwork/{utils → data}/timeout.py +0 -0
sknetwork/topology/dag_core.pyx
DELETED
|
@@ -1,38 +0,0 @@
|
|
|
1
|
-
# distutils: language = c++
|
|
2
|
-
# cython: language_level=3
|
|
3
|
-
# cython: linetrace=True
|
|
4
|
-
# distutils: define_macros=CYTHON_TRACE_NOGIL=1
|
|
5
|
-
"""
|
|
6
|
-
Created on Jun 3, 2020
|
|
7
|
-
@author: Nathan de Lara <nathan.delara@polytechnique.org>
|
|
8
|
-
"""
|
|
9
|
-
from libcpp.vector cimport vector
|
|
10
|
-
|
|
11
|
-
cimport cython
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
@cython.boundscheck(False)
|
|
15
|
-
@cython.wraparound(False)
|
|
16
|
-
def fit_core(int[:] indptr, int[:] indices, int[:] sorted_nodes, int[:] ix):
|
|
17
|
-
"""Build DAG given an order of the nodes.
|
|
18
|
-
"""
|
|
19
|
-
cdef int n = indptr.shape[0] - 1
|
|
20
|
-
cdef int u, v, k
|
|
21
|
-
cdef long n_triangles = 0
|
|
22
|
-
cdef vector[int] dag_indptr, dag_indices
|
|
23
|
-
|
|
24
|
-
for i in range(n):
|
|
25
|
-
ix[sorted_nodes[i]] = i
|
|
26
|
-
|
|
27
|
-
# create the DAG
|
|
28
|
-
cdef int ptr = 0
|
|
29
|
-
dag_indptr.push_back(ptr)
|
|
30
|
-
for u in range(n):
|
|
31
|
-
for k in range(indptr[u], indptr[u+1]):
|
|
32
|
-
v = indices[k]
|
|
33
|
-
if ix[u] < ix[v]: # the edge needs to be added
|
|
34
|
-
dag_indices.push_back(v)
|
|
35
|
-
ptr += 1
|
|
36
|
-
dag_indptr.push_back(ptr)
|
|
37
|
-
|
|
38
|
-
return dag_indptr, dag_indices
|
|
Binary file
|
sknetwork/topology/kcliques.pyx
DELETED
|
@@ -1,193 +0,0 @@
|
|
|
1
|
-
# distutils: language = c++
|
|
2
|
-
# cython: language_level=3
|
|
3
|
-
# cython: linetrace=True
|
|
4
|
-
# distutils: define_macros=CYTHON_TRACE_NOGIL=1
|
|
5
|
-
"""
|
|
6
|
-
Created on Jun 3, 2020
|
|
7
|
-
@author: Julien Simonnet <julien.simonnet@etu.upmc.fr>
|
|
8
|
-
@author: Yohann Robert <yohann.robert@etu.upmc.fr>
|
|
9
|
-
"""
|
|
10
|
-
from libcpp.vector cimport vector
|
|
11
|
-
import numpy as np
|
|
12
|
-
cimport numpy as np
|
|
13
|
-
from scipy import sparse
|
|
14
|
-
|
|
15
|
-
cimport cython
|
|
16
|
-
|
|
17
|
-
from sknetwork.topology.dag import DAG
|
|
18
|
-
from sknetwork.topology.kcore import CoreDecomposition
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
# ----- Collections of arrays used by our listing algorithm -----
|
|
22
|
-
@cython.boundscheck(False)
|
|
23
|
-
@cython.wraparound(False)
|
|
24
|
-
cdef class ListingBox:
|
|
25
|
-
cdef int[:] ns
|
|
26
|
-
cdef np.ndarray degrees
|
|
27
|
-
cdef np.ndarray subs
|
|
28
|
-
cdef short[:] lab
|
|
29
|
-
|
|
30
|
-
def __cinit__(self, vector[int] indptr, int k):
|
|
31
|
-
cdef int n = indptr.size() - 1
|
|
32
|
-
cdef int i
|
|
33
|
-
cdef int max_deg = 0
|
|
34
|
-
|
|
35
|
-
cdef np.ndarray[int, ndim=1] ns = np.empty((k+1,), dtype=np.int32)
|
|
36
|
-
ns[k] = n
|
|
37
|
-
self.ns = ns
|
|
38
|
-
|
|
39
|
-
cdef np.ndarray[short, ndim=1] lab = np.full((n,), k, dtype=np.int16)
|
|
40
|
-
self.lab = lab
|
|
41
|
-
|
|
42
|
-
cdef np.ndarray[int, ndim=1] deg = np.zeros(n, dtype=np.int32)
|
|
43
|
-
cdef np.ndarray[int, ndim=1] sub = np.zeros(n, dtype=np.int32)
|
|
44
|
-
|
|
45
|
-
for i in range(n):
|
|
46
|
-
deg[i] = indptr[i+1] - indptr[i]
|
|
47
|
-
max_deg = max(deg[i], max_deg)
|
|
48
|
-
sub[i] = i
|
|
49
|
-
|
|
50
|
-
self.degrees = np.empty((k+1,), dtype=object)
|
|
51
|
-
self.subs = np.empty((k+1,), dtype=object)
|
|
52
|
-
|
|
53
|
-
self.degrees[k] = deg
|
|
54
|
-
self.subs[k] = sub
|
|
55
|
-
|
|
56
|
-
for i in range(2, k):
|
|
57
|
-
deg = np.zeros(n, dtype=np.int32)
|
|
58
|
-
sub = np.zeros(max_deg, dtype=np.int32)
|
|
59
|
-
self.degrees[i] = deg
|
|
60
|
-
self.subs[i] = sub
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
@cython.boundscheck(False)
|
|
64
|
-
@cython.wraparound(False)
|
|
65
|
-
cdef long fit_core(vector[int] indptr, vector[int] indices, int l, ListingBox box):
|
|
66
|
-
cdef int n = indptr.size() - 1
|
|
67
|
-
cdef long n_cliques = 0
|
|
68
|
-
cdef int i, j, k
|
|
69
|
-
cdef int u, v, w
|
|
70
|
-
cdef int cd
|
|
71
|
-
|
|
72
|
-
if l == 2:
|
|
73
|
-
degree_l = box.degrees[2]
|
|
74
|
-
sub_l = box.subs[2]
|
|
75
|
-
for i in range(box.ns[2]):
|
|
76
|
-
j = sub_l[i]
|
|
77
|
-
n_cliques += degree_l[j]
|
|
78
|
-
|
|
79
|
-
return n_cliques
|
|
80
|
-
|
|
81
|
-
sub_l = box.subs[l]
|
|
82
|
-
sub_prev = box.subs[l-1]
|
|
83
|
-
degree_l = box.degrees[l]
|
|
84
|
-
deg_prev = box.degrees[l-1]
|
|
85
|
-
for i in range(box.ns[l]):
|
|
86
|
-
u = sub_l[i]
|
|
87
|
-
box.ns[l-1] = 0
|
|
88
|
-
cd = indptr[u] + degree_l[u]
|
|
89
|
-
for j in range(indptr[u], cd):
|
|
90
|
-
v = indices[j]
|
|
91
|
-
if box.lab[v] == l:
|
|
92
|
-
box.lab[v] = l-1
|
|
93
|
-
sub_prev[box.ns[l-1]] = v
|
|
94
|
-
box.ns[l-1] += 1
|
|
95
|
-
deg_prev[v] = 0
|
|
96
|
-
|
|
97
|
-
for j in range(box.ns[l-1]):
|
|
98
|
-
v = sub_prev[j]
|
|
99
|
-
cd = indptr[v] + degree_l[v]
|
|
100
|
-
k = indptr[v]
|
|
101
|
-
while k < cd:
|
|
102
|
-
w = indices[k]
|
|
103
|
-
if box.lab[w] == l-1:
|
|
104
|
-
deg_prev[v] += 1
|
|
105
|
-
else:
|
|
106
|
-
cd -= 1
|
|
107
|
-
indices[k] = indices[cd]
|
|
108
|
-
k -= 1
|
|
109
|
-
indices[cd] = w
|
|
110
|
-
|
|
111
|
-
k += 1
|
|
112
|
-
|
|
113
|
-
n_cliques += fit_core(indptr, indices, l-1, box)
|
|
114
|
-
for j in range(box.ns[l-1]):
|
|
115
|
-
v = sub_prev[j]
|
|
116
|
-
box.lab[v] = l
|
|
117
|
-
|
|
118
|
-
return n_cliques
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
class Cliques:
|
|
122
|
-
""" Clique counting algorithm.
|
|
123
|
-
|
|
124
|
-
Parameters
|
|
125
|
-
----------
|
|
126
|
-
k : int
|
|
127
|
-
Clique order (e.g., k = 3 means triangles).
|
|
128
|
-
|
|
129
|
-
Attributes
|
|
130
|
-
----------
|
|
131
|
-
n_cliques_ : int
|
|
132
|
-
Number of cliques.
|
|
133
|
-
|
|
134
|
-
Example
|
|
135
|
-
-------
|
|
136
|
-
>>> from sknetwork.data import karate_club
|
|
137
|
-
>>> cliques = Cliques(k=3)
|
|
138
|
-
>>> adjacency = karate_club()
|
|
139
|
-
>>> cliques.fit_transform(adjacency)
|
|
140
|
-
45
|
|
141
|
-
|
|
142
|
-
References
|
|
143
|
-
----------
|
|
144
|
-
Danisch, M., Balalau, O., & Sozio, M. (2018, April).
|
|
145
|
-
`Listing k-cliques in sparse real-world graphs.
|
|
146
|
-
<https://dl.acm.org/doi/pdf/10.1145/3178876.3186125>`_
|
|
147
|
-
In Proceedings of the 2018 World Wide Web Conference (pp. 589-598).
|
|
148
|
-
"""
|
|
149
|
-
def __init__(self, k: int):
|
|
150
|
-
self.k = np.int32(k)
|
|
151
|
-
self.n_cliques_ = 0
|
|
152
|
-
|
|
153
|
-
def fit(self, adjacency: sparse.csr_matrix) -> 'Cliques':
|
|
154
|
-
"""K-cliques count.
|
|
155
|
-
|
|
156
|
-
Parameters
|
|
157
|
-
----------
|
|
158
|
-
adjacency :
|
|
159
|
-
Adjacency matrix of the graph.
|
|
160
|
-
|
|
161
|
-
Returns
|
|
162
|
-
-------
|
|
163
|
-
self: :class:`Cliques`
|
|
164
|
-
"""
|
|
165
|
-
if self.k < 2:
|
|
166
|
-
raise ValueError("k should be at least 2")
|
|
167
|
-
|
|
168
|
-
kcore = CoreDecomposition()
|
|
169
|
-
labels = kcore.fit_transform(adjacency)
|
|
170
|
-
sorted_nodes = np.argsort(labels)
|
|
171
|
-
|
|
172
|
-
dag = DAG()
|
|
173
|
-
dag.fit(adjacency, sorted_nodes)
|
|
174
|
-
indptr = dag.indptr_
|
|
175
|
-
indices = dag.indices_
|
|
176
|
-
|
|
177
|
-
box = ListingBox.__new__(ListingBox, indptr, self.k)
|
|
178
|
-
self.n_cliques_ = fit_core(indptr, indices, self.k, box)
|
|
179
|
-
|
|
180
|
-
return self
|
|
181
|
-
|
|
182
|
-
def fit_transform(self, adjacency: sparse.csr_matrix) -> int:
|
|
183
|
-
""" Fit algorithm to the data and return the number of cliques. Same parameters as the ``fit`` method.
|
|
184
|
-
|
|
185
|
-
Returns
|
|
186
|
-
-------
|
|
187
|
-
n_cliques : int
|
|
188
|
-
Number of k-cliques.
|
|
189
|
-
"""
|
|
190
|
-
self.fit(adjacency)
|
|
191
|
-
return self.n_cliques_
|
|
192
|
-
|
|
193
|
-
|
|
Binary file
|
sknetwork/topology/kcore.pyx
DELETED
|
@@ -1,120 +0,0 @@
|
|
|
1
|
-
# distutils: language = c++
|
|
2
|
-
# cython: language_level=3
|
|
3
|
-
# cython: linetrace=True
|
|
4
|
-
# distutils: define_macros=CYTHON_TRACE_NOGIL=1
|
|
5
|
-
"""
|
|
6
|
-
Created on Jun 3, 2020
|
|
7
|
-
@author: Julien Simonnet <julien.simonnet@etu.upmc.fr>
|
|
8
|
-
@author: Yohann Robert <yohann.robert@etu.upmc.fr>
|
|
9
|
-
"""
|
|
10
|
-
cimport cython
|
|
11
|
-
|
|
12
|
-
import numpy as np
|
|
13
|
-
cimport numpy as np
|
|
14
|
-
|
|
15
|
-
from scipy import sparse
|
|
16
|
-
|
|
17
|
-
from sknetwork.utils.base import Algorithm
|
|
18
|
-
from sknetwork.utils.minheap cimport MinHeap
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
@cython.boundscheck(False)
|
|
22
|
-
@cython.wraparound(False)
|
|
23
|
-
cdef fit_core(int[:] indptr, int[:] indices):
|
|
24
|
-
"""Compute the core value of each node.
|
|
25
|
-
|
|
26
|
-
Parameters
|
|
27
|
-
----------
|
|
28
|
-
indptr :
|
|
29
|
-
CSR format index array of the normalized adjacency matrix.
|
|
30
|
-
indices :
|
|
31
|
-
CSR format index pointer array of the normalized adjacency matrix.
|
|
32
|
-
|
|
33
|
-
Returns
|
|
34
|
-
-------
|
|
35
|
-
labels :
|
|
36
|
-
Core value of each node.
|
|
37
|
-
"""
|
|
38
|
-
cdef int n = indptr.shape[0] - 1
|
|
39
|
-
cdef int core_value = 0 # current/max core value of the graph
|
|
40
|
-
cdef int min_node # current node of minimum degree
|
|
41
|
-
cdef int i, j, k
|
|
42
|
-
cdef int[:] degrees = np.asarray(indptr)[1:] - np.asarray(indptr)[:n]
|
|
43
|
-
cdef np.ndarray[int, ndim=1] labels = np.empty((n,), dtype=np.int32)
|
|
44
|
-
cdef MinHeap mh = MinHeap.__new__(MinHeap, n) # minimum heap with an update system
|
|
45
|
-
|
|
46
|
-
# inserts all nodes in the heap
|
|
47
|
-
for i in range(n):
|
|
48
|
-
mh.insert_key(i, degrees)
|
|
49
|
-
|
|
50
|
-
i = n - 1 # index of the rear of the list/array
|
|
51
|
-
while not mh.empty(): # until the heap is emptied
|
|
52
|
-
min_node = mh.pop_min(degrees)
|
|
53
|
-
core_value = max(core_value, degrees[min_node])
|
|
54
|
-
|
|
55
|
-
# decreases the degree of each neighbors of min_node to simulate its deletion
|
|
56
|
-
for k in range(indptr[min_node], indptr[min_node+1]):
|
|
57
|
-
j = indices[k]
|
|
58
|
-
degrees[j] -= 1
|
|
59
|
-
mh.decrease_key(j, degrees) # updates the heap to take into account the new degrees
|
|
60
|
-
|
|
61
|
-
labels[min_node] = core_value
|
|
62
|
-
i -= 1
|
|
63
|
-
|
|
64
|
-
return np.asarray(labels)
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
class CoreDecomposition(Algorithm):
|
|
68
|
-
"""K-core decomposition algorithm.
|
|
69
|
-
|
|
70
|
-
* Graphs
|
|
71
|
-
|
|
72
|
-
Attributes
|
|
73
|
-
----------
|
|
74
|
-
labels_ : np.ndarray
|
|
75
|
-
Core value of each node.
|
|
76
|
-
core_value_ : int
|
|
77
|
-
Maximum core value of the graph
|
|
78
|
-
|
|
79
|
-
Example
|
|
80
|
-
-------
|
|
81
|
-
>>> from sknetwork.topology import CoreDecomposition
|
|
82
|
-
>>> from sknetwork.data import karate_club
|
|
83
|
-
>>> kcore = CoreDecomposition()
|
|
84
|
-
>>> adjacency = karate_club()
|
|
85
|
-
>>> kcore.fit(adjacency)
|
|
86
|
-
>>> kcore.core_value_
|
|
87
|
-
4
|
|
88
|
-
"""
|
|
89
|
-
def __init__(self):
|
|
90
|
-
super(CoreDecomposition, self).__init__()
|
|
91
|
-
self.labels_ = None
|
|
92
|
-
self.core_value_ = None
|
|
93
|
-
|
|
94
|
-
def fit(self, adjacency: sparse.csr_matrix) -> 'CoreDecomposition':
|
|
95
|
-
"""K-core decomposition.
|
|
96
|
-
|
|
97
|
-
Parameters
|
|
98
|
-
----------
|
|
99
|
-
adjacency :
|
|
100
|
-
Adjacency matrix of the graph.
|
|
101
|
-
|
|
102
|
-
Returns
|
|
103
|
-
-------
|
|
104
|
-
self: :class:`CoreDecomposition`
|
|
105
|
-
"""
|
|
106
|
-
labels = fit_core(adjacency.indptr, adjacency.indices)
|
|
107
|
-
self.labels_ = labels
|
|
108
|
-
self.core_value_ = labels.max()
|
|
109
|
-
return self
|
|
110
|
-
|
|
111
|
-
def fit_transform(self, adjacency: sparse.csr_matrix):
|
|
112
|
-
"""Fit algorithm to the data and return the core value of each node. Same parameters as the ``fit`` method.
|
|
113
|
-
|
|
114
|
-
Returns
|
|
115
|
-
-------
|
|
116
|
-
labels :
|
|
117
|
-
Core value of the nodes.
|
|
118
|
-
"""
|
|
119
|
-
self.fit(adjacency)
|
|
120
|
-
return self.labels_
|
|
@@ -1,21 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/env python3
|
|
2
|
-
# -*- coding: utf-8 -*-
|
|
3
|
-
"""Tests for k-core decimposition"""
|
|
4
|
-
import unittest
|
|
5
|
-
|
|
6
|
-
from sknetwork.data.test_graphs import *
|
|
7
|
-
from sknetwork.topology import CoreDecomposition
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
class TestCoreDecomposition(unittest.TestCase):
|
|
11
|
-
|
|
12
|
-
def test_empty(self):
|
|
13
|
-
adjacency = test_graph_empty()
|
|
14
|
-
kcore = CoreDecomposition().fit(adjacency)
|
|
15
|
-
self.assertEqual(kcore.core_value_, 0)
|
|
16
|
-
|
|
17
|
-
def test_cliques(self):
|
|
18
|
-
adjacency = test_graph_clique()
|
|
19
|
-
n = adjacency.shape[0]
|
|
20
|
-
kcore = CoreDecomposition().fit(adjacency)
|
|
21
|
-
self.assertEqual(kcore.core_value_, n - 1)
|
|
@@ -1,26 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/env python3
|
|
2
|
-
# -*- coding: utf-8 -*-
|
|
3
|
-
""""tests for dag.py"""
|
|
4
|
-
import unittest
|
|
5
|
-
|
|
6
|
-
import numpy as np
|
|
7
|
-
|
|
8
|
-
from sknetwork.data import house
|
|
9
|
-
from sknetwork.topology import DAG
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
class TestDAG(unittest.TestCase):
|
|
13
|
-
|
|
14
|
-
def test_options(self):
|
|
15
|
-
adjacency = house()
|
|
16
|
-
dag = DAG()
|
|
17
|
-
dag.fit(adjacency)
|
|
18
|
-
self.assertEqual(dag.indptr_.shape[0], adjacency.shape[0]+1)
|
|
19
|
-
self.assertEqual(dag.indices_.shape[0], 6)
|
|
20
|
-
|
|
21
|
-
with self.assertRaises(ValueError):
|
|
22
|
-
dag.fit(adjacency, sorted_nodes=np.arange(3))
|
|
23
|
-
|
|
24
|
-
with self.assertRaises(ValueError):
|
|
25
|
-
dag = DAG(ordering='toto')
|
|
26
|
-
dag.fit(adjacency)
|
|
@@ -1,49 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/env python3
|
|
2
|
-
# -*- coding: utf-8 -*-
|
|
3
|
-
"""Tests for Weisfeiler-Lehman coloring"""
|
|
4
|
-
import unittest
|
|
5
|
-
|
|
6
|
-
from sknetwork.data import house, bow_tie
|
|
7
|
-
from sknetwork.data.test_graphs import *
|
|
8
|
-
from sknetwork.topology import WeisfeilerLehman
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
class TestWLColoring(unittest.TestCase):
|
|
12
|
-
|
|
13
|
-
def test_empty(self):
|
|
14
|
-
adjacency = test_graph_empty()
|
|
15
|
-
labels = WeisfeilerLehman().fit_transform(adjacency)
|
|
16
|
-
self.assertTrue((labels == np.zeros(10)).all())
|
|
17
|
-
|
|
18
|
-
def test_cliques(self):
|
|
19
|
-
adjacency = test_graph_clique()
|
|
20
|
-
labels = WeisfeilerLehman().fit_transform(adjacency)
|
|
21
|
-
self.assertTrue((labels == np.zeros(10)).all())
|
|
22
|
-
|
|
23
|
-
def test_house(self):
|
|
24
|
-
adjacency = house()
|
|
25
|
-
labels = WeisfeilerLehman().fit_transform(adjacency)
|
|
26
|
-
self.assertTrue((labels == np.array([0, 2, 1, 1, 2])).all())
|
|
27
|
-
|
|
28
|
-
def test_bow_tie(self):
|
|
29
|
-
adjacency = bow_tie()
|
|
30
|
-
labels = WeisfeilerLehman().fit_transform(adjacency)
|
|
31
|
-
self.assertTrue((labels == np.array([1, 0, 0, 0, 0])).all())
|
|
32
|
-
|
|
33
|
-
def test_iso(self):
|
|
34
|
-
adjacency = house()
|
|
35
|
-
n = adjacency.indptr.shape[0] - 1
|
|
36
|
-
reorder = list(range(n))
|
|
37
|
-
np.random.shuffle(reorder)
|
|
38
|
-
adjacency2 = adjacency[reorder][:, reorder]
|
|
39
|
-
l1 = WeisfeilerLehman().fit_transform(adjacency)
|
|
40
|
-
l2 = WeisfeilerLehman().fit_transform(adjacency2)
|
|
41
|
-
l1.sort()
|
|
42
|
-
l2.sort()
|
|
43
|
-
self.assertTrue((l1 == l2).all())
|
|
44
|
-
|
|
45
|
-
def test_early_stop(self):
|
|
46
|
-
adjacency = house()
|
|
47
|
-
wl = WeisfeilerLehman(max_iter=1)
|
|
48
|
-
labels = wl.fit_transform(adjacency)
|
|
49
|
-
self.assertTrue((labels == np.array([0, 1, 0, 0, 1])).all())
|
|
@@ -1,31 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/env python3
|
|
2
|
-
# -*- coding: utf-8 -*-
|
|
3
|
-
"""Tests for Weisfeiler-Lehman kernels"""
|
|
4
|
-
import unittest
|
|
5
|
-
|
|
6
|
-
import numpy as np
|
|
7
|
-
|
|
8
|
-
from sknetwork.data import house, bow_tie, linear_graph
|
|
9
|
-
from sknetwork.topology import are_isomorphic
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
class TestWLKernel(unittest.TestCase):
|
|
13
|
-
|
|
14
|
-
def test_isomorphism(self):
|
|
15
|
-
ref = house()
|
|
16
|
-
n = ref.shape[0]
|
|
17
|
-
|
|
18
|
-
adjacency = house()
|
|
19
|
-
reorder = list(range(n))
|
|
20
|
-
np.random.shuffle(reorder)
|
|
21
|
-
adjacency = adjacency[reorder][:, reorder]
|
|
22
|
-
self.assertTrue(are_isomorphic(ref, adjacency))
|
|
23
|
-
|
|
24
|
-
adjacency = bow_tie()
|
|
25
|
-
self.assertFalse(are_isomorphic(ref, adjacency))
|
|
26
|
-
|
|
27
|
-
adjacency = linear_graph(n)
|
|
28
|
-
self.assertFalse(are_isomorphic(ref, adjacency))
|
|
29
|
-
|
|
30
|
-
adjacency = linear_graph(n + 1)
|
|
31
|
-
self.assertFalse(are_isomorphic(ref, adjacency))
|
sknetwork/utils/base.py
DELETED
|
@@ -1,35 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/env python3
|
|
2
|
-
# -*- coding: utf-8 -*-
|
|
3
|
-
"""
|
|
4
|
-
Created on Jun 28, 2019
|
|
5
|
-
@author: Quentin Lutz <qlutz@enst.fr>
|
|
6
|
-
"""
|
|
7
|
-
import inspect
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
class Algorithm:
|
|
11
|
-
"""Base class for all algorithms."""
|
|
12
|
-
def __repr__(self):
|
|
13
|
-
# parameters not to display
|
|
14
|
-
arg_black_list = ['self', 'random_state', 'verbose']
|
|
15
|
-
output = self.__class__.__name__ + '('
|
|
16
|
-
signature = inspect.signature(self.__class__.__init__)
|
|
17
|
-
arguments = [arg.name for arg in signature.parameters.values() if arg.name not in arg_black_list]
|
|
18
|
-
for p in arguments:
|
|
19
|
-
try:
|
|
20
|
-
val = self.__dict__[p]
|
|
21
|
-
except KeyError:
|
|
22
|
-
continue
|
|
23
|
-
if type(val) == str:
|
|
24
|
-
val = "'" + val + "'"
|
|
25
|
-
else:
|
|
26
|
-
val = str(val)
|
|
27
|
-
output += p + '=' + val + ', '
|
|
28
|
-
if output[-1] != '(':
|
|
29
|
-
return output[:-2] + ')'
|
|
30
|
-
else:
|
|
31
|
-
return output + ')'
|
|
32
|
-
|
|
33
|
-
def fit(self, *args, **kwargs):
|
|
34
|
-
"""Fit algorithm to data."""
|
|
35
|
-
raise NotImplementedError
|
|
Binary file
|
sknetwork/utils/simplex.py
DELETED
|
@@ -1,140 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/env python3
|
|
2
|
-
# -*- coding: utf-8 -*-
|
|
3
|
-
"""
|
|
4
|
-
Created on June 4 2019
|
|
5
|
-
@author: Nathan de Lara <nathan.delara@polytechnique.org>
|
|
6
|
-
"""
|
|
7
|
-
from typing import Union
|
|
8
|
-
|
|
9
|
-
import numpy as np
|
|
10
|
-
from scipy import sparse
|
|
11
|
-
|
|
12
|
-
from sknetwork.linalg.normalization import normalize
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
def projection_simplex_array(array: np.ndarray, scale: float = 1) -> np.ndarray:
|
|
16
|
-
"""Project each line of the input onto the Euclidean simplex i.e. solve
|
|
17
|
-
|
|
18
|
-
:math:`\\underset{w}{min} ||w - x_i||_2^2` s.t. :math:`\\sum w_j = z, w_j \\ge 0`.
|
|
19
|
-
|
|
20
|
-
Parameters
|
|
21
|
-
----------
|
|
22
|
-
array: np.ndarray
|
|
23
|
-
Data to project. Either one or two dimensional.
|
|
24
|
-
scale: float
|
|
25
|
-
Scale of the simplex i.e. sums of the projected coefficients.
|
|
26
|
-
|
|
27
|
-
Returns
|
|
28
|
-
-------
|
|
29
|
-
projection : np.ndarray
|
|
30
|
-
Array with the same shape as the input.
|
|
31
|
-
|
|
32
|
-
Example
|
|
33
|
-
-------
|
|
34
|
-
>>> X = np.array([[2, 2], [-0.75, 0.25]])
|
|
35
|
-
>>> projection_simplex_array(X)
|
|
36
|
-
array([[0.5, 0.5],
|
|
37
|
-
[0. , 1. ]])
|
|
38
|
-
"""
|
|
39
|
-
if len(array.shape) == 1:
|
|
40
|
-
array = array.reshape(1, array.shape[0])
|
|
41
|
-
n_row, n_col = array.shape
|
|
42
|
-
|
|
43
|
-
sorted_array = -np.sort(-array)
|
|
44
|
-
cumsum_array = np.cumsum(sorted_array, axis=1) - scale
|
|
45
|
-
denom = 1 + np.arange(n_col)
|
|
46
|
-
condition = sorted_array - cumsum_array / denom > 0
|
|
47
|
-
max_index = np.argmax(condition / denom[::-1], axis=1)
|
|
48
|
-
threshold = (cumsum_array / denom)[np.arange(n_row), max_index]
|
|
49
|
-
|
|
50
|
-
return np.maximum(array - threshold[:, np.newaxis], 0)
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
def projection_simplex_csr(matrix: sparse.csr_matrix, scale: float = 1):
|
|
54
|
-
"""Project each line of the input onto the Euclidean simplex i.e. solve
|
|
55
|
-
|
|
56
|
-
:math:`\\underset{w}{min} ||w - x_i||_2^2` s.t. :math:`\\sum w_j = z, w_j \\ge 0`.
|
|
57
|
-
|
|
58
|
-
Parameters
|
|
59
|
-
----------
|
|
60
|
-
matrix : sparse.csr_matrix
|
|
61
|
-
Matrix whose rows must be projected.
|
|
62
|
-
scale: float
|
|
63
|
-
Scale of the simplex i.e. sums of the projected coefficients.
|
|
64
|
-
|
|
65
|
-
Returns
|
|
66
|
-
-------
|
|
67
|
-
projection : sparse.csr_matrix
|
|
68
|
-
Matrix with the same shape as the input.
|
|
69
|
-
|
|
70
|
-
Examples
|
|
71
|
-
--------
|
|
72
|
-
>>> X = sparse.csr_matrix(np.array([[2, 2], [-0.75, 0.25]]))
|
|
73
|
-
>>> X_proj = projection_simplex_csr(X)
|
|
74
|
-
>>> X_proj.nnz
|
|
75
|
-
3
|
|
76
|
-
>>> X_proj.toarray()
|
|
77
|
-
array([[0.5, 0.5],
|
|
78
|
-
[0. , 1. ]])
|
|
79
|
-
"""
|
|
80
|
-
data = matrix.data
|
|
81
|
-
if data.dtype == bool or (data.min() == data.max()):
|
|
82
|
-
return normalize(matrix, p=1)
|
|
83
|
-
|
|
84
|
-
indptr = matrix.indptr
|
|
85
|
-
new_data = np.zeros_like(data)
|
|
86
|
-
|
|
87
|
-
for i in range(indptr.size-1):
|
|
88
|
-
j1 = indptr[i]
|
|
89
|
-
j2 = indptr[i+1]
|
|
90
|
-
new_data[j1:j2] = projection_simplex_array(data[j1:j2], scale=scale)
|
|
91
|
-
|
|
92
|
-
new_matrix = sparse.csr_matrix((new_data, matrix.indices, indptr), shape=matrix.shape)
|
|
93
|
-
new_matrix.eliminate_zeros()
|
|
94
|
-
return new_matrix
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
def projection_simplex(x: Union[np.ndarray, sparse.csr_matrix], scale: float = 1.):
|
|
98
|
-
"""Project each line of the input onto the Euclidean simplex i.e. solve
|
|
99
|
-
|
|
100
|
-
:math:`\\underset{w}{min} ||w - x_i||_2^2` s.t. :math:`\\sum w_j = z, w_j \\ge 0`.
|
|
101
|
-
|
|
102
|
-
Parameters
|
|
103
|
-
----------
|
|
104
|
-
x :
|
|
105
|
-
Data to project. Either one or two dimensional. Can be sparse or dense.
|
|
106
|
-
scale : float
|
|
107
|
-
Scale of the simplex i.e. sums of the projected coefficients.
|
|
108
|
-
|
|
109
|
-
Returns
|
|
110
|
-
-------
|
|
111
|
-
projection : np.ndarray or sparse.csr_matrix
|
|
112
|
-
Array with the same type and shape as the input.
|
|
113
|
-
|
|
114
|
-
Example
|
|
115
|
-
-------
|
|
116
|
-
>>> X = np.array([[2, 2], [-0.75, 0.25]])
|
|
117
|
-
>>> projection_simplex(X)
|
|
118
|
-
array([[0.5, 0.5],
|
|
119
|
-
[0. , 1. ]])
|
|
120
|
-
>>> X_csr = sparse.csr_matrix(X)
|
|
121
|
-
>>> X_proj = projection_simplex(X_csr)
|
|
122
|
-
>>> X_proj.nnz
|
|
123
|
-
3
|
|
124
|
-
>>> X_proj.toarray()
|
|
125
|
-
array([[0.5, 0.5],
|
|
126
|
-
[0. , 1. ]])
|
|
127
|
-
|
|
128
|
-
References
|
|
129
|
-
----------
|
|
130
|
-
Duchi, J., Shalev-Shwartz, S., Singer, Y., & Chandra, T. (2008, July).
|
|
131
|
-
`Efficient projections onto the l 1-ball for learning in high dimensions.
|
|
132
|
-
<http://machinelearning.org/archive/icml2008/papers/361.pdf>`_
|
|
133
|
-
In Proceedings of the 25th international conference on Machine learning (pp. 272-279). ACM.
|
|
134
|
-
"""
|
|
135
|
-
if isinstance(x, np.ndarray):
|
|
136
|
-
return projection_simplex_array(x, scale)
|
|
137
|
-
elif isinstance(x, sparse.csr_matrix):
|
|
138
|
-
return projection_simplex_csr(x, scale)
|
|
139
|
-
else:
|
|
140
|
-
raise TypeError('Input must be a numpy array or a CSR matrix.')
|
|
@@ -1,28 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/env python3
|
|
2
|
-
# -*- coding: utf-8 -*-
|
|
3
|
-
"""tests for base.py"""
|
|
4
|
-
import unittest
|
|
5
|
-
|
|
6
|
-
from sknetwork.utils.base import Algorithm
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
class TestBase(unittest.TestCase):
|
|
10
|
-
|
|
11
|
-
def test_repr(self):
|
|
12
|
-
class Stub(Algorithm):
|
|
13
|
-
"""Docstring"""
|
|
14
|
-
def __init__(self, some_param: int, another_param: str, random_state: int, some_unused_param: int):
|
|
15
|
-
self.some_param = some_param
|
|
16
|
-
self.another_param = another_param
|
|
17
|
-
self.random_state = random_state
|
|
18
|
-
|
|
19
|
-
def fit(self):
|
|
20
|
-
"""Docstring"""
|
|
21
|
-
pass
|
|
22
|
-
|
|
23
|
-
stub = Stub(1, 'abc', 3, 4)
|
|
24
|
-
self.assertEqual(repr(stub), "Stub(some_param=1, another_param='abc')")
|
|
25
|
-
|
|
26
|
-
def test_fit(self):
|
|
27
|
-
stub = Algorithm()
|
|
28
|
-
self.assertRaises(NotImplementedError, stub.fit, None)
|