scikit-network 0.28.3__cp39-cp39-macosx_12_0_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of scikit-network might be problematic. Click here for more details.
- scikit_network-0.28.3.dist-info/AUTHORS.rst +41 -0
- scikit_network-0.28.3.dist-info/LICENSE +34 -0
- scikit_network-0.28.3.dist-info/METADATA +457 -0
- scikit_network-0.28.3.dist-info/RECORD +240 -0
- scikit_network-0.28.3.dist-info/WHEEL +5 -0
- scikit_network-0.28.3.dist-info/top_level.txt +1 -0
- sknetwork/__init__.py +21 -0
- sknetwork/classification/__init__.py +8 -0
- sknetwork/classification/base.py +84 -0
- sknetwork/classification/base_rank.py +143 -0
- sknetwork/classification/diffusion.py +134 -0
- sknetwork/classification/knn.py +162 -0
- sknetwork/classification/metrics.py +205 -0
- sknetwork/classification/pagerank.py +66 -0
- sknetwork/classification/propagation.py +152 -0
- sknetwork/classification/tests/__init__.py +1 -0
- sknetwork/classification/tests/test_API.py +35 -0
- sknetwork/classification/tests/test_diffusion.py +37 -0
- sknetwork/classification/tests/test_knn.py +24 -0
- sknetwork/classification/tests/test_metrics.py +53 -0
- sknetwork/classification/tests/test_pagerank.py +20 -0
- sknetwork/classification/tests/test_propagation.py +24 -0
- sknetwork/classification/vote.cpython-39-darwin.so +0 -0
- sknetwork/classification/vote.pyx +58 -0
- sknetwork/clustering/__init__.py +7 -0
- sknetwork/clustering/base.py +102 -0
- sknetwork/clustering/kmeans.py +142 -0
- sknetwork/clustering/louvain.py +255 -0
- sknetwork/clustering/louvain_core.cpython-39-darwin.so +0 -0
- sknetwork/clustering/louvain_core.pyx +134 -0
- sknetwork/clustering/metrics.py +91 -0
- sknetwork/clustering/postprocess.py +66 -0
- sknetwork/clustering/propagation_clustering.py +108 -0
- sknetwork/clustering/tests/__init__.py +1 -0
- sknetwork/clustering/tests/test_API.py +37 -0
- sknetwork/clustering/tests/test_kmeans.py +47 -0
- sknetwork/clustering/tests/test_louvain.py +104 -0
- sknetwork/clustering/tests/test_metrics.py +50 -0
- sknetwork/clustering/tests/test_post_processing.py +23 -0
- sknetwork/clustering/tests/test_postprocess.py +39 -0
- sknetwork/data/__init__.py +5 -0
- sknetwork/data/load.py +408 -0
- sknetwork/data/models.py +459 -0
- sknetwork/data/parse.py +621 -0
- sknetwork/data/test_graphs.py +84 -0
- sknetwork/data/tests/__init__.py +1 -0
- sknetwork/data/tests/test_API.py +30 -0
- sknetwork/data/tests/test_load.py +95 -0
- sknetwork/data/tests/test_models.py +52 -0
- sknetwork/data/tests/test_parse.py +253 -0
- sknetwork/data/tests/test_test_graphs.py +30 -0
- sknetwork/data/tests/test_toy_graphs.py +68 -0
- sknetwork/data/toy_graphs.py +619 -0
- sknetwork/embedding/__init__.py +10 -0
- sknetwork/embedding/base.py +90 -0
- sknetwork/embedding/force_atlas.py +197 -0
- sknetwork/embedding/louvain_embedding.py +174 -0
- sknetwork/embedding/louvain_hierarchy.py +142 -0
- sknetwork/embedding/metrics.py +66 -0
- sknetwork/embedding/random_projection.py +133 -0
- sknetwork/embedding/spectral.py +214 -0
- sknetwork/embedding/spring.py +198 -0
- sknetwork/embedding/svd.py +363 -0
- sknetwork/embedding/tests/__init__.py +1 -0
- sknetwork/embedding/tests/test_API.py +73 -0
- sknetwork/embedding/tests/test_force_atlas.py +35 -0
- sknetwork/embedding/tests/test_louvain_embedding.py +33 -0
- sknetwork/embedding/tests/test_louvain_hierarchy.py +19 -0
- sknetwork/embedding/tests/test_metrics.py +29 -0
- sknetwork/embedding/tests/test_random_projection.py +28 -0
- sknetwork/embedding/tests/test_spectral.py +84 -0
- sknetwork/embedding/tests/test_spring.py +50 -0
- sknetwork/embedding/tests/test_svd.py +37 -0
- sknetwork/flow/__init__.py +3 -0
- sknetwork/flow/flow.py +73 -0
- sknetwork/flow/tests/__init__.py +1 -0
- sknetwork/flow/tests/test_flow.py +17 -0
- sknetwork/flow/tests/test_utils.py +69 -0
- sknetwork/flow/utils.py +91 -0
- sknetwork/gnn/__init__.py +10 -0
- sknetwork/gnn/activation.py +117 -0
- sknetwork/gnn/base.py +155 -0
- sknetwork/gnn/base_activation.py +89 -0
- sknetwork/gnn/base_layer.py +109 -0
- sknetwork/gnn/gnn_classifier.py +381 -0
- sknetwork/gnn/layer.py +153 -0
- sknetwork/gnn/layers.py +127 -0
- sknetwork/gnn/loss.py +180 -0
- sknetwork/gnn/neighbor_sampler.py +65 -0
- sknetwork/gnn/optimizer.py +163 -0
- sknetwork/gnn/tests/__init__.py +1 -0
- sknetwork/gnn/tests/test_activation.py +56 -0
- sknetwork/gnn/tests/test_base.py +79 -0
- sknetwork/gnn/tests/test_base_layer.py +37 -0
- sknetwork/gnn/tests/test_gnn_classifier.py +192 -0
- sknetwork/gnn/tests/test_layers.py +80 -0
- sknetwork/gnn/tests/test_loss.py +33 -0
- sknetwork/gnn/tests/test_neigh_sampler.py +23 -0
- sknetwork/gnn/tests/test_optimizer.py +43 -0
- sknetwork/gnn/tests/test_utils.py +93 -0
- sknetwork/gnn/utils.py +219 -0
- sknetwork/hierarchy/__init__.py +7 -0
- sknetwork/hierarchy/base.py +69 -0
- sknetwork/hierarchy/louvain_hierarchy.py +264 -0
- sknetwork/hierarchy/metrics.py +234 -0
- sknetwork/hierarchy/paris.cpython-39-darwin.so +0 -0
- sknetwork/hierarchy/paris.pyx +317 -0
- sknetwork/hierarchy/postprocess.py +350 -0
- sknetwork/hierarchy/tests/__init__.py +1 -0
- sknetwork/hierarchy/tests/test_API.py +25 -0
- sknetwork/hierarchy/tests/test_algos.py +29 -0
- sknetwork/hierarchy/tests/test_metrics.py +62 -0
- sknetwork/hierarchy/tests/test_postprocess.py +57 -0
- sknetwork/hierarchy/tests/test_ward.py +25 -0
- sknetwork/hierarchy/ward.py +94 -0
- sknetwork/linalg/__init__.py +9 -0
- sknetwork/linalg/basics.py +37 -0
- sknetwork/linalg/diteration.cpython-39-darwin.so +0 -0
- sknetwork/linalg/diteration.pyx +49 -0
- sknetwork/linalg/eig_solver.py +93 -0
- sknetwork/linalg/laplacian.py +15 -0
- sknetwork/linalg/normalization.py +66 -0
- sknetwork/linalg/operators.py +225 -0
- sknetwork/linalg/polynome.py +76 -0
- sknetwork/linalg/ppr_solver.py +170 -0
- sknetwork/linalg/push.cpython-39-darwin.so +0 -0
- sknetwork/linalg/push.pyx +73 -0
- sknetwork/linalg/sparse_lowrank.py +142 -0
- sknetwork/linalg/svd_solver.py +91 -0
- sknetwork/linalg/tests/__init__.py +1 -0
- sknetwork/linalg/tests/test_eig.py +44 -0
- sknetwork/linalg/tests/test_laplacian.py +18 -0
- sknetwork/linalg/tests/test_normalization.py +38 -0
- sknetwork/linalg/tests/test_operators.py +70 -0
- sknetwork/linalg/tests/test_polynome.py +38 -0
- sknetwork/linalg/tests/test_ppr.py +50 -0
- sknetwork/linalg/tests/test_sparse_lowrank.py +61 -0
- sknetwork/linalg/tests/test_svd.py +38 -0
- sknetwork/linkpred/__init__.py +4 -0
- sknetwork/linkpred/base.py +80 -0
- sknetwork/linkpred/first_order.py +508 -0
- sknetwork/linkpred/first_order_core.cpython-39-darwin.so +0 -0
- sknetwork/linkpred/first_order_core.pyx +315 -0
- sknetwork/linkpred/postprocessing.py +98 -0
- sknetwork/linkpred/tests/__init__.py +1 -0
- sknetwork/linkpred/tests/test_API.py +49 -0
- sknetwork/linkpred/tests/test_postprocessing.py +21 -0
- sknetwork/path/__init__.py +4 -0
- sknetwork/path/metrics.py +148 -0
- sknetwork/path/search.py +65 -0
- sknetwork/path/shortest_path.py +186 -0
- sknetwork/path/tests/__init__.py +1 -0
- sknetwork/path/tests/test_metrics.py +29 -0
- sknetwork/path/tests/test_search.py +25 -0
- sknetwork/path/tests/test_shortest_path.py +45 -0
- sknetwork/ranking/__init__.py +9 -0
- sknetwork/ranking/base.py +56 -0
- sknetwork/ranking/betweenness.cpython-39-darwin.so +0 -0
- sknetwork/ranking/betweenness.pyx +99 -0
- sknetwork/ranking/closeness.py +95 -0
- sknetwork/ranking/harmonic.py +82 -0
- sknetwork/ranking/hits.py +94 -0
- sknetwork/ranking/katz.py +81 -0
- sknetwork/ranking/pagerank.py +107 -0
- sknetwork/ranking/postprocess.py +25 -0
- sknetwork/ranking/tests/__init__.py +1 -0
- sknetwork/ranking/tests/test_API.py +34 -0
- sknetwork/ranking/tests/test_betweenness.py +38 -0
- sknetwork/ranking/tests/test_closeness.py +34 -0
- sknetwork/ranking/tests/test_hits.py +20 -0
- sknetwork/ranking/tests/test_pagerank.py +69 -0
- sknetwork/regression/__init__.py +4 -0
- sknetwork/regression/base.py +56 -0
- sknetwork/regression/diffusion.py +190 -0
- sknetwork/regression/tests/__init__.py +1 -0
- sknetwork/regression/tests/test_API.py +34 -0
- sknetwork/regression/tests/test_diffusion.py +48 -0
- sknetwork/sknetwork.py +3 -0
- sknetwork/topology/__init__.py +9 -0
- sknetwork/topology/dag.py +74 -0
- sknetwork/topology/dag_core.cpython-39-darwin.so +0 -0
- sknetwork/topology/dag_core.pyx +38 -0
- sknetwork/topology/kcliques.cpython-39-darwin.so +0 -0
- sknetwork/topology/kcliques.pyx +193 -0
- sknetwork/topology/kcore.cpython-39-darwin.so +0 -0
- sknetwork/topology/kcore.pyx +120 -0
- sknetwork/topology/structure.py +234 -0
- sknetwork/topology/tests/__init__.py +1 -0
- sknetwork/topology/tests/test_cliques.py +28 -0
- sknetwork/topology/tests/test_cores.py +21 -0
- sknetwork/topology/tests/test_dag.py +26 -0
- sknetwork/topology/tests/test_structure.py +99 -0
- sknetwork/topology/tests/test_triangles.py +42 -0
- sknetwork/topology/tests/test_wl_coloring.py +49 -0
- sknetwork/topology/tests/test_wl_kernel.py +31 -0
- sknetwork/topology/triangles.cpython-39-darwin.so +0 -0
- sknetwork/topology/triangles.pyx +166 -0
- sknetwork/topology/weisfeiler_lehman.py +163 -0
- sknetwork/topology/weisfeiler_lehman_core.cpython-39-darwin.so +0 -0
- sknetwork/topology/weisfeiler_lehman_core.pyx +116 -0
- sknetwork/utils/__init__.py +40 -0
- sknetwork/utils/base.py +35 -0
- sknetwork/utils/check.py +354 -0
- sknetwork/utils/co_neighbor.py +71 -0
- sknetwork/utils/format.py +219 -0
- sknetwork/utils/kmeans.py +89 -0
- sknetwork/utils/knn.py +166 -0
- sknetwork/utils/knn1d.cpython-39-darwin.so +0 -0
- sknetwork/utils/knn1d.pyx +80 -0
- sknetwork/utils/membership.py +82 -0
- sknetwork/utils/minheap.cpython-39-darwin.so +0 -0
- sknetwork/utils/minheap.pxd +22 -0
- sknetwork/utils/minheap.pyx +111 -0
- sknetwork/utils/neighbors.py +115 -0
- sknetwork/utils/seeds.py +75 -0
- sknetwork/utils/simplex.py +140 -0
- sknetwork/utils/tests/__init__.py +1 -0
- sknetwork/utils/tests/test_base.py +28 -0
- sknetwork/utils/tests/test_bunch.py +16 -0
- sknetwork/utils/tests/test_check.py +190 -0
- sknetwork/utils/tests/test_co_neighbor.py +43 -0
- sknetwork/utils/tests/test_format.py +61 -0
- sknetwork/utils/tests/test_kmeans.py +21 -0
- sknetwork/utils/tests/test_knn.py +32 -0
- sknetwork/utils/tests/test_membership.py +24 -0
- sknetwork/utils/tests/test_neighbors.py +41 -0
- sknetwork/utils/tests/test_projection_simplex.py +33 -0
- sknetwork/utils/tests/test_seeds.py +67 -0
- sknetwork/utils/tests/test_verbose.py +15 -0
- sknetwork/utils/tests/test_ward.py +20 -0
- sknetwork/utils/timeout.py +38 -0
- sknetwork/utils/verbose.py +37 -0
- sknetwork/utils/ward.py +60 -0
- sknetwork/visualization/__init__.py +4 -0
- sknetwork/visualization/colors.py +34 -0
- sknetwork/visualization/dendrograms.py +229 -0
- sknetwork/visualization/graphs.py +819 -0
- sknetwork/visualization/tests/__init__.py +1 -0
- sknetwork/visualization/tests/test_dendrograms.py +53 -0
- sknetwork/visualization/tests/test_graphs.py +167 -0
|
@@ -0,0 +1,89 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
# -*- coding: utf-8 -*-
|
|
3
|
+
"""
|
|
4
|
+
Created on October 2019
|
|
5
|
+
@author: Nathan de Lara <nathan.delara@polytechnique.org>
|
|
6
|
+
"""
|
|
7
|
+
import numpy as np
|
|
8
|
+
from scipy.cluster.vq import kmeans2
|
|
9
|
+
|
|
10
|
+
from sknetwork.utils.base import Algorithm
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class KMeansDense(Algorithm):
    """K-means clustering of dense vector data, delegating to SciPy's ``kmeans2``.

    Parameters
    ----------
    n_clusters :
        Number of clusters to compute (passed to ``kmeans2`` as ``k``).
    init :
        Initialization method (case-insensitive, passed to ``kmeans2`` as ``minit``).
        Available methods are 'random', 'points', '++' and 'matrix':

        * 'random': generate k centroids from a Gaussian with mean and variance estimated from the data.
        * 'points': choose k observations (rows) at random from data for the initial centroids.
        * '++': choose k observations accordingly to the kmeans++ method (careful seeding)
        * 'matrix': interpret the k parameter as a k by M (or length k array for one-dimensional data) array of
          initial centroids.
    n_init :
        Number of iterations of the k-means algorithm to run (passed to ``kmeans2`` as ``iter``).
    tol :
        Tolerance passed to ``kmeans2`` as ``thresh``.

    Attributes
    ----------
    labels_ :
        Label of each sample (``None`` before fitting).
    cluster_centers_ :
        Array of centroids returned by ``kmeans2`` (``None`` before fitting).

    References
    ----------
    * MacQueen, J. (1967, June). Some methods for classification and analysis of multivariate observations.
      In Proceedings of the fifth Berkeley symposium on mathematical statistics and probability
      (Vol. 1, No. 14, pp. 281-297).

    * Arthur, D., & Vassilvitskii, S. (2007, January). k-means++: The advantages of careful seeding.
      In Proceedings of the eighteenth annual ACM-SIAM symposium on Discrete algorithms (pp. 1027-1035).
      Society for Industrial and Applied Mathematics.
    """
    def __init__(self, n_clusters: int = 8, init: str = '++', n_init: int = 10, tol: float = 1e-4):
        # Hyper-parameters.
        self.n_clusters = n_clusters
        self.init = init.lower()
        self.n_init = n_init
        self.tol = tol

        # Results, filled in by ``fit``.
        self.labels_, self.cluster_centers_ = None, None

    def fit(self, x: np.ndarray) -> 'KMeansDense':
        """Run k-means on the data and store centroids and labels.

        Parameters
        ----------
        x:
            Data to cluster.

        Returns
        -------
        self: :class:`KMeansDense`
        """
        result = kmeans2(x, self.n_clusters, iter=self.n_init, thresh=self.tol, minit=self.init)
        self.cluster_centers_ = result[0]
        self.labels_ = result[1]
        return self

    def fit_transform(self, x: np.ndarray) -> np.ndarray:
        """Run k-means on the data and return the label of each sample.

        Parameters
        ----------
        x:
            Data to cluster.

        Returns
        -------
        labels: np.ndarray
        """
        self.fit(x)
        labels = self.labels_
        return labels
|
sknetwork/utils/knn.py
ADDED
|
@@ -0,0 +1,166 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
# -*- coding: utf-8 -*-
|
|
3
|
+
"""
|
|
4
|
+
Created on October 2019
|
|
5
|
+
@author: Nathan de Lara <nathan.delara@polytechnique.org>
|
|
6
|
+
"""
|
|
7
|
+
from abc import ABC
|
|
8
|
+
|
|
9
|
+
import numpy as np
|
|
10
|
+
from scipy import sparse
|
|
11
|
+
from scipy.spatial import cKDTree
|
|
12
|
+
|
|
13
|
+
from sknetwork.utils.base import Algorithm
|
|
14
|
+
from sknetwork.utils.format import directed2undirected
|
|
15
|
+
from sknetwork.utils.knn1d import knn1d
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class BaseTransformer(Algorithm, ABC):
    """Base class for transformers, i.e. algorithms that build an adjacency matrix from vector data.

    Parameters
    ----------
    undirected :
        If ``True``, :meth:`make_undirected` symmetrizes the stored adjacency.

    Attributes
    ----------
    adjacency_ :
        Adjacency matrix computed by ``fit`` (``None`` before fitting).
    """
    def __init__(self, undirected: bool = False):
        self.undirected = undirected

        self.adjacency_ = None

    def fit_transform(self, x: np.ndarray) -> sparse.csr_matrix:
        """Fit the transformer to the data, then return the adjacency it computed.

        Parameters
        ----------
        x: np.ndarray
            Input data.

        Returns
        -------
        adjacency : sparse.csr_matrix
        """
        self.fit(x)
        adjacency = self.adjacency_
        return adjacency

    def make_undirected(self):
        """Symmetrize the stored adjacency when ``undirected`` is set; otherwise do nothing.

        The dtype of the adjacency is preserved.
        """
        # Nothing to do before fitting or for directed output.
        if self.adjacency_ is None or not self.undirected:
            return self

        original_dtype = self.adjacency_.dtype
        symmetric = directed2undirected(self.adjacency_, weighted=False)
        self.adjacency_ = symmetric.astype(original_dtype)
        return self
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
class KNNDense(BaseTransformer):
    """Extract adjacency from vector data through k-nearest-neighbor search with KD-Tree.

    Parameters
    ----------
    n_neighbors :
        Number of neighbors for each sample in the transformed sparse graph.
        If the data has fewer than ``n_neighbors + 1`` samples, every other sample is used as a neighbor.
    undirected :
        As the nearest neighbor relationship is not symmetric, the graph is directed by default.
        Setting this parameter to ``True`` forces the algorithm to return undirected graphs.
    leaf_size :
        Leaf size passed to KDTree.
        This can affect the speed of the construction and query, as well as the memory required to store the tree.
    p :
        Which Minkowski p-norm to use. 1 is the sum-of-absolute-values "Manhattan" distance,
        2 is the usual Euclidean distance, infinity is the maximum-coordinate-difference distance.
        A finite large p may cause a ValueError if overflow can occur.
    eps :
        Return approximate nearest neighbors; the k-th returned value is guaranteed to be no further than (1+eps)
        times the distance to the real k-th nearest neighbor.
    n_jobs :
        Number of jobs to schedule for parallel processing. If -1 is given all processors are used.

    Attributes
    ----------
    adjacency_ :
        Adjacency matrix of the graph, of shape (n_samples, n_samples).

    References
    ----------
    Maneewongvatana, S., & Mount, D. M. (1999, December). It's okay to be skinny, if your friends are fat.
    In Center for Geometric Computing 4th Annual Workshop on Computational Geometry (Vol. 2, pp. 1-8).
    """
    def __init__(self, n_neighbors: int = 5, undirected: bool = False, leaf_size: int = 16, p=2, eps: float = 0.01,
                 n_jobs=1):
        super(KNNDense, self).__init__(undirected)

        self.n_neighbors = n_neighbors
        self.leaf_size = leaf_size
        self.p = p
        self.eps = eps
        self.n_jobs = n_jobs

    def fit(self, x: np.ndarray) -> 'KNNDense':
        """Fit algorithm to the data.

        Parameters
        ----------
        x :
            Data to transform into adjacency (one sample per row).

        Returns
        -------
        self : :class:`KNNDense`
        """
        n: int = x.shape[0]
        # One extra neighbor is requested because each sample is (normally) its own nearest neighbor.
        # The request is capped at n: when asked for more neighbors than there are points, the KD-tree
        # query pads the result with the out-of-range index n, which would corrupt the adjacency.
        n_query: int = min(self.n_neighbors + 1, n)

        tree = cKDTree(x, self.leaf_size)
        _, neighbors = tree.query(x, n_query, self.eps, self.p, workers=self.n_jobs)

        # Every row holds exactly n_query entries, so the CSR row pointer is a regular grid.
        indptr: np.ndarray = np.arange(n + 1) * n_query
        indices: np.ndarray = np.asarray(neighbors).reshape(-1)
        data = np.ones(indices.shape[0], dtype=bool)

        # The shape is given explicitly: inferring it from the largest neighbor index can yield a
        # non-square matrix (e.g. when the last sample is nobody's neighbor).
        self.adjacency_ = sparse.csr_matrix((data, indices, indptr), shape=(n, n))
        self.make_undirected()
        # Remove self-loops (diagonal entries are set to 0).
        self.adjacency_.setdiag(0)

        return self
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
class CNNDense(BaseTransformer):
    """Extract adjacency from vector data through component-wise k-nearest-neighbor search.
    KNN is applied independently on each column of the input matrix.

    Parameters
    ----------
    n_neighbors :
        Number of neighbors per dimension.
    undirected :
        As the nearest neighbor relationship is not symmetric, the graph is directed by default.
        Setting this parameter to ``True`` forces the algorithm to return undirected graphs.

    Attributes
    ----------
    adjacency_ :
        Adjacency matrix of the graph, of shape (n_samples, n_samples).
    """
    def __init__(self, n_neighbors: int = 1, undirected: bool = False):
        super(CNNDense, self).__init__(undirected)

        self.n_neighbors = n_neighbors

    def fit(self, x: np.ndarray) -> 'CNNDense':
        """Fit algorithm to the data.

        Parameters
        ----------
        x:
            Data to transform into adjacency (one sample per row, one component per column).

        Returns
        -------
        self: :class:`CNNDense`
        """
        n: int = x.shape[0]

        # Collect the edges found independently on each column.
        rows, cols = [], []
        for j in range(x.shape[1]):
            row, col = knn1d(x[:, j].astype(np.float32), self.n_neighbors)
            rows += row
            cols += col

        # An explicit integer dtype keeps the index arrays valid even when no edge was found
        # (an empty list would otherwise become a float array).
        rows = np.array(rows, dtype=int)
        cols = np.array(cols, dtype=int)
        data = np.ones(cols.shape[0], dtype=bool)

        # The shape is given explicitly: inferring it from the largest index found can yield a
        # matrix that is too small or not square.
        self.adjacency_ = sparse.csr_matrix((data, (rows, cols)), shape=(n, n))
        self.make_undirected()

        return self
|
|
Binary file
|
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
# distutils: language = c++
|
|
2
|
+
# cython: language_level=3
|
|
3
|
+
# cython: linetrace=True
|
|
4
|
+
# distutils: define_macros=CYTHON_TRACE_NOGIL=1
|
|
5
|
+
""" One dimensional nearest neighbor search.
|
|
6
|
+
Created on Mar, 2020
|
|
7
|
+
@author: Nathan de Lara <nathan.delara@polytechnique.org>
|
|
8
|
+
"""
|
|
9
|
+
import numpy as np
|
|
10
|
+
cimport numpy as np
|
|
11
|
+
|
|
12
|
+
from libcpp.vector cimport vector
|
|
13
|
+
cimport cython
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
@cython.boundscheck(False)
@cython.wraparound(False)
def knn1d(float[:] x, int n_neighbors):
    """K nearest neighbors search for 1-dimensional arrays.

    The samples are sorted once; the neighbors of a sample are then searched among the
    ``n_neighbors`` samples that precede it and the ``n_neighbors`` samples that follow it
    in sorted order.

    Parameters
    ----------
    x: np.ndarray
        1-d data
    n_neighbors: int
        Number of neighbors to return.
    Returns
    -------
    tuple
        List of source nodes and list of target nodes.
    """
    cdef int n
    cdef int i
    cdef int j
    cdef int ix
    cdef int low
    cdef int hgh
    cdef int neigh
    cdef int val

    cdef vector[int] sorted_ix
    cdef vector[int] row
    cdef vector[int] col
    cdef vector[int] candidates
    cdef vector[int] sorted_candidates
    cdef vector[int] sorted_deltas
    cdef vector[int] tmp

    cdef vector[float] deltas

    n = x.shape[0]
    # sorted_ix[i] is the index of the i-th smallest sample.
    tmp = np.argsort(x)
    for i in range(n):
        sorted_ix.push_back(tmp[i])

    for i in range(n):
        deltas.clear()
        sorted_candidates.clear()

        ix = sorted_ix[i]
        # Window of candidates around position i in sorted order.
        low = max(0, i - n_neighbors)
        # hgh is an exclusive bound, so it must be capped at n (not n - 1);
        # otherwise the largest sample can never be selected as a neighbor.
        hgh = min(n, i + n_neighbors + 1)
        candidates = sorted_ix[low:hgh]

        for j in range(len(candidates)):
            deltas.push_back(abs(x[candidates[j]] - x[ix]))

        # Order the candidates by distance to the current sample and keep the closest ones
        # (one more than n_neighbors, as the sample itself is among the candidates).
        sorted_deltas = np.argsort(deltas)
        for j in range(len(sorted_deltas)):
            val = candidates[sorted_deltas[j]]
            sorted_candidates.push_back(val)
        sorted_candidates = sorted_candidates[:n_neighbors+1]

        for j in range(len(sorted_candidates)):
            neigh = sorted_candidates[j]
            # Skip the sample itself: no self-loop.
            if neigh != ix:
                row.push_back(ix)
                col.push_back(neigh)

    return row, col
|
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
# -*- coding: utf-8 -*-
|
|
3
|
+
"""
|
|
4
|
+
Created in July 2019
|
|
5
|
+
@author: Nathan de Lara <nathan.delara@polytechnique.org>
|
|
6
|
+
@author: Thomas Bonald <bonald@enst.fr>
|
|
7
|
+
"""
|
|
8
|
+
from typing import Optional
|
|
9
|
+
|
|
10
|
+
import numpy as np
|
|
11
|
+
from scipy import sparse
|
|
12
|
+
|
|
13
|
+
from sknetwork.utils.neighbors import get_degrees
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def get_membership(labels: np.ndarray, dtype=bool, n_labels: Optional[int] = None) -> sparse.csr_matrix:
    """Build the binary matrix of the label assignments, of shape n_samples x n_labels.
    Negative labels are ignored.

    Parameters
    ----------
    labels :
        Label of each node (array or any sequence of integers).
    dtype :
        Type of the entries. Boolean by default.
    n_labels : int
        Number of labels. If not specified, the largest label + 1 is used
        (0 if there is no sample or no non-negative label).

    Returns
    -------
    membership : sparse.csr_matrix
        Binary matrix of label assignments.

    Example
    -------
    >>> from sknetwork.utils import get_membership
    >>> labels = np.array([0, 0, 1, 2])
    >>> membership = get_membership(labels)
    >>> membership.toarray().astype(int)
    array([[1, 0, 0],
           [1, 0, 0],
           [0, 1, 0],
           [0, 0, 1]])
    """
    # Accept plain sequences as well as arrays (the comparison below needs an array).
    labels = np.asarray(labels)
    n: int = len(labels)
    if n_labels is None:
        # Guard against empty input (max of an empty array is undefined) and against
        # inputs containing only negative labels (the number of columns cannot be negative).
        n_labels = max(int(labels.max()) + 1, 0) if n else 0
    shape = (n, n_labels)
    # Samples with a negative label produce an empty row.
    ix = (labels >= 0)
    data = np.ones(ix.sum())
    row = np.arange(n)[ix]
    col = labels[ix]
    return sparse.csr_matrix((data, (row, col)), shape=shape, dtype=dtype)
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
def from_membership(membership: sparse.csr_matrix) -> np.ndarray:
    """Get the labels from a membership matrix (n_samples x n_labels)
    Samples without label get -1.

    If a row has several stored entries, the column index of its first stored entry is used.

    Parameters
    ----------
    membership :
        Membership matrix.

    Returns
    -------
    labels : np.ndarray
        Labels (columns indices of the membership matrix).
    Example
    -------
    >>> from scipy import sparse
    >>> from sknetwork.utils import from_membership
    >>> membership = sparse.eye(3).tocsr()
    >>> labels = from_membership(membership)
    >>> labels
    array([0, 1, 2])
    """
    # Make sure the CSR structure (indptr / indices) is available.
    membership = sparse.csr_matrix(membership)
    indptr = membership.indptr
    # Rows with at least one stored entry.
    mask = indptr[1:] > indptr[:-1]
    labels = -np.ones(membership.shape[0], dtype=int)
    # Take one index per labeled row; assigning all indices at once fails with a shape mismatch
    # as soon as a row has more than one stored entry.
    labels[mask] = membership.indices[indptr[:-1][mask]]
    return labels
|
|
Binary file
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
# distutils: language = c++
|
|
2
|
+
# cython: language_level=3
|
|
3
|
+
# cython: linetrace=True
|
|
4
|
+
# distutils: define_macros=CYTHON_TRACE_NOGIL=1
|
|
5
|
+
"""
|
|
6
|
+
Created on Jun 3, 2020
|
|
7
|
+
@author: Julien Simonnet <julien.simonnet@etu.upmc.fr>
|
|
8
|
+
@author: Yohann Robert <yohann.robert@etu.upmc.fr>
|
|
9
|
+
"""
|
|
10
|
+
from libcpp.vector cimport vector
|
|
11
|
+
|
|
12
|
+
cdef class MinHeap:
    # Declarations of the MinHeap extension type; the implementation is in minheap.pyx.

    # val: heap array of keys; pos: position in ``val`` of each key.
    cdef vector[int] val, pos
    # Number of keys currently stored in the heap.
    cdef int size

    # All operations read the priority of a key from the ``scores`` array.
    cdef int pop_min(self, int[:] scores)
    cdef bint empty(self)
    cdef void swap(self, int x, int y)
    cdef void insert_key(self, int k, int[:] scores)
    cdef void decrease_key(self, int i, int[:] scores)
    cdef void min_heapify(self, int i, int[:] scores)
|
|
@@ -0,0 +1,111 @@
|
|
|
1
|
+
# distutils: language = c++
|
|
2
|
+
# cython: language_level=3
|
|
3
|
+
# cython: linetrace=True
|
|
4
|
+
# distutils: define_macros=CYTHON_TRACE_NOGIL=1
|
|
5
|
+
"""
|
|
6
|
+
Created on Jun 3, 2020
|
|
7
|
+
@author: Julien Simonnet <julien.simonnet@etu.upmc.fr>
|
|
8
|
+
@author: Yohann Robert <yohann.robert@etu.upmc.fr>
|
|
9
|
+
"""
|
|
10
|
+
cimport cython
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
cdef inline int parent(int i):
    """Index of the parent node of i in the tree (array-based binary heap layout)."""
    return (i - 1) // 2
|
|
16
|
+
|
|
17
|
+
cdef inline int left(int i):
    """Index of the left child of i in the tree (array-based binary heap layout)."""
    return 2 * i + 1
|
|
20
|
+
|
|
21
|
+
cdef inline int right(int i):
    """Index of the right child of i in the tree (array-based binary heap layout)."""
    return 2 * i + 2
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
@cython.boundscheck(False)
@cython.wraparound(False)
cdef class MinHeap:
    """Min heap data structure.

    The heap stores integer keys; the priority of a key ``k`` is read from ``scores[k]``,
    where ``scores`` is the array passed to each operation.
    ``val`` is the heap array of keys and ``pos[k]`` is the position of key ``k`` in ``val``.

    Parameters
    ----------
    n : int
        Capacity of the heap. Keys are used as indices of ``pos``, so they must be smaller than ``n``.
    """
    def __cinit__(self, int n):
        # resize (rather than reserve) is required: elements are written by index below,
        # and indexing a vector beyond its size is undefined behavior even within its capacity.
        self.val.resize(n)
        self.pos.resize(n)
        self.size = 0

    cdef bint empty(self):
        """Check if the heap is empty."""
        return self.size == 0

    cdef inline void swap(self, int x, int y):
        """Exchange the elements at positions x and y of the heap."""
        cdef int tmp
        tmp = self.val[x]
        self.val[x] = self.val[y]
        self.val[y] = tmp

        # updates the position of the corresponding elements
        self.pos[self.val[x]] = x
        self.pos[self.val[y]] = y

    cdef void insert_key(self, int k, int[:] scores):
        """Insert the new key k into the heap.

        No capacity check is made: the caller must not insert more keys than the capacity of the heap.
        """
        # First insert the new key at the end
        self.val[self.size] = k
        self.pos[k] = self.size
        cdef int i = self.size
        self.size += 1

        # Then move it up while its parent has a larger score
        cdef int p = parent(i)
        while (p >= 0) and (scores[self.val[p]] > scores[self.val[i]]) :
            self.swap(i, p)
            i = p
            p = parent(i)


    cdef void decrease_key(self, int i, int[:] scores):
        """Restore the heap order after the score of key i has been decreased.

        It is assumed that the new score (already stored in ``scores``) is smaller than the old one.
        Nothing is done if the recorded position of the key is outside the current heap.
        """
        cdef int pos, p
        pos = self.pos[i]
        if pos < self.size:
            p = parent(pos)

            # Move the key up while its parent has a larger score
            while (pos != 0) and (scores[self.val[p]] > scores[self.val[pos]]):
                self.swap(pos, p)
                pos = p
                p = parent(pos)

    cdef int pop_min(self, int[:] scores):
        """Remove and return the minimum element (or root) from the heap.

        No check is made that the heap is non-empty: the caller is expected to test ``empty()`` first.
        """
        if self.size == 1:
            self.size = 0
            return self.val[0]

        # Store the minimum value, and remove it from heap
        cdef int root = self.val[0]
        # The last element replaces the root and is then moved down to its place
        self.val[0] = self.val[self.size-1]
        self.pos[self.val[0]] = 0
        self.size -= 1
        self.min_heapify(0, scores)

        return root

    cdef void min_heapify(self, int i, int[:] scores):
        """A recursive method to heapify a subtree with the root at given index
        This function assumes that the subtrees are already heapified.
        """
        cdef int l, r, smallest
        l = left(i)
        r = right(i)
        # Find the element with the smallest score among i and its children
        smallest = i
        if (l < self.size) and (scores[self.val[l]] < scores[self.val[i]]):
            smallest = l

        if (r < self.size) and (scores[self.val[r]] < scores[self.val[smallest]]):
            smallest = r

        if smallest != i:
            self.swap(i, smallest)
            self.min_heapify(smallest, scores)
|
|
@@ -0,0 +1,115 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
# -*- coding: utf-8 -*-
|
|
3
|
+
"""
|
|
4
|
+
Created on December 29, 2020
|
|
5
|
+
@author: Thomas Bonald <bonald@enst.fr>
|
|
6
|
+
"""
|
|
7
|
+
import numpy as np
|
|
8
|
+
from scipy import sparse
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def get_neighbors(input_matrix: sparse.csr_matrix, node: int, transpose: bool = False) -> np.ndarray:
    """Get the neighbors of a node.

    If the graph is directed, returns the vector of successors. Set ``transpose=True``
    to get the predecessors.

    For a biadjacency matrix, returns the neighbors of a row node. Set ``transpose=True``
    to get the neighbors of a column node.

    Parameters
    ----------
    input_matrix : sparse.csr_matrix
        Adjacency or biadjacency matrix. Inputs in another format are converted to CSR.
    node : int
        Target node.
    transpose :
        If ``True``, transpose the input matrix.
    Returns
    -------
    neighbors : np.ndarray
        Array of neighbors of the target node.

    Example
    -------
    >>> from sknetwork.data import house
    >>> adjacency = house()
    >>> get_neighbors(adjacency, node=0)
    array([1, 4], dtype=int32)
    """
    matrix = input_matrix.T if transpose else input_matrix
    # The row structure read below (indptr / indices) is only meaningful in CSR format:
    # a CSC matrix exposes the same attributes but indexed by column, which would silently
    # return the wrong neighbors. Convert anything that is not already CSR.
    if getattr(matrix, 'format', None) != 'csr':
        matrix = sparse.csr_matrix(matrix)
    neighbors = matrix.indices[matrix.indptr[node]: matrix.indptr[node + 1]]
    return neighbors
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def get_degrees(input_matrix: sparse.csr_matrix, transpose: bool = False) -> np.ndarray:
    """Get the vector of degrees of a graph.

    If the graph is directed, returns the out-degrees (number of successors). Set ``transpose=True``
    to get the in-degrees (number of predecessors).

    For a biadjacency matrix, returns the degrees of rows. Set ``transpose=True``
    to get the degrees of columns.

    Parameters
    ----------
    input_matrix : sparse.csr_matrix
        Adjacency or biadjacency matrix. Inputs in another format are converted to CSR.
    transpose :
        If ``True``, transpose the input matrix.
    Returns
    -------
    degrees : np.ndarray
        Array of degrees (number of stored entries of each row).

    Example
    -------
    >>> from sknetwork.data import house
    >>> adjacency = house()
    >>> get_degrees(adjacency)
    array([2, 3, 2, 2, 3], dtype=int32)
    """
    matrix = input_matrix.T if transpose else input_matrix
    # The row pointer is only meaningful in CSR format: a CSC matrix exposes an ``indptr``
    # indexed by column, which would silently return the degrees of the transposed graph.
    # Convert anything that is not already CSR.
    if getattr(matrix, 'format', None) != 'csr':
        matrix = sparse.csr_matrix(matrix)
    # Consecutive differences of the row pointer give the number of stored entries per row.
    degrees = matrix.indptr[1:] - matrix.indptr[:-1]
    return degrees
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
def get_weights(input_matrix: sparse.csr_matrix, transpose: bool = False) -> np.ndarray:
    """Get the vector of weights of the nodes of a graph, i.e. the sum of the entries of each row.
    If the graph is not weighted, this is the vector of degrees.

    If the graph is directed, returns the out-weights (total weight of outgoing links). Set ``transpose=True``
    to get the in-weights (total weight of incoming links).

    For a biadjacency matrix, returns the weights of rows. Set ``transpose=True``
    to get the weights of columns.

    Parameters
    ----------
    input_matrix : sparse.csr_matrix
        Adjacency or biadjacency matrix.
    transpose :
        If ``True``, transpose the input matrix.
    Returns
    -------
    weights : np.ndarray
        Array of weights.

    Example
    -------
    >>> from sknetwork.data import house
    >>> adjacency = house()
    >>> get_weights(adjacency)
    array([2., 3., 2., 2., 3.])
    """
    matrix = sparse.csr_matrix(input_matrix.T) if transpose else input_matrix
    # Multiplying by the all-ones vector sums the entries of each row.
    unit_vector = np.ones(matrix.shape[1])
    return matrix.dot(unit_vector)
|