scikit-network 0.28.3__cp39-cp39-macosx_12_0_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of scikit-network might be problematic. Click here for more details.
- scikit_network-0.28.3.dist-info/AUTHORS.rst +41 -0
- scikit_network-0.28.3.dist-info/LICENSE +34 -0
- scikit_network-0.28.3.dist-info/METADATA +457 -0
- scikit_network-0.28.3.dist-info/RECORD +240 -0
- scikit_network-0.28.3.dist-info/WHEEL +5 -0
- scikit_network-0.28.3.dist-info/top_level.txt +1 -0
- sknetwork/__init__.py +21 -0
- sknetwork/classification/__init__.py +8 -0
- sknetwork/classification/base.py +84 -0
- sknetwork/classification/base_rank.py +143 -0
- sknetwork/classification/diffusion.py +134 -0
- sknetwork/classification/knn.py +162 -0
- sknetwork/classification/metrics.py +205 -0
- sknetwork/classification/pagerank.py +66 -0
- sknetwork/classification/propagation.py +152 -0
- sknetwork/classification/tests/__init__.py +1 -0
- sknetwork/classification/tests/test_API.py +35 -0
- sknetwork/classification/tests/test_diffusion.py +37 -0
- sknetwork/classification/tests/test_knn.py +24 -0
- sknetwork/classification/tests/test_metrics.py +53 -0
- sknetwork/classification/tests/test_pagerank.py +20 -0
- sknetwork/classification/tests/test_propagation.py +24 -0
- sknetwork/classification/vote.cpython-39-darwin.so +0 -0
- sknetwork/classification/vote.pyx +58 -0
- sknetwork/clustering/__init__.py +7 -0
- sknetwork/clustering/base.py +102 -0
- sknetwork/clustering/kmeans.py +142 -0
- sknetwork/clustering/louvain.py +255 -0
- sknetwork/clustering/louvain_core.cpython-39-darwin.so +0 -0
- sknetwork/clustering/louvain_core.pyx +134 -0
- sknetwork/clustering/metrics.py +91 -0
- sknetwork/clustering/postprocess.py +66 -0
- sknetwork/clustering/propagation_clustering.py +108 -0
- sknetwork/clustering/tests/__init__.py +1 -0
- sknetwork/clustering/tests/test_API.py +37 -0
- sknetwork/clustering/tests/test_kmeans.py +47 -0
- sknetwork/clustering/tests/test_louvain.py +104 -0
- sknetwork/clustering/tests/test_metrics.py +50 -0
- sknetwork/clustering/tests/test_post_processing.py +23 -0
- sknetwork/clustering/tests/test_postprocess.py +39 -0
- sknetwork/data/__init__.py +5 -0
- sknetwork/data/load.py +408 -0
- sknetwork/data/models.py +459 -0
- sknetwork/data/parse.py +621 -0
- sknetwork/data/test_graphs.py +84 -0
- sknetwork/data/tests/__init__.py +1 -0
- sknetwork/data/tests/test_API.py +30 -0
- sknetwork/data/tests/test_load.py +95 -0
- sknetwork/data/tests/test_models.py +52 -0
- sknetwork/data/tests/test_parse.py +253 -0
- sknetwork/data/tests/test_test_graphs.py +30 -0
- sknetwork/data/tests/test_toy_graphs.py +68 -0
- sknetwork/data/toy_graphs.py +619 -0
- sknetwork/embedding/__init__.py +10 -0
- sknetwork/embedding/base.py +90 -0
- sknetwork/embedding/force_atlas.py +197 -0
- sknetwork/embedding/louvain_embedding.py +174 -0
- sknetwork/embedding/louvain_hierarchy.py +142 -0
- sknetwork/embedding/metrics.py +66 -0
- sknetwork/embedding/random_projection.py +133 -0
- sknetwork/embedding/spectral.py +214 -0
- sknetwork/embedding/spring.py +198 -0
- sknetwork/embedding/svd.py +363 -0
- sknetwork/embedding/tests/__init__.py +1 -0
- sknetwork/embedding/tests/test_API.py +73 -0
- sknetwork/embedding/tests/test_force_atlas.py +35 -0
- sknetwork/embedding/tests/test_louvain_embedding.py +33 -0
- sknetwork/embedding/tests/test_louvain_hierarchy.py +19 -0
- sknetwork/embedding/tests/test_metrics.py +29 -0
- sknetwork/embedding/tests/test_random_projection.py +28 -0
- sknetwork/embedding/tests/test_spectral.py +84 -0
- sknetwork/embedding/tests/test_spring.py +50 -0
- sknetwork/embedding/tests/test_svd.py +37 -0
- sknetwork/flow/__init__.py +3 -0
- sknetwork/flow/flow.py +73 -0
- sknetwork/flow/tests/__init__.py +1 -0
- sknetwork/flow/tests/test_flow.py +17 -0
- sknetwork/flow/tests/test_utils.py +69 -0
- sknetwork/flow/utils.py +91 -0
- sknetwork/gnn/__init__.py +10 -0
- sknetwork/gnn/activation.py +117 -0
- sknetwork/gnn/base.py +155 -0
- sknetwork/gnn/base_activation.py +89 -0
- sknetwork/gnn/base_layer.py +109 -0
- sknetwork/gnn/gnn_classifier.py +381 -0
- sknetwork/gnn/layer.py +153 -0
- sknetwork/gnn/layers.py +127 -0
- sknetwork/gnn/loss.py +180 -0
- sknetwork/gnn/neighbor_sampler.py +65 -0
- sknetwork/gnn/optimizer.py +163 -0
- sknetwork/gnn/tests/__init__.py +1 -0
- sknetwork/gnn/tests/test_activation.py +56 -0
- sknetwork/gnn/tests/test_base.py +79 -0
- sknetwork/gnn/tests/test_base_layer.py +37 -0
- sknetwork/gnn/tests/test_gnn_classifier.py +192 -0
- sknetwork/gnn/tests/test_layers.py +80 -0
- sknetwork/gnn/tests/test_loss.py +33 -0
- sknetwork/gnn/tests/test_neigh_sampler.py +23 -0
- sknetwork/gnn/tests/test_optimizer.py +43 -0
- sknetwork/gnn/tests/test_utils.py +93 -0
- sknetwork/gnn/utils.py +219 -0
- sknetwork/hierarchy/__init__.py +7 -0
- sknetwork/hierarchy/base.py +69 -0
- sknetwork/hierarchy/louvain_hierarchy.py +264 -0
- sknetwork/hierarchy/metrics.py +234 -0
- sknetwork/hierarchy/paris.cpython-39-darwin.so +0 -0
- sknetwork/hierarchy/paris.pyx +317 -0
- sknetwork/hierarchy/postprocess.py +350 -0
- sknetwork/hierarchy/tests/__init__.py +1 -0
- sknetwork/hierarchy/tests/test_API.py +25 -0
- sknetwork/hierarchy/tests/test_algos.py +29 -0
- sknetwork/hierarchy/tests/test_metrics.py +62 -0
- sknetwork/hierarchy/tests/test_postprocess.py +57 -0
- sknetwork/hierarchy/tests/test_ward.py +25 -0
- sknetwork/hierarchy/ward.py +94 -0
- sknetwork/linalg/__init__.py +9 -0
- sknetwork/linalg/basics.py +37 -0
- sknetwork/linalg/diteration.cpython-39-darwin.so +0 -0
- sknetwork/linalg/diteration.pyx +49 -0
- sknetwork/linalg/eig_solver.py +93 -0
- sknetwork/linalg/laplacian.py +15 -0
- sknetwork/linalg/normalization.py +66 -0
- sknetwork/linalg/operators.py +225 -0
- sknetwork/linalg/polynome.py +76 -0
- sknetwork/linalg/ppr_solver.py +170 -0
- sknetwork/linalg/push.cpython-39-darwin.so +0 -0
- sknetwork/linalg/push.pyx +73 -0
- sknetwork/linalg/sparse_lowrank.py +142 -0
- sknetwork/linalg/svd_solver.py +91 -0
- sknetwork/linalg/tests/__init__.py +1 -0
- sknetwork/linalg/tests/test_eig.py +44 -0
- sknetwork/linalg/tests/test_laplacian.py +18 -0
- sknetwork/linalg/tests/test_normalization.py +38 -0
- sknetwork/linalg/tests/test_operators.py +70 -0
- sknetwork/linalg/tests/test_polynome.py +38 -0
- sknetwork/linalg/tests/test_ppr.py +50 -0
- sknetwork/linalg/tests/test_sparse_lowrank.py +61 -0
- sknetwork/linalg/tests/test_svd.py +38 -0
- sknetwork/linkpred/__init__.py +4 -0
- sknetwork/linkpred/base.py +80 -0
- sknetwork/linkpred/first_order.py +508 -0
- sknetwork/linkpred/first_order_core.cpython-39-darwin.so +0 -0
- sknetwork/linkpred/first_order_core.pyx +315 -0
- sknetwork/linkpred/postprocessing.py +98 -0
- sknetwork/linkpred/tests/__init__.py +1 -0
- sknetwork/linkpred/tests/test_API.py +49 -0
- sknetwork/linkpred/tests/test_postprocessing.py +21 -0
- sknetwork/path/__init__.py +4 -0
- sknetwork/path/metrics.py +148 -0
- sknetwork/path/search.py +65 -0
- sknetwork/path/shortest_path.py +186 -0
- sknetwork/path/tests/__init__.py +1 -0
- sknetwork/path/tests/test_metrics.py +29 -0
- sknetwork/path/tests/test_search.py +25 -0
- sknetwork/path/tests/test_shortest_path.py +45 -0
- sknetwork/ranking/__init__.py +9 -0
- sknetwork/ranking/base.py +56 -0
- sknetwork/ranking/betweenness.cpython-39-darwin.so +0 -0
- sknetwork/ranking/betweenness.pyx +99 -0
- sknetwork/ranking/closeness.py +95 -0
- sknetwork/ranking/harmonic.py +82 -0
- sknetwork/ranking/hits.py +94 -0
- sknetwork/ranking/katz.py +81 -0
- sknetwork/ranking/pagerank.py +107 -0
- sknetwork/ranking/postprocess.py +25 -0
- sknetwork/ranking/tests/__init__.py +1 -0
- sknetwork/ranking/tests/test_API.py +34 -0
- sknetwork/ranking/tests/test_betweenness.py +38 -0
- sknetwork/ranking/tests/test_closeness.py +34 -0
- sknetwork/ranking/tests/test_hits.py +20 -0
- sknetwork/ranking/tests/test_pagerank.py +69 -0
- sknetwork/regression/__init__.py +4 -0
- sknetwork/regression/base.py +56 -0
- sknetwork/regression/diffusion.py +190 -0
- sknetwork/regression/tests/__init__.py +1 -0
- sknetwork/regression/tests/test_API.py +34 -0
- sknetwork/regression/tests/test_diffusion.py +48 -0
- sknetwork/sknetwork.py +3 -0
- sknetwork/topology/__init__.py +9 -0
- sknetwork/topology/dag.py +74 -0
- sknetwork/topology/dag_core.cpython-39-darwin.so +0 -0
- sknetwork/topology/dag_core.pyx +38 -0
- sknetwork/topology/kcliques.cpython-39-darwin.so +0 -0
- sknetwork/topology/kcliques.pyx +193 -0
- sknetwork/topology/kcore.cpython-39-darwin.so +0 -0
- sknetwork/topology/kcore.pyx +120 -0
- sknetwork/topology/structure.py +234 -0
- sknetwork/topology/tests/__init__.py +1 -0
- sknetwork/topology/tests/test_cliques.py +28 -0
- sknetwork/topology/tests/test_cores.py +21 -0
- sknetwork/topology/tests/test_dag.py +26 -0
- sknetwork/topology/tests/test_structure.py +99 -0
- sknetwork/topology/tests/test_triangles.py +42 -0
- sknetwork/topology/tests/test_wl_coloring.py +49 -0
- sknetwork/topology/tests/test_wl_kernel.py +31 -0
- sknetwork/topology/triangles.cpython-39-darwin.so +0 -0
- sknetwork/topology/triangles.pyx +166 -0
- sknetwork/topology/weisfeiler_lehman.py +163 -0
- sknetwork/topology/weisfeiler_lehman_core.cpython-39-darwin.so +0 -0
- sknetwork/topology/weisfeiler_lehman_core.pyx +116 -0
- sknetwork/utils/__init__.py +40 -0
- sknetwork/utils/base.py +35 -0
- sknetwork/utils/check.py +354 -0
- sknetwork/utils/co_neighbor.py +71 -0
- sknetwork/utils/format.py +219 -0
- sknetwork/utils/kmeans.py +89 -0
- sknetwork/utils/knn.py +166 -0
- sknetwork/utils/knn1d.cpython-39-darwin.so +0 -0
- sknetwork/utils/knn1d.pyx +80 -0
- sknetwork/utils/membership.py +82 -0
- sknetwork/utils/minheap.cpython-39-darwin.so +0 -0
- sknetwork/utils/minheap.pxd +22 -0
- sknetwork/utils/minheap.pyx +111 -0
- sknetwork/utils/neighbors.py +115 -0
- sknetwork/utils/seeds.py +75 -0
- sknetwork/utils/simplex.py +140 -0
- sknetwork/utils/tests/__init__.py +1 -0
- sknetwork/utils/tests/test_base.py +28 -0
- sknetwork/utils/tests/test_bunch.py +16 -0
- sknetwork/utils/tests/test_check.py +190 -0
- sknetwork/utils/tests/test_co_neighbor.py +43 -0
- sknetwork/utils/tests/test_format.py +61 -0
- sknetwork/utils/tests/test_kmeans.py +21 -0
- sknetwork/utils/tests/test_knn.py +32 -0
- sknetwork/utils/tests/test_membership.py +24 -0
- sknetwork/utils/tests/test_neighbors.py +41 -0
- sknetwork/utils/tests/test_projection_simplex.py +33 -0
- sknetwork/utils/tests/test_seeds.py +67 -0
- sknetwork/utils/tests/test_verbose.py +15 -0
- sknetwork/utils/tests/test_ward.py +20 -0
- sknetwork/utils/timeout.py +38 -0
- sknetwork/utils/verbose.py +37 -0
- sknetwork/utils/ward.py +60 -0
- sknetwork/visualization/__init__.py +4 -0
- sknetwork/visualization/colors.py +34 -0
- sknetwork/visualization/dendrograms.py +229 -0
- sknetwork/visualization/graphs.py +819 -0
- sknetwork/visualization/tests/__init__.py +1 -0
- sknetwork/visualization/tests/test_dendrograms.py +53 -0
- sknetwork/visualization/tests/test_graphs.py +167 -0
|
@@ -0,0 +1,190 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
# -*- coding: utf-8 -*-
|
|
3
|
+
"""
|
|
4
|
+
Created in July 2019
|
|
5
|
+
@author: Nathan de Lara <nathan.delara@polytechnique.org>
|
|
6
|
+
@author: Thomas Bonald <thomas.bonald@telecom-paris.fr>
|
|
7
|
+
"""
|
|
8
|
+
from typing import Union, Optional, Tuple
|
|
9
|
+
|
|
10
|
+
import numpy as np
|
|
11
|
+
from scipy import sparse
|
|
12
|
+
|
|
13
|
+
from sknetwork.linalg.normalization import normalize
|
|
14
|
+
from sknetwork.regression.base import BaseRegressor
|
|
15
|
+
from sknetwork.utils.format import get_adjacency_seeds
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def init_temperatures(seeds: np.ndarray, init: Optional[float]) -> Tuple[np.ndarray, np.ndarray]:
    """Build the initial temperature vector and the mask of seed nodes.

    Parameters
    ----------
    seeds :
        Temperature of each node; negative entries mark non-seed nodes.
    init :
        Temperature assigned to non-seed nodes.
        If ``None``, use the average temperature of seed nodes
        (0 if there is no seed node).

    Returns
    -------
    temperatures : np.ndarray
        Initial temperature of each node (float vector, same length as ``seeds``).
    border : np.ndarray
        Boolean mask, ``True`` for seed nodes.
    """
    seeds = np.asarray(seeds)
    border = (seeds >= 0)
    if init is None:
        # The mean of an empty selection is NaN (with a RuntimeWarning) and would
        # propagate to every node during the diffusion: fall back to 0 without seeds.
        init = seeds[border].mean() if border.any() else 0.
    temperatures = np.full(len(seeds), init, dtype=float)
    temperatures[border] = seeds[border]
    return temperatures, border
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
class Diffusion(BaseRegressor):
    """Regression by free heat diffusion along the edges, starting from the temperatures of some seed nodes.

    The values of all nodes are updated at each iteration, including those of seed nodes.
    See ``Dirichlet`` for the variant where the temperatures of seed nodes are kept fixed.

    Parameters
    ----------
    n_iter : int
        Number of iterations of the diffusion (must be positive).

    Attributes
    ----------
    values_ : np.ndarray
        Value of each node (= temperature).
    values_row_: np.ndarray
        Values of rows, for bipartite graphs.
    values_col_: np.ndarray
        Values of columns, for bipartite graphs.

    Example
    -------
    >>> from sknetwork.data import house
    >>> diffusion = Diffusion(n_iter=2)
    >>> adjacency = house()
    >>> seeds = {0: 1, 2: 0}
    >>> values = diffusion.fit_predict(adjacency, seeds)
    >>> np.round(values, 2)
    array([0.58, 0.56, 0.38, 0.58, 0.42])

    References
    ----------
    Chung, F. (2007). The heat kernel as the pagerank of a graph. Proceedings of the National Academy of Sciences.
    """
    def __init__(self, n_iter: int = 3):
        super(Diffusion, self).__init__()

        # Guard clause: reject non-positive iteration counts up front.
        if n_iter <= 0:
            raise ValueError('The number of iterations must be positive.')
        self.n_iter = n_iter
        self.bipartite = None

    def fit(self, input_matrix: Union[sparse.csr_matrix, np.ndarray],
            seeds: Optional[Union[dict, np.ndarray]] = None, seeds_row: Optional[Union[dict, np.ndarray]] = None,
            seeds_col: Optional[Union[dict, np.ndarray]] = None, init: Optional[float] = None,
            force_bipartite: bool = False) -> 'Diffusion':
        """Run ``n_iter`` steps of diffusion from the initial temperatures.

        Parameters
        ----------
        input_matrix :
            Adjacency matrix or biadjacency matrix of the graph.
        seeds :
            Initial temperatures of seed nodes (dictionary or vector). Negative temperatures ignored.
        seeds_row, seeds_col :
            Initial temperatures of rows and columns for bipartite graphs. Negative temperatures ignored.
        init :
            Initial temperature of non-seed nodes.
            If ``None``, use the average temperature of seed nodes (default).
        force_bipartite :
            If ``True``, consider the input matrix as a biadjacency matrix (default = ``False``).

        Returns
        -------
        self: :class:`Diffusion`
        """
        adjacency, seeds, self.bipartite = get_adjacency_seeds(
            input_matrix, force_bipartite=force_bipartite, seeds=seeds, seeds_row=seeds_row, seeds_col=seeds_col)
        # Only the temperatures are needed here: seed nodes are not held fixed.
        temperatures = init_temperatures(seeds, init)[0]
        transition = normalize(adjacency)
        for _ in range(self.n_iter):
            temperatures = transition.dot(temperatures)

        self.values_ = temperatures
        if self.bipartite:
            self._split_vars(input_matrix.shape)

        return self
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
class Dirichlet(BaseRegressor):
    """Regression by heat diffusion with boundary constraints (Dirichlet problem),
    given the temperatures of some seed nodes.

    The temperatures of seed nodes are fixed; only the values of the other nodes are updated.

    Parameters
    ----------
    n_iter : int
        Number of iterations of the diffusion (must be positive).

    Attributes
    ----------
    values_ : np.ndarray
        Value of each node (= temperature).
    values_row_: np.ndarray
        Values of rows, for bipartite graphs.
    values_col_: np.ndarray
        Values of columns, for bipartite graphs.

    Example
    -------
    >>> from sknetwork.regression import Dirichlet
    >>> from sknetwork.data import house
    >>> dirichlet = Dirichlet()
    >>> adjacency = house()
    >>> seeds = {0: 1, 2: 0}
    >>> values = dirichlet.fit_predict(adjacency, seeds)
    >>> np.round(values, 2)
    array([1.  , 0.54, 0.  , 0.31, 0.62])

    References
    ----------
    Chung, F. (2007). The heat kernel as the pagerank of a graph. Proceedings of the National Academy of Sciences.
    """
    def __init__(self, n_iter: int = 10):
        super(Dirichlet, self).__init__()

        # Guard clause: reject non-positive iteration counts up front.
        if n_iter <= 0:
            raise ValueError('The number of iterations must be positive.')
        self.n_iter = n_iter
        self.bipartite = None

    def fit(self, input_matrix: Union[sparse.csr_matrix, np.ndarray],
            seeds: Optional[Union[dict, np.ndarray]] = None, seeds_row: Optional[Union[dict, np.ndarray]] = None,
            seeds_col: Optional[Union[dict, np.ndarray]] = None, init: Optional[float] = None,
            force_bipartite: bool = False) -> 'Dirichlet':
        """Run ``n_iter`` steps of diffusion, resetting seed nodes to their temperature after each step.

        Parameters
        ----------
        input_matrix :
            Adjacency matrix or biadjacency matrix of the graph.
        seeds :
            Temperatures of seed nodes (dictionary or vector). Negative temperatures ignored.
        seeds_row, seeds_col :
            Temperatures of rows and columns for bipartite graphs. Negative temperatures ignored.
        init :
            Initial temperature of non-seed nodes.
            If ``None``, use the average temperature of seed nodes (default).
        force_bipartite :
            If ``True``, consider the input matrix as a biadjacency matrix (default = ``False``).

        Returns
        -------
        self: :class:`Dirichlet`
        """
        adjacency, seeds, self.bipartite = get_adjacency_seeds(
            input_matrix, force_bipartite=force_bipartite, seeds=seeds, seeds_row=seeds_row, seeds_col=seeds_col)
        temperatures, border = init_temperatures(seeds, init)
        transition = normalize(adjacency)
        current = temperatures.copy()
        for _ in range(self.n_iter):
            current = transition.dot(current)
            # Boundary condition: seed nodes keep their prescribed temperature.
            current[border] = temperatures[border]

        self.values_ = current
        if self.bipartite:
            self._split_vars(input_matrix.shape)

        return self
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""tests for regression"""
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
# -*- coding: utf-8 -*-
|
|
3
|
+
"""tests for regression API"""
|
|
4
|
+
import unittest
|
|
5
|
+
|
|
6
|
+
from sknetwork.data.test_graphs import test_bigraph, test_graph, test_digraph
|
|
7
|
+
from sknetwork.regression import *
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class TestAPI(unittest.TestCase):
    """Smoke tests of the API shared by the regression algorithms."""

    def test_basic(self):
        """Each algorithm returns one non-negative value per node."""
        # The same instances are refitted on every graph.
        methods = [Diffusion(), Dirichlet()]
        for adjacency in [test_graph(), test_digraph()]:
            expected_shape = (adjacency.shape[0],)
            for method in methods:
                predicted = method.fit_predict(adjacency)
                self.assertEqual(predicted.shape, expected_shape)
                self.assertTrue(min(predicted) >= 0)
                transformed = method.fit_transform(adjacency)
                self.assertEqual(transformed.shape, expected_shape)

    def test_bipartite(self):
        """Row and column values have the dimensions of the biadjacency matrix."""
        biadjacency = test_bigraph()
        n_row, n_col = biadjacency.shape

        for method in [Diffusion(), Dirichlet()]:
            method.fit(biadjacency)
            self.assertEqual(method.values_row_.shape, (n_row,))
            self.assertEqual(method.values_col_.shape, (n_col,))
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
# -*- coding: utf-8 -*-
|
|
3
|
+
"""tests for diffusion.py"""
|
|
4
|
+
|
|
5
|
+
import unittest
|
|
6
|
+
|
|
7
|
+
from sknetwork.data.test_graphs import *
|
|
8
|
+
from sknetwork.regression import Diffusion, Dirichlet
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
# noinspection DuplicatedCode
|
|
12
|
+
class TestDiffusion(unittest.TestCase):
    """Tests of the Diffusion and Dirichlet regressors."""

    def setUp(self):
        """Create one instance of each algorithm with default parameters."""
        self.algos = [Diffusion(), Dirichlet()]

    def test_no_iter(self):
        """A negative number of iterations is rejected."""
        with self.assertRaises(ValueError):
            Diffusion(n_iter=-1)

    def test_single_node_graph(self):
        """A single seed node with a self-loop keeps its temperature."""
        single_node = {0: 1}
        for algo in self.algos:
            algo.fit(sparse.identity(1, format='csr'), single_node)
            self.assertEqual(algo.values_, [1])

    def test_range(self):
        """Values remain between the lowest and highest initial temperatures."""
        seeds = {0: 0, 1: 1, 2: 0.5}
        for adjacency in [test_graph(), test_digraph()]:
            for algo in self.algos:
                values = algo.fit_transform(adjacency, seeds)
                self.assertTrue(np.all(values <= 1) and np.all(values >= 0))

        biadjacency = test_bigraph()
        for algo in [Diffusion(), Dirichlet()]:
            values = algo.fit_predict(biadjacency, seeds_row={0: 1})
            self.assertTrue(np.all(values <= 1) and np.all(values >= 0))
            values = algo.fit_predict(biadjacency, seeds_row={0: 0.1}, seeds_col={1: 2}, init=0.3)
            self.assertTrue(np.all(values <= 2) and np.all(values >= 0.1))

    def test_initial_state(self):
        """Same range check with an explicit initial temperature for non-seed nodes."""
        seeds = {0: 0, 1: 1, 2: 0.5}
        for adjacency in [test_graph(), test_digraph()]:
            for algo in self.algos:
                values = algo.fit_transform(adjacency, seeds, 0.3)
                self.assertTrue(np.all(values <= 1) and np.all(values >= 0))

    def test_n_iter(self):
        """Zero iterations is rejected."""
        with self.assertRaises(ValueError):
            Dirichlet(n_iter=0)
|
|
48
|
+
|
sknetwork/sknetwork.py
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
"""Module on topology."""
|
|
2
|
+
from sknetwork.topology.kcliques import Cliques
|
|
3
|
+
from sknetwork.topology.kcore import CoreDecomposition
|
|
4
|
+
from sknetwork.topology.triangles import Triangles
|
|
5
|
+
|
|
6
|
+
from sknetwork.topology.dag import DAG
|
|
7
|
+
from sknetwork.topology.structure import is_acyclic, is_bipartite, is_connected, get_largest_connected_component, \
|
|
8
|
+
get_connected_components
|
|
9
|
+
from sknetwork.topology.weisfeiler_lehman import WeisfeilerLehman, are_isomorphic
|
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
# -*- coding: utf-8 -*-
|
|
3
|
+
"""
|
|
4
|
+
Created on Jun 3, 2020
|
|
5
|
+
@author: Nathan de Lara <nathan.delara@polytechnique.org>
|
|
6
|
+
"""
|
|
7
|
+
import numpy as np
|
|
8
|
+
from scipy import sparse
|
|
9
|
+
from sknetwork.topology.dag_core import fit_core
|
|
10
|
+
|
|
11
|
+
from sknetwork.utils.base import Algorithm
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class DAG(Algorithm):
    """Build a Directed Acyclic Graph from an adjacency.

    * Graphs
    * DiGraphs

    Parameters
    ----------
    ordering : str
        A method to sort the nodes.

        * If None, the default order is the index.
        * If ``'degree'``, the nodes are sorted by ascending degree.

    Attributes
    ----------
    indptr_ : np.ndarray
        Pointer index as for CSR format.
    indices_ : np.ndarray
        Indices as for CSR format.
    """
    def __init__(self, ordering: str = None):
        super(DAG, self).__init__()
        self.ordering = ordering
        self.indptr_ = None
        self.indices_ = None

    def fit(self, adjacency: sparse.csr_matrix, sorted_nodes=None) -> 'DAG':
        """Fit algorithm to the data.

        Parameters
        ----------
        adjacency :
            Adjacency matrix of the graph.
        sorted_nodes : np.ndarray
            Node indices listed in the desired order. The DAG only contains the edges (u, v)
            such that node u is listed before node v in ``sorted_nodes``.
            If ``None``, the order is given by the parameter ``ordering``.

        Returns
        -------
        self: :class:`DAG`

        Raises
        ------
        ValueError
            If ``sorted_nodes`` does not have one entry per node or contains values that are not
            valid node indices, or if ``ordering`` is unknown.
        """
        n = adjacency.shape[0]
        indptr = adjacency.indptr.astype(np.int32)
        indices = adjacency.indices.astype(np.int32)

        if sorted_nodes is not None:
            sorted_nodes = np.asarray(sorted_nodes)
            if sorted_nodes.shape[0] != n:
                raise ValueError('Dimensions mismatch between adjacency and sorted_nodes.')
            sorted_nodes = sorted_nodes.astype(np.int32)
            # The compiled core indexes an array of size n with these values without
            # bounds checking: out-of-range entries must be rejected here.
            if n and (sorted_nodes.min() < 0 or sorted_nodes.max() >= n):
                raise ValueError('The entries of sorted_nodes must be node indices between 0 and n - 1.')
        elif self.ordering is None:
            sorted_nodes = np.arange(n).astype(np.int32)
        elif self.ordering == 'degree':
            degrees = indptr[1:] - indptr[:-1]
            sorted_nodes = np.argsort(degrees).astype(np.int32)
        else:
            raise ValueError('Unknown ordering of nodes.')

        # Buffer filled by the core with the rank of each node in the order.
        ix = np.zeros(n, dtype=np.int32)
        dag_indptr, dag_indices = fit_core(indptr, indices, sorted_nodes, ix)
        self.indptr_ = np.asarray(dag_indptr)
        self.indices_ = np.asarray(dag_indices)

        return self
|
|
Binary file
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
# distutils: language = c++
|
|
2
|
+
# cython: language_level=3
|
|
3
|
+
# cython: linetrace=True
|
|
4
|
+
# distutils: define_macros=CYTHON_TRACE_NOGIL=1
|
|
5
|
+
"""
|
|
6
|
+
Created on Jun 3, 2020
|
|
7
|
+
@author: Nathan de Lara <nathan.delara@polytechnique.org>
|
|
8
|
+
"""
|
|
9
|
+
from libcpp.vector cimport vector
|
|
10
|
+
|
|
11
|
+
cimport cython
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
@cython.boundscheck(False)
@cython.wraparound(False)
def fit_core(int[:] indptr, int[:] indices, int[:] sorted_nodes, int[:] ix):
    """Build DAG given an order of the nodes.

    Parameters
    ----------
    indptr, indices :
        Index pointers and indices of the graph, as for CSR format.
    sorted_nodes :
        Node indices listed in the desired order.
        Bounds checking is disabled: each entry must be a valid index of ``ix``.
    ix :
        Output buffer with one entry per node; overwritten with the rank of each node in ``sorted_nodes``.

    Returns
    -------
    dag_indptr, dag_indices :
        Index pointers and indices (CSR format) of the graph restricted to the edges (u, v)
        such that ``ix[u] < ix[v]``.
    """
    cdef int n = indptr.shape[0] - 1
    # All loop indices are declared as C integers, including i.
    cdef int i, u, v, k
    cdef int ptr = 0
    cdef vector[int] dag_indptr, dag_indices

    # rank of each node in the given order
    for i in range(n):
        ix[sorted_nodes[i]] = i

    # create the DAG
    dag_indptr.push_back(ptr)
    for u in range(n):
        for k in range(indptr[u], indptr[u+1]):
            v = indices[k]
            if ix[u] < ix[v]:  # the edge needs to be added
                dag_indices.push_back(v)
                ptr += 1
        dag_indptr.push_back(ptr)

    return dag_indptr, dag_indices
|
|
Binary file
|
|
@@ -0,0 +1,193 @@
|
|
|
1
|
+
# distutils: language = c++
|
|
2
|
+
# cython: language_level=3
|
|
3
|
+
# cython: linetrace=True
|
|
4
|
+
# distutils: define_macros=CYTHON_TRACE_NOGIL=1
|
|
5
|
+
"""
|
|
6
|
+
Created on Jun 3, 2020
|
|
7
|
+
@author: Julien Simonnet <julien.simonnet@etu.upmc.fr>
|
|
8
|
+
@author: Yohann Robert <yohann.robert@etu.upmc.fr>
|
|
9
|
+
"""
|
|
10
|
+
from libcpp.vector cimport vector
|
|
11
|
+
import numpy as np
|
|
12
|
+
cimport numpy as np
|
|
13
|
+
from scipy import sparse
|
|
14
|
+
|
|
15
|
+
cimport cython
|
|
16
|
+
|
|
17
|
+
from sknetwork.topology.dag import DAG
|
|
18
|
+
from sknetwork.topology.kcore import CoreDecomposition
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
# ----- Collections of arrays used by our listing algorithm -----
|
|
22
|
+
@cython.boundscheck(False)
@cython.wraparound(False)
cdef class ListingBox:
    # Working arrays of the clique listing, indexed by level (0 to k).
    # ns: one counter per level; only ns[k] is initialized here (to the number of nodes).
    cdef int[:] ns
    # degrees: object array holding one int32 array of size n per level.
    cdef np.ndarray degrees
    # subs: object array holding one int32 array of node indices per level.
    cdef np.ndarray subs
    # lab: one int16 label per node, initialized to k.
    cdef short[:] lab

    def __cinit__(self, vector[int] indptr, int k):
        # indptr: index pointers (CSR format); k: index of the top level.
        cdef int n = indptr.size() - 1
        cdef int i
        cdef int max_deg = 0

        # Entries below k are left uninitialized (np.empty).
        cdef np.ndarray[int, ndim=1] ns = np.empty((k+1,), dtype=np.int32)
        ns[k] = n
        self.ns = ns

        cdef np.ndarray[short, ndim=1] lab = np.full((n,), k, dtype=np.int16)
        self.lab = lab

        cdef np.ndarray[int, ndim=1] deg = np.zeros(n, dtype=np.int32)
        cdef np.ndarray[int, ndim=1] sub = np.zeros(n, dtype=np.int32)

        # Top level: degree of each node from indptr, and all nodes in index order.
        for i in range(n):
            deg[i] = indptr[i+1] - indptr[i]
            max_deg = max(deg[i], max_deg)
            sub[i] = i

        self.degrees = np.empty((k+1,), dtype=object)
        self.subs = np.empty((k+1,), dtype=object)

        self.degrees[k] = deg
        self.subs[k] = sub

        # Levels 2 to k-1: zero-filled arrays; the node list has room for max_deg entries.
        # Levels 0 and 1 are not allocated.
        for i in range(2, k):
            deg = np.zeros(n, dtype=np.int32)
            sub = np.zeros(max_deg, dtype=np.int32)
            self.degrees[i] = deg
            self.subs[i] = sub
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
@cython.boundscheck(False)
@cython.wraparound(False)
cdef long fit_core(vector[int] indptr, vector[int] indices, int l, ListingBox box):
    # Recursive count over the working arrays of box, starting at level l.
    # Returns the count accumulated over the nodes box.subs[l][:box.ns[l]].
    # NOTE(review): indptr and indices are declared by value, so presumably each call
    # works on its own copy of the vectors - confirm that this is intended.
    cdef int n = indptr.size() - 1
    cdef long n_cliques = 0
    cdef int i, j, k
    cdef int u, v, w
    cdef int cd

    # Base case: sum the level-2 degrees of the nodes listed at level 2.
    if l == 2:
        degree_l = box.degrees[2]
        sub_l = box.subs[2]
        for i in range(box.ns[2]):
            j = sub_l[i]
            n_cliques += degree_l[j]

        return n_cliques

    sub_l = box.subs[l]
    sub_prev = box.subs[l-1]
    degree_l = box.degrees[l]
    deg_prev = box.degrees[l-1]
    for i in range(box.ns[l]):
        u = sub_l[i]
        # Rebuild the node list of level l-1 from the first degree_l[u] neighbors of u.
        box.ns[l-1] = 0
        cd = indptr[u] + degree_l[u]
        for j in range(indptr[u], cd):
            v = indices[j]
            if box.lab[v] == l:
                box.lab[v] = l-1
                sub_prev[box.ns[l-1]] = v
                box.ns[l-1] += 1
                deg_prev[v] = 0

        # For each selected node v, reorder its first degree_l[v] neighbors so that those
        # labeled l-1 come first, and count them in deg_prev[v].
        for j in range(box.ns[l-1]):
            v = sub_prev[j]
            cd = indptr[v] + degree_l[v]
            k = indptr[v]
            while k < cd:
                w = indices[k]
                if box.lab[w] == l-1:
                    deg_prev[v] += 1
                else:
                    # Swap w with the last entry of the window and shrink the window;
                    # k is decremented so that the swapped-in entry is examined next.
                    cd -= 1
                    indices[k] = indices[cd]
                    k -= 1
                    indices[cd] = w

                k += 1

        n_cliques += fit_core(indptr, indices, l-1, box)
        # Restore the labels of the selected nodes before moving to the next u.
        for j in range(box.ns[l-1]):
            v = sub_prev[j]
            box.lab[v] = l

    return n_cliques
|
|
119
|
+
|
|
120
|
+
|
|
121
|
+
class Cliques:
    """Algorithm counting the cliques of a given order in a graph.

    Parameters
    ----------
    k : int
        Clique order (e.g., k = 3 means triangles).

    Attributes
    ----------
    n_cliques_ : int
        Number of cliques.

    Example
    -------
    >>> from sknetwork.data import karate_club
    >>> cliques = Cliques(k=3)
    >>> adjacency = karate_club()
    >>> cliques.fit_transform(adjacency)
    45

    References
    ----------
    Danisch, M., Balalau, O., & Sozio, M. (2018, April).
    `Listing k-cliques in sparse real-world graphs.
    <https://dl.acm.org/doi/pdf/10.1145/3178876.3186125>`_
    In Proceedings of the 2018 World Wide Web Conference (pp. 589-598).
    """
    def __init__(self, k: int):
        self.k = np.int32(k)
        self.n_cliques_ = 0

    def fit(self, adjacency: sparse.csr_matrix) -> 'Cliques':
        """Count the cliques of order k.

        Parameters
        ----------
        adjacency :
            Adjacency matrix of the graph.

        Returns
        -------
        self: :class:`Cliques`
        """
        if self.k < 2:
            raise ValueError("k should be at least 2")

        # Order the nodes by the output of the core decomposition.
        core_values = CoreDecomposition().fit_transform(adjacency)
        node_order = np.argsort(core_values)

        # Orient the edges along this order.
        dag = DAG()
        dag.fit(adjacency, node_order)
        dag_indptr, dag_indices = dag.indptr_, dag.indices_

        box = ListingBox.__new__(ListingBox, dag_indptr, self.k)
        self.n_cliques_ = fit_core(dag_indptr, dag_indices, self.k, box)

        return self

    def fit_transform(self, adjacency: sparse.csr_matrix) -> int:
        """ Fit algorithm to the data and return the number of cliques. Same parameters as the ``fit`` method.

        Returns
        -------
        n_cliques : int
            Number of k-cliques.
        """
        self.fit(adjacency)
        return self.n_cliques_
|
|
192
|
+
|
|
193
|
+
|
|
Binary file
|