scikit-network 0.31.0__cp310-cp310-win_amd64.whl → 0.33.0__cp310-cp310-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of scikit-network might be problematic. Click here for more details.
- {scikit_network-0.31.0.dist-info → scikit_network-0.33.0.dist-info}/AUTHORS.rst +3 -1
- {scikit_network-0.31.0.dist-info → scikit_network-0.33.0.dist-info}/METADATA +27 -5
- scikit_network-0.33.0.dist-info/RECORD +228 -0
- {scikit_network-0.31.0.dist-info → scikit_network-0.33.0.dist-info}/WHEEL +1 -1
- sknetwork/__init__.py +1 -1
- sknetwork/classification/base.py +1 -1
- sknetwork/classification/base_rank.py +3 -3
- sknetwork/classification/diffusion.py +25 -16
- sknetwork/classification/knn.py +23 -16
- sknetwork/classification/metrics.py +4 -4
- sknetwork/classification/pagerank.py +12 -8
- sknetwork/classification/propagation.py +25 -17
- sknetwork/classification/tests/test_diffusion.py +10 -0
- sknetwork/classification/vote.cp310-win_amd64.pyd +0 -0
- sknetwork/classification/vote.cpp +14549 -8668
- sknetwork/clustering/__init__.py +3 -1
- sknetwork/clustering/base.py +1 -1
- sknetwork/clustering/kcenters.py +253 -0
- sknetwork/clustering/leiden.py +242 -0
- sknetwork/clustering/leiden_core.cp310-win_amd64.pyd +0 -0
- sknetwork/clustering/leiden_core.cpp +31564 -0
- sknetwork/clustering/leiden_core.pyx +124 -0
- sknetwork/clustering/louvain.py +118 -83
- sknetwork/clustering/louvain_core.cp310-win_amd64.pyd +0 -0
- sknetwork/clustering/louvain_core.cpp +21876 -16332
- sknetwork/clustering/louvain_core.pyx +86 -94
- sknetwork/clustering/postprocess.py +2 -2
- sknetwork/clustering/propagation_clustering.py +4 -4
- sknetwork/clustering/tests/test_API.py +7 -3
- sknetwork/clustering/tests/test_kcenters.py +60 -0
- sknetwork/clustering/tests/test_leiden.py +34 -0
- sknetwork/clustering/tests/test_louvain.py +2 -3
- sknetwork/data/__init__.py +1 -1
- sknetwork/data/base.py +7 -2
- sknetwork/data/load.py +20 -25
- sknetwork/data/models.py +15 -15
- sknetwork/data/parse.py +57 -34
- sknetwork/data/tests/test_API.py +3 -3
- sknetwork/data/tests/test_base.py +2 -2
- sknetwork/data/tests/test_parse.py +9 -12
- sknetwork/data/tests/test_toy_graphs.py +33 -33
- sknetwork/data/toy_graphs.py +35 -43
- sknetwork/embedding/__init__.py +0 -1
- sknetwork/embedding/base.py +23 -19
- sknetwork/embedding/force_atlas.py +3 -2
- sknetwork/embedding/louvain_embedding.py +1 -27
- sknetwork/embedding/random_projection.py +5 -3
- sknetwork/embedding/spectral.py +0 -73
- sknetwork/embedding/svd.py +0 -4
- sknetwork/embedding/tests/test_API.py +4 -28
- sknetwork/embedding/tests/test_louvain_embedding.py +13 -13
- sknetwork/embedding/tests/test_spectral.py +2 -5
- sknetwork/embedding/tests/test_svd.py +7 -1
- sknetwork/gnn/base_layer.py +3 -3
- sknetwork/gnn/gnn_classifier.py +41 -87
- sknetwork/gnn/layer.py +1 -1
- sknetwork/gnn/loss.py +1 -1
- sknetwork/gnn/optimizer.py +4 -3
- sknetwork/gnn/tests/test_base_layer.py +4 -4
- sknetwork/gnn/tests/test_gnn_classifier.py +12 -39
- sknetwork/gnn/utils.py +8 -8
- sknetwork/hierarchy/base.py +27 -0
- sknetwork/hierarchy/louvain_hierarchy.py +55 -47
- sknetwork/hierarchy/paris.cp310-win_amd64.pyd +0 -0
- sknetwork/hierarchy/paris.cpp +27667 -20915
- sknetwork/hierarchy/paris.pyx +11 -10
- sknetwork/hierarchy/postprocess.py +16 -16
- sknetwork/hierarchy/tests/test_algos.py +5 -0
- sknetwork/hierarchy/tests/test_metrics.py +4 -4
- sknetwork/linalg/__init__.py +1 -1
- sknetwork/linalg/diteration.cp310-win_amd64.pyd +0 -0
- sknetwork/linalg/diteration.cpp +13916 -8050
- sknetwork/linalg/{normalization.py → normalizer.py} +17 -14
- sknetwork/linalg/operators.py +1 -1
- sknetwork/linalg/ppr_solver.py +1 -1
- sknetwork/linalg/push.cp310-win_amd64.pyd +0 -0
- sknetwork/linalg/push.cpp +23187 -16973
- sknetwork/linalg/tests/test_normalization.py +3 -7
- sknetwork/linalg/tests/test_operators.py +2 -6
- sknetwork/linalg/tests/test_ppr.py +1 -1
- sknetwork/linkpred/base.py +12 -1
- sknetwork/linkpred/nn.py +6 -6
- sknetwork/path/distances.py +11 -4
- sknetwork/path/shortest_path.py +1 -1
- sknetwork/path/tests/test_distances.py +7 -0
- sknetwork/path/tests/test_search.py +2 -2
- sknetwork/ranking/base.py +11 -6
- sknetwork/ranking/betweenness.cp310-win_amd64.pyd +0 -0
- sknetwork/ranking/betweenness.cpp +5256 -2190
- sknetwork/ranking/pagerank.py +13 -12
- sknetwork/ranking/tests/test_API.py +0 -2
- sknetwork/ranking/tests/test_betweenness.py +1 -1
- sknetwork/ranking/tests/test_pagerank.py +11 -5
- sknetwork/regression/base.py +18 -1
- sknetwork/regression/diffusion.py +30 -14
- sknetwork/regression/tests/test_diffusion.py +8 -0
- sknetwork/topology/__init__.py +3 -1
- sknetwork/topology/cliques.cp310-win_amd64.pyd +0 -0
- sknetwork/topology/cliques.cpp +23528 -16848
- sknetwork/topology/core.cp310-win_amd64.pyd +0 -0
- sknetwork/topology/core.cpp +22849 -16581
- sknetwork/topology/cycles.py +243 -0
- sknetwork/topology/minheap.cp310-win_amd64.pyd +0 -0
- sknetwork/topology/minheap.cpp +19495 -13469
- sknetwork/topology/structure.py +2 -42
- sknetwork/topology/tests/test_cycles.py +65 -0
- sknetwork/topology/tests/test_structure.py +2 -16
- sknetwork/topology/triangles.cp310-win_amd64.pyd +0 -0
- sknetwork/topology/triangles.cpp +5283 -1397
- sknetwork/topology/triangles.pyx +7 -4
- sknetwork/topology/weisfeiler_lehman_core.cp310-win_amd64.pyd +0 -0
- sknetwork/topology/weisfeiler_lehman_core.cpp +14781 -8915
- sknetwork/utils/__init__.py +1 -1
- sknetwork/utils/format.py +1 -1
- sknetwork/utils/membership.py +2 -2
- sknetwork/utils/values.py +5 -3
- sknetwork/visualization/__init__.py +2 -2
- sknetwork/visualization/dendrograms.py +55 -7
- sknetwork/visualization/graphs.py +261 -44
- sknetwork/visualization/tests/test_dendrograms.py +9 -9
- sknetwork/visualization/tests/test_graphs.py +63 -57
- scikit_network-0.31.0.dist-info/RECORD +0 -221
- sknetwork/embedding/louvain_hierarchy.py +0 -142
- sknetwork/embedding/tests/test_louvain_hierarchy.py +0 -19
- {scikit_network-0.31.0.dist-info → scikit_network-0.33.0.dist-info}/LICENSE +0 -0
- {scikit_network-0.31.0.dist-info → scikit_network-0.33.0.dist-info}/top_level.txt +0 -0
sknetwork/data/models.py
CHANGED
|
@@ -12,7 +12,7 @@ from typing import Union, Optional, Iterable
|
|
|
12
12
|
import numpy as np
|
|
13
13
|
from scipy import sparse
|
|
14
14
|
|
|
15
|
-
from sknetwork.data.base import Bunch
|
|
15
|
+
from sknetwork.data.base import Dataset
|
|
16
16
|
from sknetwork.data.parse import from_edge_list
|
|
17
17
|
from sknetwork.utils.check import check_random_state
|
|
18
18
|
from sknetwork.utils.format import directed2undirected
|
|
@@ -20,7 +20,7 @@ from sknetwork.utils.format import directed2undirected
|
|
|
20
20
|
|
|
21
21
|
def block_model(sizes: Iterable, p_in: Union[float, list, np.ndarray] = .2, p_out: float = .05,
|
|
22
22
|
directed: bool = False, self_loops: bool = False, metadata: bool = False, seed: Optional[int] = None) \
|
|
23
|
-
-> Union[sparse.csr_matrix,
|
|
23
|
+
-> Union[sparse.csr_matrix, Dataset]:
|
|
24
24
|
"""Stochastic block model.
|
|
25
25
|
|
|
26
26
|
Parameters
|
|
@@ -83,7 +83,7 @@ def block_model(sizes: Iterable, p_in: Union[float, list, np.ndarray] = .2, p_ou
|
|
|
83
83
|
else:
|
|
84
84
|
adjacency = directed2undirected(sparse.csr_matrix(sparse.triu(adjacency)), weighted=False)
|
|
85
85
|
if metadata:
|
|
86
|
-
graph =
|
|
86
|
+
graph = Dataset()
|
|
87
87
|
graph.adjacency = adjacency
|
|
88
88
|
labels = np.repeat(np.arange(len(sizes)), sizes)
|
|
89
89
|
graph.labels = labels
|
|
@@ -129,7 +129,7 @@ def erdos_renyi(n: int = 20, p: float = .3, directed: bool = False, self_loops:
|
|
|
129
129
|
return block_model([n], p, 0., directed=directed, self_loops=self_loops, metadata=False, seed=seed)
|
|
130
130
|
|
|
131
131
|
|
|
132
|
-
def linear_digraph(n: int = 3, metadata: bool = False) -> Union[sparse.csr_matrix,
|
|
132
|
+
def linear_digraph(n: int = 3, metadata: bool = False) -> Union[sparse.csr_matrix, Dataset]:
|
|
133
133
|
"""Linear graph (directed).
|
|
134
134
|
|
|
135
135
|
Parameters
|
|
@@ -158,7 +158,7 @@ def linear_digraph(n: int = 3, metadata: bool = False) -> Union[sparse.csr_matri
|
|
|
158
158
|
if metadata:
|
|
159
159
|
x = np.arange(n)
|
|
160
160
|
y = np.zeros(n)
|
|
161
|
-
graph =
|
|
161
|
+
graph = Dataset()
|
|
162
162
|
graph.adjacency = adjacency
|
|
163
163
|
graph.position = np.array((x, y)).T
|
|
164
164
|
return graph
|
|
@@ -166,7 +166,7 @@ def linear_digraph(n: int = 3, metadata: bool = False) -> Union[sparse.csr_matri
|
|
|
166
166
|
return adjacency
|
|
167
167
|
|
|
168
168
|
|
|
169
|
-
def linear_graph(n: int = 3, metadata: bool = False) -> Union[sparse.csr_matrix,
|
|
169
|
+
def linear_graph(n: int = 3, metadata: bool = False) -> Union[sparse.csr_matrix, Dataset]:
|
|
170
170
|
"""Linear graph (undirected).
|
|
171
171
|
|
|
172
172
|
Parameters
|
|
@@ -218,7 +218,7 @@ def cyclic_position(n: int) -> np.ndarray:
|
|
|
218
218
|
return position
|
|
219
219
|
|
|
220
220
|
|
|
221
|
-
def cyclic_digraph(n: int = 3, metadata: bool = False) -> Union[sparse.csr_matrix,
|
|
221
|
+
def cyclic_digraph(n: int = 3, metadata: bool = False) -> Union[sparse.csr_matrix, Dataset]:
|
|
222
222
|
"""Cyclic graph (directed).
|
|
223
223
|
|
|
224
224
|
Parameters
|
|
@@ -245,7 +245,7 @@ def cyclic_digraph(n: int = 3, metadata: bool = False) -> Union[sparse.csr_matri
|
|
|
245
245
|
adjacency = sparse.csr_matrix((np.ones(len(row), dtype=int), (row, col)), shape=(n, n))
|
|
246
246
|
|
|
247
247
|
if metadata:
|
|
248
|
-
graph =
|
|
248
|
+
graph = Dataset()
|
|
249
249
|
graph.adjacency = adjacency
|
|
250
250
|
graph.position = cyclic_position(n)
|
|
251
251
|
return graph
|
|
@@ -253,7 +253,7 @@ def cyclic_digraph(n: int = 3, metadata: bool = False) -> Union[sparse.csr_matri
|
|
|
253
253
|
return adjacency
|
|
254
254
|
|
|
255
255
|
|
|
256
|
-
def cyclic_graph(n: int = 3, metadata: bool = False) -> Union[sparse.csr_matrix,
|
|
256
|
+
def cyclic_graph(n: int = 3, metadata: bool = False) -> Union[sparse.csr_matrix, Dataset]:
|
|
257
257
|
"""Cyclic graph (undirected).
|
|
258
258
|
|
|
259
259
|
Parameters
|
|
@@ -283,7 +283,7 @@ def cyclic_graph(n: int = 3, metadata: bool = False) -> Union[sparse.csr_matrix,
|
|
|
283
283
|
return graph.adjacency
|
|
284
284
|
|
|
285
285
|
|
|
286
|
-
def grid(n1: int = 10, n2: int = 10, metadata: bool = False) -> Union[sparse.csr_matrix,
|
|
286
|
+
def grid(n1: int = 10, n2: int = 10, metadata: bool = False) -> Union[sparse.csr_matrix, Dataset]:
|
|
287
287
|
"""Grid (undirected).
|
|
288
288
|
|
|
289
289
|
Parameters
|
|
@@ -312,7 +312,7 @@ def grid(n1: int = 10, n2: int = 10, metadata: bool = False) -> Union[sparse.csr
|
|
|
312
312
|
edges = list(map(lambda edge: (node_id[edge[0]], node_id[edge[1]]), edges))
|
|
313
313
|
adjacency = from_edge_list(edges, reindex=False, matrix_only=True)
|
|
314
314
|
if metadata:
|
|
315
|
-
graph =
|
|
315
|
+
graph = Dataset()
|
|
316
316
|
graph.adjacency = adjacency
|
|
317
317
|
graph.position = np.array(nodes)
|
|
318
318
|
return graph
|
|
@@ -320,7 +320,7 @@ def grid(n1: int = 10, n2: int = 10, metadata: bool = False) -> Union[sparse.csr
|
|
|
320
320
|
return adjacency
|
|
321
321
|
|
|
322
322
|
|
|
323
|
-
def star(n_branches: int = 3, metadata: bool = False) -> Union[sparse.csr_matrix,
|
|
323
|
+
def star(n_branches: int = 3, metadata: bool = False) -> Union[sparse.csr_matrix, Dataset]:
|
|
324
324
|
"""Star (undirected).
|
|
325
325
|
|
|
326
326
|
Parameters
|
|
@@ -345,7 +345,7 @@ def star(n_branches: int = 3, metadata: bool = False) -> Union[sparse.csr_matrix
|
|
|
345
345
|
edges = [(0, i+1) for i in range(n_branches)]
|
|
346
346
|
adjacency = from_edge_list(edges, reindex=False, matrix_only=True)
|
|
347
347
|
if metadata:
|
|
348
|
-
graph =
|
|
348
|
+
graph = Dataset()
|
|
349
349
|
graph.adjacency = adjacency
|
|
350
350
|
angles = 2 * np.pi * np.arange(n_branches) / n_branches
|
|
351
351
|
x = [0] + list(np.cos(angles))
|
|
@@ -402,7 +402,7 @@ def albert_barabasi(n: int = 100, degree: int = 3, directed: bool = False, seed:
|
|
|
402
402
|
|
|
403
403
|
|
|
404
404
|
def watts_strogatz(n: int = 100, degree: int = 6, prob: float = 0.05, seed: Optional[int] = None,
|
|
405
|
-
metadata: bool = False) -> Union[sparse.csr_matrix,
|
|
405
|
+
metadata: bool = False) -> Union[sparse.csr_matrix, Dataset]:
|
|
406
406
|
"""Watts-Strogatz model.
|
|
407
407
|
|
|
408
408
|
Parameters
|
|
@@ -451,7 +451,7 @@ def watts_strogatz(n: int = 100, degree: int = 6, prob: float = 0.05, seed: Opti
|
|
|
451
451
|
adjacency[j, i] = 0
|
|
452
452
|
adjacency = sparse.csr_matrix(adjacency, shape=adjacency.shape)
|
|
453
453
|
if metadata:
|
|
454
|
-
graph =
|
|
454
|
+
graph = Dataset()
|
|
455
455
|
graph.adjacency = adjacency
|
|
456
456
|
graph.position = cyclic_position(n)
|
|
457
457
|
return graph
|
sknetwork/data/parse.py
CHANGED
|
@@ -8,19 +8,19 @@ Created in December 2018
|
|
|
8
8
|
"""
|
|
9
9
|
|
|
10
10
|
from csv import reader
|
|
11
|
-
from typing import Dict, List, Tuple, Union
|
|
11
|
+
from typing import Dict, List, Tuple, Union, Optional
|
|
12
12
|
from xml.etree import ElementTree
|
|
13
13
|
|
|
14
14
|
import numpy as np
|
|
15
15
|
from scipy import sparse
|
|
16
16
|
|
|
17
|
-
from sknetwork.data.base import Bunch
|
|
17
|
+
from sknetwork.data.base import Dataset
|
|
18
18
|
from sknetwork.utils.format import directed2undirected
|
|
19
19
|
|
|
20
20
|
|
|
21
21
|
def from_edge_list(edge_list: Union[np.ndarray, List[Tuple]], directed: bool = False,
|
|
22
|
-
bipartite: bool = False, weighted: bool = True, reindex: bool =
|
|
23
|
-
sum_duplicates: bool = True, matrix_only: bool = None) -> Union[
|
|
22
|
+
bipartite: bool = False, weighted: bool = True, reindex: bool = False, shape: Optional[tuple] = None,
|
|
23
|
+
sum_duplicates: bool = True, matrix_only: bool = None) -> Union[Dataset, sparse.csr_matrix]:
|
|
24
24
|
"""Load a graph from an edge list.
|
|
25
25
|
|
|
26
26
|
Parameters
|
|
@@ -37,6 +37,9 @@ def from_edge_list(edge_list: Union[np.ndarray, List[Tuple]], directed: bool = F
|
|
|
37
37
|
reindex : bool
|
|
38
38
|
If ``True``, reindex nodes and returns the original node indices as names.
|
|
39
39
|
Reindexing is enforced if nodes are not integers.
|
|
40
|
+
shape : tuple
|
|
41
|
+
Shape of the adjacency or biadjacency matrix.
|
|
42
|
+
If not specified or if nodes are reindexed, the shape is the smallest compatible with node indices.
|
|
40
43
|
sum_duplicates : bool
|
|
41
44
|
If ``True`` (default), sums weights of duplicate edges.
|
|
42
45
|
Otherwise, the weight of each edge is that of the first occurrence of this edge.
|
|
@@ -83,12 +86,14 @@ def from_edge_list(edge_list: Union[np.ndarray, List[Tuple]], directed: bool = F
|
|
|
83
86
|
else:
|
|
84
87
|
raise TypeError('The edge list must be given as a NumPy array or a list of tuples.')
|
|
85
88
|
return from_edge_array(edge_array=edge_array, weights=weights, directed=directed, bipartite=bipartite,
|
|
86
|
-
weighted=weighted, reindex=reindex, sum_duplicates=sum_duplicates,
|
|
89
|
+
weighted=weighted, reindex=reindex, shape=shape, sum_duplicates=sum_duplicates,
|
|
90
|
+
matrix_only=matrix_only)
|
|
87
91
|
|
|
88
92
|
|
|
89
93
|
def from_adjacency_list(adjacency_list: Union[List[List], Dict[str, List]], directed: bool = False,
|
|
90
|
-
bipartite: bool = False, weighted: bool = True, reindex: bool =
|
|
91
|
-
sum_duplicates: bool = True, matrix_only: bool = None)
|
|
94
|
+
bipartite: bool = False, weighted: bool = True, reindex: bool = False,
|
|
95
|
+
shape: Optional[tuple] = None, sum_duplicates: bool = True, matrix_only: bool = None) \
|
|
96
|
+
-> Union[Dataset, sparse.csr_matrix]:
|
|
92
97
|
"""Load a graph from an adjacency list.
|
|
93
98
|
|
|
94
99
|
Parameters
|
|
@@ -104,6 +109,9 @@ def from_adjacency_list(adjacency_list: Union[List[List], Dict[str, List]], dire
|
|
|
104
109
|
reindex : bool
|
|
105
110
|
If ``True``, reindex nodes and returns the original node indices as names.
|
|
106
111
|
Reindexing is enforced if nodes are not integers.
|
|
112
|
+
shape : tuple
|
|
113
|
+
Shape of the adjacency or biadjacency matrix.
|
|
114
|
+
If not specified or if nodes are reindexed, the shape is the smallest compatible with node indices.
|
|
107
115
|
sum_duplicates : bool
|
|
108
116
|
If ``True`` (default), sums weights of duplicate edges.
|
|
109
117
|
Otherwise, the weight of each edge is that of the first occurrence of this edge.
|
|
@@ -134,12 +142,12 @@ def from_adjacency_list(adjacency_list: Union[List[List], Dict[str, List]], dire
|
|
|
134
142
|
else:
|
|
135
143
|
raise TypeError('The adjacency list must be given as a list of lists or a dict of lists.')
|
|
136
144
|
return from_edge_list(edge_list=edge_list, directed=directed, bipartite=bipartite, weighted=weighted,
|
|
137
|
-
reindex=reindex, sum_duplicates=sum_duplicates, matrix_only=matrix_only)
|
|
145
|
+
reindex=reindex, shape=shape, sum_duplicates=sum_duplicates, matrix_only=matrix_only)
|
|
138
146
|
|
|
139
147
|
|
|
140
148
|
def from_edge_array(edge_array: np.ndarray, weights: np.ndarray = None, directed: bool = False, bipartite: bool = False,
|
|
141
|
-
weighted: bool = True, reindex: bool =
|
|
142
|
-
matrix_only: bool = None) -> Union[
|
|
149
|
+
weighted: bool = True, reindex: bool = False, shape: Optional[tuple] = None,
|
|
150
|
+
sum_duplicates: bool = True, matrix_only: bool = None) -> Union[Dataset, sparse.csr_matrix]:
|
|
143
151
|
"""Load a graph from an edge array of shape (n_edges, 2) and weights (optional).
|
|
144
152
|
|
|
145
153
|
Parameters
|
|
@@ -157,6 +165,9 @@ def from_edge_array(edge_array: np.ndarray, weights: np.ndarray = None, directed
|
|
|
157
165
|
reindex : bool
|
|
158
166
|
If ``True``, reindex nodes and returns the original node indices as names.
|
|
159
167
|
Reindexing is enforced if nodes are not integers.
|
|
168
|
+
shape : tuple
|
|
169
|
+
Shape of the adjacency or biadjacency matrix.
|
|
170
|
+
If not specified or if nodes are reindexed, the shape is the smallest compatible with node indices.
|
|
160
171
|
sum_duplicates : bool
|
|
161
172
|
If ``True`` (default), sums weights of duplicate edges.
|
|
162
173
|
Otherwise, the weight of each edge is that of the first occurrence of this edge.
|
|
@@ -191,32 +202,39 @@ def from_edge_array(edge_array: np.ndarray, weights: np.ndarray = None, directed
|
|
|
191
202
|
_, index = np.unique(edge_array, axis=0, return_index=True)
|
|
192
203
|
edge_array = edge_array[index]
|
|
193
204
|
weights = weights[index]
|
|
194
|
-
graph =
|
|
205
|
+
graph = Dataset()
|
|
195
206
|
if bipartite:
|
|
196
207
|
row = edge_array[:, 0]
|
|
197
208
|
col = edge_array[:, 1]
|
|
198
|
-
if row.dtype != int or
|
|
209
|
+
if row.dtype != int or reindex:
|
|
199
210
|
names_row, row = np.unique(row, return_inverse=True)
|
|
200
211
|
graph.names_row = names_row
|
|
201
212
|
graph.names = names_row
|
|
202
213
|
n_row = len(names_row)
|
|
214
|
+
elif shape is not None:
|
|
215
|
+
n_row = max(shape[0], max(row) + 1)
|
|
203
216
|
else:
|
|
204
217
|
n_row = max(row) + 1
|
|
205
|
-
if col.dtype != int or
|
|
218
|
+
if col.dtype != int or reindex:
|
|
206
219
|
names_col, col = np.unique(col, return_inverse=True)
|
|
207
220
|
graph.names_col = names_col
|
|
208
221
|
n_col = len(names_col)
|
|
222
|
+
elif shape is not None:
|
|
223
|
+
n_col = max(shape[1], max(col) + 1)
|
|
209
224
|
else:
|
|
210
225
|
n_col = max(col) + 1
|
|
211
226
|
matrix = sparse.csr_matrix((weights, (row, col)), shape=(n_row, n_col))
|
|
227
|
+
matrix.sum_duplicates()
|
|
212
228
|
graph.biadjacency = matrix
|
|
213
229
|
else:
|
|
214
230
|
nodes = edge_array.ravel()
|
|
215
|
-
if nodes.dtype != int or
|
|
231
|
+
if nodes.dtype != int or reindex:
|
|
216
232
|
names, nodes = np.unique(nodes, return_inverse=True)
|
|
217
233
|
graph.names = names
|
|
218
234
|
n = len(names)
|
|
219
235
|
edge_array = nodes.reshape(-1, 2)
|
|
236
|
+
elif shape is not None:
|
|
237
|
+
n = max(shape[0], max(nodes) + 1)
|
|
220
238
|
else:
|
|
221
239
|
n = max(nodes) + 1
|
|
222
240
|
row = edge_array[:, 0]
|
|
@@ -224,6 +242,7 @@ def from_edge_array(edge_array: np.ndarray, weights: np.ndarray = None, directed
|
|
|
224
242
|
matrix = sparse.csr_matrix((weights, (row, col)), shape=(n, n))
|
|
225
243
|
if not directed:
|
|
226
244
|
matrix = directed2undirected(matrix)
|
|
245
|
+
matrix.sum_duplicates()
|
|
227
246
|
graph.adjacency = matrix
|
|
228
247
|
if matrix_only or (matrix_only is None and len(graph) == 1):
|
|
229
248
|
return matrix
|
|
@@ -233,8 +252,8 @@ def from_edge_array(edge_array: np.ndarray, weights: np.ndarray = None, directed
|
|
|
233
252
|
|
|
234
253
|
def from_csv(file_path: str, delimiter: str = None, sep: str = None, comments: str = '#%',
|
|
235
254
|
data_structure: str = None, directed: bool = False, bipartite: bool = False, weighted: bool = True,
|
|
236
|
-
reindex: bool =
|
|
237
|
-
|
|
255
|
+
reindex: bool = False, shape: Optional[tuple] = None, sum_duplicates: bool = True,
|
|
256
|
+
matrix_only: bool = None) -> Union[Dataset, sparse.csr_matrix]:
|
|
238
257
|
"""Load a graph from a CSV or TSV file.
|
|
239
258
|
The delimiter can be specified (e.g., ' ' for space-separated values).
|
|
240
259
|
|
|
@@ -249,9 +268,10 @@ def from_csv(file_path: str, delimiter: str = None, sep: str = None, comments: s
|
|
|
249
268
|
comments : str
|
|
250
269
|
Characters for comment lines.
|
|
251
270
|
data_structure : str
|
|
252
|
-
If 'edge_list',
|
|
253
|
-
If 'adjacency_list',
|
|
254
|
-
|
|
271
|
+
If 'edge_list', consider each row of the file as an edge (tuple of size 2 or 3).
|
|
272
|
+
If 'adjacency_list', consider each row of the file as an adjacency list (list of neighbors,
|
|
273
|
+
in the order of node indices; an empty line means no neighbor).
|
|
274
|
+
If 'adjacency_dict', consider each row of the file as an adjacency dictionary with key
|
|
255
275
|
given by the first column (node: list of neighbors).
|
|
256
276
|
If ``None`` (default), data_structure is guessed from the first rows of the file.
|
|
257
277
|
directed : bool
|
|
@@ -263,6 +283,9 @@ def from_csv(file_path: str, delimiter: str = None, sep: str = None, comments: s
|
|
|
263
283
|
reindex : bool
|
|
264
284
|
If ``True``, reindex nodes and returns the original node indices as names.
|
|
265
285
|
Reindexing is enforced if nodes are not integers.
|
|
286
|
+
shape : tuple
|
|
287
|
+
Shape of the adjacency or biadjacency matrix.
|
|
288
|
+
If not specified or if nodes are reindexed, the shape is the smallest compatible with node indices.
|
|
266
289
|
sum_duplicates : bool
|
|
267
290
|
If ``True`` (default), sums weights of duplicate edges.
|
|
268
291
|
Otherwise, the weight of each edge is that of the first occurrence of this edge.
|
|
@@ -295,7 +318,7 @@ def from_csv(file_path: str, delimiter: str = None, sep: str = None, comments: s
|
|
|
295
318
|
else:
|
|
296
319
|
weights = None
|
|
297
320
|
return from_edge_array(edge_array=edge_array, weights=weights, directed=directed, bipartite=bipartite,
|
|
298
|
-
weighted=weighted, reindex=reindex, sum_duplicates=sum_duplicates,
|
|
321
|
+
weighted=weighted, reindex=reindex, shape=shape, sum_duplicates=sum_duplicates,
|
|
299
322
|
matrix_only=matrix_only)
|
|
300
323
|
except TypeError:
|
|
301
324
|
pass
|
|
@@ -306,17 +329,17 @@ def from_csv(file_path: str, delimiter: str = None, sep: str = None, comments: s
|
|
|
306
329
|
if data_structure == 'edge_list':
|
|
307
330
|
edge_list = [tuple(row) for row in csv_reader]
|
|
308
331
|
return from_edge_list(edge_list=edge_list, directed=directed, bipartite=bipartite,
|
|
309
|
-
weighted=weighted, reindex=reindex, sum_duplicates=sum_duplicates,
|
|
332
|
+
weighted=weighted, reindex=reindex, shape=shape, sum_duplicates=sum_duplicates,
|
|
310
333
|
matrix_only=matrix_only)
|
|
311
334
|
elif data_structure == 'adjacency_list':
|
|
312
335
|
adjacency_list = [row for row in csv_reader]
|
|
313
336
|
return from_adjacency_list(adjacency_list=adjacency_list, directed=directed, bipartite=bipartite,
|
|
314
|
-
weighted=weighted, reindex=reindex, sum_duplicates=sum_duplicates,
|
|
337
|
+
weighted=weighted, reindex=reindex, shape=shape, sum_duplicates=sum_duplicates,
|
|
315
338
|
matrix_only=matrix_only)
|
|
316
339
|
elif data_structure == 'adjacency_dict':
|
|
317
340
|
adjacency_list = {row[0]: row[1:] for row in csv_reader}
|
|
318
341
|
return from_adjacency_list(adjacency_list=adjacency_list, directed=directed, bipartite=bipartite,
|
|
319
|
-
weighted=weighted, reindex=reindex, sum_duplicates=sum_duplicates,
|
|
342
|
+
weighted=weighted, reindex=reindex, shape=shape, sum_duplicates=sum_duplicates,
|
|
320
343
|
matrix_only=matrix_only)
|
|
321
344
|
|
|
322
345
|
|
|
@@ -411,9 +434,9 @@ def load_header(file: str):
|
|
|
411
434
|
return directed, bipartite, weighted
|
|
412
435
|
|
|
413
436
|
|
|
414
|
-
def load_metadata(file: str, delimiter: str = ': ') -> Bunch:
|
|
437
|
+
def load_metadata(file: str, delimiter: str = ': ') -> Dataset:
|
|
415
438
|
"""Extract metadata from the file."""
|
|
416
|
-
metadata =
|
|
439
|
+
metadata = Dataset()
|
|
417
440
|
with open(file, 'r', encoding='utf-8') as f:
|
|
418
441
|
for row in f:
|
|
419
442
|
parts = row.split(delimiter)
|
|
@@ -422,7 +445,7 @@ def load_metadata(file: str, delimiter: str = ': ') -> Bunch:
|
|
|
422
445
|
return metadata
|
|
423
446
|
|
|
424
447
|
|
|
425
|
-
def from_graphml(file_path: str, weight_key: str = 'weight', max_string_size: int = 512) ->
|
|
448
|
+
def from_graphml(file_path: str, weight_key: str = 'weight', max_string_size: int = 512) -> Dataset:
|
|
426
449
|
"""Load graph from GraphML file.
|
|
427
450
|
|
|
428
451
|
Hyperedges and nested graphs are not supported.
|
|
@@ -438,7 +461,7 @@ def from_graphml(file_path: str, weight_key: str = 'weight', max_string_size: in
|
|
|
438
461
|
|
|
439
462
|
Returns
|
|
440
463
|
-------
|
|
441
|
-
data: :class:`
|
|
464
|
+
data: :class:`Dataset`
|
|
442
465
|
The dataset in a Dataset with the adjacency as a CSR matrix.
|
|
443
466
|
"""
|
|
444
467
|
# see http://graphml.graphdrawing.org/primer/graphml-primer.html
|
|
@@ -454,12 +477,12 @@ def from_graphml(file_path: str, weight_key: str = 'weight', max_string_size: in
|
|
|
454
477
|
# indices in the graph tree
|
|
455
478
|
node_indices = []
|
|
456
479
|
edge_indices = []
|
|
457
|
-
data =
|
|
480
|
+
data = Dataset()
|
|
458
481
|
graph = None
|
|
459
482
|
file_description = None
|
|
460
|
-
attribute_descriptions =
|
|
461
|
-
attribute_descriptions.node =
|
|
462
|
-
attribute_descriptions.edge =
|
|
483
|
+
attribute_descriptions = Dataset()
|
|
484
|
+
attribute_descriptions.node = Dataset()
|
|
485
|
+
attribute_descriptions.edge = Dataset()
|
|
463
486
|
keys = {}
|
|
464
487
|
for file_element in tree.getroot():
|
|
465
488
|
if file_element.tag.endswith('graph'):
|
|
@@ -497,7 +520,7 @@ def from_graphml(file_path: str, weight_key: str = 'weight', max_string_size: in
|
|
|
497
520
|
if file_element.attrib['for'] == 'node':
|
|
498
521
|
size = n_nodes
|
|
499
522
|
if 'node_attribute' not in data:
|
|
500
|
-
data.node_attribute =
|
|
523
|
+
data.node_attribute = Dataset()
|
|
501
524
|
for key_element in file_element:
|
|
502
525
|
if key_element.tag.endswith('desc'):
|
|
503
526
|
attribute_descriptions.node[attribute_name] = key_element.text
|
|
@@ -514,7 +537,7 @@ def from_graphml(file_path: str, weight_key: str = 'weight', max_string_size: in
|
|
|
514
537
|
elif file_element.attrib['for'] == 'edge':
|
|
515
538
|
size = n_edges
|
|
516
539
|
if 'edge_attribute' not in data:
|
|
517
|
-
data.edge_attribute =
|
|
540
|
+
data.edge_attribute = Dataset()
|
|
518
541
|
for key_element in file_element:
|
|
519
542
|
if key_element.tag.endswith('desc'):
|
|
520
543
|
attribute_descriptions.edge[attribute_name] = key_element.text
|
|
@@ -532,7 +555,7 @@ def from_graphml(file_path: str, weight_key: str = 'weight', max_string_size: in
|
|
|
532
555
|
elif file_element.tag.endswith('desc'):
|
|
533
556
|
file_description = file_element.text
|
|
534
557
|
if file_description or attribute_descriptions.node or attribute_descriptions.edge:
|
|
535
|
-
data.meta =
|
|
558
|
+
data.meta = Dataset()
|
|
536
559
|
if file_description:
|
|
537
560
|
data.meta['description'] = file_description
|
|
538
561
|
if attribute_descriptions.node or attribute_descriptions.edge:
|
sknetwork/data/tests/test_API.py
CHANGED
|
@@ -8,7 +8,7 @@ import warnings
|
|
|
8
8
|
|
|
9
9
|
from sknetwork.data.load import *
|
|
10
10
|
from sknetwork.data.toy_graphs import *
|
|
11
|
-
from sknetwork.data import
|
|
11
|
+
from sknetwork.data import Dataset
|
|
12
12
|
|
|
13
13
|
|
|
14
14
|
class TestDataAPI(unittest.TestCase):
|
|
@@ -17,14 +17,14 @@ class TestDataAPI(unittest.TestCase):
|
|
|
17
17
|
toy_graphs = [karate_club, painters, bow_tie, house, miserables]
|
|
18
18
|
for toy_graph in toy_graphs:
|
|
19
19
|
self.assertEqual(type(toy_graph()), sparse.csr_matrix)
|
|
20
|
-
self.assertEqual(type(toy_graph(metadata=True)),
|
|
20
|
+
self.assertEqual(type(toy_graph(metadata=True)), Dataset)
|
|
21
21
|
|
|
22
22
|
def test_load(self):
|
|
23
23
|
tmp_data_dir = tempfile.gettempdir() + '/stub'
|
|
24
24
|
clear_data_home(tmp_data_dir)
|
|
25
25
|
try:
|
|
26
26
|
graph = load_netset('stub', tmp_data_dir)
|
|
27
|
-
self.assertEqual(type(graph),
|
|
27
|
+
self.assertEqual(type(graph), Dataset)
|
|
28
28
|
except URLError: # pragma: no cover
|
|
29
29
|
warnings.warn('Could not reach NetSet. Corresponding test has not been performed.', RuntimeWarning)
|
|
30
30
|
return
|
|
sknetwork/data/tests/test_base.py
CHANGED
|
@@ -3,12 +3,12 @@
|
|
|
3
3
|
|
|
4
4
|
import unittest
|
|
5
5
|
|
|
6
|
-
from sknetwork.data.base import
|
|
6
|
+
from sknetwork.data.base import Dataset
|
|
7
7
|
|
|
8
8
|
|
|
9
9
|
class TestDataset(unittest.TestCase):
|
|
10
10
|
|
|
11
11
|
def test(self):
|
|
12
|
-
dataset =
|
|
12
|
+
dataset = Dataset(name='dataset')
|
|
13
13
|
self.assertEqual(dataset.name, 'dataset')
|
|
14
14
|
self.assertEqual(dataset['name'], 'dataset')
|
|
sknetwork/data/tests/test_parse.py
CHANGED
|
@@ -20,6 +20,10 @@ class TestParser(unittest.TestCase):
|
|
|
20
20
|
self.assertTrue((adjacency.indices == [2, 3, 0, 1, 5, 4]).all())
|
|
21
21
|
self.assertTrue((adjacency.indptr == [0, 1, 2, 3, 4, 5, 6]).all())
|
|
22
22
|
self.assertTrue((adjacency.data == [1, 1, 1, 1, 1, 1]).all())
|
|
23
|
+
adjacency = parse.from_csv(self.stub_data_1, shape=(7, 7))
|
|
24
|
+
self.assertTrue((adjacency.shape == (7, 7)))
|
|
25
|
+
biadjacency = parse.from_csv(self.stub_data_1, bipartite=True, shape=(7, 9))
|
|
26
|
+
self.assertTrue((biadjacency.shape == (7, 9)))
|
|
23
27
|
remove(self.stub_data_1)
|
|
24
28
|
|
|
25
29
|
def test_labeled_weighted(self):
|
|
@@ -33,13 +37,14 @@ class TestParser(unittest.TestCase):
|
|
|
33
37
|
self.assertTrue((adjacency.indptr == [0, 1, 2, 3, 4, 5, 6]).all())
|
|
34
38
|
self.assertTrue((adjacency.data == [1, 6, 5, 6, 1, 5]).all())
|
|
35
39
|
self.assertTrue((names == [' b', ' d', ' e', 'a', 'c', 'f']).all())
|
|
40
|
+
|
|
36
41
|
remove(self.stub_data_2)
|
|
37
42
|
|
|
38
43
|
def test_auto_reindex(self):
|
|
39
44
|
self.stub_data_4 = 'stub_4.txt'
|
|
40
45
|
with open(self.stub_data_4, "w") as text_file:
|
|
41
46
|
text_file.write('%stub\n14 31\n42 50\n0 12')
|
|
42
|
-
graph = parse.from_csv(self.stub_data_4)
|
|
47
|
+
graph = parse.from_csv(self.stub_data_4, reindex=True)
|
|
43
48
|
adjacency = graph.adjacency
|
|
44
49
|
names = graph.names
|
|
45
50
|
self.assertTrue((adjacency.data == [1, 1, 1, 1, 1, 1]).all())
|
|
@@ -164,23 +169,15 @@ class TestParser(unittest.TestCase):
|
|
|
164
169
|
self.stub_data_9 = 'stub_9.txt'
|
|
165
170
|
with open(self.stub_data_9, "w") as text_file:
|
|
166
171
|
text_file.write('#stub\n1 3\n4 5\n0 3')
|
|
167
|
-
graph = parse.from_csv(self.stub_data_9, bipartite=True)
|
|
172
|
+
graph = parse.from_csv(self.stub_data_9, bipartite=True, reindex=True)
|
|
168
173
|
biadjacency = graph.biadjacency
|
|
169
174
|
self.assertTrue((biadjacency.indices == [0, 0, 1]).all())
|
|
170
175
|
self.assertTrue((biadjacency.indptr == [0, 1, 2, 3]).all())
|
|
171
176
|
self.assertTrue((biadjacency.data == [1, 1, 1]).all())
|
|
177
|
+
biadjacency = parse.from_csv(self.stub_data_9, bipartite=True)
|
|
178
|
+
self.assertTrue(biadjacency.shape == (5, 6))
|
|
172
179
|
remove(self.stub_data_9)
|
|
173
180
|
|
|
174
|
-
def test_csv_adjacency_bipartite(self):
|
|
175
|
-
self.stub_data_10 = 'stub_10.txt'
|
|
176
|
-
with open(self.stub_data_10, "w") as text_file:
|
|
177
|
-
text_file.write('%stub\n3\n3\n0')
|
|
178
|
-
graph = parse.from_csv(self.stub_data_10, bipartite=True)
|
|
179
|
-
biadjacency = graph.biadjacency
|
|
180
|
-
self.assertTupleEqual(biadjacency.shape, (3, 2))
|
|
181
|
-
self.assertTrue((biadjacency.data == [1, 1, 1]).all())
|
|
182
|
-
remove(self.stub_data_10)
|
|
183
|
-
|
|
184
181
|
def test_edge_list(self):
|
|
185
182
|
edge_list_1 = [('Alice', 'Bob'), ('Carol', 'Alice')]
|
|
186
183
|
graph = parse.from_edge_list(edge_list_1)
|
|
@@ -16,22 +16,22 @@ class TestToys(unittest.TestCase):
|
|
|
16
16
|
adjacency = house()
|
|
17
17
|
self.assertEqual(adjacency.shape, (5, 5))
|
|
18
18
|
|
|
19
|
-
|
|
20
|
-
self.assertEqual(
|
|
19
|
+
dataset = house(metadata=True)
|
|
20
|
+
self.assertEqual(dataset.position.shape, (5, 2))
|
|
21
21
|
|
|
22
22
|
adjacency = bow_tie()
|
|
23
23
|
self.assertEqual(adjacency.shape, (5, 5))
|
|
24
24
|
|
|
25
|
-
|
|
26
|
-
self.assertEqual(
|
|
25
|
+
dataset = bow_tie(metadata=True)
|
|
26
|
+
self.assertEqual(dataset.position.shape, (5, 2))
|
|
27
27
|
|
|
28
|
-
|
|
29
|
-
self.assertEqual(
|
|
30
|
-
self.assertEqual(len(
|
|
28
|
+
dataset = karate_club(True)
|
|
29
|
+
self.assertEqual(dataset.adjacency.shape, (34, 34))
|
|
30
|
+
self.assertEqual(len(dataset.labels), 34)
|
|
31
31
|
|
|
32
|
-
|
|
33
|
-
self.assertEqual(
|
|
34
|
-
self.assertEqual(len(
|
|
32
|
+
dataset = miserables(True)
|
|
33
|
+
self.assertEqual(dataset.adjacency.shape, (77, 77))
|
|
34
|
+
self.assertEqual(len(dataset.names), 77)
|
|
35
35
|
|
|
36
36
|
def test_directed(self):
|
|
37
37
|
adjacency = painters()
|
|
@@ -40,29 +40,29 @@ class TestToys(unittest.TestCase):
|
|
|
40
40
|
adjacency = art_philo_science()
|
|
41
41
|
self.assertEqual(adjacency.shape, (30, 30))
|
|
42
42
|
|
|
43
|
-
|
|
44
|
-
self.assertEqual(
|
|
45
|
-
self.assertEqual(len(
|
|
43
|
+
dataset = painters(True)
|
|
44
|
+
self.assertEqual(dataset.adjacency.shape, (14, 14))
|
|
45
|
+
self.assertEqual(len(dataset.names), 14)
|
|
46
46
|
|
|
47
|
-
|
|
48
|
-
self.assertEqual(
|
|
49
|
-
self.assertEqual(len(
|
|
47
|
+
dataset = art_philo_science(True)
|
|
48
|
+
self.assertEqual(dataset.adjacency.shape, (30, 30))
|
|
49
|
+
self.assertEqual(len(dataset.names), 30)
|
|
50
50
|
|
|
51
51
|
def test_bipartite(self):
|
|
52
|
-
|
|
53
|
-
self.assertEqual(
|
|
54
|
-
self.assertEqual(len(
|
|
55
|
-
self.assertEqual(len(
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
self.assertEqual(
|
|
59
|
-
self.assertEqual(len(
|
|
60
|
-
self.assertEqual(len(
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
self.assertEqual(
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
self.assertEqual(
|
|
67
|
-
self.assertEqual(len(
|
|
68
|
-
self.assertEqual(len(
|
|
52
|
+
dataset = star_wars(True)
|
|
53
|
+
self.assertEqual(dataset.biadjacency.shape, (4, 3))
|
|
54
|
+
self.assertEqual(len(dataset.names), 4)
|
|
55
|
+
self.assertEqual(len(dataset.names_col), 3)
|
|
56
|
+
|
|
57
|
+
dataset = movie_actor(True)
|
|
58
|
+
self.assertEqual(dataset.biadjacency.shape, (15, 17))
|
|
59
|
+
self.assertEqual(len(dataset.names), 15)
|
|
60
|
+
self.assertEqual(len(dataset.names_col), 17)
|
|
61
|
+
|
|
62
|
+
dataset = hourglass(True)
|
|
63
|
+
self.assertEqual(dataset.biadjacency.shape, (2, 2))
|
|
64
|
+
|
|
65
|
+
dataset = art_philo_science(True)
|
|
66
|
+
self.assertEqual(dataset.biadjacency.shape, (30, 11))
|
|
67
|
+
self.assertEqual(len(dataset.names), 30)
|
|
68
|
+
self.assertEqual(len(dataset.names_col), 11)
|