scikit-network 0.32.1__cp311-cp311-win_amd64.whl → 0.33.0__cp311-cp311-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of scikit-network might be problematic. Click here for more details.
- {scikit_network-0.32.1.dist-info → scikit_network-0.33.0.dist-info}/AUTHORS.rst +0 -1
- {scikit_network-0.32.1.dist-info → scikit_network-0.33.0.dist-info}/METADATA +9 -3
- {scikit_network-0.32.1.dist-info → scikit_network-0.33.0.dist-info}/RECORD +60 -60
- {scikit_network-0.32.1.dist-info → scikit_network-0.33.0.dist-info}/WHEEL +1 -1
- sknetwork/__init__.py +1 -1
- sknetwork/classification/diffusion.py +4 -3
- sknetwork/classification/knn.py +4 -3
- sknetwork/classification/metrics.py +3 -3
- sknetwork/classification/propagation.py +6 -5
- sknetwork/classification/vote.cp311-win_amd64.pyd +0 -0
- sknetwork/classification/vote.cpp +1 -1
- sknetwork/clustering/leiden.py +2 -1
- sknetwork/clustering/leiden_core.cp311-win_amd64.pyd +0 -0
- sknetwork/clustering/leiden_core.cpp +1 -1
- sknetwork/clustering/louvain.py +3 -3
- sknetwork/clustering/louvain_core.cp311-win_amd64.pyd +0 -0
- sknetwork/clustering/louvain_core.cpp +1 -1
- sknetwork/clustering/tests/test_kcenters.py +5 -37
- sknetwork/data/__init__.py +1 -1
- sknetwork/data/base.py +7 -2
- sknetwork/data/load.py +18 -21
- sknetwork/data/models.py +15 -15
- sknetwork/data/parse.py +19 -17
- sknetwork/data/tests/test_API.py +3 -3
- sknetwork/data/tests/test_base.py +2 -2
- sknetwork/data/tests/test_toy_graphs.py +33 -33
- sknetwork/data/toy_graphs.py +35 -43
- sknetwork/embedding/base.py +3 -0
- sknetwork/embedding/louvain_embedding.py +0 -26
- sknetwork/embedding/svd.py +0 -4
- sknetwork/embedding/tests/test_louvain_embedding.py +9 -4
- sknetwork/embedding/tests/test_svd.py +6 -0
- sknetwork/gnn/gnn_classifier.py +1 -1
- sknetwork/hierarchy/louvain_hierarchy.py +10 -6
- sknetwork/hierarchy/paris.cp311-win_amd64.pyd +0 -0
- sknetwork/hierarchy/paris.cpp +757 -755
- sknetwork/hierarchy/paris.pyx +4 -3
- sknetwork/hierarchy/tests/test_metrics.py +4 -4
- sknetwork/linalg/diteration.cp311-win_amd64.pyd +0 -0
- sknetwork/linalg/diteration.cpp +1 -1
- sknetwork/linalg/push.cp311-win_amd64.pyd +0 -0
- sknetwork/linalg/push.cpp +123 -123
- sknetwork/ranking/betweenness.cp311-win_amd64.pyd +0 -0
- sknetwork/ranking/betweenness.cpp +1 -1
- sknetwork/regression/diffusion.py +6 -4
- sknetwork/topology/cliques.cp311-win_amd64.pyd +0 -0
- sknetwork/topology/cliques.cpp +123 -123
- sknetwork/topology/core.cp311-win_amd64.pyd +0 -0
- sknetwork/topology/core.cpp +123 -123
- sknetwork/topology/minheap.cp311-win_amd64.pyd +0 -0
- sknetwork/topology/minheap.cpp +1 -1
- sknetwork/topology/triangles.cp311-win_amd64.pyd +0 -0
- sknetwork/topology/triangles.cpp +1 -1
- sknetwork/topology/weisfeiler_lehman_core.cp311-win_amd64.pyd +0 -0
- sknetwork/topology/weisfeiler_lehman_core.cpp +1 -1
- sknetwork/utils/__init__.py +1 -1
- sknetwork/utils/values.py +5 -3
- sknetwork/visualization/graphs.py +1 -1
- {scikit_network-0.32.1.dist-info → scikit_network-0.33.0.dist-info}/LICENSE +0 -0
- {scikit_network-0.32.1.dist-info → scikit_network-0.33.0.dist-info}/top_level.txt +0 -0
sknetwork/data/load.py
CHANGED
|
@@ -19,15 +19,12 @@ import numpy as np
|
|
|
19
19
|
from scipy import sparse
|
|
20
20
|
|
|
21
21
|
from sknetwork.data.parse import from_csv, load_labels, load_header, load_metadata
|
|
22
|
-
from sknetwork.data.base import
|
|
22
|
+
from sknetwork.data.base import Dataset
|
|
23
23
|
from sknetwork.utils.check import is_square
|
|
24
24
|
from sknetwork.log import Log
|
|
25
25
|
|
|
26
26
|
NETSET_URL = 'https://netset.telecom-paris.fr'
|
|
27
27
|
|
|
28
|
-
# former name of Dataset
|
|
29
|
-
Bunch = Bunch
|
|
30
|
-
|
|
31
28
|
|
|
32
29
|
def is_within_directory(directory, target):
|
|
33
30
|
"""Utility function."""
|
|
@@ -89,7 +86,7 @@ def clean_data_home(data_home: Optional[Union[str, Path]] = None):
|
|
|
89
86
|
|
|
90
87
|
|
|
91
88
|
def load_netset(name: Optional[str] = None, data_home: Optional[Union[str, Path]] = None,
|
|
92
|
-
verbose: bool = True) -> Optional[
|
|
89
|
+
verbose: bool = True) -> Optional[Dataset]:
|
|
93
90
|
"""Load a dataset from the `NetSet collection
|
|
94
91
|
<https://netset.telecom-paris.fr/>`_.
|
|
95
92
|
|
|
@@ -105,10 +102,10 @@ def load_netset(name: Optional[str] = None, data_home: Optional[Union[str, Path]
|
|
|
105
102
|
|
|
106
103
|
Returns
|
|
107
104
|
-------
|
|
108
|
-
dataset : :class:`
|
|
105
|
+
dataset : :class:`Dataset`
|
|
109
106
|
Returned dataset.
|
|
110
107
|
"""
|
|
111
|
-
dataset =
|
|
108
|
+
dataset = Dataset()
|
|
112
109
|
dataset_folder = NETSET_URL + '/datasets/'
|
|
113
110
|
folder_npz = NETSET_URL + '/datasets_npz/'
|
|
114
111
|
|
|
@@ -167,7 +164,7 @@ def load_netset(name: Optional[str] = None, data_home: Optional[Union[str, Path]
|
|
|
167
164
|
|
|
168
165
|
|
|
169
166
|
def load_konect(name: str, data_home: Optional[Union[str, Path]] = None, auto_numpy_bundle: bool = True,
|
|
170
|
-
verbose: bool = True) ->
|
|
167
|
+
verbose: bool = True) -> Dataset:
|
|
171
168
|
"""Load a dataset from the `Konect database
|
|
172
169
|
<http://konect.cc/networks/>`_.
|
|
173
170
|
|
|
@@ -186,7 +183,7 @@ def load_konect(name: str, data_home: Optional[Union[str, Path]] = None, auto_nu
|
|
|
186
183
|
|
|
187
184
|
Returns
|
|
188
185
|
-------
|
|
189
|
-
dataset : :class:`
|
|
186
|
+
dataset : :class:`Dataset`
|
|
190
187
|
Object with the following attributes:
|
|
191
188
|
|
|
192
189
|
* `adjacency` or `biadjacency`: the adjacency/biadjacency matrix for the dataset
|
|
@@ -240,7 +237,7 @@ def load_konect(name: str, data_home: Optional[Union[str, Path]] = None, auto_nu
|
|
|
240
237
|
logger.print_log('Loading from local bundle...')
|
|
241
238
|
return load_from_numpy_bundle(name + '_bundle', data_path)
|
|
242
239
|
|
|
243
|
-
dataset =
|
|
240
|
+
dataset = Dataset()
|
|
244
241
|
path = data_konect / name / name
|
|
245
242
|
if not path.exists() or len(listdir(path)) == 0:
|
|
246
243
|
raise Exception("No data downloaded.")
|
|
@@ -269,7 +266,7 @@ def load_konect(name: str, data_home: Optional[Union[str, Path]] = None, auto_nu
|
|
|
269
266
|
else:
|
|
270
267
|
dataset.meta.name = name
|
|
271
268
|
else:
|
|
272
|
-
dataset.meta =
|
|
269
|
+
dataset.meta = Dataset()
|
|
273
270
|
dataset.meta.name = name
|
|
274
271
|
|
|
275
272
|
if auto_numpy_bundle:
|
|
@@ -280,12 +277,12 @@ def load_konect(name: str, data_home: Optional[Union[str, Path]] = None, auto_nu
|
|
|
280
277
|
return dataset
|
|
281
278
|
|
|
282
279
|
|
|
283
|
-
def save_to_numpy_bundle(data:
|
|
280
|
+
def save_to_numpy_bundle(data: Dataset, bundle_name: str, data_home: Optional[Union[str, Path]] = None):
|
|
284
281
|
"""Save a dataset in the specified data home to a collection of Numpy and Pickle files for faster subsequent loads.
|
|
285
282
|
|
|
286
283
|
Parameters
|
|
287
284
|
----------
|
|
288
|
-
data:
|
|
285
|
+
data: Dataset
|
|
289
286
|
Data to save.
|
|
290
287
|
bundle_name: str
|
|
291
288
|
Name to be used for the bundle folder.
|
|
@@ -317,7 +314,7 @@ def load_from_numpy_bundle(bundle_name: str, data_home: Optional[Union[str, Path
|
|
|
317
314
|
|
|
318
315
|
Returns
|
|
319
316
|
-------
|
|
320
|
-
data:
|
|
317
|
+
data: Dataset
|
|
321
318
|
Data.
|
|
322
319
|
"""
|
|
323
320
|
data_home = get_data_home(data_home)
|
|
@@ -326,7 +323,7 @@ def load_from_numpy_bundle(bundle_name: str, data_home: Optional[Union[str, Path
|
|
|
326
323
|
raise FileNotFoundError('No bundle at ' + str(data_path))
|
|
327
324
|
else:
|
|
328
325
|
files = listdir(data_path)
|
|
329
|
-
data =
|
|
326
|
+
data = Dataset()
|
|
330
327
|
for file in files:
|
|
331
328
|
if len(file.split('.')) == 2:
|
|
332
329
|
file_name, file_extension = file.split('.')
|
|
@@ -340,7 +337,7 @@ def load_from_numpy_bundle(bundle_name: str, data_home: Optional[Union[str, Path
|
|
|
340
337
|
return data
|
|
341
338
|
|
|
342
339
|
|
|
343
|
-
def save(folder: Union[str, Path], data: Union[sparse.csr_matrix,
|
|
340
|
+
def save(folder: Union[str, Path], data: Union[sparse.csr_matrix, Dataset]):
|
|
344
341
|
"""Save a dataset or a CSR matrix in the current directory to a collection of Numpy and Pickle files for faster
|
|
345
342
|
subsequent loads. Supported attribute types include sparse matrices, NumPy arrays, strings and objects Dataset.
|
|
346
343
|
|
|
@@ -348,13 +345,13 @@ def save(folder: Union[str, Path], data: Union[sparse.csr_matrix, Bunch]):
|
|
|
348
345
|
----------
|
|
349
346
|
folder : str or :class:`pathlib.Path`
|
|
350
347
|
Name of the bundle folder.
|
|
351
|
-
data : Union[sparse.csr_matrix,
|
|
348
|
+
data : Union[sparse.csr_matrix, Dataset]
|
|
352
349
|
Data to save.
|
|
353
350
|
|
|
354
351
|
Example
|
|
355
352
|
-------
|
|
356
353
|
>>> from sknetwork.data import save
|
|
357
|
-
>>> dataset =
|
|
354
|
+
>>> dataset = Dataset()
|
|
358
355
|
>>> dataset.adjacency = sparse.csr_matrix(np.random.random((3, 3)) < 0.5)
|
|
359
356
|
>>> dataset.names = np.array(['a', 'b', 'c'])
|
|
360
357
|
>>> save('dataset', dataset)
|
|
@@ -366,7 +363,7 @@ def save(folder: Union[str, Path], data: Union[sparse.csr_matrix, Bunch]):
|
|
|
366
363
|
if folder.exists():
|
|
367
364
|
shutil.rmtree(folder)
|
|
368
365
|
if isinstance(data, sparse.csr_matrix):
|
|
369
|
-
dataset =
|
|
366
|
+
dataset = Dataset()
|
|
370
367
|
if is_square(data):
|
|
371
368
|
dataset.adjacency = data
|
|
372
369
|
else:
|
|
@@ -388,13 +385,13 @@ def load(folder: Union[str, Path]):
|
|
|
388
385
|
|
|
389
386
|
Returns
|
|
390
387
|
-------
|
|
391
|
-
data:
|
|
388
|
+
data: Dataset
|
|
392
389
|
Data.
|
|
393
390
|
|
|
394
391
|
Example
|
|
395
392
|
-------
|
|
396
393
|
>>> from sknetwork.data import save
|
|
397
|
-
>>> dataset =
|
|
394
|
+
>>> dataset = Dataset()
|
|
398
395
|
>>> dataset.adjacency = sparse.csr_matrix(np.random.random((3, 3)) < 0.5)
|
|
399
396
|
>>> dataset.names = np.array(['a', 'b', 'c'])
|
|
400
397
|
>>> save('dataset', dataset)
|
sknetwork/data/models.py
CHANGED
|
@@ -12,7 +12,7 @@ from typing import Union, Optional, Iterable
|
|
|
12
12
|
import numpy as np
|
|
13
13
|
from scipy import sparse
|
|
14
14
|
|
|
15
|
-
from sknetwork.data.base import
|
|
15
|
+
from sknetwork.data.base import Dataset
|
|
16
16
|
from sknetwork.data.parse import from_edge_list
|
|
17
17
|
from sknetwork.utils.check import check_random_state
|
|
18
18
|
from sknetwork.utils.format import directed2undirected
|
|
@@ -20,7 +20,7 @@ from sknetwork.utils.format import directed2undirected
|
|
|
20
20
|
|
|
21
21
|
def block_model(sizes: Iterable, p_in: Union[float, list, np.ndarray] = .2, p_out: float = .05,
|
|
22
22
|
directed: bool = False, self_loops: bool = False, metadata: bool = False, seed: Optional[int] = None) \
|
|
23
|
-
-> Union[sparse.csr_matrix,
|
|
23
|
+
-> Union[sparse.csr_matrix, Dataset]:
|
|
24
24
|
"""Stochastic block model.
|
|
25
25
|
|
|
26
26
|
Parameters
|
|
@@ -83,7 +83,7 @@ def block_model(sizes: Iterable, p_in: Union[float, list, np.ndarray] = .2, p_ou
|
|
|
83
83
|
else:
|
|
84
84
|
adjacency = directed2undirected(sparse.csr_matrix(sparse.triu(adjacency)), weighted=False)
|
|
85
85
|
if metadata:
|
|
86
|
-
graph =
|
|
86
|
+
graph = Dataset()
|
|
87
87
|
graph.adjacency = adjacency
|
|
88
88
|
labels = np.repeat(np.arange(len(sizes)), sizes)
|
|
89
89
|
graph.labels = labels
|
|
@@ -129,7 +129,7 @@ def erdos_renyi(n: int = 20, p: float = .3, directed: bool = False, self_loops:
|
|
|
129
129
|
return block_model([n], p, 0., directed=directed, self_loops=self_loops, metadata=False, seed=seed)
|
|
130
130
|
|
|
131
131
|
|
|
132
|
-
def linear_digraph(n: int = 3, metadata: bool = False) -> Union[sparse.csr_matrix,
|
|
132
|
+
def linear_digraph(n: int = 3, metadata: bool = False) -> Union[sparse.csr_matrix, Dataset]:
|
|
133
133
|
"""Linear graph (directed).
|
|
134
134
|
|
|
135
135
|
Parameters
|
|
@@ -158,7 +158,7 @@ def linear_digraph(n: int = 3, metadata: bool = False) -> Union[sparse.csr_matri
|
|
|
158
158
|
if metadata:
|
|
159
159
|
x = np.arange(n)
|
|
160
160
|
y = np.zeros(n)
|
|
161
|
-
graph =
|
|
161
|
+
graph = Dataset()
|
|
162
162
|
graph.adjacency = adjacency
|
|
163
163
|
graph.position = np.array((x, y)).T
|
|
164
164
|
return graph
|
|
@@ -166,7 +166,7 @@ def linear_digraph(n: int = 3, metadata: bool = False) -> Union[sparse.csr_matri
|
|
|
166
166
|
return adjacency
|
|
167
167
|
|
|
168
168
|
|
|
169
|
-
def linear_graph(n: int = 3, metadata: bool = False) -> Union[sparse.csr_matrix,
|
|
169
|
+
def linear_graph(n: int = 3, metadata: bool = False) -> Union[sparse.csr_matrix, Dataset]:
|
|
170
170
|
"""Linear graph (undirected).
|
|
171
171
|
|
|
172
172
|
Parameters
|
|
@@ -218,7 +218,7 @@ def cyclic_position(n: int) -> np.ndarray:
|
|
|
218
218
|
return position
|
|
219
219
|
|
|
220
220
|
|
|
221
|
-
def cyclic_digraph(n: int = 3, metadata: bool = False) -> Union[sparse.csr_matrix,
|
|
221
|
+
def cyclic_digraph(n: int = 3, metadata: bool = False) -> Union[sparse.csr_matrix, Dataset]:
|
|
222
222
|
"""Cyclic graph (directed).
|
|
223
223
|
|
|
224
224
|
Parameters
|
|
@@ -245,7 +245,7 @@ def cyclic_digraph(n: int = 3, metadata: bool = False) -> Union[sparse.csr_matri
|
|
|
245
245
|
adjacency = sparse.csr_matrix((np.ones(len(row), dtype=int), (row, col)), shape=(n, n))
|
|
246
246
|
|
|
247
247
|
if metadata:
|
|
248
|
-
graph =
|
|
248
|
+
graph = Dataset()
|
|
249
249
|
graph.adjacency = adjacency
|
|
250
250
|
graph.position = cyclic_position(n)
|
|
251
251
|
return graph
|
|
@@ -253,7 +253,7 @@ def cyclic_digraph(n: int = 3, metadata: bool = False) -> Union[sparse.csr_matri
|
|
|
253
253
|
return adjacency
|
|
254
254
|
|
|
255
255
|
|
|
256
|
-
def cyclic_graph(n: int = 3, metadata: bool = False) -> Union[sparse.csr_matrix,
|
|
256
|
+
def cyclic_graph(n: int = 3, metadata: bool = False) -> Union[sparse.csr_matrix, Dataset]:
|
|
257
257
|
"""Cyclic graph (undirected).
|
|
258
258
|
|
|
259
259
|
Parameters
|
|
@@ -283,7 +283,7 @@ def cyclic_graph(n: int = 3, metadata: bool = False) -> Union[sparse.csr_matrix,
|
|
|
283
283
|
return graph.adjacency
|
|
284
284
|
|
|
285
285
|
|
|
286
|
-
def grid(n1: int = 10, n2: int = 10, metadata: bool = False) -> Union[sparse.csr_matrix,
|
|
286
|
+
def grid(n1: int = 10, n2: int = 10, metadata: bool = False) -> Union[sparse.csr_matrix, Dataset]:
|
|
287
287
|
"""Grid (undirected).
|
|
288
288
|
|
|
289
289
|
Parameters
|
|
@@ -312,7 +312,7 @@ def grid(n1: int = 10, n2: int = 10, metadata: bool = False) -> Union[sparse.csr
|
|
|
312
312
|
edges = list(map(lambda edge: (node_id[edge[0]], node_id[edge[1]]), edges))
|
|
313
313
|
adjacency = from_edge_list(edges, reindex=False, matrix_only=True)
|
|
314
314
|
if metadata:
|
|
315
|
-
graph =
|
|
315
|
+
graph = Dataset()
|
|
316
316
|
graph.adjacency = adjacency
|
|
317
317
|
graph.position = np.array(nodes)
|
|
318
318
|
return graph
|
|
@@ -320,7 +320,7 @@ def grid(n1: int = 10, n2: int = 10, metadata: bool = False) -> Union[sparse.csr
|
|
|
320
320
|
return adjacency
|
|
321
321
|
|
|
322
322
|
|
|
323
|
-
def star(n_branches: int = 3, metadata: bool = False) -> Union[sparse.csr_matrix,
|
|
323
|
+
def star(n_branches: int = 3, metadata: bool = False) -> Union[sparse.csr_matrix, Dataset]:
|
|
324
324
|
"""Star (undirected).
|
|
325
325
|
|
|
326
326
|
Parameters
|
|
@@ -345,7 +345,7 @@ def star(n_branches: int = 3, metadata: bool = False) -> Union[sparse.csr_matrix
|
|
|
345
345
|
edges = [(0, i+1) for i in range(n_branches)]
|
|
346
346
|
adjacency = from_edge_list(edges, reindex=False, matrix_only=True)
|
|
347
347
|
if metadata:
|
|
348
|
-
graph =
|
|
348
|
+
graph = Dataset()
|
|
349
349
|
graph.adjacency = adjacency
|
|
350
350
|
angles = 2 * np.pi * np.arange(n_branches) / n_branches
|
|
351
351
|
x = [0] + list(np.cos(angles))
|
|
@@ -402,7 +402,7 @@ def albert_barabasi(n: int = 100, degree: int = 3, directed: bool = False, seed:
|
|
|
402
402
|
|
|
403
403
|
|
|
404
404
|
def watts_strogatz(n: int = 100, degree: int = 6, prob: float = 0.05, seed: Optional[int] = None,
|
|
405
|
-
metadata: bool = False) -> Union[sparse.csr_matrix,
|
|
405
|
+
metadata: bool = False) -> Union[sparse.csr_matrix, Dataset]:
|
|
406
406
|
"""Watts-Strogatz model.
|
|
407
407
|
|
|
408
408
|
Parameters
|
|
@@ -451,7 +451,7 @@ def watts_strogatz(n: int = 100, degree: int = 6, prob: float = 0.05, seed: Opti
|
|
|
451
451
|
adjacency[j, i] = 0
|
|
452
452
|
adjacency = sparse.csr_matrix(adjacency, shape=adjacency.shape)
|
|
453
453
|
if metadata:
|
|
454
|
-
graph =
|
|
454
|
+
graph = Dataset()
|
|
455
455
|
graph.adjacency = adjacency
|
|
456
456
|
graph.position = cyclic_position(n)
|
|
457
457
|
return graph
|
sknetwork/data/parse.py
CHANGED
|
@@ -14,13 +14,13 @@ from xml.etree import ElementTree
|
|
|
14
14
|
import numpy as np
|
|
15
15
|
from scipy import sparse
|
|
16
16
|
|
|
17
|
-
from sknetwork.data.base import
|
|
17
|
+
from sknetwork.data.base import Dataset
|
|
18
18
|
from sknetwork.utils.format import directed2undirected
|
|
19
19
|
|
|
20
20
|
|
|
21
21
|
def from_edge_list(edge_list: Union[np.ndarray, List[Tuple]], directed: bool = False,
|
|
22
22
|
bipartite: bool = False, weighted: bool = True, reindex: bool = False, shape: Optional[tuple] = None,
|
|
23
|
-
sum_duplicates: bool = True, matrix_only: bool = None) -> Union[
|
|
23
|
+
sum_duplicates: bool = True, matrix_only: bool = None) -> Union[Dataset, sparse.csr_matrix]:
|
|
24
24
|
"""Load a graph from an edge list.
|
|
25
25
|
|
|
26
26
|
Parameters
|
|
@@ -93,7 +93,7 @@ def from_edge_list(edge_list: Union[np.ndarray, List[Tuple]], directed: bool = F
|
|
|
93
93
|
def from_adjacency_list(adjacency_list: Union[List[List], Dict[str, List]], directed: bool = False,
|
|
94
94
|
bipartite: bool = False, weighted: bool = True, reindex: bool = False,
|
|
95
95
|
shape: Optional[tuple] = None, sum_duplicates: bool = True, matrix_only: bool = None) \
|
|
96
|
-
-> Union[
|
|
96
|
+
-> Union[Dataset, sparse.csr_matrix]:
|
|
97
97
|
"""Load a graph from an adjacency list.
|
|
98
98
|
|
|
99
99
|
Parameters
|
|
@@ -147,7 +147,7 @@ def from_adjacency_list(adjacency_list: Union[List[List], Dict[str, List]], dire
|
|
|
147
147
|
|
|
148
148
|
def from_edge_array(edge_array: np.ndarray, weights: np.ndarray = None, directed: bool = False, bipartite: bool = False,
|
|
149
149
|
weighted: bool = True, reindex: bool = False, shape: Optional[tuple] = None,
|
|
150
|
-
sum_duplicates: bool = True, matrix_only: bool = None) -> Union[
|
|
150
|
+
sum_duplicates: bool = True, matrix_only: bool = None) -> Union[Dataset, sparse.csr_matrix]:
|
|
151
151
|
"""Load a graph from an edge array of shape (n_edges, 2) and weights (optional).
|
|
152
152
|
|
|
153
153
|
Parameters
|
|
@@ -202,7 +202,7 @@ def from_edge_array(edge_array: np.ndarray, weights: np.ndarray = None, directed
|
|
|
202
202
|
_, index = np.unique(edge_array, axis=0, return_index=True)
|
|
203
203
|
edge_array = edge_array[index]
|
|
204
204
|
weights = weights[index]
|
|
205
|
-
graph =
|
|
205
|
+
graph = Dataset()
|
|
206
206
|
if bipartite:
|
|
207
207
|
row = edge_array[:, 0]
|
|
208
208
|
col = edge_array[:, 1]
|
|
@@ -224,6 +224,7 @@ def from_edge_array(edge_array: np.ndarray, weights: np.ndarray = None, directed
|
|
|
224
224
|
else:
|
|
225
225
|
n_col = max(col) + 1
|
|
226
226
|
matrix = sparse.csr_matrix((weights, (row, col)), shape=(n_row, n_col))
|
|
227
|
+
matrix.sum_duplicates()
|
|
227
228
|
graph.biadjacency = matrix
|
|
228
229
|
else:
|
|
229
230
|
nodes = edge_array.ravel()
|
|
@@ -241,6 +242,7 @@ def from_edge_array(edge_array: np.ndarray, weights: np.ndarray = None, directed
|
|
|
241
242
|
matrix = sparse.csr_matrix((weights, (row, col)), shape=(n, n))
|
|
242
243
|
if not directed:
|
|
243
244
|
matrix = directed2undirected(matrix)
|
|
245
|
+
matrix.sum_duplicates()
|
|
244
246
|
graph.adjacency = matrix
|
|
245
247
|
if matrix_only or (matrix_only is None and len(graph) == 1):
|
|
246
248
|
return matrix
|
|
@@ -251,7 +253,7 @@ def from_edge_array(edge_array: np.ndarray, weights: np.ndarray = None, directed
|
|
|
251
253
|
def from_csv(file_path: str, delimiter: str = None, sep: str = None, comments: str = '#%',
|
|
252
254
|
data_structure: str = None, directed: bool = False, bipartite: bool = False, weighted: bool = True,
|
|
253
255
|
reindex: bool = False, shape: Optional[tuple] = None, sum_duplicates: bool = True,
|
|
254
|
-
matrix_only: bool = None) -> Union[
|
|
256
|
+
matrix_only: bool = None) -> Union[Dataset, sparse.csr_matrix]:
|
|
255
257
|
"""Load a graph from a CSV or TSV file.
|
|
256
258
|
The delimiter can be specified (e.g., ' ' for space-separated values).
|
|
257
259
|
|
|
@@ -432,9 +434,9 @@ def load_header(file: str):
|
|
|
432
434
|
return directed, bipartite, weighted
|
|
433
435
|
|
|
434
436
|
|
|
435
|
-
def load_metadata(file: str, delimiter: str = ': ') ->
|
|
437
|
+
def load_metadata(file: str, delimiter: str = ': ') -> Dataset:
|
|
436
438
|
"""Extract metadata from the file."""
|
|
437
|
-
metadata =
|
|
439
|
+
metadata = Dataset()
|
|
438
440
|
with open(file, 'r', encoding='utf-8') as f:
|
|
439
441
|
for row in f:
|
|
440
442
|
parts = row.split(delimiter)
|
|
@@ -443,7 +445,7 @@ def load_metadata(file: str, delimiter: str = ': ') -> Bunch:
|
|
|
443
445
|
return metadata
|
|
444
446
|
|
|
445
447
|
|
|
446
|
-
def from_graphml(file_path: str, weight_key: str = 'weight', max_string_size: int = 512) ->
|
|
448
|
+
def from_graphml(file_path: str, weight_key: str = 'weight', max_string_size: int = 512) -> Dataset:
|
|
447
449
|
"""Load graph from GraphML file.
|
|
448
450
|
|
|
449
451
|
Hyperedges and nested graphs are not supported.
|
|
@@ -459,7 +461,7 @@ def from_graphml(file_path: str, weight_key: str = 'weight', max_string_size: in
|
|
|
459
461
|
|
|
460
462
|
Returns
|
|
461
463
|
-------
|
|
462
|
-
data: :class:`
|
|
464
|
+
data: :class:`Dataset`
|
|
463
465
|
The dataset in a Dataset with the adjacency as a CSR matrix.
|
|
464
466
|
"""
|
|
465
467
|
# see http://graphml.graphdrawing.org/primer/graphml-primer.html
|
|
@@ -475,12 +477,12 @@ def from_graphml(file_path: str, weight_key: str = 'weight', max_string_size: in
|
|
|
475
477
|
# indices in the graph tree
|
|
476
478
|
node_indices = []
|
|
477
479
|
edge_indices = []
|
|
478
|
-
data =
|
|
480
|
+
data = Dataset()
|
|
479
481
|
graph = None
|
|
480
482
|
file_description = None
|
|
481
|
-
attribute_descriptions =
|
|
482
|
-
attribute_descriptions.node =
|
|
483
|
-
attribute_descriptions.edge =
|
|
483
|
+
attribute_descriptions = Dataset()
|
|
484
|
+
attribute_descriptions.node = Dataset()
|
|
485
|
+
attribute_descriptions.edge = Dataset()
|
|
484
486
|
keys = {}
|
|
485
487
|
for file_element in tree.getroot():
|
|
486
488
|
if file_element.tag.endswith('graph'):
|
|
@@ -518,7 +520,7 @@ def from_graphml(file_path: str, weight_key: str = 'weight', max_string_size: in
|
|
|
518
520
|
if file_element.attrib['for'] == 'node':
|
|
519
521
|
size = n_nodes
|
|
520
522
|
if 'node_attribute' not in data:
|
|
521
|
-
data.node_attribute =
|
|
523
|
+
data.node_attribute = Dataset()
|
|
522
524
|
for key_element in file_element:
|
|
523
525
|
if key_element.tag.endswith('desc'):
|
|
524
526
|
attribute_descriptions.node[attribute_name] = key_element.text
|
|
@@ -535,7 +537,7 @@ def from_graphml(file_path: str, weight_key: str = 'weight', max_string_size: in
|
|
|
535
537
|
elif file_element.attrib['for'] == 'edge':
|
|
536
538
|
size = n_edges
|
|
537
539
|
if 'edge_attribute' not in data:
|
|
538
|
-
data.edge_attribute =
|
|
540
|
+
data.edge_attribute = Dataset()
|
|
539
541
|
for key_element in file_element:
|
|
540
542
|
if key_element.tag.endswith('desc'):
|
|
541
543
|
attribute_descriptions.edge[attribute_name] = key_element.text
|
|
@@ -553,7 +555,7 @@ def from_graphml(file_path: str, weight_key: str = 'weight', max_string_size: in
|
|
|
553
555
|
elif file_element.tag.endswith('desc'):
|
|
554
556
|
file_description = file_element.text
|
|
555
557
|
if file_description or attribute_descriptions.node or attribute_descriptions.edge:
|
|
556
|
-
data.meta =
|
|
558
|
+
data.meta = Dataset()
|
|
557
559
|
if file_description:
|
|
558
560
|
data.meta['description'] = file_description
|
|
559
561
|
if attribute_descriptions.node or attribute_descriptions.edge:
|
sknetwork/data/tests/test_API.py
CHANGED
|
@@ -8,7 +8,7 @@ import warnings
|
|
|
8
8
|
|
|
9
9
|
from sknetwork.data.load import *
|
|
10
10
|
from sknetwork.data.toy_graphs import *
|
|
11
|
-
from sknetwork.data import
|
|
11
|
+
from sknetwork.data import Dataset
|
|
12
12
|
|
|
13
13
|
|
|
14
14
|
class TestDataAPI(unittest.TestCase):
|
|
@@ -17,14 +17,14 @@ class TestDataAPI(unittest.TestCase):
|
|
|
17
17
|
toy_graphs = [karate_club, painters, bow_tie, house, miserables]
|
|
18
18
|
for toy_graph in toy_graphs:
|
|
19
19
|
self.assertEqual(type(toy_graph()), sparse.csr_matrix)
|
|
20
|
-
self.assertEqual(type(toy_graph(metadata=True)),
|
|
20
|
+
self.assertEqual(type(toy_graph(metadata=True)), Dataset)
|
|
21
21
|
|
|
22
22
|
def test_load(self):
|
|
23
23
|
tmp_data_dir = tempfile.gettempdir() + '/stub'
|
|
24
24
|
clear_data_home(tmp_data_dir)
|
|
25
25
|
try:
|
|
26
26
|
graph = load_netset('stub', tmp_data_dir)
|
|
27
|
-
self.assertEqual(type(graph),
|
|
27
|
+
self.assertEqual(type(graph), Dataset)
|
|
28
28
|
except URLError: # pragma: no cover
|
|
29
29
|
warnings.warn('Could not reach NetSet. Corresponding test has not been performed.', RuntimeWarning)
|
|
30
30
|
return
|
|
@@ -3,12 +3,12 @@
|
|
|
3
3
|
|
|
4
4
|
import unittest
|
|
5
5
|
|
|
6
|
-
from sknetwork.data.base import
|
|
6
|
+
from sknetwork.data.base import Dataset
|
|
7
7
|
|
|
8
8
|
|
|
9
9
|
class TestDataset(unittest.TestCase):
|
|
10
10
|
|
|
11
11
|
def test(self):
|
|
12
|
-
dataset =
|
|
12
|
+
dataset = Dataset(name='dataset')
|
|
13
13
|
self.assertEqual(dataset.name, 'dataset')
|
|
14
14
|
self.assertEqual(dataset['name'], 'dataset')
|
|
@@ -16,22 +16,22 @@ class TestToys(unittest.TestCase):
|
|
|
16
16
|
adjacency = house()
|
|
17
17
|
self.assertEqual(adjacency.shape, (5, 5))
|
|
18
18
|
|
|
19
|
-
|
|
20
|
-
self.assertEqual(
|
|
19
|
+
dataset = house(metadata=True)
|
|
20
|
+
self.assertEqual(dataset.position.shape, (5, 2))
|
|
21
21
|
|
|
22
22
|
adjacency = bow_tie()
|
|
23
23
|
self.assertEqual(adjacency.shape, (5, 5))
|
|
24
24
|
|
|
25
|
-
|
|
26
|
-
self.assertEqual(
|
|
25
|
+
dataset = bow_tie(metadata=True)
|
|
26
|
+
self.assertEqual(dataset.position.shape, (5, 2))
|
|
27
27
|
|
|
28
|
-
|
|
29
|
-
self.assertEqual(
|
|
30
|
-
self.assertEqual(len(
|
|
28
|
+
dataset = karate_club(True)
|
|
29
|
+
self.assertEqual(dataset.adjacency.shape, (34, 34))
|
|
30
|
+
self.assertEqual(len(dataset.labels), 34)
|
|
31
31
|
|
|
32
|
-
|
|
33
|
-
self.assertEqual(
|
|
34
|
-
self.assertEqual(len(
|
|
32
|
+
dataset = miserables(True)
|
|
33
|
+
self.assertEqual(dataset.adjacency.shape, (77, 77))
|
|
34
|
+
self.assertEqual(len(dataset.names), 77)
|
|
35
35
|
|
|
36
36
|
def test_directed(self):
|
|
37
37
|
adjacency = painters()
|
|
@@ -40,29 +40,29 @@ class TestToys(unittest.TestCase):
|
|
|
40
40
|
adjacency = art_philo_science()
|
|
41
41
|
self.assertEqual(adjacency.shape, (30, 30))
|
|
42
42
|
|
|
43
|
-
|
|
44
|
-
self.assertEqual(
|
|
45
|
-
self.assertEqual(len(
|
|
43
|
+
dataset = painters(True)
|
|
44
|
+
self.assertEqual(dataset.adjacency.shape, (14, 14))
|
|
45
|
+
self.assertEqual(len(dataset.names), 14)
|
|
46
46
|
|
|
47
|
-
|
|
48
|
-
self.assertEqual(
|
|
49
|
-
self.assertEqual(len(
|
|
47
|
+
dataset = art_philo_science(True)
|
|
48
|
+
self.assertEqual(dataset.adjacency.shape, (30, 30))
|
|
49
|
+
self.assertEqual(len(dataset.names), 30)
|
|
50
50
|
|
|
51
51
|
def test_bipartite(self):
|
|
52
|
-
|
|
53
|
-
self.assertEqual(
|
|
54
|
-
self.assertEqual(len(
|
|
55
|
-
self.assertEqual(len(
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
self.assertEqual(
|
|
59
|
-
self.assertEqual(len(
|
|
60
|
-
self.assertEqual(len(
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
self.assertEqual(
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
self.assertEqual(
|
|
67
|
-
self.assertEqual(len(
|
|
68
|
-
self.assertEqual(len(
|
|
52
|
+
dataset = star_wars(True)
|
|
53
|
+
self.assertEqual(dataset.biadjacency.shape, (4, 3))
|
|
54
|
+
self.assertEqual(len(dataset.names), 4)
|
|
55
|
+
self.assertEqual(len(dataset.names_col), 3)
|
|
56
|
+
|
|
57
|
+
dataset = movie_actor(True)
|
|
58
|
+
self.assertEqual(dataset.biadjacency.shape, (15, 17))
|
|
59
|
+
self.assertEqual(len(dataset.names), 15)
|
|
60
|
+
self.assertEqual(len(dataset.names_col), 17)
|
|
61
|
+
|
|
62
|
+
dataset = hourglass(True)
|
|
63
|
+
self.assertEqual(dataset.biadjacency.shape, (2, 2))
|
|
64
|
+
|
|
65
|
+
dataset = art_philo_science(True)
|
|
66
|
+
self.assertEqual(dataset.biadjacency.shape, (30, 11))
|
|
67
|
+
self.assertEqual(len(dataset.names), 30)
|
|
68
|
+
self.assertEqual(len(dataset.names_col), 11)
|