scikit-network 0.32.1__cp310-cp310-win_amd64.whl → 0.33.1__cp310-cp310-win_amd64.whl

This diff shows the differences between the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between these package versions as they appear in their respective public registries.

Potentially problematic release.


This version of scikit-network might be problematic. Click here for more details.

Files changed (67)
  1. {scikit_network-0.32.1.dist-info → scikit_network-0.33.1.dist-info}/AUTHORS.rst +0 -1
  2. scikit_network-0.33.1.dist-info/METADATA +120 -0
  3. {scikit_network-0.32.1.dist-info → scikit_network-0.33.1.dist-info}/RECORD +66 -66
  4. {scikit_network-0.32.1.dist-info → scikit_network-0.33.1.dist-info}/WHEEL +1 -1
  5. sknetwork/__init__.py +1 -1
  6. sknetwork/classification/diffusion.py +4 -3
  7. sknetwork/classification/knn.py +4 -3
  8. sknetwork/classification/metrics.py +3 -3
  9. sknetwork/classification/pagerank.py +1 -1
  10. sknetwork/classification/propagation.py +7 -6
  11. sknetwork/classification/vote.cp310-win_amd64.pyd +0 -0
  12. sknetwork/classification/vote.cpp +684 -677
  13. sknetwork/clustering/leiden.py +2 -1
  14. sknetwork/clustering/leiden_core.cp310-win_amd64.pyd +0 -0
  15. sknetwork/clustering/leiden_core.cpp +713 -702
  16. sknetwork/clustering/louvain.py +6 -6
  17. sknetwork/clustering/louvain_core.cp310-win_amd64.pyd +0 -0
  18. sknetwork/clustering/louvain_core.cpp +713 -702
  19. sknetwork/clustering/metrics.py +1 -1
  20. sknetwork/clustering/tests/test_kcenters.py +5 -37
  21. sknetwork/clustering/tests/test_louvain.py +6 -0
  22. sknetwork/data/__init__.py +1 -1
  23. sknetwork/data/base.py +7 -2
  24. sknetwork/data/load.py +18 -21
  25. sknetwork/data/models.py +15 -15
  26. sknetwork/data/parse.py +19 -17
  27. sknetwork/data/tests/test_API.py +3 -3
  28. sknetwork/data/tests/test_base.py +2 -2
  29. sknetwork/data/tests/test_toy_graphs.py +33 -33
  30. sknetwork/data/toy_graphs.py +35 -43
  31. sknetwork/embedding/base.py +3 -0
  32. sknetwork/embedding/louvain_embedding.py +0 -26
  33. sknetwork/embedding/svd.py +0 -4
  34. sknetwork/embedding/tests/test_louvain_embedding.py +9 -4
  35. sknetwork/embedding/tests/test_svd.py +6 -0
  36. sknetwork/gnn/gnn_classifier.py +1 -1
  37. sknetwork/hierarchy/louvain_hierarchy.py +10 -6
  38. sknetwork/hierarchy/metrics.py +3 -3
  39. sknetwork/hierarchy/paris.cp310-win_amd64.pyd +0 -0
  40. sknetwork/hierarchy/paris.cpp +2651 -2027
  41. sknetwork/hierarchy/paris.pyx +4 -3
  42. sknetwork/hierarchy/tests/test_metrics.py +4 -4
  43. sknetwork/linalg/diteration.cp310-win_amd64.pyd +0 -0
  44. sknetwork/linalg/diteration.cpp +684 -677
  45. sknetwork/linalg/push.cp310-win_amd64.pyd +0 -0
  46. sknetwork/linalg/push.cpp +1769 -1153
  47. sknetwork/linalg/sparse_lowrank.py +1 -1
  48. sknetwork/ranking/betweenness.cp310-win_amd64.pyd +0 -0
  49. sknetwork/ranking/betweenness.cpp +563 -557
  50. sknetwork/regression/diffusion.py +6 -4
  51. sknetwork/topology/cliques.cp310-win_amd64.pyd +0 -0
  52. sknetwork/topology/cliques.cpp +1729 -1110
  53. sknetwork/topology/core.cp310-win_amd64.pyd +0 -0
  54. sknetwork/topology/core.cpp +1755 -1139
  55. sknetwork/topology/cycles.py +1 -1
  56. sknetwork/topology/minheap.cp310-win_amd64.pyd +0 -0
  57. sknetwork/topology/minheap.cpp +687 -677
  58. sknetwork/topology/triangles.cp310-win_amd64.pyd +0 -0
  59. sknetwork/topology/triangles.cpp +437 -432
  60. sknetwork/topology/weisfeiler_lehman_core.cp310-win_amd64.pyd +0 -0
  61. sknetwork/topology/weisfeiler_lehman_core.cpp +684 -677
  62. sknetwork/utils/__init__.py +1 -1
  63. sknetwork/utils/values.py +5 -3
  64. sknetwork/visualization/graphs.py +1 -1
  65. scikit_network-0.32.1.dist-info/METADATA +0 -511
  66. {scikit_network-0.32.1.dist-info → scikit_network-0.33.1.dist-info}/LICENSE +0 -0
  67. {scikit_network-0.32.1.dist-info → scikit_network-0.33.1.dist-info}/top_level.txt +0 -0
sknetwork/clustering/metrics.py CHANGED
@@ -64,7 +64,7 @@ def get_modularity(input_matrix: Union[sparse.csr_matrix, np.ndarray], labels: n
64
64
  >>> from sknetwork.data import house
65
65
  >>> adjacency = house()
66
66
  >>> labels = np.array([0, 0, 1, 1, 0])
67
- >>> np.round(get_modularity(adjacency, labels), 2)
67
+ >>> float(np.round(get_modularity(adjacency, labels), 2))
68
68
  0.11
69
69
  """
70
70
  adjacency, bipartite = get_adjacency(input_matrix.astype(float))
sknetwork/clustering/tests/test_kcenters.py CHANGED
@@ -4,7 +4,6 @@
4
4
  import unittest
5
5
 
6
6
  from sknetwork.clustering import KCenters
7
- from sknetwork.data import karate_club, painters, star_wars
8
7
  from sknetwork.data.test_graphs import *
9
8
 
10
9
 
@@ -13,7 +12,7 @@ class TestKCentersClustering(unittest.TestCase):
13
12
  def test_kcenters(self):
14
13
  # Test undirected graph
15
14
  n_clusters = 2
16
- adjacency = karate_club()
15
+ adjacency = test_graph()
17
16
  n_row = adjacency.shape[0]
18
17
  kcenters = KCenters(n_clusters=n_clusters)
19
18
  labels = kcenters.fit_predict(adjacency)
@@ -22,7 +21,7 @@ class TestKCentersClustering(unittest.TestCase):
22
21
 
23
22
  # Test directed graph
24
23
  n_clusters = 3
25
- adjacency = painters()
24
+ adjacency = test_digraph()
26
25
  n_row = adjacency.shape[0]
27
26
  kcenters = KCenters(n_clusters=n_clusters, directed=True)
28
27
  labels = kcenters.fit_predict(adjacency)
@@ -31,7 +30,7 @@ class TestKCentersClustering(unittest.TestCase):
31
30
 
32
31
  # Test bipartite graph
33
32
  n_clusters = 2
34
- biadjacency = star_wars()
33
+ biadjacency = test_bigraph()
35
34
  n_row, n_col = biadjacency.shape
36
35
  kcenters = KCenters(n_clusters=n_clusters)
37
36
  kcenters.fit(biadjacency)
@@ -40,41 +39,10 @@ class TestKCentersClustering(unittest.TestCase):
40
39
  self.assertEqual(len(kcenters.labels_col_), n_col)
41
40
  self.assertEqual(len(set(labels)), n_clusters)
42
41
 
43
- def test_kcenters_centers(self):
44
- # Test centers for undirected graphs
45
- n_clusters = 2
46
- adjacency = karate_club()
47
- kcenters = KCenters(n_clusters=n_clusters)
48
- kcenters.fit(adjacency)
49
- centers = kcenters.centers_
50
- self.assertEqual(n_clusters, len(set(centers)))
51
-
52
- # Test centers for bipartite graphs
53
- n_clusters = 2
54
- biadjacency = star_wars()
55
- n_row, n_col = biadjacency.shape
56
- for position in ["row", "col", "both"]:
57
- kcenters = KCenters(n_clusters=n_clusters, center_position=position)
58
- kcenters.fit(biadjacency)
59
- centers_row = kcenters.centers_row_
60
- centers_col = kcenters.centers_col_
61
- if position == "row":
62
- self.assertEqual(n_clusters, len(set(centers_row)))
63
- self.assertTrue(np.all(centers_row < n_row))
64
- self.assertTrue(centers_col is None)
65
- if position == "col":
66
- self.assertEqual(n_clusters, len(set(centers_col)))
67
- self.assertTrue(np.all((centers_col < n_col) & (0 <= centers_col)))
68
- self.assertTrue(centers_row is None)
69
- if position == "both":
70
- self.assertEqual(n_clusters, len(set(centers_row)) + len(set(centers_col)))
71
- self.assertTrue(np.all(centers_row < n_row))
72
- self.assertTrue(np.all((centers_col < n_col) & (0 <= centers_col)))
73
-
74
42
  def test_kcenters_error(self):
75
43
  # Test value errors
76
- adjacency = karate_club()
77
- biadjacency = star_wars()
44
+ adjacency = test_graph()
45
+ biadjacency = test_bigraph()
78
46
 
79
47
  # test n_clusters error
80
48
  kcenters = KCenters(n_clusters=1)
sknetwork/clustering/tests/test_louvain.py CHANGED
@@ -17,6 +17,12 @@ class TestLouvainClustering(unittest.TestCase):
17
17
  labels = Louvain().fit_predict(adjacency)
18
18
  self.assertEqual(len(labels), n)
19
19
 
20
+ def test_format(self):
21
+ adjacency = test_graph()
22
+ n = adjacency.shape[0]
23
+ labels = Louvain().fit_predict(adjacency.toarray())
24
+ self.assertEqual(len(labels), n)
25
+
20
26
  def test_modularity(self):
21
27
  adjacency = karate_club()
22
28
  louvain_d = Louvain(modularity='dugue')
sknetwork/data/__init__.py CHANGED
@@ -1,5 +1,5 @@
1
1
  """data module"""
2
- from sknetwork.data.base import Bunch
2
+ from sknetwork.data.base import *
3
3
  from sknetwork.data.load import *
4
4
  from sknetwork.data.models import *
5
5
  from sknetwork.data.parse import from_edge_list, from_adjacency_list, from_csv, from_graphml
sknetwork/data/base.py CHANGED
@@ -6,10 +6,10 @@ Created in May 2023
6
6
  """
7
7
 
8
8
 
9
- class Bunch(dict):
9
+ class Dataset(dict):
10
10
  """Container object for datasets.
11
11
  Dictionary-like object that exposes its keys as attributes.
12
- >>> dataset = Bunch(name='dataset')
12
+ >>> dataset = Dataset(name='dataset')
13
13
  >>> dataset['name']
14
14
  'dataset'
15
15
  >>> dataset.name
@@ -26,3 +26,8 @@ class Bunch(dict):
26
26
  return self[key]
27
27
  except KeyError:
28
28
  raise AttributeError(key)
29
+
30
+
31
+ # alias for Dataset
32
+ Bunch = Dataset
33
+
sknetwork/data/load.py CHANGED
@@ -19,15 +19,12 @@ import numpy as np
19
19
  from scipy import sparse
20
20
 
21
21
  from sknetwork.data.parse import from_csv, load_labels, load_header, load_metadata
22
- from sknetwork.data.base import Bunch
22
+ from sknetwork.data.base import Dataset
23
23
  from sknetwork.utils.check import is_square
24
24
  from sknetwork.log import Log
25
25
 
26
26
  NETSET_URL = 'https://netset.telecom-paris.fr'
27
27
 
28
- # former name of Dataset
29
- Bunch = Bunch
30
-
31
28
 
32
29
  def is_within_directory(directory, target):
33
30
  """Utility function."""
@@ -89,7 +86,7 @@ def clean_data_home(data_home: Optional[Union[str, Path]] = None):
89
86
 
90
87
 
91
88
  def load_netset(name: Optional[str] = None, data_home: Optional[Union[str, Path]] = None,
92
- verbose: bool = True) -> Optional[Bunch]:
89
+ verbose: bool = True) -> Optional[Dataset]:
93
90
  """Load a dataset from the `NetSet collection
94
91
  <https://netset.telecom-paris.fr/>`_.
95
92
 
@@ -105,10 +102,10 @@ def load_netset(name: Optional[str] = None, data_home: Optional[Union[str, Path]
105
102
 
106
103
  Returns
107
104
  -------
108
- dataset : :class:`Bunch`
105
+ dataset : :class:`Dataset`
109
106
  Returned dataset.
110
107
  """
111
- dataset = Bunch()
108
+ dataset = Dataset()
112
109
  dataset_folder = NETSET_URL + '/datasets/'
113
110
  folder_npz = NETSET_URL + '/datasets_npz/'
114
111
 
@@ -167,7 +164,7 @@ def load_netset(name: Optional[str] = None, data_home: Optional[Union[str, Path]
167
164
 
168
165
 
169
166
  def load_konect(name: str, data_home: Optional[Union[str, Path]] = None, auto_numpy_bundle: bool = True,
170
- verbose: bool = True) -> Bunch:
167
+ verbose: bool = True) -> Dataset:
171
168
  """Load a dataset from the `Konect database
172
169
  <http://konect.cc/networks/>`_.
173
170
 
@@ -186,7 +183,7 @@ def load_konect(name: str, data_home: Optional[Union[str, Path]] = None, auto_nu
186
183
 
187
184
  Returns
188
185
  -------
189
- dataset : :class:`Bunch`
186
+ dataset : :class:`Dataset`
190
187
  Object with the following attributes:
191
188
 
192
189
  * `adjacency` or `biadjacency`: the adjacency/biadjacency matrix for the dataset
@@ -240,7 +237,7 @@ def load_konect(name: str, data_home: Optional[Union[str, Path]] = None, auto_nu
240
237
  logger.print_log('Loading from local bundle...')
241
238
  return load_from_numpy_bundle(name + '_bundle', data_path)
242
239
 
243
- dataset = Bunch()
240
+ dataset = Dataset()
244
241
  path = data_konect / name / name
245
242
  if not path.exists() or len(listdir(path)) == 0:
246
243
  raise Exception("No data downloaded.")
@@ -269,7 +266,7 @@ def load_konect(name: str, data_home: Optional[Union[str, Path]] = None, auto_nu
269
266
  else:
270
267
  dataset.meta.name = name
271
268
  else:
272
- dataset.meta = Bunch()
269
+ dataset.meta = Dataset()
273
270
  dataset.meta.name = name
274
271
 
275
272
  if auto_numpy_bundle:
@@ -280,12 +277,12 @@ def load_konect(name: str, data_home: Optional[Union[str, Path]] = None, auto_nu
280
277
  return dataset
281
278
 
282
279
 
283
- def save_to_numpy_bundle(data: Bunch, bundle_name: str, data_home: Optional[Union[str, Path]] = None):
280
+ def save_to_numpy_bundle(data: Dataset, bundle_name: str, data_home: Optional[Union[str, Path]] = None):
284
281
  """Save a dataset in the specified data home to a collection of Numpy and Pickle files for faster subsequent loads.
285
282
 
286
283
  Parameters
287
284
  ----------
288
- data: Bunch
285
+ data: Dataset
289
286
  Data to save.
290
287
  bundle_name: str
291
288
  Name to be used for the bundle folder.
@@ -317,7 +314,7 @@ def load_from_numpy_bundle(bundle_name: str, data_home: Optional[Union[str, Path
317
314
 
318
315
  Returns
319
316
  -------
320
- data: Bunch
317
+ data: Dataset
321
318
  Data.
322
319
  """
323
320
  data_home = get_data_home(data_home)
@@ -326,7 +323,7 @@ def load_from_numpy_bundle(bundle_name: str, data_home: Optional[Union[str, Path
326
323
  raise FileNotFoundError('No bundle at ' + str(data_path))
327
324
  else:
328
325
  files = listdir(data_path)
329
- data = Bunch()
326
+ data = Dataset()
330
327
  for file in files:
331
328
  if len(file.split('.')) == 2:
332
329
  file_name, file_extension = file.split('.')
@@ -340,7 +337,7 @@ def load_from_numpy_bundle(bundle_name: str, data_home: Optional[Union[str, Path
340
337
  return data
341
338
 
342
339
 
343
- def save(folder: Union[str, Path], data: Union[sparse.csr_matrix, Bunch]):
340
+ def save(folder: Union[str, Path], data: Union[sparse.csr_matrix, Dataset]):
344
341
  """Save a dataset or a CSR matrix in the current directory to a collection of Numpy and Pickle files for faster
345
342
  subsequent loads. Supported attribute types include sparse matrices, NumPy arrays, strings and objects Dataset.
346
343
 
@@ -348,13 +345,13 @@ def save(folder: Union[str, Path], data: Union[sparse.csr_matrix, Bunch]):
348
345
  ----------
349
346
  folder : str or :class:`pathlib.Path`
350
347
  Name of the bundle folder.
351
- data : Union[sparse.csr_matrix, Bunch]
348
+ data : Union[sparse.csr_matrix, Dataset]
352
349
  Data to save.
353
350
 
354
351
  Example
355
352
  -------
356
353
  >>> from sknetwork.data import save
357
- >>> dataset = Bunch()
354
+ >>> dataset = Dataset()
358
355
  >>> dataset.adjacency = sparse.csr_matrix(np.random.random((3, 3)) < 0.5)
359
356
  >>> dataset.names = np.array(['a', 'b', 'c'])
360
357
  >>> save('dataset', dataset)
@@ -366,7 +363,7 @@ def save(folder: Union[str, Path], data: Union[sparse.csr_matrix, Bunch]):
366
363
  if folder.exists():
367
364
  shutil.rmtree(folder)
368
365
  if isinstance(data, sparse.csr_matrix):
369
- dataset = Bunch()
366
+ dataset = Dataset()
370
367
  if is_square(data):
371
368
  dataset.adjacency = data
372
369
  else:
@@ -388,13 +385,13 @@ def load(folder: Union[str, Path]):
388
385
 
389
386
  Returns
390
387
  -------
391
- data: Bunch
388
+ data: Dataset
392
389
  Data.
393
390
 
394
391
  Example
395
392
  -------
396
393
  >>> from sknetwork.data import save
397
- >>> dataset = Bunch()
394
+ >>> dataset = Dataset()
398
395
  >>> dataset.adjacency = sparse.csr_matrix(np.random.random((3, 3)) < 0.5)
399
396
  >>> dataset.names = np.array(['a', 'b', 'c'])
400
397
  >>> save('dataset', dataset)
sknetwork/data/models.py CHANGED
@@ -12,7 +12,7 @@ from typing import Union, Optional, Iterable
12
12
  import numpy as np
13
13
  from scipy import sparse
14
14
 
15
- from sknetwork.data.base import Bunch
15
+ from sknetwork.data.base import Dataset
16
16
  from sknetwork.data.parse import from_edge_list
17
17
  from sknetwork.utils.check import check_random_state
18
18
  from sknetwork.utils.format import directed2undirected
@@ -20,7 +20,7 @@ from sknetwork.utils.format import directed2undirected
20
20
 
21
21
  def block_model(sizes: Iterable, p_in: Union[float, list, np.ndarray] = .2, p_out: float = .05,
22
22
  directed: bool = False, self_loops: bool = False, metadata: bool = False, seed: Optional[int] = None) \
23
- -> Union[sparse.csr_matrix, Bunch]:
23
+ -> Union[sparse.csr_matrix, Dataset]:
24
24
  """Stochastic block model.
25
25
 
26
26
  Parameters
@@ -83,7 +83,7 @@ def block_model(sizes: Iterable, p_in: Union[float, list, np.ndarray] = .2, p_ou
83
83
  else:
84
84
  adjacency = directed2undirected(sparse.csr_matrix(sparse.triu(adjacency)), weighted=False)
85
85
  if metadata:
86
- graph = Bunch()
86
+ graph = Dataset()
87
87
  graph.adjacency = adjacency
88
88
  labels = np.repeat(np.arange(len(sizes)), sizes)
89
89
  graph.labels = labels
@@ -129,7 +129,7 @@ def erdos_renyi(n: int = 20, p: float = .3, directed: bool = False, self_loops:
129
129
  return block_model([n], p, 0., directed=directed, self_loops=self_loops, metadata=False, seed=seed)
130
130
 
131
131
 
132
- def linear_digraph(n: int = 3, metadata: bool = False) -> Union[sparse.csr_matrix, Bunch]:
132
+ def linear_digraph(n: int = 3, metadata: bool = False) -> Union[sparse.csr_matrix, Dataset]:
133
133
  """Linear graph (directed).
134
134
 
135
135
  Parameters
@@ -158,7 +158,7 @@ def linear_digraph(n: int = 3, metadata: bool = False) -> Union[sparse.csr_matri
158
158
  if metadata:
159
159
  x = np.arange(n)
160
160
  y = np.zeros(n)
161
- graph = Bunch()
161
+ graph = Dataset()
162
162
  graph.adjacency = adjacency
163
163
  graph.position = np.array((x, y)).T
164
164
  return graph
@@ -166,7 +166,7 @@ def linear_digraph(n: int = 3, metadata: bool = False) -> Union[sparse.csr_matri
166
166
  return adjacency
167
167
 
168
168
 
169
- def linear_graph(n: int = 3, metadata: bool = False) -> Union[sparse.csr_matrix, Bunch]:
169
+ def linear_graph(n: int = 3, metadata: bool = False) -> Union[sparse.csr_matrix, Dataset]:
170
170
  """Linear graph (undirected).
171
171
 
172
172
  Parameters
@@ -218,7 +218,7 @@ def cyclic_position(n: int) -> np.ndarray:
218
218
  return position
219
219
 
220
220
 
221
- def cyclic_digraph(n: int = 3, metadata: bool = False) -> Union[sparse.csr_matrix, Bunch]:
221
+ def cyclic_digraph(n: int = 3, metadata: bool = False) -> Union[sparse.csr_matrix, Dataset]:
222
222
  """Cyclic graph (directed).
223
223
 
224
224
  Parameters
@@ -245,7 +245,7 @@ def cyclic_digraph(n: int = 3, metadata: bool = False) -> Union[sparse.csr_matri
245
245
  adjacency = sparse.csr_matrix((np.ones(len(row), dtype=int), (row, col)), shape=(n, n))
246
246
 
247
247
  if metadata:
248
- graph = Bunch()
248
+ graph = Dataset()
249
249
  graph.adjacency = adjacency
250
250
  graph.position = cyclic_position(n)
251
251
  return graph
@@ -253,7 +253,7 @@ def cyclic_digraph(n: int = 3, metadata: bool = False) -> Union[sparse.csr_matri
253
253
  return adjacency
254
254
 
255
255
 
256
- def cyclic_graph(n: int = 3, metadata: bool = False) -> Union[sparse.csr_matrix, Bunch]:
256
+ def cyclic_graph(n: int = 3, metadata: bool = False) -> Union[sparse.csr_matrix, Dataset]:
257
257
  """Cyclic graph (undirected).
258
258
 
259
259
  Parameters
@@ -283,7 +283,7 @@ def cyclic_graph(n: int = 3, metadata: bool = False) -> Union[sparse.csr_matrix,
283
283
  return graph.adjacency
284
284
 
285
285
 
286
- def grid(n1: int = 10, n2: int = 10, metadata: bool = False) -> Union[sparse.csr_matrix, Bunch]:
286
+ def grid(n1: int = 10, n2: int = 10, metadata: bool = False) -> Union[sparse.csr_matrix, Dataset]:
287
287
  """Grid (undirected).
288
288
 
289
289
  Parameters
@@ -312,7 +312,7 @@ def grid(n1: int = 10, n2: int = 10, metadata: bool = False) -> Union[sparse.csr
312
312
  edges = list(map(lambda edge: (node_id[edge[0]], node_id[edge[1]]), edges))
313
313
  adjacency = from_edge_list(edges, reindex=False, matrix_only=True)
314
314
  if metadata:
315
- graph = Bunch()
315
+ graph = Dataset()
316
316
  graph.adjacency = adjacency
317
317
  graph.position = np.array(nodes)
318
318
  return graph
@@ -320,7 +320,7 @@ def grid(n1: int = 10, n2: int = 10, metadata: bool = False) -> Union[sparse.csr
320
320
  return adjacency
321
321
 
322
322
 
323
- def star(n_branches: int = 3, metadata: bool = False) -> Union[sparse.csr_matrix, Bunch]:
323
+ def star(n_branches: int = 3, metadata: bool = False) -> Union[sparse.csr_matrix, Dataset]:
324
324
  """Star (undirected).
325
325
 
326
326
  Parameters
@@ -345,7 +345,7 @@ def star(n_branches: int = 3, metadata: bool = False) -> Union[sparse.csr_matrix
345
345
  edges = [(0, i+1) for i in range(n_branches)]
346
346
  adjacency = from_edge_list(edges, reindex=False, matrix_only=True)
347
347
  if metadata:
348
- graph = Bunch()
348
+ graph = Dataset()
349
349
  graph.adjacency = adjacency
350
350
  angles = 2 * np.pi * np.arange(n_branches) / n_branches
351
351
  x = [0] + list(np.cos(angles))
@@ -402,7 +402,7 @@ def albert_barabasi(n: int = 100, degree: int = 3, directed: bool = False, seed:
402
402
 
403
403
 
404
404
  def watts_strogatz(n: int = 100, degree: int = 6, prob: float = 0.05, seed: Optional[int] = None,
405
- metadata: bool = False) -> Union[sparse.csr_matrix, Bunch]:
405
+ metadata: bool = False) -> Union[sparse.csr_matrix, Dataset]:
406
406
  """Watts-Strogatz model.
407
407
 
408
408
  Parameters
@@ -451,7 +451,7 @@ def watts_strogatz(n: int = 100, degree: int = 6, prob: float = 0.05, seed: Opti
451
451
  adjacency[j, i] = 0
452
452
  adjacency = sparse.csr_matrix(adjacency, shape=adjacency.shape)
453
453
  if metadata:
454
- graph = Bunch()
454
+ graph = Dataset()
455
455
  graph.adjacency = adjacency
456
456
  graph.position = cyclic_position(n)
457
457
  return graph
sknetwork/data/parse.py CHANGED
@@ -14,13 +14,13 @@ from xml.etree import ElementTree
14
14
  import numpy as np
15
15
  from scipy import sparse
16
16
 
17
- from sknetwork.data.base import Bunch
17
+ from sknetwork.data.base import Dataset
18
18
  from sknetwork.utils.format import directed2undirected
19
19
 
20
20
 
21
21
  def from_edge_list(edge_list: Union[np.ndarray, List[Tuple]], directed: bool = False,
22
22
  bipartite: bool = False, weighted: bool = True, reindex: bool = False, shape: Optional[tuple] = None,
23
- sum_duplicates: bool = True, matrix_only: bool = None) -> Union[Bunch, sparse.csr_matrix]:
23
+ sum_duplicates: bool = True, matrix_only: bool = None) -> Union[Dataset, sparse.csr_matrix]:
24
24
  """Load a graph from an edge list.
25
25
 
26
26
  Parameters
@@ -93,7 +93,7 @@ def from_edge_list(edge_list: Union[np.ndarray, List[Tuple]], directed: bool = F
93
93
  def from_adjacency_list(adjacency_list: Union[List[List], Dict[str, List]], directed: bool = False,
94
94
  bipartite: bool = False, weighted: bool = True, reindex: bool = False,
95
95
  shape: Optional[tuple] = None, sum_duplicates: bool = True, matrix_only: bool = None) \
96
- -> Union[Bunch, sparse.csr_matrix]:
96
+ -> Union[Dataset, sparse.csr_matrix]:
97
97
  """Load a graph from an adjacency list.
98
98
 
99
99
  Parameters
@@ -147,7 +147,7 @@ def from_adjacency_list(adjacency_list: Union[List[List], Dict[str, List]], dire
147
147
 
148
148
  def from_edge_array(edge_array: np.ndarray, weights: np.ndarray = None, directed: bool = False, bipartite: bool = False,
149
149
  weighted: bool = True, reindex: bool = False, shape: Optional[tuple] = None,
150
- sum_duplicates: bool = True, matrix_only: bool = None) -> Union[Bunch, sparse.csr_matrix]:
150
+ sum_duplicates: bool = True, matrix_only: bool = None) -> Union[Dataset, sparse.csr_matrix]:
151
151
  """Load a graph from an edge array of shape (n_edges, 2) and weights (optional).
152
152
 
153
153
  Parameters
@@ -202,7 +202,7 @@ def from_edge_array(edge_array: np.ndarray, weights: np.ndarray = None, directed
202
202
  _, index = np.unique(edge_array, axis=0, return_index=True)
203
203
  edge_array = edge_array[index]
204
204
  weights = weights[index]
205
- graph = Bunch()
205
+ graph = Dataset()
206
206
  if bipartite:
207
207
  row = edge_array[:, 0]
208
208
  col = edge_array[:, 1]
@@ -224,6 +224,7 @@ def from_edge_array(edge_array: np.ndarray, weights: np.ndarray = None, directed
224
224
  else:
225
225
  n_col = max(col) + 1
226
226
  matrix = sparse.csr_matrix((weights, (row, col)), shape=(n_row, n_col))
227
+ matrix.sum_duplicates()
227
228
  graph.biadjacency = matrix
228
229
  else:
229
230
  nodes = edge_array.ravel()
@@ -241,6 +242,7 @@ def from_edge_array(edge_array: np.ndarray, weights: np.ndarray = None, directed
241
242
  matrix = sparse.csr_matrix((weights, (row, col)), shape=(n, n))
242
243
  if not directed:
243
244
  matrix = directed2undirected(matrix)
245
+ matrix.sum_duplicates()
244
246
  graph.adjacency = matrix
245
247
  if matrix_only or (matrix_only is None and len(graph) == 1):
246
248
  return matrix
@@ -251,7 +253,7 @@ def from_edge_array(edge_array: np.ndarray, weights: np.ndarray = None, directed
251
253
  def from_csv(file_path: str, delimiter: str = None, sep: str = None, comments: str = '#%',
252
254
  data_structure: str = None, directed: bool = False, bipartite: bool = False, weighted: bool = True,
253
255
  reindex: bool = False, shape: Optional[tuple] = None, sum_duplicates: bool = True,
254
- matrix_only: bool = None) -> Union[Bunch, sparse.csr_matrix]:
256
+ matrix_only: bool = None) -> Union[Dataset, sparse.csr_matrix]:
255
257
  """Load a graph from a CSV or TSV file.
256
258
  The delimiter can be specified (e.g., ' ' for space-separated values).
257
259
 
@@ -432,9 +434,9 @@ def load_header(file: str):
432
434
  return directed, bipartite, weighted
433
435
 
434
436
 
435
- def load_metadata(file: str, delimiter: str = ': ') -> Bunch:
437
+ def load_metadata(file: str, delimiter: str = ': ') -> Dataset:
436
438
  """Extract metadata from the file."""
437
- metadata = Bunch()
439
+ metadata = Dataset()
438
440
  with open(file, 'r', encoding='utf-8') as f:
439
441
  for row in f:
440
442
  parts = row.split(delimiter)
@@ -443,7 +445,7 @@ def load_metadata(file: str, delimiter: str = ': ') -> Bunch:
443
445
  return metadata
444
446
 
445
447
 
446
- def from_graphml(file_path: str, weight_key: str = 'weight', max_string_size: int = 512) -> Bunch:
448
+ def from_graphml(file_path: str, weight_key: str = 'weight', max_string_size: int = 512) -> Dataset:
447
449
  """Load graph from GraphML file.
448
450
 
449
451
  Hyperedges and nested graphs are not supported.
@@ -459,7 +461,7 @@ def from_graphml(file_path: str, weight_key: str = 'weight', max_string_size: in
459
461
 
460
462
  Returns
461
463
  -------
462
- data: :class:`Bunch`
464
+ data: :class:`Dataset`
463
465
  The dataset in a Dataset with the adjacency as a CSR matrix.
464
466
  """
465
467
  # see http://graphml.graphdrawing.org/primer/graphml-primer.html
@@ -475,12 +477,12 @@ def from_graphml(file_path: str, weight_key: str = 'weight', max_string_size: in
475
477
  # indices in the graph tree
476
478
  node_indices = []
477
479
  edge_indices = []
478
- data = Bunch()
480
+ data = Dataset()
479
481
  graph = None
480
482
  file_description = None
481
- attribute_descriptions = Bunch()
482
- attribute_descriptions.node = Bunch()
483
- attribute_descriptions.edge = Bunch()
483
+ attribute_descriptions = Dataset()
484
+ attribute_descriptions.node = Dataset()
485
+ attribute_descriptions.edge = Dataset()
484
486
  keys = {}
485
487
  for file_element in tree.getroot():
486
488
  if file_element.tag.endswith('graph'):
@@ -518,7 +520,7 @@ def from_graphml(file_path: str, weight_key: str = 'weight', max_string_size: in
518
520
  if file_element.attrib['for'] == 'node':
519
521
  size = n_nodes
520
522
  if 'node_attribute' not in data:
521
- data.node_attribute = Bunch()
523
+ data.node_attribute = Dataset()
522
524
  for key_element in file_element:
523
525
  if key_element.tag.endswith('desc'):
524
526
  attribute_descriptions.node[attribute_name] = key_element.text
@@ -535,7 +537,7 @@ def from_graphml(file_path: str, weight_key: str = 'weight', max_string_size: in
535
537
  elif file_element.attrib['for'] == 'edge':
536
538
  size = n_edges
537
539
  if 'edge_attribute' not in data:
538
- data.edge_attribute = Bunch()
540
+ data.edge_attribute = Dataset()
539
541
  for key_element in file_element:
540
542
  if key_element.tag.endswith('desc'):
541
543
  attribute_descriptions.edge[attribute_name] = key_element.text
@@ -553,7 +555,7 @@ def from_graphml(file_path: str, weight_key: str = 'weight', max_string_size: in
553
555
  elif file_element.tag.endswith('desc'):
554
556
  file_description = file_element.text
555
557
  if file_description or attribute_descriptions.node or attribute_descriptions.edge:
556
- data.meta = Bunch()
558
+ data.meta = Dataset()
557
559
  if file_description:
558
560
  data.meta['description'] = file_description
559
561
  if attribute_descriptions.node or attribute_descriptions.edge:
sknetwork/data/tests/test_API.py CHANGED
@@ -8,7 +8,7 @@ import warnings
8
8
 
9
9
  from sknetwork.data.load import *
10
10
  from sknetwork.data.toy_graphs import *
11
- from sknetwork.data import Bunch
11
+ from sknetwork.data import Dataset
12
12
 
13
13
 
14
14
  class TestDataAPI(unittest.TestCase):
@@ -17,14 +17,14 @@ class TestDataAPI(unittest.TestCase):
17
17
  toy_graphs = [karate_club, painters, bow_tie, house, miserables]
18
18
  for toy_graph in toy_graphs:
19
19
  self.assertEqual(type(toy_graph()), sparse.csr_matrix)
20
- self.assertEqual(type(toy_graph(metadata=True)), Bunch)
20
+ self.assertEqual(type(toy_graph(metadata=True)), Dataset)
21
21
 
22
22
  def test_load(self):
23
23
  tmp_data_dir = tempfile.gettempdir() + '/stub'
24
24
  clear_data_home(tmp_data_dir)
25
25
  try:
26
26
  graph = load_netset('stub', tmp_data_dir)
27
- self.assertEqual(type(graph), Bunch)
27
+ self.assertEqual(type(graph), Dataset)
28
28
  except URLError: # pragma: no cover
29
29
  warnings.warn('Could not reach NetSet. Corresponding test has not been performed.', RuntimeWarning)
30
30
  return
sknetwork/data/tests/test_base.py CHANGED
@@ -3,12 +3,12 @@
3
3
 
4
4
  import unittest
5
5
 
6
- from sknetwork.data.base import Bunch
6
+ from sknetwork.data.base import Dataset
7
7
 
8
8
 
9
9
  class TestDataset(unittest.TestCase):
10
10
 
11
11
  def test(self):
12
- dataset = Bunch(name='dataset')
12
+ dataset = Dataset(name='dataset')
13
13
  self.assertEqual(dataset.name, 'dataset')
14
14
  self.assertEqual(dataset['name'], 'dataset')