scikit-network 0.30.0__cp39-cp39-win_amd64.whl → 0.32.1__cp39-cp39-win_amd64.whl

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of scikit-network might be problematic. Click here for more details.

Files changed (187)
  1. {scikit_network-0.30.0.dist-info → scikit_network-0.32.1.dist-info}/AUTHORS.rst +3 -0
  2. {scikit_network-0.30.0.dist-info → scikit_network-0.32.1.dist-info}/METADATA +31 -3
  3. scikit_network-0.32.1.dist-info/RECORD +228 -0
  4. {scikit_network-0.30.0.dist-info → scikit_network-0.32.1.dist-info}/WHEEL +1 -1
  5. sknetwork/__init__.py +1 -1
  6. sknetwork/base.py +67 -0
  7. sknetwork/classification/base.py +24 -24
  8. sknetwork/classification/base_rank.py +17 -25
  9. sknetwork/classification/diffusion.py +35 -35
  10. sknetwork/classification/knn.py +24 -21
  11. sknetwork/classification/metrics.py +1 -1
  12. sknetwork/classification/pagerank.py +10 -10
  13. sknetwork/classification/propagation.py +23 -20
  14. sknetwork/classification/tests/test_diffusion.py +13 -3
  15. sknetwork/classification/vote.cp39-win_amd64.pyd +0 -0
  16. sknetwork/classification/vote.cpp +14482 -10351
  17. sknetwork/classification/vote.pyx +1 -3
  18. sknetwork/clustering/__init__.py +3 -1
  19. sknetwork/clustering/base.py +36 -40
  20. sknetwork/clustering/kcenters.py +253 -0
  21. sknetwork/clustering/leiden.py +241 -0
  22. sknetwork/clustering/leiden_core.cp39-win_amd64.pyd +0 -0
  23. sknetwork/clustering/leiden_core.cpp +31564 -0
  24. sknetwork/clustering/leiden_core.pyx +124 -0
  25. sknetwork/clustering/louvain.py +133 -102
  26. sknetwork/clustering/louvain_core.cp39-win_amd64.pyd +0 -0
  27. sknetwork/clustering/louvain_core.cpp +22457 -18792
  28. sknetwork/clustering/louvain_core.pyx +86 -96
  29. sknetwork/clustering/postprocess.py +2 -2
  30. sknetwork/clustering/propagation_clustering.py +15 -19
  31. sknetwork/clustering/tests/test_API.py +8 -4
  32. sknetwork/clustering/tests/test_kcenters.py +92 -0
  33. sknetwork/clustering/tests/test_leiden.py +34 -0
  34. sknetwork/clustering/tests/test_louvain.py +3 -4
  35. sknetwork/data/__init__.py +2 -1
  36. sknetwork/data/base.py +28 -0
  37. sknetwork/data/load.py +38 -37
  38. sknetwork/data/models.py +18 -18
  39. sknetwork/data/parse.py +54 -33
  40. sknetwork/data/test_graphs.py +2 -2
  41. sknetwork/data/tests/test_API.py +1 -1
  42. sknetwork/data/tests/test_base.py +14 -0
  43. sknetwork/data/tests/test_load.py +1 -1
  44. sknetwork/data/tests/test_parse.py +9 -12
  45. sknetwork/data/tests/test_test_graphs.py +1 -2
  46. sknetwork/data/toy_graphs.py +18 -18
  47. sknetwork/embedding/__init__.py +0 -1
  48. sknetwork/embedding/base.py +21 -20
  49. sknetwork/embedding/force_atlas.py +3 -2
  50. sknetwork/embedding/louvain_embedding.py +2 -2
  51. sknetwork/embedding/random_projection.py +5 -3
  52. sknetwork/embedding/spectral.py +0 -73
  53. sknetwork/embedding/tests/test_API.py +4 -28
  54. sknetwork/embedding/tests/test_louvain_embedding.py +4 -9
  55. sknetwork/embedding/tests/test_random_projection.py +2 -2
  56. sknetwork/embedding/tests/test_spectral.py +5 -8
  57. sknetwork/embedding/tests/test_svd.py +1 -1
  58. sknetwork/gnn/base.py +4 -4
  59. sknetwork/gnn/base_layer.py +3 -3
  60. sknetwork/gnn/gnn_classifier.py +45 -89
  61. sknetwork/gnn/layer.py +1 -1
  62. sknetwork/gnn/loss.py +1 -1
  63. sknetwork/gnn/optimizer.py +4 -3
  64. sknetwork/gnn/tests/test_base_layer.py +4 -4
  65. sknetwork/gnn/tests/test_gnn_classifier.py +12 -35
  66. sknetwork/gnn/utils.py +8 -8
  67. sknetwork/hierarchy/base.py +29 -2
  68. sknetwork/hierarchy/louvain_hierarchy.py +45 -41
  69. sknetwork/hierarchy/paris.cp39-win_amd64.pyd +0 -0
  70. sknetwork/hierarchy/paris.cpp +27369 -22852
  71. sknetwork/hierarchy/paris.pyx +7 -9
  72. sknetwork/hierarchy/postprocess.py +16 -16
  73. sknetwork/hierarchy/tests/test_API.py +1 -1
  74. sknetwork/hierarchy/tests/test_algos.py +5 -0
  75. sknetwork/hierarchy/tests/test_metrics.py +1 -1
  76. sknetwork/linalg/__init__.py +1 -1
  77. sknetwork/linalg/diteration.cp39-win_amd64.pyd +0 -0
  78. sknetwork/linalg/diteration.cpp +13474 -9454
  79. sknetwork/linalg/diteration.pyx +0 -2
  80. sknetwork/linalg/eig_solver.py +1 -1
  81. sknetwork/linalg/{normalization.py → normalizer.py} +18 -15
  82. sknetwork/linalg/operators.py +1 -1
  83. sknetwork/linalg/ppr_solver.py +1 -1
  84. sknetwork/linalg/push.cp39-win_amd64.pyd +0 -0
  85. sknetwork/linalg/push.cpp +22993 -18807
  86. sknetwork/linalg/push.pyx +0 -2
  87. sknetwork/linalg/svd_solver.py +1 -1
  88. sknetwork/linalg/tests/test_normalization.py +3 -7
  89. sknetwork/linalg/tests/test_operators.py +4 -8
  90. sknetwork/linalg/tests/test_ppr.py +1 -1
  91. sknetwork/linkpred/base.py +13 -2
  92. sknetwork/linkpred/nn.py +6 -6
  93. sknetwork/log.py +19 -0
  94. sknetwork/path/__init__.py +4 -3
  95. sknetwork/path/dag.py +54 -0
  96. sknetwork/path/distances.py +98 -0
  97. sknetwork/path/search.py +13 -47
  98. sknetwork/path/shortest_path.py +37 -162
  99. sknetwork/path/tests/test_dag.py +37 -0
  100. sknetwork/path/tests/test_distances.py +62 -0
  101. sknetwork/path/tests/test_search.py +26 -11
  102. sknetwork/path/tests/test_shortest_path.py +31 -36
  103. sknetwork/ranking/__init__.py +0 -1
  104. sknetwork/ranking/base.py +13 -8
  105. sknetwork/ranking/betweenness.cp39-win_amd64.pyd +0 -0
  106. sknetwork/ranking/betweenness.cpp +5709 -3017
  107. sknetwork/ranking/betweenness.pyx +0 -2
  108. sknetwork/ranking/closeness.py +7 -10
  109. sknetwork/ranking/pagerank.py +14 -14
  110. sknetwork/ranking/postprocess.py +12 -3
  111. sknetwork/ranking/tests/test_API.py +2 -4
  112. sknetwork/ranking/tests/test_betweenness.py +3 -3
  113. sknetwork/ranking/tests/test_closeness.py +3 -7
  114. sknetwork/ranking/tests/test_pagerank.py +11 -5
  115. sknetwork/ranking/tests/test_postprocess.py +5 -0
  116. sknetwork/regression/base.py +19 -2
  117. sknetwork/regression/diffusion.py +24 -10
  118. sknetwork/regression/tests/test_diffusion.py +8 -0
  119. sknetwork/test_base.py +35 -0
  120. sknetwork/test_log.py +15 -0
  121. sknetwork/topology/__init__.py +7 -8
  122. sknetwork/topology/cliques.cp39-win_amd64.pyd +0 -0
  123. sknetwork/topology/{kcliques.cpp → cliques.cpp} +23412 -20276
  124. sknetwork/topology/cliques.pyx +149 -0
  125. sknetwork/topology/core.cp39-win_amd64.pyd +0 -0
  126. sknetwork/topology/{kcore.cpp → core.cpp} +21732 -18867
  127. sknetwork/topology/core.pyx +90 -0
  128. sknetwork/topology/cycles.py +243 -0
  129. sknetwork/topology/minheap.cp39-win_amd64.pyd +0 -0
  130. sknetwork/{utils → topology}/minheap.cpp +19452 -15368
  131. sknetwork/{utils → topology}/minheap.pxd +1 -3
  132. sknetwork/{utils → topology}/minheap.pyx +1 -3
  133. sknetwork/topology/structure.py +3 -43
  134. sknetwork/topology/tests/test_cliques.py +11 -11
  135. sknetwork/topology/tests/test_core.py +19 -0
  136. sknetwork/topology/tests/test_cycles.py +65 -0
  137. sknetwork/topology/tests/test_structure.py +2 -16
  138. sknetwork/topology/tests/test_triangles.py +11 -15
  139. sknetwork/topology/tests/test_wl.py +72 -0
  140. sknetwork/topology/triangles.cp39-win_amd64.pyd +0 -0
  141. sknetwork/topology/triangles.cpp +5056 -2696
  142. sknetwork/topology/triangles.pyx +74 -89
  143. sknetwork/topology/weisfeiler_lehman.py +56 -86
  144. sknetwork/topology/weisfeiler_lehman_core.cp39-win_amd64.pyd +0 -0
  145. sknetwork/topology/weisfeiler_lehman_core.cpp +14727 -10622
  146. sknetwork/topology/weisfeiler_lehman_core.pyx +0 -2
  147. sknetwork/utils/__init__.py +1 -31
  148. sknetwork/utils/check.py +2 -2
  149. sknetwork/utils/format.py +5 -3
  150. sknetwork/utils/membership.py +2 -2
  151. sknetwork/utils/tests/test_check.py +3 -3
  152. sknetwork/utils/tests/test_format.py +3 -1
  153. sknetwork/utils/values.py +1 -1
  154. sknetwork/visualization/__init__.py +2 -2
  155. sknetwork/visualization/dendrograms.py +55 -7
  156. sknetwork/visualization/graphs.py +292 -72
  157. sknetwork/visualization/tests/test_dendrograms.py +9 -9
  158. sknetwork/visualization/tests/test_graphs.py +71 -62
  159. scikit_network-0.30.0.dist-info/RECORD +0 -227
  160. sknetwork/embedding/louvain_hierarchy.py +0 -142
  161. sknetwork/embedding/tests/test_louvain_hierarchy.py +0 -19
  162. sknetwork/path/metrics.py +0 -148
  163. sknetwork/path/tests/test_metrics.py +0 -29
  164. sknetwork/ranking/harmonic.py +0 -82
  165. sknetwork/topology/dag.py +0 -74
  166. sknetwork/topology/dag_core.cp39-win_amd64.pyd +0 -0
  167. sknetwork/topology/dag_core.cpp +0 -23350
  168. sknetwork/topology/dag_core.pyx +0 -38
  169. sknetwork/topology/kcliques.cp39-win_amd64.pyd +0 -0
  170. sknetwork/topology/kcliques.pyx +0 -193
  171. sknetwork/topology/kcore.cp39-win_amd64.pyd +0 -0
  172. sknetwork/topology/kcore.pyx +0 -120
  173. sknetwork/topology/tests/test_cores.py +0 -21
  174. sknetwork/topology/tests/test_dag.py +0 -26
  175. sknetwork/topology/tests/test_wl_coloring.py +0 -49
  176. sknetwork/topology/tests/test_wl_kernel.py +0 -31
  177. sknetwork/utils/base.py +0 -35
  178. sknetwork/utils/minheap.cp39-win_amd64.pyd +0 -0
  179. sknetwork/utils/simplex.py +0 -140
  180. sknetwork/utils/tests/test_base.py +0 -28
  181. sknetwork/utils/tests/test_bunch.py +0 -16
  182. sknetwork/utils/tests/test_projection_simplex.py +0 -33
  183. sknetwork/utils/tests/test_verbose.py +0 -15
  184. sknetwork/utils/verbose.py +0 -37
  185. {scikit_network-0.30.0.dist-info → scikit_network-0.32.1.dist-info}/LICENSE +0 -0
  186. {scikit_network-0.30.0.dist-info → scikit_network-0.32.1.dist-info}/top_level.txt +0 -0
  187. sknetwork/{utils → data}/timeout.py +0 -0
sknetwork/data/load.py CHANGED
@@ -1,7 +1,7 @@
1
1
  #!/usr/bin/env python3
2
2
  # -*- coding: utf-8 -*-
3
3
  """
4
- Created on November 15, 2019
4
+ Created in November 2019
5
5
  @author: Quentin Lutz <qlutz@enst.fr>
6
6
  """
7
7
 
@@ -19,12 +19,15 @@ import numpy as np
19
19
  from scipy import sparse
20
20
 
21
21
  from sknetwork.data.parse import from_csv, load_labels, load_header, load_metadata
22
- from sknetwork.utils import Bunch
22
+ from sknetwork.data.base import Bunch
23
23
  from sknetwork.utils.check import is_square
24
- from sknetwork.utils.verbose import Log
24
+ from sknetwork.log import Log
25
25
 
26
26
  NETSET_URL = 'https://netset.telecom-paris.fr'
27
27
 
28
+ # former name of Dataset
29
+ Bunch = Bunch
30
+
28
31
 
29
32
  def is_within_directory(directory, target):
30
33
  """Utility function."""
@@ -132,7 +135,7 @@ def load_netset(name: Optional[str] = None, data_home: Optional[Union[str, Path]
132
135
  if not data_path.exists():
133
136
  name_npz = name + '_npz.tar.gz'
134
137
  try:
135
- logger.print('Downloading', name, 'from NetSet...')
138
+ logger.print_log('Downloading', name, 'from NetSet...')
136
139
  urlretrieve(folder_npz + name_npz, data_netset / name_npz)
137
140
  except HTTPError:
138
141
  raise ValueError('Invalid dataset: ' + name + '.'
@@ -141,11 +144,11 @@ def load_netset(name: Optional[str] = None, data_home: Optional[Union[str, Path]
141
144
  except ConnectionResetError: # pragma: no cover
142
145
  raise RuntimeError("Could not reach Netset.")
143
146
  with tarfile.open(data_netset / name_npz, 'r:gz') as tar_ref:
144
- logger.print('Unpacking archive...')
147
+ logger.print_log('Unpacking archive...')
145
148
  safe_extract(tar_ref, data_path)
146
149
 
147
150
  files = [file for file in listdir(data_path)]
148
- logger.print('Parsing files...')
151
+ logger.print_log('Parsing files...')
149
152
  for file in files:
150
153
  file_components = file.split('.')
151
154
  if len(file_components) == 2:
@@ -159,7 +162,7 @@ def load_netset(name: Optional[str] = None, data_home: Optional[Union[str, Path]
159
162
  dataset[file_name] = pickle.load(f)
160
163
 
161
164
  clean_data_home(data_netset)
162
- logger.print('Done.')
165
+ logger.print_log('Done.')
163
166
  return dataset
164
167
 
165
168
 
@@ -192,7 +195,7 @@ def load_konect(name: str, data_home: Optional[Union[str, Path]] = None, auto_nu
192
195
 
193
196
  Notes
194
197
  -----
195
- An attribute `meta` of the `Bunch` class is used to store information about the dataset if present. In any case,
198
+ An attribute `meta` of the `Dataset` class is used to store information about the dataset if present. In any case,
196
199
  `meta` has the attribute `name` which, if not given, is equal to the name of the dataset as passed to this function.
197
200
 
198
201
  References
@@ -221,11 +224,11 @@ def load_konect(name: str, data_home: Optional[Union[str, Path]] = None, auto_nu
221
224
  data_path = data_konect / name
222
225
  name_tar = name + '.tar.bz2'
223
226
  if not data_path.exists():
224
- logger.print('Downloading', name, 'from Konect...')
227
+ logger.print_log('Downloading', name, 'from Konect...')
225
228
  try:
226
229
  urlretrieve('http://konect.cc/files/download.tsv.' + name_tar, data_konect / name_tar)
227
230
  with tarfile.open(data_konect / name_tar, 'r:bz2') as tar_ref:
228
- logger.print('Unpacking archive...')
231
+ logger.print_log('Unpacking archive...')
229
232
  safe_extract(tar_ref, data_path)
230
233
  except (HTTPError, tarfile.ReadError):
231
234
  raise ValueError('Invalid dataset ' + name + '.'
@@ -234,7 +237,7 @@ def load_konect(name: str, data_home: Optional[Union[str, Path]] = None, auto_nu
234
237
  except (URLError, ConnectionResetError): # pragma: no cover
235
238
  raise RuntimeError("Could not reach Konect.")
236
239
  elif exists(data_path / (name + '_bundle')):
237
- logger.print('Loading from local bundle...')
240
+ logger.print_log('Loading from local bundle...')
238
241
  return load_from_numpy_bundle(name + '_bundle', data_path)
239
242
 
240
243
  dataset = Bunch()
@@ -242,12 +245,12 @@ def load_konect(name: str, data_home: Optional[Union[str, Path]] = None, auto_nu
242
245
  if not path.exists() or len(listdir(path)) == 0:
243
246
  raise Exception("No data downloaded.")
244
247
  files = [file for file in listdir(path) if name in file]
245
- logger.print('Parsing files...')
248
+ logger.print_log('Parsing files...')
246
249
  matrix = [file for file in files if 'out.' in file]
247
250
  if matrix:
248
251
  file = matrix[0]
249
252
  directed, bipartite, weighted = load_header(path / file)
250
- dataset = from_csv(path / file, directed=directed, bipartite=bipartite, weighted=weighted)
253
+ dataset = from_csv(path / file, directed=directed, bipartite=bipartite, weighted=weighted, reindex=True)
251
254
 
252
255
  metadata = [file for file in files if 'meta.' in file]
253
256
  if metadata:
@@ -278,7 +281,7 @@ def load_konect(name: str, data_home: Optional[Union[str, Path]] = None, auto_nu
278
281
 
279
282
 
280
283
  def save_to_numpy_bundle(data: Bunch, bundle_name: str, data_home: Optional[Union[str, Path]] = None):
281
- """Save a Bunch in the specified data home to a collection of Numpy and Pickle files for faster subsequent loads.
284
+ """Save a dataset in the specified data home to a collection of Numpy and Pickle files for faster subsequent loads.
282
285
 
283
286
  Parameters
284
287
  ----------
@@ -297,15 +300,13 @@ def save_to_numpy_bundle(data: Bunch, bundle_name: str, data_home: Optional[Unio
297
300
  sparse.save_npz(data_path / attribute, data[attribute])
298
301
  elif type(data[attribute]) == np.ndarray:
299
302
  np.save(data_path / attribute, data[attribute])
300
- elif type(data[attribute]) == Bunch or type(data[attribute]) == str:
303
+ else:
301
304
  with open(data_path / (attribute + '.p'), 'wb') as file:
302
305
  pickle.dump(data[attribute], file)
303
- else:
304
- raise TypeError('Unsupported data attribute type '+str(type(data[attribute])) + '.')
305
306
 
306
307
 
307
308
  def load_from_numpy_bundle(bundle_name: str, data_home: Optional[Union[str, Path]] = None):
308
- """Load a Bunch from a collection of Numpy and Pickle files (inverse function of ``save_to_numpy_bundle``).
309
+ """Load a dataset from a collection of Numpy and Pickle files (inverse function of ``save_to_numpy_bundle``).
309
310
 
310
311
  Parameters
311
312
  ----------
@@ -340,8 +341,8 @@ def load_from_numpy_bundle(bundle_name: str, data_home: Optional[Union[str, Path
340
341
 
341
342
 
342
343
  def save(folder: Union[str, Path], data: Union[sparse.csr_matrix, Bunch]):
343
- """Save a Bunch or a CSR matrix in the current directory to a collection of Numpy and Pickle files for faster
344
- subsequent loads. Supported attribute types include sparse matrices, NumPy arrays, strings and Bunch.
344
+ """Save a dataset or a CSR matrix in the current directory to a collection of Numpy and Pickle files for faster
345
+ subsequent loads. Supported attribute types include sparse matrices, NumPy arrays, strings and objects Dataset.
345
346
 
346
347
  Parameters
347
348
  ----------
@@ -353,11 +354,11 @@ def save(folder: Union[str, Path], data: Union[sparse.csr_matrix, Bunch]):
353
354
  Example
354
355
  -------
355
356
  >>> from sknetwork.data import save
356
- >>> my_dataset = Bunch()
357
- >>> my_dataset.adjacency = sparse.csr_matrix(np.random.random((3, 3)) < 0.5)
358
- >>> my_dataset.names = np.array(['a', 'b', 'c'])
359
- >>> save('my_dataset', my_dataset)
360
- >>> 'my_dataset' in listdir('.')
357
+ >>> dataset = Bunch()
358
+ >>> dataset.adjacency = sparse.csr_matrix(np.random.random((3, 3)) < 0.5)
359
+ >>> dataset.names = np.array(['a', 'b', 'c'])
360
+ >>> save('dataset', dataset)
361
+ >>> 'dataset' in listdir('.')
361
362
  True
362
363
  """
363
364
  folder = Path(folder)
@@ -365,12 +366,12 @@ def save(folder: Union[str, Path], data: Union[sparse.csr_matrix, Bunch]):
365
366
  if folder.exists():
366
367
  shutil.rmtree(folder)
367
368
  if isinstance(data, sparse.csr_matrix):
368
- bunch = Bunch()
369
+ dataset = Bunch()
369
370
  if is_square(data):
370
- bunch.adjacency = data
371
+ dataset.adjacency = data
371
372
  else:
372
- bunch.biadjacency = data
373
- data = bunch
373
+ dataset.biadjacency = data
374
+ data = dataset
374
375
  if folder.is_absolute():
375
376
  save_to_numpy_bundle(data, folder, '/')
376
377
  else:
@@ -378,7 +379,7 @@ def save(folder: Union[str, Path], data: Union[sparse.csr_matrix, Bunch]):
378
379
 
379
380
 
380
381
  def load(folder: Union[str, Path]):
381
- """Load a Bunch from a previously created bundle from the current directory (inverse function of ``save``).
382
+ """Load a dataset from a previously created bundle from the current directory (inverse function of ``save``).
382
383
 
383
384
  Parameters
384
385
  ----------
@@ -393,13 +394,13 @@ def load(folder: Union[str, Path]):
393
394
  Example
394
395
  -------
395
396
  >>> from sknetwork.data import save
396
- >>> my_dataset = Bunch()
397
- >>> my_dataset.adjacency = sparse.csr_matrix(np.random.random((3, 3)) < 0.5)
398
- >>> my_dataset.names = np.array(['a', 'b', 'c'])
399
- >>> save('my_dataset', my_dataset)
400
- >>> loaded_graph = load('my_dataset')
401
- >>> loaded_graph.names[0]
402
- 'a'
397
+ >>> dataset = Bunch()
398
+ >>> dataset.adjacency = sparse.csr_matrix(np.random.random((3, 3)) < 0.5)
399
+ >>> dataset.names = np.array(['a', 'b', 'c'])
400
+ >>> save('dataset', dataset)
401
+ >>> dataset = load('dataset')
402
+ >>> print(dataset.names)
403
+ ['a' 'b' 'c']
403
404
  """
404
405
  folder = Path(folder)
405
406
  if folder.is_absolute():
sknetwork/data/models.py CHANGED
@@ -1,7 +1,7 @@
1
1
  #!/usr/bin/env python3
2
2
  # -*- coding: utf-8 -*-
3
3
  """
4
- Created on Jul 1, 2019
4
+ Created in July 2019
5
5
  @author: Thomas Bonald <bonald@enst.fr>
6
6
  @author: Quentin Lutz <qlutz@enst.fr>
7
7
  @author: Nathan de Lara <nathan.delara@polytechnique.org>
@@ -12,8 +12,8 @@ from typing import Union, Optional, Iterable
12
12
  import numpy as np
13
13
  from scipy import sparse
14
14
 
15
+ from sknetwork.data.base import Bunch
15
16
  from sknetwork.data.parse import from_edge_list
16
- from sknetwork.utils import Bunch
17
17
  from sknetwork.utils.check import check_random_state
18
18
  from sknetwork.utils.format import directed2undirected
19
19
 
@@ -36,12 +36,12 @@ def block_model(sizes: Iterable, p_in: Union[float, list, np.ndarray] = .2, p_ou
36
36
  self_loops :
37
37
  If ``True``, allow self-loops.
38
38
  metadata :
39
- If ``True``, return a `Bunch` object with labels.
39
+ If ``True``, return a `Dataset` object with labels.
40
40
  seed :
41
41
  Seed of the random generator (optional).
42
42
  Returns
43
43
  -------
44
- adjacency or graph : Union[sparse.csr_matrix, Bunch]
44
+ adjacency or graph : Union[sparse.csr_matrix, Dataset]
45
45
  Adjacency matrix or graph with metadata (labels).
46
46
 
47
47
  Example
@@ -137,11 +137,11 @@ def linear_digraph(n: int = 3, metadata: bool = False) -> Union[sparse.csr_matri
137
137
  n : int
138
138
  Number of nodes.
139
139
  metadata : bool
140
- If ``True``, return a `Bunch` object with metadata.
140
+ If ``True``, return a `Dataset` object with metadata.
141
141
 
142
142
  Returns
143
143
  -------
144
- adjacency or graph : Union[sparse.csr_matrix, Bunch]
144
+ adjacency or graph : Union[sparse.csr_matrix, Dataset]
145
145
  Adjacency matrix or graph with metadata (positions).
146
146
 
147
147
  Example
@@ -174,11 +174,11 @@ def linear_graph(n: int = 3, metadata: bool = False) -> Union[sparse.csr_matrix,
174
174
  n : int
175
175
  Number of nodes.
176
176
  metadata : bool
177
- If ``True``, return a `Bunch` object with metadata.
177
+ If ``True``, return a `Dataset` object with metadata.
178
178
 
179
179
  Returns
180
180
  -------
181
- adjacency or graph : Union[sparse.csr_matrix, Bunch]
181
+ adjacency or graph : Union[sparse.csr_matrix, Dataset]
182
182
  Adjacency matrix or graph with metadata (positions).
183
183
 
184
184
  Example
@@ -226,11 +226,11 @@ def cyclic_digraph(n: int = 3, metadata: bool = False) -> Union[sparse.csr_matri
226
226
  n : int
227
227
  Number of nodes.
228
228
  metadata : bool
229
- If ``True``, return a `Bunch` object with metadata.
229
+ If ``True``, return a `Dataset` object with metadata.
230
230
 
231
231
  Returns
232
232
  -------
233
- adjacency or graph : Union[sparse.csr_matrix, Bunch]
233
+ adjacency or graph : Union[sparse.csr_matrix, Dataset]
234
234
  Adjacency matrix or graph with metadata (positions).
235
235
 
236
236
  Example
@@ -261,11 +261,11 @@ def cyclic_graph(n: int = 3, metadata: bool = False) -> Union[sparse.csr_matrix,
261
261
  n : int
262
262
  Number of nodes.
263
263
  metadata : bool
264
- If ``True``, return a `Bunch` object with metadata.
264
+ If ``True``, return a `Dataset` object with metadata.
265
265
 
266
266
  Returns
267
267
  -------
268
- adjacency or graph : Union[sparse.csr_matrix, Bunch]
268
+ adjacency or graph : Union[sparse.csr_matrix, Dataset]
269
269
  Adjacency matrix or graph with metadata (positions).
270
270
 
271
271
  Example
@@ -291,11 +291,11 @@ def grid(n1: int = 10, n2: int = 10, metadata: bool = False) -> Union[sparse.csr
291
291
  n1, n2 : int
292
292
  Grid dimension.
293
293
  metadata : bool
294
- If ``True``, return a `Bunch` object with metadata.
294
+ If ``True``, return a `Dataset` object with metadata.
295
295
 
296
296
  Returns
297
297
  -------
298
- adjacency or graph : Union[sparse.csr_matrix, Bunch]
298
+ adjacency or graph : Union[sparse.csr_matrix, Dataset]
299
299
  Adjacency matrix or graph with metadata (positions).
300
300
 
301
301
  Example
@@ -328,11 +328,11 @@ def star(n_branches: int = 3, metadata: bool = False) -> Union[sparse.csr_matrix
328
328
  n_branches : int
329
329
  Number of branches.
330
330
  metadata : bool
331
- If ``True``, return a `Bunch` object with metadata (positions).
331
+ If ``True``, return a `Dataset` object with metadata (positions).
332
332
 
333
333
  Returns
334
334
  -------
335
- adjacency or graph : Union[sparse.csr_matrix, Bunch]
335
+ adjacency or graph : Union[sparse.csr_matrix, Dataset]
336
336
  Adjacency matrix or graph with metadata (positions).
337
337
 
338
338
  Example
@@ -416,10 +416,10 @@ def watts_strogatz(n: int = 100, degree: int = 6, prob: float = 0.05, seed: Opti
416
416
  seed :
417
417
  Seed of the random generator (optional).
418
418
  metadata :
419
- If ``True``, return a `Bunch` object with metadata.
419
+ If ``True``, return a `Dataset` object with metadata.
420
420
  Returns
421
421
  -------
422
- adjacency or graph : Union[sparse.csr_matrix, Bunch]
422
+ adjacency or graph : Union[sparse.csr_matrix, Dataset]
423
423
  Adjacency matrix or graph with metadata (positions).
424
424
 
425
425
  Example
sknetwork/data/parse.py CHANGED
@@ -1,25 +1,25 @@
1
1
  #!/usr/bin/env python3
2
2
  # -*- coding: utf-8 -*-
3
3
  """
4
- Created on Dec 5, 2018
4
+ Created in December 2018
5
5
  @author: Quentin Lutz <qlutz@enst.fr>
6
- Nathan de Lara <nathan.delara@polytechnique.org>
7
- Thomas Bonald <bonald@enst.fr>
6
+ @author: Nathan de Lara <nathan.delara@polytechnique.org>
7
+ @author: Thomas Bonald <bonald@enst.fr>
8
8
  """
9
9
 
10
10
  from csv import reader
11
- from typing import Dict, List, Tuple, Union
11
+ from typing import Dict, List, Tuple, Union, Optional
12
12
  from xml.etree import ElementTree
13
13
 
14
14
  import numpy as np
15
15
  from scipy import sparse
16
16
 
17
- from sknetwork.utils import Bunch
17
+ from sknetwork.data.base import Bunch
18
18
  from sknetwork.utils.format import directed2undirected
19
19
 
20
20
 
21
21
  def from_edge_list(edge_list: Union[np.ndarray, List[Tuple]], directed: bool = False,
22
- bipartite: bool = False, weighted: bool = True, reindex: bool = True,
22
+ bipartite: bool = False, weighted: bool = True, reindex: bool = False, shape: Optional[tuple] = None,
23
23
  sum_duplicates: bool = True, matrix_only: bool = None) -> Union[Bunch, sparse.csr_matrix]:
24
24
  """Load a graph from an edge list.
25
25
 
@@ -37,16 +37,19 @@ def from_edge_list(edge_list: Union[np.ndarray, List[Tuple]], directed: bool = F
37
37
  reindex : bool
38
38
  If ``True``, reindex nodes and returns the original node indices as names.
39
39
  Reindexing is enforced if nodes are not integers.
40
+ shape : tuple
41
+ Shape of the adjacency or biadjacency matrix.
42
+ If not specified or if nodes are reindexed, the shape is the smallest compatible with node indices.
40
43
  sum_duplicates : bool
41
44
  If ``True`` (default), sums weights of duplicate edges.
42
45
  Otherwise, the weight of each edge is that of the first occurrence of this edge.
43
46
  matrix_only : bool
44
47
  If ``True``, returns only the adjacency or biadjacency matrix.
45
- Otherwise, returns a ``Bunch`` object with graph attributes (e.g., node names).
48
+ Otherwise, returns a ``Dataset`` object with graph attributes (e.g., node names).
46
49
  If not specified (default), selects the most appropriate format.
47
50
  Returns
48
51
  -------
49
- graph : :class:`Bunch` (including node names) or sparse matrix
52
+ graph : :class:`Dataset` (including node names) or sparse matrix
50
53
 
51
54
  Examples
52
55
  --------
@@ -83,12 +86,14 @@ def from_edge_list(edge_list: Union[np.ndarray, List[Tuple]], directed: bool = F
83
86
  else:
84
87
  raise TypeError('The edge list must be given as a NumPy array or a list of tuples.')
85
88
  return from_edge_array(edge_array=edge_array, weights=weights, directed=directed, bipartite=bipartite,
86
- weighted=weighted, reindex=reindex, sum_duplicates=sum_duplicates, matrix_only=matrix_only)
89
+ weighted=weighted, reindex=reindex, shape=shape, sum_duplicates=sum_duplicates,
90
+ matrix_only=matrix_only)
87
91
 
88
92
 
89
93
  def from_adjacency_list(adjacency_list: Union[List[List], Dict[str, List]], directed: bool = False,
90
- bipartite: bool = False, weighted: bool = True, reindex: bool = True,
91
- sum_duplicates: bool = True, matrix_only: bool = None) -> Union[Bunch, sparse.csr_matrix]:
94
+ bipartite: bool = False, weighted: bool = True, reindex: bool = False,
95
+ shape: Optional[tuple] = None, sum_duplicates: bool = True, matrix_only: bool = None) \
96
+ -> Union[Bunch, sparse.csr_matrix]:
92
97
  """Load a graph from an adjacency list.
93
98
 
94
99
  Parameters
@@ -104,16 +109,19 @@ def from_adjacency_list(adjacency_list: Union[List[List], Dict[str, List]], dire
104
109
  reindex : bool
105
110
  If ``True``, reindex nodes and returns the original node indices as names.
106
111
  Reindexing is enforced if nodes are not integers.
112
+ shape : tuple
113
+ Shape of the adjacency or biadjacency matrix.
114
+ If not specified or if nodes are reindexed, the shape is the smallest compatible with node indices.
107
115
  sum_duplicates : bool
108
116
  If ``True`` (default), sums weights of duplicate edges.
109
117
  Otherwise, the weight of each edge is that of the first occurrence of this edge.
110
118
  matrix_only : bool
111
119
  If ``True``, returns only the adjacency or biadjacency matrix.
112
- Otherwise, returns a ``Bunch`` object with graph attributes (e.g., node names).
120
+ Otherwise, returns a ``Dataset`` object with graph attributes (e.g., node names).
113
121
  If not specified (default), selects the most appropriate format.
114
122
  Returns
115
123
  -------
116
- graph : :class:`Bunch` or sparse matrix
124
+ graph : :class:`Dataset` or sparse matrix
117
125
 
118
126
  Example
119
127
  -------
@@ -134,12 +142,12 @@ def from_adjacency_list(adjacency_list: Union[List[List], Dict[str, List]], dire
134
142
  else:
135
143
  raise TypeError('The adjacency list must be given as a list of lists or a dict of lists.')
136
144
  return from_edge_list(edge_list=edge_list, directed=directed, bipartite=bipartite, weighted=weighted,
137
- reindex=reindex, sum_duplicates=sum_duplicates, matrix_only=matrix_only)
145
+ reindex=reindex, shape=shape, sum_duplicates=sum_duplicates, matrix_only=matrix_only)
138
146
 
139
147
 
140
148
  def from_edge_array(edge_array: np.ndarray, weights: np.ndarray = None, directed: bool = False, bipartite: bool = False,
141
- weighted: bool = True, reindex: bool = True, sum_duplicates: bool = True,
142
- matrix_only: bool = None) -> Union[Bunch, sparse.csr_matrix]:
149
+ weighted: bool = True, reindex: bool = False, shape: Optional[tuple] = None,
150
+ sum_duplicates: bool = True, matrix_only: bool = None) -> Union[Bunch, sparse.csr_matrix]:
143
151
  """Load a graph from an edge array of shape (n_edges, 2) and weights (optional).
144
152
 
145
153
  Parameters
@@ -157,17 +165,20 @@ def from_edge_array(edge_array: np.ndarray, weights: np.ndarray = None, directed
157
165
  reindex : bool
158
166
  If ``True``, reindex nodes and returns the original node indices as names.
159
167
  Reindexing is enforced if nodes are not integers.
168
+ shape : tuple
169
+ Shape of the adjacency or biadjacency matrix.
170
+ If not specified or if nodes are reindexed, the shape is the smallest compatible with node indices.
160
171
  sum_duplicates : bool
161
172
  If ``True`` (default), sums weights of duplicate edges.
162
173
  Otherwise, the weight of each edge is that of the first occurrence of this edge.
163
174
  matrix_only : bool
164
175
  If ``True``, returns only the adjacency or biadjacency matrix.
165
- Otherwise, returns a ``Bunch`` object with graph attributes (e.g., node names).
176
+ Otherwise, returns a ``Dataset`` object with graph attributes (e.g., node names).
166
177
  If not specified (default), selects the most appropriate format.
167
178
 
168
179
  Returns
169
180
  -------
170
- graph : :class:`Bunch` or sparse matrix
181
+ graph : :class:`Dataset` or sparse matrix
171
182
  """
172
183
  try:
173
184
  edge_array = edge_array.astype(float)
@@ -195,28 +206,34 @@ def from_edge_array(edge_array: np.ndarray, weights: np.ndarray = None, directed
195
206
  if bipartite:
196
207
  row = edge_array[:, 0]
197
208
  col = edge_array[:, 1]
198
- if row.dtype != int or (reindex and len(set(row)) < max(row) + 1):
209
+ if row.dtype != int or reindex:
199
210
  names_row, row = np.unique(row, return_inverse=True)
200
211
  graph.names_row = names_row
201
212
  graph.names = names_row
202
213
  n_row = len(names_row)
214
+ elif shape is not None:
215
+ n_row = max(shape[0], max(row) + 1)
203
216
  else:
204
217
  n_row = max(row) + 1
205
- if col.dtype != int or (reindex and len(set(col)) < max(col) + 1):
218
+ if col.dtype != int or reindex:
206
219
  names_col, col = np.unique(col, return_inverse=True)
207
220
  graph.names_col = names_col
208
221
  n_col = len(names_col)
222
+ elif shape is not None:
223
+ n_col = max(shape[1], max(col) + 1)
209
224
  else:
210
225
  n_col = max(col) + 1
211
226
  matrix = sparse.csr_matrix((weights, (row, col)), shape=(n_row, n_col))
212
227
  graph.biadjacency = matrix
213
228
  else:
214
229
  nodes = edge_array.ravel()
215
- if nodes.dtype != int or (reindex and len(set(nodes)) < max(nodes) + 1):
230
+ if nodes.dtype != int or reindex:
216
231
  names, nodes = np.unique(nodes, return_inverse=True)
217
232
  graph.names = names
218
233
  n = len(names)
219
234
  edge_array = nodes.reshape(-1, 2)
235
+ elif shape is not None:
236
+ n = max(shape[0], max(nodes) + 1)
220
237
  else:
221
238
  n = max(nodes) + 1
222
239
  row = edge_array[:, 0]
@@ -233,8 +250,8 @@ def from_edge_array(edge_array: np.ndarray, weights: np.ndarray = None, directed
233
250
 
234
251
  def from_csv(file_path: str, delimiter: str = None, sep: str = None, comments: str = '#%',
235
252
  data_structure: str = None, directed: bool = False, bipartite: bool = False, weighted: bool = True,
236
- reindex: bool = True, sum_duplicates: bool = True, matrix_only: bool = None) \
237
- -> Union[Bunch, sparse.csr_matrix]:
253
+ reindex: bool = False, shape: Optional[tuple] = None, sum_duplicates: bool = True,
254
+ matrix_only: bool = None) -> Union[Bunch, sparse.csr_matrix]:
238
255
  """Load a graph from a CSV or TSV file.
239
256
  The delimiter can be specified (e.g., ' ' for space-separated values).
240
257
 
@@ -249,9 +266,10 @@ def from_csv(file_path: str, delimiter: str = None, sep: str = None, comments: s
249
266
  comments : str
250
267
  Characters for comment lines.
251
268
  data_structure : str
252
- If 'edge_list', considers each row of the file as an edge (tuple of size 2 or 3).
253
- If 'adjacency_list', considers each row of the file as an adjacency list (list of neighbors).
254
- If 'adjacency_dict', considers each row of the file as an adjacency dictionary with key
269
+ If 'edge_list', consider each row of the file as an edge (tuple of size 2 or 3).
270
+ If 'adjacency_list', consider each row of the file as an adjacency list (list of neighbors,
271
+ in the order of node indices; an empty line means no neighbor).
272
+ If 'adjacency_dict', consider each row of the file as an adjacency dictionary with key
255
273
  given by the first column (node: list of neighbors).
256
274
  If ``None`` (default), data_structure is guessed from the first rows of the file.
257
275
  directed : bool
@@ -263,17 +281,20 @@ def from_csv(file_path: str, delimiter: str = None, sep: str = None, comments: s
263
281
  reindex : bool
264
282
  If ``True``, reindex nodes and returns the original node indices as names.
265
283
  Reindexing is enforced if nodes are not integers.
284
+ shape : tuple
285
+ Shape of the adjacency or biadjacency matrix.
286
+ If not specified or if nodes are reindexed, the shape is the smallest compatible with node indices.
266
287
  sum_duplicates : bool
267
288
  If ``True`` (default), sums weights of duplicate edges.
268
289
  Otherwise, the weight of each edge is that of the first occurrence of this edge.
269
290
  matrix_only : bool
270
291
  If ``True``, returns only the adjacency or biadjacency matrix.
271
- Otherwise, returns a ``Bunch`` object with graph attributes (e.g., node names).
292
+ Otherwise, returns a ``Dataset`` object with graph attributes (e.g., node names).
272
293
  If not specified (default), selects the most appropriate format.
273
294
 
274
295
  Returns
275
296
  -------
276
- graph: :class:`Bunch` or sparse matrix
297
+ graph: :class:`Dataset` or sparse matrix
277
298
  """
278
299
  header_length, delimiter_guess, comment_guess, data_structure_guess = scan_header(file_path, delimiters=delimiter,
279
300
  comments=comments)
@@ -295,7 +316,7 @@ def from_csv(file_path: str, delimiter: str = None, sep: str = None, comments: s
295
316
  else:
296
317
  weights = None
297
318
  return from_edge_array(edge_array=edge_array, weights=weights, directed=directed, bipartite=bipartite,
298
- weighted=weighted, reindex=reindex, sum_duplicates=sum_duplicates,
319
+ weighted=weighted, reindex=reindex, shape=shape, sum_duplicates=sum_duplicates,
299
320
  matrix_only=matrix_only)
300
321
  except TypeError:
301
322
  pass
@@ -306,17 +327,17 @@ def from_csv(file_path: str, delimiter: str = None, sep: str = None, comments: s
306
327
  if data_structure == 'edge_list':
307
328
  edge_list = [tuple(row) for row in csv_reader]
308
329
  return from_edge_list(edge_list=edge_list, directed=directed, bipartite=bipartite,
309
- weighted=weighted, reindex=reindex, sum_duplicates=sum_duplicates,
330
+ weighted=weighted, reindex=reindex, shape=shape, sum_duplicates=sum_duplicates,
310
331
  matrix_only=matrix_only)
311
332
  elif data_structure == 'adjacency_list':
312
333
  adjacency_list = [row for row in csv_reader]
313
334
  return from_adjacency_list(adjacency_list=adjacency_list, directed=directed, bipartite=bipartite,
314
- weighted=weighted, reindex=reindex, sum_duplicates=sum_duplicates,
335
+ weighted=weighted, reindex=reindex, shape=shape, sum_duplicates=sum_duplicates,
315
336
  matrix_only=matrix_only)
316
337
  elif data_structure == 'adjacency_dict':
317
338
  adjacency_list = {row[0]: row[1:] for row in csv_reader}
318
339
  return from_adjacency_list(adjacency_list=adjacency_list, directed=directed, bipartite=bipartite,
319
- weighted=weighted, reindex=reindex, sum_duplicates=sum_duplicates,
340
+ weighted=weighted, reindex=reindex, shape=shape, sum_duplicates=sum_duplicates,
320
341
  matrix_only=matrix_only)
321
342
 
322
343
 
@@ -439,7 +460,7 @@ def from_graphml(file_path: str, weight_key: str = 'weight', max_string_size: in
439
460
  Returns
440
461
  -------
441
462
  data: :class:`Bunch`
442
- The dataset in a bunch with the adjacency as a CSR matrix.
463
+ The dataset in a Dataset with the adjacency as a CSR matrix.
443
464
  """
444
465
  # see http://graphml.graphdrawing.org/primer/graphml-primer.html
445
466
  # and http://graphml.graphdrawing.org/specification/dtd.html#top
@@ -40,7 +40,7 @@ def test_bigraph():
40
40
  return sparse.csr_matrix((data, (row, col)), shape=(6, 8))
41
41
 
42
42
 
43
- def test_graph_disconnect():
43
+ def test_disconnected_graph():
44
44
  """Simple disconnected undirected graph, used for testing.
45
45
  10 nodes, 10 edges.
46
46
  """
@@ -68,7 +68,7 @@ def test_graph_bool():
68
68
  return adjacency
69
69
 
70
70
 
71
- def test_graph_clique():
71
+ def test_clique():
72
72
  """Clique graph, used for testing (10 nodes, 45 edges).
73
73
  """
74
74
  n = 10
@@ -8,7 +8,7 @@ import warnings
8
8
 
9
9
  from sknetwork.data.load import *
10
10
  from sknetwork.data.toy_graphs import *
11
- from sknetwork.utils import Bunch
11
+ from sknetwork.data import Bunch
12
12
 
13
13
 
14
14
  class TestDataAPI(unittest.TestCase):
@@ -0,0 +1,14 @@
1
+ # -*- coding: utf-8 -*-
2
+ """tests for dataset"""
3
+
4
+ import unittest
5
+
6
+ from sknetwork.data.base import Bunch
7
+
8
+
9
+ class TestDataset(unittest.TestCase):
10
+
11
+ def test(self):
12
+ dataset = Bunch(name='dataset')
13
+ self.assertEqual(dataset.name, 'dataset')
14
+ self.assertEqual(dataset['name'], 'dataset')
@@ -10,7 +10,7 @@ import numpy as np
10
10
 
11
11
  from sknetwork.data.load import load_netset, load_konect, clear_data_home, save, load
12
12
  from sknetwork.data.toy_graphs import house, star_wars
13
- from sknetwork.utils.timeout import TimeOut
13
+ from sknetwork.data.timeout import TimeOut
14
14
 
15
15
 
16
16
  class TestLoader(unittest.TestCase):