scikit-network 0.28.3__cp39-cp39-macosx_12_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of scikit-network might be problematic; see the details below.

Files changed (240)
  1. scikit_network-0.28.3.dist-info/AUTHORS.rst +41 -0
  2. scikit_network-0.28.3.dist-info/LICENSE +34 -0
  3. scikit_network-0.28.3.dist-info/METADATA +457 -0
  4. scikit_network-0.28.3.dist-info/RECORD +240 -0
  5. scikit_network-0.28.3.dist-info/WHEEL +5 -0
  6. scikit_network-0.28.3.dist-info/top_level.txt +1 -0
  7. sknetwork/__init__.py +21 -0
  8. sknetwork/classification/__init__.py +8 -0
  9. sknetwork/classification/base.py +84 -0
  10. sknetwork/classification/base_rank.py +143 -0
  11. sknetwork/classification/diffusion.py +134 -0
  12. sknetwork/classification/knn.py +162 -0
  13. sknetwork/classification/metrics.py +205 -0
  14. sknetwork/classification/pagerank.py +66 -0
  15. sknetwork/classification/propagation.py +152 -0
  16. sknetwork/classification/tests/__init__.py +1 -0
  17. sknetwork/classification/tests/test_API.py +35 -0
  18. sknetwork/classification/tests/test_diffusion.py +37 -0
  19. sknetwork/classification/tests/test_knn.py +24 -0
  20. sknetwork/classification/tests/test_metrics.py +53 -0
  21. sknetwork/classification/tests/test_pagerank.py +20 -0
  22. sknetwork/classification/tests/test_propagation.py +24 -0
  23. sknetwork/classification/vote.cpython-39-darwin.so +0 -0
  24. sknetwork/classification/vote.pyx +58 -0
  25. sknetwork/clustering/__init__.py +7 -0
  26. sknetwork/clustering/base.py +102 -0
  27. sknetwork/clustering/kmeans.py +142 -0
  28. sknetwork/clustering/louvain.py +255 -0
  29. sknetwork/clustering/louvain_core.cpython-39-darwin.so +0 -0
  30. sknetwork/clustering/louvain_core.pyx +134 -0
  31. sknetwork/clustering/metrics.py +91 -0
  32. sknetwork/clustering/postprocess.py +66 -0
  33. sknetwork/clustering/propagation_clustering.py +108 -0
  34. sknetwork/clustering/tests/__init__.py +1 -0
  35. sknetwork/clustering/tests/test_API.py +37 -0
  36. sknetwork/clustering/tests/test_kmeans.py +47 -0
  37. sknetwork/clustering/tests/test_louvain.py +104 -0
  38. sknetwork/clustering/tests/test_metrics.py +50 -0
  39. sknetwork/clustering/tests/test_post_processing.py +23 -0
  40. sknetwork/clustering/tests/test_postprocess.py +39 -0
  41. sknetwork/data/__init__.py +5 -0
  42. sknetwork/data/load.py +408 -0
  43. sknetwork/data/models.py +459 -0
  44. sknetwork/data/parse.py +621 -0
  45. sknetwork/data/test_graphs.py +84 -0
  46. sknetwork/data/tests/__init__.py +1 -0
  47. sknetwork/data/tests/test_API.py +30 -0
  48. sknetwork/data/tests/test_load.py +95 -0
  49. sknetwork/data/tests/test_models.py +52 -0
  50. sknetwork/data/tests/test_parse.py +253 -0
  51. sknetwork/data/tests/test_test_graphs.py +30 -0
  52. sknetwork/data/tests/test_toy_graphs.py +68 -0
  53. sknetwork/data/toy_graphs.py +619 -0
  54. sknetwork/embedding/__init__.py +10 -0
  55. sknetwork/embedding/base.py +90 -0
  56. sknetwork/embedding/force_atlas.py +197 -0
  57. sknetwork/embedding/louvain_embedding.py +174 -0
  58. sknetwork/embedding/louvain_hierarchy.py +142 -0
  59. sknetwork/embedding/metrics.py +66 -0
  60. sknetwork/embedding/random_projection.py +133 -0
  61. sknetwork/embedding/spectral.py +214 -0
  62. sknetwork/embedding/spring.py +198 -0
  63. sknetwork/embedding/svd.py +363 -0
  64. sknetwork/embedding/tests/__init__.py +1 -0
  65. sknetwork/embedding/tests/test_API.py +73 -0
  66. sknetwork/embedding/tests/test_force_atlas.py +35 -0
  67. sknetwork/embedding/tests/test_louvain_embedding.py +33 -0
  68. sknetwork/embedding/tests/test_louvain_hierarchy.py +19 -0
  69. sknetwork/embedding/tests/test_metrics.py +29 -0
  70. sknetwork/embedding/tests/test_random_projection.py +28 -0
  71. sknetwork/embedding/tests/test_spectral.py +84 -0
  72. sknetwork/embedding/tests/test_spring.py +50 -0
  73. sknetwork/embedding/tests/test_svd.py +37 -0
  74. sknetwork/flow/__init__.py +3 -0
  75. sknetwork/flow/flow.py +73 -0
  76. sknetwork/flow/tests/__init__.py +1 -0
  77. sknetwork/flow/tests/test_flow.py +17 -0
  78. sknetwork/flow/tests/test_utils.py +69 -0
  79. sknetwork/flow/utils.py +91 -0
  80. sknetwork/gnn/__init__.py +10 -0
  81. sknetwork/gnn/activation.py +117 -0
  82. sknetwork/gnn/base.py +155 -0
  83. sknetwork/gnn/base_activation.py +89 -0
  84. sknetwork/gnn/base_layer.py +109 -0
  85. sknetwork/gnn/gnn_classifier.py +381 -0
  86. sknetwork/gnn/layer.py +153 -0
  87. sknetwork/gnn/layers.py +127 -0
  88. sknetwork/gnn/loss.py +180 -0
  89. sknetwork/gnn/neighbor_sampler.py +65 -0
  90. sknetwork/gnn/optimizer.py +163 -0
  91. sknetwork/gnn/tests/__init__.py +1 -0
  92. sknetwork/gnn/tests/test_activation.py +56 -0
  93. sknetwork/gnn/tests/test_base.py +79 -0
  94. sknetwork/gnn/tests/test_base_layer.py +37 -0
  95. sknetwork/gnn/tests/test_gnn_classifier.py +192 -0
  96. sknetwork/gnn/tests/test_layers.py +80 -0
  97. sknetwork/gnn/tests/test_loss.py +33 -0
  98. sknetwork/gnn/tests/test_neigh_sampler.py +23 -0
  99. sknetwork/gnn/tests/test_optimizer.py +43 -0
  100. sknetwork/gnn/tests/test_utils.py +93 -0
  101. sknetwork/gnn/utils.py +219 -0
  102. sknetwork/hierarchy/__init__.py +7 -0
  103. sknetwork/hierarchy/base.py +69 -0
  104. sknetwork/hierarchy/louvain_hierarchy.py +264 -0
  105. sknetwork/hierarchy/metrics.py +234 -0
  106. sknetwork/hierarchy/paris.cpython-39-darwin.so +0 -0
  107. sknetwork/hierarchy/paris.pyx +317 -0
  108. sknetwork/hierarchy/postprocess.py +350 -0
  109. sknetwork/hierarchy/tests/__init__.py +1 -0
  110. sknetwork/hierarchy/tests/test_API.py +25 -0
  111. sknetwork/hierarchy/tests/test_algos.py +29 -0
  112. sknetwork/hierarchy/tests/test_metrics.py +62 -0
  113. sknetwork/hierarchy/tests/test_postprocess.py +57 -0
  114. sknetwork/hierarchy/tests/test_ward.py +25 -0
  115. sknetwork/hierarchy/ward.py +94 -0
  116. sknetwork/linalg/__init__.py +9 -0
  117. sknetwork/linalg/basics.py +37 -0
  118. sknetwork/linalg/diteration.cpython-39-darwin.so +0 -0
  119. sknetwork/linalg/diteration.pyx +49 -0
  120. sknetwork/linalg/eig_solver.py +93 -0
  121. sknetwork/linalg/laplacian.py +15 -0
  122. sknetwork/linalg/normalization.py +66 -0
  123. sknetwork/linalg/operators.py +225 -0
  124. sknetwork/linalg/polynome.py +76 -0
  125. sknetwork/linalg/ppr_solver.py +170 -0
  126. sknetwork/linalg/push.cpython-39-darwin.so +0 -0
  127. sknetwork/linalg/push.pyx +73 -0
  128. sknetwork/linalg/sparse_lowrank.py +142 -0
  129. sknetwork/linalg/svd_solver.py +91 -0
  130. sknetwork/linalg/tests/__init__.py +1 -0
  131. sknetwork/linalg/tests/test_eig.py +44 -0
  132. sknetwork/linalg/tests/test_laplacian.py +18 -0
  133. sknetwork/linalg/tests/test_normalization.py +38 -0
  134. sknetwork/linalg/tests/test_operators.py +70 -0
  135. sknetwork/linalg/tests/test_polynome.py +38 -0
  136. sknetwork/linalg/tests/test_ppr.py +50 -0
  137. sknetwork/linalg/tests/test_sparse_lowrank.py +61 -0
  138. sknetwork/linalg/tests/test_svd.py +38 -0
  139. sknetwork/linkpred/__init__.py +4 -0
  140. sknetwork/linkpred/base.py +80 -0
  141. sknetwork/linkpred/first_order.py +508 -0
  142. sknetwork/linkpred/first_order_core.cpython-39-darwin.so +0 -0
  143. sknetwork/linkpred/first_order_core.pyx +315 -0
  144. sknetwork/linkpred/postprocessing.py +98 -0
  145. sknetwork/linkpred/tests/__init__.py +1 -0
  146. sknetwork/linkpred/tests/test_API.py +49 -0
  147. sknetwork/linkpred/tests/test_postprocessing.py +21 -0
  148. sknetwork/path/__init__.py +4 -0
  149. sknetwork/path/metrics.py +148 -0
  150. sknetwork/path/search.py +65 -0
  151. sknetwork/path/shortest_path.py +186 -0
  152. sknetwork/path/tests/__init__.py +1 -0
  153. sknetwork/path/tests/test_metrics.py +29 -0
  154. sknetwork/path/tests/test_search.py +25 -0
  155. sknetwork/path/tests/test_shortest_path.py +45 -0
  156. sknetwork/ranking/__init__.py +9 -0
  157. sknetwork/ranking/base.py +56 -0
  158. sknetwork/ranking/betweenness.cpython-39-darwin.so +0 -0
  159. sknetwork/ranking/betweenness.pyx +99 -0
  160. sknetwork/ranking/closeness.py +95 -0
  161. sknetwork/ranking/harmonic.py +82 -0
  162. sknetwork/ranking/hits.py +94 -0
  163. sknetwork/ranking/katz.py +81 -0
  164. sknetwork/ranking/pagerank.py +107 -0
  165. sknetwork/ranking/postprocess.py +25 -0
  166. sknetwork/ranking/tests/__init__.py +1 -0
  167. sknetwork/ranking/tests/test_API.py +34 -0
  168. sknetwork/ranking/tests/test_betweenness.py +38 -0
  169. sknetwork/ranking/tests/test_closeness.py +34 -0
  170. sknetwork/ranking/tests/test_hits.py +20 -0
  171. sknetwork/ranking/tests/test_pagerank.py +69 -0
  172. sknetwork/regression/__init__.py +4 -0
  173. sknetwork/regression/base.py +56 -0
  174. sknetwork/regression/diffusion.py +190 -0
  175. sknetwork/regression/tests/__init__.py +1 -0
  176. sknetwork/regression/tests/test_API.py +34 -0
  177. sknetwork/regression/tests/test_diffusion.py +48 -0
  178. sknetwork/sknetwork.py +3 -0
  179. sknetwork/topology/__init__.py +9 -0
  180. sknetwork/topology/dag.py +74 -0
  181. sknetwork/topology/dag_core.cpython-39-darwin.so +0 -0
  182. sknetwork/topology/dag_core.pyx +38 -0
  183. sknetwork/topology/kcliques.cpython-39-darwin.so +0 -0
  184. sknetwork/topology/kcliques.pyx +193 -0
  185. sknetwork/topology/kcore.cpython-39-darwin.so +0 -0
  186. sknetwork/topology/kcore.pyx +120 -0
  187. sknetwork/topology/structure.py +234 -0
  188. sknetwork/topology/tests/__init__.py +1 -0
  189. sknetwork/topology/tests/test_cliques.py +28 -0
  190. sknetwork/topology/tests/test_cores.py +21 -0
  191. sknetwork/topology/tests/test_dag.py +26 -0
  192. sknetwork/topology/tests/test_structure.py +99 -0
  193. sknetwork/topology/tests/test_triangles.py +42 -0
  194. sknetwork/topology/tests/test_wl_coloring.py +49 -0
  195. sknetwork/topology/tests/test_wl_kernel.py +31 -0
  196. sknetwork/topology/triangles.cpython-39-darwin.so +0 -0
  197. sknetwork/topology/triangles.pyx +166 -0
  198. sknetwork/topology/weisfeiler_lehman.py +163 -0
  199. sknetwork/topology/weisfeiler_lehman_core.cpython-39-darwin.so +0 -0
  200. sknetwork/topology/weisfeiler_lehman_core.pyx +116 -0
  201. sknetwork/utils/__init__.py +40 -0
  202. sknetwork/utils/base.py +35 -0
  203. sknetwork/utils/check.py +354 -0
  204. sknetwork/utils/co_neighbor.py +71 -0
  205. sknetwork/utils/format.py +219 -0
  206. sknetwork/utils/kmeans.py +89 -0
  207. sknetwork/utils/knn.py +166 -0
  208. sknetwork/utils/knn1d.cpython-39-darwin.so +0 -0
  209. sknetwork/utils/knn1d.pyx +80 -0
  210. sknetwork/utils/membership.py +82 -0
  211. sknetwork/utils/minheap.cpython-39-darwin.so +0 -0
  212. sknetwork/utils/minheap.pxd +22 -0
  213. sknetwork/utils/minheap.pyx +111 -0
  214. sknetwork/utils/neighbors.py +115 -0
  215. sknetwork/utils/seeds.py +75 -0
  216. sknetwork/utils/simplex.py +140 -0
  217. sknetwork/utils/tests/__init__.py +1 -0
  218. sknetwork/utils/tests/test_base.py +28 -0
  219. sknetwork/utils/tests/test_bunch.py +16 -0
  220. sknetwork/utils/tests/test_check.py +190 -0
  221. sknetwork/utils/tests/test_co_neighbor.py +43 -0
  222. sknetwork/utils/tests/test_format.py +61 -0
  223. sknetwork/utils/tests/test_kmeans.py +21 -0
  224. sknetwork/utils/tests/test_knn.py +32 -0
  225. sknetwork/utils/tests/test_membership.py +24 -0
  226. sknetwork/utils/tests/test_neighbors.py +41 -0
  227. sknetwork/utils/tests/test_projection_simplex.py +33 -0
  228. sknetwork/utils/tests/test_seeds.py +67 -0
  229. sknetwork/utils/tests/test_verbose.py +15 -0
  230. sknetwork/utils/tests/test_ward.py +20 -0
  231. sknetwork/utils/timeout.py +38 -0
  232. sknetwork/utils/verbose.py +37 -0
  233. sknetwork/utils/ward.py +60 -0
  234. sknetwork/visualization/__init__.py +4 -0
  235. sknetwork/visualization/colors.py +34 -0
  236. sknetwork/visualization/dendrograms.py +229 -0
  237. sknetwork/visualization/graphs.py +819 -0
  238. sknetwork/visualization/tests/__init__.py +1 -0
  239. sknetwork/visualization/tests/test_dendrograms.py +53 -0
  240. sknetwork/visualization/tests/test_graphs.py +167 -0
@@ -0,0 +1,39 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ """Tests for clustering post-processing"""
4
+ import unittest
5
+
6
+ import numpy as np
7
+
8
+ from sknetwork.data import house, star_wars
9
+ from sknetwork.clustering.postprocess import reindex_labels, aggregate_graph
10
+
11
+
12
class TestClusteringPostProcessing(unittest.TestCase):
    """Unit tests for clustering post-processing utilities
    (``reindex_labels`` and ``aggregate_graph``)."""

    def test_reindex_clusters(self):
        """Labels are reindexed consistently, even when label values are non-contiguous."""
        truth = np.array([1, 1, 2, 0, 0, 0])

        labels = np.array([0, 0, 1, 2, 2, 2])
        output = reindex_labels(labels)
        self.assertTrue(np.array_equal(truth, output))

        # gaps in label values (5 instead of 1) must not change the reindexing
        labels = np.array([0, 0, 5, 2, 2, 2])
        output = reindex_labels(labels)
        self.assertTrue(np.array_equal(truth, output))

    def test_aggregate_graph(self):
        """Aggregated (bi)adjacency has one row/column per cluster on each side."""
        adjacency = house()
        labels = np.array([0, 0, 1, 1, 2])
        aggregate = aggregate_graph(adjacency, labels)
        self.assertEqual(aggregate.shape, (3, 3))

        biadjacency = star_wars()
        labels = np.array([0, 0, 1, 2])
        labels_row = np.array([0, 1, 3, -1])
        labels_col = np.array([0, 0, 1])
        aggregate = aggregate_graph(biadjacency, labels=labels, labels_col=labels_col)
        # 3 distinct row labels x 2 distinct column labels
        # (fixed: this assertion was duplicated in the original test)
        self.assertEqual(aggregate.shape, (3, 2))
        # labels_row contains -1 — presumably an "excluded node" marker; the
        # expected shape spans row labels {0, 1, 3} plus padding up to 4 rows.
        # TODO(review): confirm -1 semantics against aggregate_graph.
        aggregate = aggregate_graph(biadjacency, labels_row=labels_row, labels_col=labels_col)
        self.assertEqual(aggregate.shape, (4, 2))
@@ -0,0 +1,5 @@
1
+ """data module"""
2
+ from sknetwork.data.load import load_netset, load_konect, clear_data_home, clean_data_home, get_data_home, save, load
3
+ from sknetwork.data.models import *
4
+ from sknetwork.data.parse import from_edge_list, from_adjacency_list, from_csv, from_graphml
5
+ from sknetwork.data.toy_graphs import *
sknetwork/data/load.py ADDED
@@ -0,0 +1,408 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ Created on November 15, 2019
5
+ @author: Quentin Lutz <qlutz@enst.fr>
6
+ """
7
+
8
+ import pickle
9
+ import shutil
10
+ import tarfile
11
+ from os import environ, makedirs, remove, listdir
12
+ from os.path import abspath, commonprefix, exists, expanduser, isfile, join
13
+ from pathlib import Path
14
+ from typing import Optional, Union
15
+ from urllib.error import HTTPError, URLError
16
+ from urllib.request import urlretrieve
17
+
18
+ import numpy as np
19
+ from scipy import sparse
20
+
21
+ from sknetwork.data.parse import from_csv, load_labels, load_header, load_metadata
22
+ from sknetwork.utils import Bunch
23
+ from sknetwork.utils.check import is_square
24
+ from sknetwork.utils.verbose import Log
25
+
26
+ NETSET_URL = 'https://netset.telecom-paris.fr'
27
+
28
+
29
+ def is_within_directory(directory, target):
30
+ """Utility function."""
31
+ abs_directory = abspath(directory)
32
+ abs_target = abspath(target)
33
+ prefix = commonprefix([abs_directory, abs_target])
34
+ return prefix == abs_directory
35
+
36
+
37
+ def safe_extract(tar, path=".", members=None, *, numeric_owner=False):
38
+ """Safe extraction."""
39
+ for member in tar.getmembers():
40
+ member_path = join(path, member.name)
41
+ if not is_within_directory(path, member_path):
42
+ raise Exception("Attempted path traversal in tar file.")
43
+ tar.extractall(path, members, numeric_owner=numeric_owner)
44
+
45
+
46
+ def get_data_home(data_home: Optional[Union[str, Path]] = None) -> Path:
47
+ """Return a path to a storage folder depending on the dedicated environment variable and user input.
48
+
49
+ Parameters
50
+ ----------
51
+ data_home: str
52
+ The folder to be used for dataset storage
53
+ """
54
+ if data_home is None:
55
+ data_home = environ.get('SCIKIT_NETWORK_DATA', join('~', 'scikit_network_data'))
56
+ data_home = expanduser(data_home)
57
+ if not exists(data_home):
58
+ makedirs(data_home)
59
+ return Path(data_home)
60
+
61
+
62
+ def clear_data_home(data_home: Optional[Union[str, Path]] = None):
63
+ """Clear storage folder.
64
+
65
+ Parameters
66
+ ----------
67
+ data_home: str or :class:`pathlib.Path`
68
+ The folder to be used for dataset storage.
69
+ """
70
+ data_home = get_data_home(data_home)
71
+ shutil.rmtree(data_home)
72
+
73
+
74
+ def clean_data_home(data_home: Optional[Union[str, Path]] = None):
75
+ """Clean storage folder so that it contains folders only.
76
+
77
+ Parameters
78
+ ----------
79
+ data_home: str or :class:`pathlib.Path`
80
+ The folder to be used for dataset storage
81
+ """
82
+ data_home = get_data_home(data_home)
83
+ for file in listdir(data_home):
84
+ if isfile(join(data_home, file)):
85
+ remove(join(data_home, file))
86
+
87
+
88
+ def load_netset(name: Optional[str] = None, data_home: Optional[Union[str, Path]] = None,
89
+ verbose: bool = True) -> Optional[Bunch]:
90
+ """Load a dataset from the `NetSet database
91
+ <https://netset.telecom-paris.fr/>`_.
92
+
93
+ Parameters
94
+ ----------
95
+ name : str
96
+ Name of the dataset (all low-case). Examples include 'openflights', 'cinema' and 'wikivitals'.
97
+ data_home : str or :class:`pathlib.Path`
98
+ Folder to be used for dataset storage.
99
+ This folder must be empty or contain other folders (datasets); files will be removed.
100
+ verbose : bool
101
+ Enable verbosity.
102
+
103
+ Returns
104
+ -------
105
+ dataset : :class:`Bunch`
106
+ Returned dataset.
107
+ """
108
+ dataset = Bunch()
109
+ dataset_folder = NETSET_URL + '/datasets/'
110
+ folder_npz = NETSET_URL + '/datasets_npz/'
111
+
112
+ logger = Log(verbose)
113
+
114
+ if name is None:
115
+ print("Please specify the dataset (e.g., 'wikivitals').\n" +
116
+ f"Complete list available here: <{dataset_folder}>.")
117
+ return None
118
+ else:
119
+ name = name.lower()
120
+ data_home = get_data_home(data_home)
121
+ data_netset = data_home / 'netset'
122
+ if not data_netset.exists():
123
+ clean_data_home(data_home)
124
+ makedirs(data_netset)
125
+
126
+ # remove previous dataset if not in the netset folder
127
+ direct_path = data_home / name
128
+ if direct_path.exists():
129
+ shutil.rmtree(direct_path)
130
+
131
+ data_path = data_netset / name
132
+ if not data_path.exists():
133
+ name_npz = name + '_npz.tar.gz'
134
+ try:
135
+ logger.print('Downloading', name, 'from NetSet...')
136
+ urlretrieve(folder_npz + name_npz, data_netset / name_npz)
137
+ except HTTPError:
138
+ raise ValueError('Invalid dataset: ' + name + '.'
139
+ + "\nAvailable datasets include 'openflights' and 'wikivitals'."
140
+ + f"\nSee <{NETSET_URL}>")
141
+ except ConnectionResetError: # pragma: no cover
142
+ raise RuntimeError("Could not reach Netset.")
143
+ with tarfile.open(data_netset / name_npz, 'r:gz') as tar_ref:
144
+ logger.print('Unpacking archive...')
145
+ safe_extract(tar_ref, data_path)
146
+
147
+ files = [file for file in listdir(data_path)]
148
+ logger.print('Parsing files...')
149
+ for file in files:
150
+ file_components = file.split('.')
151
+ if len(file_components) == 2:
152
+ file_name, file_extension = tuple(file_components)
153
+ if file_extension == 'npz':
154
+ dataset[file_name] = sparse.load_npz(data_path / file)
155
+ elif file_extension == 'npy':
156
+ dataset[file_name] = np.load(data_path / file, allow_pickle=True)
157
+ elif file_extension == 'p':
158
+ with open(data_path / file, 'rb') as f:
159
+ dataset[file_name] = pickle.load(f)
160
+
161
+ clean_data_home(data_netset)
162
+ logger.print('Done.')
163
+ return dataset
164
+
165
+
166
+ def load_konect(name: str, data_home: Optional[Union[str, Path]] = None, auto_numpy_bundle: bool = True,
167
+ verbose: bool = True) -> Bunch:
168
+ """Load a dataset from the `Konect database
169
+ <http://konect.cc/networks/>`_.
170
+
171
+ Parameters
172
+ ----------
173
+ name : str
174
+ Name of the dataset as specified on the Konect website (e.g. for the Zachary Karate club dataset,
175
+ the corresponding name is ``'ucidata-zachary'``).
176
+ data_home : str or :class:`pathlib.Path`
177
+ Folder to be used for dataset storage.
178
+ auto_numpy_bundle : bool
179
+ Whether the dataset should be stored in its default format (False) or using Numpy files for faster
180
+ subsequent access to the dataset (True).
181
+ verbose : bool
182
+ Enable verbosity.
183
+
184
+ Returns
185
+ -------
186
+ dataset : :class:`Bunch`
187
+ Object with the following attributes:
188
+
189
+ * `adjacency` or `biadjacency`: the adjacency/biadjacency matrix for the dataset
190
+ * `meta`: a dictionary containing the metadata as specified by Konect
191
+ * each attribute specified by Konect (ent.* file)
192
+
193
+ Notes
194
+ -----
195
+ An attribute `meta` of the `Bunch` class is used to store information about the dataset if present. In any case,
196
+ `meta` has the attribute `name` which, if not given, is equal to the name of the dataset as passed to this function.
197
+
198
+ References
199
+ ----------
200
+ Kunegis, J. (2013, May).
201
+ `Konect: the Koblenz network collection.
202
+ <https://dl.acm.org/doi/abs/10.1145/2487788.2488173>`_
203
+ In Proceedings of the 22nd International Conference on World Wide Web (pp. 1343-1350).
204
+ """
205
+ logger = Log(verbose)
206
+ if name == '':
207
+ raise ValueError("Please specify the dataset. "
208
+ + "\nExamples include 'actor-movie' and 'ego-facebook'."
209
+ + "\n See 'http://konect.cc/networks/' for the full list.")
210
+ data_home = get_data_home(data_home)
211
+ data_konect = data_home / 'konect'
212
+ if not data_konect.exists():
213
+ clean_data_home(data_home)
214
+ makedirs(data_konect)
215
+
216
+ # remove previous dataset if not in the konect folder
217
+ direct_path = data_home / name
218
+ if direct_path.exists():
219
+ shutil.rmtree(direct_path)
220
+
221
+ data_path = data_konect / name
222
+ name_tar = name + '.tar.bz2'
223
+ if not data_path.exists():
224
+ logger.print('Downloading', name, 'from Konect...')
225
+ try:
226
+ urlretrieve('http://konect.cc/files/download.tsv.' + name_tar, data_konect / name_tar)
227
+ with tarfile.open(data_konect / name_tar, 'r:bz2') as tar_ref:
228
+ logger.print('Unpacking archive...')
229
+ safe_extract(tar_ref, data_path)
230
+ except (HTTPError, tarfile.ReadError):
231
+ raise ValueError('Invalid dataset ' + name + '.'
232
+ + "\nExamples include 'actor-movie' and 'ego-facebook'."
233
+ + "\n See 'http://konect.cc/networks/' for the full list.")
234
+ except (URLError, ConnectionResetError): # pragma: no cover
235
+ raise RuntimeError("Could not reach Konect.")
236
+ elif exists(data_path / (name + '_bundle')):
237
+ logger.print('Loading from local bundle...')
238
+ return load_from_numpy_bundle(name + '_bundle', data_path)
239
+
240
+ dataset = Bunch()
241
+ path = data_konect / name / name
242
+ if not path.exists() or len(listdir(path)) == 0:
243
+ raise Exception("No data downloaded.")
244
+ files = [file for file in listdir(path) if name in file]
245
+ logger.print('Parsing files...')
246
+ matrix = [file for file in files if 'out.' in file]
247
+ if matrix:
248
+ file = matrix[0]
249
+ directed, bipartite, weighted = load_header(path / file)
250
+ dataset = from_csv(path / file, directed=directed, bipartite=bipartite, weighted=weighted)
251
+
252
+ metadata = [file for file in files if 'meta.' in file]
253
+ if metadata:
254
+ file = metadata[0]
255
+ dataset.meta = load_metadata(path / file)
256
+
257
+ attributes = [file for file in files if 'ent.' + name in file]
258
+ if attributes:
259
+ for file in attributes:
260
+ attribute_name = file.split('.')[-1]
261
+ dataset[attribute_name] = load_labels(path / file)
262
+
263
+ if hasattr(dataset, 'meta'):
264
+ if hasattr(dataset.meta, 'name'):
265
+ pass
266
+ else:
267
+ dataset.meta.name = name
268
+ else:
269
+ dataset.meta = Bunch()
270
+ dataset.meta.name = name
271
+
272
+ if auto_numpy_bundle:
273
+ save_to_numpy_bundle(dataset, name + '_bundle', data_path)
274
+
275
+ clean_data_home(data_konect)
276
+
277
+ return dataset
278
+
279
+
280
+ def save_to_numpy_bundle(data: Bunch, bundle_name: str, data_home: Optional[Union[str, Path]] = None):
281
+ """Save a Bunch in the specified data home to a collection of Numpy and Pickle files for faster subsequent loads.
282
+
283
+ Parameters
284
+ ----------
285
+ data: Bunch
286
+ Data to save.
287
+ bundle_name: str
288
+ Name to be used for the bundle folder.
289
+ data_home: str or :class:`pathlib.Path`
290
+ Folder to be used for dataset storage.
291
+ """
292
+ data_home = get_data_home(data_home)
293
+ data_path = data_home / bundle_name
294
+ makedirs(data_path, exist_ok=True)
295
+ for attribute in data:
296
+ if type(data[attribute]) == sparse.csr_matrix:
297
+ sparse.save_npz(data_path / attribute, data[attribute])
298
+ elif type(data[attribute]) == np.ndarray:
299
+ np.save(data_path / attribute, data[attribute])
300
+ elif type(data[attribute]) == Bunch or type(data[attribute]) == str:
301
+ with open(data_path / (attribute + '.p'), 'wb') as file:
302
+ pickle.dump(data[attribute], file)
303
+ else:
304
+ raise TypeError('Unsupported data attribute type '+str(type(data[attribute])) + '.')
305
+
306
+
307
+ def load_from_numpy_bundle(bundle_name: str, data_home: Optional[Union[str, Path]] = None):
308
+ """Load a Bunch from a collection of Numpy and Pickle files (inverse function of ``save_to_numpy_bundle``).
309
+
310
+ Parameters
311
+ ----------
312
+ bundle_name: str
313
+ Name of the bundle folder.
314
+ data_home: str or :class:`pathlib.Path`
315
+ Folder used for dataset storage.
316
+
317
+ Returns
318
+ -------
319
+ data: Bunch
320
+ Data.
321
+ """
322
+ data_home = get_data_home(data_home)
323
+ data_path = data_home / bundle_name
324
+ if not data_path.exists():
325
+ raise FileNotFoundError('No bundle at ' + str(data_path))
326
+ else:
327
+ files = listdir(data_path)
328
+ data = Bunch()
329
+ for file in files:
330
+ if len(file.split('.')) == 2:
331
+ file_name, file_extension = file.split('.')
332
+ if file_extension == 'npz':
333
+ data[file_name] = sparse.load_npz(data_path / file)
334
+ elif file_extension == 'npy':
335
+ data[file_name] = np.load(data_path / file, allow_pickle=True)
336
+ elif file_extension == 'p':
337
+ with open(data_path / file, 'rb') as f:
338
+ data[file_name] = pickle.load(f)
339
+ return data
340
+
341
+
342
+ def save(folder: Union[str, Path], data: Union[sparse.csr_matrix, Bunch]):
343
+ """Save a Bunch or a CSR matrix in the current directory to a collection of Numpy and Pickle files for faster
344
+ subsequent loads. Supported attribute types include sparse matrices, NumPy arrays, strings and Bunch.
345
+
346
+ Parameters
347
+ ----------
348
+ folder : str or :class:`pathlib.Path`
349
+ Name of the bundle folder.
350
+ data : Union[sparse.csr_matrix, Bunch]
351
+ Data to save.
352
+
353
+ Example
354
+ -------
355
+ >>> from sknetwork.data import save
356
+ >>> my_dataset = Bunch()
357
+ >>> my_dataset.adjacency = sparse.csr_matrix(np.random.random((3, 3)) < 0.5)
358
+ >>> my_dataset.names = np.array(['a', 'b', 'c'])
359
+ >>> save('my_dataset', my_dataset)
360
+ >>> 'my_dataset' in listdir('.')
361
+ True
362
+ """
363
+ folder = Path(folder)
364
+ folder = folder.expanduser()
365
+ if folder.exists():
366
+ shutil.rmtree(folder)
367
+ if isinstance(data, sparse.csr_matrix):
368
+ bunch = Bunch()
369
+ if is_square(data):
370
+ bunch.adjacency = data
371
+ else:
372
+ bunch.biadjacency = data
373
+ data = bunch
374
+ if folder.is_absolute():
375
+ save_to_numpy_bundle(data, folder, '/')
376
+ else:
377
+ save_to_numpy_bundle(data, folder, '.')
378
+
379
+
380
+ def load(folder: Union[str, Path]):
381
+ """Load a Bunch from a previously created bundle from the current directory (inverse function of ``save``).
382
+
383
+ Parameters
384
+ ----------
385
+ folder: str
386
+ Name of the bundle folder.
387
+
388
+ Returns
389
+ -------
390
+ data: Bunch
391
+ Data.
392
+
393
+ Example
394
+ -------
395
+ >>> from sknetwork.data import save
396
+ >>> my_dataset = Bunch()
397
+ >>> my_dataset.adjacency = sparse.csr_matrix(np.random.random((3, 3)) < 0.5)
398
+ >>> my_dataset.names = np.array(['a', 'b', 'c'])
399
+ >>> save('my_dataset', my_dataset)
400
+ >>> loaded_graph = load('my_dataset')
401
+ >>> loaded_graph.names[0]
402
+ 'a'
403
+ """
404
+ folder = Path(folder)
405
+ if folder.is_absolute():
406
+ return load_from_numpy_bundle(folder, '/')
407
+ else:
408
+ return load_from_numpy_bundle(folder, '.')