scikit-network 0.30.0-cp38-cp38-win_amd64.whl → 0.32.1-cp38-cp38-win_amd64.whl
This diff compares the contents of two publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.
Potentially problematic release.
This version of scikit-network might be problematic.
- {scikit_network-0.30.0.dist-info → scikit_network-0.32.1.dist-info}/AUTHORS.rst +3 -0
- {scikit_network-0.30.0.dist-info → scikit_network-0.32.1.dist-info}/METADATA +31 -3
- scikit_network-0.32.1.dist-info/RECORD +228 -0
- {scikit_network-0.30.0.dist-info → scikit_network-0.32.1.dist-info}/WHEEL +1 -1
- sknetwork/__init__.py +1 -1
- sknetwork/base.py +67 -0
- sknetwork/classification/base.py +24 -24
- sknetwork/classification/base_rank.py +17 -25
- sknetwork/classification/diffusion.py +35 -35
- sknetwork/classification/knn.py +24 -21
- sknetwork/classification/metrics.py +1 -1
- sknetwork/classification/pagerank.py +10 -10
- sknetwork/classification/propagation.py +23 -20
- sknetwork/classification/tests/test_diffusion.py +13 -3
- sknetwork/classification/vote.cp38-win_amd64.pyd +0 -0
- sknetwork/classification/vote.cpp +14482 -10351
- sknetwork/classification/vote.pyx +1 -3
- sknetwork/clustering/__init__.py +3 -1
- sknetwork/clustering/base.py +36 -40
- sknetwork/clustering/kcenters.py +253 -0
- sknetwork/clustering/leiden.py +241 -0
- sknetwork/clustering/leiden_core.cp38-win_amd64.pyd +0 -0
- sknetwork/clustering/leiden_core.cpp +31564 -0
- sknetwork/clustering/leiden_core.pyx +124 -0
- sknetwork/clustering/louvain.py +133 -102
- sknetwork/clustering/louvain_core.cp38-win_amd64.pyd +0 -0
- sknetwork/clustering/louvain_core.cpp +22457 -18792
- sknetwork/clustering/louvain_core.pyx +86 -96
- sknetwork/clustering/postprocess.py +2 -2
- sknetwork/clustering/propagation_clustering.py +15 -19
- sknetwork/clustering/tests/test_API.py +8 -4
- sknetwork/clustering/tests/test_kcenters.py +92 -0
- sknetwork/clustering/tests/test_leiden.py +34 -0
- sknetwork/clustering/tests/test_louvain.py +3 -4
- sknetwork/data/__init__.py +2 -1
- sknetwork/data/base.py +28 -0
- sknetwork/data/load.py +38 -37
- sknetwork/data/models.py +18 -18
- sknetwork/data/parse.py +54 -33
- sknetwork/data/test_graphs.py +2 -2
- sknetwork/data/tests/test_API.py +1 -1
- sknetwork/data/tests/test_base.py +14 -0
- sknetwork/data/tests/test_load.py +1 -1
- sknetwork/data/tests/test_parse.py +9 -12
- sknetwork/data/tests/test_test_graphs.py +1 -2
- sknetwork/data/toy_graphs.py +18 -18
- sknetwork/embedding/__init__.py +0 -1
- sknetwork/embedding/base.py +21 -20
- sknetwork/embedding/force_atlas.py +3 -2
- sknetwork/embedding/louvain_embedding.py +2 -2
- sknetwork/embedding/random_projection.py +5 -3
- sknetwork/embedding/spectral.py +0 -73
- sknetwork/embedding/tests/test_API.py +4 -28
- sknetwork/embedding/tests/test_louvain_embedding.py +4 -9
- sknetwork/embedding/tests/test_random_projection.py +2 -2
- sknetwork/embedding/tests/test_spectral.py +5 -8
- sknetwork/embedding/tests/test_svd.py +1 -1
- sknetwork/gnn/base.py +4 -4
- sknetwork/gnn/base_layer.py +3 -3
- sknetwork/gnn/gnn_classifier.py +45 -89
- sknetwork/gnn/layer.py +1 -1
- sknetwork/gnn/loss.py +1 -1
- sknetwork/gnn/optimizer.py +4 -3
- sknetwork/gnn/tests/test_base_layer.py +4 -4
- sknetwork/gnn/tests/test_gnn_classifier.py +12 -35
- sknetwork/gnn/utils.py +8 -8
- sknetwork/hierarchy/base.py +29 -2
- sknetwork/hierarchy/louvain_hierarchy.py +45 -41
- sknetwork/hierarchy/paris.cp38-win_amd64.pyd +0 -0
- sknetwork/hierarchy/paris.cpp +27371 -22844
- sknetwork/hierarchy/paris.pyx +7 -9
- sknetwork/hierarchy/postprocess.py +16 -16
- sknetwork/hierarchy/tests/test_API.py +1 -1
- sknetwork/hierarchy/tests/test_algos.py +5 -0
- sknetwork/hierarchy/tests/test_metrics.py +1 -1
- sknetwork/linalg/__init__.py +1 -1
- sknetwork/linalg/diteration.cp38-win_amd64.pyd +0 -0
- sknetwork/linalg/diteration.cpp +13474 -9454
- sknetwork/linalg/diteration.pyx +0 -2
- sknetwork/linalg/eig_solver.py +1 -1
- sknetwork/linalg/{normalization.py → normalizer.py} +18 -15
- sknetwork/linalg/operators.py +1 -1
- sknetwork/linalg/ppr_solver.py +1 -1
- sknetwork/linalg/push.cp38-win_amd64.pyd +0 -0
- sknetwork/linalg/push.cpp +23003 -18807
- sknetwork/linalg/push.pyx +0 -2
- sknetwork/linalg/svd_solver.py +1 -1
- sknetwork/linalg/tests/test_normalization.py +3 -7
- sknetwork/linalg/tests/test_operators.py +4 -8
- sknetwork/linalg/tests/test_ppr.py +1 -1
- sknetwork/linkpred/base.py +13 -2
- sknetwork/linkpred/nn.py +6 -6
- sknetwork/log.py +19 -0
- sknetwork/path/__init__.py +4 -3
- sknetwork/path/dag.py +54 -0
- sknetwork/path/distances.py +98 -0
- sknetwork/path/search.py +13 -47
- sknetwork/path/shortest_path.py +37 -162
- sknetwork/path/tests/test_dag.py +37 -0
- sknetwork/path/tests/test_distances.py +62 -0
- sknetwork/path/tests/test_search.py +26 -11
- sknetwork/path/tests/test_shortest_path.py +31 -36
- sknetwork/ranking/__init__.py +0 -1
- sknetwork/ranking/base.py +13 -8
- sknetwork/ranking/betweenness.cp38-win_amd64.pyd +0 -0
- sknetwork/ranking/betweenness.cpp +5709 -3017
- sknetwork/ranking/betweenness.pyx +0 -2
- sknetwork/ranking/closeness.py +7 -10
- sknetwork/ranking/pagerank.py +14 -14
- sknetwork/ranking/postprocess.py +12 -3
- sknetwork/ranking/tests/test_API.py +2 -4
- sknetwork/ranking/tests/test_betweenness.py +3 -3
- sknetwork/ranking/tests/test_closeness.py +3 -7
- sknetwork/ranking/tests/test_pagerank.py +11 -5
- sknetwork/ranking/tests/test_postprocess.py +5 -0
- sknetwork/regression/base.py +19 -2
- sknetwork/regression/diffusion.py +24 -10
- sknetwork/regression/tests/test_diffusion.py +8 -0
- sknetwork/test_base.py +35 -0
- sknetwork/test_log.py +15 -0
- sknetwork/topology/__init__.py +7 -8
- sknetwork/topology/cliques.cp38-win_amd64.pyd +0 -0
- sknetwork/topology/{kcliques.cpp → cliques.cpp} +23423 -20277
- sknetwork/topology/cliques.pyx +149 -0
- sknetwork/topology/core.cp38-win_amd64.pyd +0 -0
- sknetwork/topology/{kcore.cpp → core.cpp} +21637 -18762
- sknetwork/topology/core.pyx +90 -0
- sknetwork/topology/cycles.py +243 -0
- sknetwork/topology/minheap.cp38-win_amd64.pyd +0 -0
- sknetwork/{utils → topology}/minheap.cpp +19452 -15368
- sknetwork/{utils → topology}/minheap.pxd +1 -3
- sknetwork/{utils → topology}/minheap.pyx +1 -3
- sknetwork/topology/structure.py +3 -43
- sknetwork/topology/tests/test_cliques.py +11 -11
- sknetwork/topology/tests/test_core.py +19 -0
- sknetwork/topology/tests/test_cycles.py +65 -0
- sknetwork/topology/tests/test_structure.py +2 -16
- sknetwork/topology/tests/test_triangles.py +11 -15
- sknetwork/topology/tests/test_wl.py +72 -0
- sknetwork/topology/triangles.cp38-win_amd64.pyd +0 -0
- sknetwork/topology/triangles.cpp +5056 -2696
- sknetwork/topology/triangles.pyx +74 -89
- sknetwork/topology/weisfeiler_lehman.py +56 -86
- sknetwork/topology/weisfeiler_lehman_core.cp38-win_amd64.pyd +0 -0
- sknetwork/topology/weisfeiler_lehman_core.cpp +14727 -10622
- sknetwork/topology/weisfeiler_lehman_core.pyx +0 -2
- sknetwork/utils/__init__.py +1 -31
- sknetwork/utils/check.py +2 -2
- sknetwork/utils/format.py +5 -3
- sknetwork/utils/membership.py +2 -2
- sknetwork/utils/tests/test_check.py +3 -3
- sknetwork/utils/tests/test_format.py +3 -1
- sknetwork/utils/values.py +1 -1
- sknetwork/visualization/__init__.py +2 -2
- sknetwork/visualization/dendrograms.py +55 -7
- sknetwork/visualization/graphs.py +292 -72
- sknetwork/visualization/tests/test_dendrograms.py +9 -9
- sknetwork/visualization/tests/test_graphs.py +71 -62
- scikit_network-0.30.0.dist-info/RECORD +0 -227
- sknetwork/embedding/louvain_hierarchy.py +0 -142
- sknetwork/embedding/tests/test_louvain_hierarchy.py +0 -19
- sknetwork/path/metrics.py +0 -148
- sknetwork/path/tests/test_metrics.py +0 -29
- sknetwork/ranking/harmonic.py +0 -82
- sknetwork/topology/dag.py +0 -74
- sknetwork/topology/dag_core.cp38-win_amd64.pyd +0 -0
- sknetwork/topology/dag_core.cpp +0 -23350
- sknetwork/topology/dag_core.pyx +0 -38
- sknetwork/topology/kcliques.cp38-win_amd64.pyd +0 -0
- sknetwork/topology/kcliques.pyx +0 -193
- sknetwork/topology/kcore.cp38-win_amd64.pyd +0 -0
- sknetwork/topology/kcore.pyx +0 -120
- sknetwork/topology/tests/test_cores.py +0 -21
- sknetwork/topology/tests/test_dag.py +0 -26
- sknetwork/topology/tests/test_wl_coloring.py +0 -49
- sknetwork/topology/tests/test_wl_kernel.py +0 -31
- sknetwork/utils/base.py +0 -35
- sknetwork/utils/minheap.cp38-win_amd64.pyd +0 -0
- sknetwork/utils/simplex.py +0 -140
- sknetwork/utils/tests/test_base.py +0 -28
- sknetwork/utils/tests/test_bunch.py +0 -16
- sknetwork/utils/tests/test_projection_simplex.py +0 -33
- sknetwork/utils/tests/test_verbose.py +0 -15
- sknetwork/utils/verbose.py +0 -37
- {scikit_network-0.30.0.dist-info → scikit_network-0.32.1.dist-info}/LICENSE +0 -0
- {scikit_network-0.30.0.dist-info → scikit_network-0.32.1.dist-info}/top_level.txt +0 -0
- /sknetwork/{utils → data}/timeout.py +0 -0
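Several modules moved or were renamed in this release (see the paths above: sknetwork/utils/timeout.py → sknetwork/data/timeout.py, sknetwork/utils/minheap.* → sknetwork/topology/, sknetwork/linalg/normalization.py → sknetwork/linalg/normalizer.py, removal of sknetwork/utils/verbose.py in favour of the new sknetwork/log.py, and new clustering modules leiden.py and kcenters.py). The sketch below collects the import updates that are directly visible in the hunks further down; the commented last line is only an assumption inferred from the new file names, since the public class names are not shown in this diff.

# Import paths confirmed by hunks in this diff (0.30.0 -> 0.32.1):
from sknetwork.data.base import Bunch        # was: from sknetwork.utils import Bunch
from sknetwork.data.timeout import TimeOut   # was: sknetwork/utils/timeout.py
from sknetwork.log import Log                # new module replacing sknetwork/utils/verbose.py

# Assumption only, inferred from the new files leiden.py and kcenters.py:
# from sknetwork.clustering import Leiden, KCenters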
sknetwork/data/load.py
CHANGED
@@ -1,7 +1,7 @@
 #!/usr/bin/env python3
 # -*- coding: utf-8 -*-
 """
-Created
+Created in November 2019
 @author: Quentin Lutz <qlutz@enst.fr>
 """

@@ -19,12 +19,15 @@ import numpy as np
 from scipy import sparse

 from sknetwork.data.parse import from_csv, load_labels, load_header, load_metadata
-from sknetwork.
+from sknetwork.data.base import Bunch
 from sknetwork.utils.check import is_square
-from sknetwork.
+from sknetwork.log import Log

 NETSET_URL = 'https://netset.telecom-paris.fr'

+# former name of Dataset
+Bunch = Bunch
+

 def is_within_directory(directory, target):
     """Utility function."""
@@ -132,7 +135,7 @@ def load_netset(name: Optional[str] = None, data_home: Optional[Union[str, Path]
     if not data_path.exists():
         name_npz = name + '_npz.tar.gz'
         try:
-            logger.
+            logger.print_log('Downloading', name, 'from NetSet...')
             urlretrieve(folder_npz + name_npz, data_netset / name_npz)
         except HTTPError:
             raise ValueError('Invalid dataset: ' + name + '.'
@@ -141,11 +144,11 @@ def load_netset(name: Optional[str] = None, data_home: Optional[Union[str, Path]
         except ConnectionResetError:  # pragma: no cover
             raise RuntimeError("Could not reach Netset.")
         with tarfile.open(data_netset / name_npz, 'r:gz') as tar_ref:
-            logger.
+            logger.print_log('Unpacking archive...')
             safe_extract(tar_ref, data_path)

     files = [file for file in listdir(data_path)]
-    logger.
+    logger.print_log('Parsing files...')
     for file in files:
         file_components = file.split('.')
         if len(file_components) == 2:
@@ -159,7 +162,7 @@ def load_netset(name: Optional[str] = None, data_home: Optional[Union[str, Path]
                 dataset[file_name] = pickle.load(f)

     clean_data_home(data_netset)
-    logger.
+    logger.print_log('Done.')
     return dataset


@@ -192,7 +195,7 @@ def load_konect(name: str, data_home: Optional[Union[str, Path]] = None, auto_nu

     Notes
     -----
-    An attribute `meta` of the `
+    An attribute `meta` of the `Dataset` class is used to store information about the dataset if present. In any case,
     `meta` has the attribute `name` which, if not given, is equal to the name of the dataset as passed to this function.

     References
@@ -221,11 +224,11 @@ def load_konect(name: str, data_home: Optional[Union[str, Path]] = None, auto_nu
     data_path = data_konect / name
     name_tar = name + '.tar.bz2'
     if not data_path.exists():
-        logger.
+        logger.print_log('Downloading', name, 'from Konect...')
         try:
             urlretrieve('http://konect.cc/files/download.tsv.' + name_tar, data_konect / name_tar)
             with tarfile.open(data_konect / name_tar, 'r:bz2') as tar_ref:
-                logger.
+                logger.print_log('Unpacking archive...')
                 safe_extract(tar_ref, data_path)
         except (HTTPError, tarfile.ReadError):
             raise ValueError('Invalid dataset ' + name + '.'
@@ -234,7 +237,7 @@ def load_konect(name: str, data_home: Optional[Union[str, Path]] = None, auto_nu
         except (URLError, ConnectionResetError):  # pragma: no cover
             raise RuntimeError("Could not reach Konect.")
     elif exists(data_path / (name + '_bundle')):
-        logger.
+        logger.print_log('Loading from local bundle...')
         return load_from_numpy_bundle(name + '_bundle', data_path)

     dataset = Bunch()
@@ -242,12 +245,12 @@ def load_konect(name: str, data_home: Optional[Union[str, Path]] = None, auto_nu
     if not path.exists() or len(listdir(path)) == 0:
         raise Exception("No data downloaded.")
     files = [file for file in listdir(path) if name in file]
-    logger.
+    logger.print_log('Parsing files...')
     matrix = [file for file in files if 'out.' in file]
     if matrix:
         file = matrix[0]
         directed, bipartite, weighted = load_header(path / file)
-        dataset = from_csv(path / file, directed=directed, bipartite=bipartite, weighted=weighted)
+        dataset = from_csv(path / file, directed=directed, bipartite=bipartite, weighted=weighted, reindex=True)

     metadata = [file for file in files if 'meta.' in file]
     if metadata:
@@ -278,7 +281,7 @@ def load_konect(name: str, data_home: Optional[Union[str, Path]] = None, auto_nu


 def save_to_numpy_bundle(data: Bunch, bundle_name: str, data_home: Optional[Union[str, Path]] = None):
-    """Save a
+    """Save a dataset in the specified data home to a collection of Numpy and Pickle files for faster subsequent loads.

     Parameters
     ----------
@@ -297,15 +300,13 @@ def save_to_numpy_bundle(data: Bunch, bundle_name: str, data_home: Optional[Unio
             sparse.save_npz(data_path / attribute, data[attribute])
         elif type(data[attribute]) == np.ndarray:
             np.save(data_path / attribute, data[attribute])
-
+        else:
             with open(data_path / (attribute + '.p'), 'wb') as file:
                 pickle.dump(data[attribute], file)
-        else:
-            raise TypeError('Unsupported data attribute type '+str(type(data[attribute])) + '.')


 def load_from_numpy_bundle(bundle_name: str, data_home: Optional[Union[str, Path]] = None):
-    """Load a
+    """Load a dataset from a collection of Numpy and Pickle files (inverse function of ``save_to_numpy_bundle``).

     Parameters
     ----------
@@ -340,8 +341,8 @@ def load_from_numpy_bundle(bundle_name: str, data_home: Optional[Union[str, Path


 def save(folder: Union[str, Path], data: Union[sparse.csr_matrix, Bunch]):
-    """Save a
-    subsequent loads. Supported attribute types include sparse matrices, NumPy arrays, strings and
+    """Save a dataset or a CSR matrix in the current directory to a collection of Numpy and Pickle files for faster
+    subsequent loads. Supported attribute types include sparse matrices, NumPy arrays, strings and objects Dataset.

     Parameters
     ----------
@@ -353,11 +354,11 @@ def save(folder: Union[str, Path], data: Union[sparse.csr_matrix, Bunch]):
     Example
     -------
     >>> from sknetwork.data import save
-    >>>
-    >>>
-    >>>
-    >>> save('
-    >>> '
+    >>> dataset = Bunch()
+    >>> dataset.adjacency = sparse.csr_matrix(np.random.random((3, 3)) < 0.5)
+    >>> dataset.names = np.array(['a', 'b', 'c'])
+    >>> save('dataset', dataset)
+    >>> 'dataset' in listdir('.')
     True
     """
     folder = Path(folder)
@@ -365,12 +366,12 @@ def save(folder: Union[str, Path], data: Union[sparse.csr_matrix, Bunch]):
     if folder.exists():
         shutil.rmtree(folder)
     if isinstance(data, sparse.csr_matrix):
-
+        dataset = Bunch()
         if is_square(data):
-
+            dataset.adjacency = data
         else:
-
-            data =
+            dataset.biadjacency = data
+        data = dataset
     if folder.is_absolute():
         save_to_numpy_bundle(data, folder, '/')
     else:
@@ -378,7 +379,7 @@ def save(folder: Union[str, Path], data: Union[sparse.csr_matrix, Bunch]):


 def load(folder: Union[str, Path]):
-    """Load a
+    """Load a dataset from a previously created bundle from the current directory (inverse function of ``save``).

     Parameters
     ----------
@@ -393,13 +394,13 @@ def load(folder: Union[str, Path]):
     Example
     -------
     >>> from sknetwork.data import save
-    >>>
-    >>>
-    >>>
-    >>> save('
-    >>>
-    >>>
-    'a'
+    >>> dataset = Bunch()
+    >>> dataset.adjacency = sparse.csr_matrix(np.random.random((3, 3)) < 0.5)
+    >>> dataset.names = np.array(['a', 'b', 'c'])
+    >>> save('dataset', dataset)
+    >>> dataset = load('dataset')
+    >>> print(dataset.names)
+    ['a' 'b' 'c']
     """
     folder = Path(folder)
     if folder.is_absolute():
sknetwork/data/models.py
CHANGED
@@ -1,7 +1,7 @@
 #!/usr/bin/env python3
 # -*- coding: utf-8 -*-
 """
-Created
+Created in July 2019
 @author: Thomas Bonald <bonald@enst.fr>
 @author: Quentin Lutz <qlutz@enst.fr>
 @author: Nathan de Lara <nathan.delara@polytechnique.org>
@@ -12,8 +12,8 @@ from typing import Union, Optional, Iterable
 import numpy as np
 from scipy import sparse

+from sknetwork.data.base import Bunch
 from sknetwork.data.parse import from_edge_list
-from sknetwork.utils import Bunch
 from sknetwork.utils.check import check_random_state
 from sknetwork.utils.format import directed2undirected

@@ -36,12 +36,12 @@ def block_model(sizes: Iterable, p_in: Union[float, list, np.ndarray] = .2, p_ou
     self_loops :
         If ``True``, allow self-loops.
     metadata :
-        If ``True``, return a `
+        If ``True``, return a `Dataset` object with labels.
     seed :
         Seed of the random generator (optional).
     Returns
     -------
-    adjacency or graph : Union[sparse.csr_matrix,
+    adjacency or graph : Union[sparse.csr_matrix, Dataset]
         Adjacency matrix or graph with metadata (labels).

     Example
@@ -137,11 +137,11 @@ def linear_digraph(n: int = 3, metadata: bool = False) -> Union[sparse.csr_matri
     n : int
         Number of nodes.
     metadata : bool
-        If ``True``, return a `
+        If ``True``, return a `Dataset` object with metadata.

     Returns
     -------
-    adjacency or graph : Union[sparse.csr_matrix,
+    adjacency or graph : Union[sparse.csr_matrix, Dataset]
         Adjacency matrix or graph with metadata (positions).

     Example
@@ -174,11 +174,11 @@ def linear_graph(n: int = 3, metadata: bool = False) -> Union[sparse.csr_matrix,
     n : int
         Number of nodes.
     metadata : bool
-        If ``True``, return a `
+        If ``True``, return a `Dataset` object with metadata.

     Returns
     -------
-    adjacency or graph : Union[sparse.csr_matrix,
+    adjacency or graph : Union[sparse.csr_matrix, Dataset]
         Adjacency matrix or graph with metadata (positions).

     Example
@@ -226,11 +226,11 @@ def cyclic_digraph(n: int = 3, metadata: bool = False) -> Union[sparse.csr_matri
     n : int
         Number of nodes.
     metadata : bool
-        If ``True``, return a `
+        If ``True``, return a `Dataset` object with metadata.

     Returns
     -------
-    adjacency or graph : Union[sparse.csr_matrix,
+    adjacency or graph : Union[sparse.csr_matrix, Dataset]
         Adjacency matrix or graph with metadata (positions).

     Example
@@ -261,11 +261,11 @@ def cyclic_graph(n: int = 3, metadata: bool = False) -> Union[sparse.csr_matrix,
     n : int
         Number of nodes.
     metadata : bool
-        If ``True``, return a `
+        If ``True``, return a `Dataset` object with metadata.

     Returns
     -------
-    adjacency or graph : Union[sparse.csr_matrix,
+    adjacency or graph : Union[sparse.csr_matrix, Dataset]
         Adjacency matrix or graph with metadata (positions).

     Example
@@ -291,11 +291,11 @@ def grid(n1: int = 10, n2: int = 10, metadata: bool = False) -> Union[sparse.csr
     n1, n2 : int
         Grid dimension.
     metadata : bool
-        If ``True``, return a `
+        If ``True``, return a `Dataset` object with metadata.

     Returns
     -------
-    adjacency or graph : Union[sparse.csr_matrix,
+    adjacency or graph : Union[sparse.csr_matrix, Dataset]
         Adjacency matrix or graph with metadata (positions).

     Example
@@ -328,11 +328,11 @@ def star(n_branches: int = 3, metadata: bool = False) -> Union[sparse.csr_matrix
     n_branches : int
         Number of branches.
     metadata : bool
-        If ``True``, return a `
+        If ``True``, return a `Dataset` object with metadata (positions).

     Returns
     -------
-    adjacency or graph : Union[sparse.csr_matrix,
+    adjacency or graph : Union[sparse.csr_matrix, Dataset]
         Adjacency matrix or graph with metadata (positions).

     Example
@@ -416,10 +416,10 @@ def watts_strogatz(n: int = 100, degree: int = 6, prob: float = 0.05, seed: Opti
     seed :
         Seed of the random generator (optional).
     metadata :
-        If ``True``, return a `
+        If ``True``, return a `Dataset` object with metadata.
     Returns
     -------
-    adjacency or graph : Union[sparse.csr_matrix,
+    adjacency or graph : Union[sparse.csr_matrix, Dataset]
         Adjacency matrix or graph with metadata (positions).

     Example
sknetwork/data/parse.py
CHANGED
@@ -1,25 +1,25 @@
 #!/usr/bin/env python3
 # -*- coding: utf-8 -*-
 """
-Created
+Created in December 2018
 @author: Quentin Lutz <qlutz@enst.fr>
-Nathan de Lara <nathan.delara@polytechnique.org>
-Thomas Bonald <bonald@enst.fr>
+@author: Nathan de Lara <nathan.delara@polytechnique.org>
+@author: Thomas Bonald <bonald@enst.fr>
 """

 from csv import reader
-from typing import Dict, List, Tuple, Union
+from typing import Dict, List, Tuple, Union, Optional
 from xml.etree import ElementTree

 import numpy as np
 from scipy import sparse

-from sknetwork.
+from sknetwork.data.base import Bunch
 from sknetwork.utils.format import directed2undirected


 def from_edge_list(edge_list: Union[np.ndarray, List[Tuple]], directed: bool = False,
-                   bipartite: bool = False, weighted: bool = True, reindex: bool =
+                   bipartite: bool = False, weighted: bool = True, reindex: bool = False, shape: Optional[tuple] = None,
                    sum_duplicates: bool = True, matrix_only: bool = None) -> Union[Bunch, sparse.csr_matrix]:
     """Load a graph from an edge list.

@@ -37,16 +37,19 @@ def from_edge_list(edge_list: Union[np.ndarray, List[Tuple]], directed: bool = F
     reindex : bool
         If ``True``, reindex nodes and returns the original node indices as names.
         Reindexing is enforced if nodes are not integers.
+    shape : tuple
+        Shape of the adjacency or biadjacency matrix.
+        If not specified or if nodes are reindexed, the shape is the smallest compatible with node indices.
     sum_duplicates : bool
         If ``True`` (default), sums weights of duplicate edges.
         Otherwise, the weight of each edge is that of the first occurrence of this edge.
     matrix_only : bool
         If ``True``, returns only the adjacency or biadjacency matrix.
-        Otherwise, returns a ``
+        Otherwise, returns a ``Dataset`` object with graph attributes (e.g., node names).
         If not specified (default), selects the most appropriate format.
     Returns
     -------
-    graph : :class:`
+    graph : :class:`Dataset` (including node names) or sparse matrix

     Examples
     --------
@@ -83,12 +86,14 @@ def from_edge_list(edge_list: Union[np.ndarray, List[Tuple]], directed: bool = F
     else:
         raise TypeError('The edge list must be given as a NumPy array or a list of tuples.')
     return from_edge_array(edge_array=edge_array, weights=weights, directed=directed, bipartite=bipartite,
-                           weighted=weighted, reindex=reindex, sum_duplicates=sum_duplicates,
+                           weighted=weighted, reindex=reindex, shape=shape, sum_duplicates=sum_duplicates,
+                           matrix_only=matrix_only)


 def from_adjacency_list(adjacency_list: Union[List[List], Dict[str, List]], directed: bool = False,
-                        bipartite: bool = False, weighted: bool = True, reindex: bool =
-                        sum_duplicates: bool = True, matrix_only: bool = None)
+                        bipartite: bool = False, weighted: bool = True, reindex: bool = False,
+                        shape: Optional[tuple] = None, sum_duplicates: bool = True, matrix_only: bool = None) \
+        -> Union[Bunch, sparse.csr_matrix]:
     """Load a graph from an adjacency list.

     Parameters
@@ -104,16 +109,19 @@ def from_adjacency_list(adjacency_list: Union[List[List], Dict[str, List]], dire
     reindex : bool
         If ``True``, reindex nodes and returns the original node indices as names.
         Reindexing is enforced if nodes are not integers.
+    shape : tuple
+        Shape of the adjacency or biadjacency matrix.
+        If not specified or if nodes are reindexed, the shape is the smallest compatible with node indices.
     sum_duplicates : bool
         If ``True`` (default), sums weights of duplicate edges.
         Otherwise, the weight of each edge is that of the first occurrence of this edge.
     matrix_only : bool
         If ``True``, returns only the adjacency or biadjacency matrix.
-        Otherwise, returns a ``
+        Otherwise, returns a ``Dataset`` object with graph attributes (e.g., node names).
         If not specified (default), selects the most appropriate format.
     Returns
     -------
-    graph : :class:`
+    graph : :class:`Dataset` or sparse matrix

     Example
     -------
@@ -134,12 +142,12 @@ def from_adjacency_list(adjacency_list: Union[List[List], Dict[str, List]], dire
     else:
         raise TypeError('The adjacency list must be given as a list of lists or a dict of lists.')
     return from_edge_list(edge_list=edge_list, directed=directed, bipartite=bipartite, weighted=weighted,
-                          reindex=reindex, sum_duplicates=sum_duplicates, matrix_only=matrix_only)
+                          reindex=reindex, shape=shape, sum_duplicates=sum_duplicates, matrix_only=matrix_only)


 def from_edge_array(edge_array: np.ndarray, weights: np.ndarray = None, directed: bool = False, bipartite: bool = False,
-                    weighted: bool = True, reindex: bool =
-                    matrix_only: bool = None) -> Union[Bunch, sparse.csr_matrix]:
+                    weighted: bool = True, reindex: bool = False, shape: Optional[tuple] = None,
+                    sum_duplicates: bool = True, matrix_only: bool = None) -> Union[Bunch, sparse.csr_matrix]:
     """Load a graph from an edge array of shape (n_edges, 2) and weights (optional).

     Parameters
@@ -157,17 +165,20 @@ def from_edge_array(edge_array: np.ndarray, weights: np.ndarray = None, directed
     reindex : bool
         If ``True``, reindex nodes and returns the original node indices as names.
         Reindexing is enforced if nodes are not integers.
+    shape : tuple
+        Shape of the adjacency or biadjacency matrix.
+        If not specified or if nodes are reindexed, the shape is the smallest compatible with node indices.
     sum_duplicates : bool
         If ``True`` (default), sums weights of duplicate edges.
         Otherwise, the weight of each edge is that of the first occurrence of this edge.
     matrix_only : bool
         If ``True``, returns only the adjacency or biadjacency matrix.
-        Otherwise, returns a ``
+        Otherwise, returns a ``Dataset`` object with graph attributes (e.g., node names).
         If not specified (default), selects the most appropriate format.

     Returns
     -------
-    graph : :class:`
+    graph : :class:`Dataset` or sparse matrix
     """
     try:
         edge_array = edge_array.astype(float)
@@ -195,28 +206,34 @@ def from_edge_array(edge_array: np.ndarray, weights: np.ndarray = None, directed
     if bipartite:
         row = edge_array[:, 0]
         col = edge_array[:, 1]
-        if row.dtype != int or
+        if row.dtype != int or reindex:
             names_row, row = np.unique(row, return_inverse=True)
             graph.names_row = names_row
             graph.names = names_row
             n_row = len(names_row)
+        elif shape is not None:
+            n_row = max(shape[0], max(row) + 1)
         else:
             n_row = max(row) + 1
-        if col.dtype != int or
+        if col.dtype != int or reindex:
             names_col, col = np.unique(col, return_inverse=True)
             graph.names_col = names_col
             n_col = len(names_col)
+        elif shape is not None:
+            n_col = max(shape[1], max(col) + 1)
         else:
             n_col = max(col) + 1
         matrix = sparse.csr_matrix((weights, (row, col)), shape=(n_row, n_col))
         graph.biadjacency = matrix
     else:
         nodes = edge_array.ravel()
-        if nodes.dtype != int or
+        if nodes.dtype != int or reindex:
             names, nodes = np.unique(nodes, return_inverse=True)
             graph.names = names
             n = len(names)
             edge_array = nodes.reshape(-1, 2)
+        elif shape is not None:
+            n = max(shape[0], max(nodes) + 1)
         else:
             n = max(nodes) + 1
         row = edge_array[:, 0]
@@ -233,8 +250,8 @@ def from_edge_array(edge_array: np.ndarray, weights: np.ndarray = None, directed

 def from_csv(file_path: str, delimiter: str = None, sep: str = None, comments: str = '#%',
              data_structure: str = None, directed: bool = False, bipartite: bool = False, weighted: bool = True,
-             reindex: bool =
-
+             reindex: bool = False, shape: Optional[tuple] = None, sum_duplicates: bool = True,
+             matrix_only: bool = None) -> Union[Bunch, sparse.csr_matrix]:
     """Load a graph from a CSV or TSV file.
     The delimiter can be specified (e.g., ' ' for space-separated values).

@@ -249,9 +266,10 @@ def from_csv(file_path: str, delimiter: str = None, sep: str = None, comments: s
     comments : str
         Characters for comment lines.
     data_structure : str
-        If 'edge_list',
-        If 'adjacency_list',
-
+        If 'edge_list', consider each row of the file as an edge (tuple of size 2 or 3).
+        If 'adjacency_list', consider each row of the file as an adjacency list (list of neighbors,
+        in the order of node indices; an empty line means no neighbor).
+        If 'adjacency_dict', consider each row of the file as an adjacency dictionary with key
         given by the first column (node: list of neighbors).
         If ``None`` (default), data_structure is guessed from the first rows of the file.
     directed : bool
@@ -263,17 +281,20 @@ def from_csv(file_path: str, delimiter: str = None, sep: str = None, comments: s
     reindex : bool
         If ``True``, reindex nodes and returns the original node indices as names.
         Reindexing is enforced if nodes are not integers.
+    shape : tuple
+        Shape of the adjacency or biadjacency matrix.
+        If not specified or if nodes are reindexed, the shape is the smallest compatible with node indices.
     sum_duplicates : bool
         If ``True`` (default), sums weights of duplicate edges.
         Otherwise, the weight of each edge is that of the first occurrence of this edge.
     matrix_only : bool
         If ``True``, returns only the adjacency or biadjacency matrix.
-        Otherwise, returns a ``
+        Otherwise, returns a ``Dataset`` object with graph attributes (e.g., node names).
         If not specified (default), selects the most appropriate format.

     Returns
     -------
-    graph: :class:`
+    graph: :class:`Dataset` or sparse matrix
     """
     header_length, delimiter_guess, comment_guess, data_structure_guess = scan_header(file_path, delimiters=delimiter,
                                                                                       comments=comments)
@@ -295,7 +316,7 @@ def from_csv(file_path: str, delimiter: str = None, sep: str = None, comments: s
         else:
             weights = None
         return from_edge_array(edge_array=edge_array, weights=weights, directed=directed, bipartite=bipartite,
-                               weighted=weighted, reindex=reindex, sum_duplicates=sum_duplicates,
+                               weighted=weighted, reindex=reindex, shape=shape, sum_duplicates=sum_duplicates,
                                matrix_only=matrix_only)
     except TypeError:
         pass
@@ -306,17 +327,17 @@ def from_csv(file_path: str, delimiter: str = None, sep: str = None, comments: s
     if data_structure == 'edge_list':
         edge_list = [tuple(row) for row in csv_reader]
         return from_edge_list(edge_list=edge_list, directed=directed, bipartite=bipartite,
-                              weighted=weighted, reindex=reindex, sum_duplicates=sum_duplicates,
+                              weighted=weighted, reindex=reindex, shape=shape, sum_duplicates=sum_duplicates,
                               matrix_only=matrix_only)
     elif data_structure == 'adjacency_list':
         adjacency_list = [row for row in csv_reader]
         return from_adjacency_list(adjacency_list=adjacency_list, directed=directed, bipartite=bipartite,
-                                   weighted=weighted, reindex=reindex, sum_duplicates=sum_duplicates,
+                                   weighted=weighted, reindex=reindex, shape=shape, sum_duplicates=sum_duplicates,
                                    matrix_only=matrix_only)
     elif data_structure == 'adjacency_dict':
         adjacency_list = {row[0]: row[1:] for row in csv_reader}
         return from_adjacency_list(adjacency_list=adjacency_list, directed=directed, bipartite=bipartite,
-                                   weighted=weighted, reindex=reindex, sum_duplicates=sum_duplicates,
+                                   weighted=weighted, reindex=reindex, shape=shape, sum_duplicates=sum_duplicates,
                                    matrix_only=matrix_only)


@@ -439,7 +460,7 @@ def from_graphml(file_path: str, weight_key: str = 'weight', max_string_size: in
     Returns
     -------
     data: :class:`Bunch`
-        The dataset in a
+        The dataset in a Dataset with the adjacency as a CSR matrix.
     """
     # see http://graphml.graphdrawing.org/primer/graphml-primer.html
     # and http://graphml.graphdrawing.org/specification/dtd.html#top
sknetwork/data/test_graphs.py
CHANGED
@@ -40,7 +40,7 @@ def test_bigraph():
     return sparse.csr_matrix((data, (row, col)), shape=(6, 8))


-def
+def test_disconnected_graph():
     """Simple disconnected undirected graph, used for testing.
     10 nodes, 10 edges.
     """
@@ -68,7 +68,7 @@ def test_graph_bool():
     return adjacency


-def
+def test_clique():
     """Clique graph, used for testing (10 nodes, 45 edges).
     """
     n = 10
sknetwork/data/tests/test_API.py
CHANGED
@@ -0,0 +1,14 @@
+# -*- coding: utf-8 -*-
+"""tests for dataset"""
+
+import unittest
+
+from sknetwork.data.base import Bunch
+
+
+class TestDataset(unittest.TestCase):
+
+    def test(self):
+        dataset = Bunch(name='dataset')
+        self.assertEqual(dataset.name, 'dataset')
+        self.assertEqual(dataset['name'], 'dataset')
@@ -10,7 +10,7 @@ import numpy as np

 from sknetwork.data.load import load_netset, load_konect, clear_data_home, save, load
 from sknetwork.data.toy_graphs import house, star_wars
-from sknetwork.
+from sknetwork.data.timeout import TimeOut


 class TestLoader(unittest.TestCase):