PyPI - scikit-network - Versions diffs - 0.32.1__cp310-cp310-win_amd64.whl → 0.33.1__cp310-cp310-win_amd64.whl - Mend

scikit-network 0.32.1__cp310-cp310-win_amd64.whl → 0.33.1__cp310-cp310-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of scikit-network might be problematic. Click here for more details.

Files changed (67)

{scikit_network-0.32.1.dist-info → scikit_network-0.33.1.dist-info}/AUTHORS.rst +0 -1
scikit_network-0.33.1.dist-info/METADATA +120 -0
{scikit_network-0.32.1.dist-info → scikit_network-0.33.1.dist-info}/RECORD +66 -66
{scikit_network-0.32.1.dist-info → scikit_network-0.33.1.dist-info}/WHEEL +1 -1
sknetwork/__init__.py +1 -1
sknetwork/classification/diffusion.py +4 -3
sknetwork/classification/knn.py +4 -3
sknetwork/classification/metrics.py +3 -3
sknetwork/classification/pagerank.py +1 -1
sknetwork/classification/propagation.py +7 -6
sknetwork/classification/vote.cp310-win_amd64.pyd +0 -0
sknetwork/classification/vote.cpp +684 -677
sknetwork/clustering/leiden.py +2 -1
sknetwork/clustering/leiden_core.cp310-win_amd64.pyd +0 -0
sknetwork/clustering/leiden_core.cpp +713 -702
sknetwork/clustering/louvain.py +6 -6
sknetwork/clustering/louvain_core.cp310-win_amd64.pyd +0 -0
sknetwork/clustering/louvain_core.cpp +713 -702
sknetwork/clustering/metrics.py +1 -1
sknetwork/clustering/tests/test_kcenters.py +5 -37
sknetwork/clustering/tests/test_louvain.py +6 -0
sknetwork/data/__init__.py +1 -1
sknetwork/data/base.py +7 -2
sknetwork/data/load.py +18 -21
sknetwork/data/models.py +15 -15
sknetwork/data/parse.py +19 -17
sknetwork/data/tests/test_API.py +3 -3
sknetwork/data/tests/test_base.py +2 -2
sknetwork/data/tests/test_toy_graphs.py +33 -33
sknetwork/data/toy_graphs.py +35 -43
sknetwork/embedding/base.py +3 -0
sknetwork/embedding/louvain_embedding.py +0 -26
sknetwork/embedding/svd.py +0 -4
sknetwork/embedding/tests/test_louvain_embedding.py +9 -4
sknetwork/embedding/tests/test_svd.py +6 -0
sknetwork/gnn/gnn_classifier.py +1 -1
sknetwork/hierarchy/louvain_hierarchy.py +10 -6
sknetwork/hierarchy/metrics.py +3 -3
sknetwork/hierarchy/paris.cp310-win_amd64.pyd +0 -0
sknetwork/hierarchy/paris.cpp +2651 -2027
sknetwork/hierarchy/paris.pyx +4 -3
sknetwork/hierarchy/tests/test_metrics.py +4 -4
sknetwork/linalg/diteration.cp310-win_amd64.pyd +0 -0
sknetwork/linalg/diteration.cpp +684 -677
sknetwork/linalg/push.cp310-win_amd64.pyd +0 -0
sknetwork/linalg/push.cpp +1769 -1153
sknetwork/linalg/sparse_lowrank.py +1 -1
sknetwork/ranking/betweenness.cp310-win_amd64.pyd +0 -0
sknetwork/ranking/betweenness.cpp +563 -557
sknetwork/regression/diffusion.py +6 -4
sknetwork/topology/cliques.cp310-win_amd64.pyd +0 -0
sknetwork/topology/cliques.cpp +1729 -1110
sknetwork/topology/core.cp310-win_amd64.pyd +0 -0
sknetwork/topology/core.cpp +1755 -1139
sknetwork/topology/cycles.py +1 -1
sknetwork/topology/minheap.cp310-win_amd64.pyd +0 -0
sknetwork/topology/minheap.cpp +687 -677
sknetwork/topology/triangles.cp310-win_amd64.pyd +0 -0
sknetwork/topology/triangles.cpp +437 -432
sknetwork/topology/weisfeiler_lehman_core.cp310-win_amd64.pyd +0 -0
sknetwork/topology/weisfeiler_lehman_core.cpp +684 -677
sknetwork/utils/__init__.py +1 -1
sknetwork/utils/values.py +5 -3
sknetwork/visualization/graphs.py +1 -1
scikit_network-0.32.1.dist-info/METADATA +0 -511
{scikit_network-0.32.1.dist-info → scikit_network-0.33.1.dist-info}/LICENSE +0 -0
{scikit_network-0.32.1.dist-info → scikit_network-0.33.1.dist-info}/top_level.txt +0 -0

sknetwork/clustering/metrics.py CHANGED Viewed

@@ -64,7 +64,7 @@ def get_modularity(input_matrix: Union[sparse.csr_matrix, np.ndarray], labels: n
     >>> from sknetwork.data import house
     >>> adjacency = house()
     >>> labels = np.array([0, 0, 1, 1, 0])
-    >>> np.round(get_modularity(adjacency, labels), 2)
+    >>> float(np.round(get_modularity(adjacency, labels), 2))
     0.11
     """
     adjacency, bipartite = get_adjacency(input_matrix.astype(float))

sknetwork/clustering/tests/test_kcenters.py CHANGED Viewed

@@ -4,7 +4,6 @@
 import unittest
 from sknetwork.clustering import KCenters
-from sknetwork.data import karate_club, painters, star_wars
 from sknetwork.data.test_graphs import *
@@ -13,7 +12,7 @@ class TestKCentersClustering(unittest.TestCase):
     def test_kcenters(self):
         # Test undirected graph
         n_clusters = 2
-        adjacency = karate_club()
+        adjacency = test_graph()
         n_row = adjacency.shape[0]
         kcenters = KCenters(n_clusters=n_clusters)
         labels = kcenters.fit_predict(adjacency)
@@ -22,7 +21,7 @@ class TestKCentersClustering(unittest.TestCase):
         # Test directed graph
         n_clusters = 3
-        adjacency = painters()
+        adjacency = test_digraph()
         n_row = adjacency.shape[0]
         kcenters = KCenters(n_clusters=n_clusters, directed=True)
         labels = kcenters.fit_predict(adjacency)
@@ -31,7 +30,7 @@ class TestKCentersClustering(unittest.TestCase):
         # Test bipartite graph
         n_clusters = 2
-        biadjacency = star_wars()
+        biadjacency = test_bigraph()
         n_row, n_col = biadjacency.shape
         kcenters = KCenters(n_clusters=n_clusters)
         kcenters.fit(biadjacency)
@@ -40,41 +39,10 @@ class TestKCentersClustering(unittest.TestCase):
         self.assertEqual(len(kcenters.labels_col_), n_col)
         self.assertEqual(len(set(labels)), n_clusters)
-    def test_kcenters_centers(self):
-        # Test centers for undirected graphs
-        n_clusters = 2
-        adjacency = karate_club()
-        kcenters = KCenters(n_clusters=n_clusters)
-        kcenters.fit(adjacency)
-        centers = kcenters.centers_
-        self.assertEqual(n_clusters, len(set(centers)))
-        # Test centers for bipartite graphs
-        n_clusters = 2
-        biadjacency = star_wars()
-        n_row, n_col = biadjacency.shape
-        for position in ["row", "col", "both"]:
-            kcenters = KCenters(n_clusters=n_clusters, center_position=position)
-            kcenters.fit(biadjacency)
-            centers_row = kcenters.centers_row_
-            centers_col = kcenters.centers_col_
-            if position == "row":
-                self.assertEqual(n_clusters, len(set(centers_row)))
-                self.assertTrue(np.all(centers_row < n_row))
-                self.assertTrue(centers_col is None)
-            if position == "col":
-                self.assertEqual(n_clusters, len(set(centers_col)))
-                self.assertTrue(np.all((centers_col < n_col) & (0 <= centers_col)))
-                self.assertTrue(centers_row is None)
-            if position == "both":
-                self.assertEqual(n_clusters, len(set(centers_row)) + len(set(centers_col)))
-                self.assertTrue(np.all(centers_row < n_row))
-                self.assertTrue(np.all((centers_col < n_col) & (0 <= centers_col)))
     def test_kcenters_error(self):
         # Test value errors
-        adjacency = karate_club()
-        biadjacency = star_wars()
+        adjacency = test_graph()
+        biadjacency = test_bigraph()
         # test n_clusters error
         kcenters = KCenters(n_clusters=1)

sknetwork/clustering/tests/test_louvain.py CHANGED Viewed

@@ -17,6 +17,12 @@ class TestLouvainClustering(unittest.TestCase):
         labels = Louvain().fit_predict(adjacency)
         self.assertEqual(len(labels), n)
+    def test_format(self):
+        adjacency = test_graph()
+        n = adjacency.shape[0]
+        labels = Louvain().fit_predict(adjacency.toarray())
+        self.assertEqual(len(labels), n)
     def test_modularity(self):
         adjacency = karate_club()
         louvain_d = Louvain(modularity='dugue')

sknetwork/data/__init__.py CHANGED Viewed

@@ -1,5 +1,5 @@
 """data module"""
-from sknetwork.data.base import Bunch
+from sknetwork.data.base import *
 from sknetwork.data.load import *
 from sknetwork.data.models import *
 from sknetwork.data.parse import from_edge_list, from_adjacency_list, from_csv, from_graphml

sknetwork/data/base.py CHANGED Viewed

@@ -6,10 +6,10 @@ Created in May 2023
 """
-class Bunch(dict):
+class Dataset(dict):
     """Container object for datasets.
     Dictionary-like object that exposes its keys as attributes.
-    >>> dataset = Bunch(name='dataset')
+    >>> dataset = Dataset(name='dataset')
     >>> dataset['name']
     'dataset'
     >>> dataset.name
@@ -26,3 +26,8 @@ class Bunch(dict):
             return self[key]
         except KeyError:
             raise AttributeError(key)
+# alias for Dataset
+Bunch = Dataset

sknetwork/data/load.py CHANGED Viewed

@@ -19,15 +19,12 @@ import numpy as np
 from scipy import sparse
 from sknetwork.data.parse import from_csv, load_labels, load_header, load_metadata
-from sknetwork.data.base import Bunch
+from sknetwork.data.base import Dataset
 from sknetwork.utils.check import is_square
 from sknetwork.log import Log
 NETSET_URL = 'https://netset.telecom-paris.fr'
-# former name of Dataset
-Bunch = Bunch
 def is_within_directory(directory, target):
     """Utility function."""
@@ -89,7 +86,7 @@ def clean_data_home(data_home: Optional[Union[str, Path]] = None):
 def load_netset(name: Optional[str] = None, data_home: Optional[Union[str, Path]] = None,
-                verbose: bool = True) -> Optional[Bunch]:
+                verbose: bool = True) -> Optional[Dataset]:
     """Load a dataset from the `NetSet collection
     <https://netset.telecom-paris.fr/>`_.
@@ -105,10 +102,10 @@ def load_netset(name: Optional[str] = None, data_home: Optional[Union[str, Path]
     Returns
     -------
-    dataset : :class:`Bunch`
+    dataset : :class:`Dataset`
         Returned dataset.
     """
-    dataset = Bunch()
+    dataset = Dataset()
     dataset_folder = NETSET_URL + '/datasets/'
     folder_npz = NETSET_URL + '/datasets_npz/'
@@ -167,7 +164,7 @@ def load_netset(name: Optional[str] = None, data_home: Optional[Union[str, Path]
 def load_konect(name: str, data_home: Optional[Union[str, Path]] = None, auto_numpy_bundle: bool = True,
-                verbose: bool = True) -> Bunch:
+                verbose: bool = True) -> Dataset:
     """Load a dataset from the `Konect database
     <http://konect.cc/networks/>`_.
@@ -186,7 +183,7 @@ def load_konect(name: str, data_home: Optional[Union[str, Path]] = None, auto_nu
     Returns
     -------
-    dataset : :class:`Bunch`
+    dataset : :class:`Dataset`
         Object with the following attributes:
              * `adjacency` or `biadjacency`: the adjacency/biadjacency matrix for the dataset
@@ -240,7 +237,7 @@ def load_konect(name: str, data_home: Optional[Union[str, Path]] = None, auto_nu
         logger.print_log('Loading from local bundle...')
         return load_from_numpy_bundle(name + '_bundle', data_path)
-    dataset = Bunch()
+    dataset = Dataset()
     path = data_konect / name / name
     if not path.exists() or len(listdir(path)) == 0:
         raise Exception("No data downloaded.")
@@ -269,7 +266,7 @@ def load_konect(name: str, data_home: Optional[Union[str, Path]] = None, auto_nu
         else:
             dataset.meta.name = name
     else:
-        dataset.meta = Bunch()
+        dataset.meta = Dataset()
         dataset.meta.name = name
     if auto_numpy_bundle:
@@ -280,12 +277,12 @@ def load_konect(name: str, data_home: Optional[Union[str, Path]] = None, auto_nu
     return dataset
-def save_to_numpy_bundle(data: Bunch, bundle_name: str, data_home: Optional[Union[str, Path]] = None):
+def save_to_numpy_bundle(data: Dataset, bundle_name: str, data_home: Optional[Union[str, Path]] = None):
     """Save a dataset in the specified data home to a collection of Numpy and Pickle files for faster subsequent loads.
     Parameters
     ----------
-    data: Bunch
+    data: Dataset
         Data to save.
     bundle_name: str
         Name to be used for the bundle folder.
@@ -317,7 +314,7 @@ def load_from_numpy_bundle(bundle_name: str, data_home: Optional[Union[str, Path
     Returns
     -------
-    data: Bunch
+    data: Dataset
         Data.
     """
     data_home = get_data_home(data_home)
@@ -326,7 +323,7 @@ def load_from_numpy_bundle(bundle_name: str, data_home: Optional[Union[str, Path
         raise FileNotFoundError('No bundle at ' + str(data_path))
     else:
         files = listdir(data_path)
-        data = Bunch()
+        data = Dataset()
         for file in files:
             if len(file.split('.')) == 2:
                 file_name, file_extension = file.split('.')
@@ -340,7 +337,7 @@ def load_from_numpy_bundle(bundle_name: str, data_home: Optional[Union[str, Path
         return data
-def save(folder: Union[str, Path], data: Union[sparse.csr_matrix, Bunch]):
+def save(folder: Union[str, Path], data: Union[sparse.csr_matrix, Dataset]):
     """Save a dataset or a CSR matrix in the current directory to a collection of Numpy and Pickle files for faster
     subsequent loads. Supported attribute types include sparse matrices, NumPy arrays, strings and objects Dataset.
@@ -348,13 +345,13 @@ def save(folder: Union[str, Path], data: Union[sparse.csr_matrix, Bunch]):
     ----------
     folder : str or :class:`pathlib.Path`
         Name of the bundle folder.
-    data : Union[sparse.csr_matrix, Bunch]
+    data : Union[sparse.csr_matrix, Dataset]
         Data to save.
     Example
     -------
     >>> from sknetwork.data import save
-    >>> dataset = Bunch()
+    >>> dataset = Dataset()
     >>> dataset.adjacency = sparse.csr_matrix(np.random.random((3, 3)) < 0.5)
     >>> dataset.names = np.array(['a', 'b', 'c'])
     >>> save('dataset', dataset)
@@ -366,7 +363,7 @@ def save(folder: Union[str, Path], data: Union[sparse.csr_matrix, Bunch]):
     if folder.exists():
         shutil.rmtree(folder)
     if isinstance(data, sparse.csr_matrix):
-        dataset = Bunch()
+        dataset = Dataset()
         if is_square(data):
             dataset.adjacency = data
         else:
@@ -388,13 +385,13 @@ def load(folder: Union[str, Path]):
     Returns
     -------
-    data: Bunch
+    data: Dataset
         Data.
     Example
     -------
     >>> from sknetwork.data import save
-    >>> dataset = Bunch()
+    >>> dataset = Dataset()
     >>> dataset.adjacency = sparse.csr_matrix(np.random.random((3, 3)) < 0.5)
     >>> dataset.names = np.array(['a', 'b', 'c'])
     >>> save('dataset', dataset)

sknetwork/data/models.py CHANGED Viewed

@@ -12,7 +12,7 @@ from typing import Union, Optional, Iterable
 import numpy as np
 from scipy import sparse
-from sknetwork.data.base import Bunch
+from sknetwork.data.base import Dataset
 from sknetwork.data.parse import from_edge_list
 from sknetwork.utils.check import check_random_state
 from sknetwork.utils.format import directed2undirected
@@ -20,7 +20,7 @@ from sknetwork.utils.format import directed2undirected
 def block_model(sizes: Iterable, p_in: Union[float, list, np.ndarray] = .2, p_out: float = .05,
                 directed: bool = False, self_loops: bool = False, metadata: bool = False, seed: Optional[int] = None) \
-                -> Union[sparse.csr_matrix, Bunch]:
+                -> Union[sparse.csr_matrix, Dataset]:
     """Stochastic block model.
     Parameters
@@ -83,7 +83,7 @@ def block_model(sizes: Iterable, p_in: Union[float, list, np.ndarray] = .2, p_ou
     else:
         adjacency = directed2undirected(sparse.csr_matrix(sparse.triu(adjacency)), weighted=False)
     if metadata:
-        graph = Bunch()
+        graph = Dataset()
         graph.adjacency = adjacency
         labels = np.repeat(np.arange(len(sizes)), sizes)
         graph.labels = labels
@@ -129,7 +129,7 @@ def erdos_renyi(n: int = 20, p: float = .3, directed: bool = False, self_loops:
     return block_model([n], p, 0., directed=directed, self_loops=self_loops, metadata=False, seed=seed)
-def linear_digraph(n: int = 3, metadata: bool = False) -> Union[sparse.csr_matrix, Bunch]:
+def linear_digraph(n: int = 3, metadata: bool = False) -> Union[sparse.csr_matrix, Dataset]:
     """Linear graph (directed).
     Parameters
@@ -158,7 +158,7 @@ def linear_digraph(n: int = 3, metadata: bool = False) -> Union[sparse.csr_matri
     if metadata:
         x = np.arange(n)
         y = np.zeros(n)
-        graph = Bunch()
+        graph = Dataset()
         graph.adjacency = adjacency
         graph.position = np.array((x, y)).T
         return graph
@@ -166,7 +166,7 @@ def linear_digraph(n: int = 3, metadata: bool = False) -> Union[sparse.csr_matri
         return adjacency
-def linear_graph(n: int = 3, metadata: bool = False) -> Union[sparse.csr_matrix, Bunch]:
+def linear_graph(n: int = 3, metadata: bool = False) -> Union[sparse.csr_matrix, Dataset]:
     """Linear graph (undirected).
     Parameters
@@ -218,7 +218,7 @@ def cyclic_position(n: int) -> np.ndarray:
     return position
-def cyclic_digraph(n: int = 3, metadata: bool = False) -> Union[sparse.csr_matrix, Bunch]:
+def cyclic_digraph(n: int = 3, metadata: bool = False) -> Union[sparse.csr_matrix, Dataset]:
     """Cyclic graph (directed).
     Parameters
@@ -245,7 +245,7 @@ def cyclic_digraph(n: int = 3, metadata: bool = False) -> Union[sparse.csr_matri
     adjacency = sparse.csr_matrix((np.ones(len(row), dtype=int), (row, col)), shape=(n, n))
     if metadata:
-        graph = Bunch()
+        graph = Dataset()
         graph.adjacency = adjacency
         graph.position = cyclic_position(n)
         return graph
@@ -253,7 +253,7 @@ def cyclic_digraph(n: int = 3, metadata: bool = False) -> Union[sparse.csr_matri
         return adjacency
-def cyclic_graph(n: int = 3, metadata: bool = False) -> Union[sparse.csr_matrix, Bunch]:
+def cyclic_graph(n: int = 3, metadata: bool = False) -> Union[sparse.csr_matrix, Dataset]:
     """Cyclic graph (undirected).
     Parameters
@@ -283,7 +283,7 @@ def cyclic_graph(n: int = 3, metadata: bool = False) -> Union[sparse.csr_matrix,
         return graph.adjacency
-def grid(n1: int = 10, n2: int = 10, metadata: bool = False) -> Union[sparse.csr_matrix, Bunch]:
+def grid(n1: int = 10, n2: int = 10, metadata: bool = False) -> Union[sparse.csr_matrix, Dataset]:
     """Grid (undirected).
     Parameters
@@ -312,7 +312,7 @@ def grid(n1: int = 10, n2: int = 10, metadata: bool = False) -> Union[sparse.csr
     edges = list(map(lambda edge: (node_id[edge[0]], node_id[edge[1]]), edges))
     adjacency = from_edge_list(edges, reindex=False, matrix_only=True)
     if metadata:
-        graph = Bunch()
+        graph = Dataset()
         graph.adjacency = adjacency
         graph.position = np.array(nodes)
         return graph
@@ -320,7 +320,7 @@ def grid(n1: int = 10, n2: int = 10, metadata: bool = False) -> Union[sparse.csr
         return adjacency
-def star(n_branches: int = 3, metadata: bool = False) -> Union[sparse.csr_matrix, Bunch]:
+def star(n_branches: int = 3, metadata: bool = False) -> Union[sparse.csr_matrix, Dataset]:
     """Star (undirected).
     Parameters
@@ -345,7 +345,7 @@ def star(n_branches: int = 3, metadata: bool = False) -> Union[sparse.csr_matrix
     edges = [(0, i+1) for i in range(n_branches)]
     adjacency = from_edge_list(edges, reindex=False, matrix_only=True)
     if metadata:
-        graph = Bunch()
+        graph = Dataset()
         graph.adjacency = adjacency
         angles = 2 * np.pi * np.arange(n_branches) / n_branches
         x = [0] + list(np.cos(angles))
@@ -402,7 +402,7 @@ def albert_barabasi(n: int = 100, degree: int = 3, directed: bool = False, seed:
 def watts_strogatz(n: int = 100, degree: int = 6, prob: float = 0.05, seed: Optional[int] = None,
-                   metadata: bool = False) -> Union[sparse.csr_matrix, Bunch]:
+                   metadata: bool = False) -> Union[sparse.csr_matrix, Dataset]:
     """Watts-Strogatz model.
     Parameters
@@ -451,7 +451,7 @@ def watts_strogatz(n: int = 100, degree: int = 6, prob: float = 0.05, seed: Opti
                 adjacency[j, i] = 0
     adjacency = sparse.csr_matrix(adjacency, shape=adjacency.shape)
     if metadata:
-        graph = Bunch()
+        graph = Dataset()
         graph.adjacency = adjacency
         graph.position = cyclic_position(n)
         return graph

sknetwork/data/parse.py CHANGED Viewed

@@ -14,13 +14,13 @@ from xml.etree import ElementTree
 import numpy as np
 from scipy import sparse
-from sknetwork.data.base import Bunch
+from sknetwork.data.base import Dataset
 from sknetwork.utils.format import directed2undirected
 def from_edge_list(edge_list: Union[np.ndarray, List[Tuple]], directed: bool = False,
                    bipartite: bool = False, weighted: bool = True, reindex: bool = False, shape: Optional[tuple] = None,
-                   sum_duplicates: bool = True, matrix_only: bool = None) -> Union[Bunch, sparse.csr_matrix]:
+                   sum_duplicates: bool = True, matrix_only: bool = None) -> Union[Dataset, sparse.csr_matrix]:
     """Load a graph from an edge list.
     Parameters
@@ -93,7 +93,7 @@ def from_edge_list(edge_list: Union[np.ndarray, List[Tuple]], directed: bool = F
 def from_adjacency_list(adjacency_list: Union[List[List], Dict[str, List]], directed: bool = False,
                         bipartite: bool = False, weighted: bool = True, reindex: bool = False,
                         shape: Optional[tuple] = None, sum_duplicates: bool = True, matrix_only: bool = None) \
-                        -> Union[Bunch, sparse.csr_matrix]:
+                        -> Union[Dataset, sparse.csr_matrix]:
     """Load a graph from an adjacency list.
     Parameters
@@ -147,7 +147,7 @@ def from_adjacency_list(adjacency_list: Union[List[List], Dict[str, List]], dire
 def from_edge_array(edge_array: np.ndarray, weights: np.ndarray = None, directed: bool = False, bipartite: bool = False,
                     weighted: bool = True, reindex: bool = False, shape: Optional[tuple] = None,
-                    sum_duplicates: bool = True, matrix_only: bool = None) -> Union[Bunch, sparse.csr_matrix]:
+                    sum_duplicates: bool = True, matrix_only: bool = None) -> Union[Dataset, sparse.csr_matrix]:
     """Load a graph from an edge array of shape (n_edges, 2) and weights (optional).
     Parameters
@@ -202,7 +202,7 @@ def from_edge_array(edge_array: np.ndarray, weights: np.ndarray = None, directed
         _, index = np.unique(edge_array, axis=0, return_index=True)
         edge_array = edge_array[index]
         weights = weights[index]
-    graph = Bunch()
+    graph = Dataset()
     if bipartite:
         row = edge_array[:, 0]
         col = edge_array[:, 1]
@@ -224,6 +224,7 @@ def from_edge_array(edge_array: np.ndarray, weights: np.ndarray = None, directed
         else:
             n_col = max(col) + 1
         matrix = sparse.csr_matrix((weights, (row, col)), shape=(n_row, n_col))
+        matrix.sum_duplicates()
         graph.biadjacency = matrix
     else:
         nodes = edge_array.ravel()
@@ -241,6 +242,7 @@ def from_edge_array(edge_array: np.ndarray, weights: np.ndarray = None, directed
         matrix = sparse.csr_matrix((weights, (row, col)), shape=(n, n))
         if not directed:
             matrix = directed2undirected(matrix)
+        matrix.sum_duplicates()
         graph.adjacency = matrix
     if matrix_only or (matrix_only is None and len(graph) == 1):
         return matrix
@@ -251,7 +253,7 @@ def from_edge_array(edge_array: np.ndarray, weights: np.ndarray = None, directed
 def from_csv(file_path: str, delimiter: str = None, sep: str = None, comments: str = '#%',
              data_structure: str = None, directed: bool = False, bipartite: bool = False, weighted: bool = True,
              reindex: bool = False, shape: Optional[tuple] = None, sum_duplicates: bool = True,
-             matrix_only: bool = None) -> Union[Bunch, sparse.csr_matrix]:
+             matrix_only: bool = None) -> Union[Dataset, sparse.csr_matrix]:
     """Load a graph from a CSV or TSV file.
     The delimiter can be specified (e.g., ' ' for space-separated values).
@@ -432,9 +434,9 @@ def load_header(file: str):
     return directed, bipartite, weighted
-def load_metadata(file: str, delimiter: str = ': ') -> Bunch:
+def load_metadata(file: str, delimiter: str = ': ') -> Dataset:
     """Extract metadata from the file."""
-    metadata = Bunch()
+    metadata = Dataset()
     with open(file, 'r', encoding='utf-8') as f:
         for row in f:
             parts = row.split(delimiter)
@@ -443,7 +445,7 @@ def load_metadata(file: str, delimiter: str = ': ') -> Bunch:
     return metadata
-def from_graphml(file_path: str, weight_key: str = 'weight', max_string_size: int = 512) -> Bunch:
+def from_graphml(file_path: str, weight_key: str = 'weight', max_string_size: int = 512) -> Dataset:
     """Load graph from GraphML file.
     Hyperedges and nested graphs are not supported.
@@ -459,7 +461,7 @@ def from_graphml(file_path: str, weight_key: str = 'weight', max_string_size: in
     Returns
     -------
-    data: :class:`Bunch`
+    data: :class:`Dataset`
         The dataset in a Dataset with the adjacency as a CSR matrix.
     """
     # see http://graphml.graphdrawing.org/primer/graphml-primer.html
@@ -475,12 +477,12 @@ def from_graphml(file_path: str, weight_key: str = 'weight', max_string_size: in
     # indices in the graph tree
     node_indices = []
     edge_indices = []
-    data = Bunch()
+    data = Dataset()
     graph = None
     file_description = None
-    attribute_descriptions = Bunch()
-    attribute_descriptions.node = Bunch()
-    attribute_descriptions.edge = Bunch()
+    attribute_descriptions = Dataset()
+    attribute_descriptions.node = Dataset()
+    attribute_descriptions.edge = Dataset()
     keys = {}
     for file_element in tree.getroot():
         if file_element.tag.endswith('graph'):
@@ -518,7 +520,7 @@ def from_graphml(file_path: str, weight_key: str = 'weight', max_string_size: in
                 if file_element.attrib['for'] == 'node':
                     size = n_nodes
                     if 'node_attribute' not in data:
-                        data.node_attribute = Bunch()
+                        data.node_attribute = Dataset()
                     for key_element in file_element:
                         if key_element.tag.endswith('desc'):
                             attribute_descriptions.node[attribute_name] = key_element.text
@@ -535,7 +537,7 @@ def from_graphml(file_path: str, weight_key: str = 'weight', max_string_size: in
                 elif file_element.attrib['for'] == 'edge':
                     size = n_edges
                     if 'edge_attribute' not in data:
-                        data.edge_attribute = Bunch()
+                        data.edge_attribute = Dataset()
                     for key_element in file_element:
                         if key_element.tag.endswith('desc'):
                             attribute_descriptions.edge[attribute_name] = key_element.text
@@ -553,7 +555,7 @@ def from_graphml(file_path: str, weight_key: str = 'weight', max_string_size: in
         elif file_element.tag.endswith('desc'):
             file_description = file_element.text
     if file_description or attribute_descriptions.node or attribute_descriptions.edge:
-        data.meta = Bunch()
+        data.meta = Dataset()
         if file_description:
             data.meta['description'] = file_description
         if attribute_descriptions.node or attribute_descriptions.edge:

sknetwork/data/tests/test_API.py CHANGED Viewed

@@ -8,7 +8,7 @@ import warnings
 from sknetwork.data.load import *
 from sknetwork.data.toy_graphs import *
-from sknetwork.data import Bunch
+from sknetwork.data import Dataset
 class TestDataAPI(unittest.TestCase):
@@ -17,14 +17,14 @@ class TestDataAPI(unittest.TestCase):
         toy_graphs = [karate_club, painters, bow_tie, house, miserables]
         for toy_graph in toy_graphs:
             self.assertEqual(type(toy_graph()), sparse.csr_matrix)
-            self.assertEqual(type(toy_graph(metadata=True)), Bunch)
+            self.assertEqual(type(toy_graph(metadata=True)), Dataset)
     def test_load(self):
         tmp_data_dir = tempfile.gettempdir() + '/stub'
         clear_data_home(tmp_data_dir)
         try:
             graph = load_netset('stub', tmp_data_dir)
-            self.assertEqual(type(graph), Bunch)
+            self.assertEqual(type(graph), Dataset)
         except URLError:  # pragma: no cover
             warnings.warn('Could not reach NetSet. Corresponding test has not been performed.', RuntimeWarning)
             return

sknetwork/data/tests/test_base.py CHANGED Viewed

@@ -3,12 +3,12 @@
 import unittest
-from sknetwork.data.base import Bunch
+from sknetwork.data.base import Dataset
 class TestDataset(unittest.TestCase):
     def test(self):
-        dataset = Bunch(name='dataset')
+        dataset = Dataset(name='dataset')
         self.assertEqual(dataset.name, 'dataset')
         self.assertEqual(dataset['name'], 'dataset')