PyPI - scikit-network - Versions diffs - 0.31.0__cp310-cp310-win_amd64.whl → 0.33.0__cp310-cp310-win_amd64.whl - Mend

scikit-network 0.31.0__cp310-cp310-win_amd64.whl → 0.33.0__cp310-cp310-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of scikit-network might be problematic. Click here for more details.

Files changed (126) hide show

{scikit_network-0.31.0.dist-info → scikit_network-0.33.0.dist-info}/AUTHORS.rst +3 -1
{scikit_network-0.31.0.dist-info → scikit_network-0.33.0.dist-info}/METADATA +27 -5
scikit_network-0.33.0.dist-info/RECORD +228 -0
{scikit_network-0.31.0.dist-info → scikit_network-0.33.0.dist-info}/WHEEL +1 -1
sknetwork/__init__.py +1 -1
sknetwork/classification/base.py +1 -1
sknetwork/classification/base_rank.py +3 -3
sknetwork/classification/diffusion.py +25 -16
sknetwork/classification/knn.py +23 -16
sknetwork/classification/metrics.py +4 -4
sknetwork/classification/pagerank.py +12 -8
sknetwork/classification/propagation.py +25 -17
sknetwork/classification/tests/test_diffusion.py +10 -0
sknetwork/classification/vote.cp310-win_amd64.pyd +0 -0
sknetwork/classification/vote.cpp +14549 -8668
sknetwork/clustering/__init__.py +3 -1
sknetwork/clustering/base.py +1 -1
sknetwork/clustering/kcenters.py +253 -0
sknetwork/clustering/leiden.py +242 -0
sknetwork/clustering/leiden_core.cp310-win_amd64.pyd +0 -0
sknetwork/clustering/leiden_core.cpp +31564 -0
sknetwork/clustering/leiden_core.pyx +124 -0
sknetwork/clustering/louvain.py +118 -83
sknetwork/clustering/louvain_core.cp310-win_amd64.pyd +0 -0
sknetwork/clustering/louvain_core.cpp +21876 -16332
sknetwork/clustering/louvain_core.pyx +86 -94
sknetwork/clustering/postprocess.py +2 -2
sknetwork/clustering/propagation_clustering.py +4 -4
sknetwork/clustering/tests/test_API.py +7 -3
sknetwork/clustering/tests/test_kcenters.py +60 -0
sknetwork/clustering/tests/test_leiden.py +34 -0
sknetwork/clustering/tests/test_louvain.py +2 -3
sknetwork/data/__init__.py +1 -1
sknetwork/data/base.py +7 -2
sknetwork/data/load.py +20 -25
sknetwork/data/models.py +15 -15
sknetwork/data/parse.py +57 -34
sknetwork/data/tests/test_API.py +3 -3
sknetwork/data/tests/test_base.py +2 -2
sknetwork/data/tests/test_parse.py +9 -12
sknetwork/data/tests/test_toy_graphs.py +33 -33
sknetwork/data/toy_graphs.py +35 -43
sknetwork/embedding/__init__.py +0 -1
sknetwork/embedding/base.py +23 -19
sknetwork/embedding/force_atlas.py +3 -2
sknetwork/embedding/louvain_embedding.py +1 -27
sknetwork/embedding/random_projection.py +5 -3
sknetwork/embedding/spectral.py +0 -73
sknetwork/embedding/svd.py +0 -4
sknetwork/embedding/tests/test_API.py +4 -28
sknetwork/embedding/tests/test_louvain_embedding.py +13 -13
sknetwork/embedding/tests/test_spectral.py +2 -5
sknetwork/embedding/tests/test_svd.py +7 -1
sknetwork/gnn/base_layer.py +3 -3
sknetwork/gnn/gnn_classifier.py +41 -87
sknetwork/gnn/layer.py +1 -1
sknetwork/gnn/loss.py +1 -1
sknetwork/gnn/optimizer.py +4 -3
sknetwork/gnn/tests/test_base_layer.py +4 -4
sknetwork/gnn/tests/test_gnn_classifier.py +12 -39
sknetwork/gnn/utils.py +8 -8
sknetwork/hierarchy/base.py +27 -0
sknetwork/hierarchy/louvain_hierarchy.py +55 -47
sknetwork/hierarchy/paris.cp310-win_amd64.pyd +0 -0
sknetwork/hierarchy/paris.cpp +27667 -20915
sknetwork/hierarchy/paris.pyx +11 -10
sknetwork/hierarchy/postprocess.py +16 -16
sknetwork/hierarchy/tests/test_algos.py +5 -0
sknetwork/hierarchy/tests/test_metrics.py +4 -4
sknetwork/linalg/__init__.py +1 -1
sknetwork/linalg/diteration.cp310-win_amd64.pyd +0 -0
sknetwork/linalg/diteration.cpp +13916 -8050
sknetwork/linalg/{normalization.py → normalizer.py} +17 -14
sknetwork/linalg/operators.py +1 -1
sknetwork/linalg/ppr_solver.py +1 -1
sknetwork/linalg/push.cp310-win_amd64.pyd +0 -0
sknetwork/linalg/push.cpp +23187 -16973
sknetwork/linalg/tests/test_normalization.py +3 -7
sknetwork/linalg/tests/test_operators.py +2 -6
sknetwork/linalg/tests/test_ppr.py +1 -1
sknetwork/linkpred/base.py +12 -1
sknetwork/linkpred/nn.py +6 -6
sknetwork/path/distances.py +11 -4
sknetwork/path/shortest_path.py +1 -1
sknetwork/path/tests/test_distances.py +7 -0
sknetwork/path/tests/test_search.py +2 -2
sknetwork/ranking/base.py +11 -6
sknetwork/ranking/betweenness.cp310-win_amd64.pyd +0 -0
sknetwork/ranking/betweenness.cpp +5256 -2190
sknetwork/ranking/pagerank.py +13 -12
sknetwork/ranking/tests/test_API.py +0 -2
sknetwork/ranking/tests/test_betweenness.py +1 -1
sknetwork/ranking/tests/test_pagerank.py +11 -5
sknetwork/regression/base.py +18 -1
sknetwork/regression/diffusion.py +30 -14
sknetwork/regression/tests/test_diffusion.py +8 -0
sknetwork/topology/__init__.py +3 -1
sknetwork/topology/cliques.cp310-win_amd64.pyd +0 -0
sknetwork/topology/cliques.cpp +23528 -16848
sknetwork/topology/core.cp310-win_amd64.pyd +0 -0
sknetwork/topology/core.cpp +22849 -16581
sknetwork/topology/cycles.py +243 -0
sknetwork/topology/minheap.cp310-win_amd64.pyd +0 -0
sknetwork/topology/minheap.cpp +19495 -13469
sknetwork/topology/structure.py +2 -42
sknetwork/topology/tests/test_cycles.py +65 -0
sknetwork/topology/tests/test_structure.py +2 -16
sknetwork/topology/triangles.cp310-win_amd64.pyd +0 -0
sknetwork/topology/triangles.cpp +5283 -1397
sknetwork/topology/triangles.pyx +7 -4
sknetwork/topology/weisfeiler_lehman_core.cp310-win_amd64.pyd +0 -0
sknetwork/topology/weisfeiler_lehman_core.cpp +14781 -8915
sknetwork/utils/__init__.py +1 -1
sknetwork/utils/format.py +1 -1
sknetwork/utils/membership.py +2 -2
sknetwork/utils/values.py +5 -3
sknetwork/visualization/__init__.py +2 -2
sknetwork/visualization/dendrograms.py +55 -7
sknetwork/visualization/graphs.py +261 -44
sknetwork/visualization/tests/test_dendrograms.py +9 -9
sknetwork/visualization/tests/test_graphs.py +63 -57
scikit_network-0.31.0.dist-info/RECORD +0 -221
sknetwork/embedding/louvain_hierarchy.py +0 -142
sknetwork/embedding/tests/test_louvain_hierarchy.py +0 -19
{scikit_network-0.31.0.dist-info → scikit_network-0.33.0.dist-info}/LICENSE +0 -0
{scikit_network-0.31.0.dist-info → scikit_network-0.33.0.dist-info}/top_level.txt +0 -0

sknetwork/data/models.py CHANGED Viewed

@@ -12,7 +12,7 @@ from typing import Union, Optional, Iterable
 import numpy as np
 from scipy import sparse
-from sknetwork.data.base import Bunch
+from sknetwork.data.base import Dataset
 from sknetwork.data.parse import from_edge_list
 from sknetwork.utils.check import check_random_state
 from sknetwork.utils.format import directed2undirected
@@ -20,7 +20,7 @@ from sknetwork.utils.format import directed2undirected
 def block_model(sizes: Iterable, p_in: Union[float, list, np.ndarray] = .2, p_out: float = .05,
                 directed: bool = False, self_loops: bool = False, metadata: bool = False, seed: Optional[int] = None) \
-                -> Union[sparse.csr_matrix, Bunch]:
+                -> Union[sparse.csr_matrix, Dataset]:
     """Stochastic block model.
     Parameters
@@ -83,7 +83,7 @@ def block_model(sizes: Iterable, p_in: Union[float, list, np.ndarray] = .2, p_ou
     else:
         adjacency = directed2undirected(sparse.csr_matrix(sparse.triu(adjacency)), weighted=False)
     if metadata:
-        graph = Bunch()
+        graph = Dataset()
         graph.adjacency = adjacency
         labels = np.repeat(np.arange(len(sizes)), sizes)
         graph.labels = labels
@@ -129,7 +129,7 @@ def erdos_renyi(n: int = 20, p: float = .3, directed: bool = False, self_loops:
     return block_model([n], p, 0., directed=directed, self_loops=self_loops, metadata=False, seed=seed)
-def linear_digraph(n: int = 3, metadata: bool = False) -> Union[sparse.csr_matrix, Bunch]:
+def linear_digraph(n: int = 3, metadata: bool = False) -> Union[sparse.csr_matrix, Dataset]:
     """Linear graph (directed).
     Parameters
@@ -158,7 +158,7 @@ def linear_digraph(n: int = 3, metadata: bool = False) -> Union[sparse.csr_matri
     if metadata:
         x = np.arange(n)
         y = np.zeros(n)
-        graph = Bunch()
+        graph = Dataset()
         graph.adjacency = adjacency
         graph.position = np.array((x, y)).T
         return graph
@@ -166,7 +166,7 @@ def linear_digraph(n: int = 3, metadata: bool = False) -> Union[sparse.csr_matri
         return adjacency
-def linear_graph(n: int = 3, metadata: bool = False) -> Union[sparse.csr_matrix, Bunch]:
+def linear_graph(n: int = 3, metadata: bool = False) -> Union[sparse.csr_matrix, Dataset]:
     """Linear graph (undirected).
     Parameters
@@ -218,7 +218,7 @@ def cyclic_position(n: int) -> np.ndarray:
     return position
-def cyclic_digraph(n: int = 3, metadata: bool = False) -> Union[sparse.csr_matrix, Bunch]:
+def cyclic_digraph(n: int = 3, metadata: bool = False) -> Union[sparse.csr_matrix, Dataset]:
     """Cyclic graph (directed).
     Parameters
@@ -245,7 +245,7 @@ def cyclic_digraph(n: int = 3, metadata: bool = False) -> Union[sparse.csr_matri
     adjacency = sparse.csr_matrix((np.ones(len(row), dtype=int), (row, col)), shape=(n, n))
     if metadata:
-        graph = Bunch()
+        graph = Dataset()
         graph.adjacency = adjacency
         graph.position = cyclic_position(n)
         return graph
@@ -253,7 +253,7 @@ def cyclic_digraph(n: int = 3, metadata: bool = False) -> Union[sparse.csr_matri
         return adjacency
-def cyclic_graph(n: int = 3, metadata: bool = False) -> Union[sparse.csr_matrix, Bunch]:
+def cyclic_graph(n: int = 3, metadata: bool = False) -> Union[sparse.csr_matrix, Dataset]:
     """Cyclic graph (undirected).
     Parameters
@@ -283,7 +283,7 @@ def cyclic_graph(n: int = 3, metadata: bool = False) -> Union[sparse.csr_matrix,
         return graph.adjacency
-def grid(n1: int = 10, n2: int = 10, metadata: bool = False) -> Union[sparse.csr_matrix, Bunch]:
+def grid(n1: int = 10, n2: int = 10, metadata: bool = False) -> Union[sparse.csr_matrix, Dataset]:
     """Grid (undirected).
     Parameters
@@ -312,7 +312,7 @@ def grid(n1: int = 10, n2: int = 10, metadata: bool = False) -> Union[sparse.csr
     edges = list(map(lambda edge: (node_id[edge[0]], node_id[edge[1]]), edges))
     adjacency = from_edge_list(edges, reindex=False, matrix_only=True)
     if metadata:
-        graph = Bunch()
+        graph = Dataset()
         graph.adjacency = adjacency
         graph.position = np.array(nodes)
         return graph
@@ -320,7 +320,7 @@ def grid(n1: int = 10, n2: int = 10, metadata: bool = False) -> Union[sparse.csr
         return adjacency
-def star(n_branches: int = 3, metadata: bool = False) -> Union[sparse.csr_matrix, Bunch]:
+def star(n_branches: int = 3, metadata: bool = False) -> Union[sparse.csr_matrix, Dataset]:
     """Star (undirected).
     Parameters
@@ -345,7 +345,7 @@ def star(n_branches: int = 3, metadata: bool = False) -> Union[sparse.csr_matrix
     edges = [(0, i+1) for i in range(n_branches)]
     adjacency = from_edge_list(edges, reindex=False, matrix_only=True)
     if metadata:
-        graph = Bunch()
+        graph = Dataset()
         graph.adjacency = adjacency
         angles = 2 * np.pi * np.arange(n_branches) / n_branches
         x = [0] + list(np.cos(angles))
@@ -402,7 +402,7 @@ def albert_barabasi(n: int = 100, degree: int = 3, directed: bool = False, seed:
 def watts_strogatz(n: int = 100, degree: int = 6, prob: float = 0.05, seed: Optional[int] = None,
-                   metadata: bool = False) -> Union[sparse.csr_matrix, Bunch]:
+                   metadata: bool = False) -> Union[sparse.csr_matrix, Dataset]:
     """Watts-Strogatz model.
     Parameters
@@ -451,7 +451,7 @@ def watts_strogatz(n: int = 100, degree: int = 6, prob: float = 0.05, seed: Opti
                 adjacency[j, i] = 0
     adjacency = sparse.csr_matrix(adjacency, shape=adjacency.shape)
     if metadata:
-        graph = Bunch()
+        graph = Dataset()
         graph.adjacency = adjacency
         graph.position = cyclic_position(n)
         return graph

sknetwork/data/parse.py CHANGED Viewed

@@ -8,19 +8,19 @@ Created in December 2018
 """
 from csv import reader
-from typing import Dict, List, Tuple, Union
+from typing import Dict, List, Tuple, Union, Optional
 from xml.etree import ElementTree
 import numpy as np
 from scipy import sparse
-from sknetwork.data.base import Bunch
+from sknetwork.data.base import Dataset
 from sknetwork.utils.format import directed2undirected
 def from_edge_list(edge_list: Union[np.ndarray, List[Tuple]], directed: bool = False,
-                   bipartite: bool = False, weighted: bool = True, reindex: bool = True,
-                   sum_duplicates: bool = True, matrix_only: bool = None) -> Union[Bunch, sparse.csr_matrix]:
+                   bipartite: bool = False, weighted: bool = True, reindex: bool = False, shape: Optional[tuple] = None,
+                   sum_duplicates: bool = True, matrix_only: bool = None) -> Union[Dataset, sparse.csr_matrix]:
     """Load a graph from an edge list.
     Parameters
@@ -37,6 +37,9 @@ def from_edge_list(edge_list: Union[np.ndarray, List[Tuple]], directed: bool = F
     reindex : bool
         If ``True``, reindex nodes and returns the original node indices as names.
         Reindexing is enforced if nodes are not integers.
+    shape : tuple
+        Shape of the adjacency or biadjacency matrix.
+        If not specified or if nodes are reindexed, the shape is the smallest compatible with node indices.
     sum_duplicates : bool
         If ``True`` (default), sums weights of duplicate edges.
         Otherwise, the weight of each edge is that of the first occurrence of this edge.
@@ -83,12 +86,14 @@ def from_edge_list(edge_list: Union[np.ndarray, List[Tuple]], directed: bool = F
     else:
         raise TypeError('The edge list must be given as a NumPy array or a list of tuples.')
     return from_edge_array(edge_array=edge_array, weights=weights, directed=directed, bipartite=bipartite,
-                           weighted=weighted, reindex=reindex, sum_duplicates=sum_duplicates, matrix_only=matrix_only)
+                           weighted=weighted, reindex=reindex, shape=shape, sum_duplicates=sum_duplicates,
+                           matrix_only=matrix_only)
 def from_adjacency_list(adjacency_list: Union[List[List], Dict[str, List]], directed: bool = False,
-                        bipartite: bool = False, weighted: bool = True, reindex: bool = True,
-                        sum_duplicates: bool = True, matrix_only: bool = None) -> Union[Bunch, sparse.csr_matrix]:
+                        bipartite: bool = False, weighted: bool = True, reindex: bool = False,
+                        shape: Optional[tuple] = None, sum_duplicates: bool = True, matrix_only: bool = None) \
+                        -> Union[Dataset, sparse.csr_matrix]:
     """Load a graph from an adjacency list.
     Parameters
@@ -104,6 +109,9 @@ def from_adjacency_list(adjacency_list: Union[List[List], Dict[str, List]], dire
     reindex : bool
         If ``True``, reindex nodes and returns the original node indices as names.
         Reindexing is enforced if nodes are not integers.
+    shape : tuple
+        Shape of the adjacency or biadjacency matrix.
+        If not specified or if nodes are reindexed, the shape is the smallest compatible with node indices.
     sum_duplicates : bool
         If ``True`` (default), sums weights of duplicate edges.
         Otherwise, the weight of each edge is that of the first occurrence of this edge.
@@ -134,12 +142,12 @@ def from_adjacency_list(adjacency_list: Union[List[List], Dict[str, List]], dire
     else:
         raise TypeError('The adjacency list must be given as a list of lists or a dict of lists.')
     return from_edge_list(edge_list=edge_list, directed=directed, bipartite=bipartite, weighted=weighted,
-                          reindex=reindex, sum_duplicates=sum_duplicates, matrix_only=matrix_only)
+                          reindex=reindex, shape=shape, sum_duplicates=sum_duplicates, matrix_only=matrix_only)
 def from_edge_array(edge_array: np.ndarray, weights: np.ndarray = None, directed: bool = False, bipartite: bool = False,
-                    weighted: bool = True, reindex: bool = True, sum_duplicates: bool = True,
-                    matrix_only: bool = None) -> Union[Bunch, sparse.csr_matrix]:
+                    weighted: bool = True, reindex: bool = False, shape: Optional[tuple] = None,
+                    sum_duplicates: bool = True, matrix_only: bool = None) -> Union[Dataset, sparse.csr_matrix]:
     """Load a graph from an edge array of shape (n_edges, 2) and weights (optional).
     Parameters
@@ -157,6 +165,9 @@ def from_edge_array(edge_array: np.ndarray, weights: np.ndarray = None, directed
     reindex : bool
         If ``True``, reindex nodes and returns the original node indices as names.
         Reindexing is enforced if nodes are not integers.
+    shape : tuple
+        Shape of the adjacency or biadjacency matrix.
+        If not specified or if nodes are reindexed, the shape is the smallest compatible with node indices.
     sum_duplicates : bool
         If ``True`` (default), sums weights of duplicate edges.
         Otherwise, the weight of each edge is that of the first occurrence of this edge.
@@ -191,32 +202,39 @@ def from_edge_array(edge_array: np.ndarray, weights: np.ndarray = None, directed
         _, index = np.unique(edge_array, axis=0, return_index=True)
         edge_array = edge_array[index]
         weights = weights[index]
-    graph = Bunch()
+    graph = Dataset()
     if bipartite:
         row = edge_array[:, 0]
         col = edge_array[:, 1]
-        if row.dtype != int or (reindex and len(set(row)) < max(row) + 1):
+        if row.dtype != int or reindex:
             names_row, row = np.unique(row, return_inverse=True)
             graph.names_row = names_row
             graph.names = names_row
             n_row = len(names_row)
+        elif shape is not None:
+            n_row = max(shape[0], max(row) + 1)
         else:
             n_row = max(row) + 1
-        if col.dtype != int or (reindex and len(set(col)) < max(col) + 1):
+        if col.dtype != int or reindex:
             names_col, col = np.unique(col, return_inverse=True)
             graph.names_col = names_col
             n_col = len(names_col)
+        elif shape is not None:
+            n_col = max(shape[1], max(col) + 1)
         else:
             n_col = max(col) + 1
         matrix = sparse.csr_matrix((weights, (row, col)), shape=(n_row, n_col))
+        matrix.sum_duplicates()
         graph.biadjacency = matrix
     else:
         nodes = edge_array.ravel()
-        if nodes.dtype != int or (reindex and len(set(nodes)) < max(nodes) + 1):
+        if nodes.dtype != int or reindex:
             names, nodes = np.unique(nodes, return_inverse=True)
             graph.names = names
             n = len(names)
             edge_array = nodes.reshape(-1, 2)
+        elif shape is not None:
+            n = max(shape[0], max(nodes) + 1)
         else:
             n = max(nodes) + 1
         row = edge_array[:, 0]
@@ -224,6 +242,7 @@ def from_edge_array(edge_array: np.ndarray, weights: np.ndarray = None, directed
         matrix = sparse.csr_matrix((weights, (row, col)), shape=(n, n))
         if not directed:
             matrix = directed2undirected(matrix)
+        matrix.sum_duplicates()
         graph.adjacency = matrix
     if matrix_only or (matrix_only is None and len(graph) == 1):
         return matrix
@@ -233,8 +252,8 @@ def from_edge_array(edge_array: np.ndarray, weights: np.ndarray = None, directed
 def from_csv(file_path: str, delimiter: str = None, sep: str = None, comments: str = '#%',
              data_structure: str = None, directed: bool = False, bipartite: bool = False, weighted: bool = True,
-             reindex: bool = True, sum_duplicates: bool = True, matrix_only: bool = None) \
-        -> Union[Bunch, sparse.csr_matrix]:
+             reindex: bool = False, shape: Optional[tuple] = None, sum_duplicates: bool = True,
+             matrix_only: bool = None) -> Union[Dataset, sparse.csr_matrix]:
     """Load a graph from a CSV or TSV file.
     The delimiter can be specified (e.g., ' ' for space-separated values).
@@ -249,9 +268,10 @@ def from_csv(file_path: str, delimiter: str = None, sep: str = None, comments: s
     comments : str
         Characters for comment lines.
     data_structure : str
-        If 'edge_list', considers each row of the file as an edge (tuple of size 2 or 3).
-        If 'adjacency_list', considers each row of the file as an adjacency list (list of neighbors).
-        If 'adjacency_dict', considers each row of the file as an adjacency dictionary with key
+        If 'edge_list', consider each row of the file as an edge (tuple of size 2 or 3).
+        If 'adjacency_list', consider each row of the file as an adjacency list (list of neighbors,
+        in the order of node indices; an empty line means no neighbor).
+        If 'adjacency_dict', consider each row of the file as an adjacency dictionary with key
         given by the first column (node: list of neighbors).
         If ``None`` (default), data_structure is guessed from the first rows of the file.
     directed : bool
@@ -263,6 +283,9 @@ def from_csv(file_path: str, delimiter: str = None, sep: str = None, comments: s
     reindex : bool
         If ``True``, reindex nodes and returns the original node indices as names.
         Reindexing is enforced if nodes are not integers.
+    shape : tuple
+        Shape of the adjacency or biadjacency matrix.
+        If not specified or if nodes are reindexed, the shape is the smallest compatible with node indices.
     sum_duplicates : bool
         If ``True`` (default), sums weights of duplicate edges.
         Otherwise, the weight of each edge is that of the first occurrence of this edge.
@@ -295,7 +318,7 @@ def from_csv(file_path: str, delimiter: str = None, sep: str = None, comments: s
             else:
                 weights = None
             return from_edge_array(edge_array=edge_array, weights=weights, directed=directed, bipartite=bipartite,
-                                   weighted=weighted, reindex=reindex, sum_duplicates=sum_duplicates,
+                                   weighted=weighted, reindex=reindex, shape=shape, sum_duplicates=sum_duplicates,
                                    matrix_only=matrix_only)
         except TypeError:
             pass
@@ -306,17 +329,17 @@ def from_csv(file_path: str, delimiter: str = None, sep: str = None, comments: s
         if data_structure == 'edge_list':
             edge_list = [tuple(row) for row in csv_reader]
             return from_edge_list(edge_list=edge_list, directed=directed, bipartite=bipartite,
-                                  weighted=weighted, reindex=reindex, sum_duplicates=sum_duplicates,
+                                  weighted=weighted, reindex=reindex, shape=shape, sum_duplicates=sum_duplicates,
                                   matrix_only=matrix_only)
         elif data_structure == 'adjacency_list':
             adjacency_list = [row for row in csv_reader]
             return from_adjacency_list(adjacency_list=adjacency_list, directed=directed, bipartite=bipartite,
-                                       weighted=weighted, reindex=reindex, sum_duplicates=sum_duplicates,
+                                       weighted=weighted, reindex=reindex, shape=shape, sum_duplicates=sum_duplicates,
                                        matrix_only=matrix_only)
         elif data_structure == 'adjacency_dict':
             adjacency_list = {row[0]: row[1:] for row in csv_reader}
             return from_adjacency_list(adjacency_list=adjacency_list, directed=directed, bipartite=bipartite,
-                                       weighted=weighted, reindex=reindex, sum_duplicates=sum_duplicates,
+                                       weighted=weighted, reindex=reindex, shape=shape, sum_duplicates=sum_duplicates,
                                        matrix_only=matrix_only)
@@ -411,9 +434,9 @@ def load_header(file: str):
     return directed, bipartite, weighted
-def load_metadata(file: str, delimiter: str = ': ') -> Bunch:
+def load_metadata(file: str, delimiter: str = ': ') -> Dataset:
     """Extract metadata from the file."""
-    metadata = Bunch()
+    metadata = Dataset()
     with open(file, 'r', encoding='utf-8') as f:
         for row in f:
             parts = row.split(delimiter)
@@ -422,7 +445,7 @@ def load_metadata(file: str, delimiter: str = ': ') -> Bunch:
     return metadata
-def from_graphml(file_path: str, weight_key: str = 'weight', max_string_size: int = 512) -> Bunch:
+def from_graphml(file_path: str, weight_key: str = 'weight', max_string_size: int = 512) -> Dataset:
     """Load graph from GraphML file.
     Hyperedges and nested graphs are not supported.
@@ -438,7 +461,7 @@ def from_graphml(file_path: str, weight_key: str = 'weight', max_string_size: in
     Returns
     -------
-    data: :class:`Bunch`
+    data: :class:`Dataset`
         The dataset in a Dataset with the adjacency as a CSR matrix.
     """
     # see http://graphml.graphdrawing.org/primer/graphml-primer.html
@@ -454,12 +477,12 @@ def from_graphml(file_path: str, weight_key: str = 'weight', max_string_size: in
     # indices in the graph tree
     node_indices = []
     edge_indices = []
-    data = Bunch()
+    data = Dataset()
     graph = None
     file_description = None
-    attribute_descriptions = Bunch()
-    attribute_descriptions.node = Bunch()
-    attribute_descriptions.edge = Bunch()
+    attribute_descriptions = Dataset()
+    attribute_descriptions.node = Dataset()
+    attribute_descriptions.edge = Dataset()
     keys = {}
     for file_element in tree.getroot():
         if file_element.tag.endswith('graph'):
@@ -497,7 +520,7 @@ def from_graphml(file_path: str, weight_key: str = 'weight', max_string_size: in
                 if file_element.attrib['for'] == 'node':
                     size = n_nodes
                     if 'node_attribute' not in data:
-                        data.node_attribute = Bunch()
+                        data.node_attribute = Dataset()
                     for key_element in file_element:
                         if key_element.tag.endswith('desc'):
                             attribute_descriptions.node[attribute_name] = key_element.text
@@ -514,7 +537,7 @@ def from_graphml(file_path: str, weight_key: str = 'weight', max_string_size: in
                 elif file_element.attrib['for'] == 'edge':
                     size = n_edges
                     if 'edge_attribute' not in data:
-                        data.edge_attribute = Bunch()
+                        data.edge_attribute = Dataset()
                     for key_element in file_element:
                         if key_element.tag.endswith('desc'):
                             attribute_descriptions.edge[attribute_name] = key_element.text
@@ -532,7 +555,7 @@ def from_graphml(file_path: str, weight_key: str = 'weight', max_string_size: in
         elif file_element.tag.endswith('desc'):
             file_description = file_element.text
     if file_description or attribute_descriptions.node or attribute_descriptions.edge:
-        data.meta = Bunch()
+        data.meta = Dataset()
         if file_description:
             data.meta['description'] = file_description
         if attribute_descriptions.node or attribute_descriptions.edge:

sknetwork/data/tests/test_API.py CHANGED Viewed

@@ -8,7 +8,7 @@ import warnings
 from sknetwork.data.load import *
 from sknetwork.data.toy_graphs import *
-from sknetwork.data import Bunch
+from sknetwork.data import Dataset
 class TestDataAPI(unittest.TestCase):
@@ -17,14 +17,14 @@ class TestDataAPI(unittest.TestCase):
         toy_graphs = [karate_club, painters, bow_tie, house, miserables]
         for toy_graph in toy_graphs:
             self.assertEqual(type(toy_graph()), sparse.csr_matrix)
-            self.assertEqual(type(toy_graph(metadata=True)), Bunch)
+            self.assertEqual(type(toy_graph(metadata=True)), Dataset)
     def test_load(self):
         tmp_data_dir = tempfile.gettempdir() + '/stub'
         clear_data_home(tmp_data_dir)
         try:
             graph = load_netset('stub', tmp_data_dir)
-            self.assertEqual(type(graph), Bunch)
+            self.assertEqual(type(graph), Dataset)
         except URLError:  # pragma: no cover
             warnings.warn('Could not reach NetSet. Corresponding test has not been performed.', RuntimeWarning)
             return

sknetwork/data/tests/test_base.py CHANGED Viewed

@@ -3,12 +3,12 @@
 import unittest
-from sknetwork.data.base import Bunch
+from sknetwork.data.base import Dataset
 class TestDataset(unittest.TestCase):
     def test(self):
-        dataset = Bunch(name='dataset')
+        dataset = Dataset(name='dataset')
         self.assertEqual(dataset.name, 'dataset')
         self.assertEqual(dataset['name'], 'dataset')

sknetwork/data/tests/test_parse.py CHANGED Viewed

@@ -20,6 +20,10 @@ class TestParser(unittest.TestCase):
         self.assertTrue((adjacency.indices == [2, 3, 0, 1, 5, 4]).all())
         self.assertTrue((adjacency.indptr == [0, 1, 2, 3, 4, 5, 6]).all())
         self.assertTrue((adjacency.data == [1, 1, 1, 1, 1, 1]).all())
+        adjacency = parse.from_csv(self.stub_data_1, shape=(7, 7))
+        self.assertTrue((adjacency.shape == (7, 7)))
+        biadjacency = parse.from_csv(self.stub_data_1, bipartite=True, shape=(7, 9))
+        self.assertTrue((biadjacency.shape == (7, 9)))
         remove(self.stub_data_1)
     def test_labeled_weighted(self):
@@ -33,13 +37,14 @@ class TestParser(unittest.TestCase):
         self.assertTrue((adjacency.indptr == [0, 1, 2, 3, 4, 5, 6]).all())
         self.assertTrue((adjacency.data == [1, 6, 5, 6, 1, 5]).all())
         self.assertTrue((names == [' b', ' d', ' e', 'a', 'c', 'f']).all())
         remove(self.stub_data_2)
     def test_auto_reindex(self):
         self.stub_data_4 = 'stub_4.txt'
         with open(self.stub_data_4, "w") as text_file:
             text_file.write('%stub\n14 31\n42 50\n0 12')
-        graph = parse.from_csv(self.stub_data_4)
+        graph = parse.from_csv(self.stub_data_4, reindex=True)
         adjacency = graph.adjacency
         names = graph.names
         self.assertTrue((adjacency.data == [1, 1, 1, 1, 1, 1]).all())
@@ -164,23 +169,15 @@ class TestParser(unittest.TestCase):
         self.stub_data_9 = 'stub_9.txt'
         with open(self.stub_data_9, "w") as text_file:
             text_file.write('#stub\n1 3\n4 5\n0 3')
-        graph = parse.from_csv(self.stub_data_9, bipartite=True)
+        graph = parse.from_csv(self.stub_data_9, bipartite=True, reindex=True)
         biadjacency = graph.biadjacency
         self.assertTrue((biadjacency.indices == [0, 0, 1]).all())
         self.assertTrue((biadjacency.indptr == [0, 1, 2, 3]).all())
         self.assertTrue((biadjacency.data == [1, 1, 1]).all())
+        biadjacency = parse.from_csv(self.stub_data_9, bipartite=True)
+        self.assertTrue(biadjacency.shape == (5, 6))
         remove(self.stub_data_9)
-    def test_csv_adjacency_bipartite(self):
-        self.stub_data_10 = 'stub_10.txt'
-        with open(self.stub_data_10, "w") as text_file:
-            text_file.write('%stub\n3\n3\n0')
-        graph = parse.from_csv(self.stub_data_10, bipartite=True)
-        biadjacency = graph.biadjacency
-        self.assertTupleEqual(biadjacency.shape, (3, 2))
-        self.assertTrue((biadjacency.data == [1, 1, 1]).all())
-        remove(self.stub_data_10)
     def test_edge_list(self):
         edge_list_1 = [('Alice', 'Bob'), ('Carol', 'Alice')]
         graph = parse.from_edge_list(edge_list_1)

sknetwork/data/tests/test_toy_graphs.py CHANGED Viewed

@@ -16,22 +16,22 @@ class TestToys(unittest.TestCase):
         adjacency = house()
         self.assertEqual(adjacency.shape, (5, 5))
-        graph = house(metadata=True)
-        self.assertEqual(graph.position.shape, (5, 2))
+        dataset = house(metadata=True)
+        self.assertEqual(dataset.position.shape, (5, 2))
         adjacency = bow_tie()
         self.assertEqual(adjacency.shape, (5, 5))
-        graph = bow_tie(metadata=True)
-        self.assertEqual(graph.position.shape, (5, 2))
+        dataset = bow_tie(metadata=True)
+        self.assertEqual(dataset.position.shape, (5, 2))
-        graph = karate_club(True)
-        self.assertEqual(graph.adjacency.shape, (34, 34))
-        self.assertEqual(len(graph.labels), 34)
+        dataset = karate_club(True)
+        self.assertEqual(dataset.adjacency.shape, (34, 34))
+        self.assertEqual(len(dataset.labels), 34)
-        graph = miserables(True)
-        self.assertEqual(graph.adjacency.shape, (77, 77))
-        self.assertEqual(len(graph.names), 77)
+        dataset = miserables(True)
+        self.assertEqual(dataset.adjacency.shape, (77, 77))
+        self.assertEqual(len(dataset.names), 77)
     def test_directed(self):
         adjacency = painters()
@@ -40,29 +40,29 @@ class TestToys(unittest.TestCase):
         adjacency = art_philo_science()
         self.assertEqual(adjacency.shape, (30, 30))
-        graph = painters(True)
-        self.assertEqual(graph.adjacency.shape, (14, 14))
-        self.assertEqual(len(graph.names), 14)
+        dataset = painters(True)
+        self.assertEqual(dataset.adjacency.shape, (14, 14))
+        self.assertEqual(len(dataset.names), 14)
-        graph = art_philo_science(True)
-        self.assertEqual(graph.adjacency.shape, (30, 30))
-        self.assertEqual(len(graph.names), 30)
+        dataset = art_philo_science(True)
+        self.assertEqual(dataset.adjacency.shape, (30, 30))
+        self.assertEqual(len(dataset.names), 30)
     def test_bipartite(self):
-        graph = star_wars(True)
-        self.assertEqual(graph.biadjacency.shape, (4, 3))
-        self.assertEqual(len(graph.names), 4)
-        self.assertEqual(len(graph.names_col), 3)
-        graph = movie_actor(True)
-        self.assertEqual(graph.biadjacency.shape, (15, 16))
-        self.assertEqual(len(graph.names), 15)
-        self.assertEqual(len(graph.names_col), 16)
-        graph = hourglass(True)
-        self.assertEqual(graph.biadjacency.shape, (2, 2))
-        graph = art_philo_science(True)
-        self.assertEqual(graph.biadjacency.shape, (30, 11))
-        self.assertEqual(len(graph.names), 30)
-        self.assertEqual(len(graph.names_col), 11)
+        dataset = star_wars(True)
+        self.assertEqual(dataset.biadjacency.shape, (4, 3))
+        self.assertEqual(len(dataset.names), 4)
+        self.assertEqual(len(dataset.names_col), 3)
+        dataset = movie_actor(True)
+        self.assertEqual(dataset.biadjacency.shape, (15, 17))
+        self.assertEqual(len(dataset.names), 15)
+        self.assertEqual(len(dataset.names_col), 17)
+        dataset = hourglass(True)
+        self.assertEqual(dataset.biadjacency.shape, (2, 2))
+        dataset = art_philo_science(True)
+        self.assertEqual(dataset.biadjacency.shape, (30, 11))
+        self.assertEqual(len(dataset.names), 30)
+        self.assertEqual(len(dataset.names_col), 11)