scikit-network 0.31.0-cp310-cp310-win_amd64.whl → 0.33.0-cp310-cp310-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of scikit-network might be problematic.
- {scikit_network-0.31.0.dist-info → scikit_network-0.33.0.dist-info}/AUTHORS.rst +3 -1
- {scikit_network-0.31.0.dist-info → scikit_network-0.33.0.dist-info}/METADATA +27 -5
- scikit_network-0.33.0.dist-info/RECORD +228 -0
- {scikit_network-0.31.0.dist-info → scikit_network-0.33.0.dist-info}/WHEEL +1 -1
- sknetwork/__init__.py +1 -1
- sknetwork/classification/base.py +1 -1
- sknetwork/classification/base_rank.py +3 -3
- sknetwork/classification/diffusion.py +25 -16
- sknetwork/classification/knn.py +23 -16
- sknetwork/classification/metrics.py +4 -4
- sknetwork/classification/pagerank.py +12 -8
- sknetwork/classification/propagation.py +25 -17
- sknetwork/classification/tests/test_diffusion.py +10 -0
- sknetwork/classification/vote.cp310-win_amd64.pyd +0 -0
- sknetwork/classification/vote.cpp +14549 -8668
- sknetwork/clustering/__init__.py +3 -1
- sknetwork/clustering/base.py +1 -1
- sknetwork/clustering/kcenters.py +253 -0
- sknetwork/clustering/leiden.py +242 -0
- sknetwork/clustering/leiden_core.cp310-win_amd64.pyd +0 -0
- sknetwork/clustering/leiden_core.cpp +31564 -0
- sknetwork/clustering/leiden_core.pyx +124 -0
- sknetwork/clustering/louvain.py +118 -83
- sknetwork/clustering/louvain_core.cp310-win_amd64.pyd +0 -0
- sknetwork/clustering/louvain_core.cpp +21876 -16332
- sknetwork/clustering/louvain_core.pyx +86 -94
- sknetwork/clustering/postprocess.py +2 -2
- sknetwork/clustering/propagation_clustering.py +4 -4
- sknetwork/clustering/tests/test_API.py +7 -3
- sknetwork/clustering/tests/test_kcenters.py +60 -0
- sknetwork/clustering/tests/test_leiden.py +34 -0
- sknetwork/clustering/tests/test_louvain.py +2 -3
- sknetwork/data/__init__.py +1 -1
- sknetwork/data/base.py +7 -2
- sknetwork/data/load.py +20 -25
- sknetwork/data/models.py +15 -15
- sknetwork/data/parse.py +57 -34
- sknetwork/data/tests/test_API.py +3 -3
- sknetwork/data/tests/test_base.py +2 -2
- sknetwork/data/tests/test_parse.py +9 -12
- sknetwork/data/tests/test_toy_graphs.py +33 -33
- sknetwork/data/toy_graphs.py +35 -43
- sknetwork/embedding/__init__.py +0 -1
- sknetwork/embedding/base.py +23 -19
- sknetwork/embedding/force_atlas.py +3 -2
- sknetwork/embedding/louvain_embedding.py +1 -27
- sknetwork/embedding/random_projection.py +5 -3
- sknetwork/embedding/spectral.py +0 -73
- sknetwork/embedding/svd.py +0 -4
- sknetwork/embedding/tests/test_API.py +4 -28
- sknetwork/embedding/tests/test_louvain_embedding.py +13 -13
- sknetwork/embedding/tests/test_spectral.py +2 -5
- sknetwork/embedding/tests/test_svd.py +7 -1
- sknetwork/gnn/base_layer.py +3 -3
- sknetwork/gnn/gnn_classifier.py +41 -87
- sknetwork/gnn/layer.py +1 -1
- sknetwork/gnn/loss.py +1 -1
- sknetwork/gnn/optimizer.py +4 -3
- sknetwork/gnn/tests/test_base_layer.py +4 -4
- sknetwork/gnn/tests/test_gnn_classifier.py +12 -39
- sknetwork/gnn/utils.py +8 -8
- sknetwork/hierarchy/base.py +27 -0
- sknetwork/hierarchy/louvain_hierarchy.py +55 -47
- sknetwork/hierarchy/paris.cp310-win_amd64.pyd +0 -0
- sknetwork/hierarchy/paris.cpp +27667 -20915
- sknetwork/hierarchy/paris.pyx +11 -10
- sknetwork/hierarchy/postprocess.py +16 -16
- sknetwork/hierarchy/tests/test_algos.py +5 -0
- sknetwork/hierarchy/tests/test_metrics.py +4 -4
- sknetwork/linalg/__init__.py +1 -1
- sknetwork/linalg/diteration.cp310-win_amd64.pyd +0 -0
- sknetwork/linalg/diteration.cpp +13916 -8050
- sknetwork/linalg/{normalization.py → normalizer.py} +17 -14
- sknetwork/linalg/operators.py +1 -1
- sknetwork/linalg/ppr_solver.py +1 -1
- sknetwork/linalg/push.cp310-win_amd64.pyd +0 -0
- sknetwork/linalg/push.cpp +23187 -16973
- sknetwork/linalg/tests/test_normalization.py +3 -7
- sknetwork/linalg/tests/test_operators.py +2 -6
- sknetwork/linalg/tests/test_ppr.py +1 -1
- sknetwork/linkpred/base.py +12 -1
- sknetwork/linkpred/nn.py +6 -6
- sknetwork/path/distances.py +11 -4
- sknetwork/path/shortest_path.py +1 -1
- sknetwork/path/tests/test_distances.py +7 -0
- sknetwork/path/tests/test_search.py +2 -2
- sknetwork/ranking/base.py +11 -6
- sknetwork/ranking/betweenness.cp310-win_amd64.pyd +0 -0
- sknetwork/ranking/betweenness.cpp +5256 -2190
- sknetwork/ranking/pagerank.py +13 -12
- sknetwork/ranking/tests/test_API.py +0 -2
- sknetwork/ranking/tests/test_betweenness.py +1 -1
- sknetwork/ranking/tests/test_pagerank.py +11 -5
- sknetwork/regression/base.py +18 -1
- sknetwork/regression/diffusion.py +30 -14
- sknetwork/regression/tests/test_diffusion.py +8 -0
- sknetwork/topology/__init__.py +3 -1
- sknetwork/topology/cliques.cp310-win_amd64.pyd +0 -0
- sknetwork/topology/cliques.cpp +23528 -16848
- sknetwork/topology/core.cp310-win_amd64.pyd +0 -0
- sknetwork/topology/core.cpp +22849 -16581
- sknetwork/topology/cycles.py +243 -0
- sknetwork/topology/minheap.cp310-win_amd64.pyd +0 -0
- sknetwork/topology/minheap.cpp +19495 -13469
- sknetwork/topology/structure.py +2 -42
- sknetwork/topology/tests/test_cycles.py +65 -0
- sknetwork/topology/tests/test_structure.py +2 -16
- sknetwork/topology/triangles.cp310-win_amd64.pyd +0 -0
- sknetwork/topology/triangles.cpp +5283 -1397
- sknetwork/topology/triangles.pyx +7 -4
- sknetwork/topology/weisfeiler_lehman_core.cp310-win_amd64.pyd +0 -0
- sknetwork/topology/weisfeiler_lehman_core.cpp +14781 -8915
- sknetwork/utils/__init__.py +1 -1
- sknetwork/utils/format.py +1 -1
- sknetwork/utils/membership.py +2 -2
- sknetwork/utils/values.py +5 -3
- sknetwork/visualization/__init__.py +2 -2
- sknetwork/visualization/dendrograms.py +55 -7
- sknetwork/visualization/graphs.py +261 -44
- sknetwork/visualization/tests/test_dendrograms.py +9 -9
- sknetwork/visualization/tests/test_graphs.py +63 -57
- scikit_network-0.31.0.dist-info/RECORD +0 -221
- sknetwork/embedding/louvain_hierarchy.py +0 -142
- sknetwork/embedding/tests/test_louvain_hierarchy.py +0 -19
- {scikit_network-0.31.0.dist-info → scikit_network-0.33.0.dist-info}/LICENSE +0 -0
- {scikit_network-0.31.0.dist-info → scikit_network-0.33.0.dist-info}/top_level.txt +0 -0
sknetwork/clustering/louvain_core.pyx
CHANGED

@@ -1,7 +1,6 @@
-# distutils: language
+# distutils: language=c++
 # cython: language_level=3
 from libcpp.set cimport set
-from libcpp.vector cimport vector
 cimport cython

 ctypedef fused int_or_long:
@@ -10,123 +9,116 @@ ctypedef fused int_or_long:

 @cython.boundscheck(False)
 @cython.wraparound(False)
-def
-
-
+def optimize_core(int_or_long[:] labels, int_or_long[:] indices, int_or_long[:] indptr, float[:] data,
+                  float[:] out_weights, float[:] in_weights, float[:] out_cluster_weights, float[:] in_cluster_weights,
+                  float[:] cluster_weights, float[:] self_loops, float resolution, float tol_optimization):  # pragma: no cover
+    """Find clusters maximizing modularity.

     Parameters
     ----------
-
-
-    tol :
-        Minimum increase in modularity to enter a new optimization pass.
-    ou_node_probs :
-        Distribution of node weights based on their out-edges (sums to 1).
-    in_node_probs :
-        Distribution of node weights based on their in-edges (sums to 1).
-    self_loops :
-        Weights of self loops.
-    data :
-        CSR format data array of the normalized adjacency matrix.
+    labels :
+        Initial labels.
     indices :
         CSR format index array of the normalized adjacency matrix.
     indptr :
         CSR format index pointer array of the normalized adjacency matrix.
+    data :
+        CSR format data array of the normalized adjacency matrix.
+    out_weights :
+        Out-weights of nodes (sum to 1).
+    in_weights :
+        In-weights of nodes (sum to 1).
+    out_cluster_weights :
+        Out-weights of clusters (sum to 1).
+    in_cluster_weights :
+        In-weights of clusters (sum to 1).
+    cluster_weights :
+        Weights of clusters (initialized to 0).
+    self_loops :
+        Weights of self loops.
+    resolution :
+        Resolution parameter (positive).
+    tol_optimization :
+        Minimum increase in modularity to enter a new optimization pass.

     Returns
     -------
     labels :
-
-
-
+        Labels of nodes.
+    increase :
+        Increase in modularity.
     """
-    cdef int_or_long n
-    cdef int_or_long
-    cdef int_or_long
-    cdef int_or_long
-    cdef int_or_long
+    cdef int_or_long n
+    cdef int_or_long stop = 0
+    cdef int_or_long label
+    cdef int_or_long label_target
+    cdef int_or_long label_best
     cdef int_or_long i
     cdef int_or_long j
-    cdef int_or_long
-    cdef int_or_long
-    cdef int_or_long label
+    cdef int_or_long start
+    cdef int_or_long end

-    cdef float
+    cdef float increase = 0
     cdef float increase_pass
     cdef float delta
-    cdef float delta_best
-    cdef float delta_exit
     cdef float delta_local
-    cdef float
-    cdef float
-    cdef float
-    cdef float ratio_ou
-
-    cdef vector[int_or_long] labels
-    cdef vector[float] neighbor_clusters_weights
-    cdef vector[float] ou_clusters_weights
-    cdef vector[float] in_clusters_weights
-    cdef set[int_or_long] unique_clusters = ()
-
-    for i in range(n):
-        labels.push_back(i)
-        neighbor_clusters_weights.push_back(0.)
-        ou_clusters_weights.push_back(ou_node_probs[i])
-        in_clusters_weights.push_back(in_node_probs[i])
-
-    while increase == 1:
-        increase = 0
-        increase_pass = 0
-
-        for i in range(n):
-            unique_clusters.clear()
-            cluster_node = labels[i]
-            j1 = indptr[i]
-            j2 = indptr[i + 1]
-
-            for j in range(j1, j2):
-                label = labels[indices[j]]
-                neighbor_clusters_weights[label] += data[j]
-                unique_clusters.insert(label)
+    cdef float delta_best
+    cdef float in_weight
+    cdef float out_weight

-
+    cdef set[int_or_long] label_set = ()

-
-
-
-            ratio_ou = resolution * node_prob_ou
-            ratio_in = resolution * node_prob_in
+    n = labels.shape[0]
+    while not stop:
+        increase_pass = 0

-
-
-
+        for i in range(n):
+            label_set.clear()
+            label = labels[i]
+            start = indptr[i]
+            end = indptr[i+1]
+
+            # neighboring clusters
+            for j in range(start, end):
+                label_target = labels[indices[j]]
+                label_set.insert(label_target)
+                cluster_weights[label_target] += data[j]
+            label_set.erase(label)
+
+            if not label_set.empty():
+                out_weight = out_weights[i]
+                in_weight = in_weights[i]
+
+                # node leaving the current cluster
+                delta = 2 * (cluster_weights[label] - self_loops[i])
+                delta -= resolution * out_weight * (in_cluster_weights[label] - in_weight)
+                delta -= resolution * in_weight * (out_cluster_weights[label] - out_weight)

                 delta_best = 0
-
+                label_best = label

-            for
-
-
-
-
-                    delta_local = delta - delta_exit
+                for label_target in label_set:
+                    delta_local = 2 * cluster_weights[label_target]
+                    delta_local -= resolution * out_weight * in_cluster_weights[label_target]
+                    delta_local -= resolution * in_weight * out_cluster_weights[label_target]
+                    delta_local -= delta
                     if delta_local > delta_best:
                         delta_best = delta_local
-
-
-                neighbor_clusters_weights[cluster] = 0
+                        label_best = label_target
+                    cluster_weights[label_target] = 0

-            if
+                if label_best != label:
                     increase_pass += delta_best
-
-
-
-
-
-
-
-
-
-
-
-
+                    labels[i] = label_best
+                    # update weights
+                    out_cluster_weights[label] -= out_weight
+                    in_cluster_weights[label] -= in_weight
+                    out_cluster_weights[label_best] += out_weight
+                    in_cluster_weights[label_best] += in_weight
+
+                cluster_weights[label] = 0
+
+        increase += increase_pass
+        stop = increase_pass <= tol_optimization
+
+    return labels, increase
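For readers scanning the hunk above: the new optimize_core takes the per-node and per-cluster weight arrays directly instead of rebuilding C++ vectors, and evaluates one local move per node. Below is a rough, untyped Python transcription of that per-node move; the gain expressions mirror the diff, while the function name best_move, the dict-based accumulation and the toy demo are mine (an illustration, not the shipped Cython code).

def best_move(i, labels, adjacency, out_weights, in_weights,
              out_cluster_weights, in_cluster_weights, self_loops, resolution=1.):
    """Return (best label, modularity gain) for moving node i, given a normalized CSR adjacency."""
    start, end = adjacency.indptr[i], adjacency.indptr[i + 1]
    label = labels[i]

    # total edge weight from node i to each neighboring cluster
    weights_to_cluster = {}
    for j in range(start, end):
        target = labels[adjacency.indices[j]]
        weights_to_cluster[target] = weights_to_cluster.get(target, 0.) + adjacency.data[j]

    # cost of leaving the current cluster (same expression as in the hunk)
    delta = 2 * (weights_to_cluster.get(label, 0.) - self_loops[i])
    delta -= resolution * out_weights[i] * (in_cluster_weights[label] - in_weights[i])
    delta -= resolution * in_weights[i] * (out_cluster_weights[label] - out_weights[i])

    label_best, delta_best = label, 0.
    for target, weight in weights_to_cluster.items():
        if target == label:
            continue
        delta_local = 2 * weight
        delta_local -= resolution * out_weights[i] * in_cluster_weights[target]
        delta_local -= resolution * in_weights[i] * out_cluster_weights[target]
        delta_local -= delta
        if delta_local > delta_best:
            label_best, delta_best = target, delta_local
    return label_best, delta_best


# toy demo: 4-node undirected graph, every node starting in its own cluster
import numpy as np
from scipy import sparse

adjacency = sparse.csr_matrix(np.array([[0, 1, 1, 0],
                                        [1, 0, 1, 0],
                                        [1, 1, 0, 1],
                                        [0, 0, 1, 0]], dtype=float))
adjacency = adjacency / adjacency.sum()               # normalized, as in the Cython code
weights = np.asarray(adjacency.sum(axis=1)).ravel()   # node weights (out = in for undirected graphs)
labels = np.arange(4)
print(best_move(0, labels, adjacency, weights, weights,
                weights.copy(), weights.copy(), np.zeros(4)))  # best target cluster and its gain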
sknetwork/clustering/postprocess.py
CHANGED

@@ -41,7 +41,7 @@ def aggregate_graph(input_matrix: sparse.csr_matrix, labels: Optional[np.ndarray
                     labels_row: Optional[np.ndarray] = None, labels_col: Optional[np.ndarray] = None) \
         -> sparse.csr_matrix:
     """Aggregate graph per label. All nodes with the same label become a single node.
-    Negative labels are ignored (corresponding nodes are
+    Negative labels are ignored (corresponding nodes are discarded).

     Parameters
     ----------
@@ -63,4 +63,4 @@ def aggregate_graph(input_matrix: sparse.csr_matrix, labels: Optional[np.ndarray
     else:
         membership_col = membership_row
     aggregate_matrix = membership_row.T.dot(input_matrix).dot(membership_col)
-    return aggregate_matrix
+    return aggregate_matrix.tocsr()
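The aggregation in the hunk above is just a membership-matrix product, and the change makes the result explicitly CSR. A standalone sketch with plain scipy (no sknetwork import; the one-hot membership construction is mine) of what the returned matrix contains:

import numpy as np
from scipy import sparse

# 4 nodes, two clusters: {0, 1} and {2, 3}
adjacency = sparse.csr_matrix(np.array([[0, 1, 1, 0],
                                        [1, 0, 1, 0],
                                        [1, 1, 0, 1],
                                        [0, 0, 1, 0]]))
labels = np.array([0, 0, 1, 1])
# one-hot membership matrix M of shape (nodes, labels)
membership = sparse.csr_matrix((np.ones(len(labels)), (np.arange(len(labels)), labels)))
# M.T @ A @ M sums the edge weights between clusters
aggregate = membership.T.dot(adjacency).dot(membership).tocsr()
print(aggregate.toarray())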
sknetwork/clustering/propagation_clustering.py
CHANGED

@@ -29,11 +29,11 @@ class PropagationClustering(BaseClustering, Propagation):
     weighted : bool
         If ``True``, the vote of each neighbor is proportional to the edge weight.
         Otherwise, all votes have weight 1.
-    sort_clusters :
+    sort_clusters : bool
         If ``True``, sort labels in decreasing order of cluster size.
-    return_probs :
+    return_probs : bool
         If ``True``, return the probability distribution over clusters (soft clustering).
-    return_aggregate :
+    return_aggregate : bool
         If ``True``, return the aggregate adjacency matrix or biadjacency matrix between clusters.

     Attributes
@@ -78,7 +78,7 @@ class PropagationClustering(BaseClustering, Propagation):

         Parameters
         ----------
-        input_matrix :
+        input_matrix : sparse.csr_matrix, np.ndarray
             Adjacency matrix or biadjacency matrix of the graph.

         Returns
sknetwork/clustering/tests/test_API.py
CHANGED

@@ -9,8 +9,12 @@ from sknetwork.data.test_graphs import *

 class TestClusteringAPI(unittest.TestCase):

+    def setUp(self):
+        self.algos = [Louvain(return_aggregate=True), Leiden(return_aggregate=True),
+                      PropagationClustering(return_aggregate=True)]
+
     def test_regular(self):
-        for algo in
+        for algo in self.algos:
             for adjacency in [test_graph(), test_digraph(), test_disconnected_graph()]:
                 n = adjacency.shape[0]
                 labels = algo.fit_predict(adjacency)
@@ -22,13 +26,13 @@ class TestClusteringAPI(unittest.TestCase):
                 n_labels = len(set(labels))
                 self.assertEqual(labels.shape, (n,))
                 self.assertEqual(algo.aggregate_.shape, (n_labels, n_labels))
-                membership = algo.fit_transform(
+                membership = algo.fit_transform(adjacency_bool)
                 self.assertEqual(membership.shape, (n, n_labels))

     def test_bipartite(self):
         biadjacency = test_bigraph()
         n_row, n_col = biadjacency.shape
-        for algo in
+        for algo in self.algos:
             algo.fit(biadjacency)
             self.assertEqual(algo.labels_row_.shape, (n_row,))
             self.assertEqual(algo.labels_col_.shape, (n_col,))
sknetwork/clustering/tests/test_kcenters.py
ADDED

@@ -0,0 +1,60 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""Tests for KCenters"""
+import unittest
+
+from sknetwork.clustering import KCenters
+from sknetwork.data.test_graphs import *
+
+
+class TestKCentersClustering(unittest.TestCase):
+
+    def test_kcenters(self):
+        # Test undirected graph
+        n_clusters = 2
+        adjacency = test_graph()
+        n_row = adjacency.shape[0]
+        kcenters = KCenters(n_clusters=n_clusters)
+        labels = kcenters.fit_predict(adjacency)
+        self.assertEqual(len(labels), n_row)
+        self.assertEqual(len(set(labels)), n_clusters)
+
+        # Test directed graph
+        n_clusters = 3
+        adjacency = test_digraph()
+        n_row = adjacency.shape[0]
+        kcenters = KCenters(n_clusters=n_clusters, directed=True)
+        labels = kcenters.fit_predict(adjacency)
+        self.assertEqual(len(labels), n_row)
+        self.assertEqual(len(set(labels)), n_clusters)
+
+        # Test bipartite graph
+        n_clusters = 2
+        biadjacency = test_bigraph()
+        n_row, n_col = biadjacency.shape
+        kcenters = KCenters(n_clusters=n_clusters)
+        kcenters.fit(biadjacency)
+        labels = kcenters.labels_
+        self.assertEqual(len(kcenters.labels_row_), n_row)
+        self.assertEqual(len(kcenters.labels_col_), n_col)
+        self.assertEqual(len(set(labels)), n_clusters)
+
+    def test_kcenters_error(self):
+        # Test value errors
+        adjacency = test_graph()
+        biadjacency = test_bigraph()
+
+        # test n_clusters error
+        kcenters = KCenters(n_clusters=1)
+        with self.assertRaises(ValueError):
+            kcenters.fit(adjacency)
+
+        # test n_init error
+        kcenters = KCenters(n_clusters=2, n_init=0)
+        with self.assertRaises(ValueError):
+            kcenters.fit(adjacency)
+
+        # test center_position error
+        kcenters = KCenters(n_clusters=2, center_position="other")
+        with self.assertRaises(ValueError):
+            kcenters.fit(biadjacency)
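For context, a minimal usage sketch of the new KCenters estimator, inferred from the tests above (the constructor parameters shown are the ones the tests exercise; karate_club is used here only as a convenient built-in graph):

from sknetwork.clustering import KCenters
from sknetwork.data import karate_club

adjacency = karate_club()
kcenters = KCenters(n_clusters=2)
labels = kcenters.fit_predict(adjacency)   # one label per node
print(len(set(labels)))                    # number of clusters found (2)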
sknetwork/clustering/tests/test_leiden.py
ADDED

@@ -0,0 +1,34 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""Tests for Leiden"""
+import unittest
+
+from sknetwork.clustering import Leiden
+from sknetwork.data.test_graphs import *
+from sknetwork.utils import bipartite2undirected
+
+
+class TestLeidenClustering(unittest.TestCase):
+
+    def test_disconnected(self):
+        adjacency = test_disconnected_graph()
+        n = adjacency.shape[0]
+        labels = Leiden().fit_predict(adjacency)
+        self.assertEqual(len(labels), n)
+
+    def test_modularity(self):
+        adjacency = test_graph()
+        leiden_d = Leiden(modularity='dugue')
+        leiden_n = Leiden(modularity='newman')
+        labels_d = leiden_d.fit_predict(adjacency)
+        labels_n = leiden_n.fit_predict(adjacency)
+        self.assertTrue((labels_d == labels_n).all())
+
+    def test_bipartite(self):
+        biadjacency = test_bigraph()
+        adjacency = bipartite2undirected(biadjacency)
+        leiden = Leiden(modularity='newman')
+        labels1 = leiden.fit_predict(adjacency)
+        leiden.fit(biadjacency)
+        labels2 = np.concatenate((leiden.labels_row_, leiden.labels_col_))
+        self.assertTrue((labels1 == labels2).all())
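Likewise, a minimal usage sketch of the new Leiden estimator, based on the test above (the modularity option is the one the test uses; karate_club is my choice of example graph):

from sknetwork.clustering import Leiden
from sknetwork.data import karate_club

adjacency = karate_club()
leiden = Leiden(modularity='newman')
labels = leiden.fit_predict(adjacency)   # one label per node
print(len(set(labels)))                  # number of clusters found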
sknetwork/clustering/tests/test_louvain.py
CHANGED

@@ -24,7 +24,6 @@ class TestLouvainClustering(unittest.TestCase):
         labels_d = louvain_d.fit_predict(adjacency)
         labels_n = louvain_n.fit_predict(adjacency)
         self.assertTrue((labels_d == labels_n).all())
-
         louvain_p = Louvain(modularity='potts')
         louvain_p.fit_predict(adjacency)

@@ -48,7 +47,7 @@ class TestLouvainClustering(unittest.TestCase):
         # tolerance
         louvain = Louvain(resolution=2, tol_aggregation=0.1)
         labels = louvain.fit_predict(adjacency)
-        self.assertEqual(len(set(labels)),
+        self.assertEqual(len(set(labels)), 7)

         # shuffling
         louvain = Louvain(resolution=2, shuffle_nodes=True, random_state=42)
@@ -78,7 +77,7 @@ class TestLouvainClustering(unittest.TestCase):
         # tolerance
         louvain = Louvain(resolution=2, tol_aggregation=0.1)
         labels = louvain.fit_predict(adjacency)
-        self.assertEqual(len(set(labels)),
+        self.assertEqual(len(set(labels)), 7)

         # shuffling
         louvain = Louvain(resolution=2, shuffle_nodes=True, random_state=42)
sknetwork/data/__init__.py
CHANGED
sknetwork/data/base.py
CHANGED
@@ -6,10 +6,10 @@ Created in May 2023
 """


-class
+class Dataset(dict):
     """Container object for datasets.
     Dictionary-like object that exposes its keys as attributes.
-    >>> dataset =
+    >>> dataset = Dataset(name='dataset')
     >>> dataset['name']
     'dataset'
     >>> dataset.name
@@ -26,3 +26,8 @@ class Bunch(dict):
             return self[key]
         except KeyError:
             raise AttributeError(key)
+
+
+# alias for Dataset
+Bunch = Dataset
+
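The pattern behind the Bunch → Dataset rename above is small enough to restate. A minimal sketch of the attribute access shown in the hunk (the real class may define more than this, e.g. attribute assignment, which the load.py doctests rely on):

class Dataset(dict):
    """Dictionary-like object exposing its keys as attributes."""
    def __getattr__(self, key):
        try:
            return self[key]
        except KeyError:
            raise AttributeError(key)


Bunch = Dataset  # kept as an alias for backward compatibility

dataset = Dataset(name='dataset')
assert dataset['name'] == dataset.name == 'dataset'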
sknetwork/data/load.py
CHANGED
@@ -19,15 +19,12 @@ import numpy as np
 from scipy import sparse

 from sknetwork.data.parse import from_csv, load_labels, load_header, load_metadata
-from sknetwork.data.base import
+from sknetwork.data.base import Dataset
 from sknetwork.utils.check import is_square
 from sknetwork.log import Log

 NETSET_URL = 'https://netset.telecom-paris.fr'

-# former name of Dataset
-Bunch = Bunch
-

 def is_within_directory(directory, target):
     """Utility function."""
@@ -89,7 +86,7 @@ def clean_data_home(data_home: Optional[Union[str, Path]] = None):


 def load_netset(name: Optional[str] = None, data_home: Optional[Union[str, Path]] = None,
-                verbose: bool = True) -> Optional[
+                verbose: bool = True) -> Optional[Dataset]:
     """Load a dataset from the `NetSet collection
     <https://netset.telecom-paris.fr/>`_.

@@ -105,10 +102,10 @@ def load_netset(name: Optional[str] = None, data_home: Optional[Union[str, Path]

     Returns
     -------
-    dataset : :class:`
+    dataset : :class:`Dataset`
         Returned dataset.
     """
-    dataset =
+    dataset = Dataset()
     dataset_folder = NETSET_URL + '/datasets/'
     folder_npz = NETSET_URL + '/datasets_npz/'

@@ -167,7 +164,7 @@ def load_netset(name: Optional[str] = None, data_home: Optional[Union[str, Path]


 def load_konect(name: str, data_home: Optional[Union[str, Path]] = None, auto_numpy_bundle: bool = True,
-                verbose: bool = True) ->
+                verbose: bool = True) -> Dataset:
     """Load a dataset from the `Konect database
     <http://konect.cc/networks/>`_.

@@ -186,7 +183,7 @@ def load_konect(name: str, data_home: Optional[Union[str, Path]] = None, auto_nu

     Returns
     -------
-    dataset : :class:`
+    dataset : :class:`Dataset`
         Object with the following attributes:

         * `adjacency` or `biadjacency`: the adjacency/biadjacency matrix for the dataset
@@ -240,7 +237,7 @@ def load_konect(name: str, data_home: Optional[Union[str, Path]] = None, auto_nu
         logger.print_log('Loading from local bundle...')
         return load_from_numpy_bundle(name + '_bundle', data_path)

-    dataset =
+    dataset = Dataset()
     path = data_konect / name / name
     if not path.exists() or len(listdir(path)) == 0:
         raise Exception("No data downloaded.")
@@ -250,7 +247,7 @@ def load_konect(name: str, data_home: Optional[Union[str, Path]] = None, auto_nu
     if matrix:
         file = matrix[0]
         directed, bipartite, weighted = load_header(path / file)
-        dataset = from_csv(path / file, directed=directed, bipartite=bipartite, weighted=weighted)
+        dataset = from_csv(path / file, directed=directed, bipartite=bipartite, weighted=weighted, reindex=True)

     metadata = [file for file in files if 'meta.' in file]
     if metadata:
@@ -269,7 +266,7 @@ def load_konect(name: str, data_home: Optional[Union[str, Path]] = None, auto_nu
         else:
             dataset.meta.name = name
     else:
-        dataset.meta =
+        dataset.meta = Dataset()
         dataset.meta.name = name

     if auto_numpy_bundle:
@@ -280,12 +277,12 @@ def load_konect(name: str, data_home: Optional[Union[str, Path]] = None, auto_nu
     return dataset


-def save_to_numpy_bundle(data:
+def save_to_numpy_bundle(data: Dataset, bundle_name: str, data_home: Optional[Union[str, Path]] = None):
     """Save a dataset in the specified data home to a collection of Numpy and Pickle files for faster subsequent loads.

     Parameters
     ----------
-    data:
+    data: Dataset
         Data to save.
     bundle_name: str
         Name to be used for the bundle folder.
@@ -300,11 +297,9 @@ def save_to_numpy_bundle(data: Bunch, bundle_name: str, data_home: Optional[Unio
             sparse.save_npz(data_path / attribute, data[attribute])
         elif type(data[attribute]) == np.ndarray:
             np.save(data_path / attribute, data[attribute])
-
+        else:
             with open(data_path / (attribute + '.p'), 'wb') as file:
                 pickle.dump(data[attribute], file)
-        else:
-            raise TypeError('Unsupported data attribute type '+str(type(data[attribute])) + '.')


 def load_from_numpy_bundle(bundle_name: str, data_home: Optional[Union[str, Path]] = None):
@@ -319,7 +314,7 @@ def load_from_numpy_bundle(bundle_name: str, data_home: Optional[Union[str, Path

     Returns
     -------
-    data:
+    data: Dataset
         Data.
     """
     data_home = get_data_home(data_home)
@@ -328,7 +323,7 @@ def load_from_numpy_bundle(bundle_name: str, data_home: Optional[Union[str, Path
         raise FileNotFoundError('No bundle at ' + str(data_path))
     else:
         files = listdir(data_path)
-        data =
+        data = Dataset()
         for file in files:
             if len(file.split('.')) == 2:
                 file_name, file_extension = file.split('.')
@@ -342,7 +337,7 @@ def load_from_numpy_bundle(bundle_name: str, data_home: Optional[Union[str, Path
     return data


-def save(folder: Union[str, Path], data: Union[sparse.csr_matrix,
+def save(folder: Union[str, Path], data: Union[sparse.csr_matrix, Dataset]):
     """Save a dataset or a CSR matrix in the current directory to a collection of Numpy and Pickle files for faster
     subsequent loads. Supported attribute types include sparse matrices, NumPy arrays, strings and objects Dataset.

@@ -350,13 +345,13 @@ def save(folder: Union[str, Path], data: Union[sparse.csr_matrix, Bunch]):
     ----------
     folder : str or :class:`pathlib.Path`
         Name of the bundle folder.
-    data : Union[sparse.csr_matrix,
+    data : Union[sparse.csr_matrix, Dataset]
         Data to save.

     Example
     -------
     >>> from sknetwork.data import save
-    >>> dataset =
+    >>> dataset = Dataset()
     >>> dataset.adjacency = sparse.csr_matrix(np.random.random((3, 3)) < 0.5)
     >>> dataset.names = np.array(['a', 'b', 'c'])
     >>> save('dataset', dataset)
@@ -368,7 +363,7 @@ def save(folder: Union[str, Path], data: Union[sparse.csr_matrix, Bunch]):
     if folder.exists():
         shutil.rmtree(folder)
     if isinstance(data, sparse.csr_matrix):
-        dataset =
+        dataset = Dataset()
         if is_square(data):
             dataset.adjacency = data
         else:
@@ -390,13 +385,13 @@ def load(folder: Union[str, Path]):

     Returns
     -------
-    data:
+    data: Dataset
         Data.

     Example
     -------
     >>> from sknetwork.data import save
-    >>> dataset =
+    >>> dataset = Dataset()
     >>> dataset.adjacency = sparse.csr_matrix(np.random.random((3, 3)) < 0.5)
     >>> dataset.names = np.array(['a', 'b', 'c'])
     >>> save('dataset', dataset)