scikit-network 0.31.0-cp39-cp39-win_amd64.whl → 0.33.0-cp39-cp39-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of scikit-network might be problematic.

Files changed (126)
  1. {scikit_network-0.31.0.dist-info → scikit_network-0.33.0.dist-info}/AUTHORS.rst +3 -1
  2. {scikit_network-0.31.0.dist-info → scikit_network-0.33.0.dist-info}/METADATA +27 -5
  3. scikit_network-0.33.0.dist-info/RECORD +228 -0
  4. {scikit_network-0.31.0.dist-info → scikit_network-0.33.0.dist-info}/WHEEL +1 -1
  5. sknetwork/__init__.py +1 -1
  6. sknetwork/classification/base.py +1 -1
  7. sknetwork/classification/base_rank.py +3 -3
  8. sknetwork/classification/diffusion.py +25 -16
  9. sknetwork/classification/knn.py +23 -16
  10. sknetwork/classification/metrics.py +4 -4
  11. sknetwork/classification/pagerank.py +12 -8
  12. sknetwork/classification/propagation.py +25 -17
  13. sknetwork/classification/tests/test_diffusion.py +10 -0
  14. sknetwork/classification/vote.cp39-win_amd64.pyd +0 -0
  15. sknetwork/classification/vote.cpp +14549 -8668
  16. sknetwork/clustering/__init__.py +3 -1
  17. sknetwork/clustering/base.py +1 -1
  18. sknetwork/clustering/kcenters.py +253 -0
  19. sknetwork/clustering/leiden.py +242 -0
  20. sknetwork/clustering/leiden_core.cp39-win_amd64.pyd +0 -0
  21. sknetwork/clustering/leiden_core.cpp +31564 -0
  22. sknetwork/clustering/leiden_core.pyx +124 -0
  23. sknetwork/clustering/louvain.py +118 -83
  24. sknetwork/clustering/louvain_core.cp39-win_amd64.pyd +0 -0
  25. sknetwork/clustering/louvain_core.cpp +21876 -16332
  26. sknetwork/clustering/louvain_core.pyx +86 -94
  27. sknetwork/clustering/postprocess.py +2 -2
  28. sknetwork/clustering/propagation_clustering.py +4 -4
  29. sknetwork/clustering/tests/test_API.py +7 -3
  30. sknetwork/clustering/tests/test_kcenters.py +60 -0
  31. sknetwork/clustering/tests/test_leiden.py +34 -0
  32. sknetwork/clustering/tests/test_louvain.py +2 -3
  33. sknetwork/data/__init__.py +1 -1
  34. sknetwork/data/base.py +7 -2
  35. sknetwork/data/load.py +20 -25
  36. sknetwork/data/models.py +15 -15
  37. sknetwork/data/parse.py +57 -34
  38. sknetwork/data/tests/test_API.py +3 -3
  39. sknetwork/data/tests/test_base.py +2 -2
  40. sknetwork/data/tests/test_parse.py +9 -12
  41. sknetwork/data/tests/test_toy_graphs.py +33 -33
  42. sknetwork/data/toy_graphs.py +35 -43
  43. sknetwork/embedding/__init__.py +0 -1
  44. sknetwork/embedding/base.py +23 -19
  45. sknetwork/embedding/force_atlas.py +3 -2
  46. sknetwork/embedding/louvain_embedding.py +1 -27
  47. sknetwork/embedding/random_projection.py +5 -3
  48. sknetwork/embedding/spectral.py +0 -73
  49. sknetwork/embedding/svd.py +0 -4
  50. sknetwork/embedding/tests/test_API.py +4 -28
  51. sknetwork/embedding/tests/test_louvain_embedding.py +13 -13
  52. sknetwork/embedding/tests/test_spectral.py +2 -5
  53. sknetwork/embedding/tests/test_svd.py +7 -1
  54. sknetwork/gnn/base_layer.py +3 -3
  55. sknetwork/gnn/gnn_classifier.py +41 -87
  56. sknetwork/gnn/layer.py +1 -1
  57. sknetwork/gnn/loss.py +1 -1
  58. sknetwork/gnn/optimizer.py +4 -3
  59. sknetwork/gnn/tests/test_base_layer.py +4 -4
  60. sknetwork/gnn/tests/test_gnn_classifier.py +12 -39
  61. sknetwork/gnn/utils.py +8 -8
  62. sknetwork/hierarchy/base.py +27 -0
  63. sknetwork/hierarchy/louvain_hierarchy.py +55 -47
  64. sknetwork/hierarchy/paris.cp39-win_amd64.pyd +0 -0
  65. sknetwork/hierarchy/paris.cpp +27667 -20915
  66. sknetwork/hierarchy/paris.pyx +11 -10
  67. sknetwork/hierarchy/postprocess.py +16 -16
  68. sknetwork/hierarchy/tests/test_algos.py +5 -0
  69. sknetwork/hierarchy/tests/test_metrics.py +4 -4
  70. sknetwork/linalg/__init__.py +1 -1
  71. sknetwork/linalg/diteration.cp39-win_amd64.pyd +0 -0
  72. sknetwork/linalg/diteration.cpp +13916 -8050
  73. sknetwork/linalg/{normalization.py → normalizer.py} +17 -14
  74. sknetwork/linalg/operators.py +1 -1
  75. sknetwork/linalg/ppr_solver.py +1 -1
  76. sknetwork/linalg/push.cp39-win_amd64.pyd +0 -0
  77. sknetwork/linalg/push.cpp +23187 -16973
  78. sknetwork/linalg/tests/test_normalization.py +3 -7
  79. sknetwork/linalg/tests/test_operators.py +2 -6
  80. sknetwork/linalg/tests/test_ppr.py +1 -1
  81. sknetwork/linkpred/base.py +12 -1
  82. sknetwork/linkpred/nn.py +6 -6
  83. sknetwork/path/distances.py +11 -4
  84. sknetwork/path/shortest_path.py +1 -1
  85. sknetwork/path/tests/test_distances.py +7 -0
  86. sknetwork/path/tests/test_search.py +2 -2
  87. sknetwork/ranking/base.py +11 -6
  88. sknetwork/ranking/betweenness.cp39-win_amd64.pyd +0 -0
  89. sknetwork/ranking/betweenness.cpp +5256 -2190
  90. sknetwork/ranking/pagerank.py +13 -12
  91. sknetwork/ranking/tests/test_API.py +0 -2
  92. sknetwork/ranking/tests/test_betweenness.py +1 -1
  93. sknetwork/ranking/tests/test_pagerank.py +11 -5
  94. sknetwork/regression/base.py +18 -1
  95. sknetwork/regression/diffusion.py +30 -14
  96. sknetwork/regression/tests/test_diffusion.py +8 -0
  97. sknetwork/topology/__init__.py +3 -1
  98. sknetwork/topology/cliques.cp39-win_amd64.pyd +0 -0
  99. sknetwork/topology/cliques.cpp +23528 -16848
  100. sknetwork/topology/core.cp39-win_amd64.pyd +0 -0
  101. sknetwork/topology/core.cpp +22849 -16581
  102. sknetwork/topology/cycles.py +243 -0
  103. sknetwork/topology/minheap.cp39-win_amd64.pyd +0 -0
  104. sknetwork/topology/minheap.cpp +19495 -13469
  105. sknetwork/topology/structure.py +2 -42
  106. sknetwork/topology/tests/test_cycles.py +65 -0
  107. sknetwork/topology/tests/test_structure.py +2 -16
  108. sknetwork/topology/triangles.cp39-win_amd64.pyd +0 -0
  109. sknetwork/topology/triangles.cpp +5283 -1397
  110. sknetwork/topology/triangles.pyx +7 -4
  111. sknetwork/topology/weisfeiler_lehman_core.cp39-win_amd64.pyd +0 -0
  112. sknetwork/topology/weisfeiler_lehman_core.cpp +14781 -8915
  113. sknetwork/utils/__init__.py +1 -1
  114. sknetwork/utils/format.py +1 -1
  115. sknetwork/utils/membership.py +2 -2
  116. sknetwork/utils/values.py +5 -3
  117. sknetwork/visualization/__init__.py +2 -2
  118. sknetwork/visualization/dendrograms.py +55 -7
  119. sknetwork/visualization/graphs.py +261 -44
  120. sknetwork/visualization/tests/test_dendrograms.py +9 -9
  121. sknetwork/visualization/tests/test_graphs.py +63 -57
  122. scikit_network-0.31.0.dist-info/RECORD +0 -221
  123. sknetwork/embedding/louvain_hierarchy.py +0 -142
  124. sknetwork/embedding/tests/test_louvain_hierarchy.py +0 -19
  125. {scikit_network-0.31.0.dist-info → scikit_network-0.33.0.dist-info}/LICENSE +0 -0
  126. {scikit_network-0.31.0.dist-info → scikit_network-0.33.0.dist-info}/top_level.txt +0 -0
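
Beyond the Louvain refactoring detailed below, the headline additions of 0.33.0 visible in this list are two new clustering algorithms: Leiden (leiden.py, backed by the new leiden_core extension whose source follows) and KCenters (kcenters.py), both exported from sknetwork.clustering. A minimal usage sketch, assuming they follow the same BaseClustering interface (fit / fit_predict) as Louvain and that KCenters takes the number of clusters as n_clusters (that parameter name is not confirmed by this diff):

from sknetwork.data import karate_club
from sknetwork.clustering import Leiden, KCenters

adjacency = karate_club()                     # toy undirected graph, CSR format

leiden = Leiden(resolution=1)                 # resolution parameter, as in Louvain
labels_leiden = leiden.fit_predict(adjacency)

kcenters = KCenters(n_clusters=2)             # 'n_clusters' assumed from the class name
labels_kcenters = kcenters.fit_predict(adjacency)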
sknetwork/clustering/leiden_core.pyx (new file)
@@ -0,0 +1,124 @@
+ # distutils: language=c++
+ # cython: language_level=3
+ from libcpp.set cimport set
+ from libc.stdlib cimport rand
+
+ cimport cython
+
+ ctypedef fused int_or_long:
+     int
+     long
+
+ @cython.boundscheck(False)
+ @cython.wraparound(False)
+ def optimize_refine_core(int_or_long[:] labels, int_or_long[:] labels_refined, int_or_long[:] indices,
+                          int_or_long[:] indptr, float[:] data, float[:] out_weights, float[:] in_weights, float[:] out_cluster_weights,
+                          float[:] in_cluster_weights, float[:] cluster_weights, float[:] self_loops, float resolution): # pragma: no cover
+     """Refine clusters while maximizing modularity.
+
+     Parameters
+     ----------
+     labels :
+         Labels (initial partition).
+     labels_refined :
+         Refined labels.
+     indices :
+         CSR format index array of the normalized adjacency matrix.
+     indptr :
+         CSR format index pointer array of the normalized adjacency matrix.
+     data :
+         CSR format data array of the normalized adjacency matrix.
+     out_weights :
+         Out-weights of nodes (sum to 1).
+     in_weights :
+         In-weights of nodes (sum to 1).
+     out_cluster_weights :
+         Out-weights of clusters (sum to 1).
+     in_cluster_weights :
+         In-weights of clusters (sum to 1).
+     cluster_weights :
+         Weights of clusters (initialized to 0).
+     self_loops :
+         Weights of self loops.
+     resolution :
+         Resolution parameter (positive).
+
+     Returns
+     -------
+     labels_refined :
+         Refined labels.
+     """
+     cdef int_or_long n
+     cdef int_or_long label
+     cdef int_or_long label_refined
+     cdef int_or_long label_target
+     cdef int_or_long label_best
+     cdef int_or_long i
+     cdef int_or_long j
+     cdef int_or_long start
+     cdef int_or_long end
+
+     cdef float increase = 1
+     cdef float delta
+     cdef float delta_local
+     cdef float delta_best
+     cdef float in_weight
+     cdef float out_weight
+
+     cdef set[int_or_long] label_set
+     cdef set[int_or_long] label_target_set
+
+     n = labels.shape[0]
+     while increase:
+         increase = 0
+
+         for i in range(n):
+             label_set = ()
+             label = labels[i]
+             label_refined = labels_refined[i]
+             start = indptr[i]
+             end = indptr[i+1]
+
+             # neighboring clusters
+             for j in range(start, end):
+                 if labels[indices[j]] == label:
+                     label_target = labels_refined[indices[j]]
+                     label_set.insert(label_target)
+                     cluster_weights[label_target] += data[j]
+             label_set.erase(label_refined)
+
+             if not label_set.empty():
+                 out_weight = out_weights[i]
+                 in_weight = in_weights[i]
+
+                 # node leaving the current cluster
+                 delta = 2 * (cluster_weights[label_refined] - self_loops[i])
+                 delta -= resolution * out_weight * (in_cluster_weights[label_refined] - in_weight)
+                 delta -= resolution * in_weight * (out_cluster_weights[label_refined] - out_weight)
+
+                 label_target_set = ()
+                 for label_target in label_set:
+                     delta_local = 2 * cluster_weights[label_target]
+                     delta_local -= resolution * out_weight * in_cluster_weights[label_target]
+                     delta_local -= resolution * in_weight * out_cluster_weights[label_target]
+                     delta_local -= delta
+                     if delta_local > 0:
+                         label_target_set.insert(label_target)
+                     cluster_weights[label_target] = 0
+
+                 if not label_target_set.empty():
+                     increase = 1
+                     k = rand() % label_target_set.size()
+                     for label_target in label_target_set:
+                         k -= 1
+                         if k == 0:
+                             break
+                     labels_refined[i] = label_target
+                     # update weights
+                     out_cluster_weights[label_refined] -= out_weight
+                     in_cluster_weights[label_refined] -= in_weight
+                     out_cluster_weights[label_target] += out_weight
+                     in_cluster_weights[label_target] += in_weight
+             cluster_weights[label_refined] = 0
+
+     return labels_refined
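
To make the refinement step easier to follow, here is a hedged pure-Python re-expression of a single sweep of the kernel above (the compiled version repeats sweeps until no improving move remains). It assumes the same inputs as optimize_refine_core: a normalized CSR adjacency matrix whose weights sum to 1, out-/in-weight arrays, and labels_refined initialized to one singleton cluster per node, so that the refined cluster weights start equal to the node weights. This is a sketch of the logic, not the library's API:

import numpy as np

def refine_sweep(labels, labels_refined, adjacency, out_weights, in_weights, resolution=1.0, seed=0):
    """One refinement sweep: move each node to another refined cluster inside its
    Louvain cluster whenever the move increases (directed) modularity."""
    labels_refined = labels_refined.copy()
    out_cluster_weights = out_weights.copy()   # valid when labels_refined = np.arange(n)
    in_cluster_weights = in_weights.copy()
    self_loops = adjacency.diagonal()
    indptr, indices, data = adjacency.indptr, adjacency.indices, adjacency.data
    rng = np.random.default_rng(seed)
    for i in range(adjacency.shape[0]):
        # weight of edges from node i to each refined cluster of its own Louvain cluster
        weights = {}
        for j in range(indptr[i], indptr[i + 1]):
            if labels[indices[j]] == labels[i]:
                target = labels_refined[indices[j]]
                weights[target] = weights.get(target, 0.) + data[j]
        current = labels_refined[i]
        # modularity change of leaving the current refined cluster
        delta_exit = 2 * (weights.get(current, 0.) - self_loops[i])
        delta_exit -= resolution * out_weights[i] * (in_cluster_weights[current] - in_weights[i])
        delta_exit -= resolution * in_weights[i] * (out_cluster_weights[current] - out_weights[i])
        candidates = []
        for target, weight in weights.items():
            if target == current:
                continue
            gain = 2 * weight
            gain -= resolution * out_weights[i] * in_cluster_weights[target]
            gain -= resolution * in_weights[i] * out_cluster_weights[target]
            if gain - delta_exit > 0:          # same test as delta_local > 0 above
                candidates.append(target)
        if candidates:
            target = rng.choice(candidates)    # random improving move, as in the Cython code
            out_cluster_weights[current] -= out_weights[i]
            in_cluster_weights[current] -= in_weights[i]
            out_cluster_weights[target] += out_weights[i]
            in_cluster_weights[target] += in_weights[i]
            labels_refined[i] = target
    return labels_refined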
sknetwork/clustering/louvain.py
@@ -12,7 +12,7 @@ import numpy as np
  from scipy import sparse

  from sknetwork.clustering.base import BaseClustering
- from sknetwork.clustering.louvain_core import fit_core
+ from sknetwork.clustering.louvain_core import optimize_core
  from sknetwork.clustering.postprocess import reindex_labels
  from sknetwork.utils.check import check_random_state, get_probs
  from sknetwork.utils.format import check_format, get_adjacency, directed2undirected
@@ -30,11 +30,11 @@ class Louvain(BaseClustering, Log):
      resolution :
          Resolution parameter.
      modularity : str
-         Which objective function to maximize. Can be ``'Dugue'``, ``'Newman'`` or ``'Potts'`` (default = ``'dugue'``).
+         Type of modularity to maximize. Can be ``'Dugue'``, ``'Newman'`` or ``'Potts'`` (default = ``'dugue'``).
      tol_optimization :
-         Minimum increase in the objective function to enter a new optimization pass.
+         Minimum increase in modularity to enter a new optimization pass in the local search.
      tol_aggregation :
-         Minimum increase in the objective function to enter a new aggregation pass.
+         Minimum increase in modularity to enter a new aggregation pass.
      n_aggregations :
          Maximum number of aggregations.
          A negative value is interpreted as no limit.
@@ -91,6 +91,7 @@ class Louvain(BaseClustering, Log):
        <https://arxiv.org/pdf/0707.1616>`_
        Physical Review E, 76(6).
      """
+
      def __init__(self, resolution: float = 1, modularity: str = 'dugue', tol_optimization: float = 1e-3,
                   tol_aggregation: float = 1e-3, n_aggregations: int = -1, shuffle_nodes: bool = False,
                   sort_clusters: bool = True, return_probs: bool = True, return_aggregate: bool = True,
@@ -102,73 +103,76 @@ class Louvain(BaseClustering, Log):
          self.labels_ = None
          self.resolution = resolution
          self.modularity = modularity.lower()
-         self.tol = tol_optimization
+         self.tol_optimization = tol_optimization
          self.tol_aggregation = tol_aggregation
          self.n_aggregations = n_aggregations
          self.shuffle_nodes = shuffle_nodes
          self.random_state = check_random_state(random_state)
          self.bipartite = None

-     def _optimize(self, adjacency_norm, probs_ou, probs_in):
-         """One local optimization pass of the Louvain algorithm
+     def _optimize(self, labels, adjacency, out_weights, in_weights):
+         """One optimization pass of the Louvain algorithm.

          Parameters
          ----------
-         adjacency_norm :
-             the norm of the adjacency
-         probs_ou :
-             the array of degrees of the adjacency
-         probs_in :
-             the array of degrees of the transpose of the adjacency
+         labels :
+             Labels of nodes.
+         adjacency :
+             Adjacency matrix.
+         out_weights :
+             Out-weights of nodes.
+         in_weights :
+             In-weights of nodes

          Returns
          -------
          labels :
-             the communities of each node after optimization
-         pass_increase :
-             the increase in modularity gained after optimization
+             Labels of nodes after optimization.
+         increase :
+             Gain in modularity after optimization.
          """
-         node_probs_in = probs_in.astype(np.float32)
-         node_probs_ou = probs_ou.astype(np.float32)
-
-         adjacency = 0.5 * directed2undirected(adjacency_norm)
-
+         labels = labels.astype(np.int64)
+         indices = adjacency.indices.astype(np.int64)
+         indptr = adjacency.indptr.astype(np.int64)
+         data = adjacency.data.astype(np.float32)
+         out_weights = out_weights.astype(np.float32)
+         in_weights = in_weights.astype(np.float32)
+         out_cluster_weights = out_weights.copy()
+         in_cluster_weights = in_weights.copy()
+         cluster_weights = np.zeros_like(out_cluster_weights).astype(np.float32)
          self_loops = adjacency.diagonal().astype(np.float32)
-
-         indptr: np.ndarray = adjacency.indptr
-         indices: np.ndarray = adjacency.indices
-         data: np.ndarray = adjacency.data.astype(np.float32)
-
-         return fit_core(self.resolution, self.tol, node_probs_ou, node_probs_in, self_loops, data, indices, indptr)
+         return optimize_core(labels, indices, indptr, data, out_weights, in_weights, out_cluster_weights,
+                              in_cluster_weights, cluster_weights, self_loops, self.resolution, self.tol_optimization)

      @staticmethod
-     def _aggregate(adjacency_norm, probs_out, probs_in, membership: Union[sparse.csr_matrix, np.ndarray]):
+     def _aggregate(labels, adjacency, out_weights, in_weights):
          """Aggregate nodes belonging to the same cluster.

          Parameters
          ----------
-         adjacency_norm :
-             the norm of the adjacency
-         probs_out :
-             the array of degrees of the adjacency
-         probs_in :
-             the array of degrees of the transpose of the adjacency
-         membership :
-             membership matrix (rows).
+         labels :
+             Labels of nodes.
+         adjacency :
+             Adjacency matrix.
+         out_weights :
+             Out-weights of nodes.
+         in_weights :
+             In-weights of nodes.

          Returns
          -------
-         Aggregate graph.
+         Aggregate graph (adjacency matrix, out-weights, in-weights).
          """
-         adjacency_norm = (membership.T.dot(adjacency_norm.dot(membership))).tocsr()
-         probs_in = np.array(membership.T.dot(probs_in).T)
-         probs_out = np.array(membership.T.dot(probs_out).T)
-         return adjacency_norm, probs_out, probs_in
+         membership = get_membership(labels)
+         adjacency_ = membership.T.tocsr().dot(adjacency.dot(membership))
+         out_weights_ = membership.T.dot(out_weights)
+         in_weights_ = membership.T.dot(in_weights)
+         return adjacency_, out_weights_, in_weights_

-     def fit(self, input_matrix: Union[sparse.csr_matrix, np.ndarray], force_bipartite: bool = False) -> 'Louvain':
-         """Fit algorithm to data.
+     def _pre_processing(self, input_matrix, force_bipartite):
+         """Pre-processing for Louvain.

-         Parameters
+         Parameters
          ----------
          input_matrix :
              Adjacency matrix or biadjacency matrix of the graph.
@@ -177,63 +181,64 @@ class Louvain(BaseClustering, Log):

          Returns
          -------
-         self : :class:`Louvain`
+         adjacency :
+             Adjacency matrix.
+         out_weights, in_weights :
+             Node weights.
+         membership :
+             Membership matrix (labels).
+         index :
+             Index of nodes.
          """
          self._init_vars()
+
+         # adjacency matrix
          input_matrix = check_format(input_matrix)
-         if self.modularity == 'dugue':
-             adjacency, self.bipartite = get_adjacency(input_matrix, force_directed=True,
-                                                       force_bipartite=force_bipartite)
-         else:
-             adjacency, self.bipartite = get_adjacency(input_matrix, force_bipartite=force_bipartite)
+         force_directed = self.modularity == 'dugue'
+         adjacency, self.bipartite = get_adjacency(input_matrix, force_directed=force_directed,
+                                                   force_bipartite=force_bipartite)

+         # shuffling
          n = adjacency.shape[0]
-
          index = np.arange(n)
          if self.shuffle_nodes:
              index = self.random_state.permutation(index)
              adjacency = adjacency[index][:, index]

+         # node weights
          if self.modularity == 'potts':
-             probs_out = get_probs('uniform', adjacency)
-             probs_in = probs_out.copy()
+             out_weights = get_probs('uniform', adjacency)
+             in_weights = out_weights.copy()
          elif self.modularity == 'newman':
-             probs_out = get_probs('degree', adjacency)
-             probs_in = probs_out.copy()
+             out_weights = get_probs('degree', adjacency)
+             in_weights = out_weights.copy()
          elif self.modularity == 'dugue':
-             probs_out = get_probs('degree', adjacency)
-             probs_in = get_probs('degree', adjacency.T)
+             out_weights = get_probs('degree', adjacency)
+             in_weights = get_probs('degree', adjacency.T)
          else:
              raise ValueError('Unknown modularity function.')

-         adjacency_cluster = adjacency / adjacency.data.sum()
+         # normalized, symmetric adjacency matrix (sums to 1)
+         adjacency = directed2undirected(adjacency)
+         adjacency = adjacency / adjacency.data.sum()

+         # cluster membership
          membership = sparse.identity(n, format='csr')
-         increase = True
-         count_aggregations = 0
-         self.print_log("Starting with", n, "nodes.")
-         while increase:
-             count_aggregations += 1
-
-             labels_cluster, pass_increase = self._optimize(adjacency_cluster, probs_out, probs_in)
-             _, labels_cluster = np.unique(labels_cluster, return_inverse=True)
-
-             if pass_increase <= self.tol_aggregation:
-                 increase = False
-             else:
-                 membership_cluster = get_membership(labels_cluster)
-                 membership = membership.dot(membership_cluster)
-                 adjacency_cluster, probs_out, probs_in = self._aggregate(adjacency_cluster, probs_out, probs_in,
-                                                                          membership_cluster)
-
-             n = adjacency_cluster.shape[0]
-             if n == 1:
-                 break
-             self.print_log("Aggregation", count_aggregations, "completed with", n, "clusters and ",
-                            pass_increase, "increment.")
-             if count_aggregations == self.n_aggregations:
-                 break

+         return adjacency, out_weights, in_weights, membership, index
+
+     def _post_processing(self, input_matrix, membership, index):
+         """Post-processing for Louvain.
+
+         Parameters
+         ----------
+         input_matrix :
+             Adjacency matrix or biadjacency matrix of the graph.
+         membership :
+             Membership matrix (labels).
+         index :
+             Index of nodes.
+         """
          if self.sort_clusters:
              labels = reindex_labels(membership.indices)
          else:
@@ -242,10 +247,40 @@ class Louvain(BaseClustering, Log):
              reverse = np.empty(index.size, index.dtype)
              reverse[index] = np.arange(index.size)
              labels = labels[reverse]
-
          self.labels_ = labels
          if self.bipartite:
              self._split_vars(input_matrix.shape)
          self._secondary_outputs(input_matrix)

+     def fit(self, input_matrix: Union[sparse.csr_matrix, np.ndarray], force_bipartite: bool = False) -> 'Louvain':
+         """Fit algorithm to data.
+
+         Parameters
+         ----------
+         input_matrix :
+             Adjacency matrix or biadjacency matrix of the graph.
+         force_bipartite :
+             If ``True``, force the input matrix to be considered as a biadjacency matrix even if square.
+
+         Returns
+         -------
+         self : :class:`Louvain`
+         """
+         adjacency, out_weights, in_weights, membership, index = self._pre_processing(input_matrix, force_bipartite)
+         n = adjacency.shape[0]
+         count = 0
+         stop = False
+         while not stop:
+             count += 1
+             labels = np.arange(n)
+             labels, increase = self._optimize(labels, adjacency, out_weights, in_weights)
+             _, labels = np.unique(labels, return_inverse=True)
+             adjacency, out_weights, in_weights = self._aggregate(labels, adjacency, out_weights, in_weights)
+             membership = membership.dot(get_membership(labels))
+             n = adjacency.shape[0]
+             stop = n == 1
+             stop |= increase <= self.tol_aggregation
+             stop |= count == self.n_aggregations
+             self.print_log("Aggregation:", count, " Clusters:", n, " Increase:", increase)
+         self._post_processing(input_matrix, membership, index)
          return self
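
For users upgrading from 0.31.0, the public interface of Louvain is unchanged by this refactoring: fit is now split into _pre_processing, the aggregation loop above, and _post_processing, and the internal attribute self.tol is renamed self.tol_optimization, but the constructor keyword was already tol_optimization in both versions. A minimal sketch of unchanged usage (karate_club is one of the toy graphs shipped in sknetwork.data):

from sknetwork.data import karate_club
from sknetwork.clustering import Louvain

adjacency = karate_club()

# same constructor arguments in 0.31.0 and 0.33.0
louvain = Louvain(resolution=1, modularity='dugue', tol_optimization=1e-3)
labels = louvain.fit_predict(adjacency)       # labels_ is also available after fit
print(labels)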