PyPI - scikit-network - Versions diffs - 0.33.3__cp312-cp312-macosx_10_13_x86_64.whl - Mend

scikit-network 0.33.3__cp312-cp312-macosx_10_13_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of scikit-network might be problematic. Click here for more details.

Files changed (228) hide show

scikit_network-0.33.3.dist-info/METADATA +122 -0
scikit_network-0.33.3.dist-info/RECORD +228 -0
scikit_network-0.33.3.dist-info/WHEEL +6 -0
scikit_network-0.33.3.dist-info/licenses/AUTHORS.rst +43 -0
scikit_network-0.33.3.dist-info/licenses/LICENSE +34 -0
scikit_network-0.33.3.dist-info/top_level.txt +1 -0
sknetwork/__init__.py +21 -0
sknetwork/base.py +67 -0
sknetwork/classification/__init__.py +8 -0
sknetwork/classification/base.py +142 -0
sknetwork/classification/base_rank.py +133 -0
sknetwork/classification/diffusion.py +134 -0
sknetwork/classification/knn.py +139 -0
sknetwork/classification/metrics.py +205 -0
sknetwork/classification/pagerank.py +66 -0
sknetwork/classification/propagation.py +152 -0
sknetwork/classification/tests/__init__.py +1 -0
sknetwork/classification/tests/test_API.py +30 -0
sknetwork/classification/tests/test_diffusion.py +77 -0
sknetwork/classification/tests/test_knn.py +23 -0
sknetwork/classification/tests/test_metrics.py +53 -0
sknetwork/classification/tests/test_pagerank.py +20 -0
sknetwork/classification/tests/test_propagation.py +24 -0
sknetwork/classification/vote.cpp +27581 -0
sknetwork/classification/vote.cpython-312-darwin.so +0 -0
sknetwork/classification/vote.pyx +56 -0
sknetwork/clustering/__init__.py +8 -0
sknetwork/clustering/base.py +172 -0
sknetwork/clustering/kcenters.py +253 -0
sknetwork/clustering/leiden.py +242 -0
sknetwork/clustering/leiden_core.cpp +31572 -0
sknetwork/clustering/leiden_core.cpython-312-darwin.so +0 -0
sknetwork/clustering/leiden_core.pyx +124 -0
sknetwork/clustering/louvain.py +286 -0
sknetwork/clustering/louvain_core.cpp +31217 -0
sknetwork/clustering/louvain_core.cpython-312-darwin.so +0 -0
sknetwork/clustering/louvain_core.pyx +124 -0
sknetwork/clustering/metrics.py +91 -0
sknetwork/clustering/postprocess.py +66 -0
sknetwork/clustering/propagation_clustering.py +104 -0
sknetwork/clustering/tests/__init__.py +1 -0
sknetwork/clustering/tests/test_API.py +38 -0
sknetwork/clustering/tests/test_kcenters.py +60 -0
sknetwork/clustering/tests/test_leiden.py +34 -0
sknetwork/clustering/tests/test_louvain.py +135 -0
sknetwork/clustering/tests/test_metrics.py +50 -0
sknetwork/clustering/tests/test_postprocess.py +39 -0
sknetwork/data/__init__.py +6 -0
sknetwork/data/base.py +33 -0
sknetwork/data/load.py +406 -0
sknetwork/data/models.py +459 -0
sknetwork/data/parse.py +644 -0
sknetwork/data/test_graphs.py +84 -0
sknetwork/data/tests/__init__.py +1 -0
sknetwork/data/tests/test_API.py +30 -0
sknetwork/data/tests/test_base.py +14 -0
sknetwork/data/tests/test_load.py +95 -0
sknetwork/data/tests/test_models.py +52 -0
sknetwork/data/tests/test_parse.py +250 -0
sknetwork/data/tests/test_test_graphs.py +29 -0
sknetwork/data/tests/test_toy_graphs.py +68 -0
sknetwork/data/timeout.py +38 -0
sknetwork/data/toy_graphs.py +611 -0
sknetwork/embedding/__init__.py +8 -0
sknetwork/embedding/base.py +94 -0
sknetwork/embedding/force_atlas.py +198 -0
sknetwork/embedding/louvain_embedding.py +148 -0
sknetwork/embedding/random_projection.py +135 -0
sknetwork/embedding/spectral.py +141 -0
sknetwork/embedding/spring.py +198 -0
sknetwork/embedding/svd.py +359 -0
sknetwork/embedding/tests/__init__.py +1 -0
sknetwork/embedding/tests/test_API.py +49 -0
sknetwork/embedding/tests/test_force_atlas.py +35 -0
sknetwork/embedding/tests/test_louvain_embedding.py +33 -0
sknetwork/embedding/tests/test_random_projection.py +28 -0
sknetwork/embedding/tests/test_spectral.py +81 -0
sknetwork/embedding/tests/test_spring.py +50 -0
sknetwork/embedding/tests/test_svd.py +43 -0
sknetwork/gnn/__init__.py +10 -0
sknetwork/gnn/activation.py +117 -0
sknetwork/gnn/base.py +181 -0
sknetwork/gnn/base_activation.py +90 -0
sknetwork/gnn/base_layer.py +109 -0
sknetwork/gnn/gnn_classifier.py +305 -0
sknetwork/gnn/layer.py +153 -0
sknetwork/gnn/loss.py +180 -0
sknetwork/gnn/neighbor_sampler.py +65 -0
sknetwork/gnn/optimizer.py +164 -0
sknetwork/gnn/tests/__init__.py +1 -0
sknetwork/gnn/tests/test_activation.py +56 -0
sknetwork/gnn/tests/test_base.py +75 -0
sknetwork/gnn/tests/test_base_layer.py +37 -0
sknetwork/gnn/tests/test_gnn_classifier.py +130 -0
sknetwork/gnn/tests/test_layers.py +80 -0
sknetwork/gnn/tests/test_loss.py +33 -0
sknetwork/gnn/tests/test_neigh_sampler.py +23 -0
sknetwork/gnn/tests/test_optimizer.py +43 -0
sknetwork/gnn/tests/test_utils.py +41 -0
sknetwork/gnn/utils.py +127 -0
sknetwork/hierarchy/__init__.py +6 -0
sknetwork/hierarchy/base.py +96 -0
sknetwork/hierarchy/louvain_hierarchy.py +272 -0
sknetwork/hierarchy/metrics.py +234 -0
sknetwork/hierarchy/paris.cpp +37865 -0
sknetwork/hierarchy/paris.cpython-312-darwin.so +0 -0
sknetwork/hierarchy/paris.pyx +316 -0
sknetwork/hierarchy/postprocess.py +350 -0
sknetwork/hierarchy/tests/__init__.py +1 -0
sknetwork/hierarchy/tests/test_API.py +24 -0
sknetwork/hierarchy/tests/test_algos.py +34 -0
sknetwork/hierarchy/tests/test_metrics.py +62 -0
sknetwork/hierarchy/tests/test_postprocess.py +57 -0
sknetwork/linalg/__init__.py +9 -0
sknetwork/linalg/basics.py +37 -0
sknetwork/linalg/diteration.cpp +27397 -0
sknetwork/linalg/diteration.cpython-312-darwin.so +0 -0
sknetwork/linalg/diteration.pyx +47 -0
sknetwork/linalg/eig_solver.py +93 -0
sknetwork/linalg/laplacian.py +15 -0
sknetwork/linalg/normalizer.py +86 -0
sknetwork/linalg/operators.py +225 -0
sknetwork/linalg/polynome.py +76 -0
sknetwork/linalg/ppr_solver.py +170 -0
sknetwork/linalg/push.cpp +31069 -0
sknetwork/linalg/push.cpython-312-darwin.so +0 -0
sknetwork/linalg/push.pyx +71 -0
sknetwork/linalg/sparse_lowrank.py +142 -0
sknetwork/linalg/svd_solver.py +91 -0
sknetwork/linalg/tests/__init__.py +1 -0
sknetwork/linalg/tests/test_eig.py +44 -0
sknetwork/linalg/tests/test_laplacian.py +18 -0
sknetwork/linalg/tests/test_normalization.py +34 -0
sknetwork/linalg/tests/test_operators.py +66 -0
sknetwork/linalg/tests/test_polynome.py +38 -0
sknetwork/linalg/tests/test_ppr.py +50 -0
sknetwork/linalg/tests/test_sparse_lowrank.py +61 -0
sknetwork/linalg/tests/test_svd.py +38 -0
sknetwork/linkpred/__init__.py +2 -0
sknetwork/linkpred/base.py +46 -0
sknetwork/linkpred/nn.py +126 -0
sknetwork/linkpred/tests/__init__.py +1 -0
sknetwork/linkpred/tests/test_nn.py +27 -0
sknetwork/log.py +19 -0
sknetwork/path/__init__.py +5 -0
sknetwork/path/dag.py +54 -0
sknetwork/path/distances.py +98 -0
sknetwork/path/search.py +31 -0
sknetwork/path/shortest_path.py +61 -0
sknetwork/path/tests/__init__.py +1 -0
sknetwork/path/tests/test_dag.py +37 -0
sknetwork/path/tests/test_distances.py +62 -0
sknetwork/path/tests/test_search.py +40 -0
sknetwork/path/tests/test_shortest_path.py +40 -0
sknetwork/ranking/__init__.py +8 -0
sknetwork/ranking/base.py +61 -0
sknetwork/ranking/betweenness.cpp +9704 -0
sknetwork/ranking/betweenness.cpython-312-darwin.so +0 -0
sknetwork/ranking/betweenness.pyx +97 -0
sknetwork/ranking/closeness.py +92 -0
sknetwork/ranking/hits.py +94 -0
sknetwork/ranking/katz.py +83 -0
sknetwork/ranking/pagerank.py +110 -0
sknetwork/ranking/postprocess.py +37 -0
sknetwork/ranking/tests/__init__.py +1 -0
sknetwork/ranking/tests/test_API.py +32 -0
sknetwork/ranking/tests/test_betweenness.py +38 -0
sknetwork/ranking/tests/test_closeness.py +30 -0
sknetwork/ranking/tests/test_hits.py +20 -0
sknetwork/ranking/tests/test_pagerank.py +62 -0
sknetwork/ranking/tests/test_postprocess.py +26 -0
sknetwork/regression/__init__.py +4 -0
sknetwork/regression/base.py +61 -0
sknetwork/regression/diffusion.py +210 -0
sknetwork/regression/tests/__init__.py +1 -0
sknetwork/regression/tests/test_API.py +32 -0
sknetwork/regression/tests/test_diffusion.py +56 -0
sknetwork/sknetwork.py +3 -0
sknetwork/test_base.py +35 -0
sknetwork/test_log.py +15 -0
sknetwork/topology/__init__.py +8 -0
sknetwork/topology/cliques.cpp +32562 -0
sknetwork/topology/cliques.cpython-312-darwin.so +0 -0
sknetwork/topology/cliques.pyx +149 -0
sknetwork/topology/core.cpp +30648 -0
sknetwork/topology/core.cpython-312-darwin.so +0 -0
sknetwork/topology/core.pyx +90 -0
sknetwork/topology/cycles.py +243 -0
sknetwork/topology/minheap.cpp +27329 -0
sknetwork/topology/minheap.cpython-312-darwin.so +0 -0
sknetwork/topology/minheap.pxd +20 -0
sknetwork/topology/minheap.pyx +109 -0
sknetwork/topology/structure.py +194 -0
sknetwork/topology/tests/__init__.py +1 -0
sknetwork/topology/tests/test_cliques.py +28 -0
sknetwork/topology/tests/test_core.py +19 -0
sknetwork/topology/tests/test_cycles.py +65 -0
sknetwork/topology/tests/test_structure.py +85 -0
sknetwork/topology/tests/test_triangles.py +38 -0
sknetwork/topology/tests/test_wl.py +72 -0
sknetwork/topology/triangles.cpp +8891 -0
sknetwork/topology/triangles.cpython-312-darwin.so +0 -0
sknetwork/topology/triangles.pyx +151 -0
sknetwork/topology/weisfeiler_lehman.py +133 -0
sknetwork/topology/weisfeiler_lehman_core.cpp +27632 -0
sknetwork/topology/weisfeiler_lehman_core.cpython-312-darwin.so +0 -0
sknetwork/topology/weisfeiler_lehman_core.pyx +114 -0
sknetwork/utils/__init__.py +7 -0
sknetwork/utils/check.py +355 -0
sknetwork/utils/format.py +221 -0
sknetwork/utils/membership.py +82 -0
sknetwork/utils/neighbors.py +115 -0
sknetwork/utils/tests/__init__.py +1 -0
sknetwork/utils/tests/test_check.py +190 -0
sknetwork/utils/tests/test_format.py +63 -0
sknetwork/utils/tests/test_membership.py +24 -0
sknetwork/utils/tests/test_neighbors.py +41 -0
sknetwork/utils/tests/test_tfidf.py +18 -0
sknetwork/utils/tests/test_values.py +66 -0
sknetwork/utils/tfidf.py +37 -0
sknetwork/utils/values.py +76 -0
sknetwork/visualization/__init__.py +4 -0
sknetwork/visualization/colors.py +34 -0
sknetwork/visualization/dendrograms.py +277 -0
sknetwork/visualization/graphs.py +1039 -0
sknetwork/visualization/tests/__init__.py +1 -0
sknetwork/visualization/tests/test_dendrograms.py +53 -0
sknetwork/visualization/tests/test_graphs.py +176 -0

sknetwork/hierarchy/paris.cpython-312-darwin.so ADDED Viewed

Binary file

sknetwork/hierarchy/paris.pyx ADDED Viewed

@@ -0,0 +1,316 @@
+# distutils: language = c++
+# cython: language_level=3
+"""
+Created on March 2019
+@author: Thomas Bonald <bonald@enst.fr>
+@author: Bertrand Charpentier <bertrand.charpentier@live.fr>
+@author: Quentin Lutz <qlutz@enst.fr>
+"""
+import numpy as np
+cimport numpy as np
+cimport cython
+from libcpp.vector cimport vector
+from typing import Union
+from scipy import sparse
+from sknetwork.hierarchy.base import BaseHierarchy
+from sknetwork.hierarchy.postprocess import reorder_dendrogram
+from sknetwork.utils.format import check_format, get_adjacency, directed2undirected
+from sknetwork.utils.check import get_probs, is_symmetric
+cdef class AggregateGraph:
+    """A class of graphs suitable for aggregation. Each node represents a cluster.
+    Edge weights are kept in a dictionary of dictionaries and are divided by the
+    sum of ``data`` at construction time.
+    Parameters
+    ----------
+    out_weights :
+        Out-weights (sums to 1).
+    in_weights :
+        In-weights (sums to 1).
+    data :
+        CSR format data array of the normalized adjacency matrix.
+    indices :
+        CSR format index array of the normalized adjacency matrix.
+    indptr :
+        CSR format index pointer array of the normalized adjacency matrix.
+    Attributes
+    ----------
+    neighbors : dict[dict]
+        Dictionary of dictionary of edge weights.
+    next_cluster : int
+        Index of the next cluster (resulting from aggregation).
+    cluster_sizes : dict
+        Dictionary of cluster sizes.
+    cluster_out_weights : dict
+        Dictionary of cluster out-weights (sums to 1).
+    cluster_in_weights : dict
+        Dictionary of cluster in-weights (sums to 1).
+    tmp : dict
+        Declared but never assigned or read inside this class.
+    """
+    cdef public int next_cluster
+    cdef public dict neighbors
+    cdef public dict tmp
+    # Not declared ``public``: only visible to Cython code that knows the static type of the object.
+    cdef dict cluster_sizes
+    cdef public dict cluster_out_weights
+    cdef public dict cluster_in_weights
+    def __init__(self, double[:] out_weights, double[:] in_weights, double[:] data, int[:] indices,
+                 int[:] indptr):
+        # Number of nodes, deduced from the CSR index pointer array.
+        cdef int n = indptr.shape[0] - 1
+        # Declared as a C ``float``, so the total is held in single precision.
+        cdef float total_weight = np.sum(data)
+        cdef int i
+        cdef int j
+        # New clusters created by merges are numbered from n upwards.
+        self.next_cluster = n
+        self.neighbors = {}
+        for i in range(n):
+            # normalize so that the sum of edge weights is equal to 1
+            self.neighbors[i] = {}
+            for j in range(indptr[i], indptr[i + 1]):
+                self.neighbors[i][indices[j]] = data[j] / total_weight
+        cluster_sizes = {}
+        cluster_out_weights = {}
+        cluster_in_weights = {}
+        # Every initial node is a cluster of size 1 carrying its own weights.
+        for i in range(n):
+            cluster_sizes[i] = 1
+            cluster_out_weights[i] = out_weights[i]
+            cluster_in_weights[i] = in_weights[i]
+        self.cluster_sizes = cluster_sizes
+        self.cluster_out_weights = cluster_out_weights
+        self.cluster_in_weights = cluster_in_weights
+    cdef float similarity(self, int node1, int node2):
+        """Similarity of two nodes.
+        The value is ``2 * neighbors[node1][node2] / den`` with
+        ``den = out[node1] * in[node2] + out[node2] * in[node1]``; it is minus
+        infinity when ``den`` is not positive.
+        Parameters
+        ----------
+        node1, node2 :
+            Nodes.
+        Returns
+        -------
+        sim: float
+            Similarity.
+        """
+        # Default returned when the denominator is not strictly positive.
+        cdef float sim = -float("inf")
+        cdef float a = self.cluster_out_weights[node1] * self.cluster_in_weights[node2]
+        cdef float b = self.cluster_out_weights[node2] * self.cluster_in_weights[node1]
+        cdef float den = a + b
+        if den > 0:
+            # Plain dict lookup: node2 must be a key of neighbors[node1].
+            sim = 2 * self.neighbors[node1][node2] / den
+        return sim
+    @cython.boundscheck(False)
+    @cython.wraparound(False)
+    cpdef AggregateGraph merge(self, int node1, int node2):
+        """Merges two nodes.
+        The two nodes are replaced by a new node indexed by ``next_cluster``; their
+        edges, sizes and weights are summed and ``next_cluster`` is incremented.
+        Parameters
+        ----------
+        node1, node2 :
+            The two nodes to merge.
+        Returns
+        -------
+        self: :class:`AggregateGraph`
+            The aggregate graph after the merge (this same object, updated in place).
+        """
+        cdef int new_node = self.next_cluster
+        self.neighbors[new_node] = {}
+        # Self-loop of the new node; it accumulates the weights internal to the pair below.
+        self.neighbors[new_node][new_node] = 0
+        # ``-`` binds tighter than ``&``: the merged pair is removed from the second set before intersecting.
+        cdef set common_neighbors = set(self.neighbors[node1].keys()) & set(self.neighbors[node2].keys()) - {node1, node2}
+        for node in common_neighbors:
+            # Neighbors shared by both nodes: the two edge weights are added, in both directions.
+            self.neighbors[new_node][node] = self.neighbors[node1].pop(node) + self.neighbors[node2].pop(node)
+            self.neighbors[node][new_node] = self.neighbors[node].pop(node1) + self.neighbors[node].pop(node2)
+        for node in {node1, node2}:
+            # Remaining neighbors belong to exactly one of the two nodes, since the common ones were popped above.
+            for neighbor in set(self.neighbors[node].keys()) - {node1, node2}:
+                self.neighbors[new_node][neighbor] = self.neighbors[node].pop(neighbor)
+                # The reverse entry is popped unconditionally — assumes a symmetric neighbor structure.
+                self.neighbors[neighbor][new_node] = self.neighbors[neighbor].pop(node)
+            # Weights between the two merged nodes (and their own self-loops) go to the new self-loop.
+            for other_node in {node1, node2}:
+                if other_node in self.neighbors[node]:
+                    self.neighbors[new_node][new_node] += self.neighbors[node][other_node]
+            del self.neighbors[node]
+        # The merged nodes disappear from the bookkeeping dictionaries; the new node gets the sums.
+        self.cluster_sizes[new_node] = self.cluster_sizes.pop(node1) + self.cluster_sizes.pop(node2)
+        self.cluster_out_weights[new_node] = self.cluster_out_weights.pop(node1) + self.cluster_out_weights.pop(node2)
+        self.cluster_in_weights[new_node] = self.cluster_in_weights.pop(node1) + self.cluster_in_weights.pop(node2)
+        self.next_cluster += 1
+        return self
+class Paris(BaseHierarchy):
+    """Agglomerative clustering algorithm that performs greedy merge of nodes based on their similarity.
+    The similarity between nodes :math:`i,j` is :math:`\\dfrac{A_{ij}}{w_i w_j}` where
+    * :math:`A_{ij}` is the weight of edge :math:`i,j`,
+    * :math:`w_i, w_j` are the weights of nodes :math:`i,j`
+    If the input matrix :math:`B` is a biadjacency matrix (i.e., rectangular), the algorithm is applied
+    to the corresponding adjacency matrix :math:`A  = \\begin{bmatrix} 0 & B \\\\ B^T & 0 \\end{bmatrix}`
+    Parameters
+    ----------
+    weights : str
+        Weights of nodes.
+        ``'degree'`` (default) or ``'uniform'``.
+    reorder : bool
+        If ``True`` (default), reorder the dendrogram in non-decreasing order of height.
+    Attributes
+    ----------
+    dendrogram_ : np.ndarray
+        Dendrogram of the graph.
+    dendrogram_row_ : np.ndarray
+        Dendrogram for the rows, for bipartite graphs.
+    dendrogram_col_ : np.ndarray
+        Dendrogram for the columns, for bipartite graphs.
+    dendrogram_full_ : np.ndarray
+        Dendrogram for both rows and columns, indexed in this order, for bipartite graphs.
+    Examples
+    --------
+    >>> from sknetwork.hierarchy import Paris
+    >>> from sknetwork.data import house
+    >>> paris = Paris()
+    >>> adjacency = house()
+    >>> dendrogram = paris.fit_predict(adjacency)
+    >>> np.round(dendrogram, 2)
+    array([[3.        , 2.        , 0.17      , 2.        ],
+           [1.        , 0.        , 0.25      , 2.        ],
+           [6.        , 4.        , 0.31      , 3.        ],
+           [7.        , 5.        , 0.67      , 5.        ]])
+    Notes
+    -----
+    Each row of the dendrogram = :math:`i, j`, distance, size of cluster :math:`i + j`.
+    The distance stored for a merge is the inverse of the similarity of the merged pair;
+    distinct connected components are joined at infinite distance.
+    See Also
+    --------
+    scipy.cluster.hierarchy.linkage
+    References
+    ----------
+    T. Bonald, B. Charpentier, A. Galland, A. Hollocou (2018).
+    `Hierarchical Graph Clustering using Node Pair Sampling.
+    <https://arxiv.org/abs/1806.01664>`_
+    Workshop on Mining and Learning with Graphs.
+    """
+    def __init__(self, weights: str = 'degree', reorder: bool = True):
+        super(Paris, self).__init__()
+        self.dendrogram_ = None
+        self.weights = weights
+        self.reorder = reorder
+        # Set by fit() from the second value returned by get_adjacency.
+        self.bipartite = None
+    @cython.boundscheck(False)
+    @cython.wraparound(False)
+    def fit(self, input_matrix: Union[sparse.csr_matrix, np.ndarray], force_bipartite: bool = False) -> 'Paris':
+        """Agglomerative clustering using the nearest neighbor chain.
+        Parameters
+        ----------
+        input_matrix : sparse.csr_matrix, np.ndarray
+            Adjacency matrix or biadjacency matrix of the graph.
+        force_bipartite :
+            If ``True``, force the input matrix to be considered as a biadjacency matrix.
+        Returns
+        -------
+        self: :class:`Paris`
+        Raises
+        ------
+        ValueError
+            If the adjacency matrix has at most one node.
+        """
+        # Not defined in this class — presumably inherited from BaseHierarchy; resets fitted attributes (TODO confirm).
+        self._init_vars()
+        # input
+        adjacency, self.bipartite = get_adjacency(input_matrix, force_bipartite=force_bipartite)
+        weights = self.weights
+        # Node weights are computed on the matrix as given, before any symmetrization below.
+        out_weights = get_probs(weights, adjacency)
+        in_weights = get_probs(weights, adjacency.T)
+        if not is_symmetric(adjacency):
+            adjacency = directed2undirected(adjacency)
+        # Nodes whose out- and in-weights are both zero get a unit entry on the diagonal (a self-loop).
+        null_weights = (out_weights + in_weights) == 0
+        if any(null_weights):
+            adjacency += sparse.diags(null_weights.astype(int))
+        if adjacency.shape[0] <= 1:
+            raise ValueError('The graph must contain at least two nodes.')
+        # agglomerative clustering
+        aggregate_graph = AggregateGraph(out_weights, in_weights, adjacency.data.astype(float),
+                                         adjacency.indices, adjacency.indptr)
+        # Pairs (node, cluster size), one per node found without any neighbor other than itself.
+        cdef vector[(int, int)] connected_components
+        dendrogram = []
+        cdef int node
+        cdef int next_node
+        cdef int cluster_size
+        cdef int next_cluster_size
+        cdef int neighbor
+        cdef int nearest_neighbor
+        cdef int nearest_neighbor_last
+        cdef vector[int] chain
+        cdef float sim
+        cdef float max_sim
+        # Each outer iteration starts a new chain from a remaining cluster.
+        while len(aggregate_graph.cluster_sizes):
+            # Take the first key yielded by the dictionary as the starting node.
+            for node in aggregate_graph.cluster_sizes:
+                break
+            chain.clear()
+            chain.push_back(node)
+            while chain.size():
+                # Pop the node at the end of the chain.
+                node = chain[chain.size() - 1]
+                chain.pop_back()
+                if set(aggregate_graph.neighbors[node].keys()) - {node}:
+                    # Look for the most similar neighbor; ties go to the smallest index.
+                    max_sim = -float("inf")
+                    for neighbor in set(aggregate_graph.neighbors[node].keys()) - {node}:
+                        sim = aggregate_graph.similarity(node, neighbor)
+                        if sim > max_sim:
+                            nearest_neighbor = neighbor
+                            max_sim = sim
+                        elif sim == max_sim:
+                            nearest_neighbor = min(neighbor, nearest_neighbor)
+                    if chain.size():
+                        nearest_neighbor_last = chain[chain.size() - 1]
+                        chain.pop_back()
+                        if nearest_neighbor_last == nearest_neighbor:
+                            # The previous chain element is the nearest neighbor of node: merge the pair.
+                            size = aggregate_graph.cluster_sizes[node] + aggregate_graph.cluster_sizes[nearest_neighbor]
+                            # Dendrogram height is the inverse of the similarity.
+                            dendrogram.append([node, nearest_neighbor, 1. / max_sim, size])
+                            aggregate_graph.merge(node, nearest_neighbor)
+                        else:
+                            # No merge: restore the chain and extend it with the nearest neighbor.
+                            chain.push_back(nearest_neighbor_last)
+                            chain.push_back(node)
+                            chain.push_back(nearest_neighbor)
+                    else:
+                        chain.push_back(node)
+                        chain.push_back(nearest_neighbor)
+                else:
+                    # No neighbor other than itself: record the node with its size and stop tracking it.
+                    connected_components.push_back((node, aggregate_graph.cluster_sizes[node]))
+                    del aggregate_graph.cluster_sizes[node]
+        # Join the recorded components one after the other, at infinite height.
+        node, cluster_size = connected_components[connected_components.size() - 1]
+        connected_components.pop_back()
+        for next_node, next_cluster_size in connected_components:
+            cluster_size += next_cluster_size
+            dendrogram.append([node, next_node, float("inf"), cluster_size])
+            # The row just appended defines a new cluster, which takes the next free index.
+            node = aggregate_graph.next_cluster
+            aggregate_graph.next_cluster += 1
+        dendrogram = np.array(dendrogram)
+        if self.reorder:
+            dendrogram = reorder_dendrogram(dendrogram)
+        self.dendrogram_ = dendrogram
+        if self.bipartite:
+            # Not defined in this class — presumably fills the row/column dendrogram attributes (TODO confirm).
+            self._split_vars(input_matrix.shape)
+        return self

sknetwork/hierarchy/postprocess.py ADDED Viewed

@@ -0,0 +1,350 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Created on June 2019
+@author: Thomas Bonald <bonald@enst.fr>
+@author: Bertrand Charpentier <bertrand.charpentier@live.fr>
+@author: Quentin Lutz <qlutz@enst.fr>
+"""
+import copy
+from collections import defaultdict
+from typing import Optional, Union, Tuple
+import numpy as np
+from sknetwork.utils.check import check_n_clusters, check_dendrogram
+def reorder_dendrogram(dendrogram: np.ndarray) -> np.ndarray:
+    """Reorder the dendrogram in non-decreasing order of height."""
+    n = dendrogram.shape[0] + 1
+    order = np.zeros((2, n - 1), float)
+    order[0] = np.max(dendrogram[:, :2], axis=1)
+    order[1] = dendrogram[:, 2]
+    index = np.lexsort(order)
+    dendrogram_new = dendrogram[index]
+    index_new = np.arange(2 * n - 1)
+    index_new[n + index] = np.arange(n, 2 * n - 1)
+    dendrogram_new[:, 0] = index_new[dendrogram_new[:, 0].astype(int)]
+    dendrogram_new[:, 1] = index_new[dendrogram_new[:, 1].astype(int)]
+    return dendrogram_new
+def get_labels(dendrogram: np.ndarray, cluster: dict, sort_clusters: bool, return_dendrogram: bool):
+    """Returns the labels from clusters."""
+    n = len(dendrogram) + 1
+    clusters = list(cluster.values())
+    if sort_clusters:
+        sizes = np.array([len(nodes) for nodes in clusters])
+        index = np.argsort(-sizes)
+        clusters = [clusters[i] for i in index]
+    labels = np.zeros(n, dtype=int)
+    for label, nodes in enumerate(clusters):
+        labels[nodes] = label
+    if return_dendrogram:
+        cluster_index = {i: label for i, label in enumerate(labels)}
+        cluster_size = {i: len(cluster) for i, cluster in enumerate(clusters)}
+        dendrogram_new = []
+        current_cluster = len(labels)
+        current_cluster_new = len(clusters)
+        for i, j, height, _ in dendrogram:
+            i_new = cluster_index.pop(int(i))
+            j_new = cluster_index.pop(int(j))
+            if i_new != j_new:
+                size = cluster_size.pop(i_new) + cluster_size.pop(j_new)
+                cluster_size[current_cluster_new] = size
+                cluster_index[current_cluster] = current_cluster_new
+                dendrogram_new.append([i_new, j_new, height, size])
+                current_cluster_new += 1
+            else:
+                cluster_index[current_cluster] = i_new
+            current_cluster += 1
+        dendrogram_new = np.array(dendrogram_new)
+        return labels, dendrogram_new
+    else:
+        return labels
+def cut_straight(dendrogram: np.ndarray, n_clusters: Optional[int] = None, threshold: Optional[float] = None,
+                 sort_clusters: bool = True, return_dendrogram: bool = False) \
+                -> Union[np.ndarray, Tuple[np.ndarray, np.ndarray]]:
+    """Cut a dendrogram and return the corresponding clustering.
+    Parameters
+    ----------
+    dendrogram : np.ndarray
+        Dendrogram.
+    n_clusters : int
+        Number of clusters (optional).
+        The number of clusters can be larger than n_clusters in case of equal heights in the dendrogram.
+    threshold : float
+        Threshold on height (optional).
+        If both n_clusters and threshold are ``None``, n_clusters is set to 2.
+    sort_clusters : bool
+        If ``True``,  sorts clusters in decreasing order of size.
+    return_dendrogram : bool
+        If ``True``, returns the dendrogram formed by the clusters up to the root.
+    Returns
+    -------
+    labels : np.ndarray
+        Cluster of each node.
+    dendrogram_aggregate : np.ndarray
+        Dendrogram starting from clusters (leaves = clusters).
+    Example
+    -------
+    >>> from sknetwork.hierarchy import cut_straight
+    >>> dendrogram = np.array([[0, 1, 0, 2], [2, 3, 1, 3]])
+    >>> cut_straight(dendrogram)
+    array([0, 0, 1])
+    """
+    check_dendrogram(dendrogram)
+    n = dendrogram.shape[0] + 1
+    if return_dendrogram:
+        height = dendrogram[:, 2]
+        if not np.all(height[:-1] <= height[1:]):
+            dendrogram = reorder_dendrogram(dendrogram)
+    cluster = {i: [i] for i in range(n)}
+    if n_clusters is None:
+        if threshold is None:
+            n_clusters = 2
+        else:
+            n_clusters = n
+    else:
+        check_n_clusters(n_clusters, n, n_min=1)
+    cut = np.sort(dendrogram[:, 2])[n - n_clusters]
+    if threshold is not None:
+        cut = max(cut, threshold)
+    for t in range(n - 1):
+        i = int(dendrogram[t][0])
+        j = int(dendrogram[t][1])
+        if dendrogram[t][2] < cut and i in cluster and j in cluster:
+            cluster[n + t] = cluster.pop(i) + cluster.pop(j)
+    return get_labels(dendrogram, cluster, sort_clusters, return_dendrogram)
+def cut_balanced(dendrogram: np.ndarray, max_cluster_size: int = 20, sort_clusters: bool = True,
+                 return_dendrogram: bool = False) -> Union[np.ndarray, Tuple[np.ndarray, np.ndarray]]:
+    """Cuts a dendrogram with a constraint on the cluster size and returns the corresponding clustering.
+    Parameters
+    ----------
+    dendrogram : np.ndarray
+        Dendrogram
+    max_cluster_size : int
+        Maximum size of each cluster.
+    sort_clusters : bool
+        If ``True``, sort labels in decreasing order of cluster size.
+    return_dendrogram : bool
+        If ``True``, returns the dendrogram formed by the clusters up to the root.
+    Returns
+    -------
+    labels : np.ndarray
+        Label of each node.
+    dendrogram_aggregate : np.ndarray
+        Dendrogram starting from clusters (leaves = clusters).
+    Example
+    -------
+    >>> from sknetwork.hierarchy import cut_balanced
+    >>> dendrogram = np.array([[0, 1, 0, 2], [2, 3, 1, 3]])
+    >>> cut_balanced(dendrogram, 2)
+    array([0, 0, 1])
+    """
+    check_dendrogram(dendrogram)
+    n = dendrogram.shape[0] + 1
+    if max_cluster_size < 2 or max_cluster_size > n:
+        raise ValueError("The maximum cluster size must be between 2 and the number of nodes.")
+    cluster = {i: [i] for i in range(n)}
+    for t in range(n - 1):
+        i = int(dendrogram[t][0])
+        j = int(dendrogram[t][1])
+        if i in cluster and j in cluster and len(cluster[i]) + len(cluster[j]) <= max_cluster_size:
+            cluster[n + t] = cluster.pop(i) + cluster.pop(j)
+    return get_labels(dendrogram, cluster, sort_clusters, return_dendrogram)
+def aggregate_dendrogram(dendrogram: np.ndarray, n_clusters: int = 2, return_counts: bool = False) \
+                        -> Union[np.ndarray, Tuple[np.ndarray, np.ndarray]]:
+    """Aggregate a dendrogram in order to get a certain number of leaves.
+    The leaves in the output dendrogram correspond to subtrees in the input one.
+    Parameters
+    ----------
+    dendrogram : np.ndarray
+        The input to aggregate.
+    n_clusters : int
+        Number of clusters (or leaves) to keep.
+    return_counts :  bool
+        If ``True``, returns an array of counts corresponding to the sizes of the merged subtrees.
+        The sum of the counts is equal to the number of samples in the input dendrogram.
+    Returns
+    -------
+    new_dendrogram : np.ndarray
+        Aggregated dendrogram. The nodes are reindexed from 0.
+    counts : np.ndarray
+        Size of the subtrees corresponding to each leaf in new_dendrogram.
+    """
+    n_nodes: int = dendrogram.shape[0] + 1
+    check_n_clusters(n_clusters, n_nodes, n_min=1)
+    new_dendrogram = dendrogram[n_nodes - n_clusters:].copy()
+    node_indices = np.array(sorted(set(new_dendrogram[:, 0]).union(set(new_dendrogram[:, 1]))))
+    new_index = {ix: i for i, ix in enumerate(node_indices)}
+    for j in range(2):
+        for i in range(new_dendrogram.shape[0]):
+            new_dendrogram[i, j] = new_index[new_dendrogram[i, j]]
+    if return_counts:
+        leaves = node_indices[:n_clusters].astype(int)
+        leaves_indices = leaves - n_nodes
+        counts = dendrogram[leaves_indices, 3]
+        return new_dendrogram, counts.astype(int)
+    else:
+        return new_dendrogram
+def get_index(tree):
+    """Reindex a dendrogram from the leaves
+    Parameters
+    ----------
+    tree :
+        The tree to be indexed
+    Returns
+    -------
+    index :
+        The index of the root of the given tree
+    """
+    if type(tree) != list:
+        return tree
+    else:
+        return np.max([get_index(t) for t in tree])
+def get_dendrogram(tree, dendrogram=None, index=None, depth=0, size=None, copy_tree=False):
+    """Get dendrogram from tree.
+    The tree is given as nested lists and is consumed (modified in place) by the
+    procedure, unless ``copy_tree`` is ``True``.
+    Parameters
+    ----------
+    tree :
+        The initial tree
+    dendrogram :
+        Intermediary dendrogram for recursive use
+    index :
+        Intermediary index for recursive use
+    depth :
+        Current depth for recursive use
+    size :
+        Current leaf count for recursive use
+    copy_tree :
+        If ``True``, ensure the passed tree remains unchanged.
+    Returns
+    -------
+    dendrogram :
+        List of rows ``[i, j, height, size]``, where the height is minus the depth of the merge.
+    index :
+        Largest node index in use after processing.
+    """
+    if copy_tree:
+        # Work on a deep copy; all other arguments are reset to their defaults.
+        return get_dendrogram(copy.deepcopy(tree))
+    else:
+        if dendrogram is None:
+            dendrogram = []
+        if index is None:
+            # Internal nodes are numbered after the largest label of the tree.
+            index = get_index(tree)
+        if size is None:
+            # Any node without a recorded size counts for 1.
+            size = defaultdict(lambda: 1)
+        if len(tree) > 1:
+            lengths = np.array([len(t) for t in tree])
+            if np.max(lengths) == 1:
+                # merge all
+                i = tree.pop()[0]
+                j = tree.pop()[0]
+                s = size[i] + size[j]
+                dendrogram.append([i, j, float(-depth), s])
+                index += 1
+                # Remaining children are merged one by one with the node just created.
+                while len(tree):
+                    # NOTE(review): each remaining child adds 1 to the size, whatever its recorded size — confirm intended.
+                    s += 1
+                    dendrogram.append([index, tree.pop()[0], float(-depth), s])
+                    index += 1
+                # The tree is now reduced to the last node created, with its size recorded.
+                size[index] = s
+                tree.append(index)
+                return dendrogram, index
+            else:
+                # Process the first child having more than one element, one level deeper,
+                # then process this tree again with the updated index.
+                i = np.argwhere(lengths > 1).ravel()[0]
+                dendrogram_, index_ = get_dendrogram(tree[i], None, index, depth + 1, size)
+                dendrogram += dendrogram_
+                return get_dendrogram(tree, dendrogram, index_, depth, size)
+        else:
+            # Zero or one element: nothing to merge.
+            return dendrogram, index
+def split_dendrogram(dendrogram: np.ndarray, shape: tuple):
+    """Split the dendrogram of a bipartite graph into 2 dendrograms, one for each part.
+    Parameters
+    ----------
+    dendrogram :
+        Dendrogram of the bipartite graph.
+    shape :
+        Shape of the biadjacency matrix.
+    Returns
+    -------
+    dendrogram_row :
+        Dendrogram for the rows.
+    dendrogram_col :
+        Dendrogram for the columns.
+    """
+    n1, n2 = shape
+    dendrogram_row = []
+    dendrogram_col = []
+    id_row_new = n1
+    id_col_new = n2
+    size_row = {i: 1 for i in range(n1)}
+    size_col = {i + n1: 1 for i in range(n2)}
+    id_row = {i: i for i in range(n1)}
+    id_col = {i + n1: i for i in range(n2)}
+    for t in range(n1 + n2 - 1):
+        i = dendrogram[t, 0]
+        j = dendrogram[t, 1]
+        if i in id_row and j in id_row:
+            size_row[n1 + n2 + t] = size_row.pop(i) + size_row.pop(j)
+            id_row[n1 + n2 + t] = id_row_new
+            dendrogram_row.append([id_row.pop(i), id_row.pop(j), dendrogram[t, 2], size_row[n1 + n2 + t]])
+            id_row_new += 1
+        elif i in id_row:
+            size_row[n1 + n2 + t] = size_row.pop(i)
+            id_row[n1 + n2 + t] = id_row.pop(i)
+        elif j in id_row:
+            size_row[n1 + n2 + t] = size_row.pop(j)
+            id_row[n1 + n2 + t] = id_row.pop(j)
+        if i in id_col and j in id_col:
+            size_col[n1 + n2 + t] = size_col.pop(i) + size_col.pop(j)
+            id_col[n1 + n2 + t] = id_col_new
+            dendrogram_col.append([id_col.pop(i), id_col.pop(j), dendrogram[t, 2], size_col[n1 + n2 + t]])
+            id_col_new += 1
+        elif i in id_col:
+            size_col[n1 + n2 + t] = size_col.pop(i)
+            id_col[n1 + n2 + t] = id_col.pop(i)
+        elif j in id_col:
+            size_col[n1 + n2 + t] = size_col.pop(j)
+            id_col[n1 + n2 + t] = id_col.pop(j)
+    return np.array(dendrogram_row), np.array(dendrogram_col)

sknetwork/hierarchy/tests/__init__.py ADDED Viewed

	@@ -0,0 +1 @@
1	+ """tests for hierarchy"""

sknetwork/hierarchy/tests/test_API.py ADDED Viewed

@@ -0,0 +1,24 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""Tests for hierarchy API"""
+import unittest
+from sknetwork.data.test_graphs import *
+from sknetwork.hierarchy import *
+class TestHierarchyAPI(unittest.TestCase):
+    def test_undirected(self):
+        adjacency = test_graph()
+        n = adjacency.shape[0]
+        for algo in [Paris(), LouvainIteration()]:
+            dendrogram = algo.fit_predict(adjacency)
+            self.assertTupleEqual(dendrogram.shape, (n - 1, 4))
+    def test_disconnected(self):
+        adjacency = test_disconnected_graph()
+        for algo in [Paris(), LouvainIteration()]:
+            dendrogram = algo.fit_transform(adjacency)
+            self.assertEqual(dendrogram.shape, (9, 4))