PyPI - scikit-network - Versions diffs - 0.33.4__cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl - Mend

scikit-network 0.33.4__cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (229) hide show

scikit_network-0.33.4.dist-info/METADATA +122 -0
scikit_network-0.33.4.dist-info/RECORD +229 -0
scikit_network-0.33.4.dist-info/WHEEL +6 -0
scikit_network-0.33.4.dist-info/licenses/AUTHORS.rst +43 -0
scikit_network-0.33.4.dist-info/licenses/LICENSE +34 -0
scikit_network-0.33.4.dist-info/top_level.txt +1 -0
scikit_network.libs/libgomp-a34b3233.so.1.0.0 +0 -0
sknetwork/__init__.py +21 -0
sknetwork/base.py +67 -0
sknetwork/classification/__init__.py +8 -0
sknetwork/classification/base.py +138 -0
sknetwork/classification/base_rank.py +129 -0
sknetwork/classification/diffusion.py +127 -0
sknetwork/classification/knn.py +131 -0
sknetwork/classification/metrics.py +205 -0
sknetwork/classification/pagerank.py +58 -0
sknetwork/classification/propagation.py +144 -0
sknetwork/classification/tests/__init__.py +1 -0
sknetwork/classification/tests/test_API.py +30 -0
sknetwork/classification/tests/test_diffusion.py +77 -0
sknetwork/classification/tests/test_knn.py +23 -0
sknetwork/classification/tests/test_metrics.py +53 -0
sknetwork/classification/tests/test_pagerank.py +20 -0
sknetwork/classification/tests/test_propagation.py +24 -0
sknetwork/classification/vote.cpp +27593 -0
sknetwork/classification/vote.cpython-312-x86_64-linux-gnu.so +0 -0
sknetwork/classification/vote.pyx +56 -0
sknetwork/clustering/__init__.py +8 -0
sknetwork/clustering/base.py +168 -0
sknetwork/clustering/kcenters.py +251 -0
sknetwork/clustering/leiden.py +238 -0
sknetwork/clustering/leiden_core.cpp +31928 -0
sknetwork/clustering/leiden_core.cpython-312-x86_64-linux-gnu.so +0 -0
sknetwork/clustering/leiden_core.pyx +124 -0
sknetwork/clustering/louvain.py +282 -0
sknetwork/clustering/louvain_core.cpp +31573 -0
sknetwork/clustering/louvain_core.cpython-312-x86_64-linux-gnu.so +0 -0
sknetwork/clustering/louvain_core.pyx +124 -0
sknetwork/clustering/metrics.py +91 -0
sknetwork/clustering/postprocess.py +66 -0
sknetwork/clustering/propagation_clustering.py +100 -0
sknetwork/clustering/tests/__init__.py +1 -0
sknetwork/clustering/tests/test_API.py +38 -0
sknetwork/clustering/tests/test_kcenters.py +60 -0
sknetwork/clustering/tests/test_leiden.py +34 -0
sknetwork/clustering/tests/test_louvain.py +135 -0
sknetwork/clustering/tests/test_metrics.py +50 -0
sknetwork/clustering/tests/test_postprocess.py +39 -0
sknetwork/data/__init__.py +6 -0
sknetwork/data/base.py +33 -0
sknetwork/data/load.py +292 -0
sknetwork/data/models.py +459 -0
sknetwork/data/parse.py +644 -0
sknetwork/data/test_graphs.py +93 -0
sknetwork/data/tests/__init__.py +1 -0
sknetwork/data/tests/test_API.py +30 -0
sknetwork/data/tests/test_base.py +14 -0
sknetwork/data/tests/test_load.py +61 -0
sknetwork/data/tests/test_models.py +52 -0
sknetwork/data/tests/test_parse.py +250 -0
sknetwork/data/tests/test_test_graphs.py +29 -0
sknetwork/data/tests/test_toy_graphs.py +68 -0
sknetwork/data/timeout.py +38 -0
sknetwork/data/toy_graphs.py +611 -0
sknetwork/embedding/__init__.py +8 -0
sknetwork/embedding/base.py +90 -0
sknetwork/embedding/force_atlas.py +198 -0
sknetwork/embedding/louvain_embedding.py +142 -0
sknetwork/embedding/random_projection.py +131 -0
sknetwork/embedding/spectral.py +137 -0
sknetwork/embedding/spring.py +198 -0
sknetwork/embedding/svd.py +351 -0
sknetwork/embedding/tests/__init__.py +1 -0
sknetwork/embedding/tests/test_API.py +49 -0
sknetwork/embedding/tests/test_force_atlas.py +35 -0
sknetwork/embedding/tests/test_louvain_embedding.py +33 -0
sknetwork/embedding/tests/test_random_projection.py +28 -0
sknetwork/embedding/tests/test_spectral.py +81 -0
sknetwork/embedding/tests/test_spring.py +50 -0
sknetwork/embedding/tests/test_svd.py +43 -0
sknetwork/gnn/__init__.py +10 -0
sknetwork/gnn/activation.py +117 -0
sknetwork/gnn/base.py +181 -0
sknetwork/gnn/base_activation.py +90 -0
sknetwork/gnn/base_layer.py +109 -0
sknetwork/gnn/gnn_classifier.py +305 -0
sknetwork/gnn/layer.py +153 -0
sknetwork/gnn/loss.py +180 -0
sknetwork/gnn/neighbor_sampler.py +65 -0
sknetwork/gnn/optimizer.py +164 -0
sknetwork/gnn/tests/__init__.py +1 -0
sknetwork/gnn/tests/test_activation.py +56 -0
sknetwork/gnn/tests/test_base.py +75 -0
sknetwork/gnn/tests/test_base_layer.py +37 -0
sknetwork/gnn/tests/test_gnn_classifier.py +130 -0
sknetwork/gnn/tests/test_layers.py +80 -0
sknetwork/gnn/tests/test_loss.py +33 -0
sknetwork/gnn/tests/test_neigh_sampler.py +23 -0
sknetwork/gnn/tests/test_optimizer.py +43 -0
sknetwork/gnn/tests/test_utils.py +41 -0
sknetwork/gnn/utils.py +127 -0
sknetwork/hierarchy/__init__.py +6 -0
sknetwork/hierarchy/base.py +90 -0
sknetwork/hierarchy/louvain_hierarchy.py +260 -0
sknetwork/hierarchy/metrics.py +234 -0
sknetwork/hierarchy/paris.cpp +37877 -0
sknetwork/hierarchy/paris.cpython-312-x86_64-linux-gnu.so +0 -0
sknetwork/hierarchy/paris.pyx +310 -0
sknetwork/hierarchy/postprocess.py +350 -0
sknetwork/hierarchy/tests/__init__.py +1 -0
sknetwork/hierarchy/tests/test_API.py +24 -0
sknetwork/hierarchy/tests/test_algos.py +34 -0
sknetwork/hierarchy/tests/test_metrics.py +62 -0
sknetwork/hierarchy/tests/test_postprocess.py +57 -0
sknetwork/linalg/__init__.py +9 -0
sknetwork/linalg/basics.py +37 -0
sknetwork/linalg/diteration.cpp +27409 -0
sknetwork/linalg/diteration.cpython-312-x86_64-linux-gnu.so +0 -0
sknetwork/linalg/diteration.pyx +47 -0
sknetwork/linalg/eig_solver.py +93 -0
sknetwork/linalg/laplacian.py +15 -0
sknetwork/linalg/normalizer.py +86 -0
sknetwork/linalg/operators.py +225 -0
sknetwork/linalg/polynome.py +76 -0
sknetwork/linalg/ppr_solver.py +170 -0
sknetwork/linalg/push.cpp +31081 -0
sknetwork/linalg/push.cpython-312-x86_64-linux-gnu.so +0 -0
sknetwork/linalg/push.pyx +71 -0
sknetwork/linalg/sparse_lowrank.py +142 -0
sknetwork/linalg/svd_solver.py +91 -0
sknetwork/linalg/tests/__init__.py +1 -0
sknetwork/linalg/tests/test_eig.py +44 -0
sknetwork/linalg/tests/test_laplacian.py +18 -0
sknetwork/linalg/tests/test_normalization.py +34 -0
sknetwork/linalg/tests/test_operators.py +66 -0
sknetwork/linalg/tests/test_polynome.py +38 -0
sknetwork/linalg/tests/test_ppr.py +50 -0
sknetwork/linalg/tests/test_sparse_lowrank.py +61 -0
sknetwork/linalg/tests/test_svd.py +38 -0
sknetwork/linkpred/__init__.py +2 -0
sknetwork/linkpred/base.py +46 -0
sknetwork/linkpred/nn.py +126 -0
sknetwork/linkpred/tests/__init__.py +1 -0
sknetwork/linkpred/tests/test_nn.py +26 -0
sknetwork/log.py +19 -0
sknetwork/path/__init__.py +5 -0
sknetwork/path/dag.py +54 -0
sknetwork/path/distances.py +98 -0
sknetwork/path/search.py +31 -0
sknetwork/path/shortest_path.py +61 -0
sknetwork/path/tests/__init__.py +1 -0
sknetwork/path/tests/test_dag.py +37 -0
sknetwork/path/tests/test_distances.py +62 -0
sknetwork/path/tests/test_search.py +40 -0
sknetwork/path/tests/test_shortest_path.py +40 -0
sknetwork/ranking/__init__.py +8 -0
sknetwork/ranking/base.py +57 -0
sknetwork/ranking/betweenness.cpp +9716 -0
sknetwork/ranking/betweenness.cpython-312-x86_64-linux-gnu.so +0 -0
sknetwork/ranking/betweenness.pyx +97 -0
sknetwork/ranking/closeness.py +92 -0
sknetwork/ranking/hits.py +90 -0
sknetwork/ranking/katz.py +79 -0
sknetwork/ranking/pagerank.py +106 -0
sknetwork/ranking/postprocess.py +37 -0
sknetwork/ranking/tests/__init__.py +1 -0
sknetwork/ranking/tests/test_API.py +32 -0
sknetwork/ranking/tests/test_betweenness.py +38 -0
sknetwork/ranking/tests/test_closeness.py +30 -0
sknetwork/ranking/tests/test_hits.py +20 -0
sknetwork/ranking/tests/test_pagerank.py +62 -0
sknetwork/ranking/tests/test_postprocess.py +26 -0
sknetwork/regression/__init__.py +4 -0
sknetwork/regression/base.py +57 -0
sknetwork/regression/diffusion.py +204 -0
sknetwork/regression/tests/__init__.py +1 -0
sknetwork/regression/tests/test_API.py +32 -0
sknetwork/regression/tests/test_diffusion.py +56 -0
sknetwork/sknetwork.py +3 -0
sknetwork/test_base.py +35 -0
sknetwork/test_log.py +15 -0
sknetwork/topology/__init__.py +8 -0
sknetwork/topology/cliques.cpp +32574 -0
sknetwork/topology/cliques.cpython-312-x86_64-linux-gnu.so +0 -0
sknetwork/topology/cliques.pyx +149 -0
sknetwork/topology/core.cpp +30660 -0
sknetwork/topology/core.cpython-312-x86_64-linux-gnu.so +0 -0
sknetwork/topology/core.pyx +90 -0
sknetwork/topology/cycles.py +243 -0
sknetwork/topology/minheap.cpp +27341 -0
sknetwork/topology/minheap.cpython-312-x86_64-linux-gnu.so +0 -0
sknetwork/topology/minheap.pxd +20 -0
sknetwork/topology/minheap.pyx +109 -0
sknetwork/topology/structure.py +194 -0
sknetwork/topology/tests/__init__.py +1 -0
sknetwork/topology/tests/test_cliques.py +28 -0
sknetwork/topology/tests/test_core.py +19 -0
sknetwork/topology/tests/test_cycles.py +65 -0
sknetwork/topology/tests/test_structure.py +85 -0
sknetwork/topology/tests/test_triangles.py +38 -0
sknetwork/topology/tests/test_wl.py +72 -0
sknetwork/topology/triangles.cpp +8903 -0
sknetwork/topology/triangles.cpython-312-x86_64-linux-gnu.so +0 -0
sknetwork/topology/triangles.pyx +151 -0
sknetwork/topology/weisfeiler_lehman.py +133 -0
sknetwork/topology/weisfeiler_lehman_core.cpp +27644 -0
sknetwork/topology/weisfeiler_lehman_core.cpython-312-x86_64-linux-gnu.so +0 -0
sknetwork/topology/weisfeiler_lehman_core.pyx +114 -0
sknetwork/utils/__init__.py +7 -0
sknetwork/utils/check.py +355 -0
sknetwork/utils/format.py +221 -0
sknetwork/utils/membership.py +82 -0
sknetwork/utils/neighbors.py +115 -0
sknetwork/utils/tests/__init__.py +1 -0
sknetwork/utils/tests/test_check.py +190 -0
sknetwork/utils/tests/test_format.py +63 -0
sknetwork/utils/tests/test_membership.py +24 -0
sknetwork/utils/tests/test_neighbors.py +41 -0
sknetwork/utils/tests/test_tfidf.py +18 -0
sknetwork/utils/tests/test_values.py +66 -0
sknetwork/utils/tfidf.py +37 -0
sknetwork/utils/values.py +76 -0
sknetwork/visualization/__init__.py +4 -0
sknetwork/visualization/colors.py +34 -0
sknetwork/visualization/dendrograms.py +277 -0
sknetwork/visualization/graphs.py +1039 -0
sknetwork/visualization/tests/__init__.py +1 -0
sknetwork/visualization/tests/test_dendrograms.py +53 -0
sknetwork/visualization/tests/test_graphs.py +176 -0

sknetwork/topology/weisfeiler_lehman_core.cpython-312-x86_64-linux-gnu.so ADDED Viewed

Binary file

sknetwork/topology/weisfeiler_lehman_core.pyx ADDED Viewed

@@ -0,0 +1,114 @@
+# distutils: language = c++
+# cython: language_level=3
+"""
+Created on July 1, 2020
+@author: Pierre Pebereau <pierre.pebereau@telecom-paris.fr>
+@author: Alexis Barreaux <alexis.barreaux@telecom-paris.fr>
+"""
+from libcpp.vector cimport vector
+from libc.math cimport pow
+cimport cython
+ctypedef (int, double, int) ctuple
+cdef extern from "<algorithm>" namespace "std":
+    # fixed sort as per https://stackoverflow.com/questions/57584909/unable-to-use-cdef-function-in-stdsort-as-comparison-function
+    void sort(...)
+    void csort "sort"(...)
cdef bint is_lower(ctuple a, ctuple b):
    """Strict ordering of triplets on their first two components.

    Triplets are ordered by their first (integer) component; ties are broken
    with the second (floating-point) component. The third component is ignored.

    Parameters
    ----------
    a :
        Left-hand triplet.
    b :
        Right-hand triplet.

    Returns
    -------
    ``True`` if a < b, and ``False`` otherwise.
    """
    cdef int first_a = a[0]
    cdef int first_b = b[0]
    cdef double second_a = a[1]
    cdef double second_b = b[1]
    if first_a != first_b:
        return first_a < first_b
    return second_a < second_b
@cython.boundscheck(False)
@cython.wraparound(False)
def weisfeiler_lehman_coloring(int[:] indptr, int[:] indices, int[:] labels, double [:] powers, int max_iter):
    """Weisfeiler-Lehman coloring.

    At each iteration, every node gets a hash equal to the sum of ``powers[label]`` over
    its neighbors. Nodes are then sorted by (current label, hash) and relabeled with
    consecutive integers, two nodes sharing a color when their labels are equal and their
    hashes differ by at most ``1e-10``.

    Parameters
    ----------
    indptr :
        Indptr of the CSR.
    indices :
        Indices of the CSR.
    labels : int[:]
        Labels to be changed. This buffer is overwritten in place.
    powers : double [:]
        Powers being used as hash and put in a memory view to limit several identical calculations.
        It is indexed by label value.
    max_iter : int
        Maximum number of iterations.

    Returns
    -------
    labels : int[:]
        Colors of the nodes (the same memory view as the input).
    has_changed : bint
        ``True`` if the last executed iteration modified a label, or if no iteration was executed.
        ``False`` otherwise.
    """
    cdef int n = indptr.shape[0] - 1
    cdef int iteration = 0
    cdef int i, j, j1, j2, jj, label
    cdef double epsilon = pow(10, -10)
    cdef vector[ctuple] new_labels
    cdef ctuple tuple_ref, tuple_new
    cdef double hash_ref, hash_new
    cdef int label_ref, label_new
    cdef bint has_changed = True

    while iteration < max_iter and has_changed:
        new_labels.clear()
        has_changed = False

        # Hash of each node: sum of the powers associated with the labels of its neighbors.
        for i in range(n):
            hash_ref = 0
            j1 = indptr[i]
            j2 = indptr[i + 1]
            for jj in range(j1, j2):
                j = indices[jj]
                hash_ref += powers[labels[j]]
            new_labels.push_back((labels[i], hash_ref, i))

        if new_labels.empty():
            # Graph without node: reading new_labels[0] below would be an out-of-range access
            # (bounds checks are disabled), and there is no label to refine anyway.
            break

        csort(new_labels.begin(), new_labels.end(), is_lower)

        # The first node in sorted order always receives color 0 (its change is not tracked).
        label = 0
        tuple_new = new_labels[0]
        labels[tuple_new[2]] = label

        # Walk the sorted triplets; a new color starts whenever the (label, hash) pair changes.
        for j in range(1, n):
            tuple_ref = tuple_new
            tuple_new = new_labels[j]
            label_ref, hash_ref, _ = tuple_ref
            label_new, hash_new, i = tuple_new
            if abs(hash_new - hash_ref) > epsilon or label_new != label_ref:
                label += 1
            if labels[i] != label:
                has_changed = True
            labels[i] = label
        iteration += 1
    return labels, has_changed

sknetwork/utils/__init__.py ADDED Viewed

@@ -0,0 +1,7 @@
+"""utils module"""
+from sknetwork.data import *
+from sknetwork.utils.check import is_symmetric
+from sknetwork.utils.format import *
+from sknetwork.utils.membership import get_membership, from_membership
+from sknetwork.utils.neighbors import get_neighbors, get_degrees, get_weights
+from sknetwork.utils.tfidf import get_tfidf

sknetwork/utils/check.py ADDED Viewed

@@ -0,0 +1,355 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Created in April 2019
+@author: Nathan de Lara <nathan.delara@polytechnique.org>
+"""
+import warnings
+from typing import Union, Optional
+import numpy as np
+from scipy import sparse
def has_nonnegative_entries(input_matrix: Union[sparse.csr_matrix, np.ndarray]) -> bool:
    """True if the array has non-negative entries.

    Parameters
    ----------
    input_matrix :
        Dense NumPy array or SciPy sparse matrix (any sparse format).

    Returns
    -------
    ``True`` if no entry is negative, and ``False`` otherwise.
    """
    if sparse.issparse(input_matrix):
        # Implicit zeros are non-negative, so only the stored values matter.
        # The CSR view exposes them as a flat array whatever the input sparse format.
        return bool(np.all(input_matrix.tocsr().data >= 0))
    return bool(np.all(input_matrix >= 0))
def is_weakly_connected(adjacency: sparse.csr_matrix) -> bool:
    """Check whether a graph is weakly connected.

    Parameters
    ----------
    adjacency:
        Adjacency matrix of the graph.

    Returns
    -------
    ``True`` if the graph has exactly one weakly connected component.
    """
    # A symmetric adjacency is handled as an undirected graph.
    directed = not is_symmetric(adjacency)
    n_components = sparse.csgraph.connected_components(
        adjacency, directed=directed, connection='weak', return_labels=False)
    return n_components == 1
def check_connected(adjacency: sparse.csr_matrix):
    """Check if a graph is weakly connected and raise an error otherwise.

    Raises
    ------
    ValueError
        If the graph is not weakly connected.
    """
    if not is_weakly_connected(adjacency):
        raise ValueError('The graph is expected to be connected.')
def check_nonnegative(input_matrix: Union[sparse.csr_matrix, np.ndarray]):
    """Check whether the array has non-negative entries and raise an error otherwise.

    Raises
    ------
    ValueError
        If at least one entry is negative.
    """
    if has_nonnegative_entries(input_matrix):
        return
    raise ValueError('Only nonnegative values are expected.')
def has_positive_entries(input_matrix: np.ndarray) -> bool:
    """True if the array has positive entries.

    Parameters
    ----------
    input_matrix :
        Dense NumPy array (subclasses of ``np.ndarray`` are accepted).

    Returns
    -------
    ``True`` if all entries are strictly positive, and ``False`` otherwise.

    Raises
    ------
    TypeError
        If the input is not a dense NumPy array.
    """
    # isinstance (rather than an exact type comparison) so that ndarray subclasses are not rejected.
    if not isinstance(input_matrix, np.ndarray):
        raise TypeError('Entry must be a dense NumPy array.')
    return bool(np.all(input_matrix > 0))
def check_positive(input_matrix: Union[sparse.csr_matrix, np.ndarray]):
    """Check whether the array has positive entries and raise an error otherwise.

    Raises
    ------
    ValueError
        If at least one entry is not positive.
    """
    if has_positive_entries(input_matrix):
        return
    raise ValueError('Only positive values are expected.')
def is_proba_array(input_matrix: np.ndarray) -> bool:
    """True if each line of the array has non-negative entries which sum to 1.

    A one-dimensional array is treated as a single line.

    Raises
    ------
    TypeError
        If the array is neither one nor two-dimensional.
    """
    n_dims = len(input_matrix.shape)
    if n_dims == 1:
        return has_nonnegative_entries(input_matrix) and np.isclose(input_matrix.sum(), 1)
    if n_dims != 2:
        raise TypeError('Entry must be one or two-dimensional array.')
    n_row, n_col = input_matrix.shape
    # Gap between the sum of each line and 1.
    deviation = input_matrix.dot(np.ones(n_col)) - np.ones(n_row)
    return has_nonnegative_entries(input_matrix) and np.isclose(np.linalg.norm(deviation), 0)
def is_square(input_matrix: Union[sparse.csr_matrix, np.ndarray]) -> bool:
    """True if the matrix has as many rows as columns."""
    shape = input_matrix.shape
    return shape[0] == shape[1]
def check_square(input_matrix: Union[sparse.csr_matrix, np.ndarray]):
    """Check whether a matrix is square and raise an error otherwise.

    Raises
    ------
    ValueError
        If the matrix is not square.
    """
    if not is_square(input_matrix):
        raise ValueError('The adjacency matrix is expected to be square.')
def is_symmetric(input_matrix: sparse.csr_matrix) -> bool:
    """True if the matrix is equal to its transpose."""
    # The matrix is symmetric when the difference with its transpose has no non-zero entry.
    difference = input_matrix - input_matrix.T
    return sparse.csr_matrix(difference).nnz == 0
def check_symmetry(input_matrix: sparse.csr_matrix):
    """Check whether a matrix is symmetric and raise an error otherwise.

    Raises
    ------
    ValueError
        If the matrix is not symmetric.
    """
    if is_symmetric(input_matrix):
        return
    raise ValueError('The input matrix is expected to be symmetric.')
def make_weights(distribution: str, adjacency: sparse.csr_matrix) -> np.ndarray:
    """Array of weights from a matrix and a desired distribution.

    Parameters
    ----------
    distribution:
        Distribution for node sampling. Only ``'degree'`` or ``'uniform'`` are accepted
        (case-insensitive).
    adjacency:
        The adjacency matrix of the neighbors.

    Returns
    -------
    node_weights: np.ndarray
        Weights of nodes: sums of the rows of the adjacency for ``'degree'``, ones for ``'uniform'``.

    Raises
    ------
    ValueError
        If the distribution is unknown.
    """
    n_row = adjacency.shape[0]
    kind = distribution.lower()
    if kind == 'uniform':
        return np.ones(n_row)
    if kind == 'degree':
        return adjacency.dot(np.ones(adjacency.shape[1]))
    raise ValueError('Unknown distribution of node weights.')
def check_format(input_matrix: Union[sparse.csr_matrix, sparse.csc_matrix, sparse.coo_matrix, sparse.lil_matrix,
                                     np.ndarray], allow_empty: bool = False) -> sparse.csr_matrix:
    """Check whether the matrix is a NumPy array or a Scipy sparse matrix and return
    the corresponding Scipy CSR matrix.

    Parameters
    ----------
    input_matrix :
        Matrix in CSR, CSC, COO or LIL sparse format, or dense NumPy array.
    allow_empty :
        If ``False``, a matrix without any non-zero entry is rejected.

    Returns
    -------
    The input matrix in CSR format.

    Raises
    ------
    TypeError
        If the type of the input is not one of the accepted formats.
    ValueError
        If the matrix has no non-zero entry and ``allow_empty`` is ``False``.
    """
    accepted = (sparse.csr_matrix, sparse.csc_matrix, sparse.coo_matrix, sparse.lil_matrix, np.ndarray)
    if not any(type(input_matrix) is fmt for fmt in accepted):
        raise TypeError('The input matrix must be in Scipy sparse format or Numpy ndarray format.')
    csr = sparse.csr_matrix(input_matrix)
    if csr.nnz == 0 and not allow_empty:
        raise ValueError('The input matrix is empty.')
    return csr
def check_is_proba(entry: Union[float, int], name: str = None):
    """Check whether the number is non-negative and less than or equal to 1.

    Parameters
    ----------
    entry :
        Number to check. Python and NumPy real scalars are accepted; booleans are not.
    name :
        Name used in the error messages (``'Probabilities'`` if ``None``).

    Raises
    ------
    TypeError
        If the entry is not a real number.
    ValueError
        If the entry is not between 0 and 1.
    """
    if name is None:
        name = 'Probabilities'
    # NumPy scalars (e.g. the result of an array computation) are valid numbers too;
    # booleans remain rejected although ``bool`` is a subclass of ``int``.
    is_number = isinstance(entry, (float, int, np.floating, np.integer)) and not isinstance(entry, bool)
    if not is_number:
        raise TypeError('{} must be floats (or ints if 0 or 1).'.format(name))
    if entry < 0 or entry > 1:
        raise ValueError('{} must be between 0 and 1.'.format(name))
def check_damping_factor(damping_factor: float):
    """Check that the damping factor lies in [0, 1[ and raise an error otherwise.

    Raises
    ------
    ValueError
        If the damping factor is negative, or greater than or equal to 1.
    """
    too_small = damping_factor < 0
    if too_small or damping_factor >= 1:
        raise ValueError('A damping factor must have a value in [0, 1[.')
def check_weights(weights: Union['str', np.ndarray], adjacency: Union[sparse.csr_matrix, sparse.csc_matrix],
                  positive_entries: bool = False) -> np.ndarray:
    """Check whether the weights are a valid distribution for the adjacency and return the weight vector.

    Parameters
    ----------
    weights:
        Probabilities for node sampling in the null model. ``'degree'``, ``'uniform'`` or custom weights.
    adjacency:
        The adjacency matrix of the graph.
    positive_entries:
        If true, the weights must all be positive, if False, the weights must be non-negative.

    Returns
    -------
    node_weights: np.ndarray
        Valid weights of nodes (not normalized).

    Raises
    ------
    TypeError
        If the weights are neither a string nor a NumPy array.
    ValueError
        If the number of weights differs from the number of rows of the adjacency,
        or if the weights violate the sign constraints.
    """
    n_nodes = adjacency.shape[0]
    weights_type = type(weights)
    if weights_type is np.ndarray:
        if len(weights) != n_nodes:
            raise ValueError('The number of node weights must match the number of nodes.')
        node_weights_vec = weights
    elif weights_type is str:
        node_weights_vec = make_weights(weights, adjacency)
    else:
        raise TypeError(
            'Node weights must be a known distribution ("degree" or "uniform" string) or a custom NumPy array.')

    if positive_entries and not has_positive_entries(node_weights_vec):
        raise ValueError('All weights must be positive.')
    # Whatever the mode, negative weights and a non-positive total are rejected.
    if np.any(node_weights_vec < 0) or node_weights_vec.sum() <= 0:
        raise ValueError('Node weights must be non-negative with positive sum.')
    return node_weights_vec
def get_probs(weights: Union['str', np.ndarray], adjacency: Union[sparse.csr_matrix, sparse.csc_matrix],
              positive_entries: bool = False) -> np.ndarray:
    """Check whether the weights are a valid distribution for the adjacency
    and return a normalized probability vector.

    The arguments are forwarded to ``check_weights``; the validated weights are then
    divided by their sum.
    """
    valid_weights = check_weights(weights, adjacency, positive_entries)
    total = np.sum(valid_weights)
    return valid_weights / total
def check_random_state(random_state: Optional[Union[np.random.RandomState, int]]):
    """Check whether the argument is a seed or a NumPy random state and return a random state.

    Parameters
    ----------
    random_state :
        Seed (Python or NumPy integer), NumPy random state, or ``None``.

    Returns
    -------
    np.random.RandomState
        A new unseeded random state if ``None``, a new random state built from the seed
        if an integer, the argument itself if it is already a random state.

    Raises
    ------
    TypeError
        If the argument is none of the above.
    """
    if random_state is None:
        return np.random.RandomState()
    if isinstance(random_state, np.random.RandomState):
        return random_state
    # NumPy integers are valid seeds too; booleans remain rejected although ``bool`` subclasses ``int``.
    if isinstance(random_state, (int, np.integer)) and not isinstance(random_state, bool):
        return np.random.RandomState(random_state)
    raise TypeError('To specify a random state, pass the seed (as an int) or a NumPy random state object.')
def check_n_neighbors(n_neighbors: int, n_seeds: int):
    """Set the number of neighbors so that it is less than the number of labeled samples.

    Returns
    -------
    ``n_neighbors`` if it is lower than ``n_seeds``; otherwise ``n_seeds - 1``, after emitting a warning.
    """
    if n_neighbors >= n_seeds:
        message = ("The number of neighbors must be lower than the number of nodes with known labels. "
                   "Changed accordingly.")
        warnings.warn(Warning(message))
        return n_seeds - 1
    return n_neighbors
def check_labels(labels: np.ndarray):
    """Check labels of the seeds for semi-supervised algorithms.

    Negative labels are ignored.

    Returns
    -------
    classes : np.ndarray
        Sorted distinct non-negative labels.
    n_classes : int
        Number of distinct non-negative labels.

    Raises
    ------
    ValueError
        If there are fewer than two distinct non-negative labels.
    """
    known_labels = labels[labels >= 0]
    classes: np.ndarray = np.unique(known_labels)
    n_classes: int = len(classes)
    if n_classes >= 2:
        return classes, n_classes
    raise ValueError('There must be at least two distinct labels.')
def check_n_jobs(n_jobs: Optional[int] = None):
    """Parse the ``n_jobs`` parameter for multiprocessing.

    Returns
    -------
    ``1`` if ``n_jobs`` is ``None``, ``None`` if ``n_jobs`` is ``-1``, and ``n_jobs`` unchanged otherwise.
    """
    if n_jobs is None:
        return 1
    if n_jobs == -1:
        return None
    return n_jobs
def check_adjacency_vector(adjacency_vectors: Union[sparse.csr_matrix, np.ndarray],
                           n: Optional[int] = None) -> sparse.csr_matrix:
    """Check format of new samples for predict methods.

    Parameters
    ----------
    adjacency_vectors :
        Adjacency vectors of the new samples, converted with ``check_format``.
    n :
        Expected number of columns (not checked if ``None``).

    Returns
    -------
    The adjacency vectors as returned by ``check_format``.

    Raises
    ------
    ValueError
        If ``n`` is given and differs from the number of columns.
    """
    vectors = check_format(adjacency_vectors)
    if n is None or vectors.shape[1] == n:
        return vectors
    raise ValueError('The adjacency vector must be of length equal to the number nodes in the graph.')
def check_n_clusters(n_clusters: int, n_row: int, n_min: int = 0):
    """Check that the number of clusters lies between ``n_min`` and the number of rows.

    Raises
    ------
    ValueError
        If ``n_clusters`` exceeds ``n_row`` or is lower than ``n_min``.
    """
    if n_clusters > n_row:
        raise ValueError('The number of clusters exceeds the number of rows.')
    if n_clusters < n_min:
        raise ValueError('The number of clusters must be at least {}.'.format(n_min))
def check_min_size(n_row, n_min):
    """Check that an adjacency has the required number of rows and raise an error otherwise.

    Raises
    ------
    ValueError
        If ``n_row`` is lower than ``n_min``.
    """
    if n_row < n_min:
        raise ValueError('The graph must contain at least {} nodes.'.format(n_min))
def check_dendrogram(dendrogram):
    """Check the shape of a dendrogram: two dimensions and exactly 4 columns.

    Raises
    ------
    ValueError
        If the array is not two-dimensional or does not have 4 columns.
    """
    well_formed = dendrogram.ndim == 2 and dendrogram.shape[1] == 4
    if not well_formed:
        raise ValueError("Dendrogram has incorrect shape.")
def check_min_nnz(nnz, n_min):
    """Check that an adjacency has the required number of edges and raise an error otherwise.

    Raises
    ------
    ValueError
        If ``nnz`` is lower than ``n_min``.
    """
    if nnz < n_min:
        raise ValueError('The graph must contain at least {} edge(s).'.format(n_min))
def check_n_components(n_components, n_min) -> int:
    """Check the number of components and cap it at ``n_min``.

    Returns
    -------
    ``n_components`` if it does not exceed ``n_min``; otherwise ``n_min``, after emitting a warning.
    """
    if n_components > n_min:
        message = "The dimension of the embedding cannot exceed {}. Changed accordingly.".format(n_min)
        warnings.warn(Warning(message))
        return n_min
    return n_components
def check_scaling(scaling: float, adjacency: sparse.csr_matrix, regularize: bool):
    """Check the scaling factor.

    Parameters
    ----------
    scaling :
        Scaling factor.
    adjacency :
        Adjacency matrix of the graph; its connectivity is only tested when needed.
    regularize :
        Whether regularization is applied.

    Raises
    ------
    ValueError
        If ``scaling`` is negative, or if it is non-zero while the graph is neither
        regularized nor weakly connected.
    """
    if scaling < 0:
        raise ValueError("The 'scaling' parameter must be non-negative.")
    # The connectivity test is evaluated last so that it is skipped whenever possible.
    if scaling and (not regularize) and not is_weakly_connected(adjacency):
        # Trailing space added: the two sentences were previously glued together in the message.
        raise ValueError("Positive 'scaling' is valid only if the graph is connected or with regularization. "
                         "Call 'fit' either with 'scaling' = 0 or positive 'regularization'.")
def has_boolean_entries(input_matrix: np.ndarray) -> bool:
    """True if the array has boolean entries.

    Parameters
    ----------
    input_matrix :
        Dense NumPy array (subclasses of ``np.ndarray`` are accepted).

    Returns
    -------
    ``True`` if the dtype of the array is boolean, and ``False`` otherwise.

    Raises
    ------
    TypeError
        If the input is not a dense NumPy array.
    """
    # isinstance (rather than an exact type comparison) so that ndarray subclasses are not rejected.
    if not isinstance(input_matrix, np.ndarray):
        raise TypeError('Entry must be a dense NumPy array.')
    return input_matrix.dtype == 'bool'
def check_boolean(input_matrix: np.ndarray):
    """Check whether the array has boolean entries and raise an error otherwise.

    Raises
    ------
    ValueError
        If the entries of the array are not boolean.
    """
    if has_boolean_entries(input_matrix):
        return
    raise ValueError('Only boolean values are expected.')
def check_vector_format(vector_1: np.ndarray, vector_2: np.ndarray):
    """Check whether the inputs are vectors of same length.

    Raises
    ------
    ValueError
        If one of the arrays has more than one dimension, or if their lengths differ.
    """
    if any(len(vector.shape) > 1 for vector in (vector_1, vector_2)):
        raise ValueError('The arrays must be 1-dimensional.')
    length_1, length_2 = vector_1.shape[0], vector_2.shape[0]
    if length_1 != length_2:
        raise ValueError('The arrays do not have the same length.')
def has_self_loops(input_matrix: sparse.csr_matrix) -> bool:
    """True if each node has a self loop, i.e. if no diagonal entry is zero."""
    diagonal_is_set = input_matrix.diagonal().astype(bool)
    return all(diagonal_is_set)
def add_self_loops(adjacency: sparse.csr_matrix) -> sparse.csr_matrix:
    """Add self loops to adjacency matrix.

    A unit weight is added to each entry of the main diagonal. The matrix does not
    need to be square.

    Parameters
    ----------
    adjacency : sparse.csr_matrix
        Adjacency matrix of the graph.

    Returns
    -------
    sparse.csr_matrix
        Adjacency matrix of the graph with self loops.
    """
    n_row, n_col = adjacency.shape
    if n_row == n_col:
        identity = sparse.diags(np.ones(n_col), format='csr')
        return identity + adjacency
    # Rectangular case: identity reshaped to the dimensions of the adjacency.
    rectangular_identity = sparse.eye(n_row)
    rectangular_identity.resize(n_row, n_col)
    adjacency += rectangular_identity
    return adjacency

sknetwork/utils/format.py ADDED Viewed

@@ -0,0 +1,221 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Created in April 2019
+@author: Nathan de Lara <nathan.delara@polytechnique.org>
+"""
+from typing import Union, Tuple, Optional
+import numpy as np
+from scipy import sparse
+from sknetwork.linalg.sparse_lowrank import SparseLR
+from sknetwork.utils.check import check_format, is_square, is_symmetric
+from sknetwork.utils.values import stack_values, get_values
+def check_csr_or_slr(adjacency):
+    """Check if input is csr or SparseLR and raise an error otherwise."""
+    if type(adjacency) not in [sparse.csr_matrix, SparseLR]:
+        raise TypeError('Input must be a scipy CSR matrix or a SparseLR object.')
+def directed2undirected(adjacency: Union[sparse.csr_matrix, SparseLR],
+                        weighted: bool = True) -> Union[sparse.csr_matrix, SparseLR]:
+    """Adjacency matrix of the undirected graph associated with some directed graph.
+    The new adjacency matrix becomes either:
+    :math:`A+A^T` (default)
+    or
+    :math:`\\max(A,A^T) > 0` (binary)
+    If the initial adjacency matrix :math:`A` is binary, bidirectional edges have weight 2
+    (first method, default) or 1 (second method).
+    Parameters
+    ----------
+    adjacency :
+        Adjacency matrix.
+    weighted :
+        If ``True``, return the sum of the weights in both directions of each edge.
+    Returns
+    -------
+    new_adjacency :
+        New adjacency matrix (same format as input).
+    """
+    check_csr_or_slr(adjacency)
+    if type(adjacency) == sparse.csr_matrix:
+        if weighted:
+            if adjacency.data.dtype == float:
+                data_type = float
+            else:
+                data_type = int
+            new_adjacency = adjacency.astype(data_type)
+            new_adjacency += adjacency.T
+        else:
+            new_adjacency = (adjacency + adjacency.T).astype(bool)
+        new_adjacency.tocsr().sort_indices()
+        return new_adjacency
+    else:
+        if weighted:
+            new_tuples = [(y, x) for (x, y) in adjacency.low_rank_tuples]
+            return SparseLR(directed2undirected(adjacency.sparse_mat), adjacency.low_rank_tuples + new_tuples)
+        else:
+            raise ValueError('This function only works with ``weighted=True`` for SparseLR objects.')
+def bipartite2directed(biadjacency: Union[sparse.csr_matrix, SparseLR]) -> Union[sparse.csr_matrix, SparseLR]:
+    """Adjacency matrix of the directed graph associated with a bipartite graph
+    (with edges from one part to the other).
+    The returned adjacency matrix is:
+    :math:`A  = \\begin{bmatrix} 0 & B \\\\ 0 & 0 \\end{bmatrix}`
+    where :math:`B` is the biadjacency matrix.
+    Parameters
+    ----------
+    biadjacency :
+        Biadjacency matrix of the graph.
+    Returns
+    -------
+    adjacency :
+        Adjacency matrix (same format as input).
+    """
+    check_csr_or_slr(biadjacency)
+    n_row, n_col = biadjacency.shape
+    if type(biadjacency) == sparse.csr_matrix:
+        adjacency = sparse.bmat([[None, biadjacency], [sparse.csr_matrix((n_col, n_row)), None]], format='csr')
+        adjacency.sort_indices()
+        return adjacency
+    else:
+        new_tuples = [(np.hstack((x, np.zeros(n_col))), np.hstack((np.zeros(n_row), y)))
+                      for (x, y) in biadjacency.low_rank_tuples]
+        return SparseLR(bipartite2directed(biadjacency.sparse_mat), new_tuples)
+def bipartite2undirected(biadjacency: Union[sparse.csr_matrix, SparseLR]) -> Union[sparse.csr_matrix, SparseLR]:
+    """Adjacency matrix of a bigraph defined by its biadjacency matrix.
+    The returned adjacency matrix is:
+    :math:`A  = \\begin{bmatrix} 0 & B \\\\ B^T & 0 \\end{bmatrix}`
+    where :math:`B` is the biadjacency matrix of the bipartite graph.
+    Parameters
+    ----------
+    biadjacency:
+        Biadjacency matrix of the graph.
+    Returns
+    -------
+    adjacency :
+        Adjacency matrix (same format as input).
+    """
+    check_csr_or_slr(biadjacency)
+    if type(biadjacency) == sparse.csr_matrix:
+        adjacency = sparse.bmat([[None, biadjacency], [biadjacency.T, None]], format='csr')
+        adjacency.sort_indices()
+        return adjacency
+    else:
+        n_row, n_col = biadjacency.shape
+        new_tuples = []
+        for (x, y) in biadjacency.low_rank_tuples:
+            new_tuples.append((np.hstack((x, np.zeros(n_col))), np.hstack((np.zeros(n_row), y))))
+            new_tuples.append((np.hstack((np.zeros(n_row), y)), np.hstack((x, np.zeros(n_col)))))
+        return SparseLR(bipartite2undirected(biadjacency.sparse_mat), new_tuples)
+def get_adjacency(input_matrix: Union[sparse.csr_matrix, np.ndarray], allow_directed: bool = True,
+                  force_bipartite: bool = False, force_directed: bool = False, allow_empty: bool = False)\
+        -> Tuple[sparse.csr_matrix, bool]:
+    """Check the input matrix and return a proper adjacency matrix.
+    Parameters
+    ----------
+    input_matrix :
+        Adjacency matrix of biadjacency matrix of the graph.
+    allow_directed :
+        If ``True`` (default), allow the graph to be directed.
+    force_bipartite : bool
+        If ``True``, return the adjacency matrix of a bipartite graph.
+        Otherwise (default), do it only if the input matrix is not square or not symmetric
+        with ``allow_directed=False``.
+    force_directed :
+        If ``True`` return :math:`A  = \\begin{bmatrix} 0 & B \\\\ 0 & 0 \\end{bmatrix}`.
+        Otherwise (default), return :math:`A  = \\begin{bmatrix} 0 & B \\\\ B^T & 0 \\end{bmatrix}`.
+    allow_empty :
+        If ``True``, allow the input matrix to be empty.
+    """
+    input_matrix = check_format(input_matrix, allow_empty=allow_empty)
+    bipartite = False
+    if force_bipartite or not is_square(input_matrix) or not (allow_directed or is_symmetric(input_matrix)):
+        bipartite = True
+    if bipartite:
+        if force_directed:
+            adjacency = bipartite2directed(input_matrix)
+        else:
+            adjacency = bipartite2undirected(input_matrix)
+    else:
+        adjacency = input_matrix
+    return adjacency, bipartite
+def get_adjacency_values(input_matrix: Union[sparse.csr_matrix, np.ndarray], allow_directed: bool = True,
+                         force_bipartite: bool = False, force_directed: bool = False,
+                         values: Optional[Union[dict, np.ndarray]] = None,
+                         values_row: Optional[Union[dict, np.ndarray]] = None,
+                         values_col: Optional[Union[dict, np.ndarray]] = None,
+                         default_value: float = -1,
+                         which: Optional[str] = None) \
+        -> Tuple[sparse.csr_matrix, np.ndarray, bool]:
+    """Check the input matrix and return a proper adjacency matrix and vector of values.
+    Parameters
+    ----------
+    input_matrix :
+        Adjacency matrix of biadjacency matrix of the graph.
+    allow_directed :
+        If ``True`` (default), allow the graph to be directed.
+    force_bipartite : bool
+        If ``True``, return the adjacency matrix of a bipartite graph.
+        Otherwise (default), do it only if the input matrix is not square or not symmetric
+        with ``allow_directed=False``.
+    force_directed :
+        If ``True`` return :math:`A  = \\begin{bmatrix} 0 & B \\\\ 0 & 0 \\end{bmatrix}`.
+        Otherwise (default), return :math:`A  = \\begin{bmatrix} 0 & B \\\\ B^T & 0 \\end{bmatrix}`.
+    values :
+        Values of nodes (dictionary or vector). Negative values ignored.
+    values_row, values_col :
+        Values of rows and columns for bipartite graphs. Negative values ignored.
+    default_value :
+        Default value of nodes (default = -1).
+    which :
+        Which values.
+        If ``'probs'``, return a probability distribution.
+        If ``'labels'``, return the values, or distinct integer values if all are the same.
+    """
+    input_matrix = check_format(input_matrix)
+    if values_row is not None or values_col is not None:
+        force_bipartite = True
+    adjacency, bipartite = get_adjacency(input_matrix, allow_directed=allow_directed,
+                                         force_bipartite=force_bipartite, force_directed=force_directed)
+    if bipartite:
+        if values is None:
+            values = stack_values(input_matrix.shape, values_row, values_col, default_value=default_value)
+        else:
+            values = stack_values(input_matrix.shape, values, default_value=default_value)
+    else:
+        values = get_values(input_matrix.shape, values, default_value=default_value)
+    if which == 'probs':
+        if values.sum() > 0:
+            values /= values.sum()
+    elif which == 'labels':
+        if len(set(values[values >= 0])) == 1:
+            values = np.arange(len(values))
+    return adjacency, values, bipartite