PyPI - scikit-network - Versions diffs - 0.33.3__cp313-cp313-win_amd64.whl - Mend

scikit-network 0.33.3__cp313-cp313-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of scikit-network might be problematic. Click here for more details.

Files changed (228) hide show

scikit_network-0.33.3.dist-info/METADATA +122 -0
scikit_network-0.33.3.dist-info/RECORD +228 -0
scikit_network-0.33.3.dist-info/WHEEL +5 -0
scikit_network-0.33.3.dist-info/licenses/AUTHORS.rst +43 -0
scikit_network-0.33.3.dist-info/licenses/LICENSE +34 -0
scikit_network-0.33.3.dist-info/top_level.txt +1 -0
sknetwork/__init__.py +21 -0
sknetwork/base.py +67 -0
sknetwork/classification/__init__.py +8 -0
sknetwork/classification/base.py +142 -0
sknetwork/classification/base_rank.py +133 -0
sknetwork/classification/diffusion.py +134 -0
sknetwork/classification/knn.py +139 -0
sknetwork/classification/metrics.py +205 -0
sknetwork/classification/pagerank.py +66 -0
sknetwork/classification/propagation.py +152 -0
sknetwork/classification/tests/__init__.py +1 -0
sknetwork/classification/tests/test_API.py +30 -0
sknetwork/classification/tests/test_diffusion.py +77 -0
sknetwork/classification/tests/test_knn.py +23 -0
sknetwork/classification/tests/test_metrics.py +53 -0
sknetwork/classification/tests/test_pagerank.py +20 -0
sknetwork/classification/tests/test_propagation.py +24 -0
sknetwork/classification/vote.cp313-win_amd64.pyd +0 -0
sknetwork/classification/vote.cpp +27584 -0
sknetwork/classification/vote.pyx +56 -0
sknetwork/clustering/__init__.py +8 -0
sknetwork/clustering/base.py +172 -0
sknetwork/clustering/kcenters.py +253 -0
sknetwork/clustering/leiden.py +242 -0
sknetwork/clustering/leiden_core.cp313-win_amd64.pyd +0 -0
sknetwork/clustering/leiden_core.cpp +31575 -0
sknetwork/clustering/leiden_core.pyx +124 -0
sknetwork/clustering/louvain.py +286 -0
sknetwork/clustering/louvain_core.cp313-win_amd64.pyd +0 -0
sknetwork/clustering/louvain_core.cpp +31220 -0
sknetwork/clustering/louvain_core.pyx +124 -0
sknetwork/clustering/metrics.py +91 -0
sknetwork/clustering/postprocess.py +66 -0
sknetwork/clustering/propagation_clustering.py +104 -0
sknetwork/clustering/tests/__init__.py +1 -0
sknetwork/clustering/tests/test_API.py +38 -0
sknetwork/clustering/tests/test_kcenters.py +60 -0
sknetwork/clustering/tests/test_leiden.py +34 -0
sknetwork/clustering/tests/test_louvain.py +135 -0
sknetwork/clustering/tests/test_metrics.py +50 -0
sknetwork/clustering/tests/test_postprocess.py +39 -0
sknetwork/data/__init__.py +6 -0
sknetwork/data/base.py +33 -0
sknetwork/data/load.py +406 -0
sknetwork/data/models.py +459 -0
sknetwork/data/parse.py +644 -0
sknetwork/data/test_graphs.py +84 -0
sknetwork/data/tests/__init__.py +1 -0
sknetwork/data/tests/test_API.py +30 -0
sknetwork/data/tests/test_base.py +14 -0
sknetwork/data/tests/test_load.py +95 -0
sknetwork/data/tests/test_models.py +52 -0
sknetwork/data/tests/test_parse.py +250 -0
sknetwork/data/tests/test_test_graphs.py +29 -0
sknetwork/data/tests/test_toy_graphs.py +68 -0
sknetwork/data/timeout.py +38 -0
sknetwork/data/toy_graphs.py +611 -0
sknetwork/embedding/__init__.py +8 -0
sknetwork/embedding/base.py +94 -0
sknetwork/embedding/force_atlas.py +198 -0
sknetwork/embedding/louvain_embedding.py +148 -0
sknetwork/embedding/random_projection.py +135 -0
sknetwork/embedding/spectral.py +141 -0
sknetwork/embedding/spring.py +198 -0
sknetwork/embedding/svd.py +359 -0
sknetwork/embedding/tests/__init__.py +1 -0
sknetwork/embedding/tests/test_API.py +49 -0
sknetwork/embedding/tests/test_force_atlas.py +35 -0
sknetwork/embedding/tests/test_louvain_embedding.py +33 -0
sknetwork/embedding/tests/test_random_projection.py +28 -0
sknetwork/embedding/tests/test_spectral.py +81 -0
sknetwork/embedding/tests/test_spring.py +50 -0
sknetwork/embedding/tests/test_svd.py +43 -0
sknetwork/gnn/__init__.py +10 -0
sknetwork/gnn/activation.py +117 -0
sknetwork/gnn/base.py +181 -0
sknetwork/gnn/base_activation.py +90 -0
sknetwork/gnn/base_layer.py +109 -0
sknetwork/gnn/gnn_classifier.py +305 -0
sknetwork/gnn/layer.py +153 -0
sknetwork/gnn/loss.py +180 -0
sknetwork/gnn/neighbor_sampler.py +65 -0
sknetwork/gnn/optimizer.py +164 -0
sknetwork/gnn/tests/__init__.py +1 -0
sknetwork/gnn/tests/test_activation.py +56 -0
sknetwork/gnn/tests/test_base.py +75 -0
sknetwork/gnn/tests/test_base_layer.py +37 -0
sknetwork/gnn/tests/test_gnn_classifier.py +130 -0
sknetwork/gnn/tests/test_layers.py +80 -0
sknetwork/gnn/tests/test_loss.py +33 -0
sknetwork/gnn/tests/test_neigh_sampler.py +23 -0
sknetwork/gnn/tests/test_optimizer.py +43 -0
sknetwork/gnn/tests/test_utils.py +41 -0
sknetwork/gnn/utils.py +127 -0
sknetwork/hierarchy/__init__.py +6 -0
sknetwork/hierarchy/base.py +96 -0
sknetwork/hierarchy/louvain_hierarchy.py +272 -0
sknetwork/hierarchy/metrics.py +234 -0
sknetwork/hierarchy/paris.cp313-win_amd64.pyd +0 -0
sknetwork/hierarchy/paris.cpp +37868 -0
sknetwork/hierarchy/paris.pyx +316 -0
sknetwork/hierarchy/postprocess.py +350 -0
sknetwork/hierarchy/tests/__init__.py +1 -0
sknetwork/hierarchy/tests/test_API.py +24 -0
sknetwork/hierarchy/tests/test_algos.py +34 -0
sknetwork/hierarchy/tests/test_metrics.py +62 -0
sknetwork/hierarchy/tests/test_postprocess.py +57 -0
sknetwork/linalg/__init__.py +9 -0
sknetwork/linalg/basics.py +37 -0
sknetwork/linalg/diteration.cp313-win_amd64.pyd +0 -0
sknetwork/linalg/diteration.cpp +27400 -0
sknetwork/linalg/diteration.pyx +47 -0
sknetwork/linalg/eig_solver.py +93 -0
sknetwork/linalg/laplacian.py +15 -0
sknetwork/linalg/normalizer.py +86 -0
sknetwork/linalg/operators.py +225 -0
sknetwork/linalg/polynome.py +76 -0
sknetwork/linalg/ppr_solver.py +170 -0
sknetwork/linalg/push.cp313-win_amd64.pyd +0 -0
sknetwork/linalg/push.cpp +31072 -0
sknetwork/linalg/push.pyx +71 -0
sknetwork/linalg/sparse_lowrank.py +142 -0
sknetwork/linalg/svd_solver.py +91 -0
sknetwork/linalg/tests/__init__.py +1 -0
sknetwork/linalg/tests/test_eig.py +44 -0
sknetwork/linalg/tests/test_laplacian.py +18 -0
sknetwork/linalg/tests/test_normalization.py +34 -0
sknetwork/linalg/tests/test_operators.py +66 -0
sknetwork/linalg/tests/test_polynome.py +38 -0
sknetwork/linalg/tests/test_ppr.py +50 -0
sknetwork/linalg/tests/test_sparse_lowrank.py +61 -0
sknetwork/linalg/tests/test_svd.py +38 -0
sknetwork/linkpred/__init__.py +2 -0
sknetwork/linkpred/base.py +46 -0
sknetwork/linkpred/nn.py +126 -0
sknetwork/linkpred/tests/__init__.py +1 -0
sknetwork/linkpred/tests/test_nn.py +27 -0
sknetwork/log.py +19 -0
sknetwork/path/__init__.py +5 -0
sknetwork/path/dag.py +54 -0
sknetwork/path/distances.py +98 -0
sknetwork/path/search.py +31 -0
sknetwork/path/shortest_path.py +61 -0
sknetwork/path/tests/__init__.py +1 -0
sknetwork/path/tests/test_dag.py +37 -0
sknetwork/path/tests/test_distances.py +62 -0
sknetwork/path/tests/test_search.py +40 -0
sknetwork/path/tests/test_shortest_path.py +40 -0
sknetwork/ranking/__init__.py +8 -0
sknetwork/ranking/base.py +61 -0
sknetwork/ranking/betweenness.cp313-win_amd64.pyd +0 -0
sknetwork/ranking/betweenness.cpp +9707 -0
sknetwork/ranking/betweenness.pyx +97 -0
sknetwork/ranking/closeness.py +92 -0
sknetwork/ranking/hits.py +94 -0
sknetwork/ranking/katz.py +83 -0
sknetwork/ranking/pagerank.py +110 -0
sknetwork/ranking/postprocess.py +37 -0
sknetwork/ranking/tests/__init__.py +1 -0
sknetwork/ranking/tests/test_API.py +32 -0
sknetwork/ranking/tests/test_betweenness.py +38 -0
sknetwork/ranking/tests/test_closeness.py +30 -0
sknetwork/ranking/tests/test_hits.py +20 -0
sknetwork/ranking/tests/test_pagerank.py +62 -0
sknetwork/ranking/tests/test_postprocess.py +26 -0
sknetwork/regression/__init__.py +4 -0
sknetwork/regression/base.py +61 -0
sknetwork/regression/diffusion.py +210 -0
sknetwork/regression/tests/__init__.py +1 -0
sknetwork/regression/tests/test_API.py +32 -0
sknetwork/regression/tests/test_diffusion.py +56 -0
sknetwork/sknetwork.py +3 -0
sknetwork/test_base.py +35 -0
sknetwork/test_log.py +15 -0
sknetwork/topology/__init__.py +8 -0
sknetwork/topology/cliques.cp313-win_amd64.pyd +0 -0
sknetwork/topology/cliques.cpp +32565 -0
sknetwork/topology/cliques.pyx +149 -0
sknetwork/topology/core.cp313-win_amd64.pyd +0 -0
sknetwork/topology/core.cpp +30651 -0
sknetwork/topology/core.pyx +90 -0
sknetwork/topology/cycles.py +243 -0
sknetwork/topology/minheap.cp313-win_amd64.pyd +0 -0
sknetwork/topology/minheap.cpp +27332 -0
sknetwork/topology/minheap.pxd +20 -0
sknetwork/topology/minheap.pyx +109 -0
sknetwork/topology/structure.py +194 -0
sknetwork/topology/tests/__init__.py +1 -0
sknetwork/topology/tests/test_cliques.py +28 -0
sknetwork/topology/tests/test_core.py +19 -0
sknetwork/topology/tests/test_cycles.py +65 -0
sknetwork/topology/tests/test_structure.py +85 -0
sknetwork/topology/tests/test_triangles.py +38 -0
sknetwork/topology/tests/test_wl.py +72 -0
sknetwork/topology/triangles.cp313-win_amd64.pyd +0 -0
sknetwork/topology/triangles.cpp +8894 -0
sknetwork/topology/triangles.pyx +151 -0
sknetwork/topology/weisfeiler_lehman.py +133 -0
sknetwork/topology/weisfeiler_lehman_core.cp313-win_amd64.pyd +0 -0
sknetwork/topology/weisfeiler_lehman_core.cpp +27635 -0
sknetwork/topology/weisfeiler_lehman_core.pyx +114 -0
sknetwork/utils/__init__.py +7 -0
sknetwork/utils/check.py +355 -0
sknetwork/utils/format.py +221 -0
sknetwork/utils/membership.py +82 -0
sknetwork/utils/neighbors.py +115 -0
sknetwork/utils/tests/__init__.py +1 -0
sknetwork/utils/tests/test_check.py +190 -0
sknetwork/utils/tests/test_format.py +63 -0
sknetwork/utils/tests/test_membership.py +24 -0
sknetwork/utils/tests/test_neighbors.py +41 -0
sknetwork/utils/tests/test_tfidf.py +18 -0
sknetwork/utils/tests/test_values.py +66 -0
sknetwork/utils/tfidf.py +37 -0
sknetwork/utils/values.py +76 -0
sknetwork/visualization/__init__.py +4 -0
sknetwork/visualization/colors.py +34 -0
sknetwork/visualization/dendrograms.py +277 -0
sknetwork/visualization/graphs.py +1039 -0
sknetwork/visualization/tests/__init__.py +1 -0
sknetwork/visualization/tests/test_dendrograms.py +53 -0
sknetwork/visualization/tests/test_graphs.py +176 -0

sknetwork/data/parse.py ADDED Viewed

@@ -0,0 +1,644 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Created in December 2018
+@author: Quentin Lutz <qlutz@enst.fr>
+@author: Nathan de Lara <nathan.delara@polytechnique.org>
+@author: Thomas Bonald <bonald@enst.fr>
+"""
+from csv import reader
+from typing import Dict, List, Tuple, Union, Optional
+from xml.etree import ElementTree
+import numpy as np
+from scipy import sparse
+from sknetwork.data.base import Dataset
+from sknetwork.utils.format import directed2undirected
+def from_edge_list(edge_list: Union[np.ndarray, List[Tuple]], directed: bool = False,
+                   bipartite: bool = False, weighted: bool = True, reindex: bool = False, shape: Optional[tuple] = None,
+                   sum_duplicates: bool = True, matrix_only: bool = None) -> Union[Dataset, sparse.csr_matrix]:
+    """Load a graph from an edge list.
+    Parameters
+    ----------
+    edge_list : Union[np.ndarray, List[Tuple]]
+        The edge list to convert, given as a NumPy array of size (n, 2) or (n, 3) or a list of tuples of
+        length 2 or 3.
+    directed : bool
+        If ``True``, considers the graph as directed.
+    bipartite : bool
+        If ``True``, returns a biadjacency matrix.
+    weighted : bool
+        If ``True``, returns a weighted graph.
+    reindex : bool
+        If ``True``, reindex nodes and returns the original node indices as names.
+        Reindexing is enforced if nodes are not integers.
+    shape : tuple
+        Shape of the adjacency or biadjacency matrix.
+        If not specified or if nodes are reindexed, the shape is the smallest compatible with node indices.
+    sum_duplicates : bool
+        If ``True`` (default), sums weights of duplicate edges.
+        Otherwise, the weight of each edge is that of the first occurrence of this edge.
+    matrix_only : bool
+        If ``True``, returns only the adjacency or biadjacency matrix.
+        Otherwise, returns a ``Dataset`` object with graph attributes (e.g., node names).
+        If not specified (default), selects the most appropriate format.
+    Returns
+    -------
+    graph : :class:`Dataset` (including node names) or sparse matrix
+    Examples
+    --------
+    >>> edges = [(0, 1), (1, 2), (2, 0)]
+    >>> adjacency = from_edge_list(edges)
+    >>> adjacency.shape
+    (3, 3)
+    >>> edges = [('Alice', 'Bob'), ('Bob', 'Carol'), ('Carol', 'Alice')]
+    >>> graph = from_edge_list(edges)
+    >>> adjacency = graph.adjacency
+    >>> adjacency.shape
+    (3, 3)
+    >>> print(graph.names)
+    ['Alice' 'Bob' 'Carol']
+    """
+    edge_array = np.array([])
+    weights = None
+    if isinstance(edge_list, list):
+        try:
+            edge_array = np.array([[edge[0], edge[1]] for edge in edge_list])
+            if len(edge_list) and len(edge_list[0]) == 3:
+                weights = np.array([edge[2] for edge in edge_list])
+            else:
+                raise ValueError()
+        except ValueError:
+            ValueError('Edges must be given as tuples of fixed size (2 or 3).')
+    elif isinstance(edge_list, np.ndarray):
+        if edge_list.ndim != 2 or edge_list.shape[1] not in [2, 3]:
+            raise ValueError('The edge list must be given as an array of shape (n_edges, 2) or '
+                             '(n_edges, 3).')
+        edge_array = edge_list[:, :2]
+        if edge_list.shape[1] == 3:
+            weights = edge_list[:, 2]
+    else:
+        raise TypeError('The edge list must be given as a NumPy array or a list of tuples.')
+    return from_edge_array(edge_array=edge_array, weights=weights, directed=directed, bipartite=bipartite,
+                           weighted=weighted, reindex=reindex, shape=shape, sum_duplicates=sum_duplicates,
+                           matrix_only=matrix_only)
+def from_adjacency_list(adjacency_list: Union[List[List], Dict[str, List]], directed: bool = False,
+                        bipartite: bool = False, weighted: bool = True, reindex: bool = False,
+                        shape: Optional[tuple] = None, sum_duplicates: bool = True, matrix_only: bool = None) \
+                        -> Union[Dataset, sparse.csr_matrix]:
+    """Load a graph from an adjacency list.
+    Parameters
+    ----------
+    adjacency_list : Union[List[List], Dict[str, List]]
+        Adjacency list (neighbors of each node) or dictionary (node: neighbors).
+    directed : bool
+        If ``True``, considers the graph as directed.
+    bipartite : bool
+        If ``True``, returns a biadjacency matrix.
+    weighted : bool
+        If ``True``, returns a weighted graph.
+    reindex : bool
+        If ``True``, reindex nodes and returns the original node indices as names.
+        Reindexing is enforced if nodes are not integers.
+    shape : tuple
+        Shape of the adjacency or biadjacency matrix.
+        If not specified or if nodes are reindexed, the shape is the smallest compatible with node indices.
+    sum_duplicates : bool
+        If ``True`` (default), sums weights of duplicate edges.
+        Otherwise, the weight of each edge is that of the first occurrence of this edge.
+    matrix_only : bool
+        If ``True``, returns only the adjacency or biadjacency matrix.
+        Otherwise, returns a ``Dataset`` object with graph attributes (e.g., node names).
+        If not specified (default), selects the most appropriate format.
+    Returns
+    -------
+    graph : :class:`Dataset` or sparse matrix
+    Example
+    -------
+    >>> edges = [[1, 2], [0, 2, 3], [0, 1]]
+    >>> adjacency = from_adjacency_list(edges)
+    >>> adjacency.shape
+    (4, 4)
+    """
+    edge_list = []
+    if isinstance(adjacency_list, list):
+        for i, neighbors in enumerate(adjacency_list):
+            for j in neighbors:
+                edge_list.append((i, j))
+    elif isinstance(adjacency_list, dict):
+        for i, neighbors in adjacency_list.items():
+            for j in neighbors:
+                edge_list.append((i, j))
+    else:
+        raise TypeError('The adjacency list must be given as a list of lists or a dict of lists.')
+    return from_edge_list(edge_list=edge_list, directed=directed, bipartite=bipartite, weighted=weighted,
+                          reindex=reindex, shape=shape, sum_duplicates=sum_duplicates, matrix_only=matrix_only)
+def from_edge_array(edge_array: np.ndarray, weights: np.ndarray = None, directed: bool = False, bipartite: bool = False,
+                    weighted: bool = True, reindex: bool = False, shape: Optional[tuple] = None,
+                    sum_duplicates: bool = True, matrix_only: bool = None) -> Union[Dataset, sparse.csr_matrix]:
+    """Load a graph from an edge array of shape (n_edges, 2) and weights (optional).
+    Parameters
+    ----------
+    edge_array : np.ndarray
+        Array of edges.
+    weights : np.ndarray
+        Array of weights. If not specified, each edge gets weight 1.
+    directed : bool
+        If ``True``, considers the graph as directed.
+    bipartite : bool
+        If ``True``, returns a biadjacency matrix.
+    weighted : bool
+        If ``True``, returns a weighted graph. Otherwise, weights are cast to booleans.
+    reindex : bool
+        If ``True``, reindex nodes and returns the original node indices as names.
+        Reindexing is enforced if nodes are not integers.
+    shape : tuple
+        Shape of the adjacency or biadjacency matrix.
+        If not specified or if nodes are reindexed, the shape is the smallest compatible with node indices.
+        Otherwise, each dimension is enlarged if needed to fit the largest node index
+        (only ``shape[0]`` is used for an adjacency matrix).
+    sum_duplicates : bool
+        If ``True`` (default), sums weights of duplicate edges.
+        Otherwise, the weight of each edge is that of the first occurrence of this edge.
+    matrix_only : bool
+        If ``True``, returns only the adjacency or biadjacency matrix.
+        Otherwise, returns a ``Dataset`` object with graph attributes (e.g., node names).
+        If not specified (default), selects the most appropriate format.
+    Returns
+    -------
+    graph : :class:`Dataset` or sparse matrix
+    Raises
+    ------
+    ValueError
+        If weights cannot be converted to numbers.
+    """
+    # Try to read node identifiers as numbers; non-numeric identifiers (e.g., names) are left untouched.
+    try:
+        edge_array = edge_array.astype(float)
+    except ValueError:
+        pass
+    # Integer-valued identifiers are stored as integers so that they can be used directly as indices.
+    if edge_array.dtype == float and (edge_array == edge_array.astype(int)).all():
+        edge_array = edge_array.astype(int)
+    if weights is None:
+        weights = np.ones(len(edge_array))
+    if weights.dtype not in [bool, int, float]:
+        try:
+            weights = weights.astype(float)
+        except ValueError:
+            raise ValueError('Weights must be numeric.')
+    # Integer-valued weights are stored as integers.
+    if all(weights == weights.astype(int)):
+        weights = weights.astype(int)
+    if not weighted:
+        weights = weights.astype(bool)
+    if not sum_duplicates:
+        # Keep a single occurrence of each distinct edge (index of its first occurrence in the array).
+        _, index = np.unique(edge_array, axis=0, return_index=True)
+        edge_array = edge_array[index]
+        weights = weights[index]
+    graph = Dataset()
+    if bipartite:
+        # Rows and columns are indexed independently of each other.
+        row = edge_array[:, 0]
+        col = edge_array[:, 1]
+        if row.dtype != int or reindex:
+            names_row, row = np.unique(row, return_inverse=True)
+            graph.names_row = names_row
+            # Row names are also exposed under the generic attribute ``names``.
+            graph.names = names_row
+            n_row = len(names_row)
+        elif shape is not None:
+            # The requested shape is enlarged if it cannot hold the largest index.
+            n_row = max(shape[0], max(row) + 1)
+        else:
+            n_row = max(row) + 1
+        if col.dtype != int or reindex:
+            names_col, col = np.unique(col, return_inverse=True)
+            graph.names_col = names_col
+            n_col = len(names_col)
+        elif shape is not None:
+            n_col = max(shape[1], max(col) + 1)
+        else:
+            n_col = max(col) + 1
+        matrix = sparse.csr_matrix((weights, (row, col)), shape=(n_row, n_col))
+        matrix.sum_duplicates()
+        graph.biadjacency = matrix
+    else:
+        # Sources and targets share the same index space: reindex them jointly.
+        nodes = edge_array.ravel()
+        if nodes.dtype != int or reindex:
+            names, nodes = np.unique(nodes, return_inverse=True)
+            graph.names = names
+            n = len(names)
+            edge_array = nodes.reshape(-1, 2)
+        elif shape is not None:
+            # Only the first dimension of the requested shape is used (square matrix).
+            n = max(shape[0], max(nodes) + 1)
+        else:
+            n = max(nodes) + 1
+        row = edge_array[:, 0]
+        col = edge_array[:, 1]
+        matrix = sparse.csr_matrix((weights, (row, col)), shape=(n, n))
+        if not directed:
+            matrix = directed2undirected(matrix)
+        matrix.sum_duplicates()
+        graph.adjacency = matrix
+    # NOTE(review): presumably len(graph) counts the attributes stored in the Dataset, so that a length of 1
+    # means the matrix is the only content (no names) - confirm against Dataset.
+    if matrix_only or (matrix_only is None and len(graph) == 1):
+        return matrix
+    else:
+        return graph
+def from_csv(file_path: str, delimiter: str = None, sep: str = None, comments: str = '#%',
+             data_structure: str = None, directed: bool = False, bipartite: bool = False, weighted: bool = True,
+             reindex: bool = False, shape: Optional[tuple] = None, sum_duplicates: bool = True,
+             matrix_only: bool = None) -> Union[Dataset, sparse.csr_matrix]:
+    """Load a graph from a CSV or TSV file.
+    The delimiter can be specified (e.g., ' ' for space-separated values).
+    Parameters
+    ----------
+    file_path : str
+        Path to the CSV file.
+    delimiter : str
+        Delimiter used in the file. Guessed if not specified.
+    sep : str
+        Alias for delimiter.
+    comments : str
+        Characters for comment lines.
+    data_structure : str
+        If 'edge_list', consider each row of the file as an edge (tuple of size 2 or 3).
+        If 'adjacency_list', consider each row of the file as an adjacency list (list of neighbors,
+        in the order of node indices; an empty line means no neighbor).
+        If 'adjacency_dict', consider each row of the file as an adjacency dictionary with key
+        given by the first column (node: list of neighbors).
+        If ``None`` (default), data_structure is guessed from the first rows of the file.
+    directed : bool
+        If ``True``, considers the graph as directed.
+    bipartite : bool
+        If ``True``, returns a biadjacency matrix of shape (n1, n2).
+    weighted : bool
+        If ``True``, returns a weighted graph (e.g., counts the number of occurrences of each edge).
+    reindex : bool
+        If ``True``, reindex nodes and returns the original node indices as names.
+        Reindexing is enforced if nodes are not integers.
+    shape : tuple
+        Shape of the adjacency or biadjacency matrix.
+        If not specified or if nodes are reindexed, the shape is the smallest compatible with node indices.
+    sum_duplicates : bool
+        If ``True`` (default), sums weights of duplicate edges.
+        Otherwise, the weight of each edge is that of the first occurrence of this edge.
+    matrix_only : bool
+        If ``True``, returns only the adjacency or biadjacency matrix.
+        Otherwise, returns a ``Dataset`` object with graph attributes (e.g., node names).
+        If not specified (default), selects the most appropriate format.
+    Returns
+    -------
+    graph: :class:`Dataset` or sparse matrix
+    """
+    header_length, delimiter_guess, comment_guess, data_structure_guess = scan_header(file_path, delimiters=delimiter,
+                                                                                      comments=comments)
+    if delimiter is None:
+        if sep is not None:
+            delimiter = sep
+        else:
+            delimiter = delimiter_guess
+    if data_structure is None:
+        data_structure = data_structure_guess
+    if data_structure == 'edge_list':
+        try:
+            array = np.genfromtxt(file_path, delimiter=delimiter, comments=comment_guess)
+            if np.isnan(array).any():
+                raise TypeError()
+            edge_array = array[:, :2].astype(int)
+            if array.shape[1] == 3:
+                weights = array[:, 2]
+            else:
+                weights = None
+            return from_edge_array(edge_array=edge_array, weights=weights, directed=directed, bipartite=bipartite,
+                                   weighted=weighted, reindex=reindex, shape=shape, sum_duplicates=sum_duplicates,
+                                   matrix_only=matrix_only)
+        except TypeError:
+            pass
+    with open(file_path, 'r', encoding='utf-8') as f:
+        for i in range(header_length):
+            f.readline()
+        csv_reader = reader(f, delimiter=delimiter)
+        if data_structure == 'edge_list':
+            edge_list = [tuple(row) for row in csv_reader]
+            return from_edge_list(edge_list=edge_list, directed=directed, bipartite=bipartite,
+                                  weighted=weighted, reindex=reindex, shape=shape, sum_duplicates=sum_duplicates,
+                                  matrix_only=matrix_only)
+        elif data_structure == 'adjacency_list':
+            adjacency_list = [row for row in csv_reader]
+            return from_adjacency_list(adjacency_list=adjacency_list, directed=directed, bipartite=bipartite,
+                                       weighted=weighted, reindex=reindex, shape=shape, sum_duplicates=sum_duplicates,
+                                       matrix_only=matrix_only)
+        elif data_structure == 'adjacency_dict':
+            adjacency_list = {row[0]: row[1:] for row in csv_reader}
+            return from_adjacency_list(adjacency_list=adjacency_list, directed=directed, bipartite=bipartite,
+                                       weighted=weighted, reindex=reindex, shape=shape, sum_duplicates=sum_duplicates,
+                                       matrix_only=matrix_only)
+def scan_header(file_path: str, delimiters: str = None, comments: str = '#%', n_scan: int = 100):
+    """Infer some properties of the graph from the first lines of a CSV file .
+    Parameters
+    ----------
+    file_path : str
+        Path to the CSV file.
+    delimiters : str
+        Possible delimiters.
+    comments : str
+        Possible comment characters.
+    n_scan : int
+        Number of rows scanned for inference.
+    Returns
+    -------
+    header_length : int
+        Length of the header (comments and blank lines)
+    delimiter_guess : str
+        Guessed delimiter.
+    comment_guess : str
+        Guessed comment character.
+    data_structure_guess : str
+        Either 'edge_list' or 'adjacency_list'.
+    """
+    header_length = 0
+    if delimiters is None:
+        delimiters = '\t,; '
+    comment_guess = comments[0]
+    count = {delimiter: [] for delimiter in delimiters}
+    rows = []
+    with open(file_path, 'r', encoding='utf-8') as f:
+        for row in f.readlines():
+            if row.startswith(tuple(comments)) or row == '':
+                if len(row):
+                    comment_guess = row[0]
+                header_length += 1
+            else:
+                rows.append(row.rstrip())
+                for delimiter in delimiters:
+                    count[delimiter].append(row.count(delimiter))
+                if len(rows) == n_scan:
+                    break
+    means = [np.mean(count[delimiter]) for delimiter in delimiters]
+    stds = [np.std(count[delimiter]) for delimiter in delimiters]
+    index = np.argwhere((np.array(means) > 0) * (np.array(stds) == 0)).ravel()
+    if len(index) == 1:
+        delimiter_guess = delimiters[int(index)]
+    else:
+        delimiter_guess = delimiters[int(np.argmax(means))]
+    length = {len(row.split(delimiter_guess)) for row in rows}
+    if length == {2} or length == {3}:
+        data_structure_guess = 'edge_list'
+    else:
+        data_structure_guess = 'adjacency_list'
+    return header_length, delimiter_guess, comment_guess, data_structure_guess
+def load_labels(file: str) -> np.ndarray:
+    """Parser for files with a single entry on each row.
+    Parameters
+    ----------
+    file : str
+        The path to the dataset
+    Returns
+    -------
+    labels: np.ndarray
+        Labels.
+    """
+    rows = []
+    with open(file, 'r', encoding='utf-8') as f:
+        for row in f:
+            rows.append(row.strip())
+    return np.array(rows)
+def load_header(file: str):
+    """Check if the graph is directed, bipartite, weighted."""
+    directed, bipartite, weighted = False, False, True
+    with open(file, 'r', encoding='utf-8') as f:
+        row = f.readline()
+        if 'bip' in row:
+            bipartite = True
+        if 'unweighted' in row:
+            weighted = False
+        if 'asym' in row:
+            directed = True
+    return directed, bipartite, weighted
+def load_metadata(file: str, delimiter: str = ': ') -> Dataset:
+    """Extract metadata from the file."""
+    metadata = Dataset()
+    with open(file, 'r', encoding='utf-8') as f:
+        for row in f:
+            parts = row.split(delimiter)
+            key, value = parts[0], ': '.join(parts[1:]).strip('\n')
+            metadata[key] = value
+    return metadata
+def from_graphml(file_path: str, weight_key: str = 'weight', max_string_size: int = 512) -> Dataset:
+    """Load graph from GraphML file.
+    Hyperedges and nested graphs are not supported.
+    Parameters
+    ----------
+    file_path: str
+        Path to the GraphML file.
+    weight_key: str
+        The key to be used as a value for edge weights
+    max_string_size: int
+        The maximum size for string features of the data
+    Returns
+    -------
+    data: :class:`Dataset`
+        The dataset in a Dataset with the adjacency as a CSR matrix.
+    """
+    # see http://graphml.graphdrawing.org/primer/graphml-primer.html
+    # and http://graphml.graphdrawing.org/specification/dtd.html#top
+    tree = ElementTree.parse(file_path)
+    n_nodes = 0
+    n_edges = 0
+    symmetrize = None
+    naming_nodes = True
+    default_weight = 1
+    weight_type = bool
+    weight_id = None
+    # indices in the graph tree
+    node_indices = []
+    edge_indices = []
+    data = Dataset()
+    graph = None
+    file_description = None
+    attribute_descriptions = Dataset()
+    attribute_descriptions.node = Dataset()
+    attribute_descriptions.edge = Dataset()
+    keys = {}
+    for file_element in tree.getroot():
+        if file_element.tag.endswith('graph'):
+            graph = file_element
+            symmetrize = (graph.attrib['edgedefault'] == 'undirected')
+            for index, element in enumerate(graph):
+                if element.tag.endswith('node'):
+                    node_indices.append(index)
+                    n_nodes += 1
+                elif element.tag.endswith('edge'):
+                    edge_indices.append(index)
+                    if 'directed' in element.attrib:
+                        if element.attrib['directed'] == 'true':
+                            n_edges += 1
+                        else:
+                            n_edges += 2
+                    elif symmetrize:
+                        n_edges += 2
+                    else:
+                        n_edges += 1
+            if 'parse.nodeids' in graph.attrib:
+                naming_nodes = not (graph.attrib['parse.nodeids'] == 'canonical')
+    for file_element in tree.getroot():
+        if file_element.tag.endswith('key'):
+            attribute_name = file_element.attrib['attr.name']
+            attribute_type = java_type_to_python_type(file_element.attrib['attr.type'])
+            if attribute_name == weight_key:
+                weight_type = java_type_to_python_type(file_element.attrib['attr.type'])
+                weight_id = file_element.attrib['id']
+                for key_element in file_element:
+                    if key_element.tag == 'default':
+                        default_weight = attribute_type(key_element.text)
+            else:
+                default_value = None
+                if file_element.attrib['for'] == 'node':
+                    size = n_nodes
+                    if 'node_attribute' not in data:
+                        data.node_attribute = Dataset()
+                    for key_element in file_element:
+                        if key_element.tag.endswith('desc'):
+                            attribute_descriptions.node[attribute_name] = key_element.text
+                        elif key_element.tag.endswith('default'):
+                            default_value = attribute_type(key_element.text)
+                    if attribute_type == str:
+                        local_type = '<U' + str(max_string_size)
+                    else:
+                        local_type = attribute_type
+                    if default_value:
+                        data.node_attribute[attribute_name] = np.full(size, default_value, dtype=local_type)
+                    else:
+                        data.node_attribute[attribute_name] = np.zeros(size, dtype=local_type)
+                elif file_element.attrib['for'] == 'edge':
+                    size = n_edges
+                    if 'edge_attribute' not in data:
+                        data.edge_attribute = Dataset()
+                    for key_element in file_element:
+                        if key_element.tag.endswith('desc'):
+                            attribute_descriptions.edge[attribute_name] = key_element.text
+                        elif key_element.tag.endswith('default'):
+                            default_value = attribute_type(key_element.text)
+                    if attribute_type == str:
+                        local_type = '<U' + str(max_string_size)
+                    else:
+                        local_type = attribute_type
+                    if default_value:
+                        data.edge_attribute[attribute_name] = np.full(size, default_value, dtype=local_type)
+                    else:
+                        data.edge_attribute[attribute_name] = np.zeros(size, dtype=local_type)
+                keys[file_element.attrib['id']] = [attribute_name, attribute_type]
+        elif file_element.tag.endswith('desc'):
+            file_description = file_element.text
+    if file_description or attribute_descriptions.node or attribute_descriptions.edge:
+        data.meta = Dataset()
+        if file_description:
+            data.meta['description'] = file_description
+        if attribute_descriptions.node or attribute_descriptions.edge:
+            data.meta['attributes'] = attribute_descriptions
+    if graph is not None:
+        row = np.zeros(n_edges, dtype=int)
+        col = np.zeros(n_edges, dtype=int)
+        dat = np.full(n_edges, default_weight, dtype=weight_type)
+        data.names = None
+        if naming_nodes:
+            data.names = np.zeros(n_nodes, dtype='<U512')
+        node_map = {}
+        # deal with nodes first
+        for number, index in enumerate(node_indices):
+            node = graph[index]
+            if naming_nodes:
+                name = node.attrib['id']
+                data.names[number] = name
+                node_map[name] = number
+            for node_attribute in node:
+                if node_attribute.tag.endswith('data'):
+                    data.node_attribute[keys[node_attribute.attrib['key']][0]][number] = \
+                        keys[node_attribute.attrib['key']][1](node_attribute.text)
+        # deal with edges
+        edge_index = -1
+        for index in edge_indices:
+            edge_index += 1
+            duplicate = False
+            edge = graph[index]
+            if naming_nodes:
+                node1 = node_map[edge.attrib['source']]
+                node2 = node_map[edge.attrib['target']]
+            else:
+                node1 = int(edge.attrib['source'][1:])
+                node2 = int(edge.attrib['target'][1:])
+            row[edge_index] = node1
+            col[edge_index] = node2
+            for edge_attribute in edge:
+                if edge_attribute.tag.endswith('data'):
+                    if edge_attribute.attrib['key'] == weight_id:
+                        dat[edge_index] = weight_type(edge_attribute.text)
+                    else:
+                        data.edge_attribute[keys[edge_attribute.attrib['key']][0]][edge_index] = \
+                            keys[edge_attribute.attrib['key']][1](edge_attribute.text)
+            if 'directed' in edge.attrib:
+                if edge.attrib['directed'] != 'true':
+                    duplicate = True
+            elif symmetrize:
+                duplicate = True
+            if duplicate:
+                edge_index += 1
+                row[edge_index] = node2
+                col[edge_index] = node1
+                for edge_attribute in edge:
+                    if edge_attribute.tag.endswith('data'):
+                        if edge_attribute.attrib['key'] == weight_id:
+                            dat[edge_index] = weight_type(edge_attribute.text)
+                        else:
+                            data.edge_attribute[keys[edge_attribute.attrib['key']][0]][edge_index] = \
+                                keys[edge_attribute.attrib['key']][1](edge_attribute.text)
+        data.adjacency = sparse.csr_matrix((dat, (row, col)), shape=(n_nodes, n_nodes))
+        if data.names is None:
+            data.pop('names')
+        return data
+    else:
+        raise ValueError(f'No graph defined in {file_path}.')
+def java_type_to_python_type(value: str) -> type:
+    if value == 'boolean':
+        return bool
+    elif value == 'int':
+        return int
+    elif value == 'string':
+        return str
+    elif value in ('long', 'float', 'double'):
+        return float
+def is_number(s):
+    try:
+        float(s)
+        return True
+    except ValueError:
+        return False