risk-network 0.0.3b1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,319 @@
+ """
+ risk/neighborhoods/neighborhoods
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ """
+
+ import warnings
+ from typing import Any, Dict, Tuple
+
+ import networkx as nx
+ import numpy as np
+ from sklearn.exceptions import DataConversionWarning
+
+ from risk.neighborhoods.community import (
+     calculate_dijkstra_neighborhoods,
+     calculate_label_propagation_neighborhoods,
+     calculate_louvain_neighborhoods,
+     calculate_markov_clustering_neighborhoods,
+     calculate_spinglass_neighborhoods,
+     calculate_walktrap_neighborhoods,
+ )
+
+ # Suppress DataConversionWarning
+ warnings.filterwarnings(action="ignore", category=DataConversionWarning)
+
+
+ def get_network_neighborhoods(
+     network: nx.Graph,
+     distance_metric: str = "dijkstra",
+     edge_length_threshold: float = 1.0,
+     louvain_resolution: float = 1.0,
+     random_seed: int = 888,
+ ) -> np.ndarray:
+     """Calculate the neighborhoods for each node in the network based on the specified distance metric.
+
+     Args:
+         network (nx.Graph): The network graph.
+         distance_metric (str): The distance metric to use ('dijkstra', 'louvain', 'label_propagation',
+             'markov_clustering', 'walktrap', 'spinglass').
+         edge_length_threshold (float): The edge length percentile (0 to 1) used to filter edges before
+             neighborhoods are computed.
+         louvain_resolution (float, optional): Resolution parameter for the Louvain method. Defaults to 1.0.
+         random_seed (int, optional): Random seed for methods requiring random initialization. Defaults to 888.
+
+     Returns:
+         np.ndarray: Neighborhood matrix calculated based on the selected distance metric.
+
+     Raises:
+         ValueError: If an unsupported distance metric is specified.
+     """
+     network = _create_percentile_limited_subgraph(network, edge_length_threshold)
+
+     if distance_metric == "dijkstra":
+         return calculate_dijkstra_neighborhoods(network)
+     if distance_metric == "louvain":
+         return calculate_louvain_neighborhoods(network, louvain_resolution, random_seed=random_seed)
+     if distance_metric == "label_propagation":
+         return calculate_label_propagation_neighborhoods(network)
+     if distance_metric == "markov_clustering":
+         return calculate_markov_clustering_neighborhoods(network)
+     if distance_metric == "walktrap":
+         return calculate_walktrap_neighborhoods(network)
+     if distance_metric == "spinglass":
+         return calculate_spinglass_neighborhoods(network)
+
+     raise ValueError(
+         "Incorrect distance metric specified. Please choose from 'dijkstra', 'louvain', "
+         "'label_propagation', 'markov_clustering', 'walktrap', 'spinglass'."
+     )
+
+
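For orientation, a minimal usage sketch of this entry point. The module path follows the docstring header above; the toy graph, coordinates, and edge lengths are illustrative, not taken from the package:

    import networkx as nx
    from risk.neighborhoods.neighborhoods import get_network_neighborhoods

    # Toy graph: node positions and edge lengths are made up for the demo
    G = nx.Graph()
    G.add_node(0, x=0.0, y=0.0)
    G.add_node(1, x=1.0, y=0.0)
    G.add_node(2, x=0.0, y=1.0)
    G.add_edge(0, 1, length=1.0)
    G.add_edge(1, 2, length=1.4)

    # edge_length_threshold=1.0 keeps every edge (the 100th percentile)
    neighborhoods = get_network_neighborhoods(G, distance_metric="dijkstra", edge_length_threshold=1.0)
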
+ def _create_percentile_limited_subgraph(G: nx.Graph, edge_length_percentile: float) -> nx.Graph:
+     """Calculate the edge length corresponding to the given percentile of edge lengths in the graph
+     and create a subgraph with all nodes and only the edges at or below this length.
+
+     Args:
+         G (nx.Graph): The input graph.
+         edge_length_percentile (float): The percentile to calculate (between 0 and 1).
+
+     Returns:
+         nx.Graph: A subgraph with all nodes and only the edges whose length is at or below the
+             edge length corresponding to the given percentile.
+     """
+     # Extract edge lengths from the graph
+     edge_lengths = [d["length"] for _, _, d in G.edges(data=True) if "length" in d]
+     # Calculate the specific edge length for the given percentile
+     percentile_length = np.percentile(edge_lengths, edge_length_percentile * 100)
+     # Create a new graph with all nodes from the original graph
+     subgraph = nx.Graph()
+     subgraph.add_nodes_from(G.nodes(data=True))
+     # Add edges to the subgraph if they are at or below the specified percentile length
+     for u, v, d in G.edges(data=True):
+         if d.get("length", 1) <= percentile_length:
+             subgraph.add_edge(u, v, **d)
+
+     return subgraph
+
+
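A quick check of the percentile arithmetic: np.percentile expects a 0 to 100 scale, hence the multiplication by 100 above. The values below are illustrative:

    import numpy as np

    lengths = [0.5, 1.0, 1.5, 2.0]             # illustrative edge lengths
    print(np.percentile(lengths, 0.5 * 100))   # 1.25; edges with length <= 1.25 would be kept
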
+ def process_neighborhoods(
+     network: nx.Graph,
+     neighborhoods: Dict[str, Any],
+     impute_depth: int = 1,
+     prune_threshold: float = 0.0,
+ ) -> Dict[str, Any]:
+     """Process neighborhoods based on the imputation and pruning settings.
+
+     Args:
+         network (nx.Graph): The network data structure used for imputing and pruning neighbors.
+         neighborhoods (dict): Dictionary containing 'enrichment_matrix', 'binary_enrichment_matrix',
+             and 'significant_enrichment_matrix'.
+         impute_depth (int, optional): Depth for imputing neighbors. Defaults to 1.
+         prune_threshold (float, optional): Distance threshold for pruning neighbors. Defaults to 0.0.
+
+     Returns:
+         dict: Processed neighborhoods data, including the updated matrices and enrichment counts.
+     """
+     enrichment_matrix = neighborhoods["enrichment_matrix"]
+     binary_enrichment_matrix = neighborhoods["binary_enrichment_matrix"]
+     significant_enrichment_matrix = neighborhoods["significant_enrichment_matrix"]
+     print(f"Imputation depth: {impute_depth}")
+     if impute_depth:
+         (
+             enrichment_matrix,
+             binary_enrichment_matrix,
+             significant_enrichment_matrix,
+         ) = _impute_neighbors(
+             network,
+             enrichment_matrix,
+             binary_enrichment_matrix,
+             max_depth=impute_depth,
+         )
+
+     print(f"Pruning threshold: {prune_threshold}")
+     if prune_threshold:
+         (
+             enrichment_matrix,
+             binary_enrichment_matrix,
+             significant_enrichment_matrix,
+         ) = _prune_neighbors(
+             network,
+             enrichment_matrix,
+             binary_enrichment_matrix,
+             distance_threshold=prune_threshold,
+         )
+
+     neighborhood_enrichment_counts = np.sum(binary_enrichment_matrix, axis=0)
+     node_enrichment_sums = np.sum(enrichment_matrix, axis=1)
+     return {
+         "enrichment_matrix": enrichment_matrix,
+         "binary_enrichment_matrix": binary_enrichment_matrix,
+         "significant_enrichment_matrix": significant_enrichment_matrix,
+         "neighborhood_enrichment_counts": neighborhood_enrichment_counts,
+         "node_enrichment_sums": node_enrichment_sums,
+     }
+
+
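A minimal sketch of calling process_neighborhoods, assuming (as the helper docstrings below state) that node IDs index the matrix rows; the graph, matrices, and shapes here are illustrative:

    import networkx as nx
    import numpy as np
    from risk.neighborhoods.neighborhoods import process_neighborhoods

    # Toy path graph 0-1-2-3 with planar coordinates; matrix rows correspond to nodes
    G = nx.path_graph(4)
    for i, node in enumerate(G.nodes()):
        G.nodes[node]["x"], G.nodes[node]["y"] = float(i), 0.0

    enrichment = np.array([[2.0, 0.0], [0.0, 1.5], [0.0, 0.0], [3.0, 0.0]])
    binary = (enrichment > 0).astype(float)
    neighborhoods = {
        "enrichment_matrix": enrichment,
        "binary_enrichment_matrix": binary,
        "significant_enrichment_matrix": np.where(binary == 1, enrichment, 0),
    }
    result = process_neighborhoods(G, neighborhoods, impute_depth=1, prune_threshold=0.0)
    print(result["node_enrichment_sums"])  # row sums after node 2 is imputed from its nearest enriched neighbor
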
+ def _impute_neighbors(
+     network: nx.Graph,
+     enrichment_matrix: np.ndarray,
+     binary_enrichment_matrix: np.ndarray,
+     max_depth: int = 3,
+ ) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
+     """Impute rows with sums of zero in the enrichment matrix based on the closest non-zero neighbors
+     in the network graph.
+
+     Args:
+         network (nx.Graph): The network graph with nodes having IDs matching the matrix indices.
+         enrichment_matrix (np.ndarray): The enrichment matrix with rows to be imputed.
+         binary_enrichment_matrix (np.ndarray): The alpha threshold matrix to be imputed similarly.
+         max_depth (int): Maximum depth of nodes to traverse for imputing values.
+
+     Returns:
+         tuple: A tuple containing:
+             - np.ndarray: The imputed enrichment matrix.
+             - np.ndarray: The imputed alpha threshold matrix.
+             - np.ndarray: The significant enrichment matrix with non-significant entries set to zero.
+     """
+     # Calculate the shortest distance to an enriched neighbor for each node
+     # NOTE: these distances are collected but not used further below
+     shortest_distances = []
+     for node in network.nodes():
+         neighbors = [n for n in network.neighbors(node) if binary_enrichment_matrix[n].sum() != 0]
+         if neighbors:
+             shortest_distance = min([_get_euclidean_distance(node, n, network) for n in neighbors])
+             shortest_distances.append(shortest_distance)
+
+     depth = 1
+     rows_to_impute = np.where(binary_enrichment_matrix.sum(axis=1) == 0)[0]
+     while len(rows_to_impute) and depth <= max_depth:
+         next_rows_to_impute = []
+         for row_index in rows_to_impute:
+             neighbors = nx.single_source_shortest_path_length(network, row_index, cutoff=depth)
+             valid_neighbors = [
+                 n
+                 for n in neighbors
+                 if n != row_index
+                 and binary_enrichment_matrix[n].sum() != 0
+                 and enrichment_matrix[n].sum() != 0
+             ]
+             if valid_neighbors:
+                 closest_neighbor = min(
+                     valid_neighbors, key=lambda n: _get_euclidean_distance(row_index, n, network)
+                 )
+                 # Impute the row with the closest valid neighbor's data, damped by depth
+                 enrichment_matrix[row_index] = enrichment_matrix[closest_neighbor]
+                 binary_enrichment_matrix[row_index] = binary_enrichment_matrix[
+                     closest_neighbor
+                 ] / np.sqrt(depth + 1)
+             else:
+                 next_rows_to_impute.append(row_index)
+
+         rows_to_impute = next_rows_to_impute
+         depth += 1
+
+     # Create a matrix where non-significant entries are set to zero
+     significant_enrichment_matrix = np.where(binary_enrichment_matrix == 1, enrichment_matrix, 0)
+
+     return enrichment_matrix, binary_enrichment_matrix, significant_enrichment_matrix
+
+
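The imputation loop leans on nx.single_source_shortest_path_length, which returns every node within cutoff hops of the source (including the source itself), e.g.:

    import networkx as nx

    G = nx.path_graph(5)  # 0-1-2-3-4
    print(nx.single_source_shortest_path_length(G, 0, cutoff=2))
    # {0: 0, 1: 1, 2: 2} -- the source plus all nodes within two hops
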
+ def _prune_neighbors(
+     network: nx.Graph,
+     enrichment_matrix: np.ndarray,
+     binary_enrichment_matrix: np.ndarray,
+     distance_threshold: float = 0.9,
+ ) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
+     """Remove outlier nodes based on the rank of their average distance to enriched neighbors.
+
+     Args:
+         network (nx.Graph): The network graph with nodes having IDs matching the matrix indices.
+         enrichment_matrix (np.ndarray): The enrichment matrix.
+         binary_enrichment_matrix (np.ndarray): The alpha threshold matrix.
+         distance_threshold (float): Rank threshold (0 to 1) used to determine outliers.
+
+     Returns:
+         tuple: A tuple containing:
+             - np.ndarray: The updated enrichment matrix with outliers set to zero.
+             - np.ndarray: The updated alpha threshold matrix with outliers set to zero.
+             - np.ndarray: The significant enrichment matrix, where non-significant entries are set to zero.
+     """
+     # Identify indices with non-zero rows in the binary enrichment matrix
+     non_zero_indices = np.where(binary_enrichment_matrix.sum(axis=1) != 0)[0]
+     average_distances = []
+     for node in non_zero_indices:
+         neighbors = [n for n in network.neighbors(node) if binary_enrichment_matrix[n].sum() != 0]
+         if neighbors:
+             average_distance = np.mean(
+                 [_get_euclidean_distance(node, n, network) for n in neighbors]
+             )
+             average_distances.append(average_distance)
+
+     # Calculate the distance threshold value based on rank
+     distance_threshold_value = _calculate_threshold(average_distances, 1 - distance_threshold)
+     # Prune nodes that are outliers based on the distance threshold
+     for row_index in non_zero_indices:
+         neighbors = [
+             n for n in network.neighbors(row_index) if binary_enrichment_matrix[n].sum() != 0
+         ]
+         if neighbors:
+             average_distance = np.mean(
+                 [_get_euclidean_distance(row_index, n, network) for n in neighbors]
+             )
+             if average_distance >= distance_threshold_value:
+                 enrichment_matrix[row_index] = 0
+                 binary_enrichment_matrix[row_index] = 0
+
+     # Create a matrix where non-significant entries are set to zero
+     significant_enrichment_matrix = np.where(binary_enrichment_matrix == 1, enrichment_matrix, 0)
+
+     return enrichment_matrix, binary_enrichment_matrix, significant_enrichment_matrix
+
+
+ def _get_euclidean_distance(node1: Any, node2: Any, network: nx.Graph) -> float:
+     """Calculate the Euclidean distance between two nodes in the network.
+
+     Args:
+         node1 (Any): The first node.
+         node2 (Any): The second node.
+         network (nx.Graph): The network graph containing the nodes.
+
+     Returns:
+         float: The Euclidean distance between the two nodes.
+     """
+     pos1 = _get_node_position(network, node1)
+     pos2 = _get_node_position(network, node2)
+     return np.linalg.norm(pos1 - pos2)
+
+
+ def _get_node_position(network: nx.Graph, node: Any) -> np.ndarray:
+     """Retrieve the position of a node in the network as a numpy array.
+
+     Args:
+         network (nx.Graph): The network graph containing node positions.
+         node (Any): The node for which the position is being retrieved.
+
+     Returns:
+         np.ndarray: The position of the node as [x, y] or [x, y, z], depending on which
+             coordinates are present.
+     """
+     return np.array(
+         [
+             network.nodes[node].get(coord, 0)
+             for coord in ["x", "y", "z"]
+             if coord in network.nodes[node]
+         ]
+     )
+
+
+ def _calculate_threshold(average_distances: list, distance_threshold: float) -> float:
+     """Calculate the distance threshold based on the given average distances and a percentile threshold.
+
+     Args:
+         average_distances (list): A list of average distances.
+         distance_threshold (float): A percentile threshold (0 to 1) used to determine the distance cutoff.
+
+     Returns:
+         float: The calculated distance threshold value.
+     """
+     # Sort the average distances
+     sorted_distances = np.sort(average_distances)
+     # Compute the rank percentiles for the sorted distances
+     rank_percentiles = np.linspace(0, 1, len(sorted_distances))
+     # Interpolate the sorted distances onto 1000 evenly spaced percentiles
+     interpolated_percentiles = np.linspace(0, 1, 1000)
+     smoothed_distances = np.interp(interpolated_percentiles, rank_percentiles, sorted_distances)
+     # Determine the index corresponding to the distance threshold
+     threshold_index = int(np.ceil(distance_threshold * len(smoothed_distances))) - 1
+     # Return the smoothed distance at the calculated index
+     return smoothed_distances[threshold_index]
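
To make the smoothing step concrete, here is the same interpolation applied to illustrative distances; with a threshold of 0.5 it recovers roughly the median:

    import numpy as np

    distances = np.sort([1.0, 2.0, 4.0, 8.0])
    ranks = np.linspace(0, 1, len(distances))
    smoothed = np.interp(np.linspace(0, 1, 1000), ranks, distances)
    index = int(np.ceil(0.5 * len(smoothed))) - 1
    print(smoothed[index])  # ~3.0, interpolated between the two middle values
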
@@ -0,0 +1,8 @@
+ """
+ risk/network
+ ~~~~~~~~~~~~
+ """
+
+ from .graph import NetworkGraph
+ from .io import NetworkIO
+ from .plot import NetworkPlotter
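
With these re-exports, downstream code can import the public classes directly from the subpackage:

    from risk.network import NetworkGraph, NetworkIO, NetworkPlotter
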
@@ -0,0 +1,165 @@
+ """
+ risk/network/geometry
+ ~~~~~~~~~~~~~~~~~~~~~
+ """
+
+ import networkx as nx
+ import numpy as np
+
+
+ def apply_edge_lengths(
+     G: nx.Graph,
+     compute_sphere: bool = True,
+     surface_depth: float = 0.0,
+     include_edge_weight: bool = False,
+ ) -> nx.Graph:
+     """Apply edge lengths in the graph, optionally mapping nodes to a sphere and including edge weights.
+
+     Args:
+         G (nx.Graph): The input graph.
+         compute_sphere (bool): Whether to map nodes to a sphere. Defaults to True.
+         surface_depth (float): The surface depth for mapping to a sphere. Defaults to 0.0.
+         include_edge_weight (bool): Whether to include edge weights in the calculation. Defaults to False.
+
+     Returns:
+         nx.Graph: The graph with applied edge lengths.
+     """
+
+     def compute_distance(
+         u_coords: np.ndarray, v_coords: np.ndarray, is_sphere: bool = False
+     ) -> float:
+         """Compute the distance between two coordinate vectors.
+
+         Args:
+             u_coords (np.ndarray): Coordinates of the first point.
+             v_coords (np.ndarray): Coordinates of the second point.
+             is_sphere (bool, optional): If True, compute the spherical (great-circle) distance.
+                 Defaults to False.
+
+         Returns:
+             float: The computed distance between the two points.
+         """
+         if is_sphere:
+             # Normalize vectors and compute the spherical distance via the dot product
+             u_coords /= np.linalg.norm(u_coords)
+             v_coords /= np.linalg.norm(v_coords)
+             return np.arccos(np.clip(np.dot(u_coords, v_coords), -1.0, 1.0))
+         else:
+             # Compute Euclidean distance
+             return np.linalg.norm(u_coords - v_coords)
+
+     # Normalize graph coordinates
+     _normalize_graph_coordinates(G)
+     # Normalize weights
+     _normalize_weights(G)
+     # Use G_depth for edge length calculation
+     if compute_sphere:
+         # Map to sphere and adjust depth
+         _map_to_sphere(G)
+         G_depth = _create_depth(G.copy(), surface_depth=surface_depth)
+     else:
+         # Calculate edge lengths directly on the plane
+         G_depth = G.copy()
+
+     for u, v, _ in G_depth.edges(data=True):
+         u_coords = np.array([G_depth.nodes[u]["x"], G_depth.nodes[u]["y"]])
+         v_coords = np.array([G_depth.nodes[v]["x"], G_depth.nodes[v]["y"]])
+         if compute_sphere:
+             u_coords = np.append(u_coords, G_depth.nodes[u].get("z", 0))
+             v_coords = np.append(v_coords, G_depth.nodes[v].get("z", 0))
+
+         distance = compute_distance(u_coords, v_coords, is_sphere=compute_sphere)
+         if include_edge_weight:
+             # The square root of the normalized weight is used to dampen the effect of large weights
+             G.edges[u, v]["length"] = distance / np.sqrt(G.edges[u, v]["normalized_weight"] + 1e-6)
+         else:
+             # Use the calculated distance directly
+             G.edges[u, v]["length"] = distance
+
+     return G
+
+
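A minimal sketch of driving apply_edge_lengths, assuming nodes already carry a 2D layout in 'x'/'y' and every edge has a 'weight' (as _normalize_weights expects); the triangle below is illustrative:

    import networkx as nx
    from risk.network.geometry import apply_edge_lengths

    G = nx.Graph()
    G.add_edge("a", "b", weight=1.0)
    G.add_edge("b", "c", weight=2.0)
    G.add_edge("a", "c", weight=1.0)
    for node, (x, y) in zip(G.nodes(), [(0.0, 0.0), (1.0, 0.0), (0.5, 1.0)]):
        G.nodes[node]["x"], G.nodes[node]["y"] = x, y

    G = apply_edge_lengths(G, compute_sphere=True, surface_depth=0.1, include_edge_weight=True)
    print(nx.get_edge_attributes(G, "length"))
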
+ def _map_to_sphere(G: nx.Graph) -> None:
+     """Map the x and y coordinates of graph nodes onto a 3D sphere.
+
+     Args:
+         G (nx.Graph): The input graph with nodes having 'x' and 'y' coordinates.
+     """
+     # Extract x, y coordinates from the graph nodes
+     xy_coords = np.array([[G.nodes[node]["x"], G.nodes[node]["y"]] for node in G.nodes()])
+     # Normalize the coordinates to [0, 1]
+     min_vals = np.min(xy_coords, axis=0)
+     max_vals = np.max(xy_coords, axis=0)
+     normalized_xy = (xy_coords - min_vals) / (max_vals - min_vals)
+     # Map normalized coordinates to theta and phi on a sphere
+     theta = normalized_xy[:, 0] * np.pi * 2
+     phi = normalized_xy[:, 1] * np.pi
+     # Convert spherical coordinates to Cartesian coordinates on the unit sphere
+     for i, node in enumerate(G.nodes()):
+         x = np.sin(phi[i]) * np.cos(theta[i])
+         y = np.sin(phi[i]) * np.sin(theta[i])
+         z = np.cos(phi[i])
+         G.nodes[node]["x"] = x
+         G.nodes[node]["y"] = y
+         G.nodes[node]["z"] = z
+
+
+ def _normalize_graph_coordinates(G: nx.Graph) -> None:
+     """Normalize the x and y coordinates of the nodes in the graph to the [0, 1] range.
+
+     Args:
+         G (nx.Graph): The input graph with nodes having 'x' and 'y' coordinates.
+     """
+     # Extract x, y coordinates from the graph nodes
+     xy_coords = np.array([[G.nodes[node]["x"], G.nodes[node]["y"]] for node in G.nodes()])
+     # Calculate min and max values for x and y
+     min_vals = np.min(xy_coords, axis=0)
+     max_vals = np.max(xy_coords, axis=0)
+     # Normalize the coordinates to [0, 1]
+     normalized_xy = (xy_coords - min_vals) / (max_vals - min_vals)
+     # Update the node coordinates with the normalized values
+     for i, node in enumerate(G.nodes()):
+         G.nodes[node]["x"], G.nodes[node]["y"] = normalized_xy[i]
+
+
+ def _normalize_weights(G: nx.Graph) -> None:
+     """Normalize the weights of the edges in the graph.
+
+     Args:
+         G (nx.Graph): The input graph with weighted edges.
+     """
+     # "weight" is present on all edges; it defaults to 1.0 when the user does not specify one
+     weights = [data["weight"] for _, _, data in G.edges(data=True)]
+     if weights:  # Ensure there are weighted edges
+         min_weight = min(weights)
+         max_weight = max(weights)
+         range_weight = max_weight - min_weight if max_weight > min_weight else 1
+         for _, _, data in G.edges(data=True):
+             data["normalized_weight"] = (data["weight"] - min_weight) / range_weight
+
+
+ def _create_depth(G: nx.Graph, surface_depth: float = 0.0) -> nx.Graph:
+     """Adjust the 'z' attribute of each node based on subcluster strengths and the normalized surface depth.
+
+     Args:
+         G (nx.Graph): The input graph.
+         surface_depth (float): The maximum surface depth to apply for the strongest subcluster.
+
+     Returns:
+         nx.Graph: The graph with an adjusted 'z' attribute for each node.
+     """
+     if surface_depth >= 1.0:
+         surface_depth = surface_depth - 1e-6  # Cap the surface depth just below 1.0
+
+     # Compute subclusters as connected components (any other subcluster method could be substituted)
+     subclusters = {node: set(nx.node_connected_component(G, node)) for node in G.nodes}
+     # Create a strength metric for subclusters (here, their size)
+     subcluster_strengths = {node: len(neighbors) for node, neighbors in subclusters.items()}
+     # Normalize the subcluster strengths and apply depths
+     max_strength = max(subcluster_strengths.values())
+     for node, strength in subcluster_strengths.items():
+         normalized_surface_depth = (strength / max_strength) * surface_depth
+         x, y, z = G.nodes[node]["x"], G.nodes[node]["y"], G.nodes[node]["z"]
+         norm = np.sqrt(x**2 + y**2 + z**2)
+         G.nodes[node]["z"] -= (z / norm) * normalized_surface_depth  # Pull the node inward along z
+
+     return G
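
A small check of the depth adjustment, assuming nodes already sit on the unit sphere (as _map_to_sphere arranges), so the radial norm is 1 and the z-shift reduces to z * normalized_surface_depth. _create_depth is an internal helper; this is purely illustrative:

    import networkx as nx
    from risk.network.geometry import _create_depth

    G = nx.Graph()
    G.add_edge(0, 1)
    G.nodes[0].update(x=0.0, y=0.0, z=1.0)  # "north pole"
    G.nodes[1].update(x=1.0, y=0.0, z=0.0)  # on the equator
    G = _create_depth(G, surface_depth=0.2)
    print(G.nodes[0]["z"])  # 1.0 - 1.0 * 0.2 = 0.8
    print(G.nodes[1]["z"])  # 0.0: the shift is proportional to z itself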