PyPI - risk-network - Versions diffs - 0.0.3b1__py3-none-any.whl - Mend

risk-network 0.0.3b1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (26) hide show

risk/__init__.py +13 -0
risk/annotations/__init__.py +7 -0
risk/annotations/annotations.py +259 -0
risk/annotations/io.py +183 -0
risk/constants.py +31 -0
risk/log/__init__.py +9 -0
risk/log/console.py +16 -0
risk/log/params.py +198 -0
risk/neighborhoods/__init__.py +10 -0
risk/neighborhoods/community.py +189 -0
risk/neighborhoods/domains.py +257 -0
risk/neighborhoods/neighborhoods.py +319 -0
risk/network/__init__.py +8 -0
risk/network/geometry.py +165 -0
risk/network/graph.py +280 -0
risk/network/io.py +326 -0
risk/network/plot.py +795 -0
risk/risk.py +382 -0
risk/stats/__init__.py +6 -0
risk/stats/permutation.py +88 -0
risk/stats/stats.py +447 -0
risk_network-0.0.3b1.dist-info/LICENSE +674 -0
risk_network-0.0.3b1.dist-info/METADATA +751 -0
risk_network-0.0.3b1.dist-info/RECORD +26 -0
risk_network-0.0.3b1.dist-info/WHEEL +5 -0
risk_network-0.0.3b1.dist-info/top_level.txt +1 -0

risk/network/graph.py ADDED Viewed

@@ -0,0 +1,280 @@
+"""
+risk/network/graph
+~~~~~~~~~~~~~~~~~~
+"""
+import random
+from collections import defaultdict
+from typing import Any, Dict, List, Tuple
+import networkx as nx
+import numpy as np
+import pandas as pd
+import matplotlib
+import matplotlib.cm as cm
class NetworkGraph:
    """Hold a network together with its domain assignments and colouring helpers.

    On construction the class builds two lookup tables (domain -> nodes and
    trimmed domain -> label), unfolds the input graph to 2D, and caches the
    node coordinates. It can then generate one RGBA colour per node based on
    the node's primary domain and its enrichment sum.
    """

    def __init__(
        self,
        network: nx.Graph,
        top_annotations: pd.DataFrame,
        domains: pd.DataFrame,
        trimmed_domains: pd.DataFrame,
        node_label_to_id_map: Dict[str, Any],
        node_enrichment_sums: np.ndarray,
    ):
        """Initialize the NetworkGraph object.

        Args:
            network (nx.Graph): The network graph. It is passed to
                `_unfold_sphere_to_plane`, which edits node attributes in place.
            top_annotations (pd.DataFrame): DataFrame containing annotations data for the network nodes.
            domains (pd.DataFrame): DataFrame containing domain data for the network nodes.
                Must provide a 'primary domain' column.
            trimmed_domains (pd.DataFrame): DataFrame containing trimmed domain data.
                Must provide a 'label' column.
            node_label_to_id_map (dict): A dictionary mapping node labels to their corresponding IDs.
            node_enrichment_sums (np.ndarray): Array containing the enrichment sums for the nodes.
        """
        self.top_annotations = top_annotations
        self.domain_to_nodes = self._create_domain_to_nodes_map(domains)
        self.domains = domains
        self.trimmed_domain_to_term = self._create_domain_to_term_map(trimmed_domains)
        self.trimmed_domains = trimmed_domains
        self.node_label_to_id_map = node_label_to_id_map
        self.node_enrichment_sums = node_enrichment_sums
        # NOTE: self.G and self.node_coordinates are populated by _initialize_network
        self.G = None
        self.node_coordinates = None
        self._initialize_network(network)

    def _create_domain_to_nodes_map(self, domains: pd.DataFrame) -> Dict[str, Any]:
        """Group node identifiers by their primary domain.

        Args:
            domains (pd.DataFrame): DataFrame whose index identifies nodes and whose
                'primary domain' column holds each node's domain.

        Returns:
            dict: A `defaultdict(list)` mapping each primary-domain value to the list
                of index values (nodes) assigned to it.
        """
        # reset_index() exposes the (unnamed) index as an "index" column
        cleaned_domains_matrix = domains.reset_index()[["index", "primary domain"]]
        node_to_domains = cleaned_domains_matrix.set_index("index")["primary domain"].to_dict()
        domain_to_nodes = defaultdict(list)
        for node, domain in node_to_domains.items():
            domain_to_nodes[domain].append(node)
        return domain_to_nodes

    def _create_domain_to_term_map(self, trimmed_domains: pd.DataFrame) -> Dict[str, Any]:
        """Create a mapping from domain IDs to their corresponding terms.

        Args:
            trimmed_domains (pd.DataFrame): DataFrame indexed by domain ID with a 'label' column.

        Returns:
            dict: A dictionary mapping each index value to its 'label' entry.
        """
        return dict(zip(trimmed_domains.index, trimmed_domains["label"]))

    def _initialize_network(self, G: nx.Graph) -> None:
        """Unfold the network to 2D and cache its node coordinates.

        Args:
            G (nx.Graph): The input network graph; nodes carrying a 'z' attribute
                are projected to 2D by `_unfold_sphere_to_plane`.
        """
        # Unfold the network's 3D coordinates to 2D and keep the result
        self.G = _unfold_sphere_to_plane(G)
        # Extract 2D coordinates of nodes
        self.node_coordinates = _extract_node_coordinates(self.G)

    def get_domain_colors(
        self, min_scale: float = 0.8, max_scale: float = 1.0, random_seed: int = 888, **kwargs
    ) -> np.ndarray:
        """Generate one RGBA colour per node from its domain and enrichment sum.

        Each domain receives a colour, every node takes the colour of its primary
        domain, and the RGB channels are then scaled by the node's enrichment sum
        (see `_transform_colors`). Nodes that belong to no domain keep an all-zero
        RGBA row.

        Args:
            min_scale (float, optional): Minimum scale for color intensity. Defaults to 0.8.
            max_scale (float, optional): Maximum scale for color intensity. Defaults to 1.0.
            random_seed (int, optional): Seed for random number generation. Defaults to 888.
            **kwargs: Additional keyword arguments forwarded to `_get_colors`
                (e.g. `cmap`, `color`).

        Returns:
            np.ndarray: Array of transformed RGBA colors, one row per node.
        """
        # Get colors for each domain
        domain_colors = self._get_domain_colors(**kwargs, random_seed=random_seed)
        # Generate composite colors for nodes
        node_colors = self._get_composite_node_colors(domain_colors)
        # Scale RGB intensity by enrichment
        return _transform_colors(
            node_colors,
            self.node_enrichment_sums,
            min_scale=min_scale,
            max_scale=max_scale,
        )

    def _get_composite_node_colors(self, domain_colors: Dict[Any, Any]) -> np.ndarray:
        """Assign each node the colour of its primary domain.

        Args:
            domain_colors (dict): Mapping from domain key to an RGBA colour, as
                returned by `_get_domain_colors`.

        Returns:
            np.ndarray: Array of shape (number of nodes, 4); rows of nodes that
                appear in no domain stay zero.

        Raises:
            KeyError: If a domain in `self.domain_to_nodes` has no entry in `domain_colors`.
        """
        # One RGBA row per node; node identifiers are used directly as row indices
        composite_colors = np.zeros((len(self.node_coordinates), 4))
        for domain_idx, nodes in self.domain_to_nodes.items():
            composite_colors[nodes] = domain_colors[domain_idx]
        return composite_colors

    def _get_domain_colors(self, **kwargs) -> Dict[str, Any]:
        """Get colors for each domain.

        The number of colours generated equals the number of integer-named columns
        in `self.domains`; they are paired, in order, with the keys of
        `self.domain_to_nodes` (pairing stops at the shorter of the two).

        Args:
            **kwargs: Keyword arguments forwarded to `_get_colors`.

        Returns:
            dict: A dictionary mapping domain keys to their corresponding RGBA colors.
        """
        # Only integer-named columns count as domains; their order is irrelevant here
        num_numeric_domains = sum(
            isinstance(col, (int, np.integer)) for col in self.domains.columns
        )
        domain_colors = _get_colors(**kwargs, num_colors_to_generate=num_numeric_domains)
        return dict(zip(self.domain_to_nodes.keys(), domain_colors))
+def _transform_colors(
+    colors: np.ndarray, enrichment_sums: np.ndarray, min_scale: float = 0.8, max_scale: float = 1.0
+) -> np.ndarray:
+    """Transform colors to ensure proper alpha values and intensity based on enrichment sums.
+    Args:
+        colors (np.ndarray): An array of RGBA colors.
+        enrichment_sums (np.ndarray): An array of enrichment sums corresponding to the colors.
+        min_scale (float, optional): Minimum scale for color intensity. Defaults to 0.8.
+        max_scale (float, optional): Maximum scale for color intensity. Defaults to 1.0.
+    Returns:
+        np.ndarray: The transformed array of RGBA colors with adjusted intensities.
+    """
+    if min_scale == max_scale:
+        min_scale = max_scale - 10e-6  # Avoid division by zero
+    log_enrichment_sums = np.log1p(enrichment_sums)  # Use log1p to avoid log(0)
+    # Normalize the capped enrichment sums to the range [0, 1]
+    normalized_sums = log_enrichment_sums / np.max(log_enrichment_sums)
+    # Scale normalized sums to the specified color range [min_scale, max_scale]
+    scaled_sums = min_scale + (max_scale - min_scale) * normalized_sums
+    # Adjust RGB values based on scaled sums
+    for i in range(3):  # Only adjust RGB values
+        colors[:, i] = scaled_sums * colors[:, i]
+    return colors
def _unfold_sphere_to_plane(G: nx.Graph) -> nx.Graph:
    """Convert 3D coordinates to 2D by unfolding a sphere to a plane.

    Nodes without a 'z' attribute are left untouched. For the others, the
    azimuth becomes the new 'x' (normalized to [0, 1] and shifted by half a
    turn) and the polar angle becomes the new 'y' (normalized to [0, 1] and
    negated). The graph is modified in place.

    Args:
        G (nx.Graph): A network graph; nodes to unfold carry 'x', 'y', and 'z' attributes.

    Returns:
        nx.Graph: The same graph object with updated 2D coordinates (only 'x' and 'y').
    """
    for node in G.nodes():
        attrs = G.nodes[node]
        if "z" not in attrs:
            continue
        # Extract 3D coordinates
        x, y, z = attrs["x"], attrs["y"], attrs["z"]
        # Calculate spherical coordinates theta and phi from Cartesian coordinates
        r = np.sqrt(x**2 + y**2 + z**2)
        theta = np.arctan2(y, x)
        # A node at the origin has no direction: place it on the equator instead of
        # dividing by zero. The clip absorbs rounding that would push |z / r| past 1
        # and make arccos return NaN.
        cos_phi = np.clip(z / r, -1.0, 1.0) if r > 0 else 0.0
        phi = np.arccos(cos_phi)
        # Convert spherical coordinates to 2D plane coordinates
        unfolded_x = (theta + np.pi) / (2 * np.pi)  # Shift and normalize theta to [0, 1]
        # Rotate by half a turn, wrapping around so the value stays within [0, 1]
        unfolded_x = unfolded_x + 0.5 if unfolded_x < 0.5 else unfolded_x - 0.5
        unfolded_y = (np.pi - phi) / np.pi  # Reflect phi and normalize to [0, 1]
        # Update network node attributes
        attrs["x"] = unfolded_x
        attrs["y"] = -unfolded_y
        # Remove the 'z' coordinate as it's no longer needed
        del attrs["z"]
    return G
def _extract_node_coordinates(G: nx.Graph) -> np.ndarray:
    """Collect the 'x' and 'y' attributes of every node into one array.

    Args:
        G (nx.Graph): The network graph with node coordinates.

    Returns:
        np.ndarray: Array of node coordinates with shape (num_nodes, 2), with rows
            in the graph's node iteration order.
    """
    # Look up y by node so each x can be paired with its counterpart
    y_by_node = dict(G.nodes.data("y"))
    # One (x, y) row per node
    rows = [np.array([x_value, y_by_node[node]]) for node, x_value in G.nodes.data("x")]
    return np.vstack(rows)
def _get_colors(
    num_colors_to_generate: int = 10, cmap: str = "hsv", random_seed: int = 888, **kwargs
) -> List[Tuple]:
    """Generate a list of RGBA colors from a specified colormap or use a direct color string.

    Args:
        num_colors_to_generate (int): The number of colors to generate. Defaults to 10.
        cmap (str): The name of the colormap to use. Defaults to "hsv".
        random_seed (int): Seed for random number generation. Defaults to 888.
        **kwargs: Additional keyword arguments; a truthy 'color' entry selects a
            single color for every output slot instead of sampling the colormap.

    Returns:
        list of tuple: List of RGBA colors.

    Raises:
        ValueError: If `cmap` is not a known colormap name.
    """
    # Seed the module-level RNG for reproducibility (this also affects later callers
    # of `random`, which existing behaviour relies on)
    random.seed(random_seed)
    if kwargs.get("color"):
        # If a direct color string is provided, generate a list with that color
        rgba = matplotlib.colors.to_rgba(kwargs["color"])
        return [rgba] * num_colors_to_generate

    if cmap is None:
        # matplotlib.cm.get_cmap(None) used to fall back to the configured default
        cmap = matplotlib.rcParams["image.cmap"]
    if isinstance(cmap, matplotlib.colors.Colormap):
        colormap = cmap
    elif hasattr(matplotlib, "colormaps"):
        # matplotlib.cm.get_cmap was deprecated in Matplotlib 3.7 and removed in 3.9;
        # the colormap registry is its replacement.
        try:
            colormap = matplotlib.colormaps[cmap]
        except KeyError as err:
            # Preserve the exception type the old get_cmap raised for unknown names
            raise ValueError(f"{cmap!r} is not a known colormap name") from err
    else:
        # Older Matplotlib without the registry
        colormap = cm.get_cmap(cmap)
    # Generate evenly distributed color positions
    color_positions = np.linspace(0, 1, num_colors_to_generate)
    random.shuffle(color_positions)  # Shuffle the positions to randomize colors
    # Generate colors based on shuffled positions
    return [colormap(pos) for pos in color_positions]

risk/network/io.py ADDED Viewed

@@ -0,0 +1,326 @@
+"""
+risk/network/io
+~~~~~~~~~~~~~~~
+This file contains the code for the RISK class and command-line access.
+"""
+import json
+import pickle
+import shutil
+import zipfile
+from xml.dom import minidom
+import networkx as nx
+import pandas as pd
+from risk.network.geometry import apply_edge_lengths
+from risk.log import params, print_header
class NetworkIO:
    """A class for loading, processing, and managing network data.

    The NetworkIO class provides methods to load network data from various formats
    (GPickle, NetworkX, Cytoscape .cys, Cytoscape JSON) and to prepare the network:
    pruning low-degree nodes and self-loops, relabeling nodes to integers, assigning
    edge weights, validating node attributes, and calculating edge lengths.
    """

    def __init__(
        self,
        compute_sphere: bool = True,
        surface_depth: float = 0.0,
        distance_metric: str = "dijkstra",
        edge_length_threshold: float = 0.5,
        louvain_resolution: float = 0.1,
        min_edges_per_node: int = 0,
        include_edge_weight: bool = True,
        weight_label: str = "weight",
    ):
        """Store the loader configuration.

        Args:
            compute_sphere (bool, optional): Forwarded to `apply_edge_lengths`; also
                selects the "Sphere"/"Plane" label in the log. Defaults to True.
            surface_depth (float, optional): Forwarded to `apply_edge_lengths`. Defaults to 0.0.
            distance_metric (str, optional): Stored on the instance. Defaults to "dijkstra".
            edge_length_threshold (float, optional): Stored and logged. Defaults to 0.5.
            louvain_resolution (float, optional): Stored on the instance. Defaults to 0.1.
            min_edges_per_node (int, optional): Nodes whose degree is less than or equal
                to this value are removed. Defaults to 0.
            include_edge_weight (bool, optional): Whether edge weights are read from
                Cytoscape tables and used for edge lengths. Defaults to True.
            weight_label (str, optional): Edge attribute/column holding the weight.
                Defaults to "weight".
        """
        self.compute_sphere = compute_sphere
        self.surface_depth = surface_depth
        self.include_edge_weight = include_edge_weight
        self.weight_label = weight_label
        self.distance_metric = distance_metric
        self.edge_length_threshold = edge_length_threshold
        self.louvain_resolution = louvain_resolution
        self.min_edges_per_node = min_edges_per_node

    def load_gpickle_network(self, filepath: str) -> nx.Graph:
        """Load a network from a GPickle file.

        Args:
            filepath (str): Path to the GPickle file.

        Returns:
            nx.Graph: Loaded and processed network.
        """
        filetype = "GPickle"
        params.log_network(filetype=filetype, filepath=filepath)
        self._log_loading(filetype, filepath=filepath)
        # SECURITY: unpickling can execute arbitrary code; only load files from trusted sources.
        with open(filepath, "rb") as f:
            G = pickle.load(f)
        return self._initialize_graph(G)

    def load_networkx_network(self, G: nx.Graph) -> nx.Graph:
        """Load a NetworkX graph.

        Args:
            G (nx.Graph): A NetworkX graph object. It is not modified; a processed
                copy is returned.

        Returns:
            nx.Graph: Processed network.
        """
        filetype = "NetworkX"
        params.log_network(filetype=filetype)
        self._log_loading(filetype)
        return self._initialize_graph(G)

    def load_cytoscape_network(
        self,
        filepath: str,
        source_label: str = "source",
        target_label: str = "target",
        view_name: str = "",
    ) -> nx.Graph:
        """Load a network from a Cytoscape (.cys) session file.

        The archive members are read directly from the zip file; nothing is
        extracted to disk.

        Args:
            filepath (str): Path to the Cytoscape file.
            source_label (str, optional): Source node label. Defaults to "source".
            target_label (str, optional): Target node label. Defaults to "target".
            view_name (str, optional): Name of the view to load (matched against
                "<view_name>.xgmml"). Defaults to "", which selects the first view.

        Returns:
            nx.Graph: Loaded and processed network.

        Raises:
            IndexError: If the archive has no matching view or no shared edge table.
            KeyError: If a node in the edge table has no position in the selected view.
        """
        filetype = "Cytoscape"
        params.log_network(filetype=filetype, filepath=str(filepath))
        self._log_loading(filetype, filepath=filepath)
        with zipfile.ZipFile(filepath, "r") as zip_ref:
            cys_files = zip_ref.namelist()
            # Get first view (or the requested one)
            cys_view_files = [cf for cf in cys_files if "/views/" in cf]
            cys_view_file = (
                cys_view_files[0]
                if not view_name
                else [cvf for cvf in cys_view_files if cvf.endswith(view_name + ".xgmml")][0]
            )
            with zip_ref.open(cys_view_file) as view_handle:
                cys_view_dom = minidom.parse(view_handle)
            # Locate the shared edge attribute table (from /tables/)
            attribute_metadata_keywords = ["/tables/", "SHARED_ATTRS", "edge.cytable"]
            attribute_metadata = [
                cf
                for cf in cys_files
                if all(keyword in cf for keyword in attribute_metadata_keywords)
            ][0]
            # Load attributes file from Cytoscape as pandas data frame
            with zip_ref.open(attribute_metadata) as table_handle:
                attribute_table = pd.read_csv(table_handle, sep=",", header=None, skiprows=1)

        # Parse node positions from the view
        node_x_positions = {}
        node_y_positions = {}
        for node in cys_view_dom.getElementsByTagName("node"):
            # Node ID is found in 'label'
            node_id = str(node.attributes["label"].value)
            for child in node.childNodes:
                # nodeType 1 is an element node
                if child.nodeType == 1 and child.tagName == "graphics":
                    node_x_positions[node_id] = float(child.attributes["x"].value)
                    node_y_positions[node_id] = float(child.attributes["y"].value)

        # Set columns
        attribute_table.columns = attribute_table.iloc[0]
        # Skip first four rows
        attribute_table = attribute_table.iloc[4:, :]
        # Conditionally select columns based on include_edge_weight
        selected_columns = [source_label, target_label]
        if self.include_edge_weight:
            selected_columns.append(self.weight_label)
        attribute_table = attribute_table[selected_columns].dropna().reset_index(drop=True)

        # Create a graph; add_edge creates missing endpoints automatically
        G = nx.Graph()
        for _, row in attribute_table.iterrows():
            source = row[source_label]
            target = row[target_label]
            G.add_edge(source, target)
            if self.include_edge_weight:
                # Store under the configured label so _validate_edges picks it up
                # (storing under a fixed "weight" key lost custom-labelled weights)
                G.edges[source, target][self.weight_label] = float(row[self.weight_label])
        # Add node attributes
        for node in G.nodes():
            G.nodes[node]["label"] = node
            G.nodes[node]["x"] = node_x_positions[node]
            G.nodes[node]["y"] = node_y_positions[node]
        return self._initialize_graph(G)

    def load_cytoscape_json_network(
        self, filepath: str, source_label: str = "source", target_label: str = "target"
    ) -> nx.Graph:
        """Load a network from a Cytoscape JSON (.cyjs) file.

        Args:
            filepath (str): Path to the Cytoscape JSON file.
            source_label (str, optional): Source node label. Default is "source".
            target_label (str, optional): Target node label. Default is "target".

        Returns:
            NetworkX graph: Loaded and processed network.
        """
        filetype = "Cytoscape JSON"
        params.log_network(filetype=filetype, filepath=str(filepath))
        self._log_loading(filetype, filepath=filepath)
        # Load the Cytoscape JSON file
        with open(filepath, "r", encoding="utf-8") as f:
            cyjs_data = json.load(f)
        # Create a graph
        G = nx.Graph()
        # Process nodes
        for node in cyjs_data["elements"]["nodes"]:
            node_data = node["data"]
            node_id = node_data["id"]
            G.add_node(
                node_id,
                label=node_data.get("name", node_id),
                x=node["position"]["x"],
                y=node["position"]["y"],
            )
        # Process edges
        for edge in cyjs_data["elements"]["edges"]:
            edge_data = edge["data"]
            source = edge_data[source_label]
            target = edge_data[target_label]
            G.add_edge(source, target)
            if self.weight_label is not None and self.weight_label in edge_data:
                # Store under the configured label so _validate_edges picks it up
                G.edges[source, target][self.weight_label] = float(edge_data[self.weight_label])
        # Initialize the graph
        return self._initialize_graph(G)

    def _initialize_graph(self, G: nx.Graph) -> nx.Graph:
        """Initialize the graph by processing and validating its nodes and edges.

        Args:
            G (nx.Graph): The input NetworkX graph. It is left unmodified.

        Returns:
            nx.Graph: The processed and validated graph, with nodes labeled 0..n-1.
        """
        # Work on a copy so a caller-supplied graph is never mutated
        G = G.copy()
        self._remove_invalid_graph_properties(G)
        # IMPORTANT: This is where the graph node labels are converted to integers.
        # It happens after pruning so the integer ids stay contiguous.
        G = nx.relabel_nodes(G, {node: idx for idx, node in enumerate(G.nodes)})
        self._validate_edges(G)
        self._validate_nodes(G)
        self._process_graph(G)
        return G

    def _remove_invalid_graph_properties(self, G: nx.Graph) -> None:
        """Remove self-loops and low-degree nodes from the graph, in place.

        Args:
            G (nx.Graph): A NetworkX graph object.
        """
        print(f"Minimum edges per node: {self.min_edges_per_node}")
        # Remove self-loop edges first so they do not inflate node degrees
        G.remove_edges_from(list(nx.selfloop_edges(G)))
        # Remove nodes whose degree does not exceed the threshold
        nodes_with_few_edges = [
            node for node in G.nodes() if G.degree(node) <= self.min_edges_per_node
        ]
        G.remove_nodes_from(nodes_with_few_edges)

    def _validate_edges(self, G: nx.Graph) -> None:
        """Validate and assign weights to the edges in the graph.

        Copies each edge's `self.weight_label` attribute into "weight", using 1.0
        when the attribute is absent, and reports how many edges lacked it.

        Args:
            G (nx.Graph): A NetworkX graph object.
        """
        missing_weights = 0
        # Assign user-defined edge weights to the "weight" attribute
        for _, _, data in G.edges(data=True):
            if self.weight_label not in data:
                missing_weights += 1
            # Default to 1.0 if the weight label is not present
            data["weight"] = data.get(self.weight_label, 1.0)
        if self.include_edge_weight and missing_weights:
            print(f"Total edges missing weights: {missing_weights}")

    def _validate_nodes(self, G: nx.Graph) -> None:
        """Check that every node carries 'x', 'y', and 'label' attributes.

        Args:
            G (nx.Graph): A NetworkX graph object.

        Raises:
            AssertionError: If a node lacks a required attribute (not raised when
                Python runs with assertions disabled).
        """
        for node, attrs in G.nodes(data=True):
            assert (
                "x" in attrs and "y" in attrs
            ), f"Node {node} is missing 'x' or 'y' position attributes."
            assert "label" in attrs, f"Node {node} is missing a 'label' attribute."

    def _process_graph(self, G: nx.Graph) -> None:
        """Delegate to `apply_edge_lengths` with this loader's settings.

        Args:
            G (nx.Graph): The input network graph.
        """
        apply_edge_lengths(
            G,
            compute_sphere=self.compute_sphere,
            surface_depth=self.surface_depth,
            include_edge_weight=self.include_edge_weight,
        )

    def _log_loading(
        self,
        filetype: str,
        filepath: str = "",
    ) -> None:
        """Print the settings used to load a network.

        Args:
            filetype (str): The type of the source being loaded (e.g., 'GPickle', 'Cytoscape').
            filepath (str, optional): The path to the file being loaded. Defaults to "".
        """
        print_header("Loading network")
        print(f"Filetype: {filetype}")
        if filepath:
            print(f"Filepath: {filepath}")
        print(f"Projection: {'Sphere' if self.compute_sphere else 'Plane'}")
        if self.compute_sphere:
            print(f"Surface depth: {self.surface_depth}")
        print(f"Edge length threshold: {self.edge_length_threshold}")
        print(f"Edge weight: {'Included' if self.include_edge_weight else 'Excluded'}")
        if self.include_edge_weight:
            print(f"Weight label: {self.weight_label}")