PyPI - risk-network - Versions diffs - 0.0.5b6__py3-none-any.whl → 0.0.6__py3-none-any.whl - Mend

risk-network 0.0.5b6__py3-none-any.whl → 0.0.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (15)

risk/__init__.py +1 -1
risk/annotations/io.py +44 -3
risk/log/params.py +2 -0
risk/neighborhoods/community.py +7 -3
risk/neighborhoods/domains.py +24 -18
risk/neighborhoods/neighborhoods.py +2 -2
risk/network/graph.py +68 -40
risk/network/io.py +30 -10
risk/network/plot.py +713 -309
risk/risk.py +10 -22
{risk_network-0.0.5b6.dist-info → risk_network-0.0.6.dist-info}/METADATA +3 -4
{risk_network-0.0.5b6.dist-info → risk_network-0.0.6.dist-info}/RECORD +15 -15
{risk_network-0.0.5b6.dist-info → risk_network-0.0.6.dist-info}/WHEEL +1 -1
{risk_network-0.0.5b6.dist-info → risk_network-0.0.6.dist-info}/LICENSE +0 -0
{risk_network-0.0.5b6.dist-info → risk_network-0.0.6.dist-info}/top_level.txt +0 -0

risk/__init__.py CHANGED Viewed

@@ -7,4 +7,4 @@ RISK: RISK Infers Spatial Kinships
 from risk.risk import RISK
-__version__ = "0.0.5-beta.6"
+__version__ = "0.0.6"

risk/annotations/io.py CHANGED Viewed

@@ -36,13 +36,15 @@ class AnnotationsIO:
             dict: A dictionary containing ordered nodes, ordered annotations, and the annotations matrix.
         """
         filetype = "JSON"
+        # Log the loading of the JSON file
         params.log_annotations(filepath=filepath, filetype=filetype)
         _log_loading(filetype, filepath=filepath)
         # Open and read the JSON file
         with open(filepath, "r") as file:
             annotations_input = json.load(file)
-        # Process the JSON data and return it in the context of the network
+        # Load the annotations into the provided network
         return load_annotations(network, annotations_input)
     def load_excel_annotation(
@@ -69,14 +71,18 @@ class AnnotationsIO:
                             linked to the provided network.
         """
         filetype = "Excel"
+        # Log the loading of the Excel file
         params.log_annotations(filepath=filepath, filetype=filetype)
         _log_loading(filetype, filepath=filepath)
         # Load the specified sheet from the Excel file
         df = pd.read_excel(filepath, sheet_name=sheet_name)
         # Split the nodes column by the specified nodes_delimiter
         df[nodes_colname] = df[nodes_colname].apply(lambda x: x.split(nodes_delimiter))
         # Convert the DataFrame to a dictionary pairing labels with their corresponding nodes
         label_node_dict = df.set_index(label_colname)[nodes_colname].to_dict()
+        # Load the annotations into the provided network
         return load_annotations(network, label_node_dict)
     def load_csv_annotation(
@@ -101,13 +107,16 @@ class AnnotationsIO:
                             linked to the provided network.
         """
         filetype = "CSV"
+        # Log the loading of the CSV file
         params.log_annotations(filepath=filepath, filetype=filetype)
         _log_loading(filetype, filepath=filepath)
         # Load the CSV file into a dictionary
         annotations_input = _load_matrix_file(
             filepath, label_colname, nodes_colname, delimiter=",", nodes_delimiter=nodes_delimiter
         )
-        # Process and return the annotations in the context of the network
+        # Load the annotations into the provided network
         return load_annotations(network, annotations_input)
     def load_tsv_annotation(
@@ -132,15 +141,47 @@ class AnnotationsIO:
                             linked to the provided network.
         """
         filetype = "TSV"
+        # Log the loading of the TSV file
         params.log_annotations(filepath=filepath, filetype=filetype)
         _log_loading(filetype, filepath=filepath)
         # Load the TSV file into a dictionary
         annotations_input = _load_matrix_file(
             filepath, label_colname, nodes_colname, delimiter="\t", nodes_delimiter=nodes_delimiter
         )
-        # Process and return the annotations in the context of the network
+        # Load the annotations into the provided network
         return load_annotations(network, annotations_input)
+    def load_dict_annotation(self, content: Dict[str, Any], network: nx.Graph) -> Dict[str, Any]:
+        """Load annotations from a provided dictionary and convert them to a dictionary annotation.
+        Args:
+            content (dict): The annotations dictionary to load.
+            network (NetworkX graph): The network to which the annotations are related.
+        Returns:
+            dict: A dictionary containing ordered nodes, ordered annotations, and the annotations matrix.
+        """
+        # Ensure the input content is a dictionary
+        if not isinstance(content, dict):
+            raise TypeError(
+                f"Expected 'content' to be a dictionary, but got {type(content).__name__} instead."
+            )
+        filetype = "Dictionary"
+        # Log the loading of the annotations from the dictionary
+        params.log_annotations(filepath="In-memory dictionary", filetype=filetype)
+        _log_loading(filetype, "In-memory dictionary")
+        # Load the annotations into the provided network
+        annotations_dict = load_annotations(network, content)
+        # Ensure the output is a dictionary
+        if not isinstance(annotations_dict, dict):
+            raise ValueError("Expected output to be a dictionary")
+        return annotations_dict
 def _load_matrix_file(
     filepath: str,

risk/log/params.py CHANGED Viewed

@@ -7,6 +7,7 @@ import csv
 import json
 import warnings
 from datetime import datetime
+from functools import wraps
 from typing import Any, Dict
 import numpy as np
@@ -27,6 +28,7 @@ def _safe_param_export(func):
         function: The wrapped function with error handling.
     """
+    @wraps(func)
     def wrapper(*args, **kwargs):
         try:
             result = func(*args, **kwargs)

risk/neighborhoods/community.py CHANGED Viewed

@@ -25,10 +25,14 @@ def calculate_dijkstra_neighborhoods(network: nx.Graph) -> np.ndarray:
     # Populate the neighborhoods matrix based on Dijkstra's distances
     for source, targets in all_dijkstra_paths.items():
+        max_length = max(targets.values()) if targets else 1  # Handle cases with no targets
         for target, length in targets.items():
-            neighborhoods[source, target] = (
-                1 if np.isnan(length) or length == 0 else np.sqrt(1 / length)
-            )
+            if np.isnan(length):
+                neighborhoods[source, target] = max_length  # Use max distance for NaN
+            elif length == 0:
+                neighborhoods[source, target] = 1  # Assign 1 for zero-length paths (self-loops)
+            else:
+                neighborhoods[source, target] = 1 / length  # Inverse of the distance
     return neighborhoods

risk/neighborhoods/domains.py CHANGED Viewed

@@ -35,26 +35,31 @@ def define_domains(
     Returns:
         pd.DataFrame: DataFrame with the primary domain for each node.
     """
-    # Perform hierarchical clustering on the binary enrichment matrix
-    m = significant_neighborhoods_enrichment[:, top_annotations["top attributes"]].T
-    best_linkage, best_metric, best_threshold = _optimize_silhouette_across_linkage_and_metrics(
-        m, linkage_criterion, linkage_method, linkage_metric
-    )
-    try:
-        Z = linkage(m, method=best_linkage, metric=best_metric)
-    except ValueError as e:
-        raise ValueError("No significant annotations found.") from e
+    # Check if there's more than one column in significant_neighborhoods_enrichment
+    if significant_neighborhoods_enrichment.shape[1] == 1:
+        print("Single annotation detected. Skipping clustering.")
+        top_annotations["domain"] = 1  # Assign a default domain or handle appropriately
+    else:
+        # Perform hierarchical clustering on the binary enrichment matrix
+        m = significant_neighborhoods_enrichment[:, top_annotations["top attributes"]].T
+        best_linkage, best_metric, best_threshold = _optimize_silhouette_across_linkage_and_metrics(
+            m, linkage_criterion, linkage_method, linkage_metric
+        )
+        try:
+            Z = linkage(m, method=best_linkage, metric=best_metric)
+        except ValueError as e:
+            raise ValueError("No significant annotations found.") from e
-    print(
-        f"Linkage criterion: '{linkage_criterion}'\nLinkage method: '{best_linkage}'\nLinkage metric: '{best_metric}'"
-    )
-    print(f"Optimal linkage threshold: {round(best_threshold, 3)}")
+        print(
+            f"Linkage criterion: '{linkage_criterion}'\nLinkage method: '{best_linkage}'\nLinkage metric: '{best_metric}'"
+        )
+        print(f"Optimal linkage threshold: {round(best_threshold, 3)}")
-    max_d_optimal = np.max(Z[:, 2]) * best_threshold
-    domains = fcluster(Z, max_d_optimal, criterion=linkage_criterion)
-    # Assign domains to the annotations matrix
-    top_annotations["domain"] = 0
-    top_annotations.loc[top_annotations["top attributes"], "domain"] = domains
+        max_d_optimal = np.max(Z[:, 2]) * best_threshold
+        domains = fcluster(Z, max_d_optimal, criterion=linkage_criterion)
+        # Assign domains to the annotations matrix
+        top_annotations["domain"] = 0
+        top_annotations.loc[top_annotations["top attributes"], "domain"] = domains
     # Create DataFrames to store domain information
     node_to_enrichment = pd.DataFrame(
@@ -63,6 +68,7 @@ def define_domains(
     )
     node_to_domain = node_to_enrichment.groupby(level="domain", axis=1).sum()
+    # Find the maximum enrichment score for each node
     t_max = node_to_domain.loc[:, 1:].max(axis=1)
     t_idxmax = node_to_domain.loc[:, 1:].idxmax(axis=1)
     t_idxmax[t_max == 0] = 0

risk/neighborhoods/neighborhoods.py CHANGED Viewed

@@ -4,7 +4,7 @@ risk/neighborhoods/neighborhoods
 """
 import warnings
-from typing import Any, Dict, Tuple
+from typing import Any, Dict, List, Tuple
 import networkx as nx
 import numpy as np
@@ -305,7 +305,7 @@ def _get_node_position(network: nx.Graph, node: Any) -> np.ndarray:
     )
-def _calculate_threshold(average_distances: list, distance_threshold: float) -> float:
+def _calculate_threshold(average_distances: List, distance_threshold: float) -> float:
     """Calculate the distance threshold based on the given average distances and a percentile threshold.
     Args:

risk/network/graph.py CHANGED Viewed

@@ -28,7 +28,7 @@ class NetworkGraph:
         top_annotations: pd.DataFrame,
         domains: pd.DataFrame,
         trimmed_domains: pd.DataFrame,
-        node_label_to_id_map: Dict[str, Any],
+        node_label_to_node_id_map: Dict[str, Any],
         node_enrichment_sums: np.ndarray,
     ):
         """Initialize the NetworkGraph object.
@@ -38,39 +38,48 @@ class NetworkGraph:
             top_annotations (pd.DataFrame): DataFrame containing annotations data for the network nodes.
             domains (pd.DataFrame): DataFrame containing domain data for the network nodes.
             trimmed_domains (pd.DataFrame): DataFrame containing trimmed domain data for the network nodes.
-            node_label_to_id_map (dict): A dictionary mapping node labels to their corresponding IDs.
+            node_label_to_node_id_map (dict): A dictionary mapping node labels to their corresponding IDs.
             node_enrichment_sums (np.ndarray): Array containing the enrichment sums for the nodes.
         """
         self.top_annotations = top_annotations
-        self.domain_to_nodes = self._create_domain_to_nodes_map(domains)
+        self.domain_id_to_node_ids_map = self._create_domain_id_to_node_ids_map(domains)
         self.domains = domains
-        self.trimmed_domain_to_term = self._create_domain_to_term_map(trimmed_domains)
-        self.trimmed_domains = trimmed_domains
-        self.node_label_to_id_map = node_label_to_id_map
+        self.domain_id_to_domain_terms_map = self._create_domain_id_to_domain_terms_map(
+            trimmed_domains
+        )
         self.node_enrichment_sums = node_enrichment_sums
-        # NOTE: self.network and self.node_coordinates are declared in _initialize_network
+        self.node_id_to_node_label_map = {v: k for k, v in node_label_to_node_id_map.items()}
+        self.node_label_to_enrichment_map = dict(
+            zip(node_label_to_node_id_map.keys(), node_enrichment_sums)
+        )
+        self.node_label_to_node_id_map = node_label_to_node_id_map
+        # NOTE: Below this point, instance attributes (i.e., self) will be used!
+        self.domain_id_to_node_labels_map = self._create_domain_id_to_node_labels_map()
+        # self.network and self.node_coordinates are properly declared in _initialize_network
         self.network = None
         self.node_coordinates = None
         self._initialize_network(network)
-    def _create_domain_to_nodes_map(self, domains: pd.DataFrame) -> Dict[str, Any]:
-        """Create a mapping from domains to the list of nodes belonging to each domain.
+    def _create_domain_id_to_node_ids_map(self, domains: pd.DataFrame) -> Dict[str, Any]:
+        """Create a mapping from domains to the list of node IDs belonging to each domain.
         Args:
             domains (pd.DataFrame): DataFrame containing domain information, including the 'primary domain' for each node.
         Returns:
-            dict: A dictionary where keys are domain IDs and values are lists of nodes belonging to each domain.
+            dict: A dictionary where keys are domain IDs and values are lists of node IDs belonging to each domain.
         """
         cleaned_domains_matrix = domains.reset_index()[["index", "primary domain"]]
-        node_to_domains = cleaned_domains_matrix.set_index("index")["primary domain"].to_dict()
-        domain_to_nodes = defaultdict(list)
-        for k, v in node_to_domains.items():
-            domain_to_nodes[v].append(k)
+        node_to_domains_map = cleaned_domains_matrix.set_index("index")["primary domain"].to_dict()
+        domain_id_to_node_ids_map = defaultdict(list)
+        for k, v in node_to_domains_map.items():
+            domain_id_to_node_ids_map[v].append(k)
-        return domain_to_nodes
+        return domain_id_to_node_ids_map
-    def _create_domain_to_term_map(self, trimmed_domains: pd.DataFrame) -> Dict[str, Any]:
+    def _create_domain_id_to_domain_terms_map(
+        self, trimmed_domains: pd.DataFrame
+    ) -> Dict[str, Any]:
         """Create a mapping from domain IDs to their corresponding terms.
         Args:
@@ -86,6 +95,20 @@ class NetworkGraph:
             )
         )
+    def _create_domain_id_to_node_labels_map(self) -> Dict[int, List[str]]:
+        """Create a map from domain IDs to node labels.
+        Returns:
+            dict: A dictionary mapping domain IDs to the corresponding node labels.
+        """
+        domain_id_to_label_map = {}
+        for domain_id, node_ids in self.domain_id_to_node_ids_map.items():
+            domain_id_to_label_map[domain_id] = [
+                self.node_id_to_node_label_map[node_id] for node_id in node_ids
+            ]
+        return domain_id_to_label_map
     def _initialize_network(self, G: nx.Graph) -> None:
         """Initialize the network by unfolding it and extracting node coordinates.
@@ -101,31 +124,32 @@ class NetworkGraph:
     def get_domain_colors(
         self,
+        cmap: str = "gist_rainbow",
+        color: Union[str, None] = None,
         min_scale: float = 0.8,
         max_scale: float = 1.0,
         scale_factor: float = 1.0,
         random_seed: int = 888,
-        **kwargs,
     ) -> np.ndarray:
-        """Generate composite colors for domains.
-        This method generates composite colors for nodes based on their enrichment scores and transforms
-        them to ensure proper alpha values and intensity. For nodes with alpha == 0, it assigns new colors
-        based on the closest valid neighbors within a specified distance.
+        """Generate composite colors for domains based on enrichment or specified colors.
         Args:
-            min_scale (float, optional): Minimum scale for color intensity. Defaults to 0.8.
-            max_scale (float, optional): Maximum scale for color intensity. Defaults to 1.0.
-            scale_factor (float, optional): Exponent for scaling, where values > 1 increase contrast by dimming small
-                values more. Defaults to 1.0.
-            random_seed (int, optional): Seed for random number generation. Defaults to 888.
-            **kwargs: Additional keyword arguments for color generation.
+            cmap (str, optional): Name of the colormap to use for generating domain colors. Defaults to "gist_rainbow".
+            color (str or None, optional): A specific color to use for all generated colors. Defaults to None.
+            min_scale (float, optional): Minimum intensity scale for the colors generated by the colormap.
+                Controls the dimmest colors. Defaults to 0.8.
+            max_scale (float, optional): Maximum intensity scale for the colors generated by the colormap.
+                Controls the brightest colors. Defaults to 1.0.
+            scale_factor (float, optional): Exponent for adjusting the color scaling based on enrichment scores.
+                A higher value increases contrast by dimming lower scores more. Defaults to 1.0.
+            random_seed (int, optional): Seed for random number generation to ensure reproducibility of color assignments.
+                Defaults to 888.
         Returns:
-            np.ndarray: Array of transformed colors.
+            np.ndarray: Array of RGBA colors generated for each domain, based on enrichment or the specified color.
         """
         # Get colors for each domain
-        domain_colors = self._get_domain_colors(random_seed=random_seed)
+        domain_colors = self._get_domain_colors(cmap=cmap, color=color, random_seed=random_seed)
         # Generate composite colors for nodes
         node_colors = self._get_composite_node_colors(domain_colors)
         # Transform colors to ensure proper alpha values and intensity
@@ -153,20 +177,24 @@ class NetworkGraph:
         # Initialize composite colors array with shape (number of nodes, 4) for RGBA
         composite_colors = np.zeros((num_nodes, 4))
         # Assign colors to nodes based on domain_colors
-        for domain_idx, nodes in self.domain_to_nodes.items():
-            color = domain_colors[domain_idx]
+        for domain_id, nodes in self.domain_id_to_node_ids_map.items():
+            color = domain_colors[domain_id]
             for node in nodes:
                 composite_colors[node] = color
         return composite_colors
     def _get_domain_colors(
-        self, color: Union[str, None] = None, random_seed: int = 888
+        self,
+        cmap: str = "gist_rainbow",
+        color: Union[str, None] = None,
+        random_seed: int = 888,
     ) -> Dict[str, Any]:
         """Get colors for each domain.
         Args:
-            color (Union[str, None], optional): Specific color to use for all domains. If specified, it will overwrite the colormap.
+            cmap (str, optional): The name of the colormap to use. Defaults to "gist_rainbow".
+            color (str or None, optional): A specific color to use for all generated colors. Defaults to None.
             random_seed (int, optional): Seed for random number generation. Defaults to 888.
         Returns:
@@ -178,9 +206,9 @@ class NetworkGraph:
         ]
         domains = np.sort(numeric_domains)
         domain_colors = _get_colors(
-            num_colors_to_generate=len(domains), color=color, random_seed=random_seed
+            num_colors_to_generate=len(domains), cmap=cmap, color=color, random_seed=random_seed
         )
-        return dict(zip(self.domain_to_nodes.keys(), domain_colors))
+        return dict(zip(self.domain_id_to_node_ids_map.keys(), domain_colors))
 def _transform_colors(
@@ -273,17 +301,17 @@ def _extract_node_coordinates(G: nx.Graph) -> np.ndarray:
 def _get_colors(
     num_colors_to_generate: int = 10,
-    cmap: str = "hsv",
-    random_seed: int = 888,
+    cmap: str = "gist_rainbow",
     color: Union[str, None] = None,
+    random_seed: int = 888,
 ) -> List[Tuple]:
     """Generate a list of RGBA colors from a specified colormap or use a direct color string.
     Args:
         num_colors_to_generate (int): The number of colors to generate. Defaults to 10.
-        cmap (str): The name of the colormap to use. Defaults to "hsv".
+        cmap (str, optional): The name of the colormap to use. Defaults to "gist_rainbow".
+        color (str or None, optional): A specific color to use for all generated colors.
         random_seed (int): Seed for random number generation. Defaults to 888.
-        color (str, optional): Specific color to use for all nodes. If specified, it will overwrite the colormap.
             Defaults to None.
     Returns:

risk/network/io.py CHANGED Viewed

@@ -48,6 +48,7 @@ class NetworkIO:
         self.min_edges_per_node = min_edges_per_node
         self.include_edge_weight = include_edge_weight
         self.weight_label = weight_label
+        # Log the initialization of the NetworkIO class
         params.log_network(
             compute_sphere=compute_sphere,
             surface_depth=surface_depth,
@@ -98,11 +99,14 @@ class NetworkIO:
             nx.Graph: Loaded and processed network.
         """
         filetype = "GPickle"
+        # Log the loading of the GPickle file
         params.log_network(filetype=filetype, filepath=filepath)
         self._log_loading(filetype, filepath=filepath)
         with open(filepath, "rb") as f:
             G = pickle.load(f)
+        # Initialize the graph
         return self._initialize_graph(G)
     @classmethod
@@ -147,8 +151,11 @@ class NetworkIO:
             nx.Graph: Processed network.
         """
         filetype = "NetworkX"
+        # Log the loading of the NetworkX graph
         params.log_network(filetype=filetype)
         self._log_loading(filetype)
+        # Initialize the graph
         return self._initialize_graph(network)
     @classmethod
@@ -213,8 +220,10 @@ class NetworkIO:
             nx.Graph: Loaded and processed network.
         """
         filetype = "Cytoscape"
+        # Log the loading of the Cytoscape file
         params.log_network(filetype=filetype, filepath=str(filepath))
         self._log_loading(filetype, filepath=filepath)
         cys_files = []
         tmp_dir = ".tmp_cytoscape"
         # Try / finally to remove unzipped files
@@ -295,6 +304,7 @@ class NetworkIO:
                     node
                 ]  # Assuming you have a dict `node_y_positions` for y coordinates
+            # Initialize the graph
             return self._initialize_graph(G)
         finally:
@@ -354,6 +364,7 @@ class NetworkIO:
             NetworkX graph: Loaded and processed network.
         """
         filetype = "Cytoscape JSON"
+        # Log the loading of the Cytoscape JSON file
         params.log_network(filetype=filetype, filepath=str(filepath))
         self._log_loading(filetype, filepath=filepath)
@@ -418,29 +429,37 @@ class NetworkIO:
         return G
     def _remove_invalid_graph_properties(self, G: nx.Graph) -> None:
-        """Remove invalid properties from the graph.
+        """Remove invalid properties from the graph, including self-loops, nodes with fewer edges than
+        the threshold, and isolated nodes.
         Args:
             G (nx.Graph): A NetworkX graph object.
         """
-        # First, Remove self-loop edges to ensure correct edge count
+        # Count number of nodes and edges before cleaning
+        num_initial_nodes = G.number_of_nodes()
+        num_initial_edges = G.number_of_edges()
+        # Remove self-loops to ensure correct edge count
         G.remove_edges_from(list(nx.selfloop_edges(G)))
-        # Then, iteratively remove nodes with fewer edges than the specified threshold
+        # Iteratively remove nodes with fewer edges than the threshold
         while True:
-            nodes_to_remove = [
-                node for node in G.nodes() if G.degree(node) < self.min_edges_per_node
-            ]
+            nodes_to_remove = [node for node in G.nodes if G.degree(node) < self.min_edges_per_node]
             if not nodes_to_remove:
-                break  # Exit loop if no more nodes to remove
-            # Remove the nodes and their associated edges
+                break  # Exit loop if no more nodes need removal
             G.remove_nodes_from(nodes_to_remove)
-        # Optionally: Remove any isolated nodes if needed
+        # Remove isolated nodes
         isolated_nodes = list(nx.isolates(G))
         if isolated_nodes:
             G.remove_nodes_from(isolated_nodes)
+        # Log the number of nodes and edges before and after cleaning
+        num_final_nodes = G.number_of_nodes()
+        num_final_edges = G.number_of_edges()
+        print(f"Initial node count: {num_initial_nodes}")
+        print(f"Final node count: {num_final_nodes}")
+        print(f"Initial edge count: {num_initial_edges}")
+        print(f"Final edge count: {num_final_edges}")
     def _assign_edge_weights(self, G: nx.Graph) -> None:
         """Assign weights to the edges in the graph.
@@ -502,6 +521,7 @@ class NetworkIO:
         print(f"Edge weight: {'Included' if self.include_edge_weight else 'Excluded'}")
         if self.include_edge_weight:
             print(f"Weight label: {self.weight_label}")
+        print(f"Minimum edges per node: {self.min_edges_per_node}")
         print(f"Projection: {'Sphere' if self.compute_sphere else 'Plane'}")
         if self.compute_sphere:
             print(f"Surface depth: {self.surface_depth}")

risk-network 0.0.5b6__py3-none-any.whl → 0.0.6__py3-none-any.whl

risk-network 0.0.5b6__py3-none-any.whl → 0.0.6__py3-none-any.whl