risk-network 0.0.8b27__py3-none-any.whl → 0.0.9b2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- risk/__init__.py +1 -1
- risk/annotations/annotations.py +39 -38
- risk/annotations/io.py +8 -6
- risk/log/__init__.py +3 -1
- risk/log/{params.py → parameters.py} +9 -34
- risk/neighborhoods/domains.py +18 -18
- risk/neighborhoods/neighborhoods.py +104 -92
- risk/network/graph/__init__.py +6 -0
- risk/network/{graph.py → graph/network.py} +38 -27
- risk/network/graph/summary.py +239 -0
- risk/network/io.py +3 -3
- risk/network/plot/contour.py +1 -1
- risk/network/plot/labels.py +1 -1
- risk/network/plot/network.py +28 -28
- risk/network/plot/utils/color.py +27 -27
- risk/risk.py +25 -30
- risk/stats/stats.py +13 -13
- {risk_network-0.0.8b27.dist-info → risk_network-0.0.9b2.dist-info}/METADATA +1 -1
- risk_network-0.0.9b2.dist-info/RECORD +39 -0
- risk_network-0.0.8b27.dist-info/RECORD +0 -37
- {risk_network-0.0.8b27.dist-info → risk_network-0.0.9b2.dist-info}/LICENSE +0 -0
- {risk_network-0.0.8b27.dist-info → risk_network-0.0.9b2.dist-info}/WHEEL +0 -0
- {risk_network-0.0.8b27.dist-info → risk_network-0.0.9b2.dist-info}/top_level.txt +0 -0
risk/__init__.py
CHANGED
risk/annotations/annotations.py
CHANGED
@@ -83,69 +83,69 @@ def load_annotations(network: nx.Graph, annotations_input: Dict[str, Any]) -> Di
 def define_top_annotations(
     network: nx.Graph,
     ordered_annotation_labels: List[str],
-
-
-
+    neighborhood_significance_sums: List[int],
+    significant_significance_matrix: np.ndarray,
+    significant_binary_significance_matrix: np.ndarray,
     min_cluster_size: int = 5,
     max_cluster_size: int = 1000,
 ) -> pd.DataFrame:
-    """Define top annotations based on neighborhood
+    """Define top annotations based on neighborhood significance sums and binary significance matrix.
 
     Args:
         network (NetworkX graph): The network graph.
         ordered_annotation_labels (list of str): List of ordered annotation labels.
-
-
-
+        neighborhood_significance_sums (list of int): List of neighborhood significance sums.
+        significant_significance_matrix (np.ndarray): Enrichment matrix below alpha threshold.
+        significant_binary_significance_matrix (np.ndarray): Binary significance matrix below alpha threshold.
         min_cluster_size (int, optional): Minimum cluster size. Defaults to 5.
         max_cluster_size (int, optional): Maximum cluster size. Defaults to 1000.
 
     Returns:
         pd.DataFrame: DataFrame with top annotations and their properties.
     """
-    # Sum the columns of the significant
-
-    # Create DataFrame to store annotations, their neighborhood
-
+    # Sum the columns of the significant significance matrix (positive floating point values)
+    significant_significance_scores = significant_significance_matrix.sum(axis=0)
+    # Create DataFrame to store annotations, their neighborhood significance sums, and significance scores
+    annotations_significance_matrix = pd.DataFrame(
         {
             "id": range(len(ordered_annotation_labels)),
             "full_terms": ordered_annotation_labels,
-
-
+            "significant_neighborhood_significance_sums": neighborhood_significance_sums,
+            "significant_significance_score": significant_significance_scores,
         }
     )
-
+    annotations_significance_matrix["significant_annotations"] = False
     # Apply size constraints to identify potential significant annotations
-
+    annotations_significance_matrix.loc[
         (
-
+            annotations_significance_matrix["significant_neighborhood_significance_sums"]
             >= min_cluster_size
         )
         & (
-
+            annotations_significance_matrix["significant_neighborhood_significance_sums"]
             <= max_cluster_size
         ),
         "significant_annotations",
     ] = True
     # Initialize columns for connected components analysis
-
-
-
+    annotations_significance_matrix["num_connected_components"] = 0
+    annotations_significance_matrix["size_connected_components"] = None
+    annotations_significance_matrix["size_connected_components"] = annotations_significance_matrix[
        "size_connected_components"
     ].astype(object)
-
+    annotations_significance_matrix["num_large_connected_components"] = 0
 
-    for attribute in
-
+    for attribute in annotations_significance_matrix.index.values[
+        annotations_significance_matrix["significant_annotations"]
     ]:
-        # Identify
-
-            compress(list(network),
+        # Identify significant neighborhoods based on the binary significance matrix
+        significant_neighborhoods = list(
+            compress(list(network), significant_binary_significance_matrix[:, attribute])
        )
-
-        # Analyze connected components within the
+        significant_network = nx.subgraph(network, significant_neighborhoods)
+        # Analyze connected components within the significant subnetwork
        connected_components = sorted(
-            nx.connected_components(
+            nx.connected_components(significant_network), key=len, reverse=True
        )
        size_connected_components = np.array([len(c) for c in connected_components])
 
@@ -159,23 +159,24 @@ def define_top_annotations(
        num_large_connected_components = len(filtered_size_connected_components)
 
        # Assign the number of connected components
-
+        annotations_significance_matrix.loc[attribute, "num_connected_components"] = (
            num_connected_components
        )
        # Filter out attributes with more than one connected component
-
-
+        annotations_significance_matrix.loc[
+            annotations_significance_matrix["num_connected_components"] > 1,
+            "significant_annotations",
        ] = False
        # Assign the number of large connected components
-
+        annotations_significance_matrix.loc[attribute, "num_large_connected_components"] = (
            num_large_connected_components
        )
        # Assign the size of connected components, ensuring it is always a list
-
+        annotations_significance_matrix.at[attribute, "size_connected_components"] = (
            filtered_size_connected_components.tolist()
        )
 
-    return
+    return annotations_significance_matrix
 
 
 def get_weighted_description(words_column: pd.Series, scores_column: pd.Series) -> str:
@@ -184,16 +185,16 @@ def get_weighted_description(words_column: pd.Series, scores_column: pd.Series)
 
     Args:
        words_column (pd.Series): A pandas Series containing strings to process.
-        scores_column (pd.Series): A pandas Series containing
+        scores_column (pd.Series): A pandas Series containing significance scores to weigh the terms.
 
     Returns:
-        str: A coherent description formed from the most frequent and significant words, weighed by
+        str: A coherent description formed from the most frequent and significant words, weighed by significance scores.
    """
    # Handle case where all scores are the same
    if scores_column.max() == scores_column.min():
        normalized_scores = pd.Series([1] * len(scores_column))
    else:
-        # Normalize the
+        # Normalize the significance scores to be between 0 and 1
        normalized_scores = (scores_column - scores_column.min()) / (
            scores_column.max() - scores_column.min()
        )
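The renamed `define_top_annotations` signature above can be exercised directly. A minimal sketch, assuming the function is importable from `risk.annotations.annotations` and fed toy node-by-annotation matrices (real inputs come from the neighborhood significance step of the pipeline):

import networkx as nx
import numpy as np

# Hypothetical usage sketch; the import path and toy inputs are assumptions,
# not taken from the package documentation.
from risk.annotations.annotations import define_top_annotations

network = nx.karate_club_graph()
rng = np.random.default_rng(0)

labels = ["term_a", "term_b"]
# Node-by-annotation matrices; in the real pipeline these come from the
# neighborhood significance step.
significance = rng.random((network.number_of_nodes(), len(labels)))
binary = (significance > 0.5).astype(int)

top_annotations = define_top_annotations(
    network=network,
    ordered_annotation_labels=labels,
    neighborhood_significance_sums=binary.sum(axis=0).tolist(),
    significant_significance_matrix=significance,
    significant_binary_significance_matrix=binary,
    min_cluster_size=5,
    max_cluster_size=1000,
)
print(top_annotations[["full_terms", "significant_annotations"]])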
risk/annotations/io.py
CHANGED
@@ -76,11 +76,13 @@ class AnnotationsIO:
        _log_loading(filetype, filepath=filepath)
 
        # Load the specified sheet from the Excel file
-
+        annotation = pd.read_excel(filepath, sheet_name=sheet_name)
        # Split the nodes column by the specified nodes_delimiter
-
+        annotation[nodes_colname] = annotation[nodes_colname].apply(
+            lambda x: x.split(nodes_delimiter)
+        )
        # Convert the DataFrame to a dictionary pairing labels with their corresponding nodes
-        label_node_dict =
+        label_node_dict = annotation.set_index(label_colname)[nodes_colname].to_dict()
 
        # Load the annotations into the provided network
        return load_annotations(network, label_node_dict)
@@ -203,11 +205,11 @@ def _load_matrix_file(
        Dict[str, Any]: A dictionary where each label is paired with its respective list of nodes.
    """
    # Load the CSV or TSV file into a DataFrame
-
+    annotation = pd.read_csv(filepath, delimiter=delimiter)
    # Split the nodes column by the nodes_delimiter to handle multiple nodes per label
-
+    annotation[nodes_colname] = annotation[nodes_colname].apply(lambda x: x.split(nodes_delimiter))
    # Create a dictionary pairing labels with their corresponding list of nodes
-    label_node_dict =
+    label_node_dict = annotation.set_index(label_colname)[nodes_colname].to_dict()
    return label_node_dict
 
 
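Both loaders above reduce to the same pandas pattern: split the delimited node column into lists, then index by the label column and convert to a dict. A self-contained sketch with hypothetical column names and delimiter:

import pandas as pd

# Toy annotation table; the column names "label"/"nodes" and the ";" delimiter
# are hypothetical stand-ins.
annotation = pd.DataFrame(
    {"label": ["GO:0001", "GO:0002"], "nodes": ["geneA;geneB", "geneC"]}
)
# Split each delimited node string into a list of node names
annotation["nodes"] = annotation["nodes"].apply(lambda x: x.split(";"))
# Pair each label with its list of nodes
label_node_dict = annotation.set_index("label")["nodes"].to_dict()
# {'GO:0001': ['geneA', 'geneB'], 'GO:0002': ['geneC']}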
risk/log/__init__.py
CHANGED
risk/log/{params.py → parameters.py}
CHANGED
@@ -1,50 +1,22 @@
 """
-risk/log/
-
+risk/log/parameters
+~~~~~~~~~~~~~~~~~~~
 """
 
 import csv
 import json
 import warnings
 from datetime import datetime
-from functools import wraps
 from typing import Any, Dict
 
 import numpy as np
 
-from .console import logger, log_header
+from risk.log.console import logger, log_header
 
 # Suppress all warnings - this is to resolve warnings from multiprocessing
 warnings.filterwarnings("ignore")
 
 
-def _safe_param_export(func):
-    """A decorator to wrap parameter export functions in a try-except block for safe execution.
-
-    Args:
-        func (function): The function to be wrapped.
-
-    Returns:
-        function: The wrapped function with error handling.
-    """
-
-    @wraps(func)
-    def wrapper(*args, **kwargs):
-        try:
-            result = func(*args, **kwargs)
-            filepath = (
-                kwargs.get("filepath") or args[1]
-            )  # Assuming filepath is always the second argument
-            logger.info(f"Parameters successfully exported to filepath: {filepath}")
-            return result
-        except Exception as e:
-            filepath = kwargs.get("filepath") or args[1]
-            logger.error(f"An error occurred while exporting parameters to {filepath}: {e}")
-            return None
-
-    return wrapper
-
-
 class Params:
    """Handles the storage and logging of various parameters for network analysis.
 
@@ -106,7 +78,6 @@ class Params:
        """
        self.plotter = {**self.plotter, **kwargs}
 
-    @_safe_param_export
    def to_csv(self, filepath: str) -> None:
        """Export the parameters to a CSV file.
 
@@ -128,7 +99,8 @@ class Params:
                else:
                    writer.writerow([parent_key, "", parent_value])
 
-
+        logger.info(f"Parameters exported to CSV file: {filepath}")
+
    def to_json(self, filepath: str) -> None:
        """Export the parameters to a JSON file.
 
@@ -138,7 +110,8 @@ class Params:
        with open(filepath, "w") as json_file:
            json.dump(self.load(), json_file, indent=4)
 
-
+        logger.info(f"Parameters exported to JSON file: {filepath}")
+
    def to_txt(self, filepath: str) -> None:
        """Export the parameters to a text file.
 
@@ -155,6 +128,8 @@ class Params:
            # Add a blank line after each entry
            txt_file.write("\n")
 
+        logger.info(f"Parameters exported to text file: {filepath}")
+
    def load(self) -> Dict[str, Any]:
        """Load and process various parameters, converting any np.ndarray values to lists.
 
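The export methods now log the destination inline after a successful write instead of going through the removed `_safe_param_export` decorator. A minimal stand-in class (not the package's actual `Params`) showing the same pattern:

import json
import logging

logger = logging.getLogger("risk")
logging.basicConfig(level=logging.INFO)


class MiniParams:
    """Toy stand-in for risk.log.Params; only the export-then-log pattern is real."""

    def load(self) -> dict:
        # Placeholder values; the real class gathers annotation, network,
        # neighborhood, and plotter settings.
        return {"example_setting": 1, "datetime": "2024-01-01"}

    def to_json(self, filepath: str) -> None:
        with open(filepath, "w") as json_file:
            json.dump(self.load(), json_file, indent=4)
        logger.info(f"Parameters exported to JSON file: {filepath}")


MiniParams().to_json("parameters.json")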
risk/neighborhoods/domains.py
CHANGED
@@ -20,17 +20,17 @@ from risk.log import logger
 
 def define_domains(
     top_annotations: pd.DataFrame,
-
+    significant_neighborhoods_significance: np.ndarray,
     linkage_criterion: str,
     linkage_method: str,
     linkage_metric: str,
 ) -> pd.DataFrame:
-    """Define domains and assign nodes to these domains based on their
+    """Define domains and assign nodes to these domains based on their significance scores and clustering,
     handling errors by assigning unique domains when clustering fails.
 
     Args:
        top_annotations (pd.DataFrame): DataFrame of top annotations data for the network nodes.
-
+        significant_neighborhoods_significance (np.ndarray): The binary significance matrix below alpha.
        linkage_criterion (str): The clustering criterion for defining groups.
        linkage_method (str): The linkage method for clustering.
        linkage_metric (str): The linkage metric for clustering.
@@ -40,7 +40,7 @@ def define_domains(
    """
    try:
        # Transpose the matrix to cluster annotations
-        m =
+        m = significant_neighborhoods_significance[:, top_annotations["significant_annotations"]].T
        best_linkage, best_metric, best_threshold = _optimize_silhouette_across_linkage_and_metrics(
            m, linkage_criterion, linkage_method, linkage_metric
        )
@@ -65,13 +65,13 @@ def define_domains(
        top_annotations["domain"] = range(1, n_rows + 1)  # Assign unique domains
 
    # Create DataFrames to store domain information
-
-        data=
+    node_to_significance = pd.DataFrame(
+        data=significant_neighborhoods_significance,
        columns=[top_annotations.index.values, top_annotations["domain"]],
    )
-    node_to_domain =
+    node_to_domain = node_to_significance.groupby(level="domain", axis=1).sum()
 
-    # Find the maximum
+    # Find the maximum significance score for each node
    t_max = node_to_domain.loc[:, 1:].max(axis=1)
    t_idxmax = node_to_domain.loc[:, 1:].idxmax(axis=1)
    t_idxmax[t_max == 0] = 0
@@ -101,7 +101,7 @@ def trim_domains_and_top_annotations(
        max_cluster_size (int, optional): Maximum size of a cluster to be retained. Defaults to 1000.
 
    Returns:
-        Tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]:
+        Tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]:
            - Trimmed annotations (pd.DataFrame)
            - Trimmed domains (pd.DataFrame)
            - A DataFrame with domain labels (pd.DataFrame)
@@ -119,27 +119,27 @@ def trim_domains_and_top_annotations(
    top_annotations["domain"].replace(to_remove, invalid_domain_id, inplace=True)
    domains.loc[domains["primary_domain"].isin(to_remove), ["primary_domain"]] = invalid_domain_id
 
-    # Normalize "num
+    # Normalize "num significant neighborhoods" by percentile for each domain and scale to 0-10
    top_annotations["normalized_value"] = top_annotations.groupby("domain")[
-
+        "significant_neighborhood_significance_sums"
    ].transform(lambda x: (x.rank(pct=True) * 10).apply(np.ceil).astype(int))
-    # Modify the lambda function to pass both full_terms and
+    # Modify the lambda function to pass both full_terms and significant_significance_score
    top_annotations["combined_terms"] = top_annotations.apply(
        lambda row: " ".join([str(row["full_terms"])] * row["normalized_value"]), axis=1
    )
 
-    # Perform the groupby operation while retaining the other columns and adding the weighting with
+    # Perform the groupby operation while retaining the other columns and adding the weighting with significance scores
    domain_labels = (
        top_annotations.groupby("domain")
        .agg(
            full_terms=("full_terms", lambda x: list(x)),
-
+            significance_scores=("significant_significance_score", lambda x: list(x)),
        )
        .reset_index()
    )
    domain_labels["combined_terms"] = domain_labels.apply(
        lambda row: get_weighted_description(
-            pd.Series(row["full_terms"]), pd.Series(row["
+            pd.Series(row["full_terms"]), pd.Series(row["significance_scores"])
        ),
        axis=1,
    )
@@ -150,7 +150,7 @@ def trim_domains_and_top_annotations(
            "domain": "id",
            "combined_terms": "normalized_description",
            "full_terms": "full_descriptions",
-
+            "significance_scores": "significance_scores",
        }
    ).set_index("id")
 
@@ -177,7 +177,7 @@ def _optimize_silhouette_across_linkage_and_metrics(
        linkage_metric (str): Linkage metric for clustering.
 
    Returns:
-        Tuple[str, str, float]:
+        Tuple[str, str, float]:
            - Best linkage method (str)
            - Best linkage metric (str)
            - Best threshold (float)
@@ -231,7 +231,7 @@ def _find_best_silhouette_score(
        resolution (float, optional): Desired resolution for the best threshold. Defaults to 0.001.
 
    Returns:
-        Tuple[float, float]:
+        Tuple[float, float]:
            - Best threshold (float): The threshold that yields the best silhouette score.
            - Best silhouette score (float): The highest silhouette score achieved.
    """
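The node-to-domain aggregation in `define_domains` groups annotation columns by their assigned domain and sums the significance per node. A small sketch of that step with toy data; the explicit MultiIndex and the transpose-based groupby are choices made here for portability, since `DataFrame.groupby(axis=1)` is deprecated in recent pandas:

import numpy as np
import pandas as pd

# Toy node-by-annotation significance matrix: 4 nodes, 3 annotations.
significance = np.array(
    [
        [1.0, 0.0, 2.0],
        [0.0, 3.0, 0.0],
        [1.5, 0.0, 0.0],
        [0.0, 0.5, 1.0],
    ]
)
# Each annotation is assigned to a domain (two annotations share domain 1).
columns = pd.MultiIndex.from_arrays(
    [[101, 102, 103], [1, 2, 1]], names=["annotation", "domain"]
)
node_to_significance = pd.DataFrame(significance, columns=columns)

# Sum annotation columns that share a domain, per node.
node_to_domain = node_to_significance.T.groupby(level="domain").sum().T

# Pick the best-scoring domain for each node; nodes with all-zero scores get 0.
t_max = node_to_domain.max(axis=1)
t_idxmax = node_to_domain.idxmax(axis=1)
t_idxmax[t_max == 0] = 0
print(node_to_domain)
print(t_idxmax)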