risk-network: 0.0.8b26.tar.gz → 0.0.9b1.tar.gz
This diff shows the changes between two publicly released versions of the package, as published to their public registry, and is provided for informational purposes only.
- {risk_network-0.0.8b26 → risk_network-0.0.9b1}/PKG-INFO +1 -1
- {risk_network-0.0.8b26 → risk_network-0.0.9b1}/risk/__init__.py +1 -1
- {risk_network-0.0.8b26 → risk_network-0.0.9b1}/risk/annotations/annotations.py +39 -38
- {risk_network-0.0.8b26 → risk_network-0.0.9b1}/risk/log/__init__.py +1 -1
- risk_network-0.0.8b26/risk/log/config.py → risk_network-0.0.9b1/risk/log/console.py +2 -2
- risk_network-0.0.9b1/risk/log/enrichment.py +18 -0
- {risk_network-0.0.8b26 → risk_network-0.0.9b1}/risk/log/params.py +1 -1
- {risk_network-0.0.8b26 → risk_network-0.0.9b1}/risk/neighborhoods/domains.py +15 -15
- {risk_network-0.0.8b26 → risk_network-0.0.9b1}/risk/neighborhoods/neighborhoods.py +101 -89
- {risk_network-0.0.8b26 → risk_network-0.0.9b1}/risk/network/graph.py +25 -25
- {risk_network-0.0.8b26 → risk_network-0.0.9b1}/risk/network/plot/canvas.py +3 -1
- {risk_network-0.0.8b26 → risk_network-0.0.9b1}/risk/network/plot/contour.py +1 -1
- {risk_network-0.0.8b26 → risk_network-0.0.9b1}/risk/network/plot/labels.py +1 -1
- {risk_network-0.0.8b26 → risk_network-0.0.9b1}/risk/network/plot/network.py +28 -28
- {risk_network-0.0.8b26 → risk_network-0.0.9b1}/risk/network/plot/utils/color.py +27 -27
- {risk_network-0.0.8b26 → risk_network-0.0.9b1}/risk/risk.py +20 -18
- {risk_network-0.0.8b26 → risk_network-0.0.9b1}/risk/stats/stats.py +13 -13
- {risk_network-0.0.8b26 → risk_network-0.0.9b1}/risk_network.egg-info/PKG-INFO +1 -1
- {risk_network-0.0.8b26 → risk_network-0.0.9b1}/risk_network.egg-info/SOURCES.txt +2 -1
- {risk_network-0.0.8b26 → risk_network-0.0.9b1}/LICENSE +0 -0
- {risk_network-0.0.8b26 → risk_network-0.0.9b1}/MANIFEST.in +0 -0
- {risk_network-0.0.8b26 → risk_network-0.0.9b1}/README.md +0 -0
- {risk_network-0.0.8b26 → risk_network-0.0.9b1}/pyproject.toml +0 -0
- {risk_network-0.0.8b26 → risk_network-0.0.9b1}/risk/annotations/__init__.py +0 -0
- {risk_network-0.0.8b26 → risk_network-0.0.9b1}/risk/annotations/io.py +0 -0
- {risk_network-0.0.8b26 → risk_network-0.0.9b1}/risk/constants.py +0 -0
- {risk_network-0.0.8b26 → risk_network-0.0.9b1}/risk/neighborhoods/__init__.py +0 -0
- {risk_network-0.0.8b26 → risk_network-0.0.9b1}/risk/neighborhoods/community.py +0 -0
- {risk_network-0.0.8b26 → risk_network-0.0.9b1}/risk/network/__init__.py +0 -0
- {risk_network-0.0.8b26 → risk_network-0.0.9b1}/risk/network/geometry.py +0 -0
- {risk_network-0.0.8b26 → risk_network-0.0.9b1}/risk/network/io.py +0 -0
- {risk_network-0.0.8b26 → risk_network-0.0.9b1}/risk/network/plot/__init__.py +0 -0
- {risk_network-0.0.8b26 → risk_network-0.0.9b1}/risk/network/plot/plotter.py +0 -0
- {risk_network-0.0.8b26 → risk_network-0.0.9b1}/risk/network/plot/utils/layout.py +0 -0
- {risk_network-0.0.8b26 → risk_network-0.0.9b1}/risk/stats/__init__.py +0 -0
- {risk_network-0.0.8b26 → risk_network-0.0.9b1}/risk/stats/hypergeom.py +0 -0
- {risk_network-0.0.8b26 → risk_network-0.0.9b1}/risk/stats/permutation/__init__.py +0 -0
- {risk_network-0.0.8b26 → risk_network-0.0.9b1}/risk/stats/permutation/permutation.py +0 -0
- {risk_network-0.0.8b26 → risk_network-0.0.9b1}/risk/stats/permutation/test_functions.py +0 -0
- {risk_network-0.0.8b26 → risk_network-0.0.9b1}/risk/stats/poisson.py +0 -0
- {risk_network-0.0.8b26 → risk_network-0.0.9b1}/risk_network.egg-info/dependency_links.txt +0 -0
- {risk_network-0.0.8b26 → risk_network-0.0.9b1}/risk_network.egg-info/requires.txt +0 -0
- {risk_network-0.0.8b26 → risk_network-0.0.9b1}/risk_network.egg-info/top_level.txt +0 -0
- {risk_network-0.0.8b26 → risk_network-0.0.9b1}/setup.cfg +0 -0
- {risk_network-0.0.8b26 → risk_network-0.0.9b1}/setup.py +0 -0
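Notable in the file list: `risk/log/config.py` is renamed to `risk/log/console.py`, and a new module `risk/log/enrichment.py` is added. The hunks below (reconstructed from the diff viewer, which truncated the text of many removed lines; removals with lost text appear as bare `-` markers or cut-off fragments) cover `risk/annotations/annotations.py`, the new `risk/log/enrichment.py`, and `risk/neighborhoods/domains.py`. Because `enrichment.py` imports `from .console import logger, log_header`, the package `__init__.py` plausibly re-exports from the renamed module; a hypothetical sketch (the diff records only a one-line change to the real `risk/log/__init__.py`, not its contents):

```python
# risk/log/__init__.py -- hypothetical sketch, not the file's actual text.
# After the config.py -> console.py rename, re-exporting from the new module
# keeps existing imports such as `from risk.log import logger` (seen in the
# domains.py hunk below) working unchanged.
from .console import logger, log_header
```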
`risk/annotations/annotations.py` (+39 −38):

```diff
@@ -83,69 +83,69 @@ def load_annotations(network: nx.Graph, annotations_input: Dict[str, Any]) -> Di
 def define_top_annotations(
     network: nx.Graph,
     ordered_annotation_labels: List[str],
-
-
-
+    neighborhood_significance_sums: List[int],
+    significant_significance_matrix: np.ndarray,
+    significant_binary_significance_matrix: np.ndarray,
     min_cluster_size: int = 5,
     max_cluster_size: int = 1000,
 ) -> pd.DataFrame:
-    """Define top annotations based on neighborhood
+    """Define top annotations based on neighborhood significance sums and binary significance matrix.

     Args:
         network (NetworkX graph): The network graph.
         ordered_annotation_labels (list of str): List of ordered annotation labels.
-
-
-
+        neighborhood_significance_sums (list of int): List of neighborhood significance sums.
+        significant_significance_matrix (np.ndarray): Enrichment matrix below alpha threshold.
+        significant_binary_significance_matrix (np.ndarray): Binary significance matrix below alpha threshold.
         min_cluster_size (int, optional): Minimum cluster size. Defaults to 5.
         max_cluster_size (int, optional): Maximum cluster size. Defaults to 1000.

     Returns:
         pd.DataFrame: DataFrame with top annotations and their properties.
     """
-    # Sum the columns of the significant
-
-    # Create DataFrame to store annotations, their neighborhood
-
+    # Sum the columns of the significant significance matrix (positive floating point values)
+    significant_significance_scores = significant_significance_matrix.sum(axis=0)
+    # Create DataFrame to store annotations, their neighborhood significance sums, and significance scores
+    annotations_significance_matrix = pd.DataFrame(
         {
             "id": range(len(ordered_annotation_labels)),
             "full_terms": ordered_annotation_labels,
-            "
-            "
+            "significant_neighborhood_significance_sums": neighborhood_significance_sums,
+            "significant_significance_score": significant_significance_scores,
         }
     )
-
+    annotations_significance_matrix["significant_annotations"] = False
     # Apply size constraints to identify potential significant annotations
-
+    annotations_significance_matrix.loc[
         (
-
+            annotations_significance_matrix["significant_neighborhood_significance_sums"]
             >= min_cluster_size
         )
         & (
-
+            annotations_significance_matrix["significant_neighborhood_significance_sums"]
             <= max_cluster_size
         ),
         "significant_annotations",
     ] = True
     # Initialize columns for connected components analysis
-
-
-
+    annotations_significance_matrix["num_connected_components"] = 0
+    annotations_significance_matrix["size_connected_components"] = None
+    annotations_significance_matrix["size_connected_components"] = annotations_significance_matrix[
         "size_connected_components"
     ].astype(object)
-
+    annotations_significance_matrix["num_large_connected_components"] = 0

-    for attribute in
-
+    for attribute in annotations_significance_matrix.index.values[
+        annotations_significance_matrix["significant_annotations"]
     ]:
-        # Identify
-
-            compress(list(network),
+        # Identify significant neighborhoods based on the binary significance matrix
+        significant_neighborhoods = list(
+            compress(list(network), significant_binary_significance_matrix[:, attribute])
         )
-
-        # Analyze connected components within the
+        significant_network = nx.subgraph(network, significant_neighborhoods)
+        # Analyze connected components within the significant subnetwork
         connected_components = sorted(
-            nx.connected_components(
+            nx.connected_components(significant_network), key=len, reverse=True
         )
        size_connected_components = np.array([len(c) for c in connected_components])

```
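To make the renamed signature concrete, here is a hypothetical call with synthetic inputs (the toy graph, the values, and the lowered `min_cluster_size` are illustrative only; the import path points at the module shown in this diff):

```python
import networkx as nx
import numpy as np

from risk.annotations.annotations import define_top_annotations

# Synthetic example: 4 nodes annotated with 2 terms. Rows are nodes,
# columns are annotation terms, matching the shapes the signature expects.
network = nx.path_graph(4)
labels = ["term_A", "term_B"]
significance = np.array([[0.9, 0.0], [0.8, 0.0], [0.0, 0.7], [0.0, 0.0]])
binary = (significance > 0).astype(int)
sums = binary.sum(axis=0).tolist()  # per-term neighborhood significance sums

top = define_top_annotations(
    network=network,
    ordered_annotation_labels=labels,
    neighborhood_significance_sums=sums,
    significant_significance_matrix=significance,
    significant_binary_significance_matrix=binary,
    min_cluster_size=1,  # lowered from the default of 5 so the toy terms survive
)
print(top[["full_terms", "significant_annotations"]])
```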
`risk/annotations/annotations.py` (continued):

```diff
@@ -159,23 +159,24 @@ def define_top_annotations(
         num_large_connected_components = len(filtered_size_connected_components)

         # Assign the number of connected components
-
+        annotations_significance_matrix.loc[attribute, "num_connected_components"] = (
             num_connected_components
         )
         # Filter out attributes with more than one connected component
-
-
+        annotations_significance_matrix.loc[
+            annotations_significance_matrix["num_connected_components"] > 1,
+            "significant_annotations",
         ] = False
         # Assign the number of large connected components
-
+        annotations_significance_matrix.loc[attribute, "num_large_connected_components"] = (
             num_large_connected_components
         )
         # Assign the size of connected components, ensuring it is always a list
-
+        annotations_significance_matrix.at[attribute, "size_connected_components"] = (
             filtered_size_connected_components.tolist()
         )

-    return
+    return annotations_significance_matrix


 def get_weighted_description(words_column: pd.Series, scores_column: pd.Series) -> str:
```
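One detail in this hunk deserves a note: `size_connected_components` is pre-cast to `object` dtype and written with `.at` because a whole Python list is being stored in a single cell; plain `.loc` assignment of a list tries to align it element-wise and fails. A standalone illustration of the pattern:

```python
import numpy as np
import pandas as pd

df = pd.DataFrame({"id": [0, 1]})
df["size_connected_components"] = None
# Cast to object so one cell can hold an arbitrary Python list.
df["size_connected_components"] = df["size_connected_components"].astype(object)

sizes = np.array([5, 3, 1])
# .at writes the list into a single cell; .loc with a list value would
# attempt element-wise alignment and raise a length-mismatch ValueError.
df.at[0, "size_connected_components"] = sizes.tolist()
print(df.loc[0, "size_connected_components"])  # [5, 3, 1]
```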
`risk/annotations/annotations.py` (continued):

```diff
@@ -184,16 +185,16 @@ def get_weighted_description(words_column: pd.Series, scores_column: pd.Series)

     Args:
         words_column (pd.Series): A pandas Series containing strings to process.
-        scores_column (pd.Series): A pandas Series containing
+        scores_column (pd.Series): A pandas Series containing significance scores to weigh the terms.

     Returns:
-        str: A coherent description formed from the most frequent and significant words, weighed by
+        str: A coherent description formed from the most frequent and significant words, weighed by significance scores.
     """
     # Handle case where all scores are the same
     if scores_column.max() == scores_column.min():
         normalized_scores = pd.Series([1] * len(scores_column))
     else:
-        # Normalize the
+        # Normalize the significance scores to be between 0 and 1
         normalized_scores = (scores_column - scores_column.min()) / (
             scores_column.max() - scores_column.min()
         )
```
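The `scores_column.max() == scores_column.min()` guard matters because min-max scaling degenerates to division by zero when all scores are equal. The same logic as a self-contained function:

```python
import pandas as pd

def normalize(scores: pd.Series) -> pd.Series:
    # Guard: identical scores would make min-max scaling divide by zero,
    # so weight every term equally instead (mirroring the hunk above).
    if scores.max() == scores.min():
        return pd.Series([1] * len(scores))
    return (scores - scores.min()) / (scores.max() - scores.min())

print(normalize(pd.Series([2.0, 2.0, 2.0])).tolist())  # [1, 1, 1]
print(normalize(pd.Series([1.0, 2.0, 3.0])).tolist())  # [0.0, 0.5, 1.0]
```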
`risk/log/enrichment.py` (new file, +18):

```diff
@@ -0,0 +1,18 @@
+"""
+risk/log/enrichment
+~~~~~~~~~~~~~~~~~~~
+"""
+
+import csv
+import json
+import warnings
+from datetime import datetime
+from functools import wraps
+from typing import Any, Dict
+
+import numpy as np
+
+from .console import logger, log_header
+
+# Suppress all warnings - this is to resolve warnings from multiprocessing
+warnings.filterwarnings("ignore")
```
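A caveat about the module's last line: `warnings.filterwarnings("ignore")` executed at import time silences warnings for the whole process, not just this module. A small demonstration; the scoped context-manager form shown second is a general Python idiom, not something this package uses:

```python
import warnings

warnings.filterwarnings("ignore")  # as in risk/log/enrichment.py
warnings.warn("multiprocessing noise")  # silently dropped, process-wide

# Scoped alternative: the stricter filter applies only inside the block,
# and the previous filters are restored on exit.
with warnings.catch_warnings():
    warnings.simplefilter("error")
    try:
        warnings.warn("visible again inside the block")
    except UserWarning as exc:
        print(f"caught: {exc}")
```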
`risk/neighborhoods/domains.py` (+15 −15):

```diff
@@ -20,17 +20,17 @@ from risk.log import logger

 def define_domains(
     top_annotations: pd.DataFrame,
-
+    significant_neighborhoods_significance: np.ndarray,
     linkage_criterion: str,
     linkage_method: str,
     linkage_metric: str,
 ) -> pd.DataFrame:
-    """Define domains and assign nodes to these domains based on their
+    """Define domains and assign nodes to these domains based on their significance scores and clustering,
     handling errors by assigning unique domains when clustering fails.

     Args:
         top_annotations (pd.DataFrame): DataFrame of top annotations data for the network nodes.
-
+        significant_neighborhoods_significance (np.ndarray): The binary significance matrix below alpha.
         linkage_criterion (str): The clustering criterion for defining groups.
         linkage_method (str): The linkage method for clustering.
         linkage_metric (str): The linkage metric for clustering.
```
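A hypothetical call mirroring the renamed parameter, reusing `top` and `binary` from the `define_top_annotations` sketch above (the linkage values are ordinary SciPy choices picked for illustration; this diff does not show the package's defaults):

```python
from risk.neighborhoods.domains import define_domains

domains = define_domains(
    top_annotations=top,  # from the define_top_annotations sketch above
    significant_neighborhoods_significance=binary,
    linkage_criterion="distance",  # standard scipy fcluster criterion
    linkage_method="average",      # standard scipy linkage method
    linkage_metric="euclidean",    # standard scipy distance metric
)
```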
`risk/neighborhoods/domains.py` (continued):

```diff
@@ -40,7 +40,7 @@ def define_domains(
     """
     try:
         # Transpose the matrix to cluster annotations
-        m =
+        m = significant_neighborhoods_significance[:, top_annotations["significant_annotations"]].T
         best_linkage, best_metric, best_threshold = _optimize_silhouette_across_linkage_and_metrics(
             m, linkage_criterion, linkage_method, linkage_metric
         )
```
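The restored `m = ...` line does two things at once: a boolean column mask keeps only the annotations flagged `significant_annotations`, and `.T` flips the matrix so each row becomes one annotation's profile across nodes, the orientation the downstream clustering operates on. A toy version:

```python
import numpy as np
import pandas as pd

# nodes x annotations matrix (toy values)
significance = np.array([[0.9, 0.0, 0.4],
                         [0.8, 0.1, 0.0],
                         [0.0, 0.0, 0.5]])
top_annotations = pd.DataFrame({"significant_annotations": [True, False, True]})

# Keep only significant annotation columns, then transpose so each row is
# one annotation's per-node profile.
m = significance[:, top_annotations["significant_annotations"]].T
print(m.shape)  # (2, 3): 2 significant annotations x 3 nodes
```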
`risk/neighborhoods/domains.py` (continued):

```diff
@@ -65,13 +65,13 @@ def define_domains(
         top_annotations["domain"] = range(1, n_rows + 1)  # Assign unique domains

     # Create DataFrames to store domain information
-
-        data=
+    node_to_significance = pd.DataFrame(
+        data=significant_neighborhoods_significance,
         columns=[top_annotations.index.values, top_annotations["domain"]],
     )
-    node_to_domain =
+    node_to_domain = node_to_significance.groupby(level="domain", axis=1).sum()

-    # Find the maximum
+    # Find the maximum significance score for each node
     t_max = node_to_domain.loc[:, 1:].max(axis=1)
     t_idxmax = node_to_domain.loc[:, 1:].idxmax(axis=1)
     t_idxmax[t_max == 0] = 0
```
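`node_to_significance` is built with two-level columns, (annotation index, domain), so `groupby(level="domain", axis=1).sum()` collapses the annotation columns into one column per domain for every node. A toy reproduction; note that `axis=1` in `groupby` is deprecated in recent pandas, where `df.T.groupby(level="domain").sum().T` is the forward-compatible equivalent:

```python
import numpy as np
import pandas as pd

sig = np.array([[1.0, 0.5, 0.0],
                [0.0, 0.2, 0.7]])
# Two-level columns mimicking
# columns=[top_annotations.index.values, top_annotations["domain"]]:
cols = pd.MultiIndex.from_arrays([[0, 1, 2], [1, 1, 2]], names=[None, "domain"])
node_to_significance = pd.DataFrame(sig, columns=cols)

# Sum annotation columns within each domain, per node.
node_to_domain = node_to_significance.groupby(level="domain", axis=1).sum()
print(node_to_domain)
# domain    1    2
# 0       1.5  0.0
# 1       0.2  0.7
```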
`risk/neighborhoods/domains.py` (continued):

```diff
@@ -119,27 +119,27 @@ def trim_domains_and_top_annotations(
     top_annotations["domain"].replace(to_remove, invalid_domain_id, inplace=True)
     domains.loc[domains["primary_domain"].isin(to_remove), ["primary_domain"]] = invalid_domain_id

-    # Normalize "num
+    # Normalize "num significant neighborhoods" by percentile for each domain and scale to 0-10
     top_annotations["normalized_value"] = top_annotations.groupby("domain")[
-        "
+        "significant_neighborhood_significance_sums"
     ].transform(lambda x: (x.rank(pct=True) * 10).apply(np.ceil).astype(int))
-    # Modify the lambda function to pass both full_terms and
+    # Modify the lambda function to pass both full_terms and significant_significance_score
     top_annotations["combined_terms"] = top_annotations.apply(
         lambda row: " ".join([str(row["full_terms"])] * row["normalized_value"]), axis=1
     )

-    # Perform the groupby operation while retaining the other columns and adding the weighting with
+    # Perform the groupby operation while retaining the other columns and adding the weighting with significance scores
     domain_labels = (
         top_annotations.groupby("domain")
         .agg(
             full_terms=("full_terms", lambda x: list(x)),
-
+            significance_scores=("significant_significance_score", lambda x: list(x)),
         )
         .reset_index()
     )
     domain_labels["combined_terms"] = domain_labels.apply(
         lambda row: get_weighted_description(
-            pd.Series(row["full_terms"]), pd.Series(row["
+            pd.Series(row["full_terms"]), pd.Series(row["significance_scores"])
         ),
         axis=1,
     )
```
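The `normalized_value` transform converts raw sums into 1-10 integer weights via within-domain percentile rank, and each weight then controls how many times a term is repeated in `combined_terms`. The scaling step in isolation:

```python
import numpy as np
import pandas as pd

x = pd.Series([3, 10, 10, 42])
# Percentile rank within the group, scaled to 1-10 and rounded up,
# exactly as in the .transform(...) call above.
scaled = (x.rank(pct=True) * 10).apply(np.ceil).astype(int)
print(scaled.tolist())  # [3, 7, 7, 10]
```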
`risk/neighborhoods/domains.py` (continued):

```diff
@@ -150,7 +150,7 @@ def trim_domains_and_top_annotations(
             "domain": "id",
             "combined_terms": "normalized_description",
             "full_terms": "full_descriptions",
-            "
+            "significance_scores": "significance_scores",
         }
     ).set_index("id")

```