PyPI - risk-network - Versions diffs - 0.0.11__py3-none-any.whl → 0.0.12__py3-none-any.whl - Mend

risk-network 0.0.11__py3-none-any.whl → 0.0.12__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (45) hide show

risk/__init__.py +1 -1
risk/annotation/__init__.py +10 -0
risk/{annotations/annotations.py → annotation/annotation.py} +44 -44
risk/{annotations → annotation}/io.py +93 -92
risk/{annotations → annotation}/nltk_setup.py +6 -5
risk/log/__init__.py +1 -1
risk/log/parameters.py +26 -27
risk/neighborhoods/__init__.py +0 -1
risk/neighborhoods/api.py +38 -38
risk/neighborhoods/community.py +33 -4
risk/neighborhoods/domains.py +26 -28
risk/neighborhoods/neighborhoods.py +8 -2
risk/neighborhoods/stats/__init__.py +13 -0
risk/neighborhoods/stats/permutation/__init__.py +6 -0
risk/{stats → neighborhoods/stats}/permutation/permutation.py +24 -21
risk/{stats → neighborhoods/stats}/permutation/test_functions.py +4 -4
risk/{stats/stat_tests.py → neighborhoods/stats/tests.py} +62 -54
risk/network/__init__.py +0 -2
risk/network/graph/__init__.py +0 -2
risk/network/graph/api.py +19 -19
risk/network/graph/graph.py +73 -68
risk/{stats/significance.py → network/graph/stats.py} +2 -2
risk/network/graph/summary.py +12 -13
risk/network/io.py +163 -20
risk/network/plotter/__init__.py +0 -2
risk/network/plotter/api.py +1 -1
risk/network/plotter/canvas.py +36 -36
risk/network/plotter/contour.py +14 -15
risk/network/plotter/labels.py +303 -294
risk/network/plotter/network.py +6 -6
risk/network/plotter/plotter.py +8 -10
risk/network/plotter/utils/colors.py +15 -8
risk/network/plotter/utils/layout.py +3 -3
risk/risk.py +6 -6
risk_network-0.0.12.dist-info/METADATA +122 -0
risk_network-0.0.12.dist-info/RECORD +40 -0
{risk_network-0.0.11.dist-info → risk_network-0.0.12.dist-info}/WHEEL +1 -1
risk/annotations/__init__.py +0 -7
risk/network/geometry.py +0 -150
risk/stats/__init__.py +0 -15
risk/stats/permutation/__init__.py +0 -6
risk_network-0.0.11.dist-info/METADATA +0 -798
risk_network-0.0.11.dist-info/RECORD +0 -41
{risk_network-0.0.11.dist-info → risk_network-0.0.12.dist-info/licenses}/LICENSE +0 -0
{risk_network-0.0.11.dist-info → risk_network-0.0.12.dist-info}/top_level.txt +0 -0

risk/neighborhoods/__init__.py CHANGED Viewed

@@ -4,5 +4,4 @@ risk/neighborhoods
 """
 from risk.neighborhoods.domains import define_domains, trim_domains
-from risk.neighborhoods.api import NeighborhoodsAPI
 from risk.neighborhoods.neighborhoods import process_neighborhoods

risk/neighborhoods/api.py CHANGED Viewed

@@ -10,9 +10,9 @@ import networkx as nx
 import numpy as np
 from scipy.sparse import csr_matrix
-from risk.log import logger, log_header, params
+from risk.log import log_header, logger, params
 from risk.neighborhoods.neighborhoods import get_network_neighborhoods
-from risk.stats import (
+from risk.neighborhoods.stats import (
     compute_binom_test,
     compute_chi2_test,
     compute_hypergeom_test,
@@ -28,13 +28,13 @@ class NeighborhoodsAPI:
     The NeighborhoodsAPI class provides methods to load neighborhood results from statistical tests.
     """
-    def __init__() -> None:
+    def __init__(self) -> None:
         pass
-    def load_neighborhoods_by_binom(
+    def load_neighborhoods_binom(
         self,
         network: nx.Graph,
-        annotations: Dict[str, Any],
+        annotation: Dict[str, Any],
         distance_metric: Union[str, List, Tuple, np.ndarray] = "louvain",
         louvain_resolution: float = 0.1,
         leiden_resolution: float = 1.0,
@@ -46,7 +46,7 @@ class NeighborhoodsAPI:
         Args:
             network (nx.Graph): The network graph.
-            annotations (Dict[str, Any]): The annotations associated with the network.
+            annotation (Dict[str, Any]): The annotation associated with the network.
             distance_metric (str, List, Tuple, or np.ndarray, optional): The distance metric(s) to use. Can be a string for one
                 metric or a list/tuple/ndarray of metrics ('greedy_modularity', 'louvain', 'leiden', 'label_propagation',
                 'markov_clustering', 'walktrap', 'spinglass'). Defaults to 'louvain'.
@@ -55,7 +55,7 @@ class NeighborhoodsAPI:
             fraction_shortest_edges (float, List, Tuple, or np.ndarray, optional): Shortest edge rank fraction threshold(s) for creating subgraphs.
                 Can be a single float for one threshold or a list/tuple of floats corresponding to multiple thresholds.
                 Defaults to 0.5.
-            null_distribution (str, optional): Type of null distribution ('network' or 'annotations'). Defaults to "network".
+            null_distribution (str, optional): Type of null distribution ('network' or 'annotation'). Defaults to "network".
             random_seed (int, optional): Seed for random number generation. Defaults to 888.
         Returns:
@@ -65,7 +65,7 @@ class NeighborhoodsAPI:
         # Compute neighborhood significance using the binomial test
         return self._load_neighborhoods_by_statistical_test(
             network=network,
-            annotations=annotations,
+            annotation=annotation,
             distance_metric=distance_metric,
             louvain_resolution=louvain_resolution,
             leiden_resolution=leiden_resolution,
@@ -76,10 +76,10 @@ class NeighborhoodsAPI:
             statistical_test_function=compute_binom_test,
         )
-    def load_neighborhoods_by_chi2(
+    def load_neighborhoods_chi2(
         self,
         network: nx.Graph,
-        annotations: Dict[str, Any],
+        annotation: Dict[str, Any],
         distance_metric: Union[str, List, Tuple, np.ndarray] = "louvain",
         louvain_resolution: float = 0.1,
         leiden_resolution: float = 1.0,
@@ -91,7 +91,7 @@ class NeighborhoodsAPI:
         Args:
             network (nx.Graph): The network graph.
-            annotations (Dict[str, Any]): The annotations associated with the network.
+            annotation (Dict[str, Any]): The annotation associated with the network.
             distance_metric (str, List, Tuple, or np.ndarray, optional): The distance metric(s) to use. Can be a string for one
                 metric or a list/tuple/ndarray of metrics ('greedy_modularity', 'louvain', 'leiden', 'label_propagation',
                 'markov_clustering', 'walktrap', 'spinglass'). Defaults to 'louvain'.
@@ -100,7 +100,7 @@ class NeighborhoodsAPI:
             fraction_shortest_edges (float, List, Tuple, or np.ndarray, optional): Shortest edge rank fraction threshold(s) for creating subgraphs.
                 Can be a single float for one threshold or a list/tuple of floats corresponding to multiple thresholds.
                 Defaults to 0.5.
-            null_distribution (str, optional): Type of null distribution ('network' or 'annotations'). Defaults to "network".
+            null_distribution (str, optional): Type of null distribution ('network' or 'annotation'). Defaults to "network".
             random_seed (int, optional): Seed for random number generation. Defaults to 888.
         Returns:
@@ -110,7 +110,7 @@ class NeighborhoodsAPI:
         # Compute neighborhood significance using the chi-squared test
         return self._load_neighborhoods_by_statistical_test(
             network=network,
-            annotations=annotations,
+            annotation=annotation,
             distance_metric=distance_metric,
             louvain_resolution=louvain_resolution,
             leiden_resolution=leiden_resolution,
@@ -121,10 +121,10 @@ class NeighborhoodsAPI:
             statistical_test_function=compute_chi2_test,
         )
-    def load_neighborhoods_by_hypergeom(
+    def load_neighborhoods_hypergeom(
         self,
         network: nx.Graph,
-        annotations: Dict[str, Any],
+        annotation: Dict[str, Any],
         distance_metric: Union[str, List, Tuple, np.ndarray] = "louvain",
         louvain_resolution: float = 0.1,
         leiden_resolution: float = 1.0,
@@ -136,7 +136,7 @@ class NeighborhoodsAPI:
         Args:
             network (nx.Graph): The network graph.
-            annotations (Dict[str, Any]): The annotations associated with the network.
+            annotation (Dict[str, Any]): The annotation associated with the network.
             distance_metric (str, List, Tuple, or np.ndarray, optional): The distance metric(s) to use. Can be a string for one
                 metric or a list/tuple/ndarray of metrics ('greedy_modularity', 'louvain', 'leiden', 'label_propagation',
                 'markov_clustering', 'walktrap', 'spinglass'). Defaults to 'louvain'.
@@ -145,7 +145,7 @@ class NeighborhoodsAPI:
             fraction_shortest_edges (float, List, Tuple, or np.ndarray, optional): Shortest edge rank fraction threshold(s) for creating subgraphs.
                 Can be a single float for one threshold or a list/tuple of floats corresponding to multiple thresholds.
                 Defaults to 0.5.
-            null_distribution (str, optional): Type of null distribution ('network' or 'annotations'). Defaults to "network".
+            null_distribution (str, optional): Type of null distribution ('network' or 'annotation'). Defaults to "network".
             random_seed (int, optional): Seed for random number generation. Defaults to 888.
         Returns:
@@ -155,7 +155,7 @@ class NeighborhoodsAPI:
         # Compute neighborhood significance using the hypergeometric test
         return self._load_neighborhoods_by_statistical_test(
             network=network,
-            annotations=annotations,
+            annotation=annotation,
             distance_metric=distance_metric,
             louvain_resolution=louvain_resolution,
             leiden_resolution=leiden_resolution,
@@ -166,10 +166,10 @@ class NeighborhoodsAPI:
             statistical_test_function=compute_hypergeom_test,
         )
-    def load_neighborhoods_by_permutation(
+    def load_neighborhoods_permutation(
         self,
         network: nx.Graph,
-        annotations: Dict[str, Any],
+        annotation: Dict[str, Any],
         distance_metric: Union[str, List, Tuple, np.ndarray] = "louvain",
         louvain_resolution: float = 0.1,
         leiden_resolution: float = 1.0,
@@ -184,7 +184,7 @@ class NeighborhoodsAPI:
         Args:
             network (nx.Graph): The network graph.
-            annotations (Dict[str, Any]): The annotations associated with the network.
+            annotation (Dict[str, Any]): The annotation associated with the network.
             distance_metric (str, List, Tuple, or np.ndarray, optional): The distance metric(s) to use. Can be a string for one
                 metric or a list/tuple/ndarray of metrics ('greedy_modularity', 'louvain', 'leiden', 'label_propagation',
                 'markov_clustering', 'walktrap', 'spinglass'). Defaults to 'louvain'.
@@ -194,7 +194,7 @@ class NeighborhoodsAPI:
                 Can be a single float for one threshold or a list/tuple of floats corresponding to multiple thresholds.
                 Defaults to 0.5.
             score_metric (str, optional): Scoring metric for neighborhood significance. Defaults to "sum".
-            null_distribution (str, optional): Type of null distribution ('network' or 'annotations'). Defaults to "network".
+            null_distribution (str, optional): Type of null distribution ('network' or 'annotation'). Defaults to "network".
             num_permutations (int, optional): Number of permutations for significance testing. Defaults to 1000.
             random_seed (int, optional): Seed for random number generation. Defaults to 888.
             max_workers (int, optional): Maximum number of workers for parallel computation. Defaults to 1.
@@ -210,7 +210,7 @@ class NeighborhoodsAPI:
         # Compute neighborhood significance using the permutation test
         return self._load_neighborhoods_by_statistical_test(
             network=network,
-            annotations=annotations,
+            annotation=annotation,
             distance_metric=distance_metric,
             louvain_resolution=louvain_resolution,
             leiden_resolution=leiden_resolution,
@@ -224,10 +224,10 @@ class NeighborhoodsAPI:
             max_workers=max_workers,
         )
-    def load_neighborhoods_by_poisson(
+    def load_neighborhoods_poisson(
         self,
         network: nx.Graph,
-        annotations: Dict[str, Any],
+        annotation: Dict[str, Any],
         distance_metric: Union[str, List, Tuple, np.ndarray] = "louvain",
         louvain_resolution: float = 0.1,
         leiden_resolution: float = 1.0,
@@ -239,7 +239,7 @@ class NeighborhoodsAPI:
         Args:
             network (nx.Graph): The network graph.
-            annotations (Dict[str, Any]): The annotations associated with the network.
+            annotation (Dict[str, Any]): The annotation associated with the network.
             distance_metric (str, List, Tuple, or np.ndarray, optional): The distance metric(s) to use. Can be a string for one
                 metric or a list/tuple/ndarray of metrics ('greedy_modularity', 'louvain', 'leiden', 'label_propagation',
                 'markov_clustering', 'walktrap', 'spinglass'). Defaults to 'louvain'.
@@ -248,7 +248,7 @@ class NeighborhoodsAPI:
             fraction_shortest_edges (float, List, Tuple, or np.ndarray, optional): Shortest edge rank fraction threshold(s) for creating subgraphs.
                 Can be a single float for one threshold or a list/tuple of floats corresponding to multiple thresholds.
                 Defaults to 0.5.
-            null_distribution (str, optional): Type of null distribution ('network' or 'annotations'). Defaults to "network".
+            null_distribution (str, optional): Type of null distribution ('network' or 'annotation'). Defaults to "network".
             random_seed (int, optional): Seed for random number generation. Defaults to 888.
         Returns:
@@ -258,7 +258,7 @@ class NeighborhoodsAPI:
         # Compute neighborhood significance using the Poisson test
         return self._load_neighborhoods_by_statistical_test(
             network=network,
-            annotations=annotations,
+            annotation=annotation,
             distance_metric=distance_metric,
             louvain_resolution=louvain_resolution,
             leiden_resolution=leiden_resolution,
@@ -269,10 +269,10 @@ class NeighborhoodsAPI:
             statistical_test_function=compute_poisson_test,
         )
-    def load_neighborhoods_by_zscore(
+    def load_neighborhoods_zscore(
         self,
         network: nx.Graph,
-        annotations: Dict[str, Any],
+        annotation: Dict[str, Any],
         distance_metric: Union[str, List, Tuple, np.ndarray] = "louvain",
         louvain_resolution: float = 0.1,
         leiden_resolution: float = 1.0,
@@ -284,7 +284,7 @@ class NeighborhoodsAPI:
         Args:
             network (nx.Graph): The network graph.
-            annotations (Dict[str, Any]): The annotations associated with the network.
+            annotation (Dict[str, Any]): The annotation associated with the network.
             distance_metric (str, List, Tuple, or np.ndarray, optional): The distance metric(s) to use. Can be a string for one
                 metric or a list/tuple/ndarray of metrics ('greedy_modularity', 'louvain', 'leiden', 'label_propagation',
                 'markov_clustering', 'walktrap', 'spinglass'). Defaults to 'louvain'.
@@ -293,7 +293,7 @@ class NeighborhoodsAPI:
             fraction_shortest_edges (float, List, Tuple, or np.ndarray, optional): Shortest edge rank fraction threshold(s) for creating subgraphs.
                 Can be a single float for one threshold or a list/tuple of floats corresponding to multiple thresholds.
                 Defaults to 0.5.
-            null_distribution (str, optional): Type of null distribution ('network' or 'annotations'). Defaults to "network".
+            null_distribution (str, optional): Type of null distribution ('network' or 'annotation'). Defaults to "network".
             random_seed (int, optional): Seed for random number generation. Defaults to 888.
         Returns:
@@ -303,7 +303,7 @@ class NeighborhoodsAPI:
         # Compute neighborhood significance using the z-score test
         return self._load_neighborhoods_by_statistical_test(
             network=network,
-            annotations=annotations,
+            annotation=annotation,
             distance_metric=distance_metric,
             louvain_resolution=louvain_resolution,
             leiden_resolution=leiden_resolution,
@@ -317,7 +317,7 @@ class NeighborhoodsAPI:
     def _load_neighborhoods_by_statistical_test(
         self,
         network: nx.Graph,
-        annotations: Dict[str, Any],
+        annotation: Dict[str, Any],
         distance_metric: Union[str, List, Tuple, np.ndarray] = "louvain",
         louvain_resolution: float = 0.1,
         leiden_resolution: float = 1.0,
@@ -332,7 +332,7 @@ class NeighborhoodsAPI:
         Args:
             network (nx.Graph): The input network graph.
-            annotations (Dict[str, Any]): Annotation data associated with the network, including a "matrix" key with annotation values.
+            annotation (Dict[str, Any]): Annotation data associated with the network, including a "matrix" key with annotation values.
             distance_metric (Union[str, List, Tuple, np.ndarray], optional): The distance metric or clustering method to define neighborhoods.
                 Can be a string specifying one method (e.g., 'louvain', 'leiden') or a collection of methods.
                 Defaults to "louvain".
@@ -340,13 +340,13 @@ class NeighborhoodsAPI:
             leiden_resolution (float, optional): Resolution parameter for Leiden clustering. Defaults to 1.0.
             fraction_shortest_edges (Union[float, List, Tuple, np.ndarray], optional): Fraction of shortest edges to consider for creating subgraphs.
                 Can be a single value or a collection of thresholds for flexibility. Defaults to 0.5.
-            null_distribution (str, optional): The type of null distribution to use ('network' or 'annotations').
+            null_distribution (str, optional): The type of null distribution to use ('network' or 'annotation').
                 Defaults to "network".
             random_seed (int, optional): Seed for random number generation to ensure reproducibility. Defaults to 888.
             statistical_test_key (str, optional): Key or name of the statistical test to be applied (e.g., "hypergeom", "poisson").
                 Used for logging and debugging. Defaults to "hypergeom".
             statistical_test_function (Any, optional): The function implementing the statistical test.
-                It should accept neighborhoods, annotations, null distribution, and additional kwargs.
+                It should accept neighborhoods, annotation, null distribution, and additional kwargs.
                 Defaults to `compute_hypergeom_test`.
             **kwargs: Additional parameters to be passed to the statistical test function.
@@ -381,7 +381,7 @@ class NeighborhoodsAPI:
         # Apply statistical test function to compute neighborhood significance
         neighborhood_significance = statistical_test_function(
             neighborhoods=neighborhoods,
-            annotations=annotations["matrix"],
+            annotation=annotation["matrix"],
             null_distribution=null_distribution,
             **kwargs,
         )

risk/neighborhoods/community.py CHANGED Viewed

@@ -8,7 +8,7 @@ import igraph as ig
 import markov_clustering as mc
 import networkx as nx
 import numpy as np
-from leidenalg import find_partition, RBConfigurationVertexPartition
+from leidenalg import RBConfigurationVertexPartition, find_partition
 from networkx.algorithms.community import greedy_modularity_communities
 from scipy.sparse import csr_matrix
@@ -27,6 +27,10 @@ def calculate_greedy_modularity_neighborhoods(
     Returns:
         csr_matrix: A binary neighborhood matrix (CSR) where nodes in the same community have 1, and others have 0.
+    Raises:
+        ValueError: If the subgraph has no edges after filtering.
+        Warning: If the resulting subgraph has no edges after filtering.
     """
     # Create a subgraph with the shortest edges based on the rank fraction
     subnetwork = _create_percentile_limited_subgraph(
@@ -67,6 +71,10 @@ def calculate_label_propagation_neighborhoods(
     Returns:
         csr_matrix: A binary neighborhood matrix (CSR) on Label Propagation.
+    Raises:
+        ValueError: If the subgraph has no edges after filtering.
+        Warning: If the resulting subgraph has no edges after filtering.
     """
     # Create a subgraph with the shortest edges based on the rank fraction
     subnetwork = _create_percentile_limited_subgraph(
@@ -115,6 +123,10 @@ def calculate_leiden_neighborhoods(
     Returns:
         csr_matrix: A binary neighborhood matrix (CSR) where nodes in the same community have 1, and others have 0.
+    Raises:
+        ValueError: If the subgraph has no edges after filtering.
+        Warning: If the resulting subgraph has no edges after filtering.
     """
     # Create a subgraph with the shortest edges based on the rank fraction
     subnetwork = _create_percentile_limited_subgraph(
@@ -167,6 +179,10 @@ def calculate_louvain_neighborhoods(
     Returns:
         csr_matrix: A binary neighborhood matrix in CSR format.
+    Raises:
+        ValueError: If the subgraph has no edges after filtering.
+        Warning: If the resulting subgraph has no edges after filtering.
     """
     # Create a subgraph with the shortest edges based on the rank fraction
     subnetwork = _create_percentile_limited_subgraph(
@@ -215,9 +231,10 @@ def calculate_markov_clustering_neighborhoods(
     Returns:
         csr_matrix: A binary neighborhood matrix (CSR) on Markov Clustering.
-    Warning:
-        This function temporarily converts the adjacency matrix to a dense format, which may lead to
-        high memory consumption for large graphs.
+    Raises:
+        ValueError: If the subgraph has no edges after filtering.
+        RuntimeError: If MCL fails to run.
+        Warning: If the resulting subgraph has no edges after filtering.
     """
     # Create a subgraph with the shortest edges based on the rank fraction
     subnetwork = _create_percentile_limited_subgraph(
@@ -283,6 +300,10 @@ def calculate_spinglass_neighborhoods(
     Returns:
         csr_matrix: A binary neighborhood matrix (CSR) based on Spinglass communities.
+    Raises:
+        ValueError: If the subgraph has no edges after filtering.
+        Warning: If the resulting subgraph has no edges after filtering.
     """
     # Create a subgraph with the shortest edges based on the rank fraction
     subnetwork = _create_percentile_limited_subgraph(
@@ -343,6 +364,10 @@ def calculate_walktrap_neighborhoods(
     Returns:
         csr_matrix: A binary neighborhood matrix (CSR) on Walktrap communities.
+    Raises:
+        ValueError: If the subgraph has no edges after filtering.
+        Warning: If the resulting subgraph has no edges after filtering.
     """
     # Create a subgraph with the shortest edges based on the rank fraction
     subnetwork = _create_percentile_limited_subgraph(
@@ -384,6 +409,10 @@ def _create_percentile_limited_subgraph(G: nx.Graph, fraction_shortest_edges: fl
     Returns:
         nx.Graph: A subgraph with nodes and edges where the edges are within the shortest
         specified rank fraction.
+    Raises:
+        ValueError: If no edges with 'length' attributes are found in the graph.
+        Warning: If the resulting subgraph has no edges after filtering.
     """
     # Step 1: Extract edges with their lengths
     edges_with_length = [(u, v, d) for u, v, d in G.edges(data=True) if "length" in d]

risk/neighborhoods/domains.py CHANGED Viewed

@@ -9,19 +9,18 @@ from typing import Tuple, Union
 import numpy as np
 import pandas as pd
 from numpy.linalg import LinAlgError
-from scipy.cluster.hierarchy import linkage, fcluster
+from scipy.cluster.hierarchy import fcluster, linkage
 from sklearn.metrics import silhouette_score
 from tqdm import tqdm
-from risk.annotations import get_weighted_description
+from risk.annotation import get_weighted_description
 from risk.log import logger
 # Define constants for clustering
 # fmt: off
 LINKAGE_METHODS = {"single", "complete", "average", "weighted", "centroid", "median", "ward"}
 LINKAGE_METRICS = {
-    "braycurtis","canberra", "chebyshev", "cityblock", "correlation", "cosine", "dice", "euclidean",
+    "braycurtis", "canberra", "chebyshev", "cityblock", "correlation", "cosine", "dice", "euclidean",
     "hamming", "jaccard", "jensenshannon", "kulczynski1", "mahalanobis", "matching", "minkowski",
     "rogerstanimoto", "russellrao", "seuclidean", "sokalmichener", "sokalsneath", "sqeuclidean", "yule",
 }
@@ -29,7 +28,7 @@ LINKAGE_METRICS = {
 def define_domains(
-    top_annotations: pd.DataFrame,
+    top_annotation: pd.DataFrame,
     significant_neighborhoods_significance: np.ndarray,
     linkage_criterion: str,
     linkage_method: str,
@@ -40,7 +39,7 @@ def define_domains(
     handling errors by assigning unique domains when clustering fails.
     Args:
-        top_annotations (pd.DataFrame): DataFrame of top annotations data for the network nodes.
+        top_annotation (pd.DataFrame): DataFrame of top annotations data for the network nodes.
         significant_neighborhoods_significance (np.ndarray): The binary significance matrix below alpha.
         linkage_criterion (str): The clustering criterion for defining groups. Choose "off" to disable clustering.
         linkage_method (str): The linkage method for clustering. Choose "auto" to optimize.
@@ -49,13 +48,16 @@ def define_domains(
     Returns:
         pd.DataFrame: DataFrame with the primary domain for each node.
+    Raises:
+        ValueError: If the clustering criterion is set to "off" or if an error occurs during clustering.
     """
     try:
         if linkage_criterion == "off":
             raise ValueError("Clustering is turned off.")
         # Transpose the matrix to cluster annotations
-        m = significant_neighborhoods_significance[:, top_annotations["significant_annotations"]].T
+        m = significant_neighborhoods_significance[:, top_annotation["significant_annotation"]].T
         # Safeguard the matrix by replacing NaN, Inf, and -Inf values
         m = _safeguard_matrix(m)
         # Optimize silhouette score across different linkage methods and distance metrics
@@ -69,27 +71,23 @@ def define_domains(
         )
         # Calculate the optimal threshold for clustering
         max_d_optimal = np.max(Z[:, 2]) * best_threshold
-        # Assign domains to the annotations matrix
+        # Assign domains to the annotation matrix
         domains = fcluster(Z, max_d_optimal, criterion=linkage_criterion)
-        top_annotations["domain"] = 0
-        top_annotations.loc[top_annotations["significant_annotations"], "domain"] = domains
+        top_annotation["domain"] = 0
+        top_annotation.loc[top_annotation["significant_annotation"], "domain"] = domains
     except (ValueError, LinAlgError):
         # If a ValueError is encountered, handle it by assigning unique domains
-        n_rows = len(top_annotations)
+        n_rows = len(top_annotation)
         if linkage_criterion == "off":
-            logger.warning(
-                f"Clustering is turned off. Skipping clustering and assigning {n_rows} unique domains."
-            )
+            logger.warning("Clustering is turned off. Skipping clustering.")
         else:
-            logger.error(
-                f"Error encountered. Skipping clustering and assigning {n_rows} unique domains."
-            )
-        top_annotations["domain"] = range(1, n_rows + 1)  # Assign unique domains
+            logger.error("Error encountered. Skipping clustering.")
+        top_annotation["domain"] = range(1, n_rows + 1)  # Assign unique domains
     # Create DataFrames to store domain information
     node_to_significance = pd.DataFrame(
         data=significant_neighborhoods_significance,
-        columns=[top_annotations.index.values, top_annotations["domain"]],
+        columns=[top_annotation.index.values, top_annotation["domain"]],
     )
     node_to_domain = node_to_significance.T.groupby(level="domain").sum().T
@@ -110,15 +108,15 @@ def define_domains(
 def trim_domains(
     domains: pd.DataFrame,
-    top_annotations: pd.DataFrame,
+    top_annotation: pd.DataFrame,
     min_cluster_size: int = 5,
     max_cluster_size: int = 1000,
-) -> Tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]:
+) -> Tuple[pd.DataFrame, pd.DataFrame]:
     """Trim domains that do not meet size criteria and find outliers.
     Args:
         domains (pd.DataFrame): DataFrame of domain data for the network nodes.
-        top_annotations (pd.DataFrame): DataFrame of top annotations data for the network nodes.
+        top_annotation (pd.DataFrame): DataFrame of top annotations data for the network nodes.
         min_cluster_size (int, optional): Minimum size of a cluster to be retained. Defaults to 5.
         max_cluster_size (int, optional): Maximum size of a cluster to be retained. Defaults to 1000.
@@ -137,21 +135,21 @@ def trim_domains(
     invalid_domain_id = 888888
     invalid_domain_ids = {0, invalid_domain_id}
     # Mark domains to be removed
-    top_annotations["domain"] = top_annotations["domain"].replace(to_remove, invalid_domain_id)
+    top_annotation["domain"] = top_annotation["domain"].replace(to_remove, invalid_domain_id)
     domains.loc[domains["primary_domain"].isin(to_remove), ["primary_domain"]] = invalid_domain_id
     # Normalize "num significant neighborhoods" by percentile for each domain and scale to 0-10
-    top_annotations["normalized_value"] = top_annotations.groupby("domain")[
+    top_annotation["normalized_value"] = top_annotation.groupby("domain")[
         "significant_neighborhood_significance_sums"
     ].transform(lambda x: (x.rank(pct=True) * 10).apply(np.ceil).astype(int))
     # Modify the lambda function to pass both full_terms and significant_significance_score
-    top_annotations["combined_terms"] = top_annotations.apply(
+    top_annotation["combined_terms"] = top_annotation.apply(
         lambda row: " ".join([str(row["full_terms"])] * row["normalized_value"]), axis=1
     )
     # Perform the groupby operation while retaining the other columns and adding the weighting with significance scores
     domain_labels = (
-        top_annotations.groupby("domain")
+        top_annotation.groupby("domain")
         .agg(
             full_terms=("full_terms", lambda x: list(x)),
             significance_scores=("significant_significance_score", lambda x: list(x)),
@@ -231,7 +229,7 @@ def _optimize_silhouette_across_linkage_and_metrics(
     # Initialize best overall values
     best_overall_method = linkage_method
     best_overall_metric = linkage_metric
-    best_overall_threshold = linkage_threshold
+    best_overall_threshold = 0.0
     best_overall_score = -np.inf
     # Set linkage methods and metrics to all combinations if "auto" is selected
@@ -242,7 +240,7 @@ def _optimize_silhouette_across_linkage_and_metrics(
     # Evaluating optimal linkage method and metric
     for method, metric in tqdm(
         product(linkage_methods, linkage_metrics),
-        desc="Evaluating optimal linkage method and metric",
+        desc="Evaluating linkage methods and metrics",
         total=total_combinations,
         bar_format="{l_bar}{bar}| {n_fmt}/{total_fmt} [{elapsed}<{remaining}]",
     ):

risk/neighborhoods/neighborhoods.py CHANGED Viewed

@@ -13,6 +13,7 @@ from scipy.sparse import csr_matrix
 from sklearn.exceptions import DataConversionWarning
 from sklearn.metrics.pairwise import cosine_similarity
+from risk.log import logger
 from risk.neighborhoods.community import (
     calculate_greedy_modularity_neighborhoods,
     calculate_label_propagation_neighborhoods,
@@ -22,7 +23,6 @@ from risk.neighborhoods.community import (
     calculate_spinglass_neighborhoods,
     calculate_walktrap_neighborhoods,
 )
-from risk.log import logger
 # Suppress DataConversionWarning
 warnings.filterwarnings(action="ignore", category=DataConversionWarning)
@@ -48,6 +48,9 @@ def get_network_neighborhoods(
     Returns:
         csr_matrix: The combined neighborhood matrix.
+    Raises:
+        ValueError: If the number of distance metrics does not match the number of edge length thresholds.
     """
     # Set random seed for reproducibility
     random.seed(random_seed)
@@ -490,6 +493,9 @@ def _calculate_threshold(median_distances: List, distance_threshold: float) -> f
     Returns:
         float: The calculated distance threshold value.
+    Raises:
+        ValueError: If no significant annotation is found in the median distances.
     """
     # Sort the median distances
     sorted_distances = np.sort(median_distances)
@@ -500,7 +506,7 @@ def _calculate_threshold(median_distances: List, distance_threshold: float) -> f
     try:
         smoothed_distances = np.interp(interpolated_percentiles, rank_percentiles, sorted_distances)
     except ValueError as e:
-        raise ValueError("No significant annotations found.") from e
+        raise ValueError("No significant annotation found.") from e
     # Determine the index corresponding to the distance threshold
     threshold_index = int(np.ceil(distance_threshold * len(smoothed_distances))) - 1

risk/neighborhoods/stats/__init__.py ADDED Viewed

@@ -0,0 +1,13 @@
+"""
+risk/neighborhoods/stats
+~~~~~~~~~~~~~~~~~~~~~~~~
+"""
+from risk.neighborhoods.stats.permutation import compute_permutation_test
+from risk.neighborhoods.stats.tests import (
+    compute_binom_test,
+    compute_chi2_test,
+    compute_hypergeom_test,
+    compute_poisson_test,
+    compute_zscore_test,
+)

risk/neighborhoods/stats/permutation/__init__.py ADDED Viewed

@@ -0,0 +1,6 @@
+"""
+risk/neighborhoods/stats/permutation
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+"""
+from risk.neighborhoods.stats.permutation.permutation import compute_permutation_test

risk-network 0.0.11__py3-none-any.whl → 0.0.12__py3-none-any.whl

risk-network 0.0.11__py3-none-any.whl → 0.0.12__py3-none-any.whl