PyPI - risk-network - Versions diffs - 0.0.12b1__py3-none-any.whl → 0.0.12b3__py3-none-any.whl - Mend

risk-network 0.0.12b1__py3-none-any.whl → 0.0.12b3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (31) hide show

risk/__init__.py +1 -1
risk/annotation/__init__.py +10 -0
risk/{annotations/annotations.py → annotation/annotation.py} +44 -44
risk/{annotations → annotation}/io.py +46 -46
risk/{annotations → annotation}/nltk_setup.py +4 -4
risk/log/parameters.py +5 -5
risk/neighborhoods/api.py +36 -36
risk/neighborhoods/domains.py +20 -24
risk/neighborhoods/neighborhoods.py +4 -4
risk/neighborhoods/stats/permutation/permutation.py +17 -17
risk/neighborhoods/stats/permutation/test_functions.py +2 -2
risk/neighborhoods/stats/tests.py +41 -41
risk/network/graph/api.py +17 -17
risk/network/graph/graph.py +17 -11
risk/network/graph/summary.py +10 -10
risk/network/io.py +12 -12
risk/network/plotter/canvas.py +1 -1
risk/network/plotter/contour.py +3 -3
risk/network/plotter/labels.py +72 -74
risk/network/plotter/network.py +6 -6
risk/network/plotter/plotter.py +6 -6
risk/network/plotter/utils/colors.py +12 -8
risk/network/plotter/utils/layout.py +3 -3
risk/risk.py +2 -2
{risk_network-0.0.12b1.dist-info → risk_network-0.0.12b3.dist-info}/METADATA +1 -1
risk_network-0.0.12b3.dist-info/RECORD +40 -0
{risk_network-0.0.12b1.dist-info → risk_network-0.0.12b3.dist-info}/WHEEL +1 -1
risk/annotations/__init__.py +0 -10
risk_network-0.0.12b1.dist-info/RECORD +0 -40
{risk_network-0.0.12b1.dist-info → risk_network-0.0.12b3.dist-info}/licenses/LICENSE +0 -0
{risk_network-0.0.12b1.dist-info → risk_network-0.0.12b3.dist-info}/top_level.txt +0 -0

risk/neighborhoods/api.py CHANGED Viewed

@@ -28,13 +28,13 @@ class NeighborhoodsAPI:
     The NeighborhoodsAPI class provides methods to load neighborhood results from statistical tests.
     """
-    def __init__() -> None:
+    def __init__(self) -> None:
         pass
-    def load_neighborhoods_by_binom(
+    def load_neighborhoods_binom(
         self,
         network: nx.Graph,
-        annotations: Dict[str, Any],
+        annotation: Dict[str, Any],
         distance_metric: Union[str, List, Tuple, np.ndarray] = "louvain",
         louvain_resolution: float = 0.1,
         leiden_resolution: float = 1.0,
@@ -46,7 +46,7 @@ class NeighborhoodsAPI:
         Args:
             network (nx.Graph): The network graph.
-            annotations (Dict[str, Any]): The annotations associated with the network.
+            annotation (Dict[str, Any]): The annotation associated with the network.
             distance_metric (str, List, Tuple, or np.ndarray, optional): The distance metric(s) to use. Can be a string for one
                 metric or a list/tuple/ndarray of metrics ('greedy_modularity', 'louvain', 'leiden', 'label_propagation',
                 'markov_clustering', 'walktrap', 'spinglass'). Defaults to 'louvain'.
@@ -55,7 +55,7 @@ class NeighborhoodsAPI:
             fraction_shortest_edges (float, List, Tuple, or np.ndarray, optional): Shortest edge rank fraction threshold(s) for creating subgraphs.
                 Can be a single float for one threshold or a list/tuple of floats corresponding to multiple thresholds.
                 Defaults to 0.5.
-            null_distribution (str, optional): Type of null distribution ('network' or 'annotations'). Defaults to "network".
+            null_distribution (str, optional): Type of null distribution ('network' or 'annotation'). Defaults to "network".
             random_seed (int, optional): Seed for random number generation. Defaults to 888.
         Returns:
@@ -65,7 +65,7 @@ class NeighborhoodsAPI:
         # Compute neighborhood significance using the binomial test
         return self._load_neighborhoods_by_statistical_test(
             network=network,
-            annotations=annotations,
+            annotation=annotation,
             distance_metric=distance_metric,
             louvain_resolution=louvain_resolution,
             leiden_resolution=leiden_resolution,
@@ -76,10 +76,10 @@ class NeighborhoodsAPI:
             statistical_test_function=compute_binom_test,
         )
-    def load_neighborhoods_by_chi2(
+    def load_neighborhoods_chi2(
         self,
         network: nx.Graph,
-        annotations: Dict[str, Any],
+        annotation: Dict[str, Any],
         distance_metric: Union[str, List, Tuple, np.ndarray] = "louvain",
         louvain_resolution: float = 0.1,
         leiden_resolution: float = 1.0,
@@ -91,7 +91,7 @@ class NeighborhoodsAPI:
         Args:
             network (nx.Graph): The network graph.
-            annotations (Dict[str, Any]): The annotations associated with the network.
+            annotation (Dict[str, Any]): The annotation associated with the network.
             distance_metric (str, List, Tuple, or np.ndarray, optional): The distance metric(s) to use. Can be a string for one
                 metric or a list/tuple/ndarray of metrics ('greedy_modularity', 'louvain', 'leiden', 'label_propagation',
                 'markov_clustering', 'walktrap', 'spinglass'). Defaults to 'louvain'.
@@ -100,7 +100,7 @@ class NeighborhoodsAPI:
             fraction_shortest_edges (float, List, Tuple, or np.ndarray, optional): Shortest edge rank fraction threshold(s) for creating subgraphs.
                 Can be a single float for one threshold or a list/tuple of floats corresponding to multiple thresholds.
                 Defaults to 0.5.
-            null_distribution (str, optional): Type of null distribution ('network' or 'annotations'). Defaults to "network".
+            null_distribution (str, optional): Type of null distribution ('network' or 'annotation'). Defaults to "network".
             random_seed (int, optional): Seed for random number generation. Defaults to 888.
         Returns:
@@ -110,7 +110,7 @@ class NeighborhoodsAPI:
         # Compute neighborhood significance using the chi-squared test
         return self._load_neighborhoods_by_statistical_test(
             network=network,
-            annotations=annotations,
+            annotation=annotation,
             distance_metric=distance_metric,
             louvain_resolution=louvain_resolution,
             leiden_resolution=leiden_resolution,
@@ -121,10 +121,10 @@ class NeighborhoodsAPI:
             statistical_test_function=compute_chi2_test,
         )
-    def load_neighborhoods_by_hypergeom(
+    def load_neighborhoods_hypergeom(
         self,
         network: nx.Graph,
-        annotations: Dict[str, Any],
+        annotation: Dict[str, Any],
         distance_metric: Union[str, List, Tuple, np.ndarray] = "louvain",
         louvain_resolution: float = 0.1,
         leiden_resolution: float = 1.0,
@@ -136,7 +136,7 @@ class NeighborhoodsAPI:
         Args:
             network (nx.Graph): The network graph.
-            annotations (Dict[str, Any]): The annotations associated with the network.
+            annotation (Dict[str, Any]): The annotation associated with the network.
             distance_metric (str, List, Tuple, or np.ndarray, optional): The distance metric(s) to use. Can be a string for one
                 metric or a list/tuple/ndarray of metrics ('greedy_modularity', 'louvain', 'leiden', 'label_propagation',
                 'markov_clustering', 'walktrap', 'spinglass'). Defaults to 'louvain'.
@@ -145,7 +145,7 @@ class NeighborhoodsAPI:
             fraction_shortest_edges (float, List, Tuple, or np.ndarray, optional): Shortest edge rank fraction threshold(s) for creating subgraphs.
                 Can be a single float for one threshold or a list/tuple of floats corresponding to multiple thresholds.
                 Defaults to 0.5.
-            null_distribution (str, optional): Type of null distribution ('network' or 'annotations'). Defaults to "network".
+            null_distribution (str, optional): Type of null distribution ('network' or 'annotation'). Defaults to "network".
             random_seed (int, optional): Seed for random number generation. Defaults to 888.
         Returns:
@@ -155,7 +155,7 @@ class NeighborhoodsAPI:
         # Compute neighborhood significance using the hypergeometric test
         return self._load_neighborhoods_by_statistical_test(
             network=network,
-            annotations=annotations,
+            annotation=annotation,
             distance_metric=distance_metric,
             louvain_resolution=louvain_resolution,
             leiden_resolution=leiden_resolution,
@@ -166,10 +166,10 @@ class NeighborhoodsAPI:
             statistical_test_function=compute_hypergeom_test,
         )
-    def load_neighborhoods_by_permutation(
+    def load_neighborhoods_permutation(
         self,
         network: nx.Graph,
-        annotations: Dict[str, Any],
+        annotation: Dict[str, Any],
         distance_metric: Union[str, List, Tuple, np.ndarray] = "louvain",
         louvain_resolution: float = 0.1,
         leiden_resolution: float = 1.0,
@@ -184,7 +184,7 @@ class NeighborhoodsAPI:
         Args:
             network (nx.Graph): The network graph.
-            annotations (Dict[str, Any]): The annotations associated with the network.
+            annotation (Dict[str, Any]): The annotation associated with the network.
             distance_metric (str, List, Tuple, or np.ndarray, optional): The distance metric(s) to use. Can be a string for one
                 metric or a list/tuple/ndarray of metrics ('greedy_modularity', 'louvain', 'leiden', 'label_propagation',
                 'markov_clustering', 'walktrap', 'spinglass'). Defaults to 'louvain'.
@@ -194,7 +194,7 @@ class NeighborhoodsAPI:
                 Can be a single float for one threshold or a list/tuple of floats corresponding to multiple thresholds.
                 Defaults to 0.5.
             score_metric (str, optional): Scoring metric for neighborhood significance. Defaults to "sum".
-            null_distribution (str, optional): Type of null distribution ('network' or 'annotations'). Defaults to "network".
+            null_distribution (str, optional): Type of null distribution ('network' or 'annotation'). Defaults to "network".
             num_permutations (int, optional): Number of permutations for significance testing. Defaults to 1000.
             random_seed (int, optional): Seed for random number generation. Defaults to 888.
             max_workers (int, optional): Maximum number of workers for parallel computation. Defaults to 1.
@@ -210,7 +210,7 @@ class NeighborhoodsAPI:
         # Compute neighborhood significance using the permutation test
         return self._load_neighborhoods_by_statistical_test(
             network=network,
-            annotations=annotations,
+            annotation=annotation,
             distance_metric=distance_metric,
             louvain_resolution=louvain_resolution,
             leiden_resolution=leiden_resolution,
@@ -224,10 +224,10 @@ class NeighborhoodsAPI:
             max_workers=max_workers,
         )
-    def load_neighborhoods_by_poisson(
+    def load_neighborhoods_poisson(
         self,
         network: nx.Graph,
-        annotations: Dict[str, Any],
+        annotation: Dict[str, Any],
         distance_metric: Union[str, List, Tuple, np.ndarray] = "louvain",
         louvain_resolution: float = 0.1,
         leiden_resolution: float = 1.0,
@@ -239,7 +239,7 @@ class NeighborhoodsAPI:
         Args:
             network (nx.Graph): The network graph.
-            annotations (Dict[str, Any]): The annotations associated with the network.
+            annotation (Dict[str, Any]): The annotation associated with the network.
             distance_metric (str, List, Tuple, or np.ndarray, optional): The distance metric(s) to use. Can be a string for one
                 metric or a list/tuple/ndarray of metrics ('greedy_modularity', 'louvain', 'leiden', 'label_propagation',
                 'markov_clustering', 'walktrap', 'spinglass'). Defaults to 'louvain'.
@@ -248,7 +248,7 @@ class NeighborhoodsAPI:
             fraction_shortest_edges (float, List, Tuple, or np.ndarray, optional): Shortest edge rank fraction threshold(s) for creating subgraphs.
                 Can be a single float for one threshold or a list/tuple of floats corresponding to multiple thresholds.
                 Defaults to 0.5.
-            null_distribution (str, optional): Type of null distribution ('network' or 'annotations'). Defaults to "network".
+            null_distribution (str, optional): Type of null distribution ('network' or 'annotation'). Defaults to "network".
             random_seed (int, optional): Seed for random number generation. Defaults to 888.
         Returns:
@@ -258,7 +258,7 @@ class NeighborhoodsAPI:
         # Compute neighborhood significance using the Poisson test
         return self._load_neighborhoods_by_statistical_test(
             network=network,
-            annotations=annotations,
+            annotation=annotation,
             distance_metric=distance_metric,
             louvain_resolution=louvain_resolution,
             leiden_resolution=leiden_resolution,
@@ -269,10 +269,10 @@ class NeighborhoodsAPI:
             statistical_test_function=compute_poisson_test,
         )
-    def load_neighborhoods_by_zscore(
+    def load_neighborhoods_zscore(
         self,
         network: nx.Graph,
-        annotations: Dict[str, Any],
+        annotation: Dict[str, Any],
         distance_metric: Union[str, List, Tuple, np.ndarray] = "louvain",
         louvain_resolution: float = 0.1,
         leiden_resolution: float = 1.0,
@@ -284,7 +284,7 @@ class NeighborhoodsAPI:
         Args:
             network (nx.Graph): The network graph.
-            annotations (Dict[str, Any]): The annotations associated with the network.
+            annotation (Dict[str, Any]): The annotation associated with the network.
             distance_metric (str, List, Tuple, or np.ndarray, optional): The distance metric(s) to use. Can be a string for one
                 metric or a list/tuple/ndarray of metrics ('greedy_modularity', 'louvain', 'leiden', 'label_propagation',
                 'markov_clustering', 'walktrap', 'spinglass'). Defaults to 'louvain'.
@@ -293,7 +293,7 @@ class NeighborhoodsAPI:
             fraction_shortest_edges (float, List, Tuple, or np.ndarray, optional): Shortest edge rank fraction threshold(s) for creating subgraphs.
                 Can be a single float for one threshold or a list/tuple of floats corresponding to multiple thresholds.
                 Defaults to 0.5.
-            null_distribution (str, optional): Type of null distribution ('network' or 'annotations'). Defaults to "network".
+            null_distribution (str, optional): Type of null distribution ('network' or 'annotation'). Defaults to "network".
             random_seed (int, optional): Seed for random number generation. Defaults to 888.
         Returns:
@@ -303,7 +303,7 @@ class NeighborhoodsAPI:
         # Compute neighborhood significance using the z-score test
         return self._load_neighborhoods_by_statistical_test(
             network=network,
-            annotations=annotations,
+            annotation=annotation,
             distance_metric=distance_metric,
             louvain_resolution=louvain_resolution,
             leiden_resolution=leiden_resolution,
@@ -317,7 +317,7 @@ class NeighborhoodsAPI:
     def _load_neighborhoods_by_statistical_test(
         self,
         network: nx.Graph,
-        annotations: Dict[str, Any],
+        annotation: Dict[str, Any],
         distance_metric: Union[str, List, Tuple, np.ndarray] = "louvain",
         louvain_resolution: float = 0.1,
         leiden_resolution: float = 1.0,
@@ -332,7 +332,7 @@ class NeighborhoodsAPI:
         Args:
             network (nx.Graph): The input network graph.
-            annotations (Dict[str, Any]): Annotation data associated with the network, including a "matrix" key with annotation values.
+            annotation (Dict[str, Any]): Annotation data associated with the network, including a "matrix" key with annotation values.
             distance_metric (Union[str, List, Tuple, np.ndarray], optional): The distance metric or clustering method to define neighborhoods.
                 Can be a string specifying one method (e.g., 'louvain', 'leiden') or a collection of methods.
                 Defaults to "louvain".
@@ -340,13 +340,13 @@ class NeighborhoodsAPI:
             leiden_resolution (float, optional): Resolution parameter for Leiden clustering. Defaults to 1.0.
             fraction_shortest_edges (Union[float, List, Tuple, np.ndarray], optional): Fraction of shortest edges to consider for creating subgraphs.
                 Can be a single value or a collection of thresholds for flexibility. Defaults to 0.5.
-            null_distribution (str, optional): The type of null distribution to use ('network' or 'annotations').
+            null_distribution (str, optional): The type of null distribution to use ('network' or 'annotation').
                 Defaults to "network".
             random_seed (int, optional): Seed for random number generation to ensure reproducibility. Defaults to 888.
             statistical_test_key (str, optional): Key or name of the statistical test to be applied (e.g., "hypergeom", "poisson").
                 Used for logging and debugging. Defaults to "hypergeom".
             statistical_test_function (Any, optional): The function implementing the statistical test.
-                It should accept neighborhoods, annotations, null distribution, and additional kwargs.
+                It should accept neighborhoods, annotation, null distribution, and additional kwargs.
                 Defaults to `compute_hypergeom_test`.
             **kwargs: Additional parameters to be passed to the statistical test function.
@@ -381,7 +381,7 @@ class NeighborhoodsAPI:
         # Apply statistical test function to compute neighborhood significance
         neighborhood_significance = statistical_test_function(
             neighborhoods=neighborhoods,
-            annotations=annotations["matrix"],
+            annotation=annotation["matrix"],
             null_distribution=null_distribution,
             **kwargs,
         )

risk/neighborhoods/domains.py CHANGED Viewed

@@ -13,7 +13,7 @@ from scipy.cluster.hierarchy import fcluster, linkage
 from sklearn.metrics import silhouette_score
 from tqdm import tqdm
-from risk.annotations import get_weighted_description
+from risk.annotation import get_weighted_description
 from risk.log import logger
 # Define constants for clustering
@@ -28,7 +28,7 @@ LINKAGE_METRICS = {
 def define_domains(
-    top_annotations: pd.DataFrame,
+    top_annotation: pd.DataFrame,
     significant_neighborhoods_significance: np.ndarray,
     linkage_criterion: str,
     linkage_method: str,
@@ -39,7 +39,7 @@ def define_domains(
     handling errors by assigning unique domains when clustering fails.
     Args:
-        top_annotations (pd.DataFrame): DataFrame of top annotations data for the network nodes.
+        top_annotation (pd.DataFrame): DataFrame of top annotations data for the network nodes.
         significant_neighborhoods_significance (np.ndarray): The binary significance matrix below alpha.
         linkage_criterion (str): The clustering criterion for defining groups. Choose "off" to disable clustering.
         linkage_method (str): The linkage method for clustering. Choose "auto" to optimize.
@@ -57,7 +57,7 @@ def define_domains(
             raise ValueError("Clustering is turned off.")
         # Transpose the matrix to cluster annotations
-        m = significant_neighborhoods_significance[:, top_annotations["significant_annotations"]].T
+        m = significant_neighborhoods_significance[:, top_annotation["significant_annotation"]].T
         # Safeguard the matrix by replacing NaN, Inf, and -Inf values
         m = _safeguard_matrix(m)
         # Optimize silhouette score across different linkage methods and distance metrics
@@ -71,27 +71,23 @@ def define_domains(
         )
         # Calculate the optimal threshold for clustering
         max_d_optimal = np.max(Z[:, 2]) * best_threshold
-        # Assign domains to the annotations matrix
+        # Assign domains to the annotation matrix
         domains = fcluster(Z, max_d_optimal, criterion=linkage_criterion)
-        top_annotations["domain"] = 0
-        top_annotations.loc[top_annotations["significant_annotations"], "domain"] = domains
+        top_annotation["domain"] = 0
+        top_annotation.loc[top_annotation["significant_annotation"], "domain"] = domains
     except (ValueError, LinAlgError):
         # If a ValueError is encountered, handle it by assigning unique domains
-        n_rows = len(top_annotations)
+        n_rows = len(top_annotation)
         if linkage_criterion == "off":
-            logger.warning(
-                f"Clustering is turned off. Skipping clustering and assigning {n_rows} unique domains."
-            )
+            logger.warning("Clustering is turned off. Skipping clustering.")
         else:
-            logger.error(
-                f"Error encountered. Skipping clustering and assigning {n_rows} unique domains."
-            )
-        top_annotations["domain"] = range(1, n_rows + 1)  # Assign unique domains
+            logger.error("Error encountered. Skipping clustering.")
+        top_annotation["domain"] = range(1, n_rows + 1)  # Assign unique domains
     # Create DataFrames to store domain information
     node_to_significance = pd.DataFrame(
         data=significant_neighborhoods_significance,
-        columns=[top_annotations.index.values, top_annotations["domain"]],
+        columns=[top_annotation.index.values, top_annotation["domain"]],
     )
     node_to_domain = node_to_significance.T.groupby(level="domain").sum().T
@@ -112,15 +108,15 @@ def define_domains(
 def trim_domains(
     domains: pd.DataFrame,
-    top_annotations: pd.DataFrame,
+    top_annotation: pd.DataFrame,
     min_cluster_size: int = 5,
     max_cluster_size: int = 1000,
-) -> Tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]:
+) -> Tuple[pd.DataFrame, pd.DataFrame]:
     """Trim domains that do not meet size criteria and find outliers.
     Args:
         domains (pd.DataFrame): DataFrame of domain data for the network nodes.
-        top_annotations (pd.DataFrame): DataFrame of top annotations data for the network nodes.
+        top_annotation (pd.DataFrame): DataFrame of top annotations data for the network nodes.
         min_cluster_size (int, optional): Minimum size of a cluster to be retained. Defaults to 5.
         max_cluster_size (int, optional): Maximum size of a cluster to be retained. Defaults to 1000.
@@ -139,21 +135,21 @@ def trim_domains(
     invalid_domain_id = 888888
     invalid_domain_ids = {0, invalid_domain_id}
     # Mark domains to be removed
-    top_annotations["domain"] = top_annotations["domain"].replace(to_remove, invalid_domain_id)
+    top_annotation["domain"] = top_annotation["domain"].replace(to_remove, invalid_domain_id)
     domains.loc[domains["primary_domain"].isin(to_remove), ["primary_domain"]] = invalid_domain_id
     # Normalize "num significant neighborhoods" by percentile for each domain and scale to 0-10
-    top_annotations["normalized_value"] = top_annotations.groupby("domain")[
+    top_annotation["normalized_value"] = top_annotation.groupby("domain")[
         "significant_neighborhood_significance_sums"
     ].transform(lambda x: (x.rank(pct=True) * 10).apply(np.ceil).astype(int))
     # Modify the lambda function to pass both full_terms and significant_significance_score
-    top_annotations["combined_terms"] = top_annotations.apply(
+    top_annotation["combined_terms"] = top_annotation.apply(
         lambda row: " ".join([str(row["full_terms"])] * row["normalized_value"]), axis=1
     )
     # Perform the groupby operation while retaining the other columns and adding the weighting with significance scores
     domain_labels = (
-        top_annotations.groupby("domain")
+        top_annotation.groupby("domain")
         .agg(
             full_terms=("full_terms", lambda x: list(x)),
             significance_scores=("significant_significance_score", lambda x: list(x)),
@@ -233,7 +229,7 @@ def _optimize_silhouette_across_linkage_and_metrics(
     # Initialize best overall values
     best_overall_method = linkage_method
     best_overall_metric = linkage_metric
-    best_overall_threshold = linkage_threshold
+    best_overall_threshold = 0.0
     best_overall_score = -np.inf
     # Set linkage methods and metrics to all combinations if "auto" is selected

risk/neighborhoods/neighborhoods.py CHANGED Viewed

@@ -449,7 +449,7 @@ def _prune_neighbors(
     )
-def _get_euclidean_distance(node1: Any, node2: Any, network: nx.Graph) -> float:
+def _get_euclidean_distance(node1: Any, node2: Any, network: nx.Graph) -> np.floating[Any]:
     """Calculate the Euclidean distance between two nodes in the network.
     Args:
@@ -458,7 +458,7 @@ def _get_euclidean_distance(node1: Any, node2: Any, network: nx.Graph) -> float:
         network (nx.Graph): The network graph containing the nodes.
     Returns:
-        float: The Euclidean distance between the two nodes.
+        np.floating[Any]: The Euclidean distance between the two nodes.
     """
     pos1 = _get_node_position(network, node1)
     pos2 = _get_node_position(network, node2)
@@ -495,7 +495,7 @@ def _calculate_threshold(median_distances: List, distance_threshold: float) -> f
         float: The calculated distance threshold value.
     Raises:
-        ValueError: If no significant annotations are found in the median distances.
+        ValueError: If no significant annotation is found in the median distances.
     """
     # Sort the median distances
     sorted_distances = np.sort(median_distances)
@@ -506,7 +506,7 @@ def _calculate_threshold(median_distances: List, distance_threshold: float) -> f
     try:
         smoothed_distances = np.interp(interpolated_percentiles, rank_percentiles, sorted_distances)
     except ValueError as e:
-        raise ValueError("No significant annotations found.") from e
+        raise ValueError("No significant annotation found.") from e
     # Determine the index corresponding to the distance threshold
     threshold_index = int(np.ceil(distance_threshold * len(smoothed_distances))) - 1

risk/neighborhoods/stats/permutation/permutation.py CHANGED Viewed

@@ -17,7 +17,7 @@ from risk.neighborhoods.stats.permutation.test_functions import DISPATCH_TEST_FU
 def compute_permutation_test(
     neighborhoods: csr_matrix,
-    annotations: csr_matrix,
+    annotation: csr_matrix,
     score_metric: str = "sum",
     null_distribution: str = "network",
     num_permutations: int = 1000,
@@ -28,9 +28,9 @@ def compute_permutation_test(
     Args:
         neighborhoods (csr_matrix): Sparse binary matrix representing neighborhoods.
-        annotations (csr_matrix): Sparse binary matrix representing annotations.
+        annotation (csr_matrix): Sparse binary matrix representing annotation.
         score_metric (str, optional): Metric to use for scoring ('sum' or 'stdev'). Defaults to "sum".
-        null_distribution (str, optional): Type of null distribution ('network' or 'annotations'). Defaults to "network".
+        null_distribution (str, optional): Type of null distribution ('network' or 'annotation'). Defaults to "network".
         num_permutations (int, optional): Number of permutations to run. Defaults to 1000.
         random_seed (int, optional): Seed for random number generation. Defaults to 888.
         max_workers (int, optional): Number of workers for multiprocessing. Defaults to 1.
@@ -41,14 +41,14 @@ def compute_permutation_test(
     # Ensure that the matrices are in the correct format and free of NaN values
     # NOTE: Keep the data type as float32 to avoid locking issues with dot product operations
     neighborhoods = neighborhoods.astype(np.float32)
-    annotations = annotations.astype(np.float32)
+    annotation = annotation.astype(np.float32)
     # Retrieve the appropriate neighborhood score function based on the metric
     neighborhood_score_func = DISPATCH_TEST_FUNCTIONS[score_metric]
     # Run the permutation test to calculate depletion and enrichment counts
     counts_depletion, counts_enrichment = _run_permutation_test(
         neighborhoods=neighborhoods,
-        annotations=annotations,
+        annotation=annotation,
         neighborhood_score_func=neighborhood_score_func,
         null_distribution=null_distribution,
         num_permutations=num_permutations,
@@ -68,7 +68,7 @@ def compute_permutation_test(
 def _run_permutation_test(
     neighborhoods: csr_matrix,
-    annotations: csr_matrix,
+    annotation: csr_matrix,
     neighborhood_score_func: Callable,
     null_distribution: str = "network",
     num_permutations: int = 1000,
@@ -79,9 +79,9 @@ def _run_permutation_test(
     Args:
         neighborhoods (csr_matrix): Sparse binary matrix representing neighborhoods.
-        annotations (csr_matrix): Sparse binary matrix representing annotations.
+        annotation (csr_matrix): Sparse binary matrix representing annotation.
         neighborhood_score_func (Callable): Function to calculate neighborhood scores.
-        null_distribution (str, optional): Type of null distribution ('network' or 'annotations'). Defaults to "network".
+        null_distribution (str, optional): Type of null distribution ('network' or 'annotation'). Defaults to "network".
         num_permutations (int, optional): Number of permutations. Defaults to 1000.
         random_seed (int, optional): Seed for random number generation. Defaults to 888.
         max_workers (int, optional): Number of workers for multiprocessing. Defaults to 4.
@@ -96,17 +96,17 @@ def _run_permutation_test(
     rng = np.random.default_rng(seed=random_seed)
     # Determine the indices to use based on the null distribution type
     if null_distribution == "network":
-        idxs = range(annotations.shape[0])
-    elif null_distribution == "annotations":
-        idxs = np.nonzero(annotations.getnnz(axis=1) > 0)[0]
+        idxs = range(annotation.shape[0])
+    elif null_distribution == "annotation":
+        idxs = np.nonzero(annotation.getnnz(axis=1) > 0)[0]
     else:
         raise ValueError(
-            "Invalid null_distribution value. Choose either 'network' or 'annotations'."
+            "Invalid null_distribution value. Choose either 'network' or 'annotation'."
         )
-    # Replace NaNs with zeros in the sparse annotations matrix
-    annotations.data[np.isnan(annotations.data)] = 0
-    annotation_matrix_obsv = annotations[idxs]
+    # Replace NaNs with zeros in the sparse annotation matrix
+    annotation.data[np.isnan(annotation.data)] = 0
+    annotation_matrix_obsv = annotation[idxs]
     neighborhoods_matrix_obsv = neighborhoods.T[idxs].T
     # Calculate observed neighborhood scores
     with np.errstate(invalid="ignore", divide="ignore"):
@@ -142,7 +142,7 @@ def _run_permutation_test(
             params_list = [
                 (
                     permutation_batches[i],  # Pass the batch of precomputed permutations
-                    annotations,
+                    annotation,
                     neighborhoods_matrix_obsv,
                     observed_neighborhood_scores,
                     neighborhood_score_func,
@@ -185,7 +185,7 @@ def _permutation_process_batch(
     Args:
         permutations (Union[List, Tuple, np.ndarray]): Permutation batch to process.
-        annotation_matrix (csr_matrix): Sparse binary matrix representing annotations.
+        annotation_matrix (csr_matrix): Sparse binary matrix representing annotation.
         neighborhoods_matrix_obsv (csr_matrix): Sparse binary matrix representing observed neighborhoods.
         observed_neighborhood_scores (np.ndarray): Observed neighborhood scores.
         neighborhood_score_func (Callable): Function to calculate neighborhood scores.

risk/neighborhoods/stats/permutation/test_functions.py CHANGED Viewed

@@ -24,7 +24,7 @@ def compute_neighborhood_score_by_sum(
     Returns:
         np.ndarray: Dense array of summed attribute values for each neighborhood.
     """
-    # Calculate the neighborhood score as the dot product of neighborhoods and annotations
+    # Calculate the neighborhood score as the dot product of neighborhoods and annotation
     neighborhood_score = neighborhoods_matrix @ annotation_matrix  # Sparse matrix multiplication
     # Convert the result to a dense array for downstream calculations
     neighborhood_score_dense = neighborhood_score.toarray()
@@ -43,7 +43,7 @@ def compute_neighborhood_score_by_stdev(
     Returns:
         np.ndarray: Standard deviation of the neighborhood scores.
     """
-    # Calculate the neighborhood score as the dot product of neighborhoods and annotations
+    # Calculate the neighborhood score as the dot product of neighborhoods and annotation
     neighborhood_score = neighborhoods_matrix @ annotation_matrix  # Sparse matrix multiplication
     # Calculate the number of elements in each neighborhood (sum of rows)
     N = neighborhoods_matrix.sum(axis=1).A.flatten()  # Convert to 1D array

risk-network 0.0.12b1__py3-none-any.whl → 0.0.12b3__py3-none-any.whl

risk-network 0.0.12b1__py3-none-any.whl → 0.0.12b3__py3-none-any.whl