PyPI - risk-network - Versions diffs - 0.0.7b0__py3-none-any.whl → 0.0.7b1__py3-none-any.whl - Mend

risk-network 0.0.7b0__py3-none-any.whl → 0.0.7b1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (8) hide show

risk/__init__.py CHANGED Viewed

@@ -7,4 +7,4 @@ RISK: RISK Infers Spatial Kinships
 from risk.risk import RISK
-__version__ = "0.0.7-beta.0"
+__version__ = "0.0.7-beta.1"

risk/neighborhoods/neighborhoods.py CHANGED Viewed

@@ -9,6 +9,7 @@ from typing import Any, Dict, List, Tuple
 import networkx as nx
 import numpy as np
 from sklearn.exceptions import DataConversionWarning
+from sklearn.metrics.pairwise import cosine_similarity
 from risk.neighborhoods.community import (
     calculate_dijkstra_neighborhoods,
@@ -93,7 +94,7 @@ def _create_percentile_limited_subgraph(G: nx.Graph, edge_length_percentile: flo
 def process_neighborhoods(
     network: nx.Graph,
     neighborhoods: Dict[str, Any],
-    impute_depth: int = 1,
+    impute_depth: int = 0,
     prune_threshold: float = 0.0,
 ) -> Dict[str, Any]:
     """Process neighborhoods based on the imputation and pruning settings.
@@ -101,7 +102,7 @@ def process_neighborhoods(
     Args:
         network (nx.Graph): The network data structure used for imputing and pruning neighbors.
         neighborhoods (dict): Dictionary containing 'enrichment_matrix', 'binary_enrichment_matrix', and 'significant_enrichment_matrix'.
-        impute_depth (int, optional): Depth for imputing neighbors. Defaults to 1.
+        impute_depth (int, optional): Depth for imputing neighbors. Defaults to 0.
         prune_threshold (float, optional): Distance threshold for pruning neighbors. Defaults to 0.0.
     Returns:
@@ -167,55 +168,135 @@ def _impute_neighbors(
             - np.ndarray: The imputed alpha threshold matrix.
             - np.ndarray: The significant enrichment matrix with non-significant entries set to zero.
     """
-    # Calculate shortest distances for each node to determine the distance threshold
-    shortest_distances = []
-    for node in network.nodes():
-        try:
-            neighbors = [
-                n for n in network.neighbors(node) if binary_enrichment_matrix[n].sum() != 0
-            ]
-        except IndexError as e:
-            raise IndexError(
-                f"Failed to find neighbors for node '{node}': Ensure that the node exists in the network and that the binary enrichment matrix is correctly indexed."
-            ) from e
-        # Calculate the shortest distance to a neighbor
-        if neighbors:
-            shortest_distance = min([_get_euclidean_distance(node, n, network) for n in neighbors])
-            shortest_distances.append(shortest_distance)
+    # Calculate the distance threshold value based on the shortest distances
+    enrichment_matrix, binary_enrichment_matrix = _impute_neighbors_with_similarity(
+        network, enrichment_matrix, binary_enrichment_matrix, max_depth=max_depth
+    )
+    # Create a matrix where non-significant entries are set to zero
+    significant_enrichment_matrix = np.where(binary_enrichment_matrix == 1, enrichment_matrix, 0)
+    return enrichment_matrix, binary_enrichment_matrix, significant_enrichment_matrix
+def _impute_neighbors_with_similarity(
+    network: nx.Graph,
+    enrichment_matrix: np.ndarray,
+    binary_enrichment_matrix: np.ndarray,
+    max_depth: int = 3,
+) -> Tuple[np.ndarray, np.ndarray]:
+    """Impute non-enriched nodes based on the closest enriched neighbors' profiles and their similarity.
+    Args:
+        network (nx.Graph): The network graph with nodes having IDs matching the matrix indices.
+        enrichment_matrix (np.ndarray): The enrichment matrix with rows to be imputed.
+        binary_enrichment_matrix (np.ndarray): The alpha threshold matrix to be imputed similarly.
+        max_depth (int): Maximum depth of nodes to traverse for imputing values.
+    Returns:
+        Tuple[np.ndarray, np.ndarray]: A tuple containing:
+            - The imputed enrichment matrix.
+            - The imputed alpha threshold matrix.
+    """
     depth = 1
     rows_to_impute = np.where(binary_enrichment_matrix.sum(axis=1) == 0)[0]
     while len(rows_to_impute) and depth <= max_depth:
         next_rows_to_impute = []
-        for row_index in rows_to_impute:
-            neighbors = nx.single_source_shortest_path_length(network, row_index, cutoff=depth)
-            valid_neighbors = [
-                n
-                for n in neighbors
-                if n != row_index
-                and binary_enrichment_matrix[n].sum() != 0
-                and enrichment_matrix[n].sum() != 0
-            ]
-            if valid_neighbors:
-                closest_neighbor = min(
-                    valid_neighbors, key=lambda n: _get_euclidean_distance(row_index, n, network)
+        # Iterate over all enriched nodes
+        for row_index in range(binary_enrichment_matrix.shape[0]):
+            if binary_enrichment_matrix[row_index].sum() != 0:
+                enrichment_matrix, binary_enrichment_matrix = _process_node_imputation(
+                    row_index, network, enrichment_matrix, binary_enrichment_matrix, depth
                 )
-                # Impute the row with the closest valid neighbor's data
-                enrichment_matrix[row_index] = enrichment_matrix[closest_neighbor]
-                binary_enrichment_matrix[row_index] = binary_enrichment_matrix[
-                    closest_neighbor
-                ] / np.sqrt(depth + 1)
-            else:
-                next_rows_to_impute.append(row_index)
-        rows_to_impute = next_rows_to_impute
+        # Update rows to impute for the next iteration
+        rows_to_impute = np.where(binary_enrichment_matrix.sum(axis=1) == 0)[0]
         depth += 1
-    # Create a matrix where non-significant entries are set to zero
-    significant_enrichment_matrix = np.where(binary_enrichment_matrix == 1, enrichment_matrix, 0)
+    return enrichment_matrix, binary_enrichment_matrix
-    return enrichment_matrix, binary_enrichment_matrix, significant_enrichment_matrix
+def _process_node_imputation(
+    row_index: int,
+    network: nx.Graph,
+    enrichment_matrix: np.ndarray,
+    binary_enrichment_matrix: np.ndarray,
+    depth: int,
+) -> Tuple[np.ndarray, np.ndarray]:
+    """Process the imputation for a single node based on its enriched neighbors.
+    Args:
+        row_index (int): The index of the enriched node being processed.
+        network (nx.Graph): The network graph with nodes having IDs matching the matrix indices.
+        enrichment_matrix (np.ndarray): The enrichment matrix with rows to be imputed.
+        binary_enrichment_matrix (np.ndarray): The alpha threshold matrix to be imputed similarly.
+        depth (int): Current depth for traversal.
+    Returns:
+        Tuple[np.ndarray, np.ndarray]: The modified enrichment matrix and binary threshold matrix.
+    """
+    # Check neighbors at the current depth
+    neighbors = nx.single_source_shortest_path_length(network, row_index, cutoff=depth)
+    # Filter annotated neighbors (already enriched)
+    annotated_neighbors = [
+        n
+        for n in neighbors
+        if n != row_index
+        and binary_enrichment_matrix[n].sum() != 0
+        and enrichment_matrix[n].sum() != 0
+    ]
+    # Filter non-enriched neighbors
+    valid_neighbors = [
+        n
+        for n in neighbors
+        if n != row_index
+        and binary_enrichment_matrix[n].sum() == 0
+        and enrichment_matrix[n].sum() == 0
+    ]
+    # If there are valid non-enriched neighbors
+    if valid_neighbors and annotated_neighbors:
+        # Calculate distances to annotated neighbors
+        distances_to_annotated = [
+            _get_euclidean_distance(row_index, n, network) for n in annotated_neighbors
+        ]
+        # Calculate the IQR to identify outliers
+        q1, q3 = np.percentile(distances_to_annotated, [25, 75])
+        iqr = q3 - q1
+        lower_bound = q1 - 1.5 * iqr
+        upper_bound = q3 + 1.5 * iqr
+        # Filter valid non-enriched neighbors that fall within the IQR bounds
+        valid_neighbors_within_iqr = [
+            n
+            for n in valid_neighbors
+            if lower_bound <= _get_euclidean_distance(row_index, n, network) <= upper_bound
+        ]
+        # If there are any valid neighbors within the IQR
+        if valid_neighbors_within_iqr:
+            # If more than one valid neighbor is within the IQR, compute pairwise cosine similarities
+            if len(valid_neighbors_within_iqr) > 1:
+                # Find the most similar neighbor based on pairwise cosine similarities
+                def sum_pairwise_cosine_similarities(neighbor):
+                    return sum(
+                        cosine_similarity(
+                            enrichment_matrix[neighbor].reshape(1, -1),
+                            enrichment_matrix[other_neighbor].reshape(1, -1),
+                        )[0][0]
+                        for other_neighbor in valid_neighbors_within_iqr
+                        if other_neighbor != neighbor
+                    )
+                most_similar_neighbor = max(
+                    valid_neighbors_within_iqr, key=sum_pairwise_cosine_similarities
+                )
+            else:
+                most_similar_neighbor = valid_neighbors_within_iqr[0]
+            # Impute the most similar non-enriched neighbor with the enriched node's data, scaled by depth
+            enrichment_matrix[most_similar_neighbor] = enrichment_matrix[row_index] / np.sqrt(
+                depth + 1
+            )
+            binary_enrichment_matrix[most_similar_neighbor] = binary_enrichment_matrix[row_index]
+    return enrichment_matrix, binary_enrichment_matrix
 def _prune_neighbors(
@@ -240,27 +321,27 @@ def _prune_neighbors(
     """
     # Identify indices with non-zero rows in the binary enrichment matrix
     non_zero_indices = np.where(binary_enrichment_matrix.sum(axis=1) != 0)[0]
-    average_distances = []
+    median_distances = []
     for node in non_zero_indices:
         neighbors = [n for n in network.neighbors(node) if binary_enrichment_matrix[n].sum() != 0]
         if neighbors:
-            average_distance = np.mean(
+            median_distance = np.median(
                 [_get_euclidean_distance(node, n, network) for n in neighbors]
             )
-            average_distances.append(average_distance)
+            median_distances.append(median_distance)
     # Calculate the distance threshold value based on rank
-    distance_threshold_value = _calculate_threshold(average_distances, 1 - distance_threshold)
+    distance_threshold_value = _calculate_threshold(median_distances, 1 - distance_threshold)
     # Prune nodes that are outliers based on the distance threshold
     for row_index in non_zero_indices:
         neighbors = [
             n for n in network.neighbors(row_index) if binary_enrichment_matrix[n].sum() != 0
         ]
         if neighbors:
-            average_distance = np.mean(
+            median_distance = np.median(
                 [_get_euclidean_distance(row_index, n, network) for n in neighbors]
             )
-            if average_distance >= distance_threshold_value:
+            if median_distance >= distance_threshold_value:
                 enrichment_matrix[row_index] = 0
                 binary_enrichment_matrix[row_index] = 0
@@ -305,18 +386,18 @@ def _get_node_position(network: nx.Graph, node: Any) -> np.ndarray:
     )
-def _calculate_threshold(average_distances: List, distance_threshold: float) -> float:
-    """Calculate the distance threshold based on the given average distances and a percentile threshold.
+def _calculate_threshold(median_distances: List, distance_threshold: float) -> float:
+    """Calculate the distance threshold based on the given median distances and a percentile threshold.
     Args:
-        average_distances (list): An array of average distances.
+        median_distances (list): An array of median distances.
         distance_threshold (float): A percentile threshold (0 to 1) used to determine the distance cutoff.
     Returns:
         float: The calculated distance threshold value.
     """
-    # Sort the average distances
-    sorted_distances = np.sort(average_distances)
+    # Sort the median distances
+    sorted_distances = np.sort(median_distances)
     # Compute the rank percentiles for the sorted distances
     rank_percentiles = np.linspace(0, 1, len(sorted_distances))
     # Interpolating the ranks to 1000 evenly spaced percentiles

risk/risk.py CHANGED Viewed

@@ -237,7 +237,7 @@ class RISK(NetworkIO, AnnotationsIO):
         tail: str = "right",  # OPTIONS: "right" (enrichment), "left" (depletion), "both"
         pval_cutoff: float = 0.01,  # OPTIONS: Any value between 0 to 1
         fdr_cutoff: float = 0.9999,  # OPTIONS: Any value between 0 to 1
-        impute_depth: int = 1,
+        impute_depth: int = 0,
         prune_threshold: float = 0.0,
         linkage_criterion: str = "distance",
         linkage_method: str = "average",
@@ -254,7 +254,7 @@ class RISK(NetworkIO, AnnotationsIO):
             tail (str, optional): Type of significance tail ("right", "left", "both"). Defaults to "right".
             pval_cutoff (float, optional): p-value cutoff for significance. Defaults to 0.01.
             fdr_cutoff (float, optional): FDR cutoff for significance. Defaults to 0.9999.
-            impute_depth (int, optional): Depth for imputing neighbors. Defaults to 1.
+            impute_depth (int, optional): Depth for imputing neighbors. Defaults to 0.
             prune_threshold (float, optional): Distance threshold for pruning neighbors. Defaults to 0.0.
             linkage_criterion (str, optional): Clustering criterion for defining domains. Defaults to "distance".
             linkage_method (str, optional): Clustering method to use. Defaults to "average".

{risk_network-0.0.7b0.dist-info → risk_network-0.0.7b1.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: risk-network
-Version: 0.0.7b0
+Version: 0.0.7b1
 Summary: A Python package for biological network analysis
 Author: Ira Horecka
 Author-email: Ira Horecka <ira89@icloud.com>

{risk_network-0.0.7b0.dist-info → risk_network-0.0.7b1.dist-info}/RECORD RENAMED Viewed

@@ -1,6 +1,6 @@
-risk/__init__.py,sha256=Qyktssx5ZswUpqcPtSMq9Zn-zzJXl2fka6MqbHS-JxQ,112
+risk/__init__.py,sha256=kKRKe-z54BZkkomARTvXCfcVgS-KX50Kgryn6By_kdc,112
 risk/constants.py,sha256=XInRaH78Slnw_sWgAsBFbUHkyA0h0jL0DKGuQNbOvjM,550
-risk/risk.py,sha256=CKDIzVo9Jvl-fgzIlk5ZtJL9pIBMma24WK6EYdVu5po,20648
+risk/risk.py,sha256=jhfOv60iZdOssCF35tAjJ_br9w8aIqPFT2owVTehgtA,20648
 risk/annotations/__init__.py,sha256=vUpVvMRE5if01Ic8QY6M2Ae3EFGJHdugEe9PdEkAW4Y,138
 risk/annotations/annotations.py,sha256=K7cUA6vYTKYAvj0xHqrAwNEYtmPq4H7LDYENAOVQdQ0,11014
 risk/annotations/io.py,sha256=lo7NKqOVkeeBp58JBxWJHtA0xjL5Yoxqe9Ox0daKlZk,9457
@@ -10,7 +10,7 @@ risk/log/params.py,sha256=Rfdg5UcGCrG80m6V79FyORERWUqIzHFO7tGiY4zAImM,6347
 risk/neighborhoods/__init__.py,sha256=tKKEg4lsbqFukpgYlUGxU_v_9FOqK7V0uvM9T2QzoL0,206
 risk/neighborhoods/community.py,sha256=7ebo1Q5KokSQISnxZIh2SQxsKXdXm8aVkp-h_DiQ3K0,6818
 risk/neighborhoods/domains.py,sha256=bxJUxqFTynzX0mf3E8-AA4_Rfccje1reeVVhfzb1-pE,10672
-risk/neighborhoods/neighborhoods.py,sha256=sHmjFFl2U5qV9YbQCRbpbI36j7dS7IFfFwwRb1_-AuM,13945
+risk/neighborhoods/neighborhoods.py,sha256=cEk4gDvIkBky5POZhtHnO78iV-NXu4BeV-e5XdhYOkM,17508
 risk/network/__init__.py,sha256=iEPeJdZfqp0toxtbElryB8jbz9_t_k4QQ3iDvKE8C_0,126
 risk/network/geometry.py,sha256=H1yGVVqgbfpzBzJwEheDLfvGLSA284jGQQTn612L4Vc,6759
 risk/network/graph.py,sha256=7haHu4M3fleqbrIzs6HC9jnKizSERzmmAYSmUwdoSXA,13953
@@ -23,8 +23,8 @@ risk/stats/stats.py,sha256=kvShov-94W6ffgDUTb522vB9hDJQSyTsYif_UIaFfSM,7059
 risk/stats/permutation/__init__.py,sha256=neJp7FENC-zg_CGOXqv-iIvz1r5XUKI9Ruxhmq7kDOI,105
 risk/stats/permutation/permutation.py,sha256=qLWdwxEY6nmkYPxpM8HLDcd2mbqYv9Qr7CKtJvhLqIM,9220
 risk/stats/permutation/test_functions.py,sha256=HuDIM-V1jkkfE1rlaIqrWWBSKZt3dQ1f-YEDjWpnLSE,2343
-risk_network-0.0.7b0.dist-info/LICENSE,sha256=jOtLnuWt7d5Hsx6XXB2QxzrSe2sWWh3NgMfFRetluQM,35147
-risk_network-0.0.7b0.dist-info/METADATA,sha256=Yokjvu7qlqWV6F_qJQ9O6TIwKw_9XpD_2qgwQHyimRY,43142
-risk_network-0.0.7b0.dist-info/WHEEL,sha256=GV9aMThwP_4oNCtvEC2ec3qUYutgWeAzklro_0m4WJQ,91
-risk_network-0.0.7b0.dist-info/top_level.txt,sha256=NX7C2PFKTvC1JhVKv14DFlFAIFnKc6Lpsu1ZfxvQwVw,5
-risk_network-0.0.7b0.dist-info/RECORD,,
+risk_network-0.0.7b1.dist-info/LICENSE,sha256=jOtLnuWt7d5Hsx6XXB2QxzrSe2sWWh3NgMfFRetluQM,35147
+risk_network-0.0.7b1.dist-info/METADATA,sha256=I0cAqenkwnGxhVcAkX_ipuB3rvmHV4OcR9S7tjOdaC8,43142
+risk_network-0.0.7b1.dist-info/WHEEL,sha256=GV9aMThwP_4oNCtvEC2ec3qUYutgWeAzklro_0m4WJQ,91
+risk_network-0.0.7b1.dist-info/top_level.txt,sha256=NX7C2PFKTvC1JhVKv14DFlFAIFnKc6Lpsu1ZfxvQwVw,5
+risk_network-0.0.7b1.dist-info/RECORD,,

{risk_network-0.0.7b0.dist-info → risk_network-0.0.7b1.dist-info}/LICENSE RENAMED Viewed

File without changes

{risk_network-0.0.7b0.dist-info → risk_network-0.0.7b1.dist-info}/WHEEL RENAMED Viewed

File without changes

{risk_network-0.0.7b0.dist-info → risk_network-0.0.7b1.dist-info}/top_level.txt RENAMED Viewed

File without changes

risk-network 0.0.7b0__py3-none-any.whl → 0.0.7b1__py3-none-any.whl

risk-network 0.0.7b0__py3-none-any.whl → 0.0.7b1__py3-none-any.whl