risk-network 0.0.7b11__py3-none-any.whl → 0.0.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -21,15 +21,20 @@ def calculate_greedy_modularity_neighborhoods(network: nx.Graph) -> np.ndarray:
     """
     # Detect communities using the Greedy Modularity method
     communities = greedy_modularity_communities(network)
-    # Create a mapping from node to community
-    community_dict = {node: idx for idx, community in enumerate(communities) for node in community}
     # Create a binary neighborhood matrix
-    neighborhoods = np.zeros((network.number_of_nodes(), network.number_of_nodes()), dtype=int)
+    n_nodes = network.number_of_nodes()
+    neighborhoods = np.zeros((n_nodes, n_nodes), dtype=int)
+    # Create a mapping from node to index in the matrix
     node_index = {node: i for i, node in enumerate(network.nodes())}
-    for node_i, community_i in community_dict.items():
-        for node_j, community_j in community_dict.items():
-            if community_i == community_j:
-                neighborhoods[node_index[node_i], node_index[node_j]] = 1
+    # Fill in the neighborhood matrix for nodes in the same community
+    for community in communities:
+        # Iterate through all pairs of nodes in the same community
+        for node_i in community:
+            idx_i = node_index[node_i]
+            for node_j in community:
+                idx_j = node_index[node_j]
+                # Set them as neighbors (1) in the binary matrix
+                neighborhoods[idx_i, idx_j] = 1
 
     return neighborhoods
 
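For reference, the per-community fill pattern introduced above can be exercised on its own. The sketch below uses a toy graph and an np.ix_ block assignment that is equivalent to the nested loops in the diff; none of it is package code, only an illustration of the technique.

import networkx as nx
import numpy as np
from networkx.algorithms.community import greedy_modularity_communities

network = nx.karate_club_graph()  # toy graph, for illustration only
communities = greedy_modularity_communities(network)

n_nodes = network.number_of_nodes()
node_index = {node: i for i, node in enumerate(network.nodes())}
neighborhoods = np.zeros((n_nodes, n_nodes), dtype=int)
for community in communities:
    # Mark every pair of nodes in the same community as neighbors; np.ix_
    # assigns the whole community block at once instead of looping over pairs
    idx = [node_index[node] for node in community]
    neighborhoods[np.ix_(idx, idx)] = 1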
@@ -43,22 +48,20 @@ def calculate_label_propagation_neighborhoods(network: nx.Graph) -> np.ndarray:
     Returns:
         np.ndarray: Binary neighborhood matrix on Label Propagation.
     """
-    # Apply Label Propagation
+    # Apply Label Propagation for community detection
     communities = nx.algorithms.community.label_propagation.label_propagation_communities(network)
-    # Create a mapping from node to community
-    community_dict = {}
-    for community_id, community in enumerate(communities):
-        for node in community:
-            community_dict[node] = community_id
-
     # Create a binary neighborhood matrix
     num_nodes = network.number_of_nodes()
     neighborhoods = np.zeros((num_nodes, num_nodes), dtype=int)
+    # Create a mapping from node to index in the matrix
+    node_index = {node: i for i, node in enumerate(network.nodes())}
     # Assign neighborhoods based on community labels
-    for node_i, community_i in community_dict.items():
-        for node_j, community_j in community_dict.items():
-            if community_i == community_j:
-                neighborhoods[node_i, node_j] = 1
+    for community in communities:
+        for node_i in community:
+            idx_i = node_index[node_i]
+            for node_j in community:
+                idx_j = node_index[node_j]
+                neighborhoods[idx_i, idx_j] = 1
 
     return neighborhoods
 
@@ -81,12 +84,22 @@ def calculate_louvain_neighborhoods(
         network, resolution=resolution, random_state=random_seed
     )
     # Create a binary neighborhood matrix
-    neighborhoods = np.zeros((network.number_of_nodes(), network.number_of_nodes()), dtype=int)
+    num_nodes = network.number_of_nodes()
+    neighborhoods = np.zeros((num_nodes, num_nodes), dtype=int)
+    # Create a mapping from node to index in the matrix
+    node_index = {node: i for i, node in enumerate(network.nodes())}
+    # Group nodes by community
+    community_groups = {}
+    for node, community in partition.items():
+        community_groups.setdefault(community, []).append(node)
+
     # Assign neighborhoods based on community partitions
-    for node_i, community_i in partition.items():
-        for node_j, community_j in partition.items():
-            if community_i == community_j:
-                neighborhoods[node_i, node_j] = 1
+    for community, nodes in community_groups.items():
+        for node_i in nodes:
+            idx_i = node_index[node_i]
+            for node_j in nodes:
+                idx_j = node_index[node_j]
+                neighborhoods[idx_i, idx_j] = 1
 
     return neighborhoods
 
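The setdefault grouping added here inverts the Louvain partition, a node-to-community mapping, into community-to-node lists before the matrix is filled. A tiny standalone illustration with a hand-written partition (not package data):

# partition maps each node to its community id, as Louvain returns it
partition = {"a": 0, "b": 0, "c": 1, "d": 1, "e": 0}

community_groups = {}
for node, community in partition.items():
    community_groups.setdefault(community, []).append(node)

print(community_groups)  # {0: ['a', 'b', 'e'], 1: ['c', 'd']}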
@@ -102,24 +115,22 @@ def calculate_markov_clustering_neighborhoods(network: nx.Graph) -> np.ndarray:
     """
     # Convert the graph to an adjacency matrix
     adjacency_matrix = nx.to_numpy_array(network)
-    # Run Markov Clustering
-    result = mc.run_mcl(adjacency_matrix)  # Run MCL with default parameters
-    # Get clusters
+    # Run Markov Clustering (MCL)
+    result = mc.run_mcl(adjacency_matrix)  # MCL with default parameters
+    # Get clusters (communities) from MCL result
     clusters = mc.get_clusters(result)
-    # Create a community label for each node
-    community_dict = {}
-    for community_id, community in enumerate(clusters):
-        for node in community:
-            community_dict[node] = community_id
-
     # Create a binary neighborhood matrix
     num_nodes = network.number_of_nodes()
     neighborhoods = np.zeros((num_nodes, num_nodes), dtype=int)
-    # Assign neighborhoods based on community labels
-    for node_i, community_i in community_dict.items():
-        for node_j, community_j in community_dict.items():
-            if community_i == community_j:
-                neighborhoods[node_i, node_j] = 1
+    # Create a mapping from node to index in the matrix
+    node_index = {node: i for i, node in enumerate(network.nodes())}
+    # Assign neighborhoods based on MCL clusters
+    for cluster in clusters:
+        for node_i in cluster:
+            idx_i = node_index[node_i]
+            for node_j in cluster:
+                idx_j = node_index[node_j]
+                neighborhoods[idx_i, idx_j] = 1
 
     return neighborhoods
 
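A small self-contained run of the MCL step (toy graph, default MCL parameters; not package code). markov_clustering's get_clusters returns tuples of row/column indices of the adjacency matrix, which follow the node order produced by nx.to_numpy_array:

import markov_clustering as mc
import networkx as nx
import numpy as np

network = nx.karate_club_graph()  # illustrative only
adjacency_matrix = nx.to_numpy_array(network)
result = mc.run_mcl(adjacency_matrix)  # MCL with default parameters
clusters = mc.get_clusters(result)     # e.g. [(0, 1, 2, ...), ...]

num_nodes = network.number_of_nodes()
neighborhoods = np.zeros((num_nodes, num_nodes), dtype=int)
for cluster in clusters:
    # Each cluster becomes a dense block of 1s in the binary matrix
    idx = list(cluster)
    neighborhoods[np.ix_(idx, idx)] = 1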
@@ -133,22 +144,20 @@ def calculate_spinglass_neighborhoods(network: nx.Graph) -> np.ndarray:
     Returns:
         np.ndarray: Binary neighborhood matrix on Spin Glass communities.
     """
-    # Use the asynchronous label propagation algorithm as a proxy for Spin Glass
+    # Apply Asynchronous Label Propagation (LPA)
     communities = asyn_lpa_communities(network)
-    # Create a community label for each node
-    community_dict = {}
-    for community_id, community in enumerate(communities):
-        for node in community:
-            community_dict[node] = community_id
-
     # Create a binary neighborhood matrix
     num_nodes = network.number_of_nodes()
     neighborhoods = np.zeros((num_nodes, num_nodes), dtype=int)
-    # Assign neighborhoods based on community labels
-    for node_i, community_i in community_dict.items():
-        for node_j, community_j in community_dict.items():
-            if community_i == community_j:
-                neighborhoods[node_i, node_j] = 1
+    # Create a mapping from node to index in the matrix
+    node_index = {node: i for i, node in enumerate(network.nodes())}
+    # Assign neighborhoods based on community labels from LPA
+    for community in communities:
+        for node_i in community:
+            idx_i = node_index[node_i]
+            for node_j in community:
+                idx_j = node_index[node_j]
+                neighborhoods[idx_i, idx_j] = 1
 
     return neighborhoods
 
@@ -162,21 +171,19 @@ def calculate_walktrap_neighborhoods(network: nx.Graph) -> np.ndarray:
     Returns:
         np.ndarray: Binary neighborhood matrix on Walktrap communities.
     """
-    # Use the asynchronous label propagation algorithm as a proxy for Walktrap
+    # Apply Asynchronous Label Propagation (LPA)
     communities = asyn_lpa_communities(network)
-    # Create a community label for each node
-    community_dict = {}
-    for community_id, community in enumerate(communities):
-        for node in community:
-            community_dict[node] = community_id
-
     # Create a binary neighborhood matrix
     num_nodes = network.number_of_nodes()
     neighborhoods = np.zeros((num_nodes, num_nodes), dtype=int)
-    # Assign neighborhoods based on community labels
-    for node_i, community_i in community_dict.items():
-        for node_j, community_j in community_dict.items():
-            if community_i == community_j:
-                neighborhoods[node_i, node_j] = 1
+    # Create a mapping from node to index in the matrix
+    node_index = {node: i for i, node in enumerate(network.nodes())}
+    # Assign neighborhoods based on community labels from LPA
+    for community in communities:
+        for node_i in community:
+            idx_i = node_index[node_i]
+            for node_j in community:
+                idx_j = node_index[node_j]
+                neighborhoods[idx_i, idx_j] = 1
 
     return neighborhoods
@@ -13,7 +13,7 @@ import pandas as pd
 from scipy.cluster.hierarchy import linkage, fcluster
 from sklearn.metrics import silhouette_score
 
-from risk.annotations import get_description
+from risk.annotations import get_weighted_description
 from risk.constants import GROUP_LINKAGE_METHODS, GROUP_DISTANCE_METRICS
 from risk.log import logger
 
@@ -40,22 +40,22 @@ def define_domains(
     """
     try:
         # Transpose the matrix to cluster annotations
-        m = significant_neighborhoods_enrichment[:, top_annotations["top attributes"]].T
+        m = significant_neighborhoods_enrichment[:, top_annotations["significant_annotations"]].T
         best_linkage, best_metric, best_threshold = _optimize_silhouette_across_linkage_and_metrics(
             m, linkage_criterion, linkage_method, linkage_metric
         )
         # Perform hierarchical clustering
         Z = linkage(m, method=best_linkage, metric=best_metric)
-        logger.info(
+        logger.warning(
             f"Linkage criterion: '{linkage_criterion}'\nLinkage method: '{best_linkage}'\nLinkage metric: '{best_metric}'"
         )
-        logger.info(f"Optimal linkage threshold: {round(best_threshold, 3)}")
+        logger.debug(f"Optimal linkage threshold: {round(best_threshold, 3)}")
         # Calculate the optimal threshold for clustering
         max_d_optimal = np.max(Z[:, 2]) * best_threshold
         # Assign domains to the annotations matrix
         domains = fcluster(Z, max_d_optimal, criterion=linkage_criterion)
         top_annotations["domain"] = 0
-        top_annotations.loc[top_annotations["top attributes"], "domain"] = domains
+        top_annotations.loc[top_annotations["significant_annotations"], "domain"] = domains
     except ValueError:
         # If a ValueError is encountered, handle it by assigning unique domains
         n_rows = len(top_annotations)
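The try block above follows the standard SciPy pattern of cutting a hierarchical clustering at a fraction of the largest merge distance. A standalone sketch with toy data and a fixed method, metric, and criterion (the package optimizes these) might look like:

import numpy as np
from scipy.cluster.hierarchy import fcluster, linkage

rng = np.random.default_rng(0)
m = rng.random((12, 4))                 # rows = annotation vectors to cluster
Z = linkage(m, method="average", metric="euclidean")
best_threshold = 0.3                    # placeholder for the optimized value
max_d_optimal = np.max(Z[:, 2]) * best_threshold  # Z[:, 2] holds merge distances
domains = fcluster(Z, max_d_optimal, criterion="distance")
print(domains)                          # one cluster label per row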
@@ -76,8 +76,12 @@ def define_domains(
     t_idxmax = node_to_domain.loc[:, 1:].idxmax(axis=1)
     t_idxmax[t_max == 0] = 0
 
+    # Assign all domains where the score is greater than 0
+    node_to_domain["all_domains"] = node_to_domain.loc[:, 1:].apply(
+        lambda row: list(row[row > 0].index), axis=1
+    )
     # Assign primary domain
-    node_to_domain["primary domain"] = t_idxmax
+    node_to_domain["primary_domain"] = t_idxmax
 
     return node_to_domain
 
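To illustrate the new "all_domains" expression, here is a toy node-to-domain score frame (made-up data, not package output): for each node, it collects every domain column other than 0 whose score is above zero.

import pandas as pd

node_to_domain = pd.DataFrame(
    {0: [0.0, 0.0, 0.0], 1: [0.0, 2.5, 1.0], 2: [3.0, 0.0, 0.5]},
    index=["node_a", "node_b", "node_c"],
)
node_to_domain["all_domains"] = node_to_domain.loc[:, 1:].apply(
    lambda row: list(row[row > 0].index), axis=1
)
print(node_to_domain["all_domains"].tolist())  # [[2], [1], [1, 2]]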
@@ -97,13 +101,13 @@ def trim_domains_and_top_annotations(
         max_cluster_size (int, optional): Maximum size of a cluster to be retained. Defaults to 1000.
 
     Returns:
-        tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]: A tuple containing:
+        Tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]: A tuple containing:
             - Trimmed annotations (pd.DataFrame)
             - Trimmed domains (pd.DataFrame)
             - A DataFrame with domain labels (pd.DataFrame)
     """
     # Identify domains to remove based on size criteria
-    domain_counts = domains["primary domain"].value_counts()
+    domain_counts = domains["primary_domain"].value_counts()
     to_remove = set(
         domain_counts[(domain_counts < min_cluster_size) | (domain_counts > max_cluster_size)].index
     )
@@ -113,32 +117,51 @@ def trim_domains_and_top_annotations(
     invalid_domain_ids = {0, invalid_domain_id}
     # Mark domains to be removed
     top_annotations["domain"].replace(to_remove, invalid_domain_id, inplace=True)
-    domains.loc[domains["primary domain"].isin(to_remove), ["primary domain"]] = invalid_domain_id
+    domains.loc[domains["primary_domain"].isin(to_remove), ["primary_domain"]] = invalid_domain_id
 
     # Normalize "num enriched neighborhoods" by percentile for each domain and scale to 0-10
     top_annotations["normalized_value"] = top_annotations.groupby("domain")[
-        "neighborhood enrichment sums"
+        "significant_neighborhood_enrichment_sums"
     ].transform(lambda x: (x.rank(pct=True) * 10).apply(np.ceil).astype(int))
-    # Multiply 'words' column by normalized values
-    top_annotations["words"] = top_annotations.apply(
-        lambda row: " ".join([row["words"]] * row["normalized_value"]), axis=1
+    # Modify the lambda function to pass both full_terms and significant_enrichment_score
+    top_annotations["combined_terms"] = top_annotations.apply(
+        lambda row: " ".join([str(row["full_terms"])] * row["normalized_value"]), axis=1
+    )
+
+    # Perform the groupby operation while retaining the other columns and adding the weighting with enrichment scores
+    domain_labels = (
+        top_annotations.groupby("domain")
+        .agg(
+            full_terms=("full_terms", lambda x: list(x)),
+            enrichment_scores=("significant_enrichment_score", lambda x: list(x)),
+        )
+        .reset_index()
+    )
+    domain_labels["combined_terms"] = domain_labels.apply(
+        lambda row: get_weighted_description(
+            pd.Series(row["full_terms"]), pd.Series(row["enrichment_scores"])
+        ),
+        axis=1,
     )
 
-    # Generate domain labels
-    domain_labels = top_annotations.groupby("domain")["words"].apply(get_description).reset_index()
+    # Rename the columns as necessary
     trimmed_domains_matrix = domain_labels.rename(
-        columns={"domain": "id", "words": "label"}
+        columns={
+            "domain": "id",
+            "combined_terms": "normalized_description",
+            "full_terms": "full_descriptions",
+            "enrichment_scores": "enrichment_scores",
+        }
     ).set_index("id")
 
     # Remove invalid domains
     valid_annotations = top_annotations[~top_annotations["domain"].isin(invalid_domain_ids)].drop(
         columns=["normalized_value"]
     )
-    valid_domains = domains[~domains["primary domain"].isin(invalid_domain_ids)]
+    valid_domains = domains[~domains["primary_domain"].isin(invalid_domain_ids)]
     valid_trimmed_domains_matrix = trimmed_domains_matrix[
         ~trimmed_domains_matrix.index.isin(invalid_domain_ids)
     ]
-
     return valid_annotations, valid_domains, valid_trimmed_domains_matrix
 
 
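For orientation, the named aggregation introduced above collects, per domain, the terms and their enrichment scores as parallel lists that are then handed to get_weighted_description. On made-up data (not package output) the groupby step behaves like this:

import pandas as pd

top_annotations = pd.DataFrame(
    {
        "domain": [1, 1, 2],
        "full_terms": ["dna repair", "dna replication", "ribosome"],
        "significant_enrichment_score": [5.2, 3.1, 7.8],
    }
)
domain_labels = (
    top_annotations.groupby("domain")
    .agg(
        full_terms=("full_terms", lambda x: list(x)),
        enrichment_scores=("significant_enrichment_score", lambda x: list(x)),
    )
    .reset_index()
)
# domain 1 -> full_terms ["dna repair", "dna replication"], enrichment_scores [5.2, 3.1]
# domain 2 -> full_terms ["ribosome"], enrichment_scores [7.8]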
@@ -154,7 +177,7 @@ def _optimize_silhouette_across_linkage_and_metrics(
         linkage_metric (str): Linkage metric for clustering.
 
     Returns:
-        tuple[str, str, float]: A tuple containing:
+        Tuple[str, str, float]: A tuple containing:
             - Best linkage method (str)
             - Best linkage metric (str)
             - Best threshold (float)
@@ -208,7 +231,7 @@ def _find_best_silhouette_score(
         resolution (float, optional): Desired resolution for the best threshold. Defaults to 0.001.
 
     Returns:
-        tuple[float, float]: A tuple containing:
+        Tuple[float, float]: A tuple containing:
             - Best threshold (float): The threshold that yields the best silhouette score.
             - Best silhouette score (float): The highest silhouette score achieved.
     """