risk-network 0.0.16b0__tar.gz → 0.0.16b2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {risk_network-0.0.16b0 → risk_network-0.0.16b2}/PKG-INFO +2 -2
- {risk_network-0.0.16b0 → risk_network-0.0.16b2}/README.md +1 -1
- {risk_network-0.0.16b0 → risk_network-0.0.16b2}/src/risk/__init__.py +2 -2
- {risk_network-0.0.16b0/src/risk/_annotation → risk_network-0.0.16b2/src/risk/annotation}/__init__.py +2 -2
- {risk_network-0.0.16b0/src/risk/_annotation → risk_network-0.0.16b2/src/risk/annotation}/_nltk_setup.py +3 -3
- risk_network-0.0.16b0/src/risk/_annotation/_annotation.py → risk_network-0.0.16b2/src/risk/annotation/annotation.py +22 -25
- risk_network-0.0.16b0/src/risk/_annotation/_io.py → risk_network-0.0.16b2/src/risk/annotation/io.py +4 -4
- risk_network-0.0.16b2/src/risk/cluster/__init__.py +8 -0
- {risk_network-0.0.16b0/src/risk/_neighborhoods → risk_network-0.0.16b2/src/risk/cluster}/_community.py +37 -37
- risk_network-0.0.16b2/src/risk/cluster/api.py +273 -0
- risk_network-0.0.16b0/src/risk/_neighborhoods/_neighborhoods.py → risk_network-0.0.16b2/src/risk/cluster/cluster.py +127 -98
- risk_network-0.0.16b0/src/risk/_neighborhoods/_domains.py → risk_network-0.0.16b2/src/risk/cluster/label.py +18 -12
- {risk_network-0.0.16b0/src/risk/_log → risk_network-0.0.16b2/src/risk/log}/__init__.py +2 -2
- risk_network-0.0.16b0/src/risk/_log/_console.py → risk_network-0.0.16b2/src/risk/log/console.py +2 -2
- risk_network-0.0.16b0/src/risk/_log/_parameters.py → risk_network-0.0.16b2/src/risk/log/parameters.py +20 -10
- risk_network-0.0.16b2/src/risk/network/__init__.py +8 -0
- risk_network-0.0.16b2/src/risk/network/graph/__init__.py +7 -0
- {risk_network-0.0.16b0/src/risk/_network/_graph → risk_network-0.0.16b2/src/risk/network/graph}/_stats.py +2 -2
- {risk_network-0.0.16b0/src/risk/_network/_graph → risk_network-0.0.16b2/src/risk/network/graph}/_summary.py +13 -13
- risk_network-0.0.16b0/src/risk/_network/_graph/_api.py → risk_network-0.0.16b2/src/risk/network/graph/api.py +37 -39
- risk_network-0.0.16b0/src/risk/_network/_graph/_graph.py → risk_network-0.0.16b2/src/risk/network/graph/graph.py +5 -5
- risk_network-0.0.16b0/src/risk/_network/_io.py → risk_network-0.0.16b2/src/risk/network/io.py +9 -4
- risk_network-0.0.16b2/src/risk/network/plotter/__init__.py +6 -0
- {risk_network-0.0.16b0/src/risk/_network/_plotter → risk_network-0.0.16b2/src/risk/network/plotter}/_canvas.py +6 -6
- {risk_network-0.0.16b0/src/risk/_network/_plotter → risk_network-0.0.16b2/src/risk/network/plotter}/_contour.py +4 -4
- {risk_network-0.0.16b0/src/risk/_network/_plotter → risk_network-0.0.16b2/src/risk/network/plotter}/_labels.py +6 -6
- {risk_network-0.0.16b0/src/risk/_network/_plotter → risk_network-0.0.16b2/src/risk/network/plotter}/_network.py +7 -7
- {risk_network-0.0.16b0/src/risk/_network/_plotter → risk_network-0.0.16b2/src/risk/network/plotter}/_plotter.py +5 -5
- risk_network-0.0.16b2/src/risk/network/plotter/_utils/__init__.py +7 -0
- risk_network-0.0.16b0/src/risk/_network/_plotter/_utils/_colors.py → risk_network-0.0.16b2/src/risk/network/plotter/_utils/colors.py +3 -3
- risk_network-0.0.16b0/src/risk/_network/_plotter/_utils/_layout.py → risk_network-0.0.16b2/src/risk/network/plotter/_utils/layout.py +2 -2
- risk_network-0.0.16b0/src/risk/_network/_plotter/_api.py → risk_network-0.0.16b2/src/risk/network/plotter/api.py +5 -5
- risk_network-0.0.16b0/src/risk/_risk.py → risk_network-0.0.16b2/src/risk/risk.py +9 -8
- risk_network-0.0.16b2/src/risk/stats/__init__.py +6 -0
- risk_network-0.0.16b2/src/risk/stats/_stats/__init__.py +11 -0
- risk_network-0.0.16b2/src/risk/stats/_stats/permutation/__init__.py +6 -0
- risk_network-0.0.16b2/src/risk/stats/_stats/permutation/_test_functions.py +72 -0
- risk_network-0.0.16b0/src/risk/_neighborhoods/_stats/_permutation/_permutation.py → risk_network-0.0.16b2/src/risk/stats/_stats/permutation/permutation.py +35 -37
- risk_network-0.0.16b0/src/risk/_neighborhoods/_stats/_tests.py → risk_network-0.0.16b2/src/risk/stats/_stats/tests.py +32 -34
- risk_network-0.0.16b2/src/risk/stats/api.py +202 -0
- {risk_network-0.0.16b0 → risk_network-0.0.16b2}/src/risk_network.egg-info/PKG-INFO +2 -2
- risk_network-0.0.16b2/src/risk_network.egg-info/SOURCES.txt +54 -0
- risk_network-0.0.16b2/tests/test_load_clusters.py +308 -0
- {risk_network-0.0.16b0 → risk_network-0.0.16b2}/tests/test_load_graph.py +282 -79
- {risk_network-0.0.16b0 → risk_network-0.0.16b2}/tests/test_load_io_combinations.py +18 -19
- {risk_network-0.0.16b0 → risk_network-0.0.16b2}/tests/test_load_network.py +105 -0
- {risk_network-0.0.16b0 → risk_network-0.0.16b2}/tests/test_load_plotter.py +101 -0
- risk_network-0.0.16b2/tests/test_load_stats.py +348 -0
- {risk_network-0.0.16b0 → risk_network-0.0.16b2}/tests/test_log.py +1 -1
- risk_network-0.0.16b0/src/risk/_neighborhoods/__init__.py +0 -8
- risk_network-0.0.16b0/src/risk/_neighborhoods/_api.py +0 -354
- risk_network-0.0.16b0/src/risk/_neighborhoods/_stats/__init__.py +0 -11
- risk_network-0.0.16b0/src/risk/_neighborhoods/_stats/_permutation/__init__.py +0 -6
- risk_network-0.0.16b0/src/risk/_neighborhoods/_stats/_permutation/_test_functions.py +0 -72
- risk_network-0.0.16b0/src/risk/_network/__init__.py +0 -8
- risk_network-0.0.16b0/src/risk/_network/_graph/__init__.py +0 -7
- risk_network-0.0.16b0/src/risk/_network/_plotter/__init__.py +0 -6
- risk_network-0.0.16b0/src/risk/_network/_plotter/_utils/__init__.py +0 -7
- risk_network-0.0.16b0/src/risk_network.egg-info/SOURCES.txt +0 -51
- risk_network-0.0.16b0/tests/test_load_neighborhoods.py +0 -415
- {risk_network-0.0.16b0 → risk_network-0.0.16b2}/LICENSE +0 -0
- {risk_network-0.0.16b0 → risk_network-0.0.16b2}/pyproject.toml +0 -0
- {risk_network-0.0.16b0 → risk_network-0.0.16b2}/setup.cfg +0 -0
- {risk_network-0.0.16b0 → risk_network-0.0.16b2}/src/risk_network.egg-info/dependency_links.txt +0 -0
- {risk_network-0.0.16b0 → risk_network-0.0.16b2}/src/risk_network.egg-info/requires.txt +0 -0
- {risk_network-0.0.16b0 → risk_network-0.0.16b2}/src/risk_network.egg-info/top_level.txt +0 -0
- {risk_network-0.0.16b0 → risk_network-0.0.16b2}/tests/test_load_annotation.py +0 -0
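Taken together, the listing shows a package-wide restructure: the private `_annotation`, `_log`, `_neighborhoods`, and `_network` packages become public `annotation`, `log`, `cluster`, and `network` packages (plus a new `stats` package), with "neighborhood" renamed to "cluster" throughout. Below is a hedged before/after sketch of what the moves mean for import paths, inferred purely from the file renames above; these are internal modules, and the supported surface is whatever the top-level `risk` package re-exports.

```python
# Import paths inferred from the file moves listed above -- shown for
# orientation only; the canonical entry point is the top-level risk API.

# 0.0.16b0: private, underscore-prefixed packages
# from risk._neighborhoods._community import calculate_louvain_neighborhoods
# from risk._log import logger

# 0.0.16b2: public packages, with "neighborhood" renamed to "cluster"
from risk.cluster._community import calculate_louvain_clusters
from risk.log import logger
```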
{risk_network-0.0.16b0 → risk_network-0.0.16b2}/PKG-INFO
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: risk-network
-Version: 0.0.16b0
+Version: 0.0.16b2
 Summary: A Python package for scalable network analysis and high-quality visualization.
 Author-email: Ira Horecka <ira89@icloud.com>
 License: GPL-3.0-or-later
@@ -44,7 +44,7 @@ Dynamic: license-file
 [badge]
 [](https://pypi.python.org/pypi/risk-network)
 [badge]
-[](https://doi.org/10.5281/zenodo.…)
+[](https://doi.org/10.5281/zenodo.17257417)
 [badge]
 
 **RISK** (Regional Inference of Significant Kinships) is a next-generation tool for biological network annotation and visualization. It integrates community detection algorithms, rigorous overrepresentation analysis, and a modular framework for diverse network types. RISK identifies biologically coherent relationships within networks and generates publication-ready visualizations, making it a useful tool for biological and interdisciplinary network analysis.

{risk_network-0.0.16b0 → risk_network-0.0.16b2}/README.md
CHANGED
@@ -3,7 +3,7 @@
 [badge]
 [](https://pypi.python.org/pypi/risk-network)
 [badge]
-[](https://doi.org/10.5281/zenodo.…)
+[](https://doi.org/10.5281/zenodo.17257417)
 [badge]
 
 **RISK** (Regional Inference of Significant Kinships) is a next-generation tool for biological network annotation and visualization. It integrates community detection algorithms, rigorous overrepresentation analysis, and a modular framework for diverse network types. RISK identifies biologically coherent relationships within networks and generates publication-ready visualizations, making it a useful tool for biological and interdisciplinary network analysis.
{risk_network-0.0.16b0/src/risk/_annotation → risk_network-0.0.16b2/src/risk/annotation}/_nltk_setup.py
RENAMED
@@ -1,6 +1,6 @@
 """
-risk/_annotation/_nltk_setup
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+risk/annotation/_nltk_setup
+~~~~~~~~~~~~~~~~~~~~~~~~~~~
 """
 
 import os
@@ -11,7 +11,7 @@ import nltk
 from nltk.data import find
 from nltk.data import path as nltk_data_path
 
-from .._log import logger
+from ..log import logger
 
 
 def setup_nltk_resources(required_resources: Optional[List[Tuple[str, str]]] = None) -> None:
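`setup_nltk_resources` accepts a list of `(lookup_path, package_name)` pairs; its body is outside this diff, but the `find`/`nltk_data_path` imports point at the usual find-or-download pattern. A minimal sketch of that pattern follows, with hypothetical resource names; this is not the package's actual implementation.

```python
# Minimal find-or-download sketch suggested by the signature above;
# the resource names here are illustrative, not RISK's actual defaults.
from typing import List, Optional, Tuple

import nltk
from nltk.data import find


def ensure_nltk_resources(required: Optional[List[Tuple[str, str]]] = None) -> None:
    # Each pair maps a lookup path (e.g. "tokenizers/punkt") to the
    # downloadable package name (e.g. "punkt").
    required = required or [("tokenizers/punkt", "punkt"), ("corpora/wordnet", "wordnet")]
    for lookup_path, package in required:
        try:
            find(lookup_path)  # raises LookupError when the resource is missing
        except LookupError:
            nltk.download(package, quiet=True)
```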
risk_network-0.0.16b0/src/risk/_annotation/_annotation.py → risk_network-0.0.16b2/src/risk/annotation/annotation.py
RENAMED
@@ -1,6 +1,6 @@
 """
-risk/_annotation/_annotation
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+risk/annotation/annotation
+~~~~~~~~~~~~~~~~~~~~~~~~~~
 """
 
 import re
@@ -14,7 +14,7 @@ import pandas as pd
 from nltk.tokenize import word_tokenize
 from scipy.sparse import coo_matrix
 
-from .._log import logger
+from ..log import logger
 from ._nltk_setup import setup_nltk_resources
 
 
@@ -123,19 +123,19 @@ def load_annotation(
 def define_top_annotation(
     network: nx.Graph,
     ordered_annotation_labels: List[str],
-    neighborhood_significance_sums: List[int],
+    cluster_significance_sums: List[int],
     significant_significance_matrix: np.ndarray,
     significant_binary_significance_matrix: np.ndarray,
     min_cluster_size: int = 5,
     max_cluster_size: int = 1000,
 ) -> pd.DataFrame:
     """
-    Define top annotations based on neighborhood significance sums and binary significance matrix.
+    Define top annotations based on cluster significance sums and binary significance matrix.
 
     Args:
         network (NetworkX graph): The network graph.
         ordered_annotation_labels (list of str): List of ordered annotation labels.
-        neighborhood_significance_sums (list of int): List of neighborhood significance sums.
+        cluster_significance_sums (list of int): List of cluster significance sums.
         significant_significance_matrix (np.ndarray): Enrichment matrix below alpha threshold.
         significant_binary_significance_matrix (np.ndarray): Binary significance matrix below alpha threshold.
         min_cluster_size (int, optional): Minimum cluster size. Defaults to 5.
@@ -146,12 +146,12 @@ def define_top_annotation(
     """
     # Sum the columns of the significant significance matrix (positive floating point values)
     significant_significance_scores = significant_significance_matrix.sum(axis=0)
-    # Create DataFrame to store annotations, their neighborhood significance sums, and significance scores
+    # Create DataFrame to store annotations, their cluster significance sums, and significance scores
     annotation_significance_matrix = pd.DataFrame(
         {
             "id": range(len(ordered_annotation_labels)),
             "full_terms": ordered_annotation_labels,
-            "significant_neighborhood_significance_sums": neighborhood_significance_sums,
+            "significant_cluster_significance_sums": cluster_significance_sums,
             "significant_significance_score": significant_significance_scores,
         }
     )
@@ -159,11 +159,11 @@ def define_top_annotation(
     # Apply size constraints to identify potential significant annotations
     annotation_significance_matrix.loc[
         (
-            annotation_significance_matrix["significant_neighborhood_significance_sums"]
+            annotation_significance_matrix["significant_cluster_significance_sums"]
             >= min_cluster_size
         )
         & (
-            annotation_significance_matrix["significant_neighborhood_significance_sums"]
+            annotation_significance_matrix["significant_cluster_significance_sums"]
             <= max_cluster_size
         ),
         "significant_annotation",
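The hunk above only renames the column feeding the size filter; the pattern itself, a combined boolean mask driving a `.loc` assignment, is worth a standalone look. A toy example with the diff's column names follows; the data values, and the `True` being assigned, are illustrative, since the assignment's right-hand side falls outside the hunk.

```python
# Self-contained illustration of the .loc mask-assignment pattern above;
# column names mirror the diff, data values are made up.
import pandas as pd

df = pd.DataFrame({"significant_cluster_significance_sums": [2, 10, 50, 2000]})
df["significant_annotation"] = False
min_cluster_size, max_cluster_size = 5, 1000

# Rows whose sums fall inside [min_cluster_size, max_cluster_size] are flagged.
df.loc[
    (df["significant_cluster_significance_sums"] >= min_cluster_size)
    & (df["significant_cluster_significance_sums"] <= max_cluster_size),
    "significant_annotation",
] = True
print(df)  # only the rows with sums 10 and 50 are flagged
```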
@@ -179,11 +179,11 @@ def define_top_annotation(
     for attribute in annotation_significance_matrix.index.values[
         annotation_significance_matrix["significant_annotation"]
     ]:
-        # Identify significant neighborhoods based on the binary significance matrix
-        significant_neighborhoods = list(
+        # Identify significant clusters based on the binary significance matrix
+        significant_clusters = list(
             compress(list(network), significant_binary_significance_matrix[:, attribute])
         )
-        significant_network = nx.subgraph(network, significant_neighborhoods)
+        significant_network = nx.subgraph(network, significant_clusters)
         # Analyze connected components within the significant subnetwork
         connected_components = sorted(
             nx.connected_components(significant_network), key=len, reverse=True
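The selection step above pairs `itertools.compress` with one column of the binary significance matrix to pull out the significant nodes, then analyzes the induced subgraph. A toy run of the same pattern:

```python
# Toy example of the compress + subgraph selection used above.
from itertools import compress

import networkx as nx
import numpy as np

G = nx.path_graph(["a", "b", "c", "d"])
# One column of a binary significance matrix: 1 marks a significant node.
column = np.array([1, 0, 1, 1])

significant_nodes = list(compress(list(G), column))  # ['a', 'c', 'd']
sub = nx.subgraph(G, significant_nodes)
components = sorted(nx.connected_components(sub), key=len, reverse=True)
print(components)  # [{'c', 'd'}, {'a'}]
```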
@@ -257,26 +257,23 @@ def get_weighted_description(words_column: pd.Series, scores_column: pd.Series)
             continue
         # Lemmatize the token to merge similar forms
         token_norm = LEMMATIZER.lemmatize(token_clean)
-        weighted_counts[token_norm] = weighted_counts.get(token_norm, 0) + weight
+        # Apply weighting boost for biologically structured number-word hybrids
+        if re.match(r"^\d+[\-\w]+", token_norm):
+            actual_weight = int(weight * 1.5)
+        else:
+            actual_weight = weight
+        weighted_counts[token_norm] = weighted_counts.get(token_norm, 0) + actual_weight
 
     # Reconstruct a weighted token list by repeating each token by its aggregated count.
     weighted_words = []
     for token, count in weighted_counts.items():
         weighted_words.extend([token] * count)
 
-    # Combine tokens that match number-word patterns (e.g. "4-alpha")
+    # Combine tokens that match number-word patterns (e.g. "4-alpha"), but do not remove numeric tokens.
+    # All tokens are included in the final list.
     combined_tokens = []
     for token in weighted_words:
-        if re.match(r"^\d+[\-\w]+", token):
-            combined_tokens.append(token)
-        elif token.replace(".", "", 1).isdigit():
-            continue
-        else:
-            combined_tokens.append(token)
-
-    # If the only token is numeric, return a default value.
-    if len(combined_tokens) == 1 and combined_tokens[0].isdigit():
-        return "N/A"
+        combined_tokens.append(token)
 
     # Simplify the token list to remove near-duplicates based on the Jaccard index.
     simplified_words = _simplify_word_list(combined_tokens)
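The rewrite folds number-word handling into the counting loop: hybrid tokens such as "4-alpha" now get a 1.5x weight boost up front, and purely numeric tokens are no longer dropped (the old "N/A" fallback goes away). A standalone demo of the boost logic:

```python
# Standalone demo of the number-word weighting boost introduced above.
import re


def boosted_weight(token: str, weight: int) -> int:
    # Tokens like "4-alpha" (digits followed by hyphen/word chars) get a 1.5x boost.
    if re.match(r"^\d+[\-\w]+", token):
        return int(weight * 1.5)
    return weight


counts = {}
for token, weight in [("kinase", 2), ("4-alpha", 2), ("transport", 3)]:
    counts[token] = counts.get(token, 0) + boosted_weight(token, weight)
print(counts)  # {'kinase': 2, '4-alpha': 3, 'transport': 3}
```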
risk_network-0.0.16b0/src/risk/_annotation/_io.py → risk_network-0.0.16b2/src/risk/annotation/io.py
RENAMED
@@ -1,6 +1,6 @@
 """
-risk/_annotation/_io
-~~~~~~~~~~~~~~~~~~~~
+risk/annotation/io
+~~~~~~~~~~~~~~~~~~
 """
 
 import json
@@ -9,8 +9,8 @@ from typing import Any, Dict
 import networkx as nx
 import pandas as pd
 
-from .._log import log_header, logger, params
-from ._annotation import load_annotation
+from ..log import log_header, logger, params
+from .annotation import load_annotation
 
 
 class AnnotationHandler:
{risk_network-0.0.16b0/src/risk/_neighborhoods → risk_network-0.0.16b2/src/risk/cluster}/_community.py
RENAMED
@@ -1,6 +1,6 @@
 """
-risk/_neighborhoods/_community
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+risk/cluster/_community
+~~~~~~~~~~~~~~~~~~~~~~~
 """
 
 import community as community_louvain
@@ -12,14 +12,14 @@ from leidenalg import RBConfigurationVertexPartition, find_partition
 from networkx.algorithms.community import greedy_modularity_communities
 from scipy.sparse import csr_matrix
 
-from .._log import logger
+from ..log import logger
 
 
-def calculate_greedy_modularity_neighborhoods(
+def calculate_greedy_modularity_clusters(
     network: nx.Graph, fraction_shortest_edges: float = 1.0
 ) -> csr_matrix:
     """
-    Calculate neighborhoods using the Greedy Modularity method with CSR matrix output.
+    Calculate clusters using the Greedy Modularity method with CSR matrix output.
 
     Args:
         network (nx.Graph): The network graph.
@@ -27,7 +27,7 @@ def calculate_greedy_modularity_neighborhoods(
         subgraphs before clustering. Defaults to 1.0.
 
     Returns:
-        csr_matrix: A binary neighborhood matrix (CSR) where nodes in the same community have 1, and others have 0.
+        csr_matrix: A binary cluster matrix (CSR) where nodes in the same community have 1, and others have 0.
 
     Raises:
         ValueError: If the subgraph has no edges after filtering.
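`greedy_modularity_communities` comes straight from NetworkX, per the import above; a minimal call, leaving out the `fraction_shortest_edges` edge filtering that the function applies first, looks like this:

```python
# Minimal NetworkX greedy-modularity call matching the module import above;
# the edge filtering controlled by fraction_shortest_edges is omitted here.
import networkx as nx
from networkx.algorithms.community import greedy_modularity_communities

G = nx.karate_club_graph()
communities = greedy_modularity_communities(G)  # iterable of frozensets of nodes
print([sorted(c) for c in communities])
```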
@@ -55,12 +55,12 @@ def calculate_greedy_modularity_neighborhoods(
     # Create a CSR matrix
     num_nodes = len(nodes)
     data = np.ones(len(row_indices), dtype=int)
-    neighborhoods = csr_matrix((data, (row_indices, col_indices)), shape=(num_nodes, num_nodes))
+    clusters = csr_matrix((data, (row_indices, col_indices)), shape=(num_nodes, num_nodes))
 
-    return neighborhoods
+    return clusters
 
 
-def calculate_label_propagation_neighborhoods(
+def calculate_label_propagation_clusters(
     network: nx.Graph, fraction_shortest_edges: float = 1.0
 ) -> csr_matrix:
     """
@@ -72,7 +72,7 @@ def calculate_label_propagation_neighborhoods(
         subgraphs before clustering. Defaults to 1.0.
 
     Returns:
-        csr_matrix: A binary neighborhood matrix (CSR) on Label Propagation.
+        csr_matrix: A binary cluster matrix (CSR) on Label Propagation.
 
     Raises:
         ValueError: If the subgraph has no edges after filtering.
@@ -92,7 +92,7 @@ def calculate_label_propagation_neighborhoods(
     # Prepare data for CSR matrix
     row_indices = []
     col_indices = []
-    # Assign neighborhoods based on community labels using the mapped indices
+    # Assign clusters based on community labels using the mapped indices
     for community in communities:
         mapped_indices = [node_index_map[node] for node in community]
         for i in mapped_indices:
@@ -103,19 +103,19 @@ def calculate_label_propagation_neighborhoods(
     # Create a CSR matrix
     num_nodes = len(nodes)
     data = np.ones(len(row_indices), dtype=int)
-    neighborhoods = csr_matrix((data, (row_indices, col_indices)), shape=(num_nodes, num_nodes))
+    clusters = csr_matrix((data, (row_indices, col_indices)), shape=(num_nodes, num_nodes))
 
-    return neighborhoods
+    return clusters
 
 
-def calculate_leiden_neighborhoods(
+def calculate_leiden_clusters(
     network: nx.Graph,
     resolution: float = 1.0,
     fraction_shortest_edges: float = 1.0,
     random_seed: int = 888,
 ) -> csr_matrix:
     """
-    Calculate neighborhoods using the Leiden method with CSR matrix output.
+    Calculate clusters using the Leiden method with CSR matrix output.
 
     Args:
         network (nx.Graph): The network graph.
@@ -125,7 +125,7 @@ def calculate_leiden_neighborhoods(
         random_seed (int, optional): Random seed for reproducibility. Defaults to 888.
 
     Returns:
-        csr_matrix: A binary neighborhood matrix (CSR) where nodes in the same community have 1, and others have 0.
+        csr_matrix: A binary cluster matrix (CSR) where nodes in the same community have 1, and others have 0.
 
     Raises:
         ValueError: If the subgraph has no edges after filtering.
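The Leiden body is not visible in this extraction, but the module-level import of `find_partition` and `RBConfigurationVertexPartition` pins the dependency to `leidenalg`, which operates on igraph graphs. A sketch of a typical call using the defaults above; the NetworkX-to-igraph conversion is an assumption, not code from this diff:

```python
# Sketch of a typical leidenalg call matching the imports in this diff;
# the actual body of calculate_leiden_clusters is not shown here.
import igraph as ig
import networkx as nx
from leidenalg import RBConfigurationVertexPartition, find_partition

G = nx.karate_club_graph()
ig_graph = ig.Graph.from_networkx(G)  # leidenalg operates on igraph graphs

partition = find_partition(
    ig_graph,
    RBConfigurationVertexPartition,
    resolution_parameter=1.0,  # mirrors the resolution default above
    seed=888,                  # mirrors the random_seed default above
)
communities = [list(community) for community in partition]
```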
@@ -160,19 +160,19 @@ def calculate_leiden_neighborhoods(
     # Create a CSR matrix
     num_nodes = len(nodes)
     data = np.ones(len(row_indices), dtype=int)
-    neighborhoods = csr_matrix((data, (row_indices, col_indices)), shape=(num_nodes, num_nodes))
+    clusters = csr_matrix((data, (row_indices, col_indices)), shape=(num_nodes, num_nodes))
 
-    return neighborhoods
+    return clusters
 
 
-def calculate_louvain_neighborhoods(
+def calculate_louvain_clusters(
     network: nx.Graph,
     resolution: float = 0.1,
     fraction_shortest_edges: float = 1.0,
     random_seed: int = 888,
 ) -> csr_matrix:
     """
-    Calculate neighborhoods using the Louvain method.
+    Calculate clusters using the Louvain method.
 
     Args:
         network (nx.Graph): The network graph.
@@ -182,7 +182,7 @@ def calculate_louvain_neighborhoods(
         random_seed (int, optional): Random seed for reproducibility. Defaults to 888.
 
     Returns:
-        csr_matrix: A binary neighborhood matrix in CSR format.
+        csr_matrix: A binary cluster matrix in CSR format.
 
     Raises:
         ValueError: If the subgraph has no edges after filtering.
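Similarly for Louvain: `import community as community_louvain` at the top of the module is the python-louvain package, whose `best_partition` takes `resolution` and `random_state` arguments matching the defaults above. A sketch (again, the actual call site is not shown in this extraction):

```python
# Sketch of a typical python-louvain call matching the module import
# "import community as community_louvain" above; the body of
# calculate_louvain_clusters itself is not shown in this diff.
import community as community_louvain
import networkx as nx

G = nx.karate_club_graph()
partition = community_louvain.best_partition(
    G,
    resolution=0.1,     # mirrors the resolution default above
    random_state=888,   # mirrors the random_seed default above
)
# best_partition maps node -> community id; invert it to get member lists.
communities = {}
for node, comm_id in partition.items():
    communities.setdefault(comm_id, []).append(node)
```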
@@ -217,16 +217,16 @@ def calculate_louvain_neighborhoods(
     # Create a CSR matrix
     num_nodes = len(nodes)
     data = np.ones(len(row_indices), dtype=int)
-    neighborhoods = csr_matrix((data, (row_indices, col_indices)), shape=(num_nodes, num_nodes))
+    clusters = csr_matrix((data, (row_indices, col_indices)), shape=(num_nodes, num_nodes))
 
-    return neighborhoods
+    return clusters
 
 
-def calculate_markov_clustering_neighborhoods(
+def calculate_markov_clustering_clusters(
     network: nx.Graph, fraction_shortest_edges: float = 1.0
 ) -> csr_matrix:
     """
-    Apply Markov Clustering (MCL) to the network and return a binary neighborhood matrix (CSR).
+    Apply Markov Clustering (MCL) to the network and return a binary cluster matrix (CSR).
 
     Args:
         network (nx.Graph): The network graph.
@@ -234,7 +234,7 @@ def calculate_markov_clustering_neighborhoods(
         subgraphs before clustering. Defaults to 1.0.
 
     Returns:
-        csr_matrix: A binary neighborhood matrix (CSR) on Markov Clustering.
+        csr_matrix: A binary cluster matrix (CSR) on Markov Clustering.
 
     Raises:
         ValueError: If the subgraph has no edges after filtering.
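The MCL imports are not visible in the extracted hunks, so the following is an assumption-labeled sketch of a standard run with the `markov_clustering` package rather than RISK's actual code:

```python
# Hedged sketch of a Markov Clustering (MCL) run; the markov_clustering
# package and these call names are assumptions -- the relevant imports are
# not visible in the extracted hunks above.
import markov_clustering as mc
import networkx as nx
from scipy.sparse import csr_matrix

G = nx.karate_club_graph()
adjacency = csr_matrix(nx.to_numpy_array(G))  # MCL operates on an adjacency matrix
result = mc.run_mcl(adjacency)                # run MCL with default parameters
clusters = mc.get_clusters(result)            # list of tuples of node indices
```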
@@ -288,12 +288,12 @@ def calculate_markov_clustering_neighborhoods(
 
     # Step 5: Create a CSR matrix
     data = np.ones(len(row_indices), dtype=int)
-    neighborhoods = csr_matrix((data, (row_indices, col_indices)), shape=(num_nodes, num_nodes))
+    clusters = csr_matrix((data, (row_indices, col_indices)), shape=(num_nodes, num_nodes))
 
-    return neighborhoods
+    return clusters
 
 
-def calculate_spinglass_neighborhoods(
+def calculate_spinglass_clusters(
     network: nx.Graph, fraction_shortest_edges: float = 1.0
 ) -> csr_matrix:
     """
@@ -305,7 +305,7 @@ def calculate_spinglass_neighborhoods(
         subgraphs before clustering. Defaults to 1.0.
 
     Returns:
-        csr_matrix: A binary neighborhood matrix (CSR) based on Spinglass communities.
+        csr_matrix: A binary cluster matrix (CSR) based on Spinglass communities.
 
     Raises:
         ValueError: If the subgraph has no edges after filtering.
@@ -340,7 +340,7 @@ def calculate_spinglass_neighborhoods(
             logger.error(f"Error running Spinglass on component: {e}")
             continue
 
-        # Step 3: Assign neighborhoods based on community labels
+        # Step 3: Assign clusters based on community labels
         for community in communities:
             mapped_indices = [
                 node_index_map[igraph_subgraph.vs[node]["_nx_name"]] for node in community
@@ -353,12 +353,12 @@ def calculate_spinglass_neighborhoods(
     # Step 4: Create a CSR matrix
     num_nodes = len(nodes)
     data = np.ones(len(row_indices), dtype=int)
-    neighborhoods = csr_matrix((data, (row_indices, col_indices)), shape=(num_nodes, num_nodes))
+    clusters = csr_matrix((data, (row_indices, col_indices)), shape=(num_nodes, num_nodes))
 
-    return neighborhoods
+    return clusters
 
 
-def calculate_walktrap_neighborhoods(
+def calculate_walktrap_clusters(
     network: nx.Graph, fraction_shortest_edges: float = 1.0
 ) -> csr_matrix:
     """
@@ -370,7 +370,7 @@ def calculate_walktrap_neighborhoods(
         subgraphs before clustering. Defaults to 1.0.
 
     Returns:
-        csr_matrix: A binary neighborhood matrix (CSR) on Walktrap communities.
+        csr_matrix: A binary cluster matrix (CSR) on Walktrap communities.
 
     Raises:
         ValueError: If the subgraph has no edges after filtering.
@@ -400,9 +400,9 @@ def calculate_walktrap_neighborhoods(
     # Create a CSR matrix
     num_nodes = len(nodes)
     data = np.ones(len(row_indices), dtype=int)
-    neighborhoods = csr_matrix((data, (row_indices, col_indices)), shape=(num_nodes, num_nodes))
+    clusters = csr_matrix((data, (row_indices, col_indices)), shape=(num_nodes, num_nodes))
 
-    return neighborhoods
+    return clusters
 
 
 def _create_percentile_limited_subgraph(G: nx.Graph, fraction_shortest_edges: float) -> nx.Graph:
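Every function in `_community.py` converges on the same output contract: a binary CSR matrix whose `(i, j)` entry is 1 exactly when nodes `i` and `j` share a community. A compact, self-contained illustration of that construction:

```python
# Compact illustration of the binary cluster-matrix construction repeated in
# each function above: cell (i, j) is 1 when nodes i and j share a community.
import numpy as np
from scipy.sparse import csr_matrix

communities = [[0, 1], [2, 3, 4]]  # toy community assignment over 5 nodes
row_indices, col_indices = [], []
for community in communities:
    for i in community:
        for j in community:
            row_indices.append(i)
            col_indices.append(j)

num_nodes = 5
data = np.ones(len(row_indices), dtype=int)
clusters = csr_matrix((data, (row_indices, col_indices)), shape=(num_nodes, num_nodes))
print(clusters.toarray())
```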
|