risk-network 0.0.13b3__tar.gz → 0.0.13b4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52)
  1. {risk_network-0.0.13b3/src/risk_network.egg-info → risk_network-0.0.13b4}/PKG-INFO +2 -2
  2. {risk_network-0.0.13b3 → risk_network-0.0.13b4}/pyproject.toml +1 -1
  3. {risk_network-0.0.13b3 → risk_network-0.0.13b4}/src/risk/__init__.py +1 -1
  4. {risk_network-0.0.13b3 → risk_network-0.0.13b4}/src/risk/annotation/annotation.py +22 -8
  5. {risk_network-0.0.13b3 → risk_network-0.0.13b4}/src/risk/annotation/io.py +58 -20
  6. {risk_network-0.0.13b3 → risk_network-0.0.13b4/src/risk_network.egg-info}/PKG-INFO +2 -2
  7. {risk_network-0.0.13b3 → risk_network-0.0.13b4}/tests/test_load_annotation.py +44 -2
  8. {risk_network-0.0.13b3 → risk_network-0.0.13b4}/tests/test_log.py +2 -0
  9. {risk_network-0.0.13b3 → risk_network-0.0.13b4}/LICENSE +0 -0
  10. {risk_network-0.0.13b3 → risk_network-0.0.13b4}/README.md +0 -0
  11. {risk_network-0.0.13b3 → risk_network-0.0.13b4}/setup.cfg +0 -0
  12. {risk_network-0.0.13b3 → risk_network-0.0.13b4}/src/risk/annotation/__init__.py +0 -0
  13. {risk_network-0.0.13b3 → risk_network-0.0.13b4}/src/risk/annotation/nltk_setup.py +0 -0
  14. {risk_network-0.0.13b3 → risk_network-0.0.13b4}/src/risk/log/__init__.py +0 -0
  15. {risk_network-0.0.13b3 → risk_network-0.0.13b4}/src/risk/log/console.py +0 -0
  16. {risk_network-0.0.13b3 → risk_network-0.0.13b4}/src/risk/log/parameters.py +0 -0
  17. {risk_network-0.0.13b3 → risk_network-0.0.13b4}/src/risk/neighborhoods/__init__.py +0 -0
  18. {risk_network-0.0.13b3 → risk_network-0.0.13b4}/src/risk/neighborhoods/api.py +0 -0
  19. {risk_network-0.0.13b3 → risk_network-0.0.13b4}/src/risk/neighborhoods/community.py +0 -0
  20. {risk_network-0.0.13b3 → risk_network-0.0.13b4}/src/risk/neighborhoods/domains.py +0 -0
  21. {risk_network-0.0.13b3 → risk_network-0.0.13b4}/src/risk/neighborhoods/neighborhoods.py +0 -0
  22. {risk_network-0.0.13b3 → risk_network-0.0.13b4}/src/risk/neighborhoods/stats/__init__.py +0 -0
  23. {risk_network-0.0.13b3 → risk_network-0.0.13b4}/src/risk/neighborhoods/stats/permutation/__init__.py +0 -0
  24. {risk_network-0.0.13b3 → risk_network-0.0.13b4}/src/risk/neighborhoods/stats/permutation/permutation.py +0 -0
  25. {risk_network-0.0.13b3 → risk_network-0.0.13b4}/src/risk/neighborhoods/stats/permutation/test_functions.py +0 -0
  26. {risk_network-0.0.13b3 → risk_network-0.0.13b4}/src/risk/neighborhoods/stats/tests.py +0 -0
  27. {risk_network-0.0.13b3 → risk_network-0.0.13b4}/src/risk/network/__init__.py +0 -0
  28. {risk_network-0.0.13b3 → risk_network-0.0.13b4}/src/risk/network/graph/__init__.py +0 -0
  29. {risk_network-0.0.13b3 → risk_network-0.0.13b4}/src/risk/network/graph/api.py +0 -0
  30. {risk_network-0.0.13b3 → risk_network-0.0.13b4}/src/risk/network/graph/graph.py +0 -0
  31. {risk_network-0.0.13b3 → risk_network-0.0.13b4}/src/risk/network/graph/stats.py +0 -0
  32. {risk_network-0.0.13b3 → risk_network-0.0.13b4}/src/risk/network/graph/summary.py +0 -0
  33. {risk_network-0.0.13b3 → risk_network-0.0.13b4}/src/risk/network/io.py +0 -0
  34. {risk_network-0.0.13b3 → risk_network-0.0.13b4}/src/risk/network/plotter/__init__.py +0 -0
  35. {risk_network-0.0.13b3 → risk_network-0.0.13b4}/src/risk/network/plotter/api.py +0 -0
  36. {risk_network-0.0.13b3 → risk_network-0.0.13b4}/src/risk/network/plotter/canvas.py +0 -0
  37. {risk_network-0.0.13b3 → risk_network-0.0.13b4}/src/risk/network/plotter/contour.py +0 -0
  38. {risk_network-0.0.13b3 → risk_network-0.0.13b4}/src/risk/network/plotter/labels.py +0 -0
  39. {risk_network-0.0.13b3 → risk_network-0.0.13b4}/src/risk/network/plotter/network.py +0 -0
  40. {risk_network-0.0.13b3 → risk_network-0.0.13b4}/src/risk/network/plotter/plotter.py +0 -0
  41. {risk_network-0.0.13b3 → risk_network-0.0.13b4}/src/risk/network/plotter/utils/colors.py +0 -0
  42. {risk_network-0.0.13b3 → risk_network-0.0.13b4}/src/risk/network/plotter/utils/layout.py +0 -0
  43. {risk_network-0.0.13b3 → risk_network-0.0.13b4}/src/risk/risk.py +0 -0
  44. {risk_network-0.0.13b3 → risk_network-0.0.13b4}/src/risk_network.egg-info/SOURCES.txt +0 -0
  45. {risk_network-0.0.13b3 → risk_network-0.0.13b4}/src/risk_network.egg-info/dependency_links.txt +0 -0
  46. {risk_network-0.0.13b3 → risk_network-0.0.13b4}/src/risk_network.egg-info/requires.txt +0 -0
  47. {risk_network-0.0.13b3 → risk_network-0.0.13b4}/src/risk_network.egg-info/top_level.txt +0 -0
  48. {risk_network-0.0.13b3 → risk_network-0.0.13b4}/tests/test_load_graph.py +0 -0
  49. {risk_network-0.0.13b3 → risk_network-0.0.13b4}/tests/test_load_io_combinations.py +0 -0
  50. {risk_network-0.0.13b3 → risk_network-0.0.13b4}/tests/test_load_neighborhoods.py +0 -0
  51. {risk_network-0.0.13b3 → risk_network-0.0.13b4}/tests/test_load_network.py +0 -0
  52. {risk_network-0.0.13b3 → risk_network-0.0.13b4}/tests/test_load_plotter.py +0 -0
@@ -1,7 +1,7 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: risk-network
3
- Version: 0.0.13b3
4
- Summary: A Python package for biological network analysis.
3
+ Version: 0.0.13b4
4
+ Summary: A Python package for scalable network analysis and high-quality visualization.
5
5
  Author-email: Ira Horecka <ira89@icloud.com>
6
6
  License: GPL-3.0-or-later
7
7
  Project-URL: Homepage, https://github.com/riskportal/network
@@ -5,7 +5,7 @@ build-backend = "setuptools.build_meta"
5
5
  [project]
6
6
  name = "risk-network"
7
7
  dynamic = ["version"]
8
- description = "A Python package for biological network analysis."
8
+ description = "A Python package for scalable network analysis and high-quality visualization."
9
9
  authors = [
10
10
  { name = "Ira Horecka", email = "ira89@icloud.com" },
11
11
  ]
@@ -7,4 +7,4 @@ RISK: Regional Inference of Significant Kinships
7
7
 
8
8
  from risk.risk import RISK
9
9
 
10
- __version__ = "0.0.13-beta.3"
10
+ __version__ = "0.0.13-beta.4"
@@ -36,7 +36,10 @@ initialize_nltk()
36
36
 
37
37
 
38
38
  def load_annotation(
39
- network: nx.Graph, annotation_input: Dict[str, Any], min_nodes_per_term: int = 2
39
+ network: nx.Graph,
40
+ annotation_input: Dict[str, Any],
41
+ min_nodes_per_term: int = 1,
42
+ max_nodes_per_term: int = 10_000,
40
43
  ) -> Dict[str, Any]:
41
44
  """Convert annotation input to a sparse matrix and reindex based on the network's node labels.
42
45
 
@@ -44,7 +47,9 @@ def load_annotation(
44
47
  network (nx.Graph): The network graph.
45
48
  annotation_input (Dict[str, Any]): An annotation dictionary.
46
49
  min_nodes_per_term (int, optional): The minimum number of network nodes required for each annotation
47
- term to be included. Defaults to 2.
50
+ term to be included. Defaults to 1.
51
+ max_nodes_per_term (int, optional): The maximum number of network nodes allowed for each annotation
52
+ term. Defaults to 10_000.
48
53
 
49
54
  Returns:
50
55
  Dict[str, Any]: A dictionary containing ordered nodes, ordered annotations, and the sparse binary annotations
@@ -52,7 +57,6 @@ def load_annotation(
52
57
 
53
58
  Raises:
54
59
  ValueError: If no annotation is found for the nodes in the network.
55
- ValueError: If no annotation has at least min_nodes_per_term nodes in the network.
56
60
  """
57
61
  # Step 1: Map nodes and annotations to indices
58
62
  node_label_order = [attr["label"] for _, attr in network.nodes(data=True) if "label" in attr]
@@ -72,9 +76,18 @@ def load_annotation(
72
76
  # Create a sparse binary matrix
73
77
  num_nodes = len(node_to_idx)
74
78
  num_annotation = len(annotation_to_idx)
75
- annotation_pivot = coo_matrix((data, (row, col)), shape=(num_nodes, num_annotation)).tocsr()
76
- # Step 3: Filter out annotations with fewer than min_nodes_per_term occurrences
77
- valid_annotation = annotation_pivot.sum(axis=0).A1 >= min_nodes_per_term
79
+ # Convert to a sparse matrix and set the data type to uint8 for binary representation
80
+ annotation_pivot = (
81
+ coo_matrix((data, (row, col)), shape=(num_nodes, num_annotation)).tocsr().astype(np.uint8)
82
+ )
83
+ # Step 3: Filter out annotations with too few or too many nodes
84
+ valid_annotation = np.array(
85
+ [
86
+ annotation_pivot[:, i].sum() >= min_nodes_per_term
87
+ and annotation_pivot[:, i].sum() <= max_nodes_per_term
88
+ for i in range(num_annotation)
89
+ ]
90
+ )
78
91
  annotation_pivot = annotation_pivot[:, valid_annotation]
79
92
  # Step 4: Raise errors for empty matrices
80
93
  if annotation_pivot.nnz == 0:
@@ -83,7 +96,7 @@ def load_annotation(
83
96
  num_remaining_annotation = annotation_pivot.shape[1]
84
97
  if num_remaining_annotation == 0:
85
98
  raise ValueError(
86
- f"No annotation terms found with at least {min_nodes_per_term} nodes in the network."
99
+ f"No annotation terms found with at least {min_nodes_per_term} nodes and at most {max_nodes_per_term} nodes."
87
100
  )
88
101
 
89
102
  # Step 5: Extract ordered nodes and annotations
@@ -94,6 +107,7 @@ def load_annotation(
94
107
 
95
108
  # Log the filtering details
96
109
  logger.info(f"Minimum number of nodes per annotation term: {min_nodes_per_term}")
110
+ logger.info(f"Maximum number of nodes per annotation term: {max_nodes_per_term}")
97
111
  logger.info(f"Number of input annotation terms: {num_annotation}")
98
112
  logger.info(f"Number of remaining annotation terms: {num_remaining_annotation}")
99
113
 
@@ -122,7 +136,7 @@ def define_top_annotation(
122
136
  significant_significance_matrix (np.ndarray): Enrichment matrix below alpha threshold.
123
137
  significant_binary_significance_matrix (np.ndarray): Binary significance matrix below alpha threshold.
124
138
  min_cluster_size (int, optional): Minimum cluster size. Defaults to 5.
125
- max_cluster_size (int, optional): Maximum cluster size. Defaults to 1000.
139
+ max_cluster_size (int, optional): Maximum cluster size. Defaults to 10_000.
126
140
 
127
141
  Returns:
128
142
  pd.DataFrame: DataFrame with top annotations and their properties.
@@ -21,7 +21,11 @@ class AnnotationIO:
21
21
  """
22
22
 
23
23
  def load_annotation_json(
24
- self, network: nx.Graph, filepath: str, min_nodes_per_term: int = 2
24
+ self,
25
+ network: nx.Graph,
26
+ filepath: str,
27
+ min_nodes_per_term: int = 1,
28
+ max_nodes_per_term: int = 10_000,
25
29
  ) -> Dict[str, Any]:
26
30
  """Load annotation from a JSON file and convert them to a DataFrame.
27
31
 
@@ -29,7 +33,9 @@ class AnnotationIO:
29
33
  network (NetworkX graph): The network to which the annotation is related.
30
34
  filepath (str): Path to the JSON annotation file.
31
35
  min_nodes_per_term (int, optional): The minimum number of network nodes required for each annotation
32
- term to be included. Defaults to 2.
36
+ term to be included. Defaults to 1.
37
+ max_nodes_per_term (int, optional): The maximum number of network nodes allowed for each annotation
38
+ term to be included. Defaults to 10_000.
33
39
 
34
40
  Returns:
35
41
  Dict[str, Any]: A dictionary containing ordered nodes, ordered annotations, and the annotation matrix.
@@ -37,7 +43,10 @@ class AnnotationIO:
37
43
  filetype = "JSON"
38
44
  # Log the loading of the JSON file
39
45
  params.log_annotation(
40
- filetype=filetype, filepath=filepath, min_nodes_per_term=min_nodes_per_term
46
+ filetype=filetype,
47
+ filepath=filepath,
48
+ min_nodes_per_term=min_nodes_per_term,
49
+ max_nodes_per_term=max_nodes_per_term,
41
50
  )
42
51
  self._log_loading_annotation(filetype, filepath=filepath)
43
52
 
@@ -45,7 +54,7 @@ class AnnotationIO:
45
54
  with open(filepath, "r", encoding="utf-8") as file:
46
55
  annotation_input = json.load(file)
47
56
 
48
- return load_annotation(network, annotation_input, min_nodes_per_term)
57
+ return load_annotation(network, annotation_input, min_nodes_per_term, max_nodes_per_term)
49
58
 
50
59
  def load_annotation_excel(
51
60
  self,
@@ -55,7 +64,8 @@ class AnnotationIO:
55
64
  nodes_colname: str = "nodes",
56
65
  sheet_name: str = "Sheet1",
57
66
  nodes_delimiter: str = ";",
58
- min_nodes_per_term: int = 2,
67
+ min_nodes_per_term: int = 1,
68
+ max_nodes_per_term: int = 10_000,
59
69
  ) -> Dict[str, Any]:
60
70
  """Load annotation from an Excel file and associate them with the network.
61
71
 
@@ -67,7 +77,9 @@ class AnnotationIO:
67
77
  sheet_name (str, optional): The name of the Excel sheet to load (default is 'Sheet1').
68
78
  nodes_delimiter (str, optional): Delimiter used to separate multiple nodes within the nodes column (default is ';').
69
79
  min_nodes_per_term (int, optional): The minimum number of network nodes required for each annotation
70
- term to be included. Defaults to 2.
80
+ term to be included. Defaults to 1.
81
+ max_nodes_per_term (int, optional): The maximum number of network nodes allowed for each annotation
82
+ term to be included. Defaults to 10_000.
71
83
 
72
84
  Returns:
73
85
  Dict[str, Any]: A dictionary where each label is paired with its respective list of nodes,
@@ -76,7 +88,10 @@ class AnnotationIO:
76
88
  filetype = "Excel"
77
89
  # Log the loading of the Excel file
78
90
  params.log_annotation(
79
- filetype=filetype, filepath=filepath, min_nodes_per_term=min_nodes_per_term
91
+ filetype=filetype,
92
+ filepath=filepath,
93
+ min_nodes_per_term=min_nodes_per_term,
94
+ max_nodes_per_term=max_nodes_per_term,
80
95
  )
81
96
  self._log_loading_annotation(filetype, filepath=filepath)
82
97
 
@@ -89,7 +104,7 @@ class AnnotationIO:
89
104
  # Convert the DataFrame to a dictionary pairing labels with their corresponding nodes
90
105
  annotation_input = annotation.set_index(label_colname)[nodes_colname].to_dict()
91
106
 
92
- return load_annotation(network, annotation_input, min_nodes_per_term)
107
+ return load_annotation(network, annotation_input, min_nodes_per_term, max_nodes_per_term)
93
108
 
94
109
  def load_annotation_csv(
95
110
  self,
@@ -98,7 +113,8 @@ class AnnotationIO:
98
113
  label_colname: str = "label",
99
114
  nodes_colname: str = "nodes",
100
115
  nodes_delimiter: str = ";",
101
- min_nodes_per_term: int = 2,
116
+ min_nodes_per_term: int = 1,
117
+ max_nodes_per_term: int = 10_000,
102
118
  ) -> Dict[str, Any]:
103
119
  """Load annotation from a CSV file and associate them with the network.
104
120
 
@@ -109,7 +125,9 @@ class AnnotationIO:
109
125
  nodes_colname (str): Name of the column containing the nodes associated with each label.
110
126
  nodes_delimiter (str, optional): Delimiter used to separate multiple nodes within the nodes column (default is ';').
111
127
  min_nodes_per_term (int, optional): The minimum number of network nodes required for each annotation
112
- term to be included. Defaults to 2.
128
+ term to be included. Defaults to 1.
129
+ max_nodes_per_term (int, optional): The maximum number of network nodes allowed for each annotation
130
+ term to be included. Defaults to 10_000.
113
131
 
114
132
  Returns:
115
133
  Dict[str, Any]: A dictionary where each label is paired with its respective list of nodes,
@@ -118,7 +136,10 @@ class AnnotationIO:
118
136
  filetype = "CSV"
119
137
  # Log the loading of the CSV file
120
138
  params.log_annotation(
121
- filetype=filetype, filepath=filepath, min_nodes_per_term=min_nodes_per_term
139
+ filetype=filetype,
140
+ filepath=filepath,
141
+ min_nodes_per_term=min_nodes_per_term,
142
+ max_nodes_per_term=max_nodes_per_term,
122
143
  )
123
144
  self._log_loading_annotation(filetype, filepath=filepath)
124
145
 
@@ -127,7 +148,7 @@ class AnnotationIO:
127
148
  filepath, label_colname, nodes_colname, delimiter=",", nodes_delimiter=nodes_delimiter
128
149
  )
129
150
 
130
- return load_annotation(network, annotation_input, min_nodes_per_term)
151
+ return load_annotation(network, annotation_input, min_nodes_per_term, max_nodes_per_term)
131
152
 
132
153
  def load_annotation_tsv(
133
154
  self,
@@ -136,7 +157,8 @@ class AnnotationIO:
136
157
  label_colname: str = "label",
137
158
  nodes_colname: str = "nodes",
138
159
  nodes_delimiter: str = ";",
139
- min_nodes_per_term: int = 2,
160
+ min_nodes_per_term: int = 1,
161
+ max_nodes_per_term: int = 10_000,
140
162
  ) -> Dict[str, Any]:
141
163
  """Load annotation from a TSV file and associate them with the network.
142
164
 
@@ -147,7 +169,9 @@ class AnnotationIO:
147
169
  nodes_colname (str): Name of the column containing the nodes associated with each label.
148
170
  nodes_delimiter (str, optional): Delimiter used to separate multiple nodes within the nodes column (default is ';').
149
171
  min_nodes_per_term (int, optional): The minimum number of network nodes required for each annotation
150
- term to be included. Defaults to 2.
172
+ term to be included. Defaults to 1.
173
+ max_nodes_per_term (int, optional): The maximum number of network nodes allowed for each annotation
174
+ term to be included. Defaults to 10_000.
151
175
 
152
176
  Returns:
153
177
  Dict[str, Any]: A dictionary where each label is paired with its respective list of nodes,
@@ -156,7 +180,10 @@ class AnnotationIO:
156
180
  filetype = "TSV"
157
181
  # Log the loading of the TSV file
158
182
  params.log_annotation(
159
- filetype=filetype, filepath=filepath, min_nodes_per_term=min_nodes_per_term
183
+ filetype=filetype,
184
+ filepath=filepath,
185
+ min_nodes_per_term=min_nodes_per_term,
186
+ max_nodes_per_term=max_nodes_per_term,
160
187
  )
161
188
  self._log_loading_annotation(filetype, filepath=filepath)
162
189
 
@@ -165,10 +192,14 @@ class AnnotationIO:
165
192
  filepath, label_colname, nodes_colname, delimiter="\t", nodes_delimiter=nodes_delimiter
166
193
  )
167
194
 
168
- return load_annotation(network, annotation_input, min_nodes_per_term)
195
+ return load_annotation(network, annotation_input, min_nodes_per_term, max_nodes_per_term)
169
196
 
170
197
  def load_annotation_dict(
171
- self, network: nx.Graph, content: Dict[str, Any], min_nodes_per_term: int = 2
198
+ self,
199
+ network: nx.Graph,
200
+ content: Dict[str, Any],
201
+ min_nodes_per_term: int = 1,
202
+ max_nodes_per_term: int = 10_000,
172
203
  ) -> Dict[str, Any]:
173
204
  """Load annotation from a provided dictionary and convert them to a dictionary annotation.
174
205
 
@@ -176,7 +207,9 @@ class AnnotationIO:
176
207
  network (NetworkX graph): The network to which the annotation is related.
177
208
  content (Dict[str, Any]): The annotation dictionary to load.
178
209
  min_nodes_per_term (int, optional): The minimum number of network nodes required for each annotation
179
- term to be included. Defaults to 2.
210
+ term to be included. Defaults to 1.
211
+ max_nodes_per_term (int, optional): The maximum number of network nodes allowed for each annotation
212
+ term to be included. Defaults to 10_000.
180
213
 
181
214
  Returns:
182
215
  Dict[str, Any]: A dictionary containing ordered nodes, ordered annotations, and the annotation matrix.
@@ -192,11 +225,16 @@ class AnnotationIO:
192
225
 
193
226
  filetype = "Dictionary"
194
227
  # Log the loading of the annotation from the dictionary
195
- params.log_annotation(filepath="In-memory dictionary", filetype=filetype)
228
+ params.log_annotation(
229
+ filepath="In-memory dictionary",
230
+ filetype=filetype,
231
+ min_nodes_per_term=min_nodes_per_term,
232
+ max_nodes_per_term=max_nodes_per_term,
233
+ )
196
234
  self._log_loading_annotation(filetype, "In-memory dictionary")
197
235
 
198
236
  # Load the annotation as a dictionary from the provided dictionary
199
- return load_annotation(network, content, min_nodes_per_term)
237
+ return load_annotation(network, content, min_nodes_per_term, max_nodes_per_term)
200
238
 
201
239
  def _load_matrix_file(
202
240
  self,
@@ -1,7 +1,7 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: risk-network
3
- Version: 0.0.13b3
4
- Summary: A Python package for biological network analysis.
3
+ Version: 0.0.13b4
4
+ Summary: A Python package for scalable network analysis and high-quality visualization.
5
5
  Author-email: Ira Horecka <ira89@icloud.com>
6
6
  License: GPL-3.0-or-later
7
7
  Project-URL: Homepage, https://github.com/riskportal/network
@@ -3,11 +3,11 @@ tests/test_load_annotation
3
3
  ~~~~~~~~~~~~~~~~~~~~~~~~~~
4
4
  """
5
5
 
6
+ import json
7
+
6
8
  import pytest
7
9
  from scipy.sparse import csr_matrix, vstack
8
10
 
9
- # Ensure dummy fixtures are imported by referencing them in test signatures below.
10
-
11
11
 
12
12
  def test_missing_annotation_file(risk_obj, dummy_network):
13
13
  """Test loading an annotation file that does not exist.
@@ -22,6 +22,7 @@ def test_missing_annotation_file(risk_obj, dummy_network):
22
22
  filepath=annotation_file,
23
23
  network=dummy_network,
24
24
  min_nodes_per_term=1,
25
+ max_nodes_per_term=1000,
25
26
  )
26
27
 
27
28
 
@@ -38,6 +39,7 @@ def test_load_annotation_csv(risk_obj, cytoscape_network, data_path):
38
39
  filepath=str(annotation_file),
39
40
  network=cytoscape_network,
40
41
  min_nodes_per_term=1,
42
+ max_nodes_per_term=1000,
41
43
  )
42
44
 
43
45
  assert annotation is not None
@@ -57,6 +59,7 @@ def test_csv_annotation_structure(risk_obj, cytoscape_network, data_path):
57
59
  filepath=str(annotation_file),
58
60
  network=cytoscape_network,
59
61
  min_nodes_per_term=1,
62
+ max_nodes_per_term=1000,
60
63
  )
61
64
 
62
65
  assert isinstance(annotation, dict), "Annotation should be a dictionary"
@@ -82,6 +85,7 @@ def test_load_annotation_dict(risk_obj, dummy_network, dummy_annotation_dict):
82
85
  content=dummy_annotation_dict,
83
86
  network=dummy_network,
84
87
  min_nodes_per_term=1,
88
+ max_nodes_per_term=1000,
85
89
  )
86
90
 
87
91
  assert annotation is not None
@@ -100,6 +104,7 @@ def test_dict_annotation_structure(risk_obj, dummy_network, dummy_annotation_dic
100
104
  content=dummy_annotation_dict,
101
105
  network=dummy_network,
102
106
  min_nodes_per_term=1,
107
+ max_nodes_per_term=1000,
103
108
  )
104
109
 
105
110
  assert isinstance(annotation, dict), "Annotation should be a dictionary"
@@ -126,6 +131,7 @@ def test_load_annotation_json(risk_obj, cytoscape_network, data_path):
126
131
  filepath=str(annotation_file),
127
132
  network=cytoscape_network,
128
133
  min_nodes_per_term=1,
134
+ max_nodes_per_term=1000,
129
135
  )
130
136
 
131
137
  assert annotation is not None
@@ -145,6 +151,7 @@ def test_json_annotation_structure(risk_obj, cytoscape_network, data_path):
145
151
  filepath=str(annotation_file),
146
152
  network=cytoscape_network,
147
153
  min_nodes_per_term=1,
154
+ max_nodes_per_term=1000,
148
155
  )
149
156
 
150
157
  assert isinstance(annotation, dict), "Annotation should be a dictionary"
@@ -171,6 +178,7 @@ def test_load_annotation_tsv(risk_obj, cytoscape_network, data_path):
171
178
  filepath=str(annotation_file),
172
179
  network=cytoscape_network,
173
180
  min_nodes_per_term=1,
181
+ max_nodes_per_term=1000,
174
182
  )
175
183
 
176
184
  assert annotation is not None
@@ -190,6 +198,7 @@ def test_tsv_annotation_structure(risk_obj, cytoscape_network, data_path):
190
198
  filepath=str(annotation_file),
191
199
  network=cytoscape_network,
192
200
  min_nodes_per_term=1,
201
+ max_nodes_per_term=1000,
193
202
  )
194
203
 
195
204
  assert isinstance(annotation, dict), "Annotation should be a dictionary"
@@ -216,6 +225,7 @@ def test_load_annotation_excel(risk_obj, cytoscape_network, data_path):
216
225
  filepath=str(annotation_file),
217
226
  network=cytoscape_network,
218
227
  min_nodes_per_term=1,
228
+ max_nodes_per_term=1000,
219
229
  )
220
230
 
221
231
  assert annotation is not None
@@ -235,6 +245,7 @@ def test_excel_annotation_structure(risk_obj, cytoscape_network, data_path):
235
245
  filepath=str(annotation_file),
236
246
  network=cytoscape_network,
237
247
  min_nodes_per_term=1,
248
+ max_nodes_per_term=1000,
238
249
  )
239
250
 
240
251
  assert isinstance(annotation, dict), "Annotation should be a dictionary"
@@ -262,11 +273,13 @@ def test_combined_annotation(risk_obj, cytoscape_network, data_path):
262
273
  filepath=str(csv_file),
263
274
  network=cytoscape_network,
264
275
  min_nodes_per_term=1,
276
+ max_nodes_per_term=1000,
265
277
  )
266
278
  json_annotation = risk_obj.load_annotation_json(
267
279
  filepath=str(json_file),
268
280
  network=cytoscape_network,
269
281
  min_nodes_per_term=1,
282
+ max_nodes_per_term=1000,
270
283
  )
271
284
  # Combine the components of the annotations
272
285
  combined_annotation = {
@@ -289,3 +302,32 @@ def test_combined_annotation(risk_obj, cytoscape_network, data_path):
289
302
  assert len(combined_annotation["ordered_nodes"]) == len(csv_annotation["ordered_nodes"]) + len(
290
303
  json_annotation["ordered_nodes"]
291
304
  )
305
+
306
+
307
+ def test_min_max_nodes_per_term(risk_obj, cytoscape_network, data_path):
308
+ """Test that loaded annotation respects min and max node limits per term.
309
+
310
+ Args:
311
+ risk_obj: The RISK object instance used for loading annotation.
312
+ cytoscape_network: The network object to which annotation will be applied.
313
+ data_path: The base path to the directory containing the annotation files.
314
+ """
315
+ annotation_file = data_path / "json" / "annotation" / "go_biological_process.json"
316
+ min_nodes = 2
317
+ max_nodes = 100
318
+ # Load annotation with filtering
319
+ annotation = risk_obj.load_annotation_json(
320
+ filepath=str(annotation_file),
321
+ network=cytoscape_network,
322
+ min_nodes_per_term=min_nodes,
323
+ max_nodes_per_term=max_nodes,
324
+ )
325
+ # Extract the mapping of term to genes from the raw JSON input
326
+ with open(annotation_file, "r") as f:
327
+ raw_dict = json.load(f)
328
+
329
+ filtered_terms = annotation["ordered_annotation"]
330
+ for term in filtered_terms:
331
+ gene_count = len(raw_dict[term])
332
+ assert gene_count >= min_nodes, f"Term {term} has too few genes: {gene_count}"
333
+ assert gene_count <= max_nodes, f"Term {term} has too many genes: {gene_count}"
@@ -66,7 +66,9 @@ def test_params_log_annotation(log_capture):
66
66
  filetype="CSV",
67
67
  filepath="mock/path/to/file.csv",
68
68
  min_nodes_per_term=3,
69
+ max_nodes_per_term=5,
69
70
  )
70
71
  assert params.annotation["filetype"] == "CSV"
71
72
  assert params.annotation["filepath"] == "mock/path/to/file.csv"
72
73
  assert params.annotation["min_nodes_per_term"] == 3
74
+ assert params.annotation["max_nodes_per_term"] == 5
File without changes