risk-network 0.0.9b5__py3-none-any.whl → 0.0.9b6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
risk/__init__.py CHANGED
@@ -7,4 +7,4 @@ RISK: RISK Infers Spatial Kinships
 
 from risk.risk import RISK
 
-__version__ = "0.0.9-beta.5"
+__version__ = "0.0.9-beta.6"
risk/annotations/annotations.py CHANGED
@@ -15,6 +15,8 @@ import pandas as pd
 from nltk.tokenize import word_tokenize
 from nltk.corpus import stopwords
 
+from risk.log import logger
+
 
 def _setup_nltk():
     """Ensure necessary NLTK data is downloaded."""
@@ -35,15 +37,23 @@ _setup_nltk()
 stop_words = set(stopwords.words("english"))
 
 
-def load_annotations(network: nx.Graph, annotations_input: Dict[str, Any]) -> Dict[str, Any]:
+def load_annotations(
+    network: nx.Graph, annotations_input: Dict[str, Any], min_nodes_per_term: int = 2
+) -> Dict[str, Any]:
     """Convert annotations input to a DataFrame and reindex based on the network's node labels.
 
     Args:
         network (nx.Graph): The network graph.
         annotations_input (Dict[str, Any]): A dictionary with annotations.
+        min_nodes_per_term (int, optional): The minimum number of network nodes required for each annotation
+            term to be included. Defaults to 2.
 
     Returns:
         Dict[str, Any]: A dictionary containing ordered nodes, ordered annotations, and the binary annotations matrix.
+
+    Raises:
+        ValueError: If no annotations are found for the nodes in the network.
+        ValueError: If no annotations have at least min_nodes_per_term nodes in the network.
     """
     # Flatten the dictionary to a list of tuples for easier DataFrame creation
     flattened_annotations = [
@@ -61,13 +71,24 @@ def load_annotations(network: nx.Graph, annotations_input: Dict[str, Any]) -> Di
     annotations_pivot = annotations_pivot.reindex(index=node_label_order)
     # Raise an error if no valid annotations are found for the nodes in the network
     if annotations_pivot.notnull().sum().sum() == 0:
+        raise ValueError("No terms found in the annotation file for the nodes in the network.")
+
+    # Filter out annotations with fewer than min_nodes_per_term occurrences
+    # This assists in reducing noise and focusing on more relevant annotations for statistical analysis
+    num_terms_before_filtering = annotations_pivot.shape[1]
+    annotations_pivot = annotations_pivot.loc[
+        :, (annotations_pivot.sum(axis=0) >= min_nodes_per_term)
+    ]
+    num_terms_after_filtering = annotations_pivot.shape[1]
+    # Log the number of annotations before and after filtering
+    logger.info(f"Minimum number of nodes per annotation term: {min_nodes_per_term}")
+    logger.info(f"Number of input annotation terms: {num_terms_before_filtering}")
+    logger.info(f"Number of remaining annotation terms: {num_terms_after_filtering}")
+    if num_terms_after_filtering == 0:
         raise ValueError(
-            "No annotations found in the annotations file for the nodes in the network."
+            f"No annotation terms found with at least {min_nodes_per_term} nodes in the network."
         )
 
-    # Remove columns with all zeros and those with only a single '1' to improve statistical performance
-    # (i.e., it's unreliable to compute the significance of an annotation in a node cluster based on a single occurrence).
-    annotations_pivot = annotations_pivot.loc[:, (annotations_pivot.sum(axis=0) > 1)]
     # Extract ordered nodes and annotations
     ordered_nodes = tuple(annotations_pivot.index)
     ordered_annotations = tuple(annotations_pivot.columns)
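
For readers who want to see the new min_nodes_per_term filter in isolation, here is a minimal, standalone sketch of the column-filtering step added above. The toy DataFrame and its node/term names are illustrative assumptions, not data from the package; only the .loc / sum(axis=0) expression mirrors the diff.

import pandas as pd

# Toy binary annotations matrix: rows are network nodes, columns are annotation terms.
annotations_pivot = pd.DataFrame(
    {
        "term_A": [1, 1, 0, 0],  # annotates 2 nodes -> kept at the default threshold
        "term_B": [1, 0, 0, 0],  # annotates 1 node  -> dropped
        "term_C": [1, 1, 1, 0],  # annotates 3 nodes -> kept
    },
    index=["node1", "node2", "node3", "node4"],
)

min_nodes_per_term = 2
num_terms_before_filtering = annotations_pivot.shape[1]
# Same filtering expression as the new code path in load_annotations
annotations_pivot = annotations_pivot.loc[
    :, annotations_pivot.sum(axis=0) >= min_nodes_per_term
]
num_terms_after_filtering = annotations_pivot.shape[1]

print(num_terms_before_filtering, num_terms_after_filtering)  # 3 2
print(list(annotations_pivot.columns))  # ['term_A', 'term_C']

With the default of 2, this reproduces the previous hard-coded rule (annotations_pivot.sum(axis=0) > 1), but the threshold is now configurable and the before/after term counts are logged.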
risk/annotations/io.py CHANGED
@@ -25,12 +25,16 @@ class AnnotationsIO:
     def __init__(self):
         pass
 
-    def load_json_annotation(self, network: nx.Graph, filepath: str) -> Dict[str, Any]:
+    def load_json_annotation(
+        self, network: nx.Graph, filepath: str, min_nodes_per_term: int = 2
+    ) -> Dict[str, Any]:
         """Load annotations from a JSON file and convert them to a DataFrame.
 
         Args:
             network (NetworkX graph): The network to which the annotations are related.
             filepath (str): Path to the JSON annotations file.
+            min_nodes_per_term (int, optional): The minimum number of network nodes required for each annotation
+                term to be included. Defaults to 2.
 
         Returns:
             Dict[str, Any]: A dictionary containing ordered nodes, ordered annotations, and the annotations matrix.
@@ -40,12 +44,11 @@ class AnnotationsIO:
         params.log_annotations(filepath=filepath, filetype=filetype)
         _log_loading(filetype, filepath=filepath)
 
-        # Open and read the JSON file
+        # Load the JSON file into a dictionary
         with open(filepath, "r") as file:
             annotations_input = json.load(file)
 
-        # Load the annotations into the provided network
-        return load_annotations(network, annotations_input)
+        return load_annotations(network, annotations_input, min_nodes_per_term)
 
     def load_excel_annotation(
         self,
@@ -55,6 +58,7 @@ class AnnotationsIO:
         nodes_colname: str = "nodes",
         sheet_name: str = "Sheet1",
         nodes_delimiter: str = ";",
+        min_nodes_per_term: int = 2,
     ) -> Dict[str, Any]:
         """Load annotations from an Excel file and associate them with the network.
 
@@ -65,6 +69,8 @@ class AnnotationsIO:
             nodes_colname (str): Name of the column containing the nodes associated with each label.
             sheet_name (str, optional): The name of the Excel sheet to load (default is 'Sheet1').
             nodes_delimiter (str, optional): Delimiter used to separate multiple nodes within the nodes column (default is ';').
+            min_nodes_per_term (int, optional): The minimum number of network nodes required for each annotation
+                term to be included. Defaults to 2.
 
         Returns:
             Dict[str, Any]: A dictionary where each label is paired with its respective list of nodes,
@@ -82,10 +88,9 @@ class AnnotationsIO:
             lambda x: x.split(nodes_delimiter)
         )
         # Convert the DataFrame to a dictionary pairing labels with their corresponding nodes
-        label_node_dict = annotation.set_index(label_colname)[nodes_colname].to_dict()
+        annotations_input = annotation.set_index(label_colname)[nodes_colname].to_dict()
 
-        # Load the annotations into the provided network
-        return load_annotations(network, label_node_dict)
+        return load_annotations(network, annotations_input, min_nodes_per_term)
 
     def load_csv_annotation(
         self,
@@ -94,6 +99,7 @@ class AnnotationsIO:
         label_colname: str = "label",
         nodes_colname: str = "nodes",
         nodes_delimiter: str = ";",
+        min_nodes_per_term: int = 2,
     ) -> Dict[str, Any]:
         """Load annotations from a CSV file and associate them with the network.
 
@@ -103,6 +109,8 @@ class AnnotationsIO:
             label_colname (str): Name of the column containing the labels (e.g., GO terms).
             nodes_colname (str): Name of the column containing the nodes associated with each label.
             nodes_delimiter (str, optional): Delimiter used to separate multiple nodes within the nodes column (default is ';').
+            min_nodes_per_term (int, optional): The minimum number of network nodes required for each annotation
+                term to be included. Defaults to 2.
 
         Returns:
             Dict[str, Any]: A dictionary where each label is paired with its respective list of nodes,
@@ -118,8 +126,7 @@ class AnnotationsIO:
             filepath, label_colname, nodes_colname, delimiter=",", nodes_delimiter=nodes_delimiter
         )
 
-        # Load the annotations into the provided network
-        return load_annotations(network, annotations_input)
+        return load_annotations(network, annotations_input, min_nodes_per_term)
 
     def load_tsv_annotation(
         self,
@@ -128,6 +135,7 @@ class AnnotationsIO:
         label_colname: str = "label",
         nodes_colname: str = "nodes",
         nodes_delimiter: str = ";",
+        min_nodes_per_term: int = 2,
     ) -> Dict[str, Any]:
         """Load annotations from a TSV file and associate them with the network.
 
@@ -137,6 +145,8 @@ class AnnotationsIO:
             label_colname (str): Name of the column containing the labels (e.g., GO terms).
             nodes_colname (str): Name of the column containing the nodes associated with each label.
             nodes_delimiter (str, optional): Delimiter used to separate multiple nodes within the nodes column (default is ';').
+            min_nodes_per_term (int, optional): The minimum number of network nodes required for each annotation
+                term to be included. Defaults to 2.
 
         Returns:
             Dict[str, Any]: A dictionary where each label is paired with its respective list of nodes,
@@ -152,15 +162,18 @@ class AnnotationsIO:
             filepath, label_colname, nodes_colname, delimiter="\t", nodes_delimiter=nodes_delimiter
         )
 
-        # Load the annotations into the provided network
-        return load_annotations(network, annotations_input)
+        return load_annotations(network, annotations_input, min_nodes_per_term)
 
-    def load_dict_annotation(self, network: nx.Graph, content: Dict[str, Any]) -> Dict[str, Any]:
+    def load_dict_annotation(
+        self, network: nx.Graph, content: Dict[str, Any], min_nodes_per_term: int = 2
+    ) -> Dict[str, Any]:
         """Load annotations from a provided dictionary and convert them to a dictionary annotation.
 
         Args:
             network (NetworkX graph): The network to which the annotations are related.
             content (Dict[str, Any]): The annotations dictionary to load.
+            min_nodes_per_term (int, optional): The minimum number of network nodes required for each annotation
+                term to be included. Defaults to 2.
 
         Returns:
             Dict[str, Any]: A dictionary containing ordered nodes, ordered annotations, and the annotations matrix.
@@ -176,13 +189,8 @@ class AnnotationsIO:
         params.log_annotations(filepath="In-memory dictionary", filetype=filetype)
         _log_loading(filetype, "In-memory dictionary")
 
-        # Load the annotations into the provided network
-        annotations_dict = load_annotations(network, content)
-        # Ensure the output is a dictionary
-        if not isinstance(annotations_dict, dict):
-            raise ValueError("Expected output to be a dictionary")
-
-        return annotations_dict
+        # Load the annotations as a dictionary from the provided dictionary
+        return load_annotations(network, content, min_nodes_per_term)
 
 
 def _load_matrix_file(
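
Taken together, every public loader on AnnotationsIO now forwards min_nodes_per_term straight to load_annotations. Below is a hedged usage sketch of the updated signatures; the toy graph, the in-memory annotation dictionary, and the assumption that plain node names match the network's node labels are illustrative only, not taken from the package's documentation or tests.

import networkx as nx
from risk.annotations.io import AnnotationsIO  # module path shown in this diff

# Illustrative toy network and annotation dictionary (hypothetical data).
network = nx.Graph()
network.add_edges_from([("A", "B"), ("B", "C"), ("C", "D")])

content = {
    "term_1": ["A", "B", "C"],  # covers 3 network nodes
    "term_2": ["D"],            # covers 1 node; dropped at the default threshold of 2
}

annotations_io = AnnotationsIO()
# New in 0.0.9b6: min_nodes_per_term is accepted by every loader and defaults to 2.
annotations = annotations_io.load_dict_annotation(network, content, min_nodes_per_term=2)

The same keyword applies to load_json_annotation, load_csv_annotation, load_tsv_annotation, and load_excel_annotation.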
risk_network-0.0.9b5.dist-info/METADATA → risk_network-0.0.9b6.dist-info/METADATA RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: risk-network
-Version: 0.0.9b5
+Version: 0.0.9b6
 Summary: A Python package for biological network analysis
 Author: Ira Horecka
 Author-email: Ira Horecka <ira89@icloud.com>
risk_network-0.0.9b5.dist-info/RECORD → risk_network-0.0.9b6.dist-info/RECORD RENAMED
@@ -1,9 +1,9 @@
-risk/__init__.py,sha256=xpU4eFf8OBGeA2fxuMCAFmBEQO6YlnDSVL09OvyHThs,112
+risk/__init__.py,sha256=fpCtulKZFHI4Je7dm4qBJHyP9InK9uDRYmYYgog9BGQ,112
 risk/constants.py,sha256=XInRaH78Slnw_sWgAsBFbUHkyA0h0jL0DKGuQNbOvjM,550
 risk/risk.py,sha256=De1vn8Xc-TKz6aTL0bvJI-SVrIqU3k0IWAbKc7dde1c,23618
 risk/annotations/__init__.py,sha256=kXgadEXaCh0z8OyhOhTj7c3qXGmWgOhaSZ4gSzSb59U,147
-risk/annotations/annotations.py,sha256=aC30M-wdd72ZjOfn8RZKAsGM7Yti0Wl_4CHTvayoPvY,13312
-risk/annotations/io.py,sha256=eOkPD9G6KzkhGRc_ZW2McxQ8665o-H3uDG8bmKlzQ80,9591
+risk/annotations/annotations.py,sha256=WVT9wzTm8lTpMw_3SnbyljWR77yExo0rb1zVgJza8nw,14284
+risk/annotations/io.py,sha256=Nj_RPmn-WM1zMsssm9bVGR94SHytkEBK-wcBJ3WhqkU,10310
 risk/log/__init__.py,sha256=gy7C5L6D222AYUChq5lkc0LsCJ_QMQPaFiBJKbecdac,201
 risk/log/console.py,sha256=C52s3FgQ2e9kQWcXL8m7rs_pnKXt5Yy8PBHmQkOTiNo,4537
 risk/log/parameters.py,sha256=o4StqYCa0kt7_Ht4mKa1DwwvhGUwkC_dGBaiUIc0GB0,5683
@@ -32,8 +32,8 @@ risk/stats/stats.py,sha256=z8NrhiVj4BzJ250bVLfytpmfC7RzYu7mBuIZD_l0aCA,7222
 risk/stats/permutation/__init__.py,sha256=neJp7FENC-zg_CGOXqv-iIvz1r5XUKI9Ruxhmq7kDOI,105
 risk/stats/permutation/permutation.py,sha256=meBNSrbRa9P8WJ54n485l0H7VQJlMSfHqdN4aCKYCtQ,10105
 risk/stats/permutation/test_functions.py,sha256=lftOude6hee0pyR80HlBD32522JkDoN5hrKQ9VEbuoY,2345
-risk_network-0.0.9b5.dist-info/LICENSE,sha256=jOtLnuWt7d5Hsx6XXB2QxzrSe2sWWh3NgMfFRetluQM,35147
-risk_network-0.0.9b5.dist-info/METADATA,sha256=Oc_07HiBSedyTbbiP-2a-xeLgEH-3zzNdXYzV6FSdQY,47497
-risk_network-0.0.9b5.dist-info/WHEEL,sha256=P9jw-gEje8ByB7_hXoICnHtVCrEwMQh-630tKvQWehc,91
-risk_network-0.0.9b5.dist-info/top_level.txt,sha256=NX7C2PFKTvC1JhVKv14DFlFAIFnKc6Lpsu1ZfxvQwVw,5
-risk_network-0.0.9b5.dist-info/RECORD,,
+risk_network-0.0.9b6.dist-info/LICENSE,sha256=jOtLnuWt7d5Hsx6XXB2QxzrSe2sWWh3NgMfFRetluQM,35147
+risk_network-0.0.9b6.dist-info/METADATA,sha256=0YZ5Rd4bqOid3nSHpa-S6fBBtBhYPoAQ1SlACtmDVaw,47497
+risk_network-0.0.9b6.dist-info/WHEEL,sha256=P9jw-gEje8ByB7_hXoICnHtVCrEwMQh-630tKvQWehc,91
+risk_network-0.0.9b6.dist-info/top_level.txt,sha256=NX7C2PFKTvC1JhVKv14DFlFAIFnKc6Lpsu1ZfxvQwVw,5
+risk_network-0.0.9b6.dist-info/RECORD,,