PyPI - risk-network - Versions diffs - 0.0.13b3__py3-none-any.whl → 0.0.13b4__py3-none-any.whl - Mend

risk-network 0.0.13b3-py3-none-any.whl → 0.0.13b4-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (8) hide show

risk/__init__.py CHANGED Viewed

@@ -7,4 +7,4 @@ RISK: Regional Inference of Significant Kinships
 from risk.risk import RISK
-__version__ = "0.0.13-beta.3"
+__version__ = "0.0.13-beta.4"

risk/annotation/annotation.py CHANGED Viewed

@@ -36,7 +36,10 @@ initialize_nltk()
 def load_annotation(
-    network: nx.Graph, annotation_input: Dict[str, Any], min_nodes_per_term: int = 2
+    network: nx.Graph,
+    annotation_input: Dict[str, Any],
+    min_nodes_per_term: int = 1,
+    max_nodes_per_term: int = 10_000,
 ) -> Dict[str, Any]:
     """Convert annotation input to a sparse matrix and reindex based on the network's node labels.
@@ -44,7 +47,9 @@ def load_annotation(
         network (nx.Graph): The network graph.
         annotation_input (Dict[str, Any]): An annotation dictionary.
         min_nodes_per_term (int, optional): The minimum number of network nodes required for each annotation
-            term to be included. Defaults to 2.
+            term to be included. Defaults to 1.
+        max_nodes_per_term (int, optional): The maximum number of network nodes allowed for each annotation
+            term. Defaults to 10_000.
     Returns:
         Dict[str, Any]: A dictionary containing ordered nodes, ordered annotations, and the sparse binary annotations
@@ -52,7 +57,6 @@ def load_annotation(
     Raises:
         ValueError: If no annotation is found for the nodes in the network.
-        ValueError: If no annotation has at least min_nodes_per_term nodes in the network.
     """
     # Step 1: Map nodes and annotations to indices
     node_label_order = [attr["label"] for _, attr in network.nodes(data=True) if "label" in attr]
@@ -72,9 +76,18 @@ def load_annotation(
     # Create a sparse binary matrix
     num_nodes = len(node_to_idx)
     num_annotation = len(annotation_to_idx)
-    annotation_pivot = coo_matrix((data, (row, col)), shape=(num_nodes, num_annotation)).tocsr()
-    # Step 3: Filter out annotations with fewer than min_nodes_per_term occurrences
-    valid_annotation = annotation_pivot.sum(axis=0).A1 >= min_nodes_per_term
+    # Convert to a sparse matrix and set the data type to uint8 for binary representation
+    annotation_pivot = (
+        coo_matrix((data, (row, col)), shape=(num_nodes, num_annotation)).tocsr().astype(np.uint8)
+    )
+    # Step 3: Filter out annotations with too few or too many nodes
+    valid_annotation = np.array(
+        [
+            annotation_pivot[:, i].sum() >= min_nodes_per_term
+            and annotation_pivot[:, i].sum() <= max_nodes_per_term
+            for i in range(num_annotation)
+        ]
+    )
     annotation_pivot = annotation_pivot[:, valid_annotation]
     # Step 4: Raise errors for empty matrices
     if annotation_pivot.nnz == 0:
@@ -83,7 +96,7 @@ def load_annotation(
     num_remaining_annotation = annotation_pivot.shape[1]
     if num_remaining_annotation == 0:
         raise ValueError(
-            f"No annotation terms found with at least {min_nodes_per_term} nodes in the network."
+            f"No annotation terms found with at least {min_nodes_per_term} nodes and at most {max_nodes_per_term} nodes."
         )
     # Step 5: Extract ordered nodes and annotations
@@ -94,6 +107,7 @@ def load_annotation(
     # Log the filtering details
     logger.info(f"Minimum number of nodes per annotation term: {min_nodes_per_term}")
+    logger.info(f"Maximum number of nodes per annotation term: {max_nodes_per_term}")
     logger.info(f"Number of input annotation terms: {num_annotation}")
     logger.info(f"Number of remaining annotation terms: {num_remaining_annotation}")
@@ -122,7 +136,7 @@ def define_top_annotation(
         significant_significance_matrix (np.ndarray): Enrichment matrix below alpha threshold.
         significant_binary_significance_matrix (np.ndarray): Binary significance matrix below alpha threshold.
         min_cluster_size (int, optional): Minimum cluster size. Defaults to 5.
-        max_cluster_size (int, optional): Maximum cluster size. Defaults to 1000.
+        max_cluster_size (int, optional): Maximum cluster size. Defaults to 10_000.
     Returns:
         pd.DataFrame: DataFrame with top annotations and their properties.

risk/annotation/io.py CHANGED Viewed

@@ -21,7 +21,11 @@ class AnnotationIO:
     """
     def load_annotation_json(
-        self, network: nx.Graph, filepath: str, min_nodes_per_term: int = 2
+        self,
+        network: nx.Graph,
+        filepath: str,
+        min_nodes_per_term: int = 1,
+        max_nodes_per_term: int = 10_000,
     ) -> Dict[str, Any]:
         """Load annotation from a JSON file and convert them to a DataFrame.
@@ -29,7 +33,9 @@ class AnnotationIO:
             network (NetworkX graph): The network to which the annotation is related.
             filepath (str): Path to the JSON annotation file.
             min_nodes_per_term (int, optional): The minimum number of network nodes required for each annotation
-                term to be included. Defaults to 2.
+                term to be included. Defaults to 1.
+            max_nodes_per_term (int, optional): The maximum number of network nodes allowed for each annotation
+                term to be included. Defaults to 10_000.
         Returns:
             Dict[str, Any]: A dictionary containing ordered nodes, ordered annotations, and the annotation matrix.
@@ -37,7 +43,10 @@ class AnnotationIO:
         filetype = "JSON"
         # Log the loading of the JSON file
         params.log_annotation(
-            filetype=filetype, filepath=filepath, min_nodes_per_term=min_nodes_per_term
+            filetype=filetype,
+            filepath=filepath,
+            min_nodes_per_term=min_nodes_per_term,
+            max_nodes_per_term=max_nodes_per_term,
         )
         self._log_loading_annotation(filetype, filepath=filepath)
@@ -45,7 +54,7 @@ class AnnotationIO:
         with open(filepath, "r", encoding="utf-8") as file:
             annotation_input = json.load(file)
-        return load_annotation(network, annotation_input, min_nodes_per_term)
+        return load_annotation(network, annotation_input, min_nodes_per_term, max_nodes_per_term)
     def load_annotation_excel(
         self,
@@ -55,7 +64,8 @@ class AnnotationIO:
         nodes_colname: str = "nodes",
         sheet_name: str = "Sheet1",
         nodes_delimiter: str = ";",
-        min_nodes_per_term: int = 2,
+        min_nodes_per_term: int = 1,
+        max_nodes_per_term: int = 10_000,
     ) -> Dict[str, Any]:
         """Load annotation from an Excel file and associate them with the network.
@@ -67,7 +77,9 @@ class AnnotationIO:
             sheet_name (str, optional): The name of the Excel sheet to load (default is 'Sheet1').
             nodes_delimiter (str, optional): Delimiter used to separate multiple nodes within the nodes column (default is ';').
             min_nodes_per_term (int, optional): The minimum number of network nodes required for each annotation
-                term to be included. Defaults to 2.
+                term to be included. Defaults to 1.
+            max_nodes_per_term (int, optional): The maximum number of network nodes allowed for each annotation
+                term to be included. Defaults to 10_000.
         Returns:
             Dict[str, Any]: A dictionary where each label is paired with its respective list of nodes,
@@ -76,7 +88,10 @@ class AnnotationIO:
         filetype = "Excel"
         # Log the loading of the Excel file
         params.log_annotation(
-            filetype=filetype, filepath=filepath, min_nodes_per_term=min_nodes_per_term
+            filetype=filetype,
+            filepath=filepath,
+            min_nodes_per_term=min_nodes_per_term,
+            max_nodes_per_term=max_nodes_per_term,
         )
         self._log_loading_annotation(filetype, filepath=filepath)
@@ -89,7 +104,7 @@ class AnnotationIO:
         # Convert the DataFrame to a dictionary pairing labels with their corresponding nodes
         annotation_input = annotation.set_index(label_colname)[nodes_colname].to_dict()
-        return load_annotation(network, annotation_input, min_nodes_per_term)
+        return load_annotation(network, annotation_input, min_nodes_per_term, max_nodes_per_term)
     def load_annotation_csv(
         self,
@@ -98,7 +113,8 @@ class AnnotationIO:
         label_colname: str = "label",
         nodes_colname: str = "nodes",
         nodes_delimiter: str = ";",
-        min_nodes_per_term: int = 2,
+        min_nodes_per_term: int = 1,
+        max_nodes_per_term: int = 10_000,
     ) -> Dict[str, Any]:
         """Load annotation from a CSV file and associate them with the network.
@@ -109,7 +125,9 @@ class AnnotationIO:
             nodes_colname (str): Name of the column containing the nodes associated with each label.
             nodes_delimiter (str, optional): Delimiter used to separate multiple nodes within the nodes column (default is ';').
             min_nodes_per_term (int, optional): The minimum number of network nodes required for each annotation
-                term to be included. Defaults to 2.
+                term to be included. Defaults to 1.
+            max_nodes_per_term (int, optional): The maximum number of network nodes allowed for each annotation
+                term to be included. Defaults to 10_000.
         Returns:
             Dict[str, Any]: A dictionary where each label is paired with its respective list of nodes,
@@ -118,7 +136,10 @@ class AnnotationIO:
         filetype = "CSV"
         # Log the loading of the CSV file
         params.log_annotation(
-            filetype=filetype, filepath=filepath, min_nodes_per_term=min_nodes_per_term
+            filetype=filetype,
+            filepath=filepath,
+            min_nodes_per_term=min_nodes_per_term,
+            max_nodes_per_term=max_nodes_per_term,
         )
         self._log_loading_annotation(filetype, filepath=filepath)
@@ -127,7 +148,7 @@ class AnnotationIO:
             filepath, label_colname, nodes_colname, delimiter=",", nodes_delimiter=nodes_delimiter
         )
-        return load_annotation(network, annotation_input, min_nodes_per_term)
+        return load_annotation(network, annotation_input, min_nodes_per_term, max_nodes_per_term)
     def load_annotation_tsv(
         self,
@@ -136,7 +157,8 @@ class AnnotationIO:
         label_colname: str = "label",
         nodes_colname: str = "nodes",
         nodes_delimiter: str = ";",
-        min_nodes_per_term: int = 2,
+        min_nodes_per_term: int = 1,
+        max_nodes_per_term: int = 10_000,
     ) -> Dict[str, Any]:
         """Load annotation from a TSV file and associate them with the network.
@@ -147,7 +169,9 @@ class AnnotationIO:
             nodes_colname (str): Name of the column containing the nodes associated with each label.
             nodes_delimiter (str, optional): Delimiter used to separate multiple nodes within the nodes column (default is ';').
             min_nodes_per_term (int, optional): The minimum number of network nodes required for each annotation
-                term to be included. Defaults to 2.
+                term to be included. Defaults to 1.
+            max_nodes_per_term (int, optional): The maximum number of network nodes allowed for each annotation
+                term to be included. Defaults to 10_000.
         Returns:
             Dict[str, Any]: A dictionary where each label is paired with its respective list of nodes,
@@ -156,7 +180,10 @@ class AnnotationIO:
         filetype = "TSV"
         # Log the loading of the TSV file
         params.log_annotation(
-            filetype=filetype, filepath=filepath, min_nodes_per_term=min_nodes_per_term
+            filetype=filetype,
+            filepath=filepath,
+            min_nodes_per_term=min_nodes_per_term,
+            max_nodes_per_term=max_nodes_per_term,
         )
         self._log_loading_annotation(filetype, filepath=filepath)
@@ -165,10 +192,14 @@ class AnnotationIO:
             filepath, label_colname, nodes_colname, delimiter="\t", nodes_delimiter=nodes_delimiter
         )
-        return load_annotation(network, annotation_input, min_nodes_per_term)
+        return load_annotation(network, annotation_input, min_nodes_per_term, max_nodes_per_term)
     def load_annotation_dict(
-        self, network: nx.Graph, content: Dict[str, Any], min_nodes_per_term: int = 2
+        self,
+        network: nx.Graph,
+        content: Dict[str, Any],
+        min_nodes_per_term: int = 1,
+        max_nodes_per_term: int = 10_000,
     ) -> Dict[str, Any]:
         """Load annotation from a provided dictionary and convert them to a dictionary annotation.
@@ -176,7 +207,9 @@ class AnnotationIO:
             network (NetworkX graph): The network to which the annotation is related.
             content (Dict[str, Any]): The annotation dictionary to load.
             min_nodes_per_term (int, optional): The minimum number of network nodes required for each annotation
-                term to be included. Defaults to 2.
+                term to be included. Defaults to 1.
+            max_nodes_per_term (int, optional): The maximum number of network nodes allowed for each annotation
+                term to be included. Defaults to 10_000.
         Returns:
             Dict[str, Any]: A dictionary containing ordered nodes, ordered annotations, and the annotation matrix.
@@ -192,11 +225,16 @@ class AnnotationIO:
         filetype = "Dictionary"
         # Log the loading of the annotation from the dictionary
-        params.log_annotation(filepath="In-memory dictionary", filetype=filetype)
+        params.log_annotation(
+            filepath="In-memory dictionary",
+            filetype=filetype,
+            min_nodes_per_term=min_nodes_per_term,
+            max_nodes_per_term=max_nodes_per_term,
+        )
         self._log_loading_annotation(filetype, "In-memory dictionary")
         # Load the annotation as a dictionary from the provided dictionary
-        return load_annotation(network, content, min_nodes_per_term)
+        return load_annotation(network, content, min_nodes_per_term, max_nodes_per_term)
     def _load_matrix_file(
         self,

{risk_network-0.0.13b3.dist-info → risk_network-0.0.13b4.dist-info}/METADATA RENAMED Viewed

@@ -1,7 +1,7 @@
 Metadata-Version: 2.4
 Name: risk-network
-Version: 0.0.13b3
-Summary: A Python package for biological network analysis.
+Version: 0.0.13b4
+Summary: A Python package for scalable network analysis and high-quality visualization.
 Author-email: Ira Horecka <ira89@icloud.com>
 License: GPL-3.0-or-later
 Project-URL: Homepage, https://github.com/riskportal/network

{risk_network-0.0.13b3.dist-info → risk_network-0.0.13b4.dist-info}/RECORD RENAMED Viewed

@@ -1,8 +1,8 @@
-risk/__init__.py,sha256=POYLgxsSJuztGZbXImKx9nULm27K5xb0sbwxvFVzRXY,127
+risk/__init__.py,sha256=64n4kde42cujId1bWBqXdxznWZaSEVq1NvS_gqlvt1g,127
 risk/risk.py,sha256=Wjuxob5bI70Tpz9t71i05g94AQ3qXEMjfEcm5IV9HSY,1118
 risk/annotation/__init__.py,sha256=1EbGo41ClQb5ESTtitjOhrZhaLzzwr5aT-RYDX8w-h4,185
-risk/annotation/annotation.py,sha256=OE859FpVnp69hDi1cN_CQqeiG0SrJ2ZCuIdQKzY-gt0,14675
-risk/annotation/io.py,sha256=BF-hBSslHh6AlgL2FqVFBeH4swvVmdMYXJI0wu1W-gI,10745
+risk/annotation/annotation.py,sha256=EExSfYbZu4EUyA5vl7EDadGefyf-sJw_UmUxgXbuKng,15151
+risk/annotation/io.py,sha256=Rhob9GKgdfHZIMVyaRJa15YOAPMDbqg3y_b8vckPeoM,12391
 risk/annotation/nltk_setup.py,sha256=14B6L56_dwIgAOC9Rl4dNd4-b-aEngUCoJP9L9kEilU,3572
 risk/log/__init__.py,sha256=en-hKzuFtQWos4oZd8PxJ9u9Pe5bdihiqH9-qk_5ppw,217
 risk/log/console.py,sha256=PgjyEvyhYLUSHXPUKEqOmxsDsfrjPICIgqo_cAHq0N8,4575
@@ -33,8 +33,8 @@ risk/network/plotter/network.py,sha256=c9rPQ5mjil0sxVQnprRaKMAUqT6PZmKiATWz0m-Tv
 risk/network/plotter/plotter.py,sha256=WZcOrBW3vBQ_aLwv8c8pXJO8ZlyswHHHfEsiLxzEYaI,6121
 risk/network/plotter/utils/colors.py,sha256=xZt4877ORTQqySiMh-tUGe0sXvhLbXO04iGNeBDkbbw,19144
 risk/network/plotter/utils/layout.py,sha256=Lty16T-Q-oWwo9fXqm-nnS_dMS3BMhuFt4SFqxFC3Ng,3610
-risk_network-0.0.13b3.dist-info/licenses/LICENSE,sha256=jOtLnuWt7d5Hsx6XXB2QxzrSe2sWWh3NgMfFRetluQM,35147
-risk_network-0.0.13b3.dist-info/METADATA,sha256=LLxE7y3KeD13405wl2bXDoAwdjogKJTq0JFj7uF0bQs,6824
-risk_network-0.0.13b3.dist-info/WHEEL,sha256=wXxTzcEDnjrTwFYjLPcsW_7_XihufBwmpiBeiXNBGEA,91
-risk_network-0.0.13b3.dist-info/top_level.txt,sha256=NX7C2PFKTvC1JhVKv14DFlFAIFnKc6Lpsu1ZfxvQwVw,5
-risk_network-0.0.13b3.dist-info/RECORD,,
+risk_network-0.0.13b4.dist-info/licenses/LICENSE,sha256=jOtLnuWt7d5Hsx6XXB2QxzrSe2sWWh3NgMfFRetluQM,35147
+risk_network-0.0.13b4.dist-info/METADATA,sha256=Atc8HAHrKKIbdjfZUx4bwo5GifV3OGTrShjr0Ewd2T4,6853
+risk_network-0.0.13b4.dist-info/WHEEL,sha256=GHB6lJx2juba1wDgXDNlMTyM13ckjBMKf-OnwgKOCtA,91
+risk_network-0.0.13b4.dist-info/top_level.txt,sha256=NX7C2PFKTvC1JhVKv14DFlFAIFnKc6Lpsu1ZfxvQwVw,5
+risk_network-0.0.13b4.dist-info/RECORD,,

{risk_network-0.0.13b3.dist-info → risk_network-0.0.13b4.dist-info}/WHEEL RENAMED Viewed

@@ -1,5 +1,5 @@
 Wheel-Version: 1.0
-Generator: setuptools (80.1.0)
+Generator: setuptools (80.3.0)
 Root-Is-Purelib: true
 Tag: py3-none-any

{risk_network-0.0.13b3.dist-info → risk_network-0.0.13b4.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

{risk_network-0.0.13b3.dist-info → risk_network-0.0.13b4.dist-info}/top_level.txt RENAMED Viewed

File without changes

risk-network 0.0.13b3__py3-none-any.whl → 0.0.13b4__py3-none-any.whl

risk-network 0.0.13b3-py3-none-any.whl → 0.0.13b4-py3-none-any.whl