risk-network 0.0.8b18__py3-none-any.whl → 0.0.9b26__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- risk/__init__.py +2 -2
- risk/annotations/__init__.py +2 -2
- risk/annotations/annotations.py +133 -72
- risk/annotations/io.py +50 -34
- risk/log/__init__.py +4 -2
- risk/log/{config.py → console.py} +5 -3
- risk/log/{params.py → parameters.py} +21 -46
- risk/neighborhoods/__init__.py +3 -5
- risk/neighborhoods/api.py +446 -0
- risk/neighborhoods/community.py +281 -96
- risk/neighborhoods/domains.py +92 -38
- risk/neighborhoods/neighborhoods.py +210 -149
- risk/network/__init__.py +1 -3
- risk/network/geometry.py +69 -58
- risk/network/graph/__init__.py +6 -0
- risk/network/graph/api.py +194 -0
- risk/network/graph/network.py +269 -0
- risk/network/graph/summary.py +254 -0
- risk/network/io.py +58 -48
- risk/network/plotter/__init__.py +6 -0
- risk/network/plotter/api.py +54 -0
- risk/network/{plot → plotter}/canvas.py +80 -26
- risk/network/{plot → plotter}/contour.py +43 -34
- risk/network/{plot → plotter}/labels.py +123 -113
- risk/network/plotter/network.py +424 -0
- risk/network/plotter/utils/colors.py +416 -0
- risk/network/plotter/utils/layout.py +94 -0
- risk/risk.py +11 -469
- risk/stats/__init__.py +8 -4
- risk/stats/binom.py +51 -0
- risk/stats/chi2.py +69 -0
- risk/stats/hypergeom.py +28 -18
- risk/stats/permutation/__init__.py +1 -1
- risk/stats/permutation/permutation.py +45 -39
- risk/stats/permutation/test_functions.py +25 -17
- risk/stats/poisson.py +17 -11
- risk/stats/stats.py +20 -16
- risk/stats/zscore.py +68 -0
- {risk_network-0.0.8b18.dist-info → risk_network-0.0.9b26.dist-info}/METADATA +9 -5
- risk_network-0.0.9b26.dist-info/RECORD +44 -0
- {risk_network-0.0.8b18.dist-info → risk_network-0.0.9b26.dist-info}/WHEEL +1 -1
- risk/network/graph.py +0 -159
- risk/network/plot/__init__.py +0 -6
- risk/network/plot/network.py +0 -282
- risk/network/plot/plotter.py +0 -137
- risk/network/plot/utils/color.py +0 -353
- risk/network/plot/utils/layout.py +0 -53
- risk_network-0.0.8b18.dist-info/RECORD +0 -37
- {risk_network-0.0.8b18.dist-info → risk_network-0.0.9b26.dist-info}/LICENSE +0 -0
- {risk_network-0.0.8b18.dist-info → risk_network-0.0.9b26.dist-info}/top_level.txt +0 -0
risk/__init__.py
CHANGED
risk/annotations/__init__.py
CHANGED
@@ -3,5 +3,5 @@ risk/annotations
 ~~~~~~~~~~~~~~~~
 """

-from .annotations import define_top_annotations,
-from .io import AnnotationsIO
+from risk.annotations.annotations import define_top_annotations, get_weighted_description
+from risk.annotations.io import AnnotationsIO
risk/annotations/annotations.py
CHANGED
@@ -3,6 +3,7 @@ risk/annotations/annotations
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 """

+import re
 from collections import Counter
 from itertools import compress
 from typing import Any, Dict, List, Set
@@ -14,6 +15,9 @@ import pandas as pd
 from nltk.tokenize import word_tokenize
 from nltk.corpus import stopwords

+from risk.log import logger
+from scipy.sparse import csr_matrix
+

 def _setup_nltk():
     """Ensure necessary NLTK data is downloaded."""
@@ -30,107 +34,144 @@ def _setup_nltk():

 # Ensure you have the necessary NLTK data
 _setup_nltk()
+# Initialize English stopwords
+stop_words = set(stopwords.words("english"))


-def load_annotations(
+def load_annotations(
+    network: nx.Graph, annotations_input: Dict[str, Any], min_nodes_per_term: int = 2
+) -> Dict[str, Any]:
     """Convert annotations input to a DataFrame and reindex based on the network's node labels.

     Args:
-
+        network (nx.Graph): The network graph.
+        annotations_input (Dict[str, Any]): A dictionary with annotations.
+        min_nodes_per_term (int, optional): The minimum number of network nodes required for each annotation
+            term to be included. Defaults to 2.
+        use_sparse (bool, optional): Whether to return the annotations matrix as a sparse matrix. Defaults to True.

     Returns:
-
+        Dict[str, Any]: A dictionary containing ordered nodes, ordered annotations, and the sparse binary annotations
+            matrix.
+
+    Raises:
+        ValueError: If no annotations are found for the nodes in the network.
+        ValueError: If no annotations have at least min_nodes_per_term nodes in the network.
     """
     # Flatten the dictionary to a list of tuples for easier DataFrame creation
     flattened_annotations = [
         (node, annotation) for annotation, nodes in annotations_input.items() for node in nodes
     ]
     # Create a DataFrame from the flattened list
-    annotations = pd.DataFrame(flattened_annotations, columns=["
-    annotations["
+    annotations = pd.DataFrame(flattened_annotations, columns=["node", "annotations"])
+    annotations["is_member"] = 1
     # Pivot to create a binary matrix with nodes as rows and annotations as columns
     annotations_pivot = annotations.pivot_table(
-        index="
+        index="node", columns="annotations", values="is_member", fill_value=0, dropna=False
     )
     # Reindex the annotations matrix based on the node labels from the network
-    node_label_order =
+    node_label_order = (attr["label"] for _, attr in network.nodes(data=True) if "label" in attr)
     annotations_pivot = annotations_pivot.reindex(index=node_label_order)
     # Raise an error if no valid annotations are found for the nodes in the network
     if annotations_pivot.notnull().sum().sum() == 0:
+        raise ValueError("No terms found in the annotation file for the nodes in the network.")
+
+    # Filter out annotations with fewer than min_nodes_per_term occurrences
+    num_terms_before_filtering = annotations_pivot.shape[1]
+    annotations_pivot = annotations_pivot.loc[
+        :, (annotations_pivot.sum(axis=0) >= min_nodes_per_term)
+    ]
+    num_terms_after_filtering = annotations_pivot.shape[1]
+    # Log the number of annotations before and after filtering
+    logger.info(f"Minimum number of nodes per annotation term: {min_nodes_per_term}")
+    logger.info(f"Number of input annotation terms: {num_terms_before_filtering}")
+    logger.info(f"Number of remaining annotation terms: {num_terms_after_filtering}")
+    if num_terms_after_filtering == 0:
         raise ValueError(
-            "No
+            f"No annotation terms found with at least {min_nodes_per_term} nodes in the network."
         )

-    # Remove columns with all zeros to improve performance
-    annotations_pivot = annotations_pivot.loc[:, annotations_pivot.sum(axis=0) != 0]
     # Extract ordered nodes and annotations
     ordered_nodes = tuple(annotations_pivot.index)
     ordered_annotations = tuple(annotations_pivot.columns)
-    # Convert the annotations_pivot matrix to a numpy array
-
+    # Convert the annotations_pivot matrix to a numpy array or sparse matrix
+    annotations_pivot_binary = (annotations_pivot.fillna(0).to_numpy() > 0).astype(int)
+    # Convert the binary annotations matrix to a sparse matrix
+    annotations_pivot_binary = csr_matrix(annotations_pivot_binary)

     return {
         "ordered_nodes": ordered_nodes,
         "ordered_annotations": ordered_annotations,
-        "matrix":
+        "matrix": annotations_pivot_binary,
     }


 def define_top_annotations(
     network: nx.Graph,
     ordered_annotation_labels: List[str],
-
-
+    neighborhood_significance_sums: List[int],
+    significant_significance_matrix: np.ndarray,
+    significant_binary_significance_matrix: np.ndarray,
     min_cluster_size: int = 5,
     max_cluster_size: int = 1000,
 ) -> pd.DataFrame:
-    """Define top annotations based on neighborhood
+    """Define top annotations based on neighborhood significance sums and binary significance matrix.

     Args:
         network (NetworkX graph): The network graph.
         ordered_annotation_labels (list of str): List of ordered annotation labels.
-
-
+        neighborhood_significance_sums (list of int): List of neighborhood significance sums.
+        significant_significance_matrix (np.ndarray): Enrichment matrix below alpha threshold.
+        significant_binary_significance_matrix (np.ndarray): Binary significance matrix below alpha threshold.
         min_cluster_size (int, optional): Minimum cluster size. Defaults to 5.
         max_cluster_size (int, optional): Maximum cluster size. Defaults to 1000.

     Returns:
         pd.DataFrame: DataFrame with top annotations and their properties.
     """
-    #
-
+    # Sum the columns of the significant significance matrix (positive floating point values)
+    significant_significance_scores = significant_significance_matrix.sum(axis=0)
+    # Create DataFrame to store annotations, their neighborhood significance sums, and significance scores
+    annotations_significance_matrix = pd.DataFrame(
         {
             "id": range(len(ordered_annotation_labels)),
-            "
-            "
+            "full_terms": ordered_annotation_labels,
+            "significant_neighborhood_significance_sums": neighborhood_significance_sums,
+            "significant_significance_score": significant_significance_scores,
         }
     )
-
-    # Apply size constraints to identify potential
-
-        (
-
-
+    annotations_significance_matrix["significant_annotations"] = False
+    # Apply size constraints to identify potential significant annotations
+    annotations_significance_matrix.loc[
+        (
+            annotations_significance_matrix["significant_neighborhood_significance_sums"]
+            >= min_cluster_size
+        )
+        & (
+            annotations_significance_matrix["significant_neighborhood_significance_sums"]
+            <= max_cluster_size
+        ),
+        "significant_annotations",
     ] = True
     # Initialize columns for connected components analysis
-
-
-
-        "
+    annotations_significance_matrix["num_connected_components"] = 0
+    annotations_significance_matrix["size_connected_components"] = None
+    annotations_significance_matrix["size_connected_components"] = annotations_significance_matrix[
+        "size_connected_components"
     ].astype(object)
-
+    annotations_significance_matrix["num_large_connected_components"] = 0

-    for attribute in
-
+    for attribute in annotations_significance_matrix.index.values[
+        annotations_significance_matrix["significant_annotations"]
     ]:
-        # Identify
-
-            compress(list(network),
+        # Identify significant neighborhoods based on the binary significance matrix
+        significant_neighborhoods = list(
+            compress(list(network), significant_binary_significance_matrix[:, attribute])
         )
-
-        # Analyze connected components within the
+        significant_network = nx.subgraph(network, significant_neighborhoods)
+        # Analyze connected components within the significant subnetwork
         connected_components = sorted(
-            nx.connected_components(
+            nx.connected_components(significant_network), key=len, reverse=True
         )
         size_connected_components = np.array([len(c) for c in connected_components])

@@ -144,55 +185,75 @@ def define_top_annotations(
         num_large_connected_components = len(filtered_size_connected_components)

         # Assign the number of connected components
-
+        annotations_significance_matrix.loc[attribute, "num_connected_components"] = (
             num_connected_components
         )
         # Filter out attributes with more than one connected component
-
-
+        annotations_significance_matrix.loc[
+            annotations_significance_matrix["num_connected_components"] > 1,
+            "significant_annotations",
         ] = False
         # Assign the number of large connected components
-
+        annotations_significance_matrix.loc[attribute, "num_large_connected_components"] = (
             num_large_connected_components
         )
         # Assign the size of connected components, ensuring it is always a list
-
+        annotations_significance_matrix.at[attribute, "size_connected_components"] = (
             filtered_size_connected_components.tolist()
         )

-    return
+    return annotations_significance_matrix


-def
-    """
-
+def get_weighted_description(words_column: pd.Series, scores_column: pd.Series) -> str:
+    """Generate a weighted description from words and their corresponding scores,
+    with support for stopwords filtering and improved weighting logic.

     Args:
         words_column (pd.Series): A pandas Series containing strings to process.
+        scores_column (pd.Series): A pandas Series containing significance scores to weigh the terms.

     Returns:
-        str: A coherent description formed from the most frequent and significant words.
+        str: A coherent description formed from the most frequent and significant words, weighed by significance scores.
     """
-    #
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+    # Handle case where all scores are the same
+    if scores_column.max() == scores_column.min():
+        normalized_scores = pd.Series([1] * len(scores_column))
+    else:
+        # Normalize the significance scores to be between 0 and 1
+        normalized_scores = (scores_column - scores_column.min()) / (
+            scores_column.max() - scores_column.min()
+        )
+
+    # Combine words and normalized scores to create weighted words
+    weighted_words = []
+    for word, score in zip(words_column, normalized_scores):
+        word = str(word)
+        if word not in stop_words:  # Skip stopwords
+            weight = max(1, int((0 if pd.isna(score) else score) * 10))
+            weighted_words.extend([word] * weight)
+
+    # Tokenize the weighted words, but preserve number-word patterns like '4-alpha'
+    tokens = word_tokenize(" ".join(weighted_words))
+    # Ensure we treat "4-alpha" or other "number-word" patterns as single tokens
+    combined_tokens = []
+    for token in tokens:
+        # Match patterns like '4-alpha' or '5-hydroxy' and keep them together
+        if re.match(r"^\d+-\w+", token):
+            combined_tokens.append(token)
+        elif token.replace(".", "", 1).isdigit():  # Handle pure numeric tokens
+            # Ignore pure numbers as descriptions unless necessary
+            continue
+        else:
+            combined_tokens.append(token)
+
+    # Prevent descriptions like just '4' from being selected
+    if len(combined_tokens) == 1 and combined_tokens[0].isdigit():
+        return "N/A"  # Return "N/A" for cases where it's just a number
+
+    # Simplify the word list and generate the description
+    simplified_words = _simplify_word_list(combined_tokens)
+    description = _generate_coherent_description(simplified_words)

     return description

@@ -255,7 +316,7 @@ def _generate_coherent_description(words: List[str]) -> str:
     If there is only one unique entry, return it directly.

     Args:
-        words (
+        words (List): A list of words or numerical string values.

     Returns:
         str: A coherent description formed by arranging the words in a logical sequence.
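Note on the load_annotations change above: terms are now filtered by min_nodes_per_term and the "matrix" entry is returned as a SciPy csr_matrix rather than a dense NumPy array, so callers that previously indexed an ndarray need to densify or switch to sparse operations. A minimal usage sketch under those assumptions; the toy graph and term names below are hypothetical, not part of the package:

    import networkx as nx
    from risk.annotations.annotations import load_annotations

    # Hypothetical toy graph; load_annotations reindexes on each node's "label" attribute
    network = nx.Graph()
    for name in ("geneA", "geneB", "geneC"):
        network.add_node(name, label=name)

    annotations_input = {
        "term1": ["geneA", "geneB"],  # kept: meets min_nodes_per_term=2
        "term2": ["geneC"],           # dropped: only one node in the network
    }

    result = load_annotations(network, annotations_input, min_nodes_per_term=2)
    matrix = result["matrix"]         # scipy.sparse.csr_matrix in 0.0.9b26, dense array before
    dense = matrix.toarray()          # densify if older downstream code expects np.ndarray
    print(result["ordered_annotations"], dense.shape)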
risk/annotations/io.py
CHANGED
@@ -1,8 +1,6 @@
 """
 risk/annotations/io
 ~~~~~~~~~~~~~~~~~~~
-
-This file contains the code for the RISK class and command-line access.
 """

 import json
@@ -25,27 +23,32 @@ class AnnotationsIO:
     def __init__(self):
         pass

-    def load_json_annotation(
+    def load_json_annotation(
+        self, network: nx.Graph, filepath: str, min_nodes_per_term: int = 2
+    ) -> Dict[str, Any]:
         """Load annotations from a JSON file and convert them to a DataFrame.

         Args:
             network (NetworkX graph): The network to which the annotations are related.
             filepath (str): Path to the JSON annotations file.
+            min_nodes_per_term (int, optional): The minimum number of network nodes required for each annotation
+                term to be included. Defaults to 2.

         Returns:
-
+            Dict[str, Any]: A dictionary containing ordered nodes, ordered annotations, and the annotations matrix.
         """
         filetype = "JSON"
         # Log the loading of the JSON file
-        params.log_annotations(
+        params.log_annotations(
+            filetype=filetype, filepath=filepath, min_nodes_per_term=min_nodes_per_term
+        )
         _log_loading(filetype, filepath=filepath)

-        #
-        with open(filepath, "r") as file:
+        # Load the JSON file into a dictionary
+        with open(filepath, "r", encoding="utf-8") as file:
             annotations_input = json.load(file)

-
-        return load_annotations(network, annotations_input)
+        return load_annotations(network, annotations_input, min_nodes_per_term)

     def load_excel_annotation(
         self,
@@ -55,6 +58,7 @@ class AnnotationsIO:
         nodes_colname: str = "nodes",
         sheet_name: str = "Sheet1",
         nodes_delimiter: str = ";",
+        min_nodes_per_term: int = 2,
     ) -> Dict[str, Any]:
         """Load annotations from an Excel file and associate them with the network.

@@ -65,6 +69,8 @@ class AnnotationsIO:
             nodes_colname (str): Name of the column containing the nodes associated with each label.
             sheet_name (str, optional): The name of the Excel sheet to load (default is 'Sheet1').
             nodes_delimiter (str, optional): Delimiter used to separate multiple nodes within the nodes column (default is ';').
+            min_nodes_per_term (int, optional): The minimum number of network nodes required for each annotation
+                term to be included. Defaults to 2.

         Returns:
             Dict[str, Any]: A dictionary where each label is paired with its respective list of nodes,
@@ -72,18 +78,21 @@ class AnnotationsIO:
         """
         filetype = "Excel"
         # Log the loading of the Excel file
-        params.log_annotations(
+        params.log_annotations(
+            filetype=filetype, filepath=filepath, min_nodes_per_term=min_nodes_per_term
+        )
         _log_loading(filetype, filepath=filepath)

         # Load the specified sheet from the Excel file
-
+        annotation = pd.read_excel(filepath, sheet_name=sheet_name)
         # Split the nodes column by the specified nodes_delimiter
-
+        annotation[nodes_colname] = annotation[nodes_colname].apply(
+            lambda x: x.split(nodes_delimiter)
+        )
         # Convert the DataFrame to a dictionary pairing labels with their corresponding nodes
-
+        annotations_input = annotation.set_index(label_colname)[nodes_colname].to_dict()

-
-        return load_annotations(network, label_node_dict)
+        return load_annotations(network, annotations_input, min_nodes_per_term)

     def load_csv_annotation(
         self,
@@ -92,6 +101,7 @@ class AnnotationsIO:
         label_colname: str = "label",
         nodes_colname: str = "nodes",
         nodes_delimiter: str = ";",
+        min_nodes_per_term: int = 2,
     ) -> Dict[str, Any]:
         """Load annotations from a CSV file and associate them with the network.

@@ -101,6 +111,8 @@ class AnnotationsIO:
             label_colname (str): Name of the column containing the labels (e.g., GO terms).
             nodes_colname (str): Name of the column containing the nodes associated with each label.
             nodes_delimiter (str, optional): Delimiter used to separate multiple nodes within the nodes column (default is ';').
+            min_nodes_per_term (int, optional): The minimum number of network nodes required for each annotation
+                term to be included. Defaults to 2.

         Returns:
             Dict[str, Any]: A dictionary where each label is paired with its respective list of nodes,
@@ -108,7 +120,9 @@ class AnnotationsIO:
         """
         filetype = "CSV"
         # Log the loading of the CSV file
-        params.log_annotations(
+        params.log_annotations(
+            filetype=filetype, filepath=filepath, min_nodes_per_term=min_nodes_per_term
+        )
         _log_loading(filetype, filepath=filepath)

         # Load the CSV file into a dictionary
@@ -116,8 +130,7 @@ class AnnotationsIO:
             filepath, label_colname, nodes_colname, delimiter=",", nodes_delimiter=nodes_delimiter
         )

-
-        return load_annotations(network, annotations_input)
+        return load_annotations(network, annotations_input, min_nodes_per_term)

     def load_tsv_annotation(
         self,
@@ -126,6 +139,7 @@ class AnnotationsIO:
         label_colname: str = "label",
         nodes_colname: str = "nodes",
         nodes_delimiter: str = ";",
+        min_nodes_per_term: int = 2,
     ) -> Dict[str, Any]:
         """Load annotations from a TSV file and associate them with the network.

@@ -135,6 +149,8 @@ class AnnotationsIO:
             label_colname (str): Name of the column containing the labels (e.g., GO terms).
             nodes_colname (str): Name of the column containing the nodes associated with each label.
             nodes_delimiter (str, optional): Delimiter used to separate multiple nodes within the nodes column (default is ';').
+            min_nodes_per_term (int, optional): The minimum number of network nodes required for each annotation
+                term to be included. Defaults to 2.

         Returns:
             Dict[str, Any]: A dictionary where each label is paired with its respective list of nodes,
@@ -142,7 +158,9 @@ class AnnotationsIO:
         """
         filetype = "TSV"
         # Log the loading of the TSV file
-        params.log_annotations(
+        params.log_annotations(
+            filetype=filetype, filepath=filepath, min_nodes_per_term=min_nodes_per_term
+        )
         _log_loading(filetype, filepath=filepath)

         # Load the TSV file into a dictionary
@@ -150,18 +168,21 @@ class AnnotationsIO:
             filepath, label_colname, nodes_colname, delimiter="\t", nodes_delimiter=nodes_delimiter
         )

-
-        return load_annotations(network, annotations_input)
+        return load_annotations(network, annotations_input, min_nodes_per_term)

-    def load_dict_annotation(
+    def load_dict_annotation(
+        self, network: nx.Graph, content: Dict[str, Any], min_nodes_per_term: int = 2
+    ) -> Dict[str, Any]:
         """Load annotations from a provided dictionary and convert them to a dictionary annotation.

         Args:
             network (NetworkX graph): The network to which the annotations are related.
-            content (
+            content (Dict[str, Any]): The annotations dictionary to load.
+            min_nodes_per_term (int, optional): The minimum number of network nodes required for each annotation
+                term to be included. Defaults to 2.

         Returns:
-
+            Dict[str, Any]: A dictionary containing ordered nodes, ordered annotations, and the annotations matrix.
         """
         # Ensure the input content is a dictionary
         if not isinstance(content, dict):
@@ -174,13 +195,8 @@ class AnnotationsIO:
         params.log_annotations(filepath="In-memory dictionary", filetype=filetype)
         _log_loading(filetype, "In-memory dictionary")

-        # Load the annotations
-
-        # Ensure the output is a dictionary
-        if not isinstance(annotations_dict, dict):
-            raise ValueError("Expected output to be a dictionary")
-
-        return annotations_dict
+        # Load the annotations as a dictionary from the provided dictionary
+        return load_annotations(network, content, min_nodes_per_term)


 def _load_matrix_file(
@@ -203,11 +219,11 @@ def _load_matrix_file(
         Dict[str, Any]: A dictionary where each label is paired with its respective list of nodes.
     """
     # Load the CSV or TSV file into a DataFrame
-
+    annotation = pd.read_csv(filepath, delimiter=delimiter)
     # Split the nodes column by the nodes_delimiter to handle multiple nodes per label
-
+    annotation[nodes_colname] = annotation[nodes_colname].apply(lambda x: x.split(nodes_delimiter))
     # Create a dictionary pairing labels with their corresponding list of nodes
-    label_node_dict =
+    label_node_dict = annotation.set_index(label_colname)[nodes_colname].to_dict()
     return label_node_dict

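All AnnotationsIO loaders above now accept and forward the same min_nodes_per_term cutoff. A hedged sketch of how a caller might pass it; the file paths are placeholders, and the keyword names for the CSV loader are assumed to mirror the JSON loader shown in the diff:

    import networkx as nx
    from risk.annotations.io import AnnotationsIO

    # Hypothetical graph whose node "label" attributes match the node names in the files
    network = nx.Graph()
    for name in ("geneA", "geneB", "geneC"):
        network.add_node(name, label=name)

    io = AnnotationsIO()
    # JSON file of the form {"TERM": ["geneA", "geneB", ...], ...}; path is a placeholder
    annotations = io.load_json_annotation(network, "annotations.json", min_nodes_per_term=3)

    # CSV with "label" and "nodes" columns, nodes separated by ";"
    annotations = io.load_csv_annotation(
        network=network,
        filepath="annotations.csv",
        label_colname="label",
        nodes_colname="nodes",
        nodes_delimiter=";",
        min_nodes_per_term=3,
    )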
risk/log/__init__.py
CHANGED
@@ -3,7 +3,9 @@ risk/log
 ~~~~~~~~
 """

-from .
-from .
+from risk.log.console import logger, log_header, set_global_verbosity
+from risk.log.parameters import Params

+# Initialize the global parameters logger
 params = Params()
+params.initialize()
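With the module renames (config.py → console.py, params.py → parameters.py), risk.log now re-exports a shared logger, the set_global_verbosity helper, and a module-level params registry that is initialized at import. A rough sketch of the resulting import surface; the boolean argument to set_global_verbosity is an assumption based on its name, and only calls shown elsewhere in this diff are used:

    from risk.log import logger, params, set_global_verbosity

    set_global_verbosity(True)  # assumed to toggle console output for the shared logger
    logger.info("Loading annotations...")

    # The loaders in risk/annotations/io.py record their inputs through this global registry
    params.log_annotations(filetype="JSON", filepath="annotations.json", min_nodes_per_term=2)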
risk/log/{config.py → console.py}
RENAMED
@@ -1,6 +1,6 @@
 """
-risk/log/
-
+risk/log/console
+~~~~~~~~~~~~~~~~
 """

 import logging
@@ -16,8 +16,10 @@ def in_jupyter():
         shell = get_ipython().__class__.__name__
         if shell == "ZMQInteractiveShell":  # Jupyter Notebook or QtConsole
             return True
-
+        if shell == "TerminalInteractiveShell":  # Terminal running IPython
             return False
+
+        return False  # Other type (?)
     except NameError:
         return False  # Not in Jupyter
