napistu 0.4.2__py3-none-any.whl → 0.4.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- napistu/consensus.py +3 -4
- napistu/ingestion/constants.py +51 -0
- napistu/ingestion/reactom_fi.py +208 -0
- napistu/network/neighborhoods.py +28 -7
- napistu/network/ng_utils.py +26 -6
- napistu/network/precompute.py +56 -0
- napistu/sbml_dfs_utils.py +8 -2
- napistu/source.py +243 -40
- napistu/statistics/hypothesis_testing.py +66 -0
- napistu/utils.py +23 -1
- {napistu-0.4.2.dist-info → napistu-0.4.4.dist-info}/METADATA +1 -1
- {napistu-0.4.2.dist-info → napistu-0.4.4.dist-info}/RECORD +20 -18
- tests/test_network_precompute.py +30 -0
- tests/test_sbml_dfs_utils.py +13 -0
- tests/test_source.py +38 -6
- tests/test_statistics_hypothesis_testing.py +62 -0
- tests/test_set_coverage.py +0 -50
- {napistu-0.4.2.dist-info → napistu-0.4.4.dist-info}/WHEEL +0 -0
- {napistu-0.4.2.dist-info → napistu-0.4.4.dist-info}/entry_points.txt +0 -0
- {napistu-0.4.2.dist-info → napistu-0.4.4.dist-info}/licenses/LICENSE +0 -0
- {napistu-0.4.2.dist-info → napistu-0.4.4.dist-info}/top_level.txt +0 -0
napistu/source.py
CHANGED
```diff
@@ -1,8 +1,14 @@
 from __future__ import annotations
 
+import numpy as np
 import pandas as pd
+from typing import Optional
+
 from napistu import indices
-from napistu
+from napistu import sbml_dfs_core
+from napistu import sbml_dfs_utils
+from napistu.statistics import hypothesis_testing
+from napistu.constants import SBML_DFS_SCHEMA, SCHEMA_DEFS, SOURCE_SPEC
 
 
 class Source:
@@ -41,11 +47,18 @@ class Source:
         Creates an empty source object. This is typically used when creating an SBML_dfs
         object from a single source.
         pw_index : indices.PWIndex
+            a pathway index object containing the pathway_id and other metadata
 
         Returns
         -------
         None.
 
+        Raises
+        ------
+        ValueError:
+            if pw_index is not a indices.PWIndex
+        ValueError:
+            if SOURCE_SPEC.MODEL is not present in source_df
         """
 
         if init is True:
@@ -101,8 +114,27 @@ def create_source_table(
     """
     Create Source Table
 
-    Create a table with one row per "new_id" and a Source object created from the
-
+    Create a table with one row per "new_id" and a Source object created from the union of "old_id" Source objects
+
+    Parameters
+    ----------
+    lookup_table: pd.Series
+        a pd.Series containing the index of the table to create a source table for
+    table_schema: dict
+        a dictionary containing the schema of the table to create a source table for
+    pw_index: indices.PWIndex
+        a pathway index object containing the pathway_id and other metadata
+
+    Returns
+    -------
+    source_table: pd.DataFrame
+        a pd.DataFrame containing the index of the table to create a source table for
+        with one row per "new_id" and a Source object created from the union of "old_id" Source objects
+
+    Raises
+    ------
+    ValueError:
+        if SOURCE_SPEC.SOURCE is not present in table_schema
     """
 
     if SOURCE_SPEC.SOURCE not in table_schema.keys():
@@ -142,8 +174,27 @@ def merge_sources(source_list: list | pd.Series) -> Source:
 
     Merge a list of Source objects into a single Source object
 
+    Parameters
+    ----------
+    source_list: list | pd.Series
+        a list of Source objects or a pd.Series of Source objects
+
+    Returns
+    -------
+    source: Source
+        a Source object created from the union of the Source objects in source_list
+
+    Raises
+    ------
+    TypeError:
+        if source_list is not a list or pd.Series
     """
 
+    if not isinstance(source_list, (list, pd.Series)):
+        raise TypeError(
+            f"source_list must be a list or pd.Series, but was a {type(source_list).__name__}"
+        )
+
     # filter to non-empty sources
     # empty sources have only been initialized; a merge hasn't occured
     existing_sources = [s.source is not None for s in source_list]
@@ -160,28 +211,35 @@ def merge_sources(source_list: list | pd.Series) -> Source:
     return Source(pd.concat(existing_source_list))
 
 
-def unnest_sources(
-    source_table: pd.DataFrame, source_var: str, verbose: bool = False
-) -> pd.DataFrame:
+def unnest_sources(source_table: pd.DataFrame, verbose: bool = False) -> pd.DataFrame:
     """
     Unnest Sources
 
     Take a pd.DataFrame containing an array of Sources and
     return one-row per source.
 
-    Parameters
+    Parameters
+    ----------
     source_table: pd.DataFrame
         a table containing an array of Sources
-
-
+    verbose: bool
+        print progress
 
-    Returns
+    Returns
+    -------
     pd.Dataframe containing the index of source_table but expanded
     to include one row per source
 
     """
 
     sources = list()
+
+    table_type = sbml_dfs_utils.infer_entity_type(source_table)
+    source_table_schema = SBML_DFS_SCHEMA.SCHEMA[table_type]
+    if SCHEMA_DEFS.SOURCE not in source_table_schema.keys():
+        raise ValueError(f"{table_type} does not have a source attribute")
+
+    source_var = source_table_schema[SCHEMA_DEFS.SOURCE]
     source_table_index = source_table.index.to_frame().reset_index(drop=True)
 
     for i in range(source_table.shape[0]):
@@ -216,53 +274,73 @@ def unnest_sources(
     return pd.concat(sources)
 
 
-def greedy_set_coverge_of_sources(
-
+def source_set_coverage(
+    select_sources_df: pd.DataFrame,
+    source_total_counts: Optional[pd.Series] = None,
+    sbml_dfs: Optional[sbml_dfs_core.SBML_dfs] = None,
 ) -> pd.DataFrame:
     """
     Greedy Set Coverage of Sources
 
-
-
+    Find the set of pathways covering `select_sources_df`. If `all_sources_df`
+    is provided pathways will be selected iteratively based on statistical
+    enrichment. If `all_sources_df` is not provided, the largest pathways
+    will be chosen iteratively.
 
-    Parameters
-
+    Parameters
+    ----------
+    select_sources_df: pd.DataFrame
         pd.Dataframe containing the index of source_table but expanded to
         include one row per source. As produced by source.unnest_sources()
-
-
+    source_total_counts: pd.Series
+        pd.Series containing the total counts of each source. As produced by
+        source.get_source_total_counts()
+    sbml_dfs: sbml_dfs_core.SBML_dfs
+        if `source_total_counts` is provided then `sbml_dfs` must be provided
+        to calculate the total number of entities in the table.
+
+    Returns
+    -------
     minimial_sources: [str]
         A list of pathway_ids of the minimal source set
 
     """
 
+    table_type = sbml_dfs_utils.infer_entity_type(select_sources_df)
+    pk = SBML_DFS_SCHEMA.SCHEMA[table_type][SCHEMA_DEFS.PK]
+
+    if source_total_counts is not None:
+        if sbml_dfs is None:
+            raise ValueError(
+                "If `source_total_counts` is provided, `sbml_dfs` must be provided to calculate the total number of entities in the table."
+            )
+        n_total_entities = sbml_dfs.get_table(table_type).shape[0]
+
     # rollup pathways with identical membership
-    deduplicated_sources = _deduplicate_source_df(
+    deduplicated_sources = _deduplicate_source_df(select_sources_df)
 
     unaccounted_for_members = deduplicated_sources
     retained_pathway_ids = []
-
     while unaccounted_for_members.shape[0] != 0:
         # find the pathway with the most members
-
-
+
+        if source_total_counts is None:
+            top_pathway = _select_top_pathway_by_size(unaccounted_for_members)
+        else:
+            top_pathway = _select_top_pathway_by_enrichment(
+                unaccounted_for_members, source_total_counts, n_total_entities, pk
+            )
+
+        if top_pathway is None:
+            break
+
         retained_pathway_ids.append(top_pathway)
 
         # remove all members associated with the top pathway
-        members_captured = (
-            unaccounted_for_members[
-                unaccounted_for_members[SOURCE_SPEC.PATHWAY_ID] == top_pathway
-            ]
-            .index.get_level_values(table_schema["pk"])
-            .tolist()
+        unaccounted_for_members = _update_unaccounted_for_members(
+            top_pathway, unaccounted_for_members
         )
 
-        unaccounted_for_members = unaccounted_for_members[
-            ~unaccounted_for_members.index.get_level_values(table_schema["pk"]).isin(
-                members_captured
-            )
-        ]
-
     minimial_sources = deduplicated_sources[
         deduplicated_sources[SOURCE_SPEC.PATHWAY_ID].isin(retained_pathway_ids)
     ].sort_index()
@@ -270,9 +348,39 @@ def greedy_set_coverge_of_sources(
     return minimial_sources
 
 
-def _deduplicate_source_df(source_df: pd.DataFrame, table_schema: dict) -> pd.DataFrame:
+def get_source_total_counts(
+    sbml_dfs: sbml_dfs_core.SBML_dfs, entity_type: str
+) -> pd.Series:
+    """
+    Get the total counts of each source.
+
+    Parameters
+    ----------
+    sbml_dfs: sbml_dfs_core.SBML_dfs
+        sbml_dfs object containing the table to get the total counts of
+    entity_type: str
+        the type of entity to get the total counts of
+
+    Returns
+    -------
+    source_total_counts: pd.Series
+        pd.Series containing the total counts of each source.
+    """
+
+    all_sources_table = unnest_sources(sbml_dfs.get_table(entity_type))
+    source_total_counts = all_sources_table.value_counts(SOURCE_SPEC.PATHWAY_ID).rename(
+        "total_counts"
+    )
+
+    return source_total_counts
+
+
+def _deduplicate_source_df(source_df: pd.DataFrame) -> pd.DataFrame:
     """Combine entries in a source table when multiple models have the same members."""
 
+    table_type = sbml_dfs_utils.infer_entity_type(source_df)
+    source_table_schema = SBML_DFS_SCHEMA.SCHEMA[table_type]
+
     # drop entries which are missing required attributes and throw an error if none are left
     REQUIRED_NON_NA_ATTRIBUTES = [SOURCE_SPEC.PATHWAY_ID]
     indexed_sources = (
@@ -296,7 +404,11 @@ def _deduplicate_source_df(source_df: pd.DataFrame, table_schema: dict) -> pd.DataFrame:
             {
                 SOURCE_SPEC.PATHWAY_ID: p,
                 "membership_string": "_".join(
-                    set(
+                    set(
+                        indexed_sources.loc[[p]][
+                            source_table_schema[SCHEMA_DEFS.PK]
+                        ].tolist()
+                    )
                 ),
             }
             for p in pathways
@@ -320,16 +432,16 @@ def _deduplicate_source_df(source_df: pd.DataFrame, table_schema: dict) -> pd.DataFrame:
 
     merged_sources = pd.concat(
         [
-            _collapse_by_membership_string(s, membership_categories,
+            _collapse_by_membership_string(s, membership_categories, source_table_schema)  # type: ignore
            for s in category_index.tolist()
         ]
     )
     merged_sources[SOURCE_SPEC.INDEX_NAME] = merged_sources.groupby(
-        table_schema["pk"]
+        source_table_schema[SCHEMA_DEFS.PK]
     ).cumcount()
 
     return merged_sources.set_index(
-        [
+        [source_table_schema[SCHEMA_DEFS.PK], SOURCE_SPEC.INDEX_NAME]
     ).sort_index()
 
 
@@ -345,7 +457,10 @@ def _collapse_by_membership_string(
     return pd.DataFrame(
         [
             pd.concat(
-                [
+                [
+                    pd.Series({table_schema[SCHEMA_DEFS.PK]: ms}),
+                    collapsed_source_membership,
+                ]
             )
            for ms in membership_string.split("_")
         ]
@@ -398,3 +513,91 @@ def _safe_source_merge(member_Sources: Source | list) -> Source:
         return merge_sources(member_Sources.tolist())
     else:
         raise TypeError("Expecting source.Source or pd.Series")
+
+
+def _select_top_pathway_by_size(unaccounted_for_members: pd.DataFrame) -> str:
+
+    pathway_members = unaccounted_for_members.value_counts(SOURCE_SPEC.PATHWAY_ID)
+    top_pathway = pathway_members[pathway_members == max(pathway_members)].index[0]
+
+    return top_pathway
+
+
+def _select_top_pathway_by_enrichment(
+    unaccounted_for_members: pd.DataFrame,
+    source_total_counts: pd.Series,
+    n_total_entities: int,
+    table_pk: str,
+    min_pw_size: int = 5,
+) -> str:
+
+    n_observed_entities = len(
+        unaccounted_for_members.index.get_level_values(table_pk).unique()
+    )
+    pathway_members = unaccounted_for_members.value_counts(
+        SOURCE_SPEC.PATHWAY_ID
+    ).rename("observed_members")
+
+    pathway_members = pathway_members.loc[pathway_members >= min_pw_size]
+    if pathway_members.shape[0] == 0:
+        return None
+
+    wide_contingency_table = (
+        pathway_members.to_frame()
+        .join(source_total_counts)
+        .assign(
+            missing_members=lambda x: x["total_counts"] - x["observed_members"],
+            observed_nonmembers=lambda x: n_observed_entities - x["observed_members"],
+            nonobserved_nonmembers=lambda x: n_total_entities
+            - x["observed_nonmembers"]
+            - x["missing_members"]
+            - x["observed_members"],
+        )
+        .drop(columns=["total_counts"])
+    )
+
+    # calculate enrichments using a fast vectorized normal approximation
+    odds_ratios, _ = hypothesis_testing.fisher_exact_vectorized(
+        wide_contingency_table["observed_members"],
+        wide_contingency_table["missing_members"],
+        wide_contingency_table["observed_nonmembers"],
+        wide_contingency_table["nonobserved_nonmembers"],
+    )
+
+    return pathway_members.index[np.argmax(odds_ratios)]
+
+
+def _update_unaccounted_for_members(
+    top_pathway, unaccounted_for_members
+) -> pd.DataFrame:
+    """
+    Update the unaccounted for members dataframe by removing the members
+    associated with the top pathway.
+
+    Parameters
+    ----------
+    top_pathway: str
+        the pathway to remove from the unaccounted for members
+    unaccounted_for_members: pd.DataFrame
+        the dataframe of unaccounted for members
+
+    Returns
+    -------
+    unaccounted_for_members: pd.DataFrame
+        the dataframe of unaccounted for members with the top pathway removed
+    """
+
+    table_type = sbml_dfs_utils.infer_entity_type(unaccounted_for_members)
+    pk = SBML_DFS_SCHEMA.SCHEMA[table_type][SCHEMA_DEFS.PK]
+
+    members_captured = (
+        unaccounted_for_members[
+            unaccounted_for_members[SOURCE_SPEC.PATHWAY_ID] == top_pathway
+        ]
+        .index.get_level_values(pk)
+        .tolist()
+    )
+
+    return unaccounted_for_members[
+        ~unaccounted_for_members.index.get_level_values(pk).isin(members_captured)
+    ]
```
napistu/statistics/hypothesis_testing.py
ADDED
```diff
@@ -0,0 +1,66 @@
+from typing import Union
+
+import numpy as np
+from scipy.stats import norm
+
+
+def fisher_exact_vectorized(
+    observed_members: Union[list[int], np.ndarray],
+    missing_members: Union[list[int], np.ndarray],
+    observed_nonmembers: Union[list[int], np.ndarray],
+    nonobserved_nonmembers: Union[list[int], np.ndarray],
+) -> tuple[np.ndarray, np.ndarray]:
+    """
+    Fast vectorized one-tailed Fisher exact test using normal approximation.
+
+    Parameters:
+    -----------
+    observed_members, missing_members, observed_nonmembers, nonobserved_nonmembers : array-like
+        The four cells of the 2x2 contingency tables (must be non-negative)
+
+    Returns:
+    --------
+    odds_ratios : numpy array
+        Odds ratios for each test
+    p_values : numpy array
+        One-tailed p-values (tests for enrichment)
+    """
+    # Convert to numpy arrays
+    a = np.array(observed_members, dtype=float)
+    b = np.array(missing_members, dtype=float)
+    c = np.array(observed_nonmembers, dtype=float)
+    d = np.array(nonobserved_nonmembers, dtype=float)
+
+    # Check for negative values and raise error
+    if np.any((a < 0) | (b < 0) | (c < 0) | (d < 0)):
+        raise ValueError("All contingency table values must be non-negative")
+
+    # Calculate odds ratios
+    odds_ratios = np.divide(
+        a * d, b * c, out=np.full_like(a, np.inf, dtype=float), where=(b * c) != 0
+    )
+
+    # Normal approximation to hypergeometric distribution
+    n = a + b + c + d
+
+    # Avoid division by zero in expected value calculation
+    expected_a = np.divide(
+        (a + b) * (a + c), n, out=np.zeros_like(n, dtype=float), where=n != 0
+    )
+
+    # Variance calculation with protection against division by zero
+    var_a = np.divide(
+        (a + b) * (c + d) * (a + c) * (b + d),
+        n * n * (n - 1),
+        out=np.ones_like(n, dtype=float),  # Default to 1 to avoid sqrt(0)
+        where=(n > 1),
+    )
+    var_a = np.maximum(var_a, 1e-10)  # Ensure positive variance
+
+    # Continuity correction and z-score
+    z = (a - expected_a - 0.5) / np.sqrt(var_a)
+
+    # One-tailed p-value (upper tail for enrichment)
+    p_values = norm.sf(z)  # 1 - norm.cdf(z)
+
+    return odds_ratios, p_values
```
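The helper trades exactness for speed: rather than running an exact Fisher test per table, it computes $z = \frac{a - E[a] - 0.5}{\sqrt{\operatorname{Var}(a)}}$ with $E[a] = \frac{(a+b)(a+c)}{n}$ and the hypergeometric variance $\operatorname{Var}(a) = \frac{(a+b)(c+d)(a+c)(b+d)}{n^2(n-1)}$, then takes the upper-tail normal p-value. A small self-contained usage sketch (the counts are invented for illustration):

```python
from napistu.statistics import hypothesis_testing

# Two 2x2 tables, one per candidate pathway; the four arguments are the
# cells [a, b, c, d] = [observed members, missing members,
# observed non-members, non-observed non-members].
odds_ratios, p_values = hypothesis_testing.fisher_exact_vectorized(
    observed_members=[10, 3],
    missing_members=[20, 50],
    observed_nonmembers=[40, 45],
    nonobserved_nonmembers=[930, 900],
)
print(odds_ratios)  # array([11.625, 1.2]) -- (a*d)/(b*c) per table
print(p_values)     # one-tailed (enrichment) p-values from the normal approximation
```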
napistu/utils.py
CHANGED
```diff
@@ -14,7 +14,7 @@ import zipfile
 from contextlib import closing
 from itertools import starmap
 from textwrap import fill
-from typing import Any,
+from typing import Any, Dict, Optional, List, Union
 from urllib.parse import urlparse
 from pathlib import Path
 from requests.adapters import HTTPAdapter
@@ -1131,6 +1131,28 @@ def safe_fill(x: str, fill_width: int = 15) -> str:
     return fill(x, fill_width)
 
 
+def match_regex_dict(s: str, regex_dict: Dict[str, any]) -> Optional[any]:
+    """
+    Apply each regex in regex_dict to the string s. If a regex matches, return its value.
+    If no regex matches, return None.
+
+    Parameters
+    ----------
+    s : str
+        The string to test.
+    regex_dict : dict
+        Dictionary where keys are regex patterns (str), and values are the values to return.
+
+    Returns
+    -------
+    The value associated with the first matching regex, or None if no match.
+    """
+    for pattern, value in regex_dict.items():
+        if re.search(pattern, s):
+            return value
+    return None
+
+
 def _add_nameness_score_wrapper(df, name_var, table_schema):
     """Call _add_nameness_score with default value."""
 
```
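A short usage sketch for the new `utils.match_regex_dict` (the patterns and return values here are invented): patterns are tried in dict insertion order and the first `re.search` hit wins.

```python
from napistu import utils

# Hypothetical mapping from filename patterns to format labels.
handlers = {
    r"\.sbml$": "sbml",
    r"\.tsv$": "table",
}

utils.match_regex_dict("model.sbml", handlers)  # -> "sbml"
utils.match_regex_dict("readme.md", handlers)   # -> None (no pattern matches)
```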
{napistu-0.4.2.dist-info → napistu-0.4.4.dist-info}/RECORD
CHANGED
```diff
@@ -1,13 +1,13 @@
 napistu/__init__.py,sha256=dFXAhIqlTLJMwowS4BUDT08-Vy3Q0u1L0CMCErSZT1Y,239
 napistu/__main__.py,sha256=xwlbh_0Ig3a-yG6BIJRiDPSN9R2HnX2pEBvlodlO6h4,29015
-napistu/consensus.py,sha256=
+napistu/consensus.py,sha256=SDw58vkDivzy5AiOQUnf5vUbFxmSrMGMMmptDMZhk0E,69807
 napistu/constants.py,sha256=8sp1l0cxu2rsnCrWBEEwhcBKvDtc4u0D0f_72zILLW0,13427
 napistu/identifiers.py,sha256=e2-nTVzr5AINa0y1ER9218bKXyF2kAeJ9At22S4Z00o,33914
 napistu/indices.py,sha256=Zjg3gE0JQ3T879lCPazYg-WXVE6hvcAr713ZKpJ32rk,9830
 napistu/sbml_dfs_core.py,sha256=s0OyoHs-AjOcbZu1d3KNkW_PI7Rxbhu5ZLpfQeO4iY8,72639
-napistu/sbml_dfs_utils.py,sha256=
-napistu/source.py,sha256=
-napistu/utils.py,sha256=
+napistu/sbml_dfs_utils.py,sha256=SOy1Ii2hDFOfQa7pFAJS9EfAmfBVD_sHvDJBVmCN_p8,46456
+napistu/source.py,sha256=iDDKpN-4k_W_tyxEjqe_z-yPJv7uoFRRBhkiBtOH5C8,20416
+napistu/utils.py,sha256=p2sJxTklmV30XS6hanJRjcdfgeaZpkULuMyQX3BPP0c,36404
 napistu/context/__init__.py,sha256=LQBEqipcHKK0E5UlDEg1ct-ymCs93IlUrUaH8BCevf0,242
 napistu/context/discretize.py,sha256=Qq7zg46F_I-PvQIT2_pEDQV7YEtUQCxKoRvT5Gu9QsE,15052
 napistu/context/filtering.py,sha256=l1oq-43ysSGqU9VmhTOO_pYT4DSMf20yxvktPC1MI0I,13696
@@ -17,13 +17,14 @@ napistu/gcs/downloads.py,sha256=SvGv9WYr_Vt3guzyz1QiAuBndeKPTBtWSFLj1-QbLf4,6348
 napistu/gcs/utils.py,sha256=eLSsvewWJdCguyj2k0ozUGP5BTemaE1PZg41Z3aY5kM,571
 napistu/ingestion/__init__.py,sha256=dFXAhIqlTLJMwowS4BUDT08-Vy3Q0u1L0CMCErSZT1Y,239
 napistu/ingestion/bigg.py,sha256=f65--8ARe248eYCUJpFMF284Wz53sLyFyBuwelxHmJA,4340
-napistu/ingestion/constants.py,sha256=
+napistu/ingestion/constants.py,sha256=jo3v8Z7Y_tNNhTmEcokVOh1HBJFAXc-Z38S4mG58qfo,10059
 napistu/ingestion/gtex.py,sha256=X0hSC1yrpf4xSJWFhpeNcnHwJzKDII2MvjfUqYA0JN8,3720
 napistu/ingestion/hpa.py,sha256=R27ExrryKQ4Crxv9ATXmBJCa-yd01TMOrDjkeBhIQac,5054
 napistu/ingestion/identifiers_etl.py,sha256=6ppDUA6lEZurdmVbiFLOUzphYbr-hndMhtqsQnq_yAc,5009
 napistu/ingestion/napistu_edgelist.py,sha256=4RLXsoIk_-Atu-Nqme_t1JpEpBET26VIY2Y_Hcd3sMw,3580
 napistu/ingestion/obo.py,sha256=AQkIPWbjA464Lma0tx91JucWkIwLjC7Jgv5VHGRTDkE,9601
 napistu/ingestion/psi_mi.py,sha256=5eJjm7XWogL9oTyGqR52kntHClLwLsTePKqCvUGyi-w,10111
+napistu/ingestion/reactom_fi.py,sha256=hKdOY2wNtcNk6WlnHnNalryiXv6mtcWUiBW9isXPB0Y,6991
 napistu/ingestion/reactome.py,sha256=Hn9X-vDp4o_HK-OtaQvel3vJeZ8_TC1-4N2rruK9Oks,7099
 napistu/ingestion/sbml.py,sha256=l8Z98yWuOIRGns8G4UNnoQz7v_xmukZb_IZ_5ye34Ko,25296
 napistu/ingestion/string.py,sha256=go1WGTkoLJejX7GQWf9bFeInFGAw4jNSpS2B_Zr5f_s,11364
@@ -61,14 +62,14 @@ napistu/network/__init__.py,sha256=dFXAhIqlTLJMwowS4BUDT08-Vy3Q0u1L0CMCErSZT1Y,2
 napistu/network/constants.py,sha256=nG_lUZYLgop8oxOGjDYqvxXJzVdOwKZ3aWnxlhtSaIo,6915
 napistu/network/data_handling.py,sha256=KncrAKjXI3169BgVE-SnY8FkpVF60JnUwfMHtbqvsTc,14725
 napistu/network/ig_utils.py,sha256=MuyEyOVtSHndil6QuuRCimBZrJ2jTaF5qQESgYlu02M,17042
-napistu/network/neighborhoods.py,sha256=
+napistu/network/neighborhoods.py,sha256=kXoD5d3plcTEw-6XCbb5QjaCt0jsKwn17VdAvnGoFhY,57041
 napistu/network/net_create.py,sha256=66kV_xoWnu4BVLaJZ1TAC7wBSsjPDqjoAXH-X9ShV3s,59091
 napistu/network/net_create_utils.py,sha256=zajwaz2xAij_9fEnD77SgBw_EnNAnJ8jBCmmK2rk_bA,24672
 napistu/network/net_propagation.py,sha256=Il5nDOWh3nLz8gRhDFHGp2LxcvJ9C1twiSZjDeiZMUo,23490
 napistu/network/ng_core.py,sha256=dGnTUKR4WtnvaYMyIHqqF55FY4mJSa7wjA2LZ4cVB6U,11720
-napistu/network/ng_utils.py,sha256=
+napistu/network/ng_utils.py,sha256=ahSm-8M2pV662V7MMVcGaoguBM55_y-F7LDmZSVp9ag,15951
 napistu/network/paths.py,sha256=r6LVKVvX7i3ctBA5r-xvHfpH5Zsd0VDHUCtin2iag20,17453
-napistu/network/precompute.py,sha256=
+napistu/network/precompute.py,sha256=ARU2tktWnxFISaHAY8chpkg8pusZPv7TT5jSIB9eFF0,10081
 napistu/ontologies/__init__.py,sha256=dFXAhIqlTLJMwowS4BUDT08-Vy3Q0u1L0CMCErSZT1Y,239
 napistu/ontologies/constants.py,sha256=GyOFvezSxDK1VigATcruTKtNhjcYaid1ggulEf_HEtQ,4345
 napistu/ontologies/dogma.py,sha256=VVj6NKBgNym4SdOSu8g22OohALj7cbObhIJmdY2Sfy0,8860
@@ -84,8 +85,9 @@ napistu/scverse/__init__.py,sha256=Lgxr3iMQAkTzXE9BNz93CndNP5djzerLvmHM-D0PU3I,3
 napistu/scverse/constants.py,sha256=0iAkhyJUIeFGHdLLU3fCaEU1O3Oix4qAsxr3CxGTjVs,653
 napistu/scverse/loading.py,sha256=jqiE71XB-wdV50GyZrauFNY0Lai4bX9Fm2Gv80VR8t8,27016
 napistu/statistics/__init__.py,sha256=dFXAhIqlTLJMwowS4BUDT08-Vy3Q0u1L0CMCErSZT1Y,239
+napistu/statistics/hypothesis_testing.py,sha256=k0mBFAMF0XHVcKwS26aPnEbq_FIUVwXU1gZ6cKfFbCk,2190
 napistu/statistics/quantiles.py,sha256=1-LnmVzC2CQWxCKUh0yi6YfKrbsZM1-kkD7nu2-aS5s,3042
-napistu-0.4.
+napistu-0.4.4.dist-info/licenses/LICENSE,sha256=kW8wVT__JWoHjl2BbbJDAZInWa9AxzJeR_uv6-i5x1g,1063
 tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 tests/conftest.py,sha256=t-GHb0MvSsC-MyhkFpOy2K3t5fi7eaig_Rc2xEQC-t8,9678
 tests/test_consensus.py,sha256=Hzfrgp4SpkRDnEMVMD3f0UInSycndB8kKzC4wDDvRas,15076
@@ -114,7 +116,7 @@ tests/test_network_net_propagation.py,sha256=kZeDHD93iMrLVvxO4OyfRH5_vgsYeQyC40O
 tests/test_network_ng_core.py,sha256=w-iNBTtenennJhaLFauk952pEsk7W0-Fa8lPvIRqHyY,628
 tests/test_network_ng_utils.py,sha256=QVVuRnvCRfTSIlGdwQTIF9lr0wOwoc5gGeXAUY_AdgE,713
 tests/test_network_paths.py,sha256=TWZnxY5bF3m6gahcxcYJGrBIawh2-_vUcec1LyPmXV8,1686
-tests/test_network_precompute.py,sha256=
+tests/test_network_precompute.py,sha256=IPr1KhtxBD0fXx_2TvZqnevrD-Iig35otb8yloRFpRc,10014
 tests/test_ontologies_genodexito.py,sha256=6fINyUiubHZqu7qxye09DQfJXw28ZMAJc3clPb-cCoY,2298
 tests/test_ontologies_id_tables.py,sha256=CpwpbmQvTc1BaVd6jbDKHAVE2etwN0vx93nC8jpnMlE,7265
 tests/test_ontologies_mygene.py,sha256=VkdRcKIWmcG6V-2dpfvsBiOJN5dO-j0RqZNxtJRcyBU,1583
@@ -124,18 +126,18 @@ tests/test_rpy2_callr.py,sha256=V4a-QH5krgYOQRgqzksMzIkGAFjBqKOAqgprxrH6bE0,2904
 tests/test_rpy2_init.py,sha256=T3gnxC1O7XNvYM2P4018ikpPPAy-kwQLm7Erj0RfA-4,5895
 tests/test_sbml.py,sha256=f25zj1NogYrmLluvBDboLameTuCiQ309433Qn3iPvhg,1483
 tests/test_sbml_dfs_core.py,sha256=nnLPpZTVtCznOBohk7CX67x6sMqktJWt-sZMWQKoaDs,26521
-tests/test_sbml_dfs_utils.py,sha256=
+tests/test_sbml_dfs_utils.py,sha256=ZD9x2B81fsfYEjAV9wphHOR7ywjNcfvfw1LGNv4PxUA,11471
 tests/test_sbo.py,sha256=x_PENFaXYsrZIzOZu9cj_Wrej7i7SNGxgBYYvcigLs0,308
 tests/test_scverse_loading.py,sha256=bnU1lQSYYWhOAs0IIBoi4ZohqPokDQJ0n_rtkAfEyMU,29948
-tests/
-tests/
+tests/test_source.py,sha256=iV-Yyu8flhIGWF17SCL8msG2bjqwb9w2IZ694b0iZ-o,2985
+tests/test_statistics_hypothesis_testing.py,sha256=qD-oS9zo5JlH-jdtiOrWAKI4nKFuZvvh6361_pFSpIs,2259
 tests/test_statistics_quantiles.py,sha256=yNDeqwgbP-1Rx3C_dLX_wnwT_Lr-iJWClmeKmElqmTE,4984
 tests/test_uncompartmentalize.py,sha256=nAk5kfAVLU9a2VWe2x2HYVcKqj-EnwmwddERIPRax8c,1289
 tests/test_utils.py,sha256=qPSpV-Q9b6vmdycgaDmQqtcvzKnAVnN9j5xJ9x-T6bg,23959
 tests/utils.py,sha256=SoWQ_5roJteFGcMaOeEiQ5ucwq3Z2Fa3AAs9iXHTsJY,749
 tests/test_data/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-napistu-0.4.
-napistu-0.4.
-napistu-0.4.
-napistu-0.4.
-napistu-0.4.
+napistu-0.4.4.dist-info/METADATA,sha256=E15A5Ve2RZTn4HtXGD2rDO1Q7AEaTfSdo3fgLuwravE,4078
+napistu-0.4.4.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+napistu-0.4.4.dist-info/entry_points.txt,sha256=_QnaPOvJNA3IltxmZgWIiBoen-L1bPYX18YQfC7oJgQ,41
+napistu-0.4.4.dist-info/top_level.txt,sha256=Gpvk0a_PjrtqhYcQ9IDr3zR5LqpZ-uIHidQMIpjlvhY,14
+napistu-0.4.4.dist-info/RECORD,,
```
tests/test_network_precompute.py
CHANGED
```diff
@@ -276,3 +276,33 @@ def test_precomputed_distances_serialization():
     # Clean up the temporary file
     if os.path.exists(temp_path):
         os.remove(temp_path)
+
+
+def test_filter_precomputed_distances_top_n_subset():
+    # Use a small top_n for a quick test
+    top_n = 5
+    filtered = precompute.filter_precomputed_distances_top_n(
+        precomputed_distances, top_n=top_n
+    )
+    # Check that the filtered DataFrame is a subset of the original
+    merged = filtered.merge(
+        precomputed_distances,
+        on=[
+            precompute.NAPISTU_EDGELIST.SC_ID_ORIGIN,
+            precompute.NAPISTU_EDGELIST.SC_ID_DEST,
+        ],
+        how="left",
+        indicator=True,
+    )
+    assert (
+        merged["_merge"] == "both"
+    ).all(), "Filtered rows must be present in the original DataFrame"
+    # Check that columns are preserved
+    assert set(
+        [
+            precompute.NAPISTU_EDGELIST.SC_ID_ORIGIN,
+            precompute.NAPISTU_EDGELIST.SC_ID_DEST,
+        ]
+    ).issubset(filtered.columns)
+    # Optionally, check that the number of rows is less than or equal to the input
+    assert filtered.shape[0] <= precomputed_distances.shape[0]
```
tests/test_sbml_dfs_utils.py
CHANGED
```diff
@@ -334,3 +334,16 @@ def test_infer_entity_type_errors():
     )  # Two primary keys
     with pytest.raises(ValueError):
         sbml_dfs_utils.infer_entity_type(df)
+
+
+def test_infer_entity_type_multindex_reactions():
+    # DataFrame with MultiIndex (r_id, foo), should infer as reactions
+    import pandas as pd
+    from napistu.constants import SBML_DFS
+
+    df = pd.DataFrame({"some_col": [1, 2]})
+    df.index = pd.MultiIndex.from_tuples(
+        [("rxn1", "a"), ("rxn2", "b")], names=[SBML_DFS.R_ID, "foo"]
+    )
+    result = sbml_dfs_utils.infer_entity_type(df)
+    assert result == SBML_DFS.REACTIONS
```