PyPI - napistu - Versions diffs - 0.4.1__py3-none-any.whl → 0.4.2__py3-none-any.whl - Mend

napistu 0.4.1__py3-none-any.whl → 0.4.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (16) hide show

napistu/gcs/constants.py +5 -5
napistu/network/constants.py +23 -1
napistu/network/ig_utils.py +161 -1
napistu/network/net_create.py +3 -3
napistu/network/net_propagation.py +646 -96
napistu/statistics/__init__.py +10 -0
napistu/statistics/quantiles.py +82 -0
{napistu-0.4.1.dist-info → napistu-0.4.2.dist-info}/METADATA +1 -1
{napistu-0.4.1.dist-info → napistu-0.4.2.dist-info}/RECORD +16 -13
tests/test_network_ig_utils.py +133 -0
tests/test_network_net_propagation.py +365 -74
tests/test_statistics_quantiles.py +133 -0
{napistu-0.4.1.dist-info → napistu-0.4.2.dist-info}/WHEEL +0 -0
{napistu-0.4.1.dist-info → napistu-0.4.2.dist-info}/entry_points.txt +0 -0
{napistu-0.4.1.dist-info → napistu-0.4.2.dist-info}/licenses/LICENSE +0 -0
{napistu-0.4.1.dist-info → napistu-0.4.2.dist-info}/top_level.txt +0 -0

napistu/statistics/__init__.py ADDED Viewed

@@ -0,0 +1,10 @@
+from __future__ import annotations
+from importlib.metadata import PackageNotFoundError
+from importlib.metadata import version
+try:
+    __version__ = version("napistu")
+except PackageNotFoundError:
+    # package is not installed
+    pass

napistu/statistics/quantiles.py ADDED Viewed

@@ -0,0 +1,82 @@
+"""Module for comparing observed values to null distributions."""
+import logging
+import pandas as pd
+import numpy as np
logger = logging.getLogger(__name__)


def calculate_quantiles(
    observed_df: pd.DataFrame, null_df: pd.DataFrame
) -> pd.DataFrame:
    """
    Calculate quantiles of observed scores relative to per-feature null
    distributions using vectorized operations.

    Parameters
    ----------
    observed_df : pd.DataFrame
        DataFrame with features as index and attributes as columns containing
        observed scores.
    null_df : pd.DataFrame
        DataFrame with null scores, features as index (multiple rows per feature)
        and attributes as columns. Features that do not appear in
        ``observed_df`` are ignored.

    Returns
    -------
    pd.DataFrame
        DataFrame with the same index and columns as ``observed_df``. Each
        value is the proportion of that feature's null values which are
        <= the observed value.

    Raises
    ------
    ValueError
        If the columns of the two frames differ, if an observed feature has
        no null samples, or if either frame contains NaN values.
    """
    if not observed_df.columns.equals(null_df.columns):
        raise ValueError("Column names must match between observed and null data")

    # Validate all features present
    missing_features = set(observed_df.index) - set(null_df.index)
    if missing_features:
        raise ValueError(f"Missing features in null data: {missing_features}")

    # Check for NaN values
    if observed_df.isna().any().any():
        raise ValueError("NaN values found in observed data")
    if null_df.isna().any().any():
        raise ValueError("NaN values found in null data")

    observed_values = observed_df.values
    n_features, n_attributes = observed_values.shape

    # Nothing to score: return an empty frame rather than failing in max()
    if n_features == 0:
        return pd.DataFrame(
            np.empty((0, n_attributes)),
            index=observed_df.index,
            columns=observed_df.columns,
        )

    # Only null rows belonging to observed features matter; dropping the rest
    # keeps the index lookup below from failing on unknown features.
    is_relevant = null_df.index.get_level_values(0).isin(observed_df.index)
    null_groups = list(null_df.loc[is_relevant].groupby(level=0))

    # Check for unequal sample sizes and warn
    group_sizes = [len(group) for _, group in null_groups]
    if len(set(group_sizes)) > 1:
        logger.warning("Unequal null sample counts per feature may affect results")

    # Pre-allocate 3D array: [features, null_samples, attributes]. Features with
    # fewer null samples than the maximum are right-padded with NaN.
    max_null_samples = max(group_sizes)
    null_array = np.full((n_features, max_null_samples, n_attributes), np.nan)
    null_counts = np.zeros(n_features)

    for feature, group in null_groups:
        feature_idx = observed_df.index.get_loc(feature)
        null_array[feature_idx, : len(group)] = group.values
        null_counts[feature_idx] = len(group)

    # Broadcast comparison: observed[features, 1, attributes] vs
    # null[features, samples, attributes]. "<=" matches the convention of the
    # R quantile function. NaN padding always compares False, so padded slots
    # never add to the numerator.
    with np.errstate(invalid="ignore"):
        at_or_below = null_array <= observed_values[:, np.newaxis, :]

    # Divide by the true number of null samples per feature. A plain (nan)mean
    # over the boolean array would count the padded slots in the denominator
    # and deflate quantiles for features with fewer null samples.
    quantiles = at_or_below.sum(axis=1) / null_counts[:, np.newaxis]

    return pd.DataFrame(quantiles, index=observed_df.index, columns=observed_df.columns)

{napistu-0.4.1.dist-info → napistu-0.4.2.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: napistu
-Version: 0.4.1
+Version: 0.4.2
 Summary: Connecting high-dimensional data to curated pathways
 Home-page: https://github.com/napistu/napistu-py
 Author: Sean Hackett

{napistu-0.4.1.dist-info → napistu-0.4.2.dist-info}/RECORD RENAMED Viewed

@@ -12,7 +12,7 @@ napistu/context/__init__.py,sha256=LQBEqipcHKK0E5UlDEg1ct-ymCs93IlUrUaH8BCevf0,2
 napistu/context/discretize.py,sha256=Qq7zg46F_I-PvQIT2_pEDQV7YEtUQCxKoRvT5Gu9QsE,15052
 napistu/context/filtering.py,sha256=l1oq-43ysSGqU9VmhTOO_pYT4DSMf20yxvktPC1MI0I,13696
 napistu/gcs/__init__.py,sha256=dFXAhIqlTLJMwowS4BUDT08-Vy3Q0u1L0CMCErSZT1Y,239
-napistu/gcs/constants.py,sha256=5hLp1pL7SHEiscLNKcdI4IeOP4vUaasBCIHJrEedl0o,2909
+napistu/gcs/constants.py,sha256=CuWsLXC1ZvOIRCQy7-ZeZdgRPmCVAqzvMuzJakGRk10,2923
 napistu/gcs/downloads.py,sha256=SvGv9WYr_Vt3guzyz1QiAuBndeKPTBtWSFLj1-QbLf4,6348
 napistu/gcs/utils.py,sha256=eLSsvewWJdCguyj2k0ozUGP5BTemaE1PZg41Z3aY5kM,571
 napistu/ingestion/__init__.py,sha256=dFXAhIqlTLJMwowS4BUDT08-Vy3Q0u1L0CMCErSZT1Y,239
@@ -58,13 +58,13 @@ napistu/modify/gaps.py,sha256=CV-bdSfanhrnCIFVWfNuQJbtjvj4hsEwheKYR-Z3tNA,26844
 napistu/modify/pathwayannot.py,sha256=xuBSMDFWbg_d6-Gzv0Td3Q5nnFTa-Qzic48g1b1AZtQ,48081
 napistu/modify/uncompartmentalize.py,sha256=y5LkXn5x6u80dB_McfAIh88BxZGIAVFLujkP7sPNRh0,9690
 napistu/network/__init__.py,sha256=dFXAhIqlTLJMwowS4BUDT08-Vy3Q0u1L0CMCErSZT1Y,239
-napistu/network/constants.py,sha256=LPsMtbAoier9Qor9REA7UIx7vnBtDpxMbcv3yI-c-2s,6441
+napistu/network/constants.py,sha256=nG_lUZYLgop8oxOGjDYqvxXJzVdOwKZ3aWnxlhtSaIo,6915
 napistu/network/data_handling.py,sha256=KncrAKjXI3169BgVE-SnY8FkpVF60JnUwfMHtbqvsTc,14725
-napistu/network/ig_utils.py,sha256=JSlf_sZtw3DiiSIiYJ2YqJFEP4hVJMwNRox2qYTA4zY,11470
+napistu/network/ig_utils.py,sha256=MuyEyOVtSHndil6QuuRCimBZrJ2jTaF5qQESgYlu02M,17042
 napistu/network/neighborhoods.py,sha256=g5QeGaizSfW4nNe9YZY86g8q79EQmuvSwipaNPnOVqA,56121
-napistu/network/net_create.py,sha256=Ylt4osGWPfj9MSDPy67pOTmLERGtS3cStR94UaqmXes,59082
+napistu/network/net_create.py,sha256=66kV_xoWnu4BVLaJZ1TAC7wBSsjPDqjoAXH-X9ShV3s,59091
 napistu/network/net_create_utils.py,sha256=zajwaz2xAij_9fEnD77SgBw_EnNAnJ8jBCmmK2rk_bA,24672
-napistu/network/net_propagation.py,sha256=S70zl0W4aYu5RPf5PZh829xT1xUyeTdi3TcIaFeYMww,4924
+napistu/network/net_propagation.py,sha256=Il5nDOWh3nLz8gRhDFHGp2LxcvJ9C1twiSZjDeiZMUo,23490
 napistu/network/ng_core.py,sha256=dGnTUKR4WtnvaYMyIHqqF55FY4mJSa7wjA2LZ4cVB6U,11720
 napistu/network/ng_utils.py,sha256=c1tHXz_JcH01D5KovNQmRLTEVxpCkCe36otULq-liz8,15579
 napistu/network/paths.py,sha256=r6LVKVvX7i3ctBA5r-xvHfpH5Zsd0VDHUCtin2iag20,17453
@@ -83,7 +83,9 @@ napistu/rpy2/rids.py,sha256=AfXLTfTdonfspgAHYO0Ph7jSUWv8YuyT8x3fyLfAqc8,3413
 napistu/scverse/__init__.py,sha256=Lgxr3iMQAkTzXE9BNz93CndNP5djzerLvmHM-D0PU3I,357
 napistu/scverse/constants.py,sha256=0iAkhyJUIeFGHdLLU3fCaEU1O3Oix4qAsxr3CxGTjVs,653
 napistu/scverse/loading.py,sha256=jqiE71XB-wdV50GyZrauFNY0Lai4bX9Fm2Gv80VR8t8,27016
-napistu-0.4.1.dist-info/licenses/LICENSE,sha256=kW8wVT__JWoHjl2BbbJDAZInWa9AxzJeR_uv6-i5x1g,1063
+napistu/statistics/__init__.py,sha256=dFXAhIqlTLJMwowS4BUDT08-Vy3Q0u1L0CMCErSZT1Y,239
+napistu/statistics/quantiles.py,sha256=1-LnmVzC2CQWxCKUh0yi6YfKrbsZM1-kkD7nu2-aS5s,3042
+napistu-0.4.2.dist-info/licenses/LICENSE,sha256=kW8wVT__JWoHjl2BbbJDAZInWa9AxzJeR_uv6-i5x1g,1063
 tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 tests/conftest.py,sha256=t-GHb0MvSsC-MyhkFpOy2K3t5fi7eaig_Rc2xEQC-t8,9678
 tests/test_consensus.py,sha256=Hzfrgp4SpkRDnEMVMD3f0UInSycndB8kKzC4wDDvRas,15076
@@ -104,11 +106,11 @@ tests/test_mcp_config.py,sha256=GTu9vywqAHTYkolywdYS_BEIW3gBzs4A4qcneMSPpRk,7007
 tests/test_mcp_documentation_utils.py,sha256=OW0N2N_2IOktbYTcCWhhWz4bANi8IB60l1q3DJi8Ra4,810
 tests/test_mcp_server.py,sha256=bP3PWVQsEfX6-lAgXKP32njdg__o65n2WuLvkxTTHkQ,11215
 tests/test_network_data_handling.py,sha256=4aS8z2AlKkVd-JhK4BQ8fjeiW8_bJ1hZ3cc71Jh7Glk,12716
-tests/test_network_ig_utils.py,sha256=Buoh570mNm5pcac3Hf6f3pevCjWfBwPfKuD8IkDLg58,2120
+tests/test_network_ig_utils.py,sha256=XihmEpX890sr-LYmsb_t4aN0sKIDWCnXkTpDhpuTDmw,7199
 tests/test_network_neighborhoods.py,sha256=8BV17m5X1OUd5FwasTTYUOkNYUHDPUkxOKH_VZCsyBE,631
 tests/test_network_net_create.py,sha256=L0U91b4jVHDuC3DFo-_BUFVuv4GuSxZuLAo7r-7EJxY,12877
 tests/test_network_net_create_utils.py,sha256=0J6KIh2HBc4koFsvwMaul1QRtj5x92kR9HBdDZajnAw,18971
-tests/test_network_net_propagation.py,sha256=9pKkUdduWejH4iKNCJXKFzAkdNpCfrMbiUWySgI_LH4,3244
+tests/test_network_net_propagation.py,sha256=kZeDHD93iMrLVvxO4OyfRH5_vgsYeQyC40OI9Dsb0xY,14999
 tests/test_network_ng_core.py,sha256=w-iNBTtenennJhaLFauk952pEsk7W0-Fa8lPvIRqHyY,628
 tests/test_network_ng_utils.py,sha256=QVVuRnvCRfTSIlGdwQTIF9lr0wOwoc5gGeXAUY_AdgE,713
 tests/test_network_paths.py,sha256=TWZnxY5bF3m6gahcxcYJGrBIawh2-_vUcec1LyPmXV8,1686
@@ -127,12 +129,13 @@ tests/test_sbo.py,sha256=x_PENFaXYsrZIzOZu9cj_Wrej7i7SNGxgBYYvcigLs0,308
 tests/test_scverse_loading.py,sha256=bnU1lQSYYWhOAs0IIBoi4ZohqPokDQJ0n_rtkAfEyMU,29948
 tests/test_set_coverage.py,sha256=J-6m6LuOjcQa9pxRuWglSfJk4Ltm7kt_eOrn_Q-7P6Q,1604
 tests/test_source.py,sha256=hT0IlpexR5zP0OhWl5BBaho9d1aCYQlFZLwRIRRnw_Y,1969
+tests/test_statistics_quantiles.py,sha256=yNDeqwgbP-1Rx3C_dLX_wnwT_Lr-iJWClmeKmElqmTE,4984
 tests/test_uncompartmentalize.py,sha256=nAk5kfAVLU9a2VWe2x2HYVcKqj-EnwmwddERIPRax8c,1289
 tests/test_utils.py,sha256=qPSpV-Q9b6vmdycgaDmQqtcvzKnAVnN9j5xJ9x-T6bg,23959
 tests/utils.py,sha256=SoWQ_5roJteFGcMaOeEiQ5ucwq3Z2Fa3AAs9iXHTsJY,749
 tests/test_data/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-napistu-0.4.1.dist-info/METADATA,sha256=zl_710wCsatB3lKZAgHba-MLEOPSDOyrxs3b5FB6toA,4078
-napistu-0.4.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-napistu-0.4.1.dist-info/entry_points.txt,sha256=_QnaPOvJNA3IltxmZgWIiBoen-L1bPYX18YQfC7oJgQ,41
-napistu-0.4.1.dist-info/top_level.txt,sha256=Gpvk0a_PjrtqhYcQ9IDr3zR5LqpZ-uIHidQMIpjlvhY,14
-napistu-0.4.1.dist-info/RECORD,,
+napistu-0.4.2.dist-info/METADATA,sha256=6P_9Mmno6pVu4Me-3QdcMtiGOhCcajTqm5LP_Hns4lI,4078
+napistu-0.4.2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+napistu-0.4.2.dist-info/entry_points.txt,sha256=_QnaPOvJNA3IltxmZgWIiBoen-L1bPYX18YQfC7oJgQ,41
+napistu-0.4.2.dist-info/top_level.txt,sha256=Gpvk0a_PjrtqhYcQ9IDr3zR5LqpZ-uIHidQMIpjlvhY,14
+napistu-0.4.2.dist-info/RECORD,,

tests/test_network_ig_utils.py CHANGED Viewed

@@ -1,5 +1,7 @@
 from __future__ import annotations
+import igraph as ig
+import numpy as np
 import pytest
 from napistu.network import ig_utils
@@ -57,3 +59,134 @@ def test_filter_to_largest_subgraphs(multi_component_graph):
     # Test invalid top_k
     with pytest.raises(ValueError):
         ig_utils.filter_to_largest_subgraphs(multi_component_graph, top_k=0)
def test_mask_functions_valid_inputs():
    """Check mask parsing and mask construction for each accepted input form."""
    attribute_names = ["attr1", "attr2"]

    # Five-vertex graph carrying two numeric attributes and vertex names
    g = ig.Graph(5)
    g.vs["attr1"] = [0, 1, 2, 0, 3]
    g.vs["attr2"] = [1, 0, 1, 2, 0]
    g.vs["name"] = ["A", "B", "C", "D", "E"]

    # Masks the assertions below compare against
    every_vertex = np.ones(5, dtype=bool)
    attr1_mask = np.array([False, True, True, False, True])
    attr2_mask = np.array([True, False, True, True, False])

    # Case 1: None is parsed to a per-attribute None and yields all-True masks
    parsed = ig_utils._parse_mask_input(None, attribute_names)
    assert parsed == {"attr1": None, "attr2": None}
    built = ig_utils._get_attribute_masks(g, parsed)
    assert np.array_equal(built["attr1"], every_vertex)
    assert np.array_equal(built["attr2"], every_vertex)

    # Case 2: the "attr" keyword maps every attribute onto itself
    parsed = ig_utils._parse_mask_input("attr", attribute_names)
    assert parsed == {"attr1": "attr1", "attr2": "attr2"}
    built = ig_utils._get_attribute_masks(g, parsed)
    assert np.array_equal(built["attr1"], attr1_mask)
    assert np.array_equal(built["attr2"], attr2_mask)

    # Case 3: a single attribute name is applied to all attributes
    parsed = ig_utils._parse_mask_input("attr1", attribute_names)
    assert parsed == {"attr1": "attr1", "attr2": "attr1"}

    # Case 4: a boolean array is used as-is for every attribute
    flags = np.array([True, False, True, False, False])
    parsed = ig_utils._parse_mask_input(flags, attribute_names)
    built = ig_utils._get_attribute_masks(g, parsed)
    assert np.array_equal(built["attr1"], flags)
    assert np.array_equal(built["attr2"], flags)

    # Case 5: a list of vertex indices selects those positions
    selected = np.array([True, False, True, False, True])
    parsed = ig_utils._parse_mask_input([0, 2, 4], attribute_names)
    built = ig_utils._get_attribute_masks(g, parsed)
    assert np.array_equal(built["attr1"], selected)

    # Case 6: a list of vertex names selects the same positions
    parsed = ig_utils._parse_mask_input(["A", "C", "E"], attribute_names)
    built = ig_utils._get_attribute_masks(g, parsed)
    assert np.array_equal(built["attr1"], selected)

    # Case 7: a complete dictionary is passed through unchanged
    per_attribute = {"attr1": "attr1", "attr2": None}
    parsed = ig_utils._parse_mask_input(per_attribute, attribute_names)
    assert parsed == per_attribute
    built = ig_utils._get_attribute_masks(g, parsed)
    assert np.array_equal(built["attr1"], attr1_mask)
    assert np.array_equal(built["attr2"], every_vertex)
def test_mask_functions_error_cases():
    """Check that invalid mask inputs raise ValueError with the expected message."""
    attribute_names = ["attr1", "attr2"]

    # One graph lacking a "name" vertex attribute, one that has it
    unnamed = ig.Graph(3)
    unnamed.vs["attr1"] = [1, 2, 3]

    named = ig.Graph(3)
    named.vs["attr1"] = [1, 2, 3]
    named.vs["name"] = ["A", "B", "C"]

    # (call that must fail, pattern the error message must match), run in order
    failing_calls = [
        # Unsupported mask type
        (
            lambda: ig_utils._parse_mask_input(123, attribute_names),
            "Invalid mask input type",
        ),
        # Dictionary that omits 'attr2'
        (
            lambda: ig_utils._parse_mask_input({"attr1": None}, attribute_names),
            "Attribute 'attr2' not found in mask dictionary",
        ),
        # Name-based mask on a graph without vertex names
        (
            lambda: ig_utils._get_attribute_masks(unnamed, {"attr1": ["A", "B"]}),
            "Graph has no 'name' attribute for string mask",
        ),
        # Unsupported per-attribute specification type
        (
            lambda: ig_utils._get_attribute_masks(named, {"attr1": 123}),
            "Invalid mask specification for attribute 'attr1'",
        ),
    ]

    for call, message in failing_calls:
        with pytest.raises(ValueError, match=message):
            call()
def test_ensure_nonnegative_vertex_attribute():
    """Check _ensure_valid_attribute on accepted and rejected vertex attributes."""
    vertex_values = {
        "good_attr": [1.0, 2.0, 0.0, 3.0],
        "zero_attr": [0.0, 0.0, 0.0, 0.0],
        "negative_attr": [1.0, -1.0, 2.0, 0.0],
        "mixed_attr": [1.0, None, 2.0, 0.0],  # Some None values
    }

    g = ig.Graph(4)
    for attr_name, values in vertex_values.items():
        g.vs[attr_name] = values

    # Accepted attributes; None entries are expected to come back as 0
    accepted = {
        "good_attr": np.array([1.0, 2.0, 0.0, 3.0]),
        "mixed_attr": np.array([1.0, 0.0, 2.0, 0.0]),
    }
    for attr_name, wanted in accepted.items():
        returned = ig_utils._ensure_valid_attribute(g, attr_name)
        assert np.array_equal(returned, wanted)

    # Rejected attributes, each with the pattern its error message must match
    rejected = {
        "zero_attr": "zero for all vertices",
        "negative_attr": "contains negative values",
        "nonexistent_attr": "missing for all vertices",
    }
    for attr_name, message in rejected.items():
        with pytest.raises(ValueError, match=message):
            ig_utils._ensure_valid_attribute(g, attr_name)

napistu 0.4.1__py3-none-any.whl → 0.4.2__py3-none-any.whl

napistu 0.4.1__py3-none-any.whl → 0.4.2__py3-none-any.whl