napistu 0.4.1__py3-none-any.whl → 0.4.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- napistu/gcs/constants.py +5 -5
- napistu/network/constants.py +23 -1
- napistu/network/ig_utils.py +161 -1
- napistu/network/net_create.py +3 -3
- napistu/network/net_propagation.py +646 -96
- napistu/statistics/__init__.py +10 -0
- napistu/statistics/quantiles.py +82 -0
- {napistu-0.4.1.dist-info → napistu-0.4.2.dist-info}/METADATA +1 -1
- {napistu-0.4.1.dist-info → napistu-0.4.2.dist-info}/RECORD +16 -13
- tests/test_network_ig_utils.py +133 -0
- tests/test_network_net_propagation.py +365 -74
- tests/test_statistics_quantiles.py +133 -0
- {napistu-0.4.1.dist-info → napistu-0.4.2.dist-info}/WHEEL +0 -0
- {napistu-0.4.1.dist-info → napistu-0.4.2.dist-info}/entry_points.txt +0 -0
- {napistu-0.4.1.dist-info → napistu-0.4.2.dist-info}/licenses/LICENSE +0 -0
- {napistu-0.4.1.dist-info → napistu-0.4.2.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,82 @@
|
|
1
|
+
"""Module for comparing observed values to null distributions."""
|
2
|
+
|
3
|
+
import logging
|
4
|
+
|
5
|
+
import pandas as pd
|
6
|
+
import numpy as np
|
7
|
+
|
8
|
+
logger = logging.getLogger(__name__)


def calculate_quantiles(
    observed_df: pd.DataFrame, null_df: pd.DataFrame
) -> pd.DataFrame:
    """
    Calculate quantiles of observed scores relative to per-feature null
    distributions using vectorized operations.

    For every (feature, attribute) pair the quantile is the fraction of that
    feature's null samples which are less than or equal to the observed value.
    Features may have different numbers of null samples; each feature is
    normalized by its own sample count.

    Parameters
    ----------
    observed_df : pd.DataFrame
        DataFrame with features as index and attributes as columns containing
        observed scores.
    null_df : pd.DataFrame
        DataFrame with null scores, features as index (multiple rows per feature)
        and attributes as columns. Features which are absent from
        ``observed_df`` are ignored.

    Returns
    -------
    pd.DataFrame
        DataFrame with same structure as observed_df containing quantiles.
        Each value represents the proportion of null values <= observed value.

    Raises
    ------
    ValueError
        If the columns of the two frames differ, if a feature of
        ``observed_df`` has no rows in ``null_df``, or if either frame
        contains NaN values.
    """

    if not observed_df.columns.equals(null_df.columns):
        raise ValueError("Column names must match between observed and null data")

    # Validate all features present
    missing_features = set(observed_df.index) - set(null_df.index)
    if missing_features:
        raise ValueError(f"Missing features in null data: {missing_features}")

    # Check for NaN values
    if observed_df.isna().any().any():
        raise ValueError("NaN values found in observed data")
    if null_df.isna().any().any():
        raise ValueError("NaN values found in null data")

    # Nothing to score: return an empty frame instead of failing on max() below
    if len(observed_df) == 0:
        return pd.DataFrame(
            np.empty((0, len(observed_df.columns))),
            index=observed_df.index,
            columns=observed_df.columns,
        )

    # Group the null samples once and reuse the grouping for counting and filling
    null_grouped = null_df.groupby(level=0)
    sample_counts = null_grouped.size()

    # Check for unequal sample sizes and warn
    if sample_counts.nunique() > 1:
        logger.warning("Unequal null sample counts per feature may affect results")

    # Convert to numpy arrays for speed
    observed_values = observed_df.values

    # Pre-allocate 3D array: [features, null_samples, attributes]
    # Slots beyond a feature's own sample count stay NaN (padding)
    max_null_samples = int(sample_counts.max())
    null_array = np.full(
        (len(observed_df), max_null_samples, len(observed_df.columns)), np.nan
    )

    # Fill the null array
    for feature, group in null_grouped:
        try:
            feature_idx = observed_df.index.get_loc(feature)
        except KeyError:
            # Feature only exists in the null data; there is nothing to compare it to
            continue
        null_array[feature_idx, : len(group)] = group.values

    # Broadcast comparison: observed[features, 1, attributes] vs null[features, samples, attributes]
    # This creates a boolean array of shape [features, null_samples, attributes]
    # Less than or equal to is used so that ties count toward the quantile
    # Padded NaN slots compare as False, so they never add to the numerator
    at_or_below = null_array <= observed_values[:, np.newaxis, :]

    # The comparison result is boolean and therefore holds no NaN, so a NaN-aware
    # mean cannot exclude the padding. Divide by the count of real samples instead.
    valid_counts = (~np.isnan(null_array)).sum(axis=1)
    quantiles = at_or_below.sum(axis=1) / valid_counts

    return pd.DataFrame(quantiles, index=observed_df.index, columns=observed_df.columns)
|
@@ -12,7 +12,7 @@ napistu/context/__init__.py,sha256=LQBEqipcHKK0E5UlDEg1ct-ymCs93IlUrUaH8BCevf0,2
|
|
12
12
|
napistu/context/discretize.py,sha256=Qq7zg46F_I-PvQIT2_pEDQV7YEtUQCxKoRvT5Gu9QsE,15052
|
13
13
|
napistu/context/filtering.py,sha256=l1oq-43ysSGqU9VmhTOO_pYT4DSMf20yxvktPC1MI0I,13696
|
14
14
|
napistu/gcs/__init__.py,sha256=dFXAhIqlTLJMwowS4BUDT08-Vy3Q0u1L0CMCErSZT1Y,239
|
15
|
-
napistu/gcs/constants.py,sha256=
|
15
|
+
napistu/gcs/constants.py,sha256=CuWsLXC1ZvOIRCQy7-ZeZdgRPmCVAqzvMuzJakGRk10,2923
|
16
16
|
napistu/gcs/downloads.py,sha256=SvGv9WYr_Vt3guzyz1QiAuBndeKPTBtWSFLj1-QbLf4,6348
|
17
17
|
napistu/gcs/utils.py,sha256=eLSsvewWJdCguyj2k0ozUGP5BTemaE1PZg41Z3aY5kM,571
|
18
18
|
napistu/ingestion/__init__.py,sha256=dFXAhIqlTLJMwowS4BUDT08-Vy3Q0u1L0CMCErSZT1Y,239
|
@@ -58,13 +58,13 @@ napistu/modify/gaps.py,sha256=CV-bdSfanhrnCIFVWfNuQJbtjvj4hsEwheKYR-Z3tNA,26844
|
|
58
58
|
napistu/modify/pathwayannot.py,sha256=xuBSMDFWbg_d6-Gzv0Td3Q5nnFTa-Qzic48g1b1AZtQ,48081
|
59
59
|
napistu/modify/uncompartmentalize.py,sha256=y5LkXn5x6u80dB_McfAIh88BxZGIAVFLujkP7sPNRh0,9690
|
60
60
|
napistu/network/__init__.py,sha256=dFXAhIqlTLJMwowS4BUDT08-Vy3Q0u1L0CMCErSZT1Y,239
|
61
|
-
napistu/network/constants.py,sha256=
|
61
|
+
napistu/network/constants.py,sha256=nG_lUZYLgop8oxOGjDYqvxXJzVdOwKZ3aWnxlhtSaIo,6915
|
62
62
|
napistu/network/data_handling.py,sha256=KncrAKjXI3169BgVE-SnY8FkpVF60JnUwfMHtbqvsTc,14725
|
63
|
-
napistu/network/ig_utils.py,sha256=
|
63
|
+
napistu/network/ig_utils.py,sha256=MuyEyOVtSHndil6QuuRCimBZrJ2jTaF5qQESgYlu02M,17042
|
64
64
|
napistu/network/neighborhoods.py,sha256=g5QeGaizSfW4nNe9YZY86g8q79EQmuvSwipaNPnOVqA,56121
|
65
|
-
napistu/network/net_create.py,sha256=
|
65
|
+
napistu/network/net_create.py,sha256=66kV_xoWnu4BVLaJZ1TAC7wBSsjPDqjoAXH-X9ShV3s,59091
|
66
66
|
napistu/network/net_create_utils.py,sha256=zajwaz2xAij_9fEnD77SgBw_EnNAnJ8jBCmmK2rk_bA,24672
|
67
|
-
napistu/network/net_propagation.py,sha256=
|
67
|
+
napistu/network/net_propagation.py,sha256=Il5nDOWh3nLz8gRhDFHGp2LxcvJ9C1twiSZjDeiZMUo,23490
|
68
68
|
napistu/network/ng_core.py,sha256=dGnTUKR4WtnvaYMyIHqqF55FY4mJSa7wjA2LZ4cVB6U,11720
|
69
69
|
napistu/network/ng_utils.py,sha256=c1tHXz_JcH01D5KovNQmRLTEVxpCkCe36otULq-liz8,15579
|
70
70
|
napistu/network/paths.py,sha256=r6LVKVvX7i3ctBA5r-xvHfpH5Zsd0VDHUCtin2iag20,17453
|
@@ -83,7 +83,9 @@ napistu/rpy2/rids.py,sha256=AfXLTfTdonfspgAHYO0Ph7jSUWv8YuyT8x3fyLfAqc8,3413
|
|
83
83
|
napistu/scverse/__init__.py,sha256=Lgxr3iMQAkTzXE9BNz93CndNP5djzerLvmHM-D0PU3I,357
|
84
84
|
napistu/scverse/constants.py,sha256=0iAkhyJUIeFGHdLLU3fCaEU1O3Oix4qAsxr3CxGTjVs,653
|
85
85
|
napistu/scverse/loading.py,sha256=jqiE71XB-wdV50GyZrauFNY0Lai4bX9Fm2Gv80VR8t8,27016
|
86
|
-
napistu
|
86
|
+
napistu/statistics/__init__.py,sha256=dFXAhIqlTLJMwowS4BUDT08-Vy3Q0u1L0CMCErSZT1Y,239
|
87
|
+
napistu/statistics/quantiles.py,sha256=1-LnmVzC2CQWxCKUh0yi6YfKrbsZM1-kkD7nu2-aS5s,3042
|
88
|
+
napistu-0.4.2.dist-info/licenses/LICENSE,sha256=kW8wVT__JWoHjl2BbbJDAZInWa9AxzJeR_uv6-i5x1g,1063
|
87
89
|
tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
88
90
|
tests/conftest.py,sha256=t-GHb0MvSsC-MyhkFpOy2K3t5fi7eaig_Rc2xEQC-t8,9678
|
89
91
|
tests/test_consensus.py,sha256=Hzfrgp4SpkRDnEMVMD3f0UInSycndB8kKzC4wDDvRas,15076
|
@@ -104,11 +106,11 @@ tests/test_mcp_config.py,sha256=GTu9vywqAHTYkolywdYS_BEIW3gBzs4A4qcneMSPpRk,7007
|
|
104
106
|
tests/test_mcp_documentation_utils.py,sha256=OW0N2N_2IOktbYTcCWhhWz4bANi8IB60l1q3DJi8Ra4,810
|
105
107
|
tests/test_mcp_server.py,sha256=bP3PWVQsEfX6-lAgXKP32njdg__o65n2WuLvkxTTHkQ,11215
|
106
108
|
tests/test_network_data_handling.py,sha256=4aS8z2AlKkVd-JhK4BQ8fjeiW8_bJ1hZ3cc71Jh7Glk,12716
|
107
|
-
tests/test_network_ig_utils.py,sha256=
|
109
|
+
tests/test_network_ig_utils.py,sha256=XihmEpX890sr-LYmsb_t4aN0sKIDWCnXkTpDhpuTDmw,7199
|
108
110
|
tests/test_network_neighborhoods.py,sha256=8BV17m5X1OUd5FwasTTYUOkNYUHDPUkxOKH_VZCsyBE,631
|
109
111
|
tests/test_network_net_create.py,sha256=L0U91b4jVHDuC3DFo-_BUFVuv4GuSxZuLAo7r-7EJxY,12877
|
110
112
|
tests/test_network_net_create_utils.py,sha256=0J6KIh2HBc4koFsvwMaul1QRtj5x92kR9HBdDZajnAw,18971
|
111
|
-
tests/test_network_net_propagation.py,sha256=
|
113
|
+
tests/test_network_net_propagation.py,sha256=kZeDHD93iMrLVvxO4OyfRH5_vgsYeQyC40OI9Dsb0xY,14999
|
112
114
|
tests/test_network_ng_core.py,sha256=w-iNBTtenennJhaLFauk952pEsk7W0-Fa8lPvIRqHyY,628
|
113
115
|
tests/test_network_ng_utils.py,sha256=QVVuRnvCRfTSIlGdwQTIF9lr0wOwoc5gGeXAUY_AdgE,713
|
114
116
|
tests/test_network_paths.py,sha256=TWZnxY5bF3m6gahcxcYJGrBIawh2-_vUcec1LyPmXV8,1686
|
@@ -127,12 +129,13 @@ tests/test_sbo.py,sha256=x_PENFaXYsrZIzOZu9cj_Wrej7i7SNGxgBYYvcigLs0,308
|
|
127
129
|
tests/test_scverse_loading.py,sha256=bnU1lQSYYWhOAs0IIBoi4ZohqPokDQJ0n_rtkAfEyMU,29948
|
128
130
|
tests/test_set_coverage.py,sha256=J-6m6LuOjcQa9pxRuWglSfJk4Ltm7kt_eOrn_Q-7P6Q,1604
|
129
131
|
tests/test_source.py,sha256=hT0IlpexR5zP0OhWl5BBaho9d1aCYQlFZLwRIRRnw_Y,1969
|
132
|
+
tests/test_statistics_quantiles.py,sha256=yNDeqwgbP-1Rx3C_dLX_wnwT_Lr-iJWClmeKmElqmTE,4984
|
130
133
|
tests/test_uncompartmentalize.py,sha256=nAk5kfAVLU9a2VWe2x2HYVcKqj-EnwmwddERIPRax8c,1289
|
131
134
|
tests/test_utils.py,sha256=qPSpV-Q9b6vmdycgaDmQqtcvzKnAVnN9j5xJ9x-T6bg,23959
|
132
135
|
tests/utils.py,sha256=SoWQ_5roJteFGcMaOeEiQ5ucwq3Z2Fa3AAs9iXHTsJY,749
|
133
136
|
tests/test_data/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
134
|
-
napistu-0.4.
|
135
|
-
napistu-0.4.
|
136
|
-
napistu-0.4.
|
137
|
-
napistu-0.4.
|
138
|
-
napistu-0.4.
|
137
|
+
napistu-0.4.2.dist-info/METADATA,sha256=6P_9Mmno6pVu4Me-3QdcMtiGOhCcajTqm5LP_Hns4lI,4078
|
138
|
+
napistu-0.4.2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
139
|
+
napistu-0.4.2.dist-info/entry_points.txt,sha256=_QnaPOvJNA3IltxmZgWIiBoen-L1bPYX18YQfC7oJgQ,41
|
140
|
+
napistu-0.4.2.dist-info/top_level.txt,sha256=Gpvk0a_PjrtqhYcQ9IDr3zR5LqpZ-uIHidQMIpjlvhY,14
|
141
|
+
napistu-0.4.2.dist-info/RECORD,,
|
tests/test_network_ig_utils.py
CHANGED
@@ -1,5 +1,7 @@
|
|
1
1
|
from __future__ import annotations
|
2
2
|
|
3
|
+
import igraph as ig
|
4
|
+
import numpy as np
|
3
5
|
import pytest
|
4
6
|
|
5
7
|
from napistu.network import ig_utils
|
@@ -57,3 +59,134 @@ def test_filter_to_largest_subgraphs(multi_component_graph):
|
|
57
59
|
# Test invalid top_k
|
58
60
|
with pytest.raises(ValueError):
|
59
61
|
ig_utils.filter_to_largest_subgraphs(multi_component_graph, top_k=0)
|
62
|
+
|
63
|
+
|
64
|
+
def test_mask_functions_valid_inputs():
    """Exercise the mask helpers with each supported mask input format."""
    # Five-vertex graph carrying two numeric attributes and vertex names
    g = ig.Graph(5)
    g.vs["attr1"] = [0, 1, 2, 0, 3]
    g.vs["attr2"] = [1, 0, 1, 2, 0]
    g.vs["name"] = ["A", "B", "C", "D", "E"]

    attrs = ["attr1", "attr2"]

    # Case 1: None yields a None spec per attribute and all-True masks
    parsed = ig_utils._parse_mask_input(None, attrs)
    assert parsed == {"attr1": None, "attr2": None}

    resolved = ig_utils._get_attribute_masks(g, parsed)
    for attr in attrs:
        assert np.array_equal(resolved[attr], np.ones(5, dtype=bool))

    # Case 2: the "attr" keyword maps every attribute onto itself; the expected
    # masks mark the vertices whose attribute value is nonzero
    parsed = ig_utils._parse_mask_input("attr", attrs)
    assert parsed == {"attr1": "attr1", "attr2": "attr2"}

    resolved = ig_utils._get_attribute_masks(g, parsed)
    assert np.array_equal(
        resolved["attr1"], np.array([False, True, True, False, True])
    )
    assert np.array_equal(
        resolved["attr2"], np.array([True, False, True, True, False])
    )

    # Case 3: a single attribute name is applied to every attribute
    parsed = ig_utils._parse_mask_input("attr1", attrs)
    assert parsed == {"attr1": "attr1", "attr2": "attr1"}

    # Case 4: a boolean array is used as the mask for every attribute
    bool_mask = np.array([True, False, True, False, False])
    parsed = ig_utils._parse_mask_input(bool_mask, attrs)
    resolved = ig_utils._get_attribute_masks(g, parsed)
    for attr in attrs:
        assert np.array_equal(resolved[attr], bool_mask)

    # Case 5: a list of vertex indices
    selected = np.array([True, False, True, False, True])
    parsed = ig_utils._parse_mask_input([0, 2, 4], attrs)
    resolved = ig_utils._get_attribute_masks(g, parsed)
    assert np.array_equal(resolved["attr1"], selected)

    # Case 6: a list of vertex names selecting the same vertices as case 5
    parsed = ig_utils._parse_mask_input(["A", "C", "E"], attrs)
    resolved = ig_utils._get_attribute_masks(g, parsed)
    assert np.array_equal(resolved["attr1"], selected)

    # Case 7: a per-attribute dictionary is passed through unchanged
    mask_dict = {"attr1": "attr1", "attr2": None}
    parsed = ig_utils._parse_mask_input(mask_dict, attrs)
    assert parsed == mask_dict

    resolved = ig_utils._get_attribute_masks(g, parsed)
    assert np.array_equal(
        resolved["attr1"], np.array([False, True, True, False, True])
    )
    assert np.array_equal(resolved["attr2"], np.ones(5, dtype=bool))
|
122
|
+
|
123
|
+
|
124
|
+
def test_mask_functions_error_cases():
    """Check that invalid mask inputs are rejected with a ValueError."""
    attrs = ["attr1", "attr2"]

    # Three-vertex graph lacking a "name" vertex attribute
    unnamed_graph = ig.Graph(3)
    unnamed_graph.vs["attr1"] = [1, 2, 3]

    # Three-vertex graph which does carry vertex names
    named_graph = ig.Graph(3)
    named_graph.vs["attr1"] = [1, 2, 3]
    named_graph.vs["name"] = ["A", "B", "C"]

    # Case 1: a mask input of an unsupported type
    with pytest.raises(ValueError, match="Invalid mask input type"):
        ig_utils._parse_mask_input(123, attrs)

    # Case 2: a mask dictionary which omits one of the attributes ('attr2')
    partial_dict = {"attr1": None}
    with pytest.raises(
        ValueError, match="Attribute 'attr2' not found in mask dictionary"
    ):
        ig_utils._parse_mask_input(partial_dict, attrs)

    # Case 3: a name-based mask applied to the graph without vertex names
    name_specs = {"attr1": ["A", "B"]}
    with pytest.raises(
        ValueError, match="Graph has no 'name' attribute for string mask"
    ):
        ig_utils._get_attribute_masks(unnamed_graph, name_specs)

    # Case 4: a mask specification of an unsupported type in _get_attribute_masks
    bad_specs = {"attr1": 123}
    with pytest.raises(
        ValueError, match="Invalid mask specification for attribute 'attr1'"
    ):
        ig_utils._get_attribute_masks(named_graph, bad_specs)
|
161
|
+
|
162
|
+
|
163
|
+
def test_ensure_nonnegative_vertex_attribute():
    """Check _ensure_valid_attribute against valid and invalid vertex attributes."""
    # Four-vertex graph with one attribute per scenario
    g = ig.Graph(4)
    vertex_attributes = {
        "good_attr": [1.0, 2.0, 0.0, 3.0],
        "zero_attr": [0.0, 0.0, 0.0, 0.0],
        "negative_attr": [1.0, -1.0, 2.0, 0.0],
        "mixed_attr": [1.0, None, 2.0, 0.0],  # contains a None entry
    }
    for attr_name, attr_values in vertex_attributes.items():
        g.vs[attr_name] = attr_values

    # Attributes which are accepted, paired with the array expected back.
    # For "mixed_attr" the None entry is expected to come back as 0.
    accepted = [
        ("good_attr", np.array([1.0, 2.0, 0.0, 3.0])),
        ("mixed_attr", np.array([1.0, 0.0, 2.0, 0.0])),
    ]
    for attr_name, expected in accepted:
        result = ig_utils._ensure_valid_attribute(g, attr_name)
        assert np.array_equal(result, expected)

    # Attributes which are rejected, paired with the expected error message:
    # all zeros, a negative value, and an attribute that does not exist
    rejected = [
        ("zero_attr", "zero for all vertices"),
        ("negative_attr", "contains negative values"),
        ("nonexistent_attr", "missing for all vertices"),
    ]
    for attr_name, message in rejected:
        with pytest.raises(ValueError, match=message):
            ig_utils._ensure_valid_attribute(g, attr_name)
|