napistu 0.4.1__py3-none-any.whl → 0.4.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- napistu/consensus.py +3 -4
- napistu/gcs/constants.py +5 -5
- napistu/ingestion/constants.py +51 -0
- napistu/ingestion/reactom_fi.py +208 -0
- napistu/network/constants.py +23 -1
- napistu/network/ig_utils.py +161 -1
- napistu/network/net_create.py +3 -3
- napistu/network/net_propagation.py +646 -96
- napistu/network/ng_utils.py +26 -6
- napistu/network/precompute.py +56 -0
- napistu/sbml_dfs_utils.py +8 -2
- napistu/source.py +243 -40
- napistu/statistics/__init__.py +10 -0
- napistu/statistics/hypothesis_testing.py +66 -0
- napistu/statistics/quantiles.py +82 -0
- napistu/utils.py +23 -1
- {napistu-0.4.1.dist-info → napistu-0.4.3.dist-info}/METADATA +1 -1
- {napistu-0.4.1.dist-info → napistu-0.4.3.dist-info}/RECORD +29 -24
- tests/test_network_ig_utils.py +133 -0
- tests/test_network_net_propagation.py +365 -74
- tests/test_network_precompute.py +30 -0
- tests/test_sbml_dfs_utils.py +13 -0
- tests/test_source.py +38 -6
- tests/test_statistics_hypothesis_testing.py +62 -0
- tests/test_statistics_quantiles.py +133 -0
- tests/test_set_coverage.py +0 -50
- {napistu-0.4.1.dist-info → napistu-0.4.3.dist-info}/WHEEL +0 -0
- {napistu-0.4.1.dist-info → napistu-0.4.3.dist-info}/entry_points.txt +0 -0
- {napistu-0.4.1.dist-info → napistu-0.4.3.dist-info}/licenses/LICENSE +0 -0
- {napistu-0.4.1.dist-info → napistu-0.4.3.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,133 @@
|
|
1
|
+
import pytest
|
2
|
+
import numpy as np
|
3
|
+
import pandas as pd
|
4
|
+
from napistu.statistics import quantiles
|
5
|
+
|
6
|
+
|
7
|
+
def test_calculate_quantiles_valid_inputs():
    """Check the layout and a few hand-computed values of calculate_quantiles.

    The observed table has 4 features x 3 attributes; the null table stacks
    two null draws per feature (8 rows) under the same feature labels.
    """
    features = ["gene1", "gene2", "gene3", "gene4"]
    attributes = ["attr1", "attr2", "attr3"]

    observed = pd.DataFrame(
        [[0.8, 0.3, 0.9], [0.2, 0.7, 0.1], [0.5, 0.5, 0.5], [0.1, 0.9, 0.2]],
        index=features,
        columns=attributes,
    )

    # One row per feature in each draw, in the same order as `features`.
    first_draw = [
        [0.1, 0.2, 0.3],
        [0.4, 0.5, 0.6],
        [0.7, 0.8, 0.9],
        [0.0, 0.1, 0.2],
    ]
    second_draw = [
        [0.2, 0.3, 0.4],
        [0.5, 0.6, 0.7],
        [0.8, 0.9, 1.0],
        [0.1, 0.2, 0.3],
    ]
    null_data = pd.DataFrame(
        first_draw + second_draw,
        index=features * 2,
        columns=attributes,
    )

    result = quantiles.calculate_quantiles(observed, null_data)

    # The result must mirror the observed frame's layout.
    assert result.shape == observed.shape
    assert list(result.index) == list(observed.index)
    assert list(result.columns) == list(observed.columns)

    # (feature, attribute) -> expected quantile, worked out by hand.
    expected_quantiles = {
        # observed=0.8, nulls=[0.1, 0.2] -> both nulls below -> 1.0
        ("gene1", "attr1"): 1.0,
        # observed=0.7, nulls=[0.5, 0.6] -> both nulls below -> 1.0
        ("gene2", "attr2"): 1.0,
        # observed=0.5, nulls=[0.9, 1.0] -> no null below -> 0.0
        ("gene3", "attr3"): 0.0,
        # observed=0.1, nulls=[0.0, 0.1]; the tie at 0.1 is expected to
        # count (<= comparison), giving 2/2 = 1.0
        ("gene4", "attr1"): 1.0,
    }
    for (feature, attribute), quantile in expected_quantiles.items():
        assert result.loc[feature, attribute] == quantile
|
54
|
+
|
55
|
+
|
56
|
+
def test_calculate_quantiles_error_cases():
    """Exercise calculate_quantiles with malformed and edge-case inputs.

    Scenarios, in order:
      1. null data with an extra column -> KeyError or ValueError
      2. null data missing a feature -> may succeed or raise (see below)
      3. unequal number of null draws per feature -> succeeds, shape preserved
      4. empty null data -> ValueError or IndexError
      5. a single null draw per feature -> every quantile is 0.0 or 1.0
      6. NaN in observed or null data -> ValueError with a specific message
    """
    # Base observed data shared by every scenario
    observed = pd.DataFrame(
        [[0.8, 0.3], [0.2, 0.7]], index=["gene1", "gene2"], columns=["attr1", "attr2"]
    )

    # Test 1: Mismatched columns
    null_wrong_cols = pd.DataFrame(
        [[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]],
        index=["gene1", "gene2"],
        columns=["attr1", "attr2", "attr3"],  # Extra column
    )

    with pytest.raises((KeyError, ValueError)):
        quantiles.calculate_quantiles(observed, null_wrong_cols)

    # Test 2: Missing features in null data
    null_missing_feature = pd.DataFrame(
        [[0.1, 0.2]], index=["gene1"], columns=["attr1", "attr2"]  # Missing gene2
    )

    # The contract for a missing feature is deliberately left open: the call
    # may succeed or reject the input with one of the exceptions below. Any
    # other exception type still propagates and fails the test. Tighten this
    # to pytest.raises(...) once the validation behaviour is fixed.
    try:
        quantiles.calculate_quantiles(observed, null_missing_feature)
    except (KeyError, ValueError, IndexError):
        pass

    # Test 3: Unequal null samples per feature
    null_unequal_samples = pd.DataFrame(
        [
            [0.1, 0.2],  # gene1 sample 1
            [0.3, 0.4],  # gene1 sample 2
            [0.5, 0.6],  # gene2 sample 1 (only 1 sample)
        ],
        index=["gene1", "gene1", "gene2"],
        columns=["attr1", "attr2"],
    )

    # This should still work but may give different results
    result = quantiles.calculate_quantiles(observed, null_unequal_samples)
    assert result.shape == observed.shape

    # Test 4: Empty null data
    null_empty = pd.DataFrame(columns=["attr1", "attr2"])

    with pytest.raises((ValueError, IndexError)):
        quantiles.calculate_quantiles(observed, null_empty)

    # Test 5: Single null sample (edge case)
    null_single = pd.DataFrame(
        [[0.1, 0.2], [0.5, 0.6]], index=["gene1", "gene2"], columns=["attr1", "attr2"]
    )

    result = quantiles.calculate_quantiles(observed, null_single)
    assert result.shape == observed.shape
    # With a single null sample per feature, results should be binary (0 or 1)
    assert all(val in (0.0, 1.0) for val in result.values.flat)

    # Test 6: NaN values in data
    observed_with_nan = observed.copy()
    observed_with_nan.loc["gene1", "attr1"] = np.nan

    null_with_nan = pd.DataFrame(
        [[np.nan, 0.2], [0.4, 0.5], [0.1, 0.3], [0.6, 0.7]],
        index=["gene1", "gene2", "gene1", "gene2"],
        columns=["attr1", "attr2"],
    )

    # NaN on either side must be rejected with a message naming the culprit
    with pytest.raises(ValueError, match="NaN values found in observed data"):
        quantiles.calculate_quantiles(observed_with_nan, null_single)

    with pytest.raises(ValueError, match="NaN values found in null data"):
        quantiles.calculate_quantiles(observed, null_with_nan)
|
tests/test_set_coverage.py
DELETED
@@ -1,50 +0,0 @@
|
|
1
|
-
from __future__ import annotations
|
2
|
-
|
3
|
-
from napistu import source
|
4
|
-
from napistu.network import ng_utils
|
5
|
-
|
6
|
-
|
7
|
-
def test_get_minimal_source_edges(sbml_dfs_metabolism):
    """Minimal source edges computed over all reactions have shape (87, 3)."""
    # Move the reaction index into a column, then expose "r_id" as "node".
    reactions_table = sbml_dfs_metabolism.reactions.reset_index()
    vertices = reactions_table.rename(columns={"r_id": "node"})

    minimal_source_edges = ng_utils.get_minimal_sources_edges(
        vertices,
        sbml_dfs_metabolism,
    )

    assert minimal_source_edges.shape == (87, 3)
|
17
|
-
|
18
|
-
|
19
|
-
def test_greedy_set_coverge_of_sources(sbml_dfs_metabolism):
    """Unnest the reaction sources, then check the greedy set-coverage shape."""
    reactions_schema = sbml_dfs_metabolism.schema["reactions"]

    unnested_sources = source.unnest_sources(
        sbml_dfs_metabolism.reactions,
        source_var="r_Source",
    )
    assert unnested_sources.shape == (111, 7)

    coverage = source.greedy_set_coverge_of_sources(
        unnested_sources,
        reactions_schema,
    )
    assert coverage.shape == (87, 6)
|
31
|
-
|
32
|
-
|
33
|
-
################################################
|
34
|
-
# __main__
|
35
|
-
################################################
|
36
|
-
|
37
|
-
if __name__ == "__main__":
    # Ad-hoc runner: build the metabolism consensus model from the bundled
    # test data and call both tests directly, without pytest fixtures.
    import os
    from napistu import indices
    from napistu import consensus

    tests_dir = os.path.abspath(os.path.join(__file__, os.pardir))
    pw_index_file = os.path.join(tests_dir, "test_data", "pw_index_metabolism.tsv")

    pw_index = indices.PWIndex(pw_index_file)
    models_by_pathway = consensus.construct_sbml_dfs_dict(pw_index)
    metabolism_model = consensus.construct_consensus_model(models_by_pathway, pw_index)

    test_get_minimal_source_edges(metabolism_model)
    test_greedy_set_coverge_of_sources(metabolism_model)
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|