napistu 0.4.1__py3-none-any.whl → 0.4.3__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package as they appear in their public registry, and is provided for informational purposes only.
@@ -0,0 +1,133 @@
+ import pytest
+ import numpy as np
+ import pandas as pd
+ from napistu.statistics import quantiles
+
+
+ def test_calculate_quantiles_valid_inputs():
+     """Test calculate_quantiles with valid, well-formed inputs."""
+     # Create observed data: 4 features x 3 attributes
+     observed = pd.DataFrame(
+         [[0.8, 0.3, 0.9], [0.2, 0.7, 0.1], [0.5, 0.5, 0.5], [0.1, 0.9, 0.2]],
+         index=["gene1", "gene2", "gene3", "gene4"],
+         columns=["attr1", "attr2", "attr3"],
+     )
+
+     # Create null data: 2 samples per feature (8 rows total)
+     null_index = ["gene1", "gene2", "gene3", "gene4"] * 2
+     null_data = pd.DataFrame(
+         [
+             [0.1, 0.2, 0.3],  # gene1 sample 1
+             [0.4, 0.5, 0.6],  # gene2 sample 1
+             [0.7, 0.8, 0.9],  # gene3 sample 1
+             [0.0, 0.1, 0.2],  # gene4 sample 1
+             [0.2, 0.3, 0.4],  # gene1 sample 2
+             [0.5, 0.6, 0.7],  # gene2 sample 2
+             [0.8, 0.9, 1.0],  # gene3 sample 2
+             [0.1, 0.2, 0.3],  # gene4 sample 2
+         ],
+         index=null_index,
+         columns=["attr1", "attr2", "attr3"],
+     )
+
+     # Calculate quantiles
+     result = quantiles.calculate_quantiles(observed, null_data)
+
+     # Verify output structure
+     assert result.shape == observed.shape
+     assert list(result.index) == list(observed.index)
+     assert list(result.columns) == list(observed.columns)
+
+     # Check specific quantile calculations
+     # gene1, attr1: observed=0.8, nulls=[0.1, 0.2] -> quantile = 1.0 (100%)
+     assert result.loc["gene1", "attr1"] == 1.0
+
+     # gene2, attr2: observed=0.7, nulls=[0.5, 0.6] -> quantile = 1.0 (100%)
+     assert result.loc["gene2", "attr2"] == 1.0
+
+     # gene3, attr3: observed=0.5, nulls=[0.9, 1.0] -> quantile = 0.0 (0%)
+     assert result.loc["gene3", "attr3"] == 0.0
+
+     # gene4, attr1: observed=0.1, nulls=[0.0, 0.1]
+     # With ≤: 0.0 ≤ 0.1 (True), 0.1 ≤ 0.1 (True) → 2/2 = 1.0
+     assert result.loc["gene4", "attr1"] == 1.0
+
+
+ def test_calculate_quantiles_error_cases():
+     """Test calculate_quantiles with invalid inputs that should raise errors or warnings."""
+     # Base observed data
+     observed = pd.DataFrame(
+         [[0.8, 0.3], [0.2, 0.7]], index=["gene1", "gene2"], columns=["attr1", "attr2"]
+     )
+
+     # Test 1: Mismatched columns
+     null_wrong_cols = pd.DataFrame(
+         [[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]],
+         index=["gene1", "gene2"],
+         columns=["attr1", "attr2", "attr3"],  # Extra column
+     )
+
+     with pytest.raises((KeyError, ValueError)):
+         quantiles.calculate_quantiles(observed, null_wrong_cols)
+
+     # Test 2: Missing features in null data
+     null_missing_feature = pd.DataFrame(
+         [[0.1, 0.2]], index=["gene1"], columns=["attr1", "attr2"]  # Missing gene2
+     )
+
+     # Current implementation doesn't validate - it will likely fail in groupby or indexing
+     # This test verifies current behavior (may change if validation added)
+     try:
+         result = quantiles.calculate_quantiles(observed, null_missing_feature)
+         # If it succeeds, gene2 quantiles will be invalid/error
+         assert True  # Just check it doesn't crash for now
+     except (KeyError, ValueError, IndexError):
+         assert True  # Expected behavior
+
+     # Test 3: Unequal null samples per feature
+     null_unequal_samples = pd.DataFrame(
+         [
+             [0.1, 0.2],  # gene1 sample 1
+             [0.3, 0.4],  # gene1 sample 2
+             [0.5, 0.6],  # gene2 sample 1 (only 1 sample)
+         ],
+         index=["gene1", "gene1", "gene2"],
+         columns=["attr1", "attr2"],
+     )
+
+     # This should still work but may give different results
+     result = quantiles.calculate_quantiles(observed, null_unequal_samples)
+     assert result.shape == observed.shape
+
+     # Test 4: Empty null data
+     null_empty = pd.DataFrame(columns=["attr1", "attr2"])
+
+     with pytest.raises((ValueError, IndexError)):
+         quantiles.calculate_quantiles(observed, null_empty)
+
+     # Test 5: Single null sample (edge case)
+     null_single = pd.DataFrame(
+         [[0.1, 0.2], [0.5, 0.6]], index=["gene1", "gene2"], columns=["attr1", "attr2"]
+     )
+
+     result = quantiles.calculate_quantiles(observed, null_single)
+     assert result.shape == observed.shape
+     # With single sample, results should be binary (0 or 1)
+     assert all(val in [0.0, 1.0] for val in result.values.flatten())
+
+     # Test 6: NaN values in data
+     observed_with_nan = observed.copy()
+     observed_with_nan.loc["gene1", "attr1"] = np.nan
+
+     null_with_nan = pd.DataFrame(
+         [[np.nan, 0.2], [0.4, 0.5], [0.1, 0.3], [0.6, 0.7]],
+         index=["gene1", "gene2", "gene1", "gene2"],
+         columns=["attr1", "attr2"],
+     )
+
+     # Should raise ValueError for NaN values
+     with pytest.raises(ValueError, match="NaN values found in observed data"):
+         quantiles.calculate_quantiles(observed_with_nan, null_single)
+
+     with pytest.raises(ValueError, match="NaN values found in null data"):
+         quantiles.calculate_quantiles(observed, null_with_nan)
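
The diff does not include the implementation of quantiles.calculate_quantiles itself, but the assertions above pin down its semantics: for each feature/attribute cell, the result is the fraction of that feature's null samples that are less than or equal to the observed value (ties count, which is why gene4/attr1 yields 1.0). A minimal sketch consistent with those assertions follows; the name empirical_quantiles and this implementation are illustrative, not napistu's actual code:

import pandas as pd

def empirical_quantiles(observed: pd.DataFrame, null_data: pd.DataFrame) -> pd.DataFrame:
    """Fraction of null samples <= the observed value, per feature/attribute.

    null_data's index repeats feature names, one row per null draw.
    """
    counts = pd.DataFrame(0, index=observed.index, columns=observed.columns)
    # Number of null draws available for each feature
    group_sizes = null_data.groupby(level=0).size()
    for feature, null_rows in null_data.groupby(level=0):
        # Count draws at or below the observed value, column by column
        counts.loc[feature] = (null_rows <= observed.loc[feature]).sum()
    return counts.div(group_sizes, axis=0)

Run against the fixtures in test_calculate_quantiles_valid_inputs, this sketch reproduces the asserted values (e.g. gene3/attr3: 0 of [0.9, 1.0] are <= 0.5, giving 0.0).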
@@ -1,50 +0,0 @@
- from __future__ import annotations
-
- from napistu import source
- from napistu.network import ng_utils
-
-
- def test_get_minimal_source_edges(sbml_dfs_metabolism):
-     vertices = sbml_dfs_metabolism.reactions.reset_index().rename(
-         columns={"r_id": "node"}
-     )
-
-     minimal_source_edges = ng_utils.get_minimal_sources_edges(
-         vertices, sbml_dfs_metabolism
-     )
-     # print(minimal_source_edges.shape)
-     assert minimal_source_edges.shape == (87, 3)
-
-
- def test_greedy_set_coverge_of_sources(sbml_dfs_metabolism):
-     table_schema = sbml_dfs_metabolism.schema["reactions"]
-
-     source_df = source.unnest_sources(
-         sbml_dfs_metabolism.reactions, source_var="r_Source"
-     )
-     # print(source_df.shape)
-     assert source_df.shape == (111, 7)
-
-     set_coverage = source.greedy_set_coverge_of_sources(source_df, table_schema)
-     # print(set_coverage.shape)
-     assert set_coverage.shape == (87, 6)
-
-
- ################################################
- # __main__
- ################################################
-
- if __name__ == "__main__":
-     import os
-     from napistu import indices
-     from napistu import consensus
-
-     test_path = os.path.abspath(os.path.join(__file__, os.pardir))
-     test_data = os.path.join(test_path, "test_data")
-
-     pw_index = indices.PWIndex(os.path.join(test_data, "pw_index_metabolism.tsv"))
-     sbml_dfs_dict = consensus.construct_sbml_dfs_dict(pw_index)
-     sbml_dfs_metabolism = consensus.construct_consensus_model(sbml_dfs_dict, pw_index)
-
-     test_get_minimal_source_edges(sbml_dfs_metabolism)
-     test_greedy_set_coverge_of_sources(sbml_dfs_metabolism)
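
The removed test exercised napistu's source set-cover utilities. Judging by the asserted shapes, source.greedy_set_coverge_of_sources (the spelling is upstream's) reduces the 111-row unnested source table to an 87-row cover, matching the 87 reactions in the consensus model, which is consistent with a greedy set cover that assigns each reaction to one source. A sketch of that idea under assumed column names: "r_id" appears in the test above, but "pathway_id" and this implementation are illustrative, not napistu's actual schema or code:

import pandas as pd

def greedy_source_cover(source_df: pd.DataFrame) -> pd.DataFrame:
    """Assign each reaction to one source via greedy set cover."""
    remaining = source_df.copy()
    assigned = []
    while not remaining.empty:
        # Pick the source covering the most still-unassigned reactions
        best = remaining.groupby("pathway_id")["r_id"].nunique().idxmax()
        chosen = remaining[remaining["pathway_id"] == best]
        assigned.append(chosen)
        # Drop every reaction the chosen source just covered
        remaining = remaining[~remaining["r_id"].isin(chosen["r_id"])]
    return pd.concat(assigned)

Each reaction survives in exactly one chosen source's rows, so a 111-row input over 87 reactions collapses to 87 rows, as the removed assertions expected.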