napistu 0.4.1__py3-none-any.whl → 0.4.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- napistu/consensus.py +3 -4
- napistu/gcs/constants.py +5 -5
- napistu/ingestion/constants.py +51 -0
- napistu/ingestion/reactom_fi.py +208 -0
- napistu/network/constants.py +23 -1
- napistu/network/ig_utils.py +161 -1
- napistu/network/net_create.py +3 -3
- napistu/network/net_propagation.py +646 -96
- napistu/network/ng_utils.py +26 -6
- napistu/network/precompute.py +56 -0
- napistu/sbml_dfs_utils.py +8 -2
- napistu/source.py +243 -40
- napistu/statistics/__init__.py +10 -0
- napistu/statistics/hypothesis_testing.py +66 -0
- napistu/statistics/quantiles.py +82 -0
- napistu/utils.py +23 -1
- {napistu-0.4.1.dist-info → napistu-0.4.3.dist-info}/METADATA +1 -1
- {napistu-0.4.1.dist-info → napistu-0.4.3.dist-info}/RECORD +29 -24
- tests/test_network_ig_utils.py +133 -0
- tests/test_network_net_propagation.py +365 -74
- tests/test_network_precompute.py +30 -0
- tests/test_sbml_dfs_utils.py +13 -0
- tests/test_source.py +38 -6
- tests/test_statistics_hypothesis_testing.py +62 -0
- tests/test_statistics_quantiles.py +133 -0
- tests/test_set_coverage.py +0 -50
- {napistu-0.4.1.dist-info → napistu-0.4.3.dist-info}/WHEEL +0 -0
- {napistu-0.4.1.dist-info → napistu-0.4.3.dist-info}/entry_points.txt +0 -0
- {napistu-0.4.1.dist-info → napistu-0.4.3.dist-info}/licenses/LICENSE +0 -0
- {napistu-0.4.1.dist-info → napistu-0.4.3.dist-info}/top_level.txt +0 -0
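
The largest functional additions in 0.4.3 are the null-model API in `napistu.network.net_propagation` (exercised by the rewritten `tests/test_network_net_propagation.py` below) and the new `napistu.statistics` subpackage. Based solely on the test code in this diff, a typical propagation-with-null workflow looks roughly like the sketch below; the imported names and keyword arguments all appear in the tests, but default values and exact semantics are assumptions rather than documented API.

```python
# Sketch inferred from the new tests in this diff; defaults and return semantics
# beyond what the tests assert are assumptions.
import igraph as ig
import numpy as np

from napistu.network.constants import NAPISTU_GRAPH_VERTICES, NULL_STRATEGIES
from napistu.network.net_propagation import (
    net_propagate_attributes,
    network_propagation_with_null,
)

# Small graph with a non-negative vertex attribute to propagate.
graph = ig.Graph(5)
graph.vs[NAPISTU_GRAPH_VERTICES.NAME] = ["A", "B", "C", "D", "E"]
graph.vs["score"] = [1.0, 0.0, 2.0, 0.0, 1.5]
graph.add_edges([(0, 1), (1, 2), (2, 3), (3, 4)])

# Personalized-PageRank-style propagation: one column per attribute,
# each column sums to ~1 (asserted by the tests).
propagated = net_propagate_attributes(graph, ["score"])

# Compare observed propagation against a null. Per the tests, the uniform null
# returns observed/null ratios, while permutation and parametric nulls return
# quantiles of the observed score within the null samples.
ratios = network_propagation_with_null(
    graph, ["score"], null_strategy=NULL_STRATEGIES.UNIFORM
)
quantiles = network_propagation_with_null(
    graph,
    ["score"],
    null_strategy=NULL_STRATEGIES.NODE_PERMUTATION,
    n_samples=100,
    mask=np.array([True, False, True, False, True]),  # optional restriction of the null
)
```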
tests/test_network_net_propagation.py
CHANGED
@@ -1,89 +1,380 @@
 import pytest
-import igraph as ig
 import numpy as np
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-    g = ig.Graph.Full(3)
-    g.vs["score"] = [1, None, 2]
-    # None should be treated as 0
-    df = personalized_pagerank_by_attribute(g, "score")
-    assert np.isclose(df["score"].sum(), 3)
-    # Negative values should raise
-    g.vs["score"] = [1, -1, 2]
-    with pytest.raises(ValueError):
-        personalized_pagerank_by_attribute(g, "score")
-
-
-def test_personalized_pagerank_by_attribute_additional_args_directed():
-    # create an asymmetric directed graph to test whether additional_propagation_args is respected
-    g = ig.Graph(directed=True)
-    g.add_vertices(3)
-    g.add_edges([(0, 1), (1, 2)])
-    g.vs["score"] = [1, 0, 2]
-    # Run with directed=False, which should treat the graph as undirected
-    df_directed = personalized_pagerank_by_attribute(
-        g, "score", additional_propagation_args={"directed": True}
+import pandas as pd
+import igraph as ig
+from napistu.network.net_propagation import (
+    net_propagate_attributes,
+    uniform_null,
+    parametric_null,
+    node_permutation_null,
+    edge_permutation_null,
+    NULL_GENERATORS,
+    network_propagation_with_null,
+)
+from napistu.network.constants import (
+    NAPISTU_GRAPH_VERTICES,
+    NULL_STRATEGIES,
+)
+
+
+def test_network_propagation_with_null():
+    """Test the main orchestrator function with different null strategies."""
+    # Create test graph
+    graph = ig.Graph(5)
+    graph.vs[NAPISTU_GRAPH_VERTICES.NAME] = ["A", "B", "C", "D", "E"]
+    graph.vs["attr1"] = [1.0, 0.0, 2.0, 0.0, 1.5]  # Non-negative, not all zero
+    graph.add_edges([(0, 1), (1, 2), (2, 3), (3, 4)])
+
+    attributes = ["attr1"]
+
+    # Test 1: Uniform null (should return ratios)
+    result_uniform = network_propagation_with_null(
+        graph, attributes, null_strategy=NULL_STRATEGIES.UNIFORM
     )
-
-
+
+    # Check structure
+    assert isinstance(result_uniform, pd.DataFrame)
+    assert result_uniform.shape == (5, 1)
+    assert list(result_uniform.columns) == attributes
+    assert list(result_uniform.index) == ["A", "B", "C", "D", "E"]
+
+    # Should be ratios (can be > 1)
+    assert (result_uniform.values > 0).all(), "Ratios should be positive"
+    # Some ratios should be > 1 since observed scores concentrate on fewer nodes
+    assert (result_uniform.values > 1).any(), "Some ratios should be > 1"
+
+    # Test 2: Node permutation null (should return quantiles)
+    result_permutation = network_propagation_with_null(
+        graph,
+        attributes,
+        null_strategy=NULL_STRATEGIES.NODE_PERMUTATION,
+        n_samples=10,  # Small for testing
     )
-
-
-
+
+    # Check structure
+    assert isinstance(result_permutation, pd.DataFrame)
+    assert result_permutation.shape == (5, 1)
+    assert list(result_permutation.columns) == attributes
+
+    # Should be quantiles (0 to 1)
+    assert (result_permutation.values >= 0).all(), "Quantiles should be >= 0"
+    assert (result_permutation.values <= 1).all(), "Quantiles should be <= 1"
+
+    # Test 3: Edge permutation null
+    result_edge = network_propagation_with_null(
+        graph,
+        attributes,
+        null_strategy=NULL_STRATEGIES.EDGE_PERMUTATION,
+        n_samples=5,
+        burn_in_ratio=2,  # Small for testing
+        sampling_ratio=0.2,
+    )
+
+    # Check structure
+    assert isinstance(result_edge, pd.DataFrame)
+    assert result_edge.shape == (5, 1)
+    assert (result_edge.values >= 0).all()
+    assert (result_edge.values <= 1).all()
+
+    # Test 4: Gaussian null
+    result_parametric = network_propagation_with_null(
+        graph, attributes, null_strategy=NULL_STRATEGIES.PARAMETRIC, n_samples=8
+    )
+
+    # Check structure
+    assert isinstance(result_parametric, pd.DataFrame)
+    assert result_parametric.shape == (5, 1)
+    assert (result_parametric.values >= 0).all()
+    assert (result_parametric.values <= 1).all()
+
+    # Test 5: Custom propagation parameters
+    result_custom = network_propagation_with_null(
+        graph,
+        attributes,
+        null_strategy=NULL_STRATEGIES.UNIFORM,
+        additional_propagation_args={"damping": 0.7},
     )
-
+
+    # Should be different from default
     assert not np.allclose(
-
+        result_uniform.values, result_custom.values
+    ), "Different propagation parameters should give different results"
+
+    # Test 6: Custom null parameters (mask)
+    mask_array = np.array([True, False, True, False, True])
+    result_masked = network_propagation_with_null(
+        graph,
+        attributes,
+        null_strategy=NULL_STRATEGIES.NODE_PERMUTATION,
+        n_samples=5,
+        mask=mask_array,
     )
 
+    # Should work without error
+    assert isinstance(result_masked, pd.DataFrame)
+    assert result_masked.shape == (5, 1)
 
-
-
-
-
-    with pytest.raises(ValueError):
-        personalized_pagerank_by_attribute(
-            g, "score", additional_propagation_args={"not_a_real_arg": 123}
+    # Test 7: Error handling - invalid null strategy
+    with pytest.raises(ValueError, match="Unknown null strategy"):
+        network_propagation_with_null(
+            graph, attributes, null_strategy="invalid_strategy"
        )
 
 
-def
-
-#
-
-
+def test_net_propagate_attributes():
+    """Test net_propagate_attributes with multiple attributes and various scenarios."""
+    # Create test graph with edges for realistic propagation
+    graph = ig.Graph(4)
+    graph.vs[NAPISTU_GRAPH_VERTICES.NAME] = ["node1", "node2", "node3", "node4"]
+    graph.vs["attr1"] = [1.0, 0.0, 2.0, 0.0]  # Non-negative, not all zero
+    graph.vs["attr2"] = [0.5, 1.5, 0.0, 1.0]  # Non-negative, not all zero
+    graph.add_edges([(0, 1), (1, 2), (2, 3), (0, 3)])  # Create connected graph
+
+    # Test 1: Basic functionality with two attributes
+    result = net_propagate_attributes(graph, ["attr1", "attr2"])
+
+    # Check structure
+    assert isinstance(result, pd.DataFrame)
+    assert result.shape == (4, 2)
+    assert list(result.index) == ["node1", "node2", "node3", "node4"]
+    assert list(result.columns) == ["attr1", "attr2"]
+
+    # Check that values are valid probabilities (PPR returns probabilities)
+    assert np.all(result.values >= 0)
+    assert np.all(result.values <= 1)
+    # Each column should sum to approximately 1 (PPR property)
+    assert np.allclose(result.sum(axis=0), [1.0, 1.0], atol=1e-10)
+
+    # Test 2: Single attribute
+    result_single = net_propagate_attributes(graph, ["attr1"])
+    assert result_single.shape == (4, 1)
+    assert list(result_single.columns) == ["attr1"]
+
+    # Test 3: Graph without names (should use indices)
+    graph_no_names = ig.Graph(3)
+    graph_no_names.vs["attr1"] = [1.0, 2.0, 1.0]
+    graph_no_names.add_edges([(0, 1), (1, 2)])
+
+    result_no_names = net_propagate_attributes(graph_no_names, ["attr1"])
+    assert list(result_no_names.index) == [0, 1, 2]  # Should use integer indices
 
+    # Test 4: Invalid propagation method
+    with pytest.raises(ValueError, match="Invalid propagation method"):
+        net_propagate_attributes(graph, ["attr1"], propagation_method="invalid_method")
 
-
-
-
+    # Test 5: Additional arguments (test damping parameter)
+    result_default = net_propagate_attributes(graph, ["attr1"])
+    result_damped = net_propagate_attributes(
+        graph, ["attr1"], additional_propagation_args={"damping": 0.5}  # Lower damping
+    )
+
+    # Results should be different with different damping
+    assert not np.allclose(result_default.values, result_damped.values)
+
+    # Test 6: Invalid attribute (should be caught by internal validation)
+    graph.vs["bad_attr"] = [-1.0, 1.0, 2.0, 0.0]  # Has negative values
+    with pytest.raises(ValueError, match="contains negative values"):
+        net_propagate_attributes(graph, ["bad_attr"])
+
+    # Test 7: Zero attribute (should be caught by internal validation)
+    graph.vs["zero_attr"] = [0.0, 0.0, 0.0, 0.0]
     with pytest.raises(ValueError, match="zero for all vertices"):
-
+        net_propagate_attributes(graph, ["zero_attr"])
+
+
+def test_all_null_generators_structure():
+    """Test all null generators with default options and validate output structure."""
+    # Create test graph with edges for realistic propagation
+    graph = ig.Graph(5)
+    graph.vs[NAPISTU_GRAPH_VERTICES.NAME] = ["A", "B", "C", "D", "E"]
+    graph.vs["attr1"] = [1.0, 0.0, 2.0, 0.0, 1.5]  # Non-negative, not all zero
+    graph.vs["attr2"] = [0.5, 1.0, 0.0, 2.0, 0.0]  # Non-negative, not all zero
+    graph.add_edges([(0, 1), (1, 2), (2, 3), (3, 4)])
+
+    attributes = ["attr1", "attr2"]
+    n_samples = 3  # Small for testing
+
+    for generator_name, generator_func in NULL_GENERATORS.items():
+        print(f"Testing {generator_name}")
+
+        if generator_name == NULL_STRATEGIES.UNIFORM:
+            # Uniform null doesn't take n_samples
+            result = generator_func(graph, attributes)
+            expected_rows = 5  # One row per node
+        elif generator_name == NULL_STRATEGIES.EDGE_PERMUTATION:
+            # Edge permutation has different parameters
+            result = generator_func(graph, attributes, n_samples=n_samples)
+            expected_rows = n_samples * 5  # n_samples rows per node
+        else:
+            # Gaussian and node_permutation
+            result = generator_func(graph, attributes, n_samples=n_samples)
+            expected_rows = n_samples * 5  # n_samples rows per node
+
+        # Validate structure
+        assert isinstance(
+            result, pd.DataFrame
+        ), f"{generator_name} should return DataFrame"
+        assert result.shape == (
+            expected_rows,
+            2,
+        ), f"{generator_name} wrong shape: {result.shape}"
+        assert list(result.columns) == attributes, f"{generator_name} wrong columns"
+
+        # Validate index structure
+        if generator_name == NULL_STRATEGIES.UNIFORM:
+            assert list(result.index) == [
+                "A",
+                "B",
+                "C",
+                "D",
+                "E",
+            ], f"{generator_name} wrong index"
+        else:
+            expected_index = ["A", "B", "C", "D", "E"] * n_samples
+            assert (
+                list(result.index) == expected_index
+            ), f"{generator_name} wrong repeated index"
+
+        # Validate values are numeric and finite (propagated outputs should be valid probabilities)
+        assert result.isna().sum().sum() == 0, f"{generator_name} contains NaN values"
+        assert np.isfinite(
+            result.values
+        ).all(), f"{generator_name} contains infinite values"
+        assert (result.values >= 0).all(), f"{generator_name} contains negative values"
+        assert (
+            result.values <= 1
+        ).all(), f"{generator_name} should contain probabilities <= 1"
+
+        # Each sample should sum to approximately 1 (PPR property)
+        if generator_name == NULL_STRATEGIES.UNIFORM:
+            assert np.allclose(
+                result.sum(axis=0), [1.0, 1.0], atol=1e-10
+            ), f"{generator_name} doesn't sum to 1"
+        else:
+            # For multiple samples, each individual sample should sum to 1
+            for i in range(n_samples):
+                start_idx = i * 5
+                end_idx = (i + 1) * 5
+                sample_data = result.iloc[start_idx:end_idx]
+                assert np.allclose(
+                    sample_data.sum(axis=0), [1.0, 1.0], atol=1e-10
+                ), f"{generator_name} sample {i} doesn't sum to 1"
+
+
+def test_mask_application():
+    """Test that masks are correctly applied across all null generators."""
+    # Create test graph
+    graph = ig.Graph(6)
+    graph.vs[NAPISTU_GRAPH_VERTICES.NAME] = ["A", "B", "C", "D", "E", "F"]
+    graph.vs["attr1"] = [1.0, 0.0, 2.0, 0.0, 1.5, 0.0]  # Nonzero at indices 0, 2, 4
+    graph.vs["attr2"] = [0.0, 1.0, 0.0, 2.0, 0.0, 1.0]  # Nonzero at indices 1, 3, 5
+    graph.add_edges([(0, 1), (1, 2), (2, 3), (3, 4), (4, 5)])
+
+    attributes = ["attr1", "attr2"]
+
+    # Test mask that includes nodes with nonzero values for both attributes
+    # Use nodes 0, 1, 2, 3 which covers nonzero values for both attributes
+    mask_array = np.array([True, True, True, True, False, False])  # Nodes 0, 1, 2, 3
+
+    for generator_name, generator_func in NULL_GENERATORS.items():
+        print(f"Testing mask application for {generator_name}")
+
+        if generator_name == NULL_STRATEGIES.UNIFORM:
+            result = generator_func(graph, attributes, mask=mask_array)
+
+            # For uniform null with mask, verify structure is correct
+            assert result.shape == (6, 2), f"{generator_name} wrong shape with mask"
+            # After propagation, all nodes will have some value due to network effect
+            assert (
+                result.values > 0
+            ).all(), "All nodes should have positive values after propagation"
+
+        elif generator_name == NULL_STRATEGIES.EDGE_PERMUTATION:
+            # Edge permutation ignores mask, just test it doesn't crash
+            result = generator_func(graph, attributes, n_samples=2)
+            assert result.shape[0] == 12  # 2 samples * 6 nodes
+
+        else:
+            # Gaussian and node_permutation with mask
+            result = generator_func(graph, attributes, mask=mask_array, n_samples=2)
+
+            # Check that structure is maintained
+            assert result.shape == (12, 2)  # 2 samples * 6 nodes
+
+
+def test_edge_cases_and_errors():
+    """Test edge cases and error conditions for null generators."""
+    # Create minimal test graph
+    graph = ig.Graph(3)
+    graph.vs["attr1"] = [1.0, 2.0, 0.0]
+    graph.vs["bad_attr"] = [0.0, 0.0, 0.0]  # All zeros
+    graph.add_edges([(0, 1), (1, 2)])
+
+    # Test 1: All zero attribute should raise error for all generators
+    with pytest.raises(ValueError):
+        uniform_null(graph, ["bad_attr"])
+
+    with pytest.raises(ValueError):
+        parametric_null(graph, ["bad_attr"])
+
+    with pytest.raises(ValueError):
+        node_permutation_null(graph, ["bad_attr"])
+
+    with pytest.raises(ValueError):
+        edge_permutation_null(graph, ["bad_attr"])
+
+    # Test 2: Empty mask should raise error
+    empty_mask = np.array([False, False, False])
+    with pytest.raises(ValueError, match="No nodes in mask"):
+        uniform_null(graph, ["attr1"], mask=empty_mask)
+
+    # Test 3: Single node mask (edge case)
+    single_mask = np.array([True, False, False])
+    result = uniform_null(graph, ["attr1"], mask=single_mask)
+    assert result.shape == (3, 1)  # Should work
+
+    # Test 4: Replace parameter in node permutation
+    result_no_replace = node_permutation_null(
+        graph, ["attr1"], replace=False, n_samples=2
+    )
+    result_replace = node_permutation_null(graph, ["attr1"], replace=True, n_samples=2)
+
+    # Both should have same structure
+    assert result_no_replace.shape == result_replace.shape
+
+
+def test_propagation_method_parameters():
+    """Test that propagation method and additional arguments are properly passed through."""
+    # Create test graph
+    graph = ig.Graph(4)
+    graph.vs["attr1"] = [1.0, 2.0, 0.0, 1.5]
+    graph.add_edges([(0, 1), (1, 2), (2, 3)])
+
+    # Test different damping parameters produce different results
+    result_default = uniform_null(graph, ["attr1"])
+    result_damped = uniform_null(
+        graph, ["attr1"], additional_propagation_args={"damping": 0.5}
+    )
+
+    # Results should be different with different damping
+    assert not np.allclose(
+        result_default.values, result_damped.values
+    ), "Different damping should produce different results"
+
+    # Test that all generators accept method parameters
+    for generator_name, generator_func in NULL_GENERATORS.items():
+        if generator_name == NULL_STRATEGIES.UNIFORM:
+            result = generator_func(
+                graph, ["attr1"], additional_propagation_args={"damping": 0.8}
+            )
+        else:
+            result = generator_func(
+                graph,
+                ["attr1"],
+                additional_propagation_args={"damping": 0.8},
+                n_samples=2,
+            )
+
+        # Should produce valid results
+        assert isinstance(result, pd.DataFrame)
+        assert not result.empty
tests/test_network_precompute.py
CHANGED
@@ -276,3 +276,33 @@ def test_precomputed_distances_serialization():
     # Clean up the temporary file
     if os.path.exists(temp_path):
         os.remove(temp_path)
+
+
+def test_filter_precomputed_distances_top_n_subset():
+    # Use a small top_n for a quick test
+    top_n = 5
+    filtered = precompute.filter_precomputed_distances_top_n(
+        precomputed_distances, top_n=top_n
+    )
+    # Check that the filtered DataFrame is a subset of the original
+    merged = filtered.merge(
+        precomputed_distances,
+        on=[
+            precompute.NAPISTU_EDGELIST.SC_ID_ORIGIN,
+            precompute.NAPISTU_EDGELIST.SC_ID_DEST,
+        ],
+        how="left",
+        indicator=True,
+    )
+    assert (
+        merged["_merge"] == "both"
+    ).all(), "Filtered rows must be present in the original DataFrame"
+    # Check that columns are preserved
+    assert set(
+        [
+            precompute.NAPISTU_EDGELIST.SC_ID_ORIGIN,
+            precompute.NAPISTU_EDGELIST.SC_ID_DEST,
+        ]
+    ).issubset(filtered.columns)
+    # Optionally, check that the number of rows is less than or equal to the input
+    assert filtered.shape[0] <= precomputed_distances.shape[0]
tests/test_sbml_dfs_utils.py
CHANGED
@@ -334,3 +334,16 @@ def test_infer_entity_type_errors():
     )  # Two primary keys
     with pytest.raises(ValueError):
         sbml_dfs_utils.infer_entity_type(df)
+
+
+def test_infer_entity_type_multindex_reactions():
+    # DataFrame with MultiIndex (r_id, foo), should infer as reactions
+    import pandas as pd
+    from napistu.constants import SBML_DFS
+
+    df = pd.DataFrame({"some_col": [1, 2]})
+    df.index = pd.MultiIndex.from_tuples(
+        [("rxn1", "a"), ("rxn2", "b")], names=[SBML_DFS.R_ID, "foo"]
+    )
+    result = sbml_dfs_utils.infer_entity_type(df)
+    assert result == SBML_DFS.REACTIONS
tests/test_source.py
CHANGED
@@ -5,6 +5,8 @@ import os
 import pandas as pd
 from napistu import indices
 from napistu import source
+from napistu.network import ng_utils
+from napistu.constants import SBML_DFS
 
 test_path = os.path.abspath(os.path.join(__file__, os.pardir))
 test_data = os.path.join(test_path, "test_data")
@@ -58,10 +60,40 @@ def test_source_w_pwindex():
     assert source_obj.source.shape == (2, 8)
 
 
-
-
-
+def test_get_minimal_source_edges(sbml_dfs_metabolism):
+    vertices = sbml_dfs_metabolism.reactions.reset_index().rename(
+        columns={SBML_DFS.R_ID: "node"}
+    )
+
+    minimal_source_edges = ng_utils.get_minimal_sources_edges(
+        vertices, sbml_dfs_metabolism
+    )
+    # print(minimal_source_edges.shape)
+    assert minimal_source_edges.shape == (87, 3)
+
+
+def test_source_set_coverage(sbml_dfs_metabolism):
+
+    source_df = source.unnest_sources(sbml_dfs_metabolism.reactions)
+
+    # print(source_df.shape)
+    assert source_df.shape == (111, 7)
+
+    set_coverage = source.source_set_coverage(source_df)
+    # print(set_coverage.shape)
+    assert set_coverage.shape == (87, 6)
+
+
+def test_source_set_coverage_enrichment(sbml_dfs_metabolism):
+
+    source_total_counts = source.get_source_total_counts(
+        sbml_dfs_metabolism, "reactions"
+    )
+
+    source_df = source.unnest_sources(sbml_dfs_metabolism.reactions).head(40)
+
+    set_coverage = source.source_set_coverage(
+        source_df, source_total_counts=source_total_counts, sbml_dfs=sbml_dfs_metabolism
+    )
 
-
-test_source()
-test_source_w_pwindex()
+    assert set_coverage.shape == (30, 6)
tests/test_statistics_hypothesis_testing.py
ADDED
@@ -0,0 +1,62 @@
+import numpy as np
+from scipy.stats import fisher_exact
+
+from napistu.statistics import hypothesis_testing
+
+
+def test_fisher_exact_vectorized_basic_and_vectorized():
+
+    # Classic Fisher's test example: [[1, 9], [11, 3]]
+    # a=1, b=9, c=11, d=3
+    odds, p = hypothesis_testing.fisher_exact_vectorized([1], [9], [11], [3])
+    # Odds ratio: (1*3)/(9*11) = 3/99 = 0.0303...
+    assert np.allclose(odds, [3 / 99])
+    assert p.shape == (1,)
+    assert (p >= 0).all() and (p <= 1).all()
+
+    # Vectorized: two tables
+    odds, p = hypothesis_testing.fisher_exact_vectorized(
+        [1, 2], [9, 8], [11, 10], [3, 4]
+    )
+    assert odds.shape == (2,)
+    assert p.shape == (2,)
+    # Check that odds ratios are correct
+    expected_odds = np.array([(1 * 3) / (9 * 11), (2 * 4) / (8 * 10)])
+    assert np.allclose(odds, expected_odds)
+    # P-values should be between 0 and 1
+    assert (p >= 0).all() and (p <= 1).all()
+
+
+def test_fisher_exact_vectorized_vs_scipy():
+
+    # Define several 2x2 tables
+    tables = [
+        ([1], [9], [11], [3]),
+        ([5], [2], [8], [7]),
+        ([10], [10], [10], [10]),
+        ([0], [5], [5], [10]),
+        ([3], [7], [2], [8]),
+    ]
+    for a, b, c, d in tables:
+        odds_vec, p_vec = hypothesis_testing.fisher_exact_vectorized(a, b, c, d)
+        # Build the table for scipy
+        table = np.array([[a[0], b[0]], [c[0], d[0]]])
+        odds_scipy, p_scipy = fisher_exact(table, alternative="greater")
+        # Odds ratios should be nearly identical
+        assert np.allclose(odds_vec, [odds_scipy], rtol=1e-6, atol=1e-8)
+        # P-values should be close (normal approx vs exact)
+        assert np.allclose(
+            p_vec, [p_scipy], rtol=0.15, atol=1e-3
+        )  # allow some tolerance
+
+    # Also test vectorized input
+    a = [1, 5, 10, 0, 3]
+    b = [9, 2, 10, 5, 7]
+    c = [11, 8, 10, 5, 2]
+    d = [3, 7, 10, 10, 8]
+    odds_vec, p_vec = hypothesis_testing.fisher_exact_vectorized(a, b, c, d)
+    for i in range(len(a)):
+        table = np.array([[a[i], b[i]], [c[i], d[i]]])
+        odds_scipy, p_scipy = fisher_exact(table, alternative="greater")
+        assert np.allclose(odds_vec[i], odds_scipy, rtol=1e-6, atol=1e-8)
+        assert np.allclose(p_vec[i], p_scipy, rtol=0.15, atol=1e-3)