napistu 0.4.1__py3-none-any.whl → 0.4.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- napistu/gcs/constants.py +5 -5
- napistu/network/constants.py +23 -1
- napistu/network/ig_utils.py +161 -1
- napistu/network/net_create.py +3 -3
- napistu/network/net_propagation.py +646 -96
- napistu/statistics/__init__.py +10 -0
- napistu/statistics/quantiles.py +82 -0
- {napistu-0.4.1.dist-info → napistu-0.4.2.dist-info}/METADATA +1 -1
- {napistu-0.4.1.dist-info → napistu-0.4.2.dist-info}/RECORD +16 -13
- tests/test_network_ig_utils.py +133 -0
- tests/test_network_net_propagation.py +365 -74
- tests/test_statistics_quantiles.py +133 -0
- {napistu-0.4.1.dist-info → napistu-0.4.2.dist-info}/WHEEL +0 -0
- {napistu-0.4.1.dist-info → napistu-0.4.2.dist-info}/entry_points.txt +0 -0
- {napistu-0.4.1.dist-info → napistu-0.4.2.dist-info}/licenses/LICENSE +0 -0
- {napistu-0.4.1.dist-info → napistu-0.4.2.dist-info}/top_level.txt +0 -0
@@ -1,89 +1,380 @@
|
|
1
1
|
import pytest
|
2
|
-
import igraph as ig
|
3
2
|
import numpy as np
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
g = ig.Graph.Full(3)
|
35
|
-
g.vs["score"] = [1, None, 2]
|
36
|
-
# None should be treated as 0
|
37
|
-
df = personalized_pagerank_by_attribute(g, "score")
|
38
|
-
assert np.isclose(df["score"].sum(), 3)
|
39
|
-
# Negative values should raise
|
40
|
-
g.vs["score"] = [1, -1, 2]
|
41
|
-
with pytest.raises(ValueError):
|
42
|
-
personalized_pagerank_by_attribute(g, "score")
|
43
|
-
|
44
|
-
|
45
|
-
def test_personalized_pagerank_by_attribute_additional_args_directed():
|
46
|
-
# create an asymmetric directed graph to test whether additional_propagation_args is respected
|
47
|
-
g = ig.Graph(directed=True)
|
48
|
-
g.add_vertices(3)
|
49
|
-
g.add_edges([(0, 1), (1, 2)])
|
50
|
-
g.vs["score"] = [1, 0, 2]
|
51
|
-
# Run with directed=False, which should treat the graph as undirected
|
52
|
-
df_directed = personalized_pagerank_by_attribute(
|
53
|
-
g, "score", additional_propagation_args={"directed": True}
|
3
|
+
import pandas as pd
|
4
|
+
import igraph as ig
|
5
|
+
from napistu.network.net_propagation import (
|
6
|
+
net_propagate_attributes,
|
7
|
+
uniform_null,
|
8
|
+
parametric_null,
|
9
|
+
node_permutation_null,
|
10
|
+
edge_permutation_null,
|
11
|
+
NULL_GENERATORS,
|
12
|
+
network_propagation_with_null,
|
13
|
+
)
|
14
|
+
from napistu.network.constants import (
|
15
|
+
NAPISTU_GRAPH_VERTICES,
|
16
|
+
NULL_STRATEGIES,
|
17
|
+
)
|
18
|
+
|
19
|
+
|
20
|
+
def test_network_propagation_with_null():
|
21
|
+
"""Test the main orchestrator function with different null strategies."""
|
22
|
+
# Create test graph
|
23
|
+
graph = ig.Graph(5)
|
24
|
+
graph.vs[NAPISTU_GRAPH_VERTICES.NAME] = ["A", "B", "C", "D", "E"]
|
25
|
+
graph.vs["attr1"] = [1.0, 0.0, 2.0, 0.0, 1.5] # Non-negative, not all zero
|
26
|
+
graph.add_edges([(0, 1), (1, 2), (2, 3), (3, 4)])
|
27
|
+
|
28
|
+
attributes = ["attr1"]
|
29
|
+
|
30
|
+
# Test 1: Uniform null (should return ratios)
|
31
|
+
result_uniform = network_propagation_with_null(
|
32
|
+
graph, attributes, null_strategy=NULL_STRATEGIES.UNIFORM
|
54
33
|
)
|
55
|
-
|
56
|
-
|
34
|
+
|
35
|
+
# Check structure
|
36
|
+
assert isinstance(result_uniform, pd.DataFrame)
|
37
|
+
assert result_uniform.shape == (5, 1)
|
38
|
+
assert list(result_uniform.columns) == attributes
|
39
|
+
assert list(result_uniform.index) == ["A", "B", "C", "D", "E"]
|
40
|
+
|
41
|
+
# Should be ratios (can be > 1)
|
42
|
+
assert (result_uniform.values > 0).all(), "Ratios should be positive"
|
43
|
+
# Some ratios should be > 1 since observed scores concentrate on fewer nodes
|
44
|
+
assert (result_uniform.values > 1).any(), "Some ratios should be > 1"
|
45
|
+
|
46
|
+
# Test 2: Node permutation null (should return quantiles)
|
47
|
+
result_permutation = network_propagation_with_null(
|
48
|
+
graph,
|
49
|
+
attributes,
|
50
|
+
null_strategy=NULL_STRATEGIES.NODE_PERMUTATION,
|
51
|
+
n_samples=10, # Small for testing
|
57
52
|
)
|
58
|
-
|
59
|
-
|
60
|
-
|
53
|
+
|
54
|
+
# Check structure
|
55
|
+
assert isinstance(result_permutation, pd.DataFrame)
|
56
|
+
assert result_permutation.shape == (5, 1)
|
57
|
+
assert list(result_permutation.columns) == attributes
|
58
|
+
|
59
|
+
# Should be quantiles (0 to 1)
|
60
|
+
assert (result_permutation.values >= 0).all(), "Quantiles should be >= 0"
|
61
|
+
assert (result_permutation.values <= 1).all(), "Quantiles should be <= 1"
|
62
|
+
|
63
|
+
# Test 3: Edge permutation null
|
64
|
+
result_edge = network_propagation_with_null(
|
65
|
+
graph,
|
66
|
+
attributes,
|
67
|
+
null_strategy=NULL_STRATEGIES.EDGE_PERMUTATION,
|
68
|
+
n_samples=5,
|
69
|
+
burn_in_ratio=2, # Small for testing
|
70
|
+
sampling_ratio=0.2,
|
71
|
+
)
|
72
|
+
|
73
|
+
# Check structure
|
74
|
+
assert isinstance(result_edge, pd.DataFrame)
|
75
|
+
assert result_edge.shape == (5, 1)
|
76
|
+
assert (result_edge.values >= 0).all()
|
77
|
+
assert (result_edge.values <= 1).all()
|
78
|
+
|
79
|
+
# Test 4: Gaussian null
|
80
|
+
result_parametric = network_propagation_with_null(
|
81
|
+
graph, attributes, null_strategy=NULL_STRATEGIES.PARAMETRIC, n_samples=8
|
82
|
+
)
|
83
|
+
|
84
|
+
# Check structure
|
85
|
+
assert isinstance(result_parametric, pd.DataFrame)
|
86
|
+
assert result_parametric.shape == (5, 1)
|
87
|
+
assert (result_parametric.values >= 0).all()
|
88
|
+
assert (result_parametric.values <= 1).all()
|
89
|
+
|
90
|
+
# Test 5: Custom propagation parameters
|
91
|
+
result_custom = network_propagation_with_null(
|
92
|
+
graph,
|
93
|
+
attributes,
|
94
|
+
null_strategy=NULL_STRATEGIES.UNIFORM,
|
95
|
+
additional_propagation_args={"damping": 0.7},
|
61
96
|
)
|
62
|
-
|
97
|
+
|
98
|
+
# Should be different from default
|
63
99
|
assert not np.allclose(
|
64
|
-
|
100
|
+
result_uniform.values, result_custom.values
|
101
|
+
), "Different propagation parameters should give different results"
|
102
|
+
|
103
|
+
# Test 6: Custom null parameters (mask)
|
104
|
+
mask_array = np.array([True, False, True, False, True])
|
105
|
+
result_masked = network_propagation_with_null(
|
106
|
+
graph,
|
107
|
+
attributes,
|
108
|
+
null_strategy=NULL_STRATEGIES.NODE_PERMUTATION,
|
109
|
+
n_samples=5,
|
110
|
+
mask=mask_array,
|
65
111
|
)
|
66
112
|
|
113
|
+
# Should work without error
|
114
|
+
assert isinstance(result_masked, pd.DataFrame)
|
115
|
+
assert result_masked.shape == (5, 1)
|
67
116
|
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
with pytest.raises(ValueError):
|
73
|
-
personalized_pagerank_by_attribute(
|
74
|
-
g, "score", additional_propagation_args={"not_a_real_arg": 123}
|
117
|
+
# Test 7: Error handling - invalid null strategy
|
118
|
+
with pytest.raises(ValueError, match="Unknown null strategy"):
|
119
|
+
network_propagation_with_null(
|
120
|
+
graph, attributes, null_strategy="invalid_strategy"
|
75
121
|
)
|
76
122
|
|
77
123
|
|
78
|
-
def
|
79
|
-
|
80
|
-
#
|
81
|
-
|
82
|
-
|
124
|
+
def test_net_propagate_attributes():
|
125
|
+
"""Test net_propagate_attributes with multiple attributes and various scenarios."""
|
126
|
+
# Create test graph with edges for realistic propagation
|
127
|
+
graph = ig.Graph(4)
|
128
|
+
graph.vs[NAPISTU_GRAPH_VERTICES.NAME] = ["node1", "node2", "node3", "node4"]
|
129
|
+
graph.vs["attr1"] = [1.0, 0.0, 2.0, 0.0] # Non-negative, not all zero
|
130
|
+
graph.vs["attr2"] = [0.5, 1.5, 0.0, 1.0] # Non-negative, not all zero
|
131
|
+
graph.add_edges([(0, 1), (1, 2), (2, 3), (0, 3)]) # Create connected graph
|
132
|
+
|
133
|
+
# Test 1: Basic functionality with two attributes
|
134
|
+
result = net_propagate_attributes(graph, ["attr1", "attr2"])
|
135
|
+
|
136
|
+
# Check structure
|
137
|
+
assert isinstance(result, pd.DataFrame)
|
138
|
+
assert result.shape == (4, 2)
|
139
|
+
assert list(result.index) == ["node1", "node2", "node3", "node4"]
|
140
|
+
assert list(result.columns) == ["attr1", "attr2"]
|
141
|
+
|
142
|
+
# Check that values are valid probabilities (PPR returns probabilities)
|
143
|
+
assert np.all(result.values >= 0)
|
144
|
+
assert np.all(result.values <= 1)
|
145
|
+
# Each column should sum to approximately 1 (PPR property)
|
146
|
+
assert np.allclose(result.sum(axis=0), [1.0, 1.0], atol=1e-10)
|
147
|
+
|
148
|
+
# Test 2: Single attribute
|
149
|
+
result_single = net_propagate_attributes(graph, ["attr1"])
|
150
|
+
assert result_single.shape == (4, 1)
|
151
|
+
assert list(result_single.columns) == ["attr1"]
|
152
|
+
|
153
|
+
# Test 3: Graph without names (should use indices)
|
154
|
+
graph_no_names = ig.Graph(3)
|
155
|
+
graph_no_names.vs["attr1"] = [1.0, 2.0, 1.0]
|
156
|
+
graph_no_names.add_edges([(0, 1), (1, 2)])
|
157
|
+
|
158
|
+
result_no_names = net_propagate_attributes(graph_no_names, ["attr1"])
|
159
|
+
assert list(result_no_names.index) == [0, 1, 2] # Should use integer indices
|
83
160
|
|
161
|
+
# Test 4: Invalid propagation method
|
162
|
+
with pytest.raises(ValueError, match="Invalid propagation method"):
|
163
|
+
net_propagate_attributes(graph, ["attr1"], propagation_method="invalid_method")
|
84
164
|
|
85
|
-
|
86
|
-
|
87
|
-
|
165
|
+
# Test 5: Additional arguments (test damping parameter)
|
166
|
+
result_default = net_propagate_attributes(graph, ["attr1"])
|
167
|
+
result_damped = net_propagate_attributes(
|
168
|
+
graph, ["attr1"], additional_propagation_args={"damping": 0.5} # Lower damping
|
169
|
+
)
|
170
|
+
|
171
|
+
# Results should be different with different damping
|
172
|
+
assert not np.allclose(result_default.values, result_damped.values)
|
173
|
+
|
174
|
+
# Test 6: Invalid attribute (should be caught by internal validation)
|
175
|
+
graph.vs["bad_attr"] = [-1.0, 1.0, 2.0, 0.0] # Has negative values
|
176
|
+
with pytest.raises(ValueError, match="contains negative values"):
|
177
|
+
net_propagate_attributes(graph, ["bad_attr"])
|
178
|
+
|
179
|
+
# Test 7: Zero attribute (should be caught by internal validation)
|
180
|
+
graph.vs["zero_attr"] = [0.0, 0.0, 0.0, 0.0]
|
88
181
|
with pytest.raises(ValueError, match="zero for all vertices"):
|
89
|
-
|
182
|
+
net_propagate_attributes(graph, ["zero_attr"])
|
183
|
+
|
184
|
+
|
185
|
+
def test_all_null_generators_structure():
|
186
|
+
"""Test all null generators with default options and validate output structure."""
|
187
|
+
# Create test graph with edges for realistic propagation
|
188
|
+
graph = ig.Graph(5)
|
189
|
+
graph.vs[NAPISTU_GRAPH_VERTICES.NAME] = ["A", "B", "C", "D", "E"]
|
190
|
+
graph.vs["attr1"] = [1.0, 0.0, 2.0, 0.0, 1.5] # Non-negative, not all zero
|
191
|
+
graph.vs["attr2"] = [0.5, 1.0, 0.0, 2.0, 0.0] # Non-negative, not all zero
|
192
|
+
graph.add_edges([(0, 1), (1, 2), (2, 3), (3, 4)])
|
193
|
+
|
194
|
+
attributes = ["attr1", "attr2"]
|
195
|
+
n_samples = 3 # Small for testing
|
196
|
+
|
197
|
+
for generator_name, generator_func in NULL_GENERATORS.items():
|
198
|
+
print(f"Testing {generator_name}")
|
199
|
+
|
200
|
+
if generator_name == NULL_STRATEGIES.UNIFORM:
|
201
|
+
# Uniform null doesn't take n_samples
|
202
|
+
result = generator_func(graph, attributes)
|
203
|
+
expected_rows = 5 # One row per node
|
204
|
+
elif generator_name == NULL_STRATEGIES.EDGE_PERMUTATION:
|
205
|
+
# Edge permutation has different parameters
|
206
|
+
result = generator_func(graph, attributes, n_samples=n_samples)
|
207
|
+
expected_rows = n_samples * 5 # n_samples rows per node
|
208
|
+
else:
|
209
|
+
# Gaussian and node_permutation
|
210
|
+
result = generator_func(graph, attributes, n_samples=n_samples)
|
211
|
+
expected_rows = n_samples * 5 # n_samples rows per node
|
212
|
+
|
213
|
+
# Validate structure
|
214
|
+
assert isinstance(
|
215
|
+
result, pd.DataFrame
|
216
|
+
), f"{generator_name} should return DataFrame"
|
217
|
+
assert result.shape == (
|
218
|
+
expected_rows,
|
219
|
+
2,
|
220
|
+
), f"{generator_name} wrong shape: {result.shape}"
|
221
|
+
assert list(result.columns) == attributes, f"{generator_name} wrong columns"
|
222
|
+
|
223
|
+
# Validate index structure
|
224
|
+
if generator_name == NULL_STRATEGIES.UNIFORM:
|
225
|
+
assert list(result.index) == [
|
226
|
+
"A",
|
227
|
+
"B",
|
228
|
+
"C",
|
229
|
+
"D",
|
230
|
+
"E",
|
231
|
+
], f"{generator_name} wrong index"
|
232
|
+
else:
|
233
|
+
expected_index = ["A", "B", "C", "D", "E"] * n_samples
|
234
|
+
assert (
|
235
|
+
list(result.index) == expected_index
|
236
|
+
), f"{generator_name} wrong repeated index"
|
237
|
+
|
238
|
+
# Validate values are numeric and finite (propagated outputs should be valid probabilities)
|
239
|
+
assert result.isna().sum().sum() == 0, f"{generator_name} contains NaN values"
|
240
|
+
assert np.isfinite(
|
241
|
+
result.values
|
242
|
+
).all(), f"{generator_name} contains infinite values"
|
243
|
+
assert (result.values >= 0).all(), f"{generator_name} contains negative values"
|
244
|
+
assert (
|
245
|
+
result.values <= 1
|
246
|
+
).all(), f"{generator_name} should contain probabilities <= 1"
|
247
|
+
|
248
|
+
# Each sample should sum to approximately 1 (PPR property)
|
249
|
+
if generator_name == NULL_STRATEGIES.UNIFORM:
|
250
|
+
assert np.allclose(
|
251
|
+
result.sum(axis=0), [1.0, 1.0], atol=1e-10
|
252
|
+
), f"{generator_name} doesn't sum to 1"
|
253
|
+
else:
|
254
|
+
# For multiple samples, each individual sample should sum to 1
|
255
|
+
for i in range(n_samples):
|
256
|
+
start_idx = i * 5
|
257
|
+
end_idx = (i + 1) * 5
|
258
|
+
sample_data = result.iloc[start_idx:end_idx]
|
259
|
+
assert np.allclose(
|
260
|
+
sample_data.sum(axis=0), [1.0, 1.0], atol=1e-10
|
261
|
+
), f"{generator_name} sample {i} doesn't sum to 1"
|
262
|
+
|
263
|
+
|
264
|
+
def test_mask_application():
|
265
|
+
"""Test that masks are correctly applied across all null generators."""
|
266
|
+
# Create test graph
|
267
|
+
graph = ig.Graph(6)
|
268
|
+
graph.vs[NAPISTU_GRAPH_VERTICES.NAME] = ["A", "B", "C", "D", "E", "F"]
|
269
|
+
graph.vs["attr1"] = [1.0, 0.0, 2.0, 0.0, 1.5, 0.0] # Nonzero at indices 0, 2, 4
|
270
|
+
graph.vs["attr2"] = [0.0, 1.0, 0.0, 2.0, 0.0, 1.0] # Nonzero at indices 1, 3, 5
|
271
|
+
graph.add_edges([(0, 1), (1, 2), (2, 3), (3, 4), (4, 5)])
|
272
|
+
|
273
|
+
attributes = ["attr1", "attr2"]
|
274
|
+
|
275
|
+
# Test mask that includes nodes with nonzero values for both attributes
|
276
|
+
# Use nodes 0, 1, 2, 3 which covers nonzero values for both attributes
|
277
|
+
mask_array = np.array([True, True, True, True, False, False]) # Nodes 0, 1, 2, 3
|
278
|
+
|
279
|
+
for generator_name, generator_func in NULL_GENERATORS.items():
|
280
|
+
print(f"Testing mask application for {generator_name}")
|
281
|
+
|
282
|
+
if generator_name == NULL_STRATEGIES.UNIFORM:
|
283
|
+
result = generator_func(graph, attributes, mask=mask_array)
|
284
|
+
|
285
|
+
# For uniform null with mask, verify structure is correct
|
286
|
+
assert result.shape == (6, 2), f"{generator_name} wrong shape with mask"
|
287
|
+
# After propagation, all nodes will have some value due to network effect
|
288
|
+
assert (
|
289
|
+
result.values > 0
|
290
|
+
).all(), "All nodes should have positive values after propagation"
|
291
|
+
|
292
|
+
elif generator_name == NULL_STRATEGIES.EDGE_PERMUTATION:
|
293
|
+
# Edge permutation ignores mask, just test it doesn't crash
|
294
|
+
result = generator_func(graph, attributes, n_samples=2)
|
295
|
+
assert result.shape[0] == 12 # 2 samples * 6 nodes
|
296
|
+
|
297
|
+
else:
|
298
|
+
# Gaussian and node_permutation with mask
|
299
|
+
result = generator_func(graph, attributes, mask=mask_array, n_samples=2)
|
300
|
+
|
301
|
+
# Check that structure is maintained
|
302
|
+
assert result.shape == (12, 2) # 2 samples * 6 nodes
|
303
|
+
|
304
|
+
|
305
|
+
def test_edge_cases_and_errors():
|
306
|
+
"""Test edge cases and error conditions for null generators."""
|
307
|
+
# Create minimal test graph
|
308
|
+
graph = ig.Graph(3)
|
309
|
+
graph.vs["attr1"] = [1.0, 2.0, 0.0]
|
310
|
+
graph.vs["bad_attr"] = [0.0, 0.0, 0.0] # All zeros
|
311
|
+
graph.add_edges([(0, 1), (1, 2)])
|
312
|
+
|
313
|
+
# Test 1: All zero attribute should raise error for all generators
|
314
|
+
with pytest.raises(ValueError):
|
315
|
+
uniform_null(graph, ["bad_attr"])
|
316
|
+
|
317
|
+
with pytest.raises(ValueError):
|
318
|
+
parametric_null(graph, ["bad_attr"])
|
319
|
+
|
320
|
+
with pytest.raises(ValueError):
|
321
|
+
node_permutation_null(graph, ["bad_attr"])
|
322
|
+
|
323
|
+
with pytest.raises(ValueError):
|
324
|
+
edge_permutation_null(graph, ["bad_attr"])
|
325
|
+
|
326
|
+
# Test 2: Empty mask should raise error
|
327
|
+
empty_mask = np.array([False, False, False])
|
328
|
+
with pytest.raises(ValueError, match="No nodes in mask"):
|
329
|
+
uniform_null(graph, ["attr1"], mask=empty_mask)
|
330
|
+
|
331
|
+
# Test 3: Single node mask (edge case)
|
332
|
+
single_mask = np.array([True, False, False])
|
333
|
+
result = uniform_null(graph, ["attr1"], mask=single_mask)
|
334
|
+
assert result.shape == (3, 1) # Should work
|
335
|
+
|
336
|
+
# Test 4: Replace parameter in node permutation
|
337
|
+
result_no_replace = node_permutation_null(
|
338
|
+
graph, ["attr1"], replace=False, n_samples=2
|
339
|
+
)
|
340
|
+
result_replace = node_permutation_null(graph, ["attr1"], replace=True, n_samples=2)
|
341
|
+
|
342
|
+
# Both should have same structure
|
343
|
+
assert result_no_replace.shape == result_replace.shape
|
344
|
+
|
345
|
+
|
346
|
+
def test_propagation_method_parameters():
|
347
|
+
"""Test that propagation method and additional arguments are properly passed through."""
|
348
|
+
# Create test graph
|
349
|
+
graph = ig.Graph(4)
|
350
|
+
graph.vs["attr1"] = [1.0, 2.0, 0.0, 1.5]
|
351
|
+
graph.add_edges([(0, 1), (1, 2), (2, 3)])
|
352
|
+
|
353
|
+
# Test different damping parameters produce different results
|
354
|
+
result_default = uniform_null(graph, ["attr1"])
|
355
|
+
result_damped = uniform_null(
|
356
|
+
graph, ["attr1"], additional_propagation_args={"damping": 0.5}
|
357
|
+
)
|
358
|
+
|
359
|
+
# Results should be different with different damping
|
360
|
+
assert not np.allclose(
|
361
|
+
result_default.values, result_damped.values
|
362
|
+
), "Different damping should produce different results"
|
363
|
+
|
364
|
+
# Test that all generators accept method parameters
|
365
|
+
for generator_name, generator_func in NULL_GENERATORS.items():
|
366
|
+
if generator_name == NULL_STRATEGIES.UNIFORM:
|
367
|
+
result = generator_func(
|
368
|
+
graph, ["attr1"], additional_propagation_args={"damping": 0.8}
|
369
|
+
)
|
370
|
+
else:
|
371
|
+
result = generator_func(
|
372
|
+
graph,
|
373
|
+
["attr1"],
|
374
|
+
additional_propagation_args={"damping": 0.8},
|
375
|
+
n_samples=2,
|
376
|
+
)
|
377
|
+
|
378
|
+
# Should produce valid results
|
379
|
+
assert isinstance(result, pd.DataFrame)
|
380
|
+
assert not result.empty
|
@@ -0,0 +1,133 @@
|
|
1
|
+
import pytest
|
2
|
+
import numpy as np
|
3
|
+
import pandas as pd
|
4
|
+
from napistu.statistics import quantiles
|
5
|
+
|
6
|
+
|
7
|
+
def test_calculate_quantiles_valid_inputs():
|
8
|
+
"""Test calculate_quantiles with valid, well-formed inputs."""
|
9
|
+
# Create observed data: 4 features x 3 attributes
|
10
|
+
observed = pd.DataFrame(
|
11
|
+
[[0.8, 0.3, 0.9], [0.2, 0.7, 0.1], [0.5, 0.5, 0.5], [0.1, 0.9, 0.2]],
|
12
|
+
index=["gene1", "gene2", "gene3", "gene4"],
|
13
|
+
columns=["attr1", "attr2", "attr3"],
|
14
|
+
)
|
15
|
+
|
16
|
+
# Create null data: 2 samples per feature (8 rows total)
|
17
|
+
null_index = ["gene1", "gene2", "gene3", "gene4"] * 2
|
18
|
+
null_data = pd.DataFrame(
|
19
|
+
[
|
20
|
+
[0.1, 0.2, 0.3], # gene1 sample 1
|
21
|
+
[0.4, 0.5, 0.6], # gene2 sample 1
|
22
|
+
[0.7, 0.8, 0.9], # gene3 sample 1
|
23
|
+
[0.0, 0.1, 0.2], # gene4 sample 1
|
24
|
+
[0.2, 0.3, 0.4], # gene1 sample 2
|
25
|
+
[0.5, 0.6, 0.7], # gene2 sample 2
|
26
|
+
[0.8, 0.9, 1.0], # gene3 sample 2
|
27
|
+
[0.1, 0.2, 0.3], # gene4 sample 2
|
28
|
+
],
|
29
|
+
index=null_index,
|
30
|
+
columns=["attr1", "attr2", "attr3"],
|
31
|
+
)
|
32
|
+
|
33
|
+
# Calculate quantiles
|
34
|
+
result = quantiles.calculate_quantiles(observed, null_data)
|
35
|
+
|
36
|
+
# Verify output structure
|
37
|
+
assert result.shape == observed.shape
|
38
|
+
assert list(result.index) == list(observed.index)
|
39
|
+
assert list(result.columns) == list(observed.columns)
|
40
|
+
|
41
|
+
# Check specific quantile calculations
|
42
|
+
# gene1, attr1: observed=0.8, nulls=[0.1, 0.2] -> quantile = 1.0 (100%)
|
43
|
+
assert result.loc["gene1", "attr1"] == 1.0
|
44
|
+
|
45
|
+
# gene2, attr2: observed=0.7, nulls=[0.5, 0.6] -> quantile = 1.0 (100%)
|
46
|
+
assert result.loc["gene2", "attr2"] == 1.0
|
47
|
+
|
48
|
+
# gene3, attr3: observed=0.5, nulls=[0.9, 1.0] -> quantile = 0.0 (0%)
|
49
|
+
assert result.loc["gene3", "attr3"] == 0.0
|
50
|
+
|
51
|
+
# gene4, attr1: observed=0.1, nulls=[0.0, 0.1]
|
52
|
+
# With ≤: 0.0 ≤ 0.1 (True), 0.1 ≤ 0.1 (True) → 2/2 = 1.0
|
53
|
+
assert result.loc["gene4", "attr1"] == 1.0
|
54
|
+
|
55
|
+
|
56
|
+
def test_calculate_quantiles_error_cases():
|
57
|
+
"""Test calculate_quantiles with invalid inputs that should raise errors or warnings."""
|
58
|
+
# Base observed data
|
59
|
+
observed = pd.DataFrame(
|
60
|
+
[[0.8, 0.3], [0.2, 0.7]], index=["gene1", "gene2"], columns=["attr1", "attr2"]
|
61
|
+
)
|
62
|
+
|
63
|
+
# Test 1: Mismatched columns
|
64
|
+
null_wrong_cols = pd.DataFrame(
|
65
|
+
[[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]],
|
66
|
+
index=["gene1", "gene2"],
|
67
|
+
columns=["attr1", "attr2", "attr3"], # Extra column
|
68
|
+
)
|
69
|
+
|
70
|
+
with pytest.raises((KeyError, ValueError)):
|
71
|
+
quantiles.calculate_quantiles(observed, null_wrong_cols)
|
72
|
+
|
73
|
+
# Test 2: Missing features in null data
|
74
|
+
null_missing_feature = pd.DataFrame(
|
75
|
+
[[0.1, 0.2]], index=["gene1"], columns=["attr1", "attr2"] # Missing gene2
|
76
|
+
)
|
77
|
+
|
78
|
+
# Current implementation doesn't validate - it will likely fail in groupby or indexing
|
79
|
+
# This test verifies current behavior (may change if validation added)
|
80
|
+
try:
|
81
|
+
result = quantiles.calculate_quantiles(observed, null_missing_feature)
|
82
|
+
# If it succeeds, gene2 quantiles will be invalid/error
|
83
|
+
assert True # Just check it doesn't crash for now
|
84
|
+
except (KeyError, ValueError, IndexError):
|
85
|
+
assert True # Expected behavior
|
86
|
+
|
87
|
+
# Test 3: Unequal null samples per feature
|
88
|
+
null_unequal_samples = pd.DataFrame(
|
89
|
+
[
|
90
|
+
[0.1, 0.2], # gene1 sample 1
|
91
|
+
[0.3, 0.4], # gene1 sample 2
|
92
|
+
[0.5, 0.6], # gene2 sample 1 (only 1 sample)
|
93
|
+
],
|
94
|
+
index=["gene1", "gene1", "gene2"],
|
95
|
+
columns=["attr1", "attr2"],
|
96
|
+
)
|
97
|
+
|
98
|
+
# This should still work but may give different results
|
99
|
+
result = quantiles.calculate_quantiles(observed, null_unequal_samples)
|
100
|
+
assert result.shape == observed.shape
|
101
|
+
|
102
|
+
# Test 4: Empty null data
|
103
|
+
null_empty = pd.DataFrame(columns=["attr1", "attr2"])
|
104
|
+
|
105
|
+
with pytest.raises((ValueError, IndexError)):
|
106
|
+
quantiles.calculate_quantiles(observed, null_empty)
|
107
|
+
|
108
|
+
# Test 5: Single null sample (edge case)
|
109
|
+
null_single = pd.DataFrame(
|
110
|
+
[[0.1, 0.2], [0.5, 0.6]], index=["gene1", "gene2"], columns=["attr1", "attr2"]
|
111
|
+
)
|
112
|
+
|
113
|
+
result = quantiles.calculate_quantiles(observed, null_single)
|
114
|
+
assert result.shape == observed.shape
|
115
|
+
# With single sample, results should be binary (0 or 1)
|
116
|
+
assert all(val in [0.0, 1.0] for val in result.values.flatten())
|
117
|
+
|
118
|
+
# Test 6: NaN values in data
|
119
|
+
observed_with_nan = observed.copy()
|
120
|
+
observed_with_nan.loc["gene1", "attr1"] = np.nan
|
121
|
+
|
122
|
+
null_with_nan = pd.DataFrame(
|
123
|
+
[[np.nan, 0.2], [0.4, 0.5], [0.1, 0.3], [0.6, 0.7]],
|
124
|
+
index=["gene1", "gene2", "gene1", "gene2"],
|
125
|
+
columns=["attr1", "attr2"],
|
126
|
+
)
|
127
|
+
|
128
|
+
# Should raise ValueError for NaN values
|
129
|
+
with pytest.raises(ValueError, match="NaN values found in observed data"):
|
130
|
+
quantiles.calculate_quantiles(observed_with_nan, null_single)
|
131
|
+
|
132
|
+
with pytest.raises(ValueError, match="NaN values found in null data"):
|
133
|
+
quantiles.calculate_quantiles(observed, null_with_nan)
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|