napistu 0.4.0__py3-none-any.whl → 0.4.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- napistu/constants.py +2 -0
- napistu/gcs/constants.py +15 -15
- napistu/network/constants.py +23 -1
- napistu/network/ig_utils.py +161 -1
- napistu/network/net_create.py +3 -3
- napistu/network/net_propagation.py +646 -96
- napistu/ontologies/id_tables.py +282 -0
- napistu/sbml_dfs_core.py +53 -63
- napistu/sbml_dfs_utils.py +82 -18
- napistu/statistics/__init__.py +10 -0
- napistu/statistics/quantiles.py +82 -0
- {napistu-0.4.0.dist-info → napistu-0.4.2.dist-info}/METADATA +6 -1
- {napistu-0.4.0.dist-info → napistu-0.4.2.dist-info}/RECORD +23 -18
- tests/test_network_ig_utils.py +133 -0
- tests/test_network_net_propagation.py +365 -74
- tests/test_ontologies_id_tables.py +198 -0
- tests/test_sbml_dfs_core.py +30 -19
- tests/test_sbml_dfs_utils.py +70 -0
- tests/test_statistics_quantiles.py +133 -0
- {napistu-0.4.0.dist-info → napistu-0.4.2.dist-info}/WHEEL +0 -0
- {napistu-0.4.0.dist-info → napistu-0.4.2.dist-info}/entry_points.txt +0 -0
- {napistu-0.4.0.dist-info → napistu-0.4.2.dist-info}/licenses/LICENSE +0 -0
- {napistu-0.4.0.dist-info → napistu-0.4.2.dist-info}/top_level.txt +0 -0
tests/test_network_net_propagation.py
@@ -1,89 +1,380 @@
 import pytest
-import igraph as ig
 import numpy as np
- [old lines 4-33 removed; content not rendered in this view]
-    g = ig.Graph.Full(3)
-    g.vs["score"] = [1, None, 2]
-    # None should be treated as 0
-    df = personalized_pagerank_by_attribute(g, "score")
-    assert np.isclose(df["score"].sum(), 3)
-    # Negative values should raise
-    g.vs["score"] = [1, -1, 2]
-    with pytest.raises(ValueError):
-        personalized_pagerank_by_attribute(g, "score")
-
-
-def test_personalized_pagerank_by_attribute_additional_args_directed():
-    # create an asymmetric directed graph to test whether additional_propagation_args is respected
-    g = ig.Graph(directed=True)
-    g.add_vertices(3)
-    g.add_edges([(0, 1), (1, 2)])
-    g.vs["score"] = [1, 0, 2]
-    # Run with directed=False, which should treat the graph as undirected
-    df_directed = personalized_pagerank_by_attribute(
-        g, "score", additional_propagation_args={"directed": True}
+import pandas as pd
+import igraph as ig
+from napistu.network.net_propagation import (
+    net_propagate_attributes,
+    uniform_null,
+    parametric_null,
+    node_permutation_null,
+    edge_permutation_null,
+    NULL_GENERATORS,
+    network_propagation_with_null,
+)
+from napistu.network.constants import (
+    NAPISTU_GRAPH_VERTICES,
+    NULL_STRATEGIES,
+)
+
+
+def test_network_propagation_with_null():
+    """Test the main orchestrator function with different null strategies."""
+    # Create test graph
+    graph = ig.Graph(5)
+    graph.vs[NAPISTU_GRAPH_VERTICES.NAME] = ["A", "B", "C", "D", "E"]
+    graph.vs["attr1"] = [1.0, 0.0, 2.0, 0.0, 1.5]  # Non-negative, not all zero
+    graph.add_edges([(0, 1), (1, 2), (2, 3), (3, 4)])
+
+    attributes = ["attr1"]
+
+    # Test 1: Uniform null (should return ratios)
+    result_uniform = network_propagation_with_null(
+        graph, attributes, null_strategy=NULL_STRATEGIES.UNIFORM
     )
-
-
+
+    # Check structure
+    assert isinstance(result_uniform, pd.DataFrame)
+    assert result_uniform.shape == (5, 1)
+    assert list(result_uniform.columns) == attributes
+    assert list(result_uniform.index) == ["A", "B", "C", "D", "E"]
+
+    # Should be ratios (can be > 1)
+    assert (result_uniform.values > 0).all(), "Ratios should be positive"
+    # Some ratios should be > 1 since observed scores concentrate on fewer nodes
+    assert (result_uniform.values > 1).any(), "Some ratios should be > 1"
+
+    # Test 2: Node permutation null (should return quantiles)
+    result_permutation = network_propagation_with_null(
+        graph,
+        attributes,
+        null_strategy=NULL_STRATEGIES.NODE_PERMUTATION,
+        n_samples=10,  # Small for testing
     )
-
-
-
+
+    # Check structure
+    assert isinstance(result_permutation, pd.DataFrame)
+    assert result_permutation.shape == (5, 1)
+    assert list(result_permutation.columns) == attributes
+
+    # Should be quantiles (0 to 1)
+    assert (result_permutation.values >= 0).all(), "Quantiles should be >= 0"
+    assert (result_permutation.values <= 1).all(), "Quantiles should be <= 1"
+
+    # Test 3: Edge permutation null
+    result_edge = network_propagation_with_null(
+        graph,
+        attributes,
+        null_strategy=NULL_STRATEGIES.EDGE_PERMUTATION,
+        n_samples=5,
+        burn_in_ratio=2,  # Small for testing
+        sampling_ratio=0.2,
+    )
+
+    # Check structure
+    assert isinstance(result_edge, pd.DataFrame)
+    assert result_edge.shape == (5, 1)
+    assert (result_edge.values >= 0).all()
+    assert (result_edge.values <= 1).all()
+
+    # Test 4: Gaussian null
+    result_parametric = network_propagation_with_null(
+        graph, attributes, null_strategy=NULL_STRATEGIES.PARAMETRIC, n_samples=8
+    )
+
+    # Check structure
+    assert isinstance(result_parametric, pd.DataFrame)
+    assert result_parametric.shape == (5, 1)
+    assert (result_parametric.values >= 0).all()
+    assert (result_parametric.values <= 1).all()
+
+    # Test 5: Custom propagation parameters
+    result_custom = network_propagation_with_null(
+        graph,
+        attributes,
+        null_strategy=NULL_STRATEGIES.UNIFORM,
+        additional_propagation_args={"damping": 0.7},
     )
-
+
+    # Should be different from default
     assert not np.allclose(
-
+        result_uniform.values, result_custom.values
+    ), "Different propagation parameters should give different results"
+
+    # Test 6: Custom null parameters (mask)
+    mask_array = np.array([True, False, True, False, True])
+    result_masked = network_propagation_with_null(
+        graph,
+        attributes,
+        null_strategy=NULL_STRATEGIES.NODE_PERMUTATION,
+        n_samples=5,
+        mask=mask_array,
     )
 
+    # Should work without error
+    assert isinstance(result_masked, pd.DataFrame)
+    assert result_masked.shape == (5, 1)
 
-
-
-
-
-    with pytest.raises(ValueError):
-        personalized_pagerank_by_attribute(
-            g, "score", additional_propagation_args={"not_a_real_arg": 123}
+    # Test 7: Error handling - invalid null strategy
+    with pytest.raises(ValueError, match="Unknown null strategy"):
+        network_propagation_with_null(
+            graph, attributes, null_strategy="invalid_strategy"
        )
 
 
- [old lines 78-82 removed; content not rendered in this view]
+def test_net_propagate_attributes():
+    """Test net_propagate_attributes with multiple attributes and various scenarios."""
+    # Create test graph with edges for realistic propagation
+    graph = ig.Graph(4)
+    graph.vs[NAPISTU_GRAPH_VERTICES.NAME] = ["node1", "node2", "node3", "node4"]
+    graph.vs["attr1"] = [1.0, 0.0, 2.0, 0.0]  # Non-negative, not all zero
+    graph.vs["attr2"] = [0.5, 1.5, 0.0, 1.0]  # Non-negative, not all zero
+    graph.add_edges([(0, 1), (1, 2), (2, 3), (0, 3)])  # Create connected graph
+
+    # Test 1: Basic functionality with two attributes
+    result = net_propagate_attributes(graph, ["attr1", "attr2"])
+
+    # Check structure
+    assert isinstance(result, pd.DataFrame)
+    assert result.shape == (4, 2)
+    assert list(result.index) == ["node1", "node2", "node3", "node4"]
+    assert list(result.columns) == ["attr1", "attr2"]
+
+    # Check that values are valid probabilities (PPR returns probabilities)
+    assert np.all(result.values >= 0)
+    assert np.all(result.values <= 1)
+    # Each column should sum to approximately 1 (PPR property)
+    assert np.allclose(result.sum(axis=0), [1.0, 1.0], atol=1e-10)
+
+    # Test 2: Single attribute
+    result_single = net_propagate_attributes(graph, ["attr1"])
+    assert result_single.shape == (4, 1)
+    assert list(result_single.columns) == ["attr1"]
+
+    # Test 3: Graph without names (should use indices)
+    graph_no_names = ig.Graph(3)
+    graph_no_names.vs["attr1"] = [1.0, 2.0, 1.0]
+    graph_no_names.add_edges([(0, 1), (1, 2)])
+
+    result_no_names = net_propagate_attributes(graph_no_names, ["attr1"])
+    assert list(result_no_names.index) == [0, 1, 2]  # Should use integer indices
 
+    # Test 4: Invalid propagation method
+    with pytest.raises(ValueError, match="Invalid propagation method"):
+        net_propagate_attributes(graph, ["attr1"], propagation_method="invalid_method")
 
-
-
-
+    # Test 5: Additional arguments (test damping parameter)
+    result_default = net_propagate_attributes(graph, ["attr1"])
+    result_damped = net_propagate_attributes(
+        graph, ["attr1"], additional_propagation_args={"damping": 0.5}  # Lower damping
+    )
+
+    # Results should be different with different damping
+    assert not np.allclose(result_default.values, result_damped.values)
+
+    # Test 6: Invalid attribute (should be caught by internal validation)
+    graph.vs["bad_attr"] = [-1.0, 1.0, 2.0, 0.0]  # Has negative values
+    with pytest.raises(ValueError, match="contains negative values"):
+        net_propagate_attributes(graph, ["bad_attr"])
+
+    # Test 7: Zero attribute (should be caught by internal validation)
+    graph.vs["zero_attr"] = [0.0, 0.0, 0.0, 0.0]
     with pytest.raises(ValueError, match="zero for all vertices"):
-
+        net_propagate_attributes(graph, ["zero_attr"])
+
+
+def test_all_null_generators_structure():
+    """Test all null generators with default options and validate output structure."""
+    # Create test graph with edges for realistic propagation
+    graph = ig.Graph(5)
+    graph.vs[NAPISTU_GRAPH_VERTICES.NAME] = ["A", "B", "C", "D", "E"]
+    graph.vs["attr1"] = [1.0, 0.0, 2.0, 0.0, 1.5]  # Non-negative, not all zero
+    graph.vs["attr2"] = [0.5, 1.0, 0.0, 2.0, 0.0]  # Non-negative, not all zero
+    graph.add_edges([(0, 1), (1, 2), (2, 3), (3, 4)])
+
+    attributes = ["attr1", "attr2"]
+    n_samples = 3  # Small for testing
+
+    for generator_name, generator_func in NULL_GENERATORS.items():
+        print(f"Testing {generator_name}")
+
+        if generator_name == NULL_STRATEGIES.UNIFORM:
+            # Uniform null doesn't take n_samples
+            result = generator_func(graph, attributes)
+            expected_rows = 5  # One row per node
+        elif generator_name == NULL_STRATEGIES.EDGE_PERMUTATION:
+            # Edge permutation has different parameters
+            result = generator_func(graph, attributes, n_samples=n_samples)
+            expected_rows = n_samples * 5  # n_samples rows per node
+        else:
+            # Gaussian and node_permutation
+            result = generator_func(graph, attributes, n_samples=n_samples)
+            expected_rows = n_samples * 5  # n_samples rows per node
+
+        # Validate structure
+        assert isinstance(
+            result, pd.DataFrame
+        ), f"{generator_name} should return DataFrame"
+        assert result.shape == (
+            expected_rows,
+            2,
+        ), f"{generator_name} wrong shape: {result.shape}"
+        assert list(result.columns) == attributes, f"{generator_name} wrong columns"
+
+        # Validate index structure
+        if generator_name == NULL_STRATEGIES.UNIFORM:
+            assert list(result.index) == [
+                "A",
+                "B",
+                "C",
+                "D",
+                "E",
+            ], f"{generator_name} wrong index"
+        else:
+            expected_index = ["A", "B", "C", "D", "E"] * n_samples
+            assert (
+                list(result.index) == expected_index
+            ), f"{generator_name} wrong repeated index"
+
+        # Validate values are numeric and finite (propagated outputs should be valid probabilities)
+        assert result.isna().sum().sum() == 0, f"{generator_name} contains NaN values"
+        assert np.isfinite(
+            result.values
+        ).all(), f"{generator_name} contains infinite values"
+        assert (result.values >= 0).all(), f"{generator_name} contains negative values"
+        assert (
+            result.values <= 1
+        ).all(), f"{generator_name} should contain probabilities <= 1"
+
+        # Each sample should sum to approximately 1 (PPR property)
+        if generator_name == NULL_STRATEGIES.UNIFORM:
+            assert np.allclose(
+                result.sum(axis=0), [1.0, 1.0], atol=1e-10
+            ), f"{generator_name} doesn't sum to 1"
+        else:
+            # For multiple samples, each individual sample should sum to 1
+            for i in range(n_samples):
+                start_idx = i * 5
+                end_idx = (i + 1) * 5
+                sample_data = result.iloc[start_idx:end_idx]
+                assert np.allclose(
+                    sample_data.sum(axis=0), [1.0, 1.0], atol=1e-10
+                ), f"{generator_name} sample {i} doesn't sum to 1"
+
+
+def test_mask_application():
+    """Test that masks are correctly applied across all null generators."""
+    # Create test graph
+    graph = ig.Graph(6)
+    graph.vs[NAPISTU_GRAPH_VERTICES.NAME] = ["A", "B", "C", "D", "E", "F"]
+    graph.vs["attr1"] = [1.0, 0.0, 2.0, 0.0, 1.5, 0.0]  # Nonzero at indices 0, 2, 4
+    graph.vs["attr2"] = [0.0, 1.0, 0.0, 2.0, 0.0, 1.0]  # Nonzero at indices 1, 3, 5
+    graph.add_edges([(0, 1), (1, 2), (2, 3), (3, 4), (4, 5)])
+
+    attributes = ["attr1", "attr2"]
+
+    # Test mask that includes nodes with nonzero values for both attributes
+    # Use nodes 0, 1, 2, 3 which covers nonzero values for both attributes
+    mask_array = np.array([True, True, True, True, False, False])  # Nodes 0, 1, 2, 3
+
+    for generator_name, generator_func in NULL_GENERATORS.items():
+        print(f"Testing mask application for {generator_name}")
+
+        if generator_name == NULL_STRATEGIES.UNIFORM:
+            result = generator_func(graph, attributes, mask=mask_array)
+
+            # For uniform null with mask, verify structure is correct
+            assert result.shape == (6, 2), f"{generator_name} wrong shape with mask"
+            # After propagation, all nodes will have some value due to network effect
+            assert (
+                result.values > 0
+            ).all(), "All nodes should have positive values after propagation"
+
+        elif generator_name == NULL_STRATEGIES.EDGE_PERMUTATION:
+            # Edge permutation ignores mask, just test it doesn't crash
+            result = generator_func(graph, attributes, n_samples=2)
+            assert result.shape[0] == 12  # 2 samples * 6 nodes
+
+        else:
+            # Gaussian and node_permutation with mask
+            result = generator_func(graph, attributes, mask=mask_array, n_samples=2)
+
+            # Check that structure is maintained
+            assert result.shape == (12, 2)  # 2 samples * 6 nodes
+
+
+def test_edge_cases_and_errors():
+    """Test edge cases and error conditions for null generators."""
+    # Create minimal test graph
+    graph = ig.Graph(3)
+    graph.vs["attr1"] = [1.0, 2.0, 0.0]
+    graph.vs["bad_attr"] = [0.0, 0.0, 0.0]  # All zeros
+    graph.add_edges([(0, 1), (1, 2)])
+
+    # Test 1: All zero attribute should raise error for all generators
+    with pytest.raises(ValueError):
+        uniform_null(graph, ["bad_attr"])
+
+    with pytest.raises(ValueError):
+        parametric_null(graph, ["bad_attr"])
+
+    with pytest.raises(ValueError):
+        node_permutation_null(graph, ["bad_attr"])
+
+    with pytest.raises(ValueError):
+        edge_permutation_null(graph, ["bad_attr"])
+
+    # Test 2: Empty mask should raise error
+    empty_mask = np.array([False, False, False])
+    with pytest.raises(ValueError, match="No nodes in mask"):
+        uniform_null(graph, ["attr1"], mask=empty_mask)
+
+    # Test 3: Single node mask (edge case)
+    single_mask = np.array([True, False, False])
+    result = uniform_null(graph, ["attr1"], mask=single_mask)
+    assert result.shape == (3, 1)  # Should work
+
+    # Test 4: Replace parameter in node permutation
+    result_no_replace = node_permutation_null(
+        graph, ["attr1"], replace=False, n_samples=2
+    )
+    result_replace = node_permutation_null(graph, ["attr1"], replace=True, n_samples=2)
+
+    # Both should have same structure
+    assert result_no_replace.shape == result_replace.shape
+
+
+def test_propagation_method_parameters():
+    """Test that propagation method and additional arguments are properly passed through."""
+    # Create test graph
+    graph = ig.Graph(4)
+    graph.vs["attr1"] = [1.0, 2.0, 0.0, 1.5]
+    graph.add_edges([(0, 1), (1, 2), (2, 3)])
+
+    # Test different damping parameters produce different results
+    result_default = uniform_null(graph, ["attr1"])
+    result_damped = uniform_null(
+        graph, ["attr1"], additional_propagation_args={"damping": 0.5}
+    )
+
+    # Results should be different with different damping
+    assert not np.allclose(
+        result_default.values, result_damped.values
+    ), "Different damping should produce different results"
+
+    # Test that all generators accept method parameters
+    for generator_name, generator_func in NULL_GENERATORS.items():
+        if generator_name == NULL_STRATEGIES.UNIFORM:
+            result = generator_func(
+                graph, ["attr1"], additional_propagation_args={"damping": 0.8}
+            )
+        else:
+            result = generator_func(
+                graph,
+                ["attr1"],
+                additional_propagation_args={"damping": 0.8},
+                n_samples=2,
+            )
+
+        # Should produce valid results
+        assert isinstance(result, pd.DataFrame)
+        assert not result.empty
tests/test_ontologies_id_tables.py
@@ -0,0 +1,198 @@
+import pytest
+import pandas as pd
+from unittest.mock import patch
+
+from napistu.ontologies import id_tables
+from napistu.constants import (
+    BQB,
+    IDENTIFIERS,
+    ONTOLOGIES,
+    SBML_DFS,
+    VALID_BQB_TERMS,
+)
+
+
+@pytest.fixture
+def sample_id_table():
+    """Create a sample DataFrame for testing."""
+    return pd.DataFrame(
+        {
+            SBML_DFS.S_ID: ["s1", "s2", "s3", "s4"],
+            IDENTIFIERS.ONTOLOGY: [
+                ONTOLOGIES.GO,
+                ONTOLOGIES.KEGG,
+                ONTOLOGIES.REACTOME,
+                ONTOLOGIES.WIKIPATHWAYS,
+            ],
+            IDENTIFIERS.IDENTIFIER: ["GO:0001", "hsa00010", "R-HSA-123", "WP123"],
+            IDENTIFIERS.BQB: [BQB.IS, BQB.HAS_PART, BQB.IS_PART_OF, BQB.IS_VERSION_OF],
+            IDENTIFIERS.URL: ["foo", "bar", "baz", "qux"],
+            "other_col": ["a", "b", "c", "d"],
+        }
+    )
+
+
+def test_sanitize_id_table_values_valid_cases(sample_id_table):
+    """Test all valid use cases for _sanitize_id_table_values function."""
+
+    # Test string input conversion
+    result = id_tables._sanitize_id_table_values(
+        ONTOLOGIES.GO, sample_id_table, IDENTIFIERS.ONTOLOGY
+    )
+    assert result == {ONTOLOGIES.GO}
+    assert isinstance(result, set)
+
+    # Test list input conversion
+    result = id_tables._sanitize_id_table_values(
+        [ONTOLOGIES.GO, ONTOLOGIES.KEGG], sample_id_table, IDENTIFIERS.ONTOLOGY
+    )
+    assert result == {ONTOLOGIES.GO, ONTOLOGIES.KEGG}
+    assert isinstance(result, set)
+
+    # Test set input unchanged
+    input_set = {ONTOLOGIES.GO, ONTOLOGIES.KEGG}
+    result = id_tables._sanitize_id_table_values(
+        input_set, sample_id_table, IDENTIFIERS.ONTOLOGY
+    )
+    assert result == input_set
+    assert isinstance(result, set)
+
+    # Test successful validation against valid_values
+    result = id_tables._sanitize_id_table_values(
+        BQB.IS, sample_id_table, IDENTIFIERS.BQB, set(VALID_BQB_TERMS)
+    )
+    assert result == {BQB.IS}
+
+    # Test duplicate values in input list are handled correctly
+    result = id_tables._sanitize_id_table_values(
+        [ONTOLOGIES.GO, ONTOLOGIES.GO, ONTOLOGIES.KEGG],
+        sample_id_table,
+        IDENTIFIERS.ONTOLOGY,
+    )
+    assert result == {
+        ONTOLOGIES.GO,
+        ONTOLOGIES.KEGG,
+    }  # Duplicates removed by set conversion
+
+    # Test all values present in table
+    result = id_tables._sanitize_id_table_values(
+        [ONTOLOGIES.GO, ONTOLOGIES.KEGG, ONTOLOGIES.REACTOME],
+        sample_id_table,
+        IDENTIFIERS.ONTOLOGY,
+    )
+    assert result == {ONTOLOGIES.GO, ONTOLOGIES.KEGG, ONTOLOGIES.REACTOME}
+
+    # Test single value present in table
+    result = id_tables._sanitize_id_table_values(
+        ONTOLOGIES.WIKIPATHWAYS, sample_id_table, IDENTIFIERS.ONTOLOGY
+    )
+    assert result == {ONTOLOGIES.WIKIPATHWAYS}
+
+    # Test with different column (BQB)
+    result = id_tables._sanitize_id_table_values(
+        BQB.HAS_PART, sample_id_table, IDENTIFIERS.BQB
+    )
+    assert result == {BQB.HAS_PART}
+
+
+@patch("napistu.ontologies.id_tables.logger")
+def test_sanitize_id_table_values_error_cases(mock_logger, sample_id_table):
+    """Test error cases and edge cases for _sanitize_id_table_values function."""
+
+    # Test invalid input types raise ValueError
+    with pytest.raises(ValueError, match="ontology must be a string, a set, or list"):
+        id_tables._sanitize_id_table_values(123, sample_id_table, IDENTIFIERS.ONTOLOGY)
+
+    with pytest.raises(ValueError, match="ontology must be a string, a set, or list"):
+        id_tables._sanitize_id_table_values(
+            {"key": "value"}, sample_id_table, IDENTIFIERS.ONTOLOGY
+        )
+
+    # Test validation failure against valid_values
+    with pytest.raises(
+        ValueError, match="The following bqb are not valid: INVALID_BQB"
+    ):
+        id_tables._sanitize_id_table_values(
+            "INVALID_BQB", sample_id_table, IDENTIFIERS.BQB, set(VALID_BQB_TERMS), "bqb"
+        )
+
+    # Test multiple invalid values against valid_values
+    with pytest.raises(ValueError, match="The following bqb are not valid"):
+        id_tables._sanitize_id_table_values(
+            ["INVALID1", "INVALID2"],
+            sample_id_table,
+            IDENTIFIERS.BQB,
+            set(VALID_BQB_TERMS),
+            "bqb",
+        )
+
+    # Test all values missing from table raises error
+    missing_values = {"MISSING1", "MISSING2"}
+    with pytest.raises(ValueError, match="None of the requested ontology are present"):
+        id_tables._sanitize_id_table_values(
+            missing_values, sample_id_table, IDENTIFIERS.ONTOLOGY
+        )
+
+    # Test case-sensitive matching (lowercase 'go' should fail)
+    with pytest.raises(ValueError, match="None of the requested ontology are present"):
+        id_tables._sanitize_id_table_values(
+            "INVALID_ONTOLOGY", sample_id_table, IDENTIFIERS.ONTOLOGY
+        )
+
+    # Test custom value_type_name in error messages
+    with pytest.raises(ValueError, match="custom_type must be a string"):
+        id_tables._sanitize_id_table_values(
+            123, sample_id_table, IDENTIFIERS.ONTOLOGY, value_type_name="custom_type"
+        )
+
+    # Test default value_type_name uses column_name
+    with pytest.raises(ValueError, match="test_column must be a string"):
+        id_tables._sanitize_id_table_values(123, sample_id_table, "test_column")
+
+    # Test empty dataframe column
+    empty_df = pd.DataFrame({"ontology": []})
+    with pytest.raises(ValueError, match="None of the requested ontology are present"):
+        id_tables._sanitize_id_table_values("GO", empty_df, IDENTIFIERS.ONTOLOGY)
+
+    # Test partial values missing logs warning but doesn't raise error
+    mixed_values = {ONTOLOGIES.GO, "MISSING"}  # GO exists, MISSING doesn't
+    result = id_tables._sanitize_id_table_values(
+        mixed_values, sample_id_table, IDENTIFIERS.ONTOLOGY
+    )
+
+    assert result == mixed_values
+    mock_logger.warning.assert_called_once()
+    warning_call = mock_logger.warning.call_args[0][0]
+    assert "MISSING" in warning_call
+    assert "not present in the id_table" in warning_call
+
+    # Test multiple partial missing values
+    mock_logger.reset_mock()
+    mixed_values = {ONTOLOGIES.GO, ONTOLOGIES.KEGG, "MISSING1", "MISSING2"}
+    result = id_tables._sanitize_id_table_values(
+        mixed_values, sample_id_table, IDENTIFIERS.ONTOLOGY
+    )
+
+    assert result == mixed_values
+    mock_logger.warning.assert_called_once()
+    warning_call = mock_logger.warning.call_args[0][0]
+    assert "MISSING1" in warning_call and "MISSING2" in warning_call
+
+
+def test_filter_id_table_basic(sample_id_table):
+    """Basic test for filter_id_table filtering by identifier, ontology, and bqb."""
+
+    # Use a known identifier, ontology, and bqb from the fixture
+    filtered = id_tables.filter_id_table(
+        id_table=sample_id_table,
+        identifiers=["GO:0001"],
+        ontologies=[ONTOLOGIES.GO],
+        bqbs=[BQB.IS],
+    )
+    # Should return a DataFrame with only the matching row
+    assert isinstance(filtered, pd.DataFrame)
+    assert len(filtered) == 1
+    row = filtered.iloc[0]
+    assert row[IDENTIFIERS.ONTOLOGY] == ONTOLOGIES.GO
+    assert row[IDENTIFIERS.IDENTIFIER] == "GO:0001"
+    assert row[IDENTIFIERS.BQB] == BQB.IS