napistu 0.4.0__py3-none-any.whl → 0.4.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,89 +1,380 @@
1
1
  import pytest
2
- import igraph as ig
3
2
  import numpy as np
4
- from napistu.network.net_propagation import personalized_pagerank_by_attribute
5
-
6
-
7
- def test_personalized_pagerank_by_attribute_basic():
8
- g = ig.Graph.Full(3)
9
- g.vs["name"] = ["A", "B", "C"]
10
- g.vs["score"] = [1, 0, 2]
11
- df = personalized_pagerank_by_attribute(g, "score")
12
- assert set(df.columns) == {
13
- "name",
14
- "pagerank_by_attribute",
15
- "pagerank_uniform",
16
- "score",
17
- }
18
- assert np.isclose(df["score"].sum(), 3)
19
- assert np.isclose(df["pagerank_by_attribute"].sum(), 1)
20
- assert np.isclose(df["pagerank_uniform"].sum(), 1)
21
- # Uniform should only include A and C
22
- assert df.loc[df["name"] == "B", "pagerank_uniform"].values[0] > 0
23
-
24
-
25
- def test_personalized_pagerank_by_attribute_no_uniform():
26
- g = ig.Graph.Full(3)
27
- g.vs["score"] = [1, 0, 2]
28
- df = personalized_pagerank_by_attribute(g, "score", calculate_uniform_dist=False)
29
- assert "pagerank_uniform" not in df.columns
30
- assert np.isclose(df["pagerank_by_attribute"].sum(), 1)
31
-
32
-
33
- def test_personalized_pagerank_by_attribute_missing_and_negative():
34
- g = ig.Graph.Full(3)
35
- g.vs["score"] = [1, None, 2]
36
- # None should be treated as 0
37
- df = personalized_pagerank_by_attribute(g, "score")
38
- assert np.isclose(df["score"].sum(), 3)
39
- # Negative values should raise
40
- g.vs["score"] = [1, -1, 2]
41
- with pytest.raises(ValueError):
42
- personalized_pagerank_by_attribute(g, "score")
43
-
44
-
45
- def test_personalized_pagerank_by_attribute_additional_args_directed():
46
- # create an asymmetric directed graph to test whether additional_propagation_args is respected
47
- g = ig.Graph(directed=True)
48
- g.add_vertices(3)
49
- g.add_edges([(0, 1), (1, 2)])
50
- g.vs["score"] = [1, 0, 2]
51
- # Run with directed=False, which should treat the graph as undirected
52
- df_directed = personalized_pagerank_by_attribute(
53
- g, "score", additional_propagation_args={"directed": True}
3
+ import pandas as pd
4
+ import igraph as ig
5
+ from napistu.network.net_propagation import (
6
+ net_propagate_attributes,
7
+ uniform_null,
8
+ parametric_null,
9
+ node_permutation_null,
10
+ edge_permutation_null,
11
+ NULL_GENERATORS,
12
+ network_propagation_with_null,
13
+ )
14
+ from napistu.network.constants import (
15
+ NAPISTU_GRAPH_VERTICES,
16
+ NULL_STRATEGIES,
17
+ )
18
+
19
+
20
+ def test_network_propagation_with_null():
21
+ """Test the main orchestrator function with different null strategies."""
22
+ # Create test graph
23
+ graph = ig.Graph(5)
24
+ graph.vs[NAPISTU_GRAPH_VERTICES.NAME] = ["A", "B", "C", "D", "E"]
25
+ graph.vs["attr1"] = [1.0, 0.0, 2.0, 0.0, 1.5] # Non-negative, not all zero
26
+ graph.add_edges([(0, 1), (1, 2), (2, 3), (3, 4)])
27
+
28
+ attributes = ["attr1"]
29
+
30
+ # Test 1: Uniform null (should return ratios)
31
+ result_uniform = network_propagation_with_null(
32
+ graph, attributes, null_strategy=NULL_STRATEGIES.UNIFORM
54
33
  )
55
- df_undirected = personalized_pagerank_by_attribute(
56
- g, "score", additional_propagation_args={"directed": False}
34
+
35
+ # Check structure
36
+ assert isinstance(result_uniform, pd.DataFrame)
37
+ assert result_uniform.shape == (5, 1)
38
+ assert list(result_uniform.columns) == attributes
39
+ assert list(result_uniform.index) == ["A", "B", "C", "D", "E"]
40
+
41
+ # Should be ratios (can be > 1)
42
+ assert (result_uniform.values > 0).all(), "Ratios should be positive"
43
+ # Some ratios should be > 1 since observed scores concentrate on fewer nodes
44
+ assert (result_uniform.values > 1).any(), "Some ratios should be > 1"
45
+
46
+ # Test 2: Node permutation null (should return quantiles)
47
+ result_permutation = network_propagation_with_null(
48
+ graph,
49
+ attributes,
50
+ null_strategy=NULL_STRATEGIES.NODE_PERMUTATION,
51
+ n_samples=10, # Small for testing
57
52
  )
58
- # The results should differ for directed vs undirected
59
- assert not np.allclose(
60
- df_directed["pagerank_by_attribute"], df_undirected["pagerank_by_attribute"]
53
+
54
+ # Check structure
55
+ assert isinstance(result_permutation, pd.DataFrame)
56
+ assert result_permutation.shape == (5, 1)
57
+ assert list(result_permutation.columns) == attributes
58
+
59
+ # Should be quantiles (0 to 1)
60
+ assert (result_permutation.values >= 0).all(), "Quantiles should be >= 0"
61
+ assert (result_permutation.values <= 1).all(), "Quantiles should be <= 1"
62
+
63
+ # Test 3: Edge permutation null
64
+ result_edge = network_propagation_with_null(
65
+ graph,
66
+ attributes,
67
+ null_strategy=NULL_STRATEGIES.EDGE_PERMUTATION,
68
+ n_samples=5,
69
+ burn_in_ratio=2, # Small for testing
70
+ sampling_ratio=0.2,
71
+ )
72
+
73
+ # Check structure
74
+ assert isinstance(result_edge, pd.DataFrame)
75
+ assert result_edge.shape == (5, 1)
76
+ assert (result_edge.values >= 0).all()
77
+ assert (result_edge.values <= 1).all()
78
+
79
+ # Test 4: Parametric null
80
+ result_parametric = network_propagation_with_null(
81
+ graph, attributes, null_strategy=NULL_STRATEGIES.PARAMETRIC, n_samples=8
82
+ )
83
+
84
+ # Check structure
85
+ assert isinstance(result_parametric, pd.DataFrame)
86
+ assert result_parametric.shape == (5, 1)
87
+ assert (result_parametric.values >= 0).all()
88
+ assert (result_parametric.values <= 1).all()
89
+
90
+ # Test 5: Custom propagation parameters
91
+ result_custom = network_propagation_with_null(
92
+ graph,
93
+ attributes,
94
+ null_strategy=NULL_STRATEGIES.UNIFORM,
95
+ additional_propagation_args={"damping": 0.7},
61
96
  )
62
- # Uniform should also be affected
97
+
98
+ # Should be different from default
63
99
  assert not np.allclose(
64
- df_directed["pagerank_uniform"], df_undirected["pagerank_uniform"]
100
+ result_uniform.values, result_custom.values
101
+ ), "Different propagation parameters should give different results"
102
+
103
+ # Test 6: Custom null parameters (mask)
104
+ mask_array = np.array([True, False, True, False, True])
105
+ result_masked = network_propagation_with_null(
106
+ graph,
107
+ attributes,
108
+ null_strategy=NULL_STRATEGIES.NODE_PERMUTATION,
109
+ n_samples=5,
110
+ mask=mask_array,
65
111
  )
66
112
 
113
+ # Should work without error
114
+ assert isinstance(result_masked, pd.DataFrame)
115
+ assert result_masked.shape == (5, 1)
67
116
 
68
- def test_personalized_pagerank_by_attribute_additional_args_invalid():
69
- g = ig.Graph.Full(3)
70
- g.vs["score"] = [1, 0, 2]
71
- # Passing an invalid argument should raise ValueError
72
- with pytest.raises(ValueError):
73
- personalized_pagerank_by_attribute(
74
- g, "score", additional_propagation_args={"not_a_real_arg": 123}
117
+ # Test 7: Error handling - invalid null strategy
118
+ with pytest.raises(ValueError, match="Unknown null strategy"):
119
+ network_propagation_with_null(
120
+ graph, attributes, null_strategy="invalid_strategy"
75
121
  )
76
122
 
77
123
 
78
- def test_personalized_pagerank_by_attribute_all_missing():
79
- g = ig.Graph.Full(3)
80
- # No 'score' attribute at all
81
- with pytest.raises(ValueError, match="missing for all vertices"):
82
- personalized_pagerank_by_attribute(g, "score")
124
+ def test_net_propagate_attributes():
125
+ """Test net_propagate_attributes with multiple attributes and various scenarios."""
126
+ # Create test graph with edges for realistic propagation
127
+ graph = ig.Graph(4)
128
+ graph.vs[NAPISTU_GRAPH_VERTICES.NAME] = ["node1", "node2", "node3", "node4"]
129
+ graph.vs["attr1"] = [1.0, 0.0, 2.0, 0.0] # Non-negative, not all zero
130
+ graph.vs["attr2"] = [0.5, 1.5, 0.0, 1.0] # Non-negative, not all zero
131
+ graph.add_edges([(0, 1), (1, 2), (2, 3), (0, 3)]) # Create connected graph
132
+
133
+ # Test 1: Basic functionality with two attributes
134
+ result = net_propagate_attributes(graph, ["attr1", "attr2"])
135
+
136
+ # Check structure
137
+ assert isinstance(result, pd.DataFrame)
138
+ assert result.shape == (4, 2)
139
+ assert list(result.index) == ["node1", "node2", "node3", "node4"]
140
+ assert list(result.columns) == ["attr1", "attr2"]
141
+
142
+ # Check that values are valid probabilities (PPR returns probabilities)
143
+ assert np.all(result.values >= 0)
144
+ assert np.all(result.values <= 1)
145
+ # Each column should sum to approximately 1 (PPR property)
146
+ assert np.allclose(result.sum(axis=0), [1.0, 1.0], atol=1e-10)
147
+
148
+ # Test 2: Single attribute
149
+ result_single = net_propagate_attributes(graph, ["attr1"])
150
+ assert result_single.shape == (4, 1)
151
+ assert list(result_single.columns) == ["attr1"]
152
+
153
+ # Test 3: Graph without names (should use indices)
154
+ graph_no_names = ig.Graph(3)
155
+ graph_no_names.vs["attr1"] = [1.0, 2.0, 1.0]
156
+ graph_no_names.add_edges([(0, 1), (1, 2)])
157
+
158
+ result_no_names = net_propagate_attributes(graph_no_names, ["attr1"])
159
+ assert list(result_no_names.index) == [0, 1, 2] # Should use integer indices
83
160
 
161
+ # Test 4: Invalid propagation method
162
+ with pytest.raises(ValueError, match="Invalid propagation method"):
163
+ net_propagate_attributes(graph, ["attr1"], propagation_method="invalid_method")
84
164
 
85
- def test_personalized_pagerank_by_attribute_all_zero():
86
- g = ig.Graph.Full(3)
87
- g.vs["score"] = [0, 0, 0]
165
+ # Test 5: Additional arguments (test damping parameter)
166
+ result_default = net_propagate_attributes(graph, ["attr1"])
167
+ result_damped = net_propagate_attributes(
168
+ graph, ["attr1"], additional_propagation_args={"damping": 0.5} # Lower damping
169
+ )
170
+
171
+ # Results should be different with different damping
172
+ assert not np.allclose(result_default.values, result_damped.values)
173
+
174
+ # Test 6: Invalid attribute (should be caught by internal validation)
175
+ graph.vs["bad_attr"] = [-1.0, 1.0, 2.0, 0.0] # Has negative values
176
+ with pytest.raises(ValueError, match="contains negative values"):
177
+ net_propagate_attributes(graph, ["bad_attr"])
178
+
179
+ # Test 7: Zero attribute (should be caught by internal validation)
180
+ graph.vs["zero_attr"] = [0.0, 0.0, 0.0, 0.0]
88
181
  with pytest.raises(ValueError, match="zero for all vertices"):
89
- personalized_pagerank_by_attribute(g, "score")
182
+ net_propagate_attributes(graph, ["zero_attr"])
183
+
184
+
185
+ def test_all_null_generators_structure():
186
+ """Test all null generators with default options and validate output structure."""
187
+ # Create test graph with edges for realistic propagation
188
+ graph = ig.Graph(5)
189
+ graph.vs[NAPISTU_GRAPH_VERTICES.NAME] = ["A", "B", "C", "D", "E"]
190
+ graph.vs["attr1"] = [1.0, 0.0, 2.0, 0.0, 1.5] # Non-negative, not all zero
191
+ graph.vs["attr2"] = [0.5, 1.0, 0.0, 2.0, 0.0] # Non-negative, not all zero
192
+ graph.add_edges([(0, 1), (1, 2), (2, 3), (3, 4)])
193
+
194
+ attributes = ["attr1", "attr2"]
195
+ n_samples = 3 # Small for testing
196
+
197
+ for generator_name, generator_func in NULL_GENERATORS.items():
198
+ print(f"Testing {generator_name}")
199
+
200
+ if generator_name == NULL_STRATEGIES.UNIFORM:
201
+ # Uniform null doesn't take n_samples
202
+ result = generator_func(graph, attributes)
203
+ expected_rows = 5 # One row per node
204
+ elif generator_name == NULL_STRATEGIES.EDGE_PERMUTATION:
205
+ # Edge permutation has different parameters
206
+ result = generator_func(graph, attributes, n_samples=n_samples)
207
+ expected_rows = n_samples * 5 # n_samples rows per node
208
+ else:
209
+ # Parametric and node_permutation
210
+ result = generator_func(graph, attributes, n_samples=n_samples)
211
+ expected_rows = n_samples * 5 # n_samples rows per node
212
+
213
+ # Validate structure
214
+ assert isinstance(
215
+ result, pd.DataFrame
216
+ ), f"{generator_name} should return DataFrame"
217
+ assert result.shape == (
218
+ expected_rows,
219
+ 2,
220
+ ), f"{generator_name} wrong shape: {result.shape}"
221
+ assert list(result.columns) == attributes, f"{generator_name} wrong columns"
222
+
223
+ # Validate index structure
224
+ if generator_name == NULL_STRATEGIES.UNIFORM:
225
+ assert list(result.index) == [
226
+ "A",
227
+ "B",
228
+ "C",
229
+ "D",
230
+ "E",
231
+ ], f"{generator_name} wrong index"
232
+ else:
233
+ expected_index = ["A", "B", "C", "D", "E"] * n_samples
234
+ assert (
235
+ list(result.index) == expected_index
236
+ ), f"{generator_name} wrong repeated index"
237
+
238
+ # Validate values are numeric and finite (propagated outputs should be valid probabilities)
239
+ assert result.isna().sum().sum() == 0, f"{generator_name} contains NaN values"
240
+ assert np.isfinite(
241
+ result.values
242
+ ).all(), f"{generator_name} contains infinite values"
243
+ assert (result.values >= 0).all(), f"{generator_name} contains negative values"
244
+ assert (
245
+ result.values <= 1
246
+ ).all(), f"{generator_name} should contain probabilities <= 1"
247
+
248
+ # Each sample should sum to approximately 1 (PPR property)
249
+ if generator_name == NULL_STRATEGIES.UNIFORM:
250
+ assert np.allclose(
251
+ result.sum(axis=0), [1.0, 1.0], atol=1e-10
252
+ ), f"{generator_name} doesn't sum to 1"
253
+ else:
254
+ # For multiple samples, each individual sample should sum to 1
255
+ for i in range(n_samples):
256
+ start_idx = i * 5
257
+ end_idx = (i + 1) * 5
258
+ sample_data = result.iloc[start_idx:end_idx]
259
+ assert np.allclose(
260
+ sample_data.sum(axis=0), [1.0, 1.0], atol=1e-10
261
+ ), f"{generator_name} sample {i} doesn't sum to 1"
262
+
263
+
264
+ def test_mask_application():
265
+ """Test that masks are correctly applied across all null generators."""
266
+ # Create test graph
267
+ graph = ig.Graph(6)
268
+ graph.vs[NAPISTU_GRAPH_VERTICES.NAME] = ["A", "B", "C", "D", "E", "F"]
269
+ graph.vs["attr1"] = [1.0, 0.0, 2.0, 0.0, 1.5, 0.0] # Nonzero at indices 0, 2, 4
270
+ graph.vs["attr2"] = [0.0, 1.0, 0.0, 2.0, 0.0, 1.0] # Nonzero at indices 1, 3, 5
271
+ graph.add_edges([(0, 1), (1, 2), (2, 3), (3, 4), (4, 5)])
272
+
273
+ attributes = ["attr1", "attr2"]
274
+
275
+ # Test mask that includes nodes with nonzero values for both attributes
276
+ # Use nodes 0, 1, 2, 3, which include at least one nonzero value of each attribute
277
+ mask_array = np.array([True, True, True, True, False, False]) # Nodes 0, 1, 2, 3
278
+
279
+ for generator_name, generator_func in NULL_GENERATORS.items():
280
+ print(f"Testing mask application for {generator_name}")
281
+
282
+ if generator_name == NULL_STRATEGIES.UNIFORM:
283
+ result = generator_func(graph, attributes, mask=mask_array)
284
+
285
+ # For uniform null with mask, verify structure is correct
286
+ assert result.shape == (6, 2), f"{generator_name} wrong shape with mask"
287
+ # After propagation, all nodes will have some value due to network effect
288
+ assert (
289
+ result.values > 0
290
+ ).all(), "All nodes should have positive values after propagation"
291
+
292
+ elif generator_name == NULL_STRATEGIES.EDGE_PERMUTATION:
293
+ # Edge permutation is called without a mask here; just test it doesn't crash
294
+ result = generator_func(graph, attributes, n_samples=2)
295
+ assert result.shape[0] == 12 # 2 samples * 6 nodes
296
+
297
+ else:
298
+ # Parametric and node_permutation with mask
299
+ result = generator_func(graph, attributes, mask=mask_array, n_samples=2)
300
+
301
+ # Check that structure is maintained
302
+ assert result.shape == (12, 2) # 2 samples * 6 nodes
303
+
304
+
305
+ def test_edge_cases_and_errors():
306
+ """Test edge cases and error conditions for null generators."""
307
+ # Create minimal test graph
308
+ graph = ig.Graph(3)
309
+ graph.vs["attr1"] = [1.0, 2.0, 0.0]
310
+ graph.vs["bad_attr"] = [0.0, 0.0, 0.0] # All zeros
311
+ graph.add_edges([(0, 1), (1, 2)])
312
+
313
+ # Test 1: All zero attribute should raise error for all generators
314
+ with pytest.raises(ValueError):
315
+ uniform_null(graph, ["bad_attr"])
316
+
317
+ with pytest.raises(ValueError):
318
+ parametric_null(graph, ["bad_attr"])
319
+
320
+ with pytest.raises(ValueError):
321
+ node_permutation_null(graph, ["bad_attr"])
322
+
323
+ with pytest.raises(ValueError):
324
+ edge_permutation_null(graph, ["bad_attr"])
325
+
326
+ # Test 2: Empty mask should raise error
327
+ empty_mask = np.array([False, False, False])
328
+ with pytest.raises(ValueError, match="No nodes in mask"):
329
+ uniform_null(graph, ["attr1"], mask=empty_mask)
330
+
331
+ # Test 3: Single node mask (edge case)
332
+ single_mask = np.array([True, False, False])
333
+ result = uniform_null(graph, ["attr1"], mask=single_mask)
334
+ assert result.shape == (3, 1) # Should work
335
+
336
+ # Test 4: Replace parameter in node permutation
337
+ result_no_replace = node_permutation_null(
338
+ graph, ["attr1"], replace=False, n_samples=2
339
+ )
340
+ result_replace = node_permutation_null(graph, ["attr1"], replace=True, n_samples=2)
341
+
342
+ # Both should have same structure
343
+ assert result_no_replace.shape == result_replace.shape
344
+
345
+
346
+ def test_propagation_method_parameters():
347
+ """Test that propagation method and additional arguments are properly passed through."""
348
+ # Create test graph
349
+ graph = ig.Graph(4)
350
+ graph.vs["attr1"] = [1.0, 2.0, 0.0, 1.5]
351
+ graph.add_edges([(0, 1), (1, 2), (2, 3)])
352
+
353
+ # Test different damping parameters produce different results
354
+ result_default = uniform_null(graph, ["attr1"])
355
+ result_damped = uniform_null(
356
+ graph, ["attr1"], additional_propagation_args={"damping": 0.5}
357
+ )
358
+
359
+ # Results should be different with different damping
360
+ assert not np.allclose(
361
+ result_default.values, result_damped.values
362
+ ), "Different damping should produce different results"
363
+
364
+ # Test that all generators accept method parameters
365
+ for generator_name, generator_func in NULL_GENERATORS.items():
366
+ if generator_name == NULL_STRATEGIES.UNIFORM:
367
+ result = generator_func(
368
+ graph, ["attr1"], additional_propagation_args={"damping": 0.8}
369
+ )
370
+ else:
371
+ result = generator_func(
372
+ graph,
373
+ ["attr1"],
374
+ additional_propagation_args={"damping": 0.8},
375
+ n_samples=2,
376
+ )
377
+
378
+ # Should produce valid results
379
+ assert isinstance(result, pd.DataFrame)
380
+ assert not result.empty
@@ -0,0 +1,198 @@
1
+ import pytest
2
+ import pandas as pd
3
+ from unittest.mock import patch
4
+
5
+ from napistu.ontologies import id_tables
6
+ from napistu.constants import (
7
+ BQB,
8
+ IDENTIFIERS,
9
+ ONTOLOGIES,
10
+ SBML_DFS,
11
+ VALID_BQB_TERMS,
12
+ )
13
+
14
+
15
+ @pytest.fixture
16
+ def sample_id_table():
17
+ """Create a sample DataFrame for testing."""
18
+ return pd.DataFrame(
19
+ {
20
+ SBML_DFS.S_ID: ["s1", "s2", "s3", "s4"],
21
+ IDENTIFIERS.ONTOLOGY: [
22
+ ONTOLOGIES.GO,
23
+ ONTOLOGIES.KEGG,
24
+ ONTOLOGIES.REACTOME,
25
+ ONTOLOGIES.WIKIPATHWAYS,
26
+ ],
27
+ IDENTIFIERS.IDENTIFIER: ["GO:0001", "hsa00010", "R-HSA-123", "WP123"],
28
+ IDENTIFIERS.BQB: [BQB.IS, BQB.HAS_PART, BQB.IS_PART_OF, BQB.IS_VERSION_OF],
29
+ IDENTIFIERS.URL: ["foo", "bar", "baz", "qux"],
30
+ "other_col": ["a", "b", "c", "d"],
31
+ }
32
+ )
33
+
34
+
35
+ def test_sanitize_id_table_values_valid_cases(sample_id_table):
36
+ """Test all valid use cases for _sanitize_id_table_values function."""
37
+
38
+ # Test string input conversion
39
+ result = id_tables._sanitize_id_table_values(
40
+ ONTOLOGIES.GO, sample_id_table, IDENTIFIERS.ONTOLOGY
41
+ )
42
+ assert result == {ONTOLOGIES.GO}
43
+ assert isinstance(result, set)
44
+
45
+ # Test list input conversion
46
+ result = id_tables._sanitize_id_table_values(
47
+ [ONTOLOGIES.GO, ONTOLOGIES.KEGG], sample_id_table, IDENTIFIERS.ONTOLOGY
48
+ )
49
+ assert result == {ONTOLOGIES.GO, ONTOLOGIES.KEGG}
50
+ assert isinstance(result, set)
51
+
52
+ # Test set input unchanged
53
+ input_set = {ONTOLOGIES.GO, ONTOLOGIES.KEGG}
54
+ result = id_tables._sanitize_id_table_values(
55
+ input_set, sample_id_table, IDENTIFIERS.ONTOLOGY
56
+ )
57
+ assert result == input_set
58
+ assert isinstance(result, set)
59
+
60
+ # Test successful validation against valid_values
61
+ result = id_tables._sanitize_id_table_values(
62
+ BQB.IS, sample_id_table, IDENTIFIERS.BQB, set(VALID_BQB_TERMS)
63
+ )
64
+ assert result == {BQB.IS}
65
+
66
+ # Test duplicate values in input list are handled correctly
67
+ result = id_tables._sanitize_id_table_values(
68
+ [ONTOLOGIES.GO, ONTOLOGIES.GO, ONTOLOGIES.KEGG],
69
+ sample_id_table,
70
+ IDENTIFIERS.ONTOLOGY,
71
+ )
72
+ assert result == {
73
+ ONTOLOGIES.GO,
74
+ ONTOLOGIES.KEGG,
75
+ } # Duplicates removed by set conversion
76
+
77
+ # Test all values present in table
78
+ result = id_tables._sanitize_id_table_values(
79
+ [ONTOLOGIES.GO, ONTOLOGIES.KEGG, ONTOLOGIES.REACTOME],
80
+ sample_id_table,
81
+ IDENTIFIERS.ONTOLOGY,
82
+ )
83
+ assert result == {ONTOLOGIES.GO, ONTOLOGIES.KEGG, ONTOLOGIES.REACTOME}
84
+
85
+ # Test single value present in table
86
+ result = id_tables._sanitize_id_table_values(
87
+ ONTOLOGIES.WIKIPATHWAYS, sample_id_table, IDENTIFIERS.ONTOLOGY
88
+ )
89
+ assert result == {ONTOLOGIES.WIKIPATHWAYS}
90
+
91
+ # Test with different column (BQB)
92
+ result = id_tables._sanitize_id_table_values(
93
+ BQB.HAS_PART, sample_id_table, IDENTIFIERS.BQB
94
+ )
95
+ assert result == {BQB.HAS_PART}
96
+
97
+
98
+ @patch("napistu.ontologies.id_tables.logger")
99
+ def test_sanitize_id_table_values_error_cases(mock_logger, sample_id_table):
100
+ """Test error cases and edge cases for _sanitize_id_table_values function."""
101
+
102
+ # Test invalid input types raise ValueError
103
+ with pytest.raises(ValueError, match="ontology must be a string, a set, or list"):
104
+ id_tables._sanitize_id_table_values(123, sample_id_table, IDENTIFIERS.ONTOLOGY)
105
+
106
+ with pytest.raises(ValueError, match="ontology must be a string, a set, or list"):
107
+ id_tables._sanitize_id_table_values(
108
+ {"key": "value"}, sample_id_table, IDENTIFIERS.ONTOLOGY
109
+ )
110
+
111
+ # Test validation failure against valid_values
112
+ with pytest.raises(
113
+ ValueError, match="The following bqb are not valid: INVALID_BQB"
114
+ ):
115
+ id_tables._sanitize_id_table_values(
116
+ "INVALID_BQB", sample_id_table, IDENTIFIERS.BQB, set(VALID_BQB_TERMS), "bqb"
117
+ )
118
+
119
+ # Test multiple invalid values against valid_values
120
+ with pytest.raises(ValueError, match="The following bqb are not valid"):
121
+ id_tables._sanitize_id_table_values(
122
+ ["INVALID1", "INVALID2"],
123
+ sample_id_table,
124
+ IDENTIFIERS.BQB,
125
+ set(VALID_BQB_TERMS),
126
+ "bqb",
127
+ )
128
+
129
+ # Test all values missing from table raises error
130
+ missing_values = {"MISSING1", "MISSING2"}
131
+ with pytest.raises(ValueError, match="None of the requested ontology are present"):
132
+ id_tables._sanitize_id_table_values(
133
+ missing_values, sample_id_table, IDENTIFIERS.ONTOLOGY
134
+ )
135
+
136
+ # Test a single requested value that is absent from the table raises error
137
+ with pytest.raises(ValueError, match="None of the requested ontology are present"):
138
+ id_tables._sanitize_id_table_values(
139
+ "INVALID_ONTOLOGY", sample_id_table, IDENTIFIERS.ONTOLOGY
140
+ )
141
+
142
+ # Test custom value_type_name in error messages
143
+ with pytest.raises(ValueError, match="custom_type must be a string"):
144
+ id_tables._sanitize_id_table_values(
145
+ 123, sample_id_table, IDENTIFIERS.ONTOLOGY, value_type_name="custom_type"
146
+ )
147
+
148
+ # Test default value_type_name uses column_name
149
+ with pytest.raises(ValueError, match="test_column must be a string"):
150
+ id_tables._sanitize_id_table_values(123, sample_id_table, "test_column")
151
+
152
+ # Test empty dataframe column
153
+ empty_df = pd.DataFrame({"ontology": []})
154
+ with pytest.raises(ValueError, match="None of the requested ontology are present"):
155
+ id_tables._sanitize_id_table_values("GO", empty_df, IDENTIFIERS.ONTOLOGY)
156
+
157
+ # Test partial values missing logs warning but doesn't raise error
158
+ mixed_values = {ONTOLOGIES.GO, "MISSING"} # GO exists, MISSING doesn't
159
+ result = id_tables._sanitize_id_table_values(
160
+ mixed_values, sample_id_table, IDENTIFIERS.ONTOLOGY
161
+ )
162
+
163
+ assert result == mixed_values
164
+ mock_logger.warning.assert_called_once()
165
+ warning_call = mock_logger.warning.call_args[0][0]
166
+ assert "MISSING" in warning_call
167
+ assert "not present in the id_table" in warning_call
168
+
169
+ # Test multiple partial missing values
170
+ mock_logger.reset_mock()
171
+ mixed_values = {ONTOLOGIES.GO, ONTOLOGIES.KEGG, "MISSING1", "MISSING2"}
172
+ result = id_tables._sanitize_id_table_values(
173
+ mixed_values, sample_id_table, IDENTIFIERS.ONTOLOGY
174
+ )
175
+
176
+ assert result == mixed_values
177
+ mock_logger.warning.assert_called_once()
178
+ warning_call = mock_logger.warning.call_args[0][0]
179
+ assert "MISSING1" in warning_call and "MISSING2" in warning_call
180
+
181
+
182
+ def test_filter_id_table_basic(sample_id_table):
183
+ """Basic test for filter_id_table filtering by identifier, ontology, and bqb."""
184
+
185
+ # Use a known identifier, ontology, and bqb from the fixture
186
+ filtered = id_tables.filter_id_table(
187
+ id_table=sample_id_table,
188
+ identifiers=["GO:0001"],
189
+ ontologies=[ONTOLOGIES.GO],
190
+ bqbs=[BQB.IS],
191
+ )
192
+ # Should return a DataFrame with only the matching row
193
+ assert isinstance(filtered, pd.DataFrame)
194
+ assert len(filtered) == 1
195
+ row = filtered.iloc[0]
196
+ assert row[IDENTIFIERS.ONTOLOGY] == ONTOLOGIES.GO
197
+ assert row[IDENTIFIERS.IDENTIFIER] == "GO:0001"
198
+ assert row[IDENTIFIERS.BQB] == BQB.IS