napistu 0.4.1__py3-none-any.whl → 0.4.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,89 +1,380 @@
  import pytest
- import igraph as ig
  import numpy as np
- from napistu.network.net_propagation import personalized_pagerank_by_attribute
-
-
- def test_personalized_pagerank_by_attribute_basic():
-     g = ig.Graph.Full(3)
-     g.vs["name"] = ["A", "B", "C"]
-     g.vs["score"] = [1, 0, 2]
-     df = personalized_pagerank_by_attribute(g, "score")
-     assert set(df.columns) == {
-         "name",
-         "pagerank_by_attribute",
-         "pagerank_uniform",
-         "score",
-     }
-     assert np.isclose(df["score"].sum(), 3)
-     assert np.isclose(df["pagerank_by_attribute"].sum(), 1)
-     assert np.isclose(df["pagerank_uniform"].sum(), 1)
-     # Uniform should only include A and C
-     assert df.loc[df["name"] == "B", "pagerank_uniform"].values[0] > 0
-
-
- def test_personalized_pagerank_by_attribute_no_uniform():
-     g = ig.Graph.Full(3)
-     g.vs["score"] = [1, 0, 2]
-     df = personalized_pagerank_by_attribute(g, "score", calculate_uniform_dist=False)
-     assert "pagerank_uniform" not in df.columns
-     assert np.isclose(df["pagerank_by_attribute"].sum(), 1)
-
-
- def test_personalized_pagerank_by_attribute_missing_and_negative():
-     g = ig.Graph.Full(3)
-     g.vs["score"] = [1, None, 2]
-     # None should be treated as 0
-     df = personalized_pagerank_by_attribute(g, "score")
-     assert np.isclose(df["score"].sum(), 3)
-     # Negative values should raise
-     g.vs["score"] = [1, -1, 2]
-     with pytest.raises(ValueError):
-         personalized_pagerank_by_attribute(g, "score")
-
-
- def test_personalized_pagerank_by_attribute_additional_args_directed():
-     # create an asymmetric directed graph to test whether additional_propagation_args is respected
-     g = ig.Graph(directed=True)
-     g.add_vertices(3)
-     g.add_edges([(0, 1), (1, 2)])
-     g.vs["score"] = [1, 0, 2]
-     # Run with directed=False, which should treat the graph as undirected
-     df_directed = personalized_pagerank_by_attribute(
-         g, "score", additional_propagation_args={"directed": True}
+ import pandas as pd
+ import igraph as ig
+ from napistu.network.net_propagation import (
+     net_propagate_attributes,
+     uniform_null,
+     parametric_null,
+     node_permutation_null,
+     edge_permutation_null,
+     NULL_GENERATORS,
+     network_propagation_with_null,
+ )
+ from napistu.network.constants import (
+     NAPISTU_GRAPH_VERTICES,
+     NULL_STRATEGIES,
+ )
+
+
+ def test_network_propagation_with_null():
+     """Test the main orchestrator function with different null strategies."""
+     # Create test graph
+     graph = ig.Graph(5)
+     graph.vs[NAPISTU_GRAPH_VERTICES.NAME] = ["A", "B", "C", "D", "E"]
+     graph.vs["attr1"] = [1.0, 0.0, 2.0, 0.0, 1.5]  # Non-negative, not all zero
+     graph.add_edges([(0, 1), (1, 2), (2, 3), (3, 4)])
+
+     attributes = ["attr1"]
+
+     # Test 1: Uniform null (should return ratios)
+     result_uniform = network_propagation_with_null(
+         graph, attributes, null_strategy=NULL_STRATEGIES.UNIFORM
      )
-     df_undirected = personalized_pagerank_by_attribute(
-         g, "score", additional_propagation_args={"directed": False}
+
+     # Check structure
+     assert isinstance(result_uniform, pd.DataFrame)
+     assert result_uniform.shape == (5, 1)
+     assert list(result_uniform.columns) == attributes
+     assert list(result_uniform.index) == ["A", "B", "C", "D", "E"]
+
+     # Should be ratios (can be > 1)
+     assert (result_uniform.values > 0).all(), "Ratios should be positive"
+     # Some ratios should be > 1 since observed scores concentrate on fewer nodes
+     assert (result_uniform.values > 1).any(), "Some ratios should be > 1"
+
+     # Test 2: Node permutation null (should return quantiles)
+     result_permutation = network_propagation_with_null(
+         graph,
+         attributes,
+         null_strategy=NULL_STRATEGIES.NODE_PERMUTATION,
+         n_samples=10,  # Small for testing
      )
-     # The results should differ for directed vs undirected
-     assert not np.allclose(
-         df_directed["pagerank_by_attribute"], df_undirected["pagerank_by_attribute"]
+
+     # Check structure
+     assert isinstance(result_permutation, pd.DataFrame)
+     assert result_permutation.shape == (5, 1)
+     assert list(result_permutation.columns) == attributes
+
+     # Should be quantiles (0 to 1)
+     assert (result_permutation.values >= 0).all(), "Quantiles should be >= 0"
+     assert (result_permutation.values <= 1).all(), "Quantiles should be <= 1"
+
+     # Test 3: Edge permutation null
+     result_edge = network_propagation_with_null(
+         graph,
+         attributes,
+         null_strategy=NULL_STRATEGIES.EDGE_PERMUTATION,
+         n_samples=5,
+         burn_in_ratio=2,  # Small for testing
+         sampling_ratio=0.2,
+     )
+
+     # Check structure
+     assert isinstance(result_edge, pd.DataFrame)
+     assert result_edge.shape == (5, 1)
+     assert (result_edge.values >= 0).all()
+     assert (result_edge.values <= 1).all()
+
+     # Test 4: Gaussian null
+     result_parametric = network_propagation_with_null(
+         graph, attributes, null_strategy=NULL_STRATEGIES.PARAMETRIC, n_samples=8
+     )
+
+     # Check structure
+     assert isinstance(result_parametric, pd.DataFrame)
+     assert result_parametric.shape == (5, 1)
+     assert (result_parametric.values >= 0).all()
+     assert (result_parametric.values <= 1).all()
+
+     # Test 5: Custom propagation parameters
+     result_custom = network_propagation_with_null(
+         graph,
+         attributes,
+         null_strategy=NULL_STRATEGIES.UNIFORM,
+         additional_propagation_args={"damping": 0.7},
      )
-     # Uniform should also be affected
+
+     # Should be different from default
      assert not np.allclose(
-         df_directed["pagerank_uniform"], df_undirected["pagerank_uniform"]
+         result_uniform.values, result_custom.values
+     ), "Different propagation parameters should give different results"
+
+     # Test 6: Custom null parameters (mask)
+     mask_array = np.array([True, False, True, False, True])
+     result_masked = network_propagation_with_null(
+         graph,
+         attributes,
+         null_strategy=NULL_STRATEGIES.NODE_PERMUTATION,
+         n_samples=5,
+         mask=mask_array,
      )

+     # Should work without error
+     assert isinstance(result_masked, pd.DataFrame)
+     assert result_masked.shape == (5, 1)

- def test_personalized_pagerank_by_attribute_additional_args_invalid():
-     g = ig.Graph.Full(3)
-     g.vs["score"] = [1, 0, 2]
-     # Passing an invalid argument should raise ValueError
-     with pytest.raises(ValueError):
-         personalized_pagerank_by_attribute(
-             g, "score", additional_propagation_args={"not_a_real_arg": 123}
+     # Test 7: Error handling - invalid null strategy
+     with pytest.raises(ValueError, match="Unknown null strategy"):
+         network_propagation_with_null(
+             graph, attributes, null_strategy="invalid_strategy"
          )


- def test_personalized_pagerank_by_attribute_all_missing():
-     g = ig.Graph.Full(3)
-     # No 'score' attribute at all
-     with pytest.raises(ValueError, match="missing for all vertices"):
-         personalized_pagerank_by_attribute(g, "score")
+ def test_net_propagate_attributes():
+     """Test net_propagate_attributes with multiple attributes and various scenarios."""
+     # Create test graph with edges for realistic propagation
+     graph = ig.Graph(4)
+     graph.vs[NAPISTU_GRAPH_VERTICES.NAME] = ["node1", "node2", "node3", "node4"]
+     graph.vs["attr1"] = [1.0, 0.0, 2.0, 0.0]  # Non-negative, not all zero
+     graph.vs["attr2"] = [0.5, 1.5, 0.0, 1.0]  # Non-negative, not all zero
+     graph.add_edges([(0, 1), (1, 2), (2, 3), (0, 3)])  # Create connected graph
+
+     # Test 1: Basic functionality with two attributes
+     result = net_propagate_attributes(graph, ["attr1", "attr2"])
+
+     # Check structure
+     assert isinstance(result, pd.DataFrame)
+     assert result.shape == (4, 2)
+     assert list(result.index) == ["node1", "node2", "node3", "node4"]
+     assert list(result.columns) == ["attr1", "attr2"]
+
+     # Check that values are valid probabilities (PPR returns probabilities)
+     assert np.all(result.values >= 0)
+     assert np.all(result.values <= 1)
+     # Each column should sum to approximately 1 (PPR property)
+     assert np.allclose(result.sum(axis=0), [1.0, 1.0], atol=1e-10)
+
+     # Test 2: Single attribute
+     result_single = net_propagate_attributes(graph, ["attr1"])
+     assert result_single.shape == (4, 1)
+     assert list(result_single.columns) == ["attr1"]
+
+     # Test 3: Graph without names (should use indices)
+     graph_no_names = ig.Graph(3)
+     graph_no_names.vs["attr1"] = [1.0, 2.0, 1.0]
+     graph_no_names.add_edges([(0, 1), (1, 2)])
+
+     result_no_names = net_propagate_attributes(graph_no_names, ["attr1"])
+     assert list(result_no_names.index) == [0, 1, 2]  # Should use integer indices

+     # Test 4: Invalid propagation method
+     with pytest.raises(ValueError, match="Invalid propagation method"):
+         net_propagate_attributes(graph, ["attr1"], propagation_method="invalid_method")

- def test_personalized_pagerank_by_attribute_all_zero():
-     g = ig.Graph.Full(3)
-     g.vs["score"] = [0, 0, 0]
+     # Test 5: Additional arguments (test damping parameter)
+     result_default = net_propagate_attributes(graph, ["attr1"])
+     result_damped = net_propagate_attributes(
+         graph, ["attr1"], additional_propagation_args={"damping": 0.5}  # Lower damping
+     )
+
+     # Results should be different with different damping
+     assert not np.allclose(result_default.values, result_damped.values)
+
+     # Test 6: Invalid attribute (should be caught by internal validation)
+     graph.vs["bad_attr"] = [-1.0, 1.0, 2.0, 0.0]  # Has negative values
+     with pytest.raises(ValueError, match="contains negative values"):
+         net_propagate_attributes(graph, ["bad_attr"])
+
+     # Test 7: Zero attribute (should be caught by internal validation)
+     graph.vs["zero_attr"] = [0.0, 0.0, 0.0, 0.0]
      with pytest.raises(ValueError, match="zero for all vertices"):
-         personalized_pagerank_by_attribute(g, "score")
+         net_propagate_attributes(graph, ["zero_attr"])
+
+
+ def test_all_null_generators_structure():
+     """Test all null generators with default options and validate output structure."""
+     # Create test graph with edges for realistic propagation
+     graph = ig.Graph(5)
+     graph.vs[NAPISTU_GRAPH_VERTICES.NAME] = ["A", "B", "C", "D", "E"]
+     graph.vs["attr1"] = [1.0, 0.0, 2.0, 0.0, 1.5]  # Non-negative, not all zero
+     graph.vs["attr2"] = [0.5, 1.0, 0.0, 2.0, 0.0]  # Non-negative, not all zero
+     graph.add_edges([(0, 1), (1, 2), (2, 3), (3, 4)])
+
+     attributes = ["attr1", "attr2"]
+     n_samples = 3  # Small for testing
+
+     for generator_name, generator_func in NULL_GENERATORS.items():
+         print(f"Testing {generator_name}")
+
+         if generator_name == NULL_STRATEGIES.UNIFORM:
+             # Uniform null doesn't take n_samples
+             result = generator_func(graph, attributes)
+             expected_rows = 5  # One row per node
+         elif generator_name == NULL_STRATEGIES.EDGE_PERMUTATION:
+             # Edge permutation has different parameters
+             result = generator_func(graph, attributes, n_samples=n_samples)
+             expected_rows = n_samples * 5  # n_samples rows per node
+         else:
+             # Gaussian and node_permutation
+             result = generator_func(graph, attributes, n_samples=n_samples)
+             expected_rows = n_samples * 5  # n_samples rows per node
+
+         # Validate structure
+         assert isinstance(
+             result, pd.DataFrame
+         ), f"{generator_name} should return DataFrame"
+         assert result.shape == (
+             expected_rows,
+             2,
+         ), f"{generator_name} wrong shape: {result.shape}"
+         assert list(result.columns) == attributes, f"{generator_name} wrong columns"
+
+         # Validate index structure
+         if generator_name == NULL_STRATEGIES.UNIFORM:
+             assert list(result.index) == [
+                 "A",
+                 "B",
+                 "C",
+                 "D",
+                 "E",
+             ], f"{generator_name} wrong index"
+         else:
+             expected_index = ["A", "B", "C", "D", "E"] * n_samples
+             assert (
+                 list(result.index) == expected_index
+             ), f"{generator_name} wrong repeated index"
+
+         # Validate values are numeric and finite (propagated outputs should be valid probabilities)
+         assert result.isna().sum().sum() == 0, f"{generator_name} contains NaN values"
+         assert np.isfinite(
+             result.values
+         ).all(), f"{generator_name} contains infinite values"
+         assert (result.values >= 0).all(), f"{generator_name} contains negative values"
+         assert (
+             result.values <= 1
+         ).all(), f"{generator_name} should contain probabilities <= 1"
+
+         # Each sample should sum to approximately 1 (PPR property)
+         if generator_name == NULL_STRATEGIES.UNIFORM:
+             assert np.allclose(
+                 result.sum(axis=0), [1.0, 1.0], atol=1e-10
+             ), f"{generator_name} doesn't sum to 1"
+         else:
+             # For multiple samples, each individual sample should sum to 1
+             for i in range(n_samples):
+                 start_idx = i * 5
+                 end_idx = (i + 1) * 5
+                 sample_data = result.iloc[start_idx:end_idx]
+                 assert np.allclose(
+                     sample_data.sum(axis=0), [1.0, 1.0], atol=1e-10
+                 ), f"{generator_name} sample {i} doesn't sum to 1"
+
+
+ def test_mask_application():
+     """Test that masks are correctly applied across all null generators."""
+     # Create test graph
+     graph = ig.Graph(6)
+     graph.vs[NAPISTU_GRAPH_VERTICES.NAME] = ["A", "B", "C", "D", "E", "F"]
+     graph.vs["attr1"] = [1.0, 0.0, 2.0, 0.0, 1.5, 0.0]  # Nonzero at indices 0, 2, 4
+     graph.vs["attr2"] = [0.0, 1.0, 0.0, 2.0, 0.0, 1.0]  # Nonzero at indices 1, 3, 5
+     graph.add_edges([(0, 1), (1, 2), (2, 3), (3, 4), (4, 5)])
+
+     attributes = ["attr1", "attr2"]
+
+     # Test mask that includes nodes with nonzero values for both attributes
+     # Use nodes 0, 1, 2, 3 which covers nonzero values for both attributes
+     mask_array = np.array([True, True, True, True, False, False])  # Nodes 0, 1, 2, 3
+
+     for generator_name, generator_func in NULL_GENERATORS.items():
+         print(f"Testing mask application for {generator_name}")
+
+         if generator_name == NULL_STRATEGIES.UNIFORM:
+             result = generator_func(graph, attributes, mask=mask_array)
+
+             # For uniform null with mask, verify structure is correct
+             assert result.shape == (6, 2), f"{generator_name} wrong shape with mask"
+             # After propagation, all nodes will have some value due to network effect
+             assert (
+                 result.values > 0
+             ).all(), "All nodes should have positive values after propagation"
+
+         elif generator_name == NULL_STRATEGIES.EDGE_PERMUTATION:
+             # Edge permutation ignores mask, just test it doesn't crash
+             result = generator_func(graph, attributes, n_samples=2)
+             assert result.shape[0] == 12  # 2 samples * 6 nodes
+
+         else:
+             # Gaussian and node_permutation with mask
+             result = generator_func(graph, attributes, mask=mask_array, n_samples=2)
+
+             # Check that structure is maintained
+             assert result.shape == (12, 2)  # 2 samples * 6 nodes
+
+
+ def test_edge_cases_and_errors():
+     """Test edge cases and error conditions for null generators."""
+     # Create minimal test graph
+     graph = ig.Graph(3)
+     graph.vs["attr1"] = [1.0, 2.0, 0.0]
+     graph.vs["bad_attr"] = [0.0, 0.0, 0.0]  # All zeros
+     graph.add_edges([(0, 1), (1, 2)])
+
+     # Test 1: All zero attribute should raise error for all generators
+     with pytest.raises(ValueError):
+         uniform_null(graph, ["bad_attr"])
+
+     with pytest.raises(ValueError):
+         parametric_null(graph, ["bad_attr"])
+
+     with pytest.raises(ValueError):
+         node_permutation_null(graph, ["bad_attr"])
+
+     with pytest.raises(ValueError):
+         edge_permutation_null(graph, ["bad_attr"])
+
+     # Test 2: Empty mask should raise error
+     empty_mask = np.array([False, False, False])
+     with pytest.raises(ValueError, match="No nodes in mask"):
+         uniform_null(graph, ["attr1"], mask=empty_mask)
+
+     # Test 3: Single node mask (edge case)
+     single_mask = np.array([True, False, False])
+     result = uniform_null(graph, ["attr1"], mask=single_mask)
+     assert result.shape == (3, 1)  # Should work
+
+     # Test 4: Replace parameter in node permutation
+     result_no_replace = node_permutation_null(
+         graph, ["attr1"], replace=False, n_samples=2
+     )
+     result_replace = node_permutation_null(graph, ["attr1"], replace=True, n_samples=2)
+
+     # Both should have same structure
+     assert result_no_replace.shape == result_replace.shape
+
+
+ def test_propagation_method_parameters():
+     """Test that propagation method and additional arguments are properly passed through."""
+     # Create test graph
+     graph = ig.Graph(4)
+     graph.vs["attr1"] = [1.0, 2.0, 0.0, 1.5]
+     graph.add_edges([(0, 1), (1, 2), (2, 3)])
+
+     # Test different damping parameters produce different results
+     result_default = uniform_null(graph, ["attr1"])
+     result_damped = uniform_null(
+         graph, ["attr1"], additional_propagation_args={"damping": 0.5}
+     )
+
+     # Results should be different with different damping
+     assert not np.allclose(
+         result_default.values, result_damped.values
+     ), "Different damping should produce different results"
+
+     # Test that all generators accept method parameters
+     for generator_name, generator_func in NULL_GENERATORS.items():
+         if generator_name == NULL_STRATEGIES.UNIFORM:
+             result = generator_func(
+                 graph, ["attr1"], additional_propagation_args={"damping": 0.8}
+             )
+         else:
+             result = generator_func(
+                 graph,
+                 ["attr1"],
+                 additional_propagation_args={"damping": 0.8},
+                 n_samples=2,
+             )
+
+         # Should produce valid results
+         assert isinstance(result, pd.DataFrame)
+         assert not result.empty
@@ -276,3 +276,33 @@ def test_precomputed_distances_serialization():
      # Clean up the temporary file
      if os.path.exists(temp_path):
          os.remove(temp_path)
+
+
+ def test_filter_precomputed_distances_top_n_subset():
+     # Use a small top_n for a quick test
+     top_n = 5
+     filtered = precompute.filter_precomputed_distances_top_n(
+         precomputed_distances, top_n=top_n
+     )
+     # Check that the filtered DataFrame is a subset of the original
+     merged = filtered.merge(
+         precomputed_distances,
+         on=[
+             precompute.NAPISTU_EDGELIST.SC_ID_ORIGIN,
+             precompute.NAPISTU_EDGELIST.SC_ID_DEST,
+         ],
+         how="left",
+         indicator=True,
+     )
+     assert (
+         merged["_merge"] == "both"
+     ).all(), "Filtered rows must be present in the original DataFrame"
+     # Check that columns are preserved
+     assert set(
+         [
+             precompute.NAPISTU_EDGELIST.SC_ID_ORIGIN,
+             precompute.NAPISTU_EDGELIST.SC_ID_DEST,
+         ]
+     ).issubset(filtered.columns)
+     # Optionally, check that the number of rows is less than or equal to the input
+     assert filtered.shape[0] <= precomputed_distances.shape[0]
@@ -334,3 +334,16 @@ def test_infer_entity_type_errors():
      )  # Two primary keys
      with pytest.raises(ValueError):
          sbml_dfs_utils.infer_entity_type(df)
+
+
+ def test_infer_entity_type_multindex_reactions():
+     # DataFrame with MultiIndex (r_id, foo), should infer as reactions
+     import pandas as pd
+     from napistu.constants import SBML_DFS
+
+     df = pd.DataFrame({"some_col": [1, 2]})
+     df.index = pd.MultiIndex.from_tuples(
+         [("rxn1", "a"), ("rxn2", "b")], names=[SBML_DFS.R_ID, "foo"]
+     )
+     result = sbml_dfs_utils.infer_entity_type(df)
+     assert result == SBML_DFS.REACTIONS
tests/test_source.py CHANGED
@@ -5,6 +5,8 @@ import os
  import pandas as pd
  from napistu import indices
  from napistu import source
+ from napistu.network import ng_utils
+ from napistu.constants import SBML_DFS

  test_path = os.path.abspath(os.path.join(__file__, os.pardir))
  test_data = os.path.join(test_path, "test_data")
@@ -58,10 +60,40 @@ def test_source_w_pwindex():
      assert source_obj.source.shape == (2, 8)


- ################################################
- # __main__
- ################################################
+ def test_get_minimal_source_edges(sbml_dfs_metabolism):
+     vertices = sbml_dfs_metabolism.reactions.reset_index().rename(
+         columns={SBML_DFS.R_ID: "node"}
+     )
+
+     minimal_source_edges = ng_utils.get_minimal_sources_edges(
+         vertices, sbml_dfs_metabolism
+     )
+     # print(minimal_source_edges.shape)
+     assert minimal_source_edges.shape == (87, 3)
+
+
+ def test_source_set_coverage(sbml_dfs_metabolism):
+
+     source_df = source.unnest_sources(sbml_dfs_metabolism.reactions)
+
+     # print(source_df.shape)
+     assert source_df.shape == (111, 7)
+
+     set_coverage = source.source_set_coverage(source_df)
+     # print(set_coverage.shape)
+     assert set_coverage.shape == (87, 6)
+
+
+ def test_source_set_coverage_enrichment(sbml_dfs_metabolism):
+
+     source_total_counts = source.get_source_total_counts(
+         sbml_dfs_metabolism, "reactions"
+     )
+
+     source_df = source.unnest_sources(sbml_dfs_metabolism.reactions).head(40)
+
+     set_coverage = source.source_set_coverage(
+         source_df, source_total_counts=source_total_counts, sbml_dfs=sbml_dfs_metabolism
+     )

- if __name__ == "__main__":
-     test_source()
-     test_source_w_pwindex()
+     assert set_coverage.shape == (30, 6)
@@ -0,0 +1,62 @@
+ import numpy as np
+ from scipy.stats import fisher_exact
+
+ from napistu.statistics import hypothesis_testing
+
+
+ def test_fisher_exact_vectorized_basic_and_vectorized():
+
+     # Classic Fisher's test example: [[1, 9], [11, 3]]
+     # a=1, b=9, c=11, d=3
+     odds, p = hypothesis_testing.fisher_exact_vectorized([1], [9], [11], [3])
+     # Odds ratio: (1*3)/(9*11) = 3/99 = 0.0303...
+     assert np.allclose(odds, [3 / 99])
+     assert p.shape == (1,)
+     assert (p >= 0).all() and (p <= 1).all()
+
+     # Vectorized: two tables
+     odds, p = hypothesis_testing.fisher_exact_vectorized(
+         [1, 2], [9, 8], [11, 10], [3, 4]
+     )
+     assert odds.shape == (2,)
+     assert p.shape == (2,)
+     # Check that odds ratios are correct
+     expected_odds = np.array([(1 * 3) / (9 * 11), (2 * 4) / (8 * 10)])
+     assert np.allclose(odds, expected_odds)
+     # P-values should be between 0 and 1
+     assert (p >= 0).all() and (p <= 1).all()
+
+
+ def test_fisher_exact_vectorized_vs_scipy():
+
+     # Define several 2x2 tables
+     tables = [
+         ([1], [9], [11], [3]),
+         ([5], [2], [8], [7]),
+         ([10], [10], [10], [10]),
+         ([0], [5], [5], [10]),
+         ([3], [7], [2], [8]),
+     ]
+     for a, b, c, d in tables:
+         odds_vec, p_vec = hypothesis_testing.fisher_exact_vectorized(a, b, c, d)
+         # Build the table for scipy
+         table = np.array([[a[0], b[0]], [c[0], d[0]]])
+         odds_scipy, p_scipy = fisher_exact(table, alternative="greater")
+         # Odds ratios should be nearly identical
+         assert np.allclose(odds_vec, [odds_scipy], rtol=1e-6, atol=1e-8)
+         # P-values should be close (normal approx vs exact)
+         assert np.allclose(
+             p_vec, [p_scipy], rtol=0.15, atol=1e-3
+         )  # allow some tolerance
+
+     # Also test vectorized input
+     a = [1, 5, 10, 0, 3]
+     b = [9, 2, 10, 5, 7]
+     c = [11, 8, 10, 5, 2]
+     d = [3, 7, 10, 10, 8]
+     odds_vec, p_vec = hypothesis_testing.fisher_exact_vectorized(a, b, c, d)
+     for i in range(len(a)):
+         table = np.array([[a[i], b[i]], [c[i], d[i]]])
+         odds_scipy, p_scipy = fisher_exact(table, alternative="greater")
+         assert np.allclose(odds_vec[i], odds_scipy, rtol=1e-6, atol=1e-8)
+         assert np.allclose(p_vec[i], p_scipy, rtol=0.15, atol=1e-3)