napistu 0.4.1__py3-none-any.whl → 0.4.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,89 +1,380 @@
1
1
  import pytest
2
- import igraph as ig
3
2
  import numpy as np
4
- from napistu.network.net_propagation import personalized_pagerank_by_attribute
5
-
6
-
7
- def test_personalized_pagerank_by_attribute_basic():
8
- g = ig.Graph.Full(3)
9
- g.vs["name"] = ["A", "B", "C"]
10
- g.vs["score"] = [1, 0, 2]
11
- df = personalized_pagerank_by_attribute(g, "score")
12
- assert set(df.columns) == {
13
- "name",
14
- "pagerank_by_attribute",
15
- "pagerank_uniform",
16
- "score",
17
- }
18
- assert np.isclose(df["score"].sum(), 3)
19
- assert np.isclose(df["pagerank_by_attribute"].sum(), 1)
20
- assert np.isclose(df["pagerank_uniform"].sum(), 1)
21
- # Uniform should only include A and C
22
- assert df.loc[df["name"] == "B", "pagerank_uniform"].values[0] > 0
23
-
24
-
25
- def test_personalized_pagerank_by_attribute_no_uniform():
26
- g = ig.Graph.Full(3)
27
- g.vs["score"] = [1, 0, 2]
28
- df = personalized_pagerank_by_attribute(g, "score", calculate_uniform_dist=False)
29
- assert "pagerank_uniform" not in df.columns
30
- assert np.isclose(df["pagerank_by_attribute"].sum(), 1)
31
-
32
-
33
- def test_personalized_pagerank_by_attribute_missing_and_negative():
34
- g = ig.Graph.Full(3)
35
- g.vs["score"] = [1, None, 2]
36
- # None should be treated as 0
37
- df = personalized_pagerank_by_attribute(g, "score")
38
- assert np.isclose(df["score"].sum(), 3)
39
- # Negative values should raise
40
- g.vs["score"] = [1, -1, 2]
41
- with pytest.raises(ValueError):
42
- personalized_pagerank_by_attribute(g, "score")
43
-
44
-
45
- def test_personalized_pagerank_by_attribute_additional_args_directed():
46
- # create an asymmetric directed graph to test whether additional_propagation_args is respected
47
- g = ig.Graph(directed=True)
48
- g.add_vertices(3)
49
- g.add_edges([(0, 1), (1, 2)])
50
- g.vs["score"] = [1, 0, 2]
51
- # Run with directed=False, which should treat the graph as undirected
52
- df_directed = personalized_pagerank_by_attribute(
53
- g, "score", additional_propagation_args={"directed": True}
3
+ import pandas as pd
4
+ import igraph as ig
5
+ from napistu.network.net_propagation import (
6
+ net_propagate_attributes,
7
+ uniform_null,
8
+ parametric_null,
9
+ node_permutation_null,
10
+ edge_permutation_null,
11
+ NULL_GENERATORS,
12
+ network_propagation_with_null,
13
+ )
14
+ from napistu.network.constants import (
15
+ NAPISTU_GRAPH_VERTICES,
16
+ NULL_STRATEGIES,
17
+ )
18
+
19
+
20
+ def test_network_propagation_with_null():
21
+ """Test the main orchestrator function with different null strategies."""
22
+ # Create test graph
23
+ graph = ig.Graph(5)
24
+ graph.vs[NAPISTU_GRAPH_VERTICES.NAME] = ["A", "B", "C", "D", "E"]
25
+ graph.vs["attr1"] = [1.0, 0.0, 2.0, 0.0, 1.5] # Non-negative, not all zero
26
+ graph.add_edges([(0, 1), (1, 2), (2, 3), (3, 4)])
27
+
28
+ attributes = ["attr1"]
29
+
30
+ # Test 1: Uniform null (should return ratios)
31
+ result_uniform = network_propagation_with_null(
32
+ graph, attributes, null_strategy=NULL_STRATEGIES.UNIFORM
54
33
  )
55
- df_undirected = personalized_pagerank_by_attribute(
56
- g, "score", additional_propagation_args={"directed": False}
34
+
35
+ # Check structure
36
+ assert isinstance(result_uniform, pd.DataFrame)
37
+ assert result_uniform.shape == (5, 1)
38
+ assert list(result_uniform.columns) == attributes
39
+ assert list(result_uniform.index) == ["A", "B", "C", "D", "E"]
40
+
41
+ # Should be ratios (can be > 1)
42
+ assert (result_uniform.values > 0).all(), "Ratios should be positive"
43
+ # Some ratios should be > 1 since observed scores concentrate on fewer nodes
44
+ assert (result_uniform.values > 1).any(), "Some ratios should be > 1"
45
+
46
+ # Test 2: Node permutation null (should return quantiles)
47
+ result_permutation = network_propagation_with_null(
48
+ graph,
49
+ attributes,
50
+ null_strategy=NULL_STRATEGIES.NODE_PERMUTATION,
51
+ n_samples=10, # Small for testing
57
52
  )
58
- # The results should differ for directed vs undirected
59
- assert not np.allclose(
60
- df_directed["pagerank_by_attribute"], df_undirected["pagerank_by_attribute"]
53
+
54
+ # Check structure
55
+ assert isinstance(result_permutation, pd.DataFrame)
56
+ assert result_permutation.shape == (5, 1)
57
+ assert list(result_permutation.columns) == attributes
58
+
59
+ # Should be quantiles (0 to 1)
60
+ assert (result_permutation.values >= 0).all(), "Quantiles should be >= 0"
61
+ assert (result_permutation.values <= 1).all(), "Quantiles should be <= 1"
62
+
63
+ # Test 3: Edge permutation null
64
+ result_edge = network_propagation_with_null(
65
+ graph,
66
+ attributes,
67
+ null_strategy=NULL_STRATEGIES.EDGE_PERMUTATION,
68
+ n_samples=5,
69
+ burn_in_ratio=2, # Small for testing
70
+ sampling_ratio=0.2,
71
+ )
72
+
73
+ # Check structure
74
+ assert isinstance(result_edge, pd.DataFrame)
75
+ assert result_edge.shape == (5, 1)
76
+ assert (result_edge.values >= 0).all()
77
+ assert (result_edge.values <= 1).all()
78
+
79
+ # Test 4: Gaussian null
80
+ result_parametric = network_propagation_with_null(
81
+ graph, attributes, null_strategy=NULL_STRATEGIES.PARAMETRIC, n_samples=8
82
+ )
83
+
84
+ # Check structure
85
+ assert isinstance(result_parametric, pd.DataFrame)
86
+ assert result_parametric.shape == (5, 1)
87
+ assert (result_parametric.values >= 0).all()
88
+ assert (result_parametric.values <= 1).all()
89
+
90
+ # Test 5: Custom propagation parameters
91
+ result_custom = network_propagation_with_null(
92
+ graph,
93
+ attributes,
94
+ null_strategy=NULL_STRATEGIES.UNIFORM,
95
+ additional_propagation_args={"damping": 0.7},
61
96
  )
62
- # Uniform should also be affected
97
+
98
+ # Should be different from default
63
99
  assert not np.allclose(
64
- df_directed["pagerank_uniform"], df_undirected["pagerank_uniform"]
100
+ result_uniform.values, result_custom.values
101
+ ), "Different propagation parameters should give different results"
102
+
103
+ # Test 6: Custom null parameters (mask)
104
+ mask_array = np.array([True, False, True, False, True])
105
+ result_masked = network_propagation_with_null(
106
+ graph,
107
+ attributes,
108
+ null_strategy=NULL_STRATEGIES.NODE_PERMUTATION,
109
+ n_samples=5,
110
+ mask=mask_array,
65
111
  )
66
112
 
113
+ # Should work without error
114
+ assert isinstance(result_masked, pd.DataFrame)
115
+ assert result_masked.shape == (5, 1)
67
116
 
68
- def test_personalized_pagerank_by_attribute_additional_args_invalid():
69
- g = ig.Graph.Full(3)
70
- g.vs["score"] = [1, 0, 2]
71
- # Passing an invalid argument should raise ValueError
72
- with pytest.raises(ValueError):
73
- personalized_pagerank_by_attribute(
74
- g, "score", additional_propagation_args={"not_a_real_arg": 123}
117
+ # Test 7: Error handling - invalid null strategy
118
+ with pytest.raises(ValueError, match="Unknown null strategy"):
119
+ network_propagation_with_null(
120
+ graph, attributes, null_strategy="invalid_strategy"
75
121
  )
76
122
 
77
123
 
78
- def test_personalized_pagerank_by_attribute_all_missing():
79
- g = ig.Graph.Full(3)
80
- # No 'score' attribute at all
81
- with pytest.raises(ValueError, match="missing for all vertices"):
82
- personalized_pagerank_by_attribute(g, "score")
124
+ def test_net_propagate_attributes():
125
+ """Test net_propagate_attributes with multiple attributes and various scenarios."""
126
+ # Create test graph with edges for realistic propagation
127
+ graph = ig.Graph(4)
128
+ graph.vs[NAPISTU_GRAPH_VERTICES.NAME] = ["node1", "node2", "node3", "node4"]
129
+ graph.vs["attr1"] = [1.0, 0.0, 2.0, 0.0] # Non-negative, not all zero
130
+ graph.vs["attr2"] = [0.5, 1.5, 0.0, 1.0] # Non-negative, not all zero
131
+ graph.add_edges([(0, 1), (1, 2), (2, 3), (0, 3)]) # Create connected graph
132
+
133
+ # Test 1: Basic functionality with two attributes
134
+ result = net_propagate_attributes(graph, ["attr1", "attr2"])
135
+
136
+ # Check structure
137
+ assert isinstance(result, pd.DataFrame)
138
+ assert result.shape == (4, 2)
139
+ assert list(result.index) == ["node1", "node2", "node3", "node4"]
140
+ assert list(result.columns) == ["attr1", "attr2"]
141
+
142
+ # Check that values are valid probabilities (PPR returns probabilities)
143
+ assert np.all(result.values >= 0)
144
+ assert np.all(result.values <= 1)
145
+ # Each column should sum to approximately 1 (PPR property)
146
+ assert np.allclose(result.sum(axis=0), [1.0, 1.0], atol=1e-10)
147
+
148
+ # Test 2: Single attribute
149
+ result_single = net_propagate_attributes(graph, ["attr1"])
150
+ assert result_single.shape == (4, 1)
151
+ assert list(result_single.columns) == ["attr1"]
152
+
153
+ # Test 3: Graph without names (should use indices)
154
+ graph_no_names = ig.Graph(3)
155
+ graph_no_names.vs["attr1"] = [1.0, 2.0, 1.0]
156
+ graph_no_names.add_edges([(0, 1), (1, 2)])
157
+
158
+ result_no_names = net_propagate_attributes(graph_no_names, ["attr1"])
159
+ assert list(result_no_names.index) == [0, 1, 2] # Should use integer indices
83
160
 
161
+ # Test 4: Invalid propagation method
162
+ with pytest.raises(ValueError, match="Invalid propagation method"):
163
+ net_propagate_attributes(graph, ["attr1"], propagation_method="invalid_method")
84
164
 
85
- def test_personalized_pagerank_by_attribute_all_zero():
86
- g = ig.Graph.Full(3)
87
- g.vs["score"] = [0, 0, 0]
165
+ # Test 5: Additional arguments (test damping parameter)
166
+ result_default = net_propagate_attributes(graph, ["attr1"])
167
+ result_damped = net_propagate_attributes(
168
+ graph, ["attr1"], additional_propagation_args={"damping": 0.5} # Lower damping
169
+ )
170
+
171
+ # Results should be different with different damping
172
+ assert not np.allclose(result_default.values, result_damped.values)
173
+
174
+ # Test 6: Invalid attribute (should be caught by internal validation)
175
+ graph.vs["bad_attr"] = [-1.0, 1.0, 2.0, 0.0] # Has negative values
176
+ with pytest.raises(ValueError, match="contains negative values"):
177
+ net_propagate_attributes(graph, ["bad_attr"])
178
+
179
+ # Test 7: Zero attribute (should be caught by internal validation)
180
+ graph.vs["zero_attr"] = [0.0, 0.0, 0.0, 0.0]
88
181
  with pytest.raises(ValueError, match="zero for all vertices"):
89
- personalized_pagerank_by_attribute(g, "score")
182
+ net_propagate_attributes(graph, ["zero_attr"])
183
+
184
+
185
+ def test_all_null_generators_structure():
186
+ """Test all null generators with default options and validate output structure."""
187
+ # Create test graph with edges for realistic propagation
188
+ graph = ig.Graph(5)
189
+ graph.vs[NAPISTU_GRAPH_VERTICES.NAME] = ["A", "B", "C", "D", "E"]
190
+ graph.vs["attr1"] = [1.0, 0.0, 2.0, 0.0, 1.5] # Non-negative, not all zero
191
+ graph.vs["attr2"] = [0.5, 1.0, 0.0, 2.0, 0.0] # Non-negative, not all zero
192
+ graph.add_edges([(0, 1), (1, 2), (2, 3), (3, 4)])
193
+
194
+ attributes = ["attr1", "attr2"]
195
+ n_samples = 3 # Small for testing
196
+
197
+ for generator_name, generator_func in NULL_GENERATORS.items():
198
+ print(f"Testing {generator_name}")
199
+
200
+ if generator_name == NULL_STRATEGIES.UNIFORM:
201
+ # Uniform null doesn't take n_samples
202
+ result = generator_func(graph, attributes)
203
+ expected_rows = 5 # One row per node
204
+ elif generator_name == NULL_STRATEGIES.EDGE_PERMUTATION:
205
+ # Edge permutation has different parameters
206
+ result = generator_func(graph, attributes, n_samples=n_samples)
207
+ expected_rows = n_samples * 5 # n_samples rows per node
208
+ else:
209
+ # Gaussian and node_permutation
210
+ result = generator_func(graph, attributes, n_samples=n_samples)
211
+ expected_rows = n_samples * 5 # n_samples rows per node
212
+
213
+ # Validate structure
214
+ assert isinstance(
215
+ result, pd.DataFrame
216
+ ), f"{generator_name} should return DataFrame"
217
+ assert result.shape == (
218
+ expected_rows,
219
+ 2,
220
+ ), f"{generator_name} wrong shape: {result.shape}"
221
+ assert list(result.columns) == attributes, f"{generator_name} wrong columns"
222
+
223
+ # Validate index structure
224
+ if generator_name == NULL_STRATEGIES.UNIFORM:
225
+ assert list(result.index) == [
226
+ "A",
227
+ "B",
228
+ "C",
229
+ "D",
230
+ "E",
231
+ ], f"{generator_name} wrong index"
232
+ else:
233
+ expected_index = ["A", "B", "C", "D", "E"] * n_samples
234
+ assert (
235
+ list(result.index) == expected_index
236
+ ), f"{generator_name} wrong repeated index"
237
+
238
+ # Validate values are numeric and finite (propagated outputs should be valid probabilities)
239
+ assert result.isna().sum().sum() == 0, f"{generator_name} contains NaN values"
240
+ assert np.isfinite(
241
+ result.values
242
+ ).all(), f"{generator_name} contains infinite values"
243
+ assert (result.values >= 0).all(), f"{generator_name} contains negative values"
244
+ assert (
245
+ result.values <= 1
246
+ ).all(), f"{generator_name} should contain probabilities <= 1"
247
+
248
+ # Each sample should sum to approximately 1 (PPR property)
249
+ if generator_name == NULL_STRATEGIES.UNIFORM:
250
+ assert np.allclose(
251
+ result.sum(axis=0), [1.0, 1.0], atol=1e-10
252
+ ), f"{generator_name} doesn't sum to 1"
253
+ else:
254
+ # For multiple samples, each individual sample should sum to 1
255
+ for i in range(n_samples):
256
+ start_idx = i * 5
257
+ end_idx = (i + 1) * 5
258
+ sample_data = result.iloc[start_idx:end_idx]
259
+ assert np.allclose(
260
+ sample_data.sum(axis=0), [1.0, 1.0], atol=1e-10
261
+ ), f"{generator_name} sample {i} doesn't sum to 1"
262
+
263
+
264
+ def test_mask_application():
265
+ """Test that masks are correctly applied across all null generators."""
266
+ # Create test graph
267
+ graph = ig.Graph(6)
268
+ graph.vs[NAPISTU_GRAPH_VERTICES.NAME] = ["A", "B", "C", "D", "E", "F"]
269
+ graph.vs["attr1"] = [1.0, 0.0, 2.0, 0.0, 1.5, 0.0] # Nonzero at indices 0, 2, 4
270
+ graph.vs["attr2"] = [0.0, 1.0, 0.0, 2.0, 0.0, 1.0] # Nonzero at indices 1, 3, 5
271
+ graph.add_edges([(0, 1), (1, 2), (2, 3), (3, 4), (4, 5)])
272
+
273
+ attributes = ["attr1", "attr2"]
274
+
275
+ # Test mask that includes nodes with nonzero values for both attributes
276
+ # Use nodes 0, 1, 2, 3 which covers nonzero values for both attributes
277
+ mask_array = np.array([True, True, True, True, False, False]) # Nodes 0, 1, 2, 3
278
+
279
+ for generator_name, generator_func in NULL_GENERATORS.items():
280
+ print(f"Testing mask application for {generator_name}")
281
+
282
+ if generator_name == NULL_STRATEGIES.UNIFORM:
283
+ result = generator_func(graph, attributes, mask=mask_array)
284
+
285
+ # For uniform null with mask, verify structure is correct
286
+ assert result.shape == (6, 2), f"{generator_name} wrong shape with mask"
287
+ # After propagation, all nodes will have some value due to network effect
288
+ assert (
289
+ result.values > 0
290
+ ).all(), "All nodes should have positive values after propagation"
291
+
292
+ elif generator_name == NULL_STRATEGIES.EDGE_PERMUTATION:
293
+ # Edge permutation ignores mask, just test it doesn't crash
294
+ result = generator_func(graph, attributes, n_samples=2)
295
+ assert result.shape[0] == 12 # 2 samples * 6 nodes
296
+
297
+ else:
298
+ # Gaussian and node_permutation with mask
299
+ result = generator_func(graph, attributes, mask=mask_array, n_samples=2)
300
+
301
+ # Check that structure is maintained
302
+ assert result.shape == (12, 2) # 2 samples * 6 nodes
303
+
304
+
305
+ def test_edge_cases_and_errors():
306
+ """Test edge cases and error conditions for null generators."""
307
+ # Create minimal test graph
308
+ graph = ig.Graph(3)
309
+ graph.vs["attr1"] = [1.0, 2.0, 0.0]
310
+ graph.vs["bad_attr"] = [0.0, 0.0, 0.0] # All zeros
311
+ graph.add_edges([(0, 1), (1, 2)])
312
+
313
+ # Test 1: All zero attribute should raise error for all generators
314
+ with pytest.raises(ValueError):
315
+ uniform_null(graph, ["bad_attr"])
316
+
317
+ with pytest.raises(ValueError):
318
+ parametric_null(graph, ["bad_attr"])
319
+
320
+ with pytest.raises(ValueError):
321
+ node_permutation_null(graph, ["bad_attr"])
322
+
323
+ with pytest.raises(ValueError):
324
+ edge_permutation_null(graph, ["bad_attr"])
325
+
326
+ # Test 2: Empty mask should raise error
327
+ empty_mask = np.array([False, False, False])
328
+ with pytest.raises(ValueError, match="No nodes in mask"):
329
+ uniform_null(graph, ["attr1"], mask=empty_mask)
330
+
331
+ # Test 3: Single node mask (edge case)
332
+ single_mask = np.array([True, False, False])
333
+ result = uniform_null(graph, ["attr1"], mask=single_mask)
334
+ assert result.shape == (3, 1) # Should work
335
+
336
+ # Test 4: Replace parameter in node permutation
337
+ result_no_replace = node_permutation_null(
338
+ graph, ["attr1"], replace=False, n_samples=2
339
+ )
340
+ result_replace = node_permutation_null(graph, ["attr1"], replace=True, n_samples=2)
341
+
342
+ # Both should have same structure
343
+ assert result_no_replace.shape == result_replace.shape
344
+
345
+
346
+ def test_propagation_method_parameters():
347
+ """Test that propagation method and additional arguments are properly passed through."""
348
+ # Create test graph
349
+ graph = ig.Graph(4)
350
+ graph.vs["attr1"] = [1.0, 2.0, 0.0, 1.5]
351
+ graph.add_edges([(0, 1), (1, 2), (2, 3)])
352
+
353
+ # Test different damping parameters produce different results
354
+ result_default = uniform_null(graph, ["attr1"])
355
+ result_damped = uniform_null(
356
+ graph, ["attr1"], additional_propagation_args={"damping": 0.5}
357
+ )
358
+
359
+ # Results should be different with different damping
360
+ assert not np.allclose(
361
+ result_default.values, result_damped.values
362
+ ), "Different damping should produce different results"
363
+
364
+ # Test that all generators accept method parameters
365
+ for generator_name, generator_func in NULL_GENERATORS.items():
366
+ if generator_name == NULL_STRATEGIES.UNIFORM:
367
+ result = generator_func(
368
+ graph, ["attr1"], additional_propagation_args={"damping": 0.8}
369
+ )
370
+ else:
371
+ result = generator_func(
372
+ graph,
373
+ ["attr1"],
374
+ additional_propagation_args={"damping": 0.8},
375
+ n_samples=2,
376
+ )
377
+
378
+ # Should produce valid results
379
+ assert isinstance(result, pd.DataFrame)
380
+ assert not result.empty
@@ -0,0 +1,133 @@
1
+ import pytest
2
+ import numpy as np
3
+ import pandas as pd
4
+ from napistu.statistics import quantiles
5
+
6
+
7
+ def test_calculate_quantiles_valid_inputs():
8
+ """Test calculate_quantiles with valid, well-formed inputs."""
9
+ # Create observed data: 4 features x 3 attributes
10
+ observed = pd.DataFrame(
11
+ [[0.8, 0.3, 0.9], [0.2, 0.7, 0.1], [0.5, 0.5, 0.5], [0.1, 0.9, 0.2]],
12
+ index=["gene1", "gene2", "gene3", "gene4"],
13
+ columns=["attr1", "attr2", "attr3"],
14
+ )
15
+
16
+ # Create null data: 2 samples per feature (8 rows total)
17
+ null_index = ["gene1", "gene2", "gene3", "gene4"] * 2
18
+ null_data = pd.DataFrame(
19
+ [
20
+ [0.1, 0.2, 0.3], # gene1 sample 1
21
+ [0.4, 0.5, 0.6], # gene2 sample 1
22
+ [0.7, 0.8, 0.9], # gene3 sample 1
23
+ [0.0, 0.1, 0.2], # gene4 sample 1
24
+ [0.2, 0.3, 0.4], # gene1 sample 2
25
+ [0.5, 0.6, 0.7], # gene2 sample 2
26
+ [0.8, 0.9, 1.0], # gene3 sample 2
27
+ [0.1, 0.2, 0.3], # gene4 sample 2
28
+ ],
29
+ index=null_index,
30
+ columns=["attr1", "attr2", "attr3"],
31
+ )
32
+
33
+ # Calculate quantiles
34
+ result = quantiles.calculate_quantiles(observed, null_data)
35
+
36
+ # Verify output structure
37
+ assert result.shape == observed.shape
38
+ assert list(result.index) == list(observed.index)
39
+ assert list(result.columns) == list(observed.columns)
40
+
41
+ # Check specific quantile calculations
42
+ # gene1, attr1: observed=0.8, nulls=[0.1, 0.2] -> quantile = 1.0 (100%)
43
+ assert result.loc["gene1", "attr1"] == 1.0
44
+
45
+ # gene2, attr2: observed=0.7, nulls=[0.5, 0.6] -> quantile = 1.0 (100%)
46
+ assert result.loc["gene2", "attr2"] == 1.0
47
+
48
+ # gene3, attr3: observed=0.5, nulls=[0.9, 1.0] -> quantile = 0.0 (0%)
49
+ assert result.loc["gene3", "attr3"] == 0.0
50
+
51
+ # gene4, attr1: observed=0.1, nulls=[0.0, 0.1]
52
+ # With ≤: 0.0 ≤ 0.1 (True), 0.1 ≤ 0.1 (True) → 2/2 = 1.0
53
+ assert result.loc["gene4", "attr1"] == 1.0
54
+
55
+
56
+ def test_calculate_quantiles_error_cases():
57
+ """Test calculate_quantiles with invalid inputs that should raise errors or warnings."""
58
+ # Base observed data
59
+ observed = pd.DataFrame(
60
+ [[0.8, 0.3], [0.2, 0.7]], index=["gene1", "gene2"], columns=["attr1", "attr2"]
61
+ )
62
+
63
+ # Test 1: Mismatched columns
64
+ null_wrong_cols = pd.DataFrame(
65
+ [[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]],
66
+ index=["gene1", "gene2"],
67
+ columns=["attr1", "attr2", "attr3"], # Extra column
68
+ )
69
+
70
+ with pytest.raises((KeyError, ValueError)):
71
+ quantiles.calculate_quantiles(observed, null_wrong_cols)
72
+
73
+ # Test 2: Missing features in null data
74
+ null_missing_feature = pd.DataFrame(
75
+ [[0.1, 0.2]], index=["gene1"], columns=["attr1", "attr2"] # Missing gene2
76
+ )
77
+
78
+ # Current implementation doesn't validate - it will likely fail in groupby or indexing
79
+ # This test verifies current behavior (may change if validation added)
80
+ try:
81
+ result = quantiles.calculate_quantiles(observed, null_missing_feature)
82
+ # If it succeeds, gene2 quantiles will be invalid/error
83
+ assert True # Just check it doesn't crash for now
84
+ except (KeyError, ValueError, IndexError):
85
+ assert True # Expected behavior
86
+
87
+ # Test 3: Unequal null samples per feature
88
+ null_unequal_samples = pd.DataFrame(
89
+ [
90
+ [0.1, 0.2], # gene1 sample 1
91
+ [0.3, 0.4], # gene1 sample 2
92
+ [0.5, 0.6], # gene2 sample 1 (only 1 sample)
93
+ ],
94
+ index=["gene1", "gene1", "gene2"],
95
+ columns=["attr1", "attr2"],
96
+ )
97
+
98
+ # This should still work but may give different results
99
+ result = quantiles.calculate_quantiles(observed, null_unequal_samples)
100
+ assert result.shape == observed.shape
101
+
102
+ # Test 4: Empty null data
103
+ null_empty = pd.DataFrame(columns=["attr1", "attr2"])
104
+
105
+ with pytest.raises((ValueError, IndexError)):
106
+ quantiles.calculate_quantiles(observed, null_empty)
107
+
108
+ # Test 5: Single null sample (edge case)
109
+ null_single = pd.DataFrame(
110
+ [[0.1, 0.2], [0.5, 0.6]], index=["gene1", "gene2"], columns=["attr1", "attr2"]
111
+ )
112
+
113
+ result = quantiles.calculate_quantiles(observed, null_single)
114
+ assert result.shape == observed.shape
115
+ # With single sample, results should be binary (0 or 1)
116
+ assert all(val in [0.0, 1.0] for val in result.values.flatten())
117
+
118
+ # Test 6: NaN values in data
119
+ observed_with_nan = observed.copy()
120
+ observed_with_nan.loc["gene1", "attr1"] = np.nan
121
+
122
+ null_with_nan = pd.DataFrame(
123
+ [[np.nan, 0.2], [0.4, 0.5], [0.1, 0.3], [0.6, 0.7]],
124
+ index=["gene1", "gene2", "gene1", "gene2"],
125
+ columns=["attr1", "attr2"],
126
+ )
127
+
128
+ # Should raise ValueError for NaN values
129
+ with pytest.raises(ValueError, match="NaN values found in observed data"):
130
+ quantiles.calculate_quantiles(observed_with_nan, null_single)
131
+
132
+ with pytest.raises(ValueError, match="NaN values found in null data"):
133
+ quantiles.calculate_quantiles(observed, null_with_nan)