pdex 0.2.0__tar.gz → 0.2.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -5,10 +5,41 @@ on: [push, pull_request]
5
5
  jobs:
6
6
  all_jobs:
7
7
  runs-on: ubuntu-latest
8
- needs: [formatting, type-checking, pytest]
8
+ needs: [formatting, type-checking, pytest, semver-check]
9
+ if: always()
9
10
  steps:
10
11
  - name: Complete
11
- run: echo "Complete"
12
+ run: |
13
+ if [[ "${{ contains(needs.*.result, 'failure') }}" == "true" ]]; then
14
+ echo "One or more required jobs failed."
15
+ exit 1
16
+ fi
17
+ echo "Complete"
18
+
19
+ semver-check:
20
+ runs-on: ubuntu-latest
21
+ if: github.event_name == 'pull_request'
22
+
23
+ steps:
24
+ - uses: actions/checkout@v4
25
+ with:
26
+ fetch-depth: 0
27
+
28
+ - name: check version bump
29
+ run: |
30
+ BASE_VERSION=$(git show origin/${{ github.base_ref }}:pyproject.toml \
31
+ | python3 -c "import sys, tomllib; print(tomllib.load(sys.stdin.buffer)['project']['version'])")
32
+ PR_VERSION=$(python3 -c "import tomllib; print(tomllib.load(open('pyproject.toml','rb'))['project']['version'])")
33
+ echo "Base version: $BASE_VERSION"
34
+ echo "PR version: $PR_VERSION"
35
+ if [ -z "$BASE_VERSION" ] || [ -z "$PR_VERSION" ]; then
36
+ echo "ERROR: failed to parse version from pyproject.toml"
37
+ exit 1
38
+ fi
39
+ if [ "$BASE_VERSION" = "$PR_VERSION" ]; then
40
+ echo "ERROR: version in pyproject.toml ($PR_VERSION) must be bumped before merging."
41
+ exit 1
42
+ fi
12
43
 
13
44
  install-job:
14
45
  runs-on: ubuntu-latest
@@ -36,7 +36,7 @@ uv run ty check
36
36
 
37
37
  ### Core Pipeline (`src/pdex/__init__.py`)
38
38
 
39
- The main entry point is `pdex(adata, groupby, mode, threads, is_log1p, geometric_mean, as_pandas, **kwargs)`, which:
39
+ The main entry point is `pdex(adata, groupby, mode, threads, is_log1p, geometric_mean, as_pandas, epsilon, **kwargs)`, which:
40
40
 
41
41
  1. Validates the `groupby` column in `adata.obs`
42
42
  2. Extracts unique groups (filters NaN and empty strings)
@@ -79,8 +79,9 @@ The returned Polars DataFrame (or pandas DataFrame when `as_pandas=True`) has co
79
79
  | `ref_mean` | float | Pseudobulk mean for the reference, always in natural (count) space |
80
80
  | `target_membership` | int | Number of cells in the target group |
81
81
  | `ref_membership` | int | Number of cells in the reference |
82
- | `fold_change` | float | log2(target_mean / ref_mean) computed from pseudobulk means |
83
- | `percent_change` | float | (target_mean - ref_mean) / ref_mean — computed from pseudobulk means |
82
+ | `fold_change` | float | **Deprecated** alias for `log2_fold_change` (identical values). Retained for one release; emits a `FutureWarning` on every `pdex(...)` call and will be removed in pdex 0.3.0. |
83
+ | `log2_fold_change` | float | log2((target_mean + epsilon) / (ref_mean + epsilon)) — computed from pseudobulk means |
84
+ | `percent_change` | float | (target_mean - ref_mean) / (ref_mean + epsilon) — computed from pseudobulk means |
84
85
  | `p_value` | float | Mann-Whitney U p-value (per-cell vectors) |
85
86
  | `statistic` | float | Mann-Whitney U statistic |
86
87
  | `fdr` | float | FDR-corrected p-value, applied per-group across genes. For `on_target` mode, applied across all groups. |
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: pdex
3
- Version: 0.2.0
3
+ Version: 0.2.2
4
4
  Summary: Parallel differential expression for single-cell perturbation sequencing
5
5
  Author-email: noam teyssier <noam.teyssier@arcinstitute.org>
6
6
  License-File: LICENSE
@@ -113,7 +113,8 @@ Returns a Polars DataFrame (or pandas if `as_pandas=True`) with one row per (gro
113
113
  | `ref_mean` | Pseudobulk mean for the reference (count space) |
114
114
  | `target_membership` | Number of cells in the target group |
115
115
  | `ref_membership` | Number of cells in the reference |
116
- | `fold_change` | log2(target_mean / ref_mean) |
116
+ | `fold_change` | **Deprecated alias** for `log2_fold_change` (identical values). Will be removed in pdex 0.3.0. |
117
+ | `log2_fold_change` | log2((target_mean + epsilon) / (ref_mean + epsilon)), with `epsilon` defaulting to 0 |
117
118
  | `percent_change` | (target_mean - ref_mean) / (ref_mean + epsilon), with `epsilon` defaulting to 0 |
118
119
  | `p_value` | Mann-Whitney U p-value |
119
120
  | `statistic` | Mann-Whitney U statistic |
@@ -95,7 +95,8 @@ Returns a Polars DataFrame (or pandas if `as_pandas=True`) with one row per (gro
95
95
  | `ref_mean` | Pseudobulk mean for the reference (count space) |
96
96
  | `target_membership` | Number of cells in the target group |
97
97
  | `ref_membership` | Number of cells in the reference |
98
- | `fold_change` | log2(target_mean / ref_mean) |
98
+ | `fold_change` | **Deprecated alias** for `log2_fold_change` (identical values). Will be removed in pdex 0.3.0. |
99
+ | `log2_fold_change` | log2((target_mean + epsilon) / (ref_mean + epsilon)), with `epsilon` defaulting to 0 |
99
100
  | `percent_change` | (target_mean - ref_mean) / (ref_mean + epsilon), with `epsilon` defaulting to 0 |
100
101
  | `p_value` | Mann-Whitney U p-value |
101
102
  | `statistic` | Mann-Whitney U statistic |
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "pdex"
3
- version = "0.2.0"
3
+ version = "0.2.2"
4
4
  description = "Parallel differential expression for single-cell perturbation sequencing"
5
5
  readme = "README.md"
6
6
  authors = [{ name = "noam teyssier", email = "noam.teyssier@arcinstitute.org" }]
@@ -12,7 +12,7 @@ from scipy.sparse import csr_matrix, issparse
12
12
  from scipy.stats import false_discovery_control
13
13
  from tqdm import tqdm
14
14
 
15
- from pdex._math import fold_change, mwu, percent_change, pseudobulk
15
+ from pdex._math import log2_fold_change, mwu, percent_change, pseudobulk
16
16
 
17
17
  from ._utils import _detect_is_log1p, set_numba_threadpool
18
18
 
@@ -129,9 +129,9 @@ def _isolate_matrix(
129
129
  if adata.X is None:
130
130
  raise ValueError("AnnData object does not have a matrix.")
131
131
  if mask_y is None:
132
- result = adata.X[mask_x] # type: ignore[not-subscriptable]
132
+ result = adata.X[mask_x] # ty: ignore[not-subscriptable]
133
133
  else:
134
- result = adata.X[mask_x, mask_y] # type: ignore[not-subscriptable]
134
+ result = adata.X[mask_x, mask_y] # ty: ignore[not-subscriptable]
135
135
 
136
136
  # Fast path: already in-memory
137
137
  if isinstance(result, (np.ndarray, csr_matrix)):
@@ -151,6 +151,7 @@ def pdex(
151
151
  is_log1p: bool | None = None,
152
152
  geometric_mean: bool = True,
153
153
  as_pandas: bool = False,
154
+ epsilon: float = 0.0,
154
155
  **kwargs,
155
156
  ) -> pl.DataFrame | pd.DataFrame:
156
157
  """Run parallel differential expression analysis on single-cell data.
@@ -201,6 +202,22 @@ def pdex(
201
202
  as_pandas:
202
203
  If ``True``, return a :class:`pandas.DataFrame` instead of a
203
204
  :class:`polars.DataFrame`. Requires ``pyarrow``.
205
+ epsilon:
206
+ Pseudocount added to ``ref_mean`` (and, for the log2 ratio only, to ``target_mean``) before computing
207
+ ``fold_change`` and ``percent_change``. When ``epsilon > 0``, extreme
208
+ values from near-zero reference means (scRNA-seq sparsity artifact) are
209
+ dampened toward zero. Has no effect on the Mann-Whitney U p-value or FDR.
210
+ Default ``0.0`` preserves existing behaviour.
211
+
212
+ **Recommended usage:** For scRNA-seq CRISPRi/CRISPRa screens where many
213
+ genes are unexpressed in the reference group, start with ``epsilon=0.5``.
214
+ This provides modest dampening without substantially compressing fold changes
215
+ for well-expressed genes. For complete suppression of the sparsity artifact,
216
+ combine with a ``min_mean_expression`` pre-filter on the reference group —
217
+ ``epsilon`` alone cannot eliminate low p-values arising from per-cell
218
+ distributional shifts in near-zero genes.
219
+
220
+ Must be non-negative. Raises :class:`ValueError` if negative.
204
221
  **kwargs:
205
222
  Mode-specific keyword arguments:
206
223
 
@@ -216,14 +233,21 @@ def pdex(
216
233
  pl.DataFrame | pd.DataFrame
217
234
  One row per (group, feature) pair with columns: ``target``, ``feature``,
218
235
  ``target_mean``, ``ref_mean``, ``target_membership``, ``ref_membership``,
219
- ``fold_change``, ``percent_change``, ``p_value``, ``statistic``, ``fdr``.
236
+ ``fold_change``, ``log2_fold_change``, ``percent_change``, ``p_value``,
237
+ ``statistic``, ``fdr``.
220
238
 
221
239
  ``target_mean`` and ``ref_mean`` are always in **natural (count) space**.
222
240
 
223
- ``fold_change`` and ``percent_change`` are derived from the pseudobulk
224
- means (not from the per-cell MWU test inputs): ``fold_change`` is
225
- ``log2(target_mean / ref_mean)`` and ``percent_change`` is
226
- ``(target_mean - ref_mean) / ref_mean``. The MWU ``p_value`` and
241
+ ``log2_fold_change`` and ``percent_change`` are derived from the pseudobulk
242
+ means (not from the per-cell MWU test inputs): ``log2_fold_change`` is
243
+ ``log2((target_mean + epsilon) / (ref_mean + epsilon))`` and
244
+ ``percent_change`` is ``(target_mean - ref_mean) / (ref_mean + epsilon)``.
245
+
246
+ ``fold_change`` is a **deprecated** alias for ``log2_fold_change``
247
+ (identical values). It is retained for one release to ease migration
248
+ and will be removed in pdex 0.3.0. New code should read
249
+ ``log2_fold_change`` directly. A :class:`FutureWarning` is emitted
250
+ on every ``pdex(...)`` call. The MWU ``p_value`` and
227
251
  ``statistic`` are computed directly on the per-cell expression vectors.
228
252
 
229
253
  For ``mode="ref"``, the reference group itself is excluded from the output.
@@ -239,6 +263,17 @@ def pdex(
239
263
  adata.n_vars,
240
264
  )
241
265
 
266
+ if epsilon < 0:
267
+ raise ValueError(f"epsilon must be non-negative, got {epsilon}")
268
+
269
+ warnings.warn(
270
+ "The `fold_change` column in pdex output is deprecated and will be "
271
+ "removed in pdex 0.3.0. Use `log2_fold_change` instead — it contains "
272
+ "the same values (`log2(target_mean / ref_mean)`).",
273
+ FutureWarning,
274
+ stacklevel=2,
275
+ )
276
+
242
277
  # Set the global threadpool for numba
243
278
  set_numba_threadpool(threads)
244
279
 
@@ -270,6 +305,7 @@ def pdex(
270
305
  reference=reference,
271
306
  geometric_mean=geometric_mean,
272
307
  is_log1p=is_log1p,
308
+ epsilon=epsilon,
273
309
  )
274
310
  elif mode == "all":
275
311
  if kwargs:
@@ -283,6 +319,7 @@ def pdex(
283
319
  groupby=groupby,
284
320
  geometric_mean=geometric_mean,
285
321
  is_log1p=is_log1p,
322
+ epsilon=epsilon,
286
323
  )
287
324
  elif mode == "on_target":
288
325
  gene_col = kwargs.pop("gene_col", None)
@@ -303,6 +340,7 @@ def pdex(
303
340
  reference=reference,
304
341
  geometric_mean=geometric_mean,
305
342
  is_log1p=is_log1p,
343
+ epsilon=epsilon,
306
344
  )
307
345
  else:
308
346
  raise ValueError(f"Invalid mode: {mode}")
@@ -318,6 +356,7 @@ def _pdex_ref(
318
356
  reference: str = DEFAULT_REFERENCE,
319
357
  geometric_mean: bool = True,
320
358
  is_log1p: bool = False,
359
+ epsilon: float = 0.0,
321
360
  ) -> pl.DataFrame:
322
361
  unique_groups, unique_group_indices = _unique_groups(adata.obs, groupby)
323
362
  log.info("Found %d groups (excluding reference)", len(unique_groups) - 1)
@@ -353,8 +392,8 @@ def _pdex_ref(
353
392
  group_matrix, geometric_mean=geometric_mean, is_log1p=is_log1p
354
393
  )
355
394
 
356
- fc = fold_change(group_bulk, ref_bulk)
357
- pc = percent_change(group_bulk, ref_bulk)
395
+ lfc = log2_fold_change(group_bulk, ref_bulk, epsilon)
396
+ pc = percent_change(group_bulk, ref_bulk, epsilon)
358
397
  mwu_result = mwu(group_matrix, ref_data)
359
398
 
360
399
  mwu_statistic = mwu_result.statistic
@@ -370,7 +409,8 @@ def _pdex_ref(
370
409
  "ref_mean": np.asarray(ref_bulk).ravel(),
371
410
  "target_membership": group_mask.size,
372
411
  "ref_membership": ref_membership,
373
- "fold_change": fc,
412
+ "fold_change": lfc,
413
+ "log2_fold_change": lfc,
374
414
  "percent_change": pc,
375
415
  "p_value": mwu_pvalue,
376
416
  "statistic": mwu_statistic,
@@ -386,6 +426,7 @@ def _pdex_all(
386
426
  groupby: str,
387
427
  geometric_mean: bool = True,
388
428
  is_log1p: bool = False,
429
+ epsilon: float = 0.0,
389
430
  ) -> pl.DataFrame:
390
431
  unique_groups, unique_group_indices = _unique_groups(adata.obs, groupby)
391
432
  log.info("Found %d groups for 1-vs-rest comparison", len(unique_groups))
@@ -414,8 +455,8 @@ def _pdex_all(
414
455
  rest_matrix, geometric_mean=geometric_mean, is_log1p=is_log1p
415
456
  )
416
457
 
417
- fc = fold_change(group_bulk, rest_bulk)
418
- pc = percent_change(group_bulk, rest_bulk)
458
+ lfc = log2_fold_change(group_bulk, rest_bulk, epsilon)
459
+ pc = percent_change(group_bulk, rest_bulk, epsilon)
419
460
  mwu_result = mwu(group_matrix, rest_matrix)
420
461
 
421
462
  mwu_statistic = mwu_result.statistic
@@ -431,7 +472,8 @@ def _pdex_all(
431
472
  "ref_mean": np.asarray(rest_bulk).ravel(),
432
473
  "target_membership": group_mask.size,
433
474
  "ref_membership": rest_mask.size,
434
- "fold_change": fc,
475
+ "fold_change": lfc,
476
+ "log2_fold_change": lfc,
435
477
  "percent_change": pc,
436
478
  "p_value": mwu_pvalue,
437
479
  "statistic": mwu_statistic,
@@ -450,6 +492,7 @@ def _pdex_on_target(
450
492
  reference: str = DEFAULT_REFERENCE,
451
493
  geometric_mean: bool = True,
452
494
  is_log1p: bool = False,
495
+ epsilon: float = 0.0,
453
496
  ) -> pl.DataFrame:
454
497
  unique_groups, unique_group_indices = _unique_groups(adata.obs, groupby)
455
498
  ref_index = _identify_reference_index(unique_groups, reference)
@@ -501,8 +544,12 @@ def _pdex_on_target(
501
544
  pseudobulk(ref_col, geometric_mean=geometric_mean, is_log1p=is_log1p)[0]
502
545
  )
503
546
 
504
- fc = float(fold_change(np.array([target_mean]), np.array([ref_mean]))[0])
505
- pc = float(percent_change(np.array([target_mean]), np.array([ref_mean]))[0])
547
+ lfc = float(
548
+ log2_fold_change(np.array([target_mean]), np.array([ref_mean]), epsilon)[0]
549
+ )
550
+ pc = float(
551
+ percent_change(np.array([target_mean]), np.array([ref_mean]), epsilon)[0]
552
+ )
506
553
 
507
554
  mwu_result = mwu(group_col, ref_col)
508
555
  p_value = float(np.clip(np.asarray(mwu_result.pvalue).ravel()[0], 0, 1))
@@ -516,7 +563,8 @@ def _pdex_on_target(
516
563
  "ref_mean": ref_mean,
517
564
  "target_membership": group_mask.size,
518
565
  "ref_membership": ref_membership,
519
- "fold_change": fc,
566
+ "fold_change": lfc,
567
+ "log2_fold_change": lfc,
520
568
  "percent_change": pc,
521
569
  "p_value": p_value,
522
570
  "statistic": statistic,
@@ -14,7 +14,7 @@ def _log1p_col_mean(matrix: np.ndarray) -> np.ndarray:
14
14
  """Mean of log1p(X) across rows (axis=0) for a dense 2-D array."""
15
15
  n_rows, n_cols = matrix.shape
16
16
  result = np.zeros(n_cols)
17
- for j in nb.prange(n_cols): # type: ignore[attr-defined]
17
+ for j in nb.prange(n_cols): # ty: ignore[not-iterable]
18
18
  s = 0.0
19
19
  for i in range(n_rows):
20
20
  s += np.log1p(matrix[i, j])
@@ -26,7 +26,7 @@ def _log1p_col_mean(matrix: np.ndarray) -> np.ndarray:
26
26
  def _expm1_vec(x: np.ndarray) -> np.ndarray:
27
27
  """Element-wise expm1 over a 1-D array."""
28
28
  result = np.empty_like(x)
29
- for i in nb.prange(len(x)): # type: ignore[attr-defined]
29
+ for i in nb.prange(len(x)): # ty: ignore[not-iterable]
30
30
  result[i] = np.expm1(x[i])
31
31
  return result
32
32
 
@@ -36,7 +36,7 @@ def _expm1_vec_mean(matrix: np.ndarray) -> np.ndarray:
36
36
  """Mean of expm1(X) across rows (axis=0) for a dense 2-D array."""
37
37
  n_rows, n_cols = matrix.shape
38
38
  result = np.zeros(n_cols)
39
- for j in nb.prange(n_cols): # type: ignore[attr-defined]
39
+ for j in nb.prange(n_cols): # ty: ignore[not-iterable]
40
40
  s = 0.0
41
41
  for i in range(n_rows):
42
42
  s += np.expm1(matrix[i, j])
@@ -106,15 +106,27 @@ def bulk_matrix_geometric(
106
106
 
107
107
 
108
108
  @nb.njit(parallel=True)
109
- def fold_change(x: np.ndarray, y: np.ndarray) -> np.ndarray:
110
- """Calculates the log2-fold change between two arrays."""
111
- return np.log2(x / y)
109
+ def log2_fold_change(x: np.ndarray, y: np.ndarray, epsilon: float = 0.0) -> np.ndarray:
110
+ """Calculates the log2-fold change between two arrays.
111
+
112
+ When ``epsilon > 0``, adds a small pseudocount to both numerator and
113
+ denominator before taking the ratio, dampening extreme fold changes that arise
114
+ when the reference mean is near zero (scRNA-seq sparsity artifact).
115
+ """
116
+ return np.log2((x + epsilon) / (y + epsilon))
112
117
 
113
118
 
114
119
  @nb.njit(parallel=True)
115
- def percent_change(x: np.ndarray, y: np.ndarray) -> np.ndarray:
116
- """Calculates the change between two arrays."""
117
- return (x - y) / y
120
+ def percent_change(
121
+ x: np.ndarray, y: np.ndarray, prior_count: float = 0.0
122
+ ) -> np.ndarray:
123
+ """Calculates the percent change between two arrays.
124
+
125
+ When ``prior_count > 0``, adds a pseudocount to the denominator before
126
+ computing the ratio, dampening extreme values when the reference mean is
127
+ near zero (scRNA-seq sparsity artifact).
128
+ """
129
+ return (x - y) / (y + prior_count)
118
130
 
119
131
 
120
132
  def mwu(
@@ -1,32 +1,32 @@
1
- """Tests for pdex._math (fold_change, percent_change, bulk_matrix_geometric)."""
1
+ """Tests for pdex._math (log2_fold_change, percent_change, bulk_matrix_geometric)."""
2
2
 
3
3
  import numpy as np
4
4
 
5
- from pdex._math import bulk_matrix_geometric, fold_change, percent_change
5
+ from pdex._math import bulk_matrix_geometric, log2_fold_change, percent_change
6
6
 
7
7
 
8
8
  class TestFoldChange:
9
9
  def test_ratio_of_two(self):
10
10
  x = np.array([4.0, 8.0])
11
11
  y = np.array([2.0, 4.0])
12
- result = fold_change(x, y)
12
+ result = log2_fold_change(x, y)
13
13
  np.testing.assert_allclose(result, [1.0, 1.0])
14
14
 
15
15
  def test_equal_values(self):
16
16
  x = np.array([3.0, 5.0])
17
- result = fold_change(x, x)
17
+ result = log2_fold_change(x, x)
18
18
  np.testing.assert_allclose(result, [0.0, 0.0])
19
19
 
20
20
  def test_half(self):
21
21
  x = np.array([1.0])
22
22
  y = np.array([2.0])
23
- result = fold_change(x, y)
23
+ result = log2_fold_change(x, y)
24
24
  np.testing.assert_allclose(result, [-1.0])
25
25
 
26
26
  def test_known_values(self):
27
27
  x = np.array([1.0, 2.0, 4.0, 8.0])
28
28
  y = np.array([1.0, 1.0, 1.0, 1.0])
29
- result = fold_change(x, y)
29
+ result = log2_fold_change(x, y)
30
30
  np.testing.assert_allclose(result, [0.0, 1.0, 2.0, 3.0])
31
31
 
32
32
 
@@ -55,6 +55,72 @@ class TestPercentChange:
55
55
  np.testing.assert_allclose(result, [-0.5, 0.0, 0.5])
56
56
 
57
57
 
58
+ class TestFoldChangeWithEpsilon:
59
+ def test_zero_epsilon_matches_baseline(self):
60
+ """epsilon=0.0 must be identical to calling without it."""
61
+ x = np.array([4.0, 8.0, 0.1])
62
+ y = np.array([2.0, 4.0, 0.001])
63
+ np.testing.assert_array_equal(
64
+ log2_fold_change(x, y), log2_fold_change(x, y, 0.0)
65
+ )
66
+
67
+ def test_dampens_extreme_fc_from_near_zero_denominator(self):
68
+ """epsilon=0.5 pulls extreme FC toward zero."""
69
+ x = np.array([0.1])
70
+ y = np.array([0.001])
71
+ fc_raw = log2_fold_change(x, y)[0]
72
+ fc_dampened = log2_fold_change(x, y, 0.5)[0]
73
+ assert abs(fc_dampened) < abs(fc_raw)
74
+ np.testing.assert_allclose(fc_dampened, np.log2(0.6 / 0.501), rtol=1e-5)
75
+
76
+ def test_preserves_direction(self):
77
+ """epsilon should not flip the sign of fold change."""
78
+ x = np.array([2.0, 0.5])
79
+ y = np.array([1.0, 1.0])
80
+ result = log2_fold_change(x, y, 0.5)
81
+ assert result[0] > 0
82
+ assert result[1] < 0
83
+
84
+ def test_equal_means_still_zero(self):
85
+ """When target_mean == ref_mean, FC should be 0 regardless of epsilon."""
86
+ x = np.array([0.5, 2.0])
87
+ result = log2_fold_change(x, x, 0.5)
88
+ np.testing.assert_allclose(result, [0.0, 0.0])
89
+
90
+
91
+ class TestPercentChangeWithPriorCount:
92
+ def test_zero_epsilon_matches_baseline(self):
93
+ """epsilon=0.0 must be identical to calling without it."""
94
+ x = np.array([4.0, 8.0, 0.1])
95
+ y = np.array([2.0, 4.0, 0.001])
96
+ np.testing.assert_array_equal(percent_change(x, y), percent_change(x, y, 0.0))
97
+
98
+ def test_dampens_extreme_pc_from_near_zero_denominator(self):
99
+ """epsilon=0.5 pulls extreme percent change toward zero."""
100
+ x = np.array([0.1])
101
+ y = np.array([0.001])
102
+ pc_raw = percent_change(x, y)[0]
103
+ pc_dampened = percent_change(x, y, 0.5)[0]
104
+ assert abs(pc_dampened) < abs(pc_raw)
105
+ np.testing.assert_allclose(
106
+ pc_dampened, (0.1 - 0.001) / (0.001 + 0.5), rtol=1e-5
107
+ )
108
+
109
+ def test_preserves_direction(self):
110
+ """epsilon should not flip the sign of percent change."""
111
+ x = np.array([2.0, 0.5])
112
+ y = np.array([1.0, 1.0])
113
+ result = percent_change(x, y, 0.5)
114
+ assert result[0] > 0
115
+ assert result[1] < 0
116
+
117
+ def test_equal_means_still_zero(self):
118
+ """When target_mean == ref_mean, percent_change should be 0 regardless of epsilon."""
119
+ x = np.array([0.5, 2.0])
120
+ result = percent_change(x, x, 0.5)
121
+ np.testing.assert_allclose(result, [0.0, 0.0])
122
+
123
+
58
124
  class TestBulkMatrixGeometric:
59
125
  """Tests for bulk_matrix_geometric."""
60
126
 
@@ -15,6 +15,7 @@ EXPECTED_COLUMNS = {
15
15
  "target_membership",
16
16
  "ref_membership",
17
17
  "fold_change",
18
+ "log2_fold_change",
18
19
  "percent_change",
19
20
  "p_value",
20
21
  "statistic",
@@ -137,6 +138,21 @@ class TestPdexRefMode:
137
138
  typo_arg="oops",
138
139
  )
139
140
 
141
+ def test_epsilon_accepted(self, small_adata):
142
+ """epsilon parameter is accepted without error."""
143
+ result = pdex(small_adata, groupby="guide", is_log1p=False, epsilon=0.5)
144
+ assert isinstance(result, pl.DataFrame)
145
+
146
+ def test_epsilon_zero_matches_default(self, small_adata):
147
+ """epsilon=0.0 produces identical results to omitting the parameter."""
148
+ default_result = pdex(small_adata, groupby="guide", is_log1p=False)
149
+ explicit_result = pdex(
150
+ small_adata, groupby="guide", is_log1p=False, epsilon=0.0
151
+ )
152
+ assert isinstance(default_result, pl.DataFrame)
153
+ assert isinstance(explicit_result, pl.DataFrame)
154
+ assert default_result.equals(explicit_result)
155
+
140
156
 
141
157
  class TestPdexRefSparse:
142
158
  """Tests for pdex with sparse CSR input."""
@@ -463,6 +479,10 @@ class TestPdexOnTargetValidation:
463
479
 
464
480
 
465
481
  class TestPdexValidation:
482
+ def test_negative_epsilon_raises(self, small_adata):
483
+ with pytest.raises(ValueError, match="epsilon must be non-negative"):
484
+ pdex(small_adata, groupby="guide", is_log1p=False, epsilon=-0.1)
485
+
466
486
  def test_invalid_mode(self, small_adata):
467
487
  with pytest.raises(ValueError, match="Invalid mode"):
468
488
  pdex(
@@ -645,3 +665,20 @@ class TestPdexBacked:
645
665
  rtol=1e-6,
646
666
  err_msg=f"Mismatch in column {col}",
647
667
  )
668
+
669
+
670
+ class TestLog2FoldChangeColumn:
671
+ """Regression test for the `log2_fold_change` column semantics."""
672
+
673
+ @pytest.mark.parametrize("mode", ["ref", "all"])
674
+ def test_log2_fold_change_equals_log2_ratio(self, small_adata, mode):
675
+ """log2_fold_change == log2(target_mean / ref_mean) on finite entries."""
676
+ result = pdex(small_adata, groupby="guide", mode=mode, is_log1p=False)
677
+ target = result["target_mean"].to_numpy()
678
+ ref = result["ref_mean"].to_numpy()
679
+ actual = result["log2_fold_change"].to_numpy()
680
+ with np.errstate(divide="ignore", invalid="ignore"):
681
+ expected = np.log2(target / ref)
682
+ finite = np.isfinite(expected) & np.isfinite(actual)
683
+ assert finite.any()
684
+ np.testing.assert_allclose(actual[finite], expected[finite], rtol=1e-6)
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes