pdex 0.2.0__tar.gz → 0.2.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {pdex-0.2.0 → pdex-0.2.2}/.github/workflows/ci.yml +33 -2
- {pdex-0.2.0 → pdex-0.2.2}/CLAUDE.md +4 -3
- {pdex-0.2.0 → pdex-0.2.2}/PKG-INFO +3 -2
- {pdex-0.2.0 → pdex-0.2.2}/README.md +2 -1
- {pdex-0.2.0 → pdex-0.2.2}/pyproject.toml +1 -1
- {pdex-0.2.0 → pdex-0.2.2}/src/pdex/__init__.py +65 -17
- {pdex-0.2.0 → pdex-0.2.2}/src/pdex/_math.py +21 -9
- {pdex-0.2.0 → pdex-0.2.2}/tests/test_math.py +72 -6
- {pdex-0.2.0 → pdex-0.2.2}/tests/test_pdex.py +37 -0
- {pdex-0.2.0 → pdex-0.2.2}/.github/workflows/release.yml +0 -0
- {pdex-0.2.0 → pdex-0.2.2}/.gitignore +0 -0
- {pdex-0.2.0 → pdex-0.2.2}/.python-version +0 -0
- {pdex-0.2.0 → pdex-0.2.2}/LICENSE +0 -0
- {pdex-0.2.0 → pdex-0.2.2}/src/pdex/_utils.py +0 -0
- {pdex-0.2.0 → pdex-0.2.2}/src/pdex/py.typed +0 -0
- {pdex-0.2.0 → pdex-0.2.2}/tests/conftest.py +0 -0
- {pdex-0.2.0 → pdex-0.2.2}/tests/test_internals.py +0 -0
- {pdex-0.2.0 → pdex-0.2.2}/tests/test_utils.py +0 -0
|
@@ -5,10 +5,41 @@ on: [push, pull_request]
|
|
|
5
5
|
jobs:
|
|
6
6
|
all_jobs:
|
|
7
7
|
runs-on: ubuntu-latest
|
|
8
|
-
needs: [formatting, type-checking, pytest]
|
|
8
|
+
needs: [formatting, type-checking, pytest, semver-check]
|
|
9
|
+
if: always()
|
|
9
10
|
steps:
|
|
10
11
|
- name: Complete
|
|
11
|
-
run:
|
|
12
|
+
run: |
|
|
13
|
+
if [[ "${{ contains(needs.*.result, 'failure') }}" == "true" ]]; then
|
|
14
|
+
echo "One or more required jobs failed."
|
|
15
|
+
exit 1
|
|
16
|
+
fi
|
|
17
|
+
echo "Complete"
|
|
18
|
+
|
|
19
|
+
semver-check:
|
|
20
|
+
runs-on: ubuntu-latest
|
|
21
|
+
if: github.event_name == 'pull_request'
|
|
22
|
+
|
|
23
|
+
steps:
|
|
24
|
+
- uses: actions/checkout@v4
|
|
25
|
+
with:
|
|
26
|
+
fetch-depth: 0
|
|
27
|
+
|
|
28
|
+
- name: check version bump
|
|
29
|
+
run: |
|
|
30
|
+
BASE_VERSION=$(git show origin/${{ github.base_ref }}:pyproject.toml \
|
|
31
|
+
| python3 -c "import sys, tomllib; print(tomllib.load(sys.stdin.buffer)['project']['version'])")
|
|
32
|
+
PR_VERSION=$(python3 -c "import tomllib; print(tomllib.load(open('pyproject.toml','rb'))['project']['version'])")
|
|
33
|
+
echo "Base version: $BASE_VERSION"
|
|
34
|
+
echo "PR version: $PR_VERSION"
|
|
35
|
+
if [ -z "$BASE_VERSION" ] || [ -z "$PR_VERSION" ]; then
|
|
36
|
+
echo "ERROR: failed to parse version from pyproject.toml"
|
|
37
|
+
exit 1
|
|
38
|
+
fi
|
|
39
|
+
if [ "$BASE_VERSION" = "$PR_VERSION" ]; then
|
|
40
|
+
echo "ERROR: version in pyproject.toml ($PR_VERSION) must be bumped before merging."
|
|
41
|
+
exit 1
|
|
42
|
+
fi
|
|
12
43
|
|
|
13
44
|
install-job:
|
|
14
45
|
runs-on: ubuntu-latest
|
|
@@ -36,7 +36,7 @@ uv run ty check
|
|
|
36
36
|
|
|
37
37
|
### Core Pipeline (`src/pdex/__init__.py`)
|
|
38
38
|
|
|
39
|
-
The main entry point is `pdex(adata, groupby, mode, threads, is_log1p, geometric_mean, as_pandas, **kwargs)`, which:
|
|
39
|
+
The main entry point is `pdex(adata, groupby, mode, threads, is_log1p, geometric_mean, as_pandas, epsilon, **kwargs)`, which:
|
|
40
40
|
|
|
41
41
|
1. Validates the `groupby` column in `adata.obs`
|
|
42
42
|
2. Extracts unique groups (filters NaN and empty strings)
|
|
@@ -79,8 +79,9 @@ The returned Polars DataFrame (or pandas DataFrame when `as_pandas=True`) has co
|
|
|
79
79
|
| `ref_mean` | float | Pseudobulk mean for the reference, always in natural (count) space |
|
|
80
80
|
| `target_membership` | int | Number of cells in the target group |
|
|
81
81
|
| `ref_membership` | int | Number of cells in the reference |
|
|
82
|
-
| `fold_change` | float |
|
|
83
|
-
| `
|
|
82
|
+
| `fold_change` | float | **Deprecated** alias for `log2_fold_change` (identical values). Retained for one release; emits a `FutureWarning` on every `pdex(...)` call and will be removed in pdex 0.3.0. |
|
|
83
|
+
| `log2_fold_change` | float | log2((target_mean + epsilon) / (ref_mean + epsilon)) — computed from pseudobulk means |
|
|
84
|
+
| `percent_change` | float | (target_mean - ref_mean) / (ref_mean + epsilon) — computed from pseudobulk means |
|
|
84
85
|
| `p_value` | float | Mann-Whitney U p-value (per-cell vectors) |
|
|
85
86
|
| `statistic` | float | Mann-Whitney U statistic |
|
|
86
87
|
| `fdr` | float | FDR-corrected p-value, applied per-group across genes. For `on_target` mode, applied across all groups. |
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: pdex
|
|
3
|
-
Version: 0.2.0
|
|
3
|
+
Version: 0.2.2
|
|
4
4
|
Summary: Parallel differential expression for single-cell perturbation sequencing
|
|
5
5
|
Author-email: noam teyssier <noam.teyssier@arcinstitute.org>
|
|
6
6
|
License-File: LICENSE
|
|
@@ -113,7 +113,8 @@ Returns a Polars DataFrame (or pandas if `as_pandas=True`) with one row per (gro
|
|
|
113
113
|
| `ref_mean` | Pseudobulk mean for the reference (count space) |
|
|
114
114
|
| `target_membership` | Number of cells in the target group |
|
|
115
115
|
| `ref_membership` | Number of cells in the reference |
|
|
116
|
-
| `fold_change` |
|
|
116
|
+
| `fold_change` | **Deprecated alias** for `log2_fold_change` (identical values). Will be removed in pdex 0.3.0. |
|
|
117
|
+
| `log2_fold_change` | log2((target_mean + epsilon) / (ref_mean + epsilon)); `epsilon` defaults to 0 |
|
|
117
118
|
| `percent_change` | (target_mean - ref_mean) / (ref_mean + epsilon); `epsilon` defaults to 0 |
|
|
118
119
|
| `p_value` | Mann-Whitney U p-value |
|
|
119
120
|
| `statistic` | Mann-Whitney U statistic |
|
|
@@ -95,7 +95,8 @@ Returns a Polars DataFrame (or pandas if `as_pandas=True`) with one row per (gro
|
|
|
95
95
|
| `ref_mean` | Pseudobulk mean for the reference (count space) |
|
|
96
96
|
| `target_membership` | Number of cells in the target group |
|
|
97
97
|
| `ref_membership` | Number of cells in the reference |
|
|
98
|
-
| `fold_change` |
|
|
98
|
+
| `fold_change` | **Deprecated alias** for `log2_fold_change` (identical values). Will be removed in pdex 0.3.0. |
|
|
99
|
+
| `log2_fold_change` | log2((target_mean + epsilon) / (ref_mean + epsilon)); `epsilon` defaults to 0 |
|
|
99
100
|
| `percent_change` | (target_mean - ref_mean) / (ref_mean + epsilon); `epsilon` defaults to 0 |
|
|
100
101
|
| `p_value` | Mann-Whitney U p-value |
|
|
101
102
|
| `statistic` | Mann-Whitney U statistic |
|
|
@@ -12,7 +12,7 @@ from scipy.sparse import csr_matrix, issparse
|
|
|
12
12
|
from scipy.stats import false_discovery_control
|
|
13
13
|
from tqdm import tqdm
|
|
14
14
|
|
|
15
|
-
from pdex._math import
|
|
15
|
+
from pdex._math import log2_fold_change, mwu, percent_change, pseudobulk
|
|
16
16
|
|
|
17
17
|
from ._utils import _detect_is_log1p, set_numba_threadpool
|
|
18
18
|
|
|
@@ -129,9 +129,9 @@ def _isolate_matrix(
|
|
|
129
129
|
if adata.X is None:
|
|
130
130
|
raise ValueError("AnnData object does not have a matrix.")
|
|
131
131
|
if mask_y is None:
|
|
132
|
-
result = adata.X[mask_x] #
|
|
132
|
+
result = adata.X[mask_x] # ty: ignore[not-subscriptable]
|
|
133
133
|
else:
|
|
134
|
-
result = adata.X[mask_x, mask_y] #
|
|
134
|
+
result = adata.X[mask_x, mask_y] # ty: ignore[not-subscriptable]
|
|
135
135
|
|
|
136
136
|
# Fast path: already in-memory
|
|
137
137
|
if isinstance(result, (np.ndarray, csr_matrix)):
|
|
@@ -151,6 +151,7 @@ def pdex(
|
|
|
151
151
|
is_log1p: bool | None = None,
|
|
152
152
|
geometric_mean: bool = True,
|
|
153
153
|
as_pandas: bool = False,
|
|
154
|
+
epsilon: float = 0.0,
|
|
154
155
|
**kwargs,
|
|
155
156
|
) -> pl.DataFrame | pd.DataFrame:
|
|
156
157
|
"""Run parallel differential expression analysis on single-cell data.
|
|
@@ -201,6 +202,22 @@ def pdex(
|
|
|
201
202
|
as_pandas:
|
|
202
203
|
If ``True``, return a :class:`pandas.DataFrame` instead of a
|
|
203
204
|
:class:`polars.DataFrame`. Requires ``pyarrow``.
|
|
205
|
+
epsilon:
|
|
206
|
+
Pseudocount added to both ``target_mean`` and ``ref_mean`` before computing
|
|
207
|
+
``log2_fold_change`` (and its deprecated ``fold_change`` alias) and ``percent_change``. When ``epsilon > 0``, extreme
|
|
208
|
+
values from near-zero reference means (scRNA-seq sparsity artifact) are
|
|
209
|
+
dampened toward zero. Has no effect on the Mann-Whitney U p-value or FDR.
|
|
210
|
+
Default ``0.0`` preserves existing behaviour.
|
|
211
|
+
|
|
212
|
+
**Recommended usage:** For scRNA-seq CRISPRi/CRISPRa screens where many
|
|
213
|
+
genes are unexpressed in the reference group, start with ``epsilon=0.5``.
|
|
214
|
+
This provides modest dampening without substantially compressing fold changes
|
|
215
|
+
for well-expressed genes. For complete suppression of the sparsity artifact,
|
|
216
|
+
combine with a ``min_mean_expression`` pre-filter on the reference group —
|
|
217
|
+
``epsilon`` alone cannot eliminate low p-values arising from per-cell
|
|
218
|
+
distributional shifts in near-zero genes.
|
|
219
|
+
|
|
220
|
+
Must be non-negative. Raises :class:`ValueError` if negative.
|
|
204
221
|
**kwargs:
|
|
205
222
|
Mode-specific keyword arguments:
|
|
206
223
|
|
|
@@ -216,14 +233,21 @@ def pdex(
|
|
|
216
233
|
pl.DataFrame | pd.DataFrame
|
|
217
234
|
One row per (group, feature) pair with columns: ``target``, ``feature``,
|
|
218
235
|
``target_mean``, ``ref_mean``, ``target_membership``, ``ref_membership``,
|
|
219
|
-
``fold_change``, ``
|
|
236
|
+
``fold_change``, ``log2_fold_change``, ``percent_change``, ``p_value``,
|
|
237
|
+
``statistic``, ``fdr``.
|
|
220
238
|
|
|
221
239
|
``target_mean`` and ``ref_mean`` are always in **natural (count) space**.
|
|
222
240
|
|
|
223
|
-
``
|
|
224
|
-
means (not from the per-cell MWU test inputs): ``
|
|
225
|
-
``log2(target_mean / ref_mean)`` and
|
|
226
|
-
``(target_mean - ref_mean) / ref_mean
|
|
241
|
+
``log2_fold_change`` and ``percent_change`` are derived from the pseudobulk
|
|
242
|
+
means (not from the per-cell MWU test inputs): ``log2_fold_change`` is
|
|
243
|
+
``log2((target_mean + epsilon) / (ref_mean + epsilon))`` and
|
|
244
|
+
``percent_change`` is ``(target_mean - ref_mean) / (ref_mean + epsilon)``.
|
|
245
|
+
|
|
246
|
+
``fold_change`` is a **deprecated** alias for ``log2_fold_change``
|
|
247
|
+
(identical values). It is retained for one release to ease migration
|
|
248
|
+
and will be removed in pdex 0.3.0. New code should read
|
|
249
|
+
``log2_fold_change`` directly. A :class:`FutureWarning` is emitted
|
|
250
|
+
on every ``pdex(...)`` call. The MWU ``p_value`` and
|
|
227
251
|
``statistic`` are computed directly on the per-cell expression vectors.
|
|
228
252
|
|
|
229
253
|
For ``mode="ref"``, the reference group itself is excluded from the output.
|
|
@@ -239,6 +263,17 @@ def pdex(
|
|
|
239
263
|
adata.n_vars,
|
|
240
264
|
)
|
|
241
265
|
|
|
266
|
+
if epsilon < 0:
|
|
267
|
+
raise ValueError(f"epsilon must be non-negative, got {epsilon}")
|
|
268
|
+
|
|
269
|
+
warnings.warn(
|
|
270
|
+
"The `fold_change` column in pdex output is deprecated and will be "
|
|
271
|
+
"removed in pdex 0.3.0. Use `log2_fold_change` instead — it contains "
|
|
272
|
+
"the same values (`log2(target_mean / ref_mean)`).",
|
|
273
|
+
FutureWarning,
|
|
274
|
+
stacklevel=2,
|
|
275
|
+
)
|
|
276
|
+
|
|
242
277
|
# Set the global threadpool for numba
|
|
243
278
|
set_numba_threadpool(threads)
|
|
244
279
|
|
|
@@ -270,6 +305,7 @@ def pdex(
|
|
|
270
305
|
reference=reference,
|
|
271
306
|
geometric_mean=geometric_mean,
|
|
272
307
|
is_log1p=is_log1p,
|
|
308
|
+
epsilon=epsilon,
|
|
273
309
|
)
|
|
274
310
|
elif mode == "all":
|
|
275
311
|
if kwargs:
|
|
@@ -283,6 +319,7 @@ def pdex(
|
|
|
283
319
|
groupby=groupby,
|
|
284
320
|
geometric_mean=geometric_mean,
|
|
285
321
|
is_log1p=is_log1p,
|
|
322
|
+
epsilon=epsilon,
|
|
286
323
|
)
|
|
287
324
|
elif mode == "on_target":
|
|
288
325
|
gene_col = kwargs.pop("gene_col", None)
|
|
@@ -303,6 +340,7 @@ def pdex(
|
|
|
303
340
|
reference=reference,
|
|
304
341
|
geometric_mean=geometric_mean,
|
|
305
342
|
is_log1p=is_log1p,
|
|
343
|
+
epsilon=epsilon,
|
|
306
344
|
)
|
|
307
345
|
else:
|
|
308
346
|
raise ValueError(f"Invalid mode: {mode}")
|
|
@@ -318,6 +356,7 @@ def _pdex_ref(
|
|
|
318
356
|
reference: str = DEFAULT_REFERENCE,
|
|
319
357
|
geometric_mean: bool = True,
|
|
320
358
|
is_log1p: bool = False,
|
|
359
|
+
epsilon: float = 0.0,
|
|
321
360
|
) -> pl.DataFrame:
|
|
322
361
|
unique_groups, unique_group_indices = _unique_groups(adata.obs, groupby)
|
|
323
362
|
log.info("Found %d groups (excluding reference)", len(unique_groups) - 1)
|
|
@@ -353,8 +392,8 @@ def _pdex_ref(
|
|
|
353
392
|
group_matrix, geometric_mean=geometric_mean, is_log1p=is_log1p
|
|
354
393
|
)
|
|
355
394
|
|
|
356
|
-
|
|
357
|
-
pc = percent_change(group_bulk, ref_bulk)
|
|
395
|
+
lfc = log2_fold_change(group_bulk, ref_bulk, epsilon)
|
|
396
|
+
pc = percent_change(group_bulk, ref_bulk, epsilon)
|
|
358
397
|
mwu_result = mwu(group_matrix, ref_data)
|
|
359
398
|
|
|
360
399
|
mwu_statistic = mwu_result.statistic
|
|
@@ -370,7 +409,8 @@ def _pdex_ref(
|
|
|
370
409
|
"ref_mean": np.asarray(ref_bulk).ravel(),
|
|
371
410
|
"target_membership": group_mask.size,
|
|
372
411
|
"ref_membership": ref_membership,
|
|
373
|
-
"fold_change":
|
|
412
|
+
"fold_change": lfc,
|
|
413
|
+
"log2_fold_change": lfc,
|
|
374
414
|
"percent_change": pc,
|
|
375
415
|
"p_value": mwu_pvalue,
|
|
376
416
|
"statistic": mwu_statistic,
|
|
@@ -386,6 +426,7 @@ def _pdex_all(
|
|
|
386
426
|
groupby: str,
|
|
387
427
|
geometric_mean: bool = True,
|
|
388
428
|
is_log1p: bool = False,
|
|
429
|
+
epsilon: float = 0.0,
|
|
389
430
|
) -> pl.DataFrame:
|
|
390
431
|
unique_groups, unique_group_indices = _unique_groups(adata.obs, groupby)
|
|
391
432
|
log.info("Found %d groups for 1-vs-rest comparison", len(unique_groups))
|
|
@@ -414,8 +455,8 @@ def _pdex_all(
|
|
|
414
455
|
rest_matrix, geometric_mean=geometric_mean, is_log1p=is_log1p
|
|
415
456
|
)
|
|
416
457
|
|
|
417
|
-
|
|
418
|
-
pc = percent_change(group_bulk, rest_bulk)
|
|
458
|
+
lfc = log2_fold_change(group_bulk, rest_bulk, epsilon)
|
|
459
|
+
pc = percent_change(group_bulk, rest_bulk, epsilon)
|
|
419
460
|
mwu_result = mwu(group_matrix, rest_matrix)
|
|
420
461
|
|
|
421
462
|
mwu_statistic = mwu_result.statistic
|
|
@@ -431,7 +472,8 @@ def _pdex_all(
|
|
|
431
472
|
"ref_mean": np.asarray(rest_bulk).ravel(),
|
|
432
473
|
"target_membership": group_mask.size,
|
|
433
474
|
"ref_membership": rest_mask.size,
|
|
434
|
-
"fold_change":
|
|
475
|
+
"fold_change": lfc,
|
|
476
|
+
"log2_fold_change": lfc,
|
|
435
477
|
"percent_change": pc,
|
|
436
478
|
"p_value": mwu_pvalue,
|
|
437
479
|
"statistic": mwu_statistic,
|
|
@@ -450,6 +492,7 @@ def _pdex_on_target(
|
|
|
450
492
|
reference: str = DEFAULT_REFERENCE,
|
|
451
493
|
geometric_mean: bool = True,
|
|
452
494
|
is_log1p: bool = False,
|
|
495
|
+
epsilon: float = 0.0,
|
|
453
496
|
) -> pl.DataFrame:
|
|
454
497
|
unique_groups, unique_group_indices = _unique_groups(adata.obs, groupby)
|
|
455
498
|
ref_index = _identify_reference_index(unique_groups, reference)
|
|
@@ -501,8 +544,12 @@ def _pdex_on_target(
|
|
|
501
544
|
pseudobulk(ref_col, geometric_mean=geometric_mean, is_log1p=is_log1p)[0]
|
|
502
545
|
)
|
|
503
546
|
|
|
504
|
-
|
|
505
|
-
|
|
547
|
+
lfc = float(
|
|
548
|
+
log2_fold_change(np.array([target_mean]), np.array([ref_mean]), epsilon)[0]
|
|
549
|
+
)
|
|
550
|
+
pc = float(
|
|
551
|
+
percent_change(np.array([target_mean]), np.array([ref_mean]), epsilon)[0]
|
|
552
|
+
)
|
|
506
553
|
|
|
507
554
|
mwu_result = mwu(group_col, ref_col)
|
|
508
555
|
p_value = float(np.clip(np.asarray(mwu_result.pvalue).ravel()[0], 0, 1))
|
|
@@ -516,7 +563,8 @@ def _pdex_on_target(
|
|
|
516
563
|
"ref_mean": ref_mean,
|
|
517
564
|
"target_membership": group_mask.size,
|
|
518
565
|
"ref_membership": ref_membership,
|
|
519
|
-
"fold_change":
|
|
566
|
+
"fold_change": lfc,
|
|
567
|
+
"log2_fold_change": lfc,
|
|
520
568
|
"percent_change": pc,
|
|
521
569
|
"p_value": p_value,
|
|
522
570
|
"statistic": statistic,
|
|
@@ -14,7 +14,7 @@ def _log1p_col_mean(matrix: np.ndarray) -> np.ndarray:
|
|
|
14
14
|
"""Mean of log1p(X) across rows (axis=0) for a dense 2-D array."""
|
|
15
15
|
n_rows, n_cols = matrix.shape
|
|
16
16
|
result = np.zeros(n_cols)
|
|
17
|
-
for j in nb.prange(n_cols): #
|
|
17
|
+
for j in nb.prange(n_cols): # ty: ignore[not-iterable]
|
|
18
18
|
s = 0.0
|
|
19
19
|
for i in range(n_rows):
|
|
20
20
|
s += np.log1p(matrix[i, j])
|
|
@@ -26,7 +26,7 @@ def _log1p_col_mean(matrix: np.ndarray) -> np.ndarray:
|
|
|
26
26
|
def _expm1_vec(x: np.ndarray) -> np.ndarray:
|
|
27
27
|
"""Element-wise expm1 over a 1-D array."""
|
|
28
28
|
result = np.empty_like(x)
|
|
29
|
-
for i in nb.prange(len(x)): #
|
|
29
|
+
for i in nb.prange(len(x)): # ty: ignore[not-iterable]
|
|
30
30
|
result[i] = np.expm1(x[i])
|
|
31
31
|
return result
|
|
32
32
|
|
|
@@ -36,7 +36,7 @@ def _expm1_vec_mean(matrix: np.ndarray) -> np.ndarray:
|
|
|
36
36
|
"""Mean of expm1(X) across rows (axis=0) for a dense 2-D array."""
|
|
37
37
|
n_rows, n_cols = matrix.shape
|
|
38
38
|
result = np.zeros(n_cols)
|
|
39
|
-
for j in nb.prange(n_cols): #
|
|
39
|
+
for j in nb.prange(n_cols): # ty: ignore[not-iterable]
|
|
40
40
|
s = 0.0
|
|
41
41
|
for i in range(n_rows):
|
|
42
42
|
s += np.expm1(matrix[i, j])
|
|
@@ -106,15 +106,27 @@ def bulk_matrix_geometric(
|
|
|
106
106
|
|
|
107
107
|
|
|
108
108
|
@nb.njit(parallel=True)
|
|
109
|
-
def
|
|
110
|
-
"""Calculates the log2-fold change between two arrays.
|
|
111
|
-
|
|
109
|
+
def log2_fold_change(x: np.ndarray, y: np.ndarray, epsilon: float = 0.0) -> np.ndarray:
|
|
110
|
+
"""Calculates the log2-fold change between two arrays.
|
|
111
|
+
|
|
112
|
+
When ``epsilon > 0``, adds a small pseudocount to both numerator and
|
|
113
|
+
denominator before taking the ratio, dampening extreme fold changes that arise
|
|
114
|
+
when the reference mean is near zero (scRNA-seq sparsity artifact).
|
|
115
|
+
"""
|
|
116
|
+
return np.log2((x + epsilon) / (y + epsilon))
|
|
112
117
|
|
|
113
118
|
|
|
114
119
|
@nb.njit(parallel=True)
|
|
115
|
-
def percent_change(
|
|
116
|
-
|
|
117
|
-
|
|
120
|
+
def percent_change(
|
|
121
|
+
x: np.ndarray, y: np.ndarray, prior_count: float = 0.0
|
|
122
|
+
) -> np.ndarray:
|
|
123
|
+
"""Calculates the percent change between two arrays.
|
|
124
|
+
|
|
125
|
+
When ``prior_count > 0``, adds a pseudocount to the denominator before
|
|
126
|
+
computing the ratio, dampening extreme values when the reference mean is
|
|
127
|
+
near zero (scRNA-seq sparsity artifact).
|
|
128
|
+
"""
|
|
129
|
+
return (x - y) / (y + prior_count)
|
|
118
130
|
|
|
119
131
|
|
|
120
132
|
def mwu(
|
|
@@ -1,32 +1,32 @@
|
|
|
1
|
-
"""Tests for pdex._math (
|
|
1
|
+
"""Tests for pdex._math (log2_fold_change, percent_change, bulk_matrix_geometric)."""
|
|
2
2
|
|
|
3
3
|
import numpy as np
|
|
4
4
|
|
|
5
|
-
from pdex._math import bulk_matrix_geometric,
|
|
5
|
+
from pdex._math import bulk_matrix_geometric, log2_fold_change, percent_change
|
|
6
6
|
|
|
7
7
|
|
|
8
8
|
class TestFoldChange:
|
|
9
9
|
def test_ratio_of_two(self):
|
|
10
10
|
x = np.array([4.0, 8.0])
|
|
11
11
|
y = np.array([2.0, 4.0])
|
|
12
|
-
result =
|
|
12
|
+
result = log2_fold_change(x, y)
|
|
13
13
|
np.testing.assert_allclose(result, [1.0, 1.0])
|
|
14
14
|
|
|
15
15
|
def test_equal_values(self):
|
|
16
16
|
x = np.array([3.0, 5.0])
|
|
17
|
-
result =
|
|
17
|
+
result = log2_fold_change(x, x)
|
|
18
18
|
np.testing.assert_allclose(result, [0.0, 0.0])
|
|
19
19
|
|
|
20
20
|
def test_half(self):
|
|
21
21
|
x = np.array([1.0])
|
|
22
22
|
y = np.array([2.0])
|
|
23
|
-
result =
|
|
23
|
+
result = log2_fold_change(x, y)
|
|
24
24
|
np.testing.assert_allclose(result, [-1.0])
|
|
25
25
|
|
|
26
26
|
def test_known_values(self):
|
|
27
27
|
x = np.array([1.0, 2.0, 4.0, 8.0])
|
|
28
28
|
y = np.array([1.0, 1.0, 1.0, 1.0])
|
|
29
|
-
result =
|
|
29
|
+
result = log2_fold_change(x, y)
|
|
30
30
|
np.testing.assert_allclose(result, [0.0, 1.0, 2.0, 3.0])
|
|
31
31
|
|
|
32
32
|
|
|
@@ -55,6 +55,72 @@ class TestPercentChange:
|
|
|
55
55
|
np.testing.assert_allclose(result, [-0.5, 0.0, 0.5])
|
|
56
56
|
|
|
57
57
|
|
|
58
|
+
class TestFoldChangeWithEpsilon:
|
|
59
|
+
def test_zero_epsilon_matches_baseline(self):
|
|
60
|
+
"""epsilon=0.0 must be identical to calling without it."""
|
|
61
|
+
x = np.array([4.0, 8.0, 0.1])
|
|
62
|
+
y = np.array([2.0, 4.0, 0.001])
|
|
63
|
+
np.testing.assert_array_equal(
|
|
64
|
+
log2_fold_change(x, y), log2_fold_change(x, y, 0.0)
|
|
65
|
+
)
|
|
66
|
+
|
|
67
|
+
def test_dampens_extreme_fc_from_near_zero_denominator(self):
|
|
68
|
+
"""epsilon=0.5 pulls extreme FC toward zero."""
|
|
69
|
+
x = np.array([0.1])
|
|
70
|
+
y = np.array([0.001])
|
|
71
|
+
fc_raw = log2_fold_change(x, y)[0]
|
|
72
|
+
fc_dampened = log2_fold_change(x, y, 0.5)[0]
|
|
73
|
+
assert abs(fc_dampened) < abs(fc_raw)
|
|
74
|
+
np.testing.assert_allclose(fc_dampened, np.log2(0.6 / 0.501), rtol=1e-5)
|
|
75
|
+
|
|
76
|
+
def test_preserves_direction(self):
|
|
77
|
+
"""epsilon should not flip the sign of fold change."""
|
|
78
|
+
x = np.array([2.0, 0.5])
|
|
79
|
+
y = np.array([1.0, 1.0])
|
|
80
|
+
result = log2_fold_change(x, y, 0.5)
|
|
81
|
+
assert result[0] > 0
|
|
82
|
+
assert result[1] < 0
|
|
83
|
+
|
|
84
|
+
def test_equal_means_still_zero(self):
|
|
85
|
+
"""When target_mean == ref_mean, FC should be 0 regardless of epsilon."""
|
|
86
|
+
x = np.array([0.5, 2.0])
|
|
87
|
+
result = log2_fold_change(x, x, 0.5)
|
|
88
|
+
np.testing.assert_allclose(result, [0.0, 0.0])
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
class TestPercentChangeWithPriorCount:
|
|
92
|
+
def test_zero_epsilon_matches_baseline(self):
|
|
93
|
+
"""epsilon=0.0 must be identical to calling without it."""
|
|
94
|
+
x = np.array([4.0, 8.0, 0.1])
|
|
95
|
+
y = np.array([2.0, 4.0, 0.001])
|
|
96
|
+
np.testing.assert_array_equal(percent_change(x, y), percent_change(x, y, 0.0))
|
|
97
|
+
|
|
98
|
+
def test_dampens_extreme_pc_from_near_zero_denominator(self):
|
|
99
|
+
"""epsilon=0.5 pulls extreme percent change toward zero."""
|
|
100
|
+
x = np.array([0.1])
|
|
101
|
+
y = np.array([0.001])
|
|
102
|
+
pc_raw = percent_change(x, y)[0]
|
|
103
|
+
pc_dampened = percent_change(x, y, 0.5)[0]
|
|
104
|
+
assert abs(pc_dampened) < abs(pc_raw)
|
|
105
|
+
np.testing.assert_allclose(
|
|
106
|
+
pc_dampened, (0.1 - 0.001) / (0.001 + 0.5), rtol=1e-5
|
|
107
|
+
)
|
|
108
|
+
|
|
109
|
+
def test_preserves_direction(self):
|
|
110
|
+
"""epsilon should not flip the sign of percent change."""
|
|
111
|
+
x = np.array([2.0, 0.5])
|
|
112
|
+
y = np.array([1.0, 1.0])
|
|
113
|
+
result = percent_change(x, y, 0.5)
|
|
114
|
+
assert result[0] > 0
|
|
115
|
+
assert result[1] < 0
|
|
116
|
+
|
|
117
|
+
def test_equal_means_still_zero(self):
|
|
118
|
+
"""When target_mean == ref_mean, percent_change should be 0 regardless of epsilon."""
|
|
119
|
+
x = np.array([0.5, 2.0])
|
|
120
|
+
result = percent_change(x, x, 0.5)
|
|
121
|
+
np.testing.assert_allclose(result, [0.0, 0.0])
|
|
122
|
+
|
|
123
|
+
|
|
58
124
|
class TestBulkMatrixGeometric:
|
|
59
125
|
"""Tests for bulk_matrix_geometric."""
|
|
60
126
|
|
|
@@ -15,6 +15,7 @@ EXPECTED_COLUMNS = {
|
|
|
15
15
|
"target_membership",
|
|
16
16
|
"ref_membership",
|
|
17
17
|
"fold_change",
|
|
18
|
+
"log2_fold_change",
|
|
18
19
|
"percent_change",
|
|
19
20
|
"p_value",
|
|
20
21
|
"statistic",
|
|
@@ -137,6 +138,21 @@ class TestPdexRefMode:
|
|
|
137
138
|
typo_arg="oops",
|
|
138
139
|
)
|
|
139
140
|
|
|
141
|
+
def test_epsilon_accepted(self, small_adata):
|
|
142
|
+
"""epsilon parameter is accepted without error."""
|
|
143
|
+
result = pdex(small_adata, groupby="guide", is_log1p=False, epsilon=0.5)
|
|
144
|
+
assert isinstance(result, pl.DataFrame)
|
|
145
|
+
|
|
146
|
+
def test_epsilon_zero_matches_default(self, small_adata):
|
|
147
|
+
"""epsilon=0.0 produces identical results to omitting the parameter."""
|
|
148
|
+
default_result = pdex(small_adata, groupby="guide", is_log1p=False)
|
|
149
|
+
explicit_result = pdex(
|
|
150
|
+
small_adata, groupby="guide", is_log1p=False, epsilon=0.0
|
|
151
|
+
)
|
|
152
|
+
assert isinstance(default_result, pl.DataFrame)
|
|
153
|
+
assert isinstance(explicit_result, pl.DataFrame)
|
|
154
|
+
assert default_result.equals(explicit_result)
|
|
155
|
+
|
|
140
156
|
|
|
141
157
|
class TestPdexRefSparse:
|
|
142
158
|
"""Tests for pdex with sparse CSR input."""
|
|
@@ -463,6 +479,10 @@ class TestPdexOnTargetValidation:
|
|
|
463
479
|
|
|
464
480
|
|
|
465
481
|
class TestPdexValidation:
|
|
482
|
+
def test_negative_epsilon_raises(self, small_adata):
|
|
483
|
+
with pytest.raises(ValueError, match="epsilon must be non-negative"):
|
|
484
|
+
pdex(small_adata, groupby="guide", is_log1p=False, epsilon=-0.1)
|
|
485
|
+
|
|
466
486
|
def test_invalid_mode(self, small_adata):
|
|
467
487
|
with pytest.raises(ValueError, match="Invalid mode"):
|
|
468
488
|
pdex(
|
|
@@ -645,3 +665,20 @@ class TestPdexBacked:
|
|
|
645
665
|
rtol=1e-6,
|
|
646
666
|
err_msg=f"Mismatch in column {col}",
|
|
647
667
|
)
|
|
668
|
+
|
|
669
|
+
|
|
670
|
+
class TestLog2FoldChangeColumn:
|
|
671
|
+
"""Regression test for the `log2_fold_change` column semantics."""
|
|
672
|
+
|
|
673
|
+
@pytest.mark.parametrize("mode", ["ref", "all"])
|
|
674
|
+
def test_log2_fold_change_equals_log2_ratio(self, small_adata, mode):
|
|
675
|
+
"""log2_fold_change == log2(target_mean / ref_mean) on finite entries."""
|
|
676
|
+
result = pdex(small_adata, groupby="guide", mode=mode, is_log1p=False)
|
|
677
|
+
target = result["target_mean"].to_numpy()
|
|
678
|
+
ref = result["ref_mean"].to_numpy()
|
|
679
|
+
actual = result["log2_fold_change"].to_numpy()
|
|
680
|
+
with np.errstate(divide="ignore", invalid="ignore"):
|
|
681
|
+
expected = np.log2(target / ref)
|
|
682
|
+
finite = np.isfinite(expected) & np.isfinite(actual)
|
|
683
|
+
assert finite.any()
|
|
684
|
+
np.testing.assert_allclose(actual[finite], expected[finite], rtol=1e-6)
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|