pdex 0.2.2__tar.gz → 0.2.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {pdex-0.2.2 → pdex-0.2.3}/CLAUDE.md +2 -2
- {pdex-0.2.2 → pdex-0.2.3}/PKG-INFO +3 -3
- {pdex-0.2.2 → pdex-0.2.3}/README.md +2 -2
- {pdex-0.2.2 → pdex-0.2.3}/pyproject.toml +1 -1
- {pdex-0.2.2 → pdex-0.2.3}/src/pdex/__init__.py +12 -5
- {pdex-0.2.2 → pdex-0.2.3}/src/pdex/_math.py +16 -2
- {pdex-0.2.2 → pdex-0.2.3}/tests/test_math.py +32 -0
- {pdex-0.2.2 → pdex-0.2.3}/tests/test_pdex.py +54 -0
- {pdex-0.2.2 → pdex-0.2.3}/.github/workflows/ci.yml +0 -0
- {pdex-0.2.2 → pdex-0.2.3}/.github/workflows/release.yml +0 -0
- {pdex-0.2.2 → pdex-0.2.3}/.gitignore +0 -0
- {pdex-0.2.2 → pdex-0.2.3}/.python-version +0 -0
- {pdex-0.2.2 → pdex-0.2.3}/LICENSE +0 -0
- {pdex-0.2.2 → pdex-0.2.3}/src/pdex/_utils.py +0 -0
- {pdex-0.2.2 → pdex-0.2.3}/src/pdex/py.typed +0 -0
- {pdex-0.2.2 → pdex-0.2.3}/tests/conftest.py +0 -0
- {pdex-0.2.2 → pdex-0.2.3}/tests/test_internals.py +0 -0
- {pdex-0.2.2 → pdex-0.2.3}/tests/test_utils.py +0 -0
|
@@ -80,8 +80,8 @@ The returned Polars DataFrame (or pandas DataFrame when `as_pandas=True`) has co
|
|
|
80
80
|
| `target_membership` | int | Number of cells in the target group |
|
|
81
81
|
| `ref_membership` | int | Number of cells in the reference |
|
|
82
82
|
| `fold_change` | float | **Deprecated** alias for `log2_fold_change` (identical values). Retained for one release; emits a `FutureWarning` on every `pdex(...)` call and will be removed in pdex 0.3.0. |
|
|
83
|
-
| `log2_fold_change` | float | log2((target_mean + epsilon) / (ref_mean + epsilon)) — computed from pseudobulk means |
|
|
84
|
-
| `percent_change` | float | (target_mean - ref_mean) / (ref_mean + epsilon) — computed from pseudobulk means |
|
|
83
|
+
| `log2_fold_change` | float | log2((target_mean + epsilon) / (ref_mean + epsilon)) — computed from pseudobulk means. Features unexpressed in both groups (`target_mean == ref_mean == 0`, only with `epsilon == 0`) give `0/0`, defined as `0.0` (not `NaN`); one-sided zeros still yield `±inf`. |
|
|
84
|
+
| `percent_change` | float | (target_mean - ref_mean) / (ref_mean + epsilon) — computed from pseudobulk means. Features unexpressed in both groups (`target_mean == ref_mean == 0`, only with `epsilon == 0`) give `0/0`, defined as `0.0` (not `NaN`); a zero reference with nonzero target still yields `+inf`. |
|
|
85
85
|
| `p_value` | float | Mann-Whitney U p-value (per-cell vectors) |
|
|
86
86
|
| `statistic` | float | Mann-Whitney U statistic |
|
|
87
87
|
| `fdr` | float | FDR-corrected p-value, applied per-group across genes. For `on_target` mode, applied across all groups. |
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: pdex
|
|
3
|
-
Version: 0.2.2
|
|
3
|
+
Version: 0.2.3
|
|
4
4
|
Summary: Parallel differential expression for single-cell perturbation sequencing
|
|
5
5
|
Author-email: noam teyssier <noam.teyssier@arcinstitute.org>
|
|
6
6
|
License-File: LICENSE
|
|
@@ -114,8 +114,8 @@ Returns a Polars DataFrame (or pandas if `as_pandas=True`) with one row per (gro
|
|
|
114
114
|
| `target_membership` | Number of cells in the target group |
|
|
115
115
|
| `ref_membership` | Number of cells in the reference |
|
|
116
116
|
| `fold_change` | **Deprecated alias** for `log2_fold_change` (identical values). Will be removed in pdex 0.3.0. |
|
|
117
|
-
| `log2_fold_change` | log2(target_mean / ref_mean) |
|
|
118
|
-
| `percent_change` | (target_mean - ref_mean) / ref_mean |
|
|
117
|
+
| `log2_fold_change` | log2(target_mean / ref_mean). Genes unexpressed in both groups (0/0) report `0.0`, not `NaN`. |
|
|
118
|
+
| `percent_change` | (target_mean - ref_mean) / ref_mean. Genes unexpressed in both groups (0/0) report `0.0`, not `NaN`. |
|
|
119
119
|
| `p_value` | Mann-Whitney U p-value |
|
|
120
120
|
| `statistic` | Mann-Whitney U statistic |
|
|
121
121
|
| `fdr` | FDR-corrected p-value (per-group, across genes). For `on_target` mode, this is applied across all groups. |
|
|
@@ -96,8 +96,8 @@ Returns a Polars DataFrame (or pandas if `as_pandas=True`) with one row per (gro
|
|
|
96
96
|
| `target_membership` | Number of cells in the target group |
|
|
97
97
|
| `ref_membership` | Number of cells in the reference |
|
|
98
98
|
| `fold_change` | **Deprecated alias** for `log2_fold_change` (identical values). Will be removed in pdex 0.3.0. |
|
|
99
|
-
| `log2_fold_change` | log2(target_mean / ref_mean) |
|
|
100
|
-
| `percent_change` | (target_mean - ref_mean) / ref_mean |
|
|
99
|
+
| `log2_fold_change` | log2(target_mean / ref_mean). Genes unexpressed in both groups (0/0) report `0.0`, not `NaN`. |
|
|
100
|
+
| `percent_change` | (target_mean - ref_mean) / ref_mean. Genes unexpressed in both groups (0/0) report `0.0`, not `NaN`. |
|
|
101
101
|
| `p_value` | Mann-Whitney U p-value |
|
|
102
102
|
| `statistic` | Mann-Whitney U statistic |
|
|
103
103
|
| `fdr` | FDR-corrected p-value (per-group, across genes). For `on_target` mode, this is applied across all groups. |
|
|
@@ -203,11 +203,13 @@ def pdex(
|
|
|
203
203
|
If ``True``, return a :class:`pandas.DataFrame` instead of a
|
|
204
204
|
:class:`polars.DataFrame`. Requires ``pyarrow``.
|
|
205
205
|
epsilon:
|
|
206
|
-
Pseudocount added to
|
|
207
|
-
``fold_change`` and ``percent_change``. When
|
|
208
|
-
values from near-zero reference means (scRNA-seq
|
|
209
|
-
dampened toward zero. Has no effect on the
|
|
210
|
-
Default ``0.0`` preserves existing behaviour
|
|
206
|
+
Pseudocount added to the denominator (and, for ``log2_fold_change``, the
|
|
207
|
+
numerator) before computing ``fold_change`` and ``percent_change``. When
|
|
208
|
+
``epsilon > 0``, extreme values from near-zero reference means (scRNA-seq
|
|
209
|
+
sparsity artifact) are dampened toward zero. Has no effect on the
|
|
210
|
+
Mann-Whitney U p-value or FDR. Default ``0.0`` preserves existing behaviour;
|
|
211
|
+
regardless of ``epsilon``, features unexpressed in both groups report
|
|
212
|
+
``0.0`` (no change) rather than ``NaN`` (see Returns).
|
|
211
213
|
|
|
212
214
|
**Recommended usage:** For scRNA-seq CRISPRi/CRISPRa screens where many
|
|
213
215
|
genes are unexpressed in the reference group, start with ``epsilon=0.5``.
|
|
@@ -243,6 +245,11 @@ def pdex(
|
|
|
243
245
|
``log2((target_mean + epsilon) / (ref_mean + epsilon))`` and
|
|
244
246
|
``percent_change`` is ``(target_mean - ref_mean) / (ref_mean + epsilon)``.
|
|
245
247
|
|
|
248
|
+
Features unexpressed in both groups (``target_mean == ref_mean == 0`` with
|
|
249
|
+
``epsilon == 0``) would evaluate to ``0 / 0``; both ``log2_fold_change``
|
|
250
|
+
and ``percent_change`` define this as ``0.0`` (no change) rather than
|
|
251
|
+
``NaN``. One-sided zeros still produce ``±inf``.
|
|
252
|
+
|
|
246
253
|
``fold_change`` is a **deprecated** alias for ``log2_fold_change``
|
|
247
254
|
(identical values). It is retained for one release to ease migration
|
|
248
255
|
and will be removed in pdex 0.3.0. New code should read
|
|
@@ -112,8 +112,15 @@ def log2_fold_change(x: np.ndarray, y: np.ndarray, epsilon: float = 0.0) -> np.n
|
|
|
112
112
|
When ``epsilon > 0``, adds a small pseudocount to both numerator and
|
|
113
113
|
denominator before taking the ratio, dampening extreme fold changes that arise
|
|
114
114
|
when the reference mean is near zero (scRNA-seq sparsity artifact).
|
|
115
|
+
|
|
116
|
+
Entries that are zero in both arrays (a feature unexpressed in both groups,
|
|
117
|
+
only possible when ``epsilon == 0``) evaluate to ``log2(0 / 0)``; these are
|
|
118
|
+
defined as ``0.0`` (no change) rather than ``NaN``. Legitimate ``±inf`` values
|
|
119
|
+
from one-sided zeros are preserved.
|
|
115
120
|
"""
|
|
116
|
-
|
|
121
|
+
lfc = np.log2((x + epsilon) / (y + epsilon))
|
|
122
|
+
lfc[np.isnan(lfc)] = 0.0
|
|
123
|
+
return lfc
|
|
117
124
|
|
|
118
125
|
|
|
119
126
|
@nb.njit(parallel=True)
|
|
@@ -125,8 +132,15 @@ def percent_change(
|
|
|
125
132
|
When ``prior_count > 0``, adds a pseudocount to the denominator before
|
|
126
133
|
computing the ratio, dampening extreme values when the reference mean is
|
|
127
134
|
near zero (scRNA-seq sparsity artifact).
|
|
135
|
+
|
|
136
|
+
Entries that are zero in both arrays (a feature unexpressed in both groups,
|
|
137
|
+
only possible when ``prior_count == 0``) evaluate to ``0 / 0``; these are
|
|
138
|
+
defined as ``0.0`` (no change) rather than ``NaN``. Legitimate ``+inf`` values
|
|
139
|
+
from a zero reference are preserved.
|
|
128
140
|
"""
|
|
129
|
-
|
|
141
|
+
pc = (x - y) / (y + prior_count)
|
|
142
|
+
pc[np.isnan(pc)] = 0.0
|
|
143
|
+
return pc
|
|
130
144
|
|
|
131
145
|
|
|
132
146
|
def mwu(
|
|
@@ -29,6 +29,22 @@ class TestFoldChange:
|
|
|
29
29
|
result = log2_fold_change(x, y)
|
|
30
30
|
np.testing.assert_allclose(result, [0.0, 1.0, 2.0, 3.0])
|
|
31
31
|
|
|
32
|
+
def test_zero_over_zero_is_zero(self):
|
|
33
|
+
"""0/0 (unexpressed in both groups) is defined as 0.0, not NaN."""
|
|
34
|
+
x = np.array([0.0])
|
|
35
|
+
y = np.array([0.0])
|
|
36
|
+
result = log2_fold_change(x, y)
|
|
37
|
+
assert not np.isnan(result).any()
|
|
38
|
+
np.testing.assert_array_equal(result, [0.0])
|
|
39
|
+
|
|
40
|
+
def test_zero_over_zero_mixed_with_finite_and_inf(self):
|
|
41
|
+
"""0/0 -> 0.0 while normal ratios and one-sided zeros are untouched."""
|
|
42
|
+
x = np.array([0.0, 4.0, 0.0, 4.0])
|
|
43
|
+
y = np.array([0.0, 2.0, 1.0, 0.0])
|
|
44
|
+
result = log2_fold_change(x, y)
|
|
45
|
+
# 0/0 -> 0.0, log2(2) -> 1.0, log2(0) -> -inf, log2(4/0) -> +inf
|
|
46
|
+
np.testing.assert_array_equal(result, [0.0, 1.0, -np.inf, np.inf])
|
|
47
|
+
|
|
32
48
|
|
|
33
49
|
class TestPercentChange:
|
|
34
50
|
def test_double(self):
|
|
@@ -54,6 +70,22 @@ class TestPercentChange:
|
|
|
54
70
|
result = percent_change(x, y)
|
|
55
71
|
np.testing.assert_allclose(result, [-0.5, 0.0, 0.5])
|
|
56
72
|
|
|
73
|
+
def test_zero_over_zero_is_zero(self):
|
|
74
|
+
"""0/0 (unexpressed in both groups) is defined as 0.0, not NaN."""
|
|
75
|
+
x = np.array([0.0])
|
|
76
|
+
y = np.array([0.0])
|
|
77
|
+
result = percent_change(x, y)
|
|
78
|
+
assert not np.isnan(result).any()
|
|
79
|
+
np.testing.assert_array_equal(result, [0.0])
|
|
80
|
+
|
|
81
|
+
def test_zero_over_zero_mixed_with_finite_and_inf(self):
|
|
82
|
+
"""0/0 -> 0.0 while normal ratios and a zero reference are untouched."""
|
|
83
|
+
x = np.array([0.0, 4.0, 4.0])
|
|
84
|
+
y = np.array([0.0, 2.0, 0.0])
|
|
85
|
+
result = percent_change(x, y)
|
|
86
|
+
# 0/0 -> 0.0, (4-2)/2 -> 1.0, (4-0)/0 -> +inf
|
|
87
|
+
np.testing.assert_array_equal(result, [0.0, 1.0, np.inf])
|
|
88
|
+
|
|
57
89
|
|
|
58
90
|
class TestFoldChangeWithEpsilon:
|
|
59
91
|
def test_zero_epsilon_matches_baseline(self):
|
|
@@ -682,3 +682,57 @@ class TestLog2FoldChangeColumn:
|
|
|
682
682
|
finite = np.isfinite(expected) & np.isfinite(actual)
|
|
683
683
|
assert finite.any()
|
|
684
684
|
np.testing.assert_allclose(actual[finite], expected[finite], rtol=1e-6)
|
|
685
|
+
|
|
686
|
+
|
|
687
|
+
class TestUnexpressedInBothGroups:
|
|
688
|
+
"""A feature unexpressed in both groups (0/0) reports 0.0, not NaN."""
|
|
689
|
+
|
|
690
|
+
@pytest.mark.parametrize("mode", ["ref", "all"])
|
|
691
|
+
def test_zero_in_both_is_zero_not_nan(self, small_adata, mode):
|
|
692
|
+
"""gene_0 is zero everywhere -> 0/0 in every comparison -> 0.0."""
|
|
693
|
+
adata = small_adata.copy()
|
|
694
|
+
adata.X[:, 0] = 0.0 # gene_0 unexpressed in every cell
|
|
695
|
+
|
|
696
|
+
result = pdex(adata, groupby="guide", mode=mode, is_log1p=False, epsilon=0.0)
|
|
697
|
+
gene0 = result.filter(pl.col("feature") == "gene_0")
|
|
698
|
+
|
|
699
|
+
assert (gene0["target_mean"].to_numpy() == 0).all()
|
|
700
|
+
assert (gene0["ref_mean"].to_numpy() == 0).all()
|
|
701
|
+
for col in ["log2_fold_change", "fold_change", "percent_change"]:
|
|
702
|
+
values = gene0[col].to_numpy()
|
|
703
|
+
assert not np.isnan(values).any(), f"{col} contains NaN"
|
|
704
|
+
np.testing.assert_array_equal(values, 0.0)
|
|
705
|
+
|
|
706
|
+
def test_on_target_zero_in_both_is_zero_not_nan(self, on_target_adata):
|
|
707
|
+
"""on_target mode: a targeted gene that is zero everywhere reports 0.0."""
|
|
708
|
+
adata = on_target_adata.copy()
|
|
709
|
+
adata.X[:, 1] = 0.0 # group "A" targets gene_1
|
|
710
|
+
|
|
711
|
+
result = pdex(
|
|
712
|
+
adata,
|
|
713
|
+
groupby="guide",
|
|
714
|
+
mode="on_target",
|
|
715
|
+
gene_col="target_gene",
|
|
716
|
+
is_log1p=False,
|
|
717
|
+
epsilon=0.0,
|
|
718
|
+
)
|
|
719
|
+
row = result.filter(pl.col("target") == "A")
|
|
720
|
+
assert row["target_mean"].to_numpy()[0] == 0
|
|
721
|
+
assert row["ref_mean"].to_numpy()[0] == 0
|
|
722
|
+
for col in ["log2_fold_change", "fold_change", "percent_change"]:
|
|
723
|
+
value = row[col].to_numpy()[0]
|
|
724
|
+
assert not np.isnan(value), f"{col} is NaN"
|
|
725
|
+
assert value == 0.0
|
|
726
|
+
|
|
727
|
+
def test_one_sided_zero_still_infinite(self, small_adata):
|
|
728
|
+
"""Only 0/0 is filled; a zero target over a nonzero reference stays infinite."""
|
|
729
|
+
adata = small_adata.copy()
|
|
730
|
+
# gene_0 expressed only in the reference -> target_mean 0, ref_mean > 0
|
|
731
|
+
adata.X[:, 0] = 0.0
|
|
732
|
+
adata.X[adata.obs["guide"].to_numpy() == "non-targeting", 0] = 5.0
|
|
733
|
+
|
|
734
|
+
result = pdex(adata, groupby="guide", mode="ref", is_log1p=False, epsilon=0.0)
|
|
735
|
+
gene0 = result.filter(pl.col("feature") == "gene_0")
|
|
736
|
+
# log2(0 / ref) -> -inf; percent_change (0 - ref) / ref -> -1.0
|
|
737
|
+
assert np.isneginf(gene0["log2_fold_change"].to_numpy()).all()
|
|
738
|
+
np.testing.assert_allclose(gene0["percent_change"].to_numpy(), -1.0)
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|