pdex 0.2.2__tar.gz → 0.2.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -80,8 +80,8 @@ The returned Polars DataFrame (or pandas DataFrame when `as_pandas=True`) has co
80
80
  | `target_membership` | int | Number of cells in the target group |
81
81
  | `ref_membership` | int | Number of cells in the reference |
82
82
  | `fold_change` | float | **Deprecated** alias for `log2_fold_change` (identical values). Retained for one release; emits a `FutureWarning` on every `pdex(...)` call and will be removed in pdex 0.3.0. |
83
- | `log2_fold_change` | float | log2((target_mean + epsilon) / (ref_mean + epsilon)) — computed from pseudobulk means |
84
- | `percent_change` | float | (target_mean - ref_mean) / (ref_mean + epsilon) — computed from pseudobulk means |
83
+ | `log2_fold_change` | float | log2((target_mean + epsilon) / (ref_mean + epsilon)) — computed from pseudobulk means. Features unexpressed in both groups (`target_mean == ref_mean == 0`, only with `epsilon == 0`) give `0/0`, defined as `0.0` (not `NaN`); one-sided zeros still yield `±inf`. |
84
+ | `percent_change` | float | (target_mean - ref_mean) / (ref_mean + epsilon) — computed from pseudobulk means. Features unexpressed in both groups (`target_mean == ref_mean == 0`, only with `epsilon == 0`) give `0/0`, defined as `0.0` (not `NaN`); a zero reference with nonzero target still yields `+inf`. |
85
85
  | `p_value` | float | Mann-Whitney U p-value (per-cell vectors) |
86
86
  | `statistic` | float | Mann-Whitney U statistic |
87
87
  | `fdr` | float | FDR-corrected p-value, applied per-group across genes. For `on_target` mode, applied across all groups. |
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: pdex
3
- Version: 0.2.2
3
+ Version: 0.2.3
4
4
  Summary: Parallel differential expression for single-cell perturbation sequencing
5
5
  Author-email: noam teyssier <noam.teyssier@arcinstitute.org>
6
6
  License-File: LICENSE
@@ -114,8 +114,8 @@ Returns a Polars DataFrame (or pandas if `as_pandas=True`) with one row per (gro
114
114
  | `target_membership` | Number of cells in the target group |
115
115
  | `ref_membership` | Number of cells in the reference |
116
116
  | `fold_change` | **Deprecated alias** for `log2_fold_change` (identical values). Will be removed in pdex 0.3.0. |
117
- | `log2_fold_change` | log2(target_mean / ref_mean) |
118
- | `percent_change` | (target_mean - ref_mean) / ref_mean |
117
+ | `log2_fold_change` | log2(target_mean / ref_mean). Genes unexpressed in both groups (0/0) report `0.0`, not `NaN`. |
118
+ | `percent_change` | (target_mean - ref_mean) / ref_mean. Genes unexpressed in both groups (0/0) report `0.0`, not `NaN`. |
119
119
  | `p_value` | Mann-Whitney U p-value |
120
120
  | `statistic` | Mann-Whitney U statistic |
121
121
  | `fdr` | FDR-corrected p-value (per-group, across genes). For `on_target` mode, this is applied across all groups. |
@@ -96,8 +96,8 @@ Returns a Polars DataFrame (or pandas if `as_pandas=True`) with one row per (gro
96
96
  | `target_membership` | Number of cells in the target group |
97
97
  | `ref_membership` | Number of cells in the reference |
98
98
  | `fold_change` | **Deprecated alias** for `log2_fold_change` (identical values). Will be removed in pdex 0.3.0. |
99
- | `log2_fold_change` | log2(target_mean / ref_mean) |
100
- | `percent_change` | (target_mean - ref_mean) / ref_mean |
99
+ | `log2_fold_change` | log2(target_mean / ref_mean). Genes unexpressed in both groups (0/0) report `0.0`, not `NaN`. |
100
+ | `percent_change` | (target_mean - ref_mean) / ref_mean. Genes unexpressed in both groups (0/0) report `0.0`, not `NaN`. |
101
101
  | `p_value` | Mann-Whitney U p-value |
102
102
  | `statistic` | Mann-Whitney U statistic |
103
103
  | `fdr` | FDR-corrected p-value (per-group, across genes). For `on_target` mode, this is applied across all groups. |
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "pdex"
3
- version = "0.2.2"
3
+ version = "0.2.3"
4
4
  description = "Parallel differential expression for single-cell perturbation sequencing"
5
5
  readme = "README.md"
6
6
  authors = [{ name = "noam teyssier", email = "noam.teyssier@arcinstitute.org" }]
@@ -203,11 +203,13 @@ def pdex(
203
203
  If ``True``, return a :class:`pandas.DataFrame` instead of a
204
204
  :class:`polars.DataFrame`. Requires ``pyarrow``.
205
205
  epsilon:
206
- Pseudocount added to both ``target_mean`` and ``ref_mean`` before computing
207
- ``fold_change`` and ``percent_change``. When ``epsilon > 0``, extreme
208
- values from near-zero reference means (scRNA-seq sparsity artifact) are
209
- dampened toward zero. Has no effect on the Mann-Whitney U p-value or FDR.
210
- Default ``0.0`` preserves existing behaviour.
206
+ Pseudocount added to the denominator (and, for ``log2_fold_change``, the
207
+ numerator) before computing ``log2_fold_change`` and ``percent_change``. When
208
+ ``epsilon > 0``, extreme values from near-zero reference means (scRNA-seq
209
+ sparsity artifact) are dampened toward zero. Has no effect on the
210
+ Mann-Whitney U p-value or FDR. Default ``0.0`` preserves existing behaviour;
211
+ regardless of ``epsilon``, features unexpressed in both groups report
212
+ ``0.0`` (no change) rather than ``NaN`` (see Returns).
211
213
 
212
214
  **Recommended usage:** For scRNA-seq CRISPRi/CRISPRa screens where many
213
215
  genes are unexpressed in the reference group, start with ``epsilon=0.5``.
@@ -243,6 +245,11 @@ def pdex(
243
245
  ``log2((target_mean + epsilon) / (ref_mean + epsilon))`` and
244
246
  ``percent_change`` is ``(target_mean - ref_mean) / (ref_mean + epsilon)``.
245
247
 
248
+ Features unexpressed in both groups (``target_mean == ref_mean == 0`` with
249
+ ``epsilon == 0``) would evaluate to ``0 / 0``; both ``log2_fold_change``
250
+ and ``percent_change`` define this as ``0.0`` (no change) rather than
251
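+ ``NaN``. One-sided zeros are left as computed: ``log2_fold_change`` still gives ``±inf``, while ``percent_change`` gives ``+inf`` for a zero reference and ``-1.0`` for a zero target.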
252
+
246
253
  ``fold_change`` is a **deprecated** alias for ``log2_fold_change``
247
254
  (identical values). It is retained for one release to ease migration
248
255
  and will be removed in pdex 0.3.0. New code should read
@@ -112,8 +112,15 @@ def log2_fold_change(x: np.ndarray, y: np.ndarray, epsilon: float = 0.0) -> np.n
112
112
  When ``epsilon > 0``, adds a small pseudocount to both numerator and
113
113
  denominator before taking the ratio, dampening extreme fold changes that arise
114
114
  when the reference mean is near zero (scRNA-seq sparsity artifact).
115
+
116
+ Entries that are zero in both arrays (a feature unexpressed in both groups,
117
+ only possible when ``epsilon == 0``) evaluate to ``log2(0 / 0)``; these are
118
+ defined as ``0.0`` (no change) rather than ``NaN``. Legitimate ``±inf`` values
119
+ from one-sided zeros are preserved.
115
120
  """
116
- return np.log2((x + epsilon) / (y + epsilon))
121
+ lfc = np.log2((x + epsilon) / (y + epsilon))
122
+ lfc[np.isnan(lfc)] = 0.0
123
+ return lfc
117
124
 
118
125
 
119
126
  @nb.njit(parallel=True)
@@ -125,8 +132,15 @@ def percent_change(
125
132
  When ``prior_count > 0``, adds a pseudocount to the denominator before
126
133
  computing the ratio, dampening extreme values when the reference mean is
127
134
  near zero (scRNA-seq sparsity artifact).
135
+
136
+ Entries that are zero in both arrays (a feature unexpressed in both groups,
137
+ only possible when ``prior_count == 0``) evaluate to ``0 / 0``; these are
138
+ defined as ``0.0`` (no change) rather than ``NaN``. Legitimate ``+inf`` values
139
+ from a zero reference are preserved.
128
140
  """
129
- return (x - y) / (y + prior_count)
141
+ pc = (x - y) / (y + prior_count)
142
+ pc[np.isnan(pc)] = 0.0
143
+ return pc
130
144
 
131
145
 
132
146
  def mwu(
@@ -29,6 +29,22 @@ class TestFoldChange:
29
29
  result = log2_fold_change(x, y)
30
30
  np.testing.assert_allclose(result, [0.0, 1.0, 2.0, 3.0])
31
31
 
32
+ def test_zero_over_zero_is_zero(self):
33
+ """0/0 (unexpressed in both groups) is defined as 0.0, not NaN."""
34
+ x = np.array([0.0])
35
+ y = np.array([0.0])
36
+ result = log2_fold_change(x, y)
37
+ assert not np.isnan(result).any()
38
+ np.testing.assert_array_equal(result, [0.0])
39
+
40
+ def test_zero_over_zero_mixed_with_finite_and_inf(self):
41
+ """0/0 -> 0.0 while normal ratios and one-sided zeros are untouched."""
42
+ x = np.array([0.0, 4.0, 0.0, 4.0])
43
+ y = np.array([0.0, 2.0, 1.0, 0.0])
44
+ result = log2_fold_change(x, y)
45
+ # 0/0 -> 0.0, log2(2) -> 1.0, log2(0) -> -inf, log2(4/0) -> +inf
46
+ np.testing.assert_array_equal(result, [0.0, 1.0, -np.inf, np.inf])
47
+
32
48
 
33
49
  class TestPercentChange:
34
50
  def test_double(self):
@@ -54,6 +70,22 @@ class TestPercentChange:
54
70
  result = percent_change(x, y)
55
71
  np.testing.assert_allclose(result, [-0.5, 0.0, 0.5])
56
72
 
73
+ def test_zero_over_zero_is_zero(self):
74
+ """0/0 (unexpressed in both groups) is defined as 0.0, not NaN."""
75
+ x = np.array([0.0])
76
+ y = np.array([0.0])
77
+ result = percent_change(x, y)
78
+ assert not np.isnan(result).any()
79
+ np.testing.assert_array_equal(result, [0.0])
80
+
81
+ def test_zero_over_zero_mixed_with_finite_and_inf(self):
82
+ """0/0 -> 0.0 while normal ratios and a zero reference are untouched."""
83
+ x = np.array([0.0, 4.0, 4.0])
84
+ y = np.array([0.0, 2.0, 0.0])
85
+ result = percent_change(x, y)
86
+ # 0/0 -> 0.0, (4-2)/2 -> 1.0, (4-0)/0 -> +inf
87
+ np.testing.assert_array_equal(result, [0.0, 1.0, np.inf])
88
+
57
89
 
58
90
  class TestFoldChangeWithEpsilon:
59
91
  def test_zero_epsilon_matches_baseline(self):
@@ -682,3 +682,57 @@ class TestLog2FoldChangeColumn:
682
682
  finite = np.isfinite(expected) & np.isfinite(actual)
683
683
  assert finite.any()
684
684
  np.testing.assert_allclose(actual[finite], expected[finite], rtol=1e-6)
685
+
686
+
687
+ class TestUnexpressedInBothGroups:
688
+ """A feature unexpressed in both groups (0/0) reports 0.0, not NaN."""
689
+
690
+ @pytest.mark.parametrize("mode", ["ref", "all"])
691
+ def test_zero_in_both_is_zero_not_nan(self, small_adata, mode):
692
+ """gene_0 is zero everywhere -> 0/0 in every comparison -> 0.0."""
693
+ adata = small_adata.copy()
694
+ adata.X[:, 0] = 0.0 # gene_0 unexpressed in every cell
695
+
696
+ result = pdex(adata, groupby="guide", mode=mode, is_log1p=False, epsilon=0.0)
697
+ gene0 = result.filter(pl.col("feature") == "gene_0")
698
+
699
+ assert (gene0["target_mean"].to_numpy() == 0).all()
700
+ assert (gene0["ref_mean"].to_numpy() == 0).all()
701
+ for col in ["log2_fold_change", "fold_change", "percent_change"]:
702
+ values = gene0[col].to_numpy()
703
+ assert not np.isnan(values).any(), f"{col} contains NaN"
704
+ np.testing.assert_array_equal(values, 0.0)
705
+
706
+ def test_on_target_zero_in_both_is_zero_not_nan(self, on_target_adata):
707
+ """on_target mode: a targeted gene that is zero everywhere reports 0.0."""
708
+ adata = on_target_adata.copy()
709
+ adata.X[:, 1] = 0.0 # group "A" targets gene_1
710
+
711
+ result = pdex(
712
+ adata,
713
+ groupby="guide",
714
+ mode="on_target",
715
+ gene_col="target_gene",
716
+ is_log1p=False,
717
+ epsilon=0.0,
718
+ )
719
+ row = result.filter(pl.col("target") == "A")
720
+ assert row["target_mean"].to_numpy()[0] == 0
721
+ assert row["ref_mean"].to_numpy()[0] == 0
722
+ for col in ["log2_fold_change", "fold_change", "percent_change"]:
723
+ value = row[col].to_numpy()[0]
724
+ assert not np.isnan(value), f"{col} is NaN"
725
+ assert value == 0.0
726
+
727
+ def test_one_sided_zero_still_infinite(self, small_adata):
728
+ """Only 0/0 is filled; a zero target over a nonzero reference keeps log2_fold_change at -inf and percent_change at -1.0."""
729
+ adata = small_adata.copy()
730
+ # gene_0 expressed only in the reference -> target_mean 0, ref_mean > 0
731
+ adata.X[:, 0] = 0.0
732
+ adata.X[adata.obs["guide"].to_numpy() == "non-targeting", 0] = 5.0
733
+
734
+ result = pdex(adata, groupby="guide", mode="ref", is_log1p=False, epsilon=0.0)
735
+ gene0 = result.filter(pl.col("feature") == "gene_0")
736
+ # log2(0 / ref) -> -inf; percent_change (0 - ref) / ref -> -1.0
737
+ assert np.isneginf(gene0["log2_fold_change"].to_numpy()).all()
738
+ np.testing.assert_allclose(gene0["percent_change"].to_numpy(), -1.0)
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes