sctrial 0.3.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (97) hide show
  1. sctrial-0.3.0/.gitignore +57 -0
  2. sctrial-0.3.0/LICENSE +21 -0
  3. sctrial-0.3.0/PKG-INFO +236 -0
  4. sctrial-0.3.0/README.md +155 -0
  5. sctrial-0.3.0/pyproject.toml +123 -0
  6. sctrial-0.3.0/src/sctrial/__init__.py +256 -0
  7. sctrial-0.3.0/src/sctrial/_env.py +30 -0
  8. sctrial-0.3.0/src/sctrial/adata_tools.py +211 -0
  9. sctrial-0.3.0/src/sctrial/analysis.py +91 -0
  10. sctrial-0.3.0/src/sctrial/benchmark/__init__.py +36 -0
  11. sctrial-0.3.0/src/sctrial/benchmark/ablation.py +189 -0
  12. sctrial-0.3.0/src/sctrial/benchmark/metrics.py +323 -0
  13. sctrial-0.3.0/src/sctrial/benchmark/orchestrator.py +497 -0
  14. sctrial-0.3.0/src/sctrial/benchmark/permutation.py +191 -0
  15. sctrial-0.3.0/src/sctrial/benchmark/runners/__init__.py +12 -0
  16. sctrial-0.3.0/src/sctrial/benchmark/runners/dreamlet_runner.py +173 -0
  17. sctrial-0.3.0/src/sctrial/benchmark/runners/edger_qlf.py +189 -0
  18. sctrial-0.3.0/src/sctrial/benchmark/runners/limma_voom.py +164 -0
  19. sctrial-0.3.0/src/sctrial/benchmark/runners/nebula_runner.py +198 -0
  20. sctrial-0.3.0/src/sctrial/benchmark/runners/sctrial_did.py +228 -0
  21. sctrial-0.3.0/src/sctrial/benchmark/runners/wilcoxon_paired.py +112 -0
  22. sctrial-0.3.0/src/sctrial/benchmark/simulator.py +551 -0
  23. sctrial-0.3.0/src/sctrial/benchmark/subsample.py +170 -0
  24. sctrial-0.3.0/src/sctrial/convenience.py +415 -0
  25. sctrial-0.3.0/src/sctrial/datasets.py +1798 -0
  26. sctrial-0.3.0/src/sctrial/design.py +216 -0
  27. sctrial-0.3.0/src/sctrial/plotting.py +1374 -0
  28. sctrial-0.3.0/src/sctrial/preprocessing.py +143 -0
  29. sctrial-0.3.0/src/sctrial/scoring.py +274 -0
  30. sctrial-0.3.0/src/sctrial/stats/__init__.py +162 -0
  31. sctrial-0.3.0/src/sctrial/stats/_extract.py +80 -0
  32. sctrial-0.3.0/src/sctrial/stats/_utils.py +153 -0
  33. sctrial-0.3.0/src/sctrial/stats/abundance.py +378 -0
  34. sctrial-0.3.0/src/sctrial/stats/bayes.py +396 -0
  35. sctrial-0.3.0/src/sctrial/stats/comparisons.py +822 -0
  36. sctrial-0.3.0/src/sctrial/stats/cv.py +479 -0
  37. sctrial-0.3.0/src/sctrial/stats/diagnostics.py +73 -0
  38. sctrial-0.3.0/src/sctrial/stats/did.py +942 -0
  39. sctrial-0.3.0/src/sctrial/stats/effect_size.py +579 -0
  40. sctrial-0.3.0/src/sctrial/stats/gsea.py +544 -0
  41. sctrial-0.3.0/src/sctrial/stats/heterogeneity.py +197 -0
  42. sctrial-0.3.0/src/sctrial/stats/mixed_effects.py +534 -0
  43. sctrial-0.3.0/src/sctrial/stats/module_scores.py +476 -0
  44. sctrial-0.3.0/src/sctrial/stats/power.py +643 -0
  45. sctrial-0.3.0/src/sctrial/stats/pseudobulk.py +465 -0
  46. sctrial-0.3.0/src/sctrial/stats/sensitivity.py +59 -0
  47. sctrial-0.3.0/src/sctrial/stats/simulation.py +503 -0
  48. sctrial-0.3.0/src/sctrial/stats/summary.py +66 -0
  49. sctrial-0.3.0/src/sctrial/stats/survival.py +182 -0
  50. sctrial-0.3.0/src/sctrial/stats/timeseries.py +582 -0
  51. sctrial-0.3.0/src/sctrial/utils.py +475 -0
  52. sctrial-0.3.0/src/sctrial/validation.py +506 -0
  53. sctrial-0.3.0/src/sctrial/workflow.py +153 -0
  54. sctrial-0.3.0/tests/conftest.py +55 -0
  55. sctrial-0.3.0/tests/test_abundance.py +52 -0
  56. sctrial-0.3.0/tests/test_adata_tools.py +362 -0
  57. sctrial-0.3.0/tests/test_advanced.py +85 -0
  58. sctrial-0.3.0/tests/test_analysis.py +146 -0
  59. sctrial-0.3.0/tests/test_annotation.py +245 -0
  60. sctrial-0.3.0/tests/test_api_design.py +80 -0
  61. sctrial-0.3.0/tests/test_audit_coverage.py +503 -0
  62. sctrial-0.3.0/tests/test_basic.py +26 -0
  63. sctrial-0.3.0/tests/test_bayes.py +93 -0
  64. sctrial-0.3.0/tests/test_benchmark.py +257 -0
  65. sctrial-0.3.0/tests/test_compare_gene_in_celltype.py +48 -0
  66. sctrial-0.3.0/tests/test_comparisons.py +331 -0
  67. sctrial-0.3.0/tests/test_convenience.py +470 -0
  68. sctrial-0.3.0/tests/test_cv.py +63 -0
  69. sctrial-0.3.0/tests/test_datasets_extended.py +377 -0
  70. sctrial-0.3.0/tests/test_design.py +295 -0
  71. sctrial-0.3.0/tests/test_diagnostics.py +82 -0
  72. sctrial-0.3.0/tests/test_edge_cases.py +281 -0
  73. sctrial-0.3.0/tests/test_effect_size.py +217 -0
  74. sctrial-0.3.0/tests/test_final_additions.py +60 -0
  75. sctrial-0.3.0/tests/test_gsea.py +70 -0
  76. sctrial-0.3.0/tests/test_gsea_wrappers.py +97 -0
  77. sctrial-0.3.0/tests/test_heterogeneity.py +90 -0
  78. sctrial-0.3.0/tests/test_integration.py +349 -0
  79. sctrial-0.3.0/tests/test_mixed_effects.py +55 -0
  80. sctrial-0.3.0/tests/test_module_scores.py +82 -0
  81. sctrial-0.3.0/tests/test_new_stats.py +90 -0
  82. sctrial-0.3.0/tests/test_parallel_did.py +66 -0
  83. sctrial-0.3.0/tests/test_plotting.py +179 -0
  84. sctrial-0.3.0/tests/test_plotting_interactive.py +44 -0
  85. sctrial-0.3.0/tests/test_power.py +218 -0
  86. sctrial-0.3.0/tests/test_preprocessing.py +353 -0
  87. sctrial-0.3.0/tests/test_processing.py +20 -0
  88. sctrial-0.3.0/tests/test_pseudobulk_did.py +347 -0
  89. sctrial-0.3.0/tests/test_scalability.py +36 -0
  90. sctrial-0.3.0/tests/test_scoring.py +454 -0
  91. sctrial-0.3.0/tests/test_sensitivity.py +23 -0
  92. sctrial-0.3.0/tests/test_simulation.py +182 -0
  93. sctrial-0.3.0/tests/test_survival.py +73 -0
  94. sctrial-0.3.0/tests/test_timeseries.py +72 -0
  95. sctrial-0.3.0/tests/test_utils.py +16 -0
  96. sctrial-0.3.0/tests/test_utils_extended.py +252 -0
  97. sctrial-0.3.0/tests/test_validation.py +73 -0
@@ -0,0 +1,57 @@
1
+ # Python bytecode
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+
6
+ # Distribution / packaging
7
+ dist/
8
+ /build/
9
+ *.egg-info/
10
+ .eggs/
11
+
12
+ # Testing
13
+ .pytest_cache/
14
+ .coverage
15
+ htmlcov/
16
+ .tox/
17
+
18
+ # Type checking
19
+ .mypy_cache/
20
+ .dmypy.json
21
+ dmypy.json
22
+
23
+ # Virtual environments (if created inside this subdirectory)
24
+ venv/
25
+ env/
26
+ ENV/
27
+
28
+ *.h5ad
29
+ *.gz
30
+ *.txt.gz
31
+ data/
32
+ datasets/
33
+ tutorials/data/
34
+ **/__pycache__/
35
+ .DS_Store
36
+ **/.DS_Store
37
+ .vscode/
38
+ .idea/
39
+ .ruff_cache/
40
+ **/.doctrees/
41
+ docs/build/
42
+
43
+ # Manuscript figure computation cache
44
+ _cache/
45
+ manuscript_figures/_cache/
46
+
47
+ # Run scripts (local convenience only)
48
+ run_nb*.sh
49
+
50
+ # Jupyter notebook artifacts
51
+ *.nbconvert.ipynb
52
+ .ipynb_checkpoints/
53
+
54
+ # Claude Code
55
+ CLAUDE.md
56
+ .claude/
57
+ .mcp.json
sctrial-0.3.0/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Mohamed Omar, MD Assistant Professor of Computational Biomedicine and Urology, Cedars-Sinai
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
sctrial-0.3.0/PKG-INFO ADDED
@@ -0,0 +1,236 @@
1
+ Metadata-Version: 2.4
2
+ Name: sctrial
3
+ Version: 0.3.0
4
+ Summary: Participant-level differential analysis for longitudinal single-cell experiments: DiD, paired comparisons, module scoring, pseudobulk, and power analysis.
5
+ Project-URL: Homepage, https://github.com/TheOmarLab/sctrial
6
+ Project-URL: Documentation, https://sctrial.readthedocs.io
7
+ Project-URL: Repository, https://github.com/TheOmarLab/sctrial
8
+ Project-URL: Issues, https://github.com/TheOmarLab/sctrial/issues
9
+ Author: Contributors
10
+ License: MIT
11
+ License-File: LICENSE
12
+ Keywords: anndata,crossover,difference-in-differences,pseudobulk,single-cell,trial
13
+ Classifier: Development Status :: 3 - Alpha
14
+ Classifier: Intended Audience :: Science/Research
15
+ Classifier: License :: OSI Approved :: MIT License
16
+ Classifier: Programming Language :: Python :: 3
17
+ Classifier: Programming Language :: Python :: 3 :: Only
18
+ Classifier: Programming Language :: Python :: 3.9
19
+ Classifier: Programming Language :: Python :: 3.10
20
+ Classifier: Programming Language :: Python :: 3.11
21
+ Classifier: Programming Language :: Python :: 3.12
22
+ Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
23
+ Classifier: Typing :: Typed
24
+ Requires-Python: >=3.9
25
+ Requires-Dist: anndata>=0.9
26
+ Requires-Dist: joblib>=1.2
27
+ Requires-Dist: numpy>=1.22
28
+ Requires-Dist: pandas>=1.5
29
+ Requires-Dist: scipy>=1.8
30
+ Requires-Dist: statsmodels>=0.13
31
+ Requires-Dist: typing-extensions>=4.0; python_version < '3.11'
32
+ Provides-Extra: all
33
+ Requires-Dist: adjusttext>=1.0; extra == 'all'
34
+ Requires-Dist: gseapy>=1.0; extra == 'all'
35
+ Requires-Dist: lifelines>=0.27; extra == 'all'
36
+ Requires-Dist: matplotlib>=3.6; extra == 'all'
37
+ Requires-Dist: plotly>=5.0; extra == 'all'
38
+ Requires-Dist: pymc>=5.0; extra == 'all'
39
+ Requires-Dist: pyscenic>=0.12; extra == 'all'
40
+ Requires-Dist: scanpy>=1.9; extra == 'all'
41
+ Requires-Dist: seaborn>=0.12; extra == 'all'
42
+ Provides-Extra: aucell
43
+ Requires-Dist: pyscenic>=0.12; extra == 'aucell'
44
+ Provides-Extra: bayes
45
+ Requires-Dist: pymc>=5.0; extra == 'bayes'
46
+ Provides-Extra: dev
47
+ Requires-Dist: build>=1.0; extra == 'dev'
48
+ Requires-Dist: mypy>=1.5; extra == 'dev'
49
+ Requires-Dist: myst-parser>=2.0; extra == 'dev'
50
+ Requires-Dist: nbsphinx>=0.9; extra == 'dev'
51
+ Requires-Dist: pandoc; extra == 'dev'
52
+ Requires-Dist: pre-commit>=3.0; extra == 'dev'
53
+ Requires-Dist: psutil>=5.9; extra == 'dev'
54
+ Requires-Dist: pydata-sphinx-theme>=0.15; extra == 'dev'
55
+ Requires-Dist: pytest-cov>=4.0; extra == 'dev'
56
+ Requires-Dist: pytest>=7.0; extra == 'dev'
57
+ Requires-Dist: ruff>=0.5; extra == 'dev'
58
+ Requires-Dist: sphinx-copybutton>=0.5; extra == 'dev'
59
+ Requires-Dist: sphinx-design>=0.5; extra == 'dev'
60
+ Requires-Dist: sphinx>=7.0; extra == 'dev'
61
+ Requires-Dist: twine>=5.0; extra == 'dev'
62
+ Provides-Extra: docs
63
+ Requires-Dist: myst-parser>=2.0; extra == 'docs'
64
+ Requires-Dist: nbsphinx>=0.9; extra == 'docs'
65
+ Requires-Dist: pandoc; extra == 'docs'
66
+ Requires-Dist: pydata-sphinx-theme>=0.15; extra == 'docs'
67
+ Requires-Dist: sphinx-copybutton>=0.5; extra == 'docs'
68
+ Requires-Dist: sphinx-design>=0.5; extra == 'docs'
69
+ Requires-Dist: sphinx>=7.0; extra == 'docs'
70
+ Provides-Extra: gsea
71
+ Requires-Dist: gseapy>=1.0; extra == 'gsea'
72
+ Provides-Extra: plots
73
+ Requires-Dist: adjusttext>=1.0; extra == 'plots'
74
+ Requires-Dist: matplotlib>=3.6; extra == 'plots'
75
+ Requires-Dist: plotly>=5.0; extra == 'plots'
76
+ Requires-Dist: scanpy>=1.9; extra == 'plots'
77
+ Requires-Dist: seaborn>=0.12; extra == 'plots'
78
+ Provides-Extra: survival
79
+ Requires-Dist: lifelines>=0.27; extra == 'survival'
80
+ Description-Content-Type: text/markdown
81
+
82
+ <p align="center">
83
+ <img src="docs/source/_static/logo.svg" alt="sctrial" width="280">
84
+ </p>
85
+ <p align="center"><strong>Participant-Level Differential Analysis for Longitudinal Single-Cell Experiments</strong></p>
86
+
87
+ <p align="center">
88
+ <a href="https://github.com/TheOmarLab/sctrial/actions/workflows/test.yml">
89
+ <img src="https://github.com/TheOmarLab/sctrial/actions/workflows/test.yml/badge.svg?branch=main" alt="Test Status">
90
+ </a>
91
+ <a href="https://github.com/TheOmarLab/sctrial/releases">
92
+ <img src="https://img.shields.io/github/v/release/TheOmarLab/sctrial?label=version" alt="Release Version">
93
+ </a>
94
+ <img src="https://img.shields.io/badge/python-3.9%2B-blue" alt="Python Versions">
95
+ <a href="https://opensource.org/licenses/MIT">
96
+ <img src="https://img.shields.io/badge/License-MIT-yellow.svg" alt="License: MIT">
97
+ </a>
98
+ <a href="https://sctrial.readthedocs.io">
99
+ <img src="https://img.shields.io/badge/docs-latest-brightgreen.svg" alt="Documentation">
100
+ </a>
101
+ </p>
102
+
103
+ ---
104
+
105
+ ## Overview
106
+
107
+ **sctrial** is a Python package for performing rigorous statistical inference on single-cell RNA-seq data from clinical trials and longitudinal studies. Built on top of [AnnData](https://anndata.readthedocs.io/) and [Scanpy](https://scanpy.readthedocs.io/), it provides specialized tools for:
108
+
109
+ - **Difference-in-Differences (DiD)** analysis with participant fixed effects
110
+ - **Paired within-arm** pre→post contrasts
111
+ - **Between-arm** comparisons at fixed timepoints
112
+ - **Cell-type abundance** change testing
113
+ - **Gene set enrichment** analysis (GSEA) on DiD rankings
114
+ - **Power analysis** and sample size calculations
115
+ - **Effect size** estimation with confidence intervals
116
+
117
+ <p align="center">
118
+ <img src="docs/source/_static/overview_figure.png" alt="sctrial overview — from scRNA-seq input through trial-aware analysis to statistical outputs" width="100%">
119
+ </p>
120
+
121
+ ## Key Features
122
+
123
+ | Feature | Description |
124
+ |---------|-------------|
125
+ | **Trial-Aware Design** | Define participant, visit, arm, and cell type columns once |
126
+ | **Robust Statistics** | Wild cluster bootstrap, participant-level aggregation |
127
+ | **Multiple Comparisons** | Built-in FDR correction across features and cell types |
128
+ | **Power Analysis** | Two-arm DiD and single-arm paired power/sample size calculations |
129
+ | **Single-Arm Support** | `arm_col=None` for studies without a control arm |
130
+ | **Publication-Ready Plots** | Forest plots, interaction plots, GSEA heatmaps |
131
+ | **Scalable** | Efficient processing of large single-cell datasets |
132
+
133
+ ## Installation
134
+
135
+ ```bash
136
+ pip install sctrial
137
+ ```
138
+
139
+ For development:
140
+ ```bash
141
+ git clone https://github.com/TheOmarLab/sctrial.git
142
+ cd sctrial
143
+ pip install -e ".[dev]"
144
+ ```
145
+
146
+ ## Quick Start
147
+
148
+ ```bash
149
+ pip install "sctrial[plots]" # includes dataset loaders and visualization
150
+ ```
151
+
152
+ ```python
153
+ import sctrial as st
154
+
155
+ # 1. Load a real immunotherapy trial dataset (auto-downloads on first use)
156
+ adata = st.load_sade_feldman()
157
+ adata = st.harmonize_response(adata) # majority-vote response labels
158
+
159
+ # 2. Define trial design
160
+ design = st.TrialDesign(
161
+ participant_col="participant_id",
162
+ visit_col="visit",
163
+ arm_col="response_harmonized",
164
+ arm_treated="Responder",
165
+ arm_control="Non-responder",
166
+ celltype_col="cell_type",
167
+ )
168
+
169
+ # 3. Score gene sets (dataset ships pre-normalized with log1p_tpm layer)
170
+ gene_sets = {
171
+ "Cytotoxicity": ["GZMA", "GZMB", "PRF1", "GNLY", "NKG7"],
172
+ "Exhaustion": ["PDCD1", "CTLA4", "HAVCR2", "LAG3", "TIGIT"],
173
+ }
174
+ adata = st.score_gene_sets(adata, gene_sets, layer="log1p_tpm", method="zmean", prefix="ms_")
175
+
176
+ # 4. Run Difference-in-Differences on CD8 T cells
177
+ features = [c for c in adata.obs.columns if c.startswith("ms_")]
178
+ results = st.did_table(adata, features, design, visits=("Pre", "Post"), celltype="CD8 T cell")
179
+ print(results[["feature", "beta_DiD", "se_DiD", "p_DiD", "FDR_DiD"]])
180
+ ```
181
+
182
+ Or use the one-liner convenience wrapper for a quick multi-cell-type scan:
183
+
184
+ ```python
185
+ results = st.quick_did(
186
+ adata,
187
+ module_scores=gene_sets,
188
+ visits=("Pre", "Post"),
189
+ arm_col="response_harmonized",
190
+ arm_treated="Responder",
191
+ arm_control="Non-responder",
192
+ celltype_col="cell_type",
193
+ )
194
+ ```
195
+
196
+ ## Supported Study Designs & Datasets
197
+
198
+ sctrial ships with five real clinical trial datasets, accessible via built-in loaders (`st.load_*()`). Each demonstrates a different study design:
199
+
200
+ | Design | Description | Dataset | Source | Tutorial |
201
+ |--------|-------------|---------|--------|----------|
202
+ | **Two-arm paired DiD** | Pre/post × treatment/control interaction | Sade-Feldman et al., *Cell* 2018 — melanoma immunotherapy | [GSE120575](https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE120575) | [Immunotherapy](tutorials/example_immunotherapy_sade_feldman.ipynb) |
203
+ | **Single-arm pre/post** | Paired within-arm contrasts over time | ImmPort GSE171964 — PBMC vaccine response | [GSE171964](https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE171964) | [Vaccine](tutorials/example_vaccine_immport.ipynb) |
204
+ | **Single-arm pre/post** | Paired within-arm, multi-timepoint | van Galen et al., *Cell* 2019 — AML chemotherapy | [GSE116256](https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE116256) | — |
205
+ | **Single-arm multi-timepoint** | Longitudinal tracking across 4 visits | GSE290722 — CAR-T cell therapy (ZUMA-1) | [GSE290722](https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE290722) | — |
206
+ | **Cross-sectional between-arm** | Between-group comparison at one timepoint | Stephenson et al., *Nature Medicine* 2021 — COVID-19 severity | [E-MTAB-10026](https://www.ebi.ac.uk/biostudies/arrayexpress/studies/E-MTAB-10026) | [COVID-19](tutorials/example_covid19_stephenson.ipynb) |
207
+
208
+ Additional tutorial: [Scalability Benchmark](tutorials/stress_test_real_scale.ipynb) — performance testing on the Sade-Feldman dataset.
209
+
210
+ ## Documentation
211
+
212
+ Full documentation: [https://sctrial.readthedocs.io](https://sctrial.readthedocs.io)
213
+
214
+ - [API Reference](https://sctrial.readthedocs.io/en/latest/api.html)
215
+ - [Tutorials](https://sctrial.readthedocs.io/en/latest/tutorials/index.html)
216
+ - [FAQ](https://sctrial.readthedocs.io/en/latest/faq.html)
217
+
218
+ ## Citation
219
+
220
+ If you use **sctrial** in your research, please cite:
221
+
222
+ > Vasanthakumari P, Valencia I, Aghmiouni MR, Magana B, Omar MN. **sctrial: Participant-Level Differential Analysis for Longitudinal Single-Cell Experiments.** *bioRxiv* (2026).
223
+
224
+ ```bibtex
225
+ @article{vasanthakumari2026sctrial,
226
+ title = {sctrial: Participant-Level Differential Analysis for Longitudinal Single-Cell Experiments},
227
+ author = {Vasanthakumari, Priyanka and Valencia, Itzel and Aghmiouni, Maryam R. and Magana, Bryan and Omar, Mohamed N.},
228
+ journal = {bioRxiv},
229
+ year = {2026},
230
+ url = {https://github.com/TheOmarLab/sctrial}
231
+ }
232
+ ```
233
+
234
+ ## License
235
+
236
+ MIT License - see [LICENSE](LICENSE) for details.
@@ -0,0 +1,155 @@
1
+ <p align="center">
2
+ <img src="https://raw.githubusercontent.com/TheOmarLab/sctrial/main/docs/source/_static/logo.svg" alt="sctrial" width="280">
3
+ </p>
4
+ <p align="center"><strong>Participant-Level Differential Analysis for Longitudinal Single-Cell Experiments</strong></p>
5
+
6
+ <p align="center">
7
+ <a href="https://github.com/TheOmarLab/sctrial/actions/workflows/test.yml">
8
+ <img src="https://github.com/TheOmarLab/sctrial/actions/workflows/test.yml/badge.svg?branch=main" alt="Test Status">
9
+ </a>
10
+ <a href="https://github.com/TheOmarLab/sctrial/releases">
11
+ <img src="https://img.shields.io/github/v/release/TheOmarLab/sctrial?label=version" alt="Release Version">
12
+ </a>
13
+ <img src="https://img.shields.io/badge/python-3.9%2B-blue" alt="Python Versions">
14
+ <a href="https://opensource.org/licenses/MIT">
15
+ <img src="https://img.shields.io/badge/License-MIT-yellow.svg" alt="License: MIT">
16
+ </a>
17
+ <a href="https://sctrial.readthedocs.io">
18
+ <img src="https://img.shields.io/badge/docs-latest-brightgreen.svg" alt="Documentation">
19
+ </a>
20
+ </p>
21
+
22
+ ---
23
+
24
+ ## Overview
25
+
26
+ **sctrial** is a Python package for performing rigorous statistical inference on single-cell RNA-seq data from clinical trials and longitudinal studies. Built on top of [AnnData](https://anndata.readthedocs.io/) and [Scanpy](https://scanpy.readthedocs.io/), it provides specialized tools for:
27
+
28
+ - **Difference-in-Differences (DiD)** analysis with participant fixed effects
29
+ - **Paired within-arm** pre→post contrasts
30
+ - **Between-arm** comparisons at fixed timepoints
31
+ - **Cell-type abundance** change testing
32
+ - **Gene set enrichment** analysis (GSEA) on DiD rankings
33
+ - **Power analysis** and sample size calculations
34
+ - **Effect size** estimation with confidence intervals
35
+
36
+ <p align="center">
37
+ <img src="https://raw.githubusercontent.com/TheOmarLab/sctrial/main/docs/source/_static/overview_figure.png" alt="sctrial overview — from scRNA-seq input through trial-aware analysis to statistical outputs" width="100%">
38
+ </p>
39
+
40
+ ## Key Features
41
+
42
+ | Feature | Description |
43
+ |---------|-------------|
44
+ | **Trial-Aware Design** | Define participant, visit, arm, and cell type columns once |
45
+ | **Robust Statistics** | Wild cluster bootstrap, participant-level aggregation |
46
+ | **Multiple Comparisons** | Built-in FDR correction across features and cell types |
47
+ | **Power Analysis** | Two-arm DiD and single-arm paired power/sample size calculations |
48
+ | **Single-Arm Support** | `arm_col=None` for studies without a control arm |
49
+ | **Publication-Ready Plots** | Forest plots, interaction plots, GSEA heatmaps |
50
+ | **Scalable** | Efficient processing of large single-cell datasets |
51
+
52
+ ## Installation
53
+
54
+ ```bash
55
+ pip install sctrial
56
+ ```
57
+
58
+ For development:
59
+ ```bash
60
+ git clone https://github.com/TheOmarLab/sctrial.git
61
+ cd sctrial
62
+ pip install -e ".[dev]"
63
+ ```
64
+
65
+ ## Quick Start
66
+
67
+ ```bash
68
+ pip install "sctrial[plots]" # includes dataset loaders and visualization
69
+ ```
70
+
71
+ ```python
72
+ import sctrial as st
73
+
74
+ # 1. Load a real immunotherapy trial dataset (auto-downloads on first use)
75
+ adata = st.load_sade_feldman()
76
+ adata = st.harmonize_response(adata) # majority-vote response labels
77
+
78
+ # 2. Define trial design
79
+ design = st.TrialDesign(
80
+ participant_col="participant_id",
81
+ visit_col="visit",
82
+ arm_col="response_harmonized",
83
+ arm_treated="Responder",
84
+ arm_control="Non-responder",
85
+ celltype_col="cell_type",
86
+ )
87
+
88
+ # 3. Score gene sets (dataset ships pre-normalized with log1p_tpm layer)
89
+ gene_sets = {
90
+ "Cytotoxicity": ["GZMA", "GZMB", "PRF1", "GNLY", "NKG7"],
91
+ "Exhaustion": ["PDCD1", "CTLA4", "HAVCR2", "LAG3", "TIGIT"],
92
+ }
93
+ adata = st.score_gene_sets(adata, gene_sets, layer="log1p_tpm", method="zmean", prefix="ms_")
94
+
95
+ # 4. Run Difference-in-Differences on CD8 T cells
96
+ features = [c for c in adata.obs.columns if c.startswith("ms_")]
97
+ results = st.did_table(adata, features, design, visits=("Pre", "Post"), celltype="CD8 T cell")
98
+ print(results[["feature", "beta_DiD", "se_DiD", "p_DiD", "FDR_DiD"]])
99
+ ```
100
+
101
+ Or use the one-liner convenience wrapper for a quick multi-cell-type scan:
102
+
103
+ ```python
104
+ results = st.quick_did(
105
+ adata,
106
+ module_scores=gene_sets,
107
+ visits=("Pre", "Post"),
108
+ arm_col="response_harmonized",
109
+ arm_treated="Responder",
110
+ arm_control="Non-responder",
111
+ celltype_col="cell_type",
112
+ )
113
+ ```
114
+
115
+ ## Supported Study Designs & Datasets
116
+
117
+ sctrial provides built-in loaders (`st.load_*()`) for five real clinical trial datasets. Together they cover the following study designs:
118
+
119
+ | Design | Description | Dataset | Source | Tutorial |
120
+ |--------|-------------|---------|--------|----------|
121
+ | **Two-arm paired DiD** | Pre/post × treatment/control interaction | Sade-Feldman et al., *Cell* 2018 — melanoma immunotherapy | [GSE120575](https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE120575) | [Immunotherapy](https://github.com/TheOmarLab/sctrial/blob/main/tutorials/example_immunotherapy_sade_feldman.ipynb) |
122
+ | **Single-arm pre/post** | Paired within-arm contrasts over time | ImmPort GSE171964 — PBMC vaccine response | [GSE171964](https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE171964) | [Vaccine](https://github.com/TheOmarLab/sctrial/blob/main/tutorials/example_vaccine_immport.ipynb) |
123
+ | **Single-arm pre/post** | Paired within-arm, multi-timepoint | van Galen et al., *Cell* 2019 — AML chemotherapy | [GSE116256](https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE116256) | — |
124
+ | **Single-arm multi-timepoint** | Longitudinal tracking across 4 visits | GSE290722 — CAR-T cell therapy (ZUMA-1) | [GSE290722](https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE290722) | — |
125
+ | **Cross-sectional between-arm** | Between-group comparison at one timepoint | Stephenson et al., *Nature Medicine* 2021 — COVID-19 severity | [E-MTAB-10026](https://www.ebi.ac.uk/biostudies/arrayexpress/studies/E-MTAB-10026) | [COVID-19](https://github.com/TheOmarLab/sctrial/blob/main/tutorials/example_covid19_stephenson.ipynb) |
126
+
127
+ Additional tutorial: [Scalability Benchmark](https://github.com/TheOmarLab/sctrial/blob/main/tutorials/stress_test_real_scale.ipynb) — performance testing on the Sade-Feldman dataset.
128
+
129
+ ## Documentation
130
+
131
+ Full documentation: [https://sctrial.readthedocs.io](https://sctrial.readthedocs.io)
132
+
133
+ - [API Reference](https://sctrial.readthedocs.io/en/latest/api.html)
134
+ - [Tutorials](https://sctrial.readthedocs.io/en/latest/tutorials/index.html)
135
+ - [FAQ](https://sctrial.readthedocs.io/en/latest/faq.html)
136
+
137
+ ## Citation
138
+
139
+ If you use **sctrial** in your research, please cite:
140
+
141
+ > Vasanthakumari P, Valencia I, Aghmiouni MR, Magana B, Omar MN. **sctrial: Participant-Level Differential Analysis for Longitudinal Single-Cell Experiments.** *bioRxiv* (2026).
142
+
143
+ ```bibtex
144
+ @article{vasanthakumari2026sctrial,
145
+ title = {sctrial: Participant-Level Differential Analysis for Longitudinal Single-Cell Experiments},
146
+ author = {Vasanthakumari, Priyanka and Valencia, Itzel and Aghmiouni, Maryam R. and Magana, Bryan and Omar, Mohamed N.},
147
+ journal = {bioRxiv},
148
+ year = {2026},
149
+ url = {https://github.com/TheOmarLab/sctrial}
150
+ }
151
+ ```
152
+
153
+ ## License
154
+
155
+ MIT License - see [LICENSE](LICENSE) for details.
@@ -0,0 +1,123 @@
1
+ [build-system]
2
+ requires = ["hatchling>=1.24.2"]
3
+ build-backend = "hatchling.build"
4
+
5
+ [project]
6
+ name = "sctrial"
7
+ version = "0.3.0"
8
+ description = "Participant-level differential analysis for longitudinal single-cell experiments: DiD, paired comparisons, module scoring, pseudobulk, and power analysis."
9
+ readme = { file = "README.md", content-type = "text/markdown" }
10
+ requires-python = ">=3.9"
11
+ license = { text = "MIT" }
12
+ authors = [{ name = "Contributors" }]
13
+ keywords = ["single-cell", "anndata", "trial", "difference-in-differences", "crossover", "pseudobulk"]
14
+ classifiers = [
15
+ "Development Status :: 3 - Alpha",
16
+ "Intended Audience :: Science/Research",
17
+ "License :: OSI Approved :: MIT License",
18
+ "Programming Language :: Python :: 3",
19
+ "Programming Language :: Python :: 3 :: Only",
20
+ "Programming Language :: Python :: 3.9",
21
+ "Programming Language :: Python :: 3.10",
22
+ "Programming Language :: Python :: 3.11",
23
+ "Programming Language :: Python :: 3.12",
24
+ "Topic :: Scientific/Engineering :: Bio-Informatics",
25
+ "Typing :: Typed",
26
+ ]
27
+
28
+ dependencies = [
29
+ "numpy>=1.22",
30
+ "pandas>=1.5",
31
+ "scipy>=1.8",
32
+ "statsmodels>=0.13",
33
+ "anndata>=0.9",
34
+ "joblib>=1.2",
35
+ "typing_extensions>=4.0; python_version < '3.11'",
36
+ ]
37
+
38
+ [project.urls]
39
+ Homepage = "https://github.com/TheOmarLab/sctrial"
40
+ Documentation = "https://sctrial.readthedocs.io"
41
+ Repository = "https://github.com/TheOmarLab/sctrial"
42
+ Issues = "https://github.com/TheOmarLab/sctrial/issues"
43
+
44
+ [project.optional-dependencies]
45
+ plots = ["matplotlib>=3.6", "seaborn>=0.12", "scanpy>=1.9", "plotly>=5.0", "adjustText>=1.0"]
46
+ gsea = ["gseapy>=1.0"]
47
+ aucell = ["pyscenic>=0.12"]
48
+ survival = ["lifelines>=0.27"]
49
+ bayes = ["pymc>=5.0"]
50
+ all = [
51
+ "matplotlib>=3.6",
52
+ "seaborn>=0.12",
53
+ "scanpy>=1.9",
54
+ "plotly>=5.0",
55
+ "adjustText>=1.0",
56
+ "gseapy>=1.0",
57
+ "pyscenic>=0.12",
58
+ "lifelines>=0.27",
59
+ "pymc>=5.0",
60
+ ]
61
+ docs = [
62
+ "sphinx>=7.0",
63
+ "pydata-sphinx-theme>=0.15",
64
+ "sphinx-design>=0.5",
65
+ "sphinx-copybutton>=0.5",
66
+ "nbsphinx>=0.9",
67
+ "myst-parser>=2.0",
68
+ "pandoc",
69
+ ]
70
+ dev = [
71
+ "pytest>=7.0",
72
+ "pytest-cov>=4.0",
73
+ "ruff>=0.5",
74
+ "mypy>=1.5",
75
+ "pre-commit>=3.0",
76
+ "build>=1.0",
77
+ "twine>=5.0",
78
+ "psutil>=5.9",
79
+ "sctrial[docs]",
80
+ ]
81
+
82
+ [tool.hatch.build.targets.wheel]
83
+ packages = ["src/sctrial"]
84
+
85
+ [tool.hatch.build.targets.sdist]
86
+ include = [
87
+ "src/sctrial",
88
+ "tests",
89
+ "README.md",
90
+ "LICENSE",
91
+ "pyproject.toml",
92
+ ]
93
+
94
+ [tool.pytest.ini_options]
95
+ testpaths = ["tests"]
96
+ addopts = "-q"
97
+
98
+ [tool.mypy]
99
+ ignore_missing_imports = true
100
+ check_untyped_defs = true
101
+
102
+ [tool.ruff]
103
+ line-length = 100
104
+ target-version = "py39"
105
+
106
+ [tool.ruff.lint]
107
+ select = ["E", "F", "W", "I", "UP"]
108
+ ignore = ["E501"] # Ignore line-too-long errors
109
+
110
+ [tool.coverage.run]
111
+ branch = true
112
+ source = ["src/sctrial"]
113
+ omit = ["*/tests/*", "*/__pycache__/*", "*/conftest.py"]
114
+
115
+ [tool.coverage.report]
116
+ exclude_lines = [
117
+ "pragma: no cover",
118
+ "def __repr__",
119
+ "raise AssertionError",
120
+ "raise NotImplementedError",
121
+ "if TYPE_CHECKING:",
122
+ "if __name__ == .__main__.:",
123
+ ]