scdlkit-0.1.0.tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- scdlkit-0.1.0/LICENSE +21 -0
- scdlkit-0.1.0/PKG-INFO +265 -0
- scdlkit-0.1.0/README.md +211 -0
- scdlkit-0.1.0/pyproject.toml +93 -0
- scdlkit-0.1.0/setup.cfg +4 -0
- scdlkit-0.1.0/src/scdlkit/__init__.py +20 -0
- scdlkit-0.1.0/src/scdlkit/data/__init__.py +6 -0
- scdlkit-0.1.0/src/scdlkit/data/datasets.py +30 -0
- scdlkit-0.1.0/src/scdlkit/data/prepare.py +262 -0
- scdlkit-0.1.0/src/scdlkit/data/schemas.py +35 -0
- scdlkit-0.1.0/src/scdlkit/data/splits.py +90 -0
- scdlkit-0.1.0/src/scdlkit/evaluation/__init__.py +6 -0
- scdlkit-0.1.0/src/scdlkit/evaluation/compare.py +69 -0
- scdlkit-0.1.0/src/scdlkit/evaluation/evaluator.py +34 -0
- scdlkit-0.1.0/src/scdlkit/evaluation/metrics.py +83 -0
- scdlkit-0.1.0/src/scdlkit/evaluation/report.py +40 -0
- scdlkit-0.1.0/src/scdlkit/models/__init__.py +20 -0
- scdlkit-0.1.0/src/scdlkit/models/autoencoder.py +43 -0
- scdlkit-0.1.0/src/scdlkit/models/base.py +22 -0
- scdlkit-0.1.0/src/scdlkit/models/blocks.py +32 -0
- scdlkit-0.1.0/src/scdlkit/models/classifier.py +30 -0
- scdlkit-0.1.0/src/scdlkit/models/denoising.py +37 -0
- scdlkit-0.1.0/src/scdlkit/models/registry.py +33 -0
- scdlkit-0.1.0/src/scdlkit/models/transformer.py +73 -0
- scdlkit-0.1.0/src/scdlkit/models/vae.py +61 -0
- scdlkit-0.1.0/src/scdlkit/runner.py +278 -0
- scdlkit-0.1.0/src/scdlkit/tasks/__init__.py +14 -0
- scdlkit-0.1.0/src/scdlkit/tasks/base.py +40 -0
- scdlkit-0.1.0/src/scdlkit/tasks/classification.py +28 -0
- scdlkit-0.1.0/src/scdlkit/tasks/reconstruction.py +41 -0
- scdlkit-0.1.0/src/scdlkit/tasks/representation.py +14 -0
- scdlkit-0.1.0/src/scdlkit/training/__init__.py +5 -0
- scdlkit-0.1.0/src/scdlkit/training/callbacks.py +12 -0
- scdlkit-0.1.0/src/scdlkit/training/trainer.py +176 -0
- scdlkit-0.1.0/src/scdlkit/utils/__init__.py +7 -0
- scdlkit-0.1.0/src/scdlkit/utils/device.py +13 -0
- scdlkit-0.1.0/src/scdlkit/utils/io.py +13 -0
- scdlkit-0.1.0/src/scdlkit/utils/seed.py +18 -0
- scdlkit-0.1.0/src/scdlkit/visualization/__init__.py +15 -0
- scdlkit-0.1.0/src/scdlkit/visualization/classification.py +26 -0
- scdlkit-0.1.0/src/scdlkit/visualization/compare.py +32 -0
- scdlkit-0.1.0/src/scdlkit/visualization/latent.py +36 -0
- scdlkit-0.1.0/src/scdlkit/visualization/reconstruction.py +24 -0
- scdlkit-0.1.0/src/scdlkit/visualization/training.py +21 -0
- scdlkit-0.1.0/src/scdlkit.egg-info/PKG-INFO +265 -0
- scdlkit-0.1.0/src/scdlkit.egg-info/SOURCES.txt +50 -0
- scdlkit-0.1.0/src/scdlkit.egg-info/dependency_links.txt +1 -0
- scdlkit-0.1.0/src/scdlkit.egg-info/requires.txt +32 -0
- scdlkit-0.1.0/src/scdlkit.egg-info/top_level.txt +1 -0
- scdlkit-0.1.0/tests/test_models.py +26 -0
- scdlkit-0.1.0/tests/test_prepare.py +41 -0
- scdlkit-0.1.0/tests/test_runner.py +80 -0
scdlkit-0.1.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Vathanak Uddam
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
scdlkit-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,265 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: scdlkit
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: AnnData-native deep learning baselines for single-cell data.
|
|
5
|
+
Author: Vathanak Uddam
|
|
6
|
+
License-Expression: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/uddamvathanak/scDLKit
|
|
8
|
+
Project-URL: Changelog, https://github.com/uddamvathanak/scDLKit/blob/main/CHANGELOG.md
|
|
9
|
+
Project-URL: Documentation, https://uddamvathanak.github.io/scDLKit/
|
|
10
|
+
Project-URL: Repository, https://github.com/uddamvathanak/scDLKit
|
|
11
|
+
Project-URL: Issues, https://github.com/uddamvathanak/scDLKit/issues
|
|
12
|
+
Keywords: single-cell,anndata,bioinformatics,deep-learning,pytorch,scRNA-seq
|
|
13
|
+
Classifier: Development Status :: 3 - Alpha
|
|
14
|
+
Classifier: Intended Audience :: Science/Research
|
|
15
|
+
Classifier: Operating System :: OS Independent
|
|
16
|
+
Classifier: Programming Language :: Python :: 3
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
20
|
+
Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
|
|
21
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
22
|
+
Requires-Python: >=3.10
|
|
23
|
+
Description-Content-Type: text/markdown
|
|
24
|
+
License-File: LICENSE
|
|
25
|
+
Requires-Dist: anndata>=0.10
|
|
26
|
+
Requires-Dist: h5py>=3.11
|
|
27
|
+
Requires-Dist: matplotlib>=3.8
|
|
28
|
+
Requires-Dist: numpy>=1.26
|
|
29
|
+
Requires-Dist: pandas>=2.1
|
|
30
|
+
Requires-Dist: scikit-learn>=1.4
|
|
31
|
+
Requires-Dist: scipy>=1.11
|
|
32
|
+
Requires-Dist: seaborn>=0.13
|
|
33
|
+
Requires-Dist: torch>=2.3
|
|
34
|
+
Requires-Dist: tqdm>=4.66
|
|
35
|
+
Requires-Dist: umap-learn>=0.5
|
|
36
|
+
Provides-Extra: notebook
|
|
37
|
+
Requires-Dist: jupyter>=1.0; extra == "notebook"
|
|
38
|
+
Provides-Extra: scanpy
|
|
39
|
+
Requires-Dist: scanpy>=1.10; extra == "scanpy"
|
|
40
|
+
Provides-Extra: dev
|
|
41
|
+
Requires-Dist: build>=1.2; extra == "dev"
|
|
42
|
+
Requires-Dist: mypy>=1.9; extra == "dev"
|
|
43
|
+
Requires-Dist: pytest>=8.2; extra == "dev"
|
|
44
|
+
Requires-Dist: pytest-cov>=5.0; extra == "dev"
|
|
45
|
+
Requires-Dist: ruff>=0.5; extra == "dev"
|
|
46
|
+
Requires-Dist: twine>=5.0; extra == "dev"
|
|
47
|
+
Provides-Extra: docs
|
|
48
|
+
Requires-Dist: jupyter>=1.0; extra == "docs"
|
|
49
|
+
Requires-Dist: mkdocs-material>=9.5; extra == "docs"
|
|
50
|
+
Requires-Dist: mkdocstrings[python]>=0.25; extra == "docs"
|
|
51
|
+
Requires-Dist: mkdocs-gen-files>=0.5; extra == "docs"
|
|
52
|
+
Requires-Dist: mkdocs-literate-nav>=0.6; extra == "docs"
|
|
53
|
+
Dynamic: license-file
|
|
54
|
+
|
|
55
|
+
# scDLKit
|
|
56
|
+
|
|
57
|
+
[CI](https://github.com/uddamvathanak/scDLKit/actions/workflows/ci.yml)
|
|
58
|
+
[Docs](https://github.com/uddamvathanak/scDLKit/actions/workflows/docs.yml)
|
|
59
|
+
[PyPI version](https://pypi.org/project/scdlkit/)
|
|
60
|
+
[Python versions](https://pypi.org/project/scdlkit/)
|
|
61
|
+
[License: MIT](LICENSE)
|
|
62
|
+
[GitHub stars](https://github.com/uddamvathanak/scDLKit/stargazers)
|
|
63
|
+
[Downloads](https://pepy.tech/projects/scdlkit)
|
|
64
|
+
|
|
65
|
+
Train, evaluate, compare, and visualize baseline deep-learning models for single-cell data without writing PyTorch from scratch.
|
|
66
|
+
|
|
67
|
+
## Why scDLKit
|
|
68
|
+
|
|
69
|
+
- AnnData-native workflow for single-cell users.
|
|
70
|
+
- Baseline-first model zoo: AE, VAE, DAE, Transformer AE, and MLP classification.
|
|
71
|
+
- Built-in training, evaluation, comparison, and plotting.
|
|
72
|
+
- Reproducible reports and notebooks for portfolio-ready demonstrations.
|
|
73
|
+
- Extensible registry-based architecture for custom models and future tasks.
|
|
74
|
+
|
|
75
|
+
## Supported platforms
|
|
76
|
+
|
|
77
|
+
- Linux: supported
|
|
78
|
+
- macOS: supported
|
|
79
|
+
- Windows: supported
|
|
80
|
+
|
|
81
|
+
## Installation
|
|
82
|
+
|
|
83
|
+
Primary public install path:
|
|
84
|
+
|
|
85
|
+
```bash
|
|
86
|
+
python -m pip install scdlkit
|
|
87
|
+
```
|
|
88
|
+
|
|
89
|
+
Optional extras:
|
|
90
|
+
|
|
91
|
+
```bash
|
|
92
|
+
python -m pip install "scdlkit[scanpy]"
|
|
93
|
+
python -m pip install "scdlkit[notebook]"
|
|
94
|
+
python -m pip install "scdlkit[dev,docs]"
|
|
95
|
+
```
|
|
96
|
+
|
|
97
|
+
## Quickstart
|
|
98
|
+
|
|
99
|
+
Smallest package-level run:
|
|
100
|
+
|
|
101
|
+
```python
|
|
102
|
+
import numpy as np
|
|
103
|
+
import pandas as pd
|
|
104
|
+
from anndata import AnnData
|
|
105
|
+
from scdlkit import TaskRunner
|
|
106
|
+
|
|
107
|
+
X = np.random.rand(120, 32).astype("float32")
|
|
108
|
+
obs = pd.DataFrame({"cell_type": ["T-cell"] * 60 + ["B-cell"] * 60})
|
|
109
|
+
adata = AnnData(X=X, obs=obs)
|
|
110
|
+
|
|
111
|
+
runner = TaskRunner(
|
|
112
|
+
model="vae",
|
|
113
|
+
task="representation",
|
|
114
|
+
latent_dim=8,
|
|
115
|
+
epochs=5,
|
|
116
|
+
batch_size=16,
|
|
117
|
+
label_key="cell_type",
|
|
118
|
+
)
|
|
119
|
+
|
|
120
|
+
runner.fit(adata)
|
|
121
|
+
metrics = runner.evaluate()
|
|
122
|
+
runner.plot_losses()
|
|
123
|
+
```
|
|
124
|
+
|
|
125
|
+
## Repo examples
|
|
126
|
+
|
|
127
|
+
If you cloned the repository, the easiest end-to-end demo is:
|
|
128
|
+
|
|
129
|
+
```bash
|
|
130
|
+
python examples/first_run_synthetic.py
|
|
131
|
+
```
|
|
132
|
+
|
|
133
|
+
This writes a report, checkpoint, loss curve, and latent PCA plot to `artifacts/first_run/`.
|
|
134
|
+
|
|
135
|
+
If you want the beginner notebook after cloning the repo:
|
|
136
|
+
|
|
137
|
+
```bash
|
|
138
|
+
python -m pip install "scdlkit[notebook]"
|
|
139
|
+
jupyter notebook examples/first_run_synthetic.ipynb
|
|
140
|
+
```
|
|
141
|
+
|
|
142
|
+
The heavier notebooks still need Scanpy:
|
|
143
|
+
|
|
144
|
+
```bash
|
|
145
|
+
python -m pip install "scdlkit[scanpy]"
|
|
146
|
+
```
|
|
147
|
+
|
|
148
|
+
## Optional contributor Conda environment
|
|
149
|
+
|
|
150
|
+
Conda is kept for contributors and demos. It is not the primary public install path for `v0.1.0`.
|
|
151
|
+
|
|
152
|
+
Official installers:
|
|
153
|
+
|
|
154
|
+
- Miniconda install guide: https://www.anaconda.com/docs/getting-started/miniconda/install
|
|
155
|
+
- Anaconda Distribution download: https://www.anaconda.com/download
|
|
156
|
+
|
|
157
|
+
From the repo root:
|
|
158
|
+
|
|
159
|
+
```bash
|
|
160
|
+
conda env create -f environment.yml
|
|
161
|
+
conda activate scdlkit
|
|
162
|
+
```
|
|
163
|
+
|
|
164
|
+
## Core APIs
|
|
165
|
+
|
|
166
|
+
High-level:
|
|
167
|
+
|
|
168
|
+
```python
|
|
169
|
+
from scdlkit import TaskRunner
|
|
170
|
+
```
|
|
171
|
+
|
|
172
|
+
Lower-level:
|
|
173
|
+
|
|
174
|
+
```python
|
|
175
|
+
from scdlkit import Trainer, create_model, prepare_data
|
|
176
|
+
```
|
|
177
|
+
|
|
178
|
+
Comparison:
|
|
179
|
+
|
|
180
|
+
```python
|
|
181
|
+
from scdlkit import compare_models
|
|
182
|
+
|
|
183
|
+
benchmark = compare_models(
|
|
184
|
+
adata,
|
|
185
|
+
models=["autoencoder", "vae", "transformer_ae"],
|
|
186
|
+
task="representation",
|
|
187
|
+
shared_kwargs={"epochs": 10, "label_key": "cell_type"},
|
|
188
|
+
output_dir="artifacts/compare",
|
|
189
|
+
)
|
|
190
|
+
```
|
|
191
|
+
|
|
192
|
+
## Supported models
|
|
193
|
+
|
|
194
|
+
- `autoencoder`
|
|
195
|
+
- `vae`
|
|
196
|
+
- `denoising_autoencoder`
|
|
197
|
+
- `transformer_ae`
|
|
198
|
+
- `mlp_classifier`
|
|
199
|
+
|
|
200
|
+
## Supported tasks
|
|
201
|
+
|
|
202
|
+
- `representation`
|
|
203
|
+
- `reconstruction`
|
|
204
|
+
- `classification`
|
|
205
|
+
|
|
206
|
+
## Documentation
|
|
207
|
+
|
|
208
|
+
Project documentation is configured for GitHub Pages with MkDocs Material:
|
|
209
|
+
|
|
210
|
+
- Docs site: https://uddamvathanak.github.io/scDLKit/
|
|
211
|
+
- API reference: `docs/api.md`
|
|
212
|
+
- Example notebooks: `examples/`
|
|
213
|
+
|
|
214
|
+
### GitHub Pages setup
|
|
215
|
+
|
|
216
|
+
The docs workflow expects GitHub Pages to be enabled once at the repository level.
|
|
217
|
+
|
|
218
|
+
1. Open `Settings -> Pages` for this repo:
|
|
219
|
+
`https://github.com/uddamvathanak/scDLKit/settings/pages`
|
|
220
|
+
2. Under `Build and deployment`, set `Source` to `GitHub Actions`.
|
|
221
|
+
3. Save the setting.
|
|
222
|
+
4. Re-run the `docs` workflow.
|
|
223
|
+
|
|
224
|
+
Without that one-time setting, GitHub returns a `404` when `actions/configure-pages` or `actions/deploy-pages` tries to access the Pages site.
|
|
225
|
+
|
|
226
|
+
### Optional automatic Pages enablement
|
|
227
|
+
|
|
228
|
+
If you want the workflow to bootstrap Pages automatically instead of doing the one-time manual setup:
|
|
229
|
+
|
|
230
|
+
1. Create a repository secret named `PAGES_ENABLEMENT_TOKEN`.
|
|
231
|
+
2. Use a Personal Access Token with `repo` scope or Pages write permission.
|
|
232
|
+
3. Re-run the `docs` workflow.
|
|
233
|
+
|
|
234
|
+
## Release flow
|
|
235
|
+
|
|
236
|
+
- Stage to TestPyPI first with `release-testpypi.yml`.
|
|
237
|
+
- Publish the final release from a `v*` tag with `release.yml`.
|
|
238
|
+
- Use trusted publishing instead of long-lived PyPI API tokens.
|
|
239
|
+
- See [`RELEASING.md`](RELEASING.md) for the full checklist.
|
|
240
|
+
|
|
241
|
+
## Examples
|
|
242
|
+
|
|
243
|
+
- `examples/first_run_synthetic.ipynb` is the easiest notebook walkthrough.
|
|
244
|
+
- `examples/first_run_synthetic.py` is the easiest script walkthrough.
|
|
245
|
+
- `examples/train_vae_pbmc.ipynb`
|
|
246
|
+
- `examples/compare_models_pbmc.ipynb`
|
|
247
|
+
- `examples/classification_demo.ipynb`
|
|
248
|
+
|
|
249
|
+
## Roadmap
|
|
250
|
+
|
|
251
|
+
`v0.1`
|
|
252
|
+
|
|
253
|
+
- Expanded core workflow with training, evaluation, reporting, and plotting.
|
|
254
|
+
- Staged TestPyPI and PyPI publishing.
|
|
255
|
+
- Cross-platform smoke validation and reproducible notebooks.
|
|
256
|
+
|
|
257
|
+
`v0.2`
|
|
258
|
+
|
|
259
|
+
- CLI and YAML config support.
|
|
260
|
+
- Graph-based models and richer benchmarking helpers.
|
|
261
|
+
- More task-specific extensions.
|
|
262
|
+
|
|
263
|
+
## Citation
|
|
264
|
+
|
|
265
|
+
If you use `scDLKit`, cite the software entry in [`CITATION.cff`](CITATION.cff).
|
scdlkit-0.1.0/README.md
ADDED
|
@@ -0,0 +1,211 @@
|
|
|
1
|
+
# scDLKit
|
|
2
|
+
|
|
3
|
+
[CI](https://github.com/uddamvathanak/scDLKit/actions/workflows/ci.yml)
|
|
4
|
+
[Docs](https://github.com/uddamvathanak/scDLKit/actions/workflows/docs.yml)
|
|
5
|
+
[PyPI version](https://pypi.org/project/scdlkit/)
|
|
6
|
+
[Python versions](https://pypi.org/project/scdlkit/)
|
|
7
|
+
[License: MIT](LICENSE)
|
|
8
|
+
[GitHub stars](https://github.com/uddamvathanak/scDLKit/stargazers)
|
|
9
|
+
[Downloads](https://pepy.tech/projects/scdlkit)
|
|
10
|
+
|
|
11
|
+
Train, evaluate, compare, and visualize baseline deep-learning models for single-cell data without writing PyTorch from scratch.
|
|
12
|
+
|
|
13
|
+
## Why scDLKit
|
|
14
|
+
|
|
15
|
+
- AnnData-native workflow for single-cell users.
|
|
16
|
+
- Baseline-first model zoo: AE, VAE, DAE, Transformer AE, and MLP classification.
|
|
17
|
+
- Built-in training, evaluation, comparison, and plotting.
|
|
18
|
+
- Reproducible reports and notebooks for portfolio-ready demonstrations.
|
|
19
|
+
- Extensible registry-based architecture for custom models and future tasks.
|
|
20
|
+
|
|
21
|
+
## Supported platforms
|
|
22
|
+
|
|
23
|
+
- Linux: supported
|
|
24
|
+
- macOS: supported
|
|
25
|
+
- Windows: supported
|
|
26
|
+
|
|
27
|
+
## Installation
|
|
28
|
+
|
|
29
|
+
Primary public install path:
|
|
30
|
+
|
|
31
|
+
```bash
|
|
32
|
+
python -m pip install scdlkit
|
|
33
|
+
```
|
|
34
|
+
|
|
35
|
+
Optional extras:
|
|
36
|
+
|
|
37
|
+
```bash
|
|
38
|
+
python -m pip install "scdlkit[scanpy]"
|
|
39
|
+
python -m pip install "scdlkit[notebook]"
|
|
40
|
+
python -m pip install "scdlkit[dev,docs]"
|
|
41
|
+
```
|
|
42
|
+
|
|
43
|
+
## Quickstart
|
|
44
|
+
|
|
45
|
+
Smallest package-level run:
|
|
46
|
+
|
|
47
|
+
```python
|
|
48
|
+
import numpy as np
|
|
49
|
+
import pandas as pd
|
|
50
|
+
from anndata import AnnData
|
|
51
|
+
from scdlkit import TaskRunner
|
|
52
|
+
|
|
53
|
+
X = np.random.rand(120, 32).astype("float32")
|
|
54
|
+
obs = pd.DataFrame({"cell_type": ["T-cell"] * 60 + ["B-cell"] * 60})
|
|
55
|
+
adata = AnnData(X=X, obs=obs)
|
|
56
|
+
|
|
57
|
+
runner = TaskRunner(
|
|
58
|
+
model="vae",
|
|
59
|
+
task="representation",
|
|
60
|
+
latent_dim=8,
|
|
61
|
+
epochs=5,
|
|
62
|
+
batch_size=16,
|
|
63
|
+
label_key="cell_type",
|
|
64
|
+
)
|
|
65
|
+
|
|
66
|
+
runner.fit(adata)
|
|
67
|
+
metrics = runner.evaluate()
|
|
68
|
+
runner.plot_losses()
|
|
69
|
+
```
|
|
70
|
+
|
|
71
|
+
## Repo examples
|
|
72
|
+
|
|
73
|
+
If you cloned the repository, the easiest end-to-end demo is:
|
|
74
|
+
|
|
75
|
+
```bash
|
|
76
|
+
python examples/first_run_synthetic.py
|
|
77
|
+
```
|
|
78
|
+
|
|
79
|
+
This writes a report, checkpoint, loss curve, and latent PCA plot to `artifacts/first_run/`.
|
|
80
|
+
|
|
81
|
+
If you want the beginner notebook after cloning the repo:
|
|
82
|
+
|
|
83
|
+
```bash
|
|
84
|
+
python -m pip install "scdlkit[notebook]"
|
|
85
|
+
jupyter notebook examples/first_run_synthetic.ipynb
|
|
86
|
+
```
|
|
87
|
+
|
|
88
|
+
The heavier notebooks still need Scanpy:
|
|
89
|
+
|
|
90
|
+
```bash
|
|
91
|
+
python -m pip install "scdlkit[scanpy]"
|
|
92
|
+
```
|
|
93
|
+
|
|
94
|
+
## Optional contributor Conda environment
|
|
95
|
+
|
|
96
|
+
Conda is kept for contributors and demos. It is not the primary public install path for `v0.1.0`.
|
|
97
|
+
|
|
98
|
+
Official installers:
|
|
99
|
+
|
|
100
|
+
- Miniconda install guide: https://www.anaconda.com/docs/getting-started/miniconda/install
|
|
101
|
+
- Anaconda Distribution download: https://www.anaconda.com/download
|
|
102
|
+
|
|
103
|
+
From the repo root:
|
|
104
|
+
|
|
105
|
+
```bash
|
|
106
|
+
conda env create -f environment.yml
|
|
107
|
+
conda activate scdlkit
|
|
108
|
+
```
|
|
109
|
+
|
|
110
|
+
## Core APIs
|
|
111
|
+
|
|
112
|
+
High-level:
|
|
113
|
+
|
|
114
|
+
```python
|
|
115
|
+
from scdlkit import TaskRunner
|
|
116
|
+
```
|
|
117
|
+
|
|
118
|
+
Lower-level:
|
|
119
|
+
|
|
120
|
+
```python
|
|
121
|
+
from scdlkit import Trainer, create_model, prepare_data
|
|
122
|
+
```
|
|
123
|
+
|
|
124
|
+
Comparison:
|
|
125
|
+
|
|
126
|
+
```python
|
|
127
|
+
from scdlkit import compare_models
|
|
128
|
+
|
|
129
|
+
benchmark = compare_models(
|
|
130
|
+
adata,
|
|
131
|
+
models=["autoencoder", "vae", "transformer_ae"],
|
|
132
|
+
task="representation",
|
|
133
|
+
shared_kwargs={"epochs": 10, "label_key": "cell_type"},
|
|
134
|
+
output_dir="artifacts/compare",
|
|
135
|
+
)
|
|
136
|
+
```
|
|
137
|
+
|
|
138
|
+
## Supported models
|
|
139
|
+
|
|
140
|
+
- `autoencoder`
|
|
141
|
+
- `vae`
|
|
142
|
+
- `denoising_autoencoder`
|
|
143
|
+
- `transformer_ae`
|
|
144
|
+
- `mlp_classifier`
|
|
145
|
+
|
|
146
|
+
## Supported tasks
|
|
147
|
+
|
|
148
|
+
- `representation`
|
|
149
|
+
- `reconstruction`
|
|
150
|
+
- `classification`
|
|
151
|
+
|
|
152
|
+
## Documentation
|
|
153
|
+
|
|
154
|
+
Project documentation is configured for GitHub Pages with MkDocs Material:
|
|
155
|
+
|
|
156
|
+
- Docs site: https://uddamvathanak.github.io/scDLKit/
|
|
157
|
+
- API reference: `docs/api.md`
|
|
158
|
+
- Example notebooks: `examples/`
|
|
159
|
+
|
|
160
|
+
### GitHub Pages setup
|
|
161
|
+
|
|
162
|
+
The docs workflow expects GitHub Pages to be enabled once at the repository level.
|
|
163
|
+
|
|
164
|
+
1. Open `Settings -> Pages` for this repo:
|
|
165
|
+
`https://github.com/uddamvathanak/scDLKit/settings/pages`
|
|
166
|
+
2. Under `Build and deployment`, set `Source` to `GitHub Actions`.
|
|
167
|
+
3. Save the setting.
|
|
168
|
+
4. Re-run the `docs` workflow.
|
|
169
|
+
|
|
170
|
+
Without that one-time setting, GitHub returns a `404` when `actions/configure-pages` or `actions/deploy-pages` tries to access the Pages site.
|
|
171
|
+
|
|
172
|
+
### Optional automatic Pages enablement
|
|
173
|
+
|
|
174
|
+
If you want the workflow to bootstrap Pages automatically instead of doing the one-time manual setup:
|
|
175
|
+
|
|
176
|
+
1. Create a repository secret named `PAGES_ENABLEMENT_TOKEN`.
|
|
177
|
+
2. Use a Personal Access Token with `repo` scope or Pages write permission.
|
|
178
|
+
3. Re-run the `docs` workflow.
|
|
179
|
+
|
|
180
|
+
## Release flow
|
|
181
|
+
|
|
182
|
+
- Stage to TestPyPI first with `release-testpypi.yml`.
|
|
183
|
+
- Publish the final release from a `v*` tag with `release.yml`.
|
|
184
|
+
- Use trusted publishing instead of long-lived PyPI API tokens.
|
|
185
|
+
- See [`RELEASING.md`](RELEASING.md) for the full checklist.
|
|
186
|
+
|
|
187
|
+
## Examples
|
|
188
|
+
|
|
189
|
+
- `examples/first_run_synthetic.ipynb` is the easiest notebook walkthrough.
|
|
190
|
+
- `examples/first_run_synthetic.py` is the easiest script walkthrough.
|
|
191
|
+
- `examples/train_vae_pbmc.ipynb`
|
|
192
|
+
- `examples/compare_models_pbmc.ipynb`
|
|
193
|
+
- `examples/classification_demo.ipynb`
|
|
194
|
+
|
|
195
|
+
## Roadmap
|
|
196
|
+
|
|
197
|
+
`v0.1`
|
|
198
|
+
|
|
199
|
+
- Expanded core workflow with training, evaluation, reporting, and plotting.
|
|
200
|
+
- Staged TestPyPI and PyPI publishing.
|
|
201
|
+
- Cross-platform smoke validation and reproducible notebooks.
|
|
202
|
+
|
|
203
|
+
`v0.2`
|
|
204
|
+
|
|
205
|
+
- CLI and YAML config support.
|
|
206
|
+
- Graph-based models and richer benchmarking helpers.
|
|
207
|
+
- More task-specific extensions.
|
|
208
|
+
|
|
209
|
+
## Citation
|
|
210
|
+
|
|
211
|
+
If you use `scDLKit`, cite the software entry in [`CITATION.cff`](CITATION.cff).
|
scdlkit-0.1.0/pyproject.toml
ADDED
|
@@ -0,0 +1,93 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=69", "wheel"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "scdlkit"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
description = "AnnData-native deep learning baselines for single-cell data."
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
license = "MIT"
|
|
11
|
+
requires-python = ">=3.10"
|
|
12
|
+
authors = [
|
|
13
|
+
{ name = "Vathanak Uddam" },
|
|
14
|
+
]
|
|
15
|
+
keywords = ["single-cell", "anndata", "bioinformatics", "deep-learning", "pytorch", "scRNA-seq"]
|
|
16
|
+
classifiers = [
|
|
17
|
+
"Development Status :: 3 - Alpha",
|
|
18
|
+
"Intended Audience :: Science/Research",
|
|
19
|
+
"Operating System :: OS Independent",
|
|
20
|
+
"Programming Language :: Python :: 3",
|
|
21
|
+
"Programming Language :: Python :: 3.10",
|
|
22
|
+
"Programming Language :: Python :: 3.11",
|
|
23
|
+
"Programming Language :: Python :: 3.12",
|
|
24
|
+
"Topic :: Scientific/Engineering :: Bio-Informatics",
|
|
25
|
+
"Topic :: Scientific/Engineering :: Artificial Intelligence",
|
|
26
|
+
]
|
|
27
|
+
dependencies = [
|
|
28
|
+
"anndata>=0.10",
|
|
29
|
+
"h5py>=3.11",
|
|
30
|
+
"matplotlib>=3.8",
|
|
31
|
+
"numpy>=1.26",
|
|
32
|
+
"pandas>=2.1",
|
|
33
|
+
"scikit-learn>=1.4",
|
|
34
|
+
"scipy>=1.11",
|
|
35
|
+
"seaborn>=0.13",
|
|
36
|
+
"torch>=2.3",
|
|
37
|
+
"tqdm>=4.66",
|
|
38
|
+
"umap-learn>=0.5",
|
|
39
|
+
]
|
|
40
|
+
|
|
41
|
+
[project.optional-dependencies]
|
|
42
|
+
notebook = ["jupyter>=1.0"]
|
|
43
|
+
scanpy = ["scanpy>=1.10"]
|
|
44
|
+
dev = [
|
|
45
|
+
"build>=1.2",
|
|
46
|
+
"mypy>=1.9",
|
|
47
|
+
"pytest>=8.2",
|
|
48
|
+
"pytest-cov>=5.0",
|
|
49
|
+
"ruff>=0.5",
|
|
50
|
+
"twine>=5.0",
|
|
51
|
+
]
|
|
52
|
+
docs = [
|
|
53
|
+
"jupyter>=1.0",
|
|
54
|
+
"mkdocs-material>=9.5",
|
|
55
|
+
"mkdocstrings[python]>=0.25",
|
|
56
|
+
"mkdocs-gen-files>=0.5",
|
|
57
|
+
"mkdocs-literate-nav>=0.6",
|
|
58
|
+
]
|
|
59
|
+
|
|
60
|
+
[project.urls]
|
|
61
|
+
Homepage = "https://github.com/uddamvathanak/scDLKit"
|
|
62
|
+
Changelog = "https://github.com/uddamvathanak/scDLKit/blob/main/CHANGELOG.md"
|
|
63
|
+
Documentation = "https://uddamvathanak.github.io/scDLKit/"
|
|
64
|
+
Repository = "https://github.com/uddamvathanak/scDLKit"
|
|
65
|
+
Issues = "https://github.com/uddamvathanak/scDLKit/issues"
|
|
66
|
+
|
|
67
|
+
[tool.setuptools]
|
|
68
|
+
package-dir = { "" = "src" }
|
|
69
|
+
|
|
70
|
+
[tool.setuptools.packages.find]
|
|
71
|
+
where = ["src"]
|
|
72
|
+
|
|
73
|
+
[tool.pytest.ini_options]
|
|
74
|
+
addopts = "-ra --strict-markers --strict-config"
|
|
75
|
+
testpaths = ["tests"]
|
|
76
|
+
|
|
77
|
+
[tool.ruff]
|
|
78
|
+
line-length = 100
|
|
79
|
+
target-version = "py310"
|
|
80
|
+
|
|
81
|
+
[tool.ruff.lint]
|
|
82
|
+
select = ["E", "F", "I", "B", "UP", "N", "S", "C4", "SIM"]
|
|
83
|
+
ignore = ["S101"]
|
|
84
|
+
|
|
85
|
+
[tool.mypy]
|
|
86
|
+
python_version = "3.10"
|
|
87
|
+
warn_unused_configs = true
|
|
88
|
+
warn_redundant_casts = true
|
|
89
|
+
warn_unused_ignores = true
|
|
90
|
+
disallow_untyped_defs = true
|
|
91
|
+
no_implicit_optional = true
|
|
92
|
+
ignore_missing_imports = true
|
|
93
|
+
exclude = ["examples/"]
|
scdlkit-0.1.0/setup.cfg
ADDED
|
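[setup.cfg hunk (+4 lines) was not captured in this extract]
scdlkit-0.1.0/src/scdlkit/__init__.py
ADDED
|
@@ -0,0 +1,20 @@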
|
|
|
1
|
+
"""Public package surface for scDLKit."""
|
|
2
|
+
|
|
3
|
+
from scdlkit.data import PreparedData, prepare_data
|
|
4
|
+
from scdlkit.evaluation.compare import BenchmarkResult, compare_models
|
|
5
|
+
from scdlkit.models import BaseModel, create_model
|
|
6
|
+
from scdlkit.runner import TaskRunner
|
|
7
|
+
from scdlkit.training import Trainer
|
|
8
|
+
|
|
9
|
+
__all__ = [
|
|
10
|
+
"BaseModel",
|
|
11
|
+
"BenchmarkResult",
|
|
12
|
+
"PreparedData",
|
|
13
|
+
"TaskRunner",
|
|
14
|
+
"Trainer",
|
|
15
|
+
"compare_models",
|
|
16
|
+
"create_model",
|
|
17
|
+
"prepare_data",
|
|
18
|
+
]
|
|
19
|
+
|
|
20
|
+
__version__ = "0.1.0"
|
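[src/scdlkit/data/__init__.py hunk (+6 lines) was not captured in this extract]
scdlkit-0.1.0/src/scdlkit/data/datasets.py
ADDED
|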
@@ -0,0 +1,30 @@
|
|
|
1
|
+
"""PyTorch datasets backed by dense or sparse matrices."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import numpy as np
|
|
6
|
+
import torch
|
|
7
|
+
from scipy import sparse
|
|
8
|
+
from torch.utils.data import Dataset
|
|
9
|
+
|
|
10
|
+
from scdlkit.data.schemas import SplitData
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class AnnDataset(Dataset[dict[str, torch.Tensor]]):
|
|
14
|
+
"""Dataset that converts rows to dense float32 on access."""
|
|
15
|
+
|
|
16
|
+
def __init__(self, split: SplitData):
|
|
17
|
+
self.split = split
|
|
18
|
+
|
|
19
|
+
def __len__(self) -> int:
|
|
20
|
+
return len(self.split)
|
|
21
|
+
|
|
22
|
+
def __getitem__(self, index: int) -> dict[str, torch.Tensor]:
|
|
23
|
+
row = self.split.X[index]
|
|
24
|
+
x = row.toarray().ravel() if sparse.issparse(row) else np.asarray(row).ravel()
|
|
25
|
+
sample: dict[str, torch.Tensor] = {"x": torch.as_tensor(x, dtype=torch.float32)}
|
|
26
|
+
if self.split.labels is not None:
|
|
27
|
+
sample["y"] = torch.as_tensor(int(self.split.labels[index]), dtype=torch.long)
|
|
28
|
+
if self.split.batches is not None:
|
|
29
|
+
sample["batch"] = torch.as_tensor(int(self.split.batches[index]), dtype=torch.long)
|
|
30
|
+
return sample
|