scdlkit 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52) hide show
  1. scdlkit-0.1.0/LICENSE +21 -0
  2. scdlkit-0.1.0/PKG-INFO +265 -0
  3. scdlkit-0.1.0/README.md +211 -0
  4. scdlkit-0.1.0/pyproject.toml +93 -0
  5. scdlkit-0.1.0/setup.cfg +4 -0
  6. scdlkit-0.1.0/src/scdlkit/__init__.py +20 -0
  7. scdlkit-0.1.0/src/scdlkit/data/__init__.py +6 -0
  8. scdlkit-0.1.0/src/scdlkit/data/datasets.py +30 -0
  9. scdlkit-0.1.0/src/scdlkit/data/prepare.py +262 -0
  10. scdlkit-0.1.0/src/scdlkit/data/schemas.py +35 -0
  11. scdlkit-0.1.0/src/scdlkit/data/splits.py +90 -0
  12. scdlkit-0.1.0/src/scdlkit/evaluation/__init__.py +6 -0
  13. scdlkit-0.1.0/src/scdlkit/evaluation/compare.py +69 -0
  14. scdlkit-0.1.0/src/scdlkit/evaluation/evaluator.py +34 -0
  15. scdlkit-0.1.0/src/scdlkit/evaluation/metrics.py +83 -0
  16. scdlkit-0.1.0/src/scdlkit/evaluation/report.py +40 -0
  17. scdlkit-0.1.0/src/scdlkit/models/__init__.py +20 -0
  18. scdlkit-0.1.0/src/scdlkit/models/autoencoder.py +43 -0
  19. scdlkit-0.1.0/src/scdlkit/models/base.py +22 -0
  20. scdlkit-0.1.0/src/scdlkit/models/blocks.py +32 -0
  21. scdlkit-0.1.0/src/scdlkit/models/classifier.py +30 -0
  22. scdlkit-0.1.0/src/scdlkit/models/denoising.py +37 -0
  23. scdlkit-0.1.0/src/scdlkit/models/registry.py +33 -0
  24. scdlkit-0.1.0/src/scdlkit/models/transformer.py +73 -0
  25. scdlkit-0.1.0/src/scdlkit/models/vae.py +61 -0
  26. scdlkit-0.1.0/src/scdlkit/runner.py +278 -0
  27. scdlkit-0.1.0/src/scdlkit/tasks/__init__.py +14 -0
  28. scdlkit-0.1.0/src/scdlkit/tasks/base.py +40 -0
  29. scdlkit-0.1.0/src/scdlkit/tasks/classification.py +28 -0
  30. scdlkit-0.1.0/src/scdlkit/tasks/reconstruction.py +41 -0
  31. scdlkit-0.1.0/src/scdlkit/tasks/representation.py +14 -0
  32. scdlkit-0.1.0/src/scdlkit/training/__init__.py +5 -0
  33. scdlkit-0.1.0/src/scdlkit/training/callbacks.py +12 -0
  34. scdlkit-0.1.0/src/scdlkit/training/trainer.py +176 -0
  35. scdlkit-0.1.0/src/scdlkit/utils/__init__.py +7 -0
  36. scdlkit-0.1.0/src/scdlkit/utils/device.py +13 -0
  37. scdlkit-0.1.0/src/scdlkit/utils/io.py +13 -0
  38. scdlkit-0.1.0/src/scdlkit/utils/seed.py +18 -0
  39. scdlkit-0.1.0/src/scdlkit/visualization/__init__.py +15 -0
  40. scdlkit-0.1.0/src/scdlkit/visualization/classification.py +26 -0
  41. scdlkit-0.1.0/src/scdlkit/visualization/compare.py +32 -0
  42. scdlkit-0.1.0/src/scdlkit/visualization/latent.py +36 -0
  43. scdlkit-0.1.0/src/scdlkit/visualization/reconstruction.py +24 -0
  44. scdlkit-0.1.0/src/scdlkit/visualization/training.py +21 -0
  45. scdlkit-0.1.0/src/scdlkit.egg-info/PKG-INFO +265 -0
  46. scdlkit-0.1.0/src/scdlkit.egg-info/SOURCES.txt +50 -0
  47. scdlkit-0.1.0/src/scdlkit.egg-info/dependency_links.txt +1 -0
  48. scdlkit-0.1.0/src/scdlkit.egg-info/requires.txt +32 -0
  49. scdlkit-0.1.0/src/scdlkit.egg-info/top_level.txt +1 -0
  50. scdlkit-0.1.0/tests/test_models.py +26 -0
  51. scdlkit-0.1.0/tests/test_prepare.py +41 -0
  52. scdlkit-0.1.0/tests/test_runner.py +80 -0
scdlkit-0.1.0/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Vathanak Uddam
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
scdlkit-0.1.0/PKG-INFO ADDED
@@ -0,0 +1,265 @@
1
+ Metadata-Version: 2.4
2
+ Name: scdlkit
3
+ Version: 0.1.0
4
+ Summary: AnnData-native deep learning baselines for single-cell data.
5
+ Author: Vathanak Uddam
6
+ License-Expression: MIT
7
+ Project-URL: Homepage, https://github.com/uddamvathanak/scDLKit
8
+ Project-URL: Changelog, https://github.com/uddamvathanak/scDLKit/blob/main/CHANGELOG.md
9
+ Project-URL: Documentation, https://uddamvathanak.github.io/scDLKit/
10
+ Project-URL: Repository, https://github.com/uddamvathanak/scDLKit
11
+ Project-URL: Issues, https://github.com/uddamvathanak/scDLKit/issues
12
+ Keywords: single-cell,anndata,bioinformatics,deep-learning,pytorch,scRNA-seq
13
+ Classifier: Development Status :: 3 - Alpha
14
+ Classifier: Intended Audience :: Science/Research
15
+ Classifier: Operating System :: OS Independent
16
+ Classifier: Programming Language :: Python :: 3
17
+ Classifier: Programming Language :: Python :: 3.10
18
+ Classifier: Programming Language :: Python :: 3.11
19
+ Classifier: Programming Language :: Python :: 3.12
20
+ Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
21
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
22
+ Requires-Python: >=3.10
23
+ Description-Content-Type: text/markdown
24
+ License-File: LICENSE
25
+ Requires-Dist: anndata>=0.10
26
+ Requires-Dist: h5py>=3.11
27
+ Requires-Dist: matplotlib>=3.8
28
+ Requires-Dist: numpy>=1.26
29
+ Requires-Dist: pandas>=2.1
30
+ Requires-Dist: scikit-learn>=1.4
31
+ Requires-Dist: scipy>=1.11
32
+ Requires-Dist: seaborn>=0.13
33
+ Requires-Dist: torch>=2.3
34
+ Requires-Dist: tqdm>=4.66
35
+ Requires-Dist: umap-learn>=0.5
36
+ Provides-Extra: notebook
37
+ Requires-Dist: jupyter>=1.0; extra == "notebook"
38
+ Provides-Extra: scanpy
39
+ Requires-Dist: scanpy>=1.10; extra == "scanpy"
40
+ Provides-Extra: dev
41
+ Requires-Dist: build>=1.2; extra == "dev"
42
+ Requires-Dist: mypy>=1.9; extra == "dev"
43
+ Requires-Dist: pytest>=8.2; extra == "dev"
44
+ Requires-Dist: pytest-cov>=5.0; extra == "dev"
45
+ Requires-Dist: ruff>=0.5; extra == "dev"
46
+ Requires-Dist: twine>=5.0; extra == "dev"
47
+ Provides-Extra: docs
48
+ Requires-Dist: jupyter>=1.0; extra == "docs"
49
+ Requires-Dist: mkdocs-material>=9.5; extra == "docs"
50
+ Requires-Dist: mkdocstrings[python]>=0.25; extra == "docs"
51
+ Requires-Dist: mkdocs-gen-files>=0.5; extra == "docs"
52
+ Requires-Dist: mkdocs-literate-nav>=0.6; extra == "docs"
53
+ Dynamic: license-file
54
+
55
+ # scDLKit
56
+
57
+ [![CI](https://img.shields.io/github/actions/workflow/status/uddamvathanak/scDLKit/ci.yml?label=ci)](https://github.com/uddamvathanak/scDLKit/actions/workflows/ci.yml)
58
+ [![Docs](https://img.shields.io/github/actions/workflow/status/uddamvathanak/scDLKit/docs.yml?label=docs)](https://github.com/uddamvathanak/scDLKit/actions/workflows/docs.yml)
59
+ [![PyPI version](https://img.shields.io/pypi/v/scdlkit)](https://pypi.org/project/scdlkit/)
60
+ [![Python versions](https://img.shields.io/pypi/pyversions/scdlkit)](https://pypi.org/project/scdlkit/)
61
+ [![License](https://img.shields.io/pypi/l/scdlkit)](LICENSE)
62
+ [![GitHub stars](https://img.shields.io/github/stars/uddamvathanak/scDLKit?style=social)](https://github.com/uddamvathanak/scDLKit/stargazers)
63
+ [![Downloads](https://static.pepy.tech/badge/scdlkit)](https://pepy.tech/projects/scdlkit)
64
+
65
+ Train, evaluate, compare, and visualize baseline deep-learning models for single-cell data without writing PyTorch from scratch.
66
+
67
+ ## Why scDLKit
68
+
69
+ - AnnData-native workflow for single-cell users.
70
+ - Baseline-first model zoo: AE, VAE, DAE, Transformer AE, and MLP classification.
71
+ - Built-in training, evaluation, comparison, and plotting.
72
+ - Reproducible reports and notebooks for portfolio-ready demonstrations.
73
+ - Extensible registry-based architecture for custom models and future tasks.
74
+
75
+ ## Supported platforms
76
+
77
+ - Linux: supported
78
+ - macOS: supported
79
+ - Windows: supported
80
+
81
+ ## Installation
82
+
83
+ Primary public install path:
84
+
85
+ ```bash
86
+ python -m pip install scdlkit
87
+ ```
88
+
89
+ Optional extras:
90
+
91
+ ```bash
92
+ python -m pip install "scdlkit[scanpy]"
93
+ python -m pip install "scdlkit[notebook]"
94
+ python -m pip install "scdlkit[dev,docs]"
95
+ ```
96
+
97
+ ## Quickstart
98
+
99
+ Smallest package-level run:
100
+
101
+ ```python
102
+ import numpy as np
103
+ import pandas as pd
104
+ from anndata import AnnData
105
+ from scdlkit import TaskRunner
106
+
107
+ X = np.random.rand(120, 32).astype("float32")
108
+ obs = pd.DataFrame({"cell_type": ["T-cell"] * 60 + ["B-cell"] * 60})
109
+ adata = AnnData(X=X, obs=obs)
110
+
111
+ runner = TaskRunner(
112
+ model="vae",
113
+ task="representation",
114
+ latent_dim=8,
115
+ epochs=5,
116
+ batch_size=16,
117
+ label_key="cell_type",
118
+ )
119
+
120
+ runner.fit(adata)
121
+ metrics = runner.evaluate()
122
+ runner.plot_losses()
123
+ ```
124
+
125
+ ## Repo examples
126
+
127
+ If you cloned the repository, the easiest end-to-end demo is:
128
+
129
+ ```bash
130
+ python examples/first_run_synthetic.py
131
+ ```
132
+
133
+ This writes a report, checkpoint, loss curve, and latent PCA plot to `artifacts/first_run/`.
134
+
135
+ If you want the beginner notebook after cloning the repo:
136
+
137
+ ```bash
138
+ python -m pip install "scdlkit[notebook]"
139
+ jupyter notebook examples/first_run_synthetic.ipynb
140
+ ```
141
+
142
+ The heavier notebooks still need Scanpy:
143
+
144
+ ```bash
145
+ python -m pip install "scdlkit[scanpy]"
146
+ ```
147
+
148
+ ## Optional contributor Conda environment
149
+
150
+ Conda is kept for contributors and demos. It is not the primary public install path for `v0.1.0`.
151
+
152
+ Official installers:
153
+
154
+ - Miniconda install guide: https://www.anaconda.com/docs/getting-started/miniconda/install
155
+ - Anaconda Distribution download: https://www.anaconda.com/download
156
+
157
+ From the repo root:
158
+
159
+ ```bash
160
+ conda env create -f environment.yml
161
+ conda activate scdlkit
162
+ ```
163
+
164
+ ## Core APIs
165
+
166
+ High-level:
167
+
168
+ ```python
169
+ from scdlkit import TaskRunner
170
+ ```
171
+
172
+ Lower-level:
173
+
174
+ ```python
175
+ from scdlkit import Trainer, create_model, prepare_data
176
+ ```
177
+
178
+ Comparison:
179
+
180
+ ```python
181
+ from scdlkit import compare_models
182
+
183
+ benchmark = compare_models(
184
+ adata,
185
+ models=["autoencoder", "vae", "transformer_ae"],
186
+ task="representation",
187
+ shared_kwargs={"epochs": 10, "label_key": "cell_type"},
188
+ output_dir="artifacts/compare",
189
+ )
190
+ ```
191
+
192
+ ## Supported models
193
+
194
+ - `autoencoder`
195
+ - `vae`
196
+ - `denoising_autoencoder`
197
+ - `transformer_ae`
198
+ - `mlp_classifier`
199
+
200
+ ## Supported tasks
201
+
202
+ - `representation`
203
+ - `reconstruction`
204
+ - `classification`
205
+
206
+ ## Documentation
207
+
208
+ Project documentation is configured for GitHub Pages with MkDocs Material:
209
+
210
+ - Docs site: https://uddamvathanak.github.io/scDLKit/
211
+ - API reference: `docs/api.md`
212
+ - Example notebooks: `examples/`
213
+
214
+ ### GitHub Pages setup
215
+
216
+ The docs workflow expects GitHub Pages to be enabled once at the repository level.
217
+
218
+ 1. Open `Settings -> Pages` for this repo:
219
+ `https://github.com/uddamvathanak/scDLKit/settings/pages`
220
+ 2. Under `Build and deployment`, set `Source` to `GitHub Actions`.
221
+ 3. Save the setting.
222
+ 4. Re-run the `docs` workflow.
223
+
224
+ Without that one-time setting, GitHub returns a `404` when `actions/configure-pages` or `actions/deploy-pages` tries to access the Pages site.
225
+
226
+ ### Optional automatic Pages enablement
227
+
228
+ If you want the workflow to bootstrap Pages automatically instead of doing the one-time manual setup:
229
+
230
+ 1. Create a repository secret named `PAGES_ENABLEMENT_TOKEN`.
231
+ 2. Use a Personal Access Token with `repo` scope or Pages write permission.
232
+ 3. Re-run the `docs` workflow.
233
+
234
+ ## Release flow
235
+
236
+ - Stage to TestPyPI first with `release-testpypi.yml`.
237
+ - Publish the final release from a `v*` tag with `release.yml`.
238
+ - Use trusted publishing instead of long-lived PyPI API tokens.
239
+ - See [`RELEASING.md`](RELEASING.md) for the full checklist.
240
+
241
+ ## Examples
242
+
243
+ - `examples/first_run_synthetic.ipynb` is the easiest notebook walkthrough.
244
+ - `examples/first_run_synthetic.py` is the easiest script walkthrough.
245
+ - `examples/train_vae_pbmc.ipynb`
246
+ - `examples/compare_models_pbmc.ipynb`
247
+ - `examples/classification_demo.ipynb`
248
+
249
+ ## Roadmap
250
+
251
+ `v0.1`
252
+
253
+ - Expanded core workflow with training, evaluation, reporting, and plotting.
254
+ - Staged TestPyPI and PyPI publishing.
255
+ - Cross-platform smoke validation and reproducible notebooks.
256
+
257
+ `v0.2`
258
+
259
+ - CLI and YAML config support.
260
+ - Graph-based models and richer benchmarking helpers.
261
+ - More task-specific extensions.
262
+
263
+ ## Citation
264
+
265
+ If you use `scDLKit`, cite the software entry in [`CITATION.cff`](CITATION.cff).
scdlkit-0.1.0/README.md ADDED
@@ -0,0 +1,211 @@
1
+ # scDLKit
2
+
3
+ [![CI](https://img.shields.io/github/actions/workflow/status/uddamvathanak/scDLKit/ci.yml?label=ci)](https://github.com/uddamvathanak/scDLKit/actions/workflows/ci.yml)
4
+ [![Docs](https://img.shields.io/github/actions/workflow/status/uddamvathanak/scDLKit/docs.yml?label=docs)](https://github.com/uddamvathanak/scDLKit/actions/workflows/docs.yml)
5
+ [![PyPI version](https://img.shields.io/pypi/v/scdlkit)](https://pypi.org/project/scdlkit/)
6
+ [![Python versions](https://img.shields.io/pypi/pyversions/scdlkit)](https://pypi.org/project/scdlkit/)
7
+ [![License](https://img.shields.io/pypi/l/scdlkit)](LICENSE)
8
+ [![GitHub stars](https://img.shields.io/github/stars/uddamvathanak/scDLKit?style=social)](https://github.com/uddamvathanak/scDLKit/stargazers)
9
+ [![Downloads](https://static.pepy.tech/badge/scdlkit)](https://pepy.tech/projects/scdlkit)
10
+
11
+ Train, evaluate, compare, and visualize baseline deep-learning models for single-cell data without writing PyTorch from scratch.
12
+
13
+ ## Why scDLKit
14
+
15
+ - AnnData-native workflow for single-cell users.
16
+ - Baseline-first model zoo: AE, VAE, DAE, Transformer AE, and MLP classification.
17
+ - Built-in training, evaluation, comparison, and plotting.
18
+ - Reproducible reports and notebooks for portfolio-ready demonstrations.
19
+ - Extensible registry-based architecture for custom models and future tasks.
20
+
21
+ ## Supported platforms
22
+
23
+ - Linux: supported
24
+ - macOS: supported
25
+ - Windows: supported
26
+
27
+ ## Installation
28
+
29
+ Primary public install path:
30
+
31
+ ```bash
32
+ python -m pip install scdlkit
33
+ ```
34
+
35
+ Optional extras:
36
+
37
+ ```bash
38
+ python -m pip install "scdlkit[scanpy]"
39
+ python -m pip install "scdlkit[notebook]"
40
+ python -m pip install "scdlkit[dev,docs]"
41
+ ```
42
+
43
+ ## Quickstart
44
+
45
+ Smallest package-level run:
46
+
47
+ ```python
48
+ import numpy as np
49
+ import pandas as pd
50
+ from anndata import AnnData
51
+ from scdlkit import TaskRunner
52
+
53
+ X = np.random.rand(120, 32).astype("float32")
54
+ obs = pd.DataFrame({"cell_type": ["T-cell"] * 60 + ["B-cell"] * 60})
55
+ adata = AnnData(X=X, obs=obs)
56
+
57
+ runner = TaskRunner(
58
+ model="vae",
59
+ task="representation",
60
+ latent_dim=8,
61
+ epochs=5,
62
+ batch_size=16,
63
+ label_key="cell_type",
64
+ )
65
+
66
+ runner.fit(adata)
67
+ metrics = runner.evaluate()
68
+ runner.plot_losses()
69
+ ```
70
+
71
+ ## Repo examples
72
+
73
+ If you cloned the repository, the easiest end-to-end demo is:
74
+
75
+ ```bash
76
+ python examples/first_run_synthetic.py
77
+ ```
78
+
79
+ This writes a report, checkpoint, loss curve, and latent PCA plot to `artifacts/first_run/`.
80
+
81
+ If you want the beginner notebook after cloning the repo:
82
+
83
+ ```bash
84
+ python -m pip install "scdlkit[notebook]"
85
+ jupyter notebook examples/first_run_synthetic.ipynb
86
+ ```
87
+
88
+ The heavier notebooks still need Scanpy:
89
+
90
+ ```bash
91
+ python -m pip install "scdlkit[scanpy]"
92
+ ```
93
+
94
+ ## Optional contributor Conda environment
95
+
96
+ Conda is kept for contributors and demos. It is not the primary public install path for `v0.1.0`.
97
+
98
+ Official installers:
99
+
100
+ - Miniconda install guide: https://www.anaconda.com/docs/getting-started/miniconda/install
101
+ - Anaconda Distribution download: https://www.anaconda.com/download
102
+
103
+ From the repo root:
104
+
105
+ ```bash
106
+ conda env create -f environment.yml
107
+ conda activate scdlkit
108
+ ```
109
+
110
+ ## Core APIs
111
+
112
+ High-level:
113
+
114
+ ```python
115
+ from scdlkit import TaskRunner
116
+ ```
117
+
118
+ Lower-level:
119
+
120
+ ```python
121
+ from scdlkit import Trainer, create_model, prepare_data
122
+ ```
123
+
124
+ Comparison:
125
+
126
+ ```python
127
+ from scdlkit import compare_models
128
+
129
+ benchmark = compare_models(
130
+ adata,
131
+ models=["autoencoder", "vae", "transformer_ae"],
132
+ task="representation",
133
+ shared_kwargs={"epochs": 10, "label_key": "cell_type"},
134
+ output_dir="artifacts/compare",
135
+ )
136
+ ```
137
+
138
+ ## Supported models
139
+
140
+ - `autoencoder`
141
+ - `vae`
142
+ - `denoising_autoencoder`
143
+ - `transformer_ae`
144
+ - `mlp_classifier`
145
+
146
+ ## Supported tasks
147
+
148
+ - `representation`
149
+ - `reconstruction`
150
+ - `classification`
151
+
152
+ ## Documentation
153
+
154
+ Project documentation is configured for GitHub Pages with MkDocs Material:
155
+
156
+ - Docs site: https://uddamvathanak.github.io/scDLKit/
157
+ - API reference: `docs/api.md`
158
+ - Example notebooks: `examples/`
159
+
160
+ ### GitHub Pages setup
161
+
162
+ The docs workflow expects GitHub Pages to be enabled once at the repository level.
163
+
164
+ 1. Open `Settings -> Pages` for this repo:
165
+ `https://github.com/uddamvathanak/scDLKit/settings/pages`
166
+ 2. Under `Build and deployment`, set `Source` to `GitHub Actions`.
167
+ 3. Save the setting.
168
+ 4. Re-run the `docs` workflow.
169
+
170
+ Without that one-time setting, GitHub returns a `404` when `actions/configure-pages` or `actions/deploy-pages` tries to access the Pages site.
171
+
172
+ ### Optional automatic Pages enablement
173
+
174
+ If you want the workflow to bootstrap Pages automatically instead of doing the one-time manual setup:
175
+
176
+ 1. Create a repository secret named `PAGES_ENABLEMENT_TOKEN`.
177
+ 2. Use a Personal Access Token with `repo` scope or Pages write permission.
178
+ 3. Re-run the `docs` workflow.
179
+
180
+ ## Release flow
181
+
182
+ - Stage to TestPyPI first with `release-testpypi.yml`.
183
+ - Publish the final release from a `v*` tag with `release.yml`.
184
+ - Use trusted publishing instead of long-lived PyPI API tokens.
185
+ - See [`RELEASING.md`](RELEASING.md) for the full checklist.
186
+
187
+ ## Examples
188
+
189
+ - `examples/first_run_synthetic.ipynb` is the easiest notebook walkthrough.
190
+ - `examples/first_run_synthetic.py` is the easiest script walkthrough.
191
+ - `examples/train_vae_pbmc.ipynb`
192
+ - `examples/compare_models_pbmc.ipynb`
193
+ - `examples/classification_demo.ipynb`
194
+
195
+ ## Roadmap
196
+
197
+ `v0.1`
198
+
199
+ - Expanded core workflow with training, evaluation, reporting, and plotting.
200
+ - Staged TestPyPI and PyPI publishing.
201
+ - Cross-platform smoke validation and reproducible notebooks.
202
+
203
+ `v0.2`
204
+
205
+ - CLI and YAML config support.
206
+ - Graph-based models and richer benchmarking helpers.
207
+ - More task-specific extensions.
208
+
209
+ ## Citation
210
+
211
+ If you use `scDLKit`, cite the software entry in [`CITATION.cff`](CITATION.cff).
scdlkit-0.1.0/pyproject.toml ADDED
@@ -0,0 +1,93 @@
1
+ [build-system]
2
+ requires = ["setuptools>=69", "wheel"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "scdlkit"
7
+ version = "0.1.0"
8
+ description = "AnnData-native deep learning baselines for single-cell data."
9
+ readme = "README.md"
10
+ license = "MIT"
11
+ requires-python = ">=3.10"
12
+ authors = [
13
+ { name = "Vathanak Uddam" },
14
+ ]
15
+ keywords = ["single-cell", "anndata", "bioinformatics", "deep-learning", "pytorch", "scRNA-seq"]
16
+ classifiers = [
17
+ "Development Status :: 3 - Alpha",
18
+ "Intended Audience :: Science/Research",
19
+ "Operating System :: OS Independent",
20
+ "Programming Language :: Python :: 3",
21
+ "Programming Language :: Python :: 3.10",
22
+ "Programming Language :: Python :: 3.11",
23
+ "Programming Language :: Python :: 3.12",
24
+ "Topic :: Scientific/Engineering :: Bio-Informatics",
25
+ "Topic :: Scientific/Engineering :: Artificial Intelligence",
26
+ ]
27
+ dependencies = [
28
+ "anndata>=0.10",
29
+ "h5py>=3.11",
30
+ "matplotlib>=3.8",
31
+ "numpy>=1.26",
32
+ "pandas>=2.1",
33
+ "scikit-learn>=1.4",
34
+ "scipy>=1.11",
35
+ "seaborn>=0.13",
36
+ "torch>=2.3",
37
+ "tqdm>=4.66",
38
+ "umap-learn>=0.5",
39
+ ]
40
+
41
+ [project.optional-dependencies]
42
+ notebook = ["jupyter>=1.0"]
43
+ scanpy = ["scanpy>=1.10"]
44
+ dev = [
45
+ "build>=1.2",
46
+ "mypy>=1.9",
47
+ "pytest>=8.2",
48
+ "pytest-cov>=5.0",
49
+ "ruff>=0.5",
50
+ "twine>=5.0",
51
+ ]
52
+ docs = [
53
+ "jupyter>=1.0",
54
+ "mkdocs-material>=9.5",
55
+ "mkdocstrings[python]>=0.25",
56
+ "mkdocs-gen-files>=0.5",
57
+ "mkdocs-literate-nav>=0.6",
58
+ ]
59
+
60
+ [project.urls]
61
+ Homepage = "https://github.com/uddamvathanak/scDLKit"
62
+ Changelog = "https://github.com/uddamvathanak/scDLKit/blob/main/CHANGELOG.md"
63
+ Documentation = "https://uddamvathanak.github.io/scDLKit/"
64
+ Repository = "https://github.com/uddamvathanak/scDLKit"
65
+ Issues = "https://github.com/uddamvathanak/scDLKit/issues"
66
+
67
+ [tool.setuptools]
68
+ package-dir = { "" = "src" }
69
+
70
+ [tool.setuptools.packages.find]
71
+ where = ["src"]
72
+
73
+ [tool.pytest.ini_options]
74
+ addopts = "-ra --strict-markers --strict-config"
75
+ testpaths = ["tests"]
76
+
77
+ [tool.ruff]
78
+ line-length = 100
79
+ target-version = "py310"
80
+
81
+ [tool.ruff.lint]
82
+ select = ["E", "F", "I", "B", "UP", "N", "S", "C4", "SIM"]
83
+ ignore = ["S101"]
84
+
85
+ [tool.mypy]
86
+ python_version = "3.10"
87
+ warn_unused_configs = true
88
+ warn_redundant_casts = true
89
+ warn_unused_ignores = true
90
+ disallow_untyped_defs = true
91
+ no_implicit_optional = true
92
+ ignore_missing_imports = true
93
+ exclude = ["examples/"]
scdlkit-0.1.0/setup.cfg ADDED
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
scdlkit-0.1.0/src/scdlkit/__init__.py ADDED
@@ -0,0 +1,20 @@
1
+ """Public package surface for scDLKit."""
2
+
3
+ from scdlkit.data import PreparedData, prepare_data
4
+ from scdlkit.evaluation.compare import BenchmarkResult, compare_models
5
+ from scdlkit.models import BaseModel, create_model
6
+ from scdlkit.runner import TaskRunner
7
+ from scdlkit.training import Trainer
8
+
9
+ __all__ = [
10
+ "BaseModel",
11
+ "BenchmarkResult",
12
+ "PreparedData",
13
+ "TaskRunner",
14
+ "Trainer",
15
+ "compare_models",
16
+ "create_model",
17
+ "prepare_data",
18
+ ]
19
+
20
+ __version__ = "0.1.0"
scdlkit-0.1.0/src/scdlkit/data/__init__.py ADDED
@@ -0,0 +1,6 @@
1
+ """Data preparation utilities."""
2
+
3
+ from scdlkit.data.prepare import prepare_data, transform_adata
4
+ from scdlkit.data.schemas import PreparedData, SplitData
5
+
6
+ __all__ = ["PreparedData", "SplitData", "prepare_data", "transform_adata"]
scdlkit-0.1.0/src/scdlkit/data/datasets.py ADDED
@@ -0,0 +1,30 @@
1
+ """PyTorch datasets backed by dense or sparse matrices."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import numpy as np
6
+ import torch
7
+ from scipy import sparse
8
+ from torch.utils.data import Dataset
9
+
10
+ from scdlkit.data.schemas import SplitData
11
+
12
+
13
class AnnDataset(Dataset[dict[str, torch.Tensor]]):
    """Map-style dataset over one split that densifies a single row per lookup.

    Every item is a dict holding the row under ``"x"`` as a flat float32
    tensor. When the wrapped split has labels and/or batches, the item also
    carries ``"y"`` and/or ``"batch"`` as int64 scalar tensors.
    """

    def __init__(self, split: SplitData):
        # Keep a reference only; rows are converted lazily in __getitem__.
        self.split = split

    def __len__(self) -> int:
        # Length is delegated to the wrapped split object.
        return len(self.split)

    def __getitem__(self, index: int) -> dict[str, torch.Tensor]:
        source = self.split
        record = source.X[index]

        # A sparse row is expanded to dense first; anything else is viewed as
        # an ndarray. Both paths are flattened to one dimension.
        if sparse.issparse(record):
            flat = record.toarray().ravel()
        else:
            flat = np.asarray(record).ravel()

        item: dict[str, torch.Tensor] = {"x": torch.as_tensor(flat, dtype=torch.float32)}

        # Optional per-row integer fields are attached only when present.
        for key, codes in (("y", source.labels), ("batch", source.batches)):
            if codes is not None:
                item[key] = torch.as_tensor(int(codes[index]), dtype=torch.long)
        return item