annbatch 0.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of annbatch might be problematic. Click here for more details.
- annbatch/__init__.py +15 -0
- annbatch/abc.py +228 -0
- annbatch/anndata_manager.py +396 -0
- annbatch/dense.py +63 -0
- annbatch/io.py +474 -0
- annbatch/sparse.py +160 -0
- annbatch/types.py +25 -0
- annbatch/utils.py +319 -0
- annbatch-0.0.1.dist-info/METADATA +214 -0
- annbatch-0.0.1.dist-info/RECORD +12 -0
- annbatch-0.0.1.dist-info/WHEEL +4 -0
- annbatch-0.0.1.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,214 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: annbatch
|
|
3
|
+
Version: 0.0.1
|
|
4
|
+
Summary: A minibatch loader for AnnData stores
|
|
5
|
+
Project-URL: Documentation, https://annbatch.readthedocs.io/
|
|
6
|
+
Project-URL: Homepage, https://github.com/scverse/annbatch
|
|
7
|
+
Project-URL: Source, https://github.com/scverse/annbatch
|
|
8
|
+
Author: Ilan Gold, Felix Fischer
|
|
9
|
+
Maintainer-email: Ilan Gold <ilan.gold@scverse.org>, Felix Fischer <felix.fischer@lamin.ai>
|
|
10
|
+
License: MIT License
|
|
11
|
+
|
|
12
|
+
Copyright (c) 2025, Ilan Gold
|
|
13
|
+
|
|
14
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
15
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
16
|
+
in the Software without restriction, including without limitation the rights
|
|
17
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
18
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
19
|
+
furnished to do so, subject to the following conditions:
|
|
20
|
+
|
|
21
|
+
The above copyright notice and this permission notice shall be included in all
|
|
22
|
+
copies or substantial portions of the Software.
|
|
23
|
+
|
|
24
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
25
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
26
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
27
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
28
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
29
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
30
|
+
SOFTWARE.
|
|
31
|
+
License-File: LICENSE
|
|
32
|
+
Classifier: Programming Language :: Python :: 3 :: Only
|
|
33
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
34
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
35
|
+
Requires-Python: <3.14,>=3.12
|
|
36
|
+
Requires-Dist: anndata[lazy]
|
|
37
|
+
Requires-Dist: dask
|
|
38
|
+
Requires-Dist: pandas
|
|
39
|
+
Requires-Dist: scipy>1.15
|
|
40
|
+
Requires-Dist: session-info2
|
|
41
|
+
Requires-Dist: tqdm
|
|
42
|
+
Requires-Dist: zarr>=3
|
|
43
|
+
Provides-Extra: cupy-cuda12
|
|
44
|
+
Requires-Dist: cupy-cuda12x; extra == 'cupy-cuda12'
|
|
45
|
+
Provides-Extra: cupy-cuda13
|
|
46
|
+
Requires-Dist: cupy-cuda13x; extra == 'cupy-cuda13'
|
|
47
|
+
Provides-Extra: dev
|
|
48
|
+
Requires-Dist: pre-commit; extra == 'dev'
|
|
49
|
+
Requires-Dist: twine>=4.0.2; extra == 'dev'
|
|
50
|
+
Provides-Extra: doc
|
|
51
|
+
Requires-Dist: docutils!=0.18.*,!=0.19.*,>=0.8; extra == 'doc'
|
|
52
|
+
Requires-Dist: ipykernel; extra == 'doc'
|
|
53
|
+
Requires-Dist: ipython; extra == 'doc'
|
|
54
|
+
Requires-Dist: myst-nb>=1.1; extra == 'doc'
|
|
55
|
+
Requires-Dist: pandas; extra == 'doc'
|
|
56
|
+
Requires-Dist: scanpydoc[theme,typehints]>=0.15.3; extra == 'doc'
|
|
57
|
+
Requires-Dist: sphinx-autodoc-typehints; extra == 'doc'
|
|
58
|
+
Requires-Dist: sphinx-book-theme>=1; extra == 'doc'
|
|
59
|
+
Requires-Dist: sphinx-copybutton; extra == 'doc'
|
|
60
|
+
Requires-Dist: sphinx-issues>=5.0.1; extra == 'doc'
|
|
61
|
+
Requires-Dist: sphinx-tabs; extra == 'doc'
|
|
62
|
+
Requires-Dist: sphinx>=8.1; extra == 'doc'
|
|
63
|
+
Requires-Dist: sphinxcontrib-bibtex>=1; extra == 'doc'
|
|
64
|
+
Requires-Dist: sphinxext-opengraph; extra == 'doc'
|
|
65
|
+
Provides-Extra: test
|
|
66
|
+
Requires-Dist: coverage; extra == 'test'
|
|
67
|
+
Requires-Dist: pytest; extra == 'test'
|
|
68
|
+
Requires-Dist: zarrs>=0.2.1; extra == 'test'
|
|
69
|
+
Provides-Extra: torch
|
|
70
|
+
Requires-Dist: torch; extra == 'torch'
|
|
71
|
+
Provides-Extra: zarrs
|
|
72
|
+
Requires-Dist: zarrs>=0.2.1; extra == 'zarrs'
|
|
73
|
+
Description-Content-Type: text/markdown
|
|
74
|
+
|
|
75
|
+
<!--Links at the top because this document is split for docs home page-->
|
|
76
|
+
|
|
77
|
+
[uv]: https://github.com/astral-sh/uv
|
|
78
|
+
|
|
79
|
+
[scverse discourse]: https://discourse.scverse.org/
|
|
80
|
+
|
|
81
|
+
[issue tracker]: https://github.com/scverse/annbatch/issues
|
|
82
|
+
|
|
83
|
+
[tests]: https://github.com/scverse/annbatch/actions/workflows/test.yaml
|
|
84
|
+
|
|
85
|
+
[documentation]: https://annbatch.readthedocs.io
|
|
86
|
+
|
|
87
|
+
[changelog]: https://annbatch.readthedocs.io/en/latest/changelog.html
|
|
88
|
+
|
|
89
|
+
[api documentation]: https://annbatch.readthedocs.io/en/latest/api.html
|
|
90
|
+
|
|
91
|
+
[pypi]: https://pypi.org/project/annbatch
|
|
92
|
+
|
|
93
|
+
[zarrs-python]: https://zarrs-python.readthedocs.io/
|
|
94
|
+
|
|
95
|
+
[lamin]: https://lamin.ai/
|
|
96
|
+
|
|
97
|
+
[scverse]: https://scverse.org/
|
|
98
|
+
|
|
99
|
+
[in-depth section of our docs]: https://annbatch.readthedocs.io/en/latest/#in-depth
|
|
100
|
+
|
|
101
|
+
# annbatch
|
|
102
|
+
|
|
103
|
+
> [!CAUTION]
|
|
104
|
+
> This package does not have a stable API.
|
|
105
|
+
However, we do not anticipate the on-disk format to change in an incompatible manner.
|
|
106
|
+
|
|
107
|
+
[![Tests][badge-tests]][tests]
|
|
108
|
+
[![Documentation][badge-docs]][documentation]
|
|
109
|
+
|
|
110
|
+
[badge-tests]: https://img.shields.io/github/actions/workflow/status/scverse/annbatch/test.yaml?branch=main
|
|
111
|
+
|
|
112
|
+
[badge-docs]: https://img.shields.io/readthedocs/annbatch
|
|
113
|
+
|
|
114
|
+
A data loader and I/O utilities for minibatching on-disk AnnData, co-developed by [lamin][] and [scverse][].
|
|
115
|
+
|
|
116
|
+
## Getting started
|
|
117
|
+
|
|
118
|
+
Please refer to the [documentation][],
|
|
119
|
+
in particular, the [API documentation][].
|
|
120
|
+
|
|
121
|
+
## Installation
|
|
122
|
+
|
|
123
|
+
You need to have Python 3.12 or 3.13 installed on your system (this release requires `>=3.12,<3.14`).
|
|
124
|
+
If you don't have Python installed, we recommend installing [uv][].
|
|
125
|
+
|
|
126
|
+
To install the latest release of `annbatch` from [PyPI][]:
|
|
127
|
+
|
|
128
|
+
```bash
|
|
129
|
+
pip install annbatch
|
|
130
|
+
```
|
|
131
|
+
|
|
132
|
+
We provide extras in the `pyproject.toml` for `torch`, `cupy-cuda12`, `cupy-cuda13`, and [zarrs-python][].
|
|
133
|
+
`cupy` provides accelerated handling of the data via `preload_to_gpu` once it has been read off disk and does not need to be used in conjunction with `torch`.
|
|
134
|
+
> [!IMPORTANT]
|
|
135
|
+
> [zarrs-python][] gives the necessary performance boost for the sharded data produced by our preprocessing functions to be useful when loading data off a local filesystem.
|
|
136
|
+
|
|
137
|
+
## Basic usage example
|
|
138
|
+
|
|
139
|
+
Basic preprocessing:
|
|
140
|
+
```python
|
|
141
|
+
from annbatch import create_anndata_collection
|
|
142
|
+
|
|
143
|
+
import zarr
|
|
144
|
+
from pathlib import Path
|
|
145
|
+
|
|
146
|
+
# Using zarrs is necessary for local filesystem performance.
|
|
147
|
+
# Ensure you installed it using our `[zarrs]` extra i.e., `pip install annbatch[zarrs]` to get the right version.
|
|
148
|
+
zarr.config.set(
|
|
149
|
+
{"codec_pipeline.path": "zarrs.ZarrsCodecPipeline"}
|
|
150
|
+
)
|
|
151
|
+
|
|
152
|
+
create_anndata_collection(
|
|
153
|
+
adata_paths=[
|
|
154
|
+
"path/to/your/file1.h5ad",
|
|
155
|
+
"path/to/your/file2.h5ad"
|
|
156
|
+
],
|
|
157
|
+
output_path="path/to/output/collection", # a directory containing `dataset_{i}.zarr`
|
|
158
|
+
shuffle=True, # shuffling is needed if you want to use chunked access
|
|
159
|
+
)
|
|
160
|
+
```
|
|
161
|
+
|
|
162
|
+
Data loading:
|
|
163
|
+
|
|
164
|
+
```python
|
|
165
|
+
from pathlib import Path
|
|
166
|
+
|
|
167
|
+
from annbatch import ZarrSparseDataset
|
|
168
|
+
import anndata as ad
|
|
169
|
+
import zarr
|
|
170
|
+
|
|
171
|
+
# Using zarrs is necessary for local filesystem performance.
|
|
172
|
+
# Ensure you installed it using our `[zarrs]` extra i.e., `pip install annbatch[zarrs]` to get the right version.
|
|
173
|
+
zarr.config.set(
|
|
174
|
+
{"codec_pipeline.path": "zarrs.ZarrsCodecPipeline"}
|
|
175
|
+
)
|
|
176
|
+
|
|
177
|
+
ds = ZarrSparseDataset(
|
|
178
|
+
batch_size=4096,
|
|
179
|
+
chunk_size=32,
|
|
180
|
+
preload_nchunks=256,
|
|
181
|
+
).add_anndatas(
|
|
182
|
+
[
|
|
183
|
+
ad.AnnData(
|
|
184
|
+
# note that you can open an AnnData file using any type of zarr store
|
|
185
|
+
X=ad.io.sparse_dataset(zarr.open(p)["X"]),
|
|
186
|
+
obs=ad.io.read_elem(zarr.open(p)["obs"]),
|
|
187
|
+
)
|
|
188
|
+
for p in Path("path/to/output/collection").glob("*.zarr")
|
|
189
|
+
],
|
|
190
|
+
obs_keys="label_column",
|
|
191
|
+
)
|
|
192
|
+
|
|
193
|
+
# Iterate over the dataloader (drop-in replacement for torch.utils.data.DataLoader)
|
|
194
|
+
for batch in ds:
|
|
195
|
+
...
|
|
196
|
+
```
|
|
197
|
+
|
|
198
|
+
<!--TODO: proper intersphinx and/or migrate note-->
|
|
199
|
+
|
|
200
|
+
For usage of our loader inside of `torch`, please see [this note](https://annbatch.readthedocs.io/en/latest/#user-configurable-sampling-strategy) for more info. At a minimum, be aware that deadlocking will occur on Linux unless you pass `multiprocessing_context="spawn"` to the `DataLoader`.
|
|
201
|
+
|
|
202
|
+
<!--HEADER-->
|
|
203
|
+
|
|
204
|
+
For a deeper dive into this example, please see the [in-depth section of our docs][].
|
|
205
|
+
|
|
206
|
+
<!--FOOTER-->
|
|
207
|
+
## Release notes
|
|
208
|
+
|
|
209
|
+
See the [changelog][].
|
|
210
|
+
|
|
211
|
+
## Contact
|
|
212
|
+
|
|
213
|
+
For questions and help requests, you can reach out in the [scverse discourse][].
|
|
214
|
+
If you found a bug, please use the [issue tracker][].
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
annbatch/__init__.py,sha256=esIAsAlVLrscCumW9u5gcg8lSp69bXfLVvmUCZn_7bk,368
|
|
2
|
+
annbatch/abc.py,sha256=CEjqyKCwood1sH1DUy8jLVPkN6NCZHybjwqg289Imvg,9360
|
|
3
|
+
annbatch/anndata_manager.py,sha256=B5ROjVXCzxfe0crMvyICJNnEBnnosw13EMqamyncG2U,16238
|
|
4
|
+
annbatch/dense.py,sha256=CxhzUGBZq6KJo4FGKllxKxPmWnmtBURH9oi15jqSdnU,2259
|
|
5
|
+
annbatch/io.py,sha256=btu1hNhatQRU_HRtugvEGsmMuz6GBxJrG-fd8fp8o7o,21805
|
|
6
|
+
annbatch/sparse.py,sha256=8h4YWtCVz7fRZRBQ5J8fci8bk0JNPam8_vB4-NBlbTA,6456
|
|
7
|
+
annbatch/types.py,sha256=0WtkW8F95bNbZvjWT65Rar75gEGTrUu4Gf7-V6pvxc4,646
|
|
8
|
+
annbatch/utils.py,sha256=MnbDZSDnvV1U2uwOpohVDcwgpDzTliw0EP3bZlvvUIo,10567
|
|
9
|
+
annbatch-0.0.1.dist-info/METADATA,sha256=aDHuFejUCZZCI88YGk_I377joShD3eqjDg3Gq6hmOd8,7617
|
|
10
|
+
annbatch-0.0.1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
11
|
+
annbatch-0.0.1.dist-info/licenses/LICENSE,sha256=scqcCmYL1yJVfrrEqnAuMasMn_qvqQ_iOsDt4ix0F-8,1067
|
|
12
|
+
annbatch-0.0.1.dist-info/RECORD,,
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2025, Ilan Gold
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|