anndata 0.12.4__tar.gz → 0.12.6__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {anndata-0.12.4 → anndata-0.12.6}/.github/workflows/test-gpu.yml +2 -1
- {anndata-0.12.4 → anndata-0.12.6}/PKG-INFO +6 -5
- anndata-0.12.6/benchmarks/benchmarks/dataset2d.py +89 -0
- {anndata-0.12.4 → anndata-0.12.6}/benchmarks/benchmarks/sparse_dataset.py +32 -1
- anndata-0.12.6/ci/min-constraints.txt +1 -0
- {anndata-0.12.4 → anndata-0.12.6}/docs/conf.py +1 -0
- anndata-0.12.6/docs/release-notes/0.12.5.md +12 -0
- anndata-0.12.6/docs/release-notes/0.12.6.md +6 -0
- {anndata-0.12.4 → anndata-0.12.6}/docs/tutorials/zarr-v3.md +2 -1
- {anndata-0.12.4 → anndata-0.12.6}/hatch.toml +5 -2
- {anndata-0.12.4 → anndata-0.12.6}/pyproject.toml +4 -2
- {anndata-0.12.4 → anndata-0.12.6}/src/anndata/_core/anndata.py +6 -2
- {anndata-0.12.4 → anndata-0.12.6}/src/anndata/_core/file_backing.py +21 -12
- {anndata-0.12.4 → anndata-0.12.6}/src/anndata/_core/merge.py +2 -0
- {anndata-0.12.4 → anndata-0.12.6}/src/anndata/_io/specs/lazy_methods.py +6 -5
- {anndata-0.12.4 → anndata-0.12.6}/src/anndata/_io/specs/methods.py +15 -12
- {anndata-0.12.4 → anndata-0.12.6}/src/anndata/_settings.py +37 -12
- {anndata-0.12.4 → anndata-0.12.6}/src/anndata/_settings.pyi +3 -2
- {anndata-0.12.4 → anndata-0.12.6}/src/anndata/experimental/backed/_io.py +13 -7
- {anndata-0.12.4 → anndata-0.12.6}/src/anndata/experimental/backed/_lazy_arrays.py +2 -2
- {anndata-0.12.4 → anndata-0.12.6}/src/anndata/tests/helpers.py +72 -34
- {anndata-0.12.4 → anndata-0.12.6}/tests/lazy/test_read.py +11 -1
- {anndata-0.12.4 → anndata-0.12.6}/tests/test_concatenate_disk.py +20 -3
- {anndata-0.12.4 → anndata-0.12.6}/tests/test_dask.py +16 -17
- {anndata-0.12.4 → anndata-0.12.6}/tests/test_dask_view_mem.py +1 -1
- {anndata-0.12.4 → anndata-0.12.6}/tests/test_io_dispatched.py +7 -14
- {anndata-0.12.4 → anndata-0.12.6}/tests/test_io_elementwise.py +68 -0
- {anndata-0.12.4 → anndata-0.12.6}/tests/test_settings.py +7 -6
- anndata-0.12.4/benchmarks/benchmarks/dataset2d.py +0 -63
- {anndata-0.12.4 → anndata-0.12.6}/.cirun.yml +0 -0
- {anndata-0.12.4 → anndata-0.12.6}/.codecov.yml +0 -0
- {anndata-0.12.4 → anndata-0.12.6}/.editorconfig +0 -0
- {anndata-0.12.4 → anndata-0.12.6}/.github/ISSUE_TEMPLATE/bug-report.yml +0 -0
- {anndata-0.12.4 → anndata-0.12.6}/.github/ISSUE_TEMPLATE/config.yml +0 -0
- {anndata-0.12.4 → anndata-0.12.6}/.github/ISSUE_TEMPLATE/enhancement-request.yml +0 -0
- {anndata-0.12.4 → anndata-0.12.6}/.github/ISSUE_TEMPLATE/question.yml +0 -0
- {anndata-0.12.4 → anndata-0.12.6}/.github/PULL_REQUEST_TEMPLATE.md +0 -0
- {anndata-0.12.4 → anndata-0.12.6}/.github/dependabot.yml +0 -0
- {anndata-0.12.4 → anndata-0.12.6}/.github/workflows/benchmark.yml +0 -0
- {anndata-0.12.4 → anndata-0.12.6}/.github/workflows/check-pr.yml +0 -0
- {anndata-0.12.4 → anndata-0.12.6}/.github/workflows/close-stale.yml +0 -0
- {anndata-0.12.4 → anndata-0.12.6}/.github/workflows/codespell.yml +0 -0
- {anndata-0.12.4 → anndata-0.12.6}/.github/workflows/label-stale.yml +0 -0
- {anndata-0.12.4 → anndata-0.12.6}/.github/workflows/publish.yml +0 -0
- {anndata-0.12.4 → anndata-0.12.6}/.github/workflows/test-cpu.yml +0 -0
- {anndata-0.12.4 → anndata-0.12.6}/.gitignore +0 -0
- {anndata-0.12.4 → anndata-0.12.6}/.gitmodules +0 -0
- {anndata-0.12.4 → anndata-0.12.6}/.pre-commit-config.yaml +0 -0
- {anndata-0.12.4 → anndata-0.12.6}/.prettierignore +0 -0
- {anndata-0.12.4 → anndata-0.12.6}/.prettierrc.yaml +0 -0
- {anndata-0.12.4 → anndata-0.12.6}/.readthedocs.yml +0 -0
- {anndata-0.12.4 → anndata-0.12.6}/.taplo.toml +0 -0
- {anndata-0.12.4 → anndata-0.12.6}/.vscode/launch.json +0 -0
- {anndata-0.12.4 → anndata-0.12.6}/.vscode/settings.json +0 -0
- {anndata-0.12.4 → anndata-0.12.6}/LICENSE +0 -0
- {anndata-0.12.4 → anndata-0.12.6}/README.md +0 -0
- {anndata-0.12.4 → anndata-0.12.6}/benchmarks/README.md +0 -0
- {anndata-0.12.4 → anndata-0.12.6}/benchmarks/asv.conf.json +0 -0
- {anndata-0.12.4 → anndata-0.12.6}/benchmarks/benchmarks/__init__.py +0 -0
- {anndata-0.12.4 → anndata-0.12.6}/benchmarks/benchmarks/anndata.py +0 -0
- {anndata-0.12.4 → anndata-0.12.6}/benchmarks/benchmarks/backed_hdf5.py +0 -0
- {anndata-0.12.4 → anndata-0.12.6}/benchmarks/benchmarks/readwrite.py +0 -0
- {anndata-0.12.4 → anndata-0.12.6}/benchmarks/benchmarks/utils.py +0 -0
- {anndata-0.12.4 → anndata-0.12.6}/biome.jsonc +0 -0
- {anndata-0.12.4 → anndata-0.12.6}/ci/constraints.txt +0 -0
- {anndata-0.12.4 → anndata-0.12.6}/ci/scripts/min-deps.py +0 -0
- {anndata-0.12.4 → anndata-0.12.6}/ci/scripts/towncrier_automation.py +0 -0
- {anndata-0.12.4 → anndata-0.12.6}/docs/Makefile +0 -0
- {anndata-0.12.4 → anndata-0.12.6}/docs/_key_contributors.rst +0 -0
- {anndata-0.12.4 → anndata-0.12.6}/docs/_static/img/anndata_schema.svg +0 -0
- {anndata-0.12.4 → anndata-0.12.6}/docs/_templates/autosummary/class.rst +0 -0
- {anndata-0.12.4 → anndata-0.12.6}/docs/api.md +0 -0
- {anndata-0.12.4 → anndata-0.12.6}/docs/benchmark-read-write.ipynb +0 -0
- {anndata-0.12.4 → anndata-0.12.6}/docs/benchmarks.md +0 -0
- {anndata-0.12.4 → anndata-0.12.6}/docs/concatenation.rst +0 -0
- {anndata-0.12.4 → anndata-0.12.6}/docs/contributing.md +0 -0
- {anndata-0.12.4 → anndata-0.12.6}/docs/extensions/autosummary_skip_inherited.py +0 -0
- {anndata-0.12.4 → anndata-0.12.6}/docs/extensions/no_skip_abc_members.py +0 -0
- {anndata-0.12.4 → anndata-0.12.6}/docs/extensions/patch_myst_cite.py +0 -0
- {anndata-0.12.4 → anndata-0.12.6}/docs/fileformat-prose.md +0 -0
- {anndata-0.12.4 → anndata-0.12.6}/docs/index.md +0 -0
- {anndata-0.12.4 → anndata-0.12.6}/docs/interoperability.md +0 -0
- {anndata-0.12.4 → anndata-0.12.6}/docs/news.md +0 -0
- {anndata-0.12.4 → anndata-0.12.6}/docs/references.rst +0 -0
- {anndata-0.12.4 → anndata-0.12.6}/docs/release-notes/0.10.0.md +0 -0
- {anndata-0.12.4 → anndata-0.12.6}/docs/release-notes/0.10.1.md +0 -0
- {anndata-0.12.4 → anndata-0.12.6}/docs/release-notes/0.10.2.md +0 -0
- {anndata-0.12.4 → anndata-0.12.6}/docs/release-notes/0.10.3.md +0 -0
- {anndata-0.12.4 → anndata-0.12.6}/docs/release-notes/0.10.4.md +0 -0
- {anndata-0.12.4 → anndata-0.12.6}/docs/release-notes/0.10.5.md +0 -0
- {anndata-0.12.4 → anndata-0.12.6}/docs/release-notes/0.10.6.md +0 -0
- {anndata-0.12.4 → anndata-0.12.6}/docs/release-notes/0.10.7.md +0 -0
- {anndata-0.12.4 → anndata-0.12.6}/docs/release-notes/0.10.8.md +0 -0
- {anndata-0.12.4 → anndata-0.12.6}/docs/release-notes/0.10.9.md +0 -0
- {anndata-0.12.4 → anndata-0.12.6}/docs/release-notes/0.11.0.md +0 -0
- {anndata-0.12.4 → anndata-0.12.6}/docs/release-notes/0.11.1.md +0 -0
- {anndata-0.12.4 → anndata-0.12.6}/docs/release-notes/0.11.2.md +0 -0
- {anndata-0.12.4 → anndata-0.12.6}/docs/release-notes/0.11.3.md +0 -0
- {anndata-0.12.4 → anndata-0.12.6}/docs/release-notes/0.11.4.md +0 -0
- {anndata-0.12.4 → anndata-0.12.6}/docs/release-notes/0.12.0.md +0 -0
- {anndata-0.12.4 → anndata-0.12.6}/docs/release-notes/0.12.1.md +0 -0
- {anndata-0.12.4 → anndata-0.12.6}/docs/release-notes/0.12.2.md +0 -0
- {anndata-0.12.4 → anndata-0.12.6}/docs/release-notes/0.12.3.md +0 -0
- {anndata-0.12.4 → anndata-0.12.6}/docs/release-notes/0.12.4.md +0 -0
- {anndata-0.12.4 → anndata-0.12.6}/docs/release-notes/0.4.0.md +0 -0
- {anndata-0.12.4 → anndata-0.12.6}/docs/release-notes/0.5.0.md +0 -0
- {anndata-0.12.4 → anndata-0.12.6}/docs/release-notes/0.6.0.md +0 -0
- {anndata-0.12.4 → anndata-0.12.6}/docs/release-notes/0.6.x.md +0 -0
- {anndata-0.12.4 → anndata-0.12.6}/docs/release-notes/0.7.0.md +0 -0
- {anndata-0.12.4 → anndata-0.12.6}/docs/release-notes/0.7.2.md +0 -0
- {anndata-0.12.4 → anndata-0.12.6}/docs/release-notes/0.7.3.md +0 -0
- {anndata-0.12.4 → anndata-0.12.6}/docs/release-notes/0.7.4.md +0 -0
- {anndata-0.12.4 → anndata-0.12.6}/docs/release-notes/0.7.5.md +0 -0
- {anndata-0.12.4 → anndata-0.12.6}/docs/release-notes/0.7.6.md +0 -0
- {anndata-0.12.4 → anndata-0.12.6}/docs/release-notes/0.7.7.md +0 -0
- {anndata-0.12.4 → anndata-0.12.6}/docs/release-notes/0.7.8.md +0 -0
- {anndata-0.12.4 → anndata-0.12.6}/docs/release-notes/0.8.0.md +0 -0
- {anndata-0.12.4 → anndata-0.12.6}/docs/release-notes/0.9.0.md +0 -0
- {anndata-0.12.4 → anndata-0.12.6}/docs/release-notes/0.9.1.md +0 -0
- {anndata-0.12.4 → anndata-0.12.6}/docs/release-notes/0.9.2.md +0 -0
- {anndata-0.12.4 → anndata-0.12.6}/docs/release-notes/2172.bug.md +0 -0
- {anndata-0.12.4 → anndata-0.12.6}/docs/release-notes/index.md +0 -0
- {anndata-0.12.4 → anndata-0.12.6}/docs/tutorials/index.md +0 -0
- {anndata-0.12.4 → anndata-0.12.6}/src/anndata/__init__.py +0 -0
- {anndata-0.12.4 → anndata-0.12.6}/src/anndata/_core/__init__.py +0 -0
- {anndata-0.12.4 → anndata-0.12.6}/src/anndata/_core/access.py +0 -0
- {anndata-0.12.4 → anndata-0.12.6}/src/anndata/_core/aligned_df.py +0 -0
- {anndata-0.12.4 → anndata-0.12.6}/src/anndata/_core/aligned_mapping.py +0 -0
- {anndata-0.12.4 → anndata-0.12.6}/src/anndata/_core/extensions.py +0 -0
- {anndata-0.12.4 → anndata-0.12.6}/src/anndata/_core/index.py +0 -0
- {anndata-0.12.4 → anndata-0.12.6}/src/anndata/_core/raw.py +0 -0
- {anndata-0.12.4 → anndata-0.12.6}/src/anndata/_core/sparse_dataset.py +0 -0
- {anndata-0.12.4 → anndata-0.12.6}/src/anndata/_core/storage.py +0 -0
- {anndata-0.12.4 → anndata-0.12.6}/src/anndata/_core/views.py +0 -0
- {anndata-0.12.4 → anndata-0.12.6}/src/anndata/_core/xarray.py +0 -0
- {anndata-0.12.4 → anndata-0.12.6}/src/anndata/_io/__init__.py +0 -0
- {anndata-0.12.4 → anndata-0.12.6}/src/anndata/_io/h5ad.py +0 -0
- {anndata-0.12.4 → anndata-0.12.6}/src/anndata/_io/read.py +0 -0
- {anndata-0.12.4 → anndata-0.12.6}/src/anndata/_io/specs/__init__.py +0 -0
- {anndata-0.12.4 → anndata-0.12.6}/src/anndata/_io/specs/registry.py +0 -0
- {anndata-0.12.4 → anndata-0.12.6}/src/anndata/_io/utils.py +0 -0
- {anndata-0.12.4 → anndata-0.12.6}/src/anndata/_io/write.py +0 -0
- {anndata-0.12.4 → anndata-0.12.6}/src/anndata/_io/zarr.py +0 -0
- {anndata-0.12.4 → anndata-0.12.6}/src/anndata/_types.py +0 -0
- {anndata-0.12.4 → anndata-0.12.6}/src/anndata/_warnings.py +0 -0
- {anndata-0.12.4 → anndata-0.12.6}/src/anndata/abc.py +0 -0
- {anndata-0.12.4 → anndata-0.12.6}/src/anndata/compat/__init__.py +0 -0
- {anndata-0.12.4 → anndata-0.12.6}/src/anndata/experimental/__init__.py +0 -0
- {anndata-0.12.4 → anndata-0.12.6}/src/anndata/experimental/_dispatch_io.py +0 -0
- {anndata-0.12.4 → anndata-0.12.6}/src/anndata/experimental/backed/__init__.py +0 -0
- {anndata-0.12.4 → anndata-0.12.6}/src/anndata/experimental/backed/_compat.py +0 -0
- {anndata-0.12.4 → anndata-0.12.6}/src/anndata/experimental/merge.py +0 -0
- {anndata-0.12.4 → anndata-0.12.6}/src/anndata/experimental/multi_files/__init__.py +0 -0
- {anndata-0.12.4 → anndata-0.12.6}/src/anndata/experimental/multi_files/_anncollection.py +0 -0
- {anndata-0.12.4 → anndata-0.12.6}/src/anndata/experimental/pytorch/__init__.py +0 -0
- {anndata-0.12.4 → anndata-0.12.6}/src/anndata/experimental/pytorch/_annloader.py +0 -0
- {anndata-0.12.4 → anndata-0.12.6}/src/anndata/io.py +0 -0
- {anndata-0.12.4 → anndata-0.12.6}/src/anndata/logging.py +0 -0
- {anndata-0.12.4 → anndata-0.12.6}/src/anndata/tests/__init__.py +0 -0
- {anndata-0.12.4 → anndata-0.12.6}/src/anndata/types.py +0 -0
- {anndata-0.12.4 → anndata-0.12.6}/src/anndata/typing.py +0 -0
- {anndata-0.12.4 → anndata-0.12.6}/src/anndata/utils.py +0 -0
- {anndata-0.12.4 → anndata-0.12.6}/src/testing/anndata/__init__.py +0 -0
- {anndata-0.12.4 → anndata-0.12.6}/src/testing/anndata/_doctest.py +0 -0
- {anndata-0.12.4 → anndata-0.12.6}/src/testing/anndata/_pytest.py +0 -0
- {anndata-0.12.4 → anndata-0.12.6}/src/testing/anndata/py.typed +0 -0
- {anndata-0.12.4 → anndata-0.12.6}/tests/conftest.py +0 -0
- {anndata-0.12.4 → anndata-0.12.6}/tests/data/adata-comments.tsv +0 -0
- {anndata-0.12.4 → anndata-0.12.6}/tests/data/adata.csv +0 -0
- {anndata-0.12.4 → anndata-0.12.6}/tests/data/archives/readme.md +0 -0
- {anndata-0.12.4 → anndata-0.12.6}/tests/data/archives/v0.11.4/adata.h5ad +0 -0
- {anndata-0.12.4 → anndata-0.12.6}/tests/data/archives/v0.11.4/adata.zarr.zip +0 -0
- {anndata-0.12.4 → anndata-0.12.6}/tests/data/archives/v0.11.4/readme.md +0 -0
- {anndata-0.12.4 → anndata-0.12.6}/tests/data/archives/v0.7.0/adata.h5ad +0 -0
- {anndata-0.12.4 → anndata-0.12.6}/tests/data/archives/v0.7.0/adata.zarr.zip +0 -0
- {anndata-0.12.4 → anndata-0.12.6}/tests/data/archives/v0.7.8/adata.h5ad +0 -0
- {anndata-0.12.4 → anndata-0.12.6}/tests/data/archives/v0.7.8/adata.zarr.zip +0 -0
- {anndata-0.12.4 → anndata-0.12.6}/tests/data/excel.xlsx +0 -0
- {anndata-0.12.4 → anndata-0.12.6}/tests/data/umi_tools.tsv.gz +0 -0
- {anndata-0.12.4 → anndata-0.12.6}/tests/lazy/conftest.py +0 -0
- {anndata-0.12.4 → anndata-0.12.6}/tests/lazy/test_concat.py +0 -0
- {anndata-0.12.4 → anndata-0.12.6}/tests/lazy/test_write.py +0 -0
- {anndata-0.12.4 → anndata-0.12.6}/tests/test_anncollection.py +0 -0
- {anndata-0.12.4 → anndata-0.12.6}/tests/test_annot.py +0 -0
- {anndata-0.12.4 → anndata-0.12.6}/tests/test_awkward.py +0 -0
- {anndata-0.12.4 → anndata-0.12.6}/tests/test_backed_dense.py +0 -0
- {anndata-0.12.4 → anndata-0.12.6}/tests/test_backed_hdf5.py +0 -0
- {anndata-0.12.4 → anndata-0.12.6}/tests/test_backed_sparse.py +0 -0
- {anndata-0.12.4 → anndata-0.12.6}/tests/test_base.py +0 -0
- {anndata-0.12.4 → anndata-0.12.6}/tests/test_concatenate.py +0 -0
- {anndata-0.12.4 → anndata-0.12.6}/tests/test_deprecations.py +0 -0
- {anndata-0.12.4 → anndata-0.12.6}/tests/test_extensions.py +0 -0
- {anndata-0.12.4 → anndata-0.12.6}/tests/test_get_vector.py +0 -0
- {anndata-0.12.4 → anndata-0.12.6}/tests/test_gpu.py +0 -0
- {anndata-0.12.4 → anndata-0.12.6}/tests/test_helpers.py +0 -0
- {anndata-0.12.4 → anndata-0.12.6}/tests/test_inplace_subset.py +0 -0
- {anndata-0.12.4 → anndata-0.12.6}/tests/test_io_backwards_compat.py +0 -0
- {anndata-0.12.4 → anndata-0.12.6}/tests/test_io_conversion.py +0 -0
- {anndata-0.12.4 → anndata-0.12.6}/tests/test_io_partial.py +0 -0
- {anndata-0.12.4 → anndata-0.12.6}/tests/test_io_utils.py +0 -0
- {anndata-0.12.4 → anndata-0.12.6}/tests/test_io_warnings.py +0 -0
- {anndata-0.12.4 → anndata-0.12.6}/tests/test_layers.py +0 -0
- {anndata-0.12.4 → anndata-0.12.6}/tests/test_obsmvarm.py +0 -0
- {anndata-0.12.4 → anndata-0.12.6}/tests/test_obspvarp.py +0 -0
- {anndata-0.12.4 → anndata-0.12.6}/tests/test_raw.py +0 -0
- {anndata-0.12.4 → anndata-0.12.6}/tests/test_readwrite.py +0 -0
- {anndata-0.12.4 → anndata-0.12.6}/tests/test_repr.py +0 -0
- {anndata-0.12.4 → anndata-0.12.6}/tests/test_structured_arrays.py +0 -0
- {anndata-0.12.4 → anndata-0.12.6}/tests/test_transpose.py +0 -0
- {anndata-0.12.4 → anndata-0.12.6}/tests/test_uns.py +0 -0
- {anndata-0.12.4 → anndata-0.12.6}/tests/test_utils.py +0 -0
- {anndata-0.12.4 → anndata-0.12.6}/tests/test_views.py +0 -0
- {anndata-0.12.4 → anndata-0.12.6}/tests/test_x.py +0 -0
- {anndata-0.12.4 → anndata-0.12.6}/tests/test_xarray.py +0 -0
|
@@ -66,7 +66,8 @@ jobs:
|
|
|
66
66
|
uses: astral-sh/setup-uv@v6 # TODO: upgrade once cirun image supports node 24
|
|
67
67
|
with:
|
|
68
68
|
enable-cache: true
|
|
69
|
-
|
|
69
|
+
# Any Cuda 14+ will support Python 3.14: https://github.com/cupy/cupy/issues/9346
|
|
70
|
+
python-version: '3.13' # ${{ env.max_python_version }}
|
|
70
71
|
|
|
71
72
|
- name: Install AnnData
|
|
72
73
|
run: |
|
|
@@ -1,12 +1,12 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: anndata
|
|
3
|
-
Version: 0.12.
|
|
3
|
+
Version: 0.12.6
|
|
4
4
|
Summary: Annotated data.
|
|
5
5
|
Project-URL: Documentation, https://anndata.readthedocs.io/
|
|
6
6
|
Project-URL: Source, https://github.com/scverse/anndata
|
|
7
7
|
Project-URL: Home-page, https://github.com/scverse/anndata
|
|
8
|
-
Author: Philipp Angerer, Alex Wolf, Isaac Virshup, Sergei Rybakov
|
|
9
|
-
Maintainer-email:
|
|
8
|
+
Author: Philipp Angerer, Alex Wolf, Isaac Virshup, Sergei Rybakov, Ilan Gold
|
|
9
|
+
Maintainer-email: Philipp Angerer <philipp.angerer@helmholtz-munich.de>, Ilan Gold <ilan.gold@helmholtz-munich.de>
|
|
10
10
|
License-Expression: BSD-3-Clause
|
|
11
11
|
License-File: LICENSE
|
|
12
12
|
Classifier: Environment :: Console
|
|
@@ -21,6 +21,7 @@ Classifier: Programming Language :: Python :: 3
|
|
|
21
21
|
Classifier: Programming Language :: Python :: 3.11
|
|
22
22
|
Classifier: Programming Language :: Python :: 3.12
|
|
23
23
|
Classifier: Programming Language :: Python :: 3.13
|
|
24
|
+
Classifier: Programming Language :: Python :: 3.14
|
|
24
25
|
Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
|
|
25
26
|
Classifier: Topic :: Scientific/Engineering :: Visualization
|
|
26
27
|
Requires-Python: >=3.11
|
|
@@ -78,7 +79,7 @@ Requires-Dist: joblib; extra == 'test'
|
|
|
78
79
|
Requires-Dist: loompy>=3.0.5; extra == 'test'
|
|
79
80
|
Requires-Dist: matplotlib; extra == 'test'
|
|
80
81
|
Requires-Dist: openpyxl; extra == 'test'
|
|
81
|
-
Requires-Dist: pyarrow
|
|
82
|
+
Requires-Dist: pyarrow; extra == 'test'
|
|
82
83
|
Requires-Dist: pytest-cov; extra == 'test'
|
|
83
84
|
Requires-Dist: pytest-memray; extra == 'test'
|
|
84
85
|
Requires-Dist: pytest-mock; extra == 'test'
|
|
@@ -100,7 +101,7 @@ Requires-Dist: joblib; extra == 'test-min'
|
|
|
100
101
|
Requires-Dist: loompy>=3.0.5; extra == 'test-min'
|
|
101
102
|
Requires-Dist: matplotlib; extra == 'test-min'
|
|
102
103
|
Requires-Dist: openpyxl; extra == 'test-min'
|
|
103
|
-
Requires-Dist: pyarrow
|
|
104
|
+
Requires-Dist: pyarrow; extra == 'test-min'
|
|
104
105
|
Requires-Dist: pytest-cov; extra == 'test-min'
|
|
105
106
|
Requires-Dist: pytest-memray; extra == 'test-min'
|
|
106
107
|
Requires-Dist: pytest-mock; extra == 'test-min'
|
|
@@ -0,0 +1,89 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from typing import TYPE_CHECKING
|
|
4
|
+
|
|
5
|
+
import h5py
|
|
6
|
+
import numpy as np
|
|
7
|
+
import pandas as pd
|
|
8
|
+
import zarr
|
|
9
|
+
|
|
10
|
+
import anndata as ad
|
|
11
|
+
|
|
12
|
+
if TYPE_CHECKING:
|
|
13
|
+
from typing import Literal
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class Dataset2D:
    """asv benchmarks for dataframes opened with ``ad.experimental.read_elem_lazy``.

    Every benchmark is parametrized over the on-disk store type, the
    ``chunks`` argument handed to ``read_elem_lazy`` and the kind of column
    stored in the single-column dataframe.
    """

    param_names = ("store_type", "chunks", "array_type")
    params = (
        ("zarr", "h5ad"),
        ((-1,), None),
        ("cat", "numeric", "string-array", "nullable-string-array"),
    )

    def setup_cache(self):
        """Write one single-column dataframe per array type to an h5ad file and a zarr store."""
        n_obs = 10000
        array_types = {
            "numeric": np.arange(n_obs),
            "string-array": np.array(["a"] * n_obs),
            "nullable-string-array": pd.array(
                ["a", pd.NA] * (n_obs // 2), dtype="string"
            ),
            "cat": pd.Categorical(np.array(["a"] * n_obs)),
        }
        for k, v in array_types.items():
            for store in [
                h5py.File(f"data_{k}.h5ad", mode="w"),
                zarr.open(f"data_{k}.zarr", mode="w", zarr_version=2),
            ]:
                try:
                    df = pd.DataFrame(
                        {"a": v}, index=[f"cell{i}" for i in range(n_obs)]
                    )
                    # A numpy input whose column ended up with the "string" dtype
                    # is converted back to a numpy array so that it is written
                    # with the "string-array" encoding (checked below).
                    writing_string_array_on_disk = (
                        isinstance(v, np.ndarray) and df["a"].dtype == "string"
                    )
                    if writing_string_array_on_disk:
                        df["a"] = df["a"].to_numpy()
                    with ad.settings.override(allow_write_nullable_strings=True):
                        ad.io.write_elem(store, "df", df)
                    if writing_string_array_on_disk:
                        assert store["df"]["a"].attrs["encoding-type"] == "string-array"
                finally:
                    # Close the h5py handle explicitly instead of relying on
                    # garbage collection to flush and release the file.
                    if isinstance(store, h5py.File):
                        store.close()

    def setup(
        self,
        store_type: Literal["zarr", "h5ad"],
        chunks: None | tuple[int],
        array_type: Literal["cat", "numeric", "string-array", "nullable-string-array"],
    ):
        """Open the store written by ``setup_cache`` and lazily read its dataframe."""
        self.store = (
            h5py.File(f"data_{array_type}.h5ad", mode="r")
            if store_type == "h5ad"
            else zarr.open(f"data_{array_type}.zarr")
        )
        self.ds = ad.experimental.read_elem_lazy(self.store["df"], chunks=chunks)
        self.n_obs = self.ds.shape[0]

    def teardown(self, *_):
        """Close the h5py file handle opened by ``setup``, if there is one."""
        store = getattr(self, "store", None)
        if isinstance(store, h5py.File):
            store.close()

    def time_read_lazy_default(self, *_):
        ad.experimental.read_elem_lazy(self.store["df"])

    def peakmem_read_lazy_default(self, *_):
        ad.experimental.read_elem_lazy(self.store["df"])

    def time_getitem_slice(self, *_):
        self.ds.iloc[0 : (self.n_obs // 2)].to_memory()

    def peakmem_getitem_slice(self, *_):
        self.ds.iloc[0 : (self.n_obs // 2)].to_memory()

    def time_full_to_memory(self, *_):
        self.ds.to_memory()

    def peakmem_full_to_memory(self, *_):
        self.ds.to_memory()

    # NOTE: despite the "bool_mask" name, the two benchmarks below index with
    # random integer positions (``np.random.randint``), not a boolean mask.
    def time_getitem_bool_mask(self, *_):
        self.ds.iloc[np.random.randint(0, self.n_obs, self.n_obs // 2)].to_memory()

    def peakmem_getitem_bool_mask(self, *_):
        self.ds.iloc[np.random.randint(0, self.n_obs, self.n_obs // 2)].to_memory()

    def time_concat(self, *_):
        # The same AnnData object is repeated 50 times in the list.
        adatas = [ad.AnnData(obs=self.ds)] * 50
        ad.concat(adatas, join="outer")
|
|
@@ -7,7 +7,7 @@ import zarr
|
|
|
7
7
|
from dask.array.core import Array as DaskArray
|
|
8
8
|
from scipy import sparse
|
|
9
9
|
|
|
10
|
-
from anndata import AnnData
|
|
10
|
+
from anndata import AnnData, concat
|
|
11
11
|
from anndata._core.sparse_dataset import sparse_dataset
|
|
12
12
|
from anndata._io.specs import write_elem
|
|
13
13
|
from anndata.experimental import read_elem_lazy
|
|
@@ -77,3 +77,34 @@ class SparseCSRContiguousSlice:
|
|
|
77
77
|
res = self.adata[self.index]
|
|
78
78
|
if isinstance(res, DaskArray):
|
|
79
79
|
res.compute()
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
class SparseCSRDask:
    """asv benchmarks for a CSR matrix written to zarr and read back with ``read_elem_lazy``."""

    filepath = "data.zarr"

    def setup_cache(self):
        """Write a seeded random 10,000 x 10,000 CSR matrix (density 0.01) to the zarr group."""
        rng = np.random.default_rng(42)
        matrix = sparse.random(
            10_000, 10_000, density=0.01, format="csr", random_state=rng
        )
        root = zarr.group(self.filepath)
        write_elem(root, "X", matrix)

    def setup(self):
        """Open the zarr group and wrap the lazily read ``X`` in an AnnData object."""
        self.group = zarr.group(self.filepath)
        lazy_x = read_elem_lazy(self.group["X"])
        self.adata = AnnData(X=lazy_x)

    def time_concat(self):
        # 100 references to the same AnnData object
        concat([self.adata] * 100)

    def peakmem_concat(self):
        # 100 references to the same AnnData object
        concat([self.adata] * 100)

    def time_read(self):
        AnnData(X=read_elem_lazy(self.group["X"]))

    def peakmem_read(self):
        AnnData(X=read_elem_lazy(self.group["X"]))
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
pyarrow<21
|
|
@@ -134,6 +134,7 @@ intersphinx_mapping = dict(
|
|
|
134
134
|
obstore=("https://developmentseed.org/obstore/latest/", None),
|
|
135
135
|
pandas=("https://pandas.pydata.org/pandas-docs/stable", None),
|
|
136
136
|
# TODO: switch to `/3` once docs are built with Python 3.14
|
|
137
|
+
# https://github.com/readthedocs/readthedocs.org/issues/12523
|
|
137
138
|
python=("https://docs.python.org/3.13", None),
|
|
138
139
|
scipy=("https://docs.scipy.org/doc/scipy", None),
|
|
139
140
|
sklearn=("https://scikit-learn.org/stable", None),
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
(v0.12.5)=
|
|
2
|
+
### 0.12.5 {small}`2025-11-03`
|
|
3
|
+
|
|
4
|
+
#### Bug fixes
|
|
5
|
+
|
|
6
|
+
- Remove use of private `read_dataset` internally inside {func}`anndata.experimental.read_elem_lazy` {user}`ilan-gold` ({pr}`2158`)
|
|
7
|
+
- Unblock version restriction on `dask` distributed writing by using threading scheduler always (see {pr}`2172`) {user}`ilan-gold` ({pr}`2183`)
|
|
8
|
+
|
|
9
|
+
#### Performance
|
|
10
|
+
|
|
11
|
+
- Use `name` on {func}`dask.array.map_blocks` internally when concatenating {class}`anndata.experimental.backed.Dataset2D` objects whose categoricals/nullable types must be converted to dask arrays {user}`ilan-gold` ({pr}`2121`)
|
|
12
|
+
- Enable automatic sharding in zarr v3 via {attr}`anndata.settings.auto_shard_zarr_v3`, which uses {mod}`zarr`'s own auto-sharding mechanism (i.e., `shards="auto"`), for all types except {class}`numpy.recarray` {user}`ilan-gold` ({pr}`2167`)
|
|
@@ -38,7 +38,8 @@ There are two ways of opening remote `zarr` stores from the `zarr-python` packag
|
|
|
38
38
|
Local data generally poses a different set of challenges.
|
|
39
39
|
First, write speeds can be somewhat slow; second, creating many small files can slow down the file system.
|
|
40
40
|
For the "many small files" problem, `zarr` has introduced {ref}`sharding <zarr:user-guide-sharding>` in the v3 file format.
|
|
41
|
-
|
|
41
|
+
We offer {attr}`anndata.settings.auto_shard_zarr_v3` to hook into zarr's ability to automatically compute shards, which is experimental at the moment.
|
|
42
|
+
Manual sharding, however, requires knowledge of the array element you are writing (such as its shape or data type), so you will need {func}`anndata.experimental.write_dispatched` to apply custom sharding.
|
|
42
43
|
For example, you cannot shard a 1D array with `shard` sizes `(256, 256)`.
|
|
43
44
|
Here is a short example, although you should tune the sizes to your own use-case and also use the compression that makes the most sense for you:
|
|
44
45
|
|
|
@@ -21,7 +21,7 @@ env-vars.UV_CONSTRAINT = "ci/constraints.txt"
|
|
|
21
21
|
overrides.matrix.deps.env-vars = [
|
|
22
22
|
{ if = [ "pre" ], key = "UV_PRERELEASE", value = "allow" },
|
|
23
23
|
{ if = [ "pre" ], key = "UV_CONSTRAINT", value = "ci/pre-deps.txt" },
|
|
24
|
-
{ if = [ "min" ], key = "UV_CONSTRAINT", value = "ci/constraints.txt ci/min-deps.txt" },
|
|
24
|
+
{ if = [ "min" ], key = "UV_CONSTRAINT", value = "ci/constraints.txt ci/min-constraints.txt ci/min-deps.txt" },
|
|
25
25
|
]
|
|
26
26
|
overrides.matrix.deps.pre-install-commands = [
|
|
27
27
|
{ if = [
|
|
@@ -35,7 +35,10 @@ overrides.matrix.deps.pre-install-commands = [
|
|
|
35
35
|
]
|
|
36
36
|
overrides.matrix.deps.python = [
|
|
37
37
|
{ if = [ "min" ], value = "3.11" },
|
|
38
|
-
|
|
38
|
+
# transitive test dep numba doesn’t support 3.14 in a stable release yet:
|
|
39
|
+
# https://github.com/numba/numba/issues/9957
|
|
40
|
+
{ if = [ "stable" ], value = "3.13" },
|
|
41
|
+
{ if = [ "pre" ], value = "3.14" },
|
|
39
42
|
]
|
|
40
43
|
overrides.matrix.deps.features = [
|
|
41
44
|
{ if = [ "stable", "pre" ], value = "test" },
|
|
@@ -12,9 +12,9 @@ authors = [
|
|
|
12
12
|
{ name = "Alex Wolf" },
|
|
13
13
|
{ name = "Isaac Virshup" },
|
|
14
14
|
{ name = "Sergei Rybakov" },
|
|
15
|
+
{ name = "Ilan Gold" },
|
|
15
16
|
]
|
|
16
17
|
maintainers = [
|
|
17
|
-
{ name = "Isaac Virshup", email = "ivirshup@gmail.com" },
|
|
18
18
|
{ name = "Philipp Angerer", email = "philipp.angerer@helmholtz-munich.de" },
|
|
19
19
|
{ name = "Ilan Gold", email = "ilan.gold@helmholtz-munich.de" },
|
|
20
20
|
]
|
|
@@ -32,6 +32,7 @@ classifiers = [
|
|
|
32
32
|
"Programming Language :: Python :: 3.11",
|
|
33
33
|
"Programming Language :: Python :: 3.12",
|
|
34
34
|
"Programming Language :: Python :: 3.13",
|
|
35
|
+
"Programming Language :: Python :: 3.14",
|
|
35
36
|
"Topic :: Scientific/Engineering :: Bio-Informatics",
|
|
36
37
|
"Topic :: Scientific/Engineering :: Visualization",
|
|
37
38
|
]
|
|
@@ -96,7 +97,7 @@ test-min = [
|
|
|
96
97
|
"httpx<1.0", # For data downloading
|
|
97
98
|
"dask[distributed]",
|
|
98
99
|
"awkward>=2.3.2",
|
|
99
|
-
"pyarrow
|
|
100
|
+
"pyarrow",
|
|
100
101
|
"anndata[dask]",
|
|
101
102
|
]
|
|
102
103
|
test = [ "anndata[test-min,lazy]" ]
|
|
@@ -164,6 +165,7 @@ filterwarnings_when_strict = [
|
|
|
164
165
|
"default:Consolidated metadata is:UserWarning",
|
|
165
166
|
"default:.*Structured:zarr.core.dtype.common.UnstableSpecificationWarning",
|
|
166
167
|
"default:.*FixedLengthUTF32:zarr.core.dtype.common.UnstableSpecificationWarning",
|
|
168
|
+
"default:Automatic shard shape inference is experimental",
|
|
167
169
|
]
|
|
168
170
|
python_files = "test_*.py"
|
|
169
171
|
testpaths = [
|
|
@@ -964,7 +964,11 @@ class AnnData(metaclass=utils.DeprecationMixinMeta): # noqa: PLW1641
|
|
|
964
964
|
@property
def isbacked(self) -> bool:
    """`True` if object is backed on disk, `False` otherwise.

    The object counts as backed when it has a filename and its `_X`
    attribute is `None` (or missing). For a view, `_X` is looked up on the
    referenced object (`_adata_ref`) rather than on the view itself.
    """
    has_filename = self.filename is not None
    # Views read `_X` from the object they reference.
    x_owner = self._adata_ref if self._is_view else self
    has_no_x = getattr(x_owner, "_X", None) is None
    return has_filename and has_no_x
|
|
968
972
|
|
|
969
973
|
@property
|
|
970
974
|
def is_view(self) -> bool:
|
|
@@ -1418,7 +1422,7 @@ class AnnData(metaclass=utils.DeprecationMixinMeta): # noqa: PLW1641
|
|
|
1418
1422
|
|
|
1419
1423
|
@old_positionals("copy")
|
|
1420
1424
|
def to_memory(self, *, copy: bool = False) -> AnnData:
|
|
1421
|
-
"""Return a new AnnData object with all
|
|
1425
|
+
"""Return a new AnnData object with all non-in-memory arrays loaded into memory.
|
|
1422
1426
|
|
|
1423
1427
|
Params
|
|
1424
1428
|
------
|
|
@@ -27,15 +27,24 @@ class AnnDataFileManager:
|
|
|
27
27
|
def __init__(
    self,
    adata: anndata.AnnData,
    file_name: PathLike[str] | str | None = None,
    file_mode: Literal["r", "r+"] | None = None,
    file_obj: h5py.File | None = None,
):
    """Set up the manager from either an open `h5py.File` or a name/mode pair.

    Parameters
    ----------
    adata
        Object this manager belongs to; only a weak reference is kept.
    file_name
        Path of the backing file. When truthy (and no `file_obj` is given),
        `self.open()` is called.
    file_mode
        Mode stored for the backing file.
    file_obj
        Already opened file; its name and mode are taken from the object.

    Raises
    ------
    ValueError
        If `file_obj` is combined with `file_name` and/or `file_mode`.
    """
    name_or_mode_given = not (file_name is None and file_mode is None)
    if file_obj is not None and name_or_mode_given:
        msg = "Cannot provide both a h5py.File and the name and/or mode arguments to constructor"
        raise ValueError(msg)
    self._adata_ref = weakref.ref(adata)

    if file_obj is not None:
        # Name and mode come from the handle that was passed in.
        self.filename = filename(file_obj)
        self._filemode = file_obj.mode
        self._file = file_obj
        return

    # No handle was passed in, so there is no open file yet.
    self.filename = file_name
    self._filemode = file_mode
    self._file = None
    if file_name:
        self.open()
|
|
39
48
|
|
|
40
49
|
def __getstate__(self):
|
|
41
50
|
state = self.__dict__.copy()
|
|
@@ -82,16 +91,16 @@ class AnnDataFileManager:
|
|
|
82
91
|
return self._filename
|
|
83
92
|
|
|
84
93
|
@filename.setter
def filename(self, file_name: PathLike[str] | str | None):
    """Store `file_name` as a `Path`, or `None` when no name is given."""
    if file_name is None:
        self._filename = None
    else:
        self._filename = Path(file_name)
|
|
87
96
|
|
|
88
97
|
def open(
|
|
89
98
|
self,
|
|
90
|
-
|
|
99
|
+
file_name: PathLike[str] | str | None = None,
|
|
91
100
|
filemode: Literal["r", "r+"] | None = None,
|
|
92
101
|
):
|
|
93
|
-
if
|
|
94
|
-
self.filename =
|
|
102
|
+
if file_name is not None:
|
|
103
|
+
self.filename = file_name
|
|
95
104
|
if filemode is not None:
|
|
96
105
|
self._filemode = filemode
|
|
97
106
|
if self.filename is None:
|
|
@@ -4,6 +4,7 @@ Code for merging/ concatenating AnnData objects.
|
|
|
4
4
|
|
|
5
5
|
from __future__ import annotations
|
|
6
6
|
|
|
7
|
+
import uuid
|
|
7
8
|
from collections import OrderedDict
|
|
8
9
|
from collections.abc import Callable, Mapping, MutableSet
|
|
9
10
|
from functools import partial, reduce, singledispatch
|
|
@@ -1251,6 +1252,7 @@ def make_dask_col_from_extension_dtype(
|
|
|
1251
1252
|
chunks=chunk_size,
|
|
1252
1253
|
meta=np.array([], dtype=dtype),
|
|
1253
1254
|
dtype=dtype,
|
|
1255
|
+
name=f"{uuid.uuid4()}/{base_path_or_zarr_group}/{elem_name}-{dtype}",
|
|
1254
1256
|
)
|
|
1255
1257
|
|
|
1256
1258
|
return da.from_array(col.values, chunks=-1) # in-memory
|
|
@@ -25,7 +25,7 @@ from anndata.compat import (
|
|
|
25
25
|
ZarrGroup,
|
|
26
26
|
)
|
|
27
27
|
|
|
28
|
-
from .registry import _LAZY_REGISTRY, IOSpec
|
|
28
|
+
from .registry import _LAZY_REGISTRY, IOSpec, read_elem
|
|
29
29
|
|
|
30
30
|
if TYPE_CHECKING:
|
|
31
31
|
from collections.abc import Generator, Mapping, Sequence
|
|
@@ -195,6 +195,9 @@ def resolve_chunks(
|
|
|
195
195
|
return elem.chunks
|
|
196
196
|
|
|
197
197
|
|
|
198
|
+
# TODO: `map_blocks` of a string array in h5py is so insanely slow on benchmarking that in the case someone has
|
|
199
|
+
# a pure string annotation (not categoricals! or nullables strings!), it's probably better to pay the memory penalty.
|
|
200
|
+
# In the long run, it might be good to figure out what exactly is going on here but for now, this will do.
|
|
198
201
|
@_LAZY_REGISTRY.register_read(H5Array, IOSpec("string-array", "0.2.0"))
|
|
199
202
|
def read_h5_string_array(
|
|
200
203
|
elem: H5Array,
|
|
@@ -204,10 +207,8 @@ def read_h5_string_array(
|
|
|
204
207
|
) -> DaskArray:
|
|
205
208
|
import dask.array as da
|
|
206
209
|
|
|
207
|
-
from anndata._io.h5ad import read_dataset
|
|
208
|
-
|
|
209
210
|
chunks = resolve_chunks(elem, chunks, tuple(elem.shape))
|
|
210
|
-
return da.from_array(
|
|
211
|
+
return da.from_array(read_elem(elem), chunks=chunks)
|
|
211
212
|
|
|
212
213
|
|
|
213
214
|
@_LAZY_REGISTRY.register_read(H5Array, IOSpec("array", "0.2.0"))
|
|
@@ -303,7 +304,7 @@ def read_dataframe(
|
|
|
303
304
|
# which is used below as well.
|
|
304
305
|
if not use_range_index:
|
|
305
306
|
dim_name = elem.attrs["_index"]
|
|
306
|
-
# no sense in reading this in multiple times
|
|
307
|
+
# no sense in reading this in multiple times since xarray requires an in-memory index
|
|
307
308
|
index = elem_dict[dim_name].compute()
|
|
308
309
|
else:
|
|
309
310
|
dim_name = DUMMY_RANGE_INDEX_KEY
|
|
@@ -102,6 +102,12 @@ def zarr_v3_compressor_compat(dataset_kwargs) -> dict:
|
|
|
102
102
|
return dataset_kwargs
|
|
103
103
|
|
|
104
104
|
|
|
105
|
+
def zarr_v3_sharding(dataset_kwargs) -> dict:
|
|
106
|
+
if "shards" not in dataset_kwargs and ad.settings.auto_shard_zarr_v3:
|
|
107
|
+
dataset_kwargs = {**dataset_kwargs, "shards": "auto"}
|
|
108
|
+
return dataset_kwargs
|
|
109
|
+
|
|
110
|
+
|
|
105
111
|
def _to_cpu_mem_wrapper(write_func):
|
|
106
112
|
"""
|
|
107
113
|
Wrapper to bring cupy types into cpu memory before writing.
|
|
@@ -432,6 +438,7 @@ def write_basic(
|
|
|
432
438
|
f.create_dataset(k, data=elem, shape=elem.shape, dtype=dtype, **dataset_kwargs)
|
|
433
439
|
else:
|
|
434
440
|
dataset_kwargs = zarr_v3_compressor_compat(dataset_kwargs)
|
|
441
|
+
dataset_kwargs = zarr_v3_sharding(dataset_kwargs)
|
|
435
442
|
f.create_array(k, shape=elem.shape, dtype=dtype, **dataset_kwargs)
|
|
436
443
|
# see https://github.com/zarr-developers/zarr-python/discussions/2712
|
|
437
444
|
if isinstance(elem, ZarrArray | H5Array):
|
|
@@ -506,26 +513,17 @@ def write_basic_dask_dask_dense(
|
|
|
506
513
|
dataset_kwargs: Mapping[str, Any] = MappingProxyType({}),
|
|
507
514
|
):
|
|
508
515
|
import dask.array as da
|
|
509
|
-
import dask.config as dc
|
|
510
|
-
|
|
511
|
-
is_distributed = dc.get("scheduler", None) == "dask.distributed"
|
|
512
|
-
is_h5 = isinstance(f, H5Group)
|
|
513
|
-
if is_distributed and is_h5:
|
|
514
|
-
msg = "Cannot write dask arrays to hdf5 when using distributed scheduler"
|
|
515
|
-
raise ValueError(msg)
|
|
516
516
|
|
|
517
517
|
dataset_kwargs = dataset_kwargs.copy()
|
|
518
|
+
is_h5 = isinstance(f, H5Group)
|
|
518
519
|
if not is_h5:
|
|
519
520
|
dataset_kwargs = zarr_v3_compressor_compat(dataset_kwargs)
|
|
520
|
-
|
|
521
|
-
if Version(version("dask")) < Version("2025.4.0") and is_distributed:
|
|
522
|
-
msg = "Writing dense data with a distributed scheduler to zarr could produce corrupted data with a Lock and will error without one when dask is older than 2025.4.0: https://github.com/dask/dask/issues/12109"
|
|
523
|
-
raise RuntimeError(msg)
|
|
521
|
+
dataset_kwargs = zarr_v3_sharding(dataset_kwargs)
|
|
524
522
|
if is_zarr_v2() or is_h5:
|
|
525
523
|
g = f.require_dataset(k, shape=elem.shape, dtype=elem.dtype, **dataset_kwargs)
|
|
526
524
|
else:
|
|
527
525
|
g = f.require_array(k, shape=elem.shape, dtype=elem.dtype, **dataset_kwargs)
|
|
528
|
-
da.store(elem, g)
|
|
526
|
+
da.store(elem, g, scheduler="threads")
|
|
529
527
|
|
|
530
528
|
|
|
531
529
|
@_REGISTRY.register_read(H5Array, IOSpec("array", "0.2.0"))
|
|
@@ -626,6 +624,7 @@ def write_vlen_string_array_zarr(
|
|
|
626
624
|
filters, fill_value = None, None
|
|
627
625
|
if f.metadata.zarr_format == 2:
|
|
628
626
|
filters, fill_value = [VLenUTF8()], ""
|
|
627
|
+
dataset_kwargs = zarr_v3_sharding(dataset_kwargs)
|
|
629
628
|
f.create_array(
|
|
630
629
|
k,
|
|
631
630
|
shape=elem.shape,
|
|
@@ -694,6 +693,9 @@ def write_recarray_zarr(
|
|
|
694
693
|
else:
|
|
695
694
|
dataset_kwargs = dataset_kwargs.copy()
|
|
696
695
|
dataset_kwargs = zarr_v3_compressor_compat(dataset_kwargs)
|
|
696
|
+
# https://github.com/zarr-developers/zarr-python/issues/3546
|
|
697
|
+
# if "shards" not in dataset_kwargs and ad.settings.auto_shard_zarr_v3:
|
|
698
|
+
# dataset_kwargs = {**dataset_kwargs, "shards": "auto"}
|
|
697
699
|
f.create_array(k, shape=elem.shape, dtype=elem.dtype, **dataset_kwargs)
|
|
698
700
|
f[k][...] = elem
|
|
699
701
|
|
|
@@ -730,6 +732,7 @@ def write_sparse_compressed(
|
|
|
730
732
|
attr_name, data=attr, shape=attr.shape, dtype=dtype, **dataset_kwargs
|
|
731
733
|
)
|
|
732
734
|
else:
|
|
735
|
+
dataset_kwargs = zarr_v3_sharding(dataset_kwargs)
|
|
733
736
|
arr = g.create_array(
|
|
734
737
|
attr_name, shape=attr.shape, dtype=dtype, **dataset_kwargs
|
|
735
738
|
)
|
|
@@ -17,7 +17,7 @@ from .compat import is_zarr_v2, old_positionals
|
|
|
17
17
|
|
|
18
18
|
if TYPE_CHECKING:
|
|
19
19
|
from collections.abc import Callable, Sequence
|
|
20
|
-
from typing import Any, TypeGuard
|
|
20
|
+
from typing import Any, Self, TypeGuard
|
|
21
21
|
|
|
22
22
|
T = TypeVar("T")
|
|
23
23
|
|
|
@@ -55,7 +55,7 @@ class RegisteredOption(NamedTuple, Generic[T]):
|
|
|
55
55
|
option: str
|
|
56
56
|
default_value: T
|
|
57
57
|
description: str
|
|
58
|
-
validate: Callable[[T], None]
|
|
58
|
+
validate: Callable[[T, SettingsManager], None]
|
|
59
59
|
type: object
|
|
60
60
|
|
|
61
61
|
describe = describe
|
|
@@ -206,7 +206,7 @@ class SettingsManager:
|
|
|
206
206
|
*,
|
|
207
207
|
default_value: T,
|
|
208
208
|
description: str,
|
|
209
|
-
validate: Callable[[T], None],
|
|
209
|
+
validate: Callable[[T, Self], None],
|
|
210
210
|
option_type: object | None = None,
|
|
211
211
|
get_from_env: Callable[[str, T], T] = lambda x, y: y,
|
|
212
212
|
) -> None:
|
|
@@ -229,7 +229,7 @@ class SettingsManager:
|
|
|
229
229
|
Default behavior is to return `default_value` without checking the environment.
|
|
230
230
|
"""
|
|
231
231
|
try:
|
|
232
|
-
validate(default_value)
|
|
232
|
+
validate(default_value, self)
|
|
233
233
|
except (ValueError, TypeError) as e:
|
|
234
234
|
e.add_note(f"for option {option!r}")
|
|
235
235
|
raise e
|
|
@@ -307,7 +307,7 @@ class SettingsManager:
|
|
|
307
307
|
)
|
|
308
308
|
raise AttributeError(msg)
|
|
309
309
|
registered_option = self._registered_options[option]
|
|
310
|
-
registered_option.validate(val)
|
|
310
|
+
registered_option.validate(val, self)
|
|
311
311
|
self._config[option] = val
|
|
312
312
|
|
|
313
313
|
def __getattr__(self, option: str) -> object:
|
|
@@ -364,10 +364,13 @@ class SettingsManager:
|
|
|
364
364
|
"""
|
|
365
365
|
restore = {a: getattr(self, a) for a in overrides}
|
|
366
366
|
try:
|
|
367
|
-
|
|
368
|
-
|
|
367
|
+
# Preserve order so that settings that depend on each other can be overridden together i.e., always override zarr version before sharding
|
|
368
|
+
for k in self._config:
|
|
369
|
+
if k in overrides:
|
|
370
|
+
setattr(self, k, overrides.get(k))
|
|
369
371
|
yield None
|
|
370
372
|
finally:
|
|
373
|
+
# TODO: does the order need to be preserved when restoring?
|
|
371
374
|
for attr, value in restore.items():
|
|
372
375
|
setattr(self, attr, value)
|
|
373
376
|
|
|
@@ -395,7 +398,7 @@ V = TypeVar("V")
|
|
|
395
398
|
|
|
396
399
|
|
|
397
400
|
def gen_validator(_type: type[V]) -> Callable[[V], None]:
|
|
398
|
-
def validate_type(val: V) -> None:
|
|
401
|
+
def validate_type(val: V, settings: SettingsManager) -> None:
|
|
399
402
|
if not isinstance(val, _type):
|
|
400
403
|
msg = f"{val} not valid {_type}"
|
|
401
404
|
raise TypeError(msg)
|
|
@@ -434,14 +437,28 @@ settings.register(
|
|
|
434
437
|
)
|
|
435
438
|
|
|
436
439
|
|
|
437
|
-
def validate_zarr_write_format(format: int):
|
|
438
|
-
validate_int(format)
|
|
440
|
+
def validate_zarr_write_format(format: int, settings: SettingsManager):
|
|
441
|
+
validate_int(format, settings)
|
|
439
442
|
if format not in {2, 3}:
|
|
440
443
|
msg = "non-v2 zarr on-disk format not supported"
|
|
441
444
|
raise ValueError(msg)
|
|
442
445
|
if format == 3 and is_zarr_v2():
|
|
443
446
|
msg = "Cannot write v3 format against v2 package"
|
|
444
447
|
raise ValueError(msg)
|
|
448
|
+
if format == 2 and getattr(settings, "auto_shard_zarr_v3", False):
|
|
449
|
+
msg = "Cannot set `zarr_write_format` to 2 with autosharding on. Please set to `False` `anndata.settings.auto_shard_zarr_v3`"
|
|
450
|
+
raise ValueError(msg)
|
|
451
|
+
|
|
452
|
+
|
|
453
|
+
def validate_zarr_sharding(auto_shard: bool, settings: SettingsManager): # noqa: FBT001
|
|
454
|
+
validate_bool(auto_shard, settings)
|
|
455
|
+
if auto_shard:
|
|
456
|
+
if is_zarr_v2():
|
|
457
|
+
msg = "Cannot use sharding with `zarr-python<3`. Please upgrade package and set `anndata.settings.zarr_write_format` to 3."
|
|
458
|
+
raise ValueError(msg)
|
|
459
|
+
if settings.zarr_write_format == 2:
|
|
460
|
+
msg = "Cannot shard v2 format data. Please set `anndata.settings.zarr_write_format` to 3."
|
|
461
|
+
raise ValueError(msg)
|
|
445
462
|
|
|
446
463
|
|
|
447
464
|
settings.register(
|
|
@@ -458,8 +475,8 @@ settings.register(
|
|
|
458
475
|
)
|
|
459
476
|
|
|
460
477
|
|
|
461
|
-
def validate_sparse_settings(val: Any) -> None:
|
|
462
|
-
validate_bool(val)
|
|
478
|
+
def validate_sparse_settings(val: Any, settings: SettingsManager) -> None:
|
|
479
|
+
validate_bool(val, settings)
|
|
463
480
|
|
|
464
481
|
|
|
465
482
|
settings.register(
|
|
@@ -486,6 +503,14 @@ settings.register(
|
|
|
486
503
|
get_from_env=check_and_get_bool,
|
|
487
504
|
)
|
|
488
505
|
|
|
506
|
+
settings.register(
|
|
507
|
+
"auto_shard_zarr_v3",
|
|
508
|
+
default_value=False,
|
|
509
|
+
description="Whether or not to use zarr's auto computation of sharding for v3. For v2 this setting will be ignored. The setting will apply to all calls to anndata's writing mechanism (write_zarr / write_elem) and will **not** override any user-defined kwargs for shards.",
|
|
510
|
+
validate=validate_zarr_sharding,
|
|
511
|
+
get_from_env=check_and_get_bool,
|
|
512
|
+
)
|
|
513
|
+
|
|
489
514
|
|
|
490
515
|
##################################################################################
|
|
491
516
|
##################################################################################
|
|
@@ -2,7 +2,7 @@ from collections.abc import Callable as Callable
|
|
|
2
2
|
from collections.abc import Generator, Iterable
|
|
3
3
|
from contextlib import contextmanager
|
|
4
4
|
from dataclasses import dataclass
|
|
5
|
-
from typing import Literal, TypeVar
|
|
5
|
+
from typing import Literal, Self, TypeVar
|
|
6
6
|
|
|
7
7
|
_T = TypeVar("_T")
|
|
8
8
|
|
|
@@ -25,7 +25,7 @@ class SettingsManager:
|
|
|
25
25
|
*,
|
|
26
26
|
default_value: _T,
|
|
27
27
|
description: str,
|
|
28
|
-
validate: Callable[[_T], None],
|
|
28
|
+
validate: Callable[[_T, Self], None],
|
|
29
29
|
option_type: object | None = None,
|
|
30
30
|
get_from_env: Callable[[str, _T], _T] = ...,
|
|
31
31
|
) -> None: ...
|
|
@@ -46,5 +46,6 @@ class _AnnDataSettingsManager(SettingsManager):
|
|
|
46
46
|
use_sparse_array_on_read: bool = False
|
|
47
47
|
min_rows_for_chunked_h5_copy: int = 1000
|
|
48
48
|
disallow_forward_slash_in_h5ad: bool = False
|
|
49
|
+
auto_shard_zarr_v3: bool = False
|
|
49
50
|
|
|
50
51
|
settings: _AnnDataSettingsManager
|