anndata 0.12.3__tar.gz → 0.12.5__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {anndata-0.12.3 → anndata-0.12.5}/.github/workflows/test-cpu.yml +2 -2
- {anndata-0.12.3 → anndata-0.12.5}/.github/workflows/test-gpu.yml +1 -1
- {anndata-0.12.3 → anndata-0.12.5}/PKG-INFO +1 -1
- anndata-0.12.5/benchmarks/benchmarks/backed_hdf5.py +112 -0
- anndata-0.12.5/benchmarks/benchmarks/dataset2d.py +89 -0
- {anndata-0.12.3 → anndata-0.12.5}/benchmarks/benchmarks/readwrite.py +12 -48
- {anndata-0.12.3 → anndata-0.12.5}/benchmarks/benchmarks/sparse_dataset.py +54 -16
- {anndata-0.12.3 → anndata-0.12.5}/benchmarks/benchmarks/utils.py +21 -3
- anndata-0.12.5/docs/release-notes/0.12.4.md +4 -0
- anndata-0.12.5/docs/release-notes/0.12.5.md +12 -0
- anndata-0.12.5/docs/release-notes/2172.bug.md +1 -0
- {anndata-0.12.3 → anndata-0.12.5}/docs/tutorials/zarr-v3.md +2 -1
- {anndata-0.12.3 → anndata-0.12.5}/hatch.toml +7 -3
- {anndata-0.12.3 → anndata-0.12.5}/pyproject.toml +6 -1
- {anndata-0.12.3 → anndata-0.12.5}/src/anndata/_core/aligned_df.py +7 -0
- {anndata-0.12.3 → anndata-0.12.5}/src/anndata/_core/index.py +136 -23
- {anndata-0.12.3 → anndata-0.12.5}/src/anndata/_core/merge.py +8 -5
- {anndata-0.12.3 → anndata-0.12.5}/src/anndata/_core/sparse_dataset.py +4 -3
- {anndata-0.12.3 → anndata-0.12.5}/src/anndata/_io/specs/lazy_methods.py +6 -5
- {anndata-0.12.3 → anndata-0.12.5}/src/anndata/_io/specs/methods.py +22 -28
- {anndata-0.12.3 → anndata-0.12.5}/src/anndata/_settings.py +37 -12
- {anndata-0.12.3 → anndata-0.12.5}/src/anndata/_settings.pyi +3 -2
- {anndata-0.12.3 → anndata-0.12.5}/src/anndata/experimental/backed/_lazy_arrays.py +7 -4
- {anndata-0.12.3 → anndata-0.12.5}/src/anndata/tests/helpers.py +22 -0
- {anndata-0.12.3 → anndata-0.12.5}/tests/conftest.py +7 -4
- {anndata-0.12.3 → anndata-0.12.5}/tests/lazy/test_concat.py +1 -0
- {anndata-0.12.3 → anndata-0.12.5}/tests/test_annot.py +24 -1
- {anndata-0.12.3 → anndata-0.12.5}/tests/test_backed_hdf5.py +102 -9
- {anndata-0.12.3 → anndata-0.12.5}/tests/test_concatenate_disk.py +20 -3
- {anndata-0.12.3 → anndata-0.12.5}/tests/test_dask.py +16 -5
- {anndata-0.12.3 → anndata-0.12.5}/tests/test_dask_view_mem.py +1 -1
- {anndata-0.12.3 → anndata-0.12.5}/tests/test_inplace_subset.py +1 -0
- {anndata-0.12.3 → anndata-0.12.5}/tests/test_io_dispatched.py +7 -14
- {anndata-0.12.3 → anndata-0.12.5}/tests/test_io_elementwise.py +72 -1
- {anndata-0.12.3 → anndata-0.12.5}/tests/test_readwrite.py +6 -9
- {anndata-0.12.3 → anndata-0.12.5}/tests/test_settings.py +1 -1
- anndata-0.12.3/benchmarks/benchmarks/dataset2d.py +0 -61
- {anndata-0.12.3 → anndata-0.12.5}/.cirun.yml +0 -0
- {anndata-0.12.3 → anndata-0.12.5}/.codecov.yml +0 -0
- {anndata-0.12.3 → anndata-0.12.5}/.editorconfig +0 -0
- {anndata-0.12.3 → anndata-0.12.5}/.github/ISSUE_TEMPLATE/bug-report.yml +0 -0
- {anndata-0.12.3 → anndata-0.12.5}/.github/ISSUE_TEMPLATE/config.yml +0 -0
- {anndata-0.12.3 → anndata-0.12.5}/.github/ISSUE_TEMPLATE/enhancement-request.yml +0 -0
- {anndata-0.12.3 → anndata-0.12.5}/.github/ISSUE_TEMPLATE/question.yml +0 -0
- {anndata-0.12.3 → anndata-0.12.5}/.github/PULL_REQUEST_TEMPLATE.md +0 -0
- {anndata-0.12.3 → anndata-0.12.5}/.github/dependabot.yml +0 -0
- {anndata-0.12.3 → anndata-0.12.5}/.github/workflows/benchmark.yml +0 -0
- {anndata-0.12.3 → anndata-0.12.5}/.github/workflows/check-pr.yml +0 -0
- {anndata-0.12.3 → anndata-0.12.5}/.github/workflows/close-stale.yml +0 -0
- {anndata-0.12.3 → anndata-0.12.5}/.github/workflows/codespell.yml +0 -0
- {anndata-0.12.3 → anndata-0.12.5}/.github/workflows/label-stale.yml +0 -0
- {anndata-0.12.3 → anndata-0.12.5}/.github/workflows/publish.yml +0 -0
- {anndata-0.12.3 → anndata-0.12.5}/.gitignore +0 -0
- {anndata-0.12.3 → anndata-0.12.5}/.gitmodules +0 -0
- {anndata-0.12.3 → anndata-0.12.5}/.pre-commit-config.yaml +0 -0
- {anndata-0.12.3 → anndata-0.12.5}/.prettierignore +0 -0
- {anndata-0.12.3 → anndata-0.12.5}/.prettierrc.yaml +0 -0
- {anndata-0.12.3 → anndata-0.12.5}/.readthedocs.yml +0 -0
- {anndata-0.12.3 → anndata-0.12.5}/.taplo.toml +0 -0
- {anndata-0.12.3 → anndata-0.12.5}/.vscode/launch.json +0 -0
- {anndata-0.12.3 → anndata-0.12.5}/.vscode/settings.json +0 -0
- {anndata-0.12.3 → anndata-0.12.5}/LICENSE +0 -0
- {anndata-0.12.3 → anndata-0.12.5}/README.md +0 -0
- {anndata-0.12.3 → anndata-0.12.5}/benchmarks/README.md +0 -0
- {anndata-0.12.3 → anndata-0.12.5}/benchmarks/asv.conf.json +0 -0
- {anndata-0.12.3 → anndata-0.12.5}/benchmarks/benchmarks/__init__.py +0 -0
- {anndata-0.12.3 → anndata-0.12.5}/benchmarks/benchmarks/anndata.py +0 -0
- {anndata-0.12.3 → anndata-0.12.5}/biome.jsonc +0 -0
- {anndata-0.12.3 → anndata-0.12.5}/ci/constraints.txt +0 -0
- {anndata-0.12.3 → anndata-0.12.5}/ci/scripts/min-deps.py +0 -0
- {anndata-0.12.3 → anndata-0.12.5}/ci/scripts/towncrier_automation.py +0 -0
- {anndata-0.12.3 → anndata-0.12.5}/docs/Makefile +0 -0
- {anndata-0.12.3 → anndata-0.12.5}/docs/_key_contributors.rst +0 -0
- {anndata-0.12.3 → anndata-0.12.5}/docs/_static/img/anndata_schema.svg +0 -0
- {anndata-0.12.3 → anndata-0.12.5}/docs/_templates/autosummary/class.rst +0 -0
- {anndata-0.12.3 → anndata-0.12.5}/docs/api.md +0 -0
- {anndata-0.12.3 → anndata-0.12.5}/docs/benchmark-read-write.ipynb +0 -0
- {anndata-0.12.3 → anndata-0.12.5}/docs/benchmarks.md +0 -0
- {anndata-0.12.3 → anndata-0.12.5}/docs/concatenation.rst +0 -0
- {anndata-0.12.3 → anndata-0.12.5}/docs/conf.py +0 -0
- {anndata-0.12.3 → anndata-0.12.5}/docs/contributing.md +0 -0
- {anndata-0.12.3 → anndata-0.12.5}/docs/extensions/autosummary_skip_inherited.py +0 -0
- {anndata-0.12.3 → anndata-0.12.5}/docs/extensions/no_skip_abc_members.py +0 -0
- {anndata-0.12.3 → anndata-0.12.5}/docs/extensions/patch_myst_cite.py +0 -0
- {anndata-0.12.3 → anndata-0.12.5}/docs/fileformat-prose.md +0 -0
- {anndata-0.12.3 → anndata-0.12.5}/docs/index.md +0 -0
- {anndata-0.12.3 → anndata-0.12.5}/docs/interoperability.md +0 -0
- {anndata-0.12.3 → anndata-0.12.5}/docs/news.md +0 -0
- {anndata-0.12.3 → anndata-0.12.5}/docs/references.rst +0 -0
- {anndata-0.12.3 → anndata-0.12.5}/docs/release-notes/0.10.0.md +0 -0
- {anndata-0.12.3 → anndata-0.12.5}/docs/release-notes/0.10.1.md +0 -0
- {anndata-0.12.3 → anndata-0.12.5}/docs/release-notes/0.10.2.md +0 -0
- {anndata-0.12.3 → anndata-0.12.5}/docs/release-notes/0.10.3.md +0 -0
- {anndata-0.12.3 → anndata-0.12.5}/docs/release-notes/0.10.4.md +0 -0
- {anndata-0.12.3 → anndata-0.12.5}/docs/release-notes/0.10.5.md +0 -0
- {anndata-0.12.3 → anndata-0.12.5}/docs/release-notes/0.10.6.md +0 -0
- {anndata-0.12.3 → anndata-0.12.5}/docs/release-notes/0.10.7.md +0 -0
- {anndata-0.12.3 → anndata-0.12.5}/docs/release-notes/0.10.8.md +0 -0
- {anndata-0.12.3 → anndata-0.12.5}/docs/release-notes/0.10.9.md +0 -0
- {anndata-0.12.3 → anndata-0.12.5}/docs/release-notes/0.11.0.md +0 -0
- {anndata-0.12.3 → anndata-0.12.5}/docs/release-notes/0.11.1.md +0 -0
- {anndata-0.12.3 → anndata-0.12.5}/docs/release-notes/0.11.2.md +0 -0
- {anndata-0.12.3 → anndata-0.12.5}/docs/release-notes/0.11.3.md +0 -0
- {anndata-0.12.3 → anndata-0.12.5}/docs/release-notes/0.11.4.md +0 -0
- {anndata-0.12.3 → anndata-0.12.5}/docs/release-notes/0.12.0.md +0 -0
- {anndata-0.12.3 → anndata-0.12.5}/docs/release-notes/0.12.1.md +0 -0
- {anndata-0.12.3 → anndata-0.12.5}/docs/release-notes/0.12.2.md +0 -0
- {anndata-0.12.3 → anndata-0.12.5}/docs/release-notes/0.12.3.md +0 -0
- {anndata-0.12.3 → anndata-0.12.5}/docs/release-notes/0.4.0.md +0 -0
- {anndata-0.12.3 → anndata-0.12.5}/docs/release-notes/0.5.0.md +0 -0
- {anndata-0.12.3 → anndata-0.12.5}/docs/release-notes/0.6.0.md +0 -0
- {anndata-0.12.3 → anndata-0.12.5}/docs/release-notes/0.6.x.md +0 -0
- {anndata-0.12.3 → anndata-0.12.5}/docs/release-notes/0.7.0.md +0 -0
- {anndata-0.12.3 → anndata-0.12.5}/docs/release-notes/0.7.2.md +0 -0
- {anndata-0.12.3 → anndata-0.12.5}/docs/release-notes/0.7.3.md +0 -0
- {anndata-0.12.3 → anndata-0.12.5}/docs/release-notes/0.7.4.md +0 -0
- {anndata-0.12.3 → anndata-0.12.5}/docs/release-notes/0.7.5.md +0 -0
- {anndata-0.12.3 → anndata-0.12.5}/docs/release-notes/0.7.6.md +0 -0
- {anndata-0.12.3 → anndata-0.12.5}/docs/release-notes/0.7.7.md +0 -0
- {anndata-0.12.3 → anndata-0.12.5}/docs/release-notes/0.7.8.md +0 -0
- {anndata-0.12.3 → anndata-0.12.5}/docs/release-notes/0.8.0.md +0 -0
- {anndata-0.12.3 → anndata-0.12.5}/docs/release-notes/0.9.0.md +0 -0
- {anndata-0.12.3 → anndata-0.12.5}/docs/release-notes/0.9.1.md +0 -0
- {anndata-0.12.3 → anndata-0.12.5}/docs/release-notes/0.9.2.md +0 -0
- {anndata-0.12.3 → anndata-0.12.5}/docs/release-notes/index.md +0 -0
- {anndata-0.12.3 → anndata-0.12.5}/docs/tutorials/index.md +0 -0
- {anndata-0.12.3 → anndata-0.12.5}/src/anndata/__init__.py +0 -0
- {anndata-0.12.3 → anndata-0.12.5}/src/anndata/_core/__init__.py +0 -0
- {anndata-0.12.3 → anndata-0.12.5}/src/anndata/_core/access.py +0 -0
- {anndata-0.12.3 → anndata-0.12.5}/src/anndata/_core/aligned_mapping.py +0 -0
- {anndata-0.12.3 → anndata-0.12.5}/src/anndata/_core/anndata.py +0 -0
- {anndata-0.12.3 → anndata-0.12.5}/src/anndata/_core/extensions.py +0 -0
- {anndata-0.12.3 → anndata-0.12.5}/src/anndata/_core/file_backing.py +0 -0
- {anndata-0.12.3 → anndata-0.12.5}/src/anndata/_core/raw.py +0 -0
- {anndata-0.12.3 → anndata-0.12.5}/src/anndata/_core/storage.py +0 -0
- {anndata-0.12.3 → anndata-0.12.5}/src/anndata/_core/views.py +0 -0
- {anndata-0.12.3 → anndata-0.12.5}/src/anndata/_core/xarray.py +0 -0
- {anndata-0.12.3 → anndata-0.12.5}/src/anndata/_io/__init__.py +0 -0
- {anndata-0.12.3 → anndata-0.12.5}/src/anndata/_io/h5ad.py +0 -0
- {anndata-0.12.3 → anndata-0.12.5}/src/anndata/_io/read.py +0 -0
- {anndata-0.12.3 → anndata-0.12.5}/src/anndata/_io/specs/__init__.py +0 -0
- {anndata-0.12.3 → anndata-0.12.5}/src/anndata/_io/specs/registry.py +0 -0
- {anndata-0.12.3 → anndata-0.12.5}/src/anndata/_io/utils.py +0 -0
- {anndata-0.12.3 → anndata-0.12.5}/src/anndata/_io/write.py +0 -0
- {anndata-0.12.3 → anndata-0.12.5}/src/anndata/_io/zarr.py +0 -0
- {anndata-0.12.3 → anndata-0.12.5}/src/anndata/_types.py +0 -0
- {anndata-0.12.3 → anndata-0.12.5}/src/anndata/_warnings.py +0 -0
- {anndata-0.12.3 → anndata-0.12.5}/src/anndata/abc.py +0 -0
- {anndata-0.12.3 → anndata-0.12.5}/src/anndata/compat/__init__.py +0 -0
- {anndata-0.12.3 → anndata-0.12.5}/src/anndata/experimental/__init__.py +0 -0
- {anndata-0.12.3 → anndata-0.12.5}/src/anndata/experimental/_dispatch_io.py +0 -0
- {anndata-0.12.3 → anndata-0.12.5}/src/anndata/experimental/backed/__init__.py +0 -0
- {anndata-0.12.3 → anndata-0.12.5}/src/anndata/experimental/backed/_compat.py +0 -0
- {anndata-0.12.3 → anndata-0.12.5}/src/anndata/experimental/backed/_io.py +0 -0
- {anndata-0.12.3 → anndata-0.12.5}/src/anndata/experimental/merge.py +0 -0
- {anndata-0.12.3 → anndata-0.12.5}/src/anndata/experimental/multi_files/__init__.py +0 -0
- {anndata-0.12.3 → anndata-0.12.5}/src/anndata/experimental/multi_files/_anncollection.py +0 -0
- {anndata-0.12.3 → anndata-0.12.5}/src/anndata/experimental/pytorch/__init__.py +0 -0
- {anndata-0.12.3 → anndata-0.12.5}/src/anndata/experimental/pytorch/_annloader.py +0 -0
- {anndata-0.12.3 → anndata-0.12.5}/src/anndata/io.py +0 -0
- {anndata-0.12.3 → anndata-0.12.5}/src/anndata/logging.py +0 -0
- {anndata-0.12.3 → anndata-0.12.5}/src/anndata/tests/__init__.py +0 -0
- {anndata-0.12.3 → anndata-0.12.5}/src/anndata/types.py +0 -0
- {anndata-0.12.3 → anndata-0.12.5}/src/anndata/typing.py +0 -0
- {anndata-0.12.3 → anndata-0.12.5}/src/anndata/utils.py +0 -0
- {anndata-0.12.3 → anndata-0.12.5}/src/testing/anndata/__init__.py +0 -0
- {anndata-0.12.3 → anndata-0.12.5}/src/testing/anndata/_doctest.py +0 -0
- {anndata-0.12.3 → anndata-0.12.5}/src/testing/anndata/_pytest.py +0 -0
- {anndata-0.12.3 → anndata-0.12.5}/src/testing/anndata/py.typed +0 -0
- {anndata-0.12.3 → anndata-0.12.5}/tests/data/adata-comments.tsv +0 -0
- {anndata-0.12.3 → anndata-0.12.5}/tests/data/adata.csv +0 -0
- {anndata-0.12.3 → anndata-0.12.5}/tests/data/archives/readme.md +0 -0
- {anndata-0.12.3 → anndata-0.12.5}/tests/data/archives/v0.11.4/adata.h5ad +0 -0
- {anndata-0.12.3 → anndata-0.12.5}/tests/data/archives/v0.11.4/adata.zarr.zip +0 -0
- {anndata-0.12.3 → anndata-0.12.5}/tests/data/archives/v0.11.4/readme.md +0 -0
- {anndata-0.12.3 → anndata-0.12.5}/tests/data/archives/v0.7.0/adata.h5ad +0 -0
- {anndata-0.12.3 → anndata-0.12.5}/tests/data/archives/v0.7.0/adata.zarr.zip +0 -0
- {anndata-0.12.3 → anndata-0.12.5}/tests/data/archives/v0.7.8/adata.h5ad +0 -0
- {anndata-0.12.3 → anndata-0.12.5}/tests/data/archives/v0.7.8/adata.zarr.zip +0 -0
- {anndata-0.12.3 → anndata-0.12.5}/tests/data/excel.xlsx +0 -0
- {anndata-0.12.3 → anndata-0.12.5}/tests/data/umi_tools.tsv.gz +0 -0
- {anndata-0.12.3 → anndata-0.12.5}/tests/lazy/conftest.py +0 -0
- {anndata-0.12.3 → anndata-0.12.5}/tests/lazy/test_read.py +0 -0
- {anndata-0.12.3 → anndata-0.12.5}/tests/lazy/test_write.py +0 -0
- {anndata-0.12.3 → anndata-0.12.5}/tests/test_anncollection.py +0 -0
- {anndata-0.12.3 → anndata-0.12.5}/tests/test_awkward.py +0 -0
- {anndata-0.12.3 → anndata-0.12.5}/tests/test_backed_dense.py +0 -0
- {anndata-0.12.3 → anndata-0.12.5}/tests/test_backed_sparse.py +0 -0
- {anndata-0.12.3 → anndata-0.12.5}/tests/test_base.py +0 -0
- {anndata-0.12.3 → anndata-0.12.5}/tests/test_concatenate.py +0 -0
- {anndata-0.12.3 → anndata-0.12.5}/tests/test_deprecations.py +0 -0
- {anndata-0.12.3 → anndata-0.12.5}/tests/test_extensions.py +0 -0
- {anndata-0.12.3 → anndata-0.12.5}/tests/test_get_vector.py +0 -0
- {anndata-0.12.3 → anndata-0.12.5}/tests/test_gpu.py +0 -0
- {anndata-0.12.3 → anndata-0.12.5}/tests/test_helpers.py +0 -0
- {anndata-0.12.3 → anndata-0.12.5}/tests/test_io_backwards_compat.py +0 -0
- {anndata-0.12.3 → anndata-0.12.5}/tests/test_io_conversion.py +0 -0
- {anndata-0.12.3 → anndata-0.12.5}/tests/test_io_partial.py +0 -0
- {anndata-0.12.3 → anndata-0.12.5}/tests/test_io_utils.py +0 -0
- {anndata-0.12.3 → anndata-0.12.5}/tests/test_io_warnings.py +0 -0
- {anndata-0.12.3 → anndata-0.12.5}/tests/test_layers.py +0 -0
- {anndata-0.12.3 → anndata-0.12.5}/tests/test_obsmvarm.py +0 -0
- {anndata-0.12.3 → anndata-0.12.5}/tests/test_obspvarp.py +0 -0
- {anndata-0.12.3 → anndata-0.12.5}/tests/test_raw.py +0 -0
- {anndata-0.12.3 → anndata-0.12.5}/tests/test_repr.py +0 -0
- {anndata-0.12.3 → anndata-0.12.5}/tests/test_structured_arrays.py +0 -0
- {anndata-0.12.3 → anndata-0.12.5}/tests/test_transpose.py +0 -0
- {anndata-0.12.3 → anndata-0.12.5}/tests/test_uns.py +0 -0
- {anndata-0.12.3 → anndata-0.12.5}/tests/test_utils.py +0 -0
- {anndata-0.12.3 → anndata-0.12.5}/tests/test_views.py +0 -0
- {anndata-0.12.3 → anndata-0.12.5}/tests/test_x.py +0 -0
- {anndata-0.12.3 → anndata-0.12.5}/tests/test_xarray.py +0 -0
|
@@ -43,7 +43,7 @@ jobs:
|
|
|
43
43
|
strategy:
|
|
44
44
|
matrix:
|
|
45
45
|
env: ${{ fromJSON(needs.get-environments.outputs.envs) }}
|
|
46
|
-
io_mark: ["zarr_io", "not zarr_io"]
|
|
46
|
+
io_mark: ["zarr_io", "not zarr_io", "dask_distributed"] # dask_distributed should not be run with -n auto as it uses a client with processes
|
|
47
47
|
env: # environment variables for use in codecov’s env_vars tagging
|
|
48
48
|
ENV_NAME: ${{ matrix.env.name }}
|
|
49
49
|
IO_MARK: ${{ matrix.io_mark }}
|
|
@@ -72,7 +72,7 @@ jobs:
|
|
|
72
72
|
env:
|
|
73
73
|
COVERAGE_PROCESS_START: ${{ github.workspace }}/pyproject.toml
|
|
74
74
|
run: |
|
|
75
|
-
hatch run ${{ matrix.env.name }}:run-cov -v --color=yes -n auto --junitxml=test-data/test-results.xml -m "${{ matrix.io_mark }}" ${{ matrix.env.args }}
|
|
75
|
+
hatch run ${{ matrix.env.name }}:run-cov -v --color=yes ${{ matrix.io_mark != 'dask_distributed' && '-n auto' || '' }} --junitxml=test-data/test-results.xml -m "${{ matrix.io_mark }}" ${{ matrix.env.args }}
|
|
76
76
|
hatch run ${{ matrix.env.name }}:cov-combine
|
|
77
77
|
hatch run ${{ matrix.env.name }}:coverage xml
|
|
78
78
|
|
|
@@ -63,7 +63,7 @@ jobs:
|
|
|
63
63
|
echo "max_python_version=$max_version" >> $GITHUB_ENV
|
|
64
64
|
|
|
65
65
|
- name: Install UV
|
|
66
|
-
uses: astral-sh/setup-uv@v6
|
|
66
|
+
uses: astral-sh/setup-uv@v6 # TODO: upgrade once cirun image supports node 24
|
|
67
67
|
with:
|
|
68
68
|
enable-cache: true
|
|
69
69
|
python-version: ${{ env.max_python_version }}
|
|
@@ -0,0 +1,112 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import numpy as np
|
|
4
|
+
import pandas as pd
|
|
5
|
+
from scipy import sparse
|
|
6
|
+
|
|
7
|
+
import anndata as ad
|
|
8
|
+
|
|
9
|
+
file_paths = {"sparse": "adata_sparse.h5ad"}
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class BackedHDF5Indexing:
|
|
13
|
+
param_names = ("arr_type",)
|
|
14
|
+
params = ("sparse",)
|
|
15
|
+
|
|
16
|
+
def setup_cache(self):
|
|
17
|
+
X_sparse = sparse.random(
|
|
18
|
+
10000,
|
|
19
|
+
50000,
|
|
20
|
+
density=0.01,
|
|
21
|
+
format="csr",
|
|
22
|
+
random_state=np.random.default_rng(42),
|
|
23
|
+
)
|
|
24
|
+
for X, arr_type in [
|
|
25
|
+
(X_sparse, "sparse"),
|
|
26
|
+
]:
|
|
27
|
+
n_obs, n_var = X.shape
|
|
28
|
+
|
|
29
|
+
# Create obs and var dataframes
|
|
30
|
+
obs = pd.DataFrame(
|
|
31
|
+
{
|
|
32
|
+
"cell_type": pd.Categorical(
|
|
33
|
+
np.random.choice(["TypeA", "TypeB", "TypeC"], n_obs)
|
|
34
|
+
),
|
|
35
|
+
"total_counts": np.random.randint(1000, 5000, n_obs),
|
|
36
|
+
},
|
|
37
|
+
index=[f"cell_{i}" for i in range(n_obs)],
|
|
38
|
+
)
|
|
39
|
+
|
|
40
|
+
var = pd.DataFrame(
|
|
41
|
+
{
|
|
42
|
+
"gene_name": [f"gene_{i}" for i in range(n_var)],
|
|
43
|
+
},
|
|
44
|
+
index=[f"ENSG_{i:08d}" for i in range(n_var)],
|
|
45
|
+
)
|
|
46
|
+
|
|
47
|
+
# Create AnnData object and save to HDF5
|
|
48
|
+
adata = ad.AnnData(X=X, obs=obs, var=var)
|
|
49
|
+
|
|
50
|
+
# Create temporary file
|
|
51
|
+
adata.write_h5ad(file_paths[arr_type])
|
|
52
|
+
|
|
53
|
+
def setup(self, arr_type):
|
|
54
|
+
# Open as backed
|
|
55
|
+
self.adata_backed = ad.read_h5ad(file_paths[arr_type], backed="r")
|
|
56
|
+
self.n_obs, self.n_var = self.adata_backed.shape
|
|
57
|
+
# Prepare indices for duplicate index testing
|
|
58
|
+
self.obs_idx_with_dupes = np.array([0, 1, 0, 2, 1] * (self.n_obs // 100 + 1))[
|
|
59
|
+
: (self.n_obs // 10)
|
|
60
|
+
]
|
|
61
|
+
self.var_idx_with_dupes = np.array([0, 1, 2, 0, 3] * (self.n_var // 100 + 1))[
|
|
62
|
+
: (self.n_var // 10)
|
|
63
|
+
]
|
|
64
|
+
self.obs_idx_no_dupes = np.arange(0, self.n_obs, 10)
|
|
65
|
+
self.var_idx_no_dupes = np.arange(0, self.n_var, 10)
|
|
66
|
+
|
|
67
|
+
def time_slice_obs(self, *_):
|
|
68
|
+
"""Time slicing observations from backed HDF5"""
|
|
69
|
+
self.adata_backed[0 : (self.n_obs // 2), :]
|
|
70
|
+
|
|
71
|
+
def time_slice_obs_to_memory(self, *_):
|
|
72
|
+
"""Time slicing observations from backed HDF5"""
|
|
73
|
+
self.adata_backed[0 : (self.n_obs // 2), :].to_memory()
|
|
74
|
+
|
|
75
|
+
def peakmem_slice_obs(self, *_):
|
|
76
|
+
"""Peak memory for slicing observations from backed HDF5"""
|
|
77
|
+
self.adata_backed[0 : (self.n_obs // 2), :]
|
|
78
|
+
|
|
79
|
+
def time_fancy_index_no_dupes(self, *_):
|
|
80
|
+
"""Time fancy indexing without duplicates"""
|
|
81
|
+
self.adata_backed[self.obs_idx_no_dupes, self.var_idx_no_dupes]
|
|
82
|
+
|
|
83
|
+
def peakmem_fancy_index_no_dupes(self, *_):
|
|
84
|
+
"""Peak memory for fancy indexing without duplicates"""
|
|
85
|
+
self.adata_backed[self.obs_idx_no_dupes, self.var_idx_no_dupes]
|
|
86
|
+
|
|
87
|
+
def time_fancy_index_no_dupes_to_memory(self, *_):
|
|
88
|
+
"""Time fancy indexing without duplicates"""
|
|
89
|
+
self.adata_backed[self.obs_idx_no_dupes, self.var_idx_no_dupes].to_memory()
|
|
90
|
+
|
|
91
|
+
def time_index_with_dupes_obs(self, *_):
|
|
92
|
+
"""Time fancy indexing with duplicate observation indices"""
|
|
93
|
+
self.adata_backed[self.obs_idx_with_dupes, :]
|
|
94
|
+
|
|
95
|
+
def peakmem_index_with_dupes_obs(self, *_):
|
|
96
|
+
"""Peak memory for fancy indexing with duplicate observation indices"""
|
|
97
|
+
self.adata_backed[self.obs_idx_with_dupes, :]
|
|
98
|
+
|
|
99
|
+
def time_to_memory_subset(self, *_):
|
|
100
|
+
"""Time converting subset to memory"""
|
|
101
|
+
subset = self.adata_backed[0 : (self.n_obs // 4), 0 : (self.n_var // 4)]
|
|
102
|
+
subset.to_memory()
|
|
103
|
+
|
|
104
|
+
def peakmem_to_memory_subset(self, *_):
|
|
105
|
+
"""Peak memory for converting subset to memory"""
|
|
106
|
+
subset = self.adata_backed[0 : (self.n_obs // 4), 0 : (self.n_var // 4)]
|
|
107
|
+
subset.to_memory()
|
|
108
|
+
|
|
109
|
+
def teardown(self, *_):
|
|
110
|
+
"""Clean up temporary files"""
|
|
111
|
+
if hasattr(self, "adata_backed"):
|
|
112
|
+
self.adata_backed.file.close()
|
|
@@ -0,0 +1,89 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from typing import TYPE_CHECKING
|
|
4
|
+
|
|
5
|
+
import h5py
|
|
6
|
+
import numpy as np
|
|
7
|
+
import pandas as pd
|
|
8
|
+
import zarr
|
|
9
|
+
|
|
10
|
+
import anndata as ad
|
|
11
|
+
|
|
12
|
+
if TYPE_CHECKING:
|
|
13
|
+
from typing import Literal
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class Dataset2D:
|
|
17
|
+
param_names = ("store_type", "chunks", "array_type")
|
|
18
|
+
params = (
|
|
19
|
+
("zarr", "h5ad"),
|
|
20
|
+
((-1,), None),
|
|
21
|
+
("cat", "numeric", "string-array", "nullable-string-array"),
|
|
22
|
+
)
|
|
23
|
+
|
|
24
|
+
def setup_cache(self):
|
|
25
|
+
n_obs = 10000
|
|
26
|
+
array_types = {
|
|
27
|
+
"numeric": np.arange(n_obs),
|
|
28
|
+
"string-array": np.array(["a"] * n_obs),
|
|
29
|
+
"nullable-string-array": pd.array(
|
|
30
|
+
["a", pd.NA] * (n_obs // 2), dtype="string"
|
|
31
|
+
),
|
|
32
|
+
"cat": pd.Categorical(np.array(["a"] * n_obs)),
|
|
33
|
+
}
|
|
34
|
+
for k, v in array_types.items():
|
|
35
|
+
for store in [
|
|
36
|
+
h5py.File(f"data_{k}.h5ad", mode="w"),
|
|
37
|
+
zarr.open(f"data_{k}.zarr", mode="w", zarr_version=2),
|
|
38
|
+
]:
|
|
39
|
+
df = pd.DataFrame({"a": v}, index=[f"cell{i}" for i in range(n_obs)])
|
|
40
|
+
if writing_string_array_on_disk := (
|
|
41
|
+
isinstance(v, np.ndarray) and df["a"].dtype == "string"
|
|
42
|
+
):
|
|
43
|
+
df["a"] = df["a"].to_numpy()
|
|
44
|
+
with ad.settings.override(allow_write_nullable_strings=True):
|
|
45
|
+
ad.io.write_elem(store, "df", df)
|
|
46
|
+
if writing_string_array_on_disk:
|
|
47
|
+
assert store["df"]["a"].attrs["encoding-type"] == "string-array"
|
|
48
|
+
|
|
49
|
+
def setup(
|
|
50
|
+
self,
|
|
51
|
+
store_type: Literal["zarr", "h5ad"],
|
|
52
|
+
chunks: None | tuple[int],
|
|
53
|
+
array_type: Literal["cat", "numeric", "string-array", "nullable-string-array"],
|
|
54
|
+
):
|
|
55
|
+
self.store = (
|
|
56
|
+
h5py.File(f"data_{array_type}.h5ad", mode="r")
|
|
57
|
+
if store_type == "h5ad"
|
|
58
|
+
else zarr.open(f"data_{array_type}.zarr")
|
|
59
|
+
)
|
|
60
|
+
self.ds = ad.experimental.read_elem_lazy(self.store["df"], chunks=chunks)
|
|
61
|
+
self.n_obs = self.ds.shape[0]
|
|
62
|
+
|
|
63
|
+
def time_read_lazy_default(self, *_):
|
|
64
|
+
ad.experimental.read_elem_lazy(self.store["df"])
|
|
65
|
+
|
|
66
|
+
def peakmem_read_lazy_default(self, *_):
|
|
67
|
+
ad.experimental.read_elem_lazy(self.store["df"])
|
|
68
|
+
|
|
69
|
+
def time_getitem_slice(self, *_):
|
|
70
|
+
self.ds.iloc[0 : (self.n_obs // 2)].to_memory()
|
|
71
|
+
|
|
72
|
+
def peakmem_getitem_slice(self, *_):
|
|
73
|
+
self.ds.iloc[0 : (self.n_obs // 2)].to_memory()
|
|
74
|
+
|
|
75
|
+
def time_full_to_memory(self, *_):
|
|
76
|
+
self.ds.to_memory()
|
|
77
|
+
|
|
78
|
+
def peakmem_full_to_memory(self, *_):
|
|
79
|
+
self.ds.to_memory()
|
|
80
|
+
|
|
81
|
+
def time_getitem_bool_mask(self, *_):
|
|
82
|
+
self.ds.iloc[np.random.randint(0, self.n_obs, self.n_obs // 2)].to_memory()
|
|
83
|
+
|
|
84
|
+
def peakmem_getitem_bool_mask(self, *_):
|
|
85
|
+
self.ds.iloc[np.random.randint(0, self.n_obs, self.n_obs // 2)].to_memory()
|
|
86
|
+
|
|
87
|
+
def time_concat(self, *_):
|
|
88
|
+
adatas = [ad.AnnData(obs=self.ds)] * 50
|
|
89
|
+
ad.concat(adatas, join="outer")
|
|
@@ -38,52 +38,15 @@ from .utils import get_actualsize, get_peak_mem, sedate
|
|
|
38
38
|
|
|
39
39
|
PBMC_3K_URL = "https://falexwolf.de/data/pbmc3k_raw.h5ad"
|
|
40
40
|
|
|
41
|
-
# PBMC_3K_PATH = Path(__file__).parent / "data/pbmc3k_raw.h5ad"
|
|
42
|
-
# PBMC_REDUCED_PATH = Path(__file__).parent / "10x_pbmc68k_reduced.h5ad"
|
|
43
|
-
# BM_43K_CSR_PATH = Path(__file__).parent.parent / "datasets/BM2_43k-cells.h5ad"
|
|
44
|
-
# BM_43K_CSC_PATH = Path(__file__).parent.parent / "datasets/BM2_43k-cells_CSC.h5ad"
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
# class ZarrReadSuite:
|
|
48
|
-
# params = []
|
|
49
|
-
# param_names = ["input_url"]
|
|
50
|
-
|
|
51
|
-
# def setup(self, input_url):
|
|
52
|
-
# self.filepath = pooch.retrieve(url=input_url, known_hash=None)
|
|
53
|
-
|
|
54
|
-
# def time_read_full(self, input_url):
|
|
55
|
-
# anndata.read_zarr(self.filepath)
|
|
56
|
-
|
|
57
|
-
# def peakmem_read_full(self, input_url):
|
|
58
|
-
# anndata.read_zarr(self.filepath)
|
|
59
|
-
|
|
60
|
-
# def mem_readfull_object(self, input_url):
|
|
61
|
-
# return anndata.read_zarr(self.filepath)
|
|
62
|
-
|
|
63
|
-
# def track_read_full_memratio(self, input_url):
|
|
64
|
-
# mem_recording = memory_usage(
|
|
65
|
-
# (sedate(anndata.read_zarr, 0.005), (self.filepath,)), interval=0.001
|
|
66
|
-
# )
|
|
67
|
-
# adata = anndata.read_zarr(self.filepath)
|
|
68
|
-
# base_size = mem_recording[-1] - mem_recording[0]
|
|
69
|
-
# print(np.max(mem_recording) - np.min(mem_recording))
|
|
70
|
-
# print(base_size)
|
|
71
|
-
# return (np.max(mem_recording) - np.min(mem_recording)) / base_size
|
|
72
|
-
|
|
73
|
-
# def peakmem_read_backed(self, input_url):
|
|
74
|
-
# anndata.read_zarr(self.filepath, backed="r")
|
|
75
|
-
|
|
76
|
-
# def mem_read_backed_object(self, input_url):
|
|
77
|
-
# return anndata.read_zarr(self.filepath, backed="r")
|
|
78
|
-
|
|
79
41
|
|
|
80
42
|
class H5ADInMemorySizeSuite:
|
|
81
|
-
|
|
82
|
-
params = _urls.keys()
|
|
83
|
-
param_names = ("input_data",)
|
|
43
|
+
filepath = "pbmc_in_mem.h5ad"
|
|
84
44
|
|
|
85
|
-
def
|
|
86
|
-
|
|
45
|
+
def setup_cache(self):
|
|
46
|
+
# Need to specify path because the working directory is special for asv
|
|
47
|
+
pooch.retrieve(
|
|
48
|
+
url=PBMC_3K_URL, known_hash=None, path=Path.cwd(), fname=self.filepath
|
|
49
|
+
)
|
|
87
50
|
|
|
88
51
|
def track_in_memory_size(self, *_):
|
|
89
52
|
adata = anndata.read_h5ad(self.filepath)
|
|
@@ -99,12 +62,13 @@ class H5ADInMemorySizeSuite:
|
|
|
99
62
|
|
|
100
63
|
|
|
101
64
|
class H5ADReadSuite:
|
|
102
|
-
|
|
103
|
-
params = _urls.keys()
|
|
104
|
-
param_names = ("input_data",)
|
|
65
|
+
filepath = "pbmc_read.h5ad"
|
|
105
66
|
|
|
106
|
-
def
|
|
107
|
-
|
|
67
|
+
def setup_cache(self):
|
|
68
|
+
# Need to specify path because the working directory is special for asv
|
|
69
|
+
pooch.retrieve(
|
|
70
|
+
url=PBMC_3K_URL, known_hash=None, path=Path.cwd(), fname=self.filepath
|
|
71
|
+
)
|
|
108
72
|
|
|
109
73
|
def time_read_full(self, *_):
|
|
110
74
|
anndata.read_h5ad(self.filepath)
|
|
@@ -7,7 +7,7 @@ import zarr
|
|
|
7
7
|
from dask.array.core import Array as DaskArray
|
|
8
8
|
from scipy import sparse
|
|
9
9
|
|
|
10
|
-
from anndata import AnnData
|
|
10
|
+
from anndata import AnnData, concat
|
|
11
11
|
from anndata._core.sparse_dataset import sparse_dataset
|
|
12
12
|
from anndata._io.specs import write_elem
|
|
13
13
|
from anndata.experimental import read_elem_lazy
|
|
@@ -21,7 +21,7 @@ def make_alternating_mask(n):
|
|
|
21
21
|
|
|
22
22
|
|
|
23
23
|
class SparseCSRContiguousSlice:
|
|
24
|
-
|
|
24
|
+
_indexers = MappingProxyType({
|
|
25
25
|
"0:1000": slice(0, 1000),
|
|
26
26
|
"0:9000": slice(0, 9000),
|
|
27
27
|
":9000:-1": slice(None, 9000, -1),
|
|
@@ -31,42 +31,80 @@ class SparseCSRContiguousSlice:
|
|
|
31
31
|
"first": 0,
|
|
32
32
|
"alternating": make_alternating_mask(10),
|
|
33
33
|
})
|
|
34
|
+
filepath = "data.zarr"
|
|
34
35
|
params = (
|
|
35
|
-
|
|
36
|
-
(10_000, 10_000),
|
|
37
|
-
# (10_000, 500)
|
|
38
|
-
],
|
|
39
|
-
_slices.keys(),
|
|
36
|
+
list(_indexers.keys()),
|
|
40
37
|
[True, False],
|
|
41
38
|
)
|
|
42
|
-
param_names = (
|
|
39
|
+
param_names = (
|
|
40
|
+
"index",
|
|
41
|
+
"use_dask",
|
|
42
|
+
)
|
|
43
43
|
|
|
44
|
-
def
|
|
44
|
+
def setup_cache(self):
|
|
45
45
|
X = sparse.random(
|
|
46
|
-
|
|
46
|
+
10_000,
|
|
47
|
+
10_000,
|
|
48
|
+
density=0.01,
|
|
49
|
+
format="csr",
|
|
50
|
+
random_state=np.random.default_rng(42),
|
|
47
51
|
)
|
|
48
|
-
|
|
49
|
-
g = zarr.group()
|
|
52
|
+
g = zarr.group(self.filepath)
|
|
50
53
|
write_elem(g, "X", X)
|
|
54
|
+
|
|
55
|
+
def setup(self, index: str, use_dask: bool): # noqa: FBT001
|
|
56
|
+
g = zarr.open(self.filepath)
|
|
51
57
|
self.x = read_elem_lazy(g["X"]) if use_dask else sparse_dataset(g["X"])
|
|
52
58
|
self.adata = AnnData(self.x)
|
|
59
|
+
self.index = self._indexers[index]
|
|
53
60
|
|
|
54
61
|
def time_getitem(self, *_):
|
|
55
|
-
res = self.x[self.
|
|
62
|
+
res = self.x[self.index]
|
|
56
63
|
if isinstance(res, DaskArray):
|
|
57
64
|
res.compute()
|
|
58
65
|
|
|
59
66
|
def peakmem_getitem(self, *_):
|
|
60
|
-
res = self.x[self.
|
|
67
|
+
res = self.x[self.index]
|
|
61
68
|
if isinstance(res, DaskArray):
|
|
62
69
|
res.compute()
|
|
63
70
|
|
|
64
71
|
def time_getitem_adata(self, *_):
|
|
65
|
-
res = self.adata[self.
|
|
72
|
+
res = self.adata[self.index]
|
|
66
73
|
if isinstance(res, DaskArray):
|
|
67
74
|
res.compute()
|
|
68
75
|
|
|
69
76
|
def peakmem_getitem_adata(self, *_):
|
|
70
|
-
res = self.adata[self.
|
|
77
|
+
res = self.adata[self.index]
|
|
71
78
|
if isinstance(res, DaskArray):
|
|
72
79
|
res.compute()
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
class SparseCSRDask:
|
|
83
|
+
filepath = "data.zarr"
|
|
84
|
+
|
|
85
|
+
def setup_cache(self):
|
|
86
|
+
X = sparse.random(
|
|
87
|
+
10_000,
|
|
88
|
+
10_000,
|
|
89
|
+
density=0.01,
|
|
90
|
+
format="csr",
|
|
91
|
+
random_state=np.random.default_rng(42),
|
|
92
|
+
)
|
|
93
|
+
g = zarr.group(self.filepath)
|
|
94
|
+
write_elem(g, "X", X)
|
|
95
|
+
|
|
96
|
+
def setup(self):
|
|
97
|
+
self.group = zarr.group(self.filepath)
|
|
98
|
+
self.adata = AnnData(X=read_elem_lazy(self.group["X"]))
|
|
99
|
+
|
|
100
|
+
def time_concat(self):
|
|
101
|
+
concat([self.adata for i in range(100)])
|
|
102
|
+
|
|
103
|
+
def peakmem_concat(self):
|
|
104
|
+
concat([self.adata for i in range(100)])
|
|
105
|
+
|
|
106
|
+
def time_read(self):
|
|
107
|
+
AnnData(X=read_elem_lazy(self.group["X"]))
|
|
108
|
+
|
|
109
|
+
def peakmem_read(self):
|
|
110
|
+
AnnData(X=read_elem_lazy(self.group["X"]))
|
|
@@ -95,13 +95,31 @@ def gen_indexer(adata, dim, index_kind, ratio):
|
|
|
95
95
|
|
|
96
96
|
def gen_adata(n_obs, n_var, attr_set):
|
|
97
97
|
if "X-csr" in attr_set:
|
|
98
|
-
X = sparse.random(
|
|
98
|
+
X = sparse.random(
|
|
99
|
+
n_obs,
|
|
100
|
+
n_var,
|
|
101
|
+
density=0.1,
|
|
102
|
+
format="csr",
|
|
103
|
+
random_state=np.random.default_rng(42),
|
|
104
|
+
)
|
|
99
105
|
elif "X-dense" in attr_set:
|
|
100
|
-
X = sparse.random(
|
|
106
|
+
X = sparse.random(
|
|
107
|
+
n_obs,
|
|
108
|
+
n_var,
|
|
109
|
+
density=0.1,
|
|
110
|
+
format="csr",
|
|
111
|
+
random_state=np.random.default_rng(42),
|
|
112
|
+
)
|
|
101
113
|
X = X.toarray()
|
|
102
114
|
else:
|
|
103
115
|
# TODO: There's probably a better way to do this
|
|
104
|
-
X = sparse.random(
|
|
116
|
+
X = sparse.random(
|
|
117
|
+
n_obs,
|
|
118
|
+
n_var,
|
|
119
|
+
density=0,
|
|
120
|
+
format="csr",
|
|
121
|
+
random_state=np.random.default_rng(42),
|
|
122
|
+
)
|
|
105
123
|
adata = AnnData(X)
|
|
106
124
|
if "obs,var" in attr_set:
|
|
107
125
|
adata.obs = pd.DataFrame(
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
(v0.12.5)=
|
|
2
|
+
### 0.12.5 {small}`2025-11-03`
|
|
3
|
+
|
|
4
|
+
#### Bug fixes
|
|
5
|
+
|
|
6
|
+
- Remove use of private `read_dataset` internally inside {func}`anndata.experimental.read_elem_lazy` {user}`ilan-gold` ({pr}`2158`)
|
|
7
|
+
- Unblock version restriction on `dask` distributed writing by always using the threading scheduler (see {pr}`2172`) {user}`ilan-gold` ({pr}`2183`)
|
|
8
|
+
|
|
9
|
+
#### Performance
|
|
10
|
+
|
|
11
|
+
- Use `name` on {func}`dask.array.map_blocks` internally when concatenating {class}`anndata.experimental.backed.Dataset2D` objects whose categoricals/nullable types must be converted to dask arrays {user}`ilan-gold` ({pr}`2121`)
|
|
12
|
+
- Enable automatic sharding in zarr v3 via {attr}`anndata.settings.auto_shard_zarr_v3` (via {mod}`zarr`'s own auto sharding mechanism i.e., `shards="auto"`) for all types except {class}`numpy.recarray` {user}`ilan-gold` ({pr}`2167`)
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{func}`dask.array.store` was producing corrupted data with zarr v3 + distributed scheduler + a lock (which we used internally): see {ref}`dask/dask#12109`. Thus dense arrays were potentially being stored with corrupted data. The solution is to remove the lock for newer versions of dask; in older versions, however, it is impossible to store the data without the lock. Thus versions of dask older than `2025.4.0` will not be supported for writing dense data. {user}`ilan-gold`
|
|
@@ -38,7 +38,8 @@ There are two ways of opening remote `zarr` stores from the `zarr-python` packag
|
|
|
38
38
|
Local data generally poses a different set of challenges.
|
|
39
39
|
First, write speeds can be somewhat slow, and second, the creation of many small files on a filesystem can slow it down.
|
|
40
40
|
For the "many small files" problem, `zarr` has introduced {ref}`sharding <zarr:user-guide-sharding>` in the v3 file format.
|
|
41
|
-
|
|
41
|
+
We offer {attr}`anndata.settings.auto_shard_zarr_v3` to hook into zarr's ability to automatically compute shards, which is experimental at the moment.
|
|
42
|
+
Manual sharding, though, requires knowledge of the array element you are writing (such as its shape or data type), so you will need to use {func}`anndata.experimental.write_dispatched` to apply custom sharding.
|
|
42
43
|
For example, you cannot shard a 1D array with `shard` sizes `(256, 256)`.
|
|
43
44
|
Here is a short example, although you should tune the sizes to your own use-case and also use the compression that makes the most sense for you:
|
|
44
45
|
|
|
@@ -24,9 +24,13 @@ overrides.matrix.deps.env-vars = [
|
|
|
24
24
|
{ if = [ "min" ], key = "UV_CONSTRAINT", value = "ci/constraints.txt ci/min-deps.txt" },
|
|
25
25
|
]
|
|
26
26
|
overrides.matrix.deps.pre-install-commands = [
|
|
27
|
-
{ if = [
|
|
28
|
-
|
|
29
|
-
|
|
27
|
+
{ if = [
|
|
28
|
+
"min",
|
|
29
|
+
], value = "uv run ci/scripts/min-deps.py pyproject.toml --all-extras -o ci/min-deps.txt" },
|
|
30
|
+
# To prevent situations like https://github.com/pydata/xarray/issues/10419 going forward, and test against zarr as well
|
|
31
|
+
{ if = [
|
|
32
|
+
"pre",
|
|
33
|
+
], value = "echo 'xarray @ git+https://github.com/pydata/xarray.git\nzarr @ git+https://github.com/zarr-developers/zarr-python.git' > ci/pre-deps.txt" },
|
|
30
34
|
|
|
31
35
|
]
|
|
32
36
|
overrides.matrix.deps.python = [
|
|
@@ -164,6 +164,7 @@ filterwarnings_when_strict = [
|
|
|
164
164
|
"default:Consolidated metadata is:UserWarning",
|
|
165
165
|
"default:.*Structured:zarr.core.dtype.common.UnstableSpecificationWarning",
|
|
166
166
|
"default:.*FixedLengthUTF32:zarr.core.dtype.common.UnstableSpecificationWarning",
|
|
167
|
+
"default:Automatic shard shape inference is experimental",
|
|
167
168
|
]
|
|
168
169
|
python_files = "test_*.py"
|
|
169
170
|
testpaths = [
|
|
@@ -174,7 +175,11 @@ testpaths = [
|
|
|
174
175
|
]
|
|
175
176
|
# For some reason this affects how logging is shown when tests are run
|
|
176
177
|
xfail_strict = true
|
|
177
|
-
markers = [
|
|
178
|
+
markers = [
|
|
179
|
+
"gpu: mark test to run on GPU",
|
|
180
|
+
"zarr_io: mark tests that involve zarr io",
|
|
181
|
+
"dask_distributed: tests that need a distributed client with multiple processes",
|
|
182
|
+
]
|
|
178
183
|
|
|
179
184
|
[tool.ruff]
|
|
180
185
|
src = [ "src" ]
|
|
@@ -78,6 +78,13 @@ def _gen_dataframe_df(
|
|
|
78
78
|
attr: Literal["obs", "var"],
|
|
79
79
|
length: int | None = None,
|
|
80
80
|
):
|
|
81
|
+
if isinstance(anno.index, pd.MultiIndex):
|
|
82
|
+
msg = (
|
|
83
|
+
"pandas.MultiIndex not supported as index for obs or var on declaration.\n\
|
|
84
|
+
You can set `obs_names` manually although most operations after will error or convert to str.\n\
|
|
85
|
+
This behavior will likely be clarified in a future breaking release."
|
|
86
|
+
)
|
|
87
|
+
raise ValueError(msg)
|
|
81
88
|
if length is not None and length != len(anno):
|
|
82
89
|
raise _mk_df_error(source, attr, length, len(anno))
|
|
83
90
|
anno = anno.copy(deep=False)
|