anndata 0.12.4__tar.gz → 0.12.5__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (212) hide show
  1. {anndata-0.12.4 → anndata-0.12.5}/PKG-INFO +1 -1
  2. anndata-0.12.5/benchmarks/benchmarks/dataset2d.py +89 -0
  3. {anndata-0.12.4 → anndata-0.12.5}/benchmarks/benchmarks/sparse_dataset.py +32 -1
  4. anndata-0.12.5/docs/release-notes/0.12.5.md +12 -0
  5. {anndata-0.12.4 → anndata-0.12.5}/docs/tutorials/zarr-v3.md +2 -1
  6. {anndata-0.12.4 → anndata-0.12.5}/pyproject.toml +1 -0
  7. {anndata-0.12.4 → anndata-0.12.5}/src/anndata/_core/merge.py +2 -0
  8. {anndata-0.12.4 → anndata-0.12.5}/src/anndata/_io/specs/lazy_methods.py +6 -5
  9. {anndata-0.12.4 → anndata-0.12.5}/src/anndata/_io/specs/methods.py +15 -12
  10. {anndata-0.12.4 → anndata-0.12.5}/src/anndata/_settings.py +37 -12
  11. {anndata-0.12.4 → anndata-0.12.5}/src/anndata/_settings.pyi +3 -2
  12. {anndata-0.12.4 → anndata-0.12.5}/src/anndata/experimental/backed/_lazy_arrays.py +2 -2
  13. {anndata-0.12.4 → anndata-0.12.5}/src/anndata/tests/helpers.py +22 -0
  14. {anndata-0.12.4 → anndata-0.12.5}/tests/test_concatenate_disk.py +20 -3
  15. {anndata-0.12.4 → anndata-0.12.5}/tests/test_dask.py +13 -14
  16. {anndata-0.12.4 → anndata-0.12.5}/tests/test_dask_view_mem.py +1 -1
  17. {anndata-0.12.4 → anndata-0.12.5}/tests/test_io_dispatched.py +7 -14
  18. {anndata-0.12.4 → anndata-0.12.5}/tests/test_io_elementwise.py +68 -0
  19. {anndata-0.12.4 → anndata-0.12.5}/tests/test_settings.py +1 -1
  20. anndata-0.12.4/benchmarks/benchmarks/dataset2d.py +0 -63
  21. {anndata-0.12.4 → anndata-0.12.5}/.cirun.yml +0 -0
  22. {anndata-0.12.4 → anndata-0.12.5}/.codecov.yml +0 -0
  23. {anndata-0.12.4 → anndata-0.12.5}/.editorconfig +0 -0
  24. {anndata-0.12.4 → anndata-0.12.5}/.github/ISSUE_TEMPLATE/bug-report.yml +0 -0
  25. {anndata-0.12.4 → anndata-0.12.5}/.github/ISSUE_TEMPLATE/config.yml +0 -0
  26. {anndata-0.12.4 → anndata-0.12.5}/.github/ISSUE_TEMPLATE/enhancement-request.yml +0 -0
  27. {anndata-0.12.4 → anndata-0.12.5}/.github/ISSUE_TEMPLATE/question.yml +0 -0
  28. {anndata-0.12.4 → anndata-0.12.5}/.github/PULL_REQUEST_TEMPLATE.md +0 -0
  29. {anndata-0.12.4 → anndata-0.12.5}/.github/dependabot.yml +0 -0
  30. {anndata-0.12.4 → anndata-0.12.5}/.github/workflows/benchmark.yml +0 -0
  31. {anndata-0.12.4 → anndata-0.12.5}/.github/workflows/check-pr.yml +0 -0
  32. {anndata-0.12.4 → anndata-0.12.5}/.github/workflows/close-stale.yml +0 -0
  33. {anndata-0.12.4 → anndata-0.12.5}/.github/workflows/codespell.yml +0 -0
  34. {anndata-0.12.4 → anndata-0.12.5}/.github/workflows/label-stale.yml +0 -0
  35. {anndata-0.12.4 → anndata-0.12.5}/.github/workflows/publish.yml +0 -0
  36. {anndata-0.12.4 → anndata-0.12.5}/.github/workflows/test-cpu.yml +0 -0
  37. {anndata-0.12.4 → anndata-0.12.5}/.github/workflows/test-gpu.yml +0 -0
  38. {anndata-0.12.4 → anndata-0.12.5}/.gitignore +0 -0
  39. {anndata-0.12.4 → anndata-0.12.5}/.gitmodules +0 -0
  40. {anndata-0.12.4 → anndata-0.12.5}/.pre-commit-config.yaml +0 -0
  41. {anndata-0.12.4 → anndata-0.12.5}/.prettierignore +0 -0
  42. {anndata-0.12.4 → anndata-0.12.5}/.prettierrc.yaml +0 -0
  43. {anndata-0.12.4 → anndata-0.12.5}/.readthedocs.yml +0 -0
  44. {anndata-0.12.4 → anndata-0.12.5}/.taplo.toml +0 -0
  45. {anndata-0.12.4 → anndata-0.12.5}/.vscode/launch.json +0 -0
  46. {anndata-0.12.4 → anndata-0.12.5}/.vscode/settings.json +0 -0
  47. {anndata-0.12.4 → anndata-0.12.5}/LICENSE +0 -0
  48. {anndata-0.12.4 → anndata-0.12.5}/README.md +0 -0
  49. {anndata-0.12.4 → anndata-0.12.5}/benchmarks/README.md +0 -0
  50. {anndata-0.12.4 → anndata-0.12.5}/benchmarks/asv.conf.json +0 -0
  51. {anndata-0.12.4 → anndata-0.12.5}/benchmarks/benchmarks/__init__.py +0 -0
  52. {anndata-0.12.4 → anndata-0.12.5}/benchmarks/benchmarks/anndata.py +0 -0
  53. {anndata-0.12.4 → anndata-0.12.5}/benchmarks/benchmarks/backed_hdf5.py +0 -0
  54. {anndata-0.12.4 → anndata-0.12.5}/benchmarks/benchmarks/readwrite.py +0 -0
  55. {anndata-0.12.4 → anndata-0.12.5}/benchmarks/benchmarks/utils.py +0 -0
  56. {anndata-0.12.4 → anndata-0.12.5}/biome.jsonc +0 -0
  57. {anndata-0.12.4 → anndata-0.12.5}/ci/constraints.txt +0 -0
  58. {anndata-0.12.4 → anndata-0.12.5}/ci/scripts/min-deps.py +0 -0
  59. {anndata-0.12.4 → anndata-0.12.5}/ci/scripts/towncrier_automation.py +0 -0
  60. {anndata-0.12.4 → anndata-0.12.5}/docs/Makefile +0 -0
  61. {anndata-0.12.4 → anndata-0.12.5}/docs/_key_contributors.rst +0 -0
  62. {anndata-0.12.4 → anndata-0.12.5}/docs/_static/img/anndata_schema.svg +0 -0
  63. {anndata-0.12.4 → anndata-0.12.5}/docs/_templates/autosummary/class.rst +0 -0
  64. {anndata-0.12.4 → anndata-0.12.5}/docs/api.md +0 -0
  65. {anndata-0.12.4 → anndata-0.12.5}/docs/benchmark-read-write.ipynb +0 -0
  66. {anndata-0.12.4 → anndata-0.12.5}/docs/benchmarks.md +0 -0
  67. {anndata-0.12.4 → anndata-0.12.5}/docs/concatenation.rst +0 -0
  68. {anndata-0.12.4 → anndata-0.12.5}/docs/conf.py +0 -0
  69. {anndata-0.12.4 → anndata-0.12.5}/docs/contributing.md +0 -0
  70. {anndata-0.12.4 → anndata-0.12.5}/docs/extensions/autosummary_skip_inherited.py +0 -0
  71. {anndata-0.12.4 → anndata-0.12.5}/docs/extensions/no_skip_abc_members.py +0 -0
  72. {anndata-0.12.4 → anndata-0.12.5}/docs/extensions/patch_myst_cite.py +0 -0
  73. {anndata-0.12.4 → anndata-0.12.5}/docs/fileformat-prose.md +0 -0
  74. {anndata-0.12.4 → anndata-0.12.5}/docs/index.md +0 -0
  75. {anndata-0.12.4 → anndata-0.12.5}/docs/interoperability.md +0 -0
  76. {anndata-0.12.4 → anndata-0.12.5}/docs/news.md +0 -0
  77. {anndata-0.12.4 → anndata-0.12.5}/docs/references.rst +0 -0
  78. {anndata-0.12.4 → anndata-0.12.5}/docs/release-notes/0.10.0.md +0 -0
  79. {anndata-0.12.4 → anndata-0.12.5}/docs/release-notes/0.10.1.md +0 -0
  80. {anndata-0.12.4 → anndata-0.12.5}/docs/release-notes/0.10.2.md +0 -0
  81. {anndata-0.12.4 → anndata-0.12.5}/docs/release-notes/0.10.3.md +0 -0
  82. {anndata-0.12.4 → anndata-0.12.5}/docs/release-notes/0.10.4.md +0 -0
  83. {anndata-0.12.4 → anndata-0.12.5}/docs/release-notes/0.10.5.md +0 -0
  84. {anndata-0.12.4 → anndata-0.12.5}/docs/release-notes/0.10.6.md +0 -0
  85. {anndata-0.12.4 → anndata-0.12.5}/docs/release-notes/0.10.7.md +0 -0
  86. {anndata-0.12.4 → anndata-0.12.5}/docs/release-notes/0.10.8.md +0 -0
  87. {anndata-0.12.4 → anndata-0.12.5}/docs/release-notes/0.10.9.md +0 -0
  88. {anndata-0.12.4 → anndata-0.12.5}/docs/release-notes/0.11.0.md +0 -0
  89. {anndata-0.12.4 → anndata-0.12.5}/docs/release-notes/0.11.1.md +0 -0
  90. {anndata-0.12.4 → anndata-0.12.5}/docs/release-notes/0.11.2.md +0 -0
  91. {anndata-0.12.4 → anndata-0.12.5}/docs/release-notes/0.11.3.md +0 -0
  92. {anndata-0.12.4 → anndata-0.12.5}/docs/release-notes/0.11.4.md +0 -0
  93. {anndata-0.12.4 → anndata-0.12.5}/docs/release-notes/0.12.0.md +0 -0
  94. {anndata-0.12.4 → anndata-0.12.5}/docs/release-notes/0.12.1.md +0 -0
  95. {anndata-0.12.4 → anndata-0.12.5}/docs/release-notes/0.12.2.md +0 -0
  96. {anndata-0.12.4 → anndata-0.12.5}/docs/release-notes/0.12.3.md +0 -0
  97. {anndata-0.12.4 → anndata-0.12.5}/docs/release-notes/0.12.4.md +0 -0
  98. {anndata-0.12.4 → anndata-0.12.5}/docs/release-notes/0.4.0.md +0 -0
  99. {anndata-0.12.4 → anndata-0.12.5}/docs/release-notes/0.5.0.md +0 -0
  100. {anndata-0.12.4 → anndata-0.12.5}/docs/release-notes/0.6.0.md +0 -0
  101. {anndata-0.12.4 → anndata-0.12.5}/docs/release-notes/0.6.x.md +0 -0
  102. {anndata-0.12.4 → anndata-0.12.5}/docs/release-notes/0.7.0.md +0 -0
  103. {anndata-0.12.4 → anndata-0.12.5}/docs/release-notes/0.7.2.md +0 -0
  104. {anndata-0.12.4 → anndata-0.12.5}/docs/release-notes/0.7.3.md +0 -0
  105. {anndata-0.12.4 → anndata-0.12.5}/docs/release-notes/0.7.4.md +0 -0
  106. {anndata-0.12.4 → anndata-0.12.5}/docs/release-notes/0.7.5.md +0 -0
  107. {anndata-0.12.4 → anndata-0.12.5}/docs/release-notes/0.7.6.md +0 -0
  108. {anndata-0.12.4 → anndata-0.12.5}/docs/release-notes/0.7.7.md +0 -0
  109. {anndata-0.12.4 → anndata-0.12.5}/docs/release-notes/0.7.8.md +0 -0
  110. {anndata-0.12.4 → anndata-0.12.5}/docs/release-notes/0.8.0.md +0 -0
  111. {anndata-0.12.4 → anndata-0.12.5}/docs/release-notes/0.9.0.md +0 -0
  112. {anndata-0.12.4 → anndata-0.12.5}/docs/release-notes/0.9.1.md +0 -0
  113. {anndata-0.12.4 → anndata-0.12.5}/docs/release-notes/0.9.2.md +0 -0
  114. {anndata-0.12.4 → anndata-0.12.5}/docs/release-notes/2172.bug.md +0 -0
  115. {anndata-0.12.4 → anndata-0.12.5}/docs/release-notes/index.md +0 -0
  116. {anndata-0.12.4 → anndata-0.12.5}/docs/tutorials/index.md +0 -0
  117. {anndata-0.12.4 → anndata-0.12.5}/hatch.toml +0 -0
  118. {anndata-0.12.4 → anndata-0.12.5}/src/anndata/__init__.py +0 -0
  119. {anndata-0.12.4 → anndata-0.12.5}/src/anndata/_core/__init__.py +0 -0
  120. {anndata-0.12.4 → anndata-0.12.5}/src/anndata/_core/access.py +0 -0
  121. {anndata-0.12.4 → anndata-0.12.5}/src/anndata/_core/aligned_df.py +0 -0
  122. {anndata-0.12.4 → anndata-0.12.5}/src/anndata/_core/aligned_mapping.py +0 -0
  123. {anndata-0.12.4 → anndata-0.12.5}/src/anndata/_core/anndata.py +0 -0
  124. {anndata-0.12.4 → anndata-0.12.5}/src/anndata/_core/extensions.py +0 -0
  125. {anndata-0.12.4 → anndata-0.12.5}/src/anndata/_core/file_backing.py +0 -0
  126. {anndata-0.12.4 → anndata-0.12.5}/src/anndata/_core/index.py +0 -0
  127. {anndata-0.12.4 → anndata-0.12.5}/src/anndata/_core/raw.py +0 -0
  128. {anndata-0.12.4 → anndata-0.12.5}/src/anndata/_core/sparse_dataset.py +0 -0
  129. {anndata-0.12.4 → anndata-0.12.5}/src/anndata/_core/storage.py +0 -0
  130. {anndata-0.12.4 → anndata-0.12.5}/src/anndata/_core/views.py +0 -0
  131. {anndata-0.12.4 → anndata-0.12.5}/src/anndata/_core/xarray.py +0 -0
  132. {anndata-0.12.4 → anndata-0.12.5}/src/anndata/_io/__init__.py +0 -0
  133. {anndata-0.12.4 → anndata-0.12.5}/src/anndata/_io/h5ad.py +0 -0
  134. {anndata-0.12.4 → anndata-0.12.5}/src/anndata/_io/read.py +0 -0
  135. {anndata-0.12.4 → anndata-0.12.5}/src/anndata/_io/specs/__init__.py +0 -0
  136. {anndata-0.12.4 → anndata-0.12.5}/src/anndata/_io/specs/registry.py +0 -0
  137. {anndata-0.12.4 → anndata-0.12.5}/src/anndata/_io/utils.py +0 -0
  138. {anndata-0.12.4 → anndata-0.12.5}/src/anndata/_io/write.py +0 -0
  139. {anndata-0.12.4 → anndata-0.12.5}/src/anndata/_io/zarr.py +0 -0
  140. {anndata-0.12.4 → anndata-0.12.5}/src/anndata/_types.py +0 -0
  141. {anndata-0.12.4 → anndata-0.12.5}/src/anndata/_warnings.py +0 -0
  142. {anndata-0.12.4 → anndata-0.12.5}/src/anndata/abc.py +0 -0
  143. {anndata-0.12.4 → anndata-0.12.5}/src/anndata/compat/__init__.py +0 -0
  144. {anndata-0.12.4 → anndata-0.12.5}/src/anndata/experimental/__init__.py +0 -0
  145. {anndata-0.12.4 → anndata-0.12.5}/src/anndata/experimental/_dispatch_io.py +0 -0
  146. {anndata-0.12.4 → anndata-0.12.5}/src/anndata/experimental/backed/__init__.py +0 -0
  147. {anndata-0.12.4 → anndata-0.12.5}/src/anndata/experimental/backed/_compat.py +0 -0
  148. {anndata-0.12.4 → anndata-0.12.5}/src/anndata/experimental/backed/_io.py +0 -0
  149. {anndata-0.12.4 → anndata-0.12.5}/src/anndata/experimental/merge.py +0 -0
  150. {anndata-0.12.4 → anndata-0.12.5}/src/anndata/experimental/multi_files/__init__.py +0 -0
  151. {anndata-0.12.4 → anndata-0.12.5}/src/anndata/experimental/multi_files/_anncollection.py +0 -0
  152. {anndata-0.12.4 → anndata-0.12.5}/src/anndata/experimental/pytorch/__init__.py +0 -0
  153. {anndata-0.12.4 → anndata-0.12.5}/src/anndata/experimental/pytorch/_annloader.py +0 -0
  154. {anndata-0.12.4 → anndata-0.12.5}/src/anndata/io.py +0 -0
  155. {anndata-0.12.4 → anndata-0.12.5}/src/anndata/logging.py +0 -0
  156. {anndata-0.12.4 → anndata-0.12.5}/src/anndata/tests/__init__.py +0 -0
  157. {anndata-0.12.4 → anndata-0.12.5}/src/anndata/types.py +0 -0
  158. {anndata-0.12.4 → anndata-0.12.5}/src/anndata/typing.py +0 -0
  159. {anndata-0.12.4 → anndata-0.12.5}/src/anndata/utils.py +0 -0
  160. {anndata-0.12.4 → anndata-0.12.5}/src/testing/anndata/__init__.py +0 -0
  161. {anndata-0.12.4 → anndata-0.12.5}/src/testing/anndata/_doctest.py +0 -0
  162. {anndata-0.12.4 → anndata-0.12.5}/src/testing/anndata/_pytest.py +0 -0
  163. {anndata-0.12.4 → anndata-0.12.5}/src/testing/anndata/py.typed +0 -0
  164. {anndata-0.12.4 → anndata-0.12.5}/tests/conftest.py +0 -0
  165. {anndata-0.12.4 → anndata-0.12.5}/tests/data/adata-comments.tsv +0 -0
  166. {anndata-0.12.4 → anndata-0.12.5}/tests/data/adata.csv +0 -0
  167. {anndata-0.12.4 → anndata-0.12.5}/tests/data/archives/readme.md +0 -0
  168. {anndata-0.12.4 → anndata-0.12.5}/tests/data/archives/v0.11.4/adata.h5ad +0 -0
  169. {anndata-0.12.4 → anndata-0.12.5}/tests/data/archives/v0.11.4/adata.zarr.zip +0 -0
  170. {anndata-0.12.4 → anndata-0.12.5}/tests/data/archives/v0.11.4/readme.md +0 -0
  171. {anndata-0.12.4 → anndata-0.12.5}/tests/data/archives/v0.7.0/adata.h5ad +0 -0
  172. {anndata-0.12.4 → anndata-0.12.5}/tests/data/archives/v0.7.0/adata.zarr.zip +0 -0
  173. {anndata-0.12.4 → anndata-0.12.5}/tests/data/archives/v0.7.8/adata.h5ad +0 -0
  174. {anndata-0.12.4 → anndata-0.12.5}/tests/data/archives/v0.7.8/adata.zarr.zip +0 -0
  175. {anndata-0.12.4 → anndata-0.12.5}/tests/data/excel.xlsx +0 -0
  176. {anndata-0.12.4 → anndata-0.12.5}/tests/data/umi_tools.tsv.gz +0 -0
  177. {anndata-0.12.4 → anndata-0.12.5}/tests/lazy/conftest.py +0 -0
  178. {anndata-0.12.4 → anndata-0.12.5}/tests/lazy/test_concat.py +0 -0
  179. {anndata-0.12.4 → anndata-0.12.5}/tests/lazy/test_read.py +0 -0
  180. {anndata-0.12.4 → anndata-0.12.5}/tests/lazy/test_write.py +0 -0
  181. {anndata-0.12.4 → anndata-0.12.5}/tests/test_anncollection.py +0 -0
  182. {anndata-0.12.4 → anndata-0.12.5}/tests/test_annot.py +0 -0
  183. {anndata-0.12.4 → anndata-0.12.5}/tests/test_awkward.py +0 -0
  184. {anndata-0.12.4 → anndata-0.12.5}/tests/test_backed_dense.py +0 -0
  185. {anndata-0.12.4 → anndata-0.12.5}/tests/test_backed_hdf5.py +0 -0
  186. {anndata-0.12.4 → anndata-0.12.5}/tests/test_backed_sparse.py +0 -0
  187. {anndata-0.12.4 → anndata-0.12.5}/tests/test_base.py +0 -0
  188. {anndata-0.12.4 → anndata-0.12.5}/tests/test_concatenate.py +0 -0
  189. {anndata-0.12.4 → anndata-0.12.5}/tests/test_deprecations.py +0 -0
  190. {anndata-0.12.4 → anndata-0.12.5}/tests/test_extensions.py +0 -0
  191. {anndata-0.12.4 → anndata-0.12.5}/tests/test_get_vector.py +0 -0
  192. {anndata-0.12.4 → anndata-0.12.5}/tests/test_gpu.py +0 -0
  193. {anndata-0.12.4 → anndata-0.12.5}/tests/test_helpers.py +0 -0
  194. {anndata-0.12.4 → anndata-0.12.5}/tests/test_inplace_subset.py +0 -0
  195. {anndata-0.12.4 → anndata-0.12.5}/tests/test_io_backwards_compat.py +0 -0
  196. {anndata-0.12.4 → anndata-0.12.5}/tests/test_io_conversion.py +0 -0
  197. {anndata-0.12.4 → anndata-0.12.5}/tests/test_io_partial.py +0 -0
  198. {anndata-0.12.4 → anndata-0.12.5}/tests/test_io_utils.py +0 -0
  199. {anndata-0.12.4 → anndata-0.12.5}/tests/test_io_warnings.py +0 -0
  200. {anndata-0.12.4 → anndata-0.12.5}/tests/test_layers.py +0 -0
  201. {anndata-0.12.4 → anndata-0.12.5}/tests/test_obsmvarm.py +0 -0
  202. {anndata-0.12.4 → anndata-0.12.5}/tests/test_obspvarp.py +0 -0
  203. {anndata-0.12.4 → anndata-0.12.5}/tests/test_raw.py +0 -0
  204. {anndata-0.12.4 → anndata-0.12.5}/tests/test_readwrite.py +0 -0
  205. {anndata-0.12.4 → anndata-0.12.5}/tests/test_repr.py +0 -0
  206. {anndata-0.12.4 → anndata-0.12.5}/tests/test_structured_arrays.py +0 -0
  207. {anndata-0.12.4 → anndata-0.12.5}/tests/test_transpose.py +0 -0
  208. {anndata-0.12.4 → anndata-0.12.5}/tests/test_uns.py +0 -0
  209. {anndata-0.12.4 → anndata-0.12.5}/tests/test_utils.py +0 -0
  210. {anndata-0.12.4 → anndata-0.12.5}/tests/test_views.py +0 -0
  211. {anndata-0.12.4 → anndata-0.12.5}/tests/test_x.py +0 -0
  212. {anndata-0.12.4 → anndata-0.12.5}/tests/test_xarray.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: anndata
3
- Version: 0.12.4
3
+ Version: 0.12.5
4
4
  Summary: Annotated data.
5
5
  Project-URL: Documentation, https://anndata.readthedocs.io/
6
6
  Project-URL: Source, https://github.com/scverse/anndata
@@ -0,0 +1,89 @@
1
+ from __future__ import annotations
2
+
3
+ from typing import TYPE_CHECKING
4
+
5
+ import h5py
6
+ import numpy as np
7
+ import pandas as pd
8
+ import zarr
9
+
10
+ import anndata as ad
11
+
12
+ if TYPE_CHECKING:
13
+ from typing import Literal
14
+
15
+
16
class Dataset2D:
    """Benchmarks for lazily reading a single-column dataframe.

    The dataframe is written once per array type to both an HDF5 file
    (``data_<array_type>.h5ad``) and a zarr store (``data_<array_type>.zarr``)
    by :meth:`setup_cache`, then reopened by :meth:`setup` and read through
    ``ad.experimental.read_elem_lazy``.

    The benchmark is parametrised over ``store_type``, ``chunks`` and
    ``array_type`` (see ``param_names`` / ``params``).
    """

    param_names = ("store_type", "chunks", "array_type")
    params = (
        ("zarr", "h5ad"),
        ((-1,), None),
        ("cat", "numeric", "string-array", "nullable-string-array"),
    )

    def setup_cache(self):
        """Write the benchmark dataframes to disk for every array type."""
        n_obs = 10000
        array_types = {
            "numeric": np.arange(n_obs),
            "string-array": np.array(["a"] * n_obs),
            "nullable-string-array": pd.array(
                ["a", pd.NA] * (n_obs // 2), dtype="string"
            ),
            "cat": pd.Categorical(np.array(["a"] * n_obs)),
        }
        # The index labels do not depend on the array type or the store.
        index = [f"cell{i}" for i in range(n_obs)]
        for k, v in array_types.items():
            # Close the HDF5 handle deterministically so nothing is left open
            # (and unflushed) once the cache has been written.
            with h5py.File(f"data_{k}.h5ad", mode="w") as h5_store:
                zarr_store = zarr.open(f"data_{k}.zarr", mode="w", zarr_version=2)
                for store in (h5_store, zarr_store):
                    df = pd.DataFrame({"a": v}, index=index)
                    if writing_string_array_on_disk := (
                        isinstance(v, np.ndarray) and df["a"].dtype == "string"
                    ):
                        df["a"] = df["a"].to_numpy()
                    with ad.settings.override(allow_write_nullable_strings=True):
                        ad.io.write_elem(store, "df", df)
                    if writing_string_array_on_disk:
                        # Sanity check: the column must have been encoded as a
                        # plain string array, not a nullable one.
                        assert store["df"]["a"].attrs["encoding-type"] == "string-array"

    def setup(
        self,
        store_type: Literal["zarr", "h5ad"],
        chunks: None | tuple[int],
        array_type: Literal["cat", "numeric", "string-array", "nullable-string-array"],
    ):
        """Open the store for this parameter combination and read it lazily."""
        self.store = (
            h5py.File(f"data_{array_type}.h5ad", mode="r")
            if store_type == "h5ad"
            else zarr.open(f"data_{array_type}.zarr")
        )
        self.ds = ad.experimental.read_elem_lazy(self.store["df"], chunks=chunks)
        self.n_obs = self.ds.shape[0]
        # Built here, with a fixed seed, so that index generation is neither
        # part of the measured call nor different from run to run.
        self.random_index = np.random.default_rng(0).integers(
            0, self.n_obs, self.n_obs // 2
        )

    def teardown(self, *_):
        """Close the HDF5 file opened by :meth:`setup`, if there is one."""
        store = getattr(self, "store", None)
        if store is not None and isinstance(store, h5py.File):
            store.close()

    def time_read_lazy_default(self, *_):
        ad.experimental.read_elem_lazy(self.store["df"])

    def peakmem_read_lazy_default(self, *_):
        ad.experimental.read_elem_lazy(self.store["df"])

    def time_getitem_slice(self, *_):
        self.ds.iloc[0 : (self.n_obs // 2)].to_memory()

    def peakmem_getitem_slice(self, *_):
        self.ds.iloc[0 : (self.n_obs // 2)].to_memory()

    def time_full_to_memory(self, *_):
        self.ds.to_memory()

    def peakmem_full_to_memory(self, *_):
        self.ds.to_memory()

    # NOTE: despite the "bool_mask" name (kept so the benchmark keeps its
    # identity), the index is an array of random integer positions.
    def time_getitem_bool_mask(self, *_):
        self.ds.iloc[self.random_index].to_memory()

    def peakmem_getitem_bool_mask(self, *_):
        self.ds.iloc[self.random_index].to_memory()

    def time_concat(self, *_):
        adatas = [ad.AnnData(obs=self.ds)] * 50
        ad.concat(adatas, join="outer")
@@ -7,7 +7,7 @@ import zarr
7
7
  from dask.array.core import Array as DaskArray
8
8
  from scipy import sparse
9
9
 
10
- from anndata import AnnData
10
+ from anndata import AnnData, concat
11
11
  from anndata._core.sparse_dataset import sparse_dataset
12
12
  from anndata._io.specs import write_elem
13
13
  from anndata.experimental import read_elem_lazy
@@ -77,3 +77,34 @@ class SparseCSRContiguousSlice:
77
77
  res = self.adata[self.index]
78
78
  if isinstance(res, DaskArray):
79
79
  res.compute()
80
+
81
+
82
class SparseCSRDask:
    """Benchmarks for a lazily read sparse CSR matrix stored in zarr.

    :meth:`setup_cache` writes a random 10,000 x 10,000 CSR matrix with
    density 0.01 to ``filepath``; :meth:`setup` wraps the lazily read matrix
    in an ``AnnData``. Reading and concatenation are each benchmarked for
    time and for peak memory.
    """

    filepath = "data.zarr"

    def setup_cache(self):
        """Generate the sparse matrix (fixed seed) and write it as ``X``."""
        rng = np.random.default_rng(42)
        matrix = sparse.random(
            10_000,
            10_000,
            density=0.01,
            format="csr",
            random_state=rng,
        )
        root = zarr.group(self.filepath)
        write_elem(root, "X", matrix)

    def setup(self):
        """Open the zarr group and build the ``AnnData`` used by the benchmarks."""
        self.group = zarr.group(self.filepath)
        self.adata = self._read()

    def _read(self):
        # Wrap the lazily read ``X`` element in an AnnData object.
        return AnnData(X=read_elem_lazy(self.group["X"]))

    def _concat(self):
        # Concatenate 100 references to the same AnnData object.
        concat([self.adata for _ in range(100)])

    def time_concat(self):
        self._concat()

    def peakmem_concat(self):
        self._concat()

    def time_read(self):
        self._read()

    def peakmem_read(self):
        self._read()
@@ -0,0 +1,12 @@
1
+ (v0.12.5)=
2
+ ### 0.12.5 {small}`2025-11-03`
3
+
4
+ #### Bug fixes
5
+
6
+ - Remove use of private `read_dataset` internally inside {func}`anndata.experimental.read_elem_lazy` {user}`ilan-gold` ({pr}`2158`)
7
+ - Unblock version restriction on `dask` distributed writing by always using the threaded scheduler (see {pr}`2172`) {user}`ilan-gold` ({pr}`2183`)
8
+
9
+ #### Performance
10
+
11
+ - Use `name` on {func}`dask.array.map_blocks` internally when concatenating {class}`anndata.experimental.backed.Dataset2D` objects whose categoricals/nullable types must be converted to dask arrays {user}`ilan-gold` ({pr}`2121`)
12
+ - Enable automatic sharding in zarr v3 via {attr}`anndata.settings.auto_shard_zarr_v3` (via {mod}`zarr`'s own auto sharding mechanism i.e., `shards="auto"`) for all types except {class}`numpy.recarray` {user}`ilan-gold` ({pr}`2167`)
@@ -38,7 +38,8 @@ There are two ways of opening remote `zarr` stores from the `zarr-python` packag
38
38
  Local data generally poses a different set of challenges.
39
39
  First, write speeds can be somewhat slow and second, the creation of many small files on a file system can slow down a filesystem.
40
40
  For the "many small files" problem, `zarr` has introduced {ref}`sharding <zarr:user-guide-sharding>` in the v3 file format.
41
- Sharding requires knowledge of the array element you are writing (such as shape or data type), though, and therefore you will need to use {func}`anndata.experimental.write_dispatched` to use sharding.
41
+ We offer {attr}`anndata.settings.auto_shard_zarr_v3` to hook into zarr's ability to automatically compute shards, which is experimental at the moment.
42
+ Manual sharding requires knowledge of the array element you are writing (such as shape or data type), though, and therefore you will need to use {func}`anndata.experimental.write_dispatched` to use custom sharding.
42
43
  For example, you cannot shard a 1D array with `shard` sizes `(256, 256)`.
43
44
  Here is a short example, although you should tune the sizes to your own use-case and also use the compression that makes the most sense for you:
44
45
 
@@ -164,6 +164,7 @@ filterwarnings_when_strict = [
164
164
  "default:Consolidated metadata is:UserWarning",
165
165
  "default:.*Structured:zarr.core.dtype.common.UnstableSpecificationWarning",
166
166
  "default:.*FixedLengthUTF32:zarr.core.dtype.common.UnstableSpecificationWarning",
167
+ "default:Automatic shard shape inference is experimental",
167
168
  ]
168
169
  python_files = "test_*.py"
169
170
  testpaths = [
@@ -4,6 +4,7 @@ Code for merging/ concatenating AnnData objects.
4
4
 
5
5
  from __future__ import annotations
6
6
 
7
+ import uuid
7
8
  from collections import OrderedDict
8
9
  from collections.abc import Callable, Mapping, MutableSet
9
10
  from functools import partial, reduce, singledispatch
@@ -1251,6 +1252,7 @@ def make_dask_col_from_extension_dtype(
1251
1252
  chunks=chunk_size,
1252
1253
  meta=np.array([], dtype=dtype),
1253
1254
  dtype=dtype,
1255
+ name=f"{uuid.uuid4()}/{base_path_or_zarr_group}/{elem_name}-{dtype}",
1254
1256
  )
1255
1257
 
1256
1258
  return da.from_array(col.values, chunks=-1) # in-memory
@@ -25,7 +25,7 @@ from anndata.compat import (
25
25
  ZarrGroup,
26
26
  )
27
27
 
28
- from .registry import _LAZY_REGISTRY, IOSpec
28
+ from .registry import _LAZY_REGISTRY, IOSpec, read_elem
29
29
 
30
30
  if TYPE_CHECKING:
31
31
  from collections.abc import Generator, Mapping, Sequence
@@ -195,6 +195,9 @@ def resolve_chunks(
195
195
  return elem.chunks
196
196
 
197
197
 
198
+ # TODO: `map_blocks` of a string array in h5py is so insanely slow on benchmarking that in the case someone has
199
+ # a pure string annotation (not categoricals! or nullable strings!), it's probably better to pay the memory penalty.
200
+ # In the long run, it might be good to figure out what exactly is going on here but for now, this will do.
198
201
  @_LAZY_REGISTRY.register_read(H5Array, IOSpec("string-array", "0.2.0"))
199
202
  def read_h5_string_array(
200
203
  elem: H5Array,
@@ -204,10 +207,8 @@ def read_h5_string_array(
204
207
  ) -> DaskArray:
205
208
  import dask.array as da
206
209
 
207
- from anndata._io.h5ad import read_dataset
208
-
209
210
  chunks = resolve_chunks(elem, chunks, tuple(elem.shape))
210
- return da.from_array(read_dataset(elem), chunks=chunks)
211
+ return da.from_array(read_elem(elem), chunks=chunks)
211
212
 
212
213
 
213
214
  @_LAZY_REGISTRY.register_read(H5Array, IOSpec("array", "0.2.0"))
@@ -303,7 +304,7 @@ def read_dataframe(
303
304
  # which is used below as well.
304
305
  if not use_range_index:
305
306
  dim_name = elem.attrs["_index"]
306
- # no sense in reading this in multiple times
307
+ # no sense in reading this in multiple times since xarray requires an in-memory index
307
308
  index = elem_dict[dim_name].compute()
308
309
  else:
309
310
  dim_name = DUMMY_RANGE_INDEX_KEY
@@ -102,6 +102,12 @@ def zarr_v3_compressor_compat(dataset_kwargs) -> dict:
102
102
  return dataset_kwargs
103
103
 
104
104
 
105
def zarr_v3_sharding(dataset_kwargs) -> dict:
    """Add ``shards="auto"`` to array-creation kwargs when auto-sharding is on.

    ``dataset_kwargs`` is returned untouched if it already has a ``"shards"``
    entry or if ``ad.settings.auto_shard_zarr_v3`` is falsy. Otherwise a new
    dict with ``"shards": "auto"`` added is returned; the input mapping is
    never mutated.
    """
    # An explicit user choice always wins over the global setting.
    if "shards" in dataset_kwargs:
        return dataset_kwargs
    if not ad.settings.auto_shard_zarr_v3:
        return dataset_kwargs
    return {**dataset_kwargs, "shards": "auto"}
109
+
110
+
105
111
  def _to_cpu_mem_wrapper(write_func):
106
112
  """
107
113
  Wrapper to bring cupy types into cpu memory before writing.
@@ -432,6 +438,7 @@ def write_basic(
432
438
  f.create_dataset(k, data=elem, shape=elem.shape, dtype=dtype, **dataset_kwargs)
433
439
  else:
434
440
  dataset_kwargs = zarr_v3_compressor_compat(dataset_kwargs)
441
+ dataset_kwargs = zarr_v3_sharding(dataset_kwargs)
435
442
  f.create_array(k, shape=elem.shape, dtype=dtype, **dataset_kwargs)
436
443
  # see https://github.com/zarr-developers/zarr-python/discussions/2712
437
444
  if isinstance(elem, ZarrArray | H5Array):
@@ -506,26 +513,17 @@ def write_basic_dask_dask_dense(
506
513
  dataset_kwargs: Mapping[str, Any] = MappingProxyType({}),
507
514
  ):
508
515
  import dask.array as da
509
- import dask.config as dc
510
-
511
- is_distributed = dc.get("scheduler", None) == "dask.distributed"
512
- is_h5 = isinstance(f, H5Group)
513
- if is_distributed and is_h5:
514
- msg = "Cannot write dask arrays to hdf5 when using distributed scheduler"
515
- raise ValueError(msg)
516
516
 
517
517
  dataset_kwargs = dataset_kwargs.copy()
518
+ is_h5 = isinstance(f, H5Group)
518
519
  if not is_h5:
519
520
  dataset_kwargs = zarr_v3_compressor_compat(dataset_kwargs)
520
- # See https://github.com/dask/dask/issues/12109
521
- if Version(version("dask")) < Version("2025.4.0") and is_distributed:
522
- msg = "Writing dense data with a distributed scheduler to zarr could produce corrupted data with a Lock and will error without one when dask is older than 2025.4.0: https://github.com/dask/dask/issues/12109"
523
- raise RuntimeError(msg)
521
+ dataset_kwargs = zarr_v3_sharding(dataset_kwargs)
524
522
  if is_zarr_v2() or is_h5:
525
523
  g = f.require_dataset(k, shape=elem.shape, dtype=elem.dtype, **dataset_kwargs)
526
524
  else:
527
525
  g = f.require_array(k, shape=elem.shape, dtype=elem.dtype, **dataset_kwargs)
528
- da.store(elem, g)
526
+ da.store(elem, g, scheduler="threads")
529
527
 
530
528
 
531
529
  @_REGISTRY.register_read(H5Array, IOSpec("array", "0.2.0"))
@@ -626,6 +624,7 @@ def write_vlen_string_array_zarr(
626
624
  filters, fill_value = None, None
627
625
  if f.metadata.zarr_format == 2:
628
626
  filters, fill_value = [VLenUTF8()], ""
627
+ dataset_kwargs = zarr_v3_sharding(dataset_kwargs)
629
628
  f.create_array(
630
629
  k,
631
630
  shape=elem.shape,
@@ -694,6 +693,9 @@ def write_recarray_zarr(
694
693
  else:
695
694
  dataset_kwargs = dataset_kwargs.copy()
696
695
  dataset_kwargs = zarr_v3_compressor_compat(dataset_kwargs)
696
+ # https://github.com/zarr-developers/zarr-python/issues/3546
697
+ # if "shards" not in dataset_kwargs and ad.settings.auto_shard_zarr_v3:
698
+ # dataset_kwargs = {**dataset_kwargs, "shards": "auto"}
697
699
  f.create_array(k, shape=elem.shape, dtype=elem.dtype, **dataset_kwargs)
698
700
  f[k][...] = elem
699
701
 
@@ -730,6 +732,7 @@ def write_sparse_compressed(
730
732
  attr_name, data=attr, shape=attr.shape, dtype=dtype, **dataset_kwargs
731
733
  )
732
734
  else:
735
+ dataset_kwargs = zarr_v3_sharding(dataset_kwargs)
733
736
  arr = g.create_array(
734
737
  attr_name, shape=attr.shape, dtype=dtype, **dataset_kwargs
735
738
  )
@@ -17,7 +17,7 @@ from .compat import is_zarr_v2, old_positionals
17
17
 
18
18
  if TYPE_CHECKING:
19
19
  from collections.abc import Callable, Sequence
20
- from typing import Any, TypeGuard
20
+ from typing import Any, Self, TypeGuard
21
21
 
22
22
  T = TypeVar("T")
23
23
 
@@ -55,7 +55,7 @@ class RegisteredOption(NamedTuple, Generic[T]):
55
55
  option: str
56
56
  default_value: T
57
57
  description: str
58
- validate: Callable[[T], None]
58
+ validate: Callable[[T, SettingsManager], None]
59
59
  type: object
60
60
 
61
61
  describe = describe
@@ -206,7 +206,7 @@ class SettingsManager:
206
206
  *,
207
207
  default_value: T,
208
208
  description: str,
209
- validate: Callable[[T], None],
209
+ validate: Callable[[T, Self], None],
210
210
  option_type: object | None = None,
211
211
  get_from_env: Callable[[str, T], T] = lambda x, y: y,
212
212
  ) -> None:
@@ -229,7 +229,7 @@ class SettingsManager:
229
229
  Default behavior is to return `default_value` without checking the environment.
230
230
  """
231
231
  try:
232
- validate(default_value)
232
+ validate(default_value, self)
233
233
  except (ValueError, TypeError) as e:
234
234
  e.add_note(f"for option {option!r}")
235
235
  raise e
@@ -307,7 +307,7 @@ class SettingsManager:
307
307
  )
308
308
  raise AttributeError(msg)
309
309
  registered_option = self._registered_options[option]
310
- registered_option.validate(val)
310
+ registered_option.validate(val, self)
311
311
  self._config[option] = val
312
312
 
313
313
  def __getattr__(self, option: str) -> object:
@@ -364,10 +364,13 @@ class SettingsManager:
364
364
  """
365
365
  restore = {a: getattr(self, a) for a in overrides}
366
366
  try:
367
- for attr, value in overrides.items():
368
- setattr(self, attr, value)
367
+ # Preserve order so that settings that depend on each other can be overridden together i.e., always override zarr version before sharding
368
+ for k in self._config:
369
+ if k in overrides:
370
+ setattr(self, k, overrides.get(k))
369
371
  yield None
370
372
  finally:
373
+ # TODO: does the order need to be preserved when restoring?
371
374
  for attr, value in restore.items():
372
375
  setattr(self, attr, value)
373
376
 
@@ -395,7 +398,7 @@ V = TypeVar("V")
395
398
 
396
399
 
397
400
  def gen_validator(_type: type[V]) -> Callable[[V], None]:
398
- def validate_type(val: V) -> None:
401
+ def validate_type(val: V, settings: SettingsManager) -> None:
399
402
  if not isinstance(val, _type):
400
403
  msg = f"{val} not valid {_type}"
401
404
  raise TypeError(msg)
@@ -434,14 +437,28 @@ settings.register(
434
437
  )
435
438
 
436
439
 
437
- def validate_zarr_write_format(format: int):
438
- validate_int(format)
440
+ def validate_zarr_write_format(format: int, settings: SettingsManager):
441
+ validate_int(format, settings)
439
442
  if format not in {2, 3}:
440
443
  msg = "non-v2 zarr on-disk format not supported"
441
444
  raise ValueError(msg)
442
445
  if format == 3 and is_zarr_v2():
443
446
  msg = "Cannot write v3 format against v2 package"
444
447
  raise ValueError(msg)
448
+ if format == 2 and getattr(settings, "auto_shard_zarr_v3", False):
449
+ msg = "Cannot set `zarr_write_format` to 2 with autosharding on. Please set to `False` `anndata.settings.auto_shard_zarr_v3`"
450
+ raise ValueError(msg)
451
+
452
+
453
def validate_zarr_sharding(auto_shard: bool, settings: SettingsManager):  # noqa: FBT001
    """Validate a new value for the ``auto_shard_zarr_v3`` setting.

    Parameters
    ----------
    auto_shard
        Candidate value; checked with ``validate_bool``.
    settings
        The settings manager, consulted for the current ``zarr_write_format``.

    Raises
    ------
    ValueError
        If sharding is being enabled while ``is_zarr_v2()`` is true, or while
        ``settings.zarr_write_format`` is 2.
    """
    validate_bool(auto_shard, settings)
    # Turning the option off is always allowed.
    if not auto_shard:
        return
    if is_zarr_v2():
        msg = "Cannot use sharding with `zarr-python<3`. Please upgrade package and set `anndata.settings.zarr_write_format` to 3."
        raise ValueError(msg)
    if settings.zarr_write_format == 2:
        msg = "Cannot shard v2 format data. Please set `anndata.settings.zarr_write_format` to 3."
        raise ValueError(msg)
445
462
 
446
463
 
447
464
  settings.register(
@@ -458,8 +475,8 @@ settings.register(
458
475
  )
459
476
 
460
477
 
461
def validate_sparse_settings(val: Any, settings: SettingsManager) -> None:
    """Validate a sparse-related setting value.

    Delegates entirely to ``validate_bool``; ``settings`` is forwarded so the
    signature matches the two-argument validator convention.
    """
    validate_bool(val, settings)
463
480
 
464
481
 
465
482
  settings.register(
@@ -486,6 +503,14 @@ settings.register(
486
503
  get_from_env=check_and_get_bool,
487
504
  )
488
505
 
506
# Register the opt-in switch for zarr v3 automatic sharding.
# NOTE(review): the description says the setting is "ignored" for v2, while
# `validate_zarr_sharding` raises when it is enabled with
# `zarr_write_format == 2` -- confirm which behaviour is intended.
settings.register(
    "auto_shard_zarr_v3",
    default_value=False,
    description=(
        "Whether or not to use zarr's auto computation of sharding for v3. "
        "For v2 this setting will be ignored. "
        "The setting will apply to all calls to anndata's writing mechanism "
        "(write_zarr / write_elem) and will **not** override any user-defined "
        "kwargs for shards."
    ),
    validate=validate_zarr_sharding,
    get_from_env=check_and_get_bool,
)
513
+
489
514
 
490
515
  ##################################################################################
491
516
  ##################################################################################
@@ -2,7 +2,7 @@ from collections.abc import Callable as Callable
2
2
  from collections.abc import Generator, Iterable
3
3
  from contextlib import contextmanager
4
4
  from dataclasses import dataclass
5
- from typing import Literal, TypeVar
5
+ from typing import Literal, Self, TypeVar
6
6
 
7
7
  _T = TypeVar("_T")
8
8
 
@@ -25,7 +25,7 @@ class SettingsManager:
25
25
  *,
26
26
  default_value: _T,
27
27
  description: str,
28
- validate: Callable[[_T], None],
28
+ validate: Callable[[_T, Self], None],
29
29
  option_type: object | None = None,
30
30
  get_from_env: Callable[[str, _T], _T] = ...,
31
31
  ) -> None: ...
@@ -46,5 +46,6 @@ class _AnnDataSettingsManager(SettingsManager):
46
46
  use_sparse_array_on_read: bool = False
47
47
  min_rows_for_chunked_h5_copy: int = 1000
48
48
  disallow_forward_slash_in_h5ad: bool = False
49
+ auto_shard_zarr_v3: bool = False
49
50
 
50
51
  settings: _AnnDataSettingsManager
@@ -111,9 +111,9 @@ class CategoricalArray(XBackendArray, Generic[K]):
111
111
  def categories(self) -> np.ndarray:
112
112
  if isinstance(self._categories, ZarrArray):
113
113
  return self._categories[...]
114
- from ..._io.h5ad import read_dataset
114
+ from anndata.io import read_elem
115
115
 
116
- return read_dataset(self._categories)
116
+ return read_elem(self._categories)
117
117
 
118
118
  def __getitem__(
119
119
  self, key: xr.core.indexing.ExplicitIndexer
@@ -14,6 +14,7 @@ import h5py
14
14
  import numpy as np
15
15
  import pandas as pd
16
16
  import pytest
17
+ import zarr
17
18
  from pandas.api.types import is_numeric_dtype
18
19
  from scipy import sparse
19
20
 
@@ -34,6 +35,7 @@ from anndata.compat import (
34
35
  XDataArray,
35
36
  XDataset,
36
37
  ZarrArray,
38
+ ZarrGroup,
37
39
  is_zarr_v2,
38
40
  )
39
41
  from anndata.utils import asarray
@@ -1187,3 +1189,23 @@ def get_multiindex_columns_df(shape: tuple[int, int]) -> pd.DataFrame:
1187
1189
  + list(itertools.product(["b"], range(shape[1] // 2)))
1188
1190
  ),
1189
1191
  )
1192
+
1193
+
1194
def visititems_zarr(
    z: ZarrGroup, visitor: Callable[[str, ZarrGroup | zarr.Array], None]
) -> None:
    """Recursively walk a zarr group, calling ``visitor`` on each non-group child.

    Parameters
    ----------
    z
        Group to traverse.
    visitor
        Callback invoked as ``visitor(key, child)`` for every child that is
        not a ``ZarrGroup``. ``key`` is the child's name within its immediate
        parent, not a path from the root. Groups themselves are descended
        into but never passed to ``visitor``.
    """
    for name in z:
        child = z[name]
        if not isinstance(child, ZarrGroup):
            visitor(name, child)
            continue
        visititems_zarr(child, visitor)
1203
+
1204
+
1205
def check_all_sharded(g: ZarrGroup):
    """Assert that every eligible array beneath ``g`` was written with shards.

    Zero-dimensional arrays and arrays with a structured dtype are exempt from
    the check; everything else must have a non-``None`` ``shards`` attribute.

    Raises
    ------
    AssertionError
        If an eligible array has ``shards`` set to ``None``.
    """

    def _assert_sharded(key: str, arr: zarr.Array | zarr.Group):
        if not isinstance(arr, zarr.Array):
            return
        if arr.shape == ():
            return
        # Check for recarray via https://numpy.org/doc/stable/user/basics.rec.html#manipulating-and-displaying-structured-datatypes
        if arr.dtype.names is not None:
            return
        assert arr.shards is not None

    visititems_zarr(g, visitor=_assert_sharded)
@@ -8,12 +8,13 @@ import pandas as pd
8
8
  import pytest
9
9
  from scipy import sparse
10
10
 
11
- from anndata import AnnData, concat
11
+ from anndata import AnnData, concat, settings
12
12
  from anndata._core import merge
13
13
  from anndata._core.merge import _resolve_axis
14
+ from anndata.compat import is_zarr_v2
14
15
  from anndata.experimental.merge import as_group, concat_on_disk
15
16
  from anndata.io import read_elem, write_elem
16
- from anndata.tests.helpers import assert_equal, gen_adata
17
+ from anndata.tests.helpers import assert_equal, check_all_sharded, gen_adata
17
18
  from anndata.utils import asarray
18
19
 
19
20
  if TYPE_CHECKING:
@@ -230,7 +231,7 @@ def xxxm_adatas():
230
231
  X=sparse.csr_matrix((2, 100)),
231
232
  obs=pd.DataFrame(index=gen_index(2)),
232
233
  obsm={
233
- "sparse": np.arange(8).reshape(2, 4),
234
+ "sparse": sparse.csr_matrix(np.arange(8).reshape(2, 4)),
234
235
  "dense": np.arange(4, 8).reshape(2, 2),
235
236
  "df": pd.DataFrame(
236
237
  {
@@ -253,6 +254,22 @@ def test_concatenate_xxxm(xxxm_adatas, tmp_path, file_format, join_type):
253
254
  assert_eq_concat_on_disk(xxxm_adatas, tmp_path, file_format, join=join_type)
254
255
 
255
256
 
257
@pytest.mark.skipif(is_zarr_v2(), reason="auto sharding is allowed only for zarr v3.")
def test_concatenate_zarr_v3_shard(xxxm_adatas, tmp_path):
    """On-disk concatenation with auto-sharding enabled yields sharded v3 output.

    Runs the concat-on-disk equality check with ``auto_shard_zarr_v3`` and
    ``zarr_write_format=3`` overridden, then reopens ``tmp_path`` to verify the
    store's format version and that its arrays are sharded.
    """
    import zarr

    with settings.override(auto_shard_zarr_v3=True, zarr_write_format=3):
        assert_eq_concat_on_disk(xxxm_adatas, tmp_path, file_format="zarr")
    g = zarr.open(tmp_path)
    assert g.metadata.zarr_format == 3
    # `check_all_sharded` performs the traversal with its own visitor, so no
    # local visitor function is needed here.
    check_all_sharded(g)
271
+
272
+
256
273
  def test_output_dir_exists(tmp_path):
257
274
  in_pth = tmp_path / "in.h5ad"
258
275
  out_pth = tmp_path / "does_not_exist" / "out.h5ad"
@@ -4,14 +4,12 @@ For tests using dask
4
4
 
5
5
  from __future__ import annotations
6
6
 
7
- from importlib.metadata import version
8
7
  from pathlib import Path
9
8
  from typing import TYPE_CHECKING
10
9
 
11
10
  import numpy as np
12
11
  import pandas as pd
13
12
  import pytest
14
- from packaging.version import Version
15
13
  from scipy import sparse
16
14
 
17
15
  import anndata as ad
@@ -25,6 +23,7 @@ from anndata.tests.helpers import (
25
23
  as_dense_dask_array,
26
24
  as_sparse_dask_array,
27
25
  assert_equal,
26
+ check_all_sharded,
28
27
  gen_adata,
29
28
  )
30
29
 
@@ -111,12 +110,20 @@ def test_dask_write(adata, tmp_path, diskfmt):
111
110
 
112
111
  @pytest.mark.xdist_group("dask")
113
112
  @pytest.mark.dask_distributed
113
+ @pytest.mark.parametrize(
114
+ "auto_shard_zarr_v3",
115
+ [pytest.param(True, id="shard"), pytest.param(False, id="no-shard")],
116
+ )
114
117
  def test_dask_distributed_write(
115
118
  adata: AnnData,
116
119
  tmp_path: Path,
117
120
  diskfmt: Literal["h5ad", "zarr"],
118
121
  local_cluster_addr: str,
122
+ *,
123
+ auto_shard_zarr_v3: bool,
119
124
  ) -> None:
125
+ if auto_shard_zarr_v3 and ad.settings.zarr_write_format == 2:
126
+ pytest.skip(reason="Cannot shard v2 data")
120
127
  import dask.array as da
121
128
  import dask.distributed as dd
122
129
  import numpy as np
@@ -130,20 +137,12 @@ def test_dask_distributed_write(
130
137
  adata.obsm["b"] = da.random.random((M, 10))
131
138
  adata.varm["a"] = da.random.random((N, 10))
132
139
  orig = adata
133
- is_h5 = diskfmt == "h5ad"
134
- is_corrupted_dask = Version(version("dask")) < Version("2025.4.0")
135
- if is_corrupted_dask or is_h5:
136
- with pytest.raises(
137
- ValueError if is_h5 else RuntimeError,
138
- match=r"Cannot write dask arrays to hdf5"
139
- if is_h5
140
- else r"Writing dense data with a distributed scheduler to zarr",
141
- ):
142
- ad.io.write_elem(g, "", orig)
143
- return
144
- ad.io.write_elem(g, "", orig)
140
+ with ad.settings.override(auto_shard_zarr_v3=auto_shard_zarr_v3):
141
+ ad.io.write_elem(g, "", orig)
145
142
  # TODO: See https://github.com/zarr-developers/zarr-python/issues/2716
146
143
  g = as_group(pth, mode="r")
144
+ if auto_shard_zarr_v3:
145
+ check_all_sharded(g)
147
146
  curr = ad.io.read_elem(g)
148
147
 
149
148
  with pytest.raises(AssertionError):
@@ -72,7 +72,7 @@ def _alloc_cache():
72
72
  # As of 2025.09.* dask, this needs a bit more than the previous 1.5mb.
73
73
  # TODO: Why?
74
74
  @pytest.mark.usefixtures("_alloc_cache")
75
- @pytest.mark.limit_memory("1.7 MB")
75
+ @pytest.mark.limit_memory("2.2 MB")
76
76
  def test_size_of_view(mapping_name, give_chunks):
77
77
  import dask.array as da
78
78
 
@@ -12,10 +12,14 @@ import anndata as ad
12
12
  from anndata._io.zarr import open_write_group
13
13
  from anndata.compat import CSArray, CSMatrix, ZarrGroup, is_zarr_v2
14
14
  from anndata.experimental import read_dispatched, write_dispatched
15
- from anndata.tests.helpers import GEN_ADATA_NO_XARRAY_ARGS, assert_equal, gen_adata
15
+ from anndata.tests.helpers import (
16
+ GEN_ADATA_NO_XARRAY_ARGS,
17
+ assert_equal,
18
+ gen_adata,
19
+ visititems_zarr,
20
+ )
16
21
 
17
22
  if TYPE_CHECKING:
18
- from collections.abc import Callable
19
23
  from pathlib import Path
20
24
  from typing import Literal
21
25
 
@@ -180,18 +184,7 @@ def test_write_dispatched_chunks(tmp_path: Path):
180
184
  if is_zarr_v2():
181
185
  z.visititems(check_chunking)
182
186
  else:
183
-
184
- def visititems(
185
- z: ZarrGroup, visitor: Callable[[str, ZarrGroup | zarr.Array], None]
186
- ) -> None:
187
- for key in z:
188
- maybe_group = z[key]
189
- if isinstance(maybe_group, ZarrGroup):
190
- visititems(maybe_group, visitor)
191
- else:
192
- visitor(key, maybe_group)
193
-
194
- visititems(z, check_chunking)
187
+ visititems_zarr(z, check_chunking)
195
188
 
196
189
 
197
190
  @pytest.mark.zarr_io