anndata 0.12.3__tar.gz → 0.12.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (210) hide show
  1. {anndata-0.12.3 → anndata-0.12.4}/.github/workflows/test-cpu.yml +2 -2
  2. {anndata-0.12.3 → anndata-0.12.4}/.github/workflows/test-gpu.yml +1 -1
  3. {anndata-0.12.3 → anndata-0.12.4}/PKG-INFO +1 -1
  4. anndata-0.12.4/benchmarks/benchmarks/backed_hdf5.py +112 -0
  5. {anndata-0.12.3 → anndata-0.12.4}/benchmarks/benchmarks/dataset2d.py +21 -19
  6. {anndata-0.12.3 → anndata-0.12.4}/benchmarks/benchmarks/readwrite.py +12 -48
  7. {anndata-0.12.3 → anndata-0.12.4}/benchmarks/benchmarks/sparse_dataset.py +22 -15
  8. {anndata-0.12.3 → anndata-0.12.4}/benchmarks/benchmarks/utils.py +21 -3
  9. anndata-0.12.4/docs/release-notes/0.12.4.md +4 -0
  10. anndata-0.12.4/docs/release-notes/2172.bug.md +1 -0
  11. {anndata-0.12.3 → anndata-0.12.4}/hatch.toml +7 -3
  12. {anndata-0.12.3 → anndata-0.12.4}/pyproject.toml +5 -1
  13. {anndata-0.12.3 → anndata-0.12.4}/src/anndata/_core/aligned_df.py +7 -0
  14. {anndata-0.12.3 → anndata-0.12.4}/src/anndata/_core/index.py +136 -23
  15. {anndata-0.12.3 → anndata-0.12.4}/src/anndata/_core/merge.py +6 -5
  16. {anndata-0.12.3 → anndata-0.12.4}/src/anndata/_core/sparse_dataset.py +4 -3
  17. {anndata-0.12.3 → anndata-0.12.4}/src/anndata/_io/specs/methods.py +16 -25
  18. {anndata-0.12.3 → anndata-0.12.4}/src/anndata/experimental/backed/_lazy_arrays.py +5 -2
  19. {anndata-0.12.3 → anndata-0.12.4}/tests/conftest.py +7 -4
  20. {anndata-0.12.3 → anndata-0.12.4}/tests/lazy/test_concat.py +1 -0
  21. {anndata-0.12.3 → anndata-0.12.4}/tests/test_annot.py +24 -1
  22. {anndata-0.12.3 → anndata-0.12.4}/tests/test_backed_hdf5.py +102 -9
  23. {anndata-0.12.3 → anndata-0.12.4}/tests/test_dask.py +14 -2
  24. {anndata-0.12.3 → anndata-0.12.4}/tests/test_inplace_subset.py +1 -0
  25. {anndata-0.12.3 → anndata-0.12.4}/tests/test_io_elementwise.py +4 -1
  26. {anndata-0.12.3 → anndata-0.12.4}/tests/test_readwrite.py +6 -9
  27. {anndata-0.12.3 → anndata-0.12.4}/.cirun.yml +0 -0
  28. {anndata-0.12.3 → anndata-0.12.4}/.codecov.yml +0 -0
  29. {anndata-0.12.3 → anndata-0.12.4}/.editorconfig +0 -0
  30. {anndata-0.12.3 → anndata-0.12.4}/.github/ISSUE_TEMPLATE/bug-report.yml +0 -0
  31. {anndata-0.12.3 → anndata-0.12.4}/.github/ISSUE_TEMPLATE/config.yml +0 -0
  32. {anndata-0.12.3 → anndata-0.12.4}/.github/ISSUE_TEMPLATE/enhancement-request.yml +0 -0
  33. {anndata-0.12.3 → anndata-0.12.4}/.github/ISSUE_TEMPLATE/question.yml +0 -0
  34. {anndata-0.12.3 → anndata-0.12.4}/.github/PULL_REQUEST_TEMPLATE.md +0 -0
  35. {anndata-0.12.3 → anndata-0.12.4}/.github/dependabot.yml +0 -0
  36. {anndata-0.12.3 → anndata-0.12.4}/.github/workflows/benchmark.yml +0 -0
  37. {anndata-0.12.3 → anndata-0.12.4}/.github/workflows/check-pr.yml +0 -0
  38. {anndata-0.12.3 → anndata-0.12.4}/.github/workflows/close-stale.yml +0 -0
  39. {anndata-0.12.3 → anndata-0.12.4}/.github/workflows/codespell.yml +0 -0
  40. {anndata-0.12.3 → anndata-0.12.4}/.github/workflows/label-stale.yml +0 -0
  41. {anndata-0.12.3 → anndata-0.12.4}/.github/workflows/publish.yml +0 -0
  42. {anndata-0.12.3 → anndata-0.12.4}/.gitignore +0 -0
  43. {anndata-0.12.3 → anndata-0.12.4}/.gitmodules +0 -0
  44. {anndata-0.12.3 → anndata-0.12.4}/.pre-commit-config.yaml +0 -0
  45. {anndata-0.12.3 → anndata-0.12.4}/.prettierignore +0 -0
  46. {anndata-0.12.3 → anndata-0.12.4}/.prettierrc.yaml +0 -0
  47. {anndata-0.12.3 → anndata-0.12.4}/.readthedocs.yml +0 -0
  48. {anndata-0.12.3 → anndata-0.12.4}/.taplo.toml +0 -0
  49. {anndata-0.12.3 → anndata-0.12.4}/.vscode/launch.json +0 -0
  50. {anndata-0.12.3 → anndata-0.12.4}/.vscode/settings.json +0 -0
  51. {anndata-0.12.3 → anndata-0.12.4}/LICENSE +0 -0
  52. {anndata-0.12.3 → anndata-0.12.4}/README.md +0 -0
  53. {anndata-0.12.3 → anndata-0.12.4}/benchmarks/README.md +0 -0
  54. {anndata-0.12.3 → anndata-0.12.4}/benchmarks/asv.conf.json +0 -0
  55. {anndata-0.12.3 → anndata-0.12.4}/benchmarks/benchmarks/__init__.py +0 -0
  56. {anndata-0.12.3 → anndata-0.12.4}/benchmarks/benchmarks/anndata.py +0 -0
  57. {anndata-0.12.3 → anndata-0.12.4}/biome.jsonc +0 -0
  58. {anndata-0.12.3 → anndata-0.12.4}/ci/constraints.txt +0 -0
  59. {anndata-0.12.3 → anndata-0.12.4}/ci/scripts/min-deps.py +0 -0
  60. {anndata-0.12.3 → anndata-0.12.4}/ci/scripts/towncrier_automation.py +0 -0
  61. {anndata-0.12.3 → anndata-0.12.4}/docs/Makefile +0 -0
  62. {anndata-0.12.3 → anndata-0.12.4}/docs/_key_contributors.rst +0 -0
  63. {anndata-0.12.3 → anndata-0.12.4}/docs/_static/img/anndata_schema.svg +0 -0
  64. {anndata-0.12.3 → anndata-0.12.4}/docs/_templates/autosummary/class.rst +0 -0
  65. {anndata-0.12.3 → anndata-0.12.4}/docs/api.md +0 -0
  66. {anndata-0.12.3 → anndata-0.12.4}/docs/benchmark-read-write.ipynb +0 -0
  67. {anndata-0.12.3 → anndata-0.12.4}/docs/benchmarks.md +0 -0
  68. {anndata-0.12.3 → anndata-0.12.4}/docs/concatenation.rst +0 -0
  69. {anndata-0.12.3 → anndata-0.12.4}/docs/conf.py +0 -0
  70. {anndata-0.12.3 → anndata-0.12.4}/docs/contributing.md +0 -0
  71. {anndata-0.12.3 → anndata-0.12.4}/docs/extensions/autosummary_skip_inherited.py +0 -0
  72. {anndata-0.12.3 → anndata-0.12.4}/docs/extensions/no_skip_abc_members.py +0 -0
  73. {anndata-0.12.3 → anndata-0.12.4}/docs/extensions/patch_myst_cite.py +0 -0
  74. {anndata-0.12.3 → anndata-0.12.4}/docs/fileformat-prose.md +0 -0
  75. {anndata-0.12.3 → anndata-0.12.4}/docs/index.md +0 -0
  76. {anndata-0.12.3 → anndata-0.12.4}/docs/interoperability.md +0 -0
  77. {anndata-0.12.3 → anndata-0.12.4}/docs/news.md +0 -0
  78. {anndata-0.12.3 → anndata-0.12.4}/docs/references.rst +0 -0
  79. {anndata-0.12.3 → anndata-0.12.4}/docs/release-notes/0.10.0.md +0 -0
  80. {anndata-0.12.3 → anndata-0.12.4}/docs/release-notes/0.10.1.md +0 -0
  81. {anndata-0.12.3 → anndata-0.12.4}/docs/release-notes/0.10.2.md +0 -0
  82. {anndata-0.12.3 → anndata-0.12.4}/docs/release-notes/0.10.3.md +0 -0
  83. {anndata-0.12.3 → anndata-0.12.4}/docs/release-notes/0.10.4.md +0 -0
  84. {anndata-0.12.3 → anndata-0.12.4}/docs/release-notes/0.10.5.md +0 -0
  85. {anndata-0.12.3 → anndata-0.12.4}/docs/release-notes/0.10.6.md +0 -0
  86. {anndata-0.12.3 → anndata-0.12.4}/docs/release-notes/0.10.7.md +0 -0
  87. {anndata-0.12.3 → anndata-0.12.4}/docs/release-notes/0.10.8.md +0 -0
  88. {anndata-0.12.3 → anndata-0.12.4}/docs/release-notes/0.10.9.md +0 -0
  89. {anndata-0.12.3 → anndata-0.12.4}/docs/release-notes/0.11.0.md +0 -0
  90. {anndata-0.12.3 → anndata-0.12.4}/docs/release-notes/0.11.1.md +0 -0
  91. {anndata-0.12.3 → anndata-0.12.4}/docs/release-notes/0.11.2.md +0 -0
  92. {anndata-0.12.3 → anndata-0.12.4}/docs/release-notes/0.11.3.md +0 -0
  93. {anndata-0.12.3 → anndata-0.12.4}/docs/release-notes/0.11.4.md +0 -0
  94. {anndata-0.12.3 → anndata-0.12.4}/docs/release-notes/0.12.0.md +0 -0
  95. {anndata-0.12.3 → anndata-0.12.4}/docs/release-notes/0.12.1.md +0 -0
  96. {anndata-0.12.3 → anndata-0.12.4}/docs/release-notes/0.12.2.md +0 -0
  97. {anndata-0.12.3 → anndata-0.12.4}/docs/release-notes/0.12.3.md +0 -0
  98. {anndata-0.12.3 → anndata-0.12.4}/docs/release-notes/0.4.0.md +0 -0
  99. {anndata-0.12.3 → anndata-0.12.4}/docs/release-notes/0.5.0.md +0 -0
  100. {anndata-0.12.3 → anndata-0.12.4}/docs/release-notes/0.6.0.md +0 -0
  101. {anndata-0.12.3 → anndata-0.12.4}/docs/release-notes/0.6.x.md +0 -0
  102. {anndata-0.12.3 → anndata-0.12.4}/docs/release-notes/0.7.0.md +0 -0
  103. {anndata-0.12.3 → anndata-0.12.4}/docs/release-notes/0.7.2.md +0 -0
  104. {anndata-0.12.3 → anndata-0.12.4}/docs/release-notes/0.7.3.md +0 -0
  105. {anndata-0.12.3 → anndata-0.12.4}/docs/release-notes/0.7.4.md +0 -0
  106. {anndata-0.12.3 → anndata-0.12.4}/docs/release-notes/0.7.5.md +0 -0
  107. {anndata-0.12.3 → anndata-0.12.4}/docs/release-notes/0.7.6.md +0 -0
  108. {anndata-0.12.3 → anndata-0.12.4}/docs/release-notes/0.7.7.md +0 -0
  109. {anndata-0.12.3 → anndata-0.12.4}/docs/release-notes/0.7.8.md +0 -0
  110. {anndata-0.12.3 → anndata-0.12.4}/docs/release-notes/0.8.0.md +0 -0
  111. {anndata-0.12.3 → anndata-0.12.4}/docs/release-notes/0.9.0.md +0 -0
  112. {anndata-0.12.3 → anndata-0.12.4}/docs/release-notes/0.9.1.md +0 -0
  113. {anndata-0.12.3 → anndata-0.12.4}/docs/release-notes/0.9.2.md +0 -0
  114. {anndata-0.12.3 → anndata-0.12.4}/docs/release-notes/index.md +0 -0
  115. {anndata-0.12.3 → anndata-0.12.4}/docs/tutorials/index.md +0 -0
  116. {anndata-0.12.3 → anndata-0.12.4}/docs/tutorials/zarr-v3.md +0 -0
  117. {anndata-0.12.3 → anndata-0.12.4}/src/anndata/__init__.py +0 -0
  118. {anndata-0.12.3 → anndata-0.12.4}/src/anndata/_core/__init__.py +0 -0
  119. {anndata-0.12.3 → anndata-0.12.4}/src/anndata/_core/access.py +0 -0
  120. {anndata-0.12.3 → anndata-0.12.4}/src/anndata/_core/aligned_mapping.py +0 -0
  121. {anndata-0.12.3 → anndata-0.12.4}/src/anndata/_core/anndata.py +0 -0
  122. {anndata-0.12.3 → anndata-0.12.4}/src/anndata/_core/extensions.py +0 -0
  123. {anndata-0.12.3 → anndata-0.12.4}/src/anndata/_core/file_backing.py +0 -0
  124. {anndata-0.12.3 → anndata-0.12.4}/src/anndata/_core/raw.py +0 -0
  125. {anndata-0.12.3 → anndata-0.12.4}/src/anndata/_core/storage.py +0 -0
  126. {anndata-0.12.3 → anndata-0.12.4}/src/anndata/_core/views.py +0 -0
  127. {anndata-0.12.3 → anndata-0.12.4}/src/anndata/_core/xarray.py +0 -0
  128. {anndata-0.12.3 → anndata-0.12.4}/src/anndata/_io/__init__.py +0 -0
  129. {anndata-0.12.3 → anndata-0.12.4}/src/anndata/_io/h5ad.py +0 -0
  130. {anndata-0.12.3 → anndata-0.12.4}/src/anndata/_io/read.py +0 -0
  131. {anndata-0.12.3 → anndata-0.12.4}/src/anndata/_io/specs/__init__.py +0 -0
  132. {anndata-0.12.3 → anndata-0.12.4}/src/anndata/_io/specs/lazy_methods.py +0 -0
  133. {anndata-0.12.3 → anndata-0.12.4}/src/anndata/_io/specs/registry.py +0 -0
  134. {anndata-0.12.3 → anndata-0.12.4}/src/anndata/_io/utils.py +0 -0
  135. {anndata-0.12.3 → anndata-0.12.4}/src/anndata/_io/write.py +0 -0
  136. {anndata-0.12.3 → anndata-0.12.4}/src/anndata/_io/zarr.py +0 -0
  137. {anndata-0.12.3 → anndata-0.12.4}/src/anndata/_settings.py +0 -0
  138. {anndata-0.12.3 → anndata-0.12.4}/src/anndata/_settings.pyi +0 -0
  139. {anndata-0.12.3 → anndata-0.12.4}/src/anndata/_types.py +0 -0
  140. {anndata-0.12.3 → anndata-0.12.4}/src/anndata/_warnings.py +0 -0
  141. {anndata-0.12.3 → anndata-0.12.4}/src/anndata/abc.py +0 -0
  142. {anndata-0.12.3 → anndata-0.12.4}/src/anndata/compat/__init__.py +0 -0
  143. {anndata-0.12.3 → anndata-0.12.4}/src/anndata/experimental/__init__.py +0 -0
  144. {anndata-0.12.3 → anndata-0.12.4}/src/anndata/experimental/_dispatch_io.py +0 -0
  145. {anndata-0.12.3 → anndata-0.12.4}/src/anndata/experimental/backed/__init__.py +0 -0
  146. {anndata-0.12.3 → anndata-0.12.4}/src/anndata/experimental/backed/_compat.py +0 -0
  147. {anndata-0.12.3 → anndata-0.12.4}/src/anndata/experimental/backed/_io.py +0 -0
  148. {anndata-0.12.3 → anndata-0.12.4}/src/anndata/experimental/merge.py +0 -0
  149. {anndata-0.12.3 → anndata-0.12.4}/src/anndata/experimental/multi_files/__init__.py +0 -0
  150. {anndata-0.12.3 → anndata-0.12.4}/src/anndata/experimental/multi_files/_anncollection.py +0 -0
  151. {anndata-0.12.3 → anndata-0.12.4}/src/anndata/experimental/pytorch/__init__.py +0 -0
  152. {anndata-0.12.3 → anndata-0.12.4}/src/anndata/experimental/pytorch/_annloader.py +0 -0
  153. {anndata-0.12.3 → anndata-0.12.4}/src/anndata/io.py +0 -0
  154. {anndata-0.12.3 → anndata-0.12.4}/src/anndata/logging.py +0 -0
  155. {anndata-0.12.3 → anndata-0.12.4}/src/anndata/tests/__init__.py +0 -0
  156. {anndata-0.12.3 → anndata-0.12.4}/src/anndata/tests/helpers.py +0 -0
  157. {anndata-0.12.3 → anndata-0.12.4}/src/anndata/types.py +0 -0
  158. {anndata-0.12.3 → anndata-0.12.4}/src/anndata/typing.py +0 -0
  159. {anndata-0.12.3 → anndata-0.12.4}/src/anndata/utils.py +0 -0
  160. {anndata-0.12.3 → anndata-0.12.4}/src/testing/anndata/__init__.py +0 -0
  161. {anndata-0.12.3 → anndata-0.12.4}/src/testing/anndata/_doctest.py +0 -0
  162. {anndata-0.12.3 → anndata-0.12.4}/src/testing/anndata/_pytest.py +0 -0
  163. {anndata-0.12.3 → anndata-0.12.4}/src/testing/anndata/py.typed +0 -0
  164. {anndata-0.12.3 → anndata-0.12.4}/tests/data/adata-comments.tsv +0 -0
  165. {anndata-0.12.3 → anndata-0.12.4}/tests/data/adata.csv +0 -0
  166. {anndata-0.12.3 → anndata-0.12.4}/tests/data/archives/readme.md +0 -0
  167. {anndata-0.12.3 → anndata-0.12.4}/tests/data/archives/v0.11.4/adata.h5ad +0 -0
  168. {anndata-0.12.3 → anndata-0.12.4}/tests/data/archives/v0.11.4/adata.zarr.zip +0 -0
  169. {anndata-0.12.3 → anndata-0.12.4}/tests/data/archives/v0.11.4/readme.md +0 -0
  170. {anndata-0.12.3 → anndata-0.12.4}/tests/data/archives/v0.7.0/adata.h5ad +0 -0
  171. {anndata-0.12.3 → anndata-0.12.4}/tests/data/archives/v0.7.0/adata.zarr.zip +0 -0
  172. {anndata-0.12.3 → anndata-0.12.4}/tests/data/archives/v0.7.8/adata.h5ad +0 -0
  173. {anndata-0.12.3 → anndata-0.12.4}/tests/data/archives/v0.7.8/adata.zarr.zip +0 -0
  174. {anndata-0.12.3 → anndata-0.12.4}/tests/data/excel.xlsx +0 -0
  175. {anndata-0.12.3 → anndata-0.12.4}/tests/data/umi_tools.tsv.gz +0 -0
  176. {anndata-0.12.3 → anndata-0.12.4}/tests/lazy/conftest.py +0 -0
  177. {anndata-0.12.3 → anndata-0.12.4}/tests/lazy/test_read.py +0 -0
  178. {anndata-0.12.3 → anndata-0.12.4}/tests/lazy/test_write.py +0 -0
  179. {anndata-0.12.3 → anndata-0.12.4}/tests/test_anncollection.py +0 -0
  180. {anndata-0.12.3 → anndata-0.12.4}/tests/test_awkward.py +0 -0
  181. {anndata-0.12.3 → anndata-0.12.4}/tests/test_backed_dense.py +0 -0
  182. {anndata-0.12.3 → anndata-0.12.4}/tests/test_backed_sparse.py +0 -0
  183. {anndata-0.12.3 → anndata-0.12.4}/tests/test_base.py +0 -0
  184. {anndata-0.12.3 → anndata-0.12.4}/tests/test_concatenate.py +0 -0
  185. {anndata-0.12.3 → anndata-0.12.4}/tests/test_concatenate_disk.py +0 -0
  186. {anndata-0.12.3 → anndata-0.12.4}/tests/test_dask_view_mem.py +0 -0
  187. {anndata-0.12.3 → anndata-0.12.4}/tests/test_deprecations.py +0 -0
  188. {anndata-0.12.3 → anndata-0.12.4}/tests/test_extensions.py +0 -0
  189. {anndata-0.12.3 → anndata-0.12.4}/tests/test_get_vector.py +0 -0
  190. {anndata-0.12.3 → anndata-0.12.4}/tests/test_gpu.py +0 -0
  191. {anndata-0.12.3 → anndata-0.12.4}/tests/test_helpers.py +0 -0
  192. {anndata-0.12.3 → anndata-0.12.4}/tests/test_io_backwards_compat.py +0 -0
  193. {anndata-0.12.3 → anndata-0.12.4}/tests/test_io_conversion.py +0 -0
  194. {anndata-0.12.3 → anndata-0.12.4}/tests/test_io_dispatched.py +0 -0
  195. {anndata-0.12.3 → anndata-0.12.4}/tests/test_io_partial.py +0 -0
  196. {anndata-0.12.3 → anndata-0.12.4}/tests/test_io_utils.py +0 -0
  197. {anndata-0.12.3 → anndata-0.12.4}/tests/test_io_warnings.py +0 -0
  198. {anndata-0.12.3 → anndata-0.12.4}/tests/test_layers.py +0 -0
  199. {anndata-0.12.3 → anndata-0.12.4}/tests/test_obsmvarm.py +0 -0
  200. {anndata-0.12.3 → anndata-0.12.4}/tests/test_obspvarp.py +0 -0
  201. {anndata-0.12.3 → anndata-0.12.4}/tests/test_raw.py +0 -0
  202. {anndata-0.12.3 → anndata-0.12.4}/tests/test_repr.py +0 -0
  203. {anndata-0.12.3 → anndata-0.12.4}/tests/test_settings.py +0 -0
  204. {anndata-0.12.3 → anndata-0.12.4}/tests/test_structured_arrays.py +0 -0
  205. {anndata-0.12.3 → anndata-0.12.4}/tests/test_transpose.py +0 -0
  206. {anndata-0.12.3 → anndata-0.12.4}/tests/test_uns.py +0 -0
  207. {anndata-0.12.3 → anndata-0.12.4}/tests/test_utils.py +0 -0
  208. {anndata-0.12.3 → anndata-0.12.4}/tests/test_views.py +0 -0
  209. {anndata-0.12.3 → anndata-0.12.4}/tests/test_x.py +0 -0
  210. {anndata-0.12.3 → anndata-0.12.4}/tests/test_xarray.py +0 -0
@@ -43,7 +43,7 @@ jobs:
43
43
  strategy:
44
44
  matrix:
45
45
  env: ${{ fromJSON(needs.get-environments.outputs.envs) }}
46
- io_mark: ["zarr_io", "not zarr_io"]
46
+ io_mark: ["zarr_io", "not zarr_io", "dask_distributed"] # dask_distributed should not be run with -n auto as it uses a client with processes
47
47
  env: # environment variables for use in codecov’s env_vars tagging
48
48
  ENV_NAME: ${{ matrix.env.name }}
49
49
  IO_MARK: ${{ matrix.io_mark }}
@@ -72,7 +72,7 @@ jobs:
72
72
  env:
73
73
  COVERAGE_PROCESS_START: ${{ github.workspace }}/pyproject.toml
74
74
  run: |
75
- hatch run ${{ matrix.env.name }}:run-cov -v --color=yes -n auto --junitxml=test-data/test-results.xml -m "${{ matrix.io_mark }}" ${{ matrix.env.args }}
75
+ hatch run ${{ matrix.env.name }}:run-cov -v --color=yes ${{ matrix.io_mark != 'dask_distributed' && '-n auto' || '' }} --junitxml=test-data/test-results.xml -m "${{ matrix.io_mark }}" ${{ matrix.env.args }}
76
76
  hatch run ${{ matrix.env.name }}:cov-combine
77
77
  hatch run ${{ matrix.env.name }}:coverage xml
78
78
 
@@ -63,7 +63,7 @@ jobs:
63
63
  echo "max_python_version=$max_version" >> $GITHUB_ENV
64
64
 
65
65
  - name: Install UV
66
- uses: astral-sh/setup-uv@v6
66
+ uses: astral-sh/setup-uv@v6 # TODO: upgrade once cirun image supports node 24
67
67
  with:
68
68
  enable-cache: true
69
69
  python-version: ${{ env.max_python_version }}
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: anndata
3
- Version: 0.12.3
3
+ Version: 0.12.4
4
4
  Summary: Annotated data.
5
5
  Project-URL: Documentation, https://anndata.readthedocs.io/
6
6
  Project-URL: Source, https://github.com/scverse/anndata
@@ -0,0 +1,112 @@
1
+ from __future__ import annotations
2
+
3
+ import numpy as np
4
+ import pandas as pd
5
+ from scipy import sparse
6
+
7
+ import anndata as ad
8
+
9
+ file_paths = {"sparse": "adata_sparse.h5ad"}
10
+
11
+
12
+ class BackedHDF5Indexing:
13
+ param_names = ("arr_type",)
14
+ params = ("sparse",)
15
+
16
+ def setup_cache(self):
17
+ X_sparse = sparse.random(
18
+ 10000,
19
+ 50000,
20
+ density=0.01,
21
+ format="csr",
22
+ random_state=np.random.default_rng(42),
23
+ )
24
+ for X, arr_type in [
25
+ (X_sparse, "sparse"),
26
+ ]:
27
+ n_obs, n_var = X.shape
28
+
29
+ # Create obs and var dataframes
30
+ obs = pd.DataFrame(
31
+ {
32
+ "cell_type": pd.Categorical(
33
+ np.random.choice(["TypeA", "TypeB", "TypeC"], n_obs)
34
+ ),
35
+ "total_counts": np.random.randint(1000, 5000, n_obs),
36
+ },
37
+ index=[f"cell_{i}" for i in range(n_obs)],
38
+ )
39
+
40
+ var = pd.DataFrame(
41
+ {
42
+ "gene_name": [f"gene_{i}" for i in range(n_var)],
43
+ },
44
+ index=[f"ENSG_{i:08d}" for i in range(n_var)],
45
+ )
46
+
47
+ # Create AnnData object and save to HDF5
48
+ adata = ad.AnnData(X=X, obs=obs, var=var)
49
+
50
+ # Create temporary file
51
+ adata.write_h5ad(file_paths[arr_type])
52
+
53
+ def setup(self, arr_type):
54
+ # Open as backed
55
+ self.adata_backed = ad.read_h5ad(file_paths[arr_type], backed="r")
56
+ self.n_obs, self.n_var = self.adata_backed.shape
57
+ # Prepare indices for duplicate index testing
58
+ self.obs_idx_with_dupes = np.array([0, 1, 0, 2, 1] * (self.n_obs // 100 + 1))[
59
+ : (self.n_obs // 10)
60
+ ]
61
+ self.var_idx_with_dupes = np.array([0, 1, 2, 0, 3] * (self.n_var // 100 + 1))[
62
+ : (self.n_var // 10)
63
+ ]
64
+ self.obs_idx_no_dupes = np.arange(0, self.n_obs, 10)
65
+ self.var_idx_no_dupes = np.arange(0, self.n_var, 10)
66
+
67
+ def time_slice_obs(self, *_):
68
+ """Time slicing observations from backed HDF5"""
69
+ self.adata_backed[0 : (self.n_obs // 2), :]
70
+
71
+ def time_slice_obs_to_memory(self, *_):
72
+ """Time slicing observations from backed HDF5"""
73
+ self.adata_backed[0 : (self.n_obs // 2), :].to_memory()
74
+
75
+ def peakmem_slice_obs(self, *_):
76
+ """Peak memory for slicing observations from backed HDF5"""
77
+ self.adata_backed[0 : (self.n_obs // 2), :]
78
+
79
+ def time_fancy_index_no_dupes(self, *_):
80
+ """Time fancy indexing without duplicates"""
81
+ self.adata_backed[self.obs_idx_no_dupes, self.var_idx_no_dupes]
82
+
83
+ def peakmem_fancy_index_no_dupes(self, *_):
84
+ """Peak memory for fancy indexing without duplicates"""
85
+ self.adata_backed[self.obs_idx_no_dupes, self.var_idx_no_dupes]
86
+
87
+ def time_fancy_index_no_dupes_to_memory(self, *_):
88
+ """Time fancy indexing without duplicates"""
89
+ self.adata_backed[self.obs_idx_no_dupes, self.var_idx_no_dupes].to_memory()
90
+
91
+ def time_index_with_dupes_obs(self, *_):
92
+ """Time fancy indexing with duplicate observation indices"""
93
+ self.adata_backed[self.obs_idx_with_dupes, :]
94
+
95
+ def peakmem_index_with_dupes_obs(self, *_):
96
+ """Peak memory for fancy indexing with duplicate observation indices"""
97
+ self.adata_backed[self.obs_idx_with_dupes, :]
98
+
99
+ def time_to_memory_subset(self, *_):
100
+ """Time converting subset to memory"""
101
+ subset = self.adata_backed[0 : (self.n_obs // 4), 0 : (self.n_var // 4)]
102
+ subset.to_memory()
103
+
104
+ def peakmem_to_memory_subset(self, *_):
105
+ """Peak memory for converting subset to memory"""
106
+ subset = self.adata_backed[0 : (self.n_obs // 4), 0 : (self.n_var // 4)]
107
+ subset.to_memory()
108
+
109
+ def teardown(self, *_):
110
+ """Clean up temporary files"""
111
+ if hasattr(self, "adata_backed"):
112
+ self.adata_backed.file.close()
@@ -1,7 +1,5 @@
1
1
  from __future__ import annotations
2
2
 
3
- import tempfile
4
- from pathlib import Path
5
3
  from typing import TYPE_CHECKING
6
4
 
7
5
  import h5py
@@ -12,35 +10,39 @@ import zarr
12
10
  import anndata as ad
13
11
 
14
12
  if TYPE_CHECKING:
15
- from collections.abc import Callable
13
+ from typing import Literal
16
14
 
17
15
 
18
16
  class Dataset2D:
19
- param_names = ("gen_store", "chunks")
17
+ param_names = ("store_type", "chunks")
20
18
  params = (
21
- (
22
- lambda: h5py.File(Path(tempfile.mkdtemp()) / "data.h5ad", mode="w"),
23
- lambda: zarr.open(
24
- Path(tempfile.mkdtemp()) / "data.zarr", mode="w", zarr_version=2
25
- ),
26
- ),
19
+ ("zarr", "h5ad"),
27
20
  ((-1,), None),
28
21
  )
29
22
 
30
- def setup(
31
- self, gen_store: Callable[[], zarr.Group | h5py.File], chunks: None | tuple[int]
32
- ):
33
- self.n_obs = 100000
23
+ def setup_cache(self):
24
+ n_obs = 100000
34
25
  df = pd.DataFrame(
35
26
  {
36
- "a": pd.Categorical(np.array(["a"] * self.n_obs)),
37
- "b": np.arange(self.n_obs),
27
+ "a": pd.Categorical(np.array(["a"] * n_obs)),
28
+ "b": np.arange(n_obs),
38
29
  },
39
- index=[f"cell{i}" for i in range(self.n_obs)],
30
+ index=[f"cell{i}" for i in range(n_obs)],
31
+ )
32
+ for store in [
33
+ h5py.File("data.h5ad", mode="w"),
34
+ zarr.open("data.zarr", mode="w", zarr_version=2),
35
+ ]:
36
+ ad.io.write_elem(store, "obs", df)
37
+
38
+ def setup(self, store_type: Literal["zarr", "h5ad"], chunks: None | tuple[int]):
39
+ store = (
40
+ h5py.File("data.h5ad", mode="r")
41
+ if store_type == "h5ad"
42
+ else zarr.open("data.zarr")
40
43
  )
41
- store = gen_store()
42
- ad.io.write_elem(store, "obs", df)
43
44
  self.ds = ad.experimental.read_elem_lazy(store["obs"], chunks=chunks)
45
+ self.n_obs = self.ds.shape[0]
44
46
 
45
47
  def time_getitem_slice(self, *_):
46
48
  self.ds.iloc[0 : (self.n_obs // 2)].to_memory()
@@ -38,52 +38,15 @@ from .utils import get_actualsize, get_peak_mem, sedate
38
38
 
39
39
  PBMC_3K_URL = "https://falexwolf.de/data/pbmc3k_raw.h5ad"
40
40
 
41
- # PBMC_3K_PATH = Path(__file__).parent / "data/pbmc3k_raw.h5ad"
42
- # PBMC_REDUCED_PATH = Path(__file__).parent / "10x_pbmc68k_reduced.h5ad"
43
- # BM_43K_CSR_PATH = Path(__file__).parent.parent / "datasets/BM2_43k-cells.h5ad"
44
- # BM_43K_CSC_PATH = Path(__file__).parent.parent / "datasets/BM2_43k-cells_CSC.h5ad"
45
-
46
-
47
- # class ZarrReadSuite:
48
- # params = []
49
- # param_names = ["input_url"]
50
-
51
- # def setup(self, input_url):
52
- # self.filepath = pooch.retrieve(url=input_url, known_hash=None)
53
-
54
- # def time_read_full(self, input_url):
55
- # anndata.read_zarr(self.filepath)
56
-
57
- # def peakmem_read_full(self, input_url):
58
- # anndata.read_zarr(self.filepath)
59
-
60
- # def mem_readfull_object(self, input_url):
61
- # return anndata.read_zarr(self.filepath)
62
-
63
- # def track_read_full_memratio(self, input_url):
64
- # mem_recording = memory_usage(
65
- # (sedate(anndata.read_zarr, 0.005), (self.filepath,)), interval=0.001
66
- # )
67
- # adata = anndata.read_zarr(self.filepath)
68
- # base_size = mem_recording[-1] - mem_recording[0]
69
- # print(np.max(mem_recording) - np.min(mem_recording))
70
- # print(base_size)
71
- # return (np.max(mem_recording) - np.min(mem_recording)) / base_size
72
-
73
- # def peakmem_read_backed(self, input_url):
74
- # anndata.read_zarr(self.filepath, backed="r")
75
-
76
- # def mem_read_backed_object(self, input_url):
77
- # return anndata.read_zarr(self.filepath, backed="r")
78
-
79
41
 
80
42
  class H5ADInMemorySizeSuite:
81
- _urls = MappingProxyType(dict(pbmc3k=PBMC_3K_URL))
82
- params = _urls.keys()
83
- param_names = ("input_data",)
43
+ filepath = "pbmc_in_mem.h5ad"
84
44
 
85
- def setup(self, input_data: str):
86
- self.filepath = pooch.retrieve(url=self._urls[input_data], known_hash=None)
45
+ def setup_cache(self):
46
+ # Need to specify path because the working directory is special for asv
47
+ pooch.retrieve(
48
+ url=PBMC_3K_URL, known_hash=None, path=Path.cwd(), fname=self.filepath
49
+ )
87
50
 
88
51
  def track_in_memory_size(self, *_):
89
52
  adata = anndata.read_h5ad(self.filepath)
@@ -99,12 +62,13 @@ class H5ADInMemorySizeSuite:
99
62
 
100
63
 
101
64
  class H5ADReadSuite:
102
- _urls = MappingProxyType(dict(pbmc3k=PBMC_3K_URL))
103
- params = _urls.keys()
104
- param_names = ("input_data",)
65
+ filepath = "pbmc_read.h5ad"
105
66
 
106
- def setup(self, input_data: str):
107
- self.filepath = pooch.retrieve(url=self._urls[input_data], known_hash=None)
67
+ def setup_cache(self):
68
+ # Need to specify path because the working directory is special for asv
69
+ pooch.retrieve(
70
+ url=PBMC_3K_URL, known_hash=None, path=Path.cwd(), fname=self.filepath
71
+ )
108
72
 
109
73
  def time_read_full(self, *_):
110
74
  anndata.read_h5ad(self.filepath)
@@ -21,7 +21,7 @@ def make_alternating_mask(n):
21
21
 
22
22
 
23
23
  class SparseCSRContiguousSlice:
24
- _slices = MappingProxyType({
24
+ _indexers = MappingProxyType({
25
25
  "0:1000": slice(0, 1000),
26
26
  "0:9000": slice(0, 9000),
27
27
  ":9000:-1": slice(None, 9000, -1),
@@ -31,42 +31,49 @@ class SparseCSRContiguousSlice:
31
31
  "first": 0,
32
32
  "alternating": make_alternating_mask(10),
33
33
  })
34
+ filepath = "data.zarr"
34
35
  params = (
35
- [
36
- (10_000, 10_000),
37
- # (10_000, 500)
38
- ],
39
- _slices.keys(),
36
+ list(_indexers.keys()),
40
37
  [True, False],
41
38
  )
42
- param_names = ("shape", "slice", "use_dask")
39
+ param_names = (
40
+ "index",
41
+ "use_dask",
42
+ )
43
43
 
44
- def setup(self, shape: tuple[int, int], slice: str, use_dask: bool): # noqa: FBT001
44
+ def setup_cache(self):
45
45
  X = sparse.random(
46
- *shape, density=0.01, format="csr", random_state=np.random.default_rng(42)
46
+ 10_000,
47
+ 10_000,
48
+ density=0.01,
49
+ format="csr",
50
+ random_state=np.random.default_rng(42),
47
51
  )
48
- self.slice = self._slices[slice]
49
- g = zarr.group()
52
+ g = zarr.group(self.filepath)
50
53
  write_elem(g, "X", X)
54
+
55
+ def setup(self, index: str, use_dask: bool): # noqa: FBT001
56
+ g = zarr.open(self.filepath)
51
57
  self.x = read_elem_lazy(g["X"]) if use_dask else sparse_dataset(g["X"])
52
58
  self.adata = AnnData(self.x)
59
+ self.index = self._indexers[index]
53
60
 
54
61
  def time_getitem(self, *_):
55
- res = self.x[self.slice]
62
+ res = self.x[self.index]
56
63
  if isinstance(res, DaskArray):
57
64
  res.compute()
58
65
 
59
66
  def peakmem_getitem(self, *_):
60
- res = self.x[self.slice]
67
+ res = self.x[self.index]
61
68
  if isinstance(res, DaskArray):
62
69
  res.compute()
63
70
 
64
71
  def time_getitem_adata(self, *_):
65
- res = self.adata[self.slice]
72
+ res = self.adata[self.index]
66
73
  if isinstance(res, DaskArray):
67
74
  res.compute()
68
75
 
69
76
  def peakmem_getitem_adata(self, *_):
70
- res = self.adata[self.slice]
77
+ res = self.adata[self.index]
71
78
  if isinstance(res, DaskArray):
72
79
  res.compute()
@@ -95,13 +95,31 @@ def gen_indexer(adata, dim, index_kind, ratio):
95
95
 
96
96
  def gen_adata(n_obs, n_var, attr_set):
97
97
  if "X-csr" in attr_set:
98
- X = sparse.random(n_obs, n_var, density=0.1, format="csr")
98
+ X = sparse.random(
99
+ n_obs,
100
+ n_var,
101
+ density=0.1,
102
+ format="csr",
103
+ random_state=np.random.default_rng(42),
104
+ )
99
105
  elif "X-dense" in attr_set:
100
- X = sparse.random(n_obs, n_var, density=0.1, format="csr")
106
+ X = sparse.random(
107
+ n_obs,
108
+ n_var,
109
+ density=0.1,
110
+ format="csr",
111
+ random_state=np.random.default_rng(42),
112
+ )
101
113
  X = X.toarray()
102
114
  else:
103
115
  # TODO: There's probably a better way to do this
104
- X = sparse.random(n_obs, n_var, density=0, format="csr")
116
+ X = sparse.random(
117
+ n_obs,
118
+ n_var,
119
+ density=0,
120
+ format="csr",
121
+ random_state=np.random.default_rng(42),
122
+ )
105
123
  adata = AnnData(X)
106
124
  if "obs,var" in attr_set:
107
125
  adata.obs = pd.DataFrame(
@@ -0,0 +1,4 @@
1
+ (v0.12.4)=
2
+ ### 0.12.4 {small}`2025-10-27`
3
+
4
+ No significant changes.
@@ -0,0 +1 @@
1
+ {func}`dask.array.store` was producing corrupted data with zarr v3 + distributed scheduler + a lock (which we used internally): see {ref}`dask/dask#12109`. Thus dense arrays were potentially being stored with corrupted data. The solution is to remove the lock for newer versions of dask but without the lock in older versions, it is impossible to store the data. Thus versions of dask older than `2025.4.0` will not be supported for writing dense data. {user}`ilan-gold`
@@ -24,9 +24,13 @@ overrides.matrix.deps.env-vars = [
24
24
  { if = [ "min" ], key = "UV_CONSTRAINT", value = "ci/constraints.txt ci/min-deps.txt" },
25
25
  ]
26
26
  overrides.matrix.deps.pre-install-commands = [
27
- { if = [ "min" ], value = "uv run ci/scripts/min-deps.py pyproject.toml --all-extras -o ci/min-deps.txt" },
28
- # To prevent situations like https://github.com/pydata/xarray/issues/10419 going forward
29
- { if = [ "pre" ], value = "echo xarray @ git+https://github.com/pydata/xarray.git > ci/pre-deps.txt" },
27
+ { if = [
28
+ "min",
29
+ ], value = "uv run ci/scripts/min-deps.py pyproject.toml --all-extras -o ci/min-deps.txt" },
30
+ # To prevent situations like https://github.com/pydata/xarray/issues/10419 going forward, and test against zarr as well
31
+ { if = [
32
+ "pre",
33
+ ], value = "echo 'xarray @ git+https://github.com/pydata/xarray.git\nzarr @ git+https://github.com/zarr-developers/zarr-python.git' > ci/pre-deps.txt" },
30
34
 
31
35
  ]
32
36
  overrides.matrix.deps.python = [
@@ -174,7 +174,11 @@ testpaths = [
174
174
  ]
175
175
  # For some reason this effects how logging is shown when tests are run
176
176
  xfail_strict = true
177
- markers = [ "gpu: mark test to run on GPU", "zarr_io: mark tests that involve zarr io" ]
177
+ markers = [
178
+ "gpu: mark test to run on GPU",
179
+ "zarr_io: mark tests that involve zarr io",
180
+ "dask_distributed: tests that need a distributed client with multiple processes",
181
+ ]
178
182
 
179
183
  [tool.ruff]
180
184
  src = [ "src" ]
@@ -78,6 +78,13 @@ def _gen_dataframe_df(
78
78
  attr: Literal["obs", "var"],
79
79
  length: int | None = None,
80
80
  ):
81
+ if isinstance(anno.index, pd.MultiIndex):
82
+ msg = (
83
+ "pandas.MultiIndex not supported as index for obs or var on declaration.\n\
84
+ You can set `obs_names` manually although most operations after will error or convert to str.\n\
85
+ This behavior will likely be clarified in a future breaking release."
86
+ )
87
+ raise ValueError(msg)
81
88
  if length is not None and length != len(anno):
82
89
  raise _mk_df_error(source, attr, length, len(anno))
83
90
  anno = anno.copy(deep=False)
@@ -3,7 +3,7 @@ from __future__ import annotations
3
3
  from collections.abc import Iterable, Sequence
4
4
  from functools import singledispatch
5
5
  from itertools import repeat
6
- from typing import TYPE_CHECKING
6
+ from typing import TYPE_CHECKING, cast, overload
7
7
 
8
8
  import h5py
9
9
  import numpy as np
@@ -14,6 +14,8 @@ from ..compat import AwkArray, CSArray, CSMatrix, DaskArray, XDataArray
14
14
  from .xarray import Dataset2D
15
15
 
16
16
  if TYPE_CHECKING:
17
+ from numpy.typing import NDArray
18
+
17
19
  from ..compat import Index, Index1D, Index1DNorm
18
20
 
19
21
 
@@ -161,7 +163,10 @@ def unpack_index(index: Index) -> tuple[Index1D, Index1D]:
161
163
 
162
164
 
163
165
  @singledispatch
164
- def _subset(a: np.ndarray | pd.DataFrame, subset_idx: Index):
166
+ def _subset(
167
+ a: np.ndarray | pd.DataFrame,
168
+ subset_idx: tuple[Index1DNorm] | tuple[Index1DNorm, Index1DNorm],
169
+ ):
165
170
  # Select as combination of indexes, not coordinates
166
171
  # Correcting for indexing behaviour of np.ndarray
167
172
  if all(isinstance(x, Iterable) for x in subset_idx):
@@ -170,7 +175,9 @@ def _subset(a: np.ndarray | pd.DataFrame, subset_idx: Index):
170
175
 
171
176
 
172
177
  @_subset.register(DaskArray)
173
- def _subset_dask(a: DaskArray, subset_idx: Index):
178
+ def _subset_dask(
179
+ a: DaskArray, subset_idx: tuple[Index1DNorm] | tuple[Index1DNorm, Index1DNorm]
180
+ ):
174
181
  if len(subset_idx) > 1 and all(isinstance(x, Iterable) for x in subset_idx):
175
182
  if issparse(a._meta) and a._meta.format == "csc":
176
183
  return a[:, subset_idx[1]][subset_idx[0], :]
@@ -180,24 +187,32 @@ def _subset_dask(a: DaskArray, subset_idx: Index):
180
187
 
181
188
@_subset.register(CSMatrix)
@_subset.register(CSArray)
def _subset_sparse(
    a: CSMatrix | CSArray,
    subset_idx: tuple[Index1DNorm] | tuple[Index1DNorm, Index1DNorm],
):
    """Subset a scipy sparse matrix/array by per-axis indices.

    scipy treats a pair of 1-d index arrays as coordinate pairs, so when both
    axes are indexed by iterables the row index is reshaped to a column
    vector to get outer-product (cross) selection instead.
    """
    if len(subset_idx) > 1 and all(isinstance(x, Iterable) for x in subset_idx):
        rows = subset_idx[0]
        # boolean masks must become integer positions before reshaping
        if issubclass(rows.dtype.type, np.bool_):
            rows = np.flatnonzero(rows)
        subset_idx = (rows.reshape(-1, 1), *subset_idx[1:])
    return a[subset_idx]
191
201
 
192
202
 
193
203
@_subset.register(pd.DataFrame)
@_subset.register(Dataset2D)
def _subset_df(
    df: pd.DataFrame | Dataset2D,
    subset_idx: tuple[Index1DNorm] | tuple[Index1DNorm, Index1DNorm],
):
    """Positionally subset a dataframe-like object along one or both axes."""
    # .iloc already has the desired per-axis (non-coordinate) semantics
    return df.iloc[subset_idx]
197
210
 
198
211
 
199
212
@_subset.register(AwkArray)
def _subset_awkarray(
    a: AwkArray, subset_idx: tuple[Index1DNorm] | tuple[Index1DNorm, Index1DNorm]
):
    """Subset an awkward array, using cross-product semantics for paired array indices."""
    # np.ix_ turns per-axis index arrays into an open mesh so both axes
    # are selected independently rather than as coordinate pairs
    idx = (
        np.ix_(*subset_idx)
        if all(isinstance(x, Iterable) for x in subset_idx)
        else subset_idx
    )
    return a[idx]
@@ -205,23 +220,121 @@ def _subset_awkarray(a: AwkArray, subset_idx: Index):
205
220
 
206
221
  # Registration for SparseDataset occurs in sparse_dataset.py
207
222
@_subset.register(h5py.Dataset)
def _subset_dataset(
    d: h5py.Dataset, subset_idx: tuple[Index1DNorm] | tuple[Index1DNorm, Index1DNorm]
):
    """Subset an HDF5 dataset, working around h5py's fancy-indexing limits.

    h5py needs index arrays sorted and does not support multi-dimensional
    fancy indexing natively.  Selections it can handle are read in sorted
    order and rearranged in memory afterwards; selections with duplicate
    indices or more than one array dimension go through the safe fallback.
    """
    sorted_idx: tuple[NDArray[np.integer] | slice, ...]
    unsort_idx: tuple[NDArray[np.integer] | slice, ...]
    sorted_idx, unsort_idx = zip(
        *(_index_order_and_inverse(i) for i in subset_idx), strict=True
    )
    fancy = [i for i in sorted_idx if isinstance(i, np.ndarray)]
    # duplicates or >1 array dimension: h5py can't do this natively,
    # so bypass the sorting logic and index with the original indices
    if len(fancy) > 1 or any(len(np.unique(i)) != len(i) for i in fancy):
        return _safe_fancy_index_h5py(d, subset_idx)
    # from hdf5 (sorted order), then back to the requested order
    return d[sorted_idx][unsort_idx]
239
+
240
+
241
+ @overload
242
+ def _index_order_and_inverse(
243
+ axis_idx: NDArray[np.integer] | NDArray[np.bool_],
244
+ ) -> tuple[NDArray[np.integer], NDArray[np.integer]]: ...
245
+ @overload
246
+ def _index_order_and_inverse(axis_idx: slice) -> tuple[slice, slice]: ...
247
+ def _index_order_and_inverse(
248
+ axis_idx: Index1DNorm,
249
+ ) -> tuple[Index1DNorm, NDArray[np.integer] | slice]:
250
+ """Order and get inverse index array."""
251
+ if not isinstance(axis_idx, np.ndarray):
252
+ return axis_idx, slice(None)
253
+ if axis_idx.dtype == bool:
254
+ axis_idx = np.flatnonzero(axis_idx)
255
+ order = np.argsort(axis_idx)
256
+ return axis_idx[order], np.argsort(order)
257
+
258
+
259
@overload
def _process_index_for_h5py(
    idx: NDArray[np.integer] | NDArray[np.bool_],
) -> tuple[NDArray[np.integer], NDArray[np.integer]]: ...
@overload
def _process_index_for_h5py(idx: slice) -> tuple[slice, None]: ...
def _process_index_for_h5py(
    idx: Index1DNorm,
) -> tuple[Index1DNorm, NDArray[np.integer] | None]:
    """Prepare one axis index for h5py, returning (index to read, reverse map).

    h5py fancy indexing needs sorted, duplicate-free indices, so the raw
    index is replaced by a readable one together with a mapping that
    restores the originally requested order (``None`` if nothing to undo).
    """
    if not isinstance(idx, np.ndarray):
        # slices (and other non-array indices) need no special processing
        return idx, None
    if idx.dtype == bool:
        idx = np.flatnonzero(idx)
    unique, inverse = np.unique(idx, return_inverse=True)
    if len(unique) != len(idx):
        # duplicates: read each unique value once, re-expand via the inverse map
        return unique, inverse
    # no duplicates: sort and track the permutation that reverses the sort
    return _index_order_and_inverse(idx)
286
+
287
+
288
def _safe_fancy_index_h5py(
    dataset: h5py.Dataset,
    subset_idx: tuple[Index1DNorm] | tuple[Index1DNorm, Index1DNorm],
) -> np.ndarray:
    """Fancy-index an h5py dataset where h5py's native selection falls short.

    Handles multi-dimensional fancy indexing and duplicate indices, which
    h5py does not support, without loading the entire dataset into memory:
    the most selective axis is read from disk first, the remaining indices
    and reverse mappings are then applied to the resulting numpy array.

    Returns an in-memory ``np.ndarray`` (the original ``h5py.Dataset``
    annotation was wrong — every path materializes the data).
    """
    # Convert boolean masks to positions and sort/uniquify arrays for h5py,
    # tracking how to restore the originally requested order afterwards.
    processed_indices: tuple[NDArray[np.integer] | slice, ...]
    reverse_indices: tuple[NDArray[np.integer] | None, ...]
    processed_indices, reverse_indices = zip(
        *map(_process_index_for_h5py, subset_idx), strict=True
    )

    # Find the index that reduces the size of the dataset the most,
    # so the initial on-disk read pulls in as little data as possible.
    i_min = np.argmin([
        _get_index_size(inds, dataset.shape[i]) / dataset.shape[i]
        for i, inds in enumerate(processed_indices)
    ])

    # Apply the most selective index first, against the h5py dataset.
    first_index = [slice(None)] * len(processed_indices)
    first_index[i_min] = processed_indices[i_min]
    in_memory_array = cast("np.ndarray", dataset[tuple(first_index)])

    # Apply the remaining indices to the numpy array.
    remaining_indices = list(processed_indices)
    remaining_indices[i_min] = slice(None)  # already applied
    result = in_memory_array[tuple(remaining_indices)]

    # Undo sorting/uniquification to recover the requested order
    # (also re-expands duplicate indices via the inverse maps).
    for dim, reverse_map in enumerate(reverse_indices):
        if reverse_map is not None:
            result = result.take(reverse_map, axis=dim)

    return result
325
+
326
+
327
+ def _get_index_size(idx: Index1DNorm, dim_size: int) -> int:
328
+ """Get size for any index type."""
329
+ if isinstance(idx, slice):
330
+ return len(range(*idx.indices(dim_size)))
331
+ elif isinstance(idx, int):
332
+ return 1
333
+ else: # For other types, try to get length
334
+ return len(idx)
335
+
336
+
337
def make_slice(idx, dimidx: int, n: int = 2) -> tuple[slice, ...]:
    """Build an ``n``-tuple of full slices with ``idx`` substituted at ``dimidx``."""
    parts = [slice(None)] * n
    parts[dimidx] = idx
    return tuple(parts)