anndata 0.12.4__py3-none-any.whl → 0.12.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
anndata/_core/anndata.py CHANGED
@@ -964,7 +964,11 @@ class AnnData(metaclass=utils.DeprecationMixinMeta): # noqa: PLW1641
964
964
  @property
965
965
  def isbacked(self) -> bool:
966
966
  """`True` if object is backed on disk, `False` otherwise."""
967
- return self.filename is not None
967
+ is_filename_none = self.filename is not None
968
+ is_x_none = (
969
+ getattr(self._adata_ref if self._is_view else self, "_X", None) is None
970
+ )
971
+ return is_filename_none and is_x_none
968
972
 
969
973
  @property
970
974
  def is_view(self) -> bool:
@@ -1418,7 +1422,7 @@ class AnnData(metaclass=utils.DeprecationMixinMeta): # noqa: PLW1641
1418
1422
 
1419
1423
  @old_positionals("copy")
1420
1424
  def to_memory(self, *, copy: bool = False) -> AnnData:
1421
- """Return a new AnnData object with all backed arrays loaded into memory.
1425
+ """Return a new AnnData object with all non-in-memory arrays loaded into memory.
1422
1426
 
1423
1427
  Params
1424
1428
  ------
@@ -27,15 +27,24 @@ class AnnDataFileManager:
27
27
  def __init__(
28
28
  self,
29
29
  adata: anndata.AnnData,
30
- filename: PathLike[str] | str | None = None,
31
- filemode: Literal["r", "r+"] | None = None,
30
+ file_name: PathLike[str] | str | None = None,
31
+ file_mode: Literal["r", "r+"] | None = None,
32
+ file_obj: h5py.File | None = None,
32
33
  ):
34
+ if file_obj is not None and (file_name is not None or file_mode is not None):
35
+ msg = "Cannot provide both a h5py.File and the name and/or mode arguments to constructor"
36
+ raise ValueError(msg)
33
37
  self._adata_ref = weakref.ref(adata)
34
- self.filename = filename
35
- self._filemode = filemode
36
- self._file = None
37
- if filename:
38
- self.open()
38
+ if file_obj is not None:
39
+ self.filename = filename(file_obj)
40
+ self._filemode = file_obj.mode
41
+ self._file = file_obj
42
+ else:
43
+ self.filename = file_name
44
+ self._filemode = file_mode
45
+ self._file = file_obj
46
+ if file_name and not self._file:
47
+ self.open()
39
48
 
40
49
  def __getstate__(self):
41
50
  state = self.__dict__.copy()
@@ -82,16 +91,16 @@ class AnnDataFileManager:
82
91
  return self._filename
83
92
 
84
93
  @filename.setter
85
- def filename(self, filename: PathLike[str] | str | None):
86
- self._filename = None if filename is None else Path(filename)
94
+ def filename(self, file_name: PathLike[str] | str | None):
95
+ self._filename = None if file_name is None else Path(file_name)
87
96
 
88
97
  def open(
89
98
  self,
90
- filename: PathLike[str] | str | None = None,
99
+ file_name: PathLike[str] | str | None = None,
91
100
  filemode: Literal["r", "r+"] | None = None,
92
101
  ):
93
- if filename is not None:
94
- self.filename = filename
102
+ if file_name is not None:
103
+ self.filename = file_name
95
104
  if filemode is not None:
96
105
  self._filemode = filemode
97
106
  if self.filename is None:
anndata/_core/merge.py CHANGED
@@ -4,6 +4,7 @@ Code for merging/ concatenating AnnData objects.
4
4
 
5
5
  from __future__ import annotations
6
6
 
7
+ import uuid
7
8
  from collections import OrderedDict
8
9
  from collections.abc import Callable, Mapping, MutableSet
9
10
  from functools import partial, reduce, singledispatch
@@ -1251,6 +1252,7 @@ def make_dask_col_from_extension_dtype(
1251
1252
  chunks=chunk_size,
1252
1253
  meta=np.array([], dtype=dtype),
1253
1254
  dtype=dtype,
1255
+ name=f"{uuid.uuid4()}/{base_path_or_zarr_group}/{elem_name}-{dtype}",
1254
1256
  )
1255
1257
 
1256
1258
  return da.from_array(col.values, chunks=-1) # in-memory
@@ -25,7 +25,7 @@ from anndata.compat import (
25
25
  ZarrGroup,
26
26
  )
27
27
 
28
- from .registry import _LAZY_REGISTRY, IOSpec
28
+ from .registry import _LAZY_REGISTRY, IOSpec, read_elem
29
29
 
30
30
  if TYPE_CHECKING:
31
31
  from collections.abc import Generator, Mapping, Sequence
@@ -195,6 +195,9 @@ def resolve_chunks(
195
195
  return elem.chunks
196
196
 
197
197
 
198
+ # TODO: `map_blocks` of a string array in h5py is so insanely slow on benchmarking that in the case someone has
199
+ # a pure string annotation (not categoricals! or nullables strings!), it's probably better to pay the memory penalty.
200
+ # In the long run, it might be good to figure out what exactly is going on here but for now, this will do.
198
201
  @_LAZY_REGISTRY.register_read(H5Array, IOSpec("string-array", "0.2.0"))
199
202
  def read_h5_string_array(
200
203
  elem: H5Array,
@@ -204,10 +207,8 @@ def read_h5_string_array(
204
207
  ) -> DaskArray:
205
208
  import dask.array as da
206
209
 
207
- from anndata._io.h5ad import read_dataset
208
-
209
210
  chunks = resolve_chunks(elem, chunks, tuple(elem.shape))
210
- return da.from_array(read_dataset(elem), chunks=chunks)
211
+ return da.from_array(read_elem(elem), chunks=chunks)
211
212
 
212
213
 
213
214
  @_LAZY_REGISTRY.register_read(H5Array, IOSpec("array", "0.2.0"))
@@ -303,7 +304,7 @@ def read_dataframe(
303
304
  # which is used below as well.
304
305
  if not use_range_index:
305
306
  dim_name = elem.attrs["_index"]
306
- # no sense in reading this in multiple times
307
+ # no sense in reading this in multiple times since xarray requires an in-memory index
307
308
  index = elem_dict[dim_name].compute()
308
309
  else:
309
310
  dim_name = DUMMY_RANGE_INDEX_KEY
@@ -102,6 +102,12 @@ def zarr_v3_compressor_compat(dataset_kwargs) -> dict:
102
102
  return dataset_kwargs
103
103
 
104
104
 
105
+ def zarr_v3_sharding(dataset_kwargs) -> dict:
106
+ if "shards" not in dataset_kwargs and ad.settings.auto_shard_zarr_v3:
107
+ dataset_kwargs = {**dataset_kwargs, "shards": "auto"}
108
+ return dataset_kwargs
109
+
110
+
105
111
  def _to_cpu_mem_wrapper(write_func):
106
112
  """
107
113
  Wrapper to bring cupy types into cpu memory before writing.
@@ -432,6 +438,7 @@ def write_basic(
432
438
  f.create_dataset(k, data=elem, shape=elem.shape, dtype=dtype, **dataset_kwargs)
433
439
  else:
434
440
  dataset_kwargs = zarr_v3_compressor_compat(dataset_kwargs)
441
+ dataset_kwargs = zarr_v3_sharding(dataset_kwargs)
435
442
  f.create_array(k, shape=elem.shape, dtype=dtype, **dataset_kwargs)
436
443
  # see https://github.com/zarr-developers/zarr-python/discussions/2712
437
444
  if isinstance(elem, ZarrArray | H5Array):
@@ -506,26 +513,17 @@ def write_basic_dask_dask_dense(
506
513
  dataset_kwargs: Mapping[str, Any] = MappingProxyType({}),
507
514
  ):
508
515
  import dask.array as da
509
- import dask.config as dc
510
-
511
- is_distributed = dc.get("scheduler", None) == "dask.distributed"
512
- is_h5 = isinstance(f, H5Group)
513
- if is_distributed and is_h5:
514
- msg = "Cannot write dask arrays to hdf5 when using distributed scheduler"
515
- raise ValueError(msg)
516
516
 
517
517
  dataset_kwargs = dataset_kwargs.copy()
518
+ is_h5 = isinstance(f, H5Group)
518
519
  if not is_h5:
519
520
  dataset_kwargs = zarr_v3_compressor_compat(dataset_kwargs)
520
- # See https://github.com/dask/dask/issues/12109
521
- if Version(version("dask")) < Version("2025.4.0") and is_distributed:
522
- msg = "Writing dense data with a distributed scheduler to zarr could produce corrupted data with a Lock and will error without one when dask is older than 2025.4.0: https://github.com/dask/dask/issues/12109"
523
- raise RuntimeError(msg)
521
+ dataset_kwargs = zarr_v3_sharding(dataset_kwargs)
524
522
  if is_zarr_v2() or is_h5:
525
523
  g = f.require_dataset(k, shape=elem.shape, dtype=elem.dtype, **dataset_kwargs)
526
524
  else:
527
525
  g = f.require_array(k, shape=elem.shape, dtype=elem.dtype, **dataset_kwargs)
528
- da.store(elem, g)
526
+ da.store(elem, g, scheduler="threads")
529
527
 
530
528
 
531
529
  @_REGISTRY.register_read(H5Array, IOSpec("array", "0.2.0"))
@@ -626,6 +624,7 @@ def write_vlen_string_array_zarr(
626
624
  filters, fill_value = None, None
627
625
  if f.metadata.zarr_format == 2:
628
626
  filters, fill_value = [VLenUTF8()], ""
627
+ dataset_kwargs = zarr_v3_sharding(dataset_kwargs)
629
628
  f.create_array(
630
629
  k,
631
630
  shape=elem.shape,
@@ -694,6 +693,9 @@ def write_recarray_zarr(
694
693
  else:
695
694
  dataset_kwargs = dataset_kwargs.copy()
696
695
  dataset_kwargs = zarr_v3_compressor_compat(dataset_kwargs)
696
+ # https://github.com/zarr-developers/zarr-python/issues/3546
697
+ # if "shards" not in dataset_kwargs and ad.settings.auto_shard_zarr_v3:
698
+ # dataset_kwargs = {**dataset_kwargs, "shards": "auto"}
697
699
  f.create_array(k, shape=elem.shape, dtype=elem.dtype, **dataset_kwargs)
698
700
  f[k][...] = elem
699
701
 
@@ -730,6 +732,7 @@ def write_sparse_compressed(
730
732
  attr_name, data=attr, shape=attr.shape, dtype=dtype, **dataset_kwargs
731
733
  )
732
734
  else:
735
+ dataset_kwargs = zarr_v3_sharding(dataset_kwargs)
733
736
  arr = g.create_array(
734
737
  attr_name, shape=attr.shape, dtype=dtype, **dataset_kwargs
735
738
  )
anndata/_settings.py CHANGED
@@ -17,7 +17,7 @@ from .compat import is_zarr_v2, old_positionals
17
17
 
18
18
  if TYPE_CHECKING:
19
19
  from collections.abc import Callable, Sequence
20
- from typing import Any, TypeGuard
20
+ from typing import Any, Self, TypeGuard
21
21
 
22
22
  T = TypeVar("T")
23
23
 
@@ -55,7 +55,7 @@ class RegisteredOption(NamedTuple, Generic[T]):
55
55
  option: str
56
56
  default_value: T
57
57
  description: str
58
- validate: Callable[[T], None]
58
+ validate: Callable[[T, SettingsManager], None]
59
59
  type: object
60
60
 
61
61
  describe = describe
@@ -206,7 +206,7 @@ class SettingsManager:
206
206
  *,
207
207
  default_value: T,
208
208
  description: str,
209
- validate: Callable[[T], None],
209
+ validate: Callable[[T, Self], None],
210
210
  option_type: object | None = None,
211
211
  get_from_env: Callable[[str, T], T] = lambda x, y: y,
212
212
  ) -> None:
@@ -229,7 +229,7 @@ class SettingsManager:
229
229
  Default behavior is to return `default_value` without checking the environment.
230
230
  """
231
231
  try:
232
- validate(default_value)
232
+ validate(default_value, self)
233
233
  except (ValueError, TypeError) as e:
234
234
  e.add_note(f"for option {option!r}")
235
235
  raise e
@@ -307,7 +307,7 @@ class SettingsManager:
307
307
  )
308
308
  raise AttributeError(msg)
309
309
  registered_option = self._registered_options[option]
310
- registered_option.validate(val)
310
+ registered_option.validate(val, self)
311
311
  self._config[option] = val
312
312
 
313
313
  def __getattr__(self, option: str) -> object:
@@ -364,10 +364,13 @@ class SettingsManager:
364
364
  """
365
365
  restore = {a: getattr(self, a) for a in overrides}
366
366
  try:
367
- for attr, value in overrides.items():
368
- setattr(self, attr, value)
367
+ # Preserve order so that settings that depend on each other can be overridden together i.e., always override zarr version before sharding
368
+ for k in self._config:
369
+ if k in overrides:
370
+ setattr(self, k, overrides.get(k))
369
371
  yield None
370
372
  finally:
373
+ # TODO: does the order need to be preserved when restoring?
371
374
  for attr, value in restore.items():
372
375
  setattr(self, attr, value)
373
376
 
@@ -395,7 +398,7 @@ V = TypeVar("V")
395
398
 
396
399
 
397
400
  def gen_validator(_type: type[V]) -> Callable[[V], None]:
398
- def validate_type(val: V) -> None:
401
+ def validate_type(val: V, settings: SettingsManager) -> None:
399
402
  if not isinstance(val, _type):
400
403
  msg = f"{val} not valid {_type}"
401
404
  raise TypeError(msg)
@@ -434,14 +437,28 @@ settings.register(
434
437
  )
435
438
 
436
439
 
437
- def validate_zarr_write_format(format: int):
438
- validate_int(format)
440
+ def validate_zarr_write_format(format: int, settings: SettingsManager):
441
+ validate_int(format, settings)
439
442
  if format not in {2, 3}:
440
443
  msg = "non-v2 zarr on-disk format not supported"
441
444
  raise ValueError(msg)
442
445
  if format == 3 and is_zarr_v2():
443
446
  msg = "Cannot write v3 format against v2 package"
444
447
  raise ValueError(msg)
448
+ if format == 2 and getattr(settings, "auto_shard_zarr_v3", False):
449
+ msg = "Cannot set `zarr_write_format` to 2 with autosharding on. Please set to `False` `anndata.settings.auto_shard_zarr_v3`"
450
+ raise ValueError(msg)
451
+
452
+
453
+ def validate_zarr_sharding(auto_shard: bool, settings: SettingsManager): # noqa: FBT001
454
+ validate_bool(auto_shard, settings)
455
+ if auto_shard:
456
+ if is_zarr_v2():
457
+ msg = "Cannot use sharding with `zarr-python<3`. Please upgrade package and set `anndata.settings.zarr_write_format` to 3."
458
+ raise ValueError(msg)
459
+ if settings.zarr_write_format == 2:
460
+ msg = "Cannot shard v2 format data. Please set `anndata.settings.zarr_write_format` to 3."
461
+ raise ValueError(msg)
445
462
 
446
463
 
447
464
  settings.register(
@@ -458,8 +475,8 @@ settings.register(
458
475
  )
459
476
 
460
477
 
461
- def validate_sparse_settings(val: Any) -> None:
462
- validate_bool(val)
478
+ def validate_sparse_settings(val: Any, settings: SettingsManager) -> None:
479
+ validate_bool(val, settings)
463
480
 
464
481
 
465
482
  settings.register(
@@ -486,6 +503,14 @@ settings.register(
486
503
  get_from_env=check_and_get_bool,
487
504
  )
488
505
 
506
+ settings.register(
507
+ "auto_shard_zarr_v3",
508
+ default_value=False,
509
+ description="Whether or not to use zarr's auto computation of sharding for v3. For v2 this setting will be ignored. The setting will apply to all calls to anndata's writing mechanism (write_zarr / write_elem) and will **not** override any user-defined kwargs for shards.",
510
+ validate=validate_zarr_sharding,
511
+ get_from_env=check_and_get_bool,
512
+ )
513
+
489
514
 
490
515
  ##################################################################################
491
516
  ##################################################################################
anndata/_settings.pyi CHANGED
@@ -2,7 +2,7 @@ from collections.abc import Callable as Callable
2
2
  from collections.abc import Generator, Iterable
3
3
  from contextlib import contextmanager
4
4
  from dataclasses import dataclass
5
- from typing import Literal, TypeVar
5
+ from typing import Literal, Self, TypeVar
6
6
 
7
7
  _T = TypeVar("_T")
8
8
 
@@ -25,7 +25,7 @@ class SettingsManager:
25
25
  *,
26
26
  default_value: _T,
27
27
  description: str,
28
- validate: Callable[[_T], None],
28
+ validate: Callable[[_T, Self], None],
29
29
  option_type: object | None = None,
30
30
  get_from_env: Callable[[str, _T], _T] = ...,
31
31
  ) -> None: ...
@@ -46,5 +46,6 @@ class _AnnDataSettingsManager(SettingsManager):
46
46
  use_sparse_array_on_read: bool = False
47
47
  min_rows_for_chunked_h5_copy: int = 1000
48
48
  disallow_forward_slash_in_h5ad: bool = False
49
+ auto_shard_zarr_v3: bool = False
49
50
 
50
51
  settings: _AnnDataSettingsManager
@@ -8,6 +8,7 @@ from typing import TYPE_CHECKING
8
8
 
9
9
  import h5py
10
10
 
11
+ from anndata._core.file_backing import AnnDataFileManager
11
12
  from anndata._io.specs.registry import read_elem_lazy
12
13
  from anndata._types import AnnDataElem
13
14
  from testing.anndata._doctest import doctest_needs
@@ -28,7 +29,7 @@ if TYPE_CHECKING:
28
29
  @doctest_needs("xarray")
29
30
  @requires_xarray
30
31
  def read_lazy(
31
- store: PathLike[str] | str | MutableMapping | ZarrGroup | h5py.Dataset,
32
+ store: PathLike[str] | str | MutableMapping | ZarrGroup | h5py.File | h5py.Group,
32
33
  *,
33
34
  load_annotation_index: bool = True,
34
35
  ) -> AnnData:
@@ -40,6 +41,9 @@ def read_lazy(
40
41
  ----------
41
42
  store
42
43
  A store-like object to be read in. If :class:`zarr.Group`, it is best for it to be consolidated.
44
+ If a path to an ``.h5ad`` file is provided, the open HDF5 file will be attached to the {class}`~anndata.AnnData` at the `file` attribute and it will be the user’s responsibility to close it when done with the returned object.
45
+ For this reason, it is recommended to use an {class}`h5py.File` as the `store` argument when working with h5 files.
46
+ It must remain open for at least as long as this returned object is in use.
43
47
  load_annotation_index
44
48
  Whether or not to use a range index for the `{obs,var}` :class:`xarray.Dataset` so as not to load the index into memory.
45
49
  If `False`, the real `index` will be inserted as `{obs,var}_names` in the object but not be one of the `coords` thereby preventing read operations.
@@ -83,10 +87,11 @@ def read_lazy(
83
87
  AnnData object with n_obs × n_vars = 490 × 33452
84
88
  obs: 'donor_id', 'self_reported_ethnicity_ontology_term_id', 'organism_ontology_term_id'...
85
89
  """
86
- is_h5_store = isinstance(store, h5py.Dataset | h5py.File | h5py.Group)
87
- is_h5 = (
90
+ is_store_arg_h5_store = isinstance(store, h5py.Dataset | h5py.File | h5py.Group)
91
+ is_store_arg_h5_path = (
88
92
  isinstance(store, PathLike | str) and Path(store).suffix == ".h5ad"
89
- ) or is_h5_store
93
+ )
94
+ is_h5 = is_store_arg_h5_path or is_store_arg_h5_store
90
95
 
91
96
  has_keys = True # true if consolidated or h5ad
92
97
  if not is_h5:
@@ -104,7 +109,7 @@ def read_lazy(
104
109
  f = zarr.open_group(store, mode="r")
105
110
  else:
106
111
  f = store
107
- elif is_h5_store:
112
+ elif is_store_arg_h5_store:
108
113
  f = store
109
114
  else:
110
115
  f = h5py.File(store, mode="r")
@@ -151,6 +156,7 @@ def read_lazy(
151
156
  return func(elem)
152
157
 
153
158
  with settings.override(check_uniqueness=load_annotation_index):
154
- adata = read_dispatched(f, callback=callback)
155
-
159
+ adata: AnnData = read_dispatched(f, callback=callback)
160
+ if is_store_arg_h5_path and not is_store_arg_h5_store:
161
+ adata.file = AnnDataFileManager(adata, file_obj=f)
156
162
  return adata
@@ -111,9 +111,9 @@ class CategoricalArray(XBackendArray, Generic[K]):
111
111
  def categories(self) -> np.ndarray:
112
112
  if isinstance(self._categories, ZarrArray):
113
113
  return self._categories[...]
114
- from ..._io.h5ad import read_dataset
114
+ from anndata.io import read_elem
115
115
 
116
- return read_dataset(self._categories)
116
+ return read_elem(self._categories)
117
117
 
118
118
  def __getitem__(
119
119
  self, key: xr.core.indexing.ExplicitIndexer
anndata/tests/helpers.py CHANGED
@@ -14,6 +14,7 @@ import h5py
14
14
  import numpy as np
15
15
  import pandas as pd
16
16
  import pytest
17
+ import zarr
17
18
  from pandas.api.types import is_numeric_dtype
18
19
  from scipy import sparse
19
20
 
@@ -34,6 +35,7 @@ from anndata.compat import (
34
35
  XDataArray,
35
36
  XDataset,
36
37
  ZarrArray,
38
+ ZarrGroup,
37
39
  is_zarr_v2,
38
40
  )
39
41
  from anndata.utils import asarray
@@ -628,8 +630,9 @@ def assert_equal_arrayview(
628
630
 
629
631
  @assert_equal.register(BaseCompressedSparseDataset)
630
632
  @assert_equal.register(sparse.spmatrix)
633
+ @assert_equal.register(CSArray)
631
634
  def assert_equal_sparse(
632
- a: BaseCompressedSparseDataset | sparse.spmatrix,
635
+ a: BaseCompressedSparseDataset | sparse.spmatrix | CSArray,
633
636
  b: object,
634
637
  *,
635
638
  exact: bool = False,
@@ -639,13 +642,6 @@ def assert_equal_sparse(
639
642
  assert_equal(b, a, exact=exact, elem_name=elem_name)
640
643
 
641
644
 
642
- @assert_equal.register(CSArray)
643
- def assert_equal_sparse_array(
644
- a: CSArray, b: object, *, exact: bool = False, elem_name: str | None = None
645
- ):
646
- return assert_equal_sparse(a, b, exact=exact, elem_name=elem_name)
647
-
648
-
649
645
  @assert_equal.register(CupySparseMatrix)
650
646
  def assert_equal_cupy_sparse(
651
647
  a: CupySparseMatrix, b: object, *, exact: bool = False, elem_name: str | None = None
@@ -878,29 +874,53 @@ def _(a):
878
874
 
879
875
 
880
876
  @singledispatch
881
- def as_sparse_dask_array(a) -> DaskArray:
882
- import dask.array as da
883
-
884
- return da.from_array(sparse.csr_matrix(a), chunks=_half_chunk_size(a.shape))
877
+ def _as_sparse_dask(
878
+ a: NDArray | CSArray | CSMatrix | DaskArray,
879
+ *,
880
+ typ: type[CSArray | CSMatrix | CupyCSRMatrix],
881
+ chunks: tuple[int, ...] | None = None,
882
+ ) -> DaskArray:
883
+ """Convert a to a sparse dask array, preserving sparse format and container (`cs{rc}_{array,matrix}`)."""
884
+ raise NotImplementedError
885
885
 
886
886
 
887
- @as_sparse_dask_array.register(CSMatrix)
888
- def _(a):
887
+ @_as_sparse_dask.register(CSArray | CSMatrix | np.ndarray)
888
+ def _(
889
+ a: CSArray | CSMatrix | NDArray,
890
+ *,
891
+ typ: type[CSArray | CSMatrix | CupyCSRMatrix],
892
+ chunks: tuple[int, ...] | None = None,
893
+ ) -> DaskArray:
889
894
  import dask.array as da
890
895
 
891
- return da.from_array(a, _half_chunk_size(a.shape))
896
+ chunks = _half_chunk_size(a.shape) if chunks is None else chunks
897
+ return da.from_array(_as_sparse_dask_inner(a, typ=typ), chunks=chunks)
892
898
 
893
899
 
894
- @as_sparse_dask_array.register(CSArray)
895
- def _(a):
896
- import dask.array as da
900
+ @_as_sparse_dask.register(DaskArray)
901
+ def _(
902
+ a: DaskArray,
903
+ *,
904
+ typ: type[CSArray | CSMatrix | CupyCSRMatrix],
905
+ chunks: tuple[int, ...] | None = None,
906
+ ) -> DaskArray:
907
+ assert chunks is None # TODO: if needed we can add a .rechunk(chunks)
908
+ return a.map_blocks(_as_sparse_dask_inner, typ=typ, dtype=a.dtype, meta=typ((2, 2)))
897
909
 
898
- return da.from_array(sparse.csr_matrix(a), _half_chunk_size(a.shape))
899
910
 
911
+ def _as_sparse_dask_inner(
912
+ a: NDArray | CSArray | CSMatrix, *, typ: type[CSArray | CSMatrix | CupyCSRMatrix]
913
+ ) -> CSArray | CSMatrix:
914
+ """Convert into a sparse container that dask supports (or complain)."""
915
+ if issubclass(typ, CSArray): # convert sparray to spmatrix
916
+ msg = "AnnData doesn't support `cs_{r,c}_array` inside Dask"
917
+ raise TypeError(msg)
918
+ if issubclass(typ, CupySparseMatrix):
919
+ a = as_cupy(a)  # Cupy sparse constructors don't accept numpy ndarrays
920
+ return typ(a)
900
921
 
901
- @as_sparse_dask_array.register(DaskArray)
902
- def _(a):
903
- return a.map_blocks(sparse.csr_matrix)
922
+
923
+ as_sparse_dask_matrix = partial(_as_sparse_dask, typ=sparse.csr_matrix)
904
924
 
905
925
 
906
926
  @singledispatch
@@ -945,14 +965,11 @@ except ImportError:
945
965
  format_to_memory_class = {}
946
966
 
947
967
 
948
- # TODO: If there are chunks which divide along columns, then a coo_matrix is returned by compute
949
- # We should try and fix this upstream in dask/ cupy
950
968
  @singledispatch
951
- def as_cupy_sparse_dask_array(a, format="csr"):
952
- memory_class = format_to_memory_class[format]
953
- cpu_da = as_sparse_dask_array(a)
954
- return cpu_da.rechunk((cpu_da.chunks[0], -1)).map_blocks(
955
- memory_class, dtype=a.dtype, meta=memory_class(cpu_da._meta)
969
+ def as_cupy_sparse_dask_array(a, format="csr") -> DaskArray:
970
+ chunk_rows, _ = _half_chunk_size(a.shape)
971
+ return _as_sparse_dask(
972
+ a, typ=format_to_memory_class[format], chunks=(chunk_rows, -1)
956
973
  )
957
974
 
958
975
 
@@ -962,7 +979,8 @@ def _(a, format="csr"):
962
979
  import dask.array as da
963
980
 
964
981
  memory_class = format_to_memory_class[format]
965
- return da.from_array(memory_class(a), chunks=(_half_chunk_size(a.shape)[0], -1))
982
+ chunk_rows, _ = _half_chunk_size(a.shape)
983
+ return da.from_array(memory_class(a), chunks=(chunk_rows, -1))
966
984
 
967
985
 
968
986
  @as_cupy_sparse_dask_array.register(DaskArray)
@@ -980,9 +998,9 @@ def resolve_cupy_type(val):
980
998
 
981
999
  if issubclass(input_typ, np.ndarray):
982
1000
  typ = CupyArray
983
- elif issubclass(input_typ, sparse.csr_matrix):
1001
+ elif issubclass(input_typ, sparse.csr_matrix | sparse.csr_array):
984
1002
  typ = CupyCSRMatrix
985
- elif issubclass(input_typ, sparse.csc_matrix):
1003
+ elif issubclass(input_typ, sparse.csc_matrix | sparse.csc_array):
986
1004
  typ = CupyCSCMatrix
987
1005
  else:
988
1006
  msg = f"No default target type for input type {input_typ}"
@@ -1003,7 +1021,7 @@ def as_cupy(val, typ=None):
1003
1021
  if issubclass(typ, CupyArray):
1004
1022
  import cupy as cp
1005
1023
 
1006
- if isinstance(val, CSMatrix):
1024
+ if isinstance(val, CSMatrix | CSArray):
1007
1025
  val = val.toarray()
1008
1026
  return cp.array(val)
1009
1027
  elif issubclass(typ, CupyCSRMatrix):
@@ -1059,7 +1077,7 @@ BASE_MATRIX_PARAMS = [
1059
1077
 
1060
1078
  DASK_MATRIX_PARAMS = [
1061
1079
  pytest.param(as_dense_dask_array, id="dense_dask_array"),
1062
- pytest.param(as_sparse_dask_array, id="sparse_dask_array"),
1080
+ pytest.param(as_sparse_dask_matrix, id="sparse_dask_matrix"),
1063
1081
  ]
1064
1082
 
1065
1083
  CUPY_MATRIX_PARAMS = [
@@ -1187,3 +1205,23 @@ def get_multiindex_columns_df(shape: tuple[int, int]) -> pd.DataFrame:
1187
1205
  + list(itertools.product(["b"], range(shape[1] // 2)))
1188
1206
  ),
1189
1207
  )
1208
+
1209
+
1210
+ def visititems_zarr(
1211
+ z: ZarrGroup, visitor: Callable[[str, ZarrGroup | zarr.Array], None]
1212
+ ) -> None:
1213
+ for key in z:
1214
+ maybe_group = z[key]
1215
+ if isinstance(maybe_group, ZarrGroup):
1216
+ visititems_zarr(maybe_group, visitor)
1217
+ else:
1218
+ visitor(key, maybe_group)
1219
+
1220
+
1221
+ def check_all_sharded(g: ZarrGroup):
1222
+ def visit(key: str, arr: zarr.Array | zarr.Group):
1223
+ # Check for recarray via https://numpy.org/doc/stable/user/basics.rec.html#manipulating-and-displaying-structured-datatypes
1224
+ if isinstance(arr, zarr.Array) and arr.shape != () and arr.dtype.names is None:
1225
+ assert arr.shards is not None
1226
+
1227
+ visititems_zarr(g, visitor=visit)
@@ -1,12 +1,12 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: anndata
3
- Version: 0.12.4
3
+ Version: 0.12.6
4
4
  Summary: Annotated data.
5
5
  Project-URL: Documentation, https://anndata.readthedocs.io/
6
6
  Project-URL: Source, https://github.com/scverse/anndata
7
7
  Project-URL: Home-page, https://github.com/scverse/anndata
8
- Author: Philipp Angerer, Alex Wolf, Isaac Virshup, Sergei Rybakov
9
- Maintainer-email: Isaac Virshup <ivirshup@gmail.com>, Philipp Angerer <philipp.angerer@helmholtz-munich.de>, Ilan Gold <ilan.gold@helmholtz-munich.de>
8
+ Author: Philipp Angerer, Alex Wolf, Isaac Virshup, Sergei Rybakov, Ilan Gold
9
+ Maintainer-email: Philipp Angerer <philipp.angerer@helmholtz-munich.de>, Ilan Gold <ilan.gold@helmholtz-munich.de>
10
10
  License-Expression: BSD-3-Clause
11
11
  License-File: LICENSE
12
12
  Classifier: Environment :: Console
@@ -21,6 +21,7 @@ Classifier: Programming Language :: Python :: 3
21
21
  Classifier: Programming Language :: Python :: 3.11
22
22
  Classifier: Programming Language :: Python :: 3.12
23
23
  Classifier: Programming Language :: Python :: 3.13
24
+ Classifier: Programming Language :: Python :: 3.14
24
25
  Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
25
26
  Classifier: Topic :: Scientific/Engineering :: Visualization
26
27
  Requires-Python: >=3.11
@@ -78,7 +79,7 @@ Requires-Dist: joblib; extra == 'test'
78
79
  Requires-Dist: loompy>=3.0.5; extra == 'test'
79
80
  Requires-Dist: matplotlib; extra == 'test'
80
81
  Requires-Dist: openpyxl; extra == 'test'
81
- Requires-Dist: pyarrow<21; extra == 'test'
82
+ Requires-Dist: pyarrow; extra == 'test'
82
83
  Requires-Dist: pytest-cov; extra == 'test'
83
84
  Requires-Dist: pytest-memray; extra == 'test'
84
85
  Requires-Dist: pytest-mock; extra == 'test'
@@ -100,7 +101,7 @@ Requires-Dist: joblib; extra == 'test-min'
100
101
  Requires-Dist: loompy>=3.0.5; extra == 'test-min'
101
102
  Requires-Dist: matplotlib; extra == 'test-min'
102
103
  Requires-Dist: openpyxl; extra == 'test-min'
103
- Requires-Dist: pyarrow<21; extra == 'test-min'
104
+ Requires-Dist: pyarrow; extra == 'test-min'
104
105
  Requires-Dist: pytest-cov; extra == 'test-min'
105
106
  Requires-Dist: pytest-memray; extra == 'test-min'
106
107
  Requires-Dist: pytest-mock; extra == 'test-min'
@@ -1,6 +1,6 @@
1
1
  anndata/__init__.py,sha256=daAzY8GGouJxCe30Lcr2pl9Jwo2dcGXHPi7WxnHpuOE,1710
2
- anndata/_settings.py,sha256=Y6d28jAF7qnZLfDdg-0L_-Z7zXZR8zjSYlmvWRU_HO4,15949
3
- anndata/_settings.pyi,sha256=88wa1EG5Bz3ZKIJ68iw41nwGPA8asnpqYsnATsC5mpc,1594
2
+ anndata/_settings.py,sha256=AvCdIKCynrP6xUjVtYpvN8dFTefsGPu6anGp4c8M58I,17532
3
+ anndata/_settings.pyi,sha256=mJQQ3I66Y3sng8K-_aGjYuo5UoNgC5syw0yuNc1FADU,1643
4
4
  anndata/_types.py,sha256=c71REP9wS7Vz2cYrNxuNjPYdnq8MJ5g04MNrSi85ATA,5427
5
5
  anndata/_warnings.py,sha256=iFXa9EzPyuPbzRAzoG04oTXAyjnXhQa5zxAMZdsGLwM,702
6
6
  anndata/abc.py,sha256=jG64k59ZZ9Hfn-QWt_btZLuF7eGv_YNYwH91WdbR240,1645
@@ -13,11 +13,11 @@ anndata/_core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
13
13
  anndata/_core/access.py,sha256=pts7fGUKgGZANSsu_qAA7L10qHM-jT1zIehbl3441OY,873
14
14
  anndata/_core/aligned_df.py,sha256=EC01OveJ0tS5bQQHc_OprYSPprl-YtJQK-kIOY_4SX0,4214
15
15
  anndata/_core/aligned_mapping.py,sha256=BYU1jslMWIhtFTtUMaXY8ZCyt0J4_ZsJTmj6J2yAXTQ,14257
16
- anndata/_core/anndata.py,sha256=e9ISy2CI7QaG6mievs8Aw33sho8ZN1CMgqPQZ-ZfbSw,78782
16
+ anndata/_core/anndata.py,sha256=PmR6vKglxX8G837lOah6OhPANALbTWYNlUWHf1A-SPw,78963
17
17
  anndata/_core/extensions.py,sha256=9Rsho6qnr3PJHULrYGiZHCBinBZYJK6zyf3cFsl_gBY,10425
18
- anndata/_core/file_backing.py,sha256=kT71R_kZp_CiHImBK8IaZXsvYVtbX2Mg-7L2ldAWojM,5113
18
+ anndata/_core/file_backing.py,sha256=6DhBfLQPDFDpoe6wSgnOFtpC4Hnbh-UgOPbqvYDxm8g,5603
19
19
  anndata/_core/index.py,sha256=F3TQBUbWpt09Pb4MpwB7xfCI9uPuv7jrqx8X74CwVDU,13472
20
- anndata/_core/merge.py,sha256=HVVLWEqk3PdU_U8UoOyKJaIp0ZQIfWy0cWM2iac4_H8,60366
20
+ anndata/_core/merge.py,sha256=vRW9z_PbobmRGDzQIVxAoFc7G9Rq-y6ry57k_XMmsLc,60460
21
21
  anndata/_core/raw.py,sha256=x_PwwaDQscVQOFJ38kF7sNQ47LxowpS38h2RQfU5Zwo,7925
22
22
  anndata/_core/sparse_dataset.py,sha256=mE-PRX4znkDyuum3BBBv7MJwyn4XL9C3nIQNRjZJ94w,26877
23
23
  anndata/_core/storage.py,sha256=mHzqp7YBJ-rGQFulMAx__D-Z7y4omHPyb1cP7YxfbFE,2555
@@ -30,8 +30,8 @@ anndata/_io/utils.py,sha256=3Lg27Q0Uo3HYlz980bG2Y02_VFIt0PiXMNIj_o-mgC4,9490
30
30
  anndata/_io/write.py,sha256=r55w6yPIIuUSLW9wyYL8GnkzHHQdAxy6xiCEw9cAC38,4811
31
31
  anndata/_io/zarr.py,sha256=Z996SZ8LV1Fpa_q8o70vHnBzNLOLlVjhf_Rs5EM_Slo,5461
32
32
  anndata/_io/specs/__init__.py,sha256=Z6l8xqa7B480U3pqrNIg4-fhUvpBW85w4xA3i3maAUM,427
33
- anndata/_io/specs/lazy_methods.py,sha256=k-s-YwOtwXpdjsyrM0IAsGSadPFxDsVDA3d_Nbpb7Ak,12261
34
- anndata/_io/specs/methods.py,sha256=T7926LkdvUJzvTAneRbDxM8YVIZNGz9GKgccEcsBumU,46264
33
+ anndata/_io/specs/lazy_methods.py,sha256=hnZ3ggrVCXR_rCMVH2Of9YE3s4saWg2WODcCtj0WaIQ,12600
34
+ anndata/_io/specs/methods.py,sha256=nn9DFcUEWvCeAPPifiFxpuYW3AmtFhNDUbxo7i0toow,46314
35
35
  anndata/_io/specs/registry.py,sha256=6Z_ffk3uOIagzRPcDCvEoszcgD-U3n8wYnGiPA71ZeI,17539
36
36
  anndata/compat/__init__.py,sha256=lsLHB7je0SHSePi9noY3p7kRbOAHhZzmMT1hs_ZSXys,12702
37
37
  anndata/experimental/__init__.py,sha256=polIxriEkby0iEqw-IXkUzp8k0wp92BpYY4zl4BsHH0,1648
@@ -39,19 +39,19 @@ anndata/experimental/_dispatch_io.py,sha256=JzH8Uvewabc1gIF3L16RZnM9m2NAG28bQIQ5
39
39
  anndata/experimental/merge.py,sha256=pl4MtDs_M76cTEqrJ_YJ8zyB6ID7QGzjntlAL7vp_qk,23303
40
40
  anndata/experimental/backed/__init__.py,sha256=4dc9M_-_SlfUidDrbWt8PRyD_8bYjypHJ86IpdThHus,230
41
41
  anndata/experimental/backed/_compat.py,sha256=rM7CnSJEZCko5wPBFRfvZA9ZKUSpaOVcWFy5u09p1go,519
42
- anndata/experimental/backed/_io.py,sha256=7bFzn3h8ut49NzppUvsqAX1gjXxAVCFK55Ln0XWzZdY,5965
43
- anndata/experimental/backed/_lazy_arrays.py,sha256=9uuEgP4p5oT49qhkzWHxC8eNFJFE1gRlYwAjMKFRejI,7474
42
+ anndata/experimental/backed/_io.py,sha256=YM5FL6sKdLyQTHUa43cF0pDNbyj2xD9X7lzUiQesV20,6681
43
+ anndata/experimental/backed/_lazy_arrays.py,sha256=WgTYQ88w7rTAP719nGjvyNkIz8cBT2bbkKJhPIxG4_g,7467
44
44
  anndata/experimental/multi_files/__init__.py,sha256=T7iNLlRbe-KnLT3o7Tb7_nE4Iy_hLkG66UjBOvj2Bj8,107
45
45
  anndata/experimental/multi_files/_anncollection.py,sha256=d_d-v8X2WJTGNjAJoo2Mdykd-woSTM_oXEf2PUIqS6A,35254
46
46
  anndata/experimental/pytorch/__init__.py,sha256=4CkgrahLO8Kc-s2bmv6lVQfDxbO3IUyV0v4ygBDkttY,95
47
47
  anndata/experimental/pytorch/_annloader.py,sha256=7mpsFV5vBfxKIje1cPjahtDZ5afkU-H663XB4FJhmok,8075
48
48
  anndata/tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
49
- anndata/tests/helpers.py,sha256=27p_Nc5vFIiW-7EhV85g3QiE0dStMnUg0uFBRyroZUg,36072
49
+ anndata/tests/helpers.py,sha256=hvcLsWY-fMR4EUrwPd6laiS8kwUlIxvlVq3oeH3jf9g,37618
50
50
  testing/anndata/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
51
51
  testing/anndata/_doctest.py,sha256=Qew0N0zLLNiPKN1CLunqY5cTinFLaEhY5GagiYfm6KI,344
52
52
  testing/anndata/_pytest.py,sha256=cg4oWbtH9J1sRNul0n2oOraU1h7cprugr27EUPGDaN0,3997
53
53
  testing/anndata/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
54
- anndata-0.12.4.dist-info/METADATA,sha256=8SqT0BaH42j9XfqXxmF0_Z6GCk0pd6lN7h1Rn3V3FTg,9937
55
- anndata-0.12.4.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
56
- anndata-0.12.4.dist-info/licenses/LICENSE,sha256=VcrXoEVMhtNuvMvKYGP-I5lMT8qZ_6dFf22fsL180qA,1575
57
- anndata-0.12.4.dist-info/RECORD,,
54
+ anndata-0.12.6.dist-info/METADATA,sha256=nWoP_t6iBExzxOziJ_t0KVJ_lDa6lxsNV4rPXf1EobM,9957
55
+ anndata-0.12.6.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
56
+ anndata-0.12.6.dist-info/licenses/LICENSE,sha256=VcrXoEVMhtNuvMvKYGP-I5lMT8qZ_6dFf22fsL180qA,1575
57
+ anndata-0.12.6.dist-info/RECORD,,