anndata 0.12.2__py3-none-any.whl → 0.12.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -37,7 +37,7 @@ if TYPE_CHECKING:
     from .registry import LazyDataStructures, LazyReader
 
 BlockInfo = Mapping[
-    Literal[None],
+    None,
     dict[str, Sequence[tuple[int, int]]],
 ]
 
@@ -4,6 +4,7 @@ import warnings
 from collections.abc import Mapping
 from copy import copy
 from functools import partial
+from importlib.metadata import version
 from itertools import product
 from types import MappingProxyType
 from typing import TYPE_CHECKING
@@ -21,7 +22,7 @@ from anndata._core import views
 from anndata._core.index import _normalize_indices
 from anndata._core.merge import intersect_keys
 from anndata._core.sparse_dataset import _CSCDataset, _CSRDataset, sparse_dataset
-from anndata._io.utils import H5PY_V3, check_key, zero_dim_array_as_scalar
+from anndata._io.utils import check_key, zero_dim_array_as_scalar
 from anndata._warnings import OldFormatWarning
 from anndata.compat import (
     NULLABLE_NUMPY_STRING_TYPE,
@@ -492,31 +493,12 @@ _REGISTRY.register_write(ZarrGroup, CupyArray, IOSpec("array", "0.2.0"))(
 )
 
 
+@_REGISTRY.register_write(ZarrGroup, views.DaskArrayView, IOSpec("array", "0.2.0"))
 @_REGISTRY.register_write(ZarrGroup, DaskArray, IOSpec("array", "0.2.0"))
-def write_basic_dask_zarr(
-    f: ZarrGroup,
-    k: str,
-    elem: DaskArray,
-    *,
-    _writer: Writer,
-    dataset_kwargs: Mapping[str, Any] = MappingProxyType({}),
-):
-    import dask.array as da
-
-    dataset_kwargs = dataset_kwargs.copy()
-    dataset_kwargs = zarr_v3_compressor_compat(dataset_kwargs)
-    if is_zarr_v2():
-        g = f.require_dataset(k, shape=elem.shape, dtype=elem.dtype, **dataset_kwargs)
-    else:
-        g = f.require_array(k, shape=elem.shape, dtype=elem.dtype, **dataset_kwargs)
-    da.store(elem, g, lock=GLOBAL_LOCK)
-
-
-# Adding this separately because h5py isn't serializable
-# https://github.com/pydata/xarray/issues/4242
+@_REGISTRY.register_write(H5Group, views.DaskArrayView, IOSpec("array", "0.2.0"))
 @_REGISTRY.register_write(H5Group, DaskArray, IOSpec("array", "0.2.0"))
-def write_basic_dask_h5(
-    f: H5Group,
+def write_basic_dask_dask_dense(
+    f: ZarrGroup | H5Group,
     k: str,
     elem: DaskArray,
     *,
@@ -526,11 +508,23 @@ def write_basic_dask_h5(
     import dask.array as da
     import dask.config as dc
 
-    if dc.get("scheduler", None) == "dask.distributed":
+    is_distributed = dc.get("scheduler", None) == "dask.distributed"
+    is_h5 = isinstance(f, H5Group)
+    if is_distributed and is_h5:
         msg = "Cannot write dask arrays to hdf5 when using distributed scheduler"
         raise ValueError(msg)
 
-    g = f.require_dataset(k, shape=elem.shape, dtype=elem.dtype, **dataset_kwargs)
+    dataset_kwargs = dataset_kwargs.copy()
+    if not is_h5:
+        dataset_kwargs = zarr_v3_compressor_compat(dataset_kwargs)
+        # See https://github.com/dask/dask/issues/12109
+        if Version(version("dask")) < Version("2025.4.0") and is_distributed:
+            msg = "Writing dense data with a distributed scheduler to zarr could produce corrupted data with a Lock and will error without one when dask is older than 2025.4.0: https://github.com/dask/dask/issues/12109"
+            raise RuntimeError(msg)
+    if is_zarr_v2() or is_h5:
+        g = f.require_dataset(k, shape=elem.shape, dtype=elem.dtype, **dataset_kwargs)
+    else:
+        g = f.require_array(k, shape=elem.shape, dtype=elem.dtype, **dataset_kwargs)
     da.store(elem, g)
 
 
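The two backend-specific dask writers above now collapse into one registration, so a dense dask array takes the same code path for HDF5 and zarr, with the distributed-scheduler guards applied per backend. A minimal usage sketch (illustrative only: the file name is hypothetical, and a default local scheduler is assumed, since the merged writer rejects distributed writes to HDF5 outright and to zarr when dask is older than 2025.4.0):

    import dask.array as da
    import h5py

    from anndata._io.specs import write_elem

    # A dense dask array; blocks are computed and stored via da.store.
    x = da.random.random((100, 50), chunks=(10, 50))

    with h5py.File("example.h5", "w") as f:
        # Dispatches to the merged dask writer registered above.
        write_elem(f, "X", x)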
@@ -607,7 +601,7 @@ def write_vlen_string_array_zarr(
     if is_zarr_v2():
         import numcodecs
 
-        if Version(numcodecs.__version__) < Version("0.13"):
+        if Version(version("numcodecs")) < Version("0.13"):
             msg = "Old numcodecs version detected. Please update for improved performance and stability."
             warnings.warn(msg, UserWarning, stacklevel=2)
         # Workaround for https://github.com/zarr-developers/numcodecs/issues/514
@@ -663,10 +657,9 @@ def _to_hdf5_vlen_strings(value: np.ndarray) -> np.ndarray:
 @_REGISTRY.register_read(ZarrArray, IOSpec("rec-array", "0.2.0"))
 def read_recarray(d: ArrayStorageType, *, _reader: Reader) -> np.recarray | npt.NDArray:
     value = d[()]
-    dtype = value.dtype
-    value = _from_fixed_length_strings(value)
-    if H5PY_V3:
-        value = _decode_structured_array(value, dtype=dtype)
+    value = _decode_structured_array(
+        _from_fixed_length_strings(value), dtype=value.dtype
+    )
     return value
 
 
@@ -778,10 +771,10 @@ for store_type, (cls, spec, func) in product(
     _REGISTRY.register_write(store_type, cls, spec)(func)
 
 
-@_REGISTRY.register_write(H5Group, _CSRDataset, IOSpec("", "0.1.0"))
-@_REGISTRY.register_write(H5Group, _CSCDataset, IOSpec("", "0.1.0"))
-@_REGISTRY.register_write(ZarrGroup, _CSRDataset, IOSpec("", "0.1.0"))
-@_REGISTRY.register_write(ZarrGroup, _CSCDataset, IOSpec("", "0.1.0"))
+@_REGISTRY.register_write(H5Group, _CSRDataset, IOSpec("csr_matrix", "0.1.0"))
+@_REGISTRY.register_write(H5Group, _CSCDataset, IOSpec("csc_matrix", "0.1.0"))
+@_REGISTRY.register_write(ZarrGroup, _CSRDataset, IOSpec("csr_matrix", "0.1.0"))
+@_REGISTRY.register_write(ZarrGroup, _CSCDataset, IOSpec("csc_matrix", "0.1.0"))
 def write_sparse_dataset(
     f: GroupStorageType,
     k: str,
@@ -798,26 +791,9 @@ def write_sparse_dataset(
         fmt=elem.format,
         dataset_kwargs=dataset_kwargs,
     )
-    # TODO: Cleaner way to do this
-    f[k].attrs["encoding-type"] = f"{elem.format}_matrix"
-    f[k].attrs["encoding-version"] = "0.1.0"
 
 
-@_REGISTRY.register_write(H5Group, (DaskArray, CupyArray), IOSpec("array", "0.2.0"))
-@_REGISTRY.register_write(ZarrGroup, (DaskArray, CupyArray), IOSpec("array", "0.2.0"))
-@_REGISTRY.register_write(
-    H5Group, (DaskArray, CupyCSRMatrix), IOSpec("csr_matrix", "0.1.0")
-)
-@_REGISTRY.register_write(
-    H5Group, (DaskArray, CupyCSCMatrix), IOSpec("csc_matrix", "0.1.0")
-)
-@_REGISTRY.register_write(
-    ZarrGroup, (DaskArray, CupyCSRMatrix), IOSpec("csr_matrix", "0.1.0")
-)
-@_REGISTRY.register_write(
-    ZarrGroup, (DaskArray, CupyCSCMatrix), IOSpec("csc_matrix", "0.1.0")
-)
-def write_cupy_dask_sparse(f, k, elem, _writer, dataset_kwargs=MappingProxyType({})):
+def write_cupy_dask(f, k, elem, _writer, dataset_kwargs=MappingProxyType({})):
     _writer.write_elem(
         f,
         k,
@@ -826,18 +802,6 @@ def write_cupy_dask_sparse(f, k, elem, _writer, dataset_kwargs=MappingProxyType(
     )
 
 
-@_REGISTRY.register_write(
-    H5Group, (DaskArray, sparse.csr_matrix), IOSpec("csr_matrix", "0.1.0")
-)
-@_REGISTRY.register_write(
-    H5Group, (DaskArray, sparse.csc_matrix), IOSpec("csc_matrix", "0.1.0")
-)
-@_REGISTRY.register_write(
-    ZarrGroup, (DaskArray, sparse.csr_matrix), IOSpec("csr_matrix", "0.1.0")
-)
-@_REGISTRY.register_write(
-    ZarrGroup, (DaskArray, sparse.csc_matrix), IOSpec("csc_matrix", "0.1.0")
-)
 def write_dask_sparse(
     f: GroupStorageType,
     k: str,
@@ -886,6 +850,26 @@ def write_dask_sparse(
         disk_mtx.append(elem[chunk_slice(chunk_start, chunk_stop)].compute())
 
 
+for array_type, group_type in product(
+    [DaskArray, views.DaskArrayView], [H5Group, ZarrGroup]
+):
+    for cupy_array_type, spec in [
+        (CupyArray, IOSpec("array", "0.2.0")),
+        (CupyCSCMatrix, IOSpec("csc_matrix", "0.1.0")),
+        (CupyCSRMatrix, IOSpec("csr_matrix", "0.1.0")),
+    ]:
+        _REGISTRY.register_write(group_type, (array_type, cupy_array_type), spec)(
+            write_cupy_dask
+        )
+    for scipy_sparse_type, spec in [
+        (sparse.csr_matrix, IOSpec("csr_matrix", "0.1.0")),
+        (sparse.csc_matrix, IOSpec("csc_matrix", "0.1.0")),
+    ]:
+        _REGISTRY.register_write(group_type, (array_type, scipy_sparse_type), spec)(
+            write_dask_sparse
+        )
+
+
 @_REGISTRY.register_read(H5Group, IOSpec("csc_matrix", "0.1.0"))
 @_REGISTRY.register_read(H5Group, IOSpec("csr_matrix", "0.1.0"))
 @_REGISTRY.register_read(ZarrGroup, IOSpec("csc_matrix", "0.1.0"))
@@ -9,6 +9,7 @@ from types import MappingProxyType
 from typing import TYPE_CHECKING, Generic, TypeVar
 
 from anndata._io.utils import report_read_key_on_error, report_write_key_on_error
+from anndata._settings import settings
 from anndata._types import Read, ReadLazy, _ReadInternal, _ReadLazyInternal
 from anndata.compat import DaskArray, ZarrGroup, _read_attr, is_zarr_v2
 
@@ -240,12 +241,9 @@ def proc_spec_mapping(spec: Mapping[str, str]) -> IOSpec:
 def get_spec(
     elem: StorageType,
 ) -> IOSpec:
-    return proc_spec(
-        {
-            k: _read_attr(elem.attrs, k, "")
-            for k in ["encoding-type", "encoding-version"]
-        }
-    )
+    return proc_spec({
+        k: _read_attr(elem.attrs, k, "") for k in ["encoding-type", "encoding-version"]
+    })
 
 
 def _iter_patterns(
@@ -349,10 +347,17 @@ class Writer:
 
         import h5py
 
+        from anndata._io.zarr import is_group_consolidated
+
         # we allow stores to have a prefix like /uns which are then written to with keys like /uns/foo
+        is_zarr_group = isinstance(store, ZarrGroup)
         if "/" in k.split(store.name)[-1][1:]:
-            msg = "Forward slashes are not allowed in keys."
-            raise ValueError(msg)
+            if is_zarr_group or settings.disallow_forward_slash_in_h5ad:
+                msg = f"Forward slashes are not allowed in keys in {type(store)}"
+                raise ValueError(msg)
+            else:
+                msg = "Forward slashes will be disallowed in h5 stores in the next minor release"
+                warnings.warn(msg, FutureWarning, stacklevel=2)
 
         if isinstance(store, h5py.File):
             store = store["/"]
@@ -360,19 +365,11 @@ class Writer:
         dest_type = type(store)
 
         # Normalize k to absolute path
-        if (
-            is_zarr_v2_store := (
-                (is_zarr_store := isinstance(store, ZarrGroup)) and is_zarr_v2()
-            )
-        ) or (isinstance(store, h5py.Group) and not PurePosixPath(k).is_absolute()):
+        if (is_zarr_group and is_zarr_v2()) or (
+            isinstance(store, h5py.Group) and not PurePosixPath(k).is_absolute()
+        ):
             k = str(PurePosixPath(store.name) / k)
-        is_consolidated = False
-        if is_zarr_v2_store:
-            from zarr.storage import ConsolidatedMetadataStore
-
-            is_consolidated = isinstance(store.store, ConsolidatedMetadataStore)
-        elif is_zarr_store:
-            is_consolidated = store.metadata.consolidated_metadata is not None
+        is_consolidated = is_group_consolidated(store) if is_zarr_group else False
         if is_consolidated:
             msg = "Cannot overwrite/edit a store with consolidated metadata"
             raise ValueError(msg)
anndata/_io/utils.py CHANGED
@@ -1,13 +1,11 @@
 from __future__ import annotations
 
+from collections.abc import Callable
 from functools import WRAPPER_ASSIGNMENTS, wraps
 from itertools import pairwise
-from typing import TYPE_CHECKING, cast
+from typing import TYPE_CHECKING, Literal, cast
 from warnings import warn
 
-import h5py
-from packaging.version import Version
-
 from .._core.sparse_dataset import BaseCompressedSparseDataset
 
 if TYPE_CHECKING:
@@ -21,9 +19,6 @@ if TYPE_CHECKING:
 
 Storage = StorageType | BaseCompressedSparseDataset
 
-# For allowing h5py v3
-# https://github.com/scverse/anndata/issues/442
-H5PY_V3 = Version(h5py.__version__).major >= 3
 
 # -------------------------------------------------------------------------------
 # Type conversion
anndata/_io/zarr.py CHANGED
@@ -77,13 +77,11 @@ def read_zarr(store: PathLike[str] | str | MutableMapping | zarr.Group) -> AnnDa
     # Read with handling for backwards compat
     def callback(func, elem_name: str, elem, iospec):
         if iospec.encoding_type == "anndata" or elem_name.endswith("/"):
-            return AnnData(
-                **{
-                    k: read_dispatched(v, callback)
-                    for k, v in dict(elem).items()
-                    if not k.startswith("raw.")
-                }
-            )
+            return AnnData(**{
+                k: read_dispatched(v, callback)
+                for k, v in dict(elem).items()
+                if not k.startswith("raw.")
+            })
         elif elem_name.startswith("/raw."):
             return None
         elif elem_name in {"/obs", "/var"}:
@@ -155,3 +153,14 @@ def open_write_group(
     if not is_zarr_v2() and "zarr_format" not in kwargs:
         kwargs["zarr_format"] = settings.zarr_write_format
     return zarr.open_group(store, mode=mode, **kwargs)
+
+
+def is_group_consolidated(group: zarr.Group) -> bool:
+    if not isinstance(group, zarr.Group):
+        msg = f"Expected zarr.Group, got {type(group)}"
+        raise TypeError(msg)
+    if is_zarr_v2():
+        from zarr.storage import ConsolidatedMetadataStore
+
+        return isinstance(group.store, ConsolidatedMetadataStore)
+    return group.metadata.consolidated_metadata is not None
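The new `is_group_consolidated` helper centralizes the zarr v2/v3 consolidated-metadata check that `Writer.write_elem` previously inlined, and raises `TypeError` for anything that is not a `zarr.Group`. A hedged usage sketch (the store path is hypothetical):

    import zarr

    from anndata._io.zarr import is_group_consolidated

    g = zarr.open_group("example.zarr", mode="r")  # hypothetical store path
    if is_group_consolidated(g):
        # Writer.write_elem refuses to overwrite/edit such a store, since
        # edits would silently diverge from the consolidated metadata snapshot.
        print("re-consolidate after modifying this store externally")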
anndata/_settings.py CHANGED
@@ -478,6 +478,14 @@ settings.register(
     get_from_env=check_and_get_int,
 )
 
+settings.register(
+    "disallow_forward_slash_in_h5ad",
+    default_value=False,
+    description="Whether or not to disallow the `/` character in keys for h5ad files",
+    validate=validate_bool,
+    get_from_env=check_and_get_bool,
+)
+
 
 ##################################################################################
 ##################################################################################
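Since `validate_bool`/`check_and_get_bool` mirror the other boolean options, the new flag should be settable like any other anndata setting. A sketch of opting in to the stricter behavior ahead of the default flip (attribute assignment is the documented pattern for `ad.settings`; the environment-variable name implied by `get_from_env` is not spelled out in this diff):

    import anndata as ad

    # Opt in now: writing a key containing "/" to an h5ad store then raises
    # ValueError immediately, instead of the default FutureWarning.
    ad.settings.disallow_forward_slash_in_h5ad = True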
anndata/_settings.pyi CHANGED
@@ -45,5 +45,6 @@ class _AnnDataSettingsManager(SettingsManager):
     zarr_write_format: Literal[2, 3] = 2
     use_sparse_array_on_read: bool = False
     min_rows_for_chunked_h5_copy: int = 1000
+    disallow_forward_slash_in_h5ad: bool = False
 
 settings: _AnnDataSettingsManager
@@ -3,6 +3,7 @@ from __future__ import annotations
 from codecs import decode
 from collections.abc import Mapping, Sequence
 from functools import cache, partial, singledispatch
+from importlib.metadata import version
 from importlib.util import find_spec
 from types import EllipsisType
 from typing import TYPE_CHECKING, TypeVar
@@ -75,10 +76,9 @@ H5File = h5py.File
 #############################
 @cache
 def is_zarr_v2() -> bool:
-    import zarr
     from packaging.version import Version
 
-    return Version(zarr.__version__) < Version("3.0.0")
+    return Version(version("zarr")) < Version("3.0.0")
 
 
 if is_zarr_v2():
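The recurring change from `module.__version__` to `importlib.metadata.version(...)` in this release swaps attribute access for distribution metadata, which keeps working even for packages that stop exporting `__version__`. The pattern in isolation:

    from importlib.metadata import version

    from packaging.version import Version

    # Reads the installed distribution's metadata instead of a module-level
    # __version__ attribute; no import of the package itself is needed
    # just to compare versions.
    if Version(version("zarr")) < Version("3.0.0"):
        print("zarr-python 2.x code path")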
@@ -213,7 +213,7 @@ else:
 
 NULLABLE_NUMPY_STRING_TYPE = (
     np.dtype("O")
-    if Version(np.__version__) < Version("2")
+    if Version(version("numpy")) < Version("2")
     else np.dtypes.StringDType(na_object=pd.NA)
 )
 
@@ -428,11 +428,3 @@ def _safe_transpose(x):
         return _transpose_by_block(x)
     else:
         return x.T
-
-
-def _map_cat_to_str(cat: pd.Categorical) -> pd.Categorical:
-    if Version(pd.__version__) >= Version("2.1"):
-        # Argument added in pandas 2.1
-        return cat.map(str, na_action="ignore")
-    else:
-        return cat.map(str)
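With `_map_cat_to_str` deleted, call sites inline `cat.map(str, na_action="ignore")`, which implies the supported pandas floor is now at least 2.1, where `Categorical.map` gained the `na_action` argument. A small check of the behavior:

    import pandas as pd

    cat = pd.Categorical(["a", "b", None])
    # na_action="ignore" keeps missing values missing instead of mapping
    # them through str; available on Categorical.map since pandas 2.1.
    print(cat.map(str, na_action="ignore"))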
@@ -25,9 +25,10 @@ if TYPE_CHECKING:
     from pathlib import Path
     from typing import Literal
 
-    from anndata._core.index import Index
     from anndata.compat import ZarrGroup
 
+    from ...compat import Index1DNorm
+
 
 K = TypeVar("K", H5Array, ZarrArray)
 
@@ -199,7 +200,9 @@ class MaskedArray(XBackendArray, Generic[K]):
 
 
 @_subset.register(XDataArray)
-def _subset_masked(a: XDataArray, subset_idx: Index):
+def _subset_masked(
+    a: XDataArray, subset_idx: tuple[Index1DNorm] | tuple[Index1DNorm, Index1DNorm]
+):
     return a[subset_idx]
 
 
@@ -26,8 +26,8 @@ from .._core.merge import (
 )
 from .._core.sparse_dataset import BaseCompressedSparseDataset, sparse_dataset
 from .._io.specs import read_elem, write_elem
-from ..compat import H5Array, H5Group, ZarrArray, ZarrGroup, _map_cat_to_str
-from . import read_dispatched
+from ..compat import H5Array, H5Group, ZarrArray, ZarrGroup
+from . import read_dispatched, read_elem_lazy
 
 if TYPE_CHECKING:
     from collections.abc import Callable, Collection, Iterable, Sequence
@@ -173,7 +173,7 @@ def write_concat_dense(  # noqa: PLR0917
     output_path: ZarrGroup | H5Group,
     axis: Literal[0, 1] = 0,
     reindexers: Reindexer | None = None,
-    fill_value=None,
+    fill_value: Any = None,
 ):
     """
     Writes the concatenation of given dense arrays to disk using dask.
@@ -193,9 +193,10 @@ def write_concat_dense(  # noqa: PLR0917
         axis=axis,
     )
     write_elem(output_group, output_path, res)
-    output_group[output_path].attrs.update(
-        {"encoding-type": "array", "encoding-version": "0.2.0"}
-    )
+    output_group[output_path].attrs.update({
+        "encoding-type": "array",
+        "encoding-version": "0.2.0",
+    })
 
 
 def write_concat_sparse(  # noqa: PLR0917
@@ -205,7 +206,7 @@ def write_concat_sparse(  # noqa: PLR0917
     max_loaded_elems: int,
     axis: Literal[0, 1] = 0,
     reindexers: Reindexer | None = None,
-    fill_value=None,
+    fill_value: Any = None,
 ):
     """
     Writes and concatenates sparse datasets into a single output dataset.
@@ -245,26 +246,24 @@
 
 
 def _write_concat_mappings(  # noqa: PLR0913, PLR0917
-    mappings,
+    mappings: Collection[dict],
     output_group: ZarrGroup | H5Group,
-    keys,
-    path,
-    max_loaded_elems,
-    axis=0,
-    index=None,
-    reindexers=None,
-    fill_value=None,
+    keys: Collection[str],
+    output_path: str | Path,
+    max_loaded_elems: int,
+    axis: Literal[0, 1] = 0,
+    index: pd.Index = None,
+    reindexers: list[Reindexer] | None = None,
+    fill_value: Any = None,
 ):
     """
     Write a list of mappings to a zarr/h5 group.
     """
-    mapping_group = output_group.create_group(path)
-    mapping_group.attrs.update(
-        {
-            "encoding-type": "dict",
-            "encoding-version": "0.1.0",
-        }
-    )
+    mapping_group = output_group.create_group(output_path)
+    mapping_group.attrs.update({
+        "encoding-type": "dict",
+        "encoding-version": "0.1.0",
+    })
     for k in keys:
         elems = [m[k] for m in mappings]
         _write_concat_sequence(
@@ -281,13 +280,13 @@ def _write_concat_mappings(  # noqa: PLR0913, PLR0917
 
 def _write_concat_arrays(  # noqa: PLR0913, PLR0917
     arrays: Sequence[ZarrArray | H5Array | BaseCompressedSparseDataset],
-    output_group,
-    output_path,
-    max_loaded_elems,
-    axis=0,
-    reindexers=None,
-    fill_value=None,
-    join="inner",
+    output_group: ZarrGroup | H5Group,
+    output_path: str | Path,
+    max_loaded_elems: int,
+    axis: Literal[0, 1] = 0,
+    reindexers: list[Reindexer] | None = None,
+    fill_value: Any = None,
+    join: Literal["inner", "outer"] = "inner",
 ):
     init_elem = arrays[0]
     init_type = type(init_elem)
@@ -325,14 +324,14 @@
 
 def _write_concat_sequence(  # noqa: PLR0913, PLR0917
     arrays: Sequence[pd.DataFrame | BaseCompressedSparseDataset | H5Array | ZarrArray],
-    output_group,
-    output_path,
-    max_loaded_elems,
-    axis=0,
-    index=None,
-    reindexers=None,
-    fill_value=None,
-    join="inner",
+    output_group: ZarrGroup | H5Group,
+    output_path: str | Path,
+    max_loaded_elems: int,
+    axis: Literal[0, 1] = 0,
+    index: pd.Index = None,
+    reindexers: list[Reindexer] | None = None,
+    fill_value: Any = None,
+    join: Literal["inner", "outer"] = "inner",
 ):
     """
     array, dataframe, csc_matrix, csc_matrix
@@ -377,17 +376,27 @@ def _write_concat_sequence(  # noqa: PLR0913, PLR0917
         raise NotImplementedError(msg)
 
 
-def _write_alt_mapping(groups, output_group, alt_axis_name, alt_indices, merge):
-    alt_mapping = merge([read_as_backed(g[alt_axis_name]) for g in groups])
-    # If its empty, we need to write an empty dataframe with the correct index
-    if not alt_mapping:
-        alt_df = pd.DataFrame(index=alt_indices)
-        write_elem(output_group, alt_axis_name, alt_df)
-    else:
-        write_elem(output_group, alt_axis_name, alt_mapping)
+def _write_alt_mapping(
+    groups: Collection[H5Group, ZarrGroup],
+    output_group: ZarrGroup | H5Group,
+    alt_axis_name: Literal["obs", "var"],
+    merge: Callable,
+    reindexers: list[Reindexer],
+):
+    alt_mapping = merge([
+        {k: r(read_elem(v), axis=0) for k, v in dict(g[f"{alt_axis_name}m"]).items()}
+        for r, g in zip(reindexers, groups, strict=True)
+    ])
+    write_elem(output_group, f"{alt_axis_name}m", alt_mapping)
 
 
-def _write_alt_annot(groups, output_group, alt_axis_name, alt_indices, merge):
+def _write_alt_annot(
+    groups: Collection[H5Group, ZarrGroup],
+    output_group: ZarrGroup | H5Group,
+    alt_axis_name: Literal["obs", "var"],
+    alt_indices: pd.Index,
+    merge: Callable,
+):
     # Annotation for other axis
     alt_annot = merge_dataframes(
         [read_elem(g[alt_axis_name]) for g in groups], alt_indices, merge
@@ -396,7 +405,13 @@ def _write_alt_annot(groups, output_group, alt_axis_name, alt_indices, merge):
     write_elem(output_group, axis_name, concat_annot)
 
 def _write_axis_annot(  # noqa: PLR0917
-    groups, output_group, axis_name, concat_indices, label, label_col, join
+    groups: Collection[H5Group, ZarrGroup],
+    output_group: ZarrGroup | H5Group,
+    axis_name: Literal["obs", "var"],
+    concat_indices: pd.Index,
+    label: str,
+    label_col: str,
+    join: Literal["inner", "outer"],
 ):
     concat_annot = pd.concat(
         unify_dtypes(read_elem(g[axis_name]) for g in groups),
@@ -409,6 +424,23 @@ def _write_axis_annot(  # noqa: PLR0917
     write_elem(output_group, axis_name, concat_annot)
 
 
+def _write_alt_pairwise(
+    groups: Collection[H5Group, ZarrGroup],
+    output_group: ZarrGroup | H5Group,
+    alt_axis_name: Literal["obs", "var"],
+    merge: Callable,
+    reindexers: list[Reindexer],
+):
+    alt_pairwise = merge([
+        {
+            k: r(r(read_elem_lazy(v), axis=0), axis=1)
+            for k, v in dict(g[f"{alt_axis_name}p"]).items()
+        }
+        for r, g in zip(reindexers, groups, strict=True)
+    ])
+    write_elem(output_group, f"{alt_axis_name}p", alt_pairwise)
+
+
 def concat_on_disk(  # noqa: PLR0912, PLR0913, PLR0915
     in_files: Collection[PathLike[str] | str] | Mapping[str, PathLike[str] | str],
     out_file: PathLike[str] | str,
@@ -491,7 +523,8 @@ def concat_on_disk(  # noqa: PLR0912, PLR0913, PLR0915
         DataFrames are padded with missing values.
     pairwise
         Whether pairwise elements along the concatenated dimension should be included.
-        This is False by default, since the resulting arrays are often not meaningful.
+        This is False by default, since the resulting arrays are often not meaningful, and raises {class}`NotImplementedError` when True.
+        If you are interested in this feature, please open an issue.
 
     Notes
     -----
@@ -610,7 +643,7 @@ def concat_on_disk(  # noqa: PLR0912, PLR0913, PLR0915
     )
     if index_unique is not None:
         concat_indices = concat_indices.str.cat(
-            _map_cat_to_str(label_col), sep=index_unique
+            label_col.map(str, na_action="ignore"), sep=index_unique
         )
 
     # Resulting indices for {axis_name} and {alt_axis_name}
@@ -635,7 +668,10 @@ def concat_on_disk(  # noqa: PLR0912, PLR0913, PLR0915
         _write_alt_annot(groups, output_group, alt_axis_name, alt_index, merge)
 
     # Write {alt_axis_name}m
-    _write_alt_mapping(groups, output_group, alt_axis_name, alt_index, merge)
+    _write_alt_mapping(groups, output_group, alt_axis_name, merge, reindexers)
+
+    # Write {alt_axis_name}p
+    _write_alt_pairwise(groups, output_group, alt_axis_name, merge, reindexers)
 
     # Write X
 
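The hunk above wires `_write_alt_pairwise` into `concat_on_disk`, so pairwise elements on the non-concatenated axis (e.g. `varp` when concatenating along `obs`) are now reindexed per input store and merged into the output instead of being dropped. An illustrative call (file names hypothetical; `merge="same"` keeps only elements identical across inputs):

    from anndata.experimental import concat_on_disk

    # Concatenating along obs: varp from each input is reindexed by the
    # alt-axis reindexer and merged into the output's varp.
    concat_on_disk(["a.zarr", "b.zarr"], "merged.zarr", axis=0, merge="same")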
@@ -16,7 +16,7 @@ from ..._core.index import _normalize_index, _normalize_indices
 from ..._core.merge import concat_arrays, inner_concat_aligned_mapping
 from ..._core.sparse_dataset import BaseCompressedSparseDataset
 from ..._core.views import _resolve_idx
-from ...compat import _map_cat_to_str, old_positionals
+from ...compat import old_positionals
 
 if TYPE_CHECKING:
     from collections.abc import Iterable, Sequence
@@ -731,7 +731,7 @@ class AnnCollection(_ConcatViewMixin, _IterateViewMixin):
         )
         if index_unique is not None:
             concat_indices = concat_indices.str.cat(
-                _map_cat_to_str(label_col), sep=index_unique
+                label_col.map(str, na_action="ignore"), sep=index_unique
             )
         self.obs_names = pd.Index(concat_indices)