anndata 0.12.2__py3-none-any.whl → 0.12.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- anndata/__init__.py +23 -18
- anndata/_core/aligned_df.py +7 -0
- anndata/_core/anndata.py +8 -7
- anndata/_core/index.py +136 -23
- anndata/_core/merge.py +34 -44
- anndata/_core/sparse_dataset.py +12 -11
- anndata/_core/views.py +1 -1
- anndata/_io/h5ad.py +18 -27
- anndata/_io/specs/lazy_methods.py +1 -1
- anndata/_io/specs/methods.py +49 -65
- anndata/_io/specs/registry.py +17 -20
- anndata/_io/utils.py +2 -7
- anndata/_io/zarr.py +16 -7
- anndata/_settings.py +8 -0
- anndata/_settings.pyi +1 -0
- anndata/compat/__init__.py +3 -11
- anndata/experimental/backed/_lazy_arrays.py +5 -2
- anndata/experimental/merge.py +86 -50
- anndata/experimental/multi_files/_anncollection.py +2 -2
- {anndata-0.12.2.dist-info → anndata-0.12.4.dist-info}/METADATA +6 -7
- {anndata-0.12.2.dist-info → anndata-0.12.4.dist-info}/RECORD +24 -25
- testing/anndata/_pytest.py +2 -6
- anndata/_version.py +0 -62
- {anndata-0.12.2.dist-info → anndata-0.12.4.dist-info}/WHEEL +0 -0
- {anndata-0.12.2.dist-info → anndata-0.12.4.dist-info}/licenses/LICENSE +0 -0
anndata/_io/specs/methods.py
CHANGED
@@ -4,6 +4,7 @@ import warnings
 from collections.abc import Mapping
 from copy import copy
 from functools import partial
+from importlib.metadata import version
 from itertools import product
 from types import MappingProxyType
 from typing import TYPE_CHECKING
@@ -21,7 +22,7 @@ from anndata._core import views
 from anndata._core.index import _normalize_indices
 from anndata._core.merge import intersect_keys
 from anndata._core.sparse_dataset import _CSCDataset, _CSRDataset, sparse_dataset
-from anndata._io.utils import …
+from anndata._io.utils import check_key, zero_dim_array_as_scalar
 from anndata._warnings import OldFormatWarning
 from anndata.compat import (
     NULLABLE_NUMPY_STRING_TYPE,
@@ -492,31 +493,12 @@ _REGISTRY.register_write(ZarrGroup, CupyArray, IOSpec("array", "0.2.0"))(
 )


+@_REGISTRY.register_write(ZarrGroup, views.DaskArrayView, IOSpec("array", "0.2.0"))
 @_REGISTRY.register_write(ZarrGroup, DaskArray, IOSpec("array", "0.2.0"))
-def write_basic_dask_zarr(
-    f: ZarrGroup,
-    k: str,
-    elem: DaskArray,
-    *,
-    _writer: Writer,
-    dataset_kwargs: Mapping[str, Any] = MappingProxyType({}),
-):
-    import dask.array as da
-
-    dataset_kwargs = dataset_kwargs.copy()
-    dataset_kwargs = zarr_v3_compressor_compat(dataset_kwargs)
-    if is_zarr_v2():
-        g = f.require_dataset(k, shape=elem.shape, dtype=elem.dtype, **dataset_kwargs)
-    else:
-        g = f.require_array(k, shape=elem.shape, dtype=elem.dtype, **dataset_kwargs)
-    da.store(elem, g, lock=GLOBAL_LOCK)
-
-
-# Adding this separately because h5py isn't serializable
-# https://github.com/pydata/xarray/issues/4242
+@_REGISTRY.register_write(H5Group, views.DaskArrayView, IOSpec("array", "0.2.0"))
 @_REGISTRY.register_write(H5Group, DaskArray, IOSpec("array", "0.2.0"))
-def write_basic_dask_h5(
-    f: H5Group,
+def write_basic_dask_dask_dense(
+    f: ZarrGroup | H5Group,
     k: str,
     elem: DaskArray,
     *,
@@ -526,11 +508,23 @@ def write_basic_dask_h5(
     import dask.array as da
     import dask.config as dc

-    if dc.get("scheduler", None) == "dask.distributed":
+    is_distributed = dc.get("scheduler", None) == "dask.distributed"
+    is_h5 = isinstance(f, H5Group)
+    if is_distributed and is_h5:
         msg = "Cannot write dask arrays to hdf5 when using distributed scheduler"
         raise ValueError(msg)

-    g = f.require_dataset(k, shape=elem.shape, dtype=elem.dtype, **dataset_kwargs)
+    dataset_kwargs = dataset_kwargs.copy()
+    if not is_h5:
+        dataset_kwargs = zarr_v3_compressor_compat(dataset_kwargs)
+        # See https://github.com/dask/dask/issues/12109
+        if Version(version("dask")) < Version("2025.4.0") and is_distributed:
+            msg = "Writing dense data with a distributed scheduler to zarr could produce corrupted data with a Lock and will error without one when dask is older than 2025.4.0: https://github.com/dask/dask/issues/12109"
+            raise RuntimeError(msg)
+    if is_zarr_v2() or is_h5:
+        g = f.require_dataset(k, shape=elem.shape, dtype=elem.dtype, **dataset_kwargs)
+    else:
+        g = f.require_array(k, shape=elem.shape, dtype=elem.dtype, **dataset_kwargs)
     da.store(elem, g)


@@ -607,7 +601,7 @@ def write_vlen_string_array_zarr(
     if is_zarr_v2():
         import numcodecs

-        if Version(numcodecs.__version__) < Version("0.13"):
+        if Version(version("numcodecs")) < Version("0.13"):
             msg = "Old numcodecs version detected. Please update for improved performance and stability."
             warnings.warn(msg, UserWarning, stacklevel=2)
             # Workaround for https://github.com/zarr-developers/numcodecs/issues/514
@@ -663,10 +657,9 @@ def _to_hdf5_vlen_strings(value: np.ndarray) -> np.ndarray:
 @_REGISTRY.register_read(ZarrArray, IOSpec("rec-array", "0.2.0"))
 def read_recarray(d: ArrayStorageType, *, _reader: Reader) -> np.recarray | npt.NDArray:
     value = d[()]
-    dtype = value.dtype
-    value = _from_fixed_length_strings(value)
-    if H5PY_V3:
-        value = _decode_structured_array(value, dtype=dtype)
+    value = _decode_structured_array(
+        _from_fixed_length_strings(value), dtype=value.dtype
+    )
     return value


@@ -778,10 +771,10 @@ for store_type, (cls, spec, func) in product(
     _REGISTRY.register_write(store_type, cls, spec)(func)


-@_REGISTRY.register_write(H5Group, _CSRDataset, IOSpec("", "0.1.0"))
-@_REGISTRY.register_write(H5Group, _CSCDataset, IOSpec("", "0.1.0"))
-@_REGISTRY.register_write(ZarrGroup, _CSRDataset, IOSpec("", "0.1.0"))
-@_REGISTRY.register_write(ZarrGroup, _CSCDataset, IOSpec("", "0.1.0"))
+@_REGISTRY.register_write(H5Group, _CSRDataset, IOSpec("csr_matrix", "0.1.0"))
+@_REGISTRY.register_write(H5Group, _CSCDataset, IOSpec("csc_matrix", "0.1.0"))
+@_REGISTRY.register_write(ZarrGroup, _CSRDataset, IOSpec("csr_matrix", "0.1.0"))
+@_REGISTRY.register_write(ZarrGroup, _CSCDataset, IOSpec("csc_matrix", "0.1.0"))
 def write_sparse_dataset(
     f: GroupStorageType,
     k: str,
@@ -798,26 +791,9 @@ def write_sparse_dataset(
         fmt=elem.format,
         dataset_kwargs=dataset_kwargs,
     )
-    # TODO: Cleaner way to do this
-    f[k].attrs["encoding-type"] = f"{elem.format}_matrix"
-    f[k].attrs["encoding-version"] = "0.1.0"


-
-@_REGISTRY.register_write(ZarrGroup, (DaskArray, CupyArray), IOSpec("array", "0.2.0"))
-@_REGISTRY.register_write(
-    H5Group, (DaskArray, CupyCSRMatrix), IOSpec("csr_matrix", "0.1.0")
-)
-@_REGISTRY.register_write(
-    H5Group, (DaskArray, CupyCSCMatrix), IOSpec("csc_matrix", "0.1.0")
-)
-@_REGISTRY.register_write(
-    ZarrGroup, (DaskArray, CupyCSRMatrix), IOSpec("csr_matrix", "0.1.0")
-)
-@_REGISTRY.register_write(
-    ZarrGroup, (DaskArray, CupyCSCMatrix), IOSpec("csc_matrix", "0.1.0")
-)
-def write_cupy_dask_sparse(f, k, elem, _writer, dataset_kwargs=MappingProxyType({})):
+def write_cupy_dask(f, k, elem, _writer, dataset_kwargs=MappingProxyType({})):
     _writer.write_elem(
         f,
         k,
@@ -826,18 +802,6 @@ def write_cupy_dask_sparse(f, k, elem, _writer, dataset_kwargs=MappingProxyType({})):
     )


-@_REGISTRY.register_write(
-    H5Group, (DaskArray, sparse.csr_matrix), IOSpec("csr_matrix", "0.1.0")
-)
-@_REGISTRY.register_write(
-    H5Group, (DaskArray, sparse.csc_matrix), IOSpec("csc_matrix", "0.1.0")
-)
-@_REGISTRY.register_write(
-    ZarrGroup, (DaskArray, sparse.csr_matrix), IOSpec("csr_matrix", "0.1.0")
-)
-@_REGISTRY.register_write(
-    ZarrGroup, (DaskArray, sparse.csc_matrix), IOSpec("csc_matrix", "0.1.0")
-)
 def write_dask_sparse(
     f: GroupStorageType,
     k: str,
@@ -886,6 +850,26 @@ def write_dask_sparse(
         disk_mtx.append(elem[chunk_slice(chunk_start, chunk_stop)].compute())


+for array_type, group_type in product(
+    [DaskArray, views.DaskArrayView], [H5Group, ZarrGroup]
+):
+    for cupy_array_type, spec in [
+        (CupyArray, IOSpec("array", "0.2.0")),
+        (CupyCSCMatrix, IOSpec("csc_matrix", "0.1.0")),
+        (CupyCSRMatrix, IOSpec("csr_matrix", "0.1.0")),
+    ]:
+        _REGISTRY.register_write(group_type, (array_type, cupy_array_type), spec)(
+            write_cupy_dask
+        )
+    for scipy_sparse_type, spec in [
+        (sparse.csr_matrix, IOSpec("csr_matrix", "0.1.0")),
+        (sparse.csc_matrix, IOSpec("csc_matrix", "0.1.0")),
+    ]:
+        _REGISTRY.register_write(group_type, (array_type, scipy_sparse_type), spec)(
+            write_dask_sparse
+        )
+
+
 @_REGISTRY.register_read(H5Group, IOSpec("csc_matrix", "0.1.0"))
 @_REGISTRY.register_read(H5Group, IOSpec("csr_matrix", "0.1.0"))
 @_REGISTRY.register_read(ZarrGroup, IOSpec("csc_matrix", "0.1.0"))
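The final hunk above collapses what used to be stacks of near-identical decorators into a single registration loop over `product(...)`. A minimal, self-contained sketch of that pattern follows; `ToyRegistry`, `write_dense`, and `write_sparse` are illustrative stand-ins, not anndata's internal `_REGISTRY` or its writer functions:

    from itertools import product

    class ToyRegistry:
        """Toy write registry; anndata's real IORegistry is richer (IOSpec objects, lazy readers, etc.)."""

        def __init__(self) -> None:
            self._writers = {}

        def register_write(self, group_type, elem_type, spec):
            def decorator(func):
                self._writers[(group_type, elem_type, spec)] = func
                return func
            return decorator

    registry = ToyRegistry()

    def write_dense(f, k, elem):  # placeholder writer
        ...

    def write_sparse(f, k, elem):  # placeholder writer
        ...

    # One loop instead of a decorator stack per (group, element) combination,
    # mirroring the shape of the new registration code in methods.py.
    for group_type, (elem_type, spec, func) in product(
        ["h5", "zarr"],
        [
            ("dask_dense", "array v0.2.0", write_dense),
            ("dask_csr", "csr_matrix v0.1.0", write_sparse),
        ],
    ):
        registry.register_write(group_type, elem_type, spec)(func)

    print(len(registry._writers))  # 4 (group, element) combinations registered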
anndata/_io/specs/registry.py
CHANGED
@@ -9,6 +9,7 @@ from types import MappingProxyType
 from typing import TYPE_CHECKING, Generic, TypeVar

 from anndata._io.utils import report_read_key_on_error, report_write_key_on_error
+from anndata._settings import settings
 from anndata._types import Read, ReadLazy, _ReadInternal, _ReadLazyInternal
 from anndata.compat import DaskArray, ZarrGroup, _read_attr, is_zarr_v2

@@ -240,12 +241,9 @@ def proc_spec_mapping(spec: Mapping[str, str]) -> IOSpec:
 def get_spec(
     elem: StorageType,
 ) -> IOSpec:
-    return proc_spec(
-        {
-            k: _read_attr(elem.attrs, k, "")
-            for k in ["encoding-type", "encoding-version"]
-        }
-    )
+    return proc_spec({
+        k: _read_attr(elem.attrs, k, "") for k in ["encoding-type", "encoding-version"]
+    })


 def _iter_patterns(
@@ -349,10 +347,17 @@ class Writer:

         import h5py

+        from anndata._io.zarr import is_group_consolidated
+
         # we allow stores to have a prefix like /uns which are then written to with keys like /uns/foo
+        is_zarr_group = isinstance(store, ZarrGroup)
         if "/" in k.split(store.name)[-1][1:]:
-            …
-            …
+            if is_zarr_group or settings.disallow_forward_slash_in_h5ad:
+                msg = f"Forward slashes are not allowed in keys in {type(store)}"
+                raise ValueError(msg)
+            else:
+                msg = "Forward slashes will be disallowed in h5 stores in the next minor release"
+                warnings.warn(msg, FutureWarning, stacklevel=2)

         if isinstance(store, h5py.File):
             store = store["/"]
@@ -360,19 +365,11 @@ class Writer:
         dest_type = type(store)

         # Normalize k to absolute path
-        if (
-            …
-            …
-            )
-        ) or (isinstance(store, h5py.Group) and not PurePosixPath(k).is_absolute()):
+        if (is_zarr_group and is_zarr_v2()) or (
+            isinstance(store, h5py.Group) and not PurePosixPath(k).is_absolute()
+        ):
             k = str(PurePosixPath(store.name) / k)
-        is_consolidated = False
-        if is_zarr_v2_store:
-            from zarr.storage import ConsolidatedMetadataStore
-
-            is_consolidated = isinstance(store.store, ConsolidatedMetadataStore)
-        elif is_zarr_store:
-            is_consolidated = store.metadata.consolidated_metadata is not None
+        is_consolidated = is_group_consolidated(store) if is_zarr_group else False
         if is_consolidated:
             msg = "Cannot overwrite/edit a store with consolidated metadata"
             raise ValueError(msg)
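The reworked key check above raises for zarr groups (or whenever the new setting is enabled) and only warns for h5 stores. A simplified sketch of that decision, with the setting passed as a plain flag instead of being read from `anndata.settings` and the store-prefix handling omitted:

    import warnings

    def check_write_key(key: str, *, is_zarr_group: bool, disallow_slash_in_h5ad: bool) -> None:
        # Simplified: the real Writer.write_elem strips the store's own prefix first.
        if "/" in key.lstrip("/"):
            if is_zarr_group or disallow_slash_in_h5ad:
                msg = f"Forward slashes are not allowed in keys: {key!r}"
                raise ValueError(msg)
            msg = "Forward slashes will be disallowed in h5 stores in the next minor release"
            warnings.warn(msg, FutureWarning, stacklevel=2)

    check_write_key("uns/nested", is_zarr_group=False, disallow_slash_in_h5ad=False)  # warns only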
anndata/_io/utils.py
CHANGED
@@ -1,13 +1,11 @@
 from __future__ import annotations

+from collections.abc import Callable
 from functools import WRAPPER_ASSIGNMENTS, wraps
 from itertools import pairwise
-from typing import TYPE_CHECKING, cast
+from typing import TYPE_CHECKING, Literal, cast
 from warnings import warn

-import h5py
-from packaging.version import Version
-
 from .._core.sparse_dataset import BaseCompressedSparseDataset

 if TYPE_CHECKING:
@@ -21,9 +19,6 @@ if TYPE_CHECKING:

     Storage = StorageType | BaseCompressedSparseDataset

-# For allowing h5py v3
-# https://github.com/scverse/anndata/issues/442
-H5PY_V3 = Version(h5py.__version__).major >= 3

 # -------------------------------------------------------------------------------
 # Type conversion
anndata/_io/zarr.py
CHANGED
@@ -77,13 +77,11 @@ def read_zarr(store: PathLike[str] | str | MutableMapping | zarr.Group) -> AnnData:
     # Read with handling for backwards compat
     def callback(func, elem_name: str, elem, iospec):
         if iospec.encoding_type == "anndata" or elem_name.endswith("/"):
-            return AnnData(
-                **{
-                    k: read_dispatched(v, callback)
-                    for k, v in dict(elem).items()
-                    if not k.startswith("raw.")
-                }
-            )
+            return AnnData(**{
+                k: read_dispatched(v, callback)
+                for k, v in dict(elem).items()
+                if not k.startswith("raw.")
+            })
         elif elem_name.startswith("/raw."):
             return None
         elif elem_name in {"/obs", "/var"}:
@@ -155,3 +153,14 @@ def open_write_group(
     if not is_zarr_v2() and "zarr_format" not in kwargs:
         kwargs["zarr_format"] = settings.zarr_write_format
     return zarr.open_group(store, mode=mode, **kwargs)
+
+
+def is_group_consolidated(group: zarr.Group) -> bool:
+    if not isinstance(group, zarr.Group):
+        msg = f"Expected zarr.Group, got {type(group)}"
+        raise TypeError(msg)
+    if is_zarr_v2():
+        from zarr.storage import ConsolidatedMetadataStore
+
+        return isinstance(group.store, ConsolidatedMetadataStore)
+    return group.metadata.consolidated_metadata is not None
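`is_group_consolidated` centralizes the check that `Writer.write_elem` now uses to refuse writing into stores opened with consolidated metadata. A small sketch of what the zarr v3 branch detects, assuming zarr-python 3 and an in-memory store:

    import zarr

    store = zarr.storage.MemoryStore()
    g = zarr.open_group(store, mode="w")
    g.create_group("uns")
    # A freshly written group carries no consolidated metadata.
    print(g.metadata.consolidated_metadata is not None)  # False

    zarr.consolidate_metadata(store)
    g2 = zarr.open_group(store, mode="r")
    # Reopening should pick up the consolidated metadata, and anndata's Writer
    # now refuses to write elements into such a group.
    print(g2.metadata.consolidated_metadata is not None)  # expected: True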
anndata/_settings.py
CHANGED
@@ -478,6 +478,14 @@ settings.register(
     get_from_env=check_and_get_int,
 )

+settings.register(
+    "disallow_forward_slash_in_h5ad",
+    default_value=False,
+    description="Whether or not to disallow the `/` character in keys for h5ad files",
+    validate=validate_bool,
+    get_from_env=check_and_get_bool,
+)
+

 ##################################################################################
 ##################################################################################
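The option defaults to False, so writing keys containing `/` to h5ad only emits the FutureWarning shown in registry.py. A sketch of opting in early; the environment-variable spelling assumes anndata's usual `ANNDATA_<OPTION>` convention:

    import anndata as ad

    # Opt in to the stricter behaviour for this process (anndata >= 0.12.4).
    ad.settings.disallow_forward_slash_in_h5ad = True

    # Because the register() call above wires up get_from_env, the option can
    # presumably also be set before import through the environment, e.g.:
    #   export ANNDATA_DISALLOW_FORWARD_SLASH_IN_H5AD=1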
anndata/_settings.pyi
CHANGED
anndata/compat/__init__.py
CHANGED
@@ -3,6 +3,7 @@ from __future__ import annotations
 from codecs import decode
 from collections.abc import Mapping, Sequence
 from functools import cache, partial, singledispatch
+from importlib.metadata import version
 from importlib.util import find_spec
 from types import EllipsisType
 from typing import TYPE_CHECKING, TypeVar
@@ -75,10 +76,9 @@ H5File = h5py.File
 #############################
 @cache
 def is_zarr_v2() -> bool:
-    import zarr
     from packaging.version import Version

-    return Version(zarr.__version__) < Version("3.0.0")
+    return Version(version("zarr")) < Version("3.0.0")


 if is_zarr_v2():
@@ -213,7 +213,7 @@ else:

 NULLABLE_NUMPY_STRING_TYPE = (
     np.dtype("O")
-    if Version(np.__version__) < Version("2")
+    if Version(version("numpy")) < Version("2")
     else np.dtypes.StringDType(na_object=pd.NA)
 )

@@ -428,11 +428,3 @@ def _safe_transpose(x):
         return _transpose_by_block(x)
     else:
         return x.T
-
-
-def _map_cat_to_str(cat: pd.Categorical) -> pd.Categorical:
-    if Version(pd.__version__) >= Version("2.1"):
-        # Argument added in pandas 2.1
-        return cat.map(str, na_action="ignore")
-    else:
-        return cat.map(str)
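Several hunks in this release replace `module.__version__` lookups with `importlib.metadata.version()`, which reads the installed distribution's metadata instead of relying on the module exporting a version attribute. The pattern in isolation, using numpy purely as an example of an installed distribution:

    from importlib.metadata import version

    from packaging.version import Version

    if Version(version("numpy")) < Version("2"):
        print("running against numpy 1.x")
    else:
        print("running against numpy 2.x")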
anndata/experimental/backed/_lazy_arrays.py
CHANGED
@@ -25,9 +25,10 @@ if TYPE_CHECKING:
     from pathlib import Path
     from typing import Literal

-    from anndata._core.index import Index
     from anndata.compat import ZarrGroup

+    from ...compat import Index1DNorm
+

 K = TypeVar("K", H5Array, ZarrArray)

@@ -199,7 +200,9 @@ class MaskedArray(XBackendArray, Generic[K]):


 @_subset.register(XDataArray)
-def _subset_masked(a: XDataArray, subset_idx: Index):
+def _subset_masked(
+    a: XDataArray, subset_idx: tuple[Index1DNorm] | tuple[Index1DNorm, Index1DNorm]
+):
     return a[subset_idx]

anndata/experimental/merge.py
CHANGED
@@ -26,8 +26,8 @@ from .._core.merge import (
 )
 from .._core.sparse_dataset import BaseCompressedSparseDataset, sparse_dataset
 from .._io.specs import read_elem, write_elem
-from ..compat import H5Array, H5Group, ZarrArray, ZarrGroup
-from . import read_dispatched
+from ..compat import H5Array, H5Group, ZarrArray, ZarrGroup
+from . import read_dispatched, read_elem_lazy

 if TYPE_CHECKING:
     from collections.abc import Callable, Collection, Iterable, Sequence
@@ -173,7 +173,7 @@ def write_concat_dense( # noqa: PLR0917
     output_path: ZarrGroup | H5Group,
     axis: Literal[0, 1] = 0,
     reindexers: Reindexer | None = None,
-    fill_value=None,
+    fill_value: Any = None,
 ):
     """
     Writes the concatenation of given dense arrays to disk using dask.
@@ -193,9 +193,10 @@ def write_concat_dense( # noqa: PLR0917
         axis=axis,
     )
     write_elem(output_group, output_path, res)
-    output_group[output_path].attrs.update(
-        {"encoding-type": "array", "encoding-version": "0.2.0"}
-    )
+    output_group[output_path].attrs.update({
+        "encoding-type": "array",
+        "encoding-version": "0.2.0",
+    })


 def write_concat_sparse( # noqa: PLR0917
@@ -205,7 +206,7 @@ def write_concat_sparse( # noqa: PLR0917
     max_loaded_elems: int,
     axis: Literal[0, 1] = 0,
     reindexers: Reindexer | None = None,
-    fill_value=None,
+    fill_value: Any = None,
 ):
     """
     Writes and concatenates sparse datasets into a single output dataset.
@@ -245,26 +246,24 @@ def write_concat_sparse( # noqa: PLR0917


 def _write_concat_mappings( # noqa: PLR0913, PLR0917
-    mappings,
+    mappings: Collection[dict],
     output_group: ZarrGroup | H5Group,
-    keys,
-    output_path,
-    max_loaded_elems,
-    axis=0,
-    index=None,
-    reindexers=None,
-    fill_value=None,
+    keys: Collection[str],
+    output_path: str | Path,
+    max_loaded_elems: int,
+    axis: Literal[0, 1] = 0,
+    index: pd.Index = None,
+    reindexers: list[Reindexer] | None = None,
+    fill_value: Any = None,
 ):
     """
     Write a list of mappings to a zarr/h5 group.
     """
-    mapping_group = output_group.create_group(output_path)
-    mapping_group.attrs.update(
-        {
-            "encoding-type": "dict",
-            "encoding-version": "0.1.0",
-        }
-    )
+    mapping_group = output_group.create_group(output_path)
+    mapping_group.attrs.update({
+        "encoding-type": "dict",
+        "encoding-version": "0.1.0",
+    })
     for k in keys:
         elems = [m[k] for m in mappings]
         _write_concat_sequence(
@@ -281,13 +280,13 @@ def _write_concat_mappings( # noqa: PLR0913, PLR0917

 def _write_concat_arrays( # noqa: PLR0913, PLR0917
     arrays: Sequence[ZarrArray | H5Array | BaseCompressedSparseDataset],
-    output_group,
-    output_path,
-    max_loaded_elems,
-    axis=0,
-    reindexers=None,
-    fill_value=None,
-    join="inner",
+    output_group: ZarrGroup | H5Group,
+    output_path: str | Path,
+    max_loaded_elems: int,
+    axis: Literal[0, 1] = 0,
+    reindexers: list[Reindexer] | None = None,
+    fill_value: Any = None,
+    join: Literal["inner", "outer"] = "inner",
 ):
     init_elem = arrays[0]
     init_type = type(init_elem)
@@ -325,14 +324,14 @@ def _write_concat_arrays( # noqa: PLR0913, PLR0917

 def _write_concat_sequence( # noqa: PLR0913, PLR0917
     arrays: Sequence[pd.DataFrame | BaseCompressedSparseDataset | H5Array | ZarrArray],
-    output_group,
-    output_path,
-    max_loaded_elems,
-    axis=0,
-    index=None,
-    reindexers=None,
-    fill_value=None,
-    join="inner",
+    output_group: ZarrGroup | H5Group,
+    output_path: str | Path,
+    max_loaded_elems: int,
+    axis: Literal[0, 1] = 0,
+    index: pd.Index = None,
+    reindexers: list[Reindexer] | None = None,
+    fill_value: Any = None,
+    join: Literal["inner", "outer"] = "inner",
 ):
     """
     array, dataframe, csc_matrix, csc_matrix
@@ -377,17 +376,27 @@ def _write_concat_sequence( # noqa: PLR0913, PLR0917
     raise NotImplementedError(msg)


-def _write_alt_mapping(
-    …
-    …
-    …
-    …
-    …
-    …
-    …
+def _write_alt_mapping(
+    groups: Collection[H5Group, ZarrGroup],
+    output_group: ZarrGroup | H5Group,
+    alt_axis_name: Literal["obs", "var"],
+    merge: Callable,
+    reindexers: list[Reindexer],
+):
+    alt_mapping = merge([
+        {k: r(read_elem(v), axis=0) for k, v in dict(g[f"{alt_axis_name}m"]).items()}
+        for r, g in zip(reindexers, groups, strict=True)
+    ])
+    write_elem(output_group, f"{alt_axis_name}m", alt_mapping)


-def _write_alt_annot(groups, output_group, alt_axis_name, alt_indices, merge):
+def _write_alt_annot(
+    groups: Collection[H5Group, ZarrGroup],
+    output_group: ZarrGroup | H5Group,
+    alt_axis_name: Literal["obs", "var"],
+    alt_indices: pd.Index,
+    merge: Callable,
+):
     # Annotation for other axis
     alt_annot = merge_dataframes(
         [read_elem(g[alt_axis_name]) for g in groups], alt_indices, merge
@@ -396,7 +405,13 @@ def _write_alt_annot(groups, output_group, alt_axis_name, alt_indices, merge):


 def _write_axis_annot( # noqa: PLR0917
-    groups, output_group, axis_name, concat_indices, label, label_col, join
+    groups: Collection[H5Group, ZarrGroup],
+    output_group: ZarrGroup | H5Group,
+    axis_name: Literal["obs", "var"],
+    concat_indices: pd.Index,
+    label: str,
+    label_col: str,
+    join: Literal["inner", "outer"],
 ):
     concat_annot = pd.concat(
         unify_dtypes(read_elem(g[axis_name]) for g in groups),
@@ -409,6 +424,23 @@ def _write_axis_annot( # noqa: PLR0917
     write_elem(output_group, axis_name, concat_annot)


+def _write_alt_pairwise(
+    groups: Collection[H5Group, ZarrGroup],
+    output_group: ZarrGroup | H5Group,
+    alt_axis_name: Literal["obs", "var"],
+    merge: Callable,
+    reindexers: list[Reindexer],
+):
+    alt_pairwise = merge([
+        {
+            k: r(r(read_elem_lazy(v), axis=0), axis=1)
+            for k, v in dict(g[f"{alt_axis_name}p"]).items()
+        }
+        for r, g in zip(reindexers, groups, strict=True)
+    ])
+    write_elem(output_group, f"{alt_axis_name}p", alt_pairwise)
+
+
 def concat_on_disk( # noqa: PLR0912, PLR0913, PLR0915
     in_files: Collection[PathLike[str] | str] | Mapping[str, PathLike[str] | str],
     out_file: PathLike[str] | str,
@@ -491,7 +523,8 @@ def concat_on_disk( # noqa: PLR0912, PLR0913, PLR0915
         DataFrames are padded with missing values.
     pairwise
         Whether pairwise elements along the concatenated dimension should be included.
-        This is False by default, since the resulting arrays are often not meaningful.
+        This is False by default, since the resulting arrays are often not meaningful, and raises {class}`NotImplementedError` when True.
+        If you are interested in this feature, please open an issue.

     Notes
     -----
@@ -610,7 +643,7 @@ def concat_on_disk( # noqa: PLR0912, PLR0913, PLR0915
     )
     if index_unique is not None:
         concat_indices = concat_indices.str.cat(
-            _map_cat_to_str(label_col), sep=index_unique
+            label_col.map(str, na_action="ignore"), sep=index_unique
         )

     # Resulting indices for {axis_name} and {alt_axis_name}
@@ -635,7 +668,10 @@ def concat_on_disk( # noqa: PLR0912, PLR0913, PLR0915
     _write_alt_annot(groups, output_group, alt_axis_name, alt_index, merge)

     # Write {alt_axis_name}m
-    _write_alt_mapping(groups, output_group, alt_axis_name, …)
+    _write_alt_mapping(groups, output_group, alt_axis_name, merge, reindexers)
+
+    # Write {alt_axis_name}p
+    _write_alt_pairwise(groups, output_group, alt_axis_name, merge, reindexers)

     # Write X

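These helpers sit behind the public `anndata.experimental.concat_on_disk` entry point, whose first two parameters (`in_files`, `out_file`) appear in the hunk above. A minimal usage sketch with placeholder file names:

    from anndata.experimental import concat_on_disk

    # Concatenate two on-disk AnnData stores without loading either fully into
    # memory; the inputs and the output must share one format (all h5ad or all zarr).
    concat_on_disk(
        ["batch_1.h5ad", "batch_2.h5ad"],  # placeholder paths
        "combined.h5ad",
        join="inner",
    )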
anndata/experimental/multi_files/_anncollection.py
CHANGED
@@ -16,7 +16,7 @@ from ..._core.index import _normalize_index, _normalize_indices
 from ..._core.merge import concat_arrays, inner_concat_aligned_mapping
 from ..._core.sparse_dataset import BaseCompressedSparseDataset
 from ..._core.views import _resolve_idx
-from ...compat import _map_cat_to_str, old_positionals
+from ...compat import old_positionals

 if TYPE_CHECKING:
     from collections.abc import Iterable, Sequence
@@ -731,7 +731,7 @@ class AnnCollection(_ConcatViewMixin, _IterateViewMixin):
         )
         if index_unique is not None:
             concat_indices = concat_indices.str.cat(
-                _map_cat_to_str(label_col), sep=index_unique
+                label_col.map(str, na_action="ignore"), sep=index_unique
             )
         self.obs_names = pd.Index(concat_indices)

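Both call sites touched above (experimental/merge.py and _anncollection.py) now inline what the removed `_map_cat_to_str` compat helper did, relying on the `na_action` argument that `Categorical.map` gained in pandas 2.1:

    import pandas as pd

    cat = pd.Categorical(["a", "b", None])
    # Map category values to str while leaving missing values untouched; this is
    # what replaced the compat helper.
    print(cat.map(str, na_action="ignore"))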