anndata 0.12.1__py3-none-any.whl → 0.12.3__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package as published to a supported registry. It is provided for informational purposes only and reflects the changes between the versions as they appear in their respective public registries.
- anndata/__init__.py +23 -18
- anndata/_core/anndata.py +30 -12
- anndata/_core/index.py +6 -13
- anndata/_core/merge.py +28 -39
- anndata/_core/raw.py +5 -3
- anndata/_core/sparse_dataset.py +8 -8
- anndata/_core/views.py +21 -15
- anndata/_core/xarray.py +13 -12
- anndata/_io/h5ad.py +65 -49
- anndata/_io/read.py +17 -6
- anndata/_io/specs/lazy_methods.py +1 -1
- anndata/_io/specs/methods.py +38 -45
- anndata/_io/specs/registry.py +17 -20
- anndata/_io/utils.py +2 -7
- anndata/_io/zarr.py +16 -13
- anndata/_settings.py +9 -1
- anndata/_settings.pyi +1 -0
- anndata/compat/__init__.py +32 -15
- anndata/experimental/merge.py +86 -50
- anndata/experimental/multi_files/_anncollection.py +2 -2
- anndata/tests/helpers.py +44 -26
- {anndata-0.12.1.dist-info → anndata-0.12.3.dist-info}/METADATA +6 -7
- {anndata-0.12.1.dist-info → anndata-0.12.3.dist-info}/RECORD +26 -27
- testing/anndata/_pytest.py +2 -6
- anndata/_version.py +0 -62
- {anndata-0.12.1.dist-info → anndata-0.12.3.dist-info}/WHEEL +0 -0
- {anndata-0.12.1.dist-info → anndata-0.12.3.dist-info}/licenses/LICENSE +0 -0
anndata/__init__.py
CHANGED
@@ -12,7 +12,6 @@ from ._core.extensions import register_anndata_namespace
 from ._core.merge import concat
 from ._core.raw import Raw
 from ._settings import settings
-from ._version import __version__
 from ._warnings import (
     ExperimentalFeatureWarning,
     ImplicitModificationWarning,
@@ -28,22 +27,6 @@ from . import abc, experimental, typing, io, types  # isort: skip
 # We use these in tests by attribute access
 from . import logging  # noqa: F401  # isort: skip
 
-_DEPRECATED_IO = (
-    "read_loom",
-    "read_hdf",
-    "read_excel",
-    "read_umi_tools",
-    "read_csv",
-    "read_text",
-    "read_mtx",
-)
-_DEPRECATED = {method: f"io.{method}" for method in _DEPRECATED_IO}
-
-
-def __getattr__(attr_name: str) -> Any:
-    return module_get_attr_redirect(attr_name, deprecated_mapping=_DEPRECATED)
-
-
 __all__ = [
     "AnnData",
     "ExperimentalFeatureWarning",
@@ -51,7 +34,6 @@ __all__ = [
     "OldFormatWarning",
     "Raw",
     "WriteWarning",
-    "__version__",
    "abc",
     "concat",
     "experimental",
@@ -63,3 +45,26 @@ __all__ = [
     "types",
     "typing",
 ]
+
+_DEPRECATED_IO = (
+    "read_loom",
+    "read_hdf",
+    "read_excel",
+    "read_umi_tools",
+    "read_csv",
+    "read_text",
+    "read_mtx",
+)
+_DEPRECATED = {method: f"io.{method}" for method in _DEPRECATED_IO}
+
+
+def __getattr__(attr_name: str) -> Any:
+    if attr_name == "__version__":
+        import warnings
+        from importlib.metadata import version
+
+        msg = "`__version__` is deprecated, use `importlib.metadata.version('anndata')` instead."
+        warnings.warn(msg, FutureWarning, stacklevel=2)
+        return version("anndata")
+
+    return module_get_attr_redirect(attr_name, deprecated_mapping=_DEPRECATED)
anndata/_core/anndata.py
CHANGED
@@ -42,11 +42,7 @@ from .index import _normalize_indices, _subset, get_vector
 from .raw import Raw
 from .sparse_dataset import BaseCompressedSparseDataset, sparse_dataset
 from .storage import coerce_array
-from .views import (
-    DictView,
-    _resolve_idxs,
-    as_view,
-)
+from .views import DictView, _resolve_idxs, as_view
 from .xarray import Dataset2D
 
 if TYPE_CHECKING:
@@ -56,7 +52,7 @@ if TYPE_CHECKING:
 
     from zarr.storage import StoreLike
 
-    from ..compat import Index1D, XDataset
+    from ..compat import Index1D, Index1DNorm, XDataset
     from ..typing import XDataType
     from .aligned_mapping import AxisArraysView, LayersView, PairwiseArraysView
     from .index import Index
@@ -197,6 +193,11 @@ class AnnData(metaclass=utils.DeprecationMixinMeta):  # noqa: PLW1641
 
     _accessors: ClassVar[set[str]] = set()
 
+    # view attributes
+    _adata_ref: AnnData | None
+    _oidx: Index1DNorm | None
+    _vidx: Index1DNorm | None
+
     @old_positionals(
         "obsm",
         "varm",
@@ -226,8 +227,8 @@ class AnnData(metaclass=utils.DeprecationMixinMeta):  # noqa: PLW1641
         asview: bool = False,
         obsp: np.ndarray | Mapping[str, Sequence[Any]] | None = None,
         varp: np.ndarray | Mapping[str, Sequence[Any]] | None = None,
-        oidx:
-        vidx:
+        oidx: Index1DNorm | int | np.integer | None = None,
+        vidx: Index1DNorm | int | np.integer | None = None,
     ):
         # check for any multi-indices that aren’t later checked in coerce_array
         for attr, key in [(obs, "obs"), (var, "var"), (X, "X")]:
@@ -237,6 +238,8 @@ class AnnData(metaclass=utils.DeprecationMixinMeta):  # noqa: PLW1641
            if not isinstance(X, AnnData):
                 msg = "`X` has to be an AnnData object."
                 raise ValueError(msg)
+            assert oidx is not None
+            assert vidx is not None
             self._init_as_view(X, oidx, vidx)
         else:
             self._init_as_actual(
@@ -256,7 +259,12 @@ class AnnData(metaclass=utils.DeprecationMixinMeta):  # noqa: PLW1641
                 filemode=filemode,
             )
 
-    def _init_as_view(
+    def _init_as_view(
+        self,
+        adata_ref: AnnData,
+        oidx: Index1DNorm | int | np.integer,
+        vidx: Index1DNorm | int | np.integer,
+    ):
         if adata_ref.isbacked and adata_ref.is_view:
             msg = (
                 "Currently, you cannot index repeatedly into a backed AnnData, "
@@ -277,6 +285,9 @@ class AnnData(metaclass=utils.DeprecationMixinMeta):  # noqa: PLW1641
             vidx += adata_ref.n_vars * (vidx < 0)
             vidx = slice(vidx, vidx + 1, 1)
         if adata_ref.is_view:
+            assert adata_ref._adata_ref is not None
+            assert adata_ref._oidx is not None
+            assert adata_ref._vidx is not None
             prev_oidx, prev_vidx = adata_ref._oidx, adata_ref._vidx
             adata_ref = adata_ref._adata_ref
             oidx, vidx = _resolve_idxs((prev_oidx, prev_vidx), (oidx, vidx), adata_ref)
@@ -925,22 +936,27 @@ class AnnData(metaclass=utils.DeprecationMixinMeta):  # noqa: PLW1641
         Is sliced with `data` and `var` but behaves otherwise like a :term:`mapping`.
         """
 
+    @deprecated("obs (e.g. `k in adata.obs` or `str(adata.obs.columns.tolist())`)")
     def obs_keys(self) -> list[str]:
         """List keys of observation annotation :attr:`obs`."""
         return self._obs.keys().tolist()
 
+    @deprecated("var (e.g. `k in adata.var` or `str(adata.var.columns.tolist())`)")
     def var_keys(self) -> list[str]:
         """List keys of variable annotation :attr:`var`."""
         return self._var.keys().tolist()
 
+    @deprecated("obsm (e.g. `k in adata.obsm` or `adata.obsm.keys() | {'u'}`)")
     def obsm_keys(self) -> list[str]:
         """List keys of observation annotation :attr:`obsm`."""
         return list(self.obsm.keys())
 
+    @deprecated("varm (e.g. `k in adata.varm` or `adata.varm.keys() | {'u'}`)")
     def varm_keys(self) -> list[str]:
         """List keys of variable annotation :attr:`varm`."""
         return list(self.varm.keys())
 
+    @deprecated("uns (e.g. `k in adata.uns` or `sorted(adata.uns)`)")
     def uns_keys(self) -> list[str]:
         """List keys of unstructured annotation."""
         return sorted(self._uns.keys())
@@ -1004,7 +1020,9 @@ class AnnData(metaclass=utils.DeprecationMixinMeta):  # noqa: PLW1641
 
         write_attribute(self.file._file, attr, value)
 
-    def _normalize_indices(
+    def _normalize_indices(
+        self, index: Index | None
+    ) -> tuple[Index1DNorm | int | np.integer, Index1DNorm | int | np.integer]:
         return _normalize_indices(index, self.obs_names, self.var_names)
 
     # TODO: this is not quite complete...
@@ -1890,8 +1908,8 @@ class AnnData(metaclass=utils.DeprecationMixinMeta):  # noqa: PLW1641
             compression_opts=compression_opts,
             as_dense=as_dense,
         )
-
-        if self.isbacked:
+        # Only reset the filename if the AnnData object now points to a complete new copy
+        if self.isbacked and not self.is_view:
             self.file.filename = filename
 
     write = write_h5ad  # a shortcut and backwards compat
anndata/_core/index.py
CHANGED
@@ -14,18 +14,18 @@ from ..compat import AwkArray, CSArray, CSMatrix, DaskArray, XDataArray
 from .xarray import Dataset2D
 
 if TYPE_CHECKING:
-    from ..compat import Index, Index1D
+    from ..compat import Index, Index1D, Index1DNorm
 
 
 def _normalize_indices(
     index: Index | None, names0: pd.Index, names1: pd.Index
-) -> tuple[
+) -> tuple[Index1DNorm | int | np.integer, Index1DNorm | int | np.integer]:
     # deal with tuples of length 1
     if isinstance(index, tuple) and len(index) == 1:
         index = index[0]
     # deal with pd.Series
     if isinstance(index, pd.Series):
-        index
+        index = index.values
     if isinstance(index, tuple):
         # TODO: The series should probably be aligned first
         index = tuple(i.values if isinstance(i, pd.Series) else i for i in index)
@@ -36,15 +36,8 @@ def _normalize_indices(
 
 
 def _normalize_index(  # noqa: PLR0911, PLR0912
-    indexer:
-
-    | int
-    | str
-    | Sequence[bool | int | np.integer]
-    | np.ndarray
-    | pd.Index,
-    index: pd.Index,
-) -> slice | int | np.ndarray:  # ndarray of int or bool
+    indexer: Index1D, index: pd.Index
+) -> Index1DNorm | int | np.integer:
     # TODO: why is this here? All tests pass without it and it seems at the minimum not strict enough.
     if not isinstance(index, pd.RangeIndex) and index.dtype in (np.float64, np.int64):
         msg = f"Don’t call _normalize_index with non-categorical/string names and non-range index {index}"
@@ -212,7 +205,7 @@ def _subset_awkarray(a: AwkArray, subset_idx: Index):
 
 # Registration for SparseDataset occurs in sparse_dataset.py
 @_subset.register(h5py.Dataset)
-def _subset_dataset(d, subset_idx):
+def _subset_dataset(d: h5py.Dataset, subset_idx: Index):
     if not isinstance(subset_idx, tuple):
         subset_idx = (subset_idx,)
     ordered = list(subset_idx)
anndata/_core/merge.py
CHANGED
@@ -14,9 +14,7 @@ from warnings import warn
 
 import numpy as np
 import pandas as pd
-import scipy
 from natsort import natsorted
-from packaging.version import Version
 from scipy import sparse
 
 from anndata._core.file_backing import to_memory
@@ -30,7 +28,6 @@ from ..compat import (
     CupyCSRMatrix,
     CupySparseMatrix,
     DaskArray,
-    _map_cat_to_str,
 )
 from ..utils import asarray, axis_len, warn_once
 from .anndata import AnnData
@@ -146,11 +143,16 @@ def equal_dask_array(a, b) -> bool:
         return False
     if isinstance(b, DaskArray) and tokenize(a) == tokenize(b):
         return True
-    if isinstance(a._meta,
+    if isinstance(a._meta, np.ndarray):
+        return da.equal(a, b, where=~(da.isnan(a) & da.isnan(b))).all().compute()
+    if a.chunksize == b.chunksize and isinstance(
+        a._meta, CupySparseMatrix | CSMatrix | CSArray
+    ):
         # TODO: Maybe also do this in the other case?
         return da.map_blocks(equal, a, b, drop_axis=(0, 1)).all()
-
-
+    msg = "Misaligned chunks detected when checking for merge equality of dask arrays. Reading full arrays into memory."
+    warn(msg, UserWarning, stacklevel=3)
+    return equal(a.compute(), b.compute())
 
 
 @equal.register(np.ndarray)
@@ -185,15 +187,6 @@ def equal_sparse(a, b) -> bool:
         # Comparison broken for CSC matrices
         # https://github.com/cupy/cupy/issues/7757
         a, b = CupyCSRMatrix(a), CupyCSRMatrix(b)
-    if Version(scipy.__version__) >= Version("1.16.0rc1"):
-        # TODO: https://github.com/scipy/scipy/issues/23068
-        return bool(
-            a.format == b.format
-            and (a.shape == b.shape)
-            and np.all(a.indptr == b.indptr)
-            and np.all(a.indices == b.indices)
-            and np.all((a.data == b.data) | (np.isnan(a.data) & np.isnan(b.data)))
-        )
     comp = a != b
     if isinstance(comp, bool):
         return not comp
@@ -617,6 +610,9 @@ class Reindexer:
         sub_el = _subset(el, make_slice(indexer, axis, len(shape)))
 
         if any(indexer == -1):
+            # TODO: Remove this condition once https://github.com/dask/dask/pull/12078 is released
+            if isinstance(sub_el._meta, CSArray | CSMatrix) and np.isscalar(fill_value):
+                fill_value = np.array([[fill_value]])
             sub_el[make_slice(indexer == -1, axis, len(shape))] = fill_value
 
         return sub_el
@@ -1643,7 +1639,7 @@ def concat(  # noqa: PLR0912, PLR0913, PLR0915
         )
         if index_unique is not None:
             concat_indices = concat_indices.str.cat(
-                _map_cat_to_str(label_col), sep=index_unique
+                label_col.map(str, na_action="ignore"), sep=index_unique
             )
         concat_indices = pd.Index(concat_indices)
 
@@ -1748,15 +1744,10 @@
             for r, a in zip(reindexers, adatas, strict=True)
         ],
     )
-    alt_pairwise = merge(
-        [
-            {
-                k: r(r(v, axis=0), axis=1)
-                for k, v in getattr(a, f"{alt_axis_name}p").items()
-            }
-            for r, a in zip(reindexers, adatas, strict=True)
-        ]
-    )
+    alt_pairwise = merge([
+        {k: r(r(v, axis=0), axis=1) for k, v in getattr(a, f"{alt_axis_name}p").items()}
+        for r, a in zip(reindexers, adatas, strict=True)
+    ])
     uns = uns_merge([a.uns for a in adatas])
 
     raw = None
@@ -1785,17 +1776,15 @@
             "not concatenating `.raw` attributes."
         )
         warn(msg, UserWarning, stacklevel=2)
-    return AnnData(
-        **{
-            "X": X,
-            "layers": layers,
-            axis_name: concat_annot,
-            alt_axis_name: alt_annot,
-            f"{axis_name}m": concat_mapping,
-            f"{alt_axis_name}m": alt_mapping,
-            f"{axis_name}p": concat_pairwise,
-            f"{alt_axis_name}p": alt_pairwise,
-            "uns": uns,
-            "raw": raw,
-        }
-    )
+    return AnnData(**{
+        "X": X,
+        "layers": layers,
+        axis_name: concat_annot,
+        alt_axis_name: alt_annot,
+        f"{axis_name}m": concat_mapping,
+        f"{alt_axis_name}m": alt_mapping,
+        f"{axis_name}p": concat_pairwise,
+        f"{alt_axis_name}p": alt_pairwise,
+        "uns": uns,
+        "raw": raw,
+    })
anndata/_core/raw.py
CHANGED
@@ -17,7 +17,7 @@ if TYPE_CHECKING:
     from collections.abc import Mapping, Sequence
     from typing import ClassVar
 
-    from ..compat import CSMatrix
+    from ..compat import CSMatrix, Index, Index1DNorm
     from .aligned_mapping import AxisArraysView
     from .anndata import AnnData
     from .sparse_dataset import BaseCompressedSparseDataset
@@ -121,7 +121,7 @@
     def obs_names(self) -> pd.Index[str]:
         return self._adata.obs_names
 
-    def __getitem__(self, index):
+    def __getitem__(self, index: Index) -> Raw:
         oidx, vidx = self._normalize_indices(index)
 
         # To preserve two dimensional shape
@@ -169,7 +169,9 @@
             uns=self._adata.uns.copy(),
         )
 
-    def _normalize_indices(
+    def _normalize_indices(
+        self, packed_index: Index
+    ) -> tuple[Index1DNorm | int | np.integer, Index1DNorm | int | np.integer]:
         # deal with slicing with pd.Series
         if isinstance(packed_index, pd.Series):
             packed_index = packed_index.values
anndata/_core/sparse_dataset.py
CHANGED
@@ -16,6 +16,7 @@ import warnings
 from abc import ABC
 from collections.abc import Iterable
 from functools import cached_property
+from importlib.metadata import version
 from itertools import accumulate, chain, pairwise
 from math import floor
 from pathlib import Path
@@ -23,7 +24,6 @@ from typing import TYPE_CHECKING, NamedTuple
 
 import h5py
 import numpy as np
-import scipy
 import scipy.sparse as ss
 from packaging.version import Version
 from scipy.sparse import _sparsetools
@@ -54,7 +54,7 @@ else:
     from scipy.sparse import spmatrix as _cs_matrix
 
 
-SCIPY_1_15 = Version(scipy.__version__) >= Version("1.15rc0")
+SCIPY_1_15 = Version(version("scipy")) >= Version("1.15rc0")
 
 
 class BackedFormat(NamedTuple):
@@ -278,9 +278,9 @@ def get_compressed_vectors(
     indptr_slices = [slice(*(x.indptr[i : i + 2])) for i in row_idxs]
     # HDF5 cannot handle out-of-order integer indexing
     if isinstance(x.data, ZarrArray):
-        as_np_indptr = np.concatenate(
-            [np.arange(s.start, s.stop) for s in indptr_slices]
-        )
+        as_np_indptr = np.concatenate([
+            np.arange(s.start, s.stop) for s in indptr_slices
+        ])
         data = x.data[as_np_indptr]
         indices = x.indices[as_np_indptr]
     else:
@@ -309,9 +309,9 @@ def get_compressed_vectors_for_slices(
     start_indptr = indptr_indices[0] - next(offsets)
     if len(slices) < 2:  # there is only one slice so no need to concatenate
         return data, indices, start_indptr
-    end_indptr = np.concatenate(
-        [s[1:] - o for s, o in zip(indptr_indices[1:], offsets, strict=True)]
-    )
+    end_indptr = np.concatenate([
+        s[1:] - o for s, o in zip(indptr_indices[1:], offsets, strict=True)
+    ])
     indptr = np.concatenate([start_indptr, end_indptr])
     return data, indices, indptr
anndata/_core/views.py
CHANGED
@@ -29,8 +29,12 @@ if TYPE_CHECKING:
     from collections.abc import Callable, Iterable, KeysView, Sequence
     from typing import Any, ClassVar
 
+    from numpy.typing import NDArray
+
     from anndata import AnnData
 
+    from ..compat import Index1DNorm
+
 
 @contextmanager
 def view_update(adata_view: AnnData, attr_name: str, keys: tuple[str, ...]):
@@ -96,7 +100,7 @@ class _ViewMixin(_SetItemMixin):
 
     # TODO: This makes `deepcopy(obj)` return `obj._view_args.parent._adata_ref`, fix it
     def __deepcopy__(self, memo):
-        parent, attrname,
+        parent, attrname, _keys = self._view_args
         return deepcopy(getattr(parent._adata_ref, attrname))
 
 
@@ -433,18 +437,24 @@ except ImportError:
     pass
 
 
-def _resolve_idxs(
-
-
+def _resolve_idxs(
+    old: tuple[Index1DNorm, Index1DNorm],
+    new: tuple[Index1DNorm, Index1DNorm],
+    adata: AnnData,
+) -> tuple[Index1DNorm, Index1DNorm]:
+    o, v = (_resolve_idx(old[i], new[i], adata.shape[i]) for i in (0, 1))
+    return o, v
 
 
 @singledispatch
-def _resolve_idx(old, new, l):
-
+def _resolve_idx(old: Index1DNorm, new: Index1DNorm, l: Literal[0, 1]) -> Index1DNorm:
+    raise NotImplementedError
 
 
 @_resolve_idx.register(np.ndarray)
-def _resolve_idx_ndarray(
+def _resolve_idx_ndarray(
+    old: NDArray[np.bool_] | NDArray[np.integer], new: Index1DNorm, l: Literal[0, 1]
+) -> NDArray[np.bool_] | NDArray[np.integer]:
     if is_bool_dtype(old) and is_bool_dtype(new):
         mask_new = np.zeros_like(old)
         mask_new[np.flatnonzero(old)[new]] = True
@@ -454,21 +464,17 @@ def _resolve_idx_ndarray(old, new, l):
     return old[new]
 
 
-@_resolve_idx.register(np.integer)
-@_resolve_idx.register(int)
-def _resolve_idx_scalar(old, new, l):
-    return np.array([old])[new]
-
-
 @_resolve_idx.register(slice)
-def _resolve_idx_slice(
+def _resolve_idx_slice(
+    old: slice, new: Index1DNorm, l: Literal[0, 1]
+) -> slice | NDArray[np.integer]:
     if isinstance(new, slice):
         return _resolve_idx_slice_slice(old, new, l)
     else:
         return np.arange(*old.indices(l))[new]
 
 
-def _resolve_idx_slice_slice(old, new, l):
+def _resolve_idx_slice_slice(old: slice, new: slice, l: Literal[0, 1]) -> slice:
     r = range(*old.indices(l))[new]
     # Convert back to slice
     start, stop, step = r.start, r.stop, r.step
anndata/_core/xarray.py
CHANGED
@@ -184,18 +184,6 @@
         Handler class for doing the iloc-style indexing using :meth:`~xarray.Dataset.isel`.
         """
 
-        @dataclass(frozen=True)
-        class IlocGetter:
-            _ds: XDataset
-            _coord: str
-
-            def __getitem__(self, idx) -> Dataset2D:
-                # xarray seems to have some code looking for a second entry in tuples,
-                # so we unpack the tuple
-                if isinstance(idx, tuple) and len(idx) == 1:
-                    idx = idx[0]
-                return Dataset2D(self._ds.isel(**{self._coord: idx}))
-
         return IlocGetter(self.ds, self.index_dim)
 
     # See https://github.com/pydata/xarray/blob/568f3c1638d2d34373408ce2869028faa3949446/xarray/core/dataset.py#L1239-L1248
@@ -402,3 +390,16 @@
     def _items(self):
         for col in self:
             yield col, self[col]
+
+
+@dataclass(frozen=True)
+class IlocGetter:
+    _ds: XDataset
+    _coord: str
+
+    def __getitem__(self, idx) -> Dataset2D:
+        # xarray seems to have some code looking for a second entry in tuples,
+        # so we unpack the tuple
+        if isinstance(idx, tuple) and len(idx) == 1:
+            idx = idx[0]
+        return Dataset2D(self._ds.isel(**{self._coord: idx}))