anndata 0.12.2__py3-none-any.whl → 0.12.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- anndata/__init__.py +23 -18
- anndata/_core/aligned_df.py +7 -0
- anndata/_core/anndata.py +8 -7
- anndata/_core/index.py +136 -23
- anndata/_core/merge.py +34 -44
- anndata/_core/sparse_dataset.py +12 -11
- anndata/_core/views.py +1 -1
- anndata/_io/h5ad.py +18 -27
- anndata/_io/specs/lazy_methods.py +1 -1
- anndata/_io/specs/methods.py +49 -65
- anndata/_io/specs/registry.py +17 -20
- anndata/_io/utils.py +2 -7
- anndata/_io/zarr.py +16 -7
- anndata/_settings.py +8 -0
- anndata/_settings.pyi +1 -0
- anndata/compat/__init__.py +3 -11
- anndata/experimental/backed/_lazy_arrays.py +5 -2
- anndata/experimental/merge.py +86 -50
- anndata/experimental/multi_files/_anncollection.py +2 -2
- {anndata-0.12.2.dist-info → anndata-0.12.4.dist-info}/METADATA +6 -7
- {anndata-0.12.2.dist-info → anndata-0.12.4.dist-info}/RECORD +24 -25
- testing/anndata/_pytest.py +2 -6
- anndata/_version.py +0 -62
- {anndata-0.12.2.dist-info → anndata-0.12.4.dist-info}/WHEEL +0 -0
- {anndata-0.12.2.dist-info → anndata-0.12.4.dist-info}/licenses/LICENSE +0 -0
anndata/__init__.py
CHANGED
@@ -12,7 +12,6 @@ from ._core.extensions import register_anndata_namespace
 from ._core.merge import concat
 from ._core.raw import Raw
 from ._settings import settings
-from ._version import __version__
 from ._warnings import (
     ExperimentalFeatureWarning,
     ImplicitModificationWarning,
@@ -28,22 +27,6 @@ from . import abc, experimental, typing, io, types  # isort: skip
 # We use these in tests by attribute access
 from . import logging  # noqa: F401 # isort: skip

-_DEPRECATED_IO = (
-    "read_loom",
-    "read_hdf",
-    "read_excel",
-    "read_umi_tools",
-    "read_csv",
-    "read_text",
-    "read_mtx",
-)
-_DEPRECATED = {method: f"io.{method}" for method in _DEPRECATED_IO}
-
-
-def __getattr__(attr_name: str) -> Any:
-    return module_get_attr_redirect(attr_name, deprecated_mapping=_DEPRECATED)
-
-
 __all__ = [
     "AnnData",
     "ExperimentalFeatureWarning",
@@ -51,7 +34,6 @@ __all__ = [
     "OldFormatWarning",
     "Raw",
     "WriteWarning",
-    "__version__",
     "abc",
     "concat",
     "experimental",
@@ -63,3 +45,26 @@ __all__ = [
     "types",
     "typing",
 ]
+
+_DEPRECATED_IO = (
+    "read_loom",
+    "read_hdf",
+    "read_excel",
+    "read_umi_tools",
+    "read_csv",
+    "read_text",
+    "read_mtx",
+)
+_DEPRECATED = {method: f"io.{method}" for method in _DEPRECATED_IO}
+
+
+def __getattr__(attr_name: str) -> Any:
+    if attr_name == "__version__":
+        import warnings
+        from importlib.metadata import version
+
+        msg = "`__version__` is deprecated, use `importlib.metadata.version('anndata')` instead."
+        warnings.warn(msg, FutureWarning, stacklevel=2)
+        return version("anndata")
+
+    return module_get_attr_redirect(attr_name, deprecated_mapping=_DEPRECATED)
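Note: with this change `anndata.__version__` is no longer a static attribute; it is served by the module-level `__getattr__` above, which warns and reads the installed package metadata. A minimal sketch of the resulting behavior (package name taken from the diff; printed values are illustrative):

    import warnings
    from importlib.metadata import version

    import anndata

    # Preferred going forward: query the installed distribution directly.
    print(version("anndata"))  # e.g. "0.12.4"

    # Deprecated path: attribute access now goes through __getattr__ and warns.
    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter("always")
        print(anndata.__version__)  # still returns the same version string
    assert any(issubclass(w.category, FutureWarning) for w in caught)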
anndata/_core/aligned_df.py
CHANGED
@@ -78,6 +78,13 @@ def _gen_dataframe_df(
     attr: Literal["obs", "var"],
     length: int | None = None,
 ):
+    if isinstance(anno.index, pd.MultiIndex):
+        msg = (
+            "pandas.MultiIndex not supported as index for obs or var on declaration.\n\
+You can set `obs_names` manually although most operations after will error or convert to str.\n\
+This behavior will likely be clarified in a future breaking release."
+        )
+        raise ValueError(msg)
     if length is not None and length != len(anno):
         raise _mk_df_error(source, attr, length, len(anno))
     anno = anno.copy(deep=False)
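In practice this means an `AnnData` built from a DataFrame with a `pandas.MultiIndex` index now fails at construction time rather than later. A hedged sketch (toy data, not from the diff):

    import numpy as np
    import pandas as pd
    import anndata as ad

    obs = pd.DataFrame(
        {"group": ["a", "b"]},
        index=pd.MultiIndex.from_tuples([("sample1", 0), ("sample2", 1)]),
    )

    try:
        ad.AnnData(X=np.zeros((2, 3)), obs=obs)
    except ValueError as err:
        # Raised by the new MultiIndex guard shown above
        print(err)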
anndata/_core/anndata.py
CHANGED
@@ -42,11 +42,7 @@ from .index import _normalize_indices, _subset, get_vector
 from .raw import Raw
 from .sparse_dataset import BaseCompressedSparseDataset, sparse_dataset
 from .storage import coerce_array
-from .views import (
-    DictView,
-    _resolve_idxs,
-    as_view,
-)
+from .views import DictView, _resolve_idxs, as_view
 from .xarray import Dataset2D

 if TYPE_CHECKING:
@@ -940,22 +936,27 @@ class AnnData(metaclass=utils.DeprecationMixinMeta):  # noqa: PLW1641
         Is sliced with `data` and `var` but behaves otherwise like a :term:`mapping`.
         """

+    @deprecated("obs (e.g. `k in adata.obs` or `str(adata.obs.columns.tolist())`)")
     def obs_keys(self) -> list[str]:
         """List keys of observation annotation :attr:`obs`."""
         return self._obs.keys().tolist()

+    @deprecated("var (e.g. `k in adata.var` or `str(adata.var.columns.tolist())`)")
     def var_keys(self) -> list[str]:
         """List keys of variable annotation :attr:`var`."""
         return self._var.keys().tolist()

+    @deprecated("obsm (e.g. `k in adata.obsm` or `adata.obsm.keys() | {'u'}`)")
     def obsm_keys(self) -> list[str]:
         """List keys of observation annotation :attr:`obsm`."""
         return list(self.obsm.keys())

+    @deprecated("varm (e.g. `k in adata.varm` or `adata.varm.keys() | {'u'}`)")
     def varm_keys(self) -> list[str]:
         """List keys of variable annotation :attr:`varm`."""
         return list(self.varm.keys())

+    @deprecated("uns (e.g. `k in adata.uns` or `sorted(adata.uns)`)")
     def uns_keys(self) -> list[str]:
         """List keys of unstructured annotation."""
         return sorted(self._uns.keys())
@@ -1907,8 +1908,8 @@ class AnnData(metaclass=utils.DeprecationMixinMeta):  # noqa: PLW1641
             compression_opts=compression_opts,
             as_dense=as_dense,
         )
-
-        if self.isbacked:
+        # Only reset the filename if the AnnData object now points to a complete new copy
+        if self.isbacked and not self.is_view:
             self.file.filename = filename

     write = write_h5ad  # a shortcut and backwards compat
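The `*_keys()` helpers are now marked deprecated, with the replacement spelled out in each message. A short sketch of the suggested idioms (toy object, not from the diff):

    import numpy as np
    import pandas as pd
    import anndata as ad

    adata = ad.AnnData(
        X=np.zeros((2, 2)),
        obs=pd.DataFrame({"group": ["a", "b"]}, index=["cell1", "cell2"]),
    )

    # Instead of adata.obs_keys() / adata.uns_keys() / adata.obsm_keys():
    obs_columns = adata.obs.columns.tolist()  # ["group"]
    has_group = "group" in adata.obs          # membership test, as the message suggests
    uns_keys = sorted(adata.uns)              # []
    obsm_keys = list(adata.obsm.keys())       # []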
anndata/_core/index.py
CHANGED
@@ -3,7 +3,7 @@ from __future__ import annotations
 from collections.abc import Iterable, Sequence
 from functools import singledispatch
 from itertools import repeat
-from typing import TYPE_CHECKING
+from typing import TYPE_CHECKING, cast, overload

 import h5py
 import numpy as np
@@ -14,6 +14,8 @@ from ..compat import AwkArray, CSArray, CSMatrix, DaskArray, XDataArray
 from .xarray import Dataset2D

 if TYPE_CHECKING:
+    from numpy.typing import NDArray
+
     from ..compat import Index, Index1D, Index1DNorm


@@ -161,7 +163,10 @@ def unpack_index(index: Index) -> tuple[Index1D, Index1D]:


 @singledispatch
-def _subset(a: np.ndarray | pd.DataFrame, subset_idx: Index):
+def _subset(
+    a: np.ndarray | pd.DataFrame,
+    subset_idx: tuple[Index1DNorm] | tuple[Index1DNorm, Index1DNorm],
+):
     # Select as combination of indexes, not coordinates
     # Correcting for indexing behaviour of np.ndarray
     if all(isinstance(x, Iterable) for x in subset_idx):
@@ -170,7 +175,9 @@ def _subset(a: np.ndarray | pd.DataFrame, subset_idx: Index):


 @_subset.register(DaskArray)
-def _subset_dask(a: DaskArray, subset_idx: Index):
+def _subset_dask(
+    a: DaskArray, subset_idx: tuple[Index1DNorm] | tuple[Index1DNorm, Index1DNorm]
+):
     if len(subset_idx) > 1 and all(isinstance(x, Iterable) for x in subset_idx):
         if issparse(a._meta) and a._meta.format == "csc":
             return a[:, subset_idx[1]][subset_idx[0], :]
@@ -180,24 +187,32 @@ def _subset_dask(a: DaskArray, subset_idx: Index):

 @_subset.register(CSMatrix)
 @_subset.register(CSArray)
-def _subset_sparse(
+def _subset_sparse(
+    a: CSMatrix | CSArray,
+    subset_idx: tuple[Index1DNorm] | tuple[Index1DNorm, Index1DNorm],
+):
     # Correcting for indexing behaviour of sparse.spmatrix
     if len(subset_idx) > 1 and all(isinstance(x, Iterable) for x in subset_idx):
         first_idx = subset_idx[0]
         if issubclass(first_idx.dtype.type, np.bool_):
-            first_idx = np.
+            first_idx = np.flatnonzero(first_idx)
         subset_idx = (first_idx.reshape(-1, 1), *subset_idx[1:])
     return a[subset_idx]


 @_subset.register(pd.DataFrame)
 @_subset.register(Dataset2D)
-def _subset_df(
+def _subset_df(
+    df: pd.DataFrame | Dataset2D,
+    subset_idx: tuple[Index1DNorm] | tuple[Index1DNorm, Index1DNorm],
+):
     return df.iloc[subset_idx]


 @_subset.register(AwkArray)
-def _subset_awkarray(a: AwkArray, subset_idx: Index):
+def _subset_awkarray(
+    a: AwkArray, subset_idx: tuple[Index1DNorm] | tuple[Index1DNorm, Index1DNorm]
+):
     if all(isinstance(x, Iterable) for x in subset_idx):
         subset_idx = np.ix_(*subset_idx)
     return a[subset_idx]
@@ -205,23 +220,121 @@ def _subset_awkarray(a: AwkArray, subset_idx: Index):

 # Registration for SparseDataset occurs in sparse_dataset.py
 @_subset.register(h5py.Dataset)
-def _subset_dataset(
-
-
-
-
-
-
-
-
-
-
-
+def _subset_dataset(
+    d: h5py.Dataset, subset_idx: tuple[Index1DNorm] | tuple[Index1DNorm, Index1DNorm]
+):
+    order: tuple[NDArray[np.integer] | slice, ...]
+    inv_order: tuple[NDArray[np.integer] | slice, ...]
+    order, inv_order = zip(*map(_index_order_and_inverse, subset_idx), strict=True)
+    # check for duplicates or multi-dimensional fancy indexing
+    array_dims = [i for i in order if isinstance(i, np.ndarray)]
+    has_duplicates = any(len(np.unique(i)) != len(i) for i in array_dims)
+    # Use safe indexing if there are duplicates OR multiple array dimensions
+    # (h5py doesn't support multi-dimensional fancy indexing natively)
+    if has_duplicates or len(array_dims) > 1:
+        # For multi-dimensional indexing, bypass the sorting logic and use original indices
+        return _safe_fancy_index_h5py(d, subset_idx)
     # from hdf5, then to real order
-    return d[
-
-
-
+    return d[order][inv_order]
+
+
+@overload
+def _index_order_and_inverse(
+    axis_idx: NDArray[np.integer] | NDArray[np.bool_],
+) -> tuple[NDArray[np.integer], NDArray[np.integer]]: ...
+@overload
+def _index_order_and_inverse(axis_idx: slice) -> tuple[slice, slice]: ...
+def _index_order_and_inverse(
+    axis_idx: Index1DNorm,
+) -> tuple[Index1DNorm, NDArray[np.integer] | slice]:
+    """Order and get inverse index array."""
+    if not isinstance(axis_idx, np.ndarray):
+        return axis_idx, slice(None)
+    if axis_idx.dtype == bool:
+        axis_idx = np.flatnonzero(axis_idx)
+    order = np.argsort(axis_idx)
+    return axis_idx[order], np.argsort(order)
+
+
+@overload
+def _process_index_for_h5py(
+    idx: NDArray[np.integer] | NDArray[np.bool_],
+) -> tuple[NDArray[np.integer], NDArray[np.integer]]: ...
+@overload
+def _process_index_for_h5py(idx: slice) -> tuple[slice, None]: ...
+def _process_index_for_h5py(
+    idx: Index1DNorm,
+) -> tuple[Index1DNorm, NDArray[np.integer] | None]:
+    """Process a single index for h5py compatibility, handling sorting and duplicates."""
+    if not isinstance(idx, np.ndarray):
+        # Not an array (slice, integer, list) - no special processing needed
+        return idx, None
+
+    if idx.dtype == bool:
+        idx = np.flatnonzero(idx)
+
+    # For h5py fancy indexing, we need sorted indices
+    # But we also need to track how to reverse the sorting
+    unique, inverse = np.unique(idx, return_inverse=True)
+    return (
+        # Has duplicates - use unique + inverse mapping approach
+        (unique, inverse)
+        if len(unique) != len(idx)
+        # No duplicates - just sort and track reverse mapping
+        else _index_order_and_inverse(idx)
+    )
+
+
+def _safe_fancy_index_h5py(
+    dataset: h5py.Dataset,
+    subset_idx: tuple[Index1DNorm] | tuple[Index1DNorm, Index1DNorm],
+) -> h5py.Dataset:
+    # Handle multi-dimensional indexing of h5py dataset
+    # This avoids h5py's limitation with multi-dimensional fancy indexing
+    # without loading the entire dataset into memory
+
+    # Convert boolean arrays to integer arrays and handle sorting for h5py
+    processed_indices: tuple[NDArray[np.integer] | slice, ...]
+    reverse_indices: tuple[NDArray[np.integer] | None, ...]
+    processed_indices, reverse_indices = zip(
+        *map(_process_index_for_h5py, subset_idx), strict=True
+    )
+
+    # First find the index that reduces the size of the dataset the most
+    i_min = np.argmin([
+        _get_index_size(inds, dataset.shape[i]) / dataset.shape[i]
+        for i, inds in enumerate(processed_indices)
+    ])
+
+    # Apply the most selective index first to h5py dataset
+    first_index = [slice(None)] * len(processed_indices)
+    first_index[i_min] = processed_indices[i_min]
+    in_memory_array = cast("np.ndarray", dataset[tuple(first_index)])
+
+    # Apply remaining indices to the numpy array
+    remaining_indices = list(processed_indices)
+    remaining_indices[i_min] = slice(None)  # Already applied
+    result = in_memory_array[tuple(remaining_indices)]
+
+    # Now apply reverse mappings to get the original order
+    for dim, reverse_map in enumerate(reverse_indices):
+        if reverse_map is not None:
+            result = result.take(reverse_map, axis=dim)

+    return result
+
+
+def _get_index_size(idx: Index1DNorm, dim_size: int) -> int:
+    """Get size for any index type."""
+    if isinstance(idx, slice):
+        return len(range(*idx.indices(dim_size)))
+    elif isinstance(idx, int):
+        return 1
+    else:  # For other types, try to get length
+        return len(idx)
+
+
+def make_slice(idx, dimidx: int, n: int = 2) -> tuple[slice, ...]:
     mut = list(repeat(slice(None), n))
     mut[dimidx] = idx
     return tuple(mut)
anndata/_core/merge.py
CHANGED
@@ -14,9 +14,7 @@ from warnings import warn

 import numpy as np
 import pandas as pd
-import scipy
 from natsort import natsorted
-from packaging.version import Version
 from scipy import sparse

 from anndata._core.file_backing import to_memory
@@ -30,7 +28,6 @@ from ..compat import (
     CupyCSRMatrix,
     CupySparseMatrix,
     DaskArray,
-    _map_cat_to_str,
 )
 from ..utils import asarray, axis_len, warn_once
 from .anndata import AnnData
@@ -41,6 +38,7 @@ if TYPE_CHECKING:
     from collections.abc import Collection, Generator, Iterable, Sequence
     from typing import Any

+    from numpy.typing import NDArray
     from pandas.api.extensions import ExtensionDtype

     from anndata._types import Join_T
@@ -146,11 +144,16 @@ def equal_dask_array(a, b) -> bool:
         return False
     if isinstance(b, DaskArray) and tokenize(a) == tokenize(b):
         return True
-    if isinstance(a._meta,
+    if isinstance(a._meta, np.ndarray):
+        return da.equal(a, b, where=~(da.isnan(a) & da.isnan(b))).all().compute()
+    if a.chunksize == b.chunksize and isinstance(
+        a._meta, CupySparseMatrix | CSMatrix | CSArray
+    ):
         # TODO: Maybe also do this in the other case?
         return da.map_blocks(equal, a, b, drop_axis=(0, 1)).all()
-
-
+    msg = "Misaligned chunks detected when checking for merge equality of dask arrays. Reading full arrays into memory."
+    warn(msg, UserWarning, stacklevel=3)
+    return equal(a.compute(), b.compute())


 @equal.register(np.ndarray)
@@ -185,15 +188,6 @@ def equal_sparse(a, b) -> bool:
         # Comparison broken for CSC matrices
         # https://github.com/cupy/cupy/issues/7757
         a, b = CupyCSRMatrix(a), CupyCSRMatrix(b)
-    if Version(scipy.__version__) >= Version("1.16.0rc1"):
-        # TODO: https://github.com/scipy/scipy/issues/23068
-        return bool(
-            a.format == b.format
-            and (a.shape == b.shape)
-            and np.all(a.indptr == b.indptr)
-            and np.all(a.indices == b.indices)
-            and np.all((a.data == b.data) | (np.isnan(a.data) & np.isnan(b.data)))
-        )
     comp = a != b
     if isinstance(comp, bool):
         return not comp
@@ -560,7 +554,7 @@ class Reindexer:
     Together with `old_pos` this forms a mapping.
     """

-    def __init__(self, old_idx, new_idx):
+    def __init__(self, old_idx: pd.Index, new_idx: pd.Index) -> None:
         self.old_idx = old_idx
         self.new_idx = new_idx
         self.no_change = new_idx.equals(old_idx)
@@ -617,6 +611,9 @@ class Reindexer:
         sub_el = _subset(el, make_slice(indexer, axis, len(shape)))

         if any(indexer == -1):
+            # TODO: Remove this condition once https://github.com/dask/dask/pull/12078 is released
+            if isinstance(sub_el._meta, CSArray | CSMatrix) and np.isscalar(fill_value):
+                fill_value = np.array([[fill_value]])
             sub_el[make_slice(indexer == -1, axis, len(shape))] = fill_value

         return sub_el
@@ -757,7 +754,7 @@ class Reindexer:
         return el[self.idx]

     @property
-    def idx(self):
+    def idx(self) -> NDArray[np.intp]:
         return self.old_idx.get_indexer(self.new_idx)


@@ -786,7 +783,7 @@ def default_fill_value(els):
     return np.nan


-def gen_reindexer(new_var: pd.Index, cur_var: pd.Index):
+def gen_reindexer(new_var: pd.Index, cur_var: pd.Index) -> Reindexer:
     """
     Given a new set of var_names, and a current set, generates a function which will reindex
     a matrix to be aligned with the new set.
@@ -943,7 +940,7 @@ def inner_concat_aligned_mapping(
     return result


-def gen_inner_reindexers(els, new_index, axis: Literal[0, 1] = 0):
+def gen_inner_reindexers(els, new_index, axis: Literal[0, 1] = 0) -> list[Reindexer]:
     alt_axis = 1 - axis
     if axis == 0:
         df_indices = lambda x: x.columns
@@ -1020,7 +1017,7 @@ def missing_element(
     axis: Literal[0, 1] = 0,
     fill_value: Any | None = None,
     off_axis_size: int = 0,
-) -> np.
+) -> NDArray[np.bool_] | DaskArray:
     """Generates value to use when there is a missing element."""
     should_return_dask = any(isinstance(el, DaskArray) for el in els)
     # 0 sized array for in-memory prevents allocating unnecessary memory while preserving broadcasting.
@@ -1643,7 +1640,7 @@ def concat(  # noqa: PLR0912, PLR0913, PLR0915
         )
         if index_unique is not None:
             concat_indices = concat_indices.str.cat(
-
+                label_col.map(str, na_action="ignore"), sep=index_unique
             )
         concat_indices = pd.Index(concat_indices)

@@ -1748,15 +1745,10 @@ def concat(  # noqa: PLR0912, PLR0913, PLR0915
                 for r, a in zip(reindexers, adatas, strict=True)
             ],
         )
-        alt_pairwise = merge(
-
-
-
-                    for k, v in getattr(a, f"{alt_axis_name}p").items()
-                }
-                for r, a in zip(reindexers, adatas, strict=True)
-            ]
-        )
+        alt_pairwise = merge([
+            {k: r(r(v, axis=0), axis=1) for k, v in getattr(a, f"{alt_axis_name}p").items()}
+            for r, a in zip(reindexers, adatas, strict=True)
+        ])
         uns = uns_merge([a.uns for a in adatas])

         raw = None
@@ -1785,17 +1777,15 @@ def concat(  # noqa: PLR0912, PLR0913, PLR0915
                 "not concatenating `.raw` attributes."
             )
            warn(msg, UserWarning, stacklevel=2)
-    return AnnData(
-
-
-
-
-
-
-
-
-
-
-        }
-    )
+    return AnnData(**{
+        "X": X,
+        "layers": layers,
+        axis_name: concat_annot,
+        alt_axis_name: alt_annot,
+        f"{axis_name}m": concat_mapping,
+        f"{alt_axis_name}m": alt_mapping,
+        f"{axis_name}p": concat_pairwise,
+        f"{alt_axis_name}p": alt_pairwise,
+        "uns": uns,
+        "raw": raw,
+    })
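For context, `Reindexer.idx` relies on `pandas.Index.get_indexer`, which returns -1 for labels absent from the old index; that is what the `any(indexer == -1)` branch (and the new sparse fill-value workaround) keys off. A small sketch:

    import pandas as pd

    old_idx = pd.Index(["gene_a", "gene_b", "gene_c"])
    new_idx = pd.Index(["gene_b", "gene_d"])

    indexer = old_idx.get_indexer(new_idx)
    print(indexer)                 # [ 1 -1]  -> gene_d is missing from old_idx
    print((indexer == -1).any())   # True: these positions need a fill value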
anndata/_core/sparse_dataset.py
CHANGED
@@ -16,6 +16,7 @@ import warnings
 from abc import ABC
 from collections.abc import Iterable
 from functools import cached_property
+from importlib.metadata import version
 from itertools import accumulate, chain, pairwise
 from math import floor
 from pathlib import Path
@@ -23,7 +24,6 @@ from typing import TYPE_CHECKING, NamedTuple

 import h5py
 import numpy as np
-import scipy
 import scipy.sparse as ss
 from packaging.version import Version
 from scipy.sparse import _sparsetools
@@ -48,13 +48,12 @@ if TYPE_CHECKING:
     from scipy.sparse._compressed import _cs_matrix

     from .._types import GroupStorageType
-    from ..compat import H5Array
-    from .index import Index, Index1D
+    from ..compat import H5Array, Index, Index1D, Index1DNorm
 else:
     from scipy.sparse import spmatrix as _cs_matrix


-SCIPY_1_15 = Version(scipy
+SCIPY_1_15 = Version(version("scipy")) >= Version("1.15rc0")


 class BackedFormat(NamedTuple):
@@ -278,9 +277,9 @@ def get_compressed_vectors(
     indptr_slices = [slice(*(x.indptr[i : i + 2])) for i in row_idxs]
     # HDF5 cannot handle out-of-order integer indexing
     if isinstance(x.data, ZarrArray):
-        as_np_indptr = np.concatenate(
-
-        )
+        as_np_indptr = np.concatenate([
+            np.arange(s.start, s.stop) for s in indptr_slices
+        ])
         data = x.data[as_np_indptr]
         indices = x.indices[as_np_indptr]
     else:
@@ -309,9 +308,9 @@ def get_compressed_vectors_for_slices(
     start_indptr = indptr_indices[0] - next(offsets)
     if len(slices) < 2:  # there is only one slice so no need to concatenate
         return data, indices, start_indptr
-    end_indptr = np.concatenate(
-
-    )
+    end_indptr = np.concatenate([
+        s[1:] - o for s, o in zip(indptr_indices[1:], offsets, strict=True)
+    ])
     indptr = np.concatenate([start_indptr, end_indptr])
     return data, indices, indptr

@@ -738,5 +737,7 @@ def sparse_dataset(


 @_subset.register(BaseCompressedSparseDataset)
-def subset_sparsedataset(
+def subset_sparsedataset(
+    d, subset_idx: tuple[Index1DNorm] | tuple[Index1DNorm, Index1DNorm]
+):
     return d[subset_idx]
anndata/_core/views.py
CHANGED
@@ -100,7 +100,7 @@ class _ViewMixin(_SetItemMixin):

     # TODO: This makes `deepcopy(obj)` return `obj._view_args.parent._adata_ref`, fix it
     def __deepcopy__(self, memo):
-        parent, attrname,
+        parent, attrname, _keys = self._view_args
         return deepcopy(getattr(parent._adata_ref, attrname))


anndata/_io/h5ad.py
CHANGED
@@ -27,7 +27,6 @@ from ..experimental import read_dispatched
 from .specs import read_elem, write_elem
 from .specs.registry import IOSpec, write_spec
 from .utils import (
-    H5PY_V3,
     _read_legacy_raw,
     idx_chunks_along_axis,
     no_write_dataset_2d,
@@ -264,15 +263,13 @@ def read_h5ad(

     def callback(func, elem_name: str, elem, iospec):
         if iospec.encoding_type == "anndata" or elem_name.endswith("/"):
-            return AnnData(
-
-
-
-
-
-
-                }
-            )
+            return AnnData(**{
+                # This is covering up backwards compat in the anndata initializer
+                # In most cases we should be able to call `func(elen[k])` instead
+                k: read_dispatched(elem[k], callback)
+                for k in elem
+                if not k.startswith("raw.")
+            })
         elif elem_name.startswith("/raw."):
             return None
         elif elem_name == "/X" and "X" in as_sparse:
@@ -326,16 +323,12 @@ def read_dataframe_legacy(dataset: h5py.Dataset) -> pd.DataFrame:
         "Consider rewriting it."
     )
     warn(msg, OldFormatWarning, stacklevel=2)
-
-
-
-            _from_fixed_length_strings(dataset[()]), dtype=dataset.dtype
-    )
+    df = pd.DataFrame(
+        _decode_structured_array(
+            _from_fixed_length_strings(dataset[()]), dtype=dataset.dtype
         )
-
-
-    df.set_index(df.columns[0], inplace=True)
-    return df
+    )
+    return df.set_index(df.columns[0])


 def read_dataframe(group: h5py.Group | h5py.Dataset) -> pd.DataFrame:
@@ -348,10 +341,9 @@ def read_dataframe(group: h5py.Group | h5py.Dataset) -> pd.DataFrame:

 @report_read_key_on_error
 def read_dataset(dataset: h5py.Dataset):
-
-
-
-        dataset = dataset.asstr()
+    string_dtype = h5py.check_string_dtype(dataset.dtype)
+    if (string_dtype is not None) and (string_dtype.encoding == "utf-8"):
+        dataset = dataset.asstr()
     value = dataset[()]
     if not hasattr(value, "dtype"):
         return value
@@ -364,10 +356,9 @@ def read_dataset(dataset: h5py.Dataset):
         return value[0]
     elif len(value.dtype.descr) > 1:  # Compound dtype
         # For backwards compat, now strings are written as variable length
-
-
-
-        value = _decode_structured_array(value, dtype=dtype)
+        value = _decode_structured_array(
+            _from_fixed_length_strings(value), dtype=value.dtype
+        )
     if value.shape == ():
         value = value[()]
     return value
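The rewritten `read_dataset` uses `h5py.check_string_dtype` to decide when `.asstr()` decoding applies. A hedged sketch of that check against an in-memory HDF5 file (file layout is illustrative):

    import h5py
    import numpy as np

    with h5py.File("example.h5", "w", driver="core", backing_store=False) as f:
        f.create_dataset("s", data=["a", "b"], dtype=h5py.string_dtype("utf-8"))
        f.create_dataset("x", data=np.arange(3))

        info = h5py.check_string_dtype(f["s"].dtype)
        print(info.encoding)           # "utf-8" -> safe to call .asstr()
        print(f["s"].asstr()[()])      # array(['a', 'b'], dtype=object)

        print(h5py.check_string_dtype(f["x"].dtype))  # None -> not a string dataset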