anndata 0.12.2__py3-none-any.whl → 0.12.4__py3-none-any.whl

This diff compares publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their respective public registries.
anndata/__init__.py CHANGED
@@ -12,7 +12,6 @@ from ._core.extensions import register_anndata_namespace
 from ._core.merge import concat
 from ._core.raw import Raw
 from ._settings import settings
-from ._version import __version__
 from ._warnings import (
     ExperimentalFeatureWarning,
     ImplicitModificationWarning,
@@ -28,22 +27,6 @@ from . import abc, experimental, typing, io, types  # isort: skip
 # We use these in tests by attribute access
 from . import logging  # noqa: F401  # isort: skip

-_DEPRECATED_IO = (
-    "read_loom",
-    "read_hdf",
-    "read_excel",
-    "read_umi_tools",
-    "read_csv",
-    "read_text",
-    "read_mtx",
-)
-_DEPRECATED = {method: f"io.{method}" for method in _DEPRECATED_IO}
-
-
-def __getattr__(attr_name: str) -> Any:
-    return module_get_attr_redirect(attr_name, deprecated_mapping=_DEPRECATED)
-
-
 __all__ = [
     "AnnData",
     "ExperimentalFeatureWarning",
@@ -51,7 +34,6 @@ __all__ = [
     "OldFormatWarning",
     "Raw",
     "WriteWarning",
-    "__version__",
     "abc",
     "concat",
     "experimental",
@@ -63,3 +45,26 @@ __all__ = [
     "types",
     "typing",
 ]
+
+_DEPRECATED_IO = (
+    "read_loom",
+    "read_hdf",
+    "read_excel",
+    "read_umi_tools",
+    "read_csv",
+    "read_text",
+    "read_mtx",
+)
+_DEPRECATED = {method: f"io.{method}" for method in _DEPRECATED_IO}
+
+
+def __getattr__(attr_name: str) -> Any:
+    if attr_name == "__version__":
+        import warnings
+        from importlib.metadata import version
+
+        msg = "`__version__` is deprecated, use `importlib.metadata.version('anndata')` instead."
+        warnings.warn(msg, FutureWarning, stacklevel=2)
+        return version("anndata")
+
+    return module_get_attr_redirect(attr_name, deprecated_mapping=_DEPRECATED)
@@ -78,6 +78,13 @@ def _gen_dataframe_df(
     attr: Literal["obs", "var"],
     length: int | None = None,
 ):
+    if isinstance(anno.index, pd.MultiIndex):
+        msg = (
+            "pandas.MultiIndex not supported as index for obs or var on declaration.\n\
+            You can set `obs_names` manually although most operations after will error or convert to str.\n\
+            This behavior will likely be clarified in a future breaking release."
+        )
+        raise ValueError(msg)
     if length is not None and length != len(anno):
         raise _mk_df_error(source, attr, length, len(anno))
     anno = anno.copy(deep=False)
anndata/_core/anndata.py CHANGED
@@ -42,11 +42,7 @@ from .index import _normalize_indices, _subset, get_vector
 from .raw import Raw
 from .sparse_dataset import BaseCompressedSparseDataset, sparse_dataset
 from .storage import coerce_array
-from .views import (
-    DictView,
-    _resolve_idxs,
-    as_view,
-)
+from .views import DictView, _resolve_idxs, as_view
 from .xarray import Dataset2D

 if TYPE_CHECKING:
@@ -940,22 +936,27 @@ class AnnData(metaclass=utils.DeprecationMixinMeta):  # noqa: PLW1641
         Is sliced with `data` and `var` but behaves otherwise like a :term:`mapping`.
         """

+    @deprecated("obs (e.g. `k in adata.obs` or `str(adata.obs.columns.tolist())`)")
     def obs_keys(self) -> list[str]:
         """List keys of observation annotation :attr:`obs`."""
         return self._obs.keys().tolist()

+    @deprecated("var (e.g. `k in adata.var` or `str(adata.var.columns.tolist())`)")
     def var_keys(self) -> list[str]:
         """List keys of variable annotation :attr:`var`."""
         return self._var.keys().tolist()

+    @deprecated("obsm (e.g. `k in adata.obsm` or `adata.obsm.keys() | {'u'}`)")
     def obsm_keys(self) -> list[str]:
         """List keys of observation annotation :attr:`obsm`."""
         return list(self.obsm.keys())

+    @deprecated("varm (e.g. `k in adata.varm` or `adata.varm.keys() | {'u'}`)")
     def varm_keys(self) -> list[str]:
         """List keys of variable annotation :attr:`varm`."""
         return list(self.varm.keys())

+    @deprecated("uns (e.g. `k in adata.uns` or `sorted(adata.uns)`)")
     def uns_keys(self) -> list[str]:
         """List keys of unstructured annotation."""
         return sorted(self._uns.keys())
@@ -1907,8 +1908,8 @@ class AnnData(metaclass=utils.DeprecationMixinMeta):  # noqa: PLW1641
             compression_opts=compression_opts,
             as_dense=as_dense,
         )
-
-        if self.isbacked:
+        # Only reset the filename if the AnnData object now points to a complete new copy
+        if self.isbacked and not self.is_view:
             self.file.filename = filename

     write = write_h5ad  # a shortcut and backwards compat
anndata/_core/index.py CHANGED
@@ -3,7 +3,7 @@ from __future__ import annotations
 from collections.abc import Iterable, Sequence
 from functools import singledispatch
 from itertools import repeat
-from typing import TYPE_CHECKING
+from typing import TYPE_CHECKING, cast, overload

 import h5py
 import numpy as np
@@ -14,6 +14,8 @@ from ..compat import AwkArray, CSArray, CSMatrix, DaskArray, XDataArray
 from .xarray import Dataset2D

 if TYPE_CHECKING:
+    from numpy.typing import NDArray
+
     from ..compat import Index, Index1D, Index1DNorm


@@ -161,7 +163,10 @@ def unpack_index(index: Index) -> tuple[Index1D, Index1D]:


 @singledispatch
-def _subset(a: np.ndarray | pd.DataFrame, subset_idx: Index):
+def _subset(
+    a: np.ndarray | pd.DataFrame,
+    subset_idx: tuple[Index1DNorm] | tuple[Index1DNorm, Index1DNorm],
+):
     # Select as combination of indexes, not coordinates
     # Correcting for indexing behaviour of np.ndarray
     if all(isinstance(x, Iterable) for x in subset_idx):
@@ -170,7 +175,9 @@ def _subset(a: np.ndarray | pd.DataFrame, subset_idx: Index):


 @_subset.register(DaskArray)
-def _subset_dask(a: DaskArray, subset_idx: Index):
+def _subset_dask(
+    a: DaskArray, subset_idx: tuple[Index1DNorm] | tuple[Index1DNorm, Index1DNorm]
+):
     if len(subset_idx) > 1 and all(isinstance(x, Iterable) for x in subset_idx):
         if issparse(a._meta) and a._meta.format == "csc":
             return a[:, subset_idx[1]][subset_idx[0], :]
@@ -180,24 +187,32 @@ def _subset_dask(a: DaskArray, subset_idx: Index):

 @_subset.register(CSMatrix)
 @_subset.register(CSArray)
-def _subset_sparse(a: CSMatrix | CSArray, subset_idx: Index):
+def _subset_sparse(
+    a: CSMatrix | CSArray,
+    subset_idx: tuple[Index1DNorm] | tuple[Index1DNorm, Index1DNorm],
+):
     # Correcting for indexing behaviour of sparse.spmatrix
     if len(subset_idx) > 1 and all(isinstance(x, Iterable) for x in subset_idx):
         first_idx = subset_idx[0]
         if issubclass(first_idx.dtype.type, np.bool_):
-            first_idx = np.where(first_idx)[0]
+            first_idx = np.flatnonzero(first_idx)
         subset_idx = (first_idx.reshape(-1, 1), *subset_idx[1:])
     return a[subset_idx]


 @_subset.register(pd.DataFrame)
 @_subset.register(Dataset2D)
-def _subset_df(df: pd.DataFrame | Dataset2D, subset_idx: Index):
+def _subset_df(
+    df: pd.DataFrame | Dataset2D,
+    subset_idx: tuple[Index1DNorm] | tuple[Index1DNorm, Index1DNorm],
+):
     return df.iloc[subset_idx]


 @_subset.register(AwkArray)
-def _subset_awkarray(a: AwkArray, subset_idx: Index):
+def _subset_awkarray(
+    a: AwkArray, subset_idx: tuple[Index1DNorm] | tuple[Index1DNorm, Index1DNorm]
+):
     if all(isinstance(x, Iterable) for x in subset_idx):
         subset_idx = np.ix_(*subset_idx)
     return a[subset_idx]
@@ -205,23 +220,121 @@ def _subset_awkarray(a: AwkArray, subset_idx: Index):

 # Registration for SparseDataset occurs in sparse_dataset.py
 @_subset.register(h5py.Dataset)
-def _subset_dataset(d: h5py.Dataset, subset_idx: Index):
-    if not isinstance(subset_idx, tuple):
-        subset_idx = (subset_idx,)
-    ordered = list(subset_idx)
-    rev_order = [slice(None) for _ in range(len(subset_idx))]
-    for axis, axis_idx in enumerate(ordered.copy()):
-        if isinstance(axis_idx, np.ndarray):
-            if axis_idx.dtype == bool:
-                axis_idx = np.where(axis_idx)[0]
-            order = np.argsort(axis_idx)
-            ordered[axis] = axis_idx[order]
-            rev_order[axis] = np.argsort(order)
+def _subset_dataset(
+    d: h5py.Dataset, subset_idx: tuple[Index1DNorm] | tuple[Index1DNorm, Index1DNorm]
+):
+    order: tuple[NDArray[np.integer] | slice, ...]
+    inv_order: tuple[NDArray[np.integer] | slice, ...]
+    order, inv_order = zip(*map(_index_order_and_inverse, subset_idx), strict=True)
+    # check for duplicates or multi-dimensional fancy indexing
+    array_dims = [i for i in order if isinstance(i, np.ndarray)]
+    has_duplicates = any(len(np.unique(i)) != len(i) for i in array_dims)
+    # Use safe indexing if there are duplicates OR multiple array dimensions
+    # (h5py doesn't support multi-dimensional fancy indexing natively)
+    if has_duplicates or len(array_dims) > 1:
+        # For multi-dimensional indexing, bypass the sorting logic and use original indices
+        return _safe_fancy_index_h5py(d, subset_idx)
     # from hdf5, then to real order
-    return d[tuple(ordered)][tuple(rev_order)]
-
-
-def make_slice(idx, dimidx, n=2):
+    return d[order][inv_order]
+
+
+@overload
+def _index_order_and_inverse(
+    axis_idx: NDArray[np.integer] | NDArray[np.bool_],
+) -> tuple[NDArray[np.integer], NDArray[np.integer]]: ...
+@overload
+def _index_order_and_inverse(axis_idx: slice) -> tuple[slice, slice]: ...
+def _index_order_and_inverse(
+    axis_idx: Index1DNorm,
+) -> tuple[Index1DNorm, NDArray[np.integer] | slice]:
+    """Order and get inverse index array."""
+    if not isinstance(axis_idx, np.ndarray):
+        return axis_idx, slice(None)
+    if axis_idx.dtype == bool:
+        axis_idx = np.flatnonzero(axis_idx)
+    order = np.argsort(axis_idx)
+    return axis_idx[order], np.argsort(order)
+
+
+@overload
+def _process_index_for_h5py(
+    idx: NDArray[np.integer] | NDArray[np.bool_],
+) -> tuple[NDArray[np.integer], NDArray[np.integer]]: ...
+@overload
+def _process_index_for_h5py(idx: slice) -> tuple[slice, None]: ...
+def _process_index_for_h5py(
+    idx: Index1DNorm,
+) -> tuple[Index1DNorm, NDArray[np.integer] | None]:
+    """Process a single index for h5py compatibility, handling sorting and duplicates."""
+    if not isinstance(idx, np.ndarray):
+        # Not an array (slice, integer, list) - no special processing needed
+        return idx, None
+
+    if idx.dtype == bool:
+        idx = np.flatnonzero(idx)
+
+    # For h5py fancy indexing, we need sorted indices
+    # But we also need to track how to reverse the sorting
+    unique, inverse = np.unique(idx, return_inverse=True)
+    return (
+        # Has duplicates - use unique + inverse mapping approach
+        (unique, inverse)
+        if len(unique) != len(idx)
+        # No duplicates - just sort and track reverse mapping
+        else _index_order_and_inverse(idx)
+    )
+
+
+def _safe_fancy_index_h5py(
+    dataset: h5py.Dataset,
+    subset_idx: tuple[Index1DNorm] | tuple[Index1DNorm, Index1DNorm],
+) -> h5py.Dataset:
+    # Handle multi-dimensional indexing of h5py dataset
+    # This avoids h5py's limitation with multi-dimensional fancy indexing
+    # without loading the entire dataset into memory
+
+    # Convert boolean arrays to integer arrays and handle sorting for h5py
+    processed_indices: tuple[NDArray[np.integer] | slice, ...]
+    reverse_indices: tuple[NDArray[np.integer] | None, ...]
+    processed_indices, reverse_indices = zip(
+        *map(_process_index_for_h5py, subset_idx), strict=True
+    )
+
+    # First find the index that reduces the size of the dataset the most
+    i_min = np.argmin([
+        _get_index_size(inds, dataset.shape[i]) / dataset.shape[i]
+        for i, inds in enumerate(processed_indices)
+    ])
+
+    # Apply the most selective index first to h5py dataset
+    first_index = [slice(None)] * len(processed_indices)
+    first_index[i_min] = processed_indices[i_min]
+    in_memory_array = cast("np.ndarray", dataset[tuple(first_index)])
+
+    # Apply remaining indices to the numpy array
+    remaining_indices = list(processed_indices)
+    remaining_indices[i_min] = slice(None)  # Already applied
+    result = in_memory_array[tuple(remaining_indices)]
+
+    # Now apply reverse mappings to get the original order
+    for dim, reverse_map in enumerate(reverse_indices):
+        if reverse_map is not None:
+            result = result.take(reverse_map, axis=dim)
+
+    return result
+
+
+def _get_index_size(idx: Index1DNorm, dim_size: int) -> int:
+    """Get size for any index type."""
+    if isinstance(idx, slice):
+        return len(range(*idx.indices(dim_size)))
+    elif isinstance(idx, int):
+        return 1
+    else:  # For other types, try to get length
+        return len(idx)
+
+
+def make_slice(idx, dimidx: int, n: int = 2) -> tuple[slice, ...]:
     mut = list(repeat(slice(None), n))
     mut[dimidx] = idx
     return tuple(mut)
anndata/_core/merge.py CHANGED
@@ -14,9 +14,7 @@ from warnings import warn

 import numpy as np
 import pandas as pd
-import scipy
 from natsort import natsorted
-from packaging.version import Version
 from scipy import sparse

 from anndata._core.file_backing import to_memory
@@ -30,7 +28,6 @@ from ..compat import (
     CupyCSRMatrix,
     CupySparseMatrix,
     DaskArray,
-    _map_cat_to_str,
 )
 from ..utils import asarray, axis_len, warn_once
 from .anndata import AnnData
@@ -41,6 +38,7 @@ if TYPE_CHECKING:
     from collections.abc import Collection, Generator, Iterable, Sequence
     from typing import Any

+    from numpy.typing import NDArray
     from pandas.api.extensions import ExtensionDtype

     from anndata._types import Join_T
@@ -146,11 +144,16 @@ def equal_dask_array(a, b) -> bool:
         return False
     if isinstance(b, DaskArray) and tokenize(a) == tokenize(b):
         return True
-    if isinstance(a._meta, CSMatrix):
+    if isinstance(a._meta, np.ndarray):
+        return da.equal(a, b, where=~(da.isnan(a) & da.isnan(b))).all().compute()
+    if a.chunksize == b.chunksize and isinstance(
+        a._meta, CupySparseMatrix | CSMatrix | CSArray
+    ):
         # TODO: Maybe also do this in the other case?
         return da.map_blocks(equal, a, b, drop_axis=(0, 1)).all()
-    else:
-        return da.equal(a, b, where=~(da.isnan(a) == da.isnan(b))).all()
+    msg = "Misaligned chunks detected when checking for merge equality of dask arrays. Reading full arrays into memory."
+    warn(msg, UserWarning, stacklevel=3)
+    return equal(a.compute(), b.compute())


 @equal.register(np.ndarray)
@@ -185,15 +188,6 @@ def equal_sparse(a, b) -> bool:
             # Comparison broken for CSC matrices
             # https://github.com/cupy/cupy/issues/7757
             a, b = CupyCSRMatrix(a), CupyCSRMatrix(b)
-        if Version(scipy.__version__) >= Version("1.16.0rc1"):
-            # TODO: https://github.com/scipy/scipy/issues/23068
-            return bool(
-                a.format == b.format
-                and (a.shape == b.shape)
-                and np.all(a.indptr == b.indptr)
-                and np.all(a.indices == b.indices)
-                and np.all((a.data == b.data) | (np.isnan(a.data) & np.isnan(b.data)))
-            )
         comp = a != b
         if isinstance(comp, bool):
             return not comp
@@ -560,7 +554,7 @@ class Reindexer:
     Together with `old_pos` this forms a mapping.
     """

-    def __init__(self, old_idx, new_idx):
+    def __init__(self, old_idx: pd.Index, new_idx: pd.Index) -> None:
         self.old_idx = old_idx
         self.new_idx = new_idx
         self.no_change = new_idx.equals(old_idx)
@@ -617,6 +611,9 @@ class Reindexer:
         sub_el = _subset(el, make_slice(indexer, axis, len(shape)))

         if any(indexer == -1):
+            # TODO: Remove this condition once https://github.com/dask/dask/pull/12078 is released
+            if isinstance(sub_el._meta, CSArray | CSMatrix) and np.isscalar(fill_value):
+                fill_value = np.array([[fill_value]])
             sub_el[make_slice(indexer == -1, axis, len(shape))] = fill_value

         return sub_el
@@ -757,7 +754,7 @@ class Reindexer:
         return el[self.idx]

     @property
-    def idx(self):
+    def idx(self) -> NDArray[np.intp]:
         return self.old_idx.get_indexer(self.new_idx)


@@ -786,7 +783,7 @@ def default_fill_value(els):
         return np.nan


-def gen_reindexer(new_var: pd.Index, cur_var: pd.Index):
+def gen_reindexer(new_var: pd.Index, cur_var: pd.Index) -> Reindexer:
     """
     Given a new set of var_names, and a current set, generates a function which will reindex
     a matrix to be aligned with the new set.
@@ -943,7 +940,7 @@ def inner_concat_aligned_mapping(
     return result


-def gen_inner_reindexers(els, new_index, axis: Literal[0, 1] = 0):
+def gen_inner_reindexers(els, new_index, axis: Literal[0, 1] = 0) -> list[Reindexer]:
     alt_axis = 1 - axis
     if axis == 0:
         df_indices = lambda x: x.columns
@@ -1020,7 +1017,7 @@ def missing_element(
     axis: Literal[0, 1] = 0,
     fill_value: Any | None = None,
     off_axis_size: int = 0,
-) -> np.ndarray | DaskArray:
+) -> NDArray[np.bool_] | DaskArray:
     """Generates value to use when there is a missing element."""
     should_return_dask = any(isinstance(el, DaskArray) for el in els)
     # 0 sized array for in-memory prevents allocating unnecessary memory while preserving broadcasting.
@@ -1643,7 +1640,7 @@ def concat(  # noqa: PLR0912, PLR0913, PLR0915
     )
     if index_unique is not None:
         concat_indices = concat_indices.str.cat(
-            _map_cat_to_str(label_col), sep=index_unique
+            label_col.map(str, na_action="ignore"), sep=index_unique
         )
     concat_indices = pd.Index(concat_indices)

@@ -1748,15 +1745,10 @@ def concat(  # noqa: PLR0912, PLR0913, PLR0915
             for r, a in zip(reindexers, adatas, strict=True)
         ],
     )
-    alt_pairwise = merge(
-        [
-            {
-                k: r(r(v, axis=0), axis=1)
-                for k, v in getattr(a, f"{alt_axis_name}p").items()
-            }
-            for r, a in zip(reindexers, adatas, strict=True)
-        ]
-    )
+    alt_pairwise = merge([
+        {k: r(r(v, axis=0), axis=1) for k, v in getattr(a, f"{alt_axis_name}p").items()}
+        for r, a in zip(reindexers, adatas, strict=True)
+    ])
     uns = uns_merge([a.uns for a in adatas])

     raw = None
@@ -1785,17 +1777,15 @@ def concat(  # noqa: PLR0912, PLR0913, PLR0915
                 "not concatenating `.raw` attributes."
             )
             warn(msg, UserWarning, stacklevel=2)
-    return AnnData(
-        **{
-            "X": X,
-            "layers": layers,
-            axis_name: concat_annot,
-            alt_axis_name: alt_annot,
-            f"{axis_name}m": concat_mapping,
-            f"{alt_axis_name}m": alt_mapping,
-            f"{axis_name}p": concat_pairwise,
-            f"{alt_axis_name}p": alt_pairwise,
-            "uns": uns,
-            "raw": raw,
-        }
-    )
+    return AnnData(**{
+        "X": X,
+        "layers": layers,
+        axis_name: concat_annot,
+        alt_axis_name: alt_annot,
+        f"{axis_name}m": concat_mapping,
+        f"{alt_axis_name}m": alt_mapping,
+        f"{axis_name}p": concat_pairwise,
+        f"{alt_axis_name}p": alt_pairwise,
+        "uns": uns,
+        "raw": raw,
+    })
@@ -16,6 +16,7 @@ import warnings
 from abc import ABC
 from collections.abc import Iterable
 from functools import cached_property
+from importlib.metadata import version
 from itertools import accumulate, chain, pairwise
 from math import floor
 from pathlib import Path
@@ -23,7 +24,6 @@ from typing import TYPE_CHECKING, NamedTuple

 import h5py
 import numpy as np
-import scipy
 import scipy.sparse as ss
 from packaging.version import Version
 from scipy.sparse import _sparsetools
@@ -48,13 +48,12 @@ if TYPE_CHECKING:
     from scipy.sparse._compressed import _cs_matrix

     from .._types import GroupStorageType
-    from ..compat import H5Array
-    from .index import Index, Index1D
+    from ..compat import H5Array, Index, Index1D, Index1DNorm
 else:
     from scipy.sparse import spmatrix as _cs_matrix


-SCIPY_1_15 = Version(scipy.__version__) >= Version("1.15rc0")
+SCIPY_1_15 = Version(version("scipy")) >= Version("1.15rc0")


 class BackedFormat(NamedTuple):
@@ -278,9 +277,9 @@ def get_compressed_vectors(
     indptr_slices = [slice(*(x.indptr[i : i + 2])) for i in row_idxs]
     # HDF5 cannot handle out-of-order integer indexing
     if isinstance(x.data, ZarrArray):
-        as_np_indptr = np.concatenate(
-            [np.arange(s.start, s.stop) for s in indptr_slices]
-        )
+        as_np_indptr = np.concatenate([
+            np.arange(s.start, s.stop) for s in indptr_slices
+        ])
         data = x.data[as_np_indptr]
         indices = x.indices[as_np_indptr]
     else:
@@ -309,9 +308,9 @@ def get_compressed_vectors_for_slices(
     start_indptr = indptr_indices[0] - next(offsets)
     if len(slices) < 2:  # there is only one slice so no need to concatenate
         return data, indices, start_indptr
-    end_indptr = np.concatenate(
-        [s[1:] - o for s, o in zip(indptr_indices[1:], offsets, strict=True)]
-    )
+    end_indptr = np.concatenate([
+        s[1:] - o for s, o in zip(indptr_indices[1:], offsets, strict=True)
+    ])
     indptr = np.concatenate([start_indptr, end_indptr])
     return data, indices, indptr

@@ -738,5 +737,7 @@ def sparse_dataset(


 @_subset.register(BaseCompressedSparseDataset)
-def subset_sparsedataset(d, subset_idx):
+def subset_sparsedataset(
+    d, subset_idx: tuple[Index1DNorm] | tuple[Index1DNorm, Index1DNorm]
+):
     return d[subset_idx]
anndata/_core/views.py CHANGED
@@ -100,7 +100,7 @@ class _ViewMixin(_SetItemMixin):

     # TODO: This makes `deepcopy(obj)` return `obj._view_args.parent._adata_ref`, fix it
     def __deepcopy__(self, memo):
-        parent, attrname, keys = self._view_args
+        parent, attrname, _keys = self._view_args
         return deepcopy(getattr(parent._adata_ref, attrname))


anndata/_io/h5ad.py CHANGED
@@ -27,7 +27,6 @@ from ..experimental import read_dispatched
 from .specs import read_elem, write_elem
 from .specs.registry import IOSpec, write_spec
 from .utils import (
-    H5PY_V3,
     _read_legacy_raw,
     idx_chunks_along_axis,
     no_write_dataset_2d,
@@ -264,15 +263,13 @@ def read_h5ad(

     def callback(func, elem_name: str, elem, iospec):
         if iospec.encoding_type == "anndata" or elem_name.endswith("/"):
-            return AnnData(
-                **{
-                    # This is covering up backwards compat in the anndata initializer
-                    # In most cases we should be able to call `func(elen[k])` instead
-                    k: read_dispatched(elem[k], callback)
-                    for k in elem
-                    if not k.startswith("raw.")
-                }
-            )
+            return AnnData(**{
+                # This is covering up backwards compat in the anndata initializer
+                # In most cases we should be able to call `func(elen[k])` instead
+                k: read_dispatched(elem[k], callback)
+                for k in elem
+                if not k.startswith("raw.")
+            })
         elif elem_name.startswith("/raw."):
             return None
         elif elem_name == "/X" and "X" in as_sparse:
@@ -326,16 +323,12 @@ def read_dataframe_legacy(dataset: h5py.Dataset) -> pd.DataFrame:
         "Consider rewriting it."
     )
     warn(msg, OldFormatWarning, stacklevel=2)
-    if H5PY_V3:
-        df = pd.DataFrame(
-            _decode_structured_array(
-                _from_fixed_length_strings(dataset[()]), dtype=dataset.dtype
-            )
+    df = pd.DataFrame(
+        _decode_structured_array(
+            _from_fixed_length_strings(dataset[()]), dtype=dataset.dtype
         )
-    else:
-        df = pd.DataFrame(_from_fixed_length_strings(dataset[()]))
-    df.set_index(df.columns[0], inplace=True)
-    return df
+    )
+    return df.set_index(df.columns[0])


 def read_dataframe(group: h5py.Group | h5py.Dataset) -> pd.DataFrame:
@@ -348,10 +341,9 @@ def read_dataframe(group: h5py.Group | h5py.Dataset) -> pd.DataFrame:

 @report_read_key_on_error
 def read_dataset(dataset: h5py.Dataset):
-    if H5PY_V3:
-        string_dtype = h5py.check_string_dtype(dataset.dtype)
-        if (string_dtype is not None) and (string_dtype.encoding == "utf-8"):
-            dataset = dataset.asstr()
+    string_dtype = h5py.check_string_dtype(dataset.dtype)
+    if (string_dtype is not None) and (string_dtype.encoding == "utf-8"):
+        dataset = dataset.asstr()
     value = dataset[()]
     if not hasattr(value, "dtype"):
         return value
@@ -364,10 +356,9 @@ def read_dataset(dataset: h5py.Dataset):
         return value[0]
     elif len(value.dtype.descr) > 1:  # Compound dtype
         # For backwards compat, now strings are written as variable length
-        dtype = value.dtype
-        value = _from_fixed_length_strings(value)
-        if H5PY_V3:
-            value = _decode_structured_array(value, dtype=dtype)
+        value = _decode_structured_array(
+            _from_fixed_length_strings(value), dtype=value.dtype
+        )
     if value.shape == ():
         value = value[()]
     return value