anndata 0.12.1__py3-none-any.whl → 0.12.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
anndata/_core/anndata.py CHANGED
@@ -56,7 +56,7 @@ if TYPE_CHECKING:
56
56
 
57
57
  from zarr.storage import StoreLike
58
58
 
59
- from ..compat import Index1D, XDataset
59
+ from ..compat import Index1D, Index1DNorm, XDataset
60
60
  from ..typing import XDataType
61
61
  from .aligned_mapping import AxisArraysView, LayersView, PairwiseArraysView
62
62
  from .index import Index
@@ -197,6 +197,11 @@ class AnnData(metaclass=utils.DeprecationMixinMeta): # noqa: PLW1641
197
197
 
198
198
  _accessors: ClassVar[set[str]] = set()
199
199
 
200
+ # view attributes
201
+ _adata_ref: AnnData | None
202
+ _oidx: Index1DNorm | None
203
+ _vidx: Index1DNorm | None
204
+
200
205
  @old_positionals(
201
206
  "obsm",
202
207
  "varm",
@@ -226,8 +231,8 @@ class AnnData(metaclass=utils.DeprecationMixinMeta): # noqa: PLW1641
226
231
  asview: bool = False,
227
232
  obsp: np.ndarray | Mapping[str, Sequence[Any]] | None = None,
228
233
  varp: np.ndarray | Mapping[str, Sequence[Any]] | None = None,
229
- oidx: Index1D | None = None,
230
- vidx: Index1D | None = None,
234
+ oidx: Index1DNorm | int | np.integer | None = None,
235
+ vidx: Index1DNorm | int | np.integer | None = None,
231
236
  ):
232
237
  # check for any multi-indices that aren’t later checked in coerce_array
233
238
  for attr, key in [(obs, "obs"), (var, "var"), (X, "X")]:
@@ -237,6 +242,8 @@ class AnnData(metaclass=utils.DeprecationMixinMeta): # noqa: PLW1641
237
242
  if not isinstance(X, AnnData):
238
243
  msg = "`X` has to be an AnnData object."
239
244
  raise ValueError(msg)
245
+ assert oidx is not None
246
+ assert vidx is not None
240
247
  self._init_as_view(X, oidx, vidx)
241
248
  else:
242
249
  self._init_as_actual(
@@ -256,7 +263,12 @@ class AnnData(metaclass=utils.DeprecationMixinMeta): # noqa: PLW1641
256
263
  filemode=filemode,
257
264
  )
258
265
 
259
- def _init_as_view(self, adata_ref: AnnData, oidx: Index, vidx: Index):
266
+ def _init_as_view(
267
+ self,
268
+ adata_ref: AnnData,
269
+ oidx: Index1DNorm | int | np.integer,
270
+ vidx: Index1DNorm | int | np.integer,
271
+ ):
260
272
  if adata_ref.isbacked and adata_ref.is_view:
261
273
  msg = (
262
274
  "Currently, you cannot index repeatedly into a backed AnnData, "
@@ -277,6 +289,9 @@ class AnnData(metaclass=utils.DeprecationMixinMeta): # noqa: PLW1641
277
289
  vidx += adata_ref.n_vars * (vidx < 0)
278
290
  vidx = slice(vidx, vidx + 1, 1)
279
291
  if adata_ref.is_view:
292
+ assert adata_ref._adata_ref is not None
293
+ assert adata_ref._oidx is not None
294
+ assert adata_ref._vidx is not None
280
295
  prev_oidx, prev_vidx = adata_ref._oidx, adata_ref._vidx
281
296
  adata_ref = adata_ref._adata_ref
282
297
  oidx, vidx = _resolve_idxs((prev_oidx, prev_vidx), (oidx, vidx), adata_ref)
@@ -1004,7 +1019,9 @@ class AnnData(metaclass=utils.DeprecationMixinMeta): # noqa: PLW1641
1004
1019
 
1005
1020
  write_attribute(self.file._file, attr, value)
1006
1021
 
1007
- def _normalize_indices(self, index: Index | None) -> tuple[slice, slice]:
1022
+ def _normalize_indices(
1023
+ self, index: Index | None
1024
+ ) -> tuple[Index1DNorm | int | np.integer, Index1DNorm | int | np.integer]:
1008
1025
  return _normalize_indices(index, self.obs_names, self.var_names)
1009
1026
 
1010
1027
  # TODO: this is not quite complete...
anndata/_core/index.py CHANGED
@@ -14,18 +14,18 @@ from ..compat import AwkArray, CSArray, CSMatrix, DaskArray, XDataArray
14
14
  from .xarray import Dataset2D
15
15
 
16
16
  if TYPE_CHECKING:
17
- from ..compat import Index, Index1D
17
+ from ..compat import Index, Index1D, Index1DNorm
18
18
 
19
19
 
20
20
  def _normalize_indices(
21
21
  index: Index | None, names0: pd.Index, names1: pd.Index
22
- ) -> tuple[slice, slice]:
22
+ ) -> tuple[Index1DNorm | int | np.integer, Index1DNorm | int | np.integer]:
23
23
  # deal with tuples of length 1
24
24
  if isinstance(index, tuple) and len(index) == 1:
25
25
  index = index[0]
26
26
  # deal with pd.Series
27
27
  if isinstance(index, pd.Series):
28
- index: Index = index.values
28
+ index = index.values
29
29
  if isinstance(index, tuple):
30
30
  # TODO: The series should probably be aligned first
31
31
  index = tuple(i.values if isinstance(i, pd.Series) else i for i in index)
@@ -36,15 +36,8 @@ def _normalize_indices(
36
36
 
37
37
 
38
38
  def _normalize_index( # noqa: PLR0911, PLR0912
39
- indexer: slice
40
- | np.integer
41
- | int
42
- | str
43
- | Sequence[bool | int | np.integer]
44
- | np.ndarray
45
- | pd.Index,
46
- index: pd.Index,
47
- ) -> slice | int | np.ndarray: # ndarray of int or bool
39
+ indexer: Index1D, index: pd.Index
40
+ ) -> Index1DNorm | int | np.integer:
48
41
  # TODO: why is this here? All tests pass without it and it seems at the minimum not strict enough.
49
42
  if not isinstance(index, pd.RangeIndex) and index.dtype in (np.float64, np.int64):
50
43
  msg = f"Don’t call _normalize_index with non-categorical/string names and non-range index {index}"
@@ -212,7 +205,7 @@ def _subset_awkarray(a: AwkArray, subset_idx: Index):
212
205
 
213
206
  # Registration for SparseDataset occurs in sparse_dataset.py
214
207
  @_subset.register(h5py.Dataset)
215
- def _subset_dataset(d, subset_idx):
208
+ def _subset_dataset(d: h5py.Dataset, subset_idx: Index):
216
209
  if not isinstance(subset_idx, tuple):
217
210
  subset_idx = (subset_idx,)
218
211
  ordered = list(subset_idx)
anndata/_core/raw.py CHANGED
@@ -17,7 +17,7 @@ if TYPE_CHECKING:
17
17
  from collections.abc import Mapping, Sequence
18
18
  from typing import ClassVar
19
19
 
20
- from ..compat import CSMatrix
20
+ from ..compat import CSMatrix, Index, Index1DNorm
21
21
  from .aligned_mapping import AxisArraysView
22
22
  from .anndata import AnnData
23
23
  from .sparse_dataset import BaseCompressedSparseDataset
@@ -121,7 +121,7 @@ class Raw:
121
121
  def obs_names(self) -> pd.Index[str]:
122
122
  return self._adata.obs_names
123
123
 
124
- def __getitem__(self, index):
124
+ def __getitem__(self, index: Index) -> Raw:
125
125
  oidx, vidx = self._normalize_indices(index)
126
126
 
127
127
  # To preserve two dimensional shape
@@ -169,7 +169,9 @@ class Raw:
169
169
  uns=self._adata.uns.copy(),
170
170
  )
171
171
 
172
- def _normalize_indices(self, packed_index):
172
+ def _normalize_indices(
173
+ self, packed_index: Index
174
+ ) -> tuple[Index1DNorm | int | np.integer, Index1DNorm | int | np.integer]:
173
175
  # deal with slicing with pd.Series
174
176
  if isinstance(packed_index, pd.Series):
175
177
  packed_index = packed_index.values
anndata/_core/views.py CHANGED
@@ -29,8 +29,12 @@ if TYPE_CHECKING:
29
29
  from collections.abc import Callable, Iterable, KeysView, Sequence
30
30
  from typing import Any, ClassVar
31
31
 
32
+ from numpy.typing import NDArray
33
+
32
34
  from anndata import AnnData
33
35
 
36
+ from ..compat import Index1DNorm
37
+
34
38
 
35
39
  @contextmanager
36
40
  def view_update(adata_view: AnnData, attr_name: str, keys: tuple[str, ...]):
@@ -433,18 +437,24 @@ except ImportError:
433
437
  pass
434
438
 
435
439
 
436
- def _resolve_idxs(old, new, adata):
437
- t = tuple(_resolve_idx(old[i], new[i], adata.shape[i]) for i in (0, 1))
438
- return t
440
+ def _resolve_idxs(
441
+ old: tuple[Index1DNorm, Index1DNorm],
442
+ new: tuple[Index1DNorm, Index1DNorm],
443
+ adata: AnnData,
444
+ ) -> tuple[Index1DNorm, Index1DNorm]:
445
+ o, v = (_resolve_idx(old[i], new[i], adata.shape[i]) for i in (0, 1))
446
+ return o, v
439
447
 
440
448
 
441
449
  @singledispatch
442
- def _resolve_idx(old, new, l):
443
- return old[new]
450
+ def _resolve_idx(old: Index1DNorm, new: Index1DNorm, l: Literal[0, 1]) -> Index1DNorm:
451
+ raise NotImplementedError
444
452
 
445
453
 
446
454
  @_resolve_idx.register(np.ndarray)
447
- def _resolve_idx_ndarray(old, new, l):
455
+ def _resolve_idx_ndarray(
456
+ old: NDArray[np.bool_] | NDArray[np.integer], new: Index1DNorm, l: Literal[0, 1]
457
+ ) -> NDArray[np.bool_] | NDArray[np.integer]:
448
458
  if is_bool_dtype(old) and is_bool_dtype(new):
449
459
  mask_new = np.zeros_like(old)
450
460
  mask_new[np.flatnonzero(old)[new]] = True
@@ -454,21 +464,17 @@ def _resolve_idx_ndarray(old, new, l):
454
464
  return old[new]
455
465
 
456
466
 
457
- @_resolve_idx.register(np.integer)
458
- @_resolve_idx.register(int)
459
- def _resolve_idx_scalar(old, new, l):
460
- return np.array([old])[new]
461
-
462
-
463
467
  @_resolve_idx.register(slice)
464
- def _resolve_idx_slice(old, new, l):
468
+ def _resolve_idx_slice(
469
+ old: slice, new: Index1DNorm, l: Literal[0, 1]
470
+ ) -> slice | NDArray[np.integer]:
465
471
  if isinstance(new, slice):
466
472
  return _resolve_idx_slice_slice(old, new, l)
467
473
  else:
468
474
  return np.arange(*old.indices(l))[new]
469
475
 
470
476
 
471
- def _resolve_idx_slice_slice(old, new, l):
477
+ def _resolve_idx_slice_slice(old: slice, new: slice, l: Literal[0, 1]) -> slice:
472
478
  r = range(*old.indices(l))[new]
473
479
  # Convert back to slice
474
480
  start, stop, step = r.start, r.stop, r.step
anndata/_core/xarray.py CHANGED
@@ -184,18 +184,6 @@ class Dataset2D:
184
184
  Handler class for doing the iloc-style indexing using :meth:`~xarray.Dataset.isel`.
185
185
  """
186
186
 
187
- @dataclass(frozen=True)
188
- class IlocGetter:
189
- _ds: XDataset
190
- _coord: str
191
-
192
- def __getitem__(self, idx) -> Dataset2D:
193
- # xarray seems to have some code looking for a second entry in tuples,
194
- # so we unpack the tuple
195
- if isinstance(idx, tuple) and len(idx) == 1:
196
- idx = idx[0]
197
- return Dataset2D(self._ds.isel(**{self._coord: idx}))
198
-
199
187
  return IlocGetter(self.ds, self.index_dim)
200
188
 
201
189
  # See https://github.com/pydata/xarray/blob/568f3c1638d2d34373408ce2869028faa3949446/xarray/core/dataset.py#L1239-L1248
@@ -402,3 +390,16 @@ class Dataset2D:
402
390
  def _items(self):
403
391
  for col in self:
404
392
  yield col, self[col]
393
+
394
+
395
+ @dataclass(frozen=True)
396
+ class IlocGetter:
397
+ _ds: XDataset
398
+ _coord: str
399
+
400
+ def __getitem__(self, idx) -> Dataset2D:
401
+ # xarray seems to have some code looking for a second entry in tuples,
402
+ # so we unpack the tuple
403
+ if isinstance(idx, tuple) and len(idx) == 1:
404
+ idx = idx[0]
405
+ return Dataset2D(self._ds.isel(**{self._coord: idx}))
anndata/_io/h5ad.py CHANGED
@@ -4,7 +4,7 @@ import re
4
4
  from functools import partial
5
5
  from pathlib import Path
6
6
  from types import MappingProxyType
7
- from typing import TYPE_CHECKING, TypeVar
7
+ from typing import TYPE_CHECKING, TypeVar, cast
8
8
  from warnings import warn
9
9
 
10
10
  import h5py
@@ -36,11 +36,12 @@ from .utils import (
36
36
  )
37
37
 
38
38
  if TYPE_CHECKING:
39
- from collections.abc import Callable, Collection, Mapping, Sequence
39
+ from collections.abc import Callable, Collection, Container, Mapping, Sequence
40
40
  from os import PathLike
41
41
  from typing import Any, Literal
42
42
 
43
43
  from .._core.file_backing import AnnDataFileManager
44
+ from .._core.raw import Raw
44
45
 
45
46
  T = TypeVar("T")
46
47
 
@@ -82,29 +83,18 @@ def write_h5ad(
82
83
  # TODO: Use spec writing system for this
83
84
  # Currently can't use write_dispatched here because this function is also called to do an
84
85
  # inplace update of a backed object, which would delete "/"
85
- f = f["/"]
86
+ f = cast("h5py.Group", f["/"])
86
87
  f.attrs.setdefault("encoding-type", "anndata")
87
88
  f.attrs.setdefault("encoding-version", "0.1.0")
88
89
 
89
- if "X" in as_dense and isinstance(
90
- adata.X, CSMatrix | BaseCompressedSparseDataset
91
- ):
92
- write_sparse_as_dense(f, "X", adata.X, dataset_kwargs=dataset_kwargs)
93
- elif not (adata.isbacked and Path(adata.filename) == Path(filepath)):
94
- # If adata.isbacked, X should already be up to date
95
- write_elem(f, "X", adata.X, dataset_kwargs=dataset_kwargs)
96
- if "raw/X" in as_dense and isinstance(
97
- adata.raw.X, CSMatrix | BaseCompressedSparseDataset
98
- ):
99
- write_sparse_as_dense(
100
- f, "raw/X", adata.raw.X, dataset_kwargs=dataset_kwargs
101
- )
102
- write_elem(f, "raw/var", adata.raw.var, dataset_kwargs=dataset_kwargs)
103
- write_elem(
104
- f, "raw/varm", dict(adata.raw.varm), dataset_kwargs=dataset_kwargs
105
- )
106
- elif adata.raw is not None:
107
- write_elem(f, "raw", adata.raw, dataset_kwargs=dataset_kwargs)
90
+ _write_x(
91
+ f,
92
+ adata, # accessing adata.X reopens adata.file if it’s backed
93
+ is_backed=adata.isbacked and adata.filename == filepath,
94
+ as_dense=as_dense,
95
+ dataset_kwargs=dataset_kwargs,
96
+ )
97
+ _write_raw(f, adata.raw, as_dense=as_dense, dataset_kwargs=dataset_kwargs)
108
98
  write_elem(f, "obs", adata.obs, dataset_kwargs=dataset_kwargs)
109
99
  write_elem(f, "var", adata.var, dataset_kwargs=dataset_kwargs)
110
100
  write_elem(f, "obsm", dict(adata.obsm), dataset_kwargs=dataset_kwargs)
@@ -115,6 +105,41 @@ def write_h5ad(
115
105
  write_elem(f, "uns", dict(adata.uns), dataset_kwargs=dataset_kwargs)
116
106
 
117
107
 
108
+ def _write_x(
109
+ f: h5py.Group,
110
+ adata: AnnData,
111
+ *,
112
+ is_backed: bool,
113
+ as_dense: Container[str],
114
+ dataset_kwargs: Mapping[str, Any],
115
+ ) -> None:
116
+ if "X" in as_dense and isinstance(adata.X, CSMatrix | BaseCompressedSparseDataset):
117
+ write_sparse_as_dense(f, "X", adata.X, dataset_kwargs=dataset_kwargs)
118
+ elif is_backed:
119
+ pass # If adata.isbacked, X should already be up to date
120
+ elif adata.X is None:
121
+ f.pop("X", None)
122
+ else:
123
+ write_elem(f, "X", adata.X, dataset_kwargs=dataset_kwargs)
124
+
125
+
126
+ def _write_raw(
127
+ f: h5py.Group,
128
+ raw: Raw,
129
+ *,
130
+ as_dense: Container[str],
131
+ dataset_kwargs: Mapping[str, Any],
132
+ ) -> None:
133
+ if "raw/X" in as_dense and isinstance(
134
+ raw.X, CSMatrix | BaseCompressedSparseDataset
135
+ ):
136
+ write_sparse_as_dense(f, "raw/X", raw.X, dataset_kwargs=dataset_kwargs)
137
+ write_elem(f, "raw/var", raw.var, dataset_kwargs=dataset_kwargs)
138
+ write_elem(f, "raw/varm", dict(raw.varm), dataset_kwargs=dataset_kwargs)
139
+ elif raw is not None:
140
+ write_elem(f, "raw", raw, dataset_kwargs=dataset_kwargs)
141
+
142
+
118
143
  @report_write_key_on_error
119
144
  @write_spec(IOSpec("array", "0.2.0"))
120
145
  def write_sparse_as_dense(
anndata/_io/read.py CHANGED
@@ -48,7 +48,9 @@ def read_csv(
48
48
  dtype
49
49
  Numpy data type.
50
50
  """
51
- return read_text(filename, delimiter, first_column_names, dtype)
51
+ return read_text(
52
+ filename, delimiter, first_column_names=first_column_names, dtype=dtype
53
+ )
52
54
 
53
55
 
54
56
  def read_excel(
@@ -360,18 +362,26 @@ def read_text(
360
362
  Numpy data type.
361
363
  """
362
364
  if not isinstance(filename, PathLike | str | bytes):
363
- return _read_text(filename, delimiter, first_column_names, dtype)
365
+ return _read_text(
366
+ filename, delimiter, first_column_names=first_column_names, dtype=dtype
367
+ )
364
368
 
365
369
  filename = Path(filename)
366
370
  if filename.suffix == ".gz":
367
371
  with gzip.open(str(filename), mode="rt") as f:
368
- return _read_text(f, delimiter, first_column_names, dtype)
372
+ return _read_text(
373
+ f, delimiter, first_column_names=first_column_names, dtype=dtype
374
+ )
369
375
  elif filename.suffix == ".bz2":
370
376
  with bz2.open(str(filename), mode="rt") as f:
371
- return _read_text(f, delimiter, first_column_names, dtype)
377
+ return _read_text(
378
+ f, delimiter, first_column_names=first_column_names, dtype=dtype
379
+ )
372
380
  else:
373
381
  with filename.open() as f:
374
- return _read_text(f, delimiter, first_column_names, dtype)
382
+ return _read_text(
383
+ f, delimiter, first_column_names=first_column_names, dtype=dtype
384
+ )
375
385
 
376
386
 
377
387
  def _iter_lines(file_like: Iterable[str]) -> Generator[str, None, None]:
@@ -385,7 +395,8 @@ def _iter_lines(file_like: Iterable[str]) -> Generator[str, None, None]:
385
395
  def _read_text( # noqa: PLR0912, PLR0915
386
396
  f: Iterator[str],
387
397
  delimiter: str | None,
388
- first_column_names: bool | None, # noqa: FBT001
398
+ *,
399
+ first_column_names: bool | None,
389
400
  dtype: str,
390
401
  ) -> AnnData:
391
402
  comments = []
@@ -275,7 +275,8 @@ def write_anndata(
275
275
  dataset_kwargs: Mapping[str, Any] = MappingProxyType({}),
276
276
  ):
277
277
  g = f.require_group(k)
278
- _writer.write_elem(g, "X", adata.X, dataset_kwargs=dataset_kwargs)
278
+ if adata.X is not None:
279
+ _writer.write_elem(g, "X", adata.X, dataset_kwargs=dataset_kwargs)
279
280
  _writer.write_elem(g, "obs", adata.obs, dataset_kwargs=dataset_kwargs)
280
281
  _writer.write_elem(g, "var", adata.var, dataset_kwargs=dataset_kwargs)
281
282
  _writer.write_elem(g, "obsm", dict(adata.obsm), dataset_kwargs=dataset_kwargs)
@@ -629,7 +630,7 @@ def write_vlen_string_array_zarr(
629
630
  dataset_kwargs = zarr_v3_compressor_compat(dataset_kwargs)
630
631
  dtype = VariableLengthUTF8()
631
632
  filters, fill_value = None, None
632
- if ad.settings.zarr_write_format == 2:
633
+ if f.metadata.zarr_format == 2:
633
634
  filters, fill_value = [VLenUTF8()], ""
634
635
  f.create_array(
635
636
  k,
@@ -695,12 +696,11 @@ def write_recarray_zarr(
695
696
  from anndata.compat import _to_fixed_length_strings
696
697
 
697
698
  elem = _to_fixed_length_strings(elem)
698
- if isinstance(f, H5Group) or is_zarr_v2():
699
+ if is_zarr_v2():
699
700
  f.create_dataset(k, data=elem, shape=elem.shape, **dataset_kwargs)
700
701
  else:
701
702
  dataset_kwargs = dataset_kwargs.copy()
702
703
  dataset_kwargs = zarr_v3_compressor_compat(dataset_kwargs)
703
- # TODO: zarr’s on-disk format v3 doesn’t support this dtype
704
704
  f.create_array(k, shape=elem.shape, dtype=elem.dtype, **dataset_kwargs)
705
705
  f[k][...] = elem
706
706
 
@@ -1283,7 +1283,7 @@ def write_scalar_zarr(
1283
1283
  from numcodecs import VLenUTF8
1284
1284
  from zarr.core.dtype import VariableLengthUTF8
1285
1285
 
1286
- match ad.settings.zarr_write_format, value:
1286
+ match f.metadata.zarr_format, value:
1287
1287
  case 2, str():
1288
1288
  filters, dtype, fill_value = [VLenUTF8()], VariableLengthUTF8(), ""
1289
1289
  case 3, str():
anndata/_io/zarr.py CHANGED
@@ -1,6 +1,5 @@
1
1
  from __future__ import annotations
2
2
 
3
- from pathlib import Path
4
3
  from typing import TYPE_CHECKING, TypeVar
5
4
  from warnings import warn
6
5
 
@@ -37,8 +36,6 @@ def write_zarr(
37
36
  **ds_kwargs,
38
37
  ) -> None:
39
38
  """See :meth:`~anndata.AnnData.write_zarr`."""
40
- if isinstance(store, Path):
41
- store = str(store)
42
39
  if convert_strings_to_categoricals:
43
40
  adata.strings_to_categoricals()
44
41
  if adata.raw is not None:
@@ -75,9 +72,6 @@ def read_zarr(store: PathLike[str] | str | MutableMapping | zarr.Group) -> AnnDa
75
72
  store
76
73
  The filename, a :class:`~typing.MutableMapping`, or a Zarr storage class.
77
74
  """
78
- if isinstance(store, Path):
79
- store = str(store)
80
-
81
75
  f = store if isinstance(store, zarr.Group) else zarr.open(store, mode="r")
82
76
 
83
77
  # Read with handling for backwards compat
anndata/_settings.py CHANGED
@@ -447,7 +447,7 @@ def validate_zarr_write_format(format: int):
447
447
  settings.register(
448
448
  "zarr_write_format",
449
449
  default_value=2,
450
- description="Which version of zarr to write to.",
450
+ description="Which version of zarr to write to when anndata must internally open a write-able zarr group.",
451
451
  validate=validate_zarr_write_format,
452
452
  get_from_env=lambda name, default: check_and_get_environ_var(
453
453
  f"ANNDATA_{name.upper()}",
@@ -1,7 +1,7 @@
1
1
  from __future__ import annotations
2
2
 
3
3
  from codecs import decode
4
- from collections.abc import Mapping
4
+ from collections.abc import Mapping, Sequence
5
5
  from functools import cache, partial, singledispatch
6
6
  from importlib.util import find_spec
7
7
  from types import EllipsisType
@@ -12,6 +12,7 @@ import h5py
12
12
  import numpy as np
13
13
  import pandas as pd
14
14
  import scipy
15
+ from numpy.typing import NDArray
15
16
  from packaging.version import Version
16
17
  from zarr import Array as ZarrArray # noqa: F401
17
18
  from zarr import Group as ZarrGroup
@@ -19,6 +20,7 @@ from zarr import Group as ZarrGroup
19
20
  if TYPE_CHECKING:
20
21
  from typing import Any
21
22
 
23
+
22
24
  #############################
23
25
  # scipy sparse array compat #
24
26
  #############################
@@ -32,7 +34,26 @@ class Empty:
32
34
  pass
33
35
 
34
36
 
35
- Index1D = slice | int | str | np.int64 | np.ndarray | pd.Series
37
+ Index1DNorm = slice | NDArray[np.bool_] | NDArray[np.integer]
38
+ # TODO: pd.Index[???]
39
+ Index1D = (
40
+ # 0D index
41
+ int
42
+ | str
43
+ | np.int64
44
+ # normalized 1D index
45
+ | Index1DNorm
46
+ # different containers for mask, obs/varnames, or numerical index
47
+ | Sequence[int]
48
+ | Sequence[str]
49
+ | Sequence[bool]
50
+ | pd.Series # bool, int, str
51
+ | pd.Index
52
+ | NDArray[np.str_]
53
+ | np.matrix # bool
54
+ | CSMatrix # bool
55
+ | CSArray # bool
56
+ )
36
57
  IndexRest = Index1D | EllipsisType
37
58
  Index = (
38
59
  IndexRest
@@ -286,8 +307,12 @@ def _to_fixed_length_strings(value: np.ndarray) -> np.ndarray:
286
307
  """\
287
308
  Convert variable length strings to fixed length.
288
309
 
289
- Currently a workaround for
290
- https://github.com/zarr-developers/zarr-python/pull/422
310
+ Formerly a workaround for
311
+ https://github.com/zarr-developers/zarr-python/pull/422,
312
+ resolved in https://github.com/zarr-developers/zarr-python/pull/813.
313
+
314
+ But if we didn't do this conversion, we would have to use a special codec in v2
315
+ for objects and v3 doesn't support objects at all. So we leave this function as-is.
291
316
  """
292
317
  new_dtype = []
293
318
  for dt_name, (dt_type, dt_offset) in value.dtype.fields.items():
anndata/tests/helpers.py CHANGED
@@ -42,12 +42,15 @@ if TYPE_CHECKING:
42
42
  from collections.abc import Callable, Collection, Iterable
43
43
  from typing import Literal, TypeGuard, TypeVar
44
44
 
45
+ from numpy.typing import NDArray
45
46
  from zarr.abc.store import ByteRequest
46
47
  from zarr.core.buffer import BufferPrototype
47
48
 
48
49
  from .._types import ArrayStorageType
50
+ from ..compat import Index1D
49
51
 
50
52
  DT = TypeVar("DT")
53
+ _SubsetFunc = Callable[[pd.Index[str], int], Index1D]
51
54
 
52
55
 
53
56
  try:
@@ -428,7 +431,7 @@ def gen_adata( # noqa: PLR0913
428
431
  return adata
429
432
 
430
433
 
431
- def array_bool_subset(index, min_size=2):
434
+ def array_bool_subset(index: pd.Index[str], min_size: int = 2) -> NDArray[np.bool_]:
432
435
  b = np.zeros(len(index), dtype=bool)
433
436
  selected = np.random.choice(
434
437
  range(len(index)),
@@ -439,11 +442,11 @@ def array_bool_subset(index, min_size=2):
439
442
  return b
440
443
 
441
444
 
442
- def list_bool_subset(index, min_size=2):
445
+ def list_bool_subset(index: pd.Index[str], min_size: int = 2) -> list[bool]:
443
446
  return array_bool_subset(index, min_size=min_size).tolist()
444
447
 
445
448
 
446
- def matrix_bool_subset(index, min_size=2):
449
+ def matrix_bool_subset(index: pd.Index[str], min_size: int = 2) -> np.matrix:
447
450
  with warnings.catch_warnings():
448
451
  warnings.simplefilter("ignore", PendingDeprecationWarning)
449
452
  indexer = np.matrix(
@@ -452,19 +455,26 @@ def matrix_bool_subset(index, min_size=2):
452
455
  return indexer
453
456
 
454
457
 
455
- def spmatrix_bool_subset(index, min_size=2):
458
+ def spmatrix_bool_subset(index: pd.Index[str], min_size: int = 2) -> sparse.csr_matrix:
456
459
  return sparse.csr_matrix(
457
460
  array_bool_subset(index, min_size=min_size).reshape(len(index), 1)
458
461
  )
459
462
 
460
463
 
461
- def sparray_bool_subset(index, min_size=2):
464
+ def sparray_bool_subset(index: pd.Index[str], min_size: int = 2) -> sparse.csr_array:
462
465
  return sparse.csr_array(
463
466
  array_bool_subset(index, min_size=min_size).reshape(len(index), 1)
464
467
  )
465
468
 
466
469
 
467
- def array_subset(index, min_size=2):
470
+ def single_subset(index: pd.Index[str], min_size: int = 1) -> str:
471
+ if min_size > 1:
472
+ msg = "max_size must be ≤1"
473
+ raise AssertionError(msg)
474
+ return index[np.random.randint(0, len(index))]
475
+
476
+
477
+ def array_subset(index: pd.Index[str], min_size: int = 2) -> NDArray[np.str_]:
468
478
  if len(index) < min_size:
469
479
  msg = f"min_size (={min_size}) must be smaller than len(index) (={len(index)}"
470
480
  raise ValueError(msg)
@@ -473,7 +483,7 @@ def array_subset(index, min_size=2):
473
483
  )
474
484
 
475
485
 
476
- def array_int_subset(index, min_size=2):
486
+ def array_int_subset(index: pd.Index[str], min_size: int = 2) -> NDArray[np.int64]:
477
487
  if len(index) < min_size:
478
488
  msg = f"min_size (={min_size}) must be smaller than len(index) (={len(index)}"
479
489
  raise ValueError(msg)
@@ -484,11 +494,11 @@ def array_int_subset(index, min_size=2):
484
494
  )
485
495
 
486
496
 
487
- def list_int_subset(index, min_size=2):
497
+ def list_int_subset(index: pd.Index[str], min_size: int = 2) -> list[int]:
488
498
  return array_int_subset(index, min_size=min_size).tolist()
489
499
 
490
500
 
491
- def slice_subset(index, min_size=2):
501
+ def slice_int_subset(index: pd.Index[str], min_size: int = 2) -> slice:
492
502
  while True:
493
503
  points = np.random.choice(np.arange(len(index) + 1), size=2, replace=False)
494
504
  s = slice(*sorted(points))
@@ -497,25 +507,33 @@ def slice_subset(index, min_size=2):
497
507
  return s
498
508
 
499
509
 
500
- def single_subset(index):
501
- return index[np.random.randint(0, len(index))]
510
+ def single_int_subset(index: pd.Index[str], min_size: int = 1) -> int:
511
+ if min_size > 1:
512
+ msg = "max_size must be ≤1"
513
+ raise AssertionError(msg)
514
+ return np.random.randint(0, len(index))
515
+
516
+
517
+ _SUBSET_FUNCS: list[_SubsetFunc] = [
518
+ # str (obs/var name)
519
+ single_subset,
520
+ array_subset,
521
+ # int (numeric index)
522
+ single_int_subset,
523
+ slice_int_subset,
524
+ array_int_subset,
525
+ list_int_subset,
526
+ # bool (mask)
527
+ array_bool_subset,
528
+ list_bool_subset,
529
+ matrix_bool_subset,
530
+ spmatrix_bool_subset,
531
+ sparray_bool_subset,
532
+ ]
502
533
 
503
534
 
504
- @pytest.fixture(
505
- params=[
506
- array_subset,
507
- slice_subset,
508
- single_subset,
509
- array_int_subset,
510
- list_int_subset,
511
- array_bool_subset,
512
- list_bool_subset,
513
- matrix_bool_subset,
514
- spmatrix_bool_subset,
515
- sparray_bool_subset,
516
- ]
517
- )
518
- def subset_func(request):
535
+ @pytest.fixture(params=_SUBSET_FUNCS)
536
+ def subset_func(request: pytest.FixtureRequest) -> _SubsetFunc:
519
537
  return request.param
520
538
 
521
539
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: anndata
3
- Version: 0.12.1
3
+ Version: 0.12.2
4
4
  Summary: Annotated data.
5
5
  Project-URL: Documentation, https://anndata.readthedocs.io/
6
6
  Project-URL: Source, https://github.com/scverse/anndata
@@ -1,5 +1,5 @@
1
1
  anndata/__init__.py,sha256=GdrXtUOgciN34rNOl3CxpzvoBgl0yERqrDLWjlwF3RI,1468
2
- anndata/_settings.py,sha256=Le3ysSfuG0vuYCtnUN3DV4HOnyiGgC-k9Gv0JShosSs,15654
2
+ anndata/_settings.py,sha256=UR6tHlWmEEgoaC3e4LuYavveeHICgaC7XOZOyEgNX-0,15712
3
3
  anndata/_settings.pyi,sha256=KUJEheJy1zvCBN2NvNEXAGcjLPUQcdhk0STIcU4mM4I,1545
4
4
  anndata/_types.py,sha256=c71REP9wS7Vz2cYrNxuNjPYdnq8MJ5g04MNrSi85ATA,5427
5
5
  anndata/_version.py,sha256=qsRPKvJAzUrnf49DHZk-FYfXKOASg5nJV2qomuOttX0,2160
@@ -14,27 +14,27 @@ anndata/_core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
14
14
  anndata/_core/access.py,sha256=pts7fGUKgGZANSsu_qAA7L10qHM-jT1zIehbl3441OY,873
15
15
  anndata/_core/aligned_df.py,sha256=MrGdi1zNZZlkkv4IeS2yY-R5ldXpchTlMlJK7PKRa7A,3833
16
16
  anndata/_core/aligned_mapping.py,sha256=BYU1jslMWIhtFTtUMaXY8ZCyt0J4_ZsJTmj6J2yAXTQ,14257
17
- anndata/_core/anndata.py,sha256=BADYc8TpS6yc1-bw5sh5cPzB4v63O5ZH0YFFChKO7TY,77747
17
+ anndata/_core/anndata.py,sha256=ZQF9L5Zt4k1Bg0npd9iL-PS_pYx62MfQzW1llN4QcRA,78291
18
18
  anndata/_core/extensions.py,sha256=9Rsho6qnr3PJHULrYGiZHCBinBZYJK6zyf3cFsl_gBY,10425
19
19
  anndata/_core/file_backing.py,sha256=kT71R_kZp_CiHImBK8IaZXsvYVtbX2Mg-7L2ldAWojM,5113
20
- anndata/_core/index.py,sha256=lyVuDfKvEeQYpgKjDsnuZNt4k4wV3adFowO_RrcohpE,9353
20
+ anndata/_core/index.py,sha256=6oED8kjTFKXnZSJXbkGFwscRtqV346h05Dx_Spd68WY,9298
21
21
  anndata/_core/merge.py,sha256=v0PxVs49KUZx6X252EtGt7XUHbO3mytTPK_o3Vd1nuo,60302
22
- anndata/_core/raw.py,sha256=EfTLoizP_mLtfYn0BR1Rpya0iZoGF4CupojWyeLGWpc,7797
22
+ anndata/_core/raw.py,sha256=x_PwwaDQscVQOFJ38kF7sNQ47LxowpS38h2RQfU5Zwo,7925
23
23
  anndata/_core/sparse_dataset.py,sha256=syXVDdQsEPSBvEi4eyBiAxPiq2TSk7qGStP8QSBTwBg,26801
24
24
  anndata/_core/storage.py,sha256=mHzqp7YBJ-rGQFulMAx__D-Z7y4omHPyb1cP7YxfbFE,2555
25
- anndata/_core/views.py,sha256=rbmI7P4dEcefhjons2l42H9D509xhxyXKTeyWiIBdtw,14657
26
- anndata/_core/xarray.py,sha256=pvAMSxrHmuz2d-xbalYmuNi56GrLPw_2Ehko8nss9Xg,16157
25
+ anndata/_core/views.py,sha256=eOFSV9pKhgcfPvK-c9nxfYdD6o0EVSxE7eSJissojwU,15018
26
+ anndata/_core/xarray.py,sha256=JeQjTuSQEiZF8cryKDYf9d7yt-ufQEVo9x94YaczuPQ,16078
27
27
  anndata/_io/__init__.py,sha256=GTNeUZ8d8aA3sK4P33tyljIc60KapLbkqBC6J1y3l9U,346
28
- anndata/_io/h5ad.py,sha256=ekfuxldN6LuhR3b1aXJKvjfivOBlRP_A4rhEqvej-8Q,13633
29
- anndata/_io/read.py,sha256=oc8Af3r9ieh0-SFN3GKRIFxenijrbng55-Ds-WUhbdE,15691
28
+ anndata/_io/h5ad.py,sha256=gABV7M7YjThZ7X9VAsHWs7baqQ4L0Zp90dBz4Kle7ys,14191
29
+ anndata/_io/read.py,sha256=MuTR6dR2WItV2y0sKYvxSO2fu7OlDjaCRYJuT5UbuBo,15933
30
30
  anndata/_io/utils.py,sha256=dB2RRaWm9V-CMGI46KhqpmzBGA6LLBnsJc83Q85OcgM,9613
31
31
  anndata/_io/write.py,sha256=r55w6yPIIuUSLW9wyYL8GnkzHHQdAxy6xiCEw9cAC38,4811
32
- anndata/_io/zarr.py,sha256=6ejnp9VNsxSihSd3HXAv2uVkrv7dRnuox4Jt1Y0yyGY,5261
32
+ anndata/_io/zarr.py,sha256=hLi7nkgTiqe0ftgVaVSpwlU0Y7VurSTf_C3DfILdfAo,5117
33
33
  anndata/_io/specs/__init__.py,sha256=Z6l8xqa7B480U3pqrNIg4-fhUvpBW85w4xA3i3maAUM,427
34
34
  anndata/_io/specs/lazy_methods.py,sha256=64rr1C83aaiN-iO7m1R03dqd6TbiJQbhzOQI5lXHD0o,12270
35
- anndata/_io/specs/methods.py,sha256=qRRZfz4pqjaxRrkhiYiZe9_mXuWjAj4OLhbL-CO_VSM,46485
35
+ anndata/_io/specs/methods.py,sha256=dz-lfDjoLRpnRXdKjzDaA9_DBbOdys5BIL5ivMCHqvs,46405
36
36
  anndata/_io/specs/registry.py,sha256=hno-mvefhcDHmACrBjcKmlf5T87FPoe-N1Gd3i9otYo,17459
37
- anndata/compat/__init__.py,sha256=FZZbV36BG0L0v-N5IXQbrCa5jNgBRsFY44vmdG-2y_4,12227
37
+ anndata/compat/__init__.py,sha256=zCh9_7FcePbTTvXceZYY6mPemlKFgiYCy3x7srFQum4,12915
38
38
  anndata/experimental/__init__.py,sha256=polIxriEkby0iEqw-IXkUzp8k0wp92BpYY4zl4BsHH0,1648
39
39
  anndata/experimental/_dispatch_io.py,sha256=JzH8Uvewabc1gIF3L16RZnM9m2NAG28bQIQ57uP097k,1869
40
40
  anndata/experimental/merge.py,sha256=Y18C1AT9R1IF60Y7YxyprJ1hz-ySNb2okXHA6IR6fCU,21914
@@ -47,12 +47,12 @@ anndata/experimental/multi_files/_anncollection.py,sha256=RQ79_7r8sdV_kx66UJUuY9
47
47
  anndata/experimental/pytorch/__init__.py,sha256=4CkgrahLO8Kc-s2bmv6lVQfDxbO3IUyV0v4ygBDkttY,95
48
48
  anndata/experimental/pytorch/_annloader.py,sha256=7mpsFV5vBfxKIje1cPjahtDZ5afkU-H663XB4FJhmok,8075
49
49
  anndata/tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
50
- anndata/tests/helpers.py,sha256=yVFdqB2PDDw8jNOkKAo-6VR7vk8yUlbafZbtDyibB8c,35111
50
+ anndata/tests/helpers.py,sha256=27p_Nc5vFIiW-7EhV85g3QiE0dStMnUg0uFBRyroZUg,36072
51
51
  testing/anndata/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
52
52
  testing/anndata/_doctest.py,sha256=Qew0N0zLLNiPKN1CLunqY5cTinFLaEhY5GagiYfm6KI,344
53
53
  testing/anndata/_pytest.py,sha256=a1Qn5KR5mjCcQI05negkMEv0e2aJX2P_teyl0UMfoeQ,4160
54
54
  testing/anndata/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
55
- anndata-0.12.1.dist-info/METADATA,sha256=ah9uvJCDn4h1tspyYlcwiSC7mGBhwWHVs6irKQ8LzXY,9643
56
- anndata-0.12.1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
57
- anndata-0.12.1.dist-info/licenses/LICENSE,sha256=VcrXoEVMhtNuvMvKYGP-I5lMT8qZ_6dFf22fsL180qA,1575
58
- anndata-0.12.1.dist-info/RECORD,,
55
+ anndata-0.12.2.dist-info/METADATA,sha256=M4Jcewb7LkN2pgT41Trw2y3YvZ4Z1EzZAzcQ78K03SE,9643
56
+ anndata-0.12.2.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
57
+ anndata-0.12.2.dist-info/licenses/LICENSE,sha256=VcrXoEVMhtNuvMvKYGP-I5lMT8qZ_6dFf22fsL180qA,1575
58
+ anndata-0.12.2.dist-info/RECORD,,