anndata 0.12.7__py3-none-any.whl → 0.12.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
anndata/_core/anndata.py CHANGED
@@ -362,7 +362,12 @@ class AnnData(metaclass=utils.DeprecationMixinMeta):  # noqa: PLW1641

         # init from file
         if filename is not None:
-            self.file = AnnDataFileManager(self, filename, filemode)
+            fileobj, filename = (
+                (filename, None)
+                if isinstance(filename, h5py.File)
+                else (None, filename)
+            )
+            self.file = AnnDataFileManager(self, filename, filemode, fileobj)
         else:
             self.file = AnnDataFileManager(self, None)

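Note: with this change, backed mode can be handed an already-open `h5py.File` instead of a path. A minimal sketch of the new calling convention (the file name is hypothetical):

```python
import anndata as ad
import h5py

f = h5py.File("data.h5ad", "r")   # hypothetical path
adata = ad.AnnData(filename=f)    # the open handle is reused, not reopened
```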
anndata/_core/index.py CHANGED
@@ -25,12 +25,6 @@ def _normalize_indices(
     # deal with tuples of length 1
     if isinstance(index, tuple) and len(index) == 1:
         index = index[0]
-    # deal with pd.Series
-    if isinstance(index, pd.Series):
-        index = index.values
-    if isinstance(index, tuple):
-        # TODO: The series should probably be aligned first
-        index = tuple(i.values if isinstance(i, pd.Series) else i for i in index)
     ax0, ax1 = unpack_index(index)
     ax0 = _normalize_index(ax0, names0)
     ax1 = _normalize_index(ax1, names1)
@@ -45,6 +39,9 @@ def _normalize_index(  # noqa: PLR0911, PLR0912
         msg = f"Don’t call _normalize_index with non-categorical/string names and non-range index {index}"
         raise TypeError(msg)

+    if isinstance(indexer, pd.Index | pd.Series):
+        indexer = indexer.array
+
     # the following is insanely slow for sequences,
     # we replaced it using pandas below
     def name_idx(i):
@@ -65,16 +62,21 @@ def _normalize_index(  # noqa: PLR0911, PLR0912
     elif isinstance(indexer, str):
         return index.get_loc(indexer)  # int
     elif isinstance(
-        indexer, Sequence | np.ndarray | pd.Index | CSMatrix | np.matrix | CSArray
+        indexer,
+        Sequence
+        | np.ndarray
+        | pd.api.extensions.ExtensionArray
+        | CSMatrix
+        | np.matrix
+        | CSArray,
     ):
-        if hasattr(indexer, "shape") and (
-            (indexer.shape == (index.shape[0], 1))
-            or (indexer.shape == (1, index.shape[0]))
+        if (shape := getattr(indexer, "shape", None)) is not None and (
+            shape == (index.shape[0], 1) or shape == (1, index.shape[0])
         ):
             if isinstance(indexer, CSMatrix | CSArray):
                 indexer = indexer.toarray()
             indexer = np.ravel(indexer)
-        if not isinstance(indexer, np.ndarray | pd.Index):
+        if not isinstance(indexer, np.ndarray):
            indexer = np.array(indexer)
        if len(indexer) == 0:
            indexer = indexer.astype(int)
@@ -111,7 +113,7 @@ def _normalize_index(  # noqa: PLR0911, PLR0912
             return indexer.data.compute()
         return indexer.data
     msg = f"Unknown indexer {indexer!r} of type {type(indexer)}"
-    raise IndexError()
+    raise IndexError(msg)


 def _fix_slice_bounds(s: slice, length: int) -> slice:
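Two things happen here: `pd.Series`/`pd.Index` indexers are now reduced to their `.array` once inside `_normalize_index` (replacing the ad-hoc `.values` conversions deleted above, with `pd.api.extensions.ExtensionArray` accepted as an indexer type), and the final `raise IndexError()` now actually carries the prepared message instead of discarding it. End-user indexing is unchanged; a quick sketch:

```python
import anndata as ad
import numpy as np
import pandas as pd

adata = ad.AnnData(np.ones((3, 2)))
mask = pd.Series([True, False, True], index=adata.obs_names)
subset = adata[mask]  # the Series is reduced to its .array during normalization
assert subset.n_obs == 2
```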
anndata/_core/merge.py CHANGED
@@ -10,7 +10,7 @@ from collections.abc import Callable, Mapping, MutableSet
 from functools import partial, reduce, singledispatch
 from itertools import repeat
 from operator import and_, or_, sub
-from typing import TYPE_CHECKING, Literal, TypeVar
+from typing import TYPE_CHECKING, Literal, TypeVar, cast
 from warnings import warn

 import numpy as np
@@ -44,7 +44,7 @@ if TYPE_CHECKING:

     from anndata._types import Join_T

-    from ..compat import XDataArray, XDataset
+    from ..compat import XDataArray

 T = TypeVar("T")

@@ -244,110 +244,89 @@ def as_cp_sparse(x) -> CupySparseMatrix:
 def unify_dtypes(
     dfs: Iterable[pd.DataFrame | Dataset2D],
 ) -> list[pd.DataFrame | Dataset2D]:
-    """
-    Attempts to unify datatypes from multiple dataframes.
+    """Attempt to unify datatypes from multiple dataframes.

     For catching cases where pandas would convert to object dtype.
     """
     dfs = list(dfs)
     # Get shared categorical columns
-    df_dtypes = [dict(df.dtypes) for df in dfs]
+    df_dtypes = [
+        cast("pd.Series[ExtensionDtype]", df.dtypes).to_dict()
+        if isinstance(df, pd.DataFrame)
+        else df.dtypes
+        for df in dfs
+    ]
     columns = reduce(lambda x, y: x.union(y), [df.columns for df in dfs])
-
-    dtypes: dict[str, list[np.dtype | ExtensionDtype]] = {col: [] for col in columns}
-    for col in columns:
-        for df in df_dtypes:
-            dtypes[col].append(df.get(col, None))
-
+    dtypes = {
+        col: (
+            [df[col] for df in df_dtypes if col in df],
+            any(col not in df for df in df_dtypes),
+        )
+        for col in columns
+    }
     if len(dtypes) == 0:
         return dfs
-    else:
-        dfs = [df.copy(deep=False) for df in dfs]

     new_dtypes = {
         col: target_dtype
-        for col, dtype in dtypes.items()
-        if (target_dtype := try_unifying_dtype(dtype)) is not None
+        for col, (dts, has_missing) in dtypes.items()
+        if (target_dtype := try_unifying_dtype(dts, has_missing=has_missing))
+        is not None
     }

+    dfs = [df.copy(deep=False) for df in dfs]
     for df in dfs:
         for col, dtype in new_dtypes.items():
             if col in df:
                 df[col] = df[col].astype(dtype)
-
     return dfs


-def try_unifying_dtype(  # noqa PLR0911, PLR0912
-    col: Sequence[np.dtype | ExtensionDtype],
-) -> pd.core.dtypes.base.ExtensionDtype | None:
-    """
-    If dtypes can be unified, returns the dtype they would be unified to.
+def try_unifying_dtype(
+    dtypes: Sequence[np.dtype | ExtensionDtype], *, has_missing: bool
+) -> ExtensionDtype | type[object] | None:
+    """Determine unified dtype if possible.

-    Returns None if they can't be unified, or if we can expect pandas to unify them for
-    us.
+    Returns None if they can’t be unified, or if we can expect pandas to unify them for us.

     Params
     ------
-    col:
-        A list of dtypes to unify. Can be numpy/ pandas dtypes, or None (which denotes
-        a missing value)
+    dtypes
+        A list of dtypes to unify. Can be numpy or pandas dtypes
+    has_missing
+        Whether the result needs to accommodate missing values
     """
-    dtypes: set[pd.CategoricalDtype] = set()
     # Categorical
-    if any(isinstance(dtype, pd.CategoricalDtype) for dtype in col):
-        ordered = False
-        for dtype in col:
-            if isinstance(dtype, pd.CategoricalDtype):
-                dtypes.add(dtype)
-                ordered = ordered | dtype.ordered
-            elif not pd.isnull(dtype):
-                return None
-        if len(dtypes) > 0:
-            categories = reduce(
-                lambda x, y: x.union(y),
-                (dtype.categories for dtype in dtypes if not pd.isnull(dtype)),
-            )
+    if any(isinstance(dtype, pd.CategoricalDtype) for dtype in dtypes):
+        if not all(isinstance(dtype, pd.CategoricalDtype) for dtype in dtypes):
+            return None
+        if TYPE_CHECKING:
+            dtypes = cast("Sequence[pd.CategoricalDtype]", dtypes)
+
+        all_categories = reduce(
+            lambda x, y: x.union(y), (dtype.categories for dtype in dtypes)
+        )
+        if not any(dtype.ordered for dtype in dtypes):
+            return pd.CategoricalDtype(natsorted(all_categories), ordered=False)
+
+        dtypes_with_categories = [
+            dtype for dtype in dtypes if len(dtype.categories) > 0
+        ]
+        if dtypes_with_categories and all(
+            len(dtype.categories) == len(all_categories)
+            and dtype.ordered
+            and np.all(all_categories == dtype.categories)
+            for dtype in dtypes_with_categories
+        ):
+            return dtypes_with_categories[0]
+
+        return object

-        if not ordered:
-            return pd.CategoricalDtype(natsorted(categories), ordered=False)
-        else:  # for xarray Datasets, see https://github.com/pydata/xarray/issues/10247
-            categories_intersection = reduce(
-                lambda x, y: x.intersection(y),
-                (
-                    dtype.categories
-                    for dtype in dtypes
-                    if not pd.isnull(dtype) and len(dtype.categories) > 0
-                ),
-            )
-            if len(categories_intersection) < len(categories):
-                return object
-            else:
-                same_orders = all(
-                    dtype.ordered
-                    for dtype in dtypes
-                    if not pd.isnull(dtype) and len(dtype.categories) > 0
-                )
-                same_orders &= all(
-                    np.all(categories == dtype.categories)
-                    for dtype in dtypes
-                    if not pd.isnull(dtype) and len(dtype.categories) > 0
-                )
-                if same_orders:
-                    return next(
-                        dtype
-                        for dtype in dtypes
-                        if not pd.isnull(dtype) and len(dtype.categories) > 0
-                    )
-        return object
     # Boolean
-    elif all(pd.api.types.is_bool_dtype(dtype) or dtype is None for dtype in col):
-        if any(dtype is None for dtype in col):
-            return pd.BooleanDtype()
-        else:
-            return None
-    else:
-        return None
+    if all(pd.api.types.is_bool_dtype(dtype) for dtype in dtypes) and has_missing:
+        return pd.BooleanDtype()
+
+    return None


 def check_combinable_cols(cols: list[pd.Index], join: Join_T):
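The rewritten `try_unifying_dtype` makes the unification rules explicit: categoricals must all be categorical, unordered ones unify to the natsorted union of their categories, ordered ones only when their categories match exactly (otherwise `object`), and plain bool columns are promoted to the nullable `BooleanDtype` only when some frame lacks the column. A sketch against the internal helper as defined in this hunk:

```python
import pandas as pd
from anndata._core.merge import try_unifying_dtype  # internal helper

a = pd.CategoricalDtype(["x", "y"])
b = pd.CategoricalDtype(["y", "z"])
# unordered categoricals unify to the (natsorted) union of their categories
assert try_unifying_dtype([a, b], has_missing=False) == pd.CategoricalDtype(["x", "y", "z"])

# bool columns are only promoted when missing values must be representable
bool_dtype = pd.api.types.pandas_dtype(bool)
assert try_unifying_dtype([bool_dtype], has_missing=True) == pd.BooleanDtype()
assert try_unifying_dtype([bool_dtype], has_missing=False) is None
```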
@@ -1207,15 +1186,13 @@ def make_dask_col_from_extension_dtype(
     A :class:`dask.Array`: representation of the column.
     """
     import dask.array as da
-    import xarray as xr
-    from xarray.core.indexing import LazilyIndexedArray

     from anndata._io.specs.lazy_methods import (
         compute_chunk_layout_for_axis_size,
         get_chunksize,
         maybe_open_h5,
     )
-    from anndata.compat import XDataArray
+    from anndata.compat import xarray as xr
     from anndata.experimental import read_elem_lazy

     base_path_or_zarr_group = col.attrs.get("base_path_or_zarr_group")
@@ -1224,7 +1201,6 @@ def make_dask_col_from_extension_dtype(
         base_path_or_zarr_group is not None and elem_name is not None
     ):  # lazy, backed by store
         dims = col.dims
-        coords = col.coords.copy()
         with maybe_open_h5(base_path_or_zarr_group, elem_name) as f:
             maybe_chunk_size = get_chunksize(read_elem_lazy(f))
             chunk_size = (
@@ -1238,17 +1214,14 @@ def make_dask_col_from_extension_dtype(
         # reopening is important to get around h5py's unserializable lock in processes
         with maybe_open_h5(base_path_or_zarr_group, elem_name) as f:
             v = read_elem_lazy(f)
-            variable = xr.Variable(data=LazilyIndexedArray(v), dims=dims)
-            data_array = XDataArray(
-                variable,
-                coords=coords,
-                dims=dims,
+            variable = xr.Variable(
+                data=xr.core.indexing.LazilyIndexedArray(v), dims=dims
             )
         idx = tuple(
             slice(start, stop)
             for start, stop in block_info[None]["array-location"]
         )
-        chunk = np.array(data_array.data[idx])
+        chunk = np.array(variable.data[idx])
         return chunk

     if col.dtype == "category" or col.dtype == "string" or use_only_object_dtype:  # noqa PLR1714
@@ -1268,7 +1241,7 @@ def make_dask_col_from_extension_dtype(

 def make_xarray_extension_dtypes_dask(
     annotations: Iterable[Dataset2D], *, use_only_object_dtype: bool = False
-) -> Generator[XDataset, None, None]:
+) -> Generator[Dataset2D, None, None]:
     """
     Creates a generator of Dataset2D objects with dask arrays in place of :class:`pandas.api.extensions.ExtensionArray` dtype columns.

@@ -1710,6 +1683,9 @@ def concat(  # noqa: PLR0912, PLR0913, PLR0915
             alt_annotations, use_only_object_dtype=True
         )
     )
+    for a in annotations_with_only_dask:
+        if a.true_index_dim != a.index_dim:
+            a.index = a.true_index
     annotations_with_only_dask = [
         a.ds.rename({a.true_index_dim: "merge_index"})
         for a in annotations_with_only_dask
@@ -1717,7 +1693,6 @@ def concat(  # noqa: PLR0912, PLR0913, PLR0915
     alt_annot = Dataset2D(
         xr.merge(annotations_with_only_dask, join=join, compat="override")
     )
-    alt_annot.true_index_dim = "merge_index"

     X = concat_Xs(adatas, reindexers, axis=axis, fill_value=fill_value)

anndata/_core/xarray.py CHANGED
@@ -117,7 +117,8 @@ class Dataset2D:

     @property
     def true_index_dim(self) -> str:
-        """
+        """Key of the “true” index.
+
         Because xarray loads its coordinates/indexes in memory,
         we allow for signaling that a given variable, which is not a coordinate, is the "true" index.

@@ -130,7 +131,7 @@ class Dataset2D:
         return self.ds.attrs.get("indexing_key", self.index_dim)

     @true_index_dim.setter
-    def true_index_dim(self, val: str):
+    def true_index_dim(self, val: str | None) -> None:
         if val is None or (val == self.index_dim and "indexing_key" in self.ds.attrs):
             del self.ds.attrs["indexing_key"]
         elif val not in self.ds.dims:
@@ -146,8 +147,10 @@ class Dataset2D:

     @property
     def index(self) -> pd.Index:
-        """:attr:`~anndata.AnnData` internally looks for :attr:`~pandas.DataFrame.index` so this ensures usability
-        A :class:`pandas.Index` object corresponding to :attr:`anndata.experimental.backed.Dataset2D.index_dim`
+        """A :class:`pandas.Index` object corresponding to :attr:`anndata.experimental.backed.Dataset2D.index_dim`.
+
+        :attr:`~anndata.AnnData` internally looks for :attr:`~pandas.DataFrame.index` so this ensures usability.
+
         Returns
         -------
         The index of the of the dataframe as resolved from :attr:`~xarray.Dataset.coords`.
@@ -155,14 +158,26 @@ class Dataset2D:
         return self.ds.indexes[self.index_dim]

     @index.setter
-    def index(self, val) -> None:
+    def index(self, val: object | pd.Index | XDataArray) -> None:
         index_dim = self.index_dim
-        self.ds.coords[index_dim] = (index_dim, val)
-        if isinstance(val, pd.Index) and val.name is not None and val.name != index_dim:
-            self.ds.update(self.ds.rename({self.index_dim: val.name}))
-            del self.ds.coords[index_dim]
+        if (
+            isinstance(val, pd.Index | XDataArray)
+            and val.name is not None
+            and val.name != index_dim
+        ):
+            # swap the names of the dimensions out and drop the old index variable, setting `coords` in the process if `val` came from this dataset.
+            self._ds = self.ds.swap_dims({index_dim: val.name}).drop_vars(index_dim)
+            # swapping dims only changes the name, but not the underlying value i.e., the coordinate, if the underlying value was not present in the dataset.
+            # If we were to `__setitem__` on `.coords` without checking, `val` could have the old `index_dim` as its `name` because it was present in the dataset.
+            if val.name not in self.ds.coords:
+                self.ds.coords[val.name] = val
+            self._validate_shape_invariants(self._ds)
+        else:
+            self.ds.coords[index_dim] = (index_dim, val)
         # without `indexing_key` explicitly set on `self.ds.attrs`, `self.true_index_dim` will use the `self.index_dim`
-        if "indexing_key" in self.ds.attrs:
+        if "indexing_key" in self.ds.attrs and (
+            hasattr(val, "name") and val.name == self.ds.attrs["indexing_key"]
+        ):
             del self.ds.attrs["indexing_key"]

     @property
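The new setter path leans on xarray's public renaming API instead of `rename` plus `update`. A toy illustration of that mechanism on a bare `xarray.Dataset` (a standalone sketch of the idiom, not anndata code):

```python
import pandas as pd
import xarray as xr

ds = xr.Dataset({"val": ("obs_names", [1, 2])}, coords={"obs_names": ["a", "b"]})
new_idx = pd.Index(["c1", "c2"], name="cell_id")

# rename the dimension, drop the stale index variable, attach the new coordinate
ds = ds.swap_dims({"obs_names": "cell_id"}).drop_vars("obs_names")
ds.coords["cell_id"] = new_idx
```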
@@ -172,12 +187,14 @@ class Dataset2D:

     @property
     def true_index(self) -> pd.Index:
-        """:attr:`~anndata.experimental.backed.Dataset2D.true_xr_index` as a :class:`pandas.Index`"""
-        return self.true_xr_index.to_index()
+        """:attr:`~anndata.experimental.backed.Dataset2D.true_xr_index` as a :class:`pandas.Index`."""
+        idx = self.true_xr_index.to_index()
+        idx.name = self.true_xr_index.name
+        return idx

     @property
     def shape(self) -> tuple[int, int]:
-        """:attr:`~anndata.AnnData` internally looks for :attr:`~pandas.DataFrame.shape` so this ensures usability
+        """:attr:`~anndata.AnnData` internally looks for :attr:`~pandas.DataFrame.shape` so this ensures usability.

         Returns
         -------
@@ -187,7 +204,7 @@ class Dataset2D:

     @property
     def iloc(self) -> Dataset2DIlocIndexer:
-        """:attr:`~anndata.AnnData` internally looks for :attr:`~pandas.DataFrame.iloc` so this ensures usability
+        """:attr:`~anndata.AnnData` internally looks for :attr:`~pandas.DataFrame.iloc` so this ensures usability.

         Returns
         -------
@@ -268,6 +285,17 @@ class Dataset2D:
         columns.discard(index_key)
         return pd.Index(columns)

+    @columns.setter
+    def columns(self, val) -> None:
+        if len(self.columns.symmetric_difference(val)) > 0:
+            msg = "Trying to rename the keys of the mapping with new names - please use a different API to rename the keys of the underlying dataset mapping."
+            raise ValueError(msg)
+        warnings.warn(
+            "Renaming or reordering columns on `Dataset2D` has no effect because the underlying data structure has no apparent ordering on its keys",
+            UserWarning,
+            stacklevel=2,
+        )
+
     def __setitem__(
         self, key: Hashable | Iterable[Hashable] | Mapping, value: Any
     ) -> None:
@@ -348,9 +376,9 @@ class Dataset2D:
         return len(self.ds)

     @property
-    def dtypes(self) -> pd.Series:
+    def dtypes(self) -> Mapping[Hashable, np.dtype]:
         """
-        Return a Series with the dtypes of the variables in the Dataset2D.
+        Return a Mapping with the dtypes of the variables in the Dataset2D.
         """
         return self.ds.dtypes

anndata/_io/h5ad.py CHANGED
@@ -172,9 +172,8 @@ def write_sparse_as_dense(
 def read_h5ad_backed(
     filename: str | PathLike[str], mode: Literal["r", "r+"]
 ) -> AnnData:
-    d = dict(filename=filename, filemode=mode)
-
     f = h5py.File(filename, mode)
+    d = dict(filename=f)

     attributes = ["obsm", "varm", "obsp", "varp", "uns", "layers"]
     df_attributes = ["obs", "var"]
@@ -191,6 +190,7 @@ def read_h5ad_backed(
         d["raw"] = _read_raw(f, attrs={"var", "varm"})
     adata = AnnData(**d)
+    assert adata.file._file is f

     # Backwards compat to <0.7
     if isinstance(f["obs"], h5py.Dataset):
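Together with the `AnnData` constructor change above, backed reads now thread a single `h5py.File` through the whole setup (the added `assert` guards that invariant). Usage is unchanged; file name hypothetical:

```python
import anndata as ad

adata = ad.read_h5ad("data.h5ad", backed="r")  # hypothetical file
adata.file.close()  # same handle that read_h5ad_backed opened
```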
anndata/_io/specs/lazy_methods.py CHANGED
@@ -16,11 +16,12 @@ from anndata._core.xarray import Dataset2D, requires_xarray
 from anndata.abc import CSCDataset, CSRDataset
 from anndata.compat import (
     NULLABLE_NUMPY_STRING_TYPE,
+    NUMPY_2,
     DaskArray,
     H5Array,
     H5Group,
-    XDataArray,
     XDataset,
+    XVariable,
     ZarrArray,
     ZarrGroup,
 )
@@ -248,24 +249,18 @@ def _gen_xarray_dict_iterator_from_elems(
     elem_dict: dict[str, LazyDataStructures],
     dim_name: str,
     index: np.NDArray,
-) -> Generator[tuple[str, XDataArray], None, None]:
+) -> Generator[tuple[str, XVariable], None, None]:
     from anndata.experimental.backed._lazy_arrays import CategoricalArray, MaskedArray

-    from ...compat import XDataArray
     from ...compat import xarray as xr

     for k, v in elem_dict.items():
         if isinstance(v, DaskArray) and k != dim_name:
-            data_array = XDataArray(v, coords=[index], dims=[dim_name], name=k)
+            variable = xr.Variable([dim_name], data=v)
         elif isinstance(v, CategoricalArray | MaskedArray) and k != dim_name:
             variable = xr.Variable(
-                data=xr.core.indexing.LazilyIndexedArray(v), dims=[dim_name]
-            )
-            data_array = XDataArray(
-                variable,
-                coords=[index],
-                dims=[dim_name],
-                name=k,
+                [dim_name],
+                data=xr.core.indexing.LazilyIndexedArray(v),
                 attrs={
                     "base_path_or_zarr_group": v.base_path_or_zarr_group,
                     "elem_name": v.elem_name,
@@ -277,13 +272,11 @@ def _gen_xarray_dict_iterator_from_elems(
                 },
             )
         elif k == dim_name:
-            data_array = XDataArray(
-                index, coords=[index], dims=[dim_name], name=dim_name
-            )
+            variable = xr.Variable([dim_name], data=index)
         else:
             msg = f"Could not read {k}: {v} from into xarray Dataset2D"
             raise ValueError(msg)
-        yield k, data_array
+        yield k, variable


 DUMMY_RANGE_INDEX_KEY = "_anndata_dummy_range_index"
@@ -325,11 +318,9 @@ def read_dataframe(
         _gen_xarray_dict_iterator_from_elems(elem_dict, dim_name, index)
     )
     if use_range_index:
-        elem_xarray_dict[DUMMY_RANGE_INDEX_KEY] = XDataArray(
-            index,
-            coords=[index],
-            dims=[DUMMY_RANGE_INDEX_KEY],
-            name=DUMMY_RANGE_INDEX_KEY,
+        elem_xarray_dict[DUMMY_RANGE_INDEX_KEY] = XVariable(
+            [DUMMY_RANGE_INDEX_KEY],
+            data=index,
         )
     ds = Dataset2D(XDataset(elem_xarray_dict))
     ds.is_backed = True
@@ -377,9 +368,14 @@ def read_nullable(
         Path(filename(elem)) if isinstance(elem, H5Group) else elem
     )
     elem_name = get_elem_name(elem)
+    values = elem["values"]
+    # HDF5 stores strings as bytes; use .astype("T") to decode on access
+    # h5py recommends .astype("T") over .asstr() when using numpy ≥2
+    if encoding_type == "nullable-string-array" and isinstance(elem, H5Group):
+        values = values.astype("T") if NUMPY_2 else values.asstr()
     return MaskedArray(
-        values=elem["values"],
-        mask=elem.get("mask", None),
+        values=values,
+        mask=elem["mask"],
         dtype_str=encoding_type,
         base_path_or_zarr_group=base_path_or_zarr_group,
         elem_name=elem_name,
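The `read_nullable` change decodes HDF5 byte strings lazily at access time instead of materializing them. The underlying h5py idiom, using the `NUMPY_2` flag this release adds to `anndata.compat` (file and element paths hypothetical):

```python
import h5py
from anndata.compat import NUMPY_2

with h5py.File("data.h5ad", "r") as f:          # hypothetical file
    values = f["obs/some_column/values"]        # hypothetical element path
    # a lazy decoding view: StringDType ("T") on NumPy ≥2, str objects otherwise
    view = values.astype("T") if NUMPY_2 else values.asstr()
```

Note also that `mask` is now read unconditionally (`elem["mask"]`) rather than defaulting to `None`.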
anndata/_io/specs/methods.py CHANGED
@@ -95,9 +95,21 @@ GLOBAL_LOCK = Lock()
 #     return False


-def zarr_v3_compressor_compat(dataset_kwargs) -> dict:
-    if not is_zarr_v2() and (compressor := dataset_kwargs.pop("compressor", None)):
-        dataset_kwargs["compressors"] = compressor
+def zarr_v3_compressor_compat(dataset_kwargs: dict) -> dict:
+    """Handle mismatch between our compressor kwarg and :func:`zarr.create_array` in v3's `compressors` arg
+    See https://zarr.readthedocs.io/en/stable/api/zarr/create/#zarr.create_array
+
+    Parameters
+    ----------
+    dataset_kwargs
+        The kwarg dict potentially containing "compressor"
+
+    Returns
+    -------
+    The kwarg dict with "compressor" moved to "compressors" if zarr v3 is in use.
+    """
+    if not is_zarr_v2() and "compressor" in dataset_kwargs:
+        dataset_kwargs["compressors"] = dataset_kwargs.pop("compressor")
     return dataset_kwargs

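The rewrite also fixes an edge case: the old walrus version popped `compressor` and then dropped it when the value was falsy (e.g. `None`), whereas the new version always forwards the key. A sketch (the import path follows the file inferred above; the helper is internal):

```python
import numcodecs
from anndata._io.specs.methods import zarr_v3_compressor_compat  # internal helper

dataset_kwargs = {"compressor": numcodecs.Zstd()}
# zarr v3's create_array takes `compressors`, not the legacy `compressor`
dataset_kwargs = zarr_v3_compressor_compat(dataset_kwargs)
# under zarr v3 the value now lives under "compressors"; under v2 it is unchanged
```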
@@ -1098,7 +1110,10 @@ def write_categorical(

     _writer.write_elem(g, "codes", v.codes, dataset_kwargs=dataset_kwargs)
     _writer.write_elem(
-        g, "categories", v.categories._values, dataset_kwargs=dataset_kwargs
+        g,
+        "categories",
+        v.categories.to_numpy(),
+        dataset_kwargs=dataset_kwargs,
     )


anndata/_io/utils.py CHANGED
@@ -1,17 +1,22 @@
 from __future__ import annotations

 from collections.abc import Callable
-from functools import WRAPPER_ASSIGNMENTS, wraps
+from functools import WRAPPER_ASSIGNMENTS, cache, wraps
 from itertools import pairwise
 from typing import TYPE_CHECKING, Literal, cast
 from warnings import warn

+import numpy as np
+import pandas as pd
+
 from .._core.sparse_dataset import BaseCompressedSparseDataset

 if TYPE_CHECKING:
     from collections.abc import Callable, Mapping
     from typing import Any, Literal

+    from pandas.core.dtypes.dtypes import BaseMaskedDtype
+
     from .._types import StorageType, _WriteInternal
     from ..compat import H5Group, ZarrGroup
     from ..typing import RWAble
@@ -119,6 +124,30 @@ def check_key(key):
         raise TypeError(msg)


+@cache
+def pandas_nullable_dtype(dtype: np.dtype) -> BaseMaskedDtype:
+    """Infer nullable dtype from numpy dtype.
+
+    There is no public pandas API for this, so this is the cleanest way.
+    See <https://github.com/pandas-dev/pandas/issues/63608>
+    """
+    try:
+        from pandas.core.dtypes.dtypes import BaseMaskedDtype
+    except ImportError:
+        pass
+    else:
+        return BaseMaskedDtype.from_numpy_dtype(dtype)
+
+    match dtype.kind:
+        case "b":
+            array_type = pd.arrays.BooleanArray
+        case "i" | "u":
+            array_type = pd.arrays.IntegerArray
+        case _:
+            raise NotImplementedError
+    return array_type(np.ones(1, dtype), np.ones(1, bool)).dtype
+
+
 # -------------------------------------------------------------------------------
 # Generic functions
 # -------------------------------------------------------------------------------
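`pandas_nullable_dtype` maps a plain numpy dtype to its pandas nullable counterpart, preferring the internal `BaseMaskedDtype.from_numpy_dtype` and falling back to constructing a throwaway masked array. Expected behavior of the helper defined above:

```python
import numpy as np
import pandas as pd
from anndata._io.utils import pandas_nullable_dtype  # internal helper

assert pandas_nullable_dtype(np.dtype("int32")) == pd.Int32Dtype()
assert pandas_nullable_dtype(np.dtype("bool")) == pd.BooleanDtype()
```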
anndata/_io/zarr.py CHANGED
@@ -150,8 +150,12 @@ def read_dataframe(group: zarr.Group | zarr.Array) -> pd.DataFrame:
 def open_write_group(
     store: StoreLike, *, mode: AccessModeLiteral = "w", **kwargs
 ) -> zarr.Group:
-    if not is_zarr_v2() and "zarr_format" not in kwargs:
-        kwargs["zarr_format"] = settings.zarr_write_format
+    if "zarr_format" not in kwargs:
+        if settings.zarr_write_format == 2 or is_zarr_v2():
+            msg = "Writing zarr v2 data will no longer be the default in the next minor release. v3 data will be written by default. If you are explicitly setting this configuration, consider migrating to the zarr v3 file format."
+            warn(msg, UserWarning, stacklevel=2)
+        if not is_zarr_v2():
+            kwargs["zarr_format"] = settings.zarr_write_format
     return zarr.open_group(store, mode=mode, **kwargs)

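`open_write_group` now warns whenever a zarr v2 store would be written, ahead of the default flipping to v3. To opt in explicitly and avoid the warning (with zarr-python 3 installed):

```python
import anndata as ad
import numpy as np

ad.settings.zarr_write_format = 3  # opt in to the upcoming default
adata = ad.AnnData(np.ones((2, 2)))
adata.write_zarr("out.zarr")  # hypothetical output path
```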
anndata/compat/__init__.py CHANGED
@@ -51,6 +51,7 @@ Index1D = (
     | Sequence[bool]
     | pd.Series  # bool, int, str
     | pd.Index
+    | pd.api.extensions.ExtensionArray  # bool | int | str
     | NDArray[np.str_]
     | np.matrix  # bool
     | CSMatrix  # bool
@@ -71,6 +72,26 @@ H5Group = h5py.Group
 H5Array = h5py.Dataset
 H5File = h5py.File

+# h5py recommends using .astype("T") over .asstr() when using numpy ≥2
+if TYPE_CHECKING:
+    from h5py._hl.dataset import AsStrView as H5AsStrView
+    from h5py._hl.dataset import AsTypeView as H5AsTypeView
+else:
+    try:
+        try:
+            from h5py._hl.dataset import AsStrView as H5AsStrView
+            from h5py._hl.dataset import AsTypeView as H5AsTypeView
+        except ImportError:
+            # h5py 3.11 uses AsStrWrapper/AstypeWrapper (lowercase 't')
+            from h5py._hl.dataset import AsStrWrapper as H5AsStrView
+            from h5py._hl.dataset import AstypeWrapper as H5AsTypeView
+    except ImportError:  # pragma: no cover
+        warn("AsTypeView changed import location", DeprecationWarning, stacklevel=1)
+        _ds = h5py.File.in_memory().create_dataset("x", shape=(), dtype="S1")
+        H5AsStrView = type(_ds.asstr())
+        H5AsTypeView = type(_ds.astype("U1"))
+        del _ds
+

 #############################
 # Optional deps
@@ -209,11 +230,10 @@ else:
 # IO helpers
 #############################

+NUMPY_2 = Version(version("numpy")) >= Version("2")

 NULLABLE_NUMPY_STRING_TYPE = (
-    np.dtype("O")
-    if Version(version("numpy")) < Version("2")
-    else np.dtypes.StringDType(na_object=pd.NA)
+    np.dtypes.StringDType(na_object=pd.NA) if NUMPY_2 else np.dtype("O")
 )

 PANDAS_SUPPORTS_NA_VALUE = Version(version("pandas")) >= Version("2.3")
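`NULLABLE_NUMPY_STRING_TYPE` now reads off the new `NUMPY_2` flag. On NumPy ≥ 2 it is the native variable-width string dtype with `pd.NA` as the missing-value sentinel:

```python
import numpy as np
import pandas as pd

dt = np.dtypes.StringDType(na_object=pd.NA)  # NumPy ≥ 2 only
arr = np.array(["a", pd.NA, "b"], dtype=dt)
assert arr[1] is pd.NA  # the na_object round-trips through the array
```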
anndata/experimental/backed/_io.py CHANGED
@@ -59,19 +59,25 @@ def read_lazy(
     Preparing example objects

     >>> import anndata as ad
-    >>> from urllib.request import urlretrieve
+    >>> import pooch
     >>> import scanpy as sc
     >>> base_url = "https://datasets.cellxgene.cziscience.com"
-    >>> def get_cellxgene_data(id_: str):
-    ...     out_path = sc.settings.datasetdir / f"{id_}.h5ad"
-    ...     if out_path.exists():
-    ...         return out_path
-    ...     file_url = f"{base_url}/{id_}.h5ad"
-    ...     sc.settings.datasetdir.mkdir(parents=True, exist_ok=True)
-    ...     urlretrieve(file_url, out_path)
-    ...     return out_path
-    >>> path_b_cells = get_cellxgene_data("a93eab58-3d82-4b61-8a2f-d7666dcdb7c4")
-    >>> path_fetal = get_cellxgene_data("d170ff04-6da0-4156-a719-f8e1bbefbf53")
+    >>> # To update hashes: pooch.retrieve(url, known_hash=None) prints the new hash
+    >>> def get_cellxgene_data(id_: str, hash_: str):
+    ...     return pooch.retrieve(
+    ...         f"{base_url}/{id_}.h5ad",
+    ...         known_hash=hash_,
+    ...         fname=f"{id_}.h5ad",
+    ...         path=sc.settings.datasetdir,
+    ...     )
+    >>> path_b_cells = get_cellxgene_data(
+    ...     "a93eab58-3d82-4b61-8a2f-d7666dcdb7c4",
+    ...     "sha256:dac90fe2aa8b78aee2c1fc963104592f8eff7b873ca21d01a51a5e416734651c",
+    ... )
+    >>> path_fetal = get_cellxgene_data(
+    ...     "d170ff04-6da0-4156-a719-f8e1bbefbf53",
+    ...     "sha256:d497eebca03533919877b6fc876e8c9d8ba063199ddc86dd9fbcb9d1d87a3622",
+    ... )
     >>> b_cells_adata = ad.experimental.read_lazy(path_b_cells)
     >>> fetal_adata = ad.experimental.read_lazy(path_fetal)
     >>> print(b_cells_adata)
anndata/experimental/backed/_lazy_arrays.py CHANGED
@@ -10,10 +10,13 @@ from anndata._core.index import _subset
 from anndata._core.views import as_view
 from anndata._io.specs.lazy_methods import get_chunksize

+from ..._io.utils import pandas_nullable_dtype
 from ..._settings import settings
 from ...compat import (
     NULLABLE_NUMPY_STRING_TYPE,
     H5Array,
+    H5AsStrView,
+    H5AsTypeView,
     XBackendArray,
     XDataArray,
     XZarrArrayWrapper,
@@ -24,8 +27,9 @@ if TYPE_CHECKING:
     from pathlib import Path
     from typing import Literal

+    from numpy.typing import NDArray
     from pandas._libs.missing import NAType
-    from pandas.core.dtypes.base import ExtensionDtype
+    from pandas.core.dtypes.dtypes import BaseMaskedDtype

     from anndata.compat import ZarrGroup

@@ -36,12 +40,13 @@ if TYPE_CHECKING:
     from xarray.core.indexing import ExplicitIndexer


-K = TypeVar("K", H5Array, ZarrArray)
+K = TypeVar("K", H5Array | H5AsStrView | H5AsTypeView, ZarrArray)


 class ZarrOrHDF5Wrapper(XZarrArrayWrapper, Generic[K]):
-    def __init__(self, array: K):
-        self.chunks = array.chunks
+    def __init__(self, array: K) -> None:
+        # AstypeView from h5py .astype() lacks chunks attribute
+        self.chunks = getattr(array, "chunks", None)
         if isinstance(array, ZarrArray):
             super().__init__(array)
             return
@@ -73,7 +78,7 @@ class ZarrOrHDF5Wrapper(XZarrArrayWrapper, Generic[K]):
         if (
             isinstance(key, np.ndarray)
             and np.issubdtype(key.dtype, np.integer)
-            and isinstance(self._array, H5Array)
+            and isinstance(self._array, H5Array | H5AsTypeView | H5AsStrView)
         ):
             key_mask = np.zeros(self._array.shape).astype("bool")
             key_mask[key] = True
@@ -89,7 +94,7 @@ class CategoricalArray(XBackendArray, Generic[K]):
     """

     _codes: ZarrOrHDF5Wrapper[K]
-    _categories: ZarrArray | H5Array
+    _categories: K
     shape: tuple[int, ...]
     base_path_or_zarr_group: Path | ZarrGroup
     elem_name: str
@@ -97,7 +102,7 @@ class CategoricalArray(XBackendArray, Generic[K]):
     def __init__(
         self,
         codes: K,
-        categories: ZarrArray | H5Array,
+        categories: K,
         base_path_or_zarr_group: Path | ZarrGroup,
         elem_name: str,
         *args,
@@ -153,11 +158,11 @@ class MaskedArray(XBackendArray, Generic[K]):

     def __init__(
         self,
-        values: ZarrArray | H5Array,
+        values: K,
         dtype_str: Literal[
             "nullable-integer", "nullable-boolean", "nullable-string-array"
         ],
-        mask: ZarrArray | H5Array,
+        mask: K,
         base_path_or_zarr_group: Path | ZarrGroup,
         elem_name: str,
     ):
@@ -169,40 +174,33 @@ class MaskedArray(XBackendArray, Generic[K]):
         self.file_format = "zarr" if isinstance(mask, ZarrArray) else "h5"
         self.elem_name = elem_name

-    def __getitem__(self, key: ExplicitIndexer) -> PandasExtensionArray | np.ndarray:
-        from xarray.core.extension_array import PandasExtensionArray
-
+    def __getitem__(
+        self, key: ExplicitIndexer
+    ) -> PandasExtensionArray | NDArray[np.str_]:
         values = self._values[key]
         mask = self._mask[key]
-        if self._dtype_str == "nullable-integer":
-            # numpy does not support nan ints
-            extension_array = pd.arrays.IntegerArray(values, mask=mask)
-        elif self._dtype_str == "nullable-boolean":
-            extension_array = pd.arrays.BooleanArray(values, mask=mask)
-        elif self._dtype_str == "nullable-string-array":
+
+        if isinstance(self.dtype, np.dtypes.StringDType):
             # https://github.com/pydata/xarray/issues/10419
             values = values.astype(self.dtype)
             values[mask] = pd.NA
             return values
-        else:
-            msg = f"Invalid dtype_str {self._dtype_str}"
-            raise RuntimeError(msg)
-        return PandasExtensionArray(extension_array)
+
+        from xarray.core.extension_array import PandasExtensionArray
+
+        cls = self.dtype.construct_array_type()
+        return PandasExtensionArray(cls(values, mask))

     @cached_property
-    def dtype(self) -> np.dtypes.StringDType[NAType] | ExtensionDtype:
-        if self._dtype_str == "nullable-integer":
-            return pd.array(
-                [],
-                dtype=str(pd.api.types.pandas_dtype(self._values.dtype)).capitalize(),
-            ).dtype
-        elif self._dtype_str == "nullable-boolean":
-            return pd.BooleanDtype()
-        elif self._dtype_str == "nullable-string-array":
+    def dtype(self) -> BaseMaskedDtype | np.dtypes.StringDType[NAType]:
+        if self._dtype_str == "nullable-string-array":
             # https://github.com/pydata/xarray/issues/10419
             return NULLABLE_NUMPY_STRING_TYPE
-        msg = f"Invalid dtype_str {self._dtype_str}"
-        raise RuntimeError(msg)
+        try:
+            return pandas_nullable_dtype(self._values.dtype)
+        except NotImplementedError:
+            msg = f"Invalid dtype_str {self._dtype_str}"
+            raise RuntimeError(msg) from None


 @_subset.register(XDataArray)
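The `MaskedArray` refactor replaces the `dtype_str` if/elif chains with a single dispatch through the dtype itself: `pandas_nullable_dtype` picks the masked dtype, and pandas' standard `ExtensionDtype.construct_array_type` recovers the matching array class. That round-trip in isolation:

```python
import numpy as np
import pandas as pd

dtype = pd.Int32Dtype()
cls = dtype.construct_array_type()  # pd.arrays.IntegerArray
arr = cls(np.array([1, 2], dtype="int32"), mask=np.array([False, True]))
assert arr.dtype == dtype
```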
anndata/experimental/merge.py CHANGED
@@ -551,19 +551,25 @@ def concat_on_disk(  # noqa: PLR0913

     First, let’s get some “big” datasets with a compatible ``var`` axis:

-    >>> import httpx
+    >>> import pooch
     >>> import scanpy as sc
     >>> base_url = "https://datasets.cellxgene.cziscience.com"
-    >>> def get_cellxgene_data(id_: str):
-    ...     out_path = sc.settings.datasetdir / f'{id_}.h5ad'
-    ...     if out_path.exists():
-    ...         return out_path
-    ...     file_url = f"{base_url}/{id_}.h5ad"
-    ...     sc.settings.datasetdir.mkdir(parents=True, exist_ok=True)
-    ...     out_path.write_bytes(httpx.get(file_url).content)
-    ...     return out_path
-    >>> path_b_cells = get_cellxgene_data('a93eab58-3d82-4b61-8a2f-d7666dcdb7c4')
-    >>> path_fetal = get_cellxgene_data('d170ff04-6da0-4156-a719-f8e1bbefbf53')
+    >>> # To update hashes: pooch.retrieve(url, known_hash=None) prints the new hash
+    >>> def get_cellxgene_data(id_: str, hash_: str):
+    ...     return pooch.retrieve(
+    ...         f"{base_url}/{id_}.h5ad",
+    ...         known_hash=hash_,
+    ...         fname=f"{id_}.h5ad",
+    ...         path=sc.settings.datasetdir,
+    ...     )
+    >>> path_b_cells = get_cellxgene_data(
+    ...     'a93eab58-3d82-4b61-8a2f-d7666dcdb7c4',
+    ...     'sha256:dac90fe2aa8b78aee2c1fc963104592f8eff7b873ca21d01a51a5e416734651c',
+    ... )
+    >>> path_fetal = get_cellxgene_data(
+    ...     'd170ff04-6da0-4156-a719-f8e1bbefbf53',
+    ...     'sha256:d497eebca03533919877b6fc876e8c9d8ba063199ddc86dd9fbcb9d1d87a3622',
+    ... )

     Now we can concatenate them on-disk:

@@ -613,10 +619,10 @@ def concat_on_disk(  # noqa: PLR0913

     if (
         len(in_files) == 1
-        and isinstance(in_files[0], str | PathLike)
+        and isinstance(in_file := in_files[0], str | PathLike)
         and is_out_path_like
     ):
-        shutil.copy2(in_files[0], out_file)
+        (shutil.copytree if in_file.is_dir() else shutil.copy2)(in_file, out_file)
         return

     if keys is None:
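The single-input fast path previously assumed an h5ad file; zarr stores are directories, so `shutil.copy2` would fail on them. The new one-liner picks the right copier, as in this sketch:

```python
import shutil
from pathlib import Path

in_file, out_file = Path("in.zarr"), Path("out.zarr")  # hypothetical stores
(shutil.copytree if in_file.is_dir() else shutil.copy2)(in_file, out_file)
```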
anndata/tests/helpers.py CHANGED
@@ -75,12 +75,13 @@ DEFAULT_KEY_TYPES = (
 DEFAULT_COL_TYPES = (
     pd.CategoricalDtype(ordered=False),
     pd.CategoricalDtype(ordered=True),
-    np.int64,
-    np.float64,
-    np.uint8,
-    np.bool_,
-    pd.BooleanDtype,
-    pd.Int32Dtype,
+    np.dtype(np.int64),
+    np.dtype(np.float64),
+    np.dtype(np.uint8),
+    np.dtype(bool),
+    pd.BooleanDtype(),
+    pd.Int32Dtype(),
+    pd.UInt8Dtype(),
 )

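The list now holds dtype *instances* rather than a mix of scalar types and dtype classes, which is what lets the updated helpers below rely on `dtype.itemsize` directly. The distinction:

```python
import numpy as np
import pandas as pd

np.int64            # a scalar type (a class), not a dtype
np.dtype(np.int64)  # a dtype instance with .itemsize, .kind, ...
pd.Int32Dtype       # the ExtensionDtype class
pd.Int32Dtype()     # the instance the updated helpers expect
```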
@@ -108,13 +109,11 @@ def gen_vstr_recarray(m, n, dtype=None):


 def issubdtype(
-    a: np.dtype | pd.api.extensions.ExtensionDtype | type,
-    b: type[DT] | tuple[type[DT], ...],
+    a: np.dtype | pd.api.extensions.ExtensionDtype, b: type[DT] | tuple[type[DT], ...]
 ) -> TypeGuard[DT]:
+    assert not isinstance(a, type)
     if isinstance(b, tuple):
         return any(issubdtype(a, t) for t in b)
-    if isinstance(a, type) and issubclass(a, pd.api.extensions.ExtensionDtype):
-        return issubclass(a, b)
     if isinstance(a, pd.api.extensions.ExtensionDtype):
         return isinstance(a, b)
     try:
@@ -126,6 +125,7 @@ def gen_random_column(  # noqa: PLR0911
 def gen_random_column(  # noqa: PLR0911
     n: int, dtype: np.dtype | pd.api.extensions.ExtensionDtype
 ) -> tuple[str, np.ndarray | pd.api.extensions.ExtensionArray]:
+    assert isinstance(dtype, np.dtype | pd.api.extensions.ExtensionDtype)
     if issubdtype(dtype, pd.CategoricalDtype):
         # TODO: Think about allowing index to be passed for n
         letters = np.fromiter(iter(ascii_letters), "U1")
@@ -142,13 +142,9 @@ def gen_random_column(  # noqa: PLR0911
         ),
     )
     if issubdtype(dtype, IntegerDtype):
-        return (
-            "nullable-int",
-            pd.arrays.IntegerArray(
-                np.random.randint(0, 1000, size=n, dtype=np.int32),
-                mask=np.random.randint(0, 2, size=n, dtype=bool),
-            ),
-        )
+        name, values = gen_random_column(n, dtype.numpy_dtype)
+        mask = np.random.randint(0, 2, size=n, dtype=bool)
+        return f"nullable-{name}", pd.arrays.IntegerArray(values, mask)
     if issubdtype(dtype, pd.StringDtype):
         letters = np.fromiter(iter(ascii_letters), "U1")
         array = pd.array(np.random.choice(letters, n), dtype=pd.StringDtype())
@@ -162,7 +158,7 @@ def gen_random_column(  # noqa: PLR0911
     if not issubdtype(dtype, np.number):  # pragma: no cover
         pytest.fail(f"Unexpected dtype: {dtype}")

-    n_bits = 8 * (dtype().itemsize if isinstance(dtype, type) else dtype.itemsize)
+    n_bits = 8 * dtype.itemsize

     if issubdtype(dtype, np.unsignedinteger):
         return f"uint{n_bits}", np.random.randint(0, 255, n, dtype=dtype)
anndata-0.12.8.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: anndata
-Version: 0.12.7
+Version: 0.12.8
 Summary: Annotated data.
 Project-URL: Documentation, https://anndata.readthedocs.io/
 Project-URL: Source, https://github.com/scverse/anndata
@@ -74,11 +74,11 @@ Requires-Dist: boltons; extra == 'test'
 Requires-Dist: dask[array]!=2024.8.*,!=2024.9.*,!=2025.2.*,!=2025.3.*,!=2025.4.*,!=2025.5.*,!=2025.6.*,!=2025.7.*,!=2025.8.*,>=2023.5.1; extra == 'test'
 Requires-Dist: dask[distributed]; extra == 'test'
 Requires-Dist: filelock; extra == 'test'
-Requires-Dist: httpx<1.0; extra == 'test'
 Requires-Dist: joblib; extra == 'test'
 Requires-Dist: loompy>=3.0.5; extra == 'test'
 Requires-Dist: matplotlib; extra == 'test'
 Requires-Dist: openpyxl; extra == 'test'
+Requires-Dist: pooch; extra == 'test'
 Requires-Dist: pyarrow; extra == 'test'
 Requires-Dist: pytest; extra == 'test'
 Requires-Dist: pytest-cov; extra == 'test'
@@ -96,11 +96,11 @@ Requires-Dist: boltons; extra == 'test-min'
 Requires-Dist: dask[array]!=2024.8.*,!=2024.9.*,!=2025.2.*,!=2025.3.*,!=2025.4.*,!=2025.5.*,!=2025.6.*,!=2025.7.*,!=2025.8.*,>=2023.5.1; extra == 'test-min'
 Requires-Dist: dask[distributed]; extra == 'test-min'
 Requires-Dist: filelock; extra == 'test-min'
-Requires-Dist: httpx<1.0; extra == 'test-min'
 Requires-Dist: joblib; extra == 'test-min'
 Requires-Dist: loompy>=3.0.5; extra == 'test-min'
 Requires-Dist: matplotlib; extra == 'test-min'
 Requires-Dist: openpyxl; extra == 'test-min'
+Requires-Dist: pooch; extra == 'test-min'
 Requires-Dist: pyarrow; extra == 'test-min'
 Requires-Dist: pytest; extra == 'test-min'
 Requires-Dist: pytest-cov; extra == 'test-min'
anndata-0.12.8.dist-info/RECORD CHANGED
@@ -13,45 +13,45 @@
 anndata/_core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 anndata/_core/access.py,sha256=pts7fGUKgGZANSsu_qAA7L10qHM-jT1zIehbl3441OY,873
 anndata/_core/aligned_df.py,sha256=bM9kkEFURRLeUOUMk90WxVnRC-ZsXGEDx36kDj5gC9I,4278
 anndata/_core/aligned_mapping.py,sha256=BYU1jslMWIhtFTtUMaXY8ZCyt0J4_ZsJTmj6J2yAXTQ,14257
-anndata/_core/anndata.py,sha256=e_IgHjIpXPQPJqakeH83bl8Quu9T0N--Mb_QHINayv4,79276
+anndata/_core/anndata.py,sha256=s-ExKqeQXuvin9dk0vKa1GLx6tRvrAX5HlvmqRR9uRw,79455
 anndata/_core/extensions.py,sha256=9Rsho6qnr3PJHULrYGiZHCBinBZYJK6zyf3cFsl_gBY,10425
 anndata/_core/file_backing.py,sha256=6DhBfLQPDFDpoe6wSgnOFtpC4Hnbh-UgOPbqvYDxm8g,5603
-anndata/_core/index.py,sha256=F3TQBUbWpt09Pb4MpwB7xfCI9uPuv7jrqx8X74CwVDU,13472
-anndata/_core/merge.py,sha256=wFsUotHnQsnFp84UoCp78XMw3zVfvUH5eIi-8hkb7zo,60880
+anndata/_core/index.py,sha256=dz2jhrklxsNIDN-q0WhiXhxwtOreK-T8Iate-MGXpH0,13350
+anndata/_core/merge.py,sha256=xtVLQzUIUiIv3seNWtMmURaxkJ1X4LXSzHCP_m_HJCs,59738
 anndata/_core/raw.py,sha256=x_PwwaDQscVQOFJ38kF7sNQ47LxowpS38h2RQfU5Zwo,7925
 anndata/_core/sparse_dataset.py,sha256=R2BeSLiREiwk9FNjdLCR3VfbYatz-7BK0l2F9XqCiTk,27280
 anndata/_core/storage.py,sha256=mHzqp7YBJ-rGQFulMAx__D-Z7y4omHPyb1cP7YxfbFE,2555
 anndata/_core/views.py,sha256=-tiUwugw0bRYXzewruhU0xXT7nnDLdYf4CiFByLl34w,15067
-anndata/_core/xarray.py,sha256=0de8K7YjG9mnT-dFSRoxVxgwQktjrGI9n5Yy-1YJSHg,16624
+anndata/_core/xarray.py,sha256=XKpmkp9IyMuMAFI9ho5PoKKQSx9vX-Gau_k6moHJQ5w,18095
 anndata/_io/__init__.py,sha256=GTNeUZ8d8aA3sK4P33tyljIc60KapLbkqBC6J1y3l9U,346
-anndata/_io/h5ad.py,sha256=JT5DxTXXibz2jh1mjaQB3_0QYdhJ3gv4IcWLPjKD-dw,13976
+anndata/_io/h5ad.py,sha256=IPM2WrS6Xg2-LRkya3uVeNdCBcjQlxEFKHYdcGItg9c,13986
 anndata/_io/read.py,sha256=Z0QdFkaaXmGo5a25O9N9Ej2v8U7b9oV9Umw98YtB5uA,15950
-anndata/_io/utils.py,sha256=3Lg27Q0Uo3HYlz980bG2Y02_VFIt0PiXMNIj_o-mgC4,9490
+anndata/_io/utils.py,sha256=RqD5JAiGtfQmxxsmr3GSg4i0Oq2XckK1MhttjdsoFLM,10309
 anndata/_io/write.py,sha256=r55w6yPIIuUSLW9wyYL8GnkzHHQdAxy6xiCEw9cAC38,4811
-anndata/_io/zarr.py,sha256=Z996SZ8LV1Fpa_q8o70vHnBzNLOLlVjhf_Rs5EM_Slo,5461
+anndata/_io/zarr.py,sha256=k0hhYKxf7FwgpYUyDFwfpLOrWTjx5BDW1XUX6c8xhcs,5813
 anndata/_io/specs/__init__.py,sha256=Z6l8xqa7B480U3pqrNIg4-fhUvpBW85w4xA3i3maAUM,427
-anndata/_io/specs/lazy_methods.py,sha256=aCdmmYLrOHlMyT18t3sLE2I51YGT-jDna2F3m7b_kv0,13093
-anndata/_io/specs/methods.py,sha256=awmdbUMAP9Xjkid56LAbyWNQfKcCOrkx0BeQ6CDKek4,46422
+anndata/_io/specs/lazy_methods.py,sha256=ueV9ICJ87a-mY3fTTaYd98ug7JwOh4dfJY4bQj_HU8c,13055
+anndata/_io/specs/methods.py,sha256=jB0qq-f4UxppfbvhoFAbYVBE6Nz3u-UIfFuqM7BwOiw,46870
 anndata/_io/specs/registry.py,sha256=6Z_ffk3uOIagzRPcDCvEoszcgD-U3n8wYnGiPA71ZeI,17539
-anndata/compat/__init__.py,sha256=9696gHdOUz2yKih9epmT8WGSr6UX0pI8dJYTrqn0SJQ,14968
+anndata/compat/__init__.py,sha256=fvdnMtf7mhkK5nPXvWvQI-H7mWb016sKqVJ4pEVKUL4,15959
 anndata/experimental/__init__.py,sha256=polIxriEkby0iEqw-IXkUzp8k0wp92BpYY4zl4BsHH0,1648
 anndata/experimental/_dispatch_io.py,sha256=gb9JUcgS1cIERjxM1PBpWDXfPkKgMevoLF0QInZfC-g,1858
-anndata/experimental/merge.py,sha256=gWBS4HSkG8E3seIs2AS7jqqFc0Zp6JW94KWtNXApafg,24882
+anndata/experimental/merge.py,sha256=b9rrAtE0t5UzcUulc9mXH9u7RW68p_SYIFPJOqUxSNY,25120
 anndata/experimental/backed/__init__.py,sha256=4dc9M_-_SlfUidDrbWt8PRyD_8bYjypHJ86IpdThHus,230
 anndata/experimental/backed/_compat.py,sha256=rM7CnSJEZCko5wPBFRfvZA9ZKUSpaOVcWFy5u09p1go,519
-anndata/experimental/backed/_io.py,sha256=YM5FL6sKdLyQTHUa43cF0pDNbyj2xD9X7lzUiQesV20,6681
-anndata/experimental/backed/_lazy_arrays.py,sha256=8vcu7eyoRRlzNXyAzuY0s9CqEZCOAYoZIo-iI5d71_g,7805
+anndata/experimental/backed/_io.py,sha256=fG_KkGVxnqK0VukiMGYHSKasSiurFLKeWqyKftJnblw,6861
+anndata/experimental/backed/_lazy_arrays.py,sha256=mwcovT31AoXdVfoeyoNzmqCXL1SSmKF33hYK8ftUxM0,7509
 anndata/experimental/multi_files/__init__.py,sha256=T7iNLlRbe-KnLT3o7Tb7_nE4Iy_hLkG66UjBOvj2Bj8,107
 anndata/experimental/multi_files/_anncollection.py,sha256=Ra8A4MzyFWlid5RJd0cc2d4SJeSZ2HXz3odKSqAbChw,35264
 anndata/experimental/pytorch/__init__.py,sha256=4CkgrahLO8Kc-s2bmv6lVQfDxbO3IUyV0v4ygBDkttY,95
 anndata/experimental/pytorch/_annloader.py,sha256=7mpsFV5vBfxKIje1cPjahtDZ5afkU-H663XB4FJhmok,8075
 anndata/tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-anndata/tests/helpers.py,sha256=BORIeSbcD0R_PDzi1IeR252it-aq6bL8fGN-bDR-Q1I,37689
+anndata/tests/helpers.py,sha256=9Tpt8QMZig7ggMFQGI2hDZE2u6IxQtVOEJXDZB3j1Ao,37638
 testing/anndata/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 testing/anndata/_doctest.py,sha256=Qew0N0zLLNiPKN1CLunqY5cTinFLaEhY5GagiYfm6KI,344
 testing/anndata/_pytest.py,sha256=C_R-N2x9NHKZ66YLkvMLWkXQG1WiouOkBnLQpYx_62Q,3994
 testing/anndata/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-anndata-0.12.7.dist-info/METADATA,sha256=LQgLMW_q9Q4V4k0yBNpYNCPnPSUCxzyubwSDxR_RqTE,9939
-anndata-0.12.7.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
-anndata-0.12.7.dist-info/licenses/LICENSE,sha256=VcrXoEVMhtNuvMvKYGP-I5lMT8qZ_6dFf22fsL180qA,1575
-anndata-0.12.7.dist-info/RECORD,,
+anndata-0.12.8.dist-info/METADATA,sha256=rlyz7u-gJSRMTRJSDWO-wGdsBzt7L59gVK__C_BAqko,9931
+anndata-0.12.8.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
+anndata-0.12.8.dist-info/licenses/LICENSE,sha256=VcrXoEVMhtNuvMvKYGP-I5lMT8qZ_6dFf22fsL180qA,1575
+anndata-0.12.8.dist-info/RECORD,,