anndata 0.12.2__py3-none-any.whl → 0.12.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- anndata/__init__.py +23 -18
- anndata/_core/aligned_df.py +7 -0
- anndata/_core/anndata.py +8 -7
- anndata/_core/index.py +136 -23
- anndata/_core/merge.py +34 -44
- anndata/_core/sparse_dataset.py +12 -11
- anndata/_core/views.py +1 -1
- anndata/_io/h5ad.py +18 -27
- anndata/_io/specs/lazy_methods.py +1 -1
- anndata/_io/specs/methods.py +49 -65
- anndata/_io/specs/registry.py +17 -20
- anndata/_io/utils.py +2 -7
- anndata/_io/zarr.py +16 -7
- anndata/_settings.py +8 -0
- anndata/_settings.pyi +1 -0
- anndata/compat/__init__.py +3 -11
- anndata/experimental/backed/_lazy_arrays.py +5 -2
- anndata/experimental/merge.py +86 -50
- anndata/experimental/multi_files/_anncollection.py +2 -2
- {anndata-0.12.2.dist-info → anndata-0.12.4.dist-info}/METADATA +6 -7
- {anndata-0.12.2.dist-info → anndata-0.12.4.dist-info}/RECORD +24 -25
- testing/anndata/_pytest.py +2 -6
- anndata/_version.py +0 -62
- {anndata-0.12.2.dist-info → anndata-0.12.4.dist-info}/WHEEL +0 -0
- {anndata-0.12.2.dist-info → anndata-0.12.4.dist-info}/licenses/LICENSE +0 -0
anndata/__init__.py
CHANGED
@@ -12,7 +12,6 @@ from ._core.extensions import register_anndata_namespace
 from ._core.merge import concat
 from ._core.raw import Raw
 from ._settings import settings
-from ._version import __version__
 from ._warnings import (
     ExperimentalFeatureWarning,
     ImplicitModificationWarning,
@@ -28,22 +27,6 @@ from . import abc, experimental, typing, io, types  # isort: skip
 # We use these in tests by attribute access
 from . import logging  # noqa: F401 # isort: skip

-_DEPRECATED_IO = (
-    "read_loom",
-    "read_hdf",
-    "read_excel",
-    "read_umi_tools",
-    "read_csv",
-    "read_text",
-    "read_mtx",
-)
-_DEPRECATED = {method: f"io.{method}" for method in _DEPRECATED_IO}
-
-
-def __getattr__(attr_name: str) -> Any:
-    return module_get_attr_redirect(attr_name, deprecated_mapping=_DEPRECATED)
-
-
 __all__ = [
     "AnnData",
     "ExperimentalFeatureWarning",
@@ -51,7 +34,6 @@ __all__ = [
     "OldFormatWarning",
     "Raw",
     "WriteWarning",
-    "__version__",
     "abc",
     "concat",
     "experimental",
@@ -63,3 +45,26 @@ __all__ = [
     "types",
     "typing",
 ]
+
+_DEPRECATED_IO = (
+    "read_loom",
+    "read_hdf",
+    "read_excel",
+    "read_umi_tools",
+    "read_csv",
+    "read_text",
+    "read_mtx",
+)
+_DEPRECATED = {method: f"io.{method}" for method in _DEPRECATED_IO}
+
+
+def __getattr__(attr_name: str) -> Any:
+    if attr_name == "__version__":
+        import warnings
+        from importlib.metadata import version
+
+        msg = "`__version__` is deprecated, use `importlib.metadata.version('anndata')` instead."
+        warnings.warn(msg, FutureWarning, stacklevel=2)
+        return version("anndata")
+
+    return module_get_attr_redirect(attr_name, deprecated_mapping=_DEPRECATED)
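Note: with this change `anndata.__version__` is no longer a static attribute; it is served by the module-level `__getattr__` above, which warns and reads the installed package metadata. A minimal sketch of the resulting behavior (package name taken from the diff; printed values are illustrative):

    import warnings
    from importlib.metadata import version

    import anndata

    # Preferred going forward: query the installed distribution directly.
    print(version("anndata"))  # e.g. "0.12.4"

    # Deprecated path: attribute access now goes through __getattr__ and warns.
    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter("always")
        print(anndata.__version__)  # still returns the same version string
    assert any(issubclass(w.category, FutureWarning) for w in caught)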
anndata/_core/aligned_df.py
CHANGED
@@ -78,6 +78,13 @@ def _gen_dataframe_df(
     attr: Literal["obs", "var"],
     length: int | None = None,
 ):
+    if isinstance(anno.index, pd.MultiIndex):
+        msg = (
+            "pandas.MultiIndex not supported as index for obs or var on declaration.\n\
+You can set `obs_names` manually although most operations after will error or convert to str.\n\
+This behavior will likely be clarified in a future breaking release."
+        )
+        raise ValueError(msg)
     if length is not None and length != len(anno):
         raise _mk_df_error(source, attr, length, len(anno))
     anno = anno.copy(deep=False)
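In practice this means an `AnnData` built from a DataFrame with a `pandas.MultiIndex` index now fails at construction time rather than later. A hedged sketch (toy data, not from the diff):

    import numpy as np
    import pandas as pd
    import anndata as ad

    obs = pd.DataFrame(
        {"group": ["a", "b"]},
        index=pd.MultiIndex.from_tuples([("sample1", 0), ("sample2", 1)]),
    )

    try:
        ad.AnnData(X=np.zeros((2, 3)), obs=obs)
    except ValueError as err:
        # Raised by the new MultiIndex guard shown above
        print(err)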
anndata/_core/anndata.py
CHANGED
@@ -42,11 +42,7 @@ from .index import _normalize_indices, _subset, get_vector
 from .raw import Raw
 from .sparse_dataset import BaseCompressedSparseDataset, sparse_dataset
 from .storage import coerce_array
-from .views import (
-    DictView,
-    _resolve_idxs,
-    as_view,
-)
+from .views import DictView, _resolve_idxs, as_view
 from .xarray import Dataset2D

 if TYPE_CHECKING:
@@ -940,22 +936,27 @@ class AnnData(metaclass=utils.DeprecationMixinMeta):  # noqa: PLW1641
         Is sliced with `data` and `var` but behaves otherwise like a :term:`mapping`.
         """

+    @deprecated("obs (e.g. `k in adata.obs` or `str(adata.obs.columns.tolist())`)")
     def obs_keys(self) -> list[str]:
         """List keys of observation annotation :attr:`obs`."""
         return self._obs.keys().tolist()

+    @deprecated("var (e.g. `k in adata.var` or `str(adata.var.columns.tolist())`)")
     def var_keys(self) -> list[str]:
         """List keys of variable annotation :attr:`var`."""
         return self._var.keys().tolist()

+    @deprecated("obsm (e.g. `k in adata.obsm` or `adata.obsm.keys() | {'u'}`)")
     def obsm_keys(self) -> list[str]:
         """List keys of observation annotation :attr:`obsm`."""
         return list(self.obsm.keys())

+    @deprecated("varm (e.g. `k in adata.varm` or `adata.varm.keys() | {'u'}`)")
     def varm_keys(self) -> list[str]:
         """List keys of variable annotation :attr:`varm`."""
         return list(self.varm.keys())

+    @deprecated("uns (e.g. `k in adata.uns` or `sorted(adata.uns)`)")
     def uns_keys(self) -> list[str]:
         """List keys of unstructured annotation."""
         return sorted(self._uns.keys())
@@ -1907,8 +1908,8 @@ class AnnData(metaclass=utils.DeprecationMixinMeta):  # noqa: PLW1641
             compression_opts=compression_opts,
             as_dense=as_dense,
         )
-
-        if self.isbacked:
+        # Only reset the filename if the AnnData object now points to a complete new copy
+        if self.isbacked and not self.is_view:
             self.file.filename = filename

     write = write_h5ad  # a shortcut and backwards compat
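The `*_keys()` helpers are now marked deprecated, with the replacement spelled out in each message. A short sketch of the suggested idioms (toy object, not from the diff):

    import numpy as np
    import pandas as pd
    import anndata as ad

    adata = ad.AnnData(
        X=np.zeros((2, 2)),
        obs=pd.DataFrame({"group": ["a", "b"]}, index=["cell1", "cell2"]),
    )

    # Instead of adata.obs_keys() / adata.uns_keys() / adata.obsm_keys():
    obs_columns = adata.obs.columns.tolist()  # ["group"]
    has_group = "group" in adata.obs          # membership test, as the message suggests
    uns_keys = sorted(adata.uns)              # []
    obsm_keys = list(adata.obsm.keys())       # []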
anndata/_core/index.py
CHANGED
@@ -3,7 +3,7 @@ from __future__ import annotations
 from collections.abc import Iterable, Sequence
 from functools import singledispatch
 from itertools import repeat
-from typing import TYPE_CHECKING
+from typing import TYPE_CHECKING, cast, overload

 import h5py
 import numpy as np
@@ -14,6 +14,8 @@ from ..compat import AwkArray, CSArray, CSMatrix, DaskArray, XDataArray
 from .xarray import Dataset2D

 if TYPE_CHECKING:
+    from numpy.typing import NDArray
+
     from ..compat import Index, Index1D, Index1DNorm


@@ -161,7 +163,10 @@ def unpack_index(index: Index) -> tuple[Index1D, Index1D]:


 @singledispatch
-def _subset(a: np.ndarray | pd.DataFrame, subset_idx: Index):
+def _subset(
+    a: np.ndarray | pd.DataFrame,
+    subset_idx: tuple[Index1DNorm] | tuple[Index1DNorm, Index1DNorm],
+):
     # Select as combination of indexes, not coordinates
     # Correcting for indexing behaviour of np.ndarray
     if all(isinstance(x, Iterable) for x in subset_idx):
@@ -170,7 +175,9 @@ def _subset(a: np.ndarray | pd.DataFrame, subset_idx: Index):


 @_subset.register(DaskArray)
-def _subset_dask(a: DaskArray, subset_idx: Index):
+def _subset_dask(
+    a: DaskArray, subset_idx: tuple[Index1DNorm] | tuple[Index1DNorm, Index1DNorm]
+):
     if len(subset_idx) > 1 and all(isinstance(x, Iterable) for x in subset_idx):
         if issparse(a._meta) and a._meta.format == "csc":
             return a[:, subset_idx[1]][subset_idx[0], :]
@@ -180,24 +187,32 @@ def _subset_dask(a: DaskArray, subset_idx: Index):

 @_subset.register(CSMatrix)
 @_subset.register(CSArray)
-def _subset_sparse(
+def _subset_sparse(
+    a: CSMatrix | CSArray,
+    subset_idx: tuple[Index1DNorm] | tuple[Index1DNorm, Index1DNorm],
+):
     # Correcting for indexing behaviour of sparse.spmatrix
     if len(subset_idx) > 1 and all(isinstance(x, Iterable) for x in subset_idx):
         first_idx = subset_idx[0]
         if issubclass(first_idx.dtype.type, np.bool_):
-            first_idx = np.
+            first_idx = np.flatnonzero(first_idx)
         subset_idx = (first_idx.reshape(-1, 1), *subset_idx[1:])
     return a[subset_idx]


 @_subset.register(pd.DataFrame)
 @_subset.register(Dataset2D)
-def _subset_df(
+def _subset_df(
+    df: pd.DataFrame | Dataset2D,
+    subset_idx: tuple[Index1DNorm] | tuple[Index1DNorm, Index1DNorm],
+):
     return df.iloc[subset_idx]


 @_subset.register(AwkArray)
-def _subset_awkarray(a: AwkArray, subset_idx: Index):
+def _subset_awkarray(
+    a: AwkArray, subset_idx: tuple[Index1DNorm] | tuple[Index1DNorm, Index1DNorm]
+):
     if all(isinstance(x, Iterable) for x in subset_idx):
         subset_idx = np.ix_(*subset_idx)
     return a[subset_idx]
@@ -205,23 +220,121 @@ def _subset_awkarray(a: AwkArray, subset_idx: Index):

 # Registration for SparseDataset occurs in sparse_dataset.py
 @_subset.register(h5py.Dataset)
-def _subset_dataset(
-
-
-
-
-
-
-
-
-
-
-
+def _subset_dataset(
+    d: h5py.Dataset, subset_idx: tuple[Index1DNorm] | tuple[Index1DNorm, Index1DNorm]
+):
+    order: tuple[NDArray[np.integer] | slice, ...]
+    inv_order: tuple[NDArray[np.integer] | slice, ...]
+    order, inv_order = zip(*map(_index_order_and_inverse, subset_idx), strict=True)
+    # check for duplicates or multi-dimensional fancy indexing
+    array_dims = [i for i in order if isinstance(i, np.ndarray)]
+    has_duplicates = any(len(np.unique(i)) != len(i) for i in array_dims)
+    # Use safe indexing if there are duplicates OR multiple array dimensions
+    # (h5py doesn't support multi-dimensional fancy indexing natively)
+    if has_duplicates or len(array_dims) > 1:
+        # For multi-dimensional indexing, bypass the sorting logic and use original indices
+        return _safe_fancy_index_h5py(d, subset_idx)
     # from hdf5, then to real order
-    return d[
-
-
-
+    return d[order][inv_order]
+
+
+@overload
+def _index_order_and_inverse(
+    axis_idx: NDArray[np.integer] | NDArray[np.bool_],
+) -> tuple[NDArray[np.integer], NDArray[np.integer]]: ...
+@overload
+def _index_order_and_inverse(axis_idx: slice) -> tuple[slice, slice]: ...
+def _index_order_and_inverse(
+    axis_idx: Index1DNorm,
+) -> tuple[Index1DNorm, NDArray[np.integer] | slice]:
+    """Order and get inverse index array."""
+    if not isinstance(axis_idx, np.ndarray):
+        return axis_idx, slice(None)
+    if axis_idx.dtype == bool:
+        axis_idx = np.flatnonzero(axis_idx)
+    order = np.argsort(axis_idx)
+    return axis_idx[order], np.argsort(order)
+
+
+@overload
+def _process_index_for_h5py(
+    idx: NDArray[np.integer] | NDArray[np.bool_],
+) -> tuple[NDArray[np.integer], NDArray[np.integer]]: ...
+@overload
+def _process_index_for_h5py(idx: slice) -> tuple[slice, None]: ...
+def _process_index_for_h5py(
+    idx: Index1DNorm,
+) -> tuple[Index1DNorm, NDArray[np.integer] | None]:
+    """Process a single index for h5py compatibility, handling sorting and duplicates."""
+    if not isinstance(idx, np.ndarray):
+        # Not an array (slice, integer, list) - no special processing needed
+        return idx, None
+
+    if idx.dtype == bool:
+        idx = np.flatnonzero(idx)
+
+    # For h5py fancy indexing, we need sorted indices
+    # But we also need to track how to reverse the sorting
+    unique, inverse = np.unique(idx, return_inverse=True)
+    return (
+        # Has duplicates - use unique + inverse mapping approach
+        (unique, inverse)
+        if len(unique) != len(idx)
+        # No duplicates - just sort and track reverse mapping
+        else _index_order_and_inverse(idx)
+    )
+
+
+def _safe_fancy_index_h5py(
+    dataset: h5py.Dataset,
+    subset_idx: tuple[Index1DNorm] | tuple[Index1DNorm, Index1DNorm],
+) -> h5py.Dataset:
+    # Handle multi-dimensional indexing of h5py dataset
+    # This avoids h5py's limitation with multi-dimensional fancy indexing
+    # without loading the entire dataset into memory
+
+    # Convert boolean arrays to integer arrays and handle sorting for h5py
+    processed_indices: tuple[NDArray[np.integer] | slice, ...]
+    reverse_indices: tuple[NDArray[np.integer] | None, ...]
+    processed_indices, reverse_indices = zip(
+        *map(_process_index_for_h5py, subset_idx), strict=True
+    )
+
+    # First find the index that reduces the size of the dataset the most
+    i_min = np.argmin([
+        _get_index_size(inds, dataset.shape[i]) / dataset.shape[i]
+        for i, inds in enumerate(processed_indices)
+    ])
+
+    # Apply the most selective index first to h5py dataset
+    first_index = [slice(None)] * len(processed_indices)
+    first_index[i_min] = processed_indices[i_min]
+    in_memory_array = cast("np.ndarray", dataset[tuple(first_index)])
+
+    # Apply remaining indices to the numpy array
+    remaining_indices = list(processed_indices)
+    remaining_indices[i_min] = slice(None)  # Already applied
+    result = in_memory_array[tuple(remaining_indices)]
+
+    # Now apply reverse mappings to get the original order
+    for dim, reverse_map in enumerate(reverse_indices):
+        if reverse_map is not None:
+            result = result.take(reverse_map, axis=dim)

+    return result
+
+
+def _get_index_size(idx: Index1DNorm, dim_size: int) -> int:
+    """Get size for any index type."""
+    if isinstance(idx, slice):
+        return len(range(*idx.indices(dim_size)))
+    elif isinstance(idx, int):
+        return 1
+    else:  # For other types, try to get length
+        return len(idx)
+
+
+def make_slice(idx, dimidx: int, n: int = 2) -> tuple[slice, ...]:
     mut = list(repeat(slice(None), n))
     mut[dimidx] = idx
     return tuple(mut)
anndata/_core/merge.py
CHANGED
@@ -14,9 +14,7 @@ from warnings import warn

 import numpy as np
 import pandas as pd
-import scipy
 from natsort import natsorted
-from packaging.version import Version
 from scipy import sparse

 from anndata._core.file_backing import to_memory
@@ -30,7 +28,6 @@ from ..compat import (
     CupyCSRMatrix,
     CupySparseMatrix,
     DaskArray,
-    _map_cat_to_str,
 )
 from ..utils import asarray, axis_len, warn_once
 from .anndata import AnnData
@@ -41,6 +38,7 @@ if TYPE_CHECKING:
     from collections.abc import Collection, Generator, Iterable, Sequence
     from typing import Any

+    from numpy.typing import NDArray
     from pandas.api.extensions import ExtensionDtype

     from anndata._types import Join_T
@@ -146,11 +144,16 @@ def equal_dask_array(a, b) -> bool:
         return False
     if isinstance(b, DaskArray) and tokenize(a) == tokenize(b):
         return True
-    if isinstance(a._meta,
+    if isinstance(a._meta, np.ndarray):
+        return da.equal(a, b, where=~(da.isnan(a) & da.isnan(b))).all().compute()
+    if a.chunksize == b.chunksize and isinstance(
+        a._meta, CupySparseMatrix | CSMatrix | CSArray
+    ):
         # TODO: Maybe also do this in the other case?
         return da.map_blocks(equal, a, b, drop_axis=(0, 1)).all()
-
-
+    msg = "Misaligned chunks detected when checking for merge equality of dask arrays. Reading full arrays into memory."
+    warn(msg, UserWarning, stacklevel=3)
+    return equal(a.compute(), b.compute())


 @equal.register(np.ndarray)
@@ -185,15 +188,6 @@ def equal_sparse(a, b) -> bool:
         # Comparison broken for CSC matrices
         # https://github.com/cupy/cupy/issues/7757
         a, b = CupyCSRMatrix(a), CupyCSRMatrix(b)
-    if Version(scipy.__version__) >= Version("1.16.0rc1"):
-        # TODO: https://github.com/scipy/scipy/issues/23068
-        return bool(
-            a.format == b.format
-            and (a.shape == b.shape)
-            and np.all(a.indptr == b.indptr)
-            and np.all(a.indices == b.indices)
-            and np.all((a.data == b.data) | (np.isnan(a.data) & np.isnan(b.data)))
-        )
     comp = a != b
     if isinstance(comp, bool):
         return not comp
@@ -560,7 +554,7 @@ class Reindexer:
     Together with `old_pos` this forms a mapping.
     """

-    def __init__(self, old_idx, new_idx):
+    def __init__(self, old_idx: pd.Index, new_idx: pd.Index) -> None:
         self.old_idx = old_idx
         self.new_idx = new_idx
         self.no_change = new_idx.equals(old_idx)
@@ -617,6 +611,9 @@ class Reindexer:
         sub_el = _subset(el, make_slice(indexer, axis, len(shape)))

         if any(indexer == -1):
+            # TODO: Remove this condition once https://github.com/dask/dask/pull/12078 is released
+            if isinstance(sub_el._meta, CSArray | CSMatrix) and np.isscalar(fill_value):
+                fill_value = np.array([[fill_value]])
             sub_el[make_slice(indexer == -1, axis, len(shape))] = fill_value

         return sub_el
@@ -757,7 +754,7 @@ class Reindexer:
         return el[self.idx]

     @property
-    def idx(self):
+    def idx(self) -> NDArray[np.intp]:
         return self.old_idx.get_indexer(self.new_idx)


@@ -786,7 +783,7 @@ def default_fill_value(els):
     return np.nan


-def gen_reindexer(new_var: pd.Index, cur_var: pd.Index):
+def gen_reindexer(new_var: pd.Index, cur_var: pd.Index) -> Reindexer:
     """
     Given a new set of var_names, and a current set, generates a function which will reindex
     a matrix to be aligned with the new set.
@@ -943,7 +940,7 @@ def inner_concat_aligned_mapping(
     return result


-def gen_inner_reindexers(els, new_index, axis: Literal[0, 1] = 0):
+def gen_inner_reindexers(els, new_index, axis: Literal[0, 1] = 0) -> list[Reindexer]:
     alt_axis = 1 - axis
     if axis == 0:
         df_indices = lambda x: x.columns
@@ -1020,7 +1017,7 @@ def missing_element(
     axis: Literal[0, 1] = 0,
     fill_value: Any | None = None,
     off_axis_size: int = 0,
-) -> np.
+) -> NDArray[np.bool_] | DaskArray:
     """Generates value to use when there is a missing element."""
     should_return_dask = any(isinstance(el, DaskArray) for el in els)
     # 0 sized array for in-memory prevents allocating unnecessary memory while preserving broadcasting.
@@ -1643,7 +1640,7 @@ def concat(  # noqa: PLR0912, PLR0913, PLR0915
         )
         if index_unique is not None:
             concat_indices = concat_indices.str.cat(
-
+                label_col.map(str, na_action="ignore"), sep=index_unique
             )
         concat_indices = pd.Index(concat_indices)

@@ -1748,15 +1745,10 @@ def concat(  # noqa: PLR0912, PLR0913, PLR0915
                 for r, a in zip(reindexers, adatas, strict=True)
             ],
         )
-        alt_pairwise = merge(
-
-
-
-                    for k, v in getattr(a, f"{alt_axis_name}p").items()
-                }
-                for r, a in zip(reindexers, adatas, strict=True)
-            ]
-        )
+        alt_pairwise = merge([
+            {k: r(r(v, axis=0), axis=1) for k, v in getattr(a, f"{alt_axis_name}p").items()}
+            for r, a in zip(reindexers, adatas, strict=True)
+        ])
         uns = uns_merge([a.uns for a in adatas])

         raw = None
@@ -1785,17 +1777,15 @@ def concat(  # noqa: PLR0912, PLR0913, PLR0915
                 "not concatenating `.raw` attributes."
             )
            warn(msg, UserWarning, stacklevel=2)
-    return AnnData(
-
-
-
-
-
-
-
-
-
-
-        }
-    )
+    return AnnData(**{
+        "X": X,
+        "layers": layers,
+        axis_name: concat_annot,
+        alt_axis_name: alt_annot,
+        f"{axis_name}m": concat_mapping,
+        f"{alt_axis_name}m": alt_mapping,
+        f"{axis_name}p": concat_pairwise,
+        f"{alt_axis_name}p": alt_pairwise,
+        "uns": uns,
+        "raw": raw,
+    })
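For context, `Reindexer.idx` relies on `pandas.Index.get_indexer`, which returns -1 for labels absent from the old index; that is what the `any(indexer == -1)` branch (and the new sparse fill-value workaround) keys off. A small sketch:

    import pandas as pd

    old_idx = pd.Index(["gene_a", "gene_b", "gene_c"])
    new_idx = pd.Index(["gene_b", "gene_d"])

    indexer = old_idx.get_indexer(new_idx)
    print(indexer)                 # [ 1 -1]  -> gene_d is missing from old_idx
    print((indexer == -1).any())   # True: these positions need a fill value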
anndata/_core/sparse_dataset.py
CHANGED
@@ -16,6 +16,7 @@ import warnings
 from abc import ABC
 from collections.abc import Iterable
 from functools import cached_property
+from importlib.metadata import version
 from itertools import accumulate, chain, pairwise
 from math import floor
 from pathlib import Path
@@ -23,7 +24,6 @@ from typing import TYPE_CHECKING, NamedTuple

 import h5py
 import numpy as np
-import scipy
 import scipy.sparse as ss
 from packaging.version import Version
 from scipy.sparse import _sparsetools
@@ -48,13 +48,12 @@ if TYPE_CHECKING:
     from scipy.sparse._compressed import _cs_matrix

     from .._types import GroupStorageType
-    from ..compat import H5Array
-    from .index import Index, Index1D
+    from ..compat import H5Array, Index, Index1D, Index1DNorm
 else:
     from scipy.sparse import spmatrix as _cs_matrix


-SCIPY_1_15 = Version(scipy
+SCIPY_1_15 = Version(version("scipy")) >= Version("1.15rc0")


 class BackedFormat(NamedTuple):
@@ -278,9 +277,9 @@ def get_compressed_vectors(
     indptr_slices = [slice(*(x.indptr[i : i + 2])) for i in row_idxs]
     # HDF5 cannot handle out-of-order integer indexing
     if isinstance(x.data, ZarrArray):
-        as_np_indptr = np.concatenate(
-
-        )
+        as_np_indptr = np.concatenate([
+            np.arange(s.start, s.stop) for s in indptr_slices
+        ])
         data = x.data[as_np_indptr]
         indices = x.indices[as_np_indptr]
     else:
@@ -309,9 +308,9 @@ def get_compressed_vectors_for_slices(
     start_indptr = indptr_indices[0] - next(offsets)
     if len(slices) < 2:  # there is only one slice so no need to concatenate
         return data, indices, start_indptr
-    end_indptr = np.concatenate(
-
-    )
+    end_indptr = np.concatenate([
+        s[1:] - o for s, o in zip(indptr_indices[1:], offsets, strict=True)
+    ])
     indptr = np.concatenate([start_indptr, end_indptr])
     return data, indices, indptr

@@ -738,5 +737,7 @@ def sparse_dataset(


 @_subset.register(BaseCompressedSparseDataset)
-def subset_sparsedataset(
+def subset_sparsedataset(
+    d, subset_idx: tuple[Index1DNorm] | tuple[Index1DNorm, Index1DNorm]
+):
     return d[subset_idx]
anndata/_core/views.py
CHANGED
@@ -100,7 +100,7 @@ class _ViewMixin(_SetItemMixin):

     # TODO: This makes `deepcopy(obj)` return `obj._view_args.parent._adata_ref`, fix it
     def __deepcopy__(self, memo):
-        parent, attrname,
+        parent, attrname, _keys = self._view_args
         return deepcopy(getattr(parent._adata_ref, attrname))


anndata/_io/h5ad.py
CHANGED
@@ -27,7 +27,6 @@ from ..experimental import read_dispatched
 from .specs import read_elem, write_elem
 from .specs.registry import IOSpec, write_spec
 from .utils import (
-    H5PY_V3,
     _read_legacy_raw,
     idx_chunks_along_axis,
     no_write_dataset_2d,
@@ -264,15 +263,13 @@ def read_h5ad(

     def callback(func, elem_name: str, elem, iospec):
         if iospec.encoding_type == "anndata" or elem_name.endswith("/"):
-            return AnnData(
-
-
-
-
-
-
-                }
-            )
+            return AnnData(**{
+                # This is covering up backwards compat in the anndata initializer
+                # In most cases we should be able to call `func(elen[k])` instead
+                k: read_dispatched(elem[k], callback)
+                for k in elem
+                if not k.startswith("raw.")
+            })
         elif elem_name.startswith("/raw."):
             return None
         elif elem_name == "/X" and "X" in as_sparse:
@@ -326,16 +323,12 @@ def read_dataframe_legacy(dataset: h5py.Dataset) -> pd.DataFrame:
         "Consider rewriting it."
     )
     warn(msg, OldFormatWarning, stacklevel=2)
-
-
-
-            _from_fixed_length_strings(dataset[()]), dtype=dataset.dtype
-    )
+    df = pd.DataFrame(
+        _decode_structured_array(
+            _from_fixed_length_strings(dataset[()]), dtype=dataset.dtype
         )
-
-
-    df.set_index(df.columns[0], inplace=True)
-    return df
+    )
+    return df.set_index(df.columns[0])


 def read_dataframe(group: h5py.Group | h5py.Dataset) -> pd.DataFrame:
@@ -348,10 +341,9 @@ def read_dataframe(group: h5py.Group | h5py.Dataset) -> pd.DataFrame:

 @report_read_key_on_error
 def read_dataset(dataset: h5py.Dataset):
-
-
-
-        dataset = dataset.asstr()
+    string_dtype = h5py.check_string_dtype(dataset.dtype)
+    if (string_dtype is not None) and (string_dtype.encoding == "utf-8"):
+        dataset = dataset.asstr()
     value = dataset[()]
     if not hasattr(value, "dtype"):
         return value
@@ -364,10 +356,9 @@ def read_dataset(dataset: h5py.Dataset):
         return value[0]
     elif len(value.dtype.descr) > 1:  # Compound dtype
         # For backwards compat, now strings are written as variable length
-
-
-
-        value = _decode_structured_array(value, dtype=dtype)
+        value = _decode_structured_array(
+            _from_fixed_length_strings(value), dtype=value.dtype
+        )
     if value.shape == ():
         value = value[()]
     return value
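The rewritten `read_dataset` uses `h5py.check_string_dtype` to decide when `.asstr()` decoding applies. A hedged sketch of that check against an in-memory HDF5 file (file layout is illustrative):

    import h5py
    import numpy as np

    with h5py.File("example.h5", "w", driver="core", backing_store=False) as f:
        f.create_dataset("s", data=["a", "b"], dtype=h5py.string_dtype("utf-8"))
        f.create_dataset("x", data=np.arange(3))

        info = h5py.check_string_dtype(f["s"].dtype)
        print(info.encoding)           # "utf-8" -> safe to call .asstr()
        print(f["s"].asstr()[()])      # array(['a', 'b'], dtype=object)

        print(h5py.check_string_dtype(f["x"].dtype))  # None -> not a string dataset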