anndata 0.12.6__py3-none-any.whl → 0.12.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- anndata/_core/aligned_df.py +5 -5
- anndata/_core/anndata.py +25 -15
- anndata/_core/merge.py +25 -13
- anndata/_core/sparse_dataset.py +11 -2
- anndata/_core/views.py +1 -0
- anndata/_core/xarray.py +31 -15
- anndata/_io/h5ad.py +3 -2
- anndata/_io/read.py +3 -3
- anndata/_io/specs/lazy_methods.py +14 -2
- anndata/_io/specs/methods.py +31 -31
- anndata/_settings.py +3 -3
- anndata/_types.py +2 -2
- anndata/compat/__init__.py +78 -10
- anndata/experimental/_dispatch_io.py +1 -4
- anndata/experimental/backed/_lazy_arrays.py +25 -19
- anndata/experimental/merge.py +67 -21
- anndata/experimental/multi_files/_anncollection.py +2 -2
- anndata/tests/helpers.py +4 -3
- anndata/utils.py +7 -7
- {anndata-0.12.6.dist-info → anndata-0.12.7.dist-info}/METADATA +5 -5
- {anndata-0.12.6.dist-info → anndata-0.12.7.dist-info}/RECORD +24 -24
- {anndata-0.12.6.dist-info → anndata-0.12.7.dist-info}/WHEEL +1 -1
- testing/anndata/_pytest.py +2 -1
- {anndata-0.12.6.dist-info → anndata-0.12.7.dist-info}/licenses/LICENSE +0 -0
anndata/_core/aligned_df.py
CHANGED
|
@@ -9,7 +9,7 @@ import pandas as pd
|
|
|
9
9
|
from pandas.api.types import is_string_dtype
|
|
10
10
|
|
|
11
11
|
from .._warnings import ImplicitModificationWarning
|
|
12
|
-
from ..compat import XDataset
|
|
12
|
+
from ..compat import XDataset, pandas_as_str
|
|
13
13
|
from .xarray import Dataset2D
|
|
14
14
|
|
|
15
15
|
if TYPE_CHECKING:
|
|
@@ -59,7 +59,7 @@ def _gen_dataframe_mapping(
|
|
|
59
59
|
df = pd.DataFrame(
|
|
60
60
|
anno,
|
|
61
61
|
index=None if length is None else mk_index(length),
|
|
62
|
-
columns=None if anno else [],
|
|
62
|
+
columns=None if anno else pd.array([], dtype="str"),
|
|
63
63
|
)
|
|
64
64
|
|
|
65
65
|
if length is None:
|
|
@@ -88,12 +88,12 @@ def _gen_dataframe_df(
|
|
|
88
88
|
if length is not None and length != len(anno):
|
|
89
89
|
raise _mk_df_error(source, attr, length, len(anno))
|
|
90
90
|
anno = anno.copy(deep=False)
|
|
91
|
-
if not is_string_dtype(anno.index):
|
|
91
|
+
if not is_string_dtype(anno.index[~anno.index.isna()]):
|
|
92
92
|
msg = "Transforming to str index."
|
|
93
93
|
warnings.warn(msg, ImplicitModificationWarning, stacklevel=2)
|
|
94
|
-
anno.index = anno.index
|
|
94
|
+
anno.index = pandas_as_str(anno.index)
|
|
95
95
|
if not len(anno.columns):
|
|
96
|
-
anno.columns = anno.columns
|
|
96
|
+
anno.columns = pandas_as_str(anno.columns)
|
|
97
97
|
return anno
|
|
98
98
|
|
|
99
99
|
|
anndata/_core/anndata.py
CHANGED
|
@@ -26,7 +26,14 @@ from anndata._warnings import ImplicitModificationWarning
|
|
|
26
26
|
|
|
27
27
|
from .. import utils
|
|
28
28
|
from .._settings import settings
|
|
29
|
-
from ..compat import
|
|
29
|
+
from ..compat import (
|
|
30
|
+
CSArray,
|
|
31
|
+
DaskArray,
|
|
32
|
+
ZarrArray,
|
|
33
|
+
_move_adj_mtx,
|
|
34
|
+
old_positionals,
|
|
35
|
+
pandas_as_str,
|
|
36
|
+
)
|
|
30
37
|
from ..logging import anndata_logger as logger
|
|
31
38
|
from ..utils import (
|
|
32
39
|
axis_len,
|
|
@@ -48,7 +55,7 @@ from .xarray import Dataset2D
|
|
|
48
55
|
if TYPE_CHECKING:
|
|
49
56
|
from collections.abc import Iterable
|
|
50
57
|
from os import PathLike
|
|
51
|
-
from typing import Any, ClassVar, Literal
|
|
58
|
+
from typing import Any, ClassVar, Literal, NoReturn
|
|
52
59
|
|
|
53
60
|
from zarr.storage import StoreLike
|
|
54
61
|
|
|
@@ -383,11 +390,11 @@ class AnnData(metaclass=utils.DeprecationMixinMeta): # noqa: PLW1641
|
|
|
383
390
|
if obs is None:
|
|
384
391
|
obs = pd.DataFrame(index=X.index)
|
|
385
392
|
elif not isinstance(X.index, pd.RangeIndex):
|
|
386
|
-
x_indices.append(("obs", "index", X.index
|
|
393
|
+
x_indices.append(("obs", "index", pandas_as_str(X.index)))
|
|
387
394
|
if var is None:
|
|
388
395
|
var = pd.DataFrame(index=X.columns)
|
|
389
396
|
elif not isinstance(X.columns, pd.RangeIndex):
|
|
390
|
-
x_indices.append(("var", "columns", X.columns
|
|
397
|
+
x_indices.append(("var", "columns", pandas_as_str(X.columns)))
|
|
391
398
|
X = ensure_df_homogeneous(X, "X")
|
|
392
399
|
|
|
393
400
|
# ----------------------------------------------------------------------
|
|
@@ -790,7 +797,9 @@ class AnnData(metaclass=utils.DeprecationMixinMeta): # noqa: PLW1641
|
|
|
790
797
|
)
|
|
791
798
|
raise ValueError(msg)
|
|
792
799
|
else:
|
|
793
|
-
value =
|
|
800
|
+
value = (
|
|
801
|
+
value if isinstance(value, pd.Index) else pandas_as_str(pd.Index(value))
|
|
802
|
+
)
|
|
794
803
|
if not isinstance(value.name, str | type(None)):
|
|
795
804
|
value.name = None
|
|
796
805
|
if (
|
|
@@ -1058,6 +1067,7 @@ class AnnData(metaclass=utils.DeprecationMixinMeta): # noqa: PLW1641
|
|
|
1058
1067
|
if not isinstance(df_full[k].dtype, pd.CategoricalDtype):
|
|
1059
1068
|
continue
|
|
1060
1069
|
all_categories = df_full[k].cat.categories
|
|
1070
|
+
# TODO: this mode is going away
|
|
1061
1071
|
with pd.option_context("mode.chained_assignment", None):
|
|
1062
1072
|
df_sub[k] = df_sub[k].cat.remove_unused_categories()
|
|
1063
1073
|
# also correct the colors...
|
|
@@ -1627,8 +1637,8 @@ class AnnData(metaclass=utils.DeprecationMixinMeta): # noqa: PLW1641
|
|
|
1627
1637
|
annoA-1 NaN 2.0 1.0 0.0
|
|
1628
1638
|
annoA-2 NaN 3.0 2.0 0.0
|
|
1629
1639
|
annoB-2 NaN 2.0 1.0 0.0
|
|
1630
|
-
>>> outer.var_names
|
|
1631
|
-
Index(['a', 'b', 'c', 'd'], dtype='
|
|
1640
|
+
>>> outer.var_names.astype("string")
|
|
1641
|
+
Index(['a', 'b', 'c', 'd'], dtype='string')
|
|
1632
1642
|
>>> outer.X
|
|
1633
1643
|
array([[ 1., 2., 3., nan],
|
|
1634
1644
|
[ 4., 5., 6., nan],
|
|
@@ -1710,8 +1720,8 @@ class AnnData(metaclass=utils.DeprecationMixinMeta): # noqa: PLW1641
|
|
|
1710
1720
|
... dict(var_names=['d', 'c', 'b']),
|
|
1711
1721
|
... )
|
|
1712
1722
|
>>> adata = adata1.concatenate(adata2, adata3, join='outer')
|
|
1713
|
-
>>> adata.var_names
|
|
1714
|
-
Index(['a', 'b', 'c', 'd'], dtype='
|
|
1723
|
+
>>> adata.var_names.astype("string")
|
|
1724
|
+
Index(['a', 'b', 'c', 'd'], dtype='string')
|
|
1715
1725
|
>>> adata.X.toarray()
|
|
1716
1726
|
array([[0., 2., 3., 0.],
|
|
1717
1727
|
[0., 5., 6., 0.],
|
|
@@ -1779,25 +1789,25 @@ class AnnData(metaclass=utils.DeprecationMixinMeta): # noqa: PLW1641
|
|
|
1779
1789
|
|
|
1780
1790
|
return out
|
|
1781
1791
|
|
|
1782
|
-
def var_names_make_unique(self, join: str = "-"):
|
|
1792
|
+
def var_names_make_unique(self, join: str = "-") -> None:
|
|
1783
1793
|
# Important to go through the setter so obsm dataframes are updated too
|
|
1784
1794
|
self.var_names = utils.make_index_unique(self.var.index, join)
|
|
1785
1795
|
|
|
1786
1796
|
var_names_make_unique.__doc__ = utils.make_index_unique.__doc__
|
|
1787
1797
|
|
|
1788
|
-
def obs_names_make_unique(self, join: str = "-"):
|
|
1798
|
+
def obs_names_make_unique(self, join: str = "-") -> None:
|
|
1789
1799
|
# Important to go through the setter so obsm dataframes are updated too
|
|
1790
1800
|
self.obs_names = utils.make_index_unique(self.obs.index, join)
|
|
1791
1801
|
|
|
1792
1802
|
obs_names_make_unique.__doc__ = utils.make_index_unique.__doc__
|
|
1793
1803
|
|
|
1794
|
-
def _check_uniqueness(self):
|
|
1795
|
-
if
|
|
1804
|
+
def _check_uniqueness(self) -> None:
|
|
1805
|
+
if self.obs.index[~self.obs.index.isna()].has_duplicates:
|
|
1796
1806
|
utils.warn_names_duplicates("obs")
|
|
1797
|
-
if
|
|
1807
|
+
if self.var.index[~self.var.index.isna()].has_duplicates:
|
|
1798
1808
|
utils.warn_names_duplicates("var")
|
|
1799
1809
|
|
|
1800
|
-
def __contains__(self, key: Any):
|
|
1810
|
+
def __contains__(self, key: Any) -> NoReturn:
|
|
1801
1811
|
msg = "AnnData has no attribute __contains__, don’t check `in adata`."
|
|
1802
1812
|
raise AttributeError(msg)
|
|
1803
1813
|
|
anndata/_core/merge.py
CHANGED
|
@@ -334,7 +334,11 @@ def try_unifying_dtype( # noqa PLR0911, PLR0912
|
|
|
334
334
|
if not pd.isnull(dtype) and len(dtype.categories) > 0
|
|
335
335
|
)
|
|
336
336
|
if same_orders:
|
|
337
|
-
return next(
|
|
337
|
+
return next(
|
|
338
|
+
dtype
|
|
339
|
+
for dtype in dtypes
|
|
340
|
+
if not pd.isnull(dtype) and len(dtype.categories) > 0
|
|
341
|
+
)
|
|
338
342
|
return object
|
|
339
343
|
# Boolean
|
|
340
344
|
elif all(pd.api.types.is_bool_dtype(dtype) or dtype is None for dtype in col):
|
|
@@ -958,8 +962,13 @@ def gen_inner_reindexers(els, new_index, axis: Literal[0, 1] = 0) -> list[Reinde
|
|
|
958
962
|
msg = "Cannot concatenate an AwkwardArray with other array types."
|
|
959
963
|
raise NotImplementedError(msg)
|
|
960
964
|
common_keys = intersect_keys(el.fields for el in els)
|
|
965
|
+
# TODO: replace dtype=object once this is fixed: https://github.com/scikit-hep/awkward/issues/3730
|
|
961
966
|
reindexers = [
|
|
962
|
-
Reindexer(
|
|
967
|
+
Reindexer(
|
|
968
|
+
pd.Index(el.fields, dtype=object),
|
|
969
|
+
pd.Index(list(common_keys), dtype=object),
|
|
970
|
+
)
|
|
971
|
+
for el in els
|
|
963
972
|
]
|
|
964
973
|
else:
|
|
965
974
|
min_ind = min(el.shape[alt_axis] for el in els)
|
|
@@ -1198,6 +1207,8 @@ def make_dask_col_from_extension_dtype(
|
|
|
1198
1207
|
A :class:`dask.Array`: representation of the column.
|
|
1199
1208
|
"""
|
|
1200
1209
|
import dask.array as da
|
|
1210
|
+
import xarray as xr
|
|
1211
|
+
from xarray.core.indexing import LazilyIndexedArray
|
|
1201
1212
|
|
|
1202
1213
|
from anndata._io.specs.lazy_methods import (
|
|
1203
1214
|
compute_chunk_layout_for_axis_size,
|
|
@@ -1205,7 +1216,6 @@ def make_dask_col_from_extension_dtype(
|
|
|
1205
1216
|
maybe_open_h5,
|
|
1206
1217
|
)
|
|
1207
1218
|
from anndata.compat import XDataArray
|
|
1208
|
-
from anndata.compat import xarray as xr
|
|
1209
1219
|
from anndata.experimental import read_elem_lazy
|
|
1210
1220
|
|
|
1211
1221
|
base_path_or_zarr_group = col.attrs.get("base_path_or_zarr_group")
|
|
@@ -1228,9 +1238,7 @@ def make_dask_col_from_extension_dtype(
|
|
|
1228
1238
|
# reopening is important to get around h5py's unserializable lock in processes
|
|
1229
1239
|
with maybe_open_h5(base_path_or_zarr_group, elem_name) as f:
|
|
1230
1240
|
v = read_elem_lazy(f)
|
|
1231
|
-
variable = xr.Variable(
|
|
1232
|
-
data=xr.core.indexing.LazilyIndexedArray(v), dims=dims
|
|
1233
|
-
)
|
|
1241
|
+
variable = xr.Variable(data=LazilyIndexedArray(v), dims=dims)
|
|
1234
1242
|
data_array = XDataArray(
|
|
1235
1243
|
variable,
|
|
1236
1244
|
coords=coords,
|
|
@@ -1323,9 +1331,10 @@ def concat_dataset2d_on_annot_axis(
|
|
|
1323
1331
|
-------
|
|
1324
1332
|
Concatenated :class:`~anndata.experimental.backed.Dataset2D`
|
|
1325
1333
|
"""
|
|
1334
|
+
import xarray as xr
|
|
1335
|
+
|
|
1326
1336
|
from anndata._core.xarray import Dataset2D
|
|
1327
1337
|
from anndata._io.specs.lazy_methods import DUMMY_RANGE_INDEX_KEY
|
|
1328
|
-
from anndata.compat import xarray as xr
|
|
1329
1338
|
|
|
1330
1339
|
annotations_re_indexed = []
|
|
1331
1340
|
have_backed = any(a.is_backed for a in annotations)
|
|
@@ -1525,15 +1534,18 @@ def concat( # noqa: PLR0912, PLR0913, PLR0915
|
|
|
1525
1534
|
>>> inner
|
|
1526
1535
|
AnnData object with n_obs × n_vars = 4 × 2
|
|
1527
1536
|
obs: 'group'
|
|
1528
|
-
>>> (
|
|
1529
|
-
(
|
|
1530
|
-
|
|
1537
|
+
>>> (
|
|
1538
|
+
... inner.obs_names.astype("string"),
|
|
1539
|
+
... inner.var_names.astype("string"),
|
|
1540
|
+
... ) # doctest: +NORMALIZE_WHITESPACE
|
|
1541
|
+
(Index(['s1', 's2', 's3', 's4'], dtype='string'),
|
|
1542
|
+
Index(['var1', 'var2'], dtype='string'))
|
|
1531
1543
|
>>> outer = ad.concat([a, b], join="outer") # Joining on union of variables
|
|
1532
1544
|
>>> outer
|
|
1533
1545
|
AnnData object with n_obs × n_vars = 4 × 3
|
|
1534
1546
|
obs: 'group', 'measure'
|
|
1535
|
-
>>> outer.var_names
|
|
1536
|
-
Index(['var1', 'var2', 'var3'], dtype='
|
|
1547
|
+
>>> outer.var_names.astype("string")
|
|
1548
|
+
Index(['var1', 'var2', 'var3'], dtype='string')
|
|
1537
1549
|
>>> outer.to_df() # Sparse arrays are padded with zeroes by default
|
|
1538
1550
|
var1 var2 var3
|
|
1539
1551
|
s1 0 1 0
|
|
@@ -1638,7 +1650,7 @@ def concat( # noqa: PLR0912, PLR0913, PLR0915
|
|
|
1638
1650
|
|
|
1639
1651
|
# Combining indexes
|
|
1640
1652
|
concat_indices = pd.concat(
|
|
1641
|
-
[
|
|
1653
|
+
[axis_indices(a, axis=axis).to_series() for a in adatas], ignore_index=True
|
|
1642
1654
|
)
|
|
1643
1655
|
if index_unique is not None:
|
|
1644
1656
|
concat_indices = concat_indices.str.cat(
|
anndata/_core/sparse_dataset.py
CHANGED
|
@@ -392,8 +392,17 @@ def is_sparse_indexing_overridden(
|
|
|
392
392
|
def validate_indices(
|
|
393
393
|
mtx: BackedSparseMatrix, indices: tuple[Index1D, Index1D]
|
|
394
394
|
) -> tuple[Index1D, Index1D]:
|
|
395
|
-
|
|
396
|
-
|
|
395
|
+
if hasattr(mtx, "_validate_indices"):
|
|
396
|
+
res = mtx._validate_indices(indices)
|
|
397
|
+
return res[0] if SCIPY_1_15 else res
|
|
398
|
+
# https://github.com/scipy/scipy/pull/23267
|
|
399
|
+
elif Version(version("scipy")) >= Version("1.17.0rc0"):
|
|
400
|
+
from scipy.sparse._index import _validate_indices # type: ignore
|
|
401
|
+
|
|
402
|
+
return _validate_indices(indices, mtx.shape, mtx.format)[0]
|
|
403
|
+
else: # pragma: no cover
|
|
404
|
+
msg = "Cannot validate indices"
|
|
405
|
+
raise RuntimeError(msg)
|
|
397
406
|
|
|
398
407
|
|
|
399
408
|
class BaseCompressedSparseDataset(abc._AbstractCSDataset, ABC):
|
anndata/_core/views.py
CHANGED
|
@@ -315,6 +315,7 @@ def as_view_df(df, view_args):
|
|
|
315
315
|
if settings.remove_unused_categories:
|
|
316
316
|
for col in df.columns:
|
|
317
317
|
if isinstance(df[col].dtype, pd.CategoricalDtype):
|
|
318
|
+
# TODO: this mode is going away
|
|
318
319
|
with pd.option_context("mode.chained_assignment", None):
|
|
319
320
|
df[col] = df[col].cat.remove_unused_categories()
|
|
320
321
|
return DataFrameView(df, view_args=view_args)
|
anndata/_core/xarray.py
CHANGED
|
@@ -3,23 +3,33 @@ from __future__ import annotations
|
|
|
3
3
|
import warnings
|
|
4
4
|
from dataclasses import dataclass
|
|
5
5
|
from functools import wraps
|
|
6
|
-
from typing import TYPE_CHECKING, overload
|
|
6
|
+
from typing import TYPE_CHECKING, TypeVar, overload
|
|
7
7
|
|
|
8
8
|
import numpy as np
|
|
9
9
|
import pandas as pd
|
|
10
10
|
|
|
11
|
-
from ..compat import XDataArray, XDataset, XVariable
|
|
11
|
+
from ..compat import XDataArray, XDataset, XVariable, pandas_as_str
|
|
12
12
|
|
|
13
13
|
if TYPE_CHECKING:
|
|
14
|
-
from collections.abc import
|
|
14
|
+
from collections.abc import (
|
|
15
|
+
Callable,
|
|
16
|
+
Collection,
|
|
17
|
+
Hashable,
|
|
18
|
+
Iterable,
|
|
19
|
+
Iterator,
|
|
20
|
+
Mapping,
|
|
21
|
+
)
|
|
15
22
|
from typing import Any, Literal
|
|
16
23
|
|
|
17
24
|
from .._types import Dataset2DIlocIndexer
|
|
18
25
|
|
|
26
|
+
P = TypeVar("P")
|
|
27
|
+
R = TypeVar("R")
|
|
19
28
|
|
|
20
|
-
|
|
29
|
+
|
|
30
|
+
def requires_xarray(func: Callable[P, R]) -> Callable[P, R]:
|
|
21
31
|
@wraps(func)
|
|
22
|
-
def wrapper(*args, **kwargs):
|
|
32
|
+
def wrapper(*args: P.args, **kwargs: P.kwargs) -> R:
|
|
23
33
|
try:
|
|
24
34
|
import xarray # noqa: F401
|
|
25
35
|
except ImportError as e:
|
|
@@ -91,7 +101,7 @@ class Dataset2D:
|
|
|
91
101
|
return self.ds.attrs.get("is_backed", False)
|
|
92
102
|
|
|
93
103
|
@is_backed.setter
|
|
94
|
-
def is_backed(self, isbacked: bool) ->
|
|
104
|
+
def is_backed(self, isbacked: bool) -> None:
|
|
95
105
|
if not isbacked and "is_backed" in self.ds.attrs:
|
|
96
106
|
del self.ds.attrs["is_backed"]
|
|
97
107
|
else:
|
|
@@ -191,18 +201,21 @@ class Dataset2D:
|
|
|
191
201
|
@overload
|
|
192
202
|
def __getitem__(self, key: Hashable) -> XDataArray: ...
|
|
193
203
|
@overload
|
|
194
|
-
def __getitem__(self, key:
|
|
204
|
+
def __getitem__(self, key: Collection[Hashable]) -> Dataset2D: ...
|
|
195
205
|
def __getitem__(
|
|
196
206
|
self, key: Mapping[Any, Any] | Hashable | Iterable[Hashable]
|
|
197
207
|
) -> Dataset2D | XDataArray:
|
|
198
208
|
ret = self.ds.__getitem__(key)
|
|
199
|
-
if len(key) == 0 and not isinstance(key, tuple): # empty
|
|
209
|
+
if is_empty := (len(key) == 0 and not isinstance(key, tuple)): # empty Dataset
|
|
200
210
|
ret.coords[self.index_dim] = self.xr_index
|
|
201
211
|
if isinstance(ret, XDataset):
|
|
202
212
|
# If we get an xarray Dataset, we return a Dataset2D
|
|
203
213
|
as_2d = Dataset2D(ret)
|
|
204
|
-
|
|
205
|
-
|
|
214
|
+
if not is_empty and self.true_index_dim not in [
|
|
215
|
+
*as_2d.columns,
|
|
216
|
+
as_2d.index_dim,
|
|
217
|
+
]:
|
|
218
|
+
as_2d[self.true_index_dim] = self.true_index
|
|
206
219
|
as_2d.is_backed = self.is_backed
|
|
207
220
|
return as_2d
|
|
208
221
|
return ret
|
|
@@ -222,18 +235,21 @@ class Dataset2D:
|
|
|
222
235
|
-------
|
|
223
236
|
:class:`pandas.DataFrame` with index set accordingly.
|
|
224
237
|
"""
|
|
238
|
+
index_key = self.ds.attrs.get("indexing_key", None)
|
|
239
|
+
all_columns = {*self.columns, *([] if index_key is None else [index_key])}
|
|
225
240
|
# https://github.com/pydata/xarray/issues/10419
|
|
226
241
|
non_nullable_string_cols = {
|
|
227
242
|
col
|
|
228
|
-
for col in
|
|
243
|
+
for col in all_columns
|
|
229
244
|
if not self[col].attrs.get("is_nullable_string", False)
|
|
230
245
|
}
|
|
231
246
|
df = self.ds.to_dataframe()
|
|
232
|
-
|
|
247
|
+
for col in all_columns - non_nullable_string_cols:
|
|
248
|
+
df[col] = (
|
|
249
|
+
pandas_as_str(df[col]) if col == index_key else df[col].astype("string")
|
|
250
|
+
)
|
|
233
251
|
if df.index.name != index_key and index_key is not None:
|
|
234
252
|
df = df.set_index(index_key)
|
|
235
|
-
for col in set(self.columns) - non_nullable_string_cols:
|
|
236
|
-
df[col] = df[col].astype(dtype="string")
|
|
237
253
|
df.index.name = None # matches old AnnData object
|
|
238
254
|
return df
|
|
239
255
|
|
|
@@ -263,7 +279,7 @@ class Dataset2D:
|
|
|
263
279
|
For supported setter values see :meth:`xarray.Dataset.__setitem__`.
|
|
264
280
|
"""
|
|
265
281
|
if key == self.index_dim:
|
|
266
|
-
msg = f"Cannot set {self.index_dim} as a variable. Use `index
|
|
282
|
+
msg = f"Cannot set the index dimension {self.index_dim} as if it were a variable. Use `ds.index = ...` instead."
|
|
267
283
|
raise KeyError(msg)
|
|
268
284
|
if isinstance(value, tuple):
|
|
269
285
|
if isinstance(value[0], tuple):
|
anndata/_io/h5ad.py
CHANGED
|
@@ -41,6 +41,7 @@ if TYPE_CHECKING:
|
|
|
41
41
|
|
|
42
42
|
from .._core.file_backing import AnnDataFileManager
|
|
43
43
|
from .._core.raw import Raw
|
|
44
|
+
from .._types import StorageType
|
|
44
45
|
|
|
45
46
|
T = TypeVar("T")
|
|
46
47
|
|
|
@@ -261,7 +262,7 @@ def read_h5ad(
|
|
|
261
262
|
|
|
262
263
|
with h5py.File(filename, "r") as f:
|
|
263
264
|
|
|
264
|
-
def callback(
|
|
265
|
+
def callback(read_func, elem_name: str, elem: StorageType, iospec: IOSpec):
|
|
265
266
|
if iospec.encoding_type == "anndata" or elem_name.endswith("/"):
|
|
266
267
|
return AnnData(**{
|
|
267
268
|
# This is covering up backwards compat in the anndata initializer
|
|
@@ -279,7 +280,7 @@ def read_h5ad(
|
|
|
279
280
|
elif elem_name in {"/obs", "/var"}:
|
|
280
281
|
# Backwards compat
|
|
281
282
|
return read_dataframe(elem)
|
|
282
|
-
return
|
|
283
|
+
return read_func(elem)
|
|
283
284
|
|
|
284
285
|
adata = read_dispatched(f, callback=callback)
|
|
285
286
|
|
anndata/_io/read.py
CHANGED
|
@@ -15,7 +15,7 @@ import pandas as pd
|
|
|
15
15
|
from scipy import sparse
|
|
16
16
|
|
|
17
17
|
from .. import AnnData
|
|
18
|
-
from ..compat import old_positionals
|
|
18
|
+
from ..compat import old_positionals, pandas_as_str
|
|
19
19
|
from .utils import is_float
|
|
20
20
|
|
|
21
21
|
if TYPE_CHECKING:
|
|
@@ -74,8 +74,8 @@ def read_excel(
|
|
|
74
74
|
|
|
75
75
|
df = read_excel(fspath(filename), sheet)
|
|
76
76
|
X = df.values[:, 1:]
|
|
77
|
-
row = dict(row_names=df.iloc[:, 0].
|
|
78
|
-
col = dict(col_names=
|
|
77
|
+
row = dict(row_names=pandas_as_str(df.iloc[:, 0]).array)
|
|
78
|
+
col = dict(col_names=pandas_as_str(df.columns[1:]).array)
|
|
79
79
|
return AnnData(X, row, col)
|
|
80
80
|
|
|
81
81
|
|
|
@@ -270,7 +270,10 @@ def _gen_xarray_dict_iterator_from_elems(
|
|
|
270
270
|
"base_path_or_zarr_group": v.base_path_or_zarr_group,
|
|
271
271
|
"elem_name": v.elem_name,
|
|
272
272
|
"is_nullable_string": isinstance(v, MaskedArray)
|
|
273
|
-
and
|
|
273
|
+
and (
|
|
274
|
+
v.dtype == NULLABLE_NUMPY_STRING_TYPE
|
|
275
|
+
or isinstance(v.dtype, pd.StringDtype | np.dtypes.StringDType)
|
|
276
|
+
),
|
|
274
277
|
},
|
|
275
278
|
)
|
|
276
279
|
elif k == dim_name:
|
|
@@ -296,6 +299,10 @@ def read_dataframe(
|
|
|
296
299
|
use_range_index: bool = False,
|
|
297
300
|
chunks: tuple[int] | None = None,
|
|
298
301
|
) -> Dataset2D:
|
|
302
|
+
from xarray.core.indexing import BasicIndexer
|
|
303
|
+
|
|
304
|
+
from ...experimental.backed._lazy_arrays import MaskedArray
|
|
305
|
+
|
|
299
306
|
elem_dict = {
|
|
300
307
|
k: _reader.read_elem(elem[k], chunks=chunks)
|
|
301
308
|
for k in [*elem.attrs["column-order"], elem.attrs["_index"]]
|
|
@@ -305,7 +312,12 @@ def read_dataframe(
|
|
|
305
312
|
if not use_range_index:
|
|
306
313
|
dim_name = elem.attrs["_index"]
|
|
307
314
|
# no sense in reading this in multiple times since xarray requires an in-memory index
|
|
308
|
-
|
|
315
|
+
if isinstance(elem_dict[dim_name], DaskArray):
|
|
316
|
+
index = elem_dict[dim_name].compute()
|
|
317
|
+
elif isinstance(elem_dict[dim_name], MaskedArray):
|
|
318
|
+
index = elem_dict[dim_name][BasicIndexer((slice(None),))]
|
|
319
|
+
else:
|
|
320
|
+
raise NotImplementedError()
|
|
309
321
|
else:
|
|
310
322
|
dim_name = DUMMY_RANGE_INDEX_KEY
|
|
311
323
|
index = pd.RangeIndex(len(elem_dict[elem.attrs["_index"]])).astype("str")
|
anndata/_io/specs/methods.py
CHANGED
|
@@ -25,7 +25,6 @@ from anndata._core.sparse_dataset import _CSCDataset, _CSRDataset, sparse_datase
|
|
|
25
25
|
from anndata._io.utils import check_key, zero_dim_array_as_scalar
|
|
26
26
|
from anndata._warnings import OldFormatWarning
|
|
27
27
|
from anndata.compat import (
|
|
28
|
-
NULLABLE_NUMPY_STRING_TYPE,
|
|
29
28
|
AwkArray,
|
|
30
29
|
CupyArray,
|
|
31
30
|
CupyCSCMatrix,
|
|
@@ -43,7 +42,7 @@ from anndata.compat import (
|
|
|
43
42
|
)
|
|
44
43
|
|
|
45
44
|
from ..._settings import settings
|
|
46
|
-
from ...compat import is_zarr_v2
|
|
45
|
+
from ...compat import NULLABLE_NUMPY_STRING_TYPE, PANDAS_STRING_ARRAY_TYPES, is_zarr_v2
|
|
47
46
|
from .registry import _REGISTRY, IOSpec, read_elem, read_elem_partial
|
|
48
47
|
|
|
49
48
|
if TYPE_CHECKING:
|
|
@@ -1140,27 +1139,24 @@ def read_partial_categorical(elem, *, items=None, indices=(slice(None),)):
|
|
|
1140
1139
|
@_REGISTRY.register_write(
|
|
1141
1140
|
ZarrGroup, pd.arrays.BooleanArray, IOSpec("nullable-boolean", "0.1.0")
|
|
1142
1141
|
)
|
|
1143
|
-
@_REGISTRY.register_write(
|
|
1144
|
-
H5Group, pd.arrays.StringArray, IOSpec("nullable-string-array", "0.1.0")
|
|
1145
|
-
)
|
|
1146
|
-
@_REGISTRY.register_write(
|
|
1147
|
-
ZarrGroup, pd.arrays.StringArray, IOSpec("nullable-string-array", "0.1.0")
|
|
1148
|
-
)
|
|
1149
1142
|
def write_nullable(
|
|
1150
1143
|
f: GroupStorageType,
|
|
1151
1144
|
k: str,
|
|
1152
|
-
v: pd.arrays.IntegerArray
|
|
1145
|
+
v: pd.arrays.IntegerArray
|
|
1146
|
+
| pd.arrays.BooleanArray
|
|
1147
|
+
| pd.arrays.StringArray
|
|
1148
|
+
| pd.arrays.ArrowStringArray,
|
|
1153
1149
|
*,
|
|
1154
1150
|
_writer: Writer,
|
|
1155
1151
|
dataset_kwargs: Mapping[str, Any] = MappingProxyType({}),
|
|
1156
|
-
):
|
|
1152
|
+
) -> None:
|
|
1157
1153
|
if (
|
|
1158
|
-
isinstance(v, pd.arrays.StringArray)
|
|
1154
|
+
isinstance(v, pd.arrays.StringArray | pd.arrays.ArrowStringArray)
|
|
1159
1155
|
and not settings.allow_write_nullable_strings
|
|
1160
1156
|
):
|
|
1161
1157
|
msg = (
|
|
1162
1158
|
"`anndata.settings.allow_write_nullable_strings` is False, "
|
|
1163
|
-
"because writing of `pd.arrays.StringArray` is new "
|
|
1159
|
+
"because writing of `pd.arrays.{StringArray,ArrowStringArray}` is new "
|
|
1164
1160
|
"and not supported in anndata < 0.11, still use by many people. "
|
|
1165
1161
|
"Opt-in to writing these arrays by toggling the setting to True."
|
|
1166
1162
|
)
|
|
@@ -1168,13 +1164,19 @@ def write_nullable(
|
|
|
1168
1164
|
g = f.require_group(k)
|
|
1169
1165
|
values = (
|
|
1170
1166
|
v.to_numpy(na_value="")
|
|
1171
|
-
if isinstance(v, pd.arrays.StringArray)
|
|
1167
|
+
if isinstance(v, pd.arrays.StringArray | pd.arrays.ArrowStringArray)
|
|
1172
1168
|
else v.to_numpy(na_value=0, dtype=v.dtype.numpy_dtype)
|
|
1173
1169
|
)
|
|
1174
1170
|
_writer.write_elem(g, "values", values, dataset_kwargs=dataset_kwargs)
|
|
1175
1171
|
_writer.write_elem(g, "mask", v.isna(), dataset_kwargs=dataset_kwargs)
|
|
1176
1172
|
|
|
1177
1173
|
|
|
1174
|
+
for store_type, array_type in product([H5Group, ZarrGroup], PANDAS_STRING_ARRAY_TYPES):
|
|
1175
|
+
_REGISTRY.register_write(
|
|
1176
|
+
store_type, array_type, IOSpec("nullable-string-array", "0.1.0")
|
|
1177
|
+
)(write_nullable)
|
|
1178
|
+
|
|
1179
|
+
|
|
1178
1180
|
def _read_nullable(
|
|
1179
1181
|
elem: GroupStorageType,
|
|
1180
1182
|
*,
|
|
@@ -1190,18 +1192,6 @@ def _read_nullable(
|
|
|
1190
1192
|
)
|
|
1191
1193
|
|
|
1192
1194
|
|
|
1193
|
-
def _string_array(
|
|
1194
|
-
values: np.ndarray, mask: np.ndarray
|
|
1195
|
-
) -> pd.api.extensions.ExtensionArray:
|
|
1196
|
-
"""Construct a string array from values and mask."""
|
|
1197
|
-
arr = pd.array(
|
|
1198
|
-
values.astype(NULLABLE_NUMPY_STRING_TYPE),
|
|
1199
|
-
dtype=pd.StringDtype(),
|
|
1200
|
-
)
|
|
1201
|
-
arr[mask] = pd.NA
|
|
1202
|
-
return arr
|
|
1203
|
-
|
|
1204
|
-
|
|
1205
1195
|
_REGISTRY.register_read(H5Group, IOSpec("nullable-integer", "0.1.0"))(
|
|
1206
1196
|
read_nullable_integer := partial(_read_nullable, array_type=pd.arrays.IntegerArray)
|
|
1207
1197
|
)
|
|
@@ -1216,12 +1206,22 @@ _REGISTRY.register_read(ZarrGroup, IOSpec("nullable-boolean", "0.1.0"))(
|
|
|
1216
1206
|
read_nullable_boolean
|
|
1217
1207
|
)
|
|
1218
1208
|
|
|
1219
|
-
|
|
1220
|
-
|
|
1221
|
-
)
|
|
1222
|
-
|
|
1223
|
-
|
|
1224
|
-
)
|
|
1209
|
+
|
|
1210
|
+
@_REGISTRY.register_read(H5Group, IOSpec("nullable-string-array", "0.1.0"))
|
|
1211
|
+
@_REGISTRY.register_read(ZarrGroup, IOSpec("nullable-string-array", "0.1.0"))
|
|
1212
|
+
def _read_nullable_string(
|
|
1213
|
+
elem: GroupStorageType, *, _reader: Reader
|
|
1214
|
+
) -> pd.api.extensions.ExtensionArray:
|
|
1215
|
+
values = _reader.read_elem(elem["values"])
|
|
1216
|
+
mask = _reader.read_elem(elem["mask"])
|
|
1217
|
+
dtype = pd.StringDtype()
|
|
1218
|
+
|
|
1219
|
+
arr = pd.array(
|
|
1220
|
+
values.astype(NULLABLE_NUMPY_STRING_TYPE),
|
|
1221
|
+
dtype=dtype,
|
|
1222
|
+
)
|
|
1223
|
+
arr[mask] = pd.NA
|
|
1224
|
+
return arr
|
|
1225
1225
|
|
|
1226
1226
|
|
|
1227
1227
|
###########
|
anndata/_settings.py
CHANGED
|
@@ -102,7 +102,7 @@ def check_and_get_environ_var(
|
|
|
102
102
|
)
|
|
103
103
|
|
|
104
104
|
|
|
105
|
-
def check_and_get_bool(option, default_value):
|
|
105
|
+
def check_and_get_bool(option: str, default_value: bool) -> bool: # noqa: FBT001
|
|
106
106
|
return check_and_get_environ_var(
|
|
107
107
|
f"ANNDATA_{option.upper()}",
|
|
108
108
|
str(int(default_value)),
|
|
@@ -111,7 +111,7 @@ def check_and_get_bool(option, default_value):
|
|
|
111
111
|
)
|
|
112
112
|
|
|
113
113
|
|
|
114
|
-
def check_and_get_int(option, default_value):
|
|
114
|
+
def check_and_get_int(option: str, default_value: int) -> int:
|
|
115
115
|
return check_and_get_environ_var(
|
|
116
116
|
f"ANNDATA_{option.upper()}",
|
|
117
117
|
str(int(default_value)),
|
|
@@ -431,7 +431,7 @@ settings.register(
|
|
|
431
431
|
settings.register(
|
|
432
432
|
"allow_write_nullable_strings",
|
|
433
433
|
default_value=False,
|
|
434
|
-
description="Whether or not to allow writing of `pd.arrays.StringArray`.",
|
|
434
|
+
description="Whether or not to allow writing of `pd.arrays.{StringArray,ArrowStringArray}`.",
|
|
435
435
|
validate=validate_bool,
|
|
436
436
|
get_from_env=check_and_get_bool,
|
|
437
437
|
)
|
anndata/_types.py
CHANGED
|
@@ -130,7 +130,7 @@ class Write(Protocol[RWAble_contra]):
|
|
|
130
130
|
v
|
|
131
131
|
The element to write out.
|
|
132
132
|
dataset_kwargs
|
|
133
|
-
Keyword arguments to be passed to a library-level io function, like `chunks` for :
|
|
133
|
+
Keyword arguments to be passed to a library-level io function, like `chunks` for :mod:`zarr`.
|
|
134
134
|
"""
|
|
135
135
|
...
|
|
136
136
|
|
|
@@ -194,7 +194,7 @@ class WriteCallback(Protocol[RWAble]):
|
|
|
194
194
|
iospec
|
|
195
195
|
Internal AnnData encoding specification for the element.
|
|
196
196
|
dataset_kwargs
|
|
197
|
-
Keyword arguments to be passed to a library-level io function, like `chunks` for :
|
|
197
|
+
Keyword arguments to be passed to a library-level io function, like `chunks` for :mod:`zarr`.
|
|
198
198
|
"""
|
|
199
199
|
...
|
|
200
200
|
|
anndata/compat/__init__.py
CHANGED
|
@@ -2,11 +2,12 @@ from __future__ import annotations
|
|
|
2
2
|
|
|
3
3
|
from codecs import decode
|
|
4
4
|
from collections.abc import Mapping, Sequence
|
|
5
|
+
from enum import Enum, auto
|
|
5
6
|
from functools import cache, partial, singledispatch
|
|
6
7
|
from importlib.metadata import version
|
|
7
8
|
from importlib.util import find_spec
|
|
8
9
|
from types import EllipsisType
|
|
9
|
-
from typing import TYPE_CHECKING, TypeVar
|
|
10
|
+
from typing import TYPE_CHECKING, TypeVar, overload
|
|
10
11
|
from warnings import warn
|
|
11
12
|
|
|
12
13
|
import h5py
|
|
@@ -31,8 +32,8 @@ CSMatrix = scipy.sparse.csr_matrix | scipy.sparse.csc_matrix
|
|
|
31
32
|
CSArray = scipy.sparse.csr_array | scipy.sparse.csc_array
|
|
32
33
|
|
|
33
34
|
|
|
34
|
-
class Empty:
|
|
35
|
-
|
|
35
|
+
class Empty(Enum):
|
|
36
|
+
TOKEN = auto()
|
|
36
37
|
|
|
37
38
|
|
|
38
39
|
Index1DNorm = slice | NDArray[np.bool_] | NDArray[np.integer]
|
|
@@ -76,8 +77,6 @@ H5File = h5py.File
|
|
|
76
77
|
#############################
|
|
77
78
|
@cache
|
|
78
79
|
def is_zarr_v2() -> bool:
|
|
79
|
-
from packaging.version import Version
|
|
80
|
-
|
|
81
80
|
return Version(version("zarr")) < Version("3.0.0")
|
|
82
81
|
|
|
83
82
|
|
|
@@ -217,10 +216,79 @@ NULLABLE_NUMPY_STRING_TYPE = (
|
|
|
217
216
|
else np.dtypes.StringDType(na_object=pd.NA)
|
|
218
217
|
)
|
|
219
218
|
|
|
219
|
+
PANDAS_SUPPORTS_NA_VALUE = Version(version("pandas")) >= Version("2.3")
|
|
220
|
+
|
|
221
|
+
|
|
222
|
+
PANDAS_STRING_ARRAY_TYPES: list[type[pd.api.extensions.ExtensionArray]] = [
|
|
223
|
+
pd.arrays.StringArray,
|
|
224
|
+
pd.arrays.ArrowStringArray,
|
|
225
|
+
]
|
|
226
|
+
# these are removed in favor of the above classes: https://github.com/pandas-dev/pandas/pull/62149
|
|
227
|
+
try:
|
|
228
|
+
from pandas.core.arrays.string_ import StringArrayNumpySemantics
|
|
229
|
+
except ImportError:
|
|
230
|
+
pass
|
|
231
|
+
else:
|
|
232
|
+
PANDAS_STRING_ARRAY_TYPES += [StringArrayNumpySemantics]
|
|
233
|
+
try:
|
|
234
|
+
from pandas.core.arrays.string_arrow import ArrowStringArrayNumpySemantics
|
|
235
|
+
except ImportError:
|
|
236
|
+
pass
|
|
237
|
+
else:
|
|
238
|
+
PANDAS_STRING_ARRAY_TYPES += [ArrowStringArrayNumpySemantics]
|
|
239
|
+
|
|
240
|
+
|
|
241
|
+
@overload
|
|
242
|
+
def pandas_as_str(a: pd.Index[Any]) -> pd.Index[str]: ...
|
|
243
|
+
@overload
|
|
244
|
+
def pandas_as_str(a: pd.Series[Any]) -> pd.Series[str]: ...
|
|
245
|
+
|
|
246
|
+
|
|
247
|
+
def pandas_as_str(a: pd.Index | pd.Series) -> pd.Index[str] | pd.Series[str]:
|
|
248
|
+
"""Convert to fitting dtype, maintaining NA semantics if possible.
|
|
249
|
+
|
|
250
|
+
This is `"str"` when `pd.options.future.infer_string` is `True` (e.g. in Pandas 3+), and `"object"` otherwise.
|
|
251
|
+
"""
|
|
252
|
+
if not pd.options.future.infer_string:
|
|
253
|
+
return a.astype(str)
|
|
254
|
+
if a.array.dtype == "string": # any `pd.StringDtype`
|
|
255
|
+
return a
|
|
256
|
+
if PANDAS_SUPPORTS_NA_VALUE:
|
|
257
|
+
dtype = pd.StringDtype(na_value=a.array.dtype.na_value)
|
|
258
|
+
elif a.array.dtype.na_value is pd.NA:
|
|
259
|
+
dtype = pd.StringDtype() # NA semantics
|
|
260
|
+
elif a.array.dtype.na_value is np.nan and find_spec("pyarrow"): # noqa: PLW0177
|
|
261
|
+
# on pandas 2.2, this is the only way to get `np.nan` semantics
|
|
262
|
+
dtype = pd.StringDtype("pyarrow_numpy")
|
|
263
|
+
else:
|
|
264
|
+
msg = (
|
|
265
|
+
f"Converting an array with `dtype.na_value={a.array.dtype.na_value}` to a string array requires pyarrow or pandas>=2.3. "
|
|
266
|
+
"Converting to `pd.NA` semantics instead."
|
|
267
|
+
)
|
|
268
|
+
warn(msg, UserWarning, stacklevel=2)
|
|
269
|
+
dtype = pd.StringDtype() # NA semantics
|
|
270
|
+
return a.astype(dtype)
|
|
271
|
+
|
|
272
|
+
|
|
273
|
+
V = TypeVar("V")
|
|
274
|
+
T = TypeVar("T")
|
|
275
|
+
|
|
276
|
+
|
|
277
|
+
@overload
|
|
278
|
+
def _read_attr(
|
|
279
|
+
attrs: Mapping[str, V], name: str, default: Empty = Empty.TOKEN
|
|
280
|
+
) -> V: ...
|
|
281
|
+
|
|
282
|
+
|
|
283
|
+
@overload
|
|
284
|
+
def _read_attr(attrs: Mapping[str, V], name: str, default: T) -> V | T: ...
|
|
285
|
+
|
|
220
286
|
|
|
221
287
|
@singledispatch
|
|
222
|
-
def _read_attr(
|
|
223
|
-
|
|
288
|
+
def _read_attr(
|
|
289
|
+
attrs: Mapping[str, V], name: str, default: T | Empty = Empty.TOKEN
|
|
290
|
+
) -> V | T:
|
|
291
|
+
if default is Empty.TOKEN:
|
|
224
292
|
return attrs[name]
|
|
225
293
|
else:
|
|
226
294
|
return attrs.get(name, default=default)
|
|
@@ -228,8 +296,8 @@ def _read_attr(attrs: Mapping, name: str, default: Any | None = Empty):
|
|
|
228
296
|
|
|
229
297
|
@_read_attr.register(h5py.AttributeManager)
|
|
230
298
|
def _read_attr_hdf5(
|
|
231
|
-
attrs: h5py.AttributeManager, name: str, default:
|
|
232
|
-
):
|
|
299
|
+
attrs: h5py.AttributeManager, name: str, default: T | Empty = Empty.TOKEN
|
|
300
|
+
) -> str | T:
|
|
233
301
|
"""
|
|
234
302
|
Read an HDF5 attribute and perform all necessary conversions.
|
|
235
303
|
|
|
@@ -238,7 +306,7 @@ def _read_attr_hdf5(
|
|
|
238
306
|
For example Julia's HDF5.jl writes string attributes as fixed-size strings, which
|
|
239
307
|
are read as bytes by h5py.
|
|
240
308
|
"""
|
|
241
|
-
if name not in attrs and default is not Empty:
|
|
309
|
+
if name not in attrs and default is not Empty.TOKEN:
|
|
242
310
|
return default
|
|
243
311
|
attr = attrs[name]
|
|
244
312
|
attr_id = attrs.get_id(name)
|
|
@@ -16,10 +16,7 @@ if TYPE_CHECKING:
|
|
|
16
16
|
from anndata.typing import RWAble
|
|
17
17
|
|
|
18
18
|
|
|
19
|
-
def read_dispatched(
|
|
20
|
-
elem: StorageType,
|
|
21
|
-
callback: ReadCallback,
|
|
22
|
-
) -> RWAble:
|
|
19
|
+
def read_dispatched(elem: StorageType, callback: ReadCallback) -> RWAble:
|
|
23
20
|
"""
|
|
24
21
|
Read elem, calling the callback at each sub-element.
|
|
25
22
|
|
|
@@ -19,16 +19,22 @@ from ...compat import (
|
|
|
19
19
|
XZarrArrayWrapper,
|
|
20
20
|
ZarrArray,
|
|
21
21
|
)
|
|
22
|
-
from ...compat import xarray as xr
|
|
23
22
|
|
|
24
23
|
if TYPE_CHECKING:
|
|
25
24
|
from pathlib import Path
|
|
26
25
|
from typing import Literal
|
|
27
26
|
|
|
27
|
+
from pandas._libs.missing import NAType
|
|
28
|
+
from pandas.core.dtypes.base import ExtensionDtype
|
|
29
|
+
|
|
28
30
|
from anndata.compat import ZarrGroup
|
|
29
31
|
|
|
30
32
|
from ...compat import Index1DNorm
|
|
31
33
|
|
|
34
|
+
if TYPE_CHECKING: # Double nesting so Sphinx can import the parent block
|
|
35
|
+
from xarray.core.extension_array import PandasExtensionArray
|
|
36
|
+
from xarray.core.indexing import ExplicitIndexer
|
|
37
|
+
|
|
32
38
|
|
|
33
39
|
K = TypeVar("K", H5Array, ZarrArray)
|
|
34
40
|
|
|
@@ -43,14 +49,13 @@ class ZarrOrHDF5Wrapper(XZarrArrayWrapper, Generic[K]):
|
|
|
43
49
|
self.shape = self._array.shape
|
|
44
50
|
self.dtype = self._array.dtype
|
|
45
51
|
|
|
46
|
-
def __getitem__(self, key:
|
|
52
|
+
def __getitem__(self, key: ExplicitIndexer):
|
|
53
|
+
from xarray.core.indexing import IndexingSupport, explicit_indexing_adapter
|
|
54
|
+
|
|
47
55
|
if isinstance(self._array, ZarrArray):
|
|
48
56
|
return super().__getitem__(key)
|
|
49
|
-
res =
|
|
50
|
-
key,
|
|
51
|
-
self.shape,
|
|
52
|
-
xr.core.indexing.IndexingSupport.OUTER_1VECTOR,
|
|
53
|
-
self._getitem,
|
|
57
|
+
res = explicit_indexing_adapter(
|
|
58
|
+
key, self.shape, IndexingSupport.OUTER_1VECTOR, self._getitem
|
|
54
59
|
)
|
|
55
60
|
return res
|
|
56
61
|
|
|
@@ -109,22 +114,23 @@ class CategoricalArray(XBackendArray, Generic[K]):
|
|
|
109
114
|
|
|
110
115
|
@cached_property
|
|
111
116
|
def categories(self) -> np.ndarray:
|
|
112
|
-
if isinstance(self._categories, ZarrArray):
|
|
113
|
-
return self._categories[...]
|
|
114
117
|
from anndata.io import read_elem
|
|
115
118
|
|
|
116
119
|
return read_elem(self._categories)
|
|
117
120
|
|
|
118
|
-
def __getitem__(
|
|
119
|
-
|
|
120
|
-
|
|
121
|
+
def __getitem__(self, key: ExplicitIndexer) -> PandasExtensionArray:
|
|
122
|
+
from xarray.core.extension_array import PandasExtensionArray
|
|
123
|
+
|
|
121
124
|
codes = self._codes[key]
|
|
122
125
|
categorical_array = pd.Categorical.from_codes(
|
|
123
|
-
codes=codes,
|
|
126
|
+
codes=codes,
|
|
127
|
+
# casting to numpy (string) maintains our old behavior, this will be relaxed in 0.13
|
|
128
|
+
categories=np.array(self.categories),
|
|
129
|
+
ordered=self._ordered,
|
|
124
130
|
)
|
|
125
131
|
if settings.remove_unused_categories:
|
|
126
132
|
categorical_array = categorical_array.remove_unused_categories()
|
|
127
|
-
return
|
|
133
|
+
return PandasExtensionArray(categorical_array)
|
|
128
134
|
|
|
129
135
|
@cached_property
|
|
130
136
|
def dtype(self):
|
|
@@ -163,9 +169,9 @@ class MaskedArray(XBackendArray, Generic[K]):
|
|
|
163
169
|
self.file_format = "zarr" if isinstance(mask, ZarrArray) else "h5"
|
|
164
170
|
self.elem_name = elem_name
|
|
165
171
|
|
|
166
|
-
def __getitem__(
|
|
167
|
-
|
|
168
|
-
|
|
172
|
+
def __getitem__(self, key: ExplicitIndexer) -> PandasExtensionArray | np.ndarray:
|
|
173
|
+
from xarray.core.extension_array import PandasExtensionArray
|
|
174
|
+
|
|
169
175
|
values = self._values[key]
|
|
170
176
|
mask = self._mask[key]
|
|
171
177
|
if self._dtype_str == "nullable-integer":
|
|
@@ -181,10 +187,10 @@ class MaskedArray(XBackendArray, Generic[K]):
|
|
|
181
187
|
else:
|
|
182
188
|
msg = f"Invalid dtype_str {self._dtype_str}"
|
|
183
189
|
raise RuntimeError(msg)
|
|
184
|
-
return
|
|
190
|
+
return PandasExtensionArray(extension_array)
|
|
185
191
|
|
|
186
192
|
@cached_property
|
|
187
|
-
def dtype(self):
|
|
193
|
+
def dtype(self) -> np.dtypes.StringDType[NAType] | ExtensionDtype:
|
|
188
194
|
if self._dtype_str == "nullable-integer":
|
|
189
195
|
return pd.array(
|
|
190
196
|
[],
|
anndata/experimental/merge.py
CHANGED
|
@@ -2,6 +2,7 @@ from __future__ import annotations
|
|
|
2
2
|
|
|
3
3
|
import shutil
|
|
4
4
|
from collections.abc import Mapping
|
|
5
|
+
from contextlib import ExitStack, contextmanager
|
|
5
6
|
from functools import singledispatch
|
|
6
7
|
from os import PathLike
|
|
7
8
|
from pathlib import Path
|
|
@@ -30,10 +31,11 @@ from ..compat import H5Array, H5Group, ZarrArray, ZarrGroup
|
|
|
30
31
|
from . import read_dispatched, read_elem_lazy
|
|
31
32
|
|
|
32
33
|
if TYPE_CHECKING:
|
|
33
|
-
from collections.abc import Callable, Collection, Iterable, Sequence
|
|
34
|
+
from collections.abc import Callable, Collection, Generator, Iterable, Sequence
|
|
34
35
|
from typing import Any, Literal
|
|
35
36
|
|
|
36
37
|
from .._core.merge import Reindexer, StrategiesLiteral
|
|
38
|
+
from .._types import Join_T
|
|
37
39
|
|
|
38
40
|
SPARSE_MATRIX = {"csc_matrix", "csr_matrix"}
|
|
39
41
|
|
|
@@ -100,35 +102,42 @@ def _gen_slice_to_append(
|
|
|
100
102
|
|
|
101
103
|
|
|
102
104
|
@singledispatch
|
|
103
|
-
|
|
105
|
+
@contextmanager
|
|
106
|
+
def as_group(store, *, mode: str) -> Generator[ZarrGroup | H5Group]:
|
|
104
107
|
msg = "This is not yet implemented."
|
|
105
108
|
raise NotImplementedError(msg)
|
|
106
109
|
|
|
107
110
|
|
|
108
111
|
@as_group.register(PathLike)
|
|
109
112
|
@as_group.register(str)
|
|
110
|
-
|
|
113
|
+
@contextmanager
|
|
114
|
+
def _(store: PathLike[str] | str, *, mode: str) -> Generator[ZarrGroup | H5Group]:
|
|
111
115
|
store = Path(store)
|
|
112
116
|
if store.suffix == ".h5ad":
|
|
113
117
|
import h5py
|
|
114
118
|
|
|
115
|
-
|
|
119
|
+
f = h5py.File(store, mode=mode)
|
|
120
|
+
try:
|
|
121
|
+
yield f
|
|
122
|
+
finally:
|
|
123
|
+
f.close()
|
|
116
124
|
|
|
117
|
-
|
|
125
|
+
elif mode == "r": # others all write: r+, a, w, w-
|
|
118
126
|
import zarr
|
|
119
127
|
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
128
|
+
yield zarr.open_group(store, mode=mode)
|
|
129
|
+
else:
|
|
130
|
+
from anndata._io.zarr import open_write_group
|
|
123
131
|
|
|
124
|
-
|
|
132
|
+
yield open_write_group(store, mode=mode)
|
|
125
133
|
|
|
126
134
|
|
|
127
135
|
@as_group.register(ZarrGroup)
|
|
128
136
|
@as_group.register(H5Group)
|
|
129
|
-
|
|
137
|
+
@contextmanager
|
|
138
|
+
def _(store: ZarrGroup | H5Group, *, mode: str) -> Generator[ZarrGroup | H5Group]:
|
|
130
139
|
del mode
|
|
131
|
-
|
|
140
|
+
yield store
|
|
132
141
|
|
|
133
142
|
|
|
134
143
|
###################
|
|
@@ -441,9 +450,10 @@ def _write_alt_pairwise(
|
|
|
441
450
|
write_elem(output_group, f"{alt_axis_name}p", alt_pairwise)
|
|
442
451
|
|
|
443
452
|
|
|
444
|
-
def concat_on_disk( # noqa:
|
|
445
|
-
in_files: Collection[PathLike[str] | str
|
|
446
|
-
|
|
453
|
+
def concat_on_disk( # noqa: PLR0913
|
|
454
|
+
in_files: Collection[PathLike[str] | str | H5Group | ZarrGroup]
|
|
455
|
+
| Mapping[str, PathLike[str] | str | H5Group | ZarrGroup],
|
|
456
|
+
out_file: PathLike[str] | str | H5Group | ZarrGroup,
|
|
447
457
|
*,
|
|
448
458
|
max_loaded_elems: int = 100_000_000,
|
|
449
459
|
axis: Literal["obs", 0, "var", 1] = 0,
|
|
@@ -584,10 +594,11 @@ def concat_on_disk( # noqa: PLR0912, PLR0913, PLR0915
|
|
|
584
594
|
merge = resolve_merge_strategy(merge)
|
|
585
595
|
uns_merge = resolve_merge_strategy(uns_merge)
|
|
586
596
|
|
|
587
|
-
|
|
588
|
-
|
|
589
|
-
|
|
590
|
-
|
|
597
|
+
if is_out_path_like := isinstance(out_file, str | PathLike):
|
|
598
|
+
out_file = Path(out_file)
|
|
599
|
+
if not out_file.parent.exists():
|
|
600
|
+
msg = f"Parent directory of {out_file} does not exist."
|
|
601
|
+
raise FileNotFoundError(msg)
|
|
591
602
|
|
|
592
603
|
if isinstance(in_files, Mapping):
|
|
593
604
|
if keys is not None:
|
|
@@ -600,7 +611,11 @@ def concat_on_disk( # noqa: PLR0912, PLR0913, PLR0915
|
|
|
600
611
|
else:
|
|
601
612
|
in_files = list(in_files)
|
|
602
613
|
|
|
603
|
-
if
|
|
614
|
+
if (
|
|
615
|
+
len(in_files) == 1
|
|
616
|
+
and isinstance(in_files[0], str | PathLike)
|
|
617
|
+
and is_out_path_like
|
|
618
|
+
):
|
|
604
619
|
shutil.copy2(in_files[0], out_file)
|
|
605
620
|
return
|
|
606
621
|
|
|
@@ -610,9 +625,40 @@ def concat_on_disk( # noqa: PLR0912, PLR0913, PLR0915
|
|
|
610
625
|
axis, axis_name = _resolve_axis(axis)
|
|
611
626
|
_, alt_axis_name = _resolve_axis(1 - axis)
|
|
612
627
|
|
|
613
|
-
|
|
614
|
-
|
|
628
|
+
with ExitStack() as stack, as_group(out_file, mode="w") as output_group:
|
|
629
|
+
groups = [stack.enter_context(as_group(f, mode="r")) for f in in_files]
|
|
630
|
+
_concat_on_disk_inner(
|
|
631
|
+
groups=groups,
|
|
632
|
+
output_group=output_group,
|
|
633
|
+
axis=axis,
|
|
634
|
+
axis_name=axis_name,
|
|
635
|
+
alt_axis_name=alt_axis_name,
|
|
636
|
+
keys=keys,
|
|
637
|
+
max_loaded_elems=max_loaded_elems,
|
|
638
|
+
join=join,
|
|
639
|
+
label=label,
|
|
640
|
+
index_unique=index_unique,
|
|
641
|
+
fill_value=fill_value,
|
|
642
|
+
merge=merge,
|
|
643
|
+
)
|
|
615
644
|
|
|
645
|
+
|
|
646
|
+
def _concat_on_disk_inner( # noqa: PLR0913
|
|
647
|
+
*,
|
|
648
|
+
groups: list[H5Group | ZarrGroup],
|
|
649
|
+
output_group: H5Group | ZarrGroup,
|
|
650
|
+
axis: Literal[0, 1],
|
|
651
|
+
axis_name: Literal["obs", "var"],
|
|
652
|
+
alt_axis_name: Literal["obs", "var"],
|
|
653
|
+
keys: np.ndarray[tuple[int], np.dtype[Any]] | Collection[str],
|
|
654
|
+
max_loaded_elems: int,
|
|
655
|
+
join: Join_T = "inner",
|
|
656
|
+
label: str | None,
|
|
657
|
+
index_unique: str | None,
|
|
658
|
+
fill_value: Any | None,
|
|
659
|
+
merge: Callable[[Collection[Mapping]], Mapping],
|
|
660
|
+
) -> None:
|
|
661
|
+
"""Internal helper to minimize the amount of indented code within the context manager"""
|
|
616
662
|
use_reindexing = False
|
|
617
663
|
|
|
618
664
|
alt_idxs = [_df_index(g[alt_axis_name]) for g in groups]
|
|
@@ -489,7 +489,7 @@ class AnnCollectionView(_ConcatViewMixin, _IterateViewMixin):
|
|
|
489
489
|
# change dtype for all keys of .obsm
|
|
490
490
|
"obsm": lambda a: np.asarray(a, dtype="float32"),
|
|
491
491
|
# change type only for one key of .obs
|
|
492
|
-
"obs": dict(key1=lambda c: c.astype(
|
|
492
|
+
"obs": dict(key1=lambda c: c.astype("string")),
|
|
493
493
|
}
|
|
494
494
|
"""
|
|
495
495
|
return self._convert
|
|
@@ -834,7 +834,7 @@ class AnnCollection(_ConcatViewMixin, _IterateViewMixin):
|
|
|
834
834
|
# change dtype for all keys of .obsm
|
|
835
835
|
"obsm": lambda a: np.asarray(a, dtype="float32"),
|
|
836
836
|
# change type only for one key of .obs
|
|
837
|
-
"obs": dict(key1=lambda c: c.astype(
|
|
837
|
+
"obs": dict(key1=lambda c: c.astype("string")),
|
|
838
838
|
}
|
|
839
839
|
"""
|
|
840
840
|
return self._convert
|
anndata/tests/helpers.py
CHANGED
|
@@ -322,8 +322,8 @@ def gen_adata( # noqa: PLR0913
|
|
|
322
322
|
random_state = np.random.default_rng()
|
|
323
323
|
|
|
324
324
|
M, N = shape
|
|
325
|
-
obs_names = pd.Index(f"cell{i}" for i in range(shape[0]))
|
|
326
|
-
var_names = pd.Index(f"gene{i}" for i in range(shape[1]))
|
|
325
|
+
obs_names = pd.Index([f"cell{i}" for i in range(shape[0])], dtype="str")
|
|
326
|
+
var_names = pd.Index([f"gene{i}" for i in range(shape[1])], dtype="str")
|
|
327
327
|
obs = gen_typed_df(M, obs_names, dtypes=obs_dtypes)
|
|
328
328
|
var = gen_typed_df(N, var_names, dtypes=var_dtypes)
|
|
329
329
|
# For #147
|
|
@@ -1166,7 +1166,8 @@ class AccessTrackingStoreBase(LocalStore):
|
|
|
1166
1166
|
def reset_key_trackers(self) -> None:
|
|
1167
1167
|
self.initialize_key_trackers(self._access_count.keys())
|
|
1168
1168
|
|
|
1169
|
-
def assert_access_count(self, key: str, count: int):
|
|
1169
|
+
def assert_access_count(self, key: str, count: int) -> None:
|
|
1170
|
+
__tracebackhide__ = True
|
|
1170
1171
|
keys_accessed = self.get_subkeys_accessed(key)
|
|
1171
1172
|
access_count = self.get_access_count(key)
|
|
1172
1173
|
assert self.get_access_count(key) == count, (
|
anndata/utils.py
CHANGED
|
@@ -216,7 +216,7 @@ except ImportError:
|
|
|
216
216
|
pass
|
|
217
217
|
|
|
218
218
|
|
|
219
|
-
def make_index_unique(index: pd.Index, join: str = "-"):
|
|
219
|
+
def make_index_unique(index: pd.Index[str], join: str = "-") -> pd.Index[str]:
|
|
220
220
|
"""
|
|
221
221
|
Makes the index unique by appending a number string to each duplicate index element:
|
|
222
222
|
'1', '2', etc.
|
|
@@ -235,18 +235,18 @@ def make_index_unique(index: pd.Index, join: str = "-"):
|
|
|
235
235
|
--------
|
|
236
236
|
>>> from anndata import AnnData
|
|
237
237
|
>>> adata = AnnData(np.ones((2, 3)), var=pd.DataFrame(index=["a", "a", "b"]))
|
|
238
|
-
>>> adata.var_names
|
|
239
|
-
Index(['a', 'a', 'b'], dtype='
|
|
238
|
+
>>> adata.var_names.astype("string")
|
|
239
|
+
Index(['a', 'a', 'b'], dtype='string')
|
|
240
240
|
>>> adata.var_names_make_unique()
|
|
241
|
-
>>> adata.var_names
|
|
242
|
-
Index(['a', 'a-1', 'b'], dtype='
|
|
241
|
+
>>> adata.var_names.astype("string")
|
|
242
|
+
Index(['a', 'a-1', 'b'], dtype='string')
|
|
243
243
|
"""
|
|
244
244
|
if index.is_unique:
|
|
245
245
|
return index
|
|
246
246
|
from collections import Counter
|
|
247
247
|
|
|
248
|
-
values = index.
|
|
249
|
-
indices_dup = index.duplicated(keep="first")
|
|
248
|
+
values = index.array.copy()
|
|
249
|
+
indices_dup = index.duplicated(keep="first") & ~index.isna()
|
|
250
250
|
values_dup = values[indices_dup]
|
|
251
251
|
values_set = set(values)
|
|
252
252
|
counter = Counter()
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: anndata
|
|
3
|
-
Version: 0.12.
|
|
3
|
+
Version: 0.12.7
|
|
4
4
|
Summary: Annotated data.
|
|
5
5
|
Project-URL: Documentation, https://anndata.readthedocs.io/
|
|
6
6
|
Project-URL: Source, https://github.com/scverse/anndata
|
|
@@ -31,7 +31,7 @@ Requires-Dist: legacy-api-wrap
|
|
|
31
31
|
Requires-Dist: natsort
|
|
32
32
|
Requires-Dist: numpy>=1.26
|
|
33
33
|
Requires-Dist: packaging>=24.2
|
|
34
|
-
Requires-Dist: pandas!=2.1.2,>=2.1.0
|
|
34
|
+
Requires-Dist: pandas!=2.1.2,<3,>=2.1.0
|
|
35
35
|
Requires-Dist: scipy>=1.12
|
|
36
36
|
Requires-Dist: zarr!=3.0.*,>=2.18.7
|
|
37
37
|
Provides-Extra: cu11
|
|
@@ -57,7 +57,7 @@ Requires-Dist: sphinx-copybutton; extra == 'doc'
|
|
|
57
57
|
Requires-Dist: sphinx-design>=0.5.0; extra == 'doc'
|
|
58
58
|
Requires-Dist: sphinx-issues>=5.0.1; extra == 'doc'
|
|
59
59
|
Requires-Dist: sphinx-toolbox>=3.8.0; extra == 'doc'
|
|
60
|
-
Requires-Dist: sphinx
|
|
60
|
+
Requires-Dist: sphinx<9,>=8.2.1; extra == 'doc'
|
|
61
61
|
Requires-Dist: sphinxext-opengraph; extra == 'doc'
|
|
62
62
|
Requires-Dist: towncrier>=24.8.0; extra == 'doc'
|
|
63
63
|
Provides-Extra: gpu
|
|
@@ -80,12 +80,12 @@ Requires-Dist: loompy>=3.0.5; extra == 'test'
|
|
|
80
80
|
Requires-Dist: matplotlib; extra == 'test'
|
|
81
81
|
Requires-Dist: openpyxl; extra == 'test'
|
|
82
82
|
Requires-Dist: pyarrow; extra == 'test'
|
|
83
|
+
Requires-Dist: pytest; extra == 'test'
|
|
83
84
|
Requires-Dist: pytest-cov; extra == 'test'
|
|
84
85
|
Requires-Dist: pytest-memray; extra == 'test'
|
|
85
86
|
Requires-Dist: pytest-mock; extra == 'test'
|
|
86
87
|
Requires-Dist: pytest-randomly; extra == 'test'
|
|
87
88
|
Requires-Dist: pytest-xdist[psutil]; extra == 'test'
|
|
88
|
-
Requires-Dist: pytest<8.3.4,>=8.2; extra == 'test'
|
|
89
89
|
Requires-Dist: requests; extra == 'test'
|
|
90
90
|
Requires-Dist: scanpy>=1.10; extra == 'test'
|
|
91
91
|
Requires-Dist: scikit-learn; extra == 'test'
|
|
@@ -102,12 +102,12 @@ Requires-Dist: loompy>=3.0.5; extra == 'test-min'
|
|
|
102
102
|
Requires-Dist: matplotlib; extra == 'test-min'
|
|
103
103
|
Requires-Dist: openpyxl; extra == 'test-min'
|
|
104
104
|
Requires-Dist: pyarrow; extra == 'test-min'
|
|
105
|
+
Requires-Dist: pytest; extra == 'test-min'
|
|
105
106
|
Requires-Dist: pytest-cov; extra == 'test-min'
|
|
106
107
|
Requires-Dist: pytest-memray; extra == 'test-min'
|
|
107
108
|
Requires-Dist: pytest-mock; extra == 'test-min'
|
|
108
109
|
Requires-Dist: pytest-randomly; extra == 'test-min'
|
|
109
110
|
Requires-Dist: pytest-xdist[psutil]; extra == 'test-min'
|
|
110
|
-
Requires-Dist: pytest<8.3.4,>=8.2; extra == 'test-min'
|
|
111
111
|
Requires-Dist: scanpy>=1.10; extra == 'test-min'
|
|
112
112
|
Requires-Dist: scikit-learn; extra == 'test-min'
|
|
113
113
|
Description-Content-Type: text/markdown
|
|
@@ -1,57 +1,57 @@
|
|
|
1
1
|
anndata/__init__.py,sha256=daAzY8GGouJxCe30Lcr2pl9Jwo2dcGXHPi7WxnHpuOE,1710
|
|
2
|
-
anndata/_settings.py,sha256=
|
|
2
|
+
anndata/_settings.py,sha256=A5duA5C2-S2vt2O53kWpBpa6PyQi4qfGm24ndudU6fs,17603
|
|
3
3
|
anndata/_settings.pyi,sha256=mJQQ3I66Y3sng8K-_aGjYuo5UoNgC5syw0yuNc1FADU,1643
|
|
4
|
-
anndata/_types.py,sha256=
|
|
4
|
+
anndata/_types.py,sha256=RbSN6dc46J2qDTZ9y9JXrzqfwhoCX5zL1ZPH7wTQyrM,5415
|
|
5
5
|
anndata/_warnings.py,sha256=iFXa9EzPyuPbzRAzoG04oTXAyjnXhQa5zxAMZdsGLwM,702
|
|
6
6
|
anndata/abc.py,sha256=jG64k59ZZ9Hfn-QWt_btZLuF7eGv_YNYwH91WdbR240,1645
|
|
7
7
|
anndata/io.py,sha256=DrIo-FU6qbrdk5aVKoUIBoMttZaO5QWP4bowS9xaebI,698
|
|
8
8
|
anndata/logging.py,sha256=E6nlPl-Em0yBjL5p-EcQFmhHTIUirhnZbfXbQtSVSek,1662
|
|
9
9
|
anndata/types.py,sha256=FF3wDkntl6Jq35l0r_kEET33ljj9L7pmIrUr5-MLAvE,698
|
|
10
10
|
anndata/typing.py,sha256=sRiAg16asjnKyXk1L4BtKWggyHMPLoxXzxTDmX3i7MY,1555
|
|
11
|
-
anndata/utils.py,sha256=
|
|
11
|
+
anndata/utils.py,sha256=25M9B1rv3ZqHv0ZSIWpWjBLZSg4Bn_kv1ZnfRZghSaU,14829
|
|
12
12
|
anndata/_core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
13
13
|
anndata/_core/access.py,sha256=pts7fGUKgGZANSsu_qAA7L10qHM-jT1zIehbl3441OY,873
|
|
14
|
-
anndata/_core/aligned_df.py,sha256=
|
|
14
|
+
anndata/_core/aligned_df.py,sha256=bM9kkEFURRLeUOUMk90WxVnRC-ZsXGEDx36kDj5gC9I,4278
|
|
15
15
|
anndata/_core/aligned_mapping.py,sha256=BYU1jslMWIhtFTtUMaXY8ZCyt0J4_ZsJTmj6J2yAXTQ,14257
|
|
16
|
-
anndata/_core/anndata.py,sha256=
|
|
16
|
+
anndata/_core/anndata.py,sha256=e_IgHjIpXPQPJqakeH83bl8Quu9T0N--Mb_QHINayv4,79276
|
|
17
17
|
anndata/_core/extensions.py,sha256=9Rsho6qnr3PJHULrYGiZHCBinBZYJK6zyf3cFsl_gBY,10425
|
|
18
18
|
anndata/_core/file_backing.py,sha256=6DhBfLQPDFDpoe6wSgnOFtpC4Hnbh-UgOPbqvYDxm8g,5603
|
|
19
19
|
anndata/_core/index.py,sha256=F3TQBUbWpt09Pb4MpwB7xfCI9uPuv7jrqx8X74CwVDU,13472
|
|
20
|
-
anndata/_core/merge.py,sha256=
|
|
20
|
+
anndata/_core/merge.py,sha256=wFsUotHnQsnFp84UoCp78XMw3zVfvUH5eIi-8hkb7zo,60880
|
|
21
21
|
anndata/_core/raw.py,sha256=x_PwwaDQscVQOFJ38kF7sNQ47LxowpS38h2RQfU5Zwo,7925
|
|
22
|
-
anndata/_core/sparse_dataset.py,sha256=
|
|
22
|
+
anndata/_core/sparse_dataset.py,sha256=R2BeSLiREiwk9FNjdLCR3VfbYatz-7BK0l2F9XqCiTk,27280
|
|
23
23
|
anndata/_core/storage.py,sha256=mHzqp7YBJ-rGQFulMAx__D-Z7y4omHPyb1cP7YxfbFE,2555
|
|
24
|
-
anndata/_core/views.py,sha256
|
|
25
|
-
anndata/_core/xarray.py,sha256=
|
|
24
|
+
anndata/_core/views.py,sha256=-tiUwugw0bRYXzewruhU0xXT7nnDLdYf4CiFByLl34w,15067
|
|
25
|
+
anndata/_core/xarray.py,sha256=0de8K7YjG9mnT-dFSRoxVxgwQktjrGI9n5Yy-1YJSHg,16624
|
|
26
26
|
anndata/_io/__init__.py,sha256=GTNeUZ8d8aA3sK4P33tyljIc60KapLbkqBC6J1y3l9U,346
|
|
27
|
-
anndata/_io/h5ad.py,sha256=
|
|
28
|
-
anndata/_io/read.py,sha256=
|
|
27
|
+
anndata/_io/h5ad.py,sha256=JT5DxTXXibz2jh1mjaQB3_0QYdhJ3gv4IcWLPjKD-dw,13976
|
|
28
|
+
anndata/_io/read.py,sha256=Z0QdFkaaXmGo5a25O9N9Ej2v8U7b9oV9Umw98YtB5uA,15950
|
|
29
29
|
anndata/_io/utils.py,sha256=3Lg27Q0Uo3HYlz980bG2Y02_VFIt0PiXMNIj_o-mgC4,9490
|
|
30
30
|
anndata/_io/write.py,sha256=r55w6yPIIuUSLW9wyYL8GnkzHHQdAxy6xiCEw9cAC38,4811
|
|
31
31
|
anndata/_io/zarr.py,sha256=Z996SZ8LV1Fpa_q8o70vHnBzNLOLlVjhf_Rs5EM_Slo,5461
|
|
32
32
|
anndata/_io/specs/__init__.py,sha256=Z6l8xqa7B480U3pqrNIg4-fhUvpBW85w4xA3i3maAUM,427
|
|
33
|
-
anndata/_io/specs/lazy_methods.py,sha256=
|
|
34
|
-
anndata/_io/specs/methods.py,sha256=
|
|
33
|
+
anndata/_io/specs/lazy_methods.py,sha256=aCdmmYLrOHlMyT18t3sLE2I51YGT-jDna2F3m7b_kv0,13093
|
|
34
|
+
anndata/_io/specs/methods.py,sha256=awmdbUMAP9Xjkid56LAbyWNQfKcCOrkx0BeQ6CDKek4,46422
|
|
35
35
|
anndata/_io/specs/registry.py,sha256=6Z_ffk3uOIagzRPcDCvEoszcgD-U3n8wYnGiPA71ZeI,17539
|
|
36
|
-
anndata/compat/__init__.py,sha256=
|
|
36
|
+
anndata/compat/__init__.py,sha256=9696gHdOUz2yKih9epmT8WGSr6UX0pI8dJYTrqn0SJQ,14968
|
|
37
37
|
anndata/experimental/__init__.py,sha256=polIxriEkby0iEqw-IXkUzp8k0wp92BpYY4zl4BsHH0,1648
|
|
38
|
-
anndata/experimental/_dispatch_io.py,sha256=
|
|
39
|
-
anndata/experimental/merge.py,sha256=
|
|
38
|
+
anndata/experimental/_dispatch_io.py,sha256=gb9JUcgS1cIERjxM1PBpWDXfPkKgMevoLF0QInZfC-g,1858
|
|
39
|
+
anndata/experimental/merge.py,sha256=gWBS4HSkG8E3seIs2AS7jqqFc0Zp6JW94KWtNXApafg,24882
|
|
40
40
|
anndata/experimental/backed/__init__.py,sha256=4dc9M_-_SlfUidDrbWt8PRyD_8bYjypHJ86IpdThHus,230
|
|
41
41
|
anndata/experimental/backed/_compat.py,sha256=rM7CnSJEZCko5wPBFRfvZA9ZKUSpaOVcWFy5u09p1go,519
|
|
42
42
|
anndata/experimental/backed/_io.py,sha256=YM5FL6sKdLyQTHUa43cF0pDNbyj2xD9X7lzUiQesV20,6681
|
|
43
|
-
anndata/experimental/backed/_lazy_arrays.py,sha256=
|
|
43
|
+
anndata/experimental/backed/_lazy_arrays.py,sha256=8vcu7eyoRRlzNXyAzuY0s9CqEZCOAYoZIo-iI5d71_g,7805
|
|
44
44
|
anndata/experimental/multi_files/__init__.py,sha256=T7iNLlRbe-KnLT3o7Tb7_nE4Iy_hLkG66UjBOvj2Bj8,107
|
|
45
|
-
anndata/experimental/multi_files/_anncollection.py,sha256=
|
|
45
|
+
anndata/experimental/multi_files/_anncollection.py,sha256=Ra8A4MzyFWlid5RJd0cc2d4SJeSZ2HXz3odKSqAbChw,35264
|
|
46
46
|
anndata/experimental/pytorch/__init__.py,sha256=4CkgrahLO8Kc-s2bmv6lVQfDxbO3IUyV0v4ygBDkttY,95
|
|
47
47
|
anndata/experimental/pytorch/_annloader.py,sha256=7mpsFV5vBfxKIje1cPjahtDZ5afkU-H663XB4FJhmok,8075
|
|
48
48
|
anndata/tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
49
|
-
anndata/tests/helpers.py,sha256=
|
|
49
|
+
anndata/tests/helpers.py,sha256=BORIeSbcD0R_PDzi1IeR252it-aq6bL8fGN-bDR-Q1I,37689
|
|
50
50
|
testing/anndata/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
51
51
|
testing/anndata/_doctest.py,sha256=Qew0N0zLLNiPKN1CLunqY5cTinFLaEhY5GagiYfm6KI,344
|
|
52
|
-
testing/anndata/_pytest.py,sha256=
|
|
52
|
+
testing/anndata/_pytest.py,sha256=C_R-N2x9NHKZ66YLkvMLWkXQG1WiouOkBnLQpYx_62Q,3994
|
|
53
53
|
testing/anndata/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
54
|
-
anndata-0.12.
|
|
55
|
-
anndata-0.12.
|
|
56
|
-
anndata-0.12.
|
|
57
|
-
anndata-0.12.
|
|
54
|
+
anndata-0.12.7.dist-info/METADATA,sha256=LQgLMW_q9Q4V4k0yBNpYNCPnPSUCxzyubwSDxR_RqTE,9939
|
|
55
|
+
anndata-0.12.7.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
|
|
56
|
+
anndata-0.12.7.dist-info/licenses/LICENSE,sha256=VcrXoEVMhtNuvMvKYGP-I5lMT8qZ_6dFf22fsL180qA,1575
|
|
57
|
+
anndata-0.12.7.dist-info/RECORD,,
|
testing/anndata/_pytest.py
CHANGED
|
@@ -16,6 +16,8 @@ from typing import TYPE_CHECKING, cast
|
|
|
16
16
|
|
|
17
17
|
import pytest
|
|
18
18
|
|
|
19
|
+
import anndata
|
|
20
|
+
|
|
19
21
|
if TYPE_CHECKING:
|
|
20
22
|
from collections.abc import Generator, Iterable
|
|
21
23
|
from pathlib import Path
|
|
@@ -23,7 +25,6 @@ if TYPE_CHECKING:
|
|
|
23
25
|
|
|
24
26
|
@pytest.fixture(autouse=True)
|
|
25
27
|
def _anndata_test_env(request: pytest.FixtureRequest) -> None:
|
|
26
|
-
import anndata
|
|
27
28
|
|
|
28
29
|
if isinstance(request.node, pytest.DoctestItem):
|
|
29
30
|
request.getfixturevalue("_doctest_env")
|
|
File without changes
|