anndata 0.12.0rc3__py3-none-any.whl → 0.12.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- anndata/_core/aligned_df.py +1 -1
- anndata/_core/aligned_mapping.py +1 -1
- anndata/_core/anndata.py +1 -1
- anndata/_core/index.py +2 -10
- anndata/_core/merge.py +71 -91
- anndata/_core/sparse_dataset.py +24 -7
- anndata/_core/storage.py +2 -2
- anndata/_core/xarray.py +310 -51
- anndata/_io/h5ad.py +1 -1
- anndata/_io/read.py +5 -1
- anndata/_io/specs/lazy_methods.py +52 -19
- anndata/_io/specs/methods.py +18 -16
- anndata/_io/specs/registry.py +16 -5
- anndata/_io/zarr.py +0 -14
- anndata/_settings.py +5 -2
- anndata/_settings.pyi +49 -0
- anndata/_types.py +7 -0
- anndata/compat/__init__.py +12 -0
- anndata/experimental/__init__.py +9 -1
- anndata/experimental/backed/__init__.py +3 -1
- anndata/experimental/backed/_io.py +2 -7
- anndata/experimental/backed/_lazy_arrays.py +39 -9
- anndata/tests/helpers.py +36 -42
- anndata/utils.py +3 -4
- {anndata-0.12.0rc3.dist-info → anndata-0.12.1.dist-info}/METADATA +11 -35
- anndata-0.12.1.dist-info/RECORD +58 -0
- testing/anndata/_pytest.py +4 -0
- anndata-0.12.0rc3.dist-info/RECORD +0 -57
- {anndata-0.12.0rc3.dist-info → anndata-0.12.1.dist-info}/WHEEL +0 -0
- {anndata-0.12.0rc3.dist-info → anndata-0.12.1.dist-info}/licenses/LICENSE +0 -0
anndata/_core/aligned_df.py
CHANGED
anndata/_core/aligned_mapping.py
CHANGED
@@ -79,7 +79,7 @@ class AlignedMappingBase(MutableMapping[str, Value], ABC):
         elif isinstance(val, np.ndarray | CupyArray) and len(val.shape) == 1:
             val = val.reshape((val.shape[0], 1))
         elif isinstance(val, XDataset):
-            val = Dataset2D(
+            val = Dataset2D(val)
         for i, axis in enumerate(self.axes):
             if self.parent.shape[axis] == axis_len(val, i):
                 continue
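For orientation, a minimal sketch of what the one-argument wrapping looks like from the caller's side; the `Dataset2D` import path is the internal module touched by this diff, and the example dataset is made up:

```python
# Hedged sketch: per the hunk above, an xarray Dataset handed to an aligned
# mapping is now wrapped whole via Dataset2D(val).
import pandas as pd
import xarray as xr

from anndata._core.xarray import Dataset2D  # internal class shown in this diff

ds = xr.Dataset(
    {"score": ("index", [0.1, 0.2, 0.3])},
    coords={"index": pd.Index(["c1", "c2", "c3"], name="index")},
)
val = Dataset2D(ds)  # single positional argument, as in the new code
```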
anndata/_core/anndata.py
CHANGED
anndata/_core/index.py
CHANGED
@@ -198,7 +198,8 @@ def _subset_sparse(a: CSMatrix | CSArray, subset_idx: Index):
 
 
 @_subset.register(pd.DataFrame)
-
+@_subset.register(Dataset2D)
+def _subset_df(df: pd.DataFrame | Dataset2D, subset_idx: Index):
     return df.iloc[subset_idx]
 
 
@@ -209,15 +210,6 @@ def _subset_awkarray(a: AwkArray, subset_idx: Index):
     return a[subset_idx]
 
 
-@_subset.register(Dataset2D)
-def _(a: Dataset2D, subset_idx: Index):
-    key = a.index_dim
-    # xarray seems to have some code looking for a second entry in tuples
-    if isinstance(subset_idx, tuple) and len(subset_idx) == 1:
-        subset_idx = subset_idx[0]
-    return a.isel(**{key: subset_idx})
-
-
 # Registration for SparseDataset occurs in sparse_dataset.py
 @_subset.register(h5py.Dataset)
 def _subset_dataset(d, subset_idx):
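The index.py change retires the dedicated `isel`-based subsetter and instead stacks a second `register` decorator on the pandas implementation, so `Dataset2D` follows the `.iloc` path. A hedged, self-contained sketch of that singledispatch pattern with generic names (not anndata's actual `_subset` helper):

```python
from functools import singledispatch

import numpy as np
import pandas as pd


@singledispatch
def subset(a, idx):
    # default implementation: plain positional indexing
    return a[idx]


@subset.register(pd.DataFrame)
@subset.register(pd.Series)  # stand-in for the second registered type
def _subset_df_like(df, idx):
    # both registered types share the positional .iloc implementation
    return df.iloc[idx]


print(subset(np.arange(5), [0, 2]))          # ndarray -> default path
print(subset(pd.Series([10, 20, 30]), [1]))  # Series  -> .iloc path
```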
anndata/_core/merge.py
CHANGED
@@ -17,7 +17,6 @@ import pandas as pd
 import scipy
 from natsort import natsorted
 from packaging.version import Version
-from pandas.api.types import is_extension_array_dtype
 from scipy import sparse
 
 from anndata._core.file_backing import to_memory
@@ -46,7 +45,7 @@ if TYPE_CHECKING:
 
     from anndata._types import Join_T
 
-    from ..compat import XDataArray
+    from ..compat import XDataArray, XDataset
 
 T = TypeVar("T")
 
@@ -130,6 +129,8 @@ def equal(a, b) -> bool:
 
 
 @equal.register(pd.DataFrame)
+@equal.register(Dataset2D)
+@equal.register(pd.Series)
 def equal_dataframe(a, b) -> bool:
     return a.equals(b)
 
@@ -170,11 +171,6 @@ def equal_cupyarray(a, b) -> bool:
     return bool(cp.array_equal(a, b, equal_nan=True))
 
 
-@equal.register(pd.Series)
-def equal_series(a, b) -> bool:
-    return a.equals(b)
-
-
 @equal.register(CSMatrix)
 @equal.register(CSArray)
 @equal.register(CupySparseMatrix)
@@ -189,6 +185,15 @@ def equal_sparse(a, b) -> bool:
             # Comparison broken for CSC matrices
             # https://github.com/cupy/cupy/issues/7757
             a, b = CupyCSRMatrix(a), CupyCSRMatrix(b)
+        if Version(scipy.__version__) >= Version("1.16.0rc1"):
+            # TODO: https://github.com/scipy/scipy/issues/23068
+            return bool(
+                a.format == b.format
+                and (a.shape == b.shape)
+                and np.all(a.indptr == b.indptr)
+                and np.all(a.indices == b.indices)
+                and np.all((a.data == b.data) | (np.isnan(a.data) & np.isnan(b.data)))
+            )
         comp = a != b
         if isinstance(comp, bool):
             return not comp
@@ -215,11 +220,6 @@ def equal_awkward(a, b) -> bool:
     return ak.almost_equal(a, b)
 
 
-@equal.register(Dataset2D)
-def equal_dataset2d(a, b) -> bool:
-    return a.equals(b)
-
-
 def as_sparse(x, *, use_sparse_array: bool = False) -> CSMatrix | CSArray:
     if not isinstance(x, CSMatrix | CSArray):
         in_memory_array_class = (
@@ -584,8 +584,8 @@ class Reindexer:
         """
         if self.no_change and (axis_len(el, axis) == len(self.old_idx)):
             return el
-        if isinstance(el, pd.DataFrame):
-            return self.
+        if isinstance(el, pd.DataFrame | Dataset2D):
+            return self._apply_to_df_like(el, axis=axis, fill_value=fill_value)
         elif isinstance(el, CSMatrix | CSArray | CupySparseMatrix):
             return self._apply_to_sparse(el, axis=axis, fill_value=fill_value)
         elif isinstance(el, AwkArray):
@@ -594,12 +594,10 @@ class Reindexer:
             return self._apply_to_dask_array(el, axis=axis, fill_value=fill_value)
         elif isinstance(el, CupyArray):
             return self._apply_to_cupy_array(el, axis=axis, fill_value=fill_value)
-        elif isinstance(el, Dataset2D):
-            return self._apply_to_dataset2d(el, axis=axis, fill_value=fill_value)
         else:
             return self._apply_to_array(el, axis=axis, fill_value=fill_value)
 
-    def
+    def _apply_to_df_like(self, el: pd.DataFrame | Dataset2D, *, axis, fill_value=None):
         if fill_value is None:
             fill_value = np.nan
         return el.reindex(self.new_idx, axis=axis, fill_value=fill_value)
@@ -758,31 +756,6 @@ class Reindexer:
             el = ak.pad_none(el, 1, axis=axis)  # axis == 0
         return el[self.idx]
 
-    def _apply_to_dataset2d(self, el: Dataset2D, *, axis, fill_value=None):
-        if fill_value is None:
-            fill_value = np.nan
-        index_dim = el.index_dim
-        if axis == 0:
-            # Dataset.reindex() can't handle ExtensionArrays
-            extension_arrays = {
-                col: arr for col, arr in el.items() if is_extension_array_dtype(arr)
-            }
-            el = el.drop_vars(extension_arrays.keys())
-            el = el.reindex(
-                {index_dim: self.new_idx}, method=None, fill_value=fill_value
-            )
-            for col, arr in extension_arrays.items():
-                el[col] = (
-                    index_dim,
-                    pd.Series(arr, index=self.old_idx).reindex(
-                        self.new_idx, fill_value=fill_value
-                    ),
-                )
-            return el
-        else:
-            msg = "This should be unreachable, please open an issue."
-            raise Exception(msg)
-
     @property
     def idx(self):
         return self.old_idx.get_indexer(self.new_idx)
@@ -931,12 +904,6 @@ def concat_arrays(  # noqa: PLR0911, PLR0912
             ],
             format="csr",
         )
-        scipy_version = Version(scipy.__version__)
-        # Bug where xstack produces a matrix not an array in 1.11.*
-        if use_sparse_array and (scipy_version.major, scipy_version.minor) == (1, 11):
-            if mat.format == "csc":
-                return sparse.csc_array(mat)
-            return sparse.csr_array(mat)
         return mat
     else:
         return np.concatenate(
@@ -1294,7 +1261,7 @@ def make_dask_col_from_extension_dtype(
 
 
 def make_xarray_extension_dtypes_dask(
     annotations: Iterable[Dataset2D], *, use_only_object_dtype: bool = False
-) -> Generator[
+) -> Generator[XDataset, None, None]:
     """
     Creates a generator of Dataset2D objects with dask arrays in place of :class:`pandas.api.extensions.ExtensionArray` dtype columns.
 
@@ -1323,7 +1290,7 @@ def make_xarray_extension_dtypes_dask(
                     if name in extension_cols
                     else col
                 )
-                for name, col in a.
+                for name, col in a._items()
             }
         )
 
@@ -1336,30 +1303,26 @@ def concat_dataset2d_on_annot_axis(
     join: Join_T,
     *,
     force_lazy: bool,
-
-    label_col: pd.Categorical | None = None,
+    concat_indices: pd.Index | None = None,
 ) -> Dataset2D:
-    """Create a concatenate dataset from a list of :class:`~anndata.
+    """Create a concatenate dataset from a list of :class:`~anndata.experimental.backed.Dataset2D` objects.
     The goal of this function is to mimic `pd.concat(..., ignore_index=True)` so has some complicated logic
     for handling the "index" to ensure (a) nothing is loaded into memory and (b) the true index is always tracked.
 
     Parameters
     ----------
     annotations
-        The :class:`~anndata.
+        The :class:`~anndata.experimental.backed.Dataset2D` objects to be concatenated.
     join
         Type of join operation
     force_lazy
         Whether to lazily concatenate elements using dask even when eager concatenation is possible.
-
-
-        If it's None, no column is added.
-    label_col
-        The bath information annotation.
+    concat_indices
+        Already calculated indices to be used as the index on the concatenated object.
 
     Returns
     -------
-    Concatenated :class:`~anndata.
+    Concatenated :class:`~anndata.experimental.backed.Dataset2D`
     """
     from anndata._core.xarray import Dataset2D
     from anndata._io.specs.lazy_methods import DUMMY_RANGE_INDEX_KEY
@@ -1375,49 +1338,65 @@ def concat_dataset2d_on_annot_axis(
         old_key = a.index_dim
         is_fake_index = old_key != a.true_index_dim
         # First create a dummy index
-        a.coords[DS_CONCAT_DUMMY_INDEX_NAME] = (
+        a.ds.coords[DS_CONCAT_DUMMY_INDEX_NAME] = (
             old_key,
             pd.RangeIndex(a.shape[0]),
         )
         # Set all the dimensions to this new dummy index
-
+        ds_swapped = a.ds.swap_dims({old_key: DS_CONCAT_DUMMY_INDEX_NAME})
         # Move the old coordinate into a variable
-        old_coord =
-        del
-
+        old_coord = ds_swapped.coords[old_key]
+        del ds_swapped.coords[old_key]
+        ds_swapped[old_key] = old_coord
+        a = Dataset2D(ds_swapped)
         if not is_fake_index:
             a.true_index_dim = old_key
         annotations_re_indexed.append(a)
     # Concat along the dummy index
-
-
-
-
-    ds.coords[DS_CONCAT_DUMMY_INDEX_NAME] = pd.RangeIndex(
-        ds.coords[DS_CONCAT_DUMMY_INDEX_NAME].shape[0]
+    ds_concat = xr.concat(
+        [a.ds for a in annotations_re_indexed],
+        join=join,
+        dim=DS_CONCAT_DUMMY_INDEX_NAME,
     )
+    ds_concat.attrs.pop("indexing_key", None)
+    # Wrapping allows us to use the Dataset2D methods
+    # directly for setting certain attrs/coords without duplicating here.
+    ds_concat_2d = Dataset2D(ds_concat)
+    ds_concat_2d.is_backed = have_backed
+    if concat_indices is not None:
+        concat_indices.name = DS_CONCAT_DUMMY_INDEX_NAME
+        ds_concat_2d.index = concat_indices
+        ds_concat = ds_concat_2d.ds
+    else:
+        ds_concat.coords[DS_CONCAT_DUMMY_INDEX_NAME] = pd.RangeIndex(
+            ds_concat.coords[DS_CONCAT_DUMMY_INDEX_NAME].shape[0]
+        )
     # Drop any lingering dimensions (swap doesn't delete)
-
+    ds_concat = ds_concat.drop_dims(
+        d for d in ds_concat.dims if d != DS_CONCAT_DUMMY_INDEX_NAME
+    )
     # Create a new true index and then delete the columns resulting from the concatenation for each index.
     # This includes the dummy column (which is neither a dimension nor a true indexing column)
-
-
-
-
-
-
-
+    if concat_indices is None:
+        index = xr.concat(
+            [a.true_xr_index for a in annotations_re_indexed],
+            dim=DS_CONCAT_DUMMY_INDEX_NAME,
+        )
+        # prevent duplicate values
+        index.coords[DS_CONCAT_DUMMY_INDEX_NAME] = ds_concat.coords[
+            DS_CONCAT_DUMMY_INDEX_NAME
+        ]
+        ds_concat.coords[DS_CONCAT_DUMMY_INDEX_NAME] = index
     for key in {
         true_index
         for a in annotations_re_indexed
         if (true_index := a.true_index_dim) != a.index_dim
     }:
-        del
-    if DUMMY_RANGE_INDEX_KEY in
-        del
-
-
-    return ds
+        del ds_concat[key]
+    if DUMMY_RANGE_INDEX_KEY in ds_concat:
+        del ds_concat[DUMMY_RANGE_INDEX_KEY]
+    ds_concat_2d = Dataset2D(ds_concat)
+    return ds_concat_2d
 
 
 def concat(  # noqa: PLR0912, PLR0913, PLR0915
@@ -1691,14 +1670,15 @@ def concat(  # noqa: PLR0912, PLR0913, PLR0915
             ignore_index=True,
         )
         concat_annot.index = concat_indices
-        if label is not None:
-            concat_annot[label] = label_col
     else:
         concat_annot = concat_dataset2d_on_annot_axis(
-            annotations,
+            annotations,
+            join,
+            force_lazy=force_lazy,
+            concat_indices=concat_indices,
         )
-
-        concat_annot
+    if label is not None:
+        concat_annot[label] = label_col
 
     # Annotation for other axis
     alt_annotations = [getattr(a, alt_axis_name) for a in adatas]
@@ -1720,7 +1700,7 @@ def concat(  # noqa: PLR0912, PLR0913, PLR0915
             )
         )
         annotations_with_only_dask = [
-            a.rename({a.true_index_dim: "merge_index"})
+            a.ds.rename({a.true_index_dim: "merge_index"})
            for a in annotations_with_only_dask
        ]
        alt_annot = Dataset2D(
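A behavioural note on the `equal_sparse` hunk above: on SciPy >= 1.16 the comparison goes through the compressed buffers directly and treats NaN as equal to NaN. A hedged sketch of that same check applied to plain SciPy CSR arrays (not anndata's helper itself):

```python
import numpy as np
from scipy import sparse

a = sparse.csr_array(np.array([[1.0, 0.0], [np.nan, 2.0]]))
b = sparse.csr_array(np.array([[1.0, 0.0], [np.nan, 2.0]]))

equal = bool(
    a.format == b.format
    and a.shape == b.shape
    and np.all(a.indptr == b.indptr)
    and np.all(a.indices == b.indices)
    # element-wise equality, except NaN == NaN counts as equal
    and np.all((a.data == b.data) | (np.isnan(a.data) & np.isnan(b.data)))
)
print(equal)  # True, even though np.nan != np.nan
```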
anndata/_core/sparse_dataset.py
CHANGED
@@ -165,7 +165,11 @@ class BackedSparseMatrix(_cs_matrix):
     def _get_contiguous_compressed_slice(
         self, s: slice
     ) -> tuple[np.ndarray, np.ndarray, np.ndarray]:
-        new_indptr = self.indptr[s.start : s.stop + 1]
+        new_indptr = self.indptr[s.start : s.stop + 1]
+        # If indptr is cached, we need to make a copy of the subset
+        # so as not to alter the underlying cached data.
+        if isinstance(self.indptr, np.ndarray):
+            new_indptr = new_indptr.copy()
 
         start = new_indptr[0]
         stop = new_indptr[-1]
@@ -395,10 +399,12 @@ def validate_indices(
 
 class BaseCompressedSparseDataset(abc._AbstractCSDataset, ABC):
     _group: GroupStorageType
+    _should_cache_indptr: bool
 
-    def __init__(self, group: GroupStorageType):
+    def __init__(self, group: GroupStorageType, *, should_cache_indptr: bool = True):
         type(self)._check_group_format(group)
         self._group = group
+        self._should_cache_indptr = should_cache_indptr
 
     @property
     def group(self) -> GroupStorageType:
@@ -616,8 +622,9 @@ class BaseCompressedSparseDataset(abc._AbstractCSDataset, ABC):
 
         It should therefore fit into memory, so we cache it for faster access.
         """
-
-
+        if self._should_cache_indptr:
+            return self.group["indptr"][...]
+        return self.group["indptr"]
 
     @cached_property
     def _indices(self) -> H5Array | ZarrArray:
@@ -660,13 +667,23 @@ class _CSCDataset(BaseCompressedSparseDataset, abc.CSCDataset):
     """Internal concrete version of :class:`anndata.abc.CSRDataset`."""
 
 
-def sparse_dataset(
+def sparse_dataset(
+    group: GroupStorageType,
+    *,
+    should_cache_indptr: bool = True,
+) -> abc.CSRDataset | abc.CSCDataset:
     """Generates a backed mode-compatible sparse dataset class.
 
     Parameters
     ----------
     group
         The backing group store.
+    should_cache_indptr
+        Whether or not to cache the indptr for repeated reuse as a :class:`numpy.ndarray`.
+        The default is `True` but one might set it to false if the dataset is repeatedly reopened
+        using this command, and then only a subset is read in before closing again.
+        See https://github.com/scverse/anndata/blob/3c489b979086c39c59d3eb5dad90ebacce3b9a80/src/anndata/_io/specs/lazy_methods.py#L85-L95
+        for the target use-case.
 
     Returns
     -------
@@ -713,9 +730,9 @@ def sparse_dataset(group: GroupStorageType) -> abc.CSRDataset | abc.CSCDataset:
     """
     encoding_type = _get_group_format(group)
     if encoding_type == "csr":
-        return _CSRDataset(group)
+        return _CSRDataset(group, should_cache_indptr=should_cache_indptr)
     elif encoding_type == "csc":
-        return _CSCDataset(group)
+        return _CSCDataset(group, should_cache_indptr=should_cache_indptr)
     msg = f"Unknown encoding type {encoding_type}"
     raise ValueError(msg)
 
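A hedged usage sketch of the new `should_cache_indptr` keyword; the import path is the internal module shown in this diff, and the file and group names are placeholders:

```python
import h5py

# internal module from this diff; the public re-export may differ
from anndata._core.sparse_dataset import sparse_dataset

with h5py.File("data.h5ad", "r") as f:  # placeholder file
    X = sparse_dataset(f["X"], should_cache_indptr=False)
    first_row = X[0]  # indptr is read from storage instead of cached as a numpy array
```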
anndata/_core/storage.py
CHANGED
@@ -37,8 +37,8 @@ def coerce_array(
         return value
     # If value is one of the allowed types, return it
     array_data_structure_types = get_args(ArrayDataStructureTypes)
-    if isinstance(value, XDataset)
-        value = Dataset2D(value
+    if isinstance(value, XDataset):
+        value = Dataset2D(value)
     if isinstance(value, (*array_data_structure_types, Dataset2D)):
         if isinstance(value, np.matrix):
             msg = f"{name} should not be a np.matrix, use np.ndarray instead."