lamindb 0.74.3__py3-none-any.whl → 0.75.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,735 @@
1
+ from __future__ import annotations
2
+
3
+ import inspect
4
+ from functools import cached_property
5
+ from itertools import chain
6
+ from typing import TYPE_CHECKING, Callable, Literal, Mapping, Union
7
+
8
+ import h5py
9
+ import numpy as np
10
+ import pandas as pd
11
+ from anndata import AnnData
12
+ from anndata import __version__ as anndata_version
13
+ from anndata._core.index import Index, _normalize_indices
14
+ from anndata._core.views import _resolve_idx
15
+ from anndata._io.h5ad import read_dataframe_legacy as read_dataframe_legacy_h5
16
+ from anndata._io.specs.registry import get_spec, read_elem, read_elem_partial
17
+ from anndata.compat import _read_attr
18
+ from fsspec.implementations.local import LocalFileSystem
19
+ from lamin_utils import logger
20
+ from lamindb_setup.core.upath import UPath, create_mapper, infer_filesystem
21
+ from packaging import version
22
+
23
+ if TYPE_CHECKING:
24
+ from pathlib import Path
25
+
26
+ from fsspec.core import OpenFile
27
+ from lamindb_setup.core.types import UPathStr
28
+
29
+
30
anndata_version_parse = version.parse(anndata_version)

# Anything older than 0.9.1 is necessarily also older than 0.10.0, so the
# warning can be issued up front, before the sparse-dataset API is selected.
if anndata_version_parse < version.parse("0.9.1"):
    logger.warning(
        "Full backed capabilities are not available for this version of anndata,"
        " please install anndata>=0.9.1."
    )

if anndata_version_parse >= version.parse("0.10.0"):
    # anndata >= 0.10.0: the sparse classes and the factory are imported as-is.
    from anndata._core.sparse_dataset import (  # type: ignore
        BaseCompressedSparseDataset as SparseDataset,
    )
    from anndata._core.sparse_dataset import (  # type: ignore
        CSRDataset,
        sparse_dataset,
    )

    def _check_group_format(*args):
        """Do nothing, whatever arguments are passed."""
        return None

    # NOTE(review): presumably this disables anndata's own group-format check so
    # that groups without an encoding type can be wrapped as CSR - confirm.
    CSRDataset._check_group_format = _check_group_format

else:
    # anndata < 0.10.0: build the missing pieces on top of SparseDataset.
    from anndata._core.sparse_dataset import SparseDataset

    # try csr for groups with no encoding_type
    class CSRDataset(SparseDataset):
        """SparseDataset whose format string is always ``"csr"``."""

        @property
        def format_str(self) -> str:
            return "csr"

    def sparse_dataset(group):
        """Wrap ``group`` in a ``SparseDataset``."""
        return SparseDataset(group)
63
+
64
+
65
+ # zarr and CSRDataset have problems with full selection
66
+ def _subset_sparse(sparse_ds: CSRDataset | SparseDataset, indices):
67
+ has_arrays = isinstance(indices[0], np.ndarray) or isinstance(
68
+ indices[1], np.ndarray
69
+ )
70
+ if not has_arrays and indices == (slice(None), slice(None)):
71
+ return sparse_ds.to_memory()
72
+ else:
73
+ return sparse_ds[indices]
74
+
75
+
76
def get_module_name(obj):
    """Return the top-level package name of the module ``obj`` belongs to.

    The module is looked up with ``inspect.getmodule`` and only the part of its
    name before the first dot is returned (e.g. ``"json"`` for an object from
    ``json.decoder``).

    Returns:
        The top-level package name, or ``None`` when ``inspect.getmodule``
        cannot determine a module for ``obj``.
    """
    module = inspect.getmodule(obj)
    if module is None:
        # getmodule() returns None for e.g. instances of builtin types;
        # report "unknown" instead of failing on ``None.__name__``.
        return None
    return module.__name__.partition(".")[0]
78
+
79
+
80
+ def _records_to_df(obj):
81
+ if isinstance(obj, pd.DataFrame):
82
+ return obj
83
+
84
+ if hasattr(obj, "dtype") and obj.dtype.names is not None:
85
+ formats = []
86
+ for name, (dt, _) in obj.dtype.fields.items():
87
+ if dt.char == "S":
88
+ new_dt = str(dt).replace("S", "U")
89
+ else:
90
+ new_dt = dt
91
+ formats.append((name, new_dt))
92
+ df = pd.DataFrame(obj.astype(formats, copy=False))
93
+ for index_name in ("index", "_index"):
94
+ if index_name in df.columns:
95
+ return df.set_index(index_name)
96
+ return df
97
+ else:
98
+ return obj
99
+
100
+
101
class AccessRegistry:
    """Dispatch table for storage-backend-specific functions.

    Implementations are registered per backend module name (the names used in
    this file are ``"h5py"`` and ``"zarr"``). A registered function is then
    called as an attribute of the registry, ``registry.<func_name>(...)``; the
    implementation is chosen from the top-level module of the first argument
    that belongs to a registered backend.
    """

    def __init__(self):
        # {function name: {module name: implementation}}
        self._registry = {}
        # {module name: opener}
        self._openers = {}

    def register_open(self, module: str):
        """Return a decorator registering the opener for ``module``."""

        def wrapper(func: Callable):
            self._openers[module] = func
            return func

        return wrapper

    def open(self, module: str, *args, **kwargs):
        """Call the opener registered for ``module`` with the given arguments.

        Raises:
            ValueError: if no opener is registered for ``module``.
        """
        try:
            opener = self._openers[module]
        except KeyError:
            raise ValueError(
                f"Module {module} not found, please install it."
            ) from None
        return opener(*args, **kwargs)

    def register(self, module: str):
        """Return a decorator registering a function for ``module``.

        The function is stored under its own ``__name__``.
        """

        def wrapper(func: Callable):
            self._registry.setdefault(func.__name__, {})[module] = func
            return func

        return wrapper

    def __getattr__(self, func_name: str):
        """Return a dispatcher for the registered function ``func_name``.

        Raises:
            AttributeError: if nothing was registered under ``func_name``, so
                that ``hasattr`` and protocol lookups behave normally.
        """
        # Go through __dict__: if ``_registry`` itself is not set yet, a plain
        # ``self._registry`` would re-enter __getattr__ and recurse forever.
        registered = self.__dict__.get("_registry", {})
        if func_name not in registered:
            raise AttributeError(
                f"{type(self).__name__!r} object has no attribute {func_name!r}"
            )

        def wrapper(*args, **kwargs):
            func_registry = self._registry[func_name]
            for arg in chain(args, kwargs.values()):
                try:
                    arg_module = get_module_name(arg)
                except AttributeError:
                    # No module could be resolved for this argument (e.g. a
                    # tuple of indices); it cannot select a backend.
                    continue
                if arg_module in func_registry:
                    return func_registry[arg_module](*args, **kwargs)
            raise ValueError(f"{func_name} is not registered for this module.")

        return wrapper
139
+
140
+
141
+ # Storage-specific functions (open, read_dataframe, safer_read_partial, keys)
+ # are registered on this shared instance and should be called through it.
142
+ registry = AccessRegistry()
143
+
144
+
145
@registry.register_open("h5py")
def open(filepath: UPathStr, mode: str = "r"):
    """Open an HDF5 file and return ``(connection, h5py.File)``.

    For a local filesystem the path is handed directly to ``h5py.File`` and the
    connection is ``None``. Otherwise the file is first opened in binary mode
    through the inferred filesystem, and that file object is passed to
    ``h5py.File`` and returned as the connection.

    Raises:
        ValueError: for a non-local file when ``mode`` is not 'r', 'w' or 'a'.
    """
    fs, file_path_str = infer_filesystem(filepath)
    if isinstance(fs, LocalFileSystem):
        assert mode in {"r", "r+", "a", "w", "w-"}, f"Unknown mode {mode}!"  # noqa: S101
        return None, h5py.File(file_path_str, mode=mode)

    # h5py mode -> binary mode of the underlying filesystem connection
    binary_modes = {"r": "rb", "w": "wb", "a": "ab"}
    if mode not in binary_modes:
        raise ValueError(f"Unknown mode {mode}! Should be 'r', 'w' or 'a'.")

    conn = fs.open(file_path_str, mode=binary_modes[mode])
    try:
        storage = h5py.File(conn, mode=mode)
    except Exception:
        # do not leave the connection open if h5py cannot use it
        conn.close()
        raise
    return conn, storage
166
+
167
+
168
@registry.register("h5py")
def read_dataframe(elem: h5py.Dataset | h5py.Group):
    """Read a dataframe element from an h5py dataset or group.

    Datasets go through anndata's legacy h5ad dataframe reader; everything
    else is read with ``read_elem``.
    """
    reader = read_dataframe_legacy_h5 if isinstance(elem, h5py.Dataset) else read_elem
    return reader(elem)
174
+
175
+
176
def _is_full_slice(idx) -> bool:
    """Return True only when ``idx`` is literally ``slice(None)``."""
    # The isinstance guard matters: ``ndarray == slice(None)`` is not a plain
    # bool and must never be used as an ``if`` condition.
    return isinstance(idx, slice) and idx == slice(None)


def _restore_selection_order(result, indices_inverse):
    """Undo the sort/deduplication applied to the indices before reading.

    ``indices_inverse`` holds, per axis, either ``None`` (axis untouched) or
    the inverse mapping returned by ``np.unique(..., return_inverse=True)``.
    """
    row_inverse, col_inverse = indices_inverse[0], indices_inverse[1]
    if row_inverse is None:
        if len(result.shape) == 2:
            return result[:, col_inverse]
        return result[col_inverse]
    if col_inverse is None:
        if isinstance(result, pd.DataFrame):
            return result.iloc[row_inverse]
        return result[row_inverse]
    # Both axes were reordered: expand rows and columns independently (outer
    # indexing). A plain ``result[rows, cols]`` would pair the two index
    # arrays element by element instead.
    return result[np.ix_(row_inverse, col_inverse)]


@registry.register("h5py")
def safer_read_partial(elem, indices):
    """Read the subset ``indices`` of an h5py dataset or group.

    Args:
        elem: the ``h5py.Dataset`` or ``h5py.Group`` to read from.
        indices: tuple with one selection per axis (two entries are expected).

    Returns:
        The selected data, in the order requested by ``indices``.

    Raises:
        ValueError: if ``elem`` has an empty encoding type and cannot be
            subset as a plain dataset or as a CSR group.
    """
    is_dataset = isinstance(elem, h5py.Dataset)
    indices_inverse: list | None = None
    encoding_type = get_spec(elem).encoding_type
    # h5py selection for datasets requires sorted indices
    if is_dataset or encoding_type == "dataframe":
        indices_increasing = []
        indices_inverse = []
        for indices_dim in indices:
            # should be integer or bool
            # ignore bool or increasing unique integers
            if (
                isinstance(indices_dim, np.ndarray)
                and indices_dim.dtype != "bool"
                and not np.all(np.diff(indices_dim) > 0)
            ):
                idx_unique, idx_inverse = np.unique(indices_dim, return_inverse=True)
                indices_increasing.append(idx_unique)
                indices_inverse.append(idx_inverse)
            else:
                indices_increasing.append(indices_dim)
                indices_inverse.append(None)
        indices = tuple(indices_increasing)
        if all(idx is None for idx in indices_inverse):
            indices_inverse = None

    if encoding_type == "":
        result = None
        sparse_error = None
        if is_dataset:
            dims = len(elem.shape)
            if dims == 2:
                result = elem[indices]
            elif dims == 1:
                # a 1-D dataset can only be subset along the axis that is not
                # the full slice
                if _is_full_slice(indices[0]):
                    result = elem[indices[1]]
                elif _is_full_slice(indices[1]):
                    result = elem[indices[0]]
        elif isinstance(elem, h5py.Group):
            # try csr for groups with no encoding_type
            try:
                ds = CSRDataset(elem)
                result = _subset_sparse(ds, indices)
            except Exception as e:
                logger.debug(
                    f"Encountered an exception while attempting to subset a sparse dataset by indices.\n{e}"
                )
                sparse_error = e
        if result is None:
            # keep the swallowed sparse failure (if any) attached as the cause
            raise ValueError(
                "Can not get a subset of the element of type"
                f" {type(elem).__name__} with an empty spec."
            ) from sparse_error
    else:
        result = read_elem_partial(elem, indices=indices)

    if indices_inverse is None:
        return result
    return _restore_selection_order(result, indices_inverse)
243
+
244
+
245
@registry.register("h5py")
def keys(storage: h5py.File):
    """Map each top-level entry of ``storage`` (except ``"X"``) to its keys.

    For ``obs``/``var`` stored as a dataset the keys are the field names of
    its dtype; for every other entry they come from the entry's own
    ``keys()``. Entries without keys are left out.
    """
    attrs_keys: dict[str, list] = {}
    for attr in storage.keys():
        if attr == "X":
            continue
        attr_obj = storage[attr]
        stored_as_dataset = attr in ("obs", "var") and isinstance(
            attr_obj, h5py.Dataset
        )
        key_source = attr_obj.dtype.fields if stored_as_dataset else attr_obj
        attr_keys = list(key_source.keys())
        if attr_keys:
            attrs_keys[attr] = attr_keys
    return attrs_keys
259
+
260
+
261
# Kept as lists for now so that an optional backend can append its own types;
# they are converted to tuples further down in this module.
ArrayTypes, GroupTypes, StorageTypes = [h5py.Dataset], [h5py.Group], [h5py.File]


# zarr is optional: record whether it could be imported.
try:
    import zarr
except ImportError:
    ZARR_INSTALLED = False
else:
    ZARR_INSTALLED = True
273
+
274
if ZARR_INSTALLED:
    from anndata._io.zarr import read_dataframe_legacy as read_dataframe_legacy_zarr

    # Make the zarr types recognised alongside the h5py ones.
    ArrayTypes.append(zarr.Array)
    GroupTypes.append(zarr.Group)
    StorageTypes.append(zarr.Group)

    @registry.register_open("zarr")
    def open(filepath: UPathStr, mode: Literal["r", "r+", "a", "w", "w-"] = "r"):
        """Open a zarr store and return ``(connection, storage)``.

        The connection is always ``None`` for zarr. Local paths are passed to
        ``zarr.open`` directly; other filesystems go through a mapper created
        with ``create_mapper``.
        """
        assert mode in {"r", "r+", "a", "w", "w-"}, f"Unknown mode {mode}!"  # noqa: S101

        fs, file_path_str = infer_filesystem(filepath)
        conn = None
        if isinstance(fs, LocalFileSystem):
            # this is faster than through an fsspec mapper for local
            open_obj = file_path_str
        else:
            open_obj = create_mapper(fs, file_path_str, check=True)
        storage = zarr.open(open_obj, mode=mode)
        return conn, storage

    @registry.register("zarr")
    def read_dataframe(elem: Union[zarr.Array, zarr.Group]):  # noqa
        """Read a dataframe element from a zarr array or group.

        Arrays go through anndata's legacy zarr dataframe reader; groups are
        read with ``read_elem``.
        """
        if isinstance(elem, zarr.Array):
            return read_dataframe_legacy_zarr(elem)
        else:
            return read_elem(elem)

    @registry.register("zarr")
    def safer_read_partial(elem, indices):
        """Read the subset ``indices`` of a zarr array or group.

        Args:
            elem: the ``zarr.Array`` or ``zarr.Group`` to read from.
            indices: tuple with one selection per axis (two entries expected).

        Raises:
            ValueError: if ``elem`` has an empty encoding type and cannot be
                subset as a plain array or as a CSR group.
        """
        encoding_type = get_spec(elem).encoding_type
        if encoding_type == "":
            sparse_error = None
            if isinstance(elem, zarr.Array):
                dims = len(elem.shape)
                if dims == 2:
                    return elem.oindex[indices]
                elif dims == 1:
                    # Compare against slice(None) only for real slices: an
                    # ndarray compared with a slice is not a plain bool and
                    # must not be used as an ``if`` condition.
                    if isinstance(indices[0], slice) and indices[0] == slice(None):
                        return elem.oindex[indices[1]]
                    elif isinstance(indices[1], slice) and indices[1] == slice(None):
                        return elem.oindex[indices[0]]
            elif isinstance(elem, zarr.Group):
                # try csr for groups with no encoding_type
                try:
                    ds = CSRDataset(elem)
                    return _subset_sparse(ds, indices)
                except Exception as e:
                    logger.debug(
                        f"Encountered an exception while attempting to subset a sparse dataset by indices.\n{e}"
                    )
                    sparse_error = e
            # keep the swallowed sparse failure (if any) attached as the cause
            raise ValueError(
                "Can not get a subset of the element of type"
                f" {type(elem).__name__} with an empty spec."
            ) from sparse_error
        else:
            if encoding_type in ("csr_matrix", "csc_matrix"):
                ds = sparse_dataset(elem)
                return _subset_sparse(ds, indices)
            else:
                return read_elem_partial(elem, indices=indices)

    # this is needed because accessing zarr.Group.keys() directly is very slow
    @registry.register("zarr")
    def keys(storage: zarr.Group):
        """Map each top-level entry of ``storage`` (except ``"X"``) to its keys.

        The keys are derived from the paths of the underlying store. ``obs``
        and ``var`` stored as a single array (detected by an
        ``<attr>/.zarray`` path) get the field names of that array's dtype
        instead. Entries without keys are left out.
        """
        paths = storage._store.keys()

        attrs_keys: dict[str, list] = {}
        obs_var_arrays = []

        for path in paths:
            if path in (".zattrs", ".zgroup"):
                continue
            parts = path.split("/")
            if len(parts) < 2:
                continue
            attr = parts[0]
            key = parts[1]

            if attr == "X":
                continue

            if attr in ("obs", "var"):
                if attr in obs_var_arrays:
                    continue
                if key == ".zarray":
                    # obs/var is one array: drop whatever was collected from
                    # its paths so far and fill it from the dtype after the loop
                    attrs_keys.pop(attr, None)
                    obs_var_arrays.append(attr)

            if attr not in attrs_keys:
                attrs_keys[attr] = []

            # zarr metadata files are not keys
            if key in (".zattrs", ".zgroup", ".zarray"):
                continue
            attr_keys = attrs_keys[attr]
            if key not in attr_keys:
                attr_keys.append(key)

        for attr in obs_var_arrays:
            attrs_keys[attr] = list(storage[attr].dtype.fields.keys())

        return {attr: keys for attr, keys in attrs_keys.items() if len(keys) > 0}
374
+
375
+
376
+ # Freeze the backend type lists into tuples; the tuples are what the
+ # isinstance() checks further down in this module receive.
+ ArrayTypes = tuple(ArrayTypes) # type: ignore
377
+ GroupTypes = tuple(GroupTypes) # type: ignore
378
+ StorageTypes = tuple(StorageTypes) # type: ignore
379
+
380
+
381
+ # Union aliases built from the same tuples (StorageType is used in annotations below).
+ ArrayType = Union[ArrayTypes] # type: ignore
382
+ GroupType = Union[GroupTypes] # type: ignore
383
+ StorageType = Union[StorageTypes] # type: ignore
384
+
385
+
386
def _to_memory(elem):
    """Load a backed element into memory.

    Backed arrays are read in full with ``elem[()]`` and sparse datasets with
    ``to_memory()``; anything else is returned as it is.
    """
    if isinstance(elem, ArrayTypes):
        return elem[()]
    if isinstance(elem, SparseDataset):
        return elem.to_memory()
    return elem
393
+
394
+
395
def _try_backed_full(elem):
    """Return a backed view of ``elem`` when possible, else read it fully.

    Backed arrays are returned untouched. Groups that look sparse are wrapped
    in a backed sparse dataset. Everything else is read with ``read_elem``.
    """
    # think what to do for compatibility with old var and obs
    if isinstance(elem, ArrayTypes):
        return elem

    if isinstance(elem, GroupTypes):
        encoding_type = get_spec(elem).encoding_type
        declared_sparse = encoding_type in ("csr_matrix", "csc_matrix")
        if declared_sparse or "h5sparse_format" in elem.attrs:
            return sparse_dataset(elem)
        # no encoding type, but an "indptr" member: treat the group as CSR
        if encoding_type == "" and "indptr" in elem:
            return CSRDataset(elem)

    return read_elem(elem)
410
+
411
+
412
def _safer_read_index(elem):
    """Read the index of a stored dataframe as a ``pandas.Index``.

    For groups the index member is the one named by the ``_index`` attribute.
    For arrays the field ``"index"`` or ``"_index"`` (checked in that order)
    is used, and its values are decoded from UTF-8 when the first one is
    ``bytes``.

    Raises:
        ValueError: if ``elem`` is neither a known group nor a known array
            type, or if an array has no index field or an empty one.
    """
    if isinstance(elem, GroupTypes):
        index_key = _read_attr(elem.attrs, "_index")
        return pd.Index(read_elem(elem[index_key]))
    if not isinstance(elem, ArrayTypes):
        raise ValueError(f"Unknown elem type {type(elem)} when reading indices.")

    index_field = next(
        (
            candidate
            for candidate in ("index", "_index")
            if candidate in elem.dtype.names
        ),
        None,
    )
    labels = None if index_field is None else elem[index_field]
    if labels is None or len(labels) == 0:
        raise ValueError("Indices not found.")
    if isinstance(labels[0], bytes):
        labels = np.frompyfunc(lambda raw: raw.decode("utf-8"), 1, 1)(labels)
    return pd.Index(labels)
429
+
430
+
431
+ class _MapAccessor:
432
+ def __init__(self, elem, name, indices=None):
433
+ self.elem = elem
434
+ self.indices = indices
435
+ self.name = name
436
+
437
+ def __getitem__(self, key):
438
+ if self.indices is None:
439
+ return _try_backed_full(self.elem[key])
440
+ else:
441
+ return registry.safer_read_partial(self.elem[key], indices=self.indices)
442
+
443
+ def keys(self):
444
+ return list(self.elem.keys())
445
+
446
+ def __repr__(self):
447
+ """Description of the _MapAccessor object."""
448
+ descr = f"Accessor for the AnnData attribute {self.name}"
449
+ descr += f"\n with keys: {self.keys()}"
450
+ return descr
451
+
452
+
453
+ class _AnnDataAttrsMixin:
454
+ storage: StorageType
455
+ _attrs_keys: Mapping[str, list]
456
+
457
+ @cached_property
458
+ def obs(self) -> pd.DataFrame:
459
+ if "obs" not in self._attrs_keys:
460
+ return None
461
+ indices = getattr(self, "indices", None)
462
+ if indices is not None:
463
+ indices = (indices[0], slice(None))
464
+ obj = registry.safer_read_partial(self.storage["obs"], indices=indices) # type: ignore
465
+ return _records_to_df(obj)
466
+ else:
467
+ return registry.read_dataframe(self.storage["obs"]) # type: ignore
468
+
469
+ @cached_property
470
+ def var(self) -> pd.DataFrame:
471
+ if "var" not in self._attrs_keys:
472
+ return None
473
+ indices = getattr(self, "indices", None)
474
+ if indices is not None:
475
+ indices = (indices[1], slice(None))
476
+ obj = registry.safer_read_partial(self.storage["var"], indices=indices) # type: ignore
477
+ return _records_to_df(obj)
478
+ else:
479
+ return registry.read_dataframe(self.storage["var"]) # type: ignore
480
+
481
+ @cached_property
482
+ def uns(self):
483
+ if "uns" not in self._attrs_keys:
484
+ return None
485
+ return read_elem(self.storage["uns"])
486
+
487
+ @cached_property
488
+ def X(self):
489
+ indices = getattr(self, "indices", None)
490
+ if indices is not None:
491
+ return registry.safer_read_partial(self.storage["X"], indices=indices)
492
+ else:
493
+ return _try_backed_full(self.storage["X"])
494
+
495
+ @cached_property
496
+ def obsm(self):
497
+ if "obsm" not in self._attrs_keys:
498
+ return None
499
+ indices = getattr(self, "indices", None)
500
+ if indices is not None:
501
+ indices = (indices[0], slice(None))
502
+ return _MapAccessor(self.storage["obsm"], "obsm", indices)
503
+
504
+ @cached_property
505
+ def varm(self):
506
+ if "varm" not in self._attrs_keys:
507
+ return None
508
+ indices = getattr(self, "indices", None)
509
+ if indices is not None:
510
+ indices = (indices[1], slice(None))
511
+ return _MapAccessor(self.storage["varm"], "varm", indices)
512
+
513
+ @cached_property
514
+ def obsp(self):
515
+ if "obsp" not in self._attrs_keys:
516
+ return None
517
+ indices = getattr(self, "indices", None)
518
+ if indices is not None:
519
+ indices = (indices[0], indices[0])
520
+ return _MapAccessor(self.storage["obsp"], "obsp", indices)
521
+
522
+ @cached_property
523
+ def varp(self):
524
+ if "varp" not in self._attrs_keys:
525
+ return None
526
+ indices = getattr(self, "indices", None)
527
+ if indices is not None:
528
+ indices = (indices[1], indices[1])
529
+ return _MapAccessor(self.storage["varp"], "varp", indices)
530
+
531
+ @cached_property
532
+ def layers(self):
533
+ if "layers" not in self._attrs_keys:
534
+ return None
535
+ indices = getattr(self, "indices", None)
536
+ return _MapAccessor(self.storage["layers"], "layers", indices)
537
+
538
+ @property
539
+ def obs_names(self):
540
+ return self._obs_names
541
+
542
+ @property
543
+ def var_names(self):
544
+ return self._var_names
545
+
546
+ @cached_property
547
+ def shape(self):
548
+ return len(self._obs_names), len(self._var_names)
549
+
550
+ def to_dict(self):
551
+ prepare_adata = {}
552
+
553
+ prepare_adata["X"] = _to_memory(self.X)
554
+
555
+ if "uns" in self._attrs_keys:
556
+ prepare_adata["uns"] = self.uns
557
+
558
+ for attr in ("obs", "var"):
559
+ if attr in self._attrs_keys:
560
+ prepare_adata[attr] = getattr(self, attr)
561
+
562
+ for attr in ("obsm", "varm", "obsp", "varp", "layers"):
563
+ if attr in self._attrs_keys:
564
+ prepare_adata[attr] = {}
565
+ get_attr = getattr(self, attr)
566
+ for key in self._attrs_keys[attr]:
567
+ prepare_adata[attr][key] = _to_memory(get_attr[key])
568
+
569
+ if "raw" in self._attrs_keys:
570
+ prepare_adata["raw"] = self.raw.to_dict()
571
+
572
+ return prepare_adata
573
+
574
+ def to_memory(self):
575
+ adata = AnnData(**self.to_dict())
576
+ return adata
577
+
578
+
579
class AnnDataAccessorSubset(_AnnDataAttrsMixin):
    """A subset of a backed AnnData, selected by a pair of indices.

    ``indices`` are expressed relative to the full stored object, whose shape
    is kept in ``ref_shape``.
    """

    def __init__(self, storage, indices, attrs_keys, obs_names, var_names, ref_shape):
        self.storage = storage
        self.indices = indices

        self._attrs_keys = attrs_keys
        self._obs_names = obs_names
        self._var_names = var_names

        self._ref_shape = ref_shape

    def __getitem__(self, index: Index):
        """Access a subset of the underlying AnnData object."""
        oidx, vidx = _normalize_indices(index, self._obs_names, self._var_names)
        new_obs_names = self._obs_names[oidx]
        new_var_names = self._var_names[vidx]
        if self.indices is not None:
            # compose the new selection with the one this subset already holds
            oidx = _resolve_idx(self.indices[0], oidx, self._ref_shape[0])
            vidx = _resolve_idx(self.indices[1], vidx, self._ref_shape[1])
        subset_cls = type(self)
        return subset_cls(
            self.storage,
            (oidx, vidx),
            self._attrs_keys,
            new_obs_names,
            new_var_names,
            self._ref_shape,
        )

    def __repr__(self):
        """Description of the object."""
        n_obs, n_vars = self.shape
        header = f"{type(self).__name__} object with n_obs × n_vars = {n_obs} × {n_vars}"
        attr_lines = "".join(
            f"\n {attr}: {keys}" for attr, keys in self._attrs_keys.items()
        )
        return header + attr_lines

    @cached_property
    def raw(self):
        if "raw" not in self._attrs_keys:
            return None
        # Only the observation selection is passed on to raw, and only when it
        # is not the full slice.
        prepare_indices = None
        if self.indices is not None:
            oidx = self.indices[0]
            if isinstance(oidx, np.ndarray) or oidx != slice(None):
                prepare_indices = (oidx, slice(None))
        return AnnDataRawAccessor(
            storage_raw=self.storage["raw"],
            indices=prepare_indices,
            attrs_keys=None,
            obs_names=self._obs_names,
            var_names=None,
            ref_shape=self._ref_shape[0],
        )
630
+
631
+
632
class AnnDataRawAccessor(AnnDataAccessorSubset):
    """Accessor for the ``raw`` group of a stored AnnData.

    Missing pieces are derived from the storage: ``var_names`` from the stored
    ``var``, the second entry of ``ref_shape`` from the number of variable
    names, and ``attrs_keys`` from the stored ``var`` and ``varm``.
    """

    def __init__(
        self, storage_raw, indices, attrs_keys, obs_names, var_names, ref_shape
    ):
        var_raw = storage_raw["var"]

        if var_names is None:
            var_names = _safer_read_index(var_raw)

        # complete a shape that only carries the number of observations
        if isinstance(ref_shape, int):
            ref_shape = (ref_shape, len(var_names))
        elif isinstance(ref_shape, tuple) and len(ref_shape) < 2:
            ref_shape = (ref_shape[0], len(var_names))

        if attrs_keys is None:
            attrs_keys = self._collect_attrs_keys(storage_raw, var_raw)

        super().__init__(
            storage_raw, indices, attrs_keys, obs_names, var_names, ref_shape
        )

    @staticmethod
    def _collect_attrs_keys(storage_raw, var_raw):
        """Collect the keys of ``var`` and, if present and non-empty, ``varm``."""
        collected = {}
        if isinstance(var_raw, ArrayTypes):
            collected["var"] = list(var_raw.dtype.fields.keys())
        else:
            # for some reason list(var_raw.keys()) is very slow for zarr
            # maybe also directly get keys from the underlying mapper
            collected["var"] = list(var_raw)
        if "varm" in storage_raw:
            varm_keys_raw = list(storage_raw["varm"])
            if varm_keys_raw:
                collected["varm"] = varm_keys_raw
        return collected

    @property
    def raw(self):
        # a raw accessor has no raw of its own
        raise AttributeError
666
+
667
+
668
class AnnDataAccessor(_AnnDataAttrsMixin):
    """Cloud-backed AnnData.

    Wraps an opened storage (and the connection it was opened through, if
    any) and exposes AnnData-like attributes that are read lazily. It can be
    used as a context manager; leaving the ``with`` block calls ``close()``.
    """

    def __init__(
        self,
        connection: OpenFile | None,
        storage: StorageType,
        filename: str,
    ):
        self._conn = connection
        self.storage = storage

        self._attrs_keys = registry.keys(self.storage)

        self._name = filename

        self._obs_names = _safer_read_index(self.storage["obs"])  # type: ignore
        self._var_names = _safer_read_index(self.storage["var"])  # type: ignore

        self._closed = False

    def close(self):
        """Closes the connection.

        The storage is closed first, then the underlying connection. The
        connection is closed and the accessor marked as closed even if closing
        the storage raises, so that a failure there does not leak the
        connection.
        """
        try:
            if hasattr(self, "storage") and hasattr(self.storage, "close"):
                self.storage.close()
        finally:
            try:
                if hasattr(self, "_conn") and hasattr(self._conn, "close"):
                    self._conn.close()
            finally:
                self._closed = True

    @property
    def closed(self):
        return self._closed

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.close()

    def __getitem__(self, index: Index) -> AnnDataAccessorSubset:
        """Access a subset of the underlying AnnData object."""
        oidx, vidx = _normalize_indices(index, self._obs_names, self._var_names)
        new_obs_names, new_var_names = self._obs_names[oidx], self._var_names[vidx]
        return AnnDataAccessorSubset(
            self.storage,
            (oidx, vidx),
            self._attrs_keys,
            new_obs_names,
            new_var_names,
            self.shape,
        )

    def __repr__(self):
        """Description of the AnnDataAccessor object."""
        n_obs, n_vars = self.shape
        descr = f"AnnDataAccessor object with n_obs × n_vars = {n_obs} × {n_vars}"
        descr += f"\n constructed for the AnnData object {self._name}"
        for attr, keys in self._attrs_keys.items():
            descr += f"\n {attr}: {keys}"
        return descr

    @cached_property
    def raw(self):
        if "raw" not in self._attrs_keys:
            return None
        # only the number of observations is known here; the raw accessor
        # derives its own number of variables
        return AnnDataRawAccessor(
            self.storage["raw"], None, None, self._obs_names, None, self.shape[0]
        )