lamindb 0.76.0__py3-none-any.whl → 0.76.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lamindb/__init__.py +12 -11
- lamindb/_artifact.py +81 -54
- lamindb/_can_validate.py +10 -3
- lamindb/_collection.py +17 -18
- lamindb/_curate.py +37 -19
- lamindb/_feature.py +0 -49
- lamindb/_filter.py +6 -5
- lamindb/_finish.py +11 -54
- lamindb/_from_values.py +14 -10
- lamindb/_is_versioned.py +3 -5
- lamindb/_query_manager.py +4 -4
- lamindb/_query_set.py +36 -10
- lamindb/_record.py +44 -43
- lamindb/_save.py +2 -3
- lamindb/_transform.py +23 -10
- lamindb/core/__init__.py +9 -3
- lamindb/core/_context.py +518 -0
- lamindb/core/_data.py +8 -6
- lamindb/core/_feature_manager.py +25 -8
- lamindb/core/_label_manager.py +1 -1
- lamindb/core/_mapped_collection.py +82 -26
- lamindb/core/_settings.py +4 -8
- lamindb/core/datasets/_core.py +1 -0
- lamindb/core/exceptions.py +22 -5
- lamindb/core/storage/__init__.py +1 -1
- lamindb/core/storage/_backed_access.py +2 -38
- lamindb/core/storage/_tiledbsoma.py +229 -0
- lamindb/core/storage/_valid_suffixes.py +2 -0
- lamindb/core/storage/paths.py +2 -6
- lamindb/core/versioning.py +56 -47
- lamindb/integrations/_vitessce.py +2 -0
- {lamindb-0.76.0.dist-info → lamindb-0.76.2.dist-info}/METADATA +7 -15
- lamindb-0.76.2.dist-info/RECORD +59 -0
- lamindb/core/_run_context.py +0 -514
- lamindb-0.76.0.dist-info/RECORD +0 -58
- {lamindb-0.76.0.dist-info → lamindb-0.76.2.dist-info}/LICENSE +0 -0
- {lamindb-0.76.0.dist-info → lamindb-0.76.2.dist-info}/WHEEL +0 -0
lamindb/core/_mapped_collection.py
CHANGED
@@ -149,7 +149,7 @@ class MappedCollection:
         self.storages = []  # type: ignore
         self.conns = []  # type: ignore
         self.parallel = parallel
-        self.
+        self.path_list = path_list
         self._make_connections(path_list, parallel)
 
         self.n_obs_list = []
@@ -165,11 +165,12 @@ class MappedCollection:
         self.indices = np.hstack([np.arange(n_obs) for n_obs in self.n_obs_list])
         self.storage_idx = np.repeat(np.arange(len(self.storages)), self.n_obs_list)
 
-        self.join_vars = join
-        self.var_indices = None
-        self.var_joint = None
-        self.n_vars_list = None
-        self.
+        self.join_vars: Literal["inner", "outer"] | None = join
+        self.var_indices: list | None = None
+        self.var_joint: pd.Index | None = None
+        self.n_vars_list: list | None = None
+        self.var_list: list | None = None
+        self.n_vars: int | None = None
         if self.join_vars is not None:
             self._make_join_vars()
             self.n_vars = len(self.var_joint)
@@ -225,43 +226,71 @@ class MappedCollection:
             encoder.update({cat: i for i, cat in enumerate(cats)})
             self.encoders[label] = encoder
 
-    def
-        var_list = []
+    def _read_vars(self):
+        self.var_list = []
         self.n_vars_list = []
         for storage in self.storages:
             with _Connect(storage) as store:
                 vars = _safer_read_index(store["var"])
-                var_list.append(vars)
+                self.var_list.append(vars)
                 self.n_vars_list.append(len(vars))
 
-
+    def _make_join_vars(self):
+        if self.var_list is None:
+            self._read_vars()
+        vars_eq = all(self.var_list[0].equals(vrs) for vrs in self.var_list[1:])
         if vars_eq:
             self.join_vars = None
-            self.var_joint = var_list[0]
+            self.var_joint = self.var_list[0]
             return
 
         if self.join_vars == "inner":
-            self.var_joint = reduce(pd.Index.intersection, var_list)
+            self.var_joint = reduce(pd.Index.intersection, self.var_list)
             if len(self.var_joint) == 0:
                 raise ValueError(
                     "The provided AnnData objects don't have shared varibales.\n"
                     "Use join='outer'."
                 )
-            self.var_indices = [
+            self.var_indices = [
+                vrs.get_indexer(self.var_joint) for vrs in self.var_list
+            ]
         elif self.join_vars == "outer":
-            self.var_joint = reduce(pd.Index.union, var_list)
-            self.var_indices = [
+            self.var_joint = reduce(pd.Index.union, self.var_list)
+            self.var_indices = [
+                self.var_joint.get_indexer(vrs) for vrs in self.var_list
+            ]
+
+    def check_vars_sorted(self, ascending: bool = True) -> bool:
+        """Returns `True` if all variables are sorted in all objects."""
+        if self.var_list is None:
+            self._read_vars()
+        if ascending:
+            vrs_sort_status = (vrs.is_monotonic_increasing for vrs in self.var_list)
+        else:
+            vrs_sort_status = (vrs.is_monotonic_decreasing for vrs in self.var_list)
+        return all(vrs_sort_status)
+
+    def check_vars_non_aligned(self, vars: pd.Index | list) -> list[int]:
+        """Returns indices of objects with non-aligned variables.
+
+        Args:
+            vars: Check alignment against these variables.
+        """
+        if self.var_list is None:
+            self._read_vars()
+        vars = pd.Index(vars)
+        return [i for i, vrs in enumerate(self.var_list) if not vrs.equals(vars)]
 
     def __len__(self):
         return self.n_obs
 
     @property
-    def shape(self):
+    def shape(self) -> tuple[int, int]:
         """Shape of the (virtually aligned) dataset."""
         return (self.n_obs, self.n_vars)
 
     @property
-    def original_shapes(self):
+    def original_shapes(self) -> list[tuple[int, int]]:
         """Shapes of the underlying AnnData objects."""
         if self.n_vars_list is None:
             n_vars_list = [None] * len(self.n_obs_list)
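
The new `_make_join_vars` above is plain pandas index arithmetic. As a hedged, self-contained sketch (toy gene names invented for illustration, independent of lamindb), the inner and outer cases compute:

from functools import reduce

import pandas as pd

# hypothetical var indices of two stored AnnData objects
var_a = pd.Index(["g1", "g2", "g3"])
var_b = pd.Index(["g2", "g3", "g4"])

# join="inner": keep shared variables; for each object, gather the positions
# of the joint variables inside that object's own index
var_joint_inner = reduce(pd.Index.intersection, [var_a, var_b])  # ['g2', 'g3']
inner_indices = [vrs.get_indexer(var_joint_inner) for vrs in (var_a, var_b)]
# inner_indices[0] == array([1, 2]): 'g2' and 'g3' sit at positions 1 and 2 in var_a

# join="outer": union of all variables; for each object, find where its own
# variables land inside the joint index, so its values can be scattered there
var_joint_outer = reduce(pd.Index.union, [var_a, var_b])  # ['g1', 'g2', 'g3', 'g4']
outer_indices = [var_joint_outer.get_indexer(vrs) for vrs in (var_a, var_b)]
# outer_indices[1] == array([1, 2, 3]): var_b's genes occupy slots 1..3

Caching `self.var_list` once is also what lets the new `check_vars_sorted` and `check_vars_non_aligned` reuse the indices instead of re-reading `var` from every store.
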
@@ -374,8 +403,27 @@ class MappedCollection:
             label = label.decode("utf-8")
         return label
 
-    def get_label_weights(
-
+    def get_label_weights(
+        self,
+        obs_keys: str | list[str],
+        scaler: float | None = None,
+        return_categories: bool = False,
+    ):
+        """Get all weights for the given label keys.
+
+        This counts the number of labels for each label and returns
+        weights for each obs label accoding to the formula `1 / num of this label in the data`.
+        If `scaler` is provided, then `scaler / (scaler + num of this label in the data)`.
+
+        Args:
+            obs_keys: A key in the ``.obs`` slots or a list of keys. If a list is provided,
+                the labels from the obs keys will be concatenated with ``"__"`` delimeter
+            scaler: Use this number to scale the provided weights.
+            return_categories: If `False`, returns weights for each observation,
+                can be directly passed to a sampler. If `True`, returns a dictionary with
+                unique categories for labels (concatenated if `obs_keys` is a list)
+                and their weights.
+        """
         if isinstance(obs_keys, str):
             obs_keys = [obs_keys]
         labels_list = []
@@ -383,12 +431,20 @@ class MappedCollection:
             labels_to_str = self.get_merged_labels(label_key).astype(str).astype("O")
             labels_list.append(labels_to_str)
         if len(labels_list) > 1:
-            labels =
+            labels = ["__".join(labels_obs) for labels_obs in zip(*labels_list)]
         else:
             labels = labels_list[0]
-
-
-
+        counter = Counter(labels)
+        if return_categories:
+            return {
+                k: 1.0 / v if scaler is None else scaler / (v + scaler)
+                for k, v in counter.items()
+            }
+        counts = np.array([counter[label] for label in labels])
+        if scaler is None:
+            weights = 1.0 / counts
+        else:
+            weights = scaler / (counts + scaler)
         return weights
 
     def get_merged_labels(self, label_key: str):
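
The weighting documented in the new docstring is ordinary inverse-frequency weighting. A minimal standalone sketch (labels invented) of both formulas:

from collections import Counter

import numpy as np

labels = ["B cell", "T cell", "T cell", "T cell"]  # made-up merged obs labels
counter = Counter(labels)  # Counter({'T cell': 3, 'B cell': 1})
counts = np.array([counter[label] for label in labels])  # [1, 3, 3, 3]

weights = 1.0 / counts  # [1.0, 0.33, 0.33, 0.33]: rarer labels weigh more
scaler = 10.0
smoothed = scaler / (counts + scaler)  # softer penalty for frequent labels

With `return_categories=True` the same numbers are keyed by category instead of repeated per observation, e.g. `{'B cell': 1.0, 'T cell': 0.33}` for the unscaled case.
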
@@ -426,7 +482,7 @@ class MappedCollection:
             codes = self._get_codes(store, label_key)
             codes = decode(codes) if isinstance(codes[0], bytes) else codes
             cats_merge.update(codes)
-        return cats_merge
+        return sorted(cats_merge)
 
     def _get_categories(self, storage: StorageType, label_key: str):  # type: ignore
         """Get categories."""
@@ -483,7 +539,7 @@ class MappedCollection:
         self._closed = True
 
     @property
-    def closed(self):
+    def closed(self) -> bool:
         """Check if connections to array streaming backend are closed.
 
         Does not matter if `parallel=True`.
@@ -508,4 +564,4 @@ class MappedCollection:
         mapped.parallel = False
         mapped.storages = []
         mapped.conns = []
-        mapped._make_connections(mapped.
+        mapped._make_connections(mapped.path_list, parallel=False)
lamindb/core/_settings.py
CHANGED
@@ -54,7 +54,7 @@ class Settings:
     track_run_inputs: bool = True
     """Track files as input upon `.load()`, `.cache()` and `.backed()`.
 
-    Requires a global run context with :func:`~lamindb.track` was created!
+    Requires a global run context with :func:`~lamindb.core.Context.track` was created!
 
     FAQ: :doc:`/faq/track-run-inputs`
     """
@@ -83,14 +83,10 @@ class Settings:
     def transform(self) -> TransformSettings:
         """Transform settings.
 
-
-
-            ln.settings.transform.stem_uid = "FPnfDtJz8qbE"  # defines version family
-            ln.settings.transform.version = "1"  # defines version
-            ln.settings.transform.name = "My good script"  # semantic name
-
-        The first two are typically auto-generated by :func:`~lamindb.track`.
+        Is deprecated since version 0.76.1.
         """
+        # enable warning soon
+        # logger.warning("Transform settings are deprecated, please instead set `ln.context.uid`")
         return transform_settings
 
     @property
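
The commented-out warning points at the replacement API. Assuming the `Context` object introduced in `lamindb/core/_context.py` (see the file list above) is exposed as `ln.context`, as the warning text suggests, the migration would look roughly like:

import lamindb as ln

# deprecated since 0.76.1:
# ln.settings.transform.stem_uid = "FPnfDtJz8qbE"  # defines version family
# ln.settings.transform.version = "1"

# instead, set the transform uid on the global context and track the run
ln.context.uid = "FPnfDtJz8qbE0000"  # placeholder uid, not a real one
ln.context.track()

This is a sketch of the intended direction, not a verbatim API reference for 0.76.2.
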
lamindb/core/datasets/_core.py
CHANGED
@@ -432,6 +432,7 @@ def mudata_papalexi21_subset() -> MuData:  # pragma: no cover
     mdata.push_obs(["percent.mito"], mods=["rna"], drop=True)
     mdata["hto"].obs["technique"] = "cell hashing"
     mdata["hto"].obs["technique"] = mdata["hto"].obs["technique"].astype("category")
+    mdata.pull_obs(["technique"], mods="hto")
 
     return mdata
 
lamindb/core/exceptions.py
CHANGED
@@ -5,24 +5,41 @@ The registry base class:
 .. autosummary::
    :toctree: .
 
+   DoesNotExist
    ValidationError
    NotebookNotSavedError
    NoTitleError
-
-
+   MissingContext
+   UpdateContext
    IntegrityError
 
 """
 
 
+class TrackNotCalled(SystemExit):
+    pass
+
+
+class NotebookNotSaved(SystemExit):
+    pass
+
+
 class ValidationError(SystemExit):
     """Validation error: not mapped in registry."""
 
     pass
 
 
+# inspired by Django's DoesNotExist
+# equivalent to SQLAlchemy's NoResultFound
+class DoesNotExist(Exception):
+    """No record found."""
+
+    pass
+
+
 # -------------------------------------------------------------------------------------
-# ln.track() AKA
+# ln.context.track() AKA context
 # -------------------------------------------------------------------------------------
 
 
@@ -48,13 +65,13 @@ class NoTitleError(Exception):
     pass
 
 
-class
+class MissingContext(SystemExit):
     """User didn't define transform settings."""
 
     pass
 
 
-class
+class UpdateContext(SystemExit):
     """Transform settings require update."""
 
     pass
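
Note the split in base classes: the workflow errors (`TrackNotCalled`, `NotebookNotSaved`, `MissingContext`, `UpdateContext`) subclass `SystemExit` so scripts abort cleanly, while the new `DoesNotExist` is an ordinary `Exception` meant to be caught. A hedged usage sketch (the lookup key is invented, and we assume single-record lookups raise `DoesNotExist` as the name suggests):

import lamindb as ln
from lamindb.core.exceptions import DoesNotExist

try:
    artifact = ln.Artifact.get(key="figures/umap.png")  # hypothetical key
except DoesNotExist:
    artifact = None  # recover instead of terminating the interpreter
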
lamindb/core/storage/__init__.py
CHANGED
@@ -18,8 +18,8 @@ Array accessors.
 
 from lamindb_setup.core.upath import LocalPathClasses, UPath, infer_filesystem
 
-from ._anndata_sizes import size_adata
 from ._backed_access import AnnDataAccessor, BackedAccessor
+from ._tiledbsoma import register_for_tiledbsoma_store, write_tiledbsoma_store
 from ._valid_suffixes import VALID_SUFFIXES
 from .objects import infer_suffix, write_to_disk
 from .paths import delete_storage, load_to_memory
lamindb/core/storage/_backed_access.py
CHANGED
@@ -1,12 +1,13 @@
 from __future__ import annotations
 
 from dataclasses import dataclass
-from typing import TYPE_CHECKING, Any, Callable
+from typing import TYPE_CHECKING, Any, Callable
 
 from anndata._io.specs.registry import get_spec
 from lnschema_core import Artifact
 
 from ._anndata_accessor import AnnDataAccessor, StorageType, registry
+from ._tiledbsoma import _open_tiledbsoma
 from .paths import filepath_from_artifact
 
 if TYPE_CHECKING:
@@ -52,43 +53,6 @@ def _track_writes_factory(obj: Any, finalize: Callable):
     return obj
 
 
-def _open_tiledbsoma(
-    filepath: UPath, mode: Literal["r", "w"] = "r"
-) -> SOMACollection | SOMAExperiment:
-    try:
-        import tiledbsoma as soma
-    except ImportError as e:
-        raise ImportError("Please install tiledbsoma: pip install tiledbsoma") from e
-    filepath_str = filepath.as_posix()
-    if filepath.protocol == "s3":
-        from lamindb_setup.core._settings_storage import get_storage_region
-
-        region = get_storage_region(filepath_str)
-        tiledb_config = {"vfs.s3.region": region}
-        storage_options = filepath.storage_options
-        if "key" in storage_options:
-            tiledb_config["vfs.s3.aws_access_key_id"] = storage_options["key"]
-        if "secret" in storage_options:
-            tiledb_config["vfs.s3.aws_secret_access_key"] = storage_options["secret"]
-        if "token" in storage_options:
-            tiledb_config["vfs.s3.aws_session_token"] = storage_options["token"]
-        ctx = soma.SOMATileDBContext(tiledb_config=tiledb_config)
-        # this is a strange bug
-        # for some reason iterdir futher gives incorrect results
-        # if cache is not invalidated
-        # instead of obs and ms it gives ms and ms in the list of names
-        filepath.fs.invalidate_cache()
-    else:
-        ctx = None
-
-    soma_objects = [obj.name for obj in filepath.iterdir()]
-    if "obs" in soma_objects and "ms" in soma_objects:
-        SOMAType = soma.Experiment
-    else:
-        SOMAType = soma.Collection
-    return SOMAType.open(filepath_str, mode=mode, context=ctx)
-
-
 @dataclass
 class BackedAccessor:
     """h5py.File or zarr.Group accessor."""
lamindb/core/storage/_tiledbsoma.py
ADDED
@@ -0,0 +1,229 @@
+from __future__ import annotations
+
+from typing import TYPE_CHECKING, Literal
+
+from anndata import AnnData
+from lamin_utils import logger
+from lamindb_setup.core._settings_storage import get_storage_region
+from lamindb_setup.core.upath import create_path
+from lnschema_core import Artifact, Run, Storage
+from upath import UPath
+
+if TYPE_CHECKING:
+    from lamindb_setup.core.types import UPathStr
+    from tiledbsoma import Collection as SOMACollection
+    from tiledbsoma import Experiment as SOMAExperiment
+    from tiledbsoma.io import ExperimentAmbientLabelMapping
+
+
+def _read_adata_h5ad_zarr(objpath: UPath):
+    from lamindb.core.storage.paths import read_adata_h5ad, read_adata_zarr
+
+    if objpath.is_dir():
+        adata = read_adata_zarr(objpath)
+    else:
+        adata = read_adata_h5ad(objpath)
+    return adata
+
+
+def _tiledb_config_s3(storepath: UPath) -> dict:
+    region = get_storage_region(storepath)
+    tiledb_config = {"vfs.s3.region": region}
+    storage_options = storepath.storage_options
+    if "key" in storage_options:
+        tiledb_config["vfs.s3.aws_access_key_id"] = storage_options["key"]
+    if "secret" in storage_options:
+        tiledb_config["vfs.s3.aws_secret_access_key"] = storage_options["secret"]
+    if "token" in storage_options:
+        tiledb_config["vfs.s3.aws_session_token"] = storage_options["token"]
+
+    return tiledb_config
+
+
+def _open_tiledbsoma(
+    storepath: UPath, mode: Literal["r", "w"] = "r"
+) -> SOMACollection | SOMAExperiment:
+    try:
+        import tiledbsoma as soma
+    except ImportError as e:
+        raise ImportError("Please install tiledbsoma: pip install tiledbsoma") from e
+
+    storepath_str = storepath.as_posix()
+    if storepath.protocol == "s3":
+        ctx = soma.SOMATileDBContext(tiledb_config=_tiledb_config_s3(storepath))
+        # this is a strange bug
+        # for some reason iterdir futher gives incorrect results
+        # if cache is not invalidated
+        # instead of obs and ms it gives ms and ms in the list of names
+        storepath.fs.invalidate_cache()
+    else:
+        ctx = None
+
+    soma_objects = [obj.name for obj in storepath.iterdir()]
+    if "obs" in soma_objects and "ms" in soma_objects:
+        SOMAType = soma.Experiment
+    else:
+        SOMAType = soma.Collection
+    return SOMAType.open(storepath_str, mode=mode, context=ctx)
+
+
+def register_for_tiledbsoma_store(
+    store: UPathStr | Artifact | None,
+    adatas: list[AnnData | UPathStr],
+    measurement_name: str,
+    obs_field_name: str,
+    var_field_name: str,
+    append_obsm_varm: bool = False,
+    run: Run | None = None,
+) -> tuple[ExperimentAmbientLabelMapping, list[AnnData]]:
+    """Register `AnnData` objects to append to `tiledbsoma.Experiment`.
+
+    Pass the returned registration mapping and `AnnData` objects to `write_tiledbsoma_store`.
+
+    See `tiledbsoma.io.from_h5ad
+    <https://tiledbsoma.readthedocs.io/en/latest/_autosummary/tiledbsoma.io.from_h5ad.html>`__.
+    """
+    try:
+        import tiledbsoma as soma
+        import tiledbsoma.io as soma_io
+    except ImportError as e:
+        raise ImportError("Please install tiledbsoma: pip install tiledbsoma") from e
+
+    if isinstance(store, Artifact):
+        storepath = store.path
+    else:
+        storepath = None if store is None else create_path(store)
+
+    add_run_uid = True
+    ctx = None
+    if storepath is not None:
+        if storepath.protocol == "s3":
+            ctx = soma.SOMATileDBContext(tiledb_config=_tiledb_config_s3(storepath))
+        if storepath.exists():
+            with soma.Experiment.open(
+                storepath.as_posix(), mode="r", context=ctx
+            ) as store:
+                add_run_uid = "lamin_run_uid" in store["obs"].schema.names
+        storepath = storepath.as_posix()
+
+    if add_run_uid:
+        from lamindb.core._data import get_run
+
+        run = get_run(run)
+
+    adata_objects = []
+    for adata in adatas:
+        if isinstance(adata, AnnData):
+            if add_run_uid:
+                if adata.is_view:
+                    raise ValueError(
+                        "Can not register an `AnnData` view, please do `adata.copy()` before passing."
+                    )
+                else:
+                    logger.warning("Mutating in-memory AnnData.")
+                    adata.obs["lamin_run_uid"] = run.uid
+        else:
+            adata = _read_adata_h5ad_zarr(create_path(adata))
+            if add_run_uid:
+                adata.obs["lamin_run_uid"] = run.uid
+        adata_objects.append(adata)
+
+    registration_mapping = soma_io.register_anndatas(
+        experiment_uri=storepath,
+        adatas=adata_objects,
+        measurement_name=measurement_name,
+        obs_field_name=obs_field_name,
+        var_field_name=var_field_name,
+        append_obsm_varm=append_obsm_varm,
+        context=ctx,
+    )
+
+    return registration_mapping, adata_objects
+
+
+def write_tiledbsoma_store(
+    store: Artifact | UPathStr,
+    adata: AnnData | UPathStr,
+    run: Run | None = None,
+    artifact_kwargs: dict | None = None,
+    **kwargs,
+) -> Artifact:
+    """Write `AnnData` to `tiledbsoma.Experiment`.
+
+    Reads `AnnData`, writes it to `tiledbsoma.Experiment` and creates `lamindb.Artifact`.
+
+    See `tiledbsoma.io.from_h5ad
+    <https://tiledbsoma.readthedocs.io/en/latest/_autosummary/tiledbsoma.io.from_h5ad.html>`__.
+    """
+    try:
+        import tiledbsoma as soma
+        import tiledbsoma.io as soma_io
+    except ImportError as e:
+        raise ImportError("Please install tiledbsoma: pip install tiledbsoma") from e
+
+    from lamindb.core._data import get_run
+
+    if artifact_kwargs is None:
+        artifact_kwargs = {}
+
+    appending: bool = kwargs.get("registration_mapping", None) is not None
+    store_is_artifact: bool = isinstance(store, Artifact)
+    if store_is_artifact:
+        if not appending:
+            raise ValueError(
+                "Trying to append to an existing store without `registration_mapping`."
+            )
+        storepath = store.path
+    else:
+        storepath = create_path(store)
+    add_run_uid: bool = not appending
+
+    if not isinstance(adata, AnnData):
+        # create_path is used
+        # in case adata is somewhere in our managed s3 bucket or just in s3
+        adata = _read_adata_h5ad_zarr(create_path(adata))
+    elif add_run_uid and adata.is_view:
+        raise ValueError(
+            "Can not write from an `AnnData` view, please do `adata.copy()` before passing."
+        )
+
+    run = get_run(run)
+
+    if add_run_uid:
+        adata.obs["lamin_run_uid"] = run.uid
+
+    if storepath.protocol == "s3":
+        ctx = soma.SOMATileDBContext(tiledb_config=_tiledb_config_s3(storepath))
+    else:
+        ctx = None
+
+    soma_io.from_anndata(storepath.as_posix(), adata, context=ctx, **kwargs)
+
+    if add_run_uid:
+        del adata.obs["lamin_run_uid"]
+
+    revises = None
+    if appending:
+        if store_is_artifact:
+            revises = store
+        else:
+            from lamindb._artifact import (
+                check_path_in_existing_storage,
+                get_relative_path_to_directory,
+            )
+
+            storage = check_path_in_existing_storage(storepath)
+            if isinstance(storage, Storage):
+                search_by_key = get_relative_path_to_directory(
+                    path=storepath, directory=UPath(storage.root)
+                ).as_posix()
+                revises = Artifact.filter(
+                    key=search_by_key, is_latest=True, _key_is_virtual=False
+                ).one_or_none()
+                if revises is not None:
+                    logger.info(f"Assuming it is a new version of {revises}.")
+
+    if revises is None:
+        return Artifact(storepath, run=run, **artifact_kwargs)
+    else:
+        return Artifact(storepath, run=run, revises=revises, **artifact_kwargs)
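
Taken together, the two helpers support a create-then-append workflow roughly like the following sketch (bucket path, file names, and field names are invented; assumes an initialized lamindb instance):

from lamindb.core.storage import (
    register_for_tiledbsoma_store,
    write_tiledbsoma_store,
)

# 1. create a new store from a first AnnData; measurement_name is forwarded
#    to tiledbsoma.io.from_anndata via **kwargs
artifact = write_tiledbsoma_store(
    "s3://my-bucket/my-store",
    adata="data/batch0.h5ad",
    measurement_name="RNA",
)

# 2. append a second batch: register against the existing store first,
#    then write with the returned registration mapping
mapping, adatas = register_for_tiledbsoma_store(
    store=artifact,
    adatas=["data/batch1.h5ad"],
    measurement_name="RNA",
    obs_field_name="obs_id",
    var_field_name="var_id",
)
new_artifact = write_tiledbsoma_store(
    artifact,
    adata=adatas[0],
    measurement_name="RNA",
    registration_mapping=mapping,
)

Passing the original `Artifact` as `store` makes the appended store a new version (`revises`) of the previous one.
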
lamindb/core/storage/paths.py
CHANGED
@@ -84,13 +84,9 @@ def attempt_accessing_path(
         )
     else:
         if artifact._state.db not in ("default", None) and using_key is None:
-            storage = (
-                Storage.using(artifact._state.db).filter(id=artifact.storage_id).one()
-            )
+            storage = Storage.using(artifact._state.db).get(id=artifact.storage_id)
        else:
-            storage = (
-                Storage.objects.using(using_key).filter(id=artifact.storage_id).one()
-            )
+            storage = Storage.objects.using(using_key).get(id=artifact.storage_id)
     # find a better way than passing None to instance_settings in the future!
     storage_settings = StorageSettings(storage.root, access_token=access_token)
     path = storage_settings.key_to_filepath(storage_key)
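
The change is a pure simplification of the lookup idiom: `.get(...)` is the canonical single-row fetch and subsumes the `.filter(...).one()` pattern. As a generic sketch of the equivalence (names taken from the hunk above):

# before: emulate a single-row lookup through a QuerySet
storage = Storage.objects.using(using_key).filter(id=artifact.storage_id).one()
# after: .get() fetches exactly one row and errors on zero or multiple matches
storage = Storage.objects.using(using_key).get(id=artifact.storage_id)
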