lamindb 1.3.2__py3-none-any.whl → 1.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lamindb/__init__.py +52 -36
- lamindb/_finish.py +17 -10
- lamindb/_tracked.py +1 -1
- lamindb/base/__init__.py +3 -1
- lamindb/base/fields.py +40 -22
- lamindb/base/ids.py +1 -94
- lamindb/base/types.py +2 -0
- lamindb/base/uids.py +117 -0
- lamindb/core/_context.py +216 -133
- lamindb/core/_settings.py +38 -25
- lamindb/core/datasets/__init__.py +11 -4
- lamindb/core/datasets/_core.py +5 -5
- lamindb/core/datasets/_small.py +0 -93
- lamindb/core/datasets/mini_immuno.py +172 -0
- lamindb/core/loaders.py +1 -1
- lamindb/core/storage/_backed_access.py +100 -6
- lamindb/core/storage/_polars_lazy_df.py +51 -0
- lamindb/core/storage/_pyarrow_dataset.py +15 -30
- lamindb/core/storage/objects.py +6 -0
- lamindb/core/subsettings/__init__.py +2 -0
- lamindb/core/subsettings/_annotation_settings.py +11 -0
- lamindb/curators/__init__.py +7 -3559
- lamindb/curators/_legacy.py +2056 -0
- lamindb/curators/core.py +1546 -0
- lamindb/errors.py +11 -0
- lamindb/examples/__init__.py +27 -0
- lamindb/examples/schemas/__init__.py +12 -0
- lamindb/examples/schemas/_anndata.py +25 -0
- lamindb/examples/schemas/_simple.py +19 -0
- lamindb/integrations/_vitessce.py +8 -5
- lamindb/migrations/0091_alter_featurevalue_options_alter_space_options_and_more.py +24 -0
- lamindb/migrations/0092_alter_artifactfeaturevalue_artifact_and_more.py +75 -0
- lamindb/models/__init__.py +12 -2
- lamindb/models/_describe.py +21 -4
- lamindb/models/_feature_manager.py +384 -301
- lamindb/models/_from_values.py +1 -1
- lamindb/models/_is_versioned.py +5 -15
- lamindb/models/_label_manager.py +8 -2
- lamindb/models/artifact.py +354 -177
- lamindb/models/artifact_set.py +122 -0
- lamindb/models/can_curate.py +4 -1
- lamindb/models/collection.py +79 -56
- lamindb/models/core.py +1 -1
- lamindb/models/feature.py +78 -47
- lamindb/models/has_parents.py +24 -9
- lamindb/models/project.py +3 -3
- lamindb/models/query_manager.py +221 -22
- lamindb/models/query_set.py +251 -206
- lamindb/models/record.py +211 -344
- lamindb/models/run.py +59 -5
- lamindb/models/save.py +9 -5
- lamindb/models/schema.py +673 -196
- lamindb/models/transform.py +5 -14
- lamindb/models/ulabel.py +8 -5
- {lamindb-1.3.2.dist-info → lamindb-1.5.0.dist-info}/METADATA +8 -7
- lamindb-1.5.0.dist-info/RECORD +108 -0
- lamindb-1.3.2.dist-info/RECORD +0 -95
- {lamindb-1.3.2.dist-info → lamindb-1.5.0.dist-info}/LICENSE +0 -0
- {lamindb-1.3.2.dist-info → lamindb-1.5.0.dist-info}/WHEEL +0 -0
lamindb/models/artifact.py
CHANGED
@@ -5,10 +5,11 @@ import os
|
|
5
5
|
import shutil
|
6
6
|
from collections import defaultdict
|
7
7
|
from pathlib import Path, PurePath, PurePosixPath
|
8
|
-
from typing import TYPE_CHECKING, Any, Union, overload
|
8
|
+
from typing import TYPE_CHECKING, Any, Literal, Union, overload
|
9
9
|
|
10
10
|
import fsspec
|
11
11
|
import lamindb_setup as ln_setup
|
12
|
+
import numpy as np
|
12
13
|
import pandas as pd
|
13
14
|
from anndata import AnnData
|
14
15
|
from django.db import connections, models
|
@@ -38,7 +39,6 @@ from lamindb.errors import FieldValidationError
|
|
38
39
|
from lamindb.models.query_set import QuerySet
|
39
40
|
|
40
41
|
from ..base.users import current_user_id
|
41
|
-
from ..core._compat import is_package_installed
|
42
42
|
from ..core.loaders import load_to_memory
|
43
43
|
from ..core.storage import (
|
44
44
|
LocalPathClasses,
|
@@ -48,6 +48,11 @@ from ..core.storage import (
|
|
48
48
|
write_to_disk,
|
49
49
|
)
|
50
50
|
from ..core.storage._anndata_accessor import _anndata_n_observations
|
51
|
+
from ..core.storage._backed_access import (
|
52
|
+
_track_writes_factory,
|
53
|
+
backed_access,
|
54
|
+
)
|
55
|
+
from ..core.storage._polars_lazy_df import POLARS_SUFFIXES
|
51
56
|
from ..core.storage._pyarrow_dataset import PYARROW_SUFFIXES
|
52
57
|
from ..core.storage._tiledbsoma import _soma_n_observations
|
53
58
|
from ..core.storage.paths import (
|
@@ -61,7 +66,6 @@ from ..core.storage.paths import (
|
|
61
66
|
from ..errors import IntegrityError, InvalidArgument, ValidationError
|
62
67
|
from ..models._is_versioned import (
|
63
68
|
create_uid,
|
64
|
-
message_update_key_in_version_family,
|
65
69
|
)
|
66
70
|
from ._django import get_artifact_with_related
|
67
71
|
from ._feature_manager import (
|
@@ -69,6 +73,7 @@ from ._feature_manager import (
|
|
69
73
|
ParamManager,
|
70
74
|
ParamManagerArtifact,
|
71
75
|
add_label_feature_links,
|
76
|
+
filter_base,
|
72
77
|
get_label_links,
|
73
78
|
)
|
74
79
|
from ._is_versioned import IsVersioned
|
@@ -86,7 +91,7 @@ from .record import (
|
|
86
91
|
_get_record_kwargs,
|
87
92
|
record_repr,
|
88
93
|
)
|
89
|
-
from .run import ParamValue, Run, TracksRun, TracksUpdates, User
|
94
|
+
from .run import Param, ParamValue, Run, TracksRun, TracksUpdates, User
|
90
95
|
from .schema import Schema
|
91
96
|
from .ulabel import ULabel
|
92
97
|
|
@@ -105,9 +110,10 @@ except ImportError:
|
|
105
110
|
|
106
111
|
|
107
112
|
if TYPE_CHECKING:
|
108
|
-
from collections.abc import Iterable
|
113
|
+
from collections.abc import Iterable, Iterator
|
109
114
|
|
110
115
|
from mudata import MuData # noqa: TC004
|
116
|
+
from polars import LazyFrame as PolarsLazyFrame
|
111
117
|
from pyarrow.dataset import Dataset as PyArrowDataset
|
112
118
|
from spatialdata import SpatialData # noqa: TC004
|
113
119
|
from tiledbsoma import Collection as SOMACollection
|
@@ -210,17 +216,6 @@ def process_data(
|
|
210
216
|
|
211
217
|
if not overwritten, data gets stored in default storage
|
212
218
|
"""
|
213
|
-
supported_data_types = [pd.DataFrame, AnnData]
|
214
|
-
if is_package_installed("mudata"):
|
215
|
-
from mudata import MuData
|
216
|
-
|
217
|
-
supported_data_types.append(MuData)
|
218
|
-
if is_package_installed("spatialdata"):
|
219
|
-
from spatialdata import SpatialData
|
220
|
-
|
221
|
-
supported_data_types.append(SpatialData)
|
222
|
-
supported_data_types = tuple(supported_data_types) # type: ignore
|
223
|
-
|
224
219
|
if key is not None:
|
225
220
|
key_suffix = extract_suffix_from_path(PurePosixPath(key), arg_name="key")
|
226
221
|
# use suffix as the (adata) format if the format is not provided
|
@@ -228,7 +223,8 @@ def process_data(
|
|
228
223
|
format = key_suffix[1:]
|
229
224
|
else:
|
230
225
|
key_suffix = None
|
231
|
-
|
226
|
+
|
227
|
+
if isinstance(data, (str, Path, UPath)):
|
232
228
|
access_token = (
|
233
229
|
default_storage._access_token
|
234
230
|
if hasattr(default_storage, "_access_token")
|
@@ -239,6 +235,7 @@ def process_data(
|
|
239
235
|
# for example into a temporary url
|
240
236
|
if path.protocol not in {"http", "https"}:
|
241
237
|
path = path.resolve()
|
238
|
+
|
242
239
|
storage, use_existing_storage_key = process_pathlike(
|
243
240
|
path,
|
244
241
|
default_storage=default_storage,
|
@@ -247,28 +244,37 @@ def process_data(
|
|
247
244
|
)
|
248
245
|
suffix = extract_suffix_from_path(path)
|
249
246
|
memory_rep = None
|
250
|
-
elif
|
247
|
+
elif (
|
248
|
+
isinstance(data, pd.DataFrame)
|
249
|
+
or isinstance(data, AnnData)
|
250
|
+
or data_is_mudata(data)
|
251
|
+
or data_is_spatialdata(data)
|
252
|
+
):
|
251
253
|
storage = default_storage
|
252
254
|
memory_rep = data
|
253
255
|
suffix = infer_suffix(data, format)
|
254
256
|
else:
|
255
257
|
raise NotImplementedError(
|
256
|
-
f"Do not know how to create
|
258
|
+
f"Do not know how to create an Artifact from {data}, pass a path instead."
|
257
259
|
)
|
260
|
+
|
261
|
+
# Check for suffix consistency
|
258
262
|
if key_suffix is not None and key_suffix != suffix and not is_replace:
|
259
263
|
# consciously omitting a trailing period
|
260
|
-
if isinstance(data, (str, Path, UPath)):
|
264
|
+
if isinstance(data, (str, Path, UPath)): # UPathStr, spelled out
|
261
265
|
message = f"The suffix '{suffix}' of the provided path is inconsistent, it should be '{key_suffix}'"
|
262
266
|
else:
|
263
267
|
message = f"The suffix '{key_suffix}' of the provided key is inconsistent, it should be '{suffix}'"
|
264
268
|
raise InvalidArgument(message)
|
269
|
+
|
265
270
|
# in case we have an in-memory representation, we need to write it to disk
|
266
|
-
|
271
|
+
if memory_rep is not None:
|
272
|
+
from lamindb import settings
|
267
273
|
|
268
|
-
if isinstance(data, supported_data_types):
|
269
274
|
path = settings.cache_dir / f"{provisional_uid}{suffix}"
|
270
275
|
write_to_disk(data, path)
|
271
276
|
use_existing_storage_key = False
|
277
|
+
|
272
278
|
return memory_rep, path, suffix, storage, use_existing_storage_key
|
273
279
|
|
274
280
|
|
@@ -311,10 +317,9 @@ def get_stat_or_artifact(
|
|
311
317
|
result = Artifact.objects.using(instance).filter(hash=hash).all()
|
312
318
|
artifact_with_same_hash_exists = len(result) > 0
|
313
319
|
else:
|
314
|
-
storage_id = settings.storage.id
|
315
320
|
result = (
|
316
321
|
Artifact.objects.using(instance)
|
317
|
-
.filter(Q(hash=hash) | Q(key=key,
|
322
|
+
.filter(Q(hash=hash) | Q(key=key, storage=settings.storage.record))
|
318
323
|
.order_by("-created_at")
|
319
324
|
.all()
|
320
325
|
)
|
@@ -533,28 +538,24 @@ def data_is_anndata(data: AnnData | UPathStr) -> bool:
|
|
533
538
|
|
534
539
|
|
535
540
|
def data_is_mudata(data: MuData | UPathStr) -> bool:
|
536
|
-
|
537
|
-
|
538
|
-
|
539
|
-
if isinstance(data, MuData):
|
540
|
-
return True
|
541
|
+
# We are not importing MuData here to keep loaded modules minimal
|
542
|
+
if hasattr(data, "__class__") and data.__class__.__name__ == "MuData":
|
543
|
+
return True
|
541
544
|
if isinstance(data, (str, Path)):
|
542
545
|
return UPath(data).suffix == ".h5mu"
|
543
546
|
return False
|
544
547
|
|
545
548
|
|
546
549
|
def data_is_spatialdata(data: SpatialData | UPathStr) -> bool:
|
547
|
-
|
548
|
-
|
549
|
-
|
550
|
-
|
551
|
-
|
552
|
-
|
553
|
-
|
554
|
-
|
555
|
-
|
556
|
-
return identify_zarr_type(data, check=False) == "spatialdata"
|
557
|
-
return False
|
550
|
+
# We are not importing SpatialData here to keep loaded modules minimal
|
551
|
+
if hasattr(data, "__class__") and data.__class__.__name__ == "SpatialData":
|
552
|
+
return True
|
553
|
+
if isinstance(data, (str, Path)):
|
554
|
+
if UPath(data).suffix == ".zarr":
|
555
|
+
# TODO: inconsistent with anndata, where we run the storage
|
556
|
+
# check only for local, expensive for cloud
|
557
|
+
return identify_zarr_type(data, check=False) == "spatialdata"
|
558
|
+
return False
|
558
559
|
|
559
560
|
|
560
561
|
def _check_otype_artifact(
|
@@ -763,15 +764,15 @@ def _describe_sqlite(self, print_types: bool = False): # for artifact & collect
|
|
763
764
|
return tree
|
764
765
|
|
765
766
|
|
766
|
-
def describe_artifact_collection(self
|
767
|
-
from ._describe import
|
767
|
+
def describe_artifact_collection(self, return_str: bool = False) -> str | None:
|
768
|
+
from ._describe import format_rich_tree
|
768
769
|
|
769
770
|
if not self._state.adding and connections[self._state.db].vendor == "postgresql":
|
770
771
|
tree = _describe_postgres(self)
|
771
772
|
else:
|
772
773
|
tree = _describe_sqlite(self)
|
773
774
|
|
774
|
-
|
775
|
+
return format_rich_tree(tree, return_str=return_str)
|
775
776
|
|
776
777
|
|
777
778
|
def validate_feature(feature: Feature, records: list[Record]) -> None:
|
@@ -962,55 +963,66 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
|
|
962
963
|
revises: `Artifact | None = None` Previous version of the artifact. Is an alternative way to passing `key` to trigger a new version.
|
963
964
|
run: `Run | None = None` The run that creates the artifact.
|
964
965
|
|
965
|
-
|
966
|
+
Examples:
|
966
967
|
|
967
|
-
|
968
|
+
Create an artifact **from a local file or folder**::
|
968
969
|
|
969
|
-
|
970
|
-
|
971
|
-
- Generic array: HDF5 group, zarr group, TileDB store ⟷ HDF5, zarr, TileDB loaders
|
970
|
+
artifact = ln.Artifact("./my_file.parquet", key="examples/my_file.parquet").save()
|
971
|
+
artifact = ln.Artifact("./my_folder", key="project1/my_folder").save()
|
972
972
|
|
973
|
-
|
973
|
+
Calling `.save()` copies or uploads the file to the default storage location of your lamindb instance.
|
974
|
+
If you create an artifact **from a remote file or folder**, lamindb merely registers the S3 `key` and avoids copying the data::
|
974
975
|
|
975
|
-
|
976
|
-
- Fastq: `.fastq` ⟷ /
|
977
|
-
- VCF: `.vcf` ⟷ /
|
978
|
-
- QC: `.html` ⟷ /
|
976
|
+
artifact = ln.Artifact("s3://my_bucket/my_folder/my_file.csv").save()
|
979
977
|
|
980
|
-
|
978
|
+
If you want to **validate & annotate** an array, pass a `schema` to one of the `.from_df()`, `.from_anndata()`, ... constructors::
|
981
979
|
|
982
|
-
|
980
|
+
schema = ln.Schema(itype=ln.Feature) # a schema that merely enforces that feature names exist in the Feature registry
|
981
|
+
artifact = ln.Artifact.from_df("./my_file.parquet", key="my_dataset.parquet", schema=schema).save() # validated and annotated
|
983
982
|
|
984
|
-
|
985
|
-
:class:`~lamindb.Storage`
|
986
|
-
Storage locations for artifacts.
|
987
|
-
:class:`~lamindb.Collection`
|
988
|
-
Collections of artifacts.
|
989
|
-
:meth:`~lamindb.Artifact.from_df`
|
990
|
-
Create an artifact from a `DataFrame`.
|
991
|
-
:meth:`~lamindb.Artifact.from_anndata`
|
992
|
-
Create an artifact from an `AnnData`.
|
983
|
+
You can make a **new version** of an artifact by passing an existing `key`::
|
993
984
|
|
994
|
-
|
985
|
+
artifact_v2 = ln.Artifact("./my_file.parquet", key="examples/my_file.parquet").save()
|
986
|
+
artifact_v2.versions.df() # see all versions
|
987
|
+
|
988
|
+
You can write artifacts to other storage locations by switching the current default storage location (:attr:`~lamindb.core.Settings.storage`)::
|
989
|
+
|
990
|
+
ln.settings.storage = "s3://some-bucket"
|
995
991
|
|
996
|
-
|
992
|
+
Sometimes you want to **avoid mapping the artifact into a path hierarchy**, and you only pass `description`::
|
997
993
|
|
998
|
-
|
999
|
-
|
994
|
+
artifact = ln.Artifact("./my_folder", description="My folder").save()
|
995
|
+
artifact_v2 = ln.Artifact("./my_folder", revises=old_artifact).save() # need to version based on `revises`, a shared description does not trigger a new version
|
1000
996
|
|
1001
|
-
|
1002
|
-
(If it's a local instance, the "upload" is a mere copy operation.)
|
997
|
+
Notes:
|
1003
998
|
|
1004
|
-
|
999
|
+
.. dropdown:: Typical storage formats & their API accessors
|
1005
1000
|
|
1006
|
-
|
1001
|
+
Arrays:
|
1007
1002
|
|
1008
|
-
|
1003
|
+
- Table: `.csv`, `.tsv`, `.parquet`, `.ipc` ⟷ `DataFrame`, `pyarrow.Table`
|
1004
|
+
- Annotated matrix: `.h5ad`, `.h5mu`, `.zrad` ⟷ `AnnData`, `MuData`
|
1005
|
+
- Generic array: HDF5 group, zarr group, TileDB store ⟷ HDF5, zarr, TileDB loaders
|
1009
1006
|
|
1010
|
-
|
1011
|
-
>>> artifact_v2.versions.df() # see all versions
|
1007
|
+
Non-arrays:
|
1012
1008
|
|
1013
|
-
|
1009
|
+
- Image: `.jpg`, `.png` ⟷ `np.ndarray`, ...
|
1010
|
+
- Fastq: `.fastq` ⟷ /
|
1011
|
+
- VCF: `.vcf` ⟷ /
|
1012
|
+
- QC: `.html` ⟷ /
|
1013
|
+
|
1014
|
+
You'll find these values in the `suffix` & `accessor` fields.
|
1015
|
+
|
1016
|
+
LaminDB makes some default choices (e.g., serialize a `DataFrame` as a `.parquet` file).
|
1017
|
+
|
1018
|
+
.. dropdown:: Will artifacts get duplicated?
|
1019
|
+
|
1020
|
+
If an artifact with the exact same hash already exists, `Artifact()` returns the existing artifact.
|
1021
|
+
|
1022
|
+
In concurrent workloads where the same artifact is created repeatedly at the exact same time, `.save()`
|
1023
|
+
detects the duplication and will return the existing artifact.
|
1024
|
+
|
1025
|
+
.. dropdown:: Why does the constructor look the way it looks?
|
1014
1026
|
|
1015
1027
|
It's inspired by APIs building on AWS S3.
|
1016
1028
|
|
@@ -1031,18 +1043,15 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
|
|
1031
1043
|
bucket = quilt3.Bucket('mybucket')
|
1032
1044
|
bucket.put_file('hello.txt', '/tmp/hello.txt')
|
1033
1045
|
|
1034
|
-
|
1035
|
-
|
1036
|
-
|
1037
|
-
|
1038
|
-
|
1039
|
-
|
1040
|
-
|
1041
|
-
|
1042
|
-
|
1043
|
-
If an artifact with the exact same hash already exists, `Artifact()` returns the existing artifact. In concurrent workloads where
|
1044
|
-
the same artifact is created multiple times, `Artifact()` doesn't yet return the existing artifact but creates a new one; `.save()` however
|
1045
|
-
detects the duplication and will return the existing artifact.
|
1046
|
+
See Also:
|
1047
|
+
:class:`~lamindb.Storage`
|
1048
|
+
Storage locations for artifacts.
|
1049
|
+
:class:`~lamindb.Collection`
|
1050
|
+
Collections of artifacts.
|
1051
|
+
:meth:`~lamindb.Artifact.from_df`
|
1052
|
+
Create an artifact from a `DataFrame`.
|
1053
|
+
:meth:`~lamindb.Artifact.from_anndata`
|
1054
|
+
Create an artifact from an `AnnData`.
|
1046
1055
|
|
1047
1056
|
"""
|
1048
1057
|
|
@@ -1055,6 +1064,8 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
|
|
1055
1064
|
params: ParamManager = ParamManagerArtifact # type: ignore
|
1056
1065
|
"""Param manager.
|
1057
1066
|
|
1067
|
+
What features are for dataset-like artifacts, parameters are for model-like artifacts & runs.
|
1068
|
+
|
1058
1069
|
Example::
|
1059
1070
|
|
1060
1071
|
artifact.params.add_values({
|
@@ -1071,23 +1082,23 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
|
|
1071
1082
|
features: FeatureManager = FeatureManager # type: ignore
|
1072
1083
|
"""Feature manager.
|
1073
1084
|
|
1074
|
-
|
1085
|
+
Typically, you annotate a dataset with features by defining a `Schema` and passing it to the `Artifact` constructor.
|
1075
1086
|
|
1076
|
-
|
1087
|
+
Here is how to annotate an artifact ad hoc::
|
1077
1088
|
|
1078
1089
|
artifact.features.add_values({
|
1079
1090
|
"species": organism, # here, organism is an Organism record
|
1080
1091
|
"scientist": ['Barbara McClintock', 'Edgar Anderson'],
|
1081
1092
|
"temperature": 27.6,
|
1082
|
-
"
|
1093
|
+
"experiment": "Experiment 1"
|
1083
1094
|
})
|
1084
1095
|
|
1085
|
-
Query
|
1096
|
+
Query artifacts by features::
|
1086
1097
|
|
1087
|
-
ln.Artifact.
|
1098
|
+
ln.Artifact.filter(scientist="Barbara McClintock")
|
1088
1099
|
|
1089
1100
|
Features may or may not be part of the artifact content in storage. For
|
1090
|
-
instance, the :class:`~lamindb.
|
1101
|
+
instance, the :class:`~lamindb.curators.DataFrameCurator` flow validates the columns of a
|
1091
1102
|
`DataFrame`-like artifact and annotates it with features corresponding to
|
1092
1103
|
these columns. `artifact.features.add_values`, by contrast, does not
|
1093
1104
|
validate the content of the artifact.
|
@@ -1100,22 +1111,22 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
|
|
1100
1111
|
To annotate with labels, you typically use the registry-specific accessors,
|
1101
1112
|
for instance :attr:`~lamindb.Artifact.ulabels`::
|
1102
1113
|
|
1103
|
-
|
1104
|
-
artifact.ulabels.add(
|
1114
|
+
experiment = ln.ULabel(name="Experiment 1").save()
|
1115
|
+
artifact.ulabels.add(experiment)
|
1105
1116
|
|
1106
1117
|
Similarly, you query based on these accessors::
|
1107
1118
|
|
1108
|
-
ln.Artifact.filter(ulabels__name="
|
1119
|
+
ln.Artifact.filter(ulabels__name="Experiment 1").all()
|
1109
1120
|
|
1110
1121
|
Unlike the registry-specific accessors, the `.labels` accessor provides
|
1111
1122
|
a way of associating labels with features::
|
1112
1123
|
|
1113
|
-
|
1114
|
-
artifact.labels.add(
|
1124
|
+
experiment = ln.Feature(name="experiment", dtype="cat").save()
|
1125
|
+
artifact.labels.add(experiment, feature=study)
|
1115
1126
|
|
1116
1127
|
Note that the above is equivalent to::
|
1117
1128
|
|
1118
|
-
artifact.features.add_values({"
|
1129
|
+
artifact.features.add_values({"experiment": experiment})
|
1119
1130
|
"""
|
1120
1131
|
from ._label_manager import LabelManager
|
1121
1132
|
|
@@ -1343,15 +1354,7 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
|
|
1343
1354
|
f"Only {valid_keywords} can be passed, you passed: {kwargs}"
|
1344
1355
|
)
|
1345
1356
|
if revises is not None and key is not None and revises.key != key:
|
1346
|
-
|
1347
|
-
suid=revises.stem_uid,
|
1348
|
-
existing_key=revises.key,
|
1349
|
-
new_key=key,
|
1350
|
-
registry="Artifact",
|
1351
|
-
)
|
1352
|
-
raise ValueError(
|
1353
|
-
f"`key` is {key}, but `revises.key` is '{revises.key}'\n\n Either do *not* pass `key`.\n\n{note}"
|
1354
|
-
)
|
1357
|
+
logger.warning(f"renaming artifact from '{revises.key}' to {key}")
|
1355
1358
|
if revises is not None:
|
1356
1359
|
if not isinstance(revises, Artifact):
|
1357
1360
|
raise TypeError("`revises` has to be of type `Artifact`")
|
@@ -1431,11 +1434,7 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
|
|
1431
1434
|
kwargs["uid"] = uid
|
1432
1435
|
|
1433
1436
|
# only set key now so that we don't do a look-up on it in case revises is passed
|
1434
|
-
if revises is not None and revises.key is not None:
|
1435
|
-
assert revises.key.endswith(kwargs["suffix"]), ( # noqa: S101
|
1436
|
-
revises.key,
|
1437
|
-
kwargs["suffix"],
|
1438
|
-
)
|
1437
|
+
if revises is not None and revises.key is not None and kwargs["key"] is None:
|
1439
1438
|
kwargs["key"] = revises.key
|
1440
1439
|
|
1441
1440
|
kwargs["kind"] = kind
|
@@ -1530,15 +1529,84 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
|
|
1530
1529
|
- Guide: :doc:`docs:registries`
|
1531
1530
|
- Method in `Record` base class: :meth:`~lamindb.models.Record.get`
|
1532
1531
|
|
1533
|
-
Examples
|
1532
|
+
Examples:
|
1533
|
+
|
1534
|
+
::
|
1534
1535
|
|
1535
|
-
|
1536
|
-
|
1536
|
+
artifact = ln.Artifact.get("tCUkRcaEjTjhtozp0000")
|
1537
|
+
artifact = ln.Artifact.get(key="examples/my_file.parquet")
|
1537
1538
|
"""
|
1538
1539
|
from .query_set import QuerySet
|
1539
1540
|
|
1540
1541
|
return QuerySet(model=cls).get(idlike, **expressions)
|
1541
1542
|
|
1543
|
+
@classmethod
|
1544
|
+
def filter(
|
1545
|
+
cls,
|
1546
|
+
*queries,
|
1547
|
+
**expressions,
|
1548
|
+
) -> QuerySet:
|
1549
|
+
"""Query a set of artifacts.
|
1550
|
+
|
1551
|
+
Args:
|
1552
|
+
*queries: `Q` expressions.
|
1553
|
+
**expressions: Features, params, fields via the Django query syntax.
|
1554
|
+
|
1555
|
+
See Also:
|
1556
|
+
- Guide: :doc:`docs:registries`
|
1557
|
+
|
1558
|
+
Examples:
|
1559
|
+
|
1560
|
+
Query by fields::
|
1561
|
+
|
1562
|
+
ln.Artifact.filter(key="examples/my_file.parquet")
|
1563
|
+
|
1564
|
+
Query by features::
|
1565
|
+
|
1566
|
+
ln.Artifact.filter(cell_type_by_model__name="T cell")
|
1567
|
+
|
1568
|
+
Query by params::
|
1569
|
+
|
1570
|
+
ln.Artifact.filter(hyperparam_x=100)
|
1571
|
+
"""
|
1572
|
+
from .query_set import QuerySet
|
1573
|
+
|
1574
|
+
if expressions:
|
1575
|
+
keys_normalized = [key.split("__")[0] for key in expressions]
|
1576
|
+
field_or_feature_or_param = keys_normalized[0].split("__")[0]
|
1577
|
+
if field_or_feature_or_param in Artifact.__get_available_fields__():
|
1578
|
+
return QuerySet(model=cls).filter(*queries, **expressions)
|
1579
|
+
elif all(
|
1580
|
+
features_validated := Feature.validate(
|
1581
|
+
keys_normalized, field="name", mute=True
|
1582
|
+
)
|
1583
|
+
):
|
1584
|
+
return filter_base(FeatureManager, **expressions)
|
1585
|
+
elif all(
|
1586
|
+
params_validated := Param.validate(
|
1587
|
+
keys_normalized, field="name", mute=True
|
1588
|
+
)
|
1589
|
+
):
|
1590
|
+
return filter_base(ParamManagerArtifact, **expressions)
|
1591
|
+
else:
|
1592
|
+
if sum(features_validated) < sum(params_validated):
|
1593
|
+
params = ", ".join(
|
1594
|
+
sorted(np.array(keys_normalized)[~params_validated])
|
1595
|
+
)
|
1596
|
+
message = f"param names: {params}"
|
1597
|
+
else:
|
1598
|
+
features = ", ".join(
|
1599
|
+
sorted(np.array(keys_normalized)[~params_validated])
|
1600
|
+
)
|
1601
|
+
message = f"feature names: {features}"
|
1602
|
+
fields = ", ".join(sorted(cls.__get_available_fields__()))
|
1603
|
+
raise InvalidArgument(
|
1604
|
+
f"You can query either by available fields: {fields}\n"
|
1605
|
+
f"Or fix invalid {message}"
|
1606
|
+
)
|
1607
|
+
else:
|
1608
|
+
return QuerySet(model=cls).filter(*queries, **expressions)
|
1609
|
+
|
1542
1610
|
@classmethod
|
1543
1611
|
def from_df(
|
1544
1612
|
cls,
|
@@ -1548,9 +1616,10 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
|
|
1548
1616
|
description: str | None = None,
|
1549
1617
|
run: Run | None = None,
|
1550
1618
|
revises: Artifact | None = None,
|
1619
|
+
schema: Schema | None = None,
|
1551
1620
|
**kwargs,
|
1552
1621
|
) -> Artifact:
|
1553
|
-
"""Create from `DataFrame`, validate &
|
1622
|
+
"""Create from `DataFrame`, optionally validate & annotate.
|
1554
1623
|
|
1555
1624
|
Args:
|
1556
1625
|
df: A `DataFrame` object.
|
@@ -1559,6 +1628,7 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
|
|
1559
1628
|
description: A description.
|
1560
1629
|
revises: An old version of the artifact.
|
1561
1630
|
run: The run that creates the artifact.
|
1631
|
+
schema: A schema that defines how to validate & annotate.
|
1562
1632
|
|
1563
1633
|
See Also:
|
1564
1634
|
:meth:`~lamindb.Collection`
|
@@ -1566,19 +1636,30 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
|
|
1566
1636
|
:class:`~lamindb.Feature`
|
1567
1637
|
Track features.
|
1568
1638
|
|
1569
|
-
Example
|
1639
|
+
Example:
|
1570
1640
|
|
1571
|
-
|
1641
|
+
No validation and annotation::
|
1642
|
+
|
1643
|
+
import lamindb as ln
|
1644
|
+
|
1645
|
+
df = ln.core.datasets.mini_immuno.get_dataset1()
|
1646
|
+
artifact = ln.Artifact.from_df(df, key="examples/dataset1.parquet").save()
|
1647
|
+
|
1648
|
+
With validation and annotation.
|
1649
|
+
|
1650
|
+
.. literalinclude:: scripts/curate_dataframe_flexible.py
|
1651
|
+
:language: python
|
1652
|
+
|
1653
|
+
Under the hood, this uses the following schema.
|
1654
|
+
|
1655
|
+
.. literalinclude:: scripts/define_valid_features.py
|
1656
|
+
:language: python
|
1657
|
+
|
1658
|
+
Valid features & labels were defined as:
|
1659
|
+
|
1660
|
+
.. literalinclude:: scripts/define_mini_immuno_features_labels.py
|
1661
|
+
:language: python
|
1572
1662
|
|
1573
|
-
df = ln.core.datasets.df_iris_in_meter_batch1()
|
1574
|
-
df.head()
|
1575
|
-
#> sepal_length sepal_width petal_length petal_width iris_organism_code
|
1576
|
-
#> 0 0.051 0.035 0.014 0.002 0
|
1577
|
-
#> 1 0.049 0.030 0.014 0.002 0
|
1578
|
-
#> 2 0.047 0.032 0.013 0.002 0
|
1579
|
-
#> 3 0.046 0.031 0.015 0.002 0
|
1580
|
-
#> 4 0.050 0.036 0.014 0.002 0
|
1581
|
-
artifact = ln.Artifact.from_df(df, key="iris/result_batch1.parquet").save()
|
1582
1663
|
"""
|
1583
1664
|
artifact = Artifact( # type: ignore
|
1584
1665
|
data=df,
|
@@ -1591,6 +1672,13 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
|
|
1591
1672
|
**kwargs,
|
1592
1673
|
)
|
1593
1674
|
artifact.n_observations = len(df)
|
1675
|
+
if schema is not None:
|
1676
|
+
from ..curators import DataFrameCurator
|
1677
|
+
|
1678
|
+
curator = DataFrameCurator(artifact, schema)
|
1679
|
+
curator.validate()
|
1680
|
+
artifact.schema = schema
|
1681
|
+
artifact._curator = curator
|
1594
1682
|
return artifact
|
1595
1683
|
|
1596
1684
|
@classmethod
|
@@ -1602,9 +1690,10 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
|
|
1602
1690
|
description: str | None = None,
|
1603
1691
|
run: Run | None = None,
|
1604
1692
|
revises: Artifact | None = None,
|
1693
|
+
schema: Schema | None = None,
|
1605
1694
|
**kwargs,
|
1606
1695
|
) -> Artifact:
|
1607
|
-
"""Create from
|
1696
|
+
"""Create from `AnnData`, optionally validate & annotate.
|
1608
1697
|
|
1609
1698
|
Args:
|
1610
1699
|
adata: An `AnnData` object or a path of AnnData-like.
|
@@ -1613,6 +1702,7 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
|
|
1613
1702
|
description: A description.
|
1614
1703
|
revises: An old version of the artifact.
|
1615
1704
|
run: The run that creates the artifact.
|
1705
|
+
schema: A schema that defines how to validate & annotate.
|
1616
1706
|
|
1617
1707
|
See Also:
|
1618
1708
|
|
@@ -1621,12 +1711,31 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
|
|
1621
1711
|
:class:`~lamindb.Feature`
|
1622
1712
|
Track features.
|
1623
1713
|
|
1624
|
-
Example
|
1714
|
+
Example:
|
1625
1715
|
|
1626
|
-
|
1716
|
+
No validation and annotation::
|
1717
|
+
|
1718
|
+
import lamindb as ln
|
1719
|
+
|
1720
|
+
adata = ln.core.datasets.anndata_with_obs()
|
1721
|
+
artifact = ln.Artifact.from_anndata(adata, key="mini_anndata_with_obs.h5ad").save()
|
1722
|
+
|
1723
|
+
With validation and annotation.
|
1724
|
+
|
1725
|
+
.. literalinclude:: scripts/curate_anndata_flexible.py
|
1726
|
+
:language: python
|
1727
|
+
|
1728
|
+
Under the hood, this uses the following schema.
|
1729
|
+
|
1730
|
+
.. literalinclude:: scripts/define_schema_anndata_ensembl_gene_ids_and_valid_features_in_obs.py
|
1731
|
+
:language: python
|
1732
|
+
|
1733
|
+
This schema transposes the `var` DataFrame during curation, so that one validates and annotates the `var.T` schema, i.e., `[ENSG00000153563, ENSG00000010610, ENSG00000170458]`.
|
1734
|
+
If one doesn't transpose, one would annotate with the schema of `var`, i.e., `[gene_symbol, gene_type]`.
|
1735
|
+
|
1736
|
+
.. image:: https://lamin-site-assets.s3.amazonaws.com/.lamindb/gLyfToATM7WUzkWW0001.png
|
1737
|
+
:width: 800px
|
1627
1738
|
|
1628
|
-
adata = ln.core.datasets.anndata_with_obs()
|
1629
|
-
artifact = ln.Artifact.from_anndata(adata, key="mini_anndata_with_obs.h5ad").save()
|
1630
1739
|
"""
|
1631
1740
|
if not data_is_anndata(adata):
|
1632
1741
|
raise ValueError(
|
@@ -1654,6 +1763,13 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
|
|
1654
1763
|
# and the proper path through create_path for cloud paths
|
1655
1764
|
obj_for_obs = artifact.path
|
1656
1765
|
artifact.n_observations = _anndata_n_observations(obj_for_obs)
|
1766
|
+
if schema is not None:
|
1767
|
+
from ..curators import AnnDataCurator
|
1768
|
+
|
1769
|
+
curator = AnnDataCurator(artifact, schema)
|
1770
|
+
curator.validate()
|
1771
|
+
artifact.schema = schema
|
1772
|
+
artifact._curator = curator
|
1657
1773
|
return artifact
|
1658
1774
|
|
1659
1775
|
@classmethod
|
@@ -1665,9 +1781,10 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
|
|
1665
1781
|
description: str | None = None,
|
1666
1782
|
run: Run | None = None,
|
1667
1783
|
revises: Artifact | None = None,
|
1784
|
+
schema: Schema | None = None,
|
1668
1785
|
**kwargs,
|
1669
1786
|
) -> Artifact:
|
1670
|
-
"""Create from
|
1787
|
+
"""Create from `MuData`, optionally validate & annotate.
|
1671
1788
|
|
1672
1789
|
Args:
|
1673
1790
|
mdata: A `MuData` object.
|
@@ -1676,6 +1793,7 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
|
|
1676
1793
|
description: A description.
|
1677
1794
|
revises: An old version of the artifact.
|
1678
1795
|
run: The run that creates the artifact.
|
1796
|
+
schema: A schema that defines how to validate & annotate.
|
1679
1797
|
|
1680
1798
|
See Also:
|
1681
1799
|
:meth:`~lamindb.Collection`
|
@@ -1704,6 +1822,13 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
|
|
1704
1822
|
)
|
1705
1823
|
if not isinstance(mdata, UPathStr):
|
1706
1824
|
artifact.n_observations = mdata.n_obs
|
1825
|
+
if schema is not None:
|
1826
|
+
from ..curators import MuDataCurator
|
1827
|
+
|
1828
|
+
curator = MuDataCurator(artifact, schema)
|
1829
|
+
curator.validate()
|
1830
|
+
artifact.schema = schema
|
1831
|
+
artifact._curator = curator
|
1707
1832
|
return artifact
|
1708
1833
|
|
1709
1834
|
@classmethod
|
@@ -1715,17 +1840,19 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
|
|
1715
1840
|
description: str | None = None,
|
1716
1841
|
run: Run | None = None,
|
1717
1842
|
revises: Artifact | None = None,
|
1843
|
+
schema: Schema | None = None,
|
1718
1844
|
**kwargs,
|
1719
1845
|
) -> Artifact:
|
1720
|
-
"""Create from
|
1846
|
+
"""Create from `SpatialData`, optionally validate & annotate.
|
1721
1847
|
|
1722
1848
|
Args:
|
1723
|
-
|
1849
|
+
sdata: A `SpatialData` object.
|
1724
1850
|
key: A relative path within default storage,
|
1725
1851
|
e.g., `"myfolder/myfile.zarr"`.
|
1726
1852
|
description: A description.
|
1727
1853
|
revises: An old version of the artifact.
|
1728
1854
|
run: The run that creates the artifact.
|
1855
|
+
schema: A schema that defines how to validate & annotate.
|
1729
1856
|
|
1730
1857
|
See Also:
|
1731
1858
|
:meth:`~lamindb.Collection`
|
@@ -1733,11 +1860,21 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
|
|
1733
1860
|
:class:`~lamindb.Feature`
|
1734
1861
|
Track features.
|
1735
1862
|
|
1736
|
-
Example
|
1863
|
+
Example:
|
1737
1864
|
|
1738
|
-
|
1865
|
+
No validation and annotation::
|
1866
|
+
|
1867
|
+
import lamindb as ln
|
1868
|
+
|
1869
|
+
artifact = ln.Artifact.from_spatialdata(sdata, key="my_dataset.zarr").save()
|
1739
1870
|
|
1740
|
-
|
1871
|
+
With validation and annotation.
|
1872
|
+
|
1873
|
+
.. literalinclude:: scripts/define_schema_spatialdata.py
|
1874
|
+
:language: python
|
1875
|
+
|
1876
|
+
.. literalinclude:: scripts/curate_spatialdata.py
|
1877
|
+
:language: python
|
1741
1878
|
"""
|
1742
1879
|
if not data_is_spatialdata(sdata):
|
1743
1880
|
raise ValueError(
|
@@ -1755,6 +1892,13 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
|
|
1755
1892
|
)
|
1756
1893
|
# ill-defined https://scverse.zulipchat.com/#narrow/channel/315824-spatial/topic/How.20to.20calculate.20the.20number.20of.20observations.3F
|
1757
1894
|
# artifact.n_observations = ...
|
1895
|
+
if schema is not None:
|
1896
|
+
from ..curators import SpatialDataCurator
|
1897
|
+
|
1898
|
+
curator = SpatialDataCurator(artifact, schema)
|
1899
|
+
curator.validate()
|
1900
|
+
artifact.schema = schema
|
1901
|
+
artifact._curator = curator
|
1758
1902
|
return artifact
|
1759
1903
|
|
1760
1904
|
@classmethod
|
@@ -2022,29 +2166,39 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
|
|
2022
2166
|
self._old_suffix = self.suffix
|
2023
2167
|
|
2024
2168
|
def open(
|
2025
|
-
self,
|
2026
|
-
|
2027
|
-
|
2028
|
-
|
2029
|
-
|
2030
|
-
|
2031
|
-
|
2032
|
-
|
2033
|
-
|
2034
|
-
|
2169
|
+
self,
|
2170
|
+
mode: str = "r",
|
2171
|
+
engine: Literal["pyarrow", "polars"] = "pyarrow",
|
2172
|
+
is_run_input: bool | None = None,
|
2173
|
+
**kwargs,
|
2174
|
+
) -> (
|
2175
|
+
AnnDataAccessor
|
2176
|
+
| BackedAccessor
|
2177
|
+
| SOMACollection
|
2178
|
+
| SOMAExperiment
|
2179
|
+
| SOMAMeasurement
|
2180
|
+
| PyArrowDataset
|
2181
|
+
| Iterator[PolarsLazyFrame]
|
2182
|
+
):
|
2183
|
+
"""Open a dataset for streaming.
|
2035
2184
|
|
2036
2185
|
Works for `AnnData` (`.h5ad` and `.zarr`), generic `hdf5` and `zarr`,
|
2037
|
-
`tiledbsoma` objects (`.tiledbsoma`), `pyarrow` compatible formats
|
2186
|
+
`tiledbsoma` objects (`.tiledbsoma`), `pyarrow` or `polars` compatible formats
|
2187
|
+
(`.parquet`, `.csv`, `.ipc` etc. files or directories with such files).
|
2038
2188
|
|
2039
2189
|
Args:
|
2040
2190
|
mode: can only be `"w"` (write mode) for `tiledbsoma` stores,
|
2041
2191
|
otherwise should be always `"r"` (read-only mode).
|
2192
|
+
engine: Which module to use for lazy loading of a dataframe
|
2193
|
+
from `pyarrow` or `polars` compatible formats.
|
2194
|
+
This has no effect if the artifact is not a dataframe, i.e.
|
2195
|
+
if it is an `AnnData,` `hdf5`, `zarr` or `tiledbsoma` object.
|
2042
2196
|
is_run_input: Whether to track this artifact as run input.
|
2043
2197
|
**kwargs: Keyword arguments for the accessor, i.e. `h5py` or `zarr` connection,
|
2044
|
-
`pyarrow.dataset.dataset
|
2198
|
+
`pyarrow.dataset.dataset`, `polars.scan_*` function.
|
2045
2199
|
|
2046
2200
|
Notes:
|
2047
|
-
For more info, see
|
2201
|
+
For more info, see guide: :doc:`/arrays`.
|
2048
2202
|
|
2049
2203
|
Example::
|
2050
2204
|
|
@@ -2057,6 +2211,10 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
|
|
2057
2211
|
#> AnnDataAccessor object with n_obs × n_vars = 70 × 765
|
2058
2212
|
#> constructed for the AnnData object pbmc68k.h5ad
|
2059
2213
|
#> ...
|
2214
|
+
artifact = ln.Artifact.get(key="lndb-storage/df.parquet")
|
2215
|
+
artifact.open()
|
2216
|
+
#> pyarrow._dataset.FileSystemDataset
|
2217
|
+
|
2060
2218
|
"""
|
2061
2219
|
if self._overwrite_versions and not self.is_latest:
|
2062
2220
|
raise ValueError(INCONSISTENT_STATE_MSG)
|
@@ -2064,6 +2222,7 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
|
|
2064
2222
|
h5_suffixes = [".h5", ".hdf5", ".h5ad"]
|
2065
2223
|
h5_suffixes += [s + ".gz" for s in h5_suffixes]
|
2066
2224
|
# ignore empty suffix for now
|
2225
|
+
df_suffixes = tuple(set(PYARROW_SUFFIXES).union(POLARS_SUFFIXES))
|
2067
2226
|
suffixes = (
|
2068
2227
|
(
|
2069
2228
|
"",
|
@@ -2072,7 +2231,7 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
|
|
2072
2231
|
".tiledbsoma",
|
2073
2232
|
)
|
2074
2233
|
+ tuple(h5_suffixes)
|
2075
|
-
+
|
2234
|
+
+ df_suffixes
|
2076
2235
|
+ tuple(
|
2077
2236
|
s + ".gz" for s in PYARROW_SUFFIXES
|
2078
2237
|
) # this doesn't work for externally gzipped files, REMOVE LATER
|
@@ -2080,10 +2239,10 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
|
|
2080
2239
|
if self.suffix not in suffixes:
|
2081
2240
|
raise ValueError(
|
2082
2241
|
"Artifact should have a zarr, h5, tiledbsoma object"
|
2083
|
-
" or a compatible `pyarrow.dataset.dataset` directory"
|
2242
|
+
" or a compatible `pyarrow.dataset.dataset` or `polars.scan_*` directory"
|
2084
2243
|
" as the underlying data, please use one of the following suffixes"
|
2085
2244
|
f" for the object name: {', '.join(suffixes[1:])}."
|
2086
|
-
f" Or no suffix for a folder with {', '.join(
|
2245
|
+
f" Or no suffix for a folder with {', '.join(df_suffixes)} files"
|
2087
2246
|
" (no mixing allowed)."
|
2088
2247
|
)
|
2089
2248
|
if self.suffix != ".tiledbsoma" and self.key != "soma" and mode != "r":
|
@@ -2092,10 +2251,6 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
|
|
2092
2251
|
)
|
2093
2252
|
|
2094
2253
|
from lamindb import settings
|
2095
|
-
from lamindb.core.storage._backed_access import (
|
2096
|
-
_track_writes_factory,
|
2097
|
-
backed_access,
|
2098
|
-
)
|
2099
2254
|
|
2100
2255
|
using_key = settings._using_key
|
2101
2256
|
filepath, cache_key = filepath_cache_key_from_artifact(
|
@@ -2116,14 +2271,22 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
|
|
2116
2271
|
) and not filepath.synchronize(localpath, just_check=True)
|
2117
2272
|
if open_cache:
|
2118
2273
|
try:
|
2119
|
-
access = backed_access(
|
2274
|
+
access = backed_access(
|
2275
|
+
localpath, mode, engine, using_key=using_key, **kwargs
|
2276
|
+
)
|
2120
2277
|
except Exception as e:
|
2121
|
-
|
2278
|
+
# also ignore ValueError here because
|
2279
|
+
# such errors most probably just imply an incorrect argument
|
2280
|
+
if isinstance(filepath, LocalPathClasses) or isinstance(
|
2281
|
+
e, (ImportError, ValueError)
|
2282
|
+
):
|
2122
2283
|
raise e
|
2123
2284
|
logger.warning(
|
2124
2285
|
f"The cache might be corrupted: {e}. Trying to open directly."
|
2125
2286
|
)
|
2126
|
-
access = backed_access(
|
2287
|
+
access = backed_access(
|
2288
|
+
filepath, mode, engine, using_key=using_key, **kwargs
|
2289
|
+
)
|
2127
2290
|
# happens only if backed_access has been successful
|
2128
2291
|
# delete the corrupted cache
|
2129
2292
|
if localpath.is_dir():
|
@@ -2131,7 +2294,9 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
|
|
2131
2294
|
else:
|
2132
2295
|
localpath.unlink(missing_ok=True)
|
2133
2296
|
else:
|
2134
|
-
access = backed_access(
|
2297
|
+
access = backed_access(
|
2298
|
+
filepath, mode, engine, using_key=using_key, **kwargs
|
2299
|
+
)
|
2135
2300
|
if is_tiledbsoma_w:
|
2136
2301
|
|
2137
2302
|
def finalize():
|
@@ -2304,6 +2469,9 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
|
|
2304
2469
|
artifact = ln.Artifact.get(key="some.tiledbsoma". is_latest=True)
|
2305
2470
|
artiact.delete() # delete all versions, the data will be deleted or prompted for deletion.
|
2306
2471
|
"""
|
2472
|
+
# we're *not* running the line below because the case `storage is None` triggers user feedback in one case
|
2473
|
+
# storage = True if storage is None else storage
|
2474
|
+
|
2307
2475
|
# this first check means an invalid delete fails fast rather than cascading through
|
2308
2476
|
# database and storage permission errors
|
2309
2477
|
if os.getenv("LAMINDB_MULTI_INSTANCE") is None:
|
@@ -2354,8 +2522,10 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
|
|
2354
2522
|
# only delete in storage if DB delete is successful
|
2355
2523
|
# DB delete might error because of a foreign key constraint violated etc.
|
2356
2524
|
if self._overwrite_versions and self.is_latest:
|
2357
|
-
|
2358
|
-
|
2525
|
+
logger.important(
|
2526
|
+
"deleting all versions of this artifact because they all share the same store"
|
2527
|
+
)
|
2528
|
+
for version in self.versions.all(): # includes self
|
2359
2529
|
_delete_skip_storage(version)
|
2360
2530
|
else:
|
2361
2531
|
self._delete_skip_storage()
|
@@ -2365,7 +2535,7 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
|
|
2365
2535
|
delete_in_storage = False
|
2366
2536
|
if storage:
|
2367
2537
|
logger.warning(
|
2368
|
-
"
|
2538
|
+
"storage argument is ignored; can't delete store of a previous version if overwrite_versions is True"
|
2369
2539
|
)
|
2370
2540
|
elif self.key is None or self._key_is_virtual:
|
2371
2541
|
# do not ask for confirmation also if storage is None
|
@@ -2466,6 +2636,10 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
|
|
2466
2636
|
local_path_cache,
|
2467
2637
|
)
|
2468
2638
|
logger.important(f"moved local artifact to cache: {local_path_cache}")
|
2639
|
+
if hasattr(self, "_curator"):
|
2640
|
+
curator = self._curator
|
2641
|
+
delattr(self, "_curator")
|
2642
|
+
curator.save_artifact()
|
2469
2643
|
return self
|
2470
2644
|
|
2471
2645
|
def restore(self) -> None:
|
@@ -2478,14 +2652,13 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
|
|
2478
2652
|
self._branch_code = 1
|
2479
2653
|
self.save()
|
2480
2654
|
|
2481
|
-
def describe(self) -> None:
|
2482
|
-
"""Describe
|
2483
|
-
|
2484
|
-
Example::
|
2655
|
+
def describe(self, return_str: bool = False) -> None:
|
2656
|
+
"""Describe record including linked records.
|
2485
2657
|
|
2486
|
-
|
2658
|
+
Args:
|
2659
|
+
return_str: Return a string instead of printing.
|
2487
2660
|
"""
|
2488
|
-
return describe_artifact_collection(self)
|
2661
|
+
return describe_artifact_collection(self, return_str=return_str)
|
2489
2662
|
|
2490
2663
|
def _populate_subsequent_runs(self, run: Run) -> None:
|
2491
2664
|
_populate_subsequent_runs_(self, run)
|
@@ -2525,9 +2698,11 @@ def _save_skip_storage(artifact, **kwargs) -> None:
|
|
2525
2698
|
|
2526
2699
|
class ArtifactFeatureValue(BasicRecord, LinkORM, TracksRun):
|
2527
2700
|
id: int = models.BigAutoField(primary_key=True)
|
2528
|
-
artifact: Artifact = ForeignKey(
|
2701
|
+
artifact: Artifact = ForeignKey(
|
2702
|
+
Artifact, CASCADE, related_name="links_featurevalue"
|
2703
|
+
)
|
2529
2704
|
# we follow the lower() case convention rather than snake case for link models
|
2530
|
-
featurevalue = ForeignKey(FeatureValue, PROTECT, related_name="
|
2705
|
+
featurevalue = ForeignKey(FeatureValue, PROTECT, related_name="links_artifact")
|
2531
2706
|
|
2532
2707
|
class Meta:
|
2533
2708
|
unique_together = ("artifact", "featurevalue")
|
@@ -2535,9 +2710,11 @@ class ArtifactFeatureValue(BasicRecord, LinkORM, TracksRun):
|
|
2535
2710
|
|
2536
2711
|
class ArtifactParamValue(BasicRecord, LinkORM, TracksRun):
|
2537
2712
|
id: int = models.BigAutoField(primary_key=True)
|
2538
|
-
artifact: Artifact = ForeignKey(Artifact, CASCADE, related_name="
|
2713
|
+
artifact: Artifact = ForeignKey(Artifact, CASCADE, related_name="links_paramvalue")
|
2539
2714
|
# we follow the lower() case convention rather than snake case for link models
|
2540
|
-
paramvalue: ParamValue = ForeignKey(
|
2715
|
+
paramvalue: ParamValue = ForeignKey(
|
2716
|
+
ParamValue, PROTECT, related_name="links_artifact"
|
2717
|
+
)
|
2541
2718
|
|
2542
2719
|
class Meta:
|
2543
2720
|
unique_together = ("artifact", "paramvalue")
|