lamindb 1.3.2__py3-none-any.whl → 1.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59)
  1. lamindb/__init__.py +52 -36
  2. lamindb/_finish.py +17 -10
  3. lamindb/_tracked.py +1 -1
  4. lamindb/base/__init__.py +3 -1
  5. lamindb/base/fields.py +40 -22
  6. lamindb/base/ids.py +1 -94
  7. lamindb/base/types.py +2 -0
  8. lamindb/base/uids.py +117 -0
  9. lamindb/core/_context.py +216 -133
  10. lamindb/core/_settings.py +38 -25
  11. lamindb/core/datasets/__init__.py +11 -4
  12. lamindb/core/datasets/_core.py +5 -5
  13. lamindb/core/datasets/_small.py +0 -93
  14. lamindb/core/datasets/mini_immuno.py +172 -0
  15. lamindb/core/loaders.py +1 -1
  16. lamindb/core/storage/_backed_access.py +100 -6
  17. lamindb/core/storage/_polars_lazy_df.py +51 -0
  18. lamindb/core/storage/_pyarrow_dataset.py +15 -30
  19. lamindb/core/storage/objects.py +6 -0
  20. lamindb/core/subsettings/__init__.py +2 -0
  21. lamindb/core/subsettings/_annotation_settings.py +11 -0
  22. lamindb/curators/__init__.py +7 -3559
  23. lamindb/curators/_legacy.py +2056 -0
  24. lamindb/curators/core.py +1546 -0
  25. lamindb/errors.py +11 -0
  26. lamindb/examples/__init__.py +27 -0
  27. lamindb/examples/schemas/__init__.py +12 -0
  28. lamindb/examples/schemas/_anndata.py +25 -0
  29. lamindb/examples/schemas/_simple.py +19 -0
  30. lamindb/integrations/_vitessce.py +8 -5
  31. lamindb/migrations/0091_alter_featurevalue_options_alter_space_options_and_more.py +24 -0
  32. lamindb/migrations/0092_alter_artifactfeaturevalue_artifact_and_more.py +75 -0
  33. lamindb/models/__init__.py +12 -2
  34. lamindb/models/_describe.py +21 -4
  35. lamindb/models/_feature_manager.py +384 -301
  36. lamindb/models/_from_values.py +1 -1
  37. lamindb/models/_is_versioned.py +5 -15
  38. lamindb/models/_label_manager.py +8 -2
  39. lamindb/models/artifact.py +354 -177
  40. lamindb/models/artifact_set.py +122 -0
  41. lamindb/models/can_curate.py +4 -1
  42. lamindb/models/collection.py +79 -56
  43. lamindb/models/core.py +1 -1
  44. lamindb/models/feature.py +78 -47
  45. lamindb/models/has_parents.py +24 -9
  46. lamindb/models/project.py +3 -3
  47. lamindb/models/query_manager.py +221 -22
  48. lamindb/models/query_set.py +251 -206
  49. lamindb/models/record.py +211 -344
  50. lamindb/models/run.py +59 -5
  51. lamindb/models/save.py +9 -5
  52. lamindb/models/schema.py +673 -196
  53. lamindb/models/transform.py +5 -14
  54. lamindb/models/ulabel.py +8 -5
  55. {lamindb-1.3.2.dist-info → lamindb-1.5.0.dist-info}/METADATA +8 -7
  56. lamindb-1.5.0.dist-info/RECORD +108 -0
  57. lamindb-1.3.2.dist-info/RECORD +0 -95
  58. {lamindb-1.3.2.dist-info → lamindb-1.5.0.dist-info}/LICENSE +0 -0
  59. {lamindb-1.3.2.dist-info → lamindb-1.5.0.dist-info}/WHEEL +0 -0
@@ -5,10 +5,11 @@ import os
  import shutil
  from collections import defaultdict
  from pathlib import Path, PurePath, PurePosixPath
- from typing import TYPE_CHECKING, Any, Union, overload
+ from typing import TYPE_CHECKING, Any, Literal, Union, overload
 
  import fsspec
  import lamindb_setup as ln_setup
+ import numpy as np
  import pandas as pd
  from anndata import AnnData
  from django.db import connections, models
@@ -38,7 +39,6 @@ from lamindb.errors import FieldValidationError
  from lamindb.models.query_set import QuerySet
 
  from ..base.users import current_user_id
- from ..core._compat import is_package_installed
  from ..core.loaders import load_to_memory
  from ..core.storage import (
      LocalPathClasses,
@@ -48,6 +48,11 @@ from ..core.storage import (
      write_to_disk,
  )
  from ..core.storage._anndata_accessor import _anndata_n_observations
+ from ..core.storage._backed_access import (
+     _track_writes_factory,
+     backed_access,
+ )
+ from ..core.storage._polars_lazy_df import POLARS_SUFFIXES
  from ..core.storage._pyarrow_dataset import PYARROW_SUFFIXES
  from ..core.storage._tiledbsoma import _soma_n_observations
  from ..core.storage.paths import (
@@ -61,7 +66,6 @@ from ..core.storage.paths import (
  from ..errors import IntegrityError, InvalidArgument, ValidationError
  from ..models._is_versioned import (
      create_uid,
-     message_update_key_in_version_family,
  )
  from ._django import get_artifact_with_related
  from ._feature_manager import (
@@ -69,6 +73,7 @@ from ._feature_manager import (
      ParamManager,
      ParamManagerArtifact,
      add_label_feature_links,
+     filter_base,
      get_label_links,
  )
  from ._is_versioned import IsVersioned
@@ -86,7 +91,7 @@ from .record import (
      _get_record_kwargs,
      record_repr,
  )
- from .run import ParamValue, Run, TracksRun, TracksUpdates, User
+ from .run import Param, ParamValue, Run, TracksRun, TracksUpdates, User
  from .schema import Schema
  from .ulabel import ULabel
 
@@ -105,9 +110,10 @@ except ImportError:
 
 
  if TYPE_CHECKING:
-     from collections.abc import Iterable
+     from collections.abc import Iterable, Iterator
 
      from mudata import MuData  # noqa: TC004
+     from polars import LazyFrame as PolarsLazyFrame
      from pyarrow.dataset import Dataset as PyArrowDataset
      from spatialdata import SpatialData  # noqa: TC004
      from tiledbsoma import Collection as SOMACollection
@@ -210,17 +216,6 @@ def process_data(
 
      if not overwritten, data gets stored in default storage
      """
-     supported_data_types = [pd.DataFrame, AnnData]
-     if is_package_installed("mudata"):
-         from mudata import MuData
-
-         supported_data_types.append(MuData)
-     if is_package_installed("spatialdata"):
-         from spatialdata import SpatialData
-
-         supported_data_types.append(SpatialData)
-     supported_data_types = tuple(supported_data_types)  # type: ignore
-
      if key is not None:
          key_suffix = extract_suffix_from_path(PurePosixPath(key), arg_name="key")
          # use suffix as the (adata) format if the format is not provided
@@ -228,7 +223,8 @@ def process_data(
          format = key_suffix[1:]
      else:
          key_suffix = None
-     if isinstance(data, (str, Path, UPath)):  # UPathStr, spelled out
+
+     if isinstance(data, (str, Path, UPath)):
          access_token = (
              default_storage._access_token
              if hasattr(default_storage, "_access_token")
@@ -239,6 +235,7 @@
          # for example into a temporary url
          if path.protocol not in {"http", "https"}:
              path = path.resolve()
+
          storage, use_existing_storage_key = process_pathlike(
              path,
              default_storage=default_storage,
@@ -247,28 +244,37 @@
          )
          suffix = extract_suffix_from_path(path)
          memory_rep = None
-     elif isinstance(data, supported_data_types):
+     elif (
+         isinstance(data, pd.DataFrame)
+         or isinstance(data, AnnData)
+         or data_is_mudata(data)
+         or data_is_spatialdata(data)
+     ):
          storage = default_storage
          memory_rep = data
          suffix = infer_suffix(data, format)
      else:
          raise NotImplementedError(
-             f"Do not know how to create a artifact object from {data}, pass a path instead!"
+             f"Do not know how to create an Artifact from {data}, pass a path instead."
          )
+
+     # Check for suffix consistency
      if key_suffix is not None and key_suffix != suffix and not is_replace:
          # consciously omitting a trailing period
-         if isinstance(data, (str, Path, UPath)):
+         if isinstance(data, (str, Path, UPath)):  # UPathStr, spelled out
              message = f"The suffix '{suffix}' of the provided path is inconsistent, it should be '{key_suffix}'"
          else:
              message = f"The suffix '{key_suffix}' of the provided key is inconsistent, it should be '{suffix}'"
          raise InvalidArgument(message)
+
      # in case we have an in-memory representation, we need to write it to disk
-     from lamindb import settings
+     if memory_rep is not None:
+         from lamindb import settings
 
-     if isinstance(data, supported_data_types):
          path = settings.cache_dir / f"{provisional_uid}{suffix}"
          write_to_disk(data, path)
          use_existing_storage_key = False
+
      return memory_rep, path, suffix, storage, use_existing_storage_key
 
 
@@ -311,10 +317,9 @@
          result = Artifact.objects.using(instance).filter(hash=hash).all()
          artifact_with_same_hash_exists = len(result) > 0
      else:
-         storage_id = settings.storage.id
          result = (
              Artifact.objects.using(instance)
-             .filter(Q(hash=hash) | Q(key=key, storage_id=storage_id))
+             .filter(Q(hash=hash) | Q(key=key, storage=settings.storage.record))
              .order_by("-created_at")
              .all()
          )
@@ -533,28 +538,24 @@ def data_is_anndata(data: AnnData | UPathStr) -> bool:
 
 
  def data_is_mudata(data: MuData | UPathStr) -> bool:
-     if is_package_installed("mudata"):
-         from mudata import MuData
-
-         if isinstance(data, MuData):
-             return True
+     # We are not importing MuData here to keep loaded modules minimal
+     if hasattr(data, "__class__") and data.__class__.__name__ == "MuData":
+         return True
      if isinstance(data, (str, Path)):
          return UPath(data).suffix == ".h5mu"
      return False
 
 
  def data_is_spatialdata(data: SpatialData | UPathStr) -> bool:
-     if is_package_installed("spatialdata"):
-         from spatialdata import SpatialData
-
-         if isinstance(data, SpatialData):
-             return True
-     if isinstance(data, (str, Path)):
-         if UPath(data).suffix == ".zarr":
-             # TODO: inconsistent with anndata, where we run the storage
-             # check only for local, expensive for cloud
-             return identify_zarr_type(data, check=False) == "spatialdata"
-     return False
+     # We are not importing SpatialData here to keep loaded modules minimal
+     if hasattr(data, "__class__") and data.__class__.__name__ == "SpatialData":
+         return True
+     if isinstance(data, (str, Path)):
+         if UPath(data).suffix == ".zarr":
+             # TODO: inconsistent with anndata, where we run the storage
+             # check only for local, expensive for cloud
+             return identify_zarr_type(data, check=False) == "spatialdata"
+     return False
 
 
  def _check_otype_artifact(
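The rewritten `data_is_mudata` and `data_is_spatialdata` above dispatch on the class name so that `mudata` or `spatialdata` never need to be imported just to answer the question. A minimal self-contained sketch of this import-free pattern (all names below are illustrative, not part of lamindb)::

    class MuData:  # stand-in that merely carries the right class name
        pass

    def looks_like_mudata(data) -> bool:
        # no `import mudata` required: compare the type name instead of using isinstance()
        return type(data).__name__ == "MuData"

    assert looks_like_mudata(MuData())
    assert not looks_like_mudata("some string")

The trade-off is strictness: any class that happens to be named `MuData` passes, which is why the suffix checks remain as a second signal.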
@@ -763,15 +764,15 @@ def _describe_sqlite(self, print_types: bool = False):  # for artifact & collection
      return tree
 
 
- def describe_artifact_collection(self):  # for artifact & collection
-     from ._describe import print_rich_tree
+ def describe_artifact_collection(self, return_str: bool = False) -> str | None:
+     from ._describe import format_rich_tree
 
      if not self._state.adding and connections[self._state.db].vendor == "postgresql":
          tree = _describe_postgres(self)
      else:
          tree = _describe_sqlite(self)
 
-     print_rich_tree(tree)
+     return format_rich_tree(tree, return_str=return_str)
 
 
  def validate_feature(feature: Feature, records: list[Record]) -> None:
@@ -962,55 +963,66 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
          revises: `Artifact | None = None` Previous version of the artifact. An alternative to passing `key` to trigger a new version.
          run: `Run | None = None` The run that creates the artifact.
 
-     .. dropdown:: Typical storage formats & their API accessors
+     Examples:
 
-         Arrays:
+         Create an artifact **from a local file or folder**::
 
-         - Table: `.csv`, `.tsv`, `.parquet`, `.ipc` ⟷ `DataFrame`, `pyarrow.Table`
-         - Annotated matrix: `.h5ad`, `.h5mu`, `.zrad` ⟷ `AnnData`, `MuData`
-         - Generic array: HDF5 group, zarr group, TileDB store ⟷ HDF5, zarr, TileDB loaders
+             artifact = ln.Artifact("./my_file.parquet", key="examples/my_file.parquet").save()
+             artifact = ln.Artifact("./my_folder", key="project1/my_folder").save()
 
-         Non-arrays:
+         Calling `.save()` copies or uploads the file to the default storage location of your lamindb instance.
+         If you create an artifact **from a remote file or folder**, lamindb merely registers the S3 `key` and avoids copying the data::
 
-         - Image: `.jpg`, `.png` ⟷ `np.ndarray`, ...
-         - Fastq: `.fastq` ⟷ /
-         - VCF: `.vcf` ⟷ /
-         - QC: `.html` ⟷ /
+             artifact = ln.Artifact("s3://my_bucket/my_folder/my_file.csv").save()
 
-         You'll find these values in the `suffix` & `accessor` fields.
+         If you want to **validate & annotate** an array, pass a `schema` to one of the `.from_df()`, `.from_anndata()`, ... constructors::
 
-         LaminDB makes some default choices (e.g., serialize a `DataFrame` as a `.parquet` file).
+             schema = ln.Schema(itype=ln.Feature)  # a schema that merely enforces that feature names exist in the Feature registry
+             artifact = ln.Artifact.from_df("./my_file.parquet", key="my_dataset.parquet", schema=schema).save()  # validated and annotated
 
-     See Also:
-         :class:`~lamindb.Storage`
-             Storage locations for artifacts.
-         :class:`~lamindb.Collection`
-             Collections of artifacts.
-         :meth:`~lamindb.Artifact.from_df`
-             Create an artifact from a `DataFrame`.
-         :meth:`~lamindb.Artifact.from_anndata`
-             Create an artifact from an `AnnData`.
+         You can make a **new version** of an artifact by passing an existing `key`::
 
-     Examples:
+             artifact_v2 = ln.Artifact("./my_file.parquet", key="examples/my_file.parquet").save()
+             artifact_v2.versions.df()  # see all versions
+
+         You can write artifacts to other storage locations by switching the current default storage location (:attr:`~lamindb.core.Settings.storage`)::
+
+             ln.settings.storage = "s3://some-bucket"
 
-         Create an artifact by passing `key`:
+         Sometimes you want to **avoid mapping the artifact into a path hierarchy**, and you only pass `description`::
 
-         >>> artifact = ln.Artifact("./my_file.parquet", key="example_datasets/my_file.parquet").save()
-         >>> artifact = ln.Artifact("./my_folder", key="project1/my_folder").save()
+             artifact = ln.Artifact("./my_folder", description="My folder").save()
+             artifact_v2 = ln.Artifact("./my_folder", revises=old_artifact).save()  # need to version based on `revises`, a shared description does not trigger a new version
 
-         Calling `.save()` uploads the file to the default storage location of your lamindb instance.
-         (If it's a local instance, the "upload" is a mere copy operation.)
+     Notes:
 
-         If your artifact is already in the cloud, lamindb auto-populates the `key` field based on the S3 key and there is no upload:
+         .. dropdown:: Typical storage formats & their API accessors
 
-         >>> artifact = ln.Artifact("s3://my_bucket/my_folder/my_file.csv").save()
+             Arrays:
 
-         You can make a new version of the artifact with `key = "example_datasets/my_file.parquet"`
+             - Table: `.csv`, `.tsv`, `.parquet`, `.ipc` ⟷ `DataFrame`, `pyarrow.Table`
+             - Annotated matrix: `.h5ad`, `.h5mu`, `.zrad` ⟷ `AnnData`, `MuData`
+             - Generic array: HDF5 group, zarr group, TileDB store ⟷ HDF5, zarr, TileDB loaders
 
-         >>> artifact_v2 = ln.Artifact("./my_file.parquet", key="example_datasets/my_file.parquet").save()
-         >>> artifact_v2.versions.df()  # see all versions
+             Non-arrays:
 
-     .. dropdown:: Why does the API look this way?
+             - Image: `.jpg`, `.png` ⟷ `np.ndarray`, ...
+             - Fastq: `.fastq` ⟷ /
+             - VCF: `.vcf` ⟷ /
+             - QC: `.html` ⟷ /
+
+             You'll find these values in the `suffix` & `accessor` fields.
+
+             LaminDB makes some default choices (e.g., serialize a `DataFrame` as a `.parquet` file).
+
+         .. dropdown:: Will artifacts get duplicated?
+
+             If an artifact with the exact same hash already exists, `Artifact()` returns the existing artifact.
+
+             In concurrent workloads where the same artifact is created repeatedly at the exact same time, `.save()`
+             detects the duplication and will return the existing artifact.
+
+         .. dropdown:: Why does the constructor look the way it looks?
 
          It's inspired by APIs building on AWS S3.
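The deduplication promise made in the new "Will artifacts get duplicated?" dropdown can be sketched as follows (a hypothetical session; the path is illustrative and a configured instance is assumed)::

    import lamindb as ln

    a1 = ln.Artifact("./my_file.parquet", key="examples/my_file.parquet").save()
    # identical content produces an identical hash, so the constructor
    # returns the existing record rather than creating a duplicate
    a2 = ln.Artifact("./my_file.parquet", key="examples/my_file.parquet")
    assert a2.uid == a1.uid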
 
@@ -1031,18 +1043,15 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
              bucket = quilt3.Bucket('mybucket')
              bucket.put_file('hello.txt', '/tmp/hello.txt')
 
-     Sometimes you want to avoid mapping the artifact into a file hierarchy, and you can then _just_ populate `description` instead:
-
-     >>> artifact = ln.Artifact("s3://my_bucket/my_folder", description="My folder").save()
-     >>> artifact = ln.Artifact("./my_local_folder", description="My local folder").save()
-
-     Because you can then not use `key`-based versioning you have to pass `revises` to make a new artifact version:
-
-     >>> artifact_v2 = ln.Artifact("./my_file.parquet", revises=old_artifact).save()
-
-     If an artifact with the exact same hash already exists, `Artifact()` returns the existing artifact. In concurrent workloads where
-     the same artifact is created multiple times, `Artifact()` doesn't yet return the existing artifact but creates a new one; `.save()` however
-     detects the duplication and will return the existing artifact.
+     See Also:
+         :class:`~lamindb.Storage`
+             Storage locations for artifacts.
+         :class:`~lamindb.Collection`
+             Collections of artifacts.
+         :meth:`~lamindb.Artifact.from_df`
+             Create an artifact from a `DataFrame`.
+         :meth:`~lamindb.Artifact.from_anndata`
+             Create an artifact from an `AnnData`.
 
      """
 
@@ -1055,6 +1064,8 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
      params: ParamManager = ParamManagerArtifact  # type: ignore
      """Param manager.
 
+     What features are for dataset-like artifacts, parameters are for model-like artifacts & runs.
+
      Example::
 
          artifact.params.add_values({
@@ -1071,23 +1082,23 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
      features: FeatureManager = FeatureManager  # type: ignore
      """Feature manager.
 
-     Features denote dataset dimensions, i.e., the variables that measure labels & numbers.
+     Typically, you annotate a dataset with features by defining a `Schema` and passing it to the `Artifact` constructor.
 
-     Annotate with features & values::
+     Here is how to annotate an artifact ad hoc::
 
          artifact.features.add_values({
              "species": organism,  # here, organism is an Organism record
             "scientist": ['Barbara McClintock', 'Edgar Anderson'],
             "temperature": 27.6,
-             "study": "Candidate marker study"
+             "experiment": "Experiment 1"
          })
 
-     Query for features & values::
+     Query artifacts by features::
 
-         ln.Artifact.features.filter(scientist="Barbara McClintock")
+         ln.Artifact.filter(scientist="Barbara McClintock")
 
      Features may or may not be part of the artifact content in storage. For
-     instance, the :class:`~lamindb.Curator` flow validates the columns of a
+     instance, the :class:`~lamindb.curators.DataFrameCurator` flow validates the columns of a
      `DataFrame`-like artifact and annotates it with features corresponding to
      these columns. `artifact.features.add_values`, by contrast, does not
      validate the content of the artifact.
@@ -1100,22 +1111,22 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
      To annotate with labels, you typically use the registry-specific accessors,
      for instance :attr:`~lamindb.Artifact.ulabels`::
 
-         candidate_marker_study = ln.ULabel(name="Candidate marker study").save()
-         artifact.ulabels.add(candidate_marker_study)
+         experiment = ln.ULabel(name="Experiment 1").save()
+         artifact.ulabels.add(experiment)
 
      Similarly, you query based on these accessors::
 
-         ln.Artifact.filter(ulabels__name="Candidate marker study").all()
+         ln.Artifact.filter(ulabels__name="Experiment 1").all()
 
      Unlike the registry-specific accessors, the `.labels` accessor provides
      a way of associating labels with features::
 
-         study = ln.Feature(name="study", dtype="cat").save()
-         artifact.labels.add(candidate_marker_study, feature=study)
+         experiment_feature = ln.Feature(name="experiment", dtype="cat").save()
+         artifact.labels.add(experiment, feature=experiment_feature)
 
      Note that the above is equivalent to::
 
-         artifact.features.add_values({"study": candidate_marker_study})
+         artifact.features.add_values({"experiment": experiment})
      """
      from ._label_manager import LabelManager
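Taken together, the two docstrings above mean that ad-hoc feature values and registry-specific labels both end up queryable through the same `filter()` interface; a sketch, assuming the records from the docstrings (including a registered `temperature` feature) exist::

    import lamindb as ln

    artifact = ln.Artifact.get(key="examples/my_file.parquet")  # any existing artifact
    experiment = ln.ULabel(name="Experiment 1").save()
    artifact.ulabels.add(experiment)
    artifact.features.add_values({"temperature": 27.6})
    ln.Artifact.filter(ulabels__name="Experiment 1").df()  # query via the label accessor
    ln.Artifact.filter(temperature=27.6).df()  # query via the feature value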
 
@@ -1343,15 +1354,7 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
                  f"Only {valid_keywords} can be passed, you passed: {kwargs}"
              )
          if revises is not None and key is not None and revises.key != key:
-             note = message_update_key_in_version_family(
-                 suid=revises.stem_uid,
-                 existing_key=revises.key,
-                 new_key=key,
-                 registry="Artifact",
-             )
-             raise ValueError(
-                 f"`key` is {key}, but `revises.key` is '{revises.key}'\n\n Either do *not* pass `key`.\n\n{note}"
-             )
+             logger.warning(f"renaming artifact from '{revises.key}' to '{key}'")
          if revises is not None:
              if not isinstance(revises, Artifact):
                  raise TypeError("`revises` has to be of type `Artifact`")
@@ -1431,11 +1434,7 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
              kwargs["uid"] = uid
 
          # only set key now so that we don't do a look-up on it in case revises is passed
-         if revises is not None and revises.key is not None:
-             assert revises.key.endswith(kwargs["suffix"]), (  # noqa: S101
-                 revises.key,
-                 kwargs["suffix"],
-             )
+         if revises is not None and revises.key is not None and kwargs["key"] is None:
              kwargs["key"] = revises.key
 
          kwargs["kind"] = kind
@@ -1530,15 +1529,84 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
          - Guide: :doc:`docs:registries`
          - Method in `Record` base class: :meth:`~lamindb.models.Record.get`
 
-         Examples::
+         Examples:
+
+             ::
 
-             artifact = ln.Artifact.get("tCUkRcaEjTjhtozp0000")
-             artifact = ln.Artifact.get(key="my_datasets/my_file.parquet")
+                 artifact = ln.Artifact.get("tCUkRcaEjTjhtozp0000")
+                 artifact = ln.Artifact.get(key="examples/my_file.parquet")
          """
          from .query_set import QuerySet
 
          return QuerySet(model=cls).get(idlike, **expressions)
 
+     @classmethod
+     def filter(
+         cls,
+         *queries,
+         **expressions,
+     ) -> QuerySet:
+         """Query a set of artifacts.
+
+         Args:
+             *queries: `Q` expressions.
+             **expressions: Features, params, fields via the Django query syntax.
+
+         See Also:
+             - Guide: :doc:`docs:registries`
+
+         Examples:
+
+             Query by fields::
+
+                 ln.Artifact.filter(key="examples/my_file.parquet")
+
+             Query by features::
+
+                 ln.Artifact.filter(cell_type_by_model__name="T cell")
+
+             Query by params::
+
+                 ln.Artifact.filter(hyperparam_x=100)
+         """
+         from .query_set import QuerySet
+
+         if expressions:
+             keys_normalized = [key.split("__")[0] for key in expressions]
+             field_or_feature_or_param = keys_normalized[0].split("__")[0]
+             if field_or_feature_or_param in Artifact.__get_available_fields__():
+                 return QuerySet(model=cls).filter(*queries, **expressions)
+             elif all(
+                 features_validated := Feature.validate(
+                     keys_normalized, field="name", mute=True
+                 )
+             ):
+                 return filter_base(FeatureManager, **expressions)
+             elif all(
+                 params_validated := Param.validate(
+                     keys_normalized, field="name", mute=True
+                 )
+             ):
+                 return filter_base(ParamManagerArtifact, **expressions)
+             else:
+                 if sum(features_validated) < sum(params_validated):
+                     params = ", ".join(
+                         sorted(np.array(keys_normalized)[~params_validated])
+                     )
+                     message = f"param names: {params}"
+                 else:
+                     features = ", ".join(
+                         sorted(np.array(keys_normalized)[~features_validated])
+                     )
+                     message = f"feature names: {features}"
+                 fields = ", ".join(sorted(cls.__get_available_fields__()))
+                 raise InvalidArgument(
+                     f"You can query either by available fields: {fields}\n"
+                     f"Or fix invalid {message}"
+                 )
+         else:
+             return QuerySet(model=cls).filter(*queries, **expressions)
+
      @classmethod
      def from_df(
          cls,
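The dispatch order in the new `filter()` is significant: a name matching a registry field always wins, then names that validate as features, then as params; anything else raises `InvalidArgument` listing the available fields. A sketch of the three branches (assumes a `temperature` feature and a `hyperparam_x` param are registered)::

    import lamindb as ln

    ln.Artifact.filter(key__startswith="examples/")  # field branch: `key` is a model field
    ln.Artifact.filter(temperature=27.6)  # feature branch: name validates in the Feature registry
    ln.Artifact.filter(hyperparam_x=100)  # param branch: name validates in the Param registry
    # ln.Artifact.filter(not_a_thing=1)  # would raise InvalidArgument listing available fields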
@@ -1548,9 +1616,10 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
          description: str | None = None,
          run: Run | None = None,
          revises: Artifact | None = None,
+         schema: Schema | None = None,
          **kwargs,
      ) -> Artifact:
-         """Create from `DataFrame`, validate & link features.
+         """Create from `DataFrame`, optionally validate & annotate.
 
          Args:
              df: A `DataFrame` object.
@@ -1559,6 +1628,7 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
              description: A description.
              revises: An old version of the artifact.
              run: The run that creates the artifact.
+             schema: A schema that defines how to validate & annotate.
 
          See Also:
              :meth:`~lamindb.Collection`
@@ -1566,19 +1636,30 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
              :class:`~lamindb.Feature`
                  Track features.
 
-         Example::
+         Example:
 
-             import lamindb as ln
+             No validation and annotation::
+
+                 import lamindb as ln
+
+                 df = ln.core.datasets.mini_immuno.get_dataset1()
+                 artifact = ln.Artifact.from_df(df, key="examples/dataset1.parquet").save()
+
+             With validation and annotation.
+
+             .. literalinclude:: scripts/curate_dataframe_flexible.py
+                 :language: python
+
+             Under the hood, this uses the following schema.
+
+             .. literalinclude:: scripts/define_valid_features.py
+                 :language: python
+
+             Valid features & labels were defined as:
+
+             .. literalinclude:: scripts/define_mini_immuno_features_labels.py
+                 :language: python
 
-             df = ln.core.datasets.df_iris_in_meter_batch1()
-             df.head()
-             #>    sepal_length  sepal_width  petal_length  petal_width  iris_organism_code
-             #> 0         0.051        0.035         0.014        0.002                   0
-             #> 1         0.049        0.030         0.014        0.002                   0
-             #> 2         0.047        0.032         0.013        0.002                   0
-             #> 3         0.046        0.031         0.015        0.002                   0
-             #> 4         0.050        0.036         0.014        0.002                   0
-             artifact = ln.Artifact.from_df(df, key="iris/result_batch1.parquet").save()
          """
          artifact = Artifact(  # type: ignore
              data=df,
@@ -1591,6 +1672,13 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
              **kwargs,
          )
          artifact.n_observations = len(df)
+         if schema is not None:
+             from ..curators import DataFrameCurator
+
+             curator = DataFrameCurator(artifact, schema)
+             curator.validate()
+             artifact.schema = schema
+             artifact._curator = curator
          return artifact
 
      @classmethod
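The `literalinclude` scripts referenced above aren't part of this diff; a hedged inline equivalent of the validated `from_df()` flow looks roughly like this (the `perturbation` feature is illustrative)::

    import lamindb as ln

    ln.Feature(name="perturbation", dtype="cat").save()  # register the column as a feature
    schema = ln.Schema(itype=ln.Feature)  # enforce that column names exist in the Feature registry
    df = ln.core.datasets.mini_immuno.get_dataset1()
    artifact = ln.Artifact.from_df(df, key="examples/dataset1.parquet", schema=schema).save()
    # validation already ran inside from_df(); annotation is completed by save()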
@@ -1602,9 +1690,10 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
          description: str | None = None,
          run: Run | None = None,
          revises: Artifact | None = None,
+         schema: Schema | None = None,
          **kwargs,
      ) -> Artifact:
-         """Create from ``AnnData``, validate & link features.
+         """Create from `AnnData`, optionally validate & annotate.
 
          Args:
              adata: An `AnnData` object or a path of AnnData-like.
@@ -1613,6 +1702,7 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
              description: A description.
              revises: An old version of the artifact.
              run: The run that creates the artifact.
+             schema: A schema that defines how to validate & annotate.
 
          See Also:
 
@@ -1621,12 +1711,31 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
              :class:`~lamindb.Feature`
                  Track features.
 
-         Example::
+         Example:
 
-             import lamindb as ln
+             No validation and annotation::
+
+                 import lamindb as ln
+
+                 adata = ln.core.datasets.anndata_with_obs()
+                 artifact = ln.Artifact.from_anndata(adata, key="mini_anndata_with_obs.h5ad").save()
+
+             With validation and annotation.
+
+             .. literalinclude:: scripts/curate_anndata_flexible.py
+                 :language: python
+
+             Under the hood, this uses the following schema.
+
+             .. literalinclude:: scripts/define_schema_anndata_ensembl_gene_ids_and_valid_features_in_obs.py
+                 :language: python
+
+             This schema transposes the `var` DataFrame during curation, so that one validates and annotates the `var.T` schema, i.e., `[ENSG00000153563, ENSG00000010610, ENSG00000170458]`.
+             If one doesn't transpose, one would annotate with the schema of `var`, i.e., `[gene_symbol, gene_type]`.
+
+             .. image:: https://lamin-site-assets.s3.amazonaws.com/.lamindb/gLyfToATM7WUzkWW0001.png
+                 :width: 800px
 
-             adata = ln.core.datasets.anndata_with_obs()
-             artifact = ln.Artifact.from_anndata(adata, key="mini_anndata_with_obs.h5ad").save()
          """
          if not data_is_anndata(adata):
              raise ValueError(
@@ -1654,6 +1763,13 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
          # and the proper path through create_path for cloud paths
          obj_for_obs = artifact.path
          artifact.n_observations = _anndata_n_observations(obj_for_obs)
+         if schema is not None:
+             from ..curators import AnnDataCurator
+
+             curator = AnnDataCurator(artifact, schema)
+             curator.validate()
+             artifact.schema = schema
+             artifact._curator = curator
          return artifact
 
      @classmethod
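All four `from_*` constructors now share the same eager-validate, lazy-annotate pattern; the added lines boil down to::

    if schema is not None:
        curator = AnnDataCurator(artifact, schema)  # DataFrame/MuData/SpatialData analogously
        curator.validate()  # raises on invalid input, before anything is persisted
        artifact.schema = schema  # record which schema validated the artifact
        artifact._curator = curator  # consumed by save(), which calls curator.save_artifact()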
@@ -1665,9 +1781,10 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
          description: str | None = None,
          run: Run | None = None,
          revises: Artifact | None = None,
+         schema: Schema | None = None,
          **kwargs,
      ) -> Artifact:
-         """Create from ``MuData``, validate & link features.
+         """Create from `MuData`, optionally validate & annotate.
 
          Args:
              mdata: A `MuData` object.
@@ -1676,6 +1793,7 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
              description: A description.
              revises: An old version of the artifact.
              run: The run that creates the artifact.
+             schema: A schema that defines how to validate & annotate.
 
          See Also:
              :meth:`~lamindb.Collection`
@@ -1704,6 +1822,13 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
          )
          if not isinstance(mdata, UPathStr):
              artifact.n_observations = mdata.n_obs
+         if schema is not None:
+             from ..curators import MuDataCurator
+
+             curator = MuDataCurator(artifact, schema)
+             curator.validate()
+             artifact.schema = schema
+             artifact._curator = curator
          return artifact
 
      @classmethod
@@ -1715,17 +1840,19 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
          description: str | None = None,
          run: Run | None = None,
          revises: Artifact | None = None,
+         schema: Schema | None = None,
          **kwargs,
      ) -> Artifact:
-         """Create from ``SpatialData``, validate & link features.
+         """Create from `SpatialData`, optionally validate & annotate.
 
          Args:
-             mdata: A `SpatialData` object.
+             sdata: A `SpatialData` object.
              key: A relative path within default storage,
                  e.g., `"myfolder/myfile.zarr"`.
              description: A description.
              revises: An old version of the artifact.
              run: The run that creates the artifact.
+             schema: A schema that defines how to validate & annotate.
 
          See Also:
              :meth:`~lamindb.Collection`
@@ -1733,11 +1860,21 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
              :class:`~lamindb.Feature`
                  Track features.
 
-         Example::
+         Example:
 
-             import lamindb as ln
+             No validation and annotation::
+
+                 import lamindb as ln
+
+                 artifact = ln.Artifact.from_spatialdata(sdata, key="my_dataset.zarr").save()
 
-             artifact = ln.Artifact.from_spatialdata(sdata, key="my_dataset.zarr").save()
+             With validation and annotation.
+
+             .. literalinclude:: scripts/define_schema_spatialdata.py
+                 :language: python
+
+             .. literalinclude:: scripts/curate_spatialdata.py
+                 :language: python
          """
          if not data_is_spatialdata(sdata):
              raise ValueError(
@@ -1755,6 +1892,13 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
          )
          # ill-defined https://scverse.zulipchat.com/#narrow/channel/315824-spatial/topic/How.20to.20calculate.20the.20number.20of.20observations.3F
          # artifact.n_observations = ...
+         if schema is not None:
+             from ..curators import SpatialDataCurator
+
+             curator = SpatialDataCurator(artifact, schema)
+             curator.validate()
+             artifact.schema = schema
+             artifact._curator = curator
          return artifact
 
      @classmethod
@@ -2022,29 +2166,39 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
          self._old_suffix = self.suffix
 
      def open(
-         self, mode: str = "r", is_run_input: bool | None = None, **kwargs
-     ) -> Union[
-         AnnDataAccessor,
-         BackedAccessor,
-         SOMACollection,
-         SOMAExperiment,
-         SOMAMeasurement,
-         PyArrowDataset,
-     ]:
-         """Return a cloud-backed data object.
+         self,
+         mode: str = "r",
+         engine: Literal["pyarrow", "polars"] = "pyarrow",
+         is_run_input: bool | None = None,
+         **kwargs,
+     ) -> (
+         AnnDataAccessor
+         | BackedAccessor
+         | SOMACollection
+         | SOMAExperiment
+         | SOMAMeasurement
+         | PyArrowDataset
+         | Iterator[PolarsLazyFrame]
+     ):
+         """Open a dataset for streaming.
 
          Works for `AnnData` (`.h5ad` and `.zarr`), generic `hdf5` and `zarr`,
-         `tiledbsoma` objects (`.tiledbsoma`), `pyarrow` compatible formats.
+         `tiledbsoma` objects (`.tiledbsoma`), `pyarrow` or `polars` compatible formats
+         (`.parquet`, `.csv`, `.ipc` etc. files or directories with such files).
 
          Args:
              mode: can only be `"w"` (write mode) for `tiledbsoma` stores,
                  otherwise should always be `"r"` (read-only mode).
+             engine: Which module to use for lazy loading of a dataframe
+                 from `pyarrow` or `polars` compatible formats.
+                 This has no effect if the artifact is not a dataframe, i.e.
+                 if it is an `AnnData`, `hdf5`, `zarr` or `tiledbsoma` object.
              is_run_input: Whether to track this artifact as run input.
             **kwargs: Keyword arguments for the accessor, i.e. `h5py` or `zarr` connection,
-                 `pyarrow.dataset.dataset`.
+                 `pyarrow.dataset.dataset`, `polars.scan_*` function.
 
          Notes:
-             For more info, see tutorial: :doc:`/arrays`.
+             For more info, see guide: :doc:`/arrays`.
 
          Example::
 
@@ -2057,6 +2211,10 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
              #> AnnDataAccessor object with n_obs × n_vars = 70 × 765
              #>     constructed for the AnnData object pbmc68k.h5ad
              #>     ...
+             artifact = ln.Artifact.get(key="lndb-storage/df.parquet")
+             artifact.open()
+             #> pyarrow._dataset.FileSystemDataset
+
          """
          if self._overwrite_versions and not self.is_latest:
              raise ValueError(INCONSISTENT_STATE_MSG)
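A hedged sketch of the new `engine` parameter (assumes a parquet artifact; extra keyword arguments are forwarded to the `polars.scan_*` function)::

    import lamindb as ln

    artifact = ln.Artifact.get(key="lndb-storage/df.parquet")
    lazy_frames = artifact.open(engine="polars")  # Iterator[polars.LazyFrame]
    for lf in lazy_frames:
        print(lf.head().collect())  # materialize only what's needed

With the default `engine="pyarrow"`, the same call keeps returning a `pyarrow.dataset.Dataset` as before.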
@@ -2064,6 +2222,7 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
          h5_suffixes = [".h5", ".hdf5", ".h5ad"]
          h5_suffixes += [s + ".gz" for s in h5_suffixes]
          # ignore empty suffix for now
+         df_suffixes = tuple(set(PYARROW_SUFFIXES).union(POLARS_SUFFIXES))
          suffixes = (
              (
                  "",
@@ -2072,7 +2231,7 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
                  ".tiledbsoma",
              )
              + tuple(h5_suffixes)
-             + PYARROW_SUFFIXES
+             + df_suffixes
              + tuple(
                  s + ".gz" for s in PYARROW_SUFFIXES
              )  # this doesn't work for externally gzipped files, REMOVE LATER
@@ -2080,10 +2239,10 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
          if self.suffix not in suffixes:
              raise ValueError(
                  "Artifact should have a zarr, h5, tiledbsoma object"
-                 " or a compatible `pyarrow.dataset.dataset` directory"
+                 " or a compatible `pyarrow.dataset.dataset` or `polars.scan_*` directory"
                  " as the underlying data, please use one of the following suffixes"
                  f" for the object name: {', '.join(suffixes[1:])}."
-                 f" Or no suffix for a folder with {', '.join(PYARROW_SUFFIXES)} files"
+                 f" Or no suffix for a folder with {', '.join(df_suffixes)} files"
                  " (no mixing allowed)."
              )
          if self.suffix != ".tiledbsoma" and self.key != "soma" and mode != "r":
@@ -2092,10 +2251,6 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
              )
 
          from lamindb import settings
-         from lamindb.core.storage._backed_access import (
-             _track_writes_factory,
-             backed_access,
-         )
 
          using_key = settings._using_key
          filepath, cache_key = filepath_cache_key_from_artifact(
@@ -2116,14 +2271,22 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
          ) and not filepath.synchronize(localpath, just_check=True)
          if open_cache:
              try:
-                 access = backed_access(localpath, mode, using_key, **kwargs)
+                 access = backed_access(
+                     localpath, mode, engine, using_key=using_key, **kwargs
+                 )
              except Exception as e:
-                 if isinstance(filepath, LocalPathClasses):
+                 # also ignore ValueError here because
+                 # such errors most probably just imply an incorrect argument
+                 if isinstance(filepath, LocalPathClasses) or isinstance(
+                     e, (ImportError, ValueError)
+                 ):
                      raise e
                  logger.warning(
                      f"The cache might be corrupted: {e}. Trying to open directly."
                  )
-                 access = backed_access(filepath, mode, using_key, **kwargs)
+                 access = backed_access(
+                     filepath, mode, engine, using_key=using_key, **kwargs
+                 )
                  # happens only if backed_access has been successful
                  # delete the corrupted cache
                  if localpath.is_dir():
@@ -2131,7 +2294,9 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
                  else:
                      localpath.unlink(missing_ok=True)
          else:
-             access = backed_access(filepath, mode, using_key, **kwargs)
+             access = backed_access(
+                 filepath, mode, engine, using_key=using_key, **kwargs
+             )
          if is_tiledbsoma_w:
 
              def finalize():
@@ -2304,6 +2469,9 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
              artifact = ln.Artifact.get(key="some.tiledbsoma", is_latest=True)
              artifact.delete()  # delete all versions, the data will be deleted or prompted for deletion.
          """
+         # we're *not* running the line below because the case `storage is None` triggers user feedback in one case
+         # storage = True if storage is None else storage
+
          # this first check means an invalid delete fails fast rather than cascading through
          # database and storage permission errors
          if os.getenv("LAMINDB_MULTI_INSTANCE") is None:
@@ -2354,8 +2522,10 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
          # only delete in storage if DB delete is successful
          # DB delete might error because of a foreign key constraint violated etc.
          if self._overwrite_versions and self.is_latest:
-             # includes self
-             for version in self.versions.all():
+             logger.important(
+                 "deleting all versions of this artifact because they all share the same store"
+             )
+             for version in self.versions.all():  # includes self
                  _delete_skip_storage(version)
          else:
              self._delete_skip_storage()
@@ -2365,7 +2535,7 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
              delete_in_storage = False
              if storage:
                  logger.warning(
-                     "Storage argument is ignored; can't delete storage on an previous version"
+                     "storage argument is ignored; can't delete store of a previous version if overwrite_versions is True"
                  )
          elif self.key is None or self._key_is_virtual:
              # do not ask for confirmation also if storage is None
@@ -2466,6 +2636,10 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
                  local_path_cache,
              )
              logger.important(f"moved local artifact to cache: {local_path_cache}")
+         if hasattr(self, "_curator"):
+             curator = self._curator
+             delattr(self, "_curator")
+             curator.save_artifact()
          return self
 
      def restore(self) -> None:
@@ -2478,14 +2652,13 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
          self._branch_code = 1
          self.save()
 
-     def describe(self) -> None:
-         """Describe relations of record.
-
-         Example::
+     def describe(self, return_str: bool = False) -> None:
+         """Describe record including linked records.
 
-             artifact.describe()
+         Args:
+             return_str: Return a string instead of printing.
          """
-         return describe_artifact_collection(self)
+         return describe_artifact_collection(self, return_str=return_str)
 
      def _populate_subsequent_runs(self, run: Run) -> None:
          _populate_subsequent_runs_(self, run)
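With the new `return_str` flag, `describe()` can feed logs or reports instead of printing (usage sketch)::

    artifact = ln.Artifact.get(key="examples/my_file.parquet")
    artifact.describe()  # prints a rich tree
    text = artifact.describe(return_str=True)  # the same tree, returned as a string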
@@ -2525,9 +2698,11 @@ def _save_skip_storage(artifact, **kwargs) -> None:
 
  class ArtifactFeatureValue(BasicRecord, LinkORM, TracksRun):
      id: int = models.BigAutoField(primary_key=True)
-     artifact: Artifact = ForeignKey(Artifact, CASCADE, related_name="+")
+     artifact: Artifact = ForeignKey(
+         Artifact, CASCADE, related_name="links_featurevalue"
+     )
      # we follow the lower() case convention rather than snake case for link models
-     featurevalue = ForeignKey(FeatureValue, PROTECT, related_name="+")
+     featurevalue = ForeignKey(FeatureValue, PROTECT, related_name="links_artifact")
 
      class Meta:
          unique_together = ("artifact", "featurevalue")
@@ -2535,9 +2710,11 @@ class ArtifactFeatureValue(BasicRecord, LinkORM, TracksRun):
 
  class ArtifactParamValue(BasicRecord, LinkORM, TracksRun):
      id: int = models.BigAutoField(primary_key=True)
-     artifact: Artifact = ForeignKey(Artifact, CASCADE, related_name="+")
+     artifact: Artifact = ForeignKey(Artifact, CASCADE, related_name="links_paramvalue")
      # we follow the lower() case convention rather than snake case for link models
-     paramvalue: ParamValue = ForeignKey(ParamValue, PROTECT, related_name="+")
+     paramvalue: ParamValue = ForeignKey(
+         ParamValue, PROTECT, related_name="links_artifact"
+     )
 
      class Meta:
          unique_together = ("artifact", "paramvalue")