lamindb 1.10.1__py3-none-any.whl → 1.11.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lamindb/__init__.py +89 -49
- lamindb/_finish.py +17 -15
- lamindb/_tracked.py +2 -4
- lamindb/_view.py +1 -1
- lamindb/base/__init__.py +2 -1
- lamindb/base/dtypes.py +76 -0
- lamindb/core/_settings.py +45 -2
- lamindb/core/storage/_anndata_accessor.py +118 -26
- lamindb/core/storage/_backed_access.py +10 -7
- lamindb/core/storage/_spatialdata_accessor.py +15 -4
- lamindb/core/storage/_zarr.py +3 -0
- lamindb/curators/_legacy.py +16 -3
- lamindb/curators/core.py +449 -193
- lamindb/errors.py +6 -0
- lamindb/examples/cellxgene/__init__.py +8 -3
- lamindb/examples/cellxgene/_cellxgene.py +127 -13
- lamindb/examples/cellxgene/{cxg_schema_versions.csv → cellxgene_schema_versions.csv} +11 -0
- lamindb/examples/croissant/__init__.py +32 -6
- lamindb/examples/datasets/__init__.py +2 -2
- lamindb/examples/datasets/_core.py +9 -2
- lamindb/examples/datasets/_small.py +66 -22
- lamindb/examples/fixtures/sheets.py +8 -2
- lamindb/integrations/_croissant.py +34 -11
- lamindb/migrations/0118_alter_recordproject_value_projectrecord.py +99 -0
- lamindb/migrations/0119_rename_records_project_linked_in_records.py +26 -0
- lamindb/migrations/{0117_squashed.py → 0119_squashed.py} +92 -5
- lamindb/migrations/0120_add_record_fk_constraint.py +64 -0
- lamindb/migrations/0121_recorduser.py +60 -0
- lamindb/models/__init__.py +4 -1
- lamindb/models/_describe.py +2 -2
- lamindb/models/_feature_manager.py +131 -71
- lamindb/models/_from_values.py +2 -2
- lamindb/models/_is_versioned.py +4 -4
- lamindb/models/_label_manager.py +4 -4
- lamindb/models/artifact.py +357 -192
- lamindb/models/artifact_set.py +45 -1
- lamindb/models/can_curate.py +1 -2
- lamindb/models/collection.py +3 -34
- lamindb/models/feature.py +111 -7
- lamindb/models/has_parents.py +11 -11
- lamindb/models/project.py +42 -2
- lamindb/models/query_manager.py +16 -7
- lamindb/models/query_set.py +191 -78
- lamindb/models/record.py +30 -5
- lamindb/models/run.py +10 -33
- lamindb/models/save.py +6 -8
- lamindb/models/schema.py +54 -26
- lamindb/models/sqlrecord.py +152 -40
- lamindb/models/storage.py +59 -14
- lamindb/models/transform.py +17 -17
- lamindb/models/ulabel.py +6 -1
- {lamindb-1.10.1.dist-info → lamindb-1.11.0.dist-info}/METADATA +11 -16
- {lamindb-1.10.1.dist-info → lamindb-1.11.0.dist-info}/RECORD +55 -50
- {lamindb-1.10.1.dist-info → lamindb-1.11.0.dist-info}/LICENSE +0 -0
- {lamindb-1.10.1.dist-info → lamindb-1.11.0.dist-info}/WHEEL +0 -0
lamindb/models/artifact.py
CHANGED
@@ -1,7 +1,6 @@
 # ruff: noqa: TC004
 from __future__ import annotations
 
-import os
 import shutil
 from collections import defaultdict
 from pathlib import Path, PurePath, PurePosixPath
@@ -9,7 +8,6 @@ from typing import TYPE_CHECKING, Any, Literal, Union, overload
 
 import fsspec
 import lamindb_setup as ln_setup
-import numpy as np
 import pandas as pd
 from anndata import AnnData
 from django.db import connections, models
@@ -63,14 +61,13 @@ from ..core.storage.paths import (
     filepath_cache_key_from_artifact,
     filepath_from_artifact,
 )
-from ..errors import
+from ..errors import InvalidArgument, ValidationError
 from ..models._is_versioned import (
     create_uid,
 )
 from ._django import get_artifact_with_related, get_collection_with_related
 from ._feature_manager import (
     FeatureManager,
-    filter_base,
     get_label_links,
 )
 from ._is_versioned import IsVersioned
@@ -201,7 +198,7 @@ def process_pathlike(
             # hence, we revert the creation and throw an error
             storage_record.delete()
             raise UnknownStorageLocation(
-                f"Path {filepath} is not contained in any known storage location:\n{Storage.
+                f"Path {filepath} is not contained in any known storage location:\n{Storage.to_dataframe()[['uid', 'root', 'type']]}\n\n"
                 f"Create a managed storage location that contains the path, e.g., by calling: ln.Storage(root='{new_root}').save()"
             )
         use_existing_storage_key = True
@@ -419,24 +416,6 @@ def get_artifact_kwargs_from_data(
         skip_check_exists,
         is_replace=is_replace,
     )
-    stat_or_artifact = get_stat_or_artifact(
-        path=path,
-        key=key,
-        instance=using_key,
-        is_replace=is_replace,
-    )
-    if isinstance(stat_or_artifact, Artifact):
-        existing_artifact = stat_or_artifact
-        if run is not None:
-            existing_artifact._populate_subsequent_runs(run)
-        return existing_artifact, None
-    else:
-        size, hash, hash_type, n_files, revises = stat_or_artifact
-
-    if revises is not None: # update provisional_uid
-        provisional_uid, revises = create_uid(revises=revises, version=version)
-        if settings.cache_dir in path.parents:
-            path = path.rename(path.with_name(f"{provisional_uid}{suffix}"))
 
     check_path_in_storage = False
     if use_existing_storage_key:
@@ -457,6 +436,25 @@ def get_artifact_kwargs_from_data(
     else:
         storage = storage
 
+    stat_or_artifact = get_stat_or_artifact(
+        path=path,
+        key=key,
+        instance=using_key,
+        is_replace=is_replace,
+    )
+    if isinstance(stat_or_artifact, Artifact):
+        existing_artifact = stat_or_artifact
+        if run is not None:
+            existing_artifact._populate_subsequent_runs(run)
+        return existing_artifact, None
+    else:
+        size, hash, hash_type, n_files, revises = stat_or_artifact
+
+    if revises is not None: # update provisional_uid
+        provisional_uid, revises = create_uid(revises=revises, version=version)
+        if settings.cache_dir in path.parents:
+            path = path.rename(path.with_name(f"{provisional_uid}{suffix}"))
+
     log_storage_hint(
         check_path_in_storage=check_path_in_storage,
         storage=storage,
@@ -542,6 +540,7 @@ def log_storage_hint(
 def data_is_scversedatastructure(
     data: ScverseDataStructures | UPathStr,
     structure_type: Literal["AnnData", "MuData", "SpatialData"] | None = None,
+    cloud_warning: bool = True,
 ) -> bool:
     """Determine whether a specific in-memory object or a UPathstr is any or a specific scverse data structure."""
     file_suffix = None
@@ -551,12 +550,19 @@ def data_is_scversedatastructure(
         file_suffix = ".h5mu"
     # SpatialData does not have a unique suffix but `.zarr`
 
+    # AnnData allows both AnnDataAccessor and AnnData
+    class_name = data.__class__.__name__
     if structure_type is None:
         return any(
-
+            class_name
+            in (["AnnData", "AnnDataAccessor"] if cl_name == "AnnData" else [cl_name])
             for cl_name in ["AnnData", "MuData", "SpatialData"]
         )
-    elif
+    elif class_name in (
+        ["AnnData", "AnnDataAccessor"]
+        if structure_type == "AnnData"
+        else [structure_type]
+    ):
        return True
 
     data_type = structure_type.lower()
@@ -580,11 +586,12 @@ def data_is_scversedatastructure(
             )
             == data_type
         )
-
+        elif cloud_warning:
             logger.warning(
                 f"we do not check whether cloud zarr is {structure_type}"
             )
             return False
+
     return False
 
 
@@ -600,23 +607,24 @@ def data_is_soma_experiment(data: SOMAExperiment | UPathStr) -> bool:
 def _check_otype_artifact(
     data: UPathStr | pd.DataFrame | ScverseDataStructures,
     otype: str | None = None,
+    cloud_warning: bool = True,
 ) -> str:
     if otype is None:
         if isinstance(data, pd.DataFrame):
-            logger.warning("data is a DataFrame, please use .
+            logger.warning("data is a DataFrame, please use .from_dataframe()")
             otype = "DataFrame"
             return otype
 
         data_is_path = isinstance(data, (str, Path))
-        if data_is_scversedatastructure(data, "AnnData"):
+        if data_is_scversedatastructure(data, "AnnData", cloud_warning):
            if not data_is_path:
                logger.warning("data is an AnnData, please use .from_anndata()")
            otype = "AnnData"
-        elif data_is_scversedatastructure(data, "MuData"):
+        elif data_is_scversedatastructure(data, "MuData", cloud_warning):
            if not data_is_path:
                logger.warning("data is a MuData, please use .from_mudata()")
            otype = "MuData"
-        elif data_is_scversedatastructure(data, "SpatialData"):
+        elif data_is_scversedatastructure(data, "SpatialData", cloud_warning):
            if not data_is_path:
                logger.warning("data is a SpatialData, please use .from_spatialdata()")
            otype = "SpatialData"
@@ -871,7 +879,7 @@ def get_labels(
 
         values = []
         for v in qs_by_registry.values():
-            values += v.
+            values += v.to_list(get_name_field(v))
         return values
     if len(registries_to_check) == 1 and registry in qs_by_registry:
         return qs_by_registry[registry]
@@ -894,7 +902,7 @@ def add_labels(
         raise ValueError("Please save the artifact/collection before adding a label!")
 
     if isinstance(records, (QuerySet, QuerySet.__base__)): # need to have both
-        records = records.
+        records = records.to_list()
     if isinstance(records, (str, SQLRecord)):
         records = [records]
     if not isinstance(records, list): # avoids warning for pd Series
@@ -993,6 +1001,112 @@ def add_labels(
     )
 
 
+def delete_permanently(artifact: Artifact, storage: bool, using_key: str):
+    # need to grab file path before deletion
+    try:
+        path, _ = filepath_from_artifact(artifact, using_key)
+    except OSError:
+        # we can still delete the record
+        logger.warning("Could not get path")
+        storage = False
+    # only delete in storage if DB delete is successful
+    # DB delete might error because of a foreign key constraint violated etc.
+    if artifact._overwrite_versions and artifact.is_latest:
+        logger.important(
+            "deleting all versions of this artifact because they all share the same store"
+        )
+        for version in artifact.versions.all(): # includes artifact
+            _delete_skip_storage(version)
+    else:
+        artifact._delete_skip_storage()
+    # by default do not delete storage if deleting only a previous version
+    # and the underlying store is mutable
+    if artifact._overwrite_versions and not artifact.is_latest:
+        delete_in_storage = False
+        if storage:
+            logger.warning(
+                "storage argument is ignored; can't delete store of a previous version if overwrite_versions is True"
+            )
+    elif artifact.key is None or artifact._key_is_virtual:
+        # do not ask for confirmation also if storage is None
+        delete_in_storage = storage is None or storage
+    else:
+        # for artifacts with non-virtual semantic storage keys (key is not None)
+        # ask for extra-confirmation if storage is None
+        if storage is None:
+            response = input(
+                f"Are you sure to want to delete {path}? (y/n) You can't undo"
+                " this action."
+            )
+            delete_in_storage = response == "y"
+        else:
+            delete_in_storage = storage
+    if not delete_in_storage:
+        logger.important(f"a file/folder remains here: {path}")
+    # we don't yet have logic to bring back the deleted metadata record
+    # in case storage deletion fails - this is important for ACID down the road
+    if delete_in_storage:
+        delete_msg = delete_storage(path, raise_file_not_found_error=False)
+        if delete_msg != "did-not-delete":
+            logger.success(f"deleted {colors.yellow(f'{path}')}")
+
+
+class LazyArtifact:
+    """Lazy artifact for streaming to auto-generated internal paths.
+
+    This is needed when it is desirable to stream to a `lamindb` auto-generated internal path
+    and register the path as an artifact (see :class:`~lamindb.Artifact`).
+
+    This object creates a real artifact on `.save()` with the provided arguments.
+
+    Args:
+        suffix: The suffix for the auto-generated internal path
+        overwrite_versions: Whether to overwrite versions.
+        **kwargs: Keyword arguments for the artifact to be created.
+
+    Examples:
+
+        Create a lazy artifact, write to the path and save to get a real artifact::
+
+            lazy = ln.Artifact.from_lazy(suffix=".zarr", overwrite_versions=True, key="mydata.zarr")
+            zarr.open(lazy.path, mode="w")["test"] = np.array(["test"]) # stream to the path
+            artifact = lazy.save()
+    """
+
+    def __init__(self, suffix: str, overwrite_versions: bool, **kwargs):
+        self.kwargs = kwargs
+        self.kwargs["overwrite_versions"] = overwrite_versions
+
+        if (key := kwargs.get("key")) is not None and extract_suffix_from_path(
+            PurePosixPath(key)
+        ) != suffix:
+            raise ValueError(
+                "The suffix argument and the suffix of key should be the same."
+            )
+
+        uid, _ = create_uid(n_full_id=20)
+        storage_key = auto_storage_key_from_artifact_uid(
+            uid, suffix, overwrite_versions=overwrite_versions
+        )
+        storepath = setup_settings.storage.root / storage_key
+
+        self._path = storepath
+
+    @property
+    def path(self) -> UPath:
+        return self._path
+
+    def save(self, upload: bool | None = None, **kwargs) -> Artifact:
+        artifact = Artifact(self.path, _is_internal_call=True, **self.kwargs)
+        return artifact.save(upload=upload, **kwargs)
+
+    def __repr__(self) -> str: # pragma: no cover
+        show_kwargs = {k: v for k, v in self.kwargs.items() if v is not None}
+        return (
+            f"LazyArtifact object with\n path: {self.path}\n arguments: {show_kwargs}"
+        )
+
+
 class Artifact(SQLRecord, IsVersioned, TracksRun, TracksUpdates):
     # Note that this docstring has to be consistent with Curator.save_artifact()
     """Datasets & models stored as files, folders, or arrays.
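The new `LazyArtifact` above reserves an auto-generated internal path before any record exists, which is what makes direct streaming workflows possible. A minimal sketch of the flow, assuming `zarr` is installed and a lamindb instance is initialized::

    import lamindb as ln
    import numpy as np
    import zarr

    # reserve an internal path; no Artifact record is created yet
    lazy = ln.Artifact.from_lazy(suffix=".zarr", overwrite_versions=True, key="mydata.zarr")
    zarr.open(lazy.path, mode="w")["test"] = np.array(["test"])  # stream straight to storage
    artifact = lazy.save()  # only now is the real Artifact created and registered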
@@ -1028,15 +1142,22 @@ class Artifact(SQLRecord, IsVersioned, TracksRun, TracksUpdates):
 
         artifact = ln.Artifact("s3://my_bucket/my_folder/my_file.csv").save()
 
-    If you want to **validate & annotate** an array, pass a `schema` to one of the `.
+    If you want to **validate & annotate** an array, pass a `schema` to one of the `.from_dataframe()`, `.from_anndata()`, ... constructors::
 
         schema = ln.Schema(itype=ln.Feature) # a schema that merely enforces that feature names exist in the Feature registry
-        artifact = ln.Artifact.
+        artifact = ln.Artifact.from_dataframe("./my_file.parquet", key="my_dataset.parquet", schema=schema).save() # validated and annotated
+
+    To annotate by **external features**::
+
+        schema = ln.examples.schemas.valid_features()
+        artifact = ln.Artifact("./my_file.parquet", features={"species": "bird"}).save()
+
+    A `schema` can be optionally passed to also validate the features.
 
     You can make a **new version** of an artifact by passing an existing `key`::
 
         artifact_v2 = ln.Artifact("./my_file.parquet", key="examples/my_file.parquet").save()
-        artifact_v2.versions.
+        artifact_v2.versions.to_dataframe() # see all versions
 
     You can write artifacts to other storage locations by switching the current default storage location (:attr:`~lamindb.core.Settings.storage`)::
 
@@ -1110,6 +1231,7 @@ class Artifact(SQLRecord, IsVersioned, TracksRun, TracksUpdates):
 
     class Meta(SQLRecord.Meta, IsVersioned.Meta, TracksRun.Meta, TracksUpdates.Meta):
         abstract = False
+        app_label = "lamindb"
         constraints = [
             # a simple hard unique constraint on `hash` clashes with the fact
             # that pipelines sometimes aim to ingest the exact same file in different
@@ -1157,11 +1279,10 @@ class Artifact(SQLRecord, IsVersioned, TracksRun, TracksUpdates):
 
         ln.Artifact.filter(scientist="Barbara McClintock")
 
-    Features may or may not be part of the dataset, i.e., the artifact content in storage.
-    instance, the :class:`~lamindb.curators.DataFrameCurator` flow validates the columns of a
-    `DataFrame`-like artifact and annotates it with features corresponding to
-
-    validate the content of the artifact.
+    Features may or may not be part of the dataset, i.e., the artifact content in storage.
+    For instance, the :class:`~lamindb.curators.DataFrameCurator` flow validates the columns of a
+    `DataFrame`-like artifact and annotates it with features corresponding to these columns.
+    `artifact.features.add_values`, by contrast, does not validate the content of the artifact.
 
     .. dropdown:: An example for a model-like artifact
 
@@ -1176,6 +1297,11 @@ class Artifact(SQLRecord, IsVersioned, TracksRun, TracksUpdates):
                 "subset_highlyvariable": True,
             },
         })
+
+    To validate external features::
+
+        schema = ln.Schema([ln.Feature(name="species", dtype=str).save()]).save()
+        artifact.features.add_values({"species": "bird"}, schema=schema)
     """
     from ._feature_manager import FeatureManager
 
@@ -1385,15 +1511,46 @@ class Artifact(SQLRecord, IsVersioned, TracksRun, TracksUpdates):
         # now proceed with the user-facing constructor
         if len(args) > 1:
             raise ValueError("Only one non-keyword arg allowed: data")
+
         data: str | Path = kwargs.pop("data") if len(args) == 0 else args[0]
         kind: str = kwargs.pop("kind", None)
         key: str | None = kwargs.pop("key", None)
         run_id: int | None = kwargs.pop("run_id", None) # for REST API
         run: Run | None = kwargs.pop("run", None)
+        using_key = kwargs.pop("using_key", None)
         description: str | None = kwargs.pop("description", None)
         revises: Artifact | None = kwargs.pop("revises", None)
         overwrite_versions: bool | None = kwargs.pop("overwrite_versions", None)
         version: str | None = kwargs.pop("version", None)
+
+        features: dict[str, Any] = kwargs.pop("features", None)
+        schema: Schema | None = kwargs.pop("schema", None)
+        if features is not None and schema is not None:
+            from lamindb.curators import DataFrameCurator
+
+            temp_df = pd.DataFrame([features])
+            validation_schema = schema
+            if schema.itype == "Composite" and schema.slots:
+                if len(schema.slots) > 1:
+                    raise ValueError(
+                        f"Composite schema has {len(schema.slots)} slots. "
+                        "External feature validation only supports schemas with a single slot."
+                    )
+                try:
+                    validation_schema = next(
+                        k for k in schema.slots.keys() if k.startswith("__external")
+                    )
+                except StopIteration:
+                    raise ValueError(
+                        "External feature validation requires a slot that starts with __external."
+                    ) from None
+
+            external_curator = DataFrameCurator(temp_df, validation_schema)
+            external_curator.validate()
+            external_curator._artifact = self
+
+            self._external_features = features
+
         branch_id: int | None = None
         if "visibility" in kwargs: # backward compat
             branch_id = kwargs.pop("visibility")
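With the constructor change above, dataset-level metadata can ride along at creation time via `features=` and is validated when a `schema` is passed alongside. A sketch, assuming the feature has been registered first::

    import lamindb as ln

    ln.Feature(name="species", dtype=str).save()
    schema = ln.examples.schemas.valid_features()  # enforces that feature names are registered
    # the features dict is validated against the schema, then attached on .save()
    artifact = ln.Artifact("./my_file.parquet", features={"species": "bird"}, schema=schema).save()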
@@ -1404,13 +1561,16 @@ class Artifact(SQLRecord, IsVersioned, TracksRun, TracksUpdates):
         else:
             branch_id = 1
         branch = kwargs.pop("branch", None)
+
         space = kwargs.pop("space", None)
-        space_id
+        assert "space_id" not in kwargs, "please pass space instead" # noqa: S101
         format = kwargs.pop("format", None)
         _is_internal_call = kwargs.pop("_is_internal_call", False)
         skip_check_exists = kwargs.pop("skip_check_exists", False)
+        storage_was_passed = False
         if "storage" in kwargs:
             storage = kwargs.pop("storage")
+            storage_was_passed = True
         elif (
             setup_settings.instance.keep_artifacts_local
             and setup_settings.instance._local_storage is not None
@@ -1418,13 +1578,32 @@ class Artifact(SQLRecord, IsVersioned, TracksRun, TracksUpdates):
             storage = setup_settings.instance.local_storage.record
         else:
             storage = setup_settings.instance.storage.record
-
+        if space is None:
+            from lamindb import context as run_context
+
+            if run_context.space is not None:
+                space = run_context.space
+            elif setup_settings.space is not None:
+                space = setup_settings.space
+        if space is not None and space != storage.space:
+            if storage_was_passed:
+                logger.warning(
+                    "storage argument ignored as storage information from space takes precedence"
+                )
+            storage_locs_for_space = Storage.filter(space=space)
+            storage = storage_locs_for_space.first()
+            if len(storage_locs_for_space) > 1:
+                logger.warning(
+                    f"more than one storage location for space {space}, choosing {storage}"
+                )
         otype = kwargs.pop("otype") if "otype" in kwargs else None
         if isinstance(data, str) and data.startswith("s3:///"):
             # issue in Groovy / nf-lamin producing malformed S3 paths
             # https://laminlabs.slack.com/archives/C08J590666Q/p1751315027830849?thread_ts=1751039961.479259&cid=C08J590666Q
             data = data.replace("s3:///", "s3://")
-        otype = _check_otype_artifact(
+        otype = _check_otype_artifact(
+            data=data, otype=otype, cloud_warning=not _is_internal_call
+        )
         if "type" in kwargs:
             logger.warning("`type` will be removed soon, please use `kind`")
             kind = kwargs.pop("type")
@@ -1457,6 +1636,7 @@ class Artifact(SQLRecord, IsVersioned, TracksRun, TracksUpdates):
             )
         else:
             is_automanaged_path = False
+
         provisional_uid, revises = create_uid(revises=revises, version=version)
         kwargs_or_artifact, privates = get_artifact_kwargs_from_data(
             data=data,
@@ -1514,7 +1694,7 @@ class Artifact(SQLRecord, IsVersioned, TracksRun, TracksUpdates):
             uid, revises = create_uid(revises=revises, version=version)
             kwargs["uid"] = uid
 
-        # only set key now so that we don't
+        # only set key now so that we don't perform a look-up on it in case revises is passed
         if revises is not None and revises.key is not None and kwargs["key"] is None:
             kwargs["key"] = revises.key
 
@@ -1526,7 +1706,6 @@ class Artifact(SQLRecord, IsVersioned, TracksRun, TracksUpdates):
         kwargs["branch"] = branch
         kwargs["branch_id"] = branch_id
         kwargs["space"] = space
-        kwargs["space_id"] = space_id
         kwargs["otype"] = otype
         kwargs["revises"] = revises
         # this check needs to come down here because key might be populated from an
@@ -1540,6 +1719,43 @@ class Artifact(SQLRecord, IsVersioned, TracksRun, TracksUpdates):
 
         super().__init__(**kwargs)
 
+    @classmethod
+    def from_lazy(
+        cls,
+        suffix: str,
+        overwrite_versions: bool,
+        key: str | None = None,
+        description: str | None = None,
+        run: Run | None = None,
+        **kwargs,
+    ) -> LazyArtifact:
+        """Create a lazy artifact for streaming to auto-generated internal paths.
+
+        This is needed when it is desirable to stream to a `lamindb` auto-generated internal path
+        and register the path as an artifact.
+
+        The lazy artifact object (see :class:`~lamindb.models.LazyArtifact`) creates a real artifact
+        on `.save()` with the provided arguments.
+
+        Args:
+            suffix: The suffix for the auto-generated internal path
+            overwrite_versions: Whether to overwrite versions.
+            key: An optional key to reference the artifact.
+            description: A description.
+            run: The run that creates the artifact.
+            **kwargs: Other keyword arguments for the artifact to be created.
+
+        Examples:
+
+            Create a lazy artifact, write to the path and save to get a real artifact::
+
+                lazy = ln.Artifact.from_lazy(suffix=".zarr", overwrite_versions=True, key="mydata.zarr")
+                zarr.open(lazy.path, mode="w")["test"] = np.array(["test"]) # stream to the path
+                artifact = lazy.save()
+        """
+        args = {"key": key, "description": description, "run": run, **kwargs}
+        return LazyArtifact(suffix, overwrite_versions, **args)
+
     @property
     @deprecated("kind")
     def type(self) -> str:
@@ -1623,6 +1839,7 @@ class Artifact(SQLRecord, IsVersioned, TracksRun, TracksUpdates):
             idlike: Either a uid stub, uid or an integer id.
             is_run_input: Whether to track this artifact as run input.
             expressions: Fields and values passed as Django query expressions.
+                Use `path=...` to get an artifact for a local or remote filepath if exists.
 
         Raises:
             :exc:`docs:lamindb.errors.DoesNotExist`: In case no matching record is found.
@@ -1637,6 +1854,7 @@ class Artifact(SQLRecord, IsVersioned, TracksRun, TracksUpdates):
 
             artifact = ln.Artifact.get("tCUkRcaEjTjhtozp0000")
             artifact = ln.Arfifact.get(key="examples/my_file.parquet")
+            artifact = ln.Artifact.get(path="s3://bucket/folder/adata.h5ad")
         """
         from .query_set import QuerySet
 
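`Artifact.get()` now also resolves a storage path, so a lookup no longer needs the uid or key when only the filepath is at hand::

    import lamindb as ln

    # raises ln.errors.DoesNotExist if no artifact is registered at this path
    artifact = ln.Artifact.get(path="s3://bucket/folder/adata.h5ad")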
@@ -1668,45 +1886,11 @@ class Artifact(SQLRecord, IsVersioned, TracksRun, TracksUpdates):
             ln.Arfifact.filter(cell_type_by_model__name="T cell")
 
         """
-        from
-
-        if expressions:
-            keys_normalized = [key.split("__")[0] for key in expressions]
-            field_or_feature_or_param = keys_normalized[0].split("__")[0]
-            if field_or_feature_or_param in Artifact.__get_available_fields__():
-                qs = QuerySet(model=cls).filter(*queries, **expressions)
-                if not any(e.startswith("kind") for e in expressions):
-                    return qs.exclude(kind="__lamindb_run__")
-                else:
-                    return qs
-            elif all(
-                features_validated := Feature.validate(
-                    keys_normalized, field="name", mute=True
-                )
-            ):
-                return filter_base(Artifact, **expressions)
-            else:
-                features = ", ".join(
-                    sorted(np.array(keys_normalized)[~features_validated])
-                )
-                message = f"feature names: {features}"
-                avail_fields = cls.__get_available_fields__()
-                if "_branch_code" in avail_fields:
-                    avail_fields.remove("_branch_code") # backward compat
-                fields = ", ".join(sorted(avail_fields))
-                raise InvalidArgument(
-                    f"You can query either by available fields: {fields}\n"
-                    f"Or fix invalid {message}"
-                )
-        else:
-            return (
-                QuerySet(model=cls)
-                .filter(*queries, **expressions)
-                .exclude(kind="__lamindb_run__")
-            )
+        # from Registry metaclass
+        return type(cls).filter(cls, *queries, **expressions)
 
     @classmethod
-    def
+    def from_dataframe(
         cls,
         df: pd.DataFrame,
         *,
@@ -1715,6 +1899,7 @@ class Artifact(SQLRecord, IsVersioned, TracksRun, TracksUpdates):
         run: Run | None = None,
         revises: Artifact | None = None,
         schema: Schema | None = None,
+        features: dict[str, Any] | None = None,
         **kwargs,
     ) -> Artifact:
         """Create from `DataFrame`, optionally validate & annotate.
@@ -1727,6 +1912,7 @@ class Artifact(SQLRecord, IsVersioned, TracksRun, TracksUpdates):
             revises: An old version of the artifact.
             run: The run that creates the artifact.
             schema: A schema that defines how to validate & annotate.
+            features: External features dict for additional annotation.
 
         See Also:
             :meth:`~lamindb.Collection`
@@ -1741,7 +1927,7 @@ class Artifact(SQLRecord, IsVersioned, TracksRun, TracksUpdates):
             import lamindb as ln
 
             df = ln.core.datasets.mini_immuno.get_dataset1()
-            artifact = ln.Artifact.
+            artifact = ln.Artifact.from_dataframe(df, key="examples/dataset1.parquet").save()
 
         With validation and annotation.
 
@@ -1758,6 +1944,10 @@ class Artifact(SQLRecord, IsVersioned, TracksRun, TracksUpdates):
         .. literalinclude:: scripts/define_mini_immuno_features_labels.py
             :language: python
 
+        External features:
+
+        .. literalinclude:: scripts/curate_dataframe_external_features.py
+            :language: python
         """
         artifact = Artifact( # type: ignore
             data=df,
@@ -1770,8 +1960,9 @@ class Artifact(SQLRecord, IsVersioned, TracksRun, TracksUpdates):
             **kwargs,
         )
         artifact.n_observations = len(df)
+
         if schema is not None:
-            from
+            from lamindb.curators.core import ComponentCurator
 
             if not artifact._state.adding and artifact.suffix != ".parquet":
                 logger.warning(
@@ -1780,12 +1971,56 @@ class Artifact(SQLRecord, IsVersioned, TracksRun, TracksUpdates):
                 )
                 return artifact
 
-
-
-
-
+            # Handle external features validation for Composite schemas
+            if schema.itype == "Composite" and features is not None:
+                try:
+                    external_slot = next(
+                        k for k in schema.slots.keys() if "__external__" in k
+                    )
+                    validation_schema = schema.slots[external_slot]
+                except StopIteration:
+                    raise ValueError(
+                        "External feature validation requires a slot __external__."
+                    ) from None
+
+                external_curator = ComponentCurator(
+                    pd.DataFrame([features]), validation_schema
+                )
+                external_curator.validate()
+                artifact._external_features = features
+
+            # Validate main DataFrame if not Composite or if Composite has attrs
+            if schema.itype != "Composite" or "attrs" in schema.slots:
+                curator = ComponentCurator(artifact, schema)
+                curator.validate()
+                artifact.schema = schema
+                artifact._curator = curator
+
         return artifact
 
+    @classmethod
+    @deprecated("from_dataframe")
+    def from_df(
+        cls,
+        df: pd.DataFrame,
+        *,
+        key: str | None = None,
+        description: str | None = None,
+        run: Run | None = None,
+        revises: Artifact | None = None,
+        schema: Schema | None = None,
+        **kwargs,
+    ) -> Artifact:
+        return cls.from_dataframe(
+            df,
+            key=key,
+            description=description,
+            run=run,
+            revises=revises,
+            schema=schema,
+            **kwargs,
+        )
+
     @classmethod
     def from_anndata(
         cls,
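Together, the rename to `from_dataframe()` (with `from_df()` kept as a deprecated alias) and the new `features` argument let a single call validate both columns and external metadata. A sketch, assuming features are registered as in the mini_immuno example::

    import lamindb as ln

    df = ln.core.datasets.mini_immuno.get_dataset1()
    schema = ln.Schema(itype=ln.Feature)  # columns must be registered features
    artifact = ln.Artifact.from_dataframe(
        df, key="examples/dataset1.parquet", schema=schema
    ).save()  # validated and annotated; ln.Artifact.from_df(...) still works but warns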
@@ -2285,17 +2520,19 @@ class Artifact(SQLRecord, IsVersioned, TracksRun, TracksUpdates):
     ):
         """Open a dataset for streaming.
 
-        Works for `AnnData` (`.h5ad` and `.zarr`),
-        `tiledbsoma` objects (`.tiledbsoma`),
+        Works for `AnnData` (`.h5ad` and `.zarr`), `SpatialData` (`.zarr`),
+        generic `hdf5` and `zarr`, `tiledbsoma` objects (`.tiledbsoma`),
+        `pyarrow` or `polars` compatible formats
         (`.parquet`, `.csv`, `.ipc` etc. files or directories with such files).
 
         Args:
-            mode: can
+            mode: can be `"r"` or `"w"` (write mode) for `tiledbsoma` stores,
+                `"r"` or `"r+"` for `AnnData` or `SpatialData` `zarr` stores,
                 otherwise should be always `"r"` (read-only mode).
             engine: Which module to use for lazy loading of a dataframe
                 from `pyarrow` or `polars` compatible formats.
                 This has no effect if the artifact is not a dataframe, i.e.
-                if it is an `AnnData,` `hdf5`, `zarr
+                if it is an `AnnData,` `hdf5`, `zarr`, `tiledbsoma` object etc.
             is_run_input: Whether to track this artifact as run input.
             **kwargs: Keyword arguments for the accessor, i.e. `h5py` or `zarr` connection,
                 `pyarrow.dataset.dataset`, `polars.scan_*` function.
@@ -2339,7 +2576,8 @@ class Artifact(SQLRecord, IsVersioned, TracksRun, TracksUpdates):
                 s + ".gz" for s in PYARROW_SUFFIXES
             )  # this doesn't work for externally gzipped files, REMOVE LATER
         )
-
+        suffix = self.suffix
+        if suffix not in suffixes:
             raise ValueError(
                 "Artifact should have a zarr, h5, tiledbsoma object"
                 " or a compatible `pyarrow.dataset.dataset` or `polars.scan_*` directory"
@@ -2348,23 +2586,28 @@ class Artifact(SQLRecord, IsVersioned, TracksRun, TracksUpdates):
                 f" Or no suffix for a folder with {', '.join(df_suffixes)} files"
                 " (no mixing allowed)."
             )
-        if self.suffix != ".tiledbsoma" and self.key != "soma" and mode != "r":
-            raise ValueError(
-                "Only a tiledbsoma store can be openened with `mode!='r'`."
-            )
-
         using_key = settings._using_key
         filepath, cache_key = filepath_cache_key_from_artifact(
             self, using_key=using_key
         )
+
         is_tiledbsoma_w = (
-            filepath.name == "soma" or
+            filepath.name == "soma" or suffix == ".tiledbsoma"
         ) and mode == "w"
+        is_zarr_w = suffix == ".zarr" and mode == "r+"
+
+        if mode != "r" and not (is_tiledbsoma_w or is_zarr_w):
+            raise ValueError(
+                f"It is not allowed to open a {suffix} object with mode='{mode}'. "
+                "You can open all supported formats with mode='r', "
+                "a tiledbsoma store with mode='w', "
+                "AnnData or SpatialData zarr store with mode='r+'."
+            )
         # consider the case where an object is already locally cached
         localpath = setup_settings.paths.cloud_to_local_no_update(
             filepath, cache_key=cache_key
         )
-        if is_tiledbsoma_w:
+        if is_tiledbsoma_w or is_zarr_w:
             open_cache = False
         else:
             open_cache = not isinstance(
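The relaxed mode check above means a zarr-backed `AnnData` or `SpatialData` store can now be opened writable, while other formats stay read-only. A sketch of the allowed modes, assuming a `.zarr` artifact exists under this key::

    import lamindb as ln

    artifact = ln.Artifact.get(key="mydata.zarr")
    store = artifact.open(mode="r+")  # zarr-backed AnnData/SpatialData: now allowed
    # mode="w" remains reserved for tiledbsoma stores;
    # any other suffix with mode != "r" raises ValueError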
@@ -2395,9 +2638,7 @@ class Artifact(SQLRecord, IsVersioned, TracksRun, TracksUpdates):
             else:
                 localpath.unlink(missing_ok=True)
         else:
-            access = backed_access(
-                filepath, mode, engine, using_key=using_key, **kwargs
-            )
+            access = backed_access(self, mode, engine, using_key=using_key, **kwargs)
             if is_tiledbsoma_w:
 
                 def finalize():
@@ -2413,6 +2654,7 @@ class Artifact(SQLRecord, IsVersioned, TracksRun, TracksUpdates):
                     new_version = Artifact(
                         filepath, revises=self, _is_internal_call=True
                     ).save()
+                    # note: sets _state.db = "default"
                     init_self_from_db(self, new_version)
 
                 if localpath != filepath and localpath.exists():
@@ -2569,94 +2811,7 @@ class Artifact(SQLRecord, IsVersioned, TracksRun, TracksUpdates):
             artifact = ln.Artifact.get(key="some.tiledbsoma". is_latest=True)
             artiact.delete() # delete all versions, the data will be deleted or prompted for deletion.
         """
-
-        # storage = True if storage is None else storage
-
-        # this first check means an invalid delete fails fast rather than cascading through
-        # database and storage permission errors
-        if os.getenv("LAMINDB_MULTI_INSTANCE") is None:
-            isettings = setup_settings.instance
-            if self.storage.instance_uid != isettings.uid and (
-                storage or storage is None
-            ):
-                raise IntegrityError(
-                    "Cannot simply delete artifacts outside of this instance's managed storage locations."
-                    "\n(1) If you only want to delete the metadata record in this instance, pass `storage=False`"
-                    f"\n(2) If you want to delete the artifact in storage, please load the managing lamindb instance (uid={self.storage.instance_uid})."
-                    f"\nThese are all managed storage locations of this instance:\n{Storage.filter(instance_uid=isettings.uid).df()}"
-                )
-        # by default, we only move artifacts into the trash (branch_id = -1)
-        trash_branch_id = -1
-        if self.branch_id > trash_branch_id and not permanent:
-            if storage is not None:
-                logger.warning("moving artifact to trash, storage arg is ignored")
-            # move to trash
-            self.branch_id = trash_branch_id
-            self.save()
-            logger.important(f"moved artifact to trash (branch_id = {trash_branch_id})")
-            return
-
-        # if the artifact is already in the trash
-        # permanent delete skips the trash
-        if permanent is None:
-            # ask for confirmation of permanent delete
-            response = input(
-                "Artifact record is already in trash! Are you sure you want to permanently"
-                " delete it? (y/n) You can't undo this action."
-            )
-            delete_record = response == "y"
-        else:
-            assert permanent # noqa: S101
-            delete_record = True
-
-        if delete_record:
-            # need to grab file path before deletion
-            try:
-                path, _ = filepath_from_artifact(self, using_key)
-            except OSError:
-                # we can still delete the record
-                logger.warning("Could not get path")
-                storage = False
-            # only delete in storage if DB delete is successful
-            # DB delete might error because of a foreign key constraint violated etc.
-            if self._overwrite_versions and self.is_latest:
-                logger.important(
-                    "deleting all versions of this artifact because they all share the same store"
-                )
-                for version in self.versions.all(): # includes self
-                    _delete_skip_storage(version)
-            else:
-                self._delete_skip_storage()
-            # by default do not delete storage if deleting only a previous version
-            # and the underlying store is mutable
-            if self._overwrite_versions and not self.is_latest:
-                delete_in_storage = False
-                if storage:
-                    logger.warning(
-                        "storage argument is ignored; can't delete store of a previous version if overwrite_versions is True"
-                    )
-            elif self.key is None or self._key_is_virtual:
-                # do not ask for confirmation also if storage is None
-                delete_in_storage = storage is None or storage
-            else:
-                # for artifacts with non-virtual semantic storage keys (key is not None)
-                # ask for extra-confirmation
-                if storage is None:
-                    response = input(
-                        f"Are you sure to want to delete {path}? (y/n) You can't undo"
-                        " this action."
-                    )
-                    delete_in_storage = response == "y"
-                else:
-                    delete_in_storage = storage
-            if not delete_in_storage:
-                logger.important(f"a file/folder remains here: {path}")
-            # we don't yet have logic to bring back the deleted metadata record
-            # in case storage deletion fails - this is important for ACID down the road
-            if delete_in_storage:
-                delete_msg = delete_storage(path, raise_file_not_found_error=False)
-                if delete_msg != "did-not-delete":
-                    logger.success(f"deleted {colors.yellow(f'{path}')}")
+        super().delete(permanent=permanent, storage=storage, using_key=using_key)
 
     @property
     def _is_saved_to_storage_location(self) -> bool | None:
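The `delete()` body now delegates to `super().delete()`, with the storage handling factored into the module-level `delete_permanently()` added earlier in this diff; the user-facing semantics stay the same::

    import lamindb as ln

    artifact = ln.Artifact.get(key="examples/my_file.parquet")
    artifact.delete()  # moves the record to trash (branch_id = -1)
    artifact.delete(permanent=True)  # deletes the record and, depending on `storage`, the store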
|
@@ -2785,11 +2940,20 @@ class Artifact(SQLRecord, IsVersioned, TracksRun, TracksUpdates):
|
|
2785
2940
|
local_path_cache,
|
2786
2941
|
)
|
2787
2942
|
logger.important(f"moved local artifact to cache: {local_path_cache}")
|
2943
|
+
|
2944
|
+
# Handle external features
|
2945
|
+
if hasattr(self, "_external_features") and self._external_features is not None:
|
2946
|
+
external_features = self._external_features
|
2947
|
+
delattr(self, "_external_features")
|
2948
|
+
self.features.add_values(external_features)
|
2949
|
+
|
2950
|
+
# annotate Artifact
|
2788
2951
|
if hasattr(self, "_curator"):
|
2789
2952
|
curator = self._curator
|
2790
2953
|
delattr(self, "_curator")
|
2791
2954
|
# just annotates this artifact
|
2792
2955
|
curator.save_artifact()
|
2956
|
+
|
2793
2957
|
return self
|
2794
2958
|
|
2795
2959
|
def restore(self) -> None:
|
@@ -2837,7 +3001,7 @@ def _synchronize_cleanup_on_error(
|
|
2837
3001
|
|
2838
3002
|
|
2839
3003
|
def _delete_skip_storage(artifact, *args, **kwargs) -> None:
|
2840
|
-
super(
|
3004
|
+
super(SQLRecord, artifact).delete(*args, **kwargs)
|
2841
3005
|
|
2842
3006
|
|
2843
3007
|
def _save_skip_storage(artifact, **kwargs) -> None:
|
@@ -2855,6 +3019,7 @@ class ArtifactFeatureValue(BaseSQLRecord, IsLink, TracksRun):
|
|
2855
3019
|
featurevalue = ForeignKey(FeatureValue, PROTECT, related_name="links_artifact")
|
2856
3020
|
|
2857
3021
|
class Meta:
|
3022
|
+
app_label = "lamindb"
|
2858
3023
|
unique_together = ("artifact", "featurevalue")
|
2859
3024
|
|
2860
3025
|
|