lamindb 1.6.2__py3-none-any.whl → 1.7.0__py3-none-any.whl
This diff shows the changes between publicly released package versions as they appear in their respective public registries. It is provided for informational purposes only.
- lamindb/__init__.py +1 -3
- lamindb/_finish.py +32 -16
- lamindb/base/types.py +6 -4
- lamindb/core/_context.py +127 -57
- lamindb/core/_mapped_collection.py +1 -1
- lamindb/core/_settings.py +44 -4
- lamindb/core/_track_environment.py +5 -2
- lamindb/core/loaders.py +1 -1
- lamindb/core/storage/_anndata_accessor.py +1 -1
- lamindb/core/storage/_tiledbsoma.py +14 -8
- lamindb/core/storage/_valid_suffixes.py +0 -1
- lamindb/core/storage/_zarr.py +1 -1
- lamindb/core/storage/objects.py +13 -8
- lamindb/core/storage/paths.py +9 -6
- lamindb/core/types.py +1 -1
- lamindb/curators/_legacy.py +2 -1
- lamindb/curators/core.py +106 -105
- lamindb/errors.py +9 -0
- lamindb/examples/fixtures/__init__.py +0 -0
- lamindb/examples/fixtures/sheets.py +224 -0
- lamindb/migrations/0103_remove_writelog_migration_state_and_more.py +1 -1
- lamindb/migrations/0105_record_unique_name.py +20 -0
- lamindb/migrations/0106_transfer_data_migration.py +25 -0
- lamindb/migrations/0107_add_schema_to_record.py +68 -0
- lamindb/migrations/0108_remove_record_sheet_remove_sheetproject_sheet_and_more.py +30 -0
- lamindb/migrations/0109_record_input_of_runs_alter_record_run_and_more.py +123 -0
- lamindb/migrations/0110_rename_values_artifacts_record_linked_artifacts.py +17 -0
- lamindb/migrations/0111_remove_record__sort_order.py +148 -0
- lamindb/migrations/0112_alter_recordartifact_feature_and_more.py +105 -0
- lamindb/migrations/0113_lower_case_branch_and_space_names.py +62 -0
- lamindb/migrations/0114_alter_run__status_code.py +24 -0
- lamindb/migrations/0115_alter_space_uid.py +52 -0
- lamindb/migrations/{0104_squashed.py → 0115_squashed.py} +261 -257
- lamindb/models/__init__.py +4 -3
- lamindb/models/_describe.py +88 -31
- lamindb/models/_feature_manager.py +627 -658
- lamindb/models/_label_manager.py +1 -3
- lamindb/models/artifact.py +214 -99
- lamindb/models/collection.py +7 -1
- lamindb/models/feature.py +288 -60
- lamindb/models/has_parents.py +3 -3
- lamindb/models/project.py +32 -15
- lamindb/models/query_manager.py +7 -1
- lamindb/models/query_set.py +118 -41
- lamindb/models/record.py +140 -94
- lamindb/models/run.py +42 -42
- lamindb/models/save.py +102 -16
- lamindb/models/schema.py +41 -8
- lamindb/models/sqlrecord.py +105 -40
- lamindb/models/storage.py +278 -0
- lamindb/models/transform.py +10 -2
- lamindb/models/ulabel.py +9 -1
- lamindb/py.typed +0 -0
- lamindb/setup/__init__.py +2 -1
- lamindb/setup/_switch.py +16 -0
- lamindb/setup/errors/__init__.py +4 -0
- lamindb/setup/types/__init__.py +4 -0
- {lamindb-1.6.2.dist-info → lamindb-1.7.0.dist-info}/METADATA +5 -5
- {lamindb-1.6.2.dist-info → lamindb-1.7.0.dist-info}/RECORD +61 -44
- lamindb/models/core.py +0 -135
- {lamindb-1.6.2.dist-info → lamindb-1.7.0.dist-info}/LICENSE +0 -0
- {lamindb-1.6.2.dist-info → lamindb-1.7.0.dist-info}/WHEEL +0 -0
lamindb/models/_label_manager.py
CHANGED
@@ -33,9 +33,7 @@ if TYPE_CHECKING:
     from lamindb.models import Artifact, Collection, SQLRecord
     from lamindb.models.query_set import QuerySet
 
-
-# TODO: remove records from below once all instances are migrated
-EXCLUDE_LABELS = {"feature_sets", "records"}
+EXCLUDE_LABELS = {"feature_sets"}
 
 
 def _get_labels(
lamindb/models/artifact.py
CHANGED
@@ -16,17 +16,15 @@ from django.db import connections, models
 from django.db.models import CASCADE, PROTECT, Q
 from lamin_utils import colors, logger
 from lamindb_setup import settings as setup_settings
-from lamindb_setup._init_instance import register_storage_in_instance
 from lamindb_setup.core._hub_core import select_storage_or_parent
-from lamindb_setup.core._settings_storage import init_storage
 from lamindb_setup.core.hashing import HASH_LENGTH, hash_dir, hash_file
-from lamindb_setup.core.types import UPathStr
 from lamindb_setup.core.upath import (
     create_path,
     extract_suffix_from_path,
     get_stat_dir_cloud,
     get_stat_file_cloud,
 )
+from lamindb_setup.types import UPathStr
 
 from lamindb.base import deprecated
 from lamindb.base.fields import (
@@ -35,7 +33,7 @@ from lamindb.base.fields import (
     CharField,
     ForeignKey,
 )
-from lamindb.errors import FieldValidationError
+from lamindb.errors import FieldValidationError, UnknownStorageLocation
 from lamindb.models.query_set import QuerySet
 
 from ..base.users import current_user_id
@@ -70,8 +68,6 @@ from ..models._is_versioned import (
 from ._django import get_artifact_with_related
 from ._feature_manager import (
     FeatureManager,
-    FeatureManagerArtifact,
-    add_label_feature_links,
     filter_base,
     get_label_links,
 )
@@ -80,10 +76,10 @@ from ._relations import (
     dict_module_name_to_model_name,
     dict_related_model_to_related_name,
 )
-from .core import Storage
 from .feature import Feature, FeatureValue
 from .has_parents import view_lineage
 from .run import Run, TracksRun, TracksUpdates, User
+from .save import check_and_attempt_clearing, check_and_attempt_upload
 from .schema import Schema
 from .sqlrecord import (
     BaseSQLRecord,
@@ -92,6 +88,7 @@ from .sqlrecord import (
     _get_record_kwargs,
     record_repr,
 )
+from .storage import Storage
 from .ulabel import ULabel
 
 WARNING_RUN_TRANSFORM = "no run & transform got linked, call `ln.track()` & re-run"
@@ -127,6 +124,7 @@ if TYPE_CHECKING:
     from ._label_manager import LabelManager
     from .collection import Collection
     from .project import Project, Reference
+    from .record import Record
     from .transform import Transform
 
 
@@ -173,19 +171,32 @@ def process_pathlike(
             if filepath.protocol == "hf":
                 hf_path = filepath.fs.resolve_path(filepath.as_posix())
                 hf_path.path_in_repo = ""
-                new_root = "hf://" + hf_path.unresolve()
+                new_root = "hf://" + hf_path.unresolve().rstrip("/")
             else:
                 if filepath.protocol == "s3":
                     # check that endpoint_url didn't propagate here
                     # as a part of the path string
                     assert "?" not in filepath.path  # noqa: S101
-                new_root = list(filepath.parents)[-1]
-            #
-            #
-
-
-
-
+                new_root = list(filepath.parents)[-1].as_posix().rstrip("/")
+            # Re the Parallel execution of the logic below:
+            # One of the threads (or processes) would start to write the hub record and then the test file.
+            # The other ones would retrieve the hub record and the test file.
+            # All of them would come out of the exercise with storage_record.instance_uid == setup_settings.instance.uid
+            # and all of them would raise UnkownStorageLocation.
+            # Then one of these threads will trigger storage_record.delete() but also this is idempotent;
+            # this means they all throw the same error and deletion of the inexistent stuff (hub record, marker file)
+            # would just silently fail.
+            # Edge case: A user legitimately creates a storage location and another user runs this here at the exact same time.
+            # There is no way to decide then which is the legitimate creation.
+            storage_record = Storage(root=new_root).save()
+            if storage_record.instance_uid == setup_settings.instance.uid:
+                # we don't want to inadvertently create managed storage locations
+                # hence, we revert the creation and throw an error
+                storage_record.delete()
+                raise UnknownStorageLocation(
+                    f"Path {filepath} is not contained in any known storage location:\n{Storage.df()[['uid', 'root', 'type']]}\n\n"
+                    f"Create a managed storage location that contains the path, e.g., by calling: ln.Storage(root='{new_root}').save()"
+                )
             use_existing_storage_key = True
             return storage_record, use_existing_storage_key
     # if the filepath is local
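Note: a sketch of the new failure mode under an assumed, hypothetical bucket name — a path outside every known storage location now raises `UnknownStorageLocation` instead of silently registering a managed location; the explicit registration route the error message suggests looks like this:

    import lamindb as ln
    from lamindb.errors import UnknownStorageLocation

    try:
        # hypothetical bucket that no registered storage location contains
        ln.Artifact("s3://some-unregistered-bucket/data.parquet", key="data.parquet").save()
    except UnknownStorageLocation:
        # register the location explicitly, as the error message suggests, then retry
        ln.Storage(root="s3://some-unregistered-bucket").save()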
@@ -271,7 +282,11 @@ def process_data(
         from lamindb import settings
 
         path = settings.cache_dir / f"{provisional_uid}{suffix}"
-
+        if isinstance(format, dict):
+            format.pop("suffix", None)
+        else:
+            format = {}
+        write_to_disk(data, path, **format)
         use_existing_storage_key = False
 
     return memory_rep, path, suffix, storage, use_existing_storage_key
@@ -355,7 +370,7 @@ def check_path_in_existing_storage(
     if check_hub_register_storage and getattr(path, "protocol", None) in {"s3", "gs"}:
         result = select_storage_or_parent(path.as_posix())
         if result is not None:
-            return Storage(**result).save()
+            return Storage(**result, _skip_preparation=True).save()
     return None
 
 
@@ -390,6 +405,7 @@ def get_artifact_kwargs_from_data(
     using_key: str | None = None,
     is_replace: bool = False,
     skip_check_exists: bool = False,
+    overwrite_versions: bool | None = None,
 ):
     from lamindb import settings
 
@@ -458,7 +474,8 @@
     # we use an actual storage key
     if check_path_in_storage:
         key_is_virtual = False
-
+    if overwrite_versions is None:
+        overwrite_versions = n_files is not None
     kwargs = {
         "uid": provisional_uid,
         "suffix": suffix,
@@ -471,7 +488,7 @@
         # to make them both available immediately
         # after object creation
         "n_files": n_files,
-        "_overwrite_versions":
+        "_overwrite_versions": overwrite_versions,  # True for folder, False for file
         "n_observations": None,  # to implement
         "run_id": run.id if run is not None else None,
         "run": run,
@@ -525,25 +542,25 @@ def log_storage_hint(
 
 def data_is_scversedatastructure(
     data: ScverseDataStructures | UPathStr,
-
+    structure_type: Literal["AnnData", "MuData", "SpatialData"] | None = None,
 ) -> bool:
     """Determine whether a specific in-memory object or a UPathstr is any or a specific scverse data structure."""
     file_suffix = None
-    if
+    if structure_type == "AnnData":
         file_suffix = ".h5ad"
-    elif
+    elif structure_type == "MuData":
         file_suffix = ".h5mu"
     # SpatialData does not have a unique suffix but `.zarr`
 
-    if
+    if structure_type is None:
         return any(
             hasattr(data, "__class__") and data.__class__.__name__ == cl_name
             for cl_name in ["AnnData", "MuData", "SpatialData"]
         )
-    elif hasattr(data, "__class__") and data.__class__.__name__ ==
+    elif hasattr(data, "__class__") and data.__class__.__name__ == structure_type:
         return True
 
-    data_type =
+    data_type = structure_type.lower()
     if isinstance(data, (str, Path, UPath)):
         data_path = UPath(data)
 
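Note: a minimal sketch of the reworked helper's new `structure_type` parameter; this is an internal helper defined in the diffed module, so the import path below follows that file:

    import anndata as ad
    from lamindb.models.artifact import data_is_scversedatastructure

    adata = ad.AnnData()
    data_is_scversedatastructure(adata)                            # True: any scverse structure
    data_is_scversedatastructure(adata, structure_type="AnnData")  # True: a specific check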
@@ -559,13 +576,15 @@ def data_is_scversedatastructure(
         if fsspec.utils.get_protocol(data_path.as_posix()) == "file":
             return (
                 identify_zarr_type(
-                    data_path if
-                    check=True if
+                    data_path if structure_type == "AnnData" else data,
+                    check=True if structure_type == "AnnData" else False,
                 )
                 == data_type
             )
         else:
-            logger.warning(
+            logger.warning(
+                f"we do not check whether cloud zarr is {structure_type}"
+            )
             return False
     return False
 
@@ -957,8 +976,7 @@ def add_labels(
         features_labels = {
             registry_name: [(feature, label_record) for label_record in records]
         }
-
-            self.features,
+        self.features._add_label_feature_links(
             features_labels,
             feature_ref_is_name=feature_ref_is_name,
             label_ref_is_name=label_ref_is_name,
@@ -980,7 +998,9 @@ class Artifact(SQLRecord, IsVersioned, TracksRun, TracksUpdates):
         key: `str | None = None` A path-like key to reference artifact in default storage, e.g., `"myfolder/myfile.fcs"`. Artifacts with the same key form a version family.
         description: `str | None = None` A description.
         revises: `Artifact | None = None` Previous version of the artifact. Is an alternative way to passing `key` to trigger a new version.
-
+        overwrite_versions: `bool | None = None` Whether to overwrite versions. Defaults to `True` for folders and `False` for files.
+        run: `Run | bool | None = None` The run that creates the artifact. If `False`, surpress tracking the run.
+            If `None`, infer the run from the global run context.
 
     Examples:
 
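Note: the new `run=False` semantics in a short sketch (file name hypothetical):

    import lamindb as ln

    # passing run=False suppresses run tracking instead of inferring the run from ln.track()
    artifact = ln.Artifact("./myfile.csv", key="myfile.csv", run=False).save()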
@@ -1090,47 +1110,54 @@ class Artifact(SQLRecord, IsVersioned, TracksRun, TracksUpdates):
         ),
     ]
 
+    _aux_fields: dict[str, tuple[str, type]] = {
+        "0": ("_is_saved_to_storage_location", bool),
+    }
     _len_full_uid: int = 20
     _len_stem_uid: int = 16
 
-
-
+    @property
+    def features(self) -> FeatureManager:
+        """Feature manager.
+
+        Typically, you annotate a dataset with features by defining a `Schema` and passing it to the `Artifact` constructor.
 
-
+        Here is how to do annotate an artifact ad hoc::
 
-
+            artifact.features.add_values({
+                "species": organism,  # here, organism is an Organism record
+                "scientist": ['Barbara McClintock', 'Edgar Anderson'],
+                "temperature": 27.6,
+                "experiment": "Experiment 1"
+            })
 
-
-            "species": organism,  # here, organism is an Organism record
-            "scientist": ['Barbara McClintock', 'Edgar Anderson'],
-            "temperature": 27.6,
-            "experiment": "Experiment 1"
-        })
+        Query artifacts by features::
 
-
+            ln.Artifact.filter(scientist="Barbara McClintock")
 
-
+        Features may or may not be part of the dataset, i.e., the artifact content in storage. For
+        instance, the :class:`~lamindb.curators.DataFrameCurator` flow validates the columns of a
+        `DataFrame`-like artifact and annotates it with features corresponding to
+        these columns. `artifact.features.add_values`, by contrast, does not
+        validate the content of the artifact.
 
-
-        instance, the :class:`~lamindb.curators.DataFrameCurator` flow validates the columns of a
-        `DataFrame`-like artifact and annotates it with features corresponding to
-        these columns. `artifact.features.add_values`, by contrast, does not
-        validate the content of the artifact.
+        .. dropdown:: An example for a model-like artifact
 
-
+            ::
 
-
+                artifact.features.add_values({
+                    "hidden_size": 32,
+                    "bottleneck_size": 16,
+                    "batch_size": 32,
+                    "preprocess_params": {
+                        "normalization_type": "cool",
+                        "subset_highlyvariable": True,
+                    },
+                })
+        """
+        from ._feature_manager import FeatureManager
 
-
-            "hidden_size": 32,
-            "bottleneck_size": 16,
-            "batch_size": 32,
-            "preprocess_params": {
-                "normalization_type": "cool",
-                "subset_highlyvariable": True,
-            },
-        })
-    """
+        return FeatureManager(self)
 
     @property
     def labels(self) -> LabelManager:
@@ -1219,9 +1246,6 @@
     """Number of files for folder-like artifacts, `None` for file-like artifacts.
 
     Note that some arrays are also stored as folders, e.g., `.zarr` or `.tiledbsoma`.
-
-    .. versionchanged:: 1.0
-        Renamed from `n_objects` to `n_files`.
     """
     n_observations: int | None = BigIntegerField(
         null=True, db_index=True, default=None, editable=False
@@ -1289,14 +1313,15 @@
     )
     """Creator of record."""
     _overwrite_versions: bool = BooleanField(default=None)
-
-
-    It defaults to False for file-like artifacts and to True for folder-like artifacts.
-    """
+    # see corresponding property `overwrite_versions`
     projects: Project
-    """
+    """Annotating projects."""
     references: Reference
-    """
+    """Annotating references."""
+    records: Record
+    """Annotating records."""
+    linked_in_records: Record
+    """Linked in records."""
 
     @overload
     def __init__(
@@ -1313,7 +1338,8 @@
         key: str | None = None,
         description: str | None = None,
         revises: Artifact | None = None,
-
+        overwrite_versions: bool | None = None,
+        run: Run | False | None = None,
     ): ...
 
     @overload
@@ -1327,28 +1353,23 @@
         *args,
         **kwargs,
     ):
-
-        # Below checks for the Django-internal call in from_db()
-        # it'd be better if we could avoid this, but not being able to create a Artifact
-        # from data with the default constructor renders the central class of the API
-        # essentially useless
-        # The danger below is not that a user might pass as many args (12 of it), but rather
-        # that at some point the Django API might change; on the other hand, this
-        # condition of for calling the constructor based on kwargs should always
-        # stay robust
+        # check whether we are called with db args
         if len(args) == len(self._meta.concrete_fields):
             super().__init__(*args, **kwargs)
             return None
-        # now
+        # now proceed with the user-facing constructor
         if len(args) > 1:
             raise ValueError("Only one non-keyword arg allowed: data")
         data: str | Path = kwargs.pop("data") if len(args) == 0 else args[0]
         kind: str = kwargs.pop("kind", None)
         key: str | None = kwargs.pop("key", None)
+        run_id: int | None = kwargs.pop("run_id", None)  # for REST API
         run: Run | None = kwargs.pop("run", None)
         description: str | None = kwargs.pop("description", None)
         revises: Artifact | None = kwargs.pop("revises", None)
+        overwrite_versions: bool | None = kwargs.pop("overwrite_versions", None)
         version: str | None = kwargs.pop("version", None)
+        branch_id: int | None = None
         if "visibility" in kwargs:  # backward compat
             branch_id = kwargs.pop("visibility")
         if "_branch_code" in kwargs:  # backward compat
@@ -1357,6 +1378,9 @@
             branch_id = kwargs.pop("branch_id")
         else:
             branch_id = 1
+        branch = kwargs.pop("branch", None)
+        space = kwargs.pop("space", None)
+        space_id = kwargs.pop("space_id", 1)
         format = kwargs.pop("format", None)
         _is_internal_call = kwargs.pop("_is_internal_call", False)
         skip_check_exists = kwargs.pop("skip_check_exists", False)
@@ -1369,6 +1393,10 @@
         default_storage = setup_settings.instance.storage.record
         using_key = kwargs.pop("using_key", None)
         otype = kwargs.pop("otype") if "otype" in kwargs else None
+        if isinstance(data, str) and data.startswith("s3:///"):
+            # issue in Groovy / nf-lamin producing malformed S3 paths
+            # https://laminlabs.slack.com/archives/C08J590666Q/p1751315027830849?thread_ts=1751039961.479259&cid=C08J590666Q
+            data = data.replace("s3:///", "s3://")
         otype = _check_otype_artifact(data=data, otype=otype)
         if "type" in kwargs:
             logger.warning("`type` will be removed soon, please use `kind`")
@@ -1413,6 +1441,7 @@
             default_storage=default_storage,
             using_key=using_key,
             skip_check_exists=skip_check_exists,
+            overwrite_versions=overwrite_versions,
         )
 
         # an object with the same hash already exists
@@ -1462,10 +1491,15 @@
         if revises is not None and revises.key is not None and kwargs["key"] is None:
             kwargs["key"] = revises.key
 
+        if run_id is not None:
+            kwargs["run_id"] = run_id
         kwargs["kind"] = kind
         kwargs["version"] = version
         kwargs["description"] = description
+        kwargs["branch"] = branch
         kwargs["branch_id"] = branch_id
+        kwargs["space"] = space
+        kwargs["space_id"] = space_id
         kwargs["otype"] = otype
         kwargs["revises"] = revises
         # this check needs to come down here because key might be populated from an
@@ -1504,6 +1538,18 @@
     def n_objects(self) -> int:
         return self.n_files
 
+    @property
+    def overwrite_versions(self) -> bool:
+        """Indicates whether to keep or overwrite versions.
+
+        It defaults to `False` for file-like artifacts and to `True` for folder-like artifacts.
+
+        Note that this requires significant storage space for large folders with
+        many duplicated files. Currently, `lamindb` does *not* de-duplicate files across
+        versions as in git, but keeps all files for all versions of the folder in storage.
+        """
+        return self._overwrite_versions
+
     @property
     def path(self) -> Path:
         """Path.
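Note: taken together with the new constructor argument above, a minimal usage sketch (folder path hypothetical):

    import lamindb as ln

    # folders default to overwrite_versions=True; opting out keeps every file of every version
    artifact = ln.Artifact("./myfolder", key="myfolder", overwrite_versions=False).save()
    assert artifact.overwrite_versions is False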
@@ -1605,13 +1651,17 @@
         keys_normalized = [key.split("__")[0] for key in expressions]
         field_or_feature_or_param = keys_normalized[0].split("__")[0]
         if field_or_feature_or_param in Artifact.__get_available_fields__():
-
+            qs = QuerySet(model=cls).filter(*queries, **expressions)
+            if not any(e.startswith("kind") for e in expressions):
+                return qs.exclude(kind="__lamindb_run__")
+            else:
+                return qs
         elif all(
             features_validated := Feature.validate(
                 keys_normalized, field="name", mute=True
             )
         ):
-            return filter_base(
+            return filter_base(Artifact, **expressions)
         else:
             features = ", ".join(
                 sorted(np.array(keys_normalized)[~features_validated])
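Note: the effect of the new exclusion logic, sketched:

    import lamindb as ln

    ln.Artifact.filter(suffix=".parquet")        # run-generated artifacts are excluded by default
    ln.Artifact.filter(kind="__lamindb_run__")   # filtering on `kind` yourself opts back in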
@@ -1626,7 +1676,11 @@
                 f"Or fix invalid {message}"
             )
         else:
-            return
+            return (
+                QuerySet(model=cls)
+                .filter(*queries, **expressions)
+                .exclude(kind="__lamindb_run__")
+            )
 
     @classmethod
     def from_df(
@@ -1696,6 +1750,13 @@
         if schema is not None:
             from ..curators import DataFrameCurator
 
+            if not artifact._state.adding and artifact.suffix != ".parquet":
+                logger.warning(
+                    f"not re-validating existing artifact as it was stored as {artifact.suffix}, "
+                    "which does not maintain categorical dtype information"
+                )
+                return artifact
+
             curator = DataFrameCurator(artifact, schema)
             curator.validate()
             artifact.schema = schema
@@ -1726,7 +1787,6 @@
             schema: A schema that defines how to validate & annotate.
 
         See Also:
-
             :meth:`~lamindb.Collection`
                 Track collections.
             :class:`~lamindb.Feature`
@@ -1933,12 +1993,11 @@
         revises: Artifact | None = None,
         **kwargs,
     ) -> Artifact:
-        """Create from a tiledbsoma store.
+        """Create from a `tiledbsoma.Experiment` store.
 
         Args:
-
-            key: A relative path within default storage,
-                e.g., `"myfolder/mystore.tiledbsoma"`.
+            exp: TileDB-SOMA Experiment object or path to Experiment store.
+            key: A relative path within default storage, e.g., `"myfolder/mystore.tiledbsoma"`.
             description: A description.
             revises: An old version of the artifact.
             run: The run that creates the artifact.
@@ -1953,7 +2012,11 @@
             raise ValueError(
                 "data has to be a SOMA Experiment object or a path to SOMA Experiment store."
             )
+
+        # SOMAExperiment.uri may have file:// prefix for local paths which needs stripping for filesystem access.
+        # Other URI schemes (s3://, etc.) are preserved and supported.
         exp = exp.uri.removeprefix("file://") if not isinstance(exp, UPathStr) else exp
+
         artifact = Artifact(  # type: ignore
             data=exp,
             key=key,
@@ -1975,7 +2038,7 @@
         key: str | None = None,
         run: Run | None = None,
     ) -> list[Artifact]:
-        """Create a list of
+        """Create a list of :class:`~lamindb.Artifact` objects from a directory.
 
         Hint:
             If you have a high number of files (several 100k) and don't want to
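Note: a usage sketch, assuming the classmethod being diffed here is `Artifact.from_tiledbsoma` and a hypothetical local store path; per the new `exp` docs, an opened `tiledbsoma.Experiment` object works as well:

    import lamindb as ln

    artifact = ln.Artifact.from_tiledbsoma(
        "./mystore.tiledbsoma", key="myfolder/mystore.tiledbsoma"
    ).save()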
@@ -2476,8 +2539,6 @@
         will not delete the underlying storage by default (if `storage=True` is not specified).
         Deleting the latest version will delete all the versions for folder artifacts.
 
-        FAQ: :doc:`docs:faq/storage`
-
         Args:
             permanent: Permanently delete the artifact (skip trash).
             storage: Indicate whether you want to delete the artifact in storage.
@@ -2585,18 +2646,56 @@
         if delete_msg != "did-not-delete":
             logger.success(f"deleted {colors.yellow(f'{path}')}")
 
-
+    @property
+    def _is_saved_to_storage_location(self) -> bool | None:
+        """Indicates whether this artifact was correctly written to its storage.
+
+        This is meaningful only after calling `.save()`.
+
+        `None` means no writing was necessary, `True` - that it was written correctly.
+        `False` shows that there was a problem with writing.
+        """
+        if self._aux is not None:
+            return self._aux.get("af", {}).get("0", None)
+        else:
+            return None
+
+    @_is_saved_to_storage_location.setter
+    def _is_saved_to_storage_location(self, value: bool) -> None:
+        self._aux = self._aux or {}
+        self._aux.setdefault("af", {})["0"] = value
+
+    def save(
+        self,
+        upload: bool | None = None,
+        transfer: Literal["record", "annotations"] = "record",
+        **kwargs,
+    ) -> Artifact:
         """Save to database & storage.
 
         Args:
             upload: Trigger upload to cloud storage in instances with hybrid storage mode.
+            transfer: In case artifact was queried on a different instance, dictates behavior of transfer.
+                If "record", only the artifact record is transferred to the current instance.
+                If "annotations", also the annotations linked in the source instance are transferred.
 
-
+        See Also:
+            :doc:`transfer`
 
-
+        Example:
+
+            ::
+
+                import lamindb as ln
 
-
+                artifact = ln.Artifact("./myfile.csv", key="myfile.parquet").save()
         """
+        if transfer not in {"record", "annotations"}:
+            raise ValueError(
+                f"transfer should be either 'record' or 'annotations', not {transfer}"
+            )
+        else:
+            kwargs["transfer"] = transfer
         state_was_adding = self._state.adding
         print_progress = kwargs.pop("print_progress", True)
         store_kwargs = kwargs.pop(
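Note: a sketch of the new cross-instance transfer behavior; the instance slug and uid below are placeholders:

    import lamindb as ln

    # artifact queried from another instance
    artifact = ln.Artifact.using("account/instance").get("aBcDeFgH12345678")
    artifact.save(transfer="annotations")  # transfer="record" (default) skips linked annotations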
@@ -2615,9 +2714,16 @@
             # ensure that the artifact is uploaded
             self._to_store = True
 
-
+        # _is_saved_to_storage_location indicates whether the saving / upload process is successful
+        flag_complete = hasattr(self, "_local_filepath") and getattr(
+            self, "_to_store", False
+        )
+        if flag_complete:
+            self._is_saved_to_storage_location = (
+                False  # will be updated to True at the end
+            )
 
-
+        self._save_skip_storage(**kwargs)
 
         using_key = None
         if "using" in kwargs:
@@ -2645,9 +2751,17 @@
             using_key=using_key,
         )
         if exception_upload is not None:
-            raise
+            raise exception_upload
         if exception_clear is not None:
-            raise
+            raise exception_clear
+        # the saving / upload process has been successfull, just mark it as such
+        # maybe some error handling here?
+        if flag_complete:
+            self._is_saved_to_storage_location = True
+            # pass kwargs here because it can contain `using` or other things
+            # affecting the connection
+            super().save(**kwargs)
+
         # this is only for keep_artifacts_local
         if local_path is not None and not state_was_adding:
             # only move the local artifact to cache if it was not newly created
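Note: how the new flag surfaces after a save, sketched (file name hypothetical):

    import lamindb as ln

    artifact = ln.Artifact("./myfile.csv", key="myfile.csv").save()
    # persisted in the JSON `_aux` field as {"af": {"0": ...}} per `_aux_fields`
    print(artifact._is_saved_to_storage_location)  # True after an upload, None if no write was needed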
@@ -2662,6 +2776,7 @@
         if hasattr(self, "_curator"):
             curator = self._curator
             delattr(self, "_curator")
+            # just annotates this artifact
             curator.save_artifact()
         return self
 
|