lamindb 1.0.4__py3-none-any.whl → 1.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lamindb/__init__.py +14 -5
- lamindb/_artifact.py +174 -57
- lamindb/_can_curate.py +27 -8
- lamindb/_collection.py +85 -51
- lamindb/_feature.py +177 -41
- lamindb/_finish.py +222 -81
- lamindb/_from_values.py +83 -98
- lamindb/_parents.py +4 -4
- lamindb/_query_set.py +59 -17
- lamindb/_record.py +171 -53
- lamindb/_run.py +4 -4
- lamindb/_save.py +33 -10
- lamindb/_schema.py +135 -38
- lamindb/_storage.py +1 -1
- lamindb/_tracked.py +106 -0
- lamindb/_transform.py +21 -8
- lamindb/_ulabel.py +5 -14
- lamindb/base/validation.py +2 -6
- lamindb/core/__init__.py +13 -14
- lamindb/core/_context.py +39 -36
- lamindb/core/_data.py +29 -25
- lamindb/core/_describe.py +1 -1
- lamindb/core/_django.py +1 -1
- lamindb/core/_feature_manager.py +54 -44
- lamindb/core/_label_manager.py +4 -4
- lamindb/core/_mapped_collection.py +20 -7
- lamindb/core/datasets/__init__.py +6 -1
- lamindb/core/datasets/_core.py +12 -11
- lamindb/core/datasets/_small.py +66 -20
- lamindb/core/exceptions.py +1 -90
- lamindb/core/loaders.py +7 -13
- lamindb/core/relations.py +6 -4
- lamindb/core/storage/_anndata_accessor.py +41 -0
- lamindb/core/storage/_backed_access.py +2 -2
- lamindb/core/storage/_pyarrow_dataset.py +25 -15
- lamindb/core/storage/_tiledbsoma.py +56 -12
- lamindb/core/storage/paths.py +41 -22
- lamindb/core/subsettings/_creation_settings.py +4 -16
- lamindb/curators/__init__.py +2168 -833
- lamindb/curators/_cellxgene_schemas/__init__.py +26 -0
- lamindb/curators/_cellxgene_schemas/schema_versions.yml +104 -0
- lamindb/errors.py +96 -0
- lamindb/integrations/_vitessce.py +3 -3
- lamindb/migrations/0069_squashed.py +76 -75
- lamindb/migrations/0075_lamindbv1_part5.py +4 -5
- lamindb/migrations/0082_alter_feature_dtype.py +21 -0
- lamindb/migrations/0083_alter_feature_is_type_alter_flextable_is_type_and_more.py +94 -0
- lamindb/migrations/0084_alter_schemafeature_feature_and_more.py +35 -0
- lamindb/migrations/0085_alter_feature_is_type_alter_flextable_is_type_and_more.py +63 -0
- lamindb/migrations/0086_various.py +95 -0
- lamindb/migrations/0087_rename__schemas_m2m_artifact_feature_sets_and_more.py +41 -0
- lamindb/migrations/0088_schema_components.py +273 -0
- lamindb/migrations/0088_squashed.py +4372 -0
- lamindb/models.py +423 -156
- {lamindb-1.0.4.dist-info → lamindb-1.1.0.dist-info}/METADATA +10 -7
- lamindb-1.1.0.dist-info/RECORD +95 -0
- lamindb/curators/_spatial.py +0 -528
- lamindb/migrations/0052_squashed.py +0 -1261
- lamindb/migrations/0053_alter_featureset_hash_alter_paramvalue_created_by_and_more.py +0 -57
- lamindb/migrations/0054_alter_feature_previous_runs_and_more.py +0 -35
- lamindb/migrations/0055_artifact_type_artifactparamvalue_and_more.py +0 -61
- lamindb/migrations/0056_rename_ulabel_ref_is_name_artifactulabel_label_ref_is_name_and_more.py +0 -22
- lamindb/migrations/0057_link_models_latest_report_and_others.py +0 -356
- lamindb/migrations/0058_artifact__actions_collection__actions.py +0 -22
- lamindb/migrations/0059_alter_artifact__accessor_alter_artifact__hash_type_and_more.py +0 -31
- lamindb/migrations/0060_alter_artifact__actions.py +0 -22
- lamindb/migrations/0061_alter_collection_meta_artifact_alter_run_environment_and_more.py +0 -45
- lamindb/migrations/0062_add_is_latest_field.py +0 -32
- lamindb/migrations/0063_populate_latest_field.py +0 -45
- lamindb/migrations/0064_alter_artifact_version_alter_collection_version_and_more.py +0 -33
- lamindb/migrations/0065_remove_collection_feature_sets_and_more.py +0 -22
- lamindb/migrations/0066_alter_artifact__feature_values_and_more.py +0 -352
- lamindb/migrations/0067_alter_featurevalue_unique_together_and_more.py +0 -20
- lamindb/migrations/0068_alter_artifactulabel_unique_together_and_more.py +0 -20
- lamindb/migrations/0069_alter_artifact__accessor_alter_artifact__hash_type_and_more.py +0 -1294
- lamindb-1.0.4.dist-info/RECORD +0 -102
- {lamindb-1.0.4.dist-info → lamindb-1.1.0.dist-info}/LICENSE +0 -0
- {lamindb-1.0.4.dist-info → lamindb-1.1.0.dist-info}/WHEEL +0 -0
lamindb/_record.py
CHANGED
@@ -1,6 +1,7 @@
 from __future__ import annotations
 
 import builtins
+import inspect
 import re
 from functools import reduce
 from pathlib import PurePosixPath
@@ -29,6 +30,7 @@ from django.db.models.lookups import (
     Regex,
     StartsWith,
 )
+from django.db.utils import IntegrityError
 from lamin_utils import colors, logger
 from lamin_utils._lookup import Lookup
 from lamindb_setup._connect_instance import (
@@ -41,7 +43,7 @@ from lamindb_setup.core._hub_core import connect_instance_hub
 from lamindb_setup.core._settings_store import instance_settings_file
 from lamindb_setup.core.upath import extract_suffix_from_path
 
-from lamindb.…
+from lamindb.errors import FieldValidationError
 from lamindb.models import (
     Artifact,
     BasicRecord,
@@ -60,7 +62,7 @@ from lamindb.models import (
 
 from ._utils import attach_func_to_class_method
 from .core._settings import settings
-from .…
+from .errors import (
     InvalidArgument,
     RecordNameChangeIntegrityError,
     ValidationError,
@@ -75,6 +77,30 @@ if TYPE_CHECKING:
 IPYTHON = getattr(builtins, "__IPYTHON__", False)
 
 
+def is_approx_pascal_case(s):
+    """Check if the last component of a dotted string is in PascalCase.
+
+    Args:
+        s (str): The string to check
+
+    Returns:
+        bool: True if the last component is in PascalCase
+
+    Raises:
+        ValueError: If the last component doesn't start with a capital letter
+    """
+    if "[" in s:  # this is because we allow types of form 'script[test_script.py]'
+        return True
+    last_component = s.split(".")[-1]
+
+    if not last_component[0].isupper():
+        raise ValueError(
+            f"'{last_component}' should start with a capital letter given you're defining a type"
+        )
+
+    return True
+
+
 def init_self_from_db(self: Record, existing_record: Record):
     new_args = [
         getattr(existing_record, field.attname) for field in self._meta.concrete_fields
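The new helper only inspects the first character of the last dotted component, hence "approx" PascalCase. A hypothetical usage sketch (assumes lamindb 1.1.0 is installed; the import path is the private module shown above):

from lamindb._record import is_approx_pascal_case

is_approx_pascal_case("CellType")                # True
is_approx_pascal_case("bionty.CellType")         # True: only the last dotted component is checked
is_approx_pascal_case("script[test_script.py]")  # True: bracketed forms are exempt
try:
    is_approx_pascal_case("cell_type")           # lowercase first letter
except ValueError as e:
    print(e)  # 'cell_type' should start with a capital letter given you're defining a type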
@@ -86,7 +112,12 @@ def init_self_from_db(self: Record, existing_record: Record):
 
 def update_attributes(record: Record, attributes: dict[str, str]):
     for key, value in attributes.items():
-        if …
+        if (
+            getattr(record, key) != value
+            and value is not None
+            and key != "dtype"
+            and key != "_aux"
+        ):
             logger.warning(f"updated {key} from {getattr(record, key)} to {value}")
             setattr(record, key, value)
 
@@ -105,7 +136,7 @@ def validate_fields(record: Record, kwargs):
         k for k, v in kwargs.items() if v is None and k in required_fields
     ]
     if missing_fields:
-        raise …
+        raise FieldValidationError(f"{missing_fields} are required.")
     # ensure the exact length of the internal uid for core entities
     if "uid" in kwargs and record.__class__ in {
         Artifact,
@@ -122,19 +153,36 @@ def validate_fields(record: Record, kwargs):
         ).max_length  # triggers FieldDoesNotExist
         if len(kwargs["uid"]) != uid_max_length:  # triggers KeyError
             raise ValidationError(
-                f…
+                f"`uid` must be exactly {uid_max_length} characters long, got {len(kwargs['uid'])}."
+            )
+    # validate is_type
+    if "is_type" in kwargs and "name" in kwargs and kwargs["is_type"]:
+        if kwargs["name"].endswith("s"):
+            logger.warning(
+                f"name '{kwargs['name']}' for type ends with 's', in case you're naming with plural, consider the singular for a type name"
             )
+        is_approx_pascal_case(kwargs["name"])
     # validate literals
     validate_literal_fields(record, kwargs)
 
 
-def suggest_records_with_similar_names(…
+def suggest_records_with_similar_names(
+    record: Record, name_field: str, kwargs
+) -> Record | None:
     """Returns True if found exact match, otherwise False.
 
     Logs similar matches if found.
     """
     if kwargs.get(name_field) is None or not isinstance(kwargs.get(name_field), str):
-        return
+        return None
+    # need to perform an additional request to find the exact match
+    # previously, this was inferred from the truncated/fuzzy search below
+    # but this isn't reliable: https://laminlabs.slack.com/archives/C04FPE8V01W/p1737812808563409
+    # the below needs to be .first() because there might be multiple records with the same
+    # name field in case the record is versioned (e.g. for Transform key)
+    exact_match = record.__class__.filter(**{name_field: kwargs[name_field]}).first()
+    if exact_match is not None:
+        return exact_match
     queryset = _search(
         record.__class__,
         kwargs[name_field],
@@ -143,10 +191,7 @@ def suggest_records_with_similar_names(record: Record, name_field: str, kwargs)
         limit=3,
     )
     if not queryset.exists():  # empty queryset
-        return
-    for alternative_record in queryset:
-        if getattr(alternative_record, name_field) == kwargs[name_field]:
-            return True
+        return None
     s, it, nots = ("", "it", "s") if len(queryset) == 1 else ("s", "one of them", "")
     msg = f"record{s} with similar {name_field}{s} exist{nots}! did you mean to load {it}?"
     if IPYTHON:
@@ -157,11 +202,14 @@ def suggest_records_with_similar_names(record: Record, name_field: str, kwargs)
         display(queryset.df())
     else:
         logger.warning(f"{msg}\n{queryset}")
-    return
+    return None
 
 
 def __init__(record: Record, *args, **kwargs):
-    …
+    skip_validation = kwargs.pop("_skip_validation", False)
+    if not args and skip_validation:
+        super(BasicRecord, record).__init__(**kwargs)
+    elif not args and not skip_validation:
         validate_fields(record, kwargs)
 
         # do not search for names if an id is passed; this is important
@@ -170,15 +218,13 @@ def __init__(record: Record, *args, **kwargs):
         if "_has_consciously_provided_uid" in kwargs:
             has_consciously_provided_uid = kwargs.pop("_has_consciously_provided_uid")
         if (
-            isinstance(
-                record, (CanCurate, Collection, Transform)
-            )  # Collection is only temporary because it'll get a key field
+            isinstance(record, (CanCurate, Collection, Transform))
             and settings.creation.search_names
             and not has_consciously_provided_uid
         ):
             name_field = getattr(record, "_name_field", "name")
-
-            if …
+            exact_match = suggest_records_with_similar_names(record, name_field, kwargs)
+            if exact_match is not None:
                 if "version" in kwargs:
                     if kwargs["version"] is not None:
                         version_comment = " and version"
@@ -189,22 +235,26 @@ def __init__(record: Record, *args, **kwargs):
                             }
                         ).one_or_none()
                     else:
-                        # for a versioned record, an exact name match is not a…
-                        #…
-                        #…
-                        # same name
+                        # for a versioned record, an exact name match is not a criterion
+                        # for retrieving a record in case `version` isn't passed -
+                        # we'd always pull out many records with exactly the same name
                         existing_record = None
                 else:
                     version_comment = ""
-                    existing_record = …
-                        **{name_field: kwargs[name_field]}
-                    ).one_or_none()
+                    existing_record = exact_match
                 if existing_record is not None:
                     logger.important(
                         f"returning existing {record.__class__.__name__} record with same"
                         f" {name_field}{version_comment}: '{kwargs[name_field]}'"
                     )
+                    if isinstance(record, Schema):
+                        if Artifact.filter(schema=record).exists():
+                            if record.hash != kwargs["hash"]:
+                                raise ValueError(
+                                    "Schema is already in use, can't be changed."
+                                )
                     init_self_from_db(record, existing_record)
+                    update_attributes(record, kwargs)
                     return None
         super(BasicRecord, record).__init__(**kwargs)
         if isinstance(record, ValidateFields):
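Taken together, the constructor changes above resolve an exact name match up front: the existing record is loaded into the new object and newly passed attributes are copied over via update_attributes. A minimal usage sketch (assumes a connected lamindb instance; ULabel stands in for any CanCurate registry):

import lamindb as ln

label = ln.ULabel(name="My experiment", description="first batch").save()

# Constructing a record with the same name now returns the existing row
# (logged as "returning existing ULabel record with same name ...") and
# warns when it updates attributes such as `description`.
same = ln.ULabel(name="My experiment", description="second batch")
assert same.uid == label.uid  # backed by the same database row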
@@ -218,7 +268,9 @@ def __init__(record: Record, *args, **kwargs):
             message = _format_django_validation_error(record, e)
             raise FieldValidationError(message) from e
     elif len(args) != len(record._meta.concrete_fields):
-        raise …
+        raise FieldValidationError(
+            f"Use keyword arguments instead of positional arguments, e.g.: {record.__class__.__name__}(name='...')."
+        )
     else:
         # object is loaded from DB (**kwargs could be omitted below, I believe)
         super(BasicRecord, record).__init__(*args, **kwargs)
@@ -257,6 +309,60 @@ def _format_django_validation_error(record: Record, e: DjangoValidationError):
     return message
 
 
+def _get_record_kwargs(record_class) -> list[tuple[str, str]]:
+    """Gets the parameters of a Record from the overloaded signature.
+
+    Example:
+        >>> get_record_params(bt.Organism)
+        >>> [('name', 'str'), ('taxon_id', 'str | None'), ('scientific_name', 'str | None')]
+    """
+    source = inspect.getsource(record_class)
+
+    # Find first overload that's not *db_args
+    pattern = r"@overload\s+def __init__\s*\(([\s\S]*?)\):\s*\.{3}"
+    overloads = re.finditer(pattern, source)
+
+    for overload in overloads:
+        params_block = overload.group(1)
+        # This is an additional safety measure if the overloaded signature that we're
+        # looking for is not at the top but a "db_args" constructor
+        if "*db_args" in params_block:
+            continue
+
+        params = []
+        for line in params_block.split("\n"):
+            line = line.strip()
+            if not line or "self" in line:
+                continue
+
+            # Extract name and type annotation
+            # The regex pattern finds parameter definitions like:
+            # Simple: name: str
+            # With default: age: int = 0
+            # With complex types: items: List[str] = []
+            param_pattern = (
+                r"(\w+)"  # Parameter name
+                r"\s*:\s*"  # Colon with optional whitespace
+                r"((?:[^=,]|"  # Type hint: either non-equals/comma chars
+                r"(?<=\[)[^[\]]*"  # or contents within square brackets
+                r"(?=\]))+)"  # looking ahead for closing bracket
+                r"(?:\s*=\s*"  # Optional default value part
+                r"([^,]+))?"  # Default value: anything but comma
+            )
+            match = re.match(param_pattern, line)
+            if not match:
+                continue
+
+            name, type_str = match.group(1), match.group(2).strip()
+
+            # Keep type as string instead of evaluating
+            params.append((name, type_str))
+
+        return params
+
+    return []
+
+
 @classmethod  # type:ignore
 @doc_args(Record.filter.__doc__)
 def filter(cls, *queries, **expressions) -> QuerySet:
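A toy illustration of what the signature parser above extracts — a hypothetical registry class, not from lamindb; note that inspect.getsource requires the class to be defined in a real source file, not a bare REPL:

from __future__ import annotations

from typing import overload

from lamindb._record import _get_record_kwargs


class Organism:
    @overload
    def __init__(
        self,
        name: str,
        taxon_id: str | None = None,
    ): ...

    @overload
    def __init__(
        self,
        *db_args,  # overloads containing *db_args are skipped by the parser
    ): ...

    def __init__(self, *args, **kwargs):
        pass


print(_get_record_kwargs(Organism))
# [('name', 'str'), ('taxon_id', 'str | None')] — types are kept as strings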
@@ -639,8 +745,8 @@ def get_transfer_run(record) -> Run:
     if transform is None:
         search_names = settings.creation.search_names
         settings.creation.search_names = False
-        transform = Transform(
-            uid=uid,…
+        transform = Transform(  # type: ignore
+            uid=uid, description=f"Transfer from `{slug}`", key=key, type="function"
         ).save()
         settings.creation.search_names = search_names
     # use the global run context to get the initiated_by_run run id
@@ -655,7 +761,7 @@ def get_transfer_run(record) -> Run:
         transform=transform, initiated_by_run=initiated_by_run
     ).one_or_none()
     if run is None:
-        run = Run(transform=transform, initiated_by_run=initiated_by_run).save()
+        run = Run(transform=transform, initiated_by_run=initiated_by_run).save()  # type: ignore
     run.initiated_by_run = initiated_by_run  # so that it's available in memory
     return run
 
@@ -738,28 +844,40 @@ def save(self, *args, **kwargs) -> Record:
     if pre_existing_record is not None:
         init_self_from_db(self, pre_existing_record)
     else:
-        … (10 removed lines truncated in the source rendering; surviving fragments: "revises", "revises.")
+        check_key_change(self)
+        check_name_change(self)
+        try:
+            # save versioned record in presence of self._revises
+            if isinstance(self, IsVersioned) and self._revises is not None:
+                assert self._revises.is_latest  # noqa: S101
+                revises = self._revises
+                revises.is_latest = False
+                with transaction.atomic():
+                    revises._revises = None  # ensure we don't start a recursion
+                    revises.save()
+                    super(BasicRecord, self).save(*args, **kwargs)  # type: ignore
+                self._revises = None
+            # save unversioned record
+            else:
                 super(BasicRecord, self).save(*args, **kwargs)
-        … (11 removed lines truncated in the source rendering)
+        except IntegrityError as e:
+            error_msg = str(e)
+            # two possible error messages for hash duplication
+            # "duplicate key value violates unique constraint"
+            # "UNIQUE constraint failed"
+            if (
+                "UNIQUE constraint failed" in error_msg
+                or "duplicate key value violates unique constraint" in error_msg
+            ) and "hash" in error_msg:
+                pre_existing_record = self.__class__.get(hash=self.hash)
+                logger.warning(
+                    f"returning {self.__class__.__name__.lower()} with same hash: {pre_existing_record}"
+                )
+                init_self_from_db(self, pre_existing_record)
+            else:
+                raise
+        _store_record_old_name(self)
+        _store_record_old_key(self)
     # perform transfer of many-to-many fields
     # only supported for Artifact and Collection records
     if db is not None and db != "default" and using_key is None:
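The intent of the new except-IntegrityError branch above, sketched as hypothetical usage (assumes a connected instance and existing local files; artifacts are usually deduplicated by hash at construction already, so this branch mainly guards concurrent or out-of-band saves):

import lamindb as ln

artifact = ln.Artifact("./data.parquet", description="run 1").save()

# Saving byte-identical content elsewhere violates the unique constraint on
# `hash`; instead of propagating the IntegrityError, save() now fetches the
# pre-existing row, logs a warning, and re-initializes the in-memory object.
duplicate = ln.Artifact("./copy_of_data.parquet", description="run 2").save()
assert duplicate.uid == artifact.uid  # same underlying record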
@@ -778,7 +896,7 @@ def save(self, *args, **kwargs) -> Record:
             self_on_db = copy(self)
             self_on_db._state.db = db
             self_on_db.pk = pk_on_db  # manually set the primary key
-            self_on_db.features = FeatureManager(self_on_db)
+            self_on_db.features = FeatureManager(self_on_db)  # type: ignore
             self.features._add_from(self_on_db, transfer_logs=transfer_logs)
             self.labels.add_from(self_on_db, transfer_logs=transfer_logs)
             for k, v in transfer_logs.items():
@@ -851,7 +969,7 @@ def check_name_change(record: Record):
     # when a feature is renamed
     elif isinstance(record, Feature):
         # only internal features are associated with schemas
-        linked_artifacts = Artifact.filter(…
+        linked_artifacts = Artifact.filter(feature_sets__features=record).list(
             "uid"
         )
         n = len(linked_artifacts)
@@ -915,7 +1033,7 @@ def delete(self) -> None:
         new_latest.is_latest = True
         with transaction.atomic():
             new_latest.save()
-            super(BasicRecord, self).delete()
+            super(BasicRecord, self).delete()  # type: ignore
         logger.warning(f"new latest version is {new_latest}")
         return None
     super(BasicRecord, self).delete()
lamindb/_run.py
CHANGED
@@ -4,7 +4,7 @@ from lamindb.models import ParamManager, Run, Transform
 
 
 def __init__(run: Run, *args, **kwargs):
-    run.params = ParamManager(run)
+    run.params = ParamManager(run)  # type: ignore
     if len(args) == len(run._meta.concrete_fields):
         super(Run, run).__init__(*args, **kwargs)
         return None
@@ -24,7 +24,7 @@ def __init__(run: Run, *args, **kwargs):
     if transform._state.adding:
         raise ValueError("Please save transform record before creating a run")
 
-    super(Run, run).__init__(
+    super(Run, run).__init__(  # type: ignore
         transform=transform,
         reference=reference,
         initiated_by_run=initiated_by_run,
@@ -56,5 +56,5 @@ def delete(self) -> None:
     super(Run, self).delete()
 
 
-Run.__init__ = __init__
-Run.delete = delete
+Run.__init__ = __init__  # type: ignore
+Run.delete = delete  # type: ignore
lamindb/_save.py
CHANGED
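The hunks below thread storage cleanup through the upload path: a failed upload now records a _clear_storagekey, and check_and_attempt_clearing gained raise_file_not_found_error and using_key parameters. A condensed sketch of the resulting control flow in store_artifacts (simplified, not the verbatim implementation; helper names as in this file):

from lamindb._save import check_and_attempt_clearing, check_and_attempt_upload


def store_artifacts_sketch(artifacts, using_key=None):
    stored_artifacts, exception = [], None
    for artifact in artifacts:
        # on failure, this sets artifact._clear_storagekey for cleanup below
        exception = check_and_attempt_upload(artifact, using_key)
        if exception is not None:
            break
        stored_artifacts.append(artifact)
        # on success, a leftover _clear_storagekey can only come from .replace()
        check_and_attempt_clearing(
            artifact, raise_file_not_found_error=True, using_key=using_key
        )
    if exception is not None:
        for artifact in artifacts:
            if artifact not in stored_artifacts:
                artifact._delete_skip_storage()  # roll back the metadata record
                # best effort: remove the partially uploaded object; a missing
                # file is tolerated, hence raise_file_not_found_error=False
                check_and_attempt_clearing(
                    artifact, raise_file_not_found_error=False, using_key=using_key
                )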
@@ -57,7 +57,7 @@ def save(records: Iterable[Record], ignore_conflicts: bool | None = False) -> No
 
     For a single record, use ``record.save()``:
 
-        >>> transform = ln.Transform(…
+        >>> transform = ln.Transform(key="My pipeline")
         >>> transform.save()
 
     Update a single existing record:
@@ -146,6 +146,9 @@ def check_and_attempt_upload(
         )
     except Exception as exception:
         logger.warning(f"could not upload artifact: {artifact}")
+        # clear dangling storages if we were actually uploading or saving
+        if hasattr(artifact, "_to_store") and artifact._to_store:
+            artifact._clear_storagekey = auto_storage_key_from_artifact(artifact)
         return exception
     # copies (if on-disk) or moves the temporary file (if in-memory) to the cache
     if os.getenv("LAMINDB_MULTI_INSTANCE") is None:
@@ -212,19 +215,25 @@ def copy_or_move_to_cache(
 
 # This is also used within Artifact.save()
 def check_and_attempt_clearing(
-    artifact: Artifact,
+    artifact: Artifact,
+    raise_file_not_found_error: bool = True,
+    using_key: str | None = None,
 ) -> Exception | None:
     # this is a clean-up operation after replace() was called
-    #…
+    # or if there was an exception during upload
     if hasattr(artifact, "_clear_storagekey"):
         try:
             if artifact._clear_storagekey is not None:
-                delete_storage_using_key(
-                    artifact,
-                    … (3 removed lines truncated in the source rendering)
+                delete_msg = delete_storage_using_key(
+                    artifact,
+                    artifact._clear_storagekey,
+                    raise_file_not_found_error=raise_file_not_found_error,
+                    using_key=using_key,
                 )
+                if delete_msg != "did-not-delete":
+                    logger.success(
+                        f"deleted stale object at storage key {artifact._clear_storagekey}"
+                    )
             artifact._clear_storagekey = None
         except Exception as exception:
             return exception
@@ -246,11 +255,17 @@ def store_artifacts(
 
     # upload new local artifacts
     for artifact in artifacts:
+        # failure here sets ._clear_storagekey
+        # for cleanup below
         exception = check_and_attempt_upload(artifact, using_key)
         if exception is not None:
             break
         stored_artifacts += [artifact]
-        …
+        # if check_and_attempt_upload was successfull
+        # then this can have only ._clear_storagekey from .replace
+        exception = check_and_attempt_clearing(
+            artifact, raise_file_not_found_error=True, using_key=using_key
+        )
         if exception is not None:
             logger.warning(f"clean up of {artifact._clear_storagekey} failed")
             break
@@ -261,6 +276,14 @@ def store_artifacts(
         for artifact in artifacts:
             if artifact not in stored_artifacts:
                 artifact._delete_skip_storage()
+                # clean up storage after failure in check_and_attempt_upload
+                exception_clear = check_and_attempt_clearing(
+                    artifact, raise_file_not_found_error=False, using_key=using_key
+                )
+                if exception_clear is not None:
+                    logger.warning(
+                        f"clean up of {artifact._clear_storagekey} after the upload error failed"
+                    )
         error_message = prepare_error_message(artifacts, stored_artifacts, exception)
         # this is bad because we're losing the original traceback
         # needs to be refactored - also, the orginal error should be raised
@@ -269,7 +292,7 @@ def store_artifacts(
 
 
 def prepare_error_message(records, stored_artifacts, exception) -> str:
-    if len(…
+    if len(stored_artifacts) == 0:
         error_message = (
             "No entries were uploaded or committed"
             " to the database. See error message:\n\n"