lamindb 1.0.5__py3-none-any.whl → 1.1.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lamindb/__init__.py +17 -6
- lamindb/_artifact.py +202 -87
- lamindb/_can_curate.py +27 -8
- lamindb/_collection.py +86 -52
- lamindb/_feature.py +177 -41
- lamindb/_finish.py +21 -7
- lamindb/_from_values.py +83 -98
- lamindb/_parents.py +4 -4
- lamindb/_query_set.py +78 -18
- lamindb/_record.py +170 -53
- lamindb/_run.py +4 -4
- lamindb/_save.py +42 -11
- lamindb/_schema.py +135 -38
- lamindb/_storage.py +1 -1
- lamindb/_tracked.py +129 -0
- lamindb/_transform.py +21 -8
- lamindb/_ulabel.py +5 -14
- lamindb/base/users.py +1 -4
- lamindb/base/validation.py +2 -6
- lamindb/core/__init__.py +13 -14
- lamindb/core/_context.py +14 -9
- lamindb/core/_data.py +29 -25
- lamindb/core/_describe.py +1 -1
- lamindb/core/_django.py +1 -1
- lamindb/core/_feature_manager.py +53 -43
- lamindb/core/_label_manager.py +4 -4
- lamindb/core/_mapped_collection.py +24 -9
- lamindb/core/_track_environment.py +2 -1
- lamindb/core/datasets/__init__.py +6 -1
- lamindb/core/datasets/_core.py +12 -11
- lamindb/core/datasets/_small.py +67 -21
- lamindb/core/exceptions.py +1 -90
- lamindb/core/loaders.py +21 -15
- lamindb/core/relations.py +6 -4
- lamindb/core/storage/_anndata_accessor.py +49 -3
- lamindb/core/storage/_backed_access.py +12 -7
- lamindb/core/storage/_pyarrow_dataset.py +40 -15
- lamindb/core/storage/_tiledbsoma.py +56 -12
- lamindb/core/storage/paths.py +30 -24
- lamindb/core/subsettings/_creation_settings.py +4 -16
- lamindb/curators/__init__.py +2193 -846
- lamindb/curators/_cellxgene_schemas/__init__.py +26 -0
- lamindb/curators/_cellxgene_schemas/schema_versions.yml +104 -0
- lamindb/errors.py +96 -0
- lamindb/integrations/_vitessce.py +3 -3
- lamindb/migrations/0069_squashed.py +76 -75
- lamindb/migrations/0075_lamindbv1_part5.py +4 -5
- lamindb/migrations/0082_alter_feature_dtype.py +21 -0
- lamindb/migrations/0083_alter_feature_is_type_alter_flextable_is_type_and_more.py +94 -0
- lamindb/migrations/0084_alter_schemafeature_feature_and_more.py +35 -0
- lamindb/migrations/0085_alter_feature_is_type_alter_flextable_is_type_and_more.py +63 -0
- lamindb/migrations/0086_various.py +95 -0
- lamindb/migrations/0087_rename__schemas_m2m_artifact_feature_sets_and_more.py +41 -0
- lamindb/migrations/0088_schema_components.py +273 -0
- lamindb/migrations/0088_squashed.py +4372 -0
- lamindb/models.py +475 -168
- {lamindb-1.0.5.dist-info → lamindb-1.1.1.dist-info}/METADATA +9 -7
- lamindb-1.1.1.dist-info/RECORD +95 -0
- lamindb/curators/_spatial.py +0 -528
- lamindb/migrations/0052_squashed.py +0 -1261
- lamindb/migrations/0053_alter_featureset_hash_alter_paramvalue_created_by_and_more.py +0 -57
- lamindb/migrations/0054_alter_feature_previous_runs_and_more.py +0 -35
- lamindb/migrations/0055_artifact_type_artifactparamvalue_and_more.py +0 -61
- lamindb/migrations/0056_rename_ulabel_ref_is_name_artifactulabel_label_ref_is_name_and_more.py +0 -22
- lamindb/migrations/0057_link_models_latest_report_and_others.py +0 -356
- lamindb/migrations/0058_artifact__actions_collection__actions.py +0 -22
- lamindb/migrations/0059_alter_artifact__accessor_alter_artifact__hash_type_and_more.py +0 -31
- lamindb/migrations/0060_alter_artifact__actions.py +0 -22
- lamindb/migrations/0061_alter_collection_meta_artifact_alter_run_environment_and_more.py +0 -45
- lamindb/migrations/0062_add_is_latest_field.py +0 -32
- lamindb/migrations/0063_populate_latest_field.py +0 -45
- lamindb/migrations/0064_alter_artifact_version_alter_collection_version_and_more.py +0 -33
- lamindb/migrations/0065_remove_collection_feature_sets_and_more.py +0 -22
- lamindb/migrations/0066_alter_artifact__feature_values_and_more.py +0 -352
- lamindb/migrations/0067_alter_featurevalue_unique_together_and_more.py +0 -20
- lamindb/migrations/0068_alter_artifactulabel_unique_together_and_more.py +0 -20
- lamindb/migrations/0069_alter_artifact__accessor_alter_artifact__hash_type_and_more.py +0 -1294
- lamindb-1.0.5.dist-info/RECORD +0 -102
- {lamindb-1.0.5.dist-info → lamindb-1.1.1.dist-info}/LICENSE +0 -0
- {lamindb-1.0.5.dist-info → lamindb-1.1.1.dist-info}/WHEEL +0 -0
lamindb/_record.py
CHANGED
@@ -1,6 +1,7 @@
 from __future__ import annotations
 
 import builtins
+import inspect
 import re
 from functools import reduce
 from pathlib import PurePosixPath
@@ -29,6 +30,7 @@ from django.db.models.lookups import (
     Regex,
     StartsWith,
 )
+from django.db.utils import IntegrityError
 from lamin_utils import colors, logger
 from lamin_utils._lookup import Lookup
 from lamindb_setup._connect_instance import (
@@ -41,7 +43,7 @@ from lamindb_setup.core._hub_core import connect_instance_hub
 from lamindb_setup.core._settings_store import instance_settings_file
 from lamindb_setup.core.upath import extract_suffix_from_path
 
-from lamindb.…
+from lamindb.errors import FieldValidationError
 from lamindb.models import (
     Artifact,
     BasicRecord,
@@ -60,7 +62,7 @@ from lamindb.models import (
 
 from ._utils import attach_func_to_class_method
 from .core._settings import settings
-from .…
+from .errors import (
     InvalidArgument,
     RecordNameChangeIntegrityError,
     ValidationError,
@@ -75,6 +77,30 @@ if TYPE_CHECKING:
 IPYTHON = getattr(builtins, "__IPYTHON__", False)
 
 
+def is_approx_pascal_case(s):
+    """Check if the last component of a dotted string is in PascalCase.
+
+    Args:
+        s (str): The string to check
+
+    Returns:
+        bool: True if the last component is in PascalCase
+
+    Raises:
+        ValueError: If the last component doesn't start with a capital letter
+    """
+    if "[" in s:  # this is because we allow types of form 'script[test_script.py]'
+        return True
+    last_component = s.split(".")[-1]
+
+    if not last_component[0].isupper():
+        raise ValueError(
+            f"'{last_component}' should start with a capital letter given you're defining a type"
+        )
+
+    return True
+
+
 def init_self_from_db(self: Record, existing_record: Record):
     new_args = [
         getattr(existing_record, field.attname) for field in self._meta.concrete_fields
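A quick sanity check of the new helper; a minimal sketch assuming `is_approx_pascal_case` is importable from `lamindb._record` (the check only inspects the first character of the last dotted component, hence "approx"):

```python
from lamindb._record import is_approx_pascal_case

assert is_approx_pascal_case("bionty.CellType")         # last dotted component is capitalized
assert is_approx_pascal_case("script[test_script.py]")  # bracketed forms are exempt
try:
    is_approx_pascal_case("bionty.cell_type")
except ValueError as e:
    print(e)  # 'cell_type' should start with a capital letter given you're defining a type
```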
@@ -86,7 +112,12 @@ def init_self_from_db(self: Record, existing_record: Record):
 
 def update_attributes(record: Record, attributes: dict[str, str]):
     for key, value in attributes.items():
-        if …
+        if (
+            getattr(record, key) != value
+            and value is not None
+            and key != "dtype"
+            and key != "_aux"
+        ):
             logger.warning(f"updated {key} from {getattr(record, key)} to {value}")
             setattr(record, key, value)
 
@@ -105,7 +136,7 @@ def validate_fields(record: Record, kwargs):
         k for k, v in kwargs.items() if v is None and k in required_fields
     ]
     if missing_fields:
-        raise …
+        raise FieldValidationError(f"{missing_fields} are required.")
     # ensure the exact length of the internal uid for core entities
     if "uid" in kwargs and record.__class__ in {
         Artifact,
@@ -122,19 +153,36 @@ def validate_fields(record: Record, kwargs):
         ).max_length  # triggers FieldDoesNotExist
         if len(kwargs["uid"]) != uid_max_length:  # triggers KeyError
             raise ValidationError(
-                f…
+                f"`uid` must be exactly {uid_max_length} characters long, got {len(kwargs['uid'])}."
+            )
+    # validate is_type
+    if "is_type" in kwargs and "name" in kwargs and kwargs["is_type"]:
+        if kwargs["name"].endswith("s"):
+            logger.warning(
+                f"name '{kwargs['name']}' for type ends with 's', in case you're naming with plural, consider the singular for a type name"
             )
+        is_approx_pascal_case(kwargs["name"])
     # validate literals
     validate_literal_fields(record, kwargs)
 
 
-def suggest_records_with_similar_names(…
+def suggest_records_with_similar_names(
+    record: Record, name_field: str, kwargs
+) -> Record | None:
     """Returns True if found exact match, otherwise False.
 
     Logs similar matches if found.
     """
     if kwargs.get(name_field) is None or not isinstance(kwargs.get(name_field), str):
-        return
+        return None
+    # need to perform an additional request to find the exact match
+    # previously, this was inferred from the truncated/fuzzy search below
+    # but this isn't reliable: https://laminlabs.slack.com/archives/C04FPE8V01W/p1737812808563409
+    # the below needs to be .first() because there might be multiple records with the same
+    # name field in case the record is versioned (e.g. for Transform key)
+    exact_match = record.__class__.filter(**{name_field: kwargs[name_field]}).first()
+    if exact_match is not None:
+        return exact_match
     queryset = _search(
         record.__class__,
         kwargs[name_field],
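User-facing effect of the new `is_type` validation: type records are warned about plural names and checked for a capitalized name at construction time. A hedged sketch using `ULabel`; any registry with an `is_type` field goes through the same code path:

```python
import lamindb as ln

ln.ULabel(name="Perturbation", is_type=True)   # passes
ln.ULabel(name="Perturbations", is_type=True)  # warns: consider the singular for a type name
ln.ULabel(name="perturbation", is_type=True)   # raises ValueError via is_approx_pascal_case
```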
@@ -143,10 +191,7 @@ def suggest_records_with_similar_names(record: Record, name_field: str, kwargs)
         limit=3,
     )
     if not queryset.exists():  # empty queryset
-        return
-    for alternative_record in queryset:
-        if getattr(alternative_record, name_field) == kwargs[name_field]:
-            return True
+        return None
     s, it, nots = ("", "it", "s") if len(queryset) == 1 else ("s", "one of them", "")
     msg = f"record{s} with similar {name_field}{s} exist{nots}! did you mean to load {it}?"
     if IPYTHON:
@@ -157,11 +202,14 @@ def suggest_records_with_similar_names(record: Record, name_field: str, kwargs)
         display(queryset.df())
     else:
         logger.warning(f"{msg}\n{queryset}")
-    return
+    return None
 
 
 def __init__(record: Record, *args, **kwargs):
-    …
+    skip_validation = kwargs.pop("_skip_validation", False)
+    if not args and skip_validation:
+        super(BasicRecord, record).__init__(**kwargs)
+    elif not args and not skip_validation:
         validate_fields(record, kwargs)
 
         # do not search for names if an id is passed; this is important
@@ -170,15 +218,13 @@ def __init__(record: Record, *args, **kwargs):
         if "_has_consciously_provided_uid" in kwargs:
             has_consciously_provided_uid = kwargs.pop("_has_consciously_provided_uid")
         if (
-            isinstance(…
-                record, (CanCurate, Collection, Transform)
-            )  # Collection is only temporary because it'll get a key field
+            isinstance(record, (CanCurate, Collection, Transform))
             and settings.creation.search_names
             and not has_consciously_provided_uid
         ):
             name_field = getattr(record, "_name_field", "name")
-            …
-            if …
+            exact_match = suggest_records_with_similar_names(record, name_field, kwargs)
+            if exact_match is not None:
                 if "version" in kwargs:
                     if kwargs["version"] is not None:
                         version_comment = " and version"
@@ -189,22 +235,25 @@ def __init__(record: Record, *args, **kwargs):
                             }
                         ).one_or_none()
                     else:
-                        # for a versioned record, an exact name match is not a…
-                        # …
-                        # …
-                        # same name
+                        # for a versioned record, an exact name match is not a criterion
+                        # for retrieving a record in case `version` isn't passed -
+                        # we'd always pull out many records with exactly the same name
                         existing_record = None
                 else:
                     version_comment = ""
-                    existing_record = …
-                        **{name_field: kwargs[name_field]}
-                    ).one_or_none()
+                    existing_record = exact_match
                 if existing_record is not None:
                     logger.important(
                         f"returning existing {record.__class__.__name__} record with same"
                         f" {name_field}{version_comment}: '{kwargs[name_field]}'"
                     )
+                    if isinstance(record, Schema):
+                        if existing_record.hash != kwargs["hash"]:
+                            raise ValueError(
+                                f"Schema name is already in use by schema with uid '{existing_record.uid}', please choose a different name."
+                            )
                     init_self_from_db(record, existing_record)
+                    update_attributes(record, kwargs)
                     return None
         super(BasicRecord, record).__init__(**kwargs)
         if isinstance(record, ValidateFields):
@@ -218,7 +267,9 @@ def __init__(record: Record, *args, **kwargs):
             message = _format_django_validation_error(record, e)
             raise FieldValidationError(message) from e
     elif len(args) != len(record._meta.concrete_fields):
-        raise …
+        raise FieldValidationError(
+            f"Use keyword arguments instead of positional arguments, e.g.: {record.__class__.__name__}(name='...')."
+        )
     else:
         # object is loaded from DB (**kwargs could be omitted below, I believe)
         super(BasicRecord, record).__init__(*args, **kwargs)
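Net effect of the constructor changes: instantiating a record whose name already exists returns the stored row, now found via an exact `.filter(...).first()` lookup instead of being inferred from fuzzy search, and its attributes are refreshed via `update_attributes`; `Schema` additionally refuses to reuse a name for different content (differing hash). A minimal sketch:

```python
import lamindb as ln

first = ln.ULabel(name="My experiment").save()
again = ln.ULabel(name="My experiment")
# logs: returning existing ULabel record with same name: 'My experiment'
assert again.uid == first.uid
```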
@@ -257,6 +308,60 @@ def _format_django_validation_error(record: Record, e: DjangoValidationError):
     return message
 
 
+def _get_record_kwargs(record_class) -> list[tuple[str, str]]:
+    """Gets the parameters of a Record from the overloaded signature.
+
+    Example:
+        >>> get_record_params(bt.Organism)
+        >>> [('name', 'str'), ('taxon_id', 'str | None'), ('scientific_name', 'str | None')]
+    """
+    source = inspect.getsource(record_class)
+
+    # Find first overload that's not *db_args
+    pattern = r"@overload\s+def __init__\s*\(([\s\S]*?)\):\s*\.{3}"
+    overloads = re.finditer(pattern, source)
+
+    for overload in overloads:
+        params_block = overload.group(1)
+        # This is an additional safety measure if the overloaded signature that we're
+        # looking for is not at the top but a "db_args" constructor
+        if "*db_args" in params_block:
+            continue
+
+        params = []
+        for line in params_block.split("\n"):
+            line = line.strip()
+            if not line or "self" in line:
+                continue
+
+            # Extract name and type annotation
+            # The regex pattern finds parameter definitions like:
+            # Simple: name: str
+            # With default: age: int = 0
+            # With complex types: items: List[str] = []
+            param_pattern = (
+                r"(\w+)"  # Parameter name
+                r"\s*:\s*"  # Colon with optional whitespace
+                r"((?:[^=,]|"  # Type hint: either non-equals/comma chars
+                r"(?<=\[)[^[\]]*"  # or contents within square brackets
+                r"(?=\]))+)"  # looking ahead for closing bracket
+                r"(?:\s*=\s*"  # Optional default value part
+                r"([^,]+))?"  # Default value: anything but comma
+            )
+            match = re.match(param_pattern, line)
+            if not match:
+                continue
+
+            name, type_str = match.group(1), match.group(2).strip()
+
+            # Keep type as string instead of evaluating
+            params.append((name, type_str))
+
+        return params
+
+    return []
+
+
 @classmethod  # type:ignore
 @doc_args(Record.filter.__doc__)
 def filter(cls, *queries, **expressions) -> QuerySet:
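To make the parameter extraction concrete, here is the same pattern run against a few hypothetical signature lines; this is self-contained and doesn't need lamindb:

```python
import re

param_pattern = (
    r"(\w+)"                              # parameter name
    r"\s*:\s*"                            # colon with optional whitespace
    r"((?:[^=,]|(?<=\[)[^[\]]*(?=\]))+)"  # type hint, tolerating one bracket level
    r"(?:\s*=\s*([^,]+))?"                # optional default value
)

for line in ["name: str,", "taxon_id: str | None = None,", "items: list[str] = [],"]:
    match = re.match(param_pattern, line)
    print(match.group(1), "->", match.group(2).strip())
# name -> str
# taxon_id -> str | None
# items -> list[str]
```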
@@ -639,8 +744,8 @@ def get_transfer_run(record) -> Run:
     if transform is None:
         search_names = settings.creation.search_names
         settings.creation.search_names = False
-        transform = Transform(…
-            uid=uid,…
+        transform = Transform(  # type: ignore
+            uid=uid, description=f"Transfer from `{slug}`", key=key, type="function"
         ).save()
         settings.creation.search_names = search_names
     # use the global run context to get the initiated_by_run run id
@@ -655,7 +760,7 @@ def get_transfer_run(record) -> Run:
         transform=transform, initiated_by_run=initiated_by_run
     ).one_or_none()
     if run is None:
-        run = Run(transform=transform, initiated_by_run=initiated_by_run).save()
+        run = Run(transform=transform, initiated_by_run=initiated_by_run).save()  # type: ignore
     run.initiated_by_run = initiated_by_run  # so that it's available in memory
     return run
@@ -738,28 +843,40 @@ def save(self, *args, **kwargs) -> Record:
     if pre_existing_record is not None:
         init_self_from_db(self, pre_existing_record)
     else:
-        …
-        revises…
-        revises.…
-        …
+        check_key_change(self)
+        check_name_change(self)
+        try:
+            # save versioned record in presence of self._revises
+            if isinstance(self, IsVersioned) and self._revises is not None:
+                assert self._revises.is_latest  # noqa: S101
+                revises = self._revises
+                revises.is_latest = False
+                with transaction.atomic():
+                    revises._revises = None  # ensure we don't start a recursion
+                    revises.save()
+                    super(BasicRecord, self).save(*args, **kwargs)  # type: ignore
+                self._revises = None
+            # save unversioned record
+            else:
                 super(BasicRecord, self).save(*args, **kwargs)
-        …
+        except IntegrityError as e:
+            error_msg = str(e)
+            # two possible error messages for hash duplication
+            # "duplicate key value violates unique constraint"
+            # "UNIQUE constraint failed"
+            if (
+                "UNIQUE constraint failed" in error_msg
+                or "duplicate key value violates unique constraint" in error_msg
+            ) and "hash" in error_msg:
+                pre_existing_record = self.__class__.get(hash=self.hash)
+                logger.warning(
+                    f"returning {self.__class__.__name__.lower()} with same hash: {pre_existing_record}"
+                )
+                init_self_from_db(self, pre_existing_record)
+            else:
+                raise
+        _store_record_old_name(self)
+        _store_record_old_key(self)
     # perform transfer of many-to-many fields
     # only supported for Artifact and Collection records
     if db is not None and db != "default" and using_key is None:
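The new `IntegrityError` branch makes `save()` converge on hash collisions instead of failing: when a unique-constraint violation mentions `hash`, the pre-existing row is fetched and the in-memory record is re-initialized from it. A hedged sketch of the observable behavior; in practice the branch matters mostly under concurrent creation, since hash-based deduplication usually already happens in `__init__`:

```python
import lamindb as ln

a1 = ln.Artifact("./data.parquet", key="data.parquet").save()
# a second save of identical content that slips past the in-constructor lookup
# no longer raises; it logs "returning artifact with same hash: ..." and
# populates the record from the pre-existing row
a2 = ln.Artifact("./data.parquet", key="data.parquet").save()
assert a1.uid == a2.uid
```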
@@ -778,7 +895,7 @@ def save(self, *args, **kwargs) -> Record:
             self_on_db = copy(self)
             self_on_db._state.db = db
             self_on_db.pk = pk_on_db  # manually set the primary key
-            self_on_db.features = FeatureManager(self_on_db)
+            self_on_db.features = FeatureManager(self_on_db)  # type: ignore
             self.features._add_from(self_on_db, transfer_logs=transfer_logs)
             self.labels.add_from(self_on_db, transfer_logs=transfer_logs)
             for k, v in transfer_logs.items():
@@ -851,7 +968,7 @@ def check_name_change(record: Record):
     # when a feature is renamed
     elif isinstance(record, Feature):
         # only internal features are associated with schemas
-        linked_artifacts = Artifact.filter(…
+        linked_artifacts = Artifact.filter(feature_sets__features=record).list(
             "uid"
         )
         n = len(linked_artifacts)
@@ -915,7 +1032,7 @@ def delete(self) -> None:
             new_latest.is_latest = True
             with transaction.atomic():
                 new_latest.save()
-                super(BasicRecord, self).delete()
+                super(BasicRecord, self).delete()  # type: ignore
             logger.warning(f"new latest version is {new_latest}")
             return None
     super(BasicRecord, self).delete()
lamindb/_run.py
CHANGED
@@ -4,7 +4,7 @@ from lamindb.models import ParamManager, Run, Transform
 
 
 def __init__(run: Run, *args, **kwargs):
-    run.params = ParamManager(run)
+    run.params = ParamManager(run)  # type: ignore
     if len(args) == len(run._meta.concrete_fields):
         super(Run, run).__init__(*args, **kwargs)
         return None
@@ -24,7 +24,7 @@ def __init__(run: Run, *args, **kwargs):
     if transform._state.adding:
         raise ValueError("Please save transform record before creating a run")
 
-    super(Run, run).__init__(
+    super(Run, run).__init__(  # type: ignore
         transform=transform,
         reference=reference,
         initiated_by_run=initiated_by_run,
@@ -56,5 +56,5 @@ def delete(self) -> None:
     super(Run, self).delete()
 
 
-Run.__init__ = __init__
-Run.delete = delete
+Run.__init__ = __init__  # type: ignore
+Run.delete = delete  # type: ignore
lamindb/_save.py
CHANGED
@@ -57,7 +57,7 @@ def save(records: Iterable[Record], ignore_conflicts: bool | None = False) -> None:
 
     For a single record, use ``record.save()``:
 
-    >>> transform = ln.Transform(…
+    >>> transform = ln.Transform(key="My pipeline")
     >>> transform.save()
 
     Update a single existing record:
@@ -133,7 +133,9 @@ def check_and_attempt_upload(
     using_key: str | None = None,
     access_token: str | None = None,
     print_progress: bool = True,
+    **kwargs,
 ) -> Exception | None:
+    # kwargs are propagated to .upload_from in the end
     # if Artifact object is either newly instantiated or replace() was called on
     # a local env it will have a _local_filepath and needs to be uploaded
     if hasattr(artifact, "_local_filepath"):
@@ -143,9 +145,13 @@ def check_and_attempt_upload(
             using_key,
             access_token=access_token,
             print_progress=print_progress,
+            **kwargs,
         )
     except Exception as exception:
         logger.warning(f"could not upload artifact: {artifact}")
+        # clear dangling storages if we were actually uploading or saving
+        if hasattr(artifact, "_to_store") and artifact._to_store:
+            artifact._clear_storagekey = auto_storage_key_from_artifact(artifact)
         return exception
     # copies (if on-disk) or moves the temporary file (if in-memory) to the cache
     if os.getenv("LAMINDB_MULTI_INSTANCE") is None:
@@ -212,19 +218,25 @@ def copy_or_move_to_cache(
 
 # This is also used within Artifact.save()
 def check_and_attempt_clearing(
     artifact: Artifact,
+    raise_file_not_found_error: bool = True,
+    using_key: str | None = None,
 ) -> Exception | None:
     # this is a clean-up operation after replace() was called
-    # …
+    # or if there was an exception during upload
     if hasattr(artifact, "_clear_storagekey"):
         try:
             if artifact._clear_storagekey is not None:
-                delete_storage_using_key(
-                    artifact,
-                    …
-                    …
-                    …
+                delete_msg = delete_storage_using_key(
+                    artifact,
+                    artifact._clear_storagekey,
+                    raise_file_not_found_error=raise_file_not_found_error,
+                    using_key=using_key,
                 )
+                if delete_msg != "did-not-delete":
+                    logger.success(
+                        f"deleted stale object at storage key {artifact._clear_storagekey}"
+                    )
                 artifact._clear_storagekey = None
         except Exception as exception:
             return exception
@@ -246,11 +258,17 @@ def store_artifacts(
 
     # upload new local artifacts
     for artifact in artifacts:
+        # failure here sets ._clear_storagekey
+        # for cleanup below
         exception = check_and_attempt_upload(artifact, using_key)
         if exception is not None:
             break
         stored_artifacts += [artifact]
-        …
+        # if check_and_attempt_upload was successfull
+        # then this can have only ._clear_storagekey from .replace
+        exception = check_and_attempt_clearing(
+            artifact, raise_file_not_found_error=True, using_key=using_key
+        )
         if exception is not None:
             logger.warning(f"clean up of {artifact._clear_storagekey} failed")
             break
@@ -261,6 +279,14 @@ def store_artifacts(
     for artifact in artifacts:
         if artifact not in stored_artifacts:
             artifact._delete_skip_storage()
+            # clean up storage after failure in check_and_attempt_upload
+            exception_clear = check_and_attempt_clearing(
+                artifact, raise_file_not_found_error=False, using_key=using_key
+            )
+            if exception_clear is not None:
+                logger.warning(
+                    f"clean up of {artifact._clear_storagekey} after the upload error failed"
+                )
     error_message = prepare_error_message(artifacts, stored_artifacts, exception)
     # this is bad because we're losing the original traceback
     # needs to be refactored - also, the orginal error should be raised
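Taken together, the two call sites encode an asymmetric clean-up contract; a hedged summary sketch, assuming `delete_storage_using_key` returns the string `"did-not-delete"` when it decides not to remove anything (as the comparison above implies):

```python
# after a successful upload, only a replace() leftover can need clearing;
# a missing object is unexpected, so raise_file_not_found_error=True
check_and_attempt_clearing(artifact, raise_file_not_found_error=True, using_key=using_key)

# after a failed upload, the object may never have been written at all,
# so a FileNotFoundError during clean-up is tolerated
check_and_attempt_clearing(artifact, raise_file_not_found_error=False, using_key=using_key)
```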
@@ -269,7 +295,7 @@ def store_artifacts(
 
 
 def prepare_error_message(records, stored_artifacts, exception) -> str:
-    if len(…
+    if len(stored_artifacts) == 0:
         error_message = (
             "No entries were uploaded or committed"
             " to the database. See error message:\n\n"
@@ -293,8 +319,10 @@ def upload_artifact(
     using_key: str | None = None,
     access_token: str | None = None,
     print_progress: bool = True,
+    **kwargs,
 ) -> tuple[UPath, UPath | None]:
     """Store and add file and its linked entries."""
+    # kwargs are propagated to .upload_from in the end
     # can't currently use filepath_from_artifact here because it resolves to ._local_filepath
     storage_key = auto_storage_key_from_artifact(artifact)
     storage_path, storage_settings = attempt_accessing_path(
@@ -303,7 +331,10 @@ def upload_artifact(
     if hasattr(artifact, "_to_store") and artifact._to_store:
         logger.save(f"storing artifact '{artifact.uid}' at '{storage_path}'")
         store_file_or_folder(
-            artifact._local_filepath,…
+            artifact._local_filepath,
+            storage_path,
+            print_progress=print_progress,
+            **kwargs,
         )
 
     if isinstance(storage_path, LocalPathClasses):
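The `**kwargs` threading above means storage options can now flow from `check_and_attempt_upload` through `upload_artifact` into `store_file_or_folder` and, ultimately, `.upload_from`. A hedged sketch; `create_folder` is shown as a plausible backend option, not a keyword documented by this diff:

```python
exception = check_and_attempt_upload(
    artifact,
    using_key=None,
    print_progress=False,
    create_folder=False,  # assumption: forwarded untouched to .upload_from
)
if exception is not None:
    raise exception
```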