lamindb 1.0.4__py3-none-any.whl → 1.1.0__py3-none-any.whl

This diff compares the contents of two package versions as publicly released to one of the supported registries. It is provided for informational purposes only and reflects the packages exactly as they appear in their public registry.
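A diff like this can be reproduced locally. A minimal sketch, assuming both wheel files have already been downloaded from PyPI (paths and the chosen member file are illustrative):

    import difflib
    import zipfile

    def wheel_text(path: str, member: str) -> list[str]:
        # a wheel is a zip archive; read one member file as text lines
        with zipfile.ZipFile(path) as zf:
            return zf.read(member).decode().splitlines(keepends=True)

    old = wheel_text("lamindb-1.0.4-py3-none-any.whl", "lamindb/_record.py")
    new = wheel_text("lamindb-1.1.0-py3-none-any.whl", "lamindb/_record.py")
    print("".join(difflib.unified_diff(old, new, "1.0.4", "1.1.0")))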
Files changed (78)
  1. lamindb/__init__.py +14 -5
  2. lamindb/_artifact.py +174 -57
  3. lamindb/_can_curate.py +27 -8
  4. lamindb/_collection.py +85 -51
  5. lamindb/_feature.py +177 -41
  6. lamindb/_finish.py +222 -81
  7. lamindb/_from_values.py +83 -98
  8. lamindb/_parents.py +4 -4
  9. lamindb/_query_set.py +59 -17
  10. lamindb/_record.py +171 -53
  11. lamindb/_run.py +4 -4
  12. lamindb/_save.py +33 -10
  13. lamindb/_schema.py +135 -38
  14. lamindb/_storage.py +1 -1
  15. lamindb/_tracked.py +106 -0
  16. lamindb/_transform.py +21 -8
  17. lamindb/_ulabel.py +5 -14
  18. lamindb/base/validation.py +2 -6
  19. lamindb/core/__init__.py +13 -14
  20. lamindb/core/_context.py +39 -36
  21. lamindb/core/_data.py +29 -25
  22. lamindb/core/_describe.py +1 -1
  23. lamindb/core/_django.py +1 -1
  24. lamindb/core/_feature_manager.py +54 -44
  25. lamindb/core/_label_manager.py +4 -4
  26. lamindb/core/_mapped_collection.py +20 -7
  27. lamindb/core/datasets/__init__.py +6 -1
  28. lamindb/core/datasets/_core.py +12 -11
  29. lamindb/core/datasets/_small.py +66 -20
  30. lamindb/core/exceptions.py +1 -90
  31. lamindb/core/loaders.py +7 -13
  32. lamindb/core/relations.py +6 -4
  33. lamindb/core/storage/_anndata_accessor.py +41 -0
  34. lamindb/core/storage/_backed_access.py +2 -2
  35. lamindb/core/storage/_pyarrow_dataset.py +25 -15
  36. lamindb/core/storage/_tiledbsoma.py +56 -12
  37. lamindb/core/storage/paths.py +41 -22
  38. lamindb/core/subsettings/_creation_settings.py +4 -16
  39. lamindb/curators/__init__.py +2168 -833
  40. lamindb/curators/_cellxgene_schemas/__init__.py +26 -0
  41. lamindb/curators/_cellxgene_schemas/schema_versions.yml +104 -0
  42. lamindb/errors.py +96 -0
  43. lamindb/integrations/_vitessce.py +3 -3
  44. lamindb/migrations/0069_squashed.py +76 -75
  45. lamindb/migrations/0075_lamindbv1_part5.py +4 -5
  46. lamindb/migrations/0082_alter_feature_dtype.py +21 -0
  47. lamindb/migrations/0083_alter_feature_is_type_alter_flextable_is_type_and_more.py +94 -0
  48. lamindb/migrations/0084_alter_schemafeature_feature_and_more.py +35 -0
  49. lamindb/migrations/0085_alter_feature_is_type_alter_flextable_is_type_and_more.py +63 -0
  50. lamindb/migrations/0086_various.py +95 -0
  51. lamindb/migrations/0087_rename__schemas_m2m_artifact_feature_sets_and_more.py +41 -0
  52. lamindb/migrations/0088_schema_components.py +273 -0
  53. lamindb/migrations/0088_squashed.py +4372 -0
  54. lamindb/models.py +423 -156
  55. {lamindb-1.0.4.dist-info → lamindb-1.1.0.dist-info}/METADATA +10 -7
  56. lamindb-1.1.0.dist-info/RECORD +95 -0
  57. lamindb/curators/_spatial.py +0 -528
  58. lamindb/migrations/0052_squashed.py +0 -1261
  59. lamindb/migrations/0053_alter_featureset_hash_alter_paramvalue_created_by_and_more.py +0 -57
  60. lamindb/migrations/0054_alter_feature_previous_runs_and_more.py +0 -35
  61. lamindb/migrations/0055_artifact_type_artifactparamvalue_and_more.py +0 -61
  62. lamindb/migrations/0056_rename_ulabel_ref_is_name_artifactulabel_label_ref_is_name_and_more.py +0 -22
  63. lamindb/migrations/0057_link_models_latest_report_and_others.py +0 -356
  64. lamindb/migrations/0058_artifact__actions_collection__actions.py +0 -22
  65. lamindb/migrations/0059_alter_artifact__accessor_alter_artifact__hash_type_and_more.py +0 -31
  66. lamindb/migrations/0060_alter_artifact__actions.py +0 -22
  67. lamindb/migrations/0061_alter_collection_meta_artifact_alter_run_environment_and_more.py +0 -45
  68. lamindb/migrations/0062_add_is_latest_field.py +0 -32
  69. lamindb/migrations/0063_populate_latest_field.py +0 -45
  70. lamindb/migrations/0064_alter_artifact_version_alter_collection_version_and_more.py +0 -33
  71. lamindb/migrations/0065_remove_collection_feature_sets_and_more.py +0 -22
  72. lamindb/migrations/0066_alter_artifact__feature_values_and_more.py +0 -352
  73. lamindb/migrations/0067_alter_featurevalue_unique_together_and_more.py +0 -20
  74. lamindb/migrations/0068_alter_artifactulabel_unique_together_and_more.py +0 -20
  75. lamindb/migrations/0069_alter_artifact__accessor_alter_artifact__hash_type_and_more.py +0 -1294
  76. lamindb-1.0.4.dist-info/RECORD +0 -102
  77. {lamindb-1.0.4.dist-info → lamindb-1.1.0.dist-info}/LICENSE +0 -0
  78. {lamindb-1.0.4.dist-info → lamindb-1.1.0.dist-info}/WHEEL +0 -0
lamindb/_record.py CHANGED
@@ -1,6 +1,7 @@
 from __future__ import annotations

 import builtins
+import inspect
 import re
 from functools import reduce
 from pathlib import PurePosixPath
@@ -29,6 +30,7 @@ from django.db.models.lookups import (
     Regex,
     StartsWith,
 )
+from django.db.utils import IntegrityError
 from lamin_utils import colors, logger
 from lamin_utils._lookup import Lookup
 from lamindb_setup._connect_instance import (
@@ -41,7 +43,7 @@ from lamindb_setup.core._hub_core import connect_instance_hub
 from lamindb_setup.core._settings_store import instance_settings_file
 from lamindb_setup.core.upath import extract_suffix_from_path

-from lamindb.base.validation import FieldValidationError
+from lamindb.errors import FieldValidationError
 from lamindb.models import (
     Artifact,
     BasicRecord,
@@ -60,7 +62,7 @@ from lamindb.models import (

 from ._utils import attach_func_to_class_method
 from .core._settings import settings
-from .core.exceptions import (
+from .errors import (
     InvalidArgument,
     RecordNameChangeIntegrityError,
     ValidationError,
@@ -75,6 +77,30 @@ if TYPE_CHECKING:
 IPYTHON = getattr(builtins, "__IPYTHON__", False)


+def is_approx_pascal_case(s):
+    """Check if the last component of a dotted string is in PascalCase.
+
+    Args:
+        s (str): The string to check
+
+    Returns:
+        bool: True if the last component is in PascalCase
+
+    Raises:
+        ValueError: If the last component doesn't start with a capital letter
+    """
+    if "[" in s:  # this is because we allow types of form 'script[test_script.py]'
+        return True
+    last_component = s.split(".")[-1]
+
+    if not last_component[0].isupper():
+        raise ValueError(
+            f"'{last_component}' should start with a capital letter given you're defining a type"
+        )
+
+    return True
+
+
 def init_self_from_db(self: Record, existing_record: Record):
     new_args = [
         getattr(existing_record, field.attname) for field in self._meta.concrete_fields
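Note: the validator above approximates PascalCase by checking only the first character of the last dotted component. A quick sketch of its behavior (inputs are illustrative):

    is_approx_pascal_case("CellType")                # True
    is_approx_pascal_case("bionty.CellType")         # True: only the last component is checked
    is_approx_pascal_case("script[test_script.py]")  # True: bracketed type forms are exempt
    is_approx_pascal_case("cell_type")               # raises ValueError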
@@ -86,7 +112,12 @@ def init_self_from_db(self: Record, existing_record: Record):

 def update_attributes(record: Record, attributes: dict[str, str]):
     for key, value in attributes.items():
-        if getattr(record, key) != value:
+        if (
+            getattr(record, key) != value
+            and value is not None
+            and key != "dtype"
+            and key != "_aux"
+        ):
             logger.warning(f"updated {key} from {getattr(record, key)} to {value}")
             setattr(record, key, value)

@@ -105,7 +136,7 @@ def validate_fields(record: Record, kwargs):
         k for k, v in kwargs.items() if v is None and k in required_fields
     ]
     if missing_fields:
-        raise TypeError(f"{missing_fields} are required.")
+        raise FieldValidationError(f"{missing_fields} are required.")
     # ensure the exact length of the internal uid for core entities
     if "uid" in kwargs and record.__class__ in {
         Artifact,
@@ -122,19 +153,36 @@ def validate_fields(record: Record, kwargs):
         ).max_length  # triggers FieldDoesNotExist
         if len(kwargs["uid"]) != uid_max_length:  # triggers KeyError
             raise ValidationError(
-                f'`uid` must be exactly {uid_max_length} characters long, got {len(kwargs["uid"])}.'
+                f"`uid` must be exactly {uid_max_length} characters long, got {len(kwargs['uid'])}."
+            )
+    # validate is_type
+    if "is_type" in kwargs and "name" in kwargs and kwargs["is_type"]:
+        if kwargs["name"].endswith("s"):
+            logger.warning(
+                f"name '{kwargs['name']}' for type ends with 's', in case you're naming with plural, consider the singular for a type name"
             )
+        is_approx_pascal_case(kwargs["name"])
     # validate literals
     validate_literal_fields(record, kwargs)


-def suggest_records_with_similar_names(record: Record, name_field: str, kwargs) -> bool:
+def suggest_records_with_similar_names(
+    record: Record, name_field: str, kwargs
+) -> Record | None:
     """Returns True if found exact match, otherwise False.

     Logs similar matches if found.
     """
     if kwargs.get(name_field) is None or not isinstance(kwargs.get(name_field), str):
-        return False
+        return None
+    # need to perform an additional request to find the exact match
+    # previously, this was inferred from the truncated/fuzzy search below
+    # but this isn't reliable: https://laminlabs.slack.com/archives/C04FPE8V01W/p1737812808563409
+    # the below needs to be .first() because there might be multiple records with the same
+    # name field in case the record is versioned (e.g. for Transform key)
+    exact_match = record.__class__.filter(**{name_field: kwargs[name_field]}).first()
+    if exact_match is not None:
+        return exact_match
     queryset = _search(
         record.__class__,
         kwargs[name_field],
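Note: the comments above motivate the switch to `.first()`: for versioned entities several records may legitimately share the same name field, so an exact-match lookup has to tolerate multiple hits. A hedged sketch (the key is hypothetical):

    # two saved versions of a transform share the key "analysis.ipynb"
    Transform.filter(key="analysis.ipynb").one_or_none()  # errors out on multiple results
    Transform.filter(key="analysis.ipynb").first()        # returns one match, or None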
@@ -143,10 +191,7 @@ def suggest_records_with_similar_names(record: Record, name_field: str, kwargs)
         limit=3,
     )
     if not queryset.exists():  # empty queryset
-        return False
-    for alternative_record in queryset:
-        if getattr(alternative_record, name_field) == kwargs[name_field]:
-            return True
+        return None
     s, it, nots = ("", "it", "s") if len(queryset) == 1 else ("s", "one of them", "")
     msg = f"record{s} with similar {name_field}{s} exist{nots}! did you mean to load {it}?"
     if IPYTHON:
@@ -157,11 +202,14 @@ def suggest_records_with_similar_names(record: Record, name_field: str, kwargs)
         display(queryset.df())
     else:
         logger.warning(f"{msg}\n{queryset}")
-    return False
+    return None


 def __init__(record: Record, *args, **kwargs):
-    if not args:
+    skip_validation = kwargs.pop("_skip_validation", False)
+    if not args and skip_validation:
+        super(BasicRecord, record).__init__(**kwargs)
+    elif not args and not skip_validation:
         validate_fields(record, kwargs)

         # do not search for names if an id is passed; this is important
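Note: `_skip_validation` is a private escape hatch for internal callers whose kwargs are already known to be valid; it bypasses both field validation and the name search. Hypothetical usage:

    ULabel(name="perturbed")                         # validated; similar names are searched
    ULabel(name="perturbed", _skip_validation=True)  # handed straight to Django's __init__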
@@ -170,15 +218,13 @@ def __init__(record: Record, *args, **kwargs):
         if "_has_consciously_provided_uid" in kwargs:
             has_consciously_provided_uid = kwargs.pop("_has_consciously_provided_uid")
         if (
-            isinstance(
-                record, (CanCurate, Collection, Transform)
-            )  # Collection is only temporary because it'll get a key field
+            isinstance(record, (CanCurate, Collection, Transform))
             and settings.creation.search_names
             and not has_consciously_provided_uid
         ):
             name_field = getattr(record, "_name_field", "name")
-            match = suggest_records_with_similar_names(record, name_field, kwargs)
-            if match:
+            exact_match = suggest_records_with_similar_names(record, name_field, kwargs)
+            if exact_match is not None:
                 if "version" in kwargs:
                     if kwargs["version"] is not None:
                         version_comment = " and version"
@@ -189,22 +235,26 @@ def __init__(record: Record, *args, **kwargs):
                             }
                         ).one_or_none()
                     else:
-                        # for a versioned record, an exact name match is not a
-                        # criterion for retrieving a record in case `version`
-                        # isn't passed - we'd always pull out many records with exactly the
-                        # same name
+                        # for a versioned record, an exact name match is not a criterion
+                        # for retrieving a record in case `version` isn't passed -
+                        # we'd always pull out many records with exactly the same name
                        existing_record = None
                 else:
                     version_comment = ""
-                    existing_record = record.__class__.filter(
-                        **{name_field: kwargs[name_field]}
-                    ).one_or_none()
+                    existing_record = exact_match
                 if existing_record is not None:
                     logger.important(
                         f"returning existing {record.__class__.__name__} record with same"
                         f" {name_field}{version_comment}: '{kwargs[name_field]}'"
                     )
+                    if isinstance(record, Schema):
+                        if Artifact.filter(schema=record).exists():
+                            if record.hash != kwargs["hash"]:
+                                raise ValueError(
+                                    "Schema is already in use, can't be changed."
+                                )
                     init_self_from_db(record, existing_record)
+                    update_attributes(record, kwargs)
                     return None
         super(BasicRecord, record).__init__(**kwargs)
         if isinstance(record, ValidateFields):
@@ -218,7 +268,9 @@ def __init__(record: Record, *args, **kwargs):
                 message = _format_django_validation_error(record, e)
                 raise FieldValidationError(message) from e
     elif len(args) != len(record._meta.concrete_fields):
-        raise ValueError("please provide keyword arguments, not plain arguments")
+        raise FieldValidationError(
+            f"Use keyword arguments instead of positional arguments, e.g.: {record.__class__.__name__}(name='...')."
+        )
     else:
         # object is loaded from DB (**kwargs could be omitted below, I believe)
         super(BasicRecord, record).__init__(*args, **kwargs)
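Note: with this change, positional construction fails with an actionable message, for instance (hypothetical call):

    ln.ULabel("perturbed")
    # FieldValidationError: Use keyword arguments instead of positional arguments, e.g.: ULabel(name='...').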
@@ -257,6 +309,60 @@ def _format_django_validation_error(record: Record, e: DjangoValidationError):
     return message


+def _get_record_kwargs(record_class) -> list[tuple[str, str]]:
+    """Gets the parameters of a Record from the overloaded signature.
+
+    Example:
+        >>> get_record_params(bt.Organism)
+        >>> [('name', 'str'), ('taxon_id', 'str | None'), ('scientific_name', 'str | None')]
+    """
+    source = inspect.getsource(record_class)
+
+    # Find first overload that's not *db_args
+    pattern = r"@overload\s+def __init__\s*\(([\s\S]*?)\):\s*\.{3}"
+    overloads = re.finditer(pattern, source)
+
+    for overload in overloads:
+        params_block = overload.group(1)
+        # This is an additional safety measure if the overloaded signature that we're
+        # looking for is not at the top but a "db_args" constructor
+        if "*db_args" in params_block:
+            continue
+
+        params = []
+        for line in params_block.split("\n"):
+            line = line.strip()
+            if not line or "self" in line:
+                continue
+
+            # Extract name and type annotation
+            # The regex pattern finds parameter definitions like:
+            # Simple: name: str
+            # With default: age: int = 0
+            # With complex types: items: List[str] = []
+            param_pattern = (
+                r"(\w+)"  # Parameter name
+                r"\s*:\s*"  # Colon with optional whitespace
+                r"((?:[^=,]|"  # Type hint: either non-equals/comma chars
+                r"(?<=\[)[^[\]]*"  # or contents within square brackets
+                r"(?=\]))+)"  # looking ahead for closing bracket
+                r"(?:\s*=\s*"  # Optional default value part
+                r"([^,]+))?"  # Default value: anything but comma
+            )
+            match = re.match(param_pattern, line)
+            if not match:
+                continue
+
+            name, type_str = match.group(1), match.group(2).strip()
+
+            # Keep type as string instead of evaluating
+            params.append((name, type_str))
+
+        return params
+
+    return []
+
+
 @classmethod  # type:ignore
 @doc_args(Record.filter.__doc__)
 def filter(cls, *queries, **expressions) -> QuerySet:
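Note: `_get_record_kwargs` parses the `@overload` stub out of the class's source text rather than inspecting the runtime signature, so it requires a class defined in a module on disk whose stub lists one parameter per line. A minimal sketch on a hypothetical class:

    from typing import overload

    class Organism:  # hypothetical registry in a module on disk
        @overload
        def __init__(
            self,
            name: str,
            taxon_id: str | None = None,
        ): ...
        def __init__(self, *args, **kwargs): ...

    _get_record_kwargs(Organism)
    # -> [('name', 'str'), ('taxon_id', 'str | None')]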
@@ -639,8 +745,8 @@ def get_transfer_run(record) -> Run:
     if transform is None:
         search_names = settings.creation.search_names
         settings.creation.search_names = False
-        transform = Transform(
-            uid=uid, name=f"Transfer from `{slug}`", key=key, type="function"
+        transform = Transform(  # type: ignore
+            uid=uid, description=f"Transfer from `{slug}`", key=key, type="function"
         ).save()
         settings.creation.search_names = search_names
     # use the global run context to get the initiated_by_run run id
@@ -655,7 +761,7 @@ def get_transfer_run(record) -> Run:
         transform=transform, initiated_by_run=initiated_by_run
     ).one_or_none()
     if run is None:
-        run = Run(transform=transform, initiated_by_run=initiated_by_run).save()
+        run = Run(transform=transform, initiated_by_run=initiated_by_run).save()  # type: ignore
         run.initiated_by_run = initiated_by_run  # so that it's available in memory
     return run

@@ -738,28 +844,40 @@ def save(self, *args, **kwargs) -> Record:
     if pre_existing_record is not None:
         init_self_from_db(self, pre_existing_record)
     else:
-        # save versioned record
-        if isinstance(self, IsVersioned) and self._revises is not None:
-            assert self._revises.is_latest  # noqa: S101
-            revises = self._revises
-            revises.is_latest = False
-            with transaction.atomic():
-                revises._revises = None  # ensure we don't start a recursion
-                revises.save()
-            check_name_change(self)
-            check_key_change(self)
+        check_key_change(self)
+        check_name_change(self)
+        try:
+            # save versioned record in presence of self._revises
+            if isinstance(self, IsVersioned) and self._revises is not None:
+                assert self._revises.is_latest  # noqa: S101
+                revises = self._revises
+                revises.is_latest = False
+                with transaction.atomic():
+                    revises._revises = None  # ensure we don't start a recursion
+                    revises.save()
+                super(BasicRecord, self).save(*args, **kwargs)  # type: ignore
+                self._revises = None
+            # save unversioned record
+            else:
                 super(BasicRecord, self).save(*args, **kwargs)
-            _store_record_old_name(self)
-            _store_record_old_key(self)
-            self._revises = None
-        # save unversioned record
-        else:
-            check_name_change(self)
-            check_key_change(self)
-            super(BasicRecord, self).save(*args, **kwargs)
-            # update _old_name and _old_key after saving
-            _store_record_old_name(self)
-            _store_record_old_key(self)
+        except IntegrityError as e:
+            error_msg = str(e)
+            # two possible error messages for hash duplication
+            # "duplicate key value violates unique constraint"
+            # "UNIQUE constraint failed"
+            if (
+                "UNIQUE constraint failed" in error_msg
+                or "duplicate key value violates unique constraint" in error_msg
+            ) and "hash" in error_msg:
+                pre_existing_record = self.__class__.get(hash=self.hash)
+                logger.warning(
+                    f"returning {self.__class__.__name__.lower()} with same hash: {pre_existing_record}"
+                )
+                init_self_from_db(self, pre_existing_record)
+            else:
+                raise
+        _store_record_old_name(self)
+        _store_record_old_key(self)
     # perform transfer of many-to-many fields
     # only supported for Artifact and Collection records
     if db is not None and db != "default" and using_key is None:
@@ -778,7 +896,7 @@ def save(self, *args, **kwargs) -> Record:
             self_on_db = copy(self)
             self_on_db._state.db = db
             self_on_db.pk = pk_on_db  # manually set the primary key
-            self_on_db.features = FeatureManager(self_on_db)
+            self_on_db.features = FeatureManager(self_on_db)  # type: ignore
             self.features._add_from(self_on_db, transfer_logs=transfer_logs)
             self.labels.add_from(self_on_db, transfer_logs=transfer_logs)
             for k, v in transfer_logs.items():
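Note: the new `IntegrityError` handler makes saving idempotent with respect to a unique `hash` column. Roughly, for a hypothetical registry:

    record = MyRegistry(name="x")  # MyRegistry: any Record with a unique hash field
    record.save()
    # if the INSERT violates a unique constraint and the error message mentions "hash",
    # save() fetches the stored row via MyRegistry.get(hash=record.hash), logs
    # "returning myregistry with same hash: ...", and re-initializes `record` from it;
    # any other IntegrityError is re-raised unchanged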
@@ -851,7 +969,7 @@ def check_name_change(record: Record):
     # when a feature is renamed
     elif isinstance(record, Feature):
         # only internal features are associated with schemas
-        linked_artifacts = Artifact.filter(_schemas_m2m__features=record).list(
+        linked_artifacts = Artifact.filter(feature_sets__features=record).list(
             "uid"
         )
         n = len(linked_artifacts)
@@ -915,7 +1033,7 @@ def delete(self) -> None:
             new_latest.is_latest = True
             with transaction.atomic():
                 new_latest.save()
-                super(BasicRecord, self).delete()
+                super(BasicRecord, self).delete()  # type: ignore
             logger.warning(f"new latest version is {new_latest}")
             return None
     super(BasicRecord, self).delete()
lamindb/_run.py CHANGED
@@ -4,7 +4,7 @@ from lamindb.models import ParamManager, Run, Transform


 def __init__(run: Run, *args, **kwargs):
-    run.params = ParamManager(run)
+    run.params = ParamManager(run)  # type: ignore
     if len(args) == len(run._meta.concrete_fields):
         super(Run, run).__init__(*args, **kwargs)
         return None
@@ -24,7 +24,7 @@ def __init__(run: Run, *args, **kwargs):
     if transform._state.adding:
         raise ValueError("Please save transform record before creating a run")

-    super(Run, run).__init__(
+    super(Run, run).__init__(  # type: ignore
         transform=transform,
         reference=reference,
         initiated_by_run=initiated_by_run,
@@ -56,5 +56,5 @@ def delete(self) -> None:
     super(Run, self).delete()


-Run.__init__ = __init__
-Run.delete = delete
+Run.__init__ = __init__  # type: ignore
+Run.delete = delete  # type: ignore
lamindb/_save.py CHANGED
@@ -57,7 +57,7 @@ def save(records: Iterable[Record], ignore_conflicts: bool | None = False) -> None:

     For a single record, use ``record.save()``:

-    >>> transform = ln.Transform(name="My pipeline")
+    >>> transform = ln.Transform(key="My pipeline")
     >>> transform.save()

     Update a single existing record:
@@ -146,6 +146,9 @@ def check_and_attempt_upload(
         )
     except Exception as exception:
         logger.warning(f"could not upload artifact: {artifact}")
+        # clear dangling storages if we were actually uploading or saving
+        if hasattr(artifact, "_to_store") and artifact._to_store:
+            artifact._clear_storagekey = auto_storage_key_from_artifact(artifact)
         return exception
     # copies (if on-disk) or moves the temporary file (if in-memory) to the cache
     if os.getenv("LAMINDB_MULTI_INSTANCE") is None:
@@ -212,19 +215,25 @@ def copy_or_move_to_cache(

 # This is also used within Artifact.save()
 def check_and_attempt_clearing(
-    artifact: Artifact, using_key: str | None = None
+    artifact: Artifact,
+    raise_file_not_found_error: bool = True,
+    using_key: str | None = None,
 ) -> Exception | None:
     # this is a clean-up operation after replace() was called
-    # this will only evaluate to True if replace() was called
+    # or if there was an exception during upload
     if hasattr(artifact, "_clear_storagekey"):
         try:
             if artifact._clear_storagekey is not None:
-                delete_storage_using_key(
-                    artifact, artifact._clear_storagekey, using_key=using_key
-                )
-                logger.success(
-                    f"deleted stale object at storage key {artifact._clear_storagekey}"
+                delete_msg = delete_storage_using_key(
+                    artifact,
+                    artifact._clear_storagekey,
+                    raise_file_not_found_error=raise_file_not_found_error,
+                    using_key=using_key,
                 )
+                if delete_msg != "did-not-delete":
+                    logger.success(
+                        f"deleted stale object at storage key {artifact._clear_storagekey}"
+                    )
             artifact._clear_storagekey = None
         except Exception as exception:
             return exception
@@ -246,11 +255,17 @@ def store_artifacts(

     # upload new local artifacts
     for artifact in artifacts:
+        # failure here sets ._clear_storagekey
+        # for cleanup below
         exception = check_and_attempt_upload(artifact, using_key)
         if exception is not None:
             break
         stored_artifacts += [artifact]
-        exception = check_and_attempt_clearing(artifact, using_key)
+        # if check_and_attempt_upload was successfull
+        # then this can have only ._clear_storagekey from .replace
+        exception = check_and_attempt_clearing(
+            artifact, raise_file_not_found_error=True, using_key=using_key
+        )
         if exception is not None:
             logger.warning(f"clean up of {artifact._clear_storagekey} failed")
             break
@@ -261,6 +276,14 @@ def store_artifacts(
         for artifact in artifacts:
             if artifact not in stored_artifacts:
                 artifact._delete_skip_storage()
+                # clean up storage after failure in check_and_attempt_upload
+                exception_clear = check_and_attempt_clearing(
+                    artifact, raise_file_not_found_error=False, using_key=using_key
+                )
+                if exception_clear is not None:
+                    logger.warning(
+                        f"clean up of {artifact._clear_storagekey} after the upload error failed"
+                    )
         error_message = prepare_error_message(artifacts, stored_artifacts, exception)
         # this is bad because we're losing the original traceback
         # needs to be refactored - also, the orginal error should be raised
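Note: taken together with the `check_and_attempt_upload` change above, the failure path now behaves like a best-effort rollback. A simplified sketch of the flow, using the names from the diff:

    exception = check_and_attempt_upload(artifact, using_key)
    if exception is not None:
        # the failed upload recorded its storage key in _clear_storagekey
        artifact._delete_skip_storage()  # drop the database row
        check_and_attempt_clearing(      # remove the dangling object if it landed,
            artifact,                    # without insisting that it exists
            raise_file_not_found_error=False,
            using_key=using_key,
        )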
@@ -269,7 +292,7 @@


 def prepare_error_message(records, stored_artifacts, exception) -> str:
-    if len(records) == 1 or len(stored_artifacts) == 0:
+    if len(stored_artifacts) == 0:
         error_message = (
             "No entries were uploaded or committed"
             " to the database. See error message:\n\n"