lamindb 1.0.5__py3-none-any.whl → 1.1.1__py3-none-any.whl

This diff compares the contents of two publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between the versions as they appear in the public registry.
Files changed (80)
  1. lamindb/__init__.py +17 -6
  2. lamindb/_artifact.py +202 -87
  3. lamindb/_can_curate.py +27 -8
  4. lamindb/_collection.py +86 -52
  5. lamindb/_feature.py +177 -41
  6. lamindb/_finish.py +21 -7
  7. lamindb/_from_values.py +83 -98
  8. lamindb/_parents.py +4 -4
  9. lamindb/_query_set.py +78 -18
  10. lamindb/_record.py +170 -53
  11. lamindb/_run.py +4 -4
  12. lamindb/_save.py +42 -11
  13. lamindb/_schema.py +135 -38
  14. lamindb/_storage.py +1 -1
  15. lamindb/_tracked.py +129 -0
  16. lamindb/_transform.py +21 -8
  17. lamindb/_ulabel.py +5 -14
  18. lamindb/base/users.py +1 -4
  19. lamindb/base/validation.py +2 -6
  20. lamindb/core/__init__.py +13 -14
  21. lamindb/core/_context.py +14 -9
  22. lamindb/core/_data.py +29 -25
  23. lamindb/core/_describe.py +1 -1
  24. lamindb/core/_django.py +1 -1
  25. lamindb/core/_feature_manager.py +53 -43
  26. lamindb/core/_label_manager.py +4 -4
  27. lamindb/core/_mapped_collection.py +24 -9
  28. lamindb/core/_track_environment.py +2 -1
  29. lamindb/core/datasets/__init__.py +6 -1
  30. lamindb/core/datasets/_core.py +12 -11
  31. lamindb/core/datasets/_small.py +67 -21
  32. lamindb/core/exceptions.py +1 -90
  33. lamindb/core/loaders.py +21 -15
  34. lamindb/core/relations.py +6 -4
  35. lamindb/core/storage/_anndata_accessor.py +49 -3
  36. lamindb/core/storage/_backed_access.py +12 -7
  37. lamindb/core/storage/_pyarrow_dataset.py +40 -15
  38. lamindb/core/storage/_tiledbsoma.py +56 -12
  39. lamindb/core/storage/paths.py +30 -24
  40. lamindb/core/subsettings/_creation_settings.py +4 -16
  41. lamindb/curators/__init__.py +2193 -846
  42. lamindb/curators/_cellxgene_schemas/__init__.py +26 -0
  43. lamindb/curators/_cellxgene_schemas/schema_versions.yml +104 -0
  44. lamindb/errors.py +96 -0
  45. lamindb/integrations/_vitessce.py +3 -3
  46. lamindb/migrations/0069_squashed.py +76 -75
  47. lamindb/migrations/0075_lamindbv1_part5.py +4 -5
  48. lamindb/migrations/0082_alter_feature_dtype.py +21 -0
  49. lamindb/migrations/0083_alter_feature_is_type_alter_flextable_is_type_and_more.py +94 -0
  50. lamindb/migrations/0084_alter_schemafeature_feature_and_more.py +35 -0
  51. lamindb/migrations/0085_alter_feature_is_type_alter_flextable_is_type_and_more.py +63 -0
  52. lamindb/migrations/0086_various.py +95 -0
  53. lamindb/migrations/0087_rename__schemas_m2m_artifact_feature_sets_and_more.py +41 -0
  54. lamindb/migrations/0088_schema_components.py +273 -0
  55. lamindb/migrations/0088_squashed.py +4372 -0
  56. lamindb/models.py +475 -168
  57. {lamindb-1.0.5.dist-info → lamindb-1.1.1.dist-info}/METADATA +9 -7
  58. lamindb-1.1.1.dist-info/RECORD +95 -0
  59. lamindb/curators/_spatial.py +0 -528
  60. lamindb/migrations/0052_squashed.py +0 -1261
  61. lamindb/migrations/0053_alter_featureset_hash_alter_paramvalue_created_by_and_more.py +0 -57
  62. lamindb/migrations/0054_alter_feature_previous_runs_and_more.py +0 -35
  63. lamindb/migrations/0055_artifact_type_artifactparamvalue_and_more.py +0 -61
  64. lamindb/migrations/0056_rename_ulabel_ref_is_name_artifactulabel_label_ref_is_name_and_more.py +0 -22
  65. lamindb/migrations/0057_link_models_latest_report_and_others.py +0 -356
  66. lamindb/migrations/0058_artifact__actions_collection__actions.py +0 -22
  67. lamindb/migrations/0059_alter_artifact__accessor_alter_artifact__hash_type_and_more.py +0 -31
  68. lamindb/migrations/0060_alter_artifact__actions.py +0 -22
  69. lamindb/migrations/0061_alter_collection_meta_artifact_alter_run_environment_and_more.py +0 -45
  70. lamindb/migrations/0062_add_is_latest_field.py +0 -32
  71. lamindb/migrations/0063_populate_latest_field.py +0 -45
  72. lamindb/migrations/0064_alter_artifact_version_alter_collection_version_and_more.py +0 -33
  73. lamindb/migrations/0065_remove_collection_feature_sets_and_more.py +0 -22
  74. lamindb/migrations/0066_alter_artifact__feature_values_and_more.py +0 -352
  75. lamindb/migrations/0067_alter_featurevalue_unique_together_and_more.py +0 -20
  76. lamindb/migrations/0068_alter_artifactulabel_unique_together_and_more.py +0 -20
  77. lamindb/migrations/0069_alter_artifact__accessor_alter_artifact__hash_type_and_more.py +0 -1294
  78. lamindb-1.0.5.dist-info/RECORD +0 -102
  79. {lamindb-1.0.5.dist-info → lamindb-1.1.1.dist-info}/LICENSE +0 -0
  80. {lamindb-1.0.5.dist-info → lamindb-1.1.1.dist-info}/WHEEL +0 -0
lamindb/_record.py CHANGED
@@ -1,6 +1,7 @@
 from __future__ import annotations

 import builtins
+import inspect
 import re
 from functools import reduce
 from pathlib import PurePosixPath
@@ -29,6 +30,7 @@ from django.db.models.lookups import (
     Regex,
     StartsWith,
 )
+from django.db.utils import IntegrityError
 from lamin_utils import colors, logger
 from lamin_utils._lookup import Lookup
 from lamindb_setup._connect_instance import (
@@ -41,7 +43,7 @@ from lamindb_setup.core._hub_core import connect_instance_hub
 from lamindb_setup.core._settings_store import instance_settings_file
 from lamindb_setup.core.upath import extract_suffix_from_path

-from lamindb.base.validation import FieldValidationError
+from lamindb.errors import FieldValidationError
 from lamindb.models import (
     Artifact,
     BasicRecord,
@@ -60,7 +62,7 @@ from lamindb.models import (

 from ._utils import attach_func_to_class_method
 from .core._settings import settings
-from .core.exceptions import (
+from .errors import (
     InvalidArgument,
     RecordNameChangeIntegrityError,
     ValidationError,
@@ -75,6 +77,30 @@ if TYPE_CHECKING:
 IPYTHON = getattr(builtins, "__IPYTHON__", False)


+def is_approx_pascal_case(s):
+    """Check if the last component of a dotted string is in PascalCase.
+
+    Args:
+        s (str): The string to check
+
+    Returns:
+        bool: True if the last component is in PascalCase
+
+    Raises:
+        ValueError: If the last component doesn't start with a capital letter
+    """
+    if "[" in s:  # this is because we allow types of form 'script[test_script.py]'
+        return True
+    last_component = s.split(".")[-1]
+
+    if not last_component[0].isupper():
+        raise ValueError(
+            f"'{last_component}' should start with a capital letter given you're defining a type"
+        )
+
+    return True
+
+
 def init_self_from_db(self: Record, existing_record: Record):
     new_args = [
         getattr(existing_record, field.attname) for field in self._meta.concrete_fields
@@ -86,7 +112,12 @@ def init_self_from_db(self: Record, existing_record: Record):

 def update_attributes(record: Record, attributes: dict[str, str]):
     for key, value in attributes.items():
-        if getattr(record, key) != value:
+        if (
+            getattr(record, key) != value
+            and value is not None
+            and key != "dtype"
+            and key != "_aux"
+        ):
             logger.warning(f"updated {key} from {getattr(record, key)} to {value}")
             setattr(record, key, value)

@@ -105,7 +136,7 @@ def validate_fields(record: Record, kwargs):
         k for k, v in kwargs.items() if v is None and k in required_fields
     ]
     if missing_fields:
-        raise TypeError(f"{missing_fields} are required.")
+        raise FieldValidationError(f"{missing_fields} are required.")
     # ensure the exact length of the internal uid for core entities
     if "uid" in kwargs and record.__class__ in {
         Artifact,
@@ -122,19 +153,36 @@
         ).max_length  # triggers FieldDoesNotExist
         if len(kwargs["uid"]) != uid_max_length:  # triggers KeyError
             raise ValidationError(
-                f'`uid` must be exactly {uid_max_length} characters long, got {len(kwargs["uid"])}.'
+                f"`uid` must be exactly {uid_max_length} characters long, got {len(kwargs['uid'])}."
+            )
+    # validate is_type
+    if "is_type" in kwargs and "name" in kwargs and kwargs["is_type"]:
+        if kwargs["name"].endswith("s"):
+            logger.warning(
+                f"name '{kwargs['name']}' for type ends with 's', in case you're naming with plural, consider the singular for a type name"
             )
+        is_approx_pascal_case(kwargs["name"])
     # validate literals
     validate_literal_fields(record, kwargs)


-def suggest_records_with_similar_names(record: Record, name_field: str, kwargs) -> bool:
+def suggest_records_with_similar_names(
+    record: Record, name_field: str, kwargs
+) -> Record | None:
     """Returns True if found exact match, otherwise False.

     Logs similar matches if found.
     """
     if kwargs.get(name_field) is None or not isinstance(kwargs.get(name_field), str):
-        return False
+        return None
+    # need to perform an additional request to find the exact match
+    # previously, this was inferred from the truncated/fuzzy search below
+    # but this isn't reliable: https://laminlabs.slack.com/archives/C04FPE8V01W/p1737812808563409
+    # the below needs to be .first() because there might be multiple records with the same
+    # name field in case the record is versioned (e.g. for Transform key)
+    exact_match = record.__class__.filter(**{name_field: kwargs[name_field]}).first()
+    if exact_match is not None:
+        return exact_match
     queryset = _search(
         record.__class__,
         kwargs[name_field],
@@ -143,10 +191,7 @@ def suggest_records_with_similar_names(record: Record, name_field: str, kwargs)
         limit=3,
     )
     if not queryset.exists():  # empty queryset
-        return False
-    for alternative_record in queryset:
-        if getattr(alternative_record, name_field) == kwargs[name_field]:
-            return True
+        return None
     s, it, nots = ("", "it", "s") if len(queryset) == 1 else ("s", "one of them", "")
     msg = f"record{s} with similar {name_field}{s} exist{nots}! did you mean to load {it}?"
     if IPYTHON:
@@ -157,11 +202,14 @@ def suggest_records_with_similar_names(record: Record, name_field: str, kwargs)
             display(queryset.df())
     else:
         logger.warning(f"{msg}\n{queryset}")
-    return False
+    return None


 def __init__(record: Record, *args, **kwargs):
-    if not args:
+    skip_validation = kwargs.pop("_skip_validation", False)
+    if not args and skip_validation:
+        super(BasicRecord, record).__init__(**kwargs)
+    elif not args and not skip_validation:
         validate_fields(record, kwargs)

         # do not search for names if an id is passed; this is important
@@ -170,15 +218,13 @@ def __init__(record: Record, *args, **kwargs):
         if "_has_consciously_provided_uid" in kwargs:
             has_consciously_provided_uid = kwargs.pop("_has_consciously_provided_uid")
         if (
-            isinstance(
-                record, (CanCurate, Collection, Transform)
-            )  # Collection is only temporary because it'll get a key field
+            isinstance(record, (CanCurate, Collection, Transform))
             and settings.creation.search_names
             and not has_consciously_provided_uid
         ):
             name_field = getattr(record, "_name_field", "name")
-            match = suggest_records_with_similar_names(record, name_field, kwargs)
-            if match:
+            exact_match = suggest_records_with_similar_names(record, name_field, kwargs)
+            if exact_match is not None:
                 if "version" in kwargs:
                     if kwargs["version"] is not None:
                         version_comment = " and version"
@@ -189,22 +235,25 @@ def __init__(record: Record, *args, **kwargs):
                             }
                         ).one_or_none()
                     else:
-                        # for a versioned record, an exact name match is not a
-                        # criterion for retrieving a record in case `version`
-                        # isn't passed - we'd always pull out many records with exactly the
-                        # same name
+                        # for a versioned record, an exact name match is not a criterion
+                        # for retrieving a record in case `version` isn't passed -
+                        # we'd always pull out many records with exactly the same name
                         existing_record = None
                 else:
                     version_comment = ""
-                    existing_record = record.__class__.filter(
-                        **{name_field: kwargs[name_field]}
-                    ).one_or_none()
+                    existing_record = exact_match
                 if existing_record is not None:
                     logger.important(
                         f"returning existing {record.__class__.__name__} record with same"
                         f" {name_field}{version_comment}: '{kwargs[name_field]}'"
                     )
+                    if isinstance(record, Schema):
+                        if existing_record.hash != kwargs["hash"]:
+                            raise ValueError(
+                                f"Schema name is already in use by schema with uid '{existing_record.uid}', please choose a different name."
+                            )
                     init_self_from_db(record, existing_record)
+                    update_attributes(record, kwargs)
                     return None
         super(BasicRecord, record).__init__(**kwargs)
         if isinstance(record, ValidateFields):
@@ -218,7 +267,9 @@ def __init__(record: Record, *args, **kwargs):
                 message = _format_django_validation_error(record, e)
                 raise FieldValidationError(message) from e
     elif len(args) != len(record._meta.concrete_fields):
-        raise ValueError("please provide keyword arguments, not plain arguments")
+        raise FieldValidationError(
+            f"Use keyword arguments instead of positional arguments, e.g.: {record.__class__.__name__}(name='...')."
+        )
     else:
         # object is loaded from DB (**kwargs could be omitted below, I believe)
         super(BasicRecord, record).__init__(*args, **kwargs)
@@ -257,6 +308,60 @@ def _format_django_validation_error(record: Record, e: DjangoValidationError):
     return message


+def _get_record_kwargs(record_class) -> list[tuple[str, str]]:
+    """Gets the parameters of a Record from the overloaded signature.
+
+    Example:
+        >>> get_record_params(bt.Organism)
+        >>> [('name', 'str'), ('taxon_id', 'str | None'), ('scientific_name', 'str | None')]
+    """
+    source = inspect.getsource(record_class)
+
+    # Find first overload that's not *db_args
+    pattern = r"@overload\s+def __init__\s*\(([\s\S]*?)\):\s*\.{3}"
+    overloads = re.finditer(pattern, source)
+
+    for overload in overloads:
+        params_block = overload.group(1)
+        # This is an additional safety measure if the overloaded signature that we're
+        # looking for is not at the top but a "db_args" constructor
+        if "*db_args" in params_block:
+            continue
+
+        params = []
+        for line in params_block.split("\n"):
+            line = line.strip()
+            if not line or "self" in line:
+                continue
+
+            # Extract name and type annotation
+            # The regex pattern finds parameter definitions like:
+            # Simple: name: str
+            # With default: age: int = 0
+            # With complex types: items: List[str] = []
+            param_pattern = (
+                r"(\w+)"  # Parameter name
+                r"\s*:\s*"  # Colon with optional whitespace
+                r"((?:[^=,]|"  # Type hint: either non-equals/comma chars
+                r"(?<=\[)[^[\]]*"  # or contents within square brackets
+                r"(?=\]))+)"  # looking ahead for closing bracket
+                r"(?:\s*=\s*"  # Optional default value part
+                r"([^,]+))?"  # Default value: anything but comma
+            )
+            match = re.match(param_pattern, line)
+            if not match:
+                continue
+
+            name, type_str = match.group(1), match.group(2).strip()
+
+            # Keep type as string instead of evaluating
+            params.append((name, type_str))
+
+        return params
+
+    return []
+
+
 @classmethod  # type:ignore
 @doc_args(Record.filter.__doc__)
 def filter(cls, *queries, **expressions) -> QuerySet:
@@ -639,8 +744,8 @@ def get_transfer_run(record) -> Run:
     if transform is None:
         search_names = settings.creation.search_names
         settings.creation.search_names = False
-        transform = Transform(
-            uid=uid, name=f"Transfer from `{slug}`", key=key, type="function"
+        transform = Transform(  # type: ignore
+            uid=uid, description=f"Transfer from `{slug}`", key=key, type="function"
         ).save()
         settings.creation.search_names = search_names
     # use the global run context to get the initiated_by_run run id
@@ -655,7 +760,7 @@ def get_transfer_run(record) -> Run:
         transform=transform, initiated_by_run=initiated_by_run
     ).one_or_none()
     if run is None:
-        run = Run(transform=transform, initiated_by_run=initiated_by_run).save()
+        run = Run(transform=transform, initiated_by_run=initiated_by_run).save()  # type: ignore
         run.initiated_by_run = initiated_by_run  # so that it's available in memory
     return run

@@ -738,28 +843,40 @@ def save(self, *args, **kwargs) -> Record:
     if pre_existing_record is not None:
         init_self_from_db(self, pre_existing_record)
     else:
-        # save versioned record
-        if isinstance(self, IsVersioned) and self._revises is not None:
-            assert self._revises.is_latest  # noqa: S101
-            revises = self._revises
-            revises.is_latest = False
-            with transaction.atomic():
-                revises._revises = None  # ensure we don't start a recursion
-                revises.save()
-            check_name_change(self)
-            check_key_change(self)
+        check_key_change(self)
+        check_name_change(self)
+        try:
+            # save versioned record in presence of self._revises
+            if isinstance(self, IsVersioned) and self._revises is not None:
+                assert self._revises.is_latest  # noqa: S101
+                revises = self._revises
+                revises.is_latest = False
+                with transaction.atomic():
+                    revises._revises = None  # ensure we don't start a recursion
+                    revises.save()
+                super(BasicRecord, self).save(*args, **kwargs)  # type: ignore
+                self._revises = None
+            # save unversioned record
+            else:
                 super(BasicRecord, self).save(*args, **kwargs)
-            _store_record_old_name(self)
-            _store_record_old_key(self)
-            self._revises = None
-        # save unversioned record
-        else:
-            check_name_change(self)
-            check_key_change(self)
-            super(BasicRecord, self).save(*args, **kwargs)
-            # update _old_name and _old_key after saving
-            _store_record_old_name(self)
-            _store_record_old_key(self)
+        except IntegrityError as e:
+            error_msg = str(e)
+            # two possible error messages for hash duplication
+            # "duplicate key value violates unique constraint"
+            # "UNIQUE constraint failed"
+            if (
+                "UNIQUE constraint failed" in error_msg
+                or "duplicate key value violates unique constraint" in error_msg
+            ) and "hash" in error_msg:
+                pre_existing_record = self.__class__.get(hash=self.hash)
+                logger.warning(
+                    f"returning {self.__class__.__name__.lower()} with same hash: {pre_existing_record}"
+                )
+                init_self_from_db(self, pre_existing_record)
+            else:
+                raise
+        _store_record_old_name(self)
+        _store_record_old_key(self)
     # perform transfer of many-to-many fields
     # only supported for Artifact and Collection records
     if db is not None and db != "default" and using_key is None:
@@ -778,7 +895,7 @@ def save(self, *args, **kwargs) -> Record:
         self_on_db = copy(self)
         self_on_db._state.db = db
         self_on_db.pk = pk_on_db  # manually set the primary key
-        self_on_db.features = FeatureManager(self_on_db)
+        self_on_db.features = FeatureManager(self_on_db)  # type: ignore
         self.features._add_from(self_on_db, transfer_logs=transfer_logs)
         self.labels.add_from(self_on_db, transfer_logs=transfer_logs)
         for k, v in transfer_logs.items():
@@ -851,7 +968,7 @@ def check_name_change(record: Record):
     # when a feature is renamed
     elif isinstance(record, Feature):
         # only internal features are associated with schemas
-        linked_artifacts = Artifact.filter(_schemas_m2m__features=record).list(
+        linked_artifacts = Artifact.filter(feature_sets__features=record).list(
            "uid"
         )
         n = len(linked_artifacts)
@@ -915,7 +1032,7 @@ def delete(self) -> None:
             new_latest.is_latest = True
             with transaction.atomic():
                 new_latest.save()
-                super(BasicRecord, self).delete()
+                super(BasicRecord, self).delete()  # type: ignore
             logger.warning(f"new latest version is {new_latest}")
             return None
     super(BasicRecord, self).delete()
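
The net effect of the _record.py changes is to make record creation idempotent in two places: __init__ now resolves an exact name match up front and re-initializes the new object from the existing row, and save() catches hash-uniqueness IntegrityErrors and falls back to the already-stored record. A minimal sketch of the resulting behavior (assuming a connected lamindb instance; ULabel stands in for any CanCurate record):

    import lamindb as ln

    label = ln.ULabel(name="Experiment 1").save()

    # constructing a second record with the same name triggers the exact-match
    # query in suggest_records_with_similar_names(); init_self_from_db() then
    # re-initializes this object from the existing database row
    same_label = ln.ULabel(name="Experiment 1")
    assert same_label.uid == label.uid  # same underlying row, no duplicate created
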
lamindb/_run.py CHANGED
@@ -4,7 +4,7 @@ from lamindb.models import ParamManager, Run, Transform


 def __init__(run: Run, *args, **kwargs):
-    run.params = ParamManager(run)
+    run.params = ParamManager(run)  # type: ignore
     if len(args) == len(run._meta.concrete_fields):
         super(Run, run).__init__(*args, **kwargs)
         return None
@@ -24,7 +24,7 @@ def __init__(run: Run, *args, **kwargs):
     if transform._state.adding:
         raise ValueError("Please save transform record before creating a run")

-    super(Run, run).__init__(
+    super(Run, run).__init__(  # type: ignore
         transform=transform,
         reference=reference,
         initiated_by_run=initiated_by_run,
@@ -56,5 +56,5 @@ def delete(self) -> None:
     super(Run, self).delete()


-Run.__init__ = __init__
-Run.delete = delete
+Run.__init__ = __init__  # type: ignore
+Run.delete = delete  # type: ignore
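
The _run.py changes are type-annotation fixes only, but the surrounding context lines show the constructor's contract: a Run requires its Transform to be saved first (transform._state.adding must be False). A minimal usage sketch (assuming a connected lamindb instance):

    import lamindb as ln

    transform = ln.Transform(key="my-pipeline", type="pipeline")
    # ln.Run(transform=transform) at this point would raise:
    # "Please save transform record before creating a run"
    transform.save()
    run = ln.Run(transform=transform).save()
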
lamindb/_save.py CHANGED
@@ -57,7 +57,7 @@ def save(records: Iterable[Record], ignore_conflicts: bool | None = False) -> No

     For a single record, use ``record.save()``:

-    >>> transform = ln.Transform(name="My pipeline")
+    >>> transform = ln.Transform(key="My pipeline")
     >>> transform.save()

     Update a single existing record:
@@ -133,7 +133,9 @@ def check_and_attempt_upload(
     using_key: str | None = None,
     access_token: str | None = None,
     print_progress: bool = True,
+    **kwargs,
 ) -> Exception | None:
+    # kwargs are propagated to .upload_from in the end
     # if Artifact object is either newly instantiated or replace() was called on
     # a local env it will have a _local_filepath and needs to be uploaded
     if hasattr(artifact, "_local_filepath"):
@@ -143,9 +145,13 @@ def check_and_attempt_upload(
             using_key,
             access_token=access_token,
             print_progress=print_progress,
+            **kwargs,
         )
     except Exception as exception:
         logger.warning(f"could not upload artifact: {artifact}")
+        # clear dangling storages if we were actually uploading or saving
+        if hasattr(artifact, "_to_store") and artifact._to_store:
+            artifact._clear_storagekey = auto_storage_key_from_artifact(artifact)
         return exception
     # copies (if on-disk) or moves the temporary file (if in-memory) to the cache
     if os.getenv("LAMINDB_MULTI_INSTANCE") is None:
@@ -212,19 +218,25 @@ def copy_or_move_to_cache(

 # This is also used within Artifact.save()
 def check_and_attempt_clearing(
-    artifact: Artifact, using_key: str | None = None
+    artifact: Artifact,
+    raise_file_not_found_error: bool = True,
+    using_key: str | None = None,
 ) -> Exception | None:
     # this is a clean-up operation after replace() was called
-    # this will only evaluate to True if replace() was called
+    # or if there was an exception during upload
     if hasattr(artifact, "_clear_storagekey"):
         try:
             if artifact._clear_storagekey is not None:
-                delete_storage_using_key(
-                    artifact, artifact._clear_storagekey, using_key=using_key
-                )
-                logger.success(
-                    f"deleted stale object at storage key {artifact._clear_storagekey}"
+                delete_msg = delete_storage_using_key(
+                    artifact,
+                    artifact._clear_storagekey,
+                    raise_file_not_found_error=raise_file_not_found_error,
+                    using_key=using_key,
                 )
+                if delete_msg != "did-not-delete":
+                    logger.success(
+                        f"deleted stale object at storage key {artifact._clear_storagekey}"
+                    )
                 artifact._clear_storagekey = None
         except Exception as exception:
             return exception
@@ -246,11 +258,17 @@ def store_artifacts(

     # upload new local artifacts
     for artifact in artifacts:
+        # failure here sets ._clear_storagekey
+        # for cleanup below
         exception = check_and_attempt_upload(artifact, using_key)
         if exception is not None:
             break
         stored_artifacts += [artifact]
-        exception = check_and_attempt_clearing(artifact, using_key)
+        # if check_and_attempt_upload was successfull
+        # then this can have only ._clear_storagekey from .replace
+        exception = check_and_attempt_clearing(
+            artifact, raise_file_not_found_error=True, using_key=using_key
+        )
         if exception is not None:
             logger.warning(f"clean up of {artifact._clear_storagekey} failed")
             break
@@ -261,6 +279,14 @@ def store_artifacts(
         for artifact in artifacts:
             if artifact not in stored_artifacts:
                 artifact._delete_skip_storage()
+                # clean up storage after failure in check_and_attempt_upload
+                exception_clear = check_and_attempt_clearing(
+                    artifact, raise_file_not_found_error=False, using_key=using_key
+                )
+                if exception_clear is not None:
+                    logger.warning(
+                        f"clean up of {artifact._clear_storagekey} after the upload error failed"
+                    )
         error_message = prepare_error_message(artifacts, stored_artifacts, exception)
         # this is bad because we're losing the original traceback
         # needs to be refactored - also, the orginal error should be raised
@@ -269,7 +295,7 @@ def store_artifacts(


 def prepare_error_message(records, stored_artifacts, exception) -> str:
-    if len(records) == 1 or len(stored_artifacts) == 0:
+    if len(stored_artifacts) == 0:
         error_message = (
             "No entries were uploaded or committed"
             " to the database. See error message:\n\n"
@@ -293,8 +319,10 @@ def upload_artifact(
     using_key: str | None = None,
     access_token: str | None = None,
     print_progress: bool = True,
+    **kwargs,
 ) -> tuple[UPath, UPath | None]:
     """Store and add file and its linked entries."""
+    # kwargs are propagated to .upload_from in the end
     # can't currently use filepath_from_artifact here because it resolves to ._local_filepath
     storage_key = auto_storage_key_from_artifact(artifact)
     storage_path, storage_settings = attempt_accessing_path(
@@ -303,7 +331,10 @@ def upload_artifact(
     if hasattr(artifact, "_to_store") and artifact._to_store:
         logger.save(f"storing artifact '{artifact.uid}' at '{storage_path}'")
         store_file_or_folder(
-            artifact._local_filepath, storage_path, print_progress=print_progress
+            artifact._local_filepath,
+            storage_path,
+            print_progress=print_progress,
+            **kwargs,
         )

     if isinstance(storage_path, LocalPathClasses):
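
Taken together, the _save.py changes implement a mark-and-sweep cleanup for failed uploads: check_and_attempt_upload marks the artifact's storage key on failure (artifact._clear_storagekey), and check_and_attempt_clearing sweeps it, passing raise_file_not_found_error=False on the error path because a failed upload may never have written anything. A simplified standalone sketch of the same pattern (hypothetical helper names, not the lamindb API):

    from pathlib import Path

    def upload_with_cleanup(src: Path, dest: Path) -> tuple[Exception | None, Path | None]:
        """Copy src to dest; on failure, return dest as a dangling key to sweep."""
        try:
            dest.write_bytes(src.read_bytes())
            return None, None
        except Exception as exc:
            # analogous to setting artifact._clear_storagekey on upload failure:
            # dest may be half-written and must be cleaned up later
            return exc, dest

    def clear_dangling(key: Path | None, raise_file_not_found_error: bool = True) -> None:
        """Delete a possibly half-written object; optionally tolerate its absence."""
        if key is None:
            return
        try:
            key.unlink()
        except FileNotFoundError:
            # on the error path nothing may have been written at all, which is
            # why store_artifacts passes raise_file_not_found_error=False there
            if raise_file_not_found_error:
                raise

store_artifacts runs the sweep with raise_file_not_found_error=True after a successful upload, where only a leftover from replace() can exist, and with raise_file_not_found_error=False when unwinding a failed batch.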