lamindb 1.6.2__py3-none-any.whl → 1.7.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62) hide show
  1. lamindb/__init__.py +1 -3
  2. lamindb/_finish.py +32 -16
  3. lamindb/base/types.py +6 -4
  4. lamindb/core/_context.py +127 -57
  5. lamindb/core/_mapped_collection.py +1 -1
  6. lamindb/core/_settings.py +44 -4
  7. lamindb/core/_track_environment.py +5 -2
  8. lamindb/core/loaders.py +1 -1
  9. lamindb/core/storage/_anndata_accessor.py +1 -1
  10. lamindb/core/storage/_tiledbsoma.py +14 -8
  11. lamindb/core/storage/_valid_suffixes.py +0 -1
  12. lamindb/core/storage/_zarr.py +1 -1
  13. lamindb/core/storage/objects.py +13 -8
  14. lamindb/core/storage/paths.py +9 -6
  15. lamindb/core/types.py +1 -1
  16. lamindb/curators/_legacy.py +2 -1
  17. lamindb/curators/core.py +106 -105
  18. lamindb/errors.py +9 -0
  19. lamindb/examples/fixtures/__init__.py +0 -0
  20. lamindb/examples/fixtures/sheets.py +224 -0
  21. lamindb/migrations/0103_remove_writelog_migration_state_and_more.py +1 -1
  22. lamindb/migrations/0105_record_unique_name.py +20 -0
  23. lamindb/migrations/0106_transfer_data_migration.py +25 -0
  24. lamindb/migrations/0107_add_schema_to_record.py +68 -0
  25. lamindb/migrations/0108_remove_record_sheet_remove_sheetproject_sheet_and_more.py +30 -0
  26. lamindb/migrations/0109_record_input_of_runs_alter_record_run_and_more.py +123 -0
  27. lamindb/migrations/0110_rename_values_artifacts_record_linked_artifacts.py +17 -0
  28. lamindb/migrations/0111_remove_record__sort_order.py +148 -0
  29. lamindb/migrations/0112_alter_recordartifact_feature_and_more.py +105 -0
  30. lamindb/migrations/0113_lower_case_branch_and_space_names.py +62 -0
  31. lamindb/migrations/0114_alter_run__status_code.py +24 -0
  32. lamindb/migrations/0115_alter_space_uid.py +52 -0
  33. lamindb/migrations/{0104_squashed.py → 0115_squashed.py} +261 -257
  34. lamindb/models/__init__.py +4 -3
  35. lamindb/models/_describe.py +88 -31
  36. lamindb/models/_feature_manager.py +627 -658
  37. lamindb/models/_label_manager.py +1 -3
  38. lamindb/models/artifact.py +214 -99
  39. lamindb/models/collection.py +7 -1
  40. lamindb/models/feature.py +288 -60
  41. lamindb/models/has_parents.py +3 -3
  42. lamindb/models/project.py +32 -15
  43. lamindb/models/query_manager.py +7 -1
  44. lamindb/models/query_set.py +118 -41
  45. lamindb/models/record.py +140 -94
  46. lamindb/models/run.py +42 -42
  47. lamindb/models/save.py +102 -16
  48. lamindb/models/schema.py +41 -8
  49. lamindb/models/sqlrecord.py +105 -40
  50. lamindb/models/storage.py +278 -0
  51. lamindb/models/transform.py +10 -2
  52. lamindb/models/ulabel.py +9 -1
  53. lamindb/py.typed +0 -0
  54. lamindb/setup/__init__.py +2 -1
  55. lamindb/setup/_switch.py +16 -0
  56. lamindb/setup/errors/__init__.py +4 -0
  57. lamindb/setup/types/__init__.py +4 -0
  58. {lamindb-1.6.2.dist-info → lamindb-1.7.0.dist-info}/METADATA +5 -5
  59. {lamindb-1.6.2.dist-info → lamindb-1.7.0.dist-info}/RECORD +61 -44
  60. lamindb/models/core.py +0 -135
  61. {lamindb-1.6.2.dist-info → lamindb-1.7.0.dist-info}/LICENSE +0 -0
  62. {lamindb-1.6.2.dist-info → lamindb-1.7.0.dist-info}/WHEEL +0 -0
lamindb/models/save.py CHANGED
@@ -29,7 +29,11 @@ if TYPE_CHECKING:
29
29
  from .artifact import Artifact
30
30
 
31
31
 
32
- def save(records: Iterable[SQLRecord], ignore_conflicts: bool | None = False) -> None:
32
+ def save(
33
+ records: Iterable[SQLRecord],
34
+ ignore_conflicts: bool | None = False,
35
+ batch_size: int = 10000,
36
+ ) -> None:
33
37
  """Bulk save records.
34
38
 
35
39
  Note:
@@ -44,9 +48,11 @@ def save(records: Iterable[SQLRecord], ignore_conflicts: bool | None = False) ->
44
48
  Args:
45
49
  records: Multiple :class:`~lamindb.models.SQLRecord` objects.
46
50
  ignore_conflicts: If ``True``, do not error if some records violate a
47
- unique or another constraint. However, it won't inplace update the id
48
- fields of records. If you need records with ids, you need to query
49
- them from the database.
51
+ unique or another constraint. However, it won't inplace update the id
52
+ fields of records. If you need records with ids, you need to query
53
+ them from the database.
54
+ batch_size: Number of records to process in each batch. Defaults to 10000.
55
+ Large batch sizes can improve performance but may lead to memory issues.
50
56
 
51
57
  Examples:
52
58
 
@@ -81,9 +87,11 @@ def save(records: Iterable[SQLRecord], ignore_conflicts: bool | None = False) ->
81
87
  non_artifacts_old, non_artifacts_new = partition(
82
88
  lambda r: r._state.adding or r.pk is None, non_artifacts
83
89
  )
84
- bulk_create(non_artifacts_new, ignore_conflicts=ignore_conflicts)
90
+ bulk_create(
91
+ non_artifacts_new, ignore_conflicts=ignore_conflicts, batch_size=batch_size
92
+ )
85
93
  if non_artifacts_old:
86
- bulk_update(non_artifacts_old)
94
+ bulk_update(non_artifacts_old, batch_size=batch_size)
87
95
  non_artifacts_with_parents = [
88
96
  r for r in non_artifacts_new if hasattr(r, "_parents")
89
97
  ]
@@ -97,6 +105,11 @@ def save(records: Iterable[SQLRecord], ignore_conflicts: bool | None = False) ->
97
105
  if artifacts:
98
106
  with transaction.atomic():
99
107
  for record in artifacts:
108
+ # will switch to True after the successful upload / saving
109
+ if hasattr(record, "_local_filepath") and getattr(
110
+ record, "_to_store", False
111
+ ):
112
+ record._is_saved_to_storage_location = False
100
113
  record._save_skip_storage()
101
114
  using_key = settings._using_key
102
115
  store_artifacts(artifacts, using_key=using_key)
@@ -107,26 +120,85 @@ def save(records: Iterable[SQLRecord], ignore_conflicts: bool | None = False) ->
107
120
  return None
108
121
 
109
122
 
110
- def bulk_create(records: Iterable[SQLRecord], ignore_conflicts: bool | None = False):
123
+ def bulk_create(
124
+ records: Iterable[SQLRecord],
125
+ ignore_conflicts: bool | None = False,
126
+ batch_size: int = 10000,
127
+ ):
128
+ """Create records in batches for safety and performance.
129
+
130
+ Args:
131
+ records: Iterable of SQLRecord objects to create
132
+ ignore_conflicts: Whether to ignore conflicts during creation
133
+ batch_size: Number of records to process in each batch. Defaults to 10000.
134
+ """
111
135
  records_by_orm = defaultdict(list)
112
136
  for record in records:
113
137
  records_by_orm[record.__class__].append(record)
114
- for registry, records in records_by_orm.items():
115
- registry.objects.bulk_create(records, ignore_conflicts=ignore_conflicts)
116
- # records[:] = created # In-place list update; does not seem to be necessary
117
138
 
139
+ for registry, records_list in records_by_orm.items():
140
+ total_records = len(records_list)
141
+ model_name = registry.__name__
142
+ if total_records > batch_size:
143
+ logger.warning(
144
+ f"Starting bulk_create for {total_records} {model_name} records in batches of {batch_size}"
145
+ )
146
+
147
+ # Process records in batches
148
+ for i in range(0, len(records_list), batch_size):
149
+ batch = records_list[i : i + batch_size]
150
+ batch_num = (i // batch_size) + 1
151
+ total_batches = (total_records + batch_size - 1) // batch_size
152
+
153
+ if total_records > batch_size:
154
+ logger.info(
155
+ f"Processing batch {batch_num}/{total_batches} for {model_name}: {len(batch)} records"
156
+ )
157
+ registry.objects.bulk_create(batch, ignore_conflicts=ignore_conflicts)
158
+ # records[:] = created # In-place list update; does not seem to be necessary
159
+
160
+
161
+ def bulk_update(
162
+ records: Iterable[SQLRecord],
163
+ ignore_conflicts: bool | None = False,
164
+ batch_size: int = 10000,
165
+ ):
166
+ """Update records in batches for safety and performance.
118
167
 
119
- def bulk_update(records: Iterable[SQLRecord], ignore_conflicts: bool | None = False):
168
+ Args:
169
+ records: Iterable of SQLRecord objects to update
170
+ ignore_conflicts: Whether to ignore conflicts during update (currently unused but kept for consistency)
171
+ batch_size: Number of records to process in each batch. If None, processes all at once.
172
+ """
120
173
  records_by_orm = defaultdict(list)
121
174
  for record in records:
122
175
  records_by_orm[record.__class__].append(record)
123
- for registry, records in records_by_orm.items():
176
+
177
+ for registry, records_list in records_by_orm.items():
178
+ total_records = len(records_list)
179
+ model_name = registry.__name__
180
+ if total_records > batch_size:
181
+ logger.warning(
182
+ f"Starting bulk_update for {total_records} {model_name} records in batches of {batch_size}"
183
+ )
184
+
124
185
  field_names = [
125
186
  field.name
126
187
  for field in registry._meta.fields
127
188
  if (field.name != "created_at" and field.name != "id")
128
189
  ]
129
- registry.objects.bulk_update(records, field_names)
190
+
191
+ # Process records in batches
192
+ for i in range(0, len(records_list), batch_size):
193
+ batch = records_list[i : i + batch_size]
194
+ batch_num = (i // batch_size) + 1
195
+ total_batches = (total_records + batch_size - 1) // batch_size
196
+
197
+ if total_records > batch_size:
198
+ logger.info(
199
+ f"Processing batch {batch_num}/{total_batches} for {model_name}: {len(batch)} records"
200
+ )
201
+ registry.objects.bulk_update(batch, field_names)
130
202
 
131
203
 
132
204
  # This is also used within Artifact.save()
@@ -152,7 +224,7 @@ def check_and_attempt_upload(
152
224
  except Exception as exception:
153
225
  logger.warning(f"could not upload artifact: {artifact}")
154
226
  # clear dangling storages if we were actually uploading or saving
155
- if hasattr(artifact, "_to_store") and artifact._to_store:
227
+ if getattr(artifact, "_to_store", False):
156
228
  artifact._clear_storagekey = auto_storage_key_from_artifact(artifact)
157
229
  return exception
158
230
  # copies (if on-disk) or moves the temporary file (if in-memory) to the cache
@@ -257,6 +329,8 @@ def store_artifacts(
257
329
 
258
330
  If any upload fails, subsequent artifacts are cleaned up from the DB.
259
331
  """
332
+ from .artifact import Artifact
333
+
260
334
  exception: Exception | None = None
261
335
  # because uploads might fail, we need to maintain a new list
262
336
  # of the succeeded uploads
@@ -269,8 +343,20 @@ def store_artifacts(
269
343
  exception = check_and_attempt_upload(artifact, using_key)
270
344
  if exception is not None:
271
345
  break
346
+
272
347
  stored_artifacts += [artifact]
273
- # if check_and_attempt_upload was successfull
348
+ # update to show successful saving
349
+ # only update if _is_saved_to_storage_location was set to False before
350
+ # this should be a single transaction for the updates of all the artifacts
351
+ # but then it would just abort all artifacts, even those successfully saved before
352
+ # TODO: there should also be some kind of exception handling here
353
+ # but this requires proper refactoring
354
+ if artifact._is_saved_to_storage_location is False:
355
+ artifact._is_saved_to_storage_location = True
356
+ super(
357
+ Artifact, artifact
358
+ ).save() # each .save is a separate transaction here
359
+ # if check_and_attempt_upload was successful
274
360
  # then this can have only ._clear_storagekey from .replace
275
361
  exception = check_and_attempt_clearing(
276
362
  artifact, raise_file_not_found_error=True, using_key=using_key
@@ -334,7 +420,7 @@ def upload_artifact(
334
420
  storage_path, storage_settings = attempt_accessing_path(
335
421
  artifact, storage_key, using_key=using_key, access_token=access_token
336
422
  )
337
- if hasattr(artifact, "_to_store") and artifact._to_store:
423
+ if getattr(artifact, "_to_store", False):
338
424
  logger.save(f"storing artifact '{artifact.uid}' at '{storage_path}'")
339
425
  store_file_or_folder(
340
426
  artifact._local_filepath,
lamindb/models/schema.py CHANGED
@@ -52,7 +52,7 @@ if TYPE_CHECKING:
52
52
 
53
53
  from .artifact import Artifact
54
54
  from .project import Project
55
- from .query_set import QuerySet
55
+ from .query_set import QuerySet, SQLRecordList
56
56
 
57
57
 
58
58
  NUMBER_TYPE = "num"
@@ -464,7 +464,10 @@ class Schema(SQLRecord, CanCurate, TracksRun):
464
464
  @overload
465
465
  def __init__(
466
466
  self,
467
- features: list[SQLRecord] | list[tuple[Feature, dict]] | None = None,
467
+ features: list[SQLRecord]
468
+ | SQLRecordList
469
+ | list[tuple[Feature, dict]]
470
+ | None = None,
468
471
  index: Feature | None = None,
469
472
  slots: dict[str, Schema] | None = None,
470
473
  name: str | None = None,
@@ -518,6 +521,10 @@ class Schema(SQLRecord, CanCurate, TracksRun):
518
521
  coerce_dtype: bool | None = kwargs.pop("coerce_dtype", False)
519
522
  using: bool | None = kwargs.pop("using", None)
520
523
  n_features: int | None = kwargs.pop("n", None)
524
+ kwargs.pop("branch", None)
525
+ kwargs.pop("branch_id", 1)
526
+ kwargs.pop("space", None)
527
+ kwargs.pop("space_id", 1)
521
528
  # backward compat
522
529
  if not slots:
523
530
  if "components" in kwargs:
@@ -580,11 +587,6 @@ class Schema(SQLRecord, CanCurate, TracksRun):
580
587
  else:
581
588
  validated_kwargs["uid"] = ids.base62_16()
582
589
  super().__init__(**validated_kwargs)
583
- # manipulating aux fields is easier after calling super().__init__()
584
- self.optionals.set(optional_features)
585
- self.flexible = flexible
586
- if index is not None:
587
- self._index_feature_uid = index.uid
588
590
 
589
591
  def _validate_kwargs_calculate_hash(
590
592
  self,
@@ -608,13 +610,16 @@ class Schema(SQLRecord, CanCurate, TracksRun):
608
610
  ) -> tuple[list[Feature], dict[str, Any], list[Feature], Registry, bool]:
609
611
  optional_features = []
610
612
  features_registry: Registry = None
613
+
611
614
  if itype is not None:
612
615
  if itype != "Composite":
613
616
  itype = serialize_dtype(itype, is_itype=True)
617
+
614
618
  if index is not None:
615
619
  if not isinstance(index, Feature):
616
620
  raise TypeError("index must be a Feature")
617
621
  features.insert(0, index)
622
+
618
623
  if features:
619
624
  features, configs = get_features_config(features)
620
625
  features_registry = validate_features(features)
@@ -642,12 +647,15 @@ class Schema(SQLRecord, CanCurate, TracksRun):
642
647
  else:
643
648
  dtype = get_type_str(dtype)
644
649
  flexible_default = n_features < 0
650
+
645
651
  if flexible is None:
646
652
  flexible = flexible_default
653
+
647
654
  if slots:
648
655
  itype = "Composite"
649
656
  if otype is None:
650
657
  raise InvalidArgument("Please pass otype != None for composite schemas")
658
+
651
659
  if itype is not None and not isinstance(itype, str):
652
660
  itype_str = serialize_dtype(itype, is_itype=True)
653
661
  else:
@@ -667,8 +675,28 @@ class Schema(SQLRecord, CanCurate, TracksRun):
667
675
  }
668
676
  n_features_default = -1
669
677
  coerce_dtype_default = False
678
+ aux_dict: dict[str, dict[str, bool | str | list[str]]] = {}
679
+
680
+ # TODO: leverage a common abstraction across the properties and this here
681
+
682
+ # coerce_dtype (key "0")
670
683
  if coerce_dtype:
671
- validated_kwargs["_aux"] = {"af": {"0": coerce_dtype}}
684
+ aux_dict.setdefault("af", {})["0"] = coerce_dtype
685
+
686
+ # optional features (key "1")
687
+ if optional_features:
688
+ aux_dict.setdefault("af", {})["1"] = [f.uid for f in optional_features]
689
+
690
+ # flexible (key "2")
691
+ if flexible is not None:
692
+ aux_dict.setdefault("af", {})["2"] = flexible
693
+
694
+ # index feature (key "3")
695
+ if index is not None:
696
+ aux_dict.setdefault("af", {})["3"] = index.uid
697
+
698
+ if aux_dict:
699
+ validated_kwargs["_aux"] = aux_dict
672
700
  if slots:
673
701
  list_for_hashing = [component.hash for component in slots.values()]
674
702
  else:
@@ -716,9 +744,11 @@ class Schema(SQLRecord, CanCurate, TracksRun):
716
744
  ":".join(sorted(feature_list_for_hashing))
717
745
  )
718
746
  list_for_hashing.append(f"{HASH_CODE['features_hash']}={features_hash}")
747
+
719
748
  self._list_for_hashing = sorted(list_for_hashing)
720
749
  schema_hash = hash_string(":".join(self._list_for_hashing))
721
750
  validated_kwargs["hash"] = schema_hash
751
+
722
752
  return (
723
753
  features,
724
754
  validated_kwargs,
@@ -1111,6 +1141,9 @@ class Schema(SQLRecord, CanCurate, TracksRun):
1111
1141
 
1112
1142
  def describe(self, return_str=False) -> None | str:
1113
1143
  """Describe schema."""
1144
+ if self.pk is None:
1145
+ raise ValueError("Schema must be saved before describing")
1146
+
1114
1147
  message = str(self)
1115
1148
  # display slots for composite schemas
1116
1149
  if self.itype == "Composite":
@@ -28,6 +28,7 @@ from django.db.models.fields.related import (
28
28
  ManyToManyRel,
29
29
  ManyToOneRel,
30
30
  )
31
+ from django.db.models.functions import Lower
31
32
  from lamin_utils import colors, logger
32
33
  from lamindb_setup import settings as setup_settings
33
34
  from lamindb_setup._connect_instance import (
@@ -282,7 +283,7 @@ def validate_fields(record: SQLRecord, kwargs):
282
283
  def suggest_records_with_similar_names(
283
284
  record: SQLRecord, name_field: str, kwargs
284
285
  ) -> SQLRecord | None:
285
- """Returns True if found exact match, otherwise False.
286
+ """Returns a record if found exact match, otherwise None.
286
287
 
287
288
  Logs similar matches if found.
288
289
  """
@@ -305,18 +306,15 @@ def suggest_records_with_similar_names(
305
306
  )
306
307
  if not queryset.exists(): # empty queryset
307
308
  return None
308
- s, it, nots = ("", "it", "s") if len(queryset) == 1 else ("s", "one of them", "")
309
- msg = f"record{s} with similar {name_field}{s} exist{nots}! did you mean to load {it}?"
310
- if IPYTHON:
311
- from IPython.display import display
312
-
313
- from lamindb import settings
309
+ s, it, nots, record_text = (
310
+ ("", "it", "s", "a record")
311
+ if len(queryset) == 1
312
+ else ("s", "one of them", "", "records")
313
+ )
314
+ similar_names = ", ".join(f"'{getattr(record, name_field)}'" for record in queryset)
315
+ msg = f"you are trying to create a record with name='{kwargs[name_field]}' but {record_text} with similar {name_field}{s} exist{nots}: {similar_names}. Did you mean to load {it}?"
316
+ logger.warning(f"{msg}")
314
317
 
315
- logger.warning(f"{msg}")
316
- if settings._verbosity_int >= 1:
317
- display(queryset.df())
318
- else:
319
- logger.warning(f"{msg}\n{queryset}")
320
318
  return None
321
319
 
322
320
 
@@ -405,9 +403,10 @@ class Registry(ModelBase):
405
403
  cls,
406
404
  field: StrField | None = None,
407
405
  return_field: StrField | None = None,
406
+ keep: Literal["first", "last", False] = "first",
408
407
  ) -> NamedTuple:
409
408
  """{}""" # noqa: D415
410
- return _lookup(cls=cls, field=field, return_field=return_field)
409
+ return _lookup(cls=cls, field=field, return_field=return_field, keep=keep)
411
410
 
412
411
  def filter(cls, *queries, **expressions) -> QuerySet:
413
412
  """Query records.
@@ -467,7 +466,7 @@ class Registry(ModelBase):
467
466
  def df(
468
467
  cls,
469
468
  include: str | list[str] | None = None,
470
- features: bool | list[str] = False,
469
+ features: bool | list[str] | str = False,
471
470
  limit: int = 100,
472
471
  ) -> pd.DataFrame:
473
472
  """Convert to `pd.DataFrame`.
@@ -480,9 +479,11 @@ class Registry(ModelBase):
480
479
  include: Related fields to include as columns. Takes strings of
481
480
  form `"ulabels__name"`, `"cell_types__name"`, etc. or a list
482
481
  of such strings.
483
- features: If `True`, map all features of the
484
- :class:`~lamindb.Feature` registry onto the resulting
485
- `DataFrame`. Only available for `Artifact`.
482
+ features: If a list of feature names, filters
483
+ :class:`~lamindb.Feature` down to these features.
484
+ If `True`, prints all features with dtypes in the core schema module.
485
+ If `"queryset"`, infers the features used within the set of artifacts or records.
486
+ Only available for `Artifact` and `Record`.
486
487
  limit: Maximum number of rows to display from a Pandas DataFrame.
487
488
  Defaults to 100 to reduce database load.
488
489
 
@@ -549,8 +550,10 @@ class Registry(ModelBase):
549
550
  # we're in the default instance
550
551
  if instance is None or instance == "default":
551
552
  return QuerySet(model=cls, using=None)
553
+
552
554
  owner, name = get_owner_name_from_identifier(instance)
553
- if [owner, name] == setup_settings.instance.slug.split("/"):
555
+ current_instance_owner_name: list[str] = setup_settings.instance.slug.split("/")
556
+ if [owner, name] == current_instance_owner_name:
554
557
  return QuerySet(model=cls, using=None)
555
558
 
556
559
  # move on to different instances
@@ -565,6 +568,9 @@ class Registry(ModelBase):
565
568
  f"Failed to load instance {instance}, please check your permissions!"
566
569
  )
567
570
  iresult, _ = result
571
+ # this can happen if querying via an old instance name
572
+ if [iresult.get("owner"), iresult["name"]] == current_instance_owner_name:
573
+ return QuerySet(model=cls, using=None)
568
574
  # do not use {} syntax below, it gives rise to a dict if the schema modules
569
575
  # are empty and then triggers a TypeError in missing_members = source_modules - target_modules
570
576
  source_modules = set( # noqa
@@ -655,23 +661,53 @@ class BaseSQLRecord(models.Model, metaclass=Registry):
655
661
  def __init__(self, *args, **kwargs):
656
662
  skip_validation = kwargs.pop("_skip_validation", False)
657
663
  if not args:
658
- if self.__class__.__name__ in {
659
- "Artifact",
660
- "Collection",
661
- "Transform",
662
- "Run",
663
- }:
664
+ if (
665
+ issubclass(self.__class__, SQLRecord)
666
+ and self.__class__.__name__ != "Storage"
667
+ # do not save bionty entities in restricted spaces by default
668
+ and self.__class__.__module__ != "bionty.models"
669
+ ):
664
670
  from lamindb import context as run_context
665
671
 
666
672
  if run_context.space is not None:
667
- kwargs["space"] = run_context.space
673
+ current_space = run_context.space
674
+ elif setup_settings.space is not None:
675
+ current_space = setup_settings.space
676
+
677
+ if current_space is not None:
678
+ if "space_id" in kwargs:
679
+ # space_id takes precedence over space
680
+ # https://claude.ai/share/f045e5dc-0143-4bc5-b8a4-38309229f75e
681
+ if kwargs["space_id"] == 1: # ignore default space
682
+ kwargs.pop("space_id")
683
+ kwargs["space"] = current_space
684
+ elif "space" in kwargs:
685
+ if kwargs["space"] is None:
686
+ kwargs["space"] = current_space
687
+ else:
688
+ kwargs["space"] = current_space
668
689
  if issubclass(
669
690
  self.__class__, SQLRecord
670
691
  ) and self.__class__.__name__ not in {"Storage", "Source"}:
671
692
  from lamindb import context as run_context
672
693
 
673
694
  if run_context.branch is not None:
674
- kwargs["branch"] = run_context.branch
695
+ current_branch = run_context.branch
696
+ elif setup_settings.branch is not None:
697
+ current_branch = setup_settings.branch
698
+
699
+ if current_branch is not None:
700
+ # branch_id takes precedence over branch
701
+ # https://claude.ai/share/f045e5dc-0143-4bc5-b8a4-38309229f75e
702
+ if "branch_id" in kwargs:
703
+ if kwargs["branch_id"] == 1: # ignore default branch
704
+ kwargs.pop("branch_id")
705
+ kwargs["branch"] = current_branch
706
+ elif "branch" in kwargs:
707
+ if kwargs["branch"] is None:
708
+ kwargs["branch"] = current_branch
709
+ else:
710
+ kwargs["branch"] = current_branch
675
711
  if skip_validation:
676
712
  super().__init__(**kwargs)
677
713
  else:
@@ -743,7 +779,7 @@ class BaseSQLRecord(models.Model, metaclass=Registry):
743
779
  super().__init__(*args)
744
780
  track_current_key_and_name_values(self)
745
781
 
746
- def save(self, *args, **kwargs) -> SQLRecord:
782
+ def save(self: T, *args, **kwargs) -> T:
747
783
  """Save.
748
784
 
749
785
  Always saves to the default database.
@@ -751,6 +787,7 @@ class BaseSQLRecord(models.Model, metaclass=Registry):
751
787
  using_key = None
752
788
  if "using" in kwargs:
753
789
  using_key = kwargs["using"]
790
+ transfer_config = kwargs.pop("transfer", None)
754
791
  db = self._state.db
755
792
  pk_on_db = self.pk
756
793
  artifacts: list = []
@@ -812,6 +849,20 @@ class BaseSQLRecord(models.Model, metaclass=Registry):
812
849
  f"returning {self.__class__.__name__.lower()} with same hash: {pre_existing_record}"
813
850
  )
814
851
  init_self_from_db(self, pre_existing_record)
852
+ elif (
853
+ self.__class__.__name__ == "Storage"
854
+ and isinstance(e, IntegrityError)
855
+ and "root" in error_msg
856
+ or "uid" in error_msg
857
+ and (
858
+ "UNIQUE constraint failed" in error_msg
859
+ or "duplicate key value violates unique constraint" in error_msg
860
+ )
861
+ ):
862
+ # even if uid was in the error message, we can retrieve based on
863
+ # the root because it's going to be the same root
864
+ pre_existing_record = self.__class__.get(root=self.root)
865
+ init_self_from_db(self, pre_existing_record)
815
866
  elif (
816
867
  isinstance(e, ProgrammingError)
817
868
  and hasattr(self, "space")
@@ -834,21 +885,18 @@ class BaseSQLRecord(models.Model, metaclass=Registry):
834
885
  for artifact in artifacts:
835
886
  artifact.save()
836
887
  self.artifacts.add(*artifacts)
837
- if hasattr(self, "labels"):
888
+ if hasattr(self, "labels") and transfer_config == "annotations":
838
889
  from copy import copy
839
890
 
840
- from lamindb.models._feature_manager import FeatureManager
841
-
842
891
  # here we go back to original record on the source database
843
892
  self_on_db = copy(self)
844
893
  self_on_db._state.db = db
845
894
  self_on_db.pk = pk_on_db # manually set the primary key
846
- self_on_db.features = FeatureManager(self_on_db) # type: ignore
847
895
  self.features._add_from(self_on_db, transfer_logs=transfer_logs)
848
896
  self.labels.add_from(self_on_db, transfer_logs=transfer_logs)
849
897
  for k, v in transfer_logs.items():
850
898
  if k != "run" and len(v) > 0:
851
- logger.important(f"{k} records: {', '.join(v)}")
899
+ logger.important(f"{k}: {', '.join(v)}")
852
900
 
853
901
  if self.__class__.__name__ in {
854
902
  "Artifact",
@@ -901,6 +949,11 @@ class Space(BaseSQLRecord):
901
949
  All data in this registry is synchronized from LaminHub so that spaces can be shared and reused across multiple LaminDB instances.
902
950
  """
903
951
 
952
+ class Meta:
953
+ constraints = [
954
+ models.UniqueConstraint(Lower("name"), name="unique_space_name_lower")
955
+ ]
956
+
904
957
  id: int = models.SmallAutoField(primary_key=True)
905
958
  """Internal id, valid only in one DB instance."""
906
959
  name: str = models.CharField(max_length=100, db_index=True)
@@ -909,8 +962,8 @@ class Space(BaseSQLRecord):
909
962
  editable=False,
910
963
  unique=True,
911
964
  max_length=12,
912
- default="A",
913
- db_default="A",
965
- default="aaaaaaaaaaaa",
966
+ db_default="aaaaaaaaaaaa",
914
967
  db_index=True,
915
968
  )
916
969
  """Universal id."""
@@ -963,6 +1016,11 @@ class Branch(BaseSQLRecord):
963
1016
  # that can be merged onto the main branch in an experience akin to a Pull Request. The mapping
964
1017
  # onto a semantic branch name is handled through LaminHub.
965
1018
 
1019
+ class Meta:
1020
+ constraints = [
1021
+ models.UniqueConstraint(Lower("name"), name="unique_branch_name_lower")
1022
+ ]
1023
+
966
1024
  id: int = models.AutoField(primary_key=True)
967
1025
  """An integer id that's synchronized for a family of coupled database instances.
968
1026
 
@@ -1273,7 +1331,7 @@ def get_transfer_run(record) -> Run:
1273
1331
  if not cache_using_filepath.exists():
1274
1332
  raise SystemExit("Need to call .using() before")
1275
1333
  instance_uid = cache_using_filepath.read_text().split("\n")[0]
1276
- key = f"transfers/{instance_uid}"
1334
+ key = f"__lamindb_transfer__/{instance_uid}"
1277
1335
  uid = instance_uid + "0000"
1278
1336
  transform = Transform.filter(uid=uid).one_or_none()
1279
1337
  if transform is None:
@@ -1291,9 +1349,7 @@ def get_transfer_run(record) -> Run:
1291
1349
  logger.warning(WARNING_RUN_TRANSFORM)
1292
1350
  initiated_by_run = None
1293
1351
  # it doesn't seem to make sense to create new runs for every transfer
1294
- run = Run.filter(
1295
- transform=transform, initiated_by_run=initiated_by_run
1296
- ).one_or_none()
1352
+ run = Run.filter(transform=transform, initiated_by_run=initiated_by_run).first()
1297
1353
  if run is None:
1298
1354
  run = Run(transform=transform, initiated_by_run=initiated_by_run).save() # type: ignore
1299
1355
  run.initiated_by_run = initiated_by_run # so that it's available in memory
@@ -1311,6 +1367,7 @@ def transfer_to_default_db(
1311
1367
  if record._state.db is None or record._state.db == "default":
1312
1368
  return None
1313
1369
  registry = record.__class__
1370
+ logger.debug(f"transferring {registry.__name__} record {record.uid} to default db")
1314
1371
  record_on_default = registry.objects.filter(uid=record.uid).one_or_none()
1315
1372
  record_str = f"{record.__class__.__name__}(uid='{record.uid}')"
1316
1373
  if transfer_logs["run"] is None:
@@ -1366,7 +1423,15 @@ def track_current_key_and_name_values(record: SQLRecord):
1366
1423
 
1367
1424
  def check_name_change(record: SQLRecord):
1368
1425
  """Warns if a record's name has changed."""
1369
- from lamindb.models import Artifact, Collection, Feature, Schema, Transform
1426
+ from lamindb.models import (
1427
+ Artifact,
1428
+ Collection,
1429
+ Feature,
1430
+ Record,
1431
+ Schema,
1432
+ Storage,
1433
+ Transform,
1434
+ )
1370
1435
 
1371
1436
  if (
1372
1437
  not record.pk
@@ -1389,7 +1454,7 @@ def check_name_change(record: SQLRecord):
1389
1454
 
1390
1455
  if old_name != new_name:
1391
1456
  # when a label is renamed, only raise a warning if it has a feature
1392
- if hasattr(record, "artifacts"):
1457
+ if hasattr(record, "artifacts") and not isinstance(record, (Record, Storage)):
1393
1458
  linked_records = (
1394
1459
  record.artifacts.through.filter(
1395
1460
  label_ref_is_name=True, **{f"{registry.lower()}_id": record.pk}