lamindb 1.6.1__py3-none-any.whl → 1.7.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lamindb/__init__.py +1 -3
- lamindb/_finish.py +32 -16
- lamindb/base/types.py +6 -4
- lamindb/core/_context.py +127 -57
- lamindb/core/_mapped_collection.py +1 -1
- lamindb/core/_settings.py +44 -4
- lamindb/core/_track_environment.py +5 -2
- lamindb/core/loaders.py +1 -1
- lamindb/core/storage/_anndata_accessor.py +1 -1
- lamindb/core/storage/_tiledbsoma.py +14 -8
- lamindb/core/storage/_valid_suffixes.py +0 -1
- lamindb/core/storage/_zarr.py +1 -1
- lamindb/core/storage/objects.py +13 -8
- lamindb/core/storage/paths.py +9 -6
- lamindb/core/types.py +1 -1
- lamindb/curators/_legacy.py +2 -1
- lamindb/curators/core.py +106 -105
- lamindb/errors.py +9 -0
- lamindb/examples/fixtures/__init__.py +0 -0
- lamindb/examples/fixtures/sheets.py +224 -0
- lamindb/migrations/0103_remove_writelog_migration_state_and_more.py +1 -1
- lamindb/migrations/0105_record_unique_name.py +20 -0
- lamindb/migrations/0106_transfer_data_migration.py +25 -0
- lamindb/migrations/0107_add_schema_to_record.py +68 -0
- lamindb/migrations/0108_remove_record_sheet_remove_sheetproject_sheet_and_more.py +30 -0
- lamindb/migrations/0109_record_input_of_runs_alter_record_run_and_more.py +123 -0
- lamindb/migrations/0110_rename_values_artifacts_record_linked_artifacts.py +17 -0
- lamindb/migrations/0111_remove_record__sort_order.py +148 -0
- lamindb/migrations/0112_alter_recordartifact_feature_and_more.py +105 -0
- lamindb/migrations/0113_lower_case_branch_and_space_names.py +62 -0
- lamindb/migrations/0114_alter_run__status_code.py +24 -0
- lamindb/migrations/0115_alter_space_uid.py +52 -0
- lamindb/migrations/{0104_squashed.py → 0115_squashed.py} +261 -257
- lamindb/models/__init__.py +4 -3
- lamindb/models/_describe.py +88 -31
- lamindb/models/_feature_manager.py +627 -658
- lamindb/models/_label_manager.py +1 -3
- lamindb/models/artifact.py +214 -99
- lamindb/models/collection.py +7 -1
- lamindb/models/feature.py +288 -60
- lamindb/models/has_parents.py +3 -3
- lamindb/models/project.py +32 -15
- lamindb/models/query_manager.py +7 -1
- lamindb/models/query_set.py +118 -41
- lamindb/models/record.py +140 -94
- lamindb/models/run.py +42 -42
- lamindb/models/save.py +102 -16
- lamindb/models/schema.py +41 -8
- lamindb/models/sqlrecord.py +105 -40
- lamindb/models/storage.py +278 -0
- lamindb/models/transform.py +10 -2
- lamindb/models/ulabel.py +9 -1
- lamindb/py.typed +0 -0
- lamindb/setup/__init__.py +2 -1
- lamindb/setup/_switch.py +16 -0
- lamindb/setup/errors/__init__.py +4 -0
- lamindb/setup/types/__init__.py +4 -0
- {lamindb-1.6.1.dist-info → lamindb-1.7.0.dist-info}/METADATA +5 -5
- {lamindb-1.6.1.dist-info → lamindb-1.7.0.dist-info}/RECORD +61 -44
- lamindb/models/core.py +0 -135
- {lamindb-1.6.1.dist-info → lamindb-1.7.0.dist-info}/LICENSE +0 -0
- {lamindb-1.6.1.dist-info → lamindb-1.7.0.dist-info}/WHEEL +0 -0
lamindb/models/save.py
CHANGED
@@ -29,7 +29,11 @@ if TYPE_CHECKING:
     from .artifact import Artifact
 
 
-def save(records: Iterable[SQLRecord], ignore_conflicts: bool | None = False) -> None:
+def save(
+    records: Iterable[SQLRecord],
+    ignore_conflicts: bool | None = False,
+    batch_size: int = 10000,
+) -> None:
     """Bulk save records.
 
     Note:
@@ -44,9 +48,11 @@ def save(records: Iterable[SQLRecord], ignore_conflicts: bool | None = False) -> None:
     Args:
         records: Multiple :class:`~lamindb.models.SQLRecord` objects.
         ignore_conflicts: If ``True``, do not error if some records violate a
-
-
-
+            unique or another constraint. However, it won't inplace update the id
+            fields of records. If you need records with ids, you need to query
+            them from the database.
+        batch_size: Number of records to process in each batch. Defaults to 10000.
+            Large batch sizes can improve performance but may lead to memory issues.
 
     Examples:
 
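For orientation, here is a hypothetical usage sketch of the new `batch_size` parameter; the `ULabel` names are invented:

```python
import lamindb as ln

# 25,000 labels saved in three chunks (10,000 + 10,000 + 5,000)
# rather than in a single bulk statement
labels = [ln.ULabel(name=f"label-{i}") for i in range(25_000)]
ln.save(labels, ignore_conflicts=True, batch_size=10_000)
```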
@@ -81,9 +87,11 @@ def save(records: Iterable[SQLRecord], ignore_conflicts: bool | None = False) -> None:
     non_artifacts_old, non_artifacts_new = partition(
         lambda r: r._state.adding or r.pk is None, non_artifacts
     )
-    bulk_create(non_artifacts_new, ignore_conflicts=ignore_conflicts)
+    bulk_create(
+        non_artifacts_new, ignore_conflicts=ignore_conflicts, batch_size=batch_size
+    )
     if non_artifacts_old:
-        bulk_update(non_artifacts_old)
+        bulk_update(non_artifacts_old, batch_size=batch_size)
     non_artifacts_with_parents = [
         r for r in non_artifacts_new if hasattr(r, "_parents")
     ]
@@ -97,6 +105,11 @@ def save(records: Iterable[SQLRecord], ignore_conflicts: bool | None = False) -> None:
     if artifacts:
         with transaction.atomic():
             for record in artifacts:
+                # will switch to True after the successful upload / saving
+                if hasattr(record, "_local_filepath") and getattr(
+                    record, "_to_store", False
+                ):
+                    record._is_saved_to_storage_location = False
                 record._save_skip_storage()
         using_key = settings._using_key
         store_artifacts(artifacts, using_key=using_key)
@@ -107,26 +120,85 @@ def save(records: Iterable[SQLRecord], ignore_conflicts: bool | None = False) -> None:
     return None
 
 
-def bulk_create(
+def bulk_create(
+    records: Iterable[SQLRecord],
+    ignore_conflicts: bool | None = False,
+    batch_size: int = 10000,
+):
+    """Create records in batches for safety and performance.
+
+    Args:
+        records: Iterable of SQLRecord objects to create
+        ignore_conflicts: Whether to ignore conflicts during creation
+        batch_size: Number of records to process in each batch. Defaults to 10000.
+    """
     records_by_orm = defaultdict(list)
     for record in records:
         records_by_orm[record.__class__].append(record)
-    for registry, records in records_by_orm.items():
-        registry.objects.bulk_create(records, ignore_conflicts=ignore_conflicts)
-        # records[:] = created # In-place list update; does not seem to be necessary
 
+    for registry, records_list in records_by_orm.items():
+        total_records = len(records_list)
+        model_name = registry.__name__
+        if total_records > batch_size:
+            logger.warning(
+                f"Starting bulk_create for {total_records} {model_name} records in batches of {batch_size}"
+            )
+
+        # Process records in batches
+        for i in range(0, len(records_list), batch_size):
+            batch = records_list[i : i + batch_size]
+            batch_num = (i // batch_size) + 1
+            total_batches = (total_records + batch_size - 1) // batch_size
+
+            if total_records > batch_size:
+                logger.info(
+                    f"Processing batch {batch_num}/{total_batches} for {model_name}: {len(batch)} records"
+                )
+            registry.objects.bulk_create(batch, ignore_conflicts=ignore_conflicts)
+            # records[:] = created # In-place list update; does not seem to be necessary
+
+
+def bulk_update(
+    records: Iterable[SQLRecord],
+    ignore_conflicts: bool | None = False,
+    batch_size: int = 10000,
+):
+    """Update records in batches for safety and performance.
 
-
+    Args:
+        records: Iterable of SQLRecord objects to update
+        ignore_conflicts: Whether to ignore conflicts during update (currently unused but kept for consistency)
+        batch_size: Number of records to process in each batch. If None, processes all at once.
+    """
     records_by_orm = defaultdict(list)
     for record in records:
         records_by_orm[record.__class__].append(record)
-
+
+    for registry, records_list in records_by_orm.items():
+        total_records = len(records_list)
+        model_name = registry.__name__
+        if total_records > batch_size:
+            logger.warning(
+                f"Starting bulk_update for {total_records} {model_name} records in batches of {batch_size}"
+            )
+
         field_names = [
             field.name
             for field in registry._meta.fields
             if (field.name != "created_at" and field.name != "id")
         ]
-
+
+        # Process records in batches
+        for i in range(0, len(records_list), batch_size):
+            batch = records_list[i : i + batch_size]
+            batch_num = (i // batch_size) + 1
+            total_batches = (total_records + batch_size - 1) // batch_size
+
+            if total_records > batch_size:
+                logger.info(
+                    f"Processing batch {batch_num}/{total_batches} for {model_name}: {len(batch)} records"
+                )
+            registry.objects.bulk_update(batch, field_names)
 
 
 # This is also used within Artifact.save()
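The batch bookkeeping above boils down to ceiling division plus list slicing; a minimal standalone sketch of the same arithmetic, independent of Django:

```python
def iter_batches(items: list, batch_size: int = 10_000):
    # total_batches is ceiling division, written the same way as above
    total_batches = (len(items) + batch_size - 1) // batch_size
    for i in range(0, len(items), batch_size):
        yield (i // batch_size) + 1, total_batches, items[i : i + batch_size]

# 25,000 items -> batches 1/3, 2/3, 3/3 of sizes 10,000, 10,000, 5,000
for batch_num, total_batches, batch in iter_batches(list(range(25_000))):
    print(f"{batch_num}/{total_batches}: {len(batch)} items")
```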
@@ -152,7 +224,7 @@ def check_and_attempt_upload(
     except Exception as exception:
         logger.warning(f"could not upload artifact: {artifact}")
         # clear dangling storages if we were actually uploading or saving
-        if
+        if getattr(artifact, "_to_store", False):
             artifact._clear_storagekey = auto_storage_key_from_artifact(artifact)
         return exception
     # copies (if on-disk) or moves the temporary file (if in-memory) to the cache
@@ -257,6 +329,8 @@ def store_artifacts(
 
     If any upload fails, subsequent artifacts are cleaned up from the DB.
     """
+    from .artifact import Artifact
+
     exception: Exception | None = None
     # because uploads might fail, we need to maintain a new list
     # of the succeeded uploads
@@ -269,8 +343,20 @@ def store_artifacts(
         exception = check_and_attempt_upload(artifact, using_key)
         if exception is not None:
             break
+
         stored_artifacts += [artifact]
-        #
+        # update to show successful saving
+        # only update if _is_saved_to_storage_location was set to False before
+        # this should be a single transaction for the updates of all the artifacts
+        # but then it would just abort all artifacts, even successfully saved before
+        # TODO: there should also be some kind of exception handling here
+        # but this requires proper refactoring
+        if artifact._is_saved_to_storage_location is False:
+            artifact._is_saved_to_storage_location = True
+            super(
+                Artifact, artifact
+            ).save()  # each .save is a separate transaction here
+        # if check_and_attempt_upload was successful
         # then this can have only ._clear_storagekey from .replace
         exception = check_and_attempt_clearing(
             artifact, raise_file_not_found_error=True, using_key=using_key
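The `super(Artifact, artifact).save()` call above is the standard Python trick for skipping a subclass override: binding `super` explicitly to an instance dispatches straight to the parent implementation. A toy sketch of the mechanism, with hypothetical classes in place of Django models:

```python
class Base:
    def save(self):
        print("base save: plain row update")

class Doc(Base):  # hypothetical stand-in for Artifact
    def save(self):
        print("doc save: runs upload side effects, then ...")
        super().save()

doc = Doc()
doc.save()              # override runs: side effects + base save
super(Doc, doc).save()  # base save only -- no re-triggered side effects
```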
@@ -334,7 +420,7 @@ def upload_artifact(
     storage_path, storage_settings = attempt_accessing_path(
         artifact, storage_key, using_key=using_key, access_token=access_token
     )
-    if
+    if getattr(artifact, "_to_store", False):
         logger.save(f"storing artifact '{artifact.uid}' at '{storage_path}'")
         store_file_or_folder(
             artifact._local_filepath,
lamindb/models/schema.py
CHANGED
@@ -52,7 +52,7 @@ if TYPE_CHECKING:
 
     from .artifact import Artifact
     from .project import Project
-    from .query_set import QuerySet
+    from .query_set import QuerySet, SQLRecordList
 
 
 NUMBER_TYPE = "num"
@@ -464,7 +464,10 @@ class Schema(SQLRecord, CanCurate, TracksRun):
     @overload
     def __init__(
         self,
-        features: list[SQLRecord] | None = None,
+        features: list[SQLRecord]
+        | SQLRecordList
+        | list[tuple[Feature, dict]]
+        | None = None,
         index: Feature | None = None,
         slots: dict[str, Schema] | None = None,
         name: str | None = None,
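A hedged usage sketch of the widened `features` overload; the feature name and dtype are invented, and the exact keys accepted in the per-feature config dict are an assumption, not confirmed by this diff:

```python
import lamindb as ln

feature = ln.Feature(name="cell_type", dtype=str).save()  # invented feature

# a plain feature list, as before
schema = ln.Schema(features=[feature]).save()

# new in 1.7: (Feature, config-dict) tuples are also accepted, e.g.
# schema = ln.Schema(features=[(feature, {"optional": True})])  # config keys assumed
```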
@@ -518,6 +521,10 @@ class Schema(SQLRecord, CanCurate, TracksRun):
         coerce_dtype: bool | None = kwargs.pop("coerce_dtype", False)
         using: bool | None = kwargs.pop("using", None)
         n_features: int | None = kwargs.pop("n", None)
+        kwargs.pop("branch", None)
+        kwargs.pop("branch_id", 1)
+        kwargs.pop("space", None)
+        kwargs.pop("space_id", 1)
         # backward compat
         if not slots:
             if "components" in kwargs:
@@ -580,11 +587,6 @@ class Schema(SQLRecord, CanCurate, TracksRun):
         else:
             validated_kwargs["uid"] = ids.base62_16()
         super().__init__(**validated_kwargs)
-        # manipulating aux fields is easier after calling super().__init__()
-        self.optionals.set(optional_features)
-        self.flexible = flexible
-        if index is not None:
-            self._index_feature_uid = index.uid
 
     def _validate_kwargs_calculate_hash(
         self,
@@ -608,13 +610,16 @@ class Schema(SQLRecord, CanCurate, TracksRun):
     ) -> tuple[list[Feature], dict[str, Any], list[Feature], Registry, bool]:
         optional_features = []
         features_registry: Registry = None
+
         if itype is not None:
             if itype != "Composite":
                 itype = serialize_dtype(itype, is_itype=True)
+
         if index is not None:
             if not isinstance(index, Feature):
                 raise TypeError("index must be a Feature")
             features.insert(0, index)
+
         if features:
             features, configs = get_features_config(features)
             features_registry = validate_features(features)
@@ -642,12 +647,15 @@ class Schema(SQLRecord, CanCurate, TracksRun):
         else:
             dtype = get_type_str(dtype)
         flexible_default = n_features < 0
+
         if flexible is None:
             flexible = flexible_default
+
         if slots:
             itype = "Composite"
             if otype is None:
                 raise InvalidArgument("Please pass otype != None for composite schemas")
+
         if itype is not None and not isinstance(itype, str):
             itype_str = serialize_dtype(itype, is_itype=True)
         else:
@@ -667,8 +675,28 @@ class Schema(SQLRecord, CanCurate, TracksRun):
         }
         n_features_default = -1
         coerce_dtype_default = False
+        aux_dict: dict[str, dict[str, bool | str | list[str]]] = {}
+
+        # TODO: leverage a common abstraction across the properties and this here
+
+        # coerce_dtype (key "0")
         if coerce_dtype:
-
+            aux_dict.setdefault("af", {})["0"] = coerce_dtype
+
+        # optional features (key "1")
+        if optional_features:
+            aux_dict.setdefault("af", {})["1"] = [f.uid for f in optional_features]
+
+        # flexible (key "2")
+        if flexible is not None:
+            aux_dict.setdefault("af", {})["2"] = flexible
+
+        # index feature (key "3")
+        if index is not None:
+            aux_dict.setdefault("af", {})["3"] = index.uid
+
+        if aux_dict:
+            validated_kwargs["_aux"] = aux_dict
         if slots:
             list_for_hashing = [component.hash for component in slots.values()]
         else:
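The `_aux` handling above packs the four schema options into a nested dict under the `"af"` namespace, keyed by the string codes noted in the comments. A sketch of the resulting layout for a schema with everything set (the uids are invented):

```python
# what validated_kwargs["_aux"] ends up looking like
aux_dict = {
    "af": {
        "0": True,                              # coerce_dtype
        "1": ["3TNCsZZcnIBv", "7GxRfQ2wLm9k"],  # uids of optional features
        "2": False,                             # flexible
        "3": "3TNCsZZcnIBv",                    # uid of the index feature
    }
}
```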
@@ -716,9 +744,11 @@ class Schema(SQLRecord, CanCurate, TracksRun):
                 ":".join(sorted(feature_list_for_hashing))
             )
             list_for_hashing.append(f"{HASH_CODE['features_hash']}={features_hash}")
+
         self._list_for_hashing = sorted(list_for_hashing)
         schema_hash = hash_string(":".join(self._list_for_hashing))
         validated_kwargs["hash"] = schema_hash
+
         return (
             features,
             validated_kwargs,
@@ -1111,6 +1141,9 @@ class Schema(SQLRecord, CanCurate, TracksRun):
 
     def describe(self, return_str=False) -> None | str:
         """Describe schema."""
+        if self.pk is None:
+            raise ValueError("Schema must be saved before describing")
+
         message = str(self)
         # display slots for composite schemas
         if self.itype == "Composite":
lamindb/models/sqlrecord.py
CHANGED
@@ -28,6 +28,7 @@ from django.db.models.fields.related import (
     ManyToManyRel,
     ManyToOneRel,
 )
+from django.db.models.functions import Lower
 from lamin_utils import colors, logger
 from lamindb_setup import settings as setup_settings
 from lamindb_setup._connect_instance import (
@@ -282,7 +283,7 @@ def validate_fields(record: SQLRecord, kwargs):
 def suggest_records_with_similar_names(
     record: SQLRecord, name_field: str, kwargs
 ) -> SQLRecord | None:
-    """Returns
+    """Returns a record if found exact match, otherwise None.
 
     Logs similar matches if found.
     """
@@ -305,18 +306,15 @@ def suggest_records_with_similar_names(
     )
     if not queryset.exists():  # empty queryset
         return None
-    s, it, nots
-
-
-
-
-
+    s, it, nots, record_text = (
+        ("", "it", "s", "a record")
+        if len(queryset) == 1
+        else ("s", "one of them", "", "records")
+    )
+    similar_names = ", ".join(f"'{getattr(record, name_field)}'" for record in queryset)
+    msg = f"you are trying to create a record with name='{kwargs[name_field]}' but {record_text} with similar {name_field}{s} exist{nots}: {similar_names}. Did you mean to load {it}?"
+    logger.warning(f"{msg}")
 
-    logger.warning(f"{msg}")
-    if settings._verbosity_int >= 1:
-        display(queryset.df())
-    else:
-        logger.warning(f"{msg}\n{queryset}")
     return None
 
 
@@ -405,9 +403,10 @@ class Registry(ModelBase):
         cls,
         field: StrField | None = None,
         return_field: StrField | None = None,
+        keep: Literal["first", "last", False] = "first",
     ) -> NamedTuple:
         """{}"""  # noqa: D415
-        return _lookup(cls=cls, field=field, return_field=return_field)
+        return _lookup(cls=cls, field=field, return_field=return_field, keep=keep)
 
     def filter(cls, *queries, **expressions) -> QuerySet:
         """Query records.
@@ -467,7 +466,7 @@ class Registry(ModelBase):
     def df(
         cls,
         include: str | list[str] | None = None,
-        features: bool | list[str] = False,
+        features: bool | list[str] | str = False,
         limit: int = 100,
     ) -> pd.DataFrame:
         """Convert to `pd.DataFrame`.
@@ -480,9 +479,11 @@ class Registry(ModelBase):
             include: Related fields to include as columns. Takes strings of
                 form `"ulabels__name"`, `"cell_types__name"`, etc. or a list
                 of such strings.
-            features: If
-                :class:`~lamindb.Feature`
-                `
+            features: If a list of feature names, filters
+                :class:`~lamindb.Feature` down to these features.
+                If `True`, prints all features with dtypes in the core schema module.
+                If `"queryset"`, infers the features used within the set of artifacts or records.
+                Only available for `Artifact` and `Record`.
             limit: Maximum number of rows to display from a Pandas DataFrame.
                 Defaults to 100 to reduce database load.
 
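A short usage sketch of the three `features` modes documented above; the feature names are invented:

```python
import lamindb as ln

ln.Artifact.df()                                    # registry fields only
ln.Artifact.df(features=True)                       # all features in the core schema module
ln.Artifact.df(features=["cell_type", "organism"])  # filter to named features
ln.Artifact.df(features="queryset")                 # infer features used by the queried artifacts
```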
@@ -549,8 +550,10 @@ class Registry(ModelBase):
         # we're in the default instance
         if instance is None or instance == "default":
             return QuerySet(model=cls, using=None)
+
         owner, name = get_owner_name_from_identifier(instance)
-
+        current_instance_owner_name: list[str] = setup_settings.instance.slug.split("/")
+        if [owner, name] == current_instance_owner_name:
             return QuerySet(model=cls, using=None)
 
         # move on to different instances
@@ -565,6 +568,9 @@ class Registry(ModelBase):
                 f"Failed to load instance {instance}, please check your permissions!"
             )
         iresult, _ = result
+        # this can happen if querying via an old instance name
+        if [iresult.get("owner"), iresult["name"]] == current_instance_owner_name:
+            return QuerySet(model=cls, using=None)
         # do not use {} syntax below, it gives rise to a dict if the schema modules
         # are empty and then triggers a TypeError in missing_members = source_modules - target_modules
         source_modules = set(  # noqa
@@ -655,23 +661,53 @@ class BaseSQLRecord(models.Model, metaclass=Registry):
     def __init__(self, *args, **kwargs):
         skip_validation = kwargs.pop("_skip_validation", False)
         if not args:
-            if
-
-            "
-
-            "
-
+            if (
+                issubclass(self.__class__, SQLRecord)
+                and self.__class__.__name__ != "Storage"
+                # do not save bionty entities in restricted spaces by default
+                and self.__class__.__module__ != "bionty.models"
+            ):
                 from lamindb import context as run_context
 
                 if run_context.space is not None:
-
+                    current_space = run_context.space
+                elif setup_settings.space is not None:
+                    current_space = setup_settings.space
+
+                if current_space is not None:
+                    if "space_id" in kwargs:
+                        # space_id takes precedence over space
+                        # https://claude.ai/share/f045e5dc-0143-4bc5-b8a4-38309229f75e
+                        if kwargs["space_id"] == 1:  # ignore default space
+                            kwargs.pop("space_id")
+                            kwargs["space"] = current_space
+                    elif "space" in kwargs:
+                        if kwargs["space"] is None:
+                            kwargs["space"] = current_space
+                    else:
+                        kwargs["space"] = current_space
             if issubclass(
                 self.__class__, SQLRecord
             ) and self.__class__.__name__ not in {"Storage", "Source"}:
                 from lamindb import context as run_context
 
                 if run_context.branch is not None:
-
+                    current_branch = run_context.branch
+                elif setup_settings.branch is not None:
+                    current_branch = setup_settings.branch
+
+                if current_branch is not None:
+                    # branch_id takes precedence over branch
+                    # https://claude.ai/share/f045e5dc-0143-4bc5-b8a4-38309229f75e
+                    if "branch_id" in kwargs:
+                        if kwargs["branch_id"] == 1:  # ignore default branch
+                            kwargs.pop("branch_id")
+                            kwargs["branch"] = current_branch
+                    elif "branch" in kwargs:
+                        if kwargs["branch"] is None:
+                            kwargs["branch"] = current_branch
+                    else:
+                        kwargs["branch"] = current_branch
         if skip_validation:
             super().__init__(**kwargs)
         else:
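The space and branch handling above applies the same precedence rule twice. A condensed sketch of that rule as a pure function; the function and argument names are mine, not lamindb's:

```python
def resolve(kwargs: dict, field: str, current) -> dict:
    """Fill `field` from context unless the caller pinned it explicitly.

    Mirrors the logic above: an explicit `<field>_id` other than the
    default (1) wins; an explicit non-None `<field>` object wins;
    everything else falls back to the current context value.
    """
    if current is None:
        return kwargs
    id_key = f"{field}_id"
    if id_key in kwargs:
        if kwargs[id_key] == 1:  # the default space/branch: override from context
            kwargs.pop(id_key)
            kwargs[field] = current
    elif kwargs.get(field) is None:
        kwargs[field] = current
    return kwargs

assert resolve({"space_id": 1}, "space", "my-space") == {"space": "my-space"}
assert resolve({"space_id": 7}, "space", "my-space") == {"space_id": 7}
```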
@@ -743,7 +779,7 @@ class BaseSQLRecord(models.Model, metaclass=Registry):
         super().__init__(*args)
         track_current_key_and_name_values(self)
 
-    def save(self, *args, **kwargs) ->
+    def save(self: T, *args, **kwargs) -> T:
         """Save.
 
         Always saves to the default database.
@@ -751,6 +787,7 @@ class BaseSQLRecord(models.Model, metaclass=Registry):
         using_key = None
         if "using" in kwargs:
             using_key = kwargs["using"]
+        transfer_config = kwargs.pop("transfer", None)
         db = self._state.db
         pk_on_db = self.pk
         artifacts: list = []
@@ -812,6 +849,20 @@ class BaseSQLRecord(models.Model, metaclass=Registry):
                         f"returning {self.__class__.__name__.lower()} with same hash: {pre_existing_record}"
                     )
                     init_self_from_db(self, pre_existing_record)
+                elif (
+                    self.__class__.__name__ == "Storage"
+                    and isinstance(e, IntegrityError)
+                    and "root" in error_msg
+                    or "uid" in error_msg
+                    and (
+                        "UNIQUE constraint failed" in error_msg
+                        or "duplicate key value violates unique constraint" in error_msg
+                    )
+                ):
+                    # even if uid was in the error message, we can retrieve based on
+                    # the root because it's going to be the same root
+                    pre_existing_record = self.__class__.get(root=self.root)
+                    init_self_from_db(self, pre_existing_record)
                 elif (
                     isinstance(e, ProgrammingError)
                     and hasattr(self, "space")
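Note the mixed `and`/`or` in the new `elif` above: Python binds `and` tighter than `or`, so the condition groups as shown below; whether that grouping is intended cannot be determined from the diff alone.

```python
# Python's operator precedence: `and` binds tighter than `or`, so
#   A and B and C or D and (E or F)
# parses as
#   (A and B and C) or (D and (E or F))
# i.e. the Storage/IntegrityError guards apply only to the "root" arm,
# not to the "uid" arm of the condition.
A, B, C, D, E, F = False, False, False, True, True, False
assert (A and B and C or D and (E or F)) == ((A and B and C) or (D and (E or F)))
```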
@@ -834,21 +885,18 @@ class BaseSQLRecord(models.Model, metaclass=Registry):
             for artifact in artifacts:
                 artifact.save()
             self.artifacts.add(*artifacts)
-        if hasattr(self, "labels"):
+        if hasattr(self, "labels") and transfer_config == "annotations":
             from copy import copy
 
-            from lamindb.models._feature_manager import FeatureManager
-
             # here we go back to original record on the source database
             self_on_db = copy(self)
             self_on_db._state.db = db
             self_on_db.pk = pk_on_db  # manually set the primary key
-            self_on_db.features = FeatureManager(self_on_db)  # type: ignore
             self.features._add_from(self_on_db, transfer_logs=transfer_logs)
             self.labels.add_from(self_on_db, transfer_logs=transfer_logs)
             for k, v in transfer_logs.items():
                 if k != "run" and len(v) > 0:
-                    logger.important(f"{k}
+                    logger.important(f"{k}: {', '.join(v)}")
 
         if self.__class__.__name__ in {
             "Artifact",
@@ -901,6 +949,11 @@ class Space(BaseSQLRecord):
     All data in this registry is synchronized from LaminHub so that spaces can be shared and reused across multiple LaminDB instances.
     """
 
+    class Meta:
+        constraints = [
+            models.UniqueConstraint(Lower("name"), name="unique_space_name_lower")
+        ]
+
     id: int = models.SmallAutoField(primary_key=True)
     """Internal id, valid only in one DB instance."""
     name: str = models.CharField(max_length=100, db_index=True)
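The new `Meta.constraints` uses Django's functional unique constraints: `UniqueConstraint(Lower("name"), ...)` enforces case-insensitive uniqueness at the database level. A minimal sketch of the behavior with a hypothetical model:

```python
from django.db import models
from django.db.models.functions import Lower

class Team(models.Model):  # hypothetical model
    name = models.CharField(max_length=100)

    class Meta:
        app_label = "example"  # required when defining models outside an app
        constraints = [
            models.UniqueConstraint(Lower("name"), name="unique_team_name_lower")
        ]

# Team(name="Main").save(); Team(name="main").save()  -> IntegrityError:
# both rows lower-case to "main", so the second insert violates the constraint
```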
@@ -909,8 +962,8 @@ class Space(BaseSQLRecord):
         editable=False,
         unique=True,
         max_length=12,
-        default="
-        db_default="
+        default="aaaaaaaaaaaaa",
+        db_default="aaaaaaaaaaaa",
         db_index=True,
     )
     """Universal id."""
@@ -963,6 +1016,11 @@ class Branch(BaseSQLRecord):
     # that can be merged onto the main branch in an experience akin to a Pull Request. The mapping
     # onto a semantic branch name is handled through LaminHub.
 
+    class Meta:
+        constraints = [
+            models.UniqueConstraint(Lower("name"), name="unique_branch_name_lower")
+        ]
+
     id: int = models.AutoField(primary_key=True)
     """An integer id that's synchronized for a family of coupled database instances.
 
@@ -1273,7 +1331,7 @@ def get_transfer_run(record) -> Run:
     if not cache_using_filepath.exists():
         raise SystemExit("Need to call .using() before")
     instance_uid = cache_using_filepath.read_text().split("\n")[0]
-    key = f"
+    key = f"__lamindb_transfer__/{instance_uid}"
     uid = instance_uid + "0000"
     transform = Transform.filter(uid=uid).one_or_none()
     if transform is None:
@@ -1291,9 +1349,7 @@ def get_transfer_run(record) -> Run:
         logger.warning(WARNING_RUN_TRANSFORM)
         initiated_by_run = None
     # it doesn't seem to make sense to create new runs for every transfer
-    run = Run.filter(
-        transform=transform, initiated_by_run=initiated_by_run
-    ).one_or_none()
+    run = Run.filter(transform=transform, initiated_by_run=initiated_by_run).first()
     if run is None:
         run = Run(transform=transform, initiated_by_run=initiated_by_run).save()  # type: ignore
     run.initiated_by_run = initiated_by_run  # so that it's available in memory
@@ -1311,6 +1367,7 @@ def transfer_to_default_db(
     if record._state.db is None or record._state.db == "default":
         return None
     registry = record.__class__
+    logger.debug(f"transferring {registry.__name__} record {record.uid} to default db")
     record_on_default = registry.objects.filter(uid=record.uid).one_or_none()
     record_str = f"{record.__class__.__name__}(uid='{record.uid}')"
     if transfer_logs["run"] is None:
@@ -1366,7 +1423,15 @@ def track_current_key_and_name_values(record: SQLRecord):
 
 def check_name_change(record: SQLRecord):
     """Warns if a record's name has changed."""
-    from lamindb.models import
+    from lamindb.models import (
+        Artifact,
+        Collection,
+        Feature,
+        Record,
+        Schema,
+        Storage,
+        Transform,
+    )
 
     if (
         not record.pk
@@ -1389,7 +1454,7 @@ def check_name_change(record: SQLRecord):
 
     if old_name != new_name:
         # when a label is renamed, only raise a warning if it has a feature
-        if hasattr(record, "artifacts"):
+        if hasattr(record, "artifacts") and not isinstance(record, (Record, Storage)):
             linked_records = (
                 record.artifacts.through.filter(
                     label_ref_is_name=True, **{f"{registry.lower()}_id": record.pk}