lamindb 0.76.4__py3-none-any.whl → 0.76.6__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as published to a supported registry. It is provided for informational purposes only.
lamindb/__init__.py CHANGED
@@ -41,7 +41,7 @@ Modules and settings.
 """

 # denote a release candidate for 0.1.0 with 0.1rc1, 0.1a1, 0.1b1, etc.
-__version__ = "0.76.4"
+__version__ = "0.76.6"

 import os as _os

@@ -108,6 +108,6 @@ if _check_instance_setup(from_lamindb=True):
     track = context.track  # backward compat
     finish = context.finish  # backward compat
     Curate = Curator  # backward compat
-    settings.__doc__ = """Global :class:`~lamindb.core.Settings`."""
-    context.__doc__ = """Global :class:`~lamindb.core.Context`."""
+    settings.__doc__ = """Global settings (:class:`~lamindb.core.Settings`)."""
+    context.__doc__ = """Global run context (:class:`~lamindb.core.Context`)."""
     from django.db.models import Q
lamindb/_artifact.py CHANGED
@@ -28,7 +28,7 @@ from lnschema_core.types import (
 )

 from lamindb._utils import attach_func_to_class_method
-from lamindb.core._data import HasFeatures, _track_run_input
+from lamindb.core._data import _track_run_input, describe, view_lineage
 from lamindb.core._settings import settings
 from lamindb.core.exceptions import IntegrityError
 from lamindb.core.storage import (
@@ -334,7 +334,7 @@ def get_artifact_kwargs_from_data(
         # save the information that this artifact was previously
         # produced by another run
         if artifact.run is not None:
-            artifact.run.output_artifacts_with_later_updates.add(artifact)
+            artifact.run._output_artifacts_with_later_updates.add(artifact)
         # update the run of the artifact with the latest run
         stat_or_artifact.run = run
         stat_or_artifact.transform = run.transform
@@ -497,13 +497,6 @@ def _check_accessor_artifact(data: Any, accessor: str | None = None):
     return accessor


-def update_attributes(data: HasFeatures, attributes: Mapping[str, str]):
-    for key, value in attributes.items():
-        if getattr(data, key) != value:
-            logger.warning(f"updated {key} from {getattr(data, key)} to {value}")
-            setattr(data, key, value)
-
-
 def __init__(artifact: Artifact, *args, **kwargs):
     artifact.features = FeatureManager(artifact)
     artifact.params = ParamManager(artifact)
@@ -608,7 +601,7 @@ def __init__(artifact: Artifact, *args, **kwargs):

     # an object with the same hash already exists
     if isinstance(kwargs_or_artifact, Artifact):
-        from ._record import init_self_from_db
+        from ._record import init_self_from_db, update_attributes

         init_self_from_db(artifact, kwargs_or_artifact)
         # adding "key" here is dangerous because key might be auto-populated
@@ -908,14 +901,6 @@ def replace(
     self._to_store = not check_path_in_storage


-# deprecated
-def backed(
-    self, mode: str = "r", is_run_input: bool | None = None
-) -> AnnDataAccessor | BackedAccessor | SOMACollection | SOMAExperiment:
-    logger.warning("`.backed()` is deprecated, use `.open()`!'")
-    return self.open(mode, is_run_input)
-
-
 # docstring handled through attach_func_to_class_method
 def open(
     self, mode: str = "r", is_run_input: bool | None = None
@@ -1185,5 +1170,5 @@ for name in METHOD_NAMES:
 Artifact._delete_skip_storage = _delete_skip_storage
 Artifact._save_skip_storage = _save_skip_storage
 Artifact.path = path
-Artifact.backed = backed
-Artifact.view_lineage = HasFeatures.view_lineage
+Artifact.describe = describe
+Artifact.view_lineage = view_lineage
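
The deprecated `.backed()` accessor is removed in 0.76.6 in favor of `.open()`, and `describe()`/`view_lineage()` are now attached from `lamindb.core._data`. A minimal migration sketch (the filter key is hypothetical):

    import lamindb as ln

    artifact = ln.Artifact.filter(key="example.h5ad").one()  # hypothetical key
    # before 0.76.6: artifact.backed() still worked, with a deprecation warning
    accessor = artifact.open(mode="r")  # streaming accessor, e.g. for AnnData
    artifact.describe()  # now provided by lamindb.core._data.describe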
lamindb/_collection.py CHANGED
@@ -17,19 +17,17 @@ from lamindb_setup.core.hashing import hash_set
 from lnschema_core.models import (
     Collection,
     CollectionArtifact,
-    FeatureManager,
     FeatureSet,
 )
 from lnschema_core.types import VisibilityChoice

-from lamindb._artifact import update_attributes
 from lamindb._utils import attach_func_to_class_method
-from lamindb.core._data import _track_run_input
+from lamindb.core._data import _track_run_input, describe, view_lineage
 from lamindb.core._mapped_collection import MappedCollection
 from lamindb.core.versioning import process_revises

 from . import Artifact, Run
-from ._record import init_self_from_db
+from ._record import init_self_from_db, update_attributes
 from .core._data import (
     add_transform_to_kwargs,
     get_run,
@@ -44,12 +42,45 @@ if TYPE_CHECKING:
     from ._query_set import QuerySet


+class CollectionFeatureManager:
+    """Query features of artifact in collection."""
+
+    def __init__(self, collection: Collection):
+        self._collection = collection
+
+    def get_feature_sets_union(self) -> dict[str, FeatureSet]:
+        links_feature_set_artifact = Artifact.feature_sets.through.objects.filter(
+            artifact_id__in=self._collection.artifacts.values_list("id", flat=True)
+        )
+        feature_sets_by_slots = defaultdict(list)
+        for link in links_feature_set_artifact:
+            feature_sets_by_slots[link.slot].append(link.featureset_id)
+        feature_sets_union = {}
+        for slot, feature_set_ids_slot in feature_sets_by_slots.items():
+            feature_set_1 = FeatureSet.get(id=feature_set_ids_slot[0])
+            related_name = feature_set_1._get_related_name()
+            features_registry = getattr(FeatureSet, related_name).field.model
+            # this way of writing the __in statement turned out to be the fastest
+            # evaluated on a link table with 16M entries connecting 500 feature sets with
+            # 60k genes
+            feature_ids = (
+                features_registry.feature_sets.through.objects.filter(
+                    featureset_id__in=feature_set_ids_slot
+                )
+                .values(f"{features_registry.__name__.lower()}_id")
+                .distinct()
+            )
+            features = features_registry.filter(id__in=feature_ids)
+            feature_sets_union[slot] = FeatureSet(features, dtype=feature_set_1.dtype)
+        return feature_sets_union
+
+
 def __init__(
     collection: Collection,
     *args,
     **kwargs,
 ):
-    collection.features = FeatureManager(collection)
+    collection.features = CollectionFeatureManager(collection)
     if len(args) == len(collection._meta.concrete_fields):
         super(Collection, collection).__init__(*args, **kwargs)
         return None
@@ -78,9 +109,6 @@ def __init__(
         if "visibility" in kwargs
         else VisibilityChoice.default.value
    )
-    feature_sets: dict[str, FeatureSet] = (
-        kwargs.pop("feature_sets") if "feature_sets" in kwargs else {}
-    )
     if "is_new_version_of" in kwargs:
         logger.warning("`is_new_version_of` will be removed soon, please use `revises`")
         revises = kwargs.pop("is_new_version_of")
@@ -98,7 +126,7 @@ def __init__(
         if not hasattr(artifacts, "__getitem__"):
             raise ValueError("Artifact or List[Artifact] is allowed.")
         assert isinstance(artifacts[0], Artifact)  # type: ignore # noqa: S101
-        hash, feature_sets = from_artifacts(artifacts)  # type: ignore
+        hash = from_artifacts(artifacts)  # type: ignore
     if meta_artifact is not None:
         if not isinstance(meta_artifact, Artifact):
             raise ValueError("meta_artifact has to be an Artifact")
@@ -107,11 +135,6 @@ def __init__(
             raise ValueError(
                 "Save meta_artifact artifact before creating collection!"
             )
-        if not feature_sets:
-            feature_sets = meta_artifact.features._feature_set_by_slot
-        else:
-            if len(meta_artifact.features._feature_set_by_slot) > 0:
-                logger.info("overwriting feature sets linked to artifact")
     # we ignore collections in trash containing the same hash
     if hash is not None:
         existing_collection = Collection.filter(hash=hash).one_or_none()
@@ -126,7 +149,7 @@ def __init__(
             # save the information that this artifact was previously
             # produced by another run
             if existing_collection.run is not None:
-                existing_collection.run.output_collections_with_later_updates.add(
+                existing_collection.run._output_collections_with_later_updates.add(
                     existing_collection
                 )
             # update the run of the artifact with the latest run
@@ -134,11 +157,6 @@ def __init__(
             existing_collection.transform = run.transform
             init_self_from_db(collection, existing_collection)
             update_attributes(collection, {"description": description, "name": name})
-            for slot, feature_set in collection.features._feature_set_by_slot.items():
-                if slot in feature_sets:
-                    if not feature_sets[slot] == feature_set:
-                        collection.feature_sets.remove(feature_set)
-                        logger.warning(f"removing feature set: {feature_set}")
         else:
             kwargs = {}
             add_transform_to_kwargs(kwargs, run)
@@ -161,7 +179,6 @@ def __init__(
         )
         settings.creation.search_names = search_names_setting
     collection._artifacts = artifacts
-    collection._feature_sets = feature_sets
     # register provenance
     if revises is not None:
         _track_run_input(revises, run=run)
@@ -171,61 +188,21 @@ def __init__(
 # internal function, not exposed to user
 def from_artifacts(artifacts: Iterable[Artifact]) -> tuple[str, dict[str, str]]:
     # assert all artifacts are already saved
-    logger.debug("check not saved")
     saved = not any(artifact._state.adding for artifact in artifacts)
     if not saved:
         raise ValueError("Not all artifacts are yet saved, please save them")
-    # query all feature sets of artifacts
-    logger.debug("artifact ids")
-    artifact_ids = [artifact.id for artifact in artifacts]
-    # query all feature sets at the same time rather
-    # than making a single query per artifact
-    logger.debug("links_feature_set_artifact")
-    links_feature_set_artifact = Artifact.feature_sets.through.objects.filter(
-        artifact_id__in=artifact_ids
-    )
-    feature_sets_by_slots = defaultdict(list)
-    logger.debug("slots")
-    for link in links_feature_set_artifact:
-        feature_sets_by_slots[link.slot].append(link.featureset_id)
-    feature_sets_union = {}
-    logger.debug("union")
-    for slot, feature_set_ids_slot in feature_sets_by_slots.items():
-        feature_set_1 = FeatureSet.get(id=feature_set_ids_slot[0])
-        related_name = feature_set_1._get_related_name()
-        features_registry = getattr(FeatureSet, related_name).field.model
-        start_time = logger.debug("run filter")
-        # this way of writing the __in statement turned out to be the fastest
-        # evaluated on a link table with 16M entries connecting 500 feature sets with
-        # 60k genes
-        feature_ids = (
-            features_registry.feature_sets.through.objects.filter(
-                featureset_id__in=feature_set_ids_slot
-            )
-            .values(f"{features_registry.__name__.lower()}_id")
-            .distinct()
-        )
-        start_time = logger.debug("done, start evaluate", time=start_time)
-        features = features_registry.filter(id__in=feature_ids)
-        feature_sets_union[slot] = FeatureSet(features, dtype=feature_set_1.dtype)
-        start_time = logger.debug("done", time=start_time)
-    # validate consistency of hashes
-    # we do not allow duplicate hashes
-    logger.debug("hashes")
-    # artifact.hash is None for zarr
-    # todo: more careful handling of such cases
+    # validate consistency of hashes - we do not allow duplicate hashes
     hashes = [artifact.hash for artifact in artifacts if artifact.hash is not None]
-    if len(hashes) != len(set(hashes)):
+    hashes_set = set(hashes)
+    if len(hashes) != len(hashes_set):
         seen = set()
         non_unique = [x for x in hashes if x in seen or seen.add(x)]  # type: ignore
         raise ValueError(
             "Please pass artifacts with distinct hashes: these ones are non-unique"
             f" {non_unique}"
         )
-    time = logger.debug("hash")
-    hash = hash_set(set(hashes))
-    logger.debug("done", time=time)
-    return hash, feature_sets_union
+    hash = hash_set(hashes_set)
+    return hash


 # docstring handled through attach_func_to_class_method
@@ -401,3 +378,5 @@ for name in METHOD_NAMES:

 Collection.ordered_artifacts = ordered_artifacts
 Collection.data_artifact = data_artifact
+Collection.describe = describe
+Collection.view_lineage = view_lineage
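
With `CollectionFeatureManager`, the per-slot union of the artifacts' feature sets is no longer precomputed at collection creation but queried on demand. A sketch of the new access pattern (the collection name is hypothetical):

    import lamindb as ln

    collection = ln.Collection.filter(name="my-datasets").one()  # hypothetical name
    # computes the union of feature sets across all artifacts, slot by slot
    feature_sets = collection.features.get_feature_sets_union()
    for slot, feature_set in feature_sets.items():
        print(slot, feature_set)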
lamindb/_curate.py CHANGED
@@ -334,9 +334,9 @@ class DataFrameCurator(BaseCurator):
         from lamindb.core._settings import settings

         if not self._validated:
-            raise ValidationError(
-                f"Data object is not validated, please run {colors.yellow('validate()')}!"
-            )
+            self.validate()
+            if not self._validated:
+                raise ValidationError("Dataset does not validate. Please curate.")

         # Make sure all labels are saved in the current instance
         verbosity = settings.verbosity
@@ -442,7 +442,7 @@ class AnnDataCurator(DataFrameCurator):
             exclude=exclude,
             check_valid_keys=False,
         )
-        self._obs_fields = categoricals
+        self._obs_fields = categoricals or {}
         self._check_valid_keys(extra={"var_index"})

     @property
@@ -563,9 +563,9 @@ class AnnDataCurator(DataFrameCurator):
             A saved artifact record.
         """
         if not self._validated:
-            raise ValidationError(
-                f"Data object is not validated, please run {colors.yellow('validate()')}!"
-            )
+            self.validate()
+            if not self._validated:
+                raise ValidationError("Dataset does not validate. Please curate.")

         self._artifact = save_artifact(
             self._data,
@@ -1498,14 +1498,14 @@ def log_saved_labels(

     if k == "without reference" and validated_only:
         msg = colors.yellow(
-            f"{len(labels)} non-validated categories are not saved in {model_field}: {labels}!"
+            f"{len(labels)} non-validated values are not saved in {model_field}: {labels}!"
         )
         lookup_print = (
             f"lookup().{key}" if key.isidentifier() else f".lookup()['{key}']"
         )

         hint = f".add_new_from('{key}')"
-        msg += f"\n      → to lookup categories, use {lookup_print}"
+        msg += f"\n      → to lookup values, use {lookup_print}"
         msg += (
             f"\n      → to save, run {colors.yellow(hint)}"
             if save_function == "add_new_from"
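
`save_artifact()` now attempts `validate()` itself instead of immediately raising when the dataset has not been validated yet. A toy sketch of the flow, assuming categoricals that map onto `ULabel`:

    import lamindb as ln
    import pandas as pd

    df = pd.DataFrame({"cell_type": ["B cell", "T cell"]})  # toy data
    curator = ln.Curator.from_df(df, categoricals={"cell_type": ln.ULabel.name})
    # 0.76.6: this triggers validate() under the hood and only raises
    # ValidationError if the dataset still does not validate
    artifact = curator.save_artifact(description="curated dataset")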
lamindb/_filter.py CHANGED
@@ -10,7 +10,7 @@ if TYPE_CHECKING:
     from lnschema_core import Record


-def filter(registry: type[Record], **expressions) -> QuerySet:
+def filter(registry: type[Record], *queries, **expressions) -> QuerySet:
     """See :meth:`~lamindb.core.Record.filter`."""
     _using_key = None
     if "_using_key" in expressions:
@@ -18,6 +18,6 @@ def filter(registry: type[Record], **expressions) -> QuerySet:
     expressions = process_expressions(registry, expressions)
     qs = QuerySet(model=registry, using=_using_key)
     if len(expressions) > 0:
-        return qs.filter(**expressions)
+        return qs.filter(*queries, **expressions)
     else:
         return qs
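
`filter()` now forwards positional arguments to the underlying Django queryset, so `Q` expressions (re-exported at the package level in `__init__.py` above) can be combined with keyword predicates. A sketch:

    import lamindb as ln
    from django.db.models import Q  # also importable as ln.Q

    # OR two conditions via a Q expression alongside a keyword filter
    artifacts = ln.Artifact.filter(
        Q(suffix=".h5ad") | Q(suffix=".zarr"), visibility=1
    ).all()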
lamindb/_finish.py CHANGED
@@ -94,6 +94,7 @@ def save_context_core(
     transform: Transform,
     filepath: Path,
     finished_at: bool = False,
+    ignore_non_consecutive: bool | None = None,
     from_cli: bool = False,
 ) -> str | None:
     import lamindb as ln
@@ -118,17 +119,18 @@ def save_context_core(
             logger.error("install nbproject & jupytext: pip install nbproject jupytext")
             return None
         notebook_content = read_notebook(filepath)  # type: ignore
-        is_consecutive = check_consecutiveness(
-            notebook_content, calling_statement=".finish()"
-        )
-        if not is_consecutive:
-            msg = "   Do you still want to proceed with finishing? (y/n) "
-            if os.getenv("LAMIN_TESTING") is None:
-                response = input(msg)
-            else:
-                response = "n"
-            if response != "y":
-                return "aborted-non-consecutive"
+        if not ignore_non_consecutive:  # ignore_non_consecutive is None or False
+            is_consecutive = check_consecutiveness(
+                notebook_content, calling_statement=".finish()"
+            )
+            if not is_consecutive:
+                response = "n"  # ignore_non_consecutive == False
+                if ignore_non_consecutive is None:
+                    response = input(
+                        "   Do you still want to proceed with finishing? (y/n) "
+                    )
+                if response != "y":
+                    return "aborted-non-consecutive"
         # write the report
         report_path = ln_setup.settings.storage.cache_dir / filepath.name.replace(
             ".ipynb", ".html"
@@ -144,23 +146,20 @@ def save_context_core(
         hash, _ = hash_file(source_code_path)  # ignore hash_type for now
         if (
             transform._source_code_artifact_id is not None
-            or transform.source_code is not None
+            or transform.source_code is not None  # equivalent to transform.hash is not None
         ):
             # check if the hash of the transform source code matches
             # (for scripts, we already run the same logic in track() - we can deduplicate the call at some point)
-            if transform.hash is not None:
-                condition = hash != transform.hash
-            else:
-                condition = hash != transform._source_code_artifact.hash
-            if condition:
-                if os.getenv("LAMIN_TESTING") is None:
-                    # in test, auto-confirm overwrite
-                    response = input(
-                        f"You are about to replace (overwrite) existing source code (hash '{transform._source_code_artifact.hash}') for transform version"
-                        f" '{transform.version}'. Proceed? (y/n)"
-                    )
-                else:
-                    response = "y"
+            ref_hash = (
+                transform.hash
+                if transform.hash is not None
+                else transform._source_code_artifact.hash
+            )
+            if hash != ref_hash:
+                response = input(
+                    f"You are about to overwrite existing source code (hash '{ref_hash}') for Transform('{transform.uid}')."
+                    f"Proceed? (y/n)"
+                )
                 if response == "y":
                     transform.source_code = source_code_path.read_text()
                     transform.hash = hash
@@ -210,13 +209,9 @@ def save_context_core(
     if run.report_id is not None:
         hash, _ = hash_file(report_path)  # ignore hash_type for now
         if hash != run.report.hash:
-            if os.getenv("LAMIN_TESTING") is None:
-                # in test, auto-confirm overwrite
-                response = input(
-                    f"You are about to replace (overwrite) an existing run report (hash '{run.report.hash}'). Proceed? (y/n)"
-                )
-            else:
-                response = "y"
+            response = input(
+                f"You are about to overwrite an existing report (hash '{run.report.hash}') for Run('{run.uid}'). Proceed? (y/n)"
+            )
             if response == "y":
                 run.report.replace(report_path)
                 run.report.save(upload=True)
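
`save_context_core()` gains `ignore_non_consecutive`: `True` skips the notebook consecutiveness check entirely, `False` aborts with "aborted-non-consecutive" without prompting, and `None` keeps the interactive prompt. Assuming the flag is forwarded through `ln.finish()` (the forwarding is not shown in this diff), usage would look like:

    import lamindb as ln

    ln.track()  # at the top of the notebook
    # ... cells may have been executed out of order ...
    ln.finish(ignore_non_consecutive=True)  # don't prompt about non-consecutive cells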
lamindb/_record.py CHANGED
@@ -12,7 +12,7 @@ from lamin_utils._lookup import Lookup
 from lamindb_setup._connect_instance import get_owner_name_from_identifier
 from lamindb_setup.core._docs import doc_args
 from lamindb_setup.core._hub_core import connect_instance
-from lnschema_core.models import Collection, IsVersioned, Record
+from lnschema_core.models import IsVersioned, Record

 from lamindb._utils import attach_func_to_class_method
 from lamindb.core._settings import settings
@@ -36,6 +36,13 @@ def init_self_from_db(self: Record, existing_record: Record):
     self._state.db = "default"


+def update_attributes(record: Record, attributes: dict[str, str]):
+    for key, value in attributes.items():
+        if getattr(record, key) != value:
+            logger.warning(f"updated {key} from {getattr(record, key)} to {value}")
+            setattr(record, key, value)
+
+
 def validate_required_fields(record: Record, kwargs):
     required_fields = {
         k.name for k in record._meta.fields if not k.null and k.default is None
@@ -123,11 +130,11 @@ def __init__(record: Record, *args, **kwargs):

 @classmethod  # type:ignore
 @doc_args(Record.filter.__doc__)
-def filter(cls, **expressions) -> QuerySet:
+def filter(cls, *queries, **expressions) -> QuerySet:
     """{}"""  # noqa: D415
     from lamindb._filter import filter

-    return filter(cls, **expressions)
+    return filter(cls, *queries, **expressions)


 @classmethod  # type:ignore
@@ -430,6 +437,7 @@ def update_fk_to_default_db(
     records: Record | list[Record] | QuerySet,
     fk: str,
     using_key: str | None,
+    transfer_logs: dict,
 ):
     record = records[0] if isinstance(records, (List, QuerySet)) else records
     if hasattr(record, f"{fk}_id") and getattr(record, f"{fk}_id") is not None:
@@ -442,7 +450,9 @@ def update_fk_to_default_db(
             from copy import copy

             fk_record_default = copy(fk_record)
-            transfer_to_default_db(fk_record_default, using_key, save=True)
+            transfer_to_default_db(
+                fk_record_default, using_key, save=True, transfer_logs=transfer_logs
+            )
     if isinstance(records, (List, QuerySet)):
         for r in records:
             setattr(r, f"{fk}", None)
@@ -460,66 +470,66 @@ FKBULK = [
 ]


-def transfer_fk_to_default_db_bulk(records: list | QuerySet, using_key: str | None):
+def transfer_fk_to_default_db_bulk(
+    records: list | QuerySet, using_key: str | None, transfer_logs: dict
+):
     for fk in FKBULK:
-        update_fk_to_default_db(records, fk, using_key)
+        update_fk_to_default_db(records, fk, using_key, transfer_logs=transfer_logs)


 def transfer_to_default_db(
     record: Record,
     using_key: str | None,
+    *,
+    transfer_logs: dict,
     save: bool = False,
-    mute: bool = False,
     transfer_fk: bool = True,
 ) -> Record | None:
-    db = record._state.db
-    if db is not None and db != "default" and using_key is None:
-        registry = record.__class__
-        record_on_default = registry.objects.filter(uid=record.uid).one_or_none()
-        if record_on_default is not None:
-            logger.important(
-                f"returning existing {record.__class__.__name__}(uid='{record.uid}') on default database"
-            )
-            return record_on_default
-        if not mute:
-            logger.hint(f"saving from instance {db} to default instance: {record}")
-        from lamindb.core._context import context
-        from lamindb.core._data import WARNING_RUN_TRANSFORM
-
-        if hasattr(record, "created_by_id"):
-            # this line is needed to point created_by to default db
-            record.created_by = None
-            record.created_by_id = ln_setup.settings.user.id
-        if hasattr(record, "run_id"):
-            record.run = None
-            if context.run is not None:
-                record.run_id = context.run.id
-            else:
-                if not settings.creation.artifact_silence_missing_run_warning:
-                    logger.warning(WARNING_RUN_TRANSFORM)
-                record.run_id = None
-        if hasattr(record, "transform_id") and record._meta.model_name != "run":
-            record.transform = None
-            if context.run is not None:
-                record.transform_id = context.run.transform_id
-            else:
-                record.transform_id = None
-        # transfer other foreign key fields
-        fk_fields = [
-            i.name
-            for i in record._meta.fields
-            if i.get_internal_type() == "ForeignKey"
-            if i.name not in {"created_by", "run", "transform"}
-        ]
-        if not transfer_fk:
-            # don't transfer fk fields that are already bulk transferred
-            fk_fields = [fk for fk in fk_fields if fk not in FKBULK]
-        for fk in fk_fields:
-            update_fk_to_default_db(record, fk, using_key)
-        record.id = None
-        record._state.db = "default"
-        if save:
-            record.save()
+    from lamindb.core._context import context
+    from lamindb.core._data import WARNING_RUN_TRANSFORM
+
+    registry = record.__class__
+    record_on_default = registry.objects.filter(uid=record.uid).one_or_none()
+    record_str = f"{record.__class__.__name__}(uid='{record.uid}')"
+    if record_on_default is not None:
+        transfer_logs["mapped"].append(record_str)
+        return record_on_default
+    else:
+        transfer_logs["transferred"].append(record_str)
+
+    if hasattr(record, "created_by_id"):
+        record.created_by = None
+        record.created_by_id = ln_setup.settings.user.id
+    if hasattr(record, "run_id"):
+        record.run = None
+        if context.run is not None:
+            record.run_id = context.run.id
+        else:
+            if not settings.creation.artifact_silence_missing_run_warning:
+                logger.warning(WARNING_RUN_TRANSFORM)
+            record.run_id = None
+    if hasattr(record, "transform_id") and record._meta.model_name != "run":
+        record.transform = None
+        if context.run is not None:
+            record.transform_id = context.run.transform_id
+        else:
+            record.transform_id = None
+    # transfer other foreign key fields
+    fk_fields = [
+        i.name
+        for i in record._meta.fields
+        if i.get_internal_type() == "ForeignKey"
+        if i.name not in {"created_by", "run", "transform"}
+    ]
+    if not transfer_fk:
+        # don't transfer fk fields that are already bulk transferred
+        fk_fields = [fk for fk in fk_fields if fk not in FKBULK]
+    for fk in fk_fields:
+        update_fk_to_default_db(record, fk, using_key, transfer_logs=transfer_logs)
+    record.id = None
+    record._state.db = "default"
+    if save:
+        record.save()
     return None


@@ -534,10 +544,20 @@ def save(self, *args, **kwargs) -> Record:
     if self.__class__.__name__ == "Collection" and self.id is not None:
         # when creating a new collection without being able to access artifacts
         artifacts = self.ordered_artifacts.list()
-    # transfer of the record to the default db with fk fields
-    result = transfer_to_default_db(self, using_key)
-    if result is not None:
-        init_self_from_db(self, result)
+    pre_existing_record = None
+    # consider records that are being transferred from other databases
+    transfer_logs: dict[str, list[str]] = {"mapped": [], "transferred": []}
+    if db is not None and db != "default" and using_key is None:
+        if isinstance(self, IsVersioned):
+            if not self.is_latest:
+                raise NotImplementedError(
+                    "You are attempting to transfer a record that's not the latest in its version history. This is currently not supported."
+                )
+        pre_existing_record = transfer_to_default_db(
+            self, using_key, transfer_logs=transfer_logs
+        )
+    if pre_existing_record is not None:
+        init_self_from_db(self, pre_existing_record)
     else:
         # save versioned record
         if isinstance(self, IsVersioned) and self._revises is not None:
@@ -571,8 +591,10 @@ def save(self, *args, **kwargs) -> Record:
         self_on_db._state.db = db
         self_on_db.pk = pk_on_db  # manually set the primary key
         self_on_db.features = FeatureManager(self_on_db)
-        self.features._add_from(self_on_db)
-        self.labels.add_from(self_on_db)
+        self.features._add_from(self_on_db, transfer_logs=transfer_logs)
+        self.labels.add_from(self_on_db, transfer_logs=transfer_logs)
+        for k, v in transfer_logs.items():
+            logger.important(f"{k} records: {', '.join(v)}")
     return self

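
`transfer_to_default_db()` now records every touched record in `transfer_logs`, and `save()` ends a cross-database transfer with a summary of which records were mapped onto existing ones versus newly transferred; it also refuses to transfer records that are not the latest in their version history. A sketch of such a transfer (the source instance name is just an example):

    import lamindb as ln

    # query a record in another instance and save it into the current one
    artifact = ln.Artifact.using("account/source-instance").filter(suffix=".h5ad").first()
    artifact.save()  # transfers the record to the default database
    # save() now logs a summary along the lines of:
    #   mapped records: ULabel(uid='...')
    #   transferred records: Artifact(uid='...')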