lamindb 0.76.5__py3-none-any.whl → 0.76.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
lamindb/__init__.py CHANGED
@@ -41,7 +41,7 @@ Modules and settings.
  """
  
  # denote a release candidate for 0.1.0 with 0.1rc1, 0.1a1, 0.1b1, etc.
- __version__ = "0.76.5"
+ __version__ = "0.76.7"
  
  import os as _os
  
lamindb/_artifact.py CHANGED
@@ -28,15 +28,15 @@ from lnschema_core.types import (
  )
  
  from lamindb._utils import attach_func_to_class_method
- from lamindb.core._data import HasFeatures, _track_run_input
+ from lamindb.core._data import _track_run_input, describe, view_lineage
  from lamindb.core._settings import settings
  from lamindb.core.exceptions import IntegrityError
+ from lamindb.core.loaders import load_to_memory
  from lamindb.core.storage import (
      LocalPathClasses,
      UPath,
      delete_storage,
      infer_suffix,
-     load_to_memory,
      write_to_disk,
  )
  from lamindb.core.storage.paths import (
@@ -334,7 +334,7 @@ def get_artifact_kwargs_from_data(
          # save the information that this artifact was previously
          # produced by another run
          if artifact.run is not None:
-             artifact.run.output_artifacts_with_later_updates.add(artifact)
+             artifact.run._output_artifacts_with_later_updates.add(artifact)
          # update the run of the artifact with the latest run
          stat_or_artifact.run = run
          stat_or_artifact.transform = run.transform
@@ -497,13 +497,6 @@ def _check_accessor_artifact(data: Any, accessor: str | None = None):
      return accessor
  
  
- def update_attributes(data: HasFeatures, attributes: Mapping[str, str]):
-     for key, value in attributes.items():
-         if getattr(data, key) != value:
-             logger.warning(f"updated {key} from {getattr(data, key)} to {value}")
-             setattr(data, key, value)
- 
- 
  def __init__(artifact: Artifact, *args, **kwargs):
      artifact.features = FeatureManager(artifact)
      artifact.params = ParamManager(artifact)
@@ -608,7 +601,7 @@ def __init__(artifact: Artifact, *args, **kwargs):
  
      # an object with the same hash already exists
      if isinstance(kwargs_or_artifact, Artifact):
-         from ._record import init_self_from_db
+         from ._record import init_self_from_db, update_attributes
  
          init_self_from_db(artifact, kwargs_or_artifact)
          # adding "key" here is dangerous because key might be auto-populated
@@ -908,14 +901,6 @@ def replace(
      self._to_store = not check_path_in_storage
  
  
- # deprecated
- def backed(
-     self, mode: str = "r", is_run_input: bool | None = None
- ) -> AnnDataAccessor | BackedAccessor | SOMACollection | SOMAExperiment:
-     logger.warning("`.backed()` is deprecated, use `.open()`!'")
-     return self.open(mode, is_run_input)
- 
- 
  # docstring handled through attach_func_to_class_method
  def open(
      self, mode: str = "r", is_run_input: bool | None = None
@@ -970,24 +955,8 @@ def open(
      return access
  
  
- # docstring handled through attach_func_to_class_method
- def load(self, is_run_input: bool | None = None, stream: bool = False, **kwargs) -> Any:
-     if hasattr(self, "_memory_rep") and self._memory_rep is not None:
-         access_memory = self._memory_rep
-     else:
-         using_key = settings._using_key
-         access_memory = load_to_memory(
-             filepath_from_artifact(self, using_key=using_key), stream=stream, **kwargs
-         )
-     # only call if load is successfull
-     _track_run_input(self, is_run_input)
-     return access_memory
- 
- 
- # docstring handled through attach_func_to_class_method
- def cache(self, is_run_input: bool | None = None) -> Path:
-     using_key = settings._using_key
-     filepath = filepath_from_artifact(self, using_key=using_key)
+ # can't really just call .cache in .load because of double tracking
+ def _synchronize_cleanup_on_error(filepath: UPath) -> UPath:
      try:
          cache_path = setup_settings.instance.storage.cloud_to_local(
              filepath, print_progress=True
@@ -1002,6 +971,26 @@ def cache(self, is_run_input: bool | None = None) -> Path:
          elif cache_path.is_dir():
              shutil.rmtree(cache_path)
          raise e
+     return cache_path
+ 
+ 
+ # docstring handled through attach_func_to_class_method
+ def load(self, is_run_input: bool | None = None, **kwargs) -> Any:
+     if hasattr(self, "_memory_rep") and self._memory_rep is not None:
+         access_memory = self._memory_rep
+     else:
+         filepath = filepath_from_artifact(self, using_key=settings._using_key)
+         cache_path = _synchronize_cleanup_on_error(filepath)
+         access_memory = load_to_memory(cache_path, **kwargs)
+     # only call if load is successfull
+     _track_run_input(self, is_run_input)
+     return access_memory
+ 
+ 
+ # docstring handled through attach_func_to_class_method
+ def cache(self, is_run_input: bool | None = None) -> Path:
+     filepath = filepath_from_artifact(self, using_key=settings._using_key)
+     cache_path = _synchronize_cleanup_on_error(filepath)
      # only call if sync is successfull
      _track_run_input(self, is_run_input)
      return cache_path
@@ -1185,5 +1174,5 @@ for name in METHOD_NAMES:
  Artifact._delete_skip_storage = _delete_skip_storage
  Artifact._save_skip_storage = _save_skip_storage
  Artifact.path = path
- Artifact.backed = backed
- Artifact.view_lineage = HasFeatures.view_lineage
+ Artifact.describe = describe
+ Artifact.view_lineage = view_lineage
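Net effect for users of the Artifact API: the deprecated `.backed()` accessor is removed in favor of `.open()`, `.load()` drops its `stream` argument and now always synchronizes to the local cache before reading into memory (via the shared `_synchronize_cleanup_on_error` helper, which also cleans up partial downloads on failure), and `describe`/`view_lineage` are attached as module-level functions. A minimal sketch of the resulting call patterns, assuming an existing artifact in the current instance (the key is hypothetical):

    import lamindb as ln

    # hypothetical key; assumes such an artifact exists in the instance
    artifact = ln.Artifact.filter(key="datasets/example.h5ad").one()

    adata = artifact.load()        # syncs to the local cache first, then loads into memory
    local_path = artifact.cache()  # same sync logic, returns the cached path
    backed = artifact.open()       # streaming access; replaces the removed .backed()
    artifact.describe()            # now wired to lamindb.core._data.describe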
lamindb/_collection.py CHANGED
@@ -17,19 +17,17 @@ from lamindb_setup.core.hashing import hash_set
  from lnschema_core.models import (
      Collection,
      CollectionArtifact,
-     FeatureManager,
      FeatureSet,
  )
  from lnschema_core.types import VisibilityChoice
  
- from lamindb._artifact import update_attributes
  from lamindb._utils import attach_func_to_class_method
- from lamindb.core._data import _track_run_input
+ from lamindb.core._data import _track_run_input, describe, view_lineage
  from lamindb.core._mapped_collection import MappedCollection
  from lamindb.core.versioning import process_revises
  
  from . import Artifact, Run
- from ._record import init_self_from_db
+ from ._record import init_self_from_db, update_attributes
  from .core._data import (
      add_transform_to_kwargs,
      get_run,
@@ -44,12 +42,45 @@ if TYPE_CHECKING:
      from ._query_set import QuerySet
  
  
+ class CollectionFeatureManager:
+     """Query features of artifact in collection."""
+ 
+     def __init__(self, collection: Collection):
+         self._collection = collection
+ 
+     def get_feature_sets_union(self) -> dict[str, FeatureSet]:
+         links_feature_set_artifact = Artifact.feature_sets.through.objects.filter(
+             artifact_id__in=self._collection.artifacts.values_list("id", flat=True)
+         )
+         feature_sets_by_slots = defaultdict(list)
+         for link in links_feature_set_artifact:
+             feature_sets_by_slots[link.slot].append(link.featureset_id)
+         feature_sets_union = {}
+         for slot, feature_set_ids_slot in feature_sets_by_slots.items():
+             feature_set_1 = FeatureSet.get(id=feature_set_ids_slot[0])
+             related_name = feature_set_1._get_related_name()
+             features_registry = getattr(FeatureSet, related_name).field.model
+             # this way of writing the __in statement turned out to be the fastest
+             # evaluated on a link table with 16M entries connecting 500 feature sets with
+             # 60k genes
+             feature_ids = (
+                 features_registry.feature_sets.through.objects.filter(
+                     featureset_id__in=feature_set_ids_slot
+                 )
+                 .values(f"{features_registry.__name__.lower()}_id")
+                 .distinct()
+             )
+             features = features_registry.filter(id__in=feature_ids)
+             feature_sets_union[slot] = FeatureSet(features, dtype=feature_set_1.dtype)
+         return feature_sets_union
+ 
+ 
  def __init__(
      collection: Collection,
      *args,
      **kwargs,
  ):
-     collection.features = FeatureManager(collection)
+     collection.features = CollectionFeatureManager(collection)
      if len(args) == len(collection._meta.concrete_fields):
          super(Collection, collection).__init__(*args, **kwargs)
          return None
@@ -78,9 +109,6 @@ def __init__(
          if "visibility" in kwargs
          else VisibilityChoice.default.value
      )
-     feature_sets: dict[str, FeatureSet] = (
-         kwargs.pop("feature_sets") if "feature_sets" in kwargs else {}
-     )
      if "is_new_version_of" in kwargs:
          logger.warning("`is_new_version_of` will be removed soon, please use `revises`")
          revises = kwargs.pop("is_new_version_of")
@@ -98,7 +126,7 @@ def __init__(
          if not hasattr(artifacts, "__getitem__"):
              raise ValueError("Artifact or List[Artifact] is allowed.")
          assert isinstance(artifacts[0], Artifact)  # type: ignore # noqa: S101
-         hash, feature_sets = from_artifacts(artifacts)  # type: ignore
+         hash = from_artifacts(artifacts)  # type: ignore
      if meta_artifact is not None:
          if not isinstance(meta_artifact, Artifact):
              raise ValueError("meta_artifact has to be an Artifact")
@@ -107,11 +135,6 @@ def __init__(
              raise ValueError(
                  "Save meta_artifact artifact before creating collection!"
              )
-         if not feature_sets:
-             feature_sets = meta_artifact.features._feature_set_by_slot
-         else:
-             if len(meta_artifact.features._feature_set_by_slot) > 0:
-                 logger.info("overwriting feature sets linked to artifact")
      # we ignore collections in trash containing the same hash
      if hash is not None:
          existing_collection = Collection.filter(hash=hash).one_or_none()
@@ -126,7 +149,7 @@ def __init__(
          # save the information that this artifact was previously
          # produced by another run
          if existing_collection.run is not None:
-             existing_collection.run.output_collections_with_later_updates.add(
+             existing_collection.run._output_collections_with_later_updates.add(
                  existing_collection
              )
          # update the run of the artifact with the latest run
@@ -134,11 +157,6 @@
          existing_collection.transform = run.transform
          init_self_from_db(collection, existing_collection)
          update_attributes(collection, {"description": description, "name": name})
-         for slot, feature_set in collection.features._feature_set_by_slot.items():
-             if slot in feature_sets:
-                 if not feature_sets[slot] == feature_set:
-                     collection.feature_sets.remove(feature_set)
-                     logger.warning(f"removing feature set: {feature_set}")
      else:
          kwargs = {}
          add_transform_to_kwargs(kwargs, run)
@@ -161,7 +179,6 @@ def __init__(
      )
      settings.creation.search_names = search_names_setting
      collection._artifacts = artifacts
-     collection._feature_sets = feature_sets
      # register provenance
      if revises is not None:
          _track_run_input(revises, run=run)
@@ -171,61 +188,21 @@
  # internal function, not exposed to user
  def from_artifacts(artifacts: Iterable[Artifact]) -> tuple[str, dict[str, str]]:
      # assert all artifacts are already saved
-     logger.debug("check not saved")
      saved = not any(artifact._state.adding for artifact in artifacts)
      if not saved:
          raise ValueError("Not all artifacts are yet saved, please save them")
-     # query all feature sets of artifacts
-     logger.debug("artifact ids")
-     artifact_ids = [artifact.id for artifact in artifacts]
-     # query all feature sets at the same time rather
-     # than making a single query per artifact
-     logger.debug("links_feature_set_artifact")
-     links_feature_set_artifact = Artifact.feature_sets.through.objects.filter(
-         artifact_id__in=artifact_ids
-     )
-     feature_sets_by_slots = defaultdict(list)
-     logger.debug("slots")
-     for link in links_feature_set_artifact:
-         feature_sets_by_slots[link.slot].append(link.featureset_id)
-     feature_sets_union = {}
-     logger.debug("union")
-     for slot, feature_set_ids_slot in feature_sets_by_slots.items():
-         feature_set_1 = FeatureSet.get(id=feature_set_ids_slot[0])
-         related_name = feature_set_1._get_related_name()
-         features_registry = getattr(FeatureSet, related_name).field.model
-         start_time = logger.debug("run filter")
-         # this way of writing the __in statement turned out to be the fastest
-         # evaluated on a link table with 16M entries connecting 500 feature sets with
-         # 60k genes
-         feature_ids = (
-             features_registry.feature_sets.through.objects.filter(
-                 featureset_id__in=feature_set_ids_slot
-             )
-             .values(f"{features_registry.__name__.lower()}_id")
-             .distinct()
-         )
-         start_time = logger.debug("done, start evaluate", time=start_time)
-         features = features_registry.filter(id__in=feature_ids)
-         feature_sets_union[slot] = FeatureSet(features, dtype=feature_set_1.dtype)
-         start_time = logger.debug("done", time=start_time)
-     # validate consistency of hashes
-     # we do not allow duplicate hashes
-     logger.debug("hashes")
-     # artifact.hash is None for zarr
-     # todo: more careful handling of such cases
+     # validate consistency of hashes - we do not allow duplicate hashes
      hashes = [artifact.hash for artifact in artifacts if artifact.hash is not None]
-     if len(hashes) != len(set(hashes)):
+     hashes_set = set(hashes)
+     if len(hashes) != len(hashes_set):
          seen = set()
          non_unique = [x for x in hashes if x in seen or seen.add(x)]  # type: ignore
          raise ValueError(
              "Please pass artifacts with distinct hashes: these ones are non-unique"
              f" {non_unique}"
          )
-     time = logger.debug("hash")
-     hash = hash_set(set(hashes))
-     logger.debug("done", time=time)
-     return hash, feature_sets_union
+     hash = hash_set(hashes_set)
+     return hash
  
  
  # docstring handled through attach_func_to_class_method
@@ -244,7 +221,12 @@ def mapped(
      is_run_input: bool | None = None,
  ) -> MappedCollection:
      path_list = []
-     for artifact in self.ordered_artifacts.all():
+     if self._state.adding:
+         artifacts = self._artifacts
+         logger.warning("The collection isn't saved, consider calling `.save()`")
+     else:
+         artifacts = self.ordered_artifacts.all()
+     for artifact in artifacts:
          if artifact.suffix not in {".h5ad", ".zarr"}:
              logger.warning(f"Ignoring artifact with suffix {artifact.suffix}")
              continue
@@ -401,3 +383,5 @@ for name in METHOD_NAMES:
  
  Collection.ordered_artifacts = ordered_artifacts
  Collection.data_artifact = data_artifact
+ Collection.describe = describe
+ Collection.view_lineage = view_lineage
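With `CollectionFeatureManager`, the union of the artifacts' feature sets is no longer computed and persisted when a collection is created (the corresponding logic was removed from `from_artifacts` and `__init__`); it is now computed on demand. A hedged sketch, assuming a saved collection whose artifacts link feature sets (the collection name is hypothetical):

    import lamindb as ln

    collection = ln.Collection.filter(name="my-collection").one()
    # computes the per-slot union of the artifacts' feature sets on the fly
    union = collection.features.get_feature_sets_union()
    for slot, feature_set in union.items():
        print(slot, feature_set)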
lamindb/_curate.py CHANGED
@@ -334,9 +334,9 @@ class DataFrameCurator(BaseCurator):
          from lamindb.core._settings import settings
  
          if not self._validated:
-             raise ValidationError(
-                 f"Data object is not validated, please run {colors.yellow('validate()')}!"
-             )
+             self.validate()
+             if not self._validated:
+                 raise ValidationError("Dataset does not validate. Please curate.")
  
          # Make sure all labels are saved in the current instance
          verbosity = settings.verbosity
@@ -442,7 +442,7 @@ class AnnDataCurator(DataFrameCurator):
              exclude=exclude,
              check_valid_keys=False,
          )
-         self._obs_fields = categoricals
+         self._obs_fields = categoricals or {}
          self._check_valid_keys(extra={"var_index"})
  
      @property
@@ -563,9 +563,9 @@ class AnnDataCurator(DataFrameCurator):
              A saved artifact record.
          """
          if not self._validated:
-             raise ValidationError(
-                 f"Data object is not validated, please run {colors.yellow('validate()')}!"
-             )
+             self.validate()
+             if not self._validated:
+                 raise ValidationError("Dataset does not validate. Please curate.")
  
          self._artifact = save_artifact(
              self._data,
@@ -1188,7 +1188,7 @@ def validate_categories(
          print_values = _print_values(non_validated)
          warning_message = (
              f"{colors.red(f'{n_non_validated} terms')} {are} not validated: "
-             f"{colors.red(print_values)}\n → save terms via "
+             f"{colors.red(print_values)}\n → fix typos, remove non-existent values, or save terms via "
              f"{colors.red(non_validated_hint_print)}"
          )
          if logger.indent == "":
@@ -1498,14 +1498,14 @@ def log_saved_labels(
  
          if k == "without reference" and validated_only:
              msg = colors.yellow(
-                 f"{len(labels)} non-validated categories are not saved in {model_field}: {labels}!"
+                 f"{len(labels)} non-validated values are not saved in {model_field}: {labels}!"
              )
              lookup_print = (
                  f"lookup().{key}" if key.isidentifier() else f".lookup()['{key}']"
              )
  
              hint = f".add_new_from('{key}')"
-             msg += f"\n → to lookup categories, use {lookup_print}"
+             msg += f"\n → to lookup values, use {lookup_print}"
              msg += (
                  f"\n → to save, run {colors.yellow(hint)}"
                  if save_function == "add_new_from"
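Behavior change in both curators' `save_artifact()`: instead of failing immediately when `validate()` hasn't been run, they now attempt validation themselves and raise `ValidationError` only if the dataset still doesn't pass. A sketch under the assumption of the DataFrame curator entry point (the constructor name and the column/field choices here are illustrative, not confirmed by this diff):

    import lamindb as ln
    import pandas as pd

    df = pd.DataFrame({"donor": ["D0001", "D0002"]})
    # assumed entry point for DataFrameCurator; categoricals maps columns to registry fields
    curate = ln.Curator.from_df(df, categoricals={"donor": ln.ULabel.name})
    # no explicit curate.validate() call needed anymore: save_artifact() validates first
    artifact = curate.save_artifact(description="example dataset")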
lamindb/_filter.py CHANGED
@@ -10,7 +10,7 @@ if TYPE_CHECKING:
      from lnschema_core import Record
  
  
- def filter(registry: type[Record], **expressions) -> QuerySet:
+ def filter(registry: type[Record], *queries, **expressions) -> QuerySet:
      """See :meth:`~lamindb.core.Record.filter`."""
      _using_key = None
      if "_using_key" in expressions:
@@ -18,6 +18,6 @@ def filter(registry: type[Record], **expressions) -> QuerySet:
      expressions = process_expressions(registry, expressions)
      qs = QuerySet(model=registry, using=_using_key)
      if len(expressions) > 0:
-         return qs.filter(**expressions)
+         return qs.filter(*queries, **expressions)
      else:
          return qs
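`Record.filter()` and the module-level `filter()` now forward positional arguments to Django's `QuerySet.filter`, so `Q` expressions can be combined with keyword lookups. Note that, as written in this hunk, the positional queries are only applied when at least one keyword expression is present (the `len(expressions) > 0` branch). A minimal sketch with hypothetical field values:

    import lamindb as ln
    from django.db.models import Q

    # OR two conditions positionally, AND them with a keyword lookup
    artifacts = ln.Artifact.filter(
        Q(suffix=".h5ad") | Q(suffix=".zarr"),
        visibility=1,
    )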
lamindb/_record.py CHANGED
@@ -12,7 +12,7 @@ from lamin_utils._lookup import Lookup
  from lamindb_setup._connect_instance import get_owner_name_from_identifier
  from lamindb_setup.core._docs import doc_args
  from lamindb_setup.core._hub_core import connect_instance
- from lnschema_core.models import Collection, IsVersioned, Record
+ from lnschema_core.models import IsVersioned, Record
  
  from lamindb._utils import attach_func_to_class_method
  from lamindb.core._settings import settings
@@ -36,6 +36,13 @@ def init_self_from_db(self: Record, existing_record: Record):
      self._state.db = "default"
  
  
+ def update_attributes(record: Record, attributes: dict[str, str]):
+     for key, value in attributes.items():
+         if getattr(record, key) != value:
+             logger.warning(f"updated {key} from {getattr(record, key)} to {value}")
+             setattr(record, key, value)
+ 
+ 
  def validate_required_fields(record: Record, kwargs):
      required_fields = {
          k.name for k in record._meta.fields if not k.null and k.default is None
@@ -123,11 +130,11 @@ def __init__(record: Record, *args, **kwargs):
  
  @classmethod  # type:ignore
  @doc_args(Record.filter.__doc__)
- def filter(cls, **expressions) -> QuerySet:
+ def filter(cls, *queries, **expressions) -> QuerySet:
      """{}"""  # noqa: D415
      from lamindb._filter import filter
  
-     return filter(cls, **expressions)
+     return filter(cls, *queries, **expressions)
  
  
  @classmethod  # type:ignore
@@ -430,6 +437,7 @@ def update_fk_to_default_db(
      records: Record | list[Record] | QuerySet,
      fk: str,
      using_key: str | None,
+     transfer_logs: dict,
  ):
      record = records[0] if isinstance(records, (List, QuerySet)) else records
      if hasattr(record, f"{fk}_id") and getattr(record, f"{fk}_id") is not None:
@@ -442,7 +450,9 @@
          from copy import copy
  
          fk_record_default = copy(fk_record)
-         transfer_to_default_db(fk_record_default, using_key, save=True)
+         transfer_to_default_db(
+             fk_record_default, using_key, save=True, transfer_logs=transfer_logs
+         )
          if isinstance(records, (List, QuerySet)):
              for r in records:
                  setattr(r, f"{fk}", None)
@@ -460,66 +470,66 @@ FKBULK = [
  ]
  
  
- def transfer_fk_to_default_db_bulk(records: list | QuerySet, using_key: str | None):
+ def transfer_fk_to_default_db_bulk(
+     records: list | QuerySet, using_key: str | None, transfer_logs: dict
+ ):
      for fk in FKBULK:
-         update_fk_to_default_db(records, fk, using_key)
+         update_fk_to_default_db(records, fk, using_key, transfer_logs=transfer_logs)
  
  
  def transfer_to_default_db(
      record: Record,
      using_key: str | None,
+     *,
+     transfer_logs: dict,
      save: bool = False,
-     mute: bool = False,
      transfer_fk: bool = True,
  ) -> Record | None:
-     db = record._state.db
-     if db is not None and db != "default" and using_key is None:
-         registry = record.__class__
-         record_on_default = registry.objects.filter(uid=record.uid).one_or_none()
-         if record_on_default is not None:
-             logger.important(
-                 f"returning existing {record.__class__.__name__}(uid='{record.uid}') on default database"
-             )
-             return record_on_default
-         if not mute:
-             logger.hint(f"saving from instance {db} to default instance: {record}")
-         from lamindb.core._context import context
-         from lamindb.core._data import WARNING_RUN_TRANSFORM
- 
-         if hasattr(record, "created_by_id"):
-             # this line is needed to point created_by to default db
-             record.created_by = None
-             record.created_by_id = ln_setup.settings.user.id
-         if hasattr(record, "run_id"):
-             record.run = None
-             if context.run is not None:
-                 record.run_id = context.run.id
-             else:
-                 if not settings.creation.artifact_silence_missing_run_warning:
-                     logger.warning(WARNING_RUN_TRANSFORM)
-                 record.run_id = None
-         if hasattr(record, "transform_id") and record._meta.model_name != "run":
-             record.transform = None
-             if context.run is not None:
-                 record.transform_id = context.run.transform_id
-             else:
-                 record.transform_id = None
-         # transfer other foreign key fields
-         fk_fields = [
-             i.name
-             for i in record._meta.fields
-             if i.get_internal_type() == "ForeignKey"
-             if i.name not in {"created_by", "run", "transform"}
-         ]
-         if not transfer_fk:
-             # don't transfer fk fields that are already bulk transferred
-             fk_fields = [fk for fk in fk_fields if fk not in FKBULK]
-         for fk in fk_fields:
-             update_fk_to_default_db(record, fk, using_key)
-         record.id = None
-         record._state.db = "default"
-         if save:
-             record.save()
+     from lamindb.core._context import context
+     from lamindb.core._data import WARNING_RUN_TRANSFORM
+ 
+     registry = record.__class__
+     record_on_default = registry.objects.filter(uid=record.uid).one_or_none()
+     record_str = f"{record.__class__.__name__}(uid='{record.uid}')"
+     if record_on_default is not None:
+         transfer_logs["mapped"].append(record_str)
+         return record_on_default
+     else:
+         transfer_logs["transferred"].append(record_str)
+ 
+     if hasattr(record, "created_by_id"):
+         record.created_by = None
+         record.created_by_id = ln_setup.settings.user.id
+     if hasattr(record, "run_id"):
+         record.run = None
+         if context.run is not None:
+             record.run_id = context.run.id
+         else:
+             if not settings.creation.artifact_silence_missing_run_warning:
+                 logger.warning(WARNING_RUN_TRANSFORM)
+             record.run_id = None
+     if hasattr(record, "transform_id") and record._meta.model_name != "run":
+         record.transform = None
+         if context.run is not None:
+             record.transform_id = context.run.transform_id
+         else:
+             record.transform_id = None
+     # transfer other foreign key fields
+     fk_fields = [
+         i.name
+         for i in record._meta.fields
+         if i.get_internal_type() == "ForeignKey"
+         if i.name not in {"created_by", "run", "transform"}
+     ]
+     if not transfer_fk:
+         # don't transfer fk fields that are already bulk transferred
+         fk_fields = [fk for fk in fk_fields if fk not in FKBULK]
+     for fk in fk_fields:
+         update_fk_to_default_db(record, fk, using_key, transfer_logs=transfer_logs)
+     record.id = None
+     record._state.db = "default"
+     if save:
+         record.save()
      return None
  
  
@@ -534,10 +544,20 @@ def save(self, *args, **kwargs) -> Record:
      if self.__class__.__name__ == "Collection" and self.id is not None:
          # when creating a new collection without being able to access artifacts
          artifacts = self.ordered_artifacts.list()
-     # transfer of the record to the default db with fk fields
-     result = transfer_to_default_db(self, using_key)
-     if result is not None:
-         init_self_from_db(self, result)
+     pre_existing_record = None
+     # consider records that are being transferred from other databases
+     transfer_logs: dict[str, list[str]] = {"mapped": [], "transferred": []}
+     if db is not None and db != "default" and using_key is None:
+         if isinstance(self, IsVersioned):
+             if not self.is_latest:
+                 raise NotImplementedError(
+                     "You are attempting to transfer a record that's not the latest in its version history. This is currently not supported."
+                 )
+         pre_existing_record = transfer_to_default_db(
+             self, using_key, transfer_logs=transfer_logs
+         )
+     if pre_existing_record is not None:
+         init_self_from_db(self, pre_existing_record)
      else:
          # save versioned record
          if isinstance(self, IsVersioned) and self._revises is not None:
@@ -571,8 +591,10 @@ def save(self, *args, **kwargs) -> Record:
          self_on_db._state.db = db
          self_on_db.pk = pk_on_db  # manually set the primary key
          self_on_db.features = FeatureManager(self_on_db)
-         self.features._add_from(self_on_db)
-         self.labels.add_from(self_on_db)
+         self.features._add_from(self_on_db, transfer_logs=transfer_logs)
+         self.labels.add_from(self_on_db, transfer_logs=transfer_logs)
+         for k, v in transfer_logs.items():
+             logger.important(f"{k} records: {', '.join(v)}")
      return self
  
  
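The cross-database transfer path in `save()` now threads a `transfer_logs` dict through `transfer_to_default_db` and the FK helpers: records already present in the default database by uid are collected under "mapped", newly copied ones under "transferred", and a single summary line per category is logged at the end, replacing the per-record `logger.important`/`logger.hint` messages (the removed `mute` flag). A sketch of a transfer, assuming access to a second instance (the slug is hypothetical):

    import lamindb as ln

    # pull a record from another instance and save it into the default one
    artifact = (
        ln.Artifact.using("account/other-instance").filter(suffix=".h5ad").first()
    )
    artifact.save()
    # expected log shape per the summary loop above, e.g.:
    #   mapped records: User(uid='...'), Storage(uid='...')
    #   transferred records: Artifact(uid='...')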
lamindb/_transform.py CHANGED
@@ -39,9 +39,23 @@ def __init__(transform: Transform, *args, **kwargs):
      )
      if revises is None:
          if key is not None:
-             revises = Transform.filter(key=key).order_by("-created_at").first()
-         elif uid is not None and not uid.endswith("0000"):
-             revises = Transform.filter(uid__startswith=uid[:-4]).one_or_none()
+             revises = (
+                 Transform.filter(key=key, is_latest=True)
+                 .order_by("-created_at")
+                 .first()
+             )
+         elif uid is not None:
+             revises = (
+                 Transform.filter(uid__startswith=uid[:-4], is_latest=True)
+                 .order_by("-created_at")
+                 .first()
+             )
+         if revises is not None and uid is not None and uid == revises.uid:
+             from ._record import init_self_from_db, update_attributes
+ 
+             init_self_from_db(transform, revises)
+             update_attributes(transform, {"name": name})
+             return None
      if revises is not None and key is not None and revises.key != key:
          note = message_update_key_in_version_family(
              suid=revises.stem_uid,
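Two changes to how `Transform` resolves what it revises: lookups are now restricted to `is_latest=True`, the uid-based branch no longer skips uids ending in "0000", and when the passed uid exactly matches the latest revision's uid, the constructor returns the existing record (updating its name via `update_attributes`) instead of creating a duplicate. A sketch, assuming a 16-character transform uid whose last four characters encode the version (the uid is hypothetical):

    import lamindb as ln

    transform = ln.Transform(uid="AbCdEfGhIjKl0000", name="My analysis")
    transform.save()

    # same uid again: now resolves to the existing record and updates its name,
    # rather than constructing a new one
    same = ln.Transform(uid="AbCdEfGhIjKl0000", name="My analysis v2")
    assert same.id == transform.id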