lamindb 0.76.5__py3-none-any.whl → 0.76.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
lamindb/__init__.py CHANGED
@@ -41,7 +41,7 @@ Modules and settings.
  """
  
  # denote a release candidate for 0.1.0 with 0.1rc1, 0.1a1, 0.1b1, etc.
- __version__ = "0.76.5"
+ __version__ = "0.76.7"
  
  import os as _os
  
lamindb/_artifact.py CHANGED
@@ -28,15 +28,15 @@ from lnschema_core.types import (
  )
  
  from lamindb._utils import attach_func_to_class_method
- from lamindb.core._data import HasFeatures, _track_run_input
+ from lamindb.core._data import _track_run_input, describe, view_lineage
  from lamindb.core._settings import settings
  from lamindb.core.exceptions import IntegrityError
+ from lamindb.core.loaders import load_to_memory
  from lamindb.core.storage import (
      LocalPathClasses,
      UPath,
      delete_storage,
      infer_suffix,
-     load_to_memory,
      write_to_disk,
  )
  from lamindb.core.storage.paths import (
@@ -334,7 +334,7 @@ def get_artifact_kwargs_from_data(
          # save the information that this artifact was previously
          # produced by another run
          if artifact.run is not None:
-             artifact.run.output_artifacts_with_later_updates.add(artifact)
+             artifact.run._output_artifacts_with_later_updates.add(artifact)
          # update the run of the artifact with the latest run
          stat_or_artifact.run = run
          stat_or_artifact.transform = run.transform
@@ -497,13 +497,6 @@ def _check_accessor_artifact(data: Any, accessor: str | None = None):
      return accessor
  
  
- def update_attributes(data: HasFeatures, attributes: Mapping[str, str]):
-     for key, value in attributes.items():
-         if getattr(data, key) != value:
-             logger.warning(f"updated {key} from {getattr(data, key)} to {value}")
-             setattr(data, key, value)
- 
- 
  def __init__(artifact: Artifact, *args, **kwargs):
      artifact.features = FeatureManager(artifact)
      artifact.params = ParamManager(artifact)
@@ -608,7 +601,7 @@ def __init__(artifact: Artifact, *args, **kwargs):
  
      # an object with the same hash already exists
      if isinstance(kwargs_or_artifact, Artifact):
-         from ._record import init_self_from_db
+         from ._record import init_self_from_db, update_attributes
  
          init_self_from_db(artifact, kwargs_or_artifact)
          # adding "key" here is dangerous because key might be auto-populated
@@ -908,14 +901,6 @@ def replace(
      self._to_store = not check_path_in_storage
  
  
- # deprecated
- def backed(
-     self, mode: str = "r", is_run_input: bool | None = None
- ) -> AnnDataAccessor | BackedAccessor | SOMACollection | SOMAExperiment:
-     logger.warning("`.backed()` is deprecated, use `.open()`!'")
-     return self.open(mode, is_run_input)
- 
- 
  # docstring handled through attach_func_to_class_method
  def open(
      self, mode: str = "r", is_run_input: bool | None = None
@@ -970,24 +955,8 @@ def open(
      return access
  
  
- # docstring handled through attach_func_to_class_method
- def load(self, is_run_input: bool | None = None, stream: bool = False, **kwargs) -> Any:
-     if hasattr(self, "_memory_rep") and self._memory_rep is not None:
-         access_memory = self._memory_rep
-     else:
-         using_key = settings._using_key
-         access_memory = load_to_memory(
-             filepath_from_artifact(self, using_key=using_key), stream=stream, **kwargs
-         )
-     # only call if load is successfull
-     _track_run_input(self, is_run_input)
-     return access_memory
- 
- 
- # docstring handled through attach_func_to_class_method
- def cache(self, is_run_input: bool | None = None) -> Path:
-     using_key = settings._using_key
-     filepath = filepath_from_artifact(self, using_key=using_key)
+ # can't really just call .cache in .load because of double tracking
+ def _synchronize_cleanup_on_error(filepath: UPath) -> UPath:
      try:
          cache_path = setup_settings.instance.storage.cloud_to_local(
              filepath, print_progress=True
@@ -1002,6 +971,26 @@ def cache(self, is_run_input: bool | None = None) -> Path:
          elif cache_path.is_dir():
              shutil.rmtree(cache_path)
          raise e
+     return cache_path
+ 
+ 
+ # docstring handled through attach_func_to_class_method
+ def load(self, is_run_input: bool | None = None, **kwargs) -> Any:
+     if hasattr(self, "_memory_rep") and self._memory_rep is not None:
+         access_memory = self._memory_rep
+     else:
+         filepath = filepath_from_artifact(self, using_key=settings._using_key)
+         cache_path = _synchronize_cleanup_on_error(filepath)
+         access_memory = load_to_memory(cache_path, **kwargs)
+     # only call if load is successfull
+     _track_run_input(self, is_run_input)
+     return access_memory
+ 
+ 
+ # docstring handled through attach_func_to_class_method
+ def cache(self, is_run_input: bool | None = None) -> Path:
+     filepath = filepath_from_artifact(self, using_key=settings._using_key)
+     cache_path = _synchronize_cleanup_on_error(filepath)
      # only call if sync is successfull
      _track_run_input(self, is_run_input)
      return cache_path
@@ -1185,5 +1174,5 @@ for name in METHOD_NAMES:
  Artifact._delete_skip_storage = _delete_skip_storage
  Artifact._save_skip_storage = _save_skip_storage
  Artifact.path = path
- Artifact.backed = backed
- Artifact.view_lineage = HasFeatures.view_lineage
+ Artifact.describe = describe
+ Artifact.view_lineage = view_lineage
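Net effect for users of the Artifact API: the deprecated `.backed()` accessor is removed in favor of `.open()`, `.load()` drops its `stream` argument and now always synchronizes to the local cache before reading into memory (via the shared `_synchronize_cleanup_on_error` helper, which also cleans up partial downloads on failure), and `describe`/`view_lineage` are attached as module-level functions. A minimal sketch of the resulting call patterns, assuming an existing artifact in the current instance (the key is hypothetical):

    import lamindb as ln

    # hypothetical key; assumes such an artifact exists in the instance
    artifact = ln.Artifact.filter(key="datasets/example.h5ad").one()

    adata = artifact.load()        # syncs to the local cache first, then loads into memory
    local_path = artifact.cache()  # same sync logic, returns the cached path
    backed = artifact.open()       # streaming access; replaces the removed .backed()
    artifact.describe()            # now wired to lamindb.core._data.describe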
lamindb/_collection.py CHANGED
@@ -17,19 +17,17 @@ from lamindb_setup.core.hashing import hash_set
  from lnschema_core.models import (
      Collection,
      CollectionArtifact,
-     FeatureManager,
      FeatureSet,
  )
  from lnschema_core.types import VisibilityChoice
  
- from lamindb._artifact import update_attributes
  from lamindb._utils import attach_func_to_class_method
- from lamindb.core._data import _track_run_input
+ from lamindb.core._data import _track_run_input, describe, view_lineage
  from lamindb.core._mapped_collection import MappedCollection
  from lamindb.core.versioning import process_revises
  
  from . import Artifact, Run
- from ._record import init_self_from_db
+ from ._record import init_self_from_db, update_attributes
  from .core._data import (
      add_transform_to_kwargs,
      get_run,
@@ -44,12 +42,45 @@ if TYPE_CHECKING:
      from ._query_set import QuerySet
  
  
+ class CollectionFeatureManager:
+     """Query features of artifact in collection."""
+ 
+     def __init__(self, collection: Collection):
+         self._collection = collection
+ 
+     def get_feature_sets_union(self) -> dict[str, FeatureSet]:
+         links_feature_set_artifact = Artifact.feature_sets.through.objects.filter(
+             artifact_id__in=self._collection.artifacts.values_list("id", flat=True)
+         )
+         feature_sets_by_slots = defaultdict(list)
+         for link in links_feature_set_artifact:
+             feature_sets_by_slots[link.slot].append(link.featureset_id)
+         feature_sets_union = {}
+         for slot, feature_set_ids_slot in feature_sets_by_slots.items():
+             feature_set_1 = FeatureSet.get(id=feature_set_ids_slot[0])
+             related_name = feature_set_1._get_related_name()
+             features_registry = getattr(FeatureSet, related_name).field.model
+             # this way of writing the __in statement turned out to be the fastest
+             # evaluated on a link table with 16M entries connecting 500 feature sets with
+             # 60k genes
+             feature_ids = (
+                 features_registry.feature_sets.through.objects.filter(
+                     featureset_id__in=feature_set_ids_slot
+                 )
+                 .values(f"{features_registry.__name__.lower()}_id")
+                 .distinct()
+             )
+             features = features_registry.filter(id__in=feature_ids)
+             feature_sets_union[slot] = FeatureSet(features, dtype=feature_set_1.dtype)
+         return feature_sets_union
+ 
+ 
  def __init__(
      collection: Collection,
      *args,
      **kwargs,
  ):
-     collection.features = FeatureManager(collection)
+     collection.features = CollectionFeatureManager(collection)
      if len(args) == len(collection._meta.concrete_fields):
          super(Collection, collection).__init__(*args, **kwargs)
          return None
@@ -78,9 +109,6 @@ def __init__(
          if "visibility" in kwargs
          else VisibilityChoice.default.value
      )
-     feature_sets: dict[str, FeatureSet] = (
-         kwargs.pop("feature_sets") if "feature_sets" in kwargs else {}
-     )
      if "is_new_version_of" in kwargs:
          logger.warning("`is_new_version_of` will be removed soon, please use `revises`")
          revises = kwargs.pop("is_new_version_of")
@@ -98,7 +126,7 @@ def __init__(
          if not hasattr(artifacts, "__getitem__"):
              raise ValueError("Artifact or List[Artifact] is allowed.")
          assert isinstance(artifacts[0], Artifact)  # type: ignore # noqa: S101
-         hash, feature_sets = from_artifacts(artifacts)  # type: ignore
+         hash = from_artifacts(artifacts)  # type: ignore
      if meta_artifact is not None:
          if not isinstance(meta_artifact, Artifact):
              raise ValueError("meta_artifact has to be an Artifact")
@@ -107,11 +135,6 @@ def __init__(
              raise ValueError(
                  "Save meta_artifact artifact before creating collection!"
              )
-         if not feature_sets:
-             feature_sets = meta_artifact.features._feature_set_by_slot
-         else:
-             if len(meta_artifact.features._feature_set_by_slot) > 0:
-                 logger.info("overwriting feature sets linked to artifact")
      # we ignore collections in trash containing the same hash
      if hash is not None:
          existing_collection = Collection.filter(hash=hash).one_or_none()
@@ -126,7 +149,7 @@ def __init__(
          # save the information that this artifact was previously
          # produced by another run
          if existing_collection.run is not None:
-             existing_collection.run.output_collections_with_later_updates.add(
+             existing_collection.run._output_collections_with_later_updates.add(
                  existing_collection
              )
          # update the run of the artifact with the latest run
@@ -134,11 +157,6 @@
          existing_collection.transform = run.transform
          init_self_from_db(collection, existing_collection)
          update_attributes(collection, {"description": description, "name": name})
-         for slot, feature_set in collection.features._feature_set_by_slot.items():
-             if slot in feature_sets:
-                 if not feature_sets[slot] == feature_set:
-                     collection.feature_sets.remove(feature_set)
-                     logger.warning(f"removing feature set: {feature_set}")
      else:
          kwargs = {}
          add_transform_to_kwargs(kwargs, run)
@@ -161,7 +179,6 @@ def __init__(
      )
      settings.creation.search_names = search_names_setting
      collection._artifacts = artifacts
-     collection._feature_sets = feature_sets
      # register provenance
      if revises is not None:
          _track_run_input(revises, run=run)
@@ -171,61 +188,21 @@
  # internal function, not exposed to user
  def from_artifacts(artifacts: Iterable[Artifact]) -> tuple[str, dict[str, str]]:
      # assert all artifacts are already saved
-     logger.debug("check not saved")
      saved = not any(artifact._state.adding for artifact in artifacts)
      if not saved:
          raise ValueError("Not all artifacts are yet saved, please save them")
-     # query all feature sets of artifacts
-     logger.debug("artifact ids")
-     artifact_ids = [artifact.id for artifact in artifacts]
-     # query all feature sets at the same time rather
-     # than making a single query per artifact
-     logger.debug("links_feature_set_artifact")
-     links_feature_set_artifact = Artifact.feature_sets.through.objects.filter(
-         artifact_id__in=artifact_ids
-     )
-     feature_sets_by_slots = defaultdict(list)
-     logger.debug("slots")
-     for link in links_feature_set_artifact:
-         feature_sets_by_slots[link.slot].append(link.featureset_id)
-     feature_sets_union = {}
-     logger.debug("union")
-     for slot, feature_set_ids_slot in feature_sets_by_slots.items():
-         feature_set_1 = FeatureSet.get(id=feature_set_ids_slot[0])
-         related_name = feature_set_1._get_related_name()
-         features_registry = getattr(FeatureSet, related_name).field.model
-         start_time = logger.debug("run filter")
-         # this way of writing the __in statement turned out to be the fastest
-         # evaluated on a link table with 16M entries connecting 500 feature sets with
-         # 60k genes
-         feature_ids = (
-             features_registry.feature_sets.through.objects.filter(
-                 featureset_id__in=feature_set_ids_slot
-             )
-             .values(f"{features_registry.__name__.lower()}_id")
-             .distinct()
-         )
-         start_time = logger.debug("done, start evaluate", time=start_time)
-         features = features_registry.filter(id__in=feature_ids)
-         feature_sets_union[slot] = FeatureSet(features, dtype=feature_set_1.dtype)
-         start_time = logger.debug("done", time=start_time)
-     # validate consistency of hashes
-     # we do not allow duplicate hashes
-     logger.debug("hashes")
-     # artifact.hash is None for zarr
-     # todo: more careful handling of such cases
+     # validate consistency of hashes - we do not allow duplicate hashes
      hashes = [artifact.hash for artifact in artifacts if artifact.hash is not None]
-     if len(hashes) != len(set(hashes)):
+     hashes_set = set(hashes)
+     if len(hashes) != len(hashes_set):
          seen = set()
          non_unique = [x for x in hashes if x in seen or seen.add(x)]  # type: ignore
          raise ValueError(
              "Please pass artifacts with distinct hashes: these ones are non-unique"
              f" {non_unique}"
          )
-     time = logger.debug("hash")
-     hash = hash_set(set(hashes))
-     logger.debug("done", time=time)
-     return hash, feature_sets_union
+     hash = hash_set(hashes_set)
+     return hash
  
  
  # docstring handled through attach_func_to_class_method
@@ -244,7 +221,12 @@ def mapped(
      is_run_input: bool | None = None,
  ) -> MappedCollection:
      path_list = []
-     for artifact in self.ordered_artifacts.all():
+     if self._state.adding:
+         artifacts = self._artifacts
+         logger.warning("The collection isn't saved, consider calling `.save()`")
+     else:
+         artifacts = self.ordered_artifacts.all()
+     for artifact in artifacts:
          if artifact.suffix not in {".h5ad", ".zarr"}:
              logger.warning(f"Ignoring artifact with suffix {artifact.suffix}")
              continue
@@ -401,3 +383,5 @@ for name in METHOD_NAMES:
  
  Collection.ordered_artifacts = ordered_artifacts
  Collection.data_artifact = data_artifact
+ Collection.describe = describe
+ Collection.view_lineage = view_lineage
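With `CollectionFeatureManager`, the union of the artifacts' feature sets is no longer computed and persisted when a collection is created (the corresponding logic was removed from `from_artifacts` and `__init__`); it is now computed on demand. A hedged sketch, assuming a saved collection whose artifacts link feature sets (the collection name is hypothetical):

    import lamindb as ln

    collection = ln.Collection.filter(name="my-collection").one()
    # computes the per-slot union of the artifacts' feature sets on the fly
    union = collection.features.get_feature_sets_union()
    for slot, feature_set in union.items():
        print(slot, feature_set)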
lamindb/_curate.py CHANGED
@@ -334,9 +334,9 @@ class DataFrameCurator(BaseCurator):
          from lamindb.core._settings import settings
  
          if not self._validated:
-             raise ValidationError(
-                 f"Data object is not validated, please run {colors.yellow('validate()')}!"
-             )
+             self.validate()
+             if not self._validated:
+                 raise ValidationError("Dataset does not validate. Please curate.")
  
          # Make sure all labels are saved in the current instance
          verbosity = settings.verbosity
@@ -442,7 +442,7 @@ class AnnDataCurator(DataFrameCurator):
              exclude=exclude,
              check_valid_keys=False,
          )
-         self._obs_fields = categoricals
+         self._obs_fields = categoricals or {}
          self._check_valid_keys(extra={"var_index"})
  
      @property
@@ -563,9 +563,9 @@ class AnnDataCurator(DataFrameCurator):
              A saved artifact record.
          """
          if not self._validated:
-             raise ValidationError(
-                 f"Data object is not validated, please run {colors.yellow('validate()')}!"
-             )
+             self.validate()
+             if not self._validated:
+                 raise ValidationError("Dataset does not validate. Please curate.")
  
          self._artifact = save_artifact(
              self._data,
@@ -1188,7 +1188,7 @@ def validate_categories(
          print_values = _print_values(non_validated)
          warning_message = (
              f"{colors.red(f'{n_non_validated} terms')} {are} not validated: "
-             f"{colors.red(print_values)}\n → save terms via "
+             f"{colors.red(print_values)}\n → fix typos, remove non-existent values, or save terms via "
              f"{colors.red(non_validated_hint_print)}"
          )
          if logger.indent == "":
@@ -1498,14 +1498,14 @@ def log_saved_labels(
  
          if k == "without reference" and validated_only:
              msg = colors.yellow(
-                 f"{len(labels)} non-validated categories are not saved in {model_field}: {labels}!"
+                 f"{len(labels)} non-validated values are not saved in {model_field}: {labels}!"
              )
              lookup_print = (
                  f"lookup().{key}" if key.isidentifier() else f".lookup()['{key}']"
              )
  
              hint = f".add_new_from('{key}')"
-             msg += f"\n → to lookup categories, use {lookup_print}"
+             msg += f"\n → to lookup values, use {lookup_print}"
              msg += (
                  f"\n → to save, run {colors.yellow(hint)}"
                  if save_function == "add_new_from"
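Behavior change in both curators' `save_artifact()`: instead of failing immediately when `validate()` hasn't been run, they now attempt validation themselves and raise `ValidationError` only if the dataset still doesn't pass. A sketch under the assumption of the DataFrame curator entry point (the constructor name and the column/field choices here are illustrative, not confirmed by this diff):

    import lamindb as ln
    import pandas as pd

    df = pd.DataFrame({"donor": ["D0001", "D0002"]})
    # assumed entry point for DataFrameCurator; categoricals maps columns to registry fields
    curate = ln.Curator.from_df(df, categoricals={"donor": ln.ULabel.name})
    # no explicit curate.validate() call needed anymore: save_artifact() validates first
    artifact = curate.save_artifact(description="example dataset")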
lamindb/_filter.py CHANGED
@@ -10,7 +10,7 @@ if TYPE_CHECKING:
      from lnschema_core import Record
  
  
- def filter(registry: type[Record], **expressions) -> QuerySet:
+ def filter(registry: type[Record], *queries, **expressions) -> QuerySet:
      """See :meth:`~lamindb.core.Record.filter`."""
      _using_key = None
      if "_using_key" in expressions:
@@ -18,6 +18,6 @@ def filter(registry: type[Record], **expressions) -> QuerySet:
      expressions = process_expressions(registry, expressions)
      qs = QuerySet(model=registry, using=_using_key)
      if len(expressions) > 0:
-         return qs.filter(**expressions)
+         return qs.filter(*queries, **expressions)
      else:
          return qs
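`Record.filter()` and the module-level `filter()` now forward positional arguments to Django's `QuerySet.filter`, so `Q` expressions can be combined with keyword lookups. Note that, as written in this hunk, the positional queries are only applied when at least one keyword expression is present (the `len(expressions) > 0` branch). A minimal sketch with hypothetical field values:

    import lamindb as ln
    from django.db.models import Q

    # OR two conditions positionally, AND them with a keyword lookup
    artifacts = ln.Artifact.filter(
        Q(suffix=".h5ad") | Q(suffix=".zarr"),
        visibility=1,
    )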
lamindb/_record.py CHANGED
@@ -12,7 +12,7 @@ from lamin_utils._lookup import Lookup
  from lamindb_setup._connect_instance import get_owner_name_from_identifier
  from lamindb_setup.core._docs import doc_args
  from lamindb_setup.core._hub_core import connect_instance
- from lnschema_core.models import Collection, IsVersioned, Record
+ from lnschema_core.models import IsVersioned, Record
  
  from lamindb._utils import attach_func_to_class_method
  from lamindb.core._settings import settings
@@ -36,6 +36,13 @@ def init_self_from_db(self: Record, existing_record: Record):
      self._state.db = "default"
  
  
+ def update_attributes(record: Record, attributes: dict[str, str]):
+     for key, value in attributes.items():
+         if getattr(record, key) != value:
+             logger.warning(f"updated {key} from {getattr(record, key)} to {value}")
+             setattr(record, key, value)
+ 
+ 
  def validate_required_fields(record: Record, kwargs):
      required_fields = {
          k.name for k in record._meta.fields if not k.null and k.default is None
@@ -123,11 +130,11 @@ def __init__(record: Record, *args, **kwargs):
  
  @classmethod  # type:ignore
  @doc_args(Record.filter.__doc__)
- def filter(cls, **expressions) -> QuerySet:
+ def filter(cls, *queries, **expressions) -> QuerySet:
      """{}"""  # noqa: D415
      from lamindb._filter import filter
  
-     return filter(cls, **expressions)
+     return filter(cls, *queries, **expressions)
  
  
  @classmethod  # type:ignore
@@ -430,6 +437,7 @@ def update_fk_to_default_db(
      records: Record | list[Record] | QuerySet,
      fk: str,
      using_key: str | None,
+     transfer_logs: dict,
  ):
      record = records[0] if isinstance(records, (List, QuerySet)) else records
      if hasattr(record, f"{fk}_id") and getattr(record, f"{fk}_id") is not None:
@@ -442,7 +450,9 @@
          from copy import copy
  
          fk_record_default = copy(fk_record)
-         transfer_to_default_db(fk_record_default, using_key, save=True)
+         transfer_to_default_db(
+             fk_record_default, using_key, save=True, transfer_logs=transfer_logs
+         )
          if isinstance(records, (List, QuerySet)):
              for r in records:
                  setattr(r, f"{fk}", None)
@@ -460,66 +470,66 @@ FKBULK = [
  ]
  
  
- def transfer_fk_to_default_db_bulk(records: list | QuerySet, using_key: str | None):
+ def transfer_fk_to_default_db_bulk(
+     records: list | QuerySet, using_key: str | None, transfer_logs: dict
+ ):
      for fk in FKBULK:
-         update_fk_to_default_db(records, fk, using_key)
+         update_fk_to_default_db(records, fk, using_key, transfer_logs=transfer_logs)
  
  
  def transfer_to_default_db(
      record: Record,
      using_key: str | None,
+     *,
+     transfer_logs: dict,
      save: bool = False,
-     mute: bool = False,
      transfer_fk: bool = True,
  ) -> Record | None:
-     db = record._state.db
-     if db is not None and db != "default" and using_key is None:
-         registry = record.__class__
-         record_on_default = registry.objects.filter(uid=record.uid).one_or_none()
-         if record_on_default is not None:
-             logger.important(
-                 f"returning existing {record.__class__.__name__}(uid='{record.uid}') on default database"
-             )
-             return record_on_default
-         if not mute:
-             logger.hint(f"saving from instance {db} to default instance: {record}")
-         from lamindb.core._context import context
-         from lamindb.core._data import WARNING_RUN_TRANSFORM
- 
-         if hasattr(record, "created_by_id"):
-             # this line is needed to point created_by to default db
-             record.created_by = None
-             record.created_by_id = ln_setup.settings.user.id
-         if hasattr(record, "run_id"):
-             record.run = None
-             if context.run is not None:
-                 record.run_id = context.run.id
-             else:
-                 if not settings.creation.artifact_silence_missing_run_warning:
-                     logger.warning(WARNING_RUN_TRANSFORM)
-                 record.run_id = None
-         if hasattr(record, "transform_id") and record._meta.model_name != "run":
-             record.transform = None
-             if context.run is not None:
-                 record.transform_id = context.run.transform_id
-             else:
-                 record.transform_id = None
-         # transfer other foreign key fields
-         fk_fields = [
-             i.name
-             for i in record._meta.fields
-             if i.get_internal_type() == "ForeignKey"
-             if i.name not in {"created_by", "run", "transform"}
-         ]
-         if not transfer_fk:
-             # don't transfer fk fields that are already bulk transferred
-             fk_fields = [fk for fk in fk_fields if fk not in FKBULK]
-         for fk in fk_fields:
-             update_fk_to_default_db(record, fk, using_key)
-         record.id = None
-         record._state.db = "default"
-         if save:
-             record.save()
+     from lamindb.core._context import context
+     from lamindb.core._data import WARNING_RUN_TRANSFORM
+ 
+     registry = record.__class__
+     record_on_default = registry.objects.filter(uid=record.uid).one_or_none()
+     record_str = f"{record.__class__.__name__}(uid='{record.uid}')"
+     if record_on_default is not None:
+         transfer_logs["mapped"].append(record_str)
+         return record_on_default
+     else:
+         transfer_logs["transferred"].append(record_str)
+ 
+     if hasattr(record, "created_by_id"):
+         record.created_by = None
+         record.created_by_id = ln_setup.settings.user.id
+     if hasattr(record, "run_id"):
+         record.run = None
+         if context.run is not None:
+             record.run_id = context.run.id
+         else:
+             if not settings.creation.artifact_silence_missing_run_warning:
+                 logger.warning(WARNING_RUN_TRANSFORM)
+             record.run_id = None
+     if hasattr(record, "transform_id") and record._meta.model_name != "run":
+         record.transform = None
+         if context.run is not None:
+             record.transform_id = context.run.transform_id
+         else:
+             record.transform_id = None
+     # transfer other foreign key fields
+     fk_fields = [
+         i.name
+         for i in record._meta.fields
+         if i.get_internal_type() == "ForeignKey"
+         if i.name not in {"created_by", "run", "transform"}
+     ]
+     if not transfer_fk:
+         # don't transfer fk fields that are already bulk transferred
+         fk_fields = [fk for fk in fk_fields if fk not in FKBULK]
+     for fk in fk_fields:
+         update_fk_to_default_db(record, fk, using_key, transfer_logs=transfer_logs)
+     record.id = None
+     record._state.db = "default"
+     if save:
+         record.save()
      return None
  
  
@@ -534,10 +544,20 @@ def save(self, *args, **kwargs) -> Record:
      if self.__class__.__name__ == "Collection" and self.id is not None:
          # when creating a new collection without being able to access artifacts
          artifacts = self.ordered_artifacts.list()
-     # transfer of the record to the default db with fk fields
-     result = transfer_to_default_db(self, using_key)
-     if result is not None:
-         init_self_from_db(self, result)
+     pre_existing_record = None
+     # consider records that are being transferred from other databases
+     transfer_logs: dict[str, list[str]] = {"mapped": [], "transferred": []}
+     if db is not None and db != "default" and using_key is None:
+         if isinstance(self, IsVersioned):
+             if not self.is_latest:
+                 raise NotImplementedError(
+                     "You are attempting to transfer a record that's not the latest in its version history. This is currently not supported."
+                 )
+         pre_existing_record = transfer_to_default_db(
+             self, using_key, transfer_logs=transfer_logs
+         )
+     if pre_existing_record is not None:
+         init_self_from_db(self, pre_existing_record)
      else:
          # save versioned record
          if isinstance(self, IsVersioned) and self._revises is not None:
@@ -571,8 +591,10 @@ def save(self, *args, **kwargs) -> Record:
          self_on_db._state.db = db
          self_on_db.pk = pk_on_db  # manually set the primary key
          self_on_db.features = FeatureManager(self_on_db)
-         self.features._add_from(self_on_db)
-         self.labels.add_from(self_on_db)
+         self.features._add_from(self_on_db, transfer_logs=transfer_logs)
+         self.labels.add_from(self_on_db, transfer_logs=transfer_logs)
+         for k, v in transfer_logs.items():
+             logger.important(f"{k} records: {', '.join(v)}")
      return self
  
  
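The cross-database transfer path in `save()` now threads a `transfer_logs` dict through `transfer_to_default_db` and the FK helpers: records already present in the default database by uid are collected under "mapped", newly copied ones under "transferred", and a single summary line per category is logged at the end, replacing the per-record `logger.important`/`logger.hint` messages (the removed `mute` flag). A sketch of a transfer, assuming access to a second instance (the slug is hypothetical):

    import lamindb as ln

    # pull a record from another instance and save it into the default one
    artifact = (
        ln.Artifact.using("account/other-instance").filter(suffix=".h5ad").first()
    )
    artifact.save()
    # expected log shape per the summary loop above, e.g.:
    #   mapped records: User(uid='...'), Storage(uid='...')
    #   transferred records: Artifact(uid='...')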
lamindb/_transform.py CHANGED
@@ -39,9 +39,23 @@ def __init__(transform: Transform, *args, **kwargs):
      )
      if revises is None:
          if key is not None:
-             revises = Transform.filter(key=key).order_by("-created_at").first()
-         elif uid is not None and not uid.endswith("0000"):
-             revises = Transform.filter(uid__startswith=uid[:-4]).one_or_none()
+             revises = (
+                 Transform.filter(key=key, is_latest=True)
+                 .order_by("-created_at")
+                 .first()
+             )
+         elif uid is not None:
+             revises = (
+                 Transform.filter(uid__startswith=uid[:-4], is_latest=True)
+                 .order_by("-created_at")
+                 .first()
+             )
+         if revises is not None and uid is not None and uid == revises.uid:
+             from ._record import init_self_from_db, update_attributes
+ 
+             init_self_from_db(transform, revises)
+             update_attributes(transform, {"name": name})
+             return None
      if revises is not None and key is not None and revises.key != key:
          note = message_update_key_in_version_family(
              suid=revises.stem_uid,
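Two changes to how `Transform` resolves what it revises: lookups are now restricted to `is_latest=True`, the uid-based branch no longer skips uids ending in "0000", and when the passed uid exactly matches the latest revision's uid, the constructor returns the existing record (updating its name via `update_attributes`) instead of creating a duplicate. A sketch, assuming a 16-character transform uid whose last four characters encode the version (the uid is hypothetical):

    import lamindb as ln

    transform = ln.Transform(uid="AbCdEfGhIjKl0000", name="My analysis")
    transform.save()

    # same uid again: now resolves to the existing record and updates its name,
    # rather than constructing a new one
    same = ln.Transform(uid="AbCdEfGhIjKl0000", name="My analysis v2")
    assert same.id == transform.id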