lamindb 1.4.0__py3-none-any.whl → 1.5.1__py3-none-any.whl

Files changed (57)
  1. lamindb/__init__.py +52 -36
  2. lamindb/_finish.py +17 -10
  3. lamindb/_tracked.py +1 -1
  4. lamindb/base/__init__.py +3 -1
  5. lamindb/base/fields.py +40 -22
  6. lamindb/base/ids.py +1 -94
  7. lamindb/base/types.py +2 -0
  8. lamindb/base/uids.py +117 -0
  9. lamindb/core/_context.py +203 -102
  10. lamindb/core/_settings.py +38 -25
  11. lamindb/core/datasets/__init__.py +11 -4
  12. lamindb/core/datasets/_core.py +5 -5
  13. lamindb/core/datasets/_small.py +0 -93
  14. lamindb/core/datasets/mini_immuno.py +172 -0
  15. lamindb/core/loaders.py +1 -1
  16. lamindb/core/storage/_backed_access.py +100 -6
  17. lamindb/core/storage/_polars_lazy_df.py +51 -0
  18. lamindb/core/storage/_pyarrow_dataset.py +15 -30
  19. lamindb/core/storage/_tiledbsoma.py +29 -13
  20. lamindb/core/storage/objects.py +6 -0
  21. lamindb/core/subsettings/__init__.py +2 -0
  22. lamindb/core/subsettings/_annotation_settings.py +11 -0
  23. lamindb/curators/__init__.py +7 -3349
  24. lamindb/curators/_legacy.py +2056 -0
  25. lamindb/curators/core.py +1534 -0
  26. lamindb/errors.py +11 -0
  27. lamindb/examples/__init__.py +27 -0
  28. lamindb/examples/schemas/__init__.py +12 -0
  29. lamindb/examples/schemas/_anndata.py +25 -0
  30. lamindb/examples/schemas/_simple.py +19 -0
  31. lamindb/integrations/_vitessce.py +8 -5
  32. lamindb/migrations/0091_alter_featurevalue_options_alter_space_options_and_more.py +24 -0
  33. lamindb/migrations/0092_alter_artifactfeaturevalue_artifact_and_more.py +75 -0
  34. lamindb/migrations/0093_alter_schemacomponent_unique_together.py +16 -0
  35. lamindb/models/__init__.py +4 -1
  36. lamindb/models/_describe.py +21 -4
  37. lamindb/models/_feature_manager.py +382 -287
  38. lamindb/models/_label_manager.py +8 -2
  39. lamindb/models/artifact.py +177 -106
  40. lamindb/models/artifact_set.py +122 -0
  41. lamindb/models/collection.py +73 -52
  42. lamindb/models/core.py +1 -1
  43. lamindb/models/feature.py +51 -17
  44. lamindb/models/has_parents.py +69 -14
  45. lamindb/models/project.py +1 -1
  46. lamindb/models/query_manager.py +221 -22
  47. lamindb/models/query_set.py +247 -172
  48. lamindb/models/record.py +65 -247
  49. lamindb/models/run.py +4 -4
  50. lamindb/models/save.py +8 -2
  51. lamindb/models/schema.py +456 -184
  52. lamindb/models/transform.py +2 -2
  53. lamindb/models/ulabel.py +8 -5
  54. {lamindb-1.4.0.dist-info → lamindb-1.5.1.dist-info}/METADATA +6 -6
  55. {lamindb-1.4.0.dist-info → lamindb-1.5.1.dist-info}/RECORD +57 -43
  56. {lamindb-1.4.0.dist-info → lamindb-1.5.1.dist-info}/LICENSE +0 -0
  57. {lamindb-1.4.0.dist-info → lamindb-1.5.1.dist-info}/WHEEL +0 -0
lamindb/models/_label_manager.py
@@ -24,7 +24,7 @@ from ._describe import (
     TYPE_WIDTH,
     VALUES_WIDTH,
     describe_header,
-    print_rich_tree,
+    format_rich_tree,
 )
 from ._django import get_artifact_with_related, get_related_model
 from ._relations import dict_related_model_to_related_name
@@ -182,8 +182,14 @@ class LabelManager:
         self._host = host
 
     def __repr__(self) -> str:
+        return self.describe(return_str=True)
+
+    def describe(self, return_str=True) -> str:
+        """Describe the labels."""
         tree = describe_labels(self._host)
-        return print_rich_tree(tree, fallback="no linked labels")
+        return format_rich_tree(
+            tree, fallback="no linked labels", return_str=return_str
+        )
 
     def add(
         self,
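With this change, a label tree can be captured as a string rather than only printed, and `__repr__` routes through the same code path. A minimal usage sketch (the artifact `key` is hypothetical)::

    import lamindb as ln

    artifact = ln.Artifact.get(key="examples/my_file.parquet")  # hypothetical key
    print(artifact.labels)  # __repr__ now renders the formatted label tree
    tree_str = artifact.labels.describe()  # returns the tree as a str (return_str defaults to True here)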
lamindb/models/artifact.py
@@ -5,7 +5,7 @@ import os
 import shutil
 from collections import defaultdict
 from pathlib import Path, PurePath, PurePosixPath
-from typing import TYPE_CHECKING, Any, Union, overload
+from typing import TYPE_CHECKING, Any, Literal, Union, overload
 
 import fsspec
 import lamindb_setup as ln_setup
@@ -17,7 +17,6 @@ from django.db.models import CASCADE, PROTECT, Q
 from lamin_utils import colors, logger
 from lamindb_setup import settings as setup_settings
 from lamindb_setup._init_instance import register_storage_in_instance
-from lamindb_setup.core import doc_args
 from lamindb_setup.core._settings_storage import init_storage
 from lamindb_setup.core.hashing import HASH_LENGTH, hash_dir, hash_file
 from lamindb_setup.core.types import UPathStr
@@ -48,6 +47,11 @@ from ..core.storage import (
     write_to_disk,
 )
 from ..core.storage._anndata_accessor import _anndata_n_observations
+from ..core.storage._backed_access import (
+    _track_writes_factory,
+    backed_access,
+)
+from ..core.storage._polars_lazy_df import POLARS_SUFFIXES
 from ..core.storage._pyarrow_dataset import PYARROW_SUFFIXES
 from ..core.storage._tiledbsoma import _soma_n_observations
 from ..core.storage.paths import (
@@ -94,8 +98,6 @@ WARNING_RUN_TRANSFORM = "no run & transform got linked, call `ln.track()` & re-run"
 
 WARNING_NO_INPUT = "run input wasn't tracked, call `ln.track()` and re-run"
 
-DEBUG_KWARGS_DOC = "**kwargs: Internal arguments for debugging."
-
 try:
     from ..core.storage._zarr import identify_zarr_type
 except ImportError:
@@ -105,9 +107,10 @@ except ImportError:
 
 
 if TYPE_CHECKING:
-    from collections.abc import Iterable
+    from collections.abc import Iterable, Iterator
 
     from mudata import MuData  # noqa: TC004
+    from polars import LazyFrame as PolarsLazyFrame
     from pyarrow.dataset import Dataset as PyArrowDataset
     from spatialdata import SpatialData  # noqa: TC004
     from tiledbsoma import Collection as SOMACollection
@@ -311,10 +314,9 @@ def get_stat_or_artifact(
         result = Artifact.objects.using(instance).filter(hash=hash).all()
         artifact_with_same_hash_exists = len(result) > 0
     else:
-        storage_id = settings.storage.id
         result = (
             Artifact.objects.using(instance)
-            .filter(Q(hash=hash) | Q(key=key, storage_id=storage_id))
+            .filter(Q(hash=hash) | Q(key=key, storage=settings.storage.record))
            .order_by("-created_at")
            .all()
        )
@@ -759,15 +761,15 @@ def _describe_sqlite(self, print_types: bool = False):  # for artifact & collection
     return tree
 
 
-def describe_artifact_collection(self):  # for artifact & collection
-    from ._describe import print_rich_tree
+def describe_artifact_collection(self, return_str: bool = False) -> str | None:
+    from ._describe import format_rich_tree
 
     if not self._state.adding and connections[self._state.db].vendor == "postgresql":
         tree = _describe_postgres(self)
     else:
         tree = _describe_sqlite(self)
 
-    print_rich_tree(tree)
+    return format_rich_tree(tree, return_str=return_str)
 
 
 def validate_feature(feature: Feature, records: list[Record]) -> None:
@@ -909,7 +911,7 @@ def add_labels(
     for registry_name, records in records_by_registry.items():
         if not from_curator and feature.name in internal_features:
             raise ValidationError(
-                "Cannot manually annotate internal feature with label. Please use ln.Curator"
+                "Cannot manually annotate a feature measured *within* the dataset. Please use a Curator."
             )
         if registry_name not in feature.dtype:
             if not feature.dtype.startswith("cat"):
@@ -962,7 +964,7 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
 
     Create an artifact **from a local file or folder**::
 
-        artifact = ln.Artifact("./my_file.parquet", key="example_datasets/my_file.parquet").save()
+        artifact = ln.Artifact("./my_file.parquet", key="examples/my_file.parquet").save()
         artifact = ln.Artifact("./my_folder", key="project1/my_folder").save()
 
     Calling `.save()` copies or uploads the file to the default storage location of your lamindb instance.
@@ -977,29 +979,12 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
 
     You can make a **new version** of an artifact by passing an existing `key`::
 
-        artifact_v2 = ln.Artifact("./my_file.parquet", key="example_datasets/my_file.parquet").save()
+        artifact_v2 = ln.Artifact("./my_file.parquet", key="examples/my_file.parquet").save()
         artifact_v2.versions.df()  # see all versions
 
-    .. dropdown:: Why does the API look this way?
-
-        It's inspired by APIs building on AWS S3.
-
-        Both boto3 and quilt select a bucket (a storage location in LaminDB) and define a target path through a `key` argument.
-
-        In `boto3 <https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/s3/bucket/upload_file.html>`__::
-
-            # signature: S3.Bucket.upload_file(filepath, key)
-            import boto3
-            s3 = boto3.resource('s3')
-            bucket = s3.Bucket('mybucket')
-            bucket.upload_file('/tmp/hello.txt', 'hello.txt')
-
-        In `quilt3 <https://docs.quiltdata.com/api-reference/bucket>`__::
-
-            # signature: quilt3.Bucket.put_file(key, filepath)
-            import quilt3
-            bucket = quilt3.Bucket('mybucket')
-            bucket.put_file('hello.txt', '/tmp/hello.txt')
+    You can write artifacts to other storage locations by switching the current default storage location (:attr:`~lamindb.core.Settings.storage`)::
+
+        ln.settings.storage = "s3://some-bucket"
 
     Sometimes you want to **avoid mapping the artifact into a path hierarchy**, and you only pass `description`::
 
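In practice, switching the default storage location redirects subsequent saves; a sketch with hypothetical bucket names::

    import lamindb as ln

    ln.settings.storage = "s3://some-bucket"  # hypothetical bucket; saves now write here
    artifact = ln.Artifact("./my_file.parquet", key="examples/my_file.parquet").save()
    ln.settings.storage = "s3://other-bucket"  # switch again at any time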
@@ -1034,6 +1019,27 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
     In concurrent workloads where the same artifact is created repeatedly at the exact same time, `.save()`
     detects the duplication and will return the existing artifact.
 
+    .. dropdown:: Why does the constructor look the way it does?
+
+        It's inspired by APIs building on AWS S3.
+
+        Both boto3 and quilt select a bucket (a storage location in LaminDB) and define a target path through a `key` argument.
+
+        In `boto3 <https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/s3/bucket/upload_file.html>`__::
+
+            # signature: S3.Bucket.upload_file(filepath, key)
+            import boto3
+            s3 = boto3.resource('s3')
+            bucket = s3.Bucket('mybucket')
+            bucket.upload_file('/tmp/hello.txt', 'hello.txt')
+
+        In `quilt3 <https://docs.quiltdata.com/api-reference/bucket>`__::
+
+            # signature: quilt3.Bucket.put_file(key, filepath)
+            import quilt3
+            bucket = quilt3.Bucket('mybucket')
+            bucket.put_file('hello.txt', '/tmp/hello.txt')
+
     See Also:
         :class:`~lamindb.Storage`
             Storage locations for artifacts.
@@ -1089,7 +1095,7 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
         ln.Artifact.filter(scientist="Barbara McClintock")
 
     Features may or may not be part of the artifact content in storage. For
-    instance, the :class:`~lamindb.Curator` flow validates the columns of a
+    instance, the :class:`~lamindb.curators.DataFrameCurator` flow validates the columns of a
     `DataFrame`-like artifact and annotates it with features corresponding to
     these columns. `artifact.features.add_values`, by contrast, does not
     validate the content of the artifact.
@@ -1227,7 +1233,7 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
         default=None,
         related_name="validated_artifacts",
     )
-    """The schema that validated this artifact in a :class:`~lamindb.curators.Curator`."""
+    """The schema that validated this artifact in a :class:`~lamindb.curators.core.Curator`."""
     feature_sets: Schema = models.ManyToManyField(
         Schema, related_name="artifacts", through="ArtifactSchema"
     )
@@ -1525,7 +1531,7 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
         ::
 
             artifact = ln.Artifact.get("tCUkRcaEjTjhtozp0000")
-            artifact = ln.Artifact.get(key="my_datasets/my_file.parquet")
+            artifact = ln.Artifact.get(key="examples/my_file.parquet")
         """
         from .query_set import QuerySet
@@ -1550,7 +1556,7 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
 
         Query by fields::
 
-            ln.Artifact.filter(key="my_datasets/my_file.parquet")
+            ln.Artifact.filter(key="examples/my_file.parquet")
 
         Query by features::
 
@@ -1610,7 +1616,7 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
         schema: Schema | None = None,
         **kwargs,
     ) -> Artifact:
-        """Create from `DataFrame`, validate & link features.
+        """Create from `DataFrame`, optionally validate & annotate.
 
         Args:
             df: A `DataFrame` object.
@@ -1619,7 +1625,7 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
             description: A description.
             revises: An old version of the artifact.
             run: The run that creates the artifact.
-            schema: A schema to validate & annotate.
+            schema: A schema that defines how to validate & annotate.
 
         See Also:
             :meth:`~lamindb.Collection`
@@ -1627,19 +1633,30 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
             :class:`~lamindb.Feature`
                 Track features.
 
-        Example::
+        Example:
 
-            import lamindb as ln
+            No validation and annotation::
+
+                import lamindb as ln
+
+                df = ln.core.datasets.mini_immuno.get_dataset1()
+                artifact = ln.Artifact.from_df(df, key="examples/dataset1.parquet").save()
+
+            With validation and annotation.
+
+            .. literalinclude:: scripts/curate_dataframe_flexible.py
+                :language: python
+
+            Under the hood, this uses the following schema.
+
+            .. literalinclude:: scripts/define_valid_features.py
+                :language: python
+
+            Valid features & labels were defined as:
+
+            .. literalinclude:: scripts/define_mini_immuno_features_labels.py
+                :language: python
 
-            df = ln.core.datasets.df_iris_in_meter_batch1()
-            df.head()
-            #>   sepal_length  sepal_width  petal_length  petal_width  iris_organism_code
-            #> 0        0.051        0.035         0.014        0.002                   0
-            #> 1        0.049        0.030         0.014        0.002                   0
-            #> 2        0.047        0.032         0.013        0.002                   0
-            #> 3        0.046        0.031         0.015        0.002                   0
-            #> 4        0.050        0.036         0.014        0.002                   0
-            artifact = ln.Artifact.from_df(df, key="iris/result_batch1.parquet").save()
         """
         artifact = Artifact(  # type: ignore
             data=df,
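The referenced scripts aren't included in this diff. A rough sketch of the flow they implement, inferred from the docstring and the new `mini_immuno` dataset module; the feature/label names and the flexible-schema construction are assumptions::

    import lamindb as ln

    # define valid features & labels (assumed names)
    ln.Feature(name="perturbation", dtype="cat[ULabel]").save()
    ln.ULabel(name="DMSO").save()
    ln.ULabel(name="IFNG").save()

    # a flexible schema: validates the features it knows, tolerates extra columns
    schema = ln.Schema(itype=ln.Feature).save()

    df = ln.core.datasets.mini_immuno.get_dataset1()
    artifact = ln.Artifact.from_df(
        df, key="examples/dataset1.parquet", schema=schema
    ).save()  # raises ValidationError if the dataframe doesn't validate
    artifact.describe()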
@@ -1673,7 +1690,7 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
         schema: Schema | None = None,
         **kwargs,
     ) -> Artifact:
-        """Create from ``AnnData``, validate & link features.
+        """Create from `AnnData`, optionally validate & annotate.
 
         Args:
             adata: An `AnnData` object or a path to an AnnData-like object.
@@ -1682,7 +1699,7 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
             description: A description.
             revises: An old version of the artifact.
             run: The run that creates the artifact.
-            schema: A schema to validate & annotate.
+            schema: A schema that defines how to validate & annotate.
 
         See Also:
 
@@ -1691,12 +1708,31 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
             :class:`~lamindb.Feature`
                 Track features.
 
-        Example::
+        Example:
 
-            import lamindb as ln
+            No validation and annotation::
+
+                import lamindb as ln
+
+                adata = ln.core.datasets.anndata_with_obs()
+                artifact = ln.Artifact.from_anndata(adata, key="mini_anndata_with_obs.h5ad").save()
+
+            With validation and annotation.
+
+            .. literalinclude:: scripts/curate_anndata_flexible.py
+                :language: python
+
+            Under the hood, this uses the following schema.
+
+            .. literalinclude:: scripts/define_schema_anndata_ensembl_gene_ids_and_valid_features_in_obs.py
+                :language: python
+
+            This schema transposes the `var` DataFrame during curation, so that one validates and annotates the `var.T` schema, i.e., `[ENSG00000153563, ENSG00000010610, ENSG00000170458]`.
+            If one doesn't transpose, one would annotate with the schema of `var`, i.e., `[gene_symbol, gene_type]`.
+
+            .. image:: https://lamin-site-assets.s3.amazonaws.com/.lamindb/gLyfToATM7WUzkWW0001.png
+                :width: 800px
 
-            adata = ln.core.datasets.anndata_with_obs()
-            artifact = ln.Artifact.from_anndata(adata, key="mini_anndata_with_obs.h5ad").save()
         """
         if not data_is_anndata(adata):
             raise ValueError(
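The `var.T` remark is easiest to see in schema code. A heavily hedged sketch of such a composite schema; the slot syntax and registry fields are assumptions based on the script name above::

    import lamindb as ln
    import bionty as bt

    obs_schema = ln.Schema(itype=ln.Feature)  # valid features in .obs
    varT_schema = ln.Schema(itype=bt.Gene.ensembl_gene_id)  # Ensembl IDs in var.index
    anndata_schema = ln.Schema(
        otype="AnnData",
        # "var.T" validates var.index (genes); plain "var" would validate var's columns
        slots={"obs": obs_schema, "var.T": varT_schema},
    ).save()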
@@ -1745,7 +1781,7 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
         schema: Schema | None = None,
         **kwargs,
     ) -> Artifact:
-        """Create from ``MuData``, validate & link features.
+        """Create from `MuData`, optionally validate & annotate.
 
         Args:
             mdata: A `MuData` object.
@@ -1754,7 +1790,7 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
             description: A description.
             revises: An old version of the artifact.
             run: The run that creates the artifact.
-            schema: A schema to validate & annotate.
+            schema: A schema that defines how to validate & annotate.
 
         See Also:
             :meth:`~lamindb.Collection`
@@ -1804,16 +1840,16 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
         schema: Schema | None = None,
         **kwargs,
     ) -> Artifact:
-        """Create from ``SpatialData``, validate & link features.
+        """Create from `SpatialData`, optionally validate & annotate.
 
         Args:
-            mdata: A `SpatialData` object.
+            sdata: A `SpatialData` object.
             key: A relative path within default storage,
                 e.g., `"myfolder/myfile.zarr"`.
             description: A description.
             revises: An old version of the artifact.
             run: The run that creates the artifact.
-            schema: A schema to validate & annotate.
+            schema: A schema that defines how to validate & annotate.
 
         See Also:
             :meth:`~lamindb.Collection`
@@ -1821,11 +1857,21 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
             :class:`~lamindb.Feature`
                 Track features.
 
-        Example::
+        Example:
 
-            import lamindb as ln
+            No validation and annotation::
+
+                import lamindb as ln
+
+                artifact = ln.Artifact.from_spatialdata(sdata, key="my_dataset.zarr").save()
 
-            artifact = ln.Artifact.from_spatialdata(sdata, key="my_dataset.zarr").save()
+            With validation and annotation.
+
+            .. literalinclude:: scripts/define_schema_spatialdata.py
+                :language: python
+
+            .. literalinclude:: scripts/curate_spatialdata.py
+                :language: python
         """
         if not data_is_spatialdata(sdata):
             raise ValueError(
@@ -2117,29 +2163,39 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
         self._old_suffix = self.suffix
 
     def open(
-        self, mode: str = "r", is_run_input: bool | None = None, **kwargs
-    ) -> Union[
-        AnnDataAccessor,
-        BackedAccessor,
-        SOMACollection,
-        SOMAExperiment,
-        SOMAMeasurement,
-        PyArrowDataset,
-    ]:
-        """Return a cloud-backed data object.
+        self,
+        mode: str = "r",
+        engine: Literal["pyarrow", "polars"] = "pyarrow",
+        is_run_input: bool | None = None,
+        **kwargs,
+    ) -> (
+        AnnDataAccessor
+        | BackedAccessor
+        | SOMACollection
+        | SOMAExperiment
+        | SOMAMeasurement
+        | PyArrowDataset
+        | Iterator[PolarsLazyFrame]
+    ):
+        """Open a dataset for streaming.
 
         Works for `AnnData` (`.h5ad` and `.zarr`), generic `hdf5` and `zarr`,
-        `tiledbsoma` objects (`.tiledbsoma`), `pyarrow` compatible formats.
+        `tiledbsoma` objects (`.tiledbsoma`), and `pyarrow`- or `polars`-compatible formats
+        (`.parquet`, `.csv`, `.ipc`, etc., as files or directories of such files).
 
         Args:
             mode: can only be `"w"` (write mode) for `tiledbsoma` stores,
                 otherwise should be always `"r"` (read-only mode).
+            engine: Which module to use for lazy loading of a dataframe
+                from `pyarrow` or `polars` compatible formats.
+                This has no effect if the artifact is not a dataframe, i.e.,
+                if it is an `AnnData`, `hdf5`, `zarr` or `tiledbsoma` object.
             is_run_input: Whether to track this artifact as run input.
             **kwargs: Keyword arguments for the accessor, i.e. `h5py` or `zarr` connection,
-                `pyarrow.dataset.dataset`.
+                `pyarrow.dataset.dataset`, or a `polars.scan_*` function.
 
         Notes:
-            For more info, see tutorial: :doc:`/arrays`.
+            For more info, see guide: :doc:`/arrays`.
 
         Example::
 
@@ -2152,6 +2208,10 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
             #> AnnDataAccessor object with n_obs × n_vars = 70 × 765
             #> constructed for the AnnData object pbmc68k.h5ad
             #> ...
+            artifact = ln.Artifact.get(key="lndb-storage/df.parquet")
+            artifact.open()
+            #> pyarrow._dataset.FileSystemDataset
+
         """
         if self._overwrite_versions and not self.is_latest:
             raise ValueError(INCONSISTENT_STATE_MSG)
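The widened signature in practice: `engine` selects the lazy-loading backend for tabular artifacts. A sketch, assuming a saved parquet artifact with a hypothetical key; the context-manager usage for polars is inferred from the `Iterator[PolarsLazyFrame]` return type::

    import lamindb as ln

    artifact = ln.Artifact.get(key="examples/my_file.parquet")  # hypothetical key

    dataset = artifact.open()  # default engine="pyarrow" -> pyarrow.dataset.Dataset
    table = dataset.to_table()

    with artifact.open(engine="polars") as lazy_df:  # polars.LazyFrame
        df = lazy_df.collect()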
@@ -2159,6 +2219,7 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
         h5_suffixes = [".h5", ".hdf5", ".h5ad"]
         h5_suffixes += [s + ".gz" for s in h5_suffixes]
         # ignore empty suffix for now
+        df_suffixes = tuple(set(PYARROW_SUFFIXES).union(POLARS_SUFFIXES))
         suffixes = (
             (
                 "",
@@ -2167,7 +2228,7 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
                 ".tiledbsoma",
             )
             + tuple(h5_suffixes)
-            + PYARROW_SUFFIXES
+            + df_suffixes
             + tuple(
                 s + ".gz" for s in PYARROW_SUFFIXES
             )  # this doesn't work for externally gzipped files, REMOVE LATER
@@ -2175,10 +2236,10 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
         if self.suffix not in suffixes:
             raise ValueError(
                 "Artifact should have a zarr, h5, tiledbsoma object"
-                " or a compatible `pyarrow.dataset.dataset` directory"
+                " or a compatible `pyarrow.dataset.dataset` or `polars.scan_*` directory"
                 " as the underlying data, please use one of the following suffixes"
                 f" for the object name: {', '.join(suffixes[1:])}."
-                f" Or no suffix for a folder with {', '.join(PYARROW_SUFFIXES)} files"
+                f" Or no suffix for a folder with {', '.join(df_suffixes)} files"
                 " (no mixing allowed)."
             )
         if self.suffix != ".tiledbsoma" and self.key != "soma" and mode != "r":
@@ -2187,10 +2248,6 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
             )
 
         from lamindb import settings
-        from lamindb.core.storage._backed_access import (
-            _track_writes_factory,
-            backed_access,
-        )
 
         using_key = settings._using_key
         filepath, cache_key = filepath_cache_key_from_artifact(
@@ -2211,14 +2268,22 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
             ) and not filepath.synchronize(localpath, just_check=True)
             if open_cache:
                 try:
-                    access = backed_access(localpath, mode, using_key, **kwargs)
+                    access = backed_access(
+                        localpath, mode, engine, using_key=using_key, **kwargs
+                    )
                 except Exception as e:
-                    if isinstance(filepath, LocalPathClasses):
+                    # also ignore ValueError here because
+                    # such errors most probably just imply an incorrect argument
+                    if isinstance(filepath, LocalPathClasses) or isinstance(
+                        e, (ImportError, ValueError)
+                    ):
                         raise e
                     logger.warning(
                         f"The cache might be corrupted: {e}. Trying to open directly."
                     )
-                    access = backed_access(filepath, mode, using_key, **kwargs)
+                    access = backed_access(
+                        filepath, mode, engine, using_key=using_key, **kwargs
+                    )
                     # happens only if backed_access has been successful
                     # delete the corrupted cache
                     if localpath.is_dir():
@@ -2226,7 +2291,9 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
                     else:
                         localpath.unlink(missing_ok=True)
         else:
-            access = backed_access(filepath, mode, using_key, **kwargs)
+            access = backed_access(
+                filepath, mode, engine, using_key=using_key, **kwargs
+            )
         if is_tiledbsoma_w:
 
             def finalize():
@@ -2327,7 +2394,6 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
         _track_run_input(self, is_run_input)
         return access_memory
 
-    @doc_args(DEBUG_KWARGS_DOC)
     def cache(
         self, *, is_run_input: bool | None = None, mute: bool = False, **kwargs
     ) -> Path:
@@ -2340,7 +2406,6 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
         Args:
             mute: Silence logging of caching progress.
             is_run_input: Whether to track this artifact as run input.
-            {}
 
         Example::
 
@@ -2399,6 +2464,9 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
             artifact = ln.Artifact.get(key="some.tiledbsoma", is_latest=True)
             artifact.delete()  # deletes all versions; the data will be deleted or prompted for deletion
         """
+        # we're *not* running the line below because the case `storage is None` triggers user feedback in one case
+        # storage = True if storage is None else storage
+
         # this first check means an invalid delete fails fast rather than cascading through
         # database and storage permission errors
         if os.getenv("LAMINDB_MULTI_INSTANCE") is None:
@@ -2449,8 +2517,10 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
         # only delete in storage if DB delete is successful
         # DB delete might error because of a foreign key constraint violated etc.
         if self._overwrite_versions and self.is_latest:
-            # includes self
-            for version in self.versions.all():
+            logger.important(
+                "deleting all versions of this artifact because they all share the same store"
+            )
+            for version in self.versions.all():  # includes self
                 _delete_skip_storage(version)
         else:
             self._delete_skip_storage()
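The behavioral nuance: when versions overwrite each other in a single store, deleting the latest artifact removes the whole version family, now with an explicit log line. A sketch (key hypothetical)::

    artifact = ln.Artifact.get(key="examples/my_file.parquet", is_latest=True)
    artifact.delete()
    #> deleting all versions of this artifact because they all share the same store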
@@ -2460,7 +2530,7 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
             delete_in_storage = False
             if storage:
                 logger.warning(
-                    "Storage argument is ignored; can't delete storage on an previous version"
+                    "storage argument is ignored; can't delete store of a previous version if overwrite_versions is True"
                 )
         elif self.key is None or self._key_is_virtual:
             # do not ask for confirmation also if storage is None
@@ -2485,13 +2555,11 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
         if delete_msg != "did-not-delete":
             logger.success(f"deleted {colors.yellow(f'{path}')}")
 
-    @doc_args(DEBUG_KWARGS_DOC)
     def save(self, upload: bool | None = None, **kwargs) -> Artifact:
         """Save to database & storage.
 
         Args:
             upload: Trigger upload to cloud storage in instances with hybrid storage mode.
-            {}
 
         Example::
 
@@ -2577,14 +2645,13 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
         self._branch_code = 1
         self.save()
 
-    def describe(self) -> None:
-        """Describe relations of record.
-
-        Example::
+    def describe(self, return_str: bool = False) -> None:
+        """Describe the record including linked records.
 
-            artifact.describe()
+        Args:
+            return_str: Return a string instead of printing.
         """
-        return describe_artifact_collection(self)
+        return describe_artifact_collection(self, return_str=return_str)
 
     def _populate_subsequent_runs(self, run: Run) -> None:
         _populate_subsequent_runs_(self, run)
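`Artifact.describe` thus mirrors the `return_str` switch introduced for labels above::

    artifact.describe()  # prints the rich tree, as before
    text = artifact.describe(return_str=True)  # returns the tree as a string instead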
@@ -2624,9 +2691,11 @@ def _save_skip_storage(artifact, **kwargs) -> None:
 
 class ArtifactFeatureValue(BasicRecord, LinkORM, TracksRun):
     id: int = models.BigAutoField(primary_key=True)
-    artifact: Artifact = ForeignKey(Artifact, CASCADE, related_name="+")
+    artifact: Artifact = ForeignKey(
+        Artifact, CASCADE, related_name="links_featurevalue"
+    )
     # we follow the lower() case convention rather than snake case for link models
-    featurevalue = ForeignKey(FeatureValue, PROTECT, related_name="+")
+    featurevalue = ForeignKey(FeatureValue, PROTECT, related_name="links_artifact")
 
     class Meta:
         unique_together = ("artifact", "featurevalue")
@@ -2634,9 +2703,11 @@ class ArtifactFeatureValue(BasicRecord, LinkORM, TracksRun):
 
 class ArtifactParamValue(BasicRecord, LinkORM, TracksRun):
     id: int = models.BigAutoField(primary_key=True)
-    artifact: Artifact = ForeignKey(Artifact, CASCADE, related_name="+")
+    artifact: Artifact = ForeignKey(Artifact, CASCADE, related_name="links_paramvalue")
     # we follow the lower() case convention rather than snake case for link models
-    paramvalue: ParamValue = ForeignKey(ParamValue, PROTECT, related_name="+")
+    paramvalue: ParamValue = ForeignKey(
+        ParamValue, PROTECT, related_name="links_artifact"
+    )
 
     class Meta:
         unique_together = ("artifact", "paramvalue")
@@ -2685,8 +2756,8 @@ def _track_run_input(
             # record is on another db
             # we have to save the record into the current db with
             # the run being attached to a transfer transform
-            logger.important(
-                f"completing transfer to track {data.__class__.__name__}('{data.uid[:8]}') as input"
+            logger.info(
+                f"completing transfer to track {data.__class__.__name__}('{data.uid[:8]}...') as input"
             )
             data.save()
             is_valid = True