lamindb 1.5.2__py3-none-any.whl → 1.6.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (66)
  1. lamindb/__init__.py +25 -6
  2. lamindb/_finish.py +5 -5
  3. lamindb/_tracked.py +1 -1
  4. lamindb/_view.py +4 -4
  5. lamindb/core/_context.py +32 -6
  6. lamindb/core/_settings.py +1 -1
  7. lamindb/core/datasets/mini_immuno.py +8 -0
  8. lamindb/core/loaders.py +1 -1
  9. lamindb/core/storage/_anndata_accessor.py +9 -9
  10. lamindb/core/storage/_valid_suffixes.py +1 -0
  11. lamindb/core/storage/_zarr.py +32 -107
  12. lamindb/curators/__init__.py +19 -2
  13. lamindb/curators/_cellxgene_schemas/__init__.py +3 -3
  14. lamindb/curators/_legacy.py +15 -19
  15. lamindb/curators/core.py +247 -80
  16. lamindb/errors.py +2 -2
  17. lamindb/migrations/0069_squashed.py +8 -8
  18. lamindb/migrations/0071_lamindbv1_migrate_schema.py +3 -3
  19. lamindb/migrations/0073_merge_ourprojects.py +7 -7
  20. lamindb/migrations/0075_lamindbv1_part5.py +1 -1
  21. lamindb/migrations/0077_lamindbv1_part6b.py +3 -3
  22. lamindb/migrations/0080_polish_lamindbv1.py +2 -2
  23. lamindb/migrations/0088_schema_components.py +1 -1
  24. lamindb/migrations/0090_runproject_project_runs.py +2 -2
  25. lamindb/migrations/0091_alter_featurevalue_options_alter_space_options_and_more.py +1 -1
  26. lamindb/migrations/0094_writeloglock_writelogmigrationstate_and_more.py +84 -0
  27. lamindb/migrations/0095_remove_rundata_flextable.py +155 -0
  28. lamindb/migrations/0096_remove_artifact__param_values_and_more.py +266 -0
  29. lamindb/migrations/0097_remove_schemaparam_param_remove_paramvalue_param_and_more.py +27 -0
  30. lamindb/migrations/0098_alter_feature_type_alter_project_type_and_more.py +656 -0
  31. lamindb/migrations/0099_alter_writelog_seqno.py +22 -0
  32. lamindb/migrations/0100_branch_alter_artifact__branch_code_and_more.py +102 -0
  33. lamindb/migrations/0101_alter_artifact_hash_alter_feature_name_and_more.py +444 -0
  34. lamindb/migrations/0102_remove_writelog_branch_code_and_more.py +72 -0
  35. lamindb/migrations/0103_remove_writelog_migration_state_and_more.py +46 -0
  36. lamindb/migrations/{0090_squashed.py → 0103_squashed.py} +1013 -1009
  37. lamindb/models/__init__.py +35 -18
  38. lamindb/models/_describe.py +4 -4
  39. lamindb/models/_django.py +38 -4
  40. lamindb/models/_feature_manager.py +66 -123
  41. lamindb/models/_from_values.py +13 -13
  42. lamindb/models/_label_manager.py +8 -6
  43. lamindb/models/_relations.py +7 -7
  44. lamindb/models/artifact.py +166 -156
  45. lamindb/models/can_curate.py +25 -25
  46. lamindb/models/collection.py +48 -18
  47. lamindb/models/core.py +3 -3
  48. lamindb/models/feature.py +88 -60
  49. lamindb/models/has_parents.py +17 -17
  50. lamindb/models/project.py +52 -24
  51. lamindb/models/query_manager.py +5 -5
  52. lamindb/models/query_set.py +61 -37
  53. lamindb/models/record.py +158 -1583
  54. lamindb/models/run.py +39 -176
  55. lamindb/models/save.py +6 -6
  56. lamindb/models/schema.py +33 -44
  57. lamindb/models/sqlrecord.py +1743 -0
  58. lamindb/models/transform.py +17 -33
  59. lamindb/models/ulabel.py +21 -15
  60. {lamindb-1.5.2.dist-info → lamindb-1.6.0.dist-info}/METADATA +7 -11
  61. lamindb-1.6.0.dist-info/RECORD +118 -0
  62. lamindb/core/storage/_anndata_sizes.py +0 -41
  63. lamindb/models/flextable.py +0 -163
  64. lamindb-1.5.2.dist-info/RECORD +0 -109
  65. {lamindb-1.5.2.dist-info → lamindb-1.6.0.dist-info}/LICENSE +0 -0
  66. {lamindb-1.5.2.dist-info → lamindb-1.6.0.dist-info}/WHEEL +0 -0

lamindb/models/artifact.py
@@ -17,6 +17,7 @@ from django.db.models import CASCADE, PROTECT, Q
 from lamin_utils import colors, logger
 from lamindb_setup import settings as setup_settings
 from lamindb_setup._init_instance import register_storage_in_instance
+from lamindb_setup.core._hub_core import select_storage_or_parent
 from lamindb_setup.core._settings_storage import init_storage
 from lamindb_setup.core.hashing import HASH_LENGTH, hash_dir, hash_file
 from lamindb_setup.core.types import UPathStr
@@ -69,8 +70,7 @@ from ..models._is_versioned import (
 from ._django import get_artifact_with_related
 from ._feature_manager import (
     FeatureManager,
-    ParamManager,
-    ParamManagerArtifact,
+    FeatureManagerArtifact,
     add_label_feature_links,
     filter_base,
     get_label_links,
@@ -83,15 +83,15 @@ from ._relations import (
 from .core import Storage
 from .feature import Feature, FeatureValue
 from .has_parents import view_lineage
-from .record import (
-    BasicRecord,
-    LinkORM,
-    Record,
+from .run import Run, TracksRun, TracksUpdates, User
+from .schema import Schema
+from .sqlrecord import (
+    BaseSQLRecord,
+    IsLink,
+    SQLRecord,
     _get_record_kwargs,
     record_repr,
 )
-from .run import Param, ParamValue, Run, TracksRun, TracksUpdates, User
-from .schema import Schema
 from .ulabel import ULabel
 
 WARNING_RUN_TRANSFORM = "no run & transform got linked, call `ln.track()` & re-run"
@@ -103,7 +103,7 @@ try:
 except ImportError:
 
     def identify_zarr_type(storepath):  # type: ignore
-        raise ImportError("Please install zarr: pip install zarr<=2.18.4")
+        raise ImportError("Please install zarr: pip install 'lamindb[zarr]'")
 
 
 if TYPE_CHECKING:
@@ -156,10 +156,12 @@ def process_pathlike(
     else:
         # check whether the path is part of one of the existing
         # already-registered storage locations
-        result = False
+        result = None
         # within the hub, we don't want to perform check_path_in_existing_storage
         if using_key is None:
-            result = check_path_in_existing_storage(filepath, using_key)
+            result = check_path_in_existing_storage(
+                filepath, check_hub_register_storage=setup_settings.instance.is_on_hub
+            )
         if isinstance(result, Storage):
             use_existing_storage_key = True
             return result, use_existing_storage_key
@@ -244,8 +246,8 @@ def process_data(
     elif (
         isinstance(data, pd.DataFrame)
         or isinstance(data, AnnData)
-        or data_is_mudata(data)
-        or data_is_spatialdata(data)
+        or data_is_scversedatastructure(data, "MuData")
+        or data_is_scversedatastructure(data, "SpatialData")
     ):
         storage = default_storage
         memory_rep = data
@@ -259,9 +261,9 @@ def process_data(
     if key_suffix is not None and key_suffix != suffix and not is_replace:
         # consciously omitting a trailing period
         if isinstance(data, (str, Path, UPath)):  # UPathStr, spelled out
-            message = f"The suffix '{suffix}' of the provided path is inconsistent, it should be '{key_suffix}'"
+            message = f"The passed path's suffix '{suffix}' must match the passed key's suffix '{key_suffix}'."
         else:
-            message = f"The suffix '{key_suffix}' of the provided key is inconsistent, it should be '{suffix}'"
+            message = f"The passed key's suffix '{key_suffix}' must match the passed path's suffix '{suffix}'."
         raise InvalidArgument(message)
 
     # in case we have an in-memory representation, we need to write it to disk
@@ -328,7 +330,7 @@ def get_stat_or_artifact(
         previous_artifact_version = result[0]
     if artifact_with_same_hash_exists:
         message = "returning existing artifact with same hash"
-        if result[0]._branch_code == -1:
+        if result[0].branch_id == -1:
             result[0].restore()
             message = "restored artifact with same hash from trash"
         logger.important(
@@ -340,13 +342,21 @@ def get_stat_or_artifact(
 
 
 def check_path_in_existing_storage(
-    path: Path | UPath, using_key: str | None = None
-) -> Storage | bool:
+    path: Path | UPath,
+    check_hub_register_storage: bool = False,
+    using_key: str | None = None,
+) -> Storage | None:
     for storage in Storage.objects.using(using_key).filter().all():
         # if path is part of storage, return it
         if check_path_is_child_of_root(path, root=storage.root):
             return storage
-    return False
+    # we don't see parents registered in the db, so checking the hub
+    # just check for 2 writable cloud protocols, maybe change in the future
+    if check_hub_register_storage and getattr(path, "protocol", None) in {"s3", "gs"}:
+        result = select_storage_or_parent(path.as_posix())
+        if result is not None:
+            return Storage(**result).save()
+    return None
 
 
 def get_relative_path_to_directory(
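
Note: `check_path_in_existing_storage` now returns `None` instead of `False` on a miss and can fall back to a hub lookup for the two writable cloud protocols. A minimal sketch of the new contract, assuming a loaded lamindb instance (bucket and path are hypothetical)::

    from upath import UPath
    from lamindb.models.artifact import check_path_in_existing_storage

    # returns a Storage record if the path lies under a registered root,
    # otherwise consults the hub for s3/gs paths, otherwise returns None
    storage = check_path_in_existing_storage(
        UPath("s3://mybucket/datasets/file.parquet"),
        check_hub_register_storage=True,
    )
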
@@ -513,45 +523,59 @@ def log_storage_hint(
     logger.hint(hint)
 
 
-def data_is_anndata(data: AnnData | UPathStr) -> bool:
-    if isinstance(data, AnnData):
+def data_is_scversedatastructure(
+    data: ScverseDataStructures | UPathStr,
+    expected_ds: Literal["AnnData", "MuData", "SpatialData"] | None = None,
+) -> bool:
+    """Determine whether a specific in-memory object or a UPathstr is any or a specific scverse data structure."""
+    file_suffix = None
+    if expected_ds == "AnnData":
+        file_suffix = ".h5ad"
+    elif expected_ds == "MuData":
+        file_suffix = ".h5mu"
+    # SpatialData does not have a unique suffix but `.zarr`
+
+    if expected_ds is None:
+        return any(
+            hasattr(data, "__class__") and data.__class__.__name__ == cl_name
+            for cl_name in ["AnnData", "MuData", "SpatialData"]
+        )
+    elif hasattr(data, "__class__") and data.__class__.__name__ == expected_ds:
         return True
+
+    data_type = expected_ds.lower()
     if isinstance(data, (str, Path, UPath)):
         data_path = UPath(data)
-        if ".h5ad" in data_path.suffixes:  # ".h5ad.gz" is a valid suffix
+
+        if file_suffix in data_path.suffixes:
             return True
-        elif data_path.suffix == ".zarr":
-            # ".anndata.zarr" is a valid suffix (core.storage._valid_suffixes)
-            # TODO: the suffix based check should likely be moved to identify_zarr_type
-            if ".anndata" in data_path.suffixes:
+
+        if data_path.suffix == ".zarr":
+            type_suffix = f".{data_type}"
+            if type_suffix in data_path.suffixes:
                 return True
+
             # check only for local, expensive for cloud
             if fsspec.utils.get_protocol(data_path.as_posix()) == "file":
-                return identify_zarr_type(data_path) == "anndata"
+                return (
+                    identify_zarr_type(
+                        data_path if expected_ds == "AnnData" else data,
+                        check=True if expected_ds == "AnnData" else False,
+                    )
+                    == data_type
+                )
             else:
-                logger.warning("We do not check if cloud zarr is AnnData or not")
+                logger.warning(f"We do not check if cloud zarr is {expected_ds} or not")
                 return False
     return False
 
 
-def data_is_mudata(data: MuData | UPathStr) -> bool:
-    # We are not importing MuData here to keep loaded modules minimal
-    if hasattr(data, "__class__") and data.__class__.__name__ == "MuData":
+def data_is_soma_experiment(data: SOMAExperiment | UPathStr) -> bool:
+    # We are not importing tiledbsoma here to keep loaded modules minimal
+    if hasattr(data, "__class__") and data.__class__.__name__ == "Experiment":
         return True
     if isinstance(data, (str, Path)):
-        return UPath(data).suffix == ".h5mu"
-    return False
-
-
-def data_is_spatialdata(data: SpatialData | UPathStr) -> bool:
-    # We are not importing SpatialData here to keep loaded modules minimal
-    if hasattr(data, "__class__") and data.__class__.__name__ == "SpatialData":
-        return True
-    if isinstance(data, (str, Path)):
-        if UPath(data).suffix == ".zarr":
-            # TODO: inconsistent with anndata, where we run the storage
-            # check only for local, expensive for cloud
-            return identify_zarr_type(data, check=False) == "spatialdata"
+        return UPath(data).suffix == ".tiledbsoma"
     return False
 
@@ -566,15 +590,15 @@ def _check_otype_artifact(
         return otype
 
     data_is_path = isinstance(data, (str, Path))
-    if data_is_anndata(data):
+    if data_is_scversedatastructure(data, "AnnData"):
         if not data_is_path:
             logger.warning("data is an AnnData, please use .from_anndata()")
         otype = "AnnData"
-    elif data_is_mudata(data):
+    elif data_is_scversedatastructure(data, "MuData"):
        if not data_is_path:
            logger.warning("data is a MuData, please use .from_mudata()")
        otype = "MuData"
-    elif data_is_spatialdata(data):
+    elif data_is_scversedatastructure(data, "SpatialData"):
         if not data_is_path:
             logger.warning("data is a SpatialData, please use .from_spatialdata()")
         otype = "SpatialData"
@@ -706,7 +730,6 @@ def _describe_postgres(self):  # for Artifact & Collection
             tree=tree,
             related_data=related_data,
             with_labels=True,
-            print_params=hasattr(self, "kind") and self.kind == "model",
         )
     else:
         return tree
@@ -755,7 +778,6 @@ def _describe_sqlite(self, print_types: bool = False):  # for artifact & collect
             self,
             tree=tree,
             with_labels=True,
-            print_params=hasattr(self, "kind") and self.kind == "kind",
         )
     else:
         return tree
@@ -772,7 +794,7 @@ def describe_artifact_collection(self, return_str: bool = False) -> str | None:
     return format_rich_tree(tree, return_str=return_str)
 
 
-def validate_feature(feature: Feature, records: list[Record]) -> None:
+def validate_feature(feature: Feature, records: list[SQLRecord]) -> None:
     """Validate feature record, adjust feature.dtype based on labels records."""
     if not isinstance(feature, Feature):
         raise TypeError("feature has to be of type Feature")
@@ -816,7 +838,7 @@ def get_labels(
     ).all()
     if flat_names:
         # returns a flat list of names
-        from .record import get_name_field
+        from .sqlrecord import get_name_field
 
         values = []
         for v in qs_by_registry.values():
@@ -830,7 +852,7 @@ def get_labels(
 
 def add_labels(
     self,
-    records: Record | list[Record] | QuerySet | Iterable,
+    records: SQLRecord | list[SQLRecord] | QuerySet | Iterable,
     feature: Feature | None = None,
     *,
     field: StrField | None = None,
@@ -844,7 +866,7 @@ def add_labels(
 
     if isinstance(records, (QuerySet, QuerySet.__base__)):  # need to have both
         records = records.list()
-    if isinstance(records, (str, Record)):
+    if isinstance(records, (str, SQLRecord)):
         records = [records]
     if not isinstance(records, list):  # avoids warning for pd Series
         records = list(records)
@@ -869,7 +891,7 @@ def add_labels(
     # ask users to pass records
     if len(records_validated) == 0:
         raise ValueError(
-            "Please pass a record (a `Record` object), not a string, e.g., via:"
+            "Please pass a record (a `SQLRecord` object), not a string, e.g., via:"
             " label"
             f" = ln.ULabel(name='{records[0]}')"  # type: ignore
         )
@@ -943,7 +965,7 @@ def add_labels(
     )
 
 
-class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
+class Artifact(SQLRecord, IsVersioned, TracksRun, TracksUpdates):
     # Note that this docstring has to be consistent with Curator.save_artifact()
     """Datasets & models stored as files, folders, or arrays.
 
@@ -1052,31 +1074,26 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
 
     """
 
-    class Meta(Record.Meta, IsVersioned.Meta, TracksRun.Meta, TracksUpdates.Meta):
+    class Meta(SQLRecord.Meta, IsVersioned.Meta, TracksRun.Meta, TracksUpdates.Meta):
         abstract = False
+        constraints = [
+            # a simple hard unique constraint on `hash` clashes with the fact
+            # that pipelines sometimes aim to ingest the exact same file in different
+            # folders
+            # the conditional composite constraint allows duplicating files in different parts of the
+            # file hierarchy, but errors if the same file is to be registered with the same key
+            # or if the key is not populated
+            models.UniqueConstraint(
+                fields=["storage", "key", "hash"],
+                name="unique_artifact_storage_key_hash",
+                condition=Q(key__isnull=False),
+            ),
+        ]
 
     _len_full_uid: int = 20
     _len_stem_uid: int = 16
 
-    params: ParamManager = ParamManagerArtifact  # type: ignore
-    """Param manager.
-
-    What features are for dataset-like artifacts, parameters are for model-like artifacts & runs.
-
-    Example::
-
-        artifact.params.add_values({
-            "hidden_size": 32,
-            "bottleneck_size": 16,
-            "batch_size": 32,
-            "preprocess_params": {
-                "normalization_type": "cool",
-                "subset_highlyvariable": True,
-            },
-        })
-    """
-
-    features: FeatureManager = FeatureManager  # type: ignore
+    features: FeatureManager = FeatureManagerArtifact  # type: ignore
     """Feature manager.
 
     Typically, you annotate a dataset with features by defining a `Schema` and passing it to the `Artifact` constructor.
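
Note on the new constraint: the same file (same hash) can now be registered under different keys, while registering it again with an identical storage/key/hash combination is rejected at the database level; in the typical flow, the constructor instead returns the existing artifact with the same hash. A sketch (keys are hypothetical)::

    import lamindb as ln

    a1 = ln.Artifact("./data.csv", key="batch1/data.csv").save()
    a2 = ln.Artifact("./data.csv", key="batch2/data.csv").save()  # allowed: same hash, different key
    a3 = ln.Artifact("./data.csv", key="batch1/data.csv").save()  # resolves to the existing a1
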
@@ -1094,11 +1111,25 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
 
         ln.Artifact.filter(scientist="Barbara McClintock")
 
-    Features may or may not be part of the artifact content in storage. For
+    Features may or may not be part of the dataset, i.e., the artifact content in storage. For
     instance, the :class:`~lamindb.curators.DataFrameCurator` flow validates the columns of a
     `DataFrame`-like artifact and annotates it with features corresponding to
     these columns. `artifact.features.add_values`, by contrast, does not
     validate the content of the artifact.
+
+    .. dropdown:: An example for a model-like artifact
+
+        ::
+
+            artifact.features.add_values({
+                "hidden_size": 32,
+                "bottleneck_size": 16,
+                "batch_size": 32,
+                "preprocess_params": {
+                    "normalization_type": "cool",
+                    "subset_highlyvariable": True,
+                },
+            })
     """
 
     @property
@@ -1176,7 +1207,7 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
     Examples: 1KB is 1e3 bytes, 1MB is 1e6, 1GB is 1e9, 1TB is 1e12 etc.
     """
     hash: str | None = CharField(
-        max_length=HASH_LENGTH, db_index=True, null=True, unique=True, editable=False
+        max_length=HASH_LENGTH, db_index=True, null=True, editable=False
     )
     """Hash or pseudo-hash of artifact content.
 
@@ -1242,10 +1273,6 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
         FeatureValue, through="ArtifactFeatureValue", related_name="artifacts"
     )
     """Non-categorical feature values for annotation."""
-    _param_values: ParamValue = models.ManyToManyField(
-        ParamValue, through="ArtifactParamValue", related_name="artifacts"
-    )
-    """Parameter values."""
     _key_is_virtual: bool = BooleanField()
     """Indicates whether `key` is virtual or part of an actual file path."""
     # be mindful that below, passing related_name="+" leads to errors
@@ -1301,7 +1328,6 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
         **kwargs,
     ):
         self.features = FeatureManager(self)  # type: ignore
-        self.params = ParamManager(self)  # type: ignore
         # Below checks for the Django-internal call in from_db()
         # it'd be better if we could avoid this, but not being able to create a Artifact
         # from data with the default constructor renders the central class of the API
@@ -1324,11 +1350,13 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
         revises: Artifact | None = kwargs.pop("revises", None)
         version: str | None = kwargs.pop("version", None)
         if "visibility" in kwargs:  # backward compat
-            _branch_code = kwargs.pop("visibility")
-        elif "_branch_code" in kwargs:
-            _branch_code = kwargs.pop("_branch_code")
+            branch_id = kwargs.pop("visibility")
+        if "_branch_code" in kwargs:  # backward compat
+            branch_id = kwargs.pop("_branch_code")
+        elif "branch_id" in kwargs:
+            branch_id = kwargs.pop("branch_id")
         else:
-            _branch_code = 1
+            branch_id = 1
         format = kwargs.pop("format", None)
         _is_internal_call = kwargs.pop("_is_internal_call", False)
         skip_check_exists = kwargs.pop("skip_check_exists", False)
@@ -1389,7 +1417,7 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
 
         # an object with the same hash already exists
         if isinstance(kwargs_or_artifact, Artifact):
-            from .record import init_self_from_db, update_attributes
+            from .sqlrecord import init_self_from_db, update_attributes
 
             init_self_from_db(self, kwargs_or_artifact)
             # adding "key" here is dangerous because key might be auto-populated
@@ -1437,7 +1465,7 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
         kwargs["kind"] = kind
         kwargs["version"] = version
         kwargs["description"] = description
-        kwargs["_branch_code"] = _branch_code
+        kwargs["branch_id"] = branch_id
         kwargs["otype"] = otype
         kwargs["revises"] = revises
         # this check needs to come down here because key might be populated from an
@@ -1461,6 +1489,11 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
     def _accessor(self) -> str:
         return self.otype
 
+    @property
+    @deprecated("features")
+    def params(self) -> str:
+        return self.features
+
     @property
     def transform(self) -> Transform | None:
         """Transform whose run created the artifact."""
@@ -1511,12 +1544,15 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
     def get(
         cls,
         idlike: int | str | None = None,
+        *,
+        is_run_input: bool | Run = False,
         **expressions,
     ) -> Artifact:
         """Get a single artifact.
 
         Args:
             idlike: Either a uid stub, uid or an integer id.
+            is_run_input: Whether to track this artifact as run input.
             expressions: Fields and values passed as Django query expressions.
 
         Raises:
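
Note: retrieval can now opt into lineage tracking directly; a sketch (the uid is hypothetical)::

    import lamindb as ln

    # also records the artifact as an input of the current tracked run
    artifact = ln.Artifact.get("aKje93qq", is_run_input=True)
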
@@ -1524,7 +1560,7 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
 
         See Also:
             - Guide: :doc:`docs:registries`
-            - Method in `Record` base class: :meth:`~lamindb.models.Record.get`
+            - Method in `SQLRecord` base class: :meth:`~lamindb.models.SQLRecord.get`
 
         Examples:
 
@@ -1535,7 +1571,7 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
         """
         from .query_set import QuerySet
 
-        return QuerySet(model=cls).get(idlike, **expressions)
+        return QuerySet(model=cls).get(idlike, is_run_input=is_run_input, **expressions)
 
     @classmethod
     def filter(
@@ -1547,7 +1583,7 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
 
         Args:
             *queries: `Q` expressions.
-            **expressions: Features, params, fields via the Django query syntax.
+            **expressions: Features & fields via the Django query syntax.
 
         See Also:
             - Guide: :doc:`docs:registries`
@@ -1562,9 +1598,6 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
 
             ln.Arfifact.filter(cell_type_by_model__name="T cell")
 
-        Query by params::
-
-            ln.Arfifact.filter(hyperparam_x=100)
         """
         from .query_set import QuerySet
 
@@ -1578,25 +1611,16 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
                 keys_normalized, field="name", mute=True
             )
        ):
-            return filter_base(FeatureManager, **expressions)
-        elif all(
-            params_validated := Param.validate(
-                keys_normalized, field="name", mute=True
-            )
-        ):
-            return filter_base(ParamManagerArtifact, **expressions)
+            return filter_base(FeatureManagerArtifact, **expressions)
         else:
-            if sum(features_validated) < sum(params_validated):
-                params = ", ".join(
-                    sorted(np.array(keys_normalized)[~params_validated])
-                )
-                message = f"param names: {params}"
-            else:
-                features = ", ".join(
-                    sorted(np.array(keys_normalized)[~params_validated])
-                )
-                message = f"feature names: {features}"
-            fields = ", ".join(sorted(cls.__get_available_fields__()))
+            features = ", ".join(
+                sorted(np.array(keys_normalized)[~features_validated])
+            )
+            message = f"feature names: {features}"
+            avail_fields = cls.__get_available_fields__()
+            if "_branch_code" in avail_fields:
+                avail_fields.remove("_branch_code")  # backward compat
+            fields = ", ".join(sorted(avail_fields))
             raise InvalidArgument(
                 f"You can query either by available fields: {fields}\n"
                 f"Or fix invalid {message}"
@@ -1734,7 +1758,7 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
             :width: 800px
 
         """
-        if not data_is_anndata(adata):
+        if not data_is_scversedatastructure(adata, "AnnData"):
             raise ValueError(
                 "data has to be an AnnData object or a path to AnnData-like"
             )
@@ -1805,7 +1829,7 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
             mdata = ln.core.datasets.mudata_papalexi21_subset()
             artifact = ln.Artifact.from_mudata(mdata, key="mudata_papalexi21_subset.h5mu").save()
         """
-        if not data_is_mudata(mdata):
+        if not data_is_scversedatastructure(mdata, "MuData"):
             raise ValueError("data has to be a MuData object or a path to MuData-like")
         artifact = Artifact(  # type: ignore
             data=mdata,
@@ -1831,7 +1855,7 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
     @classmethod
     def from_spatialdata(
         cls,
-        sdata: Union[SpatialData, UPathStr],
+        sdata: SpatialData | UPathStr,
         *,
         key: str | None = None,
         description: str | None = None,
@@ -1873,7 +1897,7 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
         .. literalinclude:: scripts/curate_spatialdata.py
             :language: python
         """
-        if not data_is_spatialdata(sdata):
+        if not data_is_scversedatastructure(sdata, "SpatialData"):
             raise ValueError(
                 "data has to be a SpatialData object or a path to SpatialData-like"
             )
@@ -1901,7 +1925,7 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
     @classmethod
     def from_tiledbsoma(
         cls,
-        path: UPathStr,
+        exp: SOMAExperiment | UPathStr,
         *,
         key: str | None = None,
         description: str | None = None,
@@ -1925,12 +1949,13 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
 
             artifact = ln.Artifact.from_tiledbsoma("s3://mybucket/store.tiledbsoma", description="a tiledbsoma store").save()
         """
-        if UPath(path).suffix != ".tiledbsoma":
+        if not data_is_soma_experiment(exp):
             raise ValueError(
-                "A tiledbsoma store should have .tiledbsoma suffix to be registered."
+                "data has to be a SOMA Experiment object or a path to SOMA Experiment store."
             )
+        exp = exp.uri.removeprefix("file://") if not isinstance(exp, UPathStr) else exp
         artifact = Artifact(  # type: ignore
-            data=path,
+            data=exp,
             key=key,
             run=run,
             description=description,
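
Note: `from_tiledbsoma` now accepts an open `tiledbsoma.Experiment` in addition to a path; a sketch assuming a local store at a hypothetical location::

    import lamindb as ln
    import tiledbsoma

    with tiledbsoma.Experiment.open("./store.tiledbsoma") as exp:
        artifact = ln.Artifact.from_tiledbsoma(exp, key="store.tiledbsoma").save()
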
@@ -2274,8 +2299,8 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
         except Exception as e:
             # also ignore ValueError here because
             # such errors most probably just imply an incorrect argument
-            if isinstance(filepath, LocalPathClasses) or isinstance(
-                e, (ImportError, ValueError)
+            if isinstance(e, (ImportError, ValueError)) or isinstance(
+                filepath, LocalPathClasses
             ):
                 raise e
             logger.warning(
@@ -2304,7 +2329,7 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
             # this can be very slow
             _, hash, _, _ = hash_dir(filepath)
         if self.hash != hash:
-            from .record import init_self_from_db
+            from .sqlrecord import init_self_from_db
 
             new_version = Artifact(
                 filepath, revises=self, _is_internal_call=True
@@ -2377,8 +2402,9 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
             access_memory = load_to_memory(cache_path, **kwargs)
         except Exception as e:
             # raise the exception if it comes from not having a correct loader
+            # import error is also most probbaly not a problem with the cache
             # or if the original path is local
-            if isinstance(e, NotImplementedError) or isinstance(
+            if isinstance(e, (NotImplementedError, ImportError)) or isinstance(
                 filepath, LocalPathClasses
             ):
                 raise e
@@ -2444,7 +2470,7 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
     ) -> None:
         """Trash or permanently delete.
 
-        A first call to `.delete()` puts an artifact into the trash (sets `_branch_code` to `-1`).
+        A first call to `.delete()` puts an artifact into the trash (sets `branch_id` to `-1`).
         A second call permanently deletes the artifact.
         If it is a folder artifact with multiple versions, deleting a non-latest version
         will not delete the underlying storage by default (if `storage=True` is not specified).
@@ -2486,17 +2512,15 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
                 f"\n(2) If you want to delete the artifact in storage, please load the managing lamindb instance (uid={self.storage.instance_uid})."
                 f"\nThese are all managed storage locations of this instance:\n{Storage.filter(instance_uid=isettings.uid).df()}"
             )
-        # by default, we only move artifacts into the trash (_branch_code = -1)
-        trash__branch_code = -1
-        if self._branch_code > trash__branch_code and not permanent:
+        # by default, we only move artifacts into the trash (branch_id = -1)
+        trash_branch_id = -1
+        if self.branch_id > trash_branch_id and not permanent:
             if storage is not None:
                 logger.warning("moving artifact to trash, storage arg is ignored")
             # move to trash
-            self._branch_code = trash__branch_code
+            self.branch_id = trash_branch_id
             self.save()
-            logger.important(
-                f"moved artifact to trash (_branch_code = {trash__branch_code})"
-            )
+            logger.important(f"moved artifact to trash (branch_id = {trash_branch_id})")
             return
 
         # if the artifact is already in the trash
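
Note: the trash flow in terms of the renamed field (sketch)::

    artifact.delete()                # branch_id -> -1, i.e., moved to trash
    artifact.restore()               # branch_id -> 1
    artifact.delete(permanent=True)  # permanently deletes the record
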
@@ -2648,7 +2672,7 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
 
             artifact.restore()
         """
-        self._branch_code = 1
+        self.branch_id = 1
         self.save()
 
     def describe(self, return_str: bool = False) -> None:
@@ -2695,7 +2719,7 @@ def _save_skip_storage(artifact, **kwargs) -> None:
     save_schema_links(artifact)
 
 
-class ArtifactFeatureValue(BasicRecord, LinkORM, TracksRun):
+class ArtifactFeatureValue(BaseSQLRecord, IsLink, TracksRun):
     id: int = models.BigAutoField(primary_key=True)
     artifact: Artifact = ForeignKey(
         Artifact, CASCADE, related_name="links_featurevalue"
@@ -2707,18 +2731,6 @@ class ArtifactFeatureValue(BasicRecord, LinkORM, TracksRun):
         unique_together = ("artifact", "featurevalue")
 
 
-class ArtifactParamValue(BasicRecord, LinkORM, TracksRun):
-    id: int = models.BigAutoField(primary_key=True)
-    artifact: Artifact = ForeignKey(Artifact, CASCADE, related_name="links_paramvalue")
-    # we follow the lower() case convention rather than snake case for link models
-    paramvalue: ParamValue = ForeignKey(
-        ParamValue, PROTECT, related_name="links_artifact"
-    )
-
-    class Meta:
-        unique_together = ("artifact", "paramvalue")
-
-
 def _track_run_input(
     data: (
         Artifact | Iterable[Artifact]
@@ -2726,6 +2738,9 @@ def _track_run_input(
     is_run_input: bool | Run | None = None,
     run: Run | None = None,
 ):
+    if is_run_input is False:
+        return
+
     from lamindb import settings
 
     from .._tracked import get_current_tracked_run
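
Note: because `is_run_input=False` is handled before any imports or run lookups, explicitly opting out of input tracking is now a cheap no-op::

    _track_run_input(artifact, is_run_input=False)  # returns immediately
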
@@ -2820,22 +2835,17 @@ def _track_run_input(
         # avoid adding the same run twice
         run.save()
         if data_class_name == "artifact":
-            LinkORM = run.input_artifacts.through
+            IsLink = run.input_artifacts.through
             links = [
-                LinkORM(run_id=run.id, artifact_id=data_id)
-                for data_id in input_data_ids
+                IsLink(run_id=run.id, artifact_id=data_id) for data_id in input_data_ids
             ]
         else:
-            LinkORM = run.input_collections.through
+            IsLink = run.input_collections.through
             links = [
-                LinkORM(run_id=run.id, collection_id=data_id)
+                IsLink(run_id=run.id, collection_id=data_id)
                 for data_id in input_data_ids
             ]
-        LinkORM.objects.bulk_create(links, ignore_conflicts=True)
-        # generalize below for more than one data batch
-        if len(input_data) == 1:
-            if input_data[0].transform is not None:
-                run.transform.predecessors.add(input_data[0].transform)
+        IsLink.objects.bulk_create(links, ignore_conflicts=True)
 
 
 # privates currently dealt with separately