lamindb 1.12.1__py3-none-any.whl → 1.13.0__py3-none-any.whl

This diff shows the changes between publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the packages as they appear in their respective public registries.
lamindb/__init__.py CHANGED
@@ -1,4 +1,4 @@
-"""A data lakehouse for biology.
+"""A data framework for biology.
 
 Data lineage
 ============
@@ -110,7 +110,7 @@ Backwards compatibility.
 
 # ruff: noqa: I001
 # denote a release candidate for 0.1.0 with 0.1rc1, 0.1a1, 0.1b1, etc.
-__version__ = "1.12.1"
+__version__ = "1.13.0"
 
 import warnings as _warnings
 
lamindb/_finish.py CHANGED
@@ -495,7 +495,7 @@ def save_context_core(
         )
 
     logger.important(
-        f"finished Run('{run.uid[:8]}') after {formatted_run_time} at {format_field_value(run.finished_at)}"
+        f"finished Run('{run.uid}') after {formatted_run_time} at {format_field_value(run.finished_at)}"
     )
     if ln_setup.settings.instance.is_on_hub:
         instance_slug = ln_setup.settings.instance.slug
lamindb/_tracked.py CHANGED
@@ -4,9 +4,8 @@ from contextvars import ContextVar
 from datetime import datetime, timezone
 from typing import Callable, ParamSpec, TypeVar
 
-from .core._context import context
+from .core._context import context, serialize_params_to_json
 from .models import Run, Transform
-from .models._feature_manager import infer_feature_type_convert_json
 
 P = ParamSpec("P")
 R = TypeVar("R")
@@ -92,26 +91,15 @@ def tracked(uid: str | None = None) -> Callable[[Callable[P, R]], Callable[P, R]
         run = Run(transform=transform, initiated_by_run=initiated_by_run)  # type: ignore
         run.started_at = datetime.now(timezone.utc)
         run._status_code = -1  # started
-        run.save()
 
         # Bind arguments to get a mapping of parameter names to values
         bound_args = sig.bind(*args, **kwargs)
         bound_args.apply_defaults()
         params = dict(bound_args.arguments)
 
-        # Remove the run parameter if it exists (we'll inject our own)
-        params.pop("run", None)
-
-        # Deal with non-trivial parameter values
-        filtered_params = {}
-        for key, value in params.items():
-            dtype, _, _ = infer_feature_type_convert_json(key, value)
-            if (dtype == "?" or dtype.startswith("cat")) and dtype != "cat ? str":
-                continue
-            filtered_params[key] = value
-
         # Add parameters to the run
-        run.features.add_values(filtered_params)
+        run.params = serialize_params_to_json(params)
+        run.save()
 
         # Set the run in context and execute function
         token = current_tracked_run.set(run)
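The `@ln.tracked()` decorator now writes the bound function arguments to the new JSON field `run.params` via `serialize_params_to_json` instead of attaching them as feature values. A minimal sketch of the new behavior, assuming an initialized lamindb instance; the function, its parameters, and the query at the end are illustrative::

    import lamindb as ln

    @ln.tracked()
    def subset_dataframe(input_path: str, subset_rows: int = 2) -> None:
        ...  # illustrative body: read, subset, and re-save a dataset

    subset_dataframe("my_input.parquet", subset_rows=5)

    # the run created for this call carries the bound arguments as JSON
    run = ln.Run.filter(transform__key__icontains="subset_dataframe").order_by("-started_at").first()
    # run.params -> {"input_path": "my_input.parquet", "subset_rows": 5}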
lamindb/core/_context.py CHANGED
@@ -23,7 +23,8 @@ from ..errors import (
     TrackNotCalled,
     UpdateContext,
 )
-from ..models import Run, Transform, format_field_value
+from ..models import Run, SQLRecord, Transform, format_field_value
+from ..models._feature_manager import infer_feature_type_convert_json
 from ..models._is_versioned import bump_version as bump_version_function
 from ..models._is_versioned import (
     increment_base62,
@@ -235,6 +236,22 @@ class LogStreamTracker:
         self.original_excepthook(exc_type, exc_value, exc_traceback)
 
 
+def serialize_params_to_json(params: dict) -> dict:
+    serialized_params = {}
+    for key, value in params.items():
+        if isinstance(value, SQLRecord):
+            value = f"{value.__class__.__get_name_with_module__()}[{value.uid}]"
+        else:
+            dtype, _, _ = infer_feature_type_convert_json(key, value)
+            if (dtype == "?" or dtype.startswith("cat")) and dtype != "cat ? str":
+                logger.warning(
+                    f"skipping param {key} because dtype not JSON serializable"
+                )
+                continue
+        serialized_params[key] = value
+    return serialized_params
+
+
 class Context:
     """Run context.
 
@@ -325,6 +342,7 @@ class Context:
         project: str | Project | None = None,
         space: str | Space | None = None,
         branch: str | Branch | None = None,
+        features: dict | None = None,
         params: dict | None = None,
         new_run: bool | None = None,
         path: str | None = None,
@@ -343,7 +361,8 @@ class Context:
                 Default: the `"all"` space. Note that bionty entities ignore this setting and always get written to the `"all"` space.
                 If you want to manually move entities to a different space, set the `.space` field (:doc:`docs:access`).
             branch: A branch (or its `name` or `uid`) on which to store records.
-            params: A dictionary of parameters to track for the run.
+            features: A dictionary of features & values to track for the run.
+            params: A dictionary of params & values to track for the run.
             new_run: If `False`, loads the latest run of transform
                 (default notebook), if `True`, creates new run (default non-notebook).
             path: Filepath of notebook or script. Only needed if it can't be
@@ -465,10 +484,14 @@ class Context:
         transform_exists = Transform.filter(id=transform.id).first()
         if transform_exists is None:
             transform.save()
-            self._logging_message_track += f"created Transform('{transform.uid}')"
+            self._logging_message_track += (
+                f"created Transform('{transform.uid}', key='{transform.key}')"
+            )
             transform_exists = transform
         else:
-            self._logging_message_track += f"loaded Transform('{transform.uid}')"
+            self._logging_message_track += (
+                f"loaded Transform('{transform.uid}', key='{transform.key}')"
+            )
         self._transform = transform_exists
 
         if new_run is None:  # for notebooks, default to loading latest runs
@@ -493,25 +516,26 @@ class Context:
         if run is not None:  # loaded latest run
             run.started_at = datetime.now(timezone.utc)  # update run time
             run._status_code = -2  # re-started
-            self._logging_message_track += f", re-started Run('{run.uid[:8]}...') at {format_field_value(run.started_at)}"
+            self._logging_message_track += f", re-started Run('{run.uid}') at {format_field_value(run.started_at)}"
 
         if run is None:  # create new run
-            run = Run(  # type: ignore
-                transform=self._transform,
-                params=params,
-            )
+            run = Run(transform=self._transform)
             run.started_at = datetime.now(timezone.utc)
             run._status_code = -1  # started
-            self._logging_message_track += f", started new Run('{run.uid[:8]}...') at {format_field_value(run.started_at)}"
+            self._logging_message_track += f", started new Run('{run.uid}') at {format_field_value(run.started_at)}"
         # can only determine at ln.finish() if run was consecutive in
         # interactive session, otherwise, is consecutive
         run.is_consecutive = True if is_run_from_ipython else None
-        # need to save in all cases
-        run.save()
         if params is not None:
-            run.features.add_values(params)
+            run.params = serialize_params_to_json(params)
             self._logging_message_track += "\n→ params: " + ", ".join(
-                f"{key}={value}" for key, value in params.items()
+                f"{key}={value}" for key, value in run.params.items()
+            )
+        run.save()  # need to save now
+        if features is not None:
+            run.features.add_values(features)
+            self._logging_message_track += "\n→ features: " + ", ".join(
+                f"{key}={value}" for key, value in features.items()
             )
         self._run = run
         track_python_environment(run)
@@ -835,7 +859,9 @@ class Context:
                 reference_type=transform_ref_type,
                 type=transform_type,
             ).save()
-            self._logging_message_track += f"created Transform('{transform.uid}')"
+            self._logging_message_track += (
+                f"created Transform('{transform.uid}', key='{transform.key}')"
+            )
         else:
             uid = transform.uid
             # transform was already saved via `finish()`
@@ -874,9 +900,7 @@ class Context:
                 if transform_hash != transform.hash:
                     bump_revision = True
                 else:
-                    self._logging_message_track += (
-                        f"loaded Transform('{transform.uid}')"
-                    )
+                    self._logging_message_track += f"loaded Transform('{transform.uid}', key='{transform.key}')"
                 if bump_revision:
                     change_type = (
                         "re-running notebook with already-saved source code"
@@ -890,7 +914,9 @@ class Context:
                         f'✗ {change_type}, please update the `uid` argument in `track()` to "{uid[:-4]}{increment_base62(uid[-4:])}"'
                     )
                 else:
-                    self._logging_message_track += f"loaded Transform('{transform.uid}')"
+                    self._logging_message_track += (
+                        f"loaded Transform('{transform.uid}', key='{transform.key}')"
+                    )
             self._transform = transform
 
     def _finish(self, ignore_non_consecutive: None | bool = None) -> None:
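`track()` now distinguishes `params` (serialized to the JSON `run.params` field) from `features` (validated and attached via `run.features.add_values()`). A hedged usage sketch; the parameter values are illustrative and the feature name assumes a matching `Feature` record exists::

    import lamindb as ln

    ln.track(
        params={"learning_rate": 0.01, "downsample": True},  # stored as JSON on run.params
        features={"experiment": "EXP-0042"},  # requires an existing Feature named "experiment"
    )
    # ... do work ...
    ln.finish()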
@@ -924,7 +924,7 @@ class SpatialDataCatManager(CatManager):
         )
 
 
-class TiledbsomaCatManager(CatManager):
+class TiledbsomaCatManager(CatManager):  # pragma: no cover
     """Categorical manager for `tiledbsoma.Experiment`."""
 
     def __init__(
lamindb/curators/core.py CHANGED
@@ -289,13 +289,7 @@ class Curator:
 
         artifact_info = ""
         if self._artifact is not None:
-            artifact_uid = getattr(self._artifact, "uid", str(self._artifact))
-            short_uid = (
-                str(artifact_uid)[:8] + "..."
-                if len(str(artifact_uid)) > 8
-                else str(artifact_uid)
-            )
-            artifact_info = f", artifact: {colors.italic(short_uid)}"
+            artifact_info = f", artifact: {colors.italic(self._artifact.uid)}"
 
         return (
             f"{cls_name}{artifact_info}(Schema: {schema_str}{extra_info}{status_str})"
@@ -337,7 +331,7 @@ class SlotsCurator(Curator):
     def validate(self) -> None:
         """{}"""  # noqa: D415
         for slot, curator in self._slots.items():
-            logger.info(f"validating slot {slot} ...")
+            logger.debug(f"validating slot {slot} ...")
             curator.validate()
         # set _is_validated to True as no slot raised an error
         self._is_validated = True
@@ -403,6 +397,16 @@ class SlotsCurator(Curator):
         )
 
 
+def convert_dict_to_dataframe_for_validation(d: dict, schema: Schema) -> pd.DataFrame:
+    """Convert a dictionary to a DataFrame for validation against a schema."""
+    df = pd.DataFrame([d])
+    for feature in schema.members:
+        if feature.dtype.startswith("cat"):
+            if feature.name in df.columns:
+                df[feature.name] = pd.Categorical(df[feature.name])
+    return df
+
+
 # This is also currently used as DictCurator by flattening dictionaries into wide DataFrames.
 # Such an approach was never intended and there is room for a DictCurator in the future.
 # For more context, read https://laminlabs.slack.com/archives/C07DB677JF6/p1753994077716099 and
@@ -702,13 +706,11 @@ class DataFrameCurator(SlotsCurator):
     ) -> None:
         super().__init__(dataset=dataset, schema=schema)
 
-        # Create atomic curator for features only
-        if len(self._schema.features.all()) > 0:
-            self._atomic_curator = ComponentCurator(
-                dataset=dataset,
-                schema=schema,
-                slot=slot,
-            )
+        self._atomic_curator = ComponentCurator(
+            dataset=dataset,
+            schema=schema,
+            slot=slot,
+        )
 
         # Handle (nested) attrs
         if slot is None and schema.slots:
@@ -724,11 +726,11 @@ class DataFrameCurator(SlotsCurator):
                     data = _resolve_schema_slot_path(
                         attrs_dict, deeper_keys, slot_name, "attrs"
                     )
-                    df = pd.DataFrame([data])
+                    df = convert_dict_to_dataframe_for_validation(data, slot_schema)
                     self._slots[slot_name] = ComponentCurator(
                         df, slot_schema, slot=slot_name
                     )
-                else:
+                elif slot_name != "__external__":
                     raise ValueError(
                         f"Slot '{slot_name}' is not supported for DataFrameCurator. Must be 'attrs'."
                     )
@@ -783,6 +785,26 @@ class DataFrameCurator(SlotsCurator):
         )
 
 
+class ExperimentalDictCurator(DataFrameCurator):
+    """Curator for `dict` based on `DataFrameCurator`."""
+
+    def __init__(
+        self,
+        dataset: dict | Artifact,
+        schema: Schema,
+        slot: str | None = None,
+    ) -> None:
+        if not isinstance(dataset, dict) and not isinstance(dataset, Artifact):
+            raise InvalidArgument("The dataset must be a dict or dict-like artifact.")
+        if isinstance(dataset, Artifact):
+            assert dataset.otype == "dict", "Artifact must be of otype 'dict'."  # noqa: S101
+            d = dataset.load(is_run_input=False)
+        else:
+            d = dataset
+        df = convert_dict_to_dataframe_for_validation(d, schema)
+        super().__init__(df, schema, slot=slot)
+
+
 def _resolve_schema_slot_path(
     target_dict: dict[str, Any], slot_keys: Iterable[str], slot: str, base_path: str
 ) -> Any:
@@ -803,13 +825,18 @@ def _resolve_schema_slot_path(
         base_path += f"['{key}']"
         try:
             current = current[key]
-        except KeyError:
+        except (
+            KeyError,
+            TypeError,
+        ):  # if not a dict, raises TypeError; if a dict and key not found, raises KeyError
             available = (
-                list(current.keys()) if isinstance(current, dict) else "not a dict"
+                list(current.keys())
+                if isinstance(current, dict)
+                else "none (not a dict)"
             )
             raise InvalidArgument(
                 f"Schema slot '{slot}' requires keys {base_path} but key '{key}' "
-                f"not found. Available keys at this level: {available}"
+                f"not found. Available keys at this level: {available}."
            ) from None
 
    return current
@@ -1478,7 +1505,10 @@ class CatVector:
             if type_record is not None:
                 # if subtype_str is set, we need to set the type for new records
                 init_kwargs["type"] = type_record
-            non_validated_records.append(registry(**init_kwargs, **create_kwargs))
+            # here we create non-validated records skipping validation since we already ensured that they don't exist
+            non_validated_records.append(
+                registry(**init_kwargs, **create_kwargs, _skip_validation=True)
+            )
         if len(non_validated_records) > 0:
             ln_save(non_validated_records)
         model_field = colors.italic(registry.__get_name_with_module__())
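The new `convert_dict_to_dataframe_for_validation` helper casts categorical features to `pd.Categorical` so that a single-row DataFrame built from a `dict` validates like any other DataFrame, and `ExperimentalDictCurator` wraps this for `dict` datasets or artifacts with `otype == "dict"`. A rough sketch of the intended flow; the feature names, dtypes, and schema construction are illustrative assumptions::

    import lamindb as ln
    from lamindb.curators.core import ExperimentalDictCurator

    schema = ln.Schema(
        features=[
            ln.Feature(name="assay", dtype=str).save(),
            ln.Feature(name="n_cells", dtype=int).save(),
        ],
    ).save()

    metadata = {"assay": "scRNA-seq", "n_cells": 1200}
    curator = ExperimentalDictCurator(metadata, schema)
    curator.validate()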
lamindb/errors.py CHANGED
@@ -52,6 +52,12 @@ class UnknownStorageLocation(Exception):
     pass
 
 
+class NoStorageLocationForSpace(Exception):
+    """No storage location found for space."""
+
+    pass
+
+
 # equivalent to Django's DoesNotExist
 # and SQLAlchemy's NoResultFound
 class DoesNotExist(Exception):
@@ -267,7 +267,7 @@ def anndata_file_pbmc68k_test() -> Path:
 
     To reproduce::
 
-        pbmc68k = ln.core.datasets.anndata_pbmc68k_reduced()
+        pbmc68k = ln.examples.datasets.anndata_pbmc68k_reduced()
         pbmc68k_test = pbmc68k[:30, :200].copy()
         pbmc68k_test.raw = pbmc68k_test[:, :100]
         pbmc68k_test.obsp["test"] = sparse.eye(pbmc68k_test.shape[0], format="csr")
@@ -9,29 +9,11 @@
     lightning
 """
 
-from typing import Any
-
-
-def __getattr__(attr_name: str) -> Any:
-    # Defers import until accessed to avoid requiring PyTorch Lightning
-    if attr_name == "lightning":
-        from lamindb.integrations import _lightning
-
-        return _lightning
-    raise AttributeError(f"module has no attribute {attr_name!r}")
-
-
 from lamindb.core.storage import save_tiledbsoma_experiment
 
 from ._croissant import curate_from_croissant
 from ._vitessce import save_vitessce_config
 
-
-def __dir__():
-    # Makes lazy imports discoverable to dir() to enable autocomplete including lazy modules
-    return __all__
-
-
 __all__ = [
     "lightning",
     "save_tiledbsoma_experiment",
@@ -1,4 +1,4 @@
-"""PyTorch Lightning integrations.
+"""PyTorch Lightning.
 
 .. autosummary::
    :toctree: .
@@ -20,21 +20,24 @@ class Callback(pl.Callback):
 
     Creates version families of artifacts for given `key` (relative file path).
 
+    See also: :doc:`docs:mlflow` & :doc:`docs:wandb`.
+
     Args:
-        path: Path to the checkpoint
-        key: Artifact key
-        features: Additional feature values that every checkpoint gets annotated by.
+        path: A local path to the checkpoint.
+        key: The `key` for the checkpoint artifact.
+        features: Features to annotate the checkpoint.
 
     Examples:
 
-        Create a callback which creates artifacts for checkpoints and annotates them by the MLflow run ID
+        Create a callback that creates artifacts for checkpoints and annotates them by the MLflow run ID::
 
-            lamindb_callback = ln.integrations.lightning.Callback(
-                path=checkpoint_filename, key=artifact_key, annotate_by={ "mlflow_run_id": mlflow_run.info.run_id }
-            )
-            trainer = pl.Trainer(
-                callbacks=[lamindb_callback]
+            import lightning as pl
+            from lamindb.integrations import lightning as ll
+
+            lamindb_callback = ll.Callback(
+                path=checkpoint_filename, key=artifact_key, features={"mlflow_run_id": mlflow_run.info.run_id}
             )
+            trainer = pl.Trainer(callbacks=[lamindb_callback])
     """
 
     def __init__(
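For context, a slightly fuller version of the docstring example above, with the free variables (`checkpoint_filename`, `artifact_key`, `mlflow_run`, the model and datamodule) filled in with illustrative values::

    import lightning as pl
    import mlflow
    from lamindb.integrations import lightning as ll

    with mlflow.start_run() as mlflow_run:
        lamindb_callback = ll.Callback(
            path="checkpoints/last.ckpt",  # local checkpoint file written by Lightning
            key="models/autoencoder.ckpt",  # artifact key in LaminDB storage
            features={"mlflow_run_id": mlflow_run.info.run_id},
        )
        trainer = pl.Trainer(callbacks=[lamindb_callback], max_epochs=1)
        trainer.fit(model, datamodule=datamodule)  # model & datamodule defined elsewhere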
@@ -0,0 +1,17 @@
+# Generated by Django 5.2 on 2025-10-13 07:42
+
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+    dependencies = [
+        ("lamindb", "0133_artifactuser_artifact_users"),
+    ]
+
+    operations = [
+        migrations.AddField(
+            model_name="run",
+            name="params",
+            field=models.JSONField(null=True),
+        ),
+    ]