PyPI - lamindb - Versions diffs - 1.11.2__py3-none-any.whl → 1.12.0__py3-none-any.whl - Mend

lamindb 1.11.2__py3-none-any.whl → 1.12.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (54) hide show

lamindb/__init__.py +8 -14
lamindb/_tracked.py +2 -0
lamindb/base/types.py +1 -3
lamindb/core/_context.py +16 -31
lamindb/core/_mapped_collection.py +2 -2
lamindb/core/storage/paths.py +5 -3
lamindb/curators/core.py +15 -4
lamindb/examples/__init__.py +3 -1
lamindb/examples/croissant/__init__.py +3 -1
lamindb/examples/mlflow/__init__.py +38 -0
lamindb/examples/wandb/__init__.py +40 -0
lamindb/integrations/__init__.py +26 -0
lamindb/integrations/lightning.py +87 -0
lamindb/migrations/0120_add_record_fk_constraint.py +1 -1
lamindb/migrations/0122_remove_personproject_person_and_more.py +219 -0
lamindb/migrations/0123_alter_artifact_description_alter_branch_description_and_more.py +82 -0
lamindb/migrations/0124_page_artifact_page_collection_page_feature_page_and_more.py +15 -0
lamindb/migrations/0125_artifact_is_locked_collection_is_locked_and_more.py +79 -0
lamindb/migrations/0126_alter_artifact_is_locked_alter_collection_is_locked_and_more.py +105 -0
lamindb/migrations/0127_alter_run_status_code_feature_dtype.py +31 -0
lamindb/migrations/0128_artifact__real_key.py +21 -0
lamindb/migrations/0129_remove_feature_page_remove_project_page_and_more.py +779 -0
lamindb/migrations/0130_branch_space_alter_artifactblock_artifact_and_more.py +170 -0
lamindb/migrations/0131_record_unique_name_type_space.py +18 -0
lamindb/migrations/0132_record_parents_record_reference_and_more.py +61 -0
lamindb/migrations/0133_artifactuser_artifact_users.py +108 -0
lamindb/migrations/{0119_squashed.py → 0133_squashed.py} +1211 -322
lamindb/models/__init__.py +14 -4
lamindb/models/_django.py +1 -2
lamindb/models/_feature_manager.py +1 -0
lamindb/models/_is_versioned.py +14 -16
lamindb/models/_relations.py +7 -0
lamindb/models/artifact.py +99 -56
lamindb/models/artifact_set.py +20 -3
lamindb/models/block.py +174 -0
lamindb/models/can_curate.py +7 -9
lamindb/models/collection.py +9 -9
lamindb/models/feature.py +38 -38
lamindb/models/has_parents.py +15 -6
lamindb/models/project.py +44 -99
lamindb/models/query_manager.py +1 -1
lamindb/models/query_set.py +36 -8
lamindb/models/record.py +169 -46
lamindb/models/run.py +44 -10
lamindb/models/save.py +7 -7
lamindb/models/schema.py +26 -7
lamindb/models/sqlrecord.py +87 -35
lamindb/models/storage.py +13 -3
lamindb/models/transform.py +7 -2
lamindb/models/ulabel.py +6 -23
{lamindb-1.11.2.dist-info → lamindb-1.12.0.dist-info}/METADATA +18 -21
{lamindb-1.11.2.dist-info → lamindb-1.12.0.dist-info}/RECORD +54 -38
{lamindb-1.11.2.dist-info → lamindb-1.12.0.dist-info}/LICENSE +0 -0
{lamindb-1.11.2.dist-info → lamindb-1.12.0.dist-info}/WHEEL +0 -0

lamindb/__init__.py CHANGED Viewed

@@ -1,4 +1,4 @@
-"""A data framework for biology.
+"""A data lakehouse for biology.
 Data lineage
 ============
@@ -31,21 +31,20 @@ Manage artifacts and transforms.
    Transform
    Run
-Validate and annotate artifacts.
+Create labels and manage sheets with flexible records, e.g., for samples or donors.
 .. autosummary::
    :toctree: .
-   Feature
-   ULabel
-   Schema
+   Record
-Manage flexible records to track, e.g., samples or donors.
+Define features & schemas to validate artifacts & records.
 .. autosummary::
    :toctree: .
-   Record
+   Feature
+   Schema
 Manage projects.
@@ -58,7 +57,6 @@ Manage projects.
    Space
    Branch
    Reference
-   Person
 Other
 =====
@@ -106,15 +104,13 @@ Backwards compatibility.
 .. autosummary::
    :toctree: .
-   Param
-   FeatureSet
-   Curator
+   ULabel
 """
 # ruff: noqa: I001
 # denote a release candidate for 0.1.0 with 0.1rc1, 0.1a1, 0.1b1, etc.
-__version__ = "1.11.2"
+__version__ = "1.12.0"
 import warnings as _warnings
@@ -141,7 +137,6 @@ from .models import (
     Collection,
     Feature,
     FeatureSet,  # backward compat
-    Person,
     Project,
     Reference,
     Run,
@@ -188,7 +183,6 @@ __all__ = [
     "Space",
     "Branch",
     "Reference",
-    "Person",
     # other
     "connect",
     "view",

lamindb/_tracked.py CHANGED Viewed

@@ -91,6 +91,7 @@ def tracked(uid: str | None = None) -> Callable[[Callable[P, R]], Callable[P, R]
             run = Run(transform=transform, initiated_by_run=initiated_by_run)  # type: ignore
             run.started_at = datetime.now(timezone.utc)
+            run._status_code = -1  # started
             run.save()
             # Bind arguments to get a mapping of parameter names to values
@@ -117,6 +118,7 @@ def tracked(uid: str | None = None) -> Callable[[Callable[P, R]], Callable[P, R]
             try:
                 result = func(*args, **kwargs)
                 run.finished_at = datetime.now(timezone.utc)
+                run._status_code = 0  # completed
                 run.save()
                 return result
             finally:

lamindb/base/types.py CHANGED Viewed

@@ -34,9 +34,7 @@ from lamindb_setup.types import UPathStr  # noqa: F401
 ListLike = Union[list[str], pd.Series, np.array]
 StrField = Union[str, FieldAttr]  # typing.TypeAlias
-TransformType = Literal[
-    "pipeline", "notebook", "upload", "script", "function", "linker"
-]
+TransformType = Literal["pipeline", "notebook", "script", "function", "linker"]
 ArtifactKind = Literal["dataset", "model", "__lamindb_run__"]
 # below is used for Feature.dtype and Param.dtype

lamindb/core/_context.py CHANGED Viewed

@@ -135,6 +135,12 @@ class LogStreamHandler:
         if not self.file.closed:
             self.file.flush()
+    # https://laminlabs.slack.com/archives/C07DB677JF6/p1759423901926139
+    # other tracking frameworks like W&B use our output stream and expect
+    # certain functions like isatty to be available
+    def isatty(self) -> bool:
+        return False
     # .flush is sometimes (in jupyter etc.) called after every .write
     # this needs to be called only at the end
     def flush_buffer(self):
@@ -441,10 +447,6 @@ class Context:
                 ) = self._track_source_code(path=path)
             if description is None:
                 description = self._description
-            # temporarily until the hub displays the key by default
-            # populate the description with the filename again
-            if description is None:
-                description = self._path.name
             self._create_or_load_transform(
                 description=description,
                 transform_ref=transform_ref,
@@ -710,8 +712,14 @@ class Context:
             aux_transform = Transform.filter(hash=transform_hash).one_or_none()
         else:
             aux_transform = None
+        # determine the transform key
+        if ln_setup.settings.work_dir is not None:
+            key = self._path.relative_to(ln_setup.settings.work_dir).as_posix()
+        else:
+            key = self._path.name
         # if the user did not pass a uid and there is no matching aux_transform
-        # need to search for the transform based on the filename
+        # need to search for the transform based on the key
         if self.uid is None and aux_transform is None:
             class SlashCount(Func):
@@ -720,12 +728,11 @@ class Context:
             # we need to traverse from greater depth to shorter depth so that we match better matches first
             transforms = (
-                Transform.filter(key__endswith=self._path.name, is_latest=True)
+                Transform.filter(key__endswith=key, is_latest=True)
                 .annotate(slash_count=SlashCount("key"))
                 .order_by("-slash_count")
             )
             uid = f"{base62_12()}0000"
-            key = self._path.name
             target_transform = None
             if len(transforms) != 0:
                 message = ""
@@ -755,19 +762,6 @@ class Context:
         # the user did pass the uid
         elif self.uid is not None and len(self.uid) == 16:
             transform = Transform.filter(uid=self.uid).one_or_none()
-            if transform is not None:
-                if transform.key not in self._path.as_posix():
-                    n_parts = len(Path(transform.key).parts)
-                    (
-                        Path(*self._path.parts[-n_parts:]).as_posix()
-                        if n_parts > 0
-                        else ""
-                    )
-                    key = self._path.name
-                else:
-                    key = transform.key  # type: ignore
-            else:
-                key = self._path.name
         else:
             if self.uid is not None:
                 # the case with length 16 is covered above
@@ -784,10 +778,8 @@ class Context:
                 # deal with a hash-based match
                 # the user might have a made a copy of the notebook or script
                 # and actually wants to create a new transform
-                if aux_transform is not None and not aux_transform.key.endswith(
-                    self._path.name
-                ):
-                    prompt = f"Found transform with same hash but different key: {aux_transform.key}. Did you rename your {transform_type} to {self._path.name} (1) or intentionally made a copy (2)?"
+                if aux_transform is not None and not aux_transform.key.endswith(key):
+                    prompt = f"Found transform with same hash but different key: {aux_transform.key}. Did you rename your {transform_type} to {key} (1) or intentionally made a copy (2)?"
                     response = (
                         "1" if os.getenv("LAMIN_TESTING") == "true" else input(prompt)
                     )
@@ -800,12 +792,6 @@ class Context:
                             None,
                         )  # make a new transform
             if aux_transform is not None:
-                if aux_transform.key.endswith(self._path.name):
-                    key = aux_transform.key
-                else:
-                    key = "/".join(
-                        aux_transform.key.split("/")[:-1] + [self._path.name]
-                    )
                 uid, target_transform, message = self._process_aux_transform(
                     aux_transform, transform_hash
                 )
@@ -814,7 +800,6 @@ class Context:
             else:
                 uid = f"{self.uid}0000" if self.uid is not None else None
                 target_transform = None
-                key = self._path.name
             self.uid, transform = uid, target_transform
         if self.version is not None:
             # test inconsistent version passed

lamindb/core/_mapped_collection.py CHANGED Viewed

@@ -634,8 +634,8 @@ class MappedCollection:
     def __exit__(self, exc_type, exc_val, exc_tb):
         self.close()
-    @staticmethod
-    def torch_worker_init_fn(worker_id):
+    @classmethod
+    def torch_worker_init_fn(cls, worker_id):
         """`worker_init_fn` for `torch.utils.data.DataLoader`.
         Improves performance for `num_workers > 1`.

lamindb/core/storage/paths.py CHANGED Viewed

@@ -25,12 +25,14 @@ AUTO_KEY_PREFIX = ".lamindb/"
 # add type annotations back asap when re-organizing the module
 def auto_storage_key_from_artifact(artifact: Artifact):
-    if artifact.key is None or artifact._key_is_virtual:
+    if (real_key := artifact._real_key) is not None:
+        return real_key
+    key = artifact.key
+    if key is None or artifact._key_is_virtual:
         return auto_storage_key_from_artifact_uid(
             artifact.uid, artifact.suffix, artifact.overwrite_versions
         )
-    else:
-        return artifact.key
+    return artifact.key
 def auto_storage_key_from_artifact_uid(

lamindb/curators/core.py CHANGED Viewed

@@ -625,7 +625,7 @@ class ComponentCurator(Curator):
                 has_dtype_error = "WRONG_DATATYPE" in str(err)
                 error_msg = str(err)
                 if has_dtype_error:
-                    error_msg += "   ▶ Hint: Consider setting 'coerce_datatype=True' to attempt coercing/converting values during validation to the pre-defined dtype."
+                    error_msg += "   ▶ Hint: Consider setting 'coerce_dtype=True' to attempt coercing/converting values during validation to the pre-defined dtype."
                 raise ValidationError(error_msg) from err
         else:
             self._cat_manager_validate()
@@ -911,7 +911,7 @@ class AnnDataCurator(SlotsCurator):
         super().__init__(dataset=dataset, schema=schema)
         if not data_is_scversedatastructure(self._dataset, "AnnData"):
             raise InvalidArgument("dataset must be AnnData-like.")
-        if schema.otype and schema.otype != "AnnData":
+        if schema.otype != "AnnData":
             raise InvalidArgument("Schema otype must be 'AnnData'.")
         for slot, slot_schema in schema.slots.items():
@@ -1388,7 +1388,7 @@ class CatVector:
             related_name = registry._meta.get_field("type").remote_field.related_name
             type_record = registry.get(name=self._subtype_str)
             if registry.__name__ == "Record":
-                self._subtype_query_set = type_record.query_children()
+                self._subtype_query_set = type_record.query_records()
             else:
                 self._subtype_query_set = getattr(type_record, related_name).all()
             values_array = np.array(str_values)
@@ -1561,7 +1561,18 @@ class CatVector:
             if n_non_validated > len(syn_mapper):
                 if syn_mapper:
                     warning_message += "\n    for remaining terms:\n"
-                warning_message += f"    → fix typos, remove non-existent values, or save terms via: {colors.cyan(non_validated_hint_print)}"
+                check_organism = ""
+                if registry.__base__.__name__ == "BioRecord":
+                    import bionty as bt
+                    from bionty._organism import is_organism_required
+                    if is_organism_required(registry):
+                        organism = (
+                            valid_inspect_kwargs.get("organism", False)
+                            or bt.settings.organism.name
+                        )
+                        check_organism = f"fix organism '{organism}', "
+                warning_message += f"    → {check_organism}fix typos, remove non-existent values, or save terms via: {colors.cyan(non_validated_hint_print)}"
                 if self._subtype_query_set is not None:
                     warning_message += f"\n    → a valid label for subtype '{self._subtype_str}' has to be one of {self._subtype_query_set.to_list('name')}"
             logger.info(f'mapping "{self._key}" on {colors.italic(model_field)}')

lamindb/examples/__init__.py CHANGED Viewed

@@ -7,8 +7,10 @@
    datasets
    cellxgene
    croissant
+   mlflow
+   wandb
 """
-from . import croissant, datasets, schemas
+from . import croissant, datasets, mlflow, schemas, wandb
 from .cellxgene import _cellxgene

lamindb/examples/croissant/__init__.py CHANGED Viewed

@@ -17,7 +17,7 @@ def mini_immuno(
     """Return paths to the mini immuno dataset and its metadata as a Croissant file.
     Args:
-        n_files: Number of files inside the croissant file. Default is 1.
+        n_files: Number of files inside the croissant file.
         filepath_prefix: Move the dataset and references to it in a specific directory.
     Example
@@ -63,8 +63,10 @@ def mini_immuno(
     croissant_path = Path("mini_immuno.anndata.zarr_metadata.json")
     with open(croissant_path, "w", encoding="utf-8") as f:
         json.dump(data, f, indent=2)
     result: list[Path] = [croissant_path, dataset1_path]
     if n_files == 1:
         return result
     result.append(dataset2_path)
     return result

lamindb/examples/mlflow/__init__.py ADDED Viewed

@@ -0,0 +1,38 @@
+"""Examples and utilities for Mlflow.
+.. autosummary::
+   :toctree: .
+   save_mlflow_features
+"""
+import lamindb as ln
+def save_mlflow_features():
+    """Saves all MLflow experiment and run related features.
+    Saves the following features:
+    - mlflow_run_id
+    - mlflow_run_name
+    - mlflow_experiment_id
+    - mlflow_experiment_name
+    - mlflow_user_id
+    - mlflow_status
+    - mlflow_lifecycle_stage
+    - mlflow_artifact_uri
+    - mlflow_start_time
+    - mlflow_end_time
+    """
+    mlflow_type = ln.Feature(name="MLflow", is_type=True).save()
+    ln.Feature(name="mlflow_run_id", dtype=str, type=mlflow_type).save()
+    ln.Feature(name="mlflow_run_name", dtype=str, type=mlflow_type).save()
+    ln.Feature(name="mlflow_experiment_id", dtype=str, type=mlflow_type).save()
+    ln.Feature(name="mlflow_experiment_name", dtype=str, type=mlflow_type).save()
+    ln.Feature(name="mlflow_user_id", dtype=str, type=mlflow_type).save()
+    ln.Feature(name="mlflow_status", dtype=str, type=mlflow_type).save()
+    ln.Feature(name="mlflow_lifecycle_stage", dtype=str, type=mlflow_type).save()
+    ln.Feature(name="mlflow_artifact_uri", dtype=str, type=mlflow_type).save()
+    ln.Feature(name="mlflow_start_time", dtype=int, type=mlflow_type).save()
+    ln.Feature(name="mlflow_end_time", dtype=int, type=mlflow_type).save()

lamindb/examples/wandb/__init__.py ADDED Viewed

@@ -0,0 +1,40 @@
+"""Examples and utilities for Weights & Biases.
+.. autosummary::
+   :toctree: .
+   save_wandb_features
+"""
+import lamindb as ln
+def save_wandb_features():
+    """Saves all Weights & Biases project and run related features.
+    Saves the following features:
+    - wandb_run_id
+    - wandb_run_name
+    - wandb_run_entity
+    - wandb_project
+    - wandb_state
+    - wandb_url
+    - wandb_tags
+    - wandb_group
+    - wandb_job_type
+    - timestamp
+    - runtime
+    """
+    wandb_type = ln.Feature(name="Weights & Biases", is_type=True).save()
+    ln.Feature(name="wandb_run_id", dtype=str, type=wandb_type).save()
+    ln.Feature(name="wandb_run_name", dtype=str, type=wandb_type).save()
+    ln.Feature(name="wandb_run_entity", dtype=str, type=wandb_type).save()
+    ln.Feature(name="wandb_project", dtype=str, type=wandb_type).save()
+    ln.Feature(name="wandb_state", dtype=str, type=wandb_type).save()
+    ln.Feature(name="wandb_url", dtype=str, type=wandb_type).save()
+    ln.Feature(name="wandb_tags", dtype=str, type=wandb_type).save()
+    ln.Feature(name="wandb_group", dtype=str, type=wandb_type).save()
+    ln.Feature(name="wandb_job_type", dtype=str, type=wandb_type).save()
+    ln.Feature(name="wandb_timestamp", dtype=float, type=wandb_type).save()
+    ln.Feature(name="wandb_runtime", dtype=float, type=wandb_type).save()

lamindb/integrations/__init__.py CHANGED Viewed

@@ -6,9 +6,35 @@
    save_vitessce_config
    save_tiledbsoma_experiment
    curate_from_croissant
+   lightning
 """
+from typing import Any
+def __getattr__(attr_name: str) -> Any:
+    # Defers import until accessed to avoid requiring PyTorch Lightning
+    if attr_name == "lightning":
+        from lamindb.integrations import lightning
+        return lightning
+    raise AttributeError(f"module has no attribute {attr_name!r}")
 from lamindb.core.storage import save_tiledbsoma_experiment
 from ._croissant import curate_from_croissant
 from ._vitessce import save_vitessce_config
+def __dir__():
+    # Makes lazy imports discoverable to dir() to enable autocomplete including lazy modules
+    return __all__
+__all__ = [
+    "lightning",
+    "save_tiledbsoma_experiment",
+    "curate_from_croissant",
+    "save_vitessce_config",
+]

lamindb/integrations/lightning.py ADDED Viewed

@@ -0,0 +1,87 @@
+"""PyTorch Lightning integrations.
+.. autosummary::
+    :toctree: .
+    Callback
+"""
+from pathlib import Path
+from typing import Any
+import lightning as pl
+from lightning.pytorch import LightningModule, Trainer
+import lamindb as ln
+class Callback(pl.Callback):
+    """Saves PyTorch Lightning model checkpoints to the LaminDB instance after each training epoch.
+    Creates version families of artifacts for given `key` (relative file path).
+    Args:
+        path: Path to the checkpoint
+        key: Artifact key
+        features: Additional feature values that every checkpoint gets annotated by.
+    Examples:
+        Create a callback which creates artifacts for checkpoints and annotates them by the MLflow run ID
+            lamindb_callback = ln.integrations.lightning.Callback(
+                path=checkpoint_filename, key=artifact_key, annotate_by={ "mlflow_run_id": mlflow_run.info.run_id }
+            )
+            trainer = pl.Trainer(
+                callbacks=[lamindb_callback]
+            )
+    """
+    def __init__(
+        self,
+        path: str | Path,
+        key: str,
+        features: dict[str, Any] | None = None,
+    ):
+        self.path = Path(path)
+        self.key = key
+        self.features = features or {}
+    def on_train_start(self, trainer: Trainer, pl_module: LightningModule) -> None:
+        """Validates that features exist for all specified params."""
+        missing = [
+            feature
+            for feature in self.features.keys()
+            if ln.Feature.filter(name=feature).one_or_none() is None
+        ]
+        if missing:
+            s = "s" if len(missing) > 1 else ""
+            raise ValueError(
+                f"Feature{s} {', '.join(missing)} missing. Create {'them' if len(missing) > 1 else 'it'} first."
+            )
+    def on_train_epoch_end(self, trainer: Trainer, pl_module: LightningModule) -> None:
+        """Saves model checkpoint artifacts at the end of each epoch and optionally annotates them."""
+        trainer.save_checkpoint(self.path)
+        af = ln.Artifact(self.path, key=self.key, kind="model").save()
+        feature_values = dict(self.features)
+        for name in self.features.keys():
+            if hasattr(trainer, name):
+                feature_values[name] = getattr(trainer, name)
+            elif name in trainer.callback_metrics:
+                metric_value = trainer.callback_metrics[name]
+                feature_values[name] = (
+                    metric_value.item()
+                    if hasattr(metric_value, "item")
+                    else float(metric_value)
+                )
+        if feature_values:
+            af.features.add_values(feature_values)
+        af.save()
+__all__ = ["Callback"]

lamindb/migrations/0120_add_record_fk_constraint.py CHANGED Viewed

@@ -54,7 +54,7 @@ def revert_postgres_constraint(apps, schema_editor):
 class Migration(migrations.Migration):
     dependencies = [
-        ("lamindb", "0119_squashed"),
+        ("lamindb", "0119_rename_records_project_linked_in_records"),
     ]
     operations = [

lamindb 1.11.2__py3-none-any.whl → 1.12.0__py3-none-any.whl

lamindb 1.11.2__py3-none-any.whl → 1.12.0__py3-none-any.whl