PyPI - lamindb - Versions diffs - 0.69.1__py3-none-any.whl → 0.69.3__py3-none-any.whl - Mend

lamindb 0.69.1__py3-none-any.whl → 0.69.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (26)

lamindb/__init__.py +6 -4
lamindb/_annotate.py +790 -0
lamindb/_artifact.py +2 -8
lamindb/_collection.py +16 -4
lamindb/_feature.py +11 -9
lamindb/_finish.py +194 -11
lamindb/_query_set.py +6 -4
lamindb/_run.py +3 -1
lamindb/_save.py +34 -21
lamindb/core/__init__.py +4 -0
lamindb/core/_data.py +3 -0
lamindb/core/_feature_manager.py +4 -3
lamindb/core/_run_context.py +17 -5
lamindb/core/storage/_backed_access.py +48 -11
lamindb/core/storage/file.py +2 -7
{lamindb-0.69.1.dist-info → lamindb-0.69.3.dist-info}/METADATA +7 -7
{lamindb-0.69.1.dist-info → lamindb-0.69.3.dist-info}/RECORD +20 -25
lamindb/validation/__init__.py +0 -19
lamindb/validation/_anndata_validator.py +0 -130
lamindb/validation/_lookup.py +0 -38
lamindb/validation/_register.py +0 -214
lamindb/validation/_validate.py +0 -131
lamindb/validation/_validator.py +0 -205
/lamindb/{_validate.py → _can_validate.py} +0 -0
{lamindb-0.69.1.dist-info → lamindb-0.69.3.dist-info}/LICENSE +0 -0
{lamindb-0.69.1.dist-info → lamindb-0.69.3.dist-info}/WHEEL +0 -0

lamindb/_artifact.py CHANGED Viewed

@@ -15,8 +15,7 @@ from lamindb_setup.core.types import UPathStr
 from lamindb_setup.core.upath import (
     create_path,
     extract_suffix_from_path,
-    get_stat_dir_gs,
-    get_stat_dir_s3,
+    get_stat_dir_cloud,
     get_stat_file_cloud,
 )
 from lnschema_core import Artifact, Run, Storage
@@ -192,10 +191,7 @@ def get_stat_or_artifact(
             if "ETag" in stat:  # is file
                 size, hash, hash_type = get_stat_file_cloud(stat)
             elif path.is_dir():
-                if path.protocol == "s3":
-                    size, hash, hash_type, n_objects = get_stat_dir_s3(path)
-                elif path.protocol == "gs":
-                    size, hash, hash_type, n_objects = get_stat_dir_gs(path)
+                size, hash, hash_type, n_objects = get_stat_dir_cloud(path)
         if hash is None:
             logger.warning(f"did not add hash for {path}")
             return size, hash, hash_type, n_objects
@@ -827,8 +823,6 @@ def load(
 # docstring handled through attach_func_to_class_method
 def stage(self, is_run_input: Optional[bool] = None) -> Path:
-    if self.suffix in {".zrad", ".zarr"}:
-        raise RuntimeError("zarr object can't be staged, please use load() or stream()")
     _track_run_input(self, is_run_input)
     using_key = settings._using_key

lamindb/_collection.py CHANGED Viewed

@@ -24,6 +24,7 @@ from lnschema_core.types import DataLike, VisibilityChoice
 from lamindb._utils import attach_func_to_class_method
 from lamindb.core._data import _track_run_input
 from lamindb.core._mapped_collection import MappedCollection
+from lamindb.core.storage import UPath
 from lamindb.core.versioning import get_uid_from_old_version, init_uid
 from . import Artifact, Run
@@ -339,17 +340,16 @@ def mapped(
     stream: bool = False,
     is_run_input: Optional[bool] = None,
 ) -> "MappedCollection":
-    _track_run_input(self, is_run_input)
     path_list = []
     for artifact in self.artifacts.all():
         if artifact.suffix not in {".h5ad", ".zrad", ".zarr"}:
             logger.warning(f"Ignoring artifact with suffix {artifact.suffix}")
             continue
-        elif not stream and artifact.suffix == ".h5ad":
+        elif not stream:
             path_list.append(artifact.stage())
         else:
             path_list.append(artifact.path)
-    return MappedCollection(
+    ds = MappedCollection(
         path_list,
         label_keys,
         join,
@@ -359,6 +359,18 @@ def mapped(
         parallel,
         dtype,
     )
+    # track only if successful
+    _track_run_input(self, is_run_input)
+    return ds
+# docstring handled through attach_func_to_class_method
+def stage(self, is_run_input: Optional[bool] = None) -> List[UPath]:
+    _track_run_input(self, is_run_input)
+    path_list = []
+    for artifact in self.artifacts.all():
+        path_list.append(artifact.stage())
+    return path_list
 # docstring handled through attach_func_to_class_method
@@ -467,7 +479,6 @@ def restore(self) -> None:
 @doc_args(Collection.artifacts.__doc__)
 def artifacts(self) -> QuerySet:
     """{}."""
-    _track_run_input(self)
     return self.unordered_artifacts.order_by("collectionartifact__id")
@@ -476,6 +487,7 @@ METHOD_NAMES = [
     "from_anndata",
     "from_df",
     "mapped",
+    "stage",
     "backed",
     "load",
     "delete",

lamindb/_feature.py CHANGED Viewed

@@ -117,16 +117,18 @@ def from_df(
         else:
             types[name] = convert_numpy_dtype_to_lamin_feature_type(col.dtype)
-    # silence the info "loaded record with exact same name "
+    # silence the warning "loaded record with exact same name "
     verbosity = settings.verbosity
-    settings.verbosity = "warning"
-    registry = field.field.model
-    if registry != Feature:
-        raise ValueError("field must be a Feature FieldAttr!")
-    # create records for all features including non-validated
-    features = [Feature(name=name, type=type) for name, type in types.items()]
-    settings.verbosity = verbosity
+    try:
+        settings.verbosity = "error"
+        registry = field.field.model
+        if registry != Feature:
+            raise ValueError("field must be a Feature FieldAttr!")
+        # create records for all features including non-validated
+        features = [Feature(name=name, type=type) for name, type in types.items()]
+    finally:
+        settings.verbosity = verbosity
     assert len(features) == len(df.columns)

lamindb/_finish.py CHANGED Viewed

@@ -1,8 +1,15 @@
+import os
+import shutil
+import subprocess
 from datetime import datetime, timezone
+from pathlib import Path
+from typing import Optional
 import lamindb_setup as ln_setup
 from lamin_utils import logger
+from lnschema_core import Run, Transform
+from ._query_set import QuerySet
 from .core._run_context import is_run_from_ipython, run_context
@@ -11,13 +18,16 @@ class CallFinishInLastCell(SystemExit):
 def finish(i_saved_the_notebook: bool = False):
-    """Mark the tracked run as finished.
+    """Mark a tracked run as finished.
-    Save the run report to your default storage location.
-    """
-    from lamin_cli._save import save
+    When run in notebooks, save the run report to your default storage location.
+    Args:
+        i_saved_the_notebook: Indicate that you saved the notebook in your
+            editor (JupyterLab, VSCode, etc.).
+    """
     if is_run_from_ipython:
+        # notebooks
         from nbproject.dev import read_notebook
         from nbproject.dev._check_last_cell import check_last_cell
@@ -26,14 +36,187 @@ def finish(i_saved_the_notebook: bool = False):
                 "Save the notebook, pass `i_saved_the_notebook=True`, and re-run this cell."
             )
             return None
-        nb = read_notebook(run_context.path)  # type: ignore
-        if not check_last_cell(nb, "i_saved_the_notebook"):
+        notebook_content = read_notebook(run_context.path)  # type: ignore
+        if not check_last_cell(notebook_content, "i_saved_the_notebook"):
             raise CallFinishInLastCell(
                 "Can only finish() from the last code cell of the notebook."
             )
-        # scripts are already saved during `ln.track()`
-        # TODO: make this more symmetric
-        save(run_context.path)
+        save_run_context_core(
+            run=run_context.run,
+            transform=run_context.transform,
+            filepath=run_context.path,
+            finished_at=True,
+            notebook_content=notebook_content,
+        )
+    else:
+        # scripts
+        run_context.run.finished_at = datetime.now(timezone.utc)  # update run time
+        run_context.run.save()
+# do not type because we need to be aware of lnschema_core import order
+def save_run_context_core(
+    *,
+    run: Run,
+    transform: Transform,
+    filepath: Path,
+    transform_family: Optional[QuerySet] = None,
+    is_consecutive: bool = True,
+    finished_at: bool = False,
+    notebook_content=None,  # nbproject.Notebook
+) -> Optional[str]:
+    import lamindb as ln
+    ln.settings.verbosity = "success"
+    if transform.type == "notebook":
+        try:
+            import nbstripout
+            from nbproject.dev import (
+                check_consecutiveness,
+                read_notebook,
+            )
+        except ImportError:
+            logger.error(
+                "install nbproject & nbstripout: pip install nbproject nbstripout"
+            )
+            return None
+        if notebook_content is None:
+            notebook_content = read_notebook(filepath)  # type: ignore
+        is_consecutive = check_consecutiveness(notebook_content)
+        if not is_consecutive:
+            if os.getenv("LAMIN_TESTING") is None:
+                decide = input(
+                    "   Do you still want to proceed with publishing? (y/n) "
+                )
+            else:
+                decide = "n"
+            if decide != "y":
+                logger.error("Aborted (non-consecutive)!")
+                return "aborted-non-consecutive"
-    run_context.run.finished_at = datetime.now(timezone.utc)  # update run time
-    run_context.run.save()
+        # convert the notebook file to html
+        # log_level is set to 40 to silence the nbconvert logging
+        result = subprocess.run(
+            "jupyter nbconvert --to html"
+            f" {filepath.as_posix()} --Application.log_level=40",
+            shell=True,
+        )
+        # move the temporary file into the cache dir in case it's accidentally
+        # in an existing storage location -> we want to move associated
+        # artifacts into default storage and not register them in an existing
+        # location
+        filepath_html = filepath.with_suffix(".html")  # current location
+        shutil.move(
+            filepath_html,  # type: ignore
+            ln_setup.settings.storage.cache_dir / filepath_html.name,
+        )  # move; don't use Path.rename here because of cross-device link error
+        # see https://laminlabs.slack.com/archives/C04A0RMA0SC/p1710259102686969
+        filepath_html = (
+            ln_setup.settings.storage.cache_dir / filepath_html.name
+        )  # adjust location
+        assert result.returncode == 0
+        # copy the notebook file to a temporary file
+        source_code_path = ln_setup.settings.storage.cache_dir / filepath.name
+        shutil.copy2(filepath, source_code_path)  # copy
+        result = subprocess.run(f"nbstripout {source_code_path}", shell=True)
+        assert result.returncode == 0
+    else:
+        source_code_path = filepath
+    # find initial versions of source codes and html reports
+    initial_report = None
+    initial_source = None
+    if transform_family is None:
+        transform_family = transform.versions
+    if len(transform_family) > 0:
+        for prev_transform in transform_family.order_by("-created_at"):
+            # check for id to avoid query
+            if prev_transform.latest_report_id is not None:
+                # any previous latest report of this transform is OK!
+                initial_report = prev_transform.latest_report
+            if prev_transform.source_code_id is not None:
+                # any previous source code id is OK!
+                initial_source = prev_transform.source_code
+    ln.settings.silence_file_run_transform_warning = True
+    # register the source code
+    if transform.source_code is not None:
+        # check if the hash of the notebook source code matches
+        check_source_code = ln.Artifact(source_code_path, key="dummy")
+        if check_source_code._state.adding:
+            if os.getenv("LAMIN_TESTING") is None:
+                # in test, auto-confirm overwrite
+                response = input(
+                    "You try to save a new notebook source code with the same version"
+                    f" '{transform.version}'; do you want to replace the content of the"
+                    f" existing source code {transform.source_code}? (y/n)"
+                )
+            else:
+                response = "y"
+            if response == "y":
+                transform.source_code.replace(source_code_path)
+                transform.source_code.save()
+            else:
+                logger.warning(
+                    "Please create a new version of the notebook via `lamin track"
+                    " <filepath>` and re-run the notebook"
+                )
+                return "rerun-the-notebook"
+    else:
+        source_code = ln.Artifact(
+            source_code_path,
+            description=f"Source of transform {transform.uid}",
+            version=transform.version,
+            is_new_version_of=initial_source,
+            visibility=0,  # hidden file
+            run=False,
+        )
+        source_code.save()
+        transform.source_code = source_code
+        logger.success(f"saved transform.source_code: {transform.source_code}")
+    # track environment
+    filepath_env = ln_setup.settings.storage.cache_dir / f"run_env_pip_{run.uid}.txt"
+    if filepath_env.exists():
+        artifact = ln.Artifact(
+            filepath_env,
+            description="requirements.txt",
+            visibility=0,
+            run=False,
+        )
+        if artifact._state.adding:
+            artifact.save()
+        run.environment = artifact
+        logger.success(f"saved run.environment: {run.environment}")
+    # save report file
+    if not transform.type == "notebook":
+        run.save()
+    else:
+        if run.report_id is not None:
+            logger.warning(
+                "there is already an existing report for this run, replacing it"
+            )
+            run.report.replace(filepath_html)
+            run.report.save()
+        else:
+            report_file = ln.Artifact(
+                filepath_html,
+                description=f"Report of run {run.uid}",
+                is_new_version_of=initial_report,
+                visibility=0,  # hidden file
+                run=False,
+            )
+            report_file.save()
+            run.report = report_file
+        run.is_consecutive = is_consecutive
+        if finished_at:
+            run.finished_at = datetime.now(timezone.utc)
+        run.save()
+        transform.latest_report = run.report
+    transform.save()
+    if transform.type == "notebook":
+        logger.success(f"saved transform.latest_report: {transform.latest_report}")
+    identifier = ln_setup.settings.instance.slug
+    logger.success(f"go to: https://lamin.ai/{identifier}/transform/{transform.uid}")
+    # because run & transform changed, update the global run_context
+    run_context.run = run
+    run_context.transform = transform
+    return None

lamindb/_query_set.py CHANGED Viewed

@@ -12,6 +12,7 @@ from lnschema_core.models import (
     IsTree,
     IsVersioned,
     Registry,
+    Run,
     Transform,
 )
 from lnschema_core.types import ListLike, StrField
@@ -165,7 +166,8 @@ class QuerySet(models.QuerySet, CanValidate, IsTree):
     def delete(self, *args, **kwargs):
         """Delete all records in the query set."""
-        if self.model in {Artifact, Collection, Transform}:
+        # both Transform & Run might reference artifacts
+        if self.model in {Artifact, Collection, Transform, Run}:
             for record in self:
                 record.delete(*args, **kwargs)
         else:
@@ -241,7 +243,7 @@ class QuerySet(models.QuerySet, CanValidate, IsTree):
         self, values: ListLike, field: Optional[Union[str, StrField]] = None, **kwargs
     ):
         """{}."""
-        from ._validate import _validate
+        from ._can_validate import _validate
         return _validate(cls=self, values=values, field=field, **kwargs)
@@ -250,7 +252,7 @@ class QuerySet(models.QuerySet, CanValidate, IsTree):
         self, values: ListLike, field: Optional[Union[str, StrField]] = None, **kwargs
     ):
         """{}."""
-        from ._validate import _inspect
+        from ._can_validate import _inspect
         return _inspect(cls=self, values=values, field=field, **kwargs)
@@ -259,7 +261,7 @@ class QuerySet(models.QuerySet, CanValidate, IsTree):
         self, values: Iterable, field: Optional[Union[str, StrField]] = None, **kwargs
     ):
         """{}."""
-        from ._validate import _standardize
+        from ._can_validate import _standardize
         return _standardize(cls=self, values=values, field=field, **kwargs)

lamindb/_run.py CHANGED Viewed

@@ -42,7 +42,9 @@ def delete_run_artifacts(run: Run) -> None:
     if environment is not None or report is not None:
         run.save()
     if environment is not None:
-        environment.delete(permanent=True)
+        # only delete if there are no other runs attached to this environment
+        if environment.environment_of.count() == 0:
+            environment.delete(permanent=True)
     if report is not None:
         report.delete(permanent=True)

lamindb/_save.py CHANGED Viewed

@@ -10,7 +10,7 @@ import lamindb_setup
 from django.db import transaction
 from django.utils.functional import partition
 from lamin_utils import logger
-from lamindb_setup.core.upath import print_hook
+from lamindb_setup.core.upath import UPath, print_hook
 from lnschema_core.models import Artifact, Registry
 from lamindb.core._settings import settings
@@ -141,13 +141,15 @@ def check_and_attempt_upload(
     # a local env it will have a _local_filepath and needs to be uploaded
     if hasattr(artifact, "_local_filepath"):
         try:
-            upload_artifact(artifact, using_key, access_token=access_token)
+            storage_path = upload_artifact(
+                artifact, using_key, access_token=access_token
+            )
         except Exception as exception:
             logger.warning(f"could not upload artifact: {artifact}")
             return exception
         # copies (if on-disk) or moves the temporary file (if in-memory) to the cache
         if os.getenv("LAMINDB_MULTI_INSTANCE") is None:
-            copy_or_move_to_cache(artifact)
+            copy_or_move_to_cache(artifact, storage_path)
         # after successful upload, we should remove the attribute so that another call
         # call to save won't upload again, the user should call replace() then
         del artifact._local_filepath
@@ -155,35 +157,44 @@ def check_and_attempt_upload(
     return None
-def copy_or_move_to_cache(artifact: Artifact):
+def copy_or_move_to_cache(artifact: Artifact, storage_path: UPath):
     local_path = artifact._local_filepath
-    # in-memory zarr or on-disk zarr
-    if local_path is None or not local_path.is_file():
+    # some in-memory cases (zarr for now)
+    if local_path is None or not local_path.exists():
         return None
     local_path = local_path.resolve()
-    cache_dir = lamindb_setup.settings.storage.cache_dir
+    is_dir = local_path.is_dir()
+    cache_dir = settings._storage_settings.cache_dir
-    # local instance, just delete the cached file
+    # just delete from the cache dir if a local instance
     if not lamindb_setup.settings.storage.is_cloud:
         if cache_dir in local_path.parents:
-            local_path.unlink()
+            if is_dir:
+                shutil.rmtree(local_path)
+            else:
+                local_path.unlink()
         return None
-    # maybe create something like storage.key_to_local(key) later to simplfy
-    storage_key = auto_storage_key_from_artifact(artifact)
-    storage_path = lamindb_setup.settings.storage.key_to_filepath(storage_key)
-    cache_path = lamindb_setup.settings.storage.cloud_to_local_no_update(storage_path)
-    cache_path.parent.mkdir(parents=True, exist_ok=True)
-    if cache_dir in local_path.parents:
-        local_path.replace(cache_path)
-    else:
-        shutil.copy(local_path, cache_path)
+    cache_path = settings._storage_settings.cloud_to_local_no_update(storage_path)
+    if local_path != cache_path:
+        cache_path.parent.mkdir(parents=True, exist_ok=True)
+        if cache_dir in local_path.parents:
+            local_path.replace(cache_path)
+        else:
+            if is_dir:
+                shutil.copytree(local_path, cache_path)
+            else:
+                shutil.copy(local_path, cache_path)
     # make sure that the cached version is older than the cloud one
     mts = datetime.now().timestamp() + 1.0
-    os.utime(cache_path, times=(mts, mts))
+    if is_dir:
+        files = (file for file in cache_path.rglob("*") if file.is_file())
+        for file in files:
+            os.utime(file, times=(mts, mts))
+    else:
+        os.utime(cache_path, times=(mts, mts))
 # This is also used within Artifact.save()
@@ -264,7 +275,7 @@ def prepare_error_message(records, stored_artifacts, exception) -> str:
 def upload_artifact(
     artifact, using_key: Optional[str] = None, access_token: Optional[str] = None
-) -> None:
+) -> UPath:
     """Store and add file and its linked entries."""
     # can't currently use  filepath_from_artifact here because it resolves to ._local_filepath
     storage_key = auto_storage_key_from_artifact(artifact)
@@ -283,3 +294,5 @@ def upload_artifact(
     elif hasattr(artifact, "_to_store") and artifact._to_store:
         logger.save(msg)
         store_artifact(artifact._local_filepath, storage_path)
+    return storage_path

lamindb/core/__init__.py CHANGED Viewed

@@ -14,6 +14,9 @@ Registries:
    LabelManager
    IsTree
    IsVersioned
+   DataFrameAnnotator
+   AnnDataAnnotator
+   AnnotateLookup
    CanValidate
    HasParents
    InspectResult
@@ -50,6 +53,7 @@ from lnschema_core.models import (
     Registry,
 )
+from lamindb._annotate import AnnDataAnnotator, AnnotateLookup, DataFrameAnnotator
 from lamindb._query_manager import QueryManager
 from lamindb._query_set import QuerySet, RecordsList
 from lamindb.core._feature_manager import FeatureManager

lamindb/core/_data.py CHANGED Viewed

@@ -46,6 +46,9 @@ def get_run(run: Optional[Run]) -> Optional[Run]:
         run = run_context.run
         if run is None and not settings.silence_file_run_transform_warning:
             logger.warning(WARNING_RUN_TRANSFORM)
+    # suppress run by passing False
+    elif not run:
+        run = None
     return run

lamindb/core/_feature_manager.py CHANGED Viewed

@@ -219,7 +219,7 @@ class FeatureManager:
             slot = "columns" if slot is None else slot
         self._add_feature_set(feature_set=FeatureSet(features=features), slot=slot)
-    def add_from_df(self):
+    def add_from_df(self, field: FieldAttr = Feature.name, **kwargs):
         """Add features from DataFrame."""
         if isinstance(self._host, Artifact):
             assert self._host.accessor == "DataFrame"
@@ -228,11 +228,12 @@ class FeatureManager:
             assert self._host.artifact.accessor == "DataFrame"
         # parse and register features
+        registry = field.field.model
         df = self._host.load()
-        features = Feature.from_values(df.columns)
+        features = registry.from_values(df.columns, field=field, **kwargs)
         if len(features) == 0:
             logger.error(
-                "no validated features found in DataFrame! please register features first:\n   → features = Feature.from_df(df)\n   → ln.save(features)"
+                "no validated features found in DataFrame! please register features first!"
             )
             return

lamindb/core/_run_context.py CHANGED Viewed

@@ -42,6 +42,10 @@ class MissingTransformSettings(SystemExit):
     pass
+class UpdateTransformSettings(SystemExit):
+    pass
 def get_uid_ext(version: str) -> str:
     from lamin_utils._base62 import encodebytes
@@ -131,7 +135,7 @@ def update_stem_uid_or_version(
             f'ln.settings.transform.stem_uid = "{new_stem_uid}"\nln.settings.transform.version ='
             f' "{new_version}"\n'
         )
-        raise SystemExit(
+        raise UpdateTransformSettings(
             f"Please update your transform settings as follows:\n{new_metadata}"
         )
     return updated, new_stem_uid, new_version
@@ -326,15 +330,18 @@ class run_context:
             )
             if run is not None:  # loaded latest run
                 run.started_at = datetime.now(timezone.utc)  # update run time
-                run.save()
                 logger.important(f"loaded: {run}")
         if run is None:  # create new run
             run = Run(
                 transform=cls.transform,
             )
-            run.save()
             logger.important(f"saved: {run}")
+        # can only determine at ln.finish() if run was consecutive in
+        # interactive session, otherwise, is consecutive
+        run.is_consecutive = True if is_run_from_ipython else None
+        # need to save in all cases
+        run.save()
         cls.run = run
         from ._track_environment import track_environment
@@ -343,9 +350,14 @@ class run_context:
         if not is_run_from_ipython and cls.path is not None:
             # upload run source code & environment
-            from lamin_cli._save import save
+            from lamindb._finish import save_run_context_core
-            save(cls.path)
+            save_run_context_core(
+                run=cls.run,
+                transform=cls.transform,
+                filepath=cls.path,
+                is_consecutive=True,
+            )
         return None
     @classmethod

lamindb 0.69.1__py3-none-any.whl → 0.69.3__py3-none-any.whl

lamindb 0.69.1__py3-none-any.whl → 0.69.3__py3-none-any.whl