lamindb 0.71.0__py3-none-any.whl → 0.71.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
lamindb/__init__.py CHANGED
@@ -41,7 +41,7 @@ Modules & settings:
  """
 
  # denote a release candidate for 0.1.0 with 0.1rc1, 0.1a1, 0.1b1, etc.
- __version__ = "0.71.0"
+ __version__ = "0.71.2"
 
  import os as _os
 
lamindb/_artifact.py CHANGED
@@ -21,7 +21,6 @@ from lamindb_setup.core.upath import (
      get_stat_file_cloud,
  )
  from lnschema_core import Artifact, Run, Storage
- from lnschema_core.models import IsTree
  from lnschema_core.types import (
      VisibilityChoice,
  )
@@ -35,8 +34,7 @@ from lamindb.core.storage import (
      delete_storage,
      infer_suffix,
      load_to_memory,
-     size_adata,
-     write_to_file,
+     write_to_disk,
  )
  from lamindb.core.storage.paths import (
      auto_storage_key_from_artifact,
@@ -173,8 +171,7 @@ def process_data(
          # Alex: I don't understand the line below
          if path.suffixes == []:
              path = path.with_suffix(suffix)
-         if suffix != ".zarr":
-             write_to_file(data, path)
+         write_to_disk(data, path)
          use_existing_storage_key = False
      else:
          raise NotImplementedError(
@@ -194,16 +191,13 @@ def get_stat_or_artifact(
      n_objects = None
      if settings.upon_file_create_skip_size_hash:
          return None, None, None, n_objects
-     if suffix == ".zarr" and memory_rep is not None and isinstance(memory_rep, AnnData):
-         size = size_adata(memory_rep)
-         return size, None, None, n_objects
      stat = path.stat()  # one network request
      if not isinstance(path, LocalPathClasses):
          size, hash, hash_type = None, None, None
          if stat is not None:
              if "ETag" in stat:  # is file
                  size, hash, hash_type = get_stat_file_cloud(stat)
-             elif path.is_dir():
+             elif stat["type"] == "directory":
                  size, hash, hash_type, n_objects = get_stat_dir_cloud(path)
          if hash is None:
              logger.warning(f"did not add hash for {path}")
@@ -589,7 +583,7 @@ def __init__(artifact: Artifact, *args, **kwargs):
          init_self_from_db(artifact, kwargs_or_artifact)
          # adding "key" here is dangerous because key might be auto-populated
          update_attributes(artifact, {"description": description})
-         if artifact.key != key:
+         if artifact.key != key and key is not None:
              logger.warning(
                  f"key {artifact.key} on existing artifact differs from passed key {key}"
              )
@@ -914,11 +908,25 @@ def load(self, is_run_input: bool | None = None, stream: bool = False, **kwargs)
 
  # docstring handled through attach_func_to_class_method
  def cache(self, is_run_input: bool | None = None) -> Path:
-     _track_run_input(self, is_run_input)
-
      using_key = settings._using_key
      filepath = filepath_from_artifact(self, using_key=using_key)
-     return setup_settings.instance.storage.cloud_to_local(filepath, print_progress=True)
+     try:
+         cache_path = setup_settings.instance.storage.cloud_to_local(
+             filepath, print_progress=True
+         )
+     except Exception as e:
+         if not isinstance(filepath, LocalPathClasses):
+             cache_path = setup_settings.instance.storage.cloud_to_local_no_update(
+                 filepath
+             )
+             if cache_path.is_file():
+                 cache_path.unlink(missing_ok=True)
+             elif cache_path.is_dir():
+                 shutil.rmtree(cache_path)
+         raise e
+     # only call if sync is successful
+     _track_run_input(self, is_run_input)
+     return cache_path
 
 
  # docstring handled through attach_func_to_class_method
@@ -1003,6 +1011,11 @@ def save(self, upload: bool | None = None, **kwargs) -> None:
          local_path = self.path
          self.storage_id = setup_settings.instance.storage.id
          self._local_filepath = local_path
+         # switch to virtual storage key upon upload
+         # the local filepath is already cached at that point
+         self.key_is_virtual = True
+         # ensure that the artifact is uploaded
+         self._to_store = True
 
      self._save_skip_storage(**kwargs)
 
@@ -1045,27 +1058,6 @@ def path(self) -> Path | UPath:
      return filepath_from_artifact(self, using_key)
 
 
- @classmethod  # type: ignore
- @doc_args(IsTree.view_tree.__doc__)
- def view_tree(
-     cls,
-     level: int = -1,
-     limit_to_directories: bool = False,
-     length_limit: int = 1000,
-     max_files_per_dir_per_type: int = 7,
- ) -> None:
-     """{}."""
-     from lamindb.core._view_tree import view_tree as _view_tree
-
-     _view_tree(
-         cls=cls,
-         level=level,
-         limit_to_directories=limit_to_directories,
-         length_limit=length_limit,
-         max_files_per_dir_per_type=max_files_per_dir_per_type,
-     )
-
-
  # docstring handled through attach_func_to_class_method
  def restore(self) -> None:
      self.visibility = VisibilityChoice.default.value
@@ -1085,7 +1077,6 @@ METHOD_NAMES = [
      "replace",
      "from_dir",
      "restore",
-     "view_tree",
  ]
 
  if ln_setup._TESTING:
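
`Artifact.cache()` now registers the run input only after a successful sync, and a failed sync deletes the partially downloaded path before re-raising. A minimal usage sketch (the query is hypothetical):

```python
import lamindb as ln

artifact = ln.Artifact.filter(description="my dataset").first()  # hypothetical query
try:
    local_path = artifact.cache()  # on success, also tracks the artifact as run input
except Exception:
    # cache() already removed the partial download, so a retry starts clean
    local_path = artifact.cache()
```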
lamindb/_finish.py CHANGED
@@ -8,6 +8,7 @@ from typing import TYPE_CHECKING
 
  import lamindb_setup as ln_setup
  from lamin_utils import logger
+ from lamindb_setup.core.hashing import hash_file
  from lnschema_core.types import TransformType
 
  from .core._run_context import is_run_from_ipython, run_context
@@ -35,7 +36,7 @@ def get_seconds_since_modified(filepath) -> float:
  def finish():
      """Mark a tracked run as finished.
 
-     If run in a notebook, it saves the run report & source code to your default storage location.
+     Saves source code and, for notebooks, a run report to your default storage location.
      """
      if run_context.path is None:
          raise TrackNotCalled("Please run `ln.track()` before `ln.finish()`")
@@ -47,16 +48,12 @@ def finish():
              raise NotebookNotSaved(
                  "Please save the notebook in your editor right before running `ln.finish()`"
              )
-         save_run_context_core(
-             run=run_context.run,
-             transform=run_context.transform,
-             filepath=run_context.path,
-             finished_at=True,
-         )
-     else:  # scripts
-         # save_run_context_core was already called during ln.track()
-         run_context.run.finished_at = datetime.now(timezone.utc)  # update run time
-         run_context.run.save()
+     save_run_context_core(
+         run=run_context.run,
+         transform=run_context.transform,
+         filepath=run_context.path,
+         finished_at=True,
+     )
 
 
  def save_run_context_core(
@@ -138,15 +135,17 @@ def save_run_context_core(
      if prev_transform.source_code_id is not None:
          prev_source = prev_transform.source_code
      ln.settings.silence_file_run_transform_warning = True
-     # register the source code
-     if transform.source_code is not None:
-         # check if the hash of the notebook source code matches
-         check_source_code = ln.Artifact(source_code_path, key="dummy")
-         if check_source_code._state.adding:
+
+     # track source code
+     if transform.source_code_id is not None:
+         # check if the hash of the transform source code matches
+         # (for scripts, we already run the same logic in track() - we can deduplicate the call at some point)
+         hash, _ = hash_file(source_code_path)  # ignore hash_type for now
+         if hash != transform.source_code.hash:
              if os.getenv("LAMIN_TESTING") is None:
                  # in test, auto-confirm overwrite
                  response = input(
-                     f"You are about to overwrite existing source code (hash {transform.source_code.hash}) for transform version"
+                     f"You are about to replace (overwrite) existing source code (hash '{transform.source_code.hash}') for transform version"
                      f" '{transform.version}'. Proceed? (y/n)"
                  )
              else:
@@ -154,6 +153,9 @@ def save_run_context_core(
              if response == "y":
                  transform.source_code.replace(source_code_path)
                  transform.source_code.save(upload=True)
+                 logger.success(
+                     f"replaced transform.source_code: {transform.source_code}"
+                 )
              else:
                  logger.warning("Please re-run `ln.track()` to make a new version")
                  return "rerun-the-notebook"
@@ -169,21 +171,32 @@ def save_run_context_core(
          source_code.save(upload=True)
          transform.source_code = source_code
          logger.success(f"saved transform.source_code: {transform.source_code}")
+
      # track environment
      filepath_env = ln_setup.settings.storage.cache_dir / f"run_env_pip_{run.uid}.txt"
      if filepath_env.exists():
-         artifact = ln.Artifact(
-             filepath_env,
-             description="requirements.txt",
-             visibility=0,
-             run=False,
-         )
-         if artifact._state.adding:
+         hash, _ = hash_file(filepath_env)
+         artifact = ln.Artifact.filter(hash=hash, visibility=0).one_or_none()
+         new_env_artifact = artifact is None
+         if new_env_artifact:
+             artifact = ln.Artifact(
+                 filepath_env,
+                 description="requirements.txt",
+                 visibility=0,
+                 run=False,
+             )
              artifact.save(upload=True)
          run.environment = artifact
-         logger.success(f"saved run.environment: {run.environment}")
-     # save report file
+         if new_env_artifact:
+             logger.success(f"saved run.environment: {run.environment}")
+
+     # set finished_at
+     if finished_at:
+         run.finished_at = datetime.now(timezone.utc)
+
+     # track report and set is_consecutive
      if not transform.type == TransformType.notebook:
+         run.is_consecutive = True
          run.save()
      else:
          if run.report_id is not None:
@@ -203,16 +216,15 @@ def save_run_context_core(
          report_file.save(upload=True)
          run.report = report_file
          run.is_consecutive = is_consecutive
-         if finished_at:
-             run.finished_at = datetime.now(timezone.utc)
          run.save()
          transform.latest_report = run.report
-     transform.save()
-     if transform.type == TransformType.notebook:
          logger.success(f"saved transform.latest_report: {transform.latest_report}")
-     if ln_setup.settings.instance.is_remote:
+     transform.save()
+
+     # finalize
+     if ln_setup.settings.instance.is_on_hub:
          identifier = ln_setup.settings.instance.slug
-         logger.success(
+         logger.important(
              f"go to: https://lamin.ai/{identifier}/transform/{transform.uid}"
          )
      # because run & transform changed, update the global run_context
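
Environment tracking now deduplicates the `requirements.txt` artifact by content hash instead of probing `_state.adding` on a throwaway record. A sketch of the lookup pattern, mirroring the diff (the file name is hypothetical):

```python
import lamindb as ln
from lamindb_setup.core.hashing import hash_file

hash, _ = hash_file("run_env_pip_abc123.txt")  # hypothetical requirements dump
artifact = ln.Artifact.filter(hash=hash, visibility=0).one_or_none()
if artifact is None:  # no artifact with this content yet: register and upload it
    artifact = ln.Artifact(
        "run_env_pip_abc123.txt", description="requirements.txt", visibility=0, run=False
    )
    artifact.save(upload=True)
```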
lamindb/_query_set.py CHANGED
@@ -11,7 +11,6 @@ from lnschema_core.models import (
      Artifact,
      CanValidate,
      Collection,
-     IsTree,
      IsVersioned,
      Registry,
      Run,
@@ -83,7 +82,7 @@ class RecordsList(UserList):
          return one_helper(self)
 
 
- class QuerySet(models.QuerySet, CanValidate, IsTree):
+ class QuerySet(models.QuerySet, CanValidate):
      """Sets of records returned by queries.
 
      See Also:
@@ -265,25 +264,6 @@ class QuerySet(models.QuerySet, CanValidate, IsTree):
 
          return _standardize(cls=self, values=values, field=field, **kwargs)
 
-     @doc_args(IsTree.view_tree.__doc__)
-     def view_tree(
-         self,
-         level: int = -1,
-         limit_to_directories: bool = False,
-         length_limit: int = 1000,
-         max_files_per_dir_per_type: int = 7,
-     ) -> None:
-         """{}."""
-         from .core._view_tree import view_tree as _view_tree
-
-         _view_tree(
-             cls=self,
-             level=level,
-             limit_to_directories=limit_to_directories,
-             length_limit=length_limit,
-             max_files_per_dir_per_type=max_files_per_dir_per_type,
-         )
-
 
  def filter_query_set_by_latest_version(ordered_query_set: QuerySet) -> RecordsList:
      # evaluating length can be very costly, hence, the try-except block
lamindb/_run.py CHANGED
@@ -13,6 +13,7 @@ def __init__(run: Run, *args, **kwargs):
      transform: Transform = None
      if "transform" in kwargs or len(args) == 1:
          transform = kwargs.pop("transform") if len(args) == 0 else args[0]
+     params: str | None = kwargs.pop("params") if "params" in kwargs else None
      reference: str | None = kwargs.pop("reference") if "reference" in kwargs else None
      reference_type: str | None = (
          kwargs.pop("reference_type") if "reference_type" in kwargs else None
@@ -25,6 +26,7 @@ def __init__(run: Run, *args, **kwargs):
          transform=transform,
          reference=reference,
          reference_type=reference_type,
+         json=params,
      )
 
 
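Because `params` is now popped from `kwargs` and stored on the run's `json` field, parameters can be attached when a run is created. A hedged sketch, assuming the public `ln.track()` forwards its arguments to `run_context._track()` (see the `_run_context.py` changes below):

```python
import lamindb as ln

# assumption: ln.track() exposes the new params argument of run_context._track()
ln.track(params={"learning_rate": 0.01, "subset": "lymph_node"})
```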
lamindb/_save.py CHANGED
@@ -6,13 +6,13 @@ import traceback
  from collections import defaultdict
  from datetime import datetime
  from functools import partial
- from typing import Iterable, overload
+ from typing import TYPE_CHECKING, Iterable, overload
 
  import lamindb_setup
  from django.db import transaction
  from django.utils.functional import partition
  from lamin_utils import logger
- from lamindb_setup.core.upath import UPath, print_hook
+ from lamindb_setup.core.upath import print_hook
  from lnschema_core.models import Artifact, Registry
 
  from lamindb.core._settings import settings
@@ -23,12 +23,8 @@ from lamindb.core.storage.paths import (
      store_file_or_folder,
  )
 
- try:
-     from lamindb.core.storage._zarr import write_adata_zarr
- except ImportError:
-
-     def write_adata_zarr(filepath):  # type: ignore
-         raise ImportError("Please install zarr: pip install zarr")
+ if TYPE_CHECKING:
+     from lamindb_setup.core.upath import UPath
 
 
  def save(
@@ -162,7 +158,7 @@ def check_and_attempt_upload(
  def copy_or_move_to_cache(artifact: Artifact, storage_path: UPath):
      local_path = artifact._local_filepath
 
-     # some in-memory cases (zarr for now)
+     # in-memory cases
      if local_path is None or not local_path.exists():
          return None
 
@@ -284,18 +280,7 @@ def upload_artifact(
      storage_path = attempt_accessing_path(
          artifact, storage_key, using_key=using_key, access_token=access_token
      )
-     msg = f"storing artifact '{artifact.uid}' at '{storage_path}'"
-     if (
-         artifact.suffix == ".zarr"
-         and hasattr(artifact, "_memory_rep")
-         and artifact._memory_rep is not None
-     ):
-         logger.save(msg)
-         print_progress = partial(
-             print_hook, objectname=storage_path.name, action="uploading"
-         )
-         write_adata_zarr(artifact._memory_rep, storage_path, callback=print_progress)
-     elif hasattr(artifact, "_to_store") and artifact._to_store:
-         logger.save(msg)
+     if hasattr(artifact, "_to_store") and artifact._to_store:
+         logger.save(f"storing artifact '{artifact.uid}' at '{storage_path}'")
          store_file_or_folder(artifact._local_filepath, storage_path)
      return storage_path
lamindb/_transform.py CHANGED
@@ -1,10 +1,10 @@
  from __future__ import annotations
 
- from lnschema_core.models import Artifact, Run, Transform
+ from lnschema_core.models import Run, Transform
  from lnschema_core.types import TransformType
 
  from ._run import delete_run_artifacts
- from .core.versioning import get_uid_from_old_version, init_uid
+ from .core.versioning import process_is_new_version_of
 
 
  def __init__(transform: Transform, *args, **kwargs):
@@ -32,15 +32,9 @@ def __init__(transform: Transform, *args, **kwargs):
              "Only name, key, version, type, is_new_version_of, reference, "
              f"reference_type can be passed, but you passed: {kwargs}"
          )
-     if is_new_version_of is None:
-         new_uid = init_uid(version=version, n_full_id=Transform._len_full_uid)
-     else:
-         if not isinstance(is_new_version_of, Transform):
-             raise TypeError("is_new_version_of has to be of type ln.Transform")
-         new_uid, version = get_uid_from_old_version(is_new_version_of, version)
-         if name is None:
-             name = is_new_version_of.name
-
+     new_uid, version, name = process_is_new_version_of(
+         is_new_version_of, version, name, Transform
+     )
      # this is only because the user-facing constructor allows passing an id
      # most others don't
      if uid is None:
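
The uid/version/name resolution moves into the shared `process_is_new_version_of` helper (added in `lamindb/core/versioning.py` below). A hedged usage sketch of the behavior it preserves:

```python
import lamindb as ln

transform_v1 = ln.Transform(name="Preprocess cells")
transform_v1.save()
# inherits the stem uid and, since no name is given, the name of the old version
transform_v2 = ln.Transform(is_new_version_of=transform_v1, version="2")
```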
lamindb/core/__init__.py CHANGED
@@ -12,7 +12,6 @@ Registries:
     Data
     FeatureManager
     LabelManager
-    IsTree
     IsVersioned
     CanValidate
     HasParents
@@ -55,7 +54,6 @@ from lnschema_core.models import (
      CanValidate,
      Data,
      HasParents,
-     IsTree,
      IsVersioned,
      Registry,
  )
lamindb/core/_data.py CHANGED
@@ -345,7 +345,7 @@ def add_labels(
              f" {old_feature_set}"
          )
          old_feature_set.delete()
-         self.features._add_feature_set(feature_set, slot="external")
+         self.features.add_feature_set(feature_set, slot="external")
          logger.save(
              f"linked new feature '{feature.name}' together with new feature set"
              f" {feature_set}"
lamindb/core/_feature_manager.py CHANGED
@@ -236,7 +236,7 @@ class FeatureManager:
              and self._host.artifact.accessor == "DataFrame"
          ):
              slot = "columns" if slot is None else slot
-         self._add_feature_set(feature_set=FeatureSet(features=features), slot=slot)
+         self.add_feature_set(feature_set=FeatureSet(features=features), slot=slot)
 
      def add_from_df(self, field: FieldAttr = Feature.name, organism: str | None = None):
          """Add features from DataFrame."""
@@ -325,7 +325,7 @@ class FeatureManager:
          self._host._feature_sets = feature_sets
          self._host.save()
 
-     def _add_feature_set(self, feature_set: FeatureSet, slot: str):
+     def add_feature_set(self, feature_set: FeatureSet, slot: str):
          """Add new feature set to a slot.
 
          Args:
@@ -405,7 +405,8 @@ class FeatureManager:
                      f"FeatureSet is not transferred, check if organism is set correctly: {feature_set}"
                  )
                  continue
-             # TODO: make sure the uid matches if featureset is composed of same features
-             # feature_set_self.uid = feature_set.uid
+             # make sure the uid matches if featureset is composed of same features
+             if feature_set_self.hash == feature_set.hash:
+                 feature_set_self.uid = feature_set.uid
              logger.info(f"saving {slot} featureset: {feature_set_self}")
-             self._host.features._add_feature_set(feature_set_self, slot)
+             self._host.features.add_feature_set(feature_set_self, slot)
lamindb/core/_run_context.py CHANGED
@@ -8,6 +8,7 @@ from pathlib import Path, PurePath
  from typing import TYPE_CHECKING
 
  from lamin_utils import logger
+ from lamindb_setup.core.hashing import hash_file
  from lnschema_core import Run, Transform, ids
  from lnschema_core.types import TransformType
  from lnschema_core.users import current_user_id
@@ -175,6 +176,17 @@ def raise_transform_settings_error() -> None:
      )
 
 
+ def pretty_pypackages(dependencies: dict) -> str:
+     deps_list = []
+     for pkg, ver in dependencies.items():
+         if ver != "":
+             deps_list.append(pkg + f"=={ver}")
+         else:
+             deps_list.append(pkg)
+     deps_list.sort()
+     return " ".join(deps_list)
+
+
  class run_context:
      """Global run context."""
 
@@ -189,6 +201,7 @@ class run_context:
      def _track(
          cls,
          *,
+         params: dict | None = None,
          transform: Transform | None = None,
          new_run: bool | None = None,
          path: str | None = None,
@@ -204,6 +217,7 @@ class run_context:
              whether the script exists in the git repository and add a link.
 
          Args:
+             params: A dictionary of parameters to track for the run.
              transform: Can be of type `"pipeline"` or `"notebook"`
                  (:class:`~lamindb.core.types.TransformType`).
              new_run: If `False`, loads latest run of transform
@@ -298,11 +312,13 @@ class run_context:
              )
          if run is not None:  # loaded latest run
              run.started_at = datetime.now(timezone.utc)  # update run time
+             run.json = params  # update run params
              logger.important(f"loaded: {run}")
 
          if run is None:  # create new run
              run = Run(
                  transform=cls.transform,
+                 params=params,
              )
              logger.important(f"saved: {run}")
          # can only determine at ln.finish() if run was consecutive in
@@ -315,16 +331,6 @@ class run_context:
          from ._track_environment import track_environment
 
          track_environment(run)
-
-         if not is_run_from_ipython and cls.path is not None:
-             # upload run source code & environment
-             from lamindb._finish import save_run_context_core
-
-             save_run_context_core(
-                 run=cls.run,
-                 transform=cls.transform,
-                 filepath=cls.path,
-             )
          return None
 
      @classmethod
@@ -386,17 +392,12 @@ class run_context:
          # log imported python packages
          if not path_str.startswith("/fileId="):
              try:
-                 from nbproject.dev._metadata_display import DisplayMeta
                  from nbproject.dev._pypackage import infer_pypackages
 
-                 metadata, _, nb = nbproject.header(
-                     filepath=path_str,
-                     metadata_only=True,
-                 )
-                 dm = DisplayMeta(metadata)
+                 nb = nbproject.dev.read_notebook(path_str)
                  logger.important(
                      "notebook imports:"
-                     f" {' '.join(dm.pypackage(infer_pypackages(nb, pin_versions=True)))}"
+                     f" {pretty_pypackages(infer_pypackages(nb, pin_versions=True))}"
                  )
              except Exception:
                  logger.debug("inferring imported packages failed")
@@ -451,19 +452,36 @@ class run_context:
          transform.save()
          logger.important(f"updated: {transform}")
      # check whether the notebook source code was already saved
-     if is_run_from_ipython and transform.source_code_id:
-         if os.getenv("LAMIN_TESTING") is None:
-             response = input(
-                 "You already saved source code for this notebook."
-                 " Bump the version before a new run? (y/n)"
-             )
-         else:
-             response = "y"
-         if response == "y":
-             update_stem_uid_or_version(stem_uid, version, bump_version=True)
+     if transform.source_code_id is not None:
+         response = None
+         if is_run_from_ipython:
+             if os.getenv("LAMIN_TESTING") is None:
+                 response = input(
+                     "You already saved source code for this notebook."
+                     " Bump the version before a new run? (y/n)"
+                 )
+             else:
+                 response = "y"
          else:
-             # we want a new stem_uid in this case, hence raise the error
-             raise_transform_settings_error()
+             hash, _ = hash_file(cls.path)  # ignore hash_type for now
+             if hash != transform.source_code.hash:
+                 # only if hashes don't match, we need user input
+                 if os.getenv("LAMIN_TESTING") is None:
+                     response = input(
+                         "You already saved source code for this script and meanwhile modified it without bumping a version."
+                         " Bump the version before a new run? (y/n)"
+                     )
+                 else:
+                     response = "y"
+             else:
+                 logger.important(f"loaded: {transform}")
+         if response is not None:
+             # if a script is re-run and hashes match, we don't need user input
+             if response == "y":
+                 update_stem_uid_or_version(stem_uid, version, bump_version=True)
+             else:
+                 # we want a new stem_uid in this case, hence raise the error
+                 raise_transform_settings_error()
      else:
          logger.important(f"loaded: {transform}")
      cls.transform = transform
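
`pretty_pypackages` replaces nbproject's `DisplayMeta` for rendering inferred notebook imports: pinned packages become `pkg==version`, unpinned ones stay bare, and everything is sorted into one line:

```python
# what the helper defined above produces
print(pretty_pypackages({"scanpy": "", "anndata": "0.10.7"}))
# -> anndata==0.10.7 scanpy
```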
lamindb/core/_settings.py CHANGED
@@ -5,7 +5,7 @@ from typing import TYPE_CHECKING, Literal, Mapping
 
  import lamindb_setup as ln_setup
  from lamin_utils import logger
- from lamindb_setup._add_remote_storage import add_managed_storage
+ from lamindb_setup._set_managed_storage import set_managed_storage
  from lamindb_setup.core._settings import settings as setup_settings
  from lamindb_setup.core._settings_instance import sanitize_git_repo_url
 
@@ -147,7 +147,7 @@ class Settings:
              path, kwargs = path_kwargs
          else:
              path, kwargs = path_kwargs, {}
-         add_managed_storage(path, **kwargs)
+         set_managed_storage(path, **kwargs)
 
      @property
      def storage_local(self) -> Path:
lamindb/core/_sync_git.py CHANGED
@@ -61,11 +61,15 @@ def get_git_commit_hash(blob_hash: str, repo_dir: Path | None = None) -> str | N
          capture_output=True,
          cwd=repo_dir,
      )
-     commit_hash = result.stdout.decode()
+     # we just care to find one commit
+     # hence, we split by new line ("\n") and use the first one
+     commit_hash = result.stdout.decode().split("\n")[0]
      if commit_hash == "" or result.returncode == 1:
          return None
      else:
-         assert len(commit_hash) == 40
+         assert (
+             len(commit_hash) == 40
+         ), f"commit hash |{commit_hash}| is not 40 characters long"
          return commit_hash
 
 
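Splitting on the first newline guards against git printing more than one matching commit. A small illustration with hypothetical 40-character hashes:

```python
stdout = "deadbeef" * 5 + "\n" + "cafebabe" * 5 + "\n"  # two hypothetical commit hashes
commit_hash = stdout.split("\n")[0]  # keep the first match only
assert len(commit_hash) == 40, f"commit hash |{commit_hash}| is not 40 characters long"
```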
lamindb/core/datasets/_core.py CHANGED
@@ -161,8 +161,8 @@ def anndata_mouse_sc_lymph_node(
      adata.obs.columns = (
          adata.obs.columns.str.replace("Sample Characteristic", "")
          .str.replace("Factor Value ", "Factor Value:", regex=True)
-         .str.replace("Factor Value\[", "Factor Value:", regex=True)  # noqa
-         .str.replace(" Ontology Term\[", "ontology_id:", regex=True)  # noqa
+         .str.replace("Factor Value\\[", "Factor Value:", regex=True)
+         .str.replace(" Ontology Term\\[", "ontology_id:", regex=True)
          .str.strip("[]")
          .str.replace("organism part", "tissue")
          .str.replace("organism", "organism")
lamindb/core/storage/__init__.py CHANGED
@@ -10,6 +10,6 @@ from lamindb_setup.core.upath import LocalPathClasses, UPath, infer_filesystem
 
  from ._anndata_sizes import size_adata
  from ._backed_access import AnnDataAccessor, BackedAccessor
- from ._valid_suffixes import VALID_SUFFIXES
- from .objects import infer_suffix, write_to_file
+ from ._valid_suffixes import VALID_COMPOSITE_SUFFIXES, VALID_SUFFIXES
+ from .objects import infer_suffix, write_to_disk
  from .paths import delete_storage, load_to_memory
lamindb/core/storage/_valid_suffixes.py CHANGED
@@ -1,3 +1,5 @@
- from lamindb_setup.core.upath import VALID_SUFFIXES
+ from lamindb_setup.core.upath import VALID_COMPOSITE_SUFFIXES, VALID_SUFFIXES
 
- VALID_SUFFIXES.update({".vitessce.json", ".anndata.zarr", ".spatialdata.zarr"})
+ # add new composite suffixes like so
+ VALID_COMPOSITE_SUFFIXES.update({".vitessce.json"})
+ # can do the same for simple valid suffixes
lamindb/core/storage/objects.py CHANGED
@@ -1,5 +1,6 @@
  from __future__ import annotations
 
+ from pathlib import PurePosixPath
  from typing import TYPE_CHECKING
 
  from anndata import AnnData
@@ -21,7 +22,7 @@ def infer_suffix(dmem, adata_format: str | None = None):
      """Infer LaminDB storage file suffix from a data object."""
      if isinstance(dmem, AnnData):
          if adata_format is not None:
-             if adata_format not in ("h5ad", "zarr"):
+             if adata_format not in {"h5ad", "zarr", "anndata.zarr"}:
                  raise ValueError(
                      "Error when specifying AnnData storage format, it should be"
                      f" 'h5ad', 'zarr', not '{adata_format}'. Check 'format'"
@@ -40,9 +41,15 @@ def infer_suffix(dmem, adata_format: str | None = None):
      raise NotImplementedError
 
 
- def write_to_file(dmem, filepath: UPathStr):
+ def write_to_disk(dmem, filepath: UPathStr):
      if isinstance(dmem, AnnData):
-         dmem.write(filepath)
+         suffix = PurePosixPath(filepath).suffix
+         if suffix == ".h5ad":
+             dmem.write_h5ad(filepath)
+         elif suffix == ".zarr":
+             dmem.write_zarr(filepath)
+         else:
+             raise NotImplementedError
      elif isinstance(dmem, DataFrame):
          dmem.to_parquet(filepath)
      else:
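
`write_to_disk` now dispatches on the target suffix instead of always calling `AnnData.write`, so one code path serves both h5ad and zarr targets. A hedged usage sketch:

```python
import anndata as ad
import numpy as np
from lamindb.core.storage import write_to_disk  # re-exported per the __init__ diff above

adata = ad.AnnData(np.ones((3, 2)))
write_to_disk(adata, "example.h5ad")  # routed to AnnData.write_h5ad
write_to_disk(adata, "example.zarr")  # routed to AnnData.write_zarr
```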
lamindb/core/storage/paths.py CHANGED
@@ -140,7 +140,7 @@ def delete_storage(storagepath: Path):
      if not storagepath.is_relative_to(settings.storage):  # type: ignore
          allow_delete = False
          if setup_settings.instance.keep_artifacts_local:
-             allow_delete = storagepath.is_relative_to(
+             allow_delete = storagepath.is_relative_to(  # type: ignore
                  setup_settings.instance.storage_local.root
              )
          if not allow_delete:
lamindb/core/versioning.py CHANGED
@@ -42,10 +42,7 @@ def init_uid(
      if is_new_version_of is not None:
          stem_uid = is_new_version_of.stem_uid
      else:
-         if n_full_id == 20:
-             stem_uid = ids.base62_16()
-         elif n_full_id == 16:
-             stem_uid = ids.base62_12()
+         stem_uid = ids.base62(n_full_id - 4)
      if version is not None:
          if not isinstance(version, str):
              raise ValueError(
@@ -90,3 +87,20 @@ def get_new_path_from_uid(old_path: UPath, old_uid: str, new_uid: str):
      # for cloud path, the rename target must be the last part of the path
      new_path = old_path.name.replace(old_uid, new_uid)
      return new_path
+
+
+ def process_is_new_version_of(
+     is_new_version_of: IsVersioned,
+     version: str | None,
+     name: str | None,
+     type: type[IsVersioned],
+ ) -> tuple[str, str, str]:
+     if is_new_version_of is not None and not isinstance(is_new_version_of, type):
+         raise TypeError(f"is_new_version_of has to be of type {type}")
+     if is_new_version_of is None:
+         uid = init_uid(version=version, n_full_id=type._len_stem_uid)
+     else:
+         uid, version = get_uid_from_old_version(is_new_version_of, version)
+         if name is None:
+             name = is_new_version_of.name
+     return uid, version, name
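
The hard-coded id lengths collapse into one rule: a full uid is a stem plus a 4-character version suffix, so the stem has `n_full_id - 4` characters. A sketch of what `ids.base62` does under that assumption:

```python
import secrets
import string

def base62(n_char: int) -> str:
    # sketch of lnschema_core.ids.base62: n_char random base62 characters
    alphabet = string.digits + string.ascii_letters
    return "".join(secrets.choice(alphabet) for _ in range(n_char))

stem_uid = base62(20 - 4)  # 16-character stem for a 20-character full uid
```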
{lamindb-0.71.0.dist-info → lamindb-0.71.2.dist-info}/METADATA CHANGED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: lamindb
- Version: 0.71.0
+ Version: 0.71.2
  Summary: A data framework for biology.
  Author-email: Lamin Labs <open-source@lamin.ai>
  Requires-Python: >=3.8
@@ -9,10 +9,10 @@ Classifier: Programming Language :: Python :: 3.8
  Classifier: Programming Language :: Python :: 3.9
  Classifier: Programming Language :: Python :: 3.10
  Classifier: Programming Language :: Python :: 3.11
- Requires-Dist: lnschema_core==0.66.0
- Requires-Dist: lamindb_setup==0.71.0
+ Requires-Dist: lnschema_core==0.66.4
+ Requires-Dist: lamindb_setup==0.71.3
  Requires-Dist: lamin_utils==0.13.2
- Requires-Dist: lamin_cli==0.13.0
+ Requires-Dist: lamin_cli==0.13.1
  Requires-Dist: rapidfuzz
  Requires-Dist: pyarrow
  Requires-Dist: typing_extensions!=4.6.0
@@ -37,7 +37,7 @@ Requires-Dist: faker-biology ; extra == "dev"
  Requires-Dist: django-schema-graph ; extra == "erdiagram"
  Requires-Dist: readfcs>=1.1.8 ; extra == "fcs"
  Requires-Dist: lamindb_setup[gcp] ; extra == "gcp"
- Requires-Dist: nbproject==0.10.0 ; extra == "jupyter"
+ Requires-Dist: nbproject==0.10.2 ; extra == "jupyter"
  Requires-Dist: nbstripout==0.6.1 ; extra == "jupyter"
  Requires-Dist: nbconvert ; extra == "jupyter"
  Requires-Dist: zarr>=2.16.0 ; extra == "zarr"
{lamindb-0.71.0.dist-info → lamindb-0.71.2.dist-info}/RECORD CHANGED
@@ -1,55 +1,54 @@
- lamindb/__init__.py,sha256=T_mLeXTbOSi7s2DSoGxF-FrVBCSQLvBj5t02ueRNWSI,2182
+ lamindb/__init__.py,sha256=GKrW6unkqBBwwpxTXjuUv-5k4c4unimsV-vGFSWt68I,2182
  lamindb/_annotate.py,sha256=kgbilILfgzoS-GEpjxzVwRMs7CoSa9BNEcIWXFBW69I,43915
- lamindb/_artifact.py,sha256=875jV8J-GgvhoscWPmg73ogTa9rAVHQdAqc3V8S46Sc,40157
+ lamindb/_artifact.py,sha256=8uBW-dhuWyBUQGs728sAPCnuhTic-NKjSbaneF07aMo,40106
  lamindb/_can_validate.py,sha256=nvoZG-35n3HofkY4Xc6hBv9AV54_RDan7Hzp5TuqY9I,14709
  lamindb/_collection.py,sha256=SDM35R_5WHrgLKjVb14Q8-Rz_gn5hdZLJobPcanm4PM,14627
  lamindb/_feature.py,sha256=srAKchY7gqD-h-cWlEiAWuHlpFKFwv0PWIA-JX0Go8c,6758
  lamindb/_feature_set.py,sha256=AzjOcHzQajpeikPOAic-aj0z_C5b7VpHVegg3ThRSLw,9045
  lamindb/_filter.py,sha256=xnjJzjF3Zj4dK_Kfymvhgczk27MhhXz5ZYc7XINbgHY,1331
- lamindb/_finish.py,sha256=iUo6j89_hTP-OuRfUAj_i1YB1B5FU9QTtwxXKdX_J_4,8279
+ lamindb/_finish.py,sha256=6GwhqrC-x-JdFd16i7-uyhCWeQgGKxr25aSsSXPZt4g,8598
  lamindb/_from_values.py,sha256=DVXjnQ2wwNw-2bFzy0uXLdVlqoprrn95hTnrXwn-KqM,12638
  lamindb/_is_versioned.py,sha256=0PgRCmxEmYDcAjllLSOYZm132B1lW6QgmBBERhRyFt0,1341
  lamindb/_parents.py,sha256=N9T8jbd3eaoHDLE9TD1y1QgGcO81E6Brapy8LILzRCQ,14790
  lamindb/_query_manager.py,sha256=3zokXqxgj9vTJBnN2sbYKS-q69fyDDPF_aGq_rFHzXU,4066
- lamindb/_query_set.py,sha256=K_0rJ6Keltl3Pvglvd7kkzkJEy2u6Kp0TKiHLzwqH18,11359
+ lamindb/_query_set.py,sha256=n0owd74cTzGz6-mIv8SlDz0wcyRz7Xw3Ke1LhE8UlIg,10784
  lamindb/_registry.py,sha256=fmX-BUnan3Y0WrEAx3qNwRYCIJwJgjoKnRnpgcXujEI,19358
- lamindb/_run.py,sha256=b7A52M1On3QzFgIYyfQoz5Kk7V3wcu9p_Prq5bzd8v8,1838
- lamindb/_save.py,sha256=r-pUKi2xBW25brIMzDbf8iI-4xggX-X2C9cIYHzK1uI,11460
+ lamindb/_run.py,sha256=We50MUeGH778begutDGoNFM-n5_81_BfMCnZS1bdkt0,1937
+ lamindb/_save.py,sha256=_7r3TUV3B6Hp75r5O_ymu3fKWyBHbGa5vmE_pxrtsVI,10923
  lamindb/_storage.py,sha256=VW8xq3VRv58-ciholvOdlcgvp_OIlLxx5GxLt-e2Irs,614
- lamindb/_transform.py,sha256=rxojJ91qQSkeYDHYbwqjFAYxBMgJd3cq_K7Z0n5g8Aw,3482
+ lamindb/_transform.py,sha256=E9C7psuOnsNrUQpWRuGgEUM8_pc7YhDn7n4ieHzB4X0,3169
  lamindb/_ulabel.py,sha256=e5dw9h1tR0_u-DMn7Gzx0WhUhV5w7j4v3QbnLWQV7eI,1941
  lamindb/_utils.py,sha256=LGdiW4k3GClLz65vKAVRkL6Tw-Gkx9DWAdez1jyA5bE,428
  lamindb/_view.py,sha256=GV1FrqIMmdooEkA-5zvcTWgV1nqx1sehi6WdWEaFpxM,2171
- lamindb/core/__init__.py,sha256=MB1gEMKUf0GBQrI3dH8WRZOZQmWR4HIojXK_hXXVdqA,1235
- lamindb/core/_data.py,sha256=xULvge-txEO4r4amNQZRZTH3n3BqOLWauyNfxbB6WOA,17674
- lamindb/core/_feature_manager.py,sha256=LlYgU71AoTnrseWFCq-oZkUAYWITtRR7BNFm0AhHe-c,15773
+ lamindb/core/__init__.py,sha256=TI9_1Jtpwae_cUPQ3-U0RRPH5c3GBA-gLhHvlAk_Nlo,1213
+ lamindb/core/_data.py,sha256=Lico6-Vx15bNpGLl1bqFqEsh62pD4YKOOBnmahse1tI,17673
+ lamindb/core/_feature_manager.py,sha256=uTzZZ7-qqEAmdwi48Holy2j5VGTgmoQxhb21r6mLShI,15824
  lamindb/core/_label_manager.py,sha256=0RtegYnK3zIisOnd970EobOrHMpp7OCH-mEoPrPXw2c,9075
  lamindb/core/_mapped_collection.py,sha256=_OwFZh5SePDUD70XIK5kngv3we_Z5-YdGHNfpUSatSQ,19469
- lamindb/core/_run_context.py,sha256=zwsaq1iW3yb8Y6IjpWzqUL3e0i4l1bnmPF6V2USMqpI,16155
- lamindb/core/_settings.py,sha256=lhfn6gRjZw0atrA5Hr34m1nkPFXd8DAUMEesCGat1tA,6130
- lamindb/core/_sync_git.py,sha256=IlTqw55inPp_RZbN_YScaCeKza7LeF9mClQw55W3_d4,3921
+ lamindb/core/_run_context.py,sha256=7iCCOB2z154puBI7ZKzcaEZ5l6_9S8aSYBOBJI65lyc,17117
+ lamindb/core/_settings.py,sha256=rW1KfEXfT56XErwcnSuQxaCytpOy1kJ-u7tVmkmNmxY,6131
+ lamindb/core/_sync_git.py,sha256=06Te35UZj2QBaHNcc59VSC9vJgcFct7Z2sK78NLkZBs,4119
  lamindb/core/_track_environment.py,sha256=xLZ6kgzxWS6MWZ5LQ_wkbJX99vmYOT8iQ-Fz4OHCgWw,754
  lamindb/core/_transform_settings.py,sha256=eV96QKX9jOojjzF-a0oo0wXQsMXN2F6QV7orE06oFC8,161
- lamindb/core/_view_tree.py,sha256=PTwmKZSQL2UhKuSdV5Wp7o1JDjv1qwgsVCj3ThkbKb8,3447
  lamindb/core/exceptions.py,sha256=PHk5lyBdJPrrEQcid3ItfdNzz3fgiQsUmsEDdz063F0,197
  lamindb/core/fields.py,sha256=Jgi_XI-iTe6cT7oD8FV_JqEpjN1Q9rZWwL8VLtj4jkA,164
  lamindb/core/types.py,sha256=xeQF2x40p2pR9eIVQrXT74RrS810z2fbjmTRTSQUqPM,230
- lamindb/core/versioning.py,sha256=DsEHpCueNwhRiIaRH5-O8H_1fJVNtWslCRx30YiIS5o,3080
+ lamindb/core/versioning.py,sha256=T9d28erodCUmFlRA7InralbRoffdniPQxBE7qWqs2u8,3601
  lamindb/core/datasets/__init__.py,sha256=zRP98oqUAaXhqWyKMiH0s_ImVIuNeziQQ2kQ_t0f-DI,1353
- lamindb/core/datasets/_core.py,sha256=36vUOYFkX_4hBAnM_BujV5BRARMI5b9iI_SM9qS7wGc,20191
+ lamindb/core/datasets/_core.py,sha256=9bcDfVfMZ1h1WAS88ZBjy-R91xbP2KIm_ofHguXAKpY,20177
  lamindb/core/datasets/_fake.py,sha256=BZF9R_1iF0HDnvtZNqL2FtsjSMuqDIfuFxnw_LJYIh4,953
- lamindb/core/storage/__init__.py,sha256=6jnbFj-eBV3xZt04qP-kTsMWoP8YwpM50wlnnxDYsZU,415
+ lamindb/core/storage/__init__.py,sha256=5LUFQKRr2BX24d-yWBezhTXBV83sShcOvPj5Y5u6qIg,441
  lamindb/core/storage/_anndata_sizes.py,sha256=aXO3OB--tF5MChenSsigW6Q-RuE8YJJOUTVukkLrv9A,1029
  lamindb/core/storage/_backed_access.py,sha256=eManrLsu3pSSQAyAKy47FDBm-iHgjaNfHA-zLy59uDs,24536
- lamindb/core/storage/_valid_suffixes.py,sha256=sewRRU3I6fJ-Jd5ACNcco_o3hic9zmqTs8BuZui-450,133
+ lamindb/core/storage/_valid_suffixes.py,sha256=J08aglC9oo35pzahj0SQXW9IHib8Asp4dc11co-2uys,212
  lamindb/core/storage/_zarr.py,sha256=5ceEz6YIvgvUnVVNWhK5Z4W0WfrvyvY82Yna5jSX1_E,3661
- lamindb/core/storage/objects.py,sha256=5LbBeZVKuOOB8DceSE-PN8elKY0N9OhFXZPQJE4lK48,1538
- lamindb/core/storage/paths.py,sha256=ib50kmRGhjRTHak20i94ruXVqLL9xQnQuqJSHEW50Q8,7866
+ lamindb/core/storage/objects.py,sha256=OzvBCS-Urz5mr-O95qYt6RGBDDX5HmjfRRKWPPDn1ZE,1797
+ lamindb/core/storage/paths.py,sha256=JTtiTlAMICH4gkw7iZNwTRfNTT0WxrBoKiag_7E9g4I,7882
  lamindb/integrations/__init__.py,sha256=aH2PmO2m4-vwIifMYTB0Fyyr_gZWtVnV71jT0tVWSw0,123
  lamindb/integrations/_vitessce.py,sha256=b0FqTBsP-M6Q7xCYXVwFwM8DOIeeOBZEhYbryhtq4gk,2535
  lamindb/setup/__init__.py,sha256=OwZpZzPDv5lPPGXZP7-zK6UdO4FHvvuBh439yZvIp3A,410
  lamindb/setup/core/__init__.py,sha256=SevlVrc2AZWL3uALbE5sopxBnIZPWZ1IB0NBDudiAL8,167
- lamindb-0.71.0.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
- lamindb-0.71.0.dist-info/WHEEL,sha256=EZbGkh7Ie4PoZfRQ8I0ZuP9VklN_TvcZ6DSE5Uar4z4,81
- lamindb-0.71.0.dist-info/METADATA,sha256=UbJOa1wX6oHrzN1WXgN_YiudHPiw8rOzBYDE3ricYCM,2674
- lamindb-0.71.0.dist-info/RECORD,,
+ lamindb-0.71.2.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+ lamindb-0.71.2.dist-info/WHEEL,sha256=EZbGkh7Ie4PoZfRQ8I0ZuP9VklN_TvcZ6DSE5Uar4z4,81
+ lamindb-0.71.2.dist-info/METADATA,sha256=l49_xPwqfUDB6jUvUQoAVeQu8Tj3JUNCfTPB9cqOq_Y,2674
+ lamindb-0.71.2.dist-info/RECORD,,
lamindb/core/_view_tree.py DELETED
@@ -1,116 +0,0 @@
- from __future__ import annotations
-
- from collections import defaultdict
- from pathlib import Path
- from typing import Iterable
-
- from lamindb_setup import settings as setup_settings
- from lnschema_core.models import Artifact, Storage
-
-
- def view_tree(
-     cls,
-     level: int = -1,
-     limit_to_directories: bool = False,
-     length_limit: int = 1000,
-     max_files_per_dir_per_type: int = 7,
- ) -> None:
-     """{}."""
-     if cls.__class__.__name__ == "QuerySet":
-         print("queryset")
-         qs = cls
-         storage_ids = qs.list("storage_id")
-     elif cls == Artifact:
-         print("file")
-         qs = cls.filter(storage_id=setup_settings.storage.id).all()
-         storage_ids = Storage.filter().list("id")
-     else:
-         print("else")
-         return
-     storages = Storage.filter().all()
-     storage_roots = {
-         storage_id: storages.get(id=storage_id).root for storage_id in storage_ids
-     }
-     keys = set()
-     for artifact in qs:
-         root = storage_roots.get(artifact.storage_id, "")
-         keys.add(f"{root}/{artifact.key}")
-
-     _view_tree(
-         keys=keys,
-         level=level,
-         only_dirs=limit_to_directories,
-         limit=length_limit,
-         max_files_per_dir_per_type=max_files_per_dir_per_type,
-     )
-
-
- def _view_tree(
-     keys: Iterable[str],
-     *,
-     level: int = -1,
-     only_dirs: bool = False,
-     limit: int = 1000,
-     max_files_per_dir_per_type: int = 7,
- ) -> None:
-     # Create a nested dictionary from keys
-     def tree():
-         return defaultdict(tree)
-
-     root = tree()
-
-     n_files = 0
-     n_directories = 0
-     suffixes = set()
-
-     for key in keys:
-         parts = key.split("/")
-         node = root
-         for part in parts:
-             node = node[part]
-         if node == {}:
-             n_files += 1
-             suffix = Path(part).suffix
-             if suffix:
-                 suffixes.add(suffix)
-         else:
-             n_directories += 1
-
-     # Function to print the tree
-     def print_tree(node, prefix="", depth=0, count=None, n_files_per_dir_per_type=None):
-         if count is None:
-             count = [0]
-         if n_files_per_dir_per_type is None:
-             n_files_per_dir_per_type = defaultdict(int)
-
-         if level != -1 and depth > level:
-             return
-         for name, child in node.items():
-             if count[0] >= limit:
-                 return
-             if only_dirs and child == {}:
-                 continue
-             suffix = Path(name).suffix
-             n_files_per_dir_per_type[suffix] += 1
-             if (
-                 depth > 0
-                 and n_files_per_dir_per_type[suffix] > max_files_per_dir_per_type
-             ):
-                 continue
-             new_prefix = prefix + ("├── " if name != list(node.keys())[-1] else "└── ")
-             print(new_prefix + name)
-             count[0] += 1
-             if child:
-                 print_tree(
-                     child,
-                     prefix + ("│   " if name != list(node.keys())[-1] else "    "),
-                     depth + 1,
-                     count,
-                     (
-                         defaultdict(int) if depth == 0 else n_files_per_dir_per_type
-                     ),  # Reset the counter for each directory
-                 )
-
-     suffix_message = f" with suffixes {', '.join(suffixes)}" if n_files > 0 else ""
-     print(f"{n_directories} directories, {n_files} files{suffix_message}")
-     print_tree(root)