PyPI - lamindb - Versions diffs - 1.10.1__py3-none-any.whl → 1.11a1__py3-none-any.whl - Mend

lamindb 1.10.1__py3-none-any.whl → 1.11a1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (52)

lamindb/__init__.py +89 -49
lamindb/_finish.py +14 -12
lamindb/_tracked.py +2 -4
lamindb/_view.py +1 -1
lamindb/base/__init__.py +2 -1
lamindb/base/dtypes.py +76 -0
lamindb/core/_settings.py +45 -2
lamindb/core/storage/_anndata_accessor.py +118 -26
lamindb/core/storage/_backed_access.py +10 -7
lamindb/core/storage/_spatialdata_accessor.py +15 -4
lamindb/core/storage/_zarr.py +3 -0
lamindb/curators/_legacy.py +16 -3
lamindb/curators/core.py +439 -191
lamindb/examples/cellxgene/__init__.py +8 -3
lamindb/examples/cellxgene/_cellxgene.py +127 -13
lamindb/examples/cellxgene/{cxg_schema_versions.csv → cellxgene_schema_versions.csv} +11 -0
lamindb/examples/croissant/__init__.py +12 -2
lamindb/examples/datasets/__init__.py +2 -2
lamindb/examples/datasets/_core.py +1 -1
lamindb/examples/datasets/_small.py +66 -22
lamindb/examples/datasets/mini_immuno.py +1 -0
lamindb/migrations/0118_alter_recordproject_value_projectrecord.py +99 -0
lamindb/migrations/0119_rename_records_project_linked_in_records.py +26 -0
lamindb/migrations/{0117_squashed.py → 0119_squashed.py} +92 -5
lamindb/migrations/0120_add_record_fk_constraint.py +64 -0
lamindb/migrations/0121_recorduser.py +53 -0
lamindb/models/__init__.py +3 -1
lamindb/models/_describe.py +2 -2
lamindb/models/_feature_manager.py +53 -53
lamindb/models/_from_values.py +2 -2
lamindb/models/_is_versioned.py +4 -4
lamindb/models/_label_manager.py +4 -4
lamindb/models/artifact.py +336 -136
lamindb/models/artifact_set.py +36 -1
lamindb/models/can_curate.py +1 -2
lamindb/models/collection.py +3 -34
lamindb/models/feature.py +111 -7
lamindb/models/has_parents.py +11 -11
lamindb/models/project.py +42 -2
lamindb/models/query_manager.py +16 -7
lamindb/models/query_set.py +59 -34
lamindb/models/record.py +25 -4
lamindb/models/run.py +8 -6
lamindb/models/schema.py +54 -26
lamindb/models/sqlrecord.py +123 -25
lamindb/models/storage.py +59 -14
lamindb/models/transform.py +17 -17
lamindb/models/ulabel.py +6 -1
{lamindb-1.10.1.dist-info → lamindb-1.11a1.dist-info}/METADATA +3 -3
{lamindb-1.10.1.dist-info → lamindb-1.11a1.dist-info}/RECORD +52 -47
{lamindb-1.10.1.dist-info → lamindb-1.11a1.dist-info}/LICENSE +0 -0
{lamindb-1.10.1.dist-info → lamindb-1.11a1.dist-info}/WHEEL +0 -0

lamindb/__init__.py CHANGED Viewed

@@ -83,7 +83,7 @@ Curators and integrations.
    curators
    integrations
-Low-level functionality.
+Examples, errors, and setup.
 .. autosummary::
    :toctree: .
@@ -91,6 +91,12 @@ Low-level functionality.
    examples
    errors
    setup
+Low-level functionality.
+.. autosummary::
+   :toctree: .
    base
    core
    models
@@ -108,63 +114,97 @@ Backwards compatibility.
 # ruff: noqa: I001
 # denote a release candidate for 0.1.0 with 0.1rc1, 0.1a1, 0.1b1, etc.
-__version__ = "1.10.1"
+__version__ = "1.11a1"
-import warnings
+import warnings as _warnings
 # through SpatialData
-warnings.filterwarnings(
+_warnings.filterwarnings(
     "ignore", message="The legacy Dask DataFrame implementation is deprecated"
 )
-from lamindb_setup._check_setup import InstanceNotSetupError as _InstanceNotSetupError
 from lamindb_setup._check_setup import _check_instance_setup
 from lamindb_setup._connect_instance import connect
 from lamindb_setup.core.upath import UPath
 from . import base, errors, setup
-def __getattr__(name):
-    raise _InstanceNotSetupError()
-if _check_instance_setup(from_module="lamindb"):
-    del __getattr__  # so that imports work out
-    from . import base
-    from ._tracked import tracked
-    from ._view import view
-    from .core._context import context
-    from .core._settings import settings
-    from .curators._legacy import CatManager as Curator
-    from .models import (
-        Artifact,
-        Collection,
-        Feature,
-        FeatureSet,  # backward compat
-        Person,
-        Project,
-        Reference,
-        Run,
-        Schema,
-        Storage,
-        Transform,
-        ULabel,
-        User,
-        Space,
-        Branch,
-        Record,
-    )
-    from .models.save import save
-    from . import core
-    from . import integrations
-    from . import curators
-    from . import examples
-    track = context._track
-    finish = context._finish
-    settings.__doc__ = """Global live settings (:class:`~lamindb.core.Settings`)."""
-    context.__doc__ = """Global run context (:class:`~lamindb.core.Context`)."""
-    from django.db.models import Q
-    Param = Feature  # backward compat
+_check_instance_setup(from_module="lamindb")
+from ._tracked import tracked
+from ._view import view
+from .core._context import context
+from .core._settings import settings
+from .curators._legacy import CatManager as Curator
+from .models import (
+    Artifact,
+    Collection,
+    Feature,
+    FeatureSet,  # backward compat
+    Person,
+    Project,
+    Reference,
+    Run,
+    Schema,
+    Storage,
+    Transform,
+    ULabel,
+    User,
+    Space,
+    Branch,
+    Record,
+)
+from .models.save import save
+from . import core
+from . import integrations
+from . import curators
+from . import examples
+track = context._track
+finish = context._finish
+settings.__doc__ = """Global live settings (:class:`~lamindb.core.Settings`)."""
+context.__doc__ = """Global run context (:class:`~lamindb.core.Context`)."""
+from django.db.models import Q
+Param = Feature  # backward compat
+__all__ = [
+    # data lineage
+    "track",
+    "finish",
+    "tracked",
+    # registries
+    "Artifact",
+    "Storage",
+    "Transform",
+    "Run",
+    "Feature",
+    "ULabel",
+    "Schema",
+    "Record",
+    "User",
+    "Collection",
+    "Project",
+    "Space",
+    "Branch",
+    "Reference",
+    "Person",
+    # other
+    "connect",
+    "view",
+    "save",
+    "UPath",
+    "settings",
+    "context",
+    # curators and integrations
+    "curators",
+    "integrations",
+    # examples, errors, setup
+    "examples",
+    "errors",
+    "setup",
+    # low-level functionality
+    "base",
+    "core",
+    "models",
+]

lamindb/_finish.py CHANGED Viewed

@@ -264,12 +264,14 @@ def save_context_core(
     if (
         is_run_from_ipython and notebook_runner != "nbconvert" and filepath.exists()
     ):  # python notebooks in interactive session
-        import nbproject
-        # it might be that the user modifies the title just before ln.finish()
-        if (nbproject_title := nbproject.meta.live.title) != transform.description:
-            transform.description = nbproject_title
-            transform.save()
+        if is_ipynb:
+            # ignore this for py:percent notebooks
+            import nbproject
+            # it might be that the user modifies the title just before ln.finish()
+            if (nbproject_title := nbproject.meta.live.title) != transform.description:
+                transform.description = nbproject_title
+                transform.save()
         if not ln_setup._TESTING:
             save_source_code_and_report = check_filepath_recently_saved(
                 filepath, is_retry
@@ -349,7 +351,7 @@ def save_context_core(
             if transform_hash != transform.hash:
                 response = input(
                     f"You are about to overwrite existing source code (hash '{transform.hash}') for Transform('{transform.uid}')."
-                    f" Proceed? (y/n)"
+                    f" Proceed? (y/n) "
                 )
                 if response == "y":
                     transform.source_code = source_code_path.read_text()
@@ -365,11 +367,11 @@ def save_context_core(
     if run is not None:
         base_path = ln_setup.settings.cache_dir / "environments" / f"run_{run.uid}"
-        paths = [base_path / "run_env_pip.txt", base_path / "r_pak_lockfile.json"]
+        paths = [base_path / "run_env_pip.txt", base_path / "r_environment.txt"]
         existing_paths = [path for path in paths if path.exists()]
         if len(existing_paths) == 2:
             # let's not store the python environment for an R session for now
-            existing_paths = [base_path / "r_pak_lockfile.json"]
+            existing_paths = [base_path / "r_environment.txt"]
         if existing_paths:
             overwrite_env = True
@@ -387,8 +389,8 @@ def save_context_core(
                 if len(existing_paths) == 1:
                     if existing_paths[0].name == "run_env_pip.txt":
                         description = "requirements.txt"
-                    elif existing_paths[0].name == "r_pak_lockfile.json":
-                        description = "r_pak_lockfile.json"
+                    elif existing_paths[0].name == "r_environment.txt":
+                        description = "r_environment.txt"
                     env_hash, _ = hash_file(artifact_path)
                 else:
                     description = "environments"
@@ -432,7 +434,7 @@ def save_context_core(
                     hash, _ = hash_file(report_path)  # ignore hash_type for now
                     if hash != run.report.hash:
                         response = input(
-                            f"You are about to overwrite an existing report (hash '{run.report.hash}') for Run('{run.uid}'). Proceed? (y/n)"
+                            f"You are about to overwrite an existing report (hash '{run.report.hash}') for Run('{run.uid}'). Proceed? (y/n) "
                         )
                         if response == "y":
                             run.report.replace(report_path)

lamindb/_tracked.py CHANGED Viewed

@@ -52,7 +52,7 @@ def tracked(uid: str | None = None) -> Callable[[Callable[P, R]], Callable[P, R]
             artifact = ln.Artifact.get(key=input_artifact_key)
             df = artifact.load()  # auto-tracked as input
             new_df = df.iloc[:subset_rows, :subset_cols]
-            ln.Artifact.from_df(new_df, key=output_artifact_key).save()  # auto-tracked as output
+            ln.Artifact.from_dataframe(new_df, key=output_artifact_key).save()  # auto-tracked as output
     """
     def decorator_tracked(func: Callable[P, R]) -> Callable[P, R]:
@@ -104,9 +104,7 @@ def tracked(uid: str | None = None) -> Callable[[Callable[P, R]], Callable[P, R]
             # Deal with non-trivial parameter values
             filtered_params = {}
             for key, value in params.items():
-                dtype, _, _ = infer_feature_type_convert_json(
-                    key, value, str_as_ulabel=False
-                )
+                dtype, _, _ = infer_feature_type_convert_json(key, value)
                 if (dtype == "?" or dtype.startswith("cat")) and dtype != "cat ? str":
                     continue
                 filtered_params[key] = value

lamindb/_view.py CHANGED Viewed

@@ -162,7 +162,7 @@ def view(
             logger.print(section)
             logger.print("*" * len(section_no_color))
         for registry in sorted(filtered_registries, key=lambda x: x.__name__):
-            df = registry.df(limit=limit)
+            df = registry.to_dataframe(limit=limit)
             if df.shape[0] > 0:
                 logger.print(colors.blue(colors.bold(registry.__name__)))
                 show(df)

lamindb/base/__init__.py CHANGED Viewed

@@ -10,6 +10,7 @@ Modules:
    uids
    types
    fields
+   dtypes
 Utils:
@@ -23,4 +24,4 @@ Utils:
 from lamindb_setup.core import deprecated, doc_args
-from . import fields, types, uids
+from . import dtypes, fields, types, uids

lamindb/base/dtypes.py ADDED Viewed

@@ -0,0 +1,76 @@
+from datetime import datetime
+from typing import Any, Callable, Iterable
+import pandas as pd
+def is_list_of_type(value: Any, expected_type: Any) -> bool:
+    """Helper function to check if a value is either of expected_type or a list of that type, or a mix of both in a nested structure."""
+    if isinstance(value, Iterable) and not isinstance(value, (str, bytes)):
+        # handle nested lists recursively
+        return all(is_list_of_type(item, expected_type) for item in value)
+    return isinstance(value, expected_type)
+def check_dtype(expected_type: Any) -> Callable:
+    """Creates a check function for Pandera that validates a column's dtype.
+    Supports both standard dtype checking and mixed list/single values for the same type.
+    For example, a column with expected_type 'float' would also accept a mix of float values and lists of floats.
+    Args:
+        expected_type: String identifier for the expected type ('int', 'float', 'num', 'str')
+    Returns:
+        A function that checks if a series has the expected dtype or contains mixed types
+    """
+    def check_function(series):
+        # first check if the series is entirely of the expected dtype (fast path)
+        if expected_type == "int" and pd.api.types.is_integer_dtype(series.dtype):
+            return True
+        elif expected_type == "float" and pd.api.types.is_float_dtype(series.dtype):
+            return True
+        elif expected_type == "num" and pd.api.types.is_numeric_dtype(series.dtype):
+            return True
+        elif expected_type == "str" and pd.api.types.is_string_dtype(series.dtype):
+            return True
+        elif expected_type == "path" and pd.api.types.is_string_dtype(series.dtype):
+            return True
+        # if we're here, it might be a mixed column with object dtype
+        # need to check each value individually
+        if series.dtype == "object" and expected_type.startswith("list"):
+            expected_type_member = expected_type.replace("list[", "").removesuffix("]")
+            if expected_type_member == "int":
+                return series.apply(lambda x: is_list_of_type(x, int)).all()
+            elif expected_type_member == "float":
+                return series.apply(lambda x: is_list_of_type(x, float)).all()
+            elif expected_type_member == "num":
+                # for numeric, accept either int or float
+                return series.apply(lambda x: is_list_of_type(x, (int, float))).all()
+            elif (
+                expected_type_member == "str"
+                or expected_type_member == "path"
+                or expected_type_member.startswith("cat[")
+            ):
+                return series.apply(lambda x: is_list_of_type(x, str)).all()
+        # if we get here, the validation failed
+        return False
+    return check_function
+def is_valid_datetime_str(date_string: str) -> bool | str:
+    try:
+        dt = datetime.fromisoformat(date_string)
+        return dt.isoformat()
+    except ValueError:
+        return False
+def is_iterable_of_sqlrecord(value: Any):
+    from lamindb.models import SQLRecord
+    return isinstance(value, Iterable) and isinstance(next(iter(value)), SQLRecord)

lamindb/core/_settings.py CHANGED Viewed

@@ -10,6 +10,7 @@ from lamindb_setup import settings as setup_settings
 from lamindb_setup._set_managed_storage import set_managed_storage
 from lamindb_setup.core import deprecated
 from lamindb_setup.core._settings_instance import sanitize_git_repo_url
+from lamindb_setup.core._settings_storage import StorageSettings
 from .subsettings._annotation_settings import AnnotationSettings, annotation_settings
 from .subsettings._creation_settings import CreationSettings, creation_settings
@@ -18,7 +19,6 @@ if TYPE_CHECKING:
     from collections.abc import Mapping
     from pathlib import Path
-    from lamindb_setup.core._settings_storage import StorageSettings
     from upath import UPath
@@ -193,13 +193,39 @@ class Settings:
     @storage.setter
     def storage(self, path_kwargs: str | Path | UPath | tuple[str | UPath, Mapping]):
+        import lamindb as ln
         if isinstance(path_kwargs, tuple):
             path, kwargs = path_kwargs
+            # we should ultimately deprecate passing host here, I think
             if isinstance(kwargs, str):
                 kwargs = {"host": kwargs}
         else:
             path, kwargs = path_kwargs, {}
-        set_managed_storage(path, **kwargs)
+        ssettings = StorageSettings(root=path)  # there is no need to pass kwargs here!
+        exists = ln.Storage.filter(root=ssettings.root_as_str).one_or_none()
+        if exists is None:
+            response = input(
+                f"Storage location {ssettings.root_as_str} does not yet exist. Do you want to continue with creating it? (y/n) "
+            )
+            # logger.warning(f"deprecated call because storage location does **not yet** exist; going forward, please create through ln.Storage(root={path}).save() going forward")
+            if response != "y":
+                return None
+            set_managed_storage(path, **kwargs)
+        else:
+            if exists.instance_uid != ln_setup.settings.instance.uid:
+                raise ValueError(
+                    f"Storage {ssettings.root_as_str} exists in another instance ({exists.instance_uid}), cannot write to it from here."
+                )
+            ssettings = StorageSettings(
+                root=exists.root,
+                region=exists.region,
+                uid=exists.uid,
+                instance_id=ln_setup.settings.instance._id,
+            )
+            ln_setup.settings.instance._storage = ssettings
+            kwargs.pop("host", None)  # host is not needed for existing storage
+            settings.storage._set_fs_kwargs(**kwargs)
     @property
     def instance_uid(self) -> str:
@@ -223,6 +249,23 @@ class Settings:
     @local_storage.setter
     def local_storage(self, local_root: Path):
+        import lamindb as ln
+        # note duplication with storage setter!
+        ssettings = StorageSettings(root=local_root)
+        exists = ln.Storage.filter(root=ssettings.root_as_str).one_or_none()
+        if exists is None:
+            response = input(
+                f"Storage location {ssettings.root_as_str} does not yet exist. Do you want to continue with creating it? (y/n) "
+            )
+            # logger.warning(f"deprecated call because storage location does **not yet** exist; going forward, please create through ln.Storage(root={path}).save() going forward")
+            if response != "y":
+                return None
+        else:
+            if exists.instance_uid != ln_setup.settings.instance.uid:
+                raise ValueError(
+                    f"Storage {ssettings.root_as_str} exists in another instance ({exists.instance_uid}), cannot write to it from here."
+                )
         ln_setup.settings.instance.local_storage = local_root
     @property

lamindb 1.10.1__py3-none-any.whl → 1.11a1__py3-none-any.whl

lamindb 1.10.1__py3-none-any.whl → 1.11a1__py3-none-any.whl