lamindb 1.10.2__py3-none-any.whl → 1.11a1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47)
  1. lamindb/__init__.py +89 -49
  2. lamindb/_finish.py +14 -12
  3. lamindb/_tracked.py +2 -4
  4. lamindb/_view.py +1 -1
  5. lamindb/base/__init__.py +2 -1
  6. lamindb/base/dtypes.py +76 -0
  7. lamindb/core/_settings.py +2 -2
  8. lamindb/core/storage/_anndata_accessor.py +29 -9
  9. lamindb/curators/_legacy.py +16 -3
  10. lamindb/curators/core.py +432 -186
  11. lamindb/examples/cellxgene/__init__.py +8 -3
  12. lamindb/examples/cellxgene/_cellxgene.py +127 -13
  13. lamindb/examples/cellxgene/{cxg_schema_versions.csv → cellxgene_schema_versions.csv} +11 -0
  14. lamindb/examples/croissant/__init__.py +12 -2
  15. lamindb/examples/datasets/__init__.py +2 -2
  16. lamindb/examples/datasets/_core.py +1 -1
  17. lamindb/examples/datasets/_small.py +66 -22
  18. lamindb/examples/datasets/mini_immuno.py +1 -0
  19. lamindb/migrations/0119_squashed.py +5 -2
  20. lamindb/migrations/0120_add_record_fk_constraint.py +64 -0
  21. lamindb/migrations/0121_recorduser.py +53 -0
  22. lamindb/models/__init__.py +3 -1
  23. lamindb/models/_describe.py +2 -2
  24. lamindb/models/_feature_manager.py +53 -53
  25. lamindb/models/_from_values.py +2 -2
  26. lamindb/models/_is_versioned.py +4 -4
  27. lamindb/models/_label_manager.py +4 -4
  28. lamindb/models/artifact.py +305 -116
  29. lamindb/models/artifact_set.py +36 -1
  30. lamindb/models/can_curate.py +1 -2
  31. lamindb/models/collection.py +3 -34
  32. lamindb/models/feature.py +111 -7
  33. lamindb/models/has_parents.py +11 -11
  34. lamindb/models/project.py +18 -0
  35. lamindb/models/query_manager.py +16 -7
  36. lamindb/models/query_set.py +59 -34
  37. lamindb/models/record.py +25 -4
  38. lamindb/models/run.py +8 -6
  39. lamindb/models/schema.py +54 -26
  40. lamindb/models/sqlrecord.py +123 -25
  41. lamindb/models/storage.py +59 -14
  42. lamindb/models/transform.py +17 -17
  43. lamindb/models/ulabel.py +6 -1
  44. {lamindb-1.10.2.dist-info → lamindb-1.11a1.dist-info}/METADATA +4 -5
  45. {lamindb-1.10.2.dist-info → lamindb-1.11a1.dist-info}/RECORD +47 -44
  46. {lamindb-1.10.2.dist-info → lamindb-1.11a1.dist-info}/WHEEL +1 -1
  47. {lamindb-1.10.2.dist-info/licenses → lamindb-1.11a1.dist-info}/LICENSE +0 -0
lamindb/__init__.py CHANGED
@@ -83,7 +83,7 @@ Curators and integrations.
83
83
  curators
84
84
  integrations
85
85
 
86
- Low-level functionality.
86
+ Examples, errors, and setup.
87
87
 
88
88
  .. autosummary::
89
89
  :toctree: .
@@ -91,6 +91,12 @@ Low-level functionality.
91
91
  examples
92
92
  errors
93
93
  setup
94
+
95
+ Low-level functionality.
96
+
97
+ .. autosummary::
98
+ :toctree: .
99
+
94
100
  base
95
101
  core
96
102
  models
@@ -108,63 +114,97 @@ Backwards compatibility.
108
114
 
109
115
  # ruff: noqa: I001
110
116
  # denote a release candidate for 0.1.0 with 0.1rc1, 0.1a1, 0.1b1, etc.
111
- __version__ = "1.10.2"
117
+ __version__ = "1.11a1"
112
118
 
113
- import warnings
119
+ import warnings as _warnings
114
120
 
115
121
  # through SpatialData
116
- warnings.filterwarnings(
122
+ _warnings.filterwarnings(
117
123
  "ignore", message="The legacy Dask DataFrame implementation is deprecated"
118
124
  )
119
125
 
120
- from lamindb_setup._check_setup import InstanceNotSetupError as _InstanceNotSetupError
121
126
  from lamindb_setup._check_setup import _check_instance_setup
122
127
  from lamindb_setup._connect_instance import connect
123
128
  from lamindb_setup.core.upath import UPath
124
129
 
125
130
  from . import base, errors, setup
126
131
 
127
-
128
- def __getattr__(name):
129
- raise _InstanceNotSetupError()
130
-
131
-
132
- if _check_instance_setup(from_module="lamindb"):
133
- del __getattr__ # so that imports work out
134
- from . import base
135
- from ._tracked import tracked
136
- from ._view import view
137
- from .core._context import context
138
- from .core._settings import settings
139
- from .curators._legacy import CatManager as Curator
140
- from .models import (
141
- Artifact,
142
- Collection,
143
- Feature,
144
- FeatureSet, # backward compat
145
- Person,
146
- Project,
147
- Reference,
148
- Run,
149
- Schema,
150
- Storage,
151
- Transform,
152
- ULabel,
153
- User,
154
- Space,
155
- Branch,
156
- Record,
157
- )
158
- from .models.save import save
159
- from . import core
160
- from . import integrations
161
- from . import curators
162
- from . import examples
163
-
164
- track = context._track
165
- finish = context._finish
166
- settings.__doc__ = """Global live settings (:class:`~lamindb.core.Settings`)."""
167
- context.__doc__ = """Global run context (:class:`~lamindb.core.Context`)."""
168
- from django.db.models import Q
169
-
170
- Param = Feature # backward compat
132
+ _check_instance_setup(from_module="lamindb")
133
+
134
+ from ._tracked import tracked
135
+ from ._view import view
136
+ from .core._context import context
137
+ from .core._settings import settings
138
+ from .curators._legacy import CatManager as Curator
139
+ from .models import (
140
+ Artifact,
141
+ Collection,
142
+ Feature,
143
+ FeatureSet, # backward compat
144
+ Person,
145
+ Project,
146
+ Reference,
147
+ Run,
148
+ Schema,
149
+ Storage,
150
+ Transform,
151
+ ULabel,
152
+ User,
153
+ Space,
154
+ Branch,
155
+ Record,
156
+ )
157
+ from .models.save import save
158
+ from . import core
159
+ from . import integrations
160
+ from . import curators
161
+ from . import examples
162
+
163
+ track = context._track
164
+ finish = context._finish
165
+ settings.__doc__ = """Global live settings (:class:`~lamindb.core.Settings`)."""
166
+ context.__doc__ = """Global run context (:class:`~lamindb.core.Context`)."""
167
+ from django.db.models import Q
168
+
169
+ Param = Feature # backward compat
170
+
171
+ __all__ = [
172
+ # data lineage
173
+ "track",
174
+ "finish",
175
+ "tracked",
176
+ # registries
177
+ "Artifact",
178
+ "Storage",
179
+ "Transform",
180
+ "Run",
181
+ "Feature",
182
+ "ULabel",
183
+ "Schema",
184
+ "Record",
185
+ "User",
186
+ "Collection",
187
+ "Project",
188
+ "Space",
189
+ "Branch",
190
+ "Reference",
191
+ "Person",
192
+ # other
193
+ "connect",
194
+ "view",
195
+ "save",
196
+ "UPath",
197
+ "settings",
198
+ "context",
199
+ # curators and integrations
200
+ "curators",
201
+ "integrations",
202
+ # examples, errors, setup
203
+ "examples",
204
+ "errors",
205
+ "setup",
206
+ # low-level functionality
207
+ "base",
208
+ "core",
209
+ "models",
210
+ ]
lamindb/_finish.py CHANGED
@@ -264,12 +264,14 @@ def save_context_core(
264
264
  if (
265
265
  is_run_from_ipython and notebook_runner != "nbconvert" and filepath.exists()
266
266
  ): # python notebooks in interactive session
267
- import nbproject
268
-
269
- # it might be that the user modifies the title just before ln.finish()
270
- if (nbproject_title := nbproject.meta.live.title) != transform.description:
271
- transform.description = nbproject_title
272
- transform.save()
267
+ if is_ipynb:
268
+ # ignore this for py:percent notebooks
269
+ import nbproject
270
+
271
+ # it might be that the user modifies the title just before ln.finish()
272
+ if (nbproject_title := nbproject.meta.live.title) != transform.description:
273
+ transform.description = nbproject_title
274
+ transform.save()
273
275
  if not ln_setup._TESTING:
274
276
  save_source_code_and_report = check_filepath_recently_saved(
275
277
  filepath, is_retry
@@ -349,7 +351,7 @@ def save_context_core(
349
351
  if transform_hash != transform.hash:
350
352
  response = input(
351
353
  f"You are about to overwrite existing source code (hash '{transform.hash}') for Transform('{transform.uid}')."
352
- f" Proceed? (y/n)"
354
+ f" Proceed? (y/n) "
353
355
  )
354
356
  if response == "y":
355
357
  transform.source_code = source_code_path.read_text()
@@ -365,11 +367,11 @@ def save_context_core(
365
367
 
366
368
  if run is not None:
367
369
  base_path = ln_setup.settings.cache_dir / "environments" / f"run_{run.uid}"
368
- paths = [base_path / "run_env_pip.txt", base_path / "r_pak_lockfile.json"]
370
+ paths = [base_path / "run_env_pip.txt", base_path / "r_environment.txt"]
369
371
  existing_paths = [path for path in paths if path.exists()]
370
372
  if len(existing_paths) == 2:
371
373
  # let's not store the python environment for an R session for now
372
- existing_paths = [base_path / "r_pak_lockfile.json"]
374
+ existing_paths = [base_path / "r_environment.txt"]
373
375
 
374
376
  if existing_paths:
375
377
  overwrite_env = True
@@ -387,8 +389,8 @@ def save_context_core(
387
389
  if len(existing_paths) == 1:
388
390
  if existing_paths[0].name == "run_env_pip.txt":
389
391
  description = "requirements.txt"
390
- elif existing_paths[0].name == "r_pak_lockfile.json":
391
- description = "r_pak_lockfile.json"
392
+ elif existing_paths[0].name == "r_environment.txt":
393
+ description = "r_environment.txt"
392
394
  env_hash, _ = hash_file(artifact_path)
393
395
  else:
394
396
  description = "environments"
@@ -432,7 +434,7 @@ def save_context_core(
432
434
  hash, _ = hash_file(report_path) # ignore hash_type for now
433
435
  if hash != run.report.hash:
434
436
  response = input(
435
- f"You are about to overwrite an existing report (hash '{run.report.hash}') for Run('{run.uid}'). Proceed? (y/n)"
437
+ f"You are about to overwrite an existing report (hash '{run.report.hash}') for Run('{run.uid}'). Proceed? (y/n) "
436
438
  )
437
439
  if response == "y":
438
440
  run.report.replace(report_path)
lamindb/_tracked.py CHANGED
@@ -52,7 +52,7 @@ def tracked(uid: str | None = None) -> Callable[[Callable[P, R]], Callable[P, R]
52
52
  artifact = ln.Artifact.get(key=input_artifact_key)
53
53
  df = artifact.load() # auto-tracked as input
54
54
  new_df = df.iloc[:subset_rows, :subset_cols]
55
- ln.Artifact.from_df(new_df, key=output_artifact_key).save() # auto-tracked as output
55
+ ln.Artifact.from_dataframe(new_df, key=output_artifact_key).save() # auto-tracked as output
56
56
  """
57
57
 
58
58
  def decorator_tracked(func: Callable[P, R]) -> Callable[P, R]:
@@ -104,9 +104,7 @@ def tracked(uid: str | None = None) -> Callable[[Callable[P, R]], Callable[P, R]
104
104
  # Deal with non-trivial parameter values
105
105
  filtered_params = {}
106
106
  for key, value in params.items():
107
- dtype, _, _ = infer_feature_type_convert_json(
108
- key, value, str_as_ulabel=False
109
- )
107
+ dtype, _, _ = infer_feature_type_convert_json(key, value)
110
108
  if (dtype == "?" or dtype.startswith("cat")) and dtype != "cat ? str":
111
109
  continue
112
110
  filtered_params[key] = value
lamindb/_view.py CHANGED
@@ -162,7 +162,7 @@ def view(
162
162
  logger.print(section)
163
163
  logger.print("*" * len(section_no_color))
164
164
  for registry in sorted(filtered_registries, key=lambda x: x.__name__):
165
- df = registry.df(limit=limit)
165
+ df = registry.to_dataframe(limit=limit)
166
166
  if df.shape[0] > 0:
167
167
  logger.print(colors.blue(colors.bold(registry.__name__)))
168
168
  show(df)
lamindb/base/__init__.py CHANGED
@@ -10,6 +10,7 @@ Modules:
10
10
  uids
11
11
  types
12
12
  fields
13
+ dtypes
13
14
 
14
15
  Utils:
15
16
 
@@ -23,4 +24,4 @@ Utils:
23
24
 
24
25
  from lamindb_setup.core import deprecated, doc_args
25
26
 
26
- from . import fields, types, uids
27
+ from . import dtypes, fields, types, uids
lamindb/base/dtypes.py ADDED
@@ -0,0 +1,76 @@
1
+ from datetime import datetime
2
+ from typing import Any, Callable, Iterable
3
+
4
+ import pandas as pd
5
+
6
+
7
+ def is_list_of_type(value: Any, expected_type: Any) -> bool:
8
+ """Helper function to check if a value is either of expected_type or a list of that type, or a mix of both in a nested structure."""
9
+ if isinstance(value, Iterable) and not isinstance(value, (str, bytes)):
10
+ # handle nested lists recursively
11
+ return all(is_list_of_type(item, expected_type) for item in value)
12
+ return isinstance(value, expected_type)
13
+
14
+
15
+ def check_dtype(expected_type: Any) -> Callable:
16
+ """Creates a check function for Pandera that validates a column's dtype.
17
+
18
+ Supports both standard dtype checking and mixed list/single values for the same type.
19
+ For example, a column with expected_type 'float' would also accept a mix of float values and lists of floats.
20
+
21
+ Args:
22
+ expected_type: String identifier for the expected type ('int', 'float', 'num', 'str')
23
+
24
+ Returns:
25
+ A function that checks if a series has the expected dtype or contains mixed types
26
+ """
27
+
28
+ def check_function(series):
29
+ # first check if the series is entirely of the expected dtype (fast path)
30
+ if expected_type == "int" and pd.api.types.is_integer_dtype(series.dtype):
31
+ return True
32
+ elif expected_type == "float" and pd.api.types.is_float_dtype(series.dtype):
33
+ return True
34
+ elif expected_type == "num" and pd.api.types.is_numeric_dtype(series.dtype):
35
+ return True
36
+ elif expected_type == "str" and pd.api.types.is_string_dtype(series.dtype):
37
+ return True
38
+ elif expected_type == "path" and pd.api.types.is_string_dtype(series.dtype):
39
+ return True
40
+
41
+ # if we're here, it might be a mixed column with object dtype
42
+ # need to check each value individually
43
+ if series.dtype == "object" and expected_type.startswith("list"):
44
+ expected_type_member = expected_type.replace("list[", "").removesuffix("]")
45
+ if expected_type_member == "int":
46
+ return series.apply(lambda x: is_list_of_type(x, int)).all()
47
+ elif expected_type_member == "float":
48
+ return series.apply(lambda x: is_list_of_type(x, float)).all()
49
+ elif expected_type_member == "num":
50
+ # for numeric, accept either int or float
51
+ return series.apply(lambda x: is_list_of_type(x, (int, float))).all()
52
+ elif (
53
+ expected_type_member == "str"
54
+ or expected_type_member == "path"
55
+ or expected_type_member.startswith("cat[")
56
+ ):
57
+ return series.apply(lambda x: is_list_of_type(x, str)).all()
58
+
59
+ # if we get here, the validation failed
60
+ return False
61
+
62
+ return check_function
63
+
64
+
65
+ def is_valid_datetime_str(date_string: str) -> bool | str:
66
+ try:
67
+ dt = datetime.fromisoformat(date_string)
68
+ return dt.isoformat()
69
+ except ValueError:
70
+ return False
71
+
72
+
73
+ def is_iterable_of_sqlrecord(value: Any):
74
+ from lamindb.models import SQLRecord
75
+
76
+ return isinstance(value, Iterable) and isinstance(next(iter(value)), SQLRecord)
lamindb/core/_settings.py CHANGED
@@ -206,7 +206,7 @@ class Settings:
206
206
  exists = ln.Storage.filter(root=ssettings.root_as_str).one_or_none()
207
207
  if exists is None:
208
208
  response = input(
209
- f"Storage location {ssettings.root_as_str} does not yet exist. Do you want to continue with creating it? (y/n)"
209
+ f"Storage location {ssettings.root_as_str} does not yet exist. Do you want to continue with creating it? (y/n) "
210
210
  )
211
211
  # logger.warning(f"deprecated call because storage location does **not yet** exist; going forward, please create through ln.Storage(root={path}).save() going forward")
212
212
  if response != "y":
@@ -256,7 +256,7 @@ class Settings:
256
256
  exists = ln.Storage.filter(root=ssettings.root_as_str).one_or_none()
257
257
  if exists is None:
258
258
  response = input(
259
- f"Storage location {ssettings.root_as_str} does not yet exist. Do you want to continue with creating it? (y/n)"
259
+ f"Storage location {ssettings.root_as_str} does not yet exist. Do you want to continue with creating it? (y/n) "
260
260
  )
261
261
  # logger.warning(f"deprecated call because storage location does **not yet** exist; going forward, please create through ln.Storage(root={path}).save() going forward")
262
262
  if response != "y":
@@ -295,7 +295,7 @@ except ImportError:
295
295
  if ZARR_INSTALLED:
296
296
  from anndata._io.zarr import read_dataframe_legacy as read_dataframe_legacy_zarr
297
297
 
298
- from ._zarr import get_zarr_store
298
+ from ._zarr import IS_ZARR_V3, get_zarr_store
299
299
 
300
300
  ArrayTypes.append(zarr.Array)
301
301
  GroupTypes.append(zarr.Group)
@@ -306,11 +306,17 @@ if ZARR_INSTALLED:
306
306
  assert mode in {"r", "r+", "a", "w", "w-"}, f"Unknown mode {mode}!" # noqa: S101
307
307
 
308
308
  store = get_zarr_store(filepath)
309
- storage = zarr.open(store, mode=mode)
309
+ kwargs = {}
310
+ if IS_ZARR_V3 and mode != "r":
311
+ # otherwise unable to write
312
+ kwargs["use_consolidated"] = False
313
+ storage = zarr.open(store, mode=mode, **kwargs)
310
314
  # zarr v2 re-initializes the mapper
311
315
  # we need to put back the correct one
312
316
  # S3FSMap is returned from get_zarr_store only for zarr v2
313
317
  if isinstance(store, S3FSMap):
318
+ assert not IS_ZARR_V3 # noqa: S101
319
+
314
320
  storage.store.map = store
315
321
  conn = None
316
322
  return conn, storage
@@ -363,10 +369,10 @@ if ZARR_INSTALLED:
363
369
  # this is needed because accessing zarr.Group.keys() directly is very slow
364
370
  @registry.register("zarr")
365
371
  def keys(storage: zarr.Group):
366
- if hasattr(storage, "_sync_iter"): # zarr v3
372
+ if IS_ZARR_V3:
367
373
  paths = storage._sync_iter(storage.store.list())
368
374
  else:
369
- paths = storage.store.keys() # zarr v2
375
+ paths = storage.store.keys()
370
376
 
371
377
  attrs_keys: dict[str, list] = {}
372
378
  obs_var_arrays = []
@@ -748,22 +754,36 @@ class AnnDataAccessor(_AnnDataAttrsMixin):
748
754
 
749
755
  def close(self):
750
756
  """Closes the connection."""
751
- if hasattr(self, "storage") and hasattr(self.storage, "close"):
752
- self.storage.close()
753
- if hasattr(self, "_conn") and hasattr(self._conn, "close"):
754
- self._conn.close()
755
- self._closed = True
757
+ storage = self.storage
758
+ connection = self._conn
756
759
 
757
760
  if self._updated and (artifact := self._artifact) is not None:
758
761
  from lamindb.models.artifact import Artifact
759
762
  from lamindb.models.sqlrecord import init_self_from_db
760
763
 
764
+ # now self._updated can only be True for zarr
765
+ assert ZARR_INSTALLED # noqa: S101
766
+
767
+ store = storage.store
768
+ keys = storage._sync_iter(store.list()) if IS_ZARR_V3 else store.keys()
769
+ # this checks that there consolidated metadata was written before
770
+ # need to update it
771
+ # zmetadata is in spatialdata sometimes for some reason
772
+ if ".zmetadata" in keys or "zmetadata" in keys:
773
+ zarr.consolidate_metadata(store)
774
+
761
775
  new_version = Artifact(
762
776
  artifact.path, revises=artifact, _is_internal_call=True
763
777
  ).save()
764
778
  # note: sets _state.db = "default"
765
779
  init_self_from_db(artifact, new_version)
766
780
 
781
+ if hasattr(storage, "close"):
782
+ storage.close()
783
+ if hasattr(connection, "close"):
784
+ connection.close()
785
+ self._closed = True
786
+
767
787
  @property
768
788
  def closed(self):
769
789
  return self._closed
@@ -133,7 +133,7 @@ class CatManager:
133
133
 
134
134
  if self._artifact is None:
135
135
  if isinstance(self._dataset, pd.DataFrame):
136
- artifact = Artifact.from_df(
136
+ artifact = Artifact.from_dataframe(
137
137
  self._dataset,
138
138
  key=key,
139
139
  description=description,
@@ -1275,7 +1275,7 @@ class TiledbsomaCatManager(CatManager):
1275
1275
  empty_dict, schema=self._obs_pa_schema
1276
1276
  ).to_pandas()
1277
1277
  # in parallel to https://github.com/laminlabs/lamindb/blob/2a1709990b5736b480c6de49c0ada47fafc8b18d/lamindb/core/_feature_manager.py#L549-L554
1278
- feature_sets["obs"] = Schema.from_df(
1278
+ feature_sets["obs"] = Schema.from_dataframe(
1279
1279
  df=mock_df,
1280
1280
  field=self._columns_field,
1281
1281
  mute=True,
@@ -1367,7 +1367,7 @@ def legacy_annotate_artifact(
1367
1367
 
1368
1368
 
1369
1369
  @classmethod # type: ignore
1370
- def from_df(
1370
+ def from_dataframe(
1371
1371
  cls,
1372
1372
  df: pd.DataFrame,
1373
1373
  categoricals: dict[str, FieldAttr] | None = None,
@@ -1383,6 +1383,18 @@ def from_df(
1383
1383
  )
1384
1384
 
1385
1385
 
1386
+ @classmethod # type: ignore
1387
+ @deprecated("from_dataframe")
1388
+ def from_df(
1389
+ cls,
1390
+ df: pd.DataFrame,
1391
+ categoricals: dict[str, FieldAttr] | None = None,
1392
+ columns: FieldAttr = Feature.name,
1393
+ organism: str | None = None,
1394
+ ) -> DataFrameCatManager:
1395
+ return cls.from_dataframe(df, categoricals, columns, organism)
1396
+
1397
+
1386
1398
  @classmethod # type: ignore
1387
1399
  def from_anndata(
1388
1400
  cls,
@@ -1468,6 +1480,7 @@ def from_spatialdata(
1468
1480
  )
1469
1481
 
1470
1482
 
1483
+ CatManager.from_dataframe = from_dataframe # type: ignore
1471
1484
  CatManager.from_df = from_df # type: ignore
1472
1485
  CatManager.from_anndata = from_anndata # type: ignore
1473
1486
  CatManager.from_mudata = from_mudata # type: ignore