lamindb 1.0.5__py3-none-any.whl → 1.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lamindb/__init__.py +14 -5
- lamindb/_artifact.py +150 -53
- lamindb/_can_curate.py +27 -8
- lamindb/_collection.py +85 -51
- lamindb/_feature.py +177 -41
- lamindb/_finish.py +12 -6
- lamindb/_from_values.py +83 -98
- lamindb/_parents.py +4 -4
- lamindb/_query_set.py +59 -17
- lamindb/_record.py +171 -53
- lamindb/_run.py +4 -4
- lamindb/_save.py +33 -10
- lamindb/_schema.py +135 -38
- lamindb/_storage.py +1 -1
- lamindb/_tracked.py +106 -0
- lamindb/_transform.py +21 -8
- lamindb/_ulabel.py +5 -14
- lamindb/base/validation.py +2 -6
- lamindb/core/__init__.py +13 -14
- lamindb/core/_context.py +7 -7
- lamindb/core/_data.py +29 -25
- lamindb/core/_describe.py +1 -1
- lamindb/core/_django.py +1 -1
- lamindb/core/_feature_manager.py +53 -43
- lamindb/core/_label_manager.py +4 -4
- lamindb/core/_mapped_collection.py +20 -7
- lamindb/core/datasets/__init__.py +6 -1
- lamindb/core/datasets/_core.py +12 -11
- lamindb/core/datasets/_small.py +66 -20
- lamindb/core/exceptions.py +1 -90
- lamindb/core/loaders.py +6 -12
- lamindb/core/relations.py +6 -4
- lamindb/core/storage/_anndata_accessor.py +41 -0
- lamindb/core/storage/_backed_access.py +2 -2
- lamindb/core/storage/_pyarrow_dataset.py +25 -15
- lamindb/core/storage/_tiledbsoma.py +56 -12
- lamindb/core/storage/paths.py +27 -21
- lamindb/core/subsettings/_creation_settings.py +4 -16
- lamindb/curators/__init__.py +2168 -833
- lamindb/curators/_cellxgene_schemas/__init__.py +26 -0
- lamindb/curators/_cellxgene_schemas/schema_versions.yml +104 -0
- lamindb/errors.py +96 -0
- lamindb/integrations/_vitessce.py +3 -3
- lamindb/migrations/0069_squashed.py +76 -75
- lamindb/migrations/0075_lamindbv1_part5.py +4 -5
- lamindb/migrations/0082_alter_feature_dtype.py +21 -0
- lamindb/migrations/0083_alter_feature_is_type_alter_flextable_is_type_and_more.py +94 -0
- lamindb/migrations/0084_alter_schemafeature_feature_and_more.py +35 -0
- lamindb/migrations/0085_alter_feature_is_type_alter_flextable_is_type_and_more.py +63 -0
- lamindb/migrations/0086_various.py +95 -0
- lamindb/migrations/0087_rename__schemas_m2m_artifact_feature_sets_and_more.py +41 -0
- lamindb/migrations/0088_schema_components.py +273 -0
- lamindb/migrations/0088_squashed.py +4372 -0
- lamindb/models.py +420 -153
- {lamindb-1.0.5.dist-info → lamindb-1.1.0.dist-info}/METADATA +9 -7
- lamindb-1.1.0.dist-info/RECORD +95 -0
- lamindb/curators/_spatial.py +0 -528
- lamindb/migrations/0052_squashed.py +0 -1261
- lamindb/migrations/0053_alter_featureset_hash_alter_paramvalue_created_by_and_more.py +0 -57
- lamindb/migrations/0054_alter_feature_previous_runs_and_more.py +0 -35
- lamindb/migrations/0055_artifact_type_artifactparamvalue_and_more.py +0 -61
- lamindb/migrations/0056_rename_ulabel_ref_is_name_artifactulabel_label_ref_is_name_and_more.py +0 -22
- lamindb/migrations/0057_link_models_latest_report_and_others.py +0 -356
- lamindb/migrations/0058_artifact__actions_collection__actions.py +0 -22
- lamindb/migrations/0059_alter_artifact__accessor_alter_artifact__hash_type_and_more.py +0 -31
- lamindb/migrations/0060_alter_artifact__actions.py +0 -22
- lamindb/migrations/0061_alter_collection_meta_artifact_alter_run_environment_and_more.py +0 -45
- lamindb/migrations/0062_add_is_latest_field.py +0 -32
- lamindb/migrations/0063_populate_latest_field.py +0 -45
- lamindb/migrations/0064_alter_artifact_version_alter_collection_version_and_more.py +0 -33
- lamindb/migrations/0065_remove_collection_feature_sets_and_more.py +0 -22
- lamindb/migrations/0066_alter_artifact__feature_values_and_more.py +0 -352
- lamindb/migrations/0067_alter_featurevalue_unique_together_and_more.py +0 -20
- lamindb/migrations/0068_alter_artifactulabel_unique_together_and_more.py +0 -20
- lamindb/migrations/0069_alter_artifact__accessor_alter_artifact__hash_type_and_more.py +0 -1294
- lamindb-1.0.5.dist-info/RECORD +0 -102
- {lamindb-1.0.5.dist-info → lamindb-1.1.0.dist-info}/LICENSE +0 -0
- {lamindb-1.0.5.dist-info → lamindb-1.1.0.dist-info}/WHEEL +0 -0
lamindb/core/datasets/_small.py
CHANGED

```diff
@@ -8,20 +8,25 @@ import pandas as pd
 
 
 def small_dataset1(
-
+    otype: Literal["DataFrame", "AnnData"],
+    gene_symbols_in_index: bool = False,
     with_typo: bool = False,
-) ->
+) -> pd.DataFrame | ad.AnnData:
     # define the data in the dataset
     # it's a mix of numerical measurements and observation-level metadata
     ifng = "IFNJ" if with_typo else "IFNG"
+    if gene_symbols_in_index:
+        var_ids = ["CD8A", "CD4", "CD14"]
+    else:
+        var_ids = ["ENSG00000153563", "ENSG00000010610", "ENSG00000170458"]
     dataset_dict = {
-
-
-
-        "cell_medium": ["DMSO", ifng, "DMSO"],
+        var_ids[0]: [1, 2, 3],
+        var_ids[1]: [3, 4, 5],
+        var_ids[2]: [5, 6, 7],
+        "cell_medium": pd.Categorical(["DMSO", ifng, "DMSO"]),
         "sample_note": ["was ok", "looks naah", "pretty! 🤩"],
-        "cell_type_by_expert": ["B cell", "T cell", "T cell"],
-        "cell_type_by_model": ["B cell", "T cell", "T cell"],
+        "cell_type_by_expert": pd.Categorical(["B cell", "T cell", "T cell"]),
+        "cell_type_by_model": pd.Categorical(["B cell", "T cell", "T cell"]),
     }
     # define the dataset-level metadata
     metadata = {
@@ -32,8 +37,10 @@ def small_dataset1(
     }
     # the dataset as DataFrame
     dataset_df = pd.DataFrame(dataset_dict, index=["sample1", "sample2", "sample3"])
-    if
-
+    if otype == "DataFrame":
+        for key, value in metadata.items():
+            dataset_df.attrs[key] = value
+        return dataset_df
     else:
         dataset_ad = ad.AnnData(
             dataset_df.iloc[:, :3], obs=dataset_df.iloc[:, 3:], uns=metadata
@@ -42,14 +49,19 @@
 
 
 def small_dataset2(
-
-
+    otype: Literal["DataFrame", "AnnData"],
+    gene_symbols_in_index: bool = False,
+) -> pd.DataFrame | ad.AnnData:
+    if gene_symbols_in_index:
+        var_ids = ["CD8A", "CD4", "CD38"]
+    else:
+        var_ids = ["ENSG00000153563", "ENSG00000010610", "ENSG00000004468"]
     dataset_dict = {
-
-
-
-        "cell_medium": ["DMSO", "IFNG", "IFNG"],
-        "cell_type_by_model": ["B cell", "T cell", "T cell"],
+        var_ids[0]: [2, 3, 3],
+        var_ids[1]: [3, 4, 5],
+        var_ids[2]: [4, 2, 3],
+        "cell_medium": pd.Categorical(["DMSO", "IFNG", "IFNG"]),
+        "cell_type_by_model": pd.Categorical(["B cell", "T cell", "T cell"]),
     }
     metadata = {
         "temperature": 22.6,
@@ -61,11 +73,13 @@
         index=["sample4", "sample5", "sample6"],
     )
     ad.AnnData(
-        dataset_df[
+        dataset_df[var_ids],
         obs=dataset_df[["cell_medium", "cell_type_by_model"]],
     )
-    if
-
+    if otype == "DataFrame":
+        for key, value in metadata.items():
+            dataset_df.attrs[key] = value
+        return dataset_df
     else:
         dataset_ad = ad.AnnData(
             dataset_df.iloc[:, :3], obs=dataset_df.iloc[:, 3:], uns=metadata
@@ -73,6 +87,38 @@
     return dataset_ad
 
 
+def small_dataset3_cellxgene(
+    otype: Literal["DataFrame", "AnnData"] = "AnnData",
+) -> tuple[pd.DataFrame, dict[str, Any]] | ad.AnnData:
+    # TODO: consider other ids for other organisms
+    # "ENSMUSG00002076988"
+    var_ids = ["invalid_ensembl_id", "ENSG00000000419", "ENSG00000139618"]
+    dataset_dict = {
+        var_ids[0]: [2, 3, 3],
+        var_ids[1]: [3, 4, 5],
+        var_ids[2]: [4, 2, 3],
+        "disease_ontology_term_id": ["MONDO:0004975", "MONDO:0004980", "MONDO:0004980"],
+        "organism": ["human", "human", "human"],
+        "sex": ["female", "male", "unknown"],
+        "tissue": ["lungg", "lungg", "heart"],
+        "donor": ["-1", "1", "2"],
+    }
+    dataset_df = pd.DataFrame(
+        dataset_dict,
+        index=["barcode1", "barcode2", "barcode3"],
+    )
+    dataset_df["tissue"] = dataset_df["tissue"].astype("category")
+    ad.AnnData(
+        dataset_df[var_ids],
+        obs=dataset_df[[key for key in dataset_dict if key not in var_ids]],
+    )
+    if otype == "DataFrame":
+        return dataset_df
+    else:
+        dataset_ad = ad.AnnData(dataset_df.iloc[:, :3], obs=dataset_df.iloc[:, 3:])
+        return dataset_ad
+
+
 def anndata_with_obs() -> ad.AnnData:
     """Create a mini anndata with cell_type, disease and tissue."""
     import anndata as ad
```
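To make the new helper API concrete, here is a usage sketch based on the signatures in the diff above; the assertions are illustrative, not part of lamindb.

```python
# Usage sketch for the reworked helper (signature taken from the diff above);
# the assertions are illustrative, not part of lamindb.
from lamindb.core.datasets import small_dataset1

# DataFrame flavor: dataset-level metadata now travels in `df.attrs`
df = small_dataset1(otype="DataFrame", gene_symbols_in_index=True)
assert "CD8A" in df.columns  # gene symbols instead of Ensembl ids
assert df["cell_medium"].dtype == "category"  # columns are now categorical

# AnnData flavor: the same measurements, with metadata in `adata.uns`
adata = small_dataset1(otype="AnnData")
assert adata.n_obs == 3
```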
lamindb/core/exceptions.py
CHANGED

```diff
@@ -1,90 +1 @@
-
-
-.. autosummary::
-   :toctree: .
-
-   InvalidArgument
-   DoesNotExist
-   ValidationError
-   NotebookNotSaved
-   MissingContextUID
-   UpdateContext
-   IntegrityError
-   RecordNameChangeIntegrityError
-
-"""
-
-# inheriting from SystemExit has the sole purpose of suppressing
-# the traceback - this isn't optimal but the current best solution
-# https://laminlabs.slack.com/archives/C04A0RMA0SC/p1726856875597489
-
-
-class InvalidArgument(SystemExit):
-    """Invalid method or function argument."""
-
-    pass
-
-
-class TrackNotCalled(SystemExit):
-    """`ln.track()` wasn't called."""
-
-    pass
-
-
-class NotebookNotSaved(SystemExit):
-    """Notebook wasn't saved."""
-
-    pass
-
-
-class ValidationError(SystemExit):
-    """Validation error: not mapped in registry."""
-
-    pass
-
-
-# inspired by Django's DoesNotExist
-# equivalent to SQLAlchemy's NoResultFound
-class DoesNotExist(SystemExit):
-    """No record found."""
-
-    pass
-
-
-class InconsistentKey(Exception):
-    """Inconsistent transform or artifact `key`."""
-
-    pass
-
-
-class RecordNameChangeIntegrityError(SystemExit):
-    """Custom exception for name change errors."""
-
-    pass
-
-
-# -------------------------------------------------------------------------------------
-# run context
-# -------------------------------------------------------------------------------------
-
-
-class IntegrityError(Exception):
-    """Integrity error.
-
-    For instance, it's not allowed to delete artifacts outside managed storage
-    locations.
-    """
-
-    pass
-
-
-class MissingContextUID(SystemExit):
-    """User didn't define transform settings."""
-
-    pass
-
-
-class UpdateContext(SystemExit):
-    """Transform settings require update."""
-
-    pass
+from ..errors import *  # noqa: F403 backward compat
```
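The module body is now a single star re-export from the new top-level `lamindb/errors.py` (the +96-line file in the listing above), so old import paths should keep resolving to the same classes. A quick compatibility sketch, assuming `ValidationError` is among the re-exported names:

```python
# Compatibility sketch: both import paths should resolve to the same class,
# assuming ValidationError is among the names re-exported by lamindb/errors.py.
from lamindb.core.exceptions import ValidationError as old_path
from lamindb.errors import ValidationError as new_path

assert old_path is new_path
```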
lamindb/core/loaders.py
CHANGED

```diff
@@ -109,19 +109,13 @@ def load_json(path: UPathStr) -> dict:
     return data
 
 
-def load_yaml(path: UPathStr) -> dict
+def load_yaml(path: UPathStr) -> dict:
     """Load `.yaml` to `dict`."""
-
-
-
-
-
-        return data
-    except ImportError:
-        logger.warning(
-            "Please install PyYAML (`pip install PyYAML`) to load `.yaml` files."
-        )
-        return path
+    import yaml  # type: ignore
+
+    with open(path) as f:
+        data = yaml.safe_load(f)
+    return data
 
 
 def load_image(path: UPathStr) -> None | UPathStr:
```
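`load_yaml` previously fell back to returning the path when PyYAML was missing; it now imports `yaml` unconditionally and always returns a parsed `dict`. A quick sketch (the file name is made up):

```python
# Quick sketch of the fixed loader (file name made up): it now always
# returns the parsed dict, and raises ImportError if PyYAML is missing.
from pathlib import Path

from lamindb.core.loaders import load_yaml

path = Path("params.yaml")
path.write_text("learning_rate: 0.01\nlayers: [64, 32]\n")

params = load_yaml(path)
assert params == {"learning_rate": 0.01, "layers": [64, 32]}
```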
lamindb/core/relations.py
CHANGED

```diff
@@ -8,7 +8,7 @@ from lamindb_setup._connect_instance import (
 )
 from lamindb_setup.core._settings_store import instance_settings_file
 
-from lamindb.models import LinkORM, Record, Schema
+from lamindb.models import LinkORM, Record, Registry, Schema
 
 
 def get_schema_modules(instance: str | None) -> set[str]:
@@ -35,9 +35,11 @@ def get_schema_modules(instance: str | None) -> set[str]:
     return shared_schema_modules
 
 
+# this function here should likely be renamed
+# it maps the __get_name_with_module__() onto the actual model
 def dict_module_name_to_model_name(
-    registry:
-) -> dict[str,
+    registry: Registry, instance: str | None = None
+) -> dict[str, Registry]:
     schema_modules = get_schema_modules(instance)
     d: dict = {
         i.related_model.__get_name_with_module__(): i.related_model
@@ -92,7 +94,7 @@ def get_related_name(features_type: type[Record]) -> str:
             f"Can't create feature sets from {features_type.__name__} because it's not"
             " related to it!\nYou need to create a link model between Schema and"
             " your Record in your custom module.\nTo do so, add a"
-            " line:\
+            " line:\n_feature_sets = models.ManyToMany(Schema,"
             " related_name='mythings')\n"
         )
     return candidates[0]
```
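The clarified error message in `get_related_name` spells out how to link a custom registry to `Schema`. A rough sketch of that pattern; `MyThing` and `mythings` are placeholders taken from the message itself, and note that Django's actual field class is `ManyToManyField` (the message abbreviates it as `ManyToMany`):

```python
# Rough sketch of the link the error message asks for (placeholder names);
# Django's real field class is ManyToManyField.
from django.db import models

from lamindb.models import Record, Schema


class MyThing(Record):
    name = models.CharField(max_length=64)
    # the many-to-many link that lets feature sets be created from MyThing
    _feature_sets = models.ManyToManyField(Schema, related_name="mythings")
```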
lamindb/core/storage/_anndata_accessor.py
CHANGED

```diff
@@ -19,6 +19,7 @@ from fsspec.implementations.local import LocalFileSystem
 from lamin_utils import logger
 from lamindb_setup.core.upath import create_mapper, infer_filesystem
 from packaging import version
+from upath import UPath
 
 if TYPE_CHECKING:
     from collections.abc import Mapping
@@ -741,3 +742,43 @@ class AnnDataAccessor(_AnnDataAttrsMixin):
         return AnnDataRawAccessor(
             self.storage["raw"], None, None, self._obs_names, None, self.shape[0]
         )
+
+
+# get the number of observations in an anndata object or file fast and safely
+def _anndata_n_observations(object: UPathStr | AnnData) -> int | None:
+    if isinstance(object, AnnData):
+        return object.n_obs
+
+    try:
+        objectpath = UPath(object)
+        suffix = objectpath.suffix
+        conn_module = {".h5ad": "h5py", ".zarr": "zarr"}.get(suffix, suffix[1:])
+        conn, storage = registry.open(conn_module, objectpath, mode="r")
+    except Exception as e:
+        logger.warning(f"Could not open {object} to read n_observations: {e}")
+        return None
+
+    n_observations: int | None = None
+    try:
+        obs = storage["obs"]
+        if isinstance(obs, GroupTypes):  # type: ignore
+            if "_index" in obs.attrs:
+                elem_key = _read_attr(obs.attrs, "_index")
+            else:
+                elem_key = next(iter(obs))
+            elem = obs[elem_key]
+            if isinstance(elem, ArrayTypes):  # type: ignore
+                n_observations = elem.shape[0]
+            else:
+                # assume standard obs group
+                n_observations = elem["codes"].shape[0]
+        else:
+            n_observations = obs.shape[0]
+    except Exception as e:
+        logger.warning(f"Could not read n_observations from anndata {object}: {e}")
+    finally:
+        if hasattr(storage, "close"):
+            storage.close()
+        if hasattr(conn, "close"):
+            conn.close()
+    return n_observations
```
lamindb/core/storage/_backed_access.py
CHANGED

```diff
@@ -94,8 +94,8 @@ def backed_access(
         return _open_pyarrow_dataset(objectpath)
     else:
         raise ValueError(
-            "object should have .h5, .hdf5, .h5ad, .zarr, .tiledbsoma suffix
-            f" {suffix}."
+            "The object should have .h5, .hdf5, .h5ad, .zarr, .tiledbsoma suffix "
+            f"or be compatible with pyarrow.dataset.dataset, instead of being {suffix} object."
         )
 
     is_anndata = suffix == ".h5ad" or get_spec(storage).encoding_type == "anndata"
```
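In practice this error surfaces through `Artifact.open()`, which dispatches to `backed_access`; a behavior sketch with a hypothetical artifact key:

```python
# Behavior sketch (hypothetical artifact key): opening an artifact with an
# unsupported suffix now raises the clearer ValueError rewritten above.
import lamindb as ln

artifact = ln.Artifact.get(key="data/table.xyz")  # hypothetical
try:
    artifact.open()
except ValueError as e:
    print(e)  # names the supported suffixes and the offending ".xyz"
```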
lamindb/core/storage/_pyarrow_dataset.py
CHANGED

```diff
@@ -6,26 +6,36 @@ import pyarrow.dataset
 from lamindb_setup.core.upath import LocalPathClasses
 
 if TYPE_CHECKING:
+    from pyarrow.dataset import Dataset as PyArrowDataset
     from upath import UPath
 
 
-PYARROW_SUFFIXES = (".parquet", ".csv", ".json", ".orc", ".arrow", ".feather")
+PYARROW_SUFFIXES = (".parquet", ".csv", ".json", ".orc", ".arrow", ".feather", ".ipc")
 
 
-def _is_pyarrow_dataset(
-    # it is assumed here that
-
-
+def _is_pyarrow_dataset(paths: UPath | list[UPath]) -> bool:
+    # it is assumed here that the paths exist
+    # we don't check here that the filesystem is the same
+    # but this is a requirement for pyarrow.dataset.dataset
+    if isinstance(paths, list):
+        suffixes = {path.suffix for path in paths}
+    elif paths.is_file():
+        suffixes = {paths.suffix}
     else:
-
-
-
-
-
-
-
-
+        suffixes = {path.suffix for path in paths.rglob("*") if path.suffix != ""}
+    return len(suffixes) == 1 and suffixes.pop() in PYARROW_SUFFIXES
+
+
+def _open_pyarrow_dataset(paths: UPath | list[UPath]) -> PyArrowDataset:
+    if isinstance(paths, list):
+        path0 = paths[0]
+        if isinstance(path0, LocalPathClasses):
+            paths_str, filesystem = [path.as_posix() for path in paths], None
+        else:
+            paths_str, filesystem = [path.path for path in paths], path0.fs
+    elif isinstance(paths, LocalPathClasses):
+        paths_str, filesystem = paths.as_posix(), None
     else:
-
+        paths_str, filesystem = paths.path, paths.fs
 
-    return pyarrow.dataset.dataset(
+    return pyarrow.dataset.dataset(paths_str, filesystem=filesystem)
```
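The rewritten helpers now accept a list of paths (for example, a sharded dataset), provided all shards share one suffix and one filesystem. A sketch with made-up local parquet shards:

```python
# Sketch of the new list-of-paths support (internal API; shard paths are
# made up): all shards must share one pyarrow suffix and one filesystem.
from upath import UPath

from lamindb.core.storage._pyarrow_dataset import (
    _is_pyarrow_dataset,
    _open_pyarrow_dataset,
)

shards = [UPath("shards/part-0.parquet"), UPath("shards/part-1.parquet")]
if _is_pyarrow_dataset(shards):
    ds = _open_pyarrow_dataset(shards)
    print(ds.count_rows())
```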
lamindb/core/storage/_tiledbsoma.py
CHANGED

```diff
@@ -1,6 +1,7 @@
 from __future__ import annotations
 
 from typing import TYPE_CHECKING, Literal
+from urllib.parse import urlparse
 
 import pandas as pd
 import pyarrow as pa
@@ -17,6 +18,7 @@ if TYPE_CHECKING:
     from lamindb_setup.core.types import UPathStr
     from tiledbsoma import Collection as SOMACollection
     from tiledbsoma import Experiment as SOMAExperiment
+    from tiledbsoma import Measurement as SOMAMeasurement
     from upath import UPath
 
 
@@ -36,9 +38,21 @@ def _load_h5ad_zarr(objpath: UPath):
 
 
 def _tiledb_config_s3(storepath: UPath) -> dict:
-    region = get_storage_region(storepath)
-    tiledb_config = {"vfs.s3.region": region}
     storage_options = storepath.storage_options
+    tiledb_config = {}
+
+    endpoint_url = storage_options.get("endpoint_url", None)
+    if endpoint_url is not None:
+        tiledb_config["vfs.s3.region"] = ""
+        tiledb_config["vfs.s3.use_virtual_addressing"] = "false"
+        parsed = urlparse(endpoint_url)
+        tiledb_config["vfs.s3.scheme"] = parsed.scheme
+        tiledb_config["vfs.s3.endpoint_override"] = (
+            parsed._replace(scheme="").geturl().lstrip("/")
+        )
+    else:
+        tiledb_config["vfs.s3.region"] = get_storage_region(storepath)
+
     if "key" in storage_options:
         tiledb_config["vfs.s3.aws_access_key_id"] = storage_options["key"]
     if "secret" in storage_options:
@@ -51,7 +65,7 @@ def _tiledb_config_s3(storepath: UPath) -> dict:
 
 def _open_tiledbsoma(
     storepath: UPath, mode: Literal["r", "w"] = "r"
-) -> SOMACollection | SOMAExperiment:
+) -> SOMACollection | SOMAExperiment | SOMAMeasurement:
     try:
         import tiledbsoma as soma
     except ImportError as e:
@@ -71,6 +85,8 @@ def _open_tiledbsoma(
     soma_objects = [obj.name for obj in storepath.iterdir()]
     if "obs" in soma_objects and "ms" in soma_objects:
         SOMAType = soma.Experiment
+    elif "var" in soma_objects:
+        SOMAType = soma.Measurement
     else:
         SOMAType = soma.Collection
     return SOMAType.open(storepath_str, mode=mode, context=ctx)
@@ -134,17 +150,17 @@ def save_tiledbsoma_experiment(
     )
     storepath = setup_settings.storage.root / storage_key
 
-    if storepath.protocol == "s3":
+    if storepath.protocol == "s3":  # type: ignore
         ctx = soma.SOMATileDBContext(tiledb_config=_tiledb_config_s3(storepath))
     else:
         ctx = None
 
-
+    storepath_str = storepath.as_posix()
 
     add_run_uid = True
     run_uid_dtype = "category"
     if appending:
-        with soma.Experiment.open(
+        with soma.Experiment.open(storepath_str, mode="r", context=ctx) as store:
            obs_schema = store["obs"].schema
            add_run_uid = "lamin_run_uid" in obs_schema.names
        # this is needed to enable backwards compatibility with tiledbsoma stores
@@ -175,7 +191,7 @@ def save_tiledbsoma_experiment(
     registration_mapping = kwargs.get("registration_mapping", None)
     if registration_mapping is None and (appending or len(adata_objects) > 1):
         registration_mapping = soma_io.register_anndatas(
-            experiment_uri=
+            experiment_uri=storepath_str if appending else None,
             adatas=adata_objects,
             measurement_name=measurement_name,
             obs_field_name=obs_id_name,
@@ -195,19 +211,19 @@ def save_tiledbsoma_experiment(
         assert len(adata_objects) == 1  # noqa: S101
         n_observations = adata_objects[0].n_obs
 
-    logger.important(f"Writing the tiledbsoma store to {
+    logger.important(f"Writing the tiledbsoma store to {storepath_str}")
     for adata_obj in adata_objects:
-        if resize_experiment and soma.Experiment.exists(
+        if resize_experiment and soma.Experiment.exists(storepath_str, context=ctx):
             # can only happen if registration_mapping is not None
             soma_io.resize_experiment(
-
+                storepath_str,
                 nobs=n_observations,
                 nvars=registration_mapping.get_var_shapes(),
                 context=ctx,
             )
             resize_experiment = False
         soma_io.from_anndata(
-
+            storepath_str,
             adata_obj,
             measurement_name,
             context=ctx,
@@ -217,7 +233,7 @@ def save_tiledbsoma_experiment(
         **kwargs,
     )
 
-    artifact = Artifact(
+    artifact = Artifact(  # type: ignore
         storepath,
         key=key,
         description=description,
@@ -229,3 +245,31 @@ def save_tiledbsoma_experiment(
     artifact.otype = "tiledbsoma"
 
     return artifact.save()
+
+
+# this is less defensive than _anndata_n_observations
+# this doesn't really catches errors
+# assumes that the tiledbsoma object is well-formed
+def _soma_store_n_observations(obj) -> int:
+    if obj.soma_type in {"SOMADataFrame", "SOMASparseNDArray", "SOMADenseNDArray"}:
+        return obj.non_empty_domain()[0][1] + 1
+    elif obj.soma_type == "SOMAExperiment":
+        return _soma_store_n_observations(obj["obs"])
+    elif obj.soma_type == "SOMAMeasurement":
+        keys = obj.keys()
+        for slot in ("X", "obsm", "obsp"):
+            if slot in keys:
+                return _soma_store_n_observations(next(iter(obj[slot].values())))
+    elif obj.soma_type == "SOMACollection":
+        n_obs = 0
+        for value in obj.values():
+            n_obs += _soma_store_n_observations(value)
+        return n_obs
+    raise ValueError(
+        "Could not infer the number of observations from the tiledbsoma object."
+    )
+
+
+def _soma_n_observations(objectpath: UPath) -> int:
+    with _open_tiledbsoma(objectpath, mode="r") as store:
+        return _soma_store_n_observations(store)
```
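The endpoint handling added to `_tiledb_config_s3` can be checked standalone: for a custom S3-compatible endpoint the URL is split into a scheme and a host override. For example, with a local MinIO-style endpoint:

```python
# Standalone check of the endpoint parsing used in _tiledb_config_s3 above;
# the endpoint URL is an example (e.g. a local MinIO deployment).
from urllib.parse import urlparse

endpoint_url = "http://localhost:9000"
parsed = urlparse(endpoint_url)

assert parsed.scheme == "http"  # -> tiledb "vfs.s3.scheme"
assert parsed._replace(scheme="").geturl().lstrip("/") == "localhost:9000"
# -> tiledb "vfs.s3.endpoint_override"
```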
lamindb/core/storage/paths.py
CHANGED

```diff
@@ -4,7 +4,6 @@ import shutil
 from typing import TYPE_CHECKING
 
 import fsspec
-from lamin_utils import logger
 from lamindb_setup.core import StorageSettings
 from lamindb_setup.core.upath import (
     LocalPathClasses,
@@ -42,25 +41,27 @@ def auto_storage_key_from_artifact_uid(uid: str, suffix: str, is_dir: bool) -> s
     return storage_key
 
 
-def
-
-    # from the equality checks below
-    # and for fsspec.utils.get_protocol
-    path_str = str(path)
-    root_str = str(root)
-    root_protocol = fsspec.utils.get_protocol(root_str)
-    # check that the protocols are the same first
-    if fsspec.utils.get_protocol(path_str) != root_protocol:
-        return False
-    if root_protocol in {"http", "https"}:
-        # in this case it is a base url, not a file
-        # so formally does not exist
+def _safely_resolve(upath: UPath) -> UPath:
+    if upath.protocol in {"http", "https"}:
         resolve_kwargs = {"follow_redirects": False}
     else:
         resolve_kwargs = {}
-    return (
-
-
+    return upath.resolve(**resolve_kwargs)
+
+
+def check_path_is_child_of_root(path: UPathStr, root: UPathStr) -> bool:
+    if fsspec.utils.get_protocol(str(path)) != fsspec.utils.get_protocol(str(root)):
+        return False
+    path_upath = _safely_resolve(UPath(path))
+    root_upath = _safely_resolve(UPath(root))
+    if path_upath.protocol == "s3":
+        endpoint_path = path_upath.storage_options.get("endpoint_url", "")
+        endpoint_root = root_upath.storage_options.get("endpoint_url", "")
+        if endpoint_path != endpoint_root:
+            return False
+    # str is needed to eliminate UPath storage_options
+    # which affect equality checks
+    return UPath(str(root_upath)) in UPath(str(path_upath)).parents
 
 
 # returns filepath and root of the storage
@@ -169,10 +170,15 @@ def store_file_or_folder(
 
 
 def delete_storage_using_key(
-    artifact: Artifact,
-
+    artifact: Artifact,
+    storage_key: str,
+    raise_file_not_found_error: bool = True,
+    using_key: str | None = None,
+) -> None | str:
     filepath, _ = attempt_accessing_path(artifact, storage_key, using_key=using_key)
-    delete_storage(
+    return delete_storage(
+        filepath, raise_file_not_found_error=raise_file_not_found_error
+    )
 
 
 def delete_storage(
@@ -191,5 +197,5 @@ def delete_storage(
     elif raise_file_not_found_error:
         raise FileNotFoundError(f"{storagepath} is not an existing path!")
     else:
-
+        return "did-not-delete"
     return None
```
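A semantics sketch for the rewritten check, using made-up local paths; the same parents-membership test applies to `s3://` URLs, with the extra `endpoint_url` comparison shown above:

```python
# Semantics sketch for the rewritten check; the example paths are made up.
from lamindb.core.storage.paths import check_path_is_child_of_root

assert check_path_is_child_of_root("/data/store/a/file.parquet", "/data/store")
assert not check_path_is_child_of_root("/data/other/file.parquet", "/data/store")
# different protocols never match:
assert not check_path_is_child_of_root("s3://bucket/key", "/data/store")
```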
lamindb/core/subsettings/_creation_settings.py
CHANGED

```diff
@@ -1,13 +1,8 @@
-from typing import Literal
-
-
 class CreationSettings:
-
-
-    ] = "warn_return_existing"
-    """Behavior if file hash exists (default `"warn_return_existing"`).
+    search_names: bool = True
+    """Switch off to speed up creating records (default `True`).
 
-
+    If `True`, search for alternative names and avoids duplicates.
 
     FAQ: :doc:`/faq/idempotency`
     """
@@ -18,15 +13,8 @@ class CreationSettings:
 
     It speeds up file creation by about a factor 100.
     """
-    search_names: bool = True
-    """To speed up creating records (default `True`).
-
-    If `True`, search for alternative names.
-
-    FAQ: :doc:`/faq/idempotency`
-    """
     artifact_silence_missing_run_warning: bool = False
-    """Silence warning about missing run & transform during artifact creation."""
+    """Silence warning about missing run & transform during artifact creation (default `False`)."""
     _artifact_use_virtual_keys: bool = True
    """Treat `key` parameter in :class:`~lamindb.Artifact` as virtual.
 
```
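These flags are reachable at runtime through the settings object; a sketch of the typical toggle, assuming the usual `ln.settings.creation` accessor:

```python
# Sketch of toggling the setting (assuming the usual accessor at
# ln.settings.creation): switching off search_names skips the
# duplicate-avoiding name search during bulk record creation.
import lamindb as ln

ln.settings.creation.search_names = False
try:
    labels = [ln.ULabel(name=f"batch{i}") for i in range(1000)]
    ln.save(labels)
finally:
    ln.settings.creation.search_names = True  # restore the default
```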