lamindb 0.77.2__py3-none-any.whl → 1.0rc1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (89)
  1. lamindb/__init__.py +39 -32
  2. lamindb/_artifact.py +95 -64
  3. lamindb/_can_curate.py +19 -10
  4. lamindb/_collection.py +51 -49
  5. lamindb/_feature.py +9 -9
  6. lamindb/_finish.py +99 -86
  7. lamindb/_from_values.py +20 -17
  8. lamindb/_is_versioned.py +2 -1
  9. lamindb/_parents.py +23 -16
  10. lamindb/_query_manager.py +3 -3
  11. lamindb/_query_set.py +85 -18
  12. lamindb/_record.py +121 -46
  13. lamindb/_run.py +3 -3
  14. lamindb/_save.py +14 -8
  15. lamindb/{_feature_set.py → _schema.py} +34 -31
  16. lamindb/_storage.py +2 -1
  17. lamindb/_transform.py +51 -23
  18. lamindb/_ulabel.py +17 -8
  19. lamindb/_view.py +15 -14
  20. lamindb/base/__init__.py +24 -0
  21. lamindb/base/fields.py +281 -0
  22. lamindb/base/ids.py +103 -0
  23. lamindb/base/types.py +51 -0
  24. lamindb/base/users.py +30 -0
  25. lamindb/base/validation.py +67 -0
  26. lamindb/core/__init__.py +19 -14
  27. lamindb/core/_context.py +297 -228
  28. lamindb/core/_data.py +44 -49
  29. lamindb/core/_describe.py +41 -31
  30. lamindb/core/_django.py +59 -44
  31. lamindb/core/_feature_manager.py +192 -168
  32. lamindb/core/_label_manager.py +22 -22
  33. lamindb/core/_mapped_collection.py +17 -14
  34. lamindb/core/_settings.py +1 -12
  35. lamindb/core/_sync_git.py +56 -9
  36. lamindb/core/_track_environment.py +1 -1
  37. lamindb/core/datasets/_core.py +5 -6
  38. lamindb/core/exceptions.py +0 -7
  39. lamindb/core/fields.py +1 -1
  40. lamindb/core/loaders.py +18 -2
  41. lamindb/core/{schema.py → relations.py} +22 -19
  42. lamindb/core/storage/_anndata_accessor.py +1 -2
  43. lamindb/core/storage/_backed_access.py +2 -1
  44. lamindb/core/storage/_tiledbsoma.py +40 -13
  45. lamindb/core/storage/objects.py +1 -1
  46. lamindb/core/storage/paths.py +13 -8
  47. lamindb/core/subsettings/__init__.py +0 -2
  48. lamindb/core/types.py +2 -23
  49. lamindb/core/versioning.py +11 -7
  50. lamindb/{_curate.py → curators/__init__.py} +700 -57
  51. lamindb/curators/_spatial.py +528 -0
  52. lamindb/integrations/_vitessce.py +1 -3
  53. lamindb/migrations/0052_squashed.py +1261 -0
  54. lamindb/migrations/0053_alter_featureset_hash_alter_paramvalue_created_by_and_more.py +57 -0
  55. lamindb/migrations/0054_alter_feature_previous_runs_and_more.py +35 -0
  56. lamindb/migrations/0055_artifact_type_artifactparamvalue_and_more.py +61 -0
  57. lamindb/migrations/0056_rename_ulabel_ref_is_name_artifactulabel_label_ref_is_name_and_more.py +22 -0
  58. lamindb/migrations/0057_link_models_latest_report_and_others.py +356 -0
  59. lamindb/migrations/0058_artifact__actions_collection__actions.py +22 -0
  60. lamindb/migrations/0059_alter_artifact__accessor_alter_artifact__hash_type_and_more.py +31 -0
  61. lamindb/migrations/0060_alter_artifact__actions.py +22 -0
  62. lamindb/migrations/0061_alter_collection_meta_artifact_alter_run_environment_and_more.py +45 -0
  63. lamindb/migrations/0062_add_is_latest_field.py +32 -0
  64. lamindb/migrations/0063_populate_latest_field.py +45 -0
  65. lamindb/migrations/0064_alter_artifact_version_alter_collection_version_and_more.py +33 -0
  66. lamindb/migrations/0065_remove_collection_feature_sets_and_more.py +22 -0
  67. lamindb/migrations/0066_alter_artifact__feature_values_and_more.py +352 -0
  68. lamindb/migrations/0067_alter_featurevalue_unique_together_and_more.py +20 -0
  69. lamindb/migrations/0068_alter_artifactulabel_unique_together_and_more.py +20 -0
  70. lamindb/migrations/0069_alter_artifact__accessor_alter_artifact__hash_type_and_more.py +1294 -0
  71. lamindb/migrations/0069_squashed.py +1770 -0
  72. lamindb/migrations/0070_lamindbv1_migrate_data.py +78 -0
  73. lamindb/migrations/0071_lamindbv1_migrate_schema.py +741 -0
  74. lamindb/migrations/0072_remove_user__branch_code_remove_user_aux_and_more.py +148 -0
  75. lamindb/migrations/0073_merge_ourprojects.py +945 -0
  76. lamindb/migrations/0074_lamindbv1_part4.py +374 -0
  77. lamindb/migrations/0075_lamindbv1_part5.py +276 -0
  78. lamindb/migrations/0076_lamindbv1_part6.py +621 -0
  79. lamindb/migrations/0077_lamindbv1_part6b.py +228 -0
  80. lamindb/migrations/0078_lamindbv1_part6c.py +468 -0
  81. lamindb/migrations/0079_alter_rundata_value_json_and_more.py +36 -0
  82. lamindb/migrations/__init__.py +0 -0
  83. lamindb/models.py +4064 -0
  84. {lamindb-0.77.2.dist-info → lamindb-1.0rc1.dist-info}/METADATA +15 -20
  85. lamindb-1.0rc1.dist-info/RECORD +100 -0
  86. {lamindb-0.77.2.dist-info → lamindb-1.0rc1.dist-info}/WHEEL +1 -1
  87. lamindb/core/subsettings/_transform_settings.py +0 -21
  88. lamindb-0.77.2.dist-info/RECORD +0 -63
  89. {lamindb-0.77.2.dist-info → lamindb-1.0rc1.dist-info}/LICENSE +0 -0
lamindb/core/_label_manager.py CHANGED
@@ -5,12 +5,12 @@ from collections import defaultdict
  from typing import TYPE_CHECKING

  from django.db import connections
- from lamin_utils import colors, logger
- from lnschema_core.models import CanCurate, Feature
+ from lamin_utils import logger
  from rich.table import Column, Table
  from rich.text import Text
+ from rich.tree import Tree

- from lamindb._from_values import _print_values
+ from lamindb._from_values import _format_values
  from lamindb._record import (
  REGISTRY_UNIQUE_FIELD,
  get_name_field,
@@ -18,6 +18,7 @@ from lamindb._record import (
  transfer_to_default_db,
  )
  from lamindb._save import save
+ from lamindb.models import CanCurate, Feature

  from ._describe import (
  NAME_WIDTH,
@@ -28,15 +29,13 @@ from ._describe import (
  )
  from ._django import get_artifact_with_related, get_related_model
  from ._settings import settings
- from .schema import dict_related_model_to_related_name
+ from .relations import dict_related_model_to_related_name

  if TYPE_CHECKING:
- from lnschema_core.models import Artifact, Collection, Record
- from rich.tree import Tree
-
  from lamindb._query_set import QuerySet
+ from lamindb.models import Artifact, Collection, Record

- EXCLUDE_LABELS = {"feature_sets"}
+ EXCLUDE_LABELS = {"_schemas_m2m"}


  def _get_labels(
@@ -99,39 +98,40 @@ def describe_labels(
  return tree

  labels_table = Table(
- Column(
- Text.assemble(("Labels", "green_yellow")),
- style="",
- no_wrap=True,
- width=NAME_WIDTH,
- ),
+ Column("", style="", no_wrap=True, width=NAME_WIDTH),
  Column("", style="dim", no_wrap=True, width=TYPE_WIDTH),
  Column("", width=VALUES_WIDTH, no_wrap=True),
- # show_header=True,
+ show_header=False,
  box=None,
  pad_edge=False,
  )
  for related_name, labels in labels_data.items():
- if not labels or related_name == "feature_sets":
+ if not labels or related_name == "_schemas_m2m":
  continue
  if isinstance(labels, dict): # postgres, labels are a dict[id, name]
- print_values = _print_values(labels.values(), n=10)
+ print_values = _format_values(labels.values(), n=10, quotes=False)
  else: # labels are a QuerySet
  field = get_name_field(labels)
- print_values = _print_values(labels.values_list(field, flat=True), n=10)
+ print_values = _format_values(
+ labels.values_list(field, flat=True), n=10, quotes=False
+ )
  if print_values:
  related_model = get_related_model(self, related_name)
- type_str = related_model.__get_name_with_schema__()
+ type_str = related_model.__get_name_with_module__()
  labels_table.add_row(
  f".{related_name}", Text(type_str, style="dim"), print_values
  )

+ labels_header = Text("Labels", style="bold green_yellow")
  if as_subtree:
  if labels_table.rows:
- return labels_table
+ labels_tree = Tree(labels_header, guide_style="dim")
+ labels_tree.add(labels_table)
+ return labels_tree
  else:
  if labels_table.rows:
- tree.add(labels_table)
+ labels_tree = tree.add(labels_header)
+ labels_tree.add(labels_table)
  return tree


@@ -310,7 +310,7 @@ class LabelManager:
  """
  d = dict_related_model_to_related_name(self._host)
  registry = label.__class__
- related_name = d.get(registry.__get_name_with_schema__())
+ related_name = d.get(registry.__get_name_with_module__())
  link_model = getattr(self._host, related_name).through
  link_records = link_model.filter(
  artifact_id=self._host.id, **{f"{registry.__name__.lower()}_id": label.id}
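
The labels section of `describe` is now rendered as a rich `Tree` whose node carries the bold "Labels" title, with a header-less `Table` nested beneath it. A minimal sketch of that nesting pattern (column widths and the example row are illustrative, not lamindb's exact code):

    from rich import print as rprint
    from rich.table import Column, Table
    from rich.text import Text
    from rich.tree import Tree

    # header-less table: the section title now lives on the tree node, not on a column
    table = Table(
        Column("", no_wrap=True, width=25),
        Column("", style="dim", no_wrap=True, width=25),
        show_header=False,
        box=None,
        pad_edge=False,
    )
    table.add_row(".ulabels", Text("ULabel", style="dim"))

    tree = Tree(Text("Labels", style="bold green_yellow"), guide_style="dim")
    tree.add(table)  # Tree.add accepts any renderable as a child
    rprint(tree)
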
lamindb/core/_mapped_collection.py CHANGED
@@ -2,7 +2,6 @@ from __future__ import annotations

  from collections import Counter
  from functools import reduce
- from pathlib import Path
  from typing import TYPE_CHECKING, Literal

  import numpy as np
@@ -86,9 +85,9 @@ class MappedCollection:
  retrieves ``.X``.
  obsm_keys: Keys from the ``.obsm`` slots.
  obs_keys: Keys from the ``.obs`` slots.
- obs_filter: Select only observations with these values for the given obs column.
- Should be a tuple with an obs column name as the first element
- and filtering values (a string or a tuple of strings) as the second element.
+ obs_filter: Select only observations with these values for the given obs columns.
+ Should be a dictionary with obs column names as keys
+ and filtering values (a string or a tuple of strings) as values.
  join: `"inner"` or `"outer"` virtual joins. If ``None`` is passed,
  does not join.
  encode_labels: Encode labels into integers.
@@ -107,7 +106,7 @@ class MappedCollection:
  layers_keys: str | list[str] | None = None,
  obs_keys: str | list[str] | None = None,
  obsm_keys: str | list[str] | None = None,
- obs_filter: tuple[str, str | tuple[str, ...]] | None = None,
+ obs_filter: dict[str, str | tuple[str, ...]] | None = None,
  join: Literal["inner", "outer"] | None = "inner",
  encode_labels: bool | list[str] = True,
  unknown_label: str | dict[str, str] | None = None,
@@ -121,11 +120,11 @@ class MappedCollection:
  )

  self.filtered = obs_filter is not None
- if self.filtered and len(obs_filter) != 2:
- raise ValueError(
- "obs_filter should be a tuple with obs column name "
- "as the first element and filtering values as the second element"
+ if self.filtered and not isinstance(obs_filter, dict):
+ logger.warning(
+ "Passing a tuple to `obs_filter` is deprecated, use a dictionary"
  )
+ obs_filter = {obs_filter[0]: obs_filter[1]}

  if layers_keys is None:
  self.layers_keys = ["X"]
@@ -183,12 +182,16 @@ class MappedCollection:
  store_path = self.path_list[i]
  self._check_csc_raise_error(X, "X", store_path)
  if self.filtered:
- obs_filter_key, obs_filter_values = obs_filter
- indices_storage = np.where(
- np.isin(
+ indices_storage_mask = None
+ for obs_filter_key, obs_filter_values in obs_filter.items():
+ obs_filter_mask = np.isin(
  self._get_labels(store, obs_filter_key), obs_filter_values
  )
- )[0]
+ if indices_storage_mask is None:
+ indices_storage_mask = obs_filter_mask
+ else:
+ indices_storage_mask &= obs_filter_mask
+ indices_storage = np.where(indices_storage_mask)[0]
  n_obs_storage = len(indices_storage)
  else:
  if isinstance(X, ArrayTypes): # type: ignore
@@ -348,7 +351,7 @@ class MappedCollection:

  @property
  def original_shapes(self) -> list[tuple[int, int]]:
- """Shapes of the underlying AnnData objects."""
+ """Shapes of the underlying AnnData objects (with `obs_filter` applied)."""
  if self.n_vars_list is None:
  n_vars_list = [None] * len(self.n_obs_list)
  else:
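
`obs_filter` now accepts a dictionary and AND-combines the per-column masks within each store; tuples still work but emit a deprecation warning. A hedged usage sketch (the collection name and obs columns are illustrative):

    import lamindb as ln

    collection = ln.Collection.get(name="scrna-atlas")  # illustrative name
    ds = collection.mapped(
        obs_keys=["cell_type"],
        # keep only observations matching *all* of the given columns
        obs_filter={"tissue": ("lung", "liver"), "assay": "10x 3' v3"},
    )
    print(len(ds))  # number of observations that pass both filters
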
lamindb/core/_settings.py CHANGED
@@ -1,7 +1,7 @@
  from __future__ import annotations

  import os
- from typing import TYPE_CHECKING, Literal
+ from typing import TYPE_CHECKING

  import lamindb_setup as ln_setup
  from lamin_utils import logger
@@ -10,7 +10,6 @@ from lamindb_setup.core._settings import settings as setup_settings
  from lamindb_setup.core._settings_instance import sanitize_git_repo_url

  from .subsettings._creation_settings import CreationSettings, creation_settings
- from .subsettings._transform_settings import TransformSettings, transform_settings

  if TYPE_CHECKING:
  from collections.abc import Mapping
@@ -80,16 +79,6 @@ class Settings:
  storage_settings = ln_setup.core.StorageSettings(root=self._using_storage)
  return storage_settings

- @property
- def transform(self) -> TransformSettings:
- """Transform settings.
-
- Is deprecated since version 0.76.1.
- """
- # enable warning soon
- # logger.warning("Transform settings are deprecated, please instead set `ln.context.uid`")
- return transform_settings
-
  @property
  def sync_git_repo(self) -> str | None:
  """Sync transforms with scripts in git repository.
lamindb/core/_sync_git.py CHANGED
@@ -53,22 +53,69 @@ def check_local_git_repo() -> bool:


  def get_git_commit_hash(blob_hash: str, repo_dir: Path | None = None) -> str | None:
- command = ["git", "log", f"--find-object={blob_hash}", "--pretty=format:%H"]
+ # Fetch all remote branches so that we can also search them
+ fetch_command = ["git", "fetch", "origin", "+refs/heads/*:refs/remotes/origin/*"]
+ subprocess.run(fetch_command, cwd=repo_dir, check=True)
+
+ # Find the commit containing the blob hash in all branches
+ command = [
+ "git",
+ "log",
+ "--all",
+ f"--find-object={blob_hash}",
+ "--pretty=format:%H",
+ ]
  result = subprocess.run(
  command,
  capture_output=True,
  cwd=repo_dir,
  )
- # we just care to find one commit
- # hence, we split by new line ("\n") and use the first one
+ # We just care to find one commit
+ # Hence, we split by new line ("\n") and use the first one
  commit_hash = result.stdout.decode().split("\n")[0]
- if commit_hash == "" or result.returncode == 1:
+
+ if not commit_hash or result.returncode == 1:
  return None
- else:
- assert ( # noqa: S101
- len(commit_hash) == 40
- ), f"commit hash |{commit_hash}| is not 40 characters long"
- return commit_hash
+
+ default_branch = (
+ subprocess.run(
+ ["git", "rev-parse", "--abbrev-ref", "origin/HEAD"],
+ capture_output=True,
+ cwd=repo_dir,
+ text=True,
+ )
+ .stdout.strip()
+ .split("/")[-1]
+ )
+
+ # Find all branches containing the commit
+ commit_containing_branches = subprocess.run(
+ ["git", "branch", "--all", "--contains", commit_hash],
+ capture_output=True,
+ cwd=repo_dir,
+ text=True,
+ ).stdout.split("\n")
+
+ # Clean up branch names and filter out the default branch
+ commit_containing_branches = [
+ branch.strip().replace("remotes/", "")
+ for branch in commit_containing_branches
+ if branch.strip()
+ ]
+ non_default_branches = [
+ branch for branch in commit_containing_branches if default_branch not in branch
+ ]
+
+ if non_default_branches:
+ logger.warning(
+ f"code blob hash {blob_hash} was found in non-default branch(es): {', '.join(non_default_branches)}"
+ )
+
+ assert ( # noqa: S101
+ len(commit_hash) == 40
+ ), f"commit hash |{commit_hash}| is not 40 characters long"
+
+ return commit_hash


  def get_filepath_within_git_repo(
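
For callers the API is unchanged: with a synced git repo configured, the script's blob hash is now resolved across all fetched branches rather than only the current one, and a warning lists non-default branches containing the matching commit. A hedged sketch of the triggering setup (the repository URL is illustrative):

    import lamindb as ln

    ln.settings.sync_git_repo = "https://github.com/my-org/analyses"  # illustrative URL
    ln.track()  # the script's blob hash is now searched across all branches
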
lamindb/core/_track_environment.py CHANGED
@@ -7,7 +7,7 @@ import lamindb_setup as ln_setup
  from lamin_utils import logger

  if TYPE_CHECKING:
- from lnschema_core.models import Run
+ from lamindb.models import Run


  def track_environment(run: Run) -> None:
lamindb/core/datasets/_core.py CHANGED
@@ -5,11 +5,10 @@ from typing import TYPE_CHECKING
  from urllib.request import urlretrieve

  import anndata as ad
- import numpy as np
  import pandas as pd
- from lnschema_core import ids
  from upath import UPath

+ from lamindb.base.ids import base62
  from lamindb.core._settings import settings

  if TYPE_CHECKING:
@@ -146,7 +145,7 @@ def dir_iris_images() -> UPath: # pragma: no cover
  This is why on the UI, the artifact shows up as output of the downstream
  demo notebook rather than the upstream curation notebook. The lineage
  information should still be captured by
- https://github.com/laminlabs/lnschema-core/blob/a90437e91dfbd6b9002f18c3e978bd0f9c9a632d/lnschema_core/models.py#L2050-L2052
+ https://github.com/laminlabs/lnschema-core/blob/a90437e91dfbd6b9002f18c3e978bd0f9c9a632d/lamindb/models.py#L2050-L2052
  but we don't use this in the UI yet.
  """
  return UPath("s3://lamindata/iris_studies")
@@ -481,11 +480,11 @@ def dir_scrnaseq_cellranger(
  fastqdir.mkdir(parents=True, exist_ok=True)
  fastqfile1 = fastqdir / f"{sample_name}_R1_001.fastq.gz"
  with open(fastqfile1, "w") as f:
- f.write(f"{ids.base62(n_char=6)}")
+ f.write(f"{base62(n_char=6)}")
  fastqfile2 = fastqdir / f"{sample_name}_R2_001.fastq.gz"
  fastqfile2.touch(exist_ok=True)
  with open(fastqfile2, "w") as f:
- f.write(f"{ids.base62(n_char=6)}")
+ f.write(f"{base62(n_char=6)}")

  sampledir = basedir / f"{sample_name}"
  for folder in ["raw_feature_bc_matrix", "filtered_feature_bc_matrix", "analysis"]:
@@ -511,7 +510,7 @@ def dir_scrnaseq_cellranger(
  ]:
  file = sampledir / filename
  with open(file, "w") as f:
- f.write(f"{ids.base62(n_char=6)}")
+ f.write(f"{base62(n_char=6)}")

  return sampledir
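
`base62` now ships in `lamindb.base.ids` instead of `lnschema_core.ids`. A self-contained sketch of what such a generator typically looks like (an assumption for illustration; the shipped implementation may differ):

    import secrets
    import string

    def base62(n_char: int) -> str:
        """Return a random base62 string of length `n_char`."""
        alphabet = string.digits + string.ascii_letters  # 10 + 52 = 62 characters
        return "".join(secrets.choice(alphabet) for _ in range(n_char))

    print(base62(n_char=6))  # e.g. 'a3Xk9Q'
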
lamindb/core/exceptions.py CHANGED
@@ -7,7 +7,6 @@
  DoesNotExist
  ValidationError
  NotebookNotSaved
- NoTitleError
  MissingContextUID
  UpdateContext
  IntegrityError
@@ -79,12 +78,6 @@ class IntegrityError(Exception):
  pass


- class NoTitleError(SystemExit):
- """Notebook has no title."""
-
- pass
-
-
  class MissingContextUID(SystemExit):
  """User didn't define transform settings."""
lamindb/core/fields.py CHANGED
@@ -9,4 +9,4 @@ The field accessor of a :class:`~lamindb.core.Record`:

  """

- from lnschema_core.types import FieldAttr
+ from lamindb.base.types import FieldAttr # noqa: F401
lamindb/core/loaders.py CHANGED
@@ -33,7 +33,6 @@ from lamindb_setup.core.upath import (
  from ._settings import settings

  if TYPE_CHECKING:
- import mudata as md
  from lamindb_setup.core.types import UPathStr

  try:
@@ -110,8 +109,23 @@ def load_json(path: UPathStr) -> dict:
  return data


+ def load_yaml(path: UPathStr) -> dict | UPathStr:
+ """Load `.yaml` to `dict`."""
+ try:
+ import yaml # type: ignore
+
+ with open(path) as f:
+ data = yaml.safe_load(f)
+ return data
+ except ImportError:
+ logger.warning(
+ "Please install PyYAML (`pip install PyYAML`) to load `.yaml` files."
+ )
+ return path
+
+
  def load_image(path: UPathStr) -> None | UPathStr:
- """Display `.svg` in ipython, otherwise return path."""
+ """Display `.jpg`, `.gif` or `.png` in ipython, otherwise return path."""
  if is_run_from_ipython:
  from IPython.display import Image, display

@@ -147,7 +161,9 @@ FILE_LOADERS = {
  ".zarr": load_anndata_zarr,
  ".html": load_html,
  ".json": load_json,
+ ".yaml": load_yaml,
  ".h5mu": load_h5mu,
+ ".gif": load_image,
  ".jpg": load_image,
  ".png": load_image,
  ".svg": load_svg,
lamindb/core/{schema.py → relations.py} CHANGED
@@ -7,47 +7,50 @@ from lamindb_setup._connect_instance import (
  load_instance_settings,
  )
  from lamindb_setup.core._settings_store import instance_settings_file
- from lnschema_core.models import Feature, FeatureSet, LinkORM, Record

+ from lamindb.models import LinkORM, Record, Schema

- def get_schemas_modules(instance: str | None) -> set[str]:
+
+ def get_schema_modules(instance: str | None) -> set[str]:
  if instance is None or instance == "default":
- schema_modules = set(ln_setup.settings.instance.schema)
+ schema_modules = set(ln_setup.settings.instance.modules)
  schema_modules.add("core")
  return schema_modules
  owner, name = get_owner_name_from_identifier(instance)
  settings_file = instance_settings_file(name, owner)
  if settings_file.exists():
- schema = set(load_instance_settings(settings_file).schema)
+ modules = set(load_instance_settings(settings_file).modules)
  else:
  cache_filepath = (
  ln_setup.settings.cache_dir / f"instance--{owner}--{name}--uid.txt"
  )
  if cache_filepath.exists():
- schema = set(cache_filepath.read_text().split("\n")[1].split(","))
+ modules = set(cache_filepath.read_text().split("\n")[1].split(","))
  else:
  raise ValueError(f"Instance {instance} not found")
- shared_schema_modules = set(ln_setup.settings.instance.schema).intersection(schema)
+ shared_schema_modules = set(ln_setup.settings.instance.modules).intersection(
+ modules
+ )
  shared_schema_modules.add("core")
  return shared_schema_modules


- def dict_schema_name_to_model_name(
+ def dict_module_name_to_model_name(
  registry: type[Record], instance: str | None = None
  ) -> dict[str, Record]:
- schema_modules = get_schemas_modules(instance)
+ schema_modules = get_schema_modules(instance)
  d: dict = {
- i.related_model.__get_name_with_schema__(): i.related_model
+ i.related_model.__get_name_with_module__(): i.related_model
  for i in registry._meta.related_objects
  if i.related_name is not None
- and i.related_model.__get_schema_name__() in schema_modules
+ and i.related_model.__get_module_name__() in schema_modules
  }
  d.update(
  {
- i.related_model.__get_name_with_schema__(): i.related_model
+ i.related_model.__get_name_with_module__(): i.related_model
  for i in registry._meta.many_to_many
  if i.name is not None
- and i.related_model.__get_schema_name__() in schema_modules
+ and i.related_model.__get_module_name__() in schema_modules
  }
  )
  return d
@@ -59,11 +62,11 @@ def dict_related_model_to_related_name(
  def include(model: Record):
  return not links != issubclass(model, LinkORM)

- schema_modules = get_schemas_modules(instance)
+ schema_modules = get_schema_modules(instance)

  related_objects = registry._meta.related_objects + registry._meta.many_to_many
  d: dict = {
- record.related_model.__get_name_with_schema__(): (
+ record.related_model.__get_name_with_module__(): (
  record.related_name
  if not isinstance(record, ManyToManyField)
  else record.name
@@ -72,7 +75,7 @@ def dict_related_model_to_related_name(
  if (
  record.name is not None
  and include(record.related_model)
- and record.related_model.__get_schema_name__() in schema_modules
+ and record.related_model.__get_module_name__() in schema_modules
  )
  }
  return d
@@ -81,15 +84,15 @@ def dict_related_model_to_related_name(
  def get_related_name(features_type: type[Record]) -> str:
  candidates = [
  field.related_name
- for field in FeatureSet._meta.related_objects
+ for field in Schema._meta.related_objects
  if field.related_model == features_type
  ]
  if not candidates:
  raise ValueError(
  f"Can't create feature sets from {features_type.__name__} because it's not"
- " related to it!\nYou need to create a link model between FeatureSet and"
- " your Record in your custom schema.\nTo do so, add a"
- " line:\nfeature_sets = models.ManyToMany(FeatureSet,"
+ " related to it!\nYou need to create a link model between Schema and"
+ " your Record in your custom module.\nTo do so, add a"
+ " line:\n_schemas_m2m = models.ManyToMany(Schema,"
  " related_name='mythings')\n"
  )
  return candidates[0]
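
The schema → module rename runs through these helpers: registry mappings are now keyed by `__get_name_with_module__()`. A hedged sketch of how the mapping is consumed (the printed shape is an assumption for illustration):

    import lamindb as ln
    from lamindb.core.relations import dict_related_model_to_related_name

    artifact = ln.Artifact.filter().first()
    d = dict_related_model_to_related_name(artifact)
    # maps module-qualified model names to related names,
    # e.g. {"ULabel": "ulabels", "bionty.CellType": "cell_types", ...}
    print(d)
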
lamindb/core/storage/_anndata_accessor.py CHANGED
@@ -17,12 +17,11 @@ from anndata._io.specs.registry import get_spec, read_elem, read_elem_partial
  from anndata.compat import _read_attr
  from fsspec.implementations.local import LocalFileSystem
  from lamin_utils import logger
- from lamindb_setup.core.upath import UPath, create_mapper, infer_filesystem
+ from lamindb_setup.core.upath import create_mapper, infer_filesystem
  from packaging import version

  if TYPE_CHECKING:
  from collections.abc import Mapping
- from pathlib import Path

  from fsspec.core import OpenFile
  from lamindb_setup.core.types import UPathStr
lamindb/core/storage/_backed_access.py CHANGED
@@ -4,7 +4,8 @@ from dataclasses import dataclass
  from typing import TYPE_CHECKING, Any, Callable

  from anndata._io.specs.registry import get_spec
- from lnschema_core import Artifact
+
+ from lamindb.models import Artifact

  from ._anndata_accessor import AnnDataAccessor, StorageType, registry
  from ._pyarrow_dataset import _is_pyarrow_dataset, _open_pyarrow_dataset
lamindb/core/storage/_tiledbsoma.py CHANGED
@@ -2,11 +2,16 @@ from __future__ import annotations

  from typing import TYPE_CHECKING, Literal

+ import pandas as pd
+ import pyarrow as pa
  from anndata import AnnData, read_h5ad
+ from lamin_utils import logger
  from lamindb_setup import settings as setup_settings
  from lamindb_setup.core._settings_storage import get_storage_region
  from lamindb_setup.core.upath import LocalPathClasses, create_path
- from lnschema_core import Artifact, Run
+ from packaging import version
+
+ from lamindb.models import Artifact, Run

  if TYPE_CHECKING:
  from lamindb_setup.core.types import UPathStr
@@ -137,9 +142,17 @@ def save_tiledbsoma_experiment(
  storepath = storepath.as_posix()

  add_run_uid = True
+ run_uid_dtype = "category"
  if appending:
  with soma.Experiment.open(storepath, mode="r", context=ctx) as store:
- add_run_uid = "lamin_run_uid" in store["obs"].schema.names
+ obs_schema = store["obs"].schema
+ add_run_uid = "lamin_run_uid" in obs_schema.names
+ # this is needed to enable backwards compatibility with tiledbsoma stores
+ # created before PR 2300
+ if add_run_uid:
+ column_type = obs_schema.types[obs_schema.names.index("lamin_run_uid")]
+ if not isinstance(column_type, pa.DictionaryType):
+ run_uid_dtype = None

  if add_run_uid and run is None:
  raise ValueError("Pass `run`")
@@ -147,17 +160,16 @@ def save_tiledbsoma_experiment(
  adata_objects = []
  for adata in adatas:
  if isinstance(adata, AnnData):
- if add_run_uid:
- if adata.is_view:
- raise ValueError(
- "Can not write an `AnnData` view, please do `adata.copy()` before passing."
- )
- else:
- adata.obs["lamin_run_uid"] = run.uid
+ if add_run_uid and adata.is_view:
+ raise ValueError(
+ "Can not write an `AnnData` view, please do `adata.copy()` before passing."
+ )
  else:
  adata = _load_h5ad_zarr(create_path(adata))
- if add_run_uid:
- adata.obs["lamin_run_uid"] = run.uid
+ if add_run_uid:
+ adata.obs["lamin_run_uid"] = pd.Series(
+ run.uid, index=adata.obs.index, dtype=run_uid_dtype
+ )
  adata_objects.append(adata)

  registration_mapping = kwargs.get("registration_mapping", None)
@@ -172,13 +184,28 @@ def save_tiledbsoma_experiment(
  context=ctx,
  )

+ resize_experiment = False
  if registration_mapping is not None:
- n_observations = len(registration_mapping.obs_axis.data)
+ if version.parse(soma.__version__) < version.parse("1.15.0rc4"):
+ n_observations = len(registration_mapping.obs_axis.data)
+ else:
+ n_observations = registration_mapping.get_obs_shape()
+ resize_experiment = True
  else: # happens only if not appending and only one adata passed
  assert len(adata_objects) == 1 # noqa: S101
  n_observations = adata_objects[0].n_obs

+ logger.important(f"Writing the tiledbsoma store to {storepath}")
  for adata_obj in adata_objects:
+ if resize_experiment and soma.Experiment.exists(storepath, context=ctx):
+ # can only happen if registration_mapping is not None
+ soma_io.resize_experiment(
+ storepath,
+ nobs=n_observations,
+ nvars=registration_mapping.get_var_shapes(),
+ context=ctx,
+ )
+ resize_experiment = False
  soma_io.from_anndata(
  storepath,
  adata_obj,
@@ -199,6 +226,6 @@ def save_tiledbsoma_experiment(
  _is_internal_call=True,
  )
  artifact.n_observations = n_observations
- artifact._accessor = "tiledbsoma"
+ artifact.otype = "tiledbsoma"

  return artifact.save()
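
A hedged usage sketch of the updated function (import path, file name, and key are illustrative): `lamin_run_uid` is now written as a categorical column unless appending to a pre-PR-2300 store, and on tiledbsoma >= 1.15.0rc4 the experiment is resized before each append.

    import lamindb as ln
    from lamindb.core.storage import save_tiledbsoma_experiment

    ln.track()  # a run is required: every obs row gets a `lamin_run_uid`
    artifact = save_tiledbsoma_experiment(
        ["batch1.h5ad"],  # illustrative input
        key="scrna/experiment.tiledbsoma",
        run=ln.context.run,
    )
    assert artifact.otype == "tiledbsoma"  # renamed from `_accessor`
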
lamindb/core/storage/objects.py CHANGED
@@ -12,7 +12,7 @@ if TYPE_CHECKING:

  def _mudata_is_installed():
  try:
- import mudata
+ import mudata # noqa: F401
  except ImportError:
  return False
  return True