PyPI - lamindb - Versions diffs - 1.11a1__py3-none-any.whl → 1.11.2__py3-none-any.whl - Mend

lamindb 1.11a1__py3-none-any.whl → 1.11.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (26) hide show

lamindb/__init__.py +1 -1
lamindb/_finish.py +3 -3
lamindb/core/_context.py +4 -2
lamindb/curators/core.py +13 -5
lamindb/errors.py +6 -0
lamindb/examples/cellxgene/_cellxgene.py +1 -1
lamindb/examples/croissant/__init__.py +20 -4
lamindb/examples/datasets/_core.py +8 -1
lamindb/examples/datasets/mini_immuno.py +0 -1
lamindb/examples/fixtures/sheets.py +8 -2
lamindb/integrations/_croissant.py +34 -11
lamindb/migrations/0121_recorduser.py +7 -0
lamindb/models/__init__.py +1 -0
lamindb/models/_feature_manager.py +78 -18
lamindb/models/artifact.py +71 -65
lamindb/models/artifact_set.py +12 -3
lamindb/models/query_set.py +170 -74
lamindb/models/record.py +5 -1
lamindb/models/run.py +2 -27
lamindb/models/save.py +18 -10
lamindb/models/sqlrecord.py +47 -33
lamindb-1.11.2.dist-info/METADATA +180 -0
{lamindb-1.11a1.dist-info → lamindb-1.11.2.dist-info}/RECORD +25 -25
lamindb-1.11a1.dist-info/METADATA +0 -144
{lamindb-1.11a1.dist-info → lamindb-1.11.2.dist-info}/LICENSE +0 -0
{lamindb-1.11a1.dist-info → lamindb-1.11.2.dist-info}/WHEEL +0 -0

lamindb/__init__.py CHANGED Viewed

@@ -114,7 +114,7 @@ Backwards compatibility.
 # ruff: noqa: I001
 # denote a release candidate for 0.1.0 with 0.1rc1, 0.1a1, 0.1b1, etc.
-__version__ = "1.11a1"
+__version__ = "1.11.2"
 import warnings as _warnings

lamindb/_finish.py CHANGED Viewed

@@ -173,7 +173,7 @@ def notebook_to_script(  # type: ignore
     if script_path is None:
         return py_content
     else:
-        script_path.write_text(py_content)
+        script_path.write_text(py_content, encoding="utf-8")
 def clean_r_notebook_html(file_path: Path) -> tuple[str | None, Path]:
@@ -202,7 +202,7 @@ def clean_r_notebook_html(file_path: Path) -> tuple[str | None, Path]:
             )  # RStudio might insert a newline
             cleaned_content = cleaned_content.replace(orig_error_message, "")
     cleaned_path = file_path.parent / (f"{file_path.stem}.cleaned{file_path.suffix}")
-    cleaned_path.write_text(cleaned_content)
+    cleaned_path.write_text(cleaned_content, encoding="utf-8")
     return title_text, cleaned_path
@@ -474,7 +474,7 @@ def save_context_core(
         # this can happen in interactively executed notebooks with a pro-active version bump in case it turns out that the user didn't make a change to the notebook
         run.transform = transform
         run.save()
-        ln.Transform.get(transform_id_prior_to_save).delete()
+        ln.Transform.get(transform_id_prior_to_save).delete(permanent=True)
     # finalize
     if finished_at and not from_cli and run is not None:

lamindb/core/_context.py CHANGED Viewed

@@ -600,9 +600,11 @@ class Context:
         if pypackages is None:
             pypackages = True
         description = None
+        if path.suffix == ".ipynb" and path.stem.startswith("Untitled"):
+            raise RuntimeError(
+                "Your notebook is untitled, please rename it before tracking"
+            )
         path_str = path.as_posix()
-        if path_str.endswith("Untitled.ipynb"):
-            raise RuntimeError("Please rename your notebook before tracking it")
         if path_str.startswith("/fileId="):
             logger.warning("tracking on Google Colab is experimental")
             path_str = get_notebook_key_colab()

lamindb/curators/core.py CHANGED Viewed

@@ -411,7 +411,7 @@ class ComponentCurator(Curator):
     """Curator for `DataFrame`.
     Provides all key functionality to validate Pandas DataFrames.
-    This class is not user facing unlike :class:`~lamindb.DataFrameCurator` which extends this
+    This class is not user facing unlike :class:`~lamindb.curators.DataFrameCurator` which extends this
     class with functionality to validate the `attrs` slot.
     Args:
@@ -671,7 +671,7 @@ class DataFrameCurator(SlotsCurator):
     Examples:
-        For a simple example using a flexible schema, see :meth:`~lamindb.Artifact.from_df`.
+        For a simple example using a flexible schema, see :meth:`~lamindb.Artifact.from_dataframe`.
         Here is an example that enforces a minimal set of columns in the dataframe.
@@ -688,7 +688,7 @@ class DataFrameCurator(SlotsCurator):
         .. literalinclude:: scripts/define_mini_immuno_features_labels.py
             :language: python
-         It is also possible to curate the `attrs` slot.
+        It is also possible to curate the `attrs` slot.
         .. literalinclude:: scripts/curate_dataframe_attrs.py
             :language: python
@@ -885,12 +885,20 @@ class AnnDataCurator(SlotsCurator):
         dataset: The AnnData-like object to validate & annotate.
         schema: A :class:`~lamindb.Schema` object that defines the validation constraints.
-    Example:
+    Examples:
+        Curate Ensembl gene IDs and valid features in obs:
         .. literalinclude:: scripts/curate_anndata_flexible.py
             :language: python
             :caption: curate_anndata_flexible.py
+        Curate `uns` dictionary:
+        .. literalinclude:: scripts/curate_anndata_uns.py
+            :language: python
+            :caption: curate_anndata_uns.py
     See Also:
         :meth:`~lamindb.Artifact.from_anndata`.
     """
@@ -903,7 +911,7 @@ class AnnDataCurator(SlotsCurator):
         super().__init__(dataset=dataset, schema=schema)
         if not data_is_scversedatastructure(self._dataset, "AnnData"):
             raise InvalidArgument("dataset must be AnnData-like.")
-        if schema.otype != "AnnData":
+        if schema.otype and schema.otype != "AnnData":
             raise InvalidArgument("Schema otype must be 'AnnData'.")
         for slot, slot_schema in schema.slots.items():

lamindb/errors.py CHANGED Viewed

@@ -60,6 +60,12 @@ class DoesNotExist(Exception):
     pass
+class MultipleResultsFound(Exception):
+    """Multiple records found."""
+    pass
 class InconsistentKey(Exception):
     """Inconsistent transform or artifact `key`."""

lamindb/examples/cellxgene/_cellxgene.py CHANGED Viewed

@@ -28,7 +28,7 @@ FieldType = Literal["ontology_id", "name"]
 @deprecated(new_name="save_cellxgene_defaults")
 def save_cxg_defaults() -> None:
-    return save_cxg_defaults()
+    return save_cellxgene_defaults()
 def save_cellxgene_defaults() -> None:

lamindb/examples/croissant/__init__.py CHANGED Viewed

@@ -11,35 +11,51 @@ import json
 from pathlib import Path
-def mini_immuno(n_files: int = 1) -> list[Path]:
+def mini_immuno(
+    n_files: int = 1, filepath_prefix: str = "", strip_version: bool = False
+) -> list[Path]:
     """Return paths to the mini immuno dataset and its metadata as a Croissant file.
     Args:
         n_files: Number of files inside the croissant file. Default is 1.
+        filepath_prefix: Move the dataset and references to it in a specific directory.
     Example
         ::
             croissant_path, dataset1_path = ln.examples.croissant.mini_immuno()
+            croissant_path, dataset1_path, dataset2_path = ln.examples.croissant.mini_immuno(n_files=2)
     """
     from ..datasets import file_mini_csv
     from ..datasets.mini_immuno import get_dataset1
     adata = get_dataset1(otype="AnnData")
-    dataset1_path = Path("mini_immuno.anndata.zarr")
+    if filepath_prefix:
+        dataset1_path = Path(filepath_prefix) / "mini_immuno.anndata.zarr"
+    else:
+        dataset1_path = Path("mini_immuno.anndata.zarr")
     adata.write_zarr(dataset1_path)
     orig_croissant_path = (
         Path(__file__).parent / "mini_immuno.anndata.zarr_metadata.json"
     )
     with open(orig_croissant_path, encoding="utf-8") as f:
         data = json.load(f)
+    if filepath_prefix:
+        assert data["distribution"][0]["@id"] == "mini_immuno.anndata.zarr"  # noqa: S101
+        data["distribution"][0]["@id"] = str(Path(filepath_prefix) / dataset1_path.name)
+    if strip_version:
+        data.pop("version", None)
     if n_files == 2:
-        dataset2_path = file_mini_csv()
+        file_mini_csv()
+        if filepath_prefix:
+            dataset2_path = Path(filepath_prefix) / "mini.csv"
+        else:
+            dataset2_path = Path("mini.csv")
         data["distribution"].append(
             {
                 "@type": "sc:FileObject",
-                "@id": "mini.csv",
+                "@id": dataset2_path.as_posix(),
                 "name": "mini.csv",
                 "encodingFormat": "text/csv",
             }

lamindb/examples/datasets/_core.py CHANGED Viewed

@@ -353,7 +353,7 @@ def anndata_suo22_Visium10X():  # pragma: no cover
     return ad.read_h5ad(filepath)
-def mudata_papalexi21_subset() -> MuData:  # pragma: no cover
+def mudata_papalexi21_subset(with_uns: bool = False) -> MuData:  # pragma: no cover
     """A subsetted mudata from papalexi21.
     To reproduce the subsetting:
@@ -415,6 +415,13 @@ def mudata_papalexi21_subset() -> MuData:  # pragma: no cover
     mdata["hto"].obs["technique"] = mdata["hto"].obs["technique"].astype("category")
     mdata.pull_obs(["technique"], mods="hto")
+    if with_uns:
+        mdata.uns["study_metadata"] = {
+            "temperature": 21.6,
+            "experiment": "Experiment 1",
+        }
+        mdata["rna"].uns["site_metadata"] = {"pos": 99.9, "site_id": "SITE001"}
     return mdata

lamindb/examples/datasets/mini_immuno.py CHANGED Viewed

@@ -78,7 +78,6 @@ def get_dataset1(
     with_outdated_gene: bool = False,
     with_wrong_subtype: bool = False,
     with_index_type_mismatch: bool = False,
-    with_nested_uns: bool = False,
 ) -> pd.DataFrame | ad.AnnData:
     """A small tabular dataset measuring expression & metadata."""
     # define the data in the dataset

lamindb/examples/fixtures/sheets.py CHANGED Viewed

@@ -46,6 +46,8 @@ def populate_sheets_compound_treatment():
     # Samples ---------------------------
+    project = ln.Feature(name="project", dtype=ln.Project).save()
+    project1 = ln.Project(name="Project 1").save()
     sample_type = ln.Record(name="BioSample", is_type=True).save()
     treatment = ln.Feature(name="treatment", dtype=treatment_type).save()
     cell_line = ln.Feature(name="cell_line", dtype=bt.CellLine).save()
@@ -54,7 +56,7 @@ def populate_sheets_compound_treatment():
     cell_line.save()
     schema1 = ln.Schema(
         name="My samples schema 2025-06",
-        features=[treatment, cell_line, preparation_date],
+        features=[treatment, cell_line, preparation_date, project],
     ).save()
     sample_sheet1 = ln.Record(
         name="My samples 2025-06", schema=schema1, type=sample_type
@@ -69,6 +71,7 @@ def populate_sheets_compound_treatment():
     ln.models.RecordJson(
         record=sample1, feature=preparation_date, value="2025-06-01T05:00:00"
     ).save()
+    ln.models.RecordProject(record=sample1, feature=project, value=project1).save()
     # populate sample2
     sample2 = ln.Record(name="sample2", type=sample_sheet1).save()
     ln.models.RecordRecord(record=sample2, feature=treatment, value=treatment2).save()
@@ -76,12 +79,13 @@ def populate_sheets_compound_treatment():
     ln.models.RecordJson(
         record=sample2, feature=preparation_date, value="2025-06-01T06:00:00"
     ).save()
+    ln.models.RecordProject(record=sample2, feature=project, value=project1).save()
     # another sheet for samples
     sample_note = ln.Feature(name="sample_note", dtype="str").save()
     schema2 = ln.Schema(
         name="My samples schema 2025-07",
-        features=[treatment, cell_line, sample_note],
+        features=[treatment, cell_line, sample_note, project],
     ).save()
     # the sheet
     sample_sheet2 = ln.Record(
@@ -94,6 +98,7 @@ def populate_sheets_compound_treatment():
     ln.models.RecordJson(
         record=sample3, feature=preparation_date, value="2025-06-02T05:00:00Z"
     ).save()
+    ln.models.RecordProject(record=sample3, feature=project, value=project1).save()
     # populate sample4
     sample4 = ln.Record(type=sample_sheet2).save()
     ln.models.RecordRecord(record=sample4, feature=treatment, value=treatment2).save()
@@ -101,6 +106,7 @@ def populate_sheets_compound_treatment():
     ln.models.RecordJson(
         record=sample4, feature=preparation_date, value="2025-06-02T06:00:00Z"
     ).save()
+    ln.models.RecordProject(record=sample4, feature=project, value=project1).save()
     yield treatments_sheet, sample_sheet1

lamindb/integrations/_croissant.py CHANGED Viewed

@@ -4,6 +4,10 @@ import json
 from pathlib import Path
 from typing import TYPE_CHECKING, Any
+import lamindb_setup as ln_setup
+from lamin_utils import logger
+from lamindb_setup.core.upath import UPath
 if TYPE_CHECKING:
     import lamindb as ln
@@ -27,6 +31,8 @@ def curate_from_croissant(
     """
     import lamindb as ln
+    from ..models.artifact import check_path_in_existing_storage
     # Load CroissantML data
     if isinstance(croissant_data, (str, Path)):
         if not Path(croissant_data).exists():
@@ -49,10 +55,10 @@ def curate_from_croissant(
     # Extract basic metadata
     dataset_name = data["name"]
-    description = data.get("description", "")
-    version = data.get("version", "1.0")
-    license_info = data.get("license", "")
-    project_name = data.get("cr:projectName", "")
+    description = data.get("description", None)
+    version = data.get("version", None)
+    license_info = data.get("license", None)
+    project_name = data.get("cr:projectName", None)
     # Create license feature and label if license info exists
     license_label = None
@@ -86,18 +92,35 @@ def curate_from_croissant(
             content_url = dist.get("contentUrl", "")
             file_path = content_url or data.get("url", "")
         if not file_path:
-            raise ValueError(
-                f"No valid file path found in croissant distribution: {dist}"
+            raise ValueError(f"No file path found in croissant distribution: {dist}")
+        if not UPath(file_path).exists():
+            raise ValueError(f"Inferred file path does not exist: {file_path}")
+        result = check_path_in_existing_storage(
+            file_path, check_hub_register_storage=ln_setup.settings.instance.is_on_hub
+        )
+        if isinstance(result, ln.Storage):
+            key = None  # will automatically use existing storage key
+        else:
+            current_storage_location = (
+                ln.settings.storage
+                if not ln.setup.settings.instance.keep_artifacts_local
+                else ln.settings.local_storage
+            )
+            logger.warning(
+                f"file path {file_path} is not part of a known storage location, will be duplicated to: {current_storage_location}"
             )
+            key = file_id
         if len(file_distributions) == 1:
-            artifact_description = f"{dataset_name}"
-            if file_id != dataset_name:
-                artifact_description += f" ({file_id})"
-            artifact_description += f" - {description}"
+            # it doesn't make sense to have the dataset name on the individual
+            # artifact if it's part of a collection
+            artifact_description = dataset_name
+            if description is not None:
+                artifact_description += f" - {description}"
         else:
-            artifact_description = f"{file_id}"
+            artifact_description = None
         artifact = ln.Artifact(  # type: ignore
             file_path,
+            key=key,
             description=artifact_description,
             version=version,
             kind="dataset",

lamindb/migrations/0121_recorduser.py CHANGED Viewed

@@ -50,4 +50,11 @@ class Migration(migrations.Migration):
             },
             bases=(models.Model, lamindb.models.sqlrecord.IsLink),
         ),
+        migrations.AddField(
+            model_name="record",
+            name="linked_users",
+            field=models.ManyToManyField(
+                related_name="records", through="lamindb.RecordUser", to="lamindb.user"
+            ),
+        ),
     ]

lamindb/models/__init__.py CHANGED Viewed

@@ -96,6 +96,7 @@ from .record import (
     RecordRecord,
     RecordULabel,
     RecordRun,
+    RecordUser,
     RecordArtifact,
     ArtifactRecord,
 )

lamindb/models/_feature_manager.py CHANGED Viewed

@@ -23,7 +23,7 @@ from rich.table import Column, Table
 from rich.text import Text
 from lamindb.core.storage import LocalPathClasses
-from lamindb.errors import DoesNotExist, ValidationError
+from lamindb.errors import DoesNotExist, InvalidArgument, ValidationError
 from lamindb.models._from_values import _format_values
 from lamindb.models.feature import (
     serialize_pandas_dtype,
@@ -33,7 +33,6 @@ from lamindb.models.save import save
 from lamindb.models.schema import DICT_KEYS_TYPE, Schema
 from lamindb.models.sqlrecord import (
     REGISTRY_UNIQUE_FIELD,
-    Registry,
     get_name_field,
     transfer_fk_to_default_db_bulk,
     transfer_to_default_db,
@@ -65,7 +64,7 @@ if TYPE_CHECKING:
         Collection,
         IsLink,
     )
-    from lamindb.models.query_set import QuerySet
+    from lamindb.models.query_set import BasicQuerySet
     from .run import Run
@@ -100,7 +99,7 @@ def get_schema_by_slot_(host: Artifact) -> dict[str, Schema]:
 def get_label_links(
     host: Artifact | Collection, registry: str, feature: Feature
-) -> QuerySet:
+) -> BasicQuerySet:
     kwargs = {"artifact_id": host.id, "feature_id": feature.id}
     link_records = (
         getattr(host, host.features._accessor_by_registry[registry])  # type: ignore
@@ -110,7 +109,7 @@ def get_label_links(
     return link_records
-def get_schema_links(host: Artifact | Collection) -> QuerySet:
+def get_schema_links(host: Artifact | Collection) -> BasicQuerySet:
     kwargs = {"artifact_id": host.id}
     links_schema = host.feature_sets.through.objects.filter(**kwargs)
     return links_schema
@@ -562,21 +561,29 @@ def infer_feature_type_convert_json(
 def filter_base(
-    registry: Registry, _skip_validation: bool = True, **expression
-) -> QuerySet:
-    from .artifact import Artifact
+    queryset: BasicQuerySet,
+    _skip_validation: bool = True,
+    **expression,
+) -> BasicQuerySet:
+    from lamindb.models import Artifact, BasicQuerySet, QuerySet
+    # not QuerySet but only BasicQuerySet
+    assert isinstance(queryset, BasicQuerySet) and not isinstance(queryset, QuerySet)  # noqa: S101
+    registry = queryset.model
+    db = queryset.db
     model = Feature
     value_model = FeatureValue
     keys_normalized = [key.split("__")[0] for key in expression]
     if not _skip_validation:
-        validated = model.validate(keys_normalized, field="name", mute=True)
+        validated = model.using(db).validate(keys_normalized, field="name", mute=True)
         if sum(validated) != len(keys_normalized):
             raise ValidationError(
                 f"Some keys in the filter expression are not registered as features: {np.array(keys_normalized)[~validated]}"
             )
     new_expression = {}
-    features = model.filter(name__in=keys_normalized).all().distinct()
+    features = model.using(db).filter(name__in=keys_normalized).all().distinct()
     feature_param = "feature"
     for key, value in expression.items():
         split_key = key.split("__")
@@ -594,7 +601,7 @@ def filter_base(
                     from .artifact import ArtifactFeatureValue
                     if value:  # True
-                        return Artifact.objects.exclude(
+                        return queryset.exclude(
                             id__in=Subquery(
                                 ArtifactFeatureValue.objects.filter(
                                     featurevalue__feature=feature
@@ -602,7 +609,7 @@ def filter_base(
                             )
                         )
                     else:
-                        return Artifact.objects.exclude(
+                        return queryset.exclude(
                             id__in=Subquery(
                                 ArtifactFeatureValue.objects.filter(
                                     featurevalue__feature=feature
@@ -626,9 +633,9 @@ def filter_base(
                         f"links_{result['registry'].__name__.lower()}__feature": feature
                     }
                     if value:  # True
-                        return Artifact.objects.exclude(**kwargs)
+                        return queryset.exclude(**kwargs)
                     else:
-                        return Artifact.objects.filter(**kwargs)
+                        return queryset.filter(**kwargs)
             else:
                 # because SQL is sensitive to whether querying with __in or not
                 # and might return multiple equivalent records for the latter
@@ -642,7 +649,7 @@ def filter_base(
                     # we need the comparator here because users might query like so
                     # ln.Artifact.filter(experiment__contains="Experi")
                     expression = {f"{field_name}{comparator}": value}
-                    labels = result["registry"].filter(**expression).all()
+                    labels = result["registry"].using(db).filter(**expression).all()
                     if len(labels) == 0:
                         raise DoesNotExist(
                             f"Did not find a {label_registry.__name__} matching `{field_name}{comparator}={value}`"
@@ -668,9 +675,62 @@ def filter_base(
             # find artifacts that are annotated by all of them at the same
             # time; hence, we don't want the __in construct that we use to match strings
             # https://laminlabs.slack.com/archives/C04FPE8V01W/p1688328084810609
-    if not (new_expression):
+    if not new_expression:
         raise NotImplementedError
-    return registry.objects.filter(**new_expression)
+    return queryset.filter(**new_expression)
+def filter_with_features(
+    queryset: BasicQuerySet, *queries, **expressions
+) -> BasicQuerySet:
+    from lamindb.models import Artifact, BasicQuerySet, QuerySet
+    if isinstance(queryset, QuerySet):
+        # need to avoid infinite recursion because
+        # filter_with_features is called in queryset.filter otherwise
+        filter_kwargs = {"_skip_filter_with_features": True}
+    else:
+        filter_kwargs = {}
+    registry = queryset.model
+    if registry is Artifact and not any(e.startswith("kind") for e in expressions):
+        exclude_kwargs = {"kind": "__lamindb_run__"}
+    else:
+        exclude_kwargs = {}
+    if expressions:
+        keys_normalized = [key.split("__")[0] for key in expressions]
+        field_or_feature_or_param = keys_normalized[0].split("__")[0]
+        if field_or_feature_or_param in registry.__get_available_fields__():
+            qs = queryset.filter(*queries, **expressions, **filter_kwargs)
+        elif all(
+            features_validated := Feature.objects.using(queryset.db).validate(
+                keys_normalized, field="name", mute=True
+            )
+        ):
+            # filter_base requires qs to be BasicQuerySet
+            qs = filter_base(
+                queryset._to_class(BasicQuerySet, copy=True),
+                _skip_validation=True,
+                **expressions,
+            )._to_class(type(queryset), copy=False)
+            qs = qs.filter(*queries, **filter_kwargs)
+        else:
+            features = ", ".join(sorted(np.array(keys_normalized)[~features_validated]))
+            message = f"feature names: {features}"
+            avail_fields = registry.__get_available_fields__()
+            if "_branch_code" in avail_fields:
+                avail_fields.remove("_branch_code")  # backward compat
+            fields = ", ".join(sorted(avail_fields))
+            raise InvalidArgument(
+                f"You can query either by available fields: {fields}\n"
+                f"Or fix invalid {message}"
+            )
+    else:
+        qs = queryset.filter(*queries, **filter_kwargs)
+    return qs.exclude(**exclude_kwargs) if exclude_kwargs else qs
 # for deprecated functionality
@@ -765,7 +825,7 @@ class FeatureManager:
         return describe_features(self._host, to_dict=True)  # type: ignore
     @deprecated("slots[slot].members")
-    def __getitem__(self, slot) -> QuerySet:
+    def __getitem__(self, slot) -> BasicQuerySet:
         if slot not in self.slots:
             raise ValueError(
                 f"No linked feature set for slot: {slot}\nDid you get validation"

lamindb 1.11a1__py3-none-any.whl → 1.11.2__py3-none-any.whl

lamindb 1.11a1__py3-none-any.whl → 1.11.2__py3-none-any.whl