PyPI - lamindb - Versions diffs - 0.76.9__py3-none-any.whl → 0.76.11__py3-none-any.whl - Mend

lamindb 0.76.9__py3-none-any.whl → 0.76.11__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (27)

lamindb/__init__.py +15 -14
lamindb/_artifact.py +2 -1
lamindb/_can_validate.py +46 -4
lamindb/_collection.py +2 -1
lamindb/_curate.py +3 -1
lamindb/_feature_set.py +3 -1
lamindb/_finish.py +19 -18
lamindb/_from_values.py +110 -89
lamindb/_query_set.py +3 -1
lamindb/_record.py +81 -62
lamindb/_run.py +3 -0
lamindb/_save.py +3 -1
lamindb/_transform.py +9 -6
lamindb/core/_context.py +94 -78
lamindb/core/_data.py +113 -41
lamindb/core/_django.py +209 -0
lamindb/core/_feature_manager.py +140 -13
lamindb/core/_label_manager.py +58 -23
lamindb/core/_mapped_collection.py +1 -1
lamindb/core/_settings.py +2 -1
lamindb/core/exceptions.py +9 -9
lamindb/core/storage/_anndata_accessor.py +2 -1
lamindb/core/versioning.py +2 -14
{lamindb-0.76.9.dist-info → lamindb-0.76.11.dist-info}/METADATA +8 -8
{lamindb-0.76.9.dist-info → lamindb-0.76.11.dist-info}/RECORD +27 -26
{lamindb-0.76.9.dist-info → lamindb-0.76.11.dist-info}/LICENSE +0 -0
{lamindb-0.76.9.dist-info → lamindb-0.76.11.dist-info}/WHEEL +0 -0

lamindb/__init__.py CHANGED Viewed

@@ -21,7 +21,8 @@ Key functionality.
 .. autosummary::
    :toctree: .
-   context
+   track
+   finish
    connect
    Curator
    view
@@ -33,6 +34,7 @@ Modules and settings.
    :toctree: .
    integrations
+   context
    settings
    setup
    UPath
@@ -41,7 +43,7 @@ Modules and settings.
 """
 # denote a release candidate for 0.1.0 with 0.1rc1, 0.1a1, 0.1b1, etc.
-__version__ = "0.76.9"
+__version__ = "0.76.11"
 import os as _os
@@ -58,7 +60,7 @@ def __getattr__(name):
     raise _InstanceNotSetupError()
-if _check_instance_setup(from_lamindb=True):
+if _check_instance_setup(from_module="lnschema_core"):
     del _InstanceNotSetupError
     del __getattr__  # delete so that imports work out
     from lnschema_core.models import (
@@ -97,17 +99,16 @@ if _check_instance_setup(from_lamindb=True):
     from .core._context import context
     from .core._settings import settings
-    # schema modules
-    if not _os.environ.get("LAMINDB_MULTI_INSTANCE") == "true":
-        from lamindb_setup._init_instance import (
-            reload_schema_modules as _reload_schema_modules,
-        )
-        _reload_schema_modules(_lamindb_setup.settings.instance)
-    track = context.track  # backward compat
-    finish = context.finish  # backward compat
+    track = context.track  # simple access because these are so common
+    finish = context.finish  # simple access because these are so common
     Curate = Curator  # backward compat
     settings.__doc__ = """Global settings (:class:`~lamindb.core.Settings`)."""
-    context.__doc__ = """Global run context (:class:`~lamindb.core.Context`)."""
+    context.__doc__ = """Global run context (:class:`~lamindb.core.Context`).
+    Note that you can access:
+    - `ln.context.track()` as `ln.track()`
+    - `ln.context.finish()` as `ln.finish()`
+    """
     from django.db.models import Q

lamindb/_artifact.py CHANGED Viewed

@@ -2,8 +2,9 @@ from __future__ import annotations
 import os
 import shutil
+from collections.abc import Mapping
 from pathlib import Path, PurePath, PurePosixPath
-from typing import TYPE_CHECKING, Any, Mapping
+from typing import TYPE_CHECKING, Any
 import fsspec
 import lamindb_setup as ln_setup

lamindb/_can_validate.py CHANGED Viewed

@@ -12,7 +12,7 @@ from lnschema_core import CanValidate, Record
 from lamindb._utils import attach_func_to_class_method
-from ._from_values import _has_organism_field, _print_values
+from ._from_values import _has_organism_field, _print_values, get_or_create_records
 from ._record import _queryset, get_name_field
 if TYPE_CHECKING:
@@ -21,6 +21,33 @@ if TYPE_CHECKING:
     from lnschema_core.types import ListLike, StrField
+# from_values doesn't apply for QuerySet or Manager
+@classmethod  # type:ignore
+@doc_args(CanValidate.from_values.__doc__)
+def from_values(
+    cls,
+    values: ListLike,
+    field: StrField | None = None,
+    create: bool = False,
+    organism: Record | str | None = None,
+    source: Record | None = None,
+    mute: bool = False,
+) -> list[Record]:
+    """{}"""  # noqa: D415
+    from_source = True if cls.__module__.startswith("bionty.") else False
+    field_str = get_name_field(cls, field=field)
+    return get_or_create_records(
+        iterable=values,
+        field=getattr(cls, field_str),
+        create=create,
+        from_source=from_source,
+        organism=organism,
+        source=source,
+        mute=mute,
+    )
 @classmethod  # type: ignore
 @doc_args(CanValidate.inspect.__doc__)
 def inspect(
@@ -349,16 +376,24 @@ def _standardize(
         from bionty._bionty import create_or_get_organism_record
         organism_record = create_or_get_organism_record(
-            organism=organism, registry=registry
+            organism=organism, registry=registry, field=field
         )
         organism = (
             organism_record.name if organism_record is not None else organism_record
         )
+    # only perform synonym mapping if field is the name field
+    if hasattr(registry, "_name_field") and field != registry._name_field:
+        synonyms_field = None
     try:
         registry._meta.get_field(synonyms_field)
+        fields = {i for i in [field, return_field, synonyms_field] if i is not None}
         df = _filter_query_based_on_organism(
-            queryset=queryset, field=field, organism=organism
+            queryset=queryset,
+            field=field,
+            organism=organism,
+            fields=list(fields),
         )
     except FieldDoesNotExist:
         df = pd.DataFrame()
@@ -525,6 +560,7 @@ def _filter_query_based_on_organism(
     field: str,
     organism: str | Record | None = None,
     values_list_field: str | None = None,
+    fields: list[str] | None = None,
 ):
     """Filter a queryset based on organism."""
     import pandas as pd
@@ -536,13 +572,18 @@ def _filter_query_based_on_organism(
         from bionty._bionty import create_or_get_organism_record
         organism_record = create_or_get_organism_record(
-            organism=organism, registry=registry
+            organism=organism, registry=registry, field=field
         )
         if organism_record is not None:
             queryset = queryset.filter(organism__name=organism_record.name)
     if values_list_field is None:
+        if fields:
+            return pd.DataFrame.from_records(
+                queryset.values_list(*fields), columns=fields
+            )
         return pd.DataFrame.from_records(queryset.values())
     else:
         return queryset.values_list(values_list_field, flat=True)
@@ -564,6 +605,7 @@ METHOD_NAMES = [
     "add_synonym",
     "remove_synonym",
     "set_abbr",
+    "from_values",
 ]
 if ln_setup._TESTING:  # type: ignore

lamindb/_collection.py CHANGED Viewed

@@ -4,7 +4,6 @@ from collections import defaultdict
 from typing import (
     TYPE_CHECKING,
     Any,
-    Iterable,
     Literal,
 )
@@ -37,6 +36,8 @@ from .core._data import (
 from .core._settings import settings
 if TYPE_CHECKING:
+    from collections.abc import Iterable
     from lamindb.core.storage import UPath
     from ._query_set import QuerySet

lamindb/_curate.py CHANGED Viewed

@@ -1,7 +1,7 @@
 from __future__ import annotations
 import copy
-from typing import TYPE_CHECKING, Iterable
+from typing import TYPE_CHECKING
 import anndata as ad
 import lamindb_setup as ln_setup
@@ -19,6 +19,8 @@ from lnschema_core import (
 from .core.exceptions import ValidationError
 if TYPE_CHECKING:
+    from collections.abc import Iterable
     from lamindb_setup.core.types import UPathStr
     from lnschema_core.types import FieldAttr
     from mudata import MuData

lamindb/_feature_set.py CHANGED Viewed

@@ -1,6 +1,6 @@
 from __future__ import annotations
-from typing import TYPE_CHECKING, Iterable, Type
+from typing import TYPE_CHECKING
 import lamindb_setup as ln_setup
 import numpy as np
@@ -21,6 +21,8 @@ from .core.schema import (
 )
 if TYPE_CHECKING:
+    from collections.abc import Iterable
     import pandas as pd
     from ._query_set import QuerySet

lamindb/_finish.py CHANGED Viewed

@@ -1,8 +1,6 @@
 from __future__ import annotations
-import os
 import re
-import shutil
 from datetime import datetime, timezone
 from typing import TYPE_CHECKING
@@ -37,6 +35,13 @@ def prepare_notebook(
                     if strip_title:
                         lines.pop(i)
                         cell["source"] = "\n".join(lines)
+        # strip resaved finish error if present
+        # this is normally the last cell
+        if cell["cell_type"] == "code" and ".finish(" in cell["source"]:
+            for output in cell["outputs"]:
+                if output.get("ename", None) == "NotebookNotSaved":
+                    cell["outputs"] = []
+                    break
     return None
@@ -77,17 +82,6 @@ def notebook_to_script(
     script_path.write_text(py_content)
-def script_to_notebook(transform: Transform, notebook_path: Path) -> None:
-    import jupytext
-    # get title back
-    py_content = transform.source_code.replace(
-        "# # transform.name", f"# # {transform.name}"
-    )
-    notebook = jupytext.reads(py_content, fmt="py:percent")
-    jupytext.write(notebook, notebook_path)
 def save_context_core(
     *,
     run: Run,
@@ -97,6 +91,10 @@ def save_context_core(
     ignore_non_consecutive: bool | None = None,
     from_cli: bool = False,
 ) -> str | None:
+    from lnschema_core.models import (
+        format_field_value,  # needs to come after lamindb was imported because of CLI use
+    )
     import lamindb as ln
     from .core._context import context, is_run_from_ipython
@@ -121,7 +119,7 @@ def save_context_core(
         notebook_content = read_notebook(filepath)  # type: ignore
         if not ignore_non_consecutive:  # ignore_non_consecutive is None or False
             is_consecutive = check_consecutiveness(
-                notebook_content, calling_statement=".finish()"
+                notebook_content, calling_statement=".finish("
             )
             if not is_consecutive:
                 response = "n"  # ignore_non_consecutive == False
@@ -158,15 +156,13 @@ def save_context_core(
         if hash != ref_hash:
             response = input(
                 f"You are about to overwrite existing source code (hash '{ref_hash}') for Transform('{transform.uid}')."
-                f"Proceed? (y/n)"
+                f" Proceed? (y/n)"
             )
             if response == "y":
                 transform.source_code = source_code_path.read_text()
                 transform.hash = hash
             else:
-                logger.warning(
-                    "Please re-run `ln.context.track()` to make a new version"
-                )
+                logger.warning("Please re-run `ln.track()` to make a new version")
                 return "rerun-the-notebook"
         else:
             logger.important("source code is already saved")
@@ -236,6 +232,11 @@ def save_context_core(
     transform.save()
     # finalize
+    if not from_cli:
+        run_time = run.finished_at - run.started_at
+        logger.important(
+            f"finished Run('{run.uid[:8]}') after {run_time} at {format_field_value(run.finished_at)}"
+        )
     if ln_setup.settings.instance.is_on_hub:
         identifier = ln_setup.settings.instance.slug
         logger.important(

lamindb/_from_values.py CHANGED Viewed

@@ -1,15 +1,17 @@
 from __future__ import annotations
-from typing import TYPE_CHECKING, Iterable
+from typing import TYPE_CHECKING
 import pandas as pd
 from django.core.exceptions import FieldDoesNotExist
 from lamin_utils import colors, logger
-from lnschema_core.models import Feature, Record, ULabel
+from lnschema_core.models import Feature, Field, Record, ULabel
 from .core._settings import settings
 if TYPE_CHECKING:
+    from collections.abc import Iterable
     from lnschema_core.types import ListLike, StrField
@@ -29,20 +31,17 @@ def get_or_create_records(
     if create:
         return [registry(**{field.field.name: value}) for value in iterable]
     creation_search_names = settings.creation.search_names
-    feature: Feature = None
     organism = _get_organism_record(field, organism)
-    kwargs: dict = {}
-    if organism is not None:
-        kwargs["organism"] = organism
-    if source is not None:
-        kwargs["source"] = source
     settings.creation.search_names = False
     try:
         iterable_idx = index_iterable(iterable)
         # returns existing records & non-existing values
         records, nonexist_values, msg = get_existing_records(
-            iterable_idx=iterable_idx, field=field, mute=mute, **kwargs
+            iterable_idx=iterable_idx,
+            field=field,
+            organism=organism,
+            mute=mute,
         )
         # new records to be created based on new values
@@ -58,11 +57,10 @@ def get_or_create_records(
                 ):
                     source_record = records[0].source
             if not source_record and hasattr(registry, "public"):
-                from bionty._bionty import get_source_record
+                if organism is None:
+                    organism = _ensembl_prefix(nonexist_values[0], field, organism)
+                    organism = _get_organism_record(field, organism, force=True)
-                source_record = get_source_record(
-                    registry.public(organism=organism), registry
-                )
             if source_record:
                 from bionty.core._add_ontology import check_source_in_db
@@ -82,9 +80,10 @@ def get_or_create_records(
                 records_bionty, unmapped_values = create_records_from_source(
                     iterable_idx=nonexist_values,
                     field=field,
+                    organism=organism,
+                    source=source_record,
                     msg=msg,
                     mute=mute,
-                    **kwargs,
                 )
                 if len(records_bionty) > 0:
                     msg = ""
@@ -106,17 +105,17 @@ def get_or_create_records(
                         f"{colors.red('did not create')} {name} record{s} for "
                         f"{n_nonval} {colors.italic(f'{field.field.name}{s}')}: {print_values}"
                     )
-        if registry.__get_schema_name__() == "bionty" or registry == ULabel:
-            if isinstance(iterable, pd.Series):
-                feature = iterable.name
-            feature_name = None
-            if isinstance(feature, str):
-                feature_name = feature
-            if feature_name is not None:
-                if feature_name is not None:
-                    for record in records:
-                        record._feature = feature_name
-                logger.debug(f"added default feature '{feature_name}'")
+        # if registry.__get_schema_name__() == "bionty" or registry == ULabel:
+        #     if isinstance(iterable, pd.Series):
+        #         feature = iterable.name
+        #     feature_name = None
+        #     if isinstance(feature, str):
+        #         feature_name = feature
+        #     if feature_name is not None:
+        #         if feature_name is not None:
+        #             for record in records:
+        #                 record._feature = feature_name
+        #         logger.debug(f"added default feature '{feature_name}'")
         return records
     finally:
         settings.creation.search_names = creation_search_names
@@ -125,46 +124,27 @@ def get_or_create_records(
 def get_existing_records(
     iterable_idx: pd.Index,
     field: StrField,
+    organism: Record | None = None,
     mute: bool = False,
-    **kwargs,
 ):
+    # NOTE: existing records matching is agnostic to the source
     model = field.field.model
-    condition: dict = {} if len(kwargs) == 0 else kwargs.copy()
-    # existing records matching is agnostic to the bionty source
-    if "source" in condition:
-        condition.pop("source")
+    if organism is None and field.field.name == "ensembl_gene_id":
+        if len(iterable_idx) > 0:
+            organism = _ensembl_prefix(iterable_idx[0], field, organism)
+            organism = _get_organism_record(field, organism, force=True)
     # standardize based on the DB reference
     # log synonyms mapped terms
-    result = model.inspect(
+    syn_mapper = model.standardize(
         iterable_idx,
         field=field,
-        organism=kwargs.get("organism"),
-        source=kwargs.get("source"),
+        organism=organism,
         mute=True,
+        public_aware=False,
+        return_mapper=True,
     )
-    syn_mapper = result.synonyms_mapper
-    syn_msg = ""
-    if len(syn_mapper) > 0:
-        s = "" if len(syn_mapper) == 1 else "s"
-        names = list(syn_mapper.keys())
-        print_values = colors.green(_print_values(names))
-        syn_msg = (
-            "loaded"
-            f" {colors.green(f'{len(syn_mapper)} {model.__name__} record{s}')}"
-            f" matching {colors.italic('synonyms')}: {print_values}"
-        )
-        iterable_idx = iterable_idx.to_frame().rename(index=syn_mapper).index
-    # get all existing records in the db
-    # if necessary, create records for the values in kwargs
-    # k:v -> k:v_record
-    # kwargs is used to deal with organism
-    condition.update({f"{field.field.name}__in": iterable_idx.values})
-    query_set = model.filter(**condition)
-    records = query_set.list()
+    iterable_idx = iterable_idx.to_frame().rename(index=syn_mapper).index
     # now we have to sort the list of queried records
     # preserved = Case(
@@ -177,16 +157,33 @@ def get_existing_records(
     # records = query_set.order_by(preserved).list()
     # log validated terms
-    validated = result.validated
+    is_validated = model.validate(
+        iterable_idx, field=field, organism=organism, mute=True
+    )
+    if len(is_validated) > 0:
+        validated = iterable_idx[is_validated]
+    else:
+        validated = []
     msg = ""
-    if len(validated) > 0:
-        s = "" if len(validated) == 1 else "s"
-        print_values = colors.green(_print_values(validated))
-        msg = (
-            "loaded"
-            f" {colors.green(f'{len(validated)} {model.__name__} record{s}')}"
-            f" matching {colors.italic(f'{field.field.name}')}: {print_values}"
-        )
+    syn_msg = ""
+    if not mute:
+        if len(validated) > 0:
+            s = "" if len(validated) == 1 else "s"
+            print_values = colors.green(_print_values(validated))
+            msg = (
+                "loaded"
+                f" {colors.green(f'{len(validated)} {model.__name__} record{s}')}"
+                f" matching {colors.italic(f'{field.field.name}')}: {print_values}"
+            )
+        if len(syn_mapper) > 0:
+            s = "" if len(syn_mapper) == 1 else "s"
+            names = list(syn_mapper.keys())
+            print_values = colors.green(_print_values(names))
+            syn_msg = (
+                "loaded"
+                f" {colors.green(f'{len(syn_mapper)} {model.__name__} record{s}')}"
+                f" matching {colors.italic('synonyms')}: {print_values}"
+            )
     # no logging if all values are validated
     # logs if there are synonyms
@@ -197,20 +194,28 @@ def get_existing_records(
             logger.success(syn_msg)
         msg = ""
-    existing_values = iterable_idx.intersection(
-        query_set.values_list(field.field.name, flat=True)
-    )
-    nonexist_values = iterable_idx.difference(existing_values)
+    # get all existing records in the db
+    # if necessary, create records for the values in kwargs
+    # k:v -> k:v_record
+    query = {f"{field.field.name}__in": iterable_idx.values}
+    if organism is not None:
+        query["organism"] = organism
+    records = model.filter(**query).list()
-    return records, nonexist_values, msg
+    if len(validated) == len(iterable_idx):
+        return records, [], msg
+    else:
+        nonval_values = iterable_idx.difference(validated)
+        return records, nonval_values, msg
 def create_records_from_source(
     iterable_idx: pd.Index,
     field: StrField,
+    organism: Record | None = None,
+    source: Record | None = None,
     msg: str = "",
     mute: bool = False,
-    **kwargs,
 ):
     model = field.field.model
     records: list = []
@@ -221,19 +226,13 @@ def create_records_from_source(
     # create the corresponding bionty object from model
     try:
         # TODO: more generic
-        organism = kwargs.get("organism")
-        if field.field.name == "ensembl_gene_id":
-            if iterable_idx[0].startswith("ENSG"):
-                organism = "human"
-            elif iterable_idx[0].startswith("ENSMUSG"):
-                organism = "mouse"
-        public_ontology = model.public(organism=organism, source=kwargs.get("source"))
+        public_ontology = model.public(organism=organism, source=source)
     except Exception:
         # for custom records that are not created from public sources
         return records, iterable_idx
-    # add source record to the kwargs
-    source_record = get_source_record(public_ontology, model)
-    kwargs.update({"source": source_record})
+    # get the default source
+    if source is None:
+        source = get_source_record(public_ontology, model)
     # filter the columns in bionty df based on fields
     bionty_df = filter_bionty_df_columns(model=model, public_ontology=public_ontology)
@@ -264,15 +263,17 @@ def create_records_from_source(
         bionty_kwargs, multi_msg = _bulk_create_dicts_from_df(
             keys=mapped_values, column_name=field.field.name, df=bionty_df
         )
-        organism_kwargs = {}
-        if "organism" not in kwargs:
-            organism_record = _get_organism_record(
-                field, public_ontology.organism, force=True
-            )
-            if organism_record is not None:
-                organism_kwargs["organism"] = organism_record
+        if hasattr(model, "organism_id") and organism is None:
+            organism = _get_organism_record(field, source.organism, force=True)
+        create_kwargs = (
+            {"organism": organism, "source": source}
+            if organism is not None
+            else {"source": source}
+        )
         for bk in bionty_kwargs:
-            records.append(model(**bk, **kwargs, **organism_kwargs))
+            records.append(model(**bk, **create_kwargs))
         # number of records that matches field (not synonyms)
         validated = result.validated
@@ -364,6 +365,13 @@ def _has_organism_field(registry: type[Record]) -> bool:
 def _get_organism_record(
     field: StrField, organism: str | Record, force: bool = False
 ) -> Record:
+    """Get organism record.
+    Args:
+        field: the field to get the organism record for
+        organism: the organism to get the record for
+        force: whether to force fetching the organism record
+    """
     registry = field.field.model
     check = True
     if not force and hasattr(registry, "_ontology_id_field"):
@@ -375,8 +383,21 @@ def _get_organism_record(
     if _has_organism_field(registry) and check:
         from bionty._bionty import create_or_get_organism_record
+        if field and not isinstance(field, str):
+            field = field.field.name
         organism_record = create_or_get_organism_record(
-            organism=organism, registry=registry
+            organism=organism, registry=registry, field=field
         )
         if organism_record is not None:
             return organism_record
+def _ensembl_prefix(id: str, field: StrField, organism: Record | None) -> str | None:
+    if field.field.name == "ensembl_gene_id" and organism is None:
+        if id.startswith("ENSG"):
+            organism = "human"
+        elif id.startswith("ENSMUSG"):
+            organism = "mouse"
+    return organism

lamindb/_query_set.py CHANGED Viewed

@@ -1,7 +1,7 @@
 from __future__ import annotations
 from collections import UserList
-from typing import TYPE_CHECKING, Iterable, NamedTuple
+from typing import TYPE_CHECKING, NamedTuple
 import pandas as pd
 from django.db import models
@@ -23,6 +23,8 @@ from lnschema_core.models import (
 from lamindb.core.exceptions import DoesNotExist
 if TYPE_CHECKING:
+    from collections.abc import Iterable
     from lnschema_core.types import ListLike, StrField

lamindb 0.76.9__py3-none-any.whl → 0.76.11__py3-none-any.whl

lamindb 0.76.9__py3-none-any.whl → 0.76.11__py3-none-any.whl