PyPI - lamindb - Versions diffs - 0.65.0__py3-none-any.whl → 0.66.0__py3-none-any.whl - Mend

lamindb 0.65.0__py3-none-any.whl → 0.66.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (16)

lamindb/__init__.py +1 -1
lamindb/_collection.py +31 -4
lamindb/_feature.py +3 -2
lamindb/_parents.py +16 -8
lamindb/_query_set.py +37 -19
lamindb/_registry.py +5 -1
lamindb/dev/__init__.py +11 -2
lamindb/dev/_data.py +5 -6
lamindb/dev/_label_manager.py +2 -1
lamindb/dev/_mapped_collection.py +109 -30
lamindb/dev/_run_context.py +7 -5
lamindb/dev/_track_environment.py +7 -3
{lamindb-0.65.0.dist-info → lamindb-0.66.0.dist-info}/METADATA +10 -10
{lamindb-0.65.0.dist-info → lamindb-0.66.0.dist-info}/RECORD +16 -16
{lamindb-0.65.0.dist-info → lamindb-0.66.0.dist-info}/LICENSE +0 -0
{lamindb-0.65.0.dist-info → lamindb-0.66.0.dist-info}/WHEEL +0 -0

lamindb/__init__.py CHANGED Viewed

@@ -54,7 +54,7 @@ Modules & settings:
 """
-__version__ = "0.65.0"  # denote a release candidate for 0.1.0 with 0.1rc1
+__version__ = "0.66.0"  # denote a release candidate for 0.1.0 with 0.1rc1
 import os as _os

lamindb/_collection.py CHANGED Viewed

@@ -5,7 +5,7 @@ import anndata as ad
 import pandas as pd
 from lamin_utils import logger
 from lamindb_setup.dev._docs import doc_args
-from lnschema_core.models import Collection, Feature, FeatureSet
+from lnschema_core.models import Collection, CollectionArtifact, Feature, FeatureSet
 from lnschema_core.types import AnnDataLike, DataLike, FieldAttr, VisibilityChoice
 from lamindb._utils import attach_func_to_class_method
@@ -15,6 +15,7 @@ from lamindb.dev.versioning import get_uid_from_old_version, init_uid
 from . import _TESTING, Artifact, Run
 from ._artifact import parse_feature_sets_from_anndata
+from ._query_set import QuerySet
 from ._registry import init_self_from_db
 from .dev._data import (
     add_transform_to_kwargs,
@@ -312,9 +313,11 @@ def from_artifacts(artifacts: Iterable[Artifact]) -> Tuple[str, Dict[str, str]]:
 def mapped(
     self,
     label_keys: Optional[Union[str, List[str]]] = None,
-    join_vars: Optional[Literal["auto", "inner"]] = "auto",
+    join: Optional[Literal["inner", "outer"]] = "inner",
     encode_labels: bool = True,
+    cache_categories: bool = True,
     parallel: bool = False,
+    dtype: Optional[str] = None,
     stream: bool = False,
     is_run_input: Optional[bool] = None,
 ) -> "MappedCollection":
@@ -328,7 +331,15 @@ def mapped(
             path_list.append(artifact.stage())
         else:
             path_list.append(artifact.path)
-    return MappedCollection(path_list, label_keys, join_vars, encode_labels, parallel)
+    return MappedCollection(
+        path_list,
+        label_keys,
+        join,
+        encode_labels,
+        cache_categories,
+        parallel,
+        dtype,
+    )
 # docstring handled through attach_func_to_class_method
@@ -416,7 +427,14 @@ def save(self, *args, **kwargs) -> None:
     super(Collection, self).save()
     if hasattr(self, "_artifacts"):
         if self._artifacts is not None and len(self._artifacts) > 0:
-            self.artifacts.set(self._artifacts)
+            links = [
+                CollectionArtifact(collection_id=self.id, artifact_id=artifact.id)
+                for artifact in self._artifacts
+            ]
+            # the below seems to preserve the order of the list in the
+            # auto-incrementing integer primary
+            # merely using .unordered_artifacts.set(*...) doesn't achieve this
+            CollectionArtifact.objects.bulk_create(links)
     save_feature_set_links(self)
@@ -429,6 +447,14 @@ def restore(self) -> None:
         self.artifact.save()
+@property  # type: ignore
+@doc_args(Collection.artifacts.__doc__)
+def artifacts(self) -> QuerySet:
+    """{}."""
+    _track_run_input(self)
+    return self.unordered_artifacts.order_by("collectionartifact__id")
 METHOD_NAMES = [
     "__init__",
     "from_anndata",
@@ -455,3 +481,4 @@ for name in METHOD_NAMES:
 # this seems a Django-generated function
 delattr(Collection, "get_visibility_display")
+Collection.artifacts = artifacts

lamindb/_feature.py CHANGED Viewed

@@ -9,6 +9,7 @@ from lamindb._utils import attach_func_to_class_method
 from lamindb.dev._settings import settings
 from . import _TESTING
+from ._query_set import RecordsList
 FEATURE_TYPES = {
     "int": "number",
@@ -86,7 +87,7 @@ def categoricals_from_df(df: "pd.DataFrame") -> Dict:
 @classmethod  # type:ignore
 @doc_args(Feature.from_df.__doc__)
-def from_df(cls, df: "pd.DataFrame") -> List["Feature"]:
+def from_df(cls, df: "pd.DataFrame") -> "RecordsList":
     """{}."""
     categoricals = categoricals_from_df(df)
@@ -141,7 +142,7 @@ def from_df(cls, df: "pd.DataFrame") -> List["Feature"]:
     #         f" {colors.yellow('unmapped categories')}:\n     "
     #         f" {categoricals_with_unmapped_categories_formatted}"
     #     )
-    return features
+    return RecordsList(features)
 @doc_args(Feature.save.__doc__)

lamindb/_parents.py CHANGED Viewed

@@ -275,10 +275,15 @@ def _record_label(record: Registry, field: Optional[str] = None):
         )
     elif isinstance(record, Run):
         name = f'{record.transform.name.replace("&", "&amp;")}'
+        user_display = (
+            record.created_by.handle
+            if record.created_by.name is None
+            else record.created_by.name
+        )
         return (
             rf'<{TRANSFORM_EMOJIS.get(str(record.transform.type), "💫")} {name}<BR/><FONT COLOR="GREY" POINT-SIZE="10"'
             rf' FACE="Monospace">uid={record.transform.uid}<BR/>type={record.transform.type},'
-            rf" user={record.created_by.name}<BR/>run_at={format_field_value(record.run_at)}</FONT>>"
+            rf" user={user_display}<BR/>run={format_field_value(record.run_at)}</FONT>>"
         )
     elif isinstance(record, Transform):
         name = f'{record.name.replace("&", "&amp;")}'
@@ -317,13 +322,13 @@ def _get_all_parent_runs(data: Union[Artifact, Collection]) -> List:
             inputs_run = (
                 r.__getattribute__(f"input_{name}s").all().filter(visibility=1).list()
             )
-            if name == "file":
+            if name == "artifact":
                 inputs_run += r.input_collections.all().filter(visibility=1).list()
             run_inputs_outputs += [(inputs_run, r)]
             outputs_run = (
                 r.__getattribute__(f"output_{name}s").all().filter(visibility=1).list()
             )
-            if name == "file":
+            if name == "artifact":
                 outputs_run += r.output_collections.all().filter(visibility=1).list()
             run_inputs_outputs += [(r, outputs_run)]
             inputs += inputs_run
@@ -337,8 +342,11 @@ def _get_all_child_runs(data: Union[Artifact, Collection]) -> List:
     all_runs: Set[Run] = set()
     run_inputs_outputs = []
-    runs = {f.run for f in data.run.__getattribute__(f"output_{name}s").all()}
-    if name == "file":
+    if data.run is not None:
+        runs = {f.run for f in data.run.__getattribute__(f"output_{name}s").all()}
+    else:
+        runs = set()
+    if name == "artifact" and data.run is not None:
         runs.update(
             {
                 f.run
@@ -352,13 +360,13 @@ def _get_all_child_runs(data: Union[Artifact, Collection]) -> List:
             inputs_run = (
                 r.__getattribute__(f"input_{name}s").all().filter(visibility=1).list()
             )
-            if name == "file":
+            if name == "artifact":
                 inputs_run += r.input_collections.all().filter(visibility=1).list()
             run_inputs_outputs += [(inputs_run, r)]
             outputs_run = (
                 r.__getattribute__(f"output_{name}s").all().filter(visibility=1).list()
             )
-            if name == "file":
+            if name == "artifact":
                 outputs_run += r.output_collections.all().filter(visibility=1).list()
             run_inputs_outputs += [(r, outputs_run)]
             child_runs.update(
@@ -366,7 +374,7 @@ def _get_all_child_runs(data: Union[Artifact, Collection]) -> List:
                     **{f"input_{name}s__id__in": [i.id for i in outputs_run]}
                 ).list()
             )
-            if name == "file":
+            if name == "artifact":
                 child_runs.update(
                     Run.filter(
                         input_collections__id__in=[i.id for i in outputs_run]

lamindb/_query_set.py CHANGED Viewed

@@ -1,4 +1,5 @@
-from typing import Iterable, List, NamedTuple, Optional, Union
+from collections import UserList
+from typing import Dict, Iterable, List, NamedTuple, Optional, Union
 import pandas as pd
 from django.db import models
@@ -21,6 +22,40 @@ class MultipleResultsFound(Exception):
 #     return (series + timedelta).dt.strftime("%Y-%m-%d %H:%M:%S %Z")
+def get_keys_from_df(data: List, registry: Registry) -> List[str]:
+    if len(data) > 0:
+        if isinstance(data[0], dict):
+            keys = list(data[0].keys())
+        else:
+            keys = list(data[0].__dict__.keys())
+            if "_state" in keys:
+                keys.remove("_state")
+    else:
+        keys = [
+            field.name
+            for field in registry._meta.fields
+            if not isinstance(field, models.ForeignKey)
+        ]
+        keys += [
+            f"{field.name}_id"
+            for field in registry._meta.fields
+            if isinstance(field, models.ForeignKey)
+        ]
+    return keys
+class RecordsList(UserList):
+    """Is ordered, can't be queried, but has `.df()`."""
+    def __init__(self, records: List[Registry]):
+        super().__init__(record for record in records)
+    def df(self) -> pd.DataFrame:
+        keys = get_keys_from_df(self.data, self.data[0].__class__)
+        values = [record.__dict__ for record in self.data]
+        return pd.DataFrame(values, columns=keys)
 class QuerySet(models.QuerySet, CanValidate, IsTree):
     """Lazily loaded queried records returned by queries.
@@ -59,24 +94,7 @@ class QuerySet(models.QuerySet, CanValidate, IsTree):
             >>> ln.ULabel.filter().df(include=["labels__name", "labels__created_by_id"])
         """
         data = self.values()
-        if len(data) > 0:
-            keys = list(data[0].keys())
-            if "created_at" in keys:
-                keys.remove("created_at")
-        else:
-            keys = [
-                field.name
-                for field in self.model._meta.fields
-                if (
-                    not isinstance(field, models.ForeignKey)
-                    and field.name != "created_at"
-                )
-            ]
-            keys += [
-                f"{field.name}_id"
-                for field in self.model._meta.fields
-                if isinstance(field, models.ForeignKey)
-            ]
+        keys = get_keys_from_df(data, self.model)
         df = pd.DataFrame(self.values(), columns=keys)
         # if len(df) > 0 and "updated_at" in df:
         #     df.updated_at = format_and_convert_to_local_time(df.updated_at)

lamindb/_registry.py CHANGED Viewed

@@ -469,7 +469,11 @@ def save(self, *args, **kwargs) -> None:
     if result is not None:
         init_self_from_db(self, result)
     else:
-        super(Registry, self).save(*args, **kwargs)
+        # here, we can't use the parents argument
+        save_kwargs = kwargs.copy()
+        if "parents" in save_kwargs:
+            save_kwargs.pop("parents")
+        super(Registry, self).save(*args, **save_kwargs)
     if db is not None and db != "default":
         if hasattr(self, "labels"):
             from copy import copy

lamindb/dev/__init__.py CHANGED Viewed

@@ -14,6 +14,7 @@ Queries of registries:
    QuerySet
    QueryManager
+   RecordsList
 Functionality of data registries:
@@ -24,6 +25,7 @@ Functionality of data registries:
    FeatureManager
    LabelManager
    IsTree
+   IsVersioned
 Functionality of metadata registries:
@@ -51,10 +53,17 @@ Auxiliary tools:
 """
 from lamin_utils._inspect import InspectResult
-from lnschema_core.models import CanValidate, Data, HasParents, IsTree, Registry
+from lnschema_core.models import (
+    CanValidate,
+    Data,
+    HasParents,
+    IsTree,
+    IsVersioned,
+    Registry,
+)
 from lamindb._query_manager import QueryManager
-from lamindb._query_set import QuerySet
+from lamindb._query_set import QuerySet, RecordsList
 from lamindb.dev._feature_manager import FeatureManager
 from lamindb.dev._label_manager import LabelManager

lamindb/dev/_data.py CHANGED Viewed

@@ -113,8 +113,7 @@ def describe(self: Data):
         "created_by": "👤",
         "transform": _transform_emoji(self.transform),
         "run": "👣",
-        "initial_version": "🔖",
-        "file": "📄",
+        "artifact": "📄",
     }
     if len(foreign_key_fields) > 0:  # always True for Artifact and Collection
         record_msg = f"{colors.green(model_name)}{__repr__(self, include_foreign_keys=False).lstrip(model_name)}"
@@ -209,7 +208,7 @@ def add_labels(
 ) -> None:
     """{}."""
     if self._state.adding:
-        raise ValueError("Please save the file/collection before adding a label!")
+        raise ValueError("Please save the artifact/collection before adding a label!")
     if isinstance(records, (QuerySet, QuerySet.__base__)):  # need to have both
         records = records.list()
@@ -331,7 +330,7 @@ def add_labels(
                             id=old_feature_set_link.feature_set_id
                         ).one()
                         logger.info(
-                            "no file links to it anymore, deleting feature set"
+                            "nothing links to it anymore, deleting feature set"
                             f" {old_feature_set}"
                         )
                         old_feature_set.delete()
@@ -368,7 +367,7 @@ def _track_run_input(
         if run is None:
             if settings.track_run_inputs:
                 logger.hint(
-                    "you can auto-track this file as a run input by calling"
+                    "you can auto-track these data as a run input by calling"
                     " `ln.track()`"
                 )
         # assume we have a run record
@@ -390,7 +389,7 @@ def _track_run_input(
                     track_run_input = True
                 else:
                     logger.hint(
-                        "track this file as a run input by passing `is_run_input=True`"
+                        "track these data as a run input by passing `is_run_input=True`"
                     )
     else:
         track_run_input = is_run_input

lamindb/dev/_label_manager.py CHANGED Viewed

@@ -23,11 +23,12 @@ def get_labels_as_dict(self: Data):
     ).items():
         if related_name in {
             "feature_sets",
-            "files",
+            "artifacts",
             "input_of",
             "collections",
             "source_of",
             "report_of",
+            "environment_of",
         }:
             continue
         if self.id is not None:

lamindb/dev/_mapped_collection.py CHANGED Viewed

@@ -5,6 +5,7 @@ from typing import List, Literal, Optional, Union
 import numpy as np
 import pandas as pd
+from lamin_utils import logger
 from lamindb_setup.dev.upath import UPath
 from .storage._backed_access import (
@@ -57,10 +58,14 @@ class MappedCollection:
         self,
         path_list: List[Union[str, PathLike]],
         label_keys: Optional[Union[str, List[str]]] = None,
-        join_vars: Optional[Literal["auto", "inner"]] = "auto",
+        join: Optional[Literal["inner", "outer"]] = "outer",
         encode_labels: bool = True,
+        cache_categories: bool = True,
         parallel: bool = False,
+        dtype: Optional[str] = None,
     ):
+        assert join in {None, "inner", "outer"}
         self.storages = []  # type: ignore
         self.conns = []  # type: ignore
         self.parallel = parallel
@@ -79,16 +84,22 @@ class MappedCollection:
         self.indices = np.hstack([np.arange(n_obs) for n_obs in self.n_obs_list])
         self.storage_idx = np.repeat(np.arange(len(self.storages)), self.n_obs_list)
-        self.join_vars = join_vars if len(path_list) > 1 else None
+        self.join_vars = join if len(path_list) > 1 else None
         self.var_indices = None
         if self.join_vars is not None:
             self._make_join_vars()
         self.encode_labels = encode_labels
         self.label_keys = [label_keys] if isinstance(label_keys, str) else label_keys
-        if self.label_keys is not None and self.encode_labels:
-            self._make_encoders(self.label_keys)
+        if self.label_keys is not None:
+            if cache_categories:
+                self._cache_categories(self.label_keys)
+            else:
+                self._cache_cats: dict = {}
+            if self.encode_labels:
+                self._make_encoders(self.label_keys)
+        self._dtype = dtype
         self._closed = False
     def _make_connections(self, path_list: list, parallel: bool):
@@ -104,6 +115,18 @@ class MappedCollection:
             self.conns.append(conn)
             self.storages.append(storage)
+    def _cache_categories(self, label_keys: list):
+        self._cache_cats = {}
+        decode = np.frompyfunc(lambda x: x.decode("utf-8"), 1, 1)
+        for label in label_keys:
+            self._cache_cats[label] = []
+            for storage in self.storages:
+                with _Connect(storage) as store:
+                    cats = self.get_categories(store, label)
+                    if cats is not None:
+                        cats = decode(cats) if isinstance(cats[0], bytes) else cats[...]
+                    self._cache_cats[label].append(cats)
     def _make_encoders(self, label_keys: list):
         self.encoders = []
         for label in label_keys:
@@ -115,20 +138,38 @@ class MappedCollection:
         for storage in self.storages:
             with _Connect(storage) as store:
                 var_list.append(_safer_read_index(store["var"]))
+        self.var_joint = None
         if self.join_vars == "auto":
             vars_eq = all(var_list[0].equals(vrs) for vrs in var_list[1:])
             if vars_eq:
                 self.join_vars = None
+                logger.info("The variables are same, no virtual join is performed.")
                 return
             else:
-                self.join_vars = "inner"
+                self.var_joint = reduce(pd.Index.intersection, var_list)
+                if len(self.var_joint) > 0:
+                    self.join_vars = "inner"
+                    logger.info(
+                        "The intersection of variables is not empty, using virtual inner join."
+                    )
+                else:
+                    self.join_vars = "outer"
+                    logger.info(
+                        "The intersection of variables is empty, using virtual outer join."
+                    )
         if self.join_vars == "inner":
-            self.var_joint = reduce(pd.Index.intersection, var_list)
-            if len(self.var_joint) == 0:
-                raise ValueError(
-                    "The provided AnnData objects don't have shared varibales."
-                )
+            if self.var_joint is None:
+                self.var_joint = reduce(pd.Index.intersection, var_list)
+                if len(self.var_joint) == 0:
+                    raise ValueError(
+                        "The provided AnnData objects don't have shared varibales."
+                    )
             self.var_indices = [vrs.get_indexer(self.var_joint) for vrs in var_list]
+        elif self.join_vars == "outer":
+            self.var_joint = reduce(pd.Index.union, var_list)
+            self.var_indices = [self.var_joint.get_indexer(vrs) for vrs in var_list]
     def __len__(self):
         return self.n_obs
@@ -137,15 +178,21 @@ class MappedCollection:
         obs_idx = self.indices[idx]
         storage_idx = self.storage_idx[idx]
         if self.var_indices is not None:
-            var_idxs = self.var_indices[storage_idx]
+            var_idxs_join = self.var_indices[storage_idx]
         else:
-            var_idxs = None
+            var_idxs_join = None
         with _Connect(self.storages[storage_idx]) as store:
-            out = [self.get_data_idx(store, obs_idx, var_idxs)]
+            out = [self.get_data_idx(store, obs_idx, var_idxs_join)]
             if self.label_keys is not None:
                 for i, label in enumerate(self.label_keys):
-                    label_idx = self.get_label_idx(store, obs_idx, label)
+                    if label in self._cache_cats:
+                        cats = self._cache_cats[label][storage_idx]
+                        if cats is None:
+                            cats = []
+                    else:
+                        cats = None
+                    label_idx = self.get_label_idx(store, obs_idx, label, cats)
                     if self.encode_labels:
                         label_idx = self.encoders[i][label_idx]
                     out.append(label_idx)
@@ -155,26 +202,50 @@ class MappedCollection:
         self,
         storage: StorageType,  # type: ignore
         idx: int,
-        var_idxs: Optional[list] = None,
+        var_idxs_join: Optional[list] = None,
         layer_key: Optional[str] = None,
     ):
         """Get the index for the data."""
         layer = storage["X"] if layer_key is None else storage["layers"][layer_key]  # type: ignore
         if isinstance(layer, ArrayTypes):  # type: ignore
-            # todo: better way to select variables
-            return layer[idx] if var_idxs is None else layer[idx][var_idxs]
+            layer_idx = layer[idx]
+            if self.join_vars is None:
+                result = layer_idx
+                if self._dtype is not None:
+                    result = result.astype(self._dtype, copy=False)
+            elif self.join_vars == "outer":
+                dtype = layer_idx.dtype if self._dtype is None else self._dtype
+                result = np.zeros(len(self.var_joint), dtype=dtype)
+                result[var_idxs_join] = layer_idx
+            else:  # inner join
+                result = layer_idx[var_idxs_join]
+                if self._dtype is not None:
+                    result = result.astype(self._dtype, copy=False)
+            return result
         else:  # assume csr_matrix here
             data = layer["data"]
             indices = layer["indices"]
             indptr = layer["indptr"]
             s = slice(*(indptr[idx : idx + 2]))
-            # this requires more memory than csr_matrix when var_idxs is not None
-            # but it is faster
-            layer_idx = np.zeros(layer.attrs["shape"][1])
-            layer_idx[indices[s]] = data[s]
-            return layer_idx if var_idxs is None else layer_idx[var_idxs]
+            data_s = data[s]
+            dtype = data_s.dtype if self._dtype is None else self._dtype
+            if self.join_vars == "outer":
+                layer_idx = np.zeros(len(self.var_joint), dtype=dtype)
+                layer_idx[var_idxs_join[indices[s]]] = data_s
+            else:
+                layer_idx = np.zeros(layer.attrs["shape"][1], dtype=dtype)
+                layer_idx[indices[s]] = data_s
+                if self.join_vars == "inner":
+                    layer_idx = layer_idx[var_idxs_join]
+            return layer_idx
-    def get_label_idx(self, storage: StorageType, idx: int, label_key: str):  # type: ignore
+    def get_label_idx(
+        self,
+        storage: StorageType,
+        idx: int,
+        label_key: str,
+        categories: Optional[list] = None,
+    ):
         """Get the index for the label by key."""
         obs = storage["obs"]  # type: ignore
         # how backwards compatible do we want to be here actually?
@@ -186,9 +257,11 @@ class MappedCollection:
                 label = labels[idx]
             else:
                 label = labels["codes"][idx]
-        cats = self.get_categories(storage, label_key)
-        if cats is not None:
+        if categories is not None:
+            cats = categories
+        else:
+            cats = self.get_categories(storage, label_key)
+        if cats is not None and len(cats) > 0:
             label = cats[label]
         if isinstance(label, bytes):
             label = label.decode("utf-8")
@@ -215,11 +288,14 @@ class MappedCollection:
         """Get merged labels."""
         labels_merge = []
         decode = np.frompyfunc(lambda x: x.decode("utf-8"), 1, 1)
-        for storage in self.storages:
+        for i, storage in enumerate(self.storages):
             with _Connect(storage) as store:
                 codes = self.get_codes(store, label_key)
                 labels = decode(codes) if isinstance(codes[0], bytes) else codes
-                cats = self.get_categories(store, label_key)
+                if label_key in self._cache_cats:
+                    cats = self._cache_cats[label_key][i]
+                else:
+                    cats = self.get_categories(store, label_key)
                 if cats is not None:
                     cats = decode(cats) if isinstance(cats[0], bytes) else cats
                     labels = cats[labels]
@@ -230,9 +306,12 @@ class MappedCollection:
         """Get merged categories."""
         cats_merge = set()
         decode = np.frompyfunc(lambda x: x.decode("utf-8"), 1, 1)
-        for storage in self.storages:
+        for i, storage in enumerate(self.storages):
             with _Connect(storage) as store:
-                cats = self.get_categories(store, label_key)
+                if label_key in self._cache_cats:
+                    cats = self._cache_cats[label_key][i]
+                else:
+                    cats = self.get_categories(store, label_key)
                 if cats is not None:
                     cats = decode(cats) if isinstance(cats[0], bytes) else cats
                     cats_merge.update(cats)

lamindb/dev/_run_context.py CHANGED Viewed

@@ -33,7 +33,9 @@ msg_manual_init = (
 )
-class UpdateNbWithNonInteractiveEditorError(Exception):
+# we don't want a real error here, as this is so frequent
+# in VSCode
+class UpdateNbWithNonInteractiveEditor(SystemExit):
     pass
@@ -230,7 +232,7 @@ class run_context:
                             "it looks like you are running ln.track() from a "
                             "notebook!\nplease install nbproject: pip install nbproject"
                         )
-                    elif isinstance(e, UpdateNbWithNonInteractiveEditorError):
+                    elif isinstance(e, UpdateNbWithNonInteractiveEditor):
                         raise e
                     elif isinstance(e, (NotebookNotSavedError, NoTitleError)):
                         raise e
@@ -435,7 +437,7 @@ class run_context:
                 cls._notebook_meta = metadata  # type: ignore
             else:
                 msg = msg_manual_init.format(notebook_path=notebook_path_str)
-                raise UpdateNbWithNonInteractiveEditorError(msg)
+                raise UpdateNbWithNonInteractiveEditor(msg)
         if _env in ("lab", "notebook"):
             # save the notebook in case that title was updated
@@ -450,7 +452,7 @@ class run_context:
             is_interactive = _seconds_modified(_filepath) < 1.5  # should be ~1 sec
             if not is_interactive and needs_init:
                 msg = msg_manual_init.format(notebook_path=_filepath)
-                raise UpdateNbWithNonInteractiveEditorError(msg)
+                raise UpdateNbWithNonInteractiveEditor(msg)
             nbproject_id = metadata["id"]
             nbproject_version = metadata["version"]
@@ -509,7 +511,7 @@ class run_context:
                 cls._notebook_meta = metadata  # type: ignore
             else:
                 msg = msg_manual_init.format(notebook_path=filepath)
-                raise UpdateNbWithNonInteractiveEditorError(msg)
+                raise UpdateNbWithNonInteractiveEditor(msg)
         else:
             from lamin_cli._transform import update_transform_source_metadata

lamindb/dev/_track_environment.py CHANGED Viewed

@@ -6,9 +6,13 @@ from lnschema_core.models import Run
 def track_environment(run: Run) -> None:
-    filepath = ln_setup.settings.storage.cache_dir / f"run_env_pip_{run.uid}"
+    filepath = ln_setup.settings.storage.cache_dir / f"run_env_pip_{run.uid}.txt"
     # create a requirements.txt
     # we don't create a conda environment.yml mostly for its slowness
-    result = subprocess.run(f"pip freeze > {str(filepath)}", shell=True)
-    if result.returncode == 0:
+    try:
+        result = subprocess.run(f"pip freeze > {str(filepath)}", shell=True)
+    except OSError as e:
+        result = None
+        logger.warning(f"could not run pip freeze with error {e}")
+    if result is not None and result.returncode == 0:
         logger.info(f"tracked pip freeze > {str(filepath)}")

{lamindb-0.65.0.dist-info → lamindb-0.66.0.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: lamindb
-Version: 0.65.0
+Version: 0.66.0
 Summary: A data framework for biology.
 Author-email: Lamin Labs <open-source@lamin.ai>
 Requires-Python: >=3.8
@@ -9,10 +9,10 @@ Classifier: Programming Language :: Python :: 3.8
 Classifier: Programming Language :: Python :: 3.9
 Classifier: Programming Language :: Python :: 3.10
 Classifier: Programming Language :: Python :: 3.11
-Requires-Dist: lnschema_core==0.60.0
-Requires-Dist: lamindb_setup==0.63.0
+Requires-Dist: lnschema_core==0.61.0
+Requires-Dist: lamindb_setup==0.64.0
 Requires-Dist: lamin_utils==0.13.0
-Requires-Dist: lamin_cli==0.5.0
+Requires-Dist: lamin_cli==0.6.0
 Requires-Dist: rapidfuzz
 Requires-Dist: pyarrow
 Requires-Dist: typing_extensions!=4.6.0
@@ -62,14 +62,14 @@ Provides-Extra: zarr
 # LaminDB - A data framework for biology
-LaminDB is an open-source Python framework to manage biological data & analyses in generic backends:
+LaminDB is an open-source Python framework to manage biological data & analyses:
-- Access data & metadata across storage (files, arrays) & database (SQL) backends.
-- Track data flow across notebooks, pipelines & UI.
-- Manage registries for experimental metadata & in-house ontologies, import public ontologies.
-- Validate, standardize & annotate data using registries.
+- Access data & metadata across storage & databases.
+- Track data lineage across notebooks & pipelines.
+- Manage registries for experimental metadata & in-house ontologies.
+- Validate, standardize & annotate data.
 - Organize and share data across a mesh of LaminDB instances.
-- Manage data access with an auditable system of record.
+- Manage data access, leverage an auditable system of record.
 ## Documentation

{lamindb-0.65.0.dist-info → lamindb-0.66.0.dist-info}/RECORD RENAMED Viewed

@@ -1,15 +1,15 @@
-lamindb/__init__.py,sha256=-_IG5yZQ4fWBdgSG1qMl8oCQJf5WgAol6keJ2PuFh5I,2691
+lamindb/__init__.py,sha256=rgCY0tETrHKyB7V5f2Y3BhY4BAJicGUUYzSRwIdRlmI,2691
 lamindb/_artifact.py,sha256=eWsLj8x6Cqy8MR7LxKyScxozM52MaqOTCK8gplloP2c,38087
-lamindb/_collection.py,sha256=ZGzx58Tm76wGxMBU9nzKRkme9IduLmiuL0H-8byPkdY,16812
+lamindb/_collection.py,sha256=gVcs3A200JZilfdYd0zrX29UrAmhP9Eovu6r_SIxXQ4,17634
 lamindb/_delete.py,sha256=jO6kcIoxY6EFgqiVF2vlbXaCaqlI25AvBo7nre3JXkQ,1968
-lamindb/_feature.py,sha256=AqQZTOL38aElT3-e7WCj8Fm2Xcso0uJO0oE72fQCScU,5989
+lamindb/_feature.py,sha256=tEcqFoEj5yp4LSJfMGyiVvxDUuLoZaik6lo05ZKcCtE,6036
 lamindb/_feature_set.py,sha256=KYgdmMdXb21pfpir1J1O21in3nJvUeznECOB38qfTvk,8654
 lamindb/_filter.py,sha256=YwWqviJ34kHTMJ8NYlrEw-vsrXkKrVIPsEZSBVvMcrI,1163
 lamindb/_from_values.py,sha256=dKz4cTUBRkXOOzFX2Ix2cKhK2Lw9PyTgi7d0PI-kh3c,11869
-lamindb/_parents.py,sha256=lDNuOys4OW5wSHzH6fxEcHPsOjwVgXFq0q-I-noPO5A,13907
+lamindb/_parents.py,sha256=hyoN92YnfJFmRWmQMLLUjTKKwnIOJci5z6csMjsdYDE,14165
 lamindb/_query_manager.py,sha256=m4WUScviuNlMHeNEPZ8H8y0YsMXSbwWyfIgS4L00wBY,4332
-lamindb/_query_set.py,sha256=nacnkFaVYDmuFkpXr0fb3uNcWP6XahbMeIvJic0YCSk,9967
-lamindb/_registry.py,sha256=UX4O3Ne9QajcfG2FGXyVkyF6b-McPPxJmRQ2MwXZy3w,17254
+lamindb/_query_set.py,sha256=tItL2YNdycpbXklYd8aW4jJX6Z-kGcNclscg0v3l8t4,10495
+lamindb/_registry.py,sha256=MxYpJUKD6Qu5eO2jO6JOcQBBGxfQpiEGPJrFaXau_jw,17421
 lamindb/_run.py,sha256=659lqY32GW7F41rFUUo37OftUa38-p8yaV9Z0oF32CE,1120
 lamindb/_save.py,sha256=UlRHJGUiHGOXv90wmawZVsOqhJIqk8f1wj8MW3Rlq_c,10535
 lamindb/_storage.py,sha256=mz2Cy0CTaeJGA03A1FPQmmH0Vt2ib_KlXklaLqtN1mU,394
@@ -18,14 +18,14 @@ lamindb/_ulabel.py,sha256=HALoy6HerRnehR-u8zPH-qmiFQHWxeAwkZ31jxjrfgI,1893
 lamindb/_utils.py,sha256=LGdiW4k3GClLz65vKAVRkL6Tw-Gkx9DWAdez1jyA5bE,428
 lamindb/_validate.py,sha256=fS2685MYX9h6iAWymEorJJmDYA2CGNOSmJpesbG6faU,14400
 lamindb/_view.py,sha256=yFMu4vnt0YqvN1q11boAkwigxCH1gdliDUSbzh3IuDw,2175
-lamindb/dev/__init__.py,sha256=Sm1-zkgy_7MKwFheXDrUKiY7ZBKX_VUQVfbr_hEPVqE,1089
-lamindb/dev/_data.py,sha256=C7Z3mygwx4IGoFOtjvnmA_-O7VXZqNvJJh6QAgN2MBM,17091
+lamindb/dev/__init__.py,sha256=LLqivujL8c-oKWC15SJepAYyrTlLNvql5Vdwunc0qvE,1174
+lamindb/dev/_data.py,sha256=YPZ664qGKMl34LbZCMCEFIxQ-E81iAt_b3lvMiTe-oc,17066
 lamindb/dev/_feature_manager.py,sha256=jn8x_JbrtLFelmaFh4noOXqGSCfqVuVX0quoa7gTJtM,9366
-lamindb/dev/_label_manager.py,sha256=q8rlFA_KgyVL_rE7h52dA6whCxGu72YTj62cilKWXGM,8706
-lamindb/dev/_mapped_collection.py,sha256=Woz5iUnCzQGraF-pjzZF0fQHEJlXnL6lpkIXq_k_d64,11129
-lamindb/dev/_run_context.py,sha256=Hgmq0yYQsLHK3cUVKR3V2bFUSllaIWO5S7a8GQcjEl0,22919
+lamindb/dev/_label_manager.py,sha256=6E_pSQicqfTWDGEGe4WPn_3GZl_CCIMTZ6xJDh4EkC0,8740
+lamindb/dev/_mapped_collection.py,sha256=NRjOYnC1d3IcVyqhT_Yp0xycepmeytlngYnw-5Xcnw4,14445
+lamindb/dev/_run_context.py,sha256=4eBZsbfcFpW5nqmRLbRZxuA5oeRW17XVHMzVtMH0bKA,22965
 lamindb/dev/_settings.py,sha256=nixk8lVijCbq_fRlUpkX5gvO9AdgUFjbXzFThAJhGBA,3824
-lamindb/dev/_track_environment.py,sha256=GelTuDF_k9dXTLV5AcibfzXllmTXXorBy2RqJyb6GuI,508
+lamindb/dev/_track_environment.py,sha256=QjHWbyl2u8J4hbJG8Q_ToFaZIgS-H15Ej6syJgk-dvY,662
 lamindb/dev/_view_tree.py,sha256=K-C1BsOiEupwgkhyrsGxLFxHU45SAkiKsQbeOV9PbaY,3421
 lamindb/dev/exceptions.py,sha256=PHk5lyBdJPrrEQcid3ItfdNzz3fgiQsUmsEDdz063F0,197
 lamindb/dev/fields.py,sha256=0f0wai2aCjQYAQgI04UlCOAHo2MQknp4AsOKFDmE9iU,163
@@ -43,7 +43,7 @@ lamindb/dev/storage/file.py,sha256=jalzFQ8q110UUu_GGQBkU-g3M04h5g4LJ3nLjCzJ4pU,5
 lamindb/dev/storage/object.py,sha256=KGuOwwYuN2yCJxTXn9v0LanC0fjKwy_62P-WksHcf40,1140
 lamindb/setup/__init__.py,sha256=WaWKO-2XT67S65lSbS80hUojL-Mr_Wms9UxH6U54TsY,289
 lamindb/setup/dev/__init__.py,sha256=tBty426VGF2PGqqt2XuNU-WgvOrbOp1aZBDowjLuzgA,242
-lamindb-0.65.0.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
-lamindb-0.65.0.dist-info/WHEEL,sha256=EZbGkh7Ie4PoZfRQ8I0ZuP9VklN_TvcZ6DSE5Uar4z4,81
-lamindb-0.65.0.dist-info/METADATA,sha256=OPOAcrY3znOQE4q664MuFvFEN30kOwunGfrup364TUE,3165
-lamindb-0.65.0.dist-info/RECORD,,
+lamindb-0.66.0.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+lamindb-0.66.0.dist-info/WHEEL,sha256=EZbGkh7Ie4PoZfRQ8I0ZuP9VklN_TvcZ6DSE5Uar4z4,81
+lamindb-0.66.0.dist-info/METADATA,sha256=d9S5mPiFAzV1EhN7KB_VnugNCy7vdeivGqtxZsZPD60,3076
+lamindb-0.66.0.dist-info/RECORD,,

{lamindb-0.65.0.dist-info → lamindb-0.66.0.dist-info}/LICENSE RENAMED Viewed

File without changes

{lamindb-0.65.0.dist-info → lamindb-0.66.0.dist-info}/WHEEL RENAMED Viewed

File without changes

lamindb 0.65.0__py3-none-any.whl → 0.66.0__py3-none-any.whl

lamindb 0.65.0__py3-none-any.whl → 0.66.0__py3-none-any.whl