lamindb 0.57.2__py3-none-any.whl → 0.58.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
lamindb/__init__.py CHANGED
@@ -53,7 +53,7 @@ Static classes & modules:
 
 """
 
-__version__ = "0.57.2"  # denote a release candidate for 0.1.0 with 0.1rc1
+__version__ = "0.58.1"  # denote a release candidate for 0.1.0 with 0.1rc1
 
 import os as _os
 
lamindb/_dataset.py CHANGED
@@ -63,12 +63,15 @@ def __init__(
         kwargs.pop("initial_version_id") if "initial_version_id" in kwargs else None
     )
     version: Optional[str] = kwargs.pop("version") if "version" in kwargs else None
+    visibility: Optional[int] = (
+        kwargs.pop("visibility") if "visibility" in kwargs else 0
+    )
     feature_sets: Dict[str, FeatureSet] = (
         kwargs.pop("feature_sets") if "feature_sets" in kwargs else {}
     )
     if not len(kwargs) == 0:
         raise ValueError(
-            f"Only data, name, run, description, reference, reference_type can be passed, you passed: {kwargs}"  # noqa
+            f"Only data, name, run, description, reference, reference_type, visibility can be passed, you passed: {kwargs}"  # noqa
         )
 
     if is_new_version_of is None:
@@ -145,6 +148,7 @@ def __init__(
         hash, feature_sets = from_files(files)  # type: ignore
     else:
         raise ValueError("Only DataFrame, AnnData and iterable of File is allowed")
+    # we ignore datasets in trash containing the same hash
    existing_dataset = Dataset.filter(hash=hash).one_or_none()
    if existing_dataset is not None:
        logger.warning(f"returning existing dataset with same hash: {existing_dataset}")
@@ -169,6 +173,7 @@ def __init__(
         run=run,
         version=version,
         initial_version_id=initial_version_id,
+        visibility=visibility,
         **kwargs,
     )
     dataset._files = files
@@ -179,10 +184,7 @@ def __init__(
     if file is not None and file.run != run:
         _track_run_input(file, run=run)
     elif files is not None:
-        for file in files:
-            if file.run != run:
-                _track_run_input(file, run=run)
-        # there is not other possibility
+        _track_run_input(files, run=run)
 
 
 @classmethod  # type: ignore
@@ -197,6 +199,8 @@ def from_df(
     modality: Optional[Modality] = None,
     reference: Optional[str] = None,
     reference_type: Optional[str] = None,
+    version: Optional[str] = None,
+    is_new_version_of: Optional["File"] = None,
 ) -> "Dataset":
     """{}"""
     feature_set = FeatureSet.from_df(df, field=field, modality=modality)
@@ -205,7 +209,15 @@ def from_df(
     else:
         feature_sets = {}
     dataset = Dataset(
-        data=df, name=name, run=run, description=description, feature_sets=feature_sets
+        data=df,
+        name=name,
+        run=run,
+        description=description,
+        feature_sets=feature_sets,
+        reference=reference,
+        reference_type=reference_type,
+        version=version,
+        is_new_version_of=is_new_version_of,
     )
     return dataset
 
@@ -222,6 +234,8 @@ def from_anndata(
     modality: Optional[Modality] = None,
     reference: Optional[str] = None,
     reference_type: Optional[str] = None,
+    version: Optional[str] = None,
+    is_new_version_of: Optional["File"] = None,
 ) -> "Dataset":
     """{}"""
     if isinstance(adata, File):
@@ -237,6 +251,10 @@ def from_anndata(
         name=name,
         description=description,
         feature_sets=feature_sets,
+        reference=reference,
+        reference_type=reference_type,
+        version=version,
+        is_new_version_of=is_new_version_of,
     )
     return dataset
 
@@ -244,30 +262,48 @@ def from_anndata(
 # internal function, not exposed to user
 def from_files(files: Iterable[File]) -> Tuple[str, Dict[str, str]]:
     # assert all files are already saved
+    logger.debug("check not saved")
     saved = not any([file._state.adding for file in files])
     if not saved:
         raise ValueError("Not all files are yet saved, please save them")
     # query all feature sets of files
+    logger.debug("file ids")
     file_ids = [file.id for file in files]
     # query all feature sets at the same time rather than making a single query per file
+    logger.debug("feature_set_file_links")
     feature_set_file_links = File.feature_sets.through.objects.filter(
         file_id__in=file_ids
     )
-    feature_set_ids = [link.feature_set_id for link in feature_set_file_links]
-    feature_sets = FeatureSet.filter(id__in=feature_set_ids).all()
     feature_sets_by_slots = defaultdict(list)
+    logger.debug("slots")
     for link in feature_set_file_links:
-        feature_sets_by_slots[link.slot].append(
-            feature_sets.filter(id=link.feature_set_id).one()
-        )
+        feature_sets_by_slots[link.slot].append(link.feature_set_id)
     feature_sets_union = {}
-    for slot, feature_sets_slot in feature_sets_by_slots.items():
-        members = feature_sets_slot[0].members
-        for feature_set in feature_sets_slot[1:]:
-            members = members | feature_set.members
-        feature_sets_union[slot] = FeatureSet(members)
+    logger.debug("union")
+    for slot, feature_set_ids_slot in feature_sets_by_slots.items():
+        feature_set_1 = FeatureSet.filter(id=feature_set_ids_slot[0]).one()
+        related_name = feature_set_1._get_related_name()
+        features_registry = getattr(FeatureSet, related_name).field.model
+        start_time = logger.debug("run filter")
+        # this way of writing the __in statement turned out to be the fastest
+        # evaluated on a link table with 16M entries connecting 500 feature sets with
+        # 60k genes
+        feature_ids = (
+            features_registry.feature_sets.through.objects.filter(
+                featureset_id__in=feature_set_ids_slot
+            )
+            .values(f"{features_registry.__name__.lower()}_id")
+            .distinct()
+        )
+        start_time = logger.debug("done, start evaluate", time=start_time)
+        features = features_registry.filter(id__in=feature_ids)
+        feature_sets_union[slot] = FeatureSet(
+            features, type=feature_set_1.type, modality=feature_set_1.modality
+        )
+        start_time = logger.debug("done", time=start_time)
     # validate consistency of hashes
     # we do not allow duplicate hashes
+    logger.debug("hashes")
     hashes = [file.hash for file in files]
     if len(hashes) != len(set(hashes)):
         seen = set()
@@ -276,7 +312,9 @@ def from_files(files: Iterable[File]) -> Tuple[str, Dict[str, str]]:
         "Please pass files with distinct hashes: these ones are non-unique"
         f" {non_unique}"
     )
+    time = logger.debug("hash")
     hash = hash_set(set(hashes))
+    logger.debug("done", time=time)
     return hash, feature_sets_union
 
 
@@ -311,12 +349,12 @@ def load(
     # because we're tracking data flow on the dataset-level, here, we don't
     # want to track it on the file-level
     objects = [file.load(is_run_input=False) for file in all_files]
-    file_ids = [file.id for file in all_files]
+    file_uids = [file.uid for file in all_files]
     if isinstance(objects[0], pd.DataFrame):
         concat_object = pd.concat(objects, join=join)
     elif isinstance(objects[0], ad.AnnData):
         concat_object = ad.concat(
-            objects, join=join, label="file_id", keys=file_ids
+            objects, join=join, label="file_uid", keys=file_uids
         )
     # only call it here because there might be errors during concat
     _track_run_input(self, is_run_input)
@@ -324,10 +362,32 @@ def load(
 
 
 # docstring handled through attach_func_to_class_method
-def delete(self, storage: Optional[bool] = None) -> None:
-    super(Dataset, self).delete()
+def delete(
+    self, permanent: Optional[bool] = None, storage: Optional[bool] = None
+) -> None:
+    # change visibility to 2 (trash)
+    if self.visibility < 2 and permanent is not True:
+        self.visibility = 2
+        self.save()
+        if self.file is not None:
+            self.file.visibility = 2
+            self.file.save()
+        return
+
+    # permanent delete
+    if permanent is None:
+        response = input(
+            "File record is already in trash! Are you sure to delete it from your"
+            " database? (y/n) You can't undo this action."
+        )
+        delete_record = response == "y"
+    else:
+        delete_record = permanent
+
+    if delete_record:
+        super(Dataset, self).delete()
     if self.file is not None:
-        self.file.delete(storage=storage)
+        self.file.delete(permanent=permanent, storage=storage)
 
 
 # docstring handled through attach_func_to_class_method
@@ -351,6 +411,15 @@ def path(self) -> Union[Path, UPath]:
     return self.storage.path
 
 
+# docstring handled through attach_func_to_class_method
+def restore(self) -> None:
+    self.visibility = 0
+    self.save()
+    if self.file is not None:
+        self.file.visibility = 0
+        self.file.save()
+
+
 METHOD_NAMES = [
     "__init__",
     "from_anndata",
@@ -359,6 +428,7 @@ METHOD_NAMES = [
     "load",
     "delete",
     "save",
+    "restore",
 ]
 
 if _TESTING:
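
Net effect of the `_dataset.py` changes: `delete()` is now a two-stage operation. The first call moves the record (and its wrapped file, if any) to the trash by setting `visibility` to 2; only a second call, or an explicit `permanent=True`, removes it from the database. A minimal usage sketch, assuming a configured lamindb instance (the DataFrame and name are placeholders):

    import lamindb as ln
    import pandas as pd

    dataset = ln.Dataset(pd.DataFrame({"a": [1, 2]}), name="example")
    dataset.save()
    dataset.delete()    # soft delete: visibility -> 2 (trash)
    dataset.restore()   # visibility -> 0, visible in default queries again
    dataset.delete(permanent=True, storage=True)  # hard delete, no prompt
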
lamindb/_feature_set.py CHANGED
@@ -239,11 +239,16 @@ def members(self) -> "QuerySet":
         # this should return a queryset and not a list...
         # need to fix this
         return self._features[1]
+    related_name = self._get_related_name()
+    return self.__getattribute__(related_name).all()
+
+
+def _get_related_name(self: FeatureSet) -> str:
     key_split = self.registry.split(".")
     orm_name_with_schema = f"{key_split[0]}.{key_split[1]}"
     feature_sets_related_models = dict_related_model_to_related_name(self)
     related_name = feature_sets_related_models.get(orm_name_with_schema)
-    return self.__getattribute__(related_name).all()
+    return related_name
 
 
 METHOD_NAMES = [
@@ -266,3 +271,4 @@ for name in METHOD_NAMES:
     attach_func_to_class_method(name, FeatureSet, globals())
 
 setattr(FeatureSet, "members", members)
+setattr(FeatureSet, "_get_related_name", _get_related_name)
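
This refactor extracts the registry lookup from `members` into `_get_related_name`, so other code paths — notably the rewritten `from_files` in `_dataset.py` above — can resolve the features registry of a `FeatureSet` without materializing its members. A sketch of the relationship, assuming a gene-based feature set (the related name "genes" is illustrative):

    feature_set = ln.FeatureSet.filter().first()
    related_name = feature_set._get_related_name()  # e.g. "genes"
    # equivalent to feature_set.members:
    members = getattr(feature_set, related_name).all()
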
lamindb/_file.py CHANGED
@@ -179,6 +179,7 @@ def get_hash(
     hash, hash_type = hash_file(filepath)
     if not check_hash:
         return hash, hash_type
+    # we ignore datasets in trash containing the same hash
     result = File.filter(hash=hash).list()
     if len(result) > 0:
         if settings.upon_file_create_if_hash_exists == "error":
@@ -454,6 +455,9 @@ def __init__(file: File, *args, **kwargs):
         kwargs.pop("initial_version_id") if "initial_version_id" in kwargs else None
     )
     version: Optional[str] = kwargs.pop("version") if "version" in kwargs else None
+    visibility: Optional[int] = (
+        kwargs.pop("visibility") if "visibility" in kwargs else 0
+    )
     format = kwargs.pop("format") if "format" in kwargs else None
     log_hint = kwargs.pop("log_hint") if "log_hint" in kwargs else True
     skip_check_exists = (
@@ -462,8 +466,8 @@ def __init__(file: File, *args, **kwargs):
 
     if not len(kwargs) == 0:
         raise ValueError(
-            "Only data, key, run, description, version, is_new_version_of can be"
-            f" passed, you passed: {kwargs}"
+            "Only data, key, run, description, version, is_new_version_of, visibility"
+            f" can be passed, you passed: {kwargs}"
         )
 
     if is_new_version_of is None:
@@ -523,6 +527,7 @@ def __init__(file: File, *args, **kwargs):
     kwargs["initial_version_id"] = initial_version_id
     kwargs["version"] = version
     kwargs["description"] = description
+    kwargs["visibility"] = visibility
     # this check needs to come down here because key might be populated from an
     # existing file path during get_file_kwargs_from_data()
     if (
@@ -553,9 +558,19 @@ def from_df(
     description: Optional[str] = None,
     run: Optional[Run] = None,
     modality: Optional[Modality] = None,
+    version: Optional[str] = None,
+    is_new_version_of: Optional["File"] = None,
 ) -> "File":
     """{}"""
-    file = File(data=df, key=key, run=run, description=description, log_hint=False)
+    file = File(
+        data=df,
+        key=key,
+        run=run,
+        description=description,
+        version=version,
+        is_new_version_of=is_new_version_of,
+        log_hint=False,
+    )
     feature_set = FeatureSet.from_df(df, field=field, modality=modality)
     if feature_set is not None:
         file._feature_sets = {"columns": feature_set}
@@ -615,9 +630,19 @@ def from_anndata(
     description: Optional[str] = None,
     run: Optional[Run] = None,
     modality: Optional[Modality] = None,
+    version: Optional[str] = None,
+    is_new_version_of: Optional["File"] = None,
 ) -> "File":
     """{}"""
-    file = File(data=adata, key=key, run=run, description=description, log_hint=False)
+    file = File(
+        data=adata,
+        key=key,
+        run=run,
+        description=description,
+        version=version,
+        is_new_version_of=is_new_version_of,
+        log_hint=False,
+    )
     file._feature_sets = parse_feature_sets_from_anndata(adata, field, modality)
     return file
 
@@ -800,23 +825,49 @@ def stage(self, is_run_input: Optional[bool] = None) -> Path:
 
 
 # docstring handled through attach_func_to_class_method
-def delete(self, storage: Optional[bool] = None) -> None:
-    if storage is None:
-        response = input(f"Are you sure you want to delete {self} from storage? (y/n)")
-        delete_in_storage = response == "y"
+def delete(
+    self, permanent: Optional[bool] = None, storage: Optional[bool] = None
+) -> None:
+    # change visibility to 2 (trash)
+    if self.visibility < 2 and permanent is not True:
+        self.visibility = 2
+        self.save()
+        return
+
+    # if the file is already in the trash
+    # permanent delete skips the trash
+    if permanent is None:
+        response = input(
+            "File record is already in trash! Are you sure to delete it from your"
+            " database? (y/n) You can't undo this action."
+        )
+        delete_record = response == "y"
     else:
-        delete_in_storage = storage
+        delete_record = permanent
 
     # need to grab file path before deletion
     filepath = self.path
+
     # only delete in storage if DB delete is successful
     # DB delete might error because of a foreign key constraint violated etc.
-    self._delete_skip_storage()
-    # we don't yet have any way to bring back the deleted metadata record
-    # in case the storage deletion fails - this is important for ACID down the road
-    if delete_in_storage:
-        delete_storage(filepath)
-        logger.success(f"deleted stored object {colors.yellow(f'{filepath}')}")
+    if delete_record:
+        self._delete_skip_storage()
+        if self.key is None:
+            delete_in_storage = True
+        else:
+            if storage is None:
+                response = input(
+                    f"Are you sure to delete {filepath}? (y/n) You can't undo this"
+                    " action."
+                )
+                delete_in_storage = response == "y"
+            else:
+                delete_in_storage = storage
+        # we don't yet have any way to bring back the deleted metadata record
+        # in case storage deletion fails - this is important for ACID down the road
+        if delete_in_storage:
            delete_storage(filepath)
            logger.success(f"deleted {colors.yellow(f'{filepath}')}")
 
 
 def _delete_skip_storage(file, *args, **kwargs) -> None:
@@ -941,6 +992,12 @@ def view_tree(
     )
 
 
+# docstring handled through attach_func_to_class_method
+def restore(self) -> None:
+    self.visibility = 0
+    self.save()
+
+
 METHOD_NAMES = [
     "__init__",
     "from_anndata",
@@ -953,6 +1010,7 @@ METHOD_NAMES = [
     "replace",
     "from_dir",
     "view_tree",
+    "restore",
 ]
 
 if _TESTING:
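
`File.delete()` mirrors the new `Dataset` semantics: the first call trashes the record, and a permanent delete prompts before touching storage — but only for files with a `key`; files without a semantic key are always cleaned up in storage. A sketch, assuming a saved file record:

    file = ln.File.filter(description="my file").one()
    file.delete()    # visibility -> 2, record and stored object kept
    file.restore()   # visibility -> 0
    file.delete(permanent=True, storage=True)  # remove record and stored object
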
lamindb/_filter.py CHANGED
@@ -42,6 +42,18 @@ def filter(Registry: Type[Registry], using: str = None, **expressions) -> QuerySet:
             id=UUID(instance_result["id"]),
         )
         add_db_connection(isettings, using)
+
+    if Registry.__name__ in {"File", "Dataset"}:
+        # visibility is set to <2 by default
+        if not any([e.startswith("visibility") for e in expressions]):
+            expressions["visibility__lt"] = 2
+        # if visibility is None, will not apply any filter for visibility
+        elif "visibility" in expressions:
+            if expressions["visibility"] is None:
+                expressions.pop("visibility")
+            elif expressions["visibility"] == "default":
+                expressions.pop("visibility")
+                expressions["visibility__lt"] = 2
     qs = QuerySet(model=Registry, using=using)
     if len(expressions) > 0:
         return qs.filter(**expressions)
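
Based on the branching above, `filter()` now behaves as follows for `File` and `Dataset` (a sketch of the query semantics):

    ln.File.filter()                      # implicit visibility__lt=2: trash excluded
    ln.File.filter(visibility=2)          # only trashed records
    ln.File.filter(visibility=None)       # no visibility constraint at all
    ln.File.filter(visibility="default")  # explicit form of the default behavior
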
lamindb/_parents.py CHANGED
@@ -201,13 +201,18 @@ def _get_parents(record: Registry, field: str, distance: int, children: bool = False):
     d = 2
     while d < distance:
         condition = f"{key}__{condition}"
-        records = model.filter(**{condition: record.__getattribute__(field)}).all()
+        records = model.filter(**{condition: record.__getattribute__(field)})
 
-        if len(records) == 0:
-            return results
+        try:
+            if not records.exists():
+                return results
 
-        results = results | records
-        d += 1
+            results = results | records.all()
+            d += 1
+        except Exception:
+            # For OperationalError:
+            # SQLite does not support joins containing more than 64 tables
+            return results
     return results
 
 
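Each loop iteration in `_get_parents` prepends another `{key}__` segment to the filter condition, so the generated SQL gains one self-join per distance step; SQLite aborts once a query joins more than 64 tables, which the broad `except Exception` now absorbs. A hypothetical sketch of how the condition grows (key and field names are illustrative):

    key = "parents"
    condition = "name"
    for distance in range(2, 5):
        condition = f"{key}__{condition}"  # parents__name, parents__parents__name, ...
    # each added segment becomes another self-join in the generated SQL
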
lamindb/_query_set.py CHANGED
@@ -210,13 +210,11 @@ class QuerySet(models.QuerySet):
         return _search(cls=self, string=string, **kwargs)
 
     @doc_args(Registry.lookup.__doc__)
-    def lookup(
-        self, field: Optional[StrField] = None, return_field: Optional[StrField] = None
-    ) -> NamedTuple:
+    def lookup(self, field: Optional[StrField] = None, **kwargs) -> NamedTuple:
         """{}"""
         from ._registry import _lookup
 
-        return _lookup(cls=self, field=field, return_field=return_field)
+        return _lookup(cls=self, field=field, **kwargs)
 
     @doc_args(CanValidate.validate.__doc__)
     def validate(
lamindb/_registry.py CHANGED
@@ -147,8 +147,9 @@ def _search(
     return_queryset: bool = False,
     case_sensitive: bool = False,
     synonyms_field: Optional[StrField] = "synonyms",
+    **expressions,
 ) -> Union["pd.DataFrame", "QuerySet"]:
-    queryset = _queryset(cls)
+    queryset = _queryset(cls, **expressions)
     orm = queryset.model
 
     def _search_single_field(
@@ -229,6 +230,7 @@ def search(
     return_queryset: bool = False,
     case_sensitive: bool = False,
     synonyms_field: Optional[StrField] = "synonyms",
+    **expressions,
 ) -> Union["pd.DataFrame", "QuerySet"]:
     """{}"""
     return _search(
@@ -239,14 +241,18 @@ def search(
         limit=limit,
         case_sensitive=case_sensitive,
         synonyms_field=synonyms_field,
+        **expressions,
     )
 
 
 def _lookup(
-    cls, field: Optional[StrField] = None, return_field: Optional[StrField] = None
+    cls,
+    field: Optional[StrField] = None,
+    return_field: Optional[StrField] = None,
+    **expressions,
 ) -> NamedTuple:
     """{}"""
-    queryset = _queryset(cls)
+    queryset = _queryset(cls, **expressions)
     field = get_default_str_field(orm=queryset.model, field=field)
 
     return Lookup(
@@ -264,10 +270,13 @@ def _lookup(
 @classmethod  # type: ignore
 @doc_args(Registry.lookup.__doc__)
 def lookup(
-    cls, field: Optional[StrField] = None, return_field: Optional[StrField] = None
+    cls,
+    field: Optional[StrField] = None,
+    return_field: Optional[StrField] = None,
+    **expressions,
 ) -> NamedTuple:
     """{}"""
-    return _lookup(cls=cls, field=field, return_field=return_field)
+    return _lookup(cls=cls, field=field, return_field=return_field, **expressions)
 
 
 def get_default_str_field(
@@ -316,8 +325,12 @@ def get_default_str_field(
     return field
 
 
-def _queryset(cls: Union[Registry, QuerySet, Manager]) -> QuerySet:
-    queryset = cls.all() if isinstance(cls, QuerySet) else cls.objects.all()
+def _queryset(cls: Union[Registry, QuerySet, Manager], **expressions) -> QuerySet:
+    queryset = (
+        cls.filter(**expressions).all()
+        if isinstance(cls, QuerySet)
+        else cls.filter(**expressions).all()
+    )
     return queryset
 
 
lamindb/_save.py CHANGED
@@ -252,18 +252,20 @@ def upload_data_object(file) -> None:
     """Store and add file and its linked entries."""
     # do NOT hand-craft the storage key!
     file_storage_key = auto_storage_key_from_file(file)
-    msg = f"storing file '{file.id}' at '{file_storage_key}'"
+    storage_path = lamindb_setup.settings.instance.storage.key_to_filepath(
+        file_storage_key
+    )
+    msg = f"storing file '{file.uid}' at '{storage_path}'"
     if (
         file.suffix in {".zarr", ".zrad"}
         and hasattr(file, "_memory_rep")
         and file._memory_rep is not None
     ):
         logger.save(msg)
-        storagepath = lamindb_setup.settings.storage.key_to_filepath(file_storage_key)
         print_progress = partial(
             print_hook, filepath=file_storage_key, action="uploading"
         )
-        write_adata_zarr(file._memory_rep, storagepath, callback=print_progress)
+        write_adata_zarr(file._memory_rep, storage_path, callback=print_progress)
     elif hasattr(file, "_to_store") and file._to_store:
         logger.save(msg)
         store_object(file._local_filepath, file_storage_key)
lamindb/dev/_data.py CHANGED
@@ -308,24 +308,48 @@ def add_labels(
 
 
 def _track_run_input(
-    data: Data, is_run_input: Optional[bool] = None, run: Optional[Run] = None
+    data: Union[Data, Iterable[Data]],
+    is_run_input: Optional[bool] = None,
+    run: Optional[Run] = None,
 ):
     if run is None:
         run = run_context.run
+    # consider that data is an iterable of Data
+    data_iter: Iterable[Data] = [data] if isinstance(data, Data) else data
     track_run_input = False
+    input_data = []
+    if run is not None:
+        # avoid cycles: data can't be both input and output
+        input_data = [data for data in data_iter if data.run_id != run.id]
+        input_data_ids = [data.id for data in data_iter if data.run_id != run.id]
+    if input_data:
+        data_class_name = input_data[0].__class__.__name__.lower()
+    # let us first look at the case in which the user does not
+    # provide a boolean value for `is_run_input`
+    # hence, we need to determine whether we actually want to
+    # track a run or not
     if is_run_input is None:
-        # we need a global run context for this to work
-        if run is not None:
-            # avoid cycles (a file is both input and output)
-            if data.run != run:
+        # we don't have a run record
+        if run is None:
+            if settings.track_run_inputs:
+                logger.hint(
+                    "you can auto-track this file as a run input by calling"
+                    " `ln.track()`"
+                )
+        # assume we have a run record
+        else:
+            # assume there is non-cyclic candidate input data
+            if input_data:
                 if settings.track_run_inputs:
                     transform_note = ""
-                    if data.transform is not None:
-                        transform_note = (
-                            f", adding parent transform {data.transform.id}"
-                        )
+                    if len(input_data) == 1:
+                        if input_data[0].transform is not None:
+                            transform_note = (
+                                ", adding parent transform"
+                                f" {input_data[0].transform.id}"
+                            )
                     logger.info(
-                        f"adding file {data.id} as input for run"
+                        f"adding {data_class_name} {input_data_ids} as input for run"
                         f" {run.id}{transform_note}"
                     )
                     track_run_input = True
@@ -333,12 +357,6 @@ def _track_run_input(
                 logger.hint(
                     "track this file as a run input by passing `is_run_input=True`"
                 )
-        else:
-            if settings.track_run_inputs:
-                logger.hint(
-                    "you can auto-track this file as a run input by calling"
-                    " `ln.track()`"
-                )
     else:
         track_run_input = is_run_input
     if track_run_input:
@@ -348,12 +366,22 @@ def _track_run_input(
             " run object via `run.input_files.add(file)`"
         )
         # avoid adding the same run twice
-        # avoid cycles (a file is both input and output)
-        if not data.input_of.contains(run) and data.run != run:
-            run.save()
-            data.input_of.add(run)
-            if data.transform is not None:
-                run.transform.parents.add(data.transform)
+        run.save()
+        if data_class_name == "file":
+            LinkORM = run.input_files.through
+            links = [
+                LinkORM(run_id=run.id, file_id=data_id) for data_id in input_data_ids
+            ]
+        else:
+            LinkORM = run.input_datasets.through
+            links = [
+                LinkORM(run_id=run.id, dataset_id=data_id) for data_id in input_data_ids
+            ]
+        LinkORM.objects.bulk_create(links, ignore_conflicts=True)
+        # generalize below for more than one data batch
+        if len(input_data) == 1:
+            if input_data[0].transform is not None:
+                run.transform.parents.add(input_data[0].transform)
 
 
 @property  # type: ignore
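
`_track_run_input` now accepts either a single `Data` record or an iterable of them, and registers all run inputs with a single `bulk_create` on the link table instead of one `input_of.add()` call per record — this is what lets `Dataset.__init__` pass the whole `files` iterable in one call (see `_dataset.py` above). A sketch of the bulk pattern, in generic Django ORM terms (`input_file_ids` is illustrative):

    LinkORM = run.input_files.through
    links = [LinkORM(run_id=run.id, file_id=fid) for fid in input_file_ids]
    # one INSERT for all links; ignore_conflicts skips rows that already exist
    LinkORM.objects.bulk_create(links, ignore_conflicts=True)
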
lamindb/dev/datasets/_core.py CHANGED
@@ -156,7 +156,7 @@ def anndata_mouse_sc_lymph_node(
         populate_registries: pre-populate metadata records to simulate existing registries  # noqa
     """
     filepath, _ = urlretrieve("https://lamindb-test.s3.amazonaws.com/E-MTAB-8414.h5ad")
-    adata = ad.read(filepath)
+    adata = ad.read_h5ad(filepath)
 
     # The column names are a bit lengthy, let's abbreviate them:
     adata.obs.columns = (
@@ -253,7 +253,7 @@ def anndata_pbmc68k_reduced() -> ad.AnnData:
     filepath, _ = urlretrieve(
         "https://lamindb-dev-datasets.s3.amazonaws.com/scrnaseq_pbmc68k_tiny.h5ad"
     )
-    return ad.read(filepath)
+    return ad.read_h5ad(filepath)
 
 
 def anndata_file_pbmc68k_test() -> Path:
@@ -283,7 +283,7 @@ def anndata_pbmc3k_processed() -> ad.AnnData:  # pragma: no cover
     filepath, _ = urlretrieve(
         "https://lamindb-test.s3.amazonaws.com/scrnaseq_scanpy_pbmc3k_processed.h5ad"
     )
-    pbmc3k = ad.read(filepath)
+    pbmc3k = ad.read_h5ad(filepath)
     pbmc3k.obs.rename(columns={"louvain": "cell_type"}, inplace=True)
     return pbmc3k
 
@@ -306,8 +306,11 @@ def anndata_human_immune_cells(
         adata.write('human_immune.h5ad')
     """
     filepath, _ = urlretrieve("https://lamindb-test.s3.amazonaws.com/human_immune.h5ad")
-    adata = ad.read(filepath)
+    adata = ad.read_h5ad(filepath)
     adata.var.drop(columns=["gene_symbols", "feature_name"], inplace=True)
+    adata.uns.pop("cell_type_ontology_term_id_colors")
+    adata.uns.pop("title")
+    adata.uns.pop("schema_version")
     adata.obs.columns = adata.obs.columns.str.replace("donor_id", "donor")
     columns = [col for col in adata.obs.columns if "ontology_term" not in col]
     adata.obs = adata.obs[columns]
@@ -378,7 +381,7 @@ def anndata_suo22_Visium10X():  # pragma: no cover
     )
     Path("suo22/").mkdir(exist_ok=True)
     filepath = Path(filepath).rename("suo22/Visium10X_data_LI_subset.h5ad")
-    return ad.read(filepath)
+    return ad.read_h5ad(filepath)
 
 
 def mudata_papalexi21_subset():  # pragma: no cover
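
`ad.read` is a legacy alias for `read_h5ad` in anndata; calling `read_h5ad` directly is explicit about the on-disk format and avoids the deprecation warning:

    import anndata as ad
    adata = ad.read_h5ad("human_immune.h5ad")  # instead of ad.read(...)
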
lamindb-0.57.2.dist-info/METADATA → lamindb-0.58.1.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: lamindb
-Version: 0.57.2
+Version: 0.58.1
 Summary: A data framework for biology.
 Author-email: Lamin Labs <open-source@lamin.ai>
 Requires-Python: >=3.8
@@ -8,9 +8,9 @@ Description-Content-Type: text/markdown
 Classifier: Programming Language :: Python :: 3.8
 Classifier: Programming Language :: Python :: 3.9
 Classifier: Programming Language :: Python :: 3.10
-Requires-Dist: lnschema_core==0.52.0
-Requires-Dist: lamindb_setup==0.55.6
-Requires-Dist: lamin_utils==0.11.4
+Requires-Dist: lnschema_core==0.53.0
+Requires-Dist: lamindb_setup==0.56.3
+Requires-Dist: lamin_utils==0.11.5
 Requires-Dist: rapidfuzz
 Requires-Dist: pyarrow
 Requires-Dist: typing_extensions!=4.6.0
@@ -24,7 +24,7 @@ Requires-Dist: urllib3<2 ; extra == "aws"
 Requires-Dist: boto3==1.28.17 ; extra == "aws"
 Requires-Dist: aiobotocore==2.5.4 ; extra == "aws"
 Requires-Dist: fsspec[s3]==2023.9.0 ; extra == "aws"
-Requires-Dist: lnschema_bionty==0.33.0 ; extra == "bionty"
+Requires-Dist: lnschema_bionty==0.34.0 ; extra == "bionty"
 Requires-Dist: pandas<2 ; extra == "dev"
 Requires-Dist: pre-commit ; extra == "dev"
 Requires-Dist: nox ; extra == "dev"
lamindb-0.57.2.dist-info/RECORD → lamindb-0.58.1.dist-info/RECORD CHANGED
@@ -1,17 +1,17 @@
-lamindb/__init__.py,sha256=IaVdOqONgsSFqAPdan9hd2UoDB3fd9CWMbHSmnZdjn8,2870
-lamindb/_dataset.py,sha256=mBoeQj7KhMxAnb9wmgu7MXxlbPRGQPsGIcBVaiAZjQ8,13453
+lamindb/__init__.py,sha256=0YGJThA1KvrX4UlxWsOrnuNKxxjEe1FVhKvdZ_8KWTg,2870
+lamindb/_dataset.py,sha256=GLGtwbZLlSjy2HtJsjHgRDTOO0u0PwuarRE5qp-rGUA,15810
 lamindb/_delete.py,sha256=wiYmYnvIEHrDdmw1NiXyfCY9mBt-FI5XNFi5jyR_mkA,1968
 lamindb/_feature.py,sha256=5gsa7zsMVVtm1DID4dF3Vwo5llWyY1dH3Hg5hjaIrQk,5554
-lamindb/_feature_set.py,sha256=DWDrLlNfsR726IdGw93CcTxSxrfmZtGSulZKCmUv4MQ,9055
-lamindb/_file.py,sha256=0TIsPvOcWXjtgCwTOoeot1o0Gs8ebkcDFQenMSgxXuM,35818
-lamindb/_filter.py,sha256=fNvPbLeOxYzvNKPcFYiFz3P7bkD5_84Xh8HHAoLNdas,1716
+lamindb/_feature_set.py,sha256=G63pwauDQ7jg4ydFCQLhu-lgO6tm56iQwUdRuNHeKHY,9233
+lamindb/_file.py,sha256=9McSL-DuhGDihfusIX1UKZ195HwhXohlWhJHV9Ki0c4,37358
+lamindb/_filter.py,sha256=JrE4tdExNkOmNf0_tnO3vo-W3tecsH6ZB74gLO_fvKE,2293
 lamindb/_from_values.py,sha256=GitpmKOqV6YHJggaCnJgGsRIHI_bnuLRVE2oo9W-SgE,11613
-lamindb/_parents.py,sha256=-SRNd4O7TUmCIHYysjS00uK1QKODF4UJSXK_T_1KOEI,13212
+lamindb/_parents.py,sha256=VT_gtomf1Erd_AKLVd1uLwigeDqMHtcaAbma3_AbQAw,13408
 lamindb/_query_manager.py,sha256=MXueabWHqft7GWNkzmWbhfTqdk-0mKU7nWrhXG6wpYQ,3693
-lamindb/_query_set.py,sha256=Lf7vLvOsEfUWRQ3iImSj4eQPmUK1KCgeoKS_m66Lp7o,10279
-lamindb/_registry.py,sha256=_pdlEvAtemiQCzpK2s14MsTKkLqE6ORDjhDs7ABs4i4,14893
+lamindb/_query_set.py,sha256=1vjTLkCCrs1GiS2KTyqmSgVRSx966UsMhApXbW7GgI0,10217
+lamindb/_registry.py,sha256=lUnHCeDDOw4mlak0_Q_EbQU1_qDrsE23l7IEbeoaV8w,15138
 lamindb/_run.py,sha256=659lqY32GW7F41rFUUo37OftUa38-p8yaV9Z0oF32CE,1120
-lamindb/_save.py,sha256=m6l5mMsxlrmlkdWhfjbwOtZ3haGEYyg63QcPG8twTMQ,10136
+lamindb/_save.py,sha256=hL34zgm-L3MFfi6P9O0AzeptFHtEnHdKheJqdOlGDM4,10154
 lamindb/_storage.py,sha256=HUdXGj4839C606gvxWXo0tDITbtbuyJKOgUPhagYPTI,415
 lamindb/_transform.py,sha256=87yUTz0RndJ_C98tBt4t2SPw8fksRgqJKwCQG_H40Kk,2515
 lamindb/_ulabel.py,sha256=lEAENh_dluNkBi8xKUH_CjJNMXldOm2liy6Rg3IH1pE,1900
@@ -19,7 +19,7 @@ lamindb/_utils.py,sha256=LGdiW4k3GClLz65vKAVRkL6Tw-Gkx9DWAdez1jyA5bE,428
 lamindb/_validate.py,sha256=3powFmYcNop2R6ijt2v3I_vPn4TD9ET4DJkW8uzQt_U,13719
 lamindb/_view.py,sha256=bzx6e-Cif2CmDQkOu6jMrq_d5rsu6g7hhdaK_sYBv_Y,2150
 lamindb/dev/__init__.py,sha256=Ja96dxb0t7raGsCr8QxqCabyEzIxeVGlL_IgmhxdsB8,1010
-lamindb/dev/_data.py,sha256=-0Bz2wg98-BTzpV_5lUZCrRk9yeU1xqCUrjELomJb60,13818
+lamindb/dev/_data.py,sha256=6TLM2tVWV7xMYzWNA14EsdyhSoRjK7IK6EU4VuQoC-g,15071
 lamindb/dev/_feature_manager.py,sha256=IojA1TPH3ZPlPghV_d1MIPIxdIcYO15RenI_o7YjmAM,8049
 lamindb/dev/_label_manager.py,sha256=5R2rZzdLgiZHEzXyilSjK3J7kHDHUOhneZJuSh--qQY,7339
 lamindb/dev/_priors.py,sha256=eSZEEijmeFs3zcrU27r3T2sSGdsK-cvy7vl6ukDYaU8,785
@@ -31,7 +31,7 @@ lamindb/dev/hashing.py,sha256=IlNrHy-a9NqB0vfqiwIh4sjt40CvaiZIvfK6gMnkxDo,1381
 lamindb/dev/types.py,sha256=svg5S_aynuGfbEOsbmqkR_gF9d9YMzfOkcvGN37Rzvg,232
 lamindb/dev/versioning.py,sha256=XF7X-Ngat_Ggca7FdtZa5ElOKlOgoxDtxwZlhsCTJZU,2788
 lamindb/dev/datasets/__init__.py,sha256=clbWOmg4K8Rh94OPFtJasNKdtUHHvR_Lx11jZWMqfok,1350
-lamindb/dev/datasets/_core.py,sha256=T1XE9tr3uVLnyA2W9_xuF60EQH3WVaS9GBl69cB-KDQ,18844
+lamindb/dev/datasets/_core.py,sha256=-g7wWWYHrejlkSQS04Xafi_w5OjDv9ItHMUFNdHsXlM,18987
 lamindb/dev/datasets/_fake.py,sha256=S8mNho-oSh1M9x9oOSsUBLLHmBAegsOLlFk6LnF81EA,942
 lamindb/dev/storage/__init__.py,sha256=mFvsMkAHHmO_xTM1UI-WGynDObnH0RCI2TXtFGhYfv8,392
 lamindb/dev/storage/_anndata_sizes.py,sha256=0XVzA6AQeVGPaGPrhGusKyxFgFjeo3qSN29hxb8D5E8,993
@@ -41,8 +41,8 @@ lamindb/dev/storage/file.py,sha256=xfeU8X1ty80-PhnHOpupBJfibZKhp6MPLA2IjYdTBoY,7
 lamindb/dev/storage/object.py,sha256=KGuOwwYuN2yCJxTXn9v0LanC0fjKwy_62P-WksHcf40,1140
 lamindb/setup/__init__.py,sha256=8-0F2C4Glx23-b8-D_1CBGgRBM5PppVhazhoXZYOLsg,275
 lamindb/setup/dev/__init__.py,sha256=tBty426VGF2PGqqt2XuNU-WgvOrbOp1aZBDowjLuzgA,242
-lamindb-0.57.2.dist-info/entry_points.txt,sha256=MioM8vSpKwXxY3geNBwjo1wnwy1l15WjJYlI3lpKuZI,53
-lamindb-0.57.2.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
-lamindb-0.57.2.dist-info/WHEEL,sha256=EZbGkh7Ie4PoZfRQ8I0ZuP9VklN_TvcZ6DSE5Uar4z4,81
-lamindb-0.57.2.dist-info/METADATA,sha256=oJ8lBjU6ZZ7Bl-oV6PrqtbHlnuGb-8Ry3clIInkpxuk,3030
-lamindb-0.57.2.dist-info/RECORD,,
+lamindb-0.58.1.dist-info/entry_points.txt,sha256=MioM8vSpKwXxY3geNBwjo1wnwy1l15WjJYlI3lpKuZI,53
+lamindb-0.58.1.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+lamindb-0.58.1.dist-info/WHEEL,sha256=EZbGkh7Ie4PoZfRQ8I0ZuP9VklN_TvcZ6DSE5Uar4z4,81
+lamindb-0.58.1.dist-info/METADATA,sha256=vsEe2aNzGIKTdXiRH07Cr6wZuFn5COOO9U1DuZRkBRM,3030
+lamindb-0.58.1.dist-info/RECORD,,