lamindb 0.49.3__py3-none-any.whl → 0.50.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lamindb/__init__.py +55 -15
- lamindb/_context.py +25 -25
- lamindb/_delete.py +8 -8
- lamindb/_feature.py +15 -11
- lamindb/_feature_set.py +70 -39
- lamindb/_file.py +80 -56
- lamindb/_filter.py +5 -5
- lamindb/_from_values.py +55 -92
- lamindb/{_manager.py → _query_manager.py} +8 -5
- lamindb/{_queryset.py → _query_set.py} +31 -28
- lamindb/{_orm.py → _registry.py} +53 -294
- lamindb/_save.py +14 -13
- lamindb/_synonym.py +203 -0
- lamindb/_validate.py +134 -0
- lamindb/_view.py +15 -9
- lamindb/dev/__init__.py +13 -6
- lamindb/dev/_data.py +195 -0
- lamindb/dev/_feature_manager.py +102 -0
- lamindb/dev/_settings.py +10 -9
- lamindb/dev/_view_parents.py +36 -17
- lamindb/dev/datasets/__init__.py +5 -3
- lamindb/dev/datasets/_core.py +35 -17
- lamindb/dev/exc.py +4 -0
- lamindb/dev/storage/_backed_access.py +53 -17
- lamindb/dev/storage/file.py +44 -15
- {lamindb-0.49.3.dist-info → lamindb-0.50.1.dist-info}/METADATA +34 -36
- lamindb-0.50.1.dist-info/RECORD +47 -0
- lamindb/_feature_manager.py +0 -237
- lamindb-0.49.3.dist-info/RECORD +0 -43
- {lamindb-0.49.3.dist-info → lamindb-0.50.1.dist-info}/LICENSE +0 -0
- {lamindb-0.49.3.dist-info → lamindb-0.50.1.dist-info}/WHEEL +0 -0
- {lamindb-0.49.3.dist-info → lamindb-0.50.1.dist-info}/entry_points.txt +0 -0
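The headline changes in 0.50: the ORM base class becomes `Registry` (`_orm.py` → `_registry.py`), the query modules are renamed to match (`_query_manager.py`, `_query_set.py`), feature management moves into `lamindb/dev/`, and synonym and validation logic are split into the new `_synonym.py` and `_validate.py`. As a rough sketch of what the new validation module enables at the user level — assuming `validate()` behaves as in later lamindb releases; the call below is illustrative, not taken from this diff:

    import lamindb as ln

    # assumed API: validate() checks values against a registry field and
    # returns a boolean mask indicating which values already exist
    values = ["T cell", "B cell", "not-a-label"]
    mask = ln.Label.validate(values, field=ln.Label.name)  # e.g. [True, True, False]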
lamindb/_file.py
CHANGED
@@ -1,6 +1,6 @@
 from itertools import islice
 from pathlib import Path, PurePath, PurePosixPath
-from typing import Any, List, Optional, Tuple, Union
+from typing import Any, List, Optional, Set, Tuple, Union

 import anndata as ad
 import lamindb_setup
@@ -17,7 +17,6 @@ from lnschema_core import Feature, FeatureSet, File, Run, Storage, ids
 from lnschema_core.types import AnnDataLike, DataLike, PathLike

 from lamindb._context import context
-from lamindb.dev import FeatureManager
 from lamindb.dev._settings import settings
 from lamindb.dev.hashing import b16_to_b64, hash_file
 from lamindb.dev.storage import (
@@ -33,6 +32,7 @@ from lamindb.dev.storage.file import (
     ProgressCallback,
     _str_to_path,
     auto_storage_key_from_file,
+    extract_suffix_from_path,
     filepath_from_file,
 )
 from lamindb.dev.utils import attach_func_to_class_method
@@ -76,7 +76,7 @@ def process_pathlike(
         new_root = list(filepath.parents)[-1]
         new_root_str = new_root.as_posix()
         logger.warning(
-            f"
+            f"creating new storage location for root: {new_root_str}"
         )
         storage_settings = StorageSettings(new_root_str)
         register_storage(storage_settings)
@@ -110,7 +110,7 @@ def process_data(
         storage, use_existing_storage_key = process_pathlike(
             filepath, skip_existence_check=skip_existence_check
         )
-        suffix =
+        suffix = extract_suffix_from_path(filepath)
         memory_rep = None
     elif isinstance(data, (pd.DataFrame, AnnData)):  # DataLike, spelled out
         storage = lamindb_setup.settings.storage.record
@@ -162,7 +162,7 @@ def get_hash(
             hash = f"{b16_to_b64(stripped_etag)}-{suffix}"
             hash_type = "md5-n"  # this is the S3 chunk-hashing strategy
         else:
-            logger.warning(f"
+            logger.warning(f"did not add hash for {filepath}")
             return None, None
     else:
         hash, hash_type = hash_file(filepath)
@@ -171,20 +171,20 @@ def get_hash(
     result = File.filter(hash=hash).list()
     if len(result) > 0:
         if settings.upon_file_create_if_hash_exists == "error":
-            msg = f"
+            msg = f"file with same hash exists: {result[0]}"
             hint = (
-                "💡
+                "💡 you can make this error a warning:\n"
                 "    ln.settings.upon_file_create_if_hash_exists"
             )
             raise RuntimeError(f"{msg}\n{hint}")
         elif settings.upon_file_create_if_hash_exists == "warn_create_new":
             logger.warning(
-                "
+                "creating new File object despite existing file with same hash:"
                 f" {result[0]}"
             )
             return hash, hash_type
         else:
-            logger.warning(f"
+            logger.warning(f"returning existing file with same hash: {result[0]}")
             return result[0]
     else:
         return hash, hash_type
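The reworded messages in this hunk all hang off the `upon_file_create_if_hash_exists` setting, which controls deduplication on file creation. A sketch of the three behaviors visible above (the name of the default value is assumed, not shown in this diff):

    import lamindb as ln

    # "error" raises the RuntimeError above; "warn_create_new" warns and
    # creates a duplicate record; otherwise the existing File is returned
    ln.settings.upon_file_create_if_hash_exists = "warn_create_new"
    file_a = ln.File("data.csv")
    file_b = ln.File("data.csv")  # warns, but yields a second File object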
@@ -295,7 +295,7 @@ def get_relative_path_to_directory(
     elif isinstance(directory, PurePath):
         relpath = path.relative_to(directory)
     else:
-        raise TypeError("
+        raise TypeError("Directory not of type Path or UPath")
     return relpath


@@ -377,13 +377,13 @@ def log_storage_hint(
 ) -> None:
     hint = ""
     if check_path_in_storage:
-        hint += f"file in storage {storage.root}"  # type: ignore
+        hint += f"file in storage '{storage.root}'"  # type: ignore
     else:
         hint += "file will be copied to default storage upon `save()`"
     if key is None:
-        hint += f" with key
+        hint += f" with key '{id}{suffix}'"
     else:
-        hint += f" with key
+        hint += f" with key '{key}'"
     logger.hint(hint)
@@ -441,7 +441,7 @@ def __init__(file: File, *args, **kwargs):
     if name is not None and description is not None:
         raise ValueError("Only pass description, do not pass a name")
     if name is not None:
-        logger.warning("
+        logger.warning("argument `name` is deprecated, please use `description`")
         description = name

     provisional_id = ids.base62_20()
@@ -473,15 +473,15 @@ def __init__(file: File, *args, **kwargs):
     if isinstance(data, pd.DataFrame):
         if log_hint:
             logger.hint(
-                "
-                " names as features
+                "file is a dataframe, consider using File.from_df() to link column"
+                " names as features"
             )
         kwargs["accessor"] = "DataFrame"
     elif data_is_anndata(data):
         if log_hint:
             logger.hint(
-                "
-                " var_names and obs.columns as features
+                "file is AnnDataLike, consider using File.from_anndata() to link"
+                " var_names and obs.columns as features"
             )
         kwargs["accessor"] = "AnnData"
     elif data_is_mudata(data):
@@ -524,7 +524,10 @@ def from_df(
     """{}"""
     file = File(data=df, key=key, run=run, description=description, log_hint=False)
     feature_set = FeatureSet.from_df(df)
-
+    if feature_set is not None:
+        file._feature_sets = {"columns": feature_set}
+    else:
+        file._feature_sets = {}
     return file

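`from_df()` now guards against `FeatureSet.from_df()` returning `None` and always initializes `_feature_sets`. Calling pattern, sketched (instance setup assumed):

    import pandas as pd
    import lamindb as ln

    df = pd.DataFrame({"cell_type": ["T cell", "B cell"], "count": [1, 2]})
    file = ln.File.from_df(df, description="toy counts")
    file.save()  # saves the "columns" feature set (if any) before the file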
@@ -554,20 +557,25 @@ def from_anndata(
     else:
         type = convert_numpy_dtype_to_lamin_feature_type(adata.X.dtype)
     feature_sets = {}
-    logger.info("
+    logger.info("parsing feature names of X stored in slot 'var'")
     logger.indent = " "
-
+    feature_set_var = FeatureSet.from_values(
         data_parse.var.index,
         var_ref,
         type=type,
     )
-
+
+    if feature_set_var is not None:
+        feature_sets["var"] = feature_set_var
+        logger.save(f"linked: {feature_set_var}")
     logger.indent = ""
     if len(data_parse.obs.columns) > 0:
-        logger.info("
+        logger.info("parsing feature names of slot 'obs'")
         logger.indent = " "
         feature_set_obs = FeatureSet.from_df(data_parse.obs)
-
+        if feature_set_obs is not None:
+            feature_sets["obs"] = feature_set_obs
+            logger.save(f"linked: {feature_set_obs}")
         logger.indent = ""
     file._feature_sets = feature_sets
     return file
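`from_anndata()` likewise only links the `var` and `obs` feature sets when parsing actually produced one, logging each link. A sketch, assuming `lnschema_bionty` supplies the reference field for `var_names`:

    import anndata as ad
    import numpy as np
    import pandas as pd
    import lamindb as ln
    import lnschema_bionty as lb

    adata = ad.AnnData(
        X=np.ones((2, 2), dtype=np.float32),
        obs=pd.DataFrame({"cell_type": ["T", "B"]}, index=["c1", "c2"]),
        var=pd.DataFrame(index=["CD4", "CD8A"]),
    )
    # var_names fill the "var" slot, obs columns the "obs" slot
    file = ln.File.from_anndata(adata, var_ref=lb.Gene.symbol, description="toy")
    file.save()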
@@ -598,7 +606,7 @@ def from_dir(
     if key is None:
         if not use_existing_storage:
             logger.warning(
-                "
+                "folder is outside existing storage location, will copy files from"
                 f" {path} to {storage}/{folderpath.name}"
             )
         folder_key_path = Path(folderpath.name)
@@ -612,7 +620,6 @@ def from_dir(

     # always sanitize by stripping a trailing slash
     folder_key = folder_key_path.as_posix().rstrip("/")
-    logger.hint(f"using storage {storage.root} and key prefix = {folder_key}/")

     # TODO: UPath doesn't list the first level files and dirs with "*"
     pattern = "" if isinstance(folderpath, UPath) else "*"
@@ -629,7 +636,10 @@ def from_dir(
         file = File(filepath, run=run, key=file_key, skip_check_exists=True)
         files.append(file)
     settings.verbosity = verbosity
-    logger.
+    logger.success(
+        f"created {len(files)} files from directory using storage"
+        f" {storage.root} and key = {folder_key}/"
+    )
     return files

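The per-call storage hint is dropped; `from_dir()` now reports the storage root and key prefix once, in the final success message. Usage, sketched:

    import lamindb as ln

    files = ln.File.from_dir("./images")  # one File per file in the folder
    ln.save(files)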
@@ -654,7 +664,7 @@ def replace(
         self._clear_storagekey = self.key
         self.key = str(key_path.with_name(new_filename))
         logger.warning(
-            f"
+            f"replacing the file will replace key '{key_path}' with '{self.key}'"
             f" and delete '{key_path}' upon `save()`"
         )
     else:
@@ -716,18 +726,18 @@ def _track_run_input(file: File, is_run_input: Optional[bool] = None):
                 f", adding parent transform {file.transform.id}"
             )
             logger.info(
-                f"
+                f"adding file {file.id} as input for run"
                 f" {context.run.id}{transform_note}"
             )
             track_run_input = True
         else:
             logger.hint(
-                "
+                "track this file as a run input by passing `is_run_input=True`"
             )
     else:
         if settings.track_run_inputs:
             logger.hint(
-                "
+                "you can auto-track this file as a run input by calling"
                 " `ln.track()`"
             )
         else:
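The reworded hints describe the two ways a file becomes a run input: globally via `ln.track()`, or per call. Sketched:

    import lamindb as ln

    ln.track()  # sets a run context; subsequent loads are tracked as inputs
    file = ln.File.filter(description="toy counts").first()
    df = file.load()

    # without a run context, opt in explicitly:
    df = file.load(is_run_input=True)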
@@ -771,9 +781,9 @@ def delete(self, storage: Optional[bool] = None) -> None:
         delete_in_storage = storage

     if delete_in_storage:
-        filepath = self.path
+        filepath = self.path
         delete_storage(filepath)
-        logger.success(f"
+        logger.success(f"deleted stored object {colors.yellow(f'{filepath}')}")
     self._delete_skip_storage()


@@ -802,6 +812,11 @@ def _save_skip_storage(file, *args, **kwargs) -> None:
     if hasattr(file, "_feature_sets"):
         for feature_set in file._feature_sets.values():
             feature_set.save()
+        s = "s" if len(file._feature_sets) > 1 else ""
+        logger.save(
+            f"saved {len(file._feature_sets)} feature set{s} for slot{s}:"
+            f" {list(file._feature_sets.keys())}"
+        )
     super(File, file).save(*args, **kwargs)
     if hasattr(file, "_feature_sets"):
         links = []
@@ -817,11 +832,14 @@ def _save_skip_storage(file, *args, **kwargs) -> None:
     bulk_create(links)


+@property  # type: ignore
+@doc_args(File.path.__doc__)
 def path(self) -> Union[Path, UPath]:
+    """{}"""
     return filepath_from_file(self)


-# adapted from: https://stackoverflow.com/questions/9727673
+# adapted from: https://stackoverflow.com/questions/9727673
 @classmethod  # type: ignore
 @doc_args(File.tree.__doc__)
 def tree(
@@ -831,7 +849,7 @@ def tree(
     level: int = -1,
     limit_to_directories: bool = False,
     length_limit: int = 1000,
-):
+) -> None:
     """{}"""
     space = " "
     branch = "│ "
@@ -842,11 +860,21 @@ def tree(
         dir_path = settings.storage
     else:
         dir_path = path if isinstance(path, (Path, UPath)) else _str_to_path(path)
-
-
+    n_files = 0
+    n_directories = 0
+
+    # by default only including registered files
+    # need a flag and a proper implementation
+    registered_paths: Set[Any] = set()
+    registered_dirs: Set[Any] = set()
+    if path is None:
+        registered_paths = {
+            file.path for file in cls.filter(storage_id=setup_settings.storage.id).all()
+        }
+        registered_dirs = {d for p in registered_paths for d in p.parents}

     def inner(dir_path: Union[Path, UPath], prefix: str = "", level=-1):
-        nonlocal
+        nonlocal n_files, n_directories
         if not level:
             return  # 0, stop iterating
         stripped_dir_path = dir_path.as_posix().rstrip("/")
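The refactor threads `n_files` and `n_directories` through the recursive generator via `nonlocal`; the same pattern in a stripped-down, lamindb-free sketch:

    from pathlib import Path
    from typing import Iterator

    def walk(root: Path) -> None:
        n_files = 0

        def inner(dir_path: Path) -> Iterator[str]:
            nonlocal n_files  # mutate the counter in the enclosing scope
            for p in sorted(dir_path.iterdir()):
                if p.is_dir():
                    yield from inner(p)
                else:
                    n_files += 1
                    yield p.name

        for name in inner(root):
            print(name)
        print(f"{n_files} files")  # final once the generator is exhausted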
@@ -864,22 +892,29 @@ def tree(
         pointers = [tee] * (len(contents) - 1) + [last]
         for pointer, path in zip(pointers, contents):
             if path.is_dir():
+                if registered_dirs and path not in registered_dirs:
+                    continue
                 yield prefix + pointer + path.name
-
+                n_directories += 1
                 extension = branch if pointer == tee else space
                 yield from inner(path, prefix=prefix + extension, level=level - 1)
             elif not limit_to_directories:
+                if registered_paths and path not in registered_paths:
+                    continue
                 yield prefix + pointer + path.name
-
+                n_files += 1

-    folder_tree =
+    folder_tree = ""
     iterator = inner(dir_path, level=level)
     for line in islice(iterator, length_limit):
         folder_tree += f"\n{line}"
     if next(iterator, None):
         folder_tree += f"... length_limit, {length_limit}, reached, counted:"
-
-    print(
+    directory_info = "directory" if n_directories == 1 else "directories"
+    print(
+        f"{dir_path.name} ({n_directories} sub-{directory_info} & {n_files} files):"
+        f" {folder_tree}"
+    )
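Usage of the updated `tree()`: with no `path`, output is restricted to files registered in the current storage location via the `registered_paths` sets above.

    import lamindb as ln

    ln.File.tree()                     # registered files in current storage
    ln.File.tree("./images", level=2)  # any path, limited to two levels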
@@ -922,22 +957,13 @@ def inherit_relations(self, file: File, fields: Optional[List[str]] = None):
     ]

     s = "s" if len(inherit_names) > 1 else ""
-    logger.info(f"
+    logger.info(f"inheriting {len(inherit_names)} field{s}: {inherit_names}")
     for related_name in inherit_names:
         self.__getattribute__(related_name).set(
             file.__getattribute__(related_name).all()
         )


-@property  # type: ignore
-@doc_args(File.features.__doc__)
-def features(self) -> "FeatureManager":
-    """{}"""
-    from lamindb._feature_manager import FeatureManager
-
-    return FeatureManager(self)
-
-
 METHOD_NAMES = [
     "__init__",
     "from_anndata",
@@ -948,7 +974,6 @@ METHOD_NAMES = [
     "delete",
     "save",
     "replace",
-    "path",
     "from_dir",
     "tree",
 ]
@@ -971,5 +996,4 @@ File._save_skip_storage = _save_skip_storage
 # TODO: move these to METHOD_NAMES
 setattr(File, "view_lineage", view_lineage)
 setattr(File, "inherit_relations", inherit_relations)
-
-setattr(File, "features", features)
+setattr(File, "path", path)
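`features` moves off this module (its `FeatureManager` now lives in `lamindb/dev/_feature_manager.py`), while `path` joins the `setattr`-based attachment at the bottom. The attachment pattern, reduced to a self-contained sketch:

    # lamindb overrides stub methods on the lnschema_core classes by
    # re-binding module-level implementations; simplified illustration:
    class Stub:
        def greet(self) -> str:
            raise NotImplementedError

    def greet(self) -> str:
        return "hello"

    setattr(Stub, "greet", greet)
    assert Stub().greet() == "hello"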
lamindb/_filter.py
CHANGED
@@ -1,13 +1,13 @@
 from typing import Type

-from lnschema_core import
+from lnschema_core import Registry

-from lamindb.
+from lamindb._query_set import QuerySet


-def filter(
-    """See :meth:`~lamindb.dev.
-    qs = QuerySet(model=
+def filter(Registry: Type[Registry], **expressions) -> QuerySet:
+    """See :meth:`~lamindb.dev.Registry.filter`."""
+    qs = QuerySet(model=Registry)
     if len(expressions) > 0:
         return qs.filter(**expressions)
     else:
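The wrapper now targets the renamed `Registry` base class and `_query_set` module; behavior is unchanged. From user code, filter expressions are Django field lookups:

    import lamindb as ln

    csvs = ln.File.filter(suffix=".csv", description__contains="toy").all()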
lamindb/_from_values.py
CHANGED
@@ -2,10 +2,9 @@ from typing import Any, Dict, Iterable, List, Optional, Tuple, Union

 import pandas as pd
 from django.core.exceptions import FieldDoesNotExist
-from django.db.models import Case, When
 from django.db.models.query_utils import DeferredAttribute as Field
 from lamin_utils import colors, logger
-from lnschema_core.models import
+from lnschema_core.models import Feature, Label, Registry
 from lnschema_core.types import ListLike

 from .dev._settings import settings
@@ -18,7 +17,7 @@ def get_or_create_records(
     *,
     from_bionty: bool = False,
     **kwargs,
-) -> List[
+) -> List[Registry]:
     """Get or create records from iterables."""
     upon_create_search_names = settings.upon_create_search_names
     settings.upon_create_search_names = False
@@ -31,10 +30,10 @@ def get_or_create_records(
     types = kwargs.pop("types")
     try:
         field_name = field.field.name
-
+        Registry = field.field.model
         iterable_idx = index_iterable(iterable)

-        if isinstance(
+        if isinstance(Registry, Feature):
             if types is None:
                 raise ValueError("Please pass types as {} or use FeatureSet.from_df()")

@@ -49,6 +48,8 @@ def get_or_create_records(
             records_bionty, unmapped_values = create_records_from_bionty(
                 iterable_idx=nonexist_values, field=field, **kwargs
             )
+            for record in records_bionty:
+                record._from_bionty = True
             records += records_bionty
         else:
             unmapped_values = nonexist_values
@@ -58,19 +59,19 @@ def get_or_create_records(
             params = {field_name: value}
             if types is not None:
                 params["type"] = str(types[value])
-            records.append(
+            records.append(Registry(**params, **kwargs))
         s = "" if len(unmapped_values) == 1 else "s"
-        print_unmapped_values = ", ".join(unmapped_values[:
-        if len(unmapped_values) >
+        print_unmapped_values = ", ".join(unmapped_values[:20])
+        if len(unmapped_values) > 20:
             print_unmapped_values += ", ..."
         additional_info = " "
         if feature is not None:
             additional_info = f" Feature {feature.name} and "
         logger.warning(
-            f"
+            f"did not validate {colors.yellow(f'{len(unmapped_values)} {Registry.__name__} record{s}')} for{additional_info}"  # noqa
             f"{colors.yellow(f'{field_name}{s}')}: {print_unmapped_values}"  # noqa
         )
-        if
+        if Registry.__module__.startswith("lnschema_bionty.") or Registry == Label:
             if isinstance(iterable, pd.Series):
                 feature = iterable.name
             feature_name = None
@@ -82,13 +83,17 @@ def get_or_create_records(
         if feature_name is not None:
             for record in records:
                 record._feature = feature_name
-            logger.
+            logger.debug(f"added default feature '{feature_name}'")
         return records
     finally:
         settings.upon_create_search_names = upon_create_search_names


-def get_existing_records(
+def get_existing_records(
+    iterable_idx: pd.Index,
+    field: Field,
+    kwargs: Dict = {},
+):
     field_name = field.field.name
     model = field.field.model
     condition: Dict = {}
@@ -103,25 +108,6 @@ def get_existing_records(iterable_idx: pd.Index, field: Field, kwargs: Dict = {}
         kwargs.update({"species": species_record})
         condition.update({"species__name": species_record.name})

-    # map synonyms based on the DB reference
-    syn_mapper = model.map_synonyms(
-        iterable_idx, species=kwargs.get("species"), return_mapper=True
-    )
-
-    syn_msg = ""
-    if len(syn_mapper) > 0:
-        s = "" if len(syn_mapper) == 1 else "s"
-        names = list(syn_mapper.keys())
-        print_values = ", ".join(names[:5])
-        if len(names) > 5:
-            print_values += ", ..."
-        syn_msg = (
-            "Loaded"
-            f" {colors.green(f'{len(syn_mapper)} {model.__name__} record{s}')} that"  # noqa
-            f" matched {colors.green('synonyms')}: {print_values}"
-        )
-    iterable_idx = iterable_idx.to_frame().rename(index=syn_mapper).index
-
     # get all existing records in the db
     # if necessary, create records for the values in kwargs
     # k:v -> k:v_record
@@ -129,32 +115,31 @@ def get_existing_records(iterable_idx: pd.Index, field: Field, kwargs: Dict = {}
     condition.update({f"{field_name}__in": iterable_idx.values})

     query_set = model.filter(**condition)
-
-
-
-
-
-
-
-
-
-
-
+    records = query_set.list()
+
+    # now we have to sort the list of queried records
+    # preserved = Case(
+    #     *[
+    #         When(**{field_name: value}, then=pos)
+    #         for pos, value in enumerate(iterable_idx)
+    #     ]
+    # )
+    # order by causes a factor 10 in runtime
+    # records = query_set.order_by(preserved).list()
+
+    n_name = len(records)
     names = [getattr(record, field_name) for record in records]
-    names = [name for name in names
+    names = [name for name in names]
     if n_name > 0:
         s = "" if n_name == 1 else "s"
-        print_values = ", ".join(names[:
-        if len(names) >
+        print_values = ", ".join(names[:20])
+        if len(names) > 20:
             print_values += ", ..."
-        logger.
-        "
-        f" {colors.green(f'{n_name} {model.__name__} record{s}')}
-        f"
+        logger.success(
+            "validated"
+            f" {colors.green(f'{n_name} {model.__name__} record{s}')}"
+            f" on {colors.green(f'{field_name}')}: {print_values}"
         )
-    # make sure that synonyms logging appears after the field logging
-    if len(syn_mapper) > 0:
-        logger.info(syn_msg)

     existing_values = iterable_idx.intersection(
         query_set.values_list(field_name, flat=True)
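The commented-out block preserves the intent: Django's `Case`/`When` can force query results into the order of the input values, but it was measured at roughly 10x the runtime here, so a plain `.list()` is used and input order is given up. The pattern, spelled out against a placeholder model:

    from django.db.models import Case, When

    symbols = ["TP53", "BRCA1", "EGFR"]
    preserved = Case(
        *[When(symbol=v, then=pos) for pos, v in enumerate(symbols)]
    )
    # Gene stands in for any Django model with a `symbol` field
    records = Gene.objects.filter(symbol__in=symbols).order_by(preserved)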
@@ -183,30 +168,10 @@ def create_records_from_bionty(
     # filter the columns in bionty df based on fields
     bionty_df = _filter_bionty_df_columns(model=model, bionty_object=bionty_object)

-    # map synonyms in the bionty reference
-    try:
-        syn_mapper = bionty_object.map_synonyms(iterable_idx, return_mapper=True)
-    except KeyError:
-        # no synonyms column
-        syn_mapper = {}
-    msg_syn: str = ""
-    if len(syn_mapper) > 0:
-        s = "" if len(syn_mapper) == 1 else "s"
-        names = list(syn_mapper.keys())
-        print_values = ", ".join(names[:5])
-        if len(names) > 5:
-            print_values += ", ..."
-        msg_syn = (
-            "Loaded"
-            f" {colors.purple(f'{len(syn_mapper)} {model.__name__} record{s} from Bionty')} that"  # noqa
-            f" matched {colors.purple('synonyms')}: {print_values}"
-        )
-
-    iterable_idx = iterable_idx.to_frame().rename(index=syn_mapper).index
-
     # create records for values that are found in the bionty reference
     mapped_values = iterable_idx.intersection(bionty_df[field_name])

+    multi_msg = ""
     if len(mapped_values) > 0:
         bionty_kwargs, multi_msg = _bulk_create_dicts_from_df(
             keys=mapped_values, column_name=field_name, df=bionty_df
@@ -215,26 +180,24 @@ def create_records_from_bionty(
             records.append(model(**bk, **kwargs))

     # number of records that matches field (not synonyms)
-    n_name = len(records)
+    n_name = len(records)
     names = [getattr(record, field_name) for record in records]
-    names = [name for name in names
+    names = [name for name in names]
     if n_name > 0:
         s = "" if n_name == 1 else "s"
-        print_values = ", ".join(names[:
-        if len(names) >
+        print_values = ", ".join(names[:20])
+        if len(names) > 20:
             print_values += ", ..."
         msg = (
-            "
-            f" {colors.purple(f'{n_name} {model.__name__} record{s} from Bionty')}
-            f"
+            "validated"
+            f" {colors.purple(f'{n_name} {model.__name__} record{s} from Bionty')}"  # noqa
+            f" on {colors.purple(f'{field_name}')}: {print_values}"
         )
-        logger.
-
-
-
-
-        if len(multi_msg) > 0:
-            logger.warning(multi_msg)
+        logger.success(msg)
+
+    # warning about multi matches
+    if len(multi_msg) > 0:
+        logger.warning(multi_msg)

     # return the values that are not found in the bionty reference
     unmapped_values = iterable_idx.difference(mapped_values)
@@ -248,7 +211,7 @@ def index_iterable(iterable: Iterable) -> pd.Index:
     return idx[(idx != "") & (~idx.isnull())]


-def _filter_bionty_df_columns(model:
+def _filter_bionty_df_columns(model: Registry, bionty_object: Any) -> pd.DataFrame:
     bionty_df = pd.DataFrame()
     if bionty_object is not None:
         model_field_names = {i.name for i in model._meta.fields}
@@ -297,18 +260,18 @@ def _bulk_create_dicts_from_df(
     dup = df.index[df.index.duplicated()].unique().tolist()
     if len(dup) > 0:
         s = "" if len(dup) == 1 else "s"
-        print_values = ", ".join(dup[:
-        if len(dup) >
+        print_values = ", ".join(dup[:20])
+        if len(dup) > 20:
             print_values += ", ..."
         multi_msg = (
-            f"
+            f"ambiguous validation in Bionty for {len(dup)} record{s}:"
             f" {print_values}"
        )

     return df.reset_index().to_dict(orient="records"), multi_msg


-def _has_species_field(orm:
+def _has_species_field(orm: Registry) -> bool:
     try:
         orm._meta.get_field("species")
         return True
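These helpers back the public `from_values()` entry points; note that all synonym mapping has moved out of this module into the new `_synonym.py`. End to end, sketched with `lnschema_bionty`:

    import lamindb as ln
    import lnschema_bionty as lb

    # existing records are fetched from the DB, the rest created from the
    # Bionty reference; unmatched values are logged as "did not validate"
    genes = lb.Gene.from_values(["TP53", "BRCA1", "not-a-gene"], lb.Gene.symbol)
    ln.save(genes)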