lamindb 0.76.14__py3-none-any.whl → 0.76.16__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as published to a supported registry. It is provided for informational purposes only and reflects the package contents exactly as they appear in the public registry.
lamindb/__init__.py CHANGED
@@ -43,7 +43,7 @@ Modules and settings.
 """
 
 # denote a release candidate for 0.1.0 with 0.1rc1, 0.1a1, 0.1b1, etc.
-__version__ = "0.76.14"
+__version__ = "0.76.16"
 
 import os as _os
 
lamindb/_artifact.py CHANGED
@@ -28,39 +28,41 @@ from lnschema_core.types import (
     VisibilityChoice,
 )
 
-from lamindb._utils import attach_func_to_class_method
-from lamindb.core._data import _track_run_input, describe, view_lineage
-from lamindb.core._settings import settings
-from lamindb.core.exceptions import IntegrityError, InvalidArgument
-from lamindb.core.loaders import load_to_memory
-from lamindb.core.storage import (
+from ._utils import attach_func_to_class_method
+from .core._data import (
+    _track_run_input,
+    add_transform_to_kwargs,
+    describe,
+    get_run,
+    save_feature_set_links,
+    save_feature_sets,
+    view_lineage,
+)
+from .core._settings import settings
+from .core.exceptions import IntegrityError, InvalidArgument
+from .core.loaders import load_to_memory
+from .core.storage import (
     LocalPathClasses,
     UPath,
     delete_storage,
     infer_suffix,
     write_to_disk,
 )
-from lamindb.core.storage.paths import (
+from .core.storage._pyarrow_dataset import PYARROW_SUFFIXES
+from .core.storage.objects import _mudata_is_installed
+from .core.storage.paths import (
+    AUTO_KEY_PREFIX,
     auto_storage_key_from_artifact,
     auto_storage_key_from_artifact_uid,
     check_path_is_child_of_root,
     filepath_cache_key_from_artifact,
     filepath_from_artifact,
 )
-from lamindb.core.versioning import (
+from .core.versioning import (
     create_uid,
     message_update_key_in_version_family,
 )
 
-from .core._data import (
-    add_transform_to_kwargs,
-    get_run,
-    save_feature_set_links,
-    save_feature_sets,
-)
-from .core.storage.objects import _mudata_is_installed
-from .core.storage.paths import AUTO_KEY_PREFIX
-
 try:
     from .core.storage._zarr import zarr_is_adata
 except ImportError:
@@ -72,6 +74,7 @@ except ImportError:
 if TYPE_CHECKING:
     from lamindb_setup.core.types import UPathStr
     from mudata import MuData
+    from pyarrow.dataset import Dataset as PyArrowDataset
     from tiledbsoma import Collection as SOMACollection
     from tiledbsoma import Experiment as SOMAExperiment
 
@@ -108,7 +111,12 @@ def process_pathlike(
         # for the storage root: the bucket
         if not isinstance(filepath, LocalPathClasses):
             # for a cloud path, new_root is always the bucket name
-            new_root = list(filepath.parents)[-1]
+            if filepath.protocol == "hf":
+                hf_path = filepath.fs.resolve_path(filepath.as_posix())
+                hf_path.path_in_repo = ""
+                new_root = "hf://" + hf_path.unresolve()
+            else:
+                new_root = list(filepath.parents)[-1]
         # do not register remote storage locations on hub if the current instance
         # is not managed on the hub
         storage_settings, _ = init_storage(
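The new `hf` branch computes the storage root by resolving the repository and clearing the in-repo path, rather than walking `filepath.parents`. A minimal sketch of that resolution, assuming `huggingface_hub` is installed (the repo below is hypothetical):

```python
from huggingface_hub import HfFileSystem

fs = HfFileSystem()
# resolve an hf:// URL into its components (repo type, repo id, revision, path in repo)
resolved = fs.resolve_path("hf://datasets/acme/my-dataset/data/train.parquet")
resolved.path_in_repo = ""  # keep only the repository itself
print("hf://" + resolved.unresolve())  # hf://datasets/acme/my-dataset
```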
@@ -210,9 +218,9 @@ def get_stat_or_artifact(
     if stat is not None:
         # convert UPathStatResult to fsspec info dict
         stat = stat.as_info()
-        if "ETag" in stat:  # is file
+        if (store_type := stat["type"]) == "file":
             size, hash, hash_type = get_stat_file_cloud(stat)
-        elif stat["type"] == "directory":
+        elif store_type == "directory":
             size, hash, hash_type, n_objects = get_stat_dir_cloud(path)
         if hash is None:
             logger.warning(f"did not add hash for {path}")
@@ -237,7 +245,7 @@
         .order_by("-created_at")
         .all()
     )
-    artifact_with_same_hash_exists = len(result.filter(hash=hash).all()) > 0
+    artifact_with_same_hash_exists = result.filter(hash=hash).count() > 0
     if not artifact_with_same_hash_exists and len(result) > 0:
         logger.important(
             f"creating new artifact version for key='{key}' (storage: '{settings.storage.root_as_str}')"
@@ -772,19 +780,14 @@
     else:
         folder_key_path = Path(key)
 
-    # always sanitize by stripping a trailing slash
-    folder_key = folder_key_path.as_posix().rstrip("/")
-
-    # TODO: (non-local) UPath doesn't list the first level artifacts and dirs with "*"
-    pattern = "" if not isinstance(folderpath, LocalPathClasses) else "*"
-
+    folder_key = folder_key_path.as_posix()
     # silence fine-grained logging
     verbosity = settings.verbosity
     verbosity_int = settings._verbosity_int
     if verbosity_int >= 1:
         settings.verbosity = "warning"
     artifacts_dict = {}
-    for filepath in folderpath.rglob(pattern):
+    for filepath in folderpath.rglob("*"):
         if filepath.is_file():
             relative_path = get_relative_path_to_directory(filepath, folderpath)
             artifact_key = folder_key + "/" + relative_path.as_posix()
@@ -802,7 +805,8 @@
         if artifact.hash is not None
     ]
     uids = artifacts_dict.keys()
-    if len(set(hashes)) == len(hashes):
+    n_unique_hashes = len(set(hashes))
+    if n_unique_hashes == len(hashes):
         artifacts = list(artifacts_dict.values())
     else:
         # consider exact duplicates (same id, same hash)
@@ -811,7 +815,7 @@
         # logger.warning("dropping duplicate records in list of artifact records")
         # artifacts = list(set(uids))
         # consider false duplicates (different id, same hash)
-        if not len(set(uids)) == len(set(hashes)):
+        if not len(set(uids)) == n_unique_hashes:
             seen_hashes = set()
             non_unique_artifacts = {
                 hash: artifact
@@ -905,14 +909,19 @@
 # docstring handled through attach_func_to_class_method
 def open(
     self, mode: str = "r", is_run_input: bool | None = None
-) -> AnnDataAccessor | BackedAccessor | SOMACollection | SOMAExperiment:
+) -> (
+    AnnDataAccessor | BackedAccessor | SOMACollection | SOMAExperiment | PyArrowDataset
+):
     # ignore empty suffix for now
-    suffixes = (".h5", ".hdf5", ".h5ad", ".zarr", ".tiledbsoma", "")
+    suffixes = ("", ".h5", ".hdf5", ".h5ad", ".zarr", ".tiledbsoma") + PYARROW_SUFFIXES
     if self.suffix not in suffixes:
         raise ValueError(
-            "Artifact should have a zarr, h5 or tiledbsoma object as the underlying data, please"
-            " use one of the following suffixes for the object name:"
-            f" {', '.join(suffixes[:-1])}."
+            "Artifact should have a zarr, h5, tiledbsoma object"
+            " or a compatible `pyarrow.dataset.dataset` directory"
+            " as the underlying data, please use one of the following suffixes"
+            f" for the object name: {', '.join(suffixes[1:])}."
+            f" Or no suffix for a folder with {', '.join(PYARROW_SUFFIXES)} files"
+            " (no mixing allowed)."
         )
     if self.suffix != ".tiledbsoma" and self.key != "soma" and mode != "r":
         raise ValueError("Only a tiledbsoma store can be openened with `mode!='r'`.")
lamindb/_can_validate.py CHANGED
@@ -10,10 +10,10 @@ from lamin_utils import colors, logger
 from lamindb_setup.core._docs import doc_args
 from lnschema_core import CanValidate, Record
 
-from lamindb._utils import attach_func_to_class_method
-
 from ._from_values import _has_organism_field, _print_values, get_or_create_records
 from ._record import _queryset, get_name_field
+from ._utils import attach_func_to_class_method
+from .core.exceptions import ValidationError
 
 if TYPE_CHECKING:
     from django.db.models import QuerySet
@@ -108,14 +108,14 @@ def _check_organism_db(organism: Record, using_key: str | None):
 
 def _concat_lists(values: ListLike) -> list[str]:
     """Concatenate a list of lists of strings into a single list."""
-    if len(values) > 0 and isinstance(values, (list, pd.Series)):
-        try:
-            if isinstance(values[0], list):
-                if isinstance(values, pd.Series):
-                    values = values.tolist()
-                values = sum([v for v in values if isinstance(v, list)], [])
-        except KeyError:
-            pass
+    if isinstance(values, (list, pd.Series)) and len(values) > 0:
+        first_item = values[0] if isinstance(values, list) else values.iloc[0]
+        if isinstance(first_item, list):
+            if isinstance(values, pd.Series):
+                values = values.tolist()
+            values = [
+                v for sublist in values if isinstance(sublist, list) for v in sublist
+            ]
     return values
 
 
@@ -250,7 +250,7 @@ def _validate(
             f"Your {cls.__name__} registry is empty, consider populating it first!"
         )
         if hasattr(cls, "source_id"):
-            msg += "\n → use `.import_from_source()` to import records from a source, e.g. a public ontology"
+            msg += "\n → use `.import_source()` to import records from a source, e.g. a public ontology"
         logger.warning(msg)
         return np.array([False] * len(values))
 
@@ -388,7 +388,11 @@ def _standardize(
 
     try:
         registry._meta.get_field(synonyms_field)
-        fields = {i for i in [field, return_field, synonyms_field] if i is not None}
+        fields = {
+            field_name
+            for field_name in [field, return_field, synonyms_field]
+            if field_name is not None
+        }
         df = _filter_query_based_on_organism(
             queryset=queryset,
             field=field,
@@ -445,14 +449,19 @@ def _standardize(
     if len(std_names_bt_mapper) > 0 and not mute:
         s = "" if len(std_names_bt_mapper) == 1 else "s"
         field_print = "synonym" if field == return_field else field
-        warn_msg = (
-            f"found {len(std_names_bt_mapper)} {field_print}{s} in Bionty:"
-            f" {list(std_names_bt_mapper.keys())}"
+
+        reduced_mapped_keys_str = f"{list(std_names_bt_mapper.keys())[:10] + ['...'] if len(std_names_bt_mapper) > 10 else list(std_names_bt_mapper.keys())}"
+        truncated_note = (
+            " (output truncated)" if len(std_names_bt_mapper) > 10 else ""
         )
-        warn_msg += (
-            f"\n please add corresponding {registry._meta.model.__name__} records via"
-            f" `.from_values({list(set(std_names_bt_mapper.values()))})`"
+
+        warn_msg = (
+            f"found {len(std_names_bt_mapper)} {field_print}{s} in Bionty{truncated_note}:"
+            f" {reduced_mapped_keys_str}\n"
+            f" please add corresponding {registry._meta.model.__name__} records via{truncated_note}:"
+            f" `.from_values({reduced_mapped_keys_str})`"
         )
+
         logger.warning(warn_msg)
 
         mapper.update(std_names_bt_mapper)
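The truncation expression introduced above reduces long key lists to the first ten entries plus an ellipsis marker; a minimal sketch with hypothetical synonyms:

```python
keys = [f"synonym{i}" for i in range(12)]
reduced = keys[:10] + ["..."] if len(keys) > 10 else keys
# ['synonym0', ..., 'synonym9', '...']: at most ten keys, then an ellipsis
```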
@@ -496,9 +505,9 @@ def _add_or_remove_synonyms(
             " with the following records:\n"
         )
         display(records_df)
-        raise ValueError(
-            "cannot assigned a synonym that is already associated with a record to a different record.\n"
-            "Consider removing the synonym from existing records or using a different synonym."
+        raise ValidationError(
+            f"you are trying to assign a synonym to record: {record}\n"
+            " consider removing the synonym from existing records or using a different synonym."
         )
 
     # passed synonyms
@@ -516,7 +525,7 @@ def _add_or_remove_synonyms(
         return
     # because we use | as the separator
     if any("|" in i for i in syn_new_set):
-        raise ValueError("a synonym can't contain '|'!")
+        raise ValidationError("a synonym can't contain '|'!")
 
     # existing synonyms
     syns_exist = record.synonyms
lamindb/_collection.py CHANGED
@@ -20,20 +20,21 @@ from lnschema_core.models import (
 )
 from lnschema_core.types import VisibilityChoice
 
-from lamindb._utils import attach_func_to_class_method
-from lamindb.core._data import _track_run_input, describe, view_lineage
-from lamindb.core._mapped_collection import MappedCollection
-from lamindb.core.versioning import process_revises
-
 from . import Artifact, Run
 from ._record import init_self_from_db, update_attributes
+from ._utils import attach_func_to_class_method
 from .core._data import (
+    _track_run_input,
     add_transform_to_kwargs,
+    describe,
     get_run,
     save_feature_set_links,
     save_feature_sets,
+    view_lineage,
 )
+from .core._mapped_collection import MappedCollection
 from .core._settings import settings
+from .core.versioning import process_revises
 
 if TYPE_CHECKING:
     from collections.abc import Iterable
lamindb/_curate.py CHANGED
@@ -20,6 +20,7 @@ from .core.exceptions import ValidationError
 
 if TYPE_CHECKING:
     from collections.abc import Iterable
+    from typing import Any
 
     from lamindb_setup.core.types import UPathStr
     from lnschema_core.types import FieldAttr
@@ -184,7 +185,7 @@ class DataFrameCurator(BaseCurator):
     def non_validated(self) -> list:
         """Return the non-validated features and labels."""
         if self._non_validated is None:
-            raise ValueError("Please run validate() first!")
+            raise ValidationError("Please run validate() first!")
         return self._non_validated
 
     @property
@@ -222,11 +223,11 @@ class DataFrameCurator(BaseCurator):
         valid_keys = set(self._df.columns) | {"columns"} | extra
         nonval_keys = [key for key in d.keys() if key not in valid_keys]
         if len(nonval_keys) > 0:
-            raise ValueError(
+            raise ValidationError(
                 f"the following keys passed to {name} are not allowed: {nonval_keys}"
             )
 
-    def _save_columns(self, validated_only: bool = True, **kwargs) -> None:
+    def _save_columns(self, validated_only: bool = True) -> None:
         """Save column name records."""
         # Always save features specified as the fields keys
         update_registry(
@@ -238,7 +239,7 @@ class DataFrameCurator(BaseCurator):
             validated_only=False,
             source=self._sources.get("columns"),
             exclude=self._exclude.get("columns"),
-            **kwargs,
+            **self._kwargs,  # type: ignore
         )
 
         # Save the rest of the columns based on validated_only
@@ -255,7 +256,7 @@ class DataFrameCurator(BaseCurator):
             source=self._sources.get("columns"),
             exclude=self._exclude.get("columns"),
             warning=False,  # Do not warn about missing columns, just an info message
-            **kwargs,
+            **self._kwargs,  # type: ignore
         )
 
     def add_new_from(self, key: str, organism: str | None = None, **kwargs):
@@ -288,9 +289,11 @@ class DataFrameCurator(BaseCurator):
             self._save_columns(validated_only=validated_only, **kwargs)
         else:
             if categorical not in self.fields:
-                raise ValueError(f"Feature {categorical} is not part of the fields!")
+                raise ValidationError(
+                    f"Feature {categorical} is not part of the fields!"
+                )
             update_registry(
-                values=self._df[categorical].unique().tolist(),
+                values=_flatten_unique(self._df[categorical]),
                 field=self.fields[categorical],
                 key=categorical,
                 using_key=self._using_key,
@@ -303,7 +306,6 @@ class DataFrameCurator(BaseCurator):
     def _update_registry_all(self, validated_only: bool = True, **kwargs):
         """Save labels for all features."""
         for name in self.fields.keys():
-            logger.info(f"saving validated records of '{name}'")
             self._update_registry(name, validated_only=validated_only, **kwargs)
 
     def validate(self, organism: str | None = None) -> bool:
@@ -434,12 +436,15 @@ class AnnDataCurator(DataFrameCurator):
     ) -> None:
         from lamindb_setup.core import upath
 
+        if isinstance(var_index, str):
+            raise TypeError("var_index parameter has to be a bionty field")
+
         from ._artifact import data_is_anndata
 
         if sources is None:
             sources = {}
         if not data_is_anndata(data):
-            raise ValueError(
+            raise TypeError(
                 "data has to be an AnnData object or a path to AnnData-like"
             )
         if isinstance(data, ad.AnnData):
@@ -449,6 +454,11 @@
 
             self._adata = backed_access(upath.create_path(data))
 
+        if "symbol" in str(var_index):
+            logger.warning(
+                "Curating gene symbols is discouraged. See FAQ for more details."
+            )
+
         self._data = data
         self._var_field = var_index
         super().__init__(
@@ -508,13 +518,11 @@ class AnnDataCurator(DataFrameCurator):
             exclude=self._exclude.get("var_index"),
         )
 
-    def _update_registry_all(self):
+    def _update_registry_all(self, validated_only: bool = True, **kwargs):
         """Save labels for all features."""
-        logger.info("saving validated records of 'var_index'")
-        self._save_from_var_index(validated_only=True, **self._kwargs)
+        self._save_from_var_index(validated_only=validated_only, **self._kwargs)
         for name in self._obs_fields.keys():
-            logger.info(f"saving validated terms of '{name}'")
-            self._update_registry(name, validated_only=True, **self._kwargs)
+            self._update_registry(name, validated_only=validated_only, **self._kwargs)
 
     def add_new_from_var_index(self, organism: str | None = None, **kwargs):
         """Update variable records.
@@ -704,7 +712,7 @@ class MuDataCurator:
         """Verify the modality exists."""
         for modality in modalities:
             if modality not in self._mdata.mod.keys():
-                raise ValueError(f"modality '{modality}' does not exist!")
+                raise ValidationError(f"modality '{modality}' does not exist!")
 
     def _save_from_var_index_modality(
         self, modality: str, validated_only: bool = True, **kwargs
@@ -729,7 +737,7 @@
         obs_fields: dict[str, dict[str, FieldAttr]] = {}
         for k, v in categoricals.items():
             if k not in self._mdata.obs.columns:
-                raise ValueError(f"column '{k}' does not exist in mdata.obs!")
+                raise ValidationError(f"column '{k}' does not exist in mdata.obs!")
             if any(k.startswith(prefix) for prefix in prefixes):
                 modality, col = k.split(":")[0], k.split(":")[1]
                 if modality not in obs_fields.keys():
@@ -1120,7 +1128,7 @@ def check_registry_organism(registry: Record, organism: str | None = None) -> di
     import bionty as bt
 
     if organism is None and bt.settings.organism is None:
-        raise ValueError(
+        raise ValidationError(
             f"{registry.__name__} registry requires an organism!\n"
             " → please pass an organism name via organism="
         )
@@ -1148,8 +1156,8 @@ def validate_categories(
         using_key: A reference LaminDB instance.
         organism: The organism name.
         source: The source record.
-        exclude: Exclude specific values.
-        standardize: Standardize the values.
+        exclude: Exclude specific values from validation.
+        standardize: Whether to standardize the values.
         validated_hint_print: The hint to print for validated values.
     """
     from lamindb._from_values import _print_values
@@ -1210,12 +1218,15 @@
 
     validated_hint_print = validated_hint_print or f".add_validated_from('{key}')"
     n_validated = len(values_validated)
+
     if n_validated > 0:
         _log_mapping_info()
+        terms_str = f"{', '.join([f'{chr(39)}{v}{chr(39)}' for v in values_validated[:10]])}{', ...' if len(values_validated) > 10 else ''}"
+        val_numerous = "" if n_validated == 1 else "s"
         logger.warning(
-            f"found {colors.yellow(n_validated)} validated terms: "
-            f"{colors.yellow(values_validated)}\n → save terms via "
-            f"{colors.yellow(validated_hint_print)}"
+            f"found {colors.yellow(n_validated)} validated term{val_numerous}: "
+            f"{colors.yellow(terms_str)}\n"
+            f"→ save term{val_numerous} via {colors.yellow(validated_hint_print)}"
         )
 
     non_validated_hint_print = validated_hint_print.replace("_validated_", "_new_")
@@ -1224,19 +1235,21 @@
     if n_non_validated == 0:
         if n_validated == 0:
             logger.indent = ""
-            logger.success(f"{key} is validated against {colors.italic(model_field)}")
+            logger.success(f"'{key}' is validated against {colors.italic(model_field)}")
             return True, []
         else:
             # validated values still need to be saved to the current instance
             return False, []
     else:
-        are = "are" if n_non_validated > 1 else "is"
+        non_val_numerous = ("", "is") if n_non_validated == 1 else ("s", "are")
         print_values = _print_values(non_validated)
         warning_message = (
-            f"{colors.red(f'{n_non_validated} terms')} {are} not validated: "
-            f"{colors.red(print_values)}\n → fix typos, remove non-existent values, or save terms via "
+            f"{colors.red(f'{n_non_validated} term{non_val_numerous[0]}')} {non_val_numerous[1]} not validated: "
+            f"{colors.red(', '.join(print_values.split(', ')[:10]) + ', ...' if len(print_values.split(', ')) > 10 else print_values)}\n"
+            f"→ fix typo{non_val_numerous[0]}, remove non-existent value{non_val_numerous[0]}, or save term{non_val_numerous[0]} via "
            f"{colors.red(non_validated_hint_print)}"
         )
+
         if logger.indent == "":
             _log_mapping_info()
         logger.warning(warning_message)
@@ -1427,6 +1440,19 @@ def save_artifact(
     return artifact
 
 
+def _flatten_unique(series: pd.Series[list[Any] | Any]) -> list[Any]:
+    """Flatten a Pandas series containing lists or single items into a unique list of elements."""
+    result = set()
+
+    for item in series:
+        if isinstance(item, list):
+            result.update(item)
+        else:
+            result.add(item)
+
+    return list(result)
+
+
 def update_registry(
     values: list[str],
     field: FieldAttr,
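Unlike `Series.unique().tolist()`, the new `_flatten_unique` helper also handles columns whose cells are lists (e.g. multi-label annotations). A hedged usage sketch with hypothetical values:

```python
import pandas as pd

s = pd.Series([["T cell", "B cell"], "B cell", ["NK cell"]])
labels = _flatten_unique(s)
assert set(labels) == {"T cell", "B cell", "NK cell"}  # order is not guaranteed
```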
@@ -1485,9 +1511,14 @@
 
     public_records = [r for r in existing_and_public_records if r._state.adding]
     # here we check to only save the public records if they are from the specified source
-    # we check the uid because r.source and soruce can be from different instances
+    # we check the uid because r.source and source can be from different instances
     if source:
         public_records = [r for r in public_records if r.source.uid == source.uid]
+
+    if public_records:
+        settings.verbosity = "info"
+        logger.info(f"saving validated records of '{key}'")
+        settings.verbosity = "error"
     ln_save(public_records)
     labels_saved["from public"] = [
         getattr(r, field.field.name) for r in public_records
@@ -1596,24 +1627,25 @@
             continue
 
         if k == "without reference" and validated_only:
-            msg = colors.yellow(
-                f"{len(labels)} non-validated values are not saved in {model_field}: {labels}!"
-            )
-            lookup_print = (
-                f"lookup().{key}" if key.isidentifier() else f".lookup()['{key}']"
-            )
-
-            hint = f".add_new_from('{key}')"
-            msg += f"\n → to lookup values, use {lookup_print}"
-            msg += (
-                f"\n → to save, run {colors.yellow(hint)}"
-                if save_function == "add_new_from"
-                else f"\n → to save, run {colors.yellow(save_function)}"
-            )
-            if warning:
-                logger.warning(msg)
-            else:
-                logger.info(msg)
+            continue
+            # msg = colors.yellow(
+            #     f"{len(labels)} non-validated values are not saved in {model_field}: {labels}!"
+            # )
+            # lookup_print = (
+            #     f"lookup().{key}" if key.isidentifier() else f".lookup()['{key}']"
+            # )
+
+            # hint = f".add_new_from('{key}')"
+            # msg += f"\n → to lookup values, use {lookup_print}"
+            # msg += (
+            #     f"\n → to save, run {colors.yellow(hint)}"
+            #     if save_function == "add_new_from"
+            #     else f"\n → to save, run {colors.yellow(save_function)}"
+            # )
+            # if warning:
+            #     logger.warning(msg)
+            # else:
+            #     logger.info(msg)
         else:
             k = "" if k == "without reference" else f"{colors.green(k)} "
             # the term "transferred" stresses that this is always in the context of transferring
@@ -1631,8 +1663,8 @@ def save_ulabels_with_parent(values: list[str], field: FieldAttr, key: str) -> N
     all_records = registry.from_values(list(values), field=field)
     is_feature = registry.filter(name=f"is_{key}").one_or_none()
     if is_feature is None:
-        is_feature = registry(name=f"is_{key}")
-        is_feature.save()
+        is_feature = registry(name=f"is_{key}").save()
+        logger.important(f"Created a parent ULabel: {is_feature}")
     is_feature.children.add(*all_records)
 
 
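The chained form works because `Record.save()` in lamindb returns the record itself. A hedged usage sketch with a hypothetical label name:

```python
import lamindb as ln

# construct and persist in one expression; save() hands back the saved record
parent = ln.ULabel(name="is_cell_type").save()
```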
@@ -1689,7 +1721,7 @@ def _save_organism(name: str): # pragma: no cover
     if organism is None:
         organism = bt.Organism.from_source(name=name)
         if organism is None:
-            raise ValueError(
+            raise ValidationError(
                 f"Organism '{name}' not found\n"
                 f" → please save it: bt.Organism(name='{name}').save()"
             )
lamindb/_feature.py CHANGED
@@ -8,10 +8,9 @@ from lamindb_setup.core._docs import doc_args
 from lnschema_core.models import Artifact, Feature
 from pandas.api.types import CategoricalDtype, is_string_dtype
 
-from lamindb._utils import attach_func_to_class_method
-from lamindb.core._settings import settings
-
 from ._query_set import RecordsList
+from ._utils import attach_func_to_class_method
+from .core._settings import settings
 from .core.schema import dict_schema_name_to_model_name
 
 if TYPE_CHECKING:
lamindb/_feature_set.py CHANGED
@@ -10,10 +10,9 @@ from lamindb_setup.core.hashing import hash_set
 from lnschema_core import Feature, FeatureSet, Record, ids
 from lnschema_core.types import FieldAttr, ListLike
 
-from lamindb._utils import attach_func_to_class_method
-
 from ._feature import convert_numpy_dtype_to_lamin_feature_type
 from ._record import init_self_from_db
+from ._utils import attach_func_to_class_method
 from .core.exceptions import ValidationError
 from .core.schema import (
     dict_related_model_to_related_name,