lamindb 0.74.3__py3-none-any.whl → 0.75.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
lamindb/__init__.py CHANGED
@@ -42,7 +42,7 @@ Modules & settings:
42
42
  """
43
43
 
44
44
  # denote a release candidate for 0.1.0 with 0.1rc1, 0.1a1, 0.1b1, etc.
45
- __version__ = "0.74.3"
45
+ __version__ = "0.75.1"
46
46
 
47
47
  import os as _os
48
48
 
lamindb/_artifact.py CHANGED
@@ -1,5 +1,6 @@
1
1
  from __future__ import annotations
2
2
 
3
+ import os
3
4
  import shutil
4
5
  from pathlib import Path, PurePath, PurePosixPath
5
6
  from typing import TYPE_CHECKING, Any, Mapping
@@ -28,6 +29,7 @@ from lnschema_core.types import (
28
29
  from lamindb._utils import attach_func_to_class_method
29
30
  from lamindb.core._data import HasFeatures, _track_run_input
30
31
  from lamindb.core._settings import settings
32
+ from lamindb.core.exceptions import IntegrityError
31
33
  from lamindb.core.storage import (
32
34
  LocalPathClasses,
33
35
  UPath,
@@ -39,6 +41,7 @@ from lamindb.core.storage import (
39
41
  from lamindb.core.storage.paths import (
40
42
  auto_storage_key_from_artifact,
41
43
  auto_storage_key_from_artifact_uid,
44
+ check_path_is_child_of_root,
42
45
  filepath_from_artifact,
43
46
  )
44
47
  from lamindb.core.versioning import get_uid_from_old_version, init_uid
@@ -102,7 +105,11 @@ def process_pathlike(
102
105
  if not isinstance(filepath, LocalPathClasses):
103
106
  # for a cloud path, new_root is always the bucket name
104
107
  new_root = list(filepath.parents)[-1]
105
- storage_settings = init_storage(new_root)
108
+ # do not register remote storage locations on hub if the current instance
109
+ # is not managed on the hub
110
+ storage_settings = init_storage(
111
+ new_root, prevent_register_hub=not setup_settings.instance.is_on_hub
112
+ )
106
113
  storage_record = register_storage_in_instance(storage_settings)
107
114
  use_existing_storage_key = True
108
115
  return storage_record, use_existing_storage_key
@@ -257,14 +264,6 @@ def check_path_in_existing_storage(
257
264
  return False
258
265
 
259
266
 
260
- def check_path_is_child_of_root(path: Path | UPath, root: Path | UPath | None) -> bool:
261
- # str is needed to eliminate UPath storage_options
262
- # from the equality checks below
263
- path = UPath(str(path))
264
- root = UPath(str(root))
265
- return root.resolve() in path.resolve().parents
266
-
267
-
268
267
  def get_relative_path_to_directory(
269
268
  path: PurePath | Path | UPath, directory: PurePath | Path | UPath
270
269
  ) -> PurePath | Path:
@@ -343,8 +342,10 @@ def get_artifact_kwargs_from_data(
343
342
  else:
344
343
  storage = default_storage
345
344
 
346
- if key is not None and key.startswith(AUTO_KEY_PREFIX):
347
- raise ValueError(f"Key cannot start with {AUTO_KEY_PREFIX}")
345
+ # for now comment out this error to allow creating new versions of stores
346
+ # in the default folder (.lamindb)
347
+ # if key is not None and key.startswith(AUTO_KEY_PREFIX):
348
+ # raise ValueError(f"Key cannot start with {AUTO_KEY_PREFIX}")
348
349
 
349
350
  log_storage_hint(
350
351
  check_path_in_storage=check_path_in_storage,
@@ -366,7 +367,7 @@ def get_artifact_kwargs_from_data(
366
367
  kwargs = {
367
368
  "suffix": suffix,
368
369
  "hash": hash,
369
- "hash_type": hash_type,
370
+ "_hash_type": hash_type,
370
371
  "key": key,
371
372
  "size": size,
372
373
  "storage_id": storage.id,
@@ -377,7 +378,7 @@ def get_artifact_kwargs_from_data(
377
378
  "n_observations": None, # to implement
378
379
  "run_id": run.id if run is not None else None,
379
380
  "run": run,
380
- "key_is_virtual": key_is_virtual,
381
+ "_key_is_virtual": key_is_virtual,
381
382
  }
382
383
  if not isinstance(path, LocalPathClasses):
383
384
  local_filepath = None
@@ -502,7 +503,7 @@ def __init__(artifact: Artifact, *args, **kwargs):
502
503
  raise ValueError("Only one non-keyword arg allowed: data")
503
504
 
504
505
  data: str | Path = kwargs.pop("data") if len(args) == 0 else args[0]
505
- type: str = kwargs.pop("type") if "type" in kwargs else "dataset"
506
+ type: str = kwargs.pop("type") if "type" in kwargs else None
506
507
  key: str | None = kwargs.pop("key") if "key" in kwargs else None
507
508
  run: Run | None = kwargs.pop("run") if "run" in kwargs else None
508
509
  description: str | None = (
@@ -531,7 +532,7 @@ def __init__(artifact: Artifact, *args, **kwargs):
531
532
  using_key = (
532
533
  kwargs.pop("using_key") if "using_key" in kwargs else settings._using_key
533
534
  )
534
- accessor = kwargs.pop("accessor") if "accessor" in kwargs else None
535
+ accessor = kwargs.pop("_accessor") if "_accessor" in kwargs else None
535
536
  accessor = _check_accessor_artifact(data=data, accessor=accessor)
536
537
  if not len(kwargs) == 0:
537
538
  raise ValueError(
@@ -592,7 +593,7 @@ def __init__(artifact: Artifact, *args, **kwargs):
592
593
  kwargs["version"] = version
593
594
  kwargs["description"] = description
594
595
  kwargs["visibility"] = visibility
595
- kwargs["accessor"] = accessor
596
+ kwargs["_accessor"] = accessor
596
597
  # this check needs to come down here because key might be populated from an
597
598
  # existing file path during get_artifact_kwargs_from_data()
598
599
  if (
@@ -633,7 +634,7 @@ def from_df(
633
634
  description=description,
634
635
  version=version,
635
636
  is_new_version_of=is_new_version_of,
636
- accessor="DataFrame",
637
+ _accessor="DataFrame",
637
638
  type="dataset",
638
639
  **kwargs,
639
640
  )
@@ -662,7 +663,7 @@ def from_anndata(
662
663
  description=description,
663
664
  version=version,
664
665
  is_new_version_of=is_new_version_of,
665
- accessor="AnnData",
666
+ _accessor="AnnData",
666
667
  type="dataset",
667
668
  **kwargs,
668
669
  )
@@ -689,7 +690,7 @@ def from_mudata(
689
690
  description=description,
690
691
  version=version,
691
692
  is_new_version_of=is_new_version_of,
692
- accessor="MuData",
693
+ _accessor="MuData",
693
694
  type="dataset",
694
695
  **kwargs,
695
696
  )
@@ -707,8 +708,8 @@ def from_dir(
707
708
  ) -> list[Artifact]:
708
709
  """{}""" # noqa: D415
709
710
  logger.warning(
710
- "this creates one artifact per file in the directory - you might simply call"
711
- " ln.Artifact(dir) to get one artifact for the entire directory"
711
+ "this creates one artifact per file in the directory - consider"
712
+ " ln.Artifact(dir_path) to get one artifact for the entire directory"
712
713
  )
713
714
  folderpath: UPath = create_path(path) # returns Path for local
714
715
  default_storage = settings._storage_settings.record
@@ -823,7 +824,7 @@ def replace(
823
824
  if check_path_in_storage:
824
825
  raise ValueError("Can only replace with a local file not in any Storage.")
825
826
 
826
- if self.key is not None and not self.key_is_virtual:
827
+ if self.key is not None and not self._key_is_virtual:
827
828
  key_path = PurePosixPath(self.key)
828
829
  new_filename = f"{key_path.stem}{kwargs['suffix']}"
829
830
  # the following will only be true if the suffix changes!
@@ -849,7 +850,7 @@ def replace(
849
850
  self.suffix = kwargs["suffix"]
850
851
  self.size = kwargs["size"]
851
852
  self.hash = kwargs["hash"]
852
- self.hash_type = kwargs["hash_type"]
853
+ self._hash_type = kwargs["_hash_type"]
853
854
  self.run_id = kwargs["run_id"]
854
855
  self.run = kwargs["run"]
855
856
 
@@ -862,15 +863,15 @@ def replace(
862
863
 
863
864
  # deprecated
864
865
  def backed(
865
- self, is_run_input: bool | None = None
866
+ self, mode: str = "r", is_run_input: bool | None = None
866
867
  ) -> AnnDataAccessor | BackedAccessor | SOMACollection | SOMAExperiment:
867
868
  logger.warning("`.backed()` is deprecated, use `.open()`!'")
868
- return self.open(is_run_input)
869
+ return self.open(mode, is_run_input)
869
870
 
870
871
 
871
872
  # docstring handled through attach_func_to_class_method
872
873
  def open(
873
- self, is_run_input: bool | None = None
874
+ self, mode: str = "r", is_run_input: bool | None = None
874
875
  ) -> AnnDataAccessor | BackedAccessor | SOMACollection | SOMAExperiment:
875
876
  # ignore empty suffix for now
876
877
  suffixes = (".h5", ".hdf5", ".h5ad", ".zarr", ".tiledbsoma", "")
@@ -880,29 +881,61 @@ def open(
880
881
  " use one of the following suffixes for the object name:"
881
882
  f" {', '.join(suffixes[:-1])}."
882
883
  )
884
+ if self.suffix != ".tiledbsoma" and self.key != "soma" and mode != "r":
885
+ raise ValueError("Only a tiledbsoma store can be openened with `mode!='r'`.")
883
886
 
884
- from lamindb.core.storage._backed_access import backed_access
887
+ from lamindb.core.storage._backed_access import _track_writes_factory, backed_access
885
888
 
886
- _track_run_input(self, is_run_input)
887
889
  using_key = settings._using_key
888
890
  filepath = filepath_from_artifact(self, using_key=using_key)
891
+ is_tiledbsoma_w = (
892
+ filepath.name == "soma" or filepath.suffix == ".tiledbsoma"
893
+ ) and mode == "w"
889
894
  # consider the case where an object is already locally cached
890
895
  localpath = setup_settings.instance.storage.cloud_to_local_no_update(filepath)
891
- if localpath.exists():
892
- return backed_access(localpath, using_key)
896
+ if not is_tiledbsoma_w and localpath.exists():
897
+ access = backed_access(localpath, mode, using_key)
893
898
  else:
894
- return backed_access(filepath, using_key)
899
+ access = backed_access(filepath, mode, using_key)
900
+ if is_tiledbsoma_w:
901
+
902
+ def finalize():
903
+ nonlocal self, filepath, localpath
904
+ if not isinstance(filepath, LocalPathClasses):
905
+ _, hash, _, _ = get_stat_dir_cloud(filepath)
906
+ else:
907
+ # this can be very slow
908
+ _, hash, _, _ = hash_dir(filepath)
909
+ if self.hash != hash:
910
+ from ._record import init_self_from_db
911
+
912
+ logger.warning(
913
+ "The hash of the tiledbsoma store has changed, creating a new version of the artifact."
914
+ )
915
+ new_version = Artifact(filepath, is_new_version_of=self).save()
916
+ init_self_from_db(self, new_version)
917
+
918
+ if localpath != filepath and localpath.exists():
919
+ shutil.rmtree(localpath)
920
+
921
+ access = _track_writes_factory(access, finalize)
922
+ # only call if open is successful
923
+ _track_run_input(self, is_run_input)
924
+ return access
895
925
 
896
926
 
897
927
  # docstring handled through attach_func_to_class_method
898
928
  def load(self, is_run_input: bool | None = None, stream: bool = False, **kwargs) -> Any:
899
- _track_run_input(self, is_run_input)
900
929
  if hasattr(self, "_memory_rep") and self._memory_rep is not None:
901
- return self._memory_rep
902
- using_key = settings._using_key
903
- return load_to_memory(
904
- filepath_from_artifact(self, using_key=using_key), stream=stream, **kwargs
905
- )
930
+ access_memory = self._memory_rep
931
+ else:
932
+ using_key = settings._using_key
933
+ access_memory = load_to_memory(
934
+ filepath_from_artifact(self, using_key=using_key), stream=stream, **kwargs
935
+ )
936
+ # only call if load is successful
937
+ _track_run_input(self, is_run_input)
938
+ return access_memory
906
939
 
907
940
 
908
941
  # docstring handled through attach_func_to_class_method
@@ -935,6 +968,17 @@ def delete(
935
968
  storage: bool | None = None,
936
969
  using_key: str | None = None,
937
970
  ) -> None:
971
+ # this first check means an invalid delete fails fast rather than cascading through
972
+ # database and storage permission errors
973
+ if os.getenv("LAMINDB_MULTI_INSTANCE") is None:
974
+ isettings = setup_settings.instance
975
+ if self.storage.instance_uid != isettings.uid and (storage or storage is None):
976
+ raise IntegrityError(
977
+ "Cannot simply delete artifacts outside of this instance's managed storage locations."
978
+ "\n(1) If you only want to delete the metadata record in this instance, pass `storage=False`"
979
+ f"\n(2) If you want to delete the artifact in storage, please load the managing lamindb instance (uid={self.storage.instance_uid})."
980
+ f"\nThese are all managed storage locations of this instance:\n{Storage.filter(instance_uid=isettings.uid).df()}"
981
+ )
938
982
  # by default, we only move artifacts into the trash (visibility = -1)
939
983
  trash_visibility = VisibilityChoice.trash.value
940
984
  if self.visibility > trash_visibility and not permanent:
@@ -943,7 +987,7 @@ def delete(
943
987
  # move to trash
944
988
  self.visibility = trash_visibility
945
989
  self.save()
946
- logger.warning(f"moved artifact to trash (visibility = {trash_visibility})")
990
+ logger.important(f"moved artifact to trash (visibility = {trash_visibility})")
947
991
  return
948
992
 
949
993
  # if the artifact is already in the trash
@@ -970,7 +1014,7 @@ def delete(
970
1014
  # only delete in storage if DB delete is successful
971
1015
  # DB delete might error because of a foreign key constraint violated etc.
972
1016
  self._delete_skip_storage()
973
- if self.key is None or self.key_is_virtual:
1017
+ if self.key is None or self._key_is_virtual:
974
1018
  # do not ask for confirmation also if storage is None
975
1019
  delete_in_storage = storage is None or storage
976
1020
  else:
@@ -985,9 +1029,7 @@ def delete(
985
1029
  else:
986
1030
  delete_in_storage = storage
987
1031
  if not delete_in_storage:
988
- logger.warning(
989
- f"you will retain a dangling store here: {path}, not referenced via an artifact"
990
- )
1032
+ logger.important(f"a file/folder remains here: {path}")
991
1033
  # we don't yet have logic to bring back the deleted metadata record
992
1034
  # in case storage deletion fails - this is important for ACID down the road
993
1035
  if delete_in_storage:
@@ -1013,7 +1055,7 @@ def save(self, upload: bool | None = None, **kwargs) -> Artifact:
1013
1055
  self._local_filepath = local_path
1014
1056
  # switch to virtual storage key upon upload
1015
1057
  # the local filepath is already cached at that point
1016
- self.key_is_virtual = True
1058
+ self._key_is_virtual = True
1017
1059
  # ensure that the artifact is uploaded
1018
1060
  self._to_store = True
1019
1061
 
lamindb/_can_validate.py CHANGED
@@ -13,7 +13,7 @@ from lnschema_core import CanValidate, Record
13
13
  from lamindb._utils import attach_func_to_class_method
14
14
 
15
15
  from ._from_values import _has_organism_field, _print_values
16
- from ._record import _queryset, get_default_str_field
16
+ from ._record import _queryset, get_name_field
17
17
 
18
18
  if TYPE_CHECKING:
19
19
  from django.db.models import QuerySet
@@ -30,7 +30,7 @@ def inspect(
30
30
  *,
31
31
  mute: bool = False,
32
32
  organism: str | Record | None = None,
33
- public_source: Record | None = None,
33
+ source: Record | None = None,
34
34
  ) -> InspectResult:
35
35
  """{}""" # noqa: D415
36
36
  return _inspect(
@@ -39,7 +39,7 @@ def inspect(
39
39
  field=field,
40
40
  mute=mute,
41
41
  organism=organism,
42
- public_source=public_source,
42
+ source=source,
43
43
  )
44
44
 
45
45
 
@@ -52,9 +52,31 @@ def validate(
52
52
  *,
53
53
  mute: bool = False,
54
54
  organism: str | Record | None = None,
55
+ source: Record | None = None,
55
56
  ) -> np.ndarray:
56
57
  """{}""" # noqa: D415
57
- return _validate(cls=cls, values=values, field=field, mute=mute, organism=organism)
58
+ return _validate(
59
+ cls=cls, values=values, field=field, mute=mute, organism=organism, source=source
60
+ )
61
+
62
+
63
+ def _check_source_db(source: Record, using_key: str | None):
64
+ """Check if the source is from the DB."""
65
+ if using_key is not None and using_key != "default":
66
+ if source._state.db != using_key:
67
+ raise ValueError(
68
+ f"source must be a bionty.Source record from instance '{using_key}'!"
69
+ )
70
+
71
+
72
+ def _check_organism_db(organism: Record, using_key: str | None):
73
+ """Check if the organism is from the DB."""
74
+ if isinstance(organism, Record):
75
+ if using_key is not None and using_key != "default":
76
+ if organism._state.db != using_key:
77
+ raise ValueError(
78
+ f"organism must be a bionty.Organism record from instance '{using_key}'!"
79
+ )
58
80
 
59
81
 
60
82
  def _inspect(
@@ -65,7 +87,7 @@ def _inspect(
65
87
  mute: bool = False,
66
88
  using_key: str | None = None,
67
89
  organism: str | Record | None = None,
68
- public_source: Record | None = None,
90
+ source: Record | None = None,
69
91
  ) -> pd.DataFrame | dict[str, list[str]]:
70
92
  """{}""" # noqa: D415
71
93
  from lamin_utils._inspect import inspect
@@ -73,10 +95,15 @@ def _inspect(
73
95
  if isinstance(values, str):
74
96
  values = [values]
75
97
 
76
- field = get_default_str_field(cls, field=field)
98
+ field = get_name_field(cls, field=field)
77
99
  queryset = _queryset(cls, using_key)
78
- orm = queryset.model
79
- model_name = orm._meta.model.__name__
100
+ using_key = queryset.db
101
+ if isinstance(source, Record):
102
+ _check_source_db(source, using_key)
103
+ queryset = queryset.filter(source=source).all()
104
+ _check_organism_db(organism, using_key)
105
+ registry = queryset.model
106
+ model_name = registry._meta.model.__name__
80
107
 
81
108
  # inspect in the DB
82
109
  result_db = inspect(
@@ -89,11 +116,11 @@ def _inspect(
89
116
  )
90
117
  nonval = set(result_db.non_validated).difference(result_db.synonyms_mapper.keys())
91
118
 
92
- if len(nonval) > 0 and orm.__get_schema_name__() == "bionty":
119
+ if len(nonval) > 0 and registry.__get_schema_name__() == "bionty":
93
120
  try:
94
- bionty_result = orm.public(
95
- organism=organism, public_source=public_source
96
- ).inspect(values=nonval, field=field, mute=True)
121
+ bionty_result = registry.public(organism=organism, source=source).inspect(
122
+ values=nonval, field=field, mute=True
123
+ )
97
124
  bionty_validated = bionty_result.validated
98
125
  bionty_mapper = bionty_result.synonyms_mapper
99
126
  hint = False
@@ -135,7 +162,7 @@ def _inspect(
135
162
  logger.print(f" couldn't validate {labels}: {colors.red(print_values)}")
136
163
  logger.print(
137
164
  f"→ if you are sure, create new record{s} via"
138
- f" {colors.italic(f'ln.{orm.__name__}()')} and save to your registry"
165
+ f" {colors.italic(f'{registry.__name__}()')} and save to your registry"
139
166
  )
140
167
 
141
168
  return result_db
@@ -149,6 +176,7 @@ def _validate(
149
176
  mute: bool = False,
150
177
  using_key: str | None = None,
151
178
  organism: str | Record | None = None,
179
+ source: Record | None = None,
152
180
  ) -> np.ndarray:
153
181
  """{}""" # noqa: D415
154
182
  from lamin_utils._inspect import validate
@@ -157,9 +185,14 @@ def _validate(
157
185
  if isinstance(values, str):
158
186
  values = [values]
159
187
 
160
- field = get_default_str_field(cls, field=field)
188
+ field = get_name_field(cls, field=field)
161
189
 
162
190
  queryset = _queryset(cls, using_key)
191
+ using_key = queryset.db
192
+ if isinstance(source, Record):
193
+ _check_source_db(source, using_key)
194
+ queryset = queryset.filter(source=source).all()
195
+ _check_organism_db(organism, using_key)
163
196
  field_values = pd.Series(
164
197
  _filter_query_based_on_organism(
165
198
  queryset=queryset,
@@ -169,6 +202,15 @@ def _validate(
169
202
  ),
170
203
  dtype="object",
171
204
  )
205
+ if field_values.empty:
206
+ if not mute:
207
+ msg = (
208
+ f"Your {cls.__name__} registry is empty, consider populating it first!"
209
+ )
210
+ if hasattr(cls, "source_id"):
211
+ msg += "\n → use `.import_from_source()` to import records from a source, e.g. a public ontology"
212
+ logger.warning(msg)
213
+ return np.array([False] * len(values))
172
214
 
173
215
  result = validate(
174
216
  identifiers=values,
@@ -198,6 +240,7 @@ def standardize(
198
240
  keep: Literal["first", "last", False] = "first",
199
241
  synonyms_field: str = "synonyms",
200
242
  organism: str | Record | None = None,
243
+ source: Record | None = None,
201
244
  ) -> list[str] | dict[str, str]:
202
245
  """{}""" # noqa: D415
203
246
  return _standardize(
@@ -212,6 +255,7 @@ def standardize(
212
255
  keep=keep,
213
256
  synonyms_field=synonyms_field,
214
257
  organism=organism,
258
+ source=source,
215
259
  )
216
260
 
217
261
 
@@ -263,6 +307,7 @@ def _standardize(
263
307
  synonyms_field: str = "synonyms",
264
308
  using_key: str | None = None,
265
309
  organism: str | Record | None = None,
310
+ source: Record | None = None,
266
311
  ) -> list[str] | dict[str, str]:
267
312
  """{}""" # noqa: D415
268
313
  from lamin_utils._standardize import standardize as map_synonyms
@@ -271,24 +316,29 @@ def _standardize(
271
316
  if isinstance(values, str):
272
317
  values = [values]
273
318
 
274
- field = get_default_str_field(cls, field=field)
275
- return_field = get_default_str_field(
319
+ field = get_name_field(cls, field=field)
320
+ return_field = get_name_field(
276
321
  cls, field=field if return_field is None else return_field
277
322
  )
278
323
  queryset = _queryset(cls, using_key)
279
- orm = queryset.model
280
-
281
- if _has_organism_field(orm):
282
- # here, we can safely import lnschema_bionty
283
- from lnschema_bionty._bionty import create_or_get_organism_record
284
-
285
- organism_record = create_or_get_organism_record(organism=organism, orm=orm)
324
+ using_key = queryset.db
325
+ if isinstance(source, Record):
326
+ _check_source_db(source, using_key)
327
+ queryset = queryset.filter(source=source).all()
328
+ _check_organism_db(organism, using_key)
329
+ registry = queryset.model
330
+
331
+ if _has_organism_field(registry):
332
+ # here, we can safely import bionty
333
+ from bionty._bionty import create_or_get_organism_record
334
+
335
+ organism_record = create_or_get_organism_record(organism=organism, orm=registry)
286
336
  organism = (
287
337
  organism_record.name if organism_record is not None else organism_record
288
338
  )
289
339
 
290
340
  try:
291
- orm._meta.get_field(synonyms_field)
341
+ registry._meta.get_field(synonyms_field)
292
342
  df = _filter_query_based_on_organism(
293
343
  queryset=queryset, field=field, organism=organism
294
344
  )
@@ -320,7 +370,7 @@ def _standardize(
320
370
  return result
321
371
 
322
372
  # map synonyms in Bionty
323
- if orm.__get_schema_name__() == "bionty" and public_aware:
373
+ if registry.__get_schema_name__() == "bionty" and public_aware:
324
374
  mapper = {}
325
375
  if return_mapper:
326
376
  mapper = std_names_db
@@ -328,12 +378,14 @@ def _standardize(
328
378
  df=df, identifiers=values, return_mapper=False, mute=True, **_kwargs
329
379
  )
330
380
 
331
- val_res = orm.validate(std_names_db, field=field, mute=True, organism=organism)
381
+ val_res = registry.validate(
382
+ std_names_db, field=field, mute=True, organism=organism
383
+ )
332
384
  if all(val_res):
333
385
  return _return(result=std_names_db, mapper=mapper)
334
386
 
335
387
  nonval = np.array(std_names_db)[~val_res]
336
- std_names_bt_mapper = orm.public(organism=organism).standardize(
388
+ std_names_bt_mapper = registry.public(organism=organism).standardize(
337
389
  nonval, return_mapper=True, mute=True, **_kwargs
338
390
  )
339
391
 
@@ -345,7 +397,7 @@ def _standardize(
345
397
  f" {list(std_names_bt_mapper.keys())}"
346
398
  )
347
399
  warn_msg += (
348
- f"\n please add corresponding {orm._meta.model.__name__} records via"
400
+ f"\n please add corresponding {registry._meta.model.__name__} records via"
349
401
  f" `.from_values({list(set(std_names_bt_mapper.values()))})`"
350
402
  )
351
403
  logger.warning(warn_msg)
@@ -388,7 +440,10 @@ def _add_or_remove_synonyms(
388
440
  " with the following records:\n"
389
441
  )
390
442
  display(records_df)
391
- raise SystemExit(AssertionError)
443
+ raise ValueError(
444
+ "cannot assigned a synonym that is already associated with a record to a different record.\n"
445
+ "Consider removing the synonym from existing records or using a different synonym."
446
+ )
392
447
 
393
448
  # passed synonyms
394
449
  # nothing happens when passing an empty string or list
@@ -405,7 +460,7 @@ def _add_or_remove_synonyms(
405
460
  return
406
461
  # because we use | as the separator
407
462
  if any("|" in i for i in syn_new_set):
408
- raise AssertionError("a synonym can't contain '|'!")
463
+ raise ValueError("a synonym can't contain '|'!")
409
464
 
410
465
  # existing synonyms
411
466
  syns_exist = record.synonyms
@@ -453,13 +508,13 @@ def _filter_query_based_on_organism(
453
508
  """Filter a queryset based on organism."""
454
509
  import pandas as pd
455
510
 
456
- orm = queryset.model
511
+ registry = queryset.model
457
512
 
458
- if _has_organism_field(orm) and not field.endswith("id"):
459
- # here, we can safely import lnschema_bionty
460
- from lnschema_bionty._bionty import create_or_get_organism_record
513
+ if _has_organism_field(registry) and not _field_is_id(field, registry):
514
+ # here, we can safely import bionty
515
+ from bionty._bionty import create_or_get_organism_record
461
516
 
462
- organism_record = create_or_get_organism_record(organism=organism, orm=orm)
517
+ organism_record = create_or_get_organism_record(organism=organism, orm=registry)
463
518
  if organism_record is not None:
464
519
  queryset = queryset.filter(organism__name=organism_record.name)
465
520
 
@@ -469,6 +524,16 @@ def _filter_query_based_on_organism(
469
524
  return queryset.values_list(values_list_field, flat=True)
470
525
 
471
526
 
527
+ def _field_is_id(field: str, registry: type[Record]) -> bool:
528
+ """Check if the field is an ontology ID."""
529
+ if hasattr(registry, "_ontology_id_field"):
530
+ if field == registry._ontology_id_field:
531
+ return True
532
+ if field.endswith("id"):
533
+ return True
534
+ return False
535
+
536
+
472
537
  METHOD_NAMES = [
473
538
  "validate",
474
539
  "inspect",