lamindb 0.76.2__py3-none-any.whl → 0.76.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
lamindb/__init__.py CHANGED
@@ -1,7 +1,6 @@
1
1
  """A data framework for biology.
2
2
 
3
- Records
4
- =======
3
+ Core registries.
5
4
 
6
5
  .. autosummary::
7
6
  :toctree: .
@@ -17,20 +16,18 @@ Records
17
16
  FeatureSet
18
17
  Param
19
18
 
20
- Key functionality
21
- =================
19
+ Key functionality.
22
20
 
23
21
  .. autosummary::
24
22
  :toctree: .
25
23
 
26
24
  context
27
25
  connect
28
- Curate
26
+ Curator
29
27
  view
30
28
  save
31
29
 
32
- Modules & settings
33
- ==================
30
+ Modules and settings.
34
31
 
35
32
  .. autosummary::
36
33
  :toctree: .
@@ -44,7 +41,7 @@ Modules & settings
44
41
  """
45
42
 
46
43
  # denote a release candidate for 0.1.0 with 0.1rc1, 0.1a1, 0.1b1, etc.
47
- __version__ = "0.76.2"
44
+ __version__ = "0.76.4"
48
45
 
49
46
  import os as _os
50
47
 
@@ -94,7 +91,7 @@ if _check_instance_setup(from_lamindb=True):
94
91
  _ulabel,
95
92
  integrations,
96
93
  )
97
- from ._curate import Curate
94
+ from ._curate import Curator
98
95
  from ._save import save
99
96
  from ._view import view
100
97
  from .core._context import context
@@ -110,6 +107,7 @@ if _check_instance_setup(from_lamindb=True):
110
107
 
111
108
  track = context.track # backward compat
112
109
  finish = context.finish # backward compat
110
+ Curate = Curator # backward compat
113
111
  settings.__doc__ = """Global :class:`~lamindb.core.Settings`."""
114
112
  context.__doc__ = """Global :class:`~lamindb.core.Context`."""
115
113
  from django.db.models import Q
lamindb/_artifact.py CHANGED
@@ -366,11 +366,6 @@ def get_artifact_kwargs_from_data(
366
366
  else:
367
367
  storage = default_storage
368
368
 
369
- # for now comment out this error to allow creating new versions of stores
370
- # in the default folder (.lamindb)
371
- # if key is not None and key.startswith(AUTO_KEY_PREFIX):
372
- # raise ValueError(f"Key cannot start with {AUTO_KEY_PREFIX}")
373
-
374
369
  log_storage_hint(
375
370
  check_path_in_storage=check_path_in_storage,
376
371
  storage=storage,
@@ -542,6 +537,7 @@ def __init__(artifact: Artifact, *args, **kwargs):
542
537
  else VisibilityChoice.default.value
543
538
  )
544
539
  format = kwargs.pop("format") if "format" in kwargs else None
540
+ _is_internal_call = kwargs.pop("_is_internal_call", False)
545
541
  skip_check_exists = (
546
542
  kwargs.pop("skip_check_exists") if "skip_check_exists" in kwargs else False
547
543
  )
@@ -575,13 +571,29 @@ def __init__(artifact: Artifact, *args, **kwargs):
575
571
  raise ValueError(
576
572
  f"`key` is {key}, but `revises.key` is '{revises.key}'\n\n Either do *not* pass `key`.\n\n{note}"
577
573
  )
578
-
579
- provisional_uid, revises = create_uid(revises=revises, version=version)
580
574
  if revises is not None:
581
575
  if not isinstance(revises, Artifact):
582
576
  raise TypeError("`revises` has to be of type `Artifact`")
583
577
  if description is None:
584
578
  description = revises.description
579
+ if key is not None and AUTO_KEY_PREFIX in key:
580
+ raise ValueError(
581
+ f"Do not pass key that contains a managed storage path in `{AUTO_KEY_PREFIX}`"
582
+ )
583
+ # below is for internal calls that require defining the storage location
584
+ # ahead of constructing the Artifact
585
+ if isinstance(data, (str, Path)) and AUTO_KEY_PREFIX in str(data):
586
+ if _is_internal_call:
587
+ is_automanaged_path = True
588
+ user_provided_key = key
589
+ key = None
590
+ else:
591
+ raise ValueError(
592
+ f"Do not pass path inside the `{AUTO_KEY_PREFIX}` directory."
593
+ )
594
+ else:
595
+ is_automanaged_path = False
596
+ provisional_uid, revises = create_uid(revises=revises, version=version)
585
597
  kwargs_or_artifact, privates = get_artifact_kwargs_from_data(
586
598
  data=data,
587
599
  key=key,
@@ -609,16 +621,29 @@ def __init__(artifact: Artifact, *args, **kwargs):
609
621
  else:
610
622
  kwargs = kwargs_or_artifact
611
623
 
624
+ if data is not None:
625
+ artifact._local_filepath = privates["local_filepath"]
626
+ artifact._cloud_filepath = privates["cloud_filepath"]
627
+ artifact._memory_rep = privates["memory_rep"]
628
+ artifact._to_store = not privates["check_path_in_storage"]
629
+
630
+ if is_automanaged_path and _is_internal_call:
631
+ kwargs["_key_is_virtual"] = True
632
+ assert AUTO_KEY_PREFIX in kwargs["key"] # noqa: S101
633
+ uid = kwargs["key"].replace(AUTO_KEY_PREFIX, "").replace(kwargs["suffix"], "")
634
+ kwargs["key"] = user_provided_key
635
+ if revises is not None:
636
+ assert uid.startswith(revises.stem_uid) # noqa: S101
637
+ if len(uid) == 16:
638
+ if revises is None:
639
+ uid += "0000"
640
+ else:
641
+ uid, revises = create_uid(revises=revises, version=version)
642
+ kwargs["uid"] = uid
643
+
612
644
  # only set key now so that we don't do a look-up on it in case revises is passed
613
645
  if revises is not None:
614
646
  kwargs["key"] = revises.key
615
- # in case we have a new version of a folder with a different hash, print a
616
- # warning that the old version can't be recovered
617
- if revises is not None and revises.n_objects is not None and revises.n_objects > 1:
618
- logger.warning(
619
- f"artifact version {version} will _update_ the state of folder {revises.path} - "
620
- "to _retain_ the old state by duplicating the entire folder, do _not_ pass `revises`"
621
- )
622
647
 
623
648
  kwargs["type"] = type
624
649
  kwargs["version"] = version
@@ -637,12 +662,6 @@ def __init__(artifact: Artifact, *args, **kwargs):
637
662
 
638
663
  add_transform_to_kwargs(kwargs, kwargs["run"])
639
664
 
640
- if data is not None:
641
- artifact._local_filepath = privates["local_filepath"]
642
- artifact._cloud_filepath = privates["cloud_filepath"]
643
- artifact._memory_rep = privates["memory_rep"]
644
- artifact._to_store = not privates["check_path_in_storage"]
645
-
646
665
  super(Artifact, artifact).__init__(**kwargs)
647
666
 
648
667
 
@@ -937,10 +956,9 @@ def open(
937
956
  if self.hash != hash:
938
957
  from ._record import init_self_from_db
939
958
 
940
- logger.warning(
941
- "The hash of the tiledbsoma store has changed, creating a new version of the artifact."
942
- )
943
- new_version = Artifact(filepath, revises=self).save()
959
+ new_version = Artifact(
960
+ filepath, revises=self, _is_internal_call=True
961
+ ).save()
944
962
  init_self_from_db(self, new_version)
945
963
 
946
964
  if localpath != filepath and localpath.exists():
@@ -1168,3 +1186,4 @@ Artifact._delete_skip_storage = _delete_skip_storage
1168
1186
  Artifact._save_skip_storage = _save_skip_storage
1169
1187
  Artifact.path = path
1170
1188
  Artifact.backed = backed
1189
+ Artifact.view_lineage = HasFeatures.view_lineage
lamindb/_can_validate.py CHANGED
@@ -1,6 +1,6 @@
1
1
  from __future__ import annotations
2
2
 
3
- from typing import TYPE_CHECKING, Iterable, Literal
3
+ from typing import TYPE_CHECKING, Literal
4
4
 
5
5
  import lamindb_setup as ln_setup
6
6
  import numpy as np
@@ -79,6 +79,19 @@ def _check_organism_db(organism: Record, using_key: str | None):
79
79
  )
80
80
 
81
81
 
82
+ def _concat_lists(values: ListLike) -> list[str]:
83
+ """Concatenate a list of lists of strings into a single list."""
84
+ if len(values) > 0 and isinstance(values, (list, pd.Series)):
85
+ try:
86
+ if isinstance(values[0], list):
87
+ if isinstance(values, pd.Series):
88
+ values = values.tolist()
89
+ values = sum([v for v in values if isinstance(v, list)], [])
90
+ except KeyError:
91
+ pass
92
+ return values
93
+
94
+
82
95
  def _inspect(
83
96
  cls,
84
97
  values: ListLike,
@@ -94,6 +107,7 @@ def _inspect(
94
107
 
95
108
  if isinstance(values, str):
96
109
  values = [values]
110
+ values = _concat_lists(values)
97
111
 
98
112
  field = get_name_field(cls, field=field)
99
113
  queryset = _queryset(cls, using_key)
@@ -184,6 +198,7 @@ def _validate(
184
198
  return_str = True if isinstance(values, str) else False
185
199
  if isinstance(values, str):
186
200
  values = [values]
201
+ values = _concat_lists(values)
187
202
 
188
203
  field = get_name_field(cls, field=field)
189
204
 
@@ -229,7 +244,7 @@ def _validate(
229
244
  @doc_args(CanValidate.standardize.__doc__)
230
245
  def standardize(
231
246
  cls,
232
- values: Iterable,
247
+ values: ListLike,
233
248
  field: str | StrField | None = None,
234
249
  *,
235
250
  return_field: str = None,
@@ -295,7 +310,7 @@ def remove_synonym(self, synonym: str | ListLike):
295
310
 
296
311
  def _standardize(
297
312
  cls,
298
- values: Iterable,
313
+ values: ListLike,
299
314
  field: str | StrField | None = None,
300
315
  *,
301
316
  return_field: str = None,
@@ -315,6 +330,7 @@ def _standardize(
315
330
  return_str = True if isinstance(values, str) else False
316
331
  if isinstance(values, str):
317
332
  values = [values]
333
+ values = _concat_lists(values)
318
334
 
319
335
  field = get_name_field(cls, field=field)
320
336
  return_field = get_name_field(
@@ -416,7 +432,7 @@ def _standardize(
416
432
 
417
433
 
418
434
  def _add_or_remove_synonyms(
419
- synonym: str | Iterable,
435
+ synonym: str | ListLike,
420
436
  record: Record,
421
437
  action: Literal["add", "remove"],
422
438
  force: bool = False,
lamindb/_curate.py CHANGED
@@ -84,10 +84,34 @@ class CurateLookup:
84
84
  return colors.warning("No fields are found!")
85
85
 
86
86
 
87
- class DataFrameCurator:
87
+ class BaseCurator:
88
+ """Curate a dataset."""
89
+
90
+ def validate(self) -> bool:
91
+ """Validate dataset.
92
+
93
+ Returns:
94
+ Boolean indicating whether the dataset is validated.
95
+ """
96
+ pass
97
+
98
+ def save_artifact(self, description: str | None = None, **kwargs) -> Artifact:
99
+ """Save the dataset as artifact.
100
+
101
+ Args:
102
+ description: Description of the DataFrame object.
103
+ **kwargs: Object level metadata.
104
+
105
+ Returns:
106
+ A saved artifact record.
107
+ """
108
+ pass
109
+
110
+
111
+ class DataFrameCurator(BaseCurator):
88
112
  """Curation flow for a DataFrame object.
89
113
 
90
- See also :class:`~lamindb.Curate`.
114
+ See also :class:`~lamindb.Curator`.
91
115
 
92
116
  Args:
93
117
  df: The DataFrame object to curate.
@@ -101,7 +125,7 @@ class DataFrameCurator:
101
125
 
102
126
  Examples:
103
127
  >>> import bionty as bt
104
- >>> curate = ln.Curate.from_df(
128
+ >>> curate = ln.Curator.from_df(
105
129
  ... df,
106
130
  ... categoricals={
107
131
  ... "cell_type_ontology_id": bt.CellType.ontology_id,
@@ -120,6 +144,7 @@ class DataFrameCurator:
120
144
  organism: str | None = None,
121
145
  sources: dict[str, Record] | None = None,
122
146
  exclude: dict | None = None,
147
+ check_valid_keys: bool = True,
123
148
  ) -> None:
124
149
  from lamindb.core._settings import settings
125
150
 
@@ -139,6 +164,8 @@ class DataFrameCurator:
139
164
  exclude = {}
140
165
  self._exclude = exclude
141
166
  self._non_validated = None
167
+ if check_valid_keys:
168
+ self._check_valid_keys()
142
169
  self._save_columns()
143
170
 
144
171
  @property
@@ -167,14 +194,25 @@ class DataFrameCurator:
167
194
  using_key=using_key or self._using_key,
168
195
  )
169
196
 
197
+ def _check_valid_keys(self, extra: set = None) -> None:
198
+ if extra is None:
199
+ extra = set()
200
+ for name, d in {
201
+ "categoricals": self._fields,
202
+ "sources": self._sources,
203
+ "exclude": self._exclude,
204
+ }.items():
205
+ if not isinstance(d, dict):
206
+ raise TypeError(f"{name} must be a dictionary!")
207
+ valid_keys = set(self._df.columns) | {"columns"} | extra
208
+ nonval_keys = [key for key in d.keys() if key not in valid_keys]
209
+ if len(nonval_keys) > 0:
210
+ raise ValueError(
211
+ f"the following keys passed to {name} are not allowed: {nonval_keys}"
212
+ )
213
+
170
214
  def _save_columns(self, validated_only: bool = True, **kwargs) -> None:
171
215
  """Save column name records."""
172
- missing_columns = set(self.fields.keys()) - set(self._df.columns)
173
- if missing_columns:
174
- raise ValueError(
175
- f"Columns {missing_columns} are not found in the data object!"
176
- )
177
-
178
216
  # Always save features specified as the fields keys
179
217
  update_registry(
180
218
  values=list(self.fields.keys()),
@@ -184,6 +222,7 @@ class DataFrameCurator:
184
222
  using_key=self._using_key,
185
223
  validated_only=False,
186
224
  source=self._sources.get("columns"),
225
+ exclude=self._exclude.get("columns"),
187
226
  **kwargs,
188
227
  )
189
228
 
@@ -199,6 +238,7 @@ class DataFrameCurator:
199
238
  validated_only=validated_only,
200
239
  df=self._df, # Get the Feature type from df
201
240
  source=self._sources.get("columns"),
241
+ exclude=self._exclude.get("columns"),
202
242
  warning=False, # Do not warn about missing columns, just an info message
203
243
  **kwargs,
204
244
  )
@@ -251,6 +291,7 @@ class DataFrameCurator:
251
291
  using_key=self._using_key,
252
292
  validated_only=validated_only,
253
293
  source=self._sources.get(categorical),
294
+ exclude=self._exclude.get(categorical),
254
295
  **kwargs,
255
296
  )
256
297
 
@@ -330,9 +371,11 @@ class DataFrameCurator:
330
371
  class AnnDataCurator(DataFrameCurator):
331
372
  """Curation flow for ``AnnData``.
332
373
 
333
- See also :class:`~lamindb.Curate`.
374
+ See also :class:`~lamindb.Curator`.
375
+
376
+ Note that if genes are removed from the AnnData object, the object should be recreated using :meth:`~lamindb.Curator.from_anndata`.
334
377
 
335
- Note that if genes are removed from the AnnData object, the object should be recreated using :meth:`~lamindb.Curate.from_anndata`.
378
+ See :doc:`docs:cellxgene-curate` for instructions on how to curate against a specific cellxgene schema version.
336
379
 
337
380
  Args:
338
381
  data: The AnnData object or an AnnData-like path.
@@ -346,7 +389,7 @@ class AnnDataCurator(DataFrameCurator):
346
389
 
347
390
  Examples:
348
391
  >>> import bionty as bt
349
- >>> curate = ln.Curate.from_anndata(
392
+ >>> curate = ln.Curator.from_anndata(
350
393
  ... adata,
351
394
  ... var_index=bt.Gene.ensembl_gene_id,
352
395
  ... categoricals={
@@ -397,8 +440,10 @@ class AnnDataCurator(DataFrameCurator):
397
440
  organism=organism,
398
441
  sources=sources,
399
442
  exclude=exclude,
443
+ check_valid_keys=False,
400
444
  )
401
445
  self._obs_fields = categoricals
446
+ self._check_valid_keys(extra={"var_index"})
402
447
 
403
448
  @property
404
449
  def var_index(self) -> FieldAttr:
@@ -437,6 +482,7 @@ class AnnDataCurator(DataFrameCurator):
437
482
  validated_only=validated_only,
438
483
  organism=organism,
439
484
  source=self._sources.get("var_index"),
485
+ exclude=self._exclude.get("var_index"),
440
486
  )
441
487
 
442
488
  def _update_registry_all(self, validated_only: bool = True, **kwargs):
@@ -536,10 +582,10 @@ class AnnDataCurator(DataFrameCurator):
536
582
  class MuDataCurator:
537
583
  """Curation flow for a ``MuData`` object.
538
584
 
539
- See also :class:`~lamindb.Curate`.
585
+ See also :class:`~lamindb.Curator`.
540
586
 
541
587
  Note that if genes or other measurements are removed from the MuData object,
542
- the object should be recreated using :meth:`~lamindb.Curate.from_mudata`.
588
+ the object should be recreated using :meth:`~lamindb.Curator.from_mudata`.
543
589
 
544
590
  Args:
545
591
  mdata: The MuData object to curate.
@@ -556,7 +602,7 @@ class MuDataCurator:
556
602
 
557
603
  Examples:
558
604
  >>> import bionty as bt
559
- >>> curate = ln.Curate.from_mudata(
605
+ >>> curate = ln.Curator.from_mudata(
560
606
  ... mdata,
561
607
  ... var_index={
562
608
  ... "rna": bt.Gene.ensembl_gene_id,
@@ -603,6 +649,7 @@ class MuDataCurator:
603
649
  verbosity=verbosity,
604
650
  sources=self._sources.get(modality),
605
651
  exclude=self._exclude.get(modality),
652
+ check_valid_keys=False,
606
653
  **self._kwargs,
607
654
  )
608
655
  for modality in self._modalities
@@ -641,6 +688,7 @@ class MuDataCurator:
641
688
  validated_only=validated_only,
642
689
  dtype="number",
643
690
  source=self._sources.get(modality, {}).get("var_index"),
691
+ exclude=self._exclude.get(modality, {}).get("var_index"),
644
692
  **kwargs,
645
693
  )
646
694
 
@@ -704,6 +752,7 @@ class MuDataCurator:
704
752
  validated_only=False,
705
753
  df=self._mdata[modality].obs,
706
754
  source=self._sources.get(modality, {}).get("columns"),
755
+ exclude=self._exclude.get(modality, {}).get("columns"),
707
756
  **self._kwargs, # type: ignore
708
757
  **kwargs,
709
758
  )
@@ -789,7 +838,8 @@ class MuDataCurator:
789
838
  field=var_field,
790
839
  key=f"{modality}_var_index",
791
840
  using_key=self._using_key,
792
- exclude=self._exclude.get(f"{modality}_var_index"),
841
+ source=self._sources.get(modality, {}).get("var_index"),
842
+ exclude=self._exclude.get(modality, {}).get("var_index"),
793
843
  **self._kwargs, # type: ignore
794
844
  )
795
845
  validated_var &= is_validated_var
@@ -846,19 +896,19 @@ class MuDataCurator:
846
896
  return self._artifact
847
897
 
848
898
 
849
- class Curate:
850
- """Curation flow.
899
+ class Curator(BaseCurator):
900
+ """Dataset curator.
851
901
 
852
902
  Data curation entails accurately labeling datasets with standardized metadata
853
903
  to facilitate data integration, interpretation and analysis.
854
904
 
855
905
  The curation flow has several steps:
856
906
 
857
- 1. Create a :class:`Curate` object corresponding to the object type that you want to curate:
907
+ 1. Instantiate `Curator` from one of the following dataset objects:
858
908
 
859
- - :meth:`~lamindb.Curate.from_df`
860
- - :meth:`~lamindb.Curate.from_anndata`
861
- - :meth:`~lamindb.Curate.from_mudata`
909
+ - :meth:`~lamindb.Curator.from_df`
910
+ - :meth:`~lamindb.Curator.from_anndata`
911
+ - :meth:`~lamindb.Curator.from_mudata`
862
912
 
863
913
  During object creation, any passed categoricals found in the object will be saved.
864
914
 
@@ -867,7 +917,7 @@ class Curate:
867
917
  - Values that can be successfully validated and already exist in the registry.
868
918
  - Values which are new and not yet validated or potentially problematic values.
869
919
 
870
- 3. Determine how to handle validated and unvalidated values:
920
+ 3. Determine how to handle validated and non-validated values:
871
921
 
872
922
  - Validated values not yet in the registry can be automatically registered using :meth:`~lamindb.core.DataFrameCurator.add_validated_from`.
873
923
  - Valid and new values can be registered using :meth:`~lamindb.core.DataFrameCurator.add_new_from`.
@@ -982,10 +1032,22 @@ def standardize_and_inspect(
982
1032
  field: FieldAttr,
983
1033
  registry: type[Record],
984
1034
  standardize: bool = False,
1035
+ exclude: str | list | None = None,
985
1036
  **kwargs,
986
1037
  ):
987
1038
  """Standardize and inspect values using a registry."""
988
- filter_kwargs = get_current_filter_kwargs(registry, kwargs)
1039
+ # inspect exclude values in the default instance
1040
+ values = list(values)
1041
+ include_validated = []
1042
+ if exclude is not None:
1043
+ exclude = [exclude] if isinstance(exclude, str) else exclude
1044
+ exclude = [i for i in exclude if i in values]
1045
+ if len(exclude) > 0:
1046
+ # exclude values are validated without source and organism
1047
+ inspect_result_exclude = registry.inspect(exclude, field=field, mute=True)
1048
+ # if exclude values are validated, remove them from the values
1049
+ values = [i for i in values if i not in inspect_result_exclude.validated]
1050
+ include_validated = inspect_result_exclude.validated
989
1051
 
990
1052
  if standardize:
991
1053
  if hasattr(registry, "standardize") and hasattr(
@@ -993,11 +1055,17 @@ def standardize_and_inspect(
993
1055
  "synonyms", # https://github.com/laminlabs/lamindb/issues/1685
994
1056
  ):
995
1057
  standardized_values = registry.standardize(
996
- values, field=field, mute=True, **filter_kwargs
1058
+ values, field=field, mute=True, **kwargs
997
1059
  )
998
1060
  values = standardized_values
999
1061
 
1000
- return registry.inspect(values, field=field, mute=True, **filter_kwargs)
1062
+ inspect_result = registry.inspect(values, field=field, mute=True, **kwargs)
1063
+ inspect_result._validated += include_validated
1064
+ inspect_result._non_validated = [
1065
+ i for i in inspect_result.non_validated if i not in include_validated
1066
+ ]
1067
+
1068
+ return inspect_result
1001
1069
 
1002
1070
 
1003
1071
  def check_registry_organism(registry: Record, organism: str | None = None) -> dict:
@@ -1049,35 +1117,32 @@ def validate_categories(
1049
1117
  logger.indent = " "
1050
1118
 
1051
1119
  registry = field.field.model
1120
+
1052
1121
  kwargs = check_registry_organism(registry, organism)
1053
1122
  kwargs.update({"source": source} if source else {})
1123
+ kwargs_current = get_current_filter_kwargs(registry, kwargs)
1054
1124
 
1055
1125
  # inspect the default instance
1056
- if exclude is not None:
1057
- exclude = [exclude] if isinstance(exclude, str) else exclude
1058
- # exclude values are validated without source and organism
1059
- inspect_result = registry.inspect(exclude, field=field, mute=True)
1060
- # if exclude values are validated, remove them from the values
1061
- values = [i for i in values if i not in inspect_result.validated]
1062
-
1063
1126
  inspect_result = standardize_and_inspect(
1064
1127
  values=values,
1065
1128
  field=field,
1066
1129
  registry=registry,
1067
1130
  standardize=standardize,
1068
- **kwargs,
1131
+ exclude=exclude,
1132
+ **kwargs_current,
1069
1133
  )
1070
1134
  non_validated = inspect_result.non_validated
1071
1135
 
1136
+ # inspect the using instance
1072
1137
  values_validated = []
1073
1138
  if using_key is not None and using_key != "default" and non_validated:
1074
1139
  registry_using = get_registry_instance(registry, using_key)
1075
- # inspect the using instance
1076
1140
  inspect_result = standardize_and_inspect(
1077
1141
  values=non_validated,
1078
1142
  field=field,
1079
1143
  registry=registry_using,
1080
1144
  standardize=standardize,
1145
+ exclude=exclude,
1081
1146
  **kwargs,
1082
1147
  )
1083
1148
  non_validated = inspect_result.non_validated
@@ -1091,7 +1156,7 @@ def validate_categories(
1091
1156
  public_records = registry.from_values(
1092
1157
  non_validated,
1093
1158
  field=field,
1094
- **get_current_filter_kwargs(registry, kwargs),
1159
+ **kwargs_current,
1095
1160
  )
1096
1161
  values_validated += [getattr(r, field.field.name) for r in public_records]
1097
1162
  finally:
@@ -1111,9 +1176,13 @@ def validate_categories(
1111
1176
  non_validated = [i for i in non_validated if i not in values_validated]
1112
1177
  n_non_validated = len(non_validated)
1113
1178
  if n_non_validated == 0:
1114
- logger.indent = ""
1115
- logger.success(f"{key} is validated against {colors.italic(model_field)}")
1116
- return True, []
1179
+ if n_validated == 0:
1180
+ logger.indent = ""
1181
+ logger.success(f"{key} is validated against {colors.italic(model_field)}")
1182
+ return True, []
1183
+ else:
1184
+ # validated values still need to be saved to the current instance
1185
+ return False, []
1117
1186
  else:
1118
1187
  are = "are" if n_non_validated > 1 else "is"
1119
1188
  print_values = _print_values(non_validated)
@@ -1138,6 +1207,9 @@ def validate_categories_in_df(
1138
1207
  **kwargs,
1139
1208
  ) -> tuple[bool, dict]:
1140
1209
  """Validate categories in DataFrame columns using LaminDB registries."""
1210
+ if not fields:
1211
+ return True, {}
1212
+
1141
1213
  if sources is None:
1142
1214
  sources = {}
1143
1215
  validated = True
@@ -1270,6 +1342,7 @@ def update_registry(
1270
1342
  source: Record | None = None,
1271
1343
  standardize: bool = True,
1272
1344
  warning: bool = True,
1345
+ exclude: str | list | None = None,
1273
1346
  **kwargs,
1274
1347
  ) -> None:
1275
1348
  """Save features or labels records in the default instance from the using_key instance.
@@ -1329,7 +1402,8 @@ def update_registry(
1329
1402
  field=field,
1330
1403
  registry=registry,
1331
1404
  standardize=standardize,
1332
- **filter_kwargs,
1405
+ exclude=exclude,
1406
+ **filter_kwargs_current,
1333
1407
  )
1334
1408
  if not inspect_result_current.non_validated:
1335
1409
  all_labels = registry.from_values(
@@ -1348,6 +1422,7 @@ def update_registry(
1348
1422
  inspect_result_current.non_validated,
1349
1423
  field=field,
1350
1424
  using_key=using_key,
1425
+ exclude=exclude,
1351
1426
  **filter_kwargs,
1352
1427
  )
1353
1428
 
@@ -1467,6 +1542,7 @@ def update_registry_from_using_instance(
1467
1542
  field: FieldAttr,
1468
1543
  using_key: str | None = None,
1469
1544
  standardize: bool = False,
1545
+ exclude: str | list | None = None,
1470
1546
  **kwargs,
1471
1547
  ) -> tuple[list[str], list[str]]:
1472
1548
  """Save features or labels records from the using_key instance.
@@ -1492,6 +1568,7 @@ def update_registry_from_using_instance(
1492
1568
  field=field,
1493
1569
  registry=registry_using,
1494
1570
  standardize=standardize,
1571
+ exclude=exclude,
1495
1572
  **kwargs,
1496
1573
  )
1497
1574
  labels_using = registry_using.filter(
@@ -1519,3 +1596,6 @@ def _save_organism(name: str): # pragma: no cover
1519
1596
  )
1520
1597
  organism.save()
1521
1598
  return organism
1599
+
1600
+
1601
+ Curate = Curator # backward compat