lamindb 0.74.3__py3-none-any.whl → 0.75.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
lamindb/_curate.py CHANGED
@@ -1,6 +1,7 @@
1
1
  from __future__ import annotations
2
2
 
3
- from typing import TYPE_CHECKING, Iterable
3
+ import copy
4
+ from typing import TYPE_CHECKING, Iterable, Type
4
5
 
5
6
  import anndata as ad
6
7
  import lamindb_setup as ln_setup
@@ -9,7 +10,6 @@ from lamin_utils import colors, logger
9
10
  from lamindb_setup.core._docs import doc_args
10
11
  from lnschema_core import (
11
12
  Artifact,
12
- Collection,
13
13
  Feature,
14
14
  Record,
15
15
  Run,
@@ -31,23 +31,25 @@ class CurateLookup:
31
31
  self,
32
32
  categoricals: dict[str, FieldAttr],
33
33
  slots: dict[str, FieldAttr] = None,
34
- using: str | None = None,
34
+ using_key: str | None = None,
35
35
  ) -> None:
36
36
  if slots is None:
37
37
  slots = {}
38
38
  self._fields = {**categoricals, **slots}
39
- self._using = None if using == "default" else using
40
- self._using_name = self._using or ln_setup.settings.instance.slug
41
- debug_message = f"Lookup objects from the " f"{colors.italic(self._using_name)}"
39
+ self._using_key = None if using_key == "default" else using_key
40
+ self._using_key_name = self._using_key or ln_setup.settings.instance.slug
41
+ debug_message = (
42
+ f"Lookup objects from the " f"{colors.italic(self._using_key_name)}"
43
+ )
42
44
  logger.debug(debug_message)
43
45
 
44
46
  def __getattr__(self, name):
45
47
  if name in self._fields:
46
48
  registry = self._fields[name].field.model
47
- if self._using == "public":
49
+ if self._using_key == "public":
48
50
  return registry.public().lookup()
49
51
  else:
50
- return get_registry_instance(registry, self._using).lookup()
52
+ return get_registry_instance(registry, self._using_key).lookup()
51
53
  raise AttributeError(
52
54
  f"'{self.__class__.__name__}' object has no attribute '{name}'"
53
55
  )
@@ -55,10 +57,10 @@ class CurateLookup:
55
57
  def __getitem__(self, name):
56
58
  if name in self._fields:
57
59
  registry = self._fields[name].field.model
58
- if self._using == "public":
60
+ if self._using_key == "public":
59
61
  return registry.public().lookup()
60
62
  else:
61
- return get_registry_instance(registry, self._using).lookup()
63
+ return get_registry_instance(registry, self._using_key).lookup()
62
64
  raise AttributeError(
63
65
  f"'{self.__class__.__name__}' object has no attribute '{name}'"
64
66
  )
@@ -72,7 +74,7 @@ class CurateLookup:
72
74
  [str([key]) for key in self._fields if not key.isidentifier()]
73
75
  )
74
76
  return (
75
- f"Lookup objects from the {colors.italic(self._using_name)}:\n "
77
+ f"Lookup objects from the {colors.italic(self._using_key_name)}:\n "
76
78
  f"{colors.green(getattr_keys)}\n "
77
79
  f"{colors.green(getitem_keys)}\n\n"
78
80
  "Example:\n → categories = validator.lookup().cell_type\n"
@@ -83,15 +85,19 @@ class CurateLookup:
83
85
 
84
86
 
85
87
  class DataFrameCurator:
86
- """Annotation flow for a DataFrame object.
88
+ """Curation flow for a DataFrame object.
89
+
90
+ See also :class:`~lamindb.Curate`.
87
91
 
88
92
  Args:
89
93
  df: The DataFrame object to curate.
90
94
  columns: The field attribute for the feature column.
91
95
  categoricals: A dictionary mapping column names to registry_field.
92
- using: The reference instance containing registries to validate against.
96
+ using_key: The reference instance containing registries to validate against.
93
97
  verbosity: The verbosity level.
94
98
  organism: The organism name.
99
+ sources: A dictionary mapping column names to Source records.
100
+ exclude: A dictionary mapping column names to values to exclude.
95
101
 
96
102
  Examples:
97
103
  >>> import bionty as bt
@@ -106,40 +112,56 @@ class DataFrameCurator:
106
112
  df: pd.DataFrame,
107
113
  columns: FieldAttr = Feature.name,
108
114
  categoricals: dict[str, FieldAttr] | None = None,
109
- using: str | None = None,
115
+ using_key: str | None = None,
110
116
  verbosity: str = "hint",
111
117
  organism: str | None = None,
118
+ sources: dict[str, Record] | None = None,
119
+ exclude: dict | None = None,
112
120
  ) -> None:
113
121
  from lamindb.core._settings import settings
114
122
 
115
123
  self._df = df
116
124
  self._fields = categoricals or {}
117
125
  self._columns_field = columns
118
- self._using = using
126
+ self._using_key = using_key
119
127
  settings.verbosity = verbosity
120
128
  self._artifact = None
121
129
  self._collection = None
122
130
  self._validated = False
123
131
  self._kwargs = {"organism": organism} if organism else {}
132
+ if sources is None:
133
+ sources = {}
134
+ self._sources = sources
135
+ if exclude is None:
136
+ exclude = {}
137
+ self._exclude = exclude
138
+ self._non_validated = None
124
139
  self._save_columns()
125
140
 
141
+ @property
142
+ def non_validated(self) -> list:
143
+ """Return the non-validated features and labels."""
144
+ if self._non_validated is None:
145
+ raise ValueError("Please run validate() first!")
146
+ return self._non_validated
147
+
126
148
  @property
127
149
  def fields(self) -> dict:
128
150
  """Return the columns fields to validate against."""
129
151
  return self._fields
130
152
 
131
- def lookup(self, using: str | None = None) -> CurateLookup:
153
+ def lookup(self, using_key: str | None = None) -> CurateLookup:
132
154
  """Lookup categories.
133
155
 
134
156
  Args:
135
- using: The instance where the lookup is performed.
136
- if None (default), the lookup is performed on the instance specified in "using" parameter of the validator.
157
+ using_key: The instance where the lookup is performed.
158
+ if None (default), the lookup is performed on the instance specified in "using_key" parameter of the validator.
137
159
  if "public", the lookup is performed on the public reference.
138
160
  """
139
161
  return CurateLookup(
140
162
  categoricals=self._fields,
141
163
  slots={"columns": self._columns_field},
142
- using=using or self._using,
164
+ using_key=using_key or self._using_key,
143
165
  )
144
166
 
145
167
  def _save_columns(self, validated_only: bool = True, **kwargs) -> None:
@@ -156,8 +178,9 @@ class DataFrameCurator:
156
178
  field=self._columns_field,
157
179
  key="columns",
158
180
  save_function="add_new_from_columns",
159
- using=self._using,
181
+ using_key=self._using_key,
160
182
  validated_only=False,
183
+ source=self._sources.get("columns"),
161
184
  **kwargs,
162
185
  )
163
186
 
@@ -169,9 +192,11 @@ class DataFrameCurator:
169
192
  field=self._columns_field,
170
193
  key="columns",
171
194
  save_function="add_new_from_columns",
172
- using=self._using,
195
+ using_key=self._using_key,
173
196
  validated_only=validated_only,
174
197
  df=self._df, # Get the Feature type from df
198
+ source=self._sources.get("columns"),
199
+ warning=False, # Do not warn about missing columns, just an info message
175
200
  **kwargs,
176
201
  )
177
202
 
@@ -220,8 +245,9 @@ class DataFrameCurator:
220
245
  values=self._df[categorical].unique().tolist(),
221
246
  field=self.fields[categorical],
222
247
  key=categorical,
223
- using=self._using,
248
+ using_key=self._using_key,
224
249
  validated_only=validated_only,
250
+ sources=self._sources.get(categorical),
225
251
  **kwargs,
226
252
  )
227
253
 
@@ -238,10 +264,12 @@ class DataFrameCurator:
238
264
  Whether the DataFrame is validated.
239
265
  """
240
266
  self._kwargs.update({"organism": organism} if organism else {})
241
- self._validated = validate_categories_in_df(
267
+ self._validated, self._non_validated = validate_categories_in_df( # type: ignore
242
268
  self._df,
243
269
  fields=self.fields,
244
- using=self._using,
270
+ using_key=self._using_key,
271
+ sources=self._sources,
272
+ exclude=self._exclude,
245
273
  **self._kwargs,
246
274
  )
247
275
  return self._validated
@@ -283,41 +311,6 @@ class DataFrameCurator:
283
311
 
284
312
  return self._artifact
285
313
 
286
- def save_collection(
287
- self,
288
- artifact: Artifact | Iterable[Artifact],
289
- name: str,
290
- description: str | None = None,
291
- reference: str | None = None,
292
- reference_type: str | None = None,
293
- ) -> Collection:
294
- """Save a collection from artifact/artifacts.
295
-
296
- Args:
297
- artifact: One or several saved Artifacts.
298
- name: Title of the publication.
299
- description: Description of the publication.
300
- reference: Accession number (e.g. GSE#, E-MTAB#, etc.).
301
- reference_type: Source type (e.g. GEO, ArrayExpress, SRA, etc.).
302
- """
303
- collection = Collection(
304
- artifact,
305
- name=name,
306
- description=description,
307
- reference=reference,
308
- reference_type=reference_type,
309
- )
310
- slug = ln_setup.settings.instance.slug
311
- if collection._state.adding:
312
- collection.save()
313
- else: # pragma: no cover
314
- collection.save()
315
- logger.warning(f"collection already exists in {colors.italic(slug)}!")
316
- if ln_setup.settings.instance.is_remote: # pragma: no cover
317
- logger.print(f"go to https://lamin.ai/{slug}/collection/{collection.uid}")
318
- self._collection = collection
319
- return collection
320
-
321
314
  def clean_up_failed_runs(self):
322
315
  """Clean up previous failed runs that don't save any outputs."""
323
316
  from lamindb.core._run_context import run_context
@@ -329,15 +322,21 @@ class DataFrameCurator:
329
322
 
330
323
 
331
324
  class AnnDataCurator(DataFrameCurator):
332
- """Annotation flow for ``AnnData``.
325
+ """Curation flow for ``AnnData``.
326
+
327
+ See also :class:`~lamindb.Curate`.
328
+
329
+ Note that if genes are removed from the AnnData object, the object should be recreated using :meth:`~lamindb.Curate.from_anndata`.
333
330
 
334
331
  Args:
335
332
  data: The AnnData object or an AnnData-like path.
336
333
  var_index: The registry field for mapping the ``.var`` index.
337
334
  categoricals: A dictionary mapping ``.obs.columns`` to a registry field.
338
- using: A reference LaminDB instance.
335
+ using_key: A reference LaminDB instance.
339
336
  verbosity: The verbosity level.
340
337
  organism: The organism name.
338
+ sources: A dictionary mapping ``.obs.columns`` to Source records.
339
+ exclude: A dictionary mapping column names to values to exclude.
341
340
 
342
341
  Examples:
343
342
  >>> import bionty as bt
@@ -354,14 +353,19 @@ class AnnDataCurator(DataFrameCurator):
354
353
  data: ad.AnnData | UPathStr,
355
354
  var_index: FieldAttr,
356
355
  categoricals: dict[str, FieldAttr] | None = None,
357
- using: str = "default",
356
+ obs_columns: FieldAttr = Feature.name,
357
+ using_key: str = "default",
358
358
  verbosity: str = "hint",
359
359
  organism: str | None = None,
360
+ sources: dict[str, Record] | None = None,
361
+ exclude: dict | None = None,
360
362
  ) -> None:
361
363
  from lamindb_setup.core import upath
362
364
 
363
365
  from ._artifact import data_is_anndata
364
366
 
367
+ if sources is None:
368
+ sources = {}
365
369
  if not data_is_anndata(data):
366
370
  raise ValueError(
367
371
  "data has to be an AnnData object or a path to AnnData-like"
@@ -378,12 +382,14 @@ class AnnDataCurator(DataFrameCurator):
378
382
  super().__init__(
379
383
  df=self._adata.obs,
380
384
  categoricals=categoricals,
381
- using=using,
385
+ columns=obs_columns,
386
+ using_key=using_key,
382
387
  verbosity=verbosity,
383
388
  organism=organism,
389
+ sources=sources,
390
+ exclude=exclude,
384
391
  )
385
392
  self._obs_fields = categoricals
386
- self._save_from_var_index(validated_only=True, **self._kwargs)
387
393
 
388
394
  @property
389
395
  def var_index(self) -> FieldAttr:
@@ -395,18 +401,18 @@ class AnnDataCurator(DataFrameCurator):
395
401
  """Return the obs fields to validate against."""
396
402
  return self._obs_fields
397
403
 
398
- def lookup(self, using: str | None = None) -> CurateLookup:
404
+ def lookup(self, using_key: str | None = None) -> CurateLookup:
399
405
  """Lookup categories.
400
406
 
401
407
  Args:
402
- using: The instance where the lookup is performed.
408
+ using_key: The instance where the lookup is performed.
403
409
  if None (default), the lookup is performed on the instance specified in "using_key" parameter of the validator.
404
410
  if "public", the lookup is performed on the public reference.
405
411
  """
406
412
  return CurateLookup(
407
413
  categoricals=self._obs_fields,
408
414
  slots={"columns": self._columns_field, "var_index": self._var_field},
409
- using=using or self._using,
415
+ using_key=using_key or self._using_key,
410
416
  )
411
417
 
412
418
  def _save_from_var_index(
@@ -414,15 +420,25 @@ class AnnDataCurator(DataFrameCurator):
414
420
  ):
415
421
  """Save variable records."""
416
422
  update_registry(
417
- values=self._adata.var.index,
423
+ values=list(self._adata.var.index),
418
424
  field=self.var_index,
419
425
  key="var_index",
420
426
  save_function="add_new_from_var_index",
421
- using=self._using,
427
+ using_key=self._using_key,
422
428
  validated_only=validated_only,
423
429
  organism=organism,
430
+ source=self._sources.get("var_index"),
424
431
  )
425
432
 
433
+ def _update_registry_all(self, validated_only: bool = True, **kwargs):
434
+ """Save labels for all features."""
435
+ for name in self.fields.keys():
436
+ logger.info(f"saving labels for '{name}'")
437
+ if name == "var_index":
438
+ self._save_from_var_index(validated_only=validated_only, **kwargs)
439
+ else:
440
+ self._update_registry(name, validated_only=validated_only, **kwargs)
441
+
426
442
  def add_new_from_var_index(self, organism: str | None = None, **kwargs):
427
443
  """Update variable records.
428
444
 
@@ -433,6 +449,15 @@ class AnnDataCurator(DataFrameCurator):
433
449
  self._kwargs.update({"organism": organism} if organism else {})
434
450
  self._save_from_var_index(validated_only=False, **self._kwargs, **kwargs)
435
451
 
452
+ def add_validated_from_var_index(self, organism: str | None = None):
453
+ """Add validated variable records.
454
+
455
+ Args:
456
+ organism: The organism name.
457
+ """
458
+ self._kwargs.update({"organism": organism} if organism else {})
459
+ self._save_from_var_index(validated_only=True, **self._kwargs)
460
+
436
461
  def validate(self, organism: str | None = None) -> bool:
437
462
  """Validate categories.
438
463
 
@@ -443,20 +468,32 @@ class AnnDataCurator(DataFrameCurator):
443
468
  Whether the AnnData object is validated.
444
469
  """
445
470
  self._kwargs.update({"organism": organism} if organism else {})
446
- if self._using is not None and self._using != "default":
471
+ if self._using_key is not None and self._using_key != "default":
447
472
  logger.important(
448
- f"validating metadata using registries of instance {colors.italic(self._using)}"
473
+ f"validating metadata using registries of instance {colors.italic(self._using_key)}"
449
474
  )
450
- validated_var = validate_categories(
475
+
476
+ validated_var, non_validated_var = validate_categories(
451
477
  self._adata.var.index,
452
478
  field=self._var_field,
453
479
  key="var_index",
454
- using=self._using,
455
- **self._kwargs,
480
+ using_key=self._using_key,
481
+ source=self._sources.get("var_index"),
482
+ validated_hint_print=".add_validated_from_var_index()",
483
+ exclude=self._exclude.get("var_index"),
484
+ **self._kwargs, # type: ignore
456
485
  )
457
- validated_obs = validate_categories_in_df(
458
- self._adata.obs, fields=self.categoricals, using=self._using, **self._kwargs
486
+ validated_obs, non_validated_obs = validate_categories_in_df(
487
+ self._adata.obs,
488
+ fields=self.categoricals,
489
+ using_key=self._using_key,
490
+ sources=self._sources,
491
+ exclude=self._exclude,
492
+ **self._kwargs,
459
493
  )
494
+ self._non_validated = non_validated_obs # type: ignore
495
+ if len(non_validated_var) > 0:
496
+ self._non_validated["var_index"] = non_validated_var # type: ignore
460
497
  self._validated = validated_var and validated_obs
461
498
  return self._validated
462
499
 
@@ -488,7 +525,12 @@ class AnnDataCurator(DataFrameCurator):
488
525
 
489
526
 
490
527
  class MuDataCurator:
491
- """Annotation flow for a ``MuData`` object.
528
+ """Curation flow for a ``MuData`` object.
529
+
530
+ See also :class:`~lamindb.Curate`.
531
+
532
+ Note that if genes or other measurements are removed from the MuData object,
533
+ the object should be recreated using :meth:`~lamindb.Curate.from_mudata`.
492
534
 
493
535
  Args:
494
536
  mdata: The MuData object to curate.
@@ -497,9 +539,11 @@ class MuDataCurator:
497
539
  ``{"modality_1": bt.Gene.ensembl_gene_id, "modality_2": ln.CellMarker.name}``
498
540
  categoricals: A dictionary mapping ``.obs.columns`` to a registry field.
499
541
  Use modality keys to specify categoricals for MuData slots such as `"rna:cell_type": bt.CellType.name`.
500
- using: A reference LaminDB instance.
542
+ using_key: A reference LaminDB instance.
501
543
  verbosity: The verbosity level.
502
544
  organism: The organism name.
545
+ sources: A dictionary mapping ``.obs.columns`` to Source records.
546
+ exclude: A dictionary mapping column names to values to exclude.
503
547
 
504
548
  Examples:
505
549
  >>> import bionty as bt
@@ -516,24 +560,34 @@ class MuDataCurator:
516
560
  mdata: MuData,
517
561
  var_index: dict[str, dict[str, FieldAttr]],
518
562
  categoricals: dict[str, FieldAttr] | None = None,
519
- using: str = "default",
563
+ using_key: str = "default",
520
564
  verbosity: str = "hint",
521
565
  organism: str | None = None,
566
+ sources: dict[str, Record] | None = None,
567
+ exclude: dict | None = None,
522
568
  ) -> None:
569
+ if sources is None:
570
+ sources = {}
571
+ self._sources = sources
572
+ if exclude is None:
573
+ exclude = {}
574
+ self._exclude = exclude
523
575
  self._mdata = mdata
524
576
  self._kwargs = {"organism": organism} if organism else {}
525
577
  self._var_fields = var_index
526
578
  self._verify_modality(self._var_fields.keys())
527
579
  self._obs_fields = self._parse_categoricals(categoricals)
528
580
  self._modalities = set(self._var_fields.keys()) | set(self._obs_fields.keys())
529
- self._using = using
581
+ self._using_key = using_key
530
582
  self._verbosity = verbosity
531
583
  self._df_annotators = {
532
584
  modality: DataFrameCurator(
533
585
  df=mdata[modality].obs if modality != "obs" else mdata.obs,
534
586
  categoricals=self._obs_fields.get(modality, {}),
535
- using=using,
587
+ using_key=using_key,
536
588
  verbosity=verbosity,
589
+ sources=self._sources.get(modality),
590
+ exclude=self._exclude.get(modality),
537
591
  **self._kwargs,
538
592
  )
539
593
  for modality in self._modalities
@@ -564,11 +618,11 @@ class MuDataCurator:
564
618
  ):
565
619
  """Save variable records."""
566
620
  update_registry(
567
- values=self._mdata[modality].var.index,
621
+ values=list(self._mdata[modality].var.index),
568
622
  field=self._var_fields[modality],
569
623
  key="var_index",
570
624
  save_function="add_new_from_var_index",
571
- using=self._using,
625
+ using_key=self._using_key,
572
626
  validated_only=validated_only,
573
627
  dtype="number",
574
628
  **kwargs,
@@ -592,12 +646,12 @@ class MuDataCurator:
592
646
  obs_fields["obs"][k] = v
593
647
  return obs_fields
594
648
 
595
- def lookup(self, using: str | None = None) -> CurateLookup:
649
+ def lookup(self, using_key: str | None = None) -> CurateLookup:
596
650
  """Lookup categories.
597
651
 
598
652
  Args:
599
- using: The instance where the lookup is performed.
600
- if None (default), the lookup is performed on the instance specified in "using" parameter of the validator.
653
+ using_key: The instance where the lookup is performed.
654
+ if None (default), the lookup is performed on the instance specified in "using_key" parameter of the validator.
601
655
  if "public", the lookup is performed on the public reference.
602
656
  """
603
657
  return CurateLookup(
@@ -606,7 +660,7 @@ class MuDataCurator:
606
660
  **self._obs_fields,
607
661
  **{f"{k}_var_index": v for k, v in self._var_fields.items()},
608
662
  },
609
- using=using or self._using,
663
+ using_key=using_key or self._using_key,
610
664
  )
611
665
 
612
666
  def add_new_from_columns(
@@ -625,14 +679,15 @@ class MuDataCurator:
625
679
  **kwargs: Additional keyword arguments to pass to the registry model.
626
680
  """
627
681
  self._kwargs.update({"organism": organism} if organism else {})
682
+ values = column_names or self._mdata[modality].obs.columns
628
683
  update_registry(
629
- values=column_names or self._mdata[modality].obs.columns,
684
+ values=list(values),
630
685
  field=Feature.name,
631
686
  key=f"{modality} obs columns",
632
- using=self._using,
687
+ using_key=self._using_key,
633
688
  validated_only=False,
634
689
  df=self._mdata[modality].obs,
635
- **self._kwargs,
690
+ **self._kwargs, # type: ignore
636
691
  **kwargs,
637
692
  )
638
693
 
@@ -651,6 +706,18 @@ class MuDataCurator:
651
706
  modality=modality, validated_only=False, **self._kwargs, **kwargs
652
707
  )
653
708
 
709
+ def add_validated_from_var_index(self, modality: str, organism: str | None = None):
710
+ """Add validated variable records.
711
+
712
+ Args:
713
+ modality: The modality name.
714
+ organism: The organism name.
715
+ """
716
+ self._kwargs.update({"organism": organism} if organism else {})
717
+ self._save_from_var_index_modality(
718
+ modality=modality, validated_only=True, **self._kwargs
719
+ )
720
+
654
721
  def add_validated_from(
655
722
  self, key: str, modality: str | None = None, organism: str | None = None
656
723
  ):
@@ -693,28 +760,48 @@ class MuDataCurator:
693
760
  def validate(self, organism: str | None = None) -> bool:
694
761
  """Validate categories."""
695
762
  self._kwargs.update({"organism": organism} if organism else {})
696
- if self._using is not None and self._using != "default":
763
+ if self._using_key is not None and self._using_key != "default":
697
764
  logger.important(
698
- f"validating metadata using registries of instance {colors.italic(self._using)}"
765
+ f"validating metadata using registries of instance {colors.italic(self._using_key)}"
699
766
  )
700
767
  validated_var = True
768
+ non_validated_var_modality = {}
701
769
  for modality, var_field in self._var_fields.items():
702
- validated_var &= validate_categories(
770
+ is_validated_var, non_validated_var = validate_categories(
703
771
  self._mdata[modality].var.index,
704
772
  field=var_field,
705
773
  key=f"{modality}_var_index",
706
- using=self._using,
707
- **self._kwargs,
774
+ using_key=self._using_key,
775
+ exclude=self._exclude.get(f"{modality}_var_index"),
776
+ **self._kwargs, # type: ignore
708
777
  )
778
+ validated_var &= is_validated_var
779
+ if len(non_validated_var) > 0:
780
+ non_validated_var_modality[modality] = non_validated_var
781
+
709
782
  validated_obs = True
783
+ non_validated_obs_modality = {}
710
784
  for modality, fields in self._obs_fields.items():
711
785
  if modality == "obs":
712
786
  obs = self._mdata.obs
713
787
  else:
714
788
  obs = self._mdata[modality].obs
715
- validated_obs &= validate_categories_in_df(
716
- obs, fields=fields, using=self._using, **self._kwargs
789
+ is_validated_obs, non_validated_obs = validate_categories_in_df(
790
+ obs,
791
+ fields=fields,
792
+ using_key=self._using_key,
793
+ sources=self._sources.get(modality),
794
+ exclude=self._exclude.get(modality),
795
+ **self._kwargs,
717
796
  )
797
+ validated_obs &= is_validated_obs
798
+ non_validated_obs_modality[modality] = non_validated_obs
799
+ if modality in non_validated_var_modality:
800
+ non_validated_obs_modality[modality]["var_index"] = (
801
+ non_validated_var_modality[modality]
802
+ )
803
+ if len(non_validated_obs_modality[modality]) > 0:
804
+ self._non_validated = non_validated_obs_modality[modality]
718
805
  self._validated = validated_var and validated_obs
719
806
  return self._validated
720
807
 
@@ -743,7 +830,32 @@ class MuDataCurator:
743
830
 
744
831
 
745
832
  class Curate:
746
- """Annotation flow."""
833
+ """Curation flow.
834
+
835
+ Data curation entails accurately labeling datasets with standardized metadata
836
+ to facilitate data integration, interpretation and analysis.
837
+
838
+ The curation flow has several steps:
839
+
840
+ 1. Create a :class:`Curate` object corresponding to the object type that you want to curate:
841
+
842
+ - :meth:`~lamindb.Curate.from_df`
843
+ - :meth:`~lamindb.Curate.from_anndata`
844
+ - :meth:`~lamindb.Curate.from_mudata`
845
+
846
+ During object creation, any passed categoricals found in the object will be saved.
847
+
848
+ 2. Run :meth:`~lamindb.core.DataFrameCurator.validate` to check the data against the defined criteria. This method identifies:
849
+
850
+ - Values that can be successfully validated and already exist in the registry.
851
+ - Values which are new and not yet validated or potentially problematic values.
852
+
853
+ 3. Determine how to handle validated and unvalidated values:
854
+
855
+ - Validated values not yet in the registry can be automatically registered using :meth:`~lamindb.core.DataFrameCurator.add_validated_from`.
856
+ - Valid and new values can be registered using :meth:`~lamindb.core.DataFrameCurator.add_new_from`.
857
+ - All unvalidated values can be accessed using :meth:`~lamindb.core.DataFrameCurator.non_validated` and subsequently removed from the object at hand.
858
+ """
747
859
 
748
860
  @classmethod
749
861
  @doc_args(DataFrameCurator.__doc__)
@@ -752,7 +864,7 @@ class Curate:
752
864
  df: pd.DataFrame,
753
865
  categoricals: dict[str, FieldAttr] | None = None,
754
866
  columns: FieldAttr = Feature.name,
755
- using: str | None = None,
867
+ using_key: str | None = None,
756
868
  verbosity: str = "hint",
757
869
  organism: str | None = None,
758
870
  ) -> DataFrameCurator:
@@ -761,7 +873,7 @@ class Curate:
761
873
  df=df,
762
874
  categoricals=categoricals,
763
875
  columns=columns,
764
- using=using,
876
+ using_key=using_key,
765
877
  verbosity=verbosity,
766
878
  organism=organism,
767
879
  )
@@ -773,18 +885,22 @@ class Curate:
773
885
  data: ad.AnnData | UPathStr,
774
886
  var_index: FieldAttr,
775
887
  categoricals: dict[str, FieldAttr] | None = None,
776
- using: str = "default",
888
+ obs_columns: FieldAttr = Feature.name,
889
+ using_key: str = "default",
777
890
  verbosity: str = "hint",
778
891
  organism: str | None = None,
892
+ sources: dict[str, Record] | None = None,
779
893
  ) -> AnnDataCurator:
780
894
  """{}""" # noqa: D415
781
895
  return AnnDataCurator(
782
896
  data=data,
783
897
  var_index=var_index,
784
898
  categoricals=categoricals,
785
- using=using,
899
+ obs_columns=obs_columns,
900
+ using_key=using_key,
786
901
  verbosity=verbosity,
787
902
  organism=organism,
903
+ sources=sources,
788
904
  )
789
905
 
790
906
  @classmethod
@@ -794,7 +910,7 @@ class Curate:
794
910
  mdata: MuData,
795
911
  var_index: dict[str, dict[str, FieldAttr]],
796
912
  categoricals: dict[str, FieldAttr] | None = None,
797
- using: str = "default",
913
+ using_key: str = "default",
798
914
  verbosity: str = "hint",
799
915
  organism: str | None = None,
800
916
  ) -> MuDataCurator:
@@ -803,29 +919,68 @@ class Curate:
803
919
  mdata=mdata,
804
920
  var_index=var_index,
805
921
  categoricals=categoricals,
806
- using=using,
922
+ using_key=using_key,
807
923
  verbosity=verbosity,
808
924
  organism=organism,
809
925
  )
810
926
 
811
927
 
812
- def get_registry_instance(registry: Record, using: str | None = None) -> Record:
928
+ def get_registry_instance(registry: Record, using_key: str | None = None) -> Record:
813
929
  """Get a registry instance using a specific instance."""
814
- if using is not None and using != "default":
815
- return registry.using(using)
930
+ if using_key is not None and using_key != "default":
931
+ return registry.using(using_key)
816
932
  return registry
817
933
 
818
934
 
935
+ def get_current_filter_kwargs(registry: type[Record], kwargs: dict) -> dict:
936
+ """Make sure the source and organism are saved in the same database as the registry."""
937
+ from lamindb.core._settings import settings
938
+
939
+ db = registry.filter().db
940
+ source = kwargs.get("source")
941
+ organism = kwargs.get("organism")
942
+ filter_kwargs = kwargs.copy()
943
+ try:
944
+ verbosity = settings.verbosity
945
+ settings.verbosity = "error"
946
+ if isinstance(organism, Record) and organism._state.db != "default":
947
+ if db is None or db == "default":
948
+ organism_default = copy.copy(organism)
949
+ # save the organism record in the default database
950
+ organism_default.save()
951
+ filter_kwargs["organism"] = organism_default
952
+ if isinstance(source, Record) and source._state.db != "default":
953
+ if db is None or db == "default":
954
+ source_default = copy.copy(source)
955
+ # save the source record in the default database
956
+ source_default.save()
957
+ filter_kwargs["source"] = source_default
958
+ finally:
959
+ settings.verbosity = verbosity
960
+ return filter_kwargs
961
+
962
+
819
963
  def standardize_and_inspect(
820
- values: Iterable[str], field: FieldAttr, registry: Record, **kwargs
964
+ values: Iterable[str],
965
+ field: FieldAttr,
966
+ registry: type[Record],
967
+ standardize: bool = False,
968
+ **kwargs,
821
969
  ):
822
970
  """Standardize and inspect values using a registry."""
823
- if hasattr(registry, "standardize") and hasattr(
824
- registry,
825
- "synonyms", # https://github.com/laminlabs/lamindb/issues/1685
826
- ):
827
- values = registry.standardize(values, field=field, mute=True, **kwargs)
828
- return registry.inspect(values, field=field, mute=True, **kwargs)
971
+ filter_kwargs = get_current_filter_kwargs(registry, kwargs)
972
+
973
+ if standardize:
974
+ if hasattr(registry, "standardize") and hasattr(
975
+ registry,
976
+ "synonyms", # https://github.com/laminlabs/lamindb/issues/1685
977
+ ):
978
+ standardized_values = registry.standardize(
979
+ values, field=field, mute=True, **filter_kwargs
980
+ )
981
+ values = standardized_values
982
+
983
+ return registry.inspect(values, field=field, mute=True, **filter_kwargs)
829
984
 
830
985
 
831
986
  def check_registry_organism(registry: Record, organism: str | None = None) -> dict:
@@ -846,10 +1001,26 @@ def validate_categories(
846
1001
  values: Iterable[str],
847
1002
  field: FieldAttr,
848
1003
  key: str,
849
- using: str | None = None,
1004
+ using_key: str | None = None,
850
1005
  organism: str | None = None,
851
- ) -> bool:
852
- """Validate ontology terms in a pandas series using LaminDB registries."""
1006
+ source: Record | None = None,
1007
+ exclude: str | list | None = None,
1008
+ standardize: bool = True,
1009
+ validated_hint_print: str | None = None,
1010
+ ) -> tuple[bool, list]:
1011
+ """Validate ontology terms in a pandas series using LaminDB registries.
1012
+
1013
+ Args:
1014
+ values: The values to validate.
1015
+ field: The field attribute.
1016
+ key: The key referencing the slot in the DataFrame.
1017
+ using_key: A reference LaminDB instance.
1018
+ organism: The organism name.
1019
+ source: The source record.
1020
+ exclude: Exclude specific values.
1021
+ standardize: Standardize the values.
1022
+ validated_hint_print: The hint to print for validated values.
1023
+ """
853
1024
  from lamindb._from_values import _print_values
854
1025
  from lamindb.core._settings import settings
855
1026
 
@@ -861,42 +1032,60 @@ def validate_categories(
861
1032
  logger.indent = " "
862
1033
 
863
1034
  registry = field.field.model
864
- filter_kwargs = check_registry_organism(registry, organism)
1035
+ kwargs = check_registry_organism(registry, organism)
1036
+ kwargs.update({"source": source} if source else {})
1037
+
1038
+ # inspect the default instance
1039
+ if exclude is not None:
1040
+ exclude = [exclude] if isinstance(exclude, str) else exclude
1041
+ # exclude values are validated without source and organism
1042
+ inspect_result = registry.inspect(exclude, field=field, mute=True)
1043
+ # if exclude values are validated, remove them from the values
1044
+ values = [i for i in values if i not in inspect_result.validated]
865
1045
 
866
- # Inspect the default instance
867
1046
  inspect_result = standardize_and_inspect(
868
- values=values, field=field, registry=registry, **filter_kwargs
1047
+ values=values,
1048
+ field=field,
1049
+ registry=registry,
1050
+ standardize=standardize,
1051
+ **kwargs,
869
1052
  )
870
1053
  non_validated = inspect_result.non_validated
871
1054
 
872
1055
  values_validated = []
873
- if using is not None and using != "default" and non_validated:
874
- registry = get_registry_instance(registry, using)
875
- # Inspect the using instance
1056
+ if using_key is not None and using_key != "default" and non_validated:
1057
+ registry_using = get_registry_instance(registry, using_key)
1058
+ # inspect the using instance
876
1059
  inspect_result = standardize_and_inspect(
877
- values=non_validated, field=field, registry=registry, **filter_kwargs
1060
+ values=non_validated,
1061
+ field=field,
1062
+ registry=registry_using,
1063
+ standardize=standardize,
1064
+ **kwargs,
878
1065
  )
879
1066
  non_validated = inspect_result.non_validated
880
1067
  values_validated += inspect_result.validated
881
1068
 
882
- # Inspect from public (bionty only)
1069
+ # inspect from public (bionty only)
883
1070
  if hasattr(registry, "public"):
884
1071
  verbosity = settings.verbosity
885
1072
  try:
886
1073
  settings.verbosity = "error"
887
1074
  public_records = registry.from_values(
888
- non_validated, field=field, **filter_kwargs
1075
+ non_validated,
1076
+ field=field,
1077
+ **get_current_filter_kwargs(registry, kwargs),
889
1078
  )
890
1079
  values_validated += [getattr(r, field.field.name) for r in public_records]
891
1080
  finally:
892
1081
  settings.verbosity = verbosity
893
1082
 
894
- validated_hint_print = f".add_validated_from('{key}')"
1083
+ validated_hint_print = validated_hint_print or f".add_validated_from('{key}')"
895
1084
  n_validated = len(values_validated)
896
1085
  if n_validated > 0:
897
1086
  _log_mapping_info()
898
1087
  logger.warning(
899
- f"found {colors.yellow(f'{n_validated} terms')} validated terms: "
1088
+ f"found {colors.yellow(n_validated)} validated terms: "
900
1089
  f"{colors.yellow(values_validated)}\n → save terms via "
901
1090
  f"{colors.yellow(validated_hint_print)}"
902
1091
  )
@@ -907,39 +1096,49 @@ def validate_categories(
907
1096
  if n_non_validated == 0:
908
1097
  logger.indent = ""
909
1098
  logger.success(f"{key} is validated against {colors.italic(model_field)}")
910
- return True
1099
+ return True, []
911
1100
  else:
912
1101
  are = "are" if n_non_validated > 1 else "is"
913
1102
  print_values = _print_values(non_validated)
914
1103
  warning_message = (
915
- f"{colors.yellow(f'{n_non_validated} terms')} {are} not validated: "
916
- f"{colors.yellow(print_values)}\n → save terms via "
917
- f"{colors.yellow(non_validated_hint_print)}"
1104
+ f"{colors.red(f'{n_non_validated} terms')} {are} not validated: "
1105
+ f"{colors.red(print_values)}\n → save terms via "
1106
+ f"{colors.red(non_validated_hint_print)}"
918
1107
  )
919
1108
  if logger.indent == "":
920
1109
  _log_mapping_info()
921
1110
  logger.warning(warning_message)
922
1111
  logger.indent = ""
923
- return False
1112
+ return False, non_validated
924
1113
 
925
1114
 
926
1115
  def validate_categories_in_df(
927
1116
  df: pd.DataFrame,
928
1117
  fields: dict[str, FieldAttr],
929
- using: str | None = None,
1118
+ using_key: str | None = None,
1119
+ sources: dict[str, Record] = None,
1120
+ exclude: dict | None = None,
930
1121
  **kwargs,
931
- ) -> bool:
1122
+ ) -> tuple[bool, dict]:
932
1123
  """Validate categories in DataFrame columns using LaminDB registries."""
1124
+ if sources is None:
1125
+ sources = {}
933
1126
  validated = True
1127
+ non_validated = {}
934
1128
  for key, field in fields.items():
935
- validated &= validate_categories(
1129
+ is_val, non_val = validate_categories(
936
1130
  df[key],
937
1131
  field=field,
938
1132
  key=key,
939
- using=using,
1133
+ using_key=using_key,
1134
+ source=sources.get(key),
1135
+ exclude=exclude.get(key) if exclude else None,
940
1136
  **kwargs,
941
1137
  )
942
- return validated
1138
+ validated &= is_val
1139
+ if len(non_val) > 0:
1140
+ non_validated[key] = non_val
1141
+ return validated, non_validated
943
1142
 
944
1143
 
945
1144
  def save_artifact(
@@ -998,13 +1197,13 @@ def save_artifact(
998
1197
  organism,
999
1198
  )
1000
1199
 
1001
- if artifact.accessor == "DataFrame":
1200
+ if artifact._accessor == "DataFrame":
1002
1201
  artifact.features._add_set_from_df(field=columns_field, **feature_kwargs)
1003
- elif artifact.accessor == "AnnData":
1202
+ elif artifact._accessor == "AnnData":
1004
1203
  artifact.features._add_set_from_anndata(
1005
1204
  var_field=columns_field, **feature_kwargs
1006
1205
  )
1007
- elif artifact.accessor == "MuData":
1206
+ elif artifact._accessor == "MuData":
1008
1207
  artifact.features._add_set_from_mudata(
1009
1208
  var_fields=columns_field, **feature_kwargs
1010
1209
  )
@@ -1017,11 +1216,16 @@ def save_artifact(
1017
1216
  feature = features.get(key)
1018
1217
  registry = field.field.model
1019
1218
  filter_kwargs = check_registry_organism(registry, organism)
1219
+ filter_kwargs_current = get_current_filter_kwargs(registry, filter_kwargs)
1020
1220
  df = data if isinstance(data, pd.DataFrame) else data.obs
1021
- labels = registry.from_values(df[key], field=field, **filter_kwargs)
1221
+ labels = registry.from_values(
1222
+ df[key],
1223
+ field=field,
1224
+ **filter_kwargs_current,
1225
+ )
1022
1226
  artifact.labels.add(labels, feature)
1023
1227
 
1024
- if artifact.accessor == "MuData":
1228
+ if artifact._accessor == "MuData":
1025
1229
  for modality, modality_fields in fields.items():
1026
1230
  if modality == "obs":
1027
1231
  _add_labels(data, artifact, modality_fields)
@@ -1041,25 +1245,29 @@ def update_registry(
1041
1245
  field: FieldAttr,
1042
1246
  key: str,
1043
1247
  save_function: str = "add_new_from",
1044
- using: str | None = None,
1248
+ using_key: str | None = None,
1045
1249
  validated_only: bool = True,
1046
1250
  df: pd.DataFrame | None = None,
1047
1251
  organism: str | None = None,
1048
1252
  dtype: str | None = None,
1253
+ source: Record | None = None,
1254
+ standardize: bool = True,
1255
+ warning: bool = True,
1049
1256
  **kwargs,
1050
- ) -> list[Record]:
1051
- """Save features or labels records in the default instance from the using instance.
1257
+ ) -> None:
1258
+ """Save features or labels records in the default instance from the using_key instance.
1052
1259
 
1053
1260
  Args:
1054
1261
  values: A list of values to be saved as labels.
1055
1262
  field: The FieldAttr object representing the field for which labels are being saved.
1056
1263
  key: The name of the feature to save.
1057
1264
  save_function: The name of the function to save the labels.
1058
- using: The name of the instance from which to transfer labels (if applicable).
1265
+ using_key: The name of the instance from which to transfer labels (if applicable).
1059
1266
  validated_only: If True, only save validated labels.
1060
1267
  df: A DataFrame to save labels from.
1061
1268
  organism: The organism name.
1062
1269
  dtype: The type of the feature.
1270
+ source: The source record.
1063
1271
  kwargs: Additional keyword arguments to pass to the registry model to create new records.
1064
1272
  """
1065
1273
  from lamindb._save import save as ln_save
@@ -1067,51 +1275,79 @@ def update_registry(
1067
1275
 
1068
1276
  registry = field.field.model
1069
1277
  filter_kwargs = check_registry_organism(registry, organism)
1278
+ filter_kwargs.update({"source": source} if source else {})
1070
1279
 
1071
1280
  verbosity = settings.verbosity
1072
1281
  try:
1073
1282
  settings.verbosity = "error"
1283
+
1284
+ # save from public
1285
+ filter_kwargs_current = get_current_filter_kwargs(registry, filter_kwargs)
1286
+ existing_and_public_records = (
1287
+ registry.from_values(
1288
+ list(values),
1289
+ field=field,
1290
+ **filter_kwargs_current,
1291
+ )
1292
+ if values
1293
+ else []
1294
+ )
1295
+
1296
+ labels_saved: dict = {"from public": [], "without reference": []}
1297
+
1298
+ public_records = [r for r in existing_and_public_records if r._state.adding]
1299
+ # here we check to only save the public records if they are from the specified source
1300
+ # we check the uid because r.source and source can be from different instances
1301
+ if source:
1302
+ public_records = [r for r in public_records if r.source.uid == source.uid]
1303
+ ln_save(public_records)
1304
+ labels_saved["from public"] = [
1305
+ getattr(r, field.field.name) for r in public_records
1306
+ ]
1307
+ non_public_labels = [i for i in values if i not in labels_saved["from public"]]
1308
+
1309
+ # inspect the default instance
1074
1310
  inspect_result_current = standardize_and_inspect(
1075
- values=values, field=field, registry=registry, **filter_kwargs
1311
+ values=non_public_labels,
1312
+ field=field,
1313
+ registry=registry,
1314
+ standardize=standardize,
1315
+ **filter_kwargs,
1076
1316
  )
1077
1317
  if not inspect_result_current.non_validated:
1078
1318
  all_labels = registry.from_values(
1079
- inspect_result_current.validated, field=field, **filter_kwargs
1319
+ inspect_result_current.validated,
1320
+ field=field,
1321
+ **filter_kwargs_current,
1080
1322
  )
1081
1323
  settings.verbosity = verbosity
1082
1324
  return all_labels
1083
1325
 
1084
- labels_saved: dict = {"from public": [], "without reference": []}
1085
-
1326
+ # inspect the using_key instance
1086
1327
  (
1087
- labels_saved[f"from {using}"],
1328
+ labels_saved[f"from {using_key}"],
1088
1329
  non_validated_labels,
1089
1330
  ) = update_registry_from_using_instance(
1090
1331
  inspect_result_current.non_validated,
1091
1332
  field=field,
1092
- using=using,
1333
+ using_key=using_key,
1093
1334
  **filter_kwargs,
1094
1335
  )
1095
1336
 
1096
- public_records = (
1097
- registry.from_values(non_validated_labels, field=field, **filter_kwargs)
1098
- if non_validated_labels
1099
- else []
1100
- )
1101
- ln_save(public_records)
1102
- labels_saved["from public"] = [
1103
- getattr(r, field.field.name) for r in public_records
1104
- ]
1105
1337
  labels_saved["without reference"] = [
1106
- i for i in non_validated_labels if i not in labels_saved["from public"]
1338
+ i
1339
+ for i in non_validated_labels
1340
+ if i not in labels_saved[f"from {using_key}"]
1107
1341
  ]
1108
1342
 
1343
+ # save non-validated records
1109
1344
  if not validated_only:
1110
1345
  non_validated_records = []
1111
1346
  if df is not None and registry == Feature:
1112
1347
  non_validated_records = Feature.from_df(df)
1113
1348
  else:
1114
1349
  if "organism" in filter_kwargs:
1350
+ # make sure organism record is saved to the current instance
1115
1351
  filter_kwargs["organism"] = _save_organism(name=organism)
1116
1352
  init_kwargs = {}
1117
1353
  for value in labels_saved["without reference"]:
@@ -1119,19 +1355,24 @@ def update_registry(
1119
1355
  if registry == Feature:
1120
1356
  init_kwargs["dtype"] = "cat" if dtype is None else dtype
1121
1357
  non_validated_records.append(
1122
- registry(**init_kwargs, **filter_kwargs, **kwargs)
1358
+ registry(
1359
+ **init_kwargs,
1360
+ **{k: v for k, v in filter_kwargs.items() if k != "source"},
1361
+ **{k: v for k, v in kwargs.items() if k != "sources"},
1362
+ )
1123
1363
  )
1124
1364
  ln_save(non_validated_records)
1125
1365
 
1366
+ # save parent labels for ulabels
1126
1367
  if registry == ULabel and field.field.name == "name":
1127
1368
  save_ulabels_with_parent(values, field=field, key=key)
1128
1369
 
1129
- # get all records
1130
- all_labels = registry.from_values(
1131
- inspect_result_current.validated + inspect_result_current.non_validated,
1132
- field=field,
1133
- **filter_kwargs,
1134
- )
1370
+ # # get all records that are now validated in the current instance
1371
+ # all_labels = registry.from_values(
1372
+ # inspect_result_current.validated + inspect_result_current.non_validated,
1373
+ # field=field,
1374
+ # **get_current_filter_kwargs(registry, filter_kwargs),
1375
+ # )
1135
1376
  finally:
1136
1377
  settings.verbosity = verbosity
1137
1378
 
@@ -1141,9 +1382,10 @@ def update_registry(
1141
1382
  save_function=save_function,
1142
1383
  model_field=f"{registry.__name__}.{field.field.name}",
1143
1384
  validated_only=validated_only,
1385
+ warning=warning,
1144
1386
  )
1145
1387
 
1146
- return all_labels
1388
+ # return all_labels
1147
1389
 
1148
1390
 
1149
1391
  def log_saved_labels(
@@ -1152,6 +1394,7 @@ def log_saved_labels(
1152
1394
  save_function: str,
1153
1395
  model_field: str,
1154
1396
  validated_only: bool = True,
1397
+ warning: bool = True,
1155
1398
  ) -> None:
1156
1399
  """Log the saved labels."""
1157
1400
  from ._from_values import _print_values
@@ -1176,7 +1419,10 @@ def log_saved_labels(
1176
1419
  if save_function == "add_new_from"
1177
1420
  else f"\n → to save, run {colors.yellow(save_function)}"
1178
1421
  )
1179
- logger.warning(msg)
1422
+ if warning:
1423
+ logger.warning(msg)
1424
+ else:
1425
+ logger.info(msg)
1180
1426
  else:
1181
1427
  k = "" if k == "without reference" else f"{colors.green(k)} "
1182
1428
  # the term "transferred" stresses that this is always in the context of transferring
@@ -1191,7 +1437,7 @@ def save_ulabels_with_parent(values: list[str], field: FieldAttr, key: str) -> N
1191
1437
  """Save a parent label for the given labels."""
1192
1438
  registry = field.field.model
1193
1439
  assert registry == ULabel # noqa: S101
1194
- all_records = registry.from_values(values, field=field)
1440
+ all_records = registry.from_values(list(values), field=field)
1195
1441
  is_feature = registry.filter(name=f"is_{key}").one_or_none()
1196
1442
  if is_feature is None:
1197
1443
  is_feature = registry(name=f"is_{key}")
@@ -1202,15 +1448,16 @@ def save_ulabels_with_parent(values: list[str], field: FieldAttr, key: str) -> N
1202
1448
  def update_registry_from_using_instance(
1203
1449
  values: list[str],
1204
1450
  field: FieldAttr,
1205
- using: str | None = None,
1451
+ using_key: str | None = None,
1452
+ standardize: bool = False,
1206
1453
  **kwargs,
1207
1454
  ) -> tuple[list[str], list[str]]:
1208
- """Save features or labels records from the using instance.
1455
+ """Save features or labels records from the using_key instance.
1209
1456
 
1210
1457
  Args:
1211
1458
  values: A list of values to be saved as labels.
1212
1459
  field: The FieldAttr object representing the field for which labels are being saved.
1213
- using: The name of the instance from which to transfer labels (if applicable).
1460
+ using_key: The name of the instance from which to transfer labels (if applicable).
1214
1461
  kwargs: Additional keyword arguments to pass to the registry model.
1215
1462
 
1216
1463
  Returns:
@@ -1219,11 +1466,15 @@ def update_registry_from_using_instance(
1219
1466
  labels_saved = []
1220
1467
  not_saved = values
1221
1468
 
1222
- if using is not None and using != "default":
1223
- registry = field.field.model
1224
- registry_using = get_registry_instance(registry, using)
1469
+ if using_key is not None and using_key != "default":
1470
+ registry_using = get_registry_instance(field.field.model, using_key)
1471
+
1225
1472
  inspect_result_using = standardize_and_inspect(
1226
- values=values, field=field, registry=registry_using, **kwargs
1473
+ values=values,
1474
+ field=field,
1475
+ registry=registry_using,
1476
+ standardize=standardize,
1477
+ **kwargs,
1227
1478
  )
1228
1479
  labels_using = registry_using.filter(
1229
1480
  **{f"{field.field.name}__in": inspect_result_using.validated}
@@ -1242,7 +1493,7 @@ def _save_organism(name: str): # pragma: no cover
1242
1493
 
1243
1494
  organism = bt.Organism.filter(name=name).one_or_none()
1244
1495
  if organism is None:
1245
- organism = bt.Organism.from_public(name=name)
1496
+ organism = bt.Organism.from_source(name=name)
1246
1497
  if organism is None:
1247
1498
  raise ValueError(
1248
1499
  f"Organism '{name}' not found\n"