lamindb 0.76.13__py3-none-any.whl → 0.76.14__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
lamindb/_curate.py CHANGED
@@ -34,21 +34,21 @@ class CurateLookup:
34
34
  categoricals: dict[str, FieldAttr],
35
35
  slots: dict[str, FieldAttr] = None,
36
36
  using_key: str | None = None,
37
+ public: bool = False,
37
38
  ) -> None:
38
39
  if slots is None:
39
40
  slots = {}
40
41
  self._fields = {**categoricals, **slots}
41
42
  self._using_key = None if using_key == "default" else using_key
42
43
  self._using_key_name = self._using_key or ln_setup.settings.instance.slug
43
- debug_message = (
44
- f"Lookup objects from the " f"{colors.italic(self._using_key_name)}"
45
- )
44
+ self._public = public
45
+ debug_message = f"Lookup objects from {colors.italic(self._using_key_name)}"
46
46
  logger.debug(debug_message)
47
47
 
48
48
  def __getattr__(self, name):
49
49
  if name in self._fields:
50
50
  registry = self._fields[name].field.model
51
- if self._using_key == "public":
51
+ if self._public and hasattr(registry, "public"):
52
52
  return registry.public().lookup()
53
53
  else:
54
54
  return get_registry_instance(registry, self._using_key).lookup()
@@ -59,7 +59,7 @@ class CurateLookup:
59
59
  def __getitem__(self, name):
60
60
  if name in self._fields:
61
61
  registry = self._fields[name].field.model
62
- if self._using_key == "public":
62
+ if self._public and hasattr(registry, "public"):
63
63
  return registry.public().lookup()
64
64
  else:
65
65
  return get_registry_instance(registry, self._using_key).lookup()
@@ -75,12 +75,14 @@ class CurateLookup:
75
75
  getitem_keys = "\n ".join(
76
76
  [str([key]) for key in self._fields if not key.isidentifier()]
77
77
  )
78
+ ref = "public" if self._public else self._using_key_name
78
79
  return (
79
- f"Lookup objects from the {colors.italic(self._using_key_name)}:\n "
80
+ f"Lookup objects from the {colors.italic(ref)}:\n "
80
81
  f"{colors.green(getattr_keys)}\n "
81
- f"{colors.green(getitem_keys)}\n\n"
82
- "Example:\n → categories = validator.lookup().cell_type\n"
83
- " → categories.alveolar_type_1_fibroblast_cell"
82
+ f"{colors.green(getitem_keys)}\n"
83
+ "Example:\n → categories = validator.lookup()['cell_type']\n"
84
+ " → categories.alveolar_type_1_fibroblast_cell\n\n"
85
+ "To look up public ontologies, use .lookup(public=True)"
84
86
  )
85
87
  else: # pragma: no cover
86
88
  return colors.warning("No fields are found!")
@@ -97,12 +99,20 @@ class BaseCurator:
97
99
  """
98
100
  pass
99
101
 
100
- def save_artifact(self, description: str | None = None, **kwargs) -> Artifact:
102
+ def save_artifact(
103
+ self,
104
+ description: str | None = None,
105
+ key: str | None = None,
106
+ revises: Artifact | None = None,
107
+ run: Run | None = None,
108
+ ) -> Artifact:
101
109
  """Save the dataset as artifact.
102
110
 
103
111
  Args:
104
- description: Description of the DataFrame object.
105
- **kwargs: Object level metadata.
112
+ description: `str | None = None` A description of the DataFrame object.
113
+ key: `str | None = None` A path-like key to reference artifact in default storage, e.g., `"myfolder/myfile.fcs"`. Artifacts with the same key form a revision family.
114
+ revises: `Artifact | None = None` Previous version of the artifact. Triggers a revision.
115
+ run: `Run | None = None` The run that creates the artifact.
106
116
 
107
117
  Returns:
108
118
  A saved artifact record.
@@ -182,7 +192,9 @@ class DataFrameCurator(BaseCurator):
182
192
  """Return the columns fields to validate against."""
183
193
  return self._fields
184
194
 
185
- def lookup(self, using_key: str | None = None) -> CurateLookup:
195
+ def lookup(
196
+ self, using_key: str | None = None, public: bool = False
197
+ ) -> CurateLookup:
186
198
  """Lookup categories.
187
199
 
188
200
  Args:
@@ -194,6 +206,7 @@ class DataFrameCurator(BaseCurator):
194
206
  categoricals=self._fields,
195
207
  slots={"columns": self._columns_field},
196
208
  using_key=using_key or self._using_key,
209
+ public=public,
197
210
  )
198
211
 
199
212
  def _check_valid_keys(self, extra: set = None) -> None:
@@ -245,16 +258,6 @@ class DataFrameCurator(BaseCurator):
245
258
  **kwargs,
246
259
  )
247
260
 
248
- def add_validated_from(self, key: str, organism: str | None = None):
249
- """Add validated categories.
250
-
251
- Args:
252
- key: The key referencing the slot in the DataFrame.
253
- organism: The organism name.
254
- """
255
- self._kwargs.update({"organism": organism} if organism else {})
256
- self._update_registry(key, validated_only=True, **self._kwargs)
257
-
258
261
  def add_new_from(self, key: str, organism: str | None = None, **kwargs):
259
262
  """Add validated & new categories.
260
263
 
@@ -300,7 +303,7 @@ class DataFrameCurator(BaseCurator):
300
303
  def _update_registry_all(self, validated_only: bool = True, **kwargs):
301
304
  """Save labels for all features."""
302
305
  for name in self.fields.keys():
303
- logger.info(f"saving labels for '{name}'")
306
+ logger.info(f"saving validated records of '{name}'")
304
307
  self._update_registry(name, validated_only=validated_only, **kwargs)
305
308
 
306
309
  def validate(self, organism: str | None = None) -> bool:
@@ -313,6 +316,10 @@ class DataFrameCurator(BaseCurator):
313
316
  Whether the DataFrame is validated.
314
317
  """
315
318
  self._kwargs.update({"organism": organism} if organism else {})
319
+
320
+ # add all validated records to the current instance
321
+ self._update_registry_all()
322
+
316
323
  self._validated, self._non_validated = validate_categories_in_df( # type: ignore
317
324
  self._df,
318
325
  fields=self.fields,
@@ -323,12 +330,20 @@ class DataFrameCurator(BaseCurator):
323
330
  )
324
331
  return self._validated
325
332
 
326
- def save_artifact(self, description: str | None = None, **kwargs) -> Artifact:
333
+ def save_artifact(
334
+ self,
335
+ description: str | None = None,
336
+ key: str | None = None,
337
+ revises: Artifact | None = None,
338
+ run: Run | None = None,
339
+ ) -> Artifact:
327
340
  """Save the validated DataFrame and metadata.
328
341
 
329
342
  Args:
330
- description: Description of the DataFrame object.
331
- **kwargs: Object level metadata.
343
+ description: `str | None = None` Description of the DataFrame object.
344
+ key: `str | None = None` A path-like key to reference artifact in default storage, e.g., `"myfolder/myfile.fcs"`. Artifacts with the same key form a revision family.
345
+ revises: `Artifact | None = None` Previous version of the artifact. Triggers a revision.
346
+ run: `Run | None = None` The run that creates the artifact.
332
347
 
333
348
  Returns:
334
349
  A saved artifact record.
@@ -344,15 +359,18 @@ class DataFrameCurator(BaseCurator):
344
359
  verbosity = settings.verbosity
345
360
  try:
346
361
  settings.verbosity = "warning"
347
- # save all validated records to the current instance
348
- self.add_validated_from("all")
362
+ if not self._validated:
363
+ # save all validated records to the current instance
364
+ self._update_registry_all()
349
365
 
350
366
  self._artifact = save_artifact(
351
367
  self._df,
352
368
  description=description,
353
369
  fields=self.fields,
354
370
  columns_field=self._columns_field,
355
- **kwargs,
371
+ key=key,
372
+ revises=revises,
373
+ run=run,
356
374
  **self._kwargs,
357
375
  )
358
376
  finally:
@@ -457,7 +475,9 @@ class AnnDataCurator(DataFrameCurator):
457
475
  """Return the obs fields to validate against."""
458
476
  return self._obs_fields
459
477
 
460
- def lookup(self, using_key: str | None = None) -> CurateLookup:
478
+ def lookup(
479
+ self, using_key: str | None = None, public: bool = False
480
+ ) -> CurateLookup:
461
481
  """Lookup categories.
462
482
 
463
483
  Args:
@@ -469,6 +489,7 @@ class AnnDataCurator(DataFrameCurator):
469
489
  categoricals=self._obs_fields,
470
490
  slots={"columns": self._columns_field, "var_index": self._var_field},
471
491
  using_key=using_key or self._using_key,
492
+ public=public,
472
493
  )
473
494
 
474
495
  def _save_from_var_index(
@@ -479,7 +500,7 @@ class AnnDataCurator(DataFrameCurator):
479
500
  values=list(self._adata.var.index),
480
501
  field=self.var_index,
481
502
  key="var_index",
482
- save_function="add_new_from_var_index",
503
+ save_function=".add_new_from_var_index()",
483
504
  using_key=self._using_key,
484
505
  validated_only=validated_only,
485
506
  organism=organism,
@@ -487,14 +508,13 @@ class AnnDataCurator(DataFrameCurator):
487
508
  exclude=self._exclude.get("var_index"),
488
509
  )
489
510
 
490
- def _update_registry_all(self, validated_only: bool = True, **kwargs):
511
+ def _update_registry_all(self):
491
512
  """Save labels for all features."""
492
- for name in self.fields.keys():
493
- logger.info(f"saving labels for '{name}'")
494
- if name == "var_index":
495
- self._save_from_var_index(validated_only=validated_only, **kwargs)
496
- else:
497
- self._update_registry(name, validated_only=validated_only, **kwargs)
513
+ logger.info("saving validated records of 'var_index'")
514
+ self._save_from_var_index(validated_only=True, **self._kwargs)
515
+ for name in self._obs_fields.keys():
516
+ logger.info(f"saving validated terms of '{name}'")
517
+ self._update_registry(name, validated_only=True, **self._kwargs)
498
518
 
499
519
  def add_new_from_var_index(self, organism: str | None = None, **kwargs):
500
520
  """Update variable records.
@@ -506,15 +526,6 @@ class AnnDataCurator(DataFrameCurator):
506
526
  self._kwargs.update({"organism": organism} if organism else {})
507
527
  self._save_from_var_index(validated_only=False, **self._kwargs, **kwargs)
508
528
 
509
- def add_validated_from_var_index(self, organism: str | None = None):
510
- """Add validated variable records.
511
-
512
- Args:
513
- organism: The organism name.
514
- """
515
- self._kwargs.update({"organism": organism} if organism else {})
516
- self._save_from_var_index(validated_only=True, **self._kwargs)
517
-
518
529
  def validate(self, organism: str | None = None) -> bool:
519
530
  """Validate categories.
520
531
 
@@ -530,6 +541,9 @@ class AnnDataCurator(DataFrameCurator):
530
541
  f"validating metadata using registries of instance {colors.italic(self._using_key)}"
531
542
  )
532
543
 
544
+ # add all validated records to the current instance
545
+ self._update_registry_all()
546
+
533
547
  validated_var, non_validated_var = validate_categories(
534
548
  self._adata.var.index,
535
549
  field=self._var_field,
@@ -554,30 +568,49 @@ class AnnDataCurator(DataFrameCurator):
554
568
  self._validated = validated_var and validated_obs
555
569
  return self._validated
556
570
 
557
- def save_artifact(self, description: str | None = None, **kwargs) -> Artifact:
571
+ def save_artifact(
572
+ self,
573
+ description: str | None = None,
574
+ key: str | None = None,
575
+ revises: Artifact | None = None,
576
+ run: Run | None = None,
577
+ ) -> Artifact:
558
578
  """Save the validated ``AnnData`` and metadata.
559
579
 
560
580
  Args:
561
- description: Description of the ``AnnData`` object.
562
- **kwargs: Object level metadata.
581
+ description: `str | None = None` A description of the ``AnnData`` object.
582
+ key: `str | None = None` A path-like key to reference artifact in default storage, e.g., `"myfolder/myfile.fcs"`. Artifacts with the same key form a revision family.
583
+ revises: `Artifact | None = None` Previous version of the artifact. Triggers a revision.
584
+ run: `Run | None = None` The run that creates the artifact.
563
585
 
564
586
  Returns:
565
587
  A saved artifact record.
566
588
  """
589
+ from lamindb.core._settings import settings
590
+
567
591
  if not self._validated:
568
592
  self.validate()
569
593
  if not self._validated:
570
594
  raise ValidationError("Dataset does not validate. Please curate.")
571
-
572
- self._artifact = save_artifact(
573
- self._data,
574
- adata=self._adata,
575
- description=description,
576
- columns_field=self.var_index,
577
- fields=self.categoricals,
578
- **self._kwargs,
579
- **kwargs,
580
- )
595
+ verbosity = settings.verbosity
596
+ try:
597
+ settings.verbosity = "warning"
598
+ if not self._validated:
599
+ # save all validated records to the current instance
600
+ self._update_registry_all()
601
+ self._artifact = save_artifact(
602
+ self._data,
603
+ adata=self._adata,
604
+ description=description,
605
+ columns_field=self.var_index,
606
+ fields=self.categoricals,
607
+ key=key,
608
+ revises=revises,
609
+ run=run,
610
+ **self._kwargs,
611
+ )
612
+ finally:
613
+ settings.verbosity = verbosity
581
614
  return self._artifact
582
615
 
583
616
 
@@ -656,10 +689,6 @@ class MuDataCurator:
656
689
  )
657
690
  for modality in self._modalities
658
691
  }
659
- for modality in self._var_fields.keys():
660
- self._save_from_var_index_modality(
661
- modality=modality, validated_only=True, **self._kwargs
662
- )
663
692
 
664
693
  @property
665
694
  def var_index(self) -> FieldAttr:
@@ -685,7 +714,7 @@ class MuDataCurator:
685
714
  values=list(self._mdata[modality].var.index),
686
715
  field=self._var_fields[modality],
687
716
  key="var_index",
688
- save_function="add_new_from_var_index",
717
+ save_function=f'.add_new_from_var_index("{modality}")',
689
718
  using_key=self._using_key,
690
719
  validated_only=validated_only,
691
720
  dtype="number",
@@ -712,7 +741,9 @@ class MuDataCurator:
712
741
  obs_fields["obs"][k] = v
713
742
  return obs_fields
714
743
 
715
- def lookup(self, using_key: str | None = None) -> CurateLookup:
744
+ def lookup(
745
+ self, using_key: str | None = None, public: bool = False
746
+ ) -> CurateLookup:
716
747
  """Lookup categories.
717
748
 
718
749
  Args:
@@ -727,6 +758,7 @@ class MuDataCurator:
727
758
  **{f"{k}_var_index": v for k, v in self._var_fields.items()},
728
759
  },
729
760
  using_key=using_key or self._using_key,
761
+ public=public,
730
762
  )
731
763
 
732
764
  def add_new_from_columns(
@@ -774,33 +806,14 @@ class MuDataCurator:
774
806
  modality=modality, validated_only=False, **self._kwargs, **kwargs
775
807
  )
776
808
 
777
- def add_validated_from_var_index(self, modality: str, organism: str | None = None):
778
- """Add validated variable records.
779
-
780
- Args:
781
- modality: The modality name.
782
- organism: The organism name.
783
- """
784
- self._kwargs.update({"organism": organism} if organism else {})
785
- self._save_from_var_index_modality(
786
- modality=modality, validated_only=True, **self._kwargs
787
- )
788
-
789
- def add_validated_from(
790
- self, key: str, modality: str | None = None, organism: str | None = None
791
- ):
792
- """Add validated categories.
793
-
794
- Args:
795
- key: The key referencing the slot in the DataFrame.
796
- modality: The modality name.
797
- organism: The organism name.
798
- """
799
- self._kwargs.update({"organism": organism} if organism else {})
800
- modality = modality or "obs"
801
- if modality in self._df_annotators:
802
- df_annotator = self._df_annotators[modality]
803
- df_annotator.add_validated_from(key=key, **self._kwargs)
809
+ def _update_registry_all(self):
810
+ """Update all registries."""
811
+ for modality in self._var_fields.keys():
812
+ self._save_from_var_index_modality(
813
+ modality=modality, validated_only=True, **self._kwargs
814
+ )
815
+ for _, df_annotator in self._df_annotators.items():
816
+ df_annotator._update_registry_all(validated_only=True, **self._kwargs)
804
817
 
805
818
  def add_new_from(
806
819
  self,
@@ -827,11 +840,22 @@ class MuDataCurator:
827
840
 
828
841
  def validate(self, organism: str | None = None) -> bool:
829
842
  """Validate categories."""
843
+ from lamindb.core._settings import settings
844
+
830
845
  self._kwargs.update({"organism": organism} if organism else {})
831
846
  if self._using_key is not None and self._using_key != "default":
832
847
  logger.important(
833
848
  f"validating metadata using registries of instance {colors.italic(self._using_key)}"
834
849
  )
850
+
851
+ # add all validated records to the current instance
852
+ verbosity = settings.verbosity
853
+ try:
854
+ settings.verbosity = "error"
855
+ self._update_registry_all()
856
+ finally:
857
+ settings.verbosity = verbosity
858
+
835
859
  validated_var = True
836
860
  non_validated_var_modality = {}
837
861
  for modality, var_field in self._var_fields.items():
@@ -842,6 +866,7 @@ class MuDataCurator:
842
866
  using_key=self._using_key,
843
867
  source=self._sources.get(modality, {}).get("var_index"),
844
868
  exclude=self._exclude.get(modality, {}).get("var_index"),
869
+ validated_hint_print=f'.add_validated_from_var_index("{modality}")',
845
870
  **self._kwargs, # type: ignore
846
871
  )
847
872
  validated_var &= is_validated_var
@@ -874,56 +899,75 @@ class MuDataCurator:
874
899
  self._validated = validated_var and validated_obs
875
900
  return self._validated
876
901
 
877
- def save_artifact(self, description: str | None = None, **kwargs) -> Artifact:
902
+ def save_artifact(
903
+ self,
904
+ description: str | None = None,
905
+ key: str | None = None,
906
+ revises: Artifact | None = None,
907
+ run: Run | None = None,
908
+ ) -> Artifact:
878
909
  """Save the validated ``MuData`` and metadata.
879
910
 
880
911
  Args:
881
- description: Description of the ``MuData`` object.
882
- **kwargs: Object level metadata.
912
+ description: `str | None = None` A description of the ``MuData`` object.
913
+ key: `str | None = None` A path-like key to reference artifact in default storage, e.g., `"myfolder/myfile.fcs"`. Artifacts with the same key form a revision family.
914
+ revises: `Artifact | None = None` Previous version of the artifact. Triggers a revision.
915
+ run: `Run | None = None` The run that creates the artifact.
883
916
 
884
917
  Returns:
885
918
  A saved artifact record.
886
919
  """
920
+ from lamindb.core._settings import settings
921
+
887
922
  if not self._validated:
888
- raise ValidationError("Please run `validate()` first!")
923
+ self.validate()
924
+ if not self._validated:
925
+ raise ValidationError("Dataset does not validate. Please curate.")
926
+ verbosity = settings.verbosity
927
+ try:
928
+ settings.verbosity = "warning"
929
+ if not self._validated:
930
+ # save all validated records to the current instance
931
+ self._update_registry_all()
889
932
 
890
- self._artifact = save_artifact(
891
- self._mdata,
892
- description=description,
893
- columns_field=self.var_index,
894
- fields=self.categoricals,
895
- **self._kwargs,
896
- **kwargs,
897
- )
933
+ self._artifact = save_artifact(
934
+ self._mdata,
935
+ description=description,
936
+ columns_field=self.var_index,
937
+ fields=self.categoricals,
938
+ key=key,
939
+ revises=revises,
940
+ run=run,
941
+ **self._kwargs,
942
+ )
943
+ finally:
944
+ settings.verbosity = verbosity
898
945
  return self._artifact
899
946
 
900
947
 
901
948
  class Curator(BaseCurator):
902
949
  """Dataset curator.
903
950
 
904
- Data curation entails accurately labeling datasets with standardized metadata
905
- to facilitate data integration, interpretation and analysis.
906
-
907
- The curation flow has several steps:
908
-
909
- 1. Instantiate `Curator` from one of the following dataset objects:
951
+ A `Curator` object makes it easy to save validated & annotated artifacts.
910
952
 
911
- - :meth:`~lamindb.Curator.from_df`
912
- - :meth:`~lamindb.Curator.from_anndata`
913
- - :meth:`~lamindb.Curator.from_mudata`
953
+ Example:
914
954
 
915
- During object creation, any passed categoricals found in the object will be saved.
955
+ >>> curator = ln.Curator.from_df(
956
+ >>> df,
957
+ >>> # define validation criteria as mappings
958
+ >>> columns=ln.Feature.name, # map column names
959
+ >>> categoricals={"perturbation": ln.ULabel.name}, # map categories
960
+ >>> )
961
+ >>> curator.validate() # validate the data in df
962
+ >>> artifact = curate.save_artifact(description="my RNA-seq")
963
+ >>> artifact.describe() # see annotations
916
964
 
917
- 2. Run :meth:`~lamindb.core.DataFrameCurator.validate` to check the data against the defined criteria. This method identifies:
965
+ `curator.validate()` maps values within `df` according to the mapping criteria and logs validated & problematic values.
918
966
 
919
- - Values that can successfully validated and already exist in the registry.
920
- - Values which are new and not yet validated or potentially problematic values.
967
+ If you find non-validated values, you have several options:
921
968
 
922
- 3. Determine how to handle validated and non-validated values:
923
-
924
- - Validated values not yet in the registry can be automatically registered using :meth:`~lamindb.core.DataFrameCurator.add_validated_from`.
925
- - Valid and new values can be registered using :meth:`~lamindb.core.DataFrameCurator.add_new_from`.
926
- - All unvalidated values can be accessed using :meth:`~lamindb.core.DataFrameCurator.non_validated` and subsequently removed from the object at hand.
969
+ - new values found in the data can be registered using :meth:`~lamindb.core.DataFrameCurator.add_new_from`
970
+ - non-validated values can be accessed using :meth:`~lamindb.core.DataFrameCurator.non_validated` and addressed manually
927
971
  """
928
972
 
929
973
  @classmethod
@@ -1174,7 +1218,7 @@ def validate_categories(
1174
1218
  f"{colors.yellow(validated_hint_print)}"
1175
1219
  )
1176
1220
 
1177
- non_validated_hint_print = f".add_new_from('{key}')"
1221
+ non_validated_hint_print = validated_hint_print.replace("_validated_", "_new_")
1178
1222
  non_validated = [i for i in non_validated if i not in values_validated]
1179
1223
  n_non_validated = len(non_validated)
1180
1224
  if n_non_validated == 0:
@@ -1239,7 +1283,9 @@ def save_artifact(
1239
1283
  description: str | None = None,
1240
1284
  organism: str | None = None,
1241
1285
  adata: ad.AnnData | None = None,
1242
- **kwargs,
1286
+ key: str | None = None,
1287
+ revises: Artifact | None = None,
1288
+ run: Run | None = None,
1243
1289
  ) -> Artifact:
1244
1290
  """Save all metadata with an Artifact.
1245
1291
 
@@ -1249,29 +1295,43 @@ def save_artifact(
1249
1295
  fields: A dictionary mapping obs_column to registry_field.
1250
1296
  columns_field: The registry field to validate variables index against.
1251
1297
  organism: The organism name.
1252
- adata: The AnnData object to save, must be provided if data is a path.
1253
- kwargs: Additional keyword arguments to pass to the registry model.
1298
+ adata: The AnnData object to save and get n_observations, must be provided if data is a path.
1299
+ type: `Literal["dataset", "model"] | None = None` The artifact type.
1300
+ key: `str | None = None` A path-like key to reference artifact in default storage, e.g., `"myfolder/myfile.fcs"`. Artifacts with the same key form a revision family.
1301
+ revises: `Artifact | None = None` Previous version of the artifact. Triggers a revision.
1302
+ run: `Run | None = None` The run that creates the artifact.
1254
1303
 
1255
1304
  Returns:
1256
1305
  The saved Artifact.
1257
1306
  """
1258
1307
  from ._artifact import data_is_anndata
1308
+ from .core._data import add_labels
1259
1309
 
1260
1310
  artifact = None
1261
1311
  if data_is_anndata(data):
1262
1312
  assert adata is not None # noqa: S101
1263
- artifact = Artifact.from_anndata(data, description=description, **kwargs)
1313
+ artifact = Artifact.from_anndata(
1314
+ data, description=description, key=key, revises=revises, run=run
1315
+ )
1264
1316
  artifact.n_observations = adata.shape[0]
1265
1317
  data = adata
1266
1318
 
1267
1319
  elif isinstance(data, pd.DataFrame):
1268
- artifact = Artifact.from_df(data, description=description, **kwargs)
1320
+ artifact = Artifact.from_df(
1321
+ data, description=description, key=key, revises=revises, run=run
1322
+ )
1269
1323
  else:
1270
1324
  try:
1271
1325
  from mudata import MuData
1272
1326
 
1273
1327
  if isinstance(data, MuData):
1274
- artifact = Artifact.from_mudata(data, description=description, **kwargs)
1328
+ artifact = Artifact.from_mudata(
1329
+ data,
1330
+ description=description,
1331
+ key=key,
1332
+ revises=revises,
1333
+ run=run,
1334
+ )
1275
1335
  artifact.n_observations = data.n_obs
1276
1336
  except ImportError:
1277
1337
  pass
@@ -1301,7 +1361,12 @@ def save_artifact(
1301
1361
  else:
1302
1362
  raise NotImplementedError
1303
1363
 
1304
- def _add_labels(data, artifact: Artifact, fields: dict[str, FieldAttr]):
1364
+ def _add_labels(
1365
+ data,
1366
+ artifact: Artifact,
1367
+ fields: dict[str, FieldAttr],
1368
+ feature_ref_is_name: bool | None = None,
1369
+ ):
1305
1370
  features = Feature.lookup().dict()
1306
1371
  for key, field in fields.items():
1307
1372
  feature = features.get(key)
@@ -1314,16 +1379,47 @@ def save_artifact(
1314
1379
  field=field,
1315
1380
  **filter_kwargs_current,
1316
1381
  )
1317
- artifact.labels.add(labels, feature)
1382
+ if len(labels) == 0:
1383
+ continue
1384
+ if hasattr(registry, "_name_field"):
1385
+ label_ref_is_name = field.field.name == registry._name_field
1386
+ add_labels(
1387
+ artifact,
1388
+ records=labels,
1389
+ feature=feature,
1390
+ feature_ref_is_name=feature_ref_is_name,
1391
+ label_ref_is_name=label_ref_is_name,
1392
+ )
1318
1393
 
1319
1394
  if artifact._accessor == "MuData":
1320
1395
  for modality, modality_fields in fields.items():
1396
+ column_field_modality = columns_field.get(modality)
1321
1397
  if modality == "obs":
1322
- _add_labels(data, artifact, modality_fields)
1398
+ _add_labels(
1399
+ data,
1400
+ artifact,
1401
+ modality_fields,
1402
+ feature_ref_is_name=(
1403
+ None
1404
+ if column_field_modality is None
1405
+ else _ref_is_name(column_field_modality)
1406
+ ),
1407
+ )
1323
1408
  else:
1324
- _add_labels(data[modality], artifact, modality_fields)
1409
+ _add_labels(
1410
+ data[modality],
1411
+ artifact,
1412
+ modality_fields,
1413
+ feature_ref_is_name=(
1414
+ None
1415
+ if column_field_modality is None
1416
+ else _ref_is_name(column_field_modality)
1417
+ ),
1418
+ )
1325
1419
  else:
1326
- _add_labels(data, artifact, fields)
1420
+ _add_labels(
1421
+ data, artifact, fields, feature_ref_is_name=_ref_is_name(columns_field)
1422
+ )
1327
1423
 
1328
1424
  slug = ln_setup.settings.instance.slug
1329
1425
  if ln_setup.settings.instance.is_remote: # pragma: no cover
@@ -1438,7 +1534,8 @@ def update_registry(
1438
1534
  if not validated_only:
1439
1535
  non_validated_records = []
1440
1536
  if df is not None and registry == Feature:
1441
- non_validated_records = Feature.from_df(df)
1537
+ nonval_columns = Feature.inspect(df.columns, mute=True).non_validated
1538
+ non_validated_records = Feature.from_df(df.loc[:, nonval_columns])
1442
1539
  else:
1443
1540
  if "organism" in filter_kwargs:
1444
1541
  # make sure organism record is saved to the current instance
@@ -1600,4 +1697,12 @@ def _save_organism(name: str): # pragma: no cover
1600
1697
  return organism
1601
1698
 
1602
1699
 
1700
+ def _ref_is_name(field: FieldAttr) -> bool | None:
1701
+ """Check if the reference field is a name field."""
1702
+ from ._can_validate import get_name_field
1703
+
1704
+ name_field = get_name_field(field.field.model)
1705
+ return field.field.name == name_field
1706
+
1707
+
1603
1708
  Curate = Curator # backward compat