lamindb 0.69.9__py3-none-any.whl → 0.69.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
lamindb/_annotate.py CHANGED
@@ -1,15 +1,17 @@
1
1
  from __future__ import annotations
2
2
 
3
- from typing import TYPE_CHECKING, Iterable
3
+ from typing import TYPE_CHECKING, Iterable, Optional
4
4
 
5
5
  import anndata as ad
6
6
  import lamindb_setup as ln_setup
7
7
  import pandas as pd
8
8
  from lamin_utils import colors, logger
9
+ from lamindb_setup.core._docs import doc_args
9
10
  from lnschema_core import Artifact, Collection, Feature, Registry, Run, ULabel
10
11
 
11
12
  if TYPE_CHECKING:
12
13
  from lnschema_core.types import FieldAttr
14
+ from mudata import MuData
13
15
 
14
16
 
15
17
  class ValidationError(ValueError):
@@ -85,10 +87,16 @@ class DataFrameAnnotator:
85
87
  df: The DataFrame object to annotate.
86
88
  columns: The field attribute for the feature column.
87
89
  categoricals: A dictionary mapping column names to registry_field.
88
- For example:
89
- ``{"cell_type_ontology_id": bt.CellType.ontology_id, "donor_id": ln.ULabel.name}``.
90
90
  using: The reference instance containing registries to validate against.
91
91
  verbosity: The verbosity level.
92
+ organism: The organism name.
93
+
94
+ Examples:
95
+ >>> import bionty as bt
96
+ >>> annotate = ln.Annotate.from_df(
97
+ df,
98
+ categoricals={"cell_type_ontology_id": bt.CellType.ontology_id, "donor_id": ln.ULabel.name}
99
+ )
92
100
  """
93
101
 
94
102
  def __init__(
@@ -98,7 +106,7 @@ class DataFrameAnnotator:
98
106
  categoricals: dict[str, FieldAttr] | None = None,
99
107
  using: str | None = None,
100
108
  verbosity: str = "hint",
101
- **kwargs,
109
+ organism: str | None = None,
102
110
  ) -> None:
103
111
  from lamindb.core._settings import settings
104
112
 
@@ -110,7 +118,7 @@ class DataFrameAnnotator:
110
118
  self._artifact = None
111
119
  self._collection = None
112
120
  self._validated = False
113
- self._kwargs: dict = kwargs
121
+ self._kwargs = {"organism": organism} if organism else {}
114
122
  self._save_columns()
115
123
 
116
124
  @property
@@ -119,7 +127,7 @@ class DataFrameAnnotator:
119
127
  return self._fields
120
128
 
121
129
  def lookup(self, using: str | None = None) -> AnnotateLookup:
122
- """Lookup features and labels.
130
+ """Lookup categories.
123
131
 
124
132
  Args:
125
133
  using: The instance where the lookup is performed.
@@ -132,7 +140,7 @@ class DataFrameAnnotator:
132
140
  using=using or self._using,
133
141
  )
134
142
 
135
- def _save_columns(self, validated_only: bool = True) -> None:
143
+ def _save_columns(self, validated_only: bool = True, **kwargs) -> None:
136
144
  """Save column name records."""
137
145
  missing_columns = set(self.fields.keys()) - set(self._df.columns)
138
146
  if missing_columns:
@@ -148,7 +156,7 @@ class DataFrameAnnotator:
148
156
  save_function="add_new_from_columns",
149
157
  using=self._using,
150
158
  validated_only=False,
151
- kwargs=self._kwargs,
159
+ **kwargs,
152
160
  )
153
161
 
154
162
  # Save the rest of the columns based on validated_only
@@ -162,36 +170,47 @@ class DataFrameAnnotator:
162
170
  using=self._using,
163
171
  validated_only=validated_only,
164
172
  df=self._df, # Get the Feature type from df
165
- kwargs=self._kwargs,
173
+ **kwargs,
166
174
  )
167
175
 
168
- def add_validated_from(self, key: str, **kwargs):
176
+ def add_validated_from(self, key: str, organism: str | None = None):
169
177
  """Add validated categories.
170
178
 
171
179
  Args:
172
180
  key: The key referencing the slot in the DataFrame.
173
- **kwargs: Additional keyword arguments.
181
+ organism: The organism name.
174
182
  """
175
- self._update_registry(key, validated_only=True, **kwargs)
183
+ self._kwargs.update({"organism": organism} if organism else {})
184
+ self._update_registry(key, validated_only=True, **self._kwargs)
176
185
 
177
- def add_new_from(self, key: str, **kwargs):
186
+ def add_new_from(self, key: str, organism: str | None = None, **kwargs):
178
187
  """Add validated & new categories.
179
188
 
180
189
  Args:
181
190
  key: The key referencing the slot in the DataFrame from which to draw terms.
182
- **kwargs: Additional keyword arguments.
191
+ organism: The organism name.
192
+ **kwargs: Additional keyword arguments to pass to the registry model.
183
193
  """
184
- self._update_registry(key, validated_only=False, **kwargs)
194
+ if len(kwargs) > 0 and key == "all":
195
+ raise ValueError("Cannot pass additional arguments to 'all' key!")
196
+ self._kwargs.update({"organism": organism} if organism else {})
197
+ self._update_registry(key, validated_only=False, **self._kwargs, **kwargs)
198
+
199
+ def add_new_from_columns(self, organism: str | None = None, **kwargs):
200
+ """Add validated & new column names to its registry.
185
201
 
186
- def add_new_from_columns(self, **kwargs):
187
- """Add validated & new column names to its registry."""
188
- self._save_columns(validated_only=False, **kwargs)
202
+ Args:
203
+ organism: The organism name.
204
+ **kwargs: Additional keyword arguments to pass to the registry model.
205
+ """
206
+ self._kwargs.update({"organism": organism} if organism else {})
207
+ self._save_columns(validated_only=False, **self._kwargs, **kwargs)
189
208
 
190
209
  def _update_registry(self, categorical: str, validated_only: bool = True, **kwargs):
191
210
  if categorical == "all":
192
211
  self._update_registry_all(validated_only=validated_only, **kwargs)
193
212
  elif categorical == "columns":
194
- self._save_columns(validated_only=validated_only)
213
+ self._save_columns(validated_only=validated_only, **kwargs)
195
214
  else:
196
215
  if categorical not in self.fields:
197
216
  raise ValueError(f"Feature {categorical} is not part of the fields!")
@@ -201,7 +220,7 @@ class DataFrameAnnotator:
201
220
  key=categorical,
202
221
  using=self._using,
203
222
  validated_only=validated_only,
204
- kwargs=kwargs,
223
+ **kwargs,
205
224
  )
206
225
 
207
226
  def _update_registry_all(self, validated_only: bool = True, **kwargs):
@@ -210,13 +229,13 @@ class DataFrameAnnotator:
210
229
  logger.info(f"saving labels for '{name}'")
211
230
  self._update_registry(name, validated_only=validated_only, **kwargs)
212
231
 
213
- def validate(self, **kwargs) -> bool:
232
+ def validate(self, organism: str | None = None) -> bool:
214
233
  """Validate variables and categorical observations.
215
234
 
216
235
  Returns:
217
236
  Whether the DataFrame is validated.
218
237
  """
219
- self._kwargs.update(kwargs)
238
+ self._kwargs.update({"organism": organism} if organism else {})
220
239
  self._validated = validate_categories_in_df(
221
240
  self._df,
222
241
  fields=self.fields,
@@ -237,7 +256,6 @@ class DataFrameAnnotator:
237
256
  """
238
257
  from lamindb.core._settings import settings
239
258
 
240
- self._kwargs.update(kwargs)
241
259
  if not self._validated:
242
260
  raise ValidationError(
243
261
  f"Data object is not validated, please run {colors.yellow('validate()')}!"
@@ -255,6 +273,7 @@ class DataFrameAnnotator:
255
273
  description=description,
256
274
  fields=self.fields,
257
275
  columns_field=self._columns_field,
276
+ **kwargs,
258
277
  **self._kwargs,
259
278
  )
260
279
  finally:
@@ -314,9 +333,18 @@ class AnnDataAnnotator(DataFrameAnnotator):
314
333
  adata: The AnnData object to annotate.
315
334
  var_index: The registry field for mapping the ``.var`` index.
316
335
  categoricals: A dictionary mapping ``.obs.columns`` to a registry field.
317
- For example:
318
- ``{"cell_type_ontology_id": bt.CellType.ontology_id, "donor_id": ln.ULabel.name}``
319
336
  using: A reference LaminDB instance.
337
+ verbosity: The verbosity level.
338
+ organism: The organism name.
339
+
340
+ Examples:
341
+ >>> import bionty as bt
342
+ >>> annotate = ln.Annotate.from_anndata(
343
+ adata,
344
+ var_index=bt.Gene.ensembl_gene_id,
345
+ categoricals={"cell_type_ontology_id": bt.CellType.ontology_id, "donor_id": ln.ULabel.name},
346
+ organism="human",
347
+ )
320
348
  """
321
349
 
322
350
  def __init__(
@@ -326,7 +354,7 @@ class AnnDataAnnotator(DataFrameAnnotator):
326
354
  categoricals: dict[str, FieldAttr],
327
355
  using: str = "default",
328
356
  verbosity: str = "hint",
329
- **kwargs,
357
+ organism: str | None = None,
330
358
  ) -> None:
331
359
  self._adata = adata
332
360
  self._var_field = var_index
@@ -335,10 +363,10 @@ class AnnDataAnnotator(DataFrameAnnotator):
335
363
  categoricals=categoricals,
336
364
  using=using,
337
365
  verbosity=verbosity,
338
- **kwargs,
366
+ organism=organism,
339
367
  )
340
368
  self._obs_fields = categoricals
341
- self._save_from_var_index()
369
+ self._save_from_var_index(validated_only=True, **self._kwargs)
342
370
 
343
371
  @property
344
372
  def var_index(self) -> FieldAttr:
@@ -351,16 +379,23 @@ class AnnDataAnnotator(DataFrameAnnotator):
351
379
  return self._obs_fields
352
380
 
353
381
  def lookup(self, using: str | None = None) -> AnnotateLookup:
354
- """Lookup features and labels."""
382
+ """Lookup categories.
383
+
384
+ Args:
385
+ using: The instance where the lookup is performed.
386
+ if None (default), the lookup is performed on the instance specified in the "using" parameter of the validator.
387
+ if "public", the lookup is performed on the public reference.
388
+ """
355
389
  return AnnotateLookup(
356
390
  categorials=self._obs_fields,
357
391
  slots={"columns": self._columns_field, "var_index": self._var_field},
358
392
  using=using or self._using,
359
393
  )
360
394
 
361
- def _save_from_var_index(self, validated_only: bool = True, **kwargs):
395
+ def _save_from_var_index(
396
+ self, validated_only: bool = True, organism: str | None = None
397
+ ):
362
398
  """Save variable records."""
363
- self._kwargs.update(kwargs)
364
399
  update_registry(
365
400
  values=self._adata.var.index,
366
401
  field=self.var_index,
@@ -368,35 +403,56 @@ class AnnDataAnnotator(DataFrameAnnotator):
368
403
  save_function="add_new_from_var_index",
369
404
  using=self._using,
370
405
  validated_only=validated_only,
371
- kwargs=self._kwargs,
406
+ organism=organism,
372
407
  )
373
408
 
374
- def add_new_from_var_index(self, **kwargs):
375
- """Update variable records."""
376
- self._save_from_var_index(validated_only=False, **kwargs)
409
+ def add_new_from_var_index(self, organism: str | None = None, **kwargs):
410
+ """Update variable records.
377
411
 
378
- def validate(self, **kwargs) -> bool:
379
- """Validate categories."""
380
- self._kwargs.update(kwargs)
381
- self._validated = validate_anndata(
382
- self._adata,
383
- var_field=self.var_index,
384
- obs_fields=self.categoricals,
412
+ Args:
413
+ organism: The organism name.
414
+ **kwargs: Additional keyword arguments to pass to the registry model.
415
+ """
416
+ self._kwargs.update({"organism": organism} if organism else {})
417
+ self._save_from_var_index(validated_only=False, **self._kwargs, **kwargs)
418
+
419
+ def validate(self, organism: str | None = None) -> bool:
420
+ """Validate categories.
421
+
422
+ Args:
423
+ organism: The organism name.
424
+
425
+ Returns:
426
+ Whether the AnnData object is validated.
427
+ """
428
+ self._kwargs.update({"organism": organism} if organism else {})
429
+ if self._using is not None and self._using != "default":
430
+ logger.important(
431
+ f"validating metadata using registries of instance {colors.italic(self._using)}"
432
+ )
433
+ validated_var = validate_categories(
434
+ self._adata.var.index,
435
+ field=self._var_field,
436
+ key="var_index",
437
+ using=self._using,
385
438
  **self._kwargs,
386
439
  )
440
+ validated_obs = validate_categories_in_df(
441
+ self._adata.obs, fields=self.categoricals, using=self._using, **self._kwargs
442
+ )
443
+ self._validated = validated_var and validated_obs
387
444
  return self._validated
388
445
 
389
446
  def save_artifact(self, description: str, **kwargs) -> Artifact:
390
- """Save the validated AnnData and metadata.
447
+ """Save the validated ``AnnData`` and metadata.
391
448
 
392
449
  Args:
393
- description: Description of the AnnData object.
450
+ description: Description of the ``AnnData`` object.
394
451
  **kwargs: Object level metadata.
395
452
 
396
453
  Returns:
397
454
  A saved artifact record.
398
455
  """
399
- self._kwargs.update(kwargs)
400
456
  if not self._validated:
401
457
  raise ValidationError("Please run `validate()` first!")
402
458
 
@@ -406,6 +462,261 @@ class AnnDataAnnotator(DataFrameAnnotator):
406
462
  columns_field=self.var_index,
407
463
  fields=self.categoricals,
408
464
  **self._kwargs,
465
+ **kwargs,
466
+ )
467
+ return self._artifact
468
+
469
+
470
+ class MuDataAnnotator:
471
+ """Annotation flow for a ``MuData`` object.
472
+
473
+ Args:
474
+ mdata: The MuData object to annotate.
475
+ var_index: The registry field for mapping the ``.var`` index for each modality.
476
+ For example:
477
+ ``{"modality_1": bt.Gene.ensembl_gene_id, "modality_2": ln.CellMarker.name}``
478
+ categoricals: A dictionary mapping ``.obs.columns`` to a registry field.
479
+ using: A reference LaminDB instance.
480
+ verbosity: The verbosity level.
481
+ organism: The organism name.
482
+
483
+ Examples:
484
+ >>> import bionty as bt
485
+ >>> annotate = ln.Annotate.from_mudata(
486
+ mdata,
487
+ var_index={"rna": bt.Gene.ensembl_gene_id, "adt": ln.CellMarker.name},
488
+ categoricals={"cell_type_ontology_id": bt.CellType.ontology_id, "donor_id": ln.ULabel.name},
489
+ organism="human",
490
+ )
491
+ """
492
+
493
+ def __init__(
494
+ self,
495
+ mdata: MuData,
496
+ var_index: dict[str, dict[str, FieldAttr]],
497
+ categoricals: dict[str, FieldAttr],
498
+ using: str = "default",
499
+ verbosity: str = "hint",
500
+ organism: str | None = None,
501
+ ) -> None:
502
+ self._mdata = mdata
503
+ self._kwargs = {"organism": organism} if organism else {}
504
+ self._var_fields = var_index
505
+ self._verify_modality(self._var_fields.keys())
506
+ self._obs_fields = self._parse_categoricals(categoricals)
507
+ self._modalities = set(self._var_fields.keys()) | set(self._obs_fields.keys())
508
+ self._using = using
509
+ self._verbosity = verbosity
510
+ self._df_annotators = {
511
+ modality: DataFrameAnnotator(
512
+ df=mdata[modality].obs if modality != "obs" else mdata.obs,
513
+ categoricals=self._obs_fields.get(modality, {}),
514
+ using=using,
515
+ verbosity=verbosity,
516
+ **self._kwargs,
517
+ )
518
+ for modality in self._modalities
519
+ }
520
+ for modality in self._var_fields.keys():
521
+ self._save_from_var_index_modality(
522
+ modality=modality, validated_only=True, **self._kwargs
523
+ )
524
+
525
+ @property
526
+ def var_index(self) -> FieldAttr:
527
+ """Return the mapping of modality name to the registry field its ``.var`` index is validated against."""
528
+ return self._var_fields
529
+
530
+ @property
531
+ def categoricals(self) -> dict:
532
+ """Return the obs fields to validate against."""
533
+ return self._obs_fields
534
+
535
+ def _verify_modality(self, modalities: Iterable[str]):
536
+ """Verify the modality exists."""
537
+ for modality in modalities:
538
+ if modality not in self._mdata.mod.keys():
539
+ raise ValueError(f"modality '{modality}' does not exist!")
540
+
541
+ def _save_from_var_index_modality(
542
+ self, modality: str, validated_only: bool = True, **kwargs
543
+ ):
544
+ """Save variable records."""
545
+ update_registry(
546
+ values=self._mdata[modality].var.index,
547
+ field=self._var_fields[modality],
548
+ key="var_index",
549
+ save_function="add_new_from_var_index",
550
+ using=self._using,
551
+ validated_only=validated_only,
552
+ type="number",
553
+ **kwargs,
554
+ )
555
+
556
+ def _parse_categoricals(self, categoricals: dict[str, FieldAttr]) -> dict:
557
+ """Parse the categorical fields."""
558
+ prefixes = {f"{k}:" for k in self._mdata.mod.keys()}
559
+ obs_fields: dict[str, dict[str, FieldAttr]] = {}
560
+ for k, v in categoricals.items():
561
+ if k not in self._mdata.obs.columns:
562
+ raise ValueError(f"column '{k}' does not exist in mdata.obs!")
563
+ if any(k.startswith(prefix) for prefix in prefixes):
564
+ modality, col = k.split(":")[0], k.split(":")[1]
565
+ if modality not in obs_fields.keys():
566
+ obs_fields[modality] = {}
567
+ obs_fields[modality][col] = v
568
+ else:
569
+ if "obs" not in obs_fields.keys():
570
+ obs_fields["obs"] = {}
571
+ obs_fields["obs"][k] = v
572
+ return obs_fields
573
+
574
+ def lookup(self, using: str | None = None) -> AnnotateLookup:
575
+ """Lookup categories.
576
+
577
+ Args:
578
+ using: The instance where the lookup is performed.
579
+ if None (default), the lookup is performed on the instance specified in the "using" parameter of the validator.
580
+ if "public", the lookup is performed on the public reference.
581
+ """
582
+ return AnnotateLookup(
583
+ categorials=self._obs_fields,
584
+ slots={
585
+ **self._obs_fields,
586
+ **{f"{k}_var_index": v for k, v in self._var_fields.items()},
587
+ },
588
+ using=using or self._using,
589
+ )
590
+
591
+ def add_new_from_columns(
592
+ self,
593
+ modality: str,
594
+ column_names: list[str] | None = None,
595
+ organism: str | None = None,
596
+ **kwargs,
597
+ ):
598
+ """Update columns records.
599
+
600
+ Args:
601
+ modality: The modality name.
602
+ column_names: The column names to save.
603
+ organism: The organism name.
604
+ **kwargs: Additional keyword arguments to pass to the registry model.
605
+ """
606
+ self._kwargs.update({"organism": organism} if organism else {})
607
+ update_registry(
608
+ values=column_names or self._mdata[modality].obs.columns,
609
+ field=Feature.name,
610
+ key=f"{modality} obs columns",
611
+ using=self._using,
612
+ validated_only=False,
613
+ df=self._mdata[modality].obs,
614
+ **self._kwargs,
615
+ **kwargs,
616
+ )
617
+
618
+ def add_new_from_var_index(
619
+ self, modality: str, organism: str | None = None, **kwargs
620
+ ):
621
+ """Update variable records.
622
+
623
+ Args:
624
+ modality: The modality name.
625
+ organism: The organism name.
626
+ **kwargs: Additional keyword arguments to pass to the registry model.
627
+ """
628
+ self._kwargs.update({"organism": organism} if organism else {})
629
+ self._save_from_var_index_modality(
630
+ modality=modality, validated_only=False, **self._kwargs, **kwargs
631
+ )
632
+
633
+ def add_validated_from(
634
+ self, key: str, modality: str | None = None, organism: str | None = None
635
+ ):
636
+ """Add validated categories.
637
+
638
+ Args:
639
+ key: The key referencing the slot in the DataFrame.
640
+ modality: The modality name.
641
+ organism: The organism name.
642
+ """
643
+ self._kwargs.update({"organism": organism} if organism else {})
644
+ modality = modality or "obs"
645
+ if modality in self._df_annotators:
646
+ df_annotator = self._df_annotators[modality]
647
+ df_annotator.add_validated_from(key=key, **self._kwargs)
648
+
649
+ def add_new_from(
650
+ self,
651
+ key: str,
652
+ modality: str | None = None,
653
+ organism: str | None = None,
654
+ **kwargs,
655
+ ):
656
+ """Add validated & new categories.
657
+
658
+ Args:
659
+ key: The key referencing the slot in the DataFrame.
660
+ modality: The modality name.
661
+ organism: The organism name.
662
+ **kwargs: Additional keyword arguments to pass to the registry model.
663
+ """
664
+ if len(kwargs) > 0 and key == "all":
665
+ raise ValueError("Cannot pass additional arguments to 'all' key!")
666
+ self._kwargs.update({"organism": organism} if organism else {})
667
+ modality = modality or "obs"
668
+ if modality in self._df_annotators:
669
+ df_annotator = self._df_annotators[modality]
670
+ df_annotator.add_new_from(key=key, **self._kwargs, **kwargs)
671
+
672
+ def validate(self, organism: str | None = None) -> bool:
673
+ """Validate categories."""
674
+ self._kwargs.update({"organism": organism} if organism else {})
675
+ if self._using is not None and self._using != "default":
676
+ logger.important(
677
+ f"validating metadata using registries of instance {colors.italic(self._using)}"
678
+ )
679
+ validated_var = True
680
+ for modality, var_field in self._var_fields.items():
681
+ validated_var &= validate_categories(
682
+ self._mdata[modality].var.index,
683
+ field=var_field,
684
+ key=f"{modality}_var_index",
685
+ using=self._using,
686
+ **self._kwargs,
687
+ )
688
+ validated_obs = True
689
+ for modality, fields in self._obs_fields.items():
690
+ if modality == "obs":
691
+ obs = self._mdata.obs
692
+ else:
693
+ obs = self._mdata[modality].obs
694
+ validated_obs &= validate_categories_in_df(
695
+ obs, fields=fields, using=self._using, **self._kwargs
696
+ )
697
+ self._validated = validated_var and validated_obs
698
+ return self._validated
699
+
700
+ def save_artifact(self, description: str, **kwargs) -> Artifact:
701
+ """Save the validated ``MuData`` and metadata.
702
+
703
+ Args:
704
+ description: Description of the ``MuData`` object.
705
+ **kwargs: Object level metadata.
706
+
707
+ Returns:
708
+ A saved artifact record.
709
+ """
710
+ if not self._validated:
711
+ raise ValidationError("Please run `validate()` first!")
712
+
713
+ self._artifact = save_artifact(
714
+ self._mdata,
715
+ description=description,
716
+ columns_field=self.var_index,
717
+ fields=self.categoricals,
718
+ **self._kwargs,
719
+ **kwargs,
409
720
  )
410
721
  return self._artifact
411
722
 
@@ -414,6 +725,7 @@ class Annotate:
414
725
  """Annotation flow."""
415
726
 
416
727
  @classmethod
728
+ @doc_args(DataFrameAnnotator.__doc__)
417
729
  def from_df(
418
730
  cls,
419
731
  df: pd.DataFrame,
@@ -421,18 +733,20 @@ class Annotate:
421
733
  columns: FieldAttr = Feature.name,
422
734
  using: str | None = None,
423
735
  verbosity: str = "hint",
424
- **kwargs,
736
+ organism: str | None = None,
425
737
  ) -> DataFrameAnnotator:
738
+ """{}."""
426
739
  return DataFrameAnnotator(
427
740
  df=df,
428
741
  categoricals=categoricals,
429
742
  columns=columns,
430
743
  using=using,
431
744
  verbosity=verbosity,
432
- **kwargs,
745
+ organism=organism,
433
746
  )
434
747
 
435
748
  @classmethod
749
+ @doc_args(AnnDataAnnotator.__doc__)
436
750
  def from_anndata(
437
751
  cls,
438
752
  adata: ad.AnnData,
@@ -440,15 +754,37 @@ class Annotate:
440
754
  categoricals: dict[str, FieldAttr],
441
755
  using: str = "default",
442
756
  verbosity: str = "hint",
443
- **kwargs,
757
+ organism: str | None = None,
444
758
  ) -> AnnDataAnnotator:
759
+ """{}."""
445
760
  return AnnDataAnnotator(
446
761
  adata=adata,
447
762
  var_index=var_index,
448
763
  categoricals=categoricals,
449
764
  using=using,
450
765
  verbosity=verbosity,
451
- **kwargs,
766
+ organism=organism,
767
+ )
768
+
769
+ @classmethod
770
+ @doc_args(MuDataAnnotator.__doc__)
771
+ def from_mudata(
772
+ cls,
773
+ mdata: MuData,
774
+ var_index: dict[str, dict[str, FieldAttr]],
775
+ categoricals: dict[str, dict[str, FieldAttr]],
776
+ using: str = "default",
777
+ verbosity: str = "hint",
778
+ organism: str | None = None,
779
+ ) -> MuDataAnnotator:
780
+ """{}."""
781
+ return MuDataAnnotator(
782
+ mdata=mdata,
783
+ var_index=var_index,
784
+ categoricals=categoricals,
785
+ using=using,
786
+ verbosity=verbosity,
787
+ organism=organism,
452
788
  )
453
789
 
454
790
 
@@ -468,9 +804,7 @@ def standardize_and_inspect(
468
804
  return registry.inspect(values, field=field, mute=True, **kwargs)
469
805
 
470
806
 
471
- def check_registry_organism(
472
- registry: Registry, organism: str | None = None
473
- ) -> str | None:
807
+ def check_registry_organism(registry: Registry, organism: str | None = None) -> dict:
474
808
  """Check if a registry needs an organism and return it as keyword arguments (``{"organism": name}``, or ``{}`` if not needed)."""
475
809
  if hasattr(registry, "organism_id"):
476
810
  import bionty as bt
@@ -480,8 +814,8 @@ def check_registry_organism(
480
814
  f"{registry.__name__} registry requires an organism!\n"
481
815
  " → please pass an organism name via organism="
482
816
  )
483
- return organism or bt.settings.organism.name
484
- return None
817
+ return {"organism": organism or bt.settings.organism.name}
818
+ return {}
485
819
 
486
820
 
487
821
  def validate_categories(
@@ -489,22 +823,21 @@ def validate_categories(
489
823
  field: FieldAttr,
490
824
  key: str,
491
825
  using: str | None = None,
492
- **kwargs,
826
+ organism: str | None = None,
493
827
  ) -> bool:
494
828
  """Validate ontology terms in a pandas series using LaminDB registries."""
495
829
  from lamindb._from_values import _print_values
496
830
  from lamindb.core._settings import settings
497
831
 
498
832
  model_field = f"{field.field.model.__name__}.{field.field.name}"
499
- logger.indent = ""
500
- logger.info(f"mapping {colors.italic(key)} on {colors.italic(model_field)}")
501
- logger.indent = " "
833
+
834
+ def _log_mapping_info():
835
+ logger.indent = ""
836
+ logger.info(f"mapping {colors.italic(key)} on {colors.italic(model_field)}")
837
+ logger.indent = " "
502
838
 
503
839
  registry = field.field.model
504
- filter_kwargs = {}
505
- organism = check_registry_organism(registry, kwargs.get("organism"))
506
- if organism is not None:
507
- filter_kwargs["organism"] = organism
840
+ filter_kwargs = check_registry_organism(registry, organism)
508
841
 
509
842
  # Inspect the default instance
510
843
  inspect_result = standardize_and_inspect(
@@ -537,6 +870,7 @@ def validate_categories(
537
870
  validated_hint_print = f".add_validated_from('{key}')"
538
871
  n_validated = len(values_validated)
539
872
  if n_validated > 0:
873
+ _log_mapping_info()
540
874
  logger.warning(
541
875
  f"found {colors.yellow(f'{n_validated} terms')} validated terms: "
542
876
  f"{colors.yellow(values_validated)}\n → save terms via "
@@ -547,7 +881,8 @@ def validate_categories(
547
881
  non_validated = [i for i in non_validated if i not in values_validated]
548
882
  n_non_validated = len(non_validated)
549
883
  if n_non_validated == 0:
550
- logger.success(f"{key} validated")
884
+ logger.indent = ""
885
+ logger.success(f"{key} is validated against {colors.italic(model_field)}")
551
886
  return True
552
887
  else:
553
888
  are = "are" if n_non_validated > 1 else "is"
@@ -557,6 +892,8 @@ def validate_categories(
557
892
  f"{colors.yellow(print_values)}\n → save terms via "
558
893
  f"{colors.yellow(non_validated_hint_print)}"
559
894
  )
895
+ if logger.indent == "":
896
+ _log_mapping_info()
560
897
  logger.warning(warning_message)
561
898
  logger.indent = ""
562
899
  return False
@@ -581,37 +918,12 @@ def validate_categories_in_df(
581
918
  return validated
582
919
 
583
920
 
584
- def validate_anndata(
585
- adata: ad.AnnData,
586
- var_field: FieldAttr,
587
- obs_fields: dict[str, FieldAttr],
588
- using: str | None = None,
589
- **kwargs,
590
- ) -> bool:
591
- """Inspect metadata in an AnnData object using LaminDB registries."""
592
- if using is not None and using != "default":
593
- logger.important(
594
- f"validating metadata using registries of instance {colors.italic(using)}"
595
- )
596
-
597
- validated_var = validate_categories(
598
- adata.var.index,
599
- field=var_field,
600
- key="var_index",
601
- using=using,
602
- **kwargs,
603
- )
604
- validated_obs = validate_categories_in_df(
605
- adata.obs, fields=obs_fields, using=using, **kwargs
606
- )
607
- return validated_var and validated_obs
608
-
609
-
610
921
  def save_artifact(
611
- data: pd.DataFrame | ad.AnnData,
922
+ data: pd.DataFrame | ad.AnnData | MuData,
612
923
  description: str,
613
- fields: dict[str, FieldAttr],
614
- columns_field: FieldAttr,
924
+ fields: dict[str, FieldAttr] | dict[str, dict[str, FieldAttr]],
925
+ columns_field: FieldAttr | dict[str, FieldAttr],
926
+ organism: str | None = None,
615
927
  **kwargs,
616
928
  ) -> Artifact:
617
929
  """Save all metadata with an Artifact.
@@ -621,43 +933,67 @@ def save_artifact(
621
933
  description: A description of the artifact.
622
934
  fields: A dictionary mapping obs_column to registry_field.
623
935
  columns_field: The registry field to validate variables index against.
936
+ organism: The organism name.
624
937
  kwargs: Additional keyword arguments to pass to the registry model.
625
938
 
626
939
  Returns:
627
940
  The saved Artifact.
628
941
  """
942
+ artifact = None
629
943
  if isinstance(data, ad.AnnData):
630
- artifact = Artifact.from_anndata(data, description=description)
944
+ artifact = Artifact.from_anndata(data, description=description, **kwargs)
631
945
  artifact.n_observations = data.n_obs
632
946
  elif isinstance(data, pd.DataFrame):
633
- artifact = Artifact.from_df(data, description=description)
947
+ artifact = Artifact.from_df(data, description=description, **kwargs)
634
948
  else:
635
- raise ValueError("data must be a DataFrame or AnnData object")
949
+ try:
950
+ from mudata import MuData
951
+
952
+ if isinstance(data, MuData):
953
+ artifact = Artifact.from_mudata(data, description=description, **kwargs)
954
+ artifact.n_observations = data.n_obs
955
+ except ImportError:
956
+ pass
957
+ if artifact is None:
958
+ raise ValueError("data must be a DataFrame, AnnData or MuData object")
636
959
  artifact.save()
637
960
 
638
- feature_kwargs: dict = {}
639
- organism = check_registry_organism(
640
- columns_field.field.model, kwargs.pop("organism", None)
961
+ feature_kwargs = check_registry_organism(
962
+ (
963
+ list(columns_field.values())[0].field.model
964
+ if isinstance(columns_field, dict)
965
+ else columns_field.field.model
966
+ ),
967
+ organism,
641
968
  )
642
- if organism is not None:
643
- feature_kwargs["organism"] = organism
644
969
 
645
- if isinstance(data, ad.AnnData):
970
+ if artifact.accessor == "DataFrame":
971
+ artifact.features.add_from_df(field=columns_field, **feature_kwargs)
972
+ elif artifact.accessor == "AnnData":
646
973
  artifact.features.add_from_anndata(var_field=columns_field, **feature_kwargs)
974
+ elif artifact.accessor == "MuData":
975
+ artifact.features.add_from_mudata(var_fields=columns_field, **feature_kwargs)
647
976
  else:
648
- artifact.features.add_from_df(field=columns_field, **feature_kwargs)
649
-
650
- features = Feature.lookup().dict()
651
- for key, field in fields.items():
652
- feature = features.get(key)
653
- registry = field.field.model
654
- filter_kwargs = kwargs.copy()
655
- organism = check_registry_organism(registry, organism)
656
- if organism is not None:
657
- filter_kwargs["organism"] = organism
658
- df = data.obs if isinstance(data, ad.AnnData) else data
659
- labels = registry.from_values(df[key], field=field, **filter_kwargs)
660
- artifact.labels.add(labels, feature)
977
+ raise NotImplementedError
978
+
979
+ def _add_labels(data, artifact: Artifact, fields: dict[str, FieldAttr]):
980
+ features = Feature.lookup().dict()
981
+ for key, field in fields.items():
982
+ feature = features.get(key)
983
+ registry = field.field.model
984
+ filter_kwargs = check_registry_organism(registry, organism)
985
+ df = data if isinstance(data, pd.DataFrame) else data.obs
986
+ labels = registry.from_values(df[key], field=field, **filter_kwargs)
987
+ artifact.labels.add(labels, feature)
988
+
989
+ if artifact.accessor == "MuData":
990
+ for modality, modality_fields in fields.items():
991
+ if modality == "obs":
992
+ _add_labels(data, artifact, modality_fields)
993
+ else:
994
+ _add_labels(data[modality], artifact, modality_fields)
995
+ else:
996
+ _add_labels(data, artifact, fields)
661
997
 
662
998
  slug = ln_setup.settings.instance.slug
663
999
  if ln_setup.settings.instance.is_remote:
@@ -672,8 +1008,10 @@ def update_registry(
672
1008
  save_function: str = "add_new_from",
673
1009
  using: str | None = None,
674
1010
  validated_only: bool = True,
675
- kwargs: dict | None = None,
676
1011
  df: pd.DataFrame | None = None,
1012
+ organism: str | None = None,
1013
+ type: str | None = None,
1014
+ **kwargs,
677
1015
  ) -> None:
678
1016
  """Save features or labels records in the default instance from the using instance.
679
1017
 
@@ -684,18 +1022,16 @@ def update_registry(
684
1022
  save_function: The name of the function to save the labels.
685
1023
  using: The name of the instance from which to transfer labels (if applicable).
686
1024
  validated_only: If True, only save validated labels.
687
- kwargs: Additional keyword arguments to pass to the registry model.
688
1025
  df: A DataFrame to save labels from.
1026
+ organism: The organism name.
1027
+ type: The type of the feature.
1028
+ kwargs: Additional keyword arguments to pass to the registry model to create new records.
689
1029
  """
690
1030
  from lamindb._save import save as ln_save
691
1031
  from lamindb.core._settings import settings
692
1032
 
693
- filter_kwargs = {} if kwargs is None else kwargs.copy()
694
1033
  registry = field.field.model
695
-
696
- organism = check_registry_organism(registry, filter_kwargs.pop("organism", None))
697
- if organism is not None:
698
- filter_kwargs["organism"] = organism
1034
+ filter_kwargs = check_registry_organism(registry, organism)
699
1035
 
700
1036
  verbosity = settings.verbosity
701
1037
  try:
@@ -716,7 +1052,7 @@ def update_registry(
716
1052
  inspect_result_current.non_validated,
717
1053
  field=field,
718
1054
  using=using,
719
- kwargs=filter_kwargs,
1055
+ **filter_kwargs,
720
1056
  )
721
1057
 
722
1058
  public_records = (
@@ -742,8 +1078,8 @@ def update_registry(
742
1078
  for value in labels_saved["without reference"]:
743
1079
  filter_kwargs[field.field.name] = value
744
1080
  if registry == Feature:
745
- filter_kwargs["type"] = "category"
746
- non_validated_records.append(registry(**filter_kwargs))
1081
+ filter_kwargs["type"] = "category" if type is None else type
1082
+ non_validated_records.append(registry(**filter_kwargs, **kwargs))
747
1083
  ln_save(non_validated_records)
748
1084
 
749
1085
  if registry == ULabel and field.field.name == "name":
@@ -815,7 +1151,7 @@ def update_registry_from_using_instance(
815
1151
  values: list[str],
816
1152
  field: FieldAttr,
817
1153
  using: str | None = None,
818
- kwargs: dict | None = None,
1154
+ **kwargs,
819
1155
  ) -> tuple[list[str], list[str]]:
820
1156
  """Save features or labels records from the using instance.
821
1157
 
@@ -828,7 +1164,6 @@ def update_registry_from_using_instance(
828
1164
  Returns:
829
1165
  A tuple containing the list of saved labels and the list of non-saved labels.
830
1166
  """
831
- kwargs = kwargs or {}
832
1167
  labels_saved = []
833
1168
  not_saved = values
834
1169