lamindb 0.76.12__py3-none-any.whl → 0.76.14__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lamindb/__init__.py +1 -1
- lamindb/_artifact.py +8 -9
- lamindb/_collection.py +18 -5
- lamindb/_curate.py +242 -137
- lamindb/_feature_set.py +3 -1
- lamindb/_from_values.py +1 -5
- lamindb/_parents.py +18 -3
- lamindb/_query_manager.py +0 -15
- lamindb/_query_set.py +8 -4
- lamindb/_record.py +82 -6
- lamindb/core/__init__.py +2 -0
- lamindb/core/_context.py +1 -1
- lamindb/core/_data.py +19 -7
- lamindb/core/_django.py +19 -5
- lamindb/core/_feature_manager.py +80 -44
- lamindb/core/_label_manager.py +91 -93
- lamindb/core/exceptions.py +7 -0
- lamindb/core/schema.py +42 -3
- lamindb/core/types.py +1 -0
- {lamindb-0.76.12.dist-info → lamindb-0.76.14.dist-info}/METADATA +6 -6
- {lamindb-0.76.12.dist-info → lamindb-0.76.14.dist-info}/RECORD +23 -23
- {lamindb-0.76.12.dist-info → lamindb-0.76.14.dist-info}/LICENSE +0 -0
- {lamindb-0.76.12.dist-info → lamindb-0.76.14.dist-info}/WHEEL +0 -0
lamindb/_curate.py
CHANGED
@@ -34,21 +34,21 @@ class CurateLookup:
|
|
34
34
|
categoricals: dict[str, FieldAttr],
|
35
35
|
slots: dict[str, FieldAttr] = None,
|
36
36
|
using_key: str | None = None,
|
37
|
+
public: bool = False,
|
37
38
|
) -> None:
|
38
39
|
if slots is None:
|
39
40
|
slots = {}
|
40
41
|
self._fields = {**categoricals, **slots}
|
41
42
|
self._using_key = None if using_key == "default" else using_key
|
42
43
|
self._using_key_name = self._using_key or ln_setup.settings.instance.slug
|
43
|
-
|
44
|
-
|
45
|
-
)
|
44
|
+
self._public = public
|
45
|
+
debug_message = f"Lookup objects from {colors.italic(self._using_key_name)}"
|
46
46
|
logger.debug(debug_message)
|
47
47
|
|
48
48
|
def __getattr__(self, name):
|
49
49
|
if name in self._fields:
|
50
50
|
registry = self._fields[name].field.model
|
51
|
-
if self.
|
51
|
+
if self._public and hasattr(registry, "public"):
|
52
52
|
return registry.public().lookup()
|
53
53
|
else:
|
54
54
|
return get_registry_instance(registry, self._using_key).lookup()
|
@@ -59,7 +59,7 @@ class CurateLookup:
|
|
59
59
|
def __getitem__(self, name):
|
60
60
|
if name in self._fields:
|
61
61
|
registry = self._fields[name].field.model
|
62
|
-
if self.
|
62
|
+
if self._public and hasattr(registry, "public"):
|
63
63
|
return registry.public().lookup()
|
64
64
|
else:
|
65
65
|
return get_registry_instance(registry, self._using_key).lookup()
|
@@ -75,12 +75,14 @@ class CurateLookup:
|
|
75
75
|
getitem_keys = "\n ".join(
|
76
76
|
[str([key]) for key in self._fields if not key.isidentifier()]
|
77
77
|
)
|
78
|
+
ref = "public" if self._public else self._using_key_name
|
78
79
|
return (
|
79
|
-
f"Lookup objects from the {colors.italic(
|
80
|
+
f"Lookup objects from the {colors.italic(ref)}:\n "
|
80
81
|
f"{colors.green(getattr_keys)}\n "
|
81
|
-
f"{colors.green(getitem_keys)}\n
|
82
|
-
"Example:\n → categories = validator.lookup()
|
83
|
-
" → categories.alveolar_type_1_fibroblast_cell"
|
82
|
+
f"{colors.green(getitem_keys)}\n"
|
83
|
+
"Example:\n → categories = validator.lookup()['cell_type']\n"
|
84
|
+
" → categories.alveolar_type_1_fibroblast_cell\n\n"
|
85
|
+
"To look up public ontologies, use .lookup(public=True)"
|
84
86
|
)
|
85
87
|
else: # pragma: no cover
|
86
88
|
return colors.warning("No fields are found!")
|
@@ -97,12 +99,20 @@ class BaseCurator:
|
|
97
99
|
"""
|
98
100
|
pass
|
99
101
|
|
100
|
-
def save_artifact(
|
102
|
+
def save_artifact(
|
103
|
+
self,
|
104
|
+
description: str | None = None,
|
105
|
+
key: str | None = None,
|
106
|
+
revises: Artifact | None = None,
|
107
|
+
run: Run | None = None,
|
108
|
+
) -> Artifact:
|
101
109
|
"""Save the dataset as artifact.
|
102
110
|
|
103
111
|
Args:
|
104
|
-
description:
|
105
|
-
|
112
|
+
description: `str | None = None` A description of the DataFrame object.
|
113
|
+
key: `str | None = None` A path-like key to reference artifact in default storage, e.g., `"myfolder/myfile.fcs"`. Artifacts with the same key form a revision family.
|
114
|
+
revises: `Artifact | None = None` Previous version of the artifact. Triggers a revision.
|
115
|
+
run: `Run | None = None` The run that creates the artifact.
|
106
116
|
|
107
117
|
Returns:
|
108
118
|
A saved artifact record.
|
@@ -182,7 +192,9 @@ class DataFrameCurator(BaseCurator):
|
|
182
192
|
"""Return the columns fields to validate against."""
|
183
193
|
return self._fields
|
184
194
|
|
185
|
-
def lookup(
|
195
|
+
def lookup(
|
196
|
+
self, using_key: str | None = None, public: bool = False
|
197
|
+
) -> CurateLookup:
|
186
198
|
"""Lookup categories.
|
187
199
|
|
188
200
|
Args:
|
@@ -194,6 +206,7 @@ class DataFrameCurator(BaseCurator):
|
|
194
206
|
categoricals=self._fields,
|
195
207
|
slots={"columns": self._columns_field},
|
196
208
|
using_key=using_key or self._using_key,
|
209
|
+
public=public,
|
197
210
|
)
|
198
211
|
|
199
212
|
def _check_valid_keys(self, extra: set = None) -> None:
|
@@ -245,16 +258,6 @@ class DataFrameCurator(BaseCurator):
|
|
245
258
|
**kwargs,
|
246
259
|
)
|
247
260
|
|
248
|
-
def add_validated_from(self, key: str, organism: str | None = None):
|
249
|
-
"""Add validated categories.
|
250
|
-
|
251
|
-
Args:
|
252
|
-
key: The key referencing the slot in the DataFrame.
|
253
|
-
organism: The organism name.
|
254
|
-
"""
|
255
|
-
self._kwargs.update({"organism": organism} if organism else {})
|
256
|
-
self._update_registry(key, validated_only=True, **self._kwargs)
|
257
|
-
|
258
261
|
def add_new_from(self, key: str, organism: str | None = None, **kwargs):
|
259
262
|
"""Add validated & new categories.
|
260
263
|
|
@@ -300,7 +303,7 @@ class DataFrameCurator(BaseCurator):
|
|
300
303
|
def _update_registry_all(self, validated_only: bool = True, **kwargs):
|
301
304
|
"""Save labels for all features."""
|
302
305
|
for name in self.fields.keys():
|
303
|
-
logger.info(f"saving
|
306
|
+
logger.info(f"saving validated records of '{name}'")
|
304
307
|
self._update_registry(name, validated_only=validated_only, **kwargs)
|
305
308
|
|
306
309
|
def validate(self, organism: str | None = None) -> bool:
|
@@ -313,6 +316,10 @@ class DataFrameCurator(BaseCurator):
|
|
313
316
|
Whether the DataFrame is validated.
|
314
317
|
"""
|
315
318
|
self._kwargs.update({"organism": organism} if organism else {})
|
319
|
+
|
320
|
+
# add all validated records to the current instance
|
321
|
+
self._update_registry_all()
|
322
|
+
|
316
323
|
self._validated, self._non_validated = validate_categories_in_df( # type: ignore
|
317
324
|
self._df,
|
318
325
|
fields=self.fields,
|
@@ -323,12 +330,20 @@ class DataFrameCurator(BaseCurator):
|
|
323
330
|
)
|
324
331
|
return self._validated
|
325
332
|
|
326
|
-
def save_artifact(
|
333
|
+
def save_artifact(
|
334
|
+
self,
|
335
|
+
description: str | None = None,
|
336
|
+
key: str | None = None,
|
337
|
+
revises: Artifact | None = None,
|
338
|
+
run: Run | None = None,
|
339
|
+
) -> Artifact:
|
327
340
|
"""Save the validated DataFrame and metadata.
|
328
341
|
|
329
342
|
Args:
|
330
|
-
description: Description of the DataFrame object.
|
331
|
-
|
343
|
+
description: `str | None = None` Description of the DataFrame object.
|
344
|
+
key: `str | None = None` A path-like key to reference artifact in default storage, e.g., `"myfolder/myfile.fcs"`. Artifacts with the same key form a revision family.
|
345
|
+
revises: `Artifact | None = None` Previous version of the artifact. Triggers a revision.
|
346
|
+
run: `Run | None = None` The run that creates the artifact.
|
332
347
|
|
333
348
|
Returns:
|
334
349
|
A saved artifact record.
|
@@ -344,15 +359,18 @@ class DataFrameCurator(BaseCurator):
|
|
344
359
|
verbosity = settings.verbosity
|
345
360
|
try:
|
346
361
|
settings.verbosity = "warning"
|
347
|
-
|
348
|
-
|
362
|
+
if not self._validated:
|
363
|
+
# save all validated records to the current instance
|
364
|
+
self._update_registry_all()
|
349
365
|
|
350
366
|
self._artifact = save_artifact(
|
351
367
|
self._df,
|
352
368
|
description=description,
|
353
369
|
fields=self.fields,
|
354
370
|
columns_field=self._columns_field,
|
355
|
-
|
371
|
+
key=key,
|
372
|
+
revises=revises,
|
373
|
+
run=run,
|
356
374
|
**self._kwargs,
|
357
375
|
)
|
358
376
|
finally:
|
@@ -457,7 +475,9 @@ class AnnDataCurator(DataFrameCurator):
|
|
457
475
|
"""Return the obs fields to validate against."""
|
458
476
|
return self._obs_fields
|
459
477
|
|
460
|
-
def lookup(
|
478
|
+
def lookup(
|
479
|
+
self, using_key: str | None = None, public: bool = False
|
480
|
+
) -> CurateLookup:
|
461
481
|
"""Lookup categories.
|
462
482
|
|
463
483
|
Args:
|
@@ -469,6 +489,7 @@ class AnnDataCurator(DataFrameCurator):
|
|
469
489
|
categoricals=self._obs_fields,
|
470
490
|
slots={"columns": self._columns_field, "var_index": self._var_field},
|
471
491
|
using_key=using_key or self._using_key,
|
492
|
+
public=public,
|
472
493
|
)
|
473
494
|
|
474
495
|
def _save_from_var_index(
|
@@ -479,7 +500,7 @@ class AnnDataCurator(DataFrameCurator):
|
|
479
500
|
values=list(self._adata.var.index),
|
480
501
|
field=self.var_index,
|
481
502
|
key="var_index",
|
482
|
-
save_function="add_new_from_var_index",
|
503
|
+
save_function=".add_new_from_var_index()",
|
483
504
|
using_key=self._using_key,
|
484
505
|
validated_only=validated_only,
|
485
506
|
organism=organism,
|
@@ -487,14 +508,13 @@ class AnnDataCurator(DataFrameCurator):
|
|
487
508
|
exclude=self._exclude.get("var_index"),
|
488
509
|
)
|
489
510
|
|
490
|
-
def _update_registry_all(self
|
511
|
+
def _update_registry_all(self):
|
491
512
|
"""Save labels for all features."""
|
492
|
-
|
493
|
-
|
494
|
-
|
495
|
-
|
496
|
-
|
497
|
-
self._update_registry(name, validated_only=validated_only, **kwargs)
|
513
|
+
logger.info("saving validated records of 'var_index'")
|
514
|
+
self._save_from_var_index(validated_only=True, **self._kwargs)
|
515
|
+
for name in self._obs_fields.keys():
|
516
|
+
logger.info(f"saving validated terms of '{name}'")
|
517
|
+
self._update_registry(name, validated_only=True, **self._kwargs)
|
498
518
|
|
499
519
|
def add_new_from_var_index(self, organism: str | None = None, **kwargs):
|
500
520
|
"""Update variable records.
|
@@ -506,15 +526,6 @@ class AnnDataCurator(DataFrameCurator):
|
|
506
526
|
self._kwargs.update({"organism": organism} if organism else {})
|
507
527
|
self._save_from_var_index(validated_only=False, **self._kwargs, **kwargs)
|
508
528
|
|
509
|
-
def add_validated_from_var_index(self, organism: str | None = None):
|
510
|
-
"""Add validated variable records.
|
511
|
-
|
512
|
-
Args:
|
513
|
-
organism: The organism name.
|
514
|
-
"""
|
515
|
-
self._kwargs.update({"organism": organism} if organism else {})
|
516
|
-
self._save_from_var_index(validated_only=True, **self._kwargs)
|
517
|
-
|
518
529
|
def validate(self, organism: str | None = None) -> bool:
|
519
530
|
"""Validate categories.
|
520
531
|
|
@@ -530,6 +541,9 @@ class AnnDataCurator(DataFrameCurator):
|
|
530
541
|
f"validating metadata using registries of instance {colors.italic(self._using_key)}"
|
531
542
|
)
|
532
543
|
|
544
|
+
# add all validated records to the current instance
|
545
|
+
self._update_registry_all()
|
546
|
+
|
533
547
|
validated_var, non_validated_var = validate_categories(
|
534
548
|
self._adata.var.index,
|
535
549
|
field=self._var_field,
|
@@ -554,30 +568,49 @@ class AnnDataCurator(DataFrameCurator):
|
|
554
568
|
self._validated = validated_var and validated_obs
|
555
569
|
return self._validated
|
556
570
|
|
557
|
-
def save_artifact(
|
571
|
+
def save_artifact(
|
572
|
+
self,
|
573
|
+
description: str | None = None,
|
574
|
+
key: str | None = None,
|
575
|
+
revises: Artifact | None = None,
|
576
|
+
run: Run | None = None,
|
577
|
+
) -> Artifact:
|
558
578
|
"""Save the validated ``AnnData`` and metadata.
|
559
579
|
|
560
580
|
Args:
|
561
|
-
description:
|
562
|
-
|
581
|
+
description: `str | None = None` A description of the ``AnnData`` object.
|
582
|
+
key: `str | None = None` A path-like key to reference artifact in default storage, e.g., `"myfolder/myfile.fcs"`. Artifacts with the same key form a revision family.
|
583
|
+
revises: `Artifact | None = None` Previous version of the artifact. Triggers a revision.
|
584
|
+
run: `Run | None = None` The run that creates the artifact.
|
563
585
|
|
564
586
|
Returns:
|
565
587
|
A saved artifact record.
|
566
588
|
"""
|
589
|
+
from lamindb.core._settings import settings
|
590
|
+
|
567
591
|
if not self._validated:
|
568
592
|
self.validate()
|
569
593
|
if not self._validated:
|
570
594
|
raise ValidationError("Dataset does not validate. Please curate.")
|
571
|
-
|
572
|
-
|
573
|
-
|
574
|
-
|
575
|
-
|
576
|
-
|
577
|
-
|
578
|
-
|
579
|
-
|
580
|
-
|
595
|
+
verbosity = settings.verbosity
|
596
|
+
try:
|
597
|
+
settings.verbosity = "warning"
|
598
|
+
if not self._validated:
|
599
|
+
# save all validated records to the current instance
|
600
|
+
self._update_registry_all()
|
601
|
+
self._artifact = save_artifact(
|
602
|
+
self._data,
|
603
|
+
adata=self._adata,
|
604
|
+
description=description,
|
605
|
+
columns_field=self.var_index,
|
606
|
+
fields=self.categoricals,
|
607
|
+
key=key,
|
608
|
+
revises=revises,
|
609
|
+
run=run,
|
610
|
+
**self._kwargs,
|
611
|
+
)
|
612
|
+
finally:
|
613
|
+
settings.verbosity = verbosity
|
581
614
|
return self._artifact
|
582
615
|
|
583
616
|
|
@@ -656,10 +689,6 @@ class MuDataCurator:
|
|
656
689
|
)
|
657
690
|
for modality in self._modalities
|
658
691
|
}
|
659
|
-
for modality in self._var_fields.keys():
|
660
|
-
self._save_from_var_index_modality(
|
661
|
-
modality=modality, validated_only=True, **self._kwargs
|
662
|
-
)
|
663
692
|
|
664
693
|
@property
|
665
694
|
def var_index(self) -> FieldAttr:
|
@@ -685,7 +714,7 @@ class MuDataCurator:
|
|
685
714
|
values=list(self._mdata[modality].var.index),
|
686
715
|
field=self._var_fields[modality],
|
687
716
|
key="var_index",
|
688
|
-
save_function=
|
717
|
+
save_function=f'.add_new_from_var_index("{modality}")',
|
689
718
|
using_key=self._using_key,
|
690
719
|
validated_only=validated_only,
|
691
720
|
dtype="number",
|
@@ -712,7 +741,9 @@ class MuDataCurator:
|
|
712
741
|
obs_fields["obs"][k] = v
|
713
742
|
return obs_fields
|
714
743
|
|
715
|
-
def lookup(
|
744
|
+
def lookup(
|
745
|
+
self, using_key: str | None = None, public: bool = False
|
746
|
+
) -> CurateLookup:
|
716
747
|
"""Lookup categories.
|
717
748
|
|
718
749
|
Args:
|
@@ -727,6 +758,7 @@ class MuDataCurator:
|
|
727
758
|
**{f"{k}_var_index": v for k, v in self._var_fields.items()},
|
728
759
|
},
|
729
760
|
using_key=using_key or self._using_key,
|
761
|
+
public=public,
|
730
762
|
)
|
731
763
|
|
732
764
|
def add_new_from_columns(
|
@@ -774,33 +806,14 @@ class MuDataCurator:
|
|
774
806
|
modality=modality, validated_only=False, **self._kwargs, **kwargs
|
775
807
|
)
|
776
808
|
|
777
|
-
def
|
778
|
-
"""
|
779
|
-
|
780
|
-
|
781
|
-
|
782
|
-
|
783
|
-
|
784
|
-
|
785
|
-
self._save_from_var_index_modality(
|
786
|
-
modality=modality, validated_only=True, **self._kwargs
|
787
|
-
)
|
788
|
-
|
789
|
-
def add_validated_from(
|
790
|
-
self, key: str, modality: str | None = None, organism: str | None = None
|
791
|
-
):
|
792
|
-
"""Add validated categories.
|
793
|
-
|
794
|
-
Args:
|
795
|
-
key: The key referencing the slot in the DataFrame.
|
796
|
-
modality: The modality name.
|
797
|
-
organism: The organism name.
|
798
|
-
"""
|
799
|
-
self._kwargs.update({"organism": organism} if organism else {})
|
800
|
-
modality = modality or "obs"
|
801
|
-
if modality in self._df_annotators:
|
802
|
-
df_annotator = self._df_annotators[modality]
|
803
|
-
df_annotator.add_validated_from(key=key, **self._kwargs)
|
809
|
+
def _update_registry_all(self):
|
810
|
+
"""Update all registries."""
|
811
|
+
for modality in self._var_fields.keys():
|
812
|
+
self._save_from_var_index_modality(
|
813
|
+
modality=modality, validated_only=True, **self._kwargs
|
814
|
+
)
|
815
|
+
for _, df_annotator in self._df_annotators.items():
|
816
|
+
df_annotator._update_registry_all(validated_only=True, **self._kwargs)
|
804
817
|
|
805
818
|
def add_new_from(
|
806
819
|
self,
|
@@ -827,11 +840,22 @@ class MuDataCurator:
|
|
827
840
|
|
828
841
|
def validate(self, organism: str | None = None) -> bool:
|
829
842
|
"""Validate categories."""
|
843
|
+
from lamindb.core._settings import settings
|
844
|
+
|
830
845
|
self._kwargs.update({"organism": organism} if organism else {})
|
831
846
|
if self._using_key is not None and self._using_key != "default":
|
832
847
|
logger.important(
|
833
848
|
f"validating metadata using registries of instance {colors.italic(self._using_key)}"
|
834
849
|
)
|
850
|
+
|
851
|
+
# add all validated records to the current instance
|
852
|
+
verbosity = settings.verbosity
|
853
|
+
try:
|
854
|
+
settings.verbosity = "error"
|
855
|
+
self._update_registry_all()
|
856
|
+
finally:
|
857
|
+
settings.verbosity = verbosity
|
858
|
+
|
835
859
|
validated_var = True
|
836
860
|
non_validated_var_modality = {}
|
837
861
|
for modality, var_field in self._var_fields.items():
|
@@ -842,6 +866,7 @@ class MuDataCurator:
|
|
842
866
|
using_key=self._using_key,
|
843
867
|
source=self._sources.get(modality, {}).get("var_index"),
|
844
868
|
exclude=self._exclude.get(modality, {}).get("var_index"),
|
869
|
+
validated_hint_print=f'.add_validated_from_var_index("{modality}")',
|
845
870
|
**self._kwargs, # type: ignore
|
846
871
|
)
|
847
872
|
validated_var &= is_validated_var
|
@@ -874,56 +899,75 @@ class MuDataCurator:
|
|
874
899
|
self._validated = validated_var and validated_obs
|
875
900
|
return self._validated
|
876
901
|
|
877
|
-
def save_artifact(
|
902
|
+
def save_artifact(
|
903
|
+
self,
|
904
|
+
description: str | None = None,
|
905
|
+
key: str | None = None,
|
906
|
+
revises: Artifact | None = None,
|
907
|
+
run: Run | None = None,
|
908
|
+
) -> Artifact:
|
878
909
|
"""Save the validated ``MuData`` and metadata.
|
879
910
|
|
880
911
|
Args:
|
881
|
-
description:
|
882
|
-
|
912
|
+
description: `str | None = None` A description of the ``MuData`` object.
|
913
|
+
key: `str | None = None` A path-like key to reference artifact in default storage, e.g., `"myfolder/myfile.fcs"`. Artifacts with the same key form a revision family.
|
914
|
+
revises: `Artifact | None = None` Previous version of the artifact. Triggers a revision.
|
915
|
+
run: `Run | None = None` The run that creates the artifact.
|
883
916
|
|
884
917
|
Returns:
|
885
918
|
A saved artifact record.
|
886
919
|
"""
|
920
|
+
from lamindb.core._settings import settings
|
921
|
+
|
887
922
|
if not self._validated:
|
888
|
-
|
923
|
+
self.validate()
|
924
|
+
if not self._validated:
|
925
|
+
raise ValidationError("Dataset does not validate. Please curate.")
|
926
|
+
verbosity = settings.verbosity
|
927
|
+
try:
|
928
|
+
settings.verbosity = "warning"
|
929
|
+
if not self._validated:
|
930
|
+
# save all validated records to the current instance
|
931
|
+
self._update_registry_all()
|
889
932
|
|
890
|
-
|
891
|
-
|
892
|
-
|
893
|
-
|
894
|
-
|
895
|
-
|
896
|
-
|
897
|
-
|
933
|
+
self._artifact = save_artifact(
|
934
|
+
self._mdata,
|
935
|
+
description=description,
|
936
|
+
columns_field=self.var_index,
|
937
|
+
fields=self.categoricals,
|
938
|
+
key=key,
|
939
|
+
revises=revises,
|
940
|
+
run=run,
|
941
|
+
**self._kwargs,
|
942
|
+
)
|
943
|
+
finally:
|
944
|
+
settings.verbosity = verbosity
|
898
945
|
return self._artifact
|
899
946
|
|
900
947
|
|
901
948
|
class Curator(BaseCurator):
|
902
949
|
"""Dataset curator.
|
903
950
|
|
904
|
-
|
905
|
-
to facilitate data integration, interpretation and analysis.
|
906
|
-
|
907
|
-
The curation flow has several steps:
|
908
|
-
|
909
|
-
1. Instantiate `Curator` from one of the following dataset objects:
|
951
|
+
A `Curator` object makes it easy to save validated & annotated artifacts.
|
910
952
|
|
911
|
-
|
912
|
-
- :meth:`~lamindb.Curator.from_anndata`
|
913
|
-
- :meth:`~lamindb.Curator.from_mudata`
|
953
|
+
Example:
|
914
954
|
|
915
|
-
|
955
|
+
>>> curator = ln.Curator.from_df(
|
956
|
+
>>> df,
|
957
|
+
>>> # define validation criteria as mappings
|
958
|
+
>>> columns=ln.Feature.name, # map column names
|
959
|
+
>>> categoricals={"perturbation": ln.ULabel.name}, # map categories
|
960
|
+
>>> )
|
961
|
+
>>> curator.validate() # validate the data in df
|
962
|
+
>>> artifact = curator.save_artifact(description="my RNA-seq")
|
963
|
+
>>> artifact.describe() # see annotations
|
916
964
|
|
917
|
-
|
965
|
+
`curator.validate()` maps values within `df` according to the mapping criteria and logs validated & problematic values.
|
918
966
|
|
919
|
-
|
920
|
-
- Values which are new and not yet validated or potentially problematic values.
|
967
|
+
If you find non-validated values, you have several options:
|
921
968
|
|
922
|
-
|
923
|
-
|
924
|
-
- Validated values not yet in the registry can be automatically registered using :meth:`~lamindb.core.DataFrameCurator.add_validated_from`.
|
925
|
-
- Valid and new values can be registered using :meth:`~lamindb.core.DataFrameCurator.add_new_from`.
|
926
|
-
- All unvalidated values can be accessed using :meth:`~lamindb.core.DataFrameCurator.non_validated` and subsequently removed from the object at hand.
|
969
|
+
- new values found in the data can be registered using :meth:`~lamindb.core.DataFrameCurator.add_new_from`
|
970
|
+
- non-validated values can be accessed using :meth:`~lamindb.core.DataFrameCurator.non_validated` and addressed manually
|
927
971
|
"""
|
928
972
|
|
929
973
|
@classmethod
|
@@ -1174,7 +1218,7 @@ def validate_categories(
|
|
1174
1218
|
f"{colors.yellow(validated_hint_print)}"
|
1175
1219
|
)
|
1176
1220
|
|
1177
|
-
non_validated_hint_print =
|
1221
|
+
non_validated_hint_print = validated_hint_print.replace("_validated_", "_new_")
|
1178
1222
|
non_validated = [i for i in non_validated if i not in values_validated]
|
1179
1223
|
n_non_validated = len(non_validated)
|
1180
1224
|
if n_non_validated == 0:
|
@@ -1239,7 +1283,9 @@ def save_artifact(
|
|
1239
1283
|
description: str | None = None,
|
1240
1284
|
organism: str | None = None,
|
1241
1285
|
adata: ad.AnnData | None = None,
|
1242
|
-
|
1286
|
+
key: str | None = None,
|
1287
|
+
revises: Artifact | None = None,
|
1288
|
+
run: Run | None = None,
|
1243
1289
|
) -> Artifact:
|
1244
1290
|
"""Save all metadata with an Artifact.
|
1245
1291
|
|
@@ -1249,29 +1295,43 @@ def save_artifact(
|
|
1249
1295
|
fields: A dictionary mapping obs_column to registry_field.
|
1250
1296
|
columns_field: The registry field to validate variables index against.
|
1251
1297
|
organism: The organism name.
|
1252
|
-
adata: The AnnData object to save, must be provided if data is a path.
|
1253
|
-
|
1298
|
+
adata: The AnnData object to save and get n_observations, must be provided if data is a path.
|
1299
|
+
type: `Literal["dataset", "model"] | None = None` The artifact type.
|
1300
|
+
key: `str | None = None` A path-like key to reference artifact in default storage, e.g., `"myfolder/myfile.fcs"`. Artifacts with the same key form a revision family.
|
1301
|
+
revises: `Artifact | None = None` Previous version of the artifact. Triggers a revision.
|
1302
|
+
run: `Run | None = None` The run that creates the artifact.
|
1254
1303
|
|
1255
1304
|
Returns:
|
1256
1305
|
The saved Artifact.
|
1257
1306
|
"""
|
1258
1307
|
from ._artifact import data_is_anndata
|
1308
|
+
from .core._data import add_labels
|
1259
1309
|
|
1260
1310
|
artifact = None
|
1261
1311
|
if data_is_anndata(data):
|
1262
1312
|
assert adata is not None # noqa: S101
|
1263
|
-
artifact = Artifact.from_anndata(
|
1313
|
+
artifact = Artifact.from_anndata(
|
1314
|
+
data, description=description, key=key, revises=revises, run=run
|
1315
|
+
)
|
1264
1316
|
artifact.n_observations = adata.shape[0]
|
1265
1317
|
data = adata
|
1266
1318
|
|
1267
1319
|
elif isinstance(data, pd.DataFrame):
|
1268
|
-
artifact = Artifact.from_df(
|
1320
|
+
artifact = Artifact.from_df(
|
1321
|
+
data, description=description, key=key, revises=revises, run=run
|
1322
|
+
)
|
1269
1323
|
else:
|
1270
1324
|
try:
|
1271
1325
|
from mudata import MuData
|
1272
1326
|
|
1273
1327
|
if isinstance(data, MuData):
|
1274
|
-
artifact = Artifact.from_mudata(
|
1328
|
+
artifact = Artifact.from_mudata(
|
1329
|
+
data,
|
1330
|
+
description=description,
|
1331
|
+
key=key,
|
1332
|
+
revises=revises,
|
1333
|
+
run=run,
|
1334
|
+
)
|
1275
1335
|
artifact.n_observations = data.n_obs
|
1276
1336
|
except ImportError:
|
1277
1337
|
pass
|
@@ -1301,7 +1361,12 @@ def save_artifact(
|
|
1301
1361
|
else:
|
1302
1362
|
raise NotImplementedError
|
1303
1363
|
|
1304
|
-
def _add_labels(
|
1364
|
+
def _add_labels(
|
1365
|
+
data,
|
1366
|
+
artifact: Artifact,
|
1367
|
+
fields: dict[str, FieldAttr],
|
1368
|
+
feature_ref_is_name: bool | None = None,
|
1369
|
+
):
|
1305
1370
|
features = Feature.lookup().dict()
|
1306
1371
|
for key, field in fields.items():
|
1307
1372
|
feature = features.get(key)
|
@@ -1314,16 +1379,47 @@ def save_artifact(
|
|
1314
1379
|
field=field,
|
1315
1380
|
**filter_kwargs_current,
|
1316
1381
|
)
|
1317
|
-
|
1382
|
+
if len(labels) == 0:
|
1383
|
+
continue
|
1384
|
+
if hasattr(registry, "_name_field"):
|
1385
|
+
label_ref_is_name = field.field.name == registry._name_field
|
1386
|
+
add_labels(
|
1387
|
+
artifact,
|
1388
|
+
records=labels,
|
1389
|
+
feature=feature,
|
1390
|
+
feature_ref_is_name=feature_ref_is_name,
|
1391
|
+
label_ref_is_name=label_ref_is_name,
|
1392
|
+
)
|
1318
1393
|
|
1319
1394
|
if artifact._accessor == "MuData":
|
1320
1395
|
for modality, modality_fields in fields.items():
|
1396
|
+
column_field_modality = columns_field.get(modality)
|
1321
1397
|
if modality == "obs":
|
1322
|
-
_add_labels(
|
1398
|
+
_add_labels(
|
1399
|
+
data,
|
1400
|
+
artifact,
|
1401
|
+
modality_fields,
|
1402
|
+
feature_ref_is_name=(
|
1403
|
+
None
|
1404
|
+
if column_field_modality is None
|
1405
|
+
else _ref_is_name(column_field_modality)
|
1406
|
+
),
|
1407
|
+
)
|
1323
1408
|
else:
|
1324
|
-
_add_labels(
|
1409
|
+
_add_labels(
|
1410
|
+
data[modality],
|
1411
|
+
artifact,
|
1412
|
+
modality_fields,
|
1413
|
+
feature_ref_is_name=(
|
1414
|
+
None
|
1415
|
+
if column_field_modality is None
|
1416
|
+
else _ref_is_name(column_field_modality)
|
1417
|
+
),
|
1418
|
+
)
|
1325
1419
|
else:
|
1326
|
-
_add_labels(
|
1420
|
+
_add_labels(
|
1421
|
+
data, artifact, fields, feature_ref_is_name=_ref_is_name(columns_field)
|
1422
|
+
)
|
1327
1423
|
|
1328
1424
|
slug = ln_setup.settings.instance.slug
|
1329
1425
|
if ln_setup.settings.instance.is_remote: # pragma: no cover
|
@@ -1438,7 +1534,8 @@ def update_registry(
|
|
1438
1534
|
if not validated_only:
|
1439
1535
|
non_validated_records = []
|
1440
1536
|
if df is not None and registry == Feature:
|
1441
|
-
|
1537
|
+
nonval_columns = Feature.inspect(df.columns, mute=True).non_validated
|
1538
|
+
non_validated_records = Feature.from_df(df.loc[:, nonval_columns])
|
1442
1539
|
else:
|
1443
1540
|
if "organism" in filter_kwargs:
|
1444
1541
|
# make sure organism record is saved to the current instance
|
@@ -1600,4 +1697,12 @@ def _save_organism(name: str): # pragma: no cover
|
|
1600
1697
|
return organism
|
1601
1698
|
|
1602
1699
|
|
1700
|
+
def _ref_is_name(field: FieldAttr) -> bool | None:
|
1701
|
+
"""Check if the reference field is a name field."""
|
1702
|
+
from ._can_validate import get_name_field
|
1703
|
+
|
1704
|
+
name_field = get_name_field(field.field.model)
|
1705
|
+
return field.field.name == name_field
|
1706
|
+
|
1707
|
+
|
1603
1708
|
Curate = Curator # backward compat
|