lamindb 0.69.2__py3-none-any.whl → 0.69.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
lamindb/_annotate.py ADDED
@@ -0,0 +1,790 @@
1
+ from typing import Dict, Iterable, List, Optional, Tuple, Union
2
+
3
+ import anndata as ad
4
+ import lamindb_setup as ln_setup
5
+ import pandas as pd
6
+ from lamin_utils import colors, logger
7
+ from lnschema_core import Artifact, Collection, Feature, Registry, Run, ULabel
8
+ from lnschema_core.types import FieldAttr
9
+
10
+
11
class ValidationError(ValueError):
    """Error raised when a data object is used before it has been validated."""
15
+
16
+
17
class AnnotateLookup:
    """Lookup features and labels from the reference instance.

    Args:
        fields: Mapping of a lookup name to the registry field it resolves to.
        using: Name of the instance to look up in. ``None`` and ``"default"``
            both select the current instance; ``"public"`` selects the public
            reference of the registry.
    """

    def __init__(
        self, fields: Dict[str, FieldAttr], using: Optional[str] = None
    ) -> None:
        self._fields = fields
        # "default" is an alias of the current instance and is stored as None.
        self._using = None if using == "default" else using
        # Derive the display name from the normalized value so that "default"
        # is reported with the slug of the current instance.
        self._using_name = self._using or ln_setup.settings.instance.slug
        logger.debug(f"Lookup objects from the {colors.italic(self._using_name)}")

    def __getitem__(self, name):
        """Return the lookup object of the registry registered under ``name``.

        Raises:
            AttributeError: If ``name`` is not one of the known fields.
        """
        if name not in self._fields:
            raise AttributeError(
                f"'{self.__class__.__name__}' object has no attribute '{name}'"
            )
        registry = self._fields[name].field.model
        if self._using == "public":
            return registry.public().lookup()
        return get_registry_instance(registry, self._using).lookup()

    def __repr__(self) -> str:
        """List the available lookup names together with a usage example."""
        if not self._fields:
            return colors.warning("No fields are found!")
        fields = "\n ".join([str([key]) for key in self._fields.keys()])
        return (
            f"Lookup objects from the {colors.italic(self._using_name)}:\n {colors.green(fields)}\n\n"
            # The example must be valid Python: items are accessed with
            # lookup()['name'], not lookup().['name'].
            "Example:\n → categories = validator.lookup()['cell_type']\n"
            " → categories.alveolar_type_1_fibroblast_cell"
        )
49
+
50
+
51
class DataFrameAnnotator:
    """Annotation flow for a DataFrame object.

    Args:
        df: The DataFrame object to annotate.
        fields: A dictionary mapping column to registry_field.
            For example:
            {"cell_type_ontology_id": bt.CellType.ontology_id, "donor_id": ln.ULabel.name}
        feature_field: The field attribute for the feature column.
        using: The reference instance containing registries to validate against.
        verbosity: The verbosity level.
        **kwargs: Extra keyword arguments stored on the annotator and forwarded
            to the registry helpers (validation, registration, saving).
    """

    def __init__(
        self,
        df: pd.DataFrame,
        fields: Optional[Dict[str, FieldAttr]] = None,
        feature_field: FieldAttr = Feature.name,
        using: Optional[str] = None,
        verbosity: str = "hint",
        **kwargs,
    ) -> None:
        from lamindb.core._settings import settings

        self._df = df
        self._fields = fields or {}
        self._feature_field = feature_field
        self._using = using
        # NOTE: this assigns the shared lamindb settings object, not a
        # per-annotator value.
        settings.verbosity = verbosity
        self._artifact = None
        self._collection = None
        self._validated = False
        self._kwargs: Dict = kwargs
        # Feature records for the columns are registered at construction time.
        self.save_features()

    @property
    def fields(self) -> Dict:
        """Return the columns fields to validate against."""
        return self._fields

    def lookup(self, using: Optional[str] = None) -> AnnotateLookup:
        """Lookup features and labels.

        Args:
            using: The instance where the lookup is performed.
                if None (default), the lookup is performed on the instance specified in "using" parameter of the validator.
                if "public", the lookup is performed on the public reference.
        """
        fields = {"feature": self._feature_field, **self.fields}
        return AnnotateLookup(fields=fields, using=using or self._using)

    def save_features(self, validated_only: bool = True) -> None:
        """Register features records.

        Args:
            validated_only: Applies to the columns that are not keys of
                ``fields``; the keys of ``fields`` are always registered.

        Raises:
            ValueError: If a key of ``fields`` is not a column of the DataFrame.
        """
        field_columns = set(self.fields.keys())
        df_columns = set(self._df.columns)

        missing_columns = field_columns - df_columns
        if missing_columns:
            raise ValueError(
                f"Columns {missing_columns} are not found in the data object!"
            )

        # Always register features specified as the fields keys
        update_registry(
            values=list(self.fields.keys()),
            field=self._feature_field,
            feature_name="feature",
            using=self._using,
            validated_only=False,
            kwargs=self._kwargs,
        )

        # Register the rest of the columns based on validated_only
        additional_columns = df_columns - field_columns
        if additional_columns:
            update_registry(
                values=list(additional_columns),
                field=self._feature_field,
                feature_name="feature",
                using=self._using,
                validated_only=validated_only,
                df=self._df,  # Get the Feature type from df
                kwargs=self._kwargs,
            )

    def update_registry(self, feature: str, validated_only: bool = True, **kwargs):
        """Register labels for a feature.

        Args:
            feature: The name of the feature to register. ``"all"`` registers
                the labels of every key in ``fields``; ``"feature"`` registers
                the feature records themselves.
            validated_only: Whether to register only validated labels.
            **kwargs: Additional keyword arguments. They take precedence over
                the keyword arguments stored on the annotator.

        Raises:
            ValueError: If ``feature`` is not a key of ``fields``.
        """
        if feature == "all":
            self._update_registry_all(validated_only=validated_only, **kwargs)
        elif feature == "feature":
            self.save_features(validated_only=validated_only)
        else:
            if feature not in self.fields:
                raise ValueError(f"Feature {feature} is not part of the fields!")
            update_registry(
                values=self._df[feature].unique().tolist(),
                field=self.fields[feature],
                feature_name=feature,
                using=self._using,
                validated_only=validated_only,
                # Keyword arguments given to the constructor / validate()
                # (e.g. organism) are the defaults here as well; otherwise
                # save_artifact(), which calls update_registry("all") without
                # arguments, would lose them.
                kwargs={**self._kwargs, **kwargs},
            )

    def _update_registry_all(self, validated_only: bool = True, **kwargs):
        """Register labels for all features."""
        for name in self.fields.keys():
            logger.info(f"registering labels for '{name}'")
            self.update_registry(feature=name, validated_only=validated_only, **kwargs)

    def validate(self, **kwargs) -> bool:
        """Validate variables and categorical observations.

        Args:
            **kwargs: Merged into the keyword arguments stored on the annotator
                before validating.

        Returns:
            Whether the DataFrame is validated.
        """
        self._kwargs.update(kwargs)
        self._validated = validate_categories_in_df(
            self._df,
            fields=self.fields,
            using=self._using,
            **self._kwargs,
        )
        return self._validated

    def save_artifact(self, description: str, **kwargs) -> Artifact:
        """Register the validated DataFrame and metadata.

        Args:
            description: Description of the DataFrame object.
            **kwargs: Object level metadata.

        Returns:
            A registered artifact record.

        Raises:
            ValidationError: If ``validate()`` has not succeeded beforehand.
        """
        from lamindb.core._settings import settings

        self._kwargs.update(kwargs)
        if not self._validated:
            raise ValidationError(
                f"Data object is not validated, please run {colors.yellow('validate()')}!"
            )

        # Make sure all labels are registered in the current instance
        verbosity = settings.verbosity
        try:
            settings.verbosity = "warning"
            self.update_registry("all")

            self._artifact = save_artifact(
                self._df,
                description=description,
                fields=self.fields,
                feature_field=self._feature_field,
                **self._kwargs,
            )
        finally:
            # Restore the verbosity that was active before this call.
            settings.verbosity = verbosity

        return self._artifact

    def save_collection(
        self,
        artifact: Union[Artifact, Iterable[Artifact]],
        name: str,
        description: Optional[str] = None,
        reference: Optional[str] = None,
        reference_type: Optional[str] = None,
    ) -> Collection:
        """Register a collection from artifact/artifacts.

        Args:
            artifact: One or several registered Artifacts.
            name: Title of the publication.
            description: Description of the publication.
            reference: Accession number (e.g. GSE#, E-MTAB#, etc.).
            reference_type: Source type (e.g. GEO, ArrayExpress, SRA, etc.).

        Returns:
            The saved collection record.
        """
        collection = Collection(
            artifact,
            name=name,
            description=description,
            reference=reference,
            reference_type=reference_type,
        )
        slug = ln_setup.settings.instance.slug
        # Read the flag before saving; it decides which message is logged.
        is_new = collection._state.adding
        collection.save()
        if is_new:
            logger.success(f"registered collection in {colors.italic(slug)}")
        else:
            logger.warning(f"collection already exists in {colors.italic(slug)}!")
        if ln_setup.settings.instance.is_remote:
            logger.print(f"🔗 https://lamin.ai/{slug}/collection/{collection.uid}")
        self._collection = collection
        return collection

    def clean_up_failed_runs(self):
        """Clean up previous failed runs that don't register any outputs."""
        from lamindb.core._run_context import run_context

        if run_context.transform is not None:
            # Delete the runs of the tracked transform that have no output
            # artifacts, except the run that is currently tracked.
            Run.filter(transform=run_context.transform, output_artifacts=None).exclude(
                uid=run_context.run.uid
            ).delete()
258
+
259
+
260
class AnnDataAnnotator(DataFrameAnnotator):
    """Annotation flow for an AnnData object.

    Args:
        adata: The AnnData object to annotate.
        var_field: The registry field to validate variables index against.
        obs_fields: A dictionary mapping obs_column to registry_field.
            For example:
            {"cell_type_ontology_id": bt.CellType.ontology_id, "donor_id": ln.ULabel.name}
        using: The reference instance containing registries to validate against.
        verbosity: The verbosity level.
        **kwargs: Extra keyword arguments stored on the annotator and forwarded
            to the registry helpers.
    """

    def __init__(
        self,
        adata: ad.AnnData,
        var_field: FieldAttr,
        obs_fields: Dict[str, FieldAttr],
        using: str = "default",
        verbosity: str = "hint",
        **kwargs,
    ) -> None:
        self._adata = adata
        self._var_field = var_field
        # The parent handles ``adata.obs`` as the DataFrame to annotate.
        super().__init__(
            df=self._adata.obs,
            fields=obs_fields,
            using=using,
            verbosity=verbosity,
            **kwargs,
        )
        self._obs_fields = obs_fields
        # Variable records are registered at construction time.
        self._save_variables()

    @property
    def var_field(self) -> FieldAttr:
        """Return the registry field to validate variables index against."""
        return self._var_field

    @property
    def obs_fields(self) -> Dict:
        """Return the obs fields to validate against."""
        return self._obs_fields

    def lookup(self, using: Optional[str] = None) -> AnnotateLookup:
        """Lookup features and labels.

        Args:
            using: The instance where the lookup is performed; falls back to
                the ``using`` value of the annotator when not given.
        """
        fields = {
            "feature": Feature.name,
            "variables": self.var_field,
            **self.obs_fields,
        }
        return AnnotateLookup(fields=fields, using=using or self._using)

    def _save_variables(self, validated_only: bool = True, **kwargs):
        """Register variable records."""
        self._kwargs.update(kwargs)
        update_registry(
            values=self._adata.var_names,
            field=self.var_field,
            feature_name="variables",
            using=self._using,
            validated_only=validated_only,
            kwargs=self._kwargs,
        )

    def validate(self, **kwargs) -> bool:
        """Validate variables and categorical observations.

        Args:
            **kwargs: Merged into the keyword arguments stored on the annotator
                before validating.

        Returns:
            Whether the AnnData object is validated.
        """
        self._kwargs.update(kwargs)
        self._validated = validate_anndata(
            self._adata,
            var_field=self.var_field,
            obs_fields=self.obs_fields,
            # Validate against the reference instance the annotator was
            # created with, as DataFrameAnnotator.validate() does.
            using=self._using,
            **self._kwargs,
        )
        return self._validated

    def update_registry(self, feature: str, validated_only: bool = True, **kwargs):
        """Register labels for a feature.

        ``"variables"`` registers the variable records; every other value is
        handled by the parent class.
        """
        if feature == "variables":
            self._save_variables(validated_only=validated_only, **kwargs)
        else:
            super().update_registry(feature, validated_only, **kwargs)

    def save_artifact(self, description: str, **kwargs) -> Artifact:
        """Register the validated AnnData and metadata.

        Args:
            description: Description of the AnnData object.
            **kwargs: Object level metadata.

        Returns:
            A registered artifact record.

        Raises:
            ValidationError: If ``validate()`` has not succeeded beforehand.
        """
        self._kwargs.update(kwargs)
        if not self._validated:
            raise ValidationError("Please run `validate()` first!")

        self._artifact = save_artifact(
            self._adata,
            description=description,
            feature_field=self.var_field,
            fields=self.obs_fields,
            **self._kwargs,
        )
        return self._artifact
363
+
364
+
365
class Annotate:
    """Annotation flow."""

    @classmethod
    def from_df(
        cls,
        df: pd.DataFrame,
        fields: Optional[Dict[str, FieldAttr]] = None,
        feature_field: FieldAttr = Feature.name,
        using: Optional[str] = None,
        verbosity: str = "hint",
        **kwargs,
    ) -> DataFrameAnnotator:
        """Create a DataFrameAnnotator; every argument is forwarded unchanged."""
        annotator = DataFrameAnnotator(
            df=df,
            fields=fields,
            feature_field=feature_field,
            using=using,
            verbosity=verbosity,
            **kwargs,
        )
        return annotator

    @classmethod
    def from_anndata(
        cls,
        adata: ad.AnnData,
        var_field: FieldAttr,
        obs_fields: Dict[str, FieldAttr],
        using: str = "default",
        verbosity: str = "hint",
        **kwargs,
    ) -> AnnDataAnnotator:
        """Create an AnnDataAnnotator; every argument is forwarded unchanged."""
        annotator = AnnDataAnnotator(
            adata=adata,
            var_field=var_field,
            obs_fields=obs_fields,
            using=using,
            verbosity=verbosity,
            **kwargs,
        )
        return annotator
405
+
406
+
407
def get_registry_instance(registry: Registry, using: Optional[str] = None) -> Registry:
    """Return ``registry`` bound to the ``using`` instance.

    ``None`` and ``"default"`` leave the registry untouched.
    """
    if using is None or using == "default":
        return registry
    return registry.using(using)
412
+
413
+
414
def standardize_and_inspect(
    values: Iterable[str], field: FieldAttr, registry: Registry, **kwargs
):
    """Inspect ``values`` with ``registry``, standardizing them first if possible.

    Registries without a ``standardize`` attribute are inspected with the
    values as given. Both calls are muted and receive ``kwargs``.
    """
    can_standardize = hasattr(registry, "standardize")
    if can_standardize:
        values = registry.standardize(values, field=field, mute=True, **kwargs)
    inspection = registry.inspect(values, field=field, mute=True, **kwargs)
    return inspection
421
+
422
+
423
def check_registry_organism(
    registry: Registry, organism: Optional[str] = None
) -> Optional[str]:
    """Return the organism name to use for ``registry``.

    Registries without an ``organism_id`` attribute yield ``None``. Otherwise
    the given ``organism`` is returned, falling back to the name of
    ``bt.settings.organism``.

    Raises:
        ValueError: If an organism is needed but neither ``organism`` nor
            ``bt.settings.organism`` is set.
    """
    if not hasattr(registry, "organism_id"):
        return None

    import bionty as bt

    if organism is None and bt.settings.organism is None:
        raise ValueError(
            f"{registry.__name__} registry requires an organism!\n"
            " → please pass an organism name via organism="
        )
    return organism or bt.settings.organism.name
437
+
438
+
439
def validate_categories(
    values: Iterable[str],
    field: FieldAttr,
    feature_name: str,
    using: Optional[str] = None,
    **kwargs,
) -> bool:
    """Validate ontology terms in a pandas series using LaminDB registries.

    Args:
        values: The values to validate.
        field: The registry field to validate against.
        feature_name: The name used for the values in log messages.
        using: A reference instance that is inspected for the values that are
            not validated in the default instance.
        **kwargs: Only ``organism`` is read from here.

    Returns:
        True if every value is validated, False otherwise.
    """
    from lamindb._from_values import _print_values

    model_field = f"{field.field.model.__name__}.{field.field.name}"
    logger.indent = ""
    logger.info(
        f"inspecting '{colors.bold(feature_name)}' by {colors.italic(model_field)}"
    )
    logger.indent = " "

    # The indentation is a property of the shared logger, so it has to be
    # reset on every exit path: success, failure and exceptions.
    try:
        registry = field.field.model
        filter_kwargs = {}
        organism = check_registry_organism(registry, kwargs.get("organism"))
        if organism is not None:
            filter_kwargs["organism"] = organism

        # Inspect the default instance
        inspect_result = standardize_and_inspect(
            values=values, field=field, registry=registry, **filter_kwargs
        )
        non_validated = inspect_result.non_validated

        if using is not None and using != "default" and non_validated:
            registry = get_registry_instance(registry, using)
            # Inspect the using instance
            inspect_result = standardize_and_inspect(
                values=non_validated, field=field, registry=registry, **filter_kwargs
            )
            non_validated = inspect_result.non_validated

        n_non_validated = len(non_validated)
        if n_non_validated == 0:
            logger.success(f"all {feature_name}s are validated")
            return True

        are = "are" if n_non_validated > 1 else "is"
        print_values = _print_values(non_validated)
        feature_name_print = f".update_registry('{feature_name}')"
        warning_message = (
            f"{colors.yellow(f'{n_non_validated} terms')} {are} not validated: "
            f"{colors.yellow(print_values)}\n → register terms via "
            f"{colors.yellow(feature_name_print)}"
        )
        logger.warning(warning_message)
        return False
    finally:
        logger.indent = ""
492
+
493
+
494
def validate_categories_in_df(
    df: pd.DataFrame,
    fields: Dict[str, FieldAttr],
    using: Optional[str] = None,
    **kwargs,
) -> bool:
    """Validate categories in DataFrame columns using LaminDB registries.

    Every column listed in ``fields`` is validated, also after an earlier
    column has failed; the result is True only if all of them pass.
    """
    # A list (not a generator) so that no column is skipped by short-circuiting.
    outcomes = [
        validate_categories(
            df[column],
            field=registry_field,
            feature_name=column,
            using=using,
            **kwargs,
        )
        for column, registry_field in fields.items()
    ]
    return all(outcomes)
511
+
512
+
513
def validate_anndata(
    adata: ad.AnnData,
    var_field: FieldAttr,
    obs_fields: Dict[str, FieldAttr],
    using: Optional[str] = None,
    **kwargs,
) -> bool:
    """Inspect metadata in an AnnData object using LaminDB registries.

    The index of ``adata.var`` is validated against ``var_field`` and the
    columns of ``adata.obs`` against ``obs_fields``; both checks always run.

    Returns:
        True if both the variables and the observations are validated.
    """
    uses_reference_instance = using is not None and using != "default"
    if uses_reference_instance:
        logger.important(
            f"validating metadata using registries of instance {colors.italic(using)}"
        )

    var_ok = validate_categories(
        adata.var.index,
        field=var_field,
        feature_name="variables",
        using=using,
        **kwargs,
    )
    obs_ok = validate_categories_in_df(
        adata.obs, fields=obs_fields, using=using, **kwargs
    )
    return var_ok and obs_ok
537
+
538
+
539
def save_artifact(
    data: Union[pd.DataFrame, ad.AnnData],
    description: str,
    fields: Dict[str, FieldAttr],
    feature_field: FieldAttr,
    **kwargs,
) -> Artifact:
    """Register all metadata with an Artifact.

    Args:
        data: The DataFrame or AnnData object to register.
        description: A description of the artifact.
        fields: A dictionary mapping obs_column to registry_field.
        feature_field: The registry field to validate variables index against.
        kwargs: Additional keyword arguments to pass to the registry model.

    Returns:
        The registered Artifact.

    Raises:
        ValueError: If ``data`` is neither a DataFrame nor an AnnData object.
    """
    is_anndata = isinstance(data, ad.AnnData)
    if is_anndata:
        artifact = Artifact.from_anndata(data, description=description)
        artifact.n_observations = data.n_obs
    elif isinstance(data, pd.DataFrame):
        artifact = Artifact.from_df(data, description=description)
    else:
        raise ValueError("data must be a DataFrame or AnnData object")
    artifact.save()

    # Keep the organism requested by the caller in its own variable. It must
    # not be overwritten by the per-registry results below: a registry without
    # organism yields None, which previously discarded the requested organism
    # for every registry checked afterwards.
    requested_organism = kwargs.pop("organism", None)

    feature_kwargs: Dict = {}
    feature_organism = check_registry_organism(
        feature_field.field.model, requested_organism
    )
    if feature_organism is not None:
        feature_kwargs["organism"] = feature_organism

    if is_anndata:
        artifact.features.add_from_anndata(var_field=feature_field, **feature_kwargs)
    else:
        artifact.features.add_from_df(field=feature_field, **feature_kwargs)

    features = Feature.lookup().dict()
    # Labels are read from the obs table for AnnData, else from the DataFrame.
    df = data.obs if is_anndata else data
    for feature_name, field in fields.items():
        feature = features.get(feature_name)
        registry = field.field.model
        filter_kwargs = kwargs.copy()
        label_organism = check_registry_organism(registry, requested_organism)
        if label_organism is not None:
            filter_kwargs["organism"] = label_organism
        labels = registry.from_values(df[feature_name], field=field, **filter_kwargs)
        artifact.labels.add(labels, feature)

    slug = ln_setup.settings.instance.slug
    logger.success(f"registered artifact in {colors.italic(slug)}")
    if ln_setup.settings.instance.is_remote:
        logger.info(f"🔗 https://lamin.ai/{slug}/artifact/{artifact.uid}")

    return artifact
597
+
598
+
599
def update_registry(
    values: List[str],
    field: FieldAttr,
    feature_name: str,
    using: Optional[str] = None,
    validated_only: bool = True,
    kwargs: Optional[Dict] = None,
    df: Optional[pd.DataFrame] = None,
) -> None:
    """Register features or labels records in the default instance from the using instance.

    Values that are already validated in the default instance are left alone.
    The remaining ones are looked up in the using instance first and via
    ``registry.from_values`` second; what is still left is only registered
    when ``validated_only`` is False.

    Args:
        values: A list of values to be registered as labels.
        field: The FieldAttr object representing the field for which labels are being registered.
        feature_name: The name of the feature to register.
        using: The name of the instance from which to transfer labels (if applicable).
        validated_only: If True, only register validated labels.
        kwargs: Additional keyword arguments to pass to the registry model.
        df: A DataFrame to register labels from.
    """
    from lamindb._save import save as ln_save
    from lamindb.core._settings import settings

    registry = field.field.model
    field_name = field.field.name
    filter_kwargs = {} if kwargs is None else kwargs.copy()

    # ULabel values are always registered, validated or not.
    if registry == ULabel:
        validated_only = False

    organism = check_registry_organism(registry, filter_kwargs.pop("organism", None))
    if organism is not None:
        filter_kwargs["organism"] = organism

    previous_verbosity = settings.verbosity
    try:
        settings.verbosity = "error"
        inspection = standardize_and_inspect(
            values=values, field=field, registry=registry, **filter_kwargs
        )
        if not inspection.non_validated:
            # Nothing to register; the finally clause restores the verbosity.
            return

        # The insertion order of the keys is the order of the log messages.
        labels_registered: Dict = {"from public": [], "without reference": []}

        transferred, non_validated_labels = update_registry_from_using_instance(
            inspection.non_validated,
            field=field,
            using=using,
            kwargs=filter_kwargs,
        )
        labels_registered[f"from {using}"] = transferred

        if non_validated_labels:
            public_records = registry.from_values(
                non_validated_labels, field=field, **filter_kwargs
            )
        else:
            public_records = []
        ln_save(public_records)

        from_public = [getattr(record, field_name) for record in public_records]
        labels_registered["from public"] = from_public
        labels_registered["without reference"] = [
            label for label in non_validated_labels if label not in from_public
        ]

        if not validated_only:
            if df is not None and registry == Feature:
                new_records = Feature.from_df(df)
            else:
                new_records = []
                if "organism" in filter_kwargs:
                    filter_kwargs["organism"] = _save_organism(name=organism)
                for value in labels_registered["without reference"]:
                    filter_kwargs[field_name] = value
                    if registry == Feature:
                        filter_kwargs["type"] = "category"
                    new_records.append(registry(**filter_kwargs))
            ln_save(new_records)

        if registry == ULabel and field_name == "name":
            save_ulabels_with_parent(values, field=field, feature_name=feature_name)
    finally:
        settings.verbosity = previous_verbosity

    log_registered_labels(
        labels_registered,
        feature_name=feature_name,
        model_field=f"{registry.__name__}.{field_name}",
        validated_only=validated_only,
    )
691
+
692
+
693
def log_registered_labels(
    labels_registered: Dict,
    feature_name: str,
    model_field: str,
    validated_only: bool = True,
) -> None:
    """Log the registered labels.

    Args:
        labels_registered: Mapping of a source key (e.g. ``"from public"``,
            ``"without reference"``) to the list of labels from that source.
            Empty lists are skipped.
        feature_name: The name of the feature; ``"feature"`` switches the
            wording from "labels" to "features".
        model_field: The ``Registry.field`` string shown in the messages.
        validated_only: If True, labels without a reference are reported as
            not registered (warning) instead of registered (success).
    """
    labels_type = "features" if feature_name == "feature" else "labels"
    model_field = colors.italic(model_field)
    for key, labels in labels_registered.items():
        if not labels:
            continue

        if key == "without reference" and validated_only:
            msg = colors.yellow(
                f"{len(labels)} non-validated {labels_type} are not registered with {model_field}: {labels}!"
            )
            # The hint must be valid Python: items are accessed with
            # lookup()['name'], not lookup().['name'].
            lookup_print = f".lookup()['{feature_name}']"
            msg += f"\n → to lookup categories, use {lookup_print}"
            msg += (
                f"\n → to register, run {colors.yellow('save_features(validated_only=False)')}"
                if labels_type == "features"
                else f"\n → to register, set {colors.yellow('validated_only=False')}"
            )
            logger.warning(msg)
        else:
            # Labels without a reference get no source prefix in the message.
            source = "" if key == "without reference" else f"{colors.green(key)} "
            logger.success(
                f"registered {len(labels)} {labels_type} {source}with {model_field}: {labels}"
            )
723
+
724
+
725
def save_ulabels_with_parent(
    values: List[str], field: FieldAttr, feature_name: str
) -> None:
    """Register a parent label for the given labels.

    The parent is the ULabel named ``is_<feature_name>``; it is created and
    saved if it does not exist, and the records of ``values`` are added as
    its children.
    """
    registry = field.field.model
    assert registry == ULabel
    parent_name = f"is_{feature_name}"
    child_records = registry.from_values(values, field=field)
    parent = registry.filter(name=parent_name).one_or_none()
    if parent is None:
        parent = registry(name=parent_name)
        parent.save()
    parent.children.add(*child_records)
737
+
738
+
739
def update_registry_from_using_instance(
    values: List[str],
    field: FieldAttr,
    using: Optional[str] = None,
    kwargs: Optional[Dict] = None,
) -> Tuple[List[str], List[str]]:
    """Register features or labels records from the using instance.

    Args:
        values: A list of values to be registered as labels.
        field: The FieldAttr object representing the field for which labels are being registered.
        using: The name of the instance from which to transfer labels (if applicable).
        kwargs: Additional keyword arguments to pass to the registry model.

    Returns:
        A tuple containing the list of registered labels and the list of non-registered labels.
        Without a using instance (``None`` or ``"default"``) nothing is
        registered and ``values`` is returned as the second element.
    """
    if using is None or using == "default":
        return [], values

    kwargs = kwargs or {}
    field_name = field.field.name
    registry_using = get_registry_instance(field.field.model, using)
    inspection = standardize_and_inspect(
        values=values, field=field, registry=registry_using, **kwargs
    )
    matching_records = registry_using.filter(
        **{f"{field_name}__in": inspection.validated}
    ).all()

    transferred = []
    for record in matching_records:
        record.save()
        transferred.append(getattr(record, field_name))
    return transferred, inspection.non_validated
775
+
776
+
777
def _save_organism(name: str):
    """Register an organism record.

    An organism already found by ``name`` is returned as is; otherwise it is
    created via ``bt.Organism.from_public`` and saved.

    Raises:
        ValueError: If ``from_public`` returns no organism for ``name``.
    """
    import bionty as bt

    existing = bt.Organism.filter(name=name).one_or_none()
    if existing is not None:
        return existing

    organism = bt.Organism.from_public(name=name)
    if organism is None:
        raise ValueError(
            f"Organism '{name}' not found\n"
            f" → please register it: bt.Organism(name='{name}').save()"
        )
    organism.save()
    return organism