lamindb 0.75.0__py3-none-any.whl → 0.75.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lamindb/__init__.py +1 -1
- lamindb/_can_validate.py +52 -22
- lamindb/_curate.py +384 -144
- lamindb/_from_values.py +8 -8
- lamindb/_record.py +26 -26
- lamindb/_save.py +5 -5
- lamindb/_view.py +13 -11
- lamindb/core/__init__.py +2 -0
- lamindb/core/_data.py +4 -4
- lamindb/core/_feature_manager.py +16 -6
- lamindb/core/schema.py +5 -5
- lamindb/core/storage/__init__.py +11 -2
- lamindb/core/storage/_valid_suffixes.py +16 -2
- lamindb/integrations/_vitessce.py +68 -31
- {lamindb-0.75.0.dist-info → lamindb-0.75.1.dist-info}/METADATA +4 -4
- {lamindb-0.75.0.dist-info → lamindb-0.75.1.dist-info}/RECORD +18 -18
- {lamindb-0.75.0.dist-info → lamindb-0.75.1.dist-info}/LICENSE +0 -0
- {lamindb-0.75.0.dist-info → lamindb-0.75.1.dist-info}/WHEEL +0 -0
lamindb/_curate.py
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
from __future__ import annotations
|
2
2
|
|
3
|
-
|
3
|
+
import copy
|
4
|
+
from typing import TYPE_CHECKING, Iterable, Type
|
4
5
|
|
5
6
|
import anndata as ad
|
6
7
|
import lamindb_setup as ln_setup
|
@@ -30,23 +31,25 @@ class CurateLookup:
|
|
30
31
|
self,
|
31
32
|
categoricals: dict[str, FieldAttr],
|
32
33
|
slots: dict[str, FieldAttr] = None,
|
33
|
-
|
34
|
+
using_key: str | None = None,
|
34
35
|
) -> None:
|
35
36
|
if slots is None:
|
36
37
|
slots = {}
|
37
38
|
self._fields = {**categoricals, **slots}
|
38
|
-
self.
|
39
|
-
self.
|
40
|
-
debug_message =
|
39
|
+
self._using_key = None if using_key == "default" else using_key
|
40
|
+
self._using_key_name = self._using_key or ln_setup.settings.instance.slug
|
41
|
+
debug_message = (
|
42
|
+
f"Lookup objects from the " f"{colors.italic(self._using_key_name)}"
|
43
|
+
)
|
41
44
|
logger.debug(debug_message)
|
42
45
|
|
43
46
|
def __getattr__(self, name):
|
44
47
|
if name in self._fields:
|
45
48
|
registry = self._fields[name].field.model
|
46
|
-
if self.
|
49
|
+
if self._using_key == "public":
|
47
50
|
return registry.public().lookup()
|
48
51
|
else:
|
49
|
-
return get_registry_instance(registry, self.
|
52
|
+
return get_registry_instance(registry, self._using_key).lookup()
|
50
53
|
raise AttributeError(
|
51
54
|
f"'{self.__class__.__name__}' object has no attribute '{name}'"
|
52
55
|
)
|
@@ -54,10 +57,10 @@ class CurateLookup:
|
|
54
57
|
def __getitem__(self, name):
|
55
58
|
if name in self._fields:
|
56
59
|
registry = self._fields[name].field.model
|
57
|
-
if self.
|
60
|
+
if self._using_key == "public":
|
58
61
|
return registry.public().lookup()
|
59
62
|
else:
|
60
|
-
return get_registry_instance(registry, self.
|
63
|
+
return get_registry_instance(registry, self._using_key).lookup()
|
61
64
|
raise AttributeError(
|
62
65
|
f"'{self.__class__.__name__}' object has no attribute '{name}'"
|
63
66
|
)
|
@@ -71,7 +74,7 @@ class CurateLookup:
|
|
71
74
|
[str([key]) for key in self._fields if not key.isidentifier()]
|
72
75
|
)
|
73
76
|
return (
|
74
|
-
f"Lookup objects from the {colors.italic(self.
|
77
|
+
f"Lookup objects from the {colors.italic(self._using_key_name)}:\n "
|
75
78
|
f"{colors.green(getattr_keys)}\n "
|
76
79
|
f"{colors.green(getitem_keys)}\n\n"
|
77
80
|
"Example:\n → categories = validator.lookup().cell_type\n"
|
@@ -82,16 +85,19 @@ class CurateLookup:
|
|
82
85
|
|
83
86
|
|
84
87
|
class DataFrameCurator:
|
85
|
-
"""
|
88
|
+
"""Curation flow for a DataFrame object.
|
89
|
+
|
90
|
+
See also :class:`~lamindb.Curate`.
|
86
91
|
|
87
92
|
Args:
|
88
93
|
df: The DataFrame object to curate.
|
89
94
|
columns: The field attribute for the feature column.
|
90
95
|
categoricals: A dictionary mapping column names to registry_field.
|
91
|
-
|
96
|
+
using_key: The reference instance containing registries to validate against.
|
92
97
|
verbosity: The verbosity level.
|
93
98
|
organism: The organism name.
|
94
99
|
sources: A dictionary mapping column names to Source records.
|
100
|
+
exclude: A dictionary mapping column names to values to exclude.
|
95
101
|
|
96
102
|
Examples:
|
97
103
|
>>> import bionty as bt
|
@@ -106,17 +112,18 @@ class DataFrameCurator:
|
|
106
112
|
df: pd.DataFrame,
|
107
113
|
columns: FieldAttr = Feature.name,
|
108
114
|
categoricals: dict[str, FieldAttr] | None = None,
|
109
|
-
|
115
|
+
using_key: str | None = None,
|
110
116
|
verbosity: str = "hint",
|
111
117
|
organism: str | None = None,
|
112
118
|
sources: dict[str, Record] | None = None,
|
119
|
+
exclude: dict | None = None,
|
113
120
|
) -> None:
|
114
121
|
from lamindb.core._settings import settings
|
115
122
|
|
116
123
|
self._df = df
|
117
124
|
self._fields = categoricals or {}
|
118
125
|
self._columns_field = columns
|
119
|
-
self.
|
126
|
+
self._using_key = using_key
|
120
127
|
settings.verbosity = verbosity
|
121
128
|
self._artifact = None
|
122
129
|
self._collection = None
|
@@ -125,25 +132,36 @@ class DataFrameCurator:
|
|
125
132
|
if sources is None:
|
126
133
|
sources = {}
|
127
134
|
self._sources = sources
|
135
|
+
if exclude is None:
|
136
|
+
exclude = {}
|
137
|
+
self._exclude = exclude
|
138
|
+
self._non_validated = None
|
128
139
|
self._save_columns()
|
129
140
|
|
141
|
+
@property
|
142
|
+
def non_validated(self) -> list:
|
143
|
+
"""Return the non-validated features and labels."""
|
144
|
+
if self._non_validated is None:
|
145
|
+
raise ValueError("Please run validate() first!")
|
146
|
+
return self._non_validated
|
147
|
+
|
130
148
|
@property
|
131
149
|
def fields(self) -> dict:
|
132
150
|
"""Return the columns fields to validate against."""
|
133
151
|
return self._fields
|
134
152
|
|
135
|
-
def lookup(self,
|
153
|
+
def lookup(self, using_key: str | None = None) -> CurateLookup:
|
136
154
|
"""Lookup categories.
|
137
155
|
|
138
156
|
Args:
|
139
|
-
|
140
|
-
if None (default), the lookup is performed on the instance specified in "
|
157
|
+
using_key: The instance where the lookup is performed.
|
158
|
+
if None (default), the lookup is performed on the instance specified in "using_key" parameter of the validator.
|
141
159
|
if "public", the lookup is performed on the public reference.
|
142
160
|
"""
|
143
161
|
return CurateLookup(
|
144
162
|
categoricals=self._fields,
|
145
163
|
slots={"columns": self._columns_field},
|
146
|
-
|
164
|
+
using_key=using_key or self._using_key,
|
147
165
|
)
|
148
166
|
|
149
167
|
def _save_columns(self, validated_only: bool = True, **kwargs) -> None:
|
@@ -160,7 +178,7 @@ class DataFrameCurator:
|
|
160
178
|
field=self._columns_field,
|
161
179
|
key="columns",
|
162
180
|
save_function="add_new_from_columns",
|
163
|
-
|
181
|
+
using_key=self._using_key,
|
164
182
|
validated_only=False,
|
165
183
|
source=self._sources.get("columns"),
|
166
184
|
**kwargs,
|
@@ -174,10 +192,11 @@ class DataFrameCurator:
|
|
174
192
|
field=self._columns_field,
|
175
193
|
key="columns",
|
176
194
|
save_function="add_new_from_columns",
|
177
|
-
|
195
|
+
using_key=self._using_key,
|
178
196
|
validated_only=validated_only,
|
179
197
|
df=self._df, # Get the Feature type from df
|
180
198
|
source=self._sources.get("columns"),
|
199
|
+
warning=False, # Do not warn about missing columns, just an info message
|
181
200
|
**kwargs,
|
182
201
|
)
|
183
202
|
|
@@ -226,7 +245,7 @@ class DataFrameCurator:
|
|
226
245
|
values=self._df[categorical].unique().tolist(),
|
227
246
|
field=self.fields[categorical],
|
228
247
|
key=categorical,
|
229
|
-
|
248
|
+
using_key=self._using_key,
|
230
249
|
validated_only=validated_only,
|
231
250
|
sources=self._sources.get(categorical),
|
232
251
|
**kwargs,
|
@@ -245,11 +264,12 @@ class DataFrameCurator:
|
|
245
264
|
Whether the DataFrame is validated.
|
246
265
|
"""
|
247
266
|
self._kwargs.update({"organism": organism} if organism else {})
|
248
|
-
self._validated = validate_categories_in_df(
|
267
|
+
self._validated, self._non_validated = validate_categories_in_df( # type: ignore
|
249
268
|
self._df,
|
250
269
|
fields=self.fields,
|
251
|
-
|
270
|
+
using_key=self._using_key,
|
252
271
|
sources=self._sources,
|
272
|
+
exclude=self._exclude,
|
253
273
|
**self._kwargs,
|
254
274
|
)
|
255
275
|
return self._validated
|
@@ -302,16 +322,21 @@ class DataFrameCurator:
|
|
302
322
|
|
303
323
|
|
304
324
|
class AnnDataCurator(DataFrameCurator):
|
305
|
-
"""
|
325
|
+
"""Curation flow for ``AnnData``.
|
326
|
+
|
327
|
+
See also :class:`~lamindb.Curate`.
|
328
|
+
|
329
|
+
Note that if genes are removed from the AnnData object, the object should be recreated using :meth:`~lamindb.Curate.from_anndata`.
|
306
330
|
|
307
331
|
Args:
|
308
332
|
data: The AnnData object or an AnnData-like path.
|
309
333
|
var_index: The registry field for mapping the ``.var`` index.
|
310
334
|
categoricals: A dictionary mapping ``.obs.columns`` to a registry field.
|
311
|
-
|
335
|
+
using_key: A reference LaminDB instance.
|
312
336
|
verbosity: The verbosity level.
|
313
337
|
organism: The organism name.
|
314
338
|
sources: A dictionary mapping ``.obs.columns`` to Source records.
|
339
|
+
exclude: A dictionary mapping column names to values to exclude.
|
315
340
|
|
316
341
|
Examples:
|
317
342
|
>>> import bionty as bt
|
@@ -328,10 +353,12 @@ class AnnDataCurator(DataFrameCurator):
|
|
328
353
|
data: ad.AnnData | UPathStr,
|
329
354
|
var_index: FieldAttr,
|
330
355
|
categoricals: dict[str, FieldAttr] | None = None,
|
331
|
-
|
356
|
+
obs_columns: FieldAttr = Feature.name,
|
357
|
+
using_key: str = "default",
|
332
358
|
verbosity: str = "hint",
|
333
359
|
organism: str | None = None,
|
334
360
|
sources: dict[str, Record] | None = None,
|
361
|
+
exclude: dict | None = None,
|
335
362
|
) -> None:
|
336
363
|
from lamindb_setup.core import upath
|
337
364
|
|
@@ -355,13 +382,14 @@ class AnnDataCurator(DataFrameCurator):
|
|
355
382
|
super().__init__(
|
356
383
|
df=self._adata.obs,
|
357
384
|
categoricals=categoricals,
|
358
|
-
|
385
|
+
columns=obs_columns,
|
386
|
+
using_key=using_key,
|
359
387
|
verbosity=verbosity,
|
360
388
|
organism=organism,
|
361
389
|
sources=sources,
|
390
|
+
exclude=exclude,
|
362
391
|
)
|
363
392
|
self._obs_fields = categoricals
|
364
|
-
self._save_from_var_index(validated_only=True, **self._kwargs)
|
365
393
|
|
366
394
|
@property
|
367
395
|
def var_index(self) -> FieldAttr:
|
@@ -373,18 +401,18 @@ class AnnDataCurator(DataFrameCurator):
|
|
373
401
|
"""Return the obs fields to validate against."""
|
374
402
|
return self._obs_fields
|
375
403
|
|
376
|
-
def lookup(self,
|
404
|
+
def lookup(self, using_key: str | None = None) -> CurateLookup:
|
377
405
|
"""Lookup categories.
|
378
406
|
|
379
407
|
Args:
|
380
|
-
|
408
|
+
using_key: The instance where the lookup is performed.
|
381
409
|
if None (default), the lookup is performed on the instance specified in "using_key" parameter of the validator.
|
382
410
|
if "public", the lookup is performed on the public reference.
|
383
411
|
"""
|
384
412
|
return CurateLookup(
|
385
413
|
categoricals=self._obs_fields,
|
386
414
|
slots={"columns": self._columns_field, "var_index": self._var_field},
|
387
|
-
|
415
|
+
using_key=using_key or self._using_key,
|
388
416
|
)
|
389
417
|
|
390
418
|
def _save_from_var_index(
|
@@ -392,16 +420,25 @@ class AnnDataCurator(DataFrameCurator):
|
|
392
420
|
):
|
393
421
|
"""Save variable records."""
|
394
422
|
update_registry(
|
395
|
-
values=self._adata.var.index,
|
423
|
+
values=list(self._adata.var.index),
|
396
424
|
field=self.var_index,
|
397
425
|
key="var_index",
|
398
426
|
save_function="add_new_from_var_index",
|
399
|
-
|
427
|
+
using_key=self._using_key,
|
400
428
|
validated_only=validated_only,
|
401
429
|
organism=organism,
|
402
430
|
source=self._sources.get("var_index"),
|
403
431
|
)
|
404
432
|
|
433
|
+
def _update_registry_all(self, validated_only: bool = True, **kwargs):
|
434
|
+
"""Save labels for all features."""
|
435
|
+
for name in self.fields.keys():
|
436
|
+
logger.info(f"saving labels for '{name}'")
|
437
|
+
if name == "var_index":
|
438
|
+
self._save_from_var_index(validated_only=validated_only, **kwargs)
|
439
|
+
else:
|
440
|
+
self._update_registry(name, validated_only=validated_only, **kwargs)
|
441
|
+
|
405
442
|
def add_new_from_var_index(self, organism: str | None = None, **kwargs):
|
406
443
|
"""Update variable records.
|
407
444
|
|
@@ -412,6 +449,15 @@ class AnnDataCurator(DataFrameCurator):
|
|
412
449
|
self._kwargs.update({"organism": organism} if organism else {})
|
413
450
|
self._save_from_var_index(validated_only=False, **self._kwargs, **kwargs)
|
414
451
|
|
452
|
+
def add_validated_from_var_index(self, organism: str | None = None):
|
453
|
+
"""Add validated variable records.
|
454
|
+
|
455
|
+
Args:
|
456
|
+
organism: The organism name.
|
457
|
+
"""
|
458
|
+
self._kwargs.update({"organism": organism} if organism else {})
|
459
|
+
self._save_from_var_index(validated_only=True, **self._kwargs)
|
460
|
+
|
415
461
|
def validate(self, organism: str | None = None) -> bool:
|
416
462
|
"""Validate categories.
|
417
463
|
|
@@ -422,24 +468,32 @@ class AnnDataCurator(DataFrameCurator):
|
|
422
468
|
Whether the AnnData object is validated.
|
423
469
|
"""
|
424
470
|
self._kwargs.update({"organism": organism} if organism else {})
|
425
|
-
if self.
|
471
|
+
if self._using_key is not None and self._using_key != "default":
|
426
472
|
logger.important(
|
427
|
-
f"validating metadata using registries of instance {colors.italic(self.
|
473
|
+
f"validating metadata using registries of instance {colors.italic(self._using_key)}"
|
428
474
|
)
|
429
|
-
|
475
|
+
|
476
|
+
validated_var, non_validated_var = validate_categories(
|
430
477
|
self._adata.var.index,
|
431
478
|
field=self._var_field,
|
432
479
|
key="var_index",
|
433
|
-
|
434
|
-
|
480
|
+
using_key=self._using_key,
|
481
|
+
source=self._sources.get("var_index"),
|
482
|
+
validated_hint_print=".add_validated_from_var_index()",
|
483
|
+
exclude=self._exclude.get("var_index"),
|
484
|
+
**self._kwargs, # type: ignore
|
435
485
|
)
|
436
|
-
validated_obs = validate_categories_in_df(
|
486
|
+
validated_obs, non_validated_obs = validate_categories_in_df(
|
437
487
|
self._adata.obs,
|
438
488
|
fields=self.categoricals,
|
439
|
-
|
489
|
+
using_key=self._using_key,
|
440
490
|
sources=self._sources,
|
491
|
+
exclude=self._exclude,
|
441
492
|
**self._kwargs,
|
442
493
|
)
|
494
|
+
self._non_validated = non_validated_obs # type: ignore
|
495
|
+
if len(non_validated_var) > 0:
|
496
|
+
self._non_validated["var_index"] = non_validated_var # type: ignore
|
443
497
|
self._validated = validated_var and validated_obs
|
444
498
|
return self._validated
|
445
499
|
|
@@ -471,7 +525,12 @@ class AnnDataCurator(DataFrameCurator):
|
|
471
525
|
|
472
526
|
|
473
527
|
class MuDataCurator:
|
474
|
-
"""
|
528
|
+
"""Curation flow for a ``MuData`` object.
|
529
|
+
|
530
|
+
See also :class:`~lamindb.Curate`.
|
531
|
+
|
532
|
+
Note that if genes or other measurements are removed from the MuData object,
|
533
|
+
the object should be recreated using :meth:`~lamindb.Curate.from_mudata`.
|
475
534
|
|
476
535
|
Args:
|
477
536
|
mdata: The MuData object to curate.
|
@@ -480,9 +539,11 @@ class MuDataCurator:
|
|
480
539
|
``{"modality_1": bt.Gene.ensembl_gene_id, "modality_2": ln.CellMarker.name}``
|
481
540
|
categoricals: A dictionary mapping ``.obs.columns`` to a registry field.
|
482
541
|
Use modality keys to specify categoricals for MuData slots such as `"rna:cell_type": bt.CellType.name`.
|
483
|
-
|
542
|
+
using_key: A reference LaminDB instance.
|
484
543
|
verbosity: The verbosity level.
|
485
544
|
organism: The organism name.
|
545
|
+
sources: A dictionary mapping ``.obs.columns`` to Source records.
|
546
|
+
exclude: A dictionary mapping column names to values to exclude.
|
486
547
|
|
487
548
|
Examples:
|
488
549
|
>>> import bionty as bt
|
@@ -499,29 +560,34 @@ class MuDataCurator:
|
|
499
560
|
mdata: MuData,
|
500
561
|
var_index: dict[str, dict[str, FieldAttr]],
|
501
562
|
categoricals: dict[str, FieldAttr] | None = None,
|
502
|
-
|
563
|
+
using_key: str = "default",
|
503
564
|
verbosity: str = "hint",
|
504
565
|
organism: str | None = None,
|
505
566
|
sources: dict[str, Record] | None = None,
|
567
|
+
exclude: dict | None = None,
|
506
568
|
) -> None:
|
507
569
|
if sources is None:
|
508
570
|
sources = {}
|
509
571
|
self._sources = sources
|
572
|
+
if exclude is None:
|
573
|
+
exclude = {}
|
574
|
+
self._exclude = exclude
|
510
575
|
self._mdata = mdata
|
511
576
|
self._kwargs = {"organism": organism} if organism else {}
|
512
577
|
self._var_fields = var_index
|
513
578
|
self._verify_modality(self._var_fields.keys())
|
514
579
|
self._obs_fields = self._parse_categoricals(categoricals)
|
515
580
|
self._modalities = set(self._var_fields.keys()) | set(self._obs_fields.keys())
|
516
|
-
self.
|
581
|
+
self._using_key = using_key
|
517
582
|
self._verbosity = verbosity
|
518
583
|
self._df_annotators = {
|
519
584
|
modality: DataFrameCurator(
|
520
585
|
df=mdata[modality].obs if modality != "obs" else mdata.obs,
|
521
586
|
categoricals=self._obs_fields.get(modality, {}),
|
522
|
-
|
587
|
+
using_key=using_key,
|
523
588
|
verbosity=verbosity,
|
524
589
|
sources=self._sources.get(modality),
|
590
|
+
exclude=self._exclude.get(modality),
|
525
591
|
**self._kwargs,
|
526
592
|
)
|
527
593
|
for modality in self._modalities
|
@@ -552,11 +618,11 @@ class MuDataCurator:
|
|
552
618
|
):
|
553
619
|
"""Save variable records."""
|
554
620
|
update_registry(
|
555
|
-
values=self._mdata[modality].var.index,
|
621
|
+
values=list(self._mdata[modality].var.index),
|
556
622
|
field=self._var_fields[modality],
|
557
623
|
key="var_index",
|
558
624
|
save_function="add_new_from_var_index",
|
559
|
-
|
625
|
+
using_key=self._using_key,
|
560
626
|
validated_only=validated_only,
|
561
627
|
dtype="number",
|
562
628
|
**kwargs,
|
@@ -580,12 +646,12 @@ class MuDataCurator:
|
|
580
646
|
obs_fields["obs"][k] = v
|
581
647
|
return obs_fields
|
582
648
|
|
583
|
-
def lookup(self,
|
649
|
+
def lookup(self, using_key: str | None = None) -> CurateLookup:
|
584
650
|
"""Lookup categories.
|
585
651
|
|
586
652
|
Args:
|
587
|
-
|
588
|
-
if None (default), the lookup is performed on the instance specified in "
|
653
|
+
using_key: The instance where the lookup is performed.
|
654
|
+
if None (default), the lookup is performed on the instance specified in "using_key" parameter of the validator.
|
589
655
|
if "public", the lookup is performed on the public reference.
|
590
656
|
"""
|
591
657
|
return CurateLookup(
|
@@ -594,7 +660,7 @@ class MuDataCurator:
|
|
594
660
|
**self._obs_fields,
|
595
661
|
**{f"{k}_var_index": v for k, v in self._var_fields.items()},
|
596
662
|
},
|
597
|
-
|
663
|
+
using_key=using_key or self._using_key,
|
598
664
|
)
|
599
665
|
|
600
666
|
def add_new_from_columns(
|
@@ -613,14 +679,15 @@ class MuDataCurator:
|
|
613
679
|
**kwargs: Additional keyword arguments to pass to the registry model.
|
614
680
|
"""
|
615
681
|
self._kwargs.update({"organism": organism} if organism else {})
|
682
|
+
values = column_names or self._mdata[modality].obs.columns
|
616
683
|
update_registry(
|
617
|
-
values=
|
684
|
+
values=list(values),
|
618
685
|
field=Feature.name,
|
619
686
|
key=f"{modality} obs columns",
|
620
|
-
|
687
|
+
using_key=self._using_key,
|
621
688
|
validated_only=False,
|
622
689
|
df=self._mdata[modality].obs,
|
623
|
-
**self._kwargs,
|
690
|
+
**self._kwargs, # type: ignore
|
624
691
|
**kwargs,
|
625
692
|
)
|
626
693
|
|
@@ -639,6 +706,18 @@ class MuDataCurator:
|
|
639
706
|
modality=modality, validated_only=False, **self._kwargs, **kwargs
|
640
707
|
)
|
641
708
|
|
709
|
+
def add_validated_from_var_index(self, modality: str, organism: str | None = None):
|
710
|
+
"""Add validated variable records.
|
711
|
+
|
712
|
+
Args:
|
713
|
+
modality: The modality name.
|
714
|
+
organism: The organism name.
|
715
|
+
"""
|
716
|
+
self._kwargs.update({"organism": organism} if organism else {})
|
717
|
+
self._save_from_var_index_modality(
|
718
|
+
modality=modality, validated_only=True, **self._kwargs
|
719
|
+
)
|
720
|
+
|
642
721
|
def add_validated_from(
|
643
722
|
self, key: str, modality: str | None = None, organism: str | None = None
|
644
723
|
):
|
@@ -681,32 +760,48 @@ class MuDataCurator:
|
|
681
760
|
def validate(self, organism: str | None = None) -> bool:
|
682
761
|
"""Validate categories."""
|
683
762
|
self._kwargs.update({"organism": organism} if organism else {})
|
684
|
-
if self.
|
763
|
+
if self._using_key is not None and self._using_key != "default":
|
685
764
|
logger.important(
|
686
|
-
f"validating metadata using registries of instance {colors.italic(self.
|
765
|
+
f"validating metadata using registries of instance {colors.italic(self._using_key)}"
|
687
766
|
)
|
688
767
|
validated_var = True
|
768
|
+
non_validated_var_modality = {}
|
689
769
|
for modality, var_field in self._var_fields.items():
|
690
|
-
|
770
|
+
is_validated_var, non_validated_var = validate_categories(
|
691
771
|
self._mdata[modality].var.index,
|
692
772
|
field=var_field,
|
693
773
|
key=f"{modality}_var_index",
|
694
|
-
|
695
|
-
|
774
|
+
using_key=self._using_key,
|
775
|
+
exclude=self._exclude.get(f"{modality}_var_index"),
|
776
|
+
**self._kwargs, # type: ignore
|
696
777
|
)
|
778
|
+
validated_var &= is_validated_var
|
779
|
+
if len(non_validated_var) > 0:
|
780
|
+
non_validated_var_modality[modality] = non_validated_var
|
781
|
+
|
697
782
|
validated_obs = True
|
783
|
+
non_validated_obs_modality = {}
|
698
784
|
for modality, fields in self._obs_fields.items():
|
699
785
|
if modality == "obs":
|
700
786
|
obs = self._mdata.obs
|
701
787
|
else:
|
702
788
|
obs = self._mdata[modality].obs
|
703
|
-
|
789
|
+
is_validated_obs, non_validated_obs = validate_categories_in_df(
|
704
790
|
obs,
|
705
791
|
fields=fields,
|
706
|
-
|
792
|
+
using_key=self._using_key,
|
707
793
|
sources=self._sources.get(modality),
|
794
|
+
exclude=self._exclude.get(modality),
|
708
795
|
**self._kwargs,
|
709
796
|
)
|
797
|
+
validated_obs &= is_validated_obs
|
798
|
+
non_validated_obs_modality[modality] = non_validated_obs
|
799
|
+
if modality in non_validated_var_modality:
|
800
|
+
non_validated_obs_modality[modality]["var_index"] = (
|
801
|
+
non_validated_var_modality[modality]
|
802
|
+
)
|
803
|
+
if len(non_validated_obs_modality[modality]) > 0:
|
804
|
+
self._non_validated = non_validated_obs_modality[modality]
|
710
805
|
self._validated = validated_var and validated_obs
|
711
806
|
return self._validated
|
712
807
|
|
@@ -735,7 +830,32 @@ class MuDataCurator:
|
|
735
830
|
|
736
831
|
|
737
832
|
class Curate:
|
738
|
-
"""
|
833
|
+
"""Curation flow.
|
834
|
+
|
835
|
+
Data curation entails accurately labeling datasets with standardized metadata
|
836
|
+
to facilitate data integration, interpretation and analysis.
|
837
|
+
|
838
|
+
The curation flow has several steps:
|
839
|
+
|
840
|
+
1. Create a :class:`Curate` object corresponding to the object type that you want to curate:
|
841
|
+
|
842
|
+
- :meth:`~lamindb.Curate.from_df`
|
843
|
+
- :meth:`~lamindb.Curate.from_anndata`
|
844
|
+
- :meth:`~lamindb.Curate.from_mudata`
|
845
|
+
|
846
|
+
During object creation, any passed categoricals found in the object will be saved.
|
847
|
+
|
848
|
+
2. Run :meth:`~lamindb.core.DataFrameCurator.validate` to check the data against the defined criteria. This method identifies:
|
849
|
+
|
850
|
+
- Values that can be successfully validated and already exist in the registry.
|
851
|
+
- Values which are new and not yet validated or potentially problematic values.
|
852
|
+
|
853
|
+
3. Determine how to handle validated and unvalidated values:
|
854
|
+
|
855
|
+
- Validated values not yet in the registry can be automatically registered using :meth:`~lamindb.core.DataFrameCurator.add_validated_from`.
|
856
|
+
- Valid and new values can be registered using :meth:`~lamindb.core.DataFrameCurator.add_new_from`.
|
857
|
+
- All unvalidated values can be accessed using :attr:`~lamindb.core.DataFrameCurator.non_validated` and subsequently removed from the object at hand.
|
858
|
+
"""
|
739
859
|
|
740
860
|
@classmethod
|
741
861
|
@doc_args(DataFrameCurator.__doc__)
|
@@ -744,7 +864,7 @@ class Curate:
|
|
744
864
|
df: pd.DataFrame,
|
745
865
|
categoricals: dict[str, FieldAttr] | None = None,
|
746
866
|
columns: FieldAttr = Feature.name,
|
747
|
-
|
867
|
+
using_key: str | None = None,
|
748
868
|
verbosity: str = "hint",
|
749
869
|
organism: str | None = None,
|
750
870
|
) -> DataFrameCurator:
|
@@ -753,7 +873,7 @@ class Curate:
|
|
753
873
|
df=df,
|
754
874
|
categoricals=categoricals,
|
755
875
|
columns=columns,
|
756
|
-
|
876
|
+
using_key=using_key,
|
757
877
|
verbosity=verbosity,
|
758
878
|
organism=organism,
|
759
879
|
)
|
@@ -765,7 +885,8 @@ class Curate:
|
|
765
885
|
data: ad.AnnData | UPathStr,
|
766
886
|
var_index: FieldAttr,
|
767
887
|
categoricals: dict[str, FieldAttr] | None = None,
|
768
|
-
|
888
|
+
obs_columns: FieldAttr = Feature.name,
|
889
|
+
using_key: str = "default",
|
769
890
|
verbosity: str = "hint",
|
770
891
|
organism: str | None = None,
|
771
892
|
sources: dict[str, Record] | None = None,
|
@@ -775,7 +896,8 @@ class Curate:
|
|
775
896
|
data=data,
|
776
897
|
var_index=var_index,
|
777
898
|
categoricals=categoricals,
|
778
|
-
|
899
|
+
obs_columns=obs_columns,
|
900
|
+
using_key=using_key,
|
779
901
|
verbosity=verbosity,
|
780
902
|
organism=organism,
|
781
903
|
sources=sources,
|
@@ -788,7 +910,7 @@ class Curate:
|
|
788
910
|
mdata: MuData,
|
789
911
|
var_index: dict[str, dict[str, FieldAttr]],
|
790
912
|
categoricals: dict[str, FieldAttr] | None = None,
|
791
|
-
|
913
|
+
using_key: str = "default",
|
792
914
|
verbosity: str = "hint",
|
793
915
|
organism: str | None = None,
|
794
916
|
) -> MuDataCurator:
|
@@ -797,29 +919,68 @@ class Curate:
|
|
797
919
|
mdata=mdata,
|
798
920
|
var_index=var_index,
|
799
921
|
categoricals=categoricals,
|
800
|
-
|
922
|
+
using_key=using_key,
|
801
923
|
verbosity=verbosity,
|
802
924
|
organism=organism,
|
803
925
|
)
|
804
926
|
|
805
927
|
|
806
|
-
def get_registry_instance(registry: Record,
|
928
|
+
def get_registry_instance(registry: Record, using_key: str | None = None) -> Record:
|
807
929
|
"""Get a registry instance using a specific instance."""
|
808
|
-
if
|
809
|
-
return registry.using(
|
930
|
+
if using_key is not None and using_key != "default":
|
931
|
+
return registry.using(using_key)
|
810
932
|
return registry
|
811
933
|
|
812
934
|
|
935
|
+
def get_current_filter_kwargs(registry: type[Record], kwargs: dict) -> dict:
|
936
|
+
"""Make sure the source and organism are saved in the same database as the registry."""
|
937
|
+
from lamindb.core._settings import settings
|
938
|
+
|
939
|
+
db = registry.filter().db
|
940
|
+
source = kwargs.get("source")
|
941
|
+
organism = kwargs.get("organism")
|
942
|
+
filter_kwargs = kwargs.copy()
|
943
|
+
try:
|
944
|
+
verbosity = settings.verbosity
|
945
|
+
settings.verbosity = "error"
|
946
|
+
if isinstance(organism, Record) and organism._state.db != "default":
|
947
|
+
if db is None or db == "default":
|
948
|
+
organism_default = copy.copy(organism)
|
949
|
+
# save the organism record in the default database
|
950
|
+
organism_default.save()
|
951
|
+
filter_kwargs["organism"] = organism_default
|
952
|
+
if isinstance(source, Record) and source._state.db != "default":
|
953
|
+
if db is None or db == "default":
|
954
|
+
source_default = copy.copy(source)
|
955
|
+
# save the source record in the default database
|
956
|
+
source_default.save()
|
957
|
+
filter_kwargs["source"] = source_default
|
958
|
+
finally:
|
959
|
+
settings.verbosity = verbosity
|
960
|
+
return filter_kwargs
|
961
|
+
|
962
|
+
|
813
963
|
def standardize_and_inspect(
|
814
|
-
values: Iterable[str],
|
964
|
+
values: Iterable[str],
|
965
|
+
field: FieldAttr,
|
966
|
+
registry: type[Record],
|
967
|
+
standardize: bool = False,
|
968
|
+
**kwargs,
|
815
969
|
):
|
816
970
|
"""Standardize and inspect values using a registry."""
|
817
|
-
|
818
|
-
|
819
|
-
|
820
|
-
|
821
|
-
|
822
|
-
|
971
|
+
filter_kwargs = get_current_filter_kwargs(registry, kwargs)
|
972
|
+
|
973
|
+
if standardize:
|
974
|
+
if hasattr(registry, "standardize") and hasattr(
|
975
|
+
registry,
|
976
|
+
"synonyms", # https://github.com/laminlabs/lamindb/issues/1685
|
977
|
+
):
|
978
|
+
standardized_values = registry.standardize(
|
979
|
+
values, field=field, mute=True, **filter_kwargs
|
980
|
+
)
|
981
|
+
values = standardized_values
|
982
|
+
|
983
|
+
return registry.inspect(values, field=field, mute=True, **filter_kwargs)
|
823
984
|
|
824
985
|
|
825
986
|
def check_registry_organism(registry: Record, organism: str | None = None) -> dict:
|
@@ -840,11 +1001,26 @@ def validate_categories(
|
|
840
1001
|
values: Iterable[str],
|
841
1002
|
field: FieldAttr,
|
842
1003
|
key: str,
|
843
|
-
|
1004
|
+
using_key: str | None = None,
|
844
1005
|
organism: str | None = None,
|
845
1006
|
source: Record | None = None,
|
846
|
-
|
847
|
-
|
1007
|
+
exclude: str | list | None = None,
|
1008
|
+
standardize: bool = True,
|
1009
|
+
validated_hint_print: str | None = None,
|
1010
|
+
) -> tuple[bool, list]:
|
1011
|
+
"""Validate ontology terms in a pandas series using LaminDB registries.
|
1012
|
+
|
1013
|
+
Args:
|
1014
|
+
values: The values to validate.
|
1015
|
+
field: The field attribute.
|
1016
|
+
key: The key referencing the slot in the DataFrame.
|
1017
|
+
using_key: A reference LaminDB instance.
|
1018
|
+
organism: The organism name.
|
1019
|
+
source: The source record.
|
1020
|
+
exclude: Exclude specific values.
|
1021
|
+
standardize: Standardize the values.
|
1022
|
+
validated_hint_print: The hint to print for validated values.
|
1023
|
+
"""
|
848
1024
|
from lamindb._from_values import _print_values
|
849
1025
|
from lamindb.core._settings import settings
|
850
1026
|
|
@@ -856,43 +1032,60 @@ def validate_categories(
|
|
856
1032
|
logger.indent = " "
|
857
1033
|
|
858
1034
|
registry = field.field.model
|
859
|
-
|
860
|
-
|
1035
|
+
kwargs = check_registry_organism(registry, organism)
|
1036
|
+
kwargs.update({"source": source} if source else {})
|
1037
|
+
|
1038
|
+
# inspect the default instance
|
1039
|
+
if exclude is not None:
|
1040
|
+
exclude = [exclude] if isinstance(exclude, str) else exclude
|
1041
|
+
# exclude values are validated without source and organism
|
1042
|
+
inspect_result = registry.inspect(exclude, field=field, mute=True)
|
1043
|
+
# if exclude values are validated, remove them from the values
|
1044
|
+
values = [i for i in values if i not in inspect_result.validated]
|
861
1045
|
|
862
|
-
# Inspect the default instance
|
863
1046
|
inspect_result = standardize_and_inspect(
|
864
|
-
values=values,
|
1047
|
+
values=values,
|
1048
|
+
field=field,
|
1049
|
+
registry=registry,
|
1050
|
+
standardize=standardize,
|
1051
|
+
**kwargs,
|
865
1052
|
)
|
866
1053
|
non_validated = inspect_result.non_validated
|
867
1054
|
|
868
1055
|
values_validated = []
|
869
|
-
if
|
870
|
-
|
871
|
-
#
|
1056
|
+
if using_key is not None and using_key != "default" and non_validated:
|
1057
|
+
registry_using = get_registry_instance(registry, using_key)
|
1058
|
+
# inspect the using instance
|
872
1059
|
inspect_result = standardize_and_inspect(
|
873
|
-
values=non_validated,
|
1060
|
+
values=non_validated,
|
1061
|
+
field=field,
|
1062
|
+
registry=registry_using,
|
1063
|
+
standardize=standardize,
|
1064
|
+
**kwargs,
|
874
1065
|
)
|
875
1066
|
non_validated = inspect_result.non_validated
|
876
1067
|
values_validated += inspect_result.validated
|
877
1068
|
|
878
|
-
#
|
1069
|
+
# inspect from public (bionty only)
|
879
1070
|
if hasattr(registry, "public"):
|
880
1071
|
verbosity = settings.verbosity
|
881
1072
|
try:
|
882
1073
|
settings.verbosity = "error"
|
883
1074
|
public_records = registry.from_values(
|
884
|
-
non_validated,
|
1075
|
+
non_validated,
|
1076
|
+
field=field,
|
1077
|
+
**get_current_filter_kwargs(registry, kwargs),
|
885
1078
|
)
|
886
1079
|
values_validated += [getattr(r, field.field.name) for r in public_records]
|
887
1080
|
finally:
|
888
1081
|
settings.verbosity = verbosity
|
889
1082
|
|
890
|
-
validated_hint_print = f".add_validated_from('{key}')"
|
1083
|
+
validated_hint_print = validated_hint_print or f".add_validated_from('{key}')"
|
891
1084
|
n_validated = len(values_validated)
|
892
1085
|
if n_validated > 0:
|
893
1086
|
_log_mapping_info()
|
894
1087
|
logger.warning(
|
895
|
-
f"found {colors.yellow(
|
1088
|
+
f"found {colors.yellow(n_validated)} validated terms: "
|
896
1089
|
f"{colors.yellow(values_validated)}\n → save terms via "
|
897
1090
|
f"{colors.yellow(validated_hint_print)}"
|
898
1091
|
)
|
@@ -903,43 +1096,49 @@ def validate_categories(
|
|
903
1096
|
if n_non_validated == 0:
|
904
1097
|
logger.indent = ""
|
905
1098
|
logger.success(f"{key} is validated against {colors.italic(model_field)}")
|
906
|
-
return True
|
1099
|
+
return True, []
|
907
1100
|
else:
|
908
1101
|
are = "are" if n_non_validated > 1 else "is"
|
909
1102
|
print_values = _print_values(non_validated)
|
910
1103
|
warning_message = (
|
911
|
-
f"{colors.
|
912
|
-
f"{colors.
|
913
|
-
f"{colors.
|
1104
|
+
f"{colors.red(f'{n_non_validated} terms')} {are} not validated: "
|
1105
|
+
f"{colors.red(print_values)}\n → save terms via "
|
1106
|
+
f"{colors.red(non_validated_hint_print)}"
|
914
1107
|
)
|
915
1108
|
if logger.indent == "":
|
916
1109
|
_log_mapping_info()
|
917
1110
|
logger.warning(warning_message)
|
918
1111
|
logger.indent = ""
|
919
|
-
return False
|
1112
|
+
return False, non_validated
|
920
1113
|
|
921
1114
|
|
922
1115
|
def validate_categories_in_df(
|
923
1116
|
df: pd.DataFrame,
|
924
1117
|
fields: dict[str, FieldAttr],
|
925
|
-
|
1118
|
+
using_key: str | None = None,
|
926
1119
|
sources: dict[str, Record] = None,
|
1120
|
+
exclude: dict | None = None,
|
927
1121
|
**kwargs,
|
928
|
-
) -> bool:
|
1122
|
+
) -> tuple[bool, dict]:
|
929
1123
|
"""Validate categories in DataFrame columns using LaminDB registries."""
|
930
1124
|
if sources is None:
|
931
1125
|
sources = {}
|
932
1126
|
validated = True
|
1127
|
+
non_validated = {}
|
933
1128
|
for key, field in fields.items():
|
934
|
-
|
1129
|
+
is_val, non_val = validate_categories(
|
935
1130
|
df[key],
|
936
1131
|
field=field,
|
937
1132
|
key=key,
|
938
|
-
|
1133
|
+
using_key=using_key,
|
939
1134
|
source=sources.get(key),
|
1135
|
+
exclude=exclude.get(key) if exclude else None,
|
940
1136
|
**kwargs,
|
941
1137
|
)
|
942
|
-
|
1138
|
+
validated &= is_val
|
1139
|
+
if len(non_val) > 0:
|
1140
|
+
non_validated[key] = non_val
|
1141
|
+
return validated, non_validated
|
943
1142
|
|
944
1143
|
|
945
1144
|
def save_artifact(
|
@@ -1017,8 +1216,13 @@ def save_artifact(
|
|
1017
1216
|
feature = features.get(key)
|
1018
1217
|
registry = field.field.model
|
1019
1218
|
filter_kwargs = check_registry_organism(registry, organism)
|
1219
|
+
filter_kwargs_current = get_current_filter_kwargs(registry, filter_kwargs)
|
1020
1220
|
df = data if isinstance(data, pd.DataFrame) else data.obs
|
1021
|
-
labels = registry.from_values(
|
1221
|
+
labels = registry.from_values(
|
1222
|
+
df[key],
|
1223
|
+
field=field,
|
1224
|
+
**filter_kwargs_current,
|
1225
|
+
)
|
1022
1226
|
artifact.labels.add(labels, feature)
|
1023
1227
|
|
1024
1228
|
if artifact._accessor == "MuData":
|
@@ -1041,22 +1245,24 @@ def update_registry(
|
|
1041
1245
|
field: FieldAttr,
|
1042
1246
|
key: str,
|
1043
1247
|
save_function: str = "add_new_from",
|
1044
|
-
|
1248
|
+
using_key: str | None = None,
|
1045
1249
|
validated_only: bool = True,
|
1046
1250
|
df: pd.DataFrame | None = None,
|
1047
1251
|
organism: str | None = None,
|
1048
1252
|
dtype: str | None = None,
|
1049
1253
|
source: Record | None = None,
|
1254
|
+
standardize: bool = True,
|
1255
|
+
warning: bool = True,
|
1050
1256
|
**kwargs,
|
1051
|
-
) ->
|
1052
|
-
"""Save features or labels records in the default instance from the
|
1257
|
+
) -> None:
|
1258
|
+
"""Save features or labels records in the default instance from the using_key instance.
|
1053
1259
|
|
1054
1260
|
Args:
|
1055
1261
|
values: A list of values to be saved as labels.
|
1056
1262
|
field: The FieldAttr object representing the field for which labels are being saved.
|
1057
1263
|
key: The name of the feature to save.
|
1058
1264
|
save_function: The name of the function to save the labels.
|
1059
|
-
|
1265
|
+
using_key: The name of the instance from which to transfer labels (if applicable).
|
1060
1266
|
validated_only: If True, only save validated labels.
|
1061
1267
|
df: A DataFrame to save labels from.
|
1062
1268
|
organism: The organism name.
|
@@ -1074,51 +1280,74 @@ def update_registry(
|
|
1074
1280
|
verbosity = settings.verbosity
|
1075
1281
|
try:
|
1076
1282
|
settings.verbosity = "error"
|
1283
|
+
|
1284
|
+
# save from public
|
1285
|
+
filter_kwargs_current = get_current_filter_kwargs(registry, filter_kwargs)
|
1286
|
+
existing_and_public_records = (
|
1287
|
+
registry.from_values(
|
1288
|
+
list(values),
|
1289
|
+
field=field,
|
1290
|
+
**filter_kwargs_current,
|
1291
|
+
)
|
1292
|
+
if values
|
1293
|
+
else []
|
1294
|
+
)
|
1295
|
+
|
1296
|
+
labels_saved: dict = {"from public": [], "without reference": []}
|
1297
|
+
|
1298
|
+
public_records = [r for r in existing_and_public_records if r._state.adding]
|
1299
|
+
# here we check to only save the public records if they are from the specified source
|
1300
|
+
# we check the uid because r.source and source can be from different instances
|
1301
|
+
if source:
|
1302
|
+
public_records = [r for r in public_records if r.source.uid == source.uid]
|
1303
|
+
ln_save(public_records)
|
1304
|
+
labels_saved["from public"] = [
|
1305
|
+
getattr(r, field.field.name) for r in public_records
|
1306
|
+
]
|
1307
|
+
non_public_labels = [i for i in values if i not in labels_saved["from public"]]
|
1308
|
+
|
1309
|
+
# inspect the default instance
|
1077
1310
|
inspect_result_current = standardize_and_inspect(
|
1078
|
-
values=
|
1311
|
+
values=non_public_labels,
|
1312
|
+
field=field,
|
1313
|
+
registry=registry,
|
1314
|
+
standardize=standardize,
|
1315
|
+
**filter_kwargs,
|
1079
1316
|
)
|
1080
1317
|
if not inspect_result_current.non_validated:
|
1081
1318
|
all_labels = registry.from_values(
|
1082
|
-
inspect_result_current.validated,
|
1319
|
+
inspect_result_current.validated,
|
1320
|
+
field=field,
|
1321
|
+
**filter_kwargs_current,
|
1083
1322
|
)
|
1084
1323
|
settings.verbosity = verbosity
|
1085
1324
|
return all_labels
|
1086
1325
|
|
1087
|
-
|
1088
|
-
|
1326
|
+
# inspect the using_key instance
|
1089
1327
|
(
|
1090
|
-
labels_saved[f"from {
|
1328
|
+
labels_saved[f"from {using_key}"],
|
1091
1329
|
non_validated_labels,
|
1092
1330
|
) = update_registry_from_using_instance(
|
1093
1331
|
inspect_result_current.non_validated,
|
1094
1332
|
field=field,
|
1095
|
-
|
1333
|
+
using_key=using_key,
|
1096
1334
|
**filter_kwargs,
|
1097
1335
|
)
|
1098
1336
|
|
1099
|
-
public_records = (
|
1100
|
-
registry.from_values(non_validated_labels, field=field, **filter_kwargs)
|
1101
|
-
if non_validated_labels
|
1102
|
-
else []
|
1103
|
-
)
|
1104
|
-
# here we check to only save the public records if they are from the specified source
|
1105
|
-
# TODO: this if shouldn't be needed
|
1106
|
-
if source:
|
1107
|
-
public_records = [r for r in public_records if r.source == source]
|
1108
|
-
ln_save(public_records)
|
1109
|
-
labels_saved["from public"] = [
|
1110
|
-
getattr(r, field.field.name) for r in public_records
|
1111
|
-
]
|
1112
1337
|
labels_saved["without reference"] = [
|
1113
|
-
i
|
1338
|
+
i
|
1339
|
+
for i in non_validated_labels
|
1340
|
+
if i not in labels_saved[f"from {using_key}"]
|
1114
1341
|
]
|
1115
1342
|
|
1343
|
+
# save non-validated records
|
1116
1344
|
if not validated_only:
|
1117
1345
|
non_validated_records = []
|
1118
1346
|
if df is not None and registry == Feature:
|
1119
1347
|
non_validated_records = Feature.from_df(df)
|
1120
1348
|
else:
|
1121
1349
|
if "organism" in filter_kwargs:
|
1350
|
+
# make sure organism record is saved to the current instance
|
1122
1351
|
filter_kwargs["organism"] = _save_organism(name=organism)
|
1123
1352
|
init_kwargs = {}
|
1124
1353
|
for value in labels_saved["without reference"]:
|
@@ -1134,15 +1363,16 @@ def update_registry(
|
|
1134
1363
|
)
|
1135
1364
|
ln_save(non_validated_records)
|
1136
1365
|
|
1366
|
+
# save parent labels for ulabels
|
1137
1367
|
if registry == ULabel and field.field.name == "name":
|
1138
1368
|
save_ulabels_with_parent(values, field=field, key=key)
|
1139
1369
|
|
1140
|
-
# get all records
|
1141
|
-
all_labels = registry.from_values(
|
1142
|
-
|
1143
|
-
|
1144
|
-
|
1145
|
-
)
|
1370
|
+
# # get all records that are now validated in the current instance
|
1371
|
+
# all_labels = registry.from_values(
|
1372
|
+
# inspect_result_current.validated + inspect_result_current.non_validated,
|
1373
|
+
# field=field,
|
1374
|
+
# **get_current_filter_kwargs(registry, filter_kwargs),
|
1375
|
+
# )
|
1146
1376
|
finally:
|
1147
1377
|
settings.verbosity = verbosity
|
1148
1378
|
|
@@ -1152,9 +1382,10 @@ def update_registry(
|
|
1152
1382
|
save_function=save_function,
|
1153
1383
|
model_field=f"{registry.__name__}.{field.field.name}",
|
1154
1384
|
validated_only=validated_only,
|
1385
|
+
warning=warning,
|
1155
1386
|
)
|
1156
1387
|
|
1157
|
-
return all_labels
|
1388
|
+
# return all_labels
|
1158
1389
|
|
1159
1390
|
|
1160
1391
|
def log_saved_labels(
|
@@ -1163,6 +1394,7 @@ def log_saved_labels(
|
|
1163
1394
|
save_function: str,
|
1164
1395
|
model_field: str,
|
1165
1396
|
validated_only: bool = True,
|
1397
|
+
warning: bool = True,
|
1166
1398
|
) -> None:
|
1167
1399
|
"""Log the saved labels."""
|
1168
1400
|
from ._from_values import _print_values
|
@@ -1187,7 +1419,10 @@ def log_saved_labels(
|
|
1187
1419
|
if save_function == "add_new_from"
|
1188
1420
|
else f"\n → to save, run {colors.yellow(save_function)}"
|
1189
1421
|
)
|
1190
|
-
|
1422
|
+
if warning:
|
1423
|
+
logger.warning(msg)
|
1424
|
+
else:
|
1425
|
+
logger.info(msg)
|
1191
1426
|
else:
|
1192
1427
|
k = "" if k == "without reference" else f"{colors.green(k)} "
|
1193
1428
|
# the term "transferred" stresses that this is always in the context of transferring
|
@@ -1202,7 +1437,7 @@ def save_ulabels_with_parent(values: list[str], field: FieldAttr, key: str) -> N
|
|
1202
1437
|
"""Save a parent label for the given labels."""
|
1203
1438
|
registry = field.field.model
|
1204
1439
|
assert registry == ULabel # noqa: S101
|
1205
|
-
all_records = registry.from_values(values, field=field)
|
1440
|
+
all_records = registry.from_values(list(values), field=field)
|
1206
1441
|
is_feature = registry.filter(name=f"is_{key}").one_or_none()
|
1207
1442
|
if is_feature is None:
|
1208
1443
|
is_feature = registry(name=f"is_{key}")
|
@@ -1213,15 +1448,16 @@ def save_ulabels_with_parent(values: list[str], field: FieldAttr, key: str) -> N
|
|
1213
1448
|
def update_registry_from_using_instance(
|
1214
1449
|
values: list[str],
|
1215
1450
|
field: FieldAttr,
|
1216
|
-
|
1451
|
+
using_key: str | None = None,
|
1452
|
+
standardize: bool = False,
|
1217
1453
|
**kwargs,
|
1218
1454
|
) -> tuple[list[str], list[str]]:
|
1219
|
-
"""Save features or labels records from the
|
1455
|
+
"""Save features or labels records from the using_key instance.
|
1220
1456
|
|
1221
1457
|
Args:
|
1222
1458
|
values: A list of values to be saved as labels.
|
1223
1459
|
field: The FieldAttr object representing the field for which labels are being saved.
|
1224
|
-
|
1460
|
+
using_key: The name of the instance from which to transfer labels (if applicable).
|
1225
1461
|
kwargs: Additional keyword arguments to pass to the registry model.
|
1226
1462
|
|
1227
1463
|
Returns:
|
@@ -1230,11 +1466,15 @@ def update_registry_from_using_instance(
|
|
1230
1466
|
labels_saved = []
|
1231
1467
|
not_saved = values
|
1232
1468
|
|
1233
|
-
if
|
1234
|
-
|
1235
|
-
|
1469
|
+
if using_key is not None and using_key != "default":
|
1470
|
+
registry_using = get_registry_instance(field.field.model, using_key)
|
1471
|
+
|
1236
1472
|
inspect_result_using = standardize_and_inspect(
|
1237
|
-
values=values,
|
1473
|
+
values=values,
|
1474
|
+
field=field,
|
1475
|
+
registry=registry_using,
|
1476
|
+
standardize=standardize,
|
1477
|
+
**kwargs,
|
1238
1478
|
)
|
1239
1479
|
labels_using = registry_using.filter(
|
1240
1480
|
**{f"{field.field.name}__in": inspect_result_using.validated}
|