lamindb 1.1.0__py3-none-any.whl → 1.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lamindb/__init__.py +33 -26
- lamindb/_finish.py +9 -1
- lamindb/_tracked.py +26 -3
- lamindb/_view.py +2 -3
- lamindb/base/__init__.py +1 -1
- lamindb/base/ids.py +1 -10
- lamindb/base/users.py +1 -4
- lamindb/core/__init__.py +7 -65
- lamindb/core/_compat.py +60 -0
- lamindb/core/_context.py +50 -22
- lamindb/core/_mapped_collection.py +4 -2
- lamindb/core/_settings.py +6 -6
- lamindb/core/_sync_git.py +1 -1
- lamindb/core/_track_environment.py +2 -1
- lamindb/core/datasets/_small.py +3 -3
- lamindb/core/loaders.py +43 -20
- lamindb/core/storage/_anndata_accessor.py +8 -3
- lamindb/core/storage/_backed_access.py +14 -7
- lamindb/core/storage/_pyarrow_dataset.py +24 -9
- lamindb/core/storage/_tiledbsoma.py +8 -6
- lamindb/core/storage/_zarr.py +104 -25
- lamindb/core/storage/objects.py +63 -28
- lamindb/core/storage/paths.py +16 -13
- lamindb/core/types.py +10 -0
- lamindb/curators/__init__.py +176 -149
- lamindb/errors.py +1 -1
- lamindb/integrations/_vitessce.py +4 -4
- lamindb/migrations/0089_subsequent_runs.py +159 -0
- lamindb/migrations/0090_runproject_project_runs.py +73 -0
- lamindb/migrations/{0088_squashed.py → 0090_squashed.py} +245 -177
- lamindb/models/__init__.py +79 -0
- lamindb/{core → models}/_describe.py +3 -3
- lamindb/{core → models}/_django.py +8 -5
- lamindb/{core → models}/_feature_manager.py +103 -87
- lamindb/{_from_values.py → models/_from_values.py} +5 -2
- lamindb/{core/versioning.py → models/_is_versioned.py} +94 -6
- lamindb/{core → models}/_label_manager.py +10 -17
- lamindb/{core/relations.py → models/_relations.py} +8 -1
- lamindb/models/artifact.py +2602 -0
- lamindb/{_can_curate.py → models/can_curate.py} +349 -180
- lamindb/models/collection.py +683 -0
- lamindb/models/core.py +135 -0
- lamindb/models/feature.py +643 -0
- lamindb/models/flextable.py +163 -0
- lamindb/{_parents.py → models/has_parents.py} +55 -49
- lamindb/models/project.py +384 -0
- lamindb/{_query_manager.py → models/query_manager.py} +10 -8
- lamindb/{_query_set.py → models/query_set.py} +64 -32
- lamindb/models/record.py +1762 -0
- lamindb/models/run.py +563 -0
- lamindb/{_save.py → models/save.py} +18 -8
- lamindb/models/schema.py +732 -0
- lamindb/models/transform.py +360 -0
- lamindb/models/ulabel.py +249 -0
- {lamindb-1.1.0.dist-info → lamindb-1.2.0.dist-info}/METADATA +6 -6
- lamindb-1.2.0.dist-info/RECORD +95 -0
- lamindb/_artifact.py +0 -1361
- lamindb/_collection.py +0 -440
- lamindb/_feature.py +0 -316
- lamindb/_is_versioned.py +0 -40
- lamindb/_record.py +0 -1065
- lamindb/_run.py +0 -60
- lamindb/_schema.py +0 -347
- lamindb/_storage.py +0 -15
- lamindb/_transform.py +0 -170
- lamindb/_ulabel.py +0 -56
- lamindb/_utils.py +0 -9
- lamindb/base/validation.py +0 -63
- lamindb/core/_data.py +0 -491
- lamindb/core/fields.py +0 -12
- lamindb/models.py +0 -4435
- lamindb-1.1.0.dist-info/RECORD +0 -95
- {lamindb-1.1.0.dist-info → lamindb-1.2.0.dist-info}/LICENSE +0 -0
- {lamindb-1.1.0.dist-info → lamindb-1.2.0.dist-info}/WHEEL +0 -0
@@ -1,102 +1,23 @@
|
|
1
1
|
from __future__ import annotations
|
2
2
|
|
3
|
-
from typing import TYPE_CHECKING, Literal
|
3
|
+
from typing import TYPE_CHECKING, Iterable, Literal, Union
|
4
4
|
|
5
|
-
import lamindb_setup as ln_setup
|
6
5
|
import numpy as np
|
7
6
|
import pandas as pd
|
8
7
|
from django.core.exceptions import FieldDoesNotExist
|
9
8
|
from lamin_utils import colors, logger
|
10
|
-
from lamindb_setup.core._docs import doc_args
|
11
|
-
|
12
|
-
from lamindb.models import CanCurate, Record
|
13
9
|
|
10
|
+
from ..errors import ValidationError
|
14
11
|
from ._from_values import _format_values, _has_organism_field, get_or_create_records
|
15
|
-
from .
|
16
|
-
from ._utils import attach_func_to_class_method
|
17
|
-
from .errors import ValidationError
|
12
|
+
from .record import Record, _queryset, get_name_field
|
18
13
|
|
19
14
|
if TYPE_CHECKING:
|
20
15
|
from django.db.models import QuerySet
|
21
16
|
from lamin_utils._inspect import InspectResult
|
22
17
|
|
23
|
-
from lamindb._query_set import RecordList
|
24
18
|
from lamindb.base.types import ListLike, StrField
|
25
19
|
|
26
|
-
|
27
|
-
# from_values doesn't apply for QuerySet or Manager
|
28
|
-
@classmethod # type:ignore
|
29
|
-
@doc_args(CanCurate.from_values.__doc__)
|
30
|
-
def from_values(
|
31
|
-
cls,
|
32
|
-
values: ListLike,
|
33
|
-
field: StrField | None = None,
|
34
|
-
create: bool = False,
|
35
|
-
organism: Record | str | None = None,
|
36
|
-
source: Record | None = None,
|
37
|
-
mute: bool = False,
|
38
|
-
) -> RecordList:
|
39
|
-
"""{}""" # noqa: D415
|
40
|
-
from_source = True if cls.__module__.startswith("bionty.") else False
|
41
|
-
|
42
|
-
field_str = get_name_field(cls, field=field)
|
43
|
-
return get_or_create_records(
|
44
|
-
iterable=values,
|
45
|
-
field=getattr(cls, field_str),
|
46
|
-
create=create,
|
47
|
-
from_source=from_source,
|
48
|
-
organism=organism,
|
49
|
-
source=source,
|
50
|
-
mute=mute,
|
51
|
-
)
|
52
|
-
|
53
|
-
|
54
|
-
@classmethod # type: ignore
|
55
|
-
@doc_args(CanCurate.inspect.__doc__)
|
56
|
-
def inspect(
|
57
|
-
cls,
|
58
|
-
values: ListLike,
|
59
|
-
field: str | StrField | None = None,
|
60
|
-
*,
|
61
|
-
mute: bool = False,
|
62
|
-
organism: str | Record | None = None,
|
63
|
-
source: Record | None = None,
|
64
|
-
strict_source: bool = False,
|
65
|
-
) -> InspectResult:
|
66
|
-
"""{}""" # noqa: D415
|
67
|
-
return _inspect(
|
68
|
-
cls=cls,
|
69
|
-
values=values,
|
70
|
-
field=field,
|
71
|
-
mute=mute,
|
72
|
-
strict_source=strict_source,
|
73
|
-
organism=organism,
|
74
|
-
source=source,
|
75
|
-
)
|
76
|
-
|
77
|
-
|
78
|
-
@classmethod # type: ignore
|
79
|
-
@doc_args(CanCurate.validate.__doc__)
|
80
|
-
def validate(
|
81
|
-
cls,
|
82
|
-
values: ListLike,
|
83
|
-
field: str | StrField | None = None,
|
84
|
-
*,
|
85
|
-
mute: bool = False,
|
86
|
-
organism: str | Record | None = None,
|
87
|
-
source: Record | None = None,
|
88
|
-
strict_source: bool = False,
|
89
|
-
) -> np.ndarray:
|
90
|
-
"""{}""" # noqa: D415
|
91
|
-
return _validate(
|
92
|
-
cls=cls,
|
93
|
-
values=values,
|
94
|
-
field=field,
|
95
|
-
mute=mute,
|
96
|
-
strict_source=strict_source,
|
97
|
-
organism=organism,
|
98
|
-
source=source,
|
99
|
-
)
|
20
|
+
from .query_set import RecordList
|
100
21
|
|
101
22
|
|
102
23
|
def _check_source_db(source: Record, using_key: str | None):
|
@@ -291,76 +212,6 @@ def _validate(
|
|
291
212
|
return result
|
292
213
|
|
293
214
|
|
294
|
-
@classmethod # type: ignore
|
295
|
-
@doc_args(CanCurate.standardize.__doc__)
|
296
|
-
def standardize(
|
297
|
-
cls,
|
298
|
-
values: ListLike,
|
299
|
-
field: str | StrField | None = None,
|
300
|
-
*,
|
301
|
-
return_field: str = None,
|
302
|
-
return_mapper: bool = False,
|
303
|
-
case_sensitive: bool = False,
|
304
|
-
mute: bool = False,
|
305
|
-
public_aware: bool = True,
|
306
|
-
keep: Literal["first", "last", False] = "first",
|
307
|
-
synonyms_field: str = "synonyms",
|
308
|
-
organism: str | Record | None = None,
|
309
|
-
source: Record | None = None,
|
310
|
-
strict_source: bool = False,
|
311
|
-
) -> list[str] | dict[str, str]:
|
312
|
-
"""{}""" # noqa: D415
|
313
|
-
return _standardize(
|
314
|
-
cls=cls,
|
315
|
-
values=values,
|
316
|
-
field=field,
|
317
|
-
return_field=return_field,
|
318
|
-
return_mapper=return_mapper,
|
319
|
-
case_sensitive=case_sensitive,
|
320
|
-
mute=mute,
|
321
|
-
strict_source=strict_source,
|
322
|
-
public_aware=public_aware,
|
323
|
-
keep=keep,
|
324
|
-
synonyms_field=synonyms_field,
|
325
|
-
organism=organism,
|
326
|
-
source=source,
|
327
|
-
)
|
328
|
-
|
329
|
-
|
330
|
-
def set_abbr(self, value: str):
|
331
|
-
self.abbr = value
|
332
|
-
|
333
|
-
if hasattr(self, "name") and value == self.name:
|
334
|
-
pass
|
335
|
-
else:
|
336
|
-
try:
|
337
|
-
self.add_synonym(value, save=False)
|
338
|
-
except Exception as e: # pragma: no cover
|
339
|
-
logger.debug(
|
340
|
-
f"Encountered an Exception while attempting to add synonyms.\n{e}"
|
341
|
-
)
|
342
|
-
|
343
|
-
if not self._state.adding:
|
344
|
-
self.save()
|
345
|
-
|
346
|
-
|
347
|
-
def add_synonym(
|
348
|
-
self,
|
349
|
-
synonym: str | ListLike,
|
350
|
-
force: bool = False,
|
351
|
-
save: bool | None = None,
|
352
|
-
):
|
353
|
-
_check_synonyms_field_exist(self)
|
354
|
-
_add_or_remove_synonyms(
|
355
|
-
synonym=synonym, record=self, force=force, action="add", save=save
|
356
|
-
)
|
357
|
-
|
358
|
-
|
359
|
-
def remove_synonym(self, synonym: str | ListLike):
|
360
|
-
_check_synonyms_field_exist(self)
|
361
|
-
_add_or_remove_synonyms(synonym=synonym, record=self, action="remove")
|
362
|
-
|
363
|
-
|
364
215
|
def _standardize(
|
365
216
|
cls,
|
366
217
|
values: ListLike,
|
@@ -507,27 +358,27 @@ def _standardize(
|
|
507
358
|
|
508
359
|
def _add_or_remove_synonyms(
|
509
360
|
synonym: str | ListLike,
|
510
|
-
record:
|
361
|
+
record: CanCurate,
|
511
362
|
action: Literal["add", "remove"],
|
512
363
|
force: bool = False,
|
513
364
|
save: bool | None = None,
|
514
365
|
):
|
515
366
|
"""Add or remove synonyms."""
|
516
367
|
|
517
|
-
def check_synonyms_in_all_records(synonyms: set[str], record:
|
368
|
+
def check_synonyms_in_all_records(synonyms: set[str], record: CanCurate):
|
518
369
|
"""Errors if input synonym is associated with other records in the DB."""
|
519
370
|
import pandas as pd
|
520
371
|
from IPython.display import display
|
521
372
|
|
522
373
|
syns_all = (
|
523
|
-
record.__class__.objects.exclude(synonyms="").exclude(synonyms=None).all()
|
374
|
+
record.__class__.objects.exclude(synonyms="").exclude(synonyms=None).all() # type: ignore
|
524
375
|
)
|
525
376
|
if len(syns_all) == 0:
|
526
377
|
return
|
527
378
|
df = pd.DataFrame(syns_all.values())
|
528
379
|
df["synonyms"] = df["synonyms"].str.split("|")
|
529
380
|
df = df.explode("synonyms")
|
530
|
-
matches_df = df[(df["synonyms"].isin(synonyms)) & (df["id"] != record.id)]
|
381
|
+
matches_df = df[(df["synonyms"].isin(synonyms)) & (df["id"] != record.id)] # type: ignore
|
531
382
|
if matches_df.shape[0] > 0:
|
532
383
|
records_df = pd.DataFrame(syns_all.filter(id__in=matches_df["id"]).values())
|
533
384
|
logger.error(
|
@@ -558,7 +409,7 @@ def _add_or_remove_synonyms(
|
|
558
409
|
raise ValidationError("a synonym can't contain '|'!")
|
559
410
|
|
560
411
|
# existing synonyms
|
561
|
-
syns_exist = record.synonyms
|
412
|
+
syns_exist = record.synonyms # type: ignore
|
562
413
|
if syns_exist is None or len(syns_exist) == 0:
|
563
414
|
syns_exist_set = set()
|
564
415
|
else:
|
@@ -576,16 +427,16 @@ def _add_or_remove_synonyms(
|
|
576
427
|
else:
|
577
428
|
syns_str = "|".join(syns_exist_set)
|
578
429
|
|
579
|
-
record.synonyms = syns_str
|
430
|
+
record.synonyms = syns_str # type: ignore
|
580
431
|
|
581
432
|
if save is None:
|
582
433
|
# if record is already in DB, save the changes to DB
|
583
|
-
save = not record._state.adding
|
434
|
+
save = not record._state.adding # type: ignore
|
584
435
|
if save:
|
585
|
-
record.save()
|
436
|
+
record.save() # type: ignore
|
586
437
|
|
587
438
|
|
588
|
-
def _check_synonyms_field_exist(record:
|
439
|
+
def _check_synonyms_field_exist(record: CanCurate):
|
589
440
|
try:
|
590
441
|
record.__getattribute__("synonyms")
|
591
442
|
except AttributeError:
|
@@ -637,24 +488,342 @@ def _field_is_id(field: str, registry: type[Record]) -> bool:
|
|
637
488
|
return False
|
638
489
|
|
639
490
|
|
640
|
-
|
641
|
-
"
|
642
|
-
|
643
|
-
|
644
|
-
|
645
|
-
|
646
|
-
|
647
|
-
|
648
|
-
|
491
|
+
class CanCurate:
|
492
|
+
"""Base class providing :class:`~lamindb.models.Record`-based validation."""
|
493
|
+
|
494
|
+
@classmethod
|
495
|
+
def inspect(
|
496
|
+
cls,
|
497
|
+
values: ListLike,
|
498
|
+
field: str | StrField | None = None,
|
499
|
+
*,
|
500
|
+
mute: bool = False,
|
501
|
+
organism: Union[str, Record, None] = None,
|
502
|
+
source: Record | None = None,
|
503
|
+
strict_source: bool = False,
|
504
|
+
) -> InspectResult:
|
505
|
+
"""Inspect if values are mappable to a field.
|
506
|
+
|
507
|
+
Being mappable means that an exact match exists.
|
508
|
+
|
509
|
+
Args:
|
510
|
+
values: Values that will be checked against the field.
|
511
|
+
field: The field of values. Examples are `'ontology_id'` to map
|
512
|
+
against the source ID or `'name'` to map against the ontologies
|
513
|
+
field names.
|
514
|
+
mute: Whether to mute logging.
|
515
|
+
organism: An Organism name or record.
|
516
|
+
source: A `bionty.Source` record that specifies the version to inspect against.
|
517
|
+
strict_source: Determines the validation behavior against records in the registry.
|
518
|
+
- If `False`, validation will include all records in the registry, ignoring the specified source.
|
519
|
+
- If `True`, validation will only include records in the registry that are linked to the specified source.
|
520
|
+
Note: this parameter won't affect validation against bionty/public sources.
|
521
|
+
|
522
|
+
See Also:
|
523
|
+
:meth:`~lamindb.models.CanCurate.validate`
|
524
|
+
|
525
|
+
Examples:
|
526
|
+
>>> import bionty as bt
|
527
|
+
>>> bt.settings.organism = "human"
|
528
|
+
>>> ln.save(bt.Gene.from_values(["A1CF", "A1BG", "BRCA2"], field="symbol"))
|
529
|
+
>>> gene_symbols = ["A1CF", "A1BG", "FANCD1", "FANCD20"]
|
530
|
+
>>> result = bt.Gene.inspect(gene_symbols, field=bt.Gene.symbol)
|
531
|
+
>>> result.validated
|
532
|
+
['A1CF', 'A1BG']
|
533
|
+
>>> result.non_validated
|
534
|
+
['FANCD1', 'FANCD20']
|
535
|
+
"""
|
536
|
+
return _inspect(
|
537
|
+
cls=cls,
|
538
|
+
values=values,
|
539
|
+
field=field,
|
540
|
+
mute=mute,
|
541
|
+
strict_source=strict_source,
|
542
|
+
organism=organism,
|
543
|
+
source=source,
|
544
|
+
)
|
649
545
|
|
650
|
-
|
651
|
-
|
546
|
+
@classmethod
|
547
|
+
def validate(
|
548
|
+
cls,
|
549
|
+
values: ListLike,
|
550
|
+
field: str | StrField | None = None,
|
551
|
+
*,
|
552
|
+
mute: bool = False,
|
553
|
+
organism: Union[str, Record, None] = None,
|
554
|
+
source: Record | None = None,
|
555
|
+
strict_source: bool = False,
|
556
|
+
) -> np.ndarray:
|
557
|
+
"""Validate values against existing values of a string field.
|
558
|
+
|
559
|
+
Note that this validation is strict: only exact matches count as validated.
|
560
|
+
|
561
|
+
Args:
|
562
|
+
values: Values that will be validated against the field.
|
563
|
+
field: The field of values.
|
564
|
+
Examples are `'ontology_id'` to map against the source ID
|
565
|
+
or `'name'` to map against the ontologies field names.
|
566
|
+
mute: Whether to mute logging.
|
567
|
+
organism: An Organism name or record.
|
568
|
+
source: A `bionty.Source` record that specifies the version to validate against.
|
569
|
+
strict_source: Determines the validation behavior against records in the registry.
|
570
|
+
- If `False`, validation will include all records in the registry, ignoring the specified source.
|
571
|
+
- If `True`, validation will only include records in the registry that are linked to the specified source.
|
572
|
+
Note: this parameter won't affect validation against bionty/public sources.
|
573
|
+
|
574
|
+
Returns:
|
575
|
+
A vector of booleans indicating if an element is validated.
|
576
|
+
|
577
|
+
See Also:
|
578
|
+
:meth:`~lamindb.models.CanCurate.inspect`
|
579
|
+
|
580
|
+
Examples:
|
581
|
+
>>> import bionty as bt
|
582
|
+
>>> bt.settings.organism = "human"
|
583
|
+
>>> ln.save(bt.Gene.from_values(["A1CF", "A1BG", "BRCA2"], field="symbol"))
|
584
|
+
>>> gene_symbols = ["A1CF", "A1BG", "FANCD1", "FANCD20"]
|
585
|
+
>>> bt.Gene.validate(gene_symbols, field=bt.Gene.symbol)
|
586
|
+
array([ True, True, False, False])
|
587
|
+
"""
|
588
|
+
return _validate(
|
589
|
+
cls=cls,
|
590
|
+
values=values,
|
591
|
+
field=field,
|
592
|
+
mute=mute,
|
593
|
+
strict_source=strict_source,
|
594
|
+
organism=organism,
|
595
|
+
source=source,
|
596
|
+
)
|
652
597
|
|
653
|
-
|
654
|
-
|
655
|
-
|
656
|
-
|
657
|
-
|
598
|
+
@classmethod
|
599
|
+
def from_values(
|
600
|
+
cls,
|
601
|
+
values: ListLike,
|
602
|
+
field: StrField | None = None,
|
603
|
+
create: bool = False,
|
604
|
+
organism: Union[Record, str, None] = None,
|
605
|
+
source: Record | None = None,
|
606
|
+
mute: bool = False,
|
607
|
+
) -> RecordList:
|
608
|
+
"""Bulk create validated records by parsing values for an identifier (such as a name or an id).
|
609
|
+
|
610
|
+
Args:
|
611
|
+
values: A list of values for an identifier, e.g.
|
612
|
+
`["name1", "name2"]`.
|
613
|
+
field: A `Record` field to look up, e.g., `bt.CellMarker.name`.
|
614
|
+
create: Whether to create records if they don't exist.
|
615
|
+
organism: A `bionty.Organism` name or record.
|
616
|
+
source: A `bionty.Source` record to validate against when creating records.
|
617
|
+
mute: Whether to mute logging.
|
618
|
+
|
619
|
+
Returns:
|
620
|
+
A list of validated records. For bionty registries, also returns knowledge-coupled records.
|
621
|
+
|
622
|
+
Notes:
|
623
|
+
For more info, see tutorial: :doc:`docs:bio-registries`.
|
624
|
+
|
625
|
+
Examples:
|
626
|
+
|
627
|
+
Bulk creating from non-validated values logs warnings and returns an empty list:
|
628
|
+
|
629
|
+
>>> ulabels = ln.ULabel.from_values(["benchmark", "prediction", "test"], field="name")
|
630
|
+
>>> assert len(ulabels) == 0
|
631
|
+
|
632
|
+
Bulk creating records from validated values returns the corresponding existing records:
|
633
|
+
|
634
|
+
>>> ln.save([ln.ULabel(name=name) for name in ["benchmark", "prediction", "test"]])
|
635
|
+
>>> ulabels = ln.ULabel.from_values(["benchmark", "prediction", "test"], field="name")
|
636
|
+
>>> assert len(ulabels) == 3
|
637
|
+
|
638
|
+
Bulk create records from public reference:
|
639
|
+
|
640
|
+
>>> import bionty as bt
|
641
|
+
>>> records = bt.CellType.from_values(["T cell", "B cell"], field="name")
|
642
|
+
>>> records
|
643
|
+
"""
|
644
|
+
from_source = True if cls.__module__.startswith("bionty.") else False
|
645
|
+
|
646
|
+
field_str = get_name_field(cls, field=field)
|
647
|
+
return get_or_create_records(
|
648
|
+
iterable=values,
|
649
|
+
field=getattr(cls, field_str),
|
650
|
+
create=create,
|
651
|
+
from_source=from_source,
|
652
|
+
organism=organism,
|
653
|
+
source=source,
|
654
|
+
mute=mute,
|
655
|
+
)
|
656
|
+
|
657
|
+
@classmethod
|
658
|
+
def standardize(
|
659
|
+
cls,
|
660
|
+
values: Iterable,
|
661
|
+
field: str | StrField | None = None,
|
662
|
+
*,
|
663
|
+
return_field: str | StrField | None = None,
|
664
|
+
return_mapper: bool = False,
|
665
|
+
case_sensitive: bool = False,
|
666
|
+
mute: bool = False,
|
667
|
+
public_aware: bool = True,
|
668
|
+
keep: Literal["first", "last", False] = "first",
|
669
|
+
synonyms_field: str = "synonyms",
|
670
|
+
organism: Union[str, Record, None] = None,
|
671
|
+
source: Record | None = None,
|
672
|
+
strict_source: bool = False,
|
673
|
+
) -> list[str] | dict[str, str]:
|
674
|
+
"""Maps input synonyms to standardized names.
|
675
|
+
|
676
|
+
Args:
|
677
|
+
values: Identifiers that will be standardized.
|
678
|
+
field: The field representing the standardized names.
|
679
|
+
return_field: The field to return. Defaults to field.
|
680
|
+
return_mapper: If `True`, returns `{input_value: standardized_name}`.
|
681
|
+
case_sensitive: Whether the mapping is case sensitive.
|
682
|
+
mute: Whether to mute logging.
|
683
|
+
public_aware: Whether to standardize from Bionty reference. Defaults to `True` for Bionty registries.
|
684
|
+
keep: When a synonym maps to multiple names, determines which duplicates to mark as `pd.DataFrame.duplicated`:
|
685
|
+
- `"first"`: returns the first mapped standardized name
|
686
|
+
- `"last"`: returns the last mapped standardized name
|
687
|
+
- `False`: returns all mapped standardized names.
|
688
|
+
|
689
|
+
When `keep` is `False`, the returned list of standardized names will contain nested lists in case of duplicates.
|
690
|
+
|
691
|
+
When `field` is converted into `return_field`, `keep` determines which matches to keep when multiple `return_field` values map to the same `field` value.
|
692
|
+
synonyms_field: A field containing the concatenated synonyms.
|
693
|
+
organism: An Organism name or record.
|
694
|
+
source: A `bionty.Source` record that specifies the version to validate against.
|
695
|
+
strict_source: Determines the validation behavior against records in the registry.
|
696
|
+
- If `False`, validation will include all records in the registry, ignoring the specified source.
|
697
|
+
- If `True`, validation will only include records in the registry that are linked to the specified source.
|
698
|
+
Note: this parameter won't affect validation against bionty/public sources.
|
699
|
+
|
700
|
+
Returns:
|
701
|
+
If `return_mapper` is `False`: a list of standardized names. Otherwise,
|
702
|
+
a dictionary of mapped values with mappable synonyms as keys and
|
703
|
+
standardized names as values.
|
704
|
+
|
705
|
+
See Also:
|
706
|
+
:meth:`~lamindb.models.CanCurate.add_synonym`
|
707
|
+
Add synonyms.
|
708
|
+
:meth:`~lamindb.models.CanCurate.remove_synonym`
|
709
|
+
Remove synonyms.
|
710
|
+
|
711
|
+
Examples:
|
712
|
+
>>> import bionty as bt
|
713
|
+
>>> bt.settings.organism = "human"
|
714
|
+
>>> ln.save(bt.Gene.from_values(["A1CF", "A1BG", "BRCA2"], field="symbol"))
|
715
|
+
>>> gene_synonyms = ["A1CF", "A1BG", "FANCD1", "FANCD20"]
|
716
|
+
>>> standardized_names = bt.Gene.standardize(gene_synonyms)
|
717
|
+
>>> standardized_names
|
718
|
+
['A1CF', 'A1BG', 'BRCA2', 'FANCD20']
|
719
|
+
"""
|
720
|
+
return _standardize(
|
721
|
+
cls=cls,
|
722
|
+
values=values,
|
723
|
+
field=field,
|
724
|
+
return_field=return_field,
|
725
|
+
return_mapper=return_mapper,
|
726
|
+
case_sensitive=case_sensitive,
|
727
|
+
mute=mute,
|
728
|
+
strict_source=strict_source,
|
729
|
+
public_aware=public_aware,
|
730
|
+
keep=keep,
|
731
|
+
synonyms_field=synonyms_field,
|
732
|
+
organism=organism,
|
733
|
+
source=source,
|
734
|
+
)
|
735
|
+
|
736
|
+
def add_synonym(
|
737
|
+
self,
|
738
|
+
synonym: str | ListLike,
|
739
|
+
force: bool = False,
|
740
|
+
save: bool | None = None,
|
741
|
+
):
|
742
|
+
"""Add synonyms to a record.
|
743
|
+
|
744
|
+
Args:
|
745
|
+
synonym: The synonyms to add to the record.
|
746
|
+
force: Whether to add synonyms even if they are already synonyms of other records.
|
747
|
+
save: Whether to save the record to the database.
|
748
|
+
|
749
|
+
See Also:
|
750
|
+
:meth:`~lamindb.models.CanCurate.remove_synonym`
|
751
|
+
Remove synonyms.
|
752
|
+
|
753
|
+
Examples:
|
754
|
+
>>> import bionty as bt
|
755
|
+
>>> bt.CellType.from_source(name="T cell").save()
|
756
|
+
>>> lookup = bt.CellType.lookup()
|
757
|
+
>>> record = lookup.t_cell
|
758
|
+
>>> record.synonyms
|
759
|
+
'T-cell|T lymphocyte|T-lymphocyte'
|
760
|
+
>>> record.add_synonym("T cells")
|
761
|
+
>>> record.synonyms
|
762
|
+
'T cells|T-cell|T-lymphocyte|T lymphocyte'
|
763
|
+
"""
|
764
|
+
_check_synonyms_field_exist(self)
|
765
|
+
_add_or_remove_synonyms(
|
766
|
+
synonym=synonym, record=self, force=force, action="add", save=save
|
767
|
+
)
|
768
|
+
|
769
|
+
def remove_synonym(self, synonym: str | ListLike):
|
770
|
+
"""Remove synonyms from a record.
|
771
|
+
|
772
|
+
Args:
|
773
|
+
synonym: The synonym values to remove.
|
774
|
+
|
775
|
+
See Also:
|
776
|
+
:meth:`~lamindb.models.CanCurate.add_synonym`
|
777
|
+
Add synonyms.
|
778
|
+
|
779
|
+
Examples:
|
780
|
+
>>> import bionty as bt
|
781
|
+
>>> bt.CellType.from_source(name="T cell").save()
|
782
|
+
>>> lookup = bt.CellType.lookup()
|
783
|
+
>>> record = lookup.t_cell
|
784
|
+
>>> record.synonyms
|
785
|
+
'T-cell|T lymphocyte|T-lymphocyte'
|
786
|
+
>>> record.remove_synonym("T-cell")
|
787
|
+
'T lymphocyte|T-lymphocyte'
|
788
|
+
"""
|
789
|
+
_check_synonyms_field_exist(self)
|
790
|
+
_add_or_remove_synonyms(synonym=synonym, record=self, action="remove")
|
791
|
+
|
792
|
+
def set_abbr(self, value: str):
|
793
|
+
"""Set value for abbr field and add to synonyms.
|
794
|
+
|
795
|
+
Args:
|
796
|
+
value: A value for an abbreviation.
|
797
|
+
|
798
|
+
See Also:
|
799
|
+
:meth:`~lamindb.models.CanCurate.add_synonym`
|
800
|
+
|
801
|
+
Examples:
|
802
|
+
>>> import bionty as bt
|
803
|
+
>>> bt.ExperimentalFactor.from_source(name="single-cell RNA sequencing").save()
|
804
|
+
>>> scrna = bt.ExperimentalFactor.get(name="single-cell RNA sequencing")
|
805
|
+
>>> scrna.abbr
|
806
|
+
None
|
807
|
+
>>> scrna.synonyms
|
808
|
+
'single-cell RNA-seq|single-cell transcriptome sequencing|scRNA-seq|single cell RNA sequencing'
|
809
|
+
>>> scrna.set_abbr("scRNA")
|
810
|
+
>>> scrna.abbr
|
811
|
+
'scRNA'
|
812
|
+
>>> scrna.synonyms
|
813
|
+
'scRNA|single-cell RNA-seq|single cell RNA sequencing|single-cell transcriptome sequencing|scRNA-seq'
|
814
|
+
>>> scrna.save()
|
815
|
+
"""
|
816
|
+
self.abbr = value
|
817
|
+
|
818
|
+
if hasattr(self, "name") and value == self.name:
|
819
|
+
pass
|
820
|
+
else:
|
821
|
+
try:
|
822
|
+
self.add_synonym(value, save=False)
|
823
|
+
except Exception as e: # pragma: no cover
|
824
|
+
logger.debug(
|
825
|
+
f"Encountered an Exception while attempting to add synonyms.\n{e}"
|
826
|
+
)
|
658
827
|
|
659
|
-
|
660
|
-
|
828
|
+
if not self._state.adding: # type: ignore
|
829
|
+
self.save() # type: ignore
|