lamindb 1.2a2__py3-none-any.whl → 1.3.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -5,14 +5,18 @@ from typing import TYPE_CHECKING, Iterable, Literal, Union
5
5
  import numpy as np
6
6
  import pandas as pd
7
7
  from django.core.exceptions import FieldDoesNotExist
8
+ from django.db.models import Manager, QuerySet
8
9
  from lamin_utils import colors, logger
9
10
 
10
11
  from ..errors import ValidationError
11
- from ._from_values import _format_values, _has_organism_field, get_or_create_records
12
- from .record import Record, _queryset, get_name_field
12
+ from ._from_values import (
13
+ _format_values,
14
+ _from_values,
15
+ get_organism_record_from_field,
16
+ )
17
+ from .record import Record, get_name_field
13
18
 
14
19
  if TYPE_CHECKING:
15
- from django.db.models import QuerySet
16
20
  from lamin_utils._inspect import InspectResult
17
21
 
18
22
  from lamindb.base.types import ListLike, StrField
@@ -20,27 +24,20 @@ if TYPE_CHECKING:
20
24
  from .query_set import RecordList
21
25
 
22
26
 
23
- def _check_source_db(source: Record, using_key: str | None):
24
- """Check if the source is from the DB."""
25
- if using_key is not None and using_key != "default":
26
- if source._state.db != using_key:
27
- raise ValueError(
28
- f"source must be a bionty.Source record from instance '{using_key}'!"
29
- )
30
-
31
-
32
- def _check_organism_db(organism: str | Record | None, using_key: str | None):
33
- """Check if the organism is from the DB."""
34
- if isinstance(organism, Record):
27
+ def _check_if_record_in_db(record: str | Record | None, using_key: str | None):
28
+ """Check if the record is from the using_key DB."""
29
+ if isinstance(record, Record):
35
30
  if using_key is not None and using_key != "default":
36
- if organism._state.db != using_key:
31
+ if record._state.db != using_key:
37
32
  raise ValueError(
38
- f"organism must be a bionty.Organism record from instance '{using_key}'!"
33
+ f"record must be a {record.__class__.__get_name_with_module__()} record from instance '{using_key}'!"
39
34
  )
40
35
 
41
36
 
42
- def _concat_lists(values: ListLike) -> list[str]:
37
+ def _concat_lists(values: ListLike | str) -> list[str]:
43
38
  """Concatenate a list of lists of strings into a single list."""
39
+ if isinstance(values, str):
40
+ values = [values]
44
41
  if isinstance(values, (list, pd.Series)) and len(values) > 0:
45
42
  first_item = values[0] if isinstance(values, list) else values.iloc[0]
46
43
  if isinstance(first_item, list):
@@ -55,10 +52,9 @@ def _concat_lists(values: ListLike) -> list[str]:
55
52
  def _inspect(
56
53
  cls,
57
54
  values: ListLike,
58
- field: str | StrField | None = None,
55
+ field: StrField | None = None,
59
56
  *,
60
57
  mute: bool = False,
61
- using_key: str | None = None,
62
58
  organism: str | Record | None = None,
63
59
  source: Record | None = None,
64
60
  strict_source: bool = False,
@@ -66,78 +62,81 @@ def _inspect(
66
62
  """{}""" # noqa: D415
67
63
  from lamin_utils._inspect import inspect
68
64
 
69
- if isinstance(values, str):
70
- values = [values]
71
65
  values = _concat_lists(values)
72
66
 
73
- field = get_name_field(cls, field=field)
74
- queryset = _queryset(cls, using_key)
75
- using_key = queryset.db
67
+ field_str = get_name_field(cls, field=field)
68
+ queryset = cls.all() if isinstance(cls, (QuerySet, Manager)) else cls.objects.all()
69
+ registry = queryset.model
70
+ model_name = registry._meta.model.__name__
76
71
  if isinstance(source, Record):
77
- _check_source_db(source, using_key)
72
+ _check_if_record_in_db(source, queryset.db)
78
73
  # if strict_source mode, restrict the query to the passed ontology source
79
74
  # otherwise, inspect across records present in the DB from all ontology sources and no-source
80
75
  if strict_source:
81
76
  queryset = queryset.filter(source=source)
82
- _check_organism_db(organism, using_key)
83
- registry = queryset.model
84
- model_name = registry._meta.model.__name__
77
+ organism_record = get_organism_record_from_field(
78
+ getattr(registry, field_str), organism, values, queryset.db
79
+ )
80
+ _check_if_record_in_db(organism_record, queryset.db)
85
81
 
86
82
  # do not inspect synonyms if the field is not name field
87
83
  inspect_synonyms = True
88
- if hasattr(registry, "_name_field") and field != registry._name_field:
84
+ if hasattr(registry, "_name_field") and field_str != registry._name_field:
89
85
  inspect_synonyms = False
90
86
 
91
87
  # inspect in the DB
92
88
  result_db = inspect(
93
- df=_filter_query_based_on_organism(
94
- queryset=queryset, field=field, organism=organism
95
- ),
89
+ df=_filter_queryset_with_organism(queryset=queryset, organism=organism_record),
96
90
  identifiers=values,
97
- field=field,
91
+ field=field_str,
98
92
  mute=mute,
99
93
  inspect_synonyms=inspect_synonyms,
100
94
  )
101
95
  nonval = set(result_db.non_validated).difference(result_db.synonyms_mapper.keys())
102
96
 
103
- if len(nonval) > 0 and registry.__get_module_name__() == "bionty":
97
+ if len(nonval) > 0 and hasattr(registry, "source_id"):
104
98
  try:
105
- bionty_result = registry.public(organism=organism, source=source).inspect(
106
- values=nonval, field=field, mute=True, inspect_synonyms=inspect_synonyms
99
+ public_result = registry.public(
100
+ organism=organism_record, source=source
101
+ ).inspect(
102
+ values=nonval,
103
+ field=field_str,
104
+ mute=True,
105
+ inspect_synonyms=inspect_synonyms,
107
106
  )
108
- bionty_validated = bionty_result.validated
109
- bionty_mapper = bionty_result.synonyms_mapper
107
+ public_validated = public_result.validated
108
+ public_mapper = public_result.synonyms_mapper
110
109
  hint = False
111
- if len(bionty_validated) > 0 and not mute:
112
- print_values = _format_values(bionty_validated)
113
- s = "" if len(bionty_validated) == 1 else "s"
114
- labels = colors.yellow(f"{len(bionty_validated)} {model_name} term{s}")
110
+ if len(public_validated) > 0 and not mute:
111
+ print_values = _format_values(public_validated)
112
+ s = "" if len(public_validated) == 1 else "s"
113
+ labels = colors.yellow(f"{len(public_validated)} {model_name} term{s}")
115
114
  logger.print(
116
- f" detected {labels} in Bionty for"
117
- f" {colors.italic(field)}: {colors.yellow(print_values)}"
115
+ f" detected {labels} in public source for"
116
+ f" {colors.italic(field_str)}: {colors.yellow(print_values)}"
118
117
  )
119
118
  hint = True
120
119
 
121
- if len(bionty_mapper) > 0 and not mute:
122
- print_values = _format_values(list(bionty_mapper.keys()))
123
- s = "" if len(bionty_mapper) == 1 else "s"
124
- labels = colors.yellow(f"{len(bionty_mapper)} {model_name} term{s}")
120
+ if len(public_mapper) > 0 and not mute:
121
+ print_values = _format_values(list(public_mapper.keys()))
122
+ s = "" if len(public_mapper) == 1 else "s"
123
+ labels = colors.yellow(f"{len(public_mapper)} {model_name} term{s}")
125
124
  logger.print(
126
- f" detected {labels} in Bionty as {colors.italic(f'synonym{s}')}:"
125
+ f" detected {labels} in public source as {colors.italic(f'synonym{s}')}:"
127
126
  f" {colors.yellow(print_values)}"
128
127
  )
129
128
  hint = True
130
129
 
131
130
  if hint:
132
131
  logger.print(
133
- f"→ add records from Bionty to your {model_name} registry via"
132
+ f"→ add records from public source to your {model_name} registry via"
134
133
  f" {colors.italic('.from_values()')}"
135
134
  )
136
135
 
137
- nonval = [i for i in bionty_result.non_validated if i not in bionty_mapper] # type: ignore
138
- # no bionty source is found
136
+ nonval = [i for i in public_result.non_validated if i not in public_mapper] # type: ignore
137
+ # no public source is found
139
138
  except ValueError:
140
- logger.warning("no Bionty source found, skipping Bionty validation")
139
+ logger.warning("no public source found, skipping source validation")
141
140
 
142
141
  if len(nonval) > 0 and not mute:
143
142
  print_values = _format_values(list(nonval))
@@ -155,10 +154,9 @@ def _inspect(
155
154
  def _validate(
156
155
  cls,
157
156
  values: ListLike,
158
- field: str | StrField | None = None,
157
+ field: StrField | None = None,
159
158
  *,
160
159
  mute: bool = False,
161
- using_key: str | None = None,
162
160
  organism: str | Record | None = None,
163
161
  source: Record | None = None,
164
162
  strict_source: bool = False,
@@ -167,25 +165,26 @@ def _validate(
167
165
  from lamin_utils._inspect import validate
168
166
 
169
167
  return_str = True if isinstance(values, str) else False
170
- if isinstance(values, str):
171
- values = [values]
172
168
  values = _concat_lists(values)
173
169
 
174
- field = get_name_field(cls, field=field)
170
+ field_str = get_name_field(cls, field=field)
175
171
 
176
- queryset = _queryset(cls, using_key)
177
- using_key = queryset.db
172
+ queryset = cls.all() if isinstance(cls, (QuerySet, Manager)) else cls.objects.all()
173
+ registry = queryset.model
178
174
  if isinstance(source, Record):
179
- _check_source_db(source, using_key)
175
+ _check_if_record_in_db(source, queryset.db)
180
176
  if strict_source:
181
177
  queryset = queryset.filter(source=source)
182
- _check_organism_db(organism, using_key)
178
+
179
+ organism_record = get_organism_record_from_field(
180
+ getattr(registry, field_str), organism, values, queryset.db
181
+ )
182
+ _check_if_record_in_db(organism_record, queryset.db)
183
183
  field_values = pd.Series(
184
- _filter_query_based_on_organism(
184
+ _filter_queryset_with_organism(
185
185
  queryset=queryset,
186
- field=field,
187
- organism=organism,
188
- values_list_field=field,
186
+ organism=organism_record,
187
+ values_list_field=field_str,
189
188
  ),
190
189
  dtype="object",
191
190
  )
@@ -204,7 +203,7 @@ def _validate(
204
203
  field_values=field_values,
205
204
  case_sensitive=True,
206
205
  mute=mute,
207
- field=field,
206
+ field=field_str,
208
207
  )
209
208
  if return_str and len(result) == 1:
210
209
  return result[0]
@@ -215,16 +214,15 @@ def _validate(
215
214
  def _standardize(
216
215
  cls,
217
216
  values: ListLike,
218
- field: str | StrField | None = None,
217
+ field: StrField | None = None,
219
218
  *,
220
219
  return_field: str = None,
221
220
  return_mapper: bool = False,
222
221
  case_sensitive: bool = False,
223
222
  mute: bool = False,
224
- public_aware: bool = True,
223
+ source_aware: bool = True,
225
224
  keep: Literal["first", "last", False] = "first",
226
225
  synonyms_field: str = "synonyms",
227
- using_key: str | None = None,
228
226
  organism: str | Record | None = None,
229
227
  source: Record | None = None,
230
228
  strict_source: bool = False,
@@ -233,59 +231,45 @@ def _standardize(
233
231
  from lamin_utils._standardize import standardize as map_synonyms
234
232
 
235
233
  return_str = True if isinstance(values, str) else False
236
- if isinstance(values, str):
237
- values = [values]
238
234
  values = _concat_lists(values)
239
235
 
240
- field = get_name_field(cls, field=field)
241
- return_field = get_name_field(
236
+ field_str = get_name_field(cls, field=field)
237
+ return_field_str = get_name_field(
242
238
  cls, field=field if return_field is None else return_field
243
239
  )
244
- queryset = _queryset(cls, using_key)
245
- using_key = queryset.db
240
+ queryset = cls.all() if isinstance(cls, (QuerySet, Manager)) else cls.objects.all()
241
+ registry = queryset.model
246
242
  if isinstance(source, Record):
247
- _check_source_db(source, using_key)
243
+ _check_if_record_in_db(source, queryset.db)
248
244
  if strict_source:
249
245
  queryset = queryset.filter(source=source)
250
- _check_organism_db(organism, using_key)
251
- registry = queryset.model
252
-
253
- if _has_organism_field(registry):
254
- # here, we can safely import bionty
255
- from bionty._bionty import create_or_get_organism_record
256
-
257
- organism_record = create_or_get_organism_record(
258
- organism=organism, registry=registry, field=field
259
- )
260
- organism = (
261
- organism_record.name if organism_record is not None else organism_record
262
- )
263
- else:
264
- organism = None
246
+ organism_record = get_organism_record_from_field(
247
+ getattr(registry, field_str), organism, values, queryset.db
248
+ )
249
+ _check_if_record_in_db(organism_record, queryset.db)
265
250
 
266
251
  # only perform synonym mapping if field is the name field
267
- if hasattr(registry, "_name_field") and field != registry._name_field:
252
+ if hasattr(registry, "_name_field") and field_str != registry._name_field:
268
253
  synonyms_field = None
269
254
 
270
255
  try:
271
256
  registry._meta.get_field(synonyms_field)
272
257
  fields = {
273
258
  field_name
274
- for field_name in [field, return_field, synonyms_field]
259
+ for field_name in [field_str, return_field_str, synonyms_field]
275
260
  if field_name is not None
276
261
  }
277
- df = _filter_query_based_on_organism(
262
+ df = _filter_queryset_with_organism(
278
263
  queryset=queryset,
279
- field=field,
280
- organism=organism,
281
- fields=list(fields),
264
+ organism=organism_record,
265
+ values_list_fields=list(fields),
282
266
  )
283
267
  except FieldDoesNotExist:
284
268
  df = pd.DataFrame()
285
269
 
286
270
  _kwargs = {
287
- "field": field,
288
- "return_field": return_field,
271
+ "field": field_str,
272
+ "return_field": return_field_str,
289
273
  "case_sensitive": case_sensitive,
290
274
  "keep": keep,
291
275
  "synonyms_field": synonyms_field,
@@ -307,8 +291,8 @@ def _standardize(
307
291
  return result[0]
308
292
  return result
309
293
 
310
- # map synonyms in Bionty
311
- if registry.__get_module_name__() == "bionty" and public_aware:
294
+ # map synonyms in public source
295
+ if hasattr(registry, "source_id") and source_aware:
312
296
  mapper = {}
313
297
  if return_mapper:
314
298
  mapper = std_names_db
@@ -317,19 +301,19 @@ def _standardize(
317
301
  )
318
302
 
319
303
  val_res = registry.validate(
320
- std_names_db, field=field, mute=True, organism=organism
304
+ std_names_db, field=field, mute=True, organism=organism_record
321
305
  )
322
306
  if all(val_res):
323
307
  return _return(result=std_names_db, mapper=mapper)
324
308
 
325
309
  nonval = np.array(std_names_db)[~val_res]
326
- std_names_bt_mapper = registry.public(organism=organism).standardize(
310
+ std_names_bt_mapper = registry.public(organism=organism_record).standardize(
327
311
  nonval, return_mapper=True, mute=True, **_kwargs
328
312
  )
329
313
 
330
314
  if len(std_names_bt_mapper) > 0 and not mute:
331
315
  s = "" if len(std_names_bt_mapper) == 1 else "s"
332
- field_print = "synonym" if field == return_field else field
316
+ field_print = "synonym" if field_str == return_field_str else field_str
333
317
 
334
318
  reduced_mapped_keys_str = f"{list(std_names_bt_mapper.keys())[:10] + ['...'] if len(std_names_bt_mapper) > 10 else list(std_names_bt_mapper.keys())}"
335
319
  truncated_note = (
@@ -337,7 +321,7 @@ def _standardize(
337
321
  )
338
322
 
339
323
  warn_msg = (
340
- f"found {len(std_names_bt_mapper)} {field_print}{s} in Bionty{truncated_note}:"
324
+ f"found {len(std_names_bt_mapper)} {field_print}{s} in public source{truncated_note}:"
341
325
  f" {reduced_mapped_keys_str}\n"
342
326
  f" please add corresponding {registry._meta.model.__name__} records via{truncated_note}:"
343
327
  f" `.from_values({reduced_mapped_keys_str})`"
@@ -437,57 +421,36 @@ def _add_or_remove_synonyms(
437
421
 
438
422
 
439
423
  def _check_synonyms_field_exist(record: CanCurate):
440
- try:
441
- record.__getattribute__("synonyms")
442
- except AttributeError:
424
+ """Check if synonyms field exists."""
425
+ if not hasattr(record, "synonyms"):
443
426
  raise NotImplementedError(
444
427
  f"No synonyms field found in table {record.__class__.__name__}!"
445
428
  ) from None
446
429
 
447
430
 
448
- def _filter_query_based_on_organism(
431
+ def _filter_queryset_with_organism(
449
432
  queryset: QuerySet,
450
- field: str,
451
- organism: str | Record | None = None,
433
+ organism: Record | None = None,
452
434
  values_list_field: str | None = None,
453
- fields: list[str] | None = None,
435
+ values_list_fields: list[str] | None = None,
454
436
  ):
455
437
  """Filter a queryset based on organism."""
456
438
  import pandas as pd
457
439
 
458
- registry = queryset.model
459
-
460
- if _has_organism_field(registry) and not _field_is_id(field, registry):
461
- # here, we can safely import bionty
462
- from bionty._bionty import create_or_get_organism_record
463
-
464
- organism_record = create_or_get_organism_record(
465
- organism=organism, registry=registry, field=field
466
- )
467
- if organism_record is not None:
468
- queryset = queryset.filter(organism__name=organism_record.name)
440
+ if organism is not None:
441
+ queryset = queryset.filter(organism=organism)
469
442
 
443
+ # values_list_field/s for better performance
470
444
  if values_list_field is None:
471
- if fields:
445
+ if values_list_fields:
472
446
  return pd.DataFrame.from_records(
473
- queryset.values_list(*fields), columns=fields
447
+ queryset.values_list(*values_list_fields), columns=values_list_fields
474
448
  )
475
449
  return pd.DataFrame.from_records(queryset.values())
476
-
477
450
  else:
478
451
  return queryset.values_list(values_list_field, flat=True)
479
452
 
480
453
 
481
- def _field_is_id(field: str, registry: type[Record]) -> bool:
482
- """Check if the field is an ontology ID."""
483
- if hasattr(registry, "_ontology_id_field"):
484
- if field == registry._ontology_id_field:
485
- return True
486
- if field.endswith("id"):
487
- return True
488
- return False
489
-
490
-
491
454
  class CanCurate:
492
455
  """Base class providing :class:`~lamindb.models.Record`-based validation."""
493
456
 
@@ -495,7 +458,7 @@ class CanCurate:
495
458
  def inspect(
496
459
  cls,
497
460
  values: ListLike,
498
- field: str | StrField | None = None,
461
+ field: StrField | None = None,
499
462
  *,
500
463
  mute: bool = False,
501
464
  organism: Union[str, Record, None] = None,
@@ -517,21 +480,23 @@ class CanCurate:
517
480
  strict_source: Determines the validation behavior against records in the registry.
518
481
  - If `False`, validation will include all records in the registry, ignoring the specified source.
519
482
  - If `True`, validation will only include records in the registry that are linked to the specified source.
520
- Note: this parameter won't affect validation against bionty/public sources.
483
+ Note: this parameter won't affect validation against public sources.
521
484
 
522
485
  See Also:
523
486
  :meth:`~lamindb.models.CanCurate.validate`
524
487
 
525
- Examples:
526
- >>> import bionty as bt
527
- >>> bt.settings.organism = "human"
528
- >>> ln.save(bt.Gene.from_values(["A1CF", "A1BG", "BRCA2"], field="symbol"))
529
- >>> gene_symbols = ["A1CF", "A1BG", "FANCD1", "FANCD20"]
530
- >>> result = bt.Gene.inspect(gene_symbols, field=bt.Gene.symbol)
531
- >>> result.validated
532
- ['A1CF', 'A1BG']
533
- >>> result.non_validated
534
- ['FANCD1', 'FANCD20']
488
+ Example::
489
+
490
+ import bionty as bt
491
+
492
+ # save some gene records
493
+ bt.Gene.from_values(["A1CF", "A1BG", "BRCA2"], field="symbol", organism="human").save()
494
+
495
+ # inspect gene symbols
496
+ gene_symbols = ["A1CF", "A1BG", "FANCD1", "FANCD20"]
497
+ result = bt.Gene.inspect(gene_symbols, field=bt.Gene.symbol, organism="human")
498
+ assert result.validated == ["A1CF", "A1BG"]
499
+ assert result.non_validated == ["FANCD1", "FANCD20"]
535
500
  """
536
501
  return _inspect(
537
502
  cls=cls,
@@ -547,7 +512,7 @@ class CanCurate:
547
512
  def validate(
548
513
  cls,
549
514
  values: ListLike,
550
- field: str | StrField | None = None,
515
+ field: StrField | None = None,
551
516
  *,
552
517
  mute: bool = False,
553
518
  organism: Union[str, Record, None] = None,
@@ -569,7 +534,7 @@ class CanCurate:
569
534
  strict_source: Determines the validation behavior against records in the registry.
570
535
  - If `False`, validation will include all records in the registry, ignoring the specified source.
571
536
  - If `True`, validation will only include records in the registry that are linked to the specified source.
572
- Note: this parameter won't affect validation against bionty/public sources.
537
+ Note: this parameter won't affect validation against public sources.
573
538
 
574
539
  Returns:
575
540
  A vector of booleans indicating if an element is validated.
@@ -577,13 +542,15 @@ class CanCurate:
577
542
  See Also:
578
543
  :meth:`~lamindb.models.CanCurate.inspect`
579
544
 
580
- Examples:
581
- >>> import bionty as bt
582
- >>> bt.settings.organism = "human"
583
- >>> ln.save(bt.Gene.from_values(["A1CF", "A1BG", "BRCA2"], field="symbol"))
584
- >>> gene_symbols = ["A1CF", "A1BG", "FANCD1", "FANCD20"]
585
- >>> bt.Gene.validate(gene_symbols, field=bt.Gene.symbol)
586
- array([ True, True, False, False])
545
+ Example::
546
+
547
+ import bionty as bt
548
+
549
+ bt.Gene.from_values(["A1CF", "A1BG", "BRCA2"], field="symbol", organism="human").save()
550
+
551
+ gene_symbols = ["A1CF", "A1BG", "FANCD1", "FANCD20"]
552
+ bt.Gene.validate(gene_symbols, field=bt.Gene.symbol, organism="human")
553
+ #> array([ True, True, False, False])
587
554
  """
588
555
  return _validate(
589
556
  cls=cls,
@@ -622,33 +589,25 @@ class CanCurate:
622
589
  Notes:
623
590
  For more info, see tutorial: :doc:`docs:bio-registries`.
624
591
 
625
- Examples:
592
+ Example::
626
593
 
627
- Bulk create from non-validated values will log warnings & returns empty list:
594
+ import bionty as bt
628
595
 
629
- >>> ulabels = ln.ULabel.from_values(["benchmark", "prediction", "test"], field="name")
630
- >>> assert len(ulabels) == 0
596
+ # Bulk create from non-validated values logs warnings & returns an empty list
597
+ ulabels = ln.ULabel.from_values(["benchmark", "prediction", "test"])
598
+ assert len(ulabels) == 0
631
599
 
632
- Bulk create records from validated values returns the corresponding existing records:
600
+ # Bulk create records from validated values returns the corresponding existing records
601
+ ulabels = ln.ULabel.from_values(["benchmark", "prediction", "test"], create=True).save()
602
+ assert len(ulabels) == 3
633
603
 
634
- >>> ln.save([ln.ULabel(name=name) for name in ["benchmark", "prediction", "test"]])
635
- >>> ulabels = ln.ULabel.from_values(["benchmark", "prediction", "test"], field="name")
636
- >>> assert len(ulabels) == 3
637
-
638
- Bulk create records from public reference:
639
-
640
- >>> import bionty as bt
641
- >>> records = bt.CellType.from_values(["T cell", "B cell"], field="name")
642
- >>> records
604
+ # Bulk create records from public reference
605
+ bt.CellType.from_values(["T cell", "B cell"]).save()
643
606
  """
644
- from_source = True if cls.__module__.startswith("bionty.") else False
645
-
646
- field_str = get_name_field(cls, field=field)
647
- return get_or_create_records(
607
+ return _from_values(
648
608
  iterable=values,
649
- field=getattr(cls, field_str),
609
+ field=getattr(cls, get_name_field(cls, field=field)),
650
610
  create=create,
651
- from_source=from_source,
652
611
  organism=organism,
653
612
  source=source,
654
613
  mute=mute,
@@ -658,13 +617,13 @@ class CanCurate:
658
617
  def standardize(
659
618
  cls,
660
619
  values: Iterable,
661
- field: str | StrField | None = None,
620
+ field: StrField | None = None,
662
621
  *,
663
- return_field: str | StrField | None = None,
622
+ return_field: StrField | None = None,
664
623
  return_mapper: bool = False,
665
624
  case_sensitive: bool = False,
666
625
  mute: bool = False,
667
- public_aware: bool = True,
626
+ source_aware: bool = True,
668
627
  keep: Literal["first", "last", False] = "first",
669
628
  synonyms_field: str = "synonyms",
670
629
  organism: Union[str, Record, None] = None,
@@ -680,22 +639,22 @@ class CanCurate:
680
639
  return_mapper: If `True`, returns `{input_value: standardized_name}`.
681
640
  case_sensitive: Whether the mapping is case sensitive.
682
641
  mute: Whether to mute logging.
683
- public_aware: Whether to standardize from Bionty reference. Defaults to `True` for Bionty registries.
642
+ source_aware: Whether to standardize from public source. Defaults to `True` for BioRecord registries.
684
643
  keep: When a synonym maps to multiple names, determines which duplicates to mark as `pd.DataFrame.duplicated`:
685
- - `"first"`: returns the first mapped standardized name
686
- - `"last"`: returns the last mapped standardized name
687
- - `False`: returns all mapped standardized name.
644
+ - `"first"`: returns the first mapped standardized name
645
+ - `"last"`: returns the last mapped standardized name
646
+ - `False`: returns all mapped standardized names.
688
647
 
689
- When `keep` is `False`, the returned list of standardized names will contain nested lists in case of duplicates.
648
+ When `keep` is `False`, the returned list of standardized names will contain nested lists in case of duplicates.
690
649
 
691
- When a field is converted into return_field, keep marks which matches to keep when multiple return_field values map to the same field value.
650
+ When a field is converted into return_field, keep marks which matches to keep when multiple return_field values map to the same field value.
692
651
  synonyms_field: A field containing the concatenated synonyms.
693
652
  organism: An Organism name or record.
694
653
  source: A `bionty.Source` record that specifies the version to validate against.
695
654
  strict_source: Determines the validation behavior against records in the registry.
696
655
  - If `False`, validation will include all records in the registry, ignoring the specified source.
697
656
  - If `True`, validation will only include records in the registry that are linked to the specified source.
698
- Note: this parameter won't affect validation against bionty/public sources.
657
+ Note: this parameter won't affect validation against public sources.
699
658
 
700
659
  Returns:
701
660
  If `return_mapper` is `False`: a list of standardized names. Otherwise,
@@ -708,14 +667,17 @@ class CanCurate:
708
667
  :meth:`~lamindb.models.CanCurate.remove_synonym`
709
668
  Remove synonyms.
710
669
 
711
- Examples:
712
- >>> import bionty as bt
713
- >>> bt.settings.organism = "human"
714
- >>> ln.save(bt.Gene.from_values(["A1CF", "A1BG", "BRCA2"], field="symbol"))
715
- >>> gene_synonyms = ["A1CF", "A1BG", "FANCD1", "FANCD20"]
716
- >>> standardized_names = bt.Gene.standardize(gene_synonyms)
717
- >>> standardized_names
718
- ['A1CF', 'A1BG', 'BRCA2', 'FANCD20']
670
+ Example::
671
+
672
+ import bionty as bt
673
+
674
+ # save some gene records
675
+ bt.Gene.from_values(["A1CF", "A1BG", "BRCA2"], field="symbol", organism="human").save()
676
+
677
+ # standardize gene synonyms
678
+ gene_synonyms = ["A1CF", "A1BG", "FANCD1", "FANCD20"]
679
+ bt.Gene.standardize(gene_synonyms)
680
+ #> ['A1CF', 'A1BG', 'BRCA2', 'FANCD20']
719
681
  """
720
682
  return _standardize(
721
683
  cls=cls,
@@ -726,7 +688,7 @@ class CanCurate:
726
688
  case_sensitive=case_sensitive,
727
689
  mute=mute,
728
690
  strict_source=strict_source,
729
- public_aware=public_aware,
691
+ source_aware=source_aware,
730
692
  keep=keep,
731
693
  synonyms_field=synonyms_field,
732
694
  organism=organism,
@@ -750,16 +712,19 @@ class CanCurate:
750
712
  :meth:`~lamindb.models.CanCurate.remove_synonym`
751
713
  Remove synonyms.
752
714
 
753
- Examples:
754
- >>> import bionty as bt
755
- >>> bt.CellType.from_source(name="T cell").save()
756
- >>> lookup = bt.CellType.lookup()
757
- >>> record = lookup.t_cell
758
- >>> record.synonyms
759
- 'T-cell|T lymphocyte|T-lymphocyte'
760
- >>> record.add_synonym("T cells")
761
- >>> record.synonyms
762
- 'T cells|T-cell|T-lymphocyte|T lymphocyte'
715
+ Example::
716
+
717
+ import bionty as bt
718
+
719
+ # save "T cell" record
720
+ record = bt.CellType.from_source(name="T cell").save()
721
+ record.synonyms
722
+ #> "T-cell|T lymphocyte|T-lymphocyte"
723
+
724
+ # add a synonym
725
+ record.add_synonym("T cells")
726
+ record.synonyms
727
+ #> "T cells|T-cell|T-lymphocyte|T lymphocyte"
763
728
  """
764
729
  _check_synonyms_field_exist(self)
765
730
  _add_or_remove_synonyms(
@@ -776,15 +741,19 @@ class CanCurate:
776
741
  :meth:`~lamindb.models.CanCurate.add_synonym`
777
742
  Add synonyms.
778
743
 
779
- Examples:
780
- >>> import bionty as bt
781
- >>> bt.CellType.from_source(name="T cell").save()
782
- >>> lookup = bt.CellType.lookup()
783
- >>> record = lookup.t_cell
784
- >>> record.synonyms
785
- 'T-cell|T lymphocyte|T-lymphocyte'
786
- >>> record.remove_synonym("T-cell")
787
- 'T lymphocyte|T-lymphocyte'
744
+ Example::
745
+
746
+ import bionty as bt
747
+
748
+ # save "T cell" record
749
+ record = bt.CellType.from_source(name="T cell").save()
750
+ record.synonyms
751
+ #> "T-cell|T lymphocyte|T-lymphocyte"
752
+
753
+ # remove a synonym
754
+ record.remove_synonym("T-cell")
755
+ record.synonyms
756
+ #> "T lymphocyte|T-lymphocyte"
788
757
  """
789
758
  _check_synonyms_field_exist(self)
790
759
  _add_or_remove_synonyms(synonym=synonym, record=self, action="remove")
@@ -798,20 +767,20 @@ class CanCurate:
798
767
  See Also:
799
768
  :meth:`~lamindb.models.CanCurate.add_synonym`
800
769
 
801
- Examples:
802
- >>> import bionty as bt
803
- >>> bt.ExperimentalFactor.from_source(name="single-cell RNA sequencing").save()
804
- >>> scrna = bt.ExperimentalFactor.get(name="single-cell RNA sequencing")
805
- >>> scrna.abbr
806
- None
807
- >>> scrna.synonyms
808
- 'single-cell RNA-seq|single-cell transcriptome sequencing|scRNA-seq|single cell RNA sequencing'
809
- >>> scrna.set_abbr("scRNA")
810
- >>> scrna.abbr
811
- 'scRNA'
812
- >>> scrna.synonyms
813
- 'scRNA|single-cell RNA-seq|single cell RNA sequencing|single-cell transcriptome sequencing|scRNA-seq'
814
- >>> scrna.save()
770
+ Example::
771
+
772
+ import bionty as bt
773
+
774
+ # save an experimental factor record
775
+ scrna = bt.ExperimentalFactor.from_source(name="single-cell RNA sequencing").save()
776
+ assert scrna.abbr is None
777
+ assert scrna.synonyms == "single-cell RNA-seq|single-cell transcriptome sequencing|scRNA-seq|single cell RNA sequencing"
778
+
779
+ # set abbreviation
780
+ scrna.set_abbr("scRNA")
781
+ assert scrna.abbr == "scRNA"
782
+ # synonyms are updated
783
+ assert scrna.synonyms == "scRNA|single-cell RNA-seq|single cell RNA sequencing|single-cell transcriptome sequencing|scRNA-seq"
815
784
  """
816
785
  self.abbr = value
817
786