lamindb 0.76.7__py3-none-any.whl → 0.76.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lamindb/__init__.py +113 -113
- lamindb/_artifact.py +1205 -1178
- lamindb/_can_validate.py +579 -579
- lamindb/_collection.py +387 -387
- lamindb/_curate.py +1601 -1601
- lamindb/_feature.py +155 -155
- lamindb/_feature_set.py +242 -242
- lamindb/_filter.py +23 -23
- lamindb/_finish.py +256 -256
- lamindb/_from_values.py +382 -382
- lamindb/_is_versioned.py +40 -40
- lamindb/_parents.py +476 -476
- lamindb/_query_manager.py +125 -125
- lamindb/_query_set.py +362 -362
- lamindb/_record.py +649 -649
- lamindb/_run.py +57 -57
- lamindb/_save.py +308 -295
- lamindb/_storage.py +14 -14
- lamindb/_transform.py +127 -127
- lamindb/_ulabel.py +56 -56
- lamindb/_utils.py +9 -9
- lamindb/_view.py +72 -72
- lamindb/core/__init__.py +94 -94
- lamindb/core/_context.py +574 -574
- lamindb/core/_data.py +438 -438
- lamindb/core/_feature_manager.py +867 -867
- lamindb/core/_label_manager.py +253 -253
- lamindb/core/_mapped_collection.py +597 -597
- lamindb/core/_settings.py +187 -187
- lamindb/core/_sync_git.py +138 -138
- lamindb/core/_track_environment.py +27 -27
- lamindb/core/datasets/__init__.py +59 -59
- lamindb/core/datasets/_core.py +571 -571
- lamindb/core/datasets/_fake.py +36 -36
- lamindb/core/exceptions.py +90 -77
- lamindb/core/fields.py +12 -12
- lamindb/core/loaders.py +164 -164
- lamindb/core/schema.py +56 -56
- lamindb/core/storage/__init__.py +25 -25
- lamindb/core/storage/_anndata_accessor.py +740 -740
- lamindb/core/storage/_anndata_sizes.py +41 -41
- lamindb/core/storage/_backed_access.py +98 -98
- lamindb/core/storage/_tiledbsoma.py +204 -204
- lamindb/core/storage/_valid_suffixes.py +21 -21
- lamindb/core/storage/_zarr.py +110 -110
- lamindb/core/storage/objects.py +62 -62
- lamindb/core/storage/paths.py +172 -141
- lamindb/core/subsettings/__init__.py +12 -12
- lamindb/core/subsettings/_creation_settings.py +38 -38
- lamindb/core/subsettings/_transform_settings.py +21 -21
- lamindb/core/types.py +19 -19
- lamindb/core/versioning.py +158 -158
- lamindb/integrations/__init__.py +12 -12
- lamindb/integrations/_vitessce.py +107 -107
- lamindb/setup/__init__.py +14 -14
- lamindb/setup/core/__init__.py +4 -4
- {lamindb-0.76.7.dist-info → lamindb-0.76.8.dist-info}/LICENSE +201 -201
- {lamindb-0.76.7.dist-info → lamindb-0.76.8.dist-info}/METADATA +3 -3
- lamindb-0.76.8.dist-info/RECORD +60 -0
- {lamindb-0.76.7.dist-info → lamindb-0.76.8.dist-info}/WHEEL +1 -1
- lamindb-0.76.7.dist-info/RECORD +0 -60
lamindb/_can_validate.py
CHANGED
@@ -1,579 +1,579 @@
|
|
1
|
-
from __future__ import annotations
|
2
|
-
|
3
|
-
from typing import TYPE_CHECKING, Literal
|
4
|
-
|
5
|
-
import lamindb_setup as ln_setup
|
6
|
-
import numpy as np
|
7
|
-
import pandas as pd
|
8
|
-
from django.core.exceptions import FieldDoesNotExist
|
9
|
-
from lamin_utils import colors, logger
|
10
|
-
from lamindb_setup.core._docs import doc_args
|
11
|
-
from lnschema_core import CanValidate, Record
|
12
|
-
|
13
|
-
from lamindb._utils import attach_func_to_class_method
|
14
|
-
|
15
|
-
from ._from_values import _has_organism_field, _print_values
|
16
|
-
from ._record import _queryset, get_name_field
|
17
|
-
|
18
|
-
if TYPE_CHECKING:
|
19
|
-
from django.db.models import QuerySet
|
20
|
-
from lamin_utils._inspect import InspectResult
|
21
|
-
from lnschema_core.types import ListLike, StrField
|
22
|
-
|
23
|
-
|
24
|
-
@classmethod # type: ignore
|
25
|
-
@doc_args(CanValidate.inspect.__doc__)
|
26
|
-
def inspect(
|
27
|
-
cls,
|
28
|
-
values: ListLike,
|
29
|
-
field: str | StrField | None = None,
|
30
|
-
*,
|
31
|
-
mute: bool = False,
|
32
|
-
organism: str | Record | None = None,
|
33
|
-
source: Record | None = None,
|
34
|
-
) -> InspectResult:
|
35
|
-
"""{}""" # noqa: D415
|
36
|
-
return _inspect(
|
37
|
-
cls=cls,
|
38
|
-
values=values,
|
39
|
-
field=field,
|
40
|
-
mute=mute,
|
41
|
-
organism=organism,
|
42
|
-
source=source,
|
43
|
-
)
|
44
|
-
|
45
|
-
|
46
|
-
@classmethod # type: ignore
|
47
|
-
@doc_args(CanValidate.validate.__doc__)
|
48
|
-
def validate(
|
49
|
-
cls,
|
50
|
-
values: ListLike,
|
51
|
-
field: str | StrField | None = None,
|
52
|
-
*,
|
53
|
-
mute: bool = False,
|
54
|
-
organism: str | Record | None = None,
|
55
|
-
source: Record | None = None,
|
56
|
-
) -> np.ndarray:
|
57
|
-
"""{}""" # noqa: D415
|
58
|
-
return _validate(
|
59
|
-
cls=cls, values=values, field=field, mute=mute, organism=organism, source=source
|
60
|
-
)
|
61
|
-
|
62
|
-
|
63
|
-
def _check_source_db(source: Record, using_key: str | None):
|
64
|
-
"""Check if the source is from the DB."""
|
65
|
-
if using_key is not None and using_key != "default":
|
66
|
-
if source._state.db != using_key:
|
67
|
-
raise ValueError(
|
68
|
-
f"source must be a bionty.Source record from instance '{using_key}'!"
|
69
|
-
)
|
70
|
-
|
71
|
-
|
72
|
-
def _check_organism_db(organism: Record, using_key: str | None):
|
73
|
-
"""Check if the organism is from the DB."""
|
74
|
-
if isinstance(organism, Record):
|
75
|
-
if using_key is not None and using_key != "default":
|
76
|
-
if organism._state.db != using_key:
|
77
|
-
raise ValueError(
|
78
|
-
f"organism must be a bionty.Organism record from instance '{using_key}'!"
|
79
|
-
)
|
80
|
-
|
81
|
-
|
82
|
-
def _concat_lists(values: ListLike) -> list[str]:
|
83
|
-
"""Concatenate a list of lists of strings into a single list."""
|
84
|
-
if len(values) > 0 and isinstance(values, (list, pd.Series)):
|
85
|
-
try:
|
86
|
-
if isinstance(values[0], list):
|
87
|
-
if isinstance(values, pd.Series):
|
88
|
-
values = values.tolist()
|
89
|
-
values = sum([v for v in values if isinstance(v, list)], [])
|
90
|
-
except KeyError:
|
91
|
-
pass
|
92
|
-
return values
|
93
|
-
|
94
|
-
|
95
|
-
def _inspect(
|
96
|
-
cls,
|
97
|
-
values: ListLike,
|
98
|
-
field: str | StrField | None = None,
|
99
|
-
*,
|
100
|
-
mute: bool = False,
|
101
|
-
using_key: str | None = None,
|
102
|
-
organism: str | Record | None = None,
|
103
|
-
source: Record | None = None,
|
104
|
-
) -> pd.DataFrame | dict[str, list[str]]:
|
105
|
-
"""{}""" # noqa: D415
|
106
|
-
from lamin_utils._inspect import inspect
|
107
|
-
|
108
|
-
if isinstance(values, str):
|
109
|
-
values = [values]
|
110
|
-
values = _concat_lists(values)
|
111
|
-
|
112
|
-
field = get_name_field(cls, field=field)
|
113
|
-
queryset = _queryset(cls, using_key)
|
114
|
-
using_key = queryset.db
|
115
|
-
if isinstance(source, Record):
|
116
|
-
_check_source_db(source, using_key)
|
117
|
-
queryset = queryset.filter(source=source).all()
|
118
|
-
_check_organism_db(organism, using_key)
|
119
|
-
registry = queryset.model
|
120
|
-
model_name = registry._meta.model.__name__
|
121
|
-
|
122
|
-
# inspect in the DB
|
123
|
-
result_db = inspect(
|
124
|
-
df=_filter_query_based_on_organism(
|
125
|
-
queryset=queryset, field=field, organism=organism
|
126
|
-
),
|
127
|
-
identifiers=values,
|
128
|
-
field=field,
|
129
|
-
mute=mute,
|
130
|
-
)
|
131
|
-
nonval = set(result_db.non_validated).difference(result_db.synonyms_mapper.keys())
|
132
|
-
|
133
|
-
if len(nonval) > 0 and registry.__get_schema_name__() == "bionty":
|
134
|
-
try:
|
135
|
-
bionty_result = registry.public(organism=organism, source=source).inspect(
|
136
|
-
values=nonval, field=field, mute=True
|
137
|
-
)
|
138
|
-
bionty_validated = bionty_result.validated
|
139
|
-
bionty_mapper = bionty_result.synonyms_mapper
|
140
|
-
hint = False
|
141
|
-
if len(bionty_validated) > 0 and not mute:
|
142
|
-
print_values = _print_values(bionty_validated)
|
143
|
-
s = "" if len(bionty_validated) == 1 else "s"
|
144
|
-
labels = colors.yellow(f"{len(bionty_validated)} {model_name} term{s}")
|
145
|
-
logger.print(
|
146
|
-
f" detected {labels} in Bionty for"
|
147
|
-
f" {colors.italic(field)}: {colors.yellow(print_values)}"
|
148
|
-
)
|
149
|
-
hint = True
|
150
|
-
|
151
|
-
if len(bionty_mapper) > 0 and not mute:
|
152
|
-
print_values = _print_values(list(bionty_mapper.keys()))
|
153
|
-
s = "" if len(bionty_mapper) == 1 else "s"
|
154
|
-
labels = colors.yellow(f"{len(bionty_mapper)} {model_name} term{s}")
|
155
|
-
logger.print(
|
156
|
-
f" detected {labels} in Bionty as {colors.italic(f'synonym{s}')}:"
|
157
|
-
f" {colors.yellow(print_values)}"
|
158
|
-
)
|
159
|
-
hint = True
|
160
|
-
|
161
|
-
if hint:
|
162
|
-
logger.print(
|
163
|
-
f"→ add records from Bionty to your {model_name} registry via"
|
164
|
-
f" {colors.italic('.from_values()')}"
|
165
|
-
)
|
166
|
-
|
167
|
-
nonval = bionty_result.non_validated
|
168
|
-
# no bionty source is found
|
169
|
-
except ValueError:
|
170
|
-
logger.warning("no Bionty source found, skipping Bionty validation")
|
171
|
-
|
172
|
-
if len(nonval) > 0 and not mute:
|
173
|
-
print_values = _print_values(list(nonval))
|
174
|
-
s = "" if len(nonval) == 1 else "s"
|
175
|
-
labels = colors.red(f"{len(nonval)} term{s}")
|
176
|
-
logger.print(f" couldn't validate {labels}: {colors.red(print_values)}")
|
177
|
-
logger.print(
|
178
|
-
f"→ if you are sure, create new record{s} via"
|
179
|
-
f" {colors.italic(f'{registry.__name__}()')} and save to your registry"
|
180
|
-
)
|
181
|
-
|
182
|
-
return result_db
|
183
|
-
|
184
|
-
|
185
|
-
def _validate(
|
186
|
-
cls,
|
187
|
-
values: ListLike,
|
188
|
-
field: str | StrField | None = None,
|
189
|
-
*,
|
190
|
-
mute: bool = False,
|
191
|
-
using_key: str | None = None,
|
192
|
-
organism: str | Record | None = None,
|
193
|
-
source: Record | None = None,
|
194
|
-
) -> np.ndarray:
|
195
|
-
"""{}""" # noqa: D415
|
196
|
-
from lamin_utils._inspect import validate
|
197
|
-
|
198
|
-
return_str = True if isinstance(values, str) else False
|
199
|
-
if isinstance(values, str):
|
200
|
-
values = [values]
|
201
|
-
values = _concat_lists(values)
|
202
|
-
|
203
|
-
field = get_name_field(cls, field=field)
|
204
|
-
|
205
|
-
queryset = _queryset(cls, using_key)
|
206
|
-
using_key = queryset.db
|
207
|
-
if isinstance(source, Record):
|
208
|
-
_check_source_db(source, using_key)
|
209
|
-
queryset = queryset.filter(source=source).all()
|
210
|
-
_check_organism_db(organism, using_key)
|
211
|
-
field_values = pd.Series(
|
212
|
-
_filter_query_based_on_organism(
|
213
|
-
queryset=queryset,
|
214
|
-
field=field,
|
215
|
-
organism=organism,
|
216
|
-
values_list_field=field,
|
217
|
-
),
|
218
|
-
dtype="object",
|
219
|
-
)
|
220
|
-
if field_values.empty:
|
221
|
-
if not mute:
|
222
|
-
msg = (
|
223
|
-
f"Your {cls.__name__} registry is empty, consider populating it first!"
|
224
|
-
)
|
225
|
-
if hasattr(cls, "source_id"):
|
226
|
-
msg += "\n → use `.import_from_source()` to import records from a source, e.g. a public ontology"
|
227
|
-
logger.warning(msg)
|
228
|
-
return np.array([False] * len(values))
|
229
|
-
|
230
|
-
result = validate(
|
231
|
-
identifiers=values,
|
232
|
-
field_values=field_values,
|
233
|
-
case_sensitive=True,
|
234
|
-
mute=mute,
|
235
|
-
field=field,
|
236
|
-
)
|
237
|
-
if return_str and len(result) == 1:
|
238
|
-
return result[0]
|
239
|
-
else:
|
240
|
-
return result
|
241
|
-
|
242
|
-
|
243
|
-
@classmethod # type: ignore
|
244
|
-
@doc_args(CanValidate.standardize.__doc__)
|
245
|
-
def standardize(
|
246
|
-
cls,
|
247
|
-
values: ListLike,
|
248
|
-
field: str | StrField | None = None,
|
249
|
-
*,
|
250
|
-
return_field: str = None,
|
251
|
-
return_mapper: bool = False,
|
252
|
-
case_sensitive: bool = False,
|
253
|
-
mute: bool = False,
|
254
|
-
public_aware: bool = True,
|
255
|
-
keep: Literal["first", "last", False] = "first",
|
256
|
-
synonyms_field: str = "synonyms",
|
257
|
-
organism: str | Record | None = None,
|
258
|
-
source: Record | None = None,
|
259
|
-
) -> list[str] | dict[str, str]:
|
260
|
-
"""{}""" # noqa: D415
|
261
|
-
return _standardize(
|
262
|
-
cls=cls,
|
263
|
-
values=values,
|
264
|
-
field=field,
|
265
|
-
return_field=return_field,
|
266
|
-
return_mapper=return_mapper,
|
267
|
-
case_sensitive=case_sensitive,
|
268
|
-
mute=mute,
|
269
|
-
public_aware=public_aware,
|
270
|
-
keep=keep,
|
271
|
-
synonyms_field=synonyms_field,
|
272
|
-
organism=organism,
|
273
|
-
source=source,
|
274
|
-
)
|
275
|
-
|
276
|
-
|
277
|
-
def set_abbr(self, value: str):
|
278
|
-
self.abbr = value
|
279
|
-
|
280
|
-
if hasattr(self, "name") and value == self.name:
|
281
|
-
pass
|
282
|
-
else:
|
283
|
-
try:
|
284
|
-
self.add_synonym(value, save=False)
|
285
|
-
except Exception as e: # pragma: no cover
|
286
|
-
logger.debug(
|
287
|
-
f"Encountered an Exception while attempting to add synonyms.\n{e}"
|
288
|
-
)
|
289
|
-
|
290
|
-
if not self._state.adding:
|
291
|
-
self.save()
|
292
|
-
|
293
|
-
|
294
|
-
def add_synonym(
|
295
|
-
self,
|
296
|
-
synonym: str | ListLike,
|
297
|
-
force: bool = False,
|
298
|
-
save: bool | None = None,
|
299
|
-
):
|
300
|
-
_check_synonyms_field_exist(self)
|
301
|
-
_add_or_remove_synonyms(
|
302
|
-
synonym=synonym, record=self, force=force, action="add", save=save
|
303
|
-
)
|
304
|
-
|
305
|
-
|
306
|
-
def remove_synonym(self, synonym: str | ListLike):
|
307
|
-
_check_synonyms_field_exist(self)
|
308
|
-
_add_or_remove_synonyms(synonym=synonym, record=self, action="remove")
|
309
|
-
|
310
|
-
|
311
|
-
def _standardize(
|
312
|
-
cls,
|
313
|
-
values: ListLike,
|
314
|
-
field: str | StrField | None = None,
|
315
|
-
*,
|
316
|
-
return_field: str = None,
|
317
|
-
return_mapper: bool = False,
|
318
|
-
case_sensitive: bool = False,
|
319
|
-
mute: bool = False,
|
320
|
-
public_aware: bool = True,
|
321
|
-
keep: Literal["first", "last", False] = "first",
|
322
|
-
synonyms_field: str = "synonyms",
|
323
|
-
using_key: str | None = None,
|
324
|
-
organism: str | Record | None = None,
|
325
|
-
source: Record | None = None,
|
326
|
-
) -> list[str] | dict[str, str]:
|
327
|
-
"""{}""" # noqa: D415
|
328
|
-
from lamin_utils._standardize import standardize as map_synonyms
|
329
|
-
|
330
|
-
return_str = True if isinstance(values, str) else False
|
331
|
-
if isinstance(values, str):
|
332
|
-
values = [values]
|
333
|
-
values = _concat_lists(values)
|
334
|
-
|
335
|
-
field = get_name_field(cls, field=field)
|
336
|
-
return_field = get_name_field(
|
337
|
-
cls, field=field if return_field is None else return_field
|
338
|
-
)
|
339
|
-
queryset = _queryset(cls, using_key)
|
340
|
-
using_key = queryset.db
|
341
|
-
if isinstance(source, Record):
|
342
|
-
_check_source_db(source, using_key)
|
343
|
-
queryset = queryset.filter(source=source).all()
|
344
|
-
_check_organism_db(organism, using_key)
|
345
|
-
registry = queryset.model
|
346
|
-
|
347
|
-
if _has_organism_field(registry):
|
348
|
-
# here, we can safely import bionty
|
349
|
-
from bionty._bionty import create_or_get_organism_record
|
350
|
-
|
351
|
-
organism_record = create_or_get_organism_record(
|
352
|
-
organism=organism, registry=registry
|
353
|
-
)
|
354
|
-
organism = (
|
355
|
-
organism_record.name if organism_record is not None else organism_record
|
356
|
-
)
|
357
|
-
|
358
|
-
try:
|
359
|
-
registry._meta.get_field(synonyms_field)
|
360
|
-
df = _filter_query_based_on_organism(
|
361
|
-
queryset=queryset, field=field, organism=organism
|
362
|
-
)
|
363
|
-
except FieldDoesNotExist:
|
364
|
-
df = pd.DataFrame()
|
365
|
-
|
366
|
-
_kwargs = {
|
367
|
-
"field": field,
|
368
|
-
"return_field": return_field,
|
369
|
-
"case_sensitive": case_sensitive,
|
370
|
-
"keep": keep,
|
371
|
-
"synonyms_field": synonyms_field,
|
372
|
-
}
|
373
|
-
# standardized names from the DB
|
374
|
-
std_names_db = map_synonyms(
|
375
|
-
df=df,
|
376
|
-
identifiers=values,
|
377
|
-
return_mapper=return_mapper,
|
378
|
-
mute=mute,
|
379
|
-
**_kwargs,
|
380
|
-
)
|
381
|
-
|
382
|
-
def _return(result: list, mapper: dict):
|
383
|
-
if return_mapper:
|
384
|
-
return mapper
|
385
|
-
else:
|
386
|
-
if return_str and len(result) == 1:
|
387
|
-
return result[0]
|
388
|
-
return result
|
389
|
-
|
390
|
-
# map synonyms in Bionty
|
391
|
-
if registry.__get_schema_name__() == "bionty" and public_aware:
|
392
|
-
mapper = {}
|
393
|
-
if return_mapper:
|
394
|
-
mapper = std_names_db
|
395
|
-
std_names_db = map_synonyms(
|
396
|
-
df=df, identifiers=values, return_mapper=False, mute=True, **_kwargs
|
397
|
-
)
|
398
|
-
|
399
|
-
val_res = registry.validate(
|
400
|
-
std_names_db, field=field, mute=True, organism=organism
|
401
|
-
)
|
402
|
-
if all(val_res):
|
403
|
-
return _return(result=std_names_db, mapper=mapper)
|
404
|
-
|
405
|
-
nonval = np.array(std_names_db)[~val_res]
|
406
|
-
std_names_bt_mapper = registry.public(organism=organism).standardize(
|
407
|
-
nonval, return_mapper=True, mute=True, **_kwargs
|
408
|
-
)
|
409
|
-
|
410
|
-
if len(std_names_bt_mapper) > 0 and not mute:
|
411
|
-
s = "" if len(std_names_bt_mapper) == 1 else "s"
|
412
|
-
field_print = "synonym" if field == return_field else field
|
413
|
-
warn_msg = (
|
414
|
-
f"found {len(std_names_bt_mapper)} {field_print}{s} in Bionty:"
|
415
|
-
f" {list(std_names_bt_mapper.keys())}"
|
416
|
-
)
|
417
|
-
warn_msg += (
|
418
|
-
f"\n please add corresponding {registry._meta.model.__name__} records via"
|
419
|
-
f" `.from_values({list(set(std_names_bt_mapper.values()))})`"
|
420
|
-
)
|
421
|
-
logger.warning(warn_msg)
|
422
|
-
|
423
|
-
mapper.update(std_names_bt_mapper)
|
424
|
-
if pd.api.types.is_categorical_dtype(std_names_db):
|
425
|
-
result = std_names_db.cat.rename_categories(std_names_bt_mapper).tolist()
|
426
|
-
else:
|
427
|
-
result = pd.Series(std_names_db).replace(std_names_bt_mapper).tolist()
|
428
|
-
return _return(result=result, mapper=mapper)
|
429
|
-
|
430
|
-
else:
|
431
|
-
return _return(result=std_names_db, mapper=std_names_db)
|
432
|
-
|
433
|
-
|
434
|
-
def _add_or_remove_synonyms(
|
435
|
-
synonym: str | ListLike,
|
436
|
-
record: Record,
|
437
|
-
action: Literal["add", "remove"],
|
438
|
-
force: bool = False,
|
439
|
-
save: bool | None = None,
|
440
|
-
):
|
441
|
-
"""Add or remove synonyms."""
|
442
|
-
|
443
|
-
def check_synonyms_in_all_records(synonyms: set[str], record: Record):
|
444
|
-
"""Errors if input synonym is associated with other records in the DB."""
|
445
|
-
import pandas as pd
|
446
|
-
from IPython.display import display
|
447
|
-
|
448
|
-
syns_all = (
|
449
|
-
record.__class__.objects.exclude(synonyms="").exclude(synonyms=None).all()
|
450
|
-
)
|
451
|
-
if len(syns_all) == 0:
|
452
|
-
return
|
453
|
-
df = pd.DataFrame(syns_all.values())
|
454
|
-
df["synonyms"] = df["synonyms"].str.split("|")
|
455
|
-
df = df.explode("synonyms")
|
456
|
-
matches_df = df[(df["synonyms"].isin(synonyms)) & (df["id"] != record.id)]
|
457
|
-
if matches_df.shape[0] > 0:
|
458
|
-
records_df = pd.DataFrame(syns_all.filter(id__in=matches_df["id"]).values())
|
459
|
-
logger.error(
|
460
|
-
f"input synonyms {matches_df['synonyms'].unique()} already associated"
|
461
|
-
" with the following records:\n"
|
462
|
-
)
|
463
|
-
display(records_df)
|
464
|
-
raise ValueError(
|
465
|
-
"cannot assigned a synonym that is already associated with a record to a different record.\n"
|
466
|
-
"Consider removing the synonym from existing records or using a different synonym."
|
467
|
-
)
|
468
|
-
|
469
|
-
# passed synonyms
|
470
|
-
# nothing happens when passing an empty string or list
|
471
|
-
if isinstance(synonym, str):
|
472
|
-
if len(synonym) == 0:
|
473
|
-
return
|
474
|
-
syn_new_set = {synonym}
|
475
|
-
else:
|
476
|
-
if synonym == [""]:
|
477
|
-
return
|
478
|
-
syn_new_set = set(synonym)
|
479
|
-
# nothing happens when passing an empty string or list
|
480
|
-
if len(syn_new_set) == 0:
|
481
|
-
return
|
482
|
-
# because we use | as the separator
|
483
|
-
if any("|" in i for i in syn_new_set):
|
484
|
-
raise ValueError("a synonym can't contain '|'!")
|
485
|
-
|
486
|
-
# existing synonyms
|
487
|
-
syns_exist = record.synonyms
|
488
|
-
if syns_exist is None or len(syns_exist) == 0:
|
489
|
-
syns_exist_set = set()
|
490
|
-
else:
|
491
|
-
syns_exist_set = set(syns_exist.split("|"))
|
492
|
-
|
493
|
-
if action == "add":
|
494
|
-
if not force:
|
495
|
-
check_synonyms_in_all_records(syn_new_set, record)
|
496
|
-
syns_exist_set.update(syn_new_set)
|
497
|
-
elif action == "remove":
|
498
|
-
syns_exist_set = syns_exist_set.difference(syn_new_set)
|
499
|
-
|
500
|
-
if len(syns_exist_set) == 0:
|
501
|
-
syns_str = None
|
502
|
-
else:
|
503
|
-
syns_str = "|".join(syns_exist_set)
|
504
|
-
|
505
|
-
record.synonyms = syns_str
|
506
|
-
|
507
|
-
if save is None:
|
508
|
-
# if record is already in DB, save the changes to DB
|
509
|
-
save = not record._state.adding
|
510
|
-
if save:
|
511
|
-
record.save()
|
512
|
-
|
513
|
-
|
514
|
-
def _check_synonyms_field_exist(record: Record):
|
515
|
-
try:
|
516
|
-
record.__getattribute__("synonyms")
|
517
|
-
except AttributeError:
|
518
|
-
raise NotImplementedError(
|
519
|
-
f"No synonyms field found in table {record.__class__.__name__}!"
|
520
|
-
) from None
|
521
|
-
|
522
|
-
|
523
|
-
def _filter_query_based_on_organism(
|
524
|
-
queryset: QuerySet,
|
525
|
-
field: str,
|
526
|
-
organism: str | Record | None = None,
|
527
|
-
values_list_field: str | None = None,
|
528
|
-
):
|
529
|
-
"""Filter a queryset based on organism."""
|
530
|
-
import pandas as pd
|
531
|
-
|
532
|
-
registry = queryset.model
|
533
|
-
|
534
|
-
if _has_organism_field(registry) and not _field_is_id(field, registry):
|
535
|
-
# here, we can safely import bionty
|
536
|
-
from bionty._bionty import create_or_get_organism_record
|
537
|
-
|
538
|
-
organism_record = create_or_get_organism_record(
|
539
|
-
organism=organism, registry=registry
|
540
|
-
)
|
541
|
-
if organism_record is not None:
|
542
|
-
queryset = queryset.filter(organism__name=organism_record.name)
|
543
|
-
|
544
|
-
if values_list_field is None:
|
545
|
-
return pd.DataFrame.from_records(queryset.values())
|
546
|
-
else:
|
547
|
-
return queryset.values_list(values_list_field, flat=True)
|
548
|
-
|
549
|
-
|
550
|
-
def _field_is_id(field: str, registry: type[Record]) -> bool:
|
551
|
-
"""Check if the field is an ontology ID."""
|
552
|
-
if hasattr(registry, "_ontology_id_field"):
|
553
|
-
if field == registry._ontology_id_field:
|
554
|
-
return True
|
555
|
-
if field.endswith("id"):
|
556
|
-
return True
|
557
|
-
return False
|
558
|
-
|
559
|
-
|
560
|
-
METHOD_NAMES = [
|
561
|
-
"validate",
|
562
|
-
"inspect",
|
563
|
-
"standardize",
|
564
|
-
"add_synonym",
|
565
|
-
"remove_synonym",
|
566
|
-
"set_abbr",
|
567
|
-
]
|
568
|
-
|
569
|
-
if ln_setup._TESTING: # type: ignore
|
570
|
-
from inspect import signature
|
571
|
-
|
572
|
-
SIGS = {
|
573
|
-
name: signature(getattr(CanValidate, name))
|
574
|
-
for name in METHOD_NAMES
|
575
|
-
if not name.startswith("__")
|
576
|
-
}
|
577
|
-
|
578
|
-
for name in METHOD_NAMES:
|
579
|
-
attach_func_to_class_method(name, CanValidate, globals())
|
1
|
+
from __future__ import annotations
|
2
|
+
|
3
|
+
from typing import TYPE_CHECKING, Literal
|
4
|
+
|
5
|
+
import lamindb_setup as ln_setup
|
6
|
+
import numpy as np
|
7
|
+
import pandas as pd
|
8
|
+
from django.core.exceptions import FieldDoesNotExist
|
9
|
+
from lamin_utils import colors, logger
|
10
|
+
from lamindb_setup.core._docs import doc_args
|
11
|
+
from lnschema_core import CanValidate, Record
|
12
|
+
|
13
|
+
from lamindb._utils import attach_func_to_class_method
|
14
|
+
|
15
|
+
from ._from_values import _has_organism_field, _print_values
|
16
|
+
from ._record import _queryset, get_name_field
|
17
|
+
|
18
|
+
if TYPE_CHECKING:
|
19
|
+
from django.db.models import QuerySet
|
20
|
+
from lamin_utils._inspect import InspectResult
|
21
|
+
from lnschema_core.types import ListLike, StrField
|
22
|
+
|
23
|
+
|
24
|
+
@classmethod # type: ignore
|
25
|
+
@doc_args(CanValidate.inspect.__doc__)
|
26
|
+
def inspect(
|
27
|
+
cls,
|
28
|
+
values: ListLike,
|
29
|
+
field: str | StrField | None = None,
|
30
|
+
*,
|
31
|
+
mute: bool = False,
|
32
|
+
organism: str | Record | None = None,
|
33
|
+
source: Record | None = None,
|
34
|
+
) -> InspectResult:
|
35
|
+
"""{}""" # noqa: D415
|
36
|
+
return _inspect(
|
37
|
+
cls=cls,
|
38
|
+
values=values,
|
39
|
+
field=field,
|
40
|
+
mute=mute,
|
41
|
+
organism=organism,
|
42
|
+
source=source,
|
43
|
+
)
|
44
|
+
|
45
|
+
|
46
|
+
@classmethod # type: ignore
|
47
|
+
@doc_args(CanValidate.validate.__doc__)
|
48
|
+
def validate(
|
49
|
+
cls,
|
50
|
+
values: ListLike,
|
51
|
+
field: str | StrField | None = None,
|
52
|
+
*,
|
53
|
+
mute: bool = False,
|
54
|
+
organism: str | Record | None = None,
|
55
|
+
source: Record | None = None,
|
56
|
+
) -> np.ndarray:
|
57
|
+
"""{}""" # noqa: D415
|
58
|
+
return _validate(
|
59
|
+
cls=cls, values=values, field=field, mute=mute, organism=organism, source=source
|
60
|
+
)
|
61
|
+
|
62
|
+
|
63
|
+
def _check_source_db(source: Record, using_key: str | None):
|
64
|
+
"""Check if the source is from the DB."""
|
65
|
+
if using_key is not None and using_key != "default":
|
66
|
+
if source._state.db != using_key:
|
67
|
+
raise ValueError(
|
68
|
+
f"source must be a bionty.Source record from instance '{using_key}'!"
|
69
|
+
)
|
70
|
+
|
71
|
+
|
72
|
+
def _check_organism_db(organism: Record, using_key: str | None):
|
73
|
+
"""Check if the organism is from the DB."""
|
74
|
+
if isinstance(organism, Record):
|
75
|
+
if using_key is not None and using_key != "default":
|
76
|
+
if organism._state.db != using_key:
|
77
|
+
raise ValueError(
|
78
|
+
f"organism must be a bionty.Organism record from instance '{using_key}'!"
|
79
|
+
)
|
80
|
+
|
81
|
+
|
82
|
+
def _concat_lists(values: ListLike) -> list[str]:
|
83
|
+
"""Concatenate a list of lists of strings into a single list."""
|
84
|
+
if len(values) > 0 and isinstance(values, (list, pd.Series)):
|
85
|
+
try:
|
86
|
+
if isinstance(values[0], list):
|
87
|
+
if isinstance(values, pd.Series):
|
88
|
+
values = values.tolist()
|
89
|
+
values = sum([v for v in values if isinstance(v, list)], [])
|
90
|
+
except KeyError:
|
91
|
+
pass
|
92
|
+
return values
|
93
|
+
|
94
|
+
|
95
|
+
def _inspect(
|
96
|
+
cls,
|
97
|
+
values: ListLike,
|
98
|
+
field: str | StrField | None = None,
|
99
|
+
*,
|
100
|
+
mute: bool = False,
|
101
|
+
using_key: str | None = None,
|
102
|
+
organism: str | Record | None = None,
|
103
|
+
source: Record | None = None,
|
104
|
+
) -> pd.DataFrame | dict[str, list[str]]:
|
105
|
+
"""{}""" # noqa: D415
|
106
|
+
from lamin_utils._inspect import inspect
|
107
|
+
|
108
|
+
if isinstance(values, str):
|
109
|
+
values = [values]
|
110
|
+
values = _concat_lists(values)
|
111
|
+
|
112
|
+
field = get_name_field(cls, field=field)
|
113
|
+
queryset = _queryset(cls, using_key)
|
114
|
+
using_key = queryset.db
|
115
|
+
if isinstance(source, Record):
|
116
|
+
_check_source_db(source, using_key)
|
117
|
+
queryset = queryset.filter(source=source).all()
|
118
|
+
_check_organism_db(organism, using_key)
|
119
|
+
registry = queryset.model
|
120
|
+
model_name = registry._meta.model.__name__
|
121
|
+
|
122
|
+
# inspect in the DB
|
123
|
+
result_db = inspect(
|
124
|
+
df=_filter_query_based_on_organism(
|
125
|
+
queryset=queryset, field=field, organism=organism
|
126
|
+
),
|
127
|
+
identifiers=values,
|
128
|
+
field=field,
|
129
|
+
mute=mute,
|
130
|
+
)
|
131
|
+
nonval = set(result_db.non_validated).difference(result_db.synonyms_mapper.keys())
|
132
|
+
|
133
|
+
if len(nonval) > 0 and registry.__get_schema_name__() == "bionty":
|
134
|
+
try:
|
135
|
+
bionty_result = registry.public(organism=organism, source=source).inspect(
|
136
|
+
values=nonval, field=field, mute=True
|
137
|
+
)
|
138
|
+
bionty_validated = bionty_result.validated
|
139
|
+
bionty_mapper = bionty_result.synonyms_mapper
|
140
|
+
hint = False
|
141
|
+
if len(bionty_validated) > 0 and not mute:
|
142
|
+
print_values = _print_values(bionty_validated)
|
143
|
+
s = "" if len(bionty_validated) == 1 else "s"
|
144
|
+
labels = colors.yellow(f"{len(bionty_validated)} {model_name} term{s}")
|
145
|
+
logger.print(
|
146
|
+
f" detected {labels} in Bionty for"
|
147
|
+
f" {colors.italic(field)}: {colors.yellow(print_values)}"
|
148
|
+
)
|
149
|
+
hint = True
|
150
|
+
|
151
|
+
if len(bionty_mapper) > 0 and not mute:
|
152
|
+
print_values = _print_values(list(bionty_mapper.keys()))
|
153
|
+
s = "" if len(bionty_mapper) == 1 else "s"
|
154
|
+
labels = colors.yellow(f"{len(bionty_mapper)} {model_name} term{s}")
|
155
|
+
logger.print(
|
156
|
+
f" detected {labels} in Bionty as {colors.italic(f'synonym{s}')}:"
|
157
|
+
f" {colors.yellow(print_values)}"
|
158
|
+
)
|
159
|
+
hint = True
|
160
|
+
|
161
|
+
if hint:
|
162
|
+
logger.print(
|
163
|
+
f"→ add records from Bionty to your {model_name} registry via"
|
164
|
+
f" {colors.italic('.from_values()')}"
|
165
|
+
)
|
166
|
+
|
167
|
+
nonval = bionty_result.non_validated
|
168
|
+
# no bionty source is found
|
169
|
+
except ValueError:
|
170
|
+
logger.warning("no Bionty source found, skipping Bionty validation")
|
171
|
+
|
172
|
+
if len(nonval) > 0 and not mute:
|
173
|
+
print_values = _print_values(list(nonval))
|
174
|
+
s = "" if len(nonval) == 1 else "s"
|
175
|
+
labels = colors.red(f"{len(nonval)} term{s}")
|
176
|
+
logger.print(f" couldn't validate {labels}: {colors.red(print_values)}")
|
177
|
+
logger.print(
|
178
|
+
f"→ if you are sure, create new record{s} via"
|
179
|
+
f" {colors.italic(f'{registry.__name__}()')} and save to your registry"
|
180
|
+
)
|
181
|
+
|
182
|
+
return result_db
|
183
|
+
|
184
|
+
|
185
|
+
def _validate(
|
186
|
+
cls,
|
187
|
+
values: ListLike,
|
188
|
+
field: str | StrField | None = None,
|
189
|
+
*,
|
190
|
+
mute: bool = False,
|
191
|
+
using_key: str | None = None,
|
192
|
+
organism: str | Record | None = None,
|
193
|
+
source: Record | None = None,
|
194
|
+
) -> np.ndarray:
|
195
|
+
"""{}""" # noqa: D415
|
196
|
+
from lamin_utils._inspect import validate
|
197
|
+
|
198
|
+
return_str = True if isinstance(values, str) else False
|
199
|
+
if isinstance(values, str):
|
200
|
+
values = [values]
|
201
|
+
values = _concat_lists(values)
|
202
|
+
|
203
|
+
field = get_name_field(cls, field=field)
|
204
|
+
|
205
|
+
queryset = _queryset(cls, using_key)
|
206
|
+
using_key = queryset.db
|
207
|
+
if isinstance(source, Record):
|
208
|
+
_check_source_db(source, using_key)
|
209
|
+
queryset = queryset.filter(source=source).all()
|
210
|
+
_check_organism_db(organism, using_key)
|
211
|
+
field_values = pd.Series(
|
212
|
+
_filter_query_based_on_organism(
|
213
|
+
queryset=queryset,
|
214
|
+
field=field,
|
215
|
+
organism=organism,
|
216
|
+
values_list_field=field,
|
217
|
+
),
|
218
|
+
dtype="object",
|
219
|
+
)
|
220
|
+
if field_values.empty:
|
221
|
+
if not mute:
|
222
|
+
msg = (
|
223
|
+
f"Your {cls.__name__} registry is empty, consider populating it first!"
|
224
|
+
)
|
225
|
+
if hasattr(cls, "source_id"):
|
226
|
+
msg += "\n → use `.import_from_source()` to import records from a source, e.g. a public ontology"
|
227
|
+
logger.warning(msg)
|
228
|
+
return np.array([False] * len(values))
|
229
|
+
|
230
|
+
result = validate(
|
231
|
+
identifiers=values,
|
232
|
+
field_values=field_values,
|
233
|
+
case_sensitive=True,
|
234
|
+
mute=mute,
|
235
|
+
field=field,
|
236
|
+
)
|
237
|
+
if return_str and len(result) == 1:
|
238
|
+
return result[0]
|
239
|
+
else:
|
240
|
+
return result
|
241
|
+
|
242
|
+
|
243
|
+
@classmethod  # type: ignore
@doc_args(CanValidate.standardize.__doc__)
def standardize(
    cls,
    values: ListLike,
    field: str | StrField | None = None,
    *,
    return_field: str = None,
    return_mapper: bool = False,
    case_sensitive: bool = False,
    mute: bool = False,
    public_aware: bool = True,
    keep: Literal["first", "last", False] = "first",
    synonyms_field: str = "synonyms",
    organism: str | Record | None = None,
    source: Record | None = None,
) -> list[str] | dict[str, str]:
    """{}"""  # noqa: D415
    # NOTE: the docstring above is a placeholder that is presumably filled in
    # by `doc_args` with the docstring of `CanValidate.standardize`; keep it
    # unchanged.
    #
    # This wrapper adds no logic of its own: every option is handed to
    # `_standardize` as-is (`using_key` is left at that helper's default).
    options = {
        "return_field": return_field,
        "return_mapper": return_mapper,
        "case_sensitive": case_sensitive,
        "mute": mute,
        "public_aware": public_aware,
        "keep": keep,
        "synonyms_field": synonyms_field,
        "organism": organism,
        "source": source,
    }
    return _standardize(cls, values, field, **options)
|
275
|
+
|
276
|
+
|
277
|
+
def set_abbr(self, value: str):
    # Store the abbreviation on the record.
    self.abbr = value

    # Register the abbreviation as a synonym too, unless it is identical to
    # the record's name. Adding the synonym is best-effort: a failure is only
    # logged at debug level and never propagated.
    same_as_name = hasattr(self, "name") and value == self.name
    if not same_as_name:
        try:
            self.add_synonym(value, save=False)
        except Exception as e:  # pragma: no cover
            logger.debug(
                f"Encountered an Exception while attempting to add synonyms.\n{e}"
            )

    # Records that are still being added are not saved here.
    if self._state.adding:
        return
    self.save()
|
292
|
+
|
293
|
+
|
294
|
+
def add_synonym(
    self,
    synonym: str | ListLike,
    force: bool = False,
    save: bool | None = None,
):
    # Fail early when this record type has no `synonyms` attribute, then let
    # the shared add/remove helper do the actual bookkeeping.
    _check_synonyms_field_exist(self)
    _add_or_remove_synonyms(synonym, self, "add", force=force, save=save)
|
304
|
+
|
305
|
+
|
306
|
+
def remove_synonym(self, synonym: str | ListLike):
    # Fail early when this record type has no `synonyms` attribute, then let
    # the shared add/remove helper strip the given synonym(s).
    _check_synonyms_field_exist(self)
    _add_or_remove_synonyms(synonym, self, "remove")
|
309
|
+
|
310
|
+
|
311
|
+
def _standardize(
    cls,
    values: ListLike,
    field: str | StrField | None = None,
    *,
    return_field: str = None,
    return_mapper: bool = False,
    case_sensitive: bool = False,
    mute: bool = False,
    public_aware: bool = True,
    keep: Literal["first", "last", False] = "first",
    synonyms_field: str = "synonyms",
    using_key: str | None = None,
    organism: str | Record | None = None,
    source: Record | None = None,
) -> list[str] | dict[str, str]:
    """Map `values` to standardized names using the synonyms of registry `cls`.

    The values are first mapped against the records of the registry. For
    registries whose schema name is "bionty" (and `public_aware=True`), values
    that still do not validate afterwards are additionally mapped against the
    public source returned by `registry.public(organism=...)`.

    Args:
        cls: Registry class whose records are queried.
        values: A single string or a list-like of identifiers. A single
            string is wrapped into a one-element list before processing.
        field: Field the values are looked up in; resolved via
            `get_name_field`.
        return_field: Field to return; defaults to `field` when None.
        return_mapper: If True, return the mapper instead of the list of
            standardized values.
        case_sensitive: Forwarded to the synonym mapper.
        mute: If True, suppress the mapper's logging and the warning about
            synonyms that were only found in the public source.
        public_aware: Whether to also consult the public source for
            "bionty" registries.
        keep: Forwarded to the synonym mapper.
        synonyms_field: Name of the field holding the synonyms. If the
            registry has no such field, mapping runs on an empty DataFrame.
        using_key: Passed to `_queryset`; the database key that is actually
            used afterwards is read back from the resulting queryset.
        organism: Organism name or record; resolved to the organism name for
            registries that have an organism field.
        source: If a `Record`, only records with this source are considered.

    Returns:
        The mapper if `return_mapper` is True. Otherwise the standardized
        values, reduced to the single element when a bare string was passed.
    """
    from lamin_utils._standardize import standardize as map_synonyms

    # remember whether the caller passed a bare string so that the result can
    # be unwrapped again in `_return`
    return_str = isinstance(values, str)
    if return_str:
        values = [values]
    values = _concat_lists(values)

    field = get_name_field(cls, field=field)
    return_field = get_name_field(
        cls, field=field if return_field is None else return_field
    )
    queryset = _queryset(cls, using_key)
    using_key = queryset.db
    if isinstance(source, Record):
        _check_source_db(source, using_key)
        queryset = queryset.filter(source=source).all()
    _check_organism_db(organism, using_key)
    registry = queryset.model

    if _has_organism_field(registry):
        # here, we can safely import bionty
        from bionty._bionty import create_or_get_organism_record

        organism_record = create_or_get_organism_record(
            organism=organism, registry=registry
        )
        # from here on, `organism` is the organism name (or None)
        organism = (
            organism_record.name if organism_record is not None else organism_record
        )

    try:
        # raises FieldDoesNotExist for registries without a synonyms field
        registry._meta.get_field(synonyms_field)
        df = _filter_query_based_on_organism(
            queryset=queryset, field=field, organism=organism
        )
    except FieldDoesNotExist:
        df = pd.DataFrame()

    _kwargs = {
        "field": field,
        "return_field": return_field,
        "case_sensitive": case_sensitive,
        "keep": keep,
        "synonyms_field": synonyms_field,
    }
    # standardized names from the DB
    std_names_db = map_synonyms(
        df=df,
        identifiers=values,
        return_mapper=return_mapper,
        mute=mute,
        **_kwargs,
    )

    def _return(result: list, mapper: dict):
        """Pick the mapper or the (possibly unwrapped) result for the caller."""
        if return_mapper:
            return mapper
        if return_str and len(result) == 1:
            return result[0]
        return result

    # without a public source to consult, the DB mapping is the final answer
    if not (registry.__get_schema_name__() == "bionty" and public_aware):
        return _return(result=std_names_db, mapper=std_names_db)

    # map synonyms in Bionty
    mapper = {}
    if return_mapper:
        # the first call returned the mapper; the standardized values
        # themselves are needed below for validation, so map once more
        mapper = std_names_db
        std_names_db = map_synonyms(
            df=df, identifiers=values, return_mapper=False, mute=True, **_kwargs
        )

    val_res = registry.validate(
        std_names_db, field=field, mute=True, organism=organism
    )
    if all(val_res):
        return _return(result=std_names_db, mapper=mapper)

    # only values that did not validate against the DB are looked up publicly
    nonval = np.array(std_names_db)[~val_res]
    std_names_bt_mapper = registry.public(organism=organism).standardize(
        nonval, return_mapper=True, mute=True, **_kwargs
    )

    if len(std_names_bt_mapper) > 0 and not mute:
        s = "" if len(std_names_bt_mapper) == 1 else "s"
        field_print = "synonym" if field == return_field else field
        warn_msg = (
            f"found {len(std_names_bt_mapper)} {field_print}{s} in Bionty:"
            f" {list(std_names_bt_mapper.keys())}"
        )
        warn_msg += (
            f"\n please add corresponding {registry._meta.model.__name__} records via"
            f" `.from_values({list(set(std_names_bt_mapper.values()))})`"
        )
        logger.warning(warn_msg)

    mapper.update(std_names_bt_mapper)
    # `pd.api.types.is_categorical_dtype` is deprecated in recent pandas;
    # inspect the dtype directly (plain lists have no `dtype` attribute)
    if isinstance(getattr(std_names_db, "dtype", None), pd.CategoricalDtype):
        result = std_names_db.cat.rename_categories(std_names_bt_mapper).tolist()
    else:
        result = pd.Series(std_names_db).replace(std_names_bt_mapper).tolist()
    return _return(result=result, mapper=mapper)
|
432
|
+
|
433
|
+
|
434
|
+
def _add_or_remove_synonyms(
|
435
|
+
synonym: str | ListLike,
|
436
|
+
record: Record,
|
437
|
+
action: Literal["add", "remove"],
|
438
|
+
force: bool = False,
|
439
|
+
save: bool | None = None,
|
440
|
+
):
|
441
|
+
"""Add or remove synonyms."""
|
442
|
+
|
443
|
+
def check_synonyms_in_all_records(synonyms: set[str], record: Record):
|
444
|
+
"""Errors if input synonym is associated with other records in the DB."""
|
445
|
+
import pandas as pd
|
446
|
+
from IPython.display import display
|
447
|
+
|
448
|
+
syns_all = (
|
449
|
+
record.__class__.objects.exclude(synonyms="").exclude(synonyms=None).all()
|
450
|
+
)
|
451
|
+
if len(syns_all) == 0:
|
452
|
+
return
|
453
|
+
df = pd.DataFrame(syns_all.values())
|
454
|
+
df["synonyms"] = df["synonyms"].str.split("|")
|
455
|
+
df = df.explode("synonyms")
|
456
|
+
matches_df = df[(df["synonyms"].isin(synonyms)) & (df["id"] != record.id)]
|
457
|
+
if matches_df.shape[0] > 0:
|
458
|
+
records_df = pd.DataFrame(syns_all.filter(id__in=matches_df["id"]).values())
|
459
|
+
logger.error(
|
460
|
+
f"input synonyms {matches_df['synonyms'].unique()} already associated"
|
461
|
+
" with the following records:\n"
|
462
|
+
)
|
463
|
+
display(records_df)
|
464
|
+
raise ValueError(
|
465
|
+
"cannot assigned a synonym that is already associated with a record to a different record.\n"
|
466
|
+
"Consider removing the synonym from existing records or using a different synonym."
|
467
|
+
)
|
468
|
+
|
469
|
+
# passed synonyms
|
470
|
+
# nothing happens when passing an empty string or list
|
471
|
+
if isinstance(synonym, str):
|
472
|
+
if len(synonym) == 0:
|
473
|
+
return
|
474
|
+
syn_new_set = {synonym}
|
475
|
+
else:
|
476
|
+
if synonym == [""]:
|
477
|
+
return
|
478
|
+
syn_new_set = set(synonym)
|
479
|
+
# nothing happens when passing an empty string or list
|
480
|
+
if len(syn_new_set) == 0:
|
481
|
+
return
|
482
|
+
# because we use | as the separator
|
483
|
+
if any("|" in i for i in syn_new_set):
|
484
|
+
raise ValueError("a synonym can't contain '|'!")
|
485
|
+
|
486
|
+
# existing synonyms
|
487
|
+
syns_exist = record.synonyms
|
488
|
+
if syns_exist is None or len(syns_exist) == 0:
|
489
|
+
syns_exist_set = set()
|
490
|
+
else:
|
491
|
+
syns_exist_set = set(syns_exist.split("|"))
|
492
|
+
|
493
|
+
if action == "add":
|
494
|
+
if not force:
|
495
|
+
check_synonyms_in_all_records(syn_new_set, record)
|
496
|
+
syns_exist_set.update(syn_new_set)
|
497
|
+
elif action == "remove":
|
498
|
+
syns_exist_set = syns_exist_set.difference(syn_new_set)
|
499
|
+
|
500
|
+
if len(syns_exist_set) == 0:
|
501
|
+
syns_str = None
|
502
|
+
else:
|
503
|
+
syns_str = "|".join(syns_exist_set)
|
504
|
+
|
505
|
+
record.synonyms = syns_str
|
506
|
+
|
507
|
+
if save is None:
|
508
|
+
# if record is already in DB, save the changes to DB
|
509
|
+
save = not record._state.adding
|
510
|
+
if save:
|
511
|
+
record.save()
|
512
|
+
|
513
|
+
|
514
|
+
def _check_synonyms_field_exist(record: Record):
|
515
|
+
try:
|
516
|
+
record.__getattribute__("synonyms")
|
517
|
+
except AttributeError:
|
518
|
+
raise NotImplementedError(
|
519
|
+
f"No synonyms field found in table {record.__class__.__name__}!"
|
520
|
+
) from None
|
521
|
+
|
522
|
+
|
523
|
+
def _filter_query_based_on_organism(
    queryset: QuerySet,
    field: str,
    organism: str | Record | None = None,
    values_list_field: str | None = None,
):
    """Restrict `queryset` to one organism where applicable and return its data.

    The organism filter is applied only if the registry has an organism field
    and `field` is not an ID field (see `_field_is_id`), and only if an
    organism record could be resolved.

    Returns:
        The flat `values_list` of `values_list_field` when that argument is
        given; otherwise a DataFrame built from all values of the queryset.
    """
    import pandas as pd

    registry = queryset.model

    organism_applies = _has_organism_field(registry) and not _field_is_id(
        field, registry
    )
    if organism_applies:
        # here, we can safely import bionty
        from bionty._bionty import create_or_get_organism_record

        organism_record = create_or_get_organism_record(
            organism=organism, registry=registry
        )
        if organism_record is not None:
            queryset = queryset.filter(organism__name=organism_record.name)

    if values_list_field is not None:
        return queryset.values_list(values_list_field, flat=True)
    return pd.DataFrame.from_records(queryset.values())
|
548
|
+
|
549
|
+
|
550
|
+
def _field_is_id(field: str, registry: type[Record]) -> bool:
|
551
|
+
"""Check if the field is an ontology ID."""
|
552
|
+
if hasattr(registry, "_ontology_id_field"):
|
553
|
+
if field == registry._ontology_id_field:
|
554
|
+
return True
|
555
|
+
if field.endswith("id"):
|
556
|
+
return True
|
557
|
+
return False
|
558
|
+
|
559
|
+
|
560
|
+
# Names of the CanValidate methods that get an implementation attached in the
# loop at the bottom of this block.
METHOD_NAMES = [
    "validate",
    "inspect",
    "standardize",
    "add_synonym",
    "remove_synonym",
    "set_abbr",
]

if ln_setup._TESTING:  # type: ignore
    from inspect import signature

    # Signatures of the CanValidate methods as declared on the class; only
    # collected in testing mode (presumably so that tests can compare them
    # with the implementations in this module).
    SIGS = {
        method_name: signature(getattr(CanValidate, method_name))
        for method_name in METHOD_NAMES
        if not method_name.startswith("__")
    }

# Attach each implementation, looked up by name in this module's globals.
for name in METHOD_NAMES:
    attach_func_to_class_method(name, CanValidate, globals())
|