lamindb 0.75.0__py3-none-any.whl → 0.75.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lamindb/__init__.py +1 -1
- lamindb/_can_validate.py +52 -22
- lamindb/_curate.py +384 -144
- lamindb/_from_values.py +8 -8
- lamindb/_record.py +26 -26
- lamindb/_save.py +5 -5
- lamindb/_view.py +13 -11
- lamindb/core/__init__.py +2 -0
- lamindb/core/_data.py +4 -4
- lamindb/core/_feature_manager.py +16 -6
- lamindb/core/schema.py +5 -5
- lamindb/core/storage/__init__.py +11 -2
- lamindb/core/storage/_valid_suffixes.py +16 -2
- lamindb/integrations/_vitessce.py +68 -31
- {lamindb-0.75.0.dist-info → lamindb-0.75.1.dist-info}/METADATA +4 -4
- {lamindb-0.75.0.dist-info → lamindb-0.75.1.dist-info}/RECORD +18 -18
- {lamindb-0.75.0.dist-info → lamindb-0.75.1.dist-info}/LICENSE +0 -0
- {lamindb-0.75.0.dist-info → lamindb-0.75.1.dist-info}/WHEEL +0 -0
lamindb/__init__.py
CHANGED
lamindb/_can_validate.py
CHANGED
@@ -60,6 +60,25 @@ def validate(
|
|
60
60
|
)
|
61
61
|
|
62
62
|
|
63
|
+
def _check_source_db(source: Record, using_key: str | None):
|
64
|
+
"""Check if the source is from the DB."""
|
65
|
+
if using_key is not None and using_key != "default":
|
66
|
+
if source._state.db != using_key:
|
67
|
+
raise ValueError(
|
68
|
+
f"source must be a bionty.Source record from instance '{using_key}'!"
|
69
|
+
)
|
70
|
+
|
71
|
+
|
72
|
+
def _check_organism_db(organism: Record, using_key: str | None):
|
73
|
+
"""Check if the organism is from the DB."""
|
74
|
+
if isinstance(organism, Record):
|
75
|
+
if using_key is not None and using_key != "default":
|
76
|
+
if organism._state.db != using_key:
|
77
|
+
raise ValueError(
|
78
|
+
f"organism must be a bionty.Organism record from instance '{using_key}'!"
|
79
|
+
)
|
80
|
+
|
81
|
+
|
63
82
|
def _inspect(
|
64
83
|
cls,
|
65
84
|
values: ListLike,
|
@@ -78,10 +97,13 @@ def _inspect(
|
|
78
97
|
|
79
98
|
field = get_name_field(cls, field=field)
|
80
99
|
queryset = _queryset(cls, using_key)
|
81
|
-
|
100
|
+
using_key = queryset.db
|
101
|
+
if isinstance(source, Record):
|
102
|
+
_check_source_db(source, using_key)
|
82
103
|
queryset = queryset.filter(source=source).all()
|
83
|
-
|
84
|
-
|
104
|
+
_check_organism_db(organism, using_key)
|
105
|
+
registry = queryset.model
|
106
|
+
model_name = registry._meta.model.__name__
|
85
107
|
|
86
108
|
# inspect in the DB
|
87
109
|
result_db = inspect(
|
@@ -94,9 +116,9 @@ def _inspect(
|
|
94
116
|
)
|
95
117
|
nonval = set(result_db.non_validated).difference(result_db.synonyms_mapper.keys())
|
96
118
|
|
97
|
-
if len(nonval) > 0 and
|
119
|
+
if len(nonval) > 0 and registry.__get_schema_name__() == "bionty":
|
98
120
|
try:
|
99
|
-
bionty_result =
|
121
|
+
bionty_result = registry.public(organism=organism, source=source).inspect(
|
100
122
|
values=nonval, field=field, mute=True
|
101
123
|
)
|
102
124
|
bionty_validated = bionty_result.validated
|
@@ -140,7 +162,7 @@ def _inspect(
|
|
140
162
|
logger.print(f" couldn't validate {labels}: {colors.red(print_values)}")
|
141
163
|
logger.print(
|
142
164
|
f"→ if you are sure, create new record{s} via"
|
143
|
-
f" {colors.italic(f'{
|
165
|
+
f" {colors.italic(f'{registry.__name__}()')} and save to your registry"
|
144
166
|
)
|
145
167
|
|
146
168
|
return result_db
|
@@ -166,8 +188,11 @@ def _validate(
|
|
166
188
|
field = get_name_field(cls, field=field)
|
167
189
|
|
168
190
|
queryset = _queryset(cls, using_key)
|
169
|
-
|
191
|
+
using_key = queryset.db
|
192
|
+
if isinstance(source, Record):
|
193
|
+
_check_source_db(source, using_key)
|
170
194
|
queryset = queryset.filter(source=source).all()
|
195
|
+
_check_organism_db(organism, using_key)
|
171
196
|
field_values = pd.Series(
|
172
197
|
_filter_query_based_on_organism(
|
173
198
|
queryset=queryset,
|
@@ -296,21 +321,24 @@ def _standardize(
|
|
296
321
|
cls, field=field if return_field is None else return_field
|
297
322
|
)
|
298
323
|
queryset = _queryset(cls, using_key)
|
299
|
-
|
324
|
+
using_key = queryset.db
|
325
|
+
if isinstance(source, Record):
|
326
|
+
_check_source_db(source, using_key)
|
300
327
|
queryset = queryset.filter(source=source).all()
|
301
|
-
|
328
|
+
_check_organism_db(organism, using_key)
|
329
|
+
registry = queryset.model
|
302
330
|
|
303
|
-
if _has_organism_field(
|
331
|
+
if _has_organism_field(registry):
|
304
332
|
# here, we can safely import bionty
|
305
333
|
from bionty._bionty import create_or_get_organism_record
|
306
334
|
|
307
|
-
organism_record = create_or_get_organism_record(organism=organism, orm=
|
335
|
+
organism_record = create_or_get_organism_record(organism=organism, orm=registry)
|
308
336
|
organism = (
|
309
337
|
organism_record.name if organism_record is not None else organism_record
|
310
338
|
)
|
311
339
|
|
312
340
|
try:
|
313
|
-
|
341
|
+
registry._meta.get_field(synonyms_field)
|
314
342
|
df = _filter_query_based_on_organism(
|
315
343
|
queryset=queryset, field=field, organism=organism
|
316
344
|
)
|
@@ -342,7 +370,7 @@ def _standardize(
|
|
342
370
|
return result
|
343
371
|
|
344
372
|
# map synonyms in Bionty
|
345
|
-
if
|
373
|
+
if registry.__get_schema_name__() == "bionty" and public_aware:
|
346
374
|
mapper = {}
|
347
375
|
if return_mapper:
|
348
376
|
mapper = std_names_db
|
@@ -350,12 +378,14 @@ def _standardize(
|
|
350
378
|
df=df, identifiers=values, return_mapper=False, mute=True, **_kwargs
|
351
379
|
)
|
352
380
|
|
353
|
-
val_res =
|
381
|
+
val_res = registry.validate(
|
382
|
+
std_names_db, field=field, mute=True, organism=organism
|
383
|
+
)
|
354
384
|
if all(val_res):
|
355
385
|
return _return(result=std_names_db, mapper=mapper)
|
356
386
|
|
357
387
|
nonval = np.array(std_names_db)[~val_res]
|
358
|
-
std_names_bt_mapper =
|
388
|
+
std_names_bt_mapper = registry.public(organism=organism).standardize(
|
359
389
|
nonval, return_mapper=True, mute=True, **_kwargs
|
360
390
|
)
|
361
391
|
|
@@ -367,7 +397,7 @@ def _standardize(
|
|
367
397
|
f" {list(std_names_bt_mapper.keys())}"
|
368
398
|
)
|
369
399
|
warn_msg += (
|
370
|
-
f"\n please add corresponding {
|
400
|
+
f"\n please add corresponding {registry._meta.model.__name__} records via"
|
371
401
|
f" `.from_values({list(set(std_names_bt_mapper.values()))})`"
|
372
402
|
)
|
373
403
|
logger.warning(warn_msg)
|
@@ -478,13 +508,13 @@ def _filter_query_based_on_organism(
|
|
478
508
|
"""Filter a queryset based on organism."""
|
479
509
|
import pandas as pd
|
480
510
|
|
481
|
-
|
511
|
+
registry = queryset.model
|
482
512
|
|
483
|
-
if _has_organism_field(
|
513
|
+
if _has_organism_field(registry) and not _field_is_id(field, registry):
|
484
514
|
# here, we can safely import bionty
|
485
515
|
from bionty._bionty import create_or_get_organism_record
|
486
516
|
|
487
|
-
organism_record = create_or_get_organism_record(organism=organism, orm=
|
517
|
+
organism_record = create_or_get_organism_record(organism=organism, orm=registry)
|
488
518
|
if organism_record is not None:
|
489
519
|
queryset = queryset.filter(organism__name=organism_record.name)
|
490
520
|
|
@@ -494,10 +524,10 @@ def _filter_query_based_on_organism(
|
|
494
524
|
return queryset.values_list(values_list_field, flat=True)
|
495
525
|
|
496
526
|
|
497
|
-
def _field_is_id(field: str,
|
527
|
+
def _field_is_id(field: str, registry: type[Record]) -> bool:
|
498
528
|
"""Check if the field is an ontology ID."""
|
499
|
-
if hasattr(
|
500
|
-
if field ==
|
529
|
+
if hasattr(registry, "_ontology_id_field"):
|
530
|
+
if field == registry._ontology_id_field:
|
501
531
|
return True
|
502
532
|
if field.endswith("id"):
|
503
533
|
return True
|