lamindb 0.76.9__py3-none-any.whl → 0.76.11__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lamindb/__init__.py +15 -14
- lamindb/_artifact.py +2 -1
- lamindb/_can_validate.py +46 -4
- lamindb/_collection.py +2 -1
- lamindb/_curate.py +3 -1
- lamindb/_feature_set.py +3 -1
- lamindb/_finish.py +19 -18
- lamindb/_from_values.py +110 -89
- lamindb/_query_set.py +3 -1
- lamindb/_record.py +81 -62
- lamindb/_run.py +3 -0
- lamindb/_save.py +3 -1
- lamindb/_transform.py +9 -6
- lamindb/core/_context.py +94 -78
- lamindb/core/_data.py +113 -41
- lamindb/core/_django.py +209 -0
- lamindb/core/_feature_manager.py +140 -13
- lamindb/core/_label_manager.py +58 -23
- lamindb/core/_mapped_collection.py +1 -1
- lamindb/core/_settings.py +2 -1
- lamindb/core/exceptions.py +9 -9
- lamindb/core/storage/_anndata_accessor.py +2 -1
- lamindb/core/versioning.py +2 -14
- {lamindb-0.76.9.dist-info → lamindb-0.76.11.dist-info}/METADATA +8 -8
- {lamindb-0.76.9.dist-info → lamindb-0.76.11.dist-info}/RECORD +27 -26
- {lamindb-0.76.9.dist-info → lamindb-0.76.11.dist-info}/LICENSE +0 -0
- {lamindb-0.76.9.dist-info → lamindb-0.76.11.dist-info}/WHEEL +0 -0
lamindb/__init__.py
CHANGED
@@ -21,7 +21,8 @@ Key functionality.
|
|
21
21
|
.. autosummary::
|
22
22
|
:toctree: .
|
23
23
|
|
24
|
-
|
24
|
+
track
|
25
|
+
finish
|
25
26
|
connect
|
26
27
|
Curator
|
27
28
|
view
|
@@ -33,6 +34,7 @@ Modules and settings.
|
|
33
34
|
:toctree: .
|
34
35
|
|
35
36
|
integrations
|
37
|
+
context
|
36
38
|
settings
|
37
39
|
setup
|
38
40
|
UPath
|
@@ -41,7 +43,7 @@ Modules and settings.
|
|
41
43
|
"""
|
42
44
|
|
43
45
|
# denote a release candidate for 0.1.0 with 0.1rc1, 0.1a1, 0.1b1, etc.
|
44
|
-
__version__ = "0.76.9"
|
46
|
+
__version__ = "0.76.11"
|
45
47
|
|
46
48
|
import os as _os
|
47
49
|
|
@@ -58,7 +60,7 @@ def __getattr__(name):
|
|
58
60
|
raise _InstanceNotSetupError()
|
59
61
|
|
60
62
|
|
61
|
-
if _check_instance_setup(from_lamindb=True):
|
63
|
+
if _check_instance_setup(from_module="lnschema_core"):
|
62
64
|
del _InstanceNotSetupError
|
63
65
|
del __getattr__ # delete so that imports work out
|
64
66
|
from lnschema_core.models import (
|
@@ -97,17 +99,16 @@ if _check_instance_setup(from_lamindb=True):
|
|
97
99
|
from .core._context import context
|
98
100
|
from .core._settings import settings
|
99
101
|
|
100
|
-
#
|
101
|
-
|
102
|
-
from lamindb_setup._init_instance import (
|
103
|
-
reload_schema_modules as _reload_schema_modules,
|
104
|
-
)
|
105
|
-
|
106
|
-
_reload_schema_modules(_lamindb_setup.settings.instance)
|
107
|
-
|
108
|
-
track = context.track # backward compat
|
109
|
-
finish = context.finish # backward compat
|
102
|
+
track = context.track # simple access because these are so common
|
103
|
+
finish = context.finish # simple access because these are so common
|
110
104
|
Curate = Curator # backward compat
|
111
105
|
settings.__doc__ = """Global settings (:class:`~lamindb.core.Settings`)."""
|
112
|
-
context.__doc__ = """Global run context (:class:`~lamindb.core.Context`)."""
|
106
|
+
context.__doc__ = """Global run context (:class:`~lamindb.core.Context`).
|
107
|
+
|
108
|
+
Note that you can access:
|
109
|
+
|
110
|
+
- `ln.context.track()` as `ln.track()`
|
111
|
+
- `ln.context.finish()` as `ln.finish()`
|
112
|
+
|
113
|
+
"""
|
113
114
|
from django.db.models import Q
|
lamindb/_artifact.py
CHANGED
@@ -2,8 +2,9 @@ from __future__ import annotations
|
|
2
2
|
|
3
3
|
import os
|
4
4
|
import shutil
|
5
|
+
from collections.abc import Mapping
|
5
6
|
from pathlib import Path, PurePath, PurePosixPath
|
6
|
-
from typing import TYPE_CHECKING, Any, Mapping
|
7
|
+
from typing import TYPE_CHECKING, Any
|
7
8
|
|
8
9
|
import fsspec
|
9
10
|
import lamindb_setup as ln_setup
|
lamindb/_can_validate.py
CHANGED
@@ -12,7 +12,7 @@ from lnschema_core import CanValidate, Record
|
|
12
12
|
|
13
13
|
from lamindb._utils import attach_func_to_class_method
|
14
14
|
|
15
|
-
from ._from_values import _has_organism_field, _print_values
|
15
|
+
from ._from_values import _has_organism_field, _print_values, get_or_create_records
|
16
16
|
from ._record import _queryset, get_name_field
|
17
17
|
|
18
18
|
if TYPE_CHECKING:
|
@@ -21,6 +21,33 @@ if TYPE_CHECKING:
|
|
21
21
|
from lnschema_core.types import ListLike, StrField
|
22
22
|
|
23
23
|
|
24
|
+
# from_values doesn't apply for QuerySet or Manager
|
25
|
+
@classmethod # type:ignore
|
26
|
+
@doc_args(CanValidate.from_values.__doc__)
|
27
|
+
def from_values(
|
28
|
+
cls,
|
29
|
+
values: ListLike,
|
30
|
+
field: StrField | None = None,
|
31
|
+
create: bool = False,
|
32
|
+
organism: Record | str | None = None,
|
33
|
+
source: Record | None = None,
|
34
|
+
mute: bool = False,
|
35
|
+
) -> list[Record]:
|
36
|
+
"""{}""" # noqa: D415
|
37
|
+
from_source = True if cls.__module__.startswith("bionty.") else False
|
38
|
+
|
39
|
+
field_str = get_name_field(cls, field=field)
|
40
|
+
return get_or_create_records(
|
41
|
+
iterable=values,
|
42
|
+
field=getattr(cls, field_str),
|
43
|
+
create=create,
|
44
|
+
from_source=from_source,
|
45
|
+
organism=organism,
|
46
|
+
source=source,
|
47
|
+
mute=mute,
|
48
|
+
)
|
49
|
+
|
50
|
+
|
24
51
|
@classmethod # type: ignore
|
25
52
|
@doc_args(CanValidate.inspect.__doc__)
|
26
53
|
def inspect(
|
@@ -349,16 +376,24 @@ def _standardize(
|
|
349
376
|
from bionty._bionty import create_or_get_organism_record
|
350
377
|
|
351
378
|
organism_record = create_or_get_organism_record(
|
352
|
-
organism=organism, registry=registry
|
379
|
+
organism=organism, registry=registry, field=field
|
353
380
|
)
|
354
381
|
organism = (
|
355
382
|
organism_record.name if organism_record is not None else organism_record
|
356
383
|
)
|
357
384
|
|
385
|
+
# only perform synonym mapping if field is the name field
|
386
|
+
if hasattr(registry, "_name_field") and field != registry._name_field:
|
387
|
+
synonyms_field = None
|
388
|
+
|
358
389
|
try:
|
359
390
|
registry._meta.get_field(synonyms_field)
|
391
|
+
fields = {i for i in [field, return_field, synonyms_field] if i is not None}
|
360
392
|
df = _filter_query_based_on_organism(
|
361
|
-
queryset=queryset,
|
393
|
+
queryset=queryset,
|
394
|
+
field=field,
|
395
|
+
organism=organism,
|
396
|
+
fields=list(fields),
|
362
397
|
)
|
363
398
|
except FieldDoesNotExist:
|
364
399
|
df = pd.DataFrame()
|
@@ -525,6 +560,7 @@ def _filter_query_based_on_organism(
|
|
525
560
|
field: str,
|
526
561
|
organism: str | Record | None = None,
|
527
562
|
values_list_field: str | None = None,
|
563
|
+
fields: list[str] | None = None,
|
528
564
|
):
|
529
565
|
"""Filter a queryset based on organism."""
|
530
566
|
import pandas as pd
|
@@ -536,13 +572,18 @@ def _filter_query_based_on_organism(
|
|
536
572
|
from bionty._bionty import create_or_get_organism_record
|
537
573
|
|
538
574
|
organism_record = create_or_get_organism_record(
|
539
|
-
organism=organism, registry=registry
|
575
|
+
organism=organism, registry=registry, field=field
|
540
576
|
)
|
541
577
|
if organism_record is not None:
|
542
578
|
queryset = queryset.filter(organism__name=organism_record.name)
|
543
579
|
|
544
580
|
if values_list_field is None:
|
581
|
+
if fields:
|
582
|
+
return pd.DataFrame.from_records(
|
583
|
+
queryset.values_list(*fields), columns=fields
|
584
|
+
)
|
545
585
|
return pd.DataFrame.from_records(queryset.values())
|
586
|
+
|
546
587
|
else:
|
547
588
|
return queryset.values_list(values_list_field, flat=True)
|
548
589
|
|
@@ -564,6 +605,7 @@ METHOD_NAMES = [
|
|
564
605
|
"add_synonym",
|
565
606
|
"remove_synonym",
|
566
607
|
"set_abbr",
|
608
|
+
"from_values",
|
567
609
|
]
|
568
610
|
|
569
611
|
if ln_setup._TESTING: # type: ignore
|
lamindb/_collection.py
CHANGED
@@ -4,7 +4,6 @@ from collections import defaultdict
|
|
4
4
|
from typing import (
|
5
5
|
TYPE_CHECKING,
|
6
6
|
Any,
|
7
|
-
Iterable,
|
8
7
|
Literal,
|
9
8
|
)
|
10
9
|
|
@@ -37,6 +36,8 @@ from .core._data import (
|
|
37
36
|
from .core._settings import settings
|
38
37
|
|
39
38
|
if TYPE_CHECKING:
|
39
|
+
from collections.abc import Iterable
|
40
|
+
|
40
41
|
from lamindb.core.storage import UPath
|
41
42
|
|
42
43
|
from ._query_set import QuerySet
|
lamindb/_curate.py
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
from __future__ import annotations
|
2
2
|
|
3
3
|
import copy
|
4
|
-
from typing import TYPE_CHECKING, Iterable
|
4
|
+
from typing import TYPE_CHECKING
|
5
5
|
|
6
6
|
import anndata as ad
|
7
7
|
import lamindb_setup as ln_setup
|
@@ -19,6 +19,8 @@ from lnschema_core import (
|
|
19
19
|
from .core.exceptions import ValidationError
|
20
20
|
|
21
21
|
if TYPE_CHECKING:
|
22
|
+
from collections.abc import Iterable
|
23
|
+
|
22
24
|
from lamindb_setup.core.types import UPathStr
|
23
25
|
from lnschema_core.types import FieldAttr
|
24
26
|
from mudata import MuData
|
lamindb/_feature_set.py
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
from __future__ import annotations
|
2
2
|
|
3
|
-
from typing import TYPE_CHECKING, Iterable
|
3
|
+
from typing import TYPE_CHECKING
|
4
4
|
|
5
5
|
import lamindb_setup as ln_setup
|
6
6
|
import numpy as np
|
@@ -21,6 +21,8 @@ from .core.schema import (
|
|
21
21
|
)
|
22
22
|
|
23
23
|
if TYPE_CHECKING:
|
24
|
+
from collections.abc import Iterable
|
25
|
+
|
24
26
|
import pandas as pd
|
25
27
|
|
26
28
|
from ._query_set import QuerySet
|
lamindb/_finish.py
CHANGED
@@ -1,8 +1,6 @@
|
|
1
1
|
from __future__ import annotations
|
2
2
|
|
3
|
-
import os
|
4
3
|
import re
|
5
|
-
import shutil
|
6
4
|
from datetime import datetime, timezone
|
7
5
|
from typing import TYPE_CHECKING
|
8
6
|
|
@@ -37,6 +35,13 @@ def prepare_notebook(
|
|
37
35
|
if strip_title:
|
38
36
|
lines.pop(i)
|
39
37
|
cell["source"] = "\n".join(lines)
|
38
|
+
# strip resaved finish error if present
|
39
|
+
# this is normally the last cell
|
40
|
+
if cell["cell_type"] == "code" and ".finish(" in cell["source"]:
|
41
|
+
for output in cell["outputs"]:
|
42
|
+
if output.get("ename", None) == "NotebookNotSaved":
|
43
|
+
cell["outputs"] = []
|
44
|
+
break
|
40
45
|
return None
|
41
46
|
|
42
47
|
|
@@ -77,17 +82,6 @@ def notebook_to_script(
|
|
77
82
|
script_path.write_text(py_content)
|
78
83
|
|
79
84
|
|
80
|
-
def script_to_notebook(transform: Transform, notebook_path: Path) -> None:
|
81
|
-
import jupytext
|
82
|
-
|
83
|
-
# get title back
|
84
|
-
py_content = transform.source_code.replace(
|
85
|
-
"# # transform.name", f"# # {transform.name}"
|
86
|
-
)
|
87
|
-
notebook = jupytext.reads(py_content, fmt="py:percent")
|
88
|
-
jupytext.write(notebook, notebook_path)
|
89
|
-
|
90
|
-
|
91
85
|
def save_context_core(
|
92
86
|
*,
|
93
87
|
run: Run,
|
@@ -97,6 +91,10 @@ def save_context_core(
|
|
97
91
|
ignore_non_consecutive: bool | None = None,
|
98
92
|
from_cli: bool = False,
|
99
93
|
) -> str | None:
|
94
|
+
from lnschema_core.models import (
|
95
|
+
format_field_value, # needs to come after lamindb was imported because of CLI use
|
96
|
+
)
|
97
|
+
|
100
98
|
import lamindb as ln
|
101
99
|
|
102
100
|
from .core._context import context, is_run_from_ipython
|
@@ -121,7 +119,7 @@ def save_context_core(
|
|
121
119
|
notebook_content = read_notebook(filepath) # type: ignore
|
122
120
|
if not ignore_non_consecutive: # ignore_non_consecutive is None or False
|
123
121
|
is_consecutive = check_consecutiveness(
|
124
|
-
notebook_content, calling_statement=".finish()"
|
122
|
+
notebook_content, calling_statement=".finish("
|
125
123
|
)
|
126
124
|
if not is_consecutive:
|
127
125
|
response = "n" # ignore_non_consecutive == False
|
@@ -158,15 +156,13 @@ def save_context_core(
|
|
158
156
|
if hash != ref_hash:
|
159
157
|
response = input(
|
160
158
|
f"You are about to overwrite existing source code (hash '{ref_hash}') for Transform('{transform.uid}')."
|
161
|
-
f"Proceed? (y/n)"
|
159
|
+
f" Proceed? (y/n)"
|
162
160
|
)
|
163
161
|
if response == "y":
|
164
162
|
transform.source_code = source_code_path.read_text()
|
165
163
|
transform.hash = hash
|
166
164
|
else:
|
167
|
-
logger.warning(
|
168
|
-
"Please re-run `ln.context.track()` to make a new version"
|
169
|
-
)
|
165
|
+
logger.warning("Please re-run `ln.track()` to make a new version")
|
170
166
|
return "rerun-the-notebook"
|
171
167
|
else:
|
172
168
|
logger.important("source code is already saved")
|
@@ -236,6 +232,11 @@ def save_context_core(
|
|
236
232
|
transform.save()
|
237
233
|
|
238
234
|
# finalize
|
235
|
+
if not from_cli:
|
236
|
+
run_time = run.finished_at - run.started_at
|
237
|
+
logger.important(
|
238
|
+
f"finished Run('{run.uid[:8]}') after {run_time} at {format_field_value(run.finished_at)}"
|
239
|
+
)
|
239
240
|
if ln_setup.settings.instance.is_on_hub:
|
240
241
|
identifier = ln_setup.settings.instance.slug
|
241
242
|
logger.important(
|
lamindb/_from_values.py
CHANGED
@@ -1,15 +1,17 @@
|
|
1
1
|
from __future__ import annotations
|
2
2
|
|
3
|
-
from typing import TYPE_CHECKING, Iterable
|
3
|
+
from typing import TYPE_CHECKING
|
4
4
|
|
5
5
|
import pandas as pd
|
6
6
|
from django.core.exceptions import FieldDoesNotExist
|
7
7
|
from lamin_utils import colors, logger
|
8
|
-
from lnschema_core.models import Feature, Record, ULabel
|
8
|
+
from lnschema_core.models import Feature, Field, Record, ULabel
|
9
9
|
|
10
10
|
from .core._settings import settings
|
11
11
|
|
12
12
|
if TYPE_CHECKING:
|
13
|
+
from collections.abc import Iterable
|
14
|
+
|
13
15
|
from lnschema_core.types import ListLike, StrField
|
14
16
|
|
15
17
|
|
@@ -29,20 +31,17 @@ def get_or_create_records(
|
|
29
31
|
if create:
|
30
32
|
return [registry(**{field.field.name: value}) for value in iterable]
|
31
33
|
creation_search_names = settings.creation.search_names
|
32
|
-
feature: Feature = None
|
33
34
|
organism = _get_organism_record(field, organism)
|
34
|
-
kwargs: dict = {}
|
35
|
-
if organism is not None:
|
36
|
-
kwargs["organism"] = organism
|
37
|
-
if source is not None:
|
38
|
-
kwargs["source"] = source
|
39
35
|
settings.creation.search_names = False
|
40
36
|
try:
|
41
37
|
iterable_idx = index_iterable(iterable)
|
42
38
|
|
43
39
|
# returns existing records & non-existing values
|
44
40
|
records, nonexist_values, msg = get_existing_records(
|
45
|
-
iterable_idx=iterable_idx,
|
41
|
+
iterable_idx=iterable_idx,
|
42
|
+
field=field,
|
43
|
+
organism=organism,
|
44
|
+
mute=mute,
|
46
45
|
)
|
47
46
|
|
48
47
|
# new records to be created based on new values
|
@@ -58,11 +57,10 @@ def get_or_create_records(
|
|
58
57
|
):
|
59
58
|
source_record = records[0].source
|
60
59
|
if not source_record and hasattr(registry, "public"):
|
61
|
-
|
60
|
+
if organism is None:
|
61
|
+
organism = _ensembl_prefix(nonexist_values[0], field, organism)
|
62
|
+
organism = _get_organism_record(field, organism, force=True)
|
62
63
|
|
63
|
-
source_record = get_source_record(
|
64
|
-
registry.public(organism=organism), registry
|
65
|
-
)
|
66
64
|
if source_record:
|
67
65
|
from bionty.core._add_ontology import check_source_in_db
|
68
66
|
|
@@ -82,9 +80,10 @@ def get_or_create_records(
|
|
82
80
|
records_bionty, unmapped_values = create_records_from_source(
|
83
81
|
iterable_idx=nonexist_values,
|
84
82
|
field=field,
|
83
|
+
organism=organism,
|
84
|
+
source=source_record,
|
85
85
|
msg=msg,
|
86
86
|
mute=mute,
|
87
|
-
**kwargs,
|
88
87
|
)
|
89
88
|
if len(records_bionty) > 0:
|
90
89
|
msg = ""
|
@@ -106,17 +105,17 @@ def get_or_create_records(
|
|
106
105
|
f"{colors.red('did not create')} {name} record{s} for "
|
107
106
|
f"{n_nonval} {colors.italic(f'{field.field.name}{s}')}: {print_values}"
|
108
107
|
)
|
109
|
-
if registry.__get_schema_name__() == "bionty" or registry == ULabel:
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
108
|
+
# if registry.__get_schema_name__() == "bionty" or registry == ULabel:
|
109
|
+
# if isinstance(iterable, pd.Series):
|
110
|
+
# feature = iterable.name
|
111
|
+
# feature_name = None
|
112
|
+
# if isinstance(feature, str):
|
113
|
+
# feature_name = feature
|
114
|
+
# if feature_name is not None:
|
115
|
+
# if feature_name is not None:
|
116
|
+
# for record in records:
|
117
|
+
# record._feature = feature_name
|
118
|
+
# logger.debug(f"added default feature '{feature_name}'")
|
120
119
|
return records
|
121
120
|
finally:
|
122
121
|
settings.creation.search_names = creation_search_names
|
@@ -125,46 +124,27 @@ def get_or_create_records(
|
|
125
124
|
def get_existing_records(
|
126
125
|
iterable_idx: pd.Index,
|
127
126
|
field: StrField,
|
127
|
+
organism: Record | None = None,
|
128
128
|
mute: bool = False,
|
129
|
-
**kwargs,
|
130
129
|
):
|
130
|
+
# NOTE: existing records matching is agnostic to the source
|
131
131
|
model = field.field.model
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
132
|
+
if organism is None and field.field.name == "ensembl_gene_id":
|
133
|
+
if len(iterable_idx) > 0:
|
134
|
+
organism = _ensembl_prefix(iterable_idx[0], field, organism)
|
135
|
+
organism = _get_organism_record(field, organism, force=True)
|
136
136
|
|
137
137
|
# standardize based on the DB reference
|
138
138
|
# log synonyms mapped terms
|
139
|
-
|
139
|
+
syn_mapper = model.standardize(
|
140
140
|
iterable_idx,
|
141
141
|
field=field,
|
142
|
-
organism=kwargs.get("organism"),
|
143
|
-
source=kwargs.get("source"),
|
142
|
+
organism=organism,
|
144
143
|
mute=True,
|
144
|
+
public_aware=False,
|
145
|
+
return_mapper=True,
|
145
146
|
)
|
146
|
-
|
147
|
-
|
148
|
-
syn_msg = ""
|
149
|
-
if len(syn_mapper) > 0:
|
150
|
-
s = "" if len(syn_mapper) == 1 else "s"
|
151
|
-
names = list(syn_mapper.keys())
|
152
|
-
print_values = colors.green(_print_values(names))
|
153
|
-
syn_msg = (
|
154
|
-
"loaded"
|
155
|
-
f" {colors.green(f'{len(syn_mapper)} {model.__name__} record{s}')}"
|
156
|
-
f" matching {colors.italic('synonyms')}: {print_values}"
|
157
|
-
)
|
158
|
-
iterable_idx = iterable_idx.to_frame().rename(index=syn_mapper).index
|
159
|
-
|
160
|
-
# get all existing records in the db
|
161
|
-
# if necessary, create records for the values in kwargs
|
162
|
-
# k:v -> k:v_record
|
163
|
-
# kwargs is used to deal with organism
|
164
|
-
condition.update({f"{field.field.name}__in": iterable_idx.values})
|
165
|
-
|
166
|
-
query_set = model.filter(**condition)
|
167
|
-
records = query_set.list()
|
147
|
+
iterable_idx = iterable_idx.to_frame().rename(index=syn_mapper).index
|
168
148
|
|
169
149
|
# now we have to sort the list of queried records
|
170
150
|
# preserved = Case(
|
@@ -177,16 +157,33 @@ def get_existing_records(
|
|
177
157
|
# records = query_set.order_by(preserved).list()
|
178
158
|
|
179
159
|
# log validated terms
|
180
|
-
|
160
|
+
is_validated = model.validate(
|
161
|
+
iterable_idx, field=field, organism=organism, mute=True
|
162
|
+
)
|
163
|
+
if len(is_validated) > 0:
|
164
|
+
validated = iterable_idx[is_validated]
|
165
|
+
else:
|
166
|
+
validated = []
|
181
167
|
msg = ""
|
182
|
-
|
183
|
-
|
184
|
-
|
185
|
-
|
186
|
-
|
187
|
-
|
188
|
-
|
189
|
-
|
168
|
+
syn_msg = ""
|
169
|
+
if not mute:
|
170
|
+
if len(validated) > 0:
|
171
|
+
s = "" if len(validated) == 1 else "s"
|
172
|
+
print_values = colors.green(_print_values(validated))
|
173
|
+
msg = (
|
174
|
+
"loaded"
|
175
|
+
f" {colors.green(f'{len(validated)} {model.__name__} record{s}')}"
|
176
|
+
f" matching {colors.italic(f'{field.field.name}')}: {print_values}"
|
177
|
+
)
|
178
|
+
if len(syn_mapper) > 0:
|
179
|
+
s = "" if len(syn_mapper) == 1 else "s"
|
180
|
+
names = list(syn_mapper.keys())
|
181
|
+
print_values = colors.green(_print_values(names))
|
182
|
+
syn_msg = (
|
183
|
+
"loaded"
|
184
|
+
f" {colors.green(f'{len(syn_mapper)} {model.__name__} record{s}')}"
|
185
|
+
f" matching {colors.italic('synonyms')}: {print_values}"
|
186
|
+
)
|
190
187
|
|
191
188
|
# no logging if all values are validated
|
192
189
|
# logs if there are synonyms
|
@@ -197,20 +194,28 @@ def get_existing_records(
|
|
197
194
|
logger.success(syn_msg)
|
198
195
|
msg = ""
|
199
196
|
|
200
|
-
|
201
|
-
|
202
|
-
|
203
|
-
|
197
|
+
# get all existing records in the db
|
198
|
+
# if necessary, create records for the values in kwargs
|
199
|
+
# k:v -> k:v_record
|
200
|
+
query = {f"{field.field.name}__in": iterable_idx.values}
|
201
|
+
if organism is not None:
|
202
|
+
query["organism"] = organism
|
203
|
+
records = model.filter(**query).list()
|
204
204
|
|
205
|
-
|
205
|
+
if len(validated) == len(iterable_idx):
|
206
|
+
return records, [], msg
|
207
|
+
else:
|
208
|
+
nonval_values = iterable_idx.difference(validated)
|
209
|
+
return records, nonval_values, msg
|
206
210
|
|
207
211
|
|
208
212
|
def create_records_from_source(
|
209
213
|
iterable_idx: pd.Index,
|
210
214
|
field: StrField,
|
215
|
+
organism: Record | None = None,
|
216
|
+
source: Record | None = None,
|
211
217
|
msg: str = "",
|
212
218
|
mute: bool = False,
|
213
|
-
**kwargs,
|
214
219
|
):
|
215
220
|
model = field.field.model
|
216
221
|
records: list = []
|
@@ -221,19 +226,13 @@ def create_records_from_source(
|
|
221
226
|
# create the corresponding bionty object from model
|
222
227
|
try:
|
223
228
|
# TODO: more generic
|
224
|
-
|
225
|
-
if field.field.name == "ensembl_gene_id":
|
226
|
-
if iterable_idx[0].startswith("ENSG"):
|
227
|
-
organism = "human"
|
228
|
-
elif iterable_idx[0].startswith("ENSMUSG"):
|
229
|
-
organism = "mouse"
|
230
|
-
public_ontology = model.public(organism=organism, source=kwargs.get("source"))
|
229
|
+
public_ontology = model.public(organism=organism, source=source)
|
231
230
|
except Exception:
|
232
231
|
# for custom records that are not created from public sources
|
233
232
|
return records, iterable_idx
|
234
|
-
#
|
235
|
-
|
236
|
-
|
233
|
+
# get the default source
|
234
|
+
if source is None:
|
235
|
+
source = get_source_record(public_ontology, model)
|
237
236
|
|
238
237
|
# filter the columns in bionty df based on fields
|
239
238
|
bionty_df = filter_bionty_df_columns(model=model, public_ontology=public_ontology)
|
@@ -264,15 +263,17 @@ def create_records_from_source(
|
|
264
263
|
bionty_kwargs, multi_msg = _bulk_create_dicts_from_df(
|
265
264
|
keys=mapped_values, column_name=field.field.name, df=bionty_df
|
266
265
|
)
|
267
|
-
|
268
|
-
if "
|
269
|
-
|
270
|
-
|
271
|
-
|
272
|
-
|
273
|
-
|
266
|
+
|
267
|
+
if hasattr(model, "organism_id") and organism is None:
|
268
|
+
organism = _get_organism_record(field, source.organism, force=True)
|
269
|
+
|
270
|
+
create_kwargs = (
|
271
|
+
{"organism": organism, "source": source}
|
272
|
+
if organism is not None
|
273
|
+
else {"source": source}
|
274
|
+
)
|
274
275
|
for bk in bionty_kwargs:
|
275
|
-
records.append(model(**bk, **
|
276
|
+
records.append(model(**bk, **create_kwargs))
|
276
277
|
|
277
278
|
# number of records that matches field (not synonyms)
|
278
279
|
validated = result.validated
|
@@ -364,6 +365,13 @@ def _has_organism_field(registry: type[Record]) -> bool:
|
|
364
365
|
def _get_organism_record(
|
365
366
|
field: StrField, organism: str | Record, force: bool = False
|
366
367
|
) -> Record:
|
368
|
+
"""Get organism record.
|
369
|
+
|
370
|
+
Args:
|
371
|
+
field: the field to get the organism record for
|
372
|
+
organism: the organism to get the record for
|
373
|
+
force: whether to force fetching the organism record
|
374
|
+
"""
|
367
375
|
registry = field.field.model
|
368
376
|
check = True
|
369
377
|
if not force and hasattr(registry, "_ontology_id_field"):
|
@@ -375,8 +383,21 @@ def _get_organism_record(
|
|
375
383
|
if _has_organism_field(registry) and check:
|
376
384
|
from bionty._bionty import create_or_get_organism_record
|
377
385
|
|
386
|
+
if field and not isinstance(field, str):
|
387
|
+
field = field.field.name
|
388
|
+
|
378
389
|
organism_record = create_or_get_organism_record(
|
379
|
-
organism=organism, registry=registry
|
390
|
+
organism=organism, registry=registry, field=field
|
380
391
|
)
|
381
392
|
if organism_record is not None:
|
382
393
|
return organism_record
|
394
|
+
|
395
|
+
|
396
|
+
def _ensembl_prefix(id: str, field: StrField, organism: Record | None) -> str | None:
|
397
|
+
if field.field.name == "ensembl_gene_id" and organism is None:
|
398
|
+
if id.startswith("ENSG"):
|
399
|
+
organism = "human"
|
400
|
+
elif id.startswith("ENSMUSG"):
|
401
|
+
organism = "mouse"
|
402
|
+
|
403
|
+
return organism
|
lamindb/_query_set.py
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
from __future__ import annotations
|
2
2
|
|
3
3
|
from collections import UserList
|
4
|
-
from typing import TYPE_CHECKING, Iterable, NamedTuple
|
4
|
+
from typing import TYPE_CHECKING, NamedTuple
|
5
5
|
|
6
6
|
import pandas as pd
|
7
7
|
from django.db import models
|
@@ -23,6 +23,8 @@ from lnschema_core.models import (
|
|
23
23
|
from lamindb.core.exceptions import DoesNotExist
|
24
24
|
|
25
25
|
if TYPE_CHECKING:
|
26
|
+
from collections.abc import Iterable
|
27
|
+
|
26
28
|
from lnschema_core.types import ListLike, StrField
|
27
29
|
|
28
30
|
|