lamindb 0.74.3__py3-none-any.whl → 0.75.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lamindb/__init__.py +1 -1
- lamindb/_artifact.py +85 -43
- lamindb/_can_validate.py +55 -20
- lamindb/_collection.py +36 -28
- lamindb/_curate.py +55 -44
- lamindb/_feature_set.py +5 -5
- lamindb/_filter.py +3 -3
- lamindb/_finish.py +29 -23
- lamindb/_from_values.py +41 -60
- lamindb/_is_versioned.py +1 -1
- lamindb/_parents.py +38 -13
- lamindb/_record.py +19 -20
- lamindb/_save.py +2 -2
- lamindb/_transform.py +27 -16
- lamindb/core/_data.py +14 -16
- lamindb/core/_feature_manager.py +34 -44
- lamindb/core/_label_manager.py +17 -19
- lamindb/core/_mapped_collection.py +1 -1
- lamindb/core/_run_context.py +6 -8
- lamindb/core/datasets/_core.py +7 -7
- lamindb/core/exceptions.py +11 -0
- lamindb/core/storage/__init__.py +1 -0
- lamindb/core/storage/_anndata_accessor.py +735 -0
- lamindb/core/storage/_backed_access.py +77 -747
- lamindb/core/storage/paths.py +9 -14
- lamindb/core/types.py +3 -0
- lamindb/core/versioning.py +1 -1
- lamindb/integrations/__init__.py +1 -0
- {lamindb-0.74.3.dist-info → lamindb-0.75.0.dist-info}/METADATA +5 -5
- lamindb-0.75.0.dist-info/RECORD +58 -0
- lamindb-0.74.3.dist-info/RECORD +0 -57
- {lamindb-0.74.3.dist-info → lamindb-0.75.0.dist-info}/LICENSE +0 -0
- {lamindb-0.74.3.dist-info → lamindb-0.75.0.dist-info}/WHEEL +0 -0
lamindb/_is_versioned.py
CHANGED
@@ -16,7 +16,7 @@ def _add_to_version_family(
|
|
16
16
|
):
|
17
17
|
old_uid = self.uid
|
18
18
|
new_uid, version = get_uid_from_old_version(is_new_version_of, version)
|
19
|
-
if self.__class__.__name__ == "Artifact" and self.
|
19
|
+
if self.__class__.__name__ == "Artifact" and self._key_is_virtual:
|
20
20
|
old_path = self.path
|
21
21
|
new_path = get_new_path_from_uid(
|
22
22
|
old_path=old_path, old_uid=old_uid, new_uid=new_uid
|
lamindb/_parents.py
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
from __future__ import annotations
|
2
2
|
|
3
3
|
import builtins
|
4
|
-
from typing import TYPE_CHECKING
|
4
|
+
from typing import TYPE_CHECKING, Literal
|
5
5
|
|
6
6
|
import lamindb_setup as ln_setup
|
7
7
|
from lamin_utils import logger
|
@@ -10,7 +10,7 @@ from lnschema_core.models import HasParents, format_field_value
|
|
10
10
|
|
11
11
|
from lamindb._utils import attach_func_to_class_method
|
12
12
|
|
13
|
-
from ._record import
|
13
|
+
from ._record import get_name_field
|
14
14
|
|
15
15
|
if TYPE_CHECKING:
|
16
16
|
from lnschema_core.types import StrField
|
@@ -61,7 +61,7 @@ def view_parents(
|
|
61
61
|
distance: int = 5,
|
62
62
|
):
|
63
63
|
if field is None:
|
64
|
-
field =
|
64
|
+
field = get_name_field(self)
|
65
65
|
if not isinstance(field, str):
|
66
66
|
field = field.field.name
|
67
67
|
|
@@ -137,10 +137,14 @@ def view_lineage(data: Artifact | Collection, with_children: bool = True) -> Non
|
|
137
137
|
|
138
138
|
|
139
139
|
def _view_parents(
|
140
|
-
record: Record,
|
140
|
+
record: Record,
|
141
|
+
field: str,
|
142
|
+
with_children: bool = False,
|
143
|
+
distance: int = 100,
|
144
|
+
attr_name: Literal["parents", "predecessors"] = "parents",
|
141
145
|
):
|
142
146
|
"""Graph of parents."""
|
143
|
-
if not hasattr(record,
|
147
|
+
if not hasattr(record, attr_name):
|
144
148
|
raise NotImplementedError(
|
145
149
|
f"Parents view is not supported for {record.__class__.__name__}!"
|
146
150
|
)
|
@@ -149,13 +153,17 @@ def _view_parents(
|
|
149
153
|
|
150
154
|
df_edges = None
|
151
155
|
df_edges_parents = _df_edges_from_parents(
|
152
|
-
record=record, field=field, distance=distance
|
156
|
+
record=record, field=field, distance=distance, attr_name=attr_name
|
153
157
|
)
|
154
158
|
if df_edges_parents is not None:
|
155
159
|
df_edges = df_edges_parents
|
156
160
|
if with_children:
|
157
161
|
df_edges_children = _df_edges_from_parents(
|
158
|
-
record=record,
|
162
|
+
record=record,
|
163
|
+
field=field,
|
164
|
+
distance=distance,
|
165
|
+
children=True,
|
166
|
+
attr_name=attr_name,
|
159
167
|
)
|
160
168
|
if df_edges_children is not None:
|
161
169
|
if df_edges is not None:
|
@@ -197,12 +205,18 @@ def _view_parents(
|
|
197
205
|
_view(u)
|
198
206
|
|
199
207
|
|
200
|
-
def _get_parents(
|
208
|
+
def _get_parents(
|
209
|
+
record: Record,
|
210
|
+
field: str,
|
211
|
+
distance: int,
|
212
|
+
children: bool = False,
|
213
|
+
attr_name: Literal["parents", "predecessors"] = "parents",
|
214
|
+
):
|
201
215
|
"""Recursively get parent records within a distance."""
|
202
216
|
if children:
|
203
|
-
key =
|
217
|
+
key = attr_name
|
204
218
|
else:
|
205
|
-
key = "children"
|
219
|
+
key = "children" if attr_name == "parents" else "successors" # type: ignore
|
206
220
|
model = record.__class__
|
207
221
|
condition = f"{key}__{field}"
|
208
222
|
results = model.filter(**{condition: record.__getattribute__(field)}).all()
|
@@ -228,12 +242,23 @@ def _get_parents(record: Record, field: str, distance: int, children: bool = Fal
|
|
228
242
|
|
229
243
|
|
230
244
|
def _df_edges_from_parents(
|
231
|
-
record: Record,
|
245
|
+
record: Record,
|
246
|
+
field: str,
|
247
|
+
distance: int,
|
248
|
+
children: bool = False,
|
249
|
+
attr_name: Literal["parents", "predecessors"] = "parents",
|
232
250
|
):
|
233
251
|
"""Construct a DataFrame of edges as the input of graphviz.Digraph."""
|
234
|
-
|
252
|
+
if attr_name == "parents":
|
253
|
+
key = "children" if children else "parents"
|
254
|
+
else:
|
255
|
+
key = "successors" if children else "predecessors"
|
235
256
|
parents = _get_parents(
|
236
|
-
record=record,
|
257
|
+
record=record,
|
258
|
+
field=field,
|
259
|
+
distance=distance,
|
260
|
+
children=children,
|
261
|
+
attr_name=attr_name,
|
237
262
|
)
|
238
263
|
all = record.__class__.objects
|
239
264
|
records = parents | all.filter(id=record.id)
|
lamindb/_record.py
CHANGED
@@ -160,19 +160,22 @@ def from_values(
|
|
160
160
|
field: StrField | None = None,
|
161
161
|
create: bool = False,
|
162
162
|
organism: Record | str | None = None,
|
163
|
-
|
163
|
+
source: Record | None = None,
|
164
164
|
mute: bool = False,
|
165
165
|
) -> list[Record]:
|
166
166
|
"""{}""" # noqa: D415
|
167
|
-
|
168
|
-
|
167
|
+
from_source = True if cls.__module__.startswith("bionty.") else False
|
168
|
+
# if records from source is already saved in db, skip from_source
|
169
|
+
if isinstance(source, Record) and source.in_db:
|
170
|
+
from_source = False
|
171
|
+
field_str = get_name_field(cls, field=field)
|
169
172
|
return get_or_create_records(
|
170
173
|
iterable=values,
|
171
174
|
field=getattr(cls, field_str),
|
172
175
|
create=create,
|
173
|
-
|
176
|
+
from_source=from_source,
|
174
177
|
organism=organism,
|
175
|
-
|
178
|
+
source=source,
|
176
179
|
mute=mute,
|
177
180
|
)
|
178
181
|
|
@@ -284,7 +287,7 @@ def _lookup(
|
|
284
287
|
) -> NamedTuple:
|
285
288
|
"""{}""" # noqa: D415
|
286
289
|
queryset = _queryset(cls, using_key=using_key)
|
287
|
-
field =
|
290
|
+
field = get_name_field(orm=queryset.model, field=field)
|
288
291
|
|
289
292
|
return Lookup(
|
290
293
|
records=queryset,
|
@@ -293,7 +296,7 @@ def _lookup(
|
|
293
296
|
prefix="ln",
|
294
297
|
).lookup(
|
295
298
|
return_field=(
|
296
|
-
|
299
|
+
get_name_field(orm=queryset.model, field=return_field)
|
297
300
|
if return_field is not None
|
298
301
|
else None
|
299
302
|
)
|
@@ -311,7 +314,7 @@ def lookup(
|
|
311
314
|
return _lookup(cls=cls, field=field, return_field=return_field)
|
312
315
|
|
313
316
|
|
314
|
-
def
|
317
|
+
def get_name_field(
|
315
318
|
orm: Record | QuerySet | Manager,
|
316
319
|
*,
|
317
320
|
field: str | StrField | None = None,
|
@@ -321,14 +324,11 @@ def get_default_str_field(
|
|
321
324
|
orm = orm.model
|
322
325
|
model_field_names = [i.name for i in orm._meta.fields]
|
323
326
|
|
324
|
-
# set default field
|
327
|
+
# set to default name field
|
325
328
|
if field is None:
|
326
|
-
if orm
|
327
|
-
field = orm._meta.get_field(
|
328
|
-
elif orm._meta.model.__name__ == "User":
|
329
|
-
field = orm._meta.get_field("handle")
|
329
|
+
if hasattr(orm, "_name_field"):
|
330
|
+
field = orm._meta.get_field(orm._name_field)
|
330
331
|
elif "name" in model_field_names:
|
331
|
-
# by default use the name field
|
332
332
|
field = orm._meta.get_field("name")
|
333
333
|
else:
|
334
334
|
# first char or text field that doesn't contain "id"
|
@@ -339,7 +339,7 @@ def get_default_str_field(
|
|
339
339
|
field = i
|
340
340
|
break
|
341
341
|
|
342
|
-
# no default field can be found
|
342
|
+
# no default name field can be found
|
343
343
|
if field is None:
|
344
344
|
raise ValueError(
|
345
345
|
"please pass a Record string field, e.g., `CellType.name`!"
|
@@ -443,9 +443,8 @@ def update_fk_to_default_db(
|
|
443
443
|
|
444
444
|
FKBULK = [
|
445
445
|
"organism",
|
446
|
-
"
|
447
|
-
"
|
448
|
-
"source_code", # Transform
|
446
|
+
"source",
|
447
|
+
"_source_code_artifact", # Transform
|
449
448
|
"report", # Run
|
450
449
|
]
|
451
450
|
|
@@ -523,7 +522,7 @@ def save(self, *args, **kwargs) -> Record:
|
|
523
522
|
artifacts: list = []
|
524
523
|
if self.__class__.__name__ == "Collection" and self.id is not None:
|
525
524
|
# when creating a new collection without being able to access artifacts
|
526
|
-
artifacts = self.
|
525
|
+
artifacts = self.ordered_artifacts.list()
|
527
526
|
# transfer of the record to the default db with fk fields
|
528
527
|
result = transfer_to_default_db(self, using_key)
|
529
528
|
if result is not None:
|
@@ -538,7 +537,7 @@ def save(self, *args, **kwargs) -> Record:
|
|
538
537
|
logger.info("transfer artifacts")
|
539
538
|
for artifact in artifacts:
|
540
539
|
artifact.save()
|
541
|
-
self.
|
540
|
+
self.artifacts.add(*artifacts)
|
542
541
|
if hasattr(self, "labels"):
|
543
542
|
from copy import copy
|
544
543
|
|
lamindb/_save.py
CHANGED
@@ -85,9 +85,9 @@ def save(records: Iterable[Record], ignore_conflicts: bool | None = False) -> No
|
|
85
85
|
r for r in non_artifacts_new if hasattr(r, "_parents")
|
86
86
|
]
|
87
87
|
if len(non_artifacts_with_parents) > 0:
|
88
|
-
# this can only happen within
|
88
|
+
# this can only happen within bionty right now!!
|
89
89
|
# we might extend to core lamindb later
|
90
|
-
from
|
90
|
+
from bionty.core import add_ontology
|
91
91
|
|
92
92
|
add_ontology(non_artifacts_with_parents)
|
93
93
|
|
lamindb/_transform.py
CHANGED
@@ -1,11 +1,17 @@
|
|
1
1
|
from __future__ import annotations
|
2
2
|
|
3
|
+
from typing import TYPE_CHECKING
|
4
|
+
|
5
|
+
from lamindb_setup.core._docs import doc_args
|
3
6
|
from lnschema_core.models import Run, Transform
|
4
|
-
from lnschema_core.types import TransformType
|
5
7
|
|
8
|
+
from ._parents import _view_parents
|
6
9
|
from ._run import delete_run_artifacts
|
7
10
|
from .core.versioning import process_is_new_version_of
|
8
11
|
|
12
|
+
if TYPE_CHECKING:
|
13
|
+
from lnschema_core.types import TransformType
|
14
|
+
|
9
15
|
|
10
16
|
def __init__(transform: Transform, *args, **kwargs):
|
11
17
|
if len(args) == len(transform._meta.concrete_fields):
|
@@ -18,9 +24,7 @@ def __init__(transform: Transform, *args, **kwargs):
|
|
18
24
|
)
|
19
25
|
(kwargs.pop("initial_version_id") if "initial_version_id" in kwargs else None)
|
20
26
|
version: str | None = kwargs.pop("version") if "version" in kwargs else None
|
21
|
-
type: TransformType | None = (
|
22
|
-
kwargs.pop("type") if "type" in kwargs else TransformType.pipeline
|
23
|
-
)
|
27
|
+
type: TransformType | None = kwargs.pop("type") if "type" in kwargs else "pipeline"
|
24
28
|
reference: str | None = kwargs.pop("reference") if "reference" in kwargs else None
|
25
29
|
reference_type: str | None = (
|
26
30
|
kwargs.pop("reference_type") if "reference_type" in kwargs else None
|
@@ -55,19 +59,13 @@ def __init__(transform: Transform, *args, **kwargs):
|
|
55
59
|
|
56
60
|
|
57
61
|
def delete(self) -> None:
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
self.latest_report = None
|
63
|
-
source_code = None
|
64
|
-
if self.source_code is not None:
|
65
|
-
source_code = self.source_code
|
66
|
-
self.source_code = None
|
67
|
-
if latest_report is not None or source_code is not None:
|
62
|
+
_source_code_artifact = None
|
63
|
+
if self._source_code_artifact is not None:
|
64
|
+
_source_code_artifact = self._source_code_artifact
|
65
|
+
self._source_code_artifact = None
|
68
66
|
self.save()
|
69
|
-
if
|
70
|
-
|
67
|
+
if _source_code_artifact is not None:
|
68
|
+
_source_code_artifact.delete(permanent=True)
|
71
69
|
# query all runs and delete their artifacts
|
72
70
|
runs = Run.filter(transform=self)
|
73
71
|
for run in runs:
|
@@ -78,10 +76,23 @@ def delete(self) -> None:
|
|
78
76
|
|
79
77
|
|
80
78
|
@property # type: ignore
|
79
|
+
@doc_args(Transform.latest_run.__doc__)
|
81
80
|
def latest_run(self) -> Run:
|
81
|
+
"""{}""" # noqa: D415
|
82
82
|
return self.runs.order_by("-started_at").first()
|
83
83
|
|
84
84
|
|
85
|
+
def view_lineage(self, with_successors: bool = False, distance: int = 5):
|
86
|
+
return _view_parents(
|
87
|
+
record=self,
|
88
|
+
field="name",
|
89
|
+
with_children=with_successors,
|
90
|
+
distance=distance,
|
91
|
+
attr_name="predecessors",
|
92
|
+
)
|
93
|
+
|
94
|
+
|
85
95
|
Transform.__init__ = __init__
|
86
96
|
Transform.delete = delete
|
87
97
|
Transform.latest_run = latest_run
|
98
|
+
Transform.view_lineage = view_lineage
|
lamindb/core/_data.py
CHANGED
@@ -20,7 +20,7 @@ from lnschema_core.models import (
|
|
20
20
|
|
21
21
|
from lamindb._parents import view_lineage
|
22
22
|
from lamindb._query_set import QuerySet
|
23
|
-
from lamindb._record import
|
23
|
+
from lamindb._record import get_name_field
|
24
24
|
from lamindb.core._settings import settings
|
25
25
|
|
26
26
|
from ._feature_manager import (
|
@@ -129,31 +129,29 @@ def describe(self: HasFeatures, print_types: bool = False):
|
|
129
129
|
# prefetch m-2-m relationships
|
130
130
|
self = (
|
131
131
|
self.__class__.objects.using(self._state.db)
|
132
|
-
.prefetch_related("feature_sets", "
|
132
|
+
.prefetch_related("feature_sets", "input_of_runs")
|
133
133
|
.get(id=self.id)
|
134
134
|
)
|
135
135
|
|
136
136
|
# provenance
|
137
137
|
if len(foreign_key_fields) > 0: # always True for Artifact and Collection
|
138
138
|
fields_values = [(field, getattr(self, field)) for field in foreign_key_fields]
|
139
|
-
type_str = (
|
140
|
-
|
141
|
-
if print_types
|
142
|
-
else ""
|
139
|
+
type_str = lambda attr: (
|
140
|
+
f": {attr.__class__.__get_name_with_schema__()}" if print_types else ""
|
143
141
|
)
|
144
142
|
related_msg = "".join(
|
145
143
|
[
|
146
|
-
f" .{field_name}{type_str(attr)} = {format_field_value(getattr(attr,
|
144
|
+
f" .{field_name}{type_str(attr)} = {format_field_value(getattr(attr, get_name_field(attr)))}\n"
|
147
145
|
for (field_name, attr) in fields_values
|
148
146
|
if attr is not None
|
149
147
|
]
|
150
148
|
)
|
151
149
|
prov_msg += related_msg
|
152
150
|
# input of
|
153
|
-
if self.id is not None and self.
|
154
|
-
values = [format_field_value(i.started_at) for i in self.
|
151
|
+
if self.id is not None and self.input_of_runs.exists():
|
152
|
+
values = [format_field_value(i.started_at) for i in self.input_of_runs.all()]
|
155
153
|
type_str = ": Run" if print_types else "" # type: ignore
|
156
|
-
prov_msg += f" .
|
154
|
+
prov_msg += f" .input_of_runs{type_str} = {values}\n"
|
157
155
|
if prov_msg:
|
158
156
|
msg += f" {colors.italic('Provenance')}\n"
|
159
157
|
msg += prov_msg
|
@@ -210,11 +208,11 @@ def get_labels(
|
|
210
208
|
).all()
|
211
209
|
if flat_names:
|
212
210
|
# returns a flat list of names
|
213
|
-
from lamindb._record import
|
211
|
+
from lamindb._record import get_name_field
|
214
212
|
|
215
213
|
values = []
|
216
214
|
for v in qs_by_registry.values():
|
217
|
-
values += v.list(
|
215
|
+
values += v.list(get_name_field(v))
|
218
216
|
return values
|
219
217
|
if len(registries_to_check) == 1 and registry in qs_by_registry:
|
220
218
|
return qs_by_registry[registry]
|
@@ -304,12 +302,12 @@ def add_labels(
|
|
304
302
|
if len(linked_labels) > 0:
|
305
303
|
labels_accessor.remove(*linked_labels)
|
306
304
|
labels_accessor.add(*records, through_defaults={"feature_id": feature.id})
|
307
|
-
|
308
|
-
feature_set_ids = [link.featureset_id for link in
|
305
|
+
links_feature_set = get_feature_set_links(self)
|
306
|
+
feature_set_ids = [link.featureset_id for link in links_feature_set.all()]
|
309
307
|
# get all linked features of type Feature
|
310
308
|
feature_sets = FeatureSet.filter(id__in=feature_set_ids).all()
|
311
309
|
{
|
312
|
-
|
310
|
+
links_feature_set.filter(featureset_id=feature_set.id)
|
313
311
|
.one()
|
314
312
|
.slot: feature_set.features.all()
|
315
313
|
for feature_set in feature_sets
|
@@ -415,7 +413,7 @@ def _track_run_input(
|
|
415
413
|
# generalize below for more than one data batch
|
416
414
|
if len(input_data) == 1:
|
417
415
|
if input_data[0].transform is not None:
|
418
|
-
run.transform.
|
416
|
+
run.transform.predecessors.add(input_data[0].transform)
|
419
417
|
|
420
418
|
|
421
419
|
HasFeatures.describe = describe
|
lamindb/core/_feature_manager.py
CHANGED
@@ -39,7 +39,7 @@ from lamindb._feature import FEATURE_TYPES, convert_numpy_dtype_to_lamin_feature
|
|
39
39
|
from lamindb._feature_set import DICT_KEYS_TYPE, FeatureSet
|
40
40
|
from lamindb._record import (
|
41
41
|
REGISTRY_UNIQUE_FIELD,
|
42
|
-
|
42
|
+
get_name_field,
|
43
43
|
transfer_fk_to_default_db_bulk,
|
44
44
|
transfer_to_default_db,
|
45
45
|
)
|
@@ -88,12 +88,12 @@ def get_feature_set_by_slot_(host) -> dict:
|
|
88
88
|
host_id_field = get_host_id_field(host)
|
89
89
|
kwargs = {host_id_field: host.id}
|
90
90
|
# otherwise, we need a query
|
91
|
-
|
91
|
+
links_feature_set = (
|
92
92
|
host.feature_sets.through.objects.using(host_db)
|
93
93
|
.filter(**kwargs)
|
94
94
|
.select_related("featureset")
|
95
95
|
)
|
96
|
-
return {fsl.slot: fsl.featureset for fsl in
|
96
|
+
return {fsl.slot: fsl.featureset for fsl in links_feature_set}
|
97
97
|
|
98
98
|
|
99
99
|
def get_label_links(
|
@@ -112,8 +112,8 @@ def get_label_links(
|
|
112
112
|
def get_feature_set_links(host: Artifact | Collection) -> QuerySet:
|
113
113
|
host_id_field = get_host_id_field(host)
|
114
114
|
kwargs = {host_id_field: host.id}
|
115
|
-
|
116
|
-
return
|
115
|
+
links_feature_set = host.feature_sets.through.objects.filter(**kwargs)
|
116
|
+
return links_feature_set
|
117
117
|
|
118
118
|
|
119
119
|
def get_link_attr(link: LinkORM | type[LinkORM], data: HasFeatures) -> str:
|
@@ -122,12 +122,7 @@ def get_link_attr(link: LinkORM | type[LinkORM], data: HasFeatures) -> str:
|
|
122
122
|
link_model_name == "ModelBase" or link_model_name == "RecordMeta"
|
123
123
|
): # we passed the type of the link
|
124
124
|
link_model_name = link.__name__
|
125
|
-
|
126
|
-
if link_attr == "ExperimentalFactor":
|
127
|
-
link_attr = "experimental_factor"
|
128
|
-
else:
|
129
|
-
link_attr = link_attr.lower()
|
130
|
-
return link_attr
|
125
|
+
return link_model_name.replace(data.__class__.__name__, "").lower()
|
131
126
|
|
132
127
|
|
133
128
|
# Custom aggregation for SQLite
|
@@ -182,14 +177,14 @@ def print_features(
|
|
182
177
|
non_labels_msg = ""
|
183
178
|
if self.id is not None and self.__class__ == Artifact or self.__class__ == Run:
|
184
179
|
attr_name = "param" if print_params else "feature"
|
185
|
-
|
186
|
-
getattr(self, f"{attr_name}_values")
|
180
|
+
_feature_values = (
|
181
|
+
getattr(self, f"_{attr_name}_values")
|
187
182
|
.values(f"{attr_name}__name", f"{attr_name}__dtype")
|
188
183
|
.annotate(values=custom_aggregate("value", self._state.db))
|
189
184
|
.order_by(f"{attr_name}__name")
|
190
185
|
)
|
191
|
-
if len(
|
192
|
-
for fv in
|
186
|
+
if len(_feature_values) > 0:
|
187
|
+
for fv in _feature_values:
|
193
188
|
feature_name = fv[f"{attr_name}__name"]
|
194
189
|
feature_dtype = fv[f"{attr_name}__dtype"]
|
195
190
|
values = fv["values"]
|
@@ -217,7 +212,7 @@ def print_features(
|
|
217
212
|
for slot, feature_set in get_feature_set_by_slot_(self).items():
|
218
213
|
features = feature_set.members
|
219
214
|
# features.first() is a lot slower than features[0] here
|
220
|
-
name_field =
|
215
|
+
name_field = get_name_field(features[0])
|
221
216
|
feature_names = list(features.values_list(name_field, flat=True)[:20])
|
222
217
|
type_str = f": {feature_set.registry}" if print_types else ""
|
223
218
|
feature_set_msg += (
|
@@ -246,7 +241,7 @@ def parse_feature_sets_from_anndata(
|
|
246
241
|
from lamindb.core.storage._backed_access import backed_access
|
247
242
|
|
248
243
|
using_key = settings._using_key
|
249
|
-
data_parse = backed_access(filepath, using_key)
|
244
|
+
data_parse = backed_access(filepath, using_key=using_key)
|
250
245
|
else:
|
251
246
|
data_parse = ad.read_h5ad(filepath, backed="r")
|
252
247
|
type = "float"
|
@@ -316,13 +311,13 @@ def infer_feature_type_convert_json(
|
|
316
311
|
if len(value) > 0: # type: ignore
|
317
312
|
first_element_type = type(next(iter(value)))
|
318
313
|
if all(isinstance(elem, first_element_type) for elem in value):
|
319
|
-
if first_element_type
|
314
|
+
if first_element_type is bool:
|
320
315
|
return f"list[{FEATURE_TYPES['bool']}]", value
|
321
|
-
elif first_element_type
|
316
|
+
elif first_element_type is int:
|
322
317
|
return f"list[{FEATURE_TYPES['int']}]", value
|
323
|
-
elif first_element_type
|
318
|
+
elif first_element_type is float:
|
324
319
|
return f"list[{FEATURE_TYPES['float']}]", value
|
325
|
-
elif first_element_type
|
320
|
+
elif first_element_type is str:
|
326
321
|
if str_as_ulabel:
|
327
322
|
return FEATURE_TYPES["str"] + "[ULabel]", value
|
328
323
|
else:
|
@@ -390,7 +385,7 @@ def filter(cls, **expression) -> QuerySet:
|
|
390
385
|
feature = features.get(name=normalized_key)
|
391
386
|
if not feature.dtype.startswith("cat"):
|
392
387
|
feature_value = value_model.filter(feature=feature, value=value).one()
|
393
|
-
new_expression["
|
388
|
+
new_expression["_feature_values"] = feature_value
|
394
389
|
else:
|
395
390
|
if isinstance(value, str):
|
396
391
|
label = ULabel.filter(name=value).one()
|
@@ -478,7 +473,7 @@ def _add_values(
|
|
478
473
|
)
|
479
474
|
# figure out which of the values go where
|
480
475
|
features_labels = defaultdict(list)
|
481
|
-
|
476
|
+
_feature_values = []
|
482
477
|
not_validated_values = []
|
483
478
|
for key, value in features_values.items():
|
484
479
|
feature = model.filter(name=key).one()
|
@@ -508,7 +503,7 @@ def _add_values(
|
|
508
503
|
feature_value = value_model.filter(**filter_kwargs).one_or_none()
|
509
504
|
if feature_value is None:
|
510
505
|
feature_value = value_model(**filter_kwargs)
|
511
|
-
|
506
|
+
_feature_values.append(feature_value)
|
512
507
|
else:
|
513
508
|
if isinstance(value, Record) or (
|
514
509
|
isinstance(value, Iterable) and isinstance(next(iter(value)), Record)
|
@@ -578,7 +573,7 @@ def _add_values(
|
|
578
573
|
except Exception:
|
579
574
|
save(links, ignore_conflicts=True)
|
580
575
|
# now deal with links that were previously saved without a feature_id
|
581
|
-
|
576
|
+
links_saved = LinkORM.filter(
|
582
577
|
**{
|
583
578
|
"artifact_id": self._host.id,
|
584
579
|
f"{field_name}__in": [
|
@@ -586,7 +581,7 @@ def _add_values(
|
|
586
581
|
],
|
587
582
|
}
|
588
583
|
)
|
589
|
-
for link in
|
584
|
+
for link in links_saved.all():
|
590
585
|
# TODO: also check for inconsistent features
|
591
586
|
if link.feature_id is None:
|
592
587
|
link.feature_id = [
|
@@ -595,13 +590,13 @@ def _add_values(
|
|
595
590
|
if l.id == getattr(link, field_name)
|
596
591
|
][0]
|
597
592
|
link.save()
|
598
|
-
if
|
599
|
-
save(
|
593
|
+
if _feature_values:
|
594
|
+
save(_feature_values)
|
600
595
|
if is_param:
|
601
|
-
LinkORM = self._host.
|
596
|
+
LinkORM = self._host._param_values.through
|
602
597
|
valuefield_id = "paramvalue_id"
|
603
598
|
else:
|
604
|
-
LinkORM = self._host.
|
599
|
+
LinkORM = self._host._feature_values.through
|
605
600
|
valuefield_id = "featurevalue_id"
|
606
601
|
links = [
|
607
602
|
LinkORM(
|
@@ -610,7 +605,7 @@ def _add_values(
|
|
610
605
|
valuefield_id: feature_value.id,
|
611
606
|
}
|
612
607
|
)
|
613
|
-
for feature_value in
|
608
|
+
for feature_value in _feature_values
|
614
609
|
]
|
615
610
|
# a link might already exist, to avoid raising a unique constraint
|
616
611
|
# error, ignore_conflicts
|
@@ -683,10 +678,10 @@ def _add_set_from_df(
|
|
683
678
|
):
|
684
679
|
"""Add feature set corresponding to column names of DataFrame."""
|
685
680
|
if isinstance(self._host, Artifact):
|
686
|
-
assert self._host.
|
681
|
+
assert self._host._accessor == "DataFrame" # noqa: S101
|
687
682
|
else:
|
688
683
|
# Collection
|
689
|
-
assert self._host.artifact.
|
684
|
+
assert self._host.artifact._accessor == "DataFrame" # noqa: S101
|
690
685
|
|
691
686
|
# parse and register features
|
692
687
|
registry = field.field.model
|
@@ -714,7 +709,7 @@ def _add_set_from_anndata(
|
|
714
709
|
):
|
715
710
|
"""Add features from AnnData."""
|
716
711
|
if isinstance(self._host, Artifact):
|
717
|
-
assert self._host.
|
712
|
+
assert self._host._accessor == "AnnData" # noqa: S101
|
718
713
|
else:
|
719
714
|
raise NotImplementedError()
|
720
715
|
|
@@ -744,7 +739,7 @@ def _add_set_from_mudata(
|
|
744
739
|
if obs_fields is None:
|
745
740
|
obs_fields = {}
|
746
741
|
if isinstance(self._host, Artifact):
|
747
|
-
assert self._host.
|
742
|
+
assert self._host._accessor == "MuData" # noqa: S101
|
748
743
|
else:
|
749
744
|
raise NotImplementedError()
|
750
745
|
|
@@ -781,17 +776,12 @@ def _add_from(self, data: HasFeatures):
|
|
781
776
|
registry = members[0].__class__
|
782
777
|
# note here the features are transferred based on an unique field
|
783
778
|
field = REGISTRY_UNIQUE_FIELD.get(registry.__name__.lower(), "uid")
|
784
|
-
|
785
|
-
|
786
|
-
field = "ontology_id"
|
787
|
-
elif hasattr(registry, "ensembl_gene_id"):
|
788
|
-
field = "ensembl_gene_id"
|
789
|
-
elif hasattr(registry, "uniprotkb_id"):
|
790
|
-
field = "uniprotkb_id"
|
779
|
+
if hasattr(registry, "_ontology_id_field"):
|
780
|
+
field = registry._ontology_id_field
|
791
781
|
# this will be e.g. be a list of ontology_ids or uids
|
792
782
|
member_uids = list(members.values_list(field, flat=True))
|
793
783
|
# create records from ontology_id
|
794
|
-
if
|
784
|
+
if hasattr(registry, "_ontology_id_field") and len(member_uids) > 0:
|
795
785
|
# create from bionty
|
796
786
|
save(registry.from_values(member_uids, field=field))
|
797
787
|
validated = registry.validate(member_uids, field=field, mute=True)
|
@@ -816,7 +806,7 @@ def _add_from(self, data: HasFeatures):
|
|
816
806
|
member_uids, field=getattr(registry, field)
|
817
807
|
)
|
818
808
|
if feature_set_self is None:
|
819
|
-
if hasattr(registry, "
|
809
|
+
if hasattr(registry, "organism_id"):
|
820
810
|
logger.warning(
|
821
811
|
f"FeatureSet is not transferred, check if organism is set correctly: {feature_set}"
|
822
812
|
)
|