lamindb 0.71.2__py3-none-any.whl → 0.72.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lamindb/__init__.py +2 -2
- lamindb/_annotate.py +6 -10
- lamindb/_artifact.py +24 -10
- lamindb/_can_validate.py +9 -3
- lamindb/_collection.py +7 -7
- lamindb/_feature.py +53 -45
- lamindb/_feature_set.py +37 -74
- lamindb/_from_values.py +27 -8
- lamindb/_query_manager.py +6 -1
- lamindb/_registry.py +60 -100
- lamindb/_run.py +0 -2
- lamindb/_save.py +28 -11
- lamindb/core/__init__.py +4 -0
- lamindb/core/_data.py +56 -30
- lamindb/core/_feature_manager.py +159 -64
- lamindb/core/_label_manager.py +53 -38
- lamindb/core/_run_context.py +24 -1
- lamindb/core/datasets/_core.py +10 -18
- lamindb/core/schema.py +53 -0
- {lamindb-0.71.2.dist-info → lamindb-0.72.0.dist-info}/METADATA +7 -6
- {lamindb-0.71.2.dist-info → lamindb-0.72.0.dist-info}/RECORD +23 -22
- {lamindb-0.71.2.dist-info → lamindb-0.72.0.dist-info}/LICENSE +0 -0
- {lamindb-0.71.2.dist-info → lamindb-0.72.0.dist-info}/WHEEL +0 -0
lamindb/core/_feature_manager.py
CHANGED
@@ -1,13 +1,21 @@
 from __future__ import annotations

 from itertools import compress
-from typing import TYPE_CHECKING, Iterable
+from typing import TYPE_CHECKING, Iterable

 import anndata as ad
 from anndata import AnnData
 from lamin_utils import colors, logger
 from lamindb_setup.core.upath import create_path
-from lnschema_core.models import
+from lnschema_core.models import (
+    Artifact,
+    Collection,
+    Data,
+    Feature,
+    FeatureValue,
+    Registry,
+    ULabel,
+)

 from lamindb._feature import convert_numpy_dtype_to_lamin_feature_type
 from lamindb._feature_set import FeatureSet
@@ -18,6 +26,7 @@ from lamindb._registry import (
     transfer_to_default_db,
 )
 from lamindb._save import save
+from lamindb.core.exceptions import ValidationError
 from lamindb.core.storage import LocalPathClasses

 from ._settings import settings
@@ -41,8 +50,8 @@ def get_accessor_by_orm(host: Artifact | Collection) -> dict:
         field.related_model.__get_name_with_schema__(): field.name
         for field in host._meta.related_objects
     }
-    dictionary["
-    dictionary["
+    dictionary["Feature"] = "features"
+    dictionary["ULabel"] = "ulabels"
     return dictionary


@@ -57,15 +66,12 @@ def get_feature_set_by_slot(host) -> dict:
     host_id_field = get_host_id_field(host)
     kwargs = {host_id_field: host.id}
     # otherwise, we need a query
-    feature_set_links =
-
+    feature_set_links = (
+        host.feature_sets.through.objects.using(host_db)
+        .filter(**kwargs)
+        .select_related("featureset")
     )
-    return {
-        feature_set_link.slot: FeatureSet.objects.using(host_db).get(
-            id=feature_set_link.feature_set_id
-        )
-        for feature_set_link in feature_set_links
-    }
+    return {fsl.slot: fsl.featureset for fsl in feature_set_links}


 def get_label_links(
@@ -74,7 +80,7 @@ def get_label_links(
     host_id_field = get_host_id_field(host)
     kwargs = {host_id_field: host.id, "feature_id": feature.id}
     link_records = (
-        getattr(host, host.features.
+        getattr(host, host.features.accessor_by_orm[registry])
        .through.objects.using(host._state.db)
        .filter(**kwargs)
     )
@@ -93,48 +99,48 @@ def print_features(self: Data) -> str:

     from ._data import format_repr

-
-
-
-        if feature_set.registry != "core.Feature":
+    messages = []
+    for slot, feature_set in get_feature_set_by_slot(self).items():
+        if feature_set.registry != "Feature":
             features = feature_set.members
+            # features.first() is a lot slower than features[0] here
             name_field = get_default_str_field(features[0])
-            feature_names =
-
+            feature_names = list(features.values_list(name_field, flat=True)[:30])
+            messages.append(
                 f" {colors.bold(slot)}: {format_repr(feature_set, exclude='hash')}\n"
             )
             print_values = _print_values(feature_names, n=20)
-
+            messages.append(f" {print_values}\n")
         else:
-
-
+            features_lookup = {
+                f.name: f for f in Feature.objects.using(self._state.db).filter().all()
+            }
+            messages.append(
                 f" {colors.bold(slot)}: {format_repr(feature_set, exclude='hash')}\n"
             )
-            for
-                if
-                    labels = self.labels.get(
-                        features_lookup.get(row["name"]), mute=True
-                    )
+            for name, dtype in feature_set.features.values_list("name", "dtype"):
+                if dtype.startswith("cat["):
+                    labels = self.labels.get(features_lookup.get(name), mute=True)
                    indent = ""
                    if isinstance(labels, dict):
-
+                        messages.append(f" 🔗 {name} ({dtype})\n")
                        indent = "  "
                    else:
-                        labels = {
-                        for registry,
-
-
-
+                        labels = {dtype: labels}
+                    for registry, registry_labels in labels.items():
+                        field = get_default_str_field(registry_labels)
+                        values_list = registry_labels.values_list(field, flat=True)
+                        count_str = f"{feature_set.n}, {colors.italic(f'{registry}')}"
+                        print_values = _print_values(values_list[:20], n=10)
                        msg_objects = (
-                            f"{indent} 🔗 {
-                            f" {print_values}\n"
+                            f"{indent} 🔗 {name} ({count_str}):" f" {print_values}\n"
                        )
-
+                        messages.append(msg_objects)
                else:
-
-    if
-
-    return
+                    messages.append(f" {name} ({dtype})\n")
+    if messages:
+        messages.insert(0, f"{colors.green('Features')}:\n")
+    return "".join(messages)


 def parse_feature_sets_from_anndata(
@@ -171,6 +177,7 @@ def parse_feature_sets_from_anndata(
         type=type,
         mute=mute,
         organism=organism,
+        raise_validation_error=False,
     )
     if feature_set_var is not None:
         feature_sets["var"] = feature_set_var
@@ -204,39 +211,118 @@ class FeatureManager:

     def __init__(self, host: Artifact | Collection):
         self._host = host
-        self._feature_set_by_slot =
-        self._accessor_by_orm =
+        self._feature_set_by_slot = None
+        self._accessor_by_orm = None

     def __repr__(self) -> str:
-        if len(self.
+        if len(self.feature_set_by_slot) > 0:
             return print_features(self._host)
         else:
             return "no linked features"

     def __getitem__(self, slot) -> QuerySet:
-        if slot not in self.
+        if slot not in self.feature_set_by_slot:
             raise ValueError(
                 f"No linked feature set for slot: {slot}\nDid you get validation"
                 " warnings? Only features that match registered features get validated"
                 " and linked."
             )
-        feature_set = self.
+        feature_set = self.feature_set_by_slot[slot]
         orm_name = feature_set.registry
         if hasattr(feature_set, "_features"):
             # feature set is not yet saved
             # need to think about turning this into a queryset
             return feature_set._features
         else:
-            return getattr(feature_set, self.
+            return getattr(feature_set, self.accessor_by_orm[orm_name]).all()
+
+    @property
+    def feature_set_by_slot(self):
+        """Feature sets by slot."""
+        if self._feature_set_by_slot is None:
+            self._feature_set_by_slot = get_feature_set_by_slot(self._host)
+        return self._feature_set_by_slot
+
+    @property
+    def accessor_by_orm(self):
+        """Accessor by ORM."""
+        if self._accessor_by_orm is None:
+            self._accessor_by_orm = get_accessor_by_orm(self._host)
+        return self._accessor_by_orm
+
+    def add(
+        self,
+        features_values: dict[str, str | int | float | bool],
+        slot: str | None = None,
+        feature_field: FieldAttr = Feature.name,
+    ):
+        """Add features stratified by slot.

-
-
-
-
-
-
-
-
+        Args:
+            features_values: A dictionary of features & values. You can also
+                pass `{feature_identifier: None}` to skip annotation by values.
+            slot: The access slot of the feature sets in the artifact. For
+                instance, `.columns` for `DataFrame` or `.var` or `.obs` for
+                `AnnData`.
+            feature_field: The field of a reference registry to map values.
+        """
+        if slot is None:
+            slot = "external"
+        keys = features_values.keys()
+        features_values.values()
+        # what if the feature is already part of a linked feature set?
+        # what if artifact annotation by features through link tables and through feature sets
+        # differs?
+        feature_set = FeatureSet.from_values(keys, field=feature_field)
+        self._host.features.add_feature_set(feature_set, slot)
+        # now figure out which of the values go where
+        features_labels = []
+        feature_values = []
+        for key, value in features_values.items():
+            # TODO: use proper field in .get() below
+            feature = feature_set.features.get(name=key)
+            if feature.dtype == "number":
+                if not (isinstance(value, int) or isinstance(value, float)):
+                    raise TypeError(
+                        f"Value for feature '{key}' with type {feature.dtype} must be a number"
+                    )
+            elif feature.dtype == "cat":
+                if not (isinstance(value, str) or isinstance(value, Registry)):
+                    raise TypeError(
+                        f"Value for feature '{key}' with type '{feature.dtype}' must be a string or record."
+                    )
+            elif feature.dtype == "bool":
+                assert isinstance(value, bool)
+            if feature.dtype == "cat":
+                if isinstance(value, Registry):
+                    assert not value._state.adding
+                    label_record = value
+                    assert isinstance(label_record, ULabel)
+                else:
+                    label_record = ULabel.filter(name=value).one_or_none()
+                    if label_record is None:
+                        raise ValidationError(f"Label '{value}' not found in ln.ULabel")
+                features_labels.append((feature, label_record))
+            else:
+                feature_values.append(FeatureValue(feature=feature, value=value))
+        # bulk add all links to ArtifactULabel
+        if features_labels:
+            LinkORM = self._host.ulabels.through
+            links = [
+                LinkORM(
+                    artifact_id=self._host.id, feature_id=feature.id, ulabel_id=label.id
+                )
+                for (feature, label) in features_labels
+            ]
+            LinkORM.objects.bulk_create(links, ignore_conflicts=True)
+        if feature_values:
+            save(feature_values)
+            LinkORM = self._host.feature_values.through
+            links = [
+                LinkORM(artifact_id=self._host.id, featurevalue_id=feature_value.id)
+                for feature_value in feature_values
+            ]
+            LinkORM.objects.bulk_create(links)

     def add_from_df(self, field: FieldAttr = Feature.name, organism: str | None = None):
         """Add features from DataFrame."""
@@ -341,7 +427,7 @@ class FeatureManager:
         host_id_field = get_host_id_field(self._host)
         kwargs = {
             host_id_field: self._host.id,
-            "
+            "featureset": feature_set,
             "slot": slot,
         }
         link_record = (
@@ -351,26 +437,36 @@ class FeatureManager:
         )
         if link_record is None:
             self._host.feature_sets.through(**kwargs).save(using=host_db)
-
+        if slot in self.feature_set_by_slot:
+            logger.debug(f"replaced existing {slot} feature set")
+        # this _feature_set_by_slot here is private
+        self._feature_set_by_slot[slot] = feature_set  # type: ignore

     def _add_from(self, data: Data, parents: bool = True):
         """Transfer features from a artifact or collection."""
         using_key = settings._using_key
-        for slot, feature_set in data.features.
+        for slot, feature_set in data.features.feature_set_by_slot.items():
+            print(slot)
             members = feature_set.members
-            if members
+            if len(members) == 0:
                 continue
             registry = members[0].__class__
             # note here the features are transferred based on an unique field
             field = REGISTRY_UNIQUE_FIELD.get(registry.__name__.lower(), "uid")
+            # TODO: get a default ID field for the registry
             if hasattr(registry, "ontology_id") and parents:
                 field = "ontology_id"
+            elif hasattr(registry, "ensembl_gene_id"):
+                field = "ensembl_gene_id"
+            elif hasattr(registry, "uniprotkb_id"):
+                field = "uniprotkb_id"
+
             if registry.__get_name_with_schema__() == "bionty.Organism":
                 parents = False
             # this will be e.g. be a list of ontology_ids or uids
             member_uids = list(members.values_list(field, flat=True))
             # create records from ontology_id in order to populate parents
-            if field == "ontology_id" and len(member_uids) > 0:
+            if field == "ontology_id" and len(member_uids) > 0 and parents:
                 # create from bionty
                 records = registry.from_values(member_uids, field=field)
                 if len(records) > 0:
@@ -378,8 +474,9 @@ class FeatureManager:
             validated = registry.validate(member_uids, field=field, mute=True)
             new_members_uids = list(compress(member_uids, ~validated))
             new_members = members.filter(**{f"{field}__in": new_members_uids}).all()
-
-
+            n_new_members = len(new_members)
+            if n_new_members > 0:
+                mute = True if n_new_members > 10 else False
                 # transfer foreign keys needs to be run before transfer to default db
                 transfer_fk_to_default_db_bulk(new_members, using_key)
                 for feature in new_members:
@@ -390,9 +487,7 @@ class FeatureManager:
                     transfer_to_default_db(
                         feature, using_key, mute=mute, transfer_fk=False
                     )
-                logger.info(
-                    f"saving {new_members.count()} new {registry.__name__} records"
-                )
+                logger.info(f"saving {n_new_members} new {registry.__name__} records")
                 save(new_members, parents=parents)

             # create a new feature set from feature values using the same uid
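The new `FeatureManager.add()` means an artifact can now be annotated with external feature values directly, without going through a DataFrame slot. A minimal usage sketch, assuming the features and the categorical label already exist as `Feature` and `ULabel` records (the file and names below are hypothetical; categorical strings that don't match a `ULabel` raise `ValidationError`):

    import lamindb as ln

    # hypothetical artifact; "species" is a "cat" feature, "temperature" a "number" feature
    artifact = ln.Artifact("sample.parquet", description="example")
    artifact.save()
    artifact.features.add(
        {"species": "mouse", "temperature": 21.6},
        slot="external",  # "external" is also the default when slot is omitted
    )

Numeric and boolean values become `FeatureValue` records bulk-linked to the artifact; categorical values are resolved against `ln.ULabel` and linked through the artifact-label link table.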
lamindb/core/_label_manager.py
CHANGED
@@ -4,9 +4,8 @@ from typing import TYPE_CHECKING, Dict

 import numpy as np
 from lamin_utils import colors, logger
-from lnschema_core.models import Artifact, Collection, Data, Feature, Registry
+from lnschema_core.models import Artifact, Collection, Data, Feature, LinkORM, Registry

-from lamindb._feature_set import dict_related_model_to_related_name
 from lamindb._from_values import _print_values
 from lamindb._registry import (
     REGISTRY_UNIQUE_FIELD,
@@ -17,14 +16,17 @@ from lamindb._registry import (
 from lamindb._save import save

 from ._settings import settings
+from .schema import dict_related_model_to_related_name

 if TYPE_CHECKING:
     from lamindb._query_set import QuerySet


 def get_labels_as_dict(self: Data):
-    labels = {}
-
+    labels = {}  # type: ignore
+    if self.id is None:
+        return labels
+    for related_model_name, related_name in dict_related_model_to_related_name(
         self.__class__
     ).items():
         if related_name in {
@@ -37,19 +39,23 @@ def get_labels_as_dict(self: Data):
             "environment_of",
         }:
             continue
-
-        labels[related_name] = (related_model, self.__getattribute__(related_name))
+        labels[related_name] = (related_model_name, self.__getattribute__(related_name))
     return labels


-def print_labels(
+def print_labels(
+    self: Data, field: str = "name", ignore_labels_with_feature: bool = True
+):
     labels_msg = ""
     for related_name, (related_model, labels) in get_labels_as_dict(self).items():
-
-
-
-
-
+        try:
+            labels_list = list(labels.values_list(field, flat=True))
+            if len(labels_list) > 0:
+                get_default_str_field(labels)
+                print_values = _print_values(labels_list[:20], n=10)
+                labels_msg += f" 📎 {related_name} ({len(labels_list)}, {colors.italic(related_model)}): {print_values}\n"
+        except Exception:
+            continue
     if len(labels_msg) > 0:
         return f"{colors.green('Labels')}:\n{labels_msg}"
     else:
@@ -72,7 +78,7 @@ def transfer_add_labels(labels, features_lookup_self, self, row, parents: bool =
     # link labels records from self db
     self._host.labels.add(
         validated_labels + new_labels,
-        feature=
+        feature=features_lookup_self.get(row["name"]),
     )

     # validate labels on the default db
@@ -94,6 +100,10 @@ def validate_labels(labels: QuerySet | list | dict, parents: bool = True):
     field = REGISTRY_UNIQUE_FIELD.get(registry.__name__.lower(), "uid")
     if hasattr(registry, "ontology_id") and parents:
         field = "ontology_id"
+    elif hasattr(registry, "ensembl_gene_id"):
+        field = "ensembl_gene_id"
+    elif hasattr(registry, "uniprotkb_id"):
+        field = "uniprotkb_id"
     if registry.__get_name_with_schema__() == "bionty.Organism":
         parents = False
     # if the field value is None, use uid field
@@ -195,42 +205,47 @@ class LabelManager:
         >>> file1.ulabels.set(labels)
         >>> file2.labels.add_from(file1)
         """
-
-
-        for
+        from django.db.utils import ProgrammingError
+
+        features_lookup_self = {f.name: f for f in Feature.objects.filter().all()}
+        features_lookup_data = {
+            f.name: f for f in Feature.objects.using(data._state.db).filter().all()
+        }
+        for _, feature_set in data.features.feature_set_by_slot.items():
             # add labels stratified by feature
-            if feature_set.registry == "
-                # df_slot is the Feature table with type
+            if feature_set.registry == "Feature":
+                # df_slot is the Feature table with type
                 df_slot = feature_set.features.df()
                 for _, row in df_slot.iterrows():
-                    if row["
+                    if row["dtype"].startswith("cat["):
                         logger.info(f"transferring {row['name']}")
                         # labels records from data db
                         labels = data.labels.get(
-
+                            features_lookup_data.get(row["name"]), mute=True
                         )
                         transfer_add_labels(
                             labels, features_lookup_self, self, row, parents=parents
                         )
-
-        # for now, have this be duplicated, need to disentangle above
+        # TODO: for now, has to be duplicated
         using_key = settings._using_key
         for related_name, (_, labels) in get_labels_as_dict(data).items():
             labels = labels.all()
-
+            try:
+                if len(labels) == 0:
+                    continue
+                validated_labels, new_labels = validate_labels(labels, parents=parents)
+                if len(new_labels) > 0:
+                    transfer_fk_to_default_db_bulk(new_labels, using_key)
+                    for label in new_labels:
+                        transfer_to_default_db(
+                            label, using_key, mute=True, transfer_fk=False
+                        )
+                    save(new_labels, parents=parents)
+                # this should not occur as file and collection should have the same attributes
+                # but this might not be true for custom schema
+                labels_list = validated_labels + new_labels
+                if hasattr(self._host, related_name):
+                    getattr(self._host, related_name).add(*labels_list)
+            # ProgrammingError is raised when schemas don't match between source and target instances
+            except ProgrammingError:
                 continue
-            validated_labels, new_labels = validate_labels(
-                labels.all(), parents=parents
-            )
-            if len(new_labels) > 0:
-                transfer_fk_to_default_db_bulk(new_labels, using_key)
-                for label in new_labels:
-                    transfer_to_default_db(
-                        label, using_key, mute=True, transfer_fk=False
-                    )
-                save(new_labels, parents=parents)
-            # this should not occur as file and collection should have the same attributes
-            # but this might not be true for custom schema
-            labels_list = validated_labels + new_labels
-            if hasattr(self._host, related_name):
-                getattr(self._host, related_name).add(*labels_list)
lamindb/core/_run_context.py
CHANGED
@@ -10,9 +10,11 @@ from typing import TYPE_CHECKING
 from lamin_utils import logger
 from lamindb_setup.core.hashing import hash_file
 from lnschema_core import Run, Transform, ids
+from lnschema_core.models import Param, ParamValue, RunParamValue
 from lnschema_core.types import TransformType
 from lnschema_core.users import current_user_id

+from lamindb._save import save
 from lamindb.core._transform_settings import transform as transform_settings

 from ._settings import settings
@@ -187,6 +189,26 @@ def pretty_pypackages(dependencies: dict) -> str:
     return " ".join(deps_list)


+def parse_and_link_params(run: Run, params: dict) -> None:
+    param_values = []
+    for key, value in params.items():
+        param = Param.filter(name=key).one_or_none()
+        if param is None:
+            dtype = type(value).__name__
+            logger.warning(
+                f"param '{key}' does not yet exist, creating it with dtype '{dtype}'"
+            )
+            param = Param(name=key, dtype=dtype).save()
+        param_value, _ = ParamValue.objects.get_or_create(param=param, value=value)
+        param_values.append(param_value)
+    if param_values:
+        links = [
+            RunParamValue(run_id=run.id, paramvalue_id=param_value.id)
+            for param_value in param_values
+        ]
+        RunParamValue.objects.bulk_create(links)
+
+
 class run_context:
     """Global run context."""

@@ -312,7 +334,6 @@ class run_context:
         )
         if run is not None:  # loaded latest run
             run.started_at = datetime.now(timezone.utc)  # update run time
-            run.json = params  # update run params
             logger.important(f"loaded: {run}")

         if run is None:  # create new run
@@ -326,6 +347,8 @@ class run_context:
             run.is_consecutive = True if is_run_from_ipython else None
         # need to save in all cases
         run.save()
+        if params is not None:
+            parse_and_link_params(run, params)
         cls.run = run

         from ._track_environment import track_environment
lamindb/core/datasets/_core.py
CHANGED
@@ -45,10 +45,8 @@ def file_fcs_alpert19(populate_registries: bool = False) -> Path:  # pragma: no
             bt.CellMarker.public().inspect(std, "name").validated, "name"
         )
     )
-    ln.Feature(
-
-    ).save()
-    ln.Feature(name="organism", type="category", registries=[bt.Organism]).save()
+    ln.Feature(name="assay", dtype=[bt.ExperimentalFactor]).save()
+    ln.Feature(name="organism", dtype=[bt.Organism]).save()
     ln.settings.verbosity = verbosity
     return Path(filepath)

@@ -83,10 +81,8 @@ def file_tsv_rnaseq_nfcore_salmon_merged_gene_counts(

     verbosity = ln.settings.verbosity
     ln.settings.verbosity = "error"
-    ln.Feature(
-
-    ).save()
-    ln.Feature(name="organism", type="category", registries=[bt.Organism]).save()
+    ln.Feature(name="assay", dtype=[bt.ExperimentalFactor]).save()
+    ln.Feature(name="organism", dtype=[bt.Organism]).save()
     bt.ExperimentalFactor.from_public(ontology_id="EFO:0008896").save()
     ln.settings.verbosity = verbosity

@@ -200,9 +196,7 @@ def anndata_mouse_sc_lymph_node(
     # cell types
     ln.save(bt.CellType.from_values(["CL:0000115", "CL:0000738"], "ontology_id"))
     # assays
-    ln.Feature(
-        name="assay", type="category", registries=[bt.ExperimentalFactor]
-    ).save()
+    ln.Feature(name="assay", dtype=[bt.ExperimentalFactor]).save()
     bt.ExperimentalFactor.from_public(ontology_id="EFO:0008913").save()
     # genes
     validated = bt.Gene.public(organism="mouse").validate(
@@ -329,13 +323,11 @@ def anndata_human_immune_cells(
     ln.save(bt.CellType.from_values(adata.obs.cell_type, field="name"))
     ln.save(bt.ExperimentalFactor.from_values(adata.obs.assay, field="name"))
     ln.save(bt.Tissue.from_values(adata.obs.tissue, field="name"))
-    ln.Feature(name="cell_type",
-    ln.Feature(
-
-    ).save()
-    ln.Feature(name="
-    ln.Feature(name="organism", type="category", registries=[bt.Organism]).save()
-    ln.Feature(name="donor", type="category", registries=[ln.ULabel]).save()
+    ln.Feature(name="cell_type", dtype=[bt.CellType]).save()
+    ln.Feature(name="assay", dtype=[bt.ExperimentalFactor]).save()
+    ln.Feature(name="tissue", dtype=[bt.Tissue]).save()
+    ln.Feature(name="organism", dtype=[bt.Organism]).save()
+    ln.Feature(name="donor", dtype=[ln.ULabel]).save()
     bt.ExperimentalFactor.from_public(ontology_id="EFO:0008913").save()
     ln.save([ln.ULabel(name=name) for name in adata.obs.donor.unique()])
     ln.settings.verbosity = verbosity
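These dataset helpers illustrate the `Feature` constructor change that the updated calls suggest runs through this release (see also `lamindb/_feature.py` in the file list): the former `type="category", registries=[...]` pair is replaced by a single `dtype` argument, where a list of registries implies a categorical feature. For example:

    import bionty as bt
    import lamindb as ln

    # 0.71.x (removed):
    # ln.Feature(name="assay", type="category", registries=[bt.ExperimentalFactor]).save()
    # 0.72.0: a list of registries as dtype marks the feature as categorical
    ln.Feature(name="assay", dtype=[bt.ExperimentalFactor]).save()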
|