lamindb 0.71.3__py3-none-any.whl → 0.72.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lamindb/__init__.py +2 -2
- lamindb/_annotate.py +6 -10
- lamindb/_artifact.py +6 -2
- lamindb/_collection.py +4 -4
- lamindb/_feature.py +53 -45
- lamindb/_feature_set.py +37 -74
- lamindb/_from_values.py +0 -3
- lamindb/_query_manager.py +6 -1
- lamindb/_registry.py +8 -6
- lamindb/_run.py +0 -2
- lamindb/core/__init__.py +4 -0
- lamindb/core/_data.py +30 -27
- lamindb/core/_feature_manager.py +96 -24
- lamindb/core/_label_manager.py +13 -10
- lamindb/core/_run_context.py +24 -1
- lamindb/core/datasets/_core.py +10 -18
- lamindb/core/schema.py +53 -0
- {lamindb-0.71.3.dist-info → lamindb-0.72.1.dist-info}/METADATA +4 -4
- {lamindb-0.71.3.dist-info → lamindb-0.72.1.dist-info}/RECORD +21 -20
- {lamindb-0.71.3.dist-info → lamindb-0.72.1.dist-info}/LICENSE +0 -0
- {lamindb-0.71.3.dist-info → lamindb-0.72.1.dist-info}/WHEEL +0 -0
lamindb/core/_data.py
CHANGED
@@ -18,10 +18,6 @@ from lnschema_core.models import (
|
|
18
18
|
format_field_value,
|
19
19
|
)
|
20
20
|
|
21
|
-
from lamindb._feature_set import (
|
22
|
-
dict_related_model_to_related_name,
|
23
|
-
dict_schema_name_to_model_name,
|
24
|
-
)
|
25
21
|
from lamindb._parents import view_lineage
|
26
22
|
from lamindb._query_set import QuerySet
|
27
23
|
from lamindb.core._settings import settings
|
@@ -36,6 +32,10 @@ from ._feature_manager import (
|
|
36
32
|
from ._label_manager import LabelManager, print_labels
|
37
33
|
from ._run_context import run_context
|
38
34
|
from .exceptions import ValidationError
|
35
|
+
from .schema import (
|
36
|
+
dict_related_model_to_related_name,
|
37
|
+
dict_schema_name_to_model_name,
|
38
|
+
)
|
39
39
|
|
40
40
|
if TYPE_CHECKING:
|
41
41
|
from lnschema_core.types import StrField
|
@@ -87,7 +87,7 @@ def save_feature_set_links(self: Artifact | Collection) -> None:
|
|
87
87
|
for slot, feature_set in self._feature_sets.items():
|
88
88
|
kwargs = {
|
89
89
|
host_id_field: self.id,
|
90
|
-
"
|
90
|
+
"featureset_id": feature_set.id,
|
91
91
|
"slot": slot,
|
92
92
|
}
|
93
93
|
links.append(Data.feature_sets.through(**kwargs))
|
@@ -175,16 +175,13 @@ def describe(self: Data):
|
|
175
175
|
|
176
176
|
|
177
177
|
def validate_feature(feature: Feature, records: list[Registry]) -> None:
|
178
|
-
"""Validate feature record,
|
178
|
+
"""Validate feature record, adjust feature.dtype based on labels records."""
|
179
179
|
if not isinstance(feature, Feature):
|
180
180
|
raise TypeError("feature has to be of type Feature")
|
181
181
|
if feature._state.adding:
|
182
182
|
registries = {record.__class__.__get_name_with_schema__() for record in records}
|
183
183
|
registries_str = "|".join(registries)
|
184
|
-
msg = (
|
185
|
-
f"ln.Feature(name='{feature.name}', type='category',"
|
186
|
-
f" registries='{registries_str}').save()"
|
187
|
-
)
|
184
|
+
msg = f"ln.Feature(name='{feature.name}', type='cat[{registries_str}]').save()"
|
188
185
|
raise ValidationError(f"Feature not validated. If it looks correct: {msg}")
|
189
186
|
|
190
187
|
|
@@ -197,9 +194,9 @@ def get_labels(
|
|
197
194
|
"""{}."""
|
198
195
|
if not isinstance(feature, Feature):
|
199
196
|
raise TypeError("feature has to be of type Feature")
|
200
|
-
if feature.
|
197
|
+
if feature.dtype is None or not feature.dtype.startswith("cat["):
|
201
198
|
raise ValueError("feature does not have linked labels")
|
202
|
-
registries_to_check = feature.
|
199
|
+
registries_to_check = feature.dtype.replace("cat[", "").rstrip("]").split("|")
|
203
200
|
if len(registries_to_check) > 1 and not mute:
|
204
201
|
logger.warning("labels come from multiple registries!")
|
205
202
|
# return an empty query set if self.id is still None
|
@@ -209,13 +206,13 @@ def get_labels(
|
|
209
206
|
for registry in registries_to_check:
|
210
207
|
# currently need to distinguish between ULabel and non-ULabel, because
|
211
208
|
# we only have the feature information for Label
|
212
|
-
if registry == "
|
209
|
+
if registry == "ULabel":
|
213
210
|
links_to_labels = get_label_links(self, registry, feature)
|
214
211
|
label_ids = [link.ulabel_id for link in links_to_labels]
|
215
212
|
qs_by_registry[registry] = ULabel.objects.using(self._state.db).filter(
|
216
213
|
id__in=label_ids
|
217
214
|
)
|
218
|
-
|
215
|
+
elif registry in self.features.accessor_by_orm:
|
219
216
|
qs_by_registry[registry] = getattr(
|
220
217
|
self, self.features.accessor_by_orm[registry]
|
221
218
|
).all()
|
@@ -227,7 +224,7 @@ def get_labels(
|
|
227
224
|
for v in qs_by_registry.values():
|
228
225
|
values += v.list(get_default_str_field(v))
|
229
226
|
return values
|
230
|
-
if len(registries_to_check) == 1:
|
227
|
+
if len(registries_to_check) == 1 and registry in qs_by_registry:
|
231
228
|
return qs_by_registry[registry]
|
232
229
|
else:
|
233
230
|
return qs_by_registry
|
@@ -261,9 +258,9 @@ def add_labels(
|
|
261
258
|
"Please pass a feature, e.g., via: label = ln.ULabel(name='my_label',"
|
262
259
|
" feature=ln.Feature(name='my_feature'))"
|
263
260
|
)
|
264
|
-
if feature.
|
261
|
+
if feature.dtype.startswith("cat["):
|
265
262
|
orm_dict = dict_schema_name_to_model_name(Artifact)
|
266
|
-
for reg in feature.
|
263
|
+
for reg in feature.dtype.replace("cat[", "").rstrip("]").split("|"):
|
267
264
|
orm = orm_dict.get(reg)
|
268
265
|
records_validated += orm.from_values(records, field=field)
|
269
266
|
|
@@ -304,6 +301,9 @@ def add_labels(
|
|
304
301
|
record
|
305
302
|
)
|
306
303
|
for registry_name, records in records_by_registry.items():
|
304
|
+
if registry_name not in self.features.accessor_by_orm:
|
305
|
+
logger.warning(f"skipping {registry_name}")
|
306
|
+
continue
|
307
307
|
labels_accessor = getattr(
|
308
308
|
self, self.features.accessor_by_orm[registry_name]
|
309
309
|
)
|
@@ -313,26 +313,29 @@ def add_labels(
|
|
313
313
|
labels_accessor.remove(*linked_labels)
|
314
314
|
labels_accessor.add(*records, through_defaults={"feature_id": feature.id})
|
315
315
|
feature_set_links = get_feature_set_links(self)
|
316
|
-
feature_set_ids = [link.
|
316
|
+
feature_set_ids = [link.featureset_id for link in feature_set_links.all()]
|
317
317
|
# get all linked features of type Feature
|
318
318
|
feature_sets = FeatureSet.filter(id__in=feature_set_ids).all()
|
319
319
|
linked_features_by_slot = {
|
320
|
-
feature_set_links.filter(
|
320
|
+
feature_set_links.filter(featureset_id=feature_set.id)
|
321
321
|
.one()
|
322
322
|
.slot: feature_set.features.all()
|
323
323
|
for feature_set in feature_sets
|
324
|
-
if "
|
324
|
+
if "Feature" == feature_set.registry
|
325
325
|
}
|
326
326
|
for registry_name, _ in records_by_registry.items():
|
327
327
|
msg = ""
|
328
|
-
if
|
328
|
+
if (
|
329
|
+
not feature.dtype.startswith("cat[")
|
330
|
+
or registry_name not in feature.dtype
|
331
|
+
):
|
329
332
|
if len(msg) > 0:
|
330
333
|
msg += ", "
|
331
334
|
msg += f"linked feature '{feature.name}' to registry '{registry_name}'"
|
332
|
-
if feature.
|
333
|
-
feature.
|
334
|
-
elif registry_name not in feature.
|
335
|
-
feature.
|
335
|
+
if not feature.dtype.startswith("cat["):
|
336
|
+
feature.dtype = f"cat[{registry_name}]"
|
337
|
+
elif registry_name not in feature.dtype:
|
338
|
+
feature.dtype = feature.dtype.rstrip("]") + f"|{registry_name}]"
|
336
339
|
feature.save()
|
337
340
|
if len(msg) > 0:
|
338
341
|
logger.save(msg)
|
@@ -357,11 +360,11 @@ def add_labels(
|
|
357
360
|
).one()
|
358
361
|
old_feature_set_link.delete()
|
359
362
|
remaining_links = self.feature_sets.through.objects.filter(
|
360
|
-
|
363
|
+
featureset_id=feature_set.id
|
361
364
|
).all()
|
362
365
|
if len(remaining_links) == 0:
|
363
366
|
old_feature_set = FeatureSet.filter(
|
364
|
-
id=old_feature_set_link.
|
367
|
+
id=old_feature_set_link.featureset_id
|
365
368
|
).one()
|
366
369
|
logger.info(
|
367
370
|
"nothing links to it anymore, deleting feature set"
|
lamindb/core/_feature_manager.py
CHANGED
@@ -7,7 +7,15 @@ import anndata as ad
|
|
7
7
|
from anndata import AnnData
|
8
8
|
from lamin_utils import colors, logger
|
9
9
|
from lamindb_setup.core.upath import create_path
|
10
|
-
from lnschema_core.models import
|
10
|
+
from lnschema_core.models import (
|
11
|
+
Artifact,
|
12
|
+
Collection,
|
13
|
+
Data,
|
14
|
+
Feature,
|
15
|
+
FeatureValue,
|
16
|
+
Registry,
|
17
|
+
ULabel,
|
18
|
+
)
|
11
19
|
|
12
20
|
from lamindb._feature import convert_numpy_dtype_to_lamin_feature_type
|
13
21
|
from lamindb._feature_set import FeatureSet
|
@@ -18,6 +26,7 @@ from lamindb._registry import (
|
|
18
26
|
transfer_to_default_db,
|
19
27
|
)
|
20
28
|
from lamindb._save import save
|
29
|
+
from lamindb.core.exceptions import ValidationError
|
21
30
|
from lamindb.core.storage import LocalPathClasses
|
22
31
|
|
23
32
|
from ._settings import settings
|
@@ -41,8 +50,8 @@ def get_accessor_by_orm(host: Artifact | Collection) -> dict:
|
|
41
50
|
field.related_model.__get_name_with_schema__(): field.name
|
42
51
|
for field in host._meta.related_objects
|
43
52
|
}
|
44
|
-
dictionary["
|
45
|
-
dictionary["
|
53
|
+
dictionary["Feature"] = "features"
|
54
|
+
dictionary["ULabel"] = "ulabels"
|
46
55
|
return dictionary
|
47
56
|
|
48
57
|
|
@@ -60,10 +69,9 @@ def get_feature_set_by_slot(host) -> dict:
|
|
60
69
|
feature_set_links = (
|
61
70
|
host.feature_sets.through.objects.using(host_db)
|
62
71
|
.filter(**kwargs)
|
63
|
-
.select_related("
|
72
|
+
.select_related("featureset")
|
64
73
|
)
|
65
|
-
|
66
|
-
return {fsl.slot: fsl.feature_set for fsl in feature_set_links}
|
74
|
+
return {fsl.slot: fsl.featureset for fsl in feature_set_links}
|
67
75
|
|
68
76
|
|
69
77
|
def get_label_links(
|
@@ -93,7 +101,7 @@ def print_features(self: Data) -> str:
|
|
93
101
|
|
94
102
|
messages = []
|
95
103
|
for slot, feature_set in get_feature_set_by_slot(self).items():
|
96
|
-
if feature_set.registry != "
|
104
|
+
if feature_set.registry != "Feature":
|
97
105
|
features = feature_set.members
|
98
106
|
# features.first() is a lot slower than features[0] here
|
99
107
|
name_field = get_default_str_field(features[0])
|
@@ -110,17 +118,15 @@ def print_features(self: Data) -> str:
|
|
110
118
|
messages.append(
|
111
119
|
f" {colors.bold(slot)}: {format_repr(feature_set, exclude='hash')}\n"
|
112
120
|
)
|
113
|
-
for name,
|
114
|
-
|
115
|
-
):
|
116
|
-
if row_type == "category" and registries is not None:
|
121
|
+
for name, dtype in feature_set.features.values_list("name", "dtype"):
|
122
|
+
if dtype.startswith("cat["):
|
117
123
|
labels = self.labels.get(features_lookup.get(name), mute=True)
|
118
124
|
indent = ""
|
119
125
|
if isinstance(labels, dict):
|
120
|
-
messages.append(f" 🔗 {name} ({
|
126
|
+
messages.append(f" 🔗 {name} ({dtype})\n")
|
121
127
|
indent = " "
|
122
128
|
else:
|
123
|
-
labels = {
|
129
|
+
labels = {dtype: labels}
|
124
130
|
for registry, registry_labels in labels.items():
|
125
131
|
field = get_default_str_field(registry_labels)
|
126
132
|
values_list = registry_labels.values_list(field, flat=True)
|
@@ -131,7 +137,7 @@ def print_features(self: Data) -> str:
|
|
131
137
|
)
|
132
138
|
messages.append(msg_objects)
|
133
139
|
else:
|
134
|
-
messages.append(f" {name} ({
|
140
|
+
messages.append(f" {name} ({dtype})\n")
|
135
141
|
if messages:
|
136
142
|
messages.insert(0, f"{colors.green('Features')}:\n")
|
137
143
|
return "".join(messages)
|
@@ -171,6 +177,7 @@ def parse_feature_sets_from_anndata(
|
|
171
177
|
type=type,
|
172
178
|
mute=mute,
|
173
179
|
organism=organism,
|
180
|
+
raise_validation_error=False,
|
174
181
|
)
|
175
182
|
if feature_set_var is not None:
|
176
183
|
feature_sets["var"] = feature_set_var
|
@@ -243,14 +250,79 @@ class FeatureManager:
|
|
243
250
|
self._accessor_by_orm = get_accessor_by_orm(self._host)
|
244
251
|
return self._accessor_by_orm
|
245
252
|
|
246
|
-
def add(
|
247
|
-
|
248
|
-
|
249
|
-
|
250
|
-
|
251
|
-
|
252
|
-
|
253
|
-
|
253
|
+
def add(
|
254
|
+
self,
|
255
|
+
features_values: dict[str, str | int | float | bool],
|
256
|
+
slot: str | None = None,
|
257
|
+
feature_field: FieldAttr = Feature.name,
|
258
|
+
):
|
259
|
+
"""Add features stratified by slot.
|
260
|
+
|
261
|
+
Args:
|
262
|
+
features_values: A dictionary of features & values. You can also
|
263
|
+
pass `{feature_identifier: None}` to skip annotation by values.
|
264
|
+
slot: The access slot of the feature sets in the artifact. For
|
265
|
+
instance, `.columns` for `DataFrame` or `.var` or `.obs` for
|
266
|
+
`AnnData`.
|
267
|
+
feature_field: The field of a reference registry to map values.
|
268
|
+
"""
|
269
|
+
if slot is None:
|
270
|
+
slot = "external"
|
271
|
+
keys = features_values.keys()
|
272
|
+
features_values.values()
|
273
|
+
# what if the feature is already part of a linked feature set?
|
274
|
+
# what if artifact annotation by features through link tables and through feature sets
|
275
|
+
# differs?
|
276
|
+
feature_set = FeatureSet.from_values(keys, field=feature_field)
|
277
|
+
self._host.features.add_feature_set(feature_set, slot)
|
278
|
+
# now figure out which of the values go where
|
279
|
+
features_labels = []
|
280
|
+
feature_values = []
|
281
|
+
for key, value in features_values.items():
|
282
|
+
# TODO: use proper field in .get() below
|
283
|
+
feature = feature_set.features.get(name=key)
|
284
|
+
if feature.dtype == "number":
|
285
|
+
if not (isinstance(value, int) or isinstance(value, float)):
|
286
|
+
raise TypeError(
|
287
|
+
f"Value for feature '{key}' with type {feature.dtype} must be a number"
|
288
|
+
)
|
289
|
+
elif feature.dtype == "cat":
|
290
|
+
if not (isinstance(value, str) or isinstance(value, Registry)):
|
291
|
+
raise TypeError(
|
292
|
+
f"Value for feature '{key}' with type '{feature.dtype}' must be a string or record."
|
293
|
+
)
|
294
|
+
elif feature.dtype == "bool":
|
295
|
+
assert isinstance(value, bool)
|
296
|
+
if feature.dtype == "cat":
|
297
|
+
if isinstance(value, Registry):
|
298
|
+
assert not value._state.adding
|
299
|
+
label_record = value
|
300
|
+
assert isinstance(label_record, ULabel)
|
301
|
+
else:
|
302
|
+
label_record = ULabel.filter(name=value).one_or_none()
|
303
|
+
if label_record is None:
|
304
|
+
raise ValidationError(f"Label '{value}' not found in ln.ULabel")
|
305
|
+
features_labels.append((feature, label_record))
|
306
|
+
else:
|
307
|
+
feature_values.append(FeatureValue(feature=feature, value=value))
|
308
|
+
# bulk add all links to ArtifactULabel
|
309
|
+
if features_labels:
|
310
|
+
LinkORM = self._host.ulabels.through
|
311
|
+
links = [
|
312
|
+
LinkORM(
|
313
|
+
artifact_id=self._host.id, feature_id=feature.id, ulabel_id=label.id
|
314
|
+
)
|
315
|
+
for (feature, label) in features_labels
|
316
|
+
]
|
317
|
+
LinkORM.objects.bulk_create(links, ignore_conflicts=True)
|
318
|
+
if feature_values:
|
319
|
+
save(feature_values)
|
320
|
+
LinkORM = self._host.feature_values.through
|
321
|
+
links = [
|
322
|
+
LinkORM(artifact_id=self._host.id, featurevalue_id=feature_value.id)
|
323
|
+
for feature_value in feature_values
|
324
|
+
]
|
325
|
+
LinkORM.objects.bulk_create(links)
|
254
326
|
|
255
327
|
def add_from_df(self, field: FieldAttr = Feature.name, organism: str | None = None):
|
256
328
|
"""Add features from DataFrame."""
|
@@ -355,7 +427,7 @@ class FeatureManager:
|
|
355
427
|
host_id_field = get_host_id_field(self._host)
|
356
428
|
kwargs = {
|
357
429
|
host_id_field: self._host.id,
|
358
|
-
"
|
430
|
+
"featureset": feature_set,
|
359
431
|
"slot": slot,
|
360
432
|
}
|
361
433
|
link_record = (
|
@@ -366,7 +438,7 @@ class FeatureManager:
|
|
366
438
|
if link_record is None:
|
367
439
|
self._host.feature_sets.through(**kwargs).save(using=host_db)
|
368
440
|
if slot in self.feature_set_by_slot:
|
369
|
-
logger.
|
441
|
+
logger.debug(f"replaced existing {slot} feature set")
|
370
442
|
# this _feature_set_by_slot here is private
|
371
443
|
self._feature_set_by_slot[slot] = feature_set # type: ignore
|
372
444
|
|
lamindb/core/_label_manager.py
CHANGED
@@ -4,9 +4,8 @@ from typing import TYPE_CHECKING, Dict
|
|
4
4
|
|
5
5
|
import numpy as np
|
6
6
|
from lamin_utils import colors, logger
|
7
|
-
from lnschema_core.models import Artifact, Collection, Data, Feature, Registry
|
7
|
+
from lnschema_core.models import Artifact, Collection, Data, Feature, LinkORM, Registry
|
8
8
|
|
9
|
-
from lamindb._feature_set import dict_related_model_to_related_name
|
10
9
|
from lamindb._from_values import _print_values
|
11
10
|
from lamindb._registry import (
|
12
11
|
REGISTRY_UNIQUE_FIELD,
|
@@ -17,14 +16,17 @@ from lamindb._registry import (
|
|
17
16
|
from lamindb._save import save
|
18
17
|
|
19
18
|
from ._settings import settings
|
19
|
+
from .schema import dict_related_model_to_related_name
|
20
20
|
|
21
21
|
if TYPE_CHECKING:
|
22
22
|
from lamindb._query_set import QuerySet
|
23
23
|
|
24
24
|
|
25
25
|
def get_labels_as_dict(self: Data):
|
26
|
-
labels = {}
|
27
|
-
|
26
|
+
labels = {} # type: ignore
|
27
|
+
if self.id is None:
|
28
|
+
return labels
|
29
|
+
for related_model_name, related_name in dict_related_model_to_related_name(
|
28
30
|
self.__class__
|
29
31
|
).items():
|
30
32
|
if related_name in {
|
@@ -37,12 +39,13 @@ def get_labels_as_dict(self: Data):
|
|
37
39
|
"environment_of",
|
38
40
|
}:
|
39
41
|
continue
|
40
|
-
|
41
|
-
labels[related_name] = (related_model, self.__getattribute__(related_name))
|
42
|
+
labels[related_name] = (related_model_name, self.__getattribute__(related_name))
|
42
43
|
return labels
|
43
44
|
|
44
45
|
|
45
|
-
def print_labels(
|
46
|
+
def print_labels(
|
47
|
+
self: Data, field: str = "name", ignore_labels_with_feature: bool = True
|
48
|
+
):
|
46
49
|
labels_msg = ""
|
47
50
|
for related_name, (related_model, labels) in get_labels_as_dict(self).items():
|
48
51
|
try:
|
@@ -210,11 +213,11 @@ class LabelManager:
|
|
210
213
|
}
|
211
214
|
for _, feature_set in data.features.feature_set_by_slot.items():
|
212
215
|
# add labels stratified by feature
|
213
|
-
if feature_set.registry == "
|
214
|
-
# df_slot is the Feature table with type
|
216
|
+
if feature_set.registry == "Feature":
|
217
|
+
# df_slot is the Feature table with type
|
215
218
|
df_slot = feature_set.features.df()
|
216
219
|
for _, row in df_slot.iterrows():
|
217
|
-
if row["
|
220
|
+
if row["dtype"].startswith("cat["):
|
218
221
|
logger.info(f"transferring {row['name']}")
|
219
222
|
# labels records from data db
|
220
223
|
labels = data.labels.get(
|
lamindb/core/_run_context.py
CHANGED
@@ -10,9 +10,11 @@ from typing import TYPE_CHECKING
|
|
10
10
|
from lamin_utils import logger
|
11
11
|
from lamindb_setup.core.hashing import hash_file
|
12
12
|
from lnschema_core import Run, Transform, ids
|
13
|
+
from lnschema_core.models import Param, ParamValue, RunParamValue
|
13
14
|
from lnschema_core.types import TransformType
|
14
15
|
from lnschema_core.users import current_user_id
|
15
16
|
|
17
|
+
from lamindb._save import save
|
16
18
|
from lamindb.core._transform_settings import transform as transform_settings
|
17
19
|
|
18
20
|
from ._settings import settings
|
@@ -187,6 +189,26 @@ def pretty_pypackages(dependencies: dict) -> str:
|
|
187
189
|
return " ".join(deps_list)
|
188
190
|
|
189
191
|
|
192
|
+
def parse_and_link_params(run: Run, params: dict) -> None:
|
193
|
+
param_values = []
|
194
|
+
for key, value in params.items():
|
195
|
+
param = Param.filter(name=key).one_or_none()
|
196
|
+
if param is None:
|
197
|
+
dtype = type(value).__name__
|
198
|
+
logger.warning(
|
199
|
+
f"param '{key}' does not yet exist, creating it with dtype '{dtype}'"
|
200
|
+
)
|
201
|
+
param = Param(name=key, dtype=dtype).save()
|
202
|
+
param_value, _ = ParamValue.objects.get_or_create(param=param, value=value)
|
203
|
+
param_values.append(param_value)
|
204
|
+
if param_values:
|
205
|
+
links = [
|
206
|
+
RunParamValue(run_id=run.id, paramvalue_id=param_value.id)
|
207
|
+
for param_value in param_values
|
208
|
+
]
|
209
|
+
RunParamValue.objects.bulk_create(links)
|
210
|
+
|
211
|
+
|
190
212
|
class run_context:
|
191
213
|
"""Global run context."""
|
192
214
|
|
@@ -312,7 +334,6 @@ class run_context:
|
|
312
334
|
)
|
313
335
|
if run is not None: # loaded latest run
|
314
336
|
run.started_at = datetime.now(timezone.utc) # update run time
|
315
|
-
run.json = params # update run params
|
316
337
|
logger.important(f"loaded: {run}")
|
317
338
|
|
318
339
|
if run is None: # create new run
|
@@ -326,6 +347,8 @@ class run_context:
|
|
326
347
|
run.is_consecutive = True if is_run_from_ipython else None
|
327
348
|
# need to save in all cases
|
328
349
|
run.save()
|
350
|
+
if params is not None:
|
351
|
+
parse_and_link_params(run, params)
|
329
352
|
cls.run = run
|
330
353
|
|
331
354
|
from ._track_environment import track_environment
|
lamindb/core/datasets/_core.py
CHANGED
@@ -45,10 +45,8 @@ def file_fcs_alpert19(populate_registries: bool = False) -> Path: # pragma: no
|
|
45
45
|
bt.CellMarker.public().inspect(std, "name").validated, "name"
|
46
46
|
)
|
47
47
|
)
|
48
|
-
ln.Feature(
|
49
|
-
|
50
|
-
).save()
|
51
|
-
ln.Feature(name="organism", type="category", registries=[bt.Organism]).save()
|
48
|
+
ln.Feature(name="assay", dtype=[bt.ExperimentalFactor]).save()
|
49
|
+
ln.Feature(name="organism", dtype=[bt.Organism]).save()
|
52
50
|
ln.settings.verbosity = verbosity
|
53
51
|
return Path(filepath)
|
54
52
|
|
@@ -83,10 +81,8 @@ def file_tsv_rnaseq_nfcore_salmon_merged_gene_counts(
|
|
83
81
|
|
84
82
|
verbosity = ln.settings.verbosity
|
85
83
|
ln.settings.verbosity = "error"
|
86
|
-
ln.Feature(
|
87
|
-
|
88
|
-
).save()
|
89
|
-
ln.Feature(name="organism", type="category", registries=[bt.Organism]).save()
|
84
|
+
ln.Feature(name="assay", dtype=[bt.ExperimentalFactor]).save()
|
85
|
+
ln.Feature(name="organism", dtype=[bt.Organism]).save()
|
90
86
|
bt.ExperimentalFactor.from_public(ontology_id="EFO:0008896").save()
|
91
87
|
ln.settings.verbosity = verbosity
|
92
88
|
|
@@ -200,9 +196,7 @@ def anndata_mouse_sc_lymph_node(
|
|
200
196
|
# cell types
|
201
197
|
ln.save(bt.CellType.from_values(["CL:0000115", "CL:0000738"], "ontology_id"))
|
202
198
|
# assays
|
203
|
-
ln.Feature(
|
204
|
-
name="assay", type="category", registries=[bt.ExperimentalFactor]
|
205
|
-
).save()
|
199
|
+
ln.Feature(name="assay", dtype=[bt.ExperimentalFactor]).save()
|
206
200
|
bt.ExperimentalFactor.from_public(ontology_id="EFO:0008913").save()
|
207
201
|
# genes
|
208
202
|
validated = bt.Gene.public(organism="mouse").validate(
|
@@ -329,13 +323,11 @@ def anndata_human_immune_cells(
|
|
329
323
|
ln.save(bt.CellType.from_values(adata.obs.cell_type, field="name"))
|
330
324
|
ln.save(bt.ExperimentalFactor.from_values(adata.obs.assay, field="name"))
|
331
325
|
ln.save(bt.Tissue.from_values(adata.obs.tissue, field="name"))
|
332
|
-
ln.Feature(name="cell_type",
|
333
|
-
ln.Feature(
|
334
|
-
|
335
|
-
).save()
|
336
|
-
ln.Feature(name="
|
337
|
-
ln.Feature(name="organism", type="category", registries=[bt.Organism]).save()
|
338
|
-
ln.Feature(name="donor", type="category", registries=[ln.ULabel]).save()
|
326
|
+
ln.Feature(name="cell_type", dtype=[bt.CellType]).save()
|
327
|
+
ln.Feature(name="assay", dtype=[bt.ExperimentalFactor]).save()
|
328
|
+
ln.Feature(name="tissue", dtype=[bt.Tissue]).save()
|
329
|
+
ln.Feature(name="organism", dtype=[bt.Organism]).save()
|
330
|
+
ln.Feature(name="donor", dtype=[ln.ULabel]).save()
|
339
331
|
bt.ExperimentalFactor.from_public(ontology_id="EFO:0008913").save()
|
340
332
|
ln.save([ln.ULabel(name=name) for name in adata.obs.donor.unique()])
|
341
333
|
ln.settings.verbosity = verbosity
|
lamindb/core/schema.py
ADDED
@@ -0,0 +1,53 @@
|
|
1
|
+
from typing import Type
|
2
|
+
|
3
|
+
from lnschema_core.models import Feature, FeatureSet, LinkORM, Registry
|
4
|
+
|
5
|
+
|
6
|
+
def dict_schema_name_to_model_name(orm: Type[Registry]) -> dict[str, Registry]:
|
7
|
+
d: dict = {
|
8
|
+
i.related_model.__get_name_with_schema__(): i.related_model
|
9
|
+
for i in orm._meta.related_objects
|
10
|
+
if i.related_name is not None
|
11
|
+
}
|
12
|
+
d.update(
|
13
|
+
{
|
14
|
+
i.related_model.__get_name_with_schema__(): i.related_model
|
15
|
+
for i in orm._meta.many_to_many
|
16
|
+
if i.name is not None
|
17
|
+
}
|
18
|
+
)
|
19
|
+
return d
|
20
|
+
|
21
|
+
|
22
|
+
def dict_related_model_to_related_name(orm: Type[Registry]) -> dict[str, str]:
|
23
|
+
d: dict = {
|
24
|
+
i.related_model.__get_name_with_schema__(): i.related_name
|
25
|
+
for i in orm._meta.related_objects
|
26
|
+
if (i.name is not None and not issubclass(i.related_model, LinkORM))
|
27
|
+
}
|
28
|
+
d.update(
|
29
|
+
{
|
30
|
+
i.related_model.__get_name_with_schema__(): i.name
|
31
|
+
for i in orm._meta.many_to_many
|
32
|
+
if (i.name is not None and not issubclass(i.related_model, LinkORM))
|
33
|
+
}
|
34
|
+
)
|
35
|
+
|
36
|
+
return d
|
37
|
+
|
38
|
+
|
39
|
+
def get_related_name(features_type: type[Registry]) -> str:
|
40
|
+
candidates = [
|
41
|
+
field.related_name
|
42
|
+
for field in FeatureSet._meta.related_objects
|
43
|
+
if field.related_model == features_type
|
44
|
+
]
|
45
|
+
if not candidates:
|
46
|
+
raise ValueError(
|
47
|
+
f"Can't create feature sets from {features_type.__name__} because it's not"
|
48
|
+
" related to it!\nYou need to create a link model between FeatureSet and"
|
49
|
+
" your Registry in your custom schema.\nTo do so, add a"
|
50
|
+
" line:\nfeature_sets = models.ManyToMany(FeatureSet,"
|
51
|
+
" related_name='mythings')\n"
|
52
|
+
)
|
53
|
+
return candidates[0]
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: lamindb
|
3
|
-
Version: 0.
|
3
|
+
Version: 0.72.1
|
4
4
|
Summary: A data framework for biology.
|
5
5
|
Author-email: Lamin Labs <open-source@lamin.ai>
|
6
6
|
Requires-Python: >=3.8
|
@@ -9,8 +9,8 @@ Classifier: Programming Language :: Python :: 3.8
|
|
9
9
|
Classifier: Programming Language :: Python :: 3.9
|
10
10
|
Classifier: Programming Language :: Python :: 3.10
|
11
11
|
Classifier: Programming Language :: Python :: 3.11
|
12
|
-
Requires-Dist: lnschema_core==0.
|
13
|
-
Requires-Dist: lamindb_setup==0.
|
12
|
+
Requires-Dist: lnschema_core==0.67.1
|
13
|
+
Requires-Dist: lamindb_setup==0.72.2
|
14
14
|
Requires-Dist: lamin_utils==0.13.2
|
15
15
|
Requires-Dist: lamin_cli==0.13.2
|
16
16
|
Requires-Dist: rapidfuzz
|
@@ -25,7 +25,7 @@ Requires-Dist: graphviz
|
|
25
25
|
Requires-Dist: psycopg2-binary
|
26
26
|
Requires-Dist: psutil
|
27
27
|
Requires-Dist: lamindb_setup[aws] ; extra == "aws"
|
28
|
-
Requires-Dist: bionty==0.
|
28
|
+
Requires-Dist: bionty==0.43.1 ; extra == "bionty"
|
29
29
|
Requires-Dist: pandas<2 ; extra == "dev"
|
30
30
|
Requires-Dist: pre-commit ; extra == "dev"
|
31
31
|
Requires-Dist: nox ; extra == "dev"
|