lamindb 0.73.2__py3-none-any.whl → 0.74.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lamindb/__init__.py +8 -8
- lamindb/_artifact.py +21 -26
- lamindb/_feature.py +2 -2
- lamindb/_finish.py +9 -3
- lamindb/_from_values.py +21 -9
- lamindb/_query_manager.py +1 -1
- lamindb/_query_set.py +5 -3
- lamindb/_registry.py +2 -2
- lamindb/_run.py +2 -1
- lamindb/_storage.py +0 -6
- lamindb/core/__init__.py +7 -5
- lamindb/core/_data.py +7 -4
- lamindb/core/_feature_manager.py +243 -108
- lamindb/core/_run_context.py +5 -23
- lamindb/core/_settings.py +43 -49
- lamindb/core/datasets/_core.py +3 -3
- lamindb/core/storage/paths.py +3 -3
- lamindb/core/subsettings/__init__.py +12 -0
- lamindb/core/subsettings/_creation_settings.py +38 -0
- lamindb/core/subsettings/_transform_settings.py +21 -0
- {lamindb-0.73.2.dist-info → lamindb-0.74.1.dist-info}/METADATA +5 -5
- {lamindb-0.73.2.dist-info → lamindb-0.74.1.dist-info}/RECORD +24 -22
- lamindb/core/_transform_settings.py +0 -9
- {lamindb-0.73.2.dist-info → lamindb-0.74.1.dist-info}/LICENSE +0 -0
- {lamindb-0.73.2.dist-info → lamindb-0.74.1.dist-info}/WHEEL +0 -0
lamindb/core/_feature_manager.py
CHANGED
@@ -10,9 +10,8 @@ import numpy as np
|
|
10
10
|
import pandas as pd
|
11
11
|
from anndata import AnnData
|
12
12
|
from django.contrib.postgres.aggregates import ArrayAgg
|
13
|
-
from django.db import connections
|
14
|
-
from django.db.models import Aggregate
|
15
|
-
from django.db.models.functions import Concat
|
13
|
+
from django.db import connections
|
14
|
+
from django.db.models import Aggregate
|
16
15
|
from lamin_utils import colors, logger
|
17
16
|
from lamindb_setup.core.upath import create_path
|
18
17
|
from lnschema_core.models import (
|
@@ -24,8 +23,15 @@ from lnschema_core.models import (
|
|
24
23
|
FeatureManagerCollection,
|
25
24
|
FeatureValue,
|
26
25
|
HasFeatures,
|
26
|
+
HasParams,
|
27
27
|
LinkORM,
|
28
|
+
Param,
|
29
|
+
ParamManager,
|
30
|
+
ParamManagerArtifact,
|
31
|
+
ParamManagerRun,
|
32
|
+
ParamValue,
|
28
33
|
Registry,
|
34
|
+
Run,
|
29
35
|
ULabel,
|
30
36
|
)
|
31
37
|
|
@@ -43,6 +49,9 @@ from lamindb.core.storage import LocalPathClasses
|
|
43
49
|
|
44
50
|
from ._label_manager import get_labels_as_dict
|
45
51
|
from ._settings import settings
|
52
|
+
from .schema import (
|
53
|
+
dict_related_model_to_related_name,
|
54
|
+
)
|
46
55
|
|
47
56
|
if TYPE_CHECKING:
|
48
57
|
from lnschema_core.types import FieldAttr
|
@@ -107,8 +116,10 @@ def get_feature_set_links(host: Artifact | Collection) -> QuerySet:
|
|
107
116
|
return feature_set_links
|
108
117
|
|
109
118
|
|
110
|
-
def get_link_attr(link: LinkORM, data: HasFeatures) -> str:
|
119
|
+
def get_link_attr(link: LinkORM | type[LinkORM], data: HasFeatures) -> str:
|
111
120
|
link_model_name = link.__class__.__name__
|
121
|
+
if link_model_name == "ModelBase": # we passed the type of the link
|
122
|
+
link_model_name = link.__name__
|
112
123
|
link_attr = link_model_name.replace(data.__class__.__name__, "")
|
113
124
|
if link_attr == "ExperimentalFactor":
|
114
125
|
link_attr = "experimental_factor"
|
@@ -131,42 +142,51 @@ def custom_aggregate(field, using: str):
|
|
131
142
|
|
132
143
|
|
133
144
|
def print_features(
|
134
|
-
self: HasFeatures
|
145
|
+
self: HasFeatures | HasParams,
|
146
|
+
print_types: bool = False,
|
147
|
+
to_dict: bool = False,
|
148
|
+
print_params: bool = False,
|
135
149
|
) -> str | dict[str, Any]:
|
136
150
|
from lamindb._from_values import _print_values
|
137
151
|
|
138
152
|
msg = ""
|
139
153
|
dictionary = {}
|
140
154
|
# categorical feature values
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
for
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
155
|
+
if not print_params:
|
156
|
+
labels_msg = ""
|
157
|
+
labels_by_feature = defaultdict(list)
|
158
|
+
for _, (_, links) in get_labels_as_dict(self, links=True).items():
|
159
|
+
for link in links:
|
160
|
+
if link.feature_id is not None:
|
161
|
+
link_attr = get_link_attr(link, self)
|
162
|
+
labels_by_feature[link.feature_id].append(
|
163
|
+
getattr(link, link_attr).name
|
164
|
+
)
|
165
|
+
for feature_id, labels_list in labels_by_feature.items():
|
166
|
+
feature = Feature.objects.using(self._state.db).get(id=feature_id)
|
167
|
+
print_values = _print_values(labels_list, n=10)
|
168
|
+
type_str = f": {feature.dtype}" if print_types else ""
|
169
|
+
if to_dict:
|
170
|
+
dictionary[feature.name] = (
|
171
|
+
labels_list if len(labels_list) > 1 else labels_list[0]
|
172
|
+
)
|
173
|
+
labels_msg += f" '{feature.name}'{type_str} = {print_values}\n"
|
174
|
+
if labels_msg:
|
175
|
+
msg += labels_msg
|
159
176
|
|
160
177
|
# non-categorical feature values
|
161
178
|
non_labels_msg = ""
|
162
|
-
if self.id is not None and self.__class__ == Artifact:
|
163
|
-
|
164
|
-
|
165
|
-
|
179
|
+
if self.id is not None and self.__class__ == Artifact or self.__class__ == Run:
|
180
|
+
attr_name = "param" if print_params else "feature"
|
181
|
+
feature_values = (
|
182
|
+
getattr(self, f"{attr_name}_values")
|
183
|
+
.values(f"{attr_name}__name", f"{attr_name}__dtype")
|
184
|
+
.annotate(values=custom_aggregate("value", self._state.db))
|
185
|
+
)
|
166
186
|
if len(feature_values) > 0:
|
167
187
|
for fv in feature_values:
|
168
|
-
feature_name = fv["
|
169
|
-
feature_dtype = fv["
|
188
|
+
feature_name = fv[f"{attr_name}__name"]
|
189
|
+
feature_dtype = fv[f"{attr_name}__dtype"]
|
170
190
|
values = fv["values"]
|
171
191
|
# TODO: understand why the below is necessary
|
172
192
|
if not isinstance(values, list):
|
@@ -174,24 +194,33 @@ def print_features(
|
|
174
194
|
if to_dict:
|
175
195
|
dictionary[feature_name] = values if len(values) > 1 else values[0]
|
176
196
|
type_str = f": {feature_dtype}" if print_types else ""
|
177
|
-
|
197
|
+
printed_values = (
|
198
|
+
_print_values(values, n=10, quotes=False)
|
199
|
+
if not feature_dtype.startswith("list")
|
200
|
+
else values
|
201
|
+
)
|
202
|
+
non_labels_msg += f" '{feature_name}'{type_str} = {printed_values}\n"
|
178
203
|
msg += non_labels_msg
|
179
204
|
|
180
205
|
if msg != "":
|
181
|
-
|
206
|
+
header = "Features" if not print_params else "Params"
|
207
|
+
msg = f" {colors.italic(header)}\n" + msg
|
182
208
|
|
183
209
|
# feature sets
|
184
|
-
|
185
|
-
|
186
|
-
|
187
|
-
|
188
|
-
|
189
|
-
|
190
|
-
|
191
|
-
|
192
|
-
|
193
|
-
|
194
|
-
|
210
|
+
if not print_params:
|
211
|
+
feature_set_msg = ""
|
212
|
+
for slot, feature_set in get_feature_set_by_slot_(self).items():
|
213
|
+
features = feature_set.members
|
214
|
+
# features.first() is a lot slower than features[0] here
|
215
|
+
name_field = get_default_str_field(features[0])
|
216
|
+
feature_names = list(features.values_list(name_field, flat=True)[:20])
|
217
|
+
type_str = f": {feature_set.registry}" if print_types else ""
|
218
|
+
feature_set_msg += (
|
219
|
+
f" '{slot}'{type_str} = {_print_values(feature_names)}\n"
|
220
|
+
)
|
221
|
+
if feature_set_msg:
|
222
|
+
msg += f" {colors.italic('Feature sets')}\n"
|
223
|
+
msg += feature_set_msg
|
195
224
|
if to_dict:
|
196
225
|
return dictionary
|
197
226
|
else:
|
@@ -214,7 +243,7 @@ def parse_feature_sets_from_anndata(
|
|
214
243
|
using_key = settings._using_key
|
215
244
|
data_parse = backed_access(filepath, using_key)
|
216
245
|
else:
|
217
|
-
data_parse = ad.
|
246
|
+
data_parse = ad.read_h5ad(filepath, backed="r")
|
218
247
|
type = "float"
|
219
248
|
else:
|
220
249
|
type = (
|
@@ -258,7 +287,9 @@ def parse_feature_sets_from_anndata(
|
|
258
287
|
return feature_sets
|
259
288
|
|
260
289
|
|
261
|
-
def infer_feature_type_convert_json(
|
290
|
+
def infer_feature_type_convert_json(
|
291
|
+
value: Any, mute: bool = False, str_as_ulabel: bool = True
|
292
|
+
) -> tuple[str, Any]:
|
262
293
|
if isinstance(value, bool):
|
263
294
|
return FEATURE_TYPES["bool"], value
|
264
295
|
elif isinstance(value, int):
|
@@ -266,39 +297,53 @@ def infer_feature_type_convert_json(value: Any, mute: bool = False) -> tuple[str
|
|
266
297
|
elif isinstance(value, float):
|
267
298
|
return FEATURE_TYPES["float"], value
|
268
299
|
elif isinstance(value, str):
|
269
|
-
|
300
|
+
if str_as_ulabel:
|
301
|
+
return FEATURE_TYPES["str"] + "[ULabel]", value
|
302
|
+
else:
|
303
|
+
return "str", value
|
270
304
|
elif isinstance(value, Iterable) and not isinstance(value, (str, bytes)):
|
271
305
|
if isinstance(value, (pd.Series, np.ndarray)):
|
272
|
-
return convert_numpy_dtype_to_lamin_feature_type(
|
306
|
+
return convert_numpy_dtype_to_lamin_feature_type(
|
307
|
+
value.dtype, str_as_cat=str_as_ulabel
|
308
|
+
), list(value)
|
309
|
+
if isinstance(value, dict):
|
310
|
+
return "dict", value
|
273
311
|
if len(value) > 0: # type: ignore
|
274
312
|
first_element_type = type(next(iter(value)))
|
275
313
|
if all(isinstance(elem, first_element_type) for elem in value):
|
276
314
|
if first_element_type == bool:
|
277
|
-
return FEATURE_TYPES[
|
315
|
+
return f"list[{FEATURE_TYPES['bool']}]", value
|
278
316
|
elif first_element_type == int:
|
279
|
-
return FEATURE_TYPES[
|
317
|
+
return f"list[{FEATURE_TYPES['int']}]", value
|
280
318
|
elif first_element_type == float:
|
281
|
-
return FEATURE_TYPES[
|
319
|
+
return f"list[{FEATURE_TYPES['float']}]", value
|
282
320
|
elif first_element_type == str:
|
283
|
-
|
321
|
+
if str_as_ulabel:
|
322
|
+
return FEATURE_TYPES["str"] + "[ULabel]", value
|
323
|
+
else:
|
324
|
+
return "list[str]", value
|
325
|
+
elif isinstance(value, Registry):
|
326
|
+
return (f"cat[{value.__class__.__get_name_with_schema__()}]", value)
|
284
327
|
if not mute:
|
285
328
|
logger.warning(f"cannot infer feature type of: {value}, returning '?")
|
286
329
|
return ("?", value)
|
287
330
|
|
288
331
|
|
289
|
-
def __init__(self, host: Artifact | Collection):
|
332
|
+
def __init__(self, host: Artifact | Collection | Run):
|
290
333
|
self._host = host
|
291
334
|
self._feature_set_by_slot_ = None
|
292
335
|
self._accessor_by_registry_ = None
|
293
336
|
|
294
337
|
|
295
338
|
def __repr__(self) -> str:
|
296
|
-
return print_features(self._host) # type: ignore
|
339
|
+
return print_features(self._host, print_params=(self.__class__ == ParamManager)) # type: ignore
|
297
340
|
|
298
341
|
|
299
342
|
def get_values(self) -> dict[str, Any]:
|
300
343
|
"""Get feature values as a dictionary."""
|
301
|
-
return print_features(
|
344
|
+
return print_features(
|
345
|
+
self._host, to_dict=True, print_params=(self.__class__ == ParamManager)
|
346
|
+
) # type: ignore
|
302
347
|
|
303
348
|
|
304
349
|
def __getitem__(self, slot) -> QuerySet:
|
@@ -316,19 +361,25 @@ def __getitem__(self, slot) -> QuerySet:
|
|
316
361
|
@classmethod # type: ignore
|
317
362
|
def filter(cls, **expression) -> QuerySet:
|
318
363
|
"""Filter features."""
|
364
|
+
if cls in {FeatureManagerArtifact, FeatureManagerCollection}:
|
365
|
+
model = Feature
|
366
|
+
value_model = FeatureValue
|
367
|
+
else:
|
368
|
+
model = Param
|
369
|
+
value_model = ParamValue
|
319
370
|
keys_normalized = [key.split("__")[0] for key in expression]
|
320
|
-
validated =
|
371
|
+
validated = model.validate(keys_normalized, field="name", mute=True)
|
321
372
|
if sum(validated) != len(keys_normalized):
|
322
373
|
raise ValidationError(
|
323
374
|
f"Some keys in the filter expression are not registered as features: {np.array(keys_normalized)[~validated]}"
|
324
375
|
)
|
325
376
|
new_expression = {}
|
326
|
-
features =
|
377
|
+
features = model.filter(name__in=keys_normalized).all().distinct()
|
327
378
|
for key, value in expression.items():
|
328
379
|
normalized_key = key.split("__")[0]
|
329
380
|
feature = features.get(name=normalized_key)
|
330
381
|
if not feature.dtype.startswith("cat"):
|
331
|
-
feature_value =
|
382
|
+
feature_value = value_model.filter(feature=feature, value=value).one()
|
332
383
|
new_expression["feature_values"] = feature_value
|
333
384
|
else:
|
334
385
|
if isinstance(value, str):
|
@@ -336,10 +387,12 @@ def filter(cls, **expression) -> QuerySet:
|
|
336
387
|
new_expression["ulabels"] = label
|
337
388
|
else:
|
338
389
|
raise NotImplementedError
|
339
|
-
if cls == FeatureManagerArtifact:
|
390
|
+
if cls == FeatureManagerArtifact or cls == ParamManagerArtifact:
|
340
391
|
return Artifact.filter(**new_expression)
|
341
|
-
|
392
|
+
elif cls == FeatureManagerCollection:
|
342
393
|
return Collection.filter(**new_expression)
|
394
|
+
elif cls == ParamManagerRun:
|
395
|
+
return Run.filter(**new_expression)
|
343
396
|
|
344
397
|
|
345
398
|
@property # type: ignore
|
@@ -358,16 +411,17 @@ def _accessor_by_registry(self):
|
|
358
411
|
return self._accessor_by_registry_
|
359
412
|
|
360
413
|
|
361
|
-
def
|
414
|
+
def _add_values(
|
362
415
|
self,
|
363
416
|
values: dict[str, str | int | float | bool],
|
364
|
-
|
417
|
+
feature_param_field: FieldAttr,
|
418
|
+
str_as_ulabel: bool = True,
|
365
419
|
) -> None:
|
366
420
|
"""Annotate artifact with features & values.
|
367
421
|
|
368
422
|
Args:
|
369
423
|
values: A dictionary of keys (features) & values (labels, numbers, booleans).
|
370
|
-
|
424
|
+
feature_param_field: The field of a reference registry to map keys of the
|
371
425
|
dictionary.
|
372
426
|
"""
|
373
427
|
# rename to distinguish from the values inside the dict
|
@@ -377,62 +431,83 @@ def add_values(
|
|
377
431
|
keys = list(keys) # type: ignore
|
378
432
|
# deal with other cases later
|
379
433
|
assert all(isinstance(key, str) for key in keys)
|
380
|
-
registry =
|
381
|
-
|
434
|
+
registry = feature_param_field.field.model
|
435
|
+
is_param = registry == Param
|
436
|
+
model = Param if is_param else Feature
|
437
|
+
value_model = ParamValue if is_param else FeatureValue
|
438
|
+
model_name = "Param" if is_param else "Feature"
|
439
|
+
if is_param:
|
440
|
+
if self._host.__class__ == Artifact:
|
441
|
+
if self._host.type != "model":
|
442
|
+
raise ValidationError("Can only set params for model-like artifacts.")
|
443
|
+
else:
|
444
|
+
if self._host.__class__ == Artifact:
|
445
|
+
if self._host.type != "dataset" and self._host.type is not None:
|
446
|
+
raise ValidationError(
|
447
|
+
"Can only set features for dataset-like artifacts."
|
448
|
+
)
|
449
|
+
validated = registry.validate(keys, field=feature_param_field, mute=True)
|
382
450
|
keys_array = np.array(keys)
|
383
451
|
validated_keys = keys_array[validated]
|
384
452
|
if validated.sum() != len(keys):
|
385
453
|
not_validated_keys = keys_array[~validated]
|
386
454
|
hint = "\n".join(
|
387
455
|
[
|
388
|
-
f" ln.
|
456
|
+
f" ln.{model_name}(name='{key}', dtype='{infer_feature_type_convert_json(features_values[key], str_as_ulabel=str_as_ulabel)[0]}').save()"
|
389
457
|
for key in not_validated_keys
|
390
458
|
]
|
391
459
|
)
|
392
460
|
msg = (
|
393
461
|
f"These keys could not be validated: {not_validated_keys.tolist()}\n"
|
394
|
-
f"
|
462
|
+
f"Here is how to create a {model_name.lower()}:\n\n{hint}"
|
395
463
|
)
|
396
464
|
raise ValidationError(msg)
|
397
465
|
registry.from_values(
|
398
466
|
validated_keys,
|
399
|
-
field=
|
467
|
+
field=feature_param_field,
|
400
468
|
)
|
401
469
|
# figure out which of the values go where
|
402
|
-
features_labels =
|
470
|
+
features_labels = defaultdict(list)
|
403
471
|
feature_values = []
|
404
472
|
not_validated_values = []
|
405
473
|
for key, value in features_values.items():
|
406
|
-
feature =
|
474
|
+
feature = model.filter(name=key).one()
|
407
475
|
inferred_type, converted_value = infer_feature_type_convert_json(
|
408
|
-
value,
|
476
|
+
value,
|
477
|
+
mute=True,
|
478
|
+
str_as_ulabel=str_as_ulabel,
|
409
479
|
)
|
410
480
|
if feature.dtype == "number":
|
411
481
|
if inferred_type not in {"int", "float"}:
|
412
482
|
raise TypeError(
|
413
483
|
f"Value for feature '{key}' with type {feature.dtype} must be a number"
|
414
484
|
)
|
415
|
-
elif feature.dtype
|
485
|
+
elif feature.dtype.startswith("cat"):
|
416
486
|
if not (inferred_type.startswith("cat") or isinstance(value, Registry)):
|
417
487
|
raise TypeError(
|
418
488
|
f"Value for feature '{key}' with type '{feature.dtype}' must be a string or record."
|
419
489
|
)
|
420
|
-
elif feature.dtype
|
421
|
-
|
490
|
+
elif not inferred_type == feature.dtype:
|
491
|
+
raise ValidationError(
|
492
|
+
f"Expected dtype for '{key}' is '{feature.dtype}', got '{inferred_type}'"
|
493
|
+
)
|
422
494
|
if not feature.dtype.startswith("cat"):
|
423
495
|
# can remove the query once we have the unique constraint
|
424
|
-
|
425
|
-
|
426
|
-
).one_or_none()
|
496
|
+
filter_kwargs = {model_name.lower(): feature, "value": converted_value}
|
497
|
+
feature_value = value_model.filter(**filter_kwargs).one_or_none()
|
427
498
|
if feature_value is None:
|
428
|
-
feature_value =
|
499
|
+
feature_value = value_model(**filter_kwargs)
|
429
500
|
feature_values.append(feature_value)
|
430
501
|
else:
|
431
502
|
if isinstance(value, Registry):
|
432
|
-
|
503
|
+
if value._state.adding:
|
504
|
+
raise ValidationError(
|
505
|
+
"Please save your label record before annotation."
|
506
|
+
)
|
433
507
|
label_record = value
|
434
|
-
|
435
|
-
|
508
|
+
features_labels[
|
509
|
+
label_record.__class__.__get_name_with_schema__()
|
510
|
+
].append((feature, label_record))
|
436
511
|
else:
|
437
512
|
if isinstance(value, str):
|
438
513
|
values = [value] # type: ignore
|
@@ -447,7 +522,7 @@ def add_values(
|
|
447
522
|
if validated.sum() != len(values):
|
448
523
|
not_validated_values += values_array[~validated].tolist()
|
449
524
|
label_records = ULabel.from_values(validated_values, field="name")
|
450
|
-
features_labels += [
|
525
|
+
features_labels["ULabel"] += [
|
451
526
|
(feature, label_record) for label_record in label_records
|
452
527
|
]
|
453
528
|
if not_validated_values:
|
@@ -457,40 +532,67 @@ def add_values(
|
|
457
532
|
)
|
458
533
|
msg = (
|
459
534
|
f"These values could not be validated: {not_validated_values}\n"
|
460
|
-
f"
|
535
|
+
f"Here is how to create ulabels for them:\n\n{hint}"
|
461
536
|
)
|
462
537
|
raise ValidationError(msg)
|
463
538
|
# bulk add all links to ArtifactULabel
|
464
539
|
if features_labels:
|
465
|
-
|
466
|
-
|
467
|
-
|
468
|
-
|
469
|
-
|
470
|
-
|
471
|
-
|
472
|
-
|
473
|
-
|
474
|
-
|
475
|
-
|
476
|
-
|
477
|
-
|
478
|
-
|
479
|
-
|
480
|
-
|
481
|
-
|
482
|
-
|
483
|
-
|
484
|
-
|
485
|
-
|
486
|
-
|
487
|
-
|
488
|
-
|
540
|
+
if list(features_labels.keys()) != ["ULabel"]:
|
541
|
+
related_names = dict_related_model_to_related_name(self._host.__class__)
|
542
|
+
else:
|
543
|
+
related_names = {"ULabel": "ulabels"}
|
544
|
+
for class_name, registry_features_labels in features_labels.items():
|
545
|
+
related_name = related_names[class_name] # e.g., "ulabels"
|
546
|
+
LinkORM = getattr(self._host, related_name).through
|
547
|
+
field_name = f"{get_link_attr(LinkORM, self._host)}_id" # e.g., ulabel_id
|
548
|
+
links = [
|
549
|
+
LinkORM(
|
550
|
+
**{
|
551
|
+
"artifact_id": self._host.id,
|
552
|
+
"feature_id": feature.id,
|
553
|
+
field_name: label.id,
|
554
|
+
}
|
555
|
+
)
|
556
|
+
for (feature, label) in registry_features_labels
|
557
|
+
]
|
558
|
+
# a link might already exist
|
559
|
+
try:
|
560
|
+
save(links, ignore_conflicts=False)
|
561
|
+
except Exception:
|
562
|
+
save(links, ignore_conflicts=True)
|
563
|
+
# now deal with links that were previously saved without a feature_id
|
564
|
+
saved_links = LinkORM.filter(
|
565
|
+
**{
|
566
|
+
"artifact_id": self._host.id,
|
567
|
+
f"{field_name}__in": [
|
568
|
+
l.id for _, l in registry_features_labels
|
569
|
+
],
|
570
|
+
}
|
571
|
+
)
|
572
|
+
for link in saved_links.all():
|
573
|
+
# TODO: also check for inconsistent features
|
574
|
+
if link.feature_id is None:
|
575
|
+
link.feature_id = [
|
576
|
+
f.id
|
577
|
+
for f, l in registry_features_labels
|
578
|
+
if l.id == getattr(link, field_name)
|
579
|
+
][0]
|
580
|
+
link.save()
|
489
581
|
if feature_values:
|
490
582
|
save(feature_values)
|
491
|
-
|
583
|
+
if is_param:
|
584
|
+
LinkORM = self._host.param_values.through
|
585
|
+
valuefield_id = "paramvalue_id"
|
586
|
+
else:
|
587
|
+
LinkORM = self._host.feature_values.through
|
588
|
+
valuefield_id = "featurevalue_id"
|
492
589
|
links = [
|
493
|
-
LinkORM(
|
590
|
+
LinkORM(
|
591
|
+
**{
|
592
|
+
f"{self._host.__get_name_with_schema__().lower()}_id": self._host.id,
|
593
|
+
valuefield_id: feature_value.id,
|
594
|
+
}
|
595
|
+
)
|
494
596
|
for feature_value in feature_values
|
495
597
|
]
|
496
598
|
# a link might already exist, to avoid raising a unique constraint
|
@@ -498,6 +600,35 @@ def add_values(
|
|
498
600
|
save(links, ignore_conflicts=True)
|
499
601
|
|
500
602
|
|
603
|
+
def add_values_features(
|
604
|
+
self,
|
605
|
+
values: dict[str, str | int | float | bool],
|
606
|
+
feature_field: FieldAttr = Feature.name,
|
607
|
+
str_as_ulabel: bool = True,
|
608
|
+
) -> None:
|
609
|
+
"""Annotate artifact with features & values.
|
610
|
+
|
611
|
+
Args:
|
612
|
+
values: A dictionary of keys (features) & values (labels, numbers, booleans).
|
613
|
+
feature_field: The field of a reference registry to map keys of the
|
614
|
+
dictionary.
|
615
|
+
str_as_ulabel: Whether to interpret string values as ulabels.
|
616
|
+
"""
|
617
|
+
_add_values(self, values, feature_field, str_as_ulabel=str_as_ulabel)
|
618
|
+
|
619
|
+
|
620
|
+
def add_values_params(
|
621
|
+
self,
|
622
|
+
values: dict[str, str | int | float | bool],
|
623
|
+
) -> None:
|
624
|
+
"""Annotate artifact with features & values.
|
625
|
+
|
626
|
+
Args:
|
627
|
+
values: A dictionary of keys (features) & values (labels, numbers, booleans).
|
628
|
+
"""
|
629
|
+
_add_values(self, values, Param.name, str_as_ulabel=False)
|
630
|
+
|
631
|
+
|
501
632
|
def add_feature_set(self, feature_set: FeatureSet, slot: str) -> None:
|
502
633
|
"""Annotate artifact with a feature set.
|
503
634
|
|
@@ -686,15 +817,19 @@ def _add_from(self, data: HasFeatures, parents: bool = True):
|
|
686
817
|
|
687
818
|
|
688
819
|
FeatureManager.__init__ = __init__
|
820
|
+
ParamManager.__init__ = __init__
|
689
821
|
FeatureManager.__repr__ = __repr__
|
822
|
+
ParamManager.__repr__ = __repr__
|
690
823
|
FeatureManager.__getitem__ = __getitem__
|
691
824
|
FeatureManager.get_values = get_values
|
692
825
|
FeatureManager._feature_set_by_slot = _feature_set_by_slot
|
693
826
|
FeatureManager._accessor_by_registry = _accessor_by_registry
|
694
|
-
FeatureManager.add_values =
|
827
|
+
FeatureManager.add_values = add_values_features
|
695
828
|
FeatureManager.add_feature_set = add_feature_set
|
696
829
|
FeatureManager._add_set_from_df = _add_set_from_df
|
697
830
|
FeatureManager._add_set_from_anndata = _add_set_from_anndata
|
698
831
|
FeatureManager._add_set_from_mudata = _add_set_from_mudata
|
699
832
|
FeatureManager._add_from = _add_from
|
700
833
|
FeatureManager.filter = filter
|
834
|
+
ParamManager.add_values = add_values_params
|
835
|
+
ParamManager.get_values = get_values
|
lamindb/core/_run_context.py
CHANGED
@@ -14,8 +14,6 @@ from lnschema_core.models import Param, ParamValue, RunParamValue
|
|
14
14
|
from lnschema_core.types import TransformType
|
15
15
|
from lnschema_core.users import current_user_id
|
16
16
|
|
17
|
-
from lamindb.core._transform_settings import transform as transform_settings
|
18
|
-
|
19
17
|
from ._settings import settings
|
20
18
|
from ._sync_git import get_transform_reference_from_git_repo
|
21
19
|
from .exceptions import (
|
@@ -24,6 +22,7 @@ from .exceptions import (
|
|
24
22
|
NoTitleError,
|
25
23
|
UpdateTransformSettings,
|
26
24
|
)
|
25
|
+
from .subsettings._transform_settings import transform_settings
|
27
26
|
from .versioning import bump_version as bump_version_function
|
28
27
|
|
29
28
|
if TYPE_CHECKING:
|
@@ -211,26 +210,6 @@ def pretty_pypackages(dependencies: dict) -> str:
|
|
211
210
|
return " ".join(deps_list)
|
212
211
|
|
213
212
|
|
214
|
-
def parse_and_link_params(run: Run, params: dict) -> None:
|
215
|
-
param_values = []
|
216
|
-
for key, value in params.items():
|
217
|
-
param = Param.filter(name=key).one_or_none()
|
218
|
-
if param is None:
|
219
|
-
dtype = type(value).__name__
|
220
|
-
logger.warning(
|
221
|
-
f"param '{key}' does not yet exist, creating it with dtype '{dtype}'"
|
222
|
-
)
|
223
|
-
param = Param(name=key, dtype=dtype).save()
|
224
|
-
param_value, _ = ParamValue.objects.get_or_create(param=param, value=value)
|
225
|
-
param_values.append(param_value)
|
226
|
-
if param_values:
|
227
|
-
links = [
|
228
|
-
RunParamValue(run_id=run.id, paramvalue_id=param_value.id)
|
229
|
-
for param_value in param_values
|
230
|
-
]
|
231
|
-
RunParamValue.objects.bulk_create(links)
|
232
|
-
|
233
|
-
|
234
213
|
class run_context:
|
235
214
|
"""Global run context."""
|
236
215
|
|
@@ -308,6 +287,9 @@ class run_context:
|
|
308
287
|
path=path
|
309
288
|
)
|
310
289
|
transform_type = TransformType.script
|
290
|
+
# overwrite whatever is auto-detected in the notebook or script
|
291
|
+
if transform_settings.name is not None:
|
292
|
+
name = transform_settings.name
|
311
293
|
cls._create_or_load_transform(
|
312
294
|
stem_uid=stem_uid,
|
313
295
|
version=version,
|
@@ -370,7 +352,7 @@ class run_context:
|
|
370
352
|
# need to save in all cases
|
371
353
|
run.save()
|
372
354
|
if params is not None:
|
373
|
-
|
355
|
+
run.params.add_values(params)
|
374
356
|
cls.run = run
|
375
357
|
|
376
358
|
from ._track_environment import track_environment
|