lamindb 0.74.1__py3-none-any.whl → 0.74.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lamindb/__init__.py +8 -5
- lamindb/_artifact.py +31 -41
- lamindb/_can_validate.py +24 -22
- lamindb/_collection.py +5 -6
- lamindb/{_annotate.py → _curate.py} +62 -40
- lamindb/_feature.py +7 -9
- lamindb/_feature_set.py +17 -18
- lamindb/_filter.py +5 -5
- lamindb/_finish.py +18 -6
- lamindb/_from_values.py +12 -12
- lamindb/_is_versioned.py +2 -2
- lamindb/_parents.py +7 -7
- lamindb/_query_manager.py +7 -7
- lamindb/_query_set.py +27 -27
- lamindb/{_registry.py → _record.py} +91 -63
- lamindb/_save.py +10 -28
- lamindb/_storage.py +1 -1
- lamindb/_view.py +4 -4
- lamindb/core/__init__.py +16 -12
- lamindb/core/_data.py +10 -10
- lamindb/core/_feature_manager.py +54 -42
- lamindb/core/_label_manager.py +15 -21
- lamindb/core/_mapped_collection.py +4 -1
- lamindb/core/_run_context.py +2 -2
- lamindb/core/_settings.py +2 -1
- lamindb/core/_sync_git.py +22 -12
- lamindb/core/_track_environment.py +5 -1
- lamindb/core/datasets/_core.py +0 -6
- lamindb/core/fields.py +1 -1
- lamindb/core/schema.py +6 -6
- lamindb/core/storage/_backed_access.py +56 -12
- lamindb/core/storage/paths.py +1 -1
- lamindb/core/versioning.py +1 -1
- lamindb/integrations/_vitessce.py +4 -3
- {lamindb-0.74.1.dist-info → lamindb-0.74.3.dist-info}/METADATA +6 -8
- lamindb-0.74.3.dist-info/RECORD +57 -0
- lamindb-0.74.1.dist-info/RECORD +0 -57
- {lamindb-0.74.1.dist-info → lamindb-0.74.3.dist-info}/LICENSE +0 -0
- {lamindb-0.74.1.dist-info → lamindb-0.74.3.dist-info}/WHEEL +0 -0
lamindb/_save.py
CHANGED
@@ -13,7 +13,7 @@ from django.db import IntegrityError, transaction
|
|
13
13
|
from django.utils.functional import partition
|
14
14
|
from lamin_utils import logger
|
15
15
|
from lamindb_setup.core.upath import LocalPathClasses
|
16
|
-
from lnschema_core.models import Artifact,
|
16
|
+
from lnschema_core.models import Artifact, Record
|
17
17
|
|
18
18
|
from lamindb.core._settings import settings
|
19
19
|
from lamindb.core.storage.paths import (
|
@@ -27,9 +27,7 @@ if TYPE_CHECKING:
|
|
27
27
|
from lamindb_setup.core.upath import UPath
|
28
28
|
|
29
29
|
|
30
|
-
def save(
|
31
|
-
records: Iterable[Registry], ignore_conflicts: bool | None = False, **kwargs
|
32
|
-
) -> None:
|
30
|
+
def save(records: Iterable[Record], ignore_conflicts: bool | None = False) -> None:
|
33
31
|
"""Bulk save to registries & storage.
|
34
32
|
|
35
33
|
Note:
|
@@ -42,12 +40,11 @@ def save(
|
|
42
40
|
existing records! Use ``record.save()`` for these use cases.
|
43
41
|
|
44
42
|
Args:
|
45
|
-
records: Multiple :class:`~lamindb.core.
|
43
|
+
records: Multiple :class:`~lamindb.core.Record` objects.
|
46
44
|
ignore_conflicts: If ``True``, do not error if some records violate a
|
47
45
|
unique or another constraint. However, it won't inplace update the id
|
48
46
|
fields of records. If you need records with ids, you need to query
|
49
47
|
them from the database.
|
50
|
-
**kwargs: Get kwargs related to parents.
|
51
48
|
|
52
49
|
Examples:
|
53
50
|
|
@@ -69,7 +66,7 @@ def save(
|
|
69
66
|
>>> transform.save()
|
70
67
|
|
71
68
|
"""
|
72
|
-
if isinstance(records,
|
69
|
+
if isinstance(records, Record):
|
73
70
|
raise ValueError("Please use record.save() if saving a single record.")
|
74
71
|
|
75
72
|
# previously, this was all set based,
|
@@ -87,27 +84,12 @@ def save(
|
|
87
84
|
non_artifacts_with_parents = [
|
88
85
|
r for r in non_artifacts_new if hasattr(r, "_parents")
|
89
86
|
]
|
90
|
-
if len(non_artifacts_with_parents) > 0
|
87
|
+
if len(non_artifacts_with_parents) > 0:
|
91
88
|
# this can only happen within lnschema_bionty right now!!
|
92
89
|
# we might extend to core lamindb later
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
kwargs.get("parents") is None and bt.settings.auto_save_parents
|
97
|
-
):
|
98
|
-
mute = False if kwargs.get("mute") is None else kwargs.get("mute")
|
99
|
-
if not mute:
|
100
|
-
# save the record with parents one by one
|
101
|
-
logger.warning(
|
102
|
-
"now recursing through parents: "
|
103
|
-
"this only happens once, but is much slower than bulk saving"
|
104
|
-
)
|
105
|
-
logger.hint(
|
106
|
-
"you can switch this off via: bt.settings.auto_save_parents ="
|
107
|
-
" False"
|
108
|
-
)
|
109
|
-
for record in non_artifacts_with_parents:
|
110
|
-
record._save_ontology_parents(mute=True)
|
90
|
+
from lnschema_bionty.core import add_ontology
|
91
|
+
|
92
|
+
add_ontology(non_artifacts_with_parents)
|
111
93
|
|
112
94
|
if artifacts:
|
113
95
|
with transaction.atomic():
|
@@ -122,7 +104,7 @@ def save(
|
|
122
104
|
return None
|
123
105
|
|
124
106
|
|
125
|
-
def bulk_create(records: Iterable[
|
107
|
+
def bulk_create(records: Iterable[Record], ignore_conflicts: bool | None = False):
|
126
108
|
records_by_orm = defaultdict(list)
|
127
109
|
for record in records:
|
128
110
|
records_by_orm[record.__class__].append(record)
|
@@ -130,7 +112,7 @@ def bulk_create(records: Iterable[Registry], ignore_conflicts: bool | None = Fal
|
|
130
112
|
orm.objects.bulk_create(records, ignore_conflicts=ignore_conflicts)
|
131
113
|
|
132
114
|
|
133
|
-
def bulk_update(records: Iterable[
|
115
|
+
def bulk_update(records: Iterable[Record], ignore_conflicts: bool | None = False):
|
134
116
|
records_by_orm = defaultdict(list)
|
135
117
|
for record in records:
|
136
118
|
records_by_orm[record.__class__].append(record)
|
lamindb/_storage.py
CHANGED
@@ -6,7 +6,7 @@ from lnschema_core import Storage
|
|
6
6
|
@property # type: ignore
|
7
7
|
@doc_args(Storage.path.__doc__)
|
8
8
|
def path(self) -> UPath:
|
9
|
-
"""{}
|
9
|
+
"""{}""" # noqa: D415
|
10
10
|
access_token = self._access_token if hasattr(self, "_access_token") else None
|
11
11
|
return create_path(self.root, access_token=access_token)
|
12
12
|
|
lamindb/_view.py
CHANGED
@@ -7,7 +7,7 @@ import inspect
|
|
7
7
|
from lamin_utils import colors, logger
|
8
8
|
from lamindb_setup import settings
|
9
9
|
from lamindb_setup._init_instance import get_schema_module_name
|
10
|
-
from lnschema_core import
|
10
|
+
from lnschema_core import Record
|
11
11
|
|
12
12
|
is_run_from_ipython = getattr(builtins, "__IPYTHON__", False)
|
13
13
|
|
@@ -21,7 +21,7 @@ def view(
|
|
21
21
|
n: Display the last `n` rows of a registry.
|
22
22
|
schema: Schema module to view. Default's to
|
23
23
|
`None` and displays all schema modules.
|
24
|
-
registries: List of
|
24
|
+
registries: List of Record names. Defaults to
|
25
25
|
`None` and lists all registries.
|
26
26
|
|
27
27
|
Examples:
|
@@ -44,8 +44,8 @@ def view(
|
|
44
44
|
orm
|
45
45
|
for orm in schema_module.__dict__.values()
|
46
46
|
if inspect.isclass(orm)
|
47
|
-
and issubclass(orm,
|
48
|
-
and orm.__name__ != "
|
47
|
+
and issubclass(orm, Record)
|
48
|
+
and orm.__name__ != "Record"
|
49
49
|
}
|
50
50
|
if registries is not None:
|
51
51
|
filtered_registries = {
|
lamindb/core/__init__.py
CHANGED
@@ -5,7 +5,7 @@ Registries:
|
|
5
5
|
.. autosummary::
|
6
6
|
:toctree: .
|
7
7
|
|
8
|
-
|
8
|
+
Record
|
9
9
|
QuerySet
|
10
10
|
QueryManager
|
11
11
|
RecordsList
|
@@ -19,18 +19,20 @@ Registries:
|
|
19
19
|
HasParents
|
20
20
|
TracksRun
|
21
21
|
TracksUpdates
|
22
|
+
ParamValue
|
23
|
+
FeatureValue
|
22
24
|
InspectResult
|
23
25
|
fields
|
24
26
|
|
25
|
-
|
27
|
+
Curators:
|
26
28
|
|
27
29
|
.. autosummary::
|
28
30
|
:toctree: .
|
29
31
|
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
32
|
+
DataFrameCurator
|
33
|
+
AnnDataCurator
|
34
|
+
MuDataCurator
|
35
|
+
CurateLookup
|
34
36
|
|
35
37
|
Other:
|
36
38
|
|
@@ -57,20 +59,22 @@ Modules:
|
|
57
59
|
from lamin_utils._inspect import InspectResult
|
58
60
|
from lnschema_core.models import (
|
59
61
|
CanValidate,
|
62
|
+
FeatureValue,
|
60
63
|
HasFeatures,
|
61
64
|
HasParams,
|
62
65
|
HasParents,
|
63
66
|
IsVersioned,
|
64
|
-
|
67
|
+
ParamValue,
|
68
|
+
Record,
|
65
69
|
TracksRun,
|
66
70
|
TracksUpdates,
|
67
71
|
)
|
68
72
|
|
69
|
-
from lamindb.
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
73
|
+
from lamindb._curate import (
|
74
|
+
AnnDataCurator,
|
75
|
+
CurateLookup,
|
76
|
+
DataFrameCurator,
|
77
|
+
MuDataCurator,
|
74
78
|
)
|
75
79
|
from lamindb._query_manager import QueryManager
|
76
80
|
from lamindb._query_set import QuerySet, RecordsList
|
lamindb/core/_data.py
CHANGED
@@ -11,7 +11,7 @@ from lnschema_core.models import (
|
|
11
11
|
Feature,
|
12
12
|
FeatureSet,
|
13
13
|
HasFeatures,
|
14
|
-
|
14
|
+
Record,
|
15
15
|
Run,
|
16
16
|
ULabel,
|
17
17
|
__repr__,
|
@@ -20,7 +20,7 @@ from lnschema_core.models import (
|
|
20
20
|
|
21
21
|
from lamindb._parents import view_lineage
|
22
22
|
from lamindb._query_set import QuerySet
|
23
|
-
from lamindb.
|
23
|
+
from lamindb._record import get_default_str_field
|
24
24
|
from lamindb.core._settings import settings
|
25
25
|
|
26
26
|
from ._feature_manager import (
|
@@ -96,7 +96,7 @@ def save_feature_set_links(self: Artifact | Collection) -> None:
|
|
96
96
|
|
97
97
|
@doc_args(HasFeatures.describe.__doc__)
|
98
98
|
def describe(self: HasFeatures, print_types: bool = False):
|
99
|
-
"""{}
|
99
|
+
"""{}""" # noqa: D415
|
100
100
|
# prefetch all many-to-many relationships
|
101
101
|
# doesn't work for describing using artifact
|
102
102
|
# self = (
|
@@ -166,7 +166,7 @@ def describe(self: HasFeatures, print_types: bool = False):
|
|
166
166
|
logger.print(msg)
|
167
167
|
|
168
168
|
|
169
|
-
def validate_feature(feature: Feature, records: list[
|
169
|
+
def validate_feature(feature: Feature, records: list[Record]) -> None:
|
170
170
|
"""Validate feature record, adjust feature.dtype based on labels records."""
|
171
171
|
if not isinstance(feature, Feature):
|
172
172
|
raise TypeError("feature has to be of type Feature")
|
@@ -183,7 +183,7 @@ def get_labels(
|
|
183
183
|
mute: bool = False,
|
184
184
|
flat_names: bool = False,
|
185
185
|
) -> QuerySet | dict[str, QuerySet] | list:
|
186
|
-
"""{}
|
186
|
+
"""{}""" # noqa: D415
|
187
187
|
if not isinstance(feature, Feature):
|
188
188
|
raise TypeError("feature has to be of type Feature")
|
189
189
|
if feature.dtype is None or not feature.dtype.startswith("cat["):
|
@@ -210,7 +210,7 @@ def get_labels(
|
|
210
210
|
).all()
|
211
211
|
if flat_names:
|
212
212
|
# returns a flat list of names
|
213
|
-
from lamindb.
|
213
|
+
from lamindb._record import get_default_str_field
|
214
214
|
|
215
215
|
values = []
|
216
216
|
for v in qs_by_registry.values():
|
@@ -224,18 +224,18 @@ def get_labels(
|
|
224
224
|
|
225
225
|
def add_labels(
|
226
226
|
self,
|
227
|
-
records:
|
227
|
+
records: Record | list[Record] | QuerySet | Iterable,
|
228
228
|
feature: Feature | None = None,
|
229
229
|
*,
|
230
230
|
field: StrField | None = None,
|
231
231
|
) -> None:
|
232
|
-
"""{}
|
232
|
+
"""{}""" # noqa: D415
|
233
233
|
if self._state.adding:
|
234
234
|
raise ValueError("Please save the artifact/collection before adding a label!")
|
235
235
|
|
236
236
|
if isinstance(records, (QuerySet, QuerySet.__base__)): # need to have both
|
237
237
|
records = records.list()
|
238
|
-
if isinstance(records, (str,
|
238
|
+
if isinstance(records, (str, Record)):
|
239
239
|
records = [records]
|
240
240
|
if not isinstance(records, List): # avoids warning for pd Series
|
241
241
|
records = list(records)
|
@@ -260,7 +260,7 @@ def add_labels(
|
|
260
260
|
# ask users to pass records
|
261
261
|
if len(records_validated) == 0:
|
262
262
|
raise ValueError(
|
263
|
-
"Please pass a record (a `
|
263
|
+
"Please pass a record (a `Record` object), not a string, e.g., via:"
|
264
264
|
" label"
|
265
265
|
f" = ln.ULabel(name='{records[0]}')" # type: ignore
|
266
266
|
)
|
lamindb/core/_feature_manager.py
CHANGED
@@ -30,14 +30,14 @@ from lnschema_core.models import (
|
|
30
30
|
ParamManagerArtifact,
|
31
31
|
ParamManagerRun,
|
32
32
|
ParamValue,
|
33
|
-
|
33
|
+
Record,
|
34
34
|
Run,
|
35
35
|
ULabel,
|
36
36
|
)
|
37
37
|
|
38
38
|
from lamindb._feature import FEATURE_TYPES, convert_numpy_dtype_to_lamin_feature_type
|
39
39
|
from lamindb._feature_set import DICT_KEYS_TYPE, FeatureSet
|
40
|
-
from lamindb.
|
40
|
+
from lamindb._record import (
|
41
41
|
REGISTRY_UNIQUE_FIELD,
|
42
42
|
get_default_str_field,
|
43
43
|
transfer_fk_to_default_db_bulk,
|
@@ -118,7 +118,9 @@ def get_feature_set_links(host: Artifact | Collection) -> QuerySet:
|
|
118
118
|
|
119
119
|
def get_link_attr(link: LinkORM | type[LinkORM], data: HasFeatures) -> str:
|
120
120
|
link_model_name = link.__class__.__name__
|
121
|
-
if
|
121
|
+
if (
|
122
|
+
link_model_name == "ModelBase" or link_model_name == "RecordMeta"
|
123
|
+
): # we passed the type of the link
|
122
124
|
link_model_name = link.__name__
|
123
125
|
link_attr = link_model_name.replace(data.__class__.__name__, "")
|
124
126
|
if link_attr == "ExperimentalFactor":
|
@@ -162,6 +164,7 @@ def print_features(
|
|
162
164
|
labels_by_feature[link.feature_id].append(
|
163
165
|
getattr(link, link_attr).name
|
164
166
|
)
|
167
|
+
labels_msgs = []
|
165
168
|
for feature_id, labels_list in labels_by_feature.items():
|
166
169
|
feature = Feature.objects.using(self._state.db).get(id=feature_id)
|
167
170
|
print_values = _print_values(labels_list, n=10)
|
@@ -170,8 +173,9 @@ def print_features(
|
|
170
173
|
dictionary[feature.name] = (
|
171
174
|
labels_list if len(labels_list) > 1 else labels_list[0]
|
172
175
|
)
|
173
|
-
|
174
|
-
if
|
176
|
+
labels_msgs.append(f" '{feature.name}'{type_str} = {print_values}")
|
177
|
+
if len(labels_msgs) > 0:
|
178
|
+
labels_msg = "\n".join(sorted(labels_msgs)) + "\n"
|
175
179
|
msg += labels_msg
|
176
180
|
|
177
181
|
# non-categorical feature values
|
@@ -182,6 +186,7 @@ def print_features(
|
|
182
186
|
getattr(self, f"{attr_name}_values")
|
183
187
|
.values(f"{attr_name}__name", f"{attr_name}__dtype")
|
184
188
|
.annotate(values=custom_aggregate("value", self._state.db))
|
189
|
+
.order_by(f"{attr_name}__name")
|
185
190
|
)
|
186
191
|
if len(feature_values) > 0:
|
187
192
|
for fv in feature_values:
|
@@ -232,7 +237,7 @@ def parse_feature_sets_from_anndata(
|
|
232
237
|
var_field: FieldAttr | None = None,
|
233
238
|
obs_field: FieldAttr = Feature.name,
|
234
239
|
mute: bool = False,
|
235
|
-
organism: str |
|
240
|
+
organism: str | Record | None = None,
|
236
241
|
) -> dict:
|
237
242
|
data_parse = adata
|
238
243
|
if not isinstance(adata, AnnData): # is a path
|
@@ -322,7 +327,12 @@ def infer_feature_type_convert_json(
|
|
322
327
|
return FEATURE_TYPES["str"] + "[ULabel]", value
|
323
328
|
else:
|
324
329
|
return "list[str]", value
|
325
|
-
|
330
|
+
elif first_element_type == Record:
|
331
|
+
return (
|
332
|
+
f"cat[{first_element_type.__get_name_with_schema__()}]",
|
333
|
+
value,
|
334
|
+
)
|
335
|
+
elif isinstance(value, Record):
|
326
336
|
return (f"cat[{value.__class__.__get_name_with_schema__()}]", value)
|
327
337
|
if not mute:
|
328
338
|
logger.warning(f"cannot infer feature type of: {value}, returning '?")
|
@@ -417,7 +427,7 @@ def _add_values(
|
|
417
427
|
feature_param_field: FieldAttr,
|
418
428
|
str_as_ulabel: bool = True,
|
419
429
|
) -> None:
|
420
|
-
"""
|
430
|
+
"""Curate artifact with features & values.
|
421
431
|
|
422
432
|
Args:
|
423
433
|
values: A dictionary of keys (features) & values (labels, numbers, booleans).
|
@@ -430,7 +440,7 @@ def _add_values(
|
|
430
440
|
if isinstance(keys, DICT_KEYS_TYPE):
|
431
441
|
keys = list(keys) # type: ignore
|
432
442
|
# deal with other cases later
|
433
|
-
assert all(isinstance(key, str) for key in keys)
|
443
|
+
assert all(isinstance(key, str) for key in keys) # noqa: S101
|
434
444
|
registry = feature_param_field.field.model
|
435
445
|
is_param = registry == Param
|
436
446
|
model = Param if is_param else Feature
|
@@ -483,10 +493,11 @@ def _add_values(
|
|
483
493
|
f"Value for feature '{key}' with type {feature.dtype} must be a number"
|
484
494
|
)
|
485
495
|
elif feature.dtype.startswith("cat"):
|
486
|
-
if
|
487
|
-
|
488
|
-
|
489
|
-
|
496
|
+
if inferred_type != "?":
|
497
|
+
if not (inferred_type.startswith("cat") or isinstance(value, Record)):
|
498
|
+
raise TypeError(
|
499
|
+
f"Value for feature '{key}' with type '{feature.dtype}' must be a string or record."
|
500
|
+
)
|
490
501
|
elif not inferred_type == feature.dtype:
|
491
502
|
raise ValidationError(
|
492
503
|
f"Expected dtype for '{key}' is '{feature.dtype}', got '{inferred_type}'"
|
@@ -499,15 +510,21 @@ def _add_values(
|
|
499
510
|
feature_value = value_model(**filter_kwargs)
|
500
511
|
feature_values.append(feature_value)
|
501
512
|
else:
|
502
|
-
if isinstance(value,
|
503
|
-
|
504
|
-
|
505
|
-
|
513
|
+
if isinstance(value, Record) or (
|
514
|
+
isinstance(value, Iterable) and isinstance(next(iter(value)), Record)
|
515
|
+
):
|
516
|
+
if isinstance(value, Record):
|
517
|
+
label_records = [value]
|
518
|
+
else:
|
519
|
+
label_records = value # type: ignore
|
520
|
+
for record in label_records:
|
521
|
+
if record._state.adding:
|
522
|
+
raise ValidationError(
|
523
|
+
f"Please save {record} before annotation."
|
524
|
+
)
|
525
|
+
features_labels[record.__class__.__get_name_with_schema__()].append(
|
526
|
+
(feature, record)
|
506
527
|
)
|
507
|
-
label_record = value
|
508
|
-
features_labels[
|
509
|
-
label_record.__class__.__get_name_with_schema__()
|
510
|
-
].append((feature, label_record))
|
511
528
|
else:
|
512
529
|
if isinstance(value, str):
|
513
530
|
values = [value] # type: ignore
|
@@ -589,7 +606,7 @@ def _add_values(
|
|
589
606
|
links = [
|
590
607
|
LinkORM(
|
591
608
|
**{
|
592
|
-
f"{self._host.__get_name_with_schema__().lower()}_id": self._host.id,
|
609
|
+
f"{self._host.__class__.__get_name_with_schema__().lower()}_id": self._host.id,
|
593
610
|
valuefield_id: feature_value.id,
|
594
611
|
}
|
595
612
|
)
|
@@ -606,7 +623,7 @@ def add_values_features(
|
|
606
623
|
feature_field: FieldAttr = Feature.name,
|
607
624
|
str_as_ulabel: bool = True,
|
608
625
|
) -> None:
|
609
|
-
"""
|
626
|
+
"""Curate artifact with features & values.
|
610
627
|
|
611
628
|
Args:
|
612
629
|
values: A dictionary of keys (features) & values (labels, numbers, booleans).
|
@@ -621,7 +638,7 @@ def add_values_params(
|
|
621
638
|
self,
|
622
639
|
values: dict[str, str | int | float | bool],
|
623
640
|
) -> None:
|
624
|
-
"""
|
641
|
+
"""Curate artifact with features & values.
|
625
642
|
|
626
643
|
Args:
|
627
644
|
values: A dictionary of keys (features) & values (labels, numbers, booleans).
|
@@ -630,7 +647,7 @@ def add_values_params(
|
|
630
647
|
|
631
648
|
|
632
649
|
def add_feature_set(self, feature_set: FeatureSet, slot: str) -> None:
|
633
|
-
"""
|
650
|
+
"""Curate artifact with a feature set.
|
634
651
|
|
635
652
|
Args:
|
636
653
|
feature_set: `FeatureSet` A feature set record.
|
@@ -666,10 +683,10 @@ def _add_set_from_df(
|
|
666
683
|
):
|
667
684
|
"""Add feature set corresponding to column names of DataFrame."""
|
668
685
|
if isinstance(self._host, Artifact):
|
669
|
-
assert self._host.accessor == "DataFrame"
|
686
|
+
assert self._host.accessor == "DataFrame" # noqa: S101
|
670
687
|
else:
|
671
688
|
# Collection
|
672
|
-
assert self._host.artifact.accessor == "DataFrame"
|
689
|
+
assert self._host.artifact.accessor == "DataFrame" # noqa: S101
|
673
690
|
|
674
691
|
# parse and register features
|
675
692
|
registry = field.field.model
|
@@ -693,11 +710,11 @@ def _add_set_from_anndata(
|
|
693
710
|
var_field: FieldAttr,
|
694
711
|
obs_field: FieldAttr | None = Feature.name,
|
695
712
|
mute: bool = False,
|
696
|
-
organism: str |
|
713
|
+
organism: str | Record | None = None,
|
697
714
|
):
|
698
715
|
"""Add features from AnnData."""
|
699
716
|
if isinstance(self._host, Artifact):
|
700
|
-
assert self._host.accessor == "AnnData"
|
717
|
+
assert self._host.accessor == "AnnData" # noqa: S101
|
701
718
|
else:
|
702
719
|
raise NotImplementedError()
|
703
720
|
|
@@ -721,13 +738,13 @@ def _add_set_from_mudata(
|
|
721
738
|
var_fields: dict[str, FieldAttr],
|
722
739
|
obs_fields: dict[str, FieldAttr] = None,
|
723
740
|
mute: bool = False,
|
724
|
-
organism: str |
|
741
|
+
organism: str | Record | None = None,
|
725
742
|
):
|
726
743
|
"""Add features from MuData."""
|
727
744
|
if obs_fields is None:
|
728
745
|
obs_fields = {}
|
729
746
|
if isinstance(self._host, Artifact):
|
730
|
-
assert self._host.accessor == "MuData"
|
747
|
+
assert self._host.accessor == "MuData" # noqa: S101
|
731
748
|
else:
|
732
749
|
raise NotImplementedError()
|
733
750
|
|
@@ -753,7 +770,7 @@ def _add_set_from_mudata(
|
|
753
770
|
self._host.save()
|
754
771
|
|
755
772
|
|
756
|
-
def _add_from(self, data: HasFeatures
|
773
|
+
def _add_from(self, data: HasFeatures):
|
757
774
|
"""Transfer features from a artifact or collection."""
|
758
775
|
# This only covers feature sets, though.
|
759
776
|
using_key = settings._using_key
|
@@ -765,23 +782,18 @@ def _add_from(self, data: HasFeatures, parents: bool = True):
|
|
765
782
|
# note here the features are transferred based on an unique field
|
766
783
|
field = REGISTRY_UNIQUE_FIELD.get(registry.__name__.lower(), "uid")
|
767
784
|
# TODO: get a default ID field for the registry
|
768
|
-
if hasattr(registry, "ontology_id")
|
785
|
+
if hasattr(registry, "ontology_id"):
|
769
786
|
field = "ontology_id"
|
770
787
|
elif hasattr(registry, "ensembl_gene_id"):
|
771
788
|
field = "ensembl_gene_id"
|
772
789
|
elif hasattr(registry, "uniprotkb_id"):
|
773
790
|
field = "uniprotkb_id"
|
774
|
-
|
775
|
-
if registry.__get_name_with_schema__() == "bionty.Organism":
|
776
|
-
parents = False
|
777
791
|
# this will be e.g. be a list of ontology_ids or uids
|
778
792
|
member_uids = list(members.values_list(field, flat=True))
|
779
|
-
# create records from ontology_id
|
780
|
-
if field == "ontology_id" and len(member_uids) > 0
|
793
|
+
# create records from ontology_id
|
794
|
+
if field == "ontology_id" and len(member_uids) > 0:
|
781
795
|
# create from bionty
|
782
|
-
|
783
|
-
if len(records) > 0:
|
784
|
-
save(records, parents=parents)
|
796
|
+
save(registry.from_values(member_uids, field=field))
|
785
797
|
validated = registry.validate(member_uids, field=field, mute=True)
|
786
798
|
new_members_uids = list(compress(member_uids, ~validated))
|
787
799
|
new_members = members.filter(**{f"{field}__in": new_members_uids}).all()
|
@@ -797,7 +809,7 @@ def _add_from(self, data: HasFeatures, parents: bool = True):
|
|
797
809
|
# in the previous step transfer_fk_to_default_db_bulk
|
798
810
|
transfer_to_default_db(feature, using_key, mute=mute, transfer_fk=False)
|
799
811
|
logger.info(f"saving {n_new_members} new {registry.__name__} records")
|
800
|
-
save(new_members
|
812
|
+
save(new_members)
|
801
813
|
|
802
814
|
# create a new feature set from feature values using the same uid
|
803
815
|
feature_set_self = FeatureSet.from_values(
|
lamindb/core/_label_manager.py
CHANGED
@@ -8,7 +8,7 @@ from lamin_utils import colors, logger
|
|
8
8
|
from lnschema_core.models import Feature
|
9
9
|
|
10
10
|
from lamindb._from_values import _print_values
|
11
|
-
from lamindb.
|
11
|
+
from lamindb._record import (
|
12
12
|
REGISTRY_UNIQUE_FIELD,
|
13
13
|
get_default_str_field,
|
14
14
|
transfer_fk_to_default_db_bulk,
|
@@ -20,7 +20,7 @@ from ._settings import settings
|
|
20
20
|
from .schema import dict_related_model_to_related_name
|
21
21
|
|
22
22
|
if TYPE_CHECKING:
|
23
|
-
from lnschema_core.models import Artifact, Collection, HasFeatures,
|
23
|
+
from lnschema_core.models import Artifact, Collection, HasFeatures, Record
|
24
24
|
|
25
25
|
from lamindb._query_set import QuerySet
|
26
26
|
|
@@ -66,7 +66,7 @@ def print_labels(self: HasFeatures, field: str = "name", print_types: bool = Fal
|
|
66
66
|
print_values = _print_values(labels_list, n=10)
|
67
67
|
type_str = f": {related_model}" if print_types else ""
|
68
68
|
labels_msg += f" .{related_name}{type_str} = {print_values}\n"
|
69
|
-
except Exception:
|
69
|
+
except Exception: # noqa: S112
|
70
70
|
continue
|
71
71
|
msg = ""
|
72
72
|
if labels_msg:
|
@@ -76,33 +76,29 @@ def print_labels(self: HasFeatures, field: str = "name", print_types: bool = Fal
|
|
76
76
|
|
77
77
|
|
78
78
|
# Alex: is this a label transfer function?
|
79
|
-
def validate_labels(labels: QuerySet | list | dict
|
79
|
+
def validate_labels(labels: QuerySet | list | dict):
|
80
80
|
def validate_labels_registry(
|
81
|
-
labels: QuerySet | list | dict,
|
81
|
+
labels: QuerySet | list | dict,
|
82
82
|
) -> tuple[list[str], list[str]]:
|
83
83
|
if len(labels) == 0:
|
84
84
|
return [], []
|
85
85
|
registry = labels[0].__class__
|
86
86
|
field = REGISTRY_UNIQUE_FIELD.get(registry.__name__.lower(), "uid")
|
87
|
-
if hasattr(registry, "ontology_id")
|
87
|
+
if hasattr(registry, "ontology_id"):
|
88
88
|
field = "ontology_id"
|
89
89
|
elif hasattr(registry, "ensembl_gene_id"):
|
90
90
|
field = "ensembl_gene_id"
|
91
91
|
elif hasattr(registry, "uniprotkb_id"):
|
92
92
|
field = "uniprotkb_id"
|
93
|
-
if registry.__get_name_with_schema__() == "bionty.Organism":
|
94
|
-
parents = False
|
95
93
|
# if the field value is None, use uid field
|
96
94
|
label_uids = np.array(
|
97
95
|
[getattr(label, field) for label in labels if label is not None]
|
98
96
|
)
|
99
|
-
# save labels from ontology_ids
|
97
|
+
# save labels from ontology_ids
|
100
98
|
if field == "ontology_id" and len(label_uids) > 0:
|
101
99
|
try:
|
102
|
-
|
103
|
-
|
104
|
-
save(records, parents=parents)
|
105
|
-
except Exception:
|
100
|
+
save(registry.from_values(label_uids, field=field))
|
101
|
+
except Exception: # noqa S110
|
106
102
|
pass
|
107
103
|
field = "uid"
|
108
104
|
label_uids = np.array(
|
@@ -117,11 +113,9 @@ def validate_labels(labels: QuerySet | list | dict, parents: bool = True):
|
|
117
113
|
if isinstance(labels, Dict):
|
118
114
|
result = {}
|
119
115
|
for registry, labels_registry in labels.items():
|
120
|
-
result[registry] = validate_labels_registry(
|
121
|
-
labels_registry, parents=parents
|
122
|
-
)
|
116
|
+
result[registry] = validate_labels_registry(labels_registry)
|
123
117
|
else:
|
124
|
-
return validate_labels_registry(labels
|
118
|
+
return validate_labels_registry(labels)
|
125
119
|
|
126
120
|
|
127
121
|
class LabelManager:
|
@@ -146,7 +140,7 @@ class LabelManager:
|
|
146
140
|
|
147
141
|
def add(
|
148
142
|
self,
|
149
|
-
records:
|
143
|
+
records: Record | list[Record] | QuerySet,
|
150
144
|
feature: Feature | None = None,
|
151
145
|
) -> None:
|
152
146
|
"""Add one or several labels and associate them with a feature.
|
@@ -176,7 +170,7 @@ class LabelManager:
|
|
176
170
|
|
177
171
|
return get_labels(self._host, feature=feature, mute=mute, flat_names=flat_names)
|
178
172
|
|
179
|
-
def add_from(self, data: HasFeatures
|
173
|
+
def add_from(self, data: HasFeatures) -> None:
|
180
174
|
"""Add labels from an artifact or collection to another artifact or collection.
|
181
175
|
|
182
176
|
Examples:
|
@@ -202,7 +196,7 @@ class LabelManager:
|
|
202
196
|
data_name_lower = data.__class__.__name__.lower()
|
203
197
|
labels_by_features = defaultdict(list)
|
204
198
|
features = set()
|
205
|
-
_, new_labels = validate_labels(labels
|
199
|
+
_, new_labels = validate_labels(labels)
|
206
200
|
if len(new_labels) > 0:
|
207
201
|
transfer_fk_to_default_db_bulk(new_labels, using_key)
|
208
202
|
for label in labels:
|
@@ -237,7 +231,7 @@ class LabelManager:
|
|
237
231
|
transfer_to_default_db(
|
238
232
|
feature, using_key, mute=True, transfer_fk=False
|
239
233
|
)
|
240
|
-
save(new_features
|
234
|
+
save(new_features)
|
241
235
|
if hasattr(self._host, related_name):
|
242
236
|
for feature_name, labels in labels_by_features.items():
|
243
237
|
if feature_name is not None:
|
@@ -107,7 +107,10 @@ class MappedCollection:
|
|
107
107
|
parallel: bool = False,
|
108
108
|
dtype: str | None = None,
|
109
109
|
):
|
110
|
-
|
110
|
+
if join not in {None, "inner", "outer"}: # pragma: nocover
|
111
|
+
raise ValueError(
|
112
|
+
f"join must be one of None, 'inner, or 'outer' but was {type(join)}"
|
113
|
+
)
|
111
114
|
|
112
115
|
if layers_keys is None:
|
113
116
|
self.layers_keys = ["X"]
|