lamindb 0.76.8__py3-none-any.whl → 0.76.10__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lamindb/__init__.py +114 -113
- lamindb/_artifact.py +1206 -1205
- lamindb/_can_validate.py +621 -579
- lamindb/_collection.py +390 -387
- lamindb/_curate.py +1603 -1601
- lamindb/_feature.py +155 -155
- lamindb/_feature_set.py +244 -242
- lamindb/_filter.py +23 -23
- lamindb/_finish.py +250 -256
- lamindb/_from_values.py +403 -382
- lamindb/_is_versioned.py +40 -40
- lamindb/_parents.py +476 -476
- lamindb/_query_manager.py +125 -125
- lamindb/_query_set.py +364 -362
- lamindb/_record.py +668 -649
- lamindb/_run.py +60 -57
- lamindb/_save.py +310 -308
- lamindb/_storage.py +14 -14
- lamindb/_transform.py +130 -127
- lamindb/_ulabel.py +56 -56
- lamindb/_utils.py +9 -9
- lamindb/_view.py +72 -72
- lamindb/core/__init__.py +94 -94
- lamindb/core/_context.py +590 -574
- lamindb/core/_data.py +510 -438
- lamindb/core/_django.py +209 -0
- lamindb/core/_feature_manager.py +994 -867
- lamindb/core/_label_manager.py +289 -253
- lamindb/core/_mapped_collection.py +631 -597
- lamindb/core/_settings.py +188 -187
- lamindb/core/_sync_git.py +138 -138
- lamindb/core/_track_environment.py +27 -27
- lamindb/core/datasets/__init__.py +59 -59
- lamindb/core/datasets/_core.py +581 -571
- lamindb/core/datasets/_fake.py +36 -36
- lamindb/core/exceptions.py +90 -90
- lamindb/core/fields.py +12 -12
- lamindb/core/loaders.py +164 -164
- lamindb/core/schema.py +56 -56
- lamindb/core/storage/__init__.py +25 -25
- lamindb/core/storage/_anndata_accessor.py +741 -740
- lamindb/core/storage/_anndata_sizes.py +41 -41
- lamindb/core/storage/_backed_access.py +98 -98
- lamindb/core/storage/_tiledbsoma.py +204 -204
- lamindb/core/storage/_valid_suffixes.py +21 -21
- lamindb/core/storage/_zarr.py +110 -110
- lamindb/core/storage/objects.py +62 -62
- lamindb/core/storage/paths.py +172 -172
- lamindb/core/subsettings/__init__.py +12 -12
- lamindb/core/subsettings/_creation_settings.py +38 -38
- lamindb/core/subsettings/_transform_settings.py +21 -21
- lamindb/core/types.py +19 -19
- lamindb/core/versioning.py +146 -158
- lamindb/integrations/__init__.py +12 -12
- lamindb/integrations/_vitessce.py +107 -107
- lamindb/setup/__init__.py +14 -14
- lamindb/setup/core/__init__.py +4 -4
- {lamindb-0.76.8.dist-info → lamindb-0.76.10.dist-info}/LICENSE +201 -201
- {lamindb-0.76.8.dist-info → lamindb-0.76.10.dist-info}/METADATA +8 -8
- lamindb-0.76.10.dist-info/RECORD +61 -0
- {lamindb-0.76.8.dist-info → lamindb-0.76.10.dist-info}/WHEEL +1 -1
- lamindb-0.76.8.dist-info/RECORD +0 -60
lamindb/core/_label_manager.py
CHANGED
@@ -1,253 +1,289 @@
|
|
1
|
-
from __future__ import annotations
|
2
|
-
|
3
|
-
from collections import defaultdict
|
4
|
-
from typing import TYPE_CHECKING
|
5
|
-
|
6
|
-
import numpy as np
|
7
|
-
from
|
8
|
-
from
|
9
|
-
|
10
|
-
|
11
|
-
from lamindb.
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
from .
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
from
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
"
|
33
|
-
"
|
34
|
-
"
|
35
|
-
"
|
36
|
-
"
|
37
|
-
"
|
38
|
-
"
|
39
|
-
"
|
40
|
-
"
|
41
|
-
"
|
42
|
-
"
|
43
|
-
"
|
44
|
-
"
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
if
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
|
184
|
-
"""
|
185
|
-
from
|
186
|
-
|
187
|
-
|
188
|
-
|
189
|
-
|
190
|
-
|
191
|
-
|
192
|
-
|
193
|
-
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
|
202
|
-
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
|
207
|
-
|
208
|
-
|
209
|
-
|
210
|
-
|
211
|
-
|
212
|
-
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
-
|
217
|
-
|
218
|
-
|
219
|
-
|
220
|
-
|
221
|
-
|
222
|
-
|
223
|
-
|
224
|
-
|
225
|
-
|
226
|
-
|
227
|
-
|
228
|
-
#
|
229
|
-
|
230
|
-
|
231
|
-
|
232
|
-
|
233
|
-
|
234
|
-
|
235
|
-
|
236
|
-
|
237
|
-
|
238
|
-
|
239
|
-
|
240
|
-
|
241
|
-
|
242
|
-
|
243
|
-
|
244
|
-
|
245
|
-
|
246
|
-
|
247
|
-
|
248
|
-
|
249
|
-
|
250
|
-
|
251
|
-
|
252
|
-
|
253
|
-
|
1
|
+
from __future__ import annotations
|
2
|
+
|
3
|
+
from collections import defaultdict
|
4
|
+
from typing import TYPE_CHECKING
|
5
|
+
|
6
|
+
import numpy as np
|
7
|
+
from django.db import connections
|
8
|
+
from lamin_utils import colors, logger
|
9
|
+
from lnschema_core.models import CanValidate, Feature
|
10
|
+
|
11
|
+
from lamindb._from_values import _print_values
|
12
|
+
from lamindb._record import (
|
13
|
+
REGISTRY_UNIQUE_FIELD,
|
14
|
+
get_name_field,
|
15
|
+
transfer_fk_to_default_db_bulk,
|
16
|
+
transfer_to_default_db,
|
17
|
+
)
|
18
|
+
from lamindb._save import save
|
19
|
+
|
20
|
+
from ._django import get_artifact_with_related, get_related_model
|
21
|
+
from ._settings import settings
|
22
|
+
from .schema import dict_related_model_to_related_name
|
23
|
+
|
24
|
+
if TYPE_CHECKING:
|
25
|
+
from lnschema_core.models import Artifact, Collection, Record
|
26
|
+
|
27
|
+
from lamindb._query_set import QuerySet
|
28
|
+
|
29
|
+
|
30
|
+
def get_labels_as_dict(self: Artifact | Collection, links: bool = False):
    """Collect the label relations of an artifact or collection.

    Args:
        self: Record whose related managers are inspected.
        links: Passed through unchanged to
            ``dict_related_model_to_related_name``.

    Returns:
        A dict keyed by related name. Each value is a tuple of the related
        model name and the result of ``getattr(self, related_name).all()``.
        The dict is empty when ``self.id`` is ``None``.
    """
    # related names that are never reported as labels
    skipped_names = {
        "feature_sets",
        "artifacts",
        "input_of_runs",
        "collections",
        "_source_code_artifact_of",
        "_report_of",
        "_environment_of",
        "links_collection",
        "links_artifact",
        "links_feature_set",
        "previous_runs",
        "_feature_values",
        "_action_targets",
        "_lnschema_core_collection__actions_+",  # something seems off with this one
        "_actions",
    }
    # a record without an id has nothing to report
    if self.id is None:
        return {}
    name_map = dict_related_model_to_related_name(self.__class__, links=links)
    return {
        related_name: (related_model_name, getattr(self, related_name).all())
        for related_model_name, related_name in name_map.items()
        if related_name not in skipped_names
    }
|
60
|
+
|
61
|
+
|
62
|
+
def _print_labels_postgres(
|
63
|
+
self: Artifact | Collection, m2m_data: dict | None = None, print_types: bool = False
|
64
|
+
) -> str:
|
65
|
+
labels_msg = ""
|
66
|
+
if not m2m_data:
|
67
|
+
artifact_meta = get_artifact_with_related(self, include_m2m=True)
|
68
|
+
m2m_data = artifact_meta.get("related_data", {}).get("m2m", {})
|
69
|
+
if m2m_data:
|
70
|
+
for related_name, labels in m2m_data.items():
|
71
|
+
if not labels or related_name == "feature_sets":
|
72
|
+
continue
|
73
|
+
related_model = get_related_model(self, related_name)
|
74
|
+
print_values = _print_values(labels.values(), n=10)
|
75
|
+
type_str = f": {related_model}" if print_types else ""
|
76
|
+
labels_msg += f" .{related_name}{type_str} = {print_values}\n"
|
77
|
+
return labels_msg
|
78
|
+
|
79
|
+
|
80
|
+
def print_labels(
    self: Artifact | Collection,
    m2m_data: dict | None = None,
    print_types: bool = False,
):
    """Build the "Labels" section describing an artifact or collection.

    Args:
        self: Record whose labels are described.
        m2m_data: Forwarded to ``_print_labels_postgres`` on the postgres path.
        print_types: Whether to append the related model after each related
            name.

    Returns:
        A string made of a "Labels" header followed by one line per relation,
        or an empty string when there are no label lines.
    """
    use_postgres_path = (
        not self._state.adding and connections[self._state.db].vendor == "postgresql"
    )
    if use_postgres_path:
        labels_msg = _print_labels_postgres(self, m2m_data, print_types)
    else:
        chunks = []
        for related_name, (related_model, labels) in get_labels_as_dict(self).items():
            # there is a try except block here to deal with schema inconsistencies
            # during transfer between instances
            try:
                # NOTE(review): the name field is looked up on the host class,
                # not on the label registry -- confirm this is intended
                field = get_name_field(self.__class__)
                labels_list = list(labels.values_list(field, flat=True))
                if len(labels_list) == 0:
                    continue
                # NOTE(review): the result of this call is discarded -- confirm
                # whether it is still needed
                get_name_field(labels)
                shown_values = _print_values(labels_list, n=10)
                suffix = f": {related_model}" if print_types else ""
                chunks.append(f" .{related_name}{suffix} = {shown_values}\n")
            except Exception:  # noqa: S112
                continue
        labels_msg = "".join(chunks)

    # the header is only emitted when there is at least one label line
    if not labels_msg:
        return ""
    return f" {colors.italic('Labels')}\n" + labels_msg
|
108
|
+
|
109
|
+
|
110
|
+
# Alex: is this a label transfer function?
|
111
|
+
def validate_labels(labels: QuerySet | list | dict):
    """Split label records into validated ones and new ones.

    Args:
        labels: Either a sequence of label records that all belong to one
            registry, or a dict mapping a registry key to such a sequence.

    Returns:
        For a sequence, a ``(validated_labels, new_labels)`` tuple. For a
        dict, a dict with the same keys whose values are those tuples.
    """

    def validate_labels_registry(
        labels: QuerySet | list | dict,
    ) -> tuple[list[str], list[str]]:
        """Validate the labels of a single registry."""
        if len(labels) == 0:
            return [], []
        # the registry is taken from the first record
        registry = labels[0].__class__
        field = REGISTRY_UNIQUE_FIELD.get(registry.__name__.lower(), "uid")
        if hasattr(registry, "_ontology_id_field"):
            field = registry._ontology_id_field
        # if the field value is None, use uid field
        label_uids = np.array(
            [getattr(label, field) for label in labels if label is not None]
        )
        # save labels from ontology_ids
        if hasattr(registry, "_ontology_id_field") and len(label_uids) > 0:
            # best effort: a failure to create records from the ontology ids is
            # ignored and validation continues on the uid field
            try:
                labels_records = registry.from_values(label_uids, field=field)
                save([r for r in labels_records if r._state.adding])
            except Exception:  # noqa S110
                pass
            field = "uid"
            label_uids = np.array(
                [getattr(label, field) for label in labels if label is not None]
            )
        if issubclass(registry, CanValidate):
            validated = registry.validate(label_uids, field=field, mute=True)
            validated_uids = label_uids[validated]
            validated_labels = registry.filter(
                **{f"{field}__in": validated_uids}
            ).list()
            # NOTE(review): positions refer to label_uids, which skips None
            # entries -- assumes labels contains no None; confirm
            new_labels = [labels[int(i)] for i in np.argwhere(~validated).flatten()]
        else:
            validated_labels = []
            new_labels = list(labels)
        return validated_labels, new_labels

    if isinstance(labels, dict):
        # the mapping was previously built but never returned, so dict input
        # always produced None
        return {
            registry: validate_labels_registry(labels_registry)
            for registry, labels_registry in labels.items()
        }
    return validate_labels_registry(labels)
|
154
|
+
|
155
|
+
|
156
|
+
class LabelManager:
    """Label manager.

    Manages untyped labels (:class:`~lamindb.ULabel`) and arbitrary typed
    labels (e.g., :class:`~bionty.CellLine`) of a host record and associates
    labels with features.
    """

    def __init__(self, host: Artifact | Collection):
        # the record whose labels are managed
        self._host = host

    def __repr__(self) -> str:
        # fall back to a fixed text when there is nothing to show
        return print_labels(self._host) or "no linked labels"

    def add(
        self,
        records: Record | list[Record] | QuerySet,
        feature: Feature | None = None,
    ) -> None:
        """Add one or several labels and associate them with a feature.

        Args:
            records: Label records to add.
            feature: Feature under which to group the labels.
        """
        from ._data import add_labels

        return add_labels(self._host, records=records, feature=feature)

    def get(
        self,
        feature: Feature,
        mute: bool = False,
        flat_names: bool = False,
    ) -> QuerySet | dict[str, QuerySet] | list:
        """Get labels given a feature.

        Args:
            feature: Feature under which labels are grouped.
            mute: Show no logging.
            flat_names: Flatten list to names rather than returning records.
        """
        from ._data import get_labels

        return get_labels(self._host, feature=feature, mute=mute, flat_names=flat_names)

    def add_from(self, data: Artifact | Collection, transfer_logs: dict = None) -> None:
        """Add labels from an artifact or collection to another artifact or collection.

        Args:
            data: Record whose labels are copied onto the host of this manager.
            transfer_logs: Dict passed on to the transfer helpers; a fresh one
                with the keys ``"mapped"``, ``"transferred"`` and ``"run"`` is
                created when omitted.

        Examples:
            >>> artifact1 = ln.Artifact(pd.DataFrame(index=[0, 1])).save()
            >>> artifact2 = ln.Artifact(pd.DataFrame(index=[2, 3])).save()
            >>> ulabels = ln.ULabel.from_values(["Label1", "Label2"], field="name")
            >>> ln.save(ulabels)
            >>> labels = ln.ULabel.filter(name__icontains = "label").all()
            >>> artifact1.ulabels.set(labels)
            >>> artifact2.labels.add_from(artifact1)
        """
        from django.db.utils import ProgrammingError

        if transfer_logs is None:
            transfer_logs = {"mapped": [], "transferred": [], "run": None}
        using_key = settings._using_key
        # names derived from the class of ``data``; they do not change per relation
        data_name_lower = data.__class__.__name__.lower()
        links_attr = f"links_{data_name_lower}"
        data_id_kwarg = f"{data_name_lower}_id"
        for related_name, (_, labels) in get_labels_as_dict(data).items():
            labels = labels.all()
            try:
                if not labels.exists():
                    continue
                # look for features
                features = set()
                labels_by_features = defaultdict(list)
                _, new_labels = validate_labels(labels)
                if new_labels:
                    transfer_fk_to_default_db_bulk(
                        new_labels, using_key, transfer_logs=transfer_logs
                    )
                for label in labels:
                    # if the link table doesn't follow this convention, we'll ignore it
                    key = None
                    if hasattr(label, links_attr):
                        link = getattr(label, links_attr).get(
                            **{data_id_kwarg: data.id}
                        )
                        linked_feature = link.feature
                        if linked_feature is not None:
                            features.add(linked_feature)
                            key = linked_feature.name
                    label_returned = transfer_to_default_db(
                        label,
                        using_key,
                        transfer_logs=transfer_logs,
                        transfer_fk=False,
                        save=True,
                    )
                    # TODO: refactor return value of transfer to default db
                    labels_by_features[key].append(
                        label if label_returned is None else label_returned
                    )
                # treat features
                _, new_features = validate_labels(list(features))
                if new_features:
                    transfer_fk_to_default_db_bulk(
                        new_features, using_key, transfer_logs=transfer_logs
                    )
                    for feature in new_features:
                        transfer_to_default_db(
                            feature,
                            using_key,
                            transfer_logs=transfer_logs,
                            transfer_fk=False,
                        )
                    save(new_features)
                if hasattr(self._host, related_name):
                    # NOTE: ``labels`` is rebound here, so the warning below
                    # reports the most recent group if this stage fails
                    for feature_name, labels in labels_by_features.items():
                        feature_id = (
                            Feature.get(name=feature_name).id
                            if feature_name is not None
                            else None
                        )
                        getattr(self._host, related_name).add(
                            *labels, through_defaults={"feature_id": feature_id}
                        )
            # ProgrammingError is raised when schemas don't match between source and target instances
            except ProgrammingError:
                logger.warning(
                    f"{related_name} labels cannot be transferred because schema module does not exist in target instance: {labels}"
                )
                continue
|