lamindb 0.76.9__py3-none-any.whl → 0.76.11__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lamindb/__init__.py +15 -14
- lamindb/_artifact.py +2 -1
- lamindb/_can_validate.py +46 -4
- lamindb/_collection.py +2 -1
- lamindb/_curate.py +3 -1
- lamindb/_feature_set.py +3 -1
- lamindb/_finish.py +19 -18
- lamindb/_from_values.py +110 -89
- lamindb/_query_set.py +3 -1
- lamindb/_record.py +81 -62
- lamindb/_run.py +3 -0
- lamindb/_save.py +3 -1
- lamindb/_transform.py +9 -6
- lamindb/core/_context.py +94 -78
- lamindb/core/_data.py +113 -41
- lamindb/core/_django.py +209 -0
- lamindb/core/_feature_manager.py +140 -13
- lamindb/core/_label_manager.py +58 -23
- lamindb/core/_mapped_collection.py +1 -1
- lamindb/core/_settings.py +2 -1
- lamindb/core/exceptions.py +9 -9
- lamindb/core/storage/_anndata_accessor.py +2 -1
- lamindb/core/versioning.py +2 -14
- {lamindb-0.76.9.dist-info → lamindb-0.76.11.dist-info}/METADATA +8 -8
- {lamindb-0.76.9.dist-info → lamindb-0.76.11.dist-info}/RECORD +27 -26
- {lamindb-0.76.9.dist-info → lamindb-0.76.11.dist-info}/LICENSE +0 -0
- {lamindb-0.76.9.dist-info → lamindb-0.76.11.dist-info}/WHEEL +0 -0
lamindb/core/_django.py
ADDED
@@ -0,0 +1,209 @@
|
|
1
|
+
from django.contrib.postgres.aggregates import ArrayAgg
|
2
|
+
from django.db import connection
|
3
|
+
from django.db.models import F, OuterRef, Q, Subquery
|
4
|
+
from django.db.models.fields.related import ForeignKey, ManyToManyField
|
5
|
+
from django.db.models.fields.reverse_related import ManyToManyRel, ManyToOneRel
|
6
|
+
from django.db.models.functions import JSONObject
|
7
|
+
from lnschema_core.models import Artifact, FeatureSet, Record
|
8
|
+
|
9
|
+
from .schema import dict_related_model_to_related_name
|
10
|
+
|
11
|
+
|
12
|
+
def get_related_model(model, field_name):
    """Resolve the model on the other side of a relational field.

    Looks up ``field_name`` through ``model._meta.get_field`` and returns the
    related model for forward (``ForeignKey``, ``ManyToManyField``) and reverse
    (``ManyToOneRel``, ``ManyToManyRel``) relations.

    Failures are reported through the return value rather than raised: a field
    of any other type yields an ``"Unexpected field type: ..."`` string and any
    exception yields an ``"Error: ..."`` string, so callers that need a model
    class have to check what they got back.
    """
    try:
        relation = model._meta.get_field(field_name)
        # Forward ForeignKey or ManyToManyField
        if isinstance(relation, (ForeignKey, ManyToManyField)):
            return relation.remote_field.model
        # Reverse ForeignKey or ManyToManyField
        if isinstance(relation, (ManyToOneRel, ManyToManyRel)):
            return relation.related_model
        return f"Unexpected field type: {type(relation)}"
    except Exception as e:
        return f"Error: {str(e)}"
|
26
|
+
|
27
|
+
|
28
|
+
def get_artifact_with_related(
    artifact: Record,
    include_fk: bool = False,
    include_m2m: bool = False,
    include_feature_link: bool = False,
    include_featureset: bool = False,
) -> "dict | None":  # quoted so the union is not evaluated at definition time
    """Fetch an artifact with its related data.

    Runs one annotated query on the database the record belongs to
    (``artifact._state.db``), looking the record up by ``uid``. The
    aggregations rely on ``django.contrib.postgres``.

    Args:
        artifact: The record to look up; its class is the model being queried.
        include_fk: Annotate every relational column of the model with an
            ``{"id", "name"}`` JSON object.
        include_m2m: Aggregate ``{"id", "name"}`` objects for every related
            name that does not start with an underscore.
        include_feature_link: Aggregate the rows of every link table that has
            a ``feature`` attribute.
        include_featureset: Aggregate the feature set links and resolve them
            with ``get_featureset_m2m_relations``.

    Returns:
        ``None`` if no record with ``artifact.uid`` is found. Otherwise a dict
        with the keys ``id``, ``uid`` and ``related_data``, where
        ``related_data`` always has the keys ``m2m`` (related name ->
        ``{id: name}``, empty relations dropped), ``fk``, ``link`` and
        ``featuresets``.
    """
    from lamindb._can_validate import get_name_field

    # Prefixes used to tag annotations so they can be sorted back into groups.
    fk_prefix = "fkfield_"
    m2m_prefix = "m2mfield_"
    link_prefix = "linkfield_"

    model = artifact.__class__

    m2m_relations = (
        []
        if not include_m2m
        else [
            v
            for v in dict_related_model_to_related_name(model).values()
            if not v.startswith("_")
        ]
    )
    link_tables = (
        []
        if not include_feature_link
        else list(dict_related_model_to_related_name(model, links=True).values())
    )

    # Clear previous queries
    # NOTE(review): this resets the query log of the default connection, not
    # of `artifact._state.db`, and looks like debugging residue - confirm
    # whether it can be removed.
    connection.queries_log.clear()

    annotations = {}

    if include_fk:
        # only inspect the model's columns when FK annotations were requested
        for fk in (f.name for f in model._meta.fields if f.is_relation):
            name_field = get_name_field(get_related_model(model, fk))
            annotations[f"{fk_prefix}{fk}"] = JSONObject(
                id=F(f"{fk}__id"), name=F(f"{fk}__{name_field}")
            )

    for name in m2m_relations:
        related_model = get_related_model(model, name)
        name_field = get_name_field(related_model)
        annotations[f"{m2m_prefix}{name}"] = ArrayAgg(
            JSONObject(id=F(f"{name}__id"), name=F(f"{name}__{name_field}")),
            filter=Q(**{f"{name}__isnull": False}),
            distinct=True,
        )

    for link in link_tables:
        link_model = getattr(model, link).rel.related_model
        if not hasattr(link_model, "feature"):
            continue
        # e.g. "links_cell_type" -> "celltype", the label column of the link table
        label_field = link.removeprefix("links_").replace("_", "")
        annotations[f"{link_prefix}{link}"] = Subquery(
            link_model.objects.filter(artifact=OuterRef("pk"))
            .annotate(
                data=JSONObject(
                    id=F("id"),
                    feature=F("feature"),
                    **{label_field: F(label_field)},
                )
            )
            .values("artifact")
            .annotate(json_agg=ArrayAgg("data"))
            .values("json_agg")
        )

    if include_featureset:
        annotations["featuresets"] = Subquery(
            model.feature_sets.through.objects.filter(artifact=OuterRef("pk"))
            .annotate(
                data=JSONObject(
                    id=F("id"),
                    slot=F("slot"),
                    featureset=F("featureset"),
                )
            )
            .values("artifact")
            .annotate(json_agg=ArrayAgg("data"))
            .values("json_agg")
        )

    artifact_meta = (
        model.objects.using(artifact._state.db)
        .filter(uid=artifact.uid)
        .annotate(**annotations)
        .values("id", "uid", *annotations.keys())
        .first()
    )

    if not artifact_meta:
        return None

    related_data: dict = {"m2m": {}, "fk": {}, "link": {}, "featuresets": {}}
    for k, v in artifact_meta.items():
        if k.startswith(m2m_prefix):
            related_data["m2m"][k.removeprefix(m2m_prefix)] = v
        elif k.startswith(fk_prefix):
            related_data["fk"][k.removeprefix(fk_prefix)] = v
        elif k.startswith(link_prefix):
            related_data["link"][k.removeprefix(link_prefix)] = v
        elif k == "featuresets":
            if v:
                related_data["featuresets"] = get_featureset_m2m_relations(
                    artifact, {i["featureset"]: i["slot"] for i in v}
                )

    # collapse each aggregated list of {"id", "name"} objects into {id: name}
    related_data["m2m"] = {
        k: {item["id"]: item["name"] for item in v}
        for k, v in related_data["m2m"].items()
        if v
    }

    return {
        **{name: artifact_meta[name] for name in ["id", "uid"]},
        "related_data": related_data,
    }
|
146
|
+
|
147
|
+
|
148
|
+
def get_featureset_m2m_relations(
    artifact: Artifact, slot_featureset: dict, limit: int = 20
):
    """Fetch all many-to-many relationships for given feature sets.

    Args:
        artifact: Only used to pick the database (``artifact._state.db``).
        slot_featureset: Despite its name, this is read as a mapping from
            feature set id to slot: its keys select the feature sets and its
            values are returned as the slot of each one.
        limit: Maximum number of related records considered per relation.

    Returns:
        A dict mapping each feature set id to a ``(slot, members)`` tuple,
        where ``members`` maps the related model's schema-qualified name to
        the list of member names. Relations without members are left out.
    """
    from lamindb._can_validate import get_name_field

    prefix = "m2mfield_"
    annotations = {}
    related_names = {}
    for name in dict_related_model_to_related_name(FeatureSet).values():
        if name.startswith("_") or name == "artifacts":
            continue

        related_model = get_related_model(FeatureSet, name)
        name_field = get_name_field(related_model)

        # Get the correct field names for the through table
        through_model = getattr(FeatureSet, name).through
        through_name = through_model.__name__.replace("FeatureSet", "")
        related_field = through_name.lower().replace("_", "")

        # Subquery to get limited related records
        through_rows = through_model.objects.filter(featureset=OuterRef("pk"))
        limited_related = Subquery(through_rows.values(related_field)[:limit])

        member = JSONObject(id=F(f"{name}__id"), name=F(f"{name}__{name_field}"))
        annotations[f"{prefix}{name}"] = ArrayAgg(
            member,
            filter=Q(**{f"{name}__id__in": limited_related}),
            distinct=True,
        )
        related_names[name] = related_model.__get_name_with_schema__()

    queryset = FeatureSet.objects.using(artifact._state.db)
    queryset = queryset.filter(id__in=slot_featureset.keys())
    featureset_m2m = queryset.annotate(**annotations).values(
        "id", *annotations.keys()
    )

    result = {}
    for row in featureset_m2m:
        members = {}
        for key, items in row.items():
            # skip the "id" column and relations without any member
            if not key.startswith(prefix) or not items:
                continue
            registry = related_names.get(key.removeprefix(prefix))
            members[registry] = [item["name"] for item in items]
        result[row["id"]] = (slot_featureset.get(row["id"]), members)

    return result
|
lamindb/core/_feature_manager.py
CHANGED
@@ -43,6 +43,7 @@ from lamindb._save import save
|
|
43
43
|
from lamindb.core.exceptions import ValidationError
|
44
44
|
from lamindb.core.storage import LocalPathClasses
|
45
45
|
|
46
|
+
from ._django import get_artifact_with_related
|
46
47
|
from ._label_manager import get_labels_as_dict
|
47
48
|
from ._settings import settings
|
48
49
|
from .schema import (
|
@@ -132,12 +133,81 @@ def custom_aggregate(field, using: str):
|
|
132
133
|
return GroupConcat(field)
|
133
134
|
|
134
135
|
|
135
|
-
def
|
136
|
+
def _print_categoricals_postgres(
|
136
137
|
self: Artifact | Collection,
|
138
|
+
related_data: dict | None = None,
|
137
139
|
print_types: bool = False,
|
138
140
|
to_dict: bool = False,
|
139
141
|
print_params: bool = False,
|
140
|
-
)
|
142
|
+
):
|
143
|
+
from lamindb._from_values import _print_values
|
144
|
+
|
145
|
+
if not related_data:
|
146
|
+
artifact_meta = get_artifact_with_related(
|
147
|
+
self, include_feature_link=True, include_m2m=True
|
148
|
+
)
|
149
|
+
related_data = artifact_meta.get("related_data", {})
|
150
|
+
|
151
|
+
m2m_data = related_data.get("m2m", {}) if related_data else {}
|
152
|
+
m2m_name = {}
|
153
|
+
for related_name, values in m2m_data.items():
|
154
|
+
link_model = getattr(self.__class__, related_name).through
|
155
|
+
related_model_name = link_model.__name__.replace(
|
156
|
+
self.__class__.__name__, ""
|
157
|
+
).lower()
|
158
|
+
m2m_name[related_model_name] = values
|
159
|
+
links_data = related_data.get("link", {}) if related_data else {}
|
160
|
+
feature_dict = {
|
161
|
+
id: (name, dtype)
|
162
|
+
for id, name, dtype in Feature.objects.using(self._state.db).values_list(
|
163
|
+
"id", "name", "dtype"
|
164
|
+
)
|
165
|
+
}
|
166
|
+
|
167
|
+
msg = ""
|
168
|
+
dictionary = {}
|
169
|
+
|
170
|
+
# categorical feature values
|
171
|
+
if not print_params:
|
172
|
+
labels_msg = ""
|
173
|
+
labels_msgs = []
|
174
|
+
feature_values: dict = {}
|
175
|
+
for link_name, link_values in links_data.items():
|
176
|
+
related_name = link_name.removeprefix("links_").replace("_", "")
|
177
|
+
link_model = getattr(self.__class__, link_name).rel.related_model
|
178
|
+
if not link_values:
|
179
|
+
continue
|
180
|
+
for link_value in link_values:
|
181
|
+
feature_id = link_value.get("feature")
|
182
|
+
if feature_id is None:
|
183
|
+
continue
|
184
|
+
feature_name = feature_dict.get(feature_id)[0]
|
185
|
+
if feature_name not in feature_values:
|
186
|
+
feature_values[feature_name] = (feature_dict.get(feature_id)[1], [])
|
187
|
+
label_id = link_value.get(related_name)
|
188
|
+
feature_values[feature_name][1].append(
|
189
|
+
m2m_name.get(related_name, {}).get(label_id)
|
190
|
+
)
|
191
|
+
for feature_name, (dtype, labels_list) in feature_values.items():
|
192
|
+
print_values = _print_values(labels_list, n=10)
|
193
|
+
type_str = f": {dtype}" if print_types else ""
|
194
|
+
if to_dict:
|
195
|
+
dictionary[feature_name] = (
|
196
|
+
labels_list if len(labels_list) > 1 else labels_list[0]
|
197
|
+
)
|
198
|
+
labels_msgs.append(f" '{feature_name}'{type_str} = {print_values}")
|
199
|
+
if len(labels_msgs) > 0:
|
200
|
+
labels_msg = "\n".join(sorted(labels_msgs)) + "\n"
|
201
|
+
msg += labels_msg
|
202
|
+
return msg, dictionary
|
203
|
+
|
204
|
+
|
205
|
+
def _print_categoricals(
|
206
|
+
self: Artifact | Collection,
|
207
|
+
print_types: bool = False,
|
208
|
+
to_dict: bool = False,
|
209
|
+
print_params: bool = False,
|
210
|
+
):
|
141
211
|
from lamindb._from_values import _print_values
|
142
212
|
|
143
213
|
msg = ""
|
@@ -166,6 +236,56 @@ def print_features(
|
|
166
236
|
if len(labels_msgs) > 0:
|
167
237
|
labels_msg = "\n".join(sorted(labels_msgs)) + "\n"
|
168
238
|
msg += labels_msg
|
239
|
+
return msg, dictionary
|
240
|
+
|
241
|
+
|
242
|
+
def _print_featuresets_postgres(
    self: Artifact | Collection,
    related_data: dict | None = None,
    print_types: bool = False,
):
    """Render the feature sets of a record from aggregated data.

    Args:
        self: The record whose feature sets are rendered.
        related_data: The ``related_data`` dict produced by
            ``get_artifact_with_related``; fetched here when empty or ``None``.
        print_types: Append ``: <registry name>`` after each slot.

    Returns:
        One newline-terminated line per slot and registry, concatenated into
        a single string (empty when there are no feature sets).
    """
    from lamindb._from_values import _print_values

    if not related_data:
        artifact_meta = get_artifact_with_related(self, include_featureset=True)
        # the lookup returns None when no record matches; treat that as "no data"
        related_data = artifact_meta.get("related_data", {}) if artifact_meta else {}

    fs_data = related_data.get("featuresets", {}) if related_data else {}
    lines = []
    for slot, data in fs_data.values():
        for registry, feature_names in data.items():
            type_str = f": {registry}" if print_types else ""
            lines.append(f" '{slot}'{type_str} = {_print_values(feature_names)}\n")

    return "".join(lines)
|
263
|
+
|
264
|
+
|
265
|
+
def print_features(
|
266
|
+
self: Artifact | Collection,
|
267
|
+
related_data: dict | None = None,
|
268
|
+
print_types: bool = False,
|
269
|
+
to_dict: bool = False,
|
270
|
+
print_params: bool = False,
|
271
|
+
) -> str | dict[str, Any]:
|
272
|
+
from lamindb._from_values import _print_values
|
273
|
+
|
274
|
+
if not self._state.adding and connections[self._state.db].vendor == "postgresql":
|
275
|
+
msg, dictionary = _print_categoricals_postgres(
|
276
|
+
self,
|
277
|
+
related_data=related_data,
|
278
|
+
print_types=print_types,
|
279
|
+
to_dict=to_dict,
|
280
|
+
print_params=print_params,
|
281
|
+
)
|
282
|
+
else:
|
283
|
+
msg, dictionary = _print_categoricals(
|
284
|
+
self,
|
285
|
+
print_types=print_types,
|
286
|
+
to_dict=to_dict,
|
287
|
+
print_params=print_params,
|
288
|
+
)
|
169
289
|
|
170
290
|
# non-categorical feature values
|
171
291
|
non_labels_msg = ""
|
@@ -203,15 +323,20 @@ def print_features(
|
|
203
323
|
# feature sets
|
204
324
|
if not print_params:
|
205
325
|
feature_set_msg = ""
|
206
|
-
|
207
|
-
|
208
|
-
|
209
|
-
name_field = get_name_field(features[0])
|
210
|
-
feature_names = list(features.values_list(name_field, flat=True)[:20])
|
211
|
-
type_str = f": {feature_set.registry}" if print_types else ""
|
212
|
-
feature_set_msg += (
|
213
|
-
f" '{slot}'{type_str} = {_print_values(feature_names)}\n"
|
326
|
+
if self.id is not None and connections[self._state.db].vendor == "postgresql":
|
327
|
+
feature_set_msg = _print_featuresets_postgres(
|
328
|
+
self, related_data=related_data
|
214
329
|
)
|
330
|
+
else:
|
331
|
+
for slot, feature_set in get_feature_set_by_slot_(self).items():
|
332
|
+
features = feature_set.members
|
333
|
+
# features.first() is a lot slower than features[0] here
|
334
|
+
name_field = get_name_field(features[0])
|
335
|
+
feature_names = list(features.values_list(name_field, flat=True)[:20])
|
336
|
+
type_str = f": {feature_set.registry}" if print_types else ""
|
337
|
+
feature_set_msg += (
|
338
|
+
f" '{slot}'{type_str} = {_print_values(feature_names)}\n"
|
339
|
+
)
|
215
340
|
if feature_set_msg:
|
216
341
|
msg += f" {colors.italic('Feature sets')}\n"
|
217
342
|
msg += feature_set_msg
|
@@ -372,6 +497,7 @@ def filter_base(cls, **expression):
|
|
372
497
|
)
|
373
498
|
new_expression = {}
|
374
499
|
features = model.filter(name__in=keys_normalized).all().distinct()
|
500
|
+
feature_param = "param" if model is Param else "feature"
|
375
501
|
for key, value in expression.items():
|
376
502
|
split_key = key.split("__")
|
377
503
|
normalized_key = split_key[0]
|
@@ -380,9 +506,9 @@ def filter_base(cls, **expression):
|
|
380
506
|
comparator = f"__{split_key[1]}"
|
381
507
|
feature = features.get(name=normalized_key)
|
382
508
|
if not feature.dtype.startswith("cat"):
|
383
|
-
expression = {
|
509
|
+
expression = {feature_param: feature, f"value{comparator}": value}
|
384
510
|
feature_value = value_model.filter(**expression)
|
385
|
-
new_expression["
|
511
|
+
new_expression[f"_{feature_param}_values__in"] = feature_value
|
386
512
|
else:
|
387
513
|
if isinstance(value, str):
|
388
514
|
expression = {f"name{comparator}": value}
|
@@ -792,7 +918,7 @@ def _add_from(self, data: Artifact | Collection, transfer_logs: dict = None):
|
|
792
918
|
"""Transfer features from a artifact or collection."""
|
793
919
|
# This only covers feature sets
|
794
920
|
if transfer_logs is None:
|
795
|
-
transfer_logs = {"mapped": [], "transferred": []}
|
921
|
+
transfer_logs = {"mapped": [], "transferred": [], "run": None}
|
796
922
|
using_key = settings._using_key
|
797
923
|
for slot, feature_set in data.features._feature_set_by_slot.items():
|
798
924
|
members = feature_set.members
|
@@ -865,3 +991,4 @@ FeatureManager.filter = filter
|
|
865
991
|
FeatureManager.get = get
|
866
992
|
ParamManager.add_values = add_values_params
|
867
993
|
ParamManager.get_values = get_values
|
994
|
+
ParamManager.filter = filter
|
lamindb/core/_label_manager.py
CHANGED
@@ -1,11 +1,12 @@
|
|
1
1
|
from __future__ import annotations
|
2
2
|
|
3
3
|
from collections import defaultdict
|
4
|
-
from typing import TYPE_CHECKING
|
4
|
+
from typing import TYPE_CHECKING
|
5
5
|
|
6
6
|
import numpy as np
|
7
|
-
from
|
8
|
-
from
|
7
|
+
from django.db import connections
|
8
|
+
from lamin_utils import colors, logger
|
9
|
+
from lnschema_core.models import CanValidate, Feature
|
9
10
|
|
10
11
|
from lamindb._from_values import _print_values
|
11
12
|
from lamindb._record import (
|
@@ -16,6 +17,7 @@ from lamindb._record import (
|
|
16
17
|
)
|
17
18
|
from lamindb._save import save
|
18
19
|
|
20
|
+
from ._django import get_artifact_with_related, get_related_model
|
19
21
|
from ._settings import settings
|
20
22
|
from .schema import dict_related_model_to_related_name
|
21
23
|
|
@@ -57,22 +59,46 @@ def get_labels_as_dict(self: Artifact | Collection, links: bool = False):
|
|
57
59
|
return labels
|
58
60
|
|
59
61
|
|
62
|
+
def _print_labels_postgres(
|
63
|
+
self: Artifact | Collection, m2m_data: dict | None = None, print_types: bool = False
|
64
|
+
) -> str:
|
65
|
+
labels_msg = ""
|
66
|
+
if not m2m_data:
|
67
|
+
artifact_meta = get_artifact_with_related(self, include_m2m=True)
|
68
|
+
m2m_data = artifact_meta.get("related_data", {}).get("m2m", {})
|
69
|
+
if m2m_data:
|
70
|
+
for related_name, labels in m2m_data.items():
|
71
|
+
if not labels or related_name == "feature_sets":
|
72
|
+
continue
|
73
|
+
related_model = get_related_model(self, related_name)
|
74
|
+
print_values = _print_values(labels.values(), n=10)
|
75
|
+
type_str = f": {related_model}" if print_types else ""
|
76
|
+
labels_msg += f" .{related_name}{type_str} = {print_values}\n"
|
77
|
+
return labels_msg
|
78
|
+
|
79
|
+
|
60
80
|
def print_labels(
|
61
|
-
self: Artifact | Collection,
|
81
|
+
self: Artifact | Collection,
|
82
|
+
m2m_data: dict | None = None,
|
83
|
+
print_types: bool = False,
|
62
84
|
):
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
85
|
+
if not self._state.adding and connections[self._state.db].vendor == "postgresql":
|
86
|
+
labels_msg = _print_labels_postgres(self, m2m_data, print_types)
|
87
|
+
else:
|
88
|
+
labels_msg = ""
|
89
|
+
for related_name, (related_model, labels) in get_labels_as_dict(self).items():
|
90
|
+
# there is a try except block here to deal with schema inconsistencies
|
91
|
+
# during transfer between instances
|
92
|
+
try:
|
93
|
+
field = get_name_field(labels)
|
94
|
+
labels_list = list(labels.values_list(field, flat=True))
|
95
|
+
if len(labels_list) > 0:
|
96
|
+
print_values = _print_values(labels_list, n=10)
|
97
|
+
type_str = f": {related_model}" if print_types else ""
|
98
|
+
labels_msg += f" .{related_name}{type_str} = {print_values}\n"
|
99
|
+
except Exception: # noqa: S112
|
100
|
+
continue
|
101
|
+
|
76
102
|
msg = ""
|
77
103
|
if labels_msg:
|
78
104
|
msg += f" {colors.italic('Labels')}\n"
|
@@ -106,13 +132,19 @@ def validate_labels(labels: QuerySet | list | dict):
|
|
106
132
|
label_uids = np.array(
|
107
133
|
[getattr(label, field) for label in labels if label is not None]
|
108
134
|
)
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
135
|
+
if issubclass(registry, CanValidate):
|
136
|
+
validated = registry.validate(label_uids, field=field, mute=True)
|
137
|
+
validated_uids = label_uids[validated]
|
138
|
+
validated_labels = registry.filter(
|
139
|
+
**{f"{field}__in": validated_uids}
|
140
|
+
).list()
|
141
|
+
new_labels = [labels[int(i)] for i in np.argwhere(~validated).flatten()]
|
142
|
+
else:
|
143
|
+
validated_labels = []
|
144
|
+
new_labels = list(labels)
|
113
145
|
return validated_labels, new_labels
|
114
146
|
|
115
|
-
if isinstance(labels,
|
147
|
+
if isinstance(labels, dict):
|
116
148
|
result = {}
|
117
149
|
for registry, labels_registry in labels.items():
|
118
150
|
result[registry] = validate_labels_registry(labels_registry)
|
@@ -185,7 +217,7 @@ class LabelManager:
|
|
185
217
|
from django.db.utils import ProgrammingError
|
186
218
|
|
187
219
|
if transfer_logs is None:
|
188
|
-
transfer_logs = {"mapped": [], "transferred": []}
|
220
|
+
transfer_logs = {"mapped": [], "transferred": [], "run": None}
|
189
221
|
using_key = settings._using_key
|
190
222
|
for related_name, (_, labels) in get_labels_as_dict(data).items():
|
191
223
|
labels = labels.all()
|
@@ -250,4 +282,7 @@ class LabelManager:
|
|
250
282
|
)
|
251
283
|
# ProgrammingError is raised when schemas don't match between source and target instances
|
252
284
|
except ProgrammingError:
|
285
|
+
logger.warning(
|
286
|
+
f"{related_name} labels cannot be transferred because schema module does not exist in target instance: {labels}"
|
287
|
+
)
|
253
288
|
continue
|
@@ -3,7 +3,7 @@ from __future__ import annotations
|
|
3
3
|
from collections import Counter
|
4
4
|
from functools import reduce
|
5
5
|
from pathlib import Path
|
6
|
-
from typing import TYPE_CHECKING,
|
6
|
+
from typing import TYPE_CHECKING, Literal
|
7
7
|
|
8
8
|
import numpy as np
|
9
9
|
import pandas as pd
|
lamindb/core/_settings.py
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
from __future__ import annotations
|
2
2
|
|
3
3
|
import os
|
4
|
-
from typing import TYPE_CHECKING, Literal
|
4
|
+
from typing import TYPE_CHECKING, Literal
|
5
5
|
|
6
6
|
import lamindb_setup as ln_setup
|
7
7
|
from lamin_utils import logger
|
@@ -13,6 +13,7 @@ from .subsettings._creation_settings import CreationSettings, creation_settings
|
|
13
13
|
from .subsettings._transform_settings import TransformSettings, transform_settings
|
14
14
|
|
15
15
|
if TYPE_CHECKING:
|
16
|
+
from collections.abc import Mapping
|
16
17
|
from pathlib import Path
|
17
18
|
|
18
19
|
from lamindb_setup.core._settings_storage import StorageSettings
|
lamindb/core/exceptions.py
CHANGED
@@ -6,7 +6,7 @@
|
|
6
6
|
InvalidArgument
|
7
7
|
DoesNotExist
|
8
8
|
ValidationError
|
9
|
-
|
9
|
+
NotebookNotSaved
|
10
10
|
NoTitleError
|
11
11
|
MissingContextUID
|
12
12
|
UpdateContext
|
@@ -26,7 +26,7 @@ class InvalidArgument(SystemExit):
|
|
26
26
|
|
27
27
|
|
28
28
|
class TrackNotCalled(SystemExit):
|
29
|
-
"""ln.
|
29
|
+
"""`ln.track()` wasn't called."""
|
30
30
|
|
31
31
|
pass
|
32
32
|
|
@@ -51,8 +51,14 @@ class DoesNotExist(Exception):
|
|
51
51
|
pass
|
52
52
|
|
53
53
|
|
54
|
+
class InconsistentKey(Exception):
    """Signals an inconsistent transform or artifact `key`."""
|
58
|
+
|
59
|
+
|
54
60
|
# -------------------------------------------------------------------------------------
|
55
|
-
#
|
61
|
+
# run context
|
56
62
|
# -------------------------------------------------------------------------------------
|
57
63
|
|
58
64
|
|
@@ -66,12 +72,6 @@ class IntegrityError(Exception):
|
|
66
72
|
pass
|
67
73
|
|
68
74
|
|
69
|
-
class NotebookNotSavedError(Exception):
|
70
|
-
"""Notebook wasn't saved."""
|
71
|
-
|
72
|
-
pass
|
73
|
-
|
74
|
-
|
75
75
|
class NoTitleError(Exception):
|
76
76
|
"""Notebook has no title."""
|
77
77
|
|
@@ -3,7 +3,7 @@ from __future__ import annotations
|
|
3
3
|
import inspect
|
4
4
|
from functools import cached_property
|
5
5
|
from itertools import chain
|
6
|
-
from typing import TYPE_CHECKING, Callable, Literal,
|
6
|
+
from typing import TYPE_CHECKING, Callable, Literal, Union
|
7
7
|
|
8
8
|
import h5py
|
9
9
|
import numpy as np
|
@@ -21,6 +21,7 @@ from lamindb_setup.core.upath import UPath, create_mapper, infer_filesystem
|
|
21
21
|
from packaging import version
|
22
22
|
|
23
23
|
if TYPE_CHECKING:
|
24
|
+
from collections.abc import Mapping
|
24
25
|
from pathlib import Path
|
25
26
|
|
26
27
|
from fsspec.core import OpenFile
|
lamindb/core/versioning.py
CHANGED
@@ -3,7 +3,7 @@ from __future__ import annotations
|
|
3
3
|
from typing import TYPE_CHECKING, Literal
|
4
4
|
|
5
5
|
from lamin_utils import logger
|
6
|
-
from lamin_utils._base62 import
|
6
|
+
from lamin_utils._base62 import increment_base62
|
7
7
|
from lamindb_setup.core.upath import LocalPathClasses, UPath
|
8
8
|
from lnschema_core import ids
|
9
9
|
|
@@ -18,19 +18,7 @@ def message_update_key_in_version_family(
|
|
18
18
|
registry: str,
|
19
19
|
new_key: str,
|
20
20
|
) -> str:
|
21
|
-
return f'Or update key "{existing_key}" to "{new_key}":\n\nln.{registry}.filter(uid__startswith="{suid}").update(key="{new_key}")\n'
|
22
|
-
|
23
|
-
|
24
|
-
def increment_base62(s: str) -> str:
|
25
|
-
# we don't need to throw an error for zzzz because uids are enforced to be unique
|
26
|
-
# on the db level and have an enforced maximum length
|
27
|
-
value = sum(BASE62_CHARS.index(c) * (62**i) for i, c in enumerate(reversed(s)))
|
28
|
-
value += 1
|
29
|
-
result = ""
|
30
|
-
while value:
|
31
|
-
value, remainder = divmod(value, 62)
|
32
|
-
result = BASE62_CHARS[remainder] + result
|
33
|
-
return result.zfill(len(s))
|
21
|
+
return f'Or update key "{existing_key}" to "{new_key}" for all previous versions:\n\nln.{registry}.filter(uid__startswith="{suid}").update(key="{new_key}")\n'
|
34
22
|
|
35
23
|
|
36
24
|
def bump_version(
|