lamindb 0.75.0__py3-none-any.whl → 0.76.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lamindb/__init__.py +1 -1
- lamindb/_artifact.py +1 -0
- lamindb/_can_validate.py +52 -22
- lamindb/_collection.py +1 -0
- lamindb/_curate.py +384 -144
- lamindb/_feature.py +1 -1
- lamindb/_from_values.py +36 -20
- lamindb/_query_set.py +2 -25
- lamindb/_record.py +79 -32
- lamindb/_run.py +1 -1
- lamindb/_save.py +5 -5
- lamindb/_transform.py +1 -1
- lamindb/_view.py +13 -11
- lamindb/core/__init__.py +2 -0
- lamindb/core/_data.py +4 -4
- lamindb/core/_feature_manager.py +16 -6
- lamindb/core/_label_manager.py +4 -3
- lamindb/core/datasets/_core.py +29 -23
- lamindb/core/schema.py +5 -5
- lamindb/core/storage/__init__.py +11 -2
- lamindb/core/storage/_valid_suffixes.py +16 -2
- lamindb/core/versioning.py +0 -1
- lamindb/integrations/_vitessce.py +68 -31
- {lamindb-0.75.0.dist-info → lamindb-0.76.0.dist-info}/METADATA +5 -5
- {lamindb-0.75.0.dist-info → lamindb-0.76.0.dist-info}/RECORD +27 -27
- {lamindb-0.75.0.dist-info → lamindb-0.76.0.dist-info}/LICENSE +0 -0
- {lamindb-0.75.0.dist-info → lamindb-0.76.0.dist-info}/WHEEL +0 -0
lamindb/_feature.py
CHANGED
@@ -42,7 +42,7 @@ def __init__(self, *args, **kwargs):
|
|
42
42
|
return None
|
43
43
|
# now we proceed with the user-facing constructor
|
44
44
|
if len(args) != 0:
|
45
|
-
raise ValueError("Only
|
45
|
+
raise ValueError("Only keyword args allowed")
|
46
46
|
dtype: type | str = kwargs.pop("dtype") if "dtype" in kwargs else None
|
47
47
|
# cast type
|
48
48
|
if dtype is None:
|
lamindb/_from_values.py
CHANGED
@@ -47,15 +47,35 @@ def get_or_create_records(
|
|
47
47
|
|
48
48
|
# new records to be created based on new values
|
49
49
|
if len(nonexist_values) > 0:
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
50
|
+
source_record = None
|
51
|
+
if from_source:
|
52
|
+
if isinstance(source, Record):
|
53
|
+
source_record = source
|
54
|
+
elif (
|
55
|
+
len(records) > 0
|
56
|
+
and hasattr(records[0], "source_id")
|
57
|
+
and records[0].source_id
|
58
|
+
):
|
59
|
+
source_record = records[0].source
|
60
|
+
if not source_record and hasattr(Record, "public"):
|
61
|
+
from bionty._bionty import get_source_record
|
62
|
+
|
63
|
+
source_record = get_source_record(Record.public(organism=organism))
|
64
|
+
if source_record:
|
65
|
+
from bionty.core._add_ontology import check_source_in_db
|
66
|
+
|
67
|
+
check_source_in_db(
|
68
|
+
registry=Record,
|
69
|
+
source=source_record,
|
70
|
+
update=True,
|
71
|
+
)
|
72
|
+
|
73
|
+
from_source = not source_record.in_db
|
74
|
+
elif hasattr(Record, "source_id"):
|
75
|
+
from_source = True
|
76
|
+
else:
|
58
77
|
from_source = False
|
78
|
+
|
59
79
|
if from_source:
|
60
80
|
records_bionty, unmapped_values = create_records_from_source(
|
61
81
|
iterable_idx=nonexist_values,
|
@@ -211,10 +231,6 @@ def create_records_from_source(
|
|
211
231
|
return records, iterable_idx
|
212
232
|
# add source record to the kwargs
|
213
233
|
source_record = get_source_record(public_ontology)
|
214
|
-
if source_record is not None and source_record.in_db:
|
215
|
-
# skips the creation of records from public if the source is already in the db
|
216
|
-
return records, iterable_idx
|
217
|
-
|
218
234
|
kwargs.update({"source": source_record})
|
219
235
|
|
220
236
|
# filter the columns in bionty df based on fields
|
@@ -335,9 +351,9 @@ def _bulk_create_dicts_from_df(
|
|
335
351
|
return df.reset_index().to_dict(orient="records"), multi_msg
|
336
352
|
|
337
353
|
|
338
|
-
def _has_organism_field(
|
354
|
+
def _has_organism_field(registry: type[Record]) -> bool:
|
339
355
|
try:
|
340
|
-
|
356
|
+
registry._meta.get_field("organism")
|
341
357
|
return True
|
342
358
|
except FieldDoesNotExist:
|
343
359
|
return False
|
@@ -346,17 +362,17 @@ def _has_organism_field(orm: type[Record]) -> bool:
|
|
346
362
|
def _get_organism_record(
|
347
363
|
field: StrField, organism: str | Record, force: bool = False
|
348
364
|
) -> Record:
|
349
|
-
|
365
|
+
registry = field.field.model
|
350
366
|
check = True
|
351
|
-
if not force and hasattr(
|
352
|
-
check = field.field.name !=
|
367
|
+
if not force and hasattr(registry, "_ontology_id_field"):
|
368
|
+
check = field.field.name != registry._ontology_id_field
|
353
369
|
# e.g. bionty.CellMarker has "name" as _ontology_id_field
|
354
|
-
if not
|
370
|
+
if not registry._ontology_id_field.endswith("id"):
|
355
371
|
check = True
|
356
372
|
|
357
|
-
if _has_organism_field(
|
373
|
+
if _has_organism_field(registry) and check:
|
358
374
|
from bionty._bionty import create_or_get_organism_record
|
359
375
|
|
360
|
-
organism_record = create_or_get_organism_record(organism=organism, orm=
|
376
|
+
organism_record = create_or_get_organism_record(organism=organism, orm=registry)
|
361
377
|
if organism_record is not None:
|
362
378
|
return organism_record
|
lamindb/_query_set.py
CHANGED
@@ -243,10 +243,10 @@ class QuerySet(models.QuerySet, CanValidate):
|
|
243
243
|
else:
|
244
244
|
raise MultipleResultsFound(self.all())
|
245
245
|
|
246
|
-
def latest_version(self) ->
|
246
|
+
def latest_version(self) -> QuerySet:
|
247
247
|
"""Filter every version family by latest version."""
|
248
248
|
if issubclass(self.model, IsVersioned):
|
249
|
-
return
|
249
|
+
return self.filter(is_latest=True)
|
250
250
|
else:
|
251
251
|
raise ValueError("Record isn't subclass of `lamindb.core.IsVersioned`")
|
252
252
|
|
@@ -288,29 +288,6 @@ class QuerySet(models.QuerySet, CanValidate):
|
|
288
288
|
return _standardize(cls=self, values=values, field=field, **kwargs)
|
289
289
|
|
290
290
|
|
291
|
-
def filter_query_set_by_latest_version(ordered_query_set: QuerySet) -> RecordsList:
|
292
|
-
# evaluating length can be very costly, hence, the try-except block
|
293
|
-
try:
|
294
|
-
first_record = ordered_query_set[0]
|
295
|
-
except IndexError:
|
296
|
-
return ordered_query_set
|
297
|
-
records_in_view = {}
|
298
|
-
records_in_view[first_record.stem_uid] = first_record
|
299
|
-
for record in ordered_query_set:
|
300
|
-
# this overwrites user-provided ordering (relevant records ordered by a
|
301
|
-
# certain field will not show if they are not the latest version)
|
302
|
-
if record.stem_uid not in records_in_view:
|
303
|
-
records_in_view[record.stem_uid] = record
|
304
|
-
else:
|
305
|
-
if record.created_at > records_in_view[record.stem_uid].created_at:
|
306
|
-
# deleting the entry is needed to preserve the integrity of
|
307
|
-
# user-provided ordering
|
308
|
-
del records_in_view[record.stem_uid]
|
309
|
-
records_in_view[record.stem_uid] = record
|
310
|
-
list_records_in_view = RecordsList(records_in_view.values())
|
311
|
-
return list_records_in_view
|
312
|
-
|
313
|
-
|
314
291
|
models.QuerySet.df = QuerySet.df
|
315
292
|
models.QuerySet.list = QuerySet.list
|
316
293
|
models.QuerySet.first = QuerySet.first
|
lamindb/_record.py
CHANGED
@@ -5,7 +5,7 @@ from typing import TYPE_CHECKING, List, NamedTuple
|
|
5
5
|
|
6
6
|
import dj_database_url
|
7
7
|
import lamindb_setup as ln_setup
|
8
|
-
from django.db import connections
|
8
|
+
from django.db import connections, transaction
|
9
9
|
from django.db.models import IntegerField, Manager, Q, QuerySet, Value
|
10
10
|
from lamin_utils import logger
|
11
11
|
from lamin_utils._lookup import Lookup
|
@@ -36,9 +36,9 @@ def init_self_from_db(self: Record, existing_record: Record):
|
|
36
36
|
self._state.db = "default"
|
37
37
|
|
38
38
|
|
39
|
-
def validate_required_fields(
|
39
|
+
def validate_required_fields(record: Record, kwargs):
|
40
40
|
required_fields = {
|
41
|
-
k.name for k in
|
41
|
+
k.name for k in record._meta.fields if not k.null and k.default is None
|
42
42
|
}
|
43
43
|
required_fields_not_passed = {k: None for k in required_fields if k not in kwargs}
|
44
44
|
kwargs.update(required_fields_not_passed)
|
@@ -77,9 +77,9 @@ def suggest_records_with_similar_names(record: Record, kwargs) -> bool:
|
|
77
77
|
return False
|
78
78
|
|
79
79
|
|
80
|
-
def __init__(orm: Record, *args, **kwargs):
|
80
|
+
def __init__(record: Record, *args, **kwargs):
|
81
81
|
if not args:
|
82
|
-
validate_required_fields(
|
82
|
+
validate_required_fields(record, kwargs)
|
83
83
|
|
84
84
|
# do not search for names if an id is passed; this is important
|
85
85
|
# e.g. when synching ids from the notebook store to lamindb
|
@@ -87,29 +87,29 @@ def __init__(orm: Record, *args, **kwargs):
|
|
87
87
|
if "_has_consciously_provided_uid" in kwargs:
|
88
88
|
has_consciously_provided_uid = kwargs.pop("_has_consciously_provided_uid")
|
89
89
|
if settings.creation.search_names and not has_consciously_provided_uid:
|
90
|
-
match = suggest_records_with_similar_names(
|
90
|
+
match = suggest_records_with_similar_names(record, kwargs)
|
91
91
|
if match:
|
92
92
|
if "version" in kwargs:
|
93
93
|
version_comment = " and version"
|
94
|
-
existing_record =
|
94
|
+
existing_record = record.__class__.filter(
|
95
95
|
name=kwargs["name"], version=kwargs["version"]
|
96
96
|
).one_or_none()
|
97
97
|
else:
|
98
98
|
version_comment = ""
|
99
|
-
existing_record =
|
99
|
+
existing_record = record.__class__.filter(name=kwargs["name"]).one()
|
100
100
|
if existing_record is not None:
|
101
101
|
logger.important(
|
102
|
-
f"returning existing {
|
102
|
+
f"returning existing {record.__class__.__name__} record with same"
|
103
103
|
f" name{version_comment}: '{kwargs['name']}'"
|
104
104
|
)
|
105
|
-
init_self_from_db(
|
105
|
+
init_self_from_db(record, existing_record)
|
106
106
|
return None
|
107
|
-
super(Record,
|
108
|
-
elif len(args) != len(
|
107
|
+
super(Record, record).__init__(**kwargs)
|
108
|
+
elif len(args) != len(record._meta.concrete_fields):
|
109
109
|
raise ValueError("please provide keyword arguments, not plain arguments")
|
110
110
|
else:
|
111
111
|
# object is loaded from DB (**kwargs could be omitted below, I believe)
|
112
|
-
super(Record,
|
112
|
+
super(Record, record).__init__(*args, **kwargs)
|
113
113
|
|
114
114
|
|
115
115
|
@classmethod # type:ignore
|
@@ -132,7 +132,10 @@ def get(cls, idlike: int | str) -> Record:
|
|
132
132
|
else:
|
133
133
|
qs = filter(cls, uid__startswith=idlike)
|
134
134
|
if issubclass(cls, IsVersioned):
|
135
|
-
|
135
|
+
if len(idlike) <= cls._len_stem_uid:
|
136
|
+
return qs.latest_version().one()
|
137
|
+
else:
|
138
|
+
return qs.one()
|
136
139
|
else:
|
137
140
|
return qs.one()
|
138
141
|
|
@@ -165,9 +168,7 @@ def from_values(
|
|
165
168
|
) -> list[Record]:
|
166
169
|
"""{}""" # noqa: D415
|
167
170
|
from_source = True if cls.__module__.startswith("bionty.") else False
|
168
|
-
|
169
|
-
if isinstance(source, Record) and source.in_db:
|
170
|
-
from_source = False
|
171
|
+
|
171
172
|
field_str = get_name_field(cls, field=field)
|
172
173
|
return get_or_create_records(
|
173
174
|
iterable=values,
|
@@ -191,11 +192,11 @@ def _search(
|
|
191
192
|
truncate_words: bool = False,
|
192
193
|
) -> QuerySet:
|
193
194
|
input_queryset = _queryset(cls, using_key=using_key)
|
194
|
-
|
195
|
+
registry = input_queryset.model
|
195
196
|
if field is None:
|
196
197
|
fields = [
|
197
198
|
field.name
|
198
|
-
for field in
|
199
|
+
for field in registry._meta.fields
|
199
200
|
if field.get_internal_type() in {"CharField", "TextField"}
|
200
201
|
]
|
201
202
|
else:
|
@@ -287,7 +288,7 @@ def _lookup(
|
|
287
288
|
) -> NamedTuple:
|
288
289
|
"""{}""" # noqa: D415
|
289
290
|
queryset = _queryset(cls, using_key=using_key)
|
290
|
-
field = get_name_field(
|
291
|
+
field = get_name_field(registry=queryset.model, field=field)
|
291
292
|
|
292
293
|
return Lookup(
|
293
294
|
records=queryset,
|
@@ -296,7 +297,7 @@ def _lookup(
|
|
296
297
|
prefix="ln",
|
297
298
|
).lookup(
|
298
299
|
return_field=(
|
299
|
-
get_name_field(
|
300
|
+
get_name_field(registry=queryset.model, field=return_field)
|
300
301
|
if return_field is not None
|
301
302
|
else None
|
302
303
|
)
|
@@ -315,24 +316,24 @@ def lookup(
|
|
315
316
|
|
316
317
|
|
317
318
|
def get_name_field(
|
318
|
-
|
319
|
+
registry: type[Record] | QuerySet | Manager,
|
319
320
|
*,
|
320
321
|
field: str | StrField | None = None,
|
321
322
|
) -> str:
|
322
|
-
"""Get the 1st char or text field from the
|
323
|
-
if isinstance(
|
324
|
-
|
325
|
-
model_field_names = [i.name for i in
|
323
|
+
"""Get the 1st char or text field from the registry."""
|
324
|
+
if isinstance(registry, (QuerySet, Manager)):
|
325
|
+
registry = registry.model
|
326
|
+
model_field_names = [i.name for i in registry._meta.fields]
|
326
327
|
|
327
328
|
# set to default name field
|
328
329
|
if field is None:
|
329
|
-
if hasattr(
|
330
|
-
field =
|
330
|
+
if hasattr(registry, "_name_field"):
|
331
|
+
field = registry._meta.get_field(registry._name_field)
|
331
332
|
elif "name" in model_field_names:
|
332
|
-
field =
|
333
|
+
field = registry._meta.get_field("name")
|
333
334
|
else:
|
334
335
|
# first char or text field that doesn't contain "id"
|
335
|
-
for i in
|
336
|
+
for i in registry._meta.fields:
|
336
337
|
if "id" in i.name:
|
337
338
|
continue
|
338
339
|
if i.get_internal_type() in {"CharField", "TextField"}:
|
@@ -360,7 +361,7 @@ def get_name_field(
|
|
360
361
|
def _queryset(cls: Record | QuerySet | Manager, using_key: str) -> QuerySet:
|
361
362
|
if isinstance(cls, (QuerySet, Manager)):
|
362
363
|
return cls.all()
|
363
|
-
elif using_key is None:
|
364
|
+
elif using_key is None or using_key == "default":
|
364
365
|
return cls.objects.all()
|
365
366
|
else:
|
366
367
|
# using must be called on cls, otherwise the connection isn't found
|
@@ -528,7 +529,28 @@ def save(self, *args, **kwargs) -> Record:
|
|
528
529
|
if result is not None:
|
529
530
|
init_self_from_db(self, result)
|
530
531
|
else:
|
531
|
-
|
532
|
+
# save versioned record
|
533
|
+
if isinstance(self, IsVersioned) and self._is_new_version_of is not None:
|
534
|
+
if self._is_new_version_of.is_latest:
|
535
|
+
is_new_version_of = self._is_new_version_of
|
536
|
+
else:
|
537
|
+
# need one additional request
|
538
|
+
is_new_version_of = self.__class__.objects.get(
|
539
|
+
is_latest=True, uid__startswith=self.stem_uid
|
540
|
+
)
|
541
|
+
logger.warning(
|
542
|
+
f"didn't pass the latest version in `is_new_version_of`, retrieved it: {is_new_version_of}"
|
543
|
+
)
|
544
|
+
is_new_version_of.is_latest = False
|
545
|
+
with transaction.atomic():
|
546
|
+
is_new_version_of._is_new_version_of = (
|
547
|
+
None # ensure we don't start a recursion
|
548
|
+
)
|
549
|
+
is_new_version_of.save()
|
550
|
+
super(Record, self).save(*args, **kwargs)
|
551
|
+
# save unversioned record
|
552
|
+
else:
|
553
|
+
super(Record, self).save(*args, **kwargs)
|
532
554
|
# perform transfer of many-to-many fields
|
533
555
|
# only supported for Artifact and Collection records
|
534
556
|
if db is not None and db != "default" and using_key is None:
|
@@ -553,6 +575,30 @@ def save(self, *args, **kwargs) -> Record:
|
|
553
575
|
return self
|
554
576
|
|
555
577
|
|
578
|
+
def delete(self) -> None:
|
579
|
+
"""Delete the record."""
|
580
|
+
# note that the logic below does not fire if a record is moved to the trash
|
581
|
+
# the idea is that moving a record to the trash should move its entire version family
|
582
|
+
# to the trash, whereas permanently deleting should default to only deleting a single record
|
583
|
+
# of a version family
|
584
|
+
# we can consider making it easy to permanently delete entire version families as well,
|
585
|
+
# but that's for another time
|
586
|
+
if isinstance(self, IsVersioned) and self.is_latest:
|
587
|
+
new_latest = (
|
588
|
+
self.__class__.filter(is_latest=False, uid__startswith=self.stem_uid)
|
589
|
+
.order_by("-created_at")
|
590
|
+
.first()
|
591
|
+
)
|
592
|
+
if new_latest is not None:
|
593
|
+
new_latest.is_latest = True
|
594
|
+
with transaction.atomic():
|
595
|
+
new_latest.save()
|
596
|
+
super(Record, self).delete()
|
597
|
+
logger.warning(f"new latest version is {new_latest}")
|
598
|
+
return None
|
599
|
+
super(Record, self).delete()
|
600
|
+
|
601
|
+
|
556
602
|
METHOD_NAMES = [
|
557
603
|
"__init__",
|
558
604
|
"filter",
|
@@ -561,6 +607,7 @@ METHOD_NAMES = [
|
|
561
607
|
"search",
|
562
608
|
"lookup",
|
563
609
|
"save",
|
610
|
+
"delete",
|
564
611
|
"from_values",
|
565
612
|
"using",
|
566
613
|
]
|
lamindb/_run.py
CHANGED
@@ -42,7 +42,7 @@ def delete_run_artifacts(run: Run) -> None:
|
|
42
42
|
run.save()
|
43
43
|
if environment is not None:
|
44
44
|
# only delete if there are no other runs attached to this environment
|
45
|
-
if environment.
|
45
|
+
if environment._environment_of.count() == 0:
|
46
46
|
environment.delete(permanent=True)
|
47
47
|
if report is not None:
|
48
48
|
report.delete(permanent=True)
|
lamindb/_save.py
CHANGED
@@ -108,21 +108,21 @@ def bulk_create(records: Iterable[Record], ignore_conflicts: bool | None = False
|
|
108
108
|
records_by_orm = defaultdict(list)
|
109
109
|
for record in records:
|
110
110
|
records_by_orm[record.__class__].append(record)
|
111
|
-
for
|
112
|
-
|
111
|
+
for registry, records in records_by_orm.items():
|
112
|
+
registry.objects.bulk_create(records, ignore_conflicts=ignore_conflicts)
|
113
113
|
|
114
114
|
|
115
115
|
def bulk_update(records: Iterable[Record], ignore_conflicts: bool | None = False):
|
116
116
|
records_by_orm = defaultdict(list)
|
117
117
|
for record in records:
|
118
118
|
records_by_orm[record.__class__].append(record)
|
119
|
-
for
|
119
|
+
for registry, records in records_by_orm.items():
|
120
120
|
field_names = [
|
121
121
|
field.name
|
122
|
-
for field in
|
122
|
+
for field in registry._meta.fields
|
123
123
|
if (field.name != "created_at" and field.name != "id")
|
124
124
|
]
|
125
|
-
|
125
|
+
registry.objects.bulk_update(records, field_names)
|
126
126
|
|
127
127
|
|
128
128
|
# This is also used within Artifact.save()
|
lamindb/_transform.py
CHANGED
@@ -22,7 +22,6 @@ def __init__(transform: Transform, *args, **kwargs):
|
|
22
22
|
is_new_version_of: Transform | None = (
|
23
23
|
kwargs.pop("is_new_version_of") if "is_new_version_of" in kwargs else None
|
24
24
|
)
|
25
|
-
(kwargs.pop("initial_version_id") if "initial_version_id" in kwargs else None)
|
26
25
|
version: str | None = kwargs.pop("version") if "version" in kwargs else None
|
27
26
|
type: TransformType | None = kwargs.pop("type") if "type" in kwargs else "pipeline"
|
28
27
|
reference: str | None = kwargs.pop("reference") if "reference" in kwargs else None
|
@@ -55,6 +54,7 @@ def __init__(transform: Transform, *args, **kwargs):
|
|
55
54
|
reference=reference,
|
56
55
|
reference_type=reference_type,
|
57
56
|
_has_consciously_provided_uid=has_consciously_provided_uid,
|
57
|
+
is_new_version_of=is_new_version_of,
|
58
58
|
)
|
59
59
|
|
60
60
|
|
lamindb/_view.py
CHANGED
@@ -41,15 +41,17 @@ def view(
|
|
41
41
|
schema_module = importlib.import_module(get_schema_module_name(schema_name))
|
42
42
|
|
43
43
|
all_registries = {
|
44
|
-
|
45
|
-
for
|
46
|
-
if inspect.isclass(
|
47
|
-
and issubclass(
|
48
|
-
and
|
44
|
+
registry
|
45
|
+
for registry in schema_module.__dict__.values()
|
46
|
+
if inspect.isclass(registry)
|
47
|
+
and issubclass(registry, Record)
|
48
|
+
and registry is not Record
|
49
49
|
}
|
50
50
|
if registries is not None:
|
51
51
|
filtered_registries = {
|
52
|
-
|
52
|
+
registry
|
53
|
+
for registry in all_registries
|
54
|
+
if registry.__name__ in registries
|
53
55
|
}
|
54
56
|
else:
|
55
57
|
filtered_registries = all_registries
|
@@ -59,12 +61,12 @@ def view(
|
|
59
61
|
logger.print("*" * len(section_no_color))
|
60
62
|
logger.print(section)
|
61
63
|
logger.print("*" * len(section_no_color))
|
62
|
-
for
|
63
|
-
if hasattr(
|
64
|
-
df =
|
64
|
+
for registry in sorted(filtered_registries, key=lambda x: x.__name__):
|
65
|
+
if hasattr(registry, "updated_at"):
|
66
|
+
df = registry.filter().order_by("-updated_at")[:n].df()
|
65
67
|
else:
|
66
68
|
# need to adjust in the future
|
67
|
-
df =
|
69
|
+
df = registry.df().iloc[-n:]
|
68
70
|
if df.shape[0] > 0:
|
69
|
-
logger.print(colors.blue(colors.bold(
|
71
|
+
logger.print(colors.blue(colors.bold(registry.__name__)))
|
70
72
|
show(df)
|
lamindb/core/__init__.py
CHANGED
lamindb/core/_data.py
CHANGED
@@ -14,8 +14,8 @@ from lnschema_core.models import (
|
|
14
14
|
Record,
|
15
15
|
Run,
|
16
16
|
ULabel,
|
17
|
-
__repr__,
|
18
17
|
format_field_value,
|
18
|
+
record_repr,
|
19
19
|
)
|
20
20
|
|
21
21
|
from lamindb._parents import view_lineage
|
@@ -108,7 +108,7 @@ def describe(self: HasFeatures, print_types: bool = False):
|
|
108
108
|
# )
|
109
109
|
|
110
110
|
model_name = self.__class__.__name__
|
111
|
-
msg = f"{colors.green(model_name)}{
|
111
|
+
msg = f"{colors.green(model_name)}{record_repr(self, include_foreign_keys=False).lstrip(model_name)}\n"
|
112
112
|
prov_msg = ""
|
113
113
|
|
114
114
|
fields = self._meta.fields
|
@@ -251,8 +251,8 @@ def add_labels(
|
|
251
251
|
if feature.dtype.startswith("cat["):
|
252
252
|
orm_dict = dict_schema_name_to_model_name(Artifact)
|
253
253
|
for reg in feature.dtype.replace("cat[", "").rstrip("]").split("|"):
|
254
|
-
|
255
|
-
records_validated +=
|
254
|
+
registry = orm_dict.get(reg)
|
255
|
+
records_validated += registry.from_values(records, field=field)
|
256
256
|
|
257
257
|
# feature doesn't have registries and therefore can't create records from values
|
258
258
|
# ask users to pass records
|
lamindb/core/_feature_manager.py
CHANGED
@@ -118,9 +118,7 @@ def get_feature_set_links(host: Artifact | Collection) -> QuerySet:
|
|
118
118
|
|
119
119
|
def get_link_attr(link: LinkORM | type[LinkORM], data: HasFeatures) -> str:
|
120
120
|
link_model_name = link.__class__.__name__
|
121
|
-
if
|
122
|
-
link_model_name == "ModelBase" or link_model_name == "RecordMeta"
|
123
|
-
): # we passed the type of the link
|
121
|
+
if link_model_name in {"Registry", "ModelBase"}: # we passed the type of the link
|
124
122
|
link_model_name = link.__name__
|
125
123
|
return link_model_name.replace(data.__class__.__name__, "").lower()
|
126
124
|
|
@@ -746,9 +744,9 @@ def _add_set_from_mudata(
|
|
746
744
|
# parse and register features
|
747
745
|
mdata = self._host.load()
|
748
746
|
feature_sets = {}
|
749
|
-
obs_features =
|
747
|
+
obs_features = Feature.from_values(mdata.obs.columns)
|
750
748
|
if len(obs_features) > 0:
|
751
|
-
feature_sets["obs"] = FeatureSet(features=
|
749
|
+
feature_sets["obs"] = FeatureSet(features=obs_features)
|
752
750
|
for modality, field in var_fields.items():
|
753
751
|
modality_fs = parse_feature_sets_from_anndata(
|
754
752
|
mdata[modality],
|
@@ -760,8 +758,20 @@ def _add_set_from_mudata(
|
|
760
758
|
for k, v in modality_fs.items():
|
761
759
|
feature_sets[f"['{modality}'].{k}"] = v
|
762
760
|
|
761
|
+
def unify_feature_sets_by_hash(feature_sets):
|
762
|
+
unique_values = {}
|
763
|
+
|
764
|
+
for key, value in feature_sets.items():
|
765
|
+
value_hash = value.hash # Assuming each value has a .hash attribute
|
766
|
+
if value_hash in unique_values:
|
767
|
+
feature_sets[key] = unique_values[value_hash]
|
768
|
+
else:
|
769
|
+
unique_values[value_hash] = value
|
770
|
+
|
771
|
+
return feature_sets
|
772
|
+
|
763
773
|
# link feature sets
|
764
|
-
self._host._feature_sets = feature_sets
|
774
|
+
self._host._feature_sets = unify_feature_sets_by_hash(feature_sets)
|
765
775
|
self._host.save()
|
766
776
|
|
767
777
|
|
lamindb/core/_label_manager.py
CHANGED
@@ -32,14 +32,15 @@ def get_labels_as_dict(self: HasFeatures, links: bool = False):
|
|
32
32
|
"input_of_runs",
|
33
33
|
"collections",
|
34
34
|
"_source_code_artifact_of",
|
35
|
-
"
|
36
|
-
"
|
35
|
+
"_report_of",
|
36
|
+
"_environment_of",
|
37
37
|
"links_collection",
|
38
38
|
"links_artifact",
|
39
39
|
"links_feature_set",
|
40
40
|
"previous_runs",
|
41
41
|
"_feature_values",
|
42
|
-
"
|
42
|
+
"_action_targets",
|
43
|
+
"_lnschema_core_collection__actions_+", # something seems off with this one
|
43
44
|
"_actions",
|
44
45
|
}
|
45
46
|
labels = {} # type: ignore
|