lamindb 0.77.3__py3-none-any.whl → 1.0.0__py3-none-any.whl
- lamindb/__init__.py +39 -32
- lamindb/_artifact.py +95 -64
- lamindb/_can_curate.py +13 -6
- lamindb/_collection.py +51 -49
- lamindb/_feature.py +9 -9
- lamindb/_finish.py +92 -79
- lamindb/_from_values.py +13 -10
- lamindb/_is_versioned.py +2 -1
- lamindb/_parents.py +23 -16
- lamindb/_query_manager.py +3 -3
- lamindb/_query_set.py +85 -18
- lamindb/_record.py +114 -41
- lamindb/_run.py +3 -3
- lamindb/_save.py +5 -6
- lamindb/{_feature_set.py → _schema.py} +34 -31
- lamindb/_storage.py +2 -1
- lamindb/_transform.py +51 -23
- lamindb/_ulabel.py +17 -8
- lamindb/_view.py +13 -13
- lamindb/base/__init__.py +24 -0
- lamindb/base/fields.py +281 -0
- lamindb/base/ids.py +103 -0
- lamindb/base/types.py +51 -0
- lamindb/base/users.py +30 -0
- lamindb/base/validation.py +67 -0
- lamindb/core/__init__.py +18 -15
- lamindb/core/_context.py +295 -224
- lamindb/core/_data.py +44 -49
- lamindb/core/_describe.py +41 -31
- lamindb/core/_django.py +29 -27
- lamindb/core/_feature_manager.py +130 -129
- lamindb/core/_label_manager.py +7 -8
- lamindb/core/_mapped_collection.py +17 -14
- lamindb/core/_settings.py +1 -12
- lamindb/core/_sync_git.py +56 -9
- lamindb/core/_track_environment.py +1 -1
- lamindb/core/datasets/_core.py +5 -6
- lamindb/core/exceptions.py +0 -7
- lamindb/core/fields.py +1 -1
- lamindb/core/loaders.py +0 -1
- lamindb/core/{schema.py → relations.py} +22 -19
- lamindb/core/storage/_anndata_accessor.py +1 -2
- lamindb/core/storage/_backed_access.py +2 -1
- lamindb/core/storage/_tiledbsoma.py +38 -13
- lamindb/core/storage/objects.py +1 -1
- lamindb/core/storage/paths.py +13 -8
- lamindb/core/subsettings/__init__.py +0 -2
- lamindb/core/types.py +2 -23
- lamindb/core/versioning.py +11 -7
- lamindb/{_curate.py → curators/__init__.py} +122 -23
- lamindb/curators/_spatial.py +528 -0
- lamindb/integrations/_vitessce.py +1 -3
- lamindb/migrations/0052_squashed.py +1261 -0
- lamindb/migrations/0053_alter_featureset_hash_alter_paramvalue_created_by_and_more.py +57 -0
- lamindb/migrations/0054_alter_feature_previous_runs_and_more.py +35 -0
- lamindb/migrations/0055_artifact_type_artifactparamvalue_and_more.py +61 -0
- lamindb/migrations/0056_rename_ulabel_ref_is_name_artifactulabel_label_ref_is_name_and_more.py +22 -0
- lamindb/migrations/0057_link_models_latest_report_and_others.py +356 -0
- lamindb/migrations/0058_artifact__actions_collection__actions.py +22 -0
- lamindb/migrations/0059_alter_artifact__accessor_alter_artifact__hash_type_and_more.py +31 -0
- lamindb/migrations/0060_alter_artifact__actions.py +22 -0
- lamindb/migrations/0061_alter_collection_meta_artifact_alter_run_environment_and_more.py +45 -0
- lamindb/migrations/0062_add_is_latest_field.py +32 -0
- lamindb/migrations/0063_populate_latest_field.py +45 -0
- lamindb/migrations/0064_alter_artifact_version_alter_collection_version_and_more.py +33 -0
- lamindb/migrations/0065_remove_collection_feature_sets_and_more.py +22 -0
- lamindb/migrations/0066_alter_artifact__feature_values_and_more.py +352 -0
- lamindb/migrations/0067_alter_featurevalue_unique_together_and_more.py +20 -0
- lamindb/migrations/0068_alter_artifactulabel_unique_together_and_more.py +20 -0
- lamindb/migrations/0069_alter_artifact__accessor_alter_artifact__hash_type_and_more.py +1294 -0
- lamindb/migrations/0069_squashed.py +1770 -0
- lamindb/migrations/0070_lamindbv1_migrate_data.py +78 -0
- lamindb/migrations/0071_lamindbv1_migrate_schema.py +741 -0
- lamindb/migrations/0072_remove_user__branch_code_remove_user_aux_and_more.py +148 -0
- lamindb/migrations/0073_merge_ourprojects.py +945 -0
- lamindb/migrations/0074_lamindbv1_part4.py +374 -0
- lamindb/migrations/0075_lamindbv1_part5.py +276 -0
- lamindb/migrations/0076_lamindbv1_part6.py +621 -0
- lamindb/migrations/0077_lamindbv1_part6b.py +228 -0
- lamindb/migrations/0078_lamindbv1_part6c.py +468 -0
- lamindb/migrations/0079_alter_rundata_value_json_and_more.py +36 -0
- lamindb/migrations/__init__.py +0 -0
- lamindb/models.py +4064 -0
- {lamindb-0.77.3.dist-info → lamindb-1.0.0.dist-info}/METADATA +13 -19
- lamindb-1.0.0.dist-info/RECORD +100 -0
- {lamindb-0.77.3.dist-info → lamindb-1.0.0.dist-info}/WHEEL +1 -1
- lamindb/core/subsettings/_transform_settings.py +0 -21
- lamindb-0.77.3.dist-info/RECORD +0 -63
- {lamindb-0.77.3.dist-info → lamindb-1.0.0.dist-info}/LICENSE +0 -0
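The central rename of this release is already visible in the file list: the registry models that previously came from the separate lnschema_core package are consolidated into lamindb/models.py, lamindb/_feature_set.py becomes lamindb/_schema.py (FeatureSet → Schema), and base types such as FieldAttr move under lamindb/base/. Below is a minimal import-side sketch of that shift, using only paths that appear in the hunks further down; these are internal module paths, and whether they are the recommended public entry points is not established by this diff.

# lamindb 0.77.x — registries and types came from the lnschema_core package
# from lnschema_core.models import Artifact, Collection, Feature, FeatureSet
# from lnschema_core.types import FieldAttr

# lamindb 1.0.0 — registries are consolidated into lamindb.models,
# FeatureSet is renamed to Schema, and base types move to lamindb.base.types
from lamindb.models import Artifact, Collection, Feature, Record, ULabel
from lamindb._schema import Schema  # internal path used by _feature_manager.py below
from lamindb.base.types import FieldAttr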
lamindb/core/_feature_manager.py
CHANGED
@@ -14,34 +14,16 @@ from anndata import AnnData
 from django.contrib.postgres.aggregates import ArrayAgg
 from django.db import connections
 from django.db.models import Aggregate
-from lamin_utils import
+from lamin_utils import logger
 from lamindb_setup.core.hashing import hash_set
 from lamindb_setup.core.upath import create_path
-from lnschema_core.models import (
-    Artifact,
-    Collection,
-    Feature,
-    FeatureManager,
-    FeatureValue,
-    LinkORM,
-    Param,
-    ParamManager,
-    ParamManagerArtifact,
-    ParamManagerRun,
-    ParamValue,
-    Record,
-    Run,
-    ULabel,
-)
 from rich.table import Column, Table
 from rich.text import Text

 from lamindb._feature import (
-    FEATURE_DTYPES,
     convert_pandas_dtype_to_lamin_dtype,
     suggest_categorical_for_str_iterable,
 )
-from lamindb._feature_set import DICT_KEYS_TYPE, FeatureSet
 from lamindb._from_values import _format_values
 from lamindb._record import (
     REGISTRY_UNIQUE_FIELD,
@@ -50,8 +32,25 @@ from lamindb._record import (
     transfer_to_default_db,
 )
 from lamindb._save import save
+from lamindb._schema import DICT_KEYS_TYPE, Schema
 from lamindb.core.exceptions import DoesNotExist, ValidationError
 from lamindb.core.storage import LocalPathClasses
+from lamindb.models import (
+    Artifact,
+    Collection,
+    Feature,
+    FeatureManager,
+    FeatureValue,
+    LinkORM,
+    Param,
+    ParamManager,
+    ParamManagerArtifact,
+    ParamManagerRun,
+    ParamValue,
+    Record,
+    Run,
+    ULabel,
+)

 from ._describe import (
     NAME_WIDTH,
@@ -63,15 +62,15 @@ from ._describe import (
 from ._django import get_artifact_with_related
 from ._label_manager import _get_labels, describe_labels
 from ._settings import settings
-from .
+from .relations import (
     dict_related_model_to_related_name,
 )

 if TYPE_CHECKING:
-    from lnschema_core.types import FieldAttr
     from rich.tree import Tree

     from lamindb._query_set import QuerySet
+    from lamindb.base.types import FieldAttr


 def get_host_id_field(host: Artifact | Collection) -> str:
@@ -84,7 +83,7 @@ def get_host_id_field(host: Artifact | Collection) -> str:

 def get_accessor_by_registry_(host: Artifact | Collection) -> dict:
     dictionary = {
-        field.related_model.
+        field.related_model.__get_name_with_module__(): field.name
         for field in host._meta.related_objects
     }
     dictionary["Feature"] = "features"
@@ -92,25 +91,25 @@ def get_accessor_by_registry_(host: Artifact | Collection) -> dict:
     return dictionary


-def
+def get_schema_by_slot_(host: Artifact | Collection) -> dict:
     if isinstance(host, Collection):
         return {}
     # if the host is not yet saved
     if host._state.adding:
-        if hasattr(host, "
-            return host.
+        if hasattr(host, "_staged__schemas_m2m"):
+            return host._staged__schemas_m2m
         else:
             return {}
     host_db = host._state.db
     host_id_field = get_host_id_field(host)
     kwargs = {host_id_field: host.id}
     # otherwise, we need a query
-
-        host.
+    links_schema = (
+        host._schemas_m2m.through.objects.using(host_db)
         .filter(**kwargs)
-        .select_related("
+        .select_related("schema")
     )
-    return {fsl.slot: fsl.
+    return {fsl.slot: fsl.schema for fsl in links_schema}


 def get_label_links(
@@ -126,11 +125,11 @@ def get_label_links(
     return link_records


-def
+def get_schema_links(host: Artifact | Collection) -> QuerySet:
     host_id_field = get_host_id_field(host)
     kwargs = {host_id_field: host.id}
-
-    return
+    links_schema = host._schemas_m2m.through.objects.filter(**kwargs)
+    return links_schema


 def get_link_attr(link: LinkORM | type[LinkORM], data: Artifact | Collection) -> str:
@@ -270,15 +269,15 @@ def _get_non_categoricals(
     return non_categoricals


-def
+def _get_schemas_postgres(
     self: Artifact | Collection,
     related_data: dict | None = None,
 ) -> dict:
     if not related_data:
-        artifact_meta = get_artifact_with_related(self,
+        artifact_meta = get_artifact_with_related(self, include_schema=True)
         related_data = artifact_meta.get("related_data", {})

-    fs_data = related_data.get("
+    fs_data = related_data.get("schemas", {}) if related_data else {}
     return fs_data


@@ -326,35 +325,35 @@ def describe_features(
         return dictionary if to_dict else tree

     # feature sets
-
+    schema_data: dict[str, tuple[str, list[str]]] = {}
     feature_data: dict[str, tuple[str, list[str]]] = {}
     if not print_params and not to_dict:
         if self.id is not None and connections[self._state.db].vendor == "postgresql":
-            fs_data =
+            fs_data = _get_schemas_postgres(self, related_data=related_data)
             for fs_id, (slot, data) in fs_data.items():
                 for registry_str, feature_names in data.items():
-
-
+                    schema = Schema.objects.using(self._state.db).get(id=fs_id)
+                    schema_data[slot] = (schema, feature_names)
                     for feature_name in feature_names:
                         feature_data[feature_name] = (slot, registry_str)
         else:
-            for slot,
-                features =
+            for slot, schema in get_schema_by_slot_(self).items():
+                features = schema.members
                 # features.first() is a lot slower than features[0] here
                 name_field = get_name_field(features[0])
                 feature_names = list(features.values_list(name_field, flat=True)[:20])
-
+                schema_data[slot] = (schema, feature_names)
                 for feature_name in feature_names:
-                    feature_data[feature_name] = (slot,
+                    feature_data[feature_name] = (slot, schema.itype)

     internal_feature_names: dict[str, str] = {}
     if isinstance(self, Artifact):
-
+        _schemas_m2m = self._schemas_m2m.filter(itype="Feature").all()
         internal_feature_names = {}
-        if len(
-            for
+        if len(_schemas_m2m) > 0:
+            for schema in _schemas_m2m:
                 internal_feature_names.update(
-                    dict(
+                    dict(schema.members.values_list("name", "dtype"))
                 )

     # categorical feature values
@@ -417,7 +416,7 @@ def describe_features(
             internal_feature_labels_slot.setdefault(slot, []).append(feature_row)

     int_features_tree_children = []
-    for slot, (
+    for slot, (schema, feature_names) in schema_data.items():
         if slot in internal_feature_labels_slot:
             # add internal Feature features with labels
             feature_rows = internal_feature_labels_slot[slot]
@@ -440,7 +439,7 @@ def describe_features(
                     str(
                         internal_feature_names.get(feature_name)
                         if feature_name in internal_feature_names
-                        else
+                        else schema.dtype
                     ),
                     style="dim",
                 ),
@@ -454,9 +453,9 @@ def describe_features(
                 Text.assemble(
                     (slot, "violet"),
                     (" • ", "dim"),
-                    (str(
+                    (str(schema.n), "pink1"),
                 ),
-                Text.assemble((f"[{
+                Text.assemble((f"[{schema.itype}]", "pink1")),
                 feature_rows,
                 show_header=True,
             )
@@ -467,7 +466,7 @@ def describe_features(
             Text.assemble(
                 ("Dataset features", "bold bright_magenta"),
                 ("/", "dim"),
-                (".
+                ("._schemas_m2m", "dim bold"),
             )
         )
         for child in int_features_tree_children:
@@ -501,7 +500,7 @@ def describe_features(
     return tree


-def
+def parse_staged__schemas_m2m_from_anndata(
     adata: AnnData,
     var_field: FieldAttr | None = None,
     obs_field: FieldAttr = Feature.name,
@@ -525,11 +524,11 @@ def parse_feature_sets_from_anndata(
         if adata.X is None
         else convert_pandas_dtype_to_lamin_dtype(adata.X.dtype)
     )
-
+    _schemas_m2m = {}
     if var_field is not None:
         logger.info("parsing feature names of X stored in slot 'var'")
         logger.indent = "   "
-
+        schema_var = Schema.from_values(
             data_parse.var.index,
             var_field,
             type=type,
@@ -537,28 +536,28 @@ def parse_feature_sets_from_anndata(
             organism=organism,
             raise_validation_error=False,
         )
-        if
-
-            logger.save(f"linked: {
+        if schema_var is not None:
+            _schemas_m2m["var"] = schema_var
+            logger.save(f"linked: {schema_var}")
         logger.indent = ""
-        if
+        if schema_var is None:
             logger.warning("skip linking features to artifact in slot 'var'")
     if len(data_parse.obs.columns) > 0:
         logger.info("parsing feature names of slot 'obs'")
         logger.indent = "   "
-
+        schema_obs = Schema.from_df(
             df=data_parse.obs,
             field=obs_field,
             mute=mute,
             organism=organism,
         )
-        if
-
-            logger.save(f"linked: {
+        if schema_obs is not None:
+            _schemas_m2m["obs"] = schema_obs
+            logger.save(f"linked: {schema_obs}")
         logger.indent = ""
-        if
+        if schema_obs is None:
             logger.warning("skip linking features to artifact in slot 'obs'")
-    return
+    return _schemas_m2m


 def is_valid_datetime_str(date_string: str) -> bool | str:
@@ -621,12 +620,12 @@ def infer_feature_type_convert_json(
             return ("list[cat ? str]", value, message)
         elif first_element_type == Record:
             return (
-                f"list[cat[{first_element_type.
+                f"list[cat[{first_element_type.__get_name_with_module__()}]]",
                 value,
                 message,
             )
     elif isinstance(value, Record):
-        return (f"cat[{value.__class__.
+        return (f"cat[{value.__class__.__get_name_with_module__()}]", value, message)
     if not mute:
         logger.warning(f"cannot infer feature type of: {value}, returning '?")
     return "?", value, message
@@ -634,7 +633,7 @@ def infer_feature_type_convert_json(

 def __init__(self, host: Artifact | Collection | Run):
     self._host = host
-    self.
+    self._schema_by_slot_ = None
     self._accessor_by_registry_ = None


@@ -651,15 +650,15 @@ def get_values(self) -> dict[str, Any]:


 def __getitem__(self, slot) -> QuerySet:
-    if slot not in self.
+    if slot not in self._schema_by_slot:
         raise ValueError(
             f"No linked feature set for slot: {slot}\nDid you get validation"
             " warnings? Only features that match registered features get validated"
             " and linked."
         )
-
-    orm_name =
-    return getattr(
+    schema = self._schema_by_slot[slot]
+    orm_name = schema.itype
+    return getattr(schema, self._accessor_by_registry[orm_name]).all()


 def filter_base(cls, **expression):
@@ -749,11 +748,11 @@ def get(cls, **expression) -> Record:


 @property  # type: ignore
-def
+def _schema_by_slot(self):
     """Feature sets by slot."""
-    if self.
-        self.
-    return self.
+    if self._schema_by_slot_ is None:
+        self._schema_by_slot_ = get_schema_by_slot_(self._host)
+    return self._schema_by_slot_


 @property  # type: ignore
@@ -833,11 +832,11 @@ def _add_values(
     model_name = "Param" if is_param else "Feature"
     if is_param:
         if self._host.__class__ == Artifact:
-            if self._host.
+            if self._host.kind != "model":
                 raise ValidationError("Can only set params for model-like artifacts.")
     else:
         if self._host.__class__ == Artifact:
-            if self._host.
+            if self._host.kind != "dataset" and self._host.kind is not None:
                 raise ValidationError(
                     "Can only set features for dataset-like artifacts."
                 )
@@ -913,7 +912,7 @@ def _add_values(
                     raise ValidationError(
                         f"Please save {record} before annotation."
                     )
-                features_labels[record.__class__.
+                features_labels[record.__class__.__get_name_with_module__()].append(
                     (feature, record)
                 )
             else:
@@ -957,7 +956,7 @@ def _add_values(
             links = [
                 LinkORM(
                     **{
-                        f"{self._host.__class__.
+                        f"{self._host.__class__.__get_name_with_module__().lower()}_id": self._host.id,
                         valuefield_id: feature_value.id,
                     }
                 )
@@ -1030,14 +1029,14 @@ def remove_values(
         link_models_on_models = {
             getattr(
                 Artifact, obj.related_name
-            ).through.
+            ).through.__get_name_with_module__(): obj.related_model.__get_name_with_module__()
             for obj in Artifact._meta.related_objects
-            if obj.related_model.
+            if obj.related_model.__get_name_with_module__() == feature_registry
         }
         link_attribute = {
             obj.related_name
             for obj in Artifact._meta.related_objects
-            if obj.related_model.
+            if obj.related_model.__get_name_with_module__() in link_models_on_models
         }.pop()
         getattr(self._host, link_attribute).filter(**filter_kwargs).all().delete()
     else:
@@ -1050,36 +1049,37 @@ def remove_values(
     # we can clean the FeatureValue registry periodically if we want to


-def
-    """
+def add_schema(self, schema: Schema, slot: str) -> None:
+    """Annotate artifact with a schema.

     Args:
-
-        slot: `str` The slot that marks where the
+        schema: `Schema` A schema record.
+        slot: `str` The slot that marks where the schema is stored in
             the artifact.
     """
+    # TODO: deprecate as soon as we have the Schema-based curators
     if self._host._state.adding:
         raise ValueError(
             "Please save the artifact or collection before adding a feature set!"
         )
     host_db = self._host._state.db
-
+    schema.save(using=host_db)
     host_id_field = get_host_id_field(self._host)
     kwargs = {
         host_id_field: self._host.id,
-        "
+        "schema": schema,
         "slot": slot,
     }
     link_record = (
-        self._host.
+        self._host._schemas_m2m.through.objects.using(host_db)
         .filter(**kwargs)
         .one_or_none()
     )
     if link_record is None:
-        self._host.
-        if slot in self.
+        self._host._schemas_m2m.through(**kwargs).save(using=host_db)
+        if slot in self._schema_by_slot:
             logger.debug(f"replaced existing {slot} feature set")
-        self.
+        self._schema_by_slot_[slot] = schema  # type: ignore


 def _add_set_from_df(
@@ -1090,18 +1090,18 @@ def _add_set_from_df(
 ):
     """Add feature set corresponding to column names of DataFrame."""
     if isinstance(self._host, Artifact):
-        assert self._host.
+        assert self._host.otype == "DataFrame"  # noqa: S101
     else:
         # Collection
-        assert self._host.artifact.
+        assert self._host.artifact.otype == "DataFrame"  # noqa: S101
     df = self._host.load()
-
+    schema = Schema.from_df(
         df=df,
         field=field,
         mute=mute,
         organism=organism,
     )
-    self._host.
+    self._host._staged__schemas_m2m = {"columns": schema}
     self._host.save()


@@ -1114,13 +1114,13 @@ def _add_set_from_anndata(
 ):
     """Add features from AnnData."""
     if isinstance(self._host, Artifact):
-        assert self._host.
+        assert self._host.otype == "AnnData"  # noqa: S101
     else:
         raise NotImplementedError()

     # parse and register features
     adata = self._host.load()
-
+    _schemas_m2m = parse_staged__schemas_m2m_from_anndata(
         adata,
         var_field=var_field,
         obs_field=obs_field,
@@ -1129,7 +1129,7 @@ def _add_set_from_anndata(
     )

     # link feature sets
-    self._host.
+    self._host._staged__schemas_m2m = _schemas_m2m
     self._host.save()


@@ -1144,18 +1144,18 @@ def _add_set_from_mudata(
     if obs_fields is None:
         obs_fields = {}
     if isinstance(self._host, Artifact):
-        assert self._host.
+        assert self._host.otype == "MuData"  # noqa: S101
     else:
         raise NotImplementedError()

     # parse and register features
     mdata = self._host.load()
-
+    _schemas_m2m = {}
     obs_features = Feature.from_values(mdata.obs.columns)
     if len(obs_features) > 0:
-
+        _schemas_m2m["obs"] = Schema(features=obs_features)
     for modality, field in var_fields.items():
-        modality_fs =
+        modality_fs = parse_staged__schemas_m2m_from_anndata(
             mdata[modality],
             var_field=field,
             obs_field=obs_fields.get(modality, Feature.name),
@@ -1163,22 +1163,22 @@ def _add_set_from_mudata(
             organism=organism,
         )
         for k, v in modality_fs.items():
-
+            _schemas_m2m[f"['{modality}'].{k}"] = v

-    def
+    def unify_staged__schemas_m2m_by_hash(_schemas_m2m):
         unique_values = {}

-        for key, value in
+        for key, value in _schemas_m2m.items():
             value_hash = value.hash  # Assuming each value has a .hash attribute
             if value_hash in unique_values:
-
+                _schemas_m2m[key] = unique_values[value_hash]
             else:
                 unique_values[value_hash] = value

-        return
+        return _schemas_m2m

     # link feature sets
-    self._host.
+    self._host._staged__schemas_m2m = unify_staged__schemas_m2m_by_hash(_schemas_m2m)
     self._host.save()


@@ -1188,8 +1188,8 @@ def _add_from(self, data: Artifact | Collection, transfer_logs: dict = None):
     if transfer_logs is None:
         transfer_logs = {"mapped": [], "transferred": [], "run": None}
     using_key = settings._using_key
-    for slot,
-        members =
+    for slot, schema in data.features._schema_by_slot.items():
+        members = schema.members
         if len(members) == 0:
             continue
         registry = members[0].__class__
@@ -1225,20 +1225,18 @@ def _add_from(self, data: Artifact | Collection, transfer_logs: dict = None):
         save(new_members)

         # create a new feature set from feature values using the same uid
-
-
-        )
-        if feature_set_self is None:
+        schema_self = Schema.from_values(member_uids, field=getattr(registry, field))
+        if schema_self is None:
             if hasattr(registry, "organism_id"):
                 logger.warning(
-                    f"
+                    f"Schema is not transferred, check if organism is set correctly: {schema}"
                 )
             continue
-        # make sure the uid matches if
-        if
-
-        logger.info(f"saving {slot}
-        self._host.features.
+        # make sure the uid matches if schema is composed of same features
+        if schema_self.hash == schema.hash:
+            schema_self.uid = schema.uid
+        logger.info(f"saving {slot} schema: {schema_self}")
+        self._host.features.add_schema(schema_self, slot)


 def make_external(self, feature: Feature) -> None:
@@ -1250,8 +1248,8 @@ def make_external(self, feature: Feature) -> None:
     """
     if not isinstance(feature, Feature):
         raise TypeError("feature must be a Feature record!")
-
-    for fs in
+    _schemas_m2m = Schema.filter(features=feature).all()
+    for fs in _schemas_m2m:
         f = Feature.filter(uid=feature.uid).all()
         features_updated = fs.members.difference(f)
         if len(features_updated) > 0:
@@ -1261,13 +1259,14 @@ def make_external(self, feature: Feature) -> None:
             fs.n = len(features_updated)
             fs.save()
         # delete the link between the feature and the feature set
-
-            feature_id=feature.id,
+        Schema.features.through.objects.filter(
+            feature_id=feature.id, schema_id=fs.id
         ).delete()
-        # if no members are left in the
+        # if no members are left in the schema, delete it
         if len(features_updated) == 0:
             logger.warning(f"deleting empty feature set: {fs}")
-            fs.artifacts.set([])
+            fs._artifacts_m2m.set([])
             fs.delete()


@@ -1277,10 +1276,12 @@ FeatureManager.__repr__ = __repr__
 ParamManager.__repr__ = __repr__
 FeatureManager.__getitem__ = __getitem__
 FeatureManager.get_values = get_values
-FeatureManager.
+FeatureManager._schema_by_slot = _schema_by_slot
+FeatureManager._feature_set_by_slot = _schema_by_slot
 FeatureManager._accessor_by_registry = _accessor_by_registry
 FeatureManager.add_values = add_values_features
-FeatureManager.
+FeatureManager.add_schema = add_schema
+FeatureManager.add_feature_set = add_schema  # backward compat, will raise warning soon
 FeatureManager._add_set_from_df = _add_set_from_df
 FeatureManager._add_set_from_anndata = _add_set_from_anndata
 FeatureManager._add_set_from_mudata = _add_set_from_mudata