lamindb 0.74.3__py3-none-any.whl → 0.75.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lamindb/__init__.py +1 -1
- lamindb/_artifact.py +85 -43
- lamindb/_can_validate.py +100 -35
- lamindb/_collection.py +36 -28
- lamindb/_curate.py +432 -181
- lamindb/_feature_set.py +5 -5
- lamindb/_filter.py +3 -3
- lamindb/_finish.py +29 -23
- lamindb/_from_values.py +47 -66
- lamindb/_is_versioned.py +1 -1
- lamindb/_parents.py +38 -13
- lamindb/_record.py +41 -42
- lamindb/_save.py +7 -7
- lamindb/_transform.py +27 -16
- lamindb/_view.py +13 -11
- lamindb/core/__init__.py +2 -0
- lamindb/core/_data.py +18 -20
- lamindb/core/_feature_manager.py +50 -50
- lamindb/core/_label_manager.py +17 -19
- lamindb/core/_mapped_collection.py +1 -1
- lamindb/core/_run_context.py +6 -8
- lamindb/core/datasets/_core.py +7 -7
- lamindb/core/exceptions.py +11 -0
- lamindb/core/schema.py +5 -5
- lamindb/core/storage/__init__.py +12 -2
- lamindb/core/storage/_anndata_accessor.py +735 -0
- lamindb/core/storage/_backed_access.py +77 -747
- lamindb/core/storage/_valid_suffixes.py +16 -2
- lamindb/core/storage/paths.py +9 -14
- lamindb/core/types.py +3 -0
- lamindb/core/versioning.py +1 -1
- lamindb/integrations/__init__.py +1 -0
- lamindb/integrations/_vitessce.py +68 -31
- {lamindb-0.74.3.dist-info → lamindb-0.75.1.dist-info}/METADATA +5 -5
- lamindb-0.75.1.dist-info/RECORD +58 -0
- lamindb-0.74.3.dist-info/RECORD +0 -57
- {lamindb-0.74.3.dist-info → lamindb-0.75.1.dist-info}/LICENSE +0 -0
- {lamindb-0.74.3.dist-info → lamindb-0.75.1.dist-info}/WHEEL +0 -0
lamindb/core/_feature_manager.py
CHANGED
@@ -39,7 +39,7 @@ from lamindb._feature import FEATURE_TYPES, convert_numpy_dtype_to_lamin_feature
|
|
39
39
|
from lamindb._feature_set import DICT_KEYS_TYPE, FeatureSet
|
40
40
|
from lamindb._record import (
|
41
41
|
REGISTRY_UNIQUE_FIELD,
|
42
|
-
|
42
|
+
get_name_field,
|
43
43
|
transfer_fk_to_default_db_bulk,
|
44
44
|
transfer_to_default_db,
|
45
45
|
)
|
@@ -88,12 +88,12 @@ def get_feature_set_by_slot_(host) -> dict:
|
|
88
88
|
host_id_field = get_host_id_field(host)
|
89
89
|
kwargs = {host_id_field: host.id}
|
90
90
|
# otherwise, we need a query
|
91
|
-
|
91
|
+
links_feature_set = (
|
92
92
|
host.feature_sets.through.objects.using(host_db)
|
93
93
|
.filter(**kwargs)
|
94
94
|
.select_related("featureset")
|
95
95
|
)
|
96
|
-
return {fsl.slot: fsl.featureset for fsl in
|
96
|
+
return {fsl.slot: fsl.featureset for fsl in links_feature_set}
|
97
97
|
|
98
98
|
|
99
99
|
def get_label_links(
|
@@ -112,22 +112,15 @@ def get_label_links(
|
|
112
112
|
def get_feature_set_links(host: Artifact | Collection) -> QuerySet:
|
113
113
|
host_id_field = get_host_id_field(host)
|
114
114
|
kwargs = {host_id_field: host.id}
|
115
|
-
|
116
|
-
return
|
115
|
+
links_feature_set = host.feature_sets.through.objects.filter(**kwargs)
|
116
|
+
return links_feature_set
|
117
117
|
|
118
118
|
|
119
119
|
def get_link_attr(link: LinkORM | type[LinkORM], data: HasFeatures) -> str:
|
120
120
|
link_model_name = link.__class__.__name__
|
121
|
-
if
|
122
|
-
link_model_name == "ModelBase" or link_model_name == "RecordMeta"
|
123
|
-
): # we passed the type of the link
|
121
|
+
if link_model_name in {"Registry", "ModelBase"}: # we passed the type of the link
|
124
122
|
link_model_name = link.__name__
|
125
|
-
|
126
|
-
if link_attr == "ExperimentalFactor":
|
127
|
-
link_attr = "experimental_factor"
|
128
|
-
else:
|
129
|
-
link_attr = link_attr.lower()
|
130
|
-
return link_attr
|
123
|
+
return link_model_name.replace(data.__class__.__name__, "").lower()
|
131
124
|
|
132
125
|
|
133
126
|
# Custom aggregation for SQLite
|
@@ -182,14 +175,14 @@ def print_features(
|
|
182
175
|
non_labels_msg = ""
|
183
176
|
if self.id is not None and self.__class__ == Artifact or self.__class__ == Run:
|
184
177
|
attr_name = "param" if print_params else "feature"
|
185
|
-
|
186
|
-
getattr(self, f"{attr_name}_values")
|
178
|
+
_feature_values = (
|
179
|
+
getattr(self, f"_{attr_name}_values")
|
187
180
|
.values(f"{attr_name}__name", f"{attr_name}__dtype")
|
188
181
|
.annotate(values=custom_aggregate("value", self._state.db))
|
189
182
|
.order_by(f"{attr_name}__name")
|
190
183
|
)
|
191
|
-
if len(
|
192
|
-
for fv in
|
184
|
+
if len(_feature_values) > 0:
|
185
|
+
for fv in _feature_values:
|
193
186
|
feature_name = fv[f"{attr_name}__name"]
|
194
187
|
feature_dtype = fv[f"{attr_name}__dtype"]
|
195
188
|
values = fv["values"]
|
@@ -217,7 +210,7 @@ def print_features(
|
|
217
210
|
for slot, feature_set in get_feature_set_by_slot_(self).items():
|
218
211
|
features = feature_set.members
|
219
212
|
# features.first() is a lot slower than features[0] here
|
220
|
-
name_field =
|
213
|
+
name_field = get_name_field(features[0])
|
221
214
|
feature_names = list(features.values_list(name_field, flat=True)[:20])
|
222
215
|
type_str = f": {feature_set.registry}" if print_types else ""
|
223
216
|
feature_set_msg += (
|
@@ -246,7 +239,7 @@ def parse_feature_sets_from_anndata(
|
|
246
239
|
from lamindb.core.storage._backed_access import backed_access
|
247
240
|
|
248
241
|
using_key = settings._using_key
|
249
|
-
data_parse = backed_access(filepath, using_key)
|
242
|
+
data_parse = backed_access(filepath, using_key=using_key)
|
250
243
|
else:
|
251
244
|
data_parse = ad.read_h5ad(filepath, backed="r")
|
252
245
|
type = "float"
|
@@ -316,13 +309,13 @@ def infer_feature_type_convert_json(
|
|
316
309
|
if len(value) > 0: # type: ignore
|
317
310
|
first_element_type = type(next(iter(value)))
|
318
311
|
if all(isinstance(elem, first_element_type) for elem in value):
|
319
|
-
if first_element_type
|
312
|
+
if first_element_type is bool:
|
320
313
|
return f"list[{FEATURE_TYPES['bool']}]", value
|
321
|
-
elif first_element_type
|
314
|
+
elif first_element_type is int:
|
322
315
|
return f"list[{FEATURE_TYPES['int']}]", value
|
323
|
-
elif first_element_type
|
316
|
+
elif first_element_type is float:
|
324
317
|
return f"list[{FEATURE_TYPES['float']}]", value
|
325
|
-
elif first_element_type
|
318
|
+
elif first_element_type is str:
|
326
319
|
if str_as_ulabel:
|
327
320
|
return FEATURE_TYPES["str"] + "[ULabel]", value
|
328
321
|
else:
|
@@ -390,7 +383,7 @@ def filter(cls, **expression) -> QuerySet:
|
|
390
383
|
feature = features.get(name=normalized_key)
|
391
384
|
if not feature.dtype.startswith("cat"):
|
392
385
|
feature_value = value_model.filter(feature=feature, value=value).one()
|
393
|
-
new_expression["
|
386
|
+
new_expression["_feature_values"] = feature_value
|
394
387
|
else:
|
395
388
|
if isinstance(value, str):
|
396
389
|
label = ULabel.filter(name=value).one()
|
@@ -478,7 +471,7 @@ def _add_values(
|
|
478
471
|
)
|
479
472
|
# figure out which of the values go where
|
480
473
|
features_labels = defaultdict(list)
|
481
|
-
|
474
|
+
_feature_values = []
|
482
475
|
not_validated_values = []
|
483
476
|
for key, value in features_values.items():
|
484
477
|
feature = model.filter(name=key).one()
|
@@ -508,7 +501,7 @@ def _add_values(
|
|
508
501
|
feature_value = value_model.filter(**filter_kwargs).one_or_none()
|
509
502
|
if feature_value is None:
|
510
503
|
feature_value = value_model(**filter_kwargs)
|
511
|
-
|
504
|
+
_feature_values.append(feature_value)
|
512
505
|
else:
|
513
506
|
if isinstance(value, Record) or (
|
514
507
|
isinstance(value, Iterable) and isinstance(next(iter(value)), Record)
|
@@ -578,7 +571,7 @@ def _add_values(
|
|
578
571
|
except Exception:
|
579
572
|
save(links, ignore_conflicts=True)
|
580
573
|
# now deal with links that were previously saved without a feature_id
|
581
|
-
|
574
|
+
links_saved = LinkORM.filter(
|
582
575
|
**{
|
583
576
|
"artifact_id": self._host.id,
|
584
577
|
f"{field_name}__in": [
|
@@ -586,7 +579,7 @@ def _add_values(
|
|
586
579
|
],
|
587
580
|
}
|
588
581
|
)
|
589
|
-
for link in
|
582
|
+
for link in links_saved.all():
|
590
583
|
# TODO: also check for inconsistent features
|
591
584
|
if link.feature_id is None:
|
592
585
|
link.feature_id = [
|
@@ -595,13 +588,13 @@ def _add_values(
|
|
595
588
|
if l.id == getattr(link, field_name)
|
596
589
|
][0]
|
597
590
|
link.save()
|
598
|
-
if
|
599
|
-
save(
|
591
|
+
if _feature_values:
|
592
|
+
save(_feature_values)
|
600
593
|
if is_param:
|
601
|
-
LinkORM = self._host.
|
594
|
+
LinkORM = self._host._param_values.through
|
602
595
|
valuefield_id = "paramvalue_id"
|
603
596
|
else:
|
604
|
-
LinkORM = self._host.
|
597
|
+
LinkORM = self._host._feature_values.through
|
605
598
|
valuefield_id = "featurevalue_id"
|
606
599
|
links = [
|
607
600
|
LinkORM(
|
@@ -610,7 +603,7 @@ def _add_values(
|
|
610
603
|
valuefield_id: feature_value.id,
|
611
604
|
}
|
612
605
|
)
|
613
|
-
for feature_value in
|
606
|
+
for feature_value in _feature_values
|
614
607
|
]
|
615
608
|
# a link might already exist, to avoid raising a unique constraint
|
616
609
|
# error, ignore_conflicts
|
@@ -683,10 +676,10 @@ def _add_set_from_df(
|
|
683
676
|
):
|
684
677
|
"""Add feature set corresponding to column names of DataFrame."""
|
685
678
|
if isinstance(self._host, Artifact):
|
686
|
-
assert self._host.
|
679
|
+
assert self._host._accessor == "DataFrame" # noqa: S101
|
687
680
|
else:
|
688
681
|
# Collection
|
689
|
-
assert self._host.artifact.
|
682
|
+
assert self._host.artifact._accessor == "DataFrame" # noqa: S101
|
690
683
|
|
691
684
|
# parse and register features
|
692
685
|
registry = field.field.model
|
@@ -714,7 +707,7 @@ def _add_set_from_anndata(
|
|
714
707
|
):
|
715
708
|
"""Add features from AnnData."""
|
716
709
|
if isinstance(self._host, Artifact):
|
717
|
-
assert self._host.
|
710
|
+
assert self._host._accessor == "AnnData" # noqa: S101
|
718
711
|
else:
|
719
712
|
raise NotImplementedError()
|
720
713
|
|
@@ -744,16 +737,16 @@ def _add_set_from_mudata(
|
|
744
737
|
if obs_fields is None:
|
745
738
|
obs_fields = {}
|
746
739
|
if isinstance(self._host, Artifact):
|
747
|
-
assert self._host.
|
740
|
+
assert self._host._accessor == "MuData" # noqa: S101
|
748
741
|
else:
|
749
742
|
raise NotImplementedError()
|
750
743
|
|
751
744
|
# parse and register features
|
752
745
|
mdata = self._host.load()
|
753
746
|
feature_sets = {}
|
754
|
-
obs_features =
|
747
|
+
obs_features = Feature.from_values(mdata.obs.columns)
|
755
748
|
if len(obs_features) > 0:
|
756
|
-
feature_sets["obs"] = FeatureSet(features=
|
749
|
+
feature_sets["obs"] = FeatureSet(features=obs_features)
|
757
750
|
for modality, field in var_fields.items():
|
758
751
|
modality_fs = parse_feature_sets_from_anndata(
|
759
752
|
mdata[modality],
|
@@ -765,8 +758,20 @@ def _add_set_from_mudata(
|
|
765
758
|
for k, v in modality_fs.items():
|
766
759
|
feature_sets[f"['{modality}'].{k}"] = v
|
767
760
|
|
761
|
+
def unify_feature_sets_by_hash(feature_sets):
|
762
|
+
unique_values = {}
|
763
|
+
|
764
|
+
for key, value in feature_sets.items():
|
765
|
+
value_hash = value.hash # Assuming each value has a .hash attribute
|
766
|
+
if value_hash in unique_values:
|
767
|
+
feature_sets[key] = unique_values[value_hash]
|
768
|
+
else:
|
769
|
+
unique_values[value_hash] = value
|
770
|
+
|
771
|
+
return feature_sets
|
772
|
+
|
768
773
|
# link feature sets
|
769
|
-
self._host._feature_sets = feature_sets
|
774
|
+
self._host._feature_sets = unify_feature_sets_by_hash(feature_sets)
|
770
775
|
self._host.save()
|
771
776
|
|
772
777
|
|
@@ -781,17 +786,12 @@ def _add_from(self, data: HasFeatures):
|
|
781
786
|
registry = members[0].__class__
|
782
787
|
# note here the features are transferred based on an unique field
|
783
788
|
field = REGISTRY_UNIQUE_FIELD.get(registry.__name__.lower(), "uid")
|
784
|
-
|
785
|
-
|
786
|
-
field = "ontology_id"
|
787
|
-
elif hasattr(registry, "ensembl_gene_id"):
|
788
|
-
field = "ensembl_gene_id"
|
789
|
-
elif hasattr(registry, "uniprotkb_id"):
|
790
|
-
field = "uniprotkb_id"
|
789
|
+
if hasattr(registry, "_ontology_id_field"):
|
790
|
+
field = registry._ontology_id_field
|
791
791
|
# this will be e.g. be a list of ontology_ids or uids
|
792
792
|
member_uids = list(members.values_list(field, flat=True))
|
793
793
|
# create records from ontology_id
|
794
|
-
if
|
794
|
+
if hasattr(registry, "_ontology_id_field") and len(member_uids) > 0:
|
795
795
|
# create from bionty
|
796
796
|
save(registry.from_values(member_uids, field=field))
|
797
797
|
validated = registry.validate(member_uids, field=field, mute=True)
|
@@ -816,7 +816,7 @@ def _add_from(self, data: HasFeatures):
|
|
816
816
|
member_uids, field=getattr(registry, field)
|
817
817
|
)
|
818
818
|
if feature_set_self is None:
|
819
|
-
if hasattr(registry, "
|
819
|
+
if hasattr(registry, "organism_id"):
|
820
820
|
logger.warning(
|
821
821
|
f"FeatureSet is not transferred, check if organism is set correctly: {feature_set}"
|
822
822
|
)
|
lamindb/core/_label_manager.py
CHANGED
@@ -4,13 +4,13 @@ from collections import defaultdict
|
|
4
4
|
from typing import TYPE_CHECKING, Dict
|
5
5
|
|
6
6
|
import numpy as np
|
7
|
-
from lamin_utils import colors
|
7
|
+
from lamin_utils import colors
|
8
8
|
from lnschema_core.models import Feature
|
9
9
|
|
10
10
|
from lamindb._from_values import _print_values
|
11
11
|
from lamindb._record import (
|
12
12
|
REGISTRY_UNIQUE_FIELD,
|
13
|
-
|
13
|
+
get_name_field,
|
14
14
|
transfer_fk_to_default_db_bulk,
|
15
15
|
transfer_to_default_db,
|
16
16
|
)
|
@@ -28,17 +28,19 @@ if TYPE_CHECKING:
|
|
28
28
|
def get_labels_as_dict(self: HasFeatures, links: bool = False):
|
29
29
|
exclude_set = {
|
30
30
|
"feature_sets",
|
31
|
-
"
|
32
|
-
"
|
31
|
+
"artifacts",
|
32
|
+
"input_of_runs",
|
33
33
|
"collections",
|
34
|
-
"
|
34
|
+
"_source_code_artifact_of",
|
35
35
|
"report_of",
|
36
36
|
"environment_of",
|
37
|
-
"
|
38
|
-
"
|
39
|
-
"
|
37
|
+
"links_collection",
|
38
|
+
"links_artifact",
|
39
|
+
"links_feature_set",
|
40
40
|
"previous_runs",
|
41
|
-
"
|
41
|
+
"_feature_values",
|
42
|
+
"_lnschema_core_collection__actions_+",
|
43
|
+
"_actions",
|
42
44
|
}
|
43
45
|
labels = {} # type: ignore
|
44
46
|
if self.id is None:
|
@@ -62,7 +64,7 @@ def print_labels(self: HasFeatures, field: str = "name", print_types: bool = Fal
|
|
62
64
|
try:
|
63
65
|
labels_list = list(labels.values_list(field, flat=True))
|
64
66
|
if len(labels_list) > 0:
|
65
|
-
|
67
|
+
get_name_field(labels)
|
66
68
|
print_values = _print_values(labels_list, n=10)
|
67
69
|
type_str = f": {related_model}" if print_types else ""
|
68
70
|
labels_msg += f" .{related_name}{type_str} = {print_values}\n"
|
@@ -84,18 +86,14 @@ def validate_labels(labels: QuerySet | list | dict):
|
|
84
86
|
return [], []
|
85
87
|
registry = labels[0].__class__
|
86
88
|
field = REGISTRY_UNIQUE_FIELD.get(registry.__name__.lower(), "uid")
|
87
|
-
if hasattr(registry, "
|
88
|
-
field =
|
89
|
-
elif hasattr(registry, "ensembl_gene_id"):
|
90
|
-
field = "ensembl_gene_id"
|
91
|
-
elif hasattr(registry, "uniprotkb_id"):
|
92
|
-
field = "uniprotkb_id"
|
89
|
+
if hasattr(registry, "_ontology_id_field"):
|
90
|
+
field = registry._ontology_id_field
|
93
91
|
# if the field value is None, use uid field
|
94
92
|
label_uids = np.array(
|
95
93
|
[getattr(label, field) for label in labels if label is not None]
|
96
94
|
)
|
97
95
|
# save labels from ontology_ids
|
98
|
-
if
|
96
|
+
if hasattr(registry, "_ontology_id_field") and len(label_uids) > 0:
|
99
97
|
try:
|
100
98
|
save(registry.from_values(label_uids, field=field))
|
101
99
|
except Exception: # noqa S110
|
@@ -201,10 +199,10 @@ class LabelManager:
|
|
201
199
|
transfer_fk_to_default_db_bulk(new_labels, using_key)
|
202
200
|
for label in labels:
|
203
201
|
# if the link table doesn't follow this convention, we'll ignore it
|
204
|
-
if not hasattr(label, f"{data_name_lower}
|
202
|
+
if not hasattr(label, f"links_{data_name_lower}"):
|
205
203
|
key = None
|
206
204
|
else:
|
207
|
-
link = getattr(label, f"{data_name_lower}
|
205
|
+
link = getattr(label, f"links_{data_name_lower}").get(
|
208
206
|
**{f"{data_name_lower}_id": data.id}
|
209
207
|
)
|
210
208
|
if link.feature is not None:
|
lamindb/core/_run_context.py
CHANGED
@@ -11,7 +11,6 @@ from lamin_utils import logger
|
|
11
11
|
from lamindb_setup.core.hashing import hash_file
|
12
12
|
from lnschema_core import Run, Transform, ids
|
13
13
|
from lnschema_core.models import Param, ParamValue, RunParamValue
|
14
|
-
from lnschema_core.types import TransformType
|
15
14
|
from lnschema_core.users import current_user_id
|
16
15
|
|
17
16
|
from ._settings import settings
|
@@ -27,6 +26,7 @@ from .versioning import bump_version as bump_version_function
|
|
27
26
|
|
28
27
|
if TYPE_CHECKING:
|
29
28
|
from lamindb_setup.core.types import UPathStr
|
29
|
+
from lnschema_core.types import TransformType
|
30
30
|
|
31
31
|
is_run_from_ipython = getattr(builtins, "__IPYTHON__", False)
|
32
32
|
|
@@ -279,14 +279,14 @@ class run_context:
|
|
279
279
|
).one_or_none()
|
280
280
|
if is_run_from_ipython:
|
281
281
|
key, name = cls._track_notebook(path=path)
|
282
|
-
transform_type =
|
282
|
+
transform_type = "notebook"
|
283
283
|
transform_ref = None
|
284
284
|
transform_ref_type = None
|
285
285
|
else:
|
286
286
|
(name, key, transform_ref, transform_ref_type) = cls._track_script(
|
287
287
|
path=path
|
288
288
|
)
|
289
|
-
transform_type =
|
289
|
+
transform_type = "script"
|
290
290
|
# overwrite whatever is auto-detected in the notebook or script
|
291
291
|
if transform_settings.name is not None:
|
292
292
|
name = transform_settings.name
|
@@ -323,9 +323,7 @@ class run_context:
|
|
323
323
|
cls.transform = transform_exists
|
324
324
|
|
325
325
|
if new_run is None: # for notebooks, default to loading latest runs
|
326
|
-
new_run =
|
327
|
-
False if cls.transform.type == TransformType.notebook.value else True
|
328
|
-
) # type: ignore
|
326
|
+
new_run = False if cls.transform.type == "notebook" else True # type: ignore
|
329
327
|
|
330
328
|
run = None
|
331
329
|
from lamindb._run import Run
|
@@ -479,7 +477,7 @@ class run_context:
|
|
479
477
|
transform.save()
|
480
478
|
logger.important(f"updated: {transform}")
|
481
479
|
# check whether transform source code was already saved
|
482
|
-
if transform.
|
480
|
+
if transform._source_code_artifact_id is not None:
|
483
481
|
response = None
|
484
482
|
if is_run_from_ipython:
|
485
483
|
if os.getenv("LAMIN_TESTING") is None:
|
@@ -491,7 +489,7 @@ class run_context:
|
|
491
489
|
response = "y"
|
492
490
|
else:
|
493
491
|
hash, _ = hash_file(cls.path) # ignore hash_type for now
|
494
|
-
if hash != transform.
|
492
|
+
if hash != transform._source_code_artifact.hash:
|
495
493
|
# only if hashes don't match, we need user input
|
496
494
|
if os.getenv("LAMIN_TESTING") is None:
|
497
495
|
response = input(
|
lamindb/core/datasets/_core.py
CHANGED
@@ -83,7 +83,7 @@ def file_tsv_rnaseq_nfcore_salmon_merged_gene_counts(
|
|
83
83
|
ln.settings.verbosity = "error"
|
84
84
|
ln.Feature(name="assay", dtype=[bt.ExperimentalFactor]).save()
|
85
85
|
ln.Feature(name="organism", dtype=[bt.Organism]).save()
|
86
|
-
bt.ExperimentalFactor.
|
86
|
+
bt.ExperimentalFactor.from_source(ontology_id="EFO:0008896").save()
|
87
87
|
ln.settings.verbosity = verbosity
|
88
88
|
|
89
89
|
return Path(filepath)
|
@@ -186,16 +186,16 @@ def anndata_mouse_sc_lymph_node(
|
|
186
186
|
verbosity = ln.settings.verbosity
|
187
187
|
ln.settings.verbosity = "error"
|
188
188
|
# strain
|
189
|
-
bt.ExperimentalFactor.
|
189
|
+
bt.ExperimentalFactor.from_source(ontology_id="EFO:0004472").save()
|
190
190
|
# developmental stage
|
191
|
-
bt.ExperimentalFactor.
|
191
|
+
bt.ExperimentalFactor.from_source(ontology_id="EFO:0001272").save()
|
192
192
|
# tissue
|
193
|
-
bt.Tissue.
|
193
|
+
bt.Tissue.from_source(ontology_id="UBERON:0001542").save()
|
194
194
|
# cell types
|
195
195
|
ln.save(bt.CellType.from_values(["CL:0000115", "CL:0000738"], "ontology_id"))
|
196
196
|
# assays
|
197
197
|
ln.Feature(name="assay", dtype=[bt.ExperimentalFactor]).save()
|
198
|
-
bt.ExperimentalFactor.
|
198
|
+
bt.ExperimentalFactor.from_source(ontology_id="EFO:0008913").save()
|
199
199
|
# genes
|
200
200
|
validated = bt.Gene.public(organism="mouse").validate(
|
201
201
|
adata.var.index, field="ensembl_gene_id"
|
@@ -323,7 +323,7 @@ def anndata_human_immune_cells(
|
|
323
323
|
ln.Feature(name="tissue", dtype=[bt.Tissue]).save()
|
324
324
|
ln.Feature(name="organism", dtype=[bt.Organism]).save()
|
325
325
|
ln.Feature(name="donor", dtype=[ln.ULabel]).save()
|
326
|
-
bt.ExperimentalFactor.
|
326
|
+
bt.ExperimentalFactor.from_source(ontology_id="EFO:0008913").save()
|
327
327
|
ln.save([ln.ULabel(name=name) for name in adata.obs.donor.unique()])
|
328
328
|
ln.settings.verbosity = verbosity
|
329
329
|
return adata
|
@@ -332,7 +332,7 @@ def anndata_human_immune_cells(
|
|
332
332
|
def anndata_with_obs() -> ad.AnnData:
|
333
333
|
"""Create a mini anndata with cell_type, disease and tissue."""
|
334
334
|
import anndata as ad
|
335
|
-
import bionty_base
|
335
|
+
import bionty.base as bionty_base
|
336
336
|
|
337
337
|
celltypes = ["T cell", "hematopoietic stem cell", "hepatocyte", "my new cell type"]
|
338
338
|
celltype_ids = ["CL:0000084", "CL:0000037", "CL:0000182", ""]
|
lamindb/core/exceptions.py
CHANGED
@@ -10,6 +10,7 @@ The registry base class:
|
|
10
10
|
NoTitleError
|
11
11
|
MissingTransformSettings
|
12
12
|
UpdateTransformSettings
|
13
|
+
IntegrityError
|
13
14
|
|
14
15
|
"""
|
15
16
|
|
@@ -25,6 +26,16 @@ class ValidationError(SystemExit):
|
|
25
26
|
# -------------------------------------------------------------------------------------
|
26
27
|
|
27
28
|
|
29
|
+
class IntegrityError(Exception):
|
30
|
+
"""Integrity error.
|
31
|
+
|
32
|
+
For instance, it's not allowed to delete artifacts outside managed storage
|
33
|
+
locations.
|
34
|
+
"""
|
35
|
+
|
36
|
+
pass
|
37
|
+
|
38
|
+
|
28
39
|
class NotebookNotSavedError(Exception):
|
29
40
|
"""Notebook wasn't saved."""
|
30
41
|
|
lamindb/core/schema.py
CHANGED
@@ -4,16 +4,16 @@ from django.db.models import ManyToManyField
|
|
4
4
|
from lnschema_core.models import Feature, FeatureSet, LinkORM, Record
|
5
5
|
|
6
6
|
|
7
|
-
def dict_schema_name_to_model_name(
|
7
|
+
def dict_schema_name_to_model_name(registry: type[Record]) -> dict[str, Record]:
|
8
8
|
d: dict = {
|
9
9
|
i.related_model.__get_name_with_schema__(): i.related_model
|
10
|
-
for i in
|
10
|
+
for i in registry._meta.related_objects
|
11
11
|
if i.related_name is not None
|
12
12
|
}
|
13
13
|
d.update(
|
14
14
|
{
|
15
15
|
i.related_model.__get_name_with_schema__(): i.related_model
|
16
|
-
for i in
|
16
|
+
for i in registry._meta.many_to_many
|
17
17
|
if i.name is not None
|
18
18
|
}
|
19
19
|
)
|
@@ -21,12 +21,12 @@ def dict_schema_name_to_model_name(orm: type[Record]) -> dict[str, Record]:
|
|
21
21
|
|
22
22
|
|
23
23
|
def dict_related_model_to_related_name(
|
24
|
-
|
24
|
+
registry: type[Record], links: bool = False
|
25
25
|
) -> dict[str, str]:
|
26
26
|
def include(model: Record):
|
27
27
|
return not links != issubclass(model, LinkORM)
|
28
28
|
|
29
|
-
related_objects =
|
29
|
+
related_objects = registry._meta.related_objects + registry._meta.many_to_many
|
30
30
|
d: dict = {
|
31
31
|
record.related_model.__get_name_with_schema__(): (
|
32
32
|
record.related_name
|
lamindb/core/storage/__init__.py
CHANGED
@@ -1,4 +1,13 @@
|
|
1
|
-
"""Storage
|
1
|
+
"""Storage API.
|
2
|
+
|
3
|
+
Valid suffixes.
|
4
|
+
|
5
|
+
.. autosummary::
|
6
|
+
:toctree: .
|
7
|
+
|
8
|
+
VALID_SUFFIXES
|
9
|
+
|
10
|
+
Array accessors.
|
2
11
|
|
3
12
|
.. autosummary::
|
4
13
|
:toctree: .
|
@@ -6,10 +15,11 @@
|
|
6
15
|
AnnDataAccessor
|
7
16
|
BackedAccessor
|
8
17
|
"""
|
18
|
+
|
9
19
|
from lamindb_setup.core.upath import LocalPathClasses, UPath, infer_filesystem
|
10
20
|
|
11
21
|
from ._anndata_sizes import size_adata
|
12
22
|
from ._backed_access import AnnDataAccessor, BackedAccessor
|
13
|
-
from ._valid_suffixes import
|
23
|
+
from ._valid_suffixes import VALID_SUFFIXES
|
14
24
|
from .objects import infer_suffix, write_to_disk
|
15
25
|
from .paths import delete_storage, load_to_memory
|