lamindb 0.69.1__py3-none-any.whl → 0.69.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lamindb/__init__.py +2 -2
- lamindb/_artifact.py +0 -2
- lamindb/_collection.py +16 -4
- lamindb/_feature.py +11 -9
- lamindb/_finish.py +194 -11
- lamindb/_query_set.py +3 -1
- lamindb/_run.py +3 -1
- lamindb/_save.py +34 -21
- lamindb/core/_data.py +3 -0
- lamindb/core/_feature_manager.py +4 -3
- lamindb/core/_run_context.py +17 -5
- lamindb/core/storage/_backed_access.py +48 -11
- lamindb/core/storage/file.py +2 -7
- lamindb/validation/_anndata_validator.py +19 -32
- lamindb/validation/_lookup.py +9 -5
- lamindb/validation/_register.py +120 -69
- lamindb/validation/_validate.py +47 -39
- lamindb/validation/_validator.py +80 -64
- {lamindb-0.69.1.dist-info → lamindb-0.69.2.dist-info}/METADATA +6 -6
- {lamindb-0.69.1.dist-info → lamindb-0.69.2.dist-info}/RECORD +22 -22
- {lamindb-0.69.1.dist-info → lamindb-0.69.2.dist-info}/LICENSE +0 -0
- {lamindb-0.69.1.dist-info → lamindb-0.69.2.dist-info}/WHEEL +0 -0
lamindb/core/storage/file.py
CHANGED
@@ -105,10 +105,7 @@ def read_adata_h5ad(filepath, **kwargs) -> ad.AnnData:
|
|
105
105
|
|
106
106
|
|
107
107
|
def store_artifact(localpath: UPathStr, storagepath: UPath) -> None:
|
108
|
-
"""Store directory or file to configured storage location.
|
109
|
-
|
110
|
-
Returns size in bytes.
|
111
|
-
"""
|
108
|
+
"""Store directory or file to configured storage location."""
|
112
109
|
localpath = Path(localpath)
|
113
110
|
if not isinstance(storagepath, LocalPathClasses):
|
114
111
|
# this uploads files and directories
|
@@ -200,9 +197,7 @@ def load_to_memory(filepath: UPathStr, stream: bool = False, **kwargs):
|
|
200
197
|
"""
|
201
198
|
filepath = create_path(filepath)
|
202
199
|
|
203
|
-
if filepath.suffix in
|
204
|
-
stream = True
|
205
|
-
elif filepath.suffix != ".h5ad":
|
200
|
+
if filepath.suffix not in {".h5ad", ".zarr", ".zrad"}:
|
206
201
|
stream = False
|
207
202
|
|
208
203
|
if not stream:
|
lamindb/validation/_anndata_validator.py
CHANGED
@@ -9,7 +9,7 @@ import lamindb as ln
|
|
9
9
|
from ._lookup import Lookup
|
10
10
|
from ._register import register_artifact, register_labels
|
11
11
|
from ._validate import validate_anndata
|
12
|
-
from ._validator import Validator
|
12
|
+
from ._validator import ValidationError, Validator
|
13
13
|
|
14
14
|
|
15
15
|
class AnnDataValidator(Validator):
|
@@ -34,6 +34,7 @@ class AnnDataValidator(Validator):
|
|
34
34
|
**kwargs,
|
35
35
|
) -> None:
|
36
36
|
self._adata = adata
|
37
|
+
self._var_field = var_field
|
37
38
|
super().__init__(
|
38
39
|
df=self._adata.obs,
|
39
40
|
fields=obs_fields,
|
@@ -42,8 +43,7 @@ class AnnDataValidator(Validator):
|
|
42
43
|
**kwargs,
|
43
44
|
)
|
44
45
|
self._obs_fields = obs_fields
|
45
|
-
self.
|
46
|
-
self._fields = {"variables": var_field, **obs_fields}
|
46
|
+
self._register_variables()
|
47
47
|
|
48
48
|
@property
|
49
49
|
def var_field(self) -> FieldAttr:
|
@@ -65,7 +65,7 @@ class AnnDataValidator(Validator):
|
|
65
65
|
|
66
66
|
def _register_variables(self, validated_only: bool = True, **kwargs):
|
67
67
|
"""Register variable records."""
|
68
|
-
self.
|
68
|
+
self._kwargs.update(kwargs)
|
69
69
|
register_labels(
|
70
70
|
values=self._adata.var_names,
|
71
71
|
field=self.var_field,
|
@@ -75,56 +75,43 @@ class AnnDataValidator(Validator):
|
|
75
75
|
kwargs=self._kwargs,
|
76
76
|
)
|
77
77
|
|
78
|
-
def register_labels(self, feature: str, validated_only: bool = True, **kwargs):
|
79
|
-
"""Register labels for the given feature.
|
80
|
-
|
81
|
-
Args:
|
82
|
-
feature: The feature to register labels for.
|
83
|
-
if "variables", register variables.
|
84
|
-
validated_only: If True, only register validated labels.
|
85
|
-
**kwargs: Additional metadata needed.
|
86
|
-
"""
|
87
|
-
if feature == "variables":
|
88
|
-
self._register_variables(validated_only=validated_only, **kwargs)
|
89
|
-
else:
|
90
|
-
super().register_labels(feature, validated_only, **kwargs)
|
91
|
-
|
92
78
|
def validate(self, **kwargs) -> bool:
|
93
79
|
"""Validate variables and categorical observations."""
|
94
|
-
self.
|
80
|
+
self._kwargs.update(kwargs)
|
95
81
|
self._validated = validate_anndata(
|
96
82
|
self._adata,
|
97
83
|
var_field=self.var_field,
|
98
84
|
obs_fields=self.obs_fields,
|
99
85
|
**self._kwargs,
|
100
86
|
)
|
101
|
-
|
102
87
|
return self._validated
|
103
88
|
|
104
|
-
def
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
89
|
+
def register_labels(self, feature: str, validated_only: bool = True, **kwargs):
|
90
|
+
"""Register labels for a feature."""
|
91
|
+
if feature == "variables":
|
92
|
+
self._register_variables(validated_only=validated_only, **kwargs)
|
93
|
+
else:
|
94
|
+
super().register_labels(feature, validated_only, **kwargs)
|
95
|
+
|
96
|
+
def register_artifact(self, description: str, **kwargs) -> ln.Artifact:
|
109
97
|
"""Register the validated AnnData and metadata.
|
110
98
|
|
111
99
|
Args:
|
112
|
-
description:
|
113
|
-
**kwargs:
|
100
|
+
description: Description of the AnnData object.
|
101
|
+
**kwargs: Object level metadata.
|
114
102
|
|
115
103
|
Returns:
|
116
|
-
|
104
|
+
A registered artifact record.
|
117
105
|
"""
|
118
|
-
self.
|
106
|
+
self._kwargs.update(kwargs)
|
119
107
|
if not self._validated:
|
120
|
-
raise
|
108
|
+
raise ValidationError("Please run `validate()` first!")
|
121
109
|
|
122
110
|
self._artifact = register_artifact(
|
123
111
|
self._adata,
|
124
112
|
description=description,
|
125
|
-
|
113
|
+
feature_field=self.var_field,
|
126
114
|
fields=self.obs_fields,
|
127
115
|
**self._kwargs,
|
128
116
|
)
|
129
|
-
|
130
117
|
return self._artifact
|
lamindb/validation/_lookup.py
CHANGED
@@ -5,7 +5,7 @@ from lnschema_core.types import FieldAttr
|
|
5
5
|
|
6
6
|
import lamindb as ln
|
7
7
|
|
8
|
-
from ._validate import
|
8
|
+
from ._validate import get_registry_instance
|
9
9
|
|
10
10
|
|
11
11
|
class Lookup:
|
@@ -15,9 +15,9 @@ class Lookup:
|
|
15
15
|
self, fields: Dict[str, FieldAttr], using: Optional[str] = None
|
16
16
|
) -> None:
|
17
17
|
self._fields = fields
|
18
|
-
self._using = using
|
18
|
+
self._using = None if using == "default" else using
|
19
19
|
self._using_name = using or ln.setup.settings.instance.slug
|
20
|
-
logger.
|
20
|
+
logger.debug(f"Lookup objects from the {colors.italic(self._using_name)}")
|
21
21
|
|
22
22
|
def __getitem__(self, name):
|
23
23
|
if name in self._fields:
|
@@ -25,7 +25,7 @@ class Lookup:
|
|
25
25
|
if self._using == "public":
|
26
26
|
return registry.public().lookup()
|
27
27
|
else:
|
28
|
-
return
|
28
|
+
return get_registry_instance(registry, self._using).lookup()
|
29
29
|
raise AttributeError(
|
30
30
|
f"'{self.__class__.__name__}' object has no attribute '{name}'"
|
31
31
|
)
|
@@ -33,6 +33,10 @@ class Lookup:
|
|
33
33
|
def __repr__(self) -> str:
|
34
34
|
if len(self._fields) > 0:
|
35
35
|
fields = "\n ".join([str([key]) for key in self._fields.keys()])
|
36
|
-
return
|
36
|
+
return (
|
37
|
+
f"Lookup objects from the {colors.italic(self._using_name)}:\n {colors.green(fields)}\n\n"
|
38
|
+
"Example:\n → categories = validator.lookup().['cell_type']\n"
|
39
|
+
" → categories.alveolar_type_1_fibroblast_cell"
|
40
|
+
)
|
37
41
|
else:
|
38
42
|
return colors.warning("No fields are found!")
|
lamindb/validation/_register.py
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
from typing import Dict, List, Optional, Union
|
1
|
+
from typing import Dict, List, Optional, Tuple, Union
|
2
2
|
|
3
3
|
import anndata as ad
|
4
4
|
import pandas as pd
|
@@ -7,24 +7,31 @@ from lnschema_core.types import FieldAttr
|
|
7
7
|
|
8
8
|
import lamindb as ln
|
9
9
|
|
10
|
-
from ._validate import
|
10
|
+
from ._validate import (
|
11
|
+
check_registry_organism,
|
12
|
+
get_registry_instance,
|
13
|
+
standardize_and_inspect,
|
14
|
+
)
|
11
15
|
|
12
16
|
|
13
17
|
def register_artifact(
|
14
18
|
data: Union[pd.DataFrame, ad.AnnData],
|
15
19
|
description: str,
|
16
20
|
fields: Dict[str, FieldAttr],
|
17
|
-
|
21
|
+
feature_field: FieldAttr,
|
18
22
|
**kwargs,
|
19
|
-
):
|
20
|
-
"""
|
23
|
+
) -> ln.Artifact:
|
24
|
+
"""Register all metadata with an Artifact.
|
21
25
|
|
22
26
|
Args:
|
23
27
|
data: The DataFrame or AnnData object to register.
|
24
28
|
description: A description of the artifact.
|
25
29
|
fields: A dictionary mapping obs_column to registry_field.
|
26
|
-
|
30
|
+
feature_field: The registry field to validate variables index against.
|
27
31
|
kwargs: Additional keyword arguments to pass to the registry model.
|
32
|
+
|
33
|
+
Returns:
|
34
|
+
The registered Artifact.
|
28
35
|
"""
|
29
36
|
if isinstance(data, ad.AnnData):
|
30
37
|
artifact = ln.Artifact.from_anndata(data, description=description)
|
@@ -35,30 +42,34 @@ def register_artifact(
|
|
35
42
|
raise ValueError("data must be a DataFrame or AnnData object")
|
36
43
|
artifact.save()
|
37
44
|
|
38
|
-
|
45
|
+
feature_kwargs: Dict = {}
|
46
|
+
organism = check_registry_organism(
|
47
|
+
feature_field.field.model, kwargs.pop("organism", None)
|
48
|
+
)
|
49
|
+
if organism is not None:
|
50
|
+
feature_kwargs["organism"] = organism
|
39
51
|
|
40
52
|
if isinstance(data, ad.AnnData):
|
41
|
-
artifact.features.add_from_anndata(var_field=
|
53
|
+
artifact.features.add_from_anndata(var_field=feature_field, **feature_kwargs)
|
42
54
|
else:
|
43
|
-
artifact.features.add_from_df()
|
55
|
+
artifact.features.add_from_df(field=feature_field, **feature_kwargs)
|
44
56
|
|
45
|
-
# link validated obs metadata
|
46
57
|
features = ln.Feature.lookup().dict()
|
47
58
|
for feature_name, field in fields.items():
|
48
59
|
feature = features.get(feature_name)
|
49
60
|
registry = field.field.model
|
50
61
|
filter_kwargs = kwargs.copy()
|
51
|
-
|
62
|
+
organism = check_registry_organism(registry, organism)
|
63
|
+
if organism is not None:
|
52
64
|
filter_kwargs["organism"] = organism
|
53
65
|
df = data.obs if isinstance(data, ad.AnnData) else data
|
54
66
|
labels = registry.from_values(df[feature_name], field=field, **filter_kwargs)
|
55
67
|
artifact.labels.add(labels, feature)
|
56
68
|
|
57
|
-
|
69
|
+
slug = ln.setup.settings.instance.slug
|
70
|
+
logger.success(f"registered artifact in {colors.italic(slug)}")
|
58
71
|
if ln.setup.settings.instance.is_remote:
|
59
|
-
logger.
|
60
|
-
f"🔗 https://lamin.ai/{ln.setup.settings.instance.slug}/artifact/{artifact.uid}"
|
61
|
-
)
|
72
|
+
logger.info(f"🔗 https://lamin.ai/{slug}/artifact/{artifact.uid}")
|
62
73
|
|
63
74
|
return artifact
|
64
75
|
|
@@ -69,8 +80,9 @@ def register_labels(
|
|
69
80
|
feature_name: str,
|
70
81
|
using: Optional[str] = None,
|
71
82
|
validated_only: bool = True,
|
72
|
-
kwargs: Dict = None,
|
73
|
-
|
83
|
+
kwargs: Optional[Dict] = None,
|
84
|
+
df: Optional[pd.DataFrame] = None,
|
85
|
+
) -> None:
|
74
86
|
"""Register features or labels records in the default instance from the using instance.
|
75
87
|
|
76
88
|
Args:
|
@@ -80,28 +92,29 @@ def register_labels(
|
|
80
92
|
using: The name of the instance from which to transfer labels (if applicable).
|
81
93
|
validated_only: If True, only register validated labels.
|
82
94
|
kwargs: Additional keyword arguments to pass to the registry model.
|
95
|
+
df: A DataFrame to register labels from.
|
83
96
|
"""
|
84
|
-
if kwargs is None
|
85
|
-
kwargs = {}
|
97
|
+
filter_kwargs = {} if kwargs is None else kwargs.copy()
|
86
98
|
registry = field.field.model
|
99
|
+
if registry == ln.ULabel:
|
100
|
+
validated_only = False
|
101
|
+
|
102
|
+
organism = check_registry_organism(registry, filter_kwargs.pop("organism", None))
|
103
|
+
if organism is not None:
|
104
|
+
filter_kwargs["organism"] = organism
|
87
105
|
|
88
|
-
check_if_registry_needs_organism(registry, kwargs.get("organism"))
|
89
106
|
verbosity = ln.settings.verbosity
|
90
107
|
try:
|
91
108
|
ln.settings.verbosity = "error"
|
92
|
-
|
93
|
-
|
94
|
-
inspect_result_current = registry.inspect(
|
95
|
-
values, field=field, mute=True, **kwargs
|
109
|
+
inspect_result_current = standardize_and_inspect(
|
110
|
+
values=values, field=field, registry=registry, **filter_kwargs
|
96
111
|
)
|
97
|
-
if
|
98
|
-
# everything is validated in the current instance, no need to register
|
112
|
+
if not inspect_result_current.non_validated:
|
99
113
|
ln.settings.verbosity = verbosity
|
100
114
|
return
|
101
115
|
|
102
116
|
labels_registered: Dict = {"from public": [], "without reference": []}
|
103
117
|
|
104
|
-
# register labels from the using instance
|
105
118
|
(
|
106
119
|
labels_registered[f"from {using}"],
|
107
120
|
non_validated_labels,
|
@@ -109,71 +122,92 @@ def register_labels(
|
|
109
122
|
inspect_result_current.non_validated,
|
110
123
|
field=field,
|
111
124
|
using=using,
|
112
|
-
kwargs=
|
125
|
+
kwargs=filter_kwargs,
|
113
126
|
)
|
114
127
|
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
if len(non_validated_labels) > 0
|
128
|
+
public_records = (
|
129
|
+
registry.from_values(non_validated_labels, field=field, **filter_kwargs)
|
130
|
+
if non_validated_labels
|
119
131
|
else []
|
120
132
|
)
|
121
|
-
ln.save(
|
133
|
+
ln.save(public_records)
|
122
134
|
labels_registered["from public"] = [
|
123
|
-
getattr(r, field.field.name) for r in
|
135
|
+
getattr(r, field.field.name) for r in public_records
|
124
136
|
]
|
125
137
|
labels_registered["without reference"] = [
|
126
138
|
i for i in non_validated_labels if i not in labels_registered["from public"]
|
127
139
|
]
|
140
|
+
|
128
141
|
if not validated_only:
|
129
142
|
non_validated_records = []
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
143
|
+
if df is not None and registry == ln.Feature:
|
144
|
+
non_validated_records = ln.Feature.from_df(df)
|
145
|
+
else:
|
146
|
+
if "organism" in filter_kwargs:
|
147
|
+
filter_kwargs["organism"] = _register_organism(name=organism)
|
148
|
+
for value in labels_registered["without reference"]:
|
149
|
+
filter_kwargs[field.field.name] = value
|
150
|
+
if registry == ln.Feature:
|
151
|
+
filter_kwargs["type"] = "category"
|
152
|
+
non_validated_records.append(registry(**filter_kwargs))
|
136
153
|
ln.save(non_validated_records)
|
137
154
|
|
138
|
-
# for ulabels, also register a parent label: is_{feature_name}
|
139
155
|
if registry == ln.ULabel and field.field.name == "name":
|
140
|
-
register_ulabels_with_parent(values, field)
|
156
|
+
register_ulabels_with_parent(values, field=field, feature_name=feature_name)
|
141
157
|
finally:
|
142
158
|
ln.settings.verbosity = verbosity
|
159
|
+
|
143
160
|
log_registered_labels(
|
144
|
-
labels_registered,
|
161
|
+
labels_registered,
|
162
|
+
feature_name=feature_name,
|
163
|
+
model_field=f"{registry.__name__}.{field.field.name}",
|
164
|
+
validated_only=validated_only,
|
145
165
|
)
|
146
166
|
|
147
167
|
|
148
168
|
def log_registered_labels(
|
149
|
-
labels_registered: Dict,
|
150
|
-
|
169
|
+
labels_registered: Dict,
|
170
|
+
feature_name: str,
|
171
|
+
model_field: str,
|
172
|
+
validated_only: bool = True,
|
173
|
+
) -> None:
|
151
174
|
"""Log the registered labels."""
|
175
|
+
labels_type = "features" if feature_name == "feature" else "labels"
|
176
|
+
model_field = colors.italic(model_field)
|
152
177
|
for key, labels in labels_registered.items():
|
153
|
-
if
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
178
|
+
if not labels:
|
179
|
+
continue
|
180
|
+
|
181
|
+
if key == "without reference" and validated_only:
|
182
|
+
msg = colors.yellow(
|
183
|
+
f"{len(labels)} non-validated {labels_type} are not registered with {model_field}: {labels}!"
|
184
|
+
)
|
185
|
+
lookup_print = f".lookup().['{feature_name}']"
|
186
|
+
msg += f"\n → to lookup categories, use {lookup_print}"
|
187
|
+
msg += (
|
188
|
+
f"\n → to register, run {colors.yellow('register_features(validated_only=False)')}"
|
189
|
+
if labels_type == "features"
|
190
|
+
else f"\n → to register, set {colors.yellow('validated_only=False')}"
|
191
|
+
)
|
192
|
+
logger.warning(msg)
|
193
|
+
else:
|
194
|
+
key = "" if key == "without reference" else f"{colors.green(key)} "
|
162
195
|
logger.success(
|
163
|
-
f"registered {len(labels)}
|
196
|
+
f"registered {len(labels)} {labels_type} {key}with {model_field}: {labels}"
|
164
197
|
)
|
165
198
|
|
166
199
|
|
167
|
-
def register_ulabels_with_parent(
|
200
|
+
def register_ulabels_with_parent(
|
201
|
+
values: List[str], field: FieldAttr, feature_name: str
|
202
|
+
) -> None:
|
168
203
|
"""Register a parent label for the given labels."""
|
169
204
|
registry = field.field.model
|
170
205
|
assert registry == ln.ULabel
|
171
206
|
all_records = registry.from_values(values, field=field)
|
172
|
-
is_feature = registry.filter(name=f"is_{
|
207
|
+
is_feature = registry.filter(name=f"is_{feature_name}").one_or_none()
|
173
208
|
if is_feature is None:
|
174
|
-
is_feature = registry(name=f"is_{
|
209
|
+
is_feature = registry(name=f"is_{feature_name}")
|
175
210
|
is_feature.save()
|
176
|
-
# link all labels to the parent label
|
177
211
|
is_feature.children.add(*all_records)
|
178
212
|
|
179
213
|
|
@@ -181,8 +215,8 @@ def register_labels_from_using_instance(
|
|
181
215
|
values: List[str],
|
182
216
|
field: FieldAttr,
|
183
217
|
using: Optional[str] = None,
|
184
|
-
kwargs: Dict = None,
|
185
|
-
):
|
218
|
+
kwargs: Optional[Dict] = None,
|
219
|
+
) -> Tuple[List[str], List[str]]:
|
186
220
|
"""Register features or labels records from the using instance.
|
187
221
|
|
188
222
|
Args:
|
@@ -190,20 +224,20 @@ def register_labels_from_using_instance(
|
|
190
224
|
field: The FieldAttr object representing the field for which labels are being registered.
|
191
225
|
using: The name of the instance from which to transfer labels (if applicable).
|
192
226
|
kwargs: Additional keyword arguments to pass to the registry model.
|
227
|
+
|
228
|
+
Returns:
|
229
|
+
A tuple containing the list of registered labels and the list of non-registered labels.
|
193
230
|
"""
|
194
|
-
|
195
|
-
kwargs = {}
|
231
|
+
kwargs = kwargs or {}
|
196
232
|
labels_registered = []
|
197
233
|
not_registered = values
|
234
|
+
|
198
235
|
if using is not None and using != "default":
|
199
236
|
registry = field.field.model
|
200
|
-
registry_using =
|
201
|
-
|
202
|
-
|
203
|
-
values, field=field, mute=True, **kwargs
|
237
|
+
registry_using = get_registry_instance(registry, using)
|
238
|
+
inspect_result_using = standardize_and_inspect(
|
239
|
+
values=values, field=field, registry=registry_using, **kwargs
|
204
240
|
)
|
205
|
-
# register the labels that are validated in the using instance
|
206
|
-
# TODO: filter kwargs
|
207
241
|
labels_using = registry_using.filter(
|
208
242
|
**{f"{field.field.name}__in": inspect_result_using.validated}
|
209
243
|
).all()
|
@@ -211,4 +245,21 @@ def register_labels_from_using_instance(
|
|
211
245
|
label_using.save()
|
212
246
|
labels_registered.append(getattr(label_using, field.field.name))
|
213
247
|
not_registered = inspect_result_using.non_validated
|
248
|
+
|
214
249
|
return labels_registered, not_registered
|
250
|
+
|
251
|
+
|
252
|
+
def _register_organism(name: str):
|
253
|
+
"""Register an organism record."""
|
254
|
+
import bionty as bt
|
255
|
+
|
256
|
+
organism = bt.Organism.filter(name=name).one_or_none()
|
257
|
+
if organism is None:
|
258
|
+
organism = bt.Organism.from_public(name=name)
|
259
|
+
if organism is None:
|
260
|
+
raise ValueError(
|
261
|
+
f"Organism '{name}' not found\n"
|
262
|
+
f" → please register it: bt.Organism(name='{name}').save()"
|
263
|
+
)
|
264
|
+
organism.save()
|
265
|
+
return organism
|
lamindb/validation/_validate.py
CHANGED
@@ -9,27 +9,36 @@ from lnschema_core.types import FieldAttr
|
|
9
9
|
from lamindb._from_values import _print_values
|
10
10
|
|
11
11
|
|
12
|
-
def
|
12
|
+
def get_registry_instance(registry: Registry, using: Optional[str] = None) -> Registry:
|
13
13
|
"""Get a registry instance using a specific instance."""
|
14
|
-
|
15
|
-
registry.using(using)
|
16
|
-
|
14
|
+
if using is not None and using != "default":
|
15
|
+
return registry.using(using)
|
16
|
+
return registry
|
17
17
|
|
18
18
|
|
19
|
-
def
|
20
|
-
|
19
|
+
def standardize_and_inspect(
|
20
|
+
values: Iterable[str], field: FieldAttr, registry: Registry, **kwargs
|
21
21
|
):
|
22
|
-
"""
|
22
|
+
"""Standardize and inspect values using a registry."""
|
23
|
+
if hasattr(registry, "standardize"):
|
24
|
+
values = registry.standardize(values, field=field, mute=True, **kwargs)
|
25
|
+
return registry.inspect(values, field=field, mute=True, **kwargs)
|
26
|
+
|
27
|
+
|
28
|
+
def check_registry_organism(
|
29
|
+
registry: Registry, organism: Optional[str] = None
|
30
|
+
) -> Optional[str]:
|
31
|
+
"""Check if a registry needs an organism and return the organism name."""
|
23
32
|
if hasattr(registry, "organism_id"):
|
24
|
-
|
33
|
+
import bionty as bt
|
34
|
+
|
35
|
+
if organism is None and bt.settings.organism is None:
|
25
36
|
raise ValueError(
|
26
37
|
f"{registry.__name__} registry requires an organism!\n"
|
27
38
|
" → please pass an organism name via organism="
|
28
39
|
)
|
29
|
-
|
30
|
-
|
31
|
-
else:
|
32
|
-
return False
|
40
|
+
return organism or bt.settings.organism.name
|
41
|
+
return None
|
33
42
|
|
34
43
|
|
35
44
|
def validate_categories(
|
@@ -38,7 +47,7 @@ def validate_categories(
|
|
38
47
|
feature_name: str,
|
39
48
|
using: Optional[str] = None,
|
40
49
|
**kwargs,
|
41
|
-
):
|
50
|
+
) -> bool:
|
42
51
|
"""Validate ontology terms in a pandas series using LaminDB registries."""
|
43
52
|
model_field = f"{field.field.model.__name__}.{field.field.name}"
|
44
53
|
logger.indent = ""
|
@@ -48,40 +57,41 @@ def validate_categories(
|
|
48
57
|
logger.indent = " "
|
49
58
|
|
50
59
|
registry = field.field.model
|
51
|
-
filter_kwargs = {}
|
52
|
-
organism = kwargs.get("organism")
|
53
|
-
if
|
60
|
+
filter_kwargs = {}
|
61
|
+
organism = check_registry_organism(registry, kwargs.get("organism"))
|
62
|
+
if organism is not None:
|
54
63
|
filter_kwargs["organism"] = organism
|
55
|
-
|
56
|
-
|
64
|
+
|
65
|
+
# Inspect the default instance
|
66
|
+
inspect_result = standardize_and_inspect(
|
67
|
+
values=values, field=field, registry=registry, **filter_kwargs
|
68
|
+
)
|
57
69
|
non_validated = inspect_result.non_validated
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
70
|
+
|
71
|
+
if using is not None and using != "default" and non_validated:
|
72
|
+
registry = get_registry_instance(registry, using)
|
73
|
+
# Inspect the using instance
|
74
|
+
inspect_result = standardize_and_inspect(
|
75
|
+
values=non_validated, field=field, registry=registry, **filter_kwargs
|
63
76
|
)
|
64
77
|
non_validated = inspect_result.non_validated
|
65
78
|
|
66
|
-
# if all terms are validated
|
67
79
|
n_non_validated = len(non_validated)
|
68
80
|
if n_non_validated == 0:
|
69
|
-
validated = True
|
70
81
|
logger.success(f"all {feature_name}s are validated")
|
82
|
+
return True
|
71
83
|
else:
|
72
84
|
are = "are" if n_non_validated > 1 else "is"
|
73
85
|
print_values = _print_values(non_validated)
|
74
|
-
feature_name_print = f"
|
86
|
+
feature_name_print = f".register_labels('{feature_name}')"
|
75
87
|
warning_message = (
|
76
88
|
f"{colors.yellow(f'{n_non_validated} terms')} {are} not validated: "
|
77
89
|
f"{colors.yellow(print_values)}\n → register terms via "
|
78
|
-
f"{colors.
|
90
|
+
f"{colors.yellow(feature_name_print)}"
|
79
91
|
)
|
80
92
|
logger.warning(warning_message)
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
return validated
|
93
|
+
logger.indent = ""
|
94
|
+
return False
|
85
95
|
|
86
96
|
|
87
97
|
def validate_categories_in_df(
|
@@ -89,9 +99,8 @@ def validate_categories_in_df(
|
|
89
99
|
fields: Dict[str, FieldAttr],
|
90
100
|
using: Optional[str] = None,
|
91
101
|
**kwargs,
|
92
|
-
):
|
102
|
+
) -> bool:
|
93
103
|
"""Validate categories in DataFrame columns using LaminDB registries."""
|
94
|
-
# start validation
|
95
104
|
validated = True
|
96
105
|
for feature_name, field in fields.items():
|
97
106
|
validated &= validate_categories(
|
@@ -113,7 +122,9 @@ def validate_anndata(
|
|
113
122
|
) -> bool:
|
114
123
|
"""Inspect metadata in an AnnData object using LaminDB registries."""
|
115
124
|
if using is not None and using != "default":
|
116
|
-
logger.important(
|
125
|
+
logger.important(
|
126
|
+
f"validating metadata using registries of instance {colors.italic(using)}"
|
127
|
+
)
|
117
128
|
|
118
129
|
validated_var = validate_categories(
|
119
130
|
adata.var.index,
|
@@ -123,9 +134,6 @@ def validate_anndata(
|
|
123
134
|
**kwargs,
|
124
135
|
)
|
125
136
|
validated_obs = validate_categories_in_df(
|
126
|
-
adata.obs,
|
127
|
-
fields=obs_fields,
|
128
|
-
using=using,
|
129
|
-
**kwargs,
|
137
|
+
adata.obs, fields=obs_fields, using=using, **kwargs
|
130
138
|
)
|
131
|
-
return validated_var
|
139
|
+
return validated_var and validated_obs
|