lamindb 0.69.1__py3-none-any.whl → 0.69.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lamindb/__init__.py +6 -4
- lamindb/_annotate.py +790 -0
- lamindb/_artifact.py +2 -8
- lamindb/_collection.py +16 -4
- lamindb/_feature.py +11 -9
- lamindb/_finish.py +194 -11
- lamindb/_query_set.py +6 -4
- lamindb/_run.py +3 -1
- lamindb/_save.py +34 -21
- lamindb/core/__init__.py +4 -0
- lamindb/core/_data.py +3 -0
- lamindb/core/_feature_manager.py +4 -3
- lamindb/core/_run_context.py +17 -5
- lamindb/core/storage/_backed_access.py +48 -11
- lamindb/core/storage/file.py +2 -7
- {lamindb-0.69.1.dist-info → lamindb-0.69.3.dist-info}/METADATA +7 -7
- {lamindb-0.69.1.dist-info → lamindb-0.69.3.dist-info}/RECORD +20 -25
- lamindb/validation/__init__.py +0 -19
- lamindb/validation/_anndata_validator.py +0 -130
- lamindb/validation/_lookup.py +0 -38
- lamindb/validation/_register.py +0 -214
- lamindb/validation/_validate.py +0 -131
- lamindb/validation/_validator.py +0 -205
- /lamindb/{_validate.py → _can_validate.py} +0 -0
- {lamindb-0.69.1.dist-info → lamindb-0.69.3.dist-info}/LICENSE +0 -0
- {lamindb-0.69.1.dist-info → lamindb-0.69.3.dist-info}/WHEEL +0 -0
lamindb/validation/_validate.py
DELETED
@@ -1,131 +0,0 @@
|
|
1
|
-
from typing import Dict, Iterable, Optional
|
2
|
-
|
3
|
-
import pandas as pd
|
4
|
-
from anndata import AnnData
|
5
|
-
from lamin_utils import colors, logger
|
6
|
-
from lnschema_core import Registry
|
7
|
-
from lnschema_core.types import FieldAttr
|
8
|
-
|
9
|
-
from lamindb._from_values import _print_values
|
10
|
-
|
11
|
-
|
12
|
-
def _registry_using(registry: Registry, using: Optional[str] = None) -> Registry:
    """Return ``registry``, switched to the ``using`` instance when one is named.

    Args:
        registry: The registry to (optionally) re-target.
        using: Name of the instance to query. ``None`` and ``"default"`` both
            mean "leave the registry untouched".

    Returns:
        ``registry`` itself for ``None``/``"default"``, otherwise the result of
        ``registry.using(using)``.
    """
    # Guard clause: nothing to switch for the default instance.
    if using is None or using == "default":
        return registry
    return registry.using(using)
|
17
|
-
|
18
|
-
|
19
|
-
def check_if_registry_needs_organism(
    registry: Registry, organism: Optional[str] = None
):
    """Tell whether ``registry`` must be filtered by organism.

    A registry counts as organism-dependent when it exposes an
    ``organism_id`` attribute.

    Args:
        registry: The registry class to check.
        organism: The organism name supplied by the caller, if any.

    Returns:
        ``False`` when the registry has no ``organism_id`` attribute,
        ``True`` when it has one and ``organism`` was supplied.

    Raises:
        ValueError: If the registry has ``organism_id`` but ``organism`` is
            ``None``.
    """
    if not hasattr(registry, "organism_id"):
        return False
    if organism is not None:
        return True
    raise ValueError(
        f"{registry.__name__} registry requires an organism!\n"
        " → please pass an organism name via organism="
    )
|
33
|
-
|
34
|
-
|
35
|
-
def validate_categories(
    values: Iterable[str],
    field: FieldAttr,
    feature_name: str,
    using: Optional[str] = None,
    **kwargs,
):
    """Validate ontology terms in a pandas series using LaminDB registries.

    The values are first inspected against the registry of ``field`` in the
    default instance. Whatever is still non-validated is then inspected
    against the ``using`` instance, if one other than ``"default"`` is given.

    Args:
        values: The values to validate.
        field: The registry field to validate against.
        feature_name: Name used for these values in the log messages.
        using: Optional name of a second instance to fall back to.
        **kwargs: Only ``organism`` is read; it is forwarded as a filter when
            the registry requires an organism.

    Returns:
        ``True`` if no value remains non-validated, ``False`` otherwise.

    Raises:
        ValueError: If the registry requires an organism and none was passed
            (raised by ``check_if_registry_needs_organism``).
    """
    model_field = f"{field.field.model.__name__}.{field.field.name}"
    logger.indent = ""
    logger.info(
        f"inspecting '{colors.bold(feature_name)}' by {colors.italic(model_field)}"
    )
    logger.indent = " "

    # The logger is shared state: always restore its indentation, even when
    # the organism check or a registry inspection raises.
    try:
        registry = field.field.model
        filter_kwargs: Dict[str, str] = {}
        organism = kwargs.get("organism")
        if check_if_registry_needs_organism(registry, organism):
            filter_kwargs["organism"] = organism

        # inspect the default instance
        inspect_result = registry.inspect(
            values, field=field, mute=True, **filter_kwargs
        )
        non_validated = inspect_result.non_validated
        if using is not None and using != "default" and len(non_validated) > 0:
            # inspect the using instance, but only for the leftovers
            registry = _registry_using(registry, using)
            inspect_result = registry.inspect(
                non_validated, field=field, mute=True, **filter_kwargs
            )
            non_validated = inspect_result.non_validated

        n_non_validated = len(non_validated)
        if n_non_validated == 0:
            validated = True
            # NOTE: a feature_name that is already plural (e.g. "variables")
            # gets a doubled "s" here.
            logger.success(f"all {feature_name}s are validated")
        else:
            # Keep noun and verb in agreement: "1 term is" / "2 terms are".
            noun, are = ("terms", "are") if n_non_validated > 1 else ("term", "is")
            print_values = _print_values(non_validated)
            feature_name_print = f"`.register_labels('{feature_name}')`"
            warning_message = (
                f"{colors.yellow(f'{n_non_validated} {noun}')} {are} not validated: "
                f"{colors.yellow(print_values)}\n → register terms via "
                f"{colors.red(feature_name_print)}"
            )
            logger.warning(warning_message)
            validated = False
    finally:
        logger.indent = ""

    return validated
|
85
|
-
|
86
|
-
|
87
|
-
def validate_categories_in_df(
    df: pd.DataFrame,
    fields: Dict[str, FieldAttr],
    using: Optional[str] = None,
    **kwargs,
):
    """Validate categories in DataFrame columns using LaminDB registries.

    Args:
        df: The DataFrame whose columns are validated.
        fields: Mapping of column name to the registry field it is validated
            against.
        using: Optional instance name forwarded to ``validate_categories``.
        **kwargs: Forwarded unchanged to ``validate_categories``.

    Returns:
        The ``&``-combination of the per-column results, starting from
        ``True``.
    """
    all_valid = True
    for column, registry_field in fields.items():
        # Every column is checked (no short-circuit) so that each one gets
        # its own log output.
        column_ok = validate_categories(
            df[column],
            field=registry_field,
            feature_name=column,
            using=using,
            **kwargs,
        )
        all_valid = all_valid & column_ok
    return all_valid
|
105
|
-
|
106
|
-
|
107
|
-
def validate_anndata(
    adata: AnnData,
    var_field: FieldAttr,
    obs_fields: Dict[str, FieldAttr],
    using: Optional[str] = None,
    **kwargs,
) -> bool:
    """Inspect metadata in an AnnData object using LaminDB registries.

    Args:
        adata: The AnnData object to inspect.
        var_field: Registry field used to validate ``adata.var.index``.
        obs_fields: Mapping of ``adata.obs`` column name to registry field.
        using: Optional instance name forwarded to the validators.
        **kwargs: Forwarded unchanged to the validators.

    Returns:
        The ``&``-combination of the variable result and the observation
        result.
    """
    uses_other_instance = not (using is None or using == "default")
    if uses_other_instance:
        logger.important(f"validating metadata using registries of instance `{using}`")

    # Variables first, then the obs columns; both always run.
    var_ok = validate_categories(
        adata.var.index,
        field=var_field,
        feature_name="variables",
        using=using,
        **kwargs,
    )
    obs_ok = validate_categories_in_df(
        adata.obs,
        fields=obs_fields,
        using=using,
        **kwargs,
    )
    return var_ok & obs_ok
|
lamindb/validation/_validator.py
DELETED
@@ -1,205 +0,0 @@
|
|
1
|
-
from typing import Dict, Iterable, Optional
|
2
|
-
|
3
|
-
import pandas as pd
|
4
|
-
from lamin_utils import logger
|
5
|
-
from lnschema_core.types import FieldAttr
|
6
|
-
|
7
|
-
import lamindb as ln
|
8
|
-
|
9
|
-
from ._lookup import Lookup
|
10
|
-
from ._register import register_artifact, register_labels
|
11
|
-
from ._validate import validate_categories_in_df
|
12
|
-
|
13
|
-
|
14
|
-
class Validator:
    """Lamin validator.

    Validates the columns of a DataFrame against LaminDB registry fields and
    registers the resulting features, labels, artifact and collection.

    Args:
        df: The DataFrame object to validate.
        fields: A dictionary mapping column to registry_field.
            For example:
            {"cell_type_ontology_id": bt.CellType.ontology_id, "donor_id": ln.ULabel.name}
        using: The reference instance containing registries to validate against.
        verbosity: The verbosity level.
    """

    def __init__(
        self,
        df: pd.DataFrame,
        fields: Dict[str, FieldAttr],
        using: Optional[str] = None,
        verbosity: str = "hint",
        **kwargs,
    ) -> None:
        """Store the inputs and register the column names as features.

        Raises:
            ValueError: If a key of ``fields`` is not a column of ``df``.
        """
        self._df = df
        self._fields = fields
        self._using = using
        # NOTE: this assigns the lamindb-wide setting, not a per-validator one.
        ln.settings.verbosity = verbosity
        self._artifact = None
        self._collection = None
        self._validated = False
        self._kwargs: Dict = {}
        self._add_kwargs(**kwargs)
        self._register_features()

    @property
    def fields(self) -> Dict:
        """Return the columns fields to validate against."""
        return self._fields

    def _add_kwargs(self, **kwargs):
        """Merge ``kwargs`` into the stored keyword arguments (later wins)."""
        self._kwargs.update(kwargs)

    def _register_features(self) -> None:
        """Register features records.

        Raises:
            ValueError: If a key of ``fields`` is missing from the data.
        """
        missing_columns = [column for column in self.fields if column not in self._df]
        if missing_columns:
            raise ValueError(
                f"columns {missing_columns} are not found in the AnnData object!"
            )
        # Module-level helper, not the method of the same name.
        register_labels(
            values=list(self.fields.keys()),
            field=ln.Feature.name,
            feature_name="feature",
            using=self._using,
            validated_only=False,
        )

    def _register_labels_all(self, validated_only: bool = True, **kwargs):
        """Register labels for all features."""
        for name in self.fields:
            logger.info(f"registering labels for '{name}'")
            self.register_labels(feature=name, validated_only=validated_only, **kwargs)

    def lookup(self, using: Optional[str] = None) -> Lookup:
        """Lookup features and labels.

        Args:
            using: The instance where the lookup is performed.
                if None (default), the lookup is performed on the instance specified in "using" parameter of the Validator.
                if "public", the lookup is performed on the public reference.
        """
        fields = {"feature": ln.Feature.name, **self.fields}
        return Lookup(fields=fields, using=using or self._using)

    def register_labels(self, feature: str, validated_only: bool = True, **kwargs):
        """Register labels records.

        Args:
            feature: The name of the feature to register, or ``"all"`` to
                register the labels of every feature in ``fields``.
            validated_only: Whether to register only validated labels.
            **kwargs: Additional keyword arguments.

        Raises:
            ValueError: If ``feature`` is neither ``"all"`` nor a key of
                ``fields``.
        """
        if feature == "all":
            self._register_labels_all(validated_only=validated_only, **kwargs)
            return
        if feature not in self.fields:
            raise ValueError(f"feature {feature} is not part of the fields!")
        # Module-level helper; the extra options are handed over as one dict
        # under the ``kwargs`` keyword rather than being unpacked.
        register_labels(
            values=self._df[feature].unique().tolist(),
            field=self.fields[feature],
            feature_name=feature,
            using=self._using,
            validated_only=validated_only,
            kwargs=kwargs,
        )

    def validate(
        self,
        **kwargs,
    ) -> bool:
        """Validate the categorical columns listed in ``fields``.

        Args:
            **kwargs: Merged into the stored keyword arguments before they
                are forwarded to the validation.

        Returns:
            whether the DataFrame is validated
        """
        self._add_kwargs(**kwargs)
        self._validated = validate_categories_in_df(
            self._df,
            fields=self.fields,
            using=self._using,
            **self._kwargs,
        )

        return self._validated

    def register_artifact(
        self,
        description: str,
        **kwargs,
    ) -> ln.Artifact:
        """Register the validated data and metadata.

        Args:
            description: description of the data object
            **kwargs: object level metadata

        Returns:
            a registered artifact record

        Raises:
            ValueError: If the last ``validate()`` did not succeed (or was
                never run).
        """
        self._add_kwargs(**kwargs)
        if not self._validated:
            raise ValueError("please run `validate()` first!")

        # make sure all labels are registered in the current instance;
        # verbosity is lowered meanwhile and always restored afterwards
        verbosity = ln.settings.verbosity
        try:
            ln.settings.verbosity = "warning"
            self.register_labels("all")

            self._artifact = register_artifact(
                self._df,
                description=description,
                fields=self.fields,
                **self._kwargs,
            )
        finally:
            ln.settings.verbosity = verbosity

        return self._artifact

    def register_collection(
        self,
        artifact: "ln.Artifact | Iterable[ln.Artifact]",
        name: str,
        description: Optional[str] = None,
        reference: Optional[str] = None,
        reference_type: Optional[str] = None,
    ) -> ln.Collection:
        """Register a collection from artifact/artifacts.

        Args:
            artifact: one or several registered Artifacts
            name: title of the publication
            description: description of the publication
            reference: accession number (e.g. GSE#, E-MTAB#, etc.)
            reference_type: source type (e.g. GEO, ArrayExpress, SRA, etc.)

        Returns:
            the saved collection record
        """
        collection = ln.Collection(
            artifact,
            name=name,
            description=description,
            reference=reference,
            reference_type=reference_type,
        )
        # Read the flag before saving so the right message can be logged.
        is_new = collection._state.adding
        collection.save()
        if is_new:
            logger.print("🎉 registered collection in LaminDB!\n")
        else:
            logger.warning("collection already exists in LaminDB!\n")
        if ln.setup.settings.instance.is_remote:
            logger.print(
                f"🔗 https://lamin.ai/{ln.setup.settings.instance.slug}/collection/{collection.uid}"
            )
        self._collection = collection
        return collection

    def clean_up_failed_runs(self):
        """Clean up previous failed runs that don't register any outputs."""
        if ln.run_context.transform is not None:
            # Delete runs of the current transform without output artifacts,
            # except the run that is currently tracked.
            ln.Run.filter(
                transform=ln.run_context.transform, output_artifacts=None
            ).exclude(uid=ln.run_context.run.uid).delete()
|
File without changes
|
File without changes
|
File without changes
|