lamindb 0.69.2__py3-none-any.whl → 0.69.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lamindb/__init__.py +8 -4
- lamindb/_annotate.py +790 -0
- lamindb/_artifact.py +2 -6
- lamindb/_collection.py +37 -158
- lamindb/_query_set.py +3 -3
- lamindb/core/__init__.py +4 -0
- lamindb/core/_data.py +1 -3
- lamindb/core/_feature_manager.py +1 -2
- lamindb/integrations/__init__.py +8 -0
- lamindb/integrations/_vitessce.py +36 -0
- {lamindb-0.69.2.dist-info → lamindb-0.69.4.dist-info}/METADATA +3 -3
- {lamindb-0.69.2.dist-info → lamindb-0.69.4.dist-info}/RECORD +15 -18
- lamindb/validation/__init__.py +0 -19
- lamindb/validation/_anndata_validator.py +0 -117
- lamindb/validation/_lookup.py +0 -42
- lamindb/validation/_register.py +0 -265
- lamindb/validation/_validate.py +0 -139
- lamindb/validation/_validator.py +0 -221
- /lamindb/{_validate.py → _can_validate.py} +0 -0
- {lamindb-0.69.2.dist-info → lamindb-0.69.4.dist-info}/LICENSE +0 -0
- {lamindb-0.69.2.dist-info → lamindb-0.69.4.dist-info}/WHEEL +0 -0
lamindb/validation/_register.py
DELETED
@@ -1,265 +0,0 @@
|
|
1
|
-
from typing import Dict, List, Optional, Tuple, Union
|
2
|
-
|
3
|
-
import anndata as ad
|
4
|
-
import pandas as pd
|
5
|
-
from lamin_utils import colors, logger
|
6
|
-
from lnschema_core.types import FieldAttr
|
7
|
-
|
8
|
-
import lamindb as ln
|
9
|
-
|
10
|
-
from ._validate import (
|
11
|
-
check_registry_organism,
|
12
|
-
get_registry_instance,
|
13
|
-
standardize_and_inspect,
|
14
|
-
)
|
15
|
-
|
16
|
-
|
17
|
-
def register_artifact(
    data: Union[pd.DataFrame, ad.AnnData],
    description: str,
    fields: Dict[str, FieldAttr],
    feature_field: FieldAttr,
    **kwargs,
) -> ln.Artifact:
    """Save ``data`` as an Artifact and attach its features and labels.

    Args:
        data: The DataFrame or AnnData object to register.
        description: A description of the artifact.
        fields: A dictionary mapping a column name (``obs`` column for AnnData)
            to the registry field its values are looked up with.
        feature_field: The registry field used when adding the feature set.
        kwargs: Additional keyword arguments forwarded to ``from_values`` of
            every label registry. An ``organism`` entry is removed from them
            and resolved per registry via ``check_registry_organism``.

    Returns:
        The saved Artifact.

    Raises:
        ValueError: If ``data`` is neither a DataFrame nor an AnnData object.
    """
    is_anndata = isinstance(data, ad.AnnData)
    if is_anndata:
        artifact = ln.Artifact.from_anndata(data, description=description)
        artifact.n_observations = data.n_obs
    elif isinstance(data, pd.DataFrame):
        artifact = ln.Artifact.from_df(data, description=description)
    else:
        raise ValueError("data must be a DataFrame or AnnData object")
    artifact.save()

    # Keep the caller's organism in its own variable: each registry decides
    # independently whether it needs one, and a registry that does not need it
    # must not erase it for the registries that are processed afterwards.
    requested_organism = kwargs.pop("organism", None)

    feature_kwargs: Dict = {}
    feature_organism = check_registry_organism(
        feature_field.field.model, requested_organism
    )
    if feature_organism is not None:
        feature_kwargs["organism"] = feature_organism

    if is_anndata:
        artifact.features.add_from_anndata(var_field=feature_field, **feature_kwargs)
    else:
        artifact.features.add_from_df(field=feature_field, **feature_kwargs)

    features = ln.Feature.lookup().dict()
    # The table holding the label columns does not change between iterations.
    label_table = data.obs if is_anndata else data
    for feature_name, field in fields.items():
        feature = features.get(feature_name)
        registry = field.field.model
        filter_kwargs = kwargs.copy()
        label_organism = check_registry_organism(registry, requested_organism)
        if label_organism is not None:
            filter_kwargs["organism"] = label_organism
        labels = registry.from_values(
            label_table[feature_name], field=field, **filter_kwargs
        )
        artifact.labels.add(labels, feature)

    slug = ln.setup.settings.instance.slug
    logger.success(f"registered artifact in {colors.italic(slug)}")
    if ln.setup.settings.instance.is_remote:
        logger.info(f"🔗 https://lamin.ai/{slug}/artifact/{artifact.uid}")

    return artifact
|
75
|
-
|
76
|
-
|
77
|
-
def register_labels(
    values: List[str],
    field: FieldAttr,
    feature_name: str,
    using: Optional[str] = None,
    validated_only: bool = True,
    kwargs: Optional[Dict] = None,
    df: Optional[pd.DataFrame] = None,
) -> None:
    """Register features or labels records in the default instance.

    Values that do not validate in the default instance are first looked up in
    the ``using`` instance, then created via ``registry.from_values`` and, if
    ``validated_only`` is False, created as plain records.

    Args:
        values: A list of values to be registered as labels.
        field: The FieldAttr object representing the field for which labels are being registered.
        feature_name: The name of the feature to register.
        using: The name of the instance from which to transfer labels (if applicable).
        validated_only: If True, only register validated labels. Forced to
            False when the registry is ``ln.ULabel``.
        kwargs: Additional keyword arguments to pass to the registry model.
        df: A DataFrame to register labels from.
    """
    filter_kwargs = {} if kwargs is None else kwargs.copy()
    registry = field.field.model
    field_name = field.field.name
    if registry == ln.ULabel:
        validated_only = False

    organism = check_registry_organism(registry, filter_kwargs.pop("organism", None))
    if organism is not None:
        filter_kwargs["organism"] = organism

    verbosity = ln.settings.verbosity
    try:
        ln.settings.verbosity = "error"
        inspect_result_current = standardize_and_inspect(
            values=values, field=field, registry=registry, **filter_kwargs
        )
        if not inspect_result_current.non_validated:
            # Nothing to register; the finally clause restores the verbosity.
            return

        # Insertion order of the keys is the order in which they are logged.
        labels_registered: Dict = {"from public": [], "without reference": []}

        (
            labels_registered[f"from {using}"],
            non_validated_labels,
        ) = register_labels_from_using_instance(
            inspect_result_current.non_validated,
            field=field,
            using=using,
            kwargs=filter_kwargs,
        )

        public_records = (
            registry.from_values(non_validated_labels, field=field, **filter_kwargs)
            if non_validated_labels
            else []
        )
        ln.save(public_records)
        labels_registered["from public"] = [
            getattr(record, field_name) for record in public_records
        ]
        # Set lookup keeps this linear in the number of labels.
        found_in_public = set(labels_registered["from public"])
        labels_registered["without reference"] = [
            label for label in non_validated_labels if label not in found_in_public
        ]

        if not validated_only:
            if df is not None and registry == ln.Feature:
                non_validated_records = ln.Feature.from_df(df)
            else:
                record_kwargs = dict(filter_kwargs)
                if "organism" in record_kwargs:
                    record_kwargs["organism"] = _register_organism(name=organism)
                if registry == ln.Feature:
                    record_kwargs["type"] = "category"
                # Build each record from its own kwargs instead of mutating a
                # shared dict between iterations.
                non_validated_records = [
                    registry(**{**record_kwargs, field_name: value})
                    for value in labels_registered["without reference"]
                ]
            ln.save(non_validated_records)

        if registry == ln.ULabel and field_name == "name":
            register_ulabels_with_parent(values, field=field, feature_name=feature_name)
    finally:
        ln.settings.verbosity = verbosity

    log_registered_labels(
        labels_registered,
        feature_name=feature_name,
        model_field=f"{registry.__name__}.{field_name}",
        validated_only=validated_only,
    )
|
166
|
-
|
167
|
-
|
168
|
-
def log_registered_labels(
    labels_registered: Dict,
    feature_name: str,
    model_field: str,
    validated_only: bool = True,
) -> None:
    """Log which labels were registered, grouped by where they came from.

    Args:
        labels_registered: Maps a source key (e.g. ``"from public"`` or
            ``"without reference"``) to the list of labels for that source.
            Sources with no labels are skipped.
        feature_name: The feature the labels belong to; the value ``"feature"``
            switches the wording from "labels" to "features".
        model_field: Printable ``Registry.field`` name used in the messages.
        validated_only: If True, labels under ``"without reference"`` are
            reported with a warning as not registered instead of as a success.
    """
    labels_type = "features" if feature_name == "feature" else "labels"
    model_field = colors.italic(model_field)
    for source, labels in labels_registered.items():
        if not labels:
            continue

        if source == "without reference" and validated_only:
            msg = colors.yellow(
                f"{len(labels)} non-validated {labels_type} are not registered with {model_field}: {labels}!"
            )
            # Subscript directly on the lookup object; ".lookup().[...]" is not
            # valid Python and cannot be pasted by the user.
            lookup_print = f".lookup()['{feature_name}']"
            msg += f"\n → to lookup categories, use {lookup_print}"
            if labels_type == "features":
                msg += f"\n → to register, run {colors.yellow('register_features(validated_only=False)')}"
            else:
                msg += f"\n → to register, set {colors.yellow('validated_only=False')}"
            logger.warning(msg)
        else:
            # Labels without a reference have no origin to mention.
            origin = "" if source == "without reference" else f"{colors.green(source)} "
            logger.success(
                f"registered {len(labels)} {labels_type} {origin}with {model_field}: {labels}"
            )
|
198
|
-
|
199
|
-
|
200
|
-
def register_ulabels_with_parent(
    values: List[str], field: FieldAttr, feature_name: str
) -> None:
    """Attach the ULabel records for ``values`` as children of ``is_<feature_name>``.

    The parent ULabel is created and saved when no record with that name
    exists yet.
    """
    ulabel_registry = field.field.model
    assert ulabel_registry == ln.ULabel
    child_records = ulabel_registry.from_values(values, field=field)

    parent_name = f"is_{feature_name}"
    parent = ulabel_registry.filter(name=parent_name).one_or_none()
    if parent is None:
        parent = ulabel_registry(name=parent_name)
        parent.save()

    parent.children.add(*child_records)
|
212
|
-
|
213
|
-
|
214
|
-
def register_labels_from_using_instance(
    values: List[str],
    field: FieldAttr,
    using: Optional[str] = None,
    kwargs: Optional[Dict] = None,
) -> Tuple[List[str], List[str]]:
    """Save records found in the ``using`` instance for the given values.

    Args:
        values: A list of values to be registered as labels.
        field: The FieldAttr object representing the field for which labels are being registered.
        using: The name of the instance to look the values up in. With ``None``
            or ``"default"`` nothing is looked up.
        kwargs: Additional keyword arguments passed to standardize/inspect.

    Returns:
        A tuple ``(registered, not_registered)``: the field values of the
        records that were saved, and the values that did not validate in the
        using instance (``values`` itself when no lookup took place).
    """
    inspect_kwargs = kwargs or {}

    # Without a dedicated using instance there is nothing to transfer.
    if using is None or using == "default":
        return [], values

    field_name = field.field.name
    registry_using = get_registry_instance(field.field.model, using)
    inspection = standardize_and_inspect(
        values=values, field=field, registry=registry_using, **inspect_kwargs
    )
    matching_records = registry_using.filter(
        **{f"{field_name}__in": inspection.validated}
    ).all()

    transferred = []
    for record in matching_records:
        record.save()
        transferred.append(getattr(record, field_name))

    return transferred, inspection.non_validated
|
250
|
-
|
251
|
-
|
252
|
-
def _register_organism(name: str):
    """Return the Organism record called ``name``, saving it if it is new.

    An existing record is returned as is. Otherwise the record is fetched with
    ``bt.Organism.from_public`` and saved.

    Raises:
        ValueError: If ``from_public`` returns no record for ``name``.
    """
    import bionty as bt

    existing = bt.Organism.filter(name=name).one_or_none()
    if existing is not None:
        return existing

    fetched = bt.Organism.from_public(name=name)
    if fetched is None:
        raise ValueError(
            f"Organism '{name}' not found\n"
            f" → please register it: bt.Organism(name='{name}').save()"
        )
    fetched.save()
    return fetched
|
lamindb/validation/_validate.py
DELETED
@@ -1,139 +0,0 @@
|
|
1
|
-
from typing import Dict, Iterable, Optional
|
2
|
-
|
3
|
-
import pandas as pd
|
4
|
-
from anndata import AnnData
|
5
|
-
from lamin_utils import colors, logger
|
6
|
-
from lnschema_core import Registry
|
7
|
-
from lnschema_core.types import FieldAttr
|
8
|
-
|
9
|
-
from lamindb._from_values import _print_values
|
10
|
-
|
11
|
-
|
12
|
-
def get_registry_instance(registry: Registry, using: Optional[str] = None) -> Registry:
    """Return ``registry`` bound to the ``using`` instance.

    With ``using`` being ``None`` or ``"default"`` the registry is returned
    unchanged; otherwise ``registry.using(using)`` is returned.
    """
    if using is None or using == "default":
        return registry
    return registry.using(using)
|
17
|
-
|
18
|
-
|
19
|
-
def standardize_and_inspect(
    values: Iterable[str], field: FieldAttr, registry: Registry, **kwargs
):
    """Inspect ``values`` with ``registry``, standardizing them first if possible.

    ``registry.standardize`` is only called when the registry has such an
    attribute. Both calls are muted and receive ``field`` and ``kwargs``.

    Returns:
        Whatever ``registry.inspect`` returns.
    """
    to_inspect = values
    if hasattr(registry, "standardize"):
        to_inspect = registry.standardize(
            to_inspect, field=field, mute=True, **kwargs
        )
    inspection = registry.inspect(to_inspect, field=field, mute=True, **kwargs)
    return inspection
|
26
|
-
|
27
|
-
|
28
|
-
def check_registry_organism(
    registry: Registry, organism: Optional[str] = None
) -> Optional[str]:
    """Resolve the organism name to use with ``registry``.

    Returns:
        ``None`` when the registry has no ``organism_id`` attribute. Otherwise
        ``organism`` if given, else the name of ``bt.settings.organism``.

    Raises:
        ValueError: If the registry has ``organism_id`` but neither
            ``organism`` nor ``bt.settings.organism`` is set.
    """
    # Registries without an organism column never need one.
    if not hasattr(registry, "organism_id"):
        return None

    import bionty as bt

    if organism is None and bt.settings.organism is None:
        raise ValueError(
            f"{registry.__name__} registry requires an organism!\n"
            " → please pass an organism name via organism="
        )
    return organism or bt.settings.organism.name
|
42
|
-
|
43
|
-
|
44
|
-
def validate_categories(
    values: Iterable[str],
    field: FieldAttr,
    feature_name: str,
    using: Optional[str] = None,
    **kwargs,
) -> bool:
    """Check whether all ``values`` validate against the registry of ``field``.

    Values are inspected in the default instance first; those that do not
    validate there are inspected again in the ``using`` instance, if one other
    than ``"default"`` is given.

    Args:
        values: The values to validate.
        field: The registry field to validate the values against.
        feature_name: Name of the feature, used in the log messages.
        using: Optional name of a second instance to validate against.
        kwargs: Only the ``organism`` entry is read.

    Returns:
        True if no value is left non-validated, otherwise False.
    """
    model_field = f"{field.field.model.__name__}.{field.field.name}"
    logger.indent = ""
    logger.info(
        f"inspecting '{colors.bold(feature_name)}' by {colors.italic(model_field)}"
    )
    logger.indent = " "

    # Restore the logger indentation on every exit path (success, failure and
    # exceptions) so that later log output is not left indented.
    try:
        registry = field.field.model
        filter_kwargs = {}
        organism = check_registry_organism(registry, kwargs.get("organism"))
        if organism is not None:
            filter_kwargs["organism"] = organism

        # Inspect the default instance
        inspect_result = standardize_and_inspect(
            values=values, field=field, registry=registry, **filter_kwargs
        )
        non_validated = inspect_result.non_validated

        if using is not None and using != "default" and non_validated:
            registry = get_registry_instance(registry, using)
            # Inspect the using instance
            inspect_result = standardize_and_inspect(
                values=non_validated, field=field, registry=registry, **filter_kwargs
            )
            non_validated = inspect_result.non_validated

        n_non_validated = len(non_validated)
        if n_non_validated == 0:
            logger.success(f"all {feature_name}s are validated")
            return True

        # Agree noun and verb with the count ("1 term is", "2 terms are").
        plural = n_non_validated > 1
        noun = "terms" if plural else "term"
        are = "are" if plural else "is"
        print_values = _print_values(non_validated)
        feature_name_print = f".register_labels('{feature_name}')"
        warning_message = (
            f"{colors.yellow(f'{n_non_validated} {noun}')} {are} not validated: "
            f"{colors.yellow(print_values)}\n → register terms via "
            f"{colors.yellow(feature_name_print)}"
        )
        logger.warning(warning_message)
        return False
    finally:
        logger.indent = ""
|
95
|
-
|
96
|
-
|
97
|
-
def validate_categories_in_df(
    df: pd.DataFrame,
    fields: Dict[str, FieldAttr],
    using: Optional[str] = None,
    **kwargs,
) -> bool:
    """Validate every column listed in ``fields`` against its registry field.

    All columns are validated, even after one has failed, so that each of
    them is reported.

    Returns:
        True if every column validates (also when ``fields`` is empty).
    """
    # A list (not a generator) so that all() cannot short-circuit the checks.
    column_results = [
        validate_categories(
            df[column],
            field=registry_field,
            feature_name=column,
            using=using,
            **kwargs,
        )
        for column, registry_field in fields.items()
    ]
    return all(column_results)
|
114
|
-
|
115
|
-
|
116
|
-
def validate_anndata(
    adata: AnnData,
    var_field: FieldAttr,
    obs_fields: Dict[str, FieldAttr],
    using: Optional[str] = None,
    **kwargs,
) -> bool:
    """Validate the variables index and the ``obs`` columns of an AnnData object.

    Args:
        adata: The AnnData object to validate.
        var_field: Registry field to validate ``adata.var.index`` against.
        obs_fields: Maps ``obs`` column names to their registry fields.
        using: Optional name of a second instance to validate against.
        kwargs: Forwarded to the underlying validation functions.

    Returns:
        True only if both the variables and all ``obs`` columns validate.
    """
    validates_elsewhere = using is not None and using != "default"
    if validates_elsewhere:
        logger.important(
            f"validating metadata using registries of instance {colors.italic(using)}"
        )

    # Both checks always run, so problems in obs are reported even when the
    # variables already failed.
    var_ok = validate_categories(
        adata.var.index,
        field=var_field,
        feature_name="variables",
        using=using,
        **kwargs,
    )
    obs_ok = validate_categories_in_df(
        adata.obs, fields=obs_fields, using=using, **kwargs
    )
    return var_ok and obs_ok
|
lamindb/validation/_validator.py
DELETED
@@ -1,221 +0,0 @@
|
|
1
|
-
from typing import Dict, Iterable, Optional
|
2
|
-
|
3
|
-
import pandas as pd
|
4
|
-
from lamin_utils import colors, logger
|
5
|
-
from lnschema_core.types import FieldAttr
|
6
|
-
|
7
|
-
import lamindb as ln
|
8
|
-
|
9
|
-
from ._lookup import Lookup
|
10
|
-
from ._register import register_artifact, register_labels
|
11
|
-
from ._validate import validate_categories_in_df
|
12
|
-
|
13
|
-
|
14
|
-
class ValidationError(ValueError):
    """Error signalling that data has not been validated."""
|
18
|
-
|
19
|
-
|
20
|
-
class Validator:
    """Validate a DataFrame against registries and register it with its metadata.

    Creating a validator sets the global ``ln.settings.verbosity`` and
    immediately registers features via :meth:`register_features`.

    Args:
        df: The DataFrame object to validate.
        fields: A dictionary mapping column to registry_field.
            For example:
            {"cell_type_ontology_id": bt.CellType.ontology_id, "donor_id": ln.ULabel.name}
        feature_field: The field attribute for the feature column.
        using: The reference instance containing registries to validate against.
        verbosity: The verbosity level.
        **kwargs: Stored and forwarded to validation and registration calls
            (e.g. ``organism``).
    """

    def __init__(
        self,
        df: pd.DataFrame,
        fields: Optional[Dict[str, FieldAttr]] = None,
        feature_field: FieldAttr = ln.Feature.name,
        using: Optional[str] = None,
        verbosity: str = "hint",
        **kwargs,
    ) -> None:
        """Initialize the Validator."""
        self._df = df
        self._fields = fields or {}
        self._feature_field = feature_field
        self._using = using
        ln.settings.verbosity = verbosity
        self._artifact = None
        self._collection = None
        self._validated = False
        self._kwargs: Dict = kwargs
        self.register_features()

    @property
    def fields(self) -> Dict:
        """Return the columns fields to validate against."""
        return self._fields

    def lookup(self, using: Optional[str] = None) -> Lookup:
        """Lookup features and labels.

        Args:
            using: The instance where the lookup is performed.
                if None (default), the lookup is performed on the instance specified in "using" parameter of the Validator.
                if "public", the lookup is performed on the public reference.
        """
        # A column literally named "feature" in fields overrides the feature field.
        fields = {"feature": self._feature_field, **self.fields}
        return Lookup(fields=fields, using=using or self._using)

    def register_features(self, validated_only: bool = True) -> None:
        """Register features records.

        Columns named in ``fields`` are always registered; the remaining
        DataFrame columns are registered according to ``validated_only``.

        Raises:
            ValueError: If a key of ``fields`` is not a column of the DataFrame.
        """
        missing_columns = set(self.fields.keys()) - set(self._df.columns)
        if missing_columns:
            raise ValueError(
                f"Columns {missing_columns} are not found in the data object!"
            )

        # Always register features specified as the fields keys
        register_labels(
            values=list(self.fields.keys()),
            field=self._feature_field,
            feature_name="feature",
            using=self._using,
            validated_only=False,
            kwargs=self._kwargs,
        )

        # Register the rest of the columns based on validated_only
        additional_columns = set(self._df.columns) - set(self.fields.keys())
        if additional_columns:
            register_labels(
                values=list(additional_columns),
                field=self._feature_field,
                feature_name="feature",
                using=self._using,
                validated_only=validated_only,
                df=self._df,  # Get the Feature type from df
                kwargs=self._kwargs,
            )

    def register_labels(self, feature: str, validated_only: bool = True, **kwargs):
        """Register labels for a feature.

        Args:
            feature: The name of the feature to register. ``"all"`` registers
                labels for every key of ``fields``; ``"feature"`` registers the
                features themselves.
            validated_only: Whether to register only validated labels.
            **kwargs: Additional keyword arguments. If no ``organism`` is given
                here, the one stored on the validator is used.

        Raises:
            ValueError: If ``feature`` is not a key of ``fields``.
        """
        if feature == "all":
            self._register_labels_all(validated_only=validated_only, **kwargs)
        elif feature == "feature":
            self.register_features(validated_only=validated_only)
        else:
            if feature not in self.fields:
                raise ValueError(f"Feature {feature} is not part of the fields!")
            # Fall back to the organism given to the constructor / validate(),
            # so registration resolves the organism the same way validation does.
            label_kwargs = dict(kwargs)
            if "organism" not in label_kwargs and "organism" in self._kwargs:
                label_kwargs["organism"] = self._kwargs["organism"]
            register_labels(
                values=self._df[feature].unique().tolist(),
                field=self.fields[feature],
                feature_name=feature,
                using=self._using,
                validated_only=validated_only,
                kwargs=label_kwargs,
            )

    def _register_labels_all(self, validated_only: bool = True, **kwargs):
        """Register labels for all features."""
        for name in self.fields:
            logger.info(f"registering labels for '{name}'")
            self.register_labels(feature=name, validated_only=validated_only, **kwargs)

    def validate(self, **kwargs) -> bool:
        """Validate variables and categorical observations.

        Args:
            **kwargs: Merged into the keyword arguments stored on the validator.

        Returns:
            Whether the DataFrame is validated.
        """
        self._kwargs.update(kwargs)
        self._validated = validate_categories_in_df(
            self._df,
            fields=self.fields,
            using=self._using,
            **self._kwargs,
        )
        return self._validated

    def register_artifact(self, description: str, **kwargs) -> ln.Artifact:
        """Register the validated DataFrame and metadata.

        Args:
            description: Description of the DataFrame object.
            **kwargs: Object level metadata.

        Returns:
            A registered artifact record.

        Raises:
            ValidationError: If the last ``validate()`` did not succeed or was
                never run.
        """
        self._kwargs.update(kwargs)
        if not self._validated:
            raise ValidationError(
                f"Data object is not validated, please run {colors.yellow('validate()')}!"
            )

        # Make sure all labels are registered in the current instance
        verbosity = ln.settings.verbosity
        try:
            ln.settings.verbosity = "warning"
            self.register_labels("all")

            self._artifact = register_artifact(
                self._df,
                description=description,
                fields=self.fields,
                feature_field=self._feature_field,
                **self._kwargs,
            )
        finally:
            ln.settings.verbosity = verbosity

        return self._artifact

    def register_collection(
        self,
        # Quoted so the union is not evaluated at definition time; the ``|``
        # operator on classes only works at runtime from Python 3.10 on.
        artifact: "ln.Artifact | Iterable[ln.Artifact]",
        name: str,
        description: Optional[str] = None,
        reference: Optional[str] = None,
        reference_type: Optional[str] = None,
    ) -> ln.Collection:
        """Register a collection from artifact/artifacts.

        Args:
            artifact: One or several registered Artifacts.
            name: Title of the publication.
            description: Description of the publication.
            reference: Accession number (e.g. GSE#, E-MTAB#, etc.).
            reference_type: Source type (e.g. GEO, ArrayExpress, SRA, etc.).

        Returns:
            The saved collection.
        """
        collection = ln.Collection(
            artifact,
            name=name,
            description=description,
            reference=reference,
            reference_type=reference_type,
        )
        slug = ln.setup.settings.instance.slug
        # Read the flag before saving; it decides which message is logged.
        is_new = collection._state.adding
        collection.save()
        if is_new:
            logger.success(f"registered collection in {colors.italic(slug)}")
        else:
            logger.warning(f"collection already exists in {colors.italic(slug)}!")
        if ln.setup.settings.instance.is_remote:
            logger.print(f"🔗 https://lamin.ai/{slug}/collection/{collection.uid}")
        self._collection = collection
        return collection

    def clean_up_failed_runs(self):
        """Clean up previous failed runs that don't register any outputs."""
        if ln.run_context.transform is not None:
            # Delete runs of the current transform without output artifacts,
            # except the run that is currently tracked.
            ln.Run.filter(
                transform=ln.run_context.transform, output_artifacts=None
            ).exclude(uid=ln.run_context.run.uid).delete()
|
File without changes
|
File without changes
|
File without changes
|