lamindb 0.69.1__py3-none-any.whl → 0.69.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,131 +0,0 @@
1
- from typing import Dict, Iterable, Optional
2
-
3
- import pandas as pd
4
- from anndata import AnnData
5
- from lamin_utils import colors, logger
6
- from lnschema_core import Registry
7
- from lnschema_core.types import FieldAttr
8
-
9
- from lamindb._from_values import _print_values
10
-
11
-
12
def _registry_using(registry: Registry, using: Optional[str] = None) -> Registry:
    """Return the registry to query for the requested instance.

    When ``using`` is ``None`` or ``"default"`` the registry is returned
    unchanged; otherwise the result of ``registry.using(using)`` is returned.
    """
    if using is None or using == "default":
        return registry
    return registry.using(using)
17
-
18
-
19
def check_if_registry_needs_organism(
    registry: Registry, organism: Optional[str] = None
):
    """Tell whether queries against ``registry`` must be scoped by organism.

    Returns:
        ``False`` when the registry has no ``organism_id`` attribute,
        ``True`` when it has one and an organism was supplied.

    Raises:
        ValueError: if the registry has an ``organism_id`` attribute but
            ``organism`` is ``None``.
    """
    # Registries without an organism_id attribute never need an organism.
    if not hasattr(registry, "organism_id"):
        return False
    if organism is None:
        raise ValueError(
            f"{registry.__name__} registry requires an organism!\n"
            " → please pass an organism name via organism="
        )
    return True
33
-
34
-
35
def validate_categories(
    values: Iterable[str],
    field: FieldAttr,
    feature_name: str,
    using: Optional[str] = None,
    **kwargs,
):
    """Validate values against the registry that owns ``field``.

    The values are first inspected in the default instance. If ``using`` names
    a non-default instance and some values are still not validated, only those
    remaining values are inspected again via the registry returned by
    ``_registry_using``.

    Args:
        values: The values to validate.
        field: The registry field to validate against.
        feature_name: Name used in log messages for this set of values.
        using: The instance to fall back on for values that are not validated
            in the default instance.
        **kwargs: Only ``organism`` is read; it is forwarded to ``inspect`` when
            the registry requires an organism.

    Returns:
        ``True`` if no value remains non-validated, ``False`` otherwise.

    Raises:
        ValueError: if the registry requires an organism and none was passed.
    """
    model_field = f"{field.field.model.__name__}.{field.field.name}"
    logger.indent = ""
    logger.info(
        f"inspecting '{colors.bold(feature_name)}' by {colors.italic(model_field)}"
    )
    logger.indent = " "
    # The indent is global logger state: reset it in `finally` so that an
    # exception raised below cannot leave later log output indented.
    try:
        registry = field.field.model
        filter_kwargs: Dict[str, str] = {}
        organism = kwargs.get("organism")
        if check_if_registry_needs_organism(registry, organism):
            filter_kwargs["organism"] = organism
        # inspect the default instance
        inspect_result = registry.inspect(
            values, field=field, mute=True, **filter_kwargs
        )
        non_validated = inspect_result.non_validated
        if using is not None and using != "default" and len(non_validated) > 0:
            registry = _registry_using(registry, using)
            # inspect the using instance, only for what is still unresolved
            inspect_result = registry.inspect(
                non_validated, field=field, mute=True, **filter_kwargs
            )
            non_validated = inspect_result.non_validated

        n_non_validated = len(non_validated)
        if n_non_validated == 0:
            logger.success(f"all {feature_name}s are validated")
            return True

        # keep noun and verb in agreement with the count ("1 term is", "2 terms are")
        plural = n_non_validated > 1
        are = "are" if plural else "is"
        terms = "terms" if plural else "term"
        print_values = _print_values(non_validated)
        feature_name_print = f"`.register_labels('{feature_name}')`"
        warning_message = (
            f"{colors.yellow(f'{n_non_validated} {terms}')} {are} not validated: "
            f"{colors.yellow(print_values)}\n → register terms via "
            f"{colors.red(feature_name_print)}"
        )
        logger.warning(warning_message)
        return False
    finally:
        logger.indent = ""
85
-
86
-
87
def validate_categories_in_df(
    df: pd.DataFrame,
    fields: Dict[str, FieldAttr],
    using: Optional[str] = None,
    **kwargs,
):
    """Run ``validate_categories`` on every column listed in ``fields``.

    Each key of ``fields`` is used both as the column to read from ``df`` and
    as the feature name; the mapped value is the registry field to validate
    against. The result is ``True`` only if every column validated.
    """
    # Build the full list first so every column is inspected (and logged),
    # even after an earlier column has failed.
    outcomes = [
        validate_categories(
            df[column],
            field=registry_field,
            feature_name=column,
            using=using,
            **kwargs,
        )
        for column, registry_field in fields.items()
    ]
    return all(outcomes)
105
-
106
-
107
def validate_anndata(
    adata: AnnData,
    var_field: FieldAttr,
    obs_fields: Dict[str, FieldAttr],
    using: Optional[str] = None,
    **kwargs,
) -> bool:
    """Validate the variable index and the ``obs`` columns of an AnnData object.

    ``adata.var.index`` is validated against ``var_field`` and the columns of
    ``adata.obs`` named in ``obs_fields`` against their mapped fields. Both
    checks always run; the result is their conjunction.
    """
    targets_other_instance = not (using is None or using == "default")
    if targets_other_instance:
        logger.important(f"validating metadata using registries of instance `{using}`")

    var_ok = validate_categories(
        adata.var.index,
        field=var_field,
        feature_name="variables",
        using=using,
        **kwargs,
    )
    obs_ok = validate_categories_in_df(
        adata.obs,
        fields=obs_fields,
        using=using,
        **kwargs,
    )
    return var_ok & obs_ok
@@ -1,205 +0,0 @@
1
- from typing import Dict, Iterable, Optional
2
-
3
- import pandas as pd
4
- from lamin_utils import logger
5
- from lnschema_core.types import FieldAttr
6
-
7
- import lamindb as ln
8
-
9
- from ._lookup import Lookup
10
- from ._register import register_artifact, register_labels
11
- from ._validate import validate_categories_in_df
12
-
13
-
14
class Validator:
    """Lamin validator for the categorical columns of a DataFrame.

    Args:
        df: The DataFrame object to validate.
        fields: A dictionary mapping column to registry_field.
            For example:
            {"cell_type_ontology_id": bt.CellType.ontology_id, "donor_id": ln.ULabel.name}
        using: The reference instance containing registries to validate against.
        verbosity: The verbosity level.
        **kwargs: Stored on the validator and forwarded to validation and to
            artifact registration.
    """

    def __init__(
        self,
        df: pd.DataFrame,
        fields: Dict[str, FieldAttr],
        using: Optional[str] = None,
        verbosity: str = "hint",
        **kwargs,
    ) -> None:
        """Store the inputs and register one feature per key of ``fields``.

        Note that this sets ``ln.settings.verbosity`` to ``verbosity``.
        """
        self._df = df
        self._fields = fields
        self._using = using
        ln.settings.verbosity = verbosity
        self._artifact = None
        self._collection = None
        self._validated = False
        self._kwargs: Dict = {}
        self._add_kwargs(**kwargs)
        self._register_features()

    @property
    def fields(self) -> Dict:
        """Return the columns fields to validate against."""
        return self._fields

    def _add_kwargs(self, **kwargs):
        """Merge ``kwargs`` into the stored keyword arguments (later calls win)."""
        self._kwargs.update(kwargs)

    def _register_features(self) -> None:
        """Register features records.

        Raises:
            ValueError: if a key of ``fields`` is not a column of the DataFrame.
        """
        missing_columns = [name for name in self.fields if name not in self._df]
        if missing_columns:
            raise ValueError(
                f"columns {missing_columns} are not found in the DataFrame!"
            )
        register_labels(
            values=list(self.fields.keys()),
            field=ln.Feature.name,
            feature_name="feature",
            using=self._using,
            validated_only=False,
        )

    def _register_labels_all(self, validated_only: bool = True, **kwargs):
        """Register labels for all features."""
        for name in self.fields:
            logger.info(f"registering labels for '{name}'")
            self.register_labels(feature=name, validated_only=validated_only, **kwargs)

    def lookup(self, using: Optional[str] = None) -> Lookup:
        """Lookup features and labels.

        Args:
            using: The instance where the lookup is performed.
                if None (default), the lookup is performed on the instance specified in "using" parameter of the Validator.
                if "public", the lookup is performed on the public reference.
        """
        fields = {"feature": ln.Feature.name, **self.fields}
        return Lookup(fields=fields, using=using or self._using)

    def register_labels(self, feature: str, validated_only: bool = True, **kwargs):
        """Register labels records.

        Args:
            feature: The name of the feature to register, or "all" to register
                the labels of every feature in ``fields``.
            validated_only: Whether to register only validated labels.
            **kwargs: Additional keyword arguments.

        Raises:
            ValueError: if ``feature`` is neither "all" nor a key of ``fields``.
        """
        if feature == "all":
            self._register_labels_all(validated_only=validated_only, **kwargs)
            return
        if feature not in self.fields:
            raise ValueError(f"feature {feature} is not part of the fields!")
        # Inside a method this name resolves to the module-level
        # `register_labels` function, not to this method.
        register_labels(
            values=self._df[feature].unique().tolist(),
            field=self.fields.get(feature),
            feature_name=feature,
            using=self._using,
            validated_only=validated_only,
            kwargs=kwargs,
        )

    def validate(
        self,
        **kwargs,
    ) -> bool:
        """Validate the categorical columns listed in ``fields``.

        Args:
            **kwargs: Merged into the stored keyword arguments before validating.

        Returns:
            whether the DataFrame is validated
        """
        self._add_kwargs(**kwargs)
        self._validated = validate_categories_in_df(
            self._df,
            fields=self.fields,
            using=self._using,
            **self._kwargs,
        )

        return self._validated

    def register_artifact(
        self,
        description: str,
        **kwargs,
    ) -> ln.Artifact:
        """Register the validated DataFrame and metadata.

        Args:
            description: description of the DataFrame
            **kwargs: object level metadata

        Returns:
            a registered artifact record

        Raises:
            ValueError: if the last ``validate()`` call did not succeed (or
                ``validate()`` was never called).
        """
        self._add_kwargs(**kwargs)
        if not self._validated:
            raise ValueError("please run `validate()` first!")

        # make sure all labels are registered in the current instance
        verbosity = ln.settings.verbosity
        try:
            # quieten logging while registering; restored in `finally`
            ln.settings.verbosity = "warning"
            self.register_labels("all")

            self._artifact = register_artifact(
                self._df,
                description=description,
                fields=self.fields,
                **self._kwargs,
            )
        finally:
            ln.settings.verbosity = verbosity

        return self._artifact

    def register_collection(
        self,
        # Kept as a string: evaluating `X | Y` on classes at definition time
        # raises TypeError on Python < 3.10.
        artifact: "ln.Artifact | Iterable[ln.Artifact]",
        name: str,
        description: Optional[str] = None,
        reference: Optional[str] = None,
        reference_type: Optional[str] = None,
    ) -> ln.Collection:
        """Register a collection from artifact/artifacts.

        Args:
            artifact: one or several registered Artifacts
            name: title of the publication
            description: description of the publication
            reference: accession number (e.g. GSE#, E-MTAB#, etc.)
            reference_type: source type (e.g. GEO, ArrayExpress, SRA, etc.)

        Returns:
            the saved collection record
        """
        collection = ln.Collection(
            artifact,
            name=name,
            description=description,
            reference=reference,
            reference_type=reference_type,
        )
        # Read the flag before saving; it is only used to pick the log message.
        is_new = collection._state.adding
        collection.save()
        if is_new:
            logger.print("🎉 registered collection in LaminDB!\n")
        else:
            logger.warning("collection already exists in LaminDB!\n")
        if ln.setup.settings.instance.is_remote:
            logger.print(
                f"🔗 https://lamin.ai/{ln.setup.settings.instance.slug}/collection/{collection.uid}"
            )
        self._collection = collection
        return collection

    def clean_up_failed_runs(self):
        """Clean up previous failed runs that don't register any outputs.

        Only acts when ``ln.run_context.transform`` is set; the current run is
        excluded from deletion.
        """
        if ln.run_context.transform is not None:
            ln.Run.filter(
                transform=ln.run_context.transform, output_artifacts=None
            ).exclude(uid=ln.run_context.run.uid).delete()
File without changes