lamindb 0.69.0__py3-none-any.whl → 0.69.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -163,28 +163,65 @@ def read_dataframe(elem: Union[h5py.Dataset, h5py.Group]):
163
163
 
164
164
  @registry.register("h5py")
165
165
  def safer_read_partial(elem, indices):
166
- if get_spec(elem).encoding_type == "":
167
- if isinstance(elem, h5py.Dataset):
166
+ is_dataset = isinstance(elem, h5py.Dataset)
167
+ indices_inverse: Optional[list] = None
168
+ encoding_type = get_spec(elem).encoding_type
169
+ # h5py selection for datasets requires sorted indices
170
+ if is_dataset or encoding_type == "dataframe":
171
+ indices_increasing = []
172
+ indices_inverse = []
173
+ for indices_dim in indices:
174
+ if isinstance(indices_dim, np.ndarray) and not np.all(
175
+ np.diff(indices_dim) > 0
176
+ ):
177
+ idx_unique, idx_inverse = np.unique(indices_dim, return_inverse=True)
178
+ indices_increasing.append(idx_unique)
179
+ indices_inverse.append(idx_inverse)
180
+ else:
181
+ indices_increasing.append(indices_dim)
182
+ indices_inverse.append(None)
183
+ indices = tuple(indices_increasing)
184
+ if all(idx is None for idx in indices_inverse):
185
+ indices_inverse = None
186
+ result = None
187
+ if encoding_type == "":
188
+ if is_dataset:
168
189
  dims = len(elem.shape)
169
190
  if dims == 2:
170
- return elem[indices]
191
+ result = elem[indices]
171
192
  elif dims == 1:
172
193
  if indices[0] == slice(None):
173
- return elem[indices[1]]
194
+ result = elem[indices[1]]
174
195
  elif indices[1] == slice(None):
175
- return elem[indices[0]]
196
+ result = elem[indices[0]]
176
197
  elif isinstance(elem, h5py.Group):
177
198
  try:
178
199
  ds = CSRDataset(elem)
179
- return _subset_sparse(ds, indices)
200
+ result = _subset_sparse(ds, indices)
180
201
  except Exception:
181
202
  pass
182
- raise ValueError(
183
- "Can not get a subset of the element of type"
184
- f" {type(elem).__name__} with an empty spec."
185
- )
203
+ if result is None:
204
+ raise ValueError(
205
+ "Can not get a subset of the element of type"
206
+ f" {type(elem).__name__} with an empty spec."
207
+ )
208
+ else:
209
+ result = read_elem_partial(elem, indices=indices)
210
+ if indices_inverse is None:
211
+ return result
186
212
  else:
187
- return read_elem_partial(elem, indices=indices)
213
+ if indices_inverse[0] is None:
214
+ if len(result.shape) == 2:
215
+ return result[:, indices_inverse[1]]
216
+ else:
217
+ return result[indices_inverse[1]]
218
+ elif indices_inverse[1] is None:
219
+ if isinstance(result, pd.DataFrame):
220
+ return result.iloc[indices_inverse[0]]
221
+ else:
222
+ return result[indices_inverse[0]]
223
+ else:
224
+ return result[tuple(indices_inverse)]
188
225
 
189
226
 
190
227
  @registry.register("h5py")
@@ -105,10 +105,7 @@ def read_adata_h5ad(filepath, **kwargs) -> ad.AnnData:
105
105
 
106
106
 
107
107
  def store_artifact(localpath: UPathStr, storagepath: UPath) -> None:
108
- """Store directory or file to configured storage location.
109
-
110
- Returns size in bytes.
111
- """
108
+ """Store directory or file to configured storage location."""
112
109
  localpath = Path(localpath)
113
110
  if not isinstance(storagepath, LocalPathClasses):
114
111
  # this uploads files and directories
@@ -200,9 +197,7 @@ def load_to_memory(filepath: UPathStr, stream: bool = False, **kwargs):
200
197
  """
201
198
  filepath = create_path(filepath)
202
199
 
203
- if filepath.suffix in (".zarr", ".zrad"):
204
- stream = True
205
- elif filepath.suffix != ".h5ad":
200
+ if filepath.suffix not in {".h5ad", ".zarr", ".zrad"}:
206
201
  stream = False
207
202
 
208
203
  if not stream:
@@ -0,0 +1,19 @@
1
+ """Validators built on LaminDB.
2
+
3
+ Import the package::
4
+
5
+ from lamindb.validation import Validator, AnnDataValidator
6
+
7
+ This is the complete API reference:
8
+
9
+ .. autosummary::
10
+ :toctree: .
11
+
12
+ Validator
13
+ AnnDataValidator
14
+ Lookup
15
+ """
16
+
17
+ from ._anndata_validator import AnnDataValidator
18
+ from ._lookup import Lookup
19
+ from ._validator import Validator
@@ -0,0 +1,117 @@
1
+ from typing import Dict, Optional
2
+
3
+ import anndata as ad
4
+ from lnschema_core.types import FieldAttr
5
+ from pandas.core.api import DataFrame as DataFrame
6
+
7
+ import lamindb as ln
8
+
9
+ from ._lookup import Lookup
10
+ from ._register import register_artifact, register_labels
11
+ from ._validate import validate_anndata
12
+ from ._validator import ValidationError, Validator
13
+
14
+
15
class AnnDataValidator(Validator):
    """Validator for the metadata of an AnnData object.

    The observation columns are delegated to the ``Validator`` base class
    (it receives ``adata.obs`` as ``df`` and ``obs_fields`` as ``fields``),
    while the variables index is handled here through ``var_field``.

    Args:
        adata: The AnnData object to validate.
        var_field: The registry field to validate variables index against.
        obs_fields: A dictionary mapping obs_column to registry_field.
            For example:
            {"cell_type_ontology_id": bt.CellType.ontology_id, "donor_id": ln.ULabel.name}
        using: The reference instance containing registries to validate against.
        verbosity: Forwarded unchanged to ``Validator.__init__``.
        **kwargs: Forwarded unchanged to ``Validator.__init__``.
    """

    def __init__(
        self,
        adata: ad.AnnData,
        var_field: FieldAttr,
        obs_fields: Dict[str, FieldAttr],
        using: str = "default",
        verbosity: str = "hint",
        **kwargs,
    ) -> None:
        self._adata = adata
        self._var_field = var_field
        # NOTE(review): the base class presumably initialises ``_using``,
        # ``_kwargs`` and ``_validated``, which the methods below read.
        super().__init__(
            df=adata.obs,
            fields=obs_fields,
            using=using,
            verbosity=verbosity,
            **kwargs,
        )
        self._obs_fields = obs_fields
        # Variables are registered eagerly, right at construction time.
        self._register_variables()

    @property
    def var_field(self) -> FieldAttr:
        """Registry field the variables index is validated against."""
        return self._var_field

    @property
    def obs_fields(self) -> Dict:
        """Mapping of obs column name to the registry field it is validated against."""
        return self._obs_fields

    def lookup(self, using: Optional[str] = None) -> Lookup:
        """Build a ``Lookup`` over features, variables and all obs fields.

        Args:
            using: Instance to look up from; when falsy, the validator's own
                ``_using`` is used instead.
        """
        lookup_fields = {"feature": ln.Feature.name, "variables": self.var_field}
        # obs fields come last so they win on a key collision.
        lookup_fields.update(self.obs_fields)
        instance = using or self._using
        return Lookup(fields=lookup_fields, using=instance)

    def _register_variables(self, validated_only: bool = True, **kwargs):
        """Register records for the variable names of the AnnData object.

        Extra keyword arguments are merged into the validator's stored kwargs
        before being handed to the module-level ``register_labels``.
        """
        self._kwargs.update(kwargs)
        register_labels(
            values=self._adata.var_names,
            field=self.var_field,
            feature_name="variables",
            using=self._using,
            validated_only=validated_only,
            kwargs=self._kwargs,
        )

    def validate(self, **kwargs) -> bool:
        """Validate the variables index and the configured obs columns.

        Extra keyword arguments are merged into the stored kwargs. The outcome
        is cached in ``_validated`` and also returned.
        """
        self._kwargs.update(kwargs)
        outcome = validate_anndata(
            self._adata,
            var_field=self.var_field,
            obs_fields=self.obs_fields,
            **self._kwargs,
        )
        self._validated = outcome
        return outcome

    def register_labels(self, feature: str, validated_only: bool = True, **kwargs):
        """Register labels for one feature.

        The special name ``"variables"`` targets the variables index; any
        other name is delegated to the base class implementation.
        """
        if feature != "variables":
            super().register_labels(feature, validated_only, **kwargs)
            return
        self._register_variables(validated_only=validated_only, **kwargs)

    def register_artifact(self, description: str, **kwargs) -> ln.Artifact:
        """Register the validated AnnData and metadata.

        Args:
            description: Description of the AnnData object.
            **kwargs: Object level metadata.

        Returns:
            A registered artifact record.

        Raises:
            ValidationError: If ``validate()`` has not succeeded yet.
        """
        # kwargs are merged before the check, so they persist even on failure.
        self._kwargs.update(kwargs)
        if not self._validated:
            raise ValidationError("Please run `validate()` first!")

        artifact = register_artifact(
            self._adata,
            description=description,
            feature_field=self.var_field,
            fields=self.obs_fields,
            **self._kwargs,
        )
        self._artifact = artifact
        return artifact
@@ -0,0 +1,42 @@
1
+ from typing import Dict, Optional
2
+
3
+ from lamin_utils import colors, logger
4
+ from lnschema_core.types import FieldAttr
5
+
6
+ import lamindb as ln
7
+
8
+ from ._validate import get_registry_instance
9
+
10
+
11
class Lookup:
    """Lookup features and labels from the reference instance.

    Items are accessed by subscription, e.g. ``lookup["cell_type"]``, where
    the key is one of the names in ``fields``.

    Args:
        fields: Mapping of a name to the registry field it is looked up in.
        using: Name of the instance to look up from. ``"default"`` is
            normalised to ``None``; ``"public"`` selects the registry's
            public source.
    """

    def __init__(
        self, fields: Dict[str, FieldAttr], using: Optional[str] = None
    ) -> None:
        self._fields = fields
        self._using = None if using == "default" else using
        # Display name only: falls back to the current instance slug when no
        # ``using`` was given.
        self._using_name = using or ln.setup.settings.instance.slug
        logger.debug(f"Lookup objects from the {colors.italic(self._using_name)}")

    def __getitem__(self, name):
        """Return the lookup object of the registry behind ``name``.

        Raises:
            AttributeError: If ``name`` is not one of the configured fields
                (kept as ``AttributeError`` for backward compatibility).
        """
        if name in self._fields:
            registry = self._fields[name].field.model
            if self._using == "public":
                return registry.public().lookup()
            return get_registry_instance(registry, self._using).lookup()
        raise AttributeError(
            f"'{self.__class__.__name__}' object has no attribute '{name}'"
        )

    def __repr__(self) -> str:
        """List the available keys together with a usage example."""
        if not self._fields:
            return colors.warning("No fields are found!")
        fields = "\n ".join([str([key]) for key in self._fields])
        # The example must be valid Python: subscription directly follows the
        # call, without a dot in between.
        return (
            f"Lookup objects from the {colors.italic(self._using_name)}:\n {colors.green(fields)}\n\n"
            "Example:\n → categories = validator.lookup()['cell_type']\n"
            " → categories.alveolar_type_1_fibroblast_cell"
        )
@@ -0,0 +1,265 @@
1
+ from typing import Dict, List, Optional, Tuple, Union
2
+
3
+ import anndata as ad
4
+ import pandas as pd
5
+ from lamin_utils import colors, logger
6
+ from lnschema_core.types import FieldAttr
7
+
8
+ import lamindb as ln
9
+
10
+ from ._validate import (
11
+ check_registry_organism,
12
+ get_registry_instance,
13
+ standardize_and_inspect,
14
+ )
15
+
16
+
17
def register_artifact(
    data: Union[pd.DataFrame, ad.AnnData],
    description: str,
    fields: Dict[str, FieldAttr],
    feature_field: FieldAttr,
    **kwargs,
) -> ln.Artifact:
    """Register all metadata with an Artifact.

    Args:
        data: The DataFrame or AnnData object to register.
        description: A description of the artifact.
        fields: A dictionary mapping obs_column to registry_field.
        feature_field: The registry field to validate variables index against.
        kwargs: Additional keyword arguments to pass to the registry model.
            An ``organism`` entry is extracted and resolved per registry.

    Returns:
        The registered Artifact.

    Raises:
        ValueError: If ``data`` is neither a DataFrame nor an AnnData object.
    """
    is_anndata = isinstance(data, ad.AnnData)
    if is_anndata:
        artifact = ln.Artifact.from_anndata(data, description=description)
        artifact.n_observations = data.n_obs
    elif isinstance(data, pd.DataFrame):
        artifact = ln.Artifact.from_df(data, description=description)
    else:
        raise ValueError("data must be a DataFrame or AnnData object")
    artifact.save()

    # Keep the caller's organism untouched. It is resolved separately for
    # every registry, so a registry without an organism (which resolves to
    # None) cannot discard it for the registries that come after.
    requested_organism = kwargs.pop("organism", None)

    feature_kwargs: Dict = {}
    feature_organism = check_registry_organism(
        feature_field.field.model, requested_organism
    )
    if feature_organism is not None:
        feature_kwargs["organism"] = feature_organism

    if is_anndata:
        artifact.features.add_from_anndata(var_field=feature_field, **feature_kwargs)
    else:
        artifact.features.add_from_df(field=feature_field, **feature_kwargs)

    features = ln.Feature.lookup().dict()
    # Label columns live in ``obs`` for AnnData and in the frame itself otherwise.
    df = data.obs if is_anndata else data
    for feature_name, field in fields.items():
        feature = features.get(feature_name)
        registry = field.field.model
        filter_kwargs = kwargs.copy()
        label_organism = check_registry_organism(registry, requested_organism)
        if label_organism is not None:
            filter_kwargs["organism"] = label_organism
        labels = registry.from_values(df[feature_name], field=field, **filter_kwargs)
        artifact.labels.add(labels, feature)

    slug = ln.setup.settings.instance.slug
    logger.success(f"registered artifact in {colors.italic(slug)}")
    if ln.setup.settings.instance.is_remote:
        logger.info(f"🔗 https://lamin.ai/{slug}/artifact/{artifact.uid}")

    return artifact
75
+
76
+
77
def register_labels(
    values: List[str],
    field: FieldAttr,
    feature_name: str,
    using: Optional[str] = None,
    validated_only: bool = True,
    kwargs: Optional[Dict] = None,
    df: Optional[pd.DataFrame] = None,
) -> None:
    """Register features or labels records in the default instance from the using instance.

    Values that do not validate in the current instance are resolved in
    order: from the ``using`` instance, then via ``registry.from_values``
    (logged as "from public"), and finally, only when ``validated_only`` is
    false, as new records without a reference.

    Args:
        values: A list of values to be registered as labels.
        field: The FieldAttr object representing the field for which labels are being registered.
        feature_name: The name of the feature to register.
        using: The name of the instance from which to transfer labels (if applicable).
        validated_only: If True, only register validated labels. Always
            treated as False when the registry is ``ln.ULabel``.
        kwargs: Additional keyword arguments to pass to the registry model.
        df: A DataFrame to register labels from.
    """
    filter_kwargs = {} if kwargs is None else kwargs.copy()
    registry = field.field.model
    field_name = field.field.name
    if registry == ln.ULabel:
        validated_only = False

    organism = check_registry_organism(registry, filter_kwargs.pop("organism", None))
    if organism is not None:
        filter_kwargs["organism"] = organism

    # Silence the intermediate registry calls; the ``finally`` clause restores
    # the previous verbosity on every exit path, including the early return.
    verbosity = ln.settings.verbosity
    try:
        ln.settings.verbosity = "error"
        inspect_result_current = standardize_and_inspect(
            values=values, field=field, registry=registry, **filter_kwargs
        )
        if not inspect_result_current.non_validated:
            return

        # Insertion order is also the order in which the groups are logged.
        labels_registered: Dict = {"from public": [], "without reference": []}

        (
            labels_registered[f"from {using}"],
            non_validated_labels,
        ) = register_labels_from_using_instance(
            inspect_result_current.non_validated,
            field=field,
            using=using,
            kwargs=filter_kwargs,
        )

        public_records = (
            registry.from_values(non_validated_labels, field=field, **filter_kwargs)
            if non_validated_labels
            else []
        )
        ln.save(public_records)
        labels_registered["from public"] = [
            getattr(record, field_name) for record in public_records
        ]
        # Set membership keeps this filter linear in the number of labels.
        from_public = set(labels_registered["from public"])
        labels_registered["without reference"] = [
            label for label in non_validated_labels if label not in from_public
        ]

        if not validated_only:
            non_validated_records = []
            if df is not None and registry == ln.Feature:
                non_validated_records = ln.Feature.from_df(df)
            else:
                if "organism" in filter_kwargs:
                    # New records take the organism record rather than its name.
                    filter_kwargs["organism"] = _register_organism(name=organism)
                for value in labels_registered["without reference"]:
                    filter_kwargs[field_name] = value
                    if registry == ln.Feature:
                        filter_kwargs["type"] = "category"
                    non_validated_records.append(registry(**filter_kwargs))
            ln.save(non_validated_records)

        if registry == ln.ULabel and field_name == "name":
            register_ulabels_with_parent(values, field=field, feature_name=feature_name)
    finally:
        ln.settings.verbosity = verbosity

    log_registered_labels(
        labels_registered,
        feature_name=feature_name,
        model_field=f"{registry.__name__}.{field_name}",
        validated_only=validated_only,
    )
166
+
167
+
168
def log_registered_labels(
    labels_registered: Dict,
    feature_name: str,
    model_field: str,
    validated_only: bool = True,
) -> None:
    """Log the registered labels.

    Args:
        labels_registered: Mapping of an origin key (e.g. ``"from public"``,
            ``"without reference"``) to the list of labels of that origin.
            Empty groups are skipped.
        feature_name: Name of the feature the labels belong to; the value
            ``"feature"`` switches the wording from "labels" to "features".
        model_field: ``"<Registry>.<field>"`` text shown in the messages.
        validated_only: When True, the ``"without reference"`` group is
            reported as a warning (not registered) instead of a success.
    """
    labels_type = "features" if feature_name == "feature" else "labels"
    model_field = colors.italic(model_field)
    for key, labels in labels_registered.items():
        if not labels:
            continue

        if key == "without reference" and validated_only:
            msg = colors.yellow(
                f"{len(labels)} non-validated {labels_type} are not registered with {model_field}: {labels}!"
            )
            # Subscription directly follows the call so the hint is valid Python.
            lookup_print = f".lookup()['{feature_name}']"
            msg += f"\n → to lookup categories, use {lookup_print}"
            msg += (
                f"\n → to register, run {colors.yellow('register_features(validated_only=False)')}"
                if labels_type == "features"
                else f"\n → to register, set {colors.yellow('validated_only=False')}"
            )
            logger.warning(msg)
        else:
            # A separate name avoids rebinding the loop variable.
            origin = "" if key == "without reference" else f"{colors.green(key)} "
            logger.success(
                f"registered {len(labels)} {labels_type} {origin}with {model_field}: {labels}"
            )
198
+
199
+
200
def register_ulabels_with_parent(
    values: List[str], field: FieldAttr, feature_name: str
) -> None:
    """Attach the given labels as children of an ``is_<feature_name>`` label.

    The parent label is looked up by name and created (and saved) when it
    does not exist yet. Only valid for fields of the ``ln.ULabel`` registry.
    """
    registry = field.field.model
    assert registry == ln.ULabel
    child_records = registry.from_values(values, field=field)
    parent_name = f"is_{feature_name}"
    parent = registry.filter(name=parent_name).one_or_none()
    if parent is None:
        parent = registry(name=parent_name)
        parent.save()
    parent.children.add(*child_records)
212
+
213
+
214
def register_labels_from_using_instance(
    values: List[str],
    field: FieldAttr,
    using: Optional[str] = None,
    kwargs: Optional[Dict] = None,
) -> Tuple[List[str], List[str]]:
    """Register features or labels records from the using instance.

    Args:
        values: A list of values to be registered as labels.
        field: The FieldAttr object representing the field for which labels are being registered.
        using: The name of the instance from which to transfer labels (if applicable).
            ``None`` and ``"default"`` mean there is nothing to transfer.
        kwargs: Additional keyword arguments to pass to the registry model.

    Returns:
        A tuple containing the list of registered labels and the list of non-registered labels.
    """
    kwargs = kwargs or {}

    # Without a distinct source instance, every value stays unregistered.
    if using is None or using == "default":
        return [], values

    field_name = field.field.name
    registry_using = get_registry_instance(field.field.model, using)
    inspection = standardize_and_inspect(
        values=values, field=field, registry=registry_using, **kwargs
    )
    matching_records = registry_using.filter(
        **{f"{field_name}__in": inspection.validated}
    ).all()

    transferred = []
    for record in matching_records:
        record.save()
        transferred.append(getattr(record, field_name))

    return transferred, inspection.non_validated
250
+
251
+
252
def _register_organism(name: str):
    """Return the organism record called ``name``, registering it if needed.

    An existing record is returned as is. Otherwise the record is created
    via ``bt.Organism.from_public`` and saved.

    Raises:
        ValueError: If no organism with that name is found.
    """
    import bionty as bt

    existing = bt.Organism.filter(name=name).one_or_none()
    if existing is not None:
        return existing

    organism = bt.Organism.from_public(name=name)
    if organism is None:
        raise ValueError(
            f"Organism '{name}' not found\n"
            f" → please register it: bt.Organism(name='{name}').save()"
        )
    organism.save()
    return organism
@@ -0,0 +1,139 @@
1
+ from typing import Dict, Iterable, Optional
2
+
3
+ import pandas as pd
4
+ from anndata import AnnData
5
+ from lamin_utils import colors, logger
6
+ from lnschema_core import Registry
7
+ from lnschema_core.types import FieldAttr
8
+
9
+ from lamindb._from_values import _print_values
10
+
11
+
12
def get_registry_instance(registry: Registry, using: Optional[str] = None) -> Registry:
    """Return ``registry`` bound to the instance named ``using``.

    ``None`` and ``"default"`` both return the registry unchanged.
    """
    if using is None or using == "default":
        return registry
    return registry.using(using)
17
+
18
+
19
def standardize_and_inspect(
    values: Iterable[str], field: FieldAttr, registry: Registry, **kwargs
):
    """Inspect ``values`` against ``registry``, standardizing them first if possible.

    Standardization is applied only when the registry exposes a
    ``standardize`` attribute. Both calls are muted and receive ``kwargs``.

    Returns:
        Whatever ``registry.inspect`` returns.
    """
    can_standardize = hasattr(registry, "standardize")
    candidates = (
        registry.standardize(values, field=field, mute=True, **kwargs)
        if can_standardize
        else values
    )
    return registry.inspect(candidates, field=field, mute=True, **kwargs)
26
+
27
+
28
def check_registry_organism(
    registry: Registry, organism: Optional[str] = None
) -> Optional[str]:
    """Resolve the organism name to use with ``registry``.

    Returns:
        ``None`` when the registry has no ``organism_id`` attribute.
        Otherwise the given ``organism``, falling back to the name of
        ``bt.settings.organism``.

    Raises:
        ValueError: If the registry needs an organism but neither the
            argument nor ``bt.settings.organism`` provides one.
    """
    if not hasattr(registry, "organism_id"):
        return None

    # Imported lazily: only registries with an organism need bionty.
    import bionty as bt

    if organism is None and bt.settings.organism is None:
        raise ValueError(
            f"{registry.__name__} registry requires an organism!\n"
            " → please pass an organism name via organism="
        )
    return organism or bt.settings.organism.name
42
+
43
+
44
def validate_categories(
    values: Iterable[str],
    field: FieldAttr,
    feature_name: str,
    using: Optional[str] = None,
    **kwargs,
) -> bool:
    """Validate ontology terms in a pandas series using LaminDB registries.

    The values are inspected in the default instance first. Whatever does
    not validate there is inspected again in the ``using`` instance, if one
    other than ``"default"`` is given.

    Args:
        values: The values to validate.
        field: The registry field to validate against.
        feature_name: Name used in log messages and in the registration hint.
        using: Optional name of a second instance to validate against.
        **kwargs: Only ``organism`` is read; it is resolved through
            ``check_registry_organism``.

    Returns:
        True if every value validated, False otherwise.
    """
    registry = field.field.model
    model_field = f"{registry.__name__}.{field.field.name}"
    logger.indent = ""
    logger.info(
        f"inspecting '{colors.bold(feature_name)}' by {colors.italic(model_field)}"
    )
    # Messages below are nested under the "inspecting" line. The ``finally``
    # clause resets the shared logger on success, failure and exceptions.
    logger.indent = " "
    try:
        filter_kwargs = {}
        organism = check_registry_organism(registry, kwargs.get("organism"))
        if organism is not None:
            filter_kwargs["organism"] = organism

        # Inspect the default instance
        inspect_result = standardize_and_inspect(
            values=values, field=field, registry=registry, **filter_kwargs
        )
        non_validated = inspect_result.non_validated

        if using is not None and using != "default" and non_validated:
            registry = get_registry_instance(registry, using)
            # Inspect the using instance
            inspect_result = standardize_and_inspect(
                values=non_validated, field=field, registry=registry, **filter_kwargs
            )
            non_validated = inspect_result.non_validated

        n_non_validated = len(non_validated)
        if n_non_validated == 0:
            logger.success(f"all {feature_name}s are validated")
            return True

        # Noun and verb agree with the count ("1 term is", "2 terms are").
        if n_non_validated > 1:
            noun, are = "terms", "are"
        else:
            noun, are = "term", "is"
        print_values = _print_values(non_validated)
        feature_name_print = f".register_labels('{feature_name}')"
        warning_message = (
            f"{colors.yellow(f'{n_non_validated} {noun}')} {are} not validated: "
            f"{colors.yellow(print_values)}\n → register terms via "
            f"{colors.yellow(feature_name_print)}"
        )
        logger.warning(warning_message)
        return False
    finally:
        logger.indent = ""
95
+
96
+
97
def validate_categories_in_df(
    df: pd.DataFrame,
    fields: Dict[str, FieldAttr],
    using: Optional[str] = None,
    **kwargs,
) -> bool:
    """Validate the configured DataFrame columns using LaminDB registries.

    Every column in ``fields`` is validated, even after an earlier one has
    failed, so that each column gets its own log output.

    Returns:
        True only if all columns validated (True for an empty ``fields``).
    """
    # A list, not a generator: all columns must be checked before combining.
    outcomes = [
        validate_categories(
            df[column],
            field=registry_field,
            feature_name=column,
            using=using,
            **kwargs,
        )
        for column, registry_field in fields.items()
    ]
    return all(outcomes)
114
+
115
+
116
def validate_anndata(
    adata: AnnData,
    var_field: FieldAttr,
    obs_fields: Dict[str, FieldAttr],
    using: Optional[str] = None,
    **kwargs,
) -> bool:
    """Validate the variables index and obs columns of an AnnData object.

    Both parts are always validated; the result is True only if the
    variables index and every configured obs column validated.
    """
    uses_other_instance = using is not None and using != "default"
    if uses_other_instance:
        logger.important(
            f"validating metadata using registries of instance {colors.italic(using)}"
        )

    var_ok = validate_categories(
        adata.var.index,
        field=var_field,
        feature_name="variables",
        using=using,
        **kwargs,
    )
    obs_ok = validate_categories_in_df(
        adata.obs, fields=obs_fields, using=using, **kwargs
    )
    return var_ok and obs_ok