lamindb 0.69.4__py3-none-any.whl → 0.69.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lamindb/__init__.py +1 -1
- lamindb/_annotate.py +244 -173
- lamindb/_finish.py +4 -3
- lamindb/integrations/_vitessce.py +4 -1
- {lamindb-0.69.4.dist-info → lamindb-0.69.5.dist-info}/METADATA +4 -4
- {lamindb-0.69.4.dist-info → lamindb-0.69.5.dist-info}/RECORD +8 -8
- {lamindb-0.69.4.dist-info → lamindb-0.69.5.dist-info}/LICENSE +0 -0
- {lamindb-0.69.4.dist-info → lamindb-0.69.5.dist-info}/WHEEL +0 -0
lamindb/__init__.py
CHANGED
lamindb/_annotate.py
CHANGED
@@ -15,15 +15,34 @@ class ValidationError(ValueError):
|
|
15
15
|
|
16
16
|
|
17
17
|
class AnnotateLookup:
|
18
|
-
"""Lookup
|
18
|
+
"""Lookup categories from the reference instance."""
|
19
19
|
|
20
20
|
def __init__(
|
21
|
-
self,
|
21
|
+
self,
|
22
|
+
categorials: Dict[str, FieldAttr],
|
23
|
+
slots: Dict[str, FieldAttr] = None,
|
24
|
+
using: Optional[str] = None,
|
22
25
|
) -> None:
|
23
|
-
|
26
|
+
if slots is None:
|
27
|
+
slots = {}
|
28
|
+
if slots is None:
|
29
|
+
slots = {}
|
30
|
+
self._fields = {**categorials, **slots}
|
24
31
|
self._using = None if using == "default" else using
|
25
|
-
self._using_name =
|
26
|
-
|
32
|
+
self._using_name = self._using or ln_setup.settings.instance.slug
|
33
|
+
debug_message = f"Lookup objects from the " f"{colors.italic(self._using_name)}"
|
34
|
+
logger.debug(debug_message)
|
35
|
+
|
36
|
+
def __getattr__(self, name):
|
37
|
+
if name in self._fields:
|
38
|
+
registry = self._fields[name].field.model
|
39
|
+
if self._using == "public":
|
40
|
+
return registry.public().lookup()
|
41
|
+
else:
|
42
|
+
return get_registry_instance(registry, self._using).lookup()
|
43
|
+
raise AttributeError(
|
44
|
+
f"'{self.__class__.__name__}' object has no attribute '{name}'"
|
45
|
+
)
|
27
46
|
|
28
47
|
def __getitem__(self, name):
|
29
48
|
if name in self._fields:
|
@@ -38,10 +57,17 @@ class AnnotateLookup:
|
|
38
57
|
|
39
58
|
def __repr__(self) -> str:
|
40
59
|
if len(self._fields) > 0:
|
41
|
-
|
60
|
+
getattr_keys = "\n ".join(
|
61
|
+
[f".{key}" for key in self._fields if key.isidentifier()]
|
62
|
+
)
|
63
|
+
getitem_keys = "\n ".join(
|
64
|
+
[str([key]) for key in self._fields if not key.isidentifier()]
|
65
|
+
)
|
42
66
|
return (
|
43
|
-
f"Lookup objects from the {colors.italic(self._using_name)}:\n
|
44
|
-
"
|
67
|
+
f"Lookup objects from the {colors.italic(self._using_name)}:\n "
|
68
|
+
f"{colors.green(getattr_keys)}\n "
|
69
|
+
f"{colors.green(getitem_keys)}\n\n"
|
70
|
+
"Example:\n → categories = validator.lookup().cell_type\n"
|
45
71
|
" → categories.alveolar_type_1_fibroblast_cell"
|
46
72
|
)
|
47
73
|
else:
|
@@ -53,10 +79,10 @@ class DataFrameAnnotator:
|
|
53
79
|
|
54
80
|
Args:
|
55
81
|
df: The DataFrame object to annotate.
|
56
|
-
|
82
|
+
columns: The field attribute for the feature column.
|
83
|
+
categoricals: A dictionary mapping column names to registry_field.
|
57
84
|
For example:
|
58
|
-
{"cell_type_ontology_id": bt.CellType.ontology_id, "donor_id": ln.ULabel.name}
|
59
|
-
feature_field: The field attribute for the feature column.
|
85
|
+
``{"cell_type_ontology_id": bt.CellType.ontology_id, "donor_id": ln.ULabel.name}``.
|
60
86
|
using: The reference instance containing registries to validate against.
|
61
87
|
verbosity: The verbosity level.
|
62
88
|
"""
|
@@ -64,8 +90,8 @@ class DataFrameAnnotator:
|
|
64
90
|
def __init__(
|
65
91
|
self,
|
66
92
|
df: pd.DataFrame,
|
67
|
-
|
68
|
-
|
93
|
+
columns: FieldAttr = Feature.name,
|
94
|
+
categoricals: Optional[Dict[str, FieldAttr]] = None,
|
69
95
|
using: Optional[str] = None,
|
70
96
|
verbosity: str = "hint",
|
71
97
|
**kwargs,
|
@@ -73,15 +99,15 @@ class DataFrameAnnotator:
|
|
73
99
|
from lamindb.core._settings import settings
|
74
100
|
|
75
101
|
self._df = df
|
76
|
-
self._fields =
|
77
|
-
self.
|
102
|
+
self._fields = categoricals or {}
|
103
|
+
self._columns_field = columns
|
78
104
|
self._using = using
|
79
105
|
settings.verbosity = verbosity
|
80
106
|
self._artifact = None
|
81
107
|
self._collection = None
|
82
108
|
self._validated = False
|
83
109
|
self._kwargs: Dict = kwargs
|
84
|
-
self.
|
110
|
+
self._save_columns()
|
85
111
|
|
86
112
|
@property
|
87
113
|
def fields(self) -> Dict:
|
@@ -96,69 +122,89 @@ class DataFrameAnnotator:
|
|
96
122
|
if None (default), the lookup is performed on the instance specified in "using" parameter of the validator.
|
97
123
|
if "public", the lookup is performed on the public reference.
|
98
124
|
"""
|
99
|
-
|
100
|
-
|
125
|
+
return AnnotateLookup(
|
126
|
+
categorials=self._fields,
|
127
|
+
slots={"columns": self._columns_field},
|
128
|
+
using=using or self._using,
|
129
|
+
)
|
101
130
|
|
102
|
-
def
|
103
|
-
"""
|
131
|
+
def _save_columns(self, validated_only: bool = True) -> None:
|
132
|
+
"""Save column name records."""
|
104
133
|
missing_columns = set(self.fields.keys()) - set(self._df.columns)
|
105
134
|
if missing_columns:
|
106
135
|
raise ValueError(
|
107
136
|
f"Columns {missing_columns} are not found in the data object!"
|
108
137
|
)
|
109
138
|
|
110
|
-
# Always
|
139
|
+
# Always save features specified as the fields keys
|
111
140
|
update_registry(
|
112
141
|
values=list(self.fields.keys()),
|
113
|
-
field=self.
|
114
|
-
|
142
|
+
field=self._columns_field,
|
143
|
+
key="columns",
|
144
|
+
save_function="add_new_from_columns",
|
115
145
|
using=self._using,
|
116
146
|
validated_only=False,
|
117
147
|
kwargs=self._kwargs,
|
118
148
|
)
|
119
149
|
|
120
|
-
#
|
150
|
+
# Save the rest of the columns based on validated_only
|
121
151
|
additional_columns = set(self._df.columns) - set(self.fields.keys())
|
122
152
|
if additional_columns:
|
123
153
|
update_registry(
|
124
154
|
values=list(additional_columns),
|
125
|
-
field=self.
|
126
|
-
|
155
|
+
field=self._columns_field,
|
156
|
+
key="columns",
|
157
|
+
save_function="add_new_from_columns",
|
127
158
|
using=self._using,
|
128
159
|
validated_only=validated_only,
|
129
160
|
df=self._df, # Get the Feature type from df
|
130
161
|
kwargs=self._kwargs,
|
131
162
|
)
|
132
163
|
|
133
|
-
def
|
134
|
-
"""
|
164
|
+
def add_validated_from(self, key: str, **kwargs):
|
165
|
+
"""Add validated categories.
|
166
|
+
|
167
|
+
Args:
|
168
|
+
key: The key referencing the slot in the DataFrame.
|
169
|
+
**kwargs: Additional keyword arguments.
|
170
|
+
"""
|
171
|
+
self._update_registry(key, validated_only=True, **kwargs)
|
172
|
+
|
173
|
+
def add_new_from(self, key: str, **kwargs):
|
174
|
+
"""Add validated & new categories.
|
135
175
|
|
136
176
|
Args:
|
137
|
-
|
138
|
-
validated_only: Whether to register only validated labels.
|
177
|
+
key: The key referencing the slot in the DataFrame from which to draw terms.
|
139
178
|
**kwargs: Additional keyword arguments.
|
140
179
|
"""
|
141
|
-
|
180
|
+
self._update_registry(key, validated_only=False, **kwargs)
|
181
|
+
|
182
|
+
def add_new_from_columns(self, **kwargs):
|
183
|
+
"""Add validated & new column names to its registry."""
|
184
|
+
self._save_columns(validated_only=False, **kwargs)
|
185
|
+
|
186
|
+
def _update_registry(self, categorical: str, validated_only: bool = True, **kwargs):
|
187
|
+
if categorical == "all":
|
142
188
|
self._update_registry_all(validated_only=validated_only, **kwargs)
|
143
|
-
elif
|
144
|
-
self.
|
189
|
+
elif categorical == "columns":
|
190
|
+
self._save_columns(validated_only=validated_only)
|
145
191
|
else:
|
146
|
-
if
|
147
|
-
raise ValueError(f"Feature {
|
192
|
+
if categorical not in self.fields:
|
193
|
+
raise ValueError(f"Feature {categorical} is not part of the fields!")
|
148
194
|
update_registry(
|
149
|
-
values=self._df[
|
150
|
-
field=self.fields[
|
151
|
-
|
195
|
+
values=self._df[categorical].unique().tolist(),
|
196
|
+
field=self.fields[categorical],
|
197
|
+
key=categorical,
|
152
198
|
using=self._using,
|
153
199
|
validated_only=validated_only,
|
154
200
|
kwargs=kwargs,
|
155
201
|
)
|
156
202
|
|
157
203
|
def _update_registry_all(self, validated_only: bool = True, **kwargs):
|
158
|
-
"""
|
204
|
+
"""Save labels for all features."""
|
159
205
|
for name in self.fields.keys():
|
160
|
-
logger.info(f"
|
161
|
-
self.
|
206
|
+
logger.info(f"saving labels for '{name}'")
|
207
|
+
self._update_registry(name, validated_only=validated_only, **kwargs)
|
162
208
|
|
163
209
|
def validate(self, **kwargs) -> bool:
|
164
210
|
"""Validate variables and categorical observations.
|
@@ -176,14 +222,14 @@ class DataFrameAnnotator:
|
|
176
222
|
return self._validated
|
177
223
|
|
178
224
|
def save_artifact(self, description: str, **kwargs) -> Artifact:
|
179
|
-
"""
|
225
|
+
"""Save the validated DataFrame and metadata.
|
180
226
|
|
181
227
|
Args:
|
182
228
|
description: Description of the DataFrame object.
|
183
229
|
**kwargs: Object level metadata.
|
184
230
|
|
185
231
|
Returns:
|
186
|
-
A
|
232
|
+
A saved artifact record.
|
187
233
|
"""
|
188
234
|
from lamindb.core._settings import settings
|
189
235
|
|
@@ -193,17 +239,18 @@ class DataFrameAnnotator:
|
|
193
239
|
f"Data object is not validated, please run {colors.yellow('validate()')}!"
|
194
240
|
)
|
195
241
|
|
196
|
-
# Make sure all labels are
|
242
|
+
# Make sure all labels are saved in the current instance
|
197
243
|
verbosity = settings.verbosity
|
198
244
|
try:
|
199
245
|
settings.verbosity = "warning"
|
200
|
-
|
246
|
+
# save all validated records to the current instance
|
247
|
+
self.add_validated_from("all")
|
201
248
|
|
202
249
|
self._artifact = save_artifact(
|
203
250
|
self._df,
|
204
251
|
description=description,
|
205
252
|
fields=self.fields,
|
206
|
-
|
253
|
+
columns_field=self._columns_field,
|
207
254
|
**self._kwargs,
|
208
255
|
)
|
209
256
|
finally:
|
@@ -219,10 +266,10 @@ class DataFrameAnnotator:
|
|
219
266
|
reference: Optional[str] = None,
|
220
267
|
reference_type: Optional[str] = None,
|
221
268
|
) -> Collection:
|
222
|
-
"""
|
269
|
+
"""Save a collection from artifact/artifacts.
|
223
270
|
|
224
271
|
Args:
|
225
|
-
artifact: One or several
|
272
|
+
artifact: One or several saved Artifacts.
|
226
273
|
name: Title of the publication.
|
227
274
|
description: Description of the publication.
|
228
275
|
reference: Accession number (e.g. GSE#, E-MTAB#, etc.).
|
@@ -238,17 +285,16 @@ class DataFrameAnnotator:
|
|
238
285
|
slug = ln_setup.settings.instance.slug
|
239
286
|
if collection._state.adding:
|
240
287
|
collection.save()
|
241
|
-
logger.success(f"registered collection in {colors.italic(slug)}")
|
242
288
|
else:
|
243
289
|
collection.save()
|
244
290
|
logger.warning(f"collection already exists in {colors.italic(slug)}!")
|
245
291
|
if ln_setup.settings.instance.is_remote:
|
246
|
-
logger.print(f"
|
292
|
+
logger.print(f"go to https://lamin.ai/{slug}/collection/{collection.uid}")
|
247
293
|
self._collection = collection
|
248
294
|
return collection
|
249
295
|
|
250
296
|
def clean_up_failed_runs(self):
|
251
|
-
"""Clean up previous failed runs that don't
|
297
|
+
"""Clean up previous failed runs that don't save any outputs."""
|
252
298
|
from lamindb.core._run_context import run_context
|
253
299
|
|
254
300
|
if run_context.transform is not None:
|
@@ -258,95 +304,93 @@ class DataFrameAnnotator:
|
|
258
304
|
|
259
305
|
|
260
306
|
class AnnDataAnnotator(DataFrameAnnotator):
|
261
|
-
"""Annotation flow for an AnnData object.
|
307
|
+
"""Annotation flow for an ``AnnData`` object.
|
262
308
|
|
263
309
|
Args:
|
264
310
|
adata: The AnnData object to annotate.
|
265
|
-
|
266
|
-
|
311
|
+
var_index: The registry field for mapping the ``.var`` index.
|
312
|
+
categoricals: A dictionary mapping ``.obs.columns`` to a registry field.
|
267
313
|
For example:
|
268
|
-
{"cell_type_ontology_id": bt.CellType.ontology_id, "donor_id": ln.ULabel.name}
|
269
|
-
using:
|
314
|
+
``{"cell_type_ontology_id": bt.CellType.ontology_id, "donor_id": ln.ULabel.name}``
|
315
|
+
using: A reference LaminDB instance.
|
270
316
|
"""
|
271
317
|
|
272
318
|
def __init__(
|
273
319
|
self,
|
274
320
|
adata: ad.AnnData,
|
275
|
-
|
276
|
-
|
321
|
+
var_index: FieldAttr,
|
322
|
+
categoricals: Dict[str, FieldAttr],
|
277
323
|
using: str = "default",
|
278
324
|
verbosity: str = "hint",
|
279
325
|
**kwargs,
|
280
326
|
) -> None:
|
281
327
|
self._adata = adata
|
282
|
-
self._var_field =
|
328
|
+
self._var_field = var_index
|
283
329
|
super().__init__(
|
284
330
|
df=self._adata.obs,
|
285
|
-
|
331
|
+
categoricals=categoricals,
|
286
332
|
using=using,
|
287
333
|
verbosity=verbosity,
|
288
334
|
**kwargs,
|
289
335
|
)
|
290
|
-
self._obs_fields =
|
291
|
-
self.
|
336
|
+
self._obs_fields = categoricals
|
337
|
+
self._save_from_var_index()
|
292
338
|
|
293
339
|
@property
|
294
|
-
def
|
340
|
+
def var_index(self) -> FieldAttr:
|
295
341
|
"""Return the registry field to validate variables index against."""
|
296
342
|
return self._var_field
|
297
343
|
|
298
344
|
@property
|
299
|
-
def
|
345
|
+
def categoricals(self) -> Dict:
|
300
346
|
"""Return the obs fields to validate against."""
|
301
347
|
return self._obs_fields
|
302
348
|
|
303
349
|
def lookup(self, using: Optional[str] = None) -> AnnotateLookup:
|
304
350
|
"""Lookup features and labels."""
|
305
|
-
|
306
|
-
|
307
|
-
|
308
|
-
|
309
|
-
|
310
|
-
|
311
|
-
def
|
312
|
-
"""
|
351
|
+
return AnnotateLookup(
|
352
|
+
categorials=self._obs_fields,
|
353
|
+
slots={"columns": self._columns_field, "var_index": self._var_field},
|
354
|
+
using=using or self._using,
|
355
|
+
)
|
356
|
+
|
357
|
+
def _save_from_var_index(self, validated_only: bool = True, **kwargs):
|
358
|
+
"""Save variable records."""
|
313
359
|
self._kwargs.update(kwargs)
|
314
360
|
update_registry(
|
315
|
-
values=self._adata.
|
316
|
-
field=self.
|
317
|
-
|
361
|
+
values=self._adata.var.index,
|
362
|
+
field=self.var_index,
|
363
|
+
key="var_index",
|
364
|
+
save_function="add_new_from_var_index",
|
318
365
|
using=self._using,
|
319
366
|
validated_only=validated_only,
|
320
367
|
kwargs=self._kwargs,
|
321
368
|
)
|
322
369
|
|
370
|
+
def add_new_from_var_index(self, **kwargs):
|
371
|
+
"""Update variable records."""
|
372
|
+
self._save_from_var_index(validated_only=False, **kwargs)
|
373
|
+
|
323
374
|
def validate(self, **kwargs) -> bool:
|
324
|
-
"""Validate
|
375
|
+
"""Validate categories."""
|
325
376
|
self._kwargs.update(kwargs)
|
326
377
|
self._validated = validate_anndata(
|
327
378
|
self._adata,
|
328
|
-
var_field=self.
|
329
|
-
obs_fields=self.
|
379
|
+
var_field=self.var_index,
|
380
|
+
obs_fields=self.categoricals,
|
330
381
|
**self._kwargs,
|
331
382
|
)
|
332
383
|
return self._validated
|
333
384
|
|
334
|
-
def update_registry(self, feature: str, validated_only: bool = True, **kwargs):
|
335
|
-
"""Register labels for a feature."""
|
336
|
-
if feature == "variables":
|
337
|
-
self._save_variables(validated_only=validated_only, **kwargs)
|
338
|
-
else:
|
339
|
-
super().update_registry(feature, validated_only, **kwargs)
|
340
|
-
|
341
385
|
def save_artifact(self, description: str, **kwargs) -> Artifact:
|
342
|
-
"""
|
386
|
+
"""Save the validated AnnData and metadata.
|
343
387
|
|
344
388
|
Args:
|
345
389
|
description: Description of the AnnData object.
|
346
390
|
**kwargs: Object level metadata.
|
347
391
|
|
348
392
|
Returns:
|
349
|
-
A
|
393
|
+
A saved artifact record.
|
350
394
|
"""
|
351
395
|
self._kwargs.update(kwargs)
|
352
396
|
if not self._validated:
|
@@ -355,8 +399,8 @@ class AnnDataAnnotator(DataFrameAnnotator):
|
|
355
399
|
self._artifact = save_artifact(
|
356
400
|
self._adata,
|
357
401
|
description=description,
|
358
|
-
|
359
|
-
fields=self.
|
402
|
+
columns_field=self.var_index,
|
403
|
+
fields=self.categoricals,
|
360
404
|
**self._kwargs,
|
361
405
|
)
|
362
406
|
return self._artifact
|
@@ -369,16 +413,16 @@ class Annotate:
|
|
369
413
|
def from_df(
|
370
414
|
cls,
|
371
415
|
df: pd.DataFrame,
|
372
|
-
|
373
|
-
|
416
|
+
categoricals: Optional[Dict[str, FieldAttr]] = None,
|
417
|
+
columns: FieldAttr = Feature.name,
|
374
418
|
using: Optional[str] = None,
|
375
419
|
verbosity: str = "hint",
|
376
420
|
**kwargs,
|
377
421
|
) -> DataFrameAnnotator:
|
378
422
|
return DataFrameAnnotator(
|
379
423
|
df=df,
|
380
|
-
|
381
|
-
|
424
|
+
categoricals=categoricals,
|
425
|
+
columns=columns,
|
382
426
|
using=using,
|
383
427
|
verbosity=verbosity,
|
384
428
|
**kwargs,
|
@@ -388,16 +432,16 @@ class Annotate:
|
|
388
432
|
def from_anndata(
|
389
433
|
cls,
|
390
434
|
adata: ad.AnnData,
|
391
|
-
|
392
|
-
|
435
|
+
var_index: FieldAttr,
|
436
|
+
categoricals: Dict[str, FieldAttr],
|
393
437
|
using: str = "default",
|
394
438
|
verbosity: str = "hint",
|
395
439
|
**kwargs,
|
396
440
|
) -> AnnDataAnnotator:
|
397
441
|
return AnnDataAnnotator(
|
398
442
|
adata=adata,
|
399
|
-
|
400
|
-
|
443
|
+
var_index=var_index,
|
444
|
+
categoricals=categoricals,
|
401
445
|
using=using,
|
402
446
|
verbosity=verbosity,
|
403
447
|
**kwargs,
|
@@ -439,18 +483,17 @@ def check_registry_organism(
|
|
439
483
|
def validate_categories(
|
440
484
|
values: Iterable[str],
|
441
485
|
field: FieldAttr,
|
442
|
-
|
486
|
+
key: str,
|
443
487
|
using: Optional[str] = None,
|
444
488
|
**kwargs,
|
445
489
|
) -> bool:
|
446
490
|
"""Validate ontology terms in a pandas series using LaminDB registries."""
|
447
491
|
from lamindb._from_values import _print_values
|
492
|
+
from lamindb.core._settings import settings
|
448
493
|
|
449
494
|
model_field = f"{field.field.model.__name__}.{field.field.name}"
|
450
495
|
logger.indent = ""
|
451
|
-
logger.info(
|
452
|
-
f"inspecting '{colors.bold(feature_name)}' by {colors.italic(model_field)}"
|
453
|
-
)
|
496
|
+
logger.info(f"mapping {colors.italic(key)} on {colors.italic(model_field)}")
|
454
497
|
logger.indent = " "
|
455
498
|
|
456
499
|
registry = field.field.model
|
@@ -465,6 +508,7 @@ def validate_categories(
|
|
465
508
|
)
|
466
509
|
non_validated = inspect_result.non_validated
|
467
510
|
|
511
|
+
values_validated = []
|
468
512
|
if using is not None and using != "default" and non_validated:
|
469
513
|
registry = get_registry_instance(registry, using)
|
470
514
|
# Inspect the using instance
|
@@ -472,19 +516,42 @@ def validate_categories(
|
|
472
516
|
values=non_validated, field=field, registry=registry, **filter_kwargs
|
473
517
|
)
|
474
518
|
non_validated = inspect_result.non_validated
|
519
|
+
values_validated += inspect_result.validated
|
475
520
|
|
521
|
+
# Inspect from public (bionty only)
|
522
|
+
if hasattr(registry, "public"):
|
523
|
+
verbosity = settings.verbosity
|
524
|
+
try:
|
525
|
+
settings.verbosity = "error"
|
526
|
+
public_records = registry.from_values(
|
527
|
+
non_validated, field=field, **filter_kwargs
|
528
|
+
)
|
529
|
+
values_validated += [getattr(r, field.field.name) for r in public_records]
|
530
|
+
finally:
|
531
|
+
settings.verbosity = verbosity
|
532
|
+
|
533
|
+
validated_hint_print = f".add_validated_from('{key}')"
|
534
|
+
n_validated = len(values_validated)
|
535
|
+
if n_validated > 0:
|
536
|
+
logger.warning(
|
537
|
+
f"found {colors.yellow(f'{n_validated} terms')} validated terms: "
|
538
|
+
f"{colors.yellow(values_validated)}\n → save terms via "
|
539
|
+
f"{colors.yellow(validated_hint_print)}"
|
540
|
+
)
|
541
|
+
|
542
|
+
non_validated_hint_print = f".add_new_from('{key}')"
|
543
|
+
non_validated = [i for i in non_validated if i not in values_validated]
|
476
544
|
n_non_validated = len(non_validated)
|
477
545
|
if n_non_validated == 0:
|
478
|
-
logger.success(f"
|
546
|
+
logger.success(f"{key} validated")
|
479
547
|
return True
|
480
548
|
else:
|
481
549
|
are = "are" if n_non_validated > 1 else "is"
|
482
550
|
print_values = _print_values(non_validated)
|
483
|
-
feature_name_print = f".update_registry('{feature_name}')"
|
484
551
|
warning_message = (
|
485
552
|
f"{colors.yellow(f'{n_non_validated} terms')} {are} not validated: "
|
486
|
-
f"{colors.yellow(print_values)}\n →
|
487
|
-
f"{colors.yellow(
|
553
|
+
f"{colors.yellow(print_values)}\n → save terms via "
|
554
|
+
f"{colors.yellow(non_validated_hint_print)}"
|
488
555
|
)
|
489
556
|
logger.warning(warning_message)
|
490
557
|
logger.indent = ""
|
@@ -499,11 +566,11 @@ def validate_categories_in_df(
|
|
499
566
|
) -> bool:
|
500
567
|
"""Validate categories in DataFrame columns using LaminDB registries."""
|
501
568
|
validated = True
|
502
|
-
for
|
569
|
+
for key, field in fields.items():
|
503
570
|
validated &= validate_categories(
|
504
|
-
df[
|
571
|
+
df[key],
|
505
572
|
field=field,
|
506
|
-
|
573
|
+
key=key,
|
507
574
|
using=using,
|
508
575
|
**kwargs,
|
509
576
|
)
|
@@ -526,7 +593,7 @@ def validate_anndata(
|
|
526
593
|
validated_var = validate_categories(
|
527
594
|
adata.var.index,
|
528
595
|
field=var_field,
|
529
|
-
|
596
|
+
key="var_index",
|
530
597
|
using=using,
|
531
598
|
**kwargs,
|
532
599
|
)
|
@@ -540,20 +607,20 @@ def save_artifact(
|
|
540
607
|
data: Union[pd.DataFrame, ad.AnnData],
|
541
608
|
description: str,
|
542
609
|
fields: Dict[str, FieldAttr],
|
543
|
-
|
610
|
+
columns_field: FieldAttr,
|
544
611
|
**kwargs,
|
545
612
|
) -> Artifact:
|
546
|
-
"""
|
613
|
+
"""Save all metadata with an Artifact.
|
547
614
|
|
548
615
|
Args:
|
549
|
-
data: The DataFrame or AnnData object to
|
616
|
+
data: The DataFrame or AnnData object to save.
|
550
617
|
description: A description of the artifact.
|
551
618
|
fields: A dictionary mapping obs_column to registry_field.
|
552
|
-
|
619
|
+
columns_field: The registry field to validate variables index against.
|
553
620
|
kwargs: Additional keyword arguments to pass to the registry model.
|
554
621
|
|
555
622
|
Returns:
|
556
|
-
The
|
623
|
+
The saved Artifact.
|
557
624
|
"""
|
558
625
|
if isinstance(data, ad.AnnData):
|
559
626
|
artifact = Artifact.from_anndata(data, description=description)
|
@@ -566,63 +633,61 @@ def save_artifact(
|
|
566
633
|
|
567
634
|
feature_kwargs: Dict = {}
|
568
635
|
organism = check_registry_organism(
|
569
|
-
|
636
|
+
columns_field.field.model, kwargs.pop("organism", None)
|
570
637
|
)
|
571
638
|
if organism is not None:
|
572
639
|
feature_kwargs["organism"] = organism
|
573
640
|
|
574
641
|
if isinstance(data, ad.AnnData):
|
575
|
-
artifact.features.add_from_anndata(var_field=
|
642
|
+
artifact.features.add_from_anndata(var_field=columns_field, **feature_kwargs)
|
576
643
|
else:
|
577
|
-
artifact.features.add_from_df(field=
|
644
|
+
artifact.features.add_from_df(field=columns_field, **feature_kwargs)
|
578
645
|
|
579
646
|
features = Feature.lookup().dict()
|
580
|
-
for
|
581
|
-
feature = features.get(
|
647
|
+
for key, field in fields.items():
|
648
|
+
feature = features.get(key)
|
582
649
|
registry = field.field.model
|
583
650
|
filter_kwargs = kwargs.copy()
|
584
651
|
organism = check_registry_organism(registry, organism)
|
585
652
|
if organism is not None:
|
586
653
|
filter_kwargs["organism"] = organism
|
587
654
|
df = data.obs if isinstance(data, ad.AnnData) else data
|
588
|
-
labels = registry.from_values(df[
|
655
|
+
labels = registry.from_values(df[key], field=field, **filter_kwargs)
|
589
656
|
artifact.labels.add(labels, feature)
|
590
657
|
|
591
658
|
slug = ln_setup.settings.instance.slug
|
592
|
-
logger.success(f"registered artifact in {colors.italic(slug)}")
|
593
659
|
if ln_setup.settings.instance.is_remote:
|
594
|
-
logger.
|
595
|
-
|
660
|
+
logger.important(f"go to https://lamin.ai/{slug}/artifact/{artifact.uid}")
|
596
661
|
return artifact
|
597
662
|
|
598
663
|
|
599
664
|
def update_registry(
|
600
665
|
values: List[str],
|
601
666
|
field: FieldAttr,
|
602
|
-
|
667
|
+
key: str,
|
668
|
+
save_function: str = "add_new_from",
|
603
669
|
using: Optional[str] = None,
|
604
670
|
validated_only: bool = True,
|
605
671
|
kwargs: Optional[Dict] = None,
|
606
672
|
df: Optional[pd.DataFrame] = None,
|
607
673
|
) -> None:
|
608
|
-
"""
|
674
|
+
"""Save features or labels records in the default instance from the using instance.
|
609
675
|
|
610
676
|
Args:
|
611
|
-
values: A list of values to be
|
612
|
-
field: The FieldAttr object representing the field for which labels are being
|
613
|
-
|
677
|
+
values: A list of values to be saved as labels.
|
678
|
+
field: The FieldAttr object representing the field for which labels are being saved.
|
679
|
+
key: The name of the feature to save.
|
680
|
+
save_function: The name of the function to save the labels.
|
614
681
|
using: The name of the instance from which to transfer labels (if applicable).
|
615
|
-
validated_only: If True, only
|
682
|
+
validated_only: If True, only save validated labels.
|
616
683
|
kwargs: Additional keyword arguments to pass to the registry model.
|
617
|
-
df: A DataFrame to
|
684
|
+
df: A DataFrame to save labels from.
|
618
685
|
"""
|
619
686
|
from lamindb._save import save as ln_save
|
620
687
|
from lamindb.core._settings import settings
|
621
688
|
|
622
689
|
filter_kwargs = {} if kwargs is None else kwargs.copy()
|
623
690
|
registry = field.field.model
|
624
|
-
if registry == ULabel:
|
625
|
-
validated_only = False
|
626
691
|
|
627
692
|
organism = check_registry_organism(registry, filter_kwargs.pop("organism", None))
|
628
693
|
if organism is not None:
|
@@ -638,10 +703,10 @@ def update_registry(
|
|
638
703
|
settings.verbosity = verbosity
|
639
704
|
return
|
640
705
|
|
641
|
-
|
706
|
+
labels_saved: Dict = {"from public": [], "without reference": []}
|
642
707
|
|
643
708
|
(
|
644
|
-
|
709
|
+
labels_saved[f"from {using}"],
|
645
710
|
non_validated_labels,
|
646
711
|
) = update_registry_from_using_instance(
|
647
712
|
inspect_result_current.non_validated,
|
@@ -656,11 +721,11 @@ def update_registry(
|
|
656
721
|
else []
|
657
722
|
)
|
658
723
|
ln_save(public_records)
|
659
|
-
|
724
|
+
labels_saved["from public"] = [
|
660
725
|
getattr(r, field.field.name) for r in public_records
|
661
726
|
]
|
662
|
-
|
663
|
-
i for i in non_validated_labels if i not in
|
727
|
+
labels_saved["without reference"] = [
|
728
|
+
i for i in non_validated_labels if i not in labels_saved["from public"]
|
664
729
|
]
|
665
730
|
|
666
731
|
if not validated_only:
|
@@ -670,7 +735,7 @@ def update_registry(
|
|
670
735
|
else:
|
671
736
|
if "organism" in filter_kwargs:
|
672
737
|
filter_kwargs["organism"] = _save_organism(name=organism)
|
673
|
-
for value in
|
738
|
+
for value in labels_saved["without reference"]:
|
674
739
|
filter_kwargs[field.field.name] = value
|
675
740
|
if registry == Feature:
|
676
741
|
filter_kwargs["type"] = "category"
|
@@ -678,60 +743,66 @@ def update_registry(
|
|
678
743
|
ln_save(non_validated_records)
|
679
744
|
|
680
745
|
if registry == ULabel and field.field.name == "name":
|
681
|
-
save_ulabels_with_parent(values, field=field,
|
746
|
+
save_ulabels_with_parent(values, field=field, key=key)
|
682
747
|
finally:
|
683
748
|
settings.verbosity = verbosity
|
684
749
|
|
685
|
-
|
686
|
-
|
687
|
-
|
750
|
+
log_saved_labels(
|
751
|
+
labels_saved,
|
752
|
+
key=key,
|
753
|
+
save_function=save_function,
|
688
754
|
model_field=f"{registry.__name__}.{field.field.name}",
|
689
755
|
validated_only=validated_only,
|
690
756
|
)
|
691
757
|
|
692
758
|
|
693
|
-
def
|
694
|
-
|
695
|
-
|
759
|
+
def log_saved_labels(
|
760
|
+
labels_saved: Dict,
|
761
|
+
key: str,
|
762
|
+
save_function: str,
|
696
763
|
model_field: str,
|
697
764
|
validated_only: bool = True,
|
698
765
|
) -> None:
|
699
|
-
"""Log the
|
700
|
-
labels_type = "features" if feature_name == "feature" else "labels"
|
766
|
+
"""Log the saved labels."""
|
701
767
|
model_field = colors.italic(model_field)
|
702
|
-
for
|
768
|
+
for k, labels in labels_saved.items():
|
703
769
|
if not labels:
|
704
770
|
continue
|
705
771
|
|
706
|
-
if
|
772
|
+
if k == "without reference" and validated_only:
|
707
773
|
msg = colors.yellow(
|
708
|
-
f"{len(labels)} non-validated
|
774
|
+
f"{len(labels)} non-validated categories are not saved in {model_field}: {labels}!"
|
775
|
+
)
|
776
|
+
lookup_print = (
|
777
|
+
f"lookup().{key}" if key.isidentifier() else f".lookup()['{key}']"
|
709
778
|
)
|
710
|
-
|
779
|
+
|
780
|
+
hint = f".add_new_from('{key}')"
|
711
781
|
msg += f"\n → to lookup categories, use {lookup_print}"
|
712
782
|
msg += (
|
713
|
-
f"\n → to
|
714
|
-
if
|
715
|
-
else f"\n → to
|
783
|
+
f"\n → to save, run {colors.yellow(hint)}"
|
784
|
+
if save_function == "add_new_from"
|
785
|
+
else f"\n → to save, run {colors.yellow(save_function)}"
|
716
786
|
)
|
717
787
|
logger.warning(msg)
|
718
788
|
else:
|
719
|
-
|
789
|
+
k = "" if k == "without reference" else f"{colors.green(k)} "
|
790
|
+
# the term "transferred" stresses that this is always in the context of transferring
|
791
|
+
# labels from a public ontology or a different instance to the present instance
|
792
|
+
s = "s" if len(labels) > 1 else ""
|
720
793
|
logger.success(
|
721
|
-
f"
|
794
|
+
f"added {len(labels)} record{s} {k}with {model_field} for {colors.italic(key)}: {labels}"
|
722
795
|
)
|
723
796
|
|
724
797
|
|
725
|
-
def save_ulabels_with_parent(
|
726
|
-
|
727
|
-
) -> None:
|
728
|
-
"""Register a parent label for the given labels."""
|
798
|
+
def save_ulabels_with_parent(values: List[str], field: FieldAttr, key: str) -> None:
|
799
|
+
"""Save a parent label for the given labels."""
|
729
800
|
registry = field.field.model
|
730
801
|
assert registry == ULabel
|
731
802
|
all_records = registry.from_values(values, field=field)
|
732
|
-
is_feature = registry.filter(name=f"is_{
|
803
|
+
is_feature = registry.filter(name=f"is_{key}").one_or_none()
|
733
804
|
if is_feature is None:
|
734
|
-
is_feature = registry(name=f"is_{
|
805
|
+
is_feature = registry(name=f"is_{key}")
|
735
806
|
is_feature.save()
|
736
807
|
is_feature.children.add(*all_records)
|
737
808
|
|
@@ -742,20 +813,20 @@ def update_registry_from_using_instance(
|
|
742
813
|
using: Optional[str] = None,
|
743
814
|
kwargs: Optional[Dict] = None,
|
744
815
|
) -> Tuple[List[str], List[str]]:
|
745
|
-
"""
|
816
|
+
"""Save features or labels records from the using instance.
|
746
817
|
|
747
818
|
Args:
|
748
|
-
values: A list of values to be
|
749
|
-
field: The FieldAttr object representing the field for which labels are being
|
819
|
+
values: A list of values to be saved as labels.
|
820
|
+
field: The FieldAttr object representing the field for which labels are being saved.
|
750
821
|
using: The name of the instance from which to transfer labels (if applicable).
|
751
822
|
kwargs: Additional keyword arguments to pass to the registry model.
|
752
823
|
|
753
824
|
Returns:
|
754
|
-
A tuple containing the list of
|
825
|
+
A tuple containing the list of saved labels and the list of non-saved labels.
|
755
826
|
"""
|
756
827
|
kwargs = kwargs or {}
|
757
|
-
|
758
|
-
|
828
|
+
labels_saved = []
|
829
|
+
not_saved = values
|
759
830
|
|
760
831
|
if using is not None and using != "default":
|
761
832
|
registry = field.field.model
|
@@ -768,14 +839,14 @@ def update_registry_from_using_instance(
|
|
768
839
|
).all()
|
769
840
|
for label_using in labels_using:
|
770
841
|
label_using.save()
|
771
|
-
|
772
|
-
|
842
|
+
labels_saved.append(getattr(label_using, field.field.name))
|
843
|
+
not_saved = inspect_result_using.non_validated
|
773
844
|
|
774
|
-
return
|
845
|
+
return labels_saved, not_saved
|
775
846
|
|
776
847
|
|
777
848
|
def _save_organism(name: str):
|
778
|
-
"""
|
849
|
+
"""Save an organism record."""
|
779
850
|
import bionty as bt
|
780
851
|
|
781
852
|
organism = bt.Organism.filter(name=name).one_or_none()
|
@@ -784,7 +855,7 @@ def _save_organism(name: str):
|
|
784
855
|
if organism is None:
|
785
856
|
raise ValueError(
|
786
857
|
f"Organism '{name}' not found\n"
|
787
|
-
f" → please
|
858
|
+
f" → please save it: bt.Organism(name='{name}').save()"
|
788
859
|
)
|
789
860
|
organism.save()
|
790
861
|
return organism
|
lamindb/_finish.py
CHANGED
@@ -8,6 +8,7 @@ from typing import Optional
|
|
8
8
|
import lamindb_setup as ln_setup
|
9
9
|
from lamin_utils import logger
|
10
10
|
from lnschema_core import Run, Transform
|
11
|
+
from lnschema_core.types import TransformType
|
11
12
|
|
12
13
|
from ._query_set import QuerySet
|
13
14
|
from .core._run_context import is_run_from_ipython, run_context
|
@@ -69,7 +70,7 @@ def save_run_context_core(
|
|
69
70
|
|
70
71
|
ln.settings.verbosity = "success"
|
71
72
|
|
72
|
-
if transform.type ==
|
73
|
+
if transform.type == TransformType.notebook:
|
73
74
|
try:
|
74
75
|
import nbstripout
|
75
76
|
from nbproject.dev import (
|
@@ -187,7 +188,7 @@ def save_run_context_core(
|
|
187
188
|
run.environment = artifact
|
188
189
|
logger.success(f"saved run.environment: {run.environment}")
|
189
190
|
# save report file
|
190
|
-
if not transform.type ==
|
191
|
+
if not transform.type == TransformType.notebook:
|
191
192
|
run.save()
|
192
193
|
else:
|
193
194
|
if run.report_id is not None:
|
@@ -212,7 +213,7 @@ def save_run_context_core(
|
|
212
213
|
run.save()
|
213
214
|
transform.latest_report = run.report
|
214
215
|
transform.save()
|
215
|
-
if transform.type ==
|
216
|
+
if transform.type == TransformType.notebook:
|
216
217
|
logger.success(f"saved transform.latest_report: {transform.latest_report}")
|
217
218
|
identifier = ln_setup.settings.instance.slug
|
218
219
|
logger.success(f"go to: https://lamin.ai/{identifier}/transform/{transform.uid}")
|
@@ -26,11 +26,14 @@ def save_vitessce_config(vitessce_config, description: str) -> Artifact:
|
|
26
26
|
artifact = Artifact(vitesse_export, description=description)
|
27
27
|
artifact.save()
|
28
28
|
config_dict = vitessce_config.to_dict(base_url=artifact.path.to_url())
|
29
|
+
logger.important(f"base url: {artifact.path.to_url()}")
|
29
30
|
config_filename = "vitessce_config.json"
|
30
31
|
config_file_local_path = f"{vitesse_export}/{config_filename}"
|
31
32
|
with open(config_file_local_path, "w") as file:
|
32
33
|
json.dump(config_dict, file)
|
33
|
-
|
34
|
+
config_file_path = artifact.path / config_filename
|
35
|
+
config_file_path.upload_from(config_file_local_path)
|
36
|
+
logger.important(f"config url: {config_file_path.to_url()}")
|
34
37
|
slug = ln_setup.settings.instance.slug
|
35
38
|
logger.important(f"go to: https://lamin.ai/{slug}/artifact/{artifact.uid}")
|
36
39
|
return artifact
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: lamindb
|
3
|
-
Version: 0.69.
|
3
|
+
Version: 0.69.5
|
4
4
|
Summary: A data framework for biology.
|
5
5
|
Author-email: Lamin Labs <open-source@lamin.ai>
|
6
6
|
Requires-Python: >=3.8
|
@@ -9,10 +9,10 @@ Classifier: Programming Language :: Python :: 3.8
|
|
9
9
|
Classifier: Programming Language :: Python :: 3.9
|
10
10
|
Classifier: Programming Language :: Python :: 3.10
|
11
11
|
Classifier: Programming Language :: Python :: 3.11
|
12
|
-
Requires-Dist: lnschema_core==0.64.
|
13
|
-
Requires-Dist: lamindb_setup==0.68.
|
12
|
+
Requires-Dist: lnschema_core==0.64.4
|
13
|
+
Requires-Dist: lamindb_setup==0.68.3
|
14
14
|
Requires-Dist: lamin_utils==0.13.1
|
15
|
-
Requires-Dist: lamin_cli==0.
|
15
|
+
Requires-Dist: lamin_cli==0.11.0
|
16
16
|
Requires-Dist: rapidfuzz
|
17
17
|
Requires-Dist: pyarrow
|
18
18
|
Requires-Dist: typing_extensions!=4.6.0
|
@@ -1,12 +1,12 @@
|
|
1
|
-
lamindb/__init__.py,sha256=
|
2
|
-
lamindb/_annotate.py,sha256=
|
1
|
+
lamindb/__init__.py,sha256=cWnU0vX4YUkRJTNOMNyjJHRQzTAWQDAj5R7Vp_6EB4M,2163
|
2
|
+
lamindb/_annotate.py,sha256=mM-GCej7i9eUH0cU5AcxWZ916k8NRI41WF84dfjJfu4,29955
|
3
3
|
lamindb/_artifact.py,sha256=RV36tcHMZ6wH6u65jOAQ_H4rfmFiIzZmAr8IY7kFhm0,35817
|
4
4
|
lamindb/_can_validate.py,sha256=w7lrUGTWldpvwaRiXBRrjfU_ZRidA7CooOu_r5MbocY,14569
|
5
5
|
lamindb/_collection.py,sha256=SdNNhhMh2O4q0hG4Hf_y1bcwcbkMF_sqk6MIYc-hLZo,14525
|
6
6
|
lamindb/_feature.py,sha256=ahRv87q1tcRLQ0UM5FA3KtcMQvIjW__fZq1yAdRAV7s,6728
|
7
7
|
lamindb/_feature_set.py,sha256=G_Ss6mKh4D0Eji-xSfLRbKVFXwgUE82YOqIUmkV0CAA,8767
|
8
8
|
lamindb/_filter.py,sha256=_PjyQWQBR3ohDAvJbR3hMvZ-2p2GvzFxLfKGC-gPnHI,1320
|
9
|
-
lamindb/_finish.py,sha256=
|
9
|
+
lamindb/_finish.py,sha256=8lfJzRedTDCA_XXBUf4ECOevpPhVxKqMMj9qgVkmF8M,8672
|
10
10
|
lamindb/_from_values.py,sha256=Ei11ml77Q1xubVekt2C4-mbox2-qnC7kP18B-LhCdSc,11886
|
11
11
|
lamindb/_is_versioned.py,sha256=DXp5t-1DwErpqqMc9eb08kpQPCHOC2fNzaozMoBunR4,1337
|
12
12
|
lamindb/_parents.py,sha256=pTDsW8HjQ_txFbPKrBU0WjjtCNH6sx2LASUuGWpJuYE,14742
|
@@ -45,10 +45,10 @@ lamindb/core/storage/_zarr.py,sha256=bMQSCsTOCtQy4Yo3KwCVpbUkKdWRApN9FM1rM-d2_G0
|
|
45
45
|
lamindb/core/storage/file.py,sha256=WTeC4ENn_O6HEoinmTviB89W81UrJT3bSGtnpqPpIyE,7242
|
46
46
|
lamindb/core/storage/object.py,sha256=MPUb2M8Fleq2j9x1Ryqr3BETmvsDKyf11Ifvbxd3NpA,1097
|
47
47
|
lamindb/integrations/__init__.py,sha256=aH2PmO2m4-vwIifMYTB0Fyyr_gZWtVnV71jT0tVWSw0,123
|
48
|
-
lamindb/integrations/_vitessce.py,sha256=
|
48
|
+
lamindb/integrations/_vitessce.py,sha256=n85g8YRP8Y2sfU5DPJdbU84BGPrTfU3Dg2jStdmBBRI,1637
|
49
49
|
lamindb/setup/__init__.py,sha256=OwZpZzPDv5lPPGXZP7-zK6UdO4FHvvuBh439yZvIp3A,410
|
50
50
|
lamindb/setup/core/__init__.py,sha256=LqIIvJNcONxkqjbnP6CUaP4d45Lbd6TSMAcXFp4C7_8,231
|
51
|
-
lamindb-0.69.
|
52
|
-
lamindb-0.69.
|
53
|
-
lamindb-0.69.
|
54
|
-
lamindb-0.69.
|
51
|
+
lamindb-0.69.5.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
52
|
+
lamindb-0.69.5.dist-info/WHEEL,sha256=EZbGkh7Ie4PoZfRQ8I0ZuP9VklN_TvcZ6DSE5Uar4z4,81
|
53
|
+
lamindb-0.69.5.dist-info/METADATA,sha256=meDN6DOhRAnUt2jbDcykj9Hclhos_uyO_UbtSPNukG4,2856
|
54
|
+
lamindb-0.69.5.dist-info/RECORD,,
|
File without changes
|
File without changes
|