lamindb 1.1.0__py3-none-any.whl → 1.2a2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lamindb/__init__.py +31 -26
- lamindb/_finish.py +9 -1
- lamindb/_tracked.py +26 -3
- lamindb/_view.py +2 -3
- lamindb/base/__init__.py +1 -1
- lamindb/base/ids.py +1 -10
- lamindb/base/users.py +1 -4
- lamindb/core/__init__.py +7 -65
- lamindb/core/_context.py +41 -10
- lamindb/core/_mapped_collection.py +4 -2
- lamindb/core/_settings.py +6 -6
- lamindb/core/_sync_git.py +1 -1
- lamindb/core/_track_environment.py +2 -1
- lamindb/core/datasets/_small.py +3 -3
- lamindb/core/loaders.py +22 -9
- lamindb/core/storage/_anndata_accessor.py +8 -3
- lamindb/core/storage/_backed_access.py +14 -7
- lamindb/core/storage/_pyarrow_dataset.py +24 -9
- lamindb/core/storage/_tiledbsoma.py +6 -4
- lamindb/core/storage/_zarr.py +32 -11
- lamindb/core/storage/objects.py +59 -26
- lamindb/core/storage/paths.py +16 -13
- lamindb/curators/__init__.py +173 -145
- lamindb/errors.py +1 -1
- lamindb/integrations/_vitessce.py +4 -4
- lamindb/migrations/0089_subsequent_runs.py +159 -0
- lamindb/migrations/0090_runproject_project_runs.py +73 -0
- lamindb/migrations/{0088_squashed.py → 0090_squashed.py} +245 -177
- lamindb/models/__init__.py +79 -0
- lamindb/{core → models}/_describe.py +3 -3
- lamindb/{core → models}/_django.py +8 -5
- lamindb/{core → models}/_feature_manager.py +103 -87
- lamindb/{_from_values.py → models/_from_values.py} +5 -2
- lamindb/{core/versioning.py → models/_is_versioned.py} +94 -6
- lamindb/{core → models}/_label_manager.py +10 -17
- lamindb/{core/relations.py → models/_relations.py} +8 -1
- lamindb/models/artifact.py +2601 -0
- lamindb/{_can_curate.py → models/can_curate.py} +349 -180
- lamindb/models/collection.py +683 -0
- lamindb/models/core.py +135 -0
- lamindb/models/feature.py +643 -0
- lamindb/models/flextable.py +163 -0
- lamindb/{_parents.py → models/has_parents.py} +55 -49
- lamindb/models/project.py +384 -0
- lamindb/{_query_manager.py → models/query_manager.py} +10 -8
- lamindb/{_query_set.py → models/query_set.py} +52 -30
- lamindb/models/record.py +1757 -0
- lamindb/models/run.py +563 -0
- lamindb/{_save.py → models/save.py} +18 -8
- lamindb/models/schema.py +732 -0
- lamindb/models/transform.py +360 -0
- lamindb/models/ulabel.py +249 -0
- {lamindb-1.1.0.dist-info → lamindb-1.2a2.dist-info}/METADATA +5 -5
- lamindb-1.2a2.dist-info/RECORD +94 -0
- lamindb/_artifact.py +0 -1361
- lamindb/_collection.py +0 -440
- lamindb/_feature.py +0 -316
- lamindb/_is_versioned.py +0 -40
- lamindb/_record.py +0 -1065
- lamindb/_run.py +0 -60
- lamindb/_schema.py +0 -347
- lamindb/_storage.py +0 -15
- lamindb/_transform.py +0 -170
- lamindb/_ulabel.py +0 -56
- lamindb/_utils.py +0 -9
- lamindb/base/validation.py +0 -63
- lamindb/core/_data.py +0 -491
- lamindb/core/fields.py +0 -12
- lamindb/models.py +0 -4435
- lamindb-1.1.0.dist-info/RECORD +0 -95
- {lamindb-1.1.0.dist-info → lamindb-1.2a2.dist-info}/LICENSE +0 -0
- {lamindb-1.1.0.dist-info → lamindb-1.2a2.dist-info}/WHEEL +0 -0
lamindb/curators/__init__.py
CHANGED
@@ -1,5 +1,7 @@
|
|
1
1
|
"""Curators.
|
2
2
|
|
3
|
+
.. versionadded:: 1.1.0
|
4
|
+
|
3
5
|
.. autosummary::
|
4
6
|
:toctree: .
|
5
7
|
|
@@ -7,12 +9,23 @@
|
|
7
9
|
DataFrameCurator
|
8
10
|
AnnDataCurator
|
9
11
|
|
12
|
+
CatManager:
|
13
|
+
|
14
|
+
.. autosummary::
|
15
|
+
:toctree: .
|
16
|
+
|
17
|
+
CatManager
|
18
|
+
DataFrameCatManager
|
19
|
+
AnnDataCatManager
|
20
|
+
MuDataCatManager
|
21
|
+
TiledbsomaCatManager
|
22
|
+
CurateLookup
|
23
|
+
|
10
24
|
"""
|
11
25
|
|
12
26
|
from __future__ import annotations
|
13
27
|
|
14
28
|
import copy
|
15
|
-
import random
|
16
29
|
import re
|
17
30
|
from importlib import resources
|
18
31
|
from itertools import chain
|
@@ -38,14 +51,10 @@ if TYPE_CHECKING:
|
|
38
51
|
|
39
52
|
from lamindb.base.types import FieldAttr
|
40
53
|
from lamindb.models import Record
|
41
|
-
from lamindb._feature import parse_dtype, parse_dtype_single_cat
|
42
54
|
from lamindb.base.types import FieldAttr # noqa
|
43
|
-
from lamindb.core._data import add_labels
|
44
|
-
from lamindb.core._feature_manager import parse_staged_feature_sets_from_anndata
|
45
55
|
from lamindb.core._settings import settings
|
46
56
|
from lamindb.models import (
|
47
57
|
Artifact,
|
48
|
-
CanCurate,
|
49
58
|
Collection,
|
50
59
|
Feature,
|
51
60
|
Record,
|
@@ -53,9 +62,11 @@ from lamindb.models import (
|
|
53
62
|
Schema,
|
54
63
|
ULabel,
|
55
64
|
)
|
65
|
+
from lamindb.models._feature_manager import parse_staged_feature_sets_from_anndata
|
66
|
+
from lamindb.models.artifact import add_labels, data_is_anndata
|
67
|
+
from lamindb.models.feature import parse_dtype, parse_dtype_single_cat
|
68
|
+
from lamindb.models._from_values import _format_values
|
56
69
|
|
57
|
-
from .._artifact import data_is_anndata
|
58
|
-
from .._from_values import _format_values
|
59
70
|
from ..errors import InvalidArgument, ValidationError
|
60
71
|
|
61
72
|
if TYPE_CHECKING:
|
@@ -66,7 +77,7 @@ if TYPE_CHECKING:
|
|
66
77
|
from mudata import MuData
|
67
78
|
from spatialdata import SpatialData
|
68
79
|
|
69
|
-
from lamindb.
|
80
|
+
from lamindb.models.query_set import RecordList
|
70
81
|
|
71
82
|
|
72
83
|
def strip_ansi_codes(text):
|
@@ -146,6 +157,12 @@ class CurateLookup:
|
|
146
157
|
CAT_MANAGER_DOCSTRING = """Manage categoricals by updating registries."""
|
147
158
|
|
148
159
|
|
160
|
+
SLOTS_DOCSTRING = """Curator objects by slot.
|
161
|
+
|
162
|
+
.. versionadded:: 1.1.1
|
163
|
+
"""
|
164
|
+
|
165
|
+
|
149
166
|
VALIDATE_DOCSTRING = """Validate dataset.
|
150
167
|
|
151
168
|
Raises:
|
@@ -170,6 +187,8 @@ class Curator:
|
|
170
187
|
|
171
188
|
A `Curator` object makes it easy to validate, standardize & annotate datasets.
|
172
189
|
|
190
|
+
.. versionadded:: 1.1.0
|
191
|
+
|
173
192
|
See:
|
174
193
|
- :class:`~lamindb.curators.DataFrameCurator`
|
175
194
|
- :class:`~lamindb.curators.AnnDataCurator`
|
@@ -212,6 +231,8 @@ class DataFrameCurator(Curator):
|
|
212
231
|
|
213
232
|
See also :class:`~lamindb.Curator` and :class:`~lamindb.Schema`.
|
214
233
|
|
234
|
+
.. versionadded:: 1.1.0
|
235
|
+
|
215
236
|
Args:
|
216
237
|
dataset: The DataFrame-like object to validate & annotate.
|
217
238
|
schema: A `Schema` object that defines the validation constraints.
|
@@ -222,9 +243,9 @@ class DataFrameCurator(Curator):
|
|
222
243
|
import bionty as bt
|
223
244
|
|
224
245
|
# define valid labels
|
225
|
-
|
226
|
-
ln.ULabel(name="DMSO", type=
|
227
|
-
ln.ULabel(name="IFNG", type=
|
246
|
+
perturbation = ln.ULabel(name="Perturbation", is_type=True).save()
|
247
|
+
ln.ULabel(name="DMSO", type=perturbation).save()
|
248
|
+
ln.ULabel(name="IFNG", type=perturbation).save()
|
228
249
|
bt.CellType.from_source(name="B cell").save()
|
229
250
|
bt.CellType.from_source(name="T cell").save()
|
230
251
|
|
@@ -232,7 +253,7 @@ class DataFrameCurator(Curator):
|
|
232
253
|
schema = ln.Schema(
|
233
254
|
name="small_dataset1_obs_level_metadata",
|
234
255
|
features=[
|
235
|
-
ln.Feature(name="
|
256
|
+
ln.Feature(name="perturbation", dtype="cat[ULabel[Perturbation]]").save(),
|
236
257
|
ln.Feature(name="sample_note", dtype=str).save(),
|
237
258
|
ln.Feature(name="cell_type_by_expert", dtype=bt.CellType).save(),
|
238
259
|
ln.Feature(name="cell_type_by_model", dtype=bt.CellType).save(),
|
@@ -252,10 +273,10 @@ class DataFrameCurator(Curator):
|
|
252
273
|
schema: Schema,
|
253
274
|
) -> None:
|
254
275
|
super().__init__(dataset=dataset, schema=schema)
|
276
|
+
categoricals = {}
|
255
277
|
if schema.n > 0:
|
256
278
|
# populate features
|
257
279
|
pandera_columns = {}
|
258
|
-
categoricals = {}
|
259
280
|
for feature in schema.features.all():
|
260
281
|
pandera_dtype = (
|
261
282
|
feature.dtype if not feature.dtype.startswith("cat") else "category"
|
@@ -268,13 +289,13 @@ class DataFrameCurator(Curator):
|
|
268
289
|
self._pandera_schema = pandera.DataFrameSchema(
|
269
290
|
pandera_columns, coerce=schema.coerce_dtype
|
270
291
|
)
|
271
|
-
# now deal with detailed validation of categoricals
|
272
|
-
self._cat_manager = DataFrameCatManager(
|
273
|
-
self._dataset,
|
274
|
-
categoricals=categoricals,
|
275
|
-
)
|
276
292
|
else:
|
277
293
|
assert schema.itype is not None # noqa: S101
|
294
|
+
self._cat_manager = DataFrameCatManager(
|
295
|
+
self._dataset,
|
296
|
+
columns=parse_dtype_single_cat(schema.itype, is_itype=True)["field"],
|
297
|
+
categoricals=categoricals,
|
298
|
+
)
|
278
299
|
|
279
300
|
@property
|
280
301
|
@doc_args(CAT_MANAGER_DOCSTRING)
|
@@ -285,16 +306,29 @@ class DataFrameCurator(Curator):
|
|
285
306
|
def standardize(self) -> None:
|
286
307
|
"""Standardize the dataset.
|
287
308
|
|
288
|
-
- Adds missing columns
|
289
|
-
- Fills missing values
|
309
|
+
- Adds missing columns for features
|
310
|
+
- Fills missing values for features with default values
|
290
311
|
"""
|
291
312
|
for feature in self._schema.members:
|
292
313
|
if feature.name not in self._dataset.columns:
|
293
|
-
if feature.default_value is not None:
|
294
|
-
|
314
|
+
if feature.default_value is not None or feature.nullable:
|
315
|
+
fill_value = (
|
316
|
+
feature.default_value
|
317
|
+
if feature.default_value is not None
|
318
|
+
else pd.NA
|
319
|
+
)
|
320
|
+
if feature.dtype.startswith("cat"):
|
321
|
+
self._dataset[feature.name] = pd.Categorical(
|
322
|
+
[fill_value] * len(self._dataset)
|
323
|
+
)
|
324
|
+
else:
|
325
|
+
self._dataset[feature.name] = fill_value
|
326
|
+
logger.important(
|
327
|
+
f"added column {feature.name} with fill value {fill_value}"
|
328
|
+
)
|
295
329
|
else:
|
296
330
|
raise ValidationError(
|
297
|
-
f"Missing column {feature.name} cannot be added because
|
331
|
+
f"Missing column {feature.name} cannot be added because is not nullable and has no default value"
|
298
332
|
)
|
299
333
|
else:
|
300
334
|
if feature.default_value is not None:
|
@@ -312,46 +346,29 @@ class DataFrameCurator(Curator):
|
|
312
346
|
feature.default_value
|
313
347
|
)
|
314
348
|
|
349
|
+
def _cat_manager_validate(self) -> None:
|
350
|
+
self._cat_manager.validate()
|
351
|
+
if self._cat_manager._is_validated:
|
352
|
+
self._is_validated = True
|
353
|
+
else:
|
354
|
+
self._is_validated = False
|
355
|
+
raise ValidationError(self._cat_manager._validate_category_error_messages)
|
356
|
+
|
315
357
|
@doc_args(VALIDATE_DOCSTRING)
|
316
358
|
def validate(self) -> None:
|
317
359
|
"""{}""" # noqa: D415
|
318
360
|
if self._schema.n > 0:
|
319
|
-
self._cat_manager.validate()
|
320
361
|
try:
|
362
|
+
# first validate through pandera
|
321
363
|
self._pandera_schema.validate(self._dataset)
|
322
|
-
|
323
|
-
|
324
|
-
else:
|
325
|
-
self._is_validated = False
|
326
|
-
raise ValidationError(
|
327
|
-
self._cat_manager._validate_category_error_messages
|
328
|
-
)
|
364
|
+
# then validate lamindb categoricals
|
365
|
+
self._cat_manager_validate()
|
329
366
|
except pandera.errors.SchemaError as err:
|
330
367
|
self._is_validated = False
|
331
368
|
# .exconly() doesn't exist on SchemaError
|
332
369
|
raise ValidationError(str(err)) from err
|
333
370
|
else:
|
334
|
-
|
335
|
-
registry: CanCurate = result["registry"]
|
336
|
-
inspector = registry.inspect(
|
337
|
-
self._dataset.columns,
|
338
|
-
result["field"],
|
339
|
-
mute=True,
|
340
|
-
)
|
341
|
-
if len(inspector.non_validated) > 0:
|
342
|
-
# also check public ontology
|
343
|
-
if hasattr(registry, "public"):
|
344
|
-
registry.from_values(
|
345
|
-
inspector.non_validated, result["field"], mute=True
|
346
|
-
).save()
|
347
|
-
inspector = registry.inspect(
|
348
|
-
inspector.non_validated, result["field"], mute=True
|
349
|
-
)
|
350
|
-
if len(inspector.non_validated) > 0:
|
351
|
-
self._is_validated = False
|
352
|
-
raise ValidationError(
|
353
|
-
f"Invalid identifiers for {self._schema.itype}: {inspector.non_validated}"
|
354
|
-
)
|
371
|
+
self._cat_manager_validate()
|
355
372
|
|
356
373
|
@doc_args(SAVE_ARTIFACT_DOCSTRING)
|
357
374
|
def save_artifact(
|
@@ -385,6 +402,8 @@ class AnnDataCurator(Curator):
|
|
385
402
|
|
386
403
|
See also :class:`~lamindb.Curator` and :class:`~lamindb.Schema`.
|
387
404
|
|
405
|
+
.. versionadded:: 1.1.0
|
406
|
+
|
388
407
|
Args:
|
389
408
|
dataset: The AnnData-like object to validate & annotate.
|
390
409
|
schema: A `Schema` object that defines the validation constraints.
|
@@ -395,9 +414,9 @@ class AnnDataCurator(Curator):
|
|
395
414
|
import bionty as bt
|
396
415
|
|
397
416
|
# define valid labels
|
398
|
-
|
399
|
-
ln.ULabel(name="DMSO", type=
|
400
|
-
ln.ULabel(name="IFNG", type=
|
417
|
+
perturbation = ln.ULabel(name="Perturbation", is_type=True).save()
|
418
|
+
ln.ULabel(name="DMSO", type=perturbation).save()
|
419
|
+
ln.ULabel(name="IFNG", type=perturbation).save()
|
401
420
|
bt.CellType.from_source(name="B cell").save()
|
402
421
|
bt.CellType.from_source(name="T cell").save()
|
403
422
|
|
@@ -405,9 +424,9 @@ class AnnDataCurator(Curator):
|
|
405
424
|
obs_schema = ln.Schema(
|
406
425
|
name="small_dataset1_obs_level_metadata",
|
407
426
|
features=[
|
408
|
-
ln.Feature(name="
|
427
|
+
ln.Feature(name="perturbation", dtype="cat[ULabel[Perturbation]]").save(),
|
409
428
|
ln.Feature(name="sample_note", dtype=str).save(),
|
410
|
-
ln.Feature(name="cell_type_by_expert", dtype=bt.CellType
|
429
|
+
ln.Feature(name="cell_type_by_expert", dtype=bt.CellType).save(),
|
411
430
|
ln.Feature(name="cell_type_by_model", dtype=bt.CellType").save(),
|
412
431
|
],
|
413
432
|
).save()
|
@@ -416,7 +435,7 @@ class AnnDataCurator(Curator):
|
|
416
435
|
var_schema = ln.Schema(
|
417
436
|
name="scRNA_seq_var_schema",
|
418
437
|
itype=bt.Gene.ensembl_gene_id,
|
419
|
-
dtype=
|
438
|
+
dtype=int,
|
420
439
|
).save()
|
421
440
|
|
422
441
|
# define composite schema
|
@@ -443,31 +462,55 @@ class AnnDataCurator(Curator):
|
|
443
462
|
raise InvalidArgument("dataset must be AnnData-like.")
|
444
463
|
if schema.otype != "AnnData":
|
445
464
|
raise InvalidArgument("Schema otype must be 'AnnData'.")
|
446
|
-
|
447
|
-
|
448
|
-
|
449
|
-
|
450
|
-
|
451
|
-
|
465
|
+
# TODO: also support slots other than obs and var
|
466
|
+
self._slots = {
|
467
|
+
slot: DataFrameCurator(
|
468
|
+
(
|
469
|
+
self._dataset.__getattribute__(slot).T
|
470
|
+
if slot == "var"
|
471
|
+
else self._dataset.__getattribute__(slot)
|
472
|
+
),
|
473
|
+
slot_schema,
|
474
|
+
)
|
475
|
+
for slot, slot_schema in schema.slots.items()
|
476
|
+
if slot in {"obs", "var"}
|
477
|
+
}
|
478
|
+
|
479
|
+
@property
|
480
|
+
@doc_args(SLOTS_DOCSTRING)
|
481
|
+
def slots(self) -> dict[str, DataFrameCurator]:
|
482
|
+
"""{}""" # noqa: D415
|
483
|
+
return self._slots
|
452
484
|
|
453
485
|
@doc_args(VALIDATE_DOCSTRING)
|
454
486
|
def validate(self) -> None:
|
455
487
|
"""{}""" # noqa: D415
|
456
|
-
self.
|
457
|
-
|
458
|
-
self._is_validated = True
|
488
|
+
for _, curator in self._slots.items():
|
489
|
+
curator.validate()
|
459
490
|
|
460
491
|
@doc_args(SAVE_ARTIFACT_DOCSTRING)
|
461
|
-
def save_artifact(
|
492
|
+
def save_artifact(
|
493
|
+
self,
|
494
|
+
*,
|
495
|
+
key: str | None = None,
|
496
|
+
description: str | None = None,
|
497
|
+
revises: Artifact | None = None,
|
498
|
+
run: Run | None = None,
|
499
|
+
):
|
462
500
|
"""{}""" # noqa: D415
|
463
501
|
if not self._is_validated:
|
464
|
-
self.validate()
|
465
|
-
result = parse_dtype_single_cat(self._var_curator._schema.itype, is_itype=True)
|
502
|
+
self.validate()
|
466
503
|
return save_artifact( # type: ignore
|
467
504
|
self._dataset,
|
468
505
|
description=description,
|
469
|
-
fields=self.
|
470
|
-
columns_field=
|
506
|
+
fields=self.slots["obs"]._cat_manager.categoricals,
|
507
|
+
columns_field=(
|
508
|
+
parse_dtype_single_cat(self.slots["var"]._schema.itype, is_itype=True)[
|
509
|
+
"field"
|
510
|
+
]
|
511
|
+
if "var" in self._slots
|
512
|
+
else None
|
513
|
+
),
|
471
514
|
key=key,
|
472
515
|
artifact=self._artifact,
|
473
516
|
revises=revises,
|
@@ -497,8 +540,8 @@ class CatManager:
|
|
497
540
|
|
498
541
|
If you find non-validated values, you have several options:
|
499
542
|
|
500
|
-
- new values found in the data can be registered using :meth:`~lamindb.
|
501
|
-
- non-validated values can be accessed using :meth:`~lamindb.
|
543
|
+
- new values found in the data can be registered using :meth:`~lamindb.curators.DataFrameCatManager.add_new_from`
|
544
|
+
- non-validated values can be accessed using :meth:`~lamindb.curators.DataFrameCatManager.non_validated` and addressed manually
|
502
545
|
"""
|
503
546
|
|
504
547
|
def __init__(
|
@@ -869,7 +912,7 @@ class AnnDataCatManager(CatManager):
|
|
869
912
|
def __init__(
|
870
913
|
self,
|
871
914
|
data: ad.AnnData | Artifact,
|
872
|
-
var_index: FieldAttr,
|
915
|
+
var_index: FieldAttr | None = None,
|
873
916
|
categoricals: dict[str, FieldAttr] | None = None,
|
874
917
|
obs_columns: FieldAttr = Feature.name,
|
875
918
|
verbosity: str = "hint",
|
@@ -938,15 +981,16 @@ class AnnDataCatManager(CatManager):
|
|
938
981
|
validated_only: bool = True,
|
939
982
|
):
|
940
983
|
"""Save variable records."""
|
941
|
-
|
942
|
-
|
943
|
-
|
944
|
-
|
945
|
-
|
946
|
-
|
947
|
-
|
948
|
-
|
949
|
-
|
984
|
+
if self.var_index is not None:
|
985
|
+
update_registry(
|
986
|
+
values=list(self._adata.var.index),
|
987
|
+
field=self.var_index,
|
988
|
+
key="var_index",
|
989
|
+
validated_only=validated_only,
|
990
|
+
organism=self._organism,
|
991
|
+
source=self._sources.get("var_index"),
|
992
|
+
exclude=self._exclude.get("var_index"),
|
993
|
+
)
|
950
994
|
|
951
995
|
def add_new_from(self, key: str, **kwargs):
|
952
996
|
"""Add validated & new categories.
|
@@ -982,15 +1026,19 @@ class AnnDataCatManager(CatManager):
|
|
982
1026
|
|
983
1027
|
# add all validated records to the current instance
|
984
1028
|
self._save_from_var_index(validated_only=True)
|
985
|
-
|
986
|
-
|
987
|
-
|
988
|
-
|
989
|
-
|
990
|
-
|
991
|
-
|
992
|
-
|
993
|
-
|
1029
|
+
if self.var_index is not None:
|
1030
|
+
validated_var, non_validated_var = validate_categories(
|
1031
|
+
self._adata.var.index,
|
1032
|
+
field=self._var_field,
|
1033
|
+
key="var_index",
|
1034
|
+
source=self._sources.get("var_index"),
|
1035
|
+
hint_print=".add_new_from_var_index()",
|
1036
|
+
exclude=self._exclude.get("var_index"),
|
1037
|
+
organism=self._organism, # type: ignore
|
1038
|
+
)
|
1039
|
+
else:
|
1040
|
+
validated_var = True
|
1041
|
+
non_validated_var = []
|
994
1042
|
validated_obs = self._obs_df_curator.validate()
|
995
1043
|
self._non_validated = self._obs_df_curator._non_validated # type: ignore
|
996
1044
|
if len(non_validated_var) > 0:
|
@@ -1031,11 +1079,6 @@ class AnnDataCatManager(CatManager):
|
|
1031
1079
|
class MuDataCatManager(CatManager):
|
1032
1080
|
"""Curation flow for a ``MuData`` object.
|
1033
1081
|
|
1034
|
-
See also :class:`~lamindb.Curator`.
|
1035
|
-
|
1036
|
-
Note that if genes or other measurements are removed from the MuData object,
|
1037
|
-
the object should be recreated using :meth:`~lamindb.Curator.from_mudata`.
|
1038
|
-
|
1039
1082
|
Args:
|
1040
1083
|
mdata: The MuData object to curate.
|
1041
1084
|
var_index: The registry field for mapping the ``.var`` index for each modality.
|
@@ -1289,8 +1332,6 @@ def _maybe_curation_keys_not_present(nonval_keys: list[str], name: str):
|
|
1289
1332
|
class TiledbsomaCatManager(CatManager):
|
1290
1333
|
"""Curation flow for `tiledbsoma.Experiment`.
|
1291
1334
|
|
1292
|
-
See also :class:`~lamindb.Curator`.
|
1293
|
-
|
1294
1335
|
Args:
|
1295
1336
|
experiment_uri: A local or cloud path to a `tiledbsoma.Experiment`.
|
1296
1337
|
var_index: The registry fields for mapping the `.var` indices for measurements.
|
@@ -1696,7 +1737,7 @@ class TiledbsomaCatManager(CatManager):
|
|
1696
1737
|
Returns:
|
1697
1738
|
A saved artifact record.
|
1698
1739
|
"""
|
1699
|
-
from lamindb.
|
1740
|
+
from lamindb.models.artifact import add_labels
|
1700
1741
|
|
1701
1742
|
if not self._is_validated:
|
1702
1743
|
self.validate()
|
@@ -1838,6 +1879,7 @@ class SpatialDataCatManager(CatManager):
|
|
1838
1879
|
else:
|
1839
1880
|
self._sdata = self._dataset
|
1840
1881
|
self._sample_metadata_key = sample_metadata_key
|
1882
|
+
self._write_path = None
|
1841
1883
|
self._var_fields = var_index
|
1842
1884
|
self._verify_accessor_exists(self._var_fields.keys())
|
1843
1885
|
self._categoricals = categoricals
|
@@ -2119,26 +2161,14 @@ class SpatialDataCatManager(CatManager):
|
|
2119
2161
|
try:
|
2120
2162
|
settings.verbosity = "warning"
|
2121
2163
|
|
2122
|
-
|
2123
|
-
|
2124
|
-
|
2125
|
-
|
2126
|
-
|
2127
|
-
|
2128
|
-
|
2129
|
-
|
2130
|
-
# Create the Artifact and associate Artifact metadata
|
2131
|
-
self._artifact = Artifact(
|
2132
|
-
write_path,
|
2133
|
-
description=description,
|
2134
|
-
key=key,
|
2135
|
-
revises=revises,
|
2136
|
-
run=run,
|
2137
|
-
)
|
2138
|
-
# According to Tim it is not easy to calculate the number of observations.
|
2139
|
-
# We would have to write custom code to iterate over labels (which might not even exist at that point)
|
2140
|
-
self._artifact.otype = "spatialdata"
|
2141
|
-
self._artifact.save()
|
2164
|
+
self._artifact = Artifact.from_spatialdata(
|
2165
|
+
self._sdata,
|
2166
|
+
key=key,
|
2167
|
+
description=description,
|
2168
|
+
revises=revises,
|
2169
|
+
run=run,
|
2170
|
+
)
|
2171
|
+
self._artifact.save()
|
2142
2172
|
|
2143
2173
|
# Link schemas
|
2144
2174
|
feature_kwargs = check_registry_organism(
|
@@ -2156,7 +2186,7 @@ class SpatialDataCatManager(CatManager):
|
|
2156
2186
|
"""Add Schemas from SpatialData."""
|
2157
2187
|
if obs_fields is None:
|
2158
2188
|
obs_fields = {}
|
2159
|
-
assert host.otype == "
|
2189
|
+
assert host.otype == "SpatialData" # noqa: S101
|
2160
2190
|
|
2161
2191
|
feature_sets = {}
|
2162
2192
|
|
@@ -2784,7 +2814,7 @@ class DoseHandler:
|
|
2784
2814
|
return cls.UNIT_MAP.get(unit, unit)
|
2785
2815
|
|
2786
2816
|
@classmethod
|
2787
|
-
def validate_values(cls, values: pd.Series) -> list:
|
2817
|
+
def validate_values(cls, values: pd.Series) -> list[str]:
|
2788
2818
|
"""Validate pert_dose values with strict case checking."""
|
2789
2819
|
errors = []
|
2790
2820
|
|
@@ -2828,7 +2858,7 @@ class TimeHandler:
|
|
2828
2858
|
return unit[0].lower()
|
2829
2859
|
|
2830
2860
|
@classmethod
|
2831
|
-
def validate_values(cls, values: pd.Series) -> list:
|
2861
|
+
def validate_values(cls, values: pd.Series) -> list[str]:
|
2832
2862
|
"""Validate pert_time values."""
|
2833
2863
|
errors = []
|
2834
2864
|
|
@@ -3168,10 +3198,7 @@ def check_registry_organism(registry: Record, organism: str | None = None) -> di
|
|
3168
3198
|
import bionty as bt
|
3169
3199
|
|
3170
3200
|
if organism is None and bt.settings.organism is None:
|
3171
|
-
|
3172
|
-
f"{registry.__name__} registry requires an organism!\n"
|
3173
|
-
" → please pass an organism name via organism="
|
3174
|
-
)
|
3201
|
+
return {}
|
3175
3202
|
return {"organism": organism or bt.settings.organism.name}
|
3176
3203
|
return {}
|
3177
3204
|
|
@@ -3185,8 +3212,8 @@ def validate_categories(
|
|
3185
3212
|
exclude: str | list | None = None,
|
3186
3213
|
hint_print: str | None = None,
|
3187
3214
|
curator: CatManager | None = None,
|
3188
|
-
) -> tuple[bool, list]:
|
3189
|
-
"""Validate ontology terms
|
3215
|
+
) -> tuple[bool, list[str]]:
|
3216
|
+
"""Validate ontology terms using LaminDB registries.
|
3190
3217
|
|
3191
3218
|
Args:
|
3192
3219
|
values: The values to validate.
|
@@ -3198,8 +3225,8 @@ def validate_categories(
|
|
3198
3225
|
standardize: Whether to standardize the values.
|
3199
3226
|
hint_print: The hint to print that suggests fixing non-validated values.
|
3200
3227
|
"""
|
3201
|
-
from lamindb._from_values import _format_values
|
3202
3228
|
from lamindb.core._settings import settings
|
3229
|
+
from lamindb.models._from_values import _format_values
|
3203
3230
|
|
3204
3231
|
model_field = f"{field.field.model.__name__}.{field.field.name}"
|
3205
3232
|
|
@@ -3263,7 +3290,7 @@ def validate_categories(
|
|
3263
3290
|
warning_message += f" {colors.yellow(f'{len(syn_mapper)} synonym{s}')} found: {colors.yellow(syn_mapper_print)}\n → curate synonyms via {colors.cyan(hint_msg)}"
|
3264
3291
|
if n_non_validated > len(syn_mapper):
|
3265
3292
|
if syn_mapper:
|
3266
|
-
warning_message += " for remaining terms:\n"
|
3293
|
+
warning_message += "\n for remaining terms:\n"
|
3267
3294
|
warning_message += f" → fix typos, remove non-existent values, or save terms via {colors.cyan(non_validated_hint_print)}"
|
3268
3295
|
|
3269
3296
|
if logger.indent == "":
|
@@ -3334,7 +3361,7 @@ def validate_categories_in_df(
|
|
3334
3361
|
def save_artifact(
|
3335
3362
|
data: pd.DataFrame | ad.AnnData | MuData,
|
3336
3363
|
fields: dict[str, FieldAttr] | dict[str, dict[str, FieldAttr]],
|
3337
|
-
columns_field: FieldAttr | dict[str, FieldAttr],
|
3364
|
+
columns_field: FieldAttr | dict[str, FieldAttr] | None = None,
|
3338
3365
|
description: str | None = None,
|
3339
3366
|
organism: str | None = None,
|
3340
3367
|
key: str | None = None,
|
@@ -3360,8 +3387,7 @@ def save_artifact(
|
|
3360
3387
|
Returns:
|
3361
3388
|
The saved Artifact.
|
3362
3389
|
"""
|
3363
|
-
from ..
|
3364
|
-
from ..core._data import add_labels
|
3390
|
+
from ..models.artifact import add_labels, data_is_anndata, data_is_mudata
|
3365
3391
|
|
3366
3392
|
if artifact is None:
|
3367
3393
|
if data_is_anndata(data):
|
@@ -3383,7 +3409,7 @@ def save_artifact(
|
|
3383
3409
|
artifact.schema = schema
|
3384
3410
|
artifact.save()
|
3385
3411
|
|
3386
|
-
if organism is not None:
|
3412
|
+
if organism is not None and columns_field is not None:
|
3387
3413
|
feature_kwargs = check_registry_organism(
|
3388
3414
|
(
|
3389
3415
|
list(columns_field.values())[0].field.model
|
@@ -3422,7 +3448,7 @@ def save_artifact(
|
|
3422
3448
|
filter_kwargs_current = get_current_filter_kwargs(registry, filter_kwargs)
|
3423
3449
|
df = data if isinstance(data, pd.DataFrame) else data.obs
|
3424
3450
|
# multi-value columns are separated by "|"
|
3425
|
-
if df[key].str.contains("|").any():
|
3451
|
+
if not df[key].isna().all() and df[key].str.contains("|").any():
|
3426
3452
|
values = df[key].str.split("|").explode().unique()
|
3427
3453
|
else:
|
3428
3454
|
values = df[key].unique()
|
@@ -3520,8 +3546,8 @@ def update_registry(
|
|
3520
3546
|
exclude: Values to exclude from inspect.
|
3521
3547
|
kwargs: Additional keyword arguments to pass to the registry model to create new records.
|
3522
3548
|
"""
|
3523
|
-
from lamindb._save import save as ln_save
|
3524
3549
|
from lamindb.core._settings import settings
|
3550
|
+
from lamindb.models.save import save as ln_save
|
3525
3551
|
|
3526
3552
|
registry = field.field.model
|
3527
3553
|
filter_kwargs = check_registry_organism(registry, organism)
|
@@ -3609,7 +3635,7 @@ def log_saved_labels(
|
|
3609
3635
|
validated_only: bool = True,
|
3610
3636
|
) -> None:
|
3611
3637
|
"""Log the saved labels."""
|
3612
|
-
from .._from_values import _format_values
|
3638
|
+
from ..models._from_values import _format_values
|
3613
3639
|
|
3614
3640
|
model_field = colors.italic(model_field)
|
3615
3641
|
for k, labels in labels_saved.items():
|
@@ -3655,12 +3681,14 @@ def _save_organism(name: str):
|
|
3655
3681
|
return organism
|
3656
3682
|
|
3657
3683
|
|
3658
|
-
def _ref_is_name(field: FieldAttr) -> bool | None:
|
3684
|
+
def _ref_is_name(field: FieldAttr | None) -> bool | None:
|
3659
3685
|
"""Check if the reference field is a name field."""
|
3660
|
-
from ..
|
3686
|
+
from ..models.can_curate import get_name_field
|
3661
3687
|
|
3662
|
-
|
3663
|
-
|
3688
|
+
if field is not None:
|
3689
|
+
name_field = get_name_field(field.field.model)
|
3690
|
+
return field.field.name == name_field
|
3691
|
+
return None
|
3664
3692
|
|
3665
3693
|
|
3666
3694
|
# backward compat constructors ------------------
|
@@ -3709,7 +3737,7 @@ def from_anndata(
|
|
3709
3737
|
@classmethod # type: ignore
|
3710
3738
|
def from_mudata(
|
3711
3739
|
cls,
|
3712
|
-
mdata: MuData,
|
3740
|
+
mdata: MuData | UPathStr,
|
3713
3741
|
var_index: dict[str, dict[str, FieldAttr]],
|
3714
3742
|
categoricals: dict[str, FieldAttr] | None = None,
|
3715
3743
|
verbosity: str = "hint",
|
@@ -3749,7 +3777,7 @@ def from_tiledbsoma(
|
|
3749
3777
|
@classmethod # type: ignore
|
3750
3778
|
def from_spatialdata(
|
3751
3779
|
cls,
|
3752
|
-
sdata,
|
3780
|
+
sdata: SpatialData | UPathStr,
|
3753
3781
|
var_index: dict[str, FieldAttr],
|
3754
3782
|
categoricals: dict[str, dict[str, FieldAttr]] | None = None,
|
3755
3783
|
organism: str | None = None,
|
lamindb/errors.py
CHANGED
@@ -7,10 +7,10 @@ from typing import TYPE_CHECKING
|
|
7
7
|
import lamindb_setup as ln_setup
|
8
8
|
from lamin_utils import logger
|
9
9
|
|
10
|
-
from lamindb.
|
11
|
-
from lamindb.
|
12
|
-
from lamindb.
|
13
|
-
from lamindb.
|
10
|
+
from lamindb.models.artifact import Artifact
|
11
|
+
from lamindb.models.collection import Collection
|
12
|
+
from lamindb.models.run import Run
|
13
|
+
from lamindb.models.transform import Transform
|
14
14
|
|
15
15
|
if TYPE_CHECKING:
|
16
16
|
from vitessce import VitessceConfig
|