lamindb 1.2a2__py3-none-any.whl → 1.3.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lamindb/__init__.py +3 -1
- lamindb/_view.py +2 -2
- lamindb/base/types.py +50 -11
- lamindb/core/_compat.py +60 -0
- lamindb/core/_context.py +15 -12
- lamindb/core/datasets/__init__.py +1 -0
- lamindb/core/datasets/_core.py +23 -0
- lamindb/core/datasets/_small.py +16 -2
- lamindb/core/loaders.py +22 -12
- lamindb/core/storage/_tiledbsoma.py +2 -2
- lamindb/core/storage/_zarr.py +84 -26
- lamindb/core/storage/objects.py +45 -44
- lamindb/core/types.py +11 -1
- lamindb/curators/__init__.py +1430 -1665
- lamindb/curators/_cellxgene_schemas/__init__.py +190 -18
- lamindb/curators/_cellxgene_schemas/schema_versions.csv +43 -0
- lamindb/models/_feature_manager.py +86 -42
- lamindb/models/_from_values.py +110 -119
- lamindb/models/_label_manager.py +17 -10
- lamindb/models/artifact.py +170 -102
- lamindb/models/can_curate.py +200 -231
- lamindb/models/feature.py +76 -47
- lamindb/models/project.py +69 -7
- lamindb/models/query_set.py +12 -2
- lamindb/models/record.py +77 -50
- lamindb/models/run.py +20 -7
- lamindb/models/schema.py +7 -15
- {lamindb-1.2a2.dist-info → lamindb-1.3.1.dist-info}/METADATA +8 -7
- {lamindb-1.2a2.dist-info → lamindb-1.3.1.dist-info}/RECORD +31 -30
- lamindb/curators/_cellxgene_schemas/schema_versions.yml +0 -104
- {lamindb-1.2a2.dist-info → lamindb-1.3.1.dist-info}/LICENSE +0 -0
- {lamindb-1.2a2.dist-info → lamindb-1.3.1.dist-info}/WHEEL +0 -0
lamindb/__init__.py
CHANGED
@@ -32,6 +32,7 @@ Registries.
|
|
32
32
|
Curators & integrations.
|
33
33
|
|
34
34
|
.. autosummary::
|
35
|
+
:toctree: .
|
35
36
|
|
36
37
|
curators
|
37
38
|
integrations
|
@@ -71,7 +72,7 @@ Backward compatibility.
|
|
71
72
|
|
72
73
|
# ruff: noqa: I001
|
73
74
|
# denote a release candidate for 0.1.0 with 0.1rc1, 0.1a1, 0.1b1, etc.
|
74
|
-
__version__ = "1.
|
75
|
+
__version__ = "1.3.1"
|
75
76
|
|
76
77
|
import warnings
|
77
78
|
|
@@ -120,6 +121,7 @@ if _check_instance_setup(from_module="lamindb"):
|
|
120
121
|
from .models.save import save
|
121
122
|
from . import core
|
122
123
|
from . import integrations
|
124
|
+
from . import curators
|
123
125
|
|
124
126
|
track = context.track # simple access
|
125
127
|
finish = context.finish # simple access
|
lamindb/_view.py
CHANGED
@@ -11,7 +11,7 @@ from lamindb_setup._init_instance import get_schema_module_name
|
|
11
11
|
|
12
12
|
from lamindb.models import Feature, FeatureValue, ParamValue, Record
|
13
13
|
|
14
|
-
from .models.feature import
|
14
|
+
from .models.feature import serialize_pandas_dtype
|
15
15
|
|
16
16
|
if TYPE_CHECKING:
|
17
17
|
import pandas as pd
|
@@ -114,7 +114,7 @@ def view(
|
|
114
114
|
"""
|
115
115
|
if df is not None:
|
116
116
|
descriptions = {
|
117
|
-
col_name:
|
117
|
+
col_name: serialize_pandas_dtype(dtype)
|
118
118
|
for col_name, dtype in df.dtypes.to_dict().items()
|
119
119
|
}
|
120
120
|
feature_dtypes = dict(Feature.objects.values_list("name", "dtype"))
|
lamindb/base/types.py
CHANGED
@@ -7,7 +7,7 @@ Central object types.
|
|
7
7
|
|
8
8
|
ArtifactKind
|
9
9
|
TransformType
|
10
|
-
|
10
|
+
Dtype
|
11
11
|
|
12
12
|
Basic types.
|
13
13
|
|
@@ -38,14 +38,53 @@ TransformType = Literal[
|
|
38
38
|
"pipeline", "notebook", "upload", "script", "function", "linker"
|
39
39
|
]
|
40
40
|
ArtifactKind = Literal["dataset", "model"]
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
"
|
45
|
-
"
|
46
|
-
"
|
47
|
-
"
|
48
|
-
"
|
49
|
-
"
|
50
|
-
"
|
41
|
+
|
42
|
+
# below is used for Feature.dtype and Param.dtype
|
43
|
+
Dtype = Literal[
|
44
|
+
"cat", # categoricals
|
45
|
+
"num", # numericals
|
46
|
+
"str", # string
|
47
|
+
"int", # integer / numpy.integer
|
48
|
+
"float", # float
|
49
|
+
"bool", # boolean
|
50
|
+
"date", # date
|
51
|
+
"datetime", # datetime
|
52
|
+
"object", # this is a pandas input dtype, we're only using it for complicated types, not for strings
|
51
53
|
]
|
54
|
+
"""Data type.
|
55
|
+
|
56
|
+
Data types in lamindb are a string-serialized abstraction of common data types.
|
57
|
+
|
58
|
+
Overview
|
59
|
+
========
|
60
|
+
|
61
|
+
============ ============ =================================================
|
62
|
+
description lamindb pandas
|
63
|
+
============ ============ =================================================
|
64
|
+
categorical `"cat"` `category`
|
65
|
+
numerical `"num"` `int | float`
|
66
|
+
integer `"int"` `int64 | int32 | int16 | int8 | uint | ...`
|
67
|
+
float `"float"` `float64 | float32 | float16 | float8 | ...`
|
68
|
+
string `"str"` `object`
|
69
|
+
datetime `"datetime"` `datetime`
|
70
|
+
date `"date"` `date`
|
71
|
+
============ ============ =================================================
|
72
|
+
|
73
|
+
Categoricals
|
74
|
+
============
|
75
|
+
|
76
|
+
Beyond indicating that a feature is a categorical, `lamindb` allows you to define the registry to which values are restricted.
|
77
|
+
|
78
|
+
For example, `'cat[ULabel]'` or `'cat[bionty.CellType]'` indicate that permissible values are from the `ULabel` or `CellType` registry, respectively.
|
79
|
+
|
80
|
+
You can also reference multiple registries, e.g., `'cat[ULabel|bionty.CellType]'` indicates that values can be from either registry.
|
81
|
+
|
82
|
+
You can also restrict to sub-types defined in registries via the `type` column, e.g., `'cat[ULabel[CellMedium]]'` indicates that values must be of type `CellMedium` within the `ULabel` registry.
|
83
|
+
|
84
|
+
Literal
|
85
|
+
=======
|
86
|
+
|
87
|
+
A `Dtype` object in `lamindb` is a `Literal` up to further specification of `"cat"`.
|
88
|
+
|
89
|
+
"""
|
90
|
+
FeatureDtype = Dtype # backward compat
|
lamindb/core/_compat.py
ADDED
@@ -0,0 +1,60 @@
|
|
1
|
+
import importlib.util
|
2
|
+
from typing import Any, Callable, TypeVar
|
3
|
+
|
4
|
+
T = TypeVar("T")
|
5
|
+
|
6
|
+
|
7
|
+
def is_package_installed(package_name: str) -> bool:
|
8
|
+
spec = importlib.util.find_spec(package_name)
|
9
|
+
return spec is not None
|
10
|
+
|
11
|
+
|
12
|
+
def with_package(package_name: str, operation: Callable[[Any], T]) -> T:
|
13
|
+
"""Execute an operation that requires a specific package.
|
14
|
+
|
15
|
+
Args:
|
16
|
+
package_name: Package name (e.g., "mudata")
|
17
|
+
operation: Function that takes the imported module and returns a result
|
18
|
+
|
19
|
+
Examples:
|
20
|
+
# For direct package functions
|
21
|
+
result = with_package("mudata", lambda mod: mod.read_zarr(path))
|
22
|
+
"""
|
23
|
+
try:
|
24
|
+
module = importlib.import_module(package_name)
|
25
|
+
return operation(module)
|
26
|
+
except ImportError:
|
27
|
+
raise ImportError(
|
28
|
+
f"Package '{package_name}' is required but not installed. "
|
29
|
+
f"Please install with: pip install {package_name}"
|
30
|
+
) from None
|
31
|
+
|
32
|
+
|
33
|
+
def with_package_obj(
|
34
|
+
obj: Any, class_name: str, package_name: str, operation: Callable[[Any], T]
|
35
|
+
) -> tuple[bool, T | None]:
|
36
|
+
"""Handle operations on objects that require specific packages.
|
37
|
+
|
38
|
+
Args:
|
39
|
+
obj: The object to operate on
|
40
|
+
class_name: Expected class name (e.g., "MuData")
|
41
|
+
package_name: Package that provides the class (e.g., "mudata")
|
42
|
+
operation: Function to call with the object if package is available.
|
43
|
+
|
44
|
+
Examples:
|
45
|
+
# For instance methods
|
46
|
+
handled, res = apply_class_func(dmem, "MuData", "mudata",
|
47
|
+
lambda obj: obj.write(filepath))
|
48
|
+
"""
|
49
|
+
if obj.__class__.__name__ == class_name:
|
50
|
+
try:
|
51
|
+
importlib.import_module(package_name)
|
52
|
+
result = operation(obj)
|
53
|
+
return True, result
|
54
|
+
except ImportError:
|
55
|
+
raise ImportError(
|
56
|
+
f"Object appears to be {class_name} but '{package_name}' package is not installed. "
|
57
|
+
f"Please install with: pip install {package_name}"
|
58
|
+
) from None
|
59
|
+
|
60
|
+
return False, None
|
lamindb/core/_context.py
CHANGED
@@ -301,6 +301,12 @@ class Context:
|
|
301
301
|
"""
|
302
302
|
from lamindb.models import Project
|
303
303
|
|
304
|
+
instance_settings = ln_setup.settings.instance
|
305
|
+
# similar logic here: https://github.com/laminlabs/lamindb/pull/2527
|
306
|
+
# TODO: refactor upon new access management
|
307
|
+
if instance_settings.dialect == "postgresql" and "read" in instance_settings.db:
|
308
|
+
logger.warning("skipping track(), connected in read-only mode")
|
309
|
+
return None
|
304
310
|
if project is not None:
|
305
311
|
project_record = Project.filter(
|
306
312
|
Q(name=project) | Q(uid=project)
|
@@ -461,26 +467,23 @@ class Context:
|
|
461
467
|
path_str = get_notebook_key_colab()
|
462
468
|
path = Path(path_str)
|
463
469
|
else:
|
464
|
-
import
|
470
|
+
from nbproject.dev import read_notebook
|
471
|
+
from nbproject.dev._meta_live import get_title
|
472
|
+
from nbproject.dev._pypackage import infer_pypackages
|
465
473
|
|
466
474
|
try:
|
467
|
-
|
468
|
-
|
469
|
-
|
470
|
-
|
471
|
-
|
472
|
-
description = nbproject_title
|
473
|
-
# log imported python packages
|
474
|
-
try:
|
475
|
-
from nbproject.dev._pypackage import infer_pypackages
|
475
|
+
nb = read_notebook(path_str)
|
476
|
+
|
477
|
+
nbproject_title = get_title(nb)
|
478
|
+
if nbproject_title is not None:
|
479
|
+
description = nbproject_title
|
476
480
|
|
477
|
-
nb = nbproject.dev.read_notebook(path_str)
|
478
481
|
self._logging_message_imports += (
|
479
482
|
"notebook imports:"
|
480
483
|
f" {pretty_pypackages(infer_pypackages(nb, pin_versions=True))}"
|
481
484
|
)
|
482
485
|
except Exception:
|
483
|
-
logger.debug("
|
486
|
+
logger.debug("reading the notebook file failed")
|
484
487
|
pass
|
485
488
|
return path, description
|
486
489
|
|
lamindb/core/datasets/_core.py
CHANGED
@@ -13,6 +13,7 @@ from lamindb.core._settings import settings
|
|
13
13
|
|
14
14
|
if TYPE_CHECKING:
|
15
15
|
from mudata import MuData
|
16
|
+
from spatialdata import SpatialData
|
16
17
|
|
17
18
|
|
18
19
|
def file_fcs() -> Path:
|
@@ -552,3 +553,25 @@ def schmidt22_perturbseq(basedir=".") -> Path: # pragma: no cover
|
|
552
553
|
"schmidt22_perturbseq.h5ad",
|
553
554
|
)
|
554
555
|
return Path(filepath).rename(Path(basedir) / filepath)
|
556
|
+
|
557
|
+
|
558
|
+
def spatialdata_blobs() -> SpatialData:
|
559
|
+
"""Example SpatialData dataset for tutorials."""
|
560
|
+
from spatialdata.datasets import blobs
|
561
|
+
|
562
|
+
sdata = blobs()
|
563
|
+
sdata.attrs["sample"] = {
|
564
|
+
"assay": "Visium Spatial Gene Expression",
|
565
|
+
"disease": "Alzheimer disease",
|
566
|
+
"developmental_stage": "adult stage",
|
567
|
+
}
|
568
|
+
sdata.tables["table"].var.index = [
|
569
|
+
"ENSG00000139618", # BRCA2
|
570
|
+
"ENSG00000157764", # BRAF
|
571
|
+
"ENSG00000999999", # Does not exist
|
572
|
+
]
|
573
|
+
sdata.tables["table"].obs["sample_region"] = pd.Categorical(
|
574
|
+
["sample region 1"] * 13 + ["sample region 2"] * 13
|
575
|
+
)
|
576
|
+
|
577
|
+
return sdata
|
lamindb/core/datasets/_small.py
CHANGED
@@ -8,9 +8,11 @@ import pandas as pd
|
|
8
8
|
|
9
9
|
|
10
10
|
def small_dataset1(
|
11
|
-
otype: Literal["DataFrame", "AnnData"],
|
11
|
+
otype: Literal["DataFrame", "AnnData"] = "DataFrame",
|
12
12
|
gene_symbols_in_index: bool = False,
|
13
13
|
with_typo: bool = False,
|
14
|
+
with_cell_type_synonym: bool = False,
|
15
|
+
with_cell_type_typo: bool = False,
|
14
16
|
) -> pd.DataFrame | ad.AnnData:
|
15
17
|
# define the data in the dataset
|
16
18
|
# it's a mix of numerical measurements and observation-level metadata
|
@@ -19,14 +21,25 @@ def small_dataset1(
|
|
19
21
|
var_ids = ["CD8A", "CD4", "CD14"]
|
20
22
|
else:
|
21
23
|
var_ids = ["ENSG00000153563", "ENSG00000010610", "ENSG00000170458"]
|
24
|
+
abt_cell = (
|
25
|
+
"CD8-pos alpha-beta T cell"
|
26
|
+
if with_cell_type_typo
|
27
|
+
else "CD8-positive, alpha-beta T cell"
|
28
|
+
)
|
22
29
|
dataset_dict = {
|
23
30
|
var_ids[0]: [1, 2, 3],
|
24
31
|
var_ids[1]: [3, 4, 5],
|
25
32
|
var_ids[2]: [5, 6, 7],
|
26
33
|
"perturbation": pd.Categorical(["DMSO", ifng, "DMSO"]),
|
27
34
|
"sample_note": ["was ok", "looks naah", "pretty! 🤩"],
|
28
|
-
"cell_type_by_expert": pd.Categorical(
|
35
|
+
"cell_type_by_expert": pd.Categorical(
|
36
|
+
["B-cell" if with_cell_type_synonym else "B cell", abt_cell, abt_cell]
|
37
|
+
),
|
29
38
|
"cell_type_by_model": pd.Categorical(["B cell", "T cell", "T cell"]),
|
39
|
+
"assay_oid": pd.Categorical(["EFO:0008913", "EFO:0008913", "EFO:0008913"]),
|
40
|
+
"concentration": ["0.1%", "200 nM", "0.1%"],
|
41
|
+
"treatment_time_h": [24, 24, 6],
|
42
|
+
"donor": ["D0001", "D0002", None],
|
30
43
|
}
|
31
44
|
# define the dataset-level metadata
|
32
45
|
metadata = {
|
@@ -100,6 +113,7 @@ def small_dataset3_cellxgene(
|
|
100
113
|
"disease_ontology_term_id": ["MONDO:0004975", "MONDO:0004980", "MONDO:0004980"],
|
101
114
|
"organism": ["human", "human", "human"],
|
102
115
|
"sex": ["female", "male", "unknown"],
|
116
|
+
"sex_ontology_term_id": ["PATO:0000383", "PATO:0000384", "unknown"],
|
103
117
|
"tissue": ["lungg", "lungg", "heart"],
|
104
118
|
"donor": ["-1", "1", "2"],
|
105
119
|
}
|
lamindb/core/loaders.py
CHANGED
@@ -20,10 +20,10 @@ from __future__ import annotations
|
|
20
20
|
import builtins
|
21
21
|
import re
|
22
22
|
from pathlib import Path
|
23
|
-
from typing import TYPE_CHECKING
|
23
|
+
from typing import TYPE_CHECKING, Any
|
24
24
|
|
25
|
-
import anndata as ad
|
26
25
|
import pandas as pd
|
26
|
+
from anndata import read_h5ad
|
27
27
|
from lamin_utils import logger
|
28
28
|
from lamindb_setup.core.upath import (
|
29
29
|
create_path,
|
@@ -33,13 +33,17 @@ from lamindb_setup.core.upath import (
|
|
33
33
|
from ..core._settings import settings
|
34
34
|
|
35
35
|
if TYPE_CHECKING:
|
36
|
+
from anndata import AnnData
|
36
37
|
from lamindb_setup.core.types import UPathStr
|
38
|
+
from mudata import MuData
|
39
|
+
|
40
|
+
from lamindb.core.types import ScverseDataStructures
|
37
41
|
|
38
42
|
try:
|
39
|
-
from ..core.storage._zarr import
|
43
|
+
from ..core.storage._zarr import load_zarr
|
40
44
|
except ImportError:
|
41
45
|
|
42
|
-
def
|
46
|
+
def load_zarr(storepath): # type: ignore
|
43
47
|
raise ImportError("Please install zarr: pip install zarr<=2.18.4")
|
44
48
|
|
45
49
|
|
@@ -47,7 +51,7 @@ is_run_from_ipython = getattr(builtins, "__IPYTHON__", False)
|
|
47
51
|
|
48
52
|
|
49
53
|
# tested in lamin-usecases
|
50
|
-
def load_fcs(*args, **kwargs) ->
|
54
|
+
def load_fcs(*args, **kwargs) -> AnnData:
|
51
55
|
"""Load an `.fcs` file to `AnnData`."""
|
52
56
|
try:
|
53
57
|
import readfcs
|
@@ -62,16 +66,16 @@ def load_tsv(path: UPathStr, **kwargs) -> pd.DataFrame:
|
|
62
66
|
return pd.read_csv(path_sanitized, sep="\t", **kwargs)
|
63
67
|
|
64
68
|
|
65
|
-
def load_h5ad(filepath, **kwargs) ->
|
69
|
+
def load_h5ad(filepath, **kwargs) -> AnnData:
|
66
70
|
"""Load an `.h5ad` file to `AnnData`."""
|
67
71
|
fs, filepath = infer_filesystem(filepath)
|
68
72
|
compression = kwargs.pop("compression", "infer")
|
69
73
|
with fs.open(filepath, mode="rb", compression=compression) as file:
|
70
|
-
adata =
|
74
|
+
adata = read_h5ad(file, backed=False, **kwargs)
|
71
75
|
return adata
|
72
76
|
|
73
77
|
|
74
|
-
def load_h5mu(filepath: UPathStr, **kwargs):
|
78
|
+
def load_h5mu(filepath: UPathStr, **kwargs) -> MuData:
|
75
79
|
"""Load an `.h5mu` file to `MuData`."""
|
76
80
|
import mudata as md
|
77
81
|
|
@@ -100,7 +104,7 @@ def load_html(path: UPathStr) -> None | UPathStr:
|
|
100
104
|
return path
|
101
105
|
|
102
106
|
|
103
|
-
def load_json(path: UPathStr) -> dict:
|
107
|
+
def load_json(path: UPathStr) -> dict[str, Any] | list[Any]:
|
104
108
|
"""Load `.json` to `dict`."""
|
105
109
|
import json
|
106
110
|
|
@@ -109,7 +113,7 @@ def load_json(path: UPathStr) -> dict:
|
|
109
113
|
return data
|
110
114
|
|
111
115
|
|
112
|
-
def load_yaml(path: UPathStr) -> dict:
|
116
|
+
def load_yaml(path: UPathStr) -> dict[str, Any] | list[Any]:
|
113
117
|
"""Load `.yaml` to `dict`."""
|
114
118
|
import yaml # type: ignore
|
115
119
|
|
@@ -156,7 +160,7 @@ FILE_LOADERS = {
|
|
156
160
|
".parquet": pd.read_parquet,
|
157
161
|
".parquet.gz": pd.read_parquet, # this doesn't work for externally gzipped files, REMOVE LATER
|
158
162
|
".fcs": load_fcs,
|
159
|
-
".zarr":
|
163
|
+
".zarr": load_zarr,
|
160
164
|
".html": load_html,
|
161
165
|
".json": load_json,
|
162
166
|
".yaml": load_yaml,
|
@@ -172,10 +176,15 @@ SUPPORTED_SUFFIXES = [sfx for sfx in FILE_LOADERS.keys() if sfx != ".rds"]
|
|
172
176
|
"""Suffixes with defined artifact loaders."""
|
173
177
|
|
174
178
|
|
175
|
-
def load_to_memory(
|
179
|
+
def load_to_memory(
|
180
|
+
filepath: UPathStr, **kwargs
|
181
|
+
) -> (
|
182
|
+
pd.DataFrame | ScverseDataStructures | dict[str, Any] | list[Any] | UPathStr | None
|
183
|
+
):
|
176
184
|
"""Load a file into memory.
|
177
185
|
|
178
186
|
Returns the filepath if no in-memory form is found.
|
187
|
+
May return None in interactive sessions for images.
|
179
188
|
"""
|
180
189
|
filepath = create_path(filepath)
|
181
190
|
|
@@ -194,4 +203,5 @@ def load_to_memory(filepath: UPathStr, **kwargs):
|
|
194
203
|
)
|
195
204
|
|
196
205
|
filepath = settings._storage_settings.cloud_to_local(filepath, print_progress=True)
|
206
|
+
|
197
207
|
return loader(filepath, **kwargs)
|
@@ -24,10 +24,10 @@ if TYPE_CHECKING:
|
|
24
24
|
|
25
25
|
|
26
26
|
def _load_h5ad_zarr(objpath: UPath):
|
27
|
-
from lamindb.core.loaders import
|
27
|
+
from lamindb.core.loaders import load_h5ad, load_zarr
|
28
28
|
|
29
29
|
if objpath.is_dir():
|
30
|
-
adata =
|
30
|
+
adata = load_zarr(objpath, expected_type="anndata")
|
31
31
|
else:
|
32
32
|
# read only local in backed for now
|
33
33
|
# in principle possible to read remote in backed also
|
lamindb/core/storage/_zarr.py
CHANGED
@@ -9,25 +9,60 @@ from anndata import __version__ as anndata_version
|
|
9
9
|
from anndata._io.specs import write_elem
|
10
10
|
from fsspec.implementations.local import LocalFileSystem
|
11
11
|
from lamin_utils import logger
|
12
|
-
from lamindb_setup.core.upath import create_mapper, infer_filesystem
|
12
|
+
from lamindb_setup.core.upath import S3FSMap, create_mapper, infer_filesystem
|
13
13
|
from packaging import version
|
14
14
|
|
15
|
+
from lamindb.core._compat import with_package
|
16
|
+
|
15
17
|
from ._anndata_sizes import _size_elem, _size_raw, size_adata
|
16
18
|
|
17
19
|
if version.parse(anndata_version) < version.parse("0.11.0"):
|
18
|
-
from anndata._io import read_zarr
|
20
|
+
from anndata._io import read_zarr as read_anndata_zarr
|
19
21
|
else:
|
20
|
-
from anndata.io import read_zarr
|
22
|
+
from anndata.io import read_zarr as read_anndata_zarr
|
21
23
|
|
22
24
|
|
23
25
|
if TYPE_CHECKING:
|
24
26
|
from anndata import AnnData
|
27
|
+
from fsspec import FSMap
|
25
28
|
from lamindb_setup.core.types import UPathStr
|
26
29
|
|
30
|
+
from lamindb.core.types import ScverseDataStructures
|
31
|
+
|
32
|
+
|
33
|
+
def create_zarr_open_obj(
|
34
|
+
storepath: UPathStr, *, check: bool = True
|
35
|
+
) -> str | S3FSMap | FSMap:
|
36
|
+
"""Creates the correct object that can be used to open a zarr file depending on local or remote location."""
|
37
|
+
fs, storepath_str = infer_filesystem(storepath)
|
38
|
+
|
39
|
+
if isinstance(fs, LocalFileSystem):
|
40
|
+
open_obj = storepath_str
|
41
|
+
else:
|
42
|
+
open_obj = create_mapper(fs, storepath_str, check=check)
|
43
|
+
|
44
|
+
return open_obj
|
45
|
+
|
46
|
+
|
47
|
+
def _identify_zarr_type_from_storage(
|
48
|
+
storage: zarr.Group,
|
49
|
+
) -> Literal["anndata", "mudata", "spatialdata", "unknown"]:
|
50
|
+
"""Internal helper to identify zarr type from an open storage object."""
|
51
|
+
try:
|
52
|
+
if storage.attrs.get("encoding-type", "") == "anndata":
|
53
|
+
return "anndata"
|
54
|
+
elif storage.attrs.get("encoding-type", "") == "MuData":
|
55
|
+
return "mudata"
|
56
|
+
elif "spatialdata_attrs" in storage.attrs:
|
57
|
+
return "spatialdata"
|
58
|
+
except Exception as error:
|
59
|
+
logger.warning(f"an exception occurred {error}")
|
60
|
+
return "unknown"
|
61
|
+
|
27
62
|
|
28
63
|
def identify_zarr_type(
|
29
64
|
storepath: UPathStr, *, check: bool = True
|
30
|
-
) -> Literal["anndata", "spatialdata", "unknown"]:
|
65
|
+
) -> Literal["anndata", "mudata", "spatialdata", "unknown"]:
|
31
66
|
"""Identify whether a zarr store is AnnData, SpatialData, or unknown type."""
|
32
67
|
# we can add these cheap suffix-based-checks later
|
33
68
|
# also need to check whether the .spatialdata.zarr suffix
|
@@ -39,38 +74,61 @@ def identify_zarr_type(
|
|
39
74
|
# elif ".anndata" in suffixes:
|
40
75
|
# return "anndata"
|
41
76
|
|
42
|
-
|
43
|
-
|
44
|
-
if isinstance(fs, LocalFileSystem):
|
45
|
-
open_obj = storepath_str
|
46
|
-
else:
|
47
|
-
open_obj = create_mapper(fs, storepath_str, check=check)
|
48
|
-
|
77
|
+
open_obj = create_zarr_open_obj(storepath, check=check)
|
49
78
|
try:
|
50
79
|
storage = zarr.open(open_obj, mode="r")
|
51
|
-
|
52
|
-
return "spatialdata"
|
53
|
-
if storage.attrs.get("encoding-type", "") == "anndata":
|
54
|
-
return "anndata"
|
80
|
+
return _identify_zarr_type_from_storage(storage)
|
55
81
|
except Exception as error:
|
56
|
-
logger.warning(
|
82
|
+
logger.warning(
|
83
|
+
f"an exception occured while trying to open the zarr store\n {error}"
|
84
|
+
)
|
57
85
|
return "unknown"
|
58
86
|
|
59
87
|
|
60
|
-
def
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
88
|
+
def load_zarr(
|
89
|
+
storepath: UPathStr,
|
90
|
+
expected_type: Literal["anndata", "mudata", "spatialdata"] = None,
|
91
|
+
) -> ScverseDataStructures:
|
92
|
+
"""Loads a zarr store and returns the corresponding scverse data structure.
|
93
|
+
|
94
|
+
Args:
|
95
|
+
storepath: Path to the zarr store
|
96
|
+
expected_type: If provided, ensures the zarr store is of this type ("anndata", "mudata", "spatialdata")
|
97
|
+
and raises ValueError if it's not
|
98
|
+
"""
|
99
|
+
open_obj = create_zarr_open_obj(storepath, check=True)
|
100
|
+
|
101
|
+
# Open the storage once
|
102
|
+
try:
|
103
|
+
storage = zarr.open(open_obj, mode="r")
|
104
|
+
except Exception as error:
|
105
|
+
raise ValueError(f"Could not open zarr store: {error}") from None
|
106
|
+
|
107
|
+
actual_type = _identify_zarr_type_from_storage(storage)
|
108
|
+
if expected_type is not None and actual_type != expected_type:
|
109
|
+
raise ValueError(
|
110
|
+
f"Expected zarr store of type '{expected_type}', but found '{actual_type}'"
|
111
|
+
)
|
112
|
+
|
113
|
+
match actual_type:
|
114
|
+
case "anndata":
|
115
|
+
scverse_obj = read_anndata_zarr(open_obj)
|
116
|
+
case "mudata":
|
117
|
+
scverse_obj = with_package("mudata", lambda mod: mod.read_zarr(open_obj))
|
118
|
+
case "spatialdata":
|
119
|
+
scverse_obj = with_package(
|
120
|
+
"spatialdata", lambda mod: mod.read_zarr(open_obj)
|
121
|
+
)
|
122
|
+
case "unknown" | _:
|
123
|
+
raise ValueError(
|
124
|
+
"Unable to determine zarr store format and therefore cannot load Artifact."
|
125
|
+
)
|
126
|
+
return scverse_obj
|
69
127
|
|
70
128
|
|
71
129
|
def write_adata_zarr(
|
72
130
|
adata: AnnData, storepath: UPathStr, callback=None, chunks=None, **dataset_kwargs
|
73
|
-
):
|
131
|
+
) -> None:
|
74
132
|
fs, storepath_str = infer_filesystem(storepath)
|
75
133
|
store = create_mapper(fs, storepath_str, create=True)
|
76
134
|
|