lamindb 1.2a2__py3-none-any.whl → 1.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lamindb/__init__.py +3 -1
- lamindb/core/_compat.py +60 -0
- lamindb/core/_context.py +15 -12
- lamindb/core/datasets/__init__.py +1 -0
- lamindb/core/datasets/_core.py +23 -0
- lamindb/core/datasets/_small.py +16 -2
- lamindb/core/loaders.py +22 -12
- lamindb/core/storage/_tiledbsoma.py +2 -2
- lamindb/core/storage/_zarr.py +84 -26
- lamindb/core/storage/objects.py +45 -44
- lamindb/core/types.py +10 -0
- lamindb/curators/__init__.py +1272 -1517
- lamindb/curators/_cellxgene_schemas/__init__.py +190 -18
- lamindb/curators/_cellxgene_schemas/schema_versions.csv +43 -0
- lamindb/models/_feature_manager.py +65 -14
- lamindb/models/_from_values.py +113 -78
- lamindb/models/artifact.py +142 -98
- lamindb/models/can_curate.py +185 -216
- lamindb/models/feature.py +32 -2
- lamindb/models/project.py +69 -7
- lamindb/models/query_set.py +12 -2
- lamindb/models/record.py +48 -25
- lamindb/models/run.py +18 -1
- lamindb/models/schema.py +0 -8
- {lamindb-1.2a2.dist-info → lamindb-1.3.0.dist-info}/METADATA +7 -6
- {lamindb-1.2a2.dist-info → lamindb-1.3.0.dist-info}/RECORD +28 -27
- lamindb/curators/_cellxgene_schemas/schema_versions.yml +0 -104
- {lamindb-1.2a2.dist-info → lamindb-1.3.0.dist-info}/LICENSE +0 -0
- {lamindb-1.2a2.dist-info → lamindb-1.3.0.dist-info}/WHEEL +0 -0
lamindb/__init__.py
CHANGED
@@ -32,6 +32,7 @@ Registries.
|
|
32
32
|
Curators & integrations.
|
33
33
|
|
34
34
|
.. autosummary::
|
35
|
+
:toctree: .
|
35
36
|
|
36
37
|
curators
|
37
38
|
integrations
|
@@ -71,7 +72,7 @@ Backward compatibility.
|
|
71
72
|
|
72
73
|
# ruff: noqa: I001
|
73
74
|
# denote a release candidate for 0.1.0 with 0.1rc1, 0.1a1, 0.1b1, etc.
|
74
|
-
__version__ = "1.
|
75
|
+
__version__ = "1.3.0"
|
75
76
|
|
76
77
|
import warnings
|
77
78
|
|
@@ -120,6 +121,7 @@ if _check_instance_setup(from_module="lamindb"):
|
|
120
121
|
from .models.save import save
|
121
122
|
from . import core
|
122
123
|
from . import integrations
|
124
|
+
from . import curators
|
123
125
|
|
124
126
|
track = context.track # simple access
|
125
127
|
finish = context.finish # simple access
|
lamindb/core/_compat.py
ADDED
@@ -0,0 +1,60 @@
|
|
1
|
+
import importlib.util
|
2
|
+
from typing import Any, Callable, TypeVar
|
3
|
+
|
4
|
+
T = TypeVar("T")
|
5
|
+
|
6
|
+
|
7
|
+
def is_package_installed(package_name: str) -> bool:
    """Report whether the import system can locate `package_name`.

    Args:
        package_name: Module or package name to look up (e.g., "mudata").

    Returns:
        True if an import spec is found for the name, False otherwise.
    """
    try:
        spec = importlib.util.find_spec(package_name)
    except (ImportError, ValueError):
        # find_spec imports the parents of a dotted name and raises
        # ModuleNotFoundError when one is missing; it raises ValueError when an
        # already-imported module has `__spec__` set to None. Neither case
        # should crash a yes/no availability check.
        return False
    return spec is not None
|
10
|
+
|
11
|
+
|
12
|
+
def with_package(package_name: str, operation: Callable[[Any], T]) -> T:
    """Execute an operation that requires a specific package.

    Args:
        package_name: Package name (e.g., "mudata")
        operation: Function that takes the imported module and returns a result

    Returns:
        The value returned by `operation`.

    Raises:
        ImportError: With an installation hint if importing `package_name`
            fails. An ImportError raised by `operation` itself propagates
            unchanged.

    Examples:
        # For direct package functions
        result = with_package("mudata", lambda mod: mod.read_zarr(path))
    """
    try:
        module = importlib.import_module(package_name)
    except ImportError:
        raise ImportError(
            f"Package '{package_name}' is required but not installed. "
            f"Please install with: pip install {package_name}"
        ) from None
    # Run the operation outside the try block so that an ImportError raised by
    # the operation is not misreported as `package_name` being missing.
    return operation(module)
|
31
|
+
|
32
|
+
|
33
|
+
def with_package_obj(
|
34
|
+
obj: Any, class_name: str, package_name: str, operation: Callable[[Any], T]
|
35
|
+
) -> tuple[bool, T | None]:
|
36
|
+
"""Handle operations on objects that require specific packages.
|
37
|
+
|
38
|
+
Args:
|
39
|
+
obj: The object to operate on
|
40
|
+
class_name: Expected class name (e.g., "MuData")
|
41
|
+
package_name: Package that provides the class (e.g., "mudata")
|
42
|
+
operation: Function to call with the object if package is available.
|
43
|
+
|
44
|
+
Examples:
|
45
|
+
# For instance methods
|
46
|
+
handled, res = apply_class_func(dmem, "MuData", "mudata",
|
47
|
+
lambda obj: obj.write(filepath))
|
48
|
+
"""
|
49
|
+
if obj.__class__.__name__ == class_name:
|
50
|
+
try:
|
51
|
+
importlib.import_module(package_name)
|
52
|
+
result = operation(obj)
|
53
|
+
return True, result
|
54
|
+
except ImportError:
|
55
|
+
raise ImportError(
|
56
|
+
f"Object appears to be {class_name} but '{package_name}' package is not installed. "
|
57
|
+
f"Please install with: pip install {package_name}"
|
58
|
+
) from None
|
59
|
+
|
60
|
+
return False, None
|
lamindb/core/_context.py
CHANGED
@@ -301,6 +301,12 @@ class Context:
|
|
301
301
|
"""
|
302
302
|
from lamindb.models import Project
|
303
303
|
|
304
|
+
instance_settings = ln_setup.settings.instance
|
305
|
+
# similar logic here: https://github.com/laminlabs/lamindb/pull/2527
|
306
|
+
# TODO: refactor upon new access management
|
307
|
+
if instance_settings.dialect == "postgresql" and "read" in instance_settings.db:
|
308
|
+
logger.warning("skipping track(), connected in read-only mode")
|
309
|
+
return None
|
304
310
|
if project is not None:
|
305
311
|
project_record = Project.filter(
|
306
312
|
Q(name=project) | Q(uid=project)
|
@@ -461,26 +467,23 @@ class Context:
|
|
461
467
|
path_str = get_notebook_key_colab()
|
462
468
|
path = Path(path_str)
|
463
469
|
else:
|
464
|
-
import
|
470
|
+
from nbproject.dev import read_notebook
|
471
|
+
from nbproject.dev._meta_live import get_title
|
472
|
+
from nbproject.dev._pypackage import infer_pypackages
|
465
473
|
|
466
474
|
try:
|
467
|
-
|
468
|
-
|
469
|
-
|
470
|
-
|
471
|
-
|
472
|
-
description = nbproject_title
|
473
|
-
# log imported python packages
|
474
|
-
try:
|
475
|
-
from nbproject.dev._pypackage import infer_pypackages
|
475
|
+
nb = read_notebook(path_str)
|
476
|
+
|
477
|
+
nbproject_title = get_title(nb)
|
478
|
+
if nbproject_title is not None:
|
479
|
+
description = nbproject_title
|
476
480
|
|
477
|
-
nb = nbproject.dev.read_notebook(path_str)
|
478
481
|
self._logging_message_imports += (
|
479
482
|
"notebook imports:"
|
480
483
|
f" {pretty_pypackages(infer_pypackages(nb, pin_versions=True))}"
|
481
484
|
)
|
482
485
|
except Exception:
|
483
|
-
logger.debug("
|
486
|
+
logger.debug("reading the notebook file failed")
|
484
487
|
pass
|
485
488
|
return path, description
|
486
489
|
|
lamindb/core/datasets/_core.py
CHANGED
@@ -13,6 +13,7 @@ from lamindb.core._settings import settings
|
|
13
13
|
|
14
14
|
if TYPE_CHECKING:
|
15
15
|
from mudata import MuData
|
16
|
+
from spatialdata import SpatialData
|
16
17
|
|
17
18
|
|
18
19
|
def file_fcs() -> Path:
|
@@ -552,3 +553,25 @@ def schmidt22_perturbseq(basedir=".") -> Path: # pragma: no cover
|
|
552
553
|
"schmidt22_perturbseq.h5ad",
|
553
554
|
)
|
554
555
|
return Path(filepath).rename(Path(basedir) / filepath)
|
556
|
+
|
557
|
+
|
558
|
+
def spatialdata_blobs() -> SpatialData:
    """Build the SpatialData "blobs" example used in tutorials.

    Starts from `spatialdata.datasets.blobs()` and annotates it with
    sample-level metadata, Ensembl gene ids as the variable index of the
    "table" table, and a categorical `sample_region` observation column.
    """
    from spatialdata.datasets import blobs

    blobs_sdata = blobs()

    # sample-level metadata is stored under attrs["sample"]
    sample_metadata = {
        "assay": "Visium Spatial Gene Expression",
        "disease": "Alzheimer disease",
        "developmental_stage": "adult stage",
    }
    blobs_sdata.attrs["sample"] = sample_metadata

    # NOTE(review): presumably the blobs table has exactly 3 variables -- confirm
    gene_ids = [
        "ENSG00000139618",  # BRCA2
        "ENSG00000157764",  # BRAF
        "ENSG00000999999",  # Does not exist
    ]
    blobs_sdata.tables["table"].var.index = gene_ids

    # NOTE(review): 13 + 13 labels assumes the table has 26 observations -- confirm
    region_labels = ["sample region 1"] * 13
    region_labels.extend(["sample region 2"] * 13)
    blobs_sdata.tables["table"].obs["sample_region"] = pd.Categorical(region_labels)

    return blobs_sdata
|
lamindb/core/datasets/_small.py
CHANGED
@@ -8,9 +8,11 @@ import pandas as pd
|
|
8
8
|
|
9
9
|
|
10
10
|
def small_dataset1(
|
11
|
-
otype: Literal["DataFrame", "AnnData"],
|
11
|
+
otype: Literal["DataFrame", "AnnData"] = "DataFrame",
|
12
12
|
gene_symbols_in_index: bool = False,
|
13
13
|
with_typo: bool = False,
|
14
|
+
with_cell_type_synonym: bool = False,
|
15
|
+
with_cell_type_typo: bool = False,
|
14
16
|
) -> pd.DataFrame | ad.AnnData:
|
15
17
|
# define the data in the dataset
|
16
18
|
# it's a mix of numerical measurements and observation-level metadata
|
@@ -19,14 +21,25 @@ def small_dataset1(
|
|
19
21
|
var_ids = ["CD8A", "CD4", "CD14"]
|
20
22
|
else:
|
21
23
|
var_ids = ["ENSG00000153563", "ENSG00000010610", "ENSG00000170458"]
|
24
|
+
abt_cell = (
|
25
|
+
"CD8-pos alpha-beta T cell"
|
26
|
+
if with_cell_type_typo
|
27
|
+
else "CD8-positive, alpha-beta T cell"
|
28
|
+
)
|
22
29
|
dataset_dict = {
|
23
30
|
var_ids[0]: [1, 2, 3],
|
24
31
|
var_ids[1]: [3, 4, 5],
|
25
32
|
var_ids[2]: [5, 6, 7],
|
26
33
|
"perturbation": pd.Categorical(["DMSO", ifng, "DMSO"]),
|
27
34
|
"sample_note": ["was ok", "looks naah", "pretty! 🤩"],
|
28
|
-
"cell_type_by_expert": pd.Categorical(
|
35
|
+
"cell_type_by_expert": pd.Categorical(
|
36
|
+
["B-cell" if with_cell_type_synonym else "B cell", abt_cell, abt_cell]
|
37
|
+
),
|
29
38
|
"cell_type_by_model": pd.Categorical(["B cell", "T cell", "T cell"]),
|
39
|
+
"assay_oid": pd.Categorical(["EFO:0008913", "EFO:0008913", "EFO:0008913"]),
|
40
|
+
"concentration": ["0.1%", "200 nM", "0.1%"],
|
41
|
+
"treatment_time_h": [24, 24, 6],
|
42
|
+
"donor": ["D0001", "D0002", None],
|
30
43
|
}
|
31
44
|
# define the dataset-level metadata
|
32
45
|
metadata = {
|
@@ -100,6 +113,7 @@ def small_dataset3_cellxgene(
|
|
100
113
|
"disease_ontology_term_id": ["MONDO:0004975", "MONDO:0004980", "MONDO:0004980"],
|
101
114
|
"organism": ["human", "human", "human"],
|
102
115
|
"sex": ["female", "male", "unknown"],
|
116
|
+
"sex_ontology_term_id": ["PATO:0000383", "PATO:0000384", "unknown"],
|
103
117
|
"tissue": ["lungg", "lungg", "heart"],
|
104
118
|
"donor": ["-1", "1", "2"],
|
105
119
|
}
|
lamindb/core/loaders.py
CHANGED
@@ -20,10 +20,10 @@ from __future__ import annotations
|
|
20
20
|
import builtins
|
21
21
|
import re
|
22
22
|
from pathlib import Path
|
23
|
-
from typing import TYPE_CHECKING
|
23
|
+
from typing import TYPE_CHECKING, Any
|
24
24
|
|
25
|
-
import anndata as ad
|
26
25
|
import pandas as pd
|
26
|
+
from anndata import read_h5ad
|
27
27
|
from lamin_utils import logger
|
28
28
|
from lamindb_setup.core.upath import (
|
29
29
|
create_path,
|
@@ -33,13 +33,17 @@ from lamindb_setup.core.upath import (
|
|
33
33
|
from ..core._settings import settings
|
34
34
|
|
35
35
|
if TYPE_CHECKING:
|
36
|
+
from anndata import AnnData
|
36
37
|
from lamindb_setup.core.types import UPathStr
|
38
|
+
from mudata import MuData
|
39
|
+
|
40
|
+
from lamindb.core.types import ScverseDataStructures
|
37
41
|
|
38
42
|
try:
|
39
|
-
from ..core.storage._zarr import
|
43
|
+
from ..core.storage._zarr import load_zarr
|
40
44
|
except ImportError:
|
41
45
|
|
42
|
-
def
|
46
|
+
def load_zarr(storepath, expected_type=None):  # type: ignore
    """Fallback used when `load_zarr` cannot be imported from `..core.storage._zarr`.

    Accepts the same `expected_type` argument as the real loader so that
    callers passing it get the installation hint rather than a TypeError.

    Raises:
        ImportError: Always, with a hint on how to install zarr.
    """
    # the requirement is quoted because a shell would otherwise treat `<` as
    # input redirection
    raise ImportError('Please install zarr: pip install "zarr<=2.18.4"')
|
44
48
|
|
45
49
|
|
@@ -47,7 +51,7 @@ is_run_from_ipython = getattr(builtins, "__IPYTHON__", False)
|
|
47
51
|
|
48
52
|
|
49
53
|
# tested in lamin-usecases
|
50
|
-
def load_fcs(*args, **kwargs) ->
|
54
|
+
def load_fcs(*args, **kwargs) -> AnnData:
|
51
55
|
"""Load an `.fcs` file to `AnnData`."""
|
52
56
|
try:
|
53
57
|
import readfcs
|
@@ -62,16 +66,16 @@ def load_tsv(path: UPathStr, **kwargs) -> pd.DataFrame:
|
|
62
66
|
return pd.read_csv(path_sanitized, sep="\t", **kwargs)
|
63
67
|
|
64
68
|
|
65
|
-
def load_h5ad(filepath, **kwargs) ->
|
69
|
+
def load_h5ad(filepath, **kwargs) -> AnnData:
|
66
70
|
"""Load an `.h5ad` file to `AnnData`."""
|
67
71
|
fs, filepath = infer_filesystem(filepath)
|
68
72
|
compression = kwargs.pop("compression", "infer")
|
69
73
|
with fs.open(filepath, mode="rb", compression=compression) as file:
|
70
|
-
adata =
|
74
|
+
adata = read_h5ad(file, backed=False, **kwargs)
|
71
75
|
return adata
|
72
76
|
|
73
77
|
|
74
|
-
def load_h5mu(filepath: UPathStr, **kwargs):
|
78
|
+
def load_h5mu(filepath: UPathStr, **kwargs) -> MuData:
|
75
79
|
"""Load an `.h5mu` file to `MuData`."""
|
76
80
|
import mudata as md
|
77
81
|
|
@@ -100,7 +104,7 @@ def load_html(path: UPathStr) -> None | UPathStr:
|
|
100
104
|
return path
|
101
105
|
|
102
106
|
|
103
|
-
def load_json(path: UPathStr) -> dict:
|
107
|
+
def load_json(path: UPathStr) -> dict[str, Any] | list[Any]:
|
104
108
|
"""Load `.json` to `dict`."""
|
105
109
|
import json
|
106
110
|
|
@@ -109,7 +113,7 @@ def load_json(path: UPathStr) -> dict:
|
|
109
113
|
return data
|
110
114
|
|
111
115
|
|
112
|
-
def load_yaml(path: UPathStr) -> dict:
|
116
|
+
def load_yaml(path: UPathStr) -> dict[str, Any] | list[Any]:
|
113
117
|
"""Load `.yaml` to `dict`."""
|
114
118
|
import yaml # type: ignore
|
115
119
|
|
@@ -156,7 +160,7 @@ FILE_LOADERS = {
|
|
156
160
|
".parquet": pd.read_parquet,
|
157
161
|
".parquet.gz": pd.read_parquet, # this doesn't work for externally gzipped files, REMOVE LATER
|
158
162
|
".fcs": load_fcs,
|
159
|
-
".zarr":
|
163
|
+
".zarr": load_zarr,
|
160
164
|
".html": load_html,
|
161
165
|
".json": load_json,
|
162
166
|
".yaml": load_yaml,
|
@@ -172,10 +176,15 @@ SUPPORTED_SUFFIXES = [sfx for sfx in FILE_LOADERS.keys() if sfx != ".rds"]
|
|
172
176
|
"""Suffixes with defined artifact loaders."""
|
173
177
|
|
174
178
|
|
175
|
-
def load_to_memory(
|
179
|
+
def load_to_memory(
|
180
|
+
filepath: UPathStr, **kwargs
|
181
|
+
) -> (
|
182
|
+
pd.DataFrame | ScverseDataStructures | dict[str, Any] | list[Any] | UPathStr | None
|
183
|
+
):
|
176
184
|
"""Load a file into memory.
|
177
185
|
|
178
186
|
Returns the filepath if no in-memory form is found.
|
187
|
+
May return None in interactive sessions for images.
|
179
188
|
"""
|
180
189
|
filepath = create_path(filepath)
|
181
190
|
|
@@ -194,4 +203,5 @@ def load_to_memory(filepath: UPathStr, **kwargs):
|
|
194
203
|
)
|
195
204
|
|
196
205
|
filepath = settings._storage_settings.cloud_to_local(filepath, print_progress=True)
|
206
|
+
|
197
207
|
return loader(filepath, **kwargs)
|
@@ -24,10 +24,10 @@ if TYPE_CHECKING:
|
|
24
24
|
|
25
25
|
|
26
26
|
def _load_h5ad_zarr(objpath: UPath):
|
27
|
-
from lamindb.core.loaders import
|
27
|
+
from lamindb.core.loaders import load_h5ad, load_zarr
|
28
28
|
|
29
29
|
if objpath.is_dir():
|
30
|
-
adata =
|
30
|
+
adata = load_zarr(objpath, expected_type="anndata")
|
31
31
|
else:
|
32
32
|
# read only local in backed for now
|
33
33
|
# in principle possible to read remote in backed also
|
lamindb/core/storage/_zarr.py
CHANGED
@@ -9,25 +9,60 @@ from anndata import __version__ as anndata_version
|
|
9
9
|
from anndata._io.specs import write_elem
|
10
10
|
from fsspec.implementations.local import LocalFileSystem
|
11
11
|
from lamin_utils import logger
|
12
|
-
from lamindb_setup.core.upath import create_mapper, infer_filesystem
|
12
|
+
from lamindb_setup.core.upath import S3FSMap, create_mapper, infer_filesystem
|
13
13
|
from packaging import version
|
14
14
|
|
15
|
+
from lamindb.core._compat import with_package
|
16
|
+
|
15
17
|
from ._anndata_sizes import _size_elem, _size_raw, size_adata
|
16
18
|
|
17
19
|
if version.parse(anndata_version) < version.parse("0.11.0"):
|
18
|
-
from anndata._io import read_zarr
|
20
|
+
from anndata._io import read_zarr as read_anndata_zarr
|
19
21
|
else:
|
20
|
-
from anndata.io import read_zarr
|
22
|
+
from anndata.io import read_zarr as read_anndata_zarr
|
21
23
|
|
22
24
|
|
23
25
|
if TYPE_CHECKING:
|
24
26
|
from anndata import AnnData
|
27
|
+
from fsspec import FSMap
|
25
28
|
from lamindb_setup.core.types import UPathStr
|
26
29
|
|
30
|
+
from lamindb.core.types import ScverseDataStructures
|
31
|
+
|
32
|
+
|
33
|
+
def create_zarr_open_obj(
    storepath: UPathStr, *, check: bool = True
) -> str | S3FSMap | FSMap:
    """Return the object to pass to zarr for opening the store at `storepath`.

    For a local filesystem this is the path string returned by
    `infer_filesystem`; for any other filesystem it is the mapper built by
    `create_mapper`, to which `check` is forwarded.
    """
    filesystem, path_str = infer_filesystem(storepath)
    if not isinstance(filesystem, LocalFileSystem):
        return create_mapper(filesystem, path_str, check=check)
    return path_str
|
45
|
+
|
46
|
+
|
47
|
+
def _identify_zarr_type_from_storage(
    storage: zarr.Group,
) -> Literal["anndata", "mudata", "spatialdata", "unknown"]:
    """Classify an already opened zarr group by inspecting its attributes.

    The "encoding-type" attribute decides between "anndata" and "mudata"; the
    presence of a "spatialdata_attrs" attribute means "spatialdata". Anything
    else, including an exception while reading the attributes (which is logged
    as a warning), yields "unknown".
    """
    try:
        attributes = storage.attrs
        encoding_type = attributes.get("encoding-type", "")
        if encoding_type == "anndata":
            return "anndata"
        if encoding_type == "MuData":
            return "mudata"
        if "spatialdata_attrs" in attributes:
            return "spatialdata"
    except Exception as error:
        logger.warning(f"an exception occurred {error}")
    return "unknown"
|
61
|
+
|
27
62
|
|
28
63
|
def identify_zarr_type(
|
29
64
|
storepath: UPathStr, *, check: bool = True
|
30
|
-
) -> Literal["anndata", "spatialdata", "unknown"]:
|
65
|
+
) -> Literal["anndata", "mudata", "spatialdata", "unknown"]:
|
31
66
|
"""Identify whether a zarr store is AnnData, SpatialData, or unknown type."""
|
32
67
|
# we can add these cheap suffix-based-checks later
|
33
68
|
# also need to check whether the .spatialdata.zarr suffix
|
@@ -39,38 +74,61 @@ def identify_zarr_type(
|
|
39
74
|
# elif ".anndata" in suffixes:
|
40
75
|
# return "anndata"
|
41
76
|
|
42
|
-
|
43
|
-
|
44
|
-
if isinstance(fs, LocalFileSystem):
|
45
|
-
open_obj = storepath_str
|
46
|
-
else:
|
47
|
-
open_obj = create_mapper(fs, storepath_str, check=check)
|
48
|
-
|
77
|
+
open_obj = create_zarr_open_obj(storepath, check=check)
|
49
78
|
try:
|
50
79
|
storage = zarr.open(open_obj, mode="r")
|
51
|
-
|
52
|
-
return "spatialdata"
|
53
|
-
if storage.attrs.get("encoding-type", "") == "anndata":
|
54
|
-
return "anndata"
|
80
|
+
return _identify_zarr_type_from_storage(storage)
|
55
81
|
except Exception as error:
|
56
|
-
logger.warning(
|
82
|
+
logger.warning(
|
83
|
+
f"an exception occured while trying to open the zarr store\n {error}"
|
84
|
+
)
|
57
85
|
return "unknown"
|
58
86
|
|
59
87
|
|
60
|
-
def
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
88
|
+
def load_zarr(
|
89
|
+
storepath: UPathStr,
|
90
|
+
expected_type: Literal["anndata", "mudata", "spatialdata"] = None,
|
91
|
+
) -> ScverseDataStructures:
|
92
|
+
"""Loads a zarr store and returns the corresponding scverse data structure.
|
93
|
+
|
94
|
+
Args:
|
95
|
+
storepath: Path to the zarr store
|
96
|
+
expected_type: If provided, ensures the zarr store is of this type ("anndata", "mudata", "spatialdata")
|
97
|
+
and raises ValueError if it's not
|
98
|
+
"""
|
99
|
+
open_obj = create_zarr_open_obj(storepath, check=True)
|
100
|
+
|
101
|
+
# Open the storage once
|
102
|
+
try:
|
103
|
+
storage = zarr.open(open_obj, mode="r")
|
104
|
+
except Exception as error:
|
105
|
+
raise ValueError(f"Could not open zarr store: {error}") from None
|
106
|
+
|
107
|
+
actual_type = _identify_zarr_type_from_storage(storage)
|
108
|
+
if expected_type is not None and actual_type != expected_type:
|
109
|
+
raise ValueError(
|
110
|
+
f"Expected zarr store of type '{expected_type}', but found '{actual_type}'"
|
111
|
+
)
|
112
|
+
|
113
|
+
match actual_type:
|
114
|
+
case "anndata":
|
115
|
+
scverse_obj = read_anndata_zarr(open_obj)
|
116
|
+
case "mudata":
|
117
|
+
scverse_obj = with_package("mudata", lambda mod: mod.read_zarr(open_obj))
|
118
|
+
case "spatialdata":
|
119
|
+
scverse_obj = with_package(
|
120
|
+
"spatialdata", lambda mod: mod.read_zarr(open_obj)
|
121
|
+
)
|
122
|
+
case "unknown" | _:
|
123
|
+
raise ValueError(
|
124
|
+
"Unable to determine zarr store format and therefore cannot load Artifact."
|
125
|
+
)
|
126
|
+
return scverse_obj
|
69
127
|
|
70
128
|
|
71
129
|
def write_adata_zarr(
|
72
130
|
adata: AnnData, storepath: UPathStr, callback=None, chunks=None, **dataset_kwargs
|
73
|
-
):
|
131
|
+
) -> None:
|
74
132
|
fs, storepath_str = infer_filesystem(storepath)
|
75
133
|
store = create_mapper(fs, storepath_str, create=True)
|
76
134
|
|
lamindb/core/storage/objects.py
CHANGED
@@ -1,25 +1,20 @@
|
|
1
1
|
from __future__ import annotations
|
2
2
|
|
3
3
|
from pathlib import PurePosixPath
|
4
|
-
from typing import TYPE_CHECKING, TypeAlias
|
4
|
+
from typing import TYPE_CHECKING, TypeAlias
|
5
5
|
|
6
6
|
from anndata import AnnData
|
7
7
|
from pandas import DataFrame
|
8
8
|
|
9
|
+
from lamindb.core._compat import (
|
10
|
+
with_package_obj,
|
11
|
+
)
|
12
|
+
from lamindb.core.types import ScverseDataStructures
|
13
|
+
|
9
14
|
if TYPE_CHECKING:
|
10
15
|
from lamindb_setup.core.types import UPathStr
|
11
16
|
|
12
|
-
|
13
|
-
MuData = TypeVar("MuData")
|
14
|
-
|
15
|
-
SupportedDataTypes: TypeAlias = AnnData | DataFrame | MuData | SpatialData
|
16
|
-
|
17
|
-
|
18
|
-
def is_package_installed(package_name):
|
19
|
-
import importlib.util
|
20
|
-
|
21
|
-
spec = importlib.util.find_spec(package_name)
|
22
|
-
return spec is not None
|
17
|
+
SupportedDataTypes: TypeAlias = DataFrame | ScverseDataStructures
|
23
18
|
|
24
19
|
|
25
20
|
def infer_suffix(dmem: SupportedDataTypes, format: str | None = None):
|
@@ -38,25 +33,34 @@ def infer_suffix(dmem: SupportedDataTypes, format: str | None = None):
|
|
38
33
|
if isinstance(dmem, DataFrame):
|
39
34
|
return ".parquet"
|
40
35
|
|
41
|
-
if
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
36
|
+
if with_package_obj(
|
37
|
+
dmem,
|
38
|
+
"MuData",
|
39
|
+
"mudata",
|
40
|
+
lambda obj: True, # Just checking type, not calling any method
|
41
|
+
)[0]:
|
42
|
+
return ".h5mu"
|
43
|
+
|
44
|
+
has_spatialdata, spatialdata_suffix = with_package_obj(
|
45
|
+
dmem,
|
46
|
+
"SpatialData",
|
47
|
+
"spatialdata",
|
48
|
+
lambda obj: (
|
49
|
+
format
|
50
|
+
if format is not None and format in {"spatialdata.zarr", "zarr"}
|
51
|
+
else ".zarr"
|
52
|
+
if format is None
|
53
|
+
else (_ for _ in ()).throw(
|
54
|
+
ValueError(
|
55
|
+
"Error when specifying SpatialData storage format, it should be"
|
56
|
+
f" 'zarr', 'spatialdata.zarr', not '{format}'. Check 'format'"
|
57
|
+
" or the suffix of 'key'."
|
58
|
+
)
|
59
|
+
)
|
60
|
+
),
|
61
|
+
)
|
62
|
+
if has_spatialdata:
|
63
|
+
return spatialdata_suffix
|
60
64
|
else:
|
61
65
|
raise NotImplementedError
|
62
66
|
|
@@ -78,18 +82,15 @@ def write_to_disk(dmem: SupportedDataTypes, filepath: UPathStr) -> None:
|
|
78
82
|
dmem.to_parquet(filepath)
|
79
83
|
return
|
80
84
|
|
81
|
-
if
|
82
|
-
|
83
|
-
|
84
|
-
if isinstance(dmem, MuData):
|
85
|
-
dmem.write(filepath)
|
86
|
-
return
|
85
|
+
if with_package_obj(dmem, "MuData", "mudata", lambda obj: obj.write(filepath))[0]:
|
86
|
+
return
|
87
87
|
|
88
|
-
if
|
89
|
-
|
88
|
+
if with_package_obj(
|
89
|
+
dmem,
|
90
|
+
"SpatialData",
|
91
|
+
"spatialdata",
|
92
|
+
lambda obj: obj.write(filepath, overwrite=True),
|
93
|
+
)[0]:
|
94
|
+
return
|
90
95
|
|
91
|
-
|
92
|
-
dmem.write(filepath, overwrite=True)
|
93
|
-
return
|
94
|
-
else:
|
95
|
-
raise NotImplementedError
|
96
|
+
raise NotImplementedError
|
lamindb/core/types.py
CHANGED
@@ -1,3 +1,8 @@
|
|
1
|
+
from __future__ import annotations
|
2
|
+
|
3
|
+
from typing import TYPE_CHECKING, TypeVar
|
4
|
+
|
5
|
+
from anndata import AnnData
|
1
6
|
from lamindb_setup.core.types import UPathStr
|
2
7
|
|
3
8
|
from lamindb.base.types import (
|
@@ -7,3 +12,8 @@ from lamindb.base.types import (
|
|
7
12
|
StrField,
|
8
13
|
TransformType,
|
9
14
|
)
|
15
|
+
|
16
|
+
MuData = TypeVar("MuData")
|
17
|
+
SpatialData = TypeVar("SpatialData")
|
18
|
+
|
19
|
+
ScverseDataStructures = AnnData | MuData | SpatialData
|