lamindb 1.1.0__py3-none-any.whl → 1.2.0__py3-none-any.whl

This diff compares publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between versions as they appear in their respective public registries.
Files changed (74)
  1. lamindb/__init__.py +33 -26
  2. lamindb/_finish.py +9 -1
  3. lamindb/_tracked.py +26 -3
  4. lamindb/_view.py +2 -3
  5. lamindb/base/__init__.py +1 -1
  6. lamindb/base/ids.py +1 -10
  7. lamindb/base/users.py +1 -4
  8. lamindb/core/__init__.py +7 -65
  9. lamindb/core/_compat.py +60 -0
  10. lamindb/core/_context.py +50 -22
  11. lamindb/core/_mapped_collection.py +4 -2
  12. lamindb/core/_settings.py +6 -6
  13. lamindb/core/_sync_git.py +1 -1
  14. lamindb/core/_track_environment.py +2 -1
  15. lamindb/core/datasets/_small.py +3 -3
  16. lamindb/core/loaders.py +43 -20
  17. lamindb/core/storage/_anndata_accessor.py +8 -3
  18. lamindb/core/storage/_backed_access.py +14 -7
  19. lamindb/core/storage/_pyarrow_dataset.py +24 -9
  20. lamindb/core/storage/_tiledbsoma.py +8 -6
  21. lamindb/core/storage/_zarr.py +104 -25
  22. lamindb/core/storage/objects.py +63 -28
  23. lamindb/core/storage/paths.py +16 -13
  24. lamindb/core/types.py +10 -0
  25. lamindb/curators/__init__.py +176 -149
  26. lamindb/errors.py +1 -1
  27. lamindb/integrations/_vitessce.py +4 -4
  28. lamindb/migrations/0089_subsequent_runs.py +159 -0
  29. lamindb/migrations/0090_runproject_project_runs.py +73 -0
  30. lamindb/migrations/{0088_squashed.py → 0090_squashed.py} +245 -177
  31. lamindb/models/__init__.py +79 -0
  32. lamindb/{core → models}/_describe.py +3 -3
  33. lamindb/{core → models}/_django.py +8 -5
  34. lamindb/{core → models}/_feature_manager.py +103 -87
  35. lamindb/{_from_values.py → models/_from_values.py} +5 -2
  36. lamindb/{core/versioning.py → models/_is_versioned.py} +94 -6
  37. lamindb/{core → models}/_label_manager.py +10 -17
  38. lamindb/{core/relations.py → models/_relations.py} +8 -1
  39. lamindb/models/artifact.py +2602 -0
  40. lamindb/{_can_curate.py → models/can_curate.py} +349 -180
  41. lamindb/models/collection.py +683 -0
  42. lamindb/models/core.py +135 -0
  43. lamindb/models/feature.py +643 -0
  44. lamindb/models/flextable.py +163 -0
  45. lamindb/{_parents.py → models/has_parents.py} +55 -49
  46. lamindb/models/project.py +384 -0
  47. lamindb/{_query_manager.py → models/query_manager.py} +10 -8
  48. lamindb/{_query_set.py → models/query_set.py} +64 -32
  49. lamindb/models/record.py +1762 -0
  50. lamindb/models/run.py +563 -0
  51. lamindb/{_save.py → models/save.py} +18 -8
  52. lamindb/models/schema.py +732 -0
  53. lamindb/models/transform.py +360 -0
  54. lamindb/models/ulabel.py +249 -0
  55. {lamindb-1.1.0.dist-info → lamindb-1.2.0.dist-info}/METADATA +6 -6
  56. lamindb-1.2.0.dist-info/RECORD +95 -0
  57. lamindb/_artifact.py +0 -1361
  58. lamindb/_collection.py +0 -440
  59. lamindb/_feature.py +0 -316
  60. lamindb/_is_versioned.py +0 -40
  61. lamindb/_record.py +0 -1065
  62. lamindb/_run.py +0 -60
  63. lamindb/_schema.py +0 -347
  64. lamindb/_storage.py +0 -15
  65. lamindb/_transform.py +0 -170
  66. lamindb/_ulabel.py +0 -56
  67. lamindb/_utils.py +0 -9
  68. lamindb/base/validation.py +0 -63
  69. lamindb/core/_data.py +0 -491
  70. lamindb/core/fields.py +0 -12
  71. lamindb/models.py +0 -4435
  72. lamindb-1.1.0.dist-info/RECORD +0 -95
  73. {lamindb-1.1.0.dist-info → lamindb-1.2.0.dist-info}/LICENSE +0 -0
  74. {lamindb-1.1.0.dist-info → lamindb-1.2.0.dist-info}/WHEEL +0 -0
lamindb/core/loaders.py CHANGED
@@ -20,26 +20,30 @@ from __future__ import annotations
 import builtins
 import re
 from pathlib import Path
-from typing import TYPE_CHECKING
+from typing import TYPE_CHECKING, Any
 
-import anndata as ad
 import pandas as pd
+from anndata import read_h5ad
 from lamin_utils import logger
 from lamindb_setup.core.upath import (
     create_path,
     infer_filesystem,
 )
 
-from ._settings import settings
+from ..core._settings import settings
 
 if TYPE_CHECKING:
+    from anndata import AnnData
     from lamindb_setup.core.types import UPathStr
+    from mudata import MuData
+
+    from lamindb.core.types import ScverseDataStructures
 
 try:
-    from .storage._zarr import load_anndata_zarr
+    from ..core.storage._zarr import load_zarr
 except ImportError:
 
-    def load_anndata_zarr(storepath):  # type: ignore
+    def load_zarr(storepath):  # type: ignore
         raise ImportError("Please install zarr: pip install zarr<=2.18.4")
 
@@ -47,7 +51,7 @@ is_run_from_ipython = getattr(builtins, "__IPYTHON__", False)
 
 
 # tested in lamin-usecases
-def load_fcs(*args, **kwargs) -> ad.AnnData:
+def load_fcs(*args, **kwargs) -> AnnData:
     """Load an `.fcs` file to `AnnData`."""
     try:
         import readfcs
@@ -62,16 +66,16 @@ def load_tsv(path: UPathStr, **kwargs) -> pd.DataFrame:
     return pd.read_csv(path_sanitized, sep="\t", **kwargs)
 
 
-def load_h5ad(filepath, **kwargs) -> ad.AnnData:
+def load_h5ad(filepath, **kwargs) -> AnnData:
     """Load an `.h5ad` file to `AnnData`."""
     fs, filepath = infer_filesystem(filepath)
-
-    with fs.open(filepath, mode="rb") as file:
-        adata = ad.read_h5ad(file, backed=False, **kwargs)
+    compression = kwargs.pop("compression", "infer")
+    with fs.open(filepath, mode="rb", compression=compression) as file:
+        adata = read_h5ad(file, backed=False, **kwargs)
     return adata
 
 
-def load_h5mu(filepath: UPathStr, **kwargs):
+def load_h5mu(filepath: UPathStr, **kwargs) -> MuData:
     """Load an `.h5mu` file to `MuData`."""
     import mudata as md
 
@@ -100,7 +104,7 @@ def load_html(path: UPathStr) -> None | UPathStr:
     return path
 
 
-def load_json(path: UPathStr) -> dict:
+def load_json(path: UPathStr) -> dict[str, Any] | list[Any]:
     """Load `.json` to `dict`."""
     import json
 
@@ -109,7 +113,7 @@ def load_json(path: UPathStr) -> dict:
     return data
 
 
-def load_yaml(path: UPathStr) -> dict:
+def load_yaml(path: UPathStr) -> dict[str, Any] | list[Any]:
     """Load `.yaml` to `dict`."""
    import yaml  # type: ignore
 
@@ -148,11 +152,15 @@ def load_rds(path: UPathStr) -> UPathStr:
 
 FILE_LOADERS = {
     ".csv": pd.read_csv,
+    ".csv.gz": pd.read_csv,
     ".tsv": load_tsv,
+    ".tsv.gz": load_tsv,
     ".h5ad": load_h5ad,
+    ".h5ad.gz": load_h5ad,
     ".parquet": pd.read_parquet,
+    ".parquet.gz": pd.read_parquet,  # this doesn't work for externally gzipped files, REMOVE LATER
     ".fcs": load_fcs,
-    ".zarr": load_anndata_zarr,
+    ".zarr": load_zarr,
     ".html": load_html,
     ".json": load_json,
     ".yaml": load_yaml,
@@ -168,17 +176,32 @@ SUPPORTED_SUFFIXES = [sfx for sfx in FILE_LOADERS.keys() if sfx != ".rds"]
 """Suffixes with defined artifact loaders."""
 
 
-def load_to_memory(filepath: UPathStr, **kwargs):
+def load_to_memory(
+    filepath: UPathStr, **kwargs
+) -> (
+    pd.DataFrame | ScverseDataStructures | dict[str, Any] | list[Any] | UPathStr | None
+):
     """Load a file into memory.
 
     Returns the filepath if no in-memory form is found.
+    May return None in interactive sessions for images.
     """
     filepath = create_path(filepath)
 
-    filepath = settings._storage_settings.cloud_to_local(filepath, print_progress=True)
+    # infer the correct suffix when .gz is present
+    suffixes = filepath.suffixes
+    suffix = (
+        "".join(suffixes[-2:])
+        if len(suffixes) > 1 and ".gz" in suffixes
+        else filepath.suffix
+    )
 
-    loader = FILE_LOADERS.get(filepath.suffix)
+    loader = FILE_LOADERS.get(suffix, None)
     if loader is None:
-        return filepath
-    else:
-        return loader(filepath, **kwargs)
+        raise NotImplementedError(
+            f"There is no loader for {suffix} files. Use .cache() to get the path."
+        )
+
+    filepath = settings._storage_settings.cloud_to_local(filepath, print_progress=True)
+
+    return loader(filepath, **kwargs)
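The net effect of this change: load_to_memory now resolves a gz-aware suffix before dispatching to a loader, and raises NotImplementedError instead of silently returning the path when no loader exists. A minimal standalone sketch of the new suffix inference (the helper name is illustrative, not part of lamindb):

from pathlib import Path

def gz_aware_suffix(filepath: Path) -> str:
    # mirrors the new logic: join the last two suffixes when ".gz" is present
    suffixes = filepath.suffixes
    return (
        "".join(suffixes[-2:])
        if len(suffixes) > 1 and ".gz" in suffixes
        else filepath.suffix
    )

print(gz_aware_suffix(Path("counts.csv.gz")))  # ".csv.gz" -> dispatches to pd.read_csv
print(gz_aware_suffix(Path("data.h5ad")))      # ".h5ad"   -> dispatches to load_h5ad
print(gz_aware_suffix(Path("image.png")))      # ".png"    -> no loader, NotImplementedError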
lamindb/core/storage/_anndata_accessor.py CHANGED
@@ -16,6 +16,7 @@ from anndata._io.h5ad import read_dataframe_legacy as read_dataframe_legacy_h5
 from anndata._io.specs.registry import get_spec, read_elem, read_elem_partial
 from anndata.compat import _read_attr
 from fsspec.implementations.local import LocalFileSystem
+from fsspec.utils import infer_compression
 from lamin_utils import logger
 from lamindb_setup.core.upath import create_mapper, infer_filesystem
 from packaging import version
@@ -152,9 +153,13 @@ registry = AccessRegistry()
 
 
 @registry.register_open("h5py")
-def open(filepath: UPathStr, mode: str = "r"):
+def open(filepath: UPathStr, mode: str = "r", compression: str | None = "infer"):
     fs, file_path_str = infer_filesystem(filepath)
-    if isinstance(fs, LocalFileSystem):
+    # we don't open compressed files directly because we need fsspec to uncompress on .open
+    compression = (
+        infer_compression(file_path_str) if compression == "infer" else compression
+    )
+    if isinstance(fs, LocalFileSystem) and compression is None:
         assert mode in {"r", "r+", "a", "w", "w-"}, f"Unknown mode {mode}!"  # noqa: S101
         return None, h5py.File(file_path_str, mode=mode)
     if mode == "r":
@@ -165,7 +170,7 @@ def open(filepath: UPathStr, mode: str = "r"):
         conn_mode = "ab"
     else:
         raise ValueError(f"Unknown mode {mode}! Should be 'r', 'w' or 'a'.")
-    conn = fs.open(file_path_str, mode=conn_mode)
+    conn = fs.open(file_path_str, mode=conn_mode, compression=compression)
     try:
         storage = h5py.File(conn, mode=mode)
     except Exception as e:
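The compression handling leans on fsspec's filename-based inference: a local, uncompressed file still opens directly with h5py, while anything compressed is routed through fs.open(..., compression=...) so fsspec decompresses on the fly. A quick illustration of the inference step, using only fsspec:

from fsspec.utils import infer_compression

# compression is inferred from the filename extension, mirroring the new default
print(infer_compression("data.h5ad.gz"))  # "gzip" -> opened via fsspec with decompression
print(infer_compression("data.h5ad"))     # None   -> local files still open with h5py.File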
lamindb/core/storage/_backed_access.py CHANGED
@@ -5,8 +5,6 @@ from typing import TYPE_CHECKING, Any, Callable
 
 from anndata._io.specs.registry import get_spec
 
-from lamindb.models import Artifact
-
 from ._anndata_accessor import AnnDataAccessor, StorageType, registry
 from ._pyarrow_dataset import _is_pyarrow_dataset, _open_pyarrow_dataset
 from ._tiledbsoma import _open_tiledbsoma
@@ -19,6 +17,8 @@ if TYPE_CHECKING:
     from tiledbsoma import Experiment as SOMAExperiment
     from upath import UPath
 
+    from lamindb.models.artifact import Artifact
+
 
 # this dynamically creates a subclass of a context manager class
 # and reassigns it to an instance of the superclass
@@ -70,9 +70,12 @@ def backed_access(
     artifact_or_filepath: Artifact | UPath,
     mode: str = "r",
     using_key: str | None = None,
+    **kwargs,
 ) -> (
     AnnDataAccessor | BackedAccessor | SOMACollection | SOMAExperiment | PyArrowDataset
 ):
+    from lamindb.models import Artifact
+
     if isinstance(artifact_or_filepath, Artifact):
         objectpath, _ = filepath_from_artifact(
             artifact_or_filepath, using_key=using_key
@@ -80,18 +83,22 @@ def backed_access(
     else:
         objectpath = artifact_or_filepath
     name = objectpath.name
-    suffix = objectpath.suffix
+    # ignore .gz, only check the real suffix
+    suffixes = objectpath.suffixes
+    suffix = (
+        suffixes[-2] if len(suffixes) > 1 and ".gz" in suffixes else objectpath.suffix
+    )
 
     if name == "soma" or suffix == ".tiledbsoma":
         if mode not in {"r", "w"}:
             raise ValueError("`mode` should be either 'r' or 'w' for tiledbsoma.")
-        return _open_tiledbsoma(objectpath, mode=mode)  # type: ignore
+        return _open_tiledbsoma(objectpath, mode=mode, **kwargs)  # type: ignore
     elif suffix in {".h5", ".hdf5", ".h5ad"}:
-        conn, storage = registry.open("h5py", objectpath, mode=mode)
+        conn, storage = registry.open("h5py", objectpath, mode=mode, **kwargs)
     elif suffix == ".zarr":
-        conn, storage = registry.open("zarr", objectpath, mode=mode)
+        conn, storage = registry.open("zarr", objectpath, mode=mode, **kwargs)
     elif _is_pyarrow_dataset(objectpath):
-        return _open_pyarrow_dataset(objectpath)
+        return _open_pyarrow_dataset(objectpath, **kwargs)
     else:
         raise ValueError(
             "The object should have .h5, .hdf5, .h5ad, .zarr, .tiledbsoma suffix "
lamindb/core/storage/_pyarrow_dataset.py CHANGED
@@ -18,15 +18,30 @@ def _is_pyarrow_dataset(paths: UPath | list[UPath]) -> bool:
     # we don't check here that the filesystem is the same
     # but this is a requirement for pyarrow.dataset.dataset
     if isinstance(paths, list):
-        suffixes = {path.suffix for path in paths}
-    elif paths.is_file():
-        suffixes = {paths.suffix}
+        path_list = paths
+    elif paths.is_dir():
+        path_list = [path for path in paths.rglob("*") if path.suffix != ""]
     else:
-        suffixes = {path.suffix for path in paths.rglob("*") if path.suffix != ""}
-    return len(suffixes) == 1 and suffixes.pop() in PYARROW_SUFFIXES
-
-
-def _open_pyarrow_dataset(paths: UPath | list[UPath]) -> PyArrowDataset:
+        path_list = [paths]
+    suffix = None
+    for path in path_list:
+        path_suffixes = path.suffixes
+        # this doesn't work for externally gzipped files, REMOVE LATER
+        path_suffix = (
+            path_suffixes[-2]
+            if len(path_suffixes) > 1 and ".gz" in path_suffixes
+            else path.suffix
+        )
+        if path_suffix not in PYARROW_SUFFIXES:
+            return False
+        elif suffix is None:
+            suffix = path_suffix
+        elif path_suffix != suffix:
+            return False
+    return True
+
+
+def _open_pyarrow_dataset(paths: UPath | list[UPath], **kwargs) -> PyArrowDataset:
     if isinstance(paths, list):
         path0 = paths[0]
         if isinstance(path0, LocalPathClasses):
@@ -38,4 +53,4 @@ def _open_pyarrow_dataset(paths: UPath | list[UPath]) -> PyArrowDataset:
     else:
         paths_str, filesystem = paths.path, paths.fs
 
-    return pyarrow.dataset.dataset(paths_str, filesystem=filesystem)
+    return pyarrow.dataset.dataset(paths_str, filesystem=filesystem, **kwargs)
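Kwargs forwarded by _open_pyarrow_dataset land directly in pyarrow.dataset.dataset, so callers can, for example, fix the format explicitly instead of relying on suffix sniffing. A minimal sketch; the directory path is an assumption:

import pyarrow.dataset as ds

# equivalent of what _open_pyarrow_dataset now allows: an explicit format kwarg
dataset = ds.dataset("data/parquet_dir/", format="parquet")
print(dataset.schema)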
lamindb/core/storage/_tiledbsoma.py CHANGED
@@ -12,8 +12,6 @@ from lamindb_setup.core._settings_storage import get_storage_region
 from lamindb_setup.core.upath import LocalPathClasses, create_path
 from packaging import version
 
-from lamindb.models import Artifact, Run
-
 if TYPE_CHECKING:
     from lamindb_setup.core.types import UPathStr
     from tiledbsoma import Collection as SOMACollection
@@ -21,12 +19,15 @@ if TYPE_CHECKING:
     from tiledbsoma import Measurement as SOMAMeasurement
     from upath import UPath
 
+    from lamindb.models.artifact import Artifact
+    from lamindb.models.run import Run
+
 
 def _load_h5ad_zarr(objpath: UPath):
-    from lamindb.core.loaders import load_anndata_zarr, load_h5ad
+    from lamindb.core.loaders import load_h5ad, load_zarr
 
     if objpath.is_dir():
-        adata = load_anndata_zarr(objpath)
+        adata = load_zarr(objpath, expected_type="anndata")
     else:
         # read only local in backed for now
         # in principle possible to read remote in backed also
@@ -134,9 +135,10 @@ def save_tiledbsoma_experiment(
     except ImportError as e:
         raise ImportError("Please install tiledbsoma: pip install tiledbsoma") from e
 
-    from lamindb.core._data import get_run
     from lamindb.core.storage.paths import auto_storage_key_from_artifact_uid
-    from lamindb.core.versioning import create_uid
+    from lamindb.models import Artifact
+    from lamindb.models._is_versioned import create_uid
+    from lamindb.models.artifact import get_run
 
     run = get_run(run)
lamindb/core/storage/_zarr.py CHANGED
@@ -1,55 +1,134 @@
 from __future__ import annotations
 
 import warnings
-from typing import TYPE_CHECKING
+from typing import TYPE_CHECKING, Literal
 
 import scipy.sparse as sparse
 import zarr
 from anndata import __version__ as anndata_version
 from anndata._io.specs import write_elem
-from anndata._io.specs.registry import get_spec
 from fsspec.implementations.local import LocalFileSystem
-from lamindb_setup.core.upath import create_mapper, infer_filesystem
+from lamin_utils import logger
+from lamindb_setup.core.upath import S3FSMap, create_mapper, infer_filesystem
 from packaging import version
 
+from lamindb.core._compat import with_package
+
 from ._anndata_sizes import _size_elem, _size_raw, size_adata
 
 if version.parse(anndata_version) < version.parse("0.11.0"):
-    from anndata._io import read_zarr
+    from anndata._io import read_zarr as read_anndata_zarr
 else:
-    from anndata.io import read_zarr
+    from anndata.io import read_zarr as read_anndata_zarr
 
 
 if TYPE_CHECKING:
     from anndata import AnnData
+    from fsspec import FSMap
     from lamindb_setup.core.types import UPathStr
 
+    from lamindb.core.types import ScverseDataStructures
 
-def zarr_is_adata(storepath: UPathStr) -> bool:
-    fs, storepath_str = infer_filesystem(storepath)
-    if isinstance(fs, LocalFileSystem):
-        # this is faster than through an fsspec mapper for local
-        open_obj = storepath_str
-    else:
-        open_obj = create_mapper(fs, storepath_str, check=True)
-    storage = zarr.open(open_obj, mode="r")
-    return get_spec(storage).encoding_type == "anndata"
 
-
-def load_anndata_zarr(storepath: UPathStr) -> AnnData:
+def create_zarr_open_obj(
+    storepath: UPathStr, *, check: bool = True
+) -> str | S3FSMap | FSMap:
+    """Creates the correct object to open a zarr store, depending on whether it is local or remote."""
     fs, storepath_str = infer_filesystem(storepath)
+
     if isinstance(fs, LocalFileSystem):
-        # this is faster than through an fsspec mapper for local
         open_obj = storepath_str
     else:
-        open_obj = create_mapper(fs, storepath_str, check=True)
-    adata = read_zarr(open_obj)
-    return adata
+        open_obj = create_mapper(fs, storepath_str, check=check)
+
+    return open_obj
+
+
+def _identify_zarr_type_from_storage(
+    storage: zarr.Group,
+) -> Literal["anndata", "mudata", "spatialdata", "unknown"]:
+    """Internal helper to identify the zarr type from an open storage object."""
+    try:
+        if storage.attrs.get("encoding-type", "") == "anndata":
+            return "anndata"
+        elif storage.attrs.get("encoding-type", "") == "MuData":
+            return "mudata"
+        elif "spatialdata_attrs" in storage.attrs:
+            return "spatialdata"
+    except Exception as error:
+        logger.warning(f"an exception occurred: {error}")
+    return "unknown"
+
+
+def identify_zarr_type(
+    storepath: UPathStr, *, check: bool = True
+) -> Literal["anndata", "mudata", "spatialdata", "unknown"]:
+    """Identify whether a zarr store is AnnData, MuData, SpatialData, or of unknown type."""
+    # we can add these cheap suffix-based checks later
+    # also need to check whether the .spatialdata.zarr suffix
+    # actually becomes a "standard"; currently we don't recognize it
+    # unlike ".anndata.zarr" in VALID_SUFFIXES
+    # suffixes = UPath(storepath).suffixes
+    # if ".spatialdata" in suffixes:
+    #     return "spatialdata"
+    # elif ".anndata" in suffixes:
+    #     return "anndata"
+
+    open_obj = create_zarr_open_obj(storepath, check=check)
+    try:
+        storage = zarr.open(open_obj, mode="r")
+        return _identify_zarr_type_from_storage(storage)
+    except Exception as error:
+        logger.warning(
+            f"an exception occurred while trying to open the zarr store\n {error}"
+        )
+    return "unknown"
+
+
+def load_zarr(
+    storepath: UPathStr,
+    expected_type: Literal["anndata", "mudata", "spatialdata"] | None = None,
+) -> ScverseDataStructures:
+    """Loads a zarr store and returns the corresponding scverse data structure.
+
+    Args:
+        storepath: Path to the zarr store.
+        expected_type: If provided, ensures the zarr store is of this type
+            ("anndata", "mudata", "spatialdata") and raises a ValueError if it is not.
+    """
+    open_obj = create_zarr_open_obj(storepath, check=True)
+
+    # open the storage once
+    try:
+        storage = zarr.open(open_obj, mode="r")
+    except Exception as error:
+        raise ValueError(f"Could not open zarr store: {error}") from None
+
+    actual_type = _identify_zarr_type_from_storage(storage)
+    if expected_type is not None and actual_type != expected_type:
+        raise ValueError(
+            f"Expected zarr store of type '{expected_type}', but found '{actual_type}'"
+        )
+
+    match actual_type:
+        case "anndata":
+            scverse_obj = read_anndata_zarr(open_obj)
+        case "mudata":
+            scverse_obj = with_package("mudata", lambda mod: mod.read_zarr(open_obj))
+        case "spatialdata":
+            scverse_obj = with_package(
+                "spatialdata", lambda mod: mod.read_zarr(open_obj)
+            )
+        case "unknown" | _:
+            raise ValueError(
+                "Unable to determine the zarr store format and therefore cannot load the Artifact."
+            )
+    return scverse_obj
 
 
 def write_adata_zarr(
     adata: AnnData, storepath: UPathStr, callback=None, chunks=None, **dataset_kwargs
-):
+) -> None:
     fs, storepath_str = infer_filesystem(storepath)
     store = create_mapper(fs, storepath_str, create=True)
 
@@ -65,7 +144,7 @@ def write_adata_zarr(
     adata_size = None
     cumulative_val = 0
 
-    def _cb(key_write: str | None = None):
+    def _report_progress(key_write: str | None = None):
         nonlocal adata_size
         nonlocal cumulative_val
 
@@ -91,9 +170,9 @@ def write_adata_zarr(
 
     def _write_elem_cb(f, k, elem, dataset_kwargs):
         write_elem(f, k, elem, dataset_kwargs=dataset_kwargs)
-        _cb(k)
+        _report_progress(k)
 
-    _cb(None)
+    _report_progress(None)
     with warnings.catch_warnings():
         warnings.filterwarnings("ignore", category=UserWarning, module="zarr")
 
@@ -114,4 +193,4 @@ def write_adata_zarr(
     )
     _write_elem_cb(f, "raw", adata.raw, dataset_kwargs=dataset_kwargs)
     # todo: fix size less than total at the end
-    _cb(None)
+    _report_progress(None)
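Taken together, identify_zarr_type and load_zarr replace the AnnData-only helpers with type-aware loading. A minimal sketch, assuming zarr and anndata are installed and writing a local AnnData store first:

import anndata as ad
import numpy as np
from lamindb.core.storage._zarr import identify_zarr_type, load_zarr

adata = ad.AnnData(X=np.ones((3, 2), dtype=np.float32))
adata.write_zarr("example.anndata.zarr")

print(identify_zarr_type("example.anndata.zarr"))  # "anndata"
adata2 = load_zarr("example.anndata.zarr", expected_type="anndata")
# load_zarr("example.anndata.zarr", expected_type="spatialdata") would raise ValueError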
lamindb/core/storage/objects.py CHANGED
@@ -1,62 +1,97 @@
 from __future__ import annotations
 
 from pathlib import PurePosixPath
-from typing import TYPE_CHECKING
+from typing import TYPE_CHECKING, TypeAlias
 
 from anndata import AnnData
 from pandas import DataFrame
 
+from lamindb.core._compat import (
+    with_package_obj,
+)
+from lamindb.core.types import ScverseDataStructures
+
 if TYPE_CHECKING:
     from lamindb_setup.core.types import UPathStr
 
-
-def _mudata_is_installed():
-    try:
-        import mudata  # noqa: F401
-    except ImportError:
-        return False
-    return True
+    SupportedDataTypes: TypeAlias = DataFrame | ScverseDataStructures
 
 
-def infer_suffix(dmem, adata_format: str | None = None):
+def infer_suffix(dmem: SupportedDataTypes, format: str | None = None):
     """Infer LaminDB storage file suffix from a data object."""
     if isinstance(dmem, AnnData):
-        if adata_format is not None:
-            if adata_format not in {"h5ad", "zarr", "anndata.zarr"}:
+        if format is not None:
+            if format not in {"h5ad", "zarr", "anndata.zarr"}:
                 raise ValueError(
                     "Error when specifying AnnData storage format, it should be"
-                    f" 'h5ad', 'zarr', not '{adata_format}'. Check 'format'"
+                    f" 'h5ad', 'zarr', not '{format}'. Check 'format'"
                     " or the suffix of 'key'."
                 )
-            return "." + adata_format
+            return "." + format
        return ".h5ad"
-    elif isinstance(dmem, DataFrame):
+
+    if isinstance(dmem, DataFrame):
         return ".parquet"
-    else:
-        if _mudata_is_installed():
-            from mudata import MuData
 
-            if isinstance(dmem, MuData):
-                return ".h5mu"
+    if with_package_obj(
+        dmem,
+        "MuData",
+        "mudata",
+        lambda obj: True,  # just checking the type, not calling any method
+    )[0]:
+        return ".h5mu"
+
+    has_spatialdata, spatialdata_suffix = with_package_obj(
+        dmem,
+        "SpatialData",
+        "spatialdata",
+        lambda obj: "."
+        + (
+            format
+            if format is not None and format in {"spatialdata.zarr", "zarr"}
+            else ".zarr"
+            if format is None
+            else (_ for _ in ()).throw(
+                ValueError(
+                    "Error when specifying SpatialData storage format, it should be"
+                    f" 'zarr', 'spatialdata.zarr', not '{format}'. Check 'format'"
+                    " or the suffix of 'key'."
+                )
+            )
+        ),
+    )
+    if has_spatialdata:
+        return spatialdata_suffix
+    else:
         raise NotImplementedError
 
 
-def write_to_disk(dmem, filepath: UPathStr):
+def write_to_disk(dmem: SupportedDataTypes, filepath: UPathStr) -> None:
+    """Writes the passed in-memory data to disk at the specified path."""
     if isinstance(dmem, AnnData):
         suffix = PurePosixPath(filepath).suffix
         if suffix == ".h5ad":
             dmem.write_h5ad(filepath)
+            return
         elif suffix == ".zarr":
             dmem.write_zarr(filepath)
+            return
         else:
             raise NotImplementedError
-    elif isinstance(dmem, DataFrame):
+
+    if isinstance(dmem, DataFrame):
         dmem.to_parquet(filepath)
-    else:
-        if _mudata_is_installed():
-            from mudata import MuData
+        return
 
-            if isinstance(dmem, MuData):
-                dmem.write(filepath)
-                return
-            raise NotImplementedError
+    if with_package_obj(dmem, "MuData", "mudata", lambda obj: obj.write(filepath))[0]:
+        return
+
+    if with_package_obj(
+        dmem,
+        "SpatialData",
+        "spatialdata",
+        lambda obj: obj.write(filepath, overwrite=True),
+    )[0]:
+        return
+
+    raise NotImplementedError
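Both of these call sites depend on the new lamindb/core/_compat.py (+60 lines, not shown in this diff). Inferred from the usage above only, a compatible sketch of the two helpers could look like the following; the shipped implementation may differ:

# hypothetical reconstruction from the call sites above; not the actual _compat.py
from importlib import import_module
from typing import Any, Callable, TypeVar

T = TypeVar("T")


def with_package(package: str, operation: Callable[[Any], T]) -> T:
    """Import `package` and apply `operation` to the module, with an install hint on failure."""
    try:
        module = import_module(package)
    except ImportError as e:
        raise ImportError(f"Please install {package}: pip install {package}") from e
    return operation(module)


def with_package_obj(
    obj: Any, cls_name: str, package: str, operation: Callable[[Any], T]
) -> tuple[bool, T | None]:
    """Apply `operation` to `obj` if it is an instance of `package.cls_name`.

    Returns (handled, result) so callers can branch on whether the type matched.
    """
    try:
        module = import_module(package)
    except ImportError:
        return False, None
    cls = getattr(module, cls_name, None)
    if cls is not None and isinstance(obj, cls):
        return True, operation(obj)
    return False, None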