lamindb 0.76.8__py3-none-any.whl → 0.76.10__py3-none-any.whl

This diff represents the content of publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in the public registry.
Files changed (62)
  1. lamindb/__init__.py +114 -113
  2. lamindb/_artifact.py +1206 -1205
  3. lamindb/_can_validate.py +621 -579
  4. lamindb/_collection.py +390 -387
  5. lamindb/_curate.py +1603 -1601
  6. lamindb/_feature.py +155 -155
  7. lamindb/_feature_set.py +244 -242
  8. lamindb/_filter.py +23 -23
  9. lamindb/_finish.py +250 -256
  10. lamindb/_from_values.py +403 -382
  11. lamindb/_is_versioned.py +40 -40
  12. lamindb/_parents.py +476 -476
  13. lamindb/_query_manager.py +125 -125
  14. lamindb/_query_set.py +364 -362
  15. lamindb/_record.py +668 -649
  16. lamindb/_run.py +60 -57
  17. lamindb/_save.py +310 -308
  18. lamindb/_storage.py +14 -14
  19. lamindb/_transform.py +130 -127
  20. lamindb/_ulabel.py +56 -56
  21. lamindb/_utils.py +9 -9
  22. lamindb/_view.py +72 -72
  23. lamindb/core/__init__.py +94 -94
  24. lamindb/core/_context.py +590 -574
  25. lamindb/core/_data.py +510 -438
  26. lamindb/core/_django.py +209 -0
  27. lamindb/core/_feature_manager.py +994 -867
  28. lamindb/core/_label_manager.py +289 -253
  29. lamindb/core/_mapped_collection.py +631 -597
  30. lamindb/core/_settings.py +188 -187
  31. lamindb/core/_sync_git.py +138 -138
  32. lamindb/core/_track_environment.py +27 -27
  33. lamindb/core/datasets/__init__.py +59 -59
  34. lamindb/core/datasets/_core.py +581 -571
  35. lamindb/core/datasets/_fake.py +36 -36
  36. lamindb/core/exceptions.py +90 -90
  37. lamindb/core/fields.py +12 -12
  38. lamindb/core/loaders.py +164 -164
  39. lamindb/core/schema.py +56 -56
  40. lamindb/core/storage/__init__.py +25 -25
  41. lamindb/core/storage/_anndata_accessor.py +741 -740
  42. lamindb/core/storage/_anndata_sizes.py +41 -41
  43. lamindb/core/storage/_backed_access.py +98 -98
  44. lamindb/core/storage/_tiledbsoma.py +204 -204
  45. lamindb/core/storage/_valid_suffixes.py +21 -21
  46. lamindb/core/storage/_zarr.py +110 -110
  47. lamindb/core/storage/objects.py +62 -62
  48. lamindb/core/storage/paths.py +172 -172
  49. lamindb/core/subsettings/__init__.py +12 -12
  50. lamindb/core/subsettings/_creation_settings.py +38 -38
  51. lamindb/core/subsettings/_transform_settings.py +21 -21
  52. lamindb/core/types.py +19 -19
  53. lamindb/core/versioning.py +146 -158
  54. lamindb/integrations/__init__.py +12 -12
  55. lamindb/integrations/_vitessce.py +107 -107
  56. lamindb/setup/__init__.py +14 -14
  57. lamindb/setup/core/__init__.py +4 -4
  58. {lamindb-0.76.8.dist-info → lamindb-0.76.10.dist-info}/LICENSE +201 -201
  59. {lamindb-0.76.8.dist-info → lamindb-0.76.10.dist-info}/METADATA +8 -8
  60. lamindb-0.76.10.dist-info/RECORD +61 -0
  61. {lamindb-0.76.8.dist-info → lamindb-0.76.10.dist-info}/WHEEL +1 -1
  62. lamindb-0.76.8.dist-info/RECORD +0 -60
lamindb/core/storage/_tiledbsoma.py
@@ -1,204 +1,204 @@
from __future__ import annotations

from typing import TYPE_CHECKING, Literal

from anndata import AnnData, read_h5ad
from lamindb_setup import settings as setup_settings
from lamindb_setup.core._settings_storage import get_storage_region
from lamindb_setup.core.upath import LocalPathClasses, create_path
from lnschema_core import Artifact, Run

if TYPE_CHECKING:
    from lamindb_setup.core.types import UPathStr
    from tiledbsoma import Collection as SOMACollection
    from tiledbsoma import Experiment as SOMAExperiment
    from upath import UPath


def _load_h5ad_zarr(objpath: UPath):
    from lamindb.core.loaders import load_anndata_zarr, load_h5ad

    if objpath.is_dir():
        adata = load_anndata_zarr(objpath)
    else:
        # read only local in backed mode for now
        # in principle it is possible to read remote in backed mode as well
        if isinstance(objpath, LocalPathClasses):
            adata = read_h5ad(objpath.as_posix(), backed="r")
        else:
            adata = load_h5ad(objpath)
    return adata


def _tiledb_config_s3(storepath: UPath) -> dict:
    region = get_storage_region(storepath)
    tiledb_config = {"vfs.s3.region": region}
    storage_options = storepath.storage_options
    if "key" in storage_options:
        tiledb_config["vfs.s3.aws_access_key_id"] = storage_options["key"]
    if "secret" in storage_options:
        tiledb_config["vfs.s3.aws_secret_access_key"] = storage_options["secret"]
    if "token" in storage_options:
        tiledb_config["vfs.s3.aws_session_token"] = storage_options["token"]

    return tiledb_config


def _open_tiledbsoma(
    storepath: UPath, mode: Literal["r", "w"] = "r"
) -> SOMACollection | SOMAExperiment:
    try:
        import tiledbsoma as soma
    except ImportError as e:
        raise ImportError("Please install tiledbsoma: pip install tiledbsoma") from e

    storepath_str = storepath.as_posix()
    if storepath.protocol == "s3":
        ctx = soma.SOMATileDBContext(tiledb_config=_tiledb_config_s3(storepath))
        # this is a strange bug:
        # for some reason iterdir further gives incorrect results
        # if the cache is not invalidated;
        # instead of obs and ms it gives ms and ms in the list of names
        storepath.fs.invalidate_cache()
    else:
        ctx = None

    soma_objects = [obj.name for obj in storepath.iterdir()]
    if "obs" in soma_objects and "ms" in soma_objects:
        SOMAType = soma.Experiment
    else:
        SOMAType = soma.Collection
    return SOMAType.open(storepath_str, mode=mode, context=ctx)


def save_tiledbsoma_experiment(
    # Artifact args
    adatas: list[AnnData | UPathStr],
    key: str | None = None,
    description: str | None = None,
    run: Run | None = None,
    revises: Artifact | None = None,
    # tiledbsoma.io.from_anndata args
    measurement_name: str = "RNA",
    obs_id_name: str = "obs_id",
    var_id_name: str = "var_id",
    append_obsm_varm: bool = False,
    # additional keyword args for tiledbsoma.io.from_anndata
    **kwargs,
) -> Artifact:
    """Write `AnnData` to `tiledbsoma.Experiment`.

    Reads `AnnData` objects, writes them to `tiledbsoma.Experiment`, creates & saves an {class}`~lamindb.Artifact`.

    Populates a `lamin_run_uid` column in `obs` with the current `run.uid`.

    Is based on `tiledbsoma.io.from_anndata
    <https://tiledbsoma.readthedocs.io/en/latest/_autosummary/tiledbsoma.io.from_anndata.html>`__.

    Args:
        adatas: `AnnData` objects to write, in-memory or on-disk.
        key: An optional key to reference the artifact.
        description: A description.
        run: The run that creates the artifact.
        revises: `lamindb.Artifact` with `tiledbsoma.Experiment` to append to.
        measurement_name: The name of the measurement to store data in `tiledbsoma.Experiment`.
        obs_id_name: Which `AnnData` `obs` column to use for append mode.
        var_id_name: Which `AnnData` `var` column to use for append mode.
        append_obsm_varm: Whether to append `obsm` and `varm` in append mode.
        **kwargs: Keyword arguments passed to `tiledbsoma.io.from_anndata`.
    """
    try:
        import tiledbsoma as soma
        import tiledbsoma.io as soma_io
    except ImportError as e:
        raise ImportError("Please install tiledbsoma: pip install tiledbsoma") from e

    from lamindb.core._data import get_run
    from lamindb.core.storage.paths import auto_storage_key_from_artifact_uid
    from lamindb.core.versioning import create_uid

    run = get_run(run)

    appending = revises is not None
    if appending:
        storepath = revises.path
    else:
        uid, _ = create_uid(n_full_id=20)
        storage_key = auto_storage_key_from_artifact_uid(
            uid, ".tiledbsoma", is_dir=True
        )
        storepath = setup_settings.storage.root / storage_key

    if storepath.protocol == "s3":
        ctx = soma.SOMATileDBContext(tiledb_config=_tiledb_config_s3(storepath))
    else:
        ctx = None

    storepath = storepath.as_posix()

    add_run_uid = True
    if appending:
        with soma.Experiment.open(storepath, mode="r", context=ctx) as store:
            add_run_uid = "lamin_run_uid" in store["obs"].schema.names

    if add_run_uid and run is None:
        raise ValueError("Pass `run`")

    adata_objects = []
    for adata in adatas:
        if isinstance(adata, AnnData):
            if add_run_uid:
                if adata.is_view:
                    raise ValueError(
                        "Cannot write an `AnnData` view, please do `adata.copy()` before passing."
                    )
                else:
                    adata.obs["lamin_run_uid"] = run.uid
        else:
            adata = _load_h5ad_zarr(create_path(adata))
            if add_run_uid:
                adata.obs["lamin_run_uid"] = run.uid
        adata_objects.append(adata)

    registration_mapping = kwargs.get("registration_mapping", None)
    if registration_mapping is None and (appending or len(adata_objects) > 1):
        registration_mapping = soma_io.register_anndatas(
            experiment_uri=storepath if appending else None,
            adatas=adata_objects,
            measurement_name=measurement_name,
            obs_field_name=obs_id_name,
            var_field_name=var_id_name,
            append_obsm_varm=append_obsm_varm,
            context=ctx,
        )

    if registration_mapping is not None:
        n_observations = len(registration_mapping.obs_axis.data)
    else:  # happens only if not appending and only one adata passed
        assert len(adata_objects) == 1  # noqa: S101
        n_observations = adata_objects[0].n_obs

    for adata_obj in adata_objects:
        soma_io.from_anndata(
            storepath,
            adata_obj,
            measurement_name,
            context=ctx,
            obs_id_name=obs_id_name,
            var_id_name=var_id_name,
            registration_mapping=registration_mapping,
            **kwargs,
        )

    artifact = Artifact(
        storepath,
        key=key,
        description=description,
        run=run,
        revises=revises,
        _is_internal_call=True,
    )
    artifact.n_observations = n_observations
    artifact._accessor = "tiledbsoma"

    return artifact.save()
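
For orientation, here is a minimal usage sketch of save_tiledbsoma_experiment and _open_tiledbsoma. It is not part of the diff: it assumes a configured lamindb instance, tiledbsoma installed, and a tracked run; the dataset below and the store description are made up, and the import path follows the module location in the file list above.

# Usage sketch (not part of the diff); data and names are illustrative.
import anndata as ad
import numpy as np
import pandas as pd

import lamindb as ln
from lamindb.core.storage._tiledbsoma import (
    _open_tiledbsoma,
    save_tiledbsoma_experiment,
)

ln.track()  # or ln.context.track(); provides the run picked up when run=None

adata = ad.AnnData(
    X=np.random.poisson(1.0, size=(10, 5)).astype(np.float32),
    obs=pd.DataFrame({"obs_id": [f"cell{i}" for i in range(10)]}),
    var=pd.DataFrame({"var_id": [f"gene{i}" for i in range(5)]}),
)

artifact = save_tiledbsoma_experiment(
    [adata], description="demo tiledbsoma experiment", measurement_name="RNA"
)

# append a second batch by passing the earlier artifact as `revises`;
# obs_id_name/var_id_name tell tiledbsoma how to align rows across appends
adata2 = adata.copy()
adata2.obs["obs_id"] = [f"cell{i + 10}" for i in range(10)]
artifact_v2 = save_tiledbsoma_experiment([adata2], revises=artifact)

# read back with the helper defined above
with _open_tiledbsoma(artifact_v2.path) as exp:
    print(exp["obs"].schema.names)  # includes "lamin_run_uid"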
lamindb/core/storage/_valid_suffixes.py
@@ -1,21 +1,21 @@
from __future__ import annotations

from lamindb_setup.core.upath import VALID_COMPOSITE_SUFFIXES, VALID_SIMPLE_SUFFIXES

# add new composite suffixes like so
VALID_COMPOSITE_SUFFIXES.update(
    {
        ".vitessce.json",
        ".ome.zarr",
    }
)
# can do the same for simple valid suffixes


class VALID_SUFFIXES:
    """Valid suffixes."""

    SIMPLE: set[str] = VALID_SIMPLE_SUFFIXES
    """Simple suffixes."""
    COMPOSITE: set[str] = VALID_COMPOSITE_SUFFIXES
    """Composite suffixes."""
lamindb/core/storage/_zarr.py
@@ -1,110 +1,110 @@
from __future__ import annotations

import warnings
from typing import TYPE_CHECKING

import scipy.sparse as sparse
import zarr
from anndata._io import read_zarr
from anndata._io.specs import write_elem
from anndata._io.specs.registry import get_spec
from fsspec.implementations.local import LocalFileSystem
from lamindb_setup.core.upath import create_mapper, infer_filesystem

from ._anndata_sizes import _size_elem, _size_raw, size_adata

if TYPE_CHECKING:
    from anndata import AnnData
    from lamindb_setup.core.types import UPathStr


def zarr_is_adata(storepath: UPathStr) -> bool:
    fs, storepath_str = infer_filesystem(storepath)
    if isinstance(fs, LocalFileSystem):
        # this is faster than through an fsspec mapper for local
        open_obj = storepath_str
    else:
        open_obj = create_mapper(fs, storepath_str, check=True)
    storage = zarr.open(open_obj, mode="r")
    return get_spec(storage).encoding_type == "anndata"


def load_anndata_zarr(storepath: UPathStr) -> AnnData:
    fs, storepath_str = infer_filesystem(storepath)
    if isinstance(fs, LocalFileSystem):
        # this is faster than through an fsspec mapper for local
        open_obj = storepath_str
    else:
        open_obj = create_mapper(fs, storepath_str, check=True)
    adata = read_zarr(open_obj)
    return adata


def write_adata_zarr(
    adata: AnnData, storepath: UPathStr, callback=None, chunks=None, **dataset_kwargs
):
    fs, storepath_str = infer_filesystem(storepath)
    store = create_mapper(fs, storepath_str, create=True)

    f = zarr.open(store, mode="w")

    adata.strings_to_categoricals()
    if adata.raw is not None:
        adata.strings_to_categoricals(adata.raw.var)

    f.attrs.setdefault("encoding-type", "anndata")
    f.attrs.setdefault("encoding-version", "0.1.0")

    adata_size = None
    cumulative_val = 0

    def _cb(key_write: str | None = None):
        nonlocal adata_size
        nonlocal cumulative_val

        if callback is None:
            return None
        if adata_size is None:
            adata_size = size_adata(adata)
        if key_write is None:
            # begin or finish
            if cumulative_val < adata_size:
                callback(adata_size, adata_size if cumulative_val > 0 else 0)
            return None

        elem = getattr(adata, key_write, None)
        if elem is None:
            return None
        elem_size = _size_raw(elem) if key_write == "raw" else _size_elem(elem)
        if elem_size == 0:
            return None

        cumulative_val += elem_size
        callback(adata_size, cumulative_val)

    def _write_elem_cb(f, k, elem, dataset_kwargs):
        write_elem(f, k, elem, dataset_kwargs=dataset_kwargs)
        _cb(k)

    _cb(None)
    with warnings.catch_warnings():
        warnings.filterwarnings("ignore", category=UserWarning, module="zarr")

        if chunks is not None and not isinstance(adata.X, sparse.spmatrix):
            _write_elem_cb(
                f,
                "X",
                adata.X,
                dataset_kwargs=dict(chunks=chunks, **dataset_kwargs),
            )
        else:
            _write_elem_cb(f, "X", adata.X, dataset_kwargs=dataset_kwargs)
        for elem in ("obs", "var"):
            _write_elem_cb(f, elem, getattr(adata, elem), dataset_kwargs=dataset_kwargs)
        for elem in ("obsm", "varm", "obsp", "varp", "layers", "uns"):
            _write_elem_cb(
                f, elem, dict(getattr(adata, elem)), dataset_kwargs=dataset_kwargs
            )
        _write_elem_cb(f, "raw", adata.raw, dataset_kwargs=dataset_kwargs)
    # todo: fix size less than total at the end
    _cb(None)
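
For orientation, a minimal usage sketch of write_adata_zarr with a progress callback (not part of the diff): the callback receives (total_size, cumulative_size), as driven by the _cb helper above. The store name demo.zarr and the array shape are made up; the import path follows the module location in the file list above.

# Usage sketch (not part of the diff): write locally with progress reporting,
# then verify and read back. Callback args follow _cb: (total, written).
import anndata as ad
import numpy as np

from lamindb.core.storage._zarr import (
    load_anndata_zarr,
    write_adata_zarr,
    zarr_is_adata,
)

adata = ad.AnnData(X=np.zeros((100, 20), dtype=np.float32))

def progress(total_size: float, written: float) -> None:
    pct = 100 * written / total_size if total_size else 100.0
    print(f"{written:.0f}/{total_size:.0f} bytes ({pct:.0f}%)")

# `chunks` applies only when X is dense; a sparse X ignores it (see above)
write_adata_zarr(adata, "demo.zarr", callback=progress, chunks=(50, 20))

assert zarr_is_adata("demo.zarr")
adata2 = load_anndata_zarr("demo.zarr")
assert adata2.shape == (100, 20)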