lamindb 0.76.7__py3-none-any.whl → 0.76.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lamindb/__init__.py +113 -113
- lamindb/_artifact.py +1205 -1178
- lamindb/_can_validate.py +579 -579
- lamindb/_collection.py +387 -387
- lamindb/_curate.py +1601 -1601
- lamindb/_feature.py +155 -155
- lamindb/_feature_set.py +242 -242
- lamindb/_filter.py +23 -23
- lamindb/_finish.py +256 -256
- lamindb/_from_values.py +382 -382
- lamindb/_is_versioned.py +40 -40
- lamindb/_parents.py +476 -476
- lamindb/_query_manager.py +125 -125
- lamindb/_query_set.py +362 -362
- lamindb/_record.py +649 -649
- lamindb/_run.py +57 -57
- lamindb/_save.py +308 -295
- lamindb/_storage.py +14 -14
- lamindb/_transform.py +127 -127
- lamindb/_ulabel.py +56 -56
- lamindb/_utils.py +9 -9
- lamindb/_view.py +72 -72
- lamindb/core/__init__.py +94 -94
- lamindb/core/_context.py +574 -574
- lamindb/core/_data.py +438 -438
- lamindb/core/_feature_manager.py +867 -867
- lamindb/core/_label_manager.py +253 -253
- lamindb/core/_mapped_collection.py +597 -597
- lamindb/core/_settings.py +187 -187
- lamindb/core/_sync_git.py +138 -138
- lamindb/core/_track_environment.py +27 -27
- lamindb/core/datasets/__init__.py +59 -59
- lamindb/core/datasets/_core.py +571 -571
- lamindb/core/datasets/_fake.py +36 -36
- lamindb/core/exceptions.py +90 -77
- lamindb/core/fields.py +12 -12
- lamindb/core/loaders.py +164 -164
- lamindb/core/schema.py +56 -56
- lamindb/core/storage/__init__.py +25 -25
- lamindb/core/storage/_anndata_accessor.py +740 -740
- lamindb/core/storage/_anndata_sizes.py +41 -41
- lamindb/core/storage/_backed_access.py +98 -98
- lamindb/core/storage/_tiledbsoma.py +204 -204
- lamindb/core/storage/_valid_suffixes.py +21 -21
- lamindb/core/storage/_zarr.py +110 -110
- lamindb/core/storage/objects.py +62 -62
- lamindb/core/storage/paths.py +172 -141
- lamindb/core/subsettings/__init__.py +12 -12
- lamindb/core/subsettings/_creation_settings.py +38 -38
- lamindb/core/subsettings/_transform_settings.py +21 -21
- lamindb/core/types.py +19 -19
- lamindb/core/versioning.py +158 -158
- lamindb/integrations/__init__.py +12 -12
- lamindb/integrations/_vitessce.py +107 -107
- lamindb/setup/__init__.py +14 -14
- lamindb/setup/core/__init__.py +4 -4
- {lamindb-0.76.7.dist-info → lamindb-0.76.8.dist-info}/LICENSE +201 -201
- {lamindb-0.76.7.dist-info → lamindb-0.76.8.dist-info}/METADATA +3 -3
- lamindb-0.76.8.dist-info/RECORD +60 -0
- {lamindb-0.76.7.dist-info → lamindb-0.76.8.dist-info}/WHEEL +1 -1
- lamindb-0.76.7.dist-info/RECORD +0 -60
lamindb/core/storage/_tiledbsoma.py
CHANGED
@@ -1,204 +1,204 @@

```python
from __future__ import annotations

from typing import TYPE_CHECKING, Literal

from anndata import AnnData, read_h5ad
from lamindb_setup import settings as setup_settings
from lamindb_setup.core._settings_storage import get_storage_region
from lamindb_setup.core.upath import LocalPathClasses, create_path
from lnschema_core import Artifact, Run

if TYPE_CHECKING:
    from lamindb_setup.core.types import UPathStr
    from tiledbsoma import Collection as SOMACollection
    from tiledbsoma import Experiment as SOMAExperiment
    from upath import UPath


def _load_h5ad_zarr(objpath: UPath):
    from lamindb.core.loaders import load_anndata_zarr, load_h5ad

    if objpath.is_dir():
        adata = load_anndata_zarr(objpath)
    else:
        # read only local in backed for now
        # in principle possible to read remote in backed also
        if isinstance(objpath, LocalPathClasses):
            adata = read_h5ad(objpath.as_posix(), backed="r")
        else:
            adata = load_h5ad(objpath)
    return adata


def _tiledb_config_s3(storepath: UPath) -> dict:
    region = get_storage_region(storepath)
    tiledb_config = {"vfs.s3.region": region}
    storage_options = storepath.storage_options
    if "key" in storage_options:
        tiledb_config["vfs.s3.aws_access_key_id"] = storage_options["key"]
    if "secret" in storage_options:
        tiledb_config["vfs.s3.aws_secret_access_key"] = storage_options["secret"]
    if "token" in storage_options:
        tiledb_config["vfs.s3.aws_session_token"] = storage_options["token"]

    return tiledb_config


def _open_tiledbsoma(
    storepath: UPath, mode: Literal["r", "w"] = "r"
) -> SOMACollection | SOMAExperiment:
    try:
        import tiledbsoma as soma
    except ImportError as e:
        raise ImportError("Please install tiledbsoma: pip install tiledbsoma") from e

    storepath_str = storepath.as_posix()
    if storepath.protocol == "s3":
        ctx = soma.SOMATileDBContext(tiledb_config=_tiledb_config_s3(storepath))
        # this is a strange bug:
        # for some reason iterdir further gives incorrect results
        # if the cache is not invalidated;
        # instead of obs and ms it gives ms and ms in the list of names
        storepath.fs.invalidate_cache()
    else:
        ctx = None

    soma_objects = [obj.name for obj in storepath.iterdir()]
    if "obs" in soma_objects and "ms" in soma_objects:
        SOMAType = soma.Experiment
    else:
        SOMAType = soma.Collection
    return SOMAType.open(storepath_str, mode=mode, context=ctx)
```
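For orientation, a minimal sketch of how the two S3 helpers compose; the bucket URI is hypothetical, and `create_path` is the `lamindb_setup` helper imported at the top of the module:

```python
from lamindb_setup.core.upath import create_path

# hypothetical S3 store; credentials travel in the path's storage options
storepath = create_path("s3://my-bucket/mystore.tiledbsoma")

# maps fsspec-style options ("key", "secret", "token") onto the
# corresponding "vfs.s3.*" TileDB config entries, plus the bucket region
config = _tiledb_config_s3(storepath)

# opens as an Experiment if the store contains both "obs" and "ms",
# otherwise as a generic Collection
store = _open_tiledbsoma(storepath, mode="r")
```

The module's public entry point, `save_tiledbsoma_experiment`, follows: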
```python
def save_tiledbsoma_experiment(
    # Artifact args
    adatas: list[AnnData | UPathStr],
    key: str | None = None,
    description: str | None = None,
    run: Run | None = None,
    revises: Artifact | None = None,
    # tiledbsoma.io.from_anndata args
    measurement_name: str = "RNA",
    obs_id_name: str = "obs_id",
    var_id_name: str = "var_id",
    append_obsm_varm: bool = False,
    # additional keyword args for tiledbsoma.io.from_anndata
    **kwargs,
) -> Artifact:
    """Write `AnnData` to `tiledbsoma.Experiment`.

    Reads `AnnData` objects, writes them to `tiledbsoma.Experiment`, creates & saves an {class}`~lamindb.Artifact`.

    Populates a `lamin_run_uid` column in `obs` with the current `run.uid`.

    Is based on `tiledbsoma.io.from_anndata
    <https://tiledbsoma.readthedocs.io/en/latest/_autosummary/tiledbsoma.io.from_anndata.html>`__.

    Args:
        adatas: `AnnData` objects to write, in-memory or on-disk.
        key: An optional key to reference the artifact.
        description: A description.
        run: The run that creates the artifact.
        revises: `lamindb.Artifact` with `tiledbsoma.Experiment` to append to.
        measurement_name: The name of the measurement to store data in `tiledbsoma.Experiment`.
        obs_id_name: Which `AnnData` `obs` column to use for append mode.
        var_id_name: Which `AnnData` `var` column to use for append mode.
        append_obsm_varm: Whether to append `obsm` and `varm` in append mode.
        **kwargs: Keyword arguments passed to `tiledbsoma.io.from_anndata`.
    """
    try:
        import tiledbsoma as soma
        import tiledbsoma.io as soma_io
    except ImportError as e:
        raise ImportError("Please install tiledbsoma: pip install tiledbsoma") from e

    from lamindb.core._data import get_run
    from lamindb.core.storage.paths import auto_storage_key_from_artifact_uid
    from lamindb.core.versioning import create_uid

    run = get_run(run)

    appending = revises is not None
    if appending:
        storepath = revises.path
    else:
        uid, _ = create_uid(n_full_id=20)
        storage_key = auto_storage_key_from_artifact_uid(
            uid, ".tiledbsoma", is_dir=True
        )
        storepath = setup_settings.storage.root / storage_key

    if storepath.protocol == "s3":
        ctx = soma.SOMATileDBContext(tiledb_config=_tiledb_config_s3(storepath))
    else:
        ctx = None

    storepath = storepath.as_posix()

    add_run_uid = True
    if appending:
        with soma.Experiment.open(storepath, mode="r", context=ctx) as store:
            add_run_uid = "lamin_run_uid" in store["obs"].schema.names

    if add_run_uid and run is None:
        raise ValueError("Pass `run`")

    adata_objects = []
    for adata in adatas:
        if isinstance(adata, AnnData):
            if add_run_uid:
                if adata.is_view:
                    raise ValueError(
                        "Cannot write an `AnnData` view; please do `adata.copy()` before passing."
                    )
                else:
                    adata.obs["lamin_run_uid"] = run.uid
        else:
            adata = _load_h5ad_zarr(create_path(adata))
            if add_run_uid:
                adata.obs["lamin_run_uid"] = run.uid
        adata_objects.append(adata)

    registration_mapping = kwargs.get("registration_mapping", None)
    if registration_mapping is None and (appending or len(adata_objects) > 1):
        registration_mapping = soma_io.register_anndatas(
            experiment_uri=storepath if appending else None,
            adatas=adata_objects,
            measurement_name=measurement_name,
            obs_field_name=obs_id_name,
            var_field_name=var_id_name,
            append_obsm_varm=append_obsm_varm,
            context=ctx,
        )

    if registration_mapping is not None:
        n_observations = len(registration_mapping.obs_axis.data)
    else:  # happens only if not appending and only one adata passed
        assert len(adata_objects) == 1  # noqa: S101
        n_observations = adata_objects[0].n_obs

    for adata_obj in adata_objects:
        soma_io.from_anndata(
            storepath,
            adata_obj,
            measurement_name,
            context=ctx,
            obs_id_name=obs_id_name,
            var_id_name=var_id_name,
            registration_mapping=registration_mapping,
            **kwargs,
        )

    artifact = Artifact(
        storepath,
        key=key,
        description=description,
        run=run,
        revises=revises,
        _is_internal_call=True,
    )
    artifact.n_observations = n_observations
    artifact._accessor = "tiledbsoma"

    return artifact.save()
```
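A hedged usage sketch of `save_tiledbsoma_experiment`: it assumes an initialized lamindb instance with an active tracked run (`get_run` resolves `run=None` to the current run, and a run is required to populate `lamin_run_uid`); the toy data and description are illustrative:

```python
import anndata as ad
import numpy as np
import pandas as pd

from lamindb.core.storage._tiledbsoma import save_tiledbsoma_experiment

adata = ad.AnnData(
    X=np.random.rand(3, 2).astype(np.float32),
    obs=pd.DataFrame(index=["cell1", "cell2", "cell3"]),
    var=pd.DataFrame(index=["gene1", "gene2"]),
)

# first write: creates a new .tiledbsoma store and saves it as an Artifact
artifact = save_tiledbsoma_experiment(
    [adata],
    description="toy scRNA-seq store",
    measurement_name="RNA",
)

# append mode: pass the existing artifact via `revises`;
# obs_id_name/var_id_name control how appended observations and variables align
artifact_v2 = save_tiledbsoma_experiment(
    [adata],
    revises=artifact,
)
```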
lamindb/core/storage/_valid_suffixes.py
CHANGED
@@ -1,21 +1,21 @@

```python
from __future__ import annotations

from lamindb_setup.core.upath import VALID_COMPOSITE_SUFFIXES, VALID_SIMPLE_SUFFIXES

# add new composite suffixes like so
VALID_COMPOSITE_SUFFIXES.update(
    {
        ".vitessce.json",
        ".ome.zarr",
    }
)
# can do the same for simple valid suffixes


class VALID_SUFFIXES:
    """Valid suffixes."""

    SIMPLE: set[str] = VALID_SIMPLE_SUFFIXES
    """Simple suffixes."""
    COMPOSITE: set[str] = VALID_COMPOSITE_SUFFIXES
    """Composite suffixes."""
```
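Since both registries are plain sets aliased from `lamindb_setup`, membership checks and extensions work as in this sketch; the `.custom.zarr` suffix is hypothetical:

```python
from lamindb.core.storage._valid_suffixes import VALID_SUFFIXES

assert ".ome.zarr" in VALID_SUFFIXES.COMPOSITE       # registered above
assert ".vitessce.json" in VALID_SUFFIXES.COMPOSITE  # registered above

# updating the set extends suffix recognition globally, because
# COMPOSITE aliases lamindb_setup's VALID_COMPOSITE_SUFFIXES;
# ".custom.zarr" is a hypothetical example
VALID_SUFFIXES.COMPOSITE.update({".custom.zarr"})
```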
lamindb/core/storage/_zarr.py
CHANGED
@@ -1,110 +1,110 @@

```python
from __future__ import annotations

import warnings
from typing import TYPE_CHECKING

import scipy.sparse as sparse
import zarr
from anndata._io import read_zarr
from anndata._io.specs import write_elem
from anndata._io.specs.registry import get_spec
from fsspec.implementations.local import LocalFileSystem
from lamindb_setup.core.upath import create_mapper, infer_filesystem

from ._anndata_sizes import _size_elem, _size_raw, size_adata

if TYPE_CHECKING:
    from anndata import AnnData
    from lamindb_setup.core.types import UPathStr


def zarr_is_adata(storepath: UPathStr) -> bool:
    fs, storepath_str = infer_filesystem(storepath)
    if isinstance(fs, LocalFileSystem):
        # this is faster than through an fsspec mapper for local
        open_obj = storepath_str
    else:
        open_obj = create_mapper(fs, storepath_str, check=True)
    storage = zarr.open(open_obj, mode="r")
    return get_spec(storage).encoding_type == "anndata"


def load_anndata_zarr(storepath: UPathStr) -> AnnData:
    fs, storepath_str = infer_filesystem(storepath)
    if isinstance(fs, LocalFileSystem):
        # this is faster than through an fsspec mapper for local
        open_obj = storepath_str
    else:
        open_obj = create_mapper(fs, storepath_str, check=True)
    adata = read_zarr(open_obj)
    return adata


def write_adata_zarr(
    adata: AnnData, storepath: UPathStr, callback=None, chunks=None, **dataset_kwargs
):
    fs, storepath_str = infer_filesystem(storepath)
    store = create_mapper(fs, storepath_str, create=True)

    f = zarr.open(store, mode="w")

    adata.strings_to_categoricals()
    if adata.raw is not None:
        adata.strings_to_categoricals(adata.raw.var)

    f.attrs.setdefault("encoding-type", "anndata")
    f.attrs.setdefault("encoding-version", "0.1.0")

    adata_size = None
    cumulative_val = 0

    def _cb(key_write: str | None = None):
        nonlocal adata_size
        nonlocal cumulative_val

        if callback is None:
            return None
        if adata_size is None:
            adata_size = size_adata(adata)
        if key_write is None:
            # begin or finish
            if cumulative_val < adata_size:
                callback(adata_size, adata_size if cumulative_val > 0 else 0)
            return None

        elem = getattr(adata, key_write, None)
        if elem is None:
            return None
        elem_size = _size_raw(elem) if key_write == "raw" else _size_elem(elem)
        if elem_size == 0:
            return None

        cumulative_val += elem_size
        callback(adata_size, cumulative_val)

    def _write_elem_cb(f, k, elem, dataset_kwargs):
        write_elem(f, k, elem, dataset_kwargs=dataset_kwargs)
        _cb(k)

    _cb(None)
    with warnings.catch_warnings():
        warnings.filterwarnings("ignore", category=UserWarning, module="zarr")

        if chunks is not None and not isinstance(adata.X, sparse.spmatrix):
            _write_elem_cb(
                f,
                "X",
                adata.X,
                dataset_kwargs=dict(chunks=chunks, **dataset_kwargs),
            )
        else:
            _write_elem_cb(f, "X", adata.X, dataset_kwargs=dataset_kwargs)
        for elem in ("obs", "var"):
            _write_elem_cb(f, elem, getattr(adata, elem), dataset_kwargs=dataset_kwargs)
        for elem in ("obsm", "varm", "obsp", "varp", "layers", "uns"):
            _write_elem_cb(
                f, elem, dict(getattr(adata, elem)), dataset_kwargs=dataset_kwargs
            )
        _write_elem_cb(f, "raw", adata.raw, dataset_kwargs=dataset_kwargs)
    # todo: fix size less than total at the end
    _cb(None)
```
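A small local usage sketch of the three functions above; the callback signature `(total_size, cumulative_size)` matches what `_cb` passes, and the store name is illustrative:

```python
import anndata as ad
import numpy as np
import pandas as pd

from lamindb.core.storage._zarr import (
    load_anndata_zarr,
    write_adata_zarr,
    zarr_is_adata,
)

adata = ad.AnnData(
    X=np.random.rand(4, 3).astype(np.float32),
    obs=pd.DataFrame(index=[f"cell{i}" for i in range(4)]),
    var=pd.DataFrame(index=[f"gene{i}" for i in range(3)]),
)


def progress(total_size: float, cumulative_size: float) -> None:
    # called at the start (with 0), after each written element, and at the end
    print(f"written {cumulative_size / total_size:.0%}")


write_adata_zarr(adata, "example.zarr", callback=progress)
assert zarr_is_adata("example.zarr")
adata_back = load_anndata_zarr("example.zarr")
```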