ngio 0.2.0a2__py3-none-any.whl → 0.5.0b4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ngio/__init__.py +40 -12
- ngio/common/__init__.py +16 -32
- ngio/common/_dimensions.py +270 -48
- ngio/common/_masking_roi.py +153 -0
- ngio/common/_pyramid.py +267 -73
- ngio/common/_roi.py +290 -66
- ngio/common/_synt_images_utils.py +101 -0
- ngio/common/_zoom.py +54 -22
- ngio/experimental/__init__.py +5 -0
- ngio/experimental/iterators/__init__.py +15 -0
- ngio/experimental/iterators/_abstract_iterator.py +390 -0
- ngio/experimental/iterators/_feature.py +189 -0
- ngio/experimental/iterators/_image_processing.py +130 -0
- ngio/experimental/iterators/_mappers.py +48 -0
- ngio/experimental/iterators/_rois_utils.py +126 -0
- ngio/experimental/iterators/_segmentation.py +235 -0
- ngio/hcs/__init__.py +17 -58
- ngio/hcs/_plate.py +1354 -0
- ngio/images/__init__.py +30 -9
- ngio/images/_abstract_image.py +968 -0
- ngio/images/_create_synt_container.py +132 -0
- ngio/images/_create_utils.py +423 -0
- ngio/images/_image.py +926 -0
- ngio/images/_label.py +417 -0
- ngio/images/_masked_image.py +531 -0
- ngio/images/_ome_zarr_container.py +1235 -0
- ngio/images/_table_ops.py +471 -0
- ngio/io_pipes/__init__.py +75 -0
- ngio/io_pipes/_io_pipes.py +361 -0
- ngio/io_pipes/_io_pipes_masked.py +488 -0
- ngio/io_pipes/_io_pipes_roi.py +146 -0
- ngio/io_pipes/_io_pipes_types.py +56 -0
- ngio/io_pipes/_match_shape.py +377 -0
- ngio/io_pipes/_ops_axes.py +344 -0
- ngio/io_pipes/_ops_slices.py +411 -0
- ngio/io_pipes/_ops_slices_utils.py +199 -0
- ngio/io_pipes/_ops_transforms.py +104 -0
- ngio/io_pipes/_zoom_transform.py +180 -0
- ngio/ome_zarr_meta/__init__.py +39 -15
- ngio/ome_zarr_meta/_meta_handlers.py +490 -96
- ngio/ome_zarr_meta/ngio_specs/__init__.py +24 -10
- ngio/ome_zarr_meta/ngio_specs/_axes.py +268 -234
- ngio/ome_zarr_meta/ngio_specs/_channels.py +125 -41
- ngio/ome_zarr_meta/ngio_specs/_dataset.py +42 -87
- ngio/ome_zarr_meta/ngio_specs/_ngio_hcs.py +536 -2
- ngio/ome_zarr_meta/ngio_specs/_ngio_image.py +202 -198
- ngio/ome_zarr_meta/ngio_specs/_pixel_size.py +72 -34
- ngio/ome_zarr_meta/v04/__init__.py +21 -5
- ngio/ome_zarr_meta/v04/_custom_models.py +18 -0
- ngio/ome_zarr_meta/v04/{_v04_spec_utils.py → _v04_spec.py} +151 -90
- ngio/ome_zarr_meta/v05/__init__.py +27 -0
- ngio/ome_zarr_meta/v05/_custom_models.py +18 -0
- ngio/ome_zarr_meta/v05/_v05_spec.py +511 -0
- ngio/resources/20200812-CardiomyocyteDifferentiation14-Cycle1_B03/mask.png +0 -0
- ngio/resources/20200812-CardiomyocyteDifferentiation14-Cycle1_B03/nuclei.png +0 -0
- ngio/resources/20200812-CardiomyocyteDifferentiation14-Cycle1_B03/raw.jpg +0 -0
- ngio/resources/__init__.py +55 -0
- ngio/resources/resource_model.py +36 -0
- ngio/tables/__init__.py +20 -4
- ngio/tables/_abstract_table.py +270 -0
- ngio/tables/_tables_container.py +449 -0
- ngio/tables/backends/__init__.py +50 -1
- ngio/tables/backends/_abstract_backend.py +200 -31
- ngio/tables/backends/_anndata.py +139 -0
- ngio/tables/backends/_anndata_utils.py +10 -114
- ngio/tables/backends/_csv.py +19 -0
- ngio/tables/backends/_json.py +92 -0
- ngio/tables/backends/_parquet.py +19 -0
- ngio/tables/backends/_py_arrow_backends.py +222 -0
- ngio/tables/backends/_table_backends.py +162 -38
- ngio/tables/backends/_utils.py +608 -0
- ngio/tables/v1/__init__.py +19 -4
- ngio/tables/v1/_condition_table.py +71 -0
- ngio/tables/v1/_feature_table.py +79 -115
- ngio/tables/v1/_generic_table.py +21 -90
- ngio/tables/v1/_roi_table.py +486 -137
- ngio/transforms/__init__.py +5 -0
- ngio/transforms/_zoom.py +19 -0
- ngio/utils/__init__.py +16 -14
- ngio/utils/_cache.py +48 -0
- ngio/utils/_datasets.py +121 -13
- ngio/utils/_fractal_fsspec_store.py +42 -0
- ngio/utils/_zarr_utils.py +374 -218
- ngio-0.5.0b4.dist-info/METADATA +147 -0
- ngio-0.5.0b4.dist-info/RECORD +88 -0
- {ngio-0.2.0a2.dist-info → ngio-0.5.0b4.dist-info}/WHEEL +1 -1
- ngio/common/_array_pipe.py +0 -160
- ngio/common/_axes_transforms.py +0 -63
- ngio/common/_common_types.py +0 -5
- ngio/common/_slicer.py +0 -97
- ngio/images/abstract_image.py +0 -240
- ngio/images/create.py +0 -251
- ngio/images/image.py +0 -389
- ngio/images/label.py +0 -236
- ngio/images/omezarr_container.py +0 -535
- ngio/ome_zarr_meta/_generic_handlers.py +0 -320
- ngio/ome_zarr_meta/v04/_meta_handlers.py +0 -54
- ngio/tables/_validators.py +0 -192
- ngio/tables/backends/_anndata_v1.py +0 -75
- ngio/tables/backends/_json_v1.py +0 -56
- ngio/tables/tables_container.py +0 -300
- ngio/tables/v1/_masking_roi_table.py +0 -175
- ngio/utils/_logger.py +0 -29
- ngio-0.2.0a2.dist-info/METADATA +0 -95
- ngio-0.2.0a2.dist-info/RECORD +0 -53
- {ngio-0.2.0a2.dist-info → ngio-0.5.0b4.dist-info}/licenses/LICENSE +0 -0
ngio/utils/_zarr_utils.py
CHANGED
|
@@ -1,90 +1,103 @@
|
|
|
1
1
|
"""Common utilities for working with Zarr groups in consistent ways."""
|
|
2
2
|
|
|
3
|
-
|
|
3
|
+
import json
|
|
4
|
+
import warnings
|
|
4
5
|
from pathlib import Path
|
|
5
|
-
from typing import Literal
|
|
6
|
+
from typing import Literal, TypeAlias
|
|
6
7
|
|
|
8
|
+
import dask.array as da
|
|
7
9
|
import fsspec
|
|
8
10
|
import zarr
|
|
9
11
|
from filelock import BaseFileLock, FileLock
|
|
10
|
-
from
|
|
11
|
-
from
|
|
12
|
-
|
|
13
|
-
from
|
|
14
|
-
from
|
|
12
|
+
from pydantic_zarr.v2 import ArraySpec as AnyArraySpecV2
|
|
13
|
+
from pydantic_zarr.v3 import ArraySpec as AnyArraySpecV3
|
|
14
|
+
from zarr.abc.store import Store
|
|
15
|
+
from zarr.errors import ContainsGroupError
|
|
16
|
+
from zarr.storage import FsspecStore, LocalStore, MemoryStore, ZipStore
|
|
17
|
+
|
|
18
|
+
from ngio.utils._cache import NgioCache
|
|
19
|
+
from ngio.utils._errors import (
|
|
20
|
+
NgioFileExistsError,
|
|
21
|
+
NgioFileNotFoundError,
|
|
22
|
+
NgioValueError,
|
|
23
|
+
)
|
|
15
24
|
|
|
16
25
|
AccessModeLiteral = Literal["r", "r+", "w", "w-", "a"]
|
|
17
26
|
# StoreLike is more restrictive than it could be
|
|
18
27
|
# but to make sure we can handle the store correctly
|
|
19
28
|
# we need to be more restrictive
|
|
20
|
-
NgioSupportedStore =
|
|
21
|
-
|
|
22
|
-
|
|
29
|
+
NgioSupportedStore: TypeAlias = (
|
|
30
|
+
str | Path | fsspec.mapping.FSMap | FsspecStore | MemoryStore | dict | LocalStore
|
|
31
|
+
)
|
|
32
|
+
GenericStore: TypeAlias = NgioSupportedStore | Store
|
|
33
|
+
StoreOrGroup: TypeAlias = NgioSupportedStore | zarr.Group
|
|
23
34
|
|
|
24
35
|
|
|
25
36
|
def _check_store(store) -> NgioSupportedStore:
|
|
26
37
|
"""Check the store and return a valid store."""
|
|
27
|
-
if isinstance(store, NgioSupportedStore):
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
38
|
+
if not isinstance(store, NgioSupportedStore):
|
|
39
|
+
warnings.warn(
|
|
40
|
+
f"Store type {type(store)} is not explicitly supported. "
|
|
41
|
+
f"Supported types are: {NgioSupportedStore}. "
|
|
42
|
+
"Proceeding, but this may lead to unexpected behavior.",
|
|
43
|
+
UserWarning,
|
|
44
|
+
stacklevel=2,
|
|
45
|
+
)
|
|
46
|
+
return store
|
|
34
47
|
|
|
35
48
|
|
|
36
|
-
def _check_group(
|
|
49
|
+
def _check_group(
|
|
50
|
+
group: zarr.Group, mode: AccessModeLiteral | None = None
|
|
51
|
+
) -> zarr.Group:
|
|
37
52
|
"""Check the group and return a valid group."""
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
raise NgioValueError(
|
|
41
|
-
"The group is read only. Cannot open in write mode ['w', 'w-']"
|
|
42
|
-
)
|
|
53
|
+
if group.read_only and mode not in [None, "r"]:
|
|
54
|
+
raise NgioValueError(f"The group is read only. Cannot open in mode {mode}.")
|
|
43
55
|
|
|
44
|
-
if mode == "r" and not
|
|
56
|
+
if mode == "r" and not group.read_only:
|
|
45
57
|
# let's make sure we don't accidentally write to the group
|
|
46
58
|
group = zarr.open_group(store=group.store, path=group.path, mode="r")
|
|
47
|
-
|
|
48
59
|
return group
|
|
49
60
|
|
|
50
61
|
|
|
51
62
|
def open_group_wrapper(
|
|
52
|
-
store: StoreOrGroup,
|
|
53
|
-
|
|
63
|
+
store: StoreOrGroup,
|
|
64
|
+
mode: AccessModeLiteral | None = None,
|
|
65
|
+
zarr_format: Literal[2, 3] | None = None,
|
|
66
|
+
) -> zarr.Group:
|
|
54
67
|
"""Wrapper around zarr.open_group with some additional checks.
|
|
55
68
|
|
|
56
69
|
Args:
|
|
57
70
|
store (StoreOrGroup): The store or group to open.
|
|
58
|
-
mode (
|
|
71
|
+
mode (AccessModeLiteral): The mode to open the group in.
|
|
72
|
+
zarr_format (int): The Zarr format version to use.
|
|
59
73
|
|
|
60
74
|
Returns:
|
|
61
75
|
zarr.Group: The opened Zarr group.
|
|
62
76
|
"""
|
|
63
77
|
if isinstance(store, zarr.Group):
|
|
64
78
|
group = _check_group(store, mode)
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
if isinstance(group.store, DirectoryStore):
|
|
68
|
-
_store = group.store.path
|
|
69
|
-
else:
|
|
70
|
-
_store = group.store
|
|
71
|
-
|
|
72
|
-
_store = _check_store(_store)
|
|
73
|
-
return group, _store
|
|
79
|
+
_check_store(group.store)
|
|
80
|
+
return group
|
|
74
81
|
|
|
75
82
|
try:
|
|
76
|
-
|
|
77
|
-
|
|
83
|
+
_check_store(store)
|
|
84
|
+
mode = mode if mode is not None else "a"
|
|
85
|
+
group = zarr.open_group(store=store, mode=mode, zarr_format=zarr_format)
|
|
78
86
|
|
|
79
|
-
except
|
|
87
|
+
except FileExistsError as e:
|
|
80
88
|
raise NgioFileExistsError(
|
|
81
89
|
f"A Zarr group already exists at {store}, consider setting overwrite=True."
|
|
82
90
|
) from e
|
|
83
91
|
|
|
84
|
-
except
|
|
92
|
+
except FileNotFoundError as e:
|
|
85
93
|
raise NgioFileNotFoundError(f"No Zarr group found at {store}") from e
|
|
86
94
|
|
|
87
|
-
|
|
95
|
+
except ContainsGroupError as e:
|
|
96
|
+
raise NgioFileExistsError(
|
|
97
|
+
f"A Zarr group already exists at {store}, consider setting overwrite=True."
|
|
98
|
+
) from e
|
|
99
|
+
|
|
100
|
+
return group
|
|
88
101
|
|
|
89
102
|
|
|
90
103
|
class ZarrGroupHandler:
|
|
@@ -93,167 +106,185 @@ class ZarrGroupHandler:
|
|
|
93
106
|
def __init__(
|
|
94
107
|
self,
|
|
95
108
|
store: StoreOrGroup,
|
|
109
|
+
zarr_format: Literal[2, 3] | None = None,
|
|
96
110
|
cache: bool = False,
|
|
97
|
-
mode: AccessModeLiteral =
|
|
98
|
-
parallel_safe: bool = False,
|
|
99
|
-
parent: "ZarrGroupHandler | None" = None,
|
|
111
|
+
mode: AccessModeLiteral | None = None,
|
|
100
112
|
):
|
|
101
113
|
"""Initialize the handler.
|
|
102
114
|
|
|
103
115
|
Args:
|
|
104
116
|
store (StoreOrGroup): The Zarr store or group containing the image data.
|
|
105
117
|
meta_mode (str): The mode of the metadata handler.
|
|
118
|
+
zarr_format (int | None): The Zarr format version to use.
|
|
106
119
|
cache (bool): Whether to cache the metadata.
|
|
107
|
-
mode (str): The mode of the store.
|
|
108
|
-
parallel_safe (bool): If True, the handler will create a lock file to make
|
|
109
|
-
that can be used to make the handler parallel safe.
|
|
110
|
-
Be aware that the lock needs to be used manually.
|
|
111
|
-
parent (ZarrGroupHandler | None): The parent handler.
|
|
120
|
+
mode (str | None): The mode of the store.
|
|
112
121
|
"""
|
|
113
|
-
if mode not in ["r", "r+", "w", "w-", "a"]:
|
|
122
|
+
if mode not in ["r", "r+", "w", "w-", "a", None]:
|
|
114
123
|
raise NgioValueError(f"Mode {mode} is not supported.")
|
|
115
124
|
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
"The cache and parallel_safe options are mutually exclusive."
|
|
119
|
-
"If you want to use the lock mechanism, you should not use the cache."
|
|
120
|
-
)
|
|
121
|
-
|
|
122
|
-
_group, _store = open_group_wrapper(store, mode)
|
|
123
|
-
|
|
124
|
-
# Make sure the cache is set in the attrs
|
|
125
|
-
# in the same way as the cache in the handler
|
|
126
|
-
_group.attrs.cache = cache
|
|
127
|
-
|
|
128
|
-
if parallel_safe:
|
|
129
|
-
if not isinstance(_store, str | Path):
|
|
130
|
-
raise NgioValueError(
|
|
131
|
-
"The store needs to be a path to use the lock mechanism."
|
|
132
|
-
)
|
|
133
|
-
self._lock_path = f"{_store}.lock"
|
|
134
|
-
self._lock = FileLock(self._lock_path)
|
|
135
|
-
|
|
136
|
-
else:
|
|
137
|
-
self._lock_path = None
|
|
138
|
-
self._lock = None
|
|
139
|
-
|
|
140
|
-
self._group = _group
|
|
141
|
-
self._mode = mode
|
|
142
|
-
self._store = _store
|
|
125
|
+
group = open_group_wrapper(store=store, mode=mode, zarr_format=zarr_format)
|
|
126
|
+
self._group = group
|
|
143
127
|
self.use_cache = cache
|
|
144
|
-
|
|
145
|
-
self.
|
|
146
|
-
self.
|
|
128
|
+
|
|
129
|
+
self._group_cache: NgioCache[zarr.Group] = NgioCache(use_cache=cache)
|
|
130
|
+
self._array_cache: NgioCache[zarr.Array] = NgioCache(use_cache=cache)
|
|
131
|
+
self._handlers_cache: NgioCache[ZarrGroupHandler] = NgioCache(use_cache=cache)
|
|
132
|
+
self._lock: tuple[Path, BaseFileLock] | None = None
|
|
147
133
|
|
|
148
134
|
def __repr__(self) -> str:
|
|
149
135
|
"""Return a string representation of the handler."""
|
|
150
136
|
return (
|
|
151
|
-
f"ZarrGroupHandler(
|
|
137
|
+
f"ZarrGroupHandler(full_url={self.full_url}, read_only={self.read_only}, "
|
|
152
138
|
f"cache={self.use_cache}"
|
|
153
139
|
)
|
|
154
140
|
|
|
155
141
|
@property
|
|
156
|
-
def store(self) ->
|
|
142
|
+
def store(self) -> Store:
|
|
157
143
|
"""Return the store of the group."""
|
|
158
|
-
return self.
|
|
144
|
+
return self._group.store
|
|
159
145
|
|
|
160
146
|
@property
|
|
161
|
-
def
|
|
147
|
+
def full_url(self) -> str | None:
|
|
162
148
|
"""Return the store path."""
|
|
163
|
-
|
|
149
|
+
if isinstance(self.store, LocalStore):
|
|
150
|
+
return (self.store.root / self.group.path).as_posix()
|
|
151
|
+
elif isinstance(self.store, FsspecStore):
|
|
152
|
+
return f"{self.store.path}/{self.group.path}"
|
|
153
|
+
elif isinstance(self.store, ZipStore):
|
|
154
|
+
return (self.store.path / self.group.path).as_posix()
|
|
155
|
+
elif isinstance(self.store, MemoryStore):
|
|
156
|
+
return None
|
|
157
|
+
warnings.warn(
|
|
158
|
+
f"Cannot determine full URL for store type {type(self.store)}. ",
|
|
159
|
+
UserWarning,
|
|
160
|
+
stacklevel=2,
|
|
161
|
+
)
|
|
162
|
+
return None
|
|
163
|
+
|
|
164
|
+
@property
|
|
165
|
+
def zarr_format(self) -> Literal[2, 3]:
|
|
166
|
+
"""Return the Zarr format version."""
|
|
167
|
+
return self._group.metadata.zarr_format
|
|
164
168
|
|
|
165
169
|
@property
|
|
166
|
-
def
|
|
167
|
-
"""Return the
|
|
168
|
-
return self.
|
|
170
|
+
def read_only(self) -> bool:
|
|
171
|
+
"""Return whether the group is read only."""
|
|
172
|
+
return self._group.read_only
|
|
173
|
+
|
|
174
|
+
def _create_lock(self) -> tuple[Path, BaseFileLock]:
|
|
175
|
+
"""Create the lock."""
|
|
176
|
+
if self._lock is not None:
|
|
177
|
+
return self._lock
|
|
178
|
+
|
|
179
|
+
if self.use_cache is True:
|
|
180
|
+
raise NgioValueError(
|
|
181
|
+
"Lock mechanism is not compatible with caching. "
|
|
182
|
+
"Please set cache=False to use the lock mechanism."
|
|
183
|
+
)
|
|
184
|
+
|
|
185
|
+
if not isinstance(self.store, LocalStore):
|
|
186
|
+
raise NgioValueError(
|
|
187
|
+
"The store needs to be a LocalStore to use the lock mechanism. "
|
|
188
|
+
f"Instead, got {self.store.__class__.__name__}."
|
|
189
|
+
)
|
|
190
|
+
|
|
191
|
+
store_path = Path(self.store.root) / self.group.path
|
|
192
|
+
_lock_path = store_path.with_suffix(".lock")
|
|
193
|
+
_lock = FileLock(_lock_path, timeout=10)
|
|
194
|
+
return _lock_path, _lock
|
|
169
195
|
|
|
170
196
|
@property
|
|
171
|
-
def lock(self) -> BaseFileLock
|
|
197
|
+
def lock(self) -> BaseFileLock:
|
|
172
198
|
"""Return the lock."""
|
|
173
|
-
|
|
199
|
+
if self._lock is None:
|
|
200
|
+
self._lock = self._create_lock()
|
|
201
|
+
return self._lock[1]
|
|
174
202
|
|
|
175
203
|
@property
|
|
176
|
-
def
|
|
177
|
-
"""Return the
|
|
178
|
-
|
|
204
|
+
def lock_path(self) -> Path:
|
|
205
|
+
"""Return the lock path."""
|
|
206
|
+
if self._lock is None:
|
|
207
|
+
self._lock = self._create_lock()
|
|
208
|
+
return self._lock[0]
|
|
179
209
|
|
|
180
210
|
def remove_lock(self) -> None:
|
|
181
211
|
"""Return the lock."""
|
|
182
|
-
if self._lock is None
|
|
212
|
+
if self._lock is None:
|
|
183
213
|
return None
|
|
184
214
|
|
|
185
|
-
lock_path =
|
|
186
|
-
if lock_path.exists() and
|
|
215
|
+
lock_path, lock = self._lock
|
|
216
|
+
if lock_path.exists() and lock.lock_counter == 0:
|
|
187
217
|
lock_path.unlink()
|
|
188
218
|
self._lock = None
|
|
189
|
-
self._lock_path = None
|
|
190
219
|
return None
|
|
191
220
|
|
|
192
221
|
raise NgioValueError("The lock is still in use. Cannot remove it.")
|
|
193
222
|
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
"""Return the group."""
|
|
197
|
-
return self._group
|
|
223
|
+
def reopen_group(self) -> zarr.Group:
|
|
224
|
+
"""Reopen the group.
|
|
198
225
|
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
226
|
+
This is useful when the group has been modified
|
|
227
|
+
outside of the handler.
|
|
228
|
+
"""
|
|
229
|
+
mode = "r" if self.read_only else "r+"
|
|
230
|
+
return zarr.open_group(
|
|
231
|
+
store=self._group.store,
|
|
232
|
+
path=self._group.path,
|
|
233
|
+
mode=mode,
|
|
234
|
+
zarr_format=self._group.metadata.zarr_format,
|
|
235
|
+
)
|
|
204
236
|
|
|
205
|
-
def
|
|
206
|
-
"""
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
237
|
+
def reopen_handler(self) -> "ZarrGroupHandler":
|
|
238
|
+
"""Reopen the handler.
|
|
239
|
+
|
|
240
|
+
This is useful when the group has been modified
|
|
241
|
+
outside of the handler.
|
|
242
|
+
"""
|
|
243
|
+
mode = "r" if self.read_only else "r+"
|
|
244
|
+
group = self.reopen_group()
|
|
245
|
+
return ZarrGroupHandler(
|
|
246
|
+
store=group,
|
|
247
|
+
zarr_format=group.metadata.zarr_format,
|
|
248
|
+
cache=self.use_cache,
|
|
249
|
+
mode=mode,
|
|
250
|
+
)
|
|
210
251
|
|
|
211
252
|
def clean_cache(self) -> None:
|
|
212
253
|
"""Clear the cached metadata."""
|
|
213
|
-
|
|
254
|
+
group = self.reopen_group()
|
|
255
|
+
self.__init__(
|
|
256
|
+
store=group,
|
|
257
|
+
zarr_format=group.metadata.zarr_format,
|
|
258
|
+
cache=self.use_cache,
|
|
259
|
+
mode="r" if self.read_only else "r+",
|
|
260
|
+
)
|
|
261
|
+
|
|
262
|
+
@property
|
|
263
|
+
def group(self) -> zarr.Group:
|
|
264
|
+
"""Return the group."""
|
|
265
|
+
if self.use_cache is False:
|
|
266
|
+
# If we are not using cache, we need to reopen the group
|
|
267
|
+
# to make sure that the attributes are up to date
|
|
268
|
+
return self.reopen_group()
|
|
269
|
+
return self._group
|
|
214
270
|
|
|
215
271
|
def load_attrs(self) -> dict:
|
|
216
272
|
"""Load the attributes of the group."""
|
|
217
|
-
|
|
218
|
-
if attrs is not None and isinstance(attrs, dict):
|
|
219
|
-
return attrs
|
|
220
|
-
|
|
221
|
-
attrs = dict(self.group.attrs)
|
|
222
|
-
|
|
223
|
-
self.add_to_cache("attrs", attrs)
|
|
224
|
-
return attrs
|
|
225
|
-
|
|
226
|
-
def _write_attrs(self, attrs: dict, overwrite: bool = False) -> None:
|
|
227
|
-
"""Write the metadata to the store."""
|
|
228
|
-
is_read_only = getattr(self._group, "_read_only", False)
|
|
229
|
-
if is_read_only:
|
|
230
|
-
raise NgioValueError("The group is read only. Cannot write metadata.")
|
|
231
|
-
|
|
232
|
-
# we need to invalidate the current attrs cache
|
|
233
|
-
self.add_to_cache("attrs", None)
|
|
234
|
-
if overwrite:
|
|
235
|
-
self.group.attrs.clear()
|
|
236
|
-
|
|
237
|
-
self.group.attrs.update(attrs)
|
|
273
|
+
return self.reopen_group().attrs.asdict()
|
|
238
274
|
|
|
239
275
|
def write_attrs(self, attrs: dict, overwrite: bool = False) -> None:
|
|
240
276
|
"""Write the metadata to the store."""
|
|
241
277
|
# Maybe we should use the lock here
|
|
242
|
-
self.
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
return group_or_array
|
|
249
|
-
|
|
250
|
-
group_or_array = self.group.get(path, None)
|
|
251
|
-
self.add_to_cache(path, group_or_array)
|
|
252
|
-
return group_or_array
|
|
278
|
+
if self.read_only:
|
|
279
|
+
raise NgioValueError("The group is read only. Cannot write metadata.")
|
|
280
|
+
group = self.reopen_group()
|
|
281
|
+
if overwrite:
|
|
282
|
+
group.attrs.clear()
|
|
283
|
+
group.attrs.update(attrs)
|
|
253
284
|
|
|
254
285
|
def create_group(self, path: str, overwrite: bool = False) -> zarr.Group:
|
|
255
286
|
"""Create a group in the group."""
|
|
256
|
-
if self.
|
|
287
|
+
if self.group.read_only:
|
|
257
288
|
raise NgioValueError("Cannot create a group in read only mode.")
|
|
258
289
|
|
|
259
290
|
try:
|
|
@@ -263,116 +294,241 @@ class ZarrGroupHandler:
|
|
|
263
294
|
f"A Zarr group already exists at {path}, "
|
|
264
295
|
"consider setting overwrite=True."
|
|
265
296
|
) from e
|
|
266
|
-
self.
|
|
297
|
+
self._group_cache.set(path, group, overwrite=overwrite)
|
|
267
298
|
return group
|
|
268
299
|
|
|
269
300
|
def get_group(
|
|
270
301
|
self,
|
|
271
302
|
path: str,
|
|
272
303
|
create_mode: bool = False,
|
|
304
|
+
overwrite: bool = False,
|
|
273
305
|
) -> zarr.Group:
|
|
274
306
|
"""Get a group from the group.
|
|
275
307
|
|
|
276
308
|
Args:
|
|
277
309
|
path (str): The path to the group.
|
|
278
310
|
create_mode (bool): If True, create the group if it does not exist.
|
|
311
|
+
overwrite (bool): If True, overwrite the group if it exists.
|
|
279
312
|
|
|
280
313
|
Returns:
|
|
281
314
|
zarr.Group: The Zarr group.
|
|
282
315
|
|
|
283
316
|
"""
|
|
284
|
-
|
|
317
|
+
if overwrite and not create_mode:
|
|
318
|
+
raise NgioValueError("Cannot overwrite a group without create_mode=True.")
|
|
319
|
+
|
|
320
|
+
if overwrite:
|
|
321
|
+
return self.create_group(path, overwrite=overwrite)
|
|
322
|
+
|
|
323
|
+
group = self._group_cache.get(path)
|
|
285
324
|
if isinstance(group, zarr.Group):
|
|
286
325
|
return group
|
|
287
326
|
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
|
|
327
|
+
group = self.group.get(path, default=None)
|
|
328
|
+
if isinstance(group, zarr.Group):
|
|
329
|
+
self._group_cache.set(path, group, overwrite=overwrite)
|
|
330
|
+
return group
|
|
331
|
+
|
|
332
|
+
if isinstance(group, zarr.Array):
|
|
333
|
+
raise NgioValueError(f"The object at {path} is not a group, but an array.")
|
|
292
334
|
|
|
293
335
|
if not create_mode:
|
|
294
336
|
raise NgioFileNotFoundError(f"No group found at {path}")
|
|
295
337
|
group = self.create_group(path)
|
|
338
|
+
self._group_cache.set(path, group, overwrite=overwrite)
|
|
296
339
|
return group
|
|
297
340
|
|
|
298
|
-
def
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
|
|
341
|
+
def get_array(self, path: str) -> zarr.Array:
|
|
342
|
+
"""Get an array from the group."""
|
|
343
|
+
array = self._array_cache.get(path)
|
|
344
|
+
if isinstance(array, zarr.Array):
|
|
345
|
+
return array
|
|
346
|
+
array = self.group.get(path, default=None)
|
|
347
|
+
if isinstance(array, zarr.Array):
|
|
348
|
+
self._array_cache.set(path, array)
|
|
349
|
+
return array
|
|
350
|
+
|
|
351
|
+
if isinstance(array, zarr.Group):
|
|
352
|
+
raise NgioValueError(f"The object at {path} is not an array, but a group.")
|
|
353
|
+
raise NgioFileNotFoundError(f"No array found at {path}")
|
|
354
|
+
|
|
355
|
+
def get_handler(
|
|
356
|
+
self,
|
|
357
|
+
path: str,
|
|
358
|
+
create_mode: bool = True,
|
|
359
|
+
overwrite: bool = False,
|
|
360
|
+
) -> "ZarrGroupHandler":
|
|
361
|
+
"""Get a new handler for a group in the current handler group.
|
|
302
362
|
|
|
303
363
|
Args:
|
|
304
364
|
path (str): The path to the group.
|
|
305
365
|
create_mode (bool): If True, create the group if it does not exist.
|
|
366
|
+
overwrite (bool): If True, overwrite the group if it exists.
|
|
367
|
+
"""
|
|
368
|
+
handler = self._handlers_cache.get(path)
|
|
369
|
+
if handler is not None:
|
|
370
|
+
return handler
|
|
371
|
+
group = self.get_group(path, create_mode=create_mode, overwrite=overwrite)
|
|
372
|
+
mode = "r" if group.read_only else "r+"
|
|
373
|
+
handler = ZarrGroupHandler(
|
|
374
|
+
store=group, zarr_format=self.zarr_format, cache=self.use_cache, mode=mode
|
|
375
|
+
)
|
|
376
|
+
self._handlers_cache.set(path, handler)
|
|
377
|
+
return handler
|
|
306
378
|
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
|
|
379
|
+
@property
|
|
380
|
+
def is_listable(self) -> bool:
|
|
381
|
+
return is_group_listable(self.group)
|
|
310
382
|
|
|
383
|
+
def delete_group(self, path: str) -> None:
|
|
384
|
+
"""Delete a group from the current group.
|
|
385
|
+
|
|
386
|
+
Args:
|
|
387
|
+
path (str): The path to the group to delete.
|
|
311
388
|
"""
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
|
|
389
|
+
if self.group.read_only:
|
|
390
|
+
raise NgioValueError("Cannot delete a group in read only mode.")
|
|
391
|
+
self.group.__delitem__(path)
|
|
392
|
+
self._group_cache._cache.pop(path, None)
|
|
393
|
+
self._handlers_cache._cache.pop(path, None)
|
|
316
394
|
|
|
317
|
-
def
|
|
318
|
-
"""
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
|
|
395
|
+
def delete_self(self) -> None:
|
|
396
|
+
"""Delete the current group."""
|
|
397
|
+
if self.group.read_only:
|
|
398
|
+
raise NgioValueError("Cannot delete a group in read only mode.")
|
|
399
|
+
self.group.__delitem__("/")
|
|
400
|
+
|
|
401
|
+
def copy_group(self, dest_group: zarr.Group):
|
|
402
|
+
"""Copy the group to a new store."""
|
|
403
|
+
copy_group(self.group, dest_group)
|
|
404
|
+
|
|
405
|
+
|
|
406
|
+
def find_dimension_separator(array: zarr.Array) -> Literal[".", "/"]:
|
|
407
|
+
"""Find the dimension separator used in the Zarr store.
|
|
408
|
+
|
|
409
|
+
Args:
|
|
410
|
+
array (zarr.Array): The Zarr array to check.
|
|
411
|
+
|
|
412
|
+
Returns:
|
|
413
|
+
Literal[".", "/"]: The dimension separator used in the store.
|
|
414
|
+
"""
|
|
415
|
+
from zarr.core.chunk_key_encodings import DefaultChunkKeyEncoding
|
|
416
|
+
|
|
417
|
+
if array.metadata.zarr_format == 2:
|
|
418
|
+
separator = array.metadata.dimension_separator
|
|
419
|
+
else:
|
|
420
|
+
separator = array.metadata.chunk_key_encoding
|
|
421
|
+
if not isinstance(separator, DefaultChunkKeyEncoding):
|
|
323
422
|
raise NgioValueError(
|
|
324
|
-
|
|
423
|
+
"Only DefaultChunkKeyEncoding is supported in this example."
|
|
325
424
|
)
|
|
326
|
-
|
|
425
|
+
separator = separator.separator
|
|
426
|
+
return separator
|
|
327
427
|
|
|
328
|
-
def create_array(
|
|
329
|
-
self,
|
|
330
|
-
path: str,
|
|
331
|
-
shape: tuple[int, ...],
|
|
332
|
-
dtype: str,
|
|
333
|
-
chunks: tuple[int, ...] | None = None,
|
|
334
|
-
overwrite: bool = False,
|
|
335
|
-
) -> zarr.Array:
|
|
336
|
-
if self.mode == "r":
|
|
337
|
-
raise NgioValueError("Cannot create an array in read only mode.")
|
|
338
428
|
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
name=path,
|
|
342
|
-
shape=shape,
|
|
343
|
-
dtype=dtype,
|
|
344
|
-
chunks=chunks,
|
|
345
|
-
dimension_separator="/",
|
|
346
|
-
overwrite=overwrite,
|
|
347
|
-
)
|
|
348
|
-
except ContainsGroupError as e:
|
|
349
|
-
raise NgioFileExistsError(
|
|
350
|
-
f"A Zarr array already exists at {path}, "
|
|
351
|
-
"consider setting overwrite=True."
|
|
352
|
-
) from e
|
|
353
|
-
except Exception as e:
|
|
354
|
-
raise NgioValueError(f"Error creating array at {path}") from e
|
|
429
|
+
def is_group_listable(group: zarr.Group) -> bool:
|
|
430
|
+
"""Check if a Zarr group is listable.
|
|
355
431
|
|
|
356
|
-
|
|
357
|
-
|
|
358
|
-
|
|
359
|
-
|
|
360
|
-
|
|
361
|
-
|
|
362
|
-
|
|
363
|
-
|
|
364
|
-
|
|
365
|
-
|
|
366
|
-
|
|
367
|
-
|
|
432
|
+
A group is considered listable if it contains at least one array or subgroup.
|
|
433
|
+
|
|
434
|
+
Args:
|
|
435
|
+
group (zarr.Group): The Zarr group to check.
|
|
436
|
+
|
|
437
|
+
Returns:
|
|
438
|
+
bool: True if the group is listable, False otherwise.
|
|
439
|
+
"""
|
|
440
|
+
if not group.store.supports_listing:
|
|
441
|
+
# If the store does not support listing
|
|
442
|
+
# then for sure it is not listable
|
|
443
|
+
return False
|
|
444
|
+
try:
|
|
445
|
+
next(group.keys())
|
|
446
|
+
return True
|
|
447
|
+
except StopIteration:
|
|
448
|
+
# Group is listable but empty
|
|
449
|
+
return True
|
|
450
|
+
except Exception as _:
|
|
451
|
+
# Some stores may raise errors when listing
|
|
452
|
+
# consider those not listable
|
|
453
|
+
return False
|
|
454
|
+
|
|
455
|
+
|
|
456
|
+
def _make_sync_fs(fs: fsspec.AbstractFileSystem) -> fsspec.AbstractFileSystem:
|
|
457
|
+
fs_dict = json.loads(fs.to_json())
|
|
458
|
+
fs_dict["asynchronous"] = False
|
|
459
|
+
return fsspec.AbstractFileSystem.from_json(json.dumps(fs_dict))
|
|
460
|
+
|
|
461
|
+
|
|
462
|
+
def _get_mapper(store: LocalStore | FsspecStore, path: str):
|
|
463
|
+
if isinstance(store, LocalStore):
|
|
464
|
+
fs = fsspec.filesystem("file")
|
|
465
|
+
full_path = (store.root / path).as_posix()
|
|
466
|
+
else:
|
|
467
|
+
fs = _make_sync_fs(store.fs)
|
|
468
|
+
full_path = f"{store.path}/{path}"
|
|
469
|
+
return fs.get_mapper(full_path)
|
|
470
|
+
|
|
471
|
+
|
|
472
|
+
def _fsspec_copy(
|
|
473
|
+
src_fs: LocalStore | FsspecStore,
|
|
474
|
+
src_path: str,
|
|
475
|
+
dest_fs: LocalStore | FsspecStore,
|
|
476
|
+
dest_path: str,
|
|
477
|
+
):
|
|
478
|
+
src_mapper = _get_mapper(src_fs, src_path)
|
|
479
|
+
dest_mapper = _get_mapper(dest_fs, dest_path)
|
|
480
|
+
for key in src_mapper.keys():
|
|
481
|
+
dest_mapper[key] = src_mapper[key]
|
|
482
|
+
|
|
483
|
+
|
|
484
|
+
def _zarr_python_copy(src_group: zarr.Group, dest_group: zarr.Group):
|
|
485
|
+
# Copy attributes
|
|
486
|
+
dest_group.attrs.put(src_group.attrs.asdict())
|
|
487
|
+
# Copy arrays
|
|
488
|
+
for name, array in src_group.arrays():
|
|
489
|
+
if array.metadata.zarr_format == 2:
|
|
490
|
+
spec = AnyArraySpecV2.from_zarr(array)
|
|
491
|
+
else:
|
|
492
|
+
spec = AnyArraySpecV3.from_zarr(array)
|
|
493
|
+
dst = spec.to_zarr(
|
|
494
|
+
store=dest_group.store,
|
|
495
|
+
path=f"{dest_group.path}/{name}",
|
|
496
|
+
overwrite=True,
|
|
497
|
+
)
|
|
498
|
+
if array.ndim > 0:
|
|
499
|
+
dask_array = da.from_zarr(array)
|
|
500
|
+
da.to_zarr(dask_array, dst, overwrite=False)
|
|
501
|
+
# Copy subgroups
|
|
502
|
+
for name, subgroup in src_group.groups():
|
|
503
|
+
dest_subgroup = dest_group.create_group(name, overwrite=True)
|
|
504
|
+
_zarr_python_copy(subgroup, dest_subgroup)
|
|
505
|
+
|
|
506
|
+
|
|
507
|
+
def copy_group(
|
|
508
|
+
src_group: zarr.Group, dest_group: zarr.Group, suppress_warnings: bool = False
|
|
509
|
+
):
|
|
510
|
+
if src_group.metadata.zarr_format != dest_group.metadata.zarr_format:
|
|
511
|
+
raise NgioValueError(
|
|
512
|
+
"Different Zarr format versions between source and destination, "
|
|
513
|
+
"cannot copy."
|
|
368
514
|
)
|
|
369
515
|
|
|
370
|
-
|
|
371
|
-
|
|
372
|
-
|
|
373
|
-
|
|
374
|
-
"
|
|
375
|
-
|
|
376
|
-
|
|
377
|
-
|
|
378
|
-
|
|
516
|
+
if not is_group_listable(src_group):
|
|
517
|
+
raise NgioValueError("Source group is not listable, cannot copy.")
|
|
518
|
+
|
|
519
|
+
if dest_group.read_only:
|
|
520
|
+
raise NgioValueError("Destination group is read only, cannot copy.")
|
|
521
|
+
if isinstance(src_group.store, LocalStore | FsspecStore) and isinstance(
|
|
522
|
+
dest_group.store, LocalStore | FsspecStore
|
|
523
|
+
):
|
|
524
|
+
_fsspec_copy(src_group.store, src_group.path, dest_group.store, dest_group.path)
|
|
525
|
+
return
|
|
526
|
+
if not suppress_warnings:
|
|
527
|
+
warnings.warn(
|
|
528
|
+
"Fsspec copy not possible, falling back to Zarr Python API for the copy. "
|
|
529
|
+
"This will preserve some tabular data non-zarr native (parquet, and csv), "
|
|
530
|
+
"and it will be slower for large datasets.",
|
|
531
|
+
UserWarning,
|
|
532
|
+
stacklevel=2,
|
|
533
|
+
)
|
|
534
|
+
_zarr_python_copy(src_group, dest_group)
|