ngio 0.4.8__py3-none-any.whl → 0.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ngio/__init__.py +5 -2
- ngio/common/__init__.py +11 -6
- ngio/common/_masking_roi.py +34 -54
- ngio/common/_pyramid.py +322 -75
- ngio/common/_roi.py +258 -330
- ngio/experimental/iterators/_feature.py +3 -3
- ngio/experimental/iterators/_rois_utils.py +10 -11
- ngio/hcs/_plate.py +192 -136
- ngio/images/_abstract_image.py +539 -35
- ngio/images/_create_synt_container.py +45 -47
- ngio/images/_create_utils.py +406 -0
- ngio/images/_image.py +524 -248
- ngio/images/_label.py +257 -180
- ngio/images/_masked_image.py +2 -2
- ngio/images/_ome_zarr_container.py +658 -255
- ngio/io_pipes/_io_pipes.py +9 -9
- ngio/io_pipes/_io_pipes_masked.py +7 -7
- ngio/io_pipes/_io_pipes_roi.py +6 -6
- ngio/io_pipes/_io_pipes_types.py +3 -3
- ngio/io_pipes/_match_shape.py +6 -8
- ngio/io_pipes/_ops_slices_utils.py +8 -5
- ngio/ome_zarr_meta/__init__.py +29 -18
- ngio/ome_zarr_meta/_meta_handlers.py +402 -689
- ngio/ome_zarr_meta/ngio_specs/__init__.py +4 -0
- ngio/ome_zarr_meta/ngio_specs/_axes.py +152 -51
- ngio/ome_zarr_meta/ngio_specs/_dataset.py +13 -22
- ngio/ome_zarr_meta/ngio_specs/_ngio_hcs.py +129 -91
- ngio/ome_zarr_meta/ngio_specs/_ngio_image.py +69 -69
- ngio/ome_zarr_meta/v04/__init__.py +5 -1
- ngio/ome_zarr_meta/v04/{_v04_spec_utils.py → _v04_spec.py} +55 -86
- ngio/ome_zarr_meta/v05/__init__.py +27 -0
- ngio/ome_zarr_meta/v05/_custom_models.py +18 -0
- ngio/ome_zarr_meta/v05/_v05_spec.py +495 -0
- ngio/resources/__init__.py +1 -1
- ngio/resources/resource_model.py +1 -1
- ngio/tables/_tables_container.py +82 -24
- ngio/tables/backends/_abstract_backend.py +7 -0
- ngio/tables/backends/_anndata.py +60 -7
- ngio/tables/backends/_anndata_utils.py +2 -4
- ngio/tables/backends/_csv.py +3 -19
- ngio/tables/backends/_json.py +10 -13
- ngio/tables/backends/_parquet.py +3 -31
- ngio/tables/backends/_py_arrow_backends.py +222 -0
- ngio/tables/backends/_utils.py +1 -1
- ngio/tables/v1/_roi_table.py +41 -24
- ngio/utils/__init__.py +8 -12
- ngio/utils/_cache.py +48 -0
- ngio/utils/_zarr_utils.py +354 -236
- {ngio-0.4.8.dist-info → ngio-0.5.0.dist-info}/METADATA +12 -5
- ngio-0.5.0.dist-info/RECORD +88 -0
- ngio/images/_create.py +0 -276
- ngio/tables/backends/_non_zarr_backends.py +0 -196
- ngio/utils/_logger.py +0 -50
- ngio-0.4.8.dist-info/RECORD +0 -85
- {ngio-0.4.8.dist-info → ngio-0.5.0.dist-info}/WHEEL +0 -0
- {ngio-0.4.8.dist-info → ngio-0.5.0.dist-info}/licenses/LICENSE +0 -0
ngio/utils/_zarr_utils.py
CHANGED
|
@@ -1,61 +1,75 @@
|
|
|
1
1
|
"""Common utilities for working with Zarr groups in consistent ways."""
|
|
2
2
|
|
|
3
|
+
import json
|
|
4
|
+
import warnings
|
|
3
5
|
from pathlib import Path
|
|
4
|
-
from typing import Literal
|
|
6
|
+
from typing import Literal, TypeAlias
|
|
5
7
|
|
|
8
|
+
import dask.array as da
|
|
6
9
|
import fsspec
|
|
7
10
|
import zarr
|
|
8
11
|
from filelock import BaseFileLock, FileLock
|
|
9
|
-
from
|
|
10
|
-
from
|
|
11
|
-
from zarr.
|
|
12
|
-
|
|
13
|
-
from
|
|
14
|
-
|
|
12
|
+
from pydantic_zarr.v2 import ArraySpec as AnyArraySpecV2
|
|
13
|
+
from pydantic_zarr.v3 import ArraySpec as AnyArraySpecV3
|
|
14
|
+
from zarr.abc.store import Store
|
|
15
|
+
from zarr.errors import ContainsGroupError
|
|
16
|
+
from zarr.storage import FsspecStore, LocalStore, MemoryStore, ZipStore
|
|
17
|
+
|
|
18
|
+
from ngio.utils._cache import NgioCache
|
|
19
|
+
from ngio.utils._errors import (
|
|
20
|
+
NgioFileExistsError,
|
|
21
|
+
NgioFileNotFoundError,
|
|
22
|
+
NgioValueError,
|
|
23
|
+
)
|
|
15
24
|
|
|
16
25
|
AccessModeLiteral = Literal["r", "r+", "w", "w-", "a"]
|
|
17
26
|
# StoreLike is more restrictive than it could be
|
|
18
27
|
# but to make sure we can handle the store correctly
|
|
19
28
|
# we need to be more restrictive
|
|
20
|
-
NgioSupportedStore = (
|
|
21
|
-
str | Path | fsspec.mapping.FSMap |
|
|
29
|
+
NgioSupportedStore: TypeAlias = (
|
|
30
|
+
str | Path | fsspec.mapping.FSMap | FsspecStore | MemoryStore | dict | LocalStore
|
|
22
31
|
)
|
|
23
|
-
GenericStore =
|
|
24
|
-
StoreOrGroup =
|
|
32
|
+
GenericStore: TypeAlias = NgioSupportedStore | Store
|
|
33
|
+
StoreOrGroup: TypeAlias = NgioSupportedStore | zarr.Group
|
|
25
34
|
|
|
26
35
|
|
|
27
36
|
def _check_store(store) -> NgioSupportedStore:
|
|
28
37
|
"""Check the store and return a valid store."""
|
|
29
|
-
if isinstance(store, NgioSupportedStore):
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
38
|
+
if not isinstance(store, NgioSupportedStore):
|
|
39
|
+
warnings.warn(
|
|
40
|
+
f"Store type {type(store)} is not explicitly supported. "
|
|
41
|
+
f"Supported types are: {NgioSupportedStore}. "
|
|
42
|
+
"Proceeding, but this may lead to unexpected behavior.",
|
|
43
|
+
UserWarning,
|
|
44
|
+
stacklevel=2,
|
|
45
|
+
)
|
|
46
|
+
return store
|
|
36
47
|
|
|
37
48
|
|
|
38
|
-
def _check_group(
|
|
49
|
+
def _check_group(
|
|
50
|
+
group: zarr.Group, mode: AccessModeLiteral | None = None
|
|
51
|
+
) -> zarr.Group:
|
|
39
52
|
"""Check the group and return a valid group."""
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
raise NgioValueError(
|
|
43
|
-
"The group is read only. Cannot open in write mode ['w', 'w-']"
|
|
44
|
-
)
|
|
53
|
+
if group.read_only and mode not in [None, "r"]:
|
|
54
|
+
raise NgioValueError(f"The group is read only. Cannot open in mode {mode}.")
|
|
45
55
|
|
|
46
|
-
if mode == "r" and not
|
|
56
|
+
if mode == "r" and not group.read_only:
|
|
47
57
|
# let's make sure we don't accidentally write to the group
|
|
48
58
|
group = zarr.open_group(store=group.store, path=group.path, mode="r")
|
|
49
|
-
|
|
50
59
|
return group
|
|
51
60
|
|
|
52
61
|
|
|
53
|
-
def open_group_wrapper(
|
|
62
|
+
def open_group_wrapper(
|
|
63
|
+
store: StoreOrGroup,
|
|
64
|
+
mode: AccessModeLiteral | None = None,
|
|
65
|
+
zarr_format: Literal[2, 3] | None = None,
|
|
66
|
+
) -> zarr.Group:
|
|
54
67
|
"""Wrapper around zarr.open_group with some additional checks.
|
|
55
68
|
|
|
56
69
|
Args:
|
|
57
70
|
store (StoreOrGroup): The store or group to open.
|
|
58
|
-
mode (
|
|
71
|
+
mode (AccessModeLiteral): The mode to open the group in.
|
|
72
|
+
zarr_format (int): The Zarr format version to use.
|
|
59
73
|
|
|
60
74
|
Returns:
|
|
61
75
|
zarr.Group: The opened Zarr group.
|
|
@@ -67,16 +81,22 @@ def open_group_wrapper(store: StoreOrGroup, mode: AccessModeLiteral) -> zarr.Gro
|
|
|
67
81
|
|
|
68
82
|
try:
|
|
69
83
|
_check_store(store)
|
|
70
|
-
|
|
84
|
+
mode = mode if mode is not None else "a"
|
|
85
|
+
group = zarr.open_group(store=store, mode=mode, zarr_format=zarr_format)
|
|
71
86
|
|
|
72
|
-
except
|
|
87
|
+
except FileExistsError as e:
|
|
73
88
|
raise NgioFileExistsError(
|
|
74
89
|
f"A Zarr group already exists at {store}, consider setting overwrite=True."
|
|
75
90
|
) from e
|
|
76
91
|
|
|
77
|
-
except
|
|
92
|
+
except FileNotFoundError as e:
|
|
78
93
|
raise NgioFileNotFoundError(f"No Zarr group found at {store}") from e
|
|
79
94
|
|
|
95
|
+
except ContainsGroupError as e:
|
|
96
|
+
raise NgioFileExistsError(
|
|
97
|
+
f"A Zarr group already exists at {store}, consider setting overwrite=True."
|
|
98
|
+
) from e
|
|
99
|
+
|
|
80
100
|
return group
|
|
81
101
|
|
|
82
102
|
|
|
@@ -86,178 +106,184 @@ class ZarrGroupHandler:
|
|
|
86
106
|
def __init__(
|
|
87
107
|
self,
|
|
88
108
|
store: StoreOrGroup,
|
|
109
|
+
zarr_format: Literal[2, 3] | None = None,
|
|
89
110
|
cache: bool = False,
|
|
90
|
-
mode: AccessModeLiteral =
|
|
91
|
-
parallel_safe: bool = False,
|
|
92
|
-
parent: "ZarrGroupHandler | None" = None,
|
|
111
|
+
mode: AccessModeLiteral | None = None,
|
|
93
112
|
):
|
|
94
113
|
"""Initialize the handler.
|
|
95
114
|
|
|
96
115
|
Args:
|
|
97
116
|
store (StoreOrGroup): The Zarr store or group containing the image data.
|
|
98
|
-
|
|
117
|
+
zarr_format (int | None): The Zarr format version to use.
|
|
99
118
|
cache (bool): Whether to cache the metadata.
|
|
100
|
-
mode (str): The mode of the store.
|
|
101
|
-
parallel_safe (bool): If True, the handler will create a lock file to make
|
|
102
|
-
that can be used to make the handler parallel safe.
|
|
103
|
-
Be aware that the lock needs to be used manually.
|
|
104
|
-
parent (ZarrGroupHandler | None): The parent handler.
|
|
119
|
+
mode (str | None): The mode of the store.
|
|
105
120
|
"""
|
|
106
|
-
if mode not in ["r", "r+", "w", "w-", "a"]:
|
|
121
|
+
if mode not in ["r", "r+", "w", "w-", "a", None]:
|
|
107
122
|
raise NgioValueError(f"Mode {mode} is not supported.")
|
|
108
123
|
|
|
109
|
-
|
|
110
|
-
raise NgioValueError(
|
|
111
|
-
"The cache and parallel_safe options are mutually exclusive."
|
|
112
|
-
"If you want to use the lock mechanism, you should not use the cache."
|
|
113
|
-
)
|
|
114
|
-
|
|
115
|
-
group = open_group_wrapper(store, mode)
|
|
116
|
-
_store = group.store
|
|
117
|
-
|
|
118
|
-
# Make sure the cache is set in the attrs
|
|
119
|
-
# in the same way as the cache in the handler
|
|
120
|
-
group.attrs.cache = cache
|
|
121
|
-
|
|
122
|
-
if parallel_safe:
|
|
123
|
-
if not isinstance(_store, DirectoryStore):
|
|
124
|
-
raise NgioValueError(
|
|
125
|
-
"The store needs to be a DirectoryStore to use the lock mechanism. "
|
|
126
|
-
f"Instead, got {_store.__class__.__name__}."
|
|
127
|
-
)
|
|
128
|
-
store_path = Path(_store.path) / group.path
|
|
129
|
-
self._lock_path = store_path.with_suffix(".lock")
|
|
130
|
-
self._lock = FileLock(self._lock_path, timeout=10)
|
|
131
|
-
|
|
132
|
-
else:
|
|
133
|
-
self._lock_path = None
|
|
134
|
-
self._lock = None
|
|
135
|
-
|
|
124
|
+
group = open_group_wrapper(store=store, mode=mode, zarr_format=zarr_format)
|
|
136
125
|
self._group = group
|
|
137
|
-
self._mode = mode
|
|
138
126
|
self.use_cache = cache
|
|
139
|
-
|
|
140
|
-
self.
|
|
141
|
-
self.
|
|
127
|
+
|
|
128
|
+
self._group_cache: NgioCache[zarr.Group] = NgioCache(use_cache=cache)
|
|
129
|
+
self._array_cache: NgioCache[zarr.Array] = NgioCache(use_cache=cache)
|
|
130
|
+
self._handlers_cache: NgioCache[ZarrGroupHandler] = NgioCache(use_cache=cache)
|
|
131
|
+
self._lock: tuple[Path, BaseFileLock] | None = None
|
|
142
132
|
|
|
143
133
|
def __repr__(self) -> str:
|
|
144
134
|
"""Return a string representation of the handler."""
|
|
145
135
|
return (
|
|
146
|
-
f"ZarrGroupHandler(full_url={self.full_url},
|
|
136
|
+
f"ZarrGroupHandler(full_url={self.full_url}, read_only={self.read_only}, "
|
|
147
137
|
f"cache={self.use_cache}"
|
|
148
138
|
)
|
|
149
139
|
|
|
150
140
|
@property
|
|
151
|
-
def store(self) ->
|
|
141
|
+
def store(self) -> Store:
|
|
152
142
|
"""Return the store of the group."""
|
|
153
|
-
return self.
|
|
143
|
+
return self._group.store
|
|
154
144
|
|
|
155
145
|
@property
|
|
156
146
|
def full_url(self) -> str | None:
|
|
157
147
|
"""Return the store path."""
|
|
158
|
-
if isinstance(self.store,
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
return f"{self.store.path}/{self.
|
|
148
|
+
if isinstance(self.store, LocalStore):
|
|
149
|
+
return (self.store.root / self.group.path).as_posix()
|
|
150
|
+
elif isinstance(self.store, FsspecStore):
|
|
151
|
+
return f"{self.store.path}/{self.group.path}"
|
|
152
|
+
elif isinstance(self.store, ZipStore):
|
|
153
|
+
return (self.store.path / self.group.path).as_posix()
|
|
154
|
+
elif isinstance(self.store, MemoryStore):
|
|
155
|
+
return None
|
|
156
|
+
warnings.warn(
|
|
157
|
+
f"Cannot determine full URL for store type {type(self.store)}. ",
|
|
158
|
+
UserWarning,
|
|
159
|
+
stacklevel=2,
|
|
160
|
+
)
|
|
162
161
|
return None
|
|
163
162
|
|
|
164
163
|
@property
|
|
165
|
-
def
|
|
166
|
-
"""Return the
|
|
167
|
-
return self.
|
|
164
|
+
def zarr_format(self) -> Literal[2, 3]:
|
|
165
|
+
"""Return the Zarr format version."""
|
|
166
|
+
return self._group.metadata.zarr_format
|
|
167
|
+
|
|
168
|
+
@property
|
|
169
|
+
def read_only(self) -> bool:
|
|
170
|
+
"""Return whether the group is read only."""
|
|
171
|
+
return self._group.read_only
|
|
172
|
+
|
|
173
|
+
def _create_lock(self) -> tuple[Path, BaseFileLock]:
|
|
174
|
+
"""Create the lock."""
|
|
175
|
+
if self._lock is not None:
|
|
176
|
+
return self._lock
|
|
177
|
+
|
|
178
|
+
if self.use_cache is True:
|
|
179
|
+
raise NgioValueError(
|
|
180
|
+
"Lock mechanism is not compatible with caching. "
|
|
181
|
+
"Please set cache=False to use the lock mechanism."
|
|
182
|
+
)
|
|
183
|
+
|
|
184
|
+
if not isinstance(self.store, LocalStore):
|
|
185
|
+
raise NgioValueError(
|
|
186
|
+
"The store needs to be a LocalStore to use the lock mechanism. "
|
|
187
|
+
f"Instead, got {self.store.__class__.__name__}."
|
|
188
|
+
)
|
|
189
|
+
|
|
190
|
+
store_path = Path(self.store.root) / self.group.path
|
|
191
|
+
_lock_path = store_path.with_suffix(".lock")
|
|
192
|
+
_lock = FileLock(_lock_path, timeout=10)
|
|
193
|
+
return _lock_path, _lock
|
|
168
194
|
|
|
169
195
|
@property
|
|
170
196
|
def lock(self) -> BaseFileLock:
|
|
171
197
|
"""Return the lock."""
|
|
172
198
|
if self._lock is None:
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
"Reopen the handler with parallel_safe=True."
|
|
176
|
-
)
|
|
177
|
-
return self._lock
|
|
199
|
+
self._lock = self._create_lock()
|
|
200
|
+
return self._lock[1]
|
|
178
201
|
|
|
179
202
|
@property
|
|
180
|
-
def
|
|
181
|
-
"""Return the
|
|
182
|
-
|
|
203
|
+
def lock_path(self) -> Path:
|
|
204
|
+
"""Return the lock path."""
|
|
205
|
+
if self._lock is None:
|
|
206
|
+
self._lock = self._create_lock()
|
|
207
|
+
return self._lock[0]
|
|
183
208
|
|
|
184
209
|
def remove_lock(self) -> None:
|
|
185
210
|
"""Return the lock."""
|
|
186
|
-
if self._lock is None
|
|
211
|
+
if self._lock is None:
|
|
187
212
|
return None
|
|
188
213
|
|
|
189
|
-
lock_path =
|
|
190
|
-
if lock_path.exists() and
|
|
214
|
+
lock_path, lock = self._lock
|
|
215
|
+
if lock_path.exists() and lock.lock_counter == 0:
|
|
191
216
|
lock_path.unlink()
|
|
192
217
|
self._lock = None
|
|
193
|
-
self._lock_path = None
|
|
194
218
|
return None
|
|
195
219
|
|
|
196
220
|
raise NgioValueError("The lock is still in use. Cannot remove it.")
|
|
197
221
|
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
"""Return the group."""
|
|
201
|
-
return self._group
|
|
222
|
+
def reopen_group(self) -> zarr.Group:
|
|
223
|
+
"""Reopen the group.
|
|
202
224
|
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
225
|
+
This is useful when the group has been modified
|
|
226
|
+
outside of the handler.
|
|
227
|
+
"""
|
|
228
|
+
mode = "r" if self.read_only else "r+"
|
|
229
|
+
return zarr.open_group(
|
|
230
|
+
store=self._group.store,
|
|
231
|
+
path=self._group.path,
|
|
232
|
+
mode=mode,
|
|
233
|
+
zarr_format=self._group.metadata.zarr_format,
|
|
234
|
+
)
|
|
208
235
|
|
|
209
|
-
def
|
|
210
|
-
"""
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
236
|
+
def reopen_handler(self) -> "ZarrGroupHandler":
|
|
237
|
+
"""Reopen the handler.
|
|
238
|
+
|
|
239
|
+
This is useful when the group has been modified
|
|
240
|
+
outside of the handler.
|
|
241
|
+
"""
|
|
242
|
+
mode = "r" if self.read_only else "r+"
|
|
243
|
+
group = self.reopen_group()
|
|
244
|
+
return ZarrGroupHandler(
|
|
245
|
+
store=group,
|
|
246
|
+
zarr_format=group.metadata.zarr_format,
|
|
247
|
+
cache=self.use_cache,
|
|
248
|
+
mode=mode,
|
|
249
|
+
)
|
|
214
250
|
|
|
215
251
|
def clean_cache(self) -> None:
|
|
216
252
|
"""Clear the cached metadata."""
|
|
217
|
-
|
|
253
|
+
group = self.reopen_group()
|
|
254
|
+
self.__init__(
|
|
255
|
+
store=group,
|
|
256
|
+
zarr_format=group.metadata.zarr_format,
|
|
257
|
+
cache=self.use_cache,
|
|
258
|
+
mode="r" if self.read_only else "r+",
|
|
259
|
+
)
|
|
260
|
+
|
|
261
|
+
@property
|
|
262
|
+
def group(self) -> zarr.Group:
|
|
263
|
+
"""Return the group."""
|
|
264
|
+
if self.use_cache is False:
|
|
265
|
+
# If we are not using cache, we need to reopen the group
|
|
266
|
+
# to make sure that the attributes are up to date
|
|
267
|
+
return self.reopen_group()
|
|
268
|
+
return self._group
|
|
218
269
|
|
|
219
270
|
def load_attrs(self) -> dict:
|
|
220
271
|
"""Load the attributes of the group."""
|
|
221
|
-
|
|
222
|
-
if attrs is not None and isinstance(attrs, dict):
|
|
223
|
-
return attrs
|
|
224
|
-
|
|
225
|
-
attrs = dict(self.group.attrs)
|
|
226
|
-
|
|
227
|
-
self.add_to_cache("attrs", attrs)
|
|
228
|
-
return attrs
|
|
229
|
-
|
|
230
|
-
def _write_attrs(self, attrs: dict, overwrite: bool = False) -> None:
|
|
231
|
-
"""Write the metadata to the store."""
|
|
232
|
-
is_read_only = getattr(self._group, "_read_only", False)
|
|
233
|
-
if is_read_only:
|
|
234
|
-
raise NgioValueError("The group is read only. Cannot write metadata.")
|
|
235
|
-
|
|
236
|
-
# we need to invalidate the current attrs cache
|
|
237
|
-
self.add_to_cache("attrs", None)
|
|
238
|
-
if overwrite:
|
|
239
|
-
self.group.attrs.clear()
|
|
240
|
-
|
|
241
|
-
self.group.attrs.update(attrs)
|
|
272
|
+
return self.reopen_group().attrs.asdict()
|
|
242
273
|
|
|
243
274
|
def write_attrs(self, attrs: dict, overwrite: bool = False) -> None:
|
|
244
275
|
"""Write the metadata to the store."""
|
|
245
276
|
# Maybe we should use the lock here
|
|
246
|
-
self.
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
return group_or_array
|
|
253
|
-
|
|
254
|
-
group_or_array = self.group.get(path, None)
|
|
255
|
-
self.add_to_cache(path, group_or_array)
|
|
256
|
-
return group_or_array
|
|
277
|
+
if self.read_only:
|
|
278
|
+
raise NgioValueError("The group is read only. Cannot write metadata.")
|
|
279
|
+
group = self.reopen_group()
|
|
280
|
+
if overwrite:
|
|
281
|
+
group.attrs.clear()
|
|
282
|
+
group.attrs.update(attrs)
|
|
257
283
|
|
|
258
284
|
def create_group(self, path: str, overwrite: bool = False) -> zarr.Group:
|
|
259
285
|
"""Create a group in the group."""
|
|
260
|
-
if self.
|
|
286
|
+
if self.group.read_only:
|
|
261
287
|
raise NgioValueError("Cannot create a group in read only mode.")
|
|
262
288
|
|
|
263
289
|
try:
|
|
@@ -267,7 +293,7 @@ class ZarrGroupHandler:
|
|
|
267
293
|
f"A Zarr group already exists at {path}, "
|
|
268
294
|
"consider setting overwrite=True."
|
|
269
295
|
) from e
|
|
270
|
-
self.
|
|
296
|
+
self._group_cache.set(path, group, overwrite=overwrite)
|
|
271
297
|
return group
|
|
272
298
|
|
|
273
299
|
def get_group(
|
|
@@ -293,123 +319,215 @@ class ZarrGroupHandler:
|
|
|
293
319
|
if overwrite:
|
|
294
320
|
return self.create_group(path, overwrite=overwrite)
|
|
295
321
|
|
|
296
|
-
group = self.
|
|
322
|
+
group = self._group_cache.get(path)
|
|
297
323
|
if isinstance(group, zarr.Group):
|
|
298
324
|
return group
|
|
299
325
|
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
|
|
326
|
+
group = self.group.get(path, default=None)
|
|
327
|
+
if isinstance(group, zarr.Group):
|
|
328
|
+
self._group_cache.set(path, group, overwrite=overwrite)
|
|
329
|
+
return group
|
|
330
|
+
|
|
331
|
+
if isinstance(group, zarr.Array):
|
|
332
|
+
raise NgioValueError(f"The object at {path} is not a group, but an array.")
|
|
304
333
|
|
|
305
334
|
if not create_mode:
|
|
306
335
|
raise NgioFileNotFoundError(f"No group found at {path}")
|
|
307
336
|
group = self.create_group(path)
|
|
337
|
+
self._group_cache.set(path, group, overwrite=overwrite)
|
|
308
338
|
return group
|
|
309
339
|
|
|
310
|
-
def
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
|
|
340
|
+
def get_array(self, path: str) -> zarr.Array:
|
|
341
|
+
"""Get an array from the group."""
|
|
342
|
+
array = self._array_cache.get(path)
|
|
343
|
+
if isinstance(array, zarr.Array):
|
|
344
|
+
return array
|
|
345
|
+
array = self.group.get(path, default=None)
|
|
346
|
+
if isinstance(array, zarr.Array):
|
|
347
|
+
self._array_cache.set(path, array)
|
|
348
|
+
return array
|
|
349
|
+
|
|
350
|
+
if isinstance(array, zarr.Group):
|
|
351
|
+
raise NgioValueError(f"The object at {path} is not an array, but a group.")
|
|
352
|
+
raise NgioFileNotFoundError(f"No array found at {path}")
|
|
353
|
+
|
|
354
|
+
def get_handler(
|
|
355
|
+
self,
|
|
356
|
+
path: str,
|
|
357
|
+
create_mode: bool = True,
|
|
358
|
+
overwrite: bool = False,
|
|
359
|
+
) -> "ZarrGroupHandler":
|
|
360
|
+
"""Get a new handler for a group in the current handler group.
|
|
314
361
|
|
|
315
362
|
Args:
|
|
316
363
|
path (str): The path to the group.
|
|
317
364
|
create_mode (bool): If True, create the group if it does not exist.
|
|
365
|
+
overwrite (bool): If True, overwrite the group if it exists.
|
|
366
|
+
"""
|
|
367
|
+
handler = self._handlers_cache.get(path)
|
|
368
|
+
if handler is not None:
|
|
369
|
+
return handler
|
|
370
|
+
group = self.get_group(path, create_mode=create_mode, overwrite=overwrite)
|
|
371
|
+
mode = "r" if group.read_only else "r+"
|
|
372
|
+
handler = ZarrGroupHandler(
|
|
373
|
+
store=group, zarr_format=self.zarr_format, cache=self.use_cache, mode=mode
|
|
374
|
+
)
|
|
375
|
+
self._handlers_cache.set(path, handler)
|
|
376
|
+
return handler
|
|
318
377
|
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
|
|
378
|
+
@property
|
|
379
|
+
def is_listable(self) -> bool:
|
|
380
|
+
return is_group_listable(self.group)
|
|
381
|
+
|
|
382
|
+
def delete_group(self, path: str) -> None:
|
|
383
|
+
"""Delete a group from the current group.
|
|
322
384
|
|
|
385
|
+
Args:
|
|
386
|
+
path (str): The path to the group to delete.
|
|
323
387
|
"""
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
|
|
388
|
+
if self.group.read_only:
|
|
389
|
+
raise NgioValueError("Cannot delete a group in read only mode.")
|
|
390
|
+
self.group.__delitem__(path)
|
|
391
|
+
self._group_cache._cache.pop(path, None)
|
|
392
|
+
self._handlers_cache._cache.pop(path, None)
|
|
393
|
+
|
|
394
|
+
def delete_self(self) -> None:
|
|
395
|
+
"""Delete the current group."""
|
|
396
|
+
if self.group.read_only:
|
|
397
|
+
raise NgioValueError("Cannot delete a group in read only mode.")
|
|
398
|
+
self.group.__delitem__("/")
|
|
399
|
+
|
|
400
|
+
def copy_group(self, dest_group: zarr.Group):
|
|
401
|
+
"""Copy the group to a new store."""
|
|
402
|
+
copy_group(self.group, dest_group)
|
|
328
403
|
|
|
329
|
-
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
|
|
333
|
-
|
|
334
|
-
|
|
404
|
+
|
|
405
|
+
def find_dimension_separator(array: zarr.Array) -> Literal[".", "/"]:
|
|
406
|
+
"""Find the dimension separator used in the Zarr store.
|
|
407
|
+
|
|
408
|
+
Args:
|
|
409
|
+
array (zarr.Array): The Zarr array to check.
|
|
410
|
+
|
|
411
|
+
Returns:
|
|
412
|
+
Literal[".", "/"]: The dimension separator used in the store.
|
|
413
|
+
"""
|
|
414
|
+
from zarr.core.chunk_key_encodings import DefaultChunkKeyEncoding
|
|
415
|
+
|
|
416
|
+
if array.metadata.zarr_format == 2:
|
|
417
|
+
separator = array.metadata.dimension_separator
|
|
418
|
+
else:
|
|
419
|
+
separator = array.metadata.chunk_key_encoding
|
|
420
|
+
if not isinstance(separator, DefaultChunkKeyEncoding):
|
|
335
421
|
raise NgioValueError(
|
|
336
|
-
|
|
422
|
+
"Only DefaultChunkKeyEncoding is supported in this example."
|
|
337
423
|
)
|
|
338
|
-
|
|
424
|
+
separator = separator.separator
|
|
425
|
+
return separator
|
|
339
426
|
|
|
340
|
-
def create_array(
|
|
341
|
-
self,
|
|
342
|
-
path: str,
|
|
343
|
-
shape: tuple[int, ...],
|
|
344
|
-
dtype: str,
|
|
345
|
-
chunks: tuple[int, ...] | None = None,
|
|
346
|
-
dimension_separator: DIMENSION_SEPARATOR = "/",
|
|
347
|
-
compressor: str = "default",
|
|
348
|
-
overwrite: bool = False,
|
|
349
|
-
) -> zarr.Array:
|
|
350
|
-
if self.mode == "r":
|
|
351
|
-
raise NgioValueError("Cannot create an array in read only mode.")
|
|
352
427
|
|
|
353
|
-
|
|
354
|
-
|
|
355
|
-
name=path,
|
|
356
|
-
shape=shape,
|
|
357
|
-
dtype=dtype,
|
|
358
|
-
chunks=chunks,
|
|
359
|
-
dimension_separator=dimension_separator,
|
|
360
|
-
compressor=compressor,
|
|
361
|
-
overwrite=overwrite,
|
|
362
|
-
)
|
|
363
|
-
except ContainsGroupError as e:
|
|
364
|
-
raise NgioFileExistsError(
|
|
365
|
-
f"A Zarr array already exists at {path}, "
|
|
366
|
-
"consider setting overwrite=True."
|
|
367
|
-
) from e
|
|
368
|
-
except Exception as e:
|
|
369
|
-
raise NgioValueError(f"Error creating array at {path}") from e
|
|
428
|
+
def is_group_listable(group: zarr.Group) -> bool:
|
|
429
|
+
"""Check if a Zarr group is listable.
|
|
370
430
|
|
|
371
|
-
|
|
372
|
-
self,
|
|
373
|
-
path: str,
|
|
374
|
-
overwrite: bool = False,
|
|
375
|
-
) -> "ZarrGroupHandler":
|
|
376
|
-
"""Derive a new handler from the current handler.
|
|
431
|
+
A group is considered listable if it contains at least one array or subgroup.
|
|
377
432
|
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
|
|
381
|
-
|
|
382
|
-
group
|
|
383
|
-
|
|
384
|
-
|
|
385
|
-
|
|
386
|
-
|
|
387
|
-
|
|
388
|
-
|
|
433
|
+
Args:
|
|
434
|
+
group (zarr.Group): The Zarr group to check.
|
|
435
|
+
|
|
436
|
+
Returns:
|
|
437
|
+
bool: True if the group is listable, False otherwise.
|
|
438
|
+
"""
|
|
439
|
+
if not group.store.supports_listing:
|
|
440
|
+
# If the store does not support listing
|
|
441
|
+
# then for sure it is not listable
|
|
442
|
+
return False
|
|
443
|
+
try:
|
|
444
|
+
next(group.keys())
|
|
445
|
+
return True
|
|
446
|
+
except StopIteration:
|
|
447
|
+
# Group is listable but empty
|
|
448
|
+
return True
|
|
449
|
+
except Exception as _:
|
|
450
|
+
# Some stores may raise errors when listing
|
|
451
|
+
# consider those not listable
|
|
452
|
+
return False
|
|
453
|
+
|
|
454
|
+
|
|
455
|
+
def _make_sync_fs(fs: fsspec.AbstractFileSystem) -> fsspec.AbstractFileSystem:
|
|
456
|
+
fs_dict = json.loads(fs.to_json())
|
|
457
|
+
fs_dict["asynchronous"] = False
|
|
458
|
+
return fsspec.AbstractFileSystem.from_json(json.dumps(fs_dict))
|
|
459
|
+
|
|
460
|
+
|
|
461
|
+
def _get_mapper(store: LocalStore | FsspecStore, path: str):
|
|
462
|
+
if isinstance(store, LocalStore):
|
|
463
|
+
fs = fsspec.filesystem("file")
|
|
464
|
+
full_path = (store.root / path).as_posix()
|
|
465
|
+
else:
|
|
466
|
+
fs = _make_sync_fs(store.fs)
|
|
467
|
+
full_path = f"{store.path}/{path}"
|
|
468
|
+
return fs.get_mapper(full_path)
|
|
469
|
+
|
|
470
|
+
|
|
471
|
+
def _fsspec_copy(
|
|
472
|
+
src_fs: LocalStore | FsspecStore,
|
|
473
|
+
src_path: str,
|
|
474
|
+
dest_fs: LocalStore | FsspecStore,
|
|
475
|
+
dest_path: str,
|
|
476
|
+
):
|
|
477
|
+
src_mapper = _get_mapper(src_fs, src_path)
|
|
478
|
+
dest_mapper = _get_mapper(dest_fs, dest_path)
|
|
479
|
+
for key in src_mapper.keys():
|
|
480
|
+
dest_mapper[key] = src_mapper[key]
|
|
481
|
+
|
|
482
|
+
|
|
483
|
+
def _zarr_python_copy(src_group: zarr.Group, dest_group: zarr.Group):
|
|
484
|
+
# Copy attributes
|
|
485
|
+
dest_group.attrs.put(src_group.attrs.asdict())
|
|
486
|
+
# Copy arrays
|
|
487
|
+
for name, array in src_group.arrays():
|
|
488
|
+
if array.metadata.zarr_format == 2:
|
|
489
|
+
spec = AnyArraySpecV2.from_zarr(array)
|
|
490
|
+
else:
|
|
491
|
+
spec = AnyArraySpecV3.from_zarr(array)
|
|
492
|
+
dst = spec.to_zarr(
|
|
493
|
+
store=dest_group.store,
|
|
494
|
+
path=f"{dest_group.path}/{name}",
|
|
495
|
+
overwrite=True,
|
|
496
|
+
)
|
|
497
|
+
if array.ndim > 0:
|
|
498
|
+
dask_array = da.from_zarr(array)
|
|
499
|
+
da.to_zarr(dask_array, dst, overwrite=False)
|
|
500
|
+
# Copy subgroups
|
|
501
|
+
for name, subgroup in src_group.groups():
|
|
502
|
+
dest_subgroup = dest_group.create_group(name, overwrite=True)
|
|
503
|
+
_zarr_python_copy(subgroup, dest_subgroup)
|
|
504
|
+
|
|
505
|
+
|
|
506
|
+
def copy_group(
|
|
507
|
+
src_group: zarr.Group, dest_group: zarr.Group, suppress_warnings: bool = False
|
|
508
|
+
):
|
|
509
|
+
if src_group.metadata.zarr_format != dest_group.metadata.zarr_format:
|
|
510
|
+
raise NgioValueError(
|
|
511
|
+
"Different Zarr format versions between source and destination, "
|
|
512
|
+
"cannot copy."
|
|
389
513
|
)
|
|
390
514
|
|
|
391
|
-
|
|
392
|
-
|
|
393
|
-
path: str,
|
|
394
|
-
overwrite: bool = False,
|
|
395
|
-
) -> tuple[bool, "ZarrGroupHandler | NgioError"]:
|
|
396
|
-
"""Derive a new handler from the current handler."""
|
|
397
|
-
try:
|
|
398
|
-
return True, self.derive_handler(path, overwrite=overwrite)
|
|
399
|
-
except NgioError as e:
|
|
400
|
-
return False, e
|
|
515
|
+
if not is_group_listable(src_group):
|
|
516
|
+
raise NgioValueError("Source group is not listable, cannot copy.")
|
|
401
517
|
|
|
402
|
-
|
|
403
|
-
"
|
|
404
|
-
|
|
405
|
-
|
|
406
|
-
|
|
407
|
-
|
|
408
|
-
|
|
409
|
-
|
|
518
|
+
if dest_group.read_only:
|
|
519
|
+
raise NgioValueError("Destination group is read only, cannot copy.")
|
|
520
|
+
if isinstance(src_group.store, LocalStore | FsspecStore) and isinstance(
|
|
521
|
+
dest_group.store, LocalStore | FsspecStore
|
|
522
|
+
):
|
|
523
|
+
_fsspec_copy(src_group.store, src_group.path, dest_group.store, dest_group.path)
|
|
524
|
+
return
|
|
525
|
+
if not suppress_warnings:
|
|
526
|
+
warnings.warn(
|
|
527
|
+
"Fsspec copy not possible, falling back to Zarr Python API for the copy. "
|
|
528
|
+
"This will preserve some tabular data non-zarr native (parquet, and csv), "
|
|
529
|
+
"and it will be slower for large datasets.",
|
|
530
|
+
UserWarning,
|
|
531
|
+
stacklevel=2,
|
|
410
532
|
)
|
|
411
|
-
|
|
412
|
-
raise NgioValueError(
|
|
413
|
-
f"Error copying group to {handler.full_url}, "
|
|
414
|
-
f"#{n_skipped} files where skipped."
|
|
415
|
-
)
|
|
533
|
+
_zarr_python_copy(src_group, dest_group)
|