ngio 0.5.0a1__py3-none-any.whl → 0.5.0a3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ngio/__init__.py +2 -2
- ngio/common/__init__.py +11 -6
- ngio/common/_masking_roi.py +12 -41
- ngio/common/_pyramid.py +218 -78
- ngio/common/_roi.py +257 -329
- ngio/experimental/iterators/_feature.py +3 -3
- ngio/experimental/iterators/_rois_utils.py +10 -11
- ngio/hcs/_plate.py +114 -123
- ngio/images/_abstract_image.py +417 -35
- ngio/images/_create_synt_container.py +36 -43
- ngio/images/_create_utils.py +423 -0
- ngio/images/_image.py +155 -177
- ngio/images/_label.py +144 -119
- ngio/images/_ome_zarr_container.py +361 -196
- ngio/io_pipes/_io_pipes.py +9 -9
- ngio/io_pipes/_io_pipes_masked.py +7 -7
- ngio/io_pipes/_io_pipes_roi.py +6 -6
- ngio/io_pipes/_io_pipes_types.py +3 -3
- ngio/io_pipes/_match_shape.py +5 -4
- ngio/io_pipes/_ops_slices_utils.py +8 -5
- ngio/ome_zarr_meta/__init__.py +15 -18
- ngio/ome_zarr_meta/_meta_handlers.py +334 -713
- ngio/ome_zarr_meta/ngio_specs/_axes.py +1 -0
- ngio/ome_zarr_meta/ngio_specs/_dataset.py +13 -22
- ngio/ome_zarr_meta/ngio_specs/_ngio_hcs.py +54 -61
- ngio/ome_zarr_meta/ngio_specs/_ngio_image.py +14 -68
- ngio/ome_zarr_meta/v04/__init__.py +1 -1
- ngio/ome_zarr_meta/v04/{_v04_spec_utils.py → _v04_spec.py} +16 -61
- ngio/ome_zarr_meta/v05/__init__.py +1 -1
- ngio/ome_zarr_meta/v05/{_v05_spec_utils.py → _v05_spec.py} +18 -61
- ngio/tables/_tables_container.py +25 -20
- ngio/tables/backends/_anndata.py +57 -8
- ngio/tables/backends/_anndata_utils.py +1 -6
- ngio/tables/backends/_csv.py +3 -19
- ngio/tables/backends/_json.py +10 -13
- ngio/tables/backends/_parquet.py +3 -31
- ngio/tables/backends/_py_arrow_backends.py +222 -0
- ngio/tables/v1/_roi_table.py +44 -27
- ngio/utils/__init__.py +6 -12
- ngio/utils/_cache.py +48 -0
- ngio/utils/_zarr_utils.py +285 -245
- {ngio-0.5.0a1.dist-info → ngio-0.5.0a3.dist-info}/METADATA +8 -4
- {ngio-0.5.0a1.dist-info → ngio-0.5.0a3.dist-info}/RECORD +45 -45
- {ngio-0.5.0a1.dist-info → ngio-0.5.0a3.dist-info}/WHEEL +1 -1
- ngio/images/_create.py +0 -283
- ngio/tables/backends/_non_zarr_backends.py +0 -196
- ngio/utils/_logger.py +0 -50
- {ngio-0.5.0a1.dist-info → ngio-0.5.0a3.dist-info}/licenses/LICENSE +0 -0
ngio/utils/_zarr_utils.py
CHANGED
|
@@ -1,25 +1,39 @@
|
|
|
1
1
|
"""Common utilities for working with Zarr groups in consistent ways."""
|
|
2
2
|
|
|
3
|
+
import json
|
|
4
|
+
import warnings
|
|
3
5
|
from pathlib import Path
|
|
4
6
|
from typing import Literal
|
|
5
7
|
|
|
8
|
+
import dask.array as da
|
|
6
9
|
import fsspec
|
|
7
10
|
import zarr
|
|
8
11
|
from filelock import BaseFileLock, FileLock
|
|
12
|
+
from pydantic_zarr.v2 import ArraySpec as AnyArraySpecV2
|
|
13
|
+
from pydantic_zarr.v3 import ArraySpec as AnyArraySpecV3
|
|
9
14
|
from zarr.abc.store import Store
|
|
10
|
-
from zarr.core.array import CompressorLike
|
|
11
15
|
from zarr.errors import ContainsGroupError
|
|
12
|
-
from zarr.storage import FsspecStore, LocalStore, MemoryStore
|
|
16
|
+
from zarr.storage import FsspecStore, LocalStore, MemoryStore, ZipStore
|
|
13
17
|
|
|
14
|
-
from ngio.utils import
|
|
15
|
-
from ngio.utils._errors import
|
|
18
|
+
from ngio.utils._cache import NgioCache
|
|
19
|
+
from ngio.utils._errors import (
|
|
20
|
+
NgioFileExistsError,
|
|
21
|
+
NgioFileNotFoundError,
|
|
22
|
+
NgioValueError,
|
|
23
|
+
)
|
|
16
24
|
|
|
17
25
|
AccessModeLiteral = Literal["r", "r+", "w", "w-", "a"]
|
|
18
26
|
# StoreLike is more restrictive than it could be
|
|
19
27
|
# but to make sure we can handle the store correctly
|
|
20
28
|
# we need to be more restrictive
|
|
21
29
|
NgioSupportedStore = (
|
|
22
|
-
str
|
|
30
|
+
str
|
|
31
|
+
| Path
|
|
32
|
+
| fsspec.mapping.FSMap
|
|
33
|
+
| FsspecStore
|
|
34
|
+
| MemoryStore
|
|
35
|
+
| LocalStore
|
|
36
|
+
| ZipStore
|
|
23
37
|
)
|
|
24
38
|
GenericStore = Store | NgioSupportedStore
|
|
25
39
|
StoreOrGroup = GenericStore | zarr.Group
|
|
@@ -27,32 +41,33 @@ StoreOrGroup = GenericStore | zarr.Group
|
|
|
27
41
|
|
|
28
42
|
def _check_store(store) -> NgioSupportedStore:
|
|
29
43
|
"""Check the store and return a valid store."""
|
|
30
|
-
if isinstance(store, NgioSupportedStore):
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
44
|
+
if not isinstance(store, NgioSupportedStore):
|
|
45
|
+
warnings.warn(
|
|
46
|
+
f"Store type {type(store)} is not explicitly supported. "
|
|
47
|
+
f"Supported types are: {NgioSupportedStore}. "
|
|
48
|
+
"Proceeding, but this may lead to unexpected behavior.",
|
|
49
|
+
UserWarning,
|
|
50
|
+
stacklevel=2,
|
|
51
|
+
)
|
|
52
|
+
return store
|
|
37
53
|
|
|
38
54
|
|
|
39
|
-
def _check_group(
|
|
55
|
+
def _check_group(
|
|
56
|
+
group: zarr.Group, mode: AccessModeLiteral | None = None
|
|
57
|
+
) -> zarr.Group:
|
|
40
58
|
"""Check the group and return a valid group."""
|
|
41
|
-
if group.read_only and mode in [
|
|
42
|
-
raise NgioValueError(
|
|
43
|
-
"The group is read only. Cannot open in write mode ['w', 'w-']"
|
|
44
|
-
)
|
|
59
|
+
if group.read_only and mode not in [None, "r"]:
|
|
60
|
+
raise NgioValueError(f"The group is read only. Cannot open in mode {mode}.")
|
|
45
61
|
|
|
46
62
|
if mode == "r" and not group.read_only:
|
|
47
63
|
# let's make sure we don't accidentally write to the group
|
|
48
64
|
group = zarr.open_group(store=group.store, path=group.path, mode="r")
|
|
49
|
-
|
|
50
65
|
return group
|
|
51
66
|
|
|
52
67
|
|
|
53
68
|
def open_group_wrapper(
|
|
54
69
|
store: StoreOrGroup,
|
|
55
|
-
mode: AccessModeLiteral,
|
|
70
|
+
mode: AccessModeLiteral | None = None,
|
|
56
71
|
zarr_format: Literal[2, 3] | None = None,
|
|
57
72
|
) -> zarr.Group:
|
|
58
73
|
"""Wrapper around zarr.open_group with some additional checks.
|
|
@@ -72,6 +87,7 @@ def open_group_wrapper(
|
|
|
72
87
|
|
|
73
88
|
try:
|
|
74
89
|
_check_store(store)
|
|
90
|
+
mode = mode if mode is not None else "a"
|
|
75
91
|
group = zarr.open_group(store=store, mode=mode, zarr_format=zarr_format)
|
|
76
92
|
|
|
77
93
|
except FileExistsError as e:
|
|
@@ -98,68 +114,33 @@ class ZarrGroupHandler:
|
|
|
98
114
|
store: StoreOrGroup,
|
|
99
115
|
zarr_format: Literal[2, 3] | None = None,
|
|
100
116
|
cache: bool = False,
|
|
101
|
-
mode: AccessModeLiteral =
|
|
102
|
-
parallel_safe: bool = False,
|
|
103
|
-
parent: "ZarrGroupHandler | None" = None,
|
|
117
|
+
mode: AccessModeLiteral | None = None,
|
|
104
118
|
):
|
|
105
119
|
"""Initialize the handler.
|
|
106
120
|
|
|
107
121
|
Args:
|
|
108
122
|
store (StoreOrGroup): The Zarr store or group containing the image data.
|
|
109
123
|
meta_mode (str): The mode of the metadata handler.
|
|
110
|
-
zarr_format (int): The Zarr format version to use.
|
|
124
|
+
zarr_format (int | None): The Zarr format version to use.
|
|
111
125
|
cache (bool): Whether to cache the metadata.
|
|
112
|
-
mode (str): The mode of the store.
|
|
113
|
-
parallel_safe (bool): If True, the handler will create a lock file to make
|
|
114
|
-
that can be used to make the handler parallel safe.
|
|
115
|
-
Be aware that the lock needs to be used manually.
|
|
116
|
-
parent (ZarrGroupHandler | None): The parent handler.
|
|
126
|
+
mode (str | None): The mode of the store.
|
|
117
127
|
"""
|
|
118
|
-
if mode not in ["r", "r+", "w", "w-", "a"]:
|
|
128
|
+
if mode not in ["r", "r+", "w", "w-", "a", None]:
|
|
119
129
|
raise NgioValueError(f"Mode {mode} is not supported.")
|
|
120
130
|
|
|
121
|
-
if parallel_safe and cache:
|
|
122
|
-
raise NgioValueError(
|
|
123
|
-
"The cache and parallel_safe options are mutually exclusive."
|
|
124
|
-
"If you want to use the lock mechanism, you should not use the cache."
|
|
125
|
-
)
|
|
126
|
-
|
|
127
131
|
group = open_group_wrapper(store=store, mode=mode, zarr_format=zarr_format)
|
|
128
|
-
_store = group.store
|
|
129
|
-
|
|
130
|
-
# Make sure the cache is set in the attrs
|
|
131
|
-
# in the same way as the cache in the handler
|
|
132
|
-
|
|
133
|
-
## TODO
|
|
134
|
-
# Figure out how to handle the cache in the new zarr version
|
|
135
|
-
# group.attrs.cache = cache
|
|
136
|
-
|
|
137
|
-
if parallel_safe:
|
|
138
|
-
if not isinstance(_store, LocalStore):
|
|
139
|
-
raise NgioValueError(
|
|
140
|
-
"The store needs to be a LocalStore to use the lock mechanism. "
|
|
141
|
-
f"Instead, got {_store.__class__.__name__}."
|
|
142
|
-
)
|
|
143
|
-
|
|
144
|
-
store_path = _store.root / group.path
|
|
145
|
-
self._lock_path = store_path.with_suffix(".lock")
|
|
146
|
-
self._lock = FileLock(self._lock_path, timeout=10)
|
|
147
|
-
|
|
148
|
-
else:
|
|
149
|
-
self._lock_path = None
|
|
150
|
-
self._lock = None
|
|
151
|
-
|
|
152
132
|
self._group = group
|
|
153
|
-
self._mode = mode
|
|
154
133
|
self.use_cache = cache
|
|
155
|
-
|
|
156
|
-
self.
|
|
157
|
-
self.
|
|
134
|
+
|
|
135
|
+
self._group_cache: NgioCache[zarr.Group] = NgioCache(use_cache=cache)
|
|
136
|
+
self._array_cache: NgioCache[zarr.Array] = NgioCache(use_cache=cache)
|
|
137
|
+
self._handlers_cache: NgioCache[ZarrGroupHandler] = NgioCache(use_cache=cache)
|
|
138
|
+
self._lock: tuple[Path, BaseFileLock] | None = None
|
|
158
139
|
|
|
159
140
|
def __repr__(self) -> str:
|
|
160
141
|
"""Return a string representation of the handler."""
|
|
161
142
|
return (
|
|
162
|
-
f"ZarrGroupHandler(full_url={self.full_url},
|
|
143
|
+
f"ZarrGroupHandler(full_url={self.full_url}, read_only={self.read_only}, "
|
|
163
144
|
f"cache={self.use_cache}"
|
|
164
145
|
)
|
|
165
146
|
|
|
@@ -173,8 +154,17 @@ class ZarrGroupHandler:
|
|
|
173
154
|
"""Return the store path."""
|
|
174
155
|
if isinstance(self.store, LocalStore):
|
|
175
156
|
return (self.store.root / self.group.path).as_posix()
|
|
176
|
-
|
|
177
|
-
return self.store.
|
|
157
|
+
elif isinstance(self.store, FsspecStore):
|
|
158
|
+
return f"{self.store.path}/{self.group.path}"
|
|
159
|
+
elif isinstance(self.store, ZipStore):
|
|
160
|
+
return (self.store.path / self.group.path).as_posix()
|
|
161
|
+
elif isinstance(self.store, MemoryStore):
|
|
162
|
+
return None
|
|
163
|
+
warnings.warn(
|
|
164
|
+
f"Cannot determine full URL for store type {type(self.store)}. ",
|
|
165
|
+
UserWarning,
|
|
166
|
+
stacklevel=2,
|
|
167
|
+
)
|
|
178
168
|
return None
|
|
179
169
|
|
|
180
170
|
@property
|
|
@@ -183,35 +173,55 @@ class ZarrGroupHandler:
|
|
|
183
173
|
return self._group.metadata.zarr_format
|
|
184
174
|
|
|
185
175
|
@property
|
|
186
|
-
def
|
|
187
|
-
"""Return the
|
|
188
|
-
return self.
|
|
176
|
+
def read_only(self) -> bool:
|
|
177
|
+
"""Return whether the group is read only."""
|
|
178
|
+
return self._group.read_only
|
|
179
|
+
|
|
180
|
+
def _create_lock(self) -> tuple[Path, BaseFileLock]:
|
|
181
|
+
"""Create the lock."""
|
|
182
|
+
if self._lock is not None:
|
|
183
|
+
return self._lock
|
|
184
|
+
|
|
185
|
+
if self.use_cache is True:
|
|
186
|
+
raise NgioValueError(
|
|
187
|
+
"Lock mechanism is not compatible with caching. "
|
|
188
|
+
"Please set cache=False to use the lock mechanism."
|
|
189
|
+
)
|
|
190
|
+
|
|
191
|
+
if not isinstance(self.store, LocalStore):
|
|
192
|
+
raise NgioValueError(
|
|
193
|
+
"The store needs to be a LocalStore to use the lock mechanism. "
|
|
194
|
+
f"Instead, got {self.store.__class__.__name__}."
|
|
195
|
+
)
|
|
196
|
+
|
|
197
|
+
store_path = Path(self.store.root) / self.group.path
|
|
198
|
+
_lock_path = store_path.with_suffix(".lock")
|
|
199
|
+
_lock = FileLock(_lock_path, timeout=10)
|
|
200
|
+
return _lock_path, _lock
|
|
189
201
|
|
|
190
202
|
@property
|
|
191
203
|
def lock(self) -> BaseFileLock:
|
|
192
204
|
"""Return the lock."""
|
|
193
205
|
if self._lock is None:
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
"Reopen the handler with parallel_safe=True."
|
|
197
|
-
)
|
|
198
|
-
return self._lock
|
|
206
|
+
self._lock = self._create_lock()
|
|
207
|
+
return self._lock[1]
|
|
199
208
|
|
|
200
209
|
@property
|
|
201
|
-
def
|
|
202
|
-
"""Return the
|
|
203
|
-
|
|
210
|
+
def lock_path(self) -> Path:
|
|
211
|
+
"""Return the lock path."""
|
|
212
|
+
if self._lock is None:
|
|
213
|
+
self._lock = self._create_lock()
|
|
214
|
+
return self._lock[0]
|
|
204
215
|
|
|
205
216
|
def remove_lock(self) -> None:
|
|
206
217
|
"""Remove the lock file if one was created and it is no longer in use."""
|
|
207
|
-
if self._lock is None
|
|
218
|
+
if self._lock is None:
|
|
208
219
|
return None
|
|
209
220
|
|
|
210
|
-
lock_path =
|
|
211
|
-
if lock_path.exists() and
|
|
221
|
+
lock_path, lock = self._lock
|
|
222
|
+
if lock_path.exists() and lock.lock_counter == 0:
|
|
212
223
|
lock_path.unlink()
|
|
213
224
|
self._lock = None
|
|
214
|
-
self._lock_path = None
|
|
215
225
|
return None
|
|
216
226
|
|
|
217
227
|
raise NgioValueError("The lock is still in use. Cannot remove it.")
|
|
@@ -222,10 +232,7 @@ class ZarrGroupHandler:
|
|
|
222
232
|
This is useful when the group has been modified
|
|
223
233
|
outside of the handler.
|
|
224
234
|
"""
|
|
225
|
-
if self.
|
|
226
|
-
mode = "r"
|
|
227
|
-
else:
|
|
228
|
-
mode = "r+"
|
|
235
|
+
mode = "r" if self.read_only else "r+"
|
|
229
236
|
return zarr.open_group(
|
|
230
237
|
store=self._group.store,
|
|
231
238
|
path=self._group.path,
|
|
@@ -233,72 +240,57 @@ class ZarrGroupHandler:
|
|
|
233
240
|
zarr_format=self._group.metadata.zarr_format,
|
|
234
241
|
)
|
|
235
242
|
|
|
243
|
+
def reopen_handler(self) -> "ZarrGroupHandler":
|
|
244
|
+
"""Reopen the handler.
|
|
245
|
+
|
|
246
|
+
This is useful when the group has been modified
|
|
247
|
+
outside of the handler.
|
|
248
|
+
"""
|
|
249
|
+
mode = "r" if self.read_only else "r+"
|
|
250
|
+
group = self.reopen_group()
|
|
251
|
+
return ZarrGroupHandler(
|
|
252
|
+
store=group,
|
|
253
|
+
zarr_format=group.metadata.zarr_format,
|
|
254
|
+
cache=self.use_cache,
|
|
255
|
+
mode=mode,
|
|
256
|
+
)
|
|
257
|
+
|
|
258
|
+
def clean_cache(self) -> None:
|
|
259
|
+
"""Clear the cached metadata."""
|
|
260
|
+
group = self.reopen_group()
|
|
261
|
+
self.__init__(
|
|
262
|
+
store=group,
|
|
263
|
+
zarr_format=group.metadata.zarr_format,
|
|
264
|
+
cache=self.use_cache,
|
|
265
|
+
mode="r" if self.read_only else "r+",
|
|
266
|
+
)
|
|
267
|
+
|
|
236
268
|
@property
|
|
237
269
|
def group(self) -> zarr.Group:
|
|
238
270
|
"""Return the group."""
|
|
239
|
-
if self.
|
|
240
|
-
# If we are
|
|
271
|
+
if self.use_cache is False:
|
|
272
|
+
# If we are not using cache, we need to reopen the group
|
|
241
273
|
# to make sure that the attributes are up to date
|
|
242
274
|
return self.reopen_group()
|
|
243
275
|
return self._group
|
|
244
276
|
|
|
245
|
-
def add_to_cache(self, key: str, value: object) -> None:
|
|
246
|
-
"""Add an object to the cache."""
|
|
247
|
-
if not self.use_cache:
|
|
248
|
-
return None
|
|
249
|
-
self._cache[key] = value
|
|
250
|
-
|
|
251
|
-
def get_from_cache(self, key: str) -> object | None:
|
|
252
|
-
"""Get an object from the cache."""
|
|
253
|
-
if not self.use_cache:
|
|
254
|
-
return None
|
|
255
|
-
return self._cache.get(key, None)
|
|
256
|
-
|
|
257
|
-
def clean_cache(self) -> None:
|
|
258
|
-
"""Clear the cached metadata."""
|
|
259
|
-
self._cache = {}
|
|
260
|
-
|
|
261
277
|
def load_attrs(self) -> dict:
|
|
262
278
|
"""Load the attributes of the group."""
|
|
263
|
-
|
|
264
|
-
if attrs is not None and isinstance(attrs, dict):
|
|
265
|
-
return attrs
|
|
266
|
-
|
|
267
|
-
attrs = dict(self.group.attrs)
|
|
268
|
-
|
|
269
|
-
self.add_to_cache("attrs", attrs)
|
|
270
|
-
return attrs
|
|
271
|
-
|
|
272
|
-
def _write_attrs(self, attrs: dict, overwrite: bool = False) -> None:
|
|
273
|
-
"""Write the metadata to the store."""
|
|
274
|
-
if self.group.read_only:
|
|
275
|
-
raise NgioValueError("The group is read only. Cannot write metadata.")
|
|
276
|
-
|
|
277
|
-
# we need to invalidate the current attrs cache
|
|
278
|
-
self.add_to_cache("attrs", None)
|
|
279
|
-
if overwrite:
|
|
280
|
-
self.group.attrs.clear()
|
|
281
|
-
|
|
282
|
-
self.group.attrs.update(attrs)
|
|
279
|
+
return self.reopen_group().attrs.asdict()
|
|
283
280
|
|
|
284
281
|
def write_attrs(self, attrs: dict, overwrite: bool = False) -> None:
|
|
285
282
|
"""Write the metadata to the store."""
|
|
286
283
|
# Maybe we should use the lock here
|
|
287
|
-
self.
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
return group_or_array
|
|
294
|
-
|
|
295
|
-
group_or_array = self.group.get(path, None)
|
|
296
|
-
self.add_to_cache(path, group_or_array)
|
|
297
|
-
return group_or_array
|
|
284
|
+
if self.read_only:
|
|
285
|
+
raise NgioValueError("The group is read only. Cannot write metadata.")
|
|
286
|
+
group = self.reopen_group()
|
|
287
|
+
if overwrite:
|
|
288
|
+
group.attrs.clear()
|
|
289
|
+
group.attrs.update(attrs)
|
|
298
290
|
|
|
299
291
|
def create_group(self, path: str, overwrite: bool = False) -> zarr.Group:
|
|
300
292
|
"""Create a group in the group."""
|
|
301
|
-
if self.
|
|
293
|
+
if self.group.read_only:
|
|
302
294
|
raise NgioValueError("Cannot create a group in read only mode.")
|
|
303
295
|
|
|
304
296
|
try:
|
|
@@ -308,7 +300,7 @@ class ZarrGroupHandler:
|
|
|
308
300
|
f"A Zarr group already exists at {path}, "
|
|
309
301
|
"consider setting overwrite=True."
|
|
310
302
|
) from e
|
|
311
|
-
self.
|
|
303
|
+
self._group_cache.set(path, group, overwrite=overwrite)
|
|
312
304
|
return group
|
|
313
305
|
|
|
314
306
|
def get_group(
|
|
@@ -334,138 +326,81 @@ class ZarrGroupHandler:
|
|
|
334
326
|
if overwrite:
|
|
335
327
|
return self.create_group(path, overwrite=overwrite)
|
|
336
328
|
|
|
337
|
-
group = self.
|
|
329
|
+
group = self._group_cache.get(path)
|
|
338
330
|
if isinstance(group, zarr.Group):
|
|
339
331
|
return group
|
|
340
332
|
|
|
341
|
-
|
|
342
|
-
|
|
343
|
-
|
|
344
|
-
|
|
333
|
+
group = self.group.get(path, default=None)
|
|
334
|
+
if isinstance(group, zarr.Group):
|
|
335
|
+
self._group_cache.set(path, group, overwrite=overwrite)
|
|
336
|
+
return group
|
|
337
|
+
|
|
338
|
+
if isinstance(group, zarr.Array):
|
|
339
|
+
raise NgioValueError(f"The object at {path} is not a group, but an array.")
|
|
345
340
|
|
|
346
341
|
if not create_mode:
|
|
347
342
|
raise NgioFileNotFoundError(f"No group found at {path}")
|
|
348
343
|
group = self.create_group(path)
|
|
344
|
+
self._group_cache.set(path, group, overwrite=overwrite)
|
|
349
345
|
return group
|
|
350
346
|
|
|
351
|
-
def safe_get_group(
|
|
352
|
-
self, path: str, create_mode: bool = False
|
|
353
|
-
) -> tuple[bool, zarr.Group | NgioError]:
|
|
354
|
-
"""Get a group from the group.
|
|
355
|
-
|
|
356
|
-
Args:
|
|
357
|
-
path (str): The path to the group.
|
|
358
|
-
create_mode (bool): If True, create the group if it does not exist.
|
|
359
|
-
|
|
360
|
-
Returns:
|
|
361
|
-
zarr.Group | None: The Zarr group or None if it does not exist
|
|
362
|
-
or an error occurs.
|
|
363
|
-
|
|
364
|
-
"""
|
|
365
|
-
try:
|
|
366
|
-
return True, self.get_group(path, create_mode)
|
|
367
|
-
except NgioError as e:
|
|
368
|
-
return False, e
|
|
369
|
-
|
|
370
347
|
def get_array(self, path: str) -> zarr.Array:
|
|
371
348
|
"""Get an array from the group."""
|
|
372
|
-
array = self.
|
|
373
|
-
if array
|
|
374
|
-
|
|
375
|
-
|
|
376
|
-
|
|
377
|
-
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
|
|
381
|
-
|
|
382
|
-
|
|
383
|
-
|
|
384
|
-
|
|
385
|
-
dtype: str,
|
|
386
|
-
chunks: tuple[int, ...] | Literal["auto"] = "auto",
|
|
387
|
-
compressors: CompressorLike = "auto",
|
|
388
|
-
separator: Literal[".", "/"] = "/",
|
|
389
|
-
overwrite: bool = False,
|
|
390
|
-
) -> zarr.Array:
|
|
391
|
-
if self.mode == "r":
|
|
392
|
-
raise NgioValueError("Cannot create an array in read only mode.")
|
|
393
|
-
|
|
394
|
-
if self.zarr_format == 2:
|
|
395
|
-
chunks_encoding = {
|
|
396
|
-
"name": "v2",
|
|
397
|
-
"separator": separator,
|
|
398
|
-
}
|
|
399
|
-
else:
|
|
400
|
-
chunks_encoding = {
|
|
401
|
-
"name": "default",
|
|
402
|
-
"separator": separator,
|
|
403
|
-
}
|
|
404
|
-
|
|
405
|
-
try:
|
|
406
|
-
return self.group.create_array(
|
|
407
|
-
name=path,
|
|
408
|
-
shape=shape,
|
|
409
|
-
dtype=dtype,
|
|
410
|
-
chunks=chunks,
|
|
411
|
-
chunk_key_encoding=chunks_encoding,
|
|
412
|
-
overwrite=overwrite,
|
|
413
|
-
compressors=compressors,
|
|
414
|
-
)
|
|
415
|
-
except ContainsGroupError as e:
|
|
416
|
-
raise NgioFileExistsError(
|
|
417
|
-
f"A Zarr array already exists at {path}, "
|
|
418
|
-
"consider setting overwrite=True."
|
|
419
|
-
) from e
|
|
420
|
-
except Exception as e:
|
|
421
|
-
raise NgioValueError(f"Error creating array at {path}") from e
|
|
422
|
-
|
|
423
|
-
def derive_handler(
|
|
349
|
+
array = self._array_cache.get(path)
|
|
350
|
+
if isinstance(array, zarr.Array):
|
|
351
|
+
return array
|
|
352
|
+
array = self.group.get(path, default=None)
|
|
353
|
+
if isinstance(array, zarr.Array):
|
|
354
|
+
self._array_cache.set(path, array)
|
|
355
|
+
return array
|
|
356
|
+
|
|
357
|
+
if isinstance(array, zarr.Group):
|
|
358
|
+
raise NgioValueError(f"The object at {path} is not an array, but a group.")
|
|
359
|
+
raise NgioFileNotFoundError(f"No array found at {path}")
|
|
360
|
+
|
|
361
|
+
def get_handler(
|
|
424
362
|
self,
|
|
425
363
|
path: str,
|
|
364
|
+
create_mode: bool = True,
|
|
426
365
|
overwrite: bool = False,
|
|
427
366
|
) -> "ZarrGroupHandler":
|
|
428
|
-
"""
|
|
367
|
+
"""Get a new handler for a group in the current handler group.
|
|
429
368
|
|
|
430
369
|
Args:
|
|
431
370
|
path (str): The path to the group.
|
|
371
|
+
create_mode (bool): If True, create the group if it does not exist.
|
|
432
372
|
overwrite (bool): If True, overwrite the group if it exists.
|
|
433
373
|
"""
|
|
434
|
-
|
|
435
|
-
|
|
436
|
-
|
|
437
|
-
|
|
438
|
-
|
|
439
|
-
|
|
440
|
-
|
|
441
|
-
parent=self,
|
|
374
|
+
handler = self._handlers_cache.get(path)
|
|
375
|
+
if handler is not None:
|
|
376
|
+
return handler
|
|
377
|
+
group = self.get_group(path, create_mode=create_mode, overwrite=overwrite)
|
|
378
|
+
mode = "r" if group.read_only else "r+"
|
|
379
|
+
handler = ZarrGroupHandler(
|
|
380
|
+
store=group, zarr_format=self.zarr_format, cache=self.use_cache, mode=mode
|
|
442
381
|
)
|
|
382
|
+
self._handlers_cache.set(path, handler)
|
|
383
|
+
return handler
|
|
443
384
|
|
|
444
|
-
|
|
445
|
-
|
|
446
|
-
|
|
447
|
-
|
|
448
|
-
|
|
449
|
-
"""
|
|
450
|
-
|
|
451
|
-
|
|
452
|
-
|
|
453
|
-
|
|
385
|
+
@property
|
|
386
|
+
def is_listable(self) -> bool:
|
|
387
|
+
return is_group_listable(self.group)
|
|
388
|
+
|
|
389
|
+
def delete_group(self, path: str) -> None:
|
|
390
|
+
"""Delete a group from the current group.
|
|
391
|
+
|
|
392
|
+
Args:
|
|
393
|
+
path (str): The path to the group to delete.
|
|
394
|
+
"""
|
|
395
|
+
if self.group.read_only:
|
|
396
|
+
raise NgioValueError("Cannot delete a group in read only mode.")
|
|
397
|
+
self.group.__delitem__(path)
|
|
398
|
+
self._group_cache._cache.pop(path, None)
|
|
399
|
+
self._handlers_cache._cache.pop(path, None)
|
|
454
400
|
|
|
455
|
-
def
|
|
401
|
+
def copy_group(self, dest_group: zarr.Group):
|
|
456
402
|
"""Copy the group to a new store."""
|
|
457
|
-
|
|
458
|
-
source=self.group.store,
|
|
459
|
-
dest=handler.group.store,
|
|
460
|
-
source_path=self.group.path,
|
|
461
|
-
dest_path=handler.group.path,
|
|
462
|
-
if_exists="replace",
|
|
463
|
-
)
|
|
464
|
-
if n_skipped > 0:
|
|
465
|
-
raise NgioValueError(
|
|
466
|
-
f"Error copying group to {handler.full_url}, "
|
|
467
|
-
f"#{n_skipped} files where skipped."
|
|
468
|
-
)
|
|
403
|
+
copy_group(self.group, dest_group)
|
|
469
404
|
|
|
470
405
|
|
|
471
406
|
def find_dimension_separator(array: zarr.Array) -> Literal[".", "/"]:
|
|
@@ -484,8 +419,113 @@ def find_dimension_separator(array: zarr.Array) -> Literal[".", "/"]:
|
|
|
484
419
|
else:
|
|
485
420
|
separator = array.metadata.chunk_key_encoding
|
|
486
421
|
if not isinstance(separator, DefaultChunkKeyEncoding):
|
|
487
|
-
raise
|
|
422
|
+
raise NgioValueError(
|
|
488
423
|
"Only DefaultChunkKeyEncoding is supported in this example."
|
|
489
424
|
)
|
|
490
425
|
separator = separator.separator
|
|
491
426
|
return separator
|
|
427
|
+
|
|
428
|
+
|
|
429
|
+
def is_group_listable(group: zarr.Group) -> bool:
|
|
430
|
+
"""Check if a Zarr group is listable.
|
|
431
|
+
|
|
432
|
+
A group is considered listable if its store supports listing and its keys can be enumerated without raising an error; an empty group still counts as listable.
|
|
433
|
+
|
|
434
|
+
Args:
|
|
435
|
+
group (zarr.Group): The Zarr group to check.
|
|
436
|
+
|
|
437
|
+
Returns:
|
|
438
|
+
bool: True if the group is listable, False otherwise.
|
|
439
|
+
"""
|
|
440
|
+
if not group.store.supports_listing:
|
|
441
|
+
# If the store does not support listing
|
|
442
|
+
# then for sure it is not listable
|
|
443
|
+
return False
|
|
444
|
+
try:
|
|
445
|
+
next(group.keys())
|
|
446
|
+
return True
|
|
447
|
+
except StopIteration:
|
|
448
|
+
# Group is listable but empty
|
|
449
|
+
return True
|
|
450
|
+
except Exception as _:
|
|
451
|
+
# Some stores may raise errors when listing
|
|
452
|
+
# consider those not listable
|
|
453
|
+
return False
|
|
454
|
+
|
|
455
|
+
|
|
456
|
+
def _make_sync_fs(fs: fsspec.AbstractFileSystem) -> fsspec.AbstractFileSystem:
|
|
457
|
+
fs_dict = json.loads(fs.to_json())
|
|
458
|
+
fs_dict["asynchronous"] = False
|
|
459
|
+
return fsspec.AbstractFileSystem.from_json(json.dumps(fs_dict))
|
|
460
|
+
|
|
461
|
+
|
|
462
|
+
def _get_mapper(store: LocalStore | FsspecStore, path: str):
|
|
463
|
+
if isinstance(store, LocalStore):
|
|
464
|
+
fs = fsspec.filesystem("file")
|
|
465
|
+
full_path = (store.root / path).as_posix()
|
|
466
|
+
else:
|
|
467
|
+
fs = _make_sync_fs(store.fs)
|
|
468
|
+
full_path = f"{store.path}/{path}"
|
|
469
|
+
return fs.get_mapper(full_path)
|
|
470
|
+
|
|
471
|
+
|
|
472
|
+
def _fsspec_copy(
|
|
473
|
+
src_fs: LocalStore | FsspecStore,
|
|
474
|
+
src_path: str,
|
|
475
|
+
dest_fs: LocalStore | FsspecStore,
|
|
476
|
+
dest_path: str,
|
|
477
|
+
):
|
|
478
|
+
src_mapper = _get_mapper(src_fs, src_path)
|
|
479
|
+
dest_mapper = _get_mapper(dest_fs, dest_path)
|
|
480
|
+
for key in src_mapper.keys():
|
|
481
|
+
dest_mapper[key] = src_mapper[key]
|
|
482
|
+
|
|
483
|
+
|
|
484
|
+
def _zarr_python_copy(src_group: zarr.Group, dest_group: zarr.Group):
|
|
485
|
+
# Copy attributes
|
|
486
|
+
dest_group.attrs.put(src_group.attrs.asdict())
|
|
487
|
+
# Copy arrays
|
|
488
|
+
for name, array in src_group.arrays():
|
|
489
|
+
if array.metadata.zarr_format == 2:
|
|
490
|
+
spec = AnyArraySpecV2.from_zarr(array)
|
|
491
|
+
else:
|
|
492
|
+
spec = AnyArraySpecV3.from_zarr(array)
|
|
493
|
+
dst = spec.to_zarr(
|
|
494
|
+
store=dest_group.store,
|
|
495
|
+
path=f"{dest_group.path}/{name}",
|
|
496
|
+
overwrite=True,
|
|
497
|
+
)
|
|
498
|
+
if array.ndim > 0:
|
|
499
|
+
dask_array = da.from_zarr(array)
|
|
500
|
+
da.to_zarr(dask_array, dst, overwrite=False)
|
|
501
|
+
# Copy subgroups
|
|
502
|
+
for name, subgroup in src_group.groups():
|
|
503
|
+
dest_subgroup = dest_group.create_group(name, overwrite=True)
|
|
504
|
+
_zarr_python_copy(subgroup, dest_subgroup)
|
|
505
|
+
|
|
506
|
+
|
|
507
|
+
def copy_group(src_group: zarr.Group, dest_group: zarr.Group):
|
|
508
|
+
if src_group.metadata.zarr_format != dest_group.metadata.zarr_format:
|
|
509
|
+
raise NgioValueError(
|
|
510
|
+
"Different Zarr format versions between source and destination, "
|
|
511
|
+
"cannot copy."
|
|
512
|
+
)
|
|
513
|
+
|
|
514
|
+
if not is_group_listable(src_group):
|
|
515
|
+
raise NgioValueError("Source group is not listable, cannot copy.")
|
|
516
|
+
|
|
517
|
+
if dest_group.read_only:
|
|
518
|
+
raise NgioValueError("Destination group is read only, cannot copy.")
|
|
519
|
+
if isinstance(src_group.store, LocalStore | FsspecStore) and isinstance(
|
|
520
|
+
dest_group.store, LocalStore | FsspecStore
|
|
521
|
+
):
|
|
522
|
+
_fsspec_copy(src_group.store, src_group.path, dest_group.store, dest_group.path)
|
|
523
|
+
return
|
|
524
|
+
warnings.warn(
|
|
525
|
+
"Fsspec copy not possible, falling back to Zarr Python API for the copy. "
|
|
526
|
+
"This will preserve some tabular data non-zarr native (parquet, and csv), "
|
|
527
|
+
"and it will be slower for large datasets.",
|
|
528
|
+
UserWarning,
|
|
529
|
+
stacklevel=2,
|
|
530
|
+
)
|
|
531
|
+
_zarr_python_copy(src_group, dest_group)
|