ngio 0.5.0__py3-none-any.whl → 0.5.0a1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ngio/__init__.py +2 -5
- ngio/common/__init__.py +6 -11
- ngio/common/_masking_roi.py +54 -34
- ngio/common/_pyramid.py +87 -321
- ngio/common/_roi.py +330 -258
- ngio/experimental/iterators/_feature.py +3 -3
- ngio/experimental/iterators/_rois_utils.py +11 -10
- ngio/hcs/_plate.py +136 -192
- ngio/images/_abstract_image.py +35 -539
- ngio/images/_create.py +283 -0
- ngio/images/_create_synt_container.py +43 -40
- ngio/images/_image.py +251 -517
- ngio/images/_label.py +172 -249
- ngio/images/_masked_image.py +2 -2
- ngio/images/_ome_zarr_container.py +241 -644
- ngio/io_pipes/_io_pipes.py +9 -9
- ngio/io_pipes/_io_pipes_masked.py +7 -7
- ngio/io_pipes/_io_pipes_roi.py +6 -6
- ngio/io_pipes/_io_pipes_types.py +3 -3
- ngio/io_pipes/_match_shape.py +8 -6
- ngio/io_pipes/_ops_slices_utils.py +5 -8
- ngio/ome_zarr_meta/__init__.py +18 -29
- ngio/ome_zarr_meta/_meta_handlers.py +708 -392
- ngio/ome_zarr_meta/ngio_specs/__init__.py +0 -4
- ngio/ome_zarr_meta/ngio_specs/_axes.py +51 -152
- ngio/ome_zarr_meta/ngio_specs/_dataset.py +22 -13
- ngio/ome_zarr_meta/ngio_specs/_ngio_hcs.py +91 -129
- ngio/ome_zarr_meta/ngio_specs/_ngio_image.py +68 -57
- ngio/ome_zarr_meta/v04/__init__.py +1 -5
- ngio/ome_zarr_meta/v04/{_v04_spec.py → _v04_spec_utils.py} +85 -54
- ngio/ome_zarr_meta/v05/__init__.py +1 -5
- ngio/ome_zarr_meta/v05/{_v05_spec.py → _v05_spec_utils.py} +87 -64
- ngio/resources/__init__.py +1 -1
- ngio/resources/resource_model.py +1 -1
- ngio/tables/_tables_container.py +27 -85
- ngio/tables/backends/_anndata.py +8 -58
- ngio/tables/backends/_anndata_utils.py +6 -1
- ngio/tables/backends/_csv.py +19 -3
- ngio/tables/backends/_json.py +13 -10
- ngio/tables/backends/_non_zarr_backends.py +196 -0
- ngio/tables/backends/_parquet.py +31 -3
- ngio/tables/v1/_roi_table.py +27 -44
- ngio/utils/__init__.py +12 -8
- ngio/utils/_datasets.py +0 -6
- ngio/utils/_logger.py +50 -0
- ngio/utils/_zarr_utils.py +250 -292
- {ngio-0.5.0.dist-info → ngio-0.5.0a1.dist-info}/METADATA +6 -13
- ngio-0.5.0a1.dist-info/RECORD +88 -0
- {ngio-0.5.0.dist-info → ngio-0.5.0a1.dist-info}/WHEEL +1 -1
- ngio/images/_create_utils.py +0 -406
- ngio/tables/backends/_py_arrow_backends.py +0 -222
- ngio/utils/_cache.py +0 -48
- ngio-0.5.0.dist-info/RECORD +0 -88
- {ngio-0.5.0.dist-info → ngio-0.5.0a1.dist-info}/licenses/LICENSE +0 -0
ngio/utils/_zarr_utils.py
CHANGED
|
@@ -1,67 +1,58 @@
|
|
|
1
1
|
"""Common utilities for working with Zarr groups in consistent ways."""
|
|
2
2
|
|
|
3
|
-
import json
|
|
4
|
-
import warnings
|
|
5
3
|
from pathlib import Path
|
|
6
|
-
from typing import Literal
|
|
4
|
+
from typing import Literal
|
|
7
5
|
|
|
8
|
-
import dask.array as da
|
|
9
6
|
import fsspec
|
|
10
7
|
import zarr
|
|
11
8
|
from filelock import BaseFileLock, FileLock
|
|
12
|
-
from pydantic_zarr.v2 import ArraySpec as AnyArraySpecV2
|
|
13
|
-
from pydantic_zarr.v3 import ArraySpec as AnyArraySpecV3
|
|
14
9
|
from zarr.abc.store import Store
|
|
10
|
+
from zarr.core.array import CompressorLike
|
|
15
11
|
from zarr.errors import ContainsGroupError
|
|
16
|
-
from zarr.storage import FsspecStore, LocalStore, MemoryStore
|
|
12
|
+
from zarr.storage import FsspecStore, LocalStore, MemoryStore
|
|
17
13
|
|
|
18
|
-
from ngio.utils
|
|
19
|
-
from ngio.utils._errors import
|
|
20
|
-
NgioFileExistsError,
|
|
21
|
-
NgioFileNotFoundError,
|
|
22
|
-
NgioValueError,
|
|
23
|
-
)
|
|
14
|
+
from ngio.utils import NgioFileExistsError, NgioFileNotFoundError, NgioValueError
|
|
15
|
+
from ngio.utils._errors import NgioError
|
|
24
16
|
|
|
25
17
|
AccessModeLiteral = Literal["r", "r+", "w", "w-", "a"]
|
|
26
18
|
# StoreLike is more restrictive than it could be
|
|
27
19
|
# but to make sure we can handle the store correctly
|
|
28
20
|
# we need to be more restrictive
|
|
29
|
-
NgioSupportedStore
|
|
30
|
-
str | Path | fsspec.mapping.FSMap | FsspecStore | MemoryStore |
|
|
21
|
+
NgioSupportedStore = (
|
|
22
|
+
str | Path | fsspec.mapping.FSMap | FsspecStore | MemoryStore | LocalStore
|
|
31
23
|
)
|
|
32
|
-
GenericStore
|
|
33
|
-
StoreOrGroup
|
|
24
|
+
GenericStore = Store | NgioSupportedStore
|
|
25
|
+
StoreOrGroup = GenericStore | zarr.Group
|
|
34
26
|
|
|
35
27
|
|
|
36
28
|
def _check_store(store) -> NgioSupportedStore:
|
|
37
29
|
"""Check the store and return a valid store."""
|
|
38
|
-
if
|
|
39
|
-
|
|
40
|
-
f"Store type {type(store)} is not explicitly supported. "
|
|
41
|
-
f"Supported types are: {NgioSupportedStore}. "
|
|
42
|
-
"Proceeding, but this may lead to unexpected behavior.",
|
|
43
|
-
UserWarning,
|
|
44
|
-
stacklevel=2,
|
|
45
|
-
)
|
|
46
|
-
return store
|
|
30
|
+
if isinstance(store, NgioSupportedStore):
|
|
31
|
+
return store
|
|
47
32
|
|
|
33
|
+
raise NotImplementedError(
|
|
34
|
+
f"Store type {type(store)} is not supported. "
|
|
35
|
+
f"Supported types are: {NgioSupportedStore}"
|
|
36
|
+
)
|
|
48
37
|
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
) -> zarr.Group:
|
|
38
|
+
|
|
39
|
+
def _check_group(group: zarr.Group, mode: AccessModeLiteral) -> zarr.Group:
|
|
52
40
|
"""Check the group and return a valid group."""
|
|
53
|
-
if group.read_only and mode
|
|
54
|
-
raise NgioValueError(
|
|
41
|
+
if group.read_only and mode in ["w", "w-"]:
|
|
42
|
+
raise NgioValueError(
|
|
43
|
+
"The group is read only. Cannot open in write mode ['w', 'w-']"
|
|
44
|
+
)
|
|
55
45
|
|
|
56
46
|
if mode == "r" and not group.read_only:
|
|
57
47
|
# let's make sure we don't accidentally write to the group
|
|
58
48
|
group = zarr.open_group(store=group.store, path=group.path, mode="r")
|
|
49
|
+
|
|
59
50
|
return group
|
|
60
51
|
|
|
61
52
|
|
|
62
53
|
def open_group_wrapper(
|
|
63
54
|
store: StoreOrGroup,
|
|
64
|
-
mode: AccessModeLiteral
|
|
55
|
+
mode: AccessModeLiteral,
|
|
65
56
|
zarr_format: Literal[2, 3] | None = None,
|
|
66
57
|
) -> zarr.Group:
|
|
67
58
|
"""Wrapper around zarr.open_group with some additional checks.
|
|
@@ -81,7 +72,6 @@ def open_group_wrapper(
|
|
|
81
72
|
|
|
82
73
|
try:
|
|
83
74
|
_check_store(store)
|
|
84
|
-
mode = mode if mode is not None else "a"
|
|
85
75
|
group = zarr.open_group(store=store, mode=mode, zarr_format=zarr_format)
|
|
86
76
|
|
|
87
77
|
except FileExistsError as e:
|
|
@@ -108,32 +98,68 @@ class ZarrGroupHandler:
|
|
|
108
98
|
store: StoreOrGroup,
|
|
109
99
|
zarr_format: Literal[2, 3] | None = None,
|
|
110
100
|
cache: bool = False,
|
|
111
|
-
mode: AccessModeLiteral
|
|
101
|
+
mode: AccessModeLiteral = "a",
|
|
102
|
+
parallel_safe: bool = False,
|
|
103
|
+
parent: "ZarrGroupHandler | None" = None,
|
|
112
104
|
):
|
|
113
105
|
"""Initialize the handler.
|
|
114
106
|
|
|
115
107
|
Args:
|
|
116
108
|
store (StoreOrGroup): The Zarr store or group containing the image data.
|
|
117
|
-
|
|
109
|
+
meta_mode (str): The mode of the metadata handler.
|
|
110
|
+
zarr_format (int): The Zarr format version to use.
|
|
118
111
|
cache (bool): Whether to cache the metadata.
|
|
119
|
-
mode (str
|
|
112
|
+
mode (str): The mode of the store.
|
|
113
|
+
parallel_safe (bool): If True, the handler will create a lock file to make
|
|
114
|
+
that can be used to make the handler parallel safe.
|
|
115
|
+
Be aware that the lock needs to be used manually.
|
|
116
|
+
parent (ZarrGroupHandler | None): The parent handler.
|
|
120
117
|
"""
|
|
121
|
-
if mode not in ["r", "r+", "w", "w-", "a"
|
|
118
|
+
if mode not in ["r", "r+", "w", "w-", "a"]:
|
|
122
119
|
raise NgioValueError(f"Mode {mode} is not supported.")
|
|
123
120
|
|
|
121
|
+
if parallel_safe and cache:
|
|
122
|
+
raise NgioValueError(
|
|
123
|
+
"The cache and parallel_safe options are mutually exclusive."
|
|
124
|
+
"If you want to use the lock mechanism, you should not use the cache."
|
|
125
|
+
)
|
|
126
|
+
|
|
124
127
|
group = open_group_wrapper(store=store, mode=mode, zarr_format=zarr_format)
|
|
128
|
+
_store = group.store
|
|
129
|
+
|
|
130
|
+
# Make sure the cache is set in the attrs
|
|
131
|
+
# in the same way as the cache in the handler
|
|
132
|
+
|
|
133
|
+
## TODO
|
|
134
|
+
# Figure out how to handle the cache in the new zarr version
|
|
135
|
+
# group.attrs.cache = cache
|
|
136
|
+
|
|
137
|
+
if parallel_safe:
|
|
138
|
+
if not isinstance(_store, LocalStore):
|
|
139
|
+
raise NgioValueError(
|
|
140
|
+
"The store needs to be a LocalStore to use the lock mechanism. "
|
|
141
|
+
f"Instead, got {_store.__class__.__name__}."
|
|
142
|
+
)
|
|
143
|
+
|
|
144
|
+
store_path = _store.root / group.path
|
|
145
|
+
self._lock_path = store_path.with_suffix(".lock")
|
|
146
|
+
self._lock = FileLock(self._lock_path, timeout=10)
|
|
147
|
+
|
|
148
|
+
else:
|
|
149
|
+
self._lock_path = None
|
|
150
|
+
self._lock = None
|
|
151
|
+
|
|
125
152
|
self._group = group
|
|
153
|
+
self._mode = mode
|
|
126
154
|
self.use_cache = cache
|
|
127
|
-
|
|
128
|
-
self.
|
|
129
|
-
self.
|
|
130
|
-
self._handlers_cache: NgioCache[ZarrGroupHandler] = NgioCache(use_cache=cache)
|
|
131
|
-
self._lock: tuple[Path, BaseFileLock] | None = None
|
|
155
|
+
self._parallel_safe = parallel_safe
|
|
156
|
+
self._cache = {}
|
|
157
|
+
self._parent = parent
|
|
132
158
|
|
|
133
159
|
def __repr__(self) -> str:
|
|
134
160
|
"""Return a string representation of the handler."""
|
|
135
161
|
return (
|
|
136
|
-
f"ZarrGroupHandler(full_url={self.full_url},
|
|
162
|
+
f"ZarrGroupHandler(full_url={self.full_url}, mode={self.mode}, "
|
|
137
163
|
f"cache={self.use_cache}"
|
|
138
164
|
)
|
|
139
165
|
|
|
@@ -147,17 +173,8 @@ class ZarrGroupHandler:
|
|
|
147
173
|
"""Return the store path."""
|
|
148
174
|
if isinstance(self.store, LocalStore):
|
|
149
175
|
return (self.store.root / self.group.path).as_posix()
|
|
150
|
-
|
|
151
|
-
return
|
|
152
|
-
elif isinstance(self.store, ZipStore):
|
|
153
|
-
return (self.store.path / self.group.path).as_posix()
|
|
154
|
-
elif isinstance(self.store, MemoryStore):
|
|
155
|
-
return None
|
|
156
|
-
warnings.warn(
|
|
157
|
-
f"Cannot determine full URL for store type {type(self.store)}. ",
|
|
158
|
-
UserWarning,
|
|
159
|
-
stacklevel=2,
|
|
160
|
-
)
|
|
176
|
+
if isinstance(self.store, FsspecStore):
|
|
177
|
+
return self.store.fs.map.root_path
|
|
161
178
|
return None
|
|
162
179
|
|
|
163
180
|
@property
|
|
@@ -166,55 +183,35 @@ class ZarrGroupHandler:
|
|
|
166
183
|
return self._group.metadata.zarr_format
|
|
167
184
|
|
|
168
185
|
@property
|
|
169
|
-
def
|
|
170
|
-
"""Return
|
|
171
|
-
return self.
|
|
172
|
-
|
|
173
|
-
def _create_lock(self) -> tuple[Path, BaseFileLock]:
|
|
174
|
-
"""Create the lock."""
|
|
175
|
-
if self._lock is not None:
|
|
176
|
-
return self._lock
|
|
177
|
-
|
|
178
|
-
if self.use_cache is True:
|
|
179
|
-
raise NgioValueError(
|
|
180
|
-
"Lock mechanism is not compatible with caching. "
|
|
181
|
-
"Please set cache=False to use the lock mechanism."
|
|
182
|
-
)
|
|
183
|
-
|
|
184
|
-
if not isinstance(self.store, LocalStore):
|
|
185
|
-
raise NgioValueError(
|
|
186
|
-
"The store needs to be a LocalStore to use the lock mechanism. "
|
|
187
|
-
f"Instead, got {self.store.__class__.__name__}."
|
|
188
|
-
)
|
|
189
|
-
|
|
190
|
-
store_path = Path(self.store.root) / self.group.path
|
|
191
|
-
_lock_path = store_path.with_suffix(".lock")
|
|
192
|
-
_lock = FileLock(_lock_path, timeout=10)
|
|
193
|
-
return _lock_path, _lock
|
|
186
|
+
def mode(self) -> AccessModeLiteral:
|
|
187
|
+
"""Return the mode of the group."""
|
|
188
|
+
return self._mode # type: ignore
|
|
194
189
|
|
|
195
190
|
@property
|
|
196
191
|
def lock(self) -> BaseFileLock:
|
|
197
192
|
"""Return the lock."""
|
|
198
193
|
if self._lock is None:
|
|
199
|
-
|
|
200
|
-
|
|
194
|
+
raise NgioValueError(
|
|
195
|
+
"The handler is not parallel safe. "
|
|
196
|
+
"Reopen the handler with parallel_safe=True."
|
|
197
|
+
)
|
|
198
|
+
return self._lock
|
|
201
199
|
|
|
202
200
|
@property
|
|
203
|
-
def
|
|
204
|
-
"""Return the
|
|
205
|
-
|
|
206
|
-
self._lock = self._create_lock()
|
|
207
|
-
return self._lock[0]
|
|
201
|
+
def parent(self) -> "ZarrGroupHandler | None":
|
|
202
|
+
"""Return the parent handler."""
|
|
203
|
+
return self._parent
|
|
208
204
|
|
|
209
205
|
def remove_lock(self) -> None:
|
|
210
206
|
"""Return the lock."""
|
|
211
|
-
if self._lock is None:
|
|
207
|
+
if self._lock is None or self._lock_path is None:
|
|
212
208
|
return None
|
|
213
209
|
|
|
214
|
-
lock_path
|
|
215
|
-
if lock_path.exists() and
|
|
210
|
+
lock_path = Path(self._lock_path)
|
|
211
|
+
if lock_path.exists() and self._lock.lock_counter == 0:
|
|
216
212
|
lock_path.unlink()
|
|
217
213
|
self._lock = None
|
|
214
|
+
self._lock_path = None
|
|
218
215
|
return None
|
|
219
216
|
|
|
220
217
|
raise NgioValueError("The lock is still in use. Cannot remove it.")
|
|
@@ -225,7 +222,10 @@ class ZarrGroupHandler:
|
|
|
225
222
|
This is useful when the group has been modified
|
|
226
223
|
outside of the handler.
|
|
227
224
|
"""
|
|
228
|
-
|
|
225
|
+
if self.mode == "r":
|
|
226
|
+
mode = "r"
|
|
227
|
+
else:
|
|
228
|
+
mode = "r+"
|
|
229
229
|
return zarr.open_group(
|
|
230
230
|
store=self._group.store,
|
|
231
231
|
path=self._group.path,
|
|
@@ -233,57 +233,72 @@ class ZarrGroupHandler:
|
|
|
233
233
|
zarr_format=self._group.metadata.zarr_format,
|
|
234
234
|
)
|
|
235
235
|
|
|
236
|
-
def reopen_handler(self) -> "ZarrGroupHandler":
|
|
237
|
-
"""Reopen the handler.
|
|
238
|
-
|
|
239
|
-
This is useful when the group has been modified
|
|
240
|
-
outside of the handler.
|
|
241
|
-
"""
|
|
242
|
-
mode = "r" if self.read_only else "r+"
|
|
243
|
-
group = self.reopen_group()
|
|
244
|
-
return ZarrGroupHandler(
|
|
245
|
-
store=group,
|
|
246
|
-
zarr_format=group.metadata.zarr_format,
|
|
247
|
-
cache=self.use_cache,
|
|
248
|
-
mode=mode,
|
|
249
|
-
)
|
|
250
|
-
|
|
251
|
-
def clean_cache(self) -> None:
|
|
252
|
-
"""Clear the cached metadata."""
|
|
253
|
-
group = self.reopen_group()
|
|
254
|
-
self.__init__(
|
|
255
|
-
store=group,
|
|
256
|
-
zarr_format=group.metadata.zarr_format,
|
|
257
|
-
cache=self.use_cache,
|
|
258
|
-
mode="r" if self.read_only else "r+",
|
|
259
|
-
)
|
|
260
|
-
|
|
261
236
|
@property
|
|
262
237
|
def group(self) -> zarr.Group:
|
|
263
238
|
"""Return the group."""
|
|
264
|
-
if self.
|
|
265
|
-
# If we are
|
|
239
|
+
if self._parallel_safe:
|
|
240
|
+
# If we are parallel safe, we need to reopen the group
|
|
266
241
|
# to make sure that the attributes are up to date
|
|
267
242
|
return self.reopen_group()
|
|
268
243
|
return self._group
|
|
269
244
|
|
|
245
|
+
def add_to_cache(self, key: str, value: object) -> None:
|
|
246
|
+
"""Add an object to the cache."""
|
|
247
|
+
if not self.use_cache:
|
|
248
|
+
return None
|
|
249
|
+
self._cache[key] = value
|
|
250
|
+
|
|
251
|
+
def get_from_cache(self, key: str) -> object | None:
|
|
252
|
+
"""Get an object from the cache."""
|
|
253
|
+
if not self.use_cache:
|
|
254
|
+
return None
|
|
255
|
+
return self._cache.get(key, None)
|
|
256
|
+
|
|
257
|
+
def clean_cache(self) -> None:
|
|
258
|
+
"""Clear the cached metadata."""
|
|
259
|
+
self._cache = {}
|
|
260
|
+
|
|
270
261
|
def load_attrs(self) -> dict:
|
|
271
262
|
"""Load the attributes of the group."""
|
|
272
|
-
|
|
263
|
+
attrs = self.get_from_cache("attrs")
|
|
264
|
+
if attrs is not None and isinstance(attrs, dict):
|
|
265
|
+
return attrs
|
|
273
266
|
|
|
274
|
-
|
|
267
|
+
attrs = dict(self.group.attrs)
|
|
268
|
+
|
|
269
|
+
self.add_to_cache("attrs", attrs)
|
|
270
|
+
return attrs
|
|
271
|
+
|
|
272
|
+
def _write_attrs(self, attrs: dict, overwrite: bool = False) -> None:
|
|
275
273
|
"""Write the metadata to the store."""
|
|
276
|
-
|
|
277
|
-
if self.read_only:
|
|
274
|
+
if self.group.read_only:
|
|
278
275
|
raise NgioValueError("The group is read only. Cannot write metadata.")
|
|
279
|
-
|
|
276
|
+
|
|
277
|
+
# we need to invalidate the current attrs cache
|
|
278
|
+
self.add_to_cache("attrs", None)
|
|
280
279
|
if overwrite:
|
|
281
|
-
group.attrs.clear()
|
|
282
|
-
|
|
280
|
+
self.group.attrs.clear()
|
|
281
|
+
|
|
282
|
+
self.group.attrs.update(attrs)
|
|
283
|
+
|
|
284
|
+
def write_attrs(self, attrs: dict, overwrite: bool = False) -> None:
|
|
285
|
+
"""Write the metadata to the store."""
|
|
286
|
+
# Maybe we should use the lock here
|
|
287
|
+
self._write_attrs(attrs, overwrite)
|
|
288
|
+
|
|
289
|
+
def _obj_get(self, path: str):
|
|
290
|
+
"""Get a group from the group."""
|
|
291
|
+
group_or_array = self.get_from_cache(path)
|
|
292
|
+
if group_or_array is not None:
|
|
293
|
+
return group_or_array
|
|
294
|
+
|
|
295
|
+
group_or_array = self.group.get(path, None)
|
|
296
|
+
self.add_to_cache(path, group_or_array)
|
|
297
|
+
return group_or_array
|
|
283
298
|
|
|
284
299
|
def create_group(self, path: str, overwrite: bool = False) -> zarr.Group:
|
|
285
300
|
"""Create a group in the group."""
|
|
286
|
-
if self.
|
|
301
|
+
if self.mode == "r":
|
|
287
302
|
raise NgioValueError("Cannot create a group in read only mode.")
|
|
288
303
|
|
|
289
304
|
try:
|
|
@@ -293,7 +308,7 @@ class ZarrGroupHandler:
|
|
|
293
308
|
f"A Zarr group already exists at {path}, "
|
|
294
309
|
"consider setting overwrite=True."
|
|
295
310
|
) from e
|
|
296
|
-
self.
|
|
311
|
+
self.add_to_cache(path, group)
|
|
297
312
|
return group
|
|
298
313
|
|
|
299
314
|
def get_group(
|
|
@@ -319,87 +334,138 @@ class ZarrGroupHandler:
|
|
|
319
334
|
if overwrite:
|
|
320
335
|
return self.create_group(path, overwrite=overwrite)
|
|
321
336
|
|
|
322
|
-
group = self.
|
|
337
|
+
group = self._obj_get(path)
|
|
323
338
|
if isinstance(group, zarr.Group):
|
|
324
339
|
return group
|
|
325
340
|
|
|
326
|
-
group
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
|
|
330
|
-
|
|
331
|
-
if isinstance(group, zarr.Array):
|
|
332
|
-
raise NgioValueError(f"The object at {path} is not a group, but an array.")
|
|
341
|
+
if group is not None:
|
|
342
|
+
raise NgioValueError(
|
|
343
|
+
f"The object at {path} is not a group, but a {type(group)}"
|
|
344
|
+
)
|
|
333
345
|
|
|
334
346
|
if not create_mode:
|
|
335
347
|
raise NgioFileNotFoundError(f"No group found at {path}")
|
|
336
348
|
group = self.create_group(path)
|
|
337
|
-
self._group_cache.set(path, group, overwrite=overwrite)
|
|
338
349
|
return group
|
|
339
350
|
|
|
351
|
+
def safe_get_group(
|
|
352
|
+
self, path: str, create_mode: bool = False
|
|
353
|
+
) -> tuple[bool, zarr.Group | NgioError]:
|
|
354
|
+
"""Get a group from the group.
|
|
355
|
+
|
|
356
|
+
Args:
|
|
357
|
+
path (str): The path to the group.
|
|
358
|
+
create_mode (bool): If True, create the group if it does not exist.
|
|
359
|
+
|
|
360
|
+
Returns:
|
|
361
|
+
zarr.Group | None: The Zarr group or None if it does not exist
|
|
362
|
+
or an error occurs.
|
|
363
|
+
|
|
364
|
+
"""
|
|
365
|
+
try:
|
|
366
|
+
return True, self.get_group(path, create_mode)
|
|
367
|
+
except NgioError as e:
|
|
368
|
+
return False, e
|
|
369
|
+
|
|
340
370
|
def get_array(self, path: str) -> zarr.Array:
|
|
341
371
|
"""Get an array from the group."""
|
|
342
|
-
array = self.
|
|
343
|
-
if
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
|
|
347
|
-
|
|
348
|
-
|
|
349
|
-
|
|
350
|
-
|
|
351
|
-
|
|
352
|
-
|
|
353
|
-
|
|
354
|
-
|
|
372
|
+
array = self._obj_get(path)
|
|
373
|
+
if array is None:
|
|
374
|
+
raise NgioFileNotFoundError(f"No array found at {path}")
|
|
375
|
+
if not isinstance(array, zarr.Array):
|
|
376
|
+
raise NgioValueError(
|
|
377
|
+
f"The object at {path} is not an array, but a {type(array)}"
|
|
378
|
+
)
|
|
379
|
+
return array
|
|
380
|
+
|
|
381
|
+
def create_array(
|
|
382
|
+
self,
|
|
383
|
+
path: str,
|
|
384
|
+
shape: tuple[int, ...],
|
|
385
|
+
dtype: str,
|
|
386
|
+
chunks: tuple[int, ...] | Literal["auto"] = "auto",
|
|
387
|
+
compressors: CompressorLike = "auto",
|
|
388
|
+
separator: Literal[".", "/"] = "/",
|
|
389
|
+
overwrite: bool = False,
|
|
390
|
+
) -> zarr.Array:
|
|
391
|
+
if self.mode == "r":
|
|
392
|
+
raise NgioValueError("Cannot create an array in read only mode.")
|
|
393
|
+
|
|
394
|
+
if self.zarr_format == 2:
|
|
395
|
+
chunks_encoding = {
|
|
396
|
+
"name": "v2",
|
|
397
|
+
"separator": separator,
|
|
398
|
+
}
|
|
399
|
+
else:
|
|
400
|
+
chunks_encoding = {
|
|
401
|
+
"name": "default",
|
|
402
|
+
"separator": separator,
|
|
403
|
+
}
|
|
404
|
+
|
|
405
|
+
try:
|
|
406
|
+
return self.group.create_array(
|
|
407
|
+
name=path,
|
|
408
|
+
shape=shape,
|
|
409
|
+
dtype=dtype,
|
|
410
|
+
chunks=chunks,
|
|
411
|
+
chunk_key_encoding=chunks_encoding,
|
|
412
|
+
overwrite=overwrite,
|
|
413
|
+
compressors=compressors,
|
|
414
|
+
)
|
|
415
|
+
except ContainsGroupError as e:
|
|
416
|
+
raise NgioFileExistsError(
|
|
417
|
+
f"A Zarr array already exists at {path}, "
|
|
418
|
+
"consider setting overwrite=True."
|
|
419
|
+
) from e
|
|
420
|
+
except Exception as e:
|
|
421
|
+
raise NgioValueError(f"Error creating array at {path}") from e
|
|
422
|
+
|
|
423
|
+
def derive_handler(
|
|
355
424
|
self,
|
|
356
425
|
path: str,
|
|
357
|
-
create_mode: bool = True,
|
|
358
426
|
overwrite: bool = False,
|
|
359
427
|
) -> "ZarrGroupHandler":
|
|
360
|
-
"""
|
|
428
|
+
"""Derive a new handler from the current handler.
|
|
361
429
|
|
|
362
430
|
Args:
|
|
363
431
|
path (str): The path to the group.
|
|
364
|
-
create_mode (bool): If True, create the group if it does not exist.
|
|
365
432
|
overwrite (bool): If True, overwrite the group if it exists.
|
|
366
433
|
"""
|
|
367
|
-
|
|
368
|
-
|
|
369
|
-
|
|
370
|
-
|
|
371
|
-
|
|
372
|
-
|
|
373
|
-
|
|
434
|
+
group = self.get_group(path, create_mode=True, overwrite=overwrite)
|
|
435
|
+
return ZarrGroupHandler(
|
|
436
|
+
store=group,
|
|
437
|
+
zarr_format=self.zarr_format,
|
|
438
|
+
cache=self.use_cache,
|
|
439
|
+
mode=self.mode,
|
|
440
|
+
parallel_safe=self._parallel_safe,
|
|
441
|
+
parent=self,
|
|
374
442
|
)
|
|
375
|
-
self._handlers_cache.set(path, handler)
|
|
376
|
-
return handler
|
|
377
|
-
|
|
378
|
-
@property
|
|
379
|
-
def is_listable(self) -> bool:
|
|
380
|
-
return is_group_listable(self.group)
|
|
381
443
|
|
|
382
|
-
def
|
|
383
|
-
|
|
384
|
-
|
|
385
|
-
|
|
386
|
-
|
|
387
|
-
"""
|
|
388
|
-
|
|
389
|
-
|
|
390
|
-
|
|
391
|
-
|
|
392
|
-
self._handlers_cache._cache.pop(path, None)
|
|
393
|
-
|
|
394
|
-
def delete_self(self) -> None:
|
|
395
|
-
"""Delete the current group."""
|
|
396
|
-
if self.group.read_only:
|
|
397
|
-
raise NgioValueError("Cannot delete a group in read only mode.")
|
|
398
|
-
self.group.__delitem__("/")
|
|
444
|
+
def safe_derive_handler(
|
|
445
|
+
self,
|
|
446
|
+
path: str,
|
|
447
|
+
overwrite: bool = False,
|
|
448
|
+
) -> tuple[bool, "ZarrGroupHandler | NgioError"]:
|
|
449
|
+
"""Derive a new handler from the current handler."""
|
|
450
|
+
try:
|
|
451
|
+
return True, self.derive_handler(path, overwrite=overwrite)
|
|
452
|
+
except NgioError as e:
|
|
453
|
+
return False, e
|
|
399
454
|
|
|
400
|
-
def
|
|
455
|
+
def copy_handler(self, handler: "ZarrGroupHandler") -> None:
|
|
401
456
|
"""Copy the group to a new store."""
|
|
402
|
-
|
|
457
|
+
_, n_skipped, _ = zarr.copy_store(
|
|
458
|
+
source=self.group.store,
|
|
459
|
+
dest=handler.group.store,
|
|
460
|
+
source_path=self.group.path,
|
|
461
|
+
dest_path=handler.group.path,
|
|
462
|
+
if_exists="replace",
|
|
463
|
+
)
|
|
464
|
+
if n_skipped > 0:
|
|
465
|
+
raise NgioValueError(
|
|
466
|
+
f"Error copying group to {handler.full_url}, "
|
|
467
|
+
f"#{n_skipped} files where skipped."
|
|
468
|
+
)
|
|
403
469
|
|
|
404
470
|
|
|
405
471
|
def find_dimension_separator(array: zarr.Array) -> Literal[".", "/"]:
|
|
@@ -418,116 +484,8 @@ def find_dimension_separator(array: zarr.Array) -> Literal[".", "/"]:
|
|
|
418
484
|
else:
|
|
419
485
|
separator = array.metadata.chunk_key_encoding
|
|
420
486
|
if not isinstance(separator, DefaultChunkKeyEncoding):
|
|
421
|
-
raise
|
|
487
|
+
raise ValueError(
|
|
422
488
|
"Only DefaultChunkKeyEncoding is supported in this example."
|
|
423
489
|
)
|
|
424
490
|
separator = separator.separator
|
|
425
491
|
return separator
|
|
426
|
-
|
|
427
|
-
|
|
428
|
-
def is_group_listable(group: zarr.Group) -> bool:
|
|
429
|
-
"""Check if a Zarr group is listable.
|
|
430
|
-
|
|
431
|
-
A group is considered listable if it contains at least one array or subgroup.
|
|
432
|
-
|
|
433
|
-
Args:
|
|
434
|
-
group (zarr.Group): The Zarr group to check.
|
|
435
|
-
|
|
436
|
-
Returns:
|
|
437
|
-
bool: True if the group is listable, False otherwise.
|
|
438
|
-
"""
|
|
439
|
-
if not group.store.supports_listing:
|
|
440
|
-
# If the store does not support listing
|
|
441
|
-
# then for sure it is not listable
|
|
442
|
-
return False
|
|
443
|
-
try:
|
|
444
|
-
next(group.keys())
|
|
445
|
-
return True
|
|
446
|
-
except StopIteration:
|
|
447
|
-
# Group is listable but empty
|
|
448
|
-
return True
|
|
449
|
-
except Exception as _:
|
|
450
|
-
# Some stores may raise errors when listing
|
|
451
|
-
# consider those not listable
|
|
452
|
-
return False
|
|
453
|
-
|
|
454
|
-
|
|
455
|
-
def _make_sync_fs(fs: fsspec.AbstractFileSystem) -> fsspec.AbstractFileSystem:
|
|
456
|
-
fs_dict = json.loads(fs.to_json())
|
|
457
|
-
fs_dict["asynchronous"] = False
|
|
458
|
-
return fsspec.AbstractFileSystem.from_json(json.dumps(fs_dict))
|
|
459
|
-
|
|
460
|
-
|
|
461
|
-
def _get_mapper(store: LocalStore | FsspecStore, path: str):
|
|
462
|
-
if isinstance(store, LocalStore):
|
|
463
|
-
fs = fsspec.filesystem("file")
|
|
464
|
-
full_path = (store.root / path).as_posix()
|
|
465
|
-
else:
|
|
466
|
-
fs = _make_sync_fs(store.fs)
|
|
467
|
-
full_path = f"{store.path}/{path}"
|
|
468
|
-
return fs.get_mapper(full_path)
|
|
469
|
-
|
|
470
|
-
|
|
471
|
-
def _fsspec_copy(
|
|
472
|
-
src_fs: LocalStore | FsspecStore,
|
|
473
|
-
src_path: str,
|
|
474
|
-
dest_fs: LocalStore | FsspecStore,
|
|
475
|
-
dest_path: str,
|
|
476
|
-
):
|
|
477
|
-
src_mapper = _get_mapper(src_fs, src_path)
|
|
478
|
-
dest_mapper = _get_mapper(dest_fs, dest_path)
|
|
479
|
-
for key in src_mapper.keys():
|
|
480
|
-
dest_mapper[key] = src_mapper[key]
|
|
481
|
-
|
|
482
|
-
|
|
483
|
-
def _zarr_python_copy(src_group: zarr.Group, dest_group: zarr.Group):
|
|
484
|
-
# Copy attributes
|
|
485
|
-
dest_group.attrs.put(src_group.attrs.asdict())
|
|
486
|
-
# Copy arrays
|
|
487
|
-
for name, array in src_group.arrays():
|
|
488
|
-
if array.metadata.zarr_format == 2:
|
|
489
|
-
spec = AnyArraySpecV2.from_zarr(array)
|
|
490
|
-
else:
|
|
491
|
-
spec = AnyArraySpecV3.from_zarr(array)
|
|
492
|
-
dst = spec.to_zarr(
|
|
493
|
-
store=dest_group.store,
|
|
494
|
-
path=f"{dest_group.path}/{name}",
|
|
495
|
-
overwrite=True,
|
|
496
|
-
)
|
|
497
|
-
if array.ndim > 0:
|
|
498
|
-
dask_array = da.from_zarr(array)
|
|
499
|
-
da.to_zarr(dask_array, dst, overwrite=False)
|
|
500
|
-
# Copy subgroups
|
|
501
|
-
for name, subgroup in src_group.groups():
|
|
502
|
-
dest_subgroup = dest_group.create_group(name, overwrite=True)
|
|
503
|
-
_zarr_python_copy(subgroup, dest_subgroup)
|
|
504
|
-
|
|
505
|
-
|
|
506
|
-
def copy_group(
|
|
507
|
-
src_group: zarr.Group, dest_group: zarr.Group, suppress_warnings: bool = False
|
|
508
|
-
):
|
|
509
|
-
if src_group.metadata.zarr_format != dest_group.metadata.zarr_format:
|
|
510
|
-
raise NgioValueError(
|
|
511
|
-
"Different Zarr format versions between source and destination, "
|
|
512
|
-
"cannot copy."
|
|
513
|
-
)
|
|
514
|
-
|
|
515
|
-
if not is_group_listable(src_group):
|
|
516
|
-
raise NgioValueError("Source group is not listable, cannot copy.")
|
|
517
|
-
|
|
518
|
-
if dest_group.read_only:
|
|
519
|
-
raise NgioValueError("Destination group is read only, cannot copy.")
|
|
520
|
-
if isinstance(src_group.store, LocalStore | FsspecStore) and isinstance(
|
|
521
|
-
dest_group.store, LocalStore | FsspecStore
|
|
522
|
-
):
|
|
523
|
-
_fsspec_copy(src_group.store, src_group.path, dest_group.store, dest_group.path)
|
|
524
|
-
return
|
|
525
|
-
if not suppress_warnings:
|
|
526
|
-
warnings.warn(
|
|
527
|
-
"Fsspec copy not possible, falling back to Zarr Python API for the copy. "
|
|
528
|
-
"This will preserve some tabular data non-zarr native (parquet, and csv), "
|
|
529
|
-
"and it will be slower for large datasets.",
|
|
530
|
-
UserWarning,
|
|
531
|
-
stacklevel=2,
|
|
532
|
-
)
|
|
533
|
-
_zarr_python_copy(src_group, dest_group)
|