ngio 0.5.0__py3-none-any.whl → 0.5.0a1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54) hide show
  1. ngio/__init__.py +2 -5
  2. ngio/common/__init__.py +6 -11
  3. ngio/common/_masking_roi.py +54 -34
  4. ngio/common/_pyramid.py +87 -321
  5. ngio/common/_roi.py +330 -258
  6. ngio/experimental/iterators/_feature.py +3 -3
  7. ngio/experimental/iterators/_rois_utils.py +11 -10
  8. ngio/hcs/_plate.py +136 -192
  9. ngio/images/_abstract_image.py +35 -539
  10. ngio/images/_create.py +283 -0
  11. ngio/images/_create_synt_container.py +43 -40
  12. ngio/images/_image.py +251 -517
  13. ngio/images/_label.py +172 -249
  14. ngio/images/_masked_image.py +2 -2
  15. ngio/images/_ome_zarr_container.py +241 -644
  16. ngio/io_pipes/_io_pipes.py +9 -9
  17. ngio/io_pipes/_io_pipes_masked.py +7 -7
  18. ngio/io_pipes/_io_pipes_roi.py +6 -6
  19. ngio/io_pipes/_io_pipes_types.py +3 -3
  20. ngio/io_pipes/_match_shape.py +8 -6
  21. ngio/io_pipes/_ops_slices_utils.py +5 -8
  22. ngio/ome_zarr_meta/__init__.py +18 -29
  23. ngio/ome_zarr_meta/_meta_handlers.py +708 -392
  24. ngio/ome_zarr_meta/ngio_specs/__init__.py +0 -4
  25. ngio/ome_zarr_meta/ngio_specs/_axes.py +51 -152
  26. ngio/ome_zarr_meta/ngio_specs/_dataset.py +22 -13
  27. ngio/ome_zarr_meta/ngio_specs/_ngio_hcs.py +91 -129
  28. ngio/ome_zarr_meta/ngio_specs/_ngio_image.py +68 -57
  29. ngio/ome_zarr_meta/v04/__init__.py +1 -5
  30. ngio/ome_zarr_meta/v04/{_v04_spec.py → _v04_spec_utils.py} +85 -54
  31. ngio/ome_zarr_meta/v05/__init__.py +1 -5
  32. ngio/ome_zarr_meta/v05/{_v05_spec.py → _v05_spec_utils.py} +87 -64
  33. ngio/resources/__init__.py +1 -1
  34. ngio/resources/resource_model.py +1 -1
  35. ngio/tables/_tables_container.py +27 -85
  36. ngio/tables/backends/_anndata.py +8 -58
  37. ngio/tables/backends/_anndata_utils.py +6 -1
  38. ngio/tables/backends/_csv.py +19 -3
  39. ngio/tables/backends/_json.py +13 -10
  40. ngio/tables/backends/_non_zarr_backends.py +196 -0
  41. ngio/tables/backends/_parquet.py +31 -3
  42. ngio/tables/v1/_roi_table.py +27 -44
  43. ngio/utils/__init__.py +12 -8
  44. ngio/utils/_datasets.py +0 -6
  45. ngio/utils/_logger.py +50 -0
  46. ngio/utils/_zarr_utils.py +250 -292
  47. {ngio-0.5.0.dist-info → ngio-0.5.0a1.dist-info}/METADATA +6 -13
  48. ngio-0.5.0a1.dist-info/RECORD +88 -0
  49. {ngio-0.5.0.dist-info → ngio-0.5.0a1.dist-info}/WHEEL +1 -1
  50. ngio/images/_create_utils.py +0 -406
  51. ngio/tables/backends/_py_arrow_backends.py +0 -222
  52. ngio/utils/_cache.py +0 -48
  53. ngio-0.5.0.dist-info/RECORD +0 -88
  54. {ngio-0.5.0.dist-info → ngio-0.5.0a1.dist-info}/licenses/LICENSE +0 -0
ngio/utils/_zarr_utils.py CHANGED
@@ -1,67 +1,58 @@
1
1
  """Common utilities for working with Zarr groups in consistent ways."""
2
2
 
3
- import json
4
- import warnings
5
3
  from pathlib import Path
6
- from typing import Literal, TypeAlias
4
+ from typing import Literal
7
5
 
8
- import dask.array as da
9
6
  import fsspec
10
7
  import zarr
11
8
  from filelock import BaseFileLock, FileLock
12
- from pydantic_zarr.v2 import ArraySpec as AnyArraySpecV2
13
- from pydantic_zarr.v3 import ArraySpec as AnyArraySpecV3
14
9
  from zarr.abc.store import Store
10
+ from zarr.core.array import CompressorLike
15
11
  from zarr.errors import ContainsGroupError
16
- from zarr.storage import FsspecStore, LocalStore, MemoryStore, ZipStore
12
+ from zarr.storage import FsspecStore, LocalStore, MemoryStore
17
13
 
18
- from ngio.utils._cache import NgioCache
19
- from ngio.utils._errors import (
20
- NgioFileExistsError,
21
- NgioFileNotFoundError,
22
- NgioValueError,
23
- )
14
+ from ngio.utils import NgioFileExistsError, NgioFileNotFoundError, NgioValueError
15
+ from ngio.utils._errors import NgioError
24
16
 
25
17
  AccessModeLiteral = Literal["r", "r+", "w", "w-", "a"]
26
18
  # StoreLike is more restrictive than it could be
27
19
  # but to make sure we can handle the store correctly
28
20
  # we need to be more restrictive
29
- NgioSupportedStore: TypeAlias = (
30
- str | Path | fsspec.mapping.FSMap | FsspecStore | MemoryStore | dict | LocalStore
21
+ NgioSupportedStore = (
22
+ str | Path | fsspec.mapping.FSMap | FsspecStore | MemoryStore | LocalStore
31
23
  )
32
- GenericStore: TypeAlias = NgioSupportedStore | Store
33
- StoreOrGroup: TypeAlias = NgioSupportedStore | zarr.Group
24
+ GenericStore = Store | NgioSupportedStore
25
+ StoreOrGroup = GenericStore | zarr.Group
34
26
 
35
27
 
36
28
  def _check_store(store) -> NgioSupportedStore:
37
29
  """Check the store and return a valid store."""
38
- if not isinstance(store, NgioSupportedStore):
39
- warnings.warn(
40
- f"Store type {type(store)} is not explicitly supported. "
41
- f"Supported types are: {NgioSupportedStore}. "
42
- "Proceeding, but this may lead to unexpected behavior.",
43
- UserWarning,
44
- stacklevel=2,
45
- )
46
- return store
30
+ if isinstance(store, NgioSupportedStore):
31
+ return store
47
32
 
33
+ raise NotImplementedError(
34
+ f"Store type {type(store)} is not supported. "
35
+ f"Supported types are: {NgioSupportedStore}"
36
+ )
48
37
 
49
- def _check_group(
50
- group: zarr.Group, mode: AccessModeLiteral | None = None
51
- ) -> zarr.Group:
38
+
39
+ def _check_group(group: zarr.Group, mode: AccessModeLiteral) -> zarr.Group:
52
40
  """Check the group and return a valid group."""
53
- if group.read_only and mode not in [None, "r"]:
54
- raise NgioValueError(f"The group is read only. Cannot open in mode {mode}.")
41
+ if group.read_only and mode in ["w", "w-"]:
42
+ raise NgioValueError(
43
+ "The group is read only. Cannot open in write mode ['w', 'w-']"
44
+ )
55
45
 
56
46
  if mode == "r" and not group.read_only:
57
47
  # let's make sure we don't accidentally write to the group
58
48
  group = zarr.open_group(store=group.store, path=group.path, mode="r")
49
+
59
50
  return group
60
51
 
61
52
 
62
53
  def open_group_wrapper(
63
54
  store: StoreOrGroup,
64
- mode: AccessModeLiteral | None = None,
55
+ mode: AccessModeLiteral,
65
56
  zarr_format: Literal[2, 3] | None = None,
66
57
  ) -> zarr.Group:
67
58
  """Wrapper around zarr.open_group with some additional checks.
@@ -81,7 +72,6 @@ def open_group_wrapper(
81
72
 
82
73
  try:
83
74
  _check_store(store)
84
- mode = mode if mode is not None else "a"
85
75
  group = zarr.open_group(store=store, mode=mode, zarr_format=zarr_format)
86
76
 
87
77
  except FileExistsError as e:
@@ -108,32 +98,68 @@ class ZarrGroupHandler:
108
98
  store: StoreOrGroup,
109
99
  zarr_format: Literal[2, 3] | None = None,
110
100
  cache: bool = False,
111
- mode: AccessModeLiteral | None = None,
101
+ mode: AccessModeLiteral = "a",
102
+ parallel_safe: bool = False,
103
+ parent: "ZarrGroupHandler | None" = None,
112
104
  ):
113
105
  """Initialize the handler.
114
106
 
115
107
  Args:
116
108
  store (StoreOrGroup): The Zarr store or group containing the image data.
117
- zarr_format (int | None): The Zarr format version to use.
109
+ meta_mode (str): The mode of the metadata handler.
110
+ zarr_format (int): The Zarr format version to use.
118
111
  cache (bool): Whether to cache the metadata.
119
- mode (str | None): The mode of the store.
112
+ mode (str): The mode of the store.
113
+ parallel_safe (bool): If True, the handler will create a lock file to make
114
+ that can be used to make the handler parallel safe.
115
+ Be aware that the lock needs to be used manually.
116
+ parent (ZarrGroupHandler | None): The parent handler.
120
117
  """
121
- if mode not in ["r", "r+", "w", "w-", "a", None]:
118
+ if mode not in ["r", "r+", "w", "w-", "a"]:
122
119
  raise NgioValueError(f"Mode {mode} is not supported.")
123
120
 
121
+ if parallel_safe and cache:
122
+ raise NgioValueError(
123
+ "The cache and parallel_safe options are mutually exclusive."
124
+ "If you want to use the lock mechanism, you should not use the cache."
125
+ )
126
+
124
127
  group = open_group_wrapper(store=store, mode=mode, zarr_format=zarr_format)
128
+ _store = group.store
129
+
130
+ # Make sure the cache is set in the attrs
131
+ # in the same way as the cache in the handler
132
+
133
+ ## TODO
134
+ # Figure out how to handle the cache in the new zarr version
135
+ # group.attrs.cache = cache
136
+
137
+ if parallel_safe:
138
+ if not isinstance(_store, LocalStore):
139
+ raise NgioValueError(
140
+ "The store needs to be a LocalStore to use the lock mechanism. "
141
+ f"Instead, got {_store.__class__.__name__}."
142
+ )
143
+
144
+ store_path = _store.root / group.path
145
+ self._lock_path = store_path.with_suffix(".lock")
146
+ self._lock = FileLock(self._lock_path, timeout=10)
147
+
148
+ else:
149
+ self._lock_path = None
150
+ self._lock = None
151
+
125
152
  self._group = group
153
+ self._mode = mode
126
154
  self.use_cache = cache
127
-
128
- self._group_cache: NgioCache[zarr.Group] = NgioCache(use_cache=cache)
129
- self._array_cache: NgioCache[zarr.Array] = NgioCache(use_cache=cache)
130
- self._handlers_cache: NgioCache[ZarrGroupHandler] = NgioCache(use_cache=cache)
131
- self._lock: tuple[Path, BaseFileLock] | None = None
155
+ self._parallel_safe = parallel_safe
156
+ self._cache = {}
157
+ self._parent = parent
132
158
 
133
159
  def __repr__(self) -> str:
134
160
  """Return a string representation of the handler."""
135
161
  return (
136
- f"ZarrGroupHandler(full_url={self.full_url}, read_only={self.read_only}, "
162
+ f"ZarrGroupHandler(full_url={self.full_url}, mode={self.mode}, "
137
163
  f"cache={self.use_cache}"
138
164
  )
139
165
 
@@ -147,17 +173,8 @@ class ZarrGroupHandler:
147
173
  """Return the store path."""
148
174
  if isinstance(self.store, LocalStore):
149
175
  return (self.store.root / self.group.path).as_posix()
150
- elif isinstance(self.store, FsspecStore):
151
- return f"{self.store.path}/{self.group.path}"
152
- elif isinstance(self.store, ZipStore):
153
- return (self.store.path / self.group.path).as_posix()
154
- elif isinstance(self.store, MemoryStore):
155
- return None
156
- warnings.warn(
157
- f"Cannot determine full URL for store type {type(self.store)}. ",
158
- UserWarning,
159
- stacklevel=2,
160
- )
176
+ if isinstance(self.store, FsspecStore):
177
+ return self.store.fs.map.root_path
161
178
  return None
162
179
 
163
180
  @property
@@ -166,55 +183,35 @@ class ZarrGroupHandler:
166
183
  return self._group.metadata.zarr_format
167
184
 
168
185
  @property
169
- def read_only(self) -> bool:
170
- """Return whether the group is read only."""
171
- return self._group.read_only
172
-
173
- def _create_lock(self) -> tuple[Path, BaseFileLock]:
174
- """Create the lock."""
175
- if self._lock is not None:
176
- return self._lock
177
-
178
- if self.use_cache is True:
179
- raise NgioValueError(
180
- "Lock mechanism is not compatible with caching. "
181
- "Please set cache=False to use the lock mechanism."
182
- )
183
-
184
- if not isinstance(self.store, LocalStore):
185
- raise NgioValueError(
186
- "The store needs to be a LocalStore to use the lock mechanism. "
187
- f"Instead, got {self.store.__class__.__name__}."
188
- )
189
-
190
- store_path = Path(self.store.root) / self.group.path
191
- _lock_path = store_path.with_suffix(".lock")
192
- _lock = FileLock(_lock_path, timeout=10)
193
- return _lock_path, _lock
186
+ def mode(self) -> AccessModeLiteral:
187
+ """Return the mode of the group."""
188
+ return self._mode # type: ignore
194
189
 
195
190
  @property
196
191
  def lock(self) -> BaseFileLock:
197
192
  """Return the lock."""
198
193
  if self._lock is None:
199
- self._lock = self._create_lock()
200
- return self._lock[1]
194
+ raise NgioValueError(
195
+ "The handler is not parallel safe. "
196
+ "Reopen the handler with parallel_safe=True."
197
+ )
198
+ return self._lock
201
199
 
202
200
  @property
203
- def lock_path(self) -> Path:
204
- """Return the lock path."""
205
- if self._lock is None:
206
- self._lock = self._create_lock()
207
- return self._lock[0]
201
+ def parent(self) -> "ZarrGroupHandler | None":
202
+ """Return the parent handler."""
203
+ return self._parent
208
204
 
209
205
  def remove_lock(self) -> None:
210
206
  """Return the lock."""
211
- if self._lock is None:
207
+ if self._lock is None or self._lock_path is None:
212
208
  return None
213
209
 
214
- lock_path, lock = self._lock
215
- if lock_path.exists() and lock.lock_counter == 0:
210
+ lock_path = Path(self._lock_path)
211
+ if lock_path.exists() and self._lock.lock_counter == 0:
216
212
  lock_path.unlink()
217
213
  self._lock = None
214
+ self._lock_path = None
218
215
  return None
219
216
 
220
217
  raise NgioValueError("The lock is still in use. Cannot remove it.")
@@ -225,7 +222,10 @@ class ZarrGroupHandler:
225
222
  This is useful when the group has been modified
226
223
  outside of the handler.
227
224
  """
228
- mode = "r" if self.read_only else "r+"
225
+ if self.mode == "r":
226
+ mode = "r"
227
+ else:
228
+ mode = "r+"
229
229
  return zarr.open_group(
230
230
  store=self._group.store,
231
231
  path=self._group.path,
@@ -233,57 +233,72 @@ class ZarrGroupHandler:
233
233
  zarr_format=self._group.metadata.zarr_format,
234
234
  )
235
235
 
236
- def reopen_handler(self) -> "ZarrGroupHandler":
237
- """Reopen the handler.
238
-
239
- This is useful when the group has been modified
240
- outside of the handler.
241
- """
242
- mode = "r" if self.read_only else "r+"
243
- group = self.reopen_group()
244
- return ZarrGroupHandler(
245
- store=group,
246
- zarr_format=group.metadata.zarr_format,
247
- cache=self.use_cache,
248
- mode=mode,
249
- )
250
-
251
- def clean_cache(self) -> None:
252
- """Clear the cached metadata."""
253
- group = self.reopen_group()
254
- self.__init__(
255
- store=group,
256
- zarr_format=group.metadata.zarr_format,
257
- cache=self.use_cache,
258
- mode="r" if self.read_only else "r+",
259
- )
260
-
261
236
  @property
262
237
  def group(self) -> zarr.Group:
263
238
  """Return the group."""
264
- if self.use_cache is False:
265
- # If we are not using cache, we need to reopen the group
239
+ if self._parallel_safe:
240
+ # If we are parallel safe, we need to reopen the group
266
241
  # to make sure that the attributes are up to date
267
242
  return self.reopen_group()
268
243
  return self._group
269
244
 
245
+ def add_to_cache(self, key: str, value: object) -> None:
246
+ """Add an object to the cache."""
247
+ if not self.use_cache:
248
+ return None
249
+ self._cache[key] = value
250
+
251
+ def get_from_cache(self, key: str) -> object | None:
252
+ """Get an object from the cache."""
253
+ if not self.use_cache:
254
+ return None
255
+ return self._cache.get(key, None)
256
+
257
+ def clean_cache(self) -> None:
258
+ """Clear the cached metadata."""
259
+ self._cache = {}
260
+
270
261
  def load_attrs(self) -> dict:
271
262
  """Load the attributes of the group."""
272
- return self.reopen_group().attrs.asdict()
263
+ attrs = self.get_from_cache("attrs")
264
+ if attrs is not None and isinstance(attrs, dict):
265
+ return attrs
273
266
 
274
- def write_attrs(self, attrs: dict, overwrite: bool = False) -> None:
267
+ attrs = dict(self.group.attrs)
268
+
269
+ self.add_to_cache("attrs", attrs)
270
+ return attrs
271
+
272
+ def _write_attrs(self, attrs: dict, overwrite: bool = False) -> None:
275
273
  """Write the metadata to the store."""
276
- # Maybe we should use the lock here
277
- if self.read_only:
274
+ if self.group.read_only:
278
275
  raise NgioValueError("The group is read only. Cannot write metadata.")
279
- group = self.reopen_group()
276
+
277
+ # we need to invalidate the current attrs cache
278
+ self.add_to_cache("attrs", None)
280
279
  if overwrite:
281
- group.attrs.clear()
282
- group.attrs.update(attrs)
280
+ self.group.attrs.clear()
281
+
282
+ self.group.attrs.update(attrs)
283
+
284
+ def write_attrs(self, attrs: dict, overwrite: bool = False) -> None:
285
+ """Write the metadata to the store."""
286
+ # Maybe we should use the lock here
287
+ self._write_attrs(attrs, overwrite)
288
+
289
+ def _obj_get(self, path: str):
290
+ """Get a group from the group."""
291
+ group_or_array = self.get_from_cache(path)
292
+ if group_or_array is not None:
293
+ return group_or_array
294
+
295
+ group_or_array = self.group.get(path, None)
296
+ self.add_to_cache(path, group_or_array)
297
+ return group_or_array
283
298
 
284
299
  def create_group(self, path: str, overwrite: bool = False) -> zarr.Group:
285
300
  """Create a group in the group."""
286
- if self.group.read_only:
301
+ if self.mode == "r":
287
302
  raise NgioValueError("Cannot create a group in read only mode.")
288
303
 
289
304
  try:
@@ -293,7 +308,7 @@ class ZarrGroupHandler:
293
308
  f"A Zarr group already exists at {path}, "
294
309
  "consider setting overwrite=True."
295
310
  ) from e
296
- self._group_cache.set(path, group, overwrite=overwrite)
311
+ self.add_to_cache(path, group)
297
312
  return group
298
313
 
299
314
  def get_group(
@@ -319,87 +334,138 @@ class ZarrGroupHandler:
319
334
  if overwrite:
320
335
  return self.create_group(path, overwrite=overwrite)
321
336
 
322
- group = self._group_cache.get(path)
337
+ group = self._obj_get(path)
323
338
  if isinstance(group, zarr.Group):
324
339
  return group
325
340
 
326
- group = self.group.get(path, default=None)
327
- if isinstance(group, zarr.Group):
328
- self._group_cache.set(path, group, overwrite=overwrite)
329
- return group
330
-
331
- if isinstance(group, zarr.Array):
332
- raise NgioValueError(f"The object at {path} is not a group, but an array.")
341
+ if group is not None:
342
+ raise NgioValueError(
343
+ f"The object at {path} is not a group, but a {type(group)}"
344
+ )
333
345
 
334
346
  if not create_mode:
335
347
  raise NgioFileNotFoundError(f"No group found at {path}")
336
348
  group = self.create_group(path)
337
- self._group_cache.set(path, group, overwrite=overwrite)
338
349
  return group
339
350
 
351
+ def safe_get_group(
352
+ self, path: str, create_mode: bool = False
353
+ ) -> tuple[bool, zarr.Group | NgioError]:
354
+ """Get a group from the group.
355
+
356
+ Args:
357
+ path (str): The path to the group.
358
+ create_mode (bool): If True, create the group if it does not exist.
359
+
360
+ Returns:
361
+ zarr.Group | None: The Zarr group or None if it does not exist
362
+ or an error occurs.
363
+
364
+ """
365
+ try:
366
+ return True, self.get_group(path, create_mode)
367
+ except NgioError as e:
368
+ return False, e
369
+
340
370
  def get_array(self, path: str) -> zarr.Array:
341
371
  """Get an array from the group."""
342
- array = self._array_cache.get(path)
343
- if isinstance(array, zarr.Array):
344
- return array
345
- array = self.group.get(path, default=None)
346
- if isinstance(array, zarr.Array):
347
- self._array_cache.set(path, array)
348
- return array
349
-
350
- if isinstance(array, zarr.Group):
351
- raise NgioValueError(f"The object at {path} is not an array, but a group.")
352
- raise NgioFileNotFoundError(f"No array found at {path}")
353
-
354
- def get_handler(
372
+ array = self._obj_get(path)
373
+ if array is None:
374
+ raise NgioFileNotFoundError(f"No array found at {path}")
375
+ if not isinstance(array, zarr.Array):
376
+ raise NgioValueError(
377
+ f"The object at {path} is not an array, but a {type(array)}"
378
+ )
379
+ return array
380
+
381
+ def create_array(
382
+ self,
383
+ path: str,
384
+ shape: tuple[int, ...],
385
+ dtype: str,
386
+ chunks: tuple[int, ...] | Literal["auto"] = "auto",
387
+ compressors: CompressorLike = "auto",
388
+ separator: Literal[".", "/"] = "/",
389
+ overwrite: bool = False,
390
+ ) -> zarr.Array:
391
+ if self.mode == "r":
392
+ raise NgioValueError("Cannot create an array in read only mode.")
393
+
394
+ if self.zarr_format == 2:
395
+ chunks_encoding = {
396
+ "name": "v2",
397
+ "separator": separator,
398
+ }
399
+ else:
400
+ chunks_encoding = {
401
+ "name": "default",
402
+ "separator": separator,
403
+ }
404
+
405
+ try:
406
+ return self.group.create_array(
407
+ name=path,
408
+ shape=shape,
409
+ dtype=dtype,
410
+ chunks=chunks,
411
+ chunk_key_encoding=chunks_encoding,
412
+ overwrite=overwrite,
413
+ compressors=compressors,
414
+ )
415
+ except ContainsGroupError as e:
416
+ raise NgioFileExistsError(
417
+ f"A Zarr array already exists at {path}, "
418
+ "consider setting overwrite=True."
419
+ ) from e
420
+ except Exception as e:
421
+ raise NgioValueError(f"Error creating array at {path}") from e
422
+
423
+ def derive_handler(
355
424
  self,
356
425
  path: str,
357
- create_mode: bool = True,
358
426
  overwrite: bool = False,
359
427
  ) -> "ZarrGroupHandler":
360
- """Get a new handler for a group in the current handler group.
428
+ """Derive a new handler from the current handler.
361
429
 
362
430
  Args:
363
431
  path (str): The path to the group.
364
- create_mode (bool): If True, create the group if it does not exist.
365
432
  overwrite (bool): If True, overwrite the group if it exists.
366
433
  """
367
- handler = self._handlers_cache.get(path)
368
- if handler is not None:
369
- return handler
370
- group = self.get_group(path, create_mode=create_mode, overwrite=overwrite)
371
- mode = "r" if group.read_only else "r+"
372
- handler = ZarrGroupHandler(
373
- store=group, zarr_format=self.zarr_format, cache=self.use_cache, mode=mode
434
+ group = self.get_group(path, create_mode=True, overwrite=overwrite)
435
+ return ZarrGroupHandler(
436
+ store=group,
437
+ zarr_format=self.zarr_format,
438
+ cache=self.use_cache,
439
+ mode=self.mode,
440
+ parallel_safe=self._parallel_safe,
441
+ parent=self,
374
442
  )
375
- self._handlers_cache.set(path, handler)
376
- return handler
377
-
378
- @property
379
- def is_listable(self) -> bool:
380
- return is_group_listable(self.group)
381
443
 
382
- def delete_group(self, path: str) -> None:
383
- """Delete a group from the current group.
384
-
385
- Args:
386
- path (str): The path to the group to delete.
387
- """
388
- if self.group.read_only:
389
- raise NgioValueError("Cannot delete a group in read only mode.")
390
- self.group.__delitem__(path)
391
- self._group_cache._cache.pop(path, None)
392
- self._handlers_cache._cache.pop(path, None)
393
-
394
- def delete_self(self) -> None:
395
- """Delete the current group."""
396
- if self.group.read_only:
397
- raise NgioValueError("Cannot delete a group in read only mode.")
398
- self.group.__delitem__("/")
444
+ def safe_derive_handler(
445
+ self,
446
+ path: str,
447
+ overwrite: bool = False,
448
+ ) -> tuple[bool, "ZarrGroupHandler | NgioError"]:
449
+ """Derive a new handler from the current handler."""
450
+ try:
451
+ return True, self.derive_handler(path, overwrite=overwrite)
452
+ except NgioError as e:
453
+ return False, e
399
454
 
400
- def copy_group(self, dest_group: zarr.Group):
455
+ def copy_handler(self, handler: "ZarrGroupHandler") -> None:
401
456
  """Copy the group to a new store."""
402
- copy_group(self.group, dest_group)
457
+ _, n_skipped, _ = zarr.copy_store(
458
+ source=self.group.store,
459
+ dest=handler.group.store,
460
+ source_path=self.group.path,
461
+ dest_path=handler.group.path,
462
+ if_exists="replace",
463
+ )
464
+ if n_skipped > 0:
465
+ raise NgioValueError(
466
+ f"Error copying group to {handler.full_url}, "
467
+ f"#{n_skipped} files where skipped."
468
+ )
403
469
 
404
470
 
405
471
  def find_dimension_separator(array: zarr.Array) -> Literal[".", "/"]:
@@ -418,116 +484,8 @@ def find_dimension_separator(array: zarr.Array) -> Literal[".", "/"]:
418
484
  else:
419
485
  separator = array.metadata.chunk_key_encoding
420
486
  if not isinstance(separator, DefaultChunkKeyEncoding):
421
- raise NgioValueError(
487
+ raise ValueError(
422
488
  "Only DefaultChunkKeyEncoding is supported in this example."
423
489
  )
424
490
  separator = separator.separator
425
491
  return separator
426
-
427
-
428
- def is_group_listable(group: zarr.Group) -> bool:
429
- """Check if a Zarr group is listable.
430
-
431
- A group is considered listable if it contains at least one array or subgroup.
432
-
433
- Args:
434
- group (zarr.Group): The Zarr group to check.
435
-
436
- Returns:
437
- bool: True if the group is listable, False otherwise.
438
- """
439
- if not group.store.supports_listing:
440
- # If the store does not support listing
441
- # then for sure it is not listable
442
- return False
443
- try:
444
- next(group.keys())
445
- return True
446
- except StopIteration:
447
- # Group is listable but empty
448
- return True
449
- except Exception as _:
450
- # Some stores may raise errors when listing
451
- # consider those not listable
452
- return False
453
-
454
-
455
- def _make_sync_fs(fs: fsspec.AbstractFileSystem) -> fsspec.AbstractFileSystem:
456
- fs_dict = json.loads(fs.to_json())
457
- fs_dict["asynchronous"] = False
458
- return fsspec.AbstractFileSystem.from_json(json.dumps(fs_dict))
459
-
460
-
461
- def _get_mapper(store: LocalStore | FsspecStore, path: str):
462
- if isinstance(store, LocalStore):
463
- fs = fsspec.filesystem("file")
464
- full_path = (store.root / path).as_posix()
465
- else:
466
- fs = _make_sync_fs(store.fs)
467
- full_path = f"{store.path}/{path}"
468
- return fs.get_mapper(full_path)
469
-
470
-
471
- def _fsspec_copy(
472
- src_fs: LocalStore | FsspecStore,
473
- src_path: str,
474
- dest_fs: LocalStore | FsspecStore,
475
- dest_path: str,
476
- ):
477
- src_mapper = _get_mapper(src_fs, src_path)
478
- dest_mapper = _get_mapper(dest_fs, dest_path)
479
- for key in src_mapper.keys():
480
- dest_mapper[key] = src_mapper[key]
481
-
482
-
483
- def _zarr_python_copy(src_group: zarr.Group, dest_group: zarr.Group):
484
- # Copy attributes
485
- dest_group.attrs.put(src_group.attrs.asdict())
486
- # Copy arrays
487
- for name, array in src_group.arrays():
488
- if array.metadata.zarr_format == 2:
489
- spec = AnyArraySpecV2.from_zarr(array)
490
- else:
491
- spec = AnyArraySpecV3.from_zarr(array)
492
- dst = spec.to_zarr(
493
- store=dest_group.store,
494
- path=f"{dest_group.path}/{name}",
495
- overwrite=True,
496
- )
497
- if array.ndim > 0:
498
- dask_array = da.from_zarr(array)
499
- da.to_zarr(dask_array, dst, overwrite=False)
500
- # Copy subgroups
501
- for name, subgroup in src_group.groups():
502
- dest_subgroup = dest_group.create_group(name, overwrite=True)
503
- _zarr_python_copy(subgroup, dest_subgroup)
504
-
505
-
506
- def copy_group(
507
- src_group: zarr.Group, dest_group: zarr.Group, suppress_warnings: bool = False
508
- ):
509
- if src_group.metadata.zarr_format != dest_group.metadata.zarr_format:
510
- raise NgioValueError(
511
- "Different Zarr format versions between source and destination, "
512
- "cannot copy."
513
- )
514
-
515
- if not is_group_listable(src_group):
516
- raise NgioValueError("Source group is not listable, cannot copy.")
517
-
518
- if dest_group.read_only:
519
- raise NgioValueError("Destination group is read only, cannot copy.")
520
- if isinstance(src_group.store, LocalStore | FsspecStore) and isinstance(
521
- dest_group.store, LocalStore | FsspecStore
522
- ):
523
- _fsspec_copy(src_group.store, src_group.path, dest_group.store, dest_group.path)
524
- return
525
- if not suppress_warnings:
526
- warnings.warn(
527
- "Fsspec copy not possible, falling back to Zarr Python API for the copy. "
528
- "This will preserve some tabular data non-zarr native (parquet, and csv), "
529
- "and it will be slower for large datasets.",
530
- UserWarning,
531
- stacklevel=2,
532
- )
533
- _zarr_python_copy(src_group, dest_group)