ngio 0.5.0a1__py3-none-any.whl → 0.5.0a3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48)
  1. ngio/__init__.py +2 -2
  2. ngio/common/__init__.py +11 -6
  3. ngio/common/_masking_roi.py +12 -41
  4. ngio/common/_pyramid.py +218 -78
  5. ngio/common/_roi.py +257 -329
  6. ngio/experimental/iterators/_feature.py +3 -3
  7. ngio/experimental/iterators/_rois_utils.py +10 -11
  8. ngio/hcs/_plate.py +114 -123
  9. ngio/images/_abstract_image.py +417 -35
  10. ngio/images/_create_synt_container.py +36 -43
  11. ngio/images/_create_utils.py +423 -0
  12. ngio/images/_image.py +155 -177
  13. ngio/images/_label.py +144 -119
  14. ngio/images/_ome_zarr_container.py +361 -196
  15. ngio/io_pipes/_io_pipes.py +9 -9
  16. ngio/io_pipes/_io_pipes_masked.py +7 -7
  17. ngio/io_pipes/_io_pipes_roi.py +6 -6
  18. ngio/io_pipes/_io_pipes_types.py +3 -3
  19. ngio/io_pipes/_match_shape.py +5 -4
  20. ngio/io_pipes/_ops_slices_utils.py +8 -5
  21. ngio/ome_zarr_meta/__init__.py +15 -18
  22. ngio/ome_zarr_meta/_meta_handlers.py +334 -713
  23. ngio/ome_zarr_meta/ngio_specs/_axes.py +1 -0
  24. ngio/ome_zarr_meta/ngio_specs/_dataset.py +13 -22
  25. ngio/ome_zarr_meta/ngio_specs/_ngio_hcs.py +54 -61
  26. ngio/ome_zarr_meta/ngio_specs/_ngio_image.py +14 -68
  27. ngio/ome_zarr_meta/v04/__init__.py +1 -1
  28. ngio/ome_zarr_meta/v04/{_v04_spec_utils.py → _v04_spec.py} +16 -61
  29. ngio/ome_zarr_meta/v05/__init__.py +1 -1
  30. ngio/ome_zarr_meta/v05/{_v05_spec_utils.py → _v05_spec.py} +18 -61
  31. ngio/tables/_tables_container.py +25 -20
  32. ngio/tables/backends/_anndata.py +57 -8
  33. ngio/tables/backends/_anndata_utils.py +1 -6
  34. ngio/tables/backends/_csv.py +3 -19
  35. ngio/tables/backends/_json.py +10 -13
  36. ngio/tables/backends/_parquet.py +3 -31
  37. ngio/tables/backends/_py_arrow_backends.py +222 -0
  38. ngio/tables/v1/_roi_table.py +44 -27
  39. ngio/utils/__init__.py +6 -12
  40. ngio/utils/_cache.py +48 -0
  41. ngio/utils/_zarr_utils.py +285 -245
  42. {ngio-0.5.0a1.dist-info → ngio-0.5.0a3.dist-info}/METADATA +8 -4
  43. {ngio-0.5.0a1.dist-info → ngio-0.5.0a3.dist-info}/RECORD +45 -45
  44. {ngio-0.5.0a1.dist-info → ngio-0.5.0a3.dist-info}/WHEEL +1 -1
  45. ngio/images/_create.py +0 -283
  46. ngio/tables/backends/_non_zarr_backends.py +0 -196
  47. ngio/utils/_logger.py +0 -50
  48. {ngio-0.5.0a1.dist-info → ngio-0.5.0a3.dist-info}/licenses/LICENSE +0 -0
ngio/utils/_zarr_utils.py CHANGED
@@ -1,25 +1,39 @@
1
1
  """Common utilities for working with Zarr groups in consistent ways."""
2
2
 
3
+ import json
4
+ import warnings
3
5
  from pathlib import Path
4
6
  from typing import Literal
5
7
 
8
+ import dask.array as da
6
9
  import fsspec
7
10
  import zarr
8
11
  from filelock import BaseFileLock, FileLock
12
+ from pydantic_zarr.v2 import ArraySpec as AnyArraySpecV2
13
+ from pydantic_zarr.v3 import ArraySpec as AnyArraySpecV3
9
14
  from zarr.abc.store import Store
10
- from zarr.core.array import CompressorLike
11
15
  from zarr.errors import ContainsGroupError
12
- from zarr.storage import FsspecStore, LocalStore, MemoryStore
16
+ from zarr.storage import FsspecStore, LocalStore, MemoryStore, ZipStore
13
17
 
14
- from ngio.utils import NgioFileExistsError, NgioFileNotFoundError, NgioValueError
15
- from ngio.utils._errors import NgioError
18
+ from ngio.utils._cache import NgioCache
19
+ from ngio.utils._errors import (
20
+ NgioFileExistsError,
21
+ NgioFileNotFoundError,
22
+ NgioValueError,
23
+ )
16
24
 
17
25
  AccessModeLiteral = Literal["r", "r+", "w", "w-", "a"]
18
26
  # StoreLike is more restrictive than it could be
19
27
  # but to make sure we can handle the store correctly
20
28
  # we need to be more restrictive
21
29
  NgioSupportedStore = (
22
- str | Path | fsspec.mapping.FSMap | FsspecStore | MemoryStore | LocalStore
30
+ str
31
+ | Path
32
+ | fsspec.mapping.FSMap
33
+ | FsspecStore
34
+ | MemoryStore
35
+ | LocalStore
36
+ | ZipStore
23
37
  )
24
38
  GenericStore = Store | NgioSupportedStore
25
39
  StoreOrGroup = GenericStore | zarr.Group
@@ -27,32 +41,33 @@ StoreOrGroup = GenericStore | zarr.Group
27
41
 
28
42
  def _check_store(store) -> NgioSupportedStore:
29
43
  """Check the store and return a valid store."""
30
- if isinstance(store, NgioSupportedStore):
31
- return store
32
-
33
- raise NotImplementedError(
34
- f"Store type {type(store)} is not supported. "
35
- f"Supported types are: {NgioSupportedStore}"
36
- )
44
+ if not isinstance(store, NgioSupportedStore):
45
+ warnings.warn(
46
+ f"Store type {type(store)} is not explicitly supported. "
47
+ f"Supported types are: {NgioSupportedStore}. "
48
+ "Proceeding, but this may lead to unexpected behavior.",
49
+ UserWarning,
50
+ stacklevel=2,
51
+ )
52
+ return store
37
53
 
38
54
 
39
- def _check_group(group: zarr.Group, mode: AccessModeLiteral) -> zarr.Group:
55
+ def _check_group(
56
+ group: zarr.Group, mode: AccessModeLiteral | None = None
57
+ ) -> zarr.Group:
40
58
  """Check the group and return a valid group."""
41
- if group.read_only and mode in ["w", "w-"]:
42
- raise NgioValueError(
43
- "The group is read only. Cannot open in write mode ['w', 'w-']"
44
- )
59
+ if group.read_only and mode not in [None, "r"]:
60
+ raise NgioValueError(f"The group is read only. Cannot open in mode {mode}.")
45
61
 
46
62
  if mode == "r" and not group.read_only:
47
63
  # let's make sure we don't accidentally write to the group
48
64
  group = zarr.open_group(store=group.store, path=group.path, mode="r")
49
-
50
65
  return group
51
66
 
52
67
 
53
68
  def open_group_wrapper(
54
69
  store: StoreOrGroup,
55
- mode: AccessModeLiteral,
70
+ mode: AccessModeLiteral | None = None,
56
71
  zarr_format: Literal[2, 3] | None = None,
57
72
  ) -> zarr.Group:
58
73
  """Wrapper around zarr.open_group with some additional checks.
@@ -72,6 +87,7 @@ def open_group_wrapper(
72
87
 
73
88
  try:
74
89
  _check_store(store)
90
+ mode = mode if mode is not None else "a"
75
91
  group = zarr.open_group(store=store, mode=mode, zarr_format=zarr_format)
76
92
 
77
93
  except FileExistsError as e:
@@ -98,68 +114,33 @@ class ZarrGroupHandler:
98
114
  store: StoreOrGroup,
99
115
  zarr_format: Literal[2, 3] | None = None,
100
116
  cache: bool = False,
101
- mode: AccessModeLiteral = "a",
102
- parallel_safe: bool = False,
103
- parent: "ZarrGroupHandler | None" = None,
117
+ mode: AccessModeLiteral | None = None,
104
118
  ):
105
119
  """Initialize the handler.
106
120
 
107
121
  Args:
108
122
  store (StoreOrGroup): The Zarr store or group containing the image data.
109
123
  meta_mode (str): The mode of the metadata handler.
110
- zarr_format (int): The Zarr format version to use.
124
+ zarr_format (int | None): The Zarr format version to use.
111
125
  cache (bool): Whether to cache the metadata.
112
- mode (str): The mode of the store.
113
- parallel_safe (bool): If True, the handler will create a lock file to make
114
- that can be used to make the handler parallel safe.
115
- Be aware that the lock needs to be used manually.
116
- parent (ZarrGroupHandler | None): The parent handler.
126
+ mode (str | None): The mode of the store.
117
127
  """
118
- if mode not in ["r", "r+", "w", "w-", "a"]:
128
+ if mode not in ["r", "r+", "w", "w-", "a", None]:
119
129
  raise NgioValueError(f"Mode {mode} is not supported.")
120
130
 
121
- if parallel_safe and cache:
122
- raise NgioValueError(
123
- "The cache and parallel_safe options are mutually exclusive."
124
- "If you want to use the lock mechanism, you should not use the cache."
125
- )
126
-
127
131
  group = open_group_wrapper(store=store, mode=mode, zarr_format=zarr_format)
128
- _store = group.store
129
-
130
- # Make sure the cache is set in the attrs
131
- # in the same way as the cache in the handler
132
-
133
- ## TODO
134
- # Figure out how to handle the cache in the new zarr version
135
- # group.attrs.cache = cache
136
-
137
- if parallel_safe:
138
- if not isinstance(_store, LocalStore):
139
- raise NgioValueError(
140
- "The store needs to be a LocalStore to use the lock mechanism. "
141
- f"Instead, got {_store.__class__.__name__}."
142
- )
143
-
144
- store_path = _store.root / group.path
145
- self._lock_path = store_path.with_suffix(".lock")
146
- self._lock = FileLock(self._lock_path, timeout=10)
147
-
148
- else:
149
- self._lock_path = None
150
- self._lock = None
151
-
152
132
  self._group = group
153
- self._mode = mode
154
133
  self.use_cache = cache
155
- self._parallel_safe = parallel_safe
156
- self._cache = {}
157
- self._parent = parent
134
+
135
+ self._group_cache: NgioCache[zarr.Group] = NgioCache(use_cache=cache)
136
+ self._array_cache: NgioCache[zarr.Array] = NgioCache(use_cache=cache)
137
+ self._handlers_cache: NgioCache[ZarrGroupHandler] = NgioCache(use_cache=cache)
138
+ self._lock: tuple[Path, BaseFileLock] | None = None
158
139
 
159
140
  def __repr__(self) -> str:
160
141
  """Return a string representation of the handler."""
161
142
  return (
162
- f"ZarrGroupHandler(full_url={self.full_url}, mode={self.mode}, "
143
+ f"ZarrGroupHandler(full_url={self.full_url}, read_only={self.read_only}, "
163
144
  f"cache={self.use_cache}"
164
145
  )
165
146
 
@@ -173,8 +154,17 @@ class ZarrGroupHandler:
173
154
  """Return the store path."""
174
155
  if isinstance(self.store, LocalStore):
175
156
  return (self.store.root / self.group.path).as_posix()
176
- if isinstance(self.store, FsspecStore):
177
- return self.store.fs.map.root_path
157
+ elif isinstance(self.store, FsspecStore):
158
+ return f"{self.store.path}/{self.group.path}"
159
+ elif isinstance(self.store, ZipStore):
160
+ return (self.store.path / self.group.path).as_posix()
161
+ elif isinstance(self.store, MemoryStore):
162
+ return None
163
+ warnings.warn(
164
+ f"Cannot determine full URL for store type {type(self.store)}. ",
165
+ UserWarning,
166
+ stacklevel=2,
167
+ )
178
168
  return None
179
169
 
180
170
  @property
@@ -183,35 +173,55 @@ class ZarrGroupHandler:
183
173
  return self._group.metadata.zarr_format
184
174
 
185
175
  @property
186
- def mode(self) -> AccessModeLiteral:
187
- """Return the mode of the group."""
188
- return self._mode # type: ignore
176
+ def read_only(self) -> bool:
177
+ """Return whether the group is read only."""
178
+ return self._group.read_only
179
+
180
+ def _create_lock(self) -> tuple[Path, BaseFileLock]:
181
+ """Create the lock."""
182
+ if self._lock is not None:
183
+ return self._lock
184
+
185
+ if self.use_cache is True:
186
+ raise NgioValueError(
187
+ "Lock mechanism is not compatible with caching. "
188
+ "Please set cache=False to use the lock mechanism."
189
+ )
190
+
191
+ if not isinstance(self.store, LocalStore):
192
+ raise NgioValueError(
193
+ "The store needs to be a LocalStore to use the lock mechanism. "
194
+ f"Instead, got {self.store.__class__.__name__}."
195
+ )
196
+
197
+ store_path = Path(self.store.root) / self.group.path
198
+ _lock_path = store_path.with_suffix(".lock")
199
+ _lock = FileLock(_lock_path, timeout=10)
200
+ return _lock_path, _lock
189
201
 
190
202
  @property
191
203
  def lock(self) -> BaseFileLock:
192
204
  """Return the lock."""
193
205
  if self._lock is None:
194
- raise NgioValueError(
195
- "The handler is not parallel safe. "
196
- "Reopen the handler with parallel_safe=True."
197
- )
198
- return self._lock
206
+ self._lock = self._create_lock()
207
+ return self._lock[1]
199
208
 
200
209
  @property
201
- def parent(self) -> "ZarrGroupHandler | None":
202
- """Return the parent handler."""
203
- return self._parent
210
+ def lock_path(self) -> Path:
211
+ """Return the lock path."""
212
+ if self._lock is None:
213
+ self._lock = self._create_lock()
214
+ return self._lock[0]
204
215
 
205
216
  def remove_lock(self) -> None:
206
217
  """Return the lock."""
207
- if self._lock is None or self._lock_path is None:
218
+ if self._lock is None:
208
219
  return None
209
220
 
210
- lock_path = Path(self._lock_path)
211
- if lock_path.exists() and self._lock.lock_counter == 0:
221
+ lock_path, lock = self._lock
222
+ if lock_path.exists() and lock.lock_counter == 0:
212
223
  lock_path.unlink()
213
224
  self._lock = None
214
- self._lock_path = None
215
225
  return None
216
226
 
217
227
  raise NgioValueError("The lock is still in use. Cannot remove it.")
@@ -222,10 +232,7 @@ class ZarrGroupHandler:
222
232
  This is useful when the group has been modified
223
233
  outside of the handler.
224
234
  """
225
- if self.mode == "r":
226
- mode = "r"
227
- else:
228
- mode = "r+"
235
+ mode = "r" if self.read_only else "r+"
229
236
  return zarr.open_group(
230
237
  store=self._group.store,
231
238
  path=self._group.path,
@@ -233,72 +240,57 @@ class ZarrGroupHandler:
233
240
  zarr_format=self._group.metadata.zarr_format,
234
241
  )
235
242
 
243
+ def reopen_handler(self) -> "ZarrGroupHandler":
244
+ """Reopen the handler.
245
+
246
+ This is useful when the group has been modified
247
+ outside of the handler.
248
+ """
249
+ mode = "r" if self.read_only else "r+"
250
+ group = self.reopen_group()
251
+ return ZarrGroupHandler(
252
+ store=group,
253
+ zarr_format=group.metadata.zarr_format,
254
+ cache=self.use_cache,
255
+ mode=mode,
256
+ )
257
+
258
+ def clean_cache(self) -> None:
259
+ """Clear the cached metadata."""
260
+ group = self.reopen_group()
261
+ self.__init__(
262
+ store=group,
263
+ zarr_format=group.metadata.zarr_format,
264
+ cache=self.use_cache,
265
+ mode="r" if self.read_only else "r+",
266
+ )
267
+
236
268
  @property
237
269
  def group(self) -> zarr.Group:
238
270
  """Return the group."""
239
- if self._parallel_safe:
240
- # If we are parallel safe, we need to reopen the group
271
+ if self.use_cache is False:
272
+ # If we are not using cache, we need to reopen the group
241
273
  # to make sure that the attributes are up to date
242
274
  return self.reopen_group()
243
275
  return self._group
244
276
 
245
- def add_to_cache(self, key: str, value: object) -> None:
246
- """Add an object to the cache."""
247
- if not self.use_cache:
248
- return None
249
- self._cache[key] = value
250
-
251
- def get_from_cache(self, key: str) -> object | None:
252
- """Get an object from the cache."""
253
- if not self.use_cache:
254
- return None
255
- return self._cache.get(key, None)
256
-
257
- def clean_cache(self) -> None:
258
- """Clear the cached metadata."""
259
- self._cache = {}
260
-
261
277
  def load_attrs(self) -> dict:
262
278
  """Load the attributes of the group."""
263
- attrs = self.get_from_cache("attrs")
264
- if attrs is not None and isinstance(attrs, dict):
265
- return attrs
266
-
267
- attrs = dict(self.group.attrs)
268
-
269
- self.add_to_cache("attrs", attrs)
270
- return attrs
271
-
272
- def _write_attrs(self, attrs: dict, overwrite: bool = False) -> None:
273
- """Write the metadata to the store."""
274
- if self.group.read_only:
275
- raise NgioValueError("The group is read only. Cannot write metadata.")
276
-
277
- # we need to invalidate the current attrs cache
278
- self.add_to_cache("attrs", None)
279
- if overwrite:
280
- self.group.attrs.clear()
281
-
282
- self.group.attrs.update(attrs)
279
+ return self.reopen_group().attrs.asdict()
283
280
 
284
281
  def write_attrs(self, attrs: dict, overwrite: bool = False) -> None:
285
282
  """Write the metadata to the store."""
286
283
  # Maybe we should use the lock here
287
- self._write_attrs(attrs, overwrite)
288
-
289
- def _obj_get(self, path: str):
290
- """Get a group from the group."""
291
- group_or_array = self.get_from_cache(path)
292
- if group_or_array is not None:
293
- return group_or_array
294
-
295
- group_or_array = self.group.get(path, None)
296
- self.add_to_cache(path, group_or_array)
297
- return group_or_array
284
+ if self.read_only:
285
+ raise NgioValueError("The group is read only. Cannot write metadata.")
286
+ group = self.reopen_group()
287
+ if overwrite:
288
+ group.attrs.clear()
289
+ group.attrs.update(attrs)
298
290
 
299
291
  def create_group(self, path: str, overwrite: bool = False) -> zarr.Group:
300
292
  """Create a group in the group."""
301
- if self.mode == "r":
293
+ if self.group.read_only:
302
294
  raise NgioValueError("Cannot create a group in read only mode.")
303
295
 
304
296
  try:
@@ -308,7 +300,7 @@ class ZarrGroupHandler:
308
300
  f"A Zarr group already exists at {path}, "
309
301
  "consider setting overwrite=True."
310
302
  ) from e
311
- self.add_to_cache(path, group)
303
+ self._group_cache.set(path, group, overwrite=overwrite)
312
304
  return group
313
305
 
314
306
  def get_group(
@@ -334,138 +326,81 @@ class ZarrGroupHandler:
334
326
  if overwrite:
335
327
  return self.create_group(path, overwrite=overwrite)
336
328
 
337
- group = self._obj_get(path)
329
+ group = self._group_cache.get(path)
338
330
  if isinstance(group, zarr.Group):
339
331
  return group
340
332
 
341
- if group is not None:
342
- raise NgioValueError(
343
- f"The object at {path} is not a group, but a {type(group)}"
344
- )
333
+ group = self.group.get(path, default=None)
334
+ if isinstance(group, zarr.Group):
335
+ self._group_cache.set(path, group, overwrite=overwrite)
336
+ return group
337
+
338
+ if isinstance(group, zarr.Array):
339
+ raise NgioValueError(f"The object at {path} is not a group, but an array.")
345
340
 
346
341
  if not create_mode:
347
342
  raise NgioFileNotFoundError(f"No group found at {path}")
348
343
  group = self.create_group(path)
344
+ self._group_cache.set(path, group, overwrite=overwrite)
349
345
  return group
350
346
 
351
- def safe_get_group(
352
- self, path: str, create_mode: bool = False
353
- ) -> tuple[bool, zarr.Group | NgioError]:
354
- """Get a group from the group.
355
-
356
- Args:
357
- path (str): The path to the group.
358
- create_mode (bool): If True, create the group if it does not exist.
359
-
360
- Returns:
361
- zarr.Group | None: The Zarr group or None if it does not exist
362
- or an error occurs.
363
-
364
- """
365
- try:
366
- return True, self.get_group(path, create_mode)
367
- except NgioError as e:
368
- return False, e
369
-
370
347
  def get_array(self, path: str) -> zarr.Array:
371
348
  """Get an array from the group."""
372
- array = self._obj_get(path)
373
- if array is None:
374
- raise NgioFileNotFoundError(f"No array found at {path}")
375
- if not isinstance(array, zarr.Array):
376
- raise NgioValueError(
377
- f"The object at {path} is not an array, but a {type(array)}"
378
- )
379
- return array
380
-
381
- def create_array(
382
- self,
383
- path: str,
384
- shape: tuple[int, ...],
385
- dtype: str,
386
- chunks: tuple[int, ...] | Literal["auto"] = "auto",
387
- compressors: CompressorLike = "auto",
388
- separator: Literal[".", "/"] = "/",
389
- overwrite: bool = False,
390
- ) -> zarr.Array:
391
- if self.mode == "r":
392
- raise NgioValueError("Cannot create an array in read only mode.")
393
-
394
- if self.zarr_format == 2:
395
- chunks_encoding = {
396
- "name": "v2",
397
- "separator": separator,
398
- }
399
- else:
400
- chunks_encoding = {
401
- "name": "default",
402
- "separator": separator,
403
- }
404
-
405
- try:
406
- return self.group.create_array(
407
- name=path,
408
- shape=shape,
409
- dtype=dtype,
410
- chunks=chunks,
411
- chunk_key_encoding=chunks_encoding,
412
- overwrite=overwrite,
413
- compressors=compressors,
414
- )
415
- except ContainsGroupError as e:
416
- raise NgioFileExistsError(
417
- f"A Zarr array already exists at {path}, "
418
- "consider setting overwrite=True."
419
- ) from e
420
- except Exception as e:
421
- raise NgioValueError(f"Error creating array at {path}") from e
422
-
423
- def derive_handler(
349
+ array = self._array_cache.get(path)
350
+ if isinstance(array, zarr.Array):
351
+ return array
352
+ array = self.group.get(path, default=None)
353
+ if isinstance(array, zarr.Array):
354
+ self._array_cache.set(path, array)
355
+ return array
356
+
357
+ if isinstance(array, zarr.Group):
358
+ raise NgioValueError(f"The object at {path} is not an array, but a group.")
359
+ raise NgioFileNotFoundError(f"No array found at {path}")
360
+
361
+ def get_handler(
424
362
  self,
425
363
  path: str,
364
+ create_mode: bool = True,
426
365
  overwrite: bool = False,
427
366
  ) -> "ZarrGroupHandler":
428
- """Derive a new handler from the current handler.
367
+ """Get a new handler for a group in the current handler group.
429
368
 
430
369
  Args:
431
370
  path (str): The path to the group.
371
+ create_mode (bool): If True, create the group if it does not exist.
432
372
  overwrite (bool): If True, overwrite the group if it exists.
433
373
  """
434
- group = self.get_group(path, create_mode=True, overwrite=overwrite)
435
- return ZarrGroupHandler(
436
- store=group,
437
- zarr_format=self.zarr_format,
438
- cache=self.use_cache,
439
- mode=self.mode,
440
- parallel_safe=self._parallel_safe,
441
- parent=self,
374
+ handler = self._handlers_cache.get(path)
375
+ if handler is not None:
376
+ return handler
377
+ group = self.get_group(path, create_mode=create_mode, overwrite=overwrite)
378
+ mode = "r" if group.read_only else "r+"
379
+ handler = ZarrGroupHandler(
380
+ store=group, zarr_format=self.zarr_format, cache=self.use_cache, mode=mode
442
381
  )
382
+ self._handlers_cache.set(path, handler)
383
+ return handler
443
384
 
444
- def safe_derive_handler(
445
- self,
446
- path: str,
447
- overwrite: bool = False,
448
- ) -> tuple[bool, "ZarrGroupHandler | NgioError"]:
449
- """Derive a new handler from the current handler."""
450
- try:
451
- return True, self.derive_handler(path, overwrite=overwrite)
452
- except NgioError as e:
453
- return False, e
385
+ @property
386
+ def is_listable(self) -> bool:
387
+ return is_group_listable(self.group)
388
+
389
+ def delete_group(self, path: str) -> None:
390
+ """Delete a group from the current group.
391
+
392
+ Args:
393
+ path (str): The path to the group to delete.
394
+ """
395
+ if self.group.read_only:
396
+ raise NgioValueError("Cannot delete a group in read only mode.")
397
+ self.group.__delitem__(path)
398
+ self._group_cache._cache.pop(path, None)
399
+ self._handlers_cache._cache.pop(path, None)
454
400
 
455
- def copy_handler(self, handler: "ZarrGroupHandler") -> None:
401
+ def copy_group(self, dest_group: zarr.Group):
456
402
  """Copy the group to a new store."""
457
- _, n_skipped, _ = zarr.copy_store(
458
- source=self.group.store,
459
- dest=handler.group.store,
460
- source_path=self.group.path,
461
- dest_path=handler.group.path,
462
- if_exists="replace",
463
- )
464
- if n_skipped > 0:
465
- raise NgioValueError(
466
- f"Error copying group to {handler.full_url}, "
467
- f"#{n_skipped} files where skipped."
468
- )
403
+ copy_group(self.group, dest_group)
469
404
 
470
405
 
471
406
  def find_dimension_separator(array: zarr.Array) -> Literal[".", "/"]:
@@ -484,8 +419,113 @@ def find_dimension_separator(array: zarr.Array) -> Literal[".", "/"]:
484
419
  else:
485
420
  separator = array.metadata.chunk_key_encoding
486
421
  if not isinstance(separator, DefaultChunkKeyEncoding):
487
- raise ValueError(
422
+ raise NgioValueError(
488
423
  "Only DefaultChunkKeyEncoding is supported in this example."
489
424
  )
490
425
  separator = separator.separator
491
426
  return separator
427
+
428
+
429
+ def is_group_listable(group: zarr.Group) -> bool:
430
+ """Check if a Zarr group is listable.
431
+
432
+ A group is considered listable if it contains at least one array or subgroup.
433
+
434
+ Args:
435
+ group (zarr.Group): The Zarr group to check.
436
+
437
+ Returns:
438
+ bool: True if the group is listable, False otherwise.
439
+ """
440
+ if not group.store.supports_listing:
441
+ # If the store does not support listing
442
+ # then for sure it is not listable
443
+ return False
444
+ try:
445
+ next(group.keys())
446
+ return True
447
+ except StopIteration:
448
+ # Group is listable but empty
449
+ return True
450
+ except Exception as _:
451
+ # Some stores may raise errors when listing
452
+ # consider those not listable
453
+ return False
454
+
455
+
456
+ def _make_sync_fs(fs: fsspec.AbstractFileSystem) -> fsspec.AbstractFileSystem:
457
+ fs_dict = json.loads(fs.to_json())
458
+ fs_dict["asynchronous"] = False
459
+ return fsspec.AbstractFileSystem.from_json(json.dumps(fs_dict))
460
+
461
+
462
+ def _get_mapper(store: LocalStore | FsspecStore, path: str):
463
+ if isinstance(store, LocalStore):
464
+ fs = fsspec.filesystem("file")
465
+ full_path = (store.root / path).as_posix()
466
+ else:
467
+ fs = _make_sync_fs(store.fs)
468
+ full_path = f"{store.path}/{path}"
469
+ return fs.get_mapper(full_path)
470
+
471
+
472
+ def _fsspec_copy(
473
+ src_fs: LocalStore | FsspecStore,
474
+ src_path: str,
475
+ dest_fs: LocalStore | FsspecStore,
476
+ dest_path: str,
477
+ ):
478
+ src_mapper = _get_mapper(src_fs, src_path)
479
+ dest_mapper = _get_mapper(dest_fs, dest_path)
480
+ for key in src_mapper.keys():
481
+ dest_mapper[key] = src_mapper[key]
482
+
483
+
484
+ def _zarr_python_copy(src_group: zarr.Group, dest_group: zarr.Group):
485
+ # Copy attributes
486
+ dest_group.attrs.put(src_group.attrs.asdict())
487
+ # Copy arrays
488
+ for name, array in src_group.arrays():
489
+ if array.metadata.zarr_format == 2:
490
+ spec = AnyArraySpecV2.from_zarr(array)
491
+ else:
492
+ spec = AnyArraySpecV3.from_zarr(array)
493
+ dst = spec.to_zarr(
494
+ store=dest_group.store,
495
+ path=f"{dest_group.path}/{name}",
496
+ overwrite=True,
497
+ )
498
+ if array.ndim > 0:
499
+ dask_array = da.from_zarr(array)
500
+ da.to_zarr(dask_array, dst, overwrite=False)
501
+ # Copy subgroups
502
+ for name, subgroup in src_group.groups():
503
+ dest_subgroup = dest_group.create_group(name, overwrite=True)
504
+ _zarr_python_copy(subgroup, dest_subgroup)
505
+
506
+
507
+ def copy_group(src_group: zarr.Group, dest_group: zarr.Group):
508
+ if src_group.metadata.zarr_format != dest_group.metadata.zarr_format:
509
+ raise NgioValueError(
510
+ "Different Zarr format versions between source and destination, "
511
+ "cannot copy."
512
+ )
513
+
514
+ if not is_group_listable(src_group):
515
+ raise NgioValueError("Source group is not listable, cannot copy.")
516
+
517
+ if dest_group.read_only:
518
+ raise NgioValueError("Destination group is read only, cannot copy.")
519
+ if isinstance(src_group.store, LocalStore | FsspecStore) and isinstance(
520
+ dest_group.store, LocalStore | FsspecStore
521
+ ):
522
+ _fsspec_copy(src_group.store, src_group.path, dest_group.store, dest_group.path)
523
+ return
524
+ warnings.warn(
525
+ "Fsspec copy not possible, falling back to Zarr Python API for the copy. "
526
+ "This will preserve some tabular data non-zarr native (parquet, and csv), "
527
+ "and it will be slower for large datasets.",
528
+ UserWarning,
529
+ stacklevel=2,
530
+ )
531
+ _zarr_python_copy(src_group, dest_group)