ngio 0.4.8__py3-none-any.whl → 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56) hide show
  1. ngio/__init__.py +5 -2
  2. ngio/common/__init__.py +11 -6
  3. ngio/common/_masking_roi.py +34 -54
  4. ngio/common/_pyramid.py +322 -75
  5. ngio/common/_roi.py +258 -330
  6. ngio/experimental/iterators/_feature.py +3 -3
  7. ngio/experimental/iterators/_rois_utils.py +10 -11
  8. ngio/hcs/_plate.py +192 -136
  9. ngio/images/_abstract_image.py +539 -35
  10. ngio/images/_create_synt_container.py +45 -47
  11. ngio/images/_create_utils.py +406 -0
  12. ngio/images/_image.py +524 -248
  13. ngio/images/_label.py +257 -180
  14. ngio/images/_masked_image.py +2 -2
  15. ngio/images/_ome_zarr_container.py +658 -255
  16. ngio/io_pipes/_io_pipes.py +9 -9
  17. ngio/io_pipes/_io_pipes_masked.py +7 -7
  18. ngio/io_pipes/_io_pipes_roi.py +6 -6
  19. ngio/io_pipes/_io_pipes_types.py +3 -3
  20. ngio/io_pipes/_match_shape.py +6 -8
  21. ngio/io_pipes/_ops_slices_utils.py +8 -5
  22. ngio/ome_zarr_meta/__init__.py +29 -18
  23. ngio/ome_zarr_meta/_meta_handlers.py +402 -689
  24. ngio/ome_zarr_meta/ngio_specs/__init__.py +4 -0
  25. ngio/ome_zarr_meta/ngio_specs/_axes.py +152 -51
  26. ngio/ome_zarr_meta/ngio_specs/_dataset.py +13 -22
  27. ngio/ome_zarr_meta/ngio_specs/_ngio_hcs.py +129 -91
  28. ngio/ome_zarr_meta/ngio_specs/_ngio_image.py +69 -69
  29. ngio/ome_zarr_meta/v04/__init__.py +5 -1
  30. ngio/ome_zarr_meta/v04/{_v04_spec_utils.py → _v04_spec.py} +55 -86
  31. ngio/ome_zarr_meta/v05/__init__.py +27 -0
  32. ngio/ome_zarr_meta/v05/_custom_models.py +18 -0
  33. ngio/ome_zarr_meta/v05/_v05_spec.py +495 -0
  34. ngio/resources/__init__.py +1 -1
  35. ngio/resources/resource_model.py +1 -1
  36. ngio/tables/_tables_container.py +82 -24
  37. ngio/tables/backends/_abstract_backend.py +7 -0
  38. ngio/tables/backends/_anndata.py +60 -7
  39. ngio/tables/backends/_anndata_utils.py +2 -4
  40. ngio/tables/backends/_csv.py +3 -19
  41. ngio/tables/backends/_json.py +10 -13
  42. ngio/tables/backends/_parquet.py +3 -31
  43. ngio/tables/backends/_py_arrow_backends.py +222 -0
  44. ngio/tables/backends/_utils.py +1 -1
  45. ngio/tables/v1/_roi_table.py +41 -24
  46. ngio/utils/__init__.py +8 -12
  47. ngio/utils/_cache.py +48 -0
  48. ngio/utils/_zarr_utils.py +354 -236
  49. {ngio-0.4.8.dist-info → ngio-0.5.0.dist-info}/METADATA +12 -5
  50. ngio-0.5.0.dist-info/RECORD +88 -0
  51. ngio/images/_create.py +0 -276
  52. ngio/tables/backends/_non_zarr_backends.py +0 -196
  53. ngio/utils/_logger.py +0 -50
  54. ngio-0.4.8.dist-info/RECORD +0 -85
  55. {ngio-0.4.8.dist-info → ngio-0.5.0.dist-info}/WHEEL +0 -0
  56. {ngio-0.4.8.dist-info → ngio-0.5.0.dist-info}/licenses/LICENSE +0 -0
ngio/utils/_zarr_utils.py CHANGED
@@ -1,61 +1,75 @@
1
1
  """Common utilities for working with Zarr groups in consistent ways."""
2
2
 
3
+ import json
4
+ import warnings
3
5
  from pathlib import Path
4
- from typing import Literal
6
+ from typing import Literal, TypeAlias
5
7
 
8
+ import dask.array as da
6
9
  import fsspec
7
10
  import zarr
8
11
  from filelock import BaseFileLock, FileLock
9
- from zarr.errors import ContainsGroupError, GroupNotFoundError
10
- from zarr.storage import DirectoryStore, FSStore, MemoryStore, Store, StoreLike
11
- from zarr.types import DIMENSION_SEPARATOR
12
-
13
- from ngio.utils import NgioFileExistsError, NgioFileNotFoundError, NgioValueError
14
- from ngio.utils._errors import NgioError
12
+ from pydantic_zarr.v2 import ArraySpec as AnyArraySpecV2
13
+ from pydantic_zarr.v3 import ArraySpec as AnyArraySpecV3
14
+ from zarr.abc.store import Store
15
+ from zarr.errors import ContainsGroupError
16
+ from zarr.storage import FsspecStore, LocalStore, MemoryStore, ZipStore
17
+
18
+ from ngio.utils._cache import NgioCache
19
+ from ngio.utils._errors import (
20
+ NgioFileExistsError,
21
+ NgioFileNotFoundError,
22
+ NgioValueError,
23
+ )
15
24
 
16
25
  AccessModeLiteral = Literal["r", "r+", "w", "w-", "a"]
17
26
  # StoreLike is more restrictive than it could be
18
27
  # but to make sure we can handle the store correctly
19
28
  # we need to be more restrictive
20
- NgioSupportedStore = (
21
- str | Path | fsspec.mapping.FSMap | FSStore | DirectoryStore | MemoryStore
29
+ NgioSupportedStore: TypeAlias = (
30
+ str | Path | fsspec.mapping.FSMap | FsspecStore | MemoryStore | dict | LocalStore
22
31
  )
23
- GenericStore = Store | NgioSupportedStore
24
- StoreOrGroup = GenericStore | zarr.Group
32
+ GenericStore: TypeAlias = NgioSupportedStore | Store
33
+ StoreOrGroup: TypeAlias = NgioSupportedStore | zarr.Group
25
34
 
26
35
 
27
36
  def _check_store(store) -> NgioSupportedStore:
28
37
  """Check the store and return a valid store."""
29
- if isinstance(store, NgioSupportedStore):
30
- return store
31
-
32
- raise NotImplementedError(
33
- f"Store type {type(store)} is not supported. "
34
- f"Supported types are: {NgioSupportedStore}"
35
- )
38
+ if not isinstance(store, NgioSupportedStore):
39
+ warnings.warn(
40
+ f"Store type {type(store)} is not explicitly supported. "
41
+ f"Supported types are: {NgioSupportedStore}. "
42
+ "Proceeding, but this may lead to unexpected behavior.",
43
+ UserWarning,
44
+ stacklevel=2,
45
+ )
46
+ return store
36
47
 
37
48
 
38
- def _check_group(group: zarr.Group, mode: AccessModeLiteral) -> zarr.Group:
49
+ def _check_group(
50
+ group: zarr.Group, mode: AccessModeLiteral | None = None
51
+ ) -> zarr.Group:
39
52
  """Check the group and return a valid group."""
40
- is_read_only = getattr(group, "_read_only", False)
41
- if is_read_only and mode in ["w", "w-"]:
42
- raise NgioValueError(
43
- "The group is read only. Cannot open in write mode ['w', 'w-']"
44
- )
53
+ if group.read_only and mode not in [None, "r"]:
54
+ raise NgioValueError(f"The group is read only. Cannot open in mode {mode}.")
45
55
 
46
- if mode == "r" and not is_read_only:
56
+ if mode == "r" and not group.read_only:
47
57
  # let's make sure we don't accidentally write to the group
48
58
  group = zarr.open_group(store=group.store, path=group.path, mode="r")
49
-
50
59
  return group
51
60
 
52
61
 
53
- def open_group_wrapper(store: StoreOrGroup, mode: AccessModeLiteral) -> zarr.Group:
62
+ def open_group_wrapper(
63
+ store: StoreOrGroup,
64
+ mode: AccessModeLiteral | None = None,
65
+ zarr_format: Literal[2, 3] | None = None,
66
+ ) -> zarr.Group:
54
67
  """Wrapper around zarr.open_group with some additional checks.
55
68
 
56
69
  Args:
57
70
  store (StoreOrGroup): The store or group to open.
58
- mode (ReadOrEdirLiteral): The mode to open the group in.
71
+ mode (AccessModeLiteral): The mode to open the group in.
72
+ zarr_format (int): The Zarr format version to use.
59
73
 
60
74
  Returns:
61
75
  zarr.Group: The opened Zarr group.
@@ -67,16 +81,22 @@ def open_group_wrapper(store: StoreOrGroup, mode: AccessModeLiteral) -> zarr.Gro
67
81
 
68
82
  try:
69
83
  _check_store(store)
70
- group = zarr.open_group(store=store, mode=mode)
84
+ mode = mode if mode is not None else "a"
85
+ group = zarr.open_group(store=store, mode=mode, zarr_format=zarr_format)
71
86
 
72
- except ContainsGroupError as e:
87
+ except FileExistsError as e:
73
88
  raise NgioFileExistsError(
74
89
  f"A Zarr group already exists at {store}, consider setting overwrite=True."
75
90
  ) from e
76
91
 
77
- except GroupNotFoundError as e:
92
+ except FileNotFoundError as e:
78
93
  raise NgioFileNotFoundError(f"No Zarr group found at {store}") from e
79
94
 
95
+ except ContainsGroupError as e:
96
+ raise NgioFileExistsError(
97
+ f"A Zarr group already exists at {store}, consider setting overwrite=True."
98
+ ) from e
99
+
80
100
  return group
81
101
 
82
102
 
@@ -86,178 +106,184 @@ class ZarrGroupHandler:
86
106
  def __init__(
87
107
  self,
88
108
  store: StoreOrGroup,
109
+ zarr_format: Literal[2, 3] | None = None,
89
110
  cache: bool = False,
90
- mode: AccessModeLiteral = "a",
91
- parallel_safe: bool = False,
92
- parent: "ZarrGroupHandler | None" = None,
111
+ mode: AccessModeLiteral | None = None,
93
112
  ):
94
113
  """Initialize the handler.
95
114
 
96
115
  Args:
97
116
  store (StoreOrGroup): The Zarr store or group containing the image data.
98
- meta_mode (str): The mode of the metadata handler.
117
+ zarr_format (int | None): The Zarr format version to use.
99
118
  cache (bool): Whether to cache the metadata.
100
- mode (str): The mode of the store.
101
- parallel_safe (bool): If True, the handler will create a lock file to make
102
- that can be used to make the handler parallel safe.
103
- Be aware that the lock needs to be used manually.
104
- parent (ZarrGroupHandler | None): The parent handler.
119
+ mode (str | None): The mode of the store.
105
120
  """
106
- if mode not in ["r", "r+", "w", "w-", "a"]:
121
+ if mode not in ["r", "r+", "w", "w-", "a", None]:
107
122
  raise NgioValueError(f"Mode {mode} is not supported.")
108
123
 
109
- if parallel_safe and cache:
110
- raise NgioValueError(
111
- "The cache and parallel_safe options are mutually exclusive."
112
- "If you want to use the lock mechanism, you should not use the cache."
113
- )
114
-
115
- group = open_group_wrapper(store, mode)
116
- _store = group.store
117
-
118
- # Make sure the cache is set in the attrs
119
- # in the same way as the cache in the handler
120
- group.attrs.cache = cache
121
-
122
- if parallel_safe:
123
- if not isinstance(_store, DirectoryStore):
124
- raise NgioValueError(
125
- "The store needs to be a DirectoryStore to use the lock mechanism. "
126
- f"Instead, got {_store.__class__.__name__}."
127
- )
128
- store_path = Path(_store.path) / group.path
129
- self._lock_path = store_path.with_suffix(".lock")
130
- self._lock = FileLock(self._lock_path, timeout=10)
131
-
132
- else:
133
- self._lock_path = None
134
- self._lock = None
135
-
124
+ group = open_group_wrapper(store=store, mode=mode, zarr_format=zarr_format)
136
125
  self._group = group
137
- self._mode = mode
138
126
  self.use_cache = cache
139
- self._parallel_safe = parallel_safe
140
- self._cache = {}
141
- self._parent = parent
127
+
128
+ self._group_cache: NgioCache[zarr.Group] = NgioCache(use_cache=cache)
129
+ self._array_cache: NgioCache[zarr.Array] = NgioCache(use_cache=cache)
130
+ self._handlers_cache: NgioCache[ZarrGroupHandler] = NgioCache(use_cache=cache)
131
+ self._lock: tuple[Path, BaseFileLock] | None = None
142
132
 
143
133
  def __repr__(self) -> str:
144
134
  """Return a string representation of the handler."""
145
135
  return (
146
- f"ZarrGroupHandler(full_url={self.full_url}, mode={self.mode}, "
136
+ f"ZarrGroupHandler(full_url={self.full_url}, read_only={self.read_only}, "
147
137
  f"cache={self.use_cache}"
148
138
  )
149
139
 
150
140
  @property
151
- def store(self) -> StoreLike:
141
+ def store(self) -> Store:
152
142
  """Return the store of the group."""
153
- return self.group.store
143
+ return self._group.store
154
144
 
155
145
  @property
156
146
  def full_url(self) -> str | None:
157
147
  """Return the store path."""
158
- if isinstance(self.store, DirectoryStore | FSStore):
159
- _store_path = str(self.store.path)
160
- _store_path = _store_path.rstrip("/")
161
- return f"{self.store.path}/{self._group.path}"
148
+ if isinstance(self.store, LocalStore):
149
+ return (self.store.root / self.group.path).as_posix()
150
+ elif isinstance(self.store, FsspecStore):
151
+ return f"{self.store.path}/{self.group.path}"
152
+ elif isinstance(self.store, ZipStore):
153
+ return (self.store.path / self.group.path).as_posix()
154
+ elif isinstance(self.store, MemoryStore):
155
+ return None
156
+ warnings.warn(
157
+ f"Cannot determine full URL for store type {type(self.store)}. ",
158
+ UserWarning,
159
+ stacklevel=2,
160
+ )
162
161
  return None
163
162
 
164
163
  @property
165
- def mode(self) -> AccessModeLiteral:
166
- """Return the mode of the group."""
167
- return self._mode # type: ignore (return type is Literal)
164
+ def zarr_format(self) -> Literal[2, 3]:
165
+ """Return the Zarr format version."""
166
+ return self._group.metadata.zarr_format
167
+
168
@property
def read_only(self) -> bool:
    """Tell whether the wrapped Zarr group is read only.

    Returns:
        bool: The ``read_only`` flag of the group held by the handler.
    """
    underlying = self._group
    return underlying.read_only
172
+
173
+ def _create_lock(self) -> tuple[Path, BaseFileLock]:
174
+ """Create the lock."""
175
+ if self._lock is not None:
176
+ return self._lock
177
+
178
+ if self.use_cache is True:
179
+ raise NgioValueError(
180
+ "Lock mechanism is not compatible with caching. "
181
+ "Please set cache=False to use the lock mechanism."
182
+ )
183
+
184
+ if not isinstance(self.store, LocalStore):
185
+ raise NgioValueError(
186
+ "The store needs to be a LocalStore to use the lock mechanism. "
187
+ f"Instead, got {self.store.__class__.__name__}."
188
+ )
189
+
190
+ store_path = Path(self.store.root) / self.group.path
191
+ _lock_path = store_path.with_suffix(".lock")
192
+ _lock = FileLock(_lock_path, timeout=10)
193
+ return _lock_path, _lock
168
194
 
169
195
  @property
170
196
  def lock(self) -> BaseFileLock:
171
197
  """Return the lock."""
172
198
  if self._lock is None:
173
- raise NgioValueError(
174
- "The handler is not parallel safe. "
175
- "Reopen the handler with parallel_safe=True."
176
- )
177
- return self._lock
199
+ self._lock = self._create_lock()
200
+ return self._lock[1]
178
201
 
179
202
  @property
180
- def parent(self) -> "ZarrGroupHandler | None":
181
- """Return the parent handler."""
182
- return self._parent
203
+ def lock_path(self) -> Path:
204
+ """Return the lock path."""
205
+ if self._lock is None:
206
+ self._lock = self._create_lock()
207
+ return self._lock[0]
183
208
 
184
209
  def remove_lock(self) -> None:
185
210
  """Return the lock."""
186
- if self._lock is None or self._lock_path is None:
211
+ if self._lock is None:
187
212
  return None
188
213
 
189
- lock_path = Path(self._lock_path)
190
- if lock_path.exists() and self._lock.lock_counter == 0:
214
+ lock_path, lock = self._lock
215
+ if lock_path.exists() and lock.lock_counter == 0:
191
216
  lock_path.unlink()
192
217
  self._lock = None
193
- self._lock_path = None
194
218
  return None
195
219
 
196
220
  raise NgioValueError("The lock is still in use. Cannot remove it.")
197
221
 
198
- @property
199
- def group(self) -> zarr.Group:
200
- """Return the group."""
201
- return self._group
222
+ def reopen_group(self) -> zarr.Group:
223
+ """Reopen the group.
202
224
 
203
- def add_to_cache(self, key: str, value: object) -> None:
204
- """Add an object to the cache."""
205
- if not self.use_cache:
206
- return None
207
- self._cache[key] = value
225
+ This is useful when the group has been modified
226
+ outside of the handler.
227
+ """
228
+ mode = "r" if self.read_only else "r+"
229
+ return zarr.open_group(
230
+ store=self._group.store,
231
+ path=self._group.path,
232
+ mode=mode,
233
+ zarr_format=self._group.metadata.zarr_format,
234
+ )
208
235
 
209
- def get_from_cache(self, key: str) -> object | None:
210
- """Get an object from the cache."""
211
- if not self.use_cache:
212
- return None
213
- return self._cache.get(key, None)
236
def reopen_handler(self) -> "ZarrGroupHandler":
    """Return a fresh handler on a reopened copy of the group.

    This is useful when the group has been modified
    outside of the handler.

    Returns:
        ZarrGroupHandler: A new handler with the same cache setting, opened
            in "r" mode if this handler is read only and "r+" otherwise.
    """
    # Decide the mode before reopening, from the current handler state.
    if self.read_only:
        reopen_mode = "r"
    else:
        reopen_mode = "r+"
    fresh_group = self.reopen_group()
    return ZarrGroupHandler(
        store=fresh_group,
        zarr_format=fresh_group.metadata.zarr_format,
        cache=self.use_cache,
        mode=reopen_mode,
    )
214
250
 
215
251
  def clean_cache(self) -> None:
216
252
  """Clear the cached metadata."""
217
- self._cache = {}
253
+ group = self.reopen_group()
254
+ self.__init__(
255
+ store=group,
256
+ zarr_format=group.metadata.zarr_format,
257
+ cache=self.use_cache,
258
+ mode="r" if self.read_only else "r+",
259
+ )
260
+
261
@property
def group(self) -> zarr.Group:
    """Return the Zarr group behind this handler.

    With caching disabled the group is reopened on every access so that
    its attributes are up to date; otherwise the stored group is returned.
    """
    if self.use_cache is not False:
        return self._group
    # No cache: hand out a freshly reopened group.
    return self.reopen_group()
218
269
 
219
270
  def load_attrs(self) -> dict:
220
271
  """Load the attributes of the group."""
221
- attrs = self.get_from_cache("attrs")
222
- if attrs is not None and isinstance(attrs, dict):
223
- return attrs
224
-
225
- attrs = dict(self.group.attrs)
226
-
227
- self.add_to_cache("attrs", attrs)
228
- return attrs
229
-
230
- def _write_attrs(self, attrs: dict, overwrite: bool = False) -> None:
231
- """Write the metadata to the store."""
232
- is_read_only = getattr(self._group, "_read_only", False)
233
- if is_read_only:
234
- raise NgioValueError("The group is read only. Cannot write metadata.")
235
-
236
- # we need to invalidate the current attrs cache
237
- self.add_to_cache("attrs", None)
238
- if overwrite:
239
- self.group.attrs.clear()
240
-
241
- self.group.attrs.update(attrs)
272
+ return self.reopen_group().attrs.asdict()
242
273
 
243
274
  def write_attrs(self, attrs: dict, overwrite: bool = False) -> None:
244
275
  """Write the metadata to the store."""
245
276
  # Maybe we should use the lock here
246
- self._write_attrs(attrs, overwrite)
247
-
248
- def _obj_get(self, path: str):
249
- """Get a group from the group."""
250
- group_or_array = self.get_from_cache(path)
251
- if group_or_array is not None:
252
- return group_or_array
253
-
254
- group_or_array = self.group.get(path, None)
255
- self.add_to_cache(path, group_or_array)
256
- return group_or_array
277
+ if self.read_only:
278
+ raise NgioValueError("The group is read only. Cannot write metadata.")
279
+ group = self.reopen_group()
280
+ if overwrite:
281
+ group.attrs.clear()
282
+ group.attrs.update(attrs)
257
283
 
258
284
  def create_group(self, path: str, overwrite: bool = False) -> zarr.Group:
259
285
  """Create a group in the group."""
260
- if self.mode == "r":
286
+ if self.group.read_only:
261
287
  raise NgioValueError("Cannot create a group in read only mode.")
262
288
 
263
289
  try:
@@ -267,7 +293,7 @@ class ZarrGroupHandler:
267
293
  f"A Zarr group already exists at {path}, "
268
294
  "consider setting overwrite=True."
269
295
  ) from e
270
- self.add_to_cache(path, group)
296
+ self._group_cache.set(path, group, overwrite=overwrite)
271
297
  return group
272
298
 
273
299
  def get_group(
@@ -293,123 +319,215 @@ class ZarrGroupHandler:
293
319
  if overwrite:
294
320
  return self.create_group(path, overwrite=overwrite)
295
321
 
296
- group = self._obj_get(path)
322
+ group = self._group_cache.get(path)
297
323
  if isinstance(group, zarr.Group):
298
324
  return group
299
325
 
300
- if group is not None:
301
- raise NgioValueError(
302
- f"The object at {path} is not a group, but a {type(group)}"
303
- )
326
+ group = self.group.get(path, default=None)
327
+ if isinstance(group, zarr.Group):
328
+ self._group_cache.set(path, group, overwrite=overwrite)
329
+ return group
330
+
331
+ if isinstance(group, zarr.Array):
332
+ raise NgioValueError(f"The object at {path} is not a group, but an array.")
304
333
 
305
334
  if not create_mode:
306
335
  raise NgioFileNotFoundError(f"No group found at {path}")
307
336
  group = self.create_group(path)
337
+ self._group_cache.set(path, group, overwrite=overwrite)
308
338
  return group
309
339
 
310
- def safe_get_group(
311
- self, path: str, create_mode: bool = False
312
- ) -> tuple[bool, zarr.Group | NgioError]:
313
- """Get a group from the group.
340
def get_array(self, path: str) -> zarr.Array:
    """Fetch the array stored at ``path`` inside the group.

    The array cache is consulted first; on a miss the group is queried and
    a found array is stored in the cache.

    Args:
        path (str): The path to the array, relative to the group.

    Returns:
        zarr.Array: The array found at ``path``.

    Raises:
        NgioValueError: If the object at ``path`` is a group.
        NgioFileNotFoundError: If nothing usable is found at ``path``.
    """
    cached = self._array_cache.get(path)
    if isinstance(cached, zarr.Array):
        return cached

    found = self.group.get(path, default=None)
    if isinstance(found, zarr.Array):
        self._array_cache.set(path, found)
        return found
    if isinstance(found, zarr.Group):
        raise NgioValueError(f"The object at {path} is not an array, but a group.")
    raise NgioFileNotFoundError(f"No array found at {path}")
353
+
354
def get_handler(
    self,
    path: str,
    create_mode: bool = True,
    overwrite: bool = False,
) -> "ZarrGroupHandler":
    """Get a new handler for a group in the current handler group.

    Args:
        path (str): The path to the group.
        create_mode (bool): If True, create the group if it does not exist.
        overwrite (bool): If True, overwrite the group if it exists.

    Returns:
        ZarrGroupHandler: A handler for the group at ``path``, opened in
            "r" mode if the group is read only and "r+" otherwise.
    """
    # A cached handler wraps the group as it was when first requested.
    # When the caller asks to overwrite, the cache must not short-circuit
    # the call, otherwise the group is never recreated.
    if not overwrite:
        cached = self._handlers_cache.get(path)
        if cached is not None:
            return cached

    group = self.get_group(path, create_mode=create_mode, overwrite=overwrite)
    mode = "r" if group.read_only else "r+"
    handler = ZarrGroupHandler(
        store=group, zarr_format=self.zarr_format, cache=self.use_cache, mode=mode
    )
    # Same call shape as the group cache: replace the entry on overwrite.
    self._handlers_cache.set(path, handler, overwrite=overwrite)
    return handler
318
377
 
319
- Returns:
320
- zarr.Group | None: The Zarr group or None if it does not exist
321
- or an error occurs.
378
@property
def is_listable(self) -> bool:
    """Report whether the handler's group is listable.

    Delegates to ``is_group_listable`` on the current group.
    """
    current = self.group
    return is_group_listable(current)
381
+
382
+ def delete_group(self, path: str) -> None:
383
+ """Delete a group from the current group.
322
384
 
385
+ Args:
386
+ path (str): The path to the group to delete.
323
387
  """
324
- try:
325
- return True, self.get_group(path, create_mode)
326
- except NgioError as e:
327
- return False, e
388
+ if self.group.read_only:
389
+ raise NgioValueError("Cannot delete a group in read only mode.")
390
+ self.group.__delitem__(path)
391
+ self._group_cache._cache.pop(path, None)
392
+ self._handlers_cache._cache.pop(path, None)
393
+
394
def delete_self(self) -> None:
    """Delete the group wrapped by this handler.

    Raises:
        NgioValueError: If the group is read only.
    """
    if self.group.read_only:
        raise NgioValueError("Cannot delete a group in read only mode.")
    # Deleting the "/" key of the group removes the group itself.
    del self.group["/"]
399
+
400
def copy_group(self, dest_group: zarr.Group):
    """Copy this handler's group into ``dest_group``.

    Args:
        dest_group (zarr.Group): The destination group; passed together with
            the handler's group to the module-level ``copy_group``.
    """
    source_group = self.group
    copy_group(source_group, dest_group)
328
403
 
329
- def get_array(self, path: str) -> zarr.Array:
330
- """Get an array from the group."""
331
- array = self._obj_get(path)
332
- if array is None:
333
- raise NgioFileNotFoundError(f"No array found at {path}")
334
- if not isinstance(array, zarr.Array):
404
+
405
+ def find_dimension_separator(array: zarr.Array) -> Literal[".", "/"]:
406
+ """Find the dimension separator used in the Zarr store.
407
+
408
+ Args:
409
+ array (zarr.Array): The Zarr array to check.
410
+
411
+ Returns:
412
+ Literal[".", "/"]: The dimension separator used in the store.
413
+ """
414
+ from zarr.core.chunk_key_encodings import DefaultChunkKeyEncoding
415
+
416
+ if array.metadata.zarr_format == 2:
417
+ separator = array.metadata.dimension_separator
418
+ else:
419
+ separator = array.metadata.chunk_key_encoding
420
+ if not isinstance(separator, DefaultChunkKeyEncoding):
335
421
  raise NgioValueError(
336
- f"The object at {path} is not an array, but a {type(array)}"
422
+ "Only DefaultChunkKeyEncoding is supported in this example."
337
423
  )
338
- return array
424
+ separator = separator.separator
425
+ return separator
339
426
 
340
- def create_array(
341
- self,
342
- path: str,
343
- shape: tuple[int, ...],
344
- dtype: str,
345
- chunks: tuple[int, ...] | None = None,
346
- dimension_separator: DIMENSION_SEPARATOR = "/",
347
- compressor: str = "default",
348
- overwrite: bool = False,
349
- ) -> zarr.Array:
350
- if self.mode == "r":
351
- raise NgioValueError("Cannot create an array in read only mode.")
352
427
 
353
- try:
354
- return self.group.zeros(
355
- name=path,
356
- shape=shape,
357
- dtype=dtype,
358
- chunks=chunks,
359
- dimension_separator=dimension_separator,
360
- compressor=compressor,
361
- overwrite=overwrite,
362
- )
363
- except ContainsGroupError as e:
364
- raise NgioFileExistsError(
365
- f"A Zarr array already exists at {path}, "
366
- "consider setting overwrite=True."
367
- ) from e
368
- except Exception as e:
369
- raise NgioValueError(f"Error creating array at {path}") from e
428
+ def is_group_listable(group: zarr.Group) -> bool:
429
+ """Check if a Zarr group is listable.
370
430
 
371
- def derive_handler(
372
- self,
373
- path: str,
374
- overwrite: bool = False,
375
- ) -> "ZarrGroupHandler":
376
- """Derive a new handler from the current handler.
431
+ A group is considered listable if it contains at least one array or subgroup.
377
432
 
378
- Args:
379
- path (str): The path to the group.
380
- overwrite (bool): If True, overwrite the group if it exists.
381
- """
382
- group = self.get_group(path, create_mode=True, overwrite=overwrite)
383
- return ZarrGroupHandler(
384
- store=group,
385
- cache=self.use_cache,
386
- mode=self.mode,
387
- parallel_safe=self._parallel_safe,
388
- parent=self,
433
+ Args:
434
+ group (zarr.Group): The Zarr group to check.
435
+
436
+ Returns:
437
+ bool: True if the group is listable, False otherwise.
438
+ """
439
+ if not group.store.supports_listing:
440
+ # If the store does not support listing
441
+ # then for sure it is not listable
442
+ return False
443
+ try:
444
+ next(group.keys())
445
+ return True
446
+ except StopIteration:
447
+ # Group is listable but empty
448
+ return True
449
+ except Exception as _:
450
+ # Some stores may raise errors when listing
451
+ # consider those not listable
452
+ return False
453
+
454
+
455
def _make_sync_fs(fs: fsspec.AbstractFileSystem) -> fsspec.AbstractFileSystem:
    """Rebuild a filesystem from its JSON form with ``asynchronous`` off.

    Args:
        fs (fsspec.AbstractFileSystem): The filesystem to convert.

    Returns:
        fsspec.AbstractFileSystem: A filesystem created from the same JSON
            description, with the "asynchronous" entry set to False.
    """
    description = {**json.loads(fs.to_json()), "asynchronous": False}
    serialized = json.dumps(description)
    return fsspec.AbstractFileSystem.from_json(serialized)
459
+
460
+
461
def _get_mapper(store: LocalStore | FsspecStore, path: str):
    """Return an fsspec mapper rooted at ``path`` inside ``store``.

    Args:
        store (LocalStore | FsspecStore): The store that holds the data.
        path (str): The path inside the store.

    Returns:
        The mapper given by ``get_mapper`` on the matching filesystem: the
        local "file" filesystem for a ``LocalStore``, otherwise a
        synchronous copy of the store's own filesystem.
    """
    if isinstance(store, LocalStore):
        local_fs = fsspec.filesystem("file")
        return local_fs.get_mapper((store.root / path).as_posix())

    remote_fs = _make_sync_fs(store.fs)
    return remote_fs.get_mapper(f"{store.path}/{path}")
469
+
470
+
471
def _fsspec_copy(
    src_fs: LocalStore | FsspecStore,
    src_path: str,
    dest_fs: LocalStore | FsspecStore,
    dest_path: str,
):
    """Copy every key under ``src_path`` to ``dest_path`` through fsspec mappers.

    Args:
        src_fs (LocalStore | FsspecStore): The store to read from.
        src_path (str): The path inside the source store.
        dest_fs (LocalStore | FsspecStore): The store to write to.
        dest_path (str): The path inside the destination store.
    """
    source = _get_mapper(src_fs, src_path)
    target = _get_mapper(dest_fs, dest_path)
    # Key-by-key transfer; each value is read from the source then written.
    for entry in source.keys():
        target[entry] = source[entry]
481
+
482
+
483
def _zarr_python_copy(src_group: zarr.Group, dest_group: zarr.Group):
    """Recursively copy a group using the Zarr Python API.

    Attributes are copied first, then every array, then every subgroup.

    Args:
        src_group (zarr.Group): The group to copy from.
        dest_group (zarr.Group): The group to copy into.
    """
    # Attributes
    dest_group.attrs.put(src_group.attrs.asdict())

    # Arrays: recreate each array from its spec, then stream the data over.
    for array_name, src_array in src_group.arrays():
        is_v2 = src_array.metadata.zarr_format == 2
        spec_cls = AnyArraySpecV2 if is_v2 else AnyArraySpecV3
        array_spec = spec_cls.from_zarr(src_array)
        dest_array = array_spec.to_zarr(
            store=dest_group.store,
            path=f"{dest_group.path}/{array_name}",
            overwrite=True,
        )
        if src_array.ndim > 0:
            lazy_data = da.from_zarr(src_array)
            da.to_zarr(lazy_data, dest_array, overwrite=False)

    # Subgroups: create the child in the destination and recurse into it.
    for group_name, src_child in src_group.groups():
        dest_child = dest_group.create_group(group_name, overwrite=True)
        _zarr_python_copy(src_child, dest_child)
504
+
505
+
506
+ def copy_group(
507
+ src_group: zarr.Group, dest_group: zarr.Group, suppress_warnings: bool = False
508
+ ):
509
+ if src_group.metadata.zarr_format != dest_group.metadata.zarr_format:
510
+ raise NgioValueError(
511
+ "Different Zarr format versions between source and destination, "
512
+ "cannot copy."
389
513
  )
390
514
 
391
- def safe_derive_handler(
392
- self,
393
- path: str,
394
- overwrite: bool = False,
395
- ) -> tuple[bool, "ZarrGroupHandler | NgioError"]:
396
- """Derive a new handler from the current handler."""
397
- try:
398
- return True, self.derive_handler(path, overwrite=overwrite)
399
- except NgioError as e:
400
- return False, e
515
+ if not is_group_listable(src_group):
516
+ raise NgioValueError("Source group is not listable, cannot copy.")
401
517
 
402
- def copy_handler(self, handler: "ZarrGroupHandler") -> None:
403
- """Copy the group to a new store."""
404
- _, n_skipped, _ = zarr.copy_store(
405
- source=self.group.store,
406
- dest=handler.group.store,
407
- source_path=self.group.path,
408
- dest_path=handler.group.path,
409
- if_exists="replace",
518
+ if dest_group.read_only:
519
+ raise NgioValueError("Destination group is read only, cannot copy.")
520
+ if isinstance(src_group.store, LocalStore | FsspecStore) and isinstance(
521
+ dest_group.store, LocalStore | FsspecStore
522
+ ):
523
+ _fsspec_copy(src_group.store, src_group.path, dest_group.store, dest_group.path)
524
+ return
525
+ if not suppress_warnings:
526
+ warnings.warn(
527
+ "Fsspec copy not possible, falling back to Zarr Python API for the copy. "
528
+ "This will preserve some tabular data non-zarr native (parquet, and csv), "
529
+ "and it will be slower for large datasets.",
530
+ UserWarning,
531
+ stacklevel=2,
410
532
  )
411
- if n_skipped > 0:
412
- raise NgioValueError(
413
- f"Error copying group to {handler.full_url}, "
414
- f"#{n_skipped} files where skipped."
415
- )
533
+ _zarr_python_copy(src_group, dest_group)