ngio 0.2.0a2__py3-none-any.whl → 0.5.0b4__py3-none-any.whl

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (106)
  1. ngio/__init__.py +40 -12
  2. ngio/common/__init__.py +16 -32
  3. ngio/common/_dimensions.py +270 -48
  4. ngio/common/_masking_roi.py +153 -0
  5. ngio/common/_pyramid.py +267 -73
  6. ngio/common/_roi.py +290 -66
  7. ngio/common/_synt_images_utils.py +101 -0
  8. ngio/common/_zoom.py +54 -22
  9. ngio/experimental/__init__.py +5 -0
  10. ngio/experimental/iterators/__init__.py +15 -0
  11. ngio/experimental/iterators/_abstract_iterator.py +390 -0
  12. ngio/experimental/iterators/_feature.py +189 -0
  13. ngio/experimental/iterators/_image_processing.py +130 -0
  14. ngio/experimental/iterators/_mappers.py +48 -0
  15. ngio/experimental/iterators/_rois_utils.py +126 -0
  16. ngio/experimental/iterators/_segmentation.py +235 -0
  17. ngio/hcs/__init__.py +17 -58
  18. ngio/hcs/_plate.py +1354 -0
  19. ngio/images/__init__.py +30 -9
  20. ngio/images/_abstract_image.py +968 -0
  21. ngio/images/_create_synt_container.py +132 -0
  22. ngio/images/_create_utils.py +423 -0
  23. ngio/images/_image.py +926 -0
  24. ngio/images/_label.py +417 -0
  25. ngio/images/_masked_image.py +531 -0
  26. ngio/images/_ome_zarr_container.py +1235 -0
  27. ngio/images/_table_ops.py +471 -0
  28. ngio/io_pipes/__init__.py +75 -0
  29. ngio/io_pipes/_io_pipes.py +361 -0
  30. ngio/io_pipes/_io_pipes_masked.py +488 -0
  31. ngio/io_pipes/_io_pipes_roi.py +146 -0
  32. ngio/io_pipes/_io_pipes_types.py +56 -0
  33. ngio/io_pipes/_match_shape.py +377 -0
  34. ngio/io_pipes/_ops_axes.py +344 -0
  35. ngio/io_pipes/_ops_slices.py +411 -0
  36. ngio/io_pipes/_ops_slices_utils.py +199 -0
  37. ngio/io_pipes/_ops_transforms.py +104 -0
  38. ngio/io_pipes/_zoom_transform.py +180 -0
  39. ngio/ome_zarr_meta/__init__.py +39 -15
  40. ngio/ome_zarr_meta/_meta_handlers.py +490 -96
  41. ngio/ome_zarr_meta/ngio_specs/__init__.py +24 -10
  42. ngio/ome_zarr_meta/ngio_specs/_axes.py +268 -234
  43. ngio/ome_zarr_meta/ngio_specs/_channels.py +125 -41
  44. ngio/ome_zarr_meta/ngio_specs/_dataset.py +42 -87
  45. ngio/ome_zarr_meta/ngio_specs/_ngio_hcs.py +536 -2
  46. ngio/ome_zarr_meta/ngio_specs/_ngio_image.py +202 -198
  47. ngio/ome_zarr_meta/ngio_specs/_pixel_size.py +72 -34
  48. ngio/ome_zarr_meta/v04/__init__.py +21 -5
  49. ngio/ome_zarr_meta/v04/_custom_models.py +18 -0
  50. ngio/ome_zarr_meta/v04/{_v04_spec_utils.py → _v04_spec.py} +151 -90
  51. ngio/ome_zarr_meta/v05/__init__.py +27 -0
  52. ngio/ome_zarr_meta/v05/_custom_models.py +18 -0
  53. ngio/ome_zarr_meta/v05/_v05_spec.py +511 -0
  54. ngio/resources/20200812-CardiomyocyteDifferentiation14-Cycle1_B03/mask.png +0 -0
  55. ngio/resources/20200812-CardiomyocyteDifferentiation14-Cycle1_B03/nuclei.png +0 -0
  56. ngio/resources/20200812-CardiomyocyteDifferentiation14-Cycle1_B03/raw.jpg +0 -0
  57. ngio/resources/__init__.py +55 -0
  58. ngio/resources/resource_model.py +36 -0
  59. ngio/tables/__init__.py +20 -4
  60. ngio/tables/_abstract_table.py +270 -0
  61. ngio/tables/_tables_container.py +449 -0
  62. ngio/tables/backends/__init__.py +50 -1
  63. ngio/tables/backends/_abstract_backend.py +200 -31
  64. ngio/tables/backends/_anndata.py +139 -0
  65. ngio/tables/backends/_anndata_utils.py +10 -114
  66. ngio/tables/backends/_csv.py +19 -0
  67. ngio/tables/backends/_json.py +92 -0
  68. ngio/tables/backends/_parquet.py +19 -0
  69. ngio/tables/backends/_py_arrow_backends.py +222 -0
  70. ngio/tables/backends/_table_backends.py +162 -38
  71. ngio/tables/backends/_utils.py +608 -0
  72. ngio/tables/v1/__init__.py +19 -4
  73. ngio/tables/v1/_condition_table.py +71 -0
  74. ngio/tables/v1/_feature_table.py +79 -115
  75. ngio/tables/v1/_generic_table.py +21 -90
  76. ngio/tables/v1/_roi_table.py +486 -137
  77. ngio/transforms/__init__.py +5 -0
  78. ngio/transforms/_zoom.py +19 -0
  79. ngio/utils/__init__.py +16 -14
  80. ngio/utils/_cache.py +48 -0
  81. ngio/utils/_datasets.py +121 -13
  82. ngio/utils/_fractal_fsspec_store.py +42 -0
  83. ngio/utils/_zarr_utils.py +374 -218
  84. ngio-0.5.0b4.dist-info/METADATA +147 -0
  85. ngio-0.5.0b4.dist-info/RECORD +88 -0
  86. {ngio-0.2.0a2.dist-info → ngio-0.5.0b4.dist-info}/WHEEL +1 -1
  87. ngio/common/_array_pipe.py +0 -160
  88. ngio/common/_axes_transforms.py +0 -63
  89. ngio/common/_common_types.py +0 -5
  90. ngio/common/_slicer.py +0 -97
  91. ngio/images/abstract_image.py +0 -240
  92. ngio/images/create.py +0 -251
  93. ngio/images/image.py +0 -389
  94. ngio/images/label.py +0 -236
  95. ngio/images/omezarr_container.py +0 -535
  96. ngio/ome_zarr_meta/_generic_handlers.py +0 -320
  97. ngio/ome_zarr_meta/v04/_meta_handlers.py +0 -54
  98. ngio/tables/_validators.py +0 -192
  99. ngio/tables/backends/_anndata_v1.py +0 -75
  100. ngio/tables/backends/_json_v1.py +0 -56
  101. ngio/tables/tables_container.py +0 -300
  102. ngio/tables/v1/_masking_roi_table.py +0 -175
  103. ngio/utils/_logger.py +0 -29
  104. ngio-0.2.0a2.dist-info/METADATA +0 -95
  105. ngio-0.2.0a2.dist-info/RECORD +0 -53
  106. {ngio-0.2.0a2.dist-info → ngio-0.5.0b4.dist-info}/licenses/LICENSE +0 -0
ngio/utils/_zarr_utils.py CHANGED
@@ -1,90 +1,103 @@
1
1
  """Common utilities for working with Zarr groups in consistent ways."""
2
2
 
3
- # %%
3
+ import json
4
+ import warnings
4
5
  from pathlib import Path
5
- from typing import Literal
6
+ from typing import Literal, TypeAlias
6
7
 
8
+ import dask.array as da
7
9
  import fsspec
8
10
  import zarr
9
11
  from filelock import BaseFileLock, FileLock
10
- from zarr.errors import ContainsGroupError, GroupNotFoundError
11
- from zarr.storage import DirectoryStore, FSStore, Store
12
-
13
- from ngio.utils import NgioFileExistsError, NgioFileNotFoundError, NgioValueError
14
- from ngio.utils._errors import NgioError
12
+ from pydantic_zarr.v2 import ArraySpec as AnyArraySpecV2
13
+ from pydantic_zarr.v3 import ArraySpec as AnyArraySpecV3
14
+ from zarr.abc.store import Store
15
+ from zarr.errors import ContainsGroupError
16
+ from zarr.storage import FsspecStore, LocalStore, MemoryStore, ZipStore
17
+
18
+ from ngio.utils._cache import NgioCache
19
+ from ngio.utils._errors import (
20
+ NgioFileExistsError,
21
+ NgioFileNotFoundError,
22
+ NgioValueError,
23
+ )
15
24
 
16
25
  AccessModeLiteral = Literal["r", "r+", "w", "w-", "a"]
17
26
  # StoreLike is more restrictive than it could be
18
27
  # but to make sure we can handle the store correctly
19
28
  # we need to be more restrictive
20
- NgioSupportedStore = str | Path | fsspec.mapping.FSMap | FSStore | DirectoryStore
21
- GenericStore = Store | NgioSupportedStore
22
- StoreOrGroup = GenericStore | zarr.Group
29
+ NgioSupportedStore: TypeAlias = (
30
+ str | Path | fsspec.mapping.FSMap | FsspecStore | MemoryStore | dict | LocalStore
31
+ )
32
+ GenericStore: TypeAlias = NgioSupportedStore | Store
33
+ StoreOrGroup: TypeAlias = NgioSupportedStore | zarr.Group
23
34
 
24
35
 
25
36
  def _check_store(store) -> NgioSupportedStore:
26
37
  """Check the store and return a valid store."""
27
- if isinstance(store, NgioSupportedStore):
28
- return store
29
-
30
- raise NotImplementedError(
31
- f"Store type {type(store)} is not supported. "
32
- f"Supported types are: {NgioSupportedStore}"
33
- )
38
+ if not isinstance(store, NgioSupportedStore):
39
+ warnings.warn(
40
+ f"Store type {type(store)} is not explicitly supported. "
41
+ f"Supported types are: {NgioSupportedStore}. "
42
+ "Proceeding, but this may lead to unexpected behavior.",
43
+ UserWarning,
44
+ stacklevel=2,
45
+ )
46
+ return store
34
47
 
35
48
 
36
- def _check_group(group: zarr.Group, mode: AccessModeLiteral) -> zarr.Group:
49
+ def _check_group(
50
+ group: zarr.Group, mode: AccessModeLiteral | None = None
51
+ ) -> zarr.Group:
37
52
  """Check the group and return a valid group."""
38
- is_read_only = getattr(group, "_read_only", False)
39
- if is_read_only and mode in ["w", "w-"]:
40
- raise NgioValueError(
41
- "The group is read only. Cannot open in write mode ['w', 'w-']"
42
- )
53
+ if group.read_only and mode not in [None, "r"]:
54
+ raise NgioValueError(f"The group is read only. Cannot open in mode {mode}.")
43
55
 
44
- if mode == "r" and not is_read_only:
56
+ if mode == "r" and not group.read_only:
45
57
  # let's make sure we don't accidentally write to the group
46
58
  group = zarr.open_group(store=group.store, path=group.path, mode="r")
47
-
48
59
  return group
49
60
 
50
61
 
51
62
  def open_group_wrapper(
52
- store: StoreOrGroup, mode: AccessModeLiteral
53
- ) -> tuple[zarr.Group, NgioSupportedStore]:
63
+ store: StoreOrGroup,
64
+ mode: AccessModeLiteral | None = None,
65
+ zarr_format: Literal[2, 3] | None = None,
66
+ ) -> zarr.Group:
54
67
  """Wrapper around zarr.open_group with some additional checks.
55
68
 
56
69
  Args:
57
70
  store (StoreOrGroup): The store or group to open.
58
- mode (ReadOrEdirLiteral): The mode to open the group in.
71
+ mode (AccessModeLiteral): The mode to open the group in.
72
+ zarr_format (int): The Zarr format version to use.
59
73
 
60
74
  Returns:
61
75
  zarr.Group: The opened Zarr group.
62
76
  """
63
77
  if isinstance(store, zarr.Group):
64
78
  group = _check_group(store, mode)
65
- if hasattr(group, "store_path"):
66
- _store = group.store_path
67
- if isinstance(group.store, DirectoryStore):
68
- _store = group.store.path
69
- else:
70
- _store = group.store
71
-
72
- _store = _check_store(_store)
73
- return group, _store
79
+ _check_store(group.store)
80
+ return group
74
81
 
75
82
  try:
76
- store = _check_store(store)
77
- group = zarr.open_group(store=store, mode=mode)
83
+ _check_store(store)
84
+ mode = mode if mode is not None else "a"
85
+ group = zarr.open_group(store=store, mode=mode, zarr_format=zarr_format)
78
86
 
79
- except ContainsGroupError as e:
87
+ except FileExistsError as e:
80
88
  raise NgioFileExistsError(
81
89
  f"A Zarr group already exists at {store}, consider setting overwrite=True."
82
90
  ) from e
83
91
 
84
- except GroupNotFoundError as e:
92
+ except FileNotFoundError as e:
85
93
  raise NgioFileNotFoundError(f"No Zarr group found at {store}") from e
86
94
 
87
- return group, store
95
+ except ContainsGroupError as e:
96
+ raise NgioFileExistsError(
97
+ f"A Zarr group already exists at {store}, consider setting overwrite=True."
98
+ ) from e
99
+
100
+ return group
88
101
 
89
102
 
90
103
  class ZarrGroupHandler:
@@ -93,167 +106,185 @@ class ZarrGroupHandler:
93
106
  def __init__(
94
107
  self,
95
108
  store: StoreOrGroup,
109
+ zarr_format: Literal[2, 3] | None = None,
96
110
  cache: bool = False,
97
- mode: AccessModeLiteral = "a",
98
- parallel_safe: bool = False,
99
- parent: "ZarrGroupHandler | None" = None,
111
+ mode: AccessModeLiteral | None = None,
100
112
  ):
101
113
  """Initialize the handler.
102
114
 
103
115
  Args:
104
116
  store (StoreOrGroup): The Zarr store or group containing the image data.
105
117
  meta_mode (str): The mode of the metadata handler.
118
+ zarr_format (int | None): The Zarr format version to use.
106
119
  cache (bool): Whether to cache the metadata.
107
- mode (str): The mode of the store.
108
- parallel_safe (bool): If True, the handler will create a lock file to make
109
- that can be used to make the handler parallel safe.
110
- Be aware that the lock needs to be used manually.
111
- parent (ZarrGroupHandler | None): The parent handler.
120
+ mode (str | None): The mode of the store.
112
121
  """
113
- if mode not in ["r", "r+", "w", "w-", "a"]:
122
+ if mode not in ["r", "r+", "w", "w-", "a", None]:
114
123
  raise NgioValueError(f"Mode {mode} is not supported.")
115
124
 
116
- if parallel_safe and cache:
117
- raise NgioValueError(
118
- "The cache and parallel_safe options are mutually exclusive."
119
- "If you want to use the lock mechanism, you should not use the cache."
120
- )
121
-
122
- _group, _store = open_group_wrapper(store, mode)
123
-
124
- # Make sure the cache is set in the attrs
125
- # in the same way as the cache in the handler
126
- _group.attrs.cache = cache
127
-
128
- if parallel_safe:
129
- if not isinstance(_store, str | Path):
130
- raise NgioValueError(
131
- "The store needs to be a path to use the lock mechanism."
132
- )
133
- self._lock_path = f"{_store}.lock"
134
- self._lock = FileLock(self._lock_path)
135
-
136
- else:
137
- self._lock_path = None
138
- self._lock = None
139
-
140
- self._group = _group
141
- self._mode = mode
142
- self._store = _store
125
+ group = open_group_wrapper(store=store, mode=mode, zarr_format=zarr_format)
126
+ self._group = group
143
127
  self.use_cache = cache
144
- self._parallel_safe = parallel_safe
145
- self._cache = {}
146
- self._parent = parent
128
+
129
+ self._group_cache: NgioCache[zarr.Group] = NgioCache(use_cache=cache)
130
+ self._array_cache: NgioCache[zarr.Array] = NgioCache(use_cache=cache)
131
+ self._handlers_cache: NgioCache[ZarrGroupHandler] = NgioCache(use_cache=cache)
132
+ self._lock: tuple[Path, BaseFileLock] | None = None
147
133
 
148
134
  def __repr__(self) -> str:
149
135
  """Return a string representation of the handler."""
150
136
  return (
151
- f"ZarrGroupHandler(full_path={self.full_path}, mode={self.mode}, "
137
+ f"ZarrGroupHandler(full_url={self.full_url}, read_only={self.read_only}, "
152
138
  f"cache={self.use_cache}"
153
139
  )
154
140
 
155
141
  @property
156
- def store(self) -> NgioSupportedStore:
142
+ def store(self) -> Store:
157
143
  """Return the store of the group."""
158
- return self._store
144
+ return self._group.store
159
145
 
160
146
  @property
161
- def full_path(self) -> str:
147
+ def full_url(self) -> str | None:
162
148
  """Return the store path."""
163
- return f"{self._store}/{self._group.path}"
149
+ if isinstance(self.store, LocalStore):
150
+ return (self.store.root / self.group.path).as_posix()
151
+ elif isinstance(self.store, FsspecStore):
152
+ return f"{self.store.path}/{self.group.path}"
153
+ elif isinstance(self.store, ZipStore):
154
+ return (self.store.path / self.group.path).as_posix()
155
+ elif isinstance(self.store, MemoryStore):
156
+ return None
157
+ warnings.warn(
158
+ f"Cannot determine full URL for store type {type(self.store)}. ",
159
+ UserWarning,
160
+ stacklevel=2,
161
+ )
162
+ return None
163
+
164
+ @property
165
+ def zarr_format(self) -> Literal[2, 3]:
166
+ """Return the Zarr format version."""
167
+ return self._group.metadata.zarr_format
164
168
 
165
169
  @property
166
- def mode(self) -> AccessModeLiteral:
167
- """Return the mode of the group."""
168
- return self._mode # type: ignore
170
+ def read_only(self) -> bool:
171
+ """Return whether the group is read only."""
172
+ return self._group.read_only
173
+
174
+ def _create_lock(self) -> tuple[Path, BaseFileLock]:
175
+ """Create the lock."""
176
+ if self._lock is not None:
177
+ return self._lock
178
+
179
+ if self.use_cache is True:
180
+ raise NgioValueError(
181
+ "Lock mechanism is not compatible with caching. "
182
+ "Please set cache=False to use the lock mechanism."
183
+ )
184
+
185
+ if not isinstance(self.store, LocalStore):
186
+ raise NgioValueError(
187
+ "The store needs to be a LocalStore to use the lock mechanism. "
188
+ f"Instead, got {self.store.__class__.__name__}."
189
+ )
190
+
191
+ store_path = Path(self.store.root) / self.group.path
192
+ _lock_path = store_path.with_suffix(".lock")
193
+ _lock = FileLock(_lock_path, timeout=10)
194
+ return _lock_path, _lock
169
195
 
170
196
  @property
171
- def lock(self) -> BaseFileLock | None:
197
+ def lock(self) -> BaseFileLock:
172
198
  """Return the lock."""
173
- return self._lock
199
+ if self._lock is None:
200
+ self._lock = self._create_lock()
201
+ return self._lock[1]
174
202
 
175
203
  @property
176
- def parent(self) -> "ZarrGroupHandler | None":
177
- """Return the parent handler."""
178
- return self._parent
204
+ def lock_path(self) -> Path:
205
+ """Return the lock path."""
206
+ if self._lock is None:
207
+ self._lock = self._create_lock()
208
+ return self._lock[0]
179
209
 
180
210
  def remove_lock(self) -> None:
181
211
  """Return the lock."""
182
- if self._lock is None or self._lock_path is None:
212
+ if self._lock is None:
183
213
  return None
184
214
 
185
- lock_path = Path(self._lock_path)
186
- if lock_path.exists() and self._lock.lock_counter == 0:
215
+ lock_path, lock = self._lock
216
+ if lock_path.exists() and lock.lock_counter == 0:
187
217
  lock_path.unlink()
188
218
  self._lock = None
189
- self._lock_path = None
190
219
  return None
191
220
 
192
221
  raise NgioValueError("The lock is still in use. Cannot remove it.")
193
222
 
194
- @property
195
- def group(self) -> zarr.Group:
196
- """Return the group."""
197
- return self._group
223
+ def reopen_group(self) -> zarr.Group:
224
+ """Reopen the group.
198
225
 
199
- def add_to_cache(self, key: str, value: object) -> None:
200
- """Add an object to the cache."""
201
- if not self.use_cache:
202
- return None
203
- self._cache[key] = value
226
+ This is useful when the group has been modified
227
+ outside of the handler.
228
+ """
229
+ mode = "r" if self.read_only else "r+"
230
+ return zarr.open_group(
231
+ store=self._group.store,
232
+ path=self._group.path,
233
+ mode=mode,
234
+ zarr_format=self._group.metadata.zarr_format,
235
+ )
204
236
 
205
- def get_from_cache(self, key: str) -> object | None:
206
- """Get an object from the cache."""
207
- if not self.use_cache:
208
- return None
209
- return self._cache.get(key, None)
237
+ def reopen_handler(self) -> "ZarrGroupHandler":
238
+ """Reopen the handler.
239
+
240
+ This is useful when the group has been modified
241
+ outside of the handler.
242
+ """
243
+ mode = "r" if self.read_only else "r+"
244
+ group = self.reopen_group()
245
+ return ZarrGroupHandler(
246
+ store=group,
247
+ zarr_format=group.metadata.zarr_format,
248
+ cache=self.use_cache,
249
+ mode=mode,
250
+ )
210
251
 
211
252
  def clean_cache(self) -> None:
212
253
  """Clear the cached metadata."""
213
- self._cache = {}
254
+ group = self.reopen_group()
255
+ self.__init__(
256
+ store=group,
257
+ zarr_format=group.metadata.zarr_format,
258
+ cache=self.use_cache,
259
+ mode="r" if self.read_only else "r+",
260
+ )
261
+
262
+ @property
263
+ def group(self) -> zarr.Group:
264
+ """Return the group."""
265
+ if self.use_cache is False:
266
+ # If we are not using cache, we need to reopen the group
267
+ # to make sure that the attributes are up to date
268
+ return self.reopen_group()
269
+ return self._group
214
270
 
215
271
  def load_attrs(self) -> dict:
216
272
  """Load the attributes of the group."""
217
- attrs = self.get_from_cache("attrs")
218
- if attrs is not None and isinstance(attrs, dict):
219
- return attrs
220
-
221
- attrs = dict(self.group.attrs)
222
-
223
- self.add_to_cache("attrs", attrs)
224
- return attrs
225
-
226
- def _write_attrs(self, attrs: dict, overwrite: bool = False) -> None:
227
- """Write the metadata to the store."""
228
- is_read_only = getattr(self._group, "_read_only", False)
229
- if is_read_only:
230
- raise NgioValueError("The group is read only. Cannot write metadata.")
231
-
232
- # we need to invalidate the current attrs cache
233
- self.add_to_cache("attrs", None)
234
- if overwrite:
235
- self.group.attrs.clear()
236
-
237
- self.group.attrs.update(attrs)
273
+ return self.reopen_group().attrs.asdict()
238
274
 
239
275
  def write_attrs(self, attrs: dict, overwrite: bool = False) -> None:
240
276
  """Write the metadata to the store."""
241
277
  # Maybe we should use the lock here
242
- self._write_attrs(attrs, overwrite)
243
-
244
- def _obj_get(self, path: str):
245
- """Get a group from the group."""
246
- group_or_array = self.get_from_cache(path)
247
- if group_or_array is not None:
248
- return group_or_array
249
-
250
- group_or_array = self.group.get(path, None)
251
- self.add_to_cache(path, group_or_array)
252
- return group_or_array
278
+ if self.read_only:
279
+ raise NgioValueError("The group is read only. Cannot write metadata.")
280
+ group = self.reopen_group()
281
+ if overwrite:
282
+ group.attrs.clear()
283
+ group.attrs.update(attrs)
253
284
 
254
285
  def create_group(self, path: str, overwrite: bool = False) -> zarr.Group:
255
286
  """Create a group in the group."""
256
- if self.mode == "r":
287
+ if self.group.read_only:
257
288
  raise NgioValueError("Cannot create a group in read only mode.")
258
289
 
259
290
  try:
@@ -263,116 +294,241 @@ class ZarrGroupHandler:
263
294
  f"A Zarr group already exists at {path}, "
264
295
  "consider setting overwrite=True."
265
296
  ) from e
266
- self.add_to_cache(path, group)
297
+ self._group_cache.set(path, group, overwrite=overwrite)
267
298
  return group
268
299
 
269
300
  def get_group(
270
301
  self,
271
302
  path: str,
272
303
  create_mode: bool = False,
304
+ overwrite: bool = False,
273
305
  ) -> zarr.Group:
274
306
  """Get a group from the group.
275
307
 
276
308
  Args:
277
309
  path (str): The path to the group.
278
310
  create_mode (bool): If True, create the group if it does not exist.
311
+ overwrite (bool): If True, overwrite the group if it exists.
279
312
 
280
313
  Returns:
281
314
  zarr.Group: The Zarr group.
282
315
 
283
316
  """
284
- group = self._obj_get(path)
317
+ if overwrite and not create_mode:
318
+ raise NgioValueError("Cannot overwrite a group without create_mode=True.")
319
+
320
+ if overwrite:
321
+ return self.create_group(path, overwrite=overwrite)
322
+
323
+ group = self._group_cache.get(path)
285
324
  if isinstance(group, zarr.Group):
286
325
  return group
287
326
 
288
- if group is not None:
289
- raise NgioValueError(
290
- f"The object at {path} is not a group, but a {type(group)}"
291
- )
327
+ group = self.group.get(path, default=None)
328
+ if isinstance(group, zarr.Group):
329
+ self._group_cache.set(path, group, overwrite=overwrite)
330
+ return group
331
+
332
+ if isinstance(group, zarr.Array):
333
+ raise NgioValueError(f"The object at {path} is not a group, but an array.")
292
334
 
293
335
  if not create_mode:
294
336
  raise NgioFileNotFoundError(f"No group found at {path}")
295
337
  group = self.create_group(path)
338
+ self._group_cache.set(path, group, overwrite=overwrite)
296
339
  return group
297
340
 
298
- def safe_get_group(
299
- self, path: str, create_mode: bool = False
300
- ) -> tuple[bool, zarr.Group | NgioError]:
301
- """Get a group from the group.
341
+ def get_array(self, path: str) -> zarr.Array:
342
+ """Get an array from the group."""
343
+ array = self._array_cache.get(path)
344
+ if isinstance(array, zarr.Array):
345
+ return array
346
+ array = self.group.get(path, default=None)
347
+ if isinstance(array, zarr.Array):
348
+ self._array_cache.set(path, array)
349
+ return array
350
+
351
+ if isinstance(array, zarr.Group):
352
+ raise NgioValueError(f"The object at {path} is not an array, but a group.")
353
+ raise NgioFileNotFoundError(f"No array found at {path}")
354
+
355
+ def get_handler(
356
+ self,
357
+ path: str,
358
+ create_mode: bool = True,
359
+ overwrite: bool = False,
360
+ ) -> "ZarrGroupHandler":
361
+ """Get a new handler for a group in the current handler group.
302
362
 
303
363
  Args:
304
364
  path (str): The path to the group.
305
365
  create_mode (bool): If True, create the group if it does not exist.
366
+ overwrite (bool): If True, overwrite the group if it exists.
367
+ """
368
+ handler = self._handlers_cache.get(path)
369
+ if handler is not None:
370
+ return handler
371
+ group = self.get_group(path, create_mode=create_mode, overwrite=overwrite)
372
+ mode = "r" if group.read_only else "r+"
373
+ handler = ZarrGroupHandler(
374
+ store=group, zarr_format=self.zarr_format, cache=self.use_cache, mode=mode
375
+ )
376
+ self._handlers_cache.set(path, handler)
377
+ return handler
306
378
 
307
- Returns:
308
- zarr.Group | None: The Zarr group or None if it does not exist
309
- or an error occurs.
379
+ @property
380
+ def is_listable(self) -> bool:
381
+ return is_group_listable(self.group)
310
382
 
383
+ def delete_group(self, path: str) -> None:
384
+ """Delete a group from the current group.
385
+
386
+ Args:
387
+ path (str): The path to the group to delete.
311
388
  """
312
- try:
313
- return True, self.get_group(path, create_mode)
314
- except NgioError as e:
315
- return False, e
389
+ if self.group.read_only:
390
+ raise NgioValueError("Cannot delete a group in read only mode.")
391
+ self.group.__delitem__(path)
392
+ self._group_cache._cache.pop(path, None)
393
+ self._handlers_cache._cache.pop(path, None)
316
394
 
317
- def get_array(self, path: str) -> zarr.Array:
318
- """Get an array from the group."""
319
- array = self._obj_get(path)
320
- if array is None:
321
- raise NgioFileNotFoundError(f"No array found at {path}")
322
- if not isinstance(array, zarr.Array):
395
+ def delete_self(self) -> None:
396
+ """Delete the current group."""
397
+ if self.group.read_only:
398
+ raise NgioValueError("Cannot delete a group in read only mode.")
399
+ self.group.__delitem__("/")
400
+
401
+ def copy_group(self, dest_group: zarr.Group):
402
+ """Copy the group to a new store."""
403
+ copy_group(self.group, dest_group)
404
+
405
+
406
+ def find_dimension_separator(array: zarr.Array) -> Literal[".", "/"]:
407
+ """Find the dimension separator used in the Zarr store.
408
+
409
+ Args:
410
+ array (zarr.Array): The Zarr array to check.
411
+
412
+ Returns:
413
+ Literal[".", "/"]: The dimension separator used in the store.
414
+ """
415
+ from zarr.core.chunk_key_encodings import DefaultChunkKeyEncoding
416
+
417
+ if array.metadata.zarr_format == 2:
418
+ separator = array.metadata.dimension_separator
419
+ else:
420
+ separator = array.metadata.chunk_key_encoding
421
+ if not isinstance(separator, DefaultChunkKeyEncoding):
323
422
  raise NgioValueError(
324
- f"The object at {path} is not an array, but a {type(array)}"
423
+ "Only DefaultChunkKeyEncoding is supported in this example."
325
424
  )
326
- return array
425
+ separator = separator.separator
426
+ return separator
327
427
 
328
- def create_array(
329
- self,
330
- path: str,
331
- shape: tuple[int, ...],
332
- dtype: str,
333
- chunks: tuple[int, ...] | None = None,
334
- overwrite: bool = False,
335
- ) -> zarr.Array:
336
- if self.mode == "r":
337
- raise NgioValueError("Cannot create an array in read only mode.")
338
428
 
339
- try:
340
- return self.group.zeros(
341
- name=path,
342
- shape=shape,
343
- dtype=dtype,
344
- chunks=chunks,
345
- dimension_separator="/",
346
- overwrite=overwrite,
347
- )
348
- except ContainsGroupError as e:
349
- raise NgioFileExistsError(
350
- f"A Zarr array already exists at {path}, "
351
- "consider setting overwrite=True."
352
- ) from e
353
- except Exception as e:
354
- raise NgioValueError(f"Error creating array at {path}") from e
429
+ def is_group_listable(group: zarr.Group) -> bool:
430
+ """Check if a Zarr group is listable.
355
431
 
356
- def derive_handler(
357
- self,
358
- path: str,
359
- ) -> "ZarrGroupHandler":
360
- """Derive a new handler from the current handler."""
361
- group = self.get_group(path, create_mode=True)
362
- return ZarrGroupHandler(
363
- store=group,
364
- cache=self.use_cache,
365
- mode=self.mode,
366
- parallel_safe=self._parallel_safe,
367
- parent=self,
432
+ A group is considered listable if it contains at least one array or subgroup.
433
+
434
+ Args:
435
+ group (zarr.Group): The Zarr group to check.
436
+
437
+ Returns:
438
+ bool: True if the group is listable, False otherwise.
439
+ """
440
+ if not group.store.supports_listing:
441
+ # If the store does not support listing
442
+ # then for sure it is not listable
443
+ return False
444
+ try:
445
+ next(group.keys())
446
+ return True
447
+ except StopIteration:
448
+ # Group is listable but empty
449
+ return True
450
+ except Exception as _:
451
+ # Some stores may raise errors when listing
452
+ # consider those not listable
453
+ return False
454
+
455
+
456
+ def _make_sync_fs(fs: fsspec.AbstractFileSystem) -> fsspec.AbstractFileSystem:
457
+ fs_dict = json.loads(fs.to_json())
458
+ fs_dict["asynchronous"] = False
459
+ return fsspec.AbstractFileSystem.from_json(json.dumps(fs_dict))
460
+
461
+
462
+ def _get_mapper(store: LocalStore | FsspecStore, path: str):
463
+ if isinstance(store, LocalStore):
464
+ fs = fsspec.filesystem("file")
465
+ full_path = (store.root / path).as_posix()
466
+ else:
467
+ fs = _make_sync_fs(store.fs)
468
+ full_path = f"{store.path}/{path}"
469
+ return fs.get_mapper(full_path)
470
+
471
+
472
+ def _fsspec_copy(
473
+ src_fs: LocalStore | FsspecStore,
474
+ src_path: str,
475
+ dest_fs: LocalStore | FsspecStore,
476
+ dest_path: str,
477
+ ):
478
+ src_mapper = _get_mapper(src_fs, src_path)
479
+ dest_mapper = _get_mapper(dest_fs, dest_path)
480
+ for key in src_mapper.keys():
481
+ dest_mapper[key] = src_mapper[key]
482
+
483
+
484
+ def _zarr_python_copy(src_group: zarr.Group, dest_group: zarr.Group):
485
+ # Copy attributes
486
+ dest_group.attrs.put(src_group.attrs.asdict())
487
+ # Copy arrays
488
+ for name, array in src_group.arrays():
489
+ if array.metadata.zarr_format == 2:
490
+ spec = AnyArraySpecV2.from_zarr(array)
491
+ else:
492
+ spec = AnyArraySpecV3.from_zarr(array)
493
+ dst = spec.to_zarr(
494
+ store=dest_group.store,
495
+ path=f"{dest_group.path}/{name}",
496
+ overwrite=True,
497
+ )
498
+ if array.ndim > 0:
499
+ dask_array = da.from_zarr(array)
500
+ da.to_zarr(dask_array, dst, overwrite=False)
501
+ # Copy subgroups
502
+ for name, subgroup in src_group.groups():
503
+ dest_subgroup = dest_group.create_group(name, overwrite=True)
504
+ _zarr_python_copy(subgroup, dest_subgroup)
505
+
506
+
507
+ def copy_group(
508
+ src_group: zarr.Group, dest_group: zarr.Group, suppress_warnings: bool = False
509
+ ):
510
+ if src_group.metadata.zarr_format != dest_group.metadata.zarr_format:
511
+ raise NgioValueError(
512
+ "Different Zarr format versions between source and destination, "
513
+ "cannot copy."
368
514
  )
369
515
 
370
- def safe_derive_handler(
371
- self,
372
- path: str,
373
- ) -> tuple[bool, "ZarrGroupHandler | NgioError"]:
374
- """Derive a new handler from the current handler."""
375
- try:
376
- return True, self.derive_handler(path)
377
- except NgioError as e:
378
- return False, e
516
+ if not is_group_listable(src_group):
517
+ raise NgioValueError("Source group is not listable, cannot copy.")
518
+
519
+ if dest_group.read_only:
520
+ raise NgioValueError("Destination group is read only, cannot copy.")
521
+ if isinstance(src_group.store, LocalStore | FsspecStore) and isinstance(
522
+ dest_group.store, LocalStore | FsspecStore
523
+ ):
524
+ _fsspec_copy(src_group.store, src_group.path, dest_group.store, dest_group.path)
525
+ return
526
+ if not suppress_warnings:
527
+ warnings.warn(
528
+ "Fsspec copy not possible, falling back to Zarr Python API for the copy. "
529
+ "This will preserve some tabular data non-zarr native (parquet, and csv), "
530
+ "and it will be slower for large datasets.",
531
+ UserWarning,
532
+ stacklevel=2,
533
+ )
534
+ _zarr_python_copy(src_group, dest_group)