mlarray 0.0.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
mlarray/mlarray.py ADDED
@@ -0,0 +1,576 @@
1
+ from copy import deepcopy
2
+ import numpy as np
3
+ import blosc2
4
+ import math
5
+ from typing import Dict, Optional, Union, List, Tuple
6
+ from pathlib import Path
7
+ import os
8
+ from mlarray.meta import Meta, MetaBlosc2
9
+ from mlarray.utils import is_serializable
10
+
11
+ MLARRAY_SUFFIX = "mla"
12
+ MLARRAY_VERSION = "v0"
13
+ MLARRAY_DEFAULT_PATCH_SIZE = 192
14
+
15
+
16
class MLArray:
    """Blosc2-backed N-dimensional array bundled with image metadata.

    An instance wraps either an in-memory array or a Blosc2 ``NDArray`` stored
    on disk (``.mla`` or ``.b2nd``). Metadata lives in ``self.meta`` and is
    only persisted for ``.mla`` files (``self.support_metadata``).
    """

    def __init__(
            self,
            array: Optional[Union[np.ndarray, str, Path]] = None,
            spacing: Optional[Union[List, Tuple, np.ndarray]] = None,
            origin: Optional[Union[List, Tuple, np.ndarray]] = None,
            direction: Optional[Union[List, Tuple, np.ndarray]] = None,
            meta: Optional[Union[Dict, "Meta"]] = None,
            channel_axis: Optional[int] = None,
            num_threads: int = 1,
            copy: Optional['MLArray'] = None) -> None:
        """Initializes a MLArray instance.

        The MLArray file format (".mla") is a Blosc2-compressed container
        with standardized metadata support for N-dimensional medical images.

        Args:
            array (Union[np.ndarray, str, Path]): Input data or file path. Use
                a numpy ndarray for in-memory arrays. Use a string or Path to
                load a ".b2nd" or ".mla" file; in that case ``spacing``,
                ``origin``, ``direction``, ``meta`` and ``channel_axis`` are
                not applied and the metadata comes from the file.
            spacing (Optional[Union[List, Tuple, np.ndarray]]): Spacing per
                axis. Provide a list/tuple/ndarray with length equal to the
                number of dimensions (e.g., [sx, sy, sz]).
            origin (Optional[Union[List, Tuple, np.ndarray]]): Origin per axis.
                Provide a list/tuple/ndarray with length equal to the number of
                dimensions.
            direction (Optional[Union[List, Tuple, np.ndarray]]): Direction
                cosine matrix. Provide a 2D list/tuple/ndarray with shape
                (ndims, ndims).
            meta (Optional[Dict | Meta]): Free-form metadata dictionary or Meta
                instance. Must be JSON-serializable when saving. A dict is
                wrapped into a Meta object as its ``image`` metadata.
            channel_axis (Optional[int]): Index of the channel axis, or None
                when the array has no channel axis.
            num_threads (int): Number of threads for Blosc2 operations. Only
                used when ``array`` is a file path.
            copy (Optional[MLArray]): Another MLArray instance to copy
                metadata fields from.
        """
        self.filepath = None
        self.support_metadata = None
        self.mmap = None
        if isinstance(array, (str, Path)):
            self.load(array, num_threads)
        else:
            self._store = array
            self._validate_and_add_meta(meta, spacing, origin, direction, channel_axis)

        if copy is not None:
            self.meta.copy_from(copy.meta)

    # ------------------------------------------------------------------
    # Small private helpers shared by several methods
    # ------------------------------------------------------------------

    @staticmethod
    def _require_supported_suffix(filepath) -> None:
        """Raises RuntimeError unless ``filepath`` ends with '.b2nd' or the MLArray suffix."""
        if not str(filepath).endswith((".b2nd", f".{MLARRAY_SUFFIX}")):
            raise RuntimeError(f"MLArray requires '.b2nd' or '.{MLARRAY_SUFFIX}' as extension.")

    def _has_data(self) -> bool:
        """Returns True when a store is attached and the meta flag does not say otherwise."""
        # The explicit `== False` comparison is deliberate: every other value
        # of the flag (e.g. None) is treated as "array present".
        return self._store is not None and not (self.meta._has_array == False)  # noqa: E712

    def _require_data(self) -> None:
        """Raises TypeError when no array data is available."""
        if not self._has_data():
            raise TypeError("MLArray has no array data loaded.")

    def _sync_partition_meta(self) -> None:
        """Copies the chunk/block sizes Blosc2 actually used into the metadata."""
        if self.meta._has_array == True:  # noqa: E712 - keep exact comparison semantics
            self.meta._blosc2.chunk_size = list(self._store.chunks)
            self.meta._blosc2.block_size = list(self._store.blocks)

    @staticmethod
    def _supports_patch_optimization(shape, channel_axis) -> bool:
        """Returns True when the image has 2 or 3 spatial dimensions (channel axis excluded)."""
        spatial_ndim = len(shape) - (0 if channel_axis is None else 1)
        return spatial_ndim in (2, 3)

    @staticmethod
    def _expand_patch_size(patch_size, ndims):
        """Turns an isotropic int patch size into one entry per spatial dimension."""
        if isinstance(patch_size, int):
            return [patch_size] * ndims
        return patch_size

    # ------------------------------------------------------------------
    # File handling
    # ------------------------------------------------------------------

    def open(
            self,
            filepath: Union[str, Path],
            shape: Optional[Union[List, Tuple, np.ndarray]] = None,
            dtype: Optional[np.dtype] = None,
            channel_axis: Optional[int] = None,
            mmap: str = 'r',
            patch_size: Optional[Union[int, List, Tuple, str]] = 'default',  # 'default' means that the default of 192 is used, unless a patch size from a previously loaded MLArray image exists; in that case the previous patch size is used.
            chunk_size: Optional[Union[int, List, Tuple]] = None,
            block_size: Optional[Union[int, List, Tuple]] = None,
            num_threads: int = 1,
            cparams: Optional[Dict] = None,
            dparams: Optional[Dict] = None
    ):
        """Opens an existing file memory-mapped, or creates a new empty one.

        A new file is created when ``shape`` and ``dtype`` are given;
        otherwise the file at ``filepath`` is opened and its metadata read.

        Args:
            filepath (Union[str, Path]): Path ending with ".b2nd" or ".mla".
            shape (Optional[Union[List, Tuple, np.ndarray]]): Shape of the
                array to create. Must be set together with ``dtype``.
            dtype (Optional[np.dtype]): Dtype of the array to create.
            channel_axis (Optional[int]): Channel axis used for the
                chunk/block size computation of a new array.
            mmap (str): Memory-map mode, one of 'r', 'r+', 'w+', 'c'.
            patch_size (Optional[Union[int, List, Tuple, str]]): Patch size
                hint for chunk/block optimization (new arrays only).
            chunk_size (Optional[Union[int, List, Tuple]]): Explicit chunk size.
            block_size (Optional[Union[int, List, Tuple]]): Explicit block size.
            num_threads (int): Number of threads for Blosc2.
            cparams (Optional[Dict]): Blosc2 compression parameters. Defaults
                to ZSTD with clevel 8.
            dparams (Optional[Dict]): Blosc2 decompression parameters.
                Defaults to ``{'nthreads': num_threads}``.

        Raises:
            RuntimeError: On an unsupported extension, an inconsistent
                shape/dtype combination, an attempt to create over an
                existing file, or an invalid ``mmap`` mode.
        """
        self.filepath = str(filepath)
        self._require_supported_suffix(filepath)

        if Path(filepath).is_file() and (shape is not None or dtype is not None):
            raise RuntimeError("Cannot create a new file as a file exists already under that path. Explicitly set shape and dtype only if you intent to create a new file.")
        if (shape is None) != (dtype is None):
            raise RuntimeError("Both shape and dtype must be set if you intend to create a new file.")
        if shape is not None and mmap == 'r':
            raise RuntimeError("mmap_mode cannot be 'r' (read-only) if you intend to write a new file. Explicitly set shape and dtype only if you intent to create a new file.")
        if mmap not in ('r', 'r+', 'w+', 'c'):
            raise RuntimeError("mmap_mode must be one of the following: 'r', 'r+', 'w+', 'c'")

        create_array = shape is not None

        if create_array:
            self.meta._blosc2 = self._comp_and_validate_blosc2_meta(self.meta._blosc2, patch_size, chunk_size, block_size, shape, channel_axis)
            self.meta._has_array = True

        self.support_metadata = str(filepath).endswith(f".{MLARRAY_SUFFIX}")

        blosc2.set_nthreads(num_threads)
        if cparams is None:
            cparams = {'codec': blosc2.Codec.ZSTD, 'clevel': 8}
        if dparams is None:
            dparams = {'nthreads': num_threads}

        if create_array:
            self._store = blosc2.empty(shape=shape, dtype=dtype, urlpath=str(filepath), chunks=self.meta._blosc2.chunk_size, blocks=self.meta._blosc2.block_size, cparams=cparams, dparams=dparams, mmap_mode=mmap)
        else:
            self._store = blosc2.open(urlpath=str(filepath), dparams=dparams, mmap_mode=mmap)
            self._read_meta()
        self._sync_partition_meta()
        self.mmap = mmap
        # Only has an effect for writable mmap modes ('r+', 'w+') on ".mla" files.
        self._write_metadata()

    def close(self):
        """Writes pending metadata (if the file is writable) and detaches from the file."""
        self._write_metadata()
        self._store = None
        self.filepath = None
        self.support_metadata = None
        self.mmap = None

    def load(
            self,
            filepath: Union[str, Path],
            num_threads: int = 1,
    ):
        """Loads a Blosc2-compressed file. Both MLArray ('.mla') and Blosc2 ('.b2nd') files are supported.

        The file is opened read-only and attached to this instance; nothing
        is returned.

        WARNING:
            MLArray supports both ".b2nd" and ".mla" files. The MLArray
            format standard and standardized metadata are honored only for
            ".mla". For ".b2nd", metadata is ignored when loading.

        Args:
            filepath (Union[str, Path]): Path to the Blosc2 file to be loaded.
                The filepath needs to have the extension ".b2nd" or ".mla".
            num_threads (int): Number of threads to use for loading the file.

        Raises:
            RuntimeError: If the file extension is not ".b2nd" or ".mla".
        """
        self.filepath = str(filepath)
        self._require_supported_suffix(filepath)
        self.support_metadata = str(filepath).endswith(f".{MLARRAY_SUFFIX}")
        blosc2.set_nthreads(num_threads)
        dparams = {'nthreads': num_threads}
        # The keyword is `dparams` (as in open()); it used to be misspelled
        # `cdparams` here.
        self._store = blosc2.open(urlpath=str(filepath), dparams=dparams, mode='r')
        self.mmap = None
        self._read_meta()
        self._sync_partition_meta()

    def save(
            self,
            filepath: Union[str, Path],
            patch_size: Optional[Union[int, List, Tuple, str]] = 'default',  # 'default' means that the default of 192 is used, unless a patch size from a previously loaded MLArray image exists; in that case the previous patch size is used.
            chunk_size: Optional[Union[int, List, Tuple]] = None,
            block_size: Optional[Union[int, List, Tuple]] = None,
            num_threads: int = 1,
            cparams: Optional[Dict] = None,
            dparams: Optional[Dict] = None
    ):
        """Saves the array to a Blosc2-compressed file. Both MLArray ('.mla') and Blosc2 ('.b2nd') files are supported.

        After saving, the instance is backed by the newly written file.

        WARNING:
            MLArray supports both ".b2nd" and ".mla" files. The MLArray
            format standard and standardized metadata are honored only for
            ".mla". For ".b2nd", metadata is ignored when saving.

        Args:
            filepath (Union[str, Path]): Path to save the file. Must end with
                ".b2nd" or ".mla".
            patch_size (Optional[Union[int, List, Tuple, str]]): Patch size
                hint for chunk/block optimization. Provide an int for
                isotropic sizes or a list/tuple with one entry per spatial
                dimension. Use "default" to reuse a previously loaded patch
                size or, failing that, the default patch size of 192.
            chunk_size (Optional[Union[int, List, Tuple]]): Explicit chunk size.
                Provide an int or a tuple/list with length equal to the number
                of dimensions, or None to let Blosc2 decide. Only usable when
                patch_size is None.
            block_size (Optional[Union[int, List, Tuple]]): Explicit block size.
                Provide an int or a tuple/list with length equal to the number
                of dimensions, or None to let Blosc2 decide. Only usable when
                patch_size is None.
            num_threads (int): Number of threads to use for saving the file.
            cparams (Optional[Dict]): Blosc2 compression parameters. Defaults
                to ZSTD with clevel 8.
            dparams (Optional[Dict]): Blosc2 decompression parameters.
                Defaults to ``{'nthreads': num_threads}``.

        Raises:
            RuntimeError: If the file extension is not ".b2nd" or ".mla".
        """
        self._require_supported_suffix(filepath)

        has_store = self._store is not None
        if has_store:
            self.meta._blosc2 = self._comp_and_validate_blosc2_meta(self.meta._blosc2, patch_size, chunk_size, block_size, self._store.shape, self.meta.spatial.channel_axis)
        self.meta._has_array = has_store

        self.support_metadata = str(filepath).endswith(f".{MLARRAY_SUFFIX}")

        blosc2.set_nthreads(num_threads)
        if cparams is None:
            cparams = {'codec': blosc2.Codec.ZSTD, 'clevel': 8}
        if dparams is None:
            dparams = {'nthreads': num_threads}

        if has_store:
            array = np.ascontiguousarray(self._store[...])
        else:
            # Metadata-only file: store an empty placeholder array.
            array = np.empty((0,))
        self._store = blosc2.asarray(array, urlpath=str(filepath), chunks=self.meta._blosc2.chunk_size, blocks=self.meta._blosc2.block_size, cparams=cparams, dparams=dparams)
        self._sync_partition_meta()
        self.mmap = None
        self._write_metadata(force=True)

    # ------------------------------------------------------------------
    # Array protocol
    # ------------------------------------------------------------------

    def to_numpy(self):
        """Returns the full array by indexing the store with ``[...]``.

        Raises:
            TypeError: If no array data is loaded.
        """
        self._require_data()
        return self._store[...]

    def __getitem__(self, key):
        self._require_data()
        return self._store[key]

    def __setitem__(self, key, value):
        self._require_data()
        self._store[key] = value

    def __iter__(self):
        self._require_data()
        return iter(self._store)

    def __len__(self):
        if not self._has_data():
            return 0
        return len(self._store)

    def __array__(self, dtype=None):
        self._require_data()
        arr = np.asarray(self._store)
        if dtype is not None:
            return arr.astype(dtype)
        return arr

    # ------------------------------------------------------------------
    # Spatial metadata
    # ------------------------------------------------------------------

    @property
    def spacing(self):
        """Returns the image spacing.

        Returns:
            list: The image spacing with length equal to the number of
                dimensions.
        """
        return self.meta.spatial.spacing

    @property
    def origin(self):
        """Returns the image origin.

        Returns:
            list: The image origin with length equal to the number of
                dimensions.
        """
        return self.meta.spatial.origin

    @property
    def direction(self):
        """Returns the image direction.

        Returns:
            list: The image direction with shape (ndims, ndims).
        """
        return self.meta.spatial.direction

    @property
    def affine(self) -> Optional[List]:
        """Computes the affine transformation matrix for the image.

        Missing spacing, origin and direction default to ones, zeros and the
        identity matrix respectively.

        Returns:
            list: Affine matrix with shape (ndims + 1, ndims + 1) as nested
                lists, or None when no array data is loaded.
        """
        if not self._has_data():
            return None
        n = self._spatial_ndim
        spacing = np.array(self.spacing) if self.spacing is not None else np.ones(n)
        origin = np.array(self.origin) if self.origin is not None else np.zeros(n)
        direction = np.array(self.direction) if self.direction is not None else np.eye(n)
        affine = np.eye(n + 1)
        affine[:n, :n] = direction @ np.diag(spacing)
        affine[:n, n] = origin
        return affine.tolist()

    @property
    def translation(self):
        """Extracts the translation vector from the affine matrix.

        Returns:
            list: Translation vector with length equal to the number of
                dimensions, or None when no array data is loaded.
        """
        if not self._has_data():
            return None
        return np.array(self.affine)[:-1, -1].tolist()

    @property
    def scale(self):
        """Extracts the scaling factors from the affine matrix.

        Returns:
            list: Column norms of the linear part of the affine, one per
                axis, or None when no array data is loaded.
        """
        if not self._has_data():
            return None
        linear = np.array(self.affine)[:-1, :-1]
        return np.linalg.norm(linear, axis=0).tolist()

    @property
    def rotation(self):
        """Extracts the rotation matrix from the affine matrix.

        Returns:
            list: Linear part of the affine divided by the per-axis scale,
                with shape (ndims, ndims), or None when no array data is
                loaded.
        """
        if not self._has_data():
            return None
        linear = np.array(self.affine)[:-1, :-1]
        return (linear / np.array(self.scale)).tolist()

    @property
    def shear(self):
        """Computes the shear matrix from the affine matrix.

        Returns:
            list: Shear matrix with shape (ndims, ndims), or None when no
                array data is loaded.
        """
        if not self._has_data():
            return None
        scales = np.array(self.scale)
        rotation_matrix = np.array(self.rotation)
        linear = np.array(self.affine)[:-1, :-1]
        shearing_matrix = np.dot(rotation_matrix.T, linear) / scales[:, None]
        return shearing_matrix.tolist()

    @property
    def shape(self):
        """Returns the shape of the array.

        Returns:
            tuple: Shape of the underlying array, or None when no array data
                is loaded.
        """
        if not self._has_data():
            return None
        return self._store.shape

    @property
    def dtype(self):
        """Returns the dtype of the array, or None when no array data is loaded."""
        if not self._has_data():
            return None
        return self._store.dtype

    @property
    def ndim(self) -> Optional[int]:
        """Returns the number of dimensions of the array, or None when no array data is loaded."""
        if not self._has_data():
            return None
        return len(self._store.shape)

    @property
    def _spatial_ndim(self) -> Optional[int]:
        """Returns the number of dimensions excluding the channel axis (if one is set)."""
        if not self._has_data():
            return None
        ndim = len(self._store.shape)
        if self.meta.spatial.channel_axis is not None:
            ndim -= 1
        return ndim

    # ------------------------------------------------------------------
    # Chunk / block size computation
    # ------------------------------------------------------------------

    def comp_blosc2_params(
            self,
            image_size: Union[Tuple[int, int], Tuple[int, int, int], Tuple[int, int, int, int]],
            patch_size: Union[Tuple[int, int], Tuple[int, int, int]],
            channel_axis: Optional[int] = None,
            bytes_per_pixel: int = 4,  # 4 byte are float32
            l1_cache_size_per_core_in_bytes: int = 32768,  # 1 Kibibyte (KiB) = 2^10 Byte;  32 KiB = 32768 Byte
            l3_cache_size_per_core_in_bytes: int = 1441792,  # 1 Mibibyte (MiB) = 2^20 Byte = 1.048.576 Byte; 1.375MiB = 1441792 Byte
            safety_factor: float = 0.8  # we do not fill the caches to the brim. 0.8 means we target 80% of the caches
    ):
        """
        Computes a recommended block and chunk size for saving arrays with Blosc v2.

        Blosc2 NDIM documentation:
        "Having a second partition allows for greater flexibility in fitting different partitions to different CPU cache levels.
        Typically, the first partition (also known as chunks) should be sized to fit within the L3 cache,
        while the second partition (also known as blocks) should be sized to fit within the L2 or L1 caches,
        depending on whether the priority is compression ratio or speed."
        (Source: https://www.blosc.org/posts/blosc2-ndim-intro/)

        Our approach is not fully optimized for this yet.
        Currently, we aim to fit the uncompressed block within the L1 cache, accepting that it might occasionally spill over into L2, which we consider acceptable.

        Note: This configuration is specifically optimized for nnU-Net data loading, where each read operation is performed by a single core, so multi-threading is not an option.

        The default cache values are based on an older Intel 4110 CPU with 32KB L1, 128KB L2, and 1408KB L3 cache per core.
        We haven't further optimized for modern CPUs with larger caches, as our data must still be compatible with the older systems.

        Args:
            image_size (Tuple[int, ...]): Image shape. Use a 2D, 3D,
                or 4D size; 2D/3D inputs are internally expanded to 4D with
                leading singleton axes.
            patch_size (Union[Tuple[int, int], Tuple[int, int, int]]): Patch
                size for spatial dimensions. Use a 2-tuple (x, y) or 3-tuple
                (x, y, z).
            channel_axis (Optional[int]): Index of the channel axis in
                ``image_size``; it is moved to the front internally and moved
                back in the result.
            bytes_per_pixel (int): Number of bytes per element. Defaults to 4
                for float32.
            l1_cache_size_per_core_in_bytes (int): L1 cache per core in bytes.
            l3_cache_size_per_core_in_bytes (int): L3 cache per core in bytes.
            safety_factor (float): Safety factor to avoid filling caches.

        Returns:
            Tuple[List[int], List[int]]: Recommended chunk size and block
                size, each with one entry per axis of ``image_size``.

        Raises:
            RuntimeError: If the expanded image size is not 4D or the patch
                size is not 2D/3D.
        """
        def _move_index_list(a, src, dst):
            a = list(a)
            x = a.pop(src)
            a.insert(dst, x)
            return a

        # Pad 2D/3D shapes with leading singleton axes so everything below is 4D.
        num_squeezes = 0
        if len(image_size) == 2:
            image_size = (1, 1, *image_size)
            num_squeezes = 2
        elif len(image_size) == 3:
            image_size = (1, *image_size)
            num_squeezes = 1

        if channel_axis is not None:
            image_size = _move_index_list(image_size, channel_axis + num_squeezes, 0)

        if len(image_size) != 4:
            raise RuntimeError("Image size must be 4D.")

        if len(patch_size) not in (2, 3):
            raise RuntimeError("Patch size must be 2D or 3D.")

        num_channels = image_size[0]
        if len(patch_size) == 2:
            patch_size = [1, *patch_size]
        patch_size = np.array(patch_size)
        # Start with the patch size rounded up to the next power of two per axis.
        block_size = np.array((num_channels, *[2 ** (max(0, math.ceil(math.log2(i)))) for i in patch_size]))

        l1_budget = l1_cache_size_per_core_in_bytes * safety_factor
        l3_budget = l3_cache_size_per_core_in_bytes * safety_factor

        # shrink the block size until it fits in L1
        estimated_nbytes_block = np.prod(block_size) * bytes_per_pixel
        while estimated_nbytes_block > l1_budget:
            # pick largest deviation from patch_size that is not 1
            axis_order = np.argsort(block_size[1:] / patch_size)[::-1]
            idx = 0
            picked_axis = axis_order[idx]
            while block_size[picked_axis + 1] == 1:
                idx += 1
                picked_axis = axis_order[idx]
            # now reduce that axis to the next lowest power of 2
            block_size[picked_axis + 1] = 2 ** (max(0, math.floor(math.log2(block_size[picked_axis + 1] - 1))))
            block_size[picked_axis + 1] = min(block_size[picked_axis + 1], image_size[picked_axis + 1])
            estimated_nbytes_block = np.prod(block_size) * bytes_per_pixel

        block_size = np.array([min(i, j) for i, j in zip(image_size, block_size)])

        # note: there is no use extending the chunk size to 3d when we have a 2d patch size! This would unnecessarily
        # load data into L3
        # now tile the blocks into chunks until we hit image_size or the l3 cache per core limit
        chunk_size = deepcopy(block_size)
        estimated_nbytes_chunk = np.prod(chunk_size) * bytes_per_pixel
        while estimated_nbytes_chunk < l3_budget:
            if patch_size[0] == 1 and all(i == j for i, j in zip(chunk_size[2:], image_size[2:])):
                break
            if all(i == j for i, j in zip(chunk_size, image_size)):
                break
            # find axis that deviates from block_size the most
            axis_order = np.argsort(chunk_size[1:] / block_size[1:])
            idx = 0
            picked_axis = axis_order[idx]
            while chunk_size[picked_axis + 1] == image_size[picked_axis + 1] or patch_size[picked_axis] == 1:
                idx += 1
                picked_axis = axis_order[idx]
            chunk_size[picked_axis + 1] += block_size[picked_axis + 1]
            chunk_size[picked_axis + 1] = min(chunk_size[picked_axis + 1], image_size[picked_axis + 1])
            estimated_nbytes_chunk = np.prod(chunk_size) * bytes_per_pixel
            if np.mean([i / j for i, j in zip(chunk_size[1:], patch_size)]) > 1.5:
                # chunk size should not exceed patch size * 1.5 on average
                chunk_size[picked_axis + 1] -= block_size[picked_axis + 1]
                break
        # better safe than sorry
        chunk_size = [min(i, j) for i, j in zip(image_size, chunk_size)]

        # Undo the channel move and drop the padding axes added at the top.
        if channel_axis is not None:
            block_size = _move_index_list(block_size, 0, channel_axis + num_squeezes)
            chunk_size = _move_index_list(chunk_size, 0, channel_axis + num_squeezes)

        block_size = block_size[num_squeezes:]
        chunk_size = chunk_size[num_squeezes:]

        return [int(value) for value in chunk_size], [int(value) for value in block_size]

    def _comp_and_validate_blosc2_meta(self, meta_blosc2, patch_size, chunk_size, block_size, shape, channel_axis):
        """Resolves patch/chunk/block sizes into a validated MetaBlosc2 object.

        Args:
            meta_blosc2: Previously known Blosc2 metadata (may be None); its
                ``patch_size`` is reused when ``patch_size == "default"``.
            patch_size: "default", None, an int, or a per-axis list/tuple.
            chunk_size: Explicit chunk size or None.
            block_size: Explicit block size or None.
            shape: Shape of the array to be stored.
            channel_axis: Index of the channel axis or None.

        Returns:
            MetaBlosc2: Metadata holding the resolved chunk, block and patch sizes.

        Raises:
            NotImplementedError: If a patch size is given explicitly for an
                image that is not 2D or 3D (channel axis excluded).
            RuntimeError: If both patch_size and chunk_size/block_size are
                set explicitly.
        """
        explicit_patch = patch_size is not None and patch_size != "default"
        # 2D/3D refers to the spatial dimensions; this also accepts a 2D
        # image with a channel axis (3 array dimensions).
        if explicit_patch and not self._supports_patch_optimization(shape, channel_axis):
            raise NotImplementedError("Chunk and block size optimization based on patch size is only implemented for 2D and 3D images. Please set the chunk and block size manually or set to None for blosc2 to determine a chunk and block size.")
        if explicit_patch and (chunk_size is not None or block_size is not None):
            raise RuntimeError("patch_size and chunk_size / block_size cannot both be explicitly set.")

        ndims = len(shape) if channel_axis is None else len(shape) - 1
        if patch_size == "default":
            if meta_blosc2 is not None and meta_blosc2.patch_size is not None:  # Use previously loaded patch size, when patch size is not explicitly set and a patch size from a previously loaded image exists
                patch_size = meta_blosc2.patch_size
            else:  # Use default patch size, when patch size is not explicitly set and no patch size from a previously loaded image exists
                patch_size = [MLARRAY_DEFAULT_PATCH_SIZE] * ndims

        # The patch size covers spatial axes only, so an int expands to
        # `ndims` entries (not len(shape)), consistent with the default above.
        patch_size = self._expand_patch_size(patch_size, ndims)

        if patch_size is not None:
            chunk_size, block_size = self.comp_blosc2_params(shape, patch_size, channel_axis)

        meta_blosc2 = MetaBlosc2(chunk_size, block_size, patch_size)
        meta_blosc2._validate_and_cast(len(shape), channel_axis)
        return meta_blosc2

    # ------------------------------------------------------------------
    # Metadata I/O
    # ------------------------------------------------------------------

    def _read_meta(self):
        """Reads the "mlarray" vlmeta entry (for ".mla" files) and installs it as ``self.meta``."""
        meta = Meta()
        if self.support_metadata and isinstance(self._store, blosc2.ndarray.NDArray):
            meta = self._store.vlmeta["mlarray"]
            meta = Meta.from_dict(meta)
        self._validate_and_add_meta(meta)

    def _write_metadata(self, force=False):
        """Writes ``self.meta`` to the "mlarray" vlmeta entry of the store.

        Nothing is written unless the file supports metadata, the store is a
        Blosc2 NDArray, and either the mmap mode is writable ('r+', 'w+') or
        ``force`` is True.

        Raises:
            RuntimeError: If the metadata is not JSON-serializable.
        """
        writable = self.mmap in ('r+', 'w+') or force
        if self.support_metadata and isinstance(self._store, blosc2.ndarray.NDArray) and writable:
            metadata = self.meta.to_dict()
            if not is_serializable(metadata):
                raise RuntimeError("Metadata is not serializable.")
            self._store.vlmeta["mlarray"] = metadata

    def _validate_and_add_meta(self, meta, spacing=None, origin=None, direction=None, channel_axis=None):
        """Installs ``meta`` as ``self.meta`` and applies the given spatial overrides.

        Args:
            meta: None, a dict (wrapped as ``Meta(image=meta)``) or a Meta object.
            spacing: Optional spacing override.
            origin: Optional origin override.
            direction: Optional direction override.
            channel_axis: Optional channel axis override.

        Raises:
            ValueError: If ``meta`` is neither None, a dict nor a Meta object.
        """
        if meta is None:
            meta = Meta()
        elif not isinstance(meta, (dict, Meta)):
            raise ValueError("Meta must be None, a dict or a Meta object.")
        elif isinstance(meta, dict):
            meta = Meta(image=meta)
        self.meta = meta
        self.meta._mlarray_version = MLARRAY_VERSION
        overrides = (
            ("spacing", spacing),
            ("origin", origin),
            ("direction", direction),
            ("channel_axis", channel_axis),
        )
        for field_name, value in overrides:
            if value is not None:
                setattr(self.meta.spatial, field_name, value)
        self.meta.spatial.shape = self.shape
        self.meta.spatial._validate_and_cast(self._spatial_ndim)
576
+
mlarray/utils.py ADDED
@@ -0,0 +1,17 @@
1
+ import json
2
+
3
+
4
def is_serializable(d: dict) -> bool:
    """Checks whether a dictionary is JSON-serializable.

    Args:
        d (dict): Input dictionary to test.

    Returns:
        bool: True when ``json.dumps(d)`` succeeds, otherwise False.
    """
    try:
        json.dumps(d)
    except (TypeError, OverflowError, ValueError):
        # ValueError covers circular references, which json.dumps rejects
        # with "Circular reference detected".
        return False
    return True