mlarray 0.0.23__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
mlarray/mlarray.py ADDED
@@ -0,0 +1,739 @@
1
+ from copy import deepcopy
2
+ import numpy as np
3
+ import blosc2
4
+ import math
5
+ from typing import Dict, Optional, Union, List, Tuple
6
+ from pathlib import Path
7
+ import os
8
+ from mlarray.meta import Meta, MetaBlosc2
9
+ from mlarray.utils import is_serializable
10
+
11
MLARRAY_SUFFIX = "mla"  # file extension of the MLArray container format
MLARRAY_VERSION = "v0"  # metadata format version stamped into every file
MLARRAY_DEFAULT_PATCH_SIZE = 192  # default per-axis patch size for chunk/block optimization
14
+
15
+
16
class MLArray:
    def __init__(
            self,
            array: Optional[Union[np.ndarray, str, Path]] = None,
            spacing: Optional[Union[List, Tuple, np.ndarray]] = None,
            origin: Optional[Union[List, Tuple, np.ndarray]] = None,
            direction: Optional[Union[List, Tuple, np.ndarray]] = None,
            meta: Optional[Union[Dict, "Meta"]] = None,
            channel_axis: Optional[int] = None,
            num_threads: int = 1,
            copy: Optional['MLArray'] = None) -> None:
        """Initializes a MLArray instance.

        The MLArray file format (".mla") is a Blosc2-compressed container
        with standardized metadata support for N-dimensional medical images.

        Args:
            array (Optional[Union[np.ndarray, str, Path]]): Input data or file
                path. Use a numpy ndarray for in-memory arrays, or a string/Path
                to load a ".b2nd" or ".mla" file. If None, an empty MLArray
                instance is created.
            spacing (Optional[Union[List, Tuple, np.ndarray]]): Spacing per
                spatial axis with length equal to the number of spatial
                dimensions (e.g., [sx, sy, sz]).
            origin (Optional[Union[List, Tuple, np.ndarray]]): Origin per axis
                with length equal to the number of spatial dimensions.
            direction (Optional[Union[List, Tuple, np.ndarray]]): Direction
                cosine matrix with shape (ndims, ndims) for spatial dimensions.
            meta (Optional[Union[Dict, Meta]]): Free-form metadata dictionary or
                Meta instance. Must be JSON-serializable when saving. A dict is
                internally converted into a Meta object, interpreted as
                meta.image metadata.
            channel_axis (Optional[int]): Axis index that represents channels
                in the array (e.g., 0 for CHW or -1 for HWC). If None, the
                array is treated as purely spatial.
            num_threads (int): Number of threads for Blosc2 operations.
            copy (Optional[MLArray]): Another MLArray instance to copy metadata
                fields from. If provided, its metadata overrides any metadata
                set via arguments.

        Raises:
            RuntimeError: If ``array`` is a filepath and any metadata argument
                is set at the same time.
        """
        self.filepath = None
        self.support_metadata = None
        self.mmap = None
        self.meta = None
        if isinstance(array, (str, Path)) and (spacing is not None or origin is not None or direction is not None or meta is not None or channel_axis is not None or copy is not None):
            # Bug fix: the original did `raise ("...")`, i.e. raised a plain
            # string, which itself fails with "exceptions must derive from
            # BaseException". Raise RuntimeError for consistency with the
            # other validation errors in this class.
            raise RuntimeError("Spacing, origin, direction, meta, channel_axis or copy cannot be set when array is a filepath.")
        if isinstance(array, (str, Path)):
            self.load(array, num_threads)
        else:
            self._store = array
            self._validate_and_add_meta(meta, spacing, origin, direction, channel_axis)

        if copy is not None:
            # Copied metadata deliberately overrides anything set above.
            self.meta.copy_from(copy.meta)
72
    def open(
            self,
            filepath: Union[str, Path],
            shape: Optional[Union[List, Tuple, np.ndarray]] = None,
            dtype: Optional[np.dtype] = None,
            channel_axis: Optional[int] = None,
            mmap: str = 'r',
            # 'default' means the default patch size of 192 is used, unless a
            # patch_size from a previously loaded MLArray image exists, in
            # which case that one takes precedence.
            patch_size: Optional[Union[int, List, Tuple]] = 'default',
            chunk_size: Optional[Union[int, List, Tuple]] = None,
            block_size: Optional[Union[int, List, Tuple]] = None,
            num_threads: int = 1,
            cparams: Optional[Dict] = None,
            dparams: Optional[Dict] = None
    ):
        """Open an existing Blosc2 file or create a new one with memory mapping.

        This method supports both MLArray (".mla") and plain Blosc2 (".b2nd")
        files. When creating a new file, both ``shape`` and ``dtype`` must be
        provided and ``mmap`` must be 'w+'.

        WARNING:
            The MLArray format standard and standardized metadata are honored
            only for ".mla". For ".b2nd", metadata is ignored when loading.

        Args:
            filepath (Union[str, Path]): Target file path. Must end with
                ".b2nd" or ".mla".
            shape (Optional[Union[List, Tuple, np.ndarray]]): Shape of the array
                to create. If provided, a new file is created. Length must match
                the full array dimensionality (including channels if present).
            dtype (Optional[np.dtype]): Numpy dtype for a newly created array.
            channel_axis (Optional[int]): Axis index for channels in the array.
                Used for patch/chunk/block calculations.
            mmap (str): Blosc2 mmap mode. One of "r", "r+", "w+", "c".
            patch_size (Optional[Union[int, List, Tuple]]): Patch size hint for
                chunk/block optimization. An int for isotropic sizes or a
                list/tuple with length equal to the number of spatial
                dimensions. Use "default" for the default patch size of 192.
            chunk_size (Optional[Union[int, List, Tuple]]): Explicit chunk size.
                Ignored when ``patch_size`` is provided.
            block_size (Optional[Union[int, List, Tuple]]): Explicit block size.
                Ignored when ``patch_size`` is provided.
            num_threads (int): Number of threads for Blosc2 operations.
            cparams (Optional[Dict]): Blosc2 compression parameters.
            dparams (Optional[Dict]): Blosc2 decompression parameters.

        Returns:
            MLArray: The current instance (for chaining).

        Raises:
            RuntimeError: If the file extension is invalid, if shape/dtype are
                inconsistent, or if mmap mode is invalid for creation.
        """
        self.filepath = str(filepath)
        if not str(filepath).endswith(".b2nd") and not str(filepath).endswith(f".{MLARRAY_SUFFIX}"):
            raise RuntimeError(f"MLArray requires '.b2nd' or '.{MLARRAY_SUFFIX}' as extension.")

        # Creation is only valid when no file exists yet and the caller
        # supplies shape + dtype together with mmap='w+'.
        if Path(filepath).is_file() and (shape is not None or dtype is not None):
            raise RuntimeError("Cannot create a new file as a file exists already under that path. Explicitly set shape and dtype only if you intent to create a new file.")
        if (shape is not None and dtype is None) or (shape is None and dtype is not None):
            raise RuntimeError("Both shape and dtype must be set if you intend to create a new file.")
        if shape is not None and mmap != 'w+':
            raise RuntimeError("mmap must be 'w+' (create/overwrite) if you intend to write a new file. Explicitly set shape and dtype only if you intent to create a new file.")
        if (shape is None or dtype is None) and mmap == 'w+':
            raise RuntimeError("Shape and dtype must be set explicitly when mmap is 'w+'. Explicitly set shape and dtype only if you intent to create a new file.")
        if mmap not in ('r', 'r+', 'w+', 'c'):
            raise RuntimeError("mmap must be one of the following: 'r', 'r+', 'w+', 'c'")

        create_array = mmap == 'w+'

        if create_array:
            # Compute/validate the chunk & block layout before allocating.
            self.meta._blosc2 = self._comp_and_validate_blosc2_meta(self.meta._blosc2, patch_size, chunk_size, block_size, shape, channel_axis)
            self.meta._has_array = True

        # Standardized metadata is only honored for ".mla" files.
        self.support_metadata = str(filepath).endswith(f".{MLARRAY_SUFFIX}")

        blosc2.set_nthreads(num_threads)
        if cparams is None:
            cparams = {'codec': blosc2.Codec.ZSTD, 'clevel': 8,}
        if dparams is None:
            dparams = {'nthreads': num_threads}

        if create_array:
            self._store = blosc2.empty(shape=shape, dtype=dtype, urlpath=str(filepath), chunks=self.meta._blosc2.chunk_size, blocks=self.meta._blosc2.block_size, cparams=cparams, dparams=dparams, mmap_mode=mmap)
        else:
            self._store = blosc2.open(urlpath=str(filepath), dparams=dparams, mmap_mode=mmap)
            # Only an existing file can carry metadata to read back.
            self._read_meta()
        if self.meta._has_array == True:
            # Mirror the actual on-disk chunk/block layout into the metadata.
            self.meta._blosc2.chunk_size = list(self._store.chunks)
            self.meta._blosc2.block_size = list(self._store.blocks)
        self.mmap = mmap
        self._write_metadata()
        return self
169
+ def close(self):
170
+ """Flush metadata and close the underlying store.
171
+
172
+ After closing, the MLArray instance no longer has an attached array.
173
+ """
174
+ self._write_metadata()
175
+ self._store = None
176
+ self.filepath = None
177
+ self.support_metadata = None
178
+ self.mmap = None
179
+ self.meta = None
180
+
181
+ def load(
182
+ self,
183
+ filepath: Union[str, Path],
184
+ num_threads: int = 1,
185
+ ):
186
+ """Loads a Blosc2-compressed file. Both MLArray ('.mla') and Blosc2 ('.b2nd') files are supported.
187
+
188
+ WARNING:
189
+ MLArray supports both ".b2nd" and ".mla" files. The MLArray
190
+ format standard and standardized metadata are honored only for
191
+ ".mla". For ".b2nd", metadata is ignored when loading.
192
+
193
+ Args:
194
+ filepath (Union[str, Path]): Path to the Blosc2 file to be loaded.
195
+ The filepath needs to have the extension ".b2nd" or ".mla".
196
+ num_threads (int): Number of threads to use for loading the file.
197
+
198
+ Raises:
199
+ RuntimeError: If the file extension is not ".b2nd" or ".mla".
200
+ """
201
+ self.filepath = str(filepath)
202
+ if not str(filepath).endswith(".b2nd") and not str(filepath).endswith(f".{MLARRAY_SUFFIX}"):
203
+ raise RuntimeError(f"MLArray requires '.b2nd' or '.{MLARRAY_SUFFIX}' as extension.")
204
+ self.support_metadata = str(filepath).endswith(f".{MLARRAY_SUFFIX}")
205
+ blosc2.set_nthreads(num_threads)
206
+ dparams = {'nthreads': num_threads}
207
+ self._store = blosc2.open(urlpath=str(filepath), cdparams=dparams, mode='r')
208
+ self.mmap = None
209
+ self._read_meta()
210
+ if self.meta._has_array == True:
211
+ self.meta._blosc2.chunk_size = list(self._store.chunks)
212
+ self.meta._blosc2.block_size = list(self._store.blocks)
213
+
214
    def save(
            self,
            filepath: Union[str, Path],
            # 'default' means the default patch size of 192 is used, unless a
            # patch_size from a previously loaded MLArray image exists, in
            # which case that one takes precedence.
            patch_size: Optional[Union[int, List, Tuple]] = 'default',
            chunk_size: Optional[Union[int, List, Tuple]] = None,
            block_size: Optional[Union[int, List, Tuple]] = None,
            num_threads: int = 1,
            cparams: Optional[Dict] = None,
            dparams: Optional[Dict] = None
    ):
        """Saves the array to a Blosc2-compressed file. Both MLArray ('.mla') and Blosc2 ('.b2nd') files are supported.

        WARNING:
            The MLArray format standard and standardized metadata are honored
            only for ".mla". For ".b2nd", metadata is ignored when saving.

        Args:
            filepath (Union[str, Path]): Path to save the file. Must end with
                ".b2nd" or ".mla".
            patch_size (Optional[Union[int, List, Tuple]]): Patch size hint for
                chunk/block optimization. An int for isotropic sizes or a
                list/tuple with length equal to the number of dimensions.
                Use "default" to use the default patch size of 192.
            chunk_size (Optional[Union[int, List, Tuple]]): Explicit chunk size,
                or None to let Blosc2 decide. Ignored when patch_size is not None.
            block_size (Optional[Union[int, List, Tuple]]): Explicit block size,
                or None to let Blosc2 decide. Ignored when patch_size is not None.
            num_threads (int): Number of threads to use for saving the file.
            cparams (Optional[Dict]): Blosc2 compression parameters.
            dparams (Optional[Dict]): Blosc2 decompression parameters.

        Raises:
            RuntimeError: If the file extension is not ".b2nd" or ".mla".
        """
        if not str(filepath).endswith(".b2nd") and not str(filepath).endswith(f".{MLARRAY_SUFFIX}"):
            raise RuntimeError(f"MLArray requires '.b2nd' or '.{MLARRAY_SUFFIX}' as extension.")

        if self._store is not None:
            # Recompute/validate the chunk & block layout for the current shape.
            self.meta._blosc2 = self._comp_and_validate_blosc2_meta(self.meta._blosc2, patch_size, chunk_size, block_size, self._store.shape, self.meta.spatial.channel_axis)
            self.meta._has_array = True
        else:
            self.meta._has_array = False

        # Standardized metadata is only honored for ".mla" files.
        self.support_metadata = str(filepath).endswith(f".{MLARRAY_SUFFIX}")

        blosc2.set_nthreads(num_threads)
        if cparams is None:
            cparams = {'codec': blosc2.Codec.ZSTD, 'clevel': 8,}
        if dparams is None:
            dparams = {'nthreads': num_threads}

        if self._store is not None:
            # Materialize once so blosc2 compresses a contiguous buffer.
            array = np.ascontiguousarray(self._store[...])
            self._store = blosc2.asarray(array, urlpath=str(filepath), chunks=self.meta._blosc2.chunk_size, blocks=self.meta._blosc2.block_size, cparams=cparams, dparams=dparams)
        else:
            # Metadata-only file: store an empty placeholder array.
            # NOTE(review): this branch still reads self.meta._blosc2.chunk_size /
            # block_size even though the compute branch above was skipped —
            # verify Meta initializes _blosc2 when no array was attached,
            # otherwise this raises AttributeError.
            array = np.empty((0,))
            self._store = blosc2.asarray(array, urlpath=str(filepath), chunks=self.meta._blosc2.chunk_size, blocks=self.meta._blosc2.block_size, cparams=cparams, dparams=dparams)
        if self.meta._has_array == True:
            # Mirror the actual on-disk chunk/block layout into the metadata.
            self.meta._blosc2.chunk_size = list(self._store.chunks)
            self.meta._blosc2.block_size = list(self._store.blocks)
        self.mmap = None
        # force=True: save() always owns a freshly written file even though
        # self.mmap is None at this point.
        self._write_metadata(force=True)
280
+ def to_numpy(self):
281
+ """Return the underlying data as a NumPy array.
282
+
283
+ Returns:
284
+ np.ndarray: A NumPy view or copy of the stored array data.
285
+
286
+ Raises:
287
+ TypeError: If no array data is loaded.
288
+ """
289
+ if self._store is None or self.meta._has_array == False:
290
+ raise TypeError("MLArray has no array data loaded.")
291
+ return self._store[...]
292
+
293
+ def __getitem__(self, key):
294
+ """Return a slice or element from the underlying array.
295
+
296
+ Args:
297
+ key (Any): Any valid NumPy/Blosc2 indexing key (slices, ints, tuples,
298
+ boolean arrays).
299
+
300
+ Returns:
301
+ Any: The indexed value or subarray.
302
+
303
+ Raises:
304
+ TypeError: If no array data is loaded.
305
+ """
306
+ if self._store is None or self.meta._has_array == False:
307
+ raise TypeError("MLArray has no array data loaded.")
308
+ return self._store[key]
309
+
310
+ def __setitem__(self, key, value):
311
+ """Assign to a slice or element in the underlying array.
312
+
313
+ Args:
314
+ key (Any): Any valid NumPy/Blosc2 indexing key.
315
+ value (Any): Value(s) to assign. Must be broadcastable to the
316
+ selected region.
317
+
318
+ Raises:
319
+ TypeError: If no array data is loaded.
320
+ """
321
+ if self._store is None or self.meta._has_array == False:
322
+ raise TypeError("MLArray has no array data loaded.")
323
+ self._store[key] = value
324
+
325
+ def __iter__(self):
326
+ """Iterate over the first axis of the underlying array.
327
+
328
+ Returns:
329
+ Iterator: Iterator over the array's first dimension.
330
+
331
+ Raises:
332
+ TypeError: If no array data is loaded.
333
+ """
334
+ if self._store is None or self.meta._has_array == False:
335
+ raise TypeError("MLArray has no array data loaded.")
336
+ return iter(self._store)
337
+
338
+ def __len__(self):
339
+ """Return the length of the first array dimension.
340
+
341
+ Returns:
342
+ int: Size of axis 0, or 0 if no array is loaded.
343
+ """
344
+ if self._store is None or self.meta._has_array == False:
345
+ return 0
346
+ return len(self._store)
347
+
348
+ def __array__(self, dtype=None):
349
+ """NumPy array interface for implicit conversion.
350
+
351
+ Args:
352
+ dtype (Optional[np.dtype]): Optional dtype to cast to.
353
+
354
+ Returns:
355
+ np.ndarray: The underlying data as a NumPy array.
356
+
357
+ Raises:
358
+ TypeError: If no array data is loaded.
359
+ """
360
+ if self._store is None or self.meta._has_array == False:
361
+ raise TypeError("MLArray has no array data loaded.")
362
+ arr = np.asarray(self._store)
363
+ if dtype is not None:
364
+ return arr.astype(dtype)
365
+ return arr
366
+
367
+ @property
368
+ def spacing(self):
369
+ """Returns the image spacing.
370
+
371
+ Returns:
372
+ list: Spacing per spatial axis with length equal to the number of
373
+ spatial dimensions.
374
+ """
375
+ return self.meta.spatial.spacing
376
+
377
+ @property
378
+ def origin(self):
379
+ """Returns the image origin.
380
+
381
+ Returns:
382
+ list: Origin per spatial axis with length equal to the number of
383
+ spatial dimensions.
384
+ """
385
+ return self.meta.spatial.origin
386
+
387
+ @property
388
+ def direction(self):
389
+ """Returns the image direction.
390
+
391
+ Returns:
392
+ list: Direction cosine matrix with shape (ndims, ndims).
393
+ """
394
+ return self.meta.spatial.direction
395
+
396
    @property
    def affine(self) -> Optional[List]:
        """Computes the affine transformation matrix for the image.

        Built as ``direction @ diag(spacing)`` for the linear part with
        ``origin`` as the translation column; missing spacing/origin/direction
        default to ones/zeros/identity.

        Returns:
            list: Affine matrix with shape (ndims + 1, ndims + 1) as nested
                lists, or None if no array is loaded.
        """
        if self._store is None or self.meta._has_array == False:
            return None
        spacing = np.array(self.spacing) if self.spacing is not None else np.ones(self._spatial_ndim)
        origin = np.array(self.origin) if self.origin is not None else np.zeros(self._spatial_ndim)
        direction = np.array(self.direction) if self.direction is not None else np.eye(self._spatial_ndim)
        affine = np.eye(self._spatial_ndim + 1)
        affine[:self._spatial_ndim, :self._spatial_ndim] = direction @ np.diag(spacing)
        affine[:self._spatial_ndim, self._spatial_ndim] = origin
        # Returned as nested lists (not an ndarray) so it serializes cleanly.
        return affine.tolist()
414
+ @property
415
+ def translation(self):
416
+ """Extracts the translation vector from the affine matrix.
417
+
418
+ Returns:
419
+ list: Translation vector with length equal to the number of spatial
420
+ dimensions, or None if no array is loaded.
421
+ """
422
+ if self._store is None or self.meta._has_array == False:
423
+ return None
424
+ return np.array(self.affine)[:-1, -1].tolist()
425
+
426
+ @property
427
+ def scale(self):
428
+ """Extracts the scaling factors from the affine matrix.
429
+
430
+ Returns:
431
+ list: Scaling factors per axis with length equal to the number of
432
+ spatial dimensions, or None if no array is loaded.
433
+ """
434
+ if self._store is None or self.meta._has_array == False:
435
+ return None
436
+ scales = np.linalg.norm(np.array(self.affine)[:-1, :-1], axis=0)
437
+ return scales.tolist()
438
+
439
+ @property
440
+ def rotation(self):
441
+ """Extracts the rotation matrix from the affine matrix.
442
+
443
+ Returns:
444
+ list: Rotation matrix with shape (ndims, ndims), or None if no array
445
+ is loaded.
446
+ """
447
+ if self._store is None or self.meta._has_array == False:
448
+ return None
449
+ rotation_matrix = np.array(self.affine)[:-1, :-1] / np.array(self.scale)
450
+ return rotation_matrix.tolist()
451
+
452
+ @property
453
+ def shear(self):
454
+ """Computes the shear matrix from the affine matrix.
455
+
456
+ Returns:
457
+ list: Shear matrix with shape (ndims, ndims), or None if no array is
458
+ loaded.
459
+ """
460
+ if self._store is None or self.meta._has_array == False:
461
+ return None
462
+ scales = np.array(self.scale)
463
+ rotation_matrix = np.array(self.rotation)
464
+ shearing_matrix = np.dot(rotation_matrix.T, np.array(self.affine)[:-1, :-1]) / scales[:, None]
465
+ return shearing_matrix.tolist()
466
+
467
+ @property
468
+ def shape(self):
469
+ """Returns the shape of the array.
470
+
471
+ Returns:
472
+ tuple: Shape of the underlying array, or None if no array is loaded.
473
+ """
474
+ if self._store is None or self.meta._has_array == False:
475
+ return None
476
+ return self._store.shape
477
+
478
+ @property
479
+ def dtype(self):
480
+ """Returns the dtype of the array.
481
+
482
+ Returns:
483
+ np.dtype: Dtype of the underlying array, or None if no array is
484
+ loaded.
485
+ """
486
+ if self._store is None or self.meta._has_array == False:
487
+ return None
488
+ return self._store.dtype
489
+
490
+ @property
491
+ def ndim(self) -> int:
492
+ """Returns the number of dimensions of the array.
493
+
494
+ Returns:
495
+ int: Number of dimensions, or None if no array is loaded.
496
+ """
497
+ if self._store is None or self.meta._has_array == False:
498
+ return None
499
+ return len(self._store.shape)
500
+
501
+ @property
502
+ def _spatial_ndim(self) -> int:
503
+ """Returns the number of spatial dimensions.
504
+
505
+ If ``channel_axis`` is set, the channel dimension is excluded.
506
+
507
+ Returns:
508
+ int: Number of spatial dimensions, or None if no array is loaded.
509
+ """
510
+ if self._store is None or self.meta._has_array == False:
511
+ return None
512
+ ndim = len(self._store.shape)
513
+ if self.meta.spatial.channel_axis is not None:
514
+ ndim -= 1
515
+ return ndim
516
+
517
    def comp_blosc2_params(
            self,
            image_size: Union[Tuple[int, int], Tuple[int, int, int], Tuple[int, int, int, int]],
            patch_size: Union[Tuple[int, int], Tuple[int, int, int]],
            channel_axis: Optional[int] = None,
            bytes_per_pixel: int = 4,  # 4 bytes == float32
            l1_cache_size_per_core_in_bytes: int = 32768,  # 32 KiB = 32768 bytes
            l3_cache_size_per_core_in_bytes: int = 1441792,  # 1.375 MiB = 1441792 bytes
            safety_factor: float = 0.8  # don't fill the caches to the brim; target 80% of each cache
    ):
        """
        Computes a recommended block and chunk size for saving arrays with Blosc v2.

        Blosc2 NDIM documentation:
            "Having a second partition allows for greater flexibility in fitting different partitions to different CPU cache levels.
            Typically, the first partition (also known as chunks) should be sized to fit within the L3 cache,
            while the second partition (also known as blocks) should be sized to fit within the L2 or L1 caches,
            depending on whether the priority is compression ratio or speed."
            (Source: https://www.blosc.org/posts/blosc2-ndim-intro/)

        Our approach is not fully optimized for this yet.
        Currently, we aim to fit the uncompressed block within the L1 cache, accepting that it might occasionally spill over into L2, which we consider acceptable.

        Note: This configuration is specifically optimized for nnU-Net data loading, where each read operation is performed by a single core, so multi-threading is not an option.

        The default cache values are based on an older Intel 4110 CPU with 32KB L1, 128KB L2, and 1408KB L3 cache per core.
        We haven't further optimized for modern CPUs with larger caches, as our data must still be compatible with the older systems.

        Args:
            image_size (Union[Tuple[int, int], Tuple[int, int, int], Tuple[int, int, int, int]]):
                Image shape. 2D/3D inputs are internally expanded to 4D
                (channels first).
            patch_size (Union[Tuple[int, int], Tuple[int, int, int]]): Patch
                size for the spatial dimensions: a 2-tuple (x, y) or 3-tuple
                (x, y, z).
            channel_axis (Optional[int]): Axis index for channels in the
                original array. If set, that axis is moved to the front for
                the cache calculations and moved back at the end.
            bytes_per_pixel (int): Bytes per element. Defaults to 4 (float32).
            l1_cache_size_per_core_in_bytes (int): L1 cache per core in bytes.
            l3_cache_size_per_core_in_bytes (int): L3 cache per core in bytes.
            safety_factor (float): Fraction of each cache to actually target.

        Returns:
            Tuple[List[int], List[int]]: Recommended chunk size and block size.

        Raises:
            RuntimeError: If the (expanded) image size is not 4D or the patch
                size is not 2D/3D.
        """
        def _move_index_list(a, src, dst):
            # Move the element at index `src` to index `dst` within a list copy.
            a = list(a)
            x = a.pop(src)
            a.insert(dst, x)
            return a

        # Expand 2D/3D shapes to a canonical 4D layout by prepending singleton
        # axes; remember how many were added so they can be stripped later.
        num_squeezes = 0
        if len(image_size) == 2:
            image_size = (1, 1, *image_size)
            num_squeezes = 2
        elif len(image_size) == 3:
            image_size = (1, *image_size)
            num_squeezes = 1

        if channel_axis is not None:
            # Move the channel axis to the front for the cache-size math.
            image_size = _move_index_list(image_size, channel_axis+num_squeezes, 0)

        if len(image_size) != 4:
            raise RuntimeError("Image size must be 4D.")

        if not (len(patch_size) == 2 or len(patch_size) == 3):
            raise RuntimeError("Patch size must be 2D or 3D.")

        num_channels = image_size[0]
        if len(patch_size) == 2:
            patch_size = [1, *patch_size]
        patch_size = np.array(patch_size)
        # Initial block: all channels plus per-axis powers of two >= patch size.
        block_size = np.array((num_channels, *[2 ** (max(0, math.ceil(math.log2(i)))) for i in patch_size]))

        # shrink the block size until it fits in L1
        estimated_nbytes_block = np.prod(block_size) * bytes_per_pixel
        while estimated_nbytes_block > (l1_cache_size_per_core_in_bytes * safety_factor):
            # pick largest deviation from patch_size that is not 1
            axis_order = np.argsort(block_size[1:] / patch_size)[::-1]
            idx = 0
            picked_axis = axis_order[idx]
            # NOTE(review): both sides of this `or` test the same expression;
            # by analogy with the chunk loop below, the second operand was
            # presumably meant to be `patch_size[picked_axis] == 1` — confirm.
            # (Behavior is unchanged either way since a patch axis of 1 yields
            # an initial block axis of 1.)
            while block_size[picked_axis + 1] == 1 or block_size[picked_axis + 1] == 1:
                idx += 1
                picked_axis = axis_order[idx]
            # now reduce that axis to the next lowest power of 2
            block_size[picked_axis + 1] = 2 ** (max(0, math.floor(math.log2(block_size[picked_axis + 1] - 1))))
            block_size[picked_axis + 1] = min(block_size[picked_axis + 1], image_size[picked_axis + 1])
            estimated_nbytes_block = np.prod(block_size) * bytes_per_pixel

        # Blocks must never exceed the image itself.
        block_size = np.array([min(i, j) for i, j in zip(image_size, block_size)])

        # note: there is no use extending the chunk size to 3d when we have a 2d patch size! This would unnecessarily
        # load data into L3
        # now tile the blocks into chunks until we hit image_size or the l3 cache per core limit
        chunk_size = deepcopy(block_size)
        estimated_nbytes_chunk = np.prod(chunk_size) * bytes_per_pixel
        while estimated_nbytes_chunk < (l3_cache_size_per_core_in_bytes * safety_factor):
            # Stop growing once the spatial extent is exhausted (2D patches
            # only consider the last two axes).
            if patch_size[0] == 1 and all([i == j for i, j in zip(chunk_size[2:], image_size[2:])]):
                break
            if all([i == j for i, j in zip(chunk_size, image_size)]):
                break
            # find axis that deviates from block_size the most
            axis_order = np.argsort(chunk_size[1:] / block_size[1:])
            idx = 0
            picked_axis = axis_order[idx]
            # Skip axes that are already at the image boundary or are flat in
            # the patch.
            while chunk_size[picked_axis + 1] == image_size[picked_axis + 1] or patch_size[picked_axis] == 1:
                idx += 1
                picked_axis = axis_order[idx]
            chunk_size[picked_axis + 1] += block_size[picked_axis + 1]
            chunk_size[picked_axis + 1] = min(chunk_size[picked_axis + 1], image_size[picked_axis + 1])
            estimated_nbytes_chunk = np.prod(chunk_size) * bytes_per_pixel
            if np.mean([i / j for i, j in zip(chunk_size[1:], patch_size)]) > 1.5:
                # chunk size should not exceed patch size * 1.5 on average;
                # roll back the last growth step.
                chunk_size[picked_axis + 1] -= block_size[picked_axis + 1]
                break
        # better safe than sorry
        chunk_size = [min(i, j) for i, j in zip(image_size, chunk_size)]

        if channel_axis is not None:
            # Move the channel axis back to its original position.
            block_size = _move_index_list(block_size, 0, channel_axis+num_squeezes)
            chunk_size = _move_index_list(chunk_size, 0, channel_axis+num_squeezes)

        # Strip the singleton axes that were prepended for 2D/3D inputs.
        block_size = block_size[num_squeezes:]
        chunk_size = chunk_size[num_squeezes:]

        return [int(value) for value in chunk_size], [int(value) for value in block_size]
646
+ def _comp_and_validate_blosc2_meta(self, meta_blosc2, patch_size, chunk_size, block_size, shape, channel_axis):
647
+ """Compute and validate Blosc2 chunk/block metadata.
648
+
649
+ Args:
650
+ meta_blosc2 (Optional[MetaBlosc2]): Existing Blosc2 metadata to use
651
+ as defaults.
652
+ patch_size (Optional[Union[int, List, Tuple, str]]): Patch size hint
653
+ or "default". See ``open``/``save`` for expected shapes.
654
+ chunk_size (Optional[Union[int, List, Tuple]]): Explicit chunk size.
655
+ block_size (Optional[Union[int, List, Tuple]]): Explicit block size.
656
+ shape (Union[List, Tuple, np.ndarray]): Full array shape including
657
+ channels if present.
658
+ channel_axis (Optional[int]): Channel axis index, if any.
659
+
660
+ Returns:
661
+ MetaBlosc2: Validated Blosc2 metadata instance.
662
+ """
663
+ if patch_size is not None and patch_size != "default" and not ((len(shape) == 2 and channel_axis is None) or (len(shape) == 3 and channel_axis is None) or (len(shape) == 4 and channel_axis is not None) or (len(shape) == 4 and channel_axis is not None)):
664
+ raise NotImplementedError("Chunk and block size optimization based on patch size is only implemented for 2D and 3D images. Please set the chunk and block size manually or set to None for blosc2 to determine a chunk and block size.")
665
+ if patch_size is not None and patch_size != "default" and (chunk_size is not None or block_size is not None):
666
+ raise RuntimeError("patch_size and chunk_size / block_size cannot both be explicitly set.")
667
+
668
+ ndims = len(shape) if channel_axis is None else len(shape) - 1
669
+ if patch_size == "default":
670
+ if meta_blosc2 is not None and meta_blosc2.patch_size is not None: # Use previously loaded patch size, when patch size is not explicitly set and a patch size from a previously loaded image exists
671
+ patch_size = meta_blosc2.patch_size
672
+ else: # Use default patch size, when patch size is not explicitly set and no patch size from a previously loaded image exists
673
+ patch_size = [MLARRAY_DEFAULT_PATCH_SIZE] * ndims
674
+
675
+ patch_size = [patch_size] * len(shape) if isinstance(patch_size, int) else patch_size
676
+
677
+ if patch_size is not None:
678
+ chunk_size, block_size = self.comp_blosc2_params(shape, patch_size, channel_axis)
679
+
680
+ meta_blosc2 = MetaBlosc2(chunk_size, block_size, patch_size)
681
+ meta_blosc2._validate_and_cast(len(shape), channel_axis)
682
+ return meta_blosc2
683
+
684
+ def _read_meta(self):
685
+ """Read MLArray metadata from the underlying store, if available."""
686
+ meta = Meta()
687
+ if self.support_metadata and isinstance(self._store, blosc2.ndarray.NDArray):
688
+ meta = self._store.vlmeta["mlarray"]
689
+ meta = Meta.from_dict(meta)
690
+ self._validate_and_add_meta(meta)
691
+
692
+ def _write_metadata(self, force=False):
693
+ """Write MLArray metadata to the underlying store if supported.
694
+
695
+ Args:
696
+ force (bool): If True, write even when mmap mode is read-only.
697
+ """
698
+ if self.support_metadata and isinstance(self._store, blosc2.ndarray.NDArray) and (self.mmap in ('r+', 'w+') or force):
699
+ metadata = self.meta.to_dict()
700
+ if not is_serializable(metadata):
701
+ raise RuntimeError("Metadata is not serializable.")
702
+ self._store.vlmeta["mlarray"] = metadata
703
+
704
+ def _validate_and_add_meta(self, meta, spacing=None, origin=None, direction=None, channel_axis=None):
705
+ """Validate and attach metadata to the MLArray instance.
706
+
707
+ Args:
708
+ meta (Optional[Union[dict, Meta]]): Metadata to attach. Dicts are
709
+ interpreted as ``meta.image`` fields.
710
+ spacing (Optional[Union[List, Tuple, np.ndarray]]): Spacing per
711
+ spatial axis.
712
+ origin (Optional[Union[List, Tuple, np.ndarray]]): Origin per
713
+ spatial axis.
714
+ direction (Optional[Union[List, Tuple, np.ndarray]]): Direction
715
+ cosine matrix with shape (ndims, ndims).
716
+ channel_axis (Optional[int]): Channel axis index, if any.
717
+
718
+ Raises:
719
+ ValueError: If ``meta`` is not None, dict, or Meta.
720
+ """
721
+ if meta is not None:
722
+ if not isinstance(meta, (dict, Meta)):
723
+ raise ValueError("Meta must be None, a dict or a Meta object.")
724
+ if isinstance(meta, dict):
725
+ meta = Meta(image=meta)
726
+ else:
727
+ meta = Meta()
728
+ self.meta = meta
729
+ self.meta._mlarray_version = MLARRAY_VERSION
730
+ if spacing is not None:
731
+ self.meta.spatial.spacing = spacing
732
+ if origin is not None:
733
+ self.meta.spatial.origin = origin
734
+ if direction is not None:
735
+ self.meta.spatial.direction = direction
736
+ if channel_axis is not None:
737
+ self.meta.spatial.channel_axis = channel_axis
738
+ self.meta.spatial.shape = self.shape
739
+ self.meta.spatial._validate_and_cast(self._spatial_ndim)