mlarray 0.0.10__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mlarray/__init__.py +54 -0
- mlarray/cli.py +58 -0
- mlarray/meta.py +578 -0
- mlarray/mlarray.py +576 -0
- mlarray/utils.py +17 -0
- mlarray-0.0.10.data/data/mlarray/assets/banner.png +0 -0
- mlarray-0.0.10.data/data/mlarray/assets/banner.png~ +0 -0
- mlarray-0.0.10.dist-info/METADATA +247 -0
- mlarray-0.0.10.dist-info/RECORD +13 -0
- mlarray-0.0.10.dist-info/WHEEL +5 -0
- mlarray-0.0.10.dist-info/entry_points.txt +3 -0
- mlarray-0.0.10.dist-info/licenses/LICENSE +21 -0
- mlarray-0.0.10.dist-info/top_level.txt +1 -0
mlarray/mlarray.py
ADDED
|
@@ -0,0 +1,576 @@
|
|
|
1
|
+
from copy import deepcopy
|
|
2
|
+
import numpy as np
|
|
3
|
+
import blosc2
|
|
4
|
+
import math
|
|
5
|
+
from typing import Dict, Optional, Union, List, Tuple
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
import os
|
|
8
|
+
from mlarray.meta import Meta, MetaBlosc2
|
|
9
|
+
from mlarray.utils import is_serializable
|
|
10
|
+
|
|
11
|
+
# File extension (without the leading dot) of MLArray containers; paths are
# checked against f".{MLARRAY_SUFFIX}" to decide whether metadata is supported.
MLARRAY_SUFFIX = "mla"
|
|
12
|
+
# Format version tag; it is stored on the Meta object as `_mlarray_version`
# whenever metadata is attached to an MLArray.
MLARRAY_VERSION = "v0"
|
|
13
|
+
# Per-axis patch size used for chunk/block optimisation when the caller leaves
# patch_size at "default" and no previously stored patch size is available.
MLARRAY_DEFAULT_PATCH_SIZE = 192
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class MLArray:
|
|
17
|
+
def __init__(
|
|
18
|
+
self,
|
|
19
|
+
array: Optional[Union[np.ndarray, str, Path]] = None,
|
|
20
|
+
spacing: Optional[Union[List, Tuple, np.ndarray]] = None,
|
|
21
|
+
origin: Optional[Union[List, Tuple, np.ndarray]] = None,
|
|
22
|
+
direction: Optional[Union[List, Tuple, np.ndarray]] = None,
|
|
23
|
+
meta: Optional[Union[Dict, Meta]] = None,
|
|
24
|
+
channel_axis: Optional[int] = None,
|
|
25
|
+
num_threads: int = 1,
|
|
26
|
+
copy: Optional['MLArray'] = None) -> None:
|
|
27
|
+
"""Initializes a MLArray instance.
|
|
28
|
+
|
|
29
|
+
The MLArray file format (".mla") is a Blosc2-compressed container
|
|
30
|
+
with standardized metadata support for N-dimensional medical images.
|
|
31
|
+
|
|
32
|
+
Args:
|
|
33
|
+
array (Union[np.ndarray, str, Path]): Input data or file path. Use
|
|
34
|
+
a numpy ndarray for in-memory arrays. Use a string or Path to
|
|
35
|
+
load a ".b2nd" or ".mla" file.
|
|
36
|
+
spacing (Optional[Union[List, Tuple, np.ndarray]]): Spacing per
|
|
37
|
+
axis. Provide a list/tuple/ndarray with length equal to the
|
|
38
|
+
number of dimensions (e.g., [sx, sy, sz]).
|
|
39
|
+
origin (Optional[Union[List, Tuple, np.ndarray]]): Origin per axis.
|
|
40
|
+
Provide a list/tuple/ndarray with length equal to the number of
|
|
41
|
+
dimensions.
|
|
42
|
+
direction (Optional[Union[List, Tuple, np.ndarray]]): Direction
|
|
43
|
+
cosine matrix. Provide a 2D list/tuple/ndarray with shape
|
|
44
|
+
(ndims, ndims).
|
|
45
|
+
meta (Optional[Dict | Meta]): Free-form metadata dictionary or Meta
|
|
46
|
+
instance. Must be JSON-serializable when saving.
|
|
47
|
+
If meta is passed as a Dict, it will internally be converted into a Meta object with the dict being interpreted as meta.image metadata.
|
|
48
|
+
num_threads (int): Number of threads for Blosc2 operations.
|
|
49
|
+
mode (str): Blosc2 open mode
|
|
50
|
+
- 'r': read-only, must exist (Default)
|
|
51
|
+
- 'a': read/write, create if doesn't exist (Currently not supported)
|
|
52
|
+
- 'w': create, overwrite if it exists (Currently not supported)
|
|
53
|
+
copy (Optional[MLArray]): Another MLArray instance to copy
|
|
54
|
+
metadata fields from.
|
|
55
|
+
"""
|
|
56
|
+
self.filepath = None
|
|
57
|
+
self.support_metadata = None
|
|
58
|
+
self.mmap = None
|
|
59
|
+
if isinstance(array, (str, Path)):
|
|
60
|
+
self.load(array, num_threads)
|
|
61
|
+
else:
|
|
62
|
+
self._store = array
|
|
63
|
+
self._validate_and_add_meta(meta, spacing, origin, direction, channel_axis)
|
|
64
|
+
|
|
65
|
+
if copy is not None:
|
|
66
|
+
self.meta.copy_from(copy.meta)
|
|
67
|
+
|
|
68
|
+
def open(
        self,
        filepath: Union[str, Path],
        shape: Optional[Union[List, Tuple, np.ndarray]] = None,
        dtype: Optional[np.dtype] = None,
        channel_axis: Optional[int] = None,
        mmap: str = 'r',
        patch_size: Optional[Union[int, List, Tuple]] = 'default',
        chunk_size: Optional[Union[int, List, Tuple]]= None,
        block_size: Optional[Union[int, List, Tuple]] = None,
        num_threads: int = 1,
        cparams: Optional[Dict] = None,
        dparams: Optional[Dict] = None
    ):
    """Open an existing file memory-mapped, or create a new empty one.

    A new array is created when both ``shape`` and ``dtype`` are given;
    otherwise the existing file at ``filepath`` is opened and its metadata
    is read.

    Args:
        filepath (Union[str, Path]): Target path. Must end with ".b2nd" or
            ".mla".
        shape (Optional[Union[List, Tuple, np.ndarray]]): Shape of the
            array to create. Leave as None to open an existing file.
        dtype (Optional[np.dtype]): Dtype of the array to create. Must be
            given together with ``shape``.
        channel_axis (Optional[int]): Channel axis used for the chunk and
            block size computation of a newly created array.
        mmap (str): Memory-map mode, one of 'r', 'r+', 'w+', 'c'.
        patch_size (Optional[Union[int, List, Tuple]]): Patch size hint for
            chunk/block optimization. 'default' uses a patch size already
            stored in the metadata if there is one, otherwise the default
            of 192.
        chunk_size (Optional[Union[int, List, Tuple]]): Explicit chunk size.
        block_size (Optional[Union[int, List, Tuple]]): Explicit block size.
        num_threads (int): Number of threads for Blosc2 operations.
        cparams (Optional[Dict]): Blosc2 compression parameters. Defaults
            to ZSTD at compression level 8.
        dparams (Optional[Dict]): Blosc2 decompression parameters. Defaults
            to ``{'nthreads': num_threads}``.

    Raises:
        RuntimeError: If the extension is unsupported, if a file already
            exists while ``shape``/``dtype`` are given, if only one of
            ``shape``/``dtype`` is given, if a new file is requested with
            ``mmap='r'``, or if ``mmap`` is not a supported mode.
    """
    path_str = str(filepath)
    self.filepath = path_str
    is_mla = path_str.endswith(f".{MLARRAY_SUFFIX}")
    if not (path_str.endswith(".b2nd") or is_mla):
        raise RuntimeError(f"MLArray requires '.b2nd' or '.{MLARRAY_SUFFIX}' as extension.")

    wants_shape = shape is not None
    wants_dtype = dtype is not None
    if Path(filepath).is_file() and (wants_shape or wants_dtype):
        raise RuntimeError("Cannot create a new file as a file exists already under that path. Explicitly set shape and dtype only if you intent to create a new file.")
    if wants_shape != wants_dtype:
        raise RuntimeError("Both shape and dtype must be set if you intend to create a new file.")
    if wants_shape and mmap == 'r':
        raise RuntimeError("mmap_mode cannot be 'r' (read-only) if you intend to write a new file. Explicitly set shape and dtype only if you intent to create a new file.")
    if mmap not in ('r', 'r+', 'w+', 'c'):
        raise RuntimeError("mmap_mode must be one of the following: 'r', 'r+', 'w+', 'c'")

    create_array = wants_shape
    if create_array:
        # Chunk/block layout has to be known before the array is allocated.
        self.meta._blosc2 = self._comp_and_validate_blosc2_meta(
            self.meta._blosc2, patch_size, chunk_size, block_size, shape, channel_axis
        )
        self.meta._has_array = True

    self.support_metadata = is_mla

    blosc2.set_nthreads(num_threads)
    cparams = {'codec': blosc2.Codec.ZSTD, 'clevel': 8,} if cparams is None else cparams
    dparams = {'nthreads': num_threads} if dparams is None else dparams

    if not create_array:
        self._store = blosc2.open(urlpath=path_str, dparams=dparams, mmap_mode=mmap)
        self._read_meta()
    else:
        self._store = blosc2.empty(
            shape=shape,
            dtype=dtype,
            urlpath=path_str,
            chunks=self.meta._blosc2.chunk_size,
            blocks=self.meta._blosc2.block_size,
            cparams=cparams,
            dparams=dparams,
            mmap_mode=mmap,
        )

    if self.meta._has_array == True:
        # Record the layout the store actually ended up with.
        self.meta._blosc2.chunk_size = list(self._store.chunks)
        self.meta._blosc2.block_size = list(self._store.blocks)
    self.mmap = mmap
    self._write_metadata()
|
|
119
|
+
|
|
120
|
+
def close(self):
|
|
121
|
+
self._write_metadata()
|
|
122
|
+
self._store = None
|
|
123
|
+
self.filepath = None
|
|
124
|
+
self.support_metadata = None
|
|
125
|
+
self.mmap = None
|
|
126
|
+
|
|
127
|
+
def load(
        self,
        filepath: Union[str, Path],
        num_threads: int = 1,
    ):
    """Loads a Blosc2-compressed file. Both MLArray ('.mla') and Blosc2 ('.b2nd') files are supported.

    The opened array becomes the backing store of this instance and the
    metadata is attached to ``self.meta``; nothing is returned.

    WARNING:
        MLArray supports both ".b2nd" and ".mla" files. The MLArray
        format standard and standardized metadata are honored only for
        ".mla". For ".b2nd", metadata is ignored when loading.

    Args:
        filepath (Union[str, Path]): Path to the Blosc2 file to be loaded.
            The filepath needs to have the extension ".b2nd" or ".mla".
        num_threads (int): Number of threads to use for loading the file.

    Raises:
        RuntimeError: If the file extension is not ".b2nd" or ".mla".
    """
    self.filepath = str(filepath)
    if not str(filepath).endswith(".b2nd") and not str(filepath).endswith(f".{MLARRAY_SUFFIX}"):
        raise RuntimeError(f"MLArray requires '.b2nd' or '.{MLARRAY_SUFFIX}' as extension.")
    self.support_metadata = str(filepath).endswith(f".{MLARRAY_SUFFIX}")
    blosc2.set_nthreads(num_threads)
    dparams = {'nthreads': num_threads}
    # The decompression parameters must be passed as `dparams` (the keyword the
    # sibling open() method also uses); this call previously spelled it
    # `cdparams`.
    self._store = blosc2.open(urlpath=str(filepath), dparams=dparams, mode='r')
    self.mmap = None
    self._read_meta()
    if self.meta._has_array == True:
        # Mirror the on-disk chunk/block layout into the metadata.
        self.meta._blosc2.chunk_size = list(self._store.chunks)
        self.meta._blosc2.block_size = list(self._store.blocks)
|
|
163
|
+
|
|
164
|
+
def save(
        self,
        filepath: Union[str, Path],
        patch_size: Optional[Union[int, List, Tuple]] = 'default',
        chunk_size: Optional[Union[int, List, Tuple]]= None,
        block_size: Optional[Union[int, List, Tuple]] = None,
        num_threads: int = 1,
        cparams: Optional[Dict] = None,
        dparams: Optional[Dict] = None
    ):
    """Write the array and its metadata to a Blosc2-compressed file.

    Both MLArray ('.mla') and Blosc2 ('.b2nd') files are supported. When no
    array is held, an empty one-dimensional placeholder array is written so
    that the metadata can still be stored.

    WARNING:
        MLArray supports both ".b2nd" and ".mla" files. The MLArray
        format standard and standardized metadata are honored only for
        ".mla". For ".b2nd", metadata is ignored when saving.

    Args:
        filepath (Union[str, Path]): Path to save the file. Must end with
            ".b2nd" or ".mla".
        patch_size (Optional[Union[int, List, Tuple]]): Patch size hint for
            chunk/block optimization. Provide an int for isotropic sizes or
            a list/tuple with one entry per spatial dimension. "default"
            reuses a patch size already stored in the metadata if there is
            one, otherwise the default patch size of 192.
        chunk_size (Optional[Union[int, List, Tuple]]): Explicit chunk size.
            Provide an int or a tuple/list with length equal to the number
            of dimensions, or None to let Blosc2 decide. Ignored when
            patch_size is not None.
        block_size (Optional[Union[int, List, Tuple]]): Explicit block size.
            Provide an int or a tuple/list with length equal to the number
            of dimensions, or None to let Blosc2 decide. Ignored when
            patch_size is not None.
        num_threads (int): Number of threads to use for saving the file.
        cparams (Optional[Dict]): Blosc2 compression parameters. Defaults
            to ZSTD at compression level 8.
        dparams (Optional[Dict]): Blosc2 decompression parameters. Defaults
            to ``{'nthreads': num_threads}``.

    Raises:
        RuntimeError: If the file extension is not ".b2nd" or ".mla".
    """
    target = str(filepath)
    is_mla = target.endswith(f".{MLARRAY_SUFFIX}")
    if not (target.endswith(".b2nd") or is_mla):
        raise RuntimeError(f"MLArray requires '.b2nd' or '.{MLARRAY_SUFFIX}' as extension.")

    has_data = self._store is not None
    if has_data:
        self.meta._blosc2 = self._comp_and_validate_blosc2_meta(
            self.meta._blosc2,
            patch_size,
            chunk_size,
            block_size,
            self._store.shape,
            self.meta.spatial.channel_axis,
        )
    self.meta._has_array = has_data

    self.support_metadata = is_mla

    blosc2.set_nthreads(num_threads)
    cparams = {'codec': blosc2.Codec.ZSTD, 'clevel': 8,} if cparams is None else cparams
    dparams = {'nthreads': num_threads} if dparams is None else dparams

    # Materialise the data (or an empty placeholder) before compressing it.
    payload = np.ascontiguousarray(self._store[...]) if has_data else np.empty((0,))
    self._store = blosc2.asarray(
        payload,
        urlpath=target,
        chunks=self.meta._blosc2.chunk_size,
        blocks=self.meta._blosc2.block_size,
        cparams=cparams,
        dparams=dparams,
    )
    if self.meta._has_array == True:
        # Record the layout Blosc2 actually chose.
        self.meta._blosc2.chunk_size = list(self._store.chunks)
        self.meta._blosc2.block_size = list(self._store.blocks)
    self.mmap = None
    self._write_metadata(force=True)
|
|
229
|
+
|
|
230
|
+
def to_numpy(self):
|
|
231
|
+
if self._store is None or self.meta._has_array == False:
|
|
232
|
+
raise TypeError("MLArray has no array data loaded.")
|
|
233
|
+
return self._store[...]
|
|
234
|
+
|
|
235
|
+
def __getitem__(self, key):
|
|
236
|
+
if self._store is None or self.meta._has_array == False:
|
|
237
|
+
raise TypeError("MLArray has no array data loaded.")
|
|
238
|
+
return self._store[key]
|
|
239
|
+
|
|
240
|
+
def __setitem__(self, key, value):
|
|
241
|
+
if self._store is None or self.meta._has_array == False:
|
|
242
|
+
raise TypeError("MLArray has no array data loaded.")
|
|
243
|
+
self._store[key] = value
|
|
244
|
+
|
|
245
|
+
def __iter__(self):
|
|
246
|
+
if self._store is None or self.meta._has_array == False:
|
|
247
|
+
raise TypeError("MLArray has no array data loaded.")
|
|
248
|
+
return iter(self._store)
|
|
249
|
+
|
|
250
|
+
def __len__(self):
|
|
251
|
+
if self._store is None or self.meta._has_array == False:
|
|
252
|
+
return 0
|
|
253
|
+
return len(self._store)
|
|
254
|
+
|
|
255
|
+
def __array__(self, dtype=None):
|
|
256
|
+
if self._store is None or self.meta._has_array == False:
|
|
257
|
+
raise TypeError("MLArray has no array data loaded.")
|
|
258
|
+
arr = np.asarray(self._store)
|
|
259
|
+
if dtype is not None:
|
|
260
|
+
return arr.astype(dtype)
|
|
261
|
+
return arr
|
|
262
|
+
|
|
263
|
+
@property
|
|
264
|
+
def spacing(self):
|
|
265
|
+
"""Returns the image spacing.
|
|
266
|
+
|
|
267
|
+
Returns:
|
|
268
|
+
list: The image spacing with length equal to the number of
|
|
269
|
+
dimensions.
|
|
270
|
+
"""
|
|
271
|
+
return self.meta.spatial.spacing
|
|
272
|
+
|
|
273
|
+
@property
|
|
274
|
+
def origin(self):
|
|
275
|
+
"""Returns the image origin.
|
|
276
|
+
|
|
277
|
+
Returns:
|
|
278
|
+
list: The image origin with length equal to the number of
|
|
279
|
+
dimensions.
|
|
280
|
+
"""
|
|
281
|
+
return self.meta.spatial.origin
|
|
282
|
+
|
|
283
|
+
@property
|
|
284
|
+
def direction(self):
|
|
285
|
+
"""Returns the image direction.
|
|
286
|
+
|
|
287
|
+
Returns:
|
|
288
|
+
list: The image direction with shape (ndims, ndims).
|
|
289
|
+
"""
|
|
290
|
+
return self.meta.spatial.direction
|
|
291
|
+
|
|
292
|
+
@property
|
|
293
|
+
def affine(self) -> np.ndarray:
|
|
294
|
+
"""Computes the affine transformation matrix for the image.
|
|
295
|
+
|
|
296
|
+
Returns:
|
|
297
|
+
list: Affine matrix with shape (ndims + 1, ndims + 1).
|
|
298
|
+
"""
|
|
299
|
+
if self._store is None or self.meta._has_array == False:
|
|
300
|
+
return None
|
|
301
|
+
spacing = np.array(self.spacing) if self.spacing is not None else np.ones(self._spatial_ndim)
|
|
302
|
+
origin = np.array(self.origin) if self.origin is not None else np.zeros(self._spatial_ndim)
|
|
303
|
+
direction = np.array(self.direction) if self.direction is not None else np.eye(self._spatial_ndim)
|
|
304
|
+
affine = np.eye(self._spatial_ndim + 1)
|
|
305
|
+
affine[:self._spatial_ndim, :self._spatial_ndim] = direction @ np.diag(spacing)
|
|
306
|
+
affine[:self._spatial_ndim, self._spatial_ndim] = origin
|
|
307
|
+
return affine.tolist()
|
|
308
|
+
|
|
309
|
+
@property
|
|
310
|
+
def translation(self):
|
|
311
|
+
"""Extracts the translation vector from the affine matrix.
|
|
312
|
+
|
|
313
|
+
Returns:
|
|
314
|
+
list: Translation vector with length equal to the number of
|
|
315
|
+
dimensions.
|
|
316
|
+
"""
|
|
317
|
+
if self._store is None or self.meta._has_array == False:
|
|
318
|
+
return None
|
|
319
|
+
return np.array(self.affine)[:-1, -1].tolist()
|
|
320
|
+
|
|
321
|
+
@property
|
|
322
|
+
def scale(self):
|
|
323
|
+
"""Extracts the scaling factors from the affine matrix.
|
|
324
|
+
|
|
325
|
+
Returns:
|
|
326
|
+
list: Scaling factors per axis with length equal to the number of
|
|
327
|
+
dimensions.
|
|
328
|
+
"""
|
|
329
|
+
if self._store is None or self.meta._has_array == False:
|
|
330
|
+
return None
|
|
331
|
+
scales = np.linalg.norm(np.array(self.affine)[:-1, :-1], axis=0)
|
|
332
|
+
return scales.tolist()
|
|
333
|
+
|
|
334
|
+
@property
|
|
335
|
+
def rotation(self):
|
|
336
|
+
"""Extracts the rotation matrix from the affine matrix.
|
|
337
|
+
|
|
338
|
+
Returns:
|
|
339
|
+
list: Rotation matrix with shape (ndims, ndims).
|
|
340
|
+
"""
|
|
341
|
+
if self._store is None or self.meta._has_array == False:
|
|
342
|
+
return None
|
|
343
|
+
rotation_matrix = np.array(self.affine)[:-1, :-1] / np.array(self.scale)
|
|
344
|
+
return rotation_matrix.tolist()
|
|
345
|
+
|
|
346
|
+
@property
|
|
347
|
+
def shear(self):
|
|
348
|
+
"""Computes the shear matrix from the affine matrix.
|
|
349
|
+
|
|
350
|
+
Returns:
|
|
351
|
+
list: Shear matrix with shape (ndims, ndims).
|
|
352
|
+
"""
|
|
353
|
+
if self._store is None or self.meta._has_array == False:
|
|
354
|
+
return None
|
|
355
|
+
scales = np.array(self.scale)
|
|
356
|
+
rotation_matrix = np.array(self.rotation)
|
|
357
|
+
shearing_matrix = np.dot(rotation_matrix.T, np.array(self.affine)[:-1, :-1]) / scales[:, None]
|
|
358
|
+
return shearing_matrix.tolist()
|
|
359
|
+
|
|
360
|
+
@property
|
|
361
|
+
def shape(self):
|
|
362
|
+
"""Returns the shape of the array.
|
|
363
|
+
|
|
364
|
+
Returns:
|
|
365
|
+
tuple: Shape of the underlying array.
|
|
366
|
+
"""
|
|
367
|
+
if self._store is None or self.meta._has_array == False:
|
|
368
|
+
return None
|
|
369
|
+
return self._store.shape
|
|
370
|
+
|
|
371
|
+
@property
|
|
372
|
+
def dtype(self):
|
|
373
|
+
"""Returns the dtype of the array."""
|
|
374
|
+
if self._store is None or self.meta._has_array == False:
|
|
375
|
+
return None
|
|
376
|
+
return self._store.dtype
|
|
377
|
+
|
|
378
|
+
@property
|
|
379
|
+
def ndim(self) -> int:
|
|
380
|
+
"""Returns the number of dimensions of the image."""
|
|
381
|
+
if self._store is None or self.meta._has_array == False:
|
|
382
|
+
return None
|
|
383
|
+
return len(self._store.shape)
|
|
384
|
+
|
|
385
|
+
@property
|
|
386
|
+
def _spatial_ndim(self) -> int:
|
|
387
|
+
"""Returns the number of dimensions of the image."""
|
|
388
|
+
if self._store is None or self.meta._has_array == False:
|
|
389
|
+
return None
|
|
390
|
+
ndim = len(self._store.shape)
|
|
391
|
+
if self.meta.spatial.channel_axis is not None:
|
|
392
|
+
ndim -= 1
|
|
393
|
+
return ndim
|
|
394
|
+
|
|
395
|
+
def comp_blosc2_params(
        self,
        image_size: Union[Tuple[int, int], Tuple[int, int, int], Tuple[int, int, int, int]],
        patch_size: Union[Tuple[int, int], Tuple[int, int, int]],
        channel_axis: Optional[int] = None,
        bytes_per_pixel: int = 4,  # 4 bytes are float32
        l1_cache_size_per_core_in_bytes: int = 32768,  # 1 Kibibyte (KiB) = 2^10 Byte; 32 KiB = 32768 Byte
        l3_cache_size_per_core_in_bytes: int = 1441792,  # 1 Mebibyte (MiB) = 2^20 Byte = 1.048.576 Byte; 1.375MiB = 1441792 Byte
        safety_factor: float = 0.8  # we don't fill the caches to the brim. 0.8 means we target 80% of the caches
    ):
    """
    Computes a recommended block and chunk size for saving arrays with Blosc v2.

    Blosc2 NDIM documentation:
    "Having a second partition allows for greater flexibility in fitting different partitions to different CPU cache levels.
    Typically, the first partition (also known as chunks) should be sized to fit within the L3 cache,
    while the second partition (also known as blocks) should be sized to fit within the L2 or L1 caches,
    depending on whether the priority is compression ratio or speed."
    (Source: https://www.blosc.org/posts/blosc2-ndim-intro/)

    Our approach is not fully optimized for this yet.
    Currently, we aim to fit the uncompressed block within the L1 cache, accepting that it might occasionally spill over into L2, which we consider acceptable.

    Note: This configuration is specifically optimized for nnU-Net data loading, where each read operation is performed by a single core, so multi-threading is not an option.

    The default cache values are based on an older Intel 4110 CPU with 32KB L1, 128KB L2, and 1408KB L3 cache per core.
    We haven't further optimized for modern CPUs with larger caches, as our data must still be compatible with the older systems.

    Args:
        image_size (Tuple[int, ...]): Image shape. Use a 2D, 3D, or 4D
            size; 2D/3D inputs are internally padded with leading
            singleton axes to 4D.
        patch_size (Union[Tuple[int, int], Tuple[int, int, int]]): Patch
            size for spatial dimensions. Use a 2-tuple (x, y) or 3-tuple
            (x, y, z).
        channel_axis (Optional[int]): Index of the channel axis within
            ``image_size``. It is moved to the front for the computation
            and moved back in the result. None leaves the axis order as is.
        bytes_per_pixel (int): Number of bytes per element. Defaults to 4
            for float32.
        l1_cache_size_per_core_in_bytes (int): L1 cache per core in bytes.
        l3_cache_size_per_core_in_bytes (int): L3 cache per core in bytes.
        safety_factor (float): Safety factor to avoid filling caches.

    Returns:
        Tuple[List[int], List[int]]: Recommended chunk size and block size,
        each with the same number of axes as the given ``image_size``.

    Raises:
        RuntimeError: If the (padded) image size is not 4D or the patch
            size is neither 2D nor 3D.
    """
    def _move_index_list(a, src, dst):
        # Move element `src` of the sequence to position `dst`.
        a = list(a)
        x = a.pop(src)
        a.insert(dst, x)
        return a

    # Pad 2D/3D shapes with leading singleton axes; they are dropped again at the end.
    num_squeezes = 0
    if len(image_size) == 2:
        image_size = (1, 1, *image_size)
        num_squeezes = 2
    elif len(image_size) == 3:
        image_size = (1, *image_size)
        num_squeezes = 1

    if channel_axis is not None:
        image_size = _move_index_list(image_size, channel_axis+num_squeezes, 0)

    if len(image_size) != 4:
        raise RuntimeError("Image size must be 4D.")

    if not (len(patch_size) == 2 or len(patch_size) == 3):
        raise RuntimeError("Patch size must be 2D or 3D.")

    num_channels = image_size[0]
    if len(patch_size) == 2:
        patch_size = [1, *patch_size]
    patch_size = np.array(patch_size)
    # Start from the patch size rounded up to powers of two, keeping all channels together.
    block_size = np.array((num_channels, *[2 ** (max(0, math.ceil(math.log2(i)))) for i in patch_size]))

    # shrink the block size until it fits in L1
    l1_budget = l1_cache_size_per_core_in_bytes * safety_factor
    estimated_nbytes_block = np.prod(block_size) * bytes_per_pixel
    while estimated_nbytes_block > l1_budget:
        # pick largest deviation from patch_size that is not 1
        axis_order = np.argsort(block_size[1:] / patch_size)[::-1]
        shrinkable = [axis for axis in axis_order if block_size[axis + 1] != 1]
        if not shrinkable:
            # Every spatial axis is already 1 (e.g. very many channels): the
            # block cannot get smaller, so accept exceeding the L1 budget
            # instead of indexing past the end of axis_order.
            break
        picked_axis = shrinkable[0]
        # now reduce that axis to the next lowest power of 2
        block_size[picked_axis + 1] = 2 ** (max(0, math.floor(math.log2(block_size[picked_axis + 1] - 1))))
        block_size[picked_axis + 1] = min(block_size[picked_axis + 1], image_size[picked_axis + 1])
        estimated_nbytes_block = np.prod(block_size) * bytes_per_pixel

    block_size = np.array([min(i, j) for i, j in zip(image_size, block_size)])

    # note: there is no use extending the chunk size to 3d when we have a 2d patch size! This would unnecessarily
    # load data into L3
    # now tile the blocks into chunks until we hit image_size or the l3 cache per core limit
    chunk_size = deepcopy(block_size)
    l3_budget = l3_cache_size_per_core_in_bytes * safety_factor
    estimated_nbytes_chunk = np.prod(chunk_size) * bytes_per_pixel
    while estimated_nbytes_chunk < l3_budget:
        if patch_size[0] == 1 and all([i == j for i, j in zip(chunk_size[2:], image_size[2:])]):
            break
        if all([i == j for i, j in zip(chunk_size, image_size)]):
            break
        # Grow the axis whose chunk is smallest relative to its block, skipping
        # axes that already span the image or whose patch extent is 1.
        axis_order = np.argsort(chunk_size[1:] / block_size[1:])
        growable = [
            axis for axis in axis_order
            if chunk_size[axis + 1] != image_size[axis + 1] and patch_size[axis] != 1
        ]
        if not growable:
            # No axis is left to extend; stop instead of raising IndexError.
            break
        picked_axis = growable[0]
        previous_extent = chunk_size[picked_axis + 1]
        chunk_size[picked_axis + 1] = min(previous_extent + block_size[picked_axis + 1], image_size[picked_axis + 1])
        estimated_nbytes_chunk = np.prod(chunk_size) * bytes_per_pixel
        if np.mean([i / j for i, j in zip(chunk_size[1:], patch_size)]) > 1.5:
            # chunk size should not exceed patch size * 1.5 on average.
            # Restore the previous extent rather than subtracting a block: the
            # growth may have been clamped to the image size, in which case
            # subtracting a full block would shrink the chunk below where it was.
            chunk_size[picked_axis + 1] = previous_extent
            break
    # better safe than sorry
    chunk_size = [min(i, j) for i, j in zip(image_size, chunk_size)]

    if channel_axis is not None:
        # Put the channel axis back where the caller had it.
        block_size = _move_index_list(block_size, 0, channel_axis+num_squeezes)
        chunk_size = _move_index_list(chunk_size, 0, channel_axis+num_squeezes)

    # Drop the singleton axes that were added for 2D/3D inputs.
    block_size = block_size[num_squeezes:]
    chunk_size = chunk_size[num_squeezes:]

    return [int(value) for value in chunk_size], [int(value) for value in block_size]
|
|
519
|
+
|
|
520
|
+
def _comp_and_validate_blosc2_meta(self, meta_blosc2, patch_size, chunk_size, block_size, shape, channel_axis):
    """Resolve patch, chunk and block sizes into a validated MetaBlosc2.

    Args:
        meta_blosc2: Existing Blosc2 metadata (may be None). Its
            ``patch_size`` is reused when ``patch_size`` is "default".
        patch_size: Patch size hint: an int (applied to every spatial
            axis), a sequence with one entry per spatial axis, "default",
            or None to skip the patch-based optimization.
        chunk_size: Explicit chunk size, used only when no patch size is in
            effect.
        block_size: Explicit block size, used only when no patch size is in
            effect.
        shape: Shape of the array that is going to be stored.
        channel_axis: Index of the channel axis in ``shape`` or None.

    Returns:
        MetaBlosc2: Metadata holding the resulting chunk, block and patch
        sizes.

    Raises:
        NotImplementedError: If an explicit patch size is given for an
            array that is not a 2D or 3D image (optionally with a channel
            axis).
        RuntimeError: If an explicit patch size is combined with an
            explicit chunk or block size.
    """
    explicit_patch = patch_size is not None and patch_size != "default"
    # Supported layouts: 2D/3D images without a channel axis and 2D/3D images
    # with one (array rank 3/4). The previous condition listed the rank-4 case
    # twice and thereby rejected 2D images that carry a channel axis.
    supported_layout = (
        (len(shape) in (2, 3) and channel_axis is None)
        or (len(shape) in (3, 4) and channel_axis is not None)
    )
    if explicit_patch and not supported_layout:
        raise NotImplementedError("Chunk and block size optimization based on patch size is only implemented for 2D and 3D images. Please set the chunk and block size manually or set to None for blosc2 to determine a chunk and block size.")
    if explicit_patch and (chunk_size is not None or block_size is not None):
        raise RuntimeError("patch_size and chunk_size / block_size cannot both be explicitly set.")

    ndims = len(shape) if channel_axis is None else len(shape) - 1
    if patch_size == "default":
        if meta_blosc2 is not None and meta_blosc2.patch_size is not None:
            # Reuse the patch size of a previously loaded image.
            patch_size = meta_blosc2.patch_size
        else:
            # Nothing stored and nothing requested: fall back to the default.
            patch_size = [MLARRAY_DEFAULT_PATCH_SIZE] * ndims

    # An int applies to every spatial axis. The channel axis is not part of
    # the patch, so expand to ndims rather than len(shape).
    patch_size = [patch_size] * ndims if isinstance(patch_size, int) else patch_size

    if patch_size is not None:
        chunk_size, block_size = self.comp_blosc2_params(shape, patch_size, channel_axis)

    meta_blosc2 = MetaBlosc2(chunk_size, block_size, patch_size)
    meta_blosc2._validate_and_cast(len(shape), channel_axis)
    return meta_blosc2
|
|
541
|
+
|
|
542
|
+
def _read_meta(self):
    """Attach metadata to this instance after a store has been opened.

    The "mlarray" entry of the store's variable-length metadata is parsed
    when the file type supports metadata and the store is a Blosc2 NDArray;
    otherwise an empty Meta object is used.
    """
    readable = self.support_metadata and isinstance(self._store, blosc2.ndarray.NDArray)
    if readable:
        stored = self._store.vlmeta["mlarray"]
        loaded_meta = Meta.from_dict(stored)
    else:
        loaded_meta = Meta()
    self._validate_and_add_meta(loaded_meta)
|
|
548
|
+
|
|
549
|
+
def _write_metadata(self, force=False):
|
|
550
|
+
if self.support_metadata and isinstance(self._store, blosc2.ndarray.NDArray) and (self.mmap in ('r+', 'w+') or force):
|
|
551
|
+
metadata = self.meta.to_dict()
|
|
552
|
+
if not is_serializable(metadata):
|
|
553
|
+
raise RuntimeError("Metadata is not serializable.")
|
|
554
|
+
self._store.vlmeta["mlarray"] = metadata
|
|
555
|
+
|
|
556
|
+
def _validate_and_add_meta(self, meta, spacing=None, origin=None, direction=None, channel_axis=None):
    """Normalise ``meta`` into a Meta object and attach it to the instance.

    Args:
        meta: None (an empty Meta is created), a dict (wrapped as the
            ``image`` section of a new Meta) or a Meta instance.
        spacing: Optional spacing that overrides ``meta.spatial.spacing``.
        origin: Optional origin that overrides ``meta.spatial.origin``.
        direction: Optional direction that overrides
            ``meta.spatial.direction``.
        channel_axis: Optional channel axis that overrides
            ``meta.spatial.channel_axis``.

    Raises:
        ValueError: If ``meta`` is neither None, a dict nor a Meta object.
    """
    if meta is None:
        meta = Meta()
    elif not isinstance(meta, (dict, Meta)):
        raise ValueError("Meta must be None, a dict or a Meta object.")
    elif isinstance(meta, dict):
        meta = Meta(image=meta)

    self.meta = meta
    self.meta._mlarray_version = MLARRAY_VERSION

    # Only explicitly given values override what the metadata already holds.
    overrides = (
        ("spacing", spacing),
        ("origin", origin),
        ("direction", direction),
        ("channel_axis", channel_axis),
    )
    for field_name, value in overrides:
        if value is not None:
            setattr(self.meta.spatial, field_name, value)

    # The shape is always taken from the current store before validating.
    self.meta.spatial.shape = self.shape
    self.meta.spatial._validate_and_cast(self._spatial_ndim)
|
|
576
|
+
|
mlarray/utils.py
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
import json
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
def is_serializable(d: dict) -> bool:
    """Checks whether a dictionary is JSON-serializable.

    Args:
        d (dict): Input dictionary to test.

    Returns:
        bool: True when ``json.dumps`` accepts the input, otherwise False.
    """
    try:
        json.dumps(d)
    except (TypeError, OverflowError, ValueError, RecursionError):
        # TypeError: unsupported value or key type. ValueError: circular
        # reference. RecursionError: nesting too deep to encode.
        return False
    return True
|
|
Binary file
|
|
Binary file
|