miblab_ssa-0.0.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- miblab_ssa/__init__.py +14 -0
- miblab_ssa/lb.py +260 -0
- miblab_ssa/metrics.py +280 -0
- miblab_ssa/normalize.py +532 -0
- miblab_ssa/pca.py +98 -0
- miblab_ssa/pdm.py +177 -0
- miblab_ssa/sdf_cheby.py +153 -0
- miblab_ssa/sdf_ft.py +78 -0
- miblab_ssa/sdf_ft_simple.py +47 -0
- miblab_ssa/sdf_mono.py +214 -0
- miblab_ssa/sh.py +444 -0
- miblab_ssa/ssa.py +525 -0
- miblab_ssa/zernike.py +144 -0
- miblab_ssa-0.0.0.dist-info/METADATA +34 -0
- miblab_ssa-0.0.0.dist-info/RECORD +18 -0
- miblab_ssa-0.0.0.dist-info/WHEEL +5 -0
- miblab_ssa-0.0.0.dist-info/licenses/LICENSE +201 -0
- miblab_ssa-0.0.0.dist-info/top_level.txt +1 -0
miblab_ssa/ssa.py
ADDED
@@ -0,0 +1,525 @@
import os
import logging
from collections.abc import Callable
from itertools import product

import numpy as np
import psutil
import zarr
import dask
import dask.array as da
from dask.diagnostics import ProgressBar
from dask_ml.decomposition import PCA as DaskPCA
from sklearn.decomposition import PCA
from tqdm import tqdm


def features_from_dataset_in_memory(
    features_from_mask: Callable,
    masks_file: str,
    filepath: str,
    **kwargs,  # keyword arguments for features_from_mask
):
    logging.info("Features: loading masks..")
    with np.load(masks_file) as data:
        masks = data['masks']
        labels = data['labels']

    logging.info("Features: scheduling tasks..")
    tasks = [
        dask.delayed(features_from_mask)(masks[i, ...], **kwargs)
        for i in range(masks.shape[0])
    ]
    logging.info('Features: computing..')
    with ProgressBar():
        features = dask.compute(*tasks)
    feature_matrix = np.stack(features, axis=0, dtype=np.float32)

    logging.info('Features: saving..')
    if not filepath.endswith('.npz'):
        filepath += '.npz'
    np.savez_compressed(
        filepath,
        features=feature_matrix,
        original_shape=masks.shape[1:],
        labels=labels,
        # Store the feature kwargs under a single pickled key so that
        # downstream steps can recover them with data['kwargs'].item().
        # (Spreading **kwargs into separate keys would make them
        # indistinguishable from the arrays above when reloading.)
        kwargs=np.array(kwargs, dtype=object),
    )
    logging.info('Features: finished.')
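A usage sketch, not part of the package: `volume_fraction` is a hypothetical stand-in for the real encoders elsewhere in this package (spherical harmonics, Zernike moments, signed distance transforms). Any Callable mapping one 3D mask to a fixed-length float vector fits the contract, and `masks.npz` is assumed to hold `masks` (N, D, H, W) and `labels` (N,) arrays, mirroring what the function reads.

```python
import numpy as np
from miblab_ssa.ssa import features_from_dataset_in_memory

# Hypothetical extractor: foreground fraction in n_bins slabs along z
def volume_fraction(mask, n_bins=4):
    slabs = np.array_split(mask, n_bins, axis=0)
    return np.array([s.mean() for s in slabs], dtype=np.float32)

features_from_dataset_in_memory(
    volume_fraction, 'masks.npz', 'features.npz', n_bins=4,
)
```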


def features_from_dataset_zarr(
    features_from_mask: Callable,
    masks_zarr_path: str,
    output_zarr_path: str,
    chunk_size='auto',
    **kwargs,  # keyword arguments for features_from_mask
):
    logging.info(f"Feature calc: connecting to {os.path.basename(masks_zarr_path)}..")

    # 1. Input: connect to the masks Zarr (lazy)
    d_masks = da.from_zarr(masks_zarr_path, component='masks')
    n_samples = d_masks.shape[0]

    # 2. Metadata: determine the output shape dynamically.
    # Compute ONE sample eagerly to find out how big the feature vector
    # is; this avoids hardcoding the feature size.
    logging.info("Feature calc: computing shape probe on first mask..")
    sample_mask = d_masks[0].compute()
    sample_feature = features_from_mask(sample_mask, **kwargs)

    n_features = sample_feature.shape[0]
    dtype = sample_feature.dtype
    logging.info(f"Feature vector shape detected: ({n_features},). Type: {dtype}")

    # 3. Construction: build the Dask graph (the "lazy" array),
    # one delayed row per mask.
    lazy_rows = []

    # Create the delayed function wrapper once
    delayed_func = dask.delayed(features_from_mask)

    for i in range(n_samples):
        # d_masks[i] is lazy, so the mask is not read from disk yet
        task = delayed_func(d_masks[i], **kwargs)

        # Convert the delayed task into a Dask array row. Shape and
        # dtype MUST be given so Dask knows how to stitch rows together.
        d_row = da.from_delayed(task, shape=(n_features,), dtype=dtype)

        # Reshape to (1, F) so the rows can be stacked vertically
        lazy_rows.append(d_row[None, :])

    # Stack into one big (N, F) matrix. This matrix exists only as a
    # graph of future tasks, not in RAM.
    d_feature_matrix = da.vstack(lazy_rows)

    # 4. Storage: prepare the output Zarr
    if not output_zarr_path.endswith('.zarr'):
        output_zarr_path += '.zarr'

    store = zarr.DirectoryStore(output_zarr_path)
    root = zarr.group(store=store, overwrite=True)

    # 5. Output chunking: with 'auto' (the default) Dask decides based
    # on the input chunks; otherwise rechunk the sample axis as requested.
    if chunk_size != 'auto':
        d_feature_matrix = d_feature_matrix.rechunk({0: chunk_size})

    logging.info(f"Feature calc: streaming results to {output_zarr_path}...")

    # 6. Execution: stream to disk. to_zarr computes the chunks in
    # parallel and writes them directly; the full matrix is never held
    # in memory.
    with ProgressBar():
        d_feature_matrix.to_zarr(store, component='features', compute=True)

    # 7. Metadata: copy labels from input to output
    input_root = zarr.open(masks_zarr_path, mode='r')
    root.array('labels', input_root['labels'][:])

    # Save attributes (original shape, feature kwargs, etc.)
    root.attrs['original_shape'] = d_masks.shape[1:]  # (D, H, W)
    root.attrs['kwargs'] = kwargs

    logging.info('Feature calc: finished.')
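The streaming variant has the same contract; a sketch, where `volume_fraction` is again hypothetical and `masks.zarr` is assumed to hold the `masks` (N, D, H, W) and `labels` components this function reads.

```python
import numpy as np
from miblab_ssa.ssa import features_from_dataset_zarr

# Same hypothetical extractor as above
def volume_fraction(mask, n_bins=4):
    slabs = np.array_split(mask, n_bins, axis=0)
    return np.array([s.mean() for s in slabs], dtype=np.float32)

features_from_dataset_zarr(
    volume_fraction,
    'masks.zarr',      # store with 'masks' and 'labels' components
    'features.zarr',
    n_bins=4,
)
```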


def pca_from_features_in_memory(feature_file, pca_file):
    """
    Fits PCA on a feature matrix and saves the results while preserving
    all original metadata.
    """
    # allow_pickle is needed to recover the kwargs dict stored by
    # features_from_dataset_in_memory
    with np.load(feature_file, allow_pickle=True) as data:
        features = data['features']
        original_shape = data['original_shape']
        labels = data['labels']
        kwargs = data['kwargs'].item()

    # Fit the PCA
    pca = PCA()
    pca.fit(features)

    # Save the original metadata alongside the new PCA keys
    np.savez(
        pca_file,
        mean=pca.mean_,
        components=pca.components_,
        variance=pca.explained_variance_,
        variance_ratio=pca.explained_variance_ratio_,
        original_shape=original_shape,
        labels=labels,
        kwargs=np.array(kwargs, dtype=object),
    )

    return pca.explained_variance_ratio_
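A sanity check of the saved model (a sketch, assuming `features.npz` and `pca.npz` were produced by the two steps above): with all components kept, which is the `PCA()` default, the projection is invertible up to floating point error. This is the identity the coefficient and mode functions below rely on.

```python
import numpy as np

with np.load('pca.npz') as d:
    mean, comps = d['mean'], d['components']

with np.load('features.npz') as d:
    x = d['features'][0]          # first sample, shape (F,)

scores = (x - mean) @ comps.T     # forward transform
x_hat = mean + scores @ comps     # inverse transform
assert np.allclose(x, x_hat, atol=1e-3)
```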


def pca_from_features_zarr(
    features_zarr_path: str,
    output_zarr_path: str,
    n_components=None,
    chunk_size='auto'
):
    """
    Fits PCA on a larger-than-memory features Zarr array and saves the
    results to Zarr.
    """
    logging.info(f"PCA: Connecting to feature store at {os.path.basename(features_zarr_path)}..")

    # 1. Connect to features.
    # The component must match what was saved in the previous step ('features')
    d_features = da.from_zarr(features_zarr_path, component='features')

    # 2. Optimize chunking
    if chunk_size == 'auto':
        chunk_size = get_optimal_chunk_size(
            d_features.shape[1:],
            dtype=d_features.dtype
        )
        logging.info(f"PCA: Auto-chunking set to {chunk_size} samples per batch.")

    d_features = d_features.rechunk({0: chunk_size})

    # 3. Fit PCA
    if n_components is None:
        n_components = min(d_features.shape)

    logging.info(f"PCA: Fitting model with n_components={n_components}...")

    # svd_solver='auto' lets dask-ml pick an efficient solver
    # (e.g. randomized SVD) for large arrays
    pca = DaskPCA(n_components=n_components, svd_solver='auto')

    # This triggers the computation
    pca.fit(d_features)

    # dask-ml may keep the fitted attributes as lazy arrays, so compute
    # them into NumPy arrays here, in one parallel pass.
    # components_ is (n_components, n_features); mean_ is (n_features,)
    logging.info("PCA: Computing attributes (mean, components) into memory...")
    pca_mean, pca_components, pca_var, pca_ratio = dask.compute(
        pca.mean_,
        pca.components_,
        pca.explained_variance_,
        pca.explained_variance_ratio_
    )

    # 4. Prepare the output Zarr
    logging.info(f"PCA: Saving results to {output_zarr_path}...")

    if not output_zarr_path.endswith('.zarr'):
        output_zarr_path += '.zarr'

    store = zarr.DirectoryStore(output_zarr_path)
    root = zarr.group(store=store, overwrite=True)

    # 5. Save the PCA attributes to Zarr, compressed to save disk space
    # on these dense matrices
    compressor = zarr.Blosc(cname='zstd', clevel=3, shuffle=2)

    # A. Components (the largest array: n_components x n_features),
    # chunked per component (1 component = 1 chunk) so that retrieving
    # single modes is fast
    root.create_dataset(
        'components',
        data=pca_components,
        chunks=(1, None),  # chunk per component
        compressor=compressor
    )

    # B. Mean (n_features,)
    root.create_dataset('mean', data=pca_mean, compressor=compressor)

    # C. Variance stats (small 1D arrays)
    root.create_dataset('variance', data=pca_var)
    root.create_dataset('variance_ratio', data=pca_ratio)

    # 6. Transfer metadata & labels
    logging.info("PCA: Copying all original metadata...")
    input_root = zarr.open(features_zarr_path, mode='r')

    # Preserve kwargs and the original shape
    root.attrs['kwargs'] = input_root.attrs['kwargs']
    root.attrs['original_shape'] = input_root.attrs['original_shape']

    # Preserve labels
    root.create_dataset('labels', data=input_root['labels'][:])

    logging.info("PCA: Finished.")
    return pca_ratio


def coefficients_from_features_in_memory(feature_file, pca_file, coeffs_file):

    # Load the features
    with np.load(feature_file) as data:
        features = data['features']  # (n_samples, n_features)
        labels = data['labels']

    # Load the PCA matrices
    with np.load(pca_file) as data:
        mean_vec = data['mean']          # (n_features,)
        components = data['components']  # (n_components, n_features)
        variance = data['variance']      # (n_components,)

    # 1. Center the data; broadcasting handles (N, F) - (F,)
    centered_features = features - mean_vec

    # 2. Projection (the "transform" step)
    # Matrix multiplication: (N, F) @ (F, K) -> (N, K)
    scores = centered_features @ components.T

    # 3. Normalize to dimensionless coefficients (z-scores)
    # Broadcasting handles (N, K) / (K,)
    coeffs = scores / np.sqrt(variance)

    np.savez(coeffs_file, coeffs=coeffs, labels=labels)
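These coefficients are ordinary PCA z-scores, α_k = (x − μ) · v_k / √λ_k. As a quick check (a sketch on synthetic data), they should agree with scikit-learn's built-in whitening, which divides the scores by the same √λ_k:

```python
import numpy as np
from sklearn.decomposition import PCA

rng = np.random.default_rng(0)
X = rng.normal(size=(50, 10))

pca = PCA(whiten=True).fit(X)
expected = pca.transform(X)  # whitened scores

# The same computation as coefficients_from_features_in_memory
scores = (X - pca.mean_) @ pca.components_.T
coeffs = scores / np.sqrt(pca.explained_variance_)

assert np.allclose(coeffs, expected, atol=1e-6)
```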


def coefficients_from_features_zarr(
    features_zarr_path: str,
    pca_zarr_path: str,
    output_zarr_path: str,
    chunk_size='auto'
):
    """
    Computes PCA coefficients (scores normalized by variance) from Zarr
    inputs and streams the results to a new Zarr store.
    """
    logging.info(f"Coeffs: Connecting to features at {os.path.basename(features_zarr_path)}..")

    # 1. Connect to inputs (lazy)
    # Features (N, F)
    d_features = da.from_zarr(features_zarr_path, component='features')

    # PCA model (loaded into RAM).
    # Since the PCA matrices (components, mean) usually fit in RAM
    # (unless F is massive, >100k), we load them as NumPy arrays so they
    # broadcast efficiently across the Dask chunks.
    logging.info(f"Coeffs: Loading PCA model from {os.path.basename(pca_zarr_path)}..")
    z_pca = zarr.open(pca_zarr_path, mode='r')

    mean_vec = z_pca['mean'][:]          # (F,)
    components = z_pca['components'][:]  # (K, F)
    variance = z_pca['variance'][:]      # (K,)

    # 2. Rechunk the features, with the same optimization logic as before
    if chunk_size == 'auto':
        chunk_size = get_optimal_chunk_size(
            d_features.shape[1:],
            dtype=d_features.dtype
        )

    d_features = d_features.rechunk({0: chunk_size})

    # 3. Define the computation (lazy graph)

    # A. Center the data; Dask broadcasts (chunk_i, F) - (F,)
    centered_features = d_features - mean_vec

    # B. Projection: (N, F) @ (F, K) -> (N, K).
    # Since 'components' is a NumPy array, Dask ships it to every worker.
    scores = centered_features @ components.T

    # C. Normalize (z-score): (N, K) / (K,)
    coeffs = scores / np.sqrt(variance)

    # 4. Prepare the output storage
    if not output_zarr_path.endswith('.zarr'):
        output_zarr_path += '.zarr'

    logging.info(f"Coeffs: Streaming results to {output_zarr_path}...")
    store = zarr.DirectoryStore(output_zarr_path)
    root = zarr.group(store=store, overwrite=True)

    # 5. Execute and save the coefficients to component 'coeffs'
    with ProgressBar():
        coeffs.to_zarr(store, component='coeffs', compute=True)

    # 6. Transfer metadata: keep the labels associated with these coefficients
    input_root = zarr.open(features_zarr_path, mode='r')
    root.create_dataset('labels', data=input_root['labels'][:])

    logging.info("Coeffs: Finished.")


def modes_from_pca_in_memory(
    mask_from_features: Callable,
    pca_file,
    modes_file,
    n_components=8,
    n_coeffs=11,
    max_coeff=2,
):
    # coeffs is the grid of dimensionless coefficients at which each
    # mode is sampled:
    # x_i = mean + alpha * sqrt(variance_i) * component_i
    coeffs = np.linspace(-max_coeff, max_coeff, n_coeffs)

    # allow_pickle is needed to recover the kwargs dict stored upstream
    with np.load(pca_file, allow_pickle=True) as data:
        var = data['variance']
        avr = data['mean']
        comps = data['components']
        original_shape = data['original_shape']
        kwargs = data['kwargs'].item()

    sdev = np.sqrt(var)  # (n_components,)
    mask_shape = (n_coeffs, n_components) + tuple(original_shape)
    masks = np.empty(mask_shape, dtype=bool)

    n_iter = n_coeffs * n_components
    iterator = product(range(n_coeffs), range(n_components))
    for j, i in tqdm(iterator, total=n_iter, desc='Computing modes from PCA'):
        feat = avr + coeffs[j] * sdev[i] * comps[i, :]
        masks[j, i, ...] = mask_from_features(feat, original_shape, **kwargs)

    np.savez(modes_file, masks=masks, coeffs=coeffs)
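A usage sketch. `mask_from_threshold` is a hypothetical decoder for illustration only: it pretends the feature vector is a flattened signed distance map, which only holds if the upstream extractor was built that way. In practice the decoder must invert the actual feature extractor (spherical harmonics, Zernike, SDF, ...).

```python
from miblab_ssa.ssa import modes_from_pca_in_memory

# Hypothetical decoder: treat the feature vector as a flattened signed
# distance map and take its negative side as the mask. Illustrative only.
def mask_from_threshold(features, original_shape, **kwargs):
    sdf = features.reshape(tuple(original_shape))
    return sdf < 0

modes_from_pca_in_memory(
    mask_from_threshold,
    'pca.npz',        # produced by pca_from_features_in_memory
    'modes.npz',
    n_components=8,
    n_coeffs=11,      # samples alpha on linspace(-2, 2, 11)
    max_coeff=2,
)
```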


def modes_from_pca_zarr(
    mask_from_features: Callable,
    pca_zarr_path: str,
    modes_zarr_path: str,
    n_components=8,
    n_coeffs=11,
    max_coeff=2
):
    """
    Generates 3D shape modes from a Zarr PCA model and saves them to a
    Zarr array.

    Output shape: (n_coeffs, n_components, Depth, Height, Width)
    """
    logging.info(f"Modes: Loading PCA model from {os.path.basename(pca_zarr_path)}..")

    # 1. Load the PCA model (small enough for RAM), read-only
    z_pca = zarr.open(pca_zarr_path, mode='r')

    avr = z_pca['mean'][:]  # (F,)

    # Handle the case where stored components > requested n_components
    stored_components = z_pca['components']  # lazy handle first
    limit_k = min(n_components, stored_components.shape[0])
    comps = stored_components[:limit_k]  # load only what we need (K, F)

    # Standard deviation from variance
    variance = z_pca['variance'][:limit_k]
    sdev = np.sqrt(variance)

    # Retrieve metadata: the original 3D shape is needed to reconstruct
    # the masks
    shape = tuple(z_pca.attrs['original_shape'])
    kwargs = z_pca.attrs['kwargs']

    # 2. Set up the coefficients,
    # e.g. linspace(-2, 2, 11) -> [-2., -1.6, ..., 0, ..., 1.6, 2.]
    coeffs = np.linspace(-max_coeff, max_coeff, n_coeffs)

    # 3. Set up the output Zarr
    if not modes_zarr_path.endswith('.zarr'):
        modes_zarr_path += '.zarr'

    logging.info(f"Modes: Creating 5D output store at {modes_zarr_path}..")
    store = zarr.DirectoryStore(modes_zarr_path)
    root = zarr.group(store=store, overwrite=True)

    # 5D shape: (steps, modes, D, H, W)
    out_shape = (n_coeffs, limit_k) + shape

    # Chunking strategy: one mask is written at a time, so a chunk size
    # of (1, 1, D, H, W) is safest: updating one mask never requires
    # reading or writing its neighbours.
    chunks = (1, 1) + shape

    z_masks = root.create_dataset(
        'modes',
        shape=out_shape,
        chunks=chunks,
        dtype=bool,  # masks are boolean
        compressor=zarr.Blosc(cname='zstd', clevel=3, shuffle=2)
    )

    # Save metadata for the viewer
    root.attrs['coeffs'] = coeffs.tolist()
    root.attrs['n_components'] = limit_k

    # 4. Generate the modes
    n_iter = n_coeffs * limit_k
    iterator = product(range(n_coeffs), range(limit_k))

    logging.info(f"Modes: Generating {n_iter} 3D masks...")

    for j, i in tqdm(iterator, total=n_iter, desc='Reconstructing Modes'):
        # Formula: x = mean + (scalar * sigma * component)
        # j = coefficient index (e.g. -2 sigma), i = component index (e.g. mode 1)
        feat = avr + (coeffs[j] * sdev[i] * comps[i, :])

        # Reconstruct the 3D mask (CPU-intensive step)
        mask_3d = mask_from_features(feat, shape, **kwargs)

        # Write directly to the chunk for (j, i), keeping RAM use low
        z_masks[j, i, ...] = mask_3d

    logging.info("Modes: Finished.")


# Helper function
def get_optimal_chunk_size(shape, dtype, target_mb=250):
    """
    Calculates the number of masks per chunk based on the size of the
    given dtype.
    """
    # 1. Bytes per voxel from the dtype argument:
    # np.int32 -> 4 bytes, np.float64 -> 8 bytes, np.bool_ -> 1 byte
    bytes_per_voxel = np.dtype(dtype).itemsize

    # 2. Size of ONE mask in megabytes (MB)
    one_mask_bytes = np.prod(shape) * bytes_per_voxel
    one_mask_mb = one_mask_bytes / (1024**2)

    # 3. Constraint A: Dask target chunk size (~250 MB)
    if one_mask_mb > target_mb:
        dask_optimal_count = 1
    else:
        dask_optimal_count = int(target_mb / one_mask_mb)

    # 4. Constraint B: system RAM safety net (10% of available RAM)
    available_ram_mb = psutil.virtual_memory().available / (1024**2)
    safe_ram_limit_mb = available_ram_mb * 0.10
    ram_limited_count = int(safe_ram_limit_mb / one_mask_mb)

    # 5. Pick the safer (smaller) count, but never less than 1
    final_count = min(dask_optimal_count, ram_limited_count)

    return max(1, final_count)
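A worked example of the sizing logic: one 256³ float32 mask is exactly 64 MB, so the ~250 MB target admits 3 masks per chunk, unless 10% of the machine's currently available RAM is the tighter bound (the result is therefore machine-dependent).

```python
import numpy as np
from miblab_ssa.ssa import get_optimal_chunk_size

# 256**3 * 4 bytes = 64 MB per mask; int(250 / 64) = 3 masks per chunk,
# unless 10% of available RAM divided by 64 MB is smaller.
print(get_optimal_chunk_size((256, 256, 256), np.float32))
```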
miblab_ssa/zernike.py
ADDED
@@ -0,0 +1,144 @@
import numpy as np
from scipy.special import sph_harm, factorial
from tqdm import tqdm
from collections import defaultdict


def radial_poly(n, l, r):
    """
    Compute the radial polynomial R_nl(r) for 3D Zernike moments.

    Args:
        n (int): Radial order.
        l (int): Angular order.
        r (np.ndarray): Radial coordinates (0 <= r <= 1).

    Returns:
        np.ndarray: Radial polynomial evaluated at r.
    """
    rad = np.zeros_like(r, dtype=complex)
    for s in range((n - l) // 2 + 1):
        num = (-1)**s * factorial(n - s)
        den = (
            factorial(s)
            * factorial((n + l) // 2 - s)
            * factorial((n - l) // 2 - s)
        )
        rad += (num / den) * r**(n - 2 * s)
    return rad
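For reference, the sum this loop evaluates, written out (valid for n ≥ l with n − l even and nonnegative, the parity condition the callers below enforce):

```latex
R_{nl}(r) \;=\; \sum_{s=0}^{(n-l)/2}
  \frac{(-1)^s\,(n-s)!}{\,s!\,\left(\frac{n+l}{2}-s\right)!\,\left(\frac{n-l}{2}-s\right)!}\;
  r^{\,n-2s}
```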


def zernike_moments_3d(mask, n_max):
    """
    Computes 3D Zernike moments for a given 3D boolean mask.

    Args:
        mask (np.ndarray): 3D binary mask (dtype=bool).
        n_max (int): Maximum order of moments.

    Returns:
        dict: Dictionary of Zernike moments A_nml.
        tuple: (max_dist, centroid) used for normalization.
    """
    coords = np.argwhere(mask)
    if coords.size == 0:
        return {}, (0, np.zeros(3))

    centroid = np.mean(coords, axis=0)
    shifted_coords = coords - centroid
    max_dist = np.max(np.linalg.norm(shifted_coords, axis=1))

    if max_dist == 0:  # single-point mask
        return {(0, 0, 0): np.sum(mask)}, (max_dist, centroid)

    # Normalize the coordinates into the unit sphere
    normalized_coords = shifted_coords / max_dist
    x, y, z = normalized_coords.T
    r = np.sqrt(x**2 + y**2 + z**2)

    non_zero = r > 1e-9
    x, y, z, r = x[non_zero], y[non_zero], z[non_zero], r[non_zero]
    mask_values = mask[coords[:, 0], coords[:, 1], coords[:, 2]][non_zero]

    # Spherical coordinates. Note scipy's convention: sph_harm takes the
    # azimuthal angle third and the polar angle fourth, so the calls
    # below pass (phi, theta) in that order.
    z_over_r = np.clip(z / r, -1.0, 1.0)
    theta = np.arccos(z_over_r)  # polar angle
    phi = np.arctan2(y, x)       # azimuthal angle

    moments = {}
    for l in tqdm(range(n_max + 1), desc="Computing moments.."):
        for m in range(-l, l + 1):
            if (l - abs(m)) % 2 != 0:
                continue  # parity condition
            sph_h = sph_harm(m, l, phi, theta)  # computed once per (l, m)

            for n in range(l, n_max + 1, 2):  # n >= l, same parity
                rad = radial_poly(n, l, r)
                zernike_poly = rad * sph_h
                A_nml = np.sum(mask_values * np.conj(zernike_poly))
                moments[(n, m, l)] = A_nml

    return moments, (max_dist, centroid)
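A small end-to-end sketch on a synthetic ball. By spherical symmetry, the dominant moments of a centred ball sit in the l = 0 (hence m = 0) terms; voxelization leaves only small residues elsewhere.

```python
import numpy as np
from miblab_ssa.zernike import zernike_moments_3d

# Synthetic test mask: a ball of radius 10 in a 32^3 volume
grid = np.indices((32, 32, 32))
mask = np.sum((grid - 16) ** 2, axis=0) <= 10 ** 2

moments, (max_dist, centroid) = zernike_moments_3d(mask, n_max=8)
print(len(moments), 'moments; max_dist =', round(float(max_dist), 2))

# Largest-magnitude moments, expected at (n, 0, 0)
top = sorted(moments, key=lambda k: -abs(moments[k]))[:3]
print('largest:', top)
```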


def dice_coefficient(a, b):
    """Compute the Dice similarity coefficient between two boolean masks."""
    a = a.astype(bool)
    b = b.astype(bool)
    intersection = np.logical_and(a, b).sum()
    return 2.0 * intersection / (a.sum() + b.sum() + 1e-9)


def reconstruct_volume_3d(moments, size, max_dist, centroid):
    """
    Reconstructs a 3D volume from Zernike moments.

    Args:
        moments (dict): Zernike moments A_nml.
        size (tuple): Volume dimensions (z, y, x).
        max_dist (float): Normalization factor.
        centroid (np.ndarray): Centroid of the mask.

    Returns:
        np.ndarray: Real part of the reconstructed volume.
    """
    zdim, ydim, xdim = size
    z_grid, y_grid, x_grid = np.ogrid[0:zdim, 0:ydim, 0:xdim]

    # Normalize the coordinates
    x = (x_grid - centroid[2]) / max_dist
    y = (y_grid - centroid[1]) / max_dist
    z = (z_grid - centroid[0]) / max_dist
    r = np.sqrt(x**2 + y**2 + z**2)

    z_over_r = np.clip(z / (r + 1e-9), -1.0, 1.0)
    theta = np.arccos(z_over_r)
    phi = np.arctan2(y, x)

    # Reconstruct the complex volume. Group the moments by (l, m) so
    # that each spherical harmonic is evaluated only once.
    reconstructed_volume = np.zeros(size, dtype=complex)
    moments_by_lm = defaultdict(list)
    for (n, m, l), A_nml in moments.items():
        moments_by_lm[(l, m)].append((n, A_nml))

    for (l, m), nm_list in tqdm(moments_by_lm.items(), desc="Reconstructing.."):
        sph_h = sph_harm(m, l, phi, theta)
        for n, A_nml in nm_list:
            rad = radial_poly(n, l, r)
            reconstructed_volume += A_nml * rad * sph_h

    recon_real = np.real(reconstructed_volume)

    return recon_real
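A hedged round-trip sketch combining the pieces above. The reconstruction is real-valued, so a binarization threshold is needed; the simple sweep below, which maximizes Dice against the original mask, is an illustrative choice, not part of the package.

```python
import numpy as np
from miblab_ssa.zernike import (
    zernike_moments_3d, reconstruct_volume_3d, dice_coefficient,
)

# Ball mask, as in the moments example above
grid = np.indices((32, 32, 32))
mask = np.sum((grid - 16) ** 2, axis=0) <= 10 ** 2

moments, (max_dist, centroid) = zernike_moments_3d(mask, n_max=8)
recon = reconstruct_volume_3d(moments, mask.shape, max_dist, centroid)

# Sweep candidate thresholds and keep the one with the best Dice score
thresholds = np.linspace(recon.min(), recon.max(), 50)
best = max(thresholds, key=lambda t: dice_coefficient(recon > t, mask))
print('Dice at best threshold:', dice_coefficient(recon > best, mask))
```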
miblab_ssa-0.0.0.dist-info/METADATA
ADDED
@@ -0,0 +1,34 @@
Metadata-Version: 2.4
Name: miblab-ssa
Version: 0.0.0
Summary: Statistical shape analysis for medical imaging
Author-email: Steven Sourbron <s.sourbron@sheffield.ac.uk>
License-Expression: Apache-2.0
Project-URL: Homepage, https://miblab.org/
Project-URL: Source Code, https://github.com/openmiblab/pckg-miblab-ssa
Keywords: python,medical imaging,MRI
Classifier: Development Status :: 3 - Alpha
Classifier: Intended Audience :: Developers
Classifier: Intended Audience :: Science/Research
Classifier: Topic :: Scientific/Engineering
Classifier: Operating System :: OS Independent
Classifier: Programming Language :: Python
Classifier: Programming Language :: Python :: 3
Description-Content-Type: text/markdown
License-File: LICENSE
Requires-Dist: tqdm
Requires-Dist: numpy
Requires-Dist: scipy
Requires-Dist: scikit-learn
Requires-Dist: scikit-image
Requires-Dist: trimesh
Requires-Dist: dask
Requires-Dist: dask-ml
Requires-Dist: zarr
Requires-Dist: pyshtools
Requires-Dist: psutil
Requires-Dist: vreg
Dynamic: license-file

# miblab-ssa
Statistical shape analysis for medical imaging data