httomolibgpu 5.1__py3-none-any.whl → 5.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
httomolibgpu/__init__.py CHANGED
@@ -9,6 +9,7 @@ from httomolibgpu.prep.normalize import dark_flat_field_correction, minus_log
9
9
  from httomolibgpu.prep.phase import paganin_filter, paganin_filter_savu_legacy
10
10
  from httomolibgpu.prep.stripe import (
11
11
  remove_stripe_based_sorting,
12
+ remove_stripe_fw,
12
13
  remove_stripe_ti,
13
14
  remove_all_stripe,
14
15
  raven_filter,
@@ -0,0 +1,155 @@
1
+ template<int WSize>
2
+ __global__ void grouped_convolution_x(
3
+ int dim_x,
4
+ int dim_y,
5
+ int dim_z,
6
+ const float* in,
7
+ int in_stride_x,
8
+ int in_stride_y,
9
+ int in_stride_z,
10
+ float* out,
11
+ int out_stride_z,
12
+ int out_stride_group,
13
+ const float* w
14
+ )
15
+ {
16
+ const int g_thd_x = blockDim.x * blockIdx.x + threadIdx.x;
17
+ const int g_thd_y = blockDim.y * blockIdx.y + threadIdx.y;
18
+ const int g_thd_z = blockDim.z * blockIdx.z + threadIdx.z;
19
+ if (g_thd_x >= dim_x || g_thd_y >= dim_y || g_thd_z >= dim_z)
20
+ {
21
+ return;
22
+ }
23
+
24
+ constexpr int out_groups = 2;
25
+ for (int i = 0; i < out_groups; ++i)
26
+ {
27
+ float acc = 0.F;
28
+ for (int j = 0; j < WSize; ++j)
29
+ {
30
+ const int w_idx = i * WSize + j;
31
+ const int in_idx = (g_thd_x * in_stride_x + j) + g_thd_y * in_stride_y + g_thd_z * in_stride_z;
32
+ acc += w[w_idx] * in[in_idx];
33
+ }
34
+ const int out_idx = g_thd_x + g_thd_y * dim_x + g_thd_z * out_stride_z + i * out_stride_group;
35
+ out[out_idx] = acc;
36
+ }
37
+ }
38
+
39
+ template<int WSize>
40
+ __global__ void grouped_convolution_y(
41
+ int dim_x,
42
+ int dim_y,
43
+ int dim_z,
44
+ const float* in,
45
+ int in_stride_x,
46
+ int in_stride_y,
47
+ int in_stride_z,
48
+ int in_stride_group,
49
+ float* out,
50
+ int out_stride_z,
51
+ int out_stride_group,
52
+ const float* w
53
+ )
54
+ {
55
+ const int g_thd_x = blockDim.x * blockIdx.x + threadIdx.x;
56
+ const int g_thd_y = blockDim.y * blockIdx.y + threadIdx.y;
57
+ const int g_thd_z = blockDim.z * blockIdx.z + threadIdx.z;
58
+ if (g_thd_x >= dim_x || g_thd_y >= dim_y || g_thd_z >= dim_z)
59
+ {
60
+ return;
61
+ }
62
+
63
+ constexpr int in_groups = 2;
64
+ constexpr int out_groups = 2;
65
+ constexpr int item_stride_y = 2;
66
+ for (int group = 0; group < in_groups; ++group)
67
+ {
68
+ for (int i = 0; i < out_groups; ++i)
69
+ {
70
+ float acc = 0.F;
71
+ for (int j = 0; j < WSize; ++j)
72
+ {
73
+ const int w_idx = (out_groups * group + i) * WSize + j;
74
+ const int in_idx = g_thd_x * in_stride_x + (item_stride_y * g_thd_y + j) * in_stride_y + group * in_stride_group + g_thd_z * in_stride_z;
75
+ acc += w[w_idx] * in[in_idx];
76
+ }
77
+ const int out_idx = g_thd_x + g_thd_y * dim_x + g_thd_z * out_stride_z + (out_groups * group + i) * out_stride_group;
78
+ out[out_idx] = acc;
79
+ }
80
+ }
81
+ }
82
+
83
+ template<int WSize>
84
+ __global__ void transposed_convolution_x(
85
+ int dim_x,
86
+ int dim_y,
87
+ int dim_z,
88
+ const float* in,
89
+ int in_dim_x,
90
+ int in_stride_y,
91
+ int in_stride_z,
92
+ const float* w,
93
+ float* out
94
+ )
95
+ {
96
+ const int g_thd_x = blockDim.x * blockIdx.x + threadIdx.x;
97
+ const int g_thd_y = blockDim.y * blockIdx.y + threadIdx.y;
98
+ const int g_thd_z = blockDim.z * blockIdx.z + threadIdx.z;
99
+ if (g_thd_x >= dim_x || g_thd_y >= dim_y || g_thd_z >= dim_z)
100
+ {
101
+ return;
102
+ }
103
+
104
+ constexpr int item_out_stride = 2;
105
+ float acc = 0.F;
106
+ for (int i = 0; i < WSize; ++i)
107
+ {
108
+ const int in_x = (g_thd_x - i) / item_out_stride;
109
+ const int in_x_mod = (g_thd_x - i) % item_out_stride;
110
+ if (in_x_mod == 0 && in_x >= 0 && in_x < in_dim_x)
111
+ {
112
+ const int in_idx = in_x + g_thd_y * in_stride_y + g_thd_z * in_stride_z;
113
+ acc += in[in_idx] * w[i];
114
+ }
115
+ }
116
+ const int out_idx = g_thd_x + dim_x * g_thd_y + dim_x * dim_y * g_thd_z;
117
+ out[out_idx] = acc;
118
+ }
119
+
120
+ template<int WSize>
121
+ __global__ void transposed_convolution_y(
122
+ int dim_x,
123
+ int dim_y,
124
+ int dim_z,
125
+ const float* in,
126
+ int in_dim_y,
127
+ int in_stride_y,
128
+ int in_stride_z,
129
+ const float* w,
130
+ float* out
131
+ )
132
+ {
133
+ const int g_thd_x = blockDim.x * blockIdx.x + threadIdx.x;
134
+ const int g_thd_y = blockDim.y * blockIdx.y + threadIdx.y;
135
+ const int g_thd_z = blockDim.z * blockIdx.z + threadIdx.z;
136
+ if (g_thd_x >= dim_x || g_thd_y >= dim_y || g_thd_z >= dim_z)
137
+ {
138
+ return;
139
+ }
140
+
141
+ constexpr int item_out_stride = 2;
142
+ float acc = 0.F;
143
+ for (int i = 0; i < WSize; ++i)
144
+ {
145
+ const int in_y = (g_thd_y - i) / item_out_stride;
146
+ const int in_y_mod = (g_thd_y - i) % item_out_stride;
147
+ if (in_y_mod == 0 && in_y >= 0 && in_y < in_dim_y)
148
+ {
149
+ const int in_idx = g_thd_x + in_y * in_stride_y + g_thd_z * in_stride_z;
150
+ acc += in[in_idx] * w[i];
151
+ }
152
+ }
153
+ const int out_idx = g_thd_x + dim_x * g_thd_y + dim_x * dim_y * g_thd_z;
154
+ out[out_idx] = acc;
155
+ }
@@ -21,6 +21,7 @@
21
21
  """Module for stripes removal"""
22
22
 
23
23
  import numpy as np
24
+ import pywt
24
25
  from httomolibgpu import cupywrapper
25
26
 
26
27
  cp = cupywrapper.cp
@@ -31,6 +32,7 @@ from unittest.mock import Mock
31
32
  if cupy_run:
32
33
  from cupyx.scipy.ndimage import median_filter, binary_dilation, uniform_filter1d
33
34
  from cupyx.scipy.fft import fft2, ifft2, fftshift
35
+ from cupyx.scipy.fftpack import get_fft_plan
34
36
  from httomolibgpu.cuda_kernels import load_cuda_module
35
37
  else:
36
38
  median_filter = Mock()
@@ -41,10 +43,11 @@ else:
41
43
  fftshift = Mock()
42
44
 
43
45
 
44
- from typing import Union
46
+ from typing import Optional, Tuple, Union
45
47
 
46
48
  __all__ = [
47
49
  "remove_stripe_based_sorting",
50
+ "remove_stripe_fw",
48
51
  "remove_stripe_ti",
49
52
  "remove_all_stripe",
50
53
  "raven_filter",
@@ -156,6 +159,604 @@ def remove_stripe_ti(
156
159
  return data
157
160
 
158
161
 
162
+ ###### Ring removal with wavelet filtering (adapted for CuPy from the pytorch_wavelets package https://pytorch-wavelets.readthedocs.io/) ##########
163
+ # These functions are taken from TomoCuPy package
164
+ # *************************************************************************** #
165
+ # Copyright © 2022, UChicago Argonne, LLC #
166
+ # All Rights Reserved #
167
+ # Software Name: Tomocupy #
168
+ # By: Argonne National Laboratory #
169
+ # #
170
+ # OPEN SOURCE LICENSE #
171
+ # #
172
+ # Redistribution and use in source and binary forms, with or without #
173
+ # modification, are permitted provided that the following conditions are met: #
174
+ # #
175
+ # 1. Redistributions of source code must retain the above copyright notice, #
176
+ # this list of conditions and the following disclaimer. #
177
+ # 2. Redistributions in binary form must reproduce the above copyright #
178
+ # notice, this list of conditions and the following disclaimer in the #
179
+ # documentation and/or other materials provided with the distribution. #
180
+ # 3. Neither the name of the copyright holder nor the names of its #
181
+ # contributors may be used to endorse or promote products derived #
182
+ # from this software without specific prior written permission. #
183
+ # #
184
+ # #
185
+ # *************************************************************************** #
186
+
187
+
188
+ def _reflect(x: np.ndarray, minx: float, maxx: float) -> np.ndarray:
189
+ """Reflect the values in matrix *x* about the scalar values *minx* and
190
+ *maxx*. Hence a vector *x* containing a long linearly increasing series is
191
+ converted into a waveform which ramps linearly up and down between *minx*
192
+ and *maxx*. If *x* contains integers and *minx* and *maxx* are (integers +
193
+ 0.5), the ramps will have repeated max and min samples.
194
+
195
+ .. codeauthor:: Rich Wareham <rjw57@cantab.net>, Aug 2013
196
+ .. codeauthor:: Nick Kingsbury, Cambridge University, January 1999.
197
+
198
+ """
199
+ rng = maxx - minx
200
+ rng_by_2 = 2 * rng
201
+ mod = np.fmod(x - minx, rng_by_2)
202
+ normed_mod = np.where(mod < 0, mod + rng_by_2, mod)
203
+ out = np.where(normed_mod >= rng, rng_by_2 - normed_mod, normed_mod) + minx
204
+ return np.array(out, dtype=x.dtype)
205
+
206
+
207
+ class _DeviceMemStack:
208
+ def __init__(self) -> None:
209
+ self.allocations = []
210
+ self.current = 0
211
+ self.highwater = 0
212
+
213
+ def malloc(self, bytes):
214
+ self.allocations.append(bytes)
215
+ allocated = self._round_up(bytes)
216
+ self.current += allocated
217
+ self.highwater = max(self.current, self.highwater)
218
+
219
+ def free(self, bytes):
220
+ assert bytes in self.allocations
221
+ self.allocations.remove(bytes)
222
+ self.current -= self._round_up(bytes)
223
+ assert self.current >= 0
224
+
225
+ def _round_up(self, size):
226
+ ALLOCATION_UNIT_SIZE = 512
227
+ size = (size + ALLOCATION_UNIT_SIZE - 1) // ALLOCATION_UNIT_SIZE
228
+ return size * ALLOCATION_UNIT_SIZE
229
+
230
+
231
def _mypad(
    x: cp.ndarray, pad: Tuple[int, int, int, int], mem_stack: Optional[_DeviceMemStack]
) -> cp.ndarray:
    """Symmetrically pad a 4D array along one of its last two axes.

    The padding is done by gathering with a reflected index vector, so the
    border samples are mirrored (edge sample repeated).

    Parameters
    ----------
    x : array or sequence of int
        Array to pad. When ``mem_stack`` is given, ``x`` is instead the 4-item
        shape of that array and no data is touched.
    pad : tuple
        ``(left, right, top, bottom)`` pad sizes. Only one axis may be padded
        per call: either ``left == right == 0`` (vertical padding, axis 2) or
        ``top == bottom == 0`` (horizontal padding, axis 3).
    mem_stack : _DeviceMemStack, optional
        When given, the function runs in memory-estimation mode: the size of
        the padded array is recorded on the stack and its shape is returned.

    Returns
    -------
    The padded array, or the padded shape (list) in memory-estimation mode.

    Raises
    ------
    ValueError
        If both axes have non-zero padding. Previously this case silently
        returned ``None``.
    """
    left, right, top, bottom = pad
    if left == 0 and right == 0:
        # Vertical only
        axis, before, after = 2, top, bottom
    elif top == 0 and bottom == 0:
        # Horizontal only
        axis, before, after = 3, left, right
    else:
        raise ValueError(
            f"_mypad supports padding along a single axis only, got pad={pad}"
        )

    estimating = mem_stack is not None
    length = x[axis] if estimating else x.shape[axis]
    # Source index for every position of the padded axis; positions outside
    # [0, length) are mirrored back inside.
    index = _reflect(
        np.arange(-before, length + after, dtype="int32"), -0.5, length - 0.5
    )

    if estimating:
        ret_shape = [x[0], x[1], x[2], x[3]]
        ret_shape[axis] = index.size
        mem_stack.malloc(np.prod(ret_shape) * np.float32().itemsize)
        return ret_shape

    if axis == 2:
        return x[:, :, index, :]
    return x[:, :, :, index]
261
+
262
+
263
+ def _next_power_of_two(x: int, max_val: int = 128) -> int:
264
+ n = 1
265
+ while n < x and n < max_val:
266
+ n *= 2
267
+ return n
268
+
269
+
270
def _conv2d(
    x: cp.ndarray,
    w: np.ndarray,
    stride: Tuple[int, int],
    groups: int,
    mem_stack: Optional[_DeviceMemStack],
) -> cp.ndarray:
    """Strided 1D convolution of a 4D array (subset of pytorch.conv2d).

    Parameters
    ----------
    x : array or sequence of int
        Input of shape ``(b, ci, hi, wi)``. In memory-estimation mode
        (``mem_stack`` given) this is the shape itself.
    w : np.ndarray
        Filter bank of shape ``(co, 1, hk, wk)``.
    stride : tuple
        ``(stride_h, stride_w)`` of the convolution.
    groups : int
        Selects the kernel: ``1`` runs the convolution along x
        (``grouped_convolution_x<wk>``), any other value runs the convolution
        along y (``grouped_convolution_y<hk>``).
    mem_stack : _DeviceMemStack, optional
        When given, only the output allocation is recorded and the output
        shape is returned.

    Returns
    -------
    float32 array of shape ``(b, co, ho, wo)``, or that shape (list) in
    memory-estimation mode.
    """
    b, ci, hi, wi = x.shape if not mem_stack else x
    co, _, hk, wk = w.shape
    ho = int(np.floor(1 + (hi - hk) / stride[0]))
    wo = int(np.floor(1 + (wi - wk) / stride[1]))
    out_shape = [b, co, ho, wo]
    if mem_stack:
        mem_stack.malloc(np.prod(out_shape) * np.float32().itemsize)
        return out_shape

    out = cp.zeros(out_shape, dtype="float32")
    # Both operands live on the device, so use CuPy for both reshapes instead
    # of relying on NumPy dispatching to CuPy.
    w = cp.expand_dims(cp.asarray(w), axis=0)
    x = cp.expand_dims(x, axis=1)
    # NOTE(review): the kernels read `const float*` and step through the taps
    # with unit stride, so x is assumed to be float32 with a contiguous last
    # axis -- confirm for any new caller.

    symbol_names = [f"grouped_convolution_x<{wk}>", f"grouped_convolution_y<{hk}>"]
    module = load_cuda_module("remove_stripe_fw", name_expressions=symbol_names)

    dim_x = out.shape[-1]
    dim_y = out.shape[-2]
    dim_z = out.shape[0]

    # Strides are handed to the kernels in elements, not bytes; each array is
    # divided by its own itemsize.
    in_itemsize = x.dtype.itemsize
    out_itemsize = out.dtype.itemsize
    in_stride_x = stride[1]
    in_stride_y = x.strides[-2] // in_itemsize
    in_stride_z = x.strides[0] // in_itemsize
    out_stride_z = out.strides[0] // out_itemsize
    out_stride_group = out.strides[1] // out_itemsize

    block_x = _next_power_of_two(dim_x)
    block_dim = (block_x, 1, 1)
    grid_x = (dim_x + block_x - 1) // block_x
    grid_dim = (grid_x, dim_y, dim_z)

    kernel_args = [dim_x, dim_y, dim_z, x, in_stride_x, in_stride_y, in_stride_z]
    if groups == 1:
        kernel = module.get_function(symbol_names[0])
    else:
        kernel = module.get_function(symbol_names[1])
        # The y kernel additionally needs the pitch between input channels.
        kernel_args.append(x.strides[2] // in_itemsize)
    kernel_args.extend([out, out_stride_z, out_stride_group, w])

    kernel(grid_dim, block_dim, tuple(kernel_args))
    return out
350
+
351
+
352
def _conv_transpose2d(
    x: cp.ndarray,
    w: np.ndarray,
    stride: Tuple[int, int],
    pad: Tuple[int, int],
    groups: int,
    mem_stack: Optional[_DeviceMemStack],
) -> cp.ndarray:
    """Transposed 1D convolution of a 4D array (subset of pytorch.conv_transpose2d).

    Parameters
    ----------
    x : array or sequence of int
        Input of shape ``(b, co, ho, wo)``. In memory-estimation mode
        (``mem_stack`` given) this is the shape itself.
    w : np.ndarray
        Filter of shape ``(co, ci, hk, wk)``. A filter with ``wk > 1`` is
        applied along x; otherwise one with ``hk > 1`` is applied along y.
    stride : tuple
        ``(stride_h, stride_w)``, used to compute the output size.
    pad : tuple
        ``(pad_h, pad_w)`` samples cropped from both ends of the result.
    groups : int
        Unused; kept for signature compatibility.
    mem_stack : _DeviceMemStack, optional
        When given, the allocations are only recorded and the output shape is
        returned.

    Returns
    -------
    Contiguous float32 array of shape
    ``(b, ci, hi - 2 * pad_h, wi - 2 * pad_w)``, or that shape (list) in
    memory-estimation mode.

    Raises
    ------
    AssertionError
        If the filter has neither ``wk > 1`` nor ``hk > 1``. Raised explicitly
        so the check is not stripped when Python runs with ``-O``.
    """
    b, co, ho, wo = x.shape if not mem_stack else x
    co, ci, hk, wk = w.shape

    hi = (ho - 1) * stride[0] + hk
    wi = (wo - 1) * stride[1] + wk
    out_shape = [b, ci, hi, wi]
    if mem_stack:
        # Mirror the order of allocations made by the real code path below.
        mem_stack.malloc(np.prod(out_shape) * np.float32().itemsize)
        mem_stack.malloc(w.size * np.float32().itemsize)
        if pad != 0:
            new_out_shape = [
                out_shape[0],
                out_shape[1],
                out_shape[2] - 2 * pad[0],
                out_shape[3] - 2 * pad[1],
            ]
            mem_stack.malloc(np.prod(new_out_shape) * np.float32().itemsize)
            mem_stack.free(np.prod(out_shape) * np.float32().itemsize)
            out_shape = new_out_shape
        mem_stack.free(w.size * np.float32().itemsize)
        return out_shape

    if wk <= 1 and hk <= 1:
        # Fail before allocating anything on the device.
        raise AssertionError(
            f"transposed convolution needs a filter with hk > 1 or wk > 1, "
            f"got hk={hk}, wk={wk}"
        )

    out = cp.zeros(out_shape, dtype="float32")
    w = cp.asarray(w)
    # NOTE(review): the kernels write `out` as a dense (dim_z, dim_y, dim_x)
    # volume and index `x` only by batch and row stride, i.e. they assume a
    # single channel and a contiguous last axis -- confirm for any new caller.

    symbol_names = [
        f"transposed_convolution_x<{wk}>",
        f"transposed_convolution_y<{hk}>",
    ]
    module = load_cuda_module("remove_stripe_fw", name_expressions=symbol_names)
    dim_x = out.shape[-1]
    dim_y = out.shape[-2]
    dim_z = out.shape[0]
    # Strides are handed to the kernels in elements, not bytes.
    in_stride_y = x.strides[-2] // x.dtype.itemsize
    in_stride_z = x.strides[0] // x.dtype.itemsize

    block_x = _next_power_of_two(dim_x)
    block_dim = (block_x, 1, 1)
    grid_x = (dim_x + block_x - 1) // block_x
    grid_dim = (grid_x, dim_y, dim_z)

    if wk > 1:
        kernel = module.get_function(symbol_names[0])
        in_dim = x.shape[-1]  # number of input samples along x
    else:
        kernel = module.get_function(symbol_names[1])
        in_dim = x.shape[-2]  # number of input samples along y
    kernel(
        grid_dim,
        block_dim,
        (dim_x, dim_y, dim_z, x, in_dim, in_stride_y, in_stride_z, w, out),
    )

    if pad != 0:
        out = out[:, :, pad[0] : out.shape[2] - pad[0], pad[1] : out.shape[3] - pad[1]]
    return cp.ascontiguousarray(out)
424
+
425
+
426
def _afb1d(
    x: cp.ndarray,
    h0: np.ndarray,
    h1: np.ndarray,
    dim: int,
    mem_stack: Optional[_DeviceMemStack],
) -> cp.ndarray:
    """1D analysis filter bank applied along one spatial axis of an image.

    Parameters
    ----------
    x (array): 4D input with the last two dimensions the spatial input. In
        memory-estimation mode (``mem_stack`` given) this is the 4-item shape.
    h0 (array): 4D lowpass filter of shape (1, 1, h, 1) or (1, 1, 1, w)
    h1 (array): 4D highpass filter of shape (1, 1, h, 1) or (1, 1, 1, w)
    dim (int): axis to filter along; negative values are wrapped. 2 filters
        vertically (stride 2 along the rows), 3 filters horizontally (stride 2
        along the columns).
    mem_stack: optional allocation tracker; when given no data is processed.

    Returns
    -------
    lohi: lowpass and highpass subbands concatenated along the channel
        dimension (or the shape of that array in memory-estimation mode)
    """
    in_shape = x if mem_stack else x.shape
    channels = in_shape[1]
    # Convert the dim to positive
    axis = dim % 4
    conv_stride = (2, 1) if axis == 2 else (1, 2)
    length = in_shape[axis]
    taps = h0.size

    # Lay both filters out along the filtered axis and interleave them
    # (low, high) once per input channel.
    kernel_shape = [1, 1, 1, 1]
    kernel_shape[axis] = taps
    filter_pair = [h0.reshape(*kernel_shape), h1.reshape(*kernel_shape)]
    bank = np.concatenate(filter_pair * channels, axis=0)

    # Calculate the pad size
    coeff_len = pywt.dwt_coeff_len(length, taps, mode="symmetric")
    total_pad = 2 * (coeff_len - 1) - length + taps
    before = total_pad // 2
    after = (total_pad + 1) // 2
    if axis == 2:
        pad = (0, 0, before, after)
    else:
        pad = (before, after, 0, 0)

    padded = _mypad(x, pad=pad, mem_stack=mem_stack)
    lohi = _conv2d(
        padded, bank, stride=conv_stride, groups=channels, mem_stack=mem_stack
    )
    if mem_stack:
        # The padded copy is only needed for the duration of the convolution.
        mem_stack.free(np.prod(padded) * np.float32().itemsize)
    return lohi
471
+
472
+
473
def _sfb1d(
    lo: cp.ndarray,
    hi: cp.ndarray,
    g0: np.ndarray,
    g1: np.ndarray,
    dim: int,
    mem_stack: Optional[_DeviceMemStack],
) -> cp.ndarray:
    """1D synthesis filter bank applied along one spatial axis.

    ``lo`` and ``hi`` are upsampled by 2 along ``dim`` through transposed
    convolutions with ``g0`` and ``g1`` respectively, and the two results are
    summed. In memory-estimation mode (``mem_stack`` given) ``lo`` and ``hi``
    are shapes, the allocations are only recorded, and the shape of the result
    is returned.
    """
    channels = lo[1] if mem_stack else lo.shape[1]
    axis = dim % 4
    taps = g0.size

    kernel_shape = [1, 1, 1, 1]
    kernel_shape[axis] = taps
    if axis == 2:
        up_stride, crop = (2, 1), (taps - 2, 0)
    else:
        up_stride, crop = (1, 2), (0, taps - 2)

    # One copy of each filter per channel.
    lo_bank = np.concatenate([g0.reshape(*kernel_shape)] * channels, axis=0)
    hi_bank = np.concatenate([g1.reshape(*kernel_shape)] * channels, axis=0)

    y_lo = _conv_transpose2d(
        lo, lo_bank, stride=up_stride, pad=crop, groups=channels, mem_stack=mem_stack
    )
    y_hi = _conv_transpose2d(
        hi, hi_bank, stride=up_stride, pad=crop, groups=channels, mem_stack=mem_stack
    )

    if not mem_stack:
        return y_lo + y_hi

    itemsize = np.float32().itemsize
    # Allocation of the sum
    mem_stack.malloc(np.prod(y_hi) * itemsize)
    mem_stack.free(np.prod(y_lo) * itemsize)
    mem_stack.free(np.prod(y_hi) * itemsize)
    return y_lo
501
+
502
+
503
class _DWTForward:
    """Single-level 2D forward discrete wavelet transform of an image.

    Args:
        wave (str): Name of the wavelet, as understood by ``pywt.Wavelet``.
    """

    def __init__(self, wave: str):
        super().__init__()

        wavelet = pywt.Wavelet(wave)
        # The decomposition filters are stored reversed; the same pair is used
        # for the rows and for the columns.
        lowpass = np.array(wavelet.dec_lo).astype("float32")[::-1]
        highpass = np.array(wavelet.dec_hi).astype("float32")[::-1]

        column_layout = (1, 1, -1, 1)
        row_layout = (1, 1, 1, -1)
        self.h0_col = lowpass.reshape(column_layout)
        self.h1_col = highpass.reshape(column_layout)
        self.h0_row = lowpass.reshape(row_layout)
        self.h1_row = highpass.reshape(row_layout)

    def apply(
        self, x: cp.ndarray, mem_stack: Optional[_DeviceMemStack] = None
    ) -> Tuple[cp.ndarray, cp.ndarray]:
        """Run one level of the forward DWT.

        Args:
            x (array): Input of shape :math:`(N, C_{in}, H_{in}, W_{in})`.
                In memory-estimation mode (``mem_stack`` given) this is the
                shape itself.
            mem_stack: optional allocation tracker; when given, no data is
                processed and shapes are returned instead of arrays.

        Returns:
            (yl, yh)
                tuple of lowpass (yl) and bandpass (yh) coefficients.
                yl has shape :math:`(N, C_{in}, H_{in}', W_{in}')` and yh has
                shape :math:`(N, C_{in}, 3, H_{in}', W_{in}')`. The extra
                dimension in yh iterates over the LH, HL and HH coefficients.

        Note:
            :math:`H_{in}', W_{in}'` denote the downsampled spatial sizes.
        """
        # Filter along the rows first, then along the columns.
        lohi = _afb1d(x, self.h0_row, self.h1_row, dim=3, mem_stack=mem_stack)
        y = _afb1d(lohi, self.h0_col, self.h1_col, dim=2, mem_stack=mem_stack)

        if not mem_stack:
            del lohi
            batch, rows, cols = y.shape[0], y.shape[-2], y.shape[-1]
            # Split the channel axis into (channel, 4 subbands).
            subbands = y.reshape(batch, -1, 4, rows, cols)
            lowpass = cp.ascontiguousarray(subbands[:, :, 0])
            bandpass = cp.ascontiguousarray(subbands[:, :, 1:])
            return (lowpass, bandpass)

        itemsize = np.float32().itemsize
        batch, rows, cols = y[0], y[-2], y[-1]
        channels = np.prod(y) // batch // 4 // rows // cols
        x_shape = [batch, channels, rows, cols]
        yh_shape = [batch, channels, 4 - 1, rows, cols]

        mem_stack.free(np.prod(lohi) * itemsize)
        mem_stack.malloc(np.prod(x_shape) * itemsize)
        mem_stack.malloc(np.prod(yh_shape) * itemsize)
        mem_stack.free(np.prod(y) * itemsize)
        return x_shape, yh_shape
562
+
563
+
564
class _DWTInverse:
    """Single-level 2D inverse discrete wavelet transform of an image.

    Args:
        wave (str): Name of the wavelet, as understood by ``pywt.Wavelet``.
    """

    def __init__(self, wave: str):
        super().__init__()
        wavelet = pywt.Wavelet(wave)
        # Prepare the filters; the same reconstruction pair is used for the
        # rows and for the columns.
        lowpass = np.array(wavelet.rec_lo).astype("float32")
        highpass = np.array(wavelet.rec_hi).astype("float32")

        column_layout = (1, 1, -1, 1)
        row_layout = (1, 1, 1, -1)
        self.g0_col = lowpass.reshape(column_layout)
        self.g1_col = highpass.reshape(column_layout)
        self.g0_row = lowpass.reshape(row_layout)
        self.g1_row = highpass.reshape(row_layout)

    def apply(
        self,
        coeffs: Tuple[cp.ndarray, cp.ndarray],
        mem_stack: Optional[_DeviceMemStack] = None,
    ) -> cp.ndarray:
        """Run one level of the inverse DWT.

        Args:
            coeffs (yl, yh): tuple of lowpass and bandpass coefficients, where
                yl has shape :math:`(N, C_{in}, H_{in}', W_{in}')` and yh has
                shape :math:`(N, C_{in}, 3, H_{in}', W_{in}')`, i.e. the format
                returned by ``_DWTForward.apply``. In memory-estimation mode
                (``mem_stack`` given) both are shapes.
            mem_stack: optional allocation tracker; when given, no data is
                processed and a shape is returned instead of an array.

        Returns:
            Reconstructed input of shape :math:`(N, C_{in}, H_{in}, W_{in})`

        Note:
            :math:`H_{in}', W_{in}'` denote the downsampled spatial sizes.

        """
        yl, yh = coeffs
        if mem_stack:
            # All three subbands share the spatial shape of yh.
            lh = [yh[0], yh[1], yh[3], yh[4]]
            hl = [yh[0], yh[1], yh[3], yh[4]]
            hh = [yh[0], yh[1], yh[3], yh[4]]
        else:
            lh = yh[:, :, 0, :, :]
            hl = yh[:, :, 1, :, :]
            hh = yh[:, :, 2, :, :]

        # Undo the column filtering, then the row filtering.
        lo = _sfb1d(yl, lh, self.g0_col, self.g1_col, dim=2, mem_stack=mem_stack)
        hi = _sfb1d(hl, hh, self.g0_col, self.g1_col, dim=2, mem_stack=mem_stack)
        yl = _sfb1d(lo, hi, self.g0_row, self.g1_row, dim=3, mem_stack=mem_stack)

        if mem_stack:
            itemsize = np.float32().itemsize
            mem_stack.free(np.prod(lo) * itemsize)
            mem_stack.free(np.prod(hi) * itemsize)
        return yl
616
+
617
+
618
def _repair_memory_fragmentation_if_needed(fragmentation_threshold: float = 0.2):
    """Release cached blocks of the default CuPy memory pool when it is fragmented.

    The pool is considered fragmented when the bytes it holds exceed the bytes
    actually in use by more than ``fragmentation_threshold`` (as a fraction of
    the bytes in use).

    Parameters
    ----------
    fragmentation_threshold : float
        Allowed relative excess of held bytes over used bytes.
    """
    pool = cp.get_default_memory_pool()
    total = pool.total_bytes()
    used = pool.used_bytes()
    if used == 0:
        # Nothing is in use (e.g. another allocator serves the arrays), so the
        # ratio is undefined; anything the pool still holds is pure cache.
        if total > 0:
            pool.free_all_blocks()
        return
    if (total / used) - 1 > fragmentation_threshold:
        pool.free_all_blocks()
623
+
624
+
625
def remove_stripe_fw(
    data: cp.ndarray,
    sigma: float = 2,
    wname: str = "db5",
    level: Optional[int] = None,
    calc_peak_gpu_mem: bool = False,
) -> Union[cp.ndarray, int]:
    """
    Remove horizontal stripes from sinogram using the Fourier-Wavelet (FW) based method :cite:`munch2009stripe`. The original source code
    is taken from the TomoCuPy and NABU packages.

    Parameters
    ----------
    data : ndarray
        3D tomographic data as a CuPy array. When ``calc_peak_gpu_mem`` is True,
        this is instead the shape of the data as a sequence of three integers.
    sigma : float
        Damping parameter in Fourier space.
    wname : str
        Type of the wavelet filter, e.g. 'db5', 'db7', 'haar', 'sym5', 'sym16', 'bior4.4'.
        The name is passed to PyWavelets unchanged.
    level : int, optional
        Number of discrete wavelet transform levels. When None, it is set to
        ceil(log2(n)) where n is the largest dimension of the data.
    calc_peak_gpu_mem : bool
        Parameter to support memory estimation in HTTomo. Irrelevant to the method itself and can be ignored by user.

    Returns
    -------
    ndarray
        Stripe-corrected 3D tomographic data as a CuPy array. When
        ``calc_peak_gpu_mem`` is True, an int is returned instead: the simulated
        peak number of bytes multiplied by 1.1.
    """

    if level is None:
        if calc_peak_gpu_mem:
            size = np.max(data)  # data is a tuple in this case
        else:
            size = np.max(data.shape)
        level = int(np.ceil(np.log2(size)))

    [nproj, nz, ni] = data.shape if not calc_peak_gpu_mem else data

    # The projection axis is zero-padded by one eighth of its length.
    nproj_pad = nproj + nproj // 8

    # Accepts all wave types available to PyWavelets
    xfm = _DWTForward(wave=wname)
    ifm = _DWTInverse(wave=wname)

    # Wavelet decomposition.
    cc = []
    sli_shape = [nz, 1, nproj_pad, ni]

    if calc_peak_gpu_mem:
        # Replay the allocations of the real code path below on a simulated
        # stack, working with shapes only, and report the peak.
        mem_stack = _DeviceMemStack()
        # A data copy is assumed when invoking the function
        mem_stack.malloc(np.prod(data) * np.float32().itemsize)
        mem_stack.malloc(np.prod(sli_shape) * np.float32().itemsize)
        cc = []
        fcV_bytes = None
        for k in range(level):
            new_sli_shape, c = xfm.apply(sli_shape, mem_stack)
            mem_stack.free(np.prod(sli_shape) * np.float32().itemsize)
            sli_shape = new_sli_shape
            cc.append(c)

            # The spectrum of the previous level is released before the one
            # for this level is accounted for.
            if fcV_bytes:
                mem_stack.free(fcV_bytes)
            fcV_shape = [c[0], c[3], c[4]]
            fcV_bytes = np.prod(fcV_shape) * np.complex64().itemsize
            mem_stack.malloc(fcV_bytes)

            # For the FFT
            mem_stack.malloc(2 * np.prod(fcV_shape) * np.float32().itemsize)
            mem_stack.malloc(2 * fcV_bytes)

            # NOTE: a real (temporary) device array is allocated here to query
            # the size of the FFT plan work area.
            fft_dummy = cp.empty(fcV_shape, dtype="float32")
            fft_plan = get_fft_plan(fft_dummy)
            fft_plan_size = fft_plan.work_area.mem.size
            del fft_dummy
            del fft_plan
            mem_stack.malloc(fft_plan_size)
            mem_stack.free(2 * np.prod(fcV_shape) * np.float32().itemsize)
            mem_stack.free(fft_plan_size)
            mem_stack.free(2 * fcV_bytes)

            # The rest of the iteration doesn't contribute to the peak
            # NOTE: The last iteration of fcV is "leaked"

        for k in range(level)[::-1]:
            new_sli_shape = [sli_shape[0], sli_shape[1], cc[k][-2], cc[k][-1]]
            new_sli_shape = ifm.apply((new_sli_shape, cc[k]), mem_stack)
            mem_stack.free(np.prod(sli_shape) * np.float32().itemsize)
            sli_shape = new_sli_shape

        mem_stack.malloc(np.prod(data) * np.float32().itemsize)
        for c in cc:
            mem_stack.free(np.prod(c) * np.float32().itemsize)
        mem_stack.free(np.prod(sli_shape) * np.float32().itemsize)
        # 10% safety margin on top of the simulated peak.
        return int(mem_stack.highwater * 1.1)

    # Place the data, with the first two axes swapped, in the centre of the
    # zero-padded axis.
    sli = cp.zeros(sli_shape, dtype="float32")
    sli[:, 0, (nproj_pad - nproj) // 2 : (nproj_pad + nproj) // 2] = data.swapaxes(0, 1)
    for k in range(level):
        sli, c = xfm.apply(sli)
        cc.append(c)
        # FFT
        # Only the band at index 1 of the bandpass coefficients is filtered.
        fft_in = cp.ascontiguousarray(cc[k][:, 0, 1])
        fft_plan = get_fft_plan(fft_in, axes=1)
        with fft_plan:
            fcV = cp.fft.fft(fft_in, axis=1)
        del fft_plan
        del fft_in
        _, my, mx = fcV.shape
        # Damping of ring artifact information.
        y_hat = np.fft.ifftshift((np.arange(-my, my, 2) + 1) / 2)
        # 1 - exp(-y^2 / (2 sigma^2)), written with expm1.
        damp = -np.expm1(-(y_hat**2) / (2 * sigma**2))
        fcV *= cp.tile(damp, (mx, 1)).swapaxes(0, 1)
        # Inverse FFT.
        ifft_in = cp.ascontiguousarray(fcV)
        ifft_plan = get_fft_plan(ifft_in, axes=1)
        with ifft_plan:
            cc[k][:, 0, 1] = cp.fft.ifft(ifft_in, my, axis=1).real
        del ifft_plan
        del ifft_in
        _repair_memory_fragmentation_if_needed()

    # Wavelet reconstruction.
    for k in range(level)[::-1]:
        # Crop the running reconstruction to the size of this level's
        # coefficients before inverting.
        shape0 = cc[k][0, 0, 1].shape
        sli = sli[:, :, : shape0[0], : shape0[1]]
        sli = ifm.apply((sli, cc[k]))
        _repair_memory_fragmentation_if_needed()

    # Drop the padding and restore the original axis order.
    data = sli[:, 0, (nproj_pad - nproj) // 2 : (nproj_pad + nproj) // 2, :ni]
    data = data.swapaxes(0, 1)
    return cp.ascontiguousarray(data)
758
+
759
+
159
760
  ######## Optimized version for Vo-all ring removal in tomopy########
160
761
  # This function is taken from TomoCuPy package
161
762
  # *************************************************************************** #
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: httomolibgpu
3
- Version: 5.1
3
+ Version: 5.2
4
4
  Summary: Commonly used tomography data processing methods at DLS.
5
5
  Author-email: Daniil Kazantsev <daniil.kazantsev@diamond.ac.uk>, Yousef Moazzam <yousef.moazzam@diamond.ac.uk>, Naman Gera <naman.gera@diamond.ac.uk>
6
6
  License: BSD-3-Clause
@@ -19,6 +19,7 @@ Requires-Dist: scipy
19
19
  Requires-Dist: pillow
20
20
  Requires-Dist: scikit-image
21
21
  Requires-Dist: tomobar
22
+ Requires-Dist: PyWavelets
22
23
  Provides-Extra: dev
23
24
  Requires-Dist: pytest; extra == "dev"
24
25
  Requires-Dist: pytest-cov; extra == "dev"
@@ -1,4 +1,4 @@
1
- httomolibgpu/__init__.py,sha256=Fdj5ipIGgeKqSCYRb5bBVMAZ04ZvZJzuBoGOAqc0zgk,937
1
+ httomolibgpu/__init__.py,sha256=Dt_TYhjJGPVathlceTYQhoRSyH8n7FGQJlRMUlFZNdc,959
2
2
  httomolibgpu/cupywrapper.py,sha256=6ITGJ2Jw5I5kVmKEL5LlsnLRniEqqBLsHiAjvLtk0Xk,493
3
3
  httomolibgpu/memory_estimator_helpers.py,sha256=QaJady-z8y9Emw7W-lB608vBTNvVYv3obboQKVj6E9M,705
4
4
  httomolibgpu/cuda_kernels/__init__.py,sha256=VQNMaGcVDwiE-C64FfLtubHpLriLG0Y3_QnjHBSHrN0,884
@@ -8,6 +8,7 @@ httomolibgpu/cuda_kernels/generate_mask.cu,sha256=3il3r1J2cnTCd3UXO4GWGfBgGxj4pv
8
8
  httomolibgpu/cuda_kernels/median_kernel.cu,sha256=EECLUCoJkT9GQ9Db_FF6fYOG6cDSiAePTRZNxE4VZ68,1692
9
9
  httomolibgpu/cuda_kernels/raven_filter.cu,sha256=KX2TM_9tMpvoGCHezDNWYABCnv2cT9mlMo4IhxRUac0,1437
10
10
  httomolibgpu/cuda_kernels/remove_nan_inf.cu,sha256=gv0ihkf6A_D_po9x7pmgFsQFhwZ1dB_HYc_0Tu-bpUU,630
11
+ httomolibgpu/cuda_kernels/remove_stripe_fw.cu,sha256=J_vy0RUYYKT-mOzERsn3kjgt4hbE7vHPFRuJYNzs6sM,4504
11
12
  httomolibgpu/misc/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
12
13
  httomolibgpu/misc/corr.py,sha256=e1eUsWLSM9SB5xzWTDW0o9pAD_lbrr4DL-QQmyM8v4c,4503
13
14
  httomolibgpu/misc/denoise.py,sha256=-D9UPbZyUAcCptBHUUXsmj1NFzd6HrrRjJJh4T5gmhQ,4787
@@ -18,13 +19,13 @@ httomolibgpu/prep/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU
18
19
  httomolibgpu/prep/alignment.py,sha256=GVxnyioipmqsHb4s3mPQ8tKGoPIQMPftDrQxUO-HBuE,5491
19
20
  httomolibgpu/prep/normalize.py,sha256=hee0H4mE7FrSZgcF1fjLsKT06xjTJymkyAxpe2itQe4,4202
20
21
  httomolibgpu/prep/phase.py,sha256=yKJe9gmWuFaUSIuoctV5X1Pb7yEgOmkQ6jxvZkSSwpQ,12128
21
- httomolibgpu/prep/stripe.py,sha256=8_DV0ON6AWARuziqkmhom56gWTardtqC_z3xG8geg0o,14774
22
+ httomolibgpu/prep/stripe.py,sha256=OZPimFxe9TOSaEcErORFxd6HCcFcR62-q5XYBvC10FM,36918
22
23
  httomolibgpu/recon/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
23
24
  httomolibgpu/recon/_phase_cross_correlation.py,sha256=Ru2oLAPv8XOSSuZer5yNQrxD_8lMAwBSvtkVAVs5TCc,16469
24
25
  httomolibgpu/recon/algorithm.py,sha256=ds-_io7kGzo5FiJq8k4--PYtIWak3y9H7yuyg1lymyQ,25121
25
26
  httomolibgpu/recon/rotation.py,sha256=GaSwNrlDnlP_iIrTfKUQLiXsShJ5aSDvdKPwofggtwQ,27948
26
- httomolibgpu-5.1.dist-info/licenses/LICENSE,sha256=bXeLsgelPUUXw8HCIYiVC97Dpjhm2nB54m7TACdH8ng,48032
27
- httomolibgpu-5.1.dist-info/METADATA,sha256=zSD4pi1w0lyFkgkZrB38m1DuhmGj5ad4uWJENNX_J44,3339
28
- httomolibgpu-5.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
29
- httomolibgpu-5.1.dist-info/top_level.txt,sha256=nV0Ty_YvSPVd1O6MNWuIplD0w1nwk5hT76YgBZ-bzUw,13
30
- httomolibgpu-5.1.dist-info/RECORD,,
27
+ httomolibgpu-5.2.dist-info/licenses/LICENSE,sha256=bXeLsgelPUUXw8HCIYiVC97Dpjhm2nB54m7TACdH8ng,48032
28
+ httomolibgpu-5.2.dist-info/METADATA,sha256=K8rMjvvrs2ZvfOiZeEar2tinoGMjH5b7fYfv-xG0F3E,3365
29
+ httomolibgpu-5.2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
30
+ httomolibgpu-5.2.dist-info/top_level.txt,sha256=nV0Ty_YvSPVd1O6MNWuIplD0w1nwk5hT76YgBZ-bzUw,13
31
+ httomolibgpu-5.2.dist-info/RECORD,,