httomolibgpu 5.0__py3-none-any.whl → 5.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
httomolibgpu/__init__.py CHANGED
@@ -9,6 +9,7 @@ from httomolibgpu.prep.normalize import dark_flat_field_correction, minus_log
9
9
  from httomolibgpu.prep.phase import paganin_filter, paganin_filter_savu_legacy
10
10
  from httomolibgpu.prep.stripe import (
11
11
  remove_stripe_based_sorting,
12
+ remove_stripe_fw,
12
13
  remove_stripe_ti,
13
14
  remove_all_stripe,
14
15
  raven_filter,
@@ -0,0 +1,155 @@
1
/**
 * Strided 1D convolution along x that produces two output groups.
 *
 * Each thread handles one output element (g_thd_x, g_thd_y, g_thd_z). For
 * every output group i in [0, 2) it accumulates
 *
 *     sum_j w[i * WSize + j] * in[g_thd_x * in_stride_x + j
 *                                 + g_thd_y * in_stride_y
 *                                 + g_thd_z * in_stride_z]
 *
 * and writes the result to
 *
 *     out[g_thd_x + g_thd_y * dim_x + g_thd_z * out_stride_z
 *         + i * out_stride_group].
 *
 * All strides are expressed in elements (floats), not bytes.
 *
 * Element offsets are accumulated in 64-bit integers: products such as
 * g_thd_z * in_stride_z exceed the range of a 32-bit int once a volume holds
 * more than 2^31 elements.
 *
 * @tparam WSize  number of filter taps per output group
 */
template<int WSize>
__global__ void grouped_convolution_x(
    int dim_x,
    int dim_y,
    int dim_z,
    const float* in,
    int in_stride_x,
    int in_stride_y,
    int in_stride_z,
    float* out,
    int out_stride_z,
    int out_stride_group,
    const float* w
)
{
    const int g_thd_x = blockDim.x * blockIdx.x + threadIdx.x;
    const int g_thd_y = blockDim.y * blockIdx.y + threadIdx.y;
    const int g_thd_z = blockDim.z * blockIdx.z + threadIdx.z;
    // Threads outside the output extent have nothing to compute.
    if (g_thd_x >= dim_x || g_thd_y >= dim_y || g_thd_z >= dim_z)
    {
        return;
    }

    // Offsets of the first tap and of group 0's output element, in 64-bit.
    const long long in_base = static_cast<long long>(g_thd_x) * in_stride_x
                            + static_cast<long long>(g_thd_y) * in_stride_y
                            + static_cast<long long>(g_thd_z) * in_stride_z;
    const long long out_base = static_cast<long long>(g_thd_x)
                             + static_cast<long long>(g_thd_y) * dim_x
                             + static_cast<long long>(g_thd_z) * out_stride_z;

    constexpr int out_groups = 2;
    for (int i = 0; i < out_groups; ++i)
    {
        float acc = 0.F;
        for (int j = 0; j < WSize; ++j)
        {
            const int w_idx = i * WSize + j;
            acc += w[w_idx] * in[in_base + j];
        }
        out[out_base + static_cast<long long>(i) * out_stride_group] = acc;
    }
}
38
+
39
/**
 * Strided 1D convolution along y over two input groups, producing two output
 * groups per input group (four output channels in total).
 *
 * Each thread handles one output position (g_thd_x, g_thd_y, g_thd_z). For
 * input group `group` and output group `i` it accumulates
 *
 *     sum_j w[(2 * group + i) * WSize + j]
 *           * in[g_thd_x * in_stride_x + (2 * g_thd_y + j) * in_stride_y
 *                + group * in_stride_group + g_thd_z * in_stride_z]
 *
 * and stores it at
 *
 *     out[g_thd_x + g_thd_y * dim_x + g_thd_z * out_stride_z
 *         + (2 * group + i) * out_stride_group].
 *
 * All strides are expressed in elements (floats), not bytes.
 *
 * Element offsets are accumulated in 64-bit integers so that volumes with
 * more than 2^31 elements do not wrap a 32-bit int.
 *
 * @tparam WSize  number of filter taps per output channel
 */
template<int WSize>
__global__ void grouped_convolution_y(
    int dim_x,
    int dim_y,
    int dim_z,
    const float* in,
    int in_stride_x,
    int in_stride_y,
    int in_stride_z,
    int in_stride_group,
    float* out,
    int out_stride_z,
    int out_stride_group,
    const float* w
)
{
    const int g_thd_x = blockDim.x * blockIdx.x + threadIdx.x;
    const int g_thd_y = blockDim.y * blockIdx.y + threadIdx.y;
    const int g_thd_z = blockDim.z * blockIdx.z + threadIdx.z;
    // Threads outside the output extent have nothing to compute.
    if (g_thd_x >= dim_x || g_thd_y >= dim_y || g_thd_z >= dim_z)
    {
        return;
    }

    constexpr int in_groups = 2;
    constexpr int out_groups = 2;
    constexpr int item_stride_y = 2;

    // Parts of the offsets that do not depend on the group or tap, in 64-bit.
    const long long in_base = static_cast<long long>(g_thd_x) * in_stride_x
                            + static_cast<long long>(g_thd_z) * in_stride_z;
    const long long out_base = static_cast<long long>(g_thd_x)
                             + static_cast<long long>(g_thd_y) * dim_x
                             + static_cast<long long>(g_thd_z) * out_stride_z;

    for (int group = 0; group < in_groups; ++group)
    {
        const long long in_group_base =
            in_base + static_cast<long long>(group) * in_stride_group;
        for (int i = 0; i < out_groups; ++i)
        {
            // Index of the output channel, also selecting the filter row.
            const int channel = out_groups * group + i;
            float acc = 0.F;
            for (int j = 0; j < WSize; ++j)
            {
                const int w_idx = channel * WSize + j;
                const long long in_idx = in_group_base
                    + static_cast<long long>(item_stride_y * g_thd_y + j) * in_stride_y;
                acc += w[w_idx] * in[in_idx];
            }
            out[out_base + static_cast<long long>(channel) * out_stride_group] = acc;
        }
    }
}
82
+
83
/**
 * Transposed (stride-2) 1D convolution along x.
 *
 * Each thread computes one output element (g_thd_x, g_thd_y, g_thd_z) by
 * gathering: tap i contributes in[in_x, g_thd_y, g_thd_z] * w[i] whenever
 * g_thd_x - i is an exact multiple of 2 and in_x = (g_thd_x - i) / 2 lies in
 * [0, in_dim_x). The result is written to a densely packed output of shape
 * (dim_z, dim_y, dim_x).
 *
 * Input strides are expressed in elements (floats), not bytes.
 *
 * Element offsets are accumulated in 64-bit integers: dim_x * dim_y * g_thd_z
 * and g_thd_z * in_stride_z exceed the range of a 32-bit int for volumes with
 * more than 2^31 elements.
 *
 * @tparam WSize  number of filter taps
 */
template<int WSize>
__global__ void transposed_convolution_x(
    int dim_x,
    int dim_y,
    int dim_z,
    const float* in,
    int in_dim_x,
    int in_stride_y,
    int in_stride_z,
    const float* w,
    float* out
)
{
    const int g_thd_x = blockDim.x * blockIdx.x + threadIdx.x;
    const int g_thd_y = blockDim.y * blockIdx.y + threadIdx.y;
    const int g_thd_z = blockDim.z * blockIdx.z + threadIdx.z;
    // Threads outside the output extent have nothing to compute.
    if (g_thd_x >= dim_x || g_thd_y >= dim_y || g_thd_z >= dim_z)
    {
        return;
    }

    // Offset of the input row read by this thread, in 64-bit.
    const long long in_row = static_cast<long long>(g_thd_y) * in_stride_y
                           + static_cast<long long>(g_thd_z) * in_stride_z;

    constexpr int item_out_stride = 2;
    float acc = 0.F;
    for (int i = 0; i < WSize; ++i)
    {
        const int shifted = g_thd_x - i;
        const int in_x = shifted / item_out_stride;
        const int in_x_mod = shifted % item_out_stride;
        // A non-zero remainder (including the negative remainder produced for
        // odd negative values) means no input sample maps onto this tap.
        if (in_x_mod == 0 && in_x >= 0 && in_x < in_dim_x)
        {
            acc += in[in_row + in_x] * w[i];
        }
    }
    const long long out_idx = static_cast<long long>(g_thd_x)
                            + static_cast<long long>(dim_x) * g_thd_y
                            + static_cast<long long>(dim_x) * dim_y * g_thd_z;
    out[out_idx] = acc;
}
119
+
120
/**
 * Transposed (stride-2) 1D convolution along y.
 *
 * Each thread computes one output element (g_thd_x, g_thd_y, g_thd_z) by
 * gathering: tap i contributes in[g_thd_x, in_y, g_thd_z] * w[i] whenever
 * g_thd_y - i is an exact multiple of 2 and in_y = (g_thd_y - i) / 2 lies in
 * [0, in_dim_y). The result is written to a densely packed output of shape
 * (dim_z, dim_y, dim_x).
 *
 * Input strides are expressed in elements (floats), not bytes.
 *
 * Element offsets are accumulated in 64-bit integers: dim_x * dim_y * g_thd_z
 * and g_thd_z * in_stride_z exceed the range of a 32-bit int for volumes with
 * more than 2^31 elements.
 *
 * @tparam WSize  number of filter taps
 */
template<int WSize>
__global__ void transposed_convolution_y(
    int dim_x,
    int dim_y,
    int dim_z,
    const float* in,
    int in_dim_y,
    int in_stride_y,
    int in_stride_z,
    const float* w,
    float* out
)
{
    const int g_thd_x = blockDim.x * blockIdx.x + threadIdx.x;
    const int g_thd_y = blockDim.y * blockIdx.y + threadIdx.y;
    const int g_thd_z = blockDim.z * blockIdx.z + threadIdx.z;
    // Threads outside the output extent have nothing to compute.
    if (g_thd_x >= dim_x || g_thd_y >= dim_y || g_thd_z >= dim_z)
    {
        return;
    }

    // Offset of the input column read by this thread, in 64-bit.
    const long long in_col = static_cast<long long>(g_thd_x)
                           + static_cast<long long>(g_thd_z) * in_stride_z;

    constexpr int item_out_stride = 2;
    float acc = 0.F;
    for (int i = 0; i < WSize; ++i)
    {
        const int shifted = g_thd_y - i;
        const int in_y = shifted / item_out_stride;
        const int in_y_mod = shifted % item_out_stride;
        // A non-zero remainder (including the negative remainder produced for
        // odd negative values) means no input sample maps onto this tap.
        if (in_y_mod == 0 && in_y >= 0 && in_y < in_dim_y)
        {
            acc += in[in_col + static_cast<long long>(in_y) * in_stride_y] * w[i];
        }
    }
    const long long out_idx = static_cast<long long>(g_thd_x)
                            + static_cast<long long>(dim_x) * g_thd_y
                            + static_cast<long long>(dim_x) * dim_y * g_thd_z;
    out[out_idx] = acc;
}
@@ -0,0 +1,24 @@
1
+ ALLOCATION_UNIT_SIZE = 512
2
+
3
+
4
+ class _DeviceMemStack:
5
+ def __init__(self) -> None:
6
+ self.allocations = []
7
+ self.current = 0
8
+ self.highwater = 0
9
+
10
+ def malloc(self, bytes):
11
+ self.allocations.append(bytes)
12
+ allocated = self._round_up(bytes)
13
+ self.current += allocated
14
+ self.highwater = max(self.current, self.highwater)
15
+
16
+ def free(self, bytes):
17
+ assert bytes in self.allocations
18
+ self.allocations.remove(bytes)
19
+ self.current -= self._round_up(bytes)
20
+ assert self.current >= 0
21
+
22
+ def _round_up(self, size):
23
+ size = (size + ALLOCATION_UNIT_SIZE - 1) // ALLOCATION_UNIT_SIZE
24
+ return size * ALLOCATION_UNIT_SIZE
@@ -22,6 +22,7 @@
22
22
 
23
23
  import numpy as np
24
24
  from httomolibgpu import cupywrapper
25
+ from httomolibgpu.memory_estimator_helpers import _DeviceMemStack
25
26
 
26
27
  cp = cupywrapper.cp
27
28
  cupy_run = cupywrapper.cupy_run
@@ -30,13 +31,14 @@ from unittest.mock import Mock
30
31
 
31
32
  if cupy_run:
32
33
  from cupyx.scipy.fft import fft2, ifft2, fftshift
34
+ from cupyx.scipy.fftpack import get_fft_plan
33
35
  else:
34
36
  fft2 = Mock()
35
37
  ifft2 = Mock()
36
38
  fftshift = Mock()
37
39
 
38
40
  from numpy import float32
39
- from typing import Tuple
41
+ from typing import Optional, Tuple
40
42
  import math
41
43
 
42
44
  __all__ = [
@@ -54,6 +56,7 @@ def paganin_filter(
54
56
  distance: float = 1.0,
55
57
  energy: float = 53.0,
56
58
  ratio_delta_beta: float = 250,
59
+ calc_peak_gpu_mem: bool = False,
57
60
  ) -> cp.ndarray:
58
61
  """
59
62
  Perform single-material phase retrieval from flats/darks corrected tomographic measurements. For more detailed information, see :ref:`phase_contrast_module`.
@@ -71,30 +74,50 @@ def paganin_filter(
71
74
  Beam energy in keV.
72
75
  ratio_delta_beta : float
73
76
  The ratio of delta/beta, where delta is the phase shift and real part of the complex material refractive index and beta is the absorption.
77
+ calc_peak_gpu_mem: bool
78
+ Parameter to support memory estimation in HTTomo. Irrelevant to the method itself and can be ignored by user.
74
79
 
75
80
  Returns
76
81
  -------
77
82
  cp.ndarray
78
83
  The 3D array of Paganin phase-filtered projection images.
79
84
  """
85
+ mem_stack = _DeviceMemStack() if calc_peak_gpu_mem else None
80
86
  # Check the input data is valid
81
- if tomo.ndim != 3:
87
+ if not mem_stack and tomo.ndim != 3:
82
88
  raise ValueError(
83
89
  f"Invalid number of dimensions in data: {tomo.ndim},"
84
90
  " please provide a stack of 2D projections."
85
91
  )
86
-
87
- dz_orig, dy_orig, dx_orig = tomo.shape
92
+ if mem_stack:
93
+ mem_stack.malloc(np.prod(tomo) * np.float32().itemsize)
94
+ dz_orig, dy_orig, dx_orig = tomo.shape if not mem_stack else tomo
88
95
 
89
96
  # Perform padding to the power of 2 as FFT is O(n*log(n)) complexity
90
97
  # TODO: adding other options of padding?
91
- padded_tomo, pad_tup = _pad_projections_to_second_power(tomo)
98
+ padded_tomo, pad_tup = _pad_projections_to_second_power(tomo, mem_stack)
92
99
 
93
- dz, dy, dx = padded_tomo.shape
100
+ dz, dy, dx = padded_tomo.shape if not mem_stack else padded_tomo
94
101
 
95
102
  # 3D FFT of tomo data
96
- padded_tomo = cp.asarray(padded_tomo, dtype=cp.complex64)
97
- fft_tomo = fft2(padded_tomo, axes=(-2, -1), overwrite_x=True)
103
+ if mem_stack:
104
+ mem_stack.malloc(np.prod(padded_tomo) * np.complex64().itemsize)
105
+ mem_stack.free(np.prod(padded_tomo) * np.float32().itemsize)
106
+ fft_input = cp.empty(padded_tomo, dtype=cp.complex64)
107
+ else:
108
+ padded_tomo = cp.asarray(padded_tomo, dtype=cp.complex64)
109
+ fft_input = padded_tomo
110
+
111
+ fft_plan = get_fft_plan(fft_input, axes=(-2, -1))
112
+ if mem_stack:
113
+ mem_stack.malloc(fft_plan.work_area.mem.size)
114
+ mem_stack.free(fft_plan.work_area.mem.size)
115
+ else:
116
+ with fft_plan:
117
+ fft_tomo = fft2(padded_tomo, axes=(-2, -1), overwrite_x=True)
118
+ del padded_tomo
119
+ del fft_input
120
+ del fft_plan
98
121
 
99
122
  # calculate alpha constant
100
123
  alpha = _calculate_alpha(energy, distance / 1e-6, ratio_delta_beta)
@@ -103,18 +126,56 @@ def paganin_filter(
103
126
  indx = _reciprocal_coord(pixel_size, dy)
104
127
  indy = _reciprocal_coord(pixel_size, dx)
105
128
 
106
- # Build Lorentzian-type filter
107
- phase_filter = fftshift(
108
- 1.0 / (1.0 + alpha * (cp.add.outer(cp.square(indx), cp.square(indy))))
109
- )
129
+ if mem_stack:
130
+ mem_stack.malloc(indx.size * indx.dtype.itemsize) # cp.asarray(indx)
131
+ mem_stack.malloc(indx.size * indx.dtype.itemsize) # cp.square
132
+ mem_stack.free(indx.size * indx.dtype.itemsize) # cp.asarray(indx)
133
+ mem_stack.malloc(indy.size * indy.dtype.itemsize) # cp.asarray(indy)
134
+ mem_stack.malloc(indy.size * indy.dtype.itemsize) # cp.square
135
+ mem_stack.free(indy.size * indy.dtype.itemsize) # cp.asarray(indy)
136
+
137
+ mem_stack.malloc(indx.size * indy.size * indx.dtype.itemsize) # cp.add.outer
138
+ mem_stack.free(indx.size * indx.dtype.itemsize) # cp.square
139
+ mem_stack.free(indy.size * indy.dtype.itemsize) # cp.square
140
+ mem_stack.malloc(indx.size * indy.size * indx.dtype.itemsize) # phase_filter
141
+ mem_stack.free(indx.size * indy.size * indx.dtype.itemsize) # cp.add.outer
142
+ mem_stack.free(indx.size * indy.size * indx.dtype.itemsize) # phase_filter
143
+
144
+ else:
145
+ # Build Lorentzian-type filter
146
+ phase_filter = fftshift(
147
+ 1.0
148
+ / (
149
+ 1.0
150
+ + alpha
151
+ * (
152
+ cp.add.outer(
153
+ cp.square(cp.asarray(indx)), cp.square(cp.asarray(indy))
154
+ )
155
+ )
156
+ )
157
+ )
110
158
 
111
- phase_filter = phase_filter / phase_filter.max() # normalisation
159
+ phase_filter = phase_filter / phase_filter.max() # normalisation
112
160
 
113
- # Filter projections
114
- fft_tomo *= phase_filter
161
+ # Filter projections
162
+ fft_tomo *= phase_filter
163
+ del phase_filter
115
164
 
116
165
  # Apply filter and take inverse FFT
117
- ifft_filtered_tomo = ifft2(fft_tomo, axes=(-2, -1), overwrite_x=True).real
166
+ ifft_input = (
167
+ fft_tomo if not mem_stack else cp.empty(padded_tomo, dtype=cp.complex64)
168
+ )
169
+ ifft_plan = get_fft_plan(ifft_input, axes=(-2, -1))
170
+ if mem_stack:
171
+ mem_stack.malloc(ifft_plan.work_area.mem.size)
172
+ mem_stack.free(ifft_plan.work_area.mem.size)
173
+ else:
174
+ with ifft_plan:
175
+ ifft_filtered_tomo = ifft2(fft_tomo, axes=(-2, -1), overwrite_x=True).real
176
+ del fft_tomo
177
+ del ifft_plan
178
+ del ifft_input
118
179
 
119
180
  # slicing indices for cropping
120
181
  slc_indices = (
@@ -123,8 +184,19 @@ def paganin_filter(
123
184
  slice(pad_tup[2][0], pad_tup[2][0] + dx_orig, 1),
124
185
  )
125
186
 
187
+ if mem_stack:
188
+ mem_stack.malloc(np.prod(tomo) * np.float32().itemsize) # astype(cp.float32)
189
+ mem_stack.free(
190
+ np.prod(padded_tomo) * np.complex64().itemsize
191
+ ) # ifft_filtered_tomo
192
+ mem_stack.malloc(
193
+ np.prod(tomo) * np.float32().itemsize
194
+ ) # return _log_kernel(tomo)
195
+ return mem_stack.highwater
196
+
126
197
  # crop the padded filtered data:
127
198
  tomo = ifft_filtered_tomo[slc_indices].astype(cp.float32)
199
+ del ifft_filtered_tomo
128
200
 
129
201
  # taking the negative log
130
202
  _log_kernel = cp.ElementwiseKernel(
@@ -177,7 +249,7 @@ def _calculate_pad_size(datashape: tuple) -> list:
177
249
 
178
250
 
179
251
  def _pad_projections_to_second_power(
180
- tomo: cp.ndarray,
252
+ tomo: cp.ndarray, mem_stack: Optional[_DeviceMemStack]
181
253
  ) -> Tuple[cp.ndarray, Tuple[int, int]]:
182
254
  """
183
255
  Performs padding of each projection to the next power of 2.
@@ -194,11 +266,17 @@ def _pad_projections_to_second_power(
194
266
  ndarray: padded 3d projection data
195
267
  tuple: a tuple with padding dimensions
196
268
  """
197
- full_shape_tomo = cp.shape(tomo)
269
+ full_shape_tomo = cp.shape(tomo) if not mem_stack else tomo
198
270
 
199
271
  pad_list = _calculate_pad_size(full_shape_tomo)
200
272
 
201
- padded_tomo = cp.pad(tomo, tuple(pad_list), "edge")
273
+ if mem_stack:
274
+ padded_tomo = [
275
+ sh + pad[0] + pad[1] for sh, pad in zip(full_shape_tomo, pad_list)
276
+ ]
277
+ mem_stack.malloc(np.prod(padded_tomo) * np.float32().itemsize)
278
+ else:
279
+ padded_tomo = cp.pad(tomo, tuple(pad_list), "edge")
202
280
 
203
281
  return padded_tomo, tuple(pad_list)
204
282
 
@@ -209,7 +287,7 @@ def _wavelength_micron(energy: float) -> float:
209
287
  return 2 * math.pi * PLANCK_CONSTANT * SPEED_OF_LIGHT / energy
210
288
 
211
289
 
212
- def _reciprocal_coord(pixel_size: float, num_grid: int) -> cp.ndarray:
290
+ def _reciprocal_coord(pixel_size: float, num_grid: int) -> np.ndarray:
213
291
  """
214
292
  Calculate reciprocal grid coordinates for a given pixel size
215
293
  and discretization.
@@ -227,7 +305,7 @@ def _reciprocal_coord(pixel_size: float, num_grid: int) -> cp.ndarray:
227
305
  Grid coordinates.
228
306
  """
229
307
  n = num_grid - 1
230
- rc = cp.arange(-n, num_grid, 2, dtype=cp.float32)
308
+ rc = np.arange(-n, num_grid, 2, dtype=cp.float32)
231
309
  rc *= 2 * math.pi / (n * pixel_size)
232
310
  return rc
233
311
 
@@ -238,6 +316,7 @@ def paganin_filter_savu_legacy(
238
316
  distance: float = 1.0,
239
317
  energy: float = 53.0,
240
318
  ratio_delta_beta: float = 250,
319
+ calc_peak_gpu_mem: bool = False,
241
320
  ) -> cp.ndarray:
242
321
  """
243
322
  Perform single-material phase retrieval from flats/darks corrected tomographic measurements. For more detailed information, see :ref:`phase_contrast_module`.
@@ -256,6 +335,8 @@ def paganin_filter_savu_legacy(
256
335
  Beam energy in keV.
257
336
  ratio_delta_beta : float
258
337
  The ratio of delta/beta, where delta is the phase shift and real part of the complex material refractive index and beta is the absorption.
338
+ calc_peak_gpu_mem: bool
339
+ Parameter to support memory estimation in HTTomo. Irrelevant to the method itself and can be ignored by user.
259
340
 
260
341
  Returns
261
342
  -------
@@ -263,4 +344,11 @@ def paganin_filter_savu_legacy(
263
344
  The 3D array of Paganin phase-filtered projection images.
264
345
  """
265
346
 
266
- return paganin_filter(tomo, pixel_size, distance, energy, ratio_delta_beta / 4)
347
+ return paganin_filter(
348
+ tomo,
349
+ pixel_size,
350
+ distance,
351
+ energy,
352
+ ratio_delta_beta / 4,
353
+ calc_peak_gpu_mem=calc_peak_gpu_mem,
354
+ )
@@ -21,6 +21,7 @@
21
21
  """Module for stripes removal"""
22
22
 
23
23
  import numpy as np
24
+ import pywt
24
25
  from httomolibgpu import cupywrapper
25
26
 
26
27
  cp = cupywrapper.cp
@@ -31,6 +32,7 @@ from unittest.mock import Mock
31
32
  if cupy_run:
32
33
  from cupyx.scipy.ndimage import median_filter, binary_dilation, uniform_filter1d
33
34
  from cupyx.scipy.fft import fft2, ifft2, fftshift
35
+ from cupyx.scipy.fftpack import get_fft_plan
34
36
  from httomolibgpu.cuda_kernels import load_cuda_module
35
37
  else:
36
38
  median_filter = Mock()
@@ -41,10 +43,11 @@ else:
41
43
  fftshift = Mock()
42
44
 
43
45
 
44
- from typing import Union
46
+ from typing import Optional, Tuple, Union
45
47
 
46
48
  __all__ = [
47
49
  "remove_stripe_based_sorting",
50
+ "remove_stripe_fw",
48
51
  "remove_stripe_ti",
49
52
  "remove_all_stripe",
50
53
  "raven_filter",
@@ -156,6 +159,604 @@ def remove_stripe_ti(
156
159
  return data
157
160
 
158
161
 
162
+ ###### Ring removal with wavelet filtering (adapted for CuPy from the pytorch_wavelets package https://pytorch-wavelets.readthedocs.io/) ##########
163
+ # These functions are taken from TomoCuPy package
164
+ # *************************************************************************** #
165
+ # Copyright © 2022, UChicago Argonne, LLC #
166
+ # All Rights Reserved #
167
+ # Software Name: Tomocupy #
168
+ # By: Argonne National Laboratory #
169
+ # #
170
+ # OPEN SOURCE LICENSE #
171
+ # #
172
+ # Redistribution and use in source and binary forms, with or without #
173
+ # modification, are permitted provided that the following conditions are met: #
174
+ # #
175
+ # 1. Redistributions of source code must retain the above copyright notice, #
176
+ # this list of conditions and the following disclaimer. #
177
+ # 2. Redistributions in binary form must reproduce the above copyright #
178
+ # notice, this list of conditions and the following disclaimer in the #
179
+ # documentation and/or other materials provided with the distribution. #
180
+ # 3. Neither the name of the copyright holder nor the names of its #
181
+ # contributors may be used to endorse or promote products derived #
182
+ # from this software without specific prior written permission. #
183
+ # #
184
+ # #
185
+ # *************************************************************************** #
186
+
187
+
188
+ def _reflect(x: np.ndarray, minx: float, maxx: float) -> np.ndarray:
189
+ """Reflect the values in matrix *x* about the scalar values *minx* and
190
+ *maxx*. Hence a vector *x* containing a long linearly increasing series is
191
+ converted into a waveform which ramps linearly up and down between *minx*
192
+ and *maxx*. If *x* contains integers and *minx* and *maxx* are (integers +
193
+ 0.5), the ramps will have repeated max and min samples.
194
+
195
+ .. codeauthor:: Rich Wareham <rjw57@cantab.net>, Aug 2013
196
+ .. codeauthor:: Nick Kingsbury, Cambridge University, January 1999.
197
+
198
+ """
199
+ rng = maxx - minx
200
+ rng_by_2 = 2 * rng
201
+ mod = np.fmod(x - minx, rng_by_2)
202
+ normed_mod = np.where(mod < 0, mod + rng_by_2, mod)
203
+ out = np.where(normed_mod >= rng, rng_by_2 - normed_mod, normed_mod) + minx
204
+ return np.array(out, dtype=x.dtype)
205
+
206
+
207
+ class _DeviceMemStack:
208
+ def __init__(self) -> None:
209
+ self.allocations = []
210
+ self.current = 0
211
+ self.highwater = 0
212
+
213
+ def malloc(self, bytes):
214
+ self.allocations.append(bytes)
215
+ allocated = self._round_up(bytes)
216
+ self.current += allocated
217
+ self.highwater = max(self.current, self.highwater)
218
+
219
+ def free(self, bytes):
220
+ assert bytes in self.allocations
221
+ self.allocations.remove(bytes)
222
+ self.current -= self._round_up(bytes)
223
+ assert self.current >= 0
224
+
225
+ def _round_up(self, size):
226
+ ALLOCATION_UNIT_SIZE = 512
227
+ size = (size + ALLOCATION_UNIT_SIZE - 1) // ALLOCATION_UNIT_SIZE
228
+ return size * ALLOCATION_UNIT_SIZE
229
+
230
+
231
def _mypad(
    x: cp.ndarray, pad: Tuple[int, int, int, int], mem_stack: Optional[_DeviceMemStack]
) -> cp.ndarray:
    """Pad a 4D array by symmetric reflection along one of its last two axes.

    Only one direction can be padded per call: either the vertical pads
    (``pad[2]``, ``pad[3]``) or the horizontal pads (``pad[0]``, ``pad[1]``)
    must be zero. When all four are zero the vertical branch is taken and the
    data is returned re-indexed without any extra samples.

    Inputs:
        x (array): Array to pad. When ``mem_stack`` is given, ``x`` is instead
            the 4-element shape of that array and no data is touched.
        pad (tuple): tuple of (left, right, top, bottom) pad sizes
        mem_stack: optional memory tracker; when given, the size of the padded
            result (as float32) is recorded on it.

    Returns:
        The padded array, or the padded shape as a list when ``mem_stack`` is
        given.

    Raises:
        ValueError: if both horizontal and vertical padding are requested.
            Previously this case fell through and silently returned ``None``.
    """
    if pad[0] == 0 and pad[1] == 0:
        # Vertical only
        axis, m1, m2 = -2, pad[2], pad[3]
    elif pad[2] == 0 and pad[3] == 0:
        # Horizontal only
        axis, m1, m2 = -1, pad[0], pad[1]
    else:
        raise ValueError(
            "_mypad supports padding along a single axis only, "
            f"got pad={tuple(pad)}"
        )

    length = x.shape[axis] if not mem_stack else x[axis]
    # Source index for every sample of the padded axis, mirrored at the edges.
    xe = _reflect(np.arange(-m1, length + m2, dtype="int32"), -0.5, length - 0.5)
    if mem_stack:
        ret_shape = [x[0], x[1], x[2], x[3]]
        ret_shape[axis] = xe.size
        mem_stack.malloc(np.prod(ret_shape) * np.float32().itemsize)
        return ret_shape
    if axis == -2:
        return x[:, :, xe, :]
    return x[:, :, :, xe]
261
+
262
+
263
+ def _next_power_of_two(x: int, max_val: int = 128) -> int:
264
+ n = 1
265
+ while n < x and n < max_val:
266
+ n *= 2
267
+ return n
268
+
269
+
270
def _conv2d(
    x: cp.ndarray,
    w: np.ndarray,
    stride: Tuple[int, int],
    groups: int,
    mem_stack: Optional[_DeviceMemStack],
) -> cp.ndarray:
    """Convolution (equivalent pytorch.conv2d)

    Runs one of the custom CUDA kernels from the "remove_stripe_fw" module:
    ``grouped_convolution_x`` when ``groups == 1`` and
    ``grouped_convolution_y`` otherwise.

    Parameters
    ----------
    x : 4D input unpacked as (b, ci, hi, wi). When ``mem_stack`` is given,
        ``x`` is the shape itself rather than an array.
    w : 4D filter bank unpacked as (co, _, hk, wk).
    stride : (vertical, horizontal) convolution stride.
    groups : number of groups; only used to choose between the two kernels.
    mem_stack : optional memory tracker. When given, no computation happens:
        the float32 output size is recorded and the output shape is returned.

    Returns
    -------
    The float32 output array of shape [b, co, ho, wo], or that shape as a list
    when ``mem_stack`` is given.
    """
    b, ci, hi, wi = x.shape if not mem_stack else x
    co, _, hk, wk = w.shape
    # Output extent of an un-padded strided convolution.
    ho = int(np.floor(1 + (hi - hk) / stride[0]))
    wo = int(np.floor(1 + (wi - wk) / stride[1]))
    out_shape = [b, co, ho, wo]
    if mem_stack:
        # Estimation mode: only account for the output buffer.
        mem_stack.malloc(np.prod(out_shape) * np.float32().itemsize)
        return out_shape

    out = cp.zeros(out_shape, dtype="float32")
    w = cp.asarray(w)
    # x becomes 5D here, which is why the group stride below is strides[2].
    x = cp.expand_dims(x, axis=1)
    # NOTE(review): np.expand_dims is applied to a CuPy array here; presumably
    # this relies on NumPy dispatching to CuPy — confirm.
    w = np.expand_dims(w, axis=0)
    # The kernels are templated on the filter length along the filtered axis.
    symbol_names = [f"grouped_convolution_x<{wk}>", f"grouped_convolution_y<{hk}>"]
    module = load_cuda_module("remove_stripe_fw", name_expressions=symbol_names)
    dim_x = out.shape[-1]
    dim_y = out.shape[-2]
    dim_z = out.shape[0]
    # The horizontal convolution stride is handed to the kernel as its
    # in_stride_x argument; the remaining strides are converted from bytes
    # to elements.
    in_stride_x = stride[1]
    in_stride_y = x.strides[-2] // x.dtype.itemsize
    in_stride_z = x.strides[0] // x.dtype.itemsize
    out_stride_z = out.strides[0] // x.dtype.itemsize
    out_stride_group = out.strides[1] // x.dtype.itemsize

    # One thread per output element: blocks span x only, the grid covers y and z.
    block_x = _next_power_of_two(dim_x)
    block_dim = (block_x, 1, 1)
    grid_x = (dim_x + block_x - 1) // block_x
    grid_dim = (grid_x, dim_y, dim_z)

    if groups == 1:
        grouped_convolution_kernel_x = module.get_function(symbol_names[0])
        grouped_convolution_kernel_x(
            grid_dim,
            block_dim,
            (
                dim_x,
                dim_y,
                dim_z,
                x,
                in_stride_x,
                in_stride_y,
                in_stride_z,
                out,
                out_stride_z,
                out_stride_group,
                w,
            ),
        )
        return out

    grouped_convolution_kernel_y = module.get_function(symbol_names[1])
    # Element stride between the input groups (axis 2 of the expanded x).
    in_stride_group = x.strides[2] // x.dtype.itemsize
    grouped_convolution_kernel_y(
        grid_dim,
        block_dim,
        (
            dim_x,
            dim_y,
            dim_z,
            x,
            in_stride_x,
            in_stride_y,
            in_stride_z,
            in_stride_group,
            out,
            out_stride_z,
            out_stride_group,
            w,
        ),
    )
    del w
    return out
350
+
351
+
352
def _conv_transpose2d(
    x: cp.ndarray,
    w: np.ndarray,
    stride: Tuple[int, int],
    pad: Tuple[int, int],
    groups: int,
    mem_stack: Optional[_DeviceMemStack],
) -> cp.ndarray:
    """Transposed convolution (equivalent pytorch.conv_transpose2d)

    Runs one of the custom CUDA kernels from the "remove_stripe_fw" module:
    ``transposed_convolution_x`` when the filter is wider than one sample,
    otherwise ``transposed_convolution_y`` when it is taller than one sample.

    Parameters
    ----------
    x : 4D input unpacked as (b, co, ho, wo). When ``mem_stack`` is given,
        ``x`` is the shape itself rather than an array.
    w : 4D filter unpacked as (co, ci, hk, wk).
    stride : (vertical, horizontal) stride, used to compute the output size.
    pad : (vertical, horizontal) number of samples cropped from both ends of
        the corresponding output axis.
    groups : not used in this function body.
    mem_stack : optional memory tracker. When given, no computation happens:
        the buffer sizes are recorded and the final shape is returned.

    Returns
    -------
    The cropped, contiguous float32 output array, or its shape as a list when
    ``mem_stack`` is given.
    """
    b, co, ho, wo = x.shape if not mem_stack else x
    # co is re-bound here to the filter's first dimension.
    co, ci, hk, wk = w.shape

    # Full (un-cropped) output extent of the transposed convolution.
    hi = (ho - 1) * stride[0] + hk
    wi = (wo - 1) * stride[1] + wk
    out_shape = [b, ci, hi, wi]
    if mem_stack:
        # Estimation mode: account for the full output and the filter copy,
        # then for the cropped copy that replaces the full output.
        mem_stack.malloc(np.prod(out_shape) * np.float32().itemsize)
        mem_stack.malloc(w.size * np.float32().itemsize)
        # NOTE(review): pad is annotated as a tuple, and a tuple never
        # compares equal to 0, so this branch is taken for any tuple argument.
        if pad != 0:
            new_out_shape = [
                out_shape[0],
                out_shape[1],
                out_shape[2] - 2 * pad[0],
                out_shape[3] - 2 * pad[1],
            ]
            mem_stack.malloc(np.prod(new_out_shape) * np.float32().itemsize)
            mem_stack.free(np.prod(out_shape) * np.float32().itemsize)
            out_shape = new_out_shape
        mem_stack.free(w.size * np.float32().itemsize)
        return out_shape

    out = cp.zeros(out_shape, dtype="float32")
    w = cp.asarray(w)

    # The kernels are templated on the filter length along the filtered axis.
    symbol_names = [
        f"transposed_convolution_x<{wk}>",
        f"transposed_convolution_y<{hk}>",
    ]
    module = load_cuda_module("remove_stripe_fw", name_expressions=symbol_names)
    dim_x = out.shape[-1]
    dim_y = out.shape[-2]
    dim_z = out.shape[0]
    in_dim_x = x.shape[-1]
    in_dim_y = x.shape[-2]
    # Input strides converted from bytes to elements.
    in_stride_y = x.strides[-2] // x.dtype.itemsize
    in_stride_z = x.strides[0] // x.dtype.itemsize

    # One thread per output element: blocks span x only, the grid covers y and z.
    block_x = _next_power_of_two(dim_x)
    block_dim = (block_x, 1, 1)
    grid_x = (dim_x + block_x - 1) // block_x
    grid_dim = (grid_x, dim_y, dim_z)

    if wk > 1:
        transposed_convolution_kernel_x = module.get_function(symbol_names[0])
        transposed_convolution_kernel_x(
            grid_dim,
            block_dim,
            (dim_x, dim_y, dim_z, x, in_dim_x, in_stride_y, in_stride_z, w, out),
        )
    elif hk > 1:
        transposed_convolution_kernel_y = module.get_function(symbol_names[1])
        transposed_convolution_kernel_y(
            grid_dim,
            block_dim,
            (dim_x, dim_y, dim_z, x, in_dim_y, in_stride_y, in_stride_z, w, out),
        )
    else:
        # A 1x1 filter is not handled by either kernel.
        assert False

    if pad != 0:
        # Crop pad[0] rows and pad[1] columns from both ends of the output.
        out = out[:, :, pad[0] : out.shape[2] - pad[0], pad[1] : out.shape[3] - pad[1]]
    return cp.ascontiguousarray(out)
424
+
425
+
426
def _afb1d(
    x: cp.ndarray,
    h0: np.ndarray,
    h1: np.ndarray,
    dim: int,
    mem_stack: Optional[_DeviceMemStack],
) -> cp.ndarray:
    """Apply a 1D analysis filter bank along one spatial axis of an image.

    The input is padded by symmetric reflection to the length expected by
    ``pywt.dwt_coeff_len`` and then convolved with stride 2 along the
    filtered axis.

    Parameters
    ----------
    x (array): 4D input with the last two dimensions the spatial input. When
        ``mem_stack`` is given, ``x`` is the shape of that input instead.
    h0 (array): 4D input for the lowpass filter. Should have shape (1, 1,
        h, 1) or (1, 1, 1, w)
    h1 (array): 4D input for the highpass filter. Should have shape (1, 1,
        h, 1) or (1, 1, 1, w)
    dim (int) - dimension of filtering. d=2 is for a vertical filter (called
        column filtering but filters across the rows). d=3 is for a
        horizontal filter, (called row filtering but filters across the
        columns).
    mem_stack: optional memory tracker; when given, only sizes are recorded
        and shapes are passed around instead of arrays.

    Returns
    -------
    lohi: lowpass and highpass subbands concatenated along the channel
        dimension
    """
    # Convert the dim to positive
    axis = dim % 4
    dims = x if mem_stack else x.shape
    channels = dims[1]
    length = dims[axis]
    taps = h0.size

    # Interleave the lowpass/highpass pair once per input channel.
    filt_shape = [1, 1, 1, 1]
    filt_shape[axis] = taps
    bank = np.concatenate(
        [h0.reshape(*filt_shape), h1.reshape(*filt_shape)] * channels, axis=0
    )

    # Calculate the pad size
    n_coeffs = pywt.dwt_coeff_len(length, taps, mode="symmetric")
    total_pad = 2 * (n_coeffs - 1) - length + taps
    before = total_pad // 2
    after = (total_pad + 1) // 2
    if axis == 2:
        step = (2, 1)
        pad = (0, 0, before, after)
    else:
        step = (1, 2)
        pad = (before, after, 0, 0)

    padded = _mypad(x, pad=pad, mem_stack=mem_stack)
    lohi = _conv2d(padded, bank, stride=step, groups=channels, mem_stack=mem_stack)
    if mem_stack:
        # The padded intermediate is released once the convolution is done.
        mem_stack.free(np.prod(padded) * np.float32().itemsize)
    del padded
    return lohi
471
+
472
+
473
def _sfb1d(
    lo: cp.ndarray,
    hi: cp.ndarray,
    g0: np.ndarray,
    g1: np.ndarray,
    dim: int,
    mem_stack: Optional[_DeviceMemStack],
) -> cp.ndarray:
    """Apply a 1D synthesis filter bank along one spatial axis of an image.

    ``lo`` and ``hi`` are each passed through a stride-2 transposed
    convolution with ``g0`` and ``g1`` respectively, and the two results are
    summed. When ``mem_stack`` is given, ``lo`` and ``hi`` are shapes, only
    buffer sizes are recorded and the resulting shape is returned.
    """
    axis = dim % 4
    channels = lo[1] if mem_stack else lo.shape[1]
    taps = g0.size

    filt_shape = [1, 1, 1, 1]
    filt_shape[axis] = taps
    # One copy of each filter per channel.
    g0 = np.concatenate([g0.reshape(*filt_shape)] * channels, axis=0)
    g1 = np.concatenate([g1.reshape(*filt_shape)] * channels, axis=0)

    if axis == 2:
        step = (2, 1)
        pad = (taps - 2, 0)
    else:
        step = (1, 2)
        pad = (0, taps - 2)

    y_lo = _conv_transpose2d(
        lo, g0, stride=step, pad=pad, groups=channels, mem_stack=mem_stack
    )
    y_hi = _conv_transpose2d(
        hi, g1, stride=step, pad=pad, groups=channels, mem_stack=mem_stack
    )
    if not mem_stack:
        return y_lo + y_hi

    # Allocation of the sum, after which both operands are released.
    mem_stack.malloc(np.prod(y_hi) * np.float32().itemsize)
    mem_stack.free(np.prod(y_lo) * np.float32().itemsize)
    mem_stack.free(np.prod(y_hi) * np.float32().itemsize)
    return y_lo
501
+
502
+
503
class _DWTForward:
    """Single-level 2D forward discrete wavelet transform of an image.

    Args:
        wave (str): Which wavelet to use.
    """

    def __init__(self, wave: str):
        super().__init__()

        wavelet = pywt.Wavelet(wave)

        def _as_filter(coeffs, shape):
            # float32 copy of the coefficients, reversed, in 4D filter layout.
            return np.array(coeffs).astype("float32")[::-1].reshape(shape)

        column_shape = (1, 1, -1, 1)
        row_shape = (1, 1, 1, -1)
        # The same decomposition filters are used for columns and rows.
        self.h0_col = _as_filter(wavelet.dec_lo, column_shape)
        self.h1_col = _as_filter(wavelet.dec_hi, column_shape)
        self.h0_row = _as_filter(wavelet.dec_lo, row_shape)
        self.h1_row = _as_filter(wavelet.dec_hi, row_shape)

    def apply(
        self, x: cp.ndarray, mem_stack: Optional[_DeviceMemStack] = None
    ) -> Tuple[cp.ndarray, cp.ndarray]:
        """Forward pass of the DWT.

        Args:
            x (array): Input of shape :math:`(N, C_{in}, H_{in}, W_{in})`.
                When ``mem_stack`` is given, ``x`` is that shape instead and
                only buffer sizes are recorded.

        Returns:
            (yl, yh)
                tuple of lowpass (yl) and bandpass (yh) coefficients.
                yl has shape :math:`(N, C_{in}, H_{in}', W_{in}')` and yh has
                shape :math:`(N, C_{in}, 3, H_{in}'', W_{in}'')`. The new
                dimension in yh iterates over the LH, HL and HH coefficients.
                When ``mem_stack`` is given, the two shapes are returned as
                lists instead of arrays.

        Note:
            :math:`H_{in}', W_{in}', H_{in}'', W_{in}''` denote the correctly
            downsampled shapes of the DWT pyramid.
        """
        # Do 1 level of the transform: rows first, then columns.
        row_filtered = _afb1d(x, self.h0_row, self.h1_row, dim=3, mem_stack=mem_stack)
        y = _afb1d(row_filtered, self.h0_col, self.h1_col, dim=2, mem_stack=mem_stack)

        if not mem_stack:
            del row_filtered
            full_shape = y.shape
            # Split the channel axis into (channels, 4 subbands).
            subbands = y.reshape(full_shape[0], -1, 4, full_shape[-2], full_shape[-1])
            lowpass = cp.ascontiguousarray(subbands[:, :, 0])
            bandpass = cp.ascontiguousarray(subbands[:, :, 1:])
            return (lowpass, bandpass)

        # Estimation mode: ``y`` and ``row_filtered`` are shapes.
        y_shape = [y[0], np.prod(y) // y[0] // 4 // y[-2] // y[-1], 4, y[-2], y[-1]]
        x_shape = [y_shape[0], y_shape[1], y_shape[3], y_shape[4]]
        yh_shape = [y_shape[0], y_shape[1], y_shape[2] - 1, y_shape[3], y_shape[4]]

        mem_stack.free(np.prod(row_filtered) * np.float32().itemsize)
        mem_stack.malloc(np.prod(x_shape) * np.float32().itemsize)
        mem_stack.malloc(np.prod(yh_shape) * np.float32().itemsize)
        mem_stack.free(np.prod(y) * np.float32().itemsize)
        return x_shape, yh_shape
562
+
563
+
564
class _DWTInverse:
    """Single-level 2D inverse discrete wavelet transform of an image.

    Args:
        wave (str): Which wavelet to use.
    """

    def __init__(self, wave: str):
        super().__init__()
        wavelet = pywt.Wavelet(wave)

        def _as_filter(coeffs, shape):
            # float32 copy of the coefficients in 4D filter layout.
            return np.array(coeffs).astype("float32").reshape(shape)

        column_shape = (1, 1, -1, 1)
        row_shape = (1, 1, 1, -1)
        # The same reconstruction filters are used for columns and rows.
        self.g0_col = _as_filter(wavelet.rec_lo, column_shape)
        self.g1_col = _as_filter(wavelet.rec_hi, column_shape)
        self.g0_row = _as_filter(wavelet.rec_lo, row_shape)
        self.g1_row = _as_filter(wavelet.rec_hi, row_shape)

    def apply(
        self,
        coeffs: Tuple[cp.ndarray, cp.ndarray],
        mem_stack: Optional[_DeviceMemStack] = None,
    ) -> cp.ndarray:
        """Reconstruct an image from its lowpass and bandpass coefficients.

        Args:
            coeffs (yl, yh): tuple of lowpass and bandpass coefficients, where:
              yl is a lowpass array of shape :math:`(N, C_{in}, H_{in}',
              W_{in}')` and yh is a bandpass array of shape
              :math:`(N, C_{in}, 3, H_{in}'', W_{in}'')`. I.e. should match
              the format returned by DWTForward. When ``mem_stack`` is given,
              both entries are shapes instead of arrays.

        Returns:
            Reconstructed input of shape :math:`(N, C_{in}, H_{in}, W_{in})`,
            or that shape when ``mem_stack`` is given.

        Note:
            :math:`H_{in}', W_{in}', H_{in}'', W_{in}''` denote the correctly
            downsampled shapes of the DWT pyramid.

        """
        lowpass, bandpass = coeffs
        if mem_stack:
            # Each subband has the bandpass shape without its subband axis.
            lh, hl, hh = (
                [bandpass[0], bandpass[1], bandpass[3], bandpass[4]] for _ in range(3)
            )
        else:
            lh, hl, hh = (bandpass[:, :, band, :, :] for band in range(3))

        # Undo the column filtering for both halves, then the row filtering.
        lo = _sfb1d(lowpass, lh, self.g0_col, self.g1_col, dim=2, mem_stack=mem_stack)
        hi = _sfb1d(hl, hh, self.g0_col, self.g1_col, dim=2, mem_stack=mem_stack)
        restored = _sfb1d(lo, hi, self.g0_row, self.g1_row, dim=3, mem_stack=mem_stack)
        if mem_stack:
            mem_stack.free(np.prod(lo) * np.float32().itemsize)
            mem_stack.free(np.prod(hi) * np.float32().itemsize)
        del lo
        del hi
        return restored
616
+
617
+
618
def _repair_memory_fragmentation_if_needed(fragmentation_threshold: float = 0.2):
    """Release cached blocks of the default CuPy memory pool when it holds too much slack.

    The pool is considered fragmented when the bytes it holds exceed the bytes
    actually in use by more than ``fragmentation_threshold`` (as a fraction of
    the used bytes). In that case ``free_all_blocks()`` is called on the pool.

    Parameters
    ----------
    fragmentation_threshold : float
        Allowed relative overhead ``total_bytes / used_bytes - 1`` before the
        pool's free blocks are released.
    """
    pool = cp.get_default_memory_pool()
    total = pool.total_bytes()
    used = pool.used_bytes()

    if used == 0:
        # Nothing is in use, so the ratio below would divide by zero. Whatever
        # the pool still holds is pure slack; release it if there is any.
        if total > 0:
            pool.free_all_blocks()
        return

    if (total / used) - 1 > fragmentation_threshold:
        pool.free_all_blocks()
623
+
624
+
625
def remove_stripe_fw(
    data: cp.ndarray,
    sigma: float = 2,
    wname: str = "db5",
    level: Optional[int] = None,
    calc_peak_gpu_mem: bool = False,
) -> cp.ndarray:
    """
    Remove horizontal stripes from sinogram using the Fourier-Wavelet (FW) based method :cite:`munch2009stripe`.
    The original source code is taken from the TomoCupy and NABU packages.

    Parameters
    ----------
    data : ndarray
        3D tomographic data as a CuPy array. When ``calc_peak_gpu_mem`` is True, this is
        instead the shape of the data (a tuple of three sizes) and no array is processed.
    sigma : float
        Damping parameter in Fourier space.
    wname : str
        Type of the wavelet filter: select from 'db5', 'db7', 'haar', 'sym5', 'sym16', 'bior4.4'.
    level : int, optional
        Number of discrete wavelet transform levels. If None, it is set to
        ``ceil(log2(n))`` where ``n`` is the largest dimension of the data.
    calc_peak_gpu_mem : bool
        Parameter to support memory estimation in HTTomo. Irrelevant to the method itself and can be ignored by user.

    Returns
    -------
    ndarray
        Stripe-corrected 3D tomographic data as a CuPy array. When ``calc_peak_gpu_mem``
        is True, an ``int`` is returned instead: the recorded memory high-water mark
        multiplied by 1.1.
    """

    if level is None:
        if calc_peak_gpu_mem:
            size = np.max(data)  # data is a tuple in this case
        else:
            size = np.max(data.shape)
        level = int(np.ceil(np.log2(size)))

    [nproj, nz, ni] = data.shape if not calc_peak_gpu_mem else data

    # The first axis is padded by one eighth of its length before the transform
    nproj_pad = nproj + nproj // 8

    # Accepts all wave types available to PyWavelets
    xfm = _DWTForward(wave=wname)
    ifm = _DWTInverse(wave=wname)

    # Wavelet decomposition.
    cc = []
    sli_shape = [nz, 1, nproj_pad, ni]

    if calc_peak_gpu_mem:
        # Estimation mode: no data is processed. The allocations of the real
        # path below are replayed on shapes only and their peak is recorded.
        mem_stack = _DeviceMemStack()
        # A data copy is assumed when invoking the function
        mem_stack.malloc(np.prod(data) * np.float32().itemsize)
        mem_stack.malloc(np.prod(sli_shape) * np.float32().itemsize)
        cc = []
        fcV_bytes = None
        for k in range(level):
            new_sli_shape, c = xfm.apply(sli_shape, mem_stack)
            mem_stack.free(np.prod(sli_shape) * np.float32().itemsize)
            sli_shape = new_sli_shape
            cc.append(c)

            # fcV of the previous level is replaced by the one of this level
            if fcV_bytes:
                mem_stack.free(fcV_bytes)
            fcV_shape = [c[0], c[3], c[4]]
            fcV_bytes = np.prod(fcV_shape) * np.complex64().itemsize
            mem_stack.malloc(fcV_bytes)

            # For the FFT
            mem_stack.malloc(2 * np.prod(fcV_shape) * np.float32().itemsize)
            mem_stack.malloc(2 * fcV_bytes)

            # A throw-away plan is built only to read the size of its work area
            fft_dummy = cp.empty(fcV_shape, dtype="float32")
            fft_plan = get_fft_plan(fft_dummy)
            fft_plan_size = fft_plan.work_area.mem.size
            del fft_dummy
            del fft_plan
            mem_stack.malloc(fft_plan_size)
            mem_stack.free(2 * np.prod(fcV_shape) * np.float32().itemsize)
            mem_stack.free(fft_plan_size)
            mem_stack.free(2 * fcV_bytes)

            # The rest of the iteration doesn't contribute to the peak
            # NOTE: The last iteration of fcV is "leaked"

        for k in range(level)[::-1]:
            new_sli_shape = [sli_shape[0], sli_shape[1], cc[k][-2], cc[k][-1]]
            new_sli_shape = ifm.apply((new_sli_shape, cc[k]), mem_stack)
            mem_stack.free(np.prod(sli_shape) * np.float32().itemsize)
            sli_shape = new_sli_shape

        # Output array, then release of the coefficients and the last slice
        mem_stack.malloc(np.prod(data) * np.float32().itemsize)
        for c in cc:
            mem_stack.free(np.prod(c) * np.float32().itemsize)
        mem_stack.free(np.prod(sli_shape) * np.float32().itemsize)
        # NOTE(review): the 1.1 factor looks like a 10% safety margin on the estimate - confirm
        return int(mem_stack.highwater * 1.1)

    sli = cp.zeros(sli_shape, dtype="float32")
    # Place the data (first two axes swapped) in the centre of the padded axis
    sli[:, 0, (nproj_pad - nproj) // 2 : (nproj_pad + nproj) // 2] = data.swapaxes(0, 1)
    for k in range(level):
        sli, c = xfm.apply(sli)
        cc.append(c)
        # FFT
        fft_in = cp.ascontiguousarray(cc[k][:, 0, 1])
        fft_plan = get_fft_plan(fft_in, axes=1)
        with fft_plan:
            fcV = cp.fft.fft(fft_in, axis=1)
        del fft_plan
        del fft_in
        _, my, mx = fcV.shape
        # Damping of ring artifact information.
        y_hat = np.fft.ifftshift((np.arange(-my, my, 2) + 1) / 2)
        # -expm1(-x) equals 1 - exp(-x): zero at y_hat == 0, approaching 1 for large |y_hat|
        damp = -np.expm1(-(y_hat**2) / (2 * sigma**2))
        fcV *= cp.tile(damp, (mx, 1)).swapaxes(0, 1)
        # Inverse FFT.
        ifft_in = cp.ascontiguousarray(fcV)
        ifft_plan = get_fft_plan(ifft_in, axes=1)
        with ifft_plan:
            cc[k][:, 0, 1] = cp.fft.ifft(ifft_in, my, axis=1).real
        del ifft_plan
        del ifft_in
        _repair_memory_fragmentation_if_needed()

    # Wavelet reconstruction.
    for k in range(level)[::-1]:
        # Crop the running slice to the size of this level's coefficients
        shape0 = cc[k][0, 0, 1].shape
        sli = sli[:, :, : shape0[0], : shape0[1]]
        sli = ifm.apply((sli, cc[k]))
        _repair_memory_fragmentation_if_needed()

    # Remove the padding and restore the original axis order
    data = sli[:, 0, (nproj_pad - nproj) // 2 : (nproj_pad + nproj) // 2, :ni]
    data = data.swapaxes(0, 1)
    return cp.ascontiguousarray(data)
758
+
759
+
159
760
  ######## Optimized version for Vo-all ring removal in tomopy########
160
761
  # This function is taken from TomoCuPy package
161
762
  # *************************************************************************** #
@@ -36,9 +36,8 @@ import cupy as cp
36
36
  import cupyx.scipy.ndimage as ndi
37
37
  import numpy as np
38
38
 
39
- def _upsampled_dft(
40
- data, upsampled_region_size, upsample_factor=1, axis_offsets=None
41
- ):
39
+
40
+ def _upsampled_dft(data, upsampled_region_size, upsample_factor=1, axis_offsets=None):
42
41
  """
43
42
  Upsampled DFT by matrix multiplication.
44
43
 
@@ -148,9 +147,7 @@ def _compute_error(cross_correlation_max, src_amp, target_amp):
148
147
  )
149
148
 
150
149
  with np.errstate(invalid="ignore"):
151
- error = 1.0 - cross_correlation_max * cross_correlation_max.conj() / (
152
- amp
153
- )
150
+ error = 1.0 - cross_correlation_max * cross_correlation_max.conj() / (amp)
154
151
 
155
152
  return cp.sqrt(cp.abs(error))
156
153
 
@@ -192,9 +189,7 @@ def _disambiguate_shift(reference_image, moving_image, shift):
192
189
  negative_shift = [shift_i - s for shift_i, s in zip(positive_shift, shape)]
193
190
  subpixel = any(s % 1 != 0 for s in shift)
194
191
  interp_order = 3 if subpixel else 0
195
- shifted = ndi.shift(
196
- moving_image, shift, mode="grid-wrap", order=interp_order
197
- )
192
+ shifted = ndi.shift(moving_image, shift, mode="grid-wrap", order=interp_order)
198
193
  indices = tuple(round(s) for s in positive_shift)
199
194
  splits_per_dim = [(slice(0, i), slice(i, None)) for i in indices]
200
195
  max_corr = -1.0
@@ -217,9 +212,7 @@ def _disambiguate_shift(reference_image, moving_image, shift):
217
212
  )
218
213
  return shift
219
214
  real_shift_acc = []
220
- for sl, pos_shift, neg_shift in zip(
221
- max_slice, positive_shift, negative_shift
222
- ):
215
+ for sl, pos_shift, neg_shift in zip(max_slice, positive_shift, negative_shift):
223
216
  real_shift_acc.append(pos_shift if sl.stop is None else neg_shift)
224
217
  if not subpixel:
225
218
  real_shift = tuple(map(int, real_shift_acc))
@@ -359,16 +352,12 @@ def phase_cross_correlation(
359
352
  # Initial shift estimate in upsampled grid
360
353
  # shift = cp.around(shift * upsample_factor) / upsample_factor
361
354
  upsample_factor = float(upsample_factor)
362
- shift = tuple(
363
- round(s * upsample_factor) / upsample_factor for s in shift
364
- )
355
+ shift = tuple(round(s * upsample_factor) / upsample_factor for s in shift)
365
356
  upsampled_region_size = math.ceil(upsample_factor * 1.5)
366
357
  # Center of output array at dftshift + 1
367
358
  dftshift = float(upsampled_region_size // 2)
368
359
  # Matrix multiply DFT around the current shift estimate
369
- sample_region_offset = tuple(
370
- dftshift - s * upsample_factor for s in shift
371
- )
360
+ sample_region_offset = tuple(dftshift - s * upsample_factor for s in shift)
372
361
  cross_correlation = _upsampled_dft(
373
362
  image_product.conj(),
374
363
  upsampled_region_size,
@@ -394,9 +383,7 @@ def phase_cross_correlation(
394
383
 
395
384
  # If its only one row or column the shift along that dimension has no
396
385
  # effect. We set to zero.
397
- shift = tuple(
398
- s if axis_size != 1 else 0 for s, axis_size in zip(shift, shape)
399
- )
386
+ shift = tuple(s if axis_size != 1 else 0 for s, axis_size in zip(shift, shape))
400
387
 
401
388
  if disambiguate:
402
389
  if space.lower() != "real":
@@ -406,10 +393,7 @@ def phase_cross_correlation(
406
393
 
407
394
  # Redirect user to masked_phase_cross_correlation if NaNs are observed
408
395
  if cp.isnan(CCmax) or cp.isnan(src_amp) or cp.isnan(target_amp):
409
- raise ValueError(
410
- "NaN values found, please remove NaNs from your "
411
- "input data"
412
- )
396
+ raise ValueError("NaN values found, please remove NaNs from your " "input data")
413
397
 
414
398
  return (
415
399
  shift,
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: httomolibgpu
3
- Version: 5.0
3
+ Version: 5.2
4
4
  Summary: Commonly used tomography data processing methods at DLS.
5
5
  Author-email: Daniil Kazantsev <daniil.kazantsev@diamond.ac.uk>, Yousef Moazzam <yousef.moazzam@diamond.ac.uk>, Naman Gera <naman.gera@diamond.ac.uk>
6
6
  License: BSD-3-Clause
@@ -19,6 +19,7 @@ Requires-Dist: scipy
19
19
  Requires-Dist: pillow
20
20
  Requires-Dist: scikit-image
21
21
  Requires-Dist: tomobar
22
+ Requires-Dist: PyWavelets
22
23
  Provides-Extra: dev
23
24
  Requires-Dist: pytest; extra == "dev"
24
25
  Requires-Dist: pytest-cov; extra == "dev"
@@ -1,5 +1,6 @@
1
- httomolibgpu/__init__.py,sha256=Fdj5ipIGgeKqSCYRb5bBVMAZ04ZvZJzuBoGOAqc0zgk,937
1
+ httomolibgpu/__init__.py,sha256=Dt_TYhjJGPVathlceTYQhoRSyH8n7FGQJlRMUlFZNdc,959
2
2
  httomolibgpu/cupywrapper.py,sha256=6ITGJ2Jw5I5kVmKEL5LlsnLRniEqqBLsHiAjvLtk0Xk,493
3
+ httomolibgpu/memory_estimator_helpers.py,sha256=QaJady-z8y9Emw7W-lB608vBTNvVYv3obboQKVj6E9M,705
3
4
  httomolibgpu/cuda_kernels/__init__.py,sha256=VQNMaGcVDwiE-C64FfLtubHpLriLG0Y3_QnjHBSHrN0,884
4
5
  httomolibgpu/cuda_kernels/calc_metrics.cu,sha256=oV7ZPcwjWafmZjbNsUkBYPvOViJ_nX3zBoOAuPCmIrA,11335
5
6
  httomolibgpu/cuda_kernels/center_360_shifts.cu,sha256=Ya_8hxjXGtPBsPY3qfGJaugwnYrTFjFFretRcLiUfFQ,1631
@@ -7,6 +8,7 @@ httomolibgpu/cuda_kernels/generate_mask.cu,sha256=3il3r1J2cnTCd3UXO4GWGfBgGxj4pv
7
8
  httomolibgpu/cuda_kernels/median_kernel.cu,sha256=EECLUCoJkT9GQ9Db_FF6fYOG6cDSiAePTRZNxE4VZ68,1692
8
9
  httomolibgpu/cuda_kernels/raven_filter.cu,sha256=KX2TM_9tMpvoGCHezDNWYABCnv2cT9mlMo4IhxRUac0,1437
9
10
  httomolibgpu/cuda_kernels/remove_nan_inf.cu,sha256=gv0ihkf6A_D_po9x7pmgFsQFhwZ1dB_HYc_0Tu-bpUU,630
11
+ httomolibgpu/cuda_kernels/remove_stripe_fw.cu,sha256=J_vy0RUYYKT-mOzERsn3kjgt4hbE7vHPFRuJYNzs6sM,4504
10
12
  httomolibgpu/misc/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
11
13
  httomolibgpu/misc/corr.py,sha256=e1eUsWLSM9SB5xzWTDW0o9pAD_lbrr4DL-QQmyM8v4c,4503
12
14
  httomolibgpu/misc/denoise.py,sha256=-D9UPbZyUAcCptBHUUXsmj1NFzd6HrrRjJJh4T5gmhQ,4787
@@ -16,14 +18,14 @@ httomolibgpu/misc/utils.py,sha256=rHRuQUO47SlTanvKDBgiC0im4tXlGLCw5B_zvlLzzbc,47
16
18
  httomolibgpu/prep/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
17
19
  httomolibgpu/prep/alignment.py,sha256=GVxnyioipmqsHb4s3mPQ8tKGoPIQMPftDrQxUO-HBuE,5491
18
20
  httomolibgpu/prep/normalize.py,sha256=hee0H4mE7FrSZgcF1fjLsKT06xjTJymkyAxpe2itQe4,4202
19
- httomolibgpu/prep/phase.py,sha256=eDi4Y2dZ0ZDgblCku1XhHiSuK6rHnmsDFuZdDvlnHMU,8505
20
- httomolibgpu/prep/stripe.py,sha256=8_DV0ON6AWARuziqkmhom56gWTardtqC_z3xG8geg0o,14774
21
+ httomolibgpu/prep/phase.py,sha256=yKJe9gmWuFaUSIuoctV5X1Pb7yEgOmkQ6jxvZkSSwpQ,12128
22
+ httomolibgpu/prep/stripe.py,sha256=OZPimFxe9TOSaEcErORFxd6HCcFcR62-q5XYBvC10FM,36918
21
23
  httomolibgpu/recon/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
22
- httomolibgpu/recon/_phase_cross_correlation.py,sha256=h5r1g5lMmS9p32k2SuC8pjji6McpwBZiN95zQd2xvBo,16616
24
+ httomolibgpu/recon/_phase_cross_correlation.py,sha256=Ru2oLAPv8XOSSuZer5yNQrxD_8lMAwBSvtkVAVs5TCc,16469
23
25
  httomolibgpu/recon/algorithm.py,sha256=ds-_io7kGzo5FiJq8k4--PYtIWak3y9H7yuyg1lymyQ,25121
24
26
  httomolibgpu/recon/rotation.py,sha256=GaSwNrlDnlP_iIrTfKUQLiXsShJ5aSDvdKPwofggtwQ,27948
25
- httomolibgpu-5.0.dist-info/licenses/LICENSE,sha256=bXeLsgelPUUXw8HCIYiVC97Dpjhm2nB54m7TACdH8ng,48032
26
- httomolibgpu-5.0.dist-info/METADATA,sha256=0_lrMXVwbSoLpLzIx_i24kCU7VWAMkXFaBaT6rQ0O-c,3339
27
- httomolibgpu-5.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
28
- httomolibgpu-5.0.dist-info/top_level.txt,sha256=nV0Ty_YvSPVd1O6MNWuIplD0w1nwk5hT76YgBZ-bzUw,13
29
- httomolibgpu-5.0.dist-info/RECORD,,
27
+ httomolibgpu-5.2.dist-info/licenses/LICENSE,sha256=bXeLsgelPUUXw8HCIYiVC97Dpjhm2nB54m7TACdH8ng,48032
28
+ httomolibgpu-5.2.dist-info/METADATA,sha256=K8rMjvvrs2ZvfOiZeEar2tinoGMjH5b7fYfv-xG0F3E,3365
29
+ httomolibgpu-5.2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
30
+ httomolibgpu-5.2.dist-info/top_level.txt,sha256=nV0Ty_YvSPVd1O6MNWuIplD0w1nwk5hT76YgBZ-bzUw,13
31
+ httomolibgpu-5.2.dist-info/RECORD,,