httomolibgpu 5.1__py3-none-any.whl → 5.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- httomolibgpu/__init__.py +1 -0
- httomolibgpu/cuda_kernels/remove_stripe_fw.cu +155 -0
- httomolibgpu/cupywrapper.py +2 -0
- httomolibgpu/prep/phase.py +70 -11
- httomolibgpu/prep/stripe.py +602 -1
- {httomolibgpu-5.1.dist-info → httomolibgpu-5.3.dist-info}/METADATA +2 -1
- {httomolibgpu-5.1.dist-info → httomolibgpu-5.3.dist-info}/RECORD +10 -9
- {httomolibgpu-5.1.dist-info → httomolibgpu-5.3.dist-info}/WHEEL +0 -0
- {httomolibgpu-5.1.dist-info → httomolibgpu-5.3.dist-info}/licenses/LICENSE +0 -0
- {httomolibgpu-5.1.dist-info → httomolibgpu-5.3.dist-info}/top_level.txt +0 -0
httomolibgpu/__init__.py
CHANGED
|
@@ -9,6 +9,7 @@ from httomolibgpu.prep.normalize import dark_flat_field_correction, minus_log
|
|
|
9
9
|
from httomolibgpu.prep.phase import paganin_filter, paganin_filter_savu_legacy
|
|
10
10
|
from httomolibgpu.prep.stripe import (
|
|
11
11
|
remove_stripe_based_sorting,
|
|
12
|
+
remove_stripe_fw,
|
|
12
13
|
remove_stripe_ti,
|
|
13
14
|
remove_all_stripe,
|
|
14
15
|
raven_filter,
|
|
@@ -0,0 +1,155 @@
|
|
|
1
|
+
// Strided 1D convolution along x that produces two output groups per input.
//
// One thread handles one output position (g_thd_x, g_thd_y, g_thd_z) inside
// the dim_x * dim_y * dim_z launch domain and writes one value per group:
//
//   out[x + y * dim_x + z * out_stride_z + i * out_stride_group] =
//       sum_{j < WSize} w[i * WSize + j]
//                       * in[x * in_stride_x + j + y * in_stride_y + z * in_stride_z]
//
// The WSize taps are read from consecutive input elements starting at
// x * in_stride_x. `w` must hold at least 2 * WSize coefficients (one filter
// per output group).
//
// All flat offsets are computed in 64-bit arithmetic so that volumes with
// more than INT_MAX elements do not overflow the index calculation.
template<int WSize>
__global__ void grouped_convolution_x(
    int dim_x,
    int dim_y,
    int dim_z,
    const float* in,
    int in_stride_x,
    int in_stride_y,
    int in_stride_z,
    float* out,
    int out_stride_z,
    int out_stride_group,
    const float* w
)
{
    const int g_thd_x = blockDim.x * blockIdx.x + threadIdx.x;
    const int g_thd_y = blockDim.y * blockIdx.y + threadIdx.y;
    const int g_thd_z = blockDim.z * blockIdx.z + threadIdx.z;
    if (g_thd_x >= dim_x || g_thd_y >= dim_y || g_thd_z >= dim_z)
    {
        return;
    }

    constexpr int out_groups = 2;

    // Offsets that do not depend on the group or the tap are computed once.
    const long long in_base = static_cast<long long>(g_thd_x) * in_stride_x
                            + static_cast<long long>(g_thd_y) * in_stride_y
                            + static_cast<long long>(g_thd_z) * in_stride_z;
    const long long out_base = static_cast<long long>(g_thd_x)
                             + static_cast<long long>(g_thd_y) * dim_x
                             + static_cast<long long>(g_thd_z) * out_stride_z;

    for (int i = 0; i < out_groups; ++i)
    {
        float acc = 0.F;
        for (int j = 0; j < WSize; ++j)
        {
            const int w_idx = i * WSize + j;
            acc += w[w_idx] * in[in_base + j];
        }
        out[out_base + static_cast<long long>(i) * out_stride_group] = acc;
    }
}
|
|
38
|
+
|
|
39
|
+
// Strided 1D convolution along y over two input groups, each producing two
// output groups (four output groups in total).
//
// One thread handles one output position (g_thd_x, g_thd_y, g_thd_z) inside
// the dim_x * dim_y * dim_z launch domain. For input group `group` and
// filter `i` it writes
//
//   out[x + y * dim_x + z * out_stride_z + (2 * group + i) * out_stride_group] =
//       sum_{j < WSize} w[(2 * group + i) * WSize + j]
//                       * in[x * in_stride_x + (2 * y + j) * in_stride_y
//                            + group * in_stride_group + z * in_stride_z]
//
// The step of 2 rows between neighbouring outputs is fixed by
// `item_stride_y`. `w` must hold at least 4 * WSize coefficients.
//
// All flat offsets are computed in 64-bit arithmetic so that volumes with
// more than INT_MAX elements do not overflow the index calculation.
template<int WSize>
__global__ void grouped_convolution_y(
    int dim_x,
    int dim_y,
    int dim_z,
    const float* in,
    int in_stride_x,
    int in_stride_y,
    int in_stride_z,
    int in_stride_group,
    float* out,
    int out_stride_z,
    int out_stride_group,
    const float* w
)
{
    const int g_thd_x = blockDim.x * blockIdx.x + threadIdx.x;
    const int g_thd_y = blockDim.y * blockIdx.y + threadIdx.y;
    const int g_thd_z = blockDim.z * blockIdx.z + threadIdx.z;
    if (g_thd_x >= dim_x || g_thd_y >= dim_y || g_thd_z >= dim_z)
    {
        return;
    }

    constexpr int in_groups = 2;
    constexpr int out_groups = 2;
    constexpr int item_stride_y = 2;

    // Offsets that do not depend on the group or the tap are computed once.
    const long long in_origin = static_cast<long long>(g_thd_x) * in_stride_x
                              + static_cast<long long>(item_stride_y) * g_thd_y * in_stride_y
                              + static_cast<long long>(g_thd_z) * in_stride_z;
    const long long out_origin = static_cast<long long>(g_thd_x)
                               + static_cast<long long>(g_thd_y) * dim_x
                               + static_cast<long long>(g_thd_z) * out_stride_z;

    for (int group = 0; group < in_groups; ++group)
    {
        const long long in_base = in_origin + static_cast<long long>(group) * in_stride_group;
        for (int i = 0; i < out_groups; ++i)
        {
            const int filter = out_groups * group + i;
            float acc = 0.F;
            for (int j = 0; j < WSize; ++j)
            {
                const int w_idx = filter * WSize + j;
                const long long in_idx = in_base + static_cast<long long>(j) * in_stride_y;
                acc += w[w_idx] * in[in_idx];
            }
            out[out_origin + static_cast<long long>(filter) * out_stride_group] = acc;
        }
    }
}
|
|
82
|
+
|
|
83
|
+
// Transposed (upsampling by 2) 1D convolution along x.
//
// One thread handles one output position (g_thd_x, g_thd_y, g_thd_z) inside
// the dim_x * dim_y * dim_z launch domain. For every tap i it looks at the
// output-space offset g_thd_x - i; only even offsets that map to a valid
// input column in_x = (g_thd_x - i) / 2 with 0 <= in_x < in_dim_x contribute:
//
//   out[x + dim_x * y + dim_x * dim_y * z] =
//       sum_i in[in_x + y * in_stride_y + z * in_stride_z] * w[i]
//
// Only the first WSize coefficients of `w` are read. The output is written
// as a densely packed dim_z x dim_y x dim_x volume.
//
// All flat offsets are computed in 64-bit arithmetic so that volumes with
// more than INT_MAX elements do not overflow the index calculation.
template<int WSize>
__global__ void transposed_convolution_x(
    int dim_x,
    int dim_y,
    int dim_z,
    const float* in,
    int in_dim_x,
    int in_stride_y,
    int in_stride_z,
    const float* w,
    float* out
)
{
    const int g_thd_x = blockDim.x * blockIdx.x + threadIdx.x;
    const int g_thd_y = blockDim.y * blockIdx.y + threadIdx.y;
    const int g_thd_z = blockDim.z * blockIdx.z + threadIdx.z;
    if (g_thd_x >= dim_x || g_thd_y >= dim_y || g_thd_z >= dim_z)
    {
        return;
    }

    constexpr int item_out_stride = 2;

    // Offset of the input row this thread reads from; independent of the tap.
    const long long in_row = static_cast<long long>(g_thd_y) * in_stride_y
                           + static_cast<long long>(g_thd_z) * in_stride_z;

    float acc = 0.F;
    for (int i = 0; i < WSize; ++i)
    {
        const int offset = g_thd_x - i;
        // For negative offsets C++ division truncates towards zero and the
        // remainder is 0 or -1, so the checks below reject every negative
        // position (odd ones via the remainder, even ones via in_x >= 0).
        const int in_x = offset / item_out_stride;
        const int in_x_mod = offset % item_out_stride;
        if (in_x_mod == 0 && in_x >= 0 && in_x < in_dim_x)
        {
            acc += in[in_row + in_x] * w[i];
        }
    }
    const long long out_idx = static_cast<long long>(g_thd_x)
                            + static_cast<long long>(dim_x) * g_thd_y
                            + static_cast<long long>(dim_x) * dim_y * g_thd_z;
    out[out_idx] = acc;
}
|
|
119
|
+
|
|
120
|
+
// Transposed (upsampling by 2) 1D convolution along y.
//
// One thread handles one output position (g_thd_x, g_thd_y, g_thd_z) inside
// the dim_x * dim_y * dim_z launch domain. For every tap i it looks at the
// output-space offset g_thd_y - i; only even offsets that map to a valid
// input row in_y = (g_thd_y - i) / 2 with 0 <= in_y < in_dim_y contribute:
//
//   out[x + dim_x * y + dim_x * dim_y * z] =
//       sum_i in[x + in_y * in_stride_y + z * in_stride_z] * w[i]
//
// Only the first WSize coefficients of `w` are read. The output is written
// as a densely packed dim_z x dim_y x dim_x volume.
//
// All flat offsets are computed in 64-bit arithmetic so that volumes with
// more than INT_MAX elements do not overflow the index calculation.
template<int WSize>
__global__ void transposed_convolution_y(
    int dim_x,
    int dim_y,
    int dim_z,
    const float* in,
    int in_dim_y,
    int in_stride_y,
    int in_stride_z,
    const float* w,
    float* out
)
{
    const int g_thd_x = blockDim.x * blockIdx.x + threadIdx.x;
    const int g_thd_y = blockDim.y * blockIdx.y + threadIdx.y;
    const int g_thd_z = blockDim.z * blockIdx.z + threadIdx.z;
    if (g_thd_x >= dim_x || g_thd_y >= dim_y || g_thd_z >= dim_z)
    {
        return;
    }

    constexpr int item_out_stride = 2;

    // Offset of the input column this thread reads from; independent of the tap.
    const long long in_column = static_cast<long long>(g_thd_x)
                              + static_cast<long long>(g_thd_z) * in_stride_z;

    float acc = 0.F;
    for (int i = 0; i < WSize; ++i)
    {
        const int offset = g_thd_y - i;
        // For negative offsets C++ division truncates towards zero and the
        // remainder is 0 or -1, so the checks below reject every negative
        // position (odd ones via the remainder, even ones via in_y >= 0).
        const int in_y = offset / item_out_stride;
        const int in_y_mod = offset % item_out_stride;
        if (in_y_mod == 0 && in_y >= 0 && in_y < in_dim_y)
        {
            const long long in_idx = in_column + static_cast<long long>(in_y) * in_stride_y;
            acc += in[in_idx] * w[i];
        }
    }
    const long long out_idx = static_cast<long long>(g_thd_x)
                            + static_cast<long long>(dim_x) * g_thd_y
                            + static_cast<long long>(dim_x) * dim_y * g_thd_z;
    out[out_idx] = acc;
}
|
httomolibgpu/cupywrapper.py
CHANGED
|
@@ -2,6 +2,7 @@ cupy_run = False
|
|
|
2
2
|
try:
|
|
3
3
|
import cupy as cp
|
|
4
4
|
import nvtx
|
|
5
|
+
from cupyx.scipy.fft import next_fast_len
|
|
5
6
|
|
|
6
7
|
try:
|
|
7
8
|
cp.cuda.Device(0).compute_capability
|
|
@@ -15,5 +16,6 @@ except ImportError as e:
|
|
|
15
16
|
)
|
|
16
17
|
from unittest.mock import Mock
|
|
17
18
|
import numpy as cp
|
|
19
|
+
from scipy.fft import next_fast_len
|
|
18
20
|
|
|
19
21
|
nvtx = Mock()
|
httomolibgpu/prep/phase.py
CHANGED
|
@@ -26,6 +26,7 @@ from httomolibgpu.memory_estimator_helpers import _DeviceMemStack
|
|
|
26
26
|
|
|
27
27
|
cp = cupywrapper.cp
|
|
28
28
|
cupy_run = cupywrapper.cupy_run
|
|
29
|
+
next_fast_len = cupywrapper.next_fast_len
|
|
29
30
|
|
|
30
31
|
from unittest.mock import Mock
|
|
31
32
|
|
|
@@ -38,7 +39,7 @@ else:
|
|
|
38
39
|
fftshift = Mock()
|
|
39
40
|
|
|
40
41
|
from numpy import float32
|
|
41
|
-
from typing import Optional, Tuple
|
|
42
|
+
from typing import Literal, Optional, Tuple
|
|
42
43
|
import math
|
|
43
44
|
|
|
44
45
|
__all__ = [
|
|
@@ -56,6 +57,10 @@ def paganin_filter(
|
|
|
56
57
|
distance: float = 1.0,
|
|
57
58
|
energy: float = 53.0,
|
|
58
59
|
ratio_delta_beta: float = 250,
|
|
60
|
+
calculate_padding_value_method: Literal[
|
|
61
|
+
"next_power_of_2", "next_fast_length", "use_pad_x_y"
|
|
62
|
+
] = "next_power_of_2",
|
|
63
|
+
pad_x_y: Optional[list] = None,
|
|
59
64
|
calc_peak_gpu_mem: bool = False,
|
|
60
65
|
) -> cp.ndarray:
|
|
61
66
|
"""
|
|
@@ -74,6 +79,10 @@ def paganin_filter(
|
|
|
74
79
|
Beam energy in keV.
|
|
75
80
|
ratio_delta_beta : float
|
|
76
81
|
The ratio of delta/beta, where delta is the phase shift and real part of the complex material refractive index and beta is the absorption.
|
|
82
|
+
calculate_padding_value_method: str
|
|
83
|
+
Method to calculate the padded size of the input data. Accepted values are 'next_power_of_2', 'next_fast_length' and 'use_pad_x_y'.
|
|
84
|
+
pad_x_y: list | None
|
|
85
|
+
Padding values in pixels horizontally and vertically. Must be None, unless `calculate_padding_value_method` is 'use_pad_x_y'.
|
|
77
86
|
calc_peak_gpu_mem: bool
|
|
78
87
|
Parameter to support memory estimation in HTTomo. Irrelevant to the method itself and can be ignored by user.
|
|
79
88
|
|
|
@@ -93,9 +102,9 @@ def paganin_filter(
|
|
|
93
102
|
mem_stack.malloc(np.prod(tomo) * np.float32().itemsize)
|
|
94
103
|
dz_orig, dy_orig, dx_orig = tomo.shape if not mem_stack else tomo
|
|
95
104
|
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
105
|
+
padded_tomo, pad_tup = _pad_projections(
|
|
106
|
+
tomo, calculate_padding_value_method, pad_x_y, mem_stack
|
|
107
|
+
)
|
|
99
108
|
|
|
100
109
|
dz, dy, dx = padded_tomo.shape if not mem_stack else padded_tomo
|
|
101
110
|
|
|
@@ -219,21 +228,59 @@ def _shift_bit_length(x: int) -> int:
|
|
|
219
228
|
return 1 << (x - 1).bit_length()
|
|
220
229
|
|
|
221
230
|
|
|
222
|
-
def _calculate_pad_size(
|
|
231
|
+
def _calculate_pad_size(
|
|
232
|
+
datashape: tuple,
|
|
233
|
+
calculate_padding_value_method: Literal[
|
|
234
|
+
"next_power_of_2", "next_fast_length", "use_pad_x_y"
|
|
235
|
+
],
|
|
236
|
+
pad_x_y: Optional[list],
|
|
237
|
+
) -> list:
|
|
223
238
|
"""Calculating the padding size
|
|
224
239
|
|
|
225
240
|
Args:
|
|
226
|
-
datashape (tuple):
|
|
241
|
+
datashape (tuple):
|
|
242
|
+
the shape of the 3D data
|
|
243
|
+
calculate_padding_value_method: str
|
|
244
|
+
Method to calculate the padded size of the input data. Accepted values are 'next_power_of_2', 'next_fast_length' and 'use_pad_x_y`.
|
|
245
|
+
pad_x_y (int, int) | None:
|
|
246
|
+
Padding values in pixels horizontally and vertically. Must be None, unless `calculate_padding_value_method` is 'use_pad_x_y'.
|
|
227
247
|
|
|
228
248
|
Returns:
|
|
229
249
|
list: the padded dimensions
|
|
230
250
|
"""
|
|
251
|
+
if pad_x_y is not None and calculate_padding_value_method != "use_pad_x_y":
|
|
252
|
+
raise ValueError(
|
|
253
|
+
'calculate_padding_value_method must be "use_pad_x_y" when pad_x_y is specified'
|
|
254
|
+
)
|
|
255
|
+
elif calculate_padding_value_method == "use_pad_x_y":
|
|
256
|
+
if pad_x_y is None:
|
|
257
|
+
raise ValueError(
|
|
258
|
+
'pad_x_y must be provided when calculate_padding_value_method is "use_pad_x_y"'
|
|
259
|
+
)
|
|
260
|
+
elif (
|
|
261
|
+
not isinstance(pad_x_y, list)
|
|
262
|
+
or len(pad_x_y) != 2
|
|
263
|
+
or not isinstance(pad_x_y[0], int)
|
|
264
|
+
or not isinstance(pad_x_y[1], int)
|
|
265
|
+
):
|
|
266
|
+
raise ValueError("pad_x_y must be a list of two integers")
|
|
267
|
+
|
|
268
|
+
if calculate_padding_value_method == "next_power_of_2":
|
|
269
|
+
calculate_padded_dim = lambda _, size: _shift_bit_length(size + 1)
|
|
270
|
+
elif calculate_padding_value_method == "next_fast_length":
|
|
271
|
+
calculate_padded_dim = lambda _, size: next_fast_len(size)
|
|
272
|
+
elif calculate_padding_value_method == "use_pad_x_y":
|
|
273
|
+
calculate_padded_dim = lambda dim, size: size + 2 * pad_x_y[2 - dim]
|
|
274
|
+
else:
|
|
275
|
+
raise ValueError(
|
|
276
|
+
f'Unexpected calculate_padding_value_method: "{calculate_padding_value_method}"'
|
|
277
|
+
)
|
|
231
278
|
pad_list = []
|
|
232
279
|
for index, element in enumerate(datashape):
|
|
233
280
|
if index == 0:
|
|
234
281
|
pad_width = (0, 0) # do not pad the slicing dim
|
|
235
282
|
else:
|
|
236
|
-
diff =
|
|
283
|
+
diff = calculate_padded_dim(index, element) - element
|
|
237
284
|
if element % 2 == 0:
|
|
238
285
|
pad_width_scalar = diff // 2
|
|
239
286
|
pad_width = (pad_width_scalar, pad_width_scalar)
|
|
@@ -248,17 +295,27 @@ def _calculate_pad_size(datashape: tuple) -> list:
|
|
|
248
295
|
return pad_list
|
|
249
296
|
|
|
250
297
|
|
|
251
|
-
def
|
|
252
|
-
tomo: cp.ndarray,
|
|
298
|
+
def _pad_projections(
|
|
299
|
+
tomo: cp.ndarray,
|
|
300
|
+
calculate_padding_value_method: Literal[
|
|
301
|
+
"next_power_of_2", "next_fast_length", "use_pad_x_y"
|
|
302
|
+
],
|
|
303
|
+
pad_x_y: Optional[list],
|
|
304
|
+
mem_stack: Optional[_DeviceMemStack],
|
|
253
305
|
) -> Tuple[cp.ndarray, Tuple[int, int]]:
|
|
254
306
|
"""
|
|
255
|
-
Performs padding of each projection to
|
|
307
|
+
Performs padding of each projection to a size optimal for FFT.
|
|
256
308
|
If the shape is not even we also care of that before padding.
|
|
257
309
|
|
|
258
310
|
Parameters
|
|
259
311
|
----------
|
|
260
312
|
tomo : cp.ndarray
|
|
261
313
|
3d projection data
|
|
314
|
+
calculate_padding_value_method: str
|
|
315
|
+
Method to calculate the padded size of the input data. Accepted values are 'next_power_of_2', 'next_fast_length' and 'use_pad_x_y`.
|
|
316
|
+
pad_x_y: list | None:
|
|
317
|
+
Padding values in pixels horizontally and vertically. Must be None, unless `calculate_padding_value_method` is 'use_pad_x_y'.
|
|
318
|
+
|
|
262
319
|
|
|
263
320
|
Returns
|
|
264
321
|
-------
|
|
@@ -268,7 +325,9 @@ def _pad_projections_to_second_power(
|
|
|
268
325
|
"""
|
|
269
326
|
full_shape_tomo = cp.shape(tomo) if not mem_stack else tomo
|
|
270
327
|
|
|
271
|
-
pad_list = _calculate_pad_size(
|
|
328
|
+
pad_list = _calculate_pad_size(
|
|
329
|
+
full_shape_tomo, calculate_padding_value_method, pad_x_y
|
|
330
|
+
)
|
|
272
331
|
|
|
273
332
|
if mem_stack:
|
|
274
333
|
padded_tomo = [
|
httomolibgpu/prep/stripe.py
CHANGED
|
@@ -21,6 +21,7 @@
|
|
|
21
21
|
"""Module for stripes removal"""
|
|
22
22
|
|
|
23
23
|
import numpy as np
|
|
24
|
+
import pywt
|
|
24
25
|
from httomolibgpu import cupywrapper
|
|
25
26
|
|
|
26
27
|
cp = cupywrapper.cp
|
|
@@ -31,6 +32,7 @@ from unittest.mock import Mock
|
|
|
31
32
|
if cupy_run:
|
|
32
33
|
from cupyx.scipy.ndimage import median_filter, binary_dilation, uniform_filter1d
|
|
33
34
|
from cupyx.scipy.fft import fft2, ifft2, fftshift
|
|
35
|
+
from cupyx.scipy.fftpack import get_fft_plan
|
|
34
36
|
from httomolibgpu.cuda_kernels import load_cuda_module
|
|
35
37
|
else:
|
|
36
38
|
median_filter = Mock()
|
|
@@ -41,10 +43,11 @@ else:
|
|
|
41
43
|
fftshift = Mock()
|
|
42
44
|
|
|
43
45
|
|
|
44
|
-
from typing import Union
|
|
46
|
+
from typing import Optional, Tuple, Union
|
|
45
47
|
|
|
46
48
|
__all__ = [
|
|
47
49
|
"remove_stripe_based_sorting",
|
|
50
|
+
"remove_stripe_fw",
|
|
48
51
|
"remove_stripe_ti",
|
|
49
52
|
"remove_all_stripe",
|
|
50
53
|
"raven_filter",
|
|
@@ -156,6 +159,604 @@ def remove_stripe_ti(
|
|
|
156
159
|
return data
|
|
157
160
|
|
|
158
161
|
|
|
162
|
+
###### Ring removal with wavelet filtering (adapted for CuPy from the pytorch_wavelets package https://pytorch-wavelets.readthedocs.io/) ##########
|
|
163
|
+
# These functions are taken from TomoCuPy package
|
|
164
|
+
# *************************************************************************** #
|
|
165
|
+
# Copyright © 2022, UChicago Argonne, LLC #
|
|
166
|
+
# All Rights Reserved #
|
|
167
|
+
# Software Name: Tomocupy #
|
|
168
|
+
# By: Argonne National Laboratory #
|
|
169
|
+
# #
|
|
170
|
+
# OPEN SOURCE LICENSE #
|
|
171
|
+
# #
|
|
172
|
+
# Redistribution and use in source and binary forms, with or without #
|
|
173
|
+
# modification, are permitted provided that the following conditions are met: #
|
|
174
|
+
# #
|
|
175
|
+
# 1. Redistributions of source code must retain the above copyright notice, #
|
|
176
|
+
# this list of conditions and the following disclaimer. #
|
|
177
|
+
# 2. Redistributions in binary form must reproduce the above copyright #
|
|
178
|
+
# notice, this list of conditions and the following disclaimer in the #
|
|
179
|
+
# documentation and/or other materials provided with the distribution. #
|
|
180
|
+
# 3. Neither the name of the copyright holder nor the names of its #
|
|
181
|
+
# contributors may be used to endorse or promote products derived #
|
|
182
|
+
# from this software without specific prior written permission. #
|
|
183
|
+
# #
|
|
184
|
+
# #
|
|
185
|
+
# *************************************************************************** #
|
|
186
|
+
|
|
187
|
+
|
|
188
|
+
def _reflect(x: np.ndarray, minx: float, maxx: float) -> np.ndarray:
|
|
189
|
+
"""Reflect the values in matrix *x* about the scalar values *minx* and
|
|
190
|
+
*maxx*. Hence a vector *x* containing a long linearly increasing series is
|
|
191
|
+
converted into a waveform which ramps linearly up and down between *minx*
|
|
192
|
+
and *maxx*. If *x* contains integers and *minx* and *maxx* are (integers +
|
|
193
|
+
0.5), the ramps will have repeated max and min samples.
|
|
194
|
+
|
|
195
|
+
.. codeauthor:: Rich Wareham <rjw57@cantab.net>, Aug 2013
|
|
196
|
+
.. codeauthor:: Nick Kingsbury, Cambridge University, January 1999.
|
|
197
|
+
|
|
198
|
+
"""
|
|
199
|
+
rng = maxx - minx
|
|
200
|
+
rng_by_2 = 2 * rng
|
|
201
|
+
mod = np.fmod(x - minx, rng_by_2)
|
|
202
|
+
normed_mod = np.where(mod < 0, mod + rng_by_2, mod)
|
|
203
|
+
out = np.where(normed_mod >= rng, rng_by_2 - normed_mod, normed_mod) + minx
|
|
204
|
+
return np.array(out, dtype=x.dtype)
|
|
205
|
+
|
|
206
|
+
|
|
207
|
+
class _DeviceMemStack:
|
|
208
|
+
def __init__(self) -> None:
|
|
209
|
+
self.allocations = []
|
|
210
|
+
self.current = 0
|
|
211
|
+
self.highwater = 0
|
|
212
|
+
|
|
213
|
+
def malloc(self, bytes):
|
|
214
|
+
self.allocations.append(bytes)
|
|
215
|
+
allocated = self._round_up(bytes)
|
|
216
|
+
self.current += allocated
|
|
217
|
+
self.highwater = max(self.current, self.highwater)
|
|
218
|
+
|
|
219
|
+
def free(self, bytes):
|
|
220
|
+
assert bytes in self.allocations
|
|
221
|
+
self.allocations.remove(bytes)
|
|
222
|
+
self.current -= self._round_up(bytes)
|
|
223
|
+
assert self.current >= 0
|
|
224
|
+
|
|
225
|
+
def _round_up(self, size):
|
|
226
|
+
ALLOCATION_UNIT_SIZE = 512
|
|
227
|
+
size = (size + ALLOCATION_UNIT_SIZE - 1) // ALLOCATION_UNIT_SIZE
|
|
228
|
+
return size * ALLOCATION_UNIT_SIZE
|
|
229
|
+
|
|
230
|
+
|
|
231
|
+
def _mypad(
    x: cp.ndarray, pad: Tuple[int, int, int, int], mem_stack: Optional[_DeviceMemStack]
) -> cp.ndarray:
    """Symmetrically pad the last two axes of a 4D array along ONE direction.

    The padded axis is indexed with a reflected index vector (see
    ``_reflect``), so border samples are mirrored with the edge value
    repeated.

    Parameters
    ----------
    x : cp.ndarray
        Array to pad. In memory-estimation mode (``mem_stack`` given) this is
        the 4-element shape of the array instead of the array itself.
    pad : tuple
        ``(left, right, top, bottom)`` pad sizes. Either the horizontal pair
        or the vertical pair must be zero.
    mem_stack : _DeviceMemStack, optional
        When given, no data is touched: the size of the padded result is
        registered on the stack and its shape is returned.

    Returns
    -------
    cp.ndarray
        The padded array, or the padded shape (a list) in memory-estimation
        mode.

    Raises
    ------
    ValueError
        If both horizontal and vertical padding are requested. Previously
        this case fell through and silently returned ``None``.
    """
    if pad[0] == 0 and pad[1] == 0:
        # Vertical only (this branch also handles an all-zero pad).
        axis, before, after = -2, pad[2], pad[3]
    elif pad[2] == 0 and pad[3] == 0:
        # Horizontal only.
        axis, before, after = -1, pad[0], pad[1]
    else:
        raise ValueError(
            "_mypad supports padding along a single direction only, "
            f"got pad={tuple(pad)}"
        )

    length = x.shape[axis] if not mem_stack else x[axis]
    # Indices -before .. length+after-1 folded back into the valid range.
    xe = _reflect(np.arange(-before, length + after, dtype="int32"), -0.5, length - 0.5)

    if mem_stack:
        ret_shape = [x[0], x[1], x[2], x[3]]
        ret_shape[axis] = xe.size
        mem_stack.malloc(np.prod(ret_shape) * np.float32().itemsize)
        return ret_shape

    if axis == -2:
        return x[:, :, xe, :]
    return x[:, :, :, xe]
|
|
261
|
+
|
|
262
|
+
|
|
263
|
+
def _next_power_of_two(x: int, max_val: int = 128) -> int:
|
|
264
|
+
n = 1
|
|
265
|
+
while n < x and n < max_val:
|
|
266
|
+
n *= 2
|
|
267
|
+
return n
|
|
268
|
+
|
|
269
|
+
|
|
270
|
+
def _conv2d(
    x: cp.ndarray,
    w: np.ndarray,
    stride: Tuple[int, int],
    groups: int,
    mem_stack: Optional[_DeviceMemStack],
) -> cp.ndarray:
    """Strided grouped convolution (counterpart of ``pytorch.conv2d``).

    The work is done by the ``grouped_convolution_x`` / ``grouped_convolution_y``
    CUDA kernels from the ``remove_stripe_fw`` module: ``groups == 1`` selects
    the x kernel, any other value the y kernel.

    Parameters
    ----------
    x : cp.ndarray
        4D input ``(b, ci, hi, wi)``. In memory-estimation mode this is the
        shape of the input instead of the array. The kernels read it through
        a ``const float*``, so it is expected to be float32 - TODO confirm
        callers guarantee this.
    w : np.ndarray
        4D filter bank ``(co, _, hk, wk)``.
    stride : tuple
        ``(vertical, horizontal)`` step of the convolution.
    groups : int
        Number of groups; only used to choose between the two kernels.
    mem_stack : _DeviceMemStack, optional
        When given, only the output allocation is registered and the output
        shape is returned.

    Returns
    -------
    cp.ndarray
        float32 array of shape ``(b, co, ho, wo)``, or that shape as a list
        in memory-estimation mode.
    """
    b, ci, hi, wi = x.shape if not mem_stack else x
    co, _, hk, wk = w.shape
    ho = int(np.floor(1 + (hi - hk) / stride[0]))
    wo = int(np.floor(1 + (wi - wk) / stride[1]))
    out_shape = [b, co, ho, wo]
    if mem_stack:
        mem_stack.malloc(np.prod(out_shape) * np.float32().itemsize)
        return out_shape

    out = cp.zeros(out_shape, dtype="float32")
    # Both arrays live on the device, so both are reshaped with CuPy.
    w = cp.expand_dims(cp.asarray(w), axis=0)
    x = cp.expand_dims(x, axis=1)
    symbol_names = [f"grouped_convolution_x<{wk}>", f"grouped_convolution_y<{hk}>"]
    module = load_cuda_module("remove_stripe_fw", name_expressions=symbol_names)
    dim_x = out.shape[-1]
    dim_y = out.shape[-2]
    dim_z = out.shape[0]

    # Strides are handed to the kernels in elements, not bytes; each array's
    # byte strides are divided by that array's own item size.
    in_itemsize = x.dtype.itemsize
    out_itemsize = out.dtype.itemsize
    in_stride_x = stride[1]
    in_stride_y = x.strides[-2] // in_itemsize
    in_stride_z = x.strides[0] // in_itemsize
    out_stride_z = out.strides[0] // out_itemsize
    out_stride_group = out.strides[1] // out_itemsize

    block_x = _next_power_of_two(dim_x)
    block_dim = (block_x, 1, 1)
    grid_x = (dim_x + block_x - 1) // block_x
    grid_dim = (grid_x, dim_y, dim_z)

    # The kernels declare their scalar parameters as 32-bit `int`, whereas
    # CuPy passes plain Python ints as 64-bit `long long`. Passing explicit
    # np.int32 scalars makes the argument width match the kernel signature.
    i32 = np.int32

    if groups == 1:
        grouped_convolution_kernel_x = module.get_function(symbol_names[0])
        grouped_convolution_kernel_x(
            grid_dim,
            block_dim,
            (
                i32(dim_x),
                i32(dim_y),
                i32(dim_z),
                x,
                i32(in_stride_x),
                i32(in_stride_y),
                i32(in_stride_z),
                out,
                i32(out_stride_z),
                i32(out_stride_group),
                w,
            ),
        )
        return out

    grouped_convolution_kernel_y = module.get_function(symbol_names[1])
    # After expand_dims(axis=1) the original channel axis sits at index 2.
    in_stride_group = x.strides[2] // in_itemsize
    grouped_convolution_kernel_y(
        grid_dim,
        block_dim,
        (
            i32(dim_x),
            i32(dim_y),
            i32(dim_z),
            x,
            i32(in_stride_x),
            i32(in_stride_y),
            i32(in_stride_z),
            i32(in_stride_group),
            out,
            i32(out_stride_z),
            i32(out_stride_group),
            w,
        ),
    )
    return out
|
|
350
|
+
|
|
351
|
+
|
|
352
|
+
def _conv_transpose2d(
    x: cp.ndarray,
    w: np.ndarray,
    stride: Tuple[int, int],
    pad: Tuple[int, int],
    groups: int,
    mem_stack: Optional[_DeviceMemStack],
) -> cp.ndarray:
    """Transposed convolution (counterpart of ``pytorch.conv_transpose2d``).

    The work is done by the ``transposed_convolution_x`` (filter wider than
    one column) or ``transposed_convolution_y`` (filter taller than one row)
    CUDA kernel from the ``remove_stripe_fw`` module; afterwards ``pad``
    samples are cropped from both ends of each spatial axis.

    Parameters
    ----------
    x : cp.ndarray
        4D input ``(b, co, ho, wo)``. In memory-estimation mode this is the
        shape of the input instead of the array.
    w : np.ndarray
        4D filter bank ``(co, ci, hk, wk)``; exactly one of ``hk`` / ``wk``
        is expected to be larger than 1.
    stride : tuple
        ``(vertical, horizontal)`` upsampling step, used for the output size.
    pad : tuple
        ``(vertical, horizontal)`` number of samples cropped from each side
        of the result. The scalar ``0`` disables cropping.
    groups : int
        Unused; kept for interface parity with the forward convolution.
    mem_stack : _DeviceMemStack, optional
        When given, only the allocations are simulated and the output shape
        is returned.

    Returns
    -------
    cp.ndarray
        Contiguous float32 result, or its shape (a list) in
        memory-estimation mode.

    Raises
    ------
    ValueError
        If the filter is 1x1, for which no kernel exists. (This used to be an
        ``assert False``, which disappears under ``python -O`` and would then
        return an all-zero array.)
    """
    b, co, ho, wo = x.shape if not mem_stack else x
    co, ci, hk, wk = w.shape

    hi = (ho - 1) * stride[0] + hk
    wi = (wo - 1) * stride[1] + wk
    out_shape = [b, ci, hi, wi]
    if mem_stack:
        mem_stack.malloc(np.prod(out_shape) * np.float32().itemsize)
        mem_stack.malloc(w.size * np.float32().itemsize)
        if pad != 0:
            new_out_shape = [
                out_shape[0],
                out_shape[1],
                out_shape[2] - 2 * pad[0],
                out_shape[3] - 2 * pad[1],
            ]
            # The cropped copy exists next to the full result before the
            # latter is released.
            mem_stack.malloc(np.prod(new_out_shape) * np.float32().itemsize)
            mem_stack.free(np.prod(out_shape) * np.float32().itemsize)
            out_shape = new_out_shape
        mem_stack.free(w.size * np.float32().itemsize)
        return out_shape

    if wk <= 1 and hk <= 1:
        raise ValueError(
            "_conv_transpose2d needs a filter longer than one tap along x or y, "
            f"got a {hk}x{wk} filter"
        )

    out = cp.zeros(out_shape, dtype="float32")
    w = cp.asarray(w)

    symbol_names = [
        f"transposed_convolution_x<{wk}>",
        f"transposed_convolution_y<{hk}>",
    ]
    module = load_cuda_module("remove_stripe_fw", name_expressions=symbol_names)
    dim_x = out.shape[-1]
    dim_y = out.shape[-2]
    dim_z = out.shape[0]
    in_dim_x = x.shape[-1]
    in_dim_y = x.shape[-2]
    # Strides are handed to the kernels in elements, not bytes.
    in_stride_y = x.strides[-2] // x.dtype.itemsize
    in_stride_z = x.strides[0] // x.dtype.itemsize

    block_x = _next_power_of_two(dim_x)
    block_dim = (block_x, 1, 1)
    grid_x = (dim_x + block_x - 1) // block_x
    grid_dim = (grid_x, dim_y, dim_z)

    # The kernels declare their scalar parameters as 32-bit `int`, whereas
    # CuPy passes plain Python ints as 64-bit `long long`. Passing explicit
    # np.int32 scalars makes the argument width match the kernel signature.
    i32 = np.int32

    if wk > 1:
        transposed_convolution_kernel_x = module.get_function(symbol_names[0])
        transposed_convolution_kernel_x(
            grid_dim,
            block_dim,
            (
                i32(dim_x),
                i32(dim_y),
                i32(dim_z),
                x,
                i32(in_dim_x),
                i32(in_stride_y),
                i32(in_stride_z),
                w,
                out,
            ),
        )
    else:
        # hk > 1 is guaranteed by the guard above.
        transposed_convolution_kernel_y = module.get_function(symbol_names[1])
        transposed_convolution_kernel_y(
            grid_dim,
            block_dim,
            (
                i32(dim_x),
                i32(dim_y),
                i32(dim_z),
                x,
                i32(in_dim_y),
                i32(in_stride_y),
                i32(in_stride_z),
                w,
                out,
            ),
        )

    if pad != 0:
        out = out[:, :, pad[0] : out.shape[2] - pad[0], pad[1] : out.shape[3] - pad[1]]
    return cp.ascontiguousarray(out)
|
|
424
|
+
|
|
425
|
+
|
|
426
|
+
def _afb1d(
    x: cp.ndarray,
    h0: np.ndarray,
    h1: np.ndarray,
    dim: int,
    mem_stack: Optional[_DeviceMemStack],
) -> cp.ndarray:
    """Apply a 1D analysis filter bank to an image along a single axis.

    The input is symmetrically padded along the filtered axis and then
    convolved with stride 2 along that axis, using the lowpass/highpass pair
    repeated once per channel.

    Parameters
    ----------
    x : cp.ndarray
        4D input whose last two axes are spatial. In memory-estimation mode
        (``mem_stack`` given) this is the shape of the input instead.
    h0 : np.ndarray
        Lowpass filter, shaped ``(1, 1, h, 1)`` or ``(1, 1, 1, w)``.
    h1 : np.ndarray
        Highpass filter, shaped like ``h0``.
    dim : int
        Axis to filter; negative values are wrapped. 2 filters vertically
        (across the rows), 3 filters horizontally (across the columns).
    mem_stack : _DeviceMemStack, optional
        When given, allocations are only simulated and shapes are returned.

    Returns
    -------
    cp.ndarray
        Lowpass and highpass subbands concatenated along the channel axis
        (or the corresponding shape in memory-estimation mode).
    """
    dims = x if mem_stack else x.shape
    channels = dims[1]
    axis = dim % 4  # map negative axes onto 0..3
    length = dims[axis]
    taps = h0.size

    # One (lowpass, highpass) pair per channel, laid out along the filtered axis.
    filter_shape = tuple(taps if k == axis else 1 for k in range(4))
    pair = [h0.reshape(filter_shape), h1.reshape(filter_shape)]
    bank = np.concatenate(pair * channels, axis=0)

    # Total padding needed so the strided convolution yields the DWT length.
    n_coeffs = pywt.dwt_coeff_len(length, taps, mode="symmetric")
    total_pad = 2 * (n_coeffs - 1) - length + taps
    before = total_pad // 2
    after = (total_pad + 1) // 2
    if axis == 2:
        step = (2, 1)
        pad = (0, 0, before, after)
    else:
        step = (1, 2)
        pad = (before, after, 0, 0)

    padded = _mypad(x, pad=pad, mem_stack=mem_stack)
    lohi = _conv2d(padded, bank, stride=step, groups=channels, mem_stack=mem_stack)
    if mem_stack:
        # The padded copy is no longer needed once the convolution is done.
        mem_stack.free(np.prod(padded) * np.float32().itemsize)
    return lohi
|
|
471
|
+
|
|
472
|
+
|
|
473
|
+
def _sfb1d(
    lo: cp.ndarray,
    hi: cp.ndarray,
    g0: np.ndarray,
    g1: np.ndarray,
    dim: int,
    mem_stack: Optional[_DeviceMemStack],
) -> cp.ndarray:
    """Apply a 1D synthesis filter bank along a single axis.

    The lowpass and highpass subbands are each passed through a transposed
    convolution with stride 2 along the chosen axis and the two results are
    added.

    Parameters
    ----------
    lo : cp.ndarray
        Lowpass subband (its shape in memory-estimation mode).
    hi : cp.ndarray
        Highpass subband (its shape in memory-estimation mode).
    g0 : np.ndarray
        Lowpass reconstruction filter.
    g1 : np.ndarray
        Highpass reconstruction filter.
    dim : int
        Axis to filter; negative values are wrapped. 2 is vertical, anything
        else is treated as horizontal.
    mem_stack : _DeviceMemStack, optional
        When given, allocations are only simulated and a shape is returned.

    Returns
    -------
    cp.ndarray
        Sum of the two upsampled subbands, or the shape of the lowpass
        result in memory-estimation mode.
    """
    channels = lo[1] if mem_stack else lo.shape[1]
    axis = dim % 4
    taps = g0.size

    # One copy of each reconstruction filter per channel.
    filter_shape = tuple(taps if k == axis else 1 for k in range(4))
    lo_bank = np.concatenate([g0.reshape(filter_shape)] * channels, axis=0)
    hi_bank = np.concatenate([g1.reshape(filter_shape)] * channels, axis=0)

    if axis == 2:
        step = (2, 1)
        crop = (taps - 2, 0)
    else:
        step = (1, 2)
        crop = (0, taps - 2)

    y_lo = _conv_transpose2d(
        lo, lo_bank, stride=step, pad=crop, groups=channels, mem_stack=mem_stack
    )
    y_hi = _conv_transpose2d(
        hi, hi_bank, stride=step, pad=crop, groups=channels, mem_stack=mem_stack
    )
    if not mem_stack:
        return y_lo + y_hi

    item_bytes = np.float32().itemsize
    # The sum is a third buffer that coexists with both operands.
    mem_stack.malloc(np.prod(y_hi) * item_bytes)
    mem_stack.free(np.prod(y_lo) * item_bytes)
    mem_stack.free(np.prod(y_hi) * item_bytes)
    return y_lo
|
|
501
|
+
|
|
502
|
+
|
|
503
|
+
class _DWTForward:
    """Single-level 2D forward discrete wavelet transform of an image.

    Args:
        wave (str): Name of the wavelet, as understood by ``pywt.Wavelet``.
    """

    def __init__(self, wave: str):
        super().__init__()

        wavelet = pywt.Wavelet(wave)

        def as_filter(coeffs, shape):
            # Decomposition filters are stored reversed, as float32, in a 4D
            # layout that puts the taps on the filtered axis.
            return np.array(coeffs).astype("float32")[::-1].reshape(shape)

        column_shape = (1, 1, -1, 1)
        row_shape = (1, 1, 1, -1)
        # The same lowpass/highpass pair is used for rows and columns.
        self.h0_col = as_filter(wavelet.dec_lo, column_shape)
        self.h1_col = as_filter(wavelet.dec_hi, column_shape)
        self.h0_row = as_filter(wavelet.dec_lo, row_shape)
        self.h1_row = as_filter(wavelet.dec_hi, row_shape)

    def apply(
        self, x: cp.ndarray, mem_stack: Optional[_DeviceMemStack] = None
    ) -> Tuple[cp.ndarray, cp.ndarray]:
        """Run one level of the forward DWT.

        Args:
            x (array): Input of shape :math:`(N, C_{in}, H_{in}, W_{in})`.
                In memory-estimation mode (``mem_stack`` given) this is the
                shape of the input instead of the array.
            mem_stack: optional allocation tracker; when given, no data is
                processed and shapes are returned in place of arrays.

        Returns:
            (yl, yh)
                Lowpass coefficients ``yl`` of shape
                :math:`(N, C_{in}, H_{in}', W_{in}')` and bandpass
                coefficients ``yh`` of shape
                :math:`(N, C_{in}, 3, H_{in}', W_{in}')`, where the axis of
                length 3 iterates over the LH, HL and HH coefficients.

        Note:
            :math:`H_{in}', W_{in}'` denote the downsampled sizes produced
            by the filter bank.
        """
        # Filter along the rows first, then along the columns.
        lohi = _afb1d(x, self.h0_row, self.h1_row, dim=3, mem_stack=mem_stack)
        y = _afb1d(lohi, self.h0_col, self.h1_col, dim=2, mem_stack=mem_stack)

        if mem_stack:
            item_bytes = np.float32().itemsize
            batch, height, width = y[0], y[-2], y[-1]
            # Channel count once the 4 subbands are split off into their own axis.
            channels = np.prod(y) // batch // 4 // height // width
            x_shape = [batch, channels, height, width]
            yh_shape = [batch, channels, 3, height, width]

            mem_stack.free(np.prod(lohi) * item_bytes)
            mem_stack.malloc(np.prod(x_shape) * item_bytes)
            mem_stack.malloc(np.prod(yh_shape) * item_bytes)
            mem_stack.free(np.prod(y) * item_bytes)
            return x_shape, yh_shape

        # Drop the intermediate result before the contiguous copies are made.
        del lohi
        full = y.shape
        y = y.reshape(full[0], -1, 4, full[-2], full[-1])
        lowpass = cp.ascontiguousarray(y[:, :, 0])
        bandpass = cp.ascontiguousarray(y[:, :, 1:])
        return (lowpass, bandpass)
|
|
562
|
+
|
|
563
|
+
|
|
564
|
+
class _DWTInverse:
    """Single-level 2D inverse DWT (synthesis) of an image.

    Args:
        wave (str): Name of the wavelet, as understood by
            ``pywt.Wavelet``.
    """

    def __init__(self, wave: str):
        super().__init__()
        wavelet = pywt.Wavelet(wave)

        def _as_filter(taps, shape):
            # Every filter gets its own float32 array, reshaped so that it
            # lines up with the axis it will be applied along.
            return np.array(taps).astype("float32").reshape(shape)

        column_layout = (1, 1, -1, 1)
        row_layout = (1, 1, 1, -1)

        # The same reconstruction filters are used for columns and rows.
        self.g0_col = _as_filter(wavelet.rec_lo, column_layout)
        self.g1_col = _as_filter(wavelet.rec_hi, column_layout)
        self.g0_row = _as_filter(wavelet.rec_lo, row_layout)
        self.g1_row = _as_filter(wavelet.rec_hi, row_layout)

    def apply(
        self,
        coeffs: Tuple[cp.ndarray, cp.ndarray],
        mem_stack: Optional[_DeviceMemStack] = None,
    ) -> cp.ndarray:
        """Reconstruct one level from lowpass and bandpass coefficients.

        Args:
            coeffs (yl, yh): tuple of lowpass and bandpass coefficients, where
                yl is a lowpass array of shape :math:`(N, C_{in}, H_{in}',
                W_{in}')` and yh is a bandpass array of shape
                :math:`(N, C_{in}, 3, H_{in}'', W_{in}'')`, i.e. the format
                returned by the forward transform. In memory-estimation
                mode both entries are shapes rather than arrays.
            mem_stack: Optional memory tracker. When supplied, only the
                memory traffic is recorded and the value produced by the
                last ``_sfb1d`` call is returned.

        Returns:
            Reconstructed input of shape :math:`(N, C_{in}, H_{in}, W_{in})`

        Note:
            :math:`H_{in}', W_{in}', H_{in}'', W_{in}''` denote the correctly
            downsampled shapes of the DWT pyramid.
        """
        yl, yh = coeffs

        if mem_stack:
            # Shapes only: each sub-band drops the "3" axis of ``yh``.
            lh, hl, hh = ([yh[0], yh[1], yh[3], yh[4]] for _ in range(3))
        else:
            lh, hl, hh = (yh[:, :, band, :, :] for band in range(3))

        # Columns are synthesised first (dim=2), rows afterwards (dim=3).
        lo = _sfb1d(yl, lh, self.g0_col, self.g1_col, dim=2, mem_stack=mem_stack)
        hi = _sfb1d(hl, hh, self.g0_col, self.g1_col, dim=2, mem_stack=mem_stack)
        yl = _sfb1d(lo, hi, self.g0_row, self.g1_row, dim=3, mem_stack=mem_stack)

        if mem_stack:
            float_bytes = np.float32().itemsize
            mem_stack.free(np.prod(lo) * float_bytes)
            mem_stack.free(np.prod(hi) * float_bytes)

        del lo, hi
        return yl
|
|
616
|
+
|
|
617
|
+
|
|
618
|
+
def _repair_memory_fragmentation_if_needed(fragmentation_threshold: float = 0.2):
    """Release cached blocks of the default CuPy memory pool when it is fragmented.

    Fragmentation is measured as ``total_bytes / used_bytes - 1`` of the
    default memory pool. When that ratio exceeds
    ``fragmentation_threshold`` the pool's ``free_all_blocks()`` is called.

    Parameters
    ----------
    fragmentation_threshold : float
        Largest tolerated value of ``total_bytes / used_bytes - 1``.
    """
    pool = cp.get_default_memory_pool()
    total = pool.total_bytes()
    used = pool.used_bytes()

    if used == 0:
        # Nothing is in use, so the ratio is undefined (division by zero).
        # Any bytes still held by the pool are purely cached: release them.
        if total > 0:
            pool.free_all_blocks()
        return

    if (total / used) - 1 > fragmentation_threshold:
        pool.free_all_blocks()
|
|
623
|
+
|
|
624
|
+
|
|
625
|
+
def remove_stripe_fw(
    data: cp.ndarray,
    sigma: float = 2,
    wname: str = "db5",
    level: Optional[int] = None,
    calc_peak_gpu_mem: bool = False,
) -> cp.ndarray:
    """
    Remove horizontal stripes from sinogram using the Fourier-Wavelet (FW) based method :cite:`munch2009stripe`.
    The original source code was taken from the TomoCupy and NABU packages.

    Parameters
    ----------
    data : ndarray
        3D tomographic data as a CuPy array. When ``calc_peak_gpu_mem`` is True,
        the 3-element shape of the data is expected instead of the array itself.
    sigma : float
        Damping parameter in Fourier space.
    wname : str
        Type of the wavelet filter: select from 'db5', 'db7', 'haar', 'sym5', 'sym16', 'bior4.4'.
    level : int, optional
        Number of discrete wavelet transform levels. If None, it is set to
        ``ceil(log2(max(data.shape)))``.
    calc_peak_gpu_mem : bool
        Parameter to support memory estimation in HTTomo. Irrelevant to the method itself and can be ignored by user.

    Returns
    -------
    ndarray
        Stripe-corrected 3D tomographic data as a CuPy array. When ``calc_peak_gpu_mem``
        is True, an integer estimate of the peak GPU memory in bytes (the recorded
        high-water mark with a 10% margin) is returned instead.
    """

    if level is None:
        if calc_peak_gpu_mem:
            size = np.max(data)  # data is a tuple in this case
        else:
            size = np.max(data.shape)
        level = int(np.ceil(np.log2(size)))

    [nproj, nz, ni] = data.shape if not calc_peak_gpu_mem else data

    # The projection axis is padded by 1/8 of its length; the data is placed
    # in the middle of the padded axis further below.
    nproj_pad = nproj + nproj // 8

    # Accepts all wave types available to PyWavelets
    xfm = _DWTForward(wave=wname)
    ifm = _DWTInverse(wave=wname)

    # Wavelet decomposition.
    cc = []
    sli_shape = [nz, 1, nproj_pad, ni]

    if calc_peak_gpu_mem:
        # Estimation mode: replay the allocations/releases of the real
        # computation on shapes only and report the high-water mark.
        mem_stack = _DeviceMemStack()
        # A data copy is assumed when invoking the function
        mem_stack.malloc(np.prod(data) * np.float32().itemsize)
        mem_stack.malloc(np.prod(sli_shape) * np.float32().itemsize)
        fcV_bytes = None
        for _ in range(level):
            new_sli_shape, c = xfm.apply(sli_shape, mem_stack)
            mem_stack.free(np.prod(sli_shape) * np.float32().itemsize)
            sli_shape = new_sli_shape
            cc.append(c)

            if fcV_bytes:
                mem_stack.free(fcV_bytes)
            fcV_shape = [c[0], c[3], c[4]]
            fcV_bytes = np.prod(fcV_shape) * np.complex64().itemsize
            mem_stack.malloc(fcV_bytes)

        # ``fcV_shape`` only exists if at least one level was decomposed;
        # with ``level <= 0`` there is no FFT stage to account for.
        if cc:
            # For the FFT
            mem_stack.malloc(2 * np.prod(fcV_shape) * np.float32().itemsize)
            mem_stack.malloc(2 * fcV_bytes)

            # A temporary plan is built only to query the size of its work area.
            fft_dummy = cp.empty(fcV_shape, dtype="float32")
            fft_plan = get_fft_plan(fft_dummy)
            fft_plan_size = fft_plan.work_area.mem.size
            del fft_dummy
            del fft_plan
            mem_stack.malloc(fft_plan_size)
            mem_stack.free(2 * np.prod(fcV_shape) * np.float32().itemsize)
            mem_stack.free(fft_plan_size)
            mem_stack.free(2 * fcV_bytes)

        # The rest of the iteration doesn't contribute to the peak
        # NOTE: The last iteration of fcV is "leaked"

        for k in range(level)[::-1]:
            new_sli_shape = [sli_shape[0], sli_shape[1], cc[k][-2], cc[k][-1]]
            new_sli_shape = ifm.apply((new_sli_shape, cc[k]), mem_stack)
            mem_stack.free(np.prod(sli_shape) * np.float32().itemsize)
            sli_shape = new_sli_shape

        mem_stack.malloc(np.prod(data) * np.float32().itemsize)
        for c in cc:
            mem_stack.free(np.prod(c) * np.float32().itemsize)
        mem_stack.free(np.prod(sli_shape) * np.float32().itemsize)
        return int(mem_stack.highwater * 1.1)

    # Zero-padded working volume with the data centred along the padded axis.
    sli = cp.zeros(sli_shape, dtype="float32")
    sli[:, 0, (nproj_pad - nproj) // 2 : (nproj_pad + nproj) // 2] = data.swapaxes(0, 1)
    for k in range(level):
        sli, c = xfm.apply(sli)
        cc.append(c)
        # FFT
        fft_in = cp.ascontiguousarray(cc[k][:, 0, 1])
        fft_plan = get_fft_plan(fft_in, axes=1)
        with fft_plan:
            fcV = cp.fft.fft(fft_in, axis=1)
        del fft_plan
        del fft_in
        _, my, mx = fcV.shape
        # Damping of ring artifact information.
        y_hat = np.fft.ifftshift((np.arange(-my, my, 2) + 1) / 2)
        damp = -np.expm1(-(y_hat**2) / (2 * sigma**2))
        fcV *= cp.tile(damp, (mx, 1)).swapaxes(0, 1)
        # Inverse FFT.
        ifft_in = cp.ascontiguousarray(fcV)
        ifft_plan = get_fft_plan(ifft_in, axes=1)
        with ifft_plan:
            cc[k][:, 0, 1] = cp.fft.ifft(ifft_in, my, axis=1).real
        del ifft_plan
        del ifft_in
        _repair_memory_fragmentation_if_needed()

    # Wavelet reconstruction.
    for k in range(level)[::-1]:
        # Crop the running reconstruction to the size of this level's
        # coefficients before merging them back in.
        shape0 = cc[k][0, 0, 1].shape
        sli = sli[:, :, : shape0[0], : shape0[1]]
        sli = ifm.apply((sli, cc[k]))
        _repair_memory_fragmentation_if_needed()

    # Remove the padding and restore the original axis order.
    data = sli[:, 0, (nproj_pad - nproj) // 2 : (nproj_pad + nproj) // 2, :ni]
    data = data.swapaxes(0, 1)
    return cp.ascontiguousarray(data)
|
|
758
|
+
|
|
759
|
+
|
|
159
760
|
######## Optimized version for Vo-all ring removal in tomopy########
|
|
160
761
|
# This function is taken from TomoCuPy package
|
|
161
762
|
# *************************************************************************** #
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: httomolibgpu
|
|
3
|
-
Version: 5.
|
|
3
|
+
Version: 5.3
|
|
4
4
|
Summary: Commonly used tomography data processing methods at DLS.
|
|
5
5
|
Author-email: Daniil Kazantsev <daniil.kazantsev@diamond.ac.uk>, Yousef Moazzam <yousef.moazzam@diamond.ac.uk>, Naman Gera <naman.gera@diamond.ac.uk>
|
|
6
6
|
License: BSD-3-Clause
|
|
@@ -19,6 +19,7 @@ Requires-Dist: scipy
|
|
|
19
19
|
Requires-Dist: pillow
|
|
20
20
|
Requires-Dist: scikit-image
|
|
21
21
|
Requires-Dist: tomobar
|
|
22
|
+
Requires-Dist: PyWavelets
|
|
22
23
|
Provides-Extra: dev
|
|
23
24
|
Requires-Dist: pytest; extra == "dev"
|
|
24
25
|
Requires-Dist: pytest-cov; extra == "dev"
|
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
httomolibgpu/__init__.py,sha256=
|
|
2
|
-
httomolibgpu/cupywrapper.py,sha256=
|
|
1
|
+
httomolibgpu/__init__.py,sha256=Dt_TYhjJGPVathlceTYQhoRSyH8n7FGQJlRMUlFZNdc,959
|
|
2
|
+
httomolibgpu/cupywrapper.py,sha256=vHuBN4Wo3YxPnQP0OAJypLfZA6AXyXFgVmaZw_67pvo,579
|
|
3
3
|
httomolibgpu/memory_estimator_helpers.py,sha256=QaJady-z8y9Emw7W-lB608vBTNvVYv3obboQKVj6E9M,705
|
|
4
4
|
httomolibgpu/cuda_kernels/__init__.py,sha256=VQNMaGcVDwiE-C64FfLtubHpLriLG0Y3_QnjHBSHrN0,884
|
|
5
5
|
httomolibgpu/cuda_kernels/calc_metrics.cu,sha256=oV7ZPcwjWafmZjbNsUkBYPvOViJ_nX3zBoOAuPCmIrA,11335
|
|
@@ -8,6 +8,7 @@ httomolibgpu/cuda_kernels/generate_mask.cu,sha256=3il3r1J2cnTCd3UXO4GWGfBgGxj4pv
|
|
|
8
8
|
httomolibgpu/cuda_kernels/median_kernel.cu,sha256=EECLUCoJkT9GQ9Db_FF6fYOG6cDSiAePTRZNxE4VZ68,1692
|
|
9
9
|
httomolibgpu/cuda_kernels/raven_filter.cu,sha256=KX2TM_9tMpvoGCHezDNWYABCnv2cT9mlMo4IhxRUac0,1437
|
|
10
10
|
httomolibgpu/cuda_kernels/remove_nan_inf.cu,sha256=gv0ihkf6A_D_po9x7pmgFsQFhwZ1dB_HYc_0Tu-bpUU,630
|
|
11
|
+
httomolibgpu/cuda_kernels/remove_stripe_fw.cu,sha256=J_vy0RUYYKT-mOzERsn3kjgt4hbE7vHPFRuJYNzs6sM,4504
|
|
11
12
|
httomolibgpu/misc/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
12
13
|
httomolibgpu/misc/corr.py,sha256=e1eUsWLSM9SB5xzWTDW0o9pAD_lbrr4DL-QQmyM8v4c,4503
|
|
13
14
|
httomolibgpu/misc/denoise.py,sha256=-D9UPbZyUAcCptBHUUXsmj1NFzd6HrrRjJJh4T5gmhQ,4787
|
|
@@ -17,14 +18,14 @@ httomolibgpu/misc/utils.py,sha256=rHRuQUO47SlTanvKDBgiC0im4tXlGLCw5B_zvlLzzbc,47
|
|
|
17
18
|
httomolibgpu/prep/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
18
19
|
httomolibgpu/prep/alignment.py,sha256=GVxnyioipmqsHb4s3mPQ8tKGoPIQMPftDrQxUO-HBuE,5491
|
|
19
20
|
httomolibgpu/prep/normalize.py,sha256=hee0H4mE7FrSZgcF1fjLsKT06xjTJymkyAxpe2itQe4,4202
|
|
20
|
-
httomolibgpu/prep/phase.py,sha256=
|
|
21
|
-
httomolibgpu/prep/stripe.py,sha256=
|
|
21
|
+
httomolibgpu/prep/phase.py,sha256=N3Ep_Krn4rqbGOnNhApSbIYM7gVstBtequXTklBDQLk,14907
|
|
22
|
+
httomolibgpu/prep/stripe.py,sha256=OZPimFxe9TOSaEcErORFxd6HCcFcR62-q5XYBvC10FM,36918
|
|
22
23
|
httomolibgpu/recon/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
23
24
|
httomolibgpu/recon/_phase_cross_correlation.py,sha256=Ru2oLAPv8XOSSuZer5yNQrxD_8lMAwBSvtkVAVs5TCc,16469
|
|
24
25
|
httomolibgpu/recon/algorithm.py,sha256=ds-_io7kGzo5FiJq8k4--PYtIWak3y9H7yuyg1lymyQ,25121
|
|
25
26
|
httomolibgpu/recon/rotation.py,sha256=GaSwNrlDnlP_iIrTfKUQLiXsShJ5aSDvdKPwofggtwQ,27948
|
|
26
|
-
httomolibgpu-5.
|
|
27
|
-
httomolibgpu-5.
|
|
28
|
-
httomolibgpu-5.
|
|
29
|
-
httomolibgpu-5.
|
|
30
|
-
httomolibgpu-5.
|
|
27
|
+
httomolibgpu-5.3.dist-info/licenses/LICENSE,sha256=bXeLsgelPUUXw8HCIYiVC97Dpjhm2nB54m7TACdH8ng,48032
|
|
28
|
+
httomolibgpu-5.3.dist-info/METADATA,sha256=prUDINLOyJMUnUz3YQCkfhuDJtPyQCoELYedj2ktUD0,3365
|
|
29
|
+
httomolibgpu-5.3.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
30
|
+
httomolibgpu-5.3.dist-info/top_level.txt,sha256=nV0Ty_YvSPVd1O6MNWuIplD0w1nwk5hT76YgBZ-bzUw,13
|
|
31
|
+
httomolibgpu-5.3.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|