nabu 2023.2.1__py3-none-any.whl → 2024.1.0rc3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- doc/conf.py +1 -1
- doc/doc_config.py +32 -0
- nabu/__init__.py +2 -1
- nabu/app/bootstrap_stitching.py +1 -1
- nabu/app/cli_configs.py +122 -2
- nabu/app/composite_cor.py +27 -2
- nabu/app/correct_rot.py +70 -0
- nabu/app/create_distortion_map_from_poly.py +42 -18
- nabu/app/diag_to_pix.py +358 -0
- nabu/app/diag_to_rot.py +449 -0
- nabu/app/generate_header.py +4 -3
- nabu/app/histogram.py +2 -2
- nabu/app/multicor.py +6 -1
- nabu/app/parse_reconstruction_log.py +151 -0
- nabu/app/prepare_weights_double.py +83 -22
- nabu/app/reconstruct.py +5 -1
- nabu/app/reconstruct_helical.py +7 -0
- nabu/app/reduce_dark_flat.py +6 -3
- nabu/app/rotate.py +4 -4
- nabu/app/stitching.py +16 -2
- nabu/app/tests/test_reduce_dark_flat.py +18 -2
- nabu/app/validator.py +4 -4
- nabu/cuda/convolution.py +8 -376
- nabu/cuda/fft.py +4 -0
- nabu/cuda/kernel.py +4 -4
- nabu/cuda/medfilt.py +5 -158
- nabu/cuda/padding.py +5 -71
- nabu/cuda/processing.py +23 -2
- nabu/cuda/src/ElementOp.cu +78 -0
- nabu/cuda/src/backproj.cu +28 -2
- nabu/cuda/src/fourier_wavelets.cu +2 -2
- nabu/cuda/src/normalization.cu +23 -0
- nabu/cuda/src/padding.cu +2 -2
- nabu/cuda/src/transpose.cu +16 -0
- nabu/cuda/utils.py +39 -0
- nabu/estimation/alignment.py +10 -1
- nabu/estimation/cor.py +808 -38
- nabu/estimation/cor_sino.py +7 -9
- nabu/estimation/tests/test_cor.py +85 -3
- nabu/io/reader.py +26 -18
- nabu/io/tests/test_cast_volume.py +3 -3
- nabu/io/tests/test_detector_distortion.py +3 -3
- nabu/io/tiffwriter_zmm.py +2 -2
- nabu/io/utils.py +14 -4
- nabu/io/writer.py +5 -3
- nabu/misc/fftshift.py +6 -0
- nabu/misc/histogram.py +5 -285
- nabu/misc/histogram_cuda.py +8 -104
- nabu/misc/kernel_base.py +3 -121
- nabu/misc/padding_base.py +5 -69
- nabu/misc/processing_base.py +3 -107
- nabu/misc/rotation.py +5 -62
- nabu/misc/rotation_cuda.py +5 -65
- nabu/misc/transpose.py +6 -0
- nabu/misc/unsharp.py +3 -78
- nabu/misc/unsharp_cuda.py +5 -52
- nabu/misc/unsharp_opencl.py +8 -85
- nabu/opencl/fft.py +6 -0
- nabu/opencl/kernel.py +21 -6
- nabu/opencl/padding.py +5 -72
- nabu/opencl/processing.py +27 -5
- nabu/opencl/src/backproj.cl +3 -3
- nabu/opencl/src/fftshift.cl +65 -12
- nabu/opencl/src/padding.cl +2 -2
- nabu/opencl/src/roll.cl +96 -0
- nabu/opencl/src/transpose.cl +16 -0
- nabu/pipeline/config_validators.py +63 -3
- nabu/pipeline/dataset_validator.py +2 -2
- nabu/pipeline/estimators.py +193 -35
- nabu/pipeline/fullfield/chunked.py +34 -17
- nabu/pipeline/fullfield/chunked_cuda.py +7 -5
- nabu/pipeline/fullfield/computations.py +48 -13
- nabu/pipeline/fullfield/nabu_config.py +13 -13
- nabu/pipeline/fullfield/processconfig.py +10 -5
- nabu/pipeline/fullfield/reconstruction.py +1 -2
- nabu/pipeline/helical/fbp.py +5 -0
- nabu/pipeline/helical/filtering.py +12 -9
- nabu/pipeline/helical/gridded_accumulator.py +179 -33
- nabu/pipeline/helical/helical_chunked_regridded.py +262 -151
- nabu/pipeline/helical/helical_chunked_regridded_cuda.py +4 -11
- nabu/pipeline/helical/helical_reconstruction.py +56 -18
- nabu/pipeline/helical/span_strategy.py +1 -1
- nabu/pipeline/helical/tests/test_accumulator.py +4 -0
- nabu/pipeline/params.py +23 -2
- nabu/pipeline/processconfig.py +3 -8
- nabu/pipeline/tests/test_chunk_reader.py +78 -0
- nabu/pipeline/tests/test_estimators.py +120 -2
- nabu/pipeline/utils.py +25 -0
- nabu/pipeline/writer.py +2 -0
- nabu/preproc/ccd_cuda.py +9 -7
- nabu/preproc/ctf.py +21 -26
- nabu/preproc/ctf_cuda.py +25 -25
- nabu/preproc/double_flatfield.py +14 -2
- nabu/preproc/double_flatfield_cuda.py +7 -11
- nabu/preproc/flatfield_cuda.py +23 -27
- nabu/preproc/phase.py +19 -24
- nabu/preproc/phase_cuda.py +21 -21
- nabu/preproc/shift_cuda.py +58 -28
- nabu/preproc/tests/test_ctf.py +5 -5
- nabu/preproc/tests/test_double_flatfield.py +2 -2
- nabu/preproc/tests/test_vshift.py +13 -2
- nabu/processing/__init__.py +0 -0
- nabu/processing/convolution_cuda.py +375 -0
- nabu/processing/fft_base.py +163 -0
- nabu/processing/fft_cuda.py +256 -0
- nabu/processing/fft_opencl.py +54 -0
- nabu/processing/fftshift.py +134 -0
- nabu/processing/histogram.py +286 -0
- nabu/processing/histogram_cuda.py +103 -0
- nabu/processing/kernel_base.py +126 -0
- nabu/processing/medfilt_cuda.py +159 -0
- nabu/processing/muladd.py +29 -0
- nabu/processing/muladd_cuda.py +68 -0
- nabu/processing/padding_base.py +71 -0
- nabu/processing/padding_cuda.py +75 -0
- nabu/processing/padding_opencl.py +77 -0
- nabu/processing/processing_base.py +123 -0
- nabu/processing/roll_opencl.py +64 -0
- nabu/processing/rotation.py +63 -0
- nabu/processing/rotation_cuda.py +66 -0
- nabu/processing/tests/__init__.py +0 -0
- nabu/processing/tests/test_fft.py +268 -0
- nabu/processing/tests/test_fftshift.py +71 -0
- nabu/{misc → processing}/tests/test_histogram.py +2 -4
- nabu/{cuda → processing}/tests/test_medfilt.py +1 -1
- nabu/processing/tests/test_muladd.py +54 -0
- nabu/{cuda → processing}/tests/test_padding.py +119 -75
- nabu/processing/tests/test_roll.py +63 -0
- nabu/{misc → processing}/tests/test_rotation.py +3 -2
- nabu/processing/tests/test_transpose.py +72 -0
- nabu/{misc → processing}/tests/test_unsharp.py +41 -8
- nabu/processing/transpose.py +126 -0
- nabu/processing/unsharp.py +79 -0
- nabu/processing/unsharp_cuda.py +53 -0
- nabu/processing/unsharp_opencl.py +75 -0
- nabu/reconstruction/fbp.py +34 -10
- nabu/reconstruction/fbp_base.py +35 -16
- nabu/reconstruction/fbp_opencl.py +7 -12
- nabu/reconstruction/filtering.py +2 -2
- nabu/reconstruction/filtering_cuda.py +13 -14
- nabu/reconstruction/filtering_opencl.py +3 -4
- nabu/reconstruction/projection.py +2 -0
- nabu/reconstruction/rings.py +158 -1
- nabu/reconstruction/rings_cuda.py +218 -58
- nabu/reconstruction/sinogram_cuda.py +16 -12
- nabu/reconstruction/tests/test_deringer.py +116 -14
- nabu/reconstruction/tests/test_fbp.py +22 -31
- nabu/reconstruction/tests/test_filtering.py +11 -2
- nabu/resources/dataset_analyzer.py +89 -26
- nabu/resources/nxflatfield.py +2 -2
- nabu/resources/tests/test_nxflatfield.py +1 -1
- nabu/resources/utils.py +9 -2
- nabu/stitching/alignment.py +184 -0
- nabu/stitching/config.py +241 -39
- nabu/stitching/definitions.py +6 -0
- nabu/stitching/frame_composition.py +4 -2
- nabu/stitching/overlap.py +99 -3
- nabu/stitching/sample_normalization.py +60 -0
- nabu/stitching/slurm_utils.py +10 -10
- nabu/stitching/tests/test_alignment.py +99 -0
- nabu/stitching/tests/test_config.py +16 -1
- nabu/stitching/tests/test_overlap.py +68 -2
- nabu/stitching/tests/test_sample_normalization.py +49 -0
- nabu/stitching/tests/test_slurm_utils.py +5 -5
- nabu/stitching/tests/test_utils.py +3 -33
- nabu/stitching/tests/test_z_stitching.py +391 -22
- nabu/stitching/utils.py +144 -202
- nabu/stitching/z_stitching.py +309 -126
- nabu/testutils.py +18 -0
- nabu/thirdparty/tomocupy_remove_stripe.py +586 -0
- nabu/utils.py +32 -6
- {nabu-2023.2.1.dist-info → nabu-2024.1.0rc3.dist-info}/LICENSE +1 -1
- {nabu-2023.2.1.dist-info → nabu-2024.1.0rc3.dist-info}/METADATA +5 -5
- nabu-2024.1.0rc3.dist-info/RECORD +296 -0
- {nabu-2023.2.1.dist-info → nabu-2024.1.0rc3.dist-info}/WHEEL +1 -1
- {nabu-2023.2.1.dist-info → nabu-2024.1.0rc3.dist-info}/entry_points.txt +5 -1
- nabu/conftest.py +0 -14
- nabu/opencl/fftshift.py +0 -92
- nabu/opencl/tests/test_fftshift.py +0 -55
- nabu/opencl/tests/test_padding.py +0 -84
- nabu-2023.2.1.dist-info/RECORD +0 -252
- /nabu/cuda/src/{fftshift.cu → dfi_fftshift.cu} +0 -0
- {nabu-2023.2.1.dist-info → nabu-2024.1.0rc3.dist-info}/top_level.txt +0 -0
nabu/cuda/convolution.py
CHANGED
@@ -1,376 +1,8 @@
|
|
1
|
-
from
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
import pycuda.gpuarray as garray
|
10
|
-
from pycuda.compiler import SourceModule
|
11
|
-
|
12
|
-
|
13
|
-
class Convolution:
|
14
|
-
"""
|
15
|
-
A class for performing convolution on GPU with CUDA, but without using
|
16
|
-
textures (unlike for example in ``silx.opencl.convolution``)
|
17
|
-
"""
|
18
|
-
|
19
|
-
def __init__(self, shape, kernel, axes=None, mode=None, extra_options=None, cuda_options=None):
|
20
|
-
"""
|
21
|
-
Constructor of Cuda Convolution.
|
22
|
-
|
23
|
-
Parameters
|
24
|
-
-----------
|
25
|
-
shape: tuple
|
26
|
-
Shape of the array.
|
27
|
-
kernel: array-like
|
28
|
-
Convolution kernel (1D, 2D or 3D).
|
29
|
-
axes: tuple, optional
|
30
|
-
Axes along which the convolution is performed,
|
31
|
-
for batched convolutions.
|
32
|
-
mode: str, optional
|
33
|
-
Boundary handling mode. Available modes are:
|
34
|
-
- "reflect": cba|abcd|dcb
|
35
|
-
- "nearest": aaa|abcd|ddd
|
36
|
-
- "wrap": bcd|abcd|abc
|
37
|
-
- "constant": 000|abcd|000
|
38
|
-
|
39
|
-
Default is "reflect".
|
40
|
-
extra_options: dict, optional
|
41
|
-
Advanced options (dict). Current options are:
|
42
|
-
- "allocate_input_array": True
|
43
|
-
- "allocate_output_array": True
|
44
|
-
- "allocate_tmp_array": True
|
45
|
-
- "sourcemodule_kwargs": {}
|
46
|
-
- "batch_along_flat_dims": True
|
47
|
-
"""
|
48
|
-
self.cuda = CudaProcessing(**(cuda_options or {}))
|
49
|
-
self._configure_extra_options(extra_options)
|
50
|
-
self._determine_use_case(shape, kernel, axes)
|
51
|
-
self._allocate_memory(mode)
|
52
|
-
self._init_kernels()
|
53
|
-
|
54
|
-
def _configure_extra_options(self, extra_options):
|
55
|
-
self.extra_options = {
|
56
|
-
"allocate_input_array": True,
|
57
|
-
"allocate_output_array": True,
|
58
|
-
"allocate_tmp_array": True,
|
59
|
-
"sourcemodule_kwargs": {},
|
60
|
-
"batch_along_flat_dims": True,
|
61
|
-
}
|
62
|
-
extra_opts = extra_options or {}
|
63
|
-
self.extra_options.update(extra_opts)
|
64
|
-
self.sourcemodule_kwargs = self.extra_options["sourcemodule_kwargs"]
|
65
|
-
|
66
|
-
def _get_dimensions(self, shape, kernel):
|
67
|
-
self.shape = shape
|
68
|
-
self.data_ndim = self._check_dimensions(shape=shape, name="Data")
|
69
|
-
self.kernel_ndim = self._check_dimensions(arr=kernel, name="Kernel")
|
70
|
-
Nx = shape[-1]
|
71
|
-
if self.data_ndim >= 2:
|
72
|
-
Ny = shape[-2]
|
73
|
-
else:
|
74
|
-
Ny = 1
|
75
|
-
if self.data_ndim >= 3:
|
76
|
-
Nz = shape[-3]
|
77
|
-
else:
|
78
|
-
Nz = 1
|
79
|
-
self.Nx = np.int32(Nx)
|
80
|
-
self.Ny = np.int32(Ny)
|
81
|
-
self.Nz = np.int32(Nz)
|
82
|
-
|
83
|
-
def _determine_use_case(self, shape, kernel, axes):
|
84
|
-
"""
|
85
|
-
Determine the convolution use case from the input/kernel shape, and axes.
|
86
|
-
"""
|
87
|
-
self._get_dimensions(shape, kernel)
|
88
|
-
if self.kernel_ndim > self.data_ndim:
|
89
|
-
raise ValueError("Kernel dimensions cannot exceed data dimensions")
|
90
|
-
data_ndim = self.data_ndim
|
91
|
-
kernel_ndim = self.kernel_ndim
|
92
|
-
self.kernel = kernel.astype("f")
|
93
|
-
|
94
|
-
convol_infos = ConvolutionInfos()
|
95
|
-
k = (data_ndim, kernel_ndim)
|
96
|
-
if k not in convol_infos.use_cases:
|
97
|
-
raise ValueError(
|
98
|
-
"Cannot find a use case for data ndim = %d and kernel ndim = %d" % (data_ndim, kernel_ndim)
|
99
|
-
)
|
100
|
-
possible_use_cases = convol_infos.use_cases[k]
|
101
|
-
|
102
|
-
# If some dimensions are "flat", make a batched convolution along them
|
103
|
-
# Ex. data_dim = (1, Nx) -> batched 1D convolution
|
104
|
-
if self.extra_options["batch_along_flat_dims"] and (1 in self.shape):
|
105
|
-
axes = tuple([curr_dim for numels, curr_dim in zip(self.shape, range(len(self.shape))) if numels != 1])
|
106
|
-
#
|
107
|
-
self.use_case_name = None
|
108
|
-
for uc_name, uc_params in possible_use_cases.items():
|
109
|
-
if axes in convol_infos.allowed_axes[uc_name]:
|
110
|
-
self.use_case_name = uc_name
|
111
|
-
self.use_case_desc = uc_params["name"]
|
112
|
-
self.use_case_kernels = uc_params["kernels"].copy()
|
113
|
-
if self.use_case_name is None:
|
114
|
-
raise ValueError(
|
115
|
-
"Cannot find a use case for data ndim = %d, kernel ndim = %d and axes=%s"
|
116
|
-
% (data_ndim, kernel_ndim, str(axes))
|
117
|
-
)
|
118
|
-
# TODO implement this use case
|
119
|
-
if self.use_case_name == "batched_separable_2D_1D_3D":
|
120
|
-
raise NotImplementedError("The use case %s is not implemented" % self.use_case_name)
|
121
|
-
#
|
122
|
-
self.axes = axes
|
123
|
-
# Replace "axes=None" with an actual value (except for ND-ND)
|
124
|
-
allowed_axes = convol_infos.allowed_axes[self.use_case_name]
|
125
|
-
if len(allowed_axes) > 1:
|
126
|
-
# The default choice might impact perfs
|
127
|
-
self.axes = allowed_axes[0] or allowed_axes[1]
|
128
|
-
self.separable = self.use_case_name.startswith("separable")
|
129
|
-
self.batched = self.use_case_name.startswith("batched")
|
130
|
-
|
131
|
-
def _allocate_memory(self, mode):
|
132
|
-
self.mode = mode or "reflect"
|
133
|
-
# The current implementation does not support kernel size bigger than data size,
|
134
|
-
# except for mode="nearest"
|
135
|
-
for i, dim_size in enumerate(self.shape):
|
136
|
-
if min(self.kernel.shape) > dim_size and i in self.axes:
|
137
|
-
print(
|
138
|
-
"Warning: kernel support is too large for data dimension %d (%d). Forcing convolution mode to 'nearest'"
|
139
|
-
% (i, dim_size)
|
140
|
-
)
|
141
|
-
self.mode = "nearest"
|
142
|
-
#
|
143
|
-
option_array_names = {
|
144
|
-
"allocate_input_array": "data_in",
|
145
|
-
"allocate_output_array": "data_out",
|
146
|
-
"allocate_tmp_array": "data_tmp",
|
147
|
-
}
|
148
|
-
# Nonseparable transforms do not need tmp array
|
149
|
-
if not (self.separable):
|
150
|
-
self.extra_options["allocate_tmp_array"] = False
|
151
|
-
# Allocate arrays
|
152
|
-
for option_name, array_name in option_array_names.items():
|
153
|
-
if self.extra_options[option_name]:
|
154
|
-
value = garray.zeros(self.shape, np.float32)
|
155
|
-
else:
|
156
|
-
value = None
|
157
|
-
setattr(self, array_name, value)
|
158
|
-
|
159
|
-
if isinstance(self.kernel, np.ndarray):
|
160
|
-
self.d_kernel = garray.to_gpu(self.kernel)
|
161
|
-
else:
|
162
|
-
if not (isinstance(self.kernel, garray.GPUArray)):
|
163
|
-
raise ValueError("kernel must be either numpy array or pycuda array")
|
164
|
-
self.d_kernel = self.kernel
|
165
|
-
self._old_input_ref = None
|
166
|
-
self._old_output_ref = None
|
167
|
-
self._c_modes_mapping = {
|
168
|
-
"periodic": 2,
|
169
|
-
"wrap": 2,
|
170
|
-
"nearest": 1,
|
171
|
-
"replicate": 1,
|
172
|
-
"reflect": 0,
|
173
|
-
"constant": 3,
|
174
|
-
}
|
175
|
-
mp = self._c_modes_mapping
|
176
|
-
if self.mode.lower() not in mp:
|
177
|
-
raise ValueError(
|
178
|
-
"""
|
179
|
-
Mode %s is not available. Available modes are:
|
180
|
-
%s
|
181
|
-
"""
|
182
|
-
% (self.mode, str(mp.keys()))
|
183
|
-
)
|
184
|
-
if self.mode.lower() == "constant":
|
185
|
-
raise NotImplementedError("mode='constant' is not implemented yet")
|
186
|
-
self._c_conv_mode = mp[self.mode]
|
187
|
-
|
188
|
-
def _init_kernels(self):
|
189
|
-
if self.kernel_ndim > 1:
|
190
|
-
if np.abs(np.diff(self.kernel.shape)).max() > 0:
|
191
|
-
raise NotImplementedError("Non-separable convolution with non-square kernels is not implemented yet")
|
192
|
-
# Compile source module
|
193
|
-
compile_options = [str("-DUSED_CONV_MODE=%d" % self._c_conv_mode)]
|
194
|
-
fname = get_cuda_srcfile("convolution.cu")
|
195
|
-
nabu_cuda_dir = dirname(fname)
|
196
|
-
include_dirs = [nabu_cuda_dir]
|
197
|
-
self.sourcemodule_kwargs["options"] = compile_options
|
198
|
-
self.sourcemodule_kwargs["include_dirs"] = include_dirs
|
199
|
-
with open(fname) as fid:
|
200
|
-
cuda_src = fid.read()
|
201
|
-
self._module = SourceModule(cuda_src, **self.sourcemodule_kwargs)
|
202
|
-
# Blocks, grid
|
203
|
-
self._block_size = {1: (32, 1, 1), 2: (32, 32, 1), 3: (16, 8, 8)}[self.data_ndim] # TODO tune
|
204
|
-
self._n_blocks = tuple([int(updiv(a, b)) for a, b in zip(self.shape[::-1], self._block_size)])
|
205
|
-
# Prepare cuda kernel calls
|
206
|
-
self._cudakernel_signature = {
|
207
|
-
1: "PPPiiii",
|
208
|
-
2: "PPPiiiii",
|
209
|
-
3: "PPPiiiiii",
|
210
|
-
}[self.kernel_ndim]
|
211
|
-
self.cuda_kernels = {}
|
212
|
-
for axis, kern_name in enumerate(self.use_case_kernels):
|
213
|
-
self.cuda_kernels[axis] = self._module.get_function(kern_name)
|
214
|
-
self.cuda_kernels[axis].prepare(self._cudakernel_signature)
|
215
|
-
|
216
|
-
# Cuda kernel arguments
|
217
|
-
kernel_args = [
|
218
|
-
self._n_blocks,
|
219
|
-
self._block_size,
|
220
|
-
None,
|
221
|
-
None,
|
222
|
-
self.d_kernel.gpudata,
|
223
|
-
np.int32(self.kernel.shape[0]),
|
224
|
-
self.Nx,
|
225
|
-
self.Ny,
|
226
|
-
self.Nz,
|
227
|
-
]
|
228
|
-
if self.kernel_ndim == 2:
|
229
|
-
kernel_args.insert(5, np.int32(self.kernel.shape[1]))
|
230
|
-
if self.kernel_ndim == 3:
|
231
|
-
kernel_args.insert(5, np.int32(self.kernel.shape[2]))
|
232
|
-
kernel_args.insert(6, np.int32(self.kernel.shape[1]))
|
233
|
-
self.kernel_args = tuple(kernel_args)
|
234
|
-
# If self.data_tmp is allocated, separable transforms can be performed
|
235
|
-
# by a series of batched transforms, without any copy, by swapping refs.
|
236
|
-
self.swap_pattern = None
|
237
|
-
if self.separable:
|
238
|
-
if self.data_tmp is not None:
|
239
|
-
self.swap_pattern = {
|
240
|
-
2: [("data_in", "data_tmp"), ("data_tmp", "data_out")],
|
241
|
-
3: [
|
242
|
-
("data_in", "data_out"),
|
243
|
-
("data_out", "data_tmp"),
|
244
|
-
("data_tmp", "data_out"),
|
245
|
-
],
|
246
|
-
}
|
247
|
-
else:
|
248
|
-
raise NotImplementedError("For now, data_tmp has to be allocated")
|
249
|
-
|
250
|
-
def _get_swapped_arrays(self, i):
|
251
|
-
"""
|
252
|
-
Get the input and output arrays to use when using a "swap pattern".
|
253
|
-
Swapping refs enables to avoid copies between temp. array and output.
|
254
|
-
For example, a separable 2D->1D convolution on 2D data reads:
|
255
|
-
data_tmp = convol(data_input, kernel, axis=1) # step i=0
|
256
|
-
data_out = convol(data_tmp, kernel, axis=0) # step i=1
|
257
|
-
|
258
|
-
:param i: current step number of the separable convolution
|
259
|
-
"""
|
260
|
-
n_batchs = len(self.axes)
|
261
|
-
in_ref, out_ref = self.swap_pattern[n_batchs][i]
|
262
|
-
d_in = getattr(self, in_ref)
|
263
|
-
d_out = getattr(self, out_ref)
|
264
|
-
return d_in, d_out
|
265
|
-
|
266
|
-
def _configure_kernel_args(self, cuda_kernel_args, input_ref, output_ref):
|
267
|
-
# TODO more elegant
|
268
|
-
if isinstance(input_ref, garray.GPUArray):
|
269
|
-
input_ref = input_ref.gpudata
|
270
|
-
if isinstance(output_ref, garray.GPUArray):
|
271
|
-
output_ref = output_ref.gpudata
|
272
|
-
if input_ref is not None or output_ref is not None:
|
273
|
-
cuda_kernel_args = list(cuda_kernel_args)
|
274
|
-
if input_ref is not None:
|
275
|
-
cuda_kernel_args[2] = input_ref
|
276
|
-
if output_ref is not None:
|
277
|
-
cuda_kernel_args[3] = output_ref
|
278
|
-
cuda_kernel_args = tuple(cuda_kernel_args)
|
279
|
-
return cuda_kernel_args
|
280
|
-
|
281
|
-
@staticmethod
|
282
|
-
def _check_dimensions(arr=None, shape=None, name="", dim_min=1, dim_max=3):
|
283
|
-
if shape is not None:
|
284
|
-
ndim = len(shape)
|
285
|
-
elif arr is not None:
|
286
|
-
ndim = arr.ndim
|
287
|
-
else:
|
288
|
-
raise ValueError("Please provide either arr= or shape=")
|
289
|
-
if ndim < dim_min or ndim > dim_max:
|
290
|
-
raise ValueError("%s dimensions should be between %d and %d" % (name, dim_min, dim_max))
|
291
|
-
return ndim
|
292
|
-
|
293
|
-
def _check_array(self, arr):
|
294
|
-
if not (isinstance(arr, garray.GPUArray) or isinstance(arr, np.ndarray)):
|
295
|
-
raise TypeError("Expected either pycuda.gpuarray or numpy.ndarray")
|
296
|
-
if arr.dtype != np.float32:
|
297
|
-
raise TypeError("Data must be float32")
|
298
|
-
if arr.shape != self.shape:
|
299
|
-
raise ValueError("Expected data shape = %s" % str(self.shape))
|
300
|
-
|
301
|
-
def _set_arrays(self, array, output=None):
|
302
|
-
# Either copy H->D or update references.
|
303
|
-
if isinstance(array, np.ndarray):
|
304
|
-
self.data_in[:] = array[:]
|
305
|
-
else:
|
306
|
-
self._old_input_ref = self.data_in
|
307
|
-
self.data_in = array
|
308
|
-
data_in_ref = self.data_in
|
309
|
-
if output is not None:
|
310
|
-
if not (isinstance(output, np.ndarray)):
|
311
|
-
self._old_output_ref = self.data_out
|
312
|
-
self.data_out = output
|
313
|
-
# Update Cuda kernel arguments with new array references
|
314
|
-
self.kernel_args = self._configure_kernel_args(self.kernel_args, data_in_ref, self.data_out)
|
315
|
-
|
316
|
-
def _separable_convolution(self):
|
317
|
-
assert len(self.axes) == len(self.use_case_kernels)
|
318
|
-
# Separable: one kernel call per data dimension
|
319
|
-
for i, axis in enumerate(self.axes):
|
320
|
-
in_ref, out_ref = self._get_swapped_arrays(i)
|
321
|
-
self._batched_convolution(axis, input_ref=in_ref, output_ref=out_ref)
|
322
|
-
|
323
|
-
def _batched_convolution(self, axis, input_ref=None, output_ref=None):
|
324
|
-
# Batched: one kernel call in total
|
325
|
-
cuda_kernel = self.cuda_kernels[axis]
|
326
|
-
cuda_kernel_args = self._configure_kernel_args(self.kernel_args, input_ref, output_ref)
|
327
|
-
ev = cuda_kernel.prepared_call(*cuda_kernel_args)
|
328
|
-
|
329
|
-
def _nd_convolution(self):
|
330
|
-
assert len(self.use_case_kernels) == 1
|
331
|
-
cuda_kernel = self._module.get_function(self.use_case_kernels[0])
|
332
|
-
ev = cuda_kernel.prepared_call(*self.kernel_args)
|
333
|
-
|
334
|
-
def _recover_arrays_references(self):
|
335
|
-
if self._old_input_ref is not None:
|
336
|
-
self.data_in = self._old_input_ref
|
337
|
-
self._old_input_ref = None
|
338
|
-
if self._old_output_ref is not None:
|
339
|
-
self.data_out = self._old_output_ref
|
340
|
-
self._old_output_ref = None
|
341
|
-
self.kernel_args = self._configure_kernel_args(self.kernel_args, self.data_in, self.data_out)
|
342
|
-
|
343
|
-
def _get_output(self, output):
|
344
|
-
if output is None:
|
345
|
-
res = self.data_out.get()
|
346
|
-
else:
|
347
|
-
res = output
|
348
|
-
if isinstance(output, np.ndarray):
|
349
|
-
output[:] = self.data_out[:]
|
350
|
-
self._recover_arrays_references()
|
351
|
-
return res
|
352
|
-
|
353
|
-
def convolve(self, array, output=None):
|
354
|
-
"""
|
355
|
-
Convolve an array with the class kernel.
|
356
|
-
|
357
|
-
:param array: Input array. Can be numpy.ndarray or pycuda.gpuarray.GPUArray.
|
358
|
-
:param output: Output array. Can be numpy.ndarray or pycuda.gpuarray.GPUArray.
|
359
|
-
"""
|
360
|
-
self._check_array(array)
|
361
|
-
self._set_arrays(array, output=output)
|
362
|
-
if self.axes is not None:
|
363
|
-
if self.separable:
|
364
|
-
self._separable_convolution()
|
365
|
-
elif self.batched:
|
366
|
-
assert len(self.axes) == 1
|
367
|
-
self._batched_convolution(self.axes[0])
|
368
|
-
# else: ND-ND convol
|
369
|
-
else:
|
370
|
-
# ND-ND convol
|
371
|
-
self._nd_convolution()
|
372
|
-
|
373
|
-
res = self._get_output(output)
|
374
|
-
return res
|
375
|
-
|
376
|
-
__call__ = convolve
|
1
|
+
from ..processing.convolution_cuda import *
|
2
|
+
from ..utils import deprecation_warning
|
3
|
+
|
4
|
+
deprecation_warning(
|
5
|
+
"nabu.cuda.convolution has been moved to nabu.processing.convolution_cuda",
|
6
|
+
do_print=True,
|
7
|
+
func_name="convolution_cuda",
|
8
|
+
)
|
nabu/cuda/fft.py
ADDED
nabu/cuda/kernel.py
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
import pycuda.gpuarray as garray
|
2
2
|
from pycuda.compiler import SourceModule
|
3
|
-
from ..
|
3
|
+
from ..processing.kernel_base import KernelBase
|
4
4
|
|
5
5
|
|
6
6
|
class CudaKernel(KernelBase):
|
@@ -35,7 +35,7 @@ class CudaKernel(KernelBase):
|
|
35
35
|
filename=None,
|
36
36
|
src=None,
|
37
37
|
signature=None,
|
38
|
-
texrefs=
|
38
|
+
texrefs=None,
|
39
39
|
automation_params=None,
|
40
40
|
**sourcemodule_kwargs,
|
41
41
|
):
|
@@ -52,9 +52,9 @@ class CudaKernel(KernelBase):
|
|
52
52
|
def prepare(self, kernel_signature, texrefs):
|
53
53
|
self.prepared = False
|
54
54
|
self.kernel_signature = kernel_signature
|
55
|
-
self.texrefs = texrefs
|
55
|
+
self.texrefs = texrefs or []
|
56
56
|
if kernel_signature is not None:
|
57
|
-
self.func.prepare(self.kernel_signature, texrefs=texrefs)
|
57
|
+
self.func.prepare(self.kernel_signature, texrefs=self.texrefs)
|
58
58
|
self.prepared = True
|
59
59
|
|
60
60
|
def follow_device_arr(self, args):
|
nabu/cuda/medfilt.py
CHANGED
@@ -1,159 +1,6 @@
|
|
1
|
-
from
|
2
|
-
|
3
|
-
from pycuda.compiler import SourceModule
|
4
|
-
from ..utils import updiv, get_cuda_srcfile
|
5
|
-
from .processing import CudaProcessing
|
1
|
+
from ..processing.medfilt_cuda import *
|
2
|
+
from ..utils import deprecation_warning
|
6
3
|
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
A class for performing median filter on GPU with CUDA
|
11
|
-
"""
|
12
|
-
|
13
|
-
def __init__(
|
14
|
-
self,
|
15
|
-
shape,
|
16
|
-
footprint=(3, 3),
|
17
|
-
mode="reflect",
|
18
|
-
threshold=None,
|
19
|
-
cuda_options=None,
|
20
|
-
abs_diff=False,
|
21
|
-
):
|
22
|
-
"""Constructor of Cuda Median Filter.
|
23
|
-
|
24
|
-
Parameters
|
25
|
-
----------
|
26
|
-
shape: tuple
|
27
|
-
Shape of the array, in the format (n_rows, n_columns)
|
28
|
-
footprint: tuple
|
29
|
-
Size of the median filter, in the format (y, x).
|
30
|
-
mode: str
|
31
|
-
Boundary handling mode. Available modes are:
|
32
|
-
- "reflect": cba|abcd|dcb
|
33
|
-
- "nearest": aaa|abcd|ddd
|
34
|
-
- "wrap": bcd|abcd|abc
|
35
|
-
- "constant": 000|abcd|000
|
36
|
-
|
37
|
-
Default is "reflect".
|
38
|
-
threshold: float, optional
|
39
|
-
Threshold for the "thresholded median filter".
|
40
|
-
A thresholded median filter only replaces a pixel value by the median
|
41
|
-
if this pixel value is greater or equal than median + threshold.
|
42
|
-
abs_diff: bool, optional
|
43
|
-
Whether to perform conditional threshold as abs(value - median)
|
44
|
-
|
45
|
-
Notes
|
46
|
-
------
|
47
|
-
Please refer to the documentation of the CudaProcessing class for
|
48
|
-
the other parameters.
|
49
|
-
"""
|
50
|
-
self.cuda_processing = CudaProcessing(**(cuda_options or {}))
|
51
|
-
self._set_params(shape, footprint, mode, threshold, abs_diff)
|
52
|
-
self.cuda_processing.init_arrays_to_none(["d_input", "d_output"])
|
53
|
-
self._init_kernels()
|
54
|
-
|
55
|
-
def _set_params(self, shape, footprint, mode, threshold, abs_diff):
|
56
|
-
self.data_ndim = len(shape)
|
57
|
-
if self.data_ndim == 2:
|
58
|
-
ny, nx = shape
|
59
|
-
nz = 1
|
60
|
-
elif self.data_ndim == 3:
|
61
|
-
nz, ny, nx = shape
|
62
|
-
else:
|
63
|
-
raise ValueError("Expected 2D or 3D data")
|
64
|
-
self.shape = shape
|
65
|
-
self.Nx = np.int32(nx)
|
66
|
-
self.Ny = np.int32(ny)
|
67
|
-
self.Nz = np.int32(nz)
|
68
|
-
if len(footprint) != 2:
|
69
|
-
raise ValueError("3D median filter is not implemented yet")
|
70
|
-
if not ((footprint[0] & 1) and (footprint[1] & 1)):
|
71
|
-
raise ValueError("Must have odd-sized footprint")
|
72
|
-
self.footprint = footprint
|
73
|
-
self._set_boundary_mode(mode)
|
74
|
-
self.do_threshold = False
|
75
|
-
self.abs_diff = abs_diff
|
76
|
-
if threshold is not None:
|
77
|
-
self.threshold = np.float32(threshold)
|
78
|
-
self.do_threshold = True
|
79
|
-
else:
|
80
|
-
self.threshold = np.float32(0)
|
81
|
-
|
82
|
-
def _set_boundary_mode(self, mode):
|
83
|
-
self.mode = mode
|
84
|
-
# Some code duplication from convolution
|
85
|
-
self._c_modes_mapping = {
|
86
|
-
"periodic": 2,
|
87
|
-
"wrap": 2,
|
88
|
-
"nearest": 1,
|
89
|
-
"replicate": 1,
|
90
|
-
"reflect": 0,
|
91
|
-
"constant": 3,
|
92
|
-
}
|
93
|
-
mp = self._c_modes_mapping
|
94
|
-
if self.mode.lower() not in mp:
|
95
|
-
raise ValueError(
|
96
|
-
"""
|
97
|
-
Mode %s is not available. Available modes are:
|
98
|
-
%s
|
99
|
-
"""
|
100
|
-
% (self.mode, str(mp.keys()))
|
101
|
-
)
|
102
|
-
if self.mode.lower() == "constant":
|
103
|
-
raise NotImplementedError("mode='constant' is not implemented yet")
|
104
|
-
self._c_conv_mode = mp[self.mode]
|
105
|
-
|
106
|
-
def _init_kernels(self):
|
107
|
-
# Compile source module
|
108
|
-
compile_options = [
|
109
|
-
"-DUSED_CONV_MODE=%d" % self._c_conv_mode,
|
110
|
-
"-DMEDFILT_X=%d" % self.footprint[1],
|
111
|
-
"-DMEDFILT_Y=%d" % self.footprint[0],
|
112
|
-
"-DDO_THRESHOLD=%d" % (int(self.do_threshold) + int(self.abs_diff)),
|
113
|
-
]
|
114
|
-
fname = get_cuda_srcfile("medfilt.cu")
|
115
|
-
nabu_cuda_dir = dirname(fname)
|
116
|
-
include_dirs = [nabu_cuda_dir]
|
117
|
-
self.sourcemodule_kwargs = {}
|
118
|
-
self.sourcemodule_kwargs["options"] = compile_options
|
119
|
-
self.sourcemodule_kwargs["include_dirs"] = include_dirs
|
120
|
-
with open(fname) as fid:
|
121
|
-
cuda_src = fid.read()
|
122
|
-
self._module = SourceModule(cuda_src, **self.sourcemodule_kwargs)
|
123
|
-
self.cuda_kernel_2d = self._module.get_function("medfilt2d")
|
124
|
-
# Blocks, grid
|
125
|
-
self._block_size = {2: (32, 32, 1), 3: (16, 8, 8)}[self.data_ndim] # TODO tune
|
126
|
-
self._n_blocks = tuple([updiv(a, b) for a, b in zip(self.shape[::-1], self._block_size)])
|
127
|
-
|
128
|
-
def medfilt2(self, image, output=None):
|
129
|
-
"""
|
130
|
-
Perform a median filter on an image (or batch of images).
|
131
|
-
|
132
|
-
Parameters
|
133
|
-
-----------
|
134
|
-
images: numpy.ndarray or pycuda.gpuarray
|
135
|
-
2D image or 3D stack of 2D images
|
136
|
-
output: numpy.ndarray or pycuda.gpuarray, optional
|
137
|
-
Output of filtering. If provided, it must have the same shape
|
138
|
-
as the input array.
|
139
|
-
"""
|
140
|
-
self.cuda_processing.set_array("d_input", image)
|
141
|
-
if output is not None:
|
142
|
-
self.cuda_processing.set_array("d_output", output)
|
143
|
-
else:
|
144
|
-
self.cuda_processing.allocate_array("d_output", self.shape)
|
145
|
-
self.cuda_kernel_2d(
|
146
|
-
self.cuda_processing.d_input,
|
147
|
-
self.cuda_processing.d_output,
|
148
|
-
self.Nx,
|
149
|
-
self.Ny,
|
150
|
-
self.Nz,
|
151
|
-
self.threshold,
|
152
|
-
grid=self._n_blocks,
|
153
|
-
block=self._block_size,
|
154
|
-
)
|
155
|
-
self.cuda_processing.recover_arrays_references(["d_input", "d_output"])
|
156
|
-
if output is None:
|
157
|
-
return self.cuda_processing.d_output.get()
|
158
|
-
else:
|
159
|
-
return output
|
4
|
+
deprecation_warning(
|
5
|
+
"nabu.cuda.medfilt has been moved to nabu.processing.medfilt_cuda", do_print=True, func_name="medfilt_cuda"
|
6
|
+
)
|
nabu/cuda/padding.py
CHANGED
@@ -1,72 +1,6 @@
|
|
1
|
-
|
2
|
-
from ..utils import
|
3
|
-
from .kernel import CudaKernel
|
4
|
-
from .processing import CudaProcessing
|
5
|
-
import pycuda.gpuarray as garray
|
6
|
-
from ..misc.padding_base import PaddingBase
|
1
|
+
from ..processing.padding_cuda import *
|
2
|
+
from ..utils import deprecation_warning
|
7
3
|
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
A class for performing padding on GPU using Cuda
|
12
|
-
"""
|
13
|
-
|
14
|
-
# TODO docstring from base class
|
15
|
-
def __init__(self, shape, pad_width, mode="constant", cuda_options=None, **kwargs):
|
16
|
-
super().__init__(shape, pad_width, mode=mode, **kwargs)
|
17
|
-
self.cuda_processing = CudaProcessing(**(cuda_options or {}))
|
18
|
-
self._init_cuda_coordinate_transform()
|
19
|
-
|
20
|
-
def _init_cuda_coordinate_transform(self):
|
21
|
-
if self.mode == "constant":
|
22
|
-
self.d_padded_array_constant = garray.to_gpu(self.padded_array_constant)
|
23
|
-
return
|
24
|
-
self._coords_transform_kernel = CudaKernel(
|
25
|
-
"coordinate_transform",
|
26
|
-
filename=get_cuda_srcfile("padding.cu"),
|
27
|
-
signature="PPPPiii",
|
28
|
-
)
|
29
|
-
self._coords_transform_block = (32, 32, 1)
|
30
|
-
self._coords_transform_grid = [
|
31
|
-
updiv(a, b) for a, b in zip(self.padded_shape[::-1], self._coords_transform_block)
|
32
|
-
]
|
33
|
-
self.d_coords_rows = garray.to_gpu(self.coords_rows)
|
34
|
-
self.d_coords_cols = garray.to_gpu(self.coords_cols)
|
35
|
-
|
36
|
-
def _pad_constant(self, image, output):
|
37
|
-
pad_y, pad_x = self.pad_width
|
38
|
-
self.d_padded_array_constant[pad_y[0] : pad_y[0] + self.shape[0], pad_x[0] : pad_x[0] + self.shape[1]] = image[
|
39
|
-
:
|
40
|
-
]
|
41
|
-
output[:] = self.d_padded_array_constant[:]
|
42
|
-
return output
|
43
|
-
|
44
|
-
def pad(self, image, output=None):
|
45
|
-
"""
|
46
|
-
Pad an array.
|
47
|
-
|
48
|
-
Parameters
|
49
|
-
----------
|
50
|
-
image: pycuda.gpuarray.GPUArray
|
51
|
-
Image to pad
|
52
|
-
output: pycuda.gpuarray.GPUArray, optional
|
53
|
-
Output image. If provided, must be in the expected shape.
|
54
|
-
"""
|
55
|
-
if output is None:
|
56
|
-
output = self.cuda_processing.allocate_array("d_output", self.padded_shape)
|
57
|
-
if self.mode == "constant":
|
58
|
-
return self._pad_constant(image, output)
|
59
|
-
self._coords_transform_kernel(
|
60
|
-
image,
|
61
|
-
output,
|
62
|
-
self.d_coords_cols,
|
63
|
-
self.d_coords_rows,
|
64
|
-
np.int32(self.shape[1]),
|
65
|
-
np.int32(self.padded_shape[1]),
|
66
|
-
np.int32(self.padded_shape[0]),
|
67
|
-
grid=self._coords_transform_grid,
|
68
|
-
block=self._coords_transform_block,
|
69
|
-
)
|
70
|
-
return output
|
71
|
-
|
72
|
-
__call__ = pad
|
4
|
+
deprecation_warning(
|
5
|
+
"nabu.cuda.padding has been moved to nabu.processing.padding_cuda", do_print=True, func_name="padding_cuda"
|
6
|
+
)
|