nabu 2023.2.1__py3-none-any.whl → 2024.1.0rc3__py3-none-any.whl
This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- doc/conf.py +1 -1
- doc/doc_config.py +32 -0
- nabu/__init__.py +2 -1
- nabu/app/bootstrap_stitching.py +1 -1
- nabu/app/cli_configs.py +122 -2
- nabu/app/composite_cor.py +27 -2
- nabu/app/correct_rot.py +70 -0
- nabu/app/create_distortion_map_from_poly.py +42 -18
- nabu/app/diag_to_pix.py +358 -0
- nabu/app/diag_to_rot.py +449 -0
- nabu/app/generate_header.py +4 -3
- nabu/app/histogram.py +2 -2
- nabu/app/multicor.py +6 -1
- nabu/app/parse_reconstruction_log.py +151 -0
- nabu/app/prepare_weights_double.py +83 -22
- nabu/app/reconstruct.py +5 -1
- nabu/app/reconstruct_helical.py +7 -0
- nabu/app/reduce_dark_flat.py +6 -3
- nabu/app/rotate.py +4 -4
- nabu/app/stitching.py +16 -2
- nabu/app/tests/test_reduce_dark_flat.py +18 -2
- nabu/app/validator.py +4 -4
- nabu/cuda/convolution.py +8 -376
- nabu/cuda/fft.py +4 -0
- nabu/cuda/kernel.py +4 -4
- nabu/cuda/medfilt.py +5 -158
- nabu/cuda/padding.py +5 -71
- nabu/cuda/processing.py +23 -2
- nabu/cuda/src/ElementOp.cu +78 -0
- nabu/cuda/src/backproj.cu +28 -2
- nabu/cuda/src/fourier_wavelets.cu +2 -2
- nabu/cuda/src/normalization.cu +23 -0
- nabu/cuda/src/padding.cu +2 -2
- nabu/cuda/src/transpose.cu +16 -0
- nabu/cuda/utils.py +39 -0
- nabu/estimation/alignment.py +10 -1
- nabu/estimation/cor.py +808 -38
- nabu/estimation/cor_sino.py +7 -9
- nabu/estimation/tests/test_cor.py +85 -3
- nabu/io/reader.py +26 -18
- nabu/io/tests/test_cast_volume.py +3 -3
- nabu/io/tests/test_detector_distortion.py +3 -3
- nabu/io/tiffwriter_zmm.py +2 -2
- nabu/io/utils.py +14 -4
- nabu/io/writer.py +5 -3
- nabu/misc/fftshift.py +6 -0
- nabu/misc/histogram.py +5 -285
- nabu/misc/histogram_cuda.py +8 -104
- nabu/misc/kernel_base.py +3 -121
- nabu/misc/padding_base.py +5 -69
- nabu/misc/processing_base.py +3 -107
- nabu/misc/rotation.py +5 -62
- nabu/misc/rotation_cuda.py +5 -65
- nabu/misc/transpose.py +6 -0
- nabu/misc/unsharp.py +3 -78
- nabu/misc/unsharp_cuda.py +5 -52
- nabu/misc/unsharp_opencl.py +8 -85
- nabu/opencl/fft.py +6 -0
- nabu/opencl/kernel.py +21 -6
- nabu/opencl/padding.py +5 -72
- nabu/opencl/processing.py +27 -5
- nabu/opencl/src/backproj.cl +3 -3
- nabu/opencl/src/fftshift.cl +65 -12
- nabu/opencl/src/padding.cl +2 -2
- nabu/opencl/src/roll.cl +96 -0
- nabu/opencl/src/transpose.cl +16 -0
- nabu/pipeline/config_validators.py +63 -3
- nabu/pipeline/dataset_validator.py +2 -2
- nabu/pipeline/estimators.py +193 -35
- nabu/pipeline/fullfield/chunked.py +34 -17
- nabu/pipeline/fullfield/chunked_cuda.py +7 -5
- nabu/pipeline/fullfield/computations.py +48 -13
- nabu/pipeline/fullfield/nabu_config.py +13 -13
- nabu/pipeline/fullfield/processconfig.py +10 -5
- nabu/pipeline/fullfield/reconstruction.py +1 -2
- nabu/pipeline/helical/fbp.py +5 -0
- nabu/pipeline/helical/filtering.py +12 -9
- nabu/pipeline/helical/gridded_accumulator.py +179 -33
- nabu/pipeline/helical/helical_chunked_regridded.py +262 -151
- nabu/pipeline/helical/helical_chunked_regridded_cuda.py +4 -11
- nabu/pipeline/helical/helical_reconstruction.py +56 -18
- nabu/pipeline/helical/span_strategy.py +1 -1
- nabu/pipeline/helical/tests/test_accumulator.py +4 -0
- nabu/pipeline/params.py +23 -2
- nabu/pipeline/processconfig.py +3 -8
- nabu/pipeline/tests/test_chunk_reader.py +78 -0
- nabu/pipeline/tests/test_estimators.py +120 -2
- nabu/pipeline/utils.py +25 -0
- nabu/pipeline/writer.py +2 -0
- nabu/preproc/ccd_cuda.py +9 -7
- nabu/preproc/ctf.py +21 -26
- nabu/preproc/ctf_cuda.py +25 -25
- nabu/preproc/double_flatfield.py +14 -2
- nabu/preproc/double_flatfield_cuda.py +7 -11
- nabu/preproc/flatfield_cuda.py +23 -27
- nabu/preproc/phase.py +19 -24
- nabu/preproc/phase_cuda.py +21 -21
- nabu/preproc/shift_cuda.py +58 -28
- nabu/preproc/tests/test_ctf.py +5 -5
- nabu/preproc/tests/test_double_flatfield.py +2 -2
- nabu/preproc/tests/test_vshift.py +13 -2
- nabu/processing/__init__.py +0 -0
- nabu/processing/convolution_cuda.py +375 -0
- nabu/processing/fft_base.py +163 -0
- nabu/processing/fft_cuda.py +256 -0
- nabu/processing/fft_opencl.py +54 -0
- nabu/processing/fftshift.py +134 -0
- nabu/processing/histogram.py +286 -0
- nabu/processing/histogram_cuda.py +103 -0
- nabu/processing/kernel_base.py +126 -0
- nabu/processing/medfilt_cuda.py +159 -0
- nabu/processing/muladd.py +29 -0
- nabu/processing/muladd_cuda.py +68 -0
- nabu/processing/padding_base.py +71 -0
- nabu/processing/padding_cuda.py +75 -0
- nabu/processing/padding_opencl.py +77 -0
- nabu/processing/processing_base.py +123 -0
- nabu/processing/roll_opencl.py +64 -0
- nabu/processing/rotation.py +63 -0
- nabu/processing/rotation_cuda.py +66 -0
- nabu/processing/tests/__init__.py +0 -0
- nabu/processing/tests/test_fft.py +268 -0
- nabu/processing/tests/test_fftshift.py +71 -0
- nabu/{misc → processing}/tests/test_histogram.py +2 -4
- nabu/{cuda → processing}/tests/test_medfilt.py +1 -1
- nabu/processing/tests/test_muladd.py +54 -0
- nabu/{cuda → processing}/tests/test_padding.py +119 -75
- nabu/processing/tests/test_roll.py +63 -0
- nabu/{misc → processing}/tests/test_rotation.py +3 -2
- nabu/processing/tests/test_transpose.py +72 -0
- nabu/{misc → processing}/tests/test_unsharp.py +41 -8
- nabu/processing/transpose.py +126 -0
- nabu/processing/unsharp.py +79 -0
- nabu/processing/unsharp_cuda.py +53 -0
- nabu/processing/unsharp_opencl.py +75 -0
- nabu/reconstruction/fbp.py +34 -10
- nabu/reconstruction/fbp_base.py +35 -16
- nabu/reconstruction/fbp_opencl.py +7 -12
- nabu/reconstruction/filtering.py +2 -2
- nabu/reconstruction/filtering_cuda.py +13 -14
- nabu/reconstruction/filtering_opencl.py +3 -4
- nabu/reconstruction/projection.py +2 -0
- nabu/reconstruction/rings.py +158 -1
- nabu/reconstruction/rings_cuda.py +218 -58
- nabu/reconstruction/sinogram_cuda.py +16 -12
- nabu/reconstruction/tests/test_deringer.py +116 -14
- nabu/reconstruction/tests/test_fbp.py +22 -31
- nabu/reconstruction/tests/test_filtering.py +11 -2
- nabu/resources/dataset_analyzer.py +89 -26
- nabu/resources/nxflatfield.py +2 -2
- nabu/resources/tests/test_nxflatfield.py +1 -1
- nabu/resources/utils.py +9 -2
- nabu/stitching/alignment.py +184 -0
- nabu/stitching/config.py +241 -39
- nabu/stitching/definitions.py +6 -0
- nabu/stitching/frame_composition.py +4 -2
- nabu/stitching/overlap.py +99 -3
- nabu/stitching/sample_normalization.py +60 -0
- nabu/stitching/slurm_utils.py +10 -10
- nabu/stitching/tests/test_alignment.py +99 -0
- nabu/stitching/tests/test_config.py +16 -1
- nabu/stitching/tests/test_overlap.py +68 -2
- nabu/stitching/tests/test_sample_normalization.py +49 -0
- nabu/stitching/tests/test_slurm_utils.py +5 -5
- nabu/stitching/tests/test_utils.py +3 -33
- nabu/stitching/tests/test_z_stitching.py +391 -22
- nabu/stitching/utils.py +144 -202
- nabu/stitching/z_stitching.py +309 -126
- nabu/testutils.py +18 -0
- nabu/thirdparty/tomocupy_remove_stripe.py +586 -0
- nabu/utils.py +32 -6
- {nabu-2023.2.1.dist-info → nabu-2024.1.0rc3.dist-info}/LICENSE +1 -1
- {nabu-2023.2.1.dist-info → nabu-2024.1.0rc3.dist-info}/METADATA +5 -5
- nabu-2024.1.0rc3.dist-info/RECORD +296 -0
- {nabu-2023.2.1.dist-info → nabu-2024.1.0rc3.dist-info}/WHEEL +1 -1
- {nabu-2023.2.1.dist-info → nabu-2024.1.0rc3.dist-info}/entry_points.txt +5 -1
- nabu/conftest.py +0 -14
- nabu/opencl/fftshift.py +0 -92
- nabu/opencl/tests/test_fftshift.py +0 -55
- nabu/opencl/tests/test_padding.py +0 -84
- nabu-2023.2.1.dist-info/RECORD +0 -252
- nabu/cuda/src/{fftshift.cu → dfi_fftshift.cu} +0 -0
- {nabu-2023.2.1.dist-info → nabu-2024.1.0rc3.dist-info}/top_level.txt +0 -0
nabu/cuda/processing.py
CHANGED
@@ -1,16 +1,24 @@
|
|
1
|
-
from ..
|
1
|
+
from ..utils import MissingComponentError
|
2
|
+
from ..processing.processing_base import ProcessingBase
|
2
3
|
from .utils import get_cuda_context, __has_pycuda__
|
3
4
|
|
4
5
|
if __has_pycuda__:
|
5
6
|
import pycuda.driver as cuda
|
6
7
|
import pycuda.gpuarray as garray
|
8
|
+
from ..cuda.kernel import CudaKernel
|
7
9
|
|
8
10
|
dev_attrs = cuda.device_attribute
|
11
|
+
GPUArray = garray.GPUArray
|
12
|
+
from pycuda.tools import dtype_to_ctype
|
13
|
+
else:
|
14
|
+
GPUArray = MissingComponentError("pycuda")
|
15
|
+
dtype_to_ctype = MissingComponentError("pycuda")
|
9
16
|
|
10
17
|
|
11
18
|
# NB: we must detach from a context before creating another context
|
12
19
|
class CudaProcessing(ProcessingBase):
|
13
|
-
|
20
|
+
array_class = GPUArray if __has_pycuda__ else None
|
21
|
+
dtype_to_ctype = dtype_to_ctype
|
14
22
|
|
15
23
|
def __init__(self, device_id=None, ctx=None, stream=None, cleanup_at_exit=True):
|
16
24
|
"""
|
@@ -52,3 +60,16 @@ class CudaProcessing(ProcessingBase):
|
|
52
60
|
|
53
61
|
def _allocate_array_mem(self, shape, dtype):
|
54
62
|
return garray.zeros(shape, dtype)
|
63
|
+
|
64
|
+
def kernel(
|
65
|
+
self, kernel_name, filename=None, src=None, signature=None, texrefs=None, automation_params=None, **build_kwargs
|
66
|
+
):
|
67
|
+
return CudaKernel(
|
68
|
+
kernel_name,
|
69
|
+
filename=filename,
|
70
|
+
src=src,
|
71
|
+
signature=signature,
|
72
|
+
texrefs=texrefs,
|
73
|
+
automation_params=automation_params,
|
74
|
+
**build_kwargs,
|
75
|
+
)
|
nabu/cuda/src/ElementOp.cu
CHANGED
@@ -47,6 +47,33 @@ __global__ void inplace_generic_op_2Dby2D(float* arr2D, float* arr2D_other, int
|
|
47
47
|
}
|
48
48
|
|
49
49
|
|
50
|
+
// launched with (Nx, Ny, Nz) threads
|
51
|
+
// does array3D[x, y, z] = op(array3D[x, y, z], array1D[x]) (in the "numpy broadcasting" sense)
|
52
|
+
__global__ void inplace_generic_op_3Dby1D(
|
53
|
+
float * array3D,
|
54
|
+
float* array1D,
|
55
|
+
int Nx, // input/output number of columns
|
56
|
+
int Ny, // input/output number of rows
|
57
|
+
int Nz // input/output depth
|
58
|
+
) {
|
59
|
+
uint x = blockDim.x * blockIdx.x + threadIdx.x;
|
60
|
+
uint y = blockDim.y * blockIdx.y + threadIdx.y;
|
61
|
+
uint z = blockDim.z * blockIdx.z + threadIdx.z;
|
62
|
+
if ((x >= Nx) || (y >= Ny) || (z >= Nz)) return;
|
63
|
+
size_t idx = ((z * Ny) + y)*Nx + x;
|
64
|
+
|
65
|
+
#if GENERIC_OP == OP_ADD
|
66
|
+
array3D[idx] += array1D[x];
|
67
|
+
#elif GENERIC_OP == OP_SUB
|
68
|
+
array3D[idx] -= array1D[x];
|
69
|
+
#elif GENERIC_OP == OP_MUL
|
70
|
+
array3D[idx] *= array1D[x];
|
71
|
+
#elif GENERIC_OP == OP_DIV
|
72
|
+
array3D[idx] /= array1D[x];
|
73
|
+
#endif
|
74
|
+
}
|
75
|
+
|
76
|
+
|
50
77
|
// arr3D *= arr1D (along fast dim)
|
51
78
|
__global__ void inplace_complex_mul_3Dby1D(complex* arr3D, complex* arr1D, int width, int height, int depth) {
|
52
79
|
int x = blockDim.x * blockIdx.x + threadIdx.x;
|
@@ -161,3 +188,54 @@ __global__ void reverse2D_x(float* array, int Nx, int Ny) {
|
|
161
188
|
array[pos2] = tmp;
|
162
189
|
}
|
163
190
|
|
191
|
+
|
192
|
+
/**
|
193
|
+
|
194
|
+
Generic mul-add kernel with possibly-complicated indexing.
|
195
|
+
|
196
|
+
dst[DST_IDX] = fac_dst*dst[DST_IDX] + fac_other*other[OTHER_IDX]
|
197
|
+
where
|
198
|
+
DST_IDX = dst_start_row:dst_end_row, dst_start_col:dst_end_col
|
199
|
+
OTHER_IDX = other_start_row:other_end_row, other_start_col:other_end_col
|
200
|
+
|
201
|
+
Usage:
|
202
|
+
mul_add(dst, other, dst_nx, other_nx, a, b, (x1, x2), (y1, y2), (x3, x4), (y3, y4))
|
203
|
+
*/
|
204
|
+
|
205
|
+
__global__ void mul_add(
|
206
|
+
float* dst,
|
207
|
+
float* other,
|
208
|
+
int dst_width,
|
209
|
+
int other_width,
|
210
|
+
float fac_dst,
|
211
|
+
float fac_other,
|
212
|
+
int2 dst_x_range,
|
213
|
+
int2 dst_y_range,
|
214
|
+
int2 other_x_range,
|
215
|
+
int2 other_y_range
|
216
|
+
)
|
217
|
+
{
|
218
|
+
size_t x = blockDim.x * blockIdx.x + threadIdx.x;
|
219
|
+
size_t y = blockDim.y * blockIdx.y + threadIdx.y;
|
220
|
+
|
221
|
+
int x_start_dst = dst_x_range.x;
|
222
|
+
int x_stop_dst = dst_x_range.y;
|
223
|
+
int y_start_dst = dst_y_range.x;
|
224
|
+
int y_stop_dst = dst_y_range.y;
|
225
|
+
|
226
|
+
int x_start_other = other_x_range.x;
|
227
|
+
int x_stop_other = other_x_range.y;
|
228
|
+
int y_start_other = other_y_range.x;
|
229
|
+
int y_stop_other = other_y_range.y;
|
230
|
+
|
231
|
+
int operation_width = x_stop_dst - x_start_dst; // assumed == x_stop_other - x_start_other
|
232
|
+
int operation_height = y_stop_dst - y_start_dst; // assumed == y_stop_other - y_start_other
|
233
|
+
|
234
|
+
if ((x >= operation_width) || (y >= operation_height)) return;
|
235
|
+
|
236
|
+
size_t idx_in_dst = (y + y_start_dst)*dst_width + (x + x_start_dst);
|
237
|
+
size_t idx_in_other = (y + y_start_other)*other_width + (x + x_start_other);
|
238
|
+
|
239
|
+
dst[idx_in_dst] = fac_dst * dst[idx_in_dst] + fac_other * other[idx_in_other];
|
240
|
+
}
|
241
|
+
|
nabu/cuda/src/backproj.cu
CHANGED
@@ -2,7 +2,9 @@
|
|
2
2
|
#define SHARED_SIZE 256
|
3
3
|
#endif
|
4
4
|
|
5
|
+
#ifdef USE_TEXTURES
|
5
6
|
texture<float, 2, cudaReadModeElementType> tex_projections;
|
7
|
+
#endif
|
6
8
|
|
7
9
|
#ifdef CLIP_OUTER_CIRCLE
|
8
10
|
inline __device__ int is_in_circle(int x, int y, float center_x, float center_y, int radius2) {
|
@@ -11,6 +13,20 @@ inline __device__ int is_in_circle(int x, int y, float center_x, float center_y,
|
|
11
13
|
#endif
|
12
14
|
|
13
15
|
|
16
|
+
/*
|
17
|
+
Linear interpolation on a 2D array, horizontally.
|
18
|
+
This will return arr[y][x] where y is an int (exact access) and x is a float (linear interp horizontally)
|
19
|
+
*/
|
20
|
+
static inline __device__ float linear_interpolation(float* arr, int Nx, float x, int y) {
|
21
|
+
// check commented to gain a bit of speed - the check was done before function call
|
22
|
+
// if (x < 0 || x >= Nx) return 0.0f; // texture address mode CLAMP_TO_EDGE
|
23
|
+
int xm = (int) floorf(x);
|
24
|
+
int xp = (int) ceilf(x);
|
25
|
+
if ((xm == xp) || (xp >= Nx)) return arr[y*Nx+xm];
|
26
|
+
else return (arr[y*Nx+xm] * (xp - x)) + (arr[y*Nx+xp] * (x - xm));
|
27
|
+
}
|
28
|
+
|
29
|
+
|
14
30
|
/**
|
15
31
|
|
16
32
|
Implementation details
|
@@ -32,13 +48,16 @@ blocks.
|
|
32
48
|
// One thread handles up to 4 pixels in the output slice
|
33
49
|
__global__ void backproj(
|
34
50
|
float* d_slice,
|
51
|
+
#ifndef USE_TEXTURES
|
52
|
+
float* d_sino,
|
53
|
+
#endif
|
35
54
|
int num_projs,
|
36
55
|
int num_bins,
|
37
56
|
float axis_position,
|
38
57
|
int n_x,
|
39
58
|
int n_y,
|
40
|
-
|
41
|
-
|
59
|
+
float offset_x,
|
60
|
+
float offset_y,
|
42
61
|
float* d_cos,
|
43
62
|
float* d_msin,
|
44
63
|
#ifdef DO_AXIS_CORRECTION
|
@@ -107,10 +126,17 @@ __global__ void backproj(
|
|
107
126
|
h4 += axcorr;
|
108
127
|
#endif
|
109
128
|
|
129
|
+
#ifdef USE_TEXTURES
|
110
130
|
if (h1 >= 0 && h1 < num_bins) sum1 += tex2D(tex_projections, h1 + 0.5f, proj + 0.5f);
|
111
131
|
if (h2 >= 0 && h2 < num_bins) sum2 += tex2D(tex_projections, h2 + 0.5f, proj + 0.5f);
|
112
132
|
if (h3 >= 0 && h3 < num_bins) sum3 += tex2D(tex_projections, h3 + 0.5f, proj + 0.5f);
|
113
133
|
if (h4 >= 0 && h4 < num_bins) sum4 += tex2D(tex_projections, h4 + 0.5f, proj + 0.5f);
|
134
|
+
#else
|
135
|
+
if (h1 >= 0 && h1 < num_bins) sum1 += linear_interpolation(d_sino, num_bins, h1, proj);
|
136
|
+
if (h2 >= 0 && h2 < num_bins) sum2 += linear_interpolation(d_sino, num_bins, h2, proj);
|
137
|
+
if (h3 >= 0 && h3 < num_bins) sum3 += linear_interpolation(d_sino, num_bins, h3, proj);
|
138
|
+
if (h4 >= 0 && h4 < num_bins) sum4 += linear_interpolation(d_sino, num_bins, h4, proj);
|
139
|
+
#endif
|
114
140
|
}
|
115
141
|
|
116
142
|
int write_topleft = 1, write_topright = 1, write_botleft = 1, write_botright = 1;
|
nabu/cuda/src/fourier_wavelets.cu
CHANGED
@@ -12,6 +12,6 @@ __global__ void kern_fourierwavelets(float2* sinoF, int Nx, int Ny, float wsigma
|
|
12
12
|
|
13
13
|
int tid = gidy*Nx + gidx;
|
14
14
|
// do not forget the scale factor (here Ny)
|
15
|
-
sinoF[tid].x *= factor
|
16
|
-
sinoF[tid].y *= factor
|
15
|
+
sinoF[tid].x *= factor;
|
16
|
+
sinoF[tid].y *= factor;
|
17
17
|
}
|
nabu/cuda/src/normalization.cu
CHANGED
@@ -45,3 +45,26 @@ __global__ void normalize_chebyshev(
|
|
45
45
|
array[pos] -= ff0*f0/sum0 + ff1*f1/sum1 + ff2*f2/sum2;
|
46
46
|
}
|
47
47
|
}
|
48
|
+
|
49
|
+
|
50
|
+
// launched with (Nx, 1, Nz) threads
|
51
|
+
__global__ void vertical_mean(
|
52
|
+
float * array,
|
53
|
+
float* output,
|
54
|
+
int Nx, // input/output number of columns
|
55
|
+
int Ny, // input/output number of rows
|
56
|
+
int Nz // input/output depth
|
57
|
+
) {
|
58
|
+
uint x = blockDim.x * blockIdx.x + threadIdx.x;
|
59
|
+
uint y = blockDim.y * blockIdx.y + threadIdx.y;
|
60
|
+
uint z = blockDim.z * blockIdx.z + threadIdx.z;
|
61
|
+
if ((x >= Nx) || (y >= 1) || (z >= Nz)) return;
|
62
|
+
|
63
|
+
float m = 0.0f;
|
64
|
+
for (uint i = 0; i < Ny; i++) {
|
65
|
+
float s = array[(z * Ny + i) * Nx + x];
|
66
|
+
m += (s - m)/(i+1);
|
67
|
+
}
|
68
|
+
output[z * Nx + x] = (float) m;
|
69
|
+
}
|
70
|
+
|
nabu/cuda/src/padding.cu
CHANGED
@@ -129,8 +129,8 @@ __global__ void coordinate_transform(
|
|
129
129
|
uint y = blockDim.y * blockIdx.y + threadIdx.y;
|
130
130
|
if ((x >= Nx_padded) || (y >= Ny_padded)) return;
|
131
131
|
uint idx = y*Nx_padded + x;
|
132
|
-
int x2 = cols_inds[
|
133
|
-
int y2 = rows_inds[
|
132
|
+
int x2 = cols_inds[x];
|
133
|
+
int y2 = rows_inds[y];
|
134
134
|
array_out[idx] = array_in[y2*Nx + x2];
|
135
135
|
}
|
136
136
|
|
nabu/cuda/src/transpose.cu
ADDED
@@ -0,0 +1,16 @@
|
|
1
|
+
#ifndef SRC_DTYPE
|
2
|
+
#define SRC_DTYPE float
|
3
|
+
#endif
|
4
|
+
#ifndef DST_DTYPE
|
5
|
+
#define DST_DTYPE float
|
6
|
+
#endif
|
7
|
+
|
8
|
+
#include <pycuda-complex.hpp>
|
9
|
+
|
10
|
+
__global__ void transpose(SRC_DTYPE* src, DST_DTYPE* dst, int src_width, int src_height) {
|
11
|
+
// coordinates for "dst"
|
12
|
+
uint x = blockDim.x * blockIdx.x + threadIdx.x;
|
13
|
+
uint y = blockDim.y * blockIdx.y + threadIdx.y;
|
14
|
+
if ((x >= src_height) || (y >= src_width)) return;
|
15
|
+
dst[y*src_height + x] = (DST_DTYPE) src[x*src_width + y];
|
16
|
+
}
|
nabu/cuda/utils.py
CHANGED
@@ -8,6 +8,7 @@ try:
|
|
8
8
|
import pycuda.driver as cuda
|
9
9
|
from pycuda import gpuarray as garray
|
10
10
|
from pycuda.tools import clear_context_caches
|
11
|
+
from pycuda.compiler import get_nvcc_version as pycuda_get_nvcc_version
|
11
12
|
|
12
13
|
__has_pycuda__ = True
|
13
14
|
__pycuda_error_msg__ = None
|
@@ -24,6 +25,13 @@ try:
|
|
24
25
|
except ImportError:
|
25
26
|
__has_cufft__ = False
|
26
27
|
|
28
|
+
try:
|
29
|
+
import cupy
|
30
|
+
|
31
|
+
__has_cupy__ = True
|
32
|
+
except ImportError:
|
33
|
+
__has_cupy__ = False
|
34
|
+
|
27
35
|
|
28
36
|
def get_cuda_context(device_id=None, cleanup_at_exit=True):
|
29
37
|
"""
|
@@ -133,6 +141,27 @@ def collect_cuda_gpus():
|
|
133
141
|
return cuda_gpus
|
134
142
|
|
135
143
|
|
144
|
+
def get_nvcc_version(nvcc_cmd="nvcc"):
|
145
|
+
try:
|
146
|
+
ver = "".join(pycuda_get_nvcc_version(nvcc_cmd)).split("release")[1].strip().split(" ")[0].strip(",")
|
147
|
+
except:
|
148
|
+
ver = None
|
149
|
+
return ver
|
150
|
+
|
151
|
+
|
152
|
+
def check_textures_availability():
|
153
|
+
"""
|
154
|
+
Check whether Cuda textures can be used.
|
155
|
+
The only limitation is pycuda which does not support texture objects.
|
156
|
+
Textures references were deprecated, and removed from Cuda 12.
|
157
|
+
"""
|
158
|
+
nvcc_ver = get_nvcc_version()
|
159
|
+
if nvcc_ver is None:
|
160
|
+
return False # unknown - can't parse NVCC version for some reason
|
161
|
+
nvcc_major = int(nvcc_ver.split(".")[0])
|
162
|
+
return nvcc_major < 12
|
163
|
+
|
164
|
+
|
136
165
|
"""
|
137
166
|
pycuda/driver.py
|
138
167
|
np.complex64: SIGNED_INT32, num_channels = 2
|
@@ -290,3 +319,13 @@ def replace_array_memory(arr, new_shape):
|
|
290
319
|
arr.shape = new_shape
|
291
320
|
# TODO re-compute strides
|
292
321
|
return arr
|
322
|
+
|
323
|
+
|
324
|
+
def pycuda_to_cupy(arr_pycuda):
|
325
|
+
arr_cupy_mem = cupy.cuda.UnownedMemory(arr_pycuda.ptr, arr_pycuda.size, arr_pycuda)
|
326
|
+
arr_cupy_memptr = cupy.cuda.MemoryPointer(arr_cupy_mem, offset=0)
|
327
|
+
return cupy.ndarray(arr_pycuda.shape, dtype=arr_pycuda.dtype, memptr=arr_cupy_memptr) # pylint: disable=E1123
|
328
|
+
|
329
|
+
|
330
|
+
def cupy_to_pycuda(arr_cupy):
|
331
|
+
return garray.empty(arr_cupy.shape, arr_cupy.dtype, gpudata=arr_cupy.data.ptr)
|
nabu/estimation/alignment.py
CHANGED
@@ -38,6 +38,7 @@ local_ifftn = scipy.fft.irfftn
|
|
38
38
|
|
39
39
|
class AlignmentBase:
|
40
40
|
default_extra_options = {"blocking_plots": False}
|
41
|
+
_default_cor_options = {}
|
41
42
|
|
42
43
|
def __init__(
|
43
44
|
self,
|
@@ -47,6 +48,7 @@ class AlignmentBase:
|
|
47
48
|
logger=None,
|
48
49
|
data_type=np.float32,
|
49
50
|
extra_options=None,
|
51
|
+
cor_options=None,
|
50
52
|
):
|
51
53
|
"""
|
52
54
|
Alignment basic functions.
|
@@ -71,6 +73,13 @@ class AlignmentBase:
|
|
71
73
|
|
72
74
|
self._init_parameters(vert_fft_width, horz_fft_width, verbose, logger, data_type, extra_options=extra_options)
|
73
75
|
self._plot_windows = {}
|
76
|
+
self._set_cor_options(cor_options)
|
77
|
+
|
78
|
+
def _set_cor_options(self, cor_options):
|
79
|
+
default_dict = self._default_cor_options.copy()
|
80
|
+
if cor_options is not None:
|
81
|
+
default_dict.update(cor_options)
|
82
|
+
self.cor_options = default_dict
|
74
83
|
|
75
84
|
def _init_parameters(self, vert_fft_width, horz_fft_width, verbose, logger, data_type, extra_options=None):
|
76
85
|
self.logger = LoggerOrPrint(logger)
|
@@ -273,7 +282,7 @@ class AlignmentBase:
|
|
273
282
|
vertex_x, vertex_min_x, vertex_max_x
|
274
283
|
)
|
275
284
|
else:
|
276
|
-
message = "Fitted positions
|
285
|
+
message = "Fitted positions outside the input margins [{}, {}]: {} below and {} above".format(
|
277
286
|
vertex_min_x,
|
278
287
|
vertex_max_x,
|
279
288
|
np.sum(1 - lower_bound_ok),
|