httomolibgpu 2.2__tar.gz → 2.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {httomolibgpu-2.2/httomolibgpu.egg-info → httomolibgpu-2.3}/PKG-INFO +8 -6
- {httomolibgpu-2.2 → httomolibgpu-2.3}/README.rst +3 -3
- {httomolibgpu-2.2 → httomolibgpu-2.3}/httomolibgpu/__init__.py +1 -0
- {httomolibgpu-2.2 → httomolibgpu-2.3}/httomolibgpu/cuda_kernels/center_360_shifts.cu +8 -8
- httomolibgpu-2.3/httomolibgpu/cuda_kernels/generate_mask.cu +103 -0
- {httomolibgpu-2.2 → httomolibgpu-2.3}/httomolibgpu/cupywrapper.py +2 -3
- httomolibgpu-2.3/httomolibgpu/misc/denoise.py +141 -0
- {httomolibgpu-2.2 → httomolibgpu-2.3}/httomolibgpu/misc/morph.py +29 -15
- {httomolibgpu-2.2 → httomolibgpu-2.3}/httomolibgpu/prep/phase.py +0 -1
- {httomolibgpu-2.2 → httomolibgpu-2.3}/httomolibgpu/prep/stripe.py +3 -2
- {httomolibgpu-2.2 → httomolibgpu-2.3}/httomolibgpu/recon/algorithm.py +13 -13
- {httomolibgpu-2.2 → httomolibgpu-2.3}/httomolibgpu/recon/rotation.py +157 -103
- {httomolibgpu-2.2 → httomolibgpu-2.3/httomolibgpu.egg-info}/PKG-INFO +8 -6
- {httomolibgpu-2.2 → httomolibgpu-2.3}/httomolibgpu.egg-info/SOURCES.txt +1 -1
- {httomolibgpu-2.2 → httomolibgpu-2.3}/httomolibgpu.egg-info/requires.txt +3 -1
- {httomolibgpu-2.2 → httomolibgpu-2.3}/pyproject.toml +4 -2
- httomolibgpu-2.2/httomolibgpu/cuda_kernels/downsample_sino.cu +0 -36
- httomolibgpu-2.2/httomolibgpu/cuda_kernels/generate_mask.cu +0 -51
- {httomolibgpu-2.2 → httomolibgpu-2.3}/LICENSE +0 -0
- {httomolibgpu-2.2 → httomolibgpu-2.3}/MANIFEST.in +0 -0
- {httomolibgpu-2.2 → httomolibgpu-2.3}/httomolibgpu/cuda_kernels/__init__.py +0 -0
- {httomolibgpu-2.2 → httomolibgpu-2.3}/httomolibgpu/cuda_kernels/calc_metrics.cu +0 -0
- {httomolibgpu-2.2 → httomolibgpu-2.3}/httomolibgpu/cuda_kernels/median_kernel.cu +0 -0
- {httomolibgpu-2.2 → httomolibgpu-2.3}/httomolibgpu/cuda_kernels/paganin_filter_gen.cu +0 -0
- {httomolibgpu-2.2 → httomolibgpu-2.3}/httomolibgpu/cuda_kernels/raven_filter.cu +0 -0
- {httomolibgpu-2.2 → httomolibgpu-2.3}/httomolibgpu/misc/__init__.py +0 -0
- {httomolibgpu-2.2 → httomolibgpu-2.3}/httomolibgpu/misc/corr.py +0 -0
- {httomolibgpu-2.2 → httomolibgpu-2.3}/httomolibgpu/misc/rescale.py +0 -0
- {httomolibgpu-2.2 → httomolibgpu-2.3}/httomolibgpu/prep/__init__.py +0 -0
- {httomolibgpu-2.2 → httomolibgpu-2.3}/httomolibgpu/prep/alignment.py +0 -0
- {httomolibgpu-2.2 → httomolibgpu-2.3}/httomolibgpu/prep/normalize.py +0 -0
- {httomolibgpu-2.2 → httomolibgpu-2.3}/httomolibgpu/recon/__init__.py +0 -0
- {httomolibgpu-2.2 → httomolibgpu-2.3}/httomolibgpu.egg-info/dependency_links.txt +0 -0
- {httomolibgpu-2.2 → httomolibgpu-2.3}/httomolibgpu.egg-info/top_level.txt +0 -0
- {httomolibgpu-2.2 → httomolibgpu-2.3}/setup.cfg +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
Metadata-Version: 2.
|
|
1
|
+
Metadata-Version: 2.2
|
|
2
2
|
Name: httomolibgpu
|
|
3
|
-
Version: 2.
|
|
3
|
+
Version: 2.3
|
|
4
4
|
Summary: Commonly used tomography data processing methods at DLS.
|
|
5
5
|
Author-email: Daniil Kazantsev <daniil.kazantsev@diamond.ac.uk>, Yousef Moazzam <yousef.moazzam@diamond.ac.uk>, Naman Gera <naman.gera@diamond.ac.uk>
|
|
6
6
|
License: BSD-3-Clause
|
|
@@ -12,12 +12,14 @@ Classifier: Environment :: GPU :: NVIDIA CUDA
|
|
|
12
12
|
Requires-Python: >=3.10
|
|
13
13
|
Description-Content-Type: text/x-rst
|
|
14
14
|
License-File: LICENSE
|
|
15
|
-
Requires-Dist: cupy
|
|
15
|
+
Requires-Dist: cupy==12.3.0
|
|
16
|
+
Requires-Dist: nvtx
|
|
16
17
|
Requires-Dist: numpy
|
|
17
18
|
Requires-Dist: scipy
|
|
18
19
|
Requires-Dist: pillow
|
|
19
20
|
Requires-Dist: scikit-image
|
|
20
21
|
Requires-Dist: tomobar
|
|
22
|
+
Requires-Dist: ccpi-regularisation-cupy
|
|
21
23
|
Provides-Extra: dev
|
|
22
24
|
Requires-Dist: pytest; extra == "dev"
|
|
23
25
|
Requires-Dist: pytest-cov; extra == "dev"
|
|
@@ -43,10 +45,10 @@ Some of the methods also have been optimised to ensure higher computational effi
|
|
|
43
45
|
The purpose of HTTomolibGPU
|
|
44
46
|
===========================
|
|
45
47
|
|
|
46
|
-
**HTTomolibGPU** can be used as a stand-alone library,
|
|
47
|
-
|
|
48
|
+
Although **HTTomolibGPU** can be used as a stand-alone library, it has been specifically developed to work together with the
|
|
49
|
+
`HTTomo <https://diamondlightsource.github.io/httomo/>`_ package as
|
|
48
50
|
its backend for data processing. HTTomo is a user interface (UI) written in Python for fast big tomographic data processing using
|
|
49
|
-
MPI protocols.
|
|
51
|
+
MPI protocols or serially.
|
|
50
52
|
|
|
51
53
|
Install HTTomolibGPU as a PyPi package
|
|
52
54
|
=========================================================
|
|
@@ -9,10 +9,10 @@ Some of the methods also have been optimised to ensure higher computational effi
|
|
|
9
9
|
The purpose of HTTomolibGPU
|
|
10
10
|
===========================
|
|
11
11
|
|
|
12
|
-
**HTTomolibGPU** can be used as a stand-alone library,
|
|
13
|
-
|
|
12
|
+
Although **HTTomolibGPU** can be used as a stand-alone library, it has been specifically developed to work together with the
|
|
13
|
+
`HTTomo <https://diamondlightsource.github.io/httomo/>`_ package as
|
|
14
14
|
its backend for data processing. HTTomo is a user interface (UI) written in Python for fast big tomographic data processing using
|
|
15
|
-
MPI protocols.
|
|
15
|
+
MPI protocols or serially.
|
|
16
16
|
|
|
17
17
|
Install HTTomolibGPU as a PyPi package
|
|
18
18
|
=========================================================
|
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
from httomolibgpu.misc.corr import median_filter, remove_outlier
|
|
2
|
+
from httomolibgpu.misc.denoise import total_variation_ROF, total_variation_PD
|
|
2
3
|
from httomolibgpu.misc.morph import sino_360_to_180, data_resampler
|
|
3
4
|
from httomolibgpu.misc.rescale import rescale_to_int
|
|
4
5
|
from httomolibgpu.prep.alignment import distortion_correction_proj_discorpy
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
#include <cupy/complex.cuh>
|
|
2
2
|
|
|
3
3
|
extern "C" __global__ void
|
|
4
|
-
shift_whole_shifts(const float *
|
|
4
|
+
shift_whole_shifts(const float *flip_sino, const float *comp_sino,
|
|
5
5
|
const float *__restrict__ list_shift, float *mat, int nx,
|
|
6
6
|
int nymat) {
|
|
7
7
|
int xid = threadIdx.x + blockIdx.x * blockDim.x;
|
|
@@ -17,14 +17,14 @@ shift_whole_shifts(const float *sino2, const float *sino3,
|
|
|
17
17
|
float frac_part = modf(shift_col, &int_part);
|
|
18
18
|
if (abs(frac_part) > 1e-5f) {
|
|
19
19
|
// we have a floating point shift, so we only roll in
|
|
20
|
-
//
|
|
20
|
+
// comp_sino, but we leave the rest for later using scipy
|
|
21
21
|
int shift_int =
|
|
22
22
|
shift_col >= 0.0 ? int(ceil(shift_col)) : int(floor(shift_col));
|
|
23
23
|
if (shift_int >= 0 && xid < shift_int) {
|
|
24
|
-
mat[zid * nymat * nx + yid * nx + xid] =
|
|
24
|
+
mat[zid * nymat * nx + yid * nx + xid] = comp_sino[yid * nx + xid];
|
|
25
25
|
}
|
|
26
26
|
if (shift_int < 0 && xid >= nx + shift_int) {
|
|
27
|
-
mat[zid * nymat * nx + yid * nx + xid] =
|
|
27
|
+
mat[zid * nymat * nx + yid * nx + xid] = comp_sino[yid * nx + xid];
|
|
28
28
|
}
|
|
29
29
|
} else {
|
|
30
30
|
// we have an integer shift, so we can roll in directly
|
|
@@ -33,16 +33,16 @@ shift_whole_shifts(const float *sino2, const float *sino3,
|
|
|
33
33
|
if (shift_int >= 0) {
|
|
34
34
|
if (xid >= shift_int) {
|
|
35
35
|
mat[zid * nymat * nx + yid * nx + xid] =
|
|
36
|
-
|
|
36
|
+
flip_sino[yid * nx + xid - shift_int];
|
|
37
37
|
} else {
|
|
38
|
-
mat[zid * nymat * nx + yid * nx + xid] =
|
|
38
|
+
mat[zid * nymat * nx + yid * nx + xid] = comp_sino[yid * nx + xid];
|
|
39
39
|
}
|
|
40
40
|
} else {
|
|
41
41
|
if (xid < nx + shift_int) {
|
|
42
42
|
mat[zid * nymat * nx + yid * nx + xid] =
|
|
43
|
-
|
|
43
|
+
flip_sino[yid * nx + xid - shift_int];
|
|
44
44
|
} else {
|
|
45
|
-
mat[zid * nymat * nx + yid * nx + xid] =
|
|
45
|
+
mat[zid * nymat * nx + yid * nx + xid] = comp_sino[yid * nx + xid];
|
|
46
46
|
}
|
|
47
47
|
}
|
|
48
48
|
}
|
|
@@ -0,0 +1,103 @@
|
|
|
1
|
+
extern "C" __global__ void generate_mask(const int ncol, const int nrow,
|
|
2
|
+
const int cen_col, const int cen_row,
|
|
3
|
+
const float du, const float dv,
|
|
4
|
+
const float radius, const float drop,
|
|
5
|
+
unsigned short *mask) {
|
|
6
|
+
int i = blockDim.x * blockIdx.x + threadIdx.x;
|
|
7
|
+
int j = blockIdx.y;
|
|
8
|
+
|
|
9
|
+
if (i >= ncol/2+1)
|
|
10
|
+
return;
|
|
11
|
+
|
|
12
|
+
// we only need to look at the right half as we're using a real2complex FFT
|
|
13
|
+
int outi = i;
|
|
14
|
+
i += ncol/2-1;
|
|
15
|
+
|
|
16
|
+
int pos = __float2int_ru(((j - cen_row) * dv / radius) / du);
|
|
17
|
+
int pos1 = -pos + cen_col;
|
|
18
|
+
int pos2 = pos + cen_col;
|
|
19
|
+
|
|
20
|
+
if (pos1 > pos2) {
|
|
21
|
+
int temp = pos1;
|
|
22
|
+
pos1 = pos2;
|
|
23
|
+
pos2 = temp;
|
|
24
|
+
if (pos1 >= ncol) {
|
|
25
|
+
pos1 = ncol - 1;
|
|
26
|
+
}
|
|
27
|
+
if (pos2 < 0) {
|
|
28
|
+
pos2 = 0;
|
|
29
|
+
}
|
|
30
|
+
} else {
|
|
31
|
+
if (pos1 < 0) {
|
|
32
|
+
pos1 = 0;
|
|
33
|
+
}
|
|
34
|
+
if (pos2 >= ncol) {
|
|
35
|
+
pos2 = ncol - 1;
|
|
36
|
+
}
|
|
37
|
+
}
|
|
38
|
+
|
|
39
|
+
short outval = (pos1 <= i && i <= pos2) ? 1 : 0;
|
|
40
|
+
|
|
41
|
+
// mask[cen_row - drop: cen_row + drop + 1, :] = 0
|
|
42
|
+
if (j >= cen_row - drop && j <= cen_row + drop) {
|
|
43
|
+
outval = 0;
|
|
44
|
+
}
|
|
45
|
+
// mask[:, cen_col - 1: cen_col + 2] = 0
|
|
46
|
+
if (i >= cen_col - 1 && i <= cen_col + 1) {
|
|
47
|
+
outval = 0;
|
|
48
|
+
}
|
|
49
|
+
|
|
50
|
+
mask[j * (ncol/2+1) + outi] = outval;
|
|
51
|
+
}
|
|
52
|
+
|
|
53
|
+
extern "C" __global__ void generate_mask_full(const int ncol, const int nrow,
|
|
54
|
+
const int cen_col, const int cen_row,
|
|
55
|
+
const float du, const float dv,
|
|
56
|
+
const float radius, const float drop,
|
|
57
|
+
float *mask) {
|
|
58
|
+
int i = blockDim.x * blockIdx.x + threadIdx.x;
|
|
59
|
+
int j = blockIdx.y;
|
|
60
|
+
|
|
61
|
+
if (i >= ncol)
|
|
62
|
+
return;
|
|
63
|
+
|
|
64
|
+
// the full-width mask is generated here, so every column in [0, ncol) is evaluated
|
|
65
|
+
int outi = i;
|
|
66
|
+
//i += ncol-1;
|
|
67
|
+
|
|
68
|
+
int pos = __float2int_ru(((j - cen_row) * dv / radius) / du);
|
|
69
|
+
int pos1 = -pos + cen_col;
|
|
70
|
+
int pos2 = pos + cen_col;
|
|
71
|
+
|
|
72
|
+
if (pos1 > pos2) {
|
|
73
|
+
int temp = pos1;
|
|
74
|
+
pos1 = pos2;
|
|
75
|
+
pos2 = temp;
|
|
76
|
+
if (pos1 >= ncol) {
|
|
77
|
+
pos1 = ncol - 1;
|
|
78
|
+
}
|
|
79
|
+
if (pos2 < 0) {
|
|
80
|
+
pos2 = 0;
|
|
81
|
+
}
|
|
82
|
+
} else {
|
|
83
|
+
if (pos1 < 0) {
|
|
84
|
+
pos1 = 0;
|
|
85
|
+
}
|
|
86
|
+
if (pos2 >= ncol) {
|
|
87
|
+
pos2 = ncol - 1;
|
|
88
|
+
}
|
|
89
|
+
}
|
|
90
|
+
|
|
91
|
+
float outval = (pos1 <= i && i <= pos2) ? 1.0 : 0.0;
|
|
92
|
+
|
|
93
|
+
// mask[cen_row - drop: cen_row + drop + 1, :] = 0
|
|
94
|
+
if (j >= cen_row - drop && j <= cen_row + drop) {
|
|
95
|
+
outval = 0;
|
|
96
|
+
}
|
|
97
|
+
// mask[:, cen_col - 1: cen_col + 2] = 0
|
|
98
|
+
if (i >= cen_col - 1 && i <= cen_col + 1) {
|
|
99
|
+
outval = 0;
|
|
100
|
+
}
|
|
101
|
+
|
|
102
|
+
mask[j * ncol + outi] = outval;
|
|
103
|
+
}
|
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
cupy_run = False
|
|
1
2
|
try:
|
|
2
3
|
import cupy as cp
|
|
3
4
|
import nvtx
|
|
@@ -6,7 +7,7 @@ try:
|
|
|
6
7
|
cp.cuda.Device(0).compute_capability
|
|
7
8
|
cupy_run = True
|
|
8
9
|
except cp.cuda.runtime.CUDARuntimeError:
|
|
9
|
-
print("CuPy library is
|
|
10
|
+
print("CuPy library is installed but GPU is not accessible")
|
|
10
11
|
import numpy as cp
|
|
11
12
|
except ImportError as e:
|
|
12
13
|
print(
|
|
@@ -15,6 +16,4 @@ except ImportError as e:
|
|
|
15
16
|
from unittest.mock import Mock
|
|
16
17
|
import numpy as cp
|
|
17
18
|
|
|
18
|
-
cupy_run = False
|
|
19
|
-
|
|
20
19
|
nvtx = Mock()
|
|
@@ -0,0 +1,141 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
# -*- coding: utf-8 -*-
|
|
3
|
+
# ---------------------------------------------------------------------------
|
|
4
|
+
# Copyright 2022 Diamond Light Source Ltd.
|
|
5
|
+
#
|
|
6
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
7
|
+
# you may not use this file except in compliance with the License.
|
|
8
|
+
# You may obtain a copy of the License at
|
|
9
|
+
#
|
|
10
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
11
|
+
#
|
|
12
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
13
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
14
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
15
|
+
# See the License for the specific language governing permissions and
|
|
16
|
+
# limitations under the License.
|
|
17
|
+
# ---------------------------------------------------------------------------
|
|
18
|
+
# Created By : Tomography Team at DLS <scientificsoftware@diamond.ac.uk>
|
|
19
|
+
# Created Date: 18/December/2024
|
|
20
|
+
# ---------------------------------------------------------------------------
|
|
21
|
+
""" Module for data denoising. For more detailed information see :ref:`data_denoising_module`.
|
|
22
|
+
"""
|
|
23
|
+
|
|
24
|
+
import numpy as np
|
|
25
|
+
from typing import Union, Optional
|
|
26
|
+
|
|
27
|
+
from httomolibgpu import cupywrapper
|
|
28
|
+
|
|
29
|
+
cp = cupywrapper.cp
|
|
30
|
+
cupy_run = cupywrapper.cupy_run
|
|
31
|
+
|
|
32
|
+
from numpy import float32
|
|
33
|
+
from unittest.mock import Mock
|
|
34
|
+
|
|
35
|
+
from ccpi.filters.regularisersCuPy import ROF_TV, PD_TV
|
|
36
|
+
|
|
37
|
+
__all__ = [
|
|
38
|
+
"total_variation_ROF",
|
|
39
|
+
"total_variation_PD",
|
|
40
|
+
]
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def total_variation_ROF(
|
|
44
|
+
data: cp.ndarray,
|
|
45
|
+
regularisation_parameter: Optional[float] = 1e-05,
|
|
46
|
+
iterations: Optional[int] = 3000,
|
|
47
|
+
time_marching_parameter: Optional[float] = 0.001,
|
|
48
|
+
gpu_id: Optional[int] = 0,
|
|
49
|
+
) -> cp.ndarray:
|
|
50
|
+
"""
|
|
51
|
+
Total Variation using Rudin-Osher-Fatemi (ROF) :cite:`rudin1992nonlinear` explicit iteration scheme to perform edge-preserving image denoising.
|
|
52
|
+
This is a gradient-based algorithm for a smoothed TV term which requires a small time marching parameter and a significant number of iterations.
|
|
53
|
+
See more in :ref:`method_total_variation_ROF`.
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
Parameters
|
|
57
|
+
----------
|
|
58
|
+
data : cp.ndarray
|
|
59
|
+
Input CuPy 3D array of float32 data type.
|
|
60
|
+
regularisation_parameter : float, optional
|
|
61
|
+
Regularisation parameter to control the level of smoothing. Defaults to 1e-05.
|
|
62
|
+
iterations : int, optional
|
|
63
|
+
The number of iterations. Defaults to 3000.
|
|
64
|
+
time_marching_parameter : float, optional
|
|
65
|
+
Time marching parameter, needs to be small to ensure convergence. Defaults to 0.001.
|
|
66
|
+
gpu_id : int, optional
|
|
67
|
+
GPU device index to perform processing on. Defaults to 0.
|
|
68
|
+
|
|
69
|
+
Returns
|
|
70
|
+
-------
|
|
71
|
+
ndarray
|
|
72
|
+
TV-ROF filtered 3D CuPy array in float32 data type.
|
|
73
|
+
|
|
74
|
+
Raises
|
|
75
|
+
------
|
|
76
|
+
ValueError
|
|
77
|
+
If the input array is not float32 data type.
|
|
78
|
+
"""
|
|
79
|
+
|
|
80
|
+
return ROF_TV(
|
|
81
|
+
data, regularisation_parameter, iterations, time_marching_parameter, gpu_id
|
|
82
|
+
)
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
def total_variation_PD(
|
|
86
|
+
data: cp.ndarray,
|
|
87
|
+
regularisation_parameter: Optional[float] = 1e-05,
|
|
88
|
+
iterations: Optional[int] = 1000,
|
|
89
|
+
isotropic: Optional[bool] = True,
|
|
90
|
+
nonnegativity: Optional[bool] = False,
|
|
91
|
+
lipschitz_const: Optional[float] = 8.0,
|
|
92
|
+
gpu_id: Optional[int] = 0,
|
|
93
|
+
) -> cp.ndarray:
|
|
94
|
+
"""
|
|
95
|
+
Primal Dual algorithm for non-smooth convex Total Variation functional :cite:`chan1999nonlinear`. See more in :ref:`method_total_variation_PD`.
|
|
96
|
+
|
|
97
|
+
Parameters
|
|
98
|
+
----------
|
|
99
|
+
data : cp.ndarray
|
|
100
|
+
Input CuPy 3D array of float32 data type.
|
|
101
|
+
regularisation_parameter : float
|
|
102
|
+
Regularisation parameter to control the level of smoothing. Defaults to 1e-05.
|
|
103
|
+
iterations : int
|
|
104
|
+
The number of iterations. Defaults to 1000.
|
|
105
|
+
isotropic : bool
|
|
106
|
+
Choose between isotropic or anisotropic TV norm. Defaults to isotropic.
|
|
107
|
+
nonnegativity : bool
|
|
108
|
+
Enable non-negativity in iterations. Defaults to False.
|
|
109
|
+
lipschitz_const : float
|
|
110
|
+
Lipschitz constant to control convergence. Defaults to 8.
|
|
111
|
+
gpu_id : int
|
|
112
|
+
GPU device index to perform processing on. Defaults to 0.
|
|
113
|
+
|
|
114
|
+
Returns
|
|
115
|
+
-------
|
|
116
|
+
ndarray
|
|
117
|
+
TV-PD filtered 3D CuPy array in float32 data type.
|
|
118
|
+
|
|
119
|
+
Raises
|
|
120
|
+
------
|
|
121
|
+
ValueError
|
|
122
|
+
If the input array is not float32 data type.
|
|
123
|
+
"""
|
|
124
|
+
|
|
125
|
+
methodTV = 0
|
|
126
|
+
if not isotropic:
|
|
127
|
+
methodTV = 1
|
|
128
|
+
|
|
129
|
+
nonneg = 0
|
|
130
|
+
if nonnegativity:
|
|
131
|
+
nonneg = 1
|
|
132
|
+
|
|
133
|
+
return PD_TV(
|
|
134
|
+
data,
|
|
135
|
+
regularisation_parameter,
|
|
136
|
+
iterations,
|
|
137
|
+
methodTV,
|
|
138
|
+
nonneg,
|
|
139
|
+
lipschitz_const,
|
|
140
|
+
gpu_id,
|
|
141
|
+
)
|
|
@@ -104,25 +104,37 @@ def sino_360_to_180(
|
|
|
104
104
|
def data_resampler(
|
|
105
105
|
data: cp.ndarray, newshape: list, axis: int = 1, interpolation: str = "linear"
|
|
106
106
|
) -> cp.ndarray:
|
|
107
|
-
"""
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
107
|
+
"""
|
|
108
|
+
Down/Up-resampler of the input data implemented through interpn function.
|
|
109
|
+
Please note that the method will leave the specified axis
|
|
110
|
+
dimension unchanged, e.g. (128,128,128) -> (128,256,256) for axis = 0 and
|
|
111
|
+
newshape = [256,256].
|
|
112
|
+
|
|
113
|
+
Parameters
|
|
114
|
+
----------
|
|
115
|
+
data : cp.ndarray
|
|
116
|
+
3d cupy array.
|
|
117
|
+
newshape : list
|
|
118
|
+
Two-element list that defines the 2D slice shape of the resampled data.
|
|
119
|
+
axis : int, optional
|
|
120
|
+
Axis along which the scaling is applied. Defaults to 1.
|
|
121
|
+
interpolation : str, optional
|
|
122
|
+
Selection of interpolation method. Defaults to 'linear'.
|
|
123
|
+
|
|
124
|
+
Raises
|
|
125
|
+
----------
|
|
119
126
|
ValueError: When data is not 3D
|
|
120
127
|
|
|
121
|
-
Returns
|
|
128
|
+
Returns
|
|
129
|
+
-------
|
|
122
130
|
cp.ndarray: Up/Down-scaled 3D cupy array
|
|
123
131
|
"""
|
|
124
|
-
|
|
125
|
-
|
|
132
|
+
expanded = False
|
|
133
|
+
# if 2d data is given it is extended into a 3D array along the vertical dimension
|
|
134
|
+
if data.ndim == 2:
|
|
135
|
+
expanded = True
|
|
136
|
+
data = cp.expand_dims(data, 1)
|
|
137
|
+
axis = 1
|
|
126
138
|
|
|
127
139
|
N, M, Z = cp.shape(data)
|
|
128
140
|
|
|
@@ -214,4 +226,6 @@ def data_resampler(
|
|
|
214
226
|
res, [newshape[0], newshape[1]], order="C"
|
|
215
227
|
)
|
|
216
228
|
|
|
229
|
+
if expanded:
|
|
230
|
+
scaled_data = cp.squeeze(scaled_data, axis=axis)
|
|
217
231
|
return scaled_data
|
|
@@ -128,7 +128,6 @@ def paganin_filter_savu(
|
|
|
128
128
|
# Note: this takes considerable time on GPU...
|
|
129
129
|
data = cp.pad(data, ((0, 0), (pad_y, pad_y), (pad_x, pad_x)), mode=pad_method)
|
|
130
130
|
|
|
131
|
-
# Define array to hold result, which will not have the padding applied to it
|
|
132
131
|
precond_kernel_float = cp.ElementwiseKernel(
|
|
133
132
|
"T data",
|
|
134
133
|
"T out",
|
|
@@ -394,13 +394,13 @@ def raven_filter(
|
|
|
394
394
|
Numpy pad method to use.
|
|
395
395
|
|
|
396
396
|
uvalue : int, optional
|
|
397
|
-
|
|
397
|
+
Cut-off frequency. To control the strength of filter, e.g., strong=10, moderate=20, weak=50.
|
|
398
398
|
|
|
399
399
|
nvalue : int, optional
|
|
400
400
|
The shape of filter.
|
|
401
401
|
|
|
402
402
|
vvalue : int, optional
|
|
403
|
-
|
|
403
|
+
Number of image rows around the zero frequency to which the filter is applied.
|
|
404
404
|
|
|
405
405
|
Returns
|
|
406
406
|
-------
|
|
@@ -447,6 +447,7 @@ def raven_filter(
|
|
|
447
447
|
raven_filt = raven_module.get_function(kernel_args)
|
|
448
448
|
|
|
449
449
|
raven_filt(grid_dims, block_dims, params)
|
|
450
|
+
del fft_data_shifted
|
|
450
451
|
|
|
451
452
|
# raven_filt already doing ifftshifting
|
|
452
453
|
data = ifft2(fft_data, axes=(0, 2), overwrite_x=True)
|
|
@@ -54,15 +54,15 @@ def FBP(
|
|
|
54
54
|
data: cp.ndarray,
|
|
55
55
|
angles: np.ndarray,
|
|
56
56
|
center: Optional[float] = None,
|
|
57
|
-
filter_freq_cutoff: Optional[float] =
|
|
57
|
+
filter_freq_cutoff: Optional[float] = 0.35,
|
|
58
58
|
recon_size: Optional[int] = None,
|
|
59
|
-
recon_mask_radius: Optional[float] =
|
|
59
|
+
recon_mask_radius: Optional[float] = 0.95,
|
|
60
60
|
gpu_id: int = 0,
|
|
61
61
|
) -> cp.ndarray:
|
|
62
62
|
"""
|
|
63
63
|
Perform Filtered Backprojection (FBP) reconstruction using ASTRA toolbox :cite:`van2016fast` and
|
|
64
64
|
ToMoBAR :cite:`kazantsev2020tomographic` wrappers.
|
|
65
|
-
This is a 3D recon from
|
|
65
|
+
This is a 3D recon from the CuPy array directly and using a custom built SINC filter for filtration in Fourier space.
|
|
66
66
|
|
|
67
67
|
Parameters
|
|
68
68
|
----------
|
|
@@ -72,16 +72,16 @@ def FBP(
|
|
|
72
72
|
An array of angles given in radians.
|
|
73
73
|
center : float, optional
|
|
74
74
|
The center of rotation (CoR).
|
|
75
|
-
filter_freq_cutoff : float
|
|
76
|
-
Cutoff frequency parameter for the
|
|
75
|
+
filter_freq_cutoff : float
|
|
76
|
+
Cutoff frequency parameter for the SINC filter; lower values produce better contrast but a noisier reconstruction.
|
|
77
77
|
recon_size : int, optional
|
|
78
78
|
The [recon_size, recon_size] shape of the reconstructed slice in pixels.
|
|
79
79
|
By default (None), the reconstructed size will be the dimension of the horizontal detector.
|
|
80
|
-
recon_mask_radius: float
|
|
80
|
+
recon_mask_radius: float
|
|
81
81
|
The radius of the circular mask that applies to the reconstructed slice in order to crop
|
|
82
|
-
out some undesirable artifacts. The values outside the diameter will be set to zero.
|
|
83
|
-
|
|
84
|
-
gpu_id : int
|
|
82
|
+
out some undesirable artifacts. The values outside the given diameter will be set to zero.
|
|
83
|
+
It is recommended to keep the value in the range [0.7-1.0].
|
|
84
|
+
gpu_id : int
|
|
85
85
|
A GPU device index to perform operation on.
|
|
86
86
|
|
|
87
87
|
Returns
|
|
@@ -109,7 +109,7 @@ def LPRec(
|
|
|
109
109
|
angles: np.ndarray,
|
|
110
110
|
center: Optional[float] = None,
|
|
111
111
|
recon_size: Optional[int] = None,
|
|
112
|
-
recon_mask_radius: Optional[float] =
|
|
112
|
+
recon_mask_radius: Optional[float] = 0.95,
|
|
113
113
|
) -> cp.ndarray:
|
|
114
114
|
"""
|
|
115
115
|
Fourier direct inversion in 3D on unequally spaced (also called Log-Polar) grids using
|
|
@@ -127,10 +127,10 @@ def LPRec(
|
|
|
127
127
|
recon_size : int, optional
|
|
128
128
|
The [recon_size, recon_size] shape of the reconstructed slice in pixels.
|
|
129
129
|
By default (None), the reconstructed size will be the dimension of the horizontal detector.
|
|
130
|
-
recon_mask_radius: float
|
|
130
|
+
recon_mask_radius: float
|
|
131
131
|
The radius of the circular mask that applies to the reconstructed slice in order to crop
|
|
132
|
-
out some undesirable artifacts. The values outside the diameter will be set to zero.
|
|
133
|
-
|
|
132
|
+
out some undesirable artifacts. The values outside the given diameter will be set to zero.
|
|
133
|
+
It is recommended to keep the value in the range [0.7-1.0].
|
|
134
134
|
|
|
135
135
|
Returns
|
|
136
136
|
-------
|
|
@@ -33,13 +33,16 @@ if cupy_run:
|
|
|
33
33
|
from cupyx.scipy.ndimage import shift, gaussian_filter
|
|
34
34
|
from skimage.registration import phase_cross_correlation
|
|
35
35
|
from cupyx.scipy.fftpack import get_fft_plan
|
|
36
|
-
from cupyx.scipy.fft import
|
|
36
|
+
from cupyx.scipy.fft import fft2, fftshift
|
|
37
37
|
else:
|
|
38
38
|
load_cuda_module = Mock()
|
|
39
39
|
shift = Mock()
|
|
40
40
|
gaussian_filter = Mock()
|
|
41
41
|
phase_cross_correlation = Mock()
|
|
42
42
|
get_fft_plan = Mock()
|
|
43
|
+
fft2 = Mock()
|
|
44
|
+
fftshift = Mock()
|
|
45
|
+
fft = Mock()
|
|
43
46
|
rfft2 = Mock()
|
|
44
47
|
|
|
45
48
|
import math
|
|
@@ -55,92 +58,131 @@ __all__ = [
|
|
|
55
58
|
def find_center_vo(
|
|
56
59
|
data: cp.ndarray,
|
|
57
60
|
ind: Optional[int] = None,
|
|
58
|
-
|
|
59
|
-
|
|
61
|
+
average_radius: int = 0,
|
|
62
|
+
cor_initialisation_value: Optional[float] = None,
|
|
63
|
+
smin: int = -100,
|
|
64
|
+
smax: int = 100,
|
|
60
65
|
srad: float = 6.0,
|
|
61
66
|
step: float = 0.25,
|
|
62
67
|
ratio: float = 0.5,
|
|
63
68
|
drop: int = 20,
|
|
64
|
-
) ->
|
|
69
|
+
) -> np.float32:
|
|
65
70
|
"""
|
|
66
|
-
Find rotation axis location (aka
|
|
71
|
+
Find the rotation axis location (aka the centre of rotation) using Nghia Vo's method. See the paper
|
|
67
72
|
:cite:`vo2014reliable`.
|
|
68
73
|
|
|
69
74
|
Parameters
|
|
70
75
|
----------
|
|
71
76
|
data : cp.ndarray
|
|
72
|
-
3D tomographic data or a 2D sinogram as a CuPy array.
|
|
77
|
+
3D [angles, detY, detX] tomographic data or a 2D [angles, detX] sinogram as a CuPy array.
|
|
73
78
|
ind : int, optional
|
|
74
|
-
Index of the slice to be used to estimate the CoR.
|
|
75
|
-
|
|
79
|
+
Index of the slice to be used to estimate the CoR. If None is given, then the central sinogram will be extracted from the data array with a possible averaging, see average_radius.
|
|
80
|
+
average_radius : int
|
|
81
|
+
Averaging multiple sinograms around the ind-indexed sinogram to improve the signal-to-noise ratio. It is recommended to keep this parameter smaller than 10.
|
|
82
|
+
cor_initialisation_value : float, optional
|
|
83
|
+
The initial approximation for the centre of rotation. If the value is None, use the horizontal centre of the projection/sinogram image.
|
|
84
|
+
smin : int
|
|
76
85
|
Coarse search radius. Reference to the horizontal center of
|
|
77
86
|
the sinogram.
|
|
78
|
-
smax : int
|
|
87
|
+
smax : int
|
|
79
88
|
Coarse search radius. Reference to the horizontal center of
|
|
80
89
|
the sinogram.
|
|
81
|
-
srad : float
|
|
90
|
+
srad : float
|
|
82
91
|
Fine search radius.
|
|
83
|
-
step : float
|
|
92
|
+
step : float
|
|
84
93
|
Step of fine searching.
|
|
85
|
-
ratio : float
|
|
94
|
+
ratio : float
|
|
86
95
|
The ratio between the FOV of the camera and the size of object.
|
|
87
96
|
It's used to generate the mask.
|
|
88
|
-
drop : int
|
|
97
|
+
drop : int
|
|
89
98
|
Drop lines around vertical center of the mask.
|
|
90
99
|
|
|
91
100
|
Returns
|
|
92
101
|
-------
|
|
93
|
-
|
|
94
|
-
Rotation axis location.
|
|
102
|
+
float32
|
|
103
|
+
Rotation axis location with a subpixel precision.
|
|
95
104
|
"""
|
|
105
|
+
# if 2d sinogram is given it is extended into a 3D array along the vertical dimension
|
|
96
106
|
if data.ndim == 2:
|
|
97
107
|
data = cp.expand_dims(data, 1)
|
|
98
108
|
ind = 0
|
|
99
109
|
|
|
100
|
-
|
|
110
|
+
angles_tot, detY_size, detX_size = data.shape
|
|
101
111
|
|
|
102
112
|
if ind is None:
|
|
103
|
-
ind =
|
|
104
|
-
|
|
105
|
-
|
|
113
|
+
ind = detY_size // 2 # middle slice index
|
|
114
|
+
# averaging the data here to improve SNR
|
|
115
|
+
if 2 * average_radius >= detY_size:
|
|
116
|
+
# reduce the averaging radius
|
|
117
|
+
average_radius = ind
|
|
118
|
+
if ind > 0:
|
|
119
|
+
_sino = cp.mean(
|
|
120
|
+
data[:, ind - average_radius : ind + average_radius, :], axis=1
|
|
121
|
+
)
|
|
106
122
|
else:
|
|
107
123
|
_sino = data[:, ind, :]
|
|
108
124
|
else:
|
|
109
125
|
_sino = data[:, ind, :]
|
|
110
126
|
|
|
127
|
+
if cor_initialisation_value is None:
|
|
128
|
+
cor_initialisation_value = (detX_size - 1.0) / 2.0
|
|
129
|
+
|
|
130
|
+
# downsampling ratios
|
|
131
|
+
dsp_angle = 1
|
|
132
|
+
dsp_detX = 1
|
|
133
|
+
if detX_size > 2000:
|
|
134
|
+
dsp_detX = 4
|
|
135
|
+
if angles_tot > 2000:
|
|
136
|
+
dsp_angle = 2
|
|
137
|
+
|
|
138
|
+
start_cor = np.int16(np.floor(1.0 * (cor_initialisation_value + smin) / dsp_detX))
|
|
139
|
+
stop_cor = np.int16(np.ceil(1.0 * (cor_initialisation_value + smax) / dsp_detX))
|
|
140
|
+
fine_srange = max(srad, dsp_detX)
|
|
141
|
+
off_set = 0.5 * dsp_detX if dsp_detX > 1 else 0.0
|
|
142
|
+
|
|
143
|
+
# initiate denoising
|
|
111
144
|
_sino_cs = gaussian_filter(_sino, (3, 1), mode="reflect")
|
|
112
145
|
_sino_fs = gaussian_filter(_sino, (2, 2), mode="reflect")
|
|
113
146
|
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
fine_cen = _search_fine(_sino_fs, srad, step, init_cen * 4.0, ratio, drop)
|
|
120
|
-
else:
|
|
121
|
-
init_cen = _search_coarse(_sino_cs, smin, smax, ratio, drop)
|
|
122
|
-
fine_cen = _search_fine(_sino_fs, srad, step, init_cen, ratio, drop)
|
|
147
|
+
# Downsampling by averaging along a chosen dimension
|
|
148
|
+
if dsp_angle > 1 or dsp_detX > 1:
|
|
149
|
+
_sino_cs = _downsample(_sino_cs, dsp_angle, dsp_detX)
|
|
150
|
+
|
|
151
|
+
init_cen = _search_coarse(_sino_cs, start_cor, stop_cor, ratio, drop)
|
|
123
152
|
|
|
124
|
-
|
|
153
|
+
fine_cen = _search_fine(
|
|
154
|
+
_sino_fs, fine_srange, step, float(init_cen) * dsp_detX + off_set, ratio, drop
|
|
155
|
+
)
|
|
156
|
+
cen_np = np.float32(cp.asnumpy(fine_cen))
|
|
157
|
+
if cen_np == 0.0:
|
|
158
|
+
return np.float32(cor_initialisation_value)
|
|
159
|
+
else:
|
|
160
|
+
return cen_np
|
|
125
161
|
|
|
126
162
|
|
|
127
163
|
def _search_coarse(sino, smin, smax, ratio, drop):
|
|
128
164
|
(nrow, ncol) = sino.shape
|
|
129
165
|
flip_sino = cp.ascontiguousarray(cp.fliplr(sino))
|
|
130
166
|
comp_sino = cp.ascontiguousarray(cp.flipud(sino))
|
|
131
|
-
mask = _create_mask(2 * nrow, ncol, 0.5 * ratio * ncol, drop)
|
|
132
167
|
|
|
168
|
+
mask = _create_mask(2 * nrow, ncol, 0.5 * ratio * ncol, drop)
|
|
133
169
|
cen_fliplr = (ncol - 1.0) / 2.0
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
start_cor = ncol // 2 + smin
|
|
139
|
-
stop_cor = ncol // 2 + smax
|
|
140
|
-
list_cor = cp.arange(start_cor, stop_cor + 0.5, 0.5, dtype=cp.float32)
|
|
170
|
+
start_cor, stop_cor = np.sort((smin, smax))
|
|
171
|
+
start_cor = np.int16(np.clip(start_cor, 0, ncol - 1))
|
|
172
|
+
stop_cor = np.int16(np.clip(stop_cor, 0, ncol - 1))
|
|
173
|
+
list_cor = cp.arange(start_cor, stop_cor + 1.0, dtype=cp.float32)
|
|
141
174
|
list_shift = 2.0 * (list_cor - cen_fliplr)
|
|
142
175
|
list_metric = cp.empty(list_shift.shape, dtype=cp.float32)
|
|
143
|
-
|
|
176
|
+
|
|
177
|
+
sino_sino = cp.vstack((sino, flip_sino))
|
|
178
|
+
for i, shift in enumerate(list_shift):
|
|
179
|
+
_sino = sino_sino[nrow:]
|
|
180
|
+
_sino[...] = cp.roll(flip_sino, int(shift), axis=1)
|
|
181
|
+
if shift >= 0:
|
|
182
|
+
_sino[:, :shift] = comp_sino[:, :shift]
|
|
183
|
+
else:
|
|
184
|
+
_sino[:, shift:] = comp_sino[:, shift:]
|
|
185
|
+
list_metric[i] = cp.mean(cp.abs(fftshift(fft2(sino_sino))) * mask)
|
|
144
186
|
|
|
145
187
|
minpos = cp.argmin(list_metric)
|
|
146
188
|
if minpos == 0:
|
|
@@ -161,10 +203,10 @@ def _search_fine(sino, srad, step, init_cen, ratio, drop):
|
|
|
161
203
|
mask = _create_mask(2 * nrow, ncol, 0.5 * ratio * ncol, drop)
|
|
162
204
|
|
|
163
205
|
cen_fliplr = (ncol - 1.0) / 2.0
|
|
164
|
-
srad =
|
|
165
|
-
step =
|
|
166
|
-
init_cen =
|
|
167
|
-
list_cor = init_cen + cp.arange(-srad, srad + step, step, dtype=
|
|
206
|
+
srad = np.clip(np.abs(srad), 1, ncol // 10 - 1)
|
|
207
|
+
step = np.clip(np.abs(step), 0.1, 1.1)
|
|
208
|
+
init_cen = np.clip(init_cen, srad, ncol - srad - 1)
|
|
209
|
+
list_cor = init_cen + cp.arange(-srad, srad + step, step, dtype=cp.float32)
|
|
168
210
|
list_shift = 2.0 * (list_cor - cen_fliplr)
|
|
169
211
|
list_metric = cp.empty(list_shift.shape, dtype="float32")
|
|
170
212
|
|
|
@@ -183,10 +225,10 @@ def _create_mask(nrow, ncol, radius, drop):
|
|
|
183
225
|
block_x = 128
|
|
184
226
|
block_y = 1
|
|
185
227
|
block_dims = (block_x, block_y)
|
|
186
|
-
grid_x = (ncol
|
|
228
|
+
grid_x = (ncol + block_x - 1) // block_x
|
|
187
229
|
grid_y = nrow
|
|
188
230
|
grid_dims = (grid_x, grid_y)
|
|
189
|
-
mask = cp.empty((nrow, ncol
|
|
231
|
+
mask = cp.empty((nrow, ncol), dtype="float32")
|
|
190
232
|
params = (
|
|
191
233
|
ncol,
|
|
192
234
|
nrow,
|
|
@@ -199,7 +241,7 @@ def _create_mask(nrow, ncol, radius, drop):
|
|
|
199
241
|
mask,
|
|
200
242
|
)
|
|
201
243
|
module = load_cuda_module("generate_mask")
|
|
202
|
-
kernel = module.get_function("
|
|
244
|
+
kernel = module.get_function("generate_mask_full")
|
|
203
245
|
kernel(grid_dims, block_dims, params)
|
|
204
246
|
return mask
|
|
205
247
|
|
|
@@ -229,10 +271,10 @@ def _calculate_chunks(
|
|
|
229
271
|
|
|
230
272
|
available_memory -= shift_size
|
|
231
273
|
freq_domain_size = (
|
|
232
|
-
shift_size # it needs
|
|
274
|
+
shift_size * 2 # it needs full (FFT), with complex64, so it's double
|
|
233
275
|
)
|
|
234
276
|
fft_plan_size = freq_domain_size
|
|
235
|
-
size_per_shift = 2 * (fft_plan_size + freq_domain_size + shift_size)
|
|
277
|
+
size_per_shift = 2.5 * (fft_plan_size + freq_domain_size + shift_size)
|
|
236
278
|
nshift_max = available_memory // size_per_shift
|
|
237
279
|
assert nshift_max > 0, "Not enough memory to process"
|
|
238
280
|
num_chunks = int(np.ceil(nshifts / nshift_max))
|
|
@@ -243,28 +285,28 @@ def _calculate_chunks(
|
|
|
243
285
|
return stop_idx
|
|
244
286
|
|
|
245
287
|
|
|
246
|
-
def _calculate_metric(list_shift,
|
|
288
|
+
def _calculate_metric(list_shift, sino, flip_sino, comp_sino, mask, out):
|
|
247
289
|
# this tries to simplify - if shift_col is integer, no need to spline interpolate
|
|
248
290
|
assert list_shift.dtype == cp.float32, "shifts must be single precision floats"
|
|
249
|
-
assert
|
|
250
|
-
assert
|
|
251
|
-
assert
|
|
252
|
-
assert out.dtype == cp.float32, "
|
|
253
|
-
assert
|
|
254
|
-
assert
|
|
291
|
+
assert sino.dtype == cp.float32, "sino must be float32"
|
|
292
|
+
assert flip_sino.dtype == cp.float32, "flip_sino must be float32"
|
|
293
|
+
assert comp_sino.dtype == cp.float32, "comp_sino must be float32"
|
|
294
|
+
assert out.dtype == cp.float32, "out must be float32"
|
|
295
|
+
assert flip_sino.flags["C_CONTIGUOUS"], "flip_sino must be C-contiguous"
|
|
296
|
+
assert comp_sino.flags["C_CONTIGUOUS"], "comp_sino must be C-contiguous"
|
|
255
297
|
assert list_shift.flags["C_CONTIGUOUS"], "list_shift must be C-contiguous"
|
|
256
298
|
nshifts = list_shift.shape[0]
|
|
257
|
-
na1 =
|
|
258
|
-
na2 =
|
|
299
|
+
na1 = sino.shape[0]
|
|
300
|
+
na2 = flip_sino.shape[0]
|
|
259
301
|
|
|
260
302
|
module = load_cuda_module("center_360_shifts")
|
|
261
303
|
shift_whole_shifts = module.get_function("shift_whole_shifts")
|
|
262
304
|
# note: we don't have to calculate the mean here, as we're only looking for minimum metric.
|
|
263
305
|
# The sum is enough.
|
|
264
306
|
masked_sum_abs_kernel = cp.ReductionKernel(
|
|
265
|
-
in_params="complex64 x,
|
|
307
|
+
in_params="complex64 x, float32 mask", # input, complex + mask
|
|
266
308
|
out_params="float32 out", # output, real
|
|
267
|
-
map_expr="
|
|
309
|
+
map_expr="abs(x) * mask",
|
|
268
310
|
reduce_expr="a + b",
|
|
269
311
|
post_map_expr="out = a",
|
|
270
312
|
identity="0.0f",
|
|
@@ -275,13 +317,14 @@ def _calculate_metric(list_shift, sino1, sino2, sino3, mask, out):
|
|
|
275
317
|
# determine how many shifts we can fit in the available memory
|
|
276
318
|
# and iterate in chunks
|
|
277
319
|
chunks = _calculate_chunks(
|
|
278
|
-
nshifts, (na1 + na2) *
|
|
320
|
+
nshifts, (na1 + na2) * flip_sino.shape[1] * cp.float32().nbytes
|
|
279
321
|
)
|
|
280
322
|
|
|
281
|
-
mat = cp.empty((chunks[0], na1 + na2,
|
|
282
|
-
mat[:, :na1, :] =
|
|
323
|
+
mat = cp.empty((chunks[0], na1 + na2, flip_sino.shape[1]), dtype=cp.float32)
|
|
324
|
+
mat[:, :na1, :] = sino
|
|
325
|
+
|
|
283
326
|
# explicitly create FFT plan here, so it's not cached and clearly re-used
|
|
284
|
-
plan = get_fft_plan(mat, mat.shape[-2:], axes=(1, 2), value_type="
|
|
327
|
+
plan = get_fft_plan(mat, mat.shape[-2:], axes=(1, 2), value_type="C2C")
|
|
285
328
|
|
|
286
329
|
for i, stop_idx in enumerate(chunks):
|
|
287
330
|
if i > 0:
|
|
@@ -293,18 +336,18 @@ def _calculate_metric(list_shift, sino1, sino2, sino3, mask, out):
|
|
|
293
336
|
size = stop_idx - start_idx
|
|
294
337
|
|
|
295
338
|
# first, handle the integer shifts without spline in a raw kernel,
|
|
296
|
-
# and shift in the
|
|
339
|
+
# and shift in the comp_sino one accordingly
|
|
297
340
|
bx = 128
|
|
298
|
-
gx = (
|
|
341
|
+
gx = (comp_sino.shape[1] + bx - 1) // bx
|
|
299
342
|
shift_whole_shifts(
|
|
300
343
|
grid=(gx, na2, size), ####
|
|
301
344
|
block=(bx, 1, 1),
|
|
302
345
|
args=(
|
|
303
|
-
|
|
304
|
-
|
|
346
|
+
flip_sino,
|
|
347
|
+
comp_sino,
|
|
305
348
|
list_shift[start_idx:stop_idx],
|
|
306
349
|
mat[:, na1:, :],
|
|
307
|
-
|
|
350
|
+
comp_sino.shape[1],
|
|
308
351
|
na1 + na2,
|
|
309
352
|
),
|
|
310
353
|
)
|
|
@@ -314,7 +357,7 @@ def _calculate_metric(list_shift, sino1, sino2, sino3, mask, out):
|
|
|
314
357
|
for i in range(list_shift_host.shape[0]):
|
|
315
358
|
shift_col = float(list_shift_host[i])
|
|
316
359
|
if not shift_col.is_integer():
|
|
317
|
-
shifted = shift(
|
|
360
|
+
shifted = shift(flip_sino, (0, shift_col), order=3, prefilter=True)
|
|
318
361
|
shift_int = round_up(shift_col)
|
|
319
362
|
if shift_int >= 0:
|
|
320
363
|
mat[i, na1:, shift_int:] = shifted[:, shift_int:]
|
|
@@ -324,38 +367,41 @@ def _calculate_metric(list_shift, sino1, sino2, sino3, mask, out):
|
|
|
324
367
|
# stack and transform
|
|
325
368
|
# (we do the full sized mat FFT, even though the last chunk may be smaller, to
|
|
326
369
|
# make sure we can re-use the same FFT plan as before)
|
|
327
|
-
mat_freq =
|
|
370
|
+
mat_freq = fftshift(fft2(mat, axes=(1, 2), norm=None, plan=plan), axes=(1, 2))
|
|
371
|
+
|
|
328
372
|
masked_sum_abs_kernel(
|
|
329
373
|
mat_freq[:size, :, :], mask, out=out[start_idx:stop_idx], axis=(1, 2)
|
|
330
374
|
)
|
|
331
375
|
|
|
332
376
|
|
|
333
|
-
def _downsample(
|
|
334
|
-
|
|
335
|
-
assert sino.flags["C_CONTIGUOUS"], "list_shift must be C-contiguous"
|
|
377
|
+
def _downsample(image, dsp_fact0, dsp_fact1):
|
|
378
|
+
"""Downsample an image by averaging.
|
|
336
379
|
|
|
337
|
-
|
|
338
|
-
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
downsampled_data = cp.empty(shape, dtype="float32")
|
|
380
|
+
Parameters
|
|
381
|
+
----------
|
|
382
|
+
image : 2D array.
|
|
383
|
+
dsp_fact0 : downsampling factor along axis 0.
|
|
384
|
+
dsp_fact1 : downsampling factor along axis 1.
|
|
343
385
|
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
|
|
347
|
-
|
|
348
|
-
|
|
349
|
-
|
|
350
|
-
|
|
351
|
-
|
|
352
|
-
|
|
353
|
-
|
|
354
|
-
|
|
355
|
-
|
|
356
|
-
|
|
357
|
-
|
|
358
|
-
|
|
386
|
+
Returns
|
|
387
|
+
---------
|
|
388
|
+
image_dsp : Downsampled image.
|
|
389
|
+
"""
|
|
390
|
+
(height, width) = image.shape
|
|
391
|
+
dsp_fact0 = cp.clip(cp.int16(dsp_fact0), 1, height // 2)
|
|
392
|
+
dsp_fact1 = cp.clip(cp.int16(dsp_fact1), 1, width // 2)
|
|
393
|
+
height_dsp = height // dsp_fact0
|
|
394
|
+
width_dsp = width // dsp_fact1
|
|
395
|
+
if dsp_fact0 == 1 and dsp_fact1 == 1:
|
|
396
|
+
image_dsp = image
|
|
397
|
+
else:
|
|
398
|
+
image_dsp = image[0 : dsp_fact0 * height_dsp, 0 : dsp_fact1 * width_dsp]
|
|
399
|
+
image_dsp = (
|
|
400
|
+
image_dsp.reshape(height_dsp, dsp_fact0, width_dsp, dsp_fact1)
|
|
401
|
+
.mean(-1)
|
|
402
|
+
.mean(1)
|
|
403
|
+
)
|
|
404
|
+
return image_dsp
|
|
359
405
|
|
|
360
406
|
|
|
361
407
|
##%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
@@ -688,22 +734,30 @@ def find_center_pc(
|
|
|
688
734
|
proj2: cp.ndarray,
|
|
689
735
|
tol: float = 0.5,
|
|
690
736
|
rotc_guess: Union[float, Optional[str]] = None,
|
|
691
|
-
) ->
|
|
692
|
-
"""
|
|
737
|
+
) -> np.float32:
|
|
738
|
+
"""
|
|
739
|
+
Find rotation axis location by finding the offset between the first
|
|
693
740
|
projection and a mirrored projection 180 degrees apart using
|
|
694
741
|
phase correlation in Fourier space.
|
|
695
742
|
The `phase_cross_correlation` function uses cross-correlation in Fourier
|
|
696
743
|
space, optionally employing an upsampled matrix-multiplication DFT to
|
|
697
744
|
achieve arbitrary subpixel precision. See :cite:`guizar2008efficient`.
|
|
698
745
|
|
|
699
|
-
|
|
700
|
-
|
|
701
|
-
|
|
702
|
-
|
|
703
|
-
|
|
746
|
+
Parameters
|
|
747
|
+
----------
|
|
748
|
+
proj1 : cp.ndarray
|
|
749
|
+
Projection from the 0th degree angle.
|
|
750
|
+
proj2 : cp.ndarray
|
|
751
|
+
Projection from the 180th degree angle.
|
|
752
|
+
tol : float, optional
|
|
753
|
+
Subpixel accuracy. Defaults to 0.5.
|
|
754
|
+
rotc_guess : float, optional
|
|
755
|
+
Initial guess value for the rotation center. Defaults to None.
|
|
704
756
|
|
|
705
|
-
Returns
|
|
706
|
-
|
|
757
|
+
Returns
|
|
758
|
+
----------
|
|
759
|
+
np.float32
|
|
760
|
+
Rotation axis location.
|
|
707
761
|
"""
|
|
708
762
|
imgshift = 0.0 if rotc_guess is None else rotc_guess - (proj1.shape[1] - 1.0) / 2.0
|
|
709
763
|
|
|
@@ -722,7 +776,7 @@ def find_center_pc(
|
|
|
722
776
|
# registered translation with the second image
|
|
723
777
|
center = (proj1.shape[1] + shiftr[0][1] - 1.0) / 2.0
|
|
724
778
|
|
|
725
|
-
return center + imgshift
|
|
779
|
+
return np.float32(center + imgshift)
|
|
726
780
|
|
|
727
781
|
|
|
728
782
|
##%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
Metadata-Version: 2.
|
|
1
|
+
Metadata-Version: 2.2
|
|
2
2
|
Name: httomolibgpu
|
|
3
|
-
Version: 2.
|
|
3
|
+
Version: 2.3
|
|
4
4
|
Summary: Commonly used tomography data processing methods at DLS.
|
|
5
5
|
Author-email: Daniil Kazantsev <daniil.kazantsev@diamond.ac.uk>, Yousef Moazzam <yousef.moazzam@diamond.ac.uk>, Naman Gera <naman.gera@diamond.ac.uk>
|
|
6
6
|
License: BSD-3-Clause
|
|
@@ -12,12 +12,14 @@ Classifier: Environment :: GPU :: NVIDIA CUDA
|
|
|
12
12
|
Requires-Python: >=3.10
|
|
13
13
|
Description-Content-Type: text/x-rst
|
|
14
14
|
License-File: LICENSE
|
|
15
|
-
Requires-Dist: cupy
|
|
15
|
+
Requires-Dist: cupy==12.3.0
|
|
16
|
+
Requires-Dist: nvtx
|
|
16
17
|
Requires-Dist: numpy
|
|
17
18
|
Requires-Dist: scipy
|
|
18
19
|
Requires-Dist: pillow
|
|
19
20
|
Requires-Dist: scikit-image
|
|
20
21
|
Requires-Dist: tomobar
|
|
22
|
+
Requires-Dist: ccpi-regularisation-cupy
|
|
21
23
|
Provides-Extra: dev
|
|
22
24
|
Requires-Dist: pytest; extra == "dev"
|
|
23
25
|
Requires-Dist: pytest-cov; extra == "dev"
|
|
@@ -43,10 +45,10 @@ Some of the methods also have been optimised to ensure higher computational effi
|
|
|
43
45
|
The purpose of HTTomolibGPU
|
|
44
46
|
===========================
|
|
45
47
|
|
|
46
|
-
**HTTomolibGPU** can be used as a stand-alone library,
|
|
47
|
-
|
|
48
|
+
Although **HTTomolibGPU** can be used as a stand-alone library, it has been specifically developed to work together with the
|
|
49
|
+
`HTTomo <https://diamondlightsource.github.io/httomo/>`_ package as
|
|
48
50
|
its backend for data processing. HTTomo is a user interface (UI) written in Python for fast big tomographic data processing using
|
|
49
|
-
MPI protocols.
|
|
51
|
+
MPI protocols or as well serially.
|
|
50
52
|
|
|
51
53
|
Install HTTomolibGPU as a PyPi package
|
|
52
54
|
=========================================================
|
|
@@ -12,13 +12,13 @@ httomolibgpu.egg-info/top_level.txt
|
|
|
12
12
|
httomolibgpu/cuda_kernels/__init__.py
|
|
13
13
|
httomolibgpu/cuda_kernels/calc_metrics.cu
|
|
14
14
|
httomolibgpu/cuda_kernels/center_360_shifts.cu
|
|
15
|
-
httomolibgpu/cuda_kernels/downsample_sino.cu
|
|
16
15
|
httomolibgpu/cuda_kernels/generate_mask.cu
|
|
17
16
|
httomolibgpu/cuda_kernels/median_kernel.cu
|
|
18
17
|
httomolibgpu/cuda_kernels/paganin_filter_gen.cu
|
|
19
18
|
httomolibgpu/cuda_kernels/raven_filter.cu
|
|
20
19
|
httomolibgpu/misc/__init__.py
|
|
21
20
|
httomolibgpu/misc/corr.py
|
|
21
|
+
httomolibgpu/misc/denoise.py
|
|
22
22
|
httomolibgpu/misc/morph.py
|
|
23
23
|
httomolibgpu/misc/rescale.py
|
|
24
24
|
httomolibgpu/prep/__init__.py
|
|
@@ -37,12 +37,14 @@ classifiers = [
|
|
|
37
37
|
requires-python = ">=3.10"
|
|
38
38
|
dynamic = ["version"]
|
|
39
39
|
dependencies = [
|
|
40
|
-
"cupy",
|
|
40
|
+
"cupy==12.3.0",
|
|
41
|
+
"nvtx",
|
|
41
42
|
"numpy",
|
|
42
43
|
"scipy",
|
|
43
44
|
"pillow",
|
|
44
45
|
"scikit-image",
|
|
45
|
-
"tomobar"
|
|
46
|
+
"tomobar",
|
|
47
|
+
"ccpi-regularisation-cupy"
|
|
46
48
|
]
|
|
47
49
|
|
|
48
50
|
[project.optional-dependencies]
|
|
@@ -1,36 +0,0 @@
|
|
|
1
|
-
extern "C" __global__ void downsample_sino(float *sino, int dx, int dz,
|
|
2
|
-
int level, float *out) {
|
|
3
|
-
// use shared memory to store the values used to "merge" columns of the
|
|
4
|
-
// sinogram in the downsampling process
|
|
5
|
-
extern __shared__ float downsampled_vals[];
|
|
6
|
-
unsigned int binsize, i, j, k, orig_ind, out_ind, output_bin_no;
|
|
7
|
-
i = blockDim.x * blockIdx.x + threadIdx.x;
|
|
8
|
-
j = 0;
|
|
9
|
-
k = blockDim.y * blockIdx.y + threadIdx.y;
|
|
10
|
-
orig_ind = (k * dz) + i;
|
|
11
|
-
binsize = 1 << level;
|
|
12
|
-
unsigned int dz_downsampled =
|
|
13
|
-
__float2uint_rd(fdividef(__uint2float_rd(dz), __uint2float_rd(binsize)));
|
|
14
|
-
unsigned int i_downsampled =
|
|
15
|
-
__float2uint_rd(fdividef(__uint2float_rd(i), __uint2float_rd(binsize)));
|
|
16
|
-
if (orig_ind < dx * dz) {
|
|
17
|
-
output_bin_no =
|
|
18
|
-
__float2uint_rd(fdividef(__uint2float_rd(i), __uint2float_rd(binsize)));
|
|
19
|
-
out_ind = (k * dz_downsampled) + i_downsampled;
|
|
20
|
-
downsampled_vals[threadIdx.y * 8 + threadIdx.x] =
|
|
21
|
-
sino[orig_ind] / __uint2float_rd(binsize);
|
|
22
|
-
// synchronise threads within thread-block so that it's guaranteed
|
|
23
|
-
// that all the required values have been copied into shared memeory
|
|
24
|
-
// to then sum and save in the downsampled output
|
|
25
|
-
__syncthreads();
|
|
26
|
-
// arbitrarily use the "beginning thread" in each "lot" of pixels
|
|
27
|
-
// for downsampling to then save the desired value in the
|
|
28
|
-
// downsampled output array
|
|
29
|
-
if (i % 4 == 0) {
|
|
30
|
-
out[out_ind] = downsampled_vals[threadIdx.y * 8 + threadIdx.x] +
|
|
31
|
-
downsampled_vals[threadIdx.y * 8 + threadIdx.x + 1] +
|
|
32
|
-
downsampled_vals[threadIdx.y * 8 + threadIdx.x + 2] +
|
|
33
|
-
downsampled_vals[threadIdx.y * 8 + threadIdx.x + 3];
|
|
34
|
-
}
|
|
35
|
-
}
|
|
36
|
-
}
|
|
@@ -1,51 +0,0 @@
|
|
|
1
|
-
extern "C" __global__ void generate_mask(const int ncol, const int nrow,
|
|
2
|
-
const int cen_col, const int cen_row,
|
|
3
|
-
const float du, const float dv,
|
|
4
|
-
const float radius, const float drop,
|
|
5
|
-
unsigned short *mask) {
|
|
6
|
-
int i = blockDim.x * blockIdx.x + threadIdx.x;
|
|
7
|
-
int j = blockIdx.y;
|
|
8
|
-
|
|
9
|
-
if (i >= ncol/2+1)
|
|
10
|
-
return;
|
|
11
|
-
|
|
12
|
-
// we only need to look at the right half as we're using a real2complex FFT
|
|
13
|
-
int outi = i;
|
|
14
|
-
i += ncol/2-1;
|
|
15
|
-
|
|
16
|
-
int pos = __float2int_ru(((j - cen_row) * dv / radius) / du);
|
|
17
|
-
int pos1 = -pos + cen_col;
|
|
18
|
-
int pos2 = pos + cen_col;
|
|
19
|
-
|
|
20
|
-
if (pos1 > pos2) {
|
|
21
|
-
int temp = pos1;
|
|
22
|
-
pos1 = pos2;
|
|
23
|
-
pos2 = temp;
|
|
24
|
-
if (pos1 >= ncol) {
|
|
25
|
-
pos1 = ncol - 1;
|
|
26
|
-
}
|
|
27
|
-
if (pos2 < 0) {
|
|
28
|
-
pos2 = 0;
|
|
29
|
-
}
|
|
30
|
-
} else {
|
|
31
|
-
if (pos1 < 0) {
|
|
32
|
-
pos1 = 0;
|
|
33
|
-
}
|
|
34
|
-
if (pos2 >= ncol) {
|
|
35
|
-
pos2 = ncol - 1;
|
|
36
|
-
}
|
|
37
|
-
}
|
|
38
|
-
|
|
39
|
-
short outval = (pos1 <= i && i <= pos2) ? 1 : 0;
|
|
40
|
-
|
|
41
|
-
// mask[cen_row - drop: cen_row + drop + 1, :] = 0
|
|
42
|
-
if (j >= cen_row - drop && j <= cen_row + drop) {
|
|
43
|
-
outval = 0;
|
|
44
|
-
}
|
|
45
|
-
// mask[:, cen_col - 1: cen_col + 2] = 0
|
|
46
|
-
if (i >= cen_col - 1 && i <= cen_col + 1) {
|
|
47
|
-
outval = 0;
|
|
48
|
-
}
|
|
49
|
-
|
|
50
|
-
mask[j * (ncol/2+1) + outi] = outval;
|
|
51
|
-
}
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|