imops 0.8.2__cp36-cp36m-win_amd64.whl → 0.8.3__cp36-cp36m-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of imops might be problematic. Click here for more details.
- imops/__init__.py +1 -0
- imops/__version__.py +1 -1
- imops/backend.py +14 -10
- imops/crop.py +18 -2
- imops/interp1d.py +7 -4
- imops/measure.py +7 -7
- imops/morphology.py +6 -5
- imops/numeric.py +376 -0
- imops/pad.py +41 -5
- imops/radon.py +7 -5
- imops/src/_backprojection.cp36-win_amd64.pyd +0 -0
- imops/src/_fast_backprojection.cp36-win_amd64.pyd +0 -0
- imops/src/_fast_measure.cp36-win_amd64.pyd +0 -0
- imops/src/_fast_morphology.cp36-win_amd64.pyd +0 -0
- imops/src/_fast_numeric.cp36-win_amd64.pyd +0 -0
- imops/src/_fast_numeric.pyx +208 -30
- imops/src/_fast_radon.cp36-win_amd64.pyd +0 -0
- imops/src/_fast_zoom.cp36-win_amd64.pyd +0 -0
- imops/src/_measure.cp36-win_amd64.pyd +0 -0
- imops/src/_morphology.cp36-win_amd64.pyd +0 -0
- imops/src/_numeric.cp36-win_amd64.pyd +0 -0
- imops/src/_numeric.pyx +208 -30
- imops/src/_radon.cp36-win_amd64.pyd +0 -0
- imops/src/_zoom.cp36-win_amd64.pyd +0 -0
- imops/utils.py +65 -12
- imops/zoom.py +2 -2
- {imops-0.8.2.dist-info → imops-0.8.3.dist-info}/METADATA +3 -2
- imops-0.8.3.dist-info/RECORD +46 -0
- imops/_numeric.py +0 -124
- imops-0.8.2.dist-info/RECORD +0 -46
- {imops-0.8.2.dist-info → imops-0.8.3.dist-info}/LICENSE +0 -0
- {imops-0.8.2.dist-info → imops-0.8.3.dist-info}/WHEEL +0 -0
- {imops-0.8.2.dist-info → imops-0.8.3.dist-info}/top_level.txt +0 -0
imops/pad.py
CHANGED
|
@@ -2,6 +2,8 @@ from typing import Callable, Sequence, Union
|
|
|
2
2
|
|
|
3
3
|
import numpy as np
|
|
4
4
|
|
|
5
|
+
from .backend import BackendLike
|
|
6
|
+
from .numeric import _NUMERIC_DEFAULT_NUM_THREADS, copy
|
|
5
7
|
from .utils import AxesLike, AxesParams, axis_from_dim, broadcast_axis, broadcast_to_axis, fill_by_indices
|
|
6
8
|
|
|
7
9
|
|
|
@@ -10,6 +12,8 @@ def pad(
|
|
|
10
12
|
padding: Union[AxesLike, Sequence[Sequence[int]]],
|
|
11
13
|
axis: AxesLike = None,
|
|
12
14
|
padding_values: Union[AxesParams, Callable] = 0,
|
|
15
|
+
num_threads: int = _NUMERIC_DEFAULT_NUM_THREADS,
|
|
16
|
+
backend: BackendLike = None,
|
|
13
17
|
) -> np.ndarray:
|
|
14
18
|
"""
|
|
15
19
|
Pad `x` according to `padding` along the `axis`.
|
|
@@ -28,6 +32,11 @@ def pad(
|
|
|
28
32
|
padding_values: Union[AxesParams, Callable]
|
|
29
33
|
values to pad with, must be broadcastable to the resulting array.
|
|
30
34
|
If Callable (e.g. `numpy.min`) - `padding_values(x)` will be used
|
|
35
|
+
num_threads: int
|
|
36
|
+
the number of threads to use for computation. Default = 4. If negative value passed
|
|
37
|
+
cpu count + num_threads + 1 threads will be used
|
|
38
|
+
backend: BackendLike
|
|
39
|
+
which backend to use. `cython` and `scipy` are available, `cython` is used by default
|
|
31
40
|
|
|
32
41
|
Returns
|
|
33
42
|
-------
|
|
@@ -52,10 +61,11 @@ def pad(
|
|
|
52
61
|
|
|
53
62
|
new_shape = np.array(x.shape) + np.sum(padding, axis=1)
|
|
54
63
|
new_x = np.array(padding_values, dtype=x.dtype)
|
|
55
|
-
new_x = np.broadcast_to(new_x, new_shape)
|
|
64
|
+
new_x = copy(np.broadcast_to(new_x, new_shape), order='C', num_threads=num_threads, backend=backend)
|
|
56
65
|
|
|
57
66
|
start = padding[:, 0]
|
|
58
67
|
end = np.where(padding[:, 1] != 0, -padding[:, 1], None)
|
|
68
|
+
# TODO: how to parallelize this?
|
|
59
69
|
new_x[tuple(map(slice, start, end))] = x
|
|
60
70
|
|
|
61
71
|
return new_x
|
|
@@ -67,6 +77,8 @@ def pad_to_shape(
|
|
|
67
77
|
axis: AxesLike = None,
|
|
68
78
|
padding_values: Union[AxesParams, Callable] = 0,
|
|
69
79
|
ratio: AxesParams = 0.5,
|
|
80
|
+
num_threads: int = _NUMERIC_DEFAULT_NUM_THREADS,
|
|
81
|
+
backend: BackendLike = None,
|
|
70
82
|
) -> np.ndarray:
|
|
71
83
|
"""
|
|
72
84
|
Pad `x` to match `shape` along the `axis`.
|
|
@@ -85,6 +97,11 @@ def pad_to_shape(
|
|
|
85
97
|
ratio: AxesParams
|
|
86
98
|
float or sequence of floats describing what proportion of padding to apply on the left sides of padding axes.
|
|
87
99
|
Remaining ratio of padding will be applied on the right sides
|
|
100
|
+
num_threads: int
|
|
101
|
+
the number of threads to use for computation. Default = 4. If negative value passed
|
|
102
|
+
cpu count + num_threads + 1 threads will be used
|
|
103
|
+
backend: BackendLike
|
|
104
|
+
which backend to use. `cython` and `scipy` are available, `cython` is used by default
|
|
88
105
|
|
|
89
106
|
Returns
|
|
90
107
|
-------
|
|
@@ -108,7 +125,7 @@ def pad_to_shape(
|
|
|
108
125
|
start = (delta * ratio).astype(int)
|
|
109
126
|
padding = np.array((start, delta - start)).T.astype(int)
|
|
110
127
|
|
|
111
|
-
return pad(x, padding, axis, padding_values=padding_values)
|
|
128
|
+
return pad(x, padding, axis, padding_values=padding_values, num_threads=num_threads, backend=backend)
|
|
112
129
|
|
|
113
130
|
|
|
114
131
|
def pad_to_divisible(
|
|
@@ -118,6 +135,8 @@ def pad_to_divisible(
|
|
|
118
135
|
padding_values: Union[AxesParams, Callable] = 0,
|
|
119
136
|
ratio: AxesParams = 0.5,
|
|
120
137
|
remainder: AxesLike = 0,
|
|
138
|
+
num_threads: int = _NUMERIC_DEFAULT_NUM_THREADS,
|
|
139
|
+
backend: BackendLike = None,
|
|
121
140
|
) -> np.ndarray:
|
|
122
141
|
"""
|
|
123
142
|
Pad `x` to be divisible by `divisor` along the `axis`.
|
|
@@ -137,6 +156,11 @@ def pad_to_divisible(
|
|
|
137
156
|
Remaining ratio of padding will be applied on the right sides
|
|
138
157
|
remainder: AxesLike
|
|
139
158
|
`x` will be padded such that its shape gives the remainder `remainder` when divided by `divisor`
|
|
159
|
+
num_threads: int
|
|
160
|
+
the number of threads to use for computation. Default = 4. If negative value passed
|
|
161
|
+
cpu count + num_threads + 1 threads will be used
|
|
162
|
+
backend: BackendLike
|
|
163
|
+
which backend to use. `cython` and `scipy` are available, `cython` is used by default
|
|
140
164
|
|
|
141
165
|
Returns
|
|
142
166
|
-------
|
|
@@ -157,11 +181,18 @@ def pad_to_divisible(
|
|
|
157
181
|
assert np.all(remainder >= 0)
|
|
158
182
|
shape = np.maximum(np.array(x.shape)[list(axis)], remainder)
|
|
159
183
|
|
|
160
|
-
return pad_to_shape(
|
|
184
|
+
return pad_to_shape(
|
|
185
|
+
x, shape + (remainder - shape) % divisor, axis, padding_values, ratio, num_threads=num_threads, backend=backend
|
|
186
|
+
)
|
|
161
187
|
|
|
162
188
|
|
|
163
189
|
def restore_crop(
|
|
164
|
-
x: np.ndarray,
|
|
190
|
+
x: np.ndarray,
|
|
191
|
+
box: np.ndarray,
|
|
192
|
+
shape: AxesLike,
|
|
193
|
+
padding_values: Union[AxesParams, Callable] = 0,
|
|
194
|
+
num_threads: int = _NUMERIC_DEFAULT_NUM_THREADS,
|
|
195
|
+
backend: BackendLike = None,
|
|
165
196
|
) -> np.ndarray:
|
|
166
197
|
"""
|
|
167
198
|
Pad `x` to match `shape`. The left padding is taken equal to `box`'s start.
|
|
@@ -176,6 +207,11 @@ def restore_crop(
|
|
|
176
207
|
shape to restore crop to
|
|
177
208
|
padding_values: Union[AxesParams, Callable]
|
|
178
209
|
values to pad with. If Callable (e.g. `numpy.min`) - `padding_values(x)` will be used
|
|
210
|
+
num_threads: int
|
|
211
|
+
the number of threads to use for computation. Default = 4. If negative value passed
|
|
212
|
+
cpu count + num_threads + 1 threads will be used
|
|
213
|
+
backend: BackendLike
|
|
214
|
+
which backend to use. `cython` and `scipy` are available, `cython` is used by default
|
|
179
215
|
|
|
180
216
|
Returns
|
|
181
217
|
-------
|
|
@@ -203,7 +239,7 @@ def restore_crop(
|
|
|
203
239
|
)
|
|
204
240
|
|
|
205
241
|
padding = np.array([start, shape - stop], dtype=int).T
|
|
206
|
-
x = pad(x, padding, padding_values=padding_values)
|
|
242
|
+
x = pad(x, padding, padding_values=padding_values, num_threads=num_threads, backend=backend)
|
|
207
243
|
assert all(np.array(x.shape) == shape)
|
|
208
244
|
|
|
209
245
|
return x
|
imops/radon.py
CHANGED
|
@@ -4,6 +4,7 @@ import numpy as np
|
|
|
4
4
|
from scipy.fftpack import fft, ifft
|
|
5
5
|
|
|
6
6
|
from .backend import BackendLike, resolve_backend
|
|
7
|
+
from .numeric import copy
|
|
7
8
|
from .src._backprojection import backprojection3d
|
|
8
9
|
from .src._fast_backprojection import backprojection3d as fast_backprojection3d
|
|
9
10
|
from .src._fast_radon import radon3d as fast_radon3d
|
|
@@ -52,7 +53,7 @@ def radon(
|
|
|
52
53
|
>>> sinogram, fill_value = radon(image, return_fill=True) # 2d image with fill value
|
|
53
54
|
>>> sinogram = radon(image, axes=(-2, -1)) # nd image
|
|
54
55
|
"""
|
|
55
|
-
backend = resolve_backend(backend)
|
|
56
|
+
backend = resolve_backend(backend, warn_stacklevel=3)
|
|
56
57
|
if backend.name not in ('Cython',):
|
|
57
58
|
raise ValueError(f'Unsupported backend "{backend.name}".')
|
|
58
59
|
|
|
@@ -78,13 +79,14 @@ def radon(
|
|
|
78
79
|
)
|
|
79
80
|
|
|
80
81
|
if min_ != 0 or max_ != 0:
|
|
81
|
-
|
|
82
|
+
# FIXME: how to accurately pass `num_threads` and `backend` arguments to `copy`?
|
|
83
|
+
image = copy(image, order='C')
|
|
82
84
|
image[:, outside_circle] = 0
|
|
83
85
|
|
|
84
86
|
# TODO: f(arange)?
|
|
85
87
|
limits = ((squared[:, None] + squared[None, :]) > (radius + 2) ** 2).sum(0) // 2
|
|
86
88
|
|
|
87
|
-
num_threads = normalize_num_threads(num_threads, backend)
|
|
89
|
+
num_threads = normalize_num_threads(num_threads, backend, warn_stacklevel=3)
|
|
88
90
|
|
|
89
91
|
radon3d_ = fast_radon3d if backend.fast else radon3d
|
|
90
92
|
|
|
@@ -141,7 +143,7 @@ def inverse_radon(
|
|
|
141
143
|
>>> image = inverse_radon(sinogram, fill_value=-1000) # 2d image with fill value
|
|
142
144
|
>>> image = inverse_radon(sinogram, axes=(-2, -1)) # nd image
|
|
143
145
|
"""
|
|
144
|
-
backend = resolve_backend(backend)
|
|
146
|
+
backend = resolve_backend(backend, warn_stacklevel=3)
|
|
145
147
|
if backend.name not in ('Cython',):
|
|
146
148
|
raise ValueError(f'Unsupported backend "{backend.name}".')
|
|
147
149
|
|
|
@@ -183,7 +185,7 @@ def inverse_radon(
|
|
|
183
185
|
filtered_sinogram = filtered_sinogram.astype(dtype, copy=False)
|
|
184
186
|
theta, xs = np.deg2rad(theta, dtype=dtype), xs.astype(dtype, copy=False)
|
|
185
187
|
|
|
186
|
-
num_threads = normalize_num_threads(num_threads, backend)
|
|
188
|
+
num_threads = normalize_num_threads(num_threads, backend, warn_stacklevel=3)
|
|
187
189
|
|
|
188
190
|
backprojection3d_ = fast_backprojection3d if backend.fast else backprojection3d
|
|
189
191
|
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
imops/src/_fast_numeric.pyx
CHANGED
|
@@ -8,10 +8,19 @@
|
|
|
8
8
|
import numpy as np
|
|
9
9
|
|
|
10
10
|
cimport numpy as np
|
|
11
|
+
from libc.stdint cimport uint16_t
|
|
11
12
|
|
|
12
13
|
from cython.parallel import prange
|
|
13
14
|
|
|
14
15
|
|
|
16
|
+
# https://stackoverflow.com/questions/47421443/using-half-precision-numpy-floats-in-cython
|
|
17
|
+
cdef extern from "numpy/halffloat.h":
|
|
18
|
+
ctypedef uint16_t npy_half
|
|
19
|
+
|
|
20
|
+
float npy_half_to_float(npy_half h) nogil
|
|
21
|
+
npy_half npy_float_to_half(float f) nogil
|
|
22
|
+
|
|
23
|
+
|
|
15
24
|
ctypedef fused NUM:
|
|
16
25
|
short
|
|
17
26
|
int
|
|
@@ -20,45 +29,214 @@ ctypedef fused NUM:
|
|
|
20
29
|
double
|
|
21
30
|
|
|
22
31
|
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
32
|
+
ctypedef fused NUM_AND_NPY_HALF:
|
|
33
|
+
NUM
|
|
34
|
+
npy_half
|
|
26
35
|
|
|
27
|
-
for i in prange(len_nums, num_threads=num_threads, nogil=True):
|
|
28
|
-
res += nums[i]
|
|
29
36
|
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
def _parallel_pointwise_mul(
|
|
37
|
+
# TODO: Generalize code below to n-d
|
|
38
|
+
def _pointwise_add_array_3d(
|
|
34
39
|
NUM[:, :, :] nums1,
|
|
35
40
|
NUM[:, :, :] nums2,
|
|
36
|
-
|
|
37
|
-
Py_ssize_t num_threads
|
|
41
|
+
NUM[:, :, :] out,
|
|
42
|
+
Py_ssize_t num_threads,
|
|
38
43
|
) -> np.ndarray:
|
|
39
|
-
cdef
|
|
40
|
-
cdef Py_ssize_t
|
|
44
|
+
cdef Py_ssize_t rows = out.shape[0], cols = out.shape[1], dims = out.shape[2]
|
|
45
|
+
cdef Py_ssize_t i, j, k
|
|
46
|
+
|
|
47
|
+
for i in prange(rows, nogil=True, num_threads=num_threads):
|
|
48
|
+
for j in prange(cols):
|
|
49
|
+
for k in prange(dims):
|
|
50
|
+
out[i, j, k] = nums1[i, j, k] + nums2[i, j, k]
|
|
51
|
+
|
|
52
|
+
return np.asarray(out)
|
|
41
53
|
|
|
42
|
-
cdef char[:] broadcast_mask1 = np.array([x == y for x, y in zip(res_shape, nums1.shape)], dtype=np.int8)
|
|
43
|
-
cdef char[:] broadcast_mask2 = np.array([x == y for x, y in zip(res_shape, nums2.shape)], dtype=np.int8)
|
|
44
54
|
|
|
45
|
-
|
|
55
|
+
def _pointwise_add_array_4d(
|
|
56
|
+
NUM[:, :, :, :] nums1,
|
|
57
|
+
NUM[:, :, :, :] nums2,
|
|
58
|
+
NUM[:, :, :, :] out,
|
|
59
|
+
Py_ssize_t num_threads,
|
|
60
|
+
) -> np.ndarray:
|
|
61
|
+
cdef Py_ssize_t dim1 = out.shape[0], dim2 = out.shape[1], dim3 = out.shape[2], dim4 = out.shape[3]
|
|
62
|
+
cdef Py_ssize_t i1, i2, i3, i4
|
|
63
|
+
|
|
64
|
+
for i1 in prange(dim1, nogil=True, num_threads=num_threads):
|
|
65
|
+
for i2 in prange(dim2):
|
|
66
|
+
for i3 in prange(dim3):
|
|
67
|
+
for i4 in prange(dim4):
|
|
68
|
+
out[i1, i2, i3, i4] = nums1[i1, i2, i3, i4] + nums2[i1, i2, i3, i4]
|
|
69
|
+
|
|
70
|
+
return np.asarray(out)
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
def _pointwise_add_value_3d(
|
|
74
|
+
NUM[:, :, :] nums,
|
|
75
|
+
NUM value,
|
|
76
|
+
NUM[:, :, :] out,
|
|
77
|
+
Py_ssize_t num_threads,
|
|
78
|
+
) -> np.ndarray:
|
|
79
|
+
cdef Py_ssize_t rows = out.shape[0], cols = out.shape[1], dims = out.shape[2]
|
|
46
80
|
cdef Py_ssize_t i, j, k
|
|
47
81
|
|
|
48
82
|
for i in prange(rows, nogil=True, num_threads=num_threads):
|
|
49
83
|
for j in prange(cols):
|
|
50
84
|
for k in prange(dims):
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
85
|
+
out[i, j, k] = nums[i, j, k] + value
|
|
86
|
+
|
|
87
|
+
return np.asarray(out)
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
def _pointwise_add_value_4d(
|
|
91
|
+
NUM[:, :, :, :] nums,
|
|
92
|
+
NUM value,
|
|
93
|
+
NUM[:, :, :, :] out,
|
|
94
|
+
Py_ssize_t num_threads,
|
|
95
|
+
) -> np.ndarray:
|
|
96
|
+
cdef Py_ssize_t dim1 = out.shape[0], dim2 = out.shape[1], dim3 = out.shape[2], dim4 = out.shape[3]
|
|
97
|
+
cdef Py_ssize_t i1, i2, i3, i4
|
|
98
|
+
|
|
99
|
+
for i1 in prange(dim1, nogil=True, num_threads=num_threads):
|
|
100
|
+
for i2 in prange(dim2):
|
|
101
|
+
for i3 in prange(dim3):
|
|
102
|
+
for i4 in prange(dim4):
|
|
103
|
+
out[i1, i2, i3, i4] = nums[i1, i2, i3, i4] + value
|
|
104
|
+
|
|
105
|
+
return np.asarray(out)
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
def _pointwise_add_array_3d_fp16(
|
|
109
|
+
npy_half[:, :, :] nums1,
|
|
110
|
+
npy_half[:, :, :] nums2,
|
|
111
|
+
npy_half[:, :, :] out,
|
|
112
|
+
Py_ssize_t num_threads,
|
|
113
|
+
) -> np.ndarray:
|
|
114
|
+
cdef Py_ssize_t rows = out.shape[0], cols = out.shape[1], dims = out.shape[2]
|
|
115
|
+
cdef Py_ssize_t i, j, k
|
|
116
|
+
|
|
117
|
+
for i in prange(rows, nogil=True, num_threads=num_threads):
|
|
118
|
+
for j in prange(cols):
|
|
119
|
+
for k in prange(dims):
|
|
120
|
+
out[i, j, k] = (npy_float_to_half(npy_half_to_float(nums1[i, j, k]) +
|
|
121
|
+
npy_half_to_float(nums2[i, j, k])))
|
|
122
|
+
|
|
123
|
+
return np.asarray(out)
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
def _pointwise_add_array_4d_fp16(
|
|
127
|
+
npy_half[:, :, :, :] nums1,
|
|
128
|
+
npy_half[:, :, :, :] nums2,
|
|
129
|
+
npy_half[:, :, :, :] out,
|
|
130
|
+
Py_ssize_t num_threads,
|
|
131
|
+
) -> np.ndarray:
|
|
132
|
+
cdef Py_ssize_t dim1 = out.shape[0], dim2 = out.shape[1], dim3 = out.shape[2], dim4 = out.shape[3]
|
|
133
|
+
cdef Py_ssize_t i1, i2, i3, i4
|
|
134
|
+
|
|
135
|
+
for i1 in prange(dim1, nogil=True, num_threads=num_threads):
|
|
136
|
+
for i2 in prange(dim2):
|
|
137
|
+
for i3 in prange(dim3):
|
|
138
|
+
for i4 in prange(dim4):
|
|
139
|
+
out[i1, i2, i3, i4] = (npy_float_to_half(npy_half_to_float(nums1[i1, i2, i3, i4]) +
|
|
140
|
+
npy_half_to_float(nums2[i1, i2, i3, i4])))
|
|
141
|
+
|
|
142
|
+
return np.asarray(out)
|
|
143
|
+
|
|
144
|
+
|
|
145
|
+
def _pointwise_add_value_3d_fp16(
|
|
146
|
+
npy_half[:, :, :] nums,
|
|
147
|
+
npy_half value,
|
|
148
|
+
npy_half[:, :, :] out,
|
|
149
|
+
Py_ssize_t num_threads,
|
|
150
|
+
) -> np.ndarray:
|
|
151
|
+
cdef Py_ssize_t rows = out.shape[0], cols = out.shape[1], dims = out.shape[2]
|
|
152
|
+
cdef Py_ssize_t i, j, k
|
|
153
|
+
|
|
154
|
+
for i in prange(rows, nogil=True, num_threads=num_threads):
|
|
155
|
+
for j in prange(cols):
|
|
156
|
+
for k in prange(dims):
|
|
157
|
+
out[i, j, k] = npy_float_to_half(npy_half_to_float(nums[i, j, k]) + npy_half_to_float(value))
|
|
158
|
+
|
|
159
|
+
return np.asarray(out)
|
|
160
|
+
|
|
161
|
+
|
|
162
|
+
def _pointwise_add_value_4d_fp16(
|
|
163
|
+
npy_half[:, :, :, :] nums,
|
|
164
|
+
npy_half value,
|
|
165
|
+
npy_half[:, :, :, :] out,
|
|
166
|
+
Py_ssize_t num_threads,
|
|
167
|
+
) -> np.ndarray:
|
|
168
|
+
cdef Py_ssize_t dim1 = out.shape[0], dim2 = out.shape[1], dim3 = out.shape[2], dim4 = out.shape[3]
|
|
169
|
+
cdef Py_ssize_t i1, i2, i3, i4
|
|
170
|
+
|
|
171
|
+
for i1 in prange(dim1, nogil=True, num_threads=num_threads):
|
|
172
|
+
for i2 in prange(dim2):
|
|
173
|
+
for i3 in prange(dim3):
|
|
174
|
+
for i4 in prange(dim4):
|
|
175
|
+
out[i1, i2, i3, i4] = (npy_float_to_half(npy_half_to_float(nums[i1, i2, i3, i4]) +
|
|
176
|
+
npy_half_to_float(value)))
|
|
177
|
+
|
|
178
|
+
return np.asarray(out)
|
|
179
|
+
|
|
180
|
+
|
|
181
|
+
def _fill_3d(NUM_AND_NPY_HALF[:, :, :] nums, NUM_AND_NPY_HALF value, Py_ssize_t num_threads) -> None:
|
|
182
|
+
cdef Py_ssize_t rows = nums.shape[0], cols = nums.shape[1], dims = nums.shape[2]
|
|
183
|
+
cdef Py_ssize_t i, j, k
|
|
184
|
+
|
|
185
|
+
for i in prange(rows, nogil=True, num_threads=num_threads):
|
|
186
|
+
for j in prange(cols):
|
|
187
|
+
for k in prange(dims):
|
|
188
|
+
nums[i, j, k] = value
|
|
189
|
+
|
|
190
|
+
|
|
191
|
+
def _fill_4d(NUM_AND_NPY_HALF[:, :, :, :] nums, NUM_AND_NPY_HALF value, Py_ssize_t num_threads) -> None:
|
|
192
|
+
cdef Py_ssize_t dim1 = nums.shape[0], dim2 = nums.shape[1], dim3 = nums.shape[2], dim4 = nums.shape[3]
|
|
193
|
+
cdef Py_ssize_t i1, i2, i3, i4
|
|
194
|
+
|
|
195
|
+
for i1 in prange(dim1, nogil=True, num_threads=num_threads):
|
|
196
|
+
for i2 in prange(dim2):
|
|
197
|
+
for i3 in prange(dim3):
|
|
198
|
+
for i4 in prange(dim4):
|
|
199
|
+
nums[i1, i2, i3, i4] = value
|
|
200
|
+
|
|
201
|
+
|
|
202
|
+
# FIXME: somehow `const NUM_AND_NPY_HALF` is not working
|
|
203
|
+
cpdef void _copy_3d(const NUM[:, :, :] nums1, NUM[:, :, :] nums2, Py_ssize_t num_threads):
|
|
204
|
+
cdef Py_ssize_t rows = nums1.shape[0], cols = nums1.shape[1], dims = nums1.shape[2]
|
|
205
|
+
cdef Py_ssize_t i, j, k
|
|
206
|
+
|
|
207
|
+
for i in prange(rows, nogil=True, num_threads=num_threads):
|
|
208
|
+
for j in prange(cols):
|
|
209
|
+
for k in prange(dims):
|
|
210
|
+
nums2[i, j, k] = nums1[i, j, k]
|
|
211
|
+
|
|
212
|
+
|
|
213
|
+
cpdef void _copy_4d(const NUM[:, :, :, :] nums1, NUM[:, :, :, :] nums2, Py_ssize_t num_threads):
|
|
214
|
+
cdef Py_ssize_t dim1 = nums1.shape[0], dim2 = nums1.shape[1], dim3 = nums1.shape[2], dim4 = nums1.shape[3]
|
|
215
|
+
cdef Py_ssize_t i1, i2, i3, i4
|
|
216
|
+
|
|
217
|
+
for i1 in prange(dim1, nogil=True, num_threads=num_threads):
|
|
218
|
+
for i2 in prange(dim2):
|
|
219
|
+
for i3 in prange(dim3):
|
|
220
|
+
for i4 in prange(dim4):
|
|
221
|
+
nums2[i1, i2, i3, i4] = nums1[i1, i2, i3, i4]
|
|
222
|
+
|
|
223
|
+
|
|
224
|
+
cpdef void _copy_3d_fp16(const npy_half[:, :, :] nums1, npy_half[:, :, :] nums2, Py_ssize_t num_threads):
|
|
225
|
+
cdef Py_ssize_t rows = nums1.shape[0], cols = nums1.shape[1], dims = nums1.shape[2]
|
|
226
|
+
cdef Py_ssize_t i, j, k
|
|
227
|
+
|
|
228
|
+
for i in prange(rows, nogil=True, num_threads=num_threads):
|
|
229
|
+
for j in prange(cols):
|
|
230
|
+
for k in prange(dims):
|
|
231
|
+
nums2[i, j, k] = nums1[i, j, k]
|
|
232
|
+
|
|
233
|
+
|
|
234
|
+
cpdef void _copy_4d_fp16(const npy_half[:, :, :, :] nums1, npy_half[:, :, :, :] nums2, Py_ssize_t num_threads):
|
|
235
|
+
cdef Py_ssize_t dim1 = nums1.shape[0], dim2 = nums1.shape[1], dim3 = nums1.shape[2], dim4 = nums1.shape[3]
|
|
236
|
+
cdef Py_ssize_t i1, i2, i3, i4
|
|
237
|
+
|
|
238
|
+
for i1 in prange(dim1, nogil=True, num_threads=num_threads):
|
|
239
|
+
for i2 in prange(dim2):
|
|
240
|
+
for i3 in prange(dim3):
|
|
241
|
+
for i4 in prange(dim4):
|
|
242
|
+
nums2[i1, i2, i3, i4] = nums1[i1, i2, i3, i4]
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|