numba-cuda 0.8.0__py3-none-any.whl → 0.9.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- _numba_cuda_redirector.py +17 -13
- numba_cuda/VERSION +1 -1
- numba_cuda/_version.py +4 -1
- numba_cuda/numba/cuda/__init__.py +6 -2
- numba_cuda/numba/cuda/api.py +129 -86
- numba_cuda/numba/cuda/api_util.py +3 -3
- numba_cuda/numba/cuda/args.py +12 -16
- numba_cuda/numba/cuda/cg.py +6 -6
- numba_cuda/numba/cuda/codegen.py +74 -43
- numba_cuda/numba/cuda/compiler.py +232 -113
- numba_cuda/numba/cuda/cpp_function_wrappers.cu +1 -2
- numba_cuda/numba/cuda/cuda_fp16.h +661 -661
- numba_cuda/numba/cuda/cuda_fp16.hpp +3 -3
- numba_cuda/numba/cuda/cuda_paths.py +291 -99
- numba_cuda/numba/cuda/cudadecl.py +125 -69
- numba_cuda/numba/cuda/cudadrv/__init__.py +3 -1
- numba_cuda/numba/cuda/cudadrv/devicearray.py +185 -135
- numba_cuda/numba/cuda/cudadrv/devices.py +16 -11
- numba_cuda/numba/cuda/cudadrv/driver.py +463 -297
- numba_cuda/numba/cuda/cudadrv/drvapi.py +241 -207
- numba_cuda/numba/cuda/cudadrv/dummyarray.py +66 -54
- numba_cuda/numba/cuda/cudadrv/enums.py +1 -1
- numba_cuda/numba/cuda/cudadrv/error.py +6 -2
- numba_cuda/numba/cuda/cudadrv/libs.py +67 -63
- numba_cuda/numba/cuda/cudadrv/linkable_code.py +16 -1
- numba_cuda/numba/cuda/cudadrv/mappings.py +16 -14
- numba_cuda/numba/cuda/cudadrv/nvrtc.py +138 -29
- numba_cuda/numba/cuda/cudadrv/nvvm.py +296 -161
- numba_cuda/numba/cuda/cudadrv/rtapi.py +1 -1
- numba_cuda/numba/cuda/cudadrv/runtime.py +20 -8
- numba_cuda/numba/cuda/cudaimpl.py +317 -233
- numba_cuda/numba/cuda/cudamath.py +1 -1
- numba_cuda/numba/cuda/debuginfo.py +8 -6
- numba_cuda/numba/cuda/decorators.py +75 -45
- numba_cuda/numba/cuda/descriptor.py +1 -1
- numba_cuda/numba/cuda/device_init.py +69 -18
- numba_cuda/numba/cuda/deviceufunc.py +143 -98
- numba_cuda/numba/cuda/dispatcher.py +300 -213
- numba_cuda/numba/cuda/errors.py +13 -10
- numba_cuda/numba/cuda/extending.py +1 -1
- numba_cuda/numba/cuda/initialize.py +5 -3
- numba_cuda/numba/cuda/intrinsic_wrapper.py +3 -3
- numba_cuda/numba/cuda/intrinsics.py +31 -27
- numba_cuda/numba/cuda/kernels/reduction.py +13 -13
- numba_cuda/numba/cuda/kernels/transpose.py +3 -6
- numba_cuda/numba/cuda/libdevice.py +317 -317
- numba_cuda/numba/cuda/libdeviceimpl.py +3 -2
- numba_cuda/numba/cuda/locks.py +16 -0
- numba_cuda/numba/cuda/mathimpl.py +62 -57
- numba_cuda/numba/cuda/models.py +1 -5
- numba_cuda/numba/cuda/nvvmutils.py +103 -88
- numba_cuda/numba/cuda/printimpl.py +9 -5
- numba_cuda/numba/cuda/random.py +46 -36
- numba_cuda/numba/cuda/reshape_funcs.cu +1 -1
- numba_cuda/numba/cuda/runtime/__init__.py +1 -1
- numba_cuda/numba/cuda/runtime/memsys.cu +1 -1
- numba_cuda/numba/cuda/runtime/memsys.cuh +1 -1
- numba_cuda/numba/cuda/runtime/nrt.cu +3 -3
- numba_cuda/numba/cuda/runtime/nrt.py +48 -43
- numba_cuda/numba/cuda/simulator/__init__.py +22 -12
- numba_cuda/numba/cuda/simulator/api.py +38 -22
- numba_cuda/numba/cuda/simulator/compiler.py +2 -2
- numba_cuda/numba/cuda/simulator/cudadrv/__init__.py +8 -2
- numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py +63 -55
- numba_cuda/numba/cuda/simulator/cudadrv/devices.py +13 -11
- numba_cuda/numba/cuda/simulator/cudadrv/driver.py +5 -5
- numba_cuda/numba/cuda/simulator/cudadrv/drvapi.py +2 -2
- numba_cuda/numba/cuda/simulator/cudadrv/libs.py +1 -1
- numba_cuda/numba/cuda/simulator/cudadrv/nvvm.py +3 -3
- numba_cuda/numba/cuda/simulator/cudadrv/runtime.py +3 -3
- numba_cuda/numba/cuda/simulator/kernel.py +43 -34
- numba_cuda/numba/cuda/simulator/kernelapi.py +31 -26
- numba_cuda/numba/cuda/simulator/reduction.py +1 -0
- numba_cuda/numba/cuda/simulator/vector_types.py +13 -9
- numba_cuda/numba/cuda/simulator_init.py +2 -4
- numba_cuda/numba/cuda/stubs.py +139 -102
- numba_cuda/numba/cuda/target.py +64 -47
- numba_cuda/numba/cuda/testing.py +24 -19
- numba_cuda/numba/cuda/tests/__init__.py +14 -12
- numba_cuda/numba/cuda/tests/cudadrv/test_array_attr.py +16 -17
- numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py +7 -7
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_array_slicing.py +73 -54
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_auto_context.py +1 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py +48 -50
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py +47 -29
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_libraries.py +3 -3
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_memory.py +19 -19
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py +108 -103
- numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py +20 -11
- numba_cuda/numba/cuda/tests/cudadrv/test_detect.py +20 -17
- numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +8 -6
- numba_cuda/numba/cuda/tests/cudadrv/test_events.py +1 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_host_alloc.py +8 -7
- numba_cuda/numba/cuda/tests/cudadrv/test_init.py +13 -13
- numba_cuda/numba/cuda/tests/cudadrv/test_inline_ptx.py +12 -9
- numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +36 -31
- numba_cuda/numba/cuda/tests/cudadrv/test_managed_alloc.py +8 -7
- numba_cuda/numba/cuda/tests/cudadrv/test_module_callbacks.py +294 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_mvc.py +10 -7
- numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py +24 -15
- numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py +43 -41
- numba_cuda/numba/cuda/tests/cudadrv/test_pinned.py +4 -5
- numba_cuda/numba/cuda/tests/cudadrv/test_profiler.py +2 -2
- numba_cuda/numba/cuda/tests/cudadrv/test_ptds.py +28 -17
- numba_cuda/numba/cuda/tests/cudadrv/test_reset_device.py +1 -2
- numba_cuda/numba/cuda/tests/cudadrv/test_runtime.py +22 -14
- numba_cuda/numba/cuda/tests/cudadrv/test_select_device.py +1 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_streams.py +4 -3
- numba_cuda/numba/cuda/tests/cudapy/cache_usecases.py +10 -4
- numba_cuda/numba/cuda/tests/cudapy/cache_with_cpu_usecases.py +1 -0
- numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +7 -6
- numba_cuda/numba/cuda/tests/cudapy/jitlink.ptx +0 -2
- numba_cuda/numba/cuda/tests/cudapy/recursion_usecases.py +1 -0
- numba_cuda/numba/cuda/tests/cudapy/test_alignment.py +6 -5
- numba_cuda/numba/cuda/tests/cudapy/test_array.py +52 -42
- numba_cuda/numba/cuda/tests/cudapy/test_array_args.py +5 -6
- numba_cuda/numba/cuda/tests/cudapy/test_array_methods.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_atomics.py +501 -304
- numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py +57 -21
- numba_cuda/numba/cuda/tests/cudapy/test_boolean.py +3 -3
- numba_cuda/numba/cuda/tests/cudapy/test_caching.py +50 -37
- numba_cuda/numba/cuda/tests/cudapy/test_casting.py +29 -24
- numba_cuda/numba/cuda/tests/cudapy/test_cffi.py +11 -6
- numba_cuda/numba/cuda/tests/cudapy/test_compiler.py +84 -50
- numba_cuda/numba/cuda/tests/cudapy/test_complex.py +144 -73
- numba_cuda/numba/cuda/tests/cudapy/test_complex_kernel.py +2 -2
- numba_cuda/numba/cuda/tests/cudapy/test_const_string.py +37 -27
- numba_cuda/numba/cuda/tests/cudapy/test_constmem.py +43 -45
- numba_cuda/numba/cuda/tests/cudapy/test_cooperative_groups.py +21 -14
- numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py +60 -55
- numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py +3 -2
- numba_cuda/numba/cuda/tests/cudapy/test_datetime.py +26 -22
- numba_cuda/numba/cuda/tests/cudapy/test_debug.py +29 -27
- numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +31 -28
- numba_cuda/numba/cuda/tests/cudapy/test_device_func.py +52 -45
- numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +55 -43
- numba_cuda/numba/cuda/tests/cudapy/test_enums.py +6 -7
- numba_cuda/numba/cuda/tests/cudapy/test_errors.py +30 -15
- numba_cuda/numba/cuda/tests/cudapy/test_exception.py +11 -12
- numba_cuda/numba/cuda/tests/cudapy/test_extending.py +19 -12
- numba_cuda/numba/cuda/tests/cudapy/test_fastmath.py +77 -66
- numba_cuda/numba/cuda/tests/cudapy/test_forall.py +5 -3
- numba_cuda/numba/cuda/tests/cudapy/test_freevar.py +5 -3
- numba_cuda/numba/cuda/tests/cudapy/test_frexp_ldexp.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_globals.py +3 -5
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc.py +144 -126
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scalar.py +23 -18
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scheduling.py +16 -22
- numba_cuda/numba/cuda/tests/cudapy/test_idiv.py +1 -3
- numba_cuda/numba/cuda/tests/cudapy/test_inspect.py +29 -20
- numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py +147 -99
- numba_cuda/numba/cuda/tests/cudapy/test_ipc.py +50 -36
- numba_cuda/numba/cuda/tests/cudapy/test_iterators.py +1 -2
- numba_cuda/numba/cuda/tests/cudapy/test_lang.py +4 -4
- numba_cuda/numba/cuda/tests/cudapy/test_laplace.py +6 -6
- numba_cuda/numba/cuda/tests/cudapy/test_libdevice.py +24 -20
- numba_cuda/numba/cuda/tests/cudapy/test_lineinfo.py +36 -31
- numba_cuda/numba/cuda/tests/cudapy/test_localmem.py +13 -13
- numba_cuda/numba/cuda/tests/cudapy/test_mandel.py +13 -6
- numba_cuda/numba/cuda/tests/cudapy/test_math.py +83 -66
- numba_cuda/numba/cuda/tests/cudapy/test_matmul.py +1 -3
- numba_cuda/numba/cuda/tests/cudapy/test_minmax.py +19 -58
- numba_cuda/numba/cuda/tests/cudapy/test_montecarlo.py +4 -4
- numba_cuda/numba/cuda/tests/cudapy/test_multigpu.py +9 -7
- numba_cuda/numba/cuda/tests/cudapy/test_multiprocessing.py +9 -8
- numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py +12 -10
- numba_cuda/numba/cuda/tests/cudapy/test_nondet.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_operator.py +180 -96
- numba_cuda/numba/cuda/tests/cudapy/test_optimization.py +5 -5
- numba_cuda/numba/cuda/tests/cudapy/test_overload.py +37 -18
- numba_cuda/numba/cuda/tests/cudapy/test_powi.py +7 -7
- numba_cuda/numba/cuda/tests/cudapy/test_print.py +9 -7
- numba_cuda/numba/cuda/tests/cudapy/test_py2_div_issue.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_random.py +15 -10
- numba_cuda/numba/cuda/tests/cudapy/test_record_dtype.py +88 -87
- numba_cuda/numba/cuda/tests/cudapy/test_recursion.py +12 -10
- numba_cuda/numba/cuda/tests/cudapy/test_reduction.py +26 -11
- numba_cuda/numba/cuda/tests/cudapy/test_retrieve_autoconverted_arrays.py +7 -10
- numba_cuda/numba/cuda/tests/cudapy/test_serialize.py +4 -6
- numba_cuda/numba/cuda/tests/cudapy/test_slicing.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_sm.py +10 -9
- numba_cuda/numba/cuda/tests/cudapy/test_sm_creation.py +62 -43
- numba_cuda/numba/cuda/tests/cudapy/test_stream_api.py +7 -3
- numba_cuda/numba/cuda/tests/cudapy/test_sync.py +7 -5
- numba_cuda/numba/cuda/tests/cudapy/test_transpose.py +18 -11
- numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +111 -88
- numba_cuda/numba/cuda/tests/cudapy/test_userexc.py +2 -3
- numba_cuda/numba/cuda/tests/cudapy/test_vector_type.py +305 -130
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py +33 -36
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_complex.py +5 -5
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py +16 -12
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_device.py +7 -7
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_scalar_arg.py +6 -7
- numba_cuda/numba/cuda/tests/cudapy/test_warning.py +31 -29
- numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +31 -25
- numba_cuda/numba/cuda/tests/cudasim/test_cudasim_issues.py +19 -13
- numba_cuda/numba/cuda/tests/data/jitlink.cu +1 -1
- numba_cuda/numba/cuda/tests/data/jitlink.ptx +0 -2
- numba_cuda/numba/cuda/tests/doc_examples/test_cg.py +15 -8
- numba_cuda/numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py +4 -7
- numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py +14 -9
- numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py +22 -18
- numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py +7 -4
- numba_cuda/numba/cuda/tests/doc_examples/test_montecarlo.py +2 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_random.py +8 -4
- numba_cuda/numba/cuda/tests/doc_examples/test_reduction.py +2 -1
- numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py +94 -19
- numba_cuda/numba/cuda/tests/doc_examples/test_vecadd.py +2 -2
- numba_cuda/numba/cuda/tests/nocuda/test_dummyarray.py +91 -62
- numba_cuda/numba/cuda/tests/nocuda/test_function_resolution.py +14 -5
- numba_cuda/numba/cuda/tests/nocuda/test_import.py +25 -25
- numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py +40 -40
- numba_cuda/numba/cuda/tests/nocuda/test_nvvm.py +12 -10
- numba_cuda/numba/cuda/tests/nrt/test_nrt.py +16 -20
- numba_cuda/numba/cuda/tests/nrt/test_nrt_refct.py +12 -10
- numba_cuda/numba/cuda/tests/test_binary_generation/generate_raw_ltoir.py +2 -2
- numba_cuda/numba/cuda/types.py +5 -2
- numba_cuda/numba/cuda/ufuncs.py +382 -362
- numba_cuda/numba/cuda/utils.py +2 -2
- numba_cuda/numba/cuda/vector_types.py +2 -2
- numba_cuda/numba/cuda/vectorizers.py +37 -32
- {numba_cuda-0.8.0.dist-info → numba_cuda-0.9.0.dist-info}/METADATA +1 -1
- numba_cuda-0.9.0.dist-info/RECORD +253 -0
- {numba_cuda-0.8.0.dist-info → numba_cuda-0.9.0.dist-info}/WHEEL +1 -1
- numba_cuda-0.8.0.dist-info/RECORD +0 -251
- {numba_cuda-0.8.0.dist-info → numba_cuda-0.9.0.dist-info}/licenses/LICENSE +0 -0
- {numba_cuda-0.8.0.dist-info → numba_cuda-0.9.0.dist-info}/top_level.txt +0 -0
@@ -17,13 +17,23 @@ RSQRT2PI = 0.39894228040143267793994605993438
|
|
17
17
|
|
18
18
|
def cnd(d):
|
19
19
|
K = 1.0 / (1.0 + 0.2316419 * np.abs(d))
|
20
|
-
ret_val = (
|
21
|
-
|
20
|
+
ret_val = (
|
21
|
+
RSQRT2PI
|
22
|
+
* np.exp(-0.5 * d * d)
|
23
|
+
* (K * (A1 + K * (A2 + K * (A3 + K * (A4 + K * A5)))))
|
24
|
+
)
|
22
25
|
return np.where(d > 0, 1.0 - ret_val, ret_val)
|
23
26
|
|
24
27
|
|
25
|
-
def black_scholes(
|
26
|
-
|
28
|
+
def black_scholes(
|
29
|
+
callResult,
|
30
|
+
putResult,
|
31
|
+
stockPrice,
|
32
|
+
optionStrike,
|
33
|
+
optionYears,
|
34
|
+
Riskfree,
|
35
|
+
Volatility,
|
36
|
+
):
|
27
37
|
S = stockPrice
|
28
38
|
X = optionStrike
|
29
39
|
T = optionYears
|
@@ -35,9 +45,9 @@ def black_scholes(callResult, putResult, stockPrice, optionStrike, optionYears,
|
|
35
45
|
cndd1 = cnd(d1)
|
36
46
|
cndd2 = cnd(d2)
|
37
47
|
|
38
|
-
expRT = np.exp(-
|
39
|
-
callResult[:] =
|
40
|
-
putResult[:] =
|
48
|
+
expRT = np.exp(-R * T)
|
49
|
+
callResult[:] = S * cndd1 - X * expRT * cndd2
|
50
|
+
putResult[:] = X * expRT * (1.0 - cndd2) - S * (1.0 - cndd1)
|
41
51
|
|
42
52
|
|
43
53
|
def randfloat(rand_var, low, high):
|
@@ -61,34 +71,54 @@ class TestBlackScholes(CUDATestCase):
|
|
61
71
|
|
62
72
|
# numpy
|
63
73
|
for i in range(iterations):
|
64
|
-
black_scholes(
|
65
|
-
|
74
|
+
black_scholes(
|
75
|
+
callResultNumpy,
|
76
|
+
putResultNumpy,
|
77
|
+
stockPrice,
|
78
|
+
optionStrike,
|
79
|
+
optionYears,
|
80
|
+
RISKFREE,
|
81
|
+
VOLATILITY,
|
82
|
+
)
|
66
83
|
|
67
84
|
@cuda.jit(double(double), device=True, inline=True)
|
68
85
|
def cnd_cuda(d):
|
69
86
|
K = 1.0 / (1.0 + 0.2316419 * math.fabs(d))
|
70
|
-
ret_val = (
|
71
|
-
|
87
|
+
ret_val = (
|
88
|
+
RSQRT2PI
|
89
|
+
* math.exp(-0.5 * d * d)
|
90
|
+
* (K * (A1 + K * (A2 + K * (A3 + K * (A4 + K * A5)))))
|
91
|
+
)
|
72
92
|
if d > 0:
|
73
93
|
ret_val = 1.0 - ret_val
|
74
94
|
return ret_val
|
75
95
|
|
76
|
-
@cuda.jit(
|
77
|
-
|
96
|
+
@cuda.jit(
|
97
|
+
void(
|
98
|
+
double[:],
|
99
|
+
double[:],
|
100
|
+
double[:],
|
101
|
+
double[:],
|
102
|
+
double[:],
|
103
|
+
double,
|
104
|
+
double,
|
105
|
+
)
|
106
|
+
)
|
78
107
|
def black_scholes_cuda(callResult, putResult, S, X, T, R, V):
|
79
108
|
i = cuda.threadIdx.x + cuda.blockIdx.x * cuda.blockDim.x
|
80
109
|
if i >= S.shape[0]:
|
81
110
|
return
|
82
111
|
sqrtT = math.sqrt(T[i])
|
83
|
-
d1 = (
|
84
|
-
|
112
|
+
d1 = (math.log(S[i] / X[i]) + (R + 0.5 * V * V) * T[i]) / (
|
113
|
+
V * sqrtT
|
114
|
+
)
|
85
115
|
d2 = d1 - V * sqrtT
|
86
116
|
cndd1 = cnd_cuda(d1)
|
87
117
|
cndd2 = cnd_cuda(d2)
|
88
118
|
|
89
|
-
expRT = math.exp((-1. * R) * T[i])
|
90
|
-
callResult[i] =
|
91
|
-
putResult[i] =
|
119
|
+
expRT = math.exp((-1.0 * R) * T[i])
|
120
|
+
callResult[i] = S[i] * cndd1 - X[i] * expRT * cndd2
|
121
|
+
putResult[i] = X[i] * expRT * (1.0 - cndd2) - S[i] * (1.0 - cndd1)
|
92
122
|
|
93
123
|
# numba
|
94
124
|
blockdim = 512, 1
|
@@ -102,8 +132,14 @@ class TestBlackScholes(CUDATestCase):
|
|
102
132
|
|
103
133
|
for i in range(iterations):
|
104
134
|
black_scholes_cuda[griddim, blockdim, stream](
|
105
|
-
d_callResult,
|
106
|
-
|
135
|
+
d_callResult,
|
136
|
+
d_putResult,
|
137
|
+
d_stockPrice,
|
138
|
+
d_optionStrike,
|
139
|
+
d_optionYears,
|
140
|
+
RISKFREE,
|
141
|
+
VOLATILITY,
|
142
|
+
)
|
107
143
|
d_callResult.copy_to_host(callResultNumba, stream)
|
108
144
|
d_putResult.copy_to_host(putResultNumba, stream)
|
109
145
|
stream.synchronize()
|
@@ -116,5 +152,5 @@ class TestBlackScholes(CUDATestCase):
|
|
116
152
|
self.assertTrue(max_abs_err < 1e-13)
|
117
153
|
|
118
154
|
|
119
|
-
if __name__ ==
|
155
|
+
if __name__ == "__main__":
|
120
156
|
unittest.main()
|
@@ -12,13 +12,13 @@ def boolean_func(A, vertial):
|
|
12
12
|
|
13
13
|
class TestCudaBoolean(CUDATestCase):
|
14
14
|
def test_boolean(self):
|
15
|
-
func = cuda.jit(
|
16
|
-
A = np.array([0], dtype=
|
15
|
+
func = cuda.jit("void(float64[:], bool_)")(boolean_func)
|
16
|
+
A = np.array([0], dtype="float64")
|
17
17
|
func[1, 1](A, True)
|
18
18
|
self.assertTrue(A[0] == 123)
|
19
19
|
func[1, 1](A, False)
|
20
20
|
self.assertTrue(A[0] == 321)
|
21
21
|
|
22
22
|
|
23
|
-
if __name__ ==
|
23
|
+
if __name__ == "__main__":
|
24
24
|
unittest.main()
|
@@ -8,15 +8,22 @@ import warnings
|
|
8
8
|
|
9
9
|
from numba import cuda
|
10
10
|
from numba.core.errors import NumbaWarning
|
11
|
-
from numba.cuda.testing import (
|
12
|
-
|
13
|
-
|
11
|
+
from numba.cuda.testing import (
|
12
|
+
CUDATestCase,
|
13
|
+
skip_on_cudasim,
|
14
|
+
skip_unless_cc_60,
|
15
|
+
skip_if_cudadevrt_missing,
|
16
|
+
skip_if_mvc_enabled,
|
17
|
+
test_data_dir,
|
18
|
+
)
|
14
19
|
from numba.tests.support import SerialMixin
|
15
|
-
from numba.tests.test_caching import (
|
16
|
-
|
20
|
+
from numba.tests.test_caching import (
|
21
|
+
DispatcherCacheUsecasesTest,
|
22
|
+
skip_bad_access,
|
23
|
+
)
|
17
24
|
|
18
25
|
|
19
|
-
@skip_on_cudasim(
|
26
|
+
@skip_on_cudasim("Simulator does not implement caching")
|
20
27
|
class CUDACachingTest(SerialMixin, DispatcherCacheUsecasesTest):
|
21
28
|
here = os.path.dirname(__file__)
|
22
29
|
usecases_file = os.path.join(here, "cache_usecases.py")
|
@@ -72,23 +79,23 @@ class CUDACachingTest(SerialMixin, DispatcherCacheUsecasesTest):
|
|
72
79
|
mod = self.import_module()
|
73
80
|
f = mod.many_locals
|
74
81
|
f[1, 1]()
|
75
|
-
self.check_pycache(2)
|
82
|
+
self.check_pycache(2) # 1 index, 1 data
|
76
83
|
|
77
84
|
def test_closure(self):
|
78
85
|
mod = self.import_module()
|
79
86
|
|
80
87
|
with warnings.catch_warnings():
|
81
|
-
warnings.simplefilter(
|
88
|
+
warnings.simplefilter("error", NumbaWarning)
|
82
89
|
|
83
90
|
f = mod.closure1
|
84
|
-
self.assertPreciseEqual(f(3), 6)
|
91
|
+
self.assertPreciseEqual(f(3), 6) # 3 + 3 = 6
|
85
92
|
f = mod.closure2
|
86
|
-
self.assertPreciseEqual(f(3), 8)
|
93
|
+
self.assertPreciseEqual(f(3), 8) # 3 + 5 = 8
|
87
94
|
f = mod.closure3
|
88
|
-
self.assertPreciseEqual(f(3), 10)
|
95
|
+
self.assertPreciseEqual(f(3), 10) # 3 + 7 = 10
|
89
96
|
f = mod.closure4
|
90
|
-
self.assertPreciseEqual(f(3), 12)
|
91
|
-
self.check_pycache(5)
|
97
|
+
self.assertPreciseEqual(f(3), 12) # 3 + 9 = 12
|
98
|
+
self.check_pycache(5) # 1 nbi, 4 nbc
|
92
99
|
|
93
100
|
def test_cache_reuse(self):
|
94
101
|
mod = self.import_module()
|
@@ -158,7 +165,7 @@ class CUDACachingTest(SerialMixin, DispatcherCacheUsecasesTest):
|
|
158
165
|
|
159
166
|
@skip_unless_cc_60
|
160
167
|
@skip_if_cudadevrt_missing
|
161
|
-
@skip_if_mvc_enabled(
|
168
|
+
@skip_if_mvc_enabled("CG not supported with MVC")
|
162
169
|
def test_cache_cg(self):
|
163
170
|
# Functions using cooperative groups should be cacheable. See Issue
|
164
171
|
# #8888: https://github.com/numba/numba/issues/8888
|
@@ -174,7 +181,7 @@ class CUDACachingTest(SerialMixin, DispatcherCacheUsecasesTest):
|
|
174
181
|
|
175
182
|
@skip_unless_cc_60
|
176
183
|
@skip_if_cudadevrt_missing
|
177
|
-
@skip_if_mvc_enabled(
|
184
|
+
@skip_if_mvc_enabled("CG not supported with MVC")
|
178
185
|
def test_cache_cg_clean_run(self):
|
179
186
|
# See Issue #9432: https://github.com/numba/numba/issues/9432
|
180
187
|
# If a cached function using CG sync was the first thing to compile,
|
@@ -191,9 +198,11 @@ class CUDACachingTest(SerialMixin, DispatcherCacheUsecasesTest):
|
|
191
198
|
mod.cg_usecase(0)
|
192
199
|
""" % dict(tempdir=self.tempdir, modname=self.modname)
|
193
200
|
|
194
|
-
popen = subprocess.Popen(
|
195
|
-
|
196
|
-
|
201
|
+
popen = subprocess.Popen(
|
202
|
+
[sys.executable, "-c", code],
|
203
|
+
stdout=subprocess.PIPE,
|
204
|
+
stderr=subprocess.PIPE,
|
205
|
+
)
|
197
206
|
out, err = popen.communicate(timeout=60)
|
198
207
|
if popen.returncode != 0:
|
199
208
|
raise AssertionError(
|
@@ -212,8 +221,9 @@ class CUDACachingTest(SerialMixin, DispatcherCacheUsecasesTest):
|
|
212
221
|
f = mod.add_usecase
|
213
222
|
# Remove this function's cache files at the end, to avoid accumulation
|
214
223
|
# across test calls.
|
215
|
-
self.addCleanup(
|
216
|
-
|
224
|
+
self.addCleanup(
|
225
|
+
shutil.rmtree, f.func.stats.cache_path, ignore_errors=True
|
226
|
+
)
|
217
227
|
|
218
228
|
self.assertPreciseEqual(f(2, 3), 6)
|
219
229
|
# It's a cache miss since the file was copied to a new temp location
|
@@ -230,8 +240,9 @@ class CUDACachingTest(SerialMixin, DispatcherCacheUsecasesTest):
|
|
230
240
|
self.check_pycache(0)
|
231
241
|
|
232
242
|
@skip_bad_access
|
233
|
-
@unittest.skipIf(
|
234
|
-
|
243
|
+
@unittest.skipIf(
|
244
|
+
os.name == "nt", "cannot easily make a directory read-only on Windows"
|
245
|
+
)
|
235
246
|
def test_non_creatable_pycache(self):
|
236
247
|
# Make it impossible to create the __pycache__ directory
|
237
248
|
old_perms = os.stat(self.tempdir).st_mode
|
@@ -241,11 +252,12 @@ class CUDACachingTest(SerialMixin, DispatcherCacheUsecasesTest):
|
|
241
252
|
self._test_pycache_fallback()
|
242
253
|
|
243
254
|
@skip_bad_access
|
244
|
-
@unittest.skipIf(
|
245
|
-
|
255
|
+
@unittest.skipIf(
|
256
|
+
os.name == "nt", "cannot easily make a directory read-only on Windows"
|
257
|
+
)
|
246
258
|
def test_non_writable_pycache(self):
|
247
259
|
# Make it impossible to write to the __pycache__ directory
|
248
|
-
pycache = os.path.join(self.tempdir,
|
260
|
+
pycache = os.path.join(self.tempdir, "__pycache__")
|
249
261
|
os.mkdir(pycache)
|
250
262
|
old_perms = os.stat(pycache).st_mode
|
251
263
|
os.chmod(pycache, 0o500)
|
@@ -254,15 +266,16 @@ class CUDACachingTest(SerialMixin, DispatcherCacheUsecasesTest):
|
|
254
266
|
self._test_pycache_fallback()
|
255
267
|
|
256
268
|
def test_cannot_cache_linking_libraries(self):
|
257
|
-
link = str(test_data_dir /
|
258
|
-
msg =
|
269
|
+
link = str(test_data_dir / "jitlink.ptx")
|
270
|
+
msg = "Cannot pickle CUDACodeLibrary with linking files"
|
259
271
|
with self.assertRaisesRegex(RuntimeError, msg):
|
260
|
-
|
272
|
+
|
273
|
+
@cuda.jit("void()", cache=True, link=[link])
|
261
274
|
def f():
|
262
275
|
pass
|
263
276
|
|
264
277
|
|
265
|
-
@skip_on_cudasim(
|
278
|
+
@skip_on_cudasim("Simulator does not implement caching")
|
266
279
|
class CUDAAndCPUCachingTest(SerialMixin, DispatcherCacheUsecasesTest):
|
267
280
|
here = os.path.dirname(__file__)
|
268
281
|
usecases_file = os.path.join(here, "cache_with_cpu_usecases.py")
|
@@ -353,7 +366,7 @@ def get_different_cc_gpus():
|
|
353
366
|
return None
|
354
367
|
|
355
368
|
|
356
|
-
@skip_on_cudasim(
|
369
|
+
@skip_on_cudasim("Simulator does not implement caching")
|
357
370
|
class TestMultiCCCaching(SerialMixin, DispatcherCacheUsecasesTest):
|
358
371
|
here = os.path.dirname(__file__)
|
359
372
|
usecases_file = os.path.join(here, "cache_usecases.py")
|
@@ -370,7 +383,7 @@ class TestMultiCCCaching(SerialMixin, DispatcherCacheUsecasesTest):
|
|
370
383
|
def test_cache(self):
|
371
384
|
gpus = get_different_cc_gpus()
|
372
385
|
if not gpus:
|
373
|
-
self.skipTest(
|
386
|
+
self.skipTest("Need two different CCs for multi-CC cache test")
|
374
387
|
|
375
388
|
self.check_pycache(0)
|
376
389
|
mod = self.import_module()
|
@@ -482,13 +495,13 @@ def child_initializer():
|
|
482
495
|
# Disable occupancy and implicit copy warnings in processes in a
|
483
496
|
# multiprocessing pool.
|
484
497
|
from numba.core import config
|
498
|
+
|
485
499
|
config.CUDA_LOW_OCCUPANCY_WARNINGS = 0
|
486
500
|
config.CUDA_WARN_ON_IMPLICIT_COPY = 0
|
487
501
|
|
488
502
|
|
489
|
-
@skip_on_cudasim(
|
503
|
+
@skip_on_cudasim("Simulator does not implement caching")
|
490
504
|
class TestMultiprocessCache(SerialMixin, DispatcherCacheUsecasesTest):
|
491
|
-
|
492
505
|
# Nested multiprocessing.Pool raises AssertionError:
|
493
506
|
# "daemonic processes are not allowed to have children"
|
494
507
|
_numba_parallel_test_ = False
|
@@ -513,7 +526,7 @@ class TestMultiprocessCache(SerialMixin, DispatcherCacheUsecasesTest):
|
|
513
526
|
f = mod.simple_usecase_caller
|
514
527
|
n = 3
|
515
528
|
try:
|
516
|
-
ctx = multiprocessing.get_context(
|
529
|
+
ctx = multiprocessing.get_context("spawn")
|
517
530
|
except AttributeError:
|
518
531
|
ctx = multiprocessing
|
519
532
|
|
@@ -526,7 +539,7 @@ class TestMultiprocessCache(SerialMixin, DispatcherCacheUsecasesTest):
|
|
526
539
|
self.assertEqual(res, n * (n - 1) // 2)
|
527
540
|
|
528
541
|
|
529
|
-
@skip_on_cudasim(
|
542
|
+
@skip_on_cudasim("Simulator does not implement the CUDACodeLibrary")
|
530
543
|
class TestCUDACodeLibrary(CUDATestCase):
|
531
544
|
# For tests of miscellaneous CUDACodeLibrary behaviour that we wish to
|
532
545
|
# explicitly check
|
@@ -539,7 +552,7 @@ class TestCUDACodeLibrary(CUDATestCase):
|
|
539
552
|
# Usually a CodeLibrary requires a real CodeGen, but since we don't
|
540
553
|
# interact with it, anything will do
|
541
554
|
codegen = object()
|
542
|
-
name =
|
555
|
+
name = "library"
|
543
556
|
cl = CUDACodeLibrary(codegen, name)
|
544
|
-
with self.assertRaisesRegex(RuntimeError,
|
557
|
+
with self.assertRaisesRegex(RuntimeError, "Cannot pickle unfinalized"):
|
545
558
|
cl._reduce_states()
|
@@ -4,8 +4,7 @@ from numba.cuda import compile_ptx
|
|
4
4
|
from numba.core.types import f2, i1, i2, i4, i8, u1, u2, u4, u8
|
5
5
|
from numba import cuda
|
6
6
|
from numba.core import types
|
7
|
-
from numba.cuda.testing import
|
8
|
-
skip_unless_cc_53)
|
7
|
+
from numba.cuda.testing import CUDATestCase, skip_on_cudasim, skip_unless_cc_53
|
9
8
|
from numba.types import float16, float32
|
10
9
|
import itertools
|
11
10
|
import unittest
|
@@ -50,7 +49,7 @@ def to_uint64(x):
|
|
50
49
|
def to_float16(x):
|
51
50
|
# When division and operators on float16 types are supported, this should
|
52
51
|
# be changed to match the implementation in to_float32.
|
53
|
-
return
|
52
|
+
return np.float16(x) * np.float16(0.5)
|
54
53
|
|
55
54
|
|
56
55
|
def to_float32(x):
|
@@ -76,6 +75,7 @@ def to_complex128(x):
|
|
76
75
|
# - The device version uses cuda.fp16.hmul
|
77
76
|
# - The host version uses the * operator
|
78
77
|
|
78
|
+
|
79
79
|
def cuda_int_literal_to_float16(x):
|
80
80
|
# Note that we need to use `2` and not `np.float16(2)` to ensure that this
|
81
81
|
# types as a literal int and not a const float16.
|
@@ -128,7 +128,7 @@ class TestCasting(CUDATestCase):
|
|
128
128
|
self.assertEqual(cfunc(-12.3), pyfunc(-12.3))
|
129
129
|
self.assertEqual(cfunc(-12.3), int(-12.3))
|
130
130
|
|
131
|
-
@skip_on_cudasim(
|
131
|
+
@skip_on_cudasim("Compilation unsupported in the simulator")
|
132
132
|
def test_float16_to_int_ptx(self):
|
133
133
|
pyfuncs = (to_int8, to_int16, to_int32, to_int64)
|
134
134
|
sizes = (8, 16, 32, 64)
|
@@ -150,7 +150,7 @@ class TestCasting(CUDATestCase):
|
|
150
150
|
self.assertEqual(cfunc(12.3), pyfunc(12.3))
|
151
151
|
self.assertEqual(cfunc(12.3), int(12.3))
|
152
152
|
|
153
|
-
@skip_on_cudasim(
|
153
|
+
@skip_on_cudasim("Compilation unsupported in the simulator")
|
154
154
|
def test_float16_to_uint_ptx(self):
|
155
155
|
pyfuncs = (to_uint8, to_uint16, to_uint32, to_uint64)
|
156
156
|
sizes = (8, 16, 32, 64)
|
@@ -171,17 +171,18 @@ class TestCasting(CUDATestCase):
|
|
171
171
|
|
172
172
|
@skip_unless_cc_53
|
173
173
|
def test_literal_to_float16(self):
|
174
|
-
cudafuncs = (cuda_int_literal_to_float16,
|
175
|
-
|
176
|
-
|
177
|
-
|
174
|
+
cudafuncs = (cuda_int_literal_to_float16, cuda_float_literal_to_float16)
|
175
|
+
hostfuncs = (
|
176
|
+
reference_int_literal_to_float16,
|
177
|
+
reference_float_literal_to_float16,
|
178
|
+
)
|
178
179
|
|
179
180
|
for cudafunc, hostfunc in zip(cudafuncs, hostfuncs):
|
180
181
|
with self.subTest(func=cudafunc):
|
181
182
|
cfunc = self._create_wrapped(cudafunc, np.float16, np.float16)
|
182
183
|
self.assertEqual(cfunc(321), hostfunc(321))
|
183
184
|
|
184
|
-
@skip_on_cudasim(
|
185
|
+
@skip_on_cudasim("Compilation unsupported in the simulator")
|
185
186
|
def test_int_to_float16_ptx(self):
|
186
187
|
fromtys = (i1, i2, i4, i8)
|
187
188
|
sizes = (8, 16, 32, 64)
|
@@ -190,7 +191,7 @@ class TestCasting(CUDATestCase):
|
|
190
191
|
ptx, _ = compile_ptx(to_float16, (ty,), device=True)
|
191
192
|
self.assertIn(f"cvt.rn.f16.s{size}", ptx)
|
192
193
|
|
193
|
-
@skip_on_cudasim(
|
194
|
+
@skip_on_cudasim("Compilation unsupported in the simulator")
|
194
195
|
def test_uint_to_float16_ptx(self):
|
195
196
|
fromtys = (u1, u2, u4, u8)
|
196
197
|
sizes = (8, 16, 32, 64)
|
@@ -211,12 +212,14 @@ class TestCasting(CUDATestCase):
|
|
211
212
|
# the CUDA target doesn't yet implement division (or operators)
|
212
213
|
# for float16 values, so we test by comparing with the computed
|
213
214
|
# expression instead.
|
214
|
-
np.testing.assert_allclose(
|
215
|
-
|
216
|
-
|
217
|
-
|
218
|
-
|
219
|
-
|
215
|
+
np.testing.assert_allclose(
|
216
|
+
cfunc(12.3), toty(12.3) / toty(2), rtol=0.0003
|
217
|
+
)
|
218
|
+
np.testing.assert_allclose(
|
219
|
+
cfunc(-12.3), toty(-12.3) / toty(2), rtol=0.0003
|
220
|
+
)
|
221
|
+
|
222
|
+
@skip_on_cudasim("Compilation unsupported in the simulator")
|
220
223
|
def test_float16_to_float_ptx(self):
|
221
224
|
pyfuncs = (to_float32, to_float64)
|
222
225
|
postfixes = ("f32", "f64")
|
@@ -239,12 +242,14 @@ class TestCasting(CUDATestCase):
|
|
239
242
|
# to match the casting that is automatically applied when
|
240
243
|
# passing the input to the cfunc as part of wrapping it in
|
241
244
|
# an array of type fromtype.
|
242
|
-
np.testing.assert_allclose(
|
243
|
-
|
244
|
-
|
245
|
-
|
246
|
-
|
247
|
-
|
245
|
+
np.testing.assert_allclose(
|
246
|
+
cfunc(3.21), pyfunc(fromty(3.21))
|
247
|
+
)
|
248
|
+
np.testing.assert_allclose(
|
249
|
+
cfunc(-3.21), pyfunc(fromty(-3.21)) + 0j
|
250
|
+
)
|
251
|
+
|
252
|
+
@skip_on_cudasim("Compilation unsupported in the simulator")
|
248
253
|
def test_native_cast(self):
|
249
254
|
float32_ptx, _ = cuda.compile_ptx(native_cast, (float32,), device=True)
|
250
255
|
self.assertIn("st.f32", float32_ptx)
|
@@ -253,5 +258,5 @@ class TestCasting(CUDATestCase):
|
|
253
258
|
self.assertIn("st.u16", float16_ptx)
|
254
259
|
|
255
260
|
|
256
|
-
if __name__ ==
|
261
|
+
if __name__ == "__main__":
|
257
262
|
unittest.main()
|
@@ -1,21 +1,26 @@
|
|
1
1
|
import numpy as np
|
2
2
|
|
3
3
|
from numba import cuda, types
|
4
|
-
from numba.cuda.testing import (
|
5
|
-
|
4
|
+
from numba.cuda.testing import (
|
5
|
+
skip_on_cudasim,
|
6
|
+
test_data_dir,
|
7
|
+
unittest,
|
8
|
+
CUDATestCase,
|
9
|
+
)
|
6
10
|
from numba.tests.support import skip_unless_cffi
|
7
11
|
|
8
12
|
|
9
13
|
@skip_unless_cffi
|
10
|
-
@skip_on_cudasim(
|
14
|
+
@skip_on_cudasim("Simulator does not support linking")
|
11
15
|
class TestCFFI(CUDATestCase):
|
12
16
|
def test_from_buffer(self):
|
13
17
|
import cffi
|
18
|
+
|
14
19
|
ffi = cffi.FFI()
|
15
20
|
|
16
|
-
link = str(test_data_dir /
|
21
|
+
link = str(test_data_dir / "jitlink.ptx")
|
17
22
|
sig = types.void(types.CPointer(types.int32))
|
18
|
-
array_mutator = cuda.declare_device(
|
23
|
+
array_mutator = cuda.declare_device("array_mutator", sig)
|
19
24
|
|
20
25
|
@cuda.jit(link=[link])
|
21
26
|
def mutate_array(x):
|
@@ -29,5 +34,5 @@ class TestCFFI(CUDATestCase):
|
|
29
34
|
self.assertEqual(x[0], x[1])
|
30
35
|
|
31
36
|
|
32
|
-
if __name__ ==
|
37
|
+
if __name__ == "__main__":
|
33
38
|
unittest.main()
|