numba-cuda 0.8.0__py3-none-any.whl → 0.9.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (227) hide show
  1. _numba_cuda_redirector.py +17 -13
  2. numba_cuda/VERSION +1 -1
  3. numba_cuda/_version.py +4 -1
  4. numba_cuda/numba/cuda/__init__.py +6 -2
  5. numba_cuda/numba/cuda/api.py +129 -86
  6. numba_cuda/numba/cuda/api_util.py +3 -3
  7. numba_cuda/numba/cuda/args.py +12 -16
  8. numba_cuda/numba/cuda/cg.py +6 -6
  9. numba_cuda/numba/cuda/codegen.py +74 -43
  10. numba_cuda/numba/cuda/compiler.py +232 -113
  11. numba_cuda/numba/cuda/cpp_function_wrappers.cu +1 -2
  12. numba_cuda/numba/cuda/cuda_fp16.h +661 -661
  13. numba_cuda/numba/cuda/cuda_fp16.hpp +3 -3
  14. numba_cuda/numba/cuda/cuda_paths.py +291 -99
  15. numba_cuda/numba/cuda/cudadecl.py +125 -69
  16. numba_cuda/numba/cuda/cudadrv/__init__.py +3 -1
  17. numba_cuda/numba/cuda/cudadrv/devicearray.py +185 -135
  18. numba_cuda/numba/cuda/cudadrv/devices.py +16 -11
  19. numba_cuda/numba/cuda/cudadrv/driver.py +463 -297
  20. numba_cuda/numba/cuda/cudadrv/drvapi.py +241 -207
  21. numba_cuda/numba/cuda/cudadrv/dummyarray.py +66 -54
  22. numba_cuda/numba/cuda/cudadrv/enums.py +1 -1
  23. numba_cuda/numba/cuda/cudadrv/error.py +6 -2
  24. numba_cuda/numba/cuda/cudadrv/libs.py +67 -63
  25. numba_cuda/numba/cuda/cudadrv/linkable_code.py +16 -1
  26. numba_cuda/numba/cuda/cudadrv/mappings.py +16 -14
  27. numba_cuda/numba/cuda/cudadrv/nvrtc.py +138 -29
  28. numba_cuda/numba/cuda/cudadrv/nvvm.py +296 -161
  29. numba_cuda/numba/cuda/cudadrv/rtapi.py +1 -1
  30. numba_cuda/numba/cuda/cudadrv/runtime.py +20 -8
  31. numba_cuda/numba/cuda/cudaimpl.py +317 -233
  32. numba_cuda/numba/cuda/cudamath.py +1 -1
  33. numba_cuda/numba/cuda/debuginfo.py +8 -6
  34. numba_cuda/numba/cuda/decorators.py +75 -45
  35. numba_cuda/numba/cuda/descriptor.py +1 -1
  36. numba_cuda/numba/cuda/device_init.py +69 -18
  37. numba_cuda/numba/cuda/deviceufunc.py +143 -98
  38. numba_cuda/numba/cuda/dispatcher.py +300 -213
  39. numba_cuda/numba/cuda/errors.py +13 -10
  40. numba_cuda/numba/cuda/extending.py +1 -1
  41. numba_cuda/numba/cuda/initialize.py +5 -3
  42. numba_cuda/numba/cuda/intrinsic_wrapper.py +3 -3
  43. numba_cuda/numba/cuda/intrinsics.py +31 -27
  44. numba_cuda/numba/cuda/kernels/reduction.py +13 -13
  45. numba_cuda/numba/cuda/kernels/transpose.py +3 -6
  46. numba_cuda/numba/cuda/libdevice.py +317 -317
  47. numba_cuda/numba/cuda/libdeviceimpl.py +3 -2
  48. numba_cuda/numba/cuda/locks.py +16 -0
  49. numba_cuda/numba/cuda/mathimpl.py +62 -57
  50. numba_cuda/numba/cuda/models.py +1 -5
  51. numba_cuda/numba/cuda/nvvmutils.py +103 -88
  52. numba_cuda/numba/cuda/printimpl.py +9 -5
  53. numba_cuda/numba/cuda/random.py +46 -36
  54. numba_cuda/numba/cuda/reshape_funcs.cu +1 -1
  55. numba_cuda/numba/cuda/runtime/__init__.py +1 -1
  56. numba_cuda/numba/cuda/runtime/memsys.cu +1 -1
  57. numba_cuda/numba/cuda/runtime/memsys.cuh +1 -1
  58. numba_cuda/numba/cuda/runtime/nrt.cu +3 -3
  59. numba_cuda/numba/cuda/runtime/nrt.py +48 -43
  60. numba_cuda/numba/cuda/simulator/__init__.py +22 -12
  61. numba_cuda/numba/cuda/simulator/api.py +38 -22
  62. numba_cuda/numba/cuda/simulator/compiler.py +2 -2
  63. numba_cuda/numba/cuda/simulator/cudadrv/__init__.py +8 -2
  64. numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py +63 -55
  65. numba_cuda/numba/cuda/simulator/cudadrv/devices.py +13 -11
  66. numba_cuda/numba/cuda/simulator/cudadrv/driver.py +5 -5
  67. numba_cuda/numba/cuda/simulator/cudadrv/drvapi.py +2 -2
  68. numba_cuda/numba/cuda/simulator/cudadrv/libs.py +1 -1
  69. numba_cuda/numba/cuda/simulator/cudadrv/nvvm.py +3 -3
  70. numba_cuda/numba/cuda/simulator/cudadrv/runtime.py +3 -3
  71. numba_cuda/numba/cuda/simulator/kernel.py +43 -34
  72. numba_cuda/numba/cuda/simulator/kernelapi.py +31 -26
  73. numba_cuda/numba/cuda/simulator/reduction.py +1 -0
  74. numba_cuda/numba/cuda/simulator/vector_types.py +13 -9
  75. numba_cuda/numba/cuda/simulator_init.py +2 -4
  76. numba_cuda/numba/cuda/stubs.py +139 -102
  77. numba_cuda/numba/cuda/target.py +64 -47
  78. numba_cuda/numba/cuda/testing.py +24 -19
  79. numba_cuda/numba/cuda/tests/__init__.py +14 -12
  80. numba_cuda/numba/cuda/tests/cudadrv/test_array_attr.py +16 -17
  81. numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py +7 -7
  82. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_array_slicing.py +73 -54
  83. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_auto_context.py +1 -1
  84. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py +48 -50
  85. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py +47 -29
  86. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_libraries.py +3 -3
  87. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_memory.py +19 -19
  88. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py +108 -103
  89. numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py +20 -11
  90. numba_cuda/numba/cuda/tests/cudadrv/test_detect.py +20 -17
  91. numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +8 -6
  92. numba_cuda/numba/cuda/tests/cudadrv/test_events.py +1 -1
  93. numba_cuda/numba/cuda/tests/cudadrv/test_host_alloc.py +8 -7
  94. numba_cuda/numba/cuda/tests/cudadrv/test_init.py +13 -13
  95. numba_cuda/numba/cuda/tests/cudadrv/test_inline_ptx.py +12 -9
  96. numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +36 -31
  97. numba_cuda/numba/cuda/tests/cudadrv/test_managed_alloc.py +8 -7
  98. numba_cuda/numba/cuda/tests/cudadrv/test_module_callbacks.py +294 -0
  99. numba_cuda/numba/cuda/tests/cudadrv/test_mvc.py +10 -7
  100. numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py +24 -15
  101. numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py +43 -41
  102. numba_cuda/numba/cuda/tests/cudadrv/test_pinned.py +4 -5
  103. numba_cuda/numba/cuda/tests/cudadrv/test_profiler.py +2 -2
  104. numba_cuda/numba/cuda/tests/cudadrv/test_ptds.py +28 -17
  105. numba_cuda/numba/cuda/tests/cudadrv/test_reset_device.py +1 -2
  106. numba_cuda/numba/cuda/tests/cudadrv/test_runtime.py +22 -14
  107. numba_cuda/numba/cuda/tests/cudadrv/test_select_device.py +1 -1
  108. numba_cuda/numba/cuda/tests/cudadrv/test_streams.py +4 -3
  109. numba_cuda/numba/cuda/tests/cudapy/cache_usecases.py +10 -4
  110. numba_cuda/numba/cuda/tests/cudapy/cache_with_cpu_usecases.py +1 -0
  111. numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +7 -6
  112. numba_cuda/numba/cuda/tests/cudapy/jitlink.ptx +0 -2
  113. numba_cuda/numba/cuda/tests/cudapy/recursion_usecases.py +1 -0
  114. numba_cuda/numba/cuda/tests/cudapy/test_alignment.py +6 -5
  115. numba_cuda/numba/cuda/tests/cudapy/test_array.py +52 -42
  116. numba_cuda/numba/cuda/tests/cudapy/test_array_args.py +5 -6
  117. numba_cuda/numba/cuda/tests/cudapy/test_array_methods.py +1 -1
  118. numba_cuda/numba/cuda/tests/cudapy/test_atomics.py +501 -304
  119. numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py +57 -21
  120. numba_cuda/numba/cuda/tests/cudapy/test_boolean.py +3 -3
  121. numba_cuda/numba/cuda/tests/cudapy/test_caching.py +50 -37
  122. numba_cuda/numba/cuda/tests/cudapy/test_casting.py +29 -24
  123. numba_cuda/numba/cuda/tests/cudapy/test_cffi.py +11 -6
  124. numba_cuda/numba/cuda/tests/cudapy/test_compiler.py +84 -50
  125. numba_cuda/numba/cuda/tests/cudapy/test_complex.py +144 -73
  126. numba_cuda/numba/cuda/tests/cudapy/test_complex_kernel.py +2 -2
  127. numba_cuda/numba/cuda/tests/cudapy/test_const_string.py +37 -27
  128. numba_cuda/numba/cuda/tests/cudapy/test_constmem.py +43 -45
  129. numba_cuda/numba/cuda/tests/cudapy/test_cooperative_groups.py +21 -14
  130. numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py +60 -55
  131. numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py +3 -2
  132. numba_cuda/numba/cuda/tests/cudapy/test_datetime.py +26 -22
  133. numba_cuda/numba/cuda/tests/cudapy/test_debug.py +29 -27
  134. numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +31 -28
  135. numba_cuda/numba/cuda/tests/cudapy/test_device_func.py +52 -45
  136. numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +55 -43
  137. numba_cuda/numba/cuda/tests/cudapy/test_enums.py +6 -7
  138. numba_cuda/numba/cuda/tests/cudapy/test_errors.py +30 -15
  139. numba_cuda/numba/cuda/tests/cudapy/test_exception.py +11 -12
  140. numba_cuda/numba/cuda/tests/cudapy/test_extending.py +19 -12
  141. numba_cuda/numba/cuda/tests/cudapy/test_fastmath.py +77 -66
  142. numba_cuda/numba/cuda/tests/cudapy/test_forall.py +5 -3
  143. numba_cuda/numba/cuda/tests/cudapy/test_freevar.py +5 -3
  144. numba_cuda/numba/cuda/tests/cudapy/test_frexp_ldexp.py +1 -1
  145. numba_cuda/numba/cuda/tests/cudapy/test_globals.py +3 -5
  146. numba_cuda/numba/cuda/tests/cudapy/test_gufunc.py +144 -126
  147. numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scalar.py +23 -18
  148. numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scheduling.py +16 -22
  149. numba_cuda/numba/cuda/tests/cudapy/test_idiv.py +1 -3
  150. numba_cuda/numba/cuda/tests/cudapy/test_inspect.py +29 -20
  151. numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py +147 -99
  152. numba_cuda/numba/cuda/tests/cudapy/test_ipc.py +50 -36
  153. numba_cuda/numba/cuda/tests/cudapy/test_iterators.py +1 -2
  154. numba_cuda/numba/cuda/tests/cudapy/test_lang.py +4 -4
  155. numba_cuda/numba/cuda/tests/cudapy/test_laplace.py +6 -6
  156. numba_cuda/numba/cuda/tests/cudapy/test_libdevice.py +24 -20
  157. numba_cuda/numba/cuda/tests/cudapy/test_lineinfo.py +36 -31
  158. numba_cuda/numba/cuda/tests/cudapy/test_localmem.py +13 -13
  159. numba_cuda/numba/cuda/tests/cudapy/test_mandel.py +13 -6
  160. numba_cuda/numba/cuda/tests/cudapy/test_math.py +83 -66
  161. numba_cuda/numba/cuda/tests/cudapy/test_matmul.py +1 -3
  162. numba_cuda/numba/cuda/tests/cudapy/test_minmax.py +19 -58
  163. numba_cuda/numba/cuda/tests/cudapy/test_montecarlo.py +4 -4
  164. numba_cuda/numba/cuda/tests/cudapy/test_multigpu.py +9 -7
  165. numba_cuda/numba/cuda/tests/cudapy/test_multiprocessing.py +9 -8
  166. numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py +12 -10
  167. numba_cuda/numba/cuda/tests/cudapy/test_nondet.py +1 -1
  168. numba_cuda/numba/cuda/tests/cudapy/test_operator.py +180 -96
  169. numba_cuda/numba/cuda/tests/cudapy/test_optimization.py +5 -5
  170. numba_cuda/numba/cuda/tests/cudapy/test_overload.py +37 -18
  171. numba_cuda/numba/cuda/tests/cudapy/test_powi.py +7 -7
  172. numba_cuda/numba/cuda/tests/cudapy/test_print.py +9 -7
  173. numba_cuda/numba/cuda/tests/cudapy/test_py2_div_issue.py +1 -1
  174. numba_cuda/numba/cuda/tests/cudapy/test_random.py +15 -10
  175. numba_cuda/numba/cuda/tests/cudapy/test_record_dtype.py +88 -87
  176. numba_cuda/numba/cuda/tests/cudapy/test_recursion.py +12 -10
  177. numba_cuda/numba/cuda/tests/cudapy/test_reduction.py +26 -11
  178. numba_cuda/numba/cuda/tests/cudapy/test_retrieve_autoconverted_arrays.py +7 -10
  179. numba_cuda/numba/cuda/tests/cudapy/test_serialize.py +4 -6
  180. numba_cuda/numba/cuda/tests/cudapy/test_slicing.py +1 -1
  181. numba_cuda/numba/cuda/tests/cudapy/test_sm.py +10 -9
  182. numba_cuda/numba/cuda/tests/cudapy/test_sm_creation.py +62 -43
  183. numba_cuda/numba/cuda/tests/cudapy/test_stream_api.py +7 -3
  184. numba_cuda/numba/cuda/tests/cudapy/test_sync.py +7 -5
  185. numba_cuda/numba/cuda/tests/cudapy/test_transpose.py +18 -11
  186. numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +111 -88
  187. numba_cuda/numba/cuda/tests/cudapy/test_userexc.py +2 -3
  188. numba_cuda/numba/cuda/tests/cudapy/test_vector_type.py +305 -130
  189. numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py +33 -36
  190. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_complex.py +5 -5
  191. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py +16 -12
  192. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_device.py +7 -7
  193. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_scalar_arg.py +6 -7
  194. numba_cuda/numba/cuda/tests/cudapy/test_warning.py +31 -29
  195. numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +31 -25
  196. numba_cuda/numba/cuda/tests/cudasim/test_cudasim_issues.py +19 -13
  197. numba_cuda/numba/cuda/tests/data/jitlink.cu +1 -1
  198. numba_cuda/numba/cuda/tests/data/jitlink.ptx +0 -2
  199. numba_cuda/numba/cuda/tests/doc_examples/test_cg.py +15 -8
  200. numba_cuda/numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py +4 -7
  201. numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py +14 -9
  202. numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py +22 -18
  203. numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py +7 -4
  204. numba_cuda/numba/cuda/tests/doc_examples/test_montecarlo.py +2 -0
  205. numba_cuda/numba/cuda/tests/doc_examples/test_random.py +8 -4
  206. numba_cuda/numba/cuda/tests/doc_examples/test_reduction.py +2 -1
  207. numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py +94 -19
  208. numba_cuda/numba/cuda/tests/doc_examples/test_vecadd.py +2 -2
  209. numba_cuda/numba/cuda/tests/nocuda/test_dummyarray.py +91 -62
  210. numba_cuda/numba/cuda/tests/nocuda/test_function_resolution.py +14 -5
  211. numba_cuda/numba/cuda/tests/nocuda/test_import.py +25 -25
  212. numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py +40 -40
  213. numba_cuda/numba/cuda/tests/nocuda/test_nvvm.py +12 -10
  214. numba_cuda/numba/cuda/tests/nrt/test_nrt.py +16 -20
  215. numba_cuda/numba/cuda/tests/nrt/test_nrt_refct.py +12 -10
  216. numba_cuda/numba/cuda/tests/test_binary_generation/generate_raw_ltoir.py +2 -2
  217. numba_cuda/numba/cuda/types.py +5 -2
  218. numba_cuda/numba/cuda/ufuncs.py +382 -362
  219. numba_cuda/numba/cuda/utils.py +2 -2
  220. numba_cuda/numba/cuda/vector_types.py +2 -2
  221. numba_cuda/numba/cuda/vectorizers.py +37 -32
  222. {numba_cuda-0.8.0.dist-info → numba_cuda-0.9.0.dist-info}/METADATA +1 -1
  223. numba_cuda-0.9.0.dist-info/RECORD +253 -0
  224. {numba_cuda-0.8.0.dist-info → numba_cuda-0.9.0.dist-info}/WHEEL +1 -1
  225. numba_cuda-0.8.0.dist-info/RECORD +0 -251
  226. {numba_cuda-0.8.0.dist-info → numba_cuda-0.9.0.dist-info}/licenses/LICENSE +0 -0
  227. {numba_cuda-0.8.0.dist-info → numba_cuda-0.9.0.dist-info}/top_level.txt +0 -0
@@ -17,13 +17,23 @@ RSQRT2PI = 0.39894228040143267793994605993438
17
17
 
18
18
  def cnd(d):
19
19
  K = 1.0 / (1.0 + 0.2316419 * np.abs(d))
20
- ret_val = (RSQRT2PI * np.exp(-0.5 * d * d) *
21
- (K * (A1 + K * (A2 + K * (A3 + K * (A4 + K * A5))))))
20
+ ret_val = (
21
+ RSQRT2PI
22
+ * np.exp(-0.5 * d * d)
23
+ * (K * (A1 + K * (A2 + K * (A3 + K * (A4 + K * A5)))))
24
+ )
22
25
  return np.where(d > 0, 1.0 - ret_val, ret_val)
23
26
 
24
27
 
25
- def black_scholes(callResult, putResult, stockPrice, optionStrike, optionYears,
26
- Riskfree, Volatility):
28
+ def black_scholes(
29
+ callResult,
30
+ putResult,
31
+ stockPrice,
32
+ optionStrike,
33
+ optionYears,
34
+ Riskfree,
35
+ Volatility,
36
+ ):
27
37
  S = stockPrice
28
38
  X = optionStrike
29
39
  T = optionYears
@@ -35,9 +45,9 @@ def black_scholes(callResult, putResult, stockPrice, optionStrike, optionYears,
35
45
  cndd1 = cnd(d1)
36
46
  cndd2 = cnd(d2)
37
47
 
38
- expRT = np.exp(- R * T)
39
- callResult[:] = (S * cndd1 - X * expRT * cndd2)
40
- putResult[:] = (X * expRT * (1.0 - cndd2) - S * (1.0 - cndd1))
48
+ expRT = np.exp(-R * T)
49
+ callResult[:] = S * cndd1 - X * expRT * cndd2
50
+ putResult[:] = X * expRT * (1.0 - cndd2) - S * (1.0 - cndd1)
41
51
 
42
52
 
43
53
  def randfloat(rand_var, low, high):
@@ -61,34 +71,54 @@ class TestBlackScholes(CUDATestCase):
61
71
 
62
72
  # numpy
63
73
  for i in range(iterations):
64
- black_scholes(callResultNumpy, putResultNumpy, stockPrice,
65
- optionStrike, optionYears, RISKFREE, VOLATILITY)
74
+ black_scholes(
75
+ callResultNumpy,
76
+ putResultNumpy,
77
+ stockPrice,
78
+ optionStrike,
79
+ optionYears,
80
+ RISKFREE,
81
+ VOLATILITY,
82
+ )
66
83
 
67
84
  @cuda.jit(double(double), device=True, inline=True)
68
85
  def cnd_cuda(d):
69
86
  K = 1.0 / (1.0 + 0.2316419 * math.fabs(d))
70
- ret_val = (RSQRT2PI * math.exp(-0.5 * d * d) *
71
- (K * (A1 + K * (A2 + K * (A3 + K * (A4 + K * A5))))))
87
+ ret_val = (
88
+ RSQRT2PI
89
+ * math.exp(-0.5 * d * d)
90
+ * (K * (A1 + K * (A2 + K * (A3 + K * (A4 + K * A5)))))
91
+ )
72
92
  if d > 0:
73
93
  ret_val = 1.0 - ret_val
74
94
  return ret_val
75
95
 
76
- @cuda.jit(void(double[:], double[:], double[:], double[:], double[:],
77
- double, double))
96
+ @cuda.jit(
97
+ void(
98
+ double[:],
99
+ double[:],
100
+ double[:],
101
+ double[:],
102
+ double[:],
103
+ double,
104
+ double,
105
+ )
106
+ )
78
107
  def black_scholes_cuda(callResult, putResult, S, X, T, R, V):
79
108
  i = cuda.threadIdx.x + cuda.blockIdx.x * cuda.blockDim.x
80
109
  if i >= S.shape[0]:
81
110
  return
82
111
  sqrtT = math.sqrt(T[i])
83
- d1 = ((math.log(S[i] / X[i]) + (R + 0.5 * V * V) * T[i])
84
- / (V * sqrtT))
112
+ d1 = (math.log(S[i] / X[i]) + (R + 0.5 * V * V) * T[i]) / (
113
+ V * sqrtT
114
+ )
85
115
  d2 = d1 - V * sqrtT
86
116
  cndd1 = cnd_cuda(d1)
87
117
  cndd2 = cnd_cuda(d2)
88
118
 
89
- expRT = math.exp((-1. * R) * T[i])
90
- callResult[i] = (S[i] * cndd1 - X[i] * expRT * cndd2)
91
- putResult[i] = (X[i] * expRT * (1.0 - cndd2) - S[i] * (1.0 - cndd1))
119
+ expRT = math.exp((-1.0 * R) * T[i])
120
+ callResult[i] = S[i] * cndd1 - X[i] * expRT * cndd2
121
+ putResult[i] = X[i] * expRT * (1.0 - cndd2) - S[i] * (1.0 - cndd1)
92
122
 
93
123
  # numba
94
124
  blockdim = 512, 1
@@ -102,8 +132,14 @@ class TestBlackScholes(CUDATestCase):
102
132
 
103
133
  for i in range(iterations):
104
134
  black_scholes_cuda[griddim, blockdim, stream](
105
- d_callResult, d_putResult, d_stockPrice, d_optionStrike,
106
- d_optionYears, RISKFREE, VOLATILITY)
135
+ d_callResult,
136
+ d_putResult,
137
+ d_stockPrice,
138
+ d_optionStrike,
139
+ d_optionYears,
140
+ RISKFREE,
141
+ VOLATILITY,
142
+ )
107
143
  d_callResult.copy_to_host(callResultNumba, stream)
108
144
  d_putResult.copy_to_host(putResultNumba, stream)
109
145
  stream.synchronize()
@@ -116,5 +152,5 @@ class TestBlackScholes(CUDATestCase):
116
152
  self.assertTrue(max_abs_err < 1e-13)
117
153
 
118
154
 
119
- if __name__ == '__main__':
155
+ if __name__ == "__main__":
120
156
  unittest.main()
@@ -12,13 +12,13 @@ def boolean_func(A, vertial):
12
12
 
13
13
  class TestCudaBoolean(CUDATestCase):
14
14
  def test_boolean(self):
15
- func = cuda.jit('void(float64[:], bool_)')(boolean_func)
16
- A = np.array([0], dtype='float64')
15
+ func = cuda.jit("void(float64[:], bool_)")(boolean_func)
16
+ A = np.array([0], dtype="float64")
17
17
  func[1, 1](A, True)
18
18
  self.assertTrue(A[0] == 123)
19
19
  func[1, 1](A, False)
20
20
  self.assertTrue(A[0] == 321)
21
21
 
22
22
 
23
- if __name__ == '__main__':
23
+ if __name__ == "__main__":
24
24
  unittest.main()
@@ -8,15 +8,22 @@ import warnings
8
8
 
9
9
  from numba import cuda
10
10
  from numba.core.errors import NumbaWarning
11
- from numba.cuda.testing import (CUDATestCase, skip_on_cudasim,
12
- skip_unless_cc_60, skip_if_cudadevrt_missing,
13
- skip_if_mvc_enabled, test_data_dir)
11
+ from numba.cuda.testing import (
12
+ CUDATestCase,
13
+ skip_on_cudasim,
14
+ skip_unless_cc_60,
15
+ skip_if_cudadevrt_missing,
16
+ skip_if_mvc_enabled,
17
+ test_data_dir,
18
+ )
14
19
  from numba.tests.support import SerialMixin
15
- from numba.tests.test_caching import (DispatcherCacheUsecasesTest,
16
- skip_bad_access)
20
+ from numba.tests.test_caching import (
21
+ DispatcherCacheUsecasesTest,
22
+ skip_bad_access,
23
+ )
17
24
 
18
25
 
19
- @skip_on_cudasim('Simulator does not implement caching')
26
+ @skip_on_cudasim("Simulator does not implement caching")
20
27
  class CUDACachingTest(SerialMixin, DispatcherCacheUsecasesTest):
21
28
  here = os.path.dirname(__file__)
22
29
  usecases_file = os.path.join(here, "cache_usecases.py")
@@ -72,23 +79,23 @@ class CUDACachingTest(SerialMixin, DispatcherCacheUsecasesTest):
72
79
  mod = self.import_module()
73
80
  f = mod.many_locals
74
81
  f[1, 1]()
75
- self.check_pycache(2) # 1 index, 1 data
82
+ self.check_pycache(2) # 1 index, 1 data
76
83
 
77
84
  def test_closure(self):
78
85
  mod = self.import_module()
79
86
 
80
87
  with warnings.catch_warnings():
81
- warnings.simplefilter('error', NumbaWarning)
88
+ warnings.simplefilter("error", NumbaWarning)
82
89
 
83
90
  f = mod.closure1
84
- self.assertPreciseEqual(f(3), 6) # 3 + 3 = 6
91
+ self.assertPreciseEqual(f(3), 6) # 3 + 3 = 6
85
92
  f = mod.closure2
86
- self.assertPreciseEqual(f(3), 8) # 3 + 5 = 8
93
+ self.assertPreciseEqual(f(3), 8) # 3 + 5 = 8
87
94
  f = mod.closure3
88
- self.assertPreciseEqual(f(3), 10) # 3 + 7 = 10
95
+ self.assertPreciseEqual(f(3), 10) # 3 + 7 = 10
89
96
  f = mod.closure4
90
- self.assertPreciseEqual(f(3), 12) # 3 + 9 = 12
91
- self.check_pycache(5) # 1 nbi, 4 nbc
97
+ self.assertPreciseEqual(f(3), 12) # 3 + 9 = 12
98
+ self.check_pycache(5) # 1 nbi, 4 nbc
92
99
 
93
100
  def test_cache_reuse(self):
94
101
  mod = self.import_module()
@@ -158,7 +165,7 @@ class CUDACachingTest(SerialMixin, DispatcherCacheUsecasesTest):
158
165
 
159
166
  @skip_unless_cc_60
160
167
  @skip_if_cudadevrt_missing
161
- @skip_if_mvc_enabled('CG not supported with MVC')
168
+ @skip_if_mvc_enabled("CG not supported with MVC")
162
169
  def test_cache_cg(self):
163
170
  # Functions using cooperative groups should be cacheable. See Issue
164
171
  # #8888: https://github.com/numba/numba/issues/8888
@@ -174,7 +181,7 @@ class CUDACachingTest(SerialMixin, DispatcherCacheUsecasesTest):
174
181
 
175
182
  @skip_unless_cc_60
176
183
  @skip_if_cudadevrt_missing
177
- @skip_if_mvc_enabled('CG not supported with MVC')
184
+ @skip_if_mvc_enabled("CG not supported with MVC")
178
185
  def test_cache_cg_clean_run(self):
179
186
  # See Issue #9432: https://github.com/numba/numba/issues/9432
180
187
  # If a cached function using CG sync was the first thing to compile,
@@ -191,9 +198,11 @@ class CUDACachingTest(SerialMixin, DispatcherCacheUsecasesTest):
191
198
  mod.cg_usecase(0)
192
199
  """ % dict(tempdir=self.tempdir, modname=self.modname)
193
200
 
194
- popen = subprocess.Popen([sys.executable, "-c", code],
195
- stdout=subprocess.PIPE,
196
- stderr=subprocess.PIPE)
201
+ popen = subprocess.Popen(
202
+ [sys.executable, "-c", code],
203
+ stdout=subprocess.PIPE,
204
+ stderr=subprocess.PIPE,
205
+ )
197
206
  out, err = popen.communicate(timeout=60)
198
207
  if popen.returncode != 0:
199
208
  raise AssertionError(
@@ -212,8 +221,9 @@ class CUDACachingTest(SerialMixin, DispatcherCacheUsecasesTest):
212
221
  f = mod.add_usecase
213
222
  # Remove this function's cache files at the end, to avoid accumulation
214
223
  # across test calls.
215
- self.addCleanup(shutil.rmtree, f.func.stats.cache_path,
216
- ignore_errors=True)
224
+ self.addCleanup(
225
+ shutil.rmtree, f.func.stats.cache_path, ignore_errors=True
226
+ )
217
227
 
218
228
  self.assertPreciseEqual(f(2, 3), 6)
219
229
  # It's a cache miss since the file was copied to a new temp location
@@ -230,8 +240,9 @@ class CUDACachingTest(SerialMixin, DispatcherCacheUsecasesTest):
230
240
  self.check_pycache(0)
231
241
 
232
242
  @skip_bad_access
233
- @unittest.skipIf(os.name == "nt",
234
- "cannot easily make a directory read-only on Windows")
243
+ @unittest.skipIf(
244
+ os.name == "nt", "cannot easily make a directory read-only on Windows"
245
+ )
235
246
  def test_non_creatable_pycache(self):
236
247
  # Make it impossible to create the __pycache__ directory
237
248
  old_perms = os.stat(self.tempdir).st_mode
@@ -241,11 +252,12 @@ class CUDACachingTest(SerialMixin, DispatcherCacheUsecasesTest):
241
252
  self._test_pycache_fallback()
242
253
 
243
254
  @skip_bad_access
244
- @unittest.skipIf(os.name == "nt",
245
- "cannot easily make a directory read-only on Windows")
255
+ @unittest.skipIf(
256
+ os.name == "nt", "cannot easily make a directory read-only on Windows"
257
+ )
246
258
  def test_non_writable_pycache(self):
247
259
  # Make it impossible to write to the __pycache__ directory
248
- pycache = os.path.join(self.tempdir, '__pycache__')
260
+ pycache = os.path.join(self.tempdir, "__pycache__")
249
261
  os.mkdir(pycache)
250
262
  old_perms = os.stat(pycache).st_mode
251
263
  os.chmod(pycache, 0o500)
@@ -254,15 +266,16 @@ class CUDACachingTest(SerialMixin, DispatcherCacheUsecasesTest):
254
266
  self._test_pycache_fallback()
255
267
 
256
268
  def test_cannot_cache_linking_libraries(self):
257
- link = str(test_data_dir / 'jitlink.ptx')
258
- msg = 'Cannot pickle CUDACodeLibrary with linking files'
269
+ link = str(test_data_dir / "jitlink.ptx")
270
+ msg = "Cannot pickle CUDACodeLibrary with linking files"
259
271
  with self.assertRaisesRegex(RuntimeError, msg):
260
- @cuda.jit('void()', cache=True, link=[link])
272
+
273
+ @cuda.jit("void()", cache=True, link=[link])
261
274
  def f():
262
275
  pass
263
276
 
264
277
 
265
- @skip_on_cudasim('Simulator does not implement caching')
278
+ @skip_on_cudasim("Simulator does not implement caching")
266
279
  class CUDAAndCPUCachingTest(SerialMixin, DispatcherCacheUsecasesTest):
267
280
  here = os.path.dirname(__file__)
268
281
  usecases_file = os.path.join(here, "cache_with_cpu_usecases.py")
@@ -353,7 +366,7 @@ def get_different_cc_gpus():
353
366
  return None
354
367
 
355
368
 
356
- @skip_on_cudasim('Simulator does not implement caching')
369
+ @skip_on_cudasim("Simulator does not implement caching")
357
370
  class TestMultiCCCaching(SerialMixin, DispatcherCacheUsecasesTest):
358
371
  here = os.path.dirname(__file__)
359
372
  usecases_file = os.path.join(here, "cache_usecases.py")
@@ -370,7 +383,7 @@ class TestMultiCCCaching(SerialMixin, DispatcherCacheUsecasesTest):
370
383
  def test_cache(self):
371
384
  gpus = get_different_cc_gpus()
372
385
  if not gpus:
373
- self.skipTest('Need two different CCs for multi-CC cache test')
386
+ self.skipTest("Need two different CCs for multi-CC cache test")
374
387
 
375
388
  self.check_pycache(0)
376
389
  mod = self.import_module()
@@ -482,13 +495,13 @@ def child_initializer():
482
495
  # Disable occupancy and implicit copy warnings in processes in a
483
496
  # multiprocessing pool.
484
497
  from numba.core import config
498
+
485
499
  config.CUDA_LOW_OCCUPANCY_WARNINGS = 0
486
500
  config.CUDA_WARN_ON_IMPLICIT_COPY = 0
487
501
 
488
502
 
489
- @skip_on_cudasim('Simulator does not implement caching')
503
+ @skip_on_cudasim("Simulator does not implement caching")
490
504
  class TestMultiprocessCache(SerialMixin, DispatcherCacheUsecasesTest):
491
-
492
505
  # Nested multiprocessing.Pool raises AssertionError:
493
506
  # "daemonic processes are not allowed to have children"
494
507
  _numba_parallel_test_ = False
@@ -513,7 +526,7 @@ class TestMultiprocessCache(SerialMixin, DispatcherCacheUsecasesTest):
513
526
  f = mod.simple_usecase_caller
514
527
  n = 3
515
528
  try:
516
- ctx = multiprocessing.get_context('spawn')
529
+ ctx = multiprocessing.get_context("spawn")
517
530
  except AttributeError:
518
531
  ctx = multiprocessing
519
532
 
@@ -526,7 +539,7 @@ class TestMultiprocessCache(SerialMixin, DispatcherCacheUsecasesTest):
526
539
  self.assertEqual(res, n * (n - 1) // 2)
527
540
 
528
541
 
529
- @skip_on_cudasim('Simulator does not implement the CUDACodeLibrary')
542
+ @skip_on_cudasim("Simulator does not implement the CUDACodeLibrary")
530
543
  class TestCUDACodeLibrary(CUDATestCase):
531
544
  # For tests of miscellaneous CUDACodeLibrary behaviour that we wish to
532
545
  # explicitly check
@@ -539,7 +552,7 @@ class TestCUDACodeLibrary(CUDATestCase):
539
552
  # Usually a CodeLibrary requires a real CodeGen, but since we don't
540
553
  # interact with it, anything will do
541
554
  codegen = object()
542
- name = 'library'
555
+ name = "library"
543
556
  cl = CUDACodeLibrary(codegen, name)
544
- with self.assertRaisesRegex(RuntimeError, 'Cannot pickle unfinalized'):
557
+ with self.assertRaisesRegex(RuntimeError, "Cannot pickle unfinalized"):
545
558
  cl._reduce_states()
@@ -4,8 +4,7 @@ from numba.cuda import compile_ptx
4
4
  from numba.core.types import f2, i1, i2, i4, i8, u1, u2, u4, u8
5
5
  from numba import cuda
6
6
  from numba.core import types
7
- from numba.cuda.testing import (CUDATestCase, skip_on_cudasim,
8
- skip_unless_cc_53)
7
+ from numba.cuda.testing import CUDATestCase, skip_on_cudasim, skip_unless_cc_53
9
8
  from numba.types import float16, float32
10
9
  import itertools
11
10
  import unittest
@@ -50,7 +49,7 @@ def to_uint64(x):
50
49
  def to_float16(x):
51
50
  # When division and operators on float16 types are supported, this should
52
51
  # be changed to match the implementation in to_float32.
53
- return (np.float16(x) * np.float16(0.5))
52
+ return np.float16(x) * np.float16(0.5)
54
53
 
55
54
 
56
55
  def to_float32(x):
@@ -76,6 +75,7 @@ def to_complex128(x):
76
75
  # - The device version uses cuda.fp16.hmul
77
76
  # - The host version uses the * operator
78
77
 
78
+
79
79
  def cuda_int_literal_to_float16(x):
80
80
  # Note that we need to use `2` and not `np.float16(2)` to ensure that this
81
81
  # types as a literal int and not a const float16.
@@ -128,7 +128,7 @@ class TestCasting(CUDATestCase):
128
128
  self.assertEqual(cfunc(-12.3), pyfunc(-12.3))
129
129
  self.assertEqual(cfunc(-12.3), int(-12.3))
130
130
 
131
- @skip_on_cudasim('Compilation unsupported in the simulator')
131
+ @skip_on_cudasim("Compilation unsupported in the simulator")
132
132
  def test_float16_to_int_ptx(self):
133
133
  pyfuncs = (to_int8, to_int16, to_int32, to_int64)
134
134
  sizes = (8, 16, 32, 64)
@@ -150,7 +150,7 @@ class TestCasting(CUDATestCase):
150
150
  self.assertEqual(cfunc(12.3), pyfunc(12.3))
151
151
  self.assertEqual(cfunc(12.3), int(12.3))
152
152
 
153
- @skip_on_cudasim('Compilation unsupported in the simulator')
153
+ @skip_on_cudasim("Compilation unsupported in the simulator")
154
154
  def test_float16_to_uint_ptx(self):
155
155
  pyfuncs = (to_uint8, to_uint16, to_uint32, to_uint64)
156
156
  sizes = (8, 16, 32, 64)
@@ -171,17 +171,18 @@ class TestCasting(CUDATestCase):
171
171
 
172
172
  @skip_unless_cc_53
173
173
  def test_literal_to_float16(self):
174
- cudafuncs = (cuda_int_literal_to_float16,
175
- cuda_float_literal_to_float16)
176
- hostfuncs = (reference_int_literal_to_float16,
177
- reference_float_literal_to_float16)
174
+ cudafuncs = (cuda_int_literal_to_float16, cuda_float_literal_to_float16)
175
+ hostfuncs = (
176
+ reference_int_literal_to_float16,
177
+ reference_float_literal_to_float16,
178
+ )
178
179
 
179
180
  for cudafunc, hostfunc in zip(cudafuncs, hostfuncs):
180
181
  with self.subTest(func=cudafunc):
181
182
  cfunc = self._create_wrapped(cudafunc, np.float16, np.float16)
182
183
  self.assertEqual(cfunc(321), hostfunc(321))
183
184
 
184
- @skip_on_cudasim('Compilation unsupported in the simulator')
185
+ @skip_on_cudasim("Compilation unsupported in the simulator")
185
186
  def test_int_to_float16_ptx(self):
186
187
  fromtys = (i1, i2, i4, i8)
187
188
  sizes = (8, 16, 32, 64)
@@ -190,7 +191,7 @@ class TestCasting(CUDATestCase):
190
191
  ptx, _ = compile_ptx(to_float16, (ty,), device=True)
191
192
  self.assertIn(f"cvt.rn.f16.s{size}", ptx)
192
193
 
193
- @skip_on_cudasim('Compilation unsupported in the simulator')
194
+ @skip_on_cudasim("Compilation unsupported in the simulator")
194
195
  def test_uint_to_float16_ptx(self):
195
196
  fromtys = (u1, u2, u4, u8)
196
197
  sizes = (8, 16, 32, 64)
@@ -211,12 +212,14 @@ class TestCasting(CUDATestCase):
211
212
  # the CUDA target doesn't yet implement division (or operators)
212
213
  # for float16 values, so we test by comparing with the computed
213
214
  # expression instead.
214
- np.testing.assert_allclose(cfunc(12.3),
215
- toty(12.3) / toty(2), rtol=0.0003)
216
- np.testing.assert_allclose(cfunc(-12.3),
217
- toty(-12.3) / toty(2), rtol=0.0003)
218
-
219
- @skip_on_cudasim('Compilation unsupported in the simulator')
215
+ np.testing.assert_allclose(
216
+ cfunc(12.3), toty(12.3) / toty(2), rtol=0.0003
217
+ )
218
+ np.testing.assert_allclose(
219
+ cfunc(-12.3), toty(-12.3) / toty(2), rtol=0.0003
220
+ )
221
+
222
+ @skip_on_cudasim("Compilation unsupported in the simulator")
220
223
  def test_float16_to_float_ptx(self):
221
224
  pyfuncs = (to_float32, to_float64)
222
225
  postfixes = ("f32", "f64")
@@ -239,12 +242,14 @@ class TestCasting(CUDATestCase):
239
242
  # to match the casting that is automatically applied when
240
243
  # passing the input to the cfunc as part of wrapping it in
241
244
  # an array of type fromtype.
242
- np.testing.assert_allclose(cfunc(3.21),
243
- pyfunc(fromty(3.21)))
244
- np.testing.assert_allclose(cfunc(-3.21),
245
- pyfunc(fromty(-3.21)) + 0j)
246
-
247
- @skip_on_cudasim('Compilation unsupported in the simulator')
245
+ np.testing.assert_allclose(
246
+ cfunc(3.21), pyfunc(fromty(3.21))
247
+ )
248
+ np.testing.assert_allclose(
249
+ cfunc(-3.21), pyfunc(fromty(-3.21)) + 0j
250
+ )
251
+
252
+ @skip_on_cudasim("Compilation unsupported in the simulator")
248
253
  def test_native_cast(self):
249
254
  float32_ptx, _ = cuda.compile_ptx(native_cast, (float32,), device=True)
250
255
  self.assertIn("st.f32", float32_ptx)
@@ -253,5 +258,5 @@ class TestCasting(CUDATestCase):
253
258
  self.assertIn("st.u16", float16_ptx)
254
259
 
255
260
 
256
- if __name__ == '__main__':
261
+ if __name__ == "__main__":
257
262
  unittest.main()
@@ -1,21 +1,26 @@
1
1
  import numpy as np
2
2
 
3
3
  from numba import cuda, types
4
- from numba.cuda.testing import (skip_on_cudasim, test_data_dir, unittest,
5
- CUDATestCase)
4
+ from numba.cuda.testing import (
5
+ skip_on_cudasim,
6
+ test_data_dir,
7
+ unittest,
8
+ CUDATestCase,
9
+ )
6
10
  from numba.tests.support import skip_unless_cffi
7
11
 
8
12
 
9
13
  @skip_unless_cffi
10
- @skip_on_cudasim('Simulator does not support linking')
14
+ @skip_on_cudasim("Simulator does not support linking")
11
15
  class TestCFFI(CUDATestCase):
12
16
  def test_from_buffer(self):
13
17
  import cffi
18
+
14
19
  ffi = cffi.FFI()
15
20
 
16
- link = str(test_data_dir / 'jitlink.ptx')
21
+ link = str(test_data_dir / "jitlink.ptx")
17
22
  sig = types.void(types.CPointer(types.int32))
18
- array_mutator = cuda.declare_device('array_mutator', sig)
23
+ array_mutator = cuda.declare_device("array_mutator", sig)
19
24
 
20
25
  @cuda.jit(link=[link])
21
26
  def mutate_array(x):
@@ -29,5 +34,5 @@ class TestCFFI(CUDATestCase):
29
34
  self.assertEqual(x[0], x[1])
30
35
 
31
36
 
32
- if __name__ == '__main__':
37
+ if __name__ == "__main__":
33
38
  unittest.main()