numba-cuda 0.0.0__py3-none-any.whl → 0.0.12__py3-none-any.whl

This diff shows the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their respective public registries.
Files changed (233)
  1. _numba_cuda_redirector.pth +1 -0
  2. _numba_cuda_redirector.py +74 -0
  3. numba_cuda/VERSION +1 -0
  4. numba_cuda/__init__.py +5 -0
  5. numba_cuda/_version.py +19 -0
  6. numba_cuda/numba/cuda/__init__.py +22 -0
  7. numba_cuda/numba/cuda/api.py +526 -0
  8. numba_cuda/numba/cuda/api_util.py +30 -0
  9. numba_cuda/numba/cuda/args.py +77 -0
  10. numba_cuda/numba/cuda/cg.py +62 -0
  11. numba_cuda/numba/cuda/codegen.py +378 -0
  12. numba_cuda/numba/cuda/compiler.py +422 -0
  13. numba_cuda/numba/cuda/cpp_function_wrappers.cu +47 -0
  14. numba_cuda/numba/cuda/cuda_fp16.h +3631 -0
  15. numba_cuda/numba/cuda/cuda_fp16.hpp +2465 -0
  16. numba_cuda/numba/cuda/cuda_paths.py +258 -0
  17. numba_cuda/numba/cuda/cudadecl.py +806 -0
  18. numba_cuda/numba/cuda/cudadrv/__init__.py +9 -0
  19. numba_cuda/numba/cuda/cudadrv/devicearray.py +904 -0
  20. numba_cuda/numba/cuda/cudadrv/devices.py +248 -0
  21. numba_cuda/numba/cuda/cudadrv/driver.py +3201 -0
  22. numba_cuda/numba/cuda/cudadrv/drvapi.py +398 -0
  23. numba_cuda/numba/cuda/cudadrv/dummyarray.py +452 -0
  24. numba_cuda/numba/cuda/cudadrv/enums.py +607 -0
  25. numba_cuda/numba/cuda/cudadrv/error.py +36 -0
  26. numba_cuda/numba/cuda/cudadrv/libs.py +176 -0
  27. numba_cuda/numba/cuda/cudadrv/ndarray.py +20 -0
  28. numba_cuda/numba/cuda/cudadrv/nvrtc.py +260 -0
  29. numba_cuda/numba/cuda/cudadrv/nvvm.py +707 -0
  30. numba_cuda/numba/cuda/cudadrv/rtapi.py +10 -0
  31. numba_cuda/numba/cuda/cudadrv/runtime.py +142 -0
  32. numba_cuda/numba/cuda/cudaimpl.py +1055 -0
  33. numba_cuda/numba/cuda/cudamath.py +140 -0
  34. numba_cuda/numba/cuda/decorators.py +189 -0
  35. numba_cuda/numba/cuda/descriptor.py +33 -0
  36. numba_cuda/numba/cuda/device_init.py +89 -0
  37. numba_cuda/numba/cuda/deviceufunc.py +908 -0
  38. numba_cuda/numba/cuda/dispatcher.py +1057 -0
  39. numba_cuda/numba/cuda/errors.py +59 -0
  40. numba_cuda/numba/cuda/extending.py +7 -0
  41. numba_cuda/numba/cuda/initialize.py +13 -0
  42. numba_cuda/numba/cuda/intrinsic_wrapper.py +77 -0
  43. numba_cuda/numba/cuda/intrinsics.py +198 -0
  44. numba_cuda/numba/cuda/kernels/__init__.py +0 -0
  45. numba_cuda/numba/cuda/kernels/reduction.py +262 -0
  46. numba_cuda/numba/cuda/kernels/transpose.py +65 -0
  47. numba_cuda/numba/cuda/libdevice.py +3382 -0
  48. numba_cuda/numba/cuda/libdevicedecl.py +17 -0
  49. numba_cuda/numba/cuda/libdevicefuncs.py +1057 -0
  50. numba_cuda/numba/cuda/libdeviceimpl.py +83 -0
  51. numba_cuda/numba/cuda/mathimpl.py +448 -0
  52. numba_cuda/numba/cuda/models.py +48 -0
  53. numba_cuda/numba/cuda/nvvmutils.py +235 -0
  54. numba_cuda/numba/cuda/printimpl.py +86 -0
  55. numba_cuda/numba/cuda/random.py +292 -0
  56. numba_cuda/numba/cuda/simulator/__init__.py +38 -0
  57. numba_cuda/numba/cuda/simulator/api.py +110 -0
  58. numba_cuda/numba/cuda/simulator/compiler.py +9 -0
  59. numba_cuda/numba/cuda/simulator/cudadrv/__init__.py +2 -0
  60. numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py +432 -0
  61. numba_cuda/numba/cuda/simulator/cudadrv/devices.py +117 -0
  62. numba_cuda/numba/cuda/simulator/cudadrv/driver.py +62 -0
  63. numba_cuda/numba/cuda/simulator/cudadrv/drvapi.py +4 -0
  64. numba_cuda/numba/cuda/simulator/cudadrv/dummyarray.py +4 -0
  65. numba_cuda/numba/cuda/simulator/cudadrv/error.py +6 -0
  66. numba_cuda/numba/cuda/simulator/cudadrv/libs.py +2 -0
  67. numba_cuda/numba/cuda/simulator/cudadrv/nvvm.py +29 -0
  68. numba_cuda/numba/cuda/simulator/cudadrv/runtime.py +19 -0
  69. numba_cuda/numba/cuda/simulator/kernel.py +308 -0
  70. numba_cuda/numba/cuda/simulator/kernelapi.py +495 -0
  71. numba_cuda/numba/cuda/simulator/reduction.py +15 -0
  72. numba_cuda/numba/cuda/simulator/vector_types.py +58 -0
  73. numba_cuda/numba/cuda/simulator_init.py +17 -0
  74. numba_cuda/numba/cuda/stubs.py +902 -0
  75. numba_cuda/numba/cuda/target.py +440 -0
  76. numba_cuda/numba/cuda/testing.py +202 -0
  77. numba_cuda/numba/cuda/tests/__init__.py +58 -0
  78. numba_cuda/numba/cuda/tests/cudadrv/__init__.py +8 -0
  79. numba_cuda/numba/cuda/tests/cudadrv/test_array_attr.py +145 -0
  80. numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py +145 -0
  81. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_array_slicing.py +375 -0
  82. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_auto_context.py +21 -0
  83. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py +179 -0
  84. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py +235 -0
  85. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_libraries.py +22 -0
  86. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_memory.py +193 -0
  87. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py +547 -0
  88. numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py +249 -0
  89. numba_cuda/numba/cuda/tests/cudadrv/test_detect.py +81 -0
  90. numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +192 -0
  91. numba_cuda/numba/cuda/tests/cudadrv/test_events.py +38 -0
  92. numba_cuda/numba/cuda/tests/cudadrv/test_host_alloc.py +65 -0
  93. numba_cuda/numba/cuda/tests/cudadrv/test_init.py +139 -0
  94. numba_cuda/numba/cuda/tests/cudadrv/test_inline_ptx.py +37 -0
  95. numba_cuda/numba/cuda/tests/cudadrv/test_is_fp16.py +12 -0
  96. numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +317 -0
  97. numba_cuda/numba/cuda/tests/cudadrv/test_managed_alloc.py +127 -0
  98. numba_cuda/numba/cuda/tests/cudadrv/test_mvc.py +54 -0
  99. numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py +199 -0
  100. numba_cuda/numba/cuda/tests/cudadrv/test_pinned.py +37 -0
  101. numba_cuda/numba/cuda/tests/cudadrv/test_profiler.py +20 -0
  102. numba_cuda/numba/cuda/tests/cudadrv/test_ptds.py +149 -0
  103. numba_cuda/numba/cuda/tests/cudadrv/test_reset_device.py +36 -0
  104. numba_cuda/numba/cuda/tests/cudadrv/test_runtime.py +85 -0
  105. numba_cuda/numba/cuda/tests/cudadrv/test_select_device.py +41 -0
  106. numba_cuda/numba/cuda/tests/cudadrv/test_streams.py +122 -0
  107. numba_cuda/numba/cuda/tests/cudapy/__init__.py +8 -0
  108. numba_cuda/numba/cuda/tests/cudapy/cache_usecases.py +234 -0
  109. numba_cuda/numba/cuda/tests/cudapy/cache_with_cpu_usecases.py +41 -0
  110. numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +58 -0
  111. numba_cuda/numba/cuda/tests/cudapy/jitlink.ptx +30 -0
  112. numba_cuda/numba/cuda/tests/cudapy/recursion_usecases.py +100 -0
  113. numba_cuda/numba/cuda/tests/cudapy/test_alignment.py +42 -0
  114. numba_cuda/numba/cuda/tests/cudapy/test_array.py +260 -0
  115. numba_cuda/numba/cuda/tests/cudapy/test_array_args.py +201 -0
  116. numba_cuda/numba/cuda/tests/cudapy/test_array_methods.py +35 -0
  117. numba_cuda/numba/cuda/tests/cudapy/test_atomics.py +1620 -0
  118. numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py +120 -0
  119. numba_cuda/numba/cuda/tests/cudapy/test_boolean.py +24 -0
  120. numba_cuda/numba/cuda/tests/cudapy/test_caching.py +545 -0
  121. numba_cuda/numba/cuda/tests/cudapy/test_casting.py +257 -0
  122. numba_cuda/numba/cuda/tests/cudapy/test_cffi.py +33 -0
  123. numba_cuda/numba/cuda/tests/cudapy/test_compiler.py +276 -0
  124. numba_cuda/numba/cuda/tests/cudapy/test_complex.py +296 -0
  125. numba_cuda/numba/cuda/tests/cudapy/test_complex_kernel.py +20 -0
  126. numba_cuda/numba/cuda/tests/cudapy/test_const_string.py +129 -0
  127. numba_cuda/numba/cuda/tests/cudapy/test_constmem.py +176 -0
  128. numba_cuda/numba/cuda/tests/cudapy/test_cooperative_groups.py +147 -0
  129. numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py +435 -0
  130. numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py +90 -0
  131. numba_cuda/numba/cuda/tests/cudapy/test_datetime.py +94 -0
  132. numba_cuda/numba/cuda/tests/cudapy/test_debug.py +101 -0
  133. numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +221 -0
  134. numba_cuda/numba/cuda/tests/cudapy/test_device_func.py +222 -0
  135. numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +700 -0
  136. numba_cuda/numba/cuda/tests/cudapy/test_enums.py +121 -0
  137. numba_cuda/numba/cuda/tests/cudapy/test_errors.py +79 -0
  138. numba_cuda/numba/cuda/tests/cudapy/test_exception.py +174 -0
  139. numba_cuda/numba/cuda/tests/cudapy/test_extending.py +155 -0
  140. numba_cuda/numba/cuda/tests/cudapy/test_fastmath.py +244 -0
  141. numba_cuda/numba/cuda/tests/cudapy/test_forall.py +52 -0
  142. numba_cuda/numba/cuda/tests/cudapy/test_freevar.py +29 -0
  143. numba_cuda/numba/cuda/tests/cudapy/test_frexp_ldexp.py +66 -0
  144. numba_cuda/numba/cuda/tests/cudapy/test_globals.py +60 -0
  145. numba_cuda/numba/cuda/tests/cudapy/test_gufunc.py +456 -0
  146. numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scalar.py +159 -0
  147. numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scheduling.py +95 -0
  148. numba_cuda/numba/cuda/tests/cudapy/test_idiv.py +37 -0
  149. numba_cuda/numba/cuda/tests/cudapy/test_inspect.py +165 -0
  150. numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py +1106 -0
  151. numba_cuda/numba/cuda/tests/cudapy/test_ipc.py +318 -0
  152. numba_cuda/numba/cuda/tests/cudapy/test_iterators.py +99 -0
  153. numba_cuda/numba/cuda/tests/cudapy/test_lang.py +64 -0
  154. numba_cuda/numba/cuda/tests/cudapy/test_laplace.py +119 -0
  155. numba_cuda/numba/cuda/tests/cudapy/test_libdevice.py +187 -0
  156. numba_cuda/numba/cuda/tests/cudapy/test_lineinfo.py +199 -0
  157. numba_cuda/numba/cuda/tests/cudapy/test_localmem.py +164 -0
  158. numba_cuda/numba/cuda/tests/cudapy/test_mandel.py +37 -0
  159. numba_cuda/numba/cuda/tests/cudapy/test_math.py +786 -0
  160. numba_cuda/numba/cuda/tests/cudapy/test_matmul.py +74 -0
  161. numba_cuda/numba/cuda/tests/cudapy/test_minmax.py +113 -0
  162. numba_cuda/numba/cuda/tests/cudapy/test_montecarlo.py +22 -0
  163. numba_cuda/numba/cuda/tests/cudapy/test_multigpu.py +140 -0
  164. numba_cuda/numba/cuda/tests/cudapy/test_multiprocessing.py +46 -0
  165. numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py +101 -0
  166. numba_cuda/numba/cuda/tests/cudapy/test_nondet.py +49 -0
  167. numba_cuda/numba/cuda/tests/cudapy/test_operator.py +401 -0
  168. numba_cuda/numba/cuda/tests/cudapy/test_optimization.py +86 -0
  169. numba_cuda/numba/cuda/tests/cudapy/test_overload.py +335 -0
  170. numba_cuda/numba/cuda/tests/cudapy/test_powi.py +124 -0
  171. numba_cuda/numba/cuda/tests/cudapy/test_print.py +128 -0
  172. numba_cuda/numba/cuda/tests/cudapy/test_py2_div_issue.py +33 -0
  173. numba_cuda/numba/cuda/tests/cudapy/test_random.py +104 -0
  174. numba_cuda/numba/cuda/tests/cudapy/test_record_dtype.py +610 -0
  175. numba_cuda/numba/cuda/tests/cudapy/test_recursion.py +125 -0
  176. numba_cuda/numba/cuda/tests/cudapy/test_reduction.py +76 -0
  177. numba_cuda/numba/cuda/tests/cudapy/test_retrieve_autoconverted_arrays.py +83 -0
  178. numba_cuda/numba/cuda/tests/cudapy/test_serialize.py +85 -0
  179. numba_cuda/numba/cuda/tests/cudapy/test_slicing.py +37 -0
  180. numba_cuda/numba/cuda/tests/cudapy/test_sm.py +444 -0
  181. numba_cuda/numba/cuda/tests/cudapy/test_sm_creation.py +205 -0
  182. numba_cuda/numba/cuda/tests/cudapy/test_sync.py +271 -0
  183. numba_cuda/numba/cuda/tests/cudapy/test_transpose.py +80 -0
  184. numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +277 -0
  185. numba_cuda/numba/cuda/tests/cudapy/test_userexc.py +47 -0
  186. numba_cuda/numba/cuda/tests/cudapy/test_vector_type.py +307 -0
  187. numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py +283 -0
  188. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_complex.py +20 -0
  189. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py +69 -0
  190. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_device.py +36 -0
  191. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_scalar_arg.py +37 -0
  192. numba_cuda/numba/cuda/tests/cudapy/test_warning.py +139 -0
  193. numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +276 -0
  194. numba_cuda/numba/cuda/tests/cudasim/__init__.py +6 -0
  195. numba_cuda/numba/cuda/tests/cudasim/support.py +6 -0
  196. numba_cuda/numba/cuda/tests/cudasim/test_cudasim_issues.py +102 -0
  197. numba_cuda/numba/cuda/tests/data/__init__.py +0 -0
  198. numba_cuda/numba/cuda/tests/data/cuda_include.cu +5 -0
  199. numba_cuda/numba/cuda/tests/data/error.cu +7 -0
  200. numba_cuda/numba/cuda/tests/data/jitlink.cu +23 -0
  201. numba_cuda/numba/cuda/tests/data/jitlink.ptx +51 -0
  202. numba_cuda/numba/cuda/tests/data/warn.cu +7 -0
  203. numba_cuda/numba/cuda/tests/doc_examples/__init__.py +6 -0
  204. numba_cuda/numba/cuda/tests/doc_examples/ffi/__init__.py +0 -0
  205. numba_cuda/numba/cuda/tests/doc_examples/ffi/functions.cu +49 -0
  206. numba_cuda/numba/cuda/tests/doc_examples/test_cg.py +77 -0
  207. numba_cuda/numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py +76 -0
  208. numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py +82 -0
  209. numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py +155 -0
  210. numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py +173 -0
  211. numba_cuda/numba/cuda/tests/doc_examples/test_montecarlo.py +109 -0
  212. numba_cuda/numba/cuda/tests/doc_examples/test_random.py +59 -0
  213. numba_cuda/numba/cuda/tests/doc_examples/test_reduction.py +76 -0
  214. numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py +130 -0
  215. numba_cuda/numba/cuda/tests/doc_examples/test_ufunc.py +50 -0
  216. numba_cuda/numba/cuda/tests/doc_examples/test_vecadd.py +73 -0
  217. numba_cuda/numba/cuda/tests/nocuda/__init__.py +8 -0
  218. numba_cuda/numba/cuda/tests/nocuda/test_dummyarray.py +359 -0
  219. numba_cuda/numba/cuda/tests/nocuda/test_function_resolution.py +36 -0
  220. numba_cuda/numba/cuda/tests/nocuda/test_import.py +49 -0
  221. numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py +238 -0
  222. numba_cuda/numba/cuda/tests/nocuda/test_nvvm.py +54 -0
  223. numba_cuda/numba/cuda/types.py +37 -0
  224. numba_cuda/numba/cuda/ufuncs.py +662 -0
  225. numba_cuda/numba/cuda/vector_types.py +209 -0
  226. numba_cuda/numba/cuda/vectorizers.py +252 -0
  227. numba_cuda-0.0.12.dist-info/LICENSE +25 -0
  228. numba_cuda-0.0.12.dist-info/METADATA +68 -0
  229. numba_cuda-0.0.12.dist-info/RECORD +231 -0
  230. {numba_cuda-0.0.0.dist-info → numba_cuda-0.0.12.dist-info}/WHEEL +1 -1
  231. numba_cuda-0.0.0.dist-info/METADATA +0 -6
  232. numba_cuda-0.0.0.dist-info/RECORD +0 -5
  233. {numba_cuda-0.0.0.dist-info → numba_cuda-0.0.12.dist-info}/top_level.txt +0 -0
numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py
@@ -0,0 +1,120 @@
+ import numpy as np
+ import math
+ from numba import cuda, double, void
+ from numba.cuda.testing import unittest, CUDATestCase
+
+
+ RISKFREE = 0.02
+ VOLATILITY = 0.30
+
+ A1 = 0.31938153
+ A2 = -0.356563782
+ A3 = 1.781477937
+ A4 = -1.821255978
+ A5 = 1.330274429
+ RSQRT2PI = 0.39894228040143267793994605993438
+
+
+ def cnd(d):
+     K = 1.0 / (1.0 + 0.2316419 * np.abs(d))
+     ret_val = (RSQRT2PI * np.exp(-0.5 * d * d) *
+                (K * (A1 + K * (A2 + K * (A3 + K * (A4 + K * A5))))))
+     return np.where(d > 0, 1.0 - ret_val, ret_val)
+
+
+ def black_scholes(callResult, putResult, stockPrice, optionStrike, optionYears,
+                   Riskfree, Volatility):
+     S = stockPrice
+     X = optionStrike
+     T = optionYears
+     R = Riskfree
+     V = Volatility
+     sqrtT = np.sqrt(T)
+     d1 = (np.log(S / X) + (R + 0.5 * V * V) * T) / (V * sqrtT)
+     d2 = d1 - V * sqrtT
+     cndd1 = cnd(d1)
+     cndd2 = cnd(d2)
+
+     expRT = np.exp(- R * T)
+     callResult[:] = (S * cndd1 - X * expRT * cndd2)
+     putResult[:] = (X * expRT * (1.0 - cndd2) - S * (1.0 - cndd1))
+
+
+ def randfloat(rand_var, low, high):
+     return (1.0 - rand_var) * low + rand_var * high
+
+
+ class TestBlackScholes(CUDATestCase):
+     def test_blackscholes(self):
+         OPT_N = 400
+         iterations = 2
+
+         stockPrice = randfloat(np.random.random(OPT_N), 5.0, 30.0)
+         optionStrike = randfloat(np.random.random(OPT_N), 1.0, 100.0)
+         optionYears = randfloat(np.random.random(OPT_N), 0.25, 10.0)
+
+         callResultNumpy = np.zeros(OPT_N)
+         putResultNumpy = -np.ones(OPT_N)
+
+         callResultNumba = np.zeros(OPT_N)
+         putResultNumba = -np.ones(OPT_N)
+
+         # numpy
+         for i in range(iterations):
+             black_scholes(callResultNumpy, putResultNumpy, stockPrice,
+                           optionStrike, optionYears, RISKFREE, VOLATILITY)
+
+         @cuda.jit(double(double), device=True, inline=True)
+         def cnd_cuda(d):
+             K = 1.0 / (1.0 + 0.2316419 * math.fabs(d))
+             ret_val = (RSQRT2PI * math.exp(-0.5 * d * d) *
+                        (K * (A1 + K * (A2 + K * (A3 + K * (A4 + K * A5))))))
+             if d > 0:
+                 ret_val = 1.0 - ret_val
+             return ret_val
+
+         @cuda.jit(void(double[:], double[:], double[:], double[:], double[:],
+                        double, double))
+         def black_scholes_cuda(callResult, putResult, S, X, T, R, V):
+             i = cuda.threadIdx.x + cuda.blockIdx.x * cuda.blockDim.x
+             if i >= S.shape[0]:
+                 return
+             sqrtT = math.sqrt(T[i])
+             d1 = ((math.log(S[i] / X[i]) + (R + 0.5 * V * V) * T[i])
+                   / (V * sqrtT))
+             d2 = d1 - V * sqrtT
+             cndd1 = cnd_cuda(d1)
+             cndd2 = cnd_cuda(d2)
+
+             expRT = math.exp((-1. * R) * T[i])
+             callResult[i] = (S[i] * cndd1 - X[i] * expRT * cndd2)
+             putResult[i] = (X[i] * expRT * (1.0 - cndd2) - S[i] * (1.0 - cndd1))
+
+         # numba
+         blockdim = 512, 1
+         griddim = int(math.ceil(float(OPT_N) / blockdim[0])), 1
+         stream = cuda.stream()
+         d_callResult = cuda.to_device(callResultNumba, stream)
+         d_putResult = cuda.to_device(putResultNumba, stream)
+         d_stockPrice = cuda.to_device(stockPrice, stream)
+         d_optionStrike = cuda.to_device(optionStrike, stream)
+         d_optionYears = cuda.to_device(optionYears, stream)
+
+         for i in range(iterations):
+             black_scholes_cuda[griddim, blockdim, stream](
+                 d_callResult, d_putResult, d_stockPrice, d_optionStrike,
+                 d_optionYears, RISKFREE, VOLATILITY)
+             d_callResult.copy_to_host(callResultNumba, stream)
+             d_putResult.copy_to_host(putResultNumba, stream)
+             stream.synchronize()
+
+         delta = np.abs(callResultNumpy - callResultNumba)
+         L1norm = delta.sum() / np.abs(callResultNumpy).sum()
+
+         max_abs_err = delta.max()
+         self.assertTrue(L1norm < 1e-13)
+         self.assertTrue(max_abs_err < 1e-13)
+
+
+ if __name__ == '__main__':
+     unittest.main()
numba_cuda/numba/cuda/tests/cudapy/test_boolean.py
@@ -0,0 +1,24 @@
+ import numpy as np
+ from numba.cuda.testing import unittest, CUDATestCase
+ from numba import cuda
+
+
+ def boolean_func(A, vertial):
+     if vertial:
+         A[0] = 123
+     else:
+         A[0] = 321
+
+
+ class TestCudaBoolean(CUDATestCase):
+     def test_boolean(self):
+         func = cuda.jit('void(float64[:], bool_)')(boolean_func)
+         A = np.array([0], dtype='float64')
+         func[1, 1](A, True)
+         self.assertTrue(A[0] == 123)
+         func[1, 1](A, False)
+         self.assertTrue(A[0] == 321)
+
+
+ if __name__ == '__main__':
+     unittest.main()
numba_cuda/numba/cuda/tests/cudapy/test_caching.py
@@ -0,0 +1,545 @@
+ import multiprocessing
+ import os
+ import shutil
+ import subprocess
+ import sys
+ import unittest
+ import warnings
+
+ from numba import cuda
+ from numba.core.errors import NumbaWarning
+ from numba.cuda.testing import (CUDATestCase, skip_on_cudasim,
+                                 skip_unless_cc_60, skip_if_cudadevrt_missing,
+                                 skip_if_mvc_enabled, test_data_dir)
+ from numba.tests.support import SerialMixin
+ from numba.tests.test_caching import (DispatcherCacheUsecasesTest,
+                                       skip_bad_access)
+
+
+ @skip_on_cudasim('Simulator does not implement caching')
+ class CUDACachingTest(SerialMixin, DispatcherCacheUsecasesTest):
+     here = os.path.dirname(__file__)
+     usecases_file = os.path.join(here, "cache_usecases.py")
+     modname = "cuda_caching_test_fodder"
+
+     def setUp(self):
+         DispatcherCacheUsecasesTest.setUp(self)
+         CUDATestCase.setUp(self)
+
+     def tearDown(self):
+         CUDATestCase.tearDown(self)
+         DispatcherCacheUsecasesTest.tearDown(self)
+
+     def test_caching(self):
+         self.check_pycache(0)
+         mod = self.import_module()
+         self.check_pycache(0)
+
+         f = mod.add_usecase
+         self.assertPreciseEqual(f(2, 3), 6)
+         self.check_pycache(2) # 1 index, 1 data
+         self.assertPreciseEqual(f(2.5, 3), 6.5)
+         self.check_pycache(3) # 1 index, 2 data
+         self.check_hits(f.func, 0, 2)
+
+         f = mod.record_return_aligned
+         rec = f(mod.aligned_arr, 1)
+         self.assertPreciseEqual(tuple(rec), (2, 43.5))
+
+         f = mod.record_return_packed
+         rec = f(mod.packed_arr, 1)
+         self.assertPreciseEqual(tuple(rec), (2, 43.5))
+         self.check_pycache(6) # 2 index, 4 data
+         self.check_hits(f.func, 0, 2)
+
+         # Check the code runs ok from another process
+         self.run_in_separate_process()
+
+     def test_no_caching(self):
+         mod = self.import_module()
+
+         f = mod.add_nocache_usecase
+         self.assertPreciseEqual(f(2, 3), 6)
+         self.check_pycache(0)
+
+     def test_many_locals(self):
+         # Declaring many local arrays creates a very large LLVM IR, which
+         # cannot be pickled due to the level of recursion it requires to
+         # pickle. This test ensures that kernels with many locals (and
+         # therefore large IR) can be cached. See Issue #8373:
+         # https://github.com/numba/numba/issues/8373
+         self.check_pycache(0)
+         mod = self.import_module()
+         f = mod.many_locals
+         f[1, 1]()
+         self.check_pycache(2) # 1 index, 1 data
+
+     def test_closure(self):
+         mod = self.import_module()
+
+         with warnings.catch_warnings():
+             warnings.simplefilter('error', NumbaWarning)
+
+             f = mod.closure1
+             self.assertPreciseEqual(f(3), 6) # 3 + 3 = 6
+             f = mod.closure2
+             self.assertPreciseEqual(f(3), 8) # 3 + 5 = 8
+             f = mod.closure3
+             self.assertPreciseEqual(f(3), 10) # 3 + 7 = 10
+             f = mod.closure4
+             self.assertPreciseEqual(f(3), 12) # 3 + 9 = 12
+         self.check_pycache(5) # 1 nbi, 4 nbc
+
+     def test_cache_reuse(self):
+         mod = self.import_module()
+         mod.add_usecase(2, 3)
+         mod.add_usecase(2.5, 3.5)
+         mod.outer_uncached(2, 3)
+         mod.outer(2, 3)
+         mod.record_return_packed(mod.packed_arr, 0)
+         mod.record_return_aligned(mod.aligned_arr, 1)
+         mod.simple_usecase_caller(2)
+         mtimes = self.get_cache_mtimes()
+         # Two signatures compiled
+         self.check_hits(mod.add_usecase.func, 0, 2)
+
+         mod2 = self.import_module()
+         self.assertIsNot(mod, mod2)
+         f = mod2.add_usecase
+         f(2, 3)
+         self.check_hits(f.func, 1, 0)
+         f(2.5, 3.5)
+         self.check_hits(f.func, 2, 0)
+
+         # The files haven't changed
+         self.assertEqual(self.get_cache_mtimes(), mtimes)
+
+         self.run_in_separate_process()
+         self.assertEqual(self.get_cache_mtimes(), mtimes)
+
+     def test_cache_invalidate(self):
+         mod = self.import_module()
+         f = mod.add_usecase
+         self.assertPreciseEqual(f(2, 3), 6)
+
+         # This should change the functions' results
+         with open(self.modfile, "a") as f:
+             f.write("\nZ = 10\n")
+
+         mod = self.import_module()
+         f = mod.add_usecase
+         self.assertPreciseEqual(f(2, 3), 15)
+
+     def test_recompile(self):
+         # Explicit call to recompile() should overwrite the cache
+         mod = self.import_module()
+         f = mod.add_usecase
+         self.assertPreciseEqual(f(2, 3), 6)
+
+         mod = self.import_module()
+         f = mod.add_usecase
+         mod.Z = 10
+         self.assertPreciseEqual(f(2, 3), 6)
+         f.func.recompile()
+         self.assertPreciseEqual(f(2, 3), 15)
+
+         # Freshly recompiled version is re-used from other imports
+         mod = self.import_module()
+         f = mod.add_usecase
+         self.assertPreciseEqual(f(2, 3), 15)
+
+     def test_same_names(self):
+         # Function with the same names should still disambiguate
+         mod = self.import_module()
+         f = mod.renamed_function1
+         self.assertPreciseEqual(f(2), 4)
+         f = mod.renamed_function2
+         self.assertPreciseEqual(f(2), 8)
+
+     @skip_unless_cc_60
+     @skip_if_cudadevrt_missing
+     @skip_if_mvc_enabled('CG not supported with MVC')
+     def test_cache_cg(self):
+         # Functions using cooperative groups should be cacheable. See Issue
+         # #8888: https://github.com/numba/numba/issues/8888
+         self.check_pycache(0)
+         mod = self.import_module()
+         self.check_pycache(0)
+
+         mod.cg_usecase(0)
+         self.check_pycache(2) # 1 index, 1 data
+
+         # Check the code runs ok from another process
+         self.run_in_separate_process()
+
+     @skip_unless_cc_60
+     @skip_if_cudadevrt_missing
+     @skip_if_mvc_enabled('CG not supported with MVC')
+     def test_cache_cg_clean_run(self):
+         # See Issue #9432: https://github.com/numba/numba/issues/9432
+         # If a cached function using CG sync was the first thing to compile,
+         # the compile would fail.
+         self.check_pycache(0)
+
+         # This logic is modelled on run_in_separate_process(), but executes the
+         # CG usecase directly in the subprocess.
+         code = """if 1:
+             import sys
+
+             sys.path.insert(0, %(tempdir)r)
+             mod = __import__(%(modname)r)
+             mod.cg_usecase(0)
+             """ % dict(tempdir=self.tempdir, modname=self.modname)
+
+         popen = subprocess.Popen([sys.executable, "-c", code],
+                                  stdout=subprocess.PIPE,
+                                  stderr=subprocess.PIPE)
+         out, err = popen.communicate(timeout=60)
+         if popen.returncode != 0:
+             raise AssertionError(
+                 "process failed with code %s: \n"
+                 "stdout follows\n%s\n"
+                 "stderr follows\n%s\n"
+                 % (popen.returncode, out.decode(), err.decode()),
+             )
+
+     def _test_pycache_fallback(self):
+         """
+         With a disabled __pycache__, test there is a working fallback
+         (e.g. on the user-wide cache dir)
+         """
+         mod = self.import_module()
+         f = mod.add_usecase
+         # Remove this function's cache files at the end, to avoid accumulation
+         # across test calls.
+         self.addCleanup(shutil.rmtree, f.func.stats.cache_path,
+                         ignore_errors=True)
+
+         self.assertPreciseEqual(f(2, 3), 6)
+         # It's a cache miss since the file was copied to a new temp location
+         self.check_hits(f.func, 0, 1)
+
+         # Test re-use
+         mod2 = self.import_module()
+         f = mod2.add_usecase
+         self.assertPreciseEqual(f(2, 3), 6)
+         self.check_hits(f.func, 1, 0)
+
+         # The __pycache__ is empty (otherwise the test's preconditions
+         # wouldn't be met)
+         self.check_pycache(0)
+
+     @skip_bad_access
+     @unittest.skipIf(os.name == "nt",
+                      "cannot easily make a directory read-only on Windows")
+     def test_non_creatable_pycache(self):
+         # Make it impossible to create the __pycache__ directory
+         old_perms = os.stat(self.tempdir).st_mode
+         os.chmod(self.tempdir, 0o500)
+         self.addCleanup(os.chmod, self.tempdir, old_perms)
+
+         self._test_pycache_fallback()
+
+     @skip_bad_access
+     @unittest.skipIf(os.name == "nt",
+                      "cannot easily make a directory read-only on Windows")
+     def test_non_writable_pycache(self):
+         # Make it impossible to write to the __pycache__ directory
+         pycache = os.path.join(self.tempdir, '__pycache__')
+         os.mkdir(pycache)
+         old_perms = os.stat(pycache).st_mode
+         os.chmod(pycache, 0o500)
+         self.addCleanup(os.chmod, pycache, old_perms)
+
+         self._test_pycache_fallback()
+
+     def test_cannot_cache_linking_libraries(self):
+         link = str(test_data_dir / 'jitlink.ptx')
+         msg = 'Cannot pickle CUDACodeLibrary with linking files'
+         with self.assertRaisesRegex(RuntimeError, msg):
+             @cuda.jit('void()', cache=True, link=[link])
+             def f():
+                 pass
+
+
+ @skip_on_cudasim('Simulator does not implement caching')
+ class CUDAAndCPUCachingTest(SerialMixin, DispatcherCacheUsecasesTest):
+     here = os.path.dirname(__file__)
+     usecases_file = os.path.join(here, "cache_with_cpu_usecases.py")
+     modname = "cuda_and_cpu_caching_test_fodder"
+
+     def setUp(self):
+         DispatcherCacheUsecasesTest.setUp(self)
+         CUDATestCase.setUp(self)
+
+     def tearDown(self):
+         CUDATestCase.tearDown(self)
+         DispatcherCacheUsecasesTest.tearDown(self)
+
+     def test_cpu_and_cuda_targets(self):
+         # The same function jitted for CPU and CUDA targets should maintain
+         # separate caches for each target.
+         self.check_pycache(0)
+         mod = self.import_module()
+         self.check_pycache(0)
+
+         f_cpu = mod.assign_cpu
+         f_cuda = mod.assign_cuda
+         self.assertPreciseEqual(f_cpu(5), 5)
+         self.check_pycache(2) # 1 index, 1 data
+         self.assertPreciseEqual(f_cuda(5), 5)
+         self.check_pycache(3) # 1 index, 2 data
+
+         self.check_hits(f_cpu.func, 0, 1)
+         self.check_hits(f_cuda.func, 0, 1)
+
+         self.assertPreciseEqual(f_cpu(5.5), 5.5)
+         self.check_pycache(4) # 1 index, 3 data
+         self.assertPreciseEqual(f_cuda(5.5), 5.5)
+         self.check_pycache(5) # 1 index, 4 data
+
+         self.check_hits(f_cpu.func, 0, 2)
+         self.check_hits(f_cuda.func, 0, 2)
+
+     def test_cpu_and_cuda_reuse(self):
+         # Existing cache files for the CPU and CUDA targets are reused.
+         mod = self.import_module()
+         mod.assign_cpu(5)
+         mod.assign_cpu(5.5)
+         mod.assign_cuda(5)
+         mod.assign_cuda(5.5)
+
+         mtimes = self.get_cache_mtimes()
+
+         # Two signatures compiled
+         self.check_hits(mod.assign_cpu.func, 0, 2)
+         self.check_hits(mod.assign_cuda.func, 0, 2)
+
+         mod2 = self.import_module()
+         self.assertIsNot(mod, mod2)
+         f_cpu = mod2.assign_cpu
+         f_cuda = mod2.assign_cuda
+
+         f_cpu(2)
+         self.check_hits(f_cpu.func, 1, 0)
+         f_cpu(2.5)
+         self.check_hits(f_cpu.func, 2, 0)
+         f_cuda(2)
+         self.check_hits(f_cuda.func, 1, 0)
+         f_cuda(2.5)
+         self.check_hits(f_cuda.func, 2, 0)
+
+         # The files haven't changed
+         self.assertEqual(self.get_cache_mtimes(), mtimes)
+
+         self.run_in_separate_process()
+         self.assertEqual(self.get_cache_mtimes(), mtimes)
+
+
+ def get_different_cc_gpus():
+     # Find two GPUs with different Compute Capabilities and return them as a
+     # tuple. If two GPUs with distinct Compute Capabilities cannot be found,
+     # then None is returned.
+     first_gpu = cuda.gpus[0]
+     with first_gpu:
+         first_cc = cuda.current_context().device.compute_capability
+
+     for gpu in cuda.gpus[1:]:
+         with gpu:
+             cc = cuda.current_context().device.compute_capability
+             if cc != first_cc:
+                 return (first_gpu, gpu)
+
+     return None
+
+
+ @skip_on_cudasim('Simulator does not implement caching')
+ class TestMultiCCCaching(SerialMixin, DispatcherCacheUsecasesTest):
+     here = os.path.dirname(__file__)
+     usecases_file = os.path.join(here, "cache_usecases.py")
+     modname = "cuda_multi_cc_caching_test_fodder"
+
+     def setUp(self):
+         DispatcherCacheUsecasesTest.setUp(self)
+         CUDATestCase.setUp(self)
+
+     def tearDown(self):
+         CUDATestCase.tearDown(self)
+         DispatcherCacheUsecasesTest.tearDown(self)
+
+     def test_cache(self):
+         gpus = get_different_cc_gpus()
+         if not gpus:
+             self.skipTest('Need two different CCs for multi-CC cache test')
+
+         self.check_pycache(0)
+         mod = self.import_module()
+         self.check_pycache(0)
+
+         # Step 1. Populate the cache with the first GPU
+         with gpus[0]:
+             f = mod.add_usecase
+             self.assertPreciseEqual(f(2, 3), 6)
+             self.check_pycache(2) # 1 index, 1 data
+             self.assertPreciseEqual(f(2.5, 3), 6.5)
+             self.check_pycache(3) # 1 index, 2 data
+             self.check_hits(f.func, 0, 2)
+
+             f = mod.record_return_aligned
+             rec = f(mod.aligned_arr, 1)
+             self.assertPreciseEqual(tuple(rec), (2, 43.5))
+
+             f = mod.record_return_packed
+             rec = f(mod.packed_arr, 1)
+             self.assertPreciseEqual(tuple(rec), (2, 43.5))
+             self.check_pycache(6) # 2 index, 4 data
+             self.check_hits(f.func, 0, 2)
+
+         # Step 2. Run with the second GPU - under present behaviour this
+         # doesn't further populate the cache.
+         with gpus[1]:
+             f = mod.add_usecase
+             self.assertPreciseEqual(f(2, 3), 6)
+             self.check_pycache(6) # cache unchanged
+             self.assertPreciseEqual(f(2.5, 3), 6.5)
+             self.check_pycache(6) # cache unchanged
+             self.check_hits(f.func, 0, 2)
+
+             f = mod.record_return_aligned
+             rec = f(mod.aligned_arr, 1)
+             self.assertPreciseEqual(tuple(rec), (2, 43.5))
+
+             f = mod.record_return_packed
+             rec = f(mod.packed_arr, 1)
+             self.assertPreciseEqual(tuple(rec), (2, 43.5))
+             self.check_pycache(6) # cache unchanged
+             self.check_hits(f.func, 0, 2)
+
+         # Step 3. Run in a separate module with the second GPU - this populates
+         # the cache for the second CC.
+         mod2 = self.import_module()
+         self.assertIsNot(mod, mod2)
+
+         with gpus[1]:
+             f = mod2.add_usecase
+             self.assertPreciseEqual(f(2, 3), 6)
+             self.check_pycache(7) # 2 index, 5 data
+             self.assertPreciseEqual(f(2.5, 3), 6.5)
+             self.check_pycache(8) # 2 index, 6 data
+             self.check_hits(f.func, 0, 2)
+
+             f = mod2.record_return_aligned
+             rec = f(mod.aligned_arr, 1)
+             self.assertPreciseEqual(tuple(rec), (2, 43.5))
+
+             f = mod2.record_return_packed
+             rec = f(mod.packed_arr, 1)
+             self.assertPreciseEqual(tuple(rec), (2, 43.5))
+             self.check_pycache(10) # 2 index, 8 data
+             self.check_hits(f.func, 0, 2)
+
+         # The following steps check that we can use the NVVM IR loaded from the
+         # cache to generate PTX for a different compute capability to the
+         # cached cubin's CC. To check this, we create another module that loads
+         # the cached version containing a cubin for GPU 1. There will be no
+         # cubin for GPU 0, so when we try to use it the PTX must be generated.
+
+         mod3 = self.import_module()
+         self.assertIsNot(mod, mod3)
+
+         # Step 4. Run with GPU 1 and get a cache hit, loading the cache created
+         # during Step 3.
+         with gpus[1]:
+             f = mod3.add_usecase
+             self.assertPreciseEqual(f(2, 3), 6)
+             self.assertPreciseEqual(f(2.5, 3), 6.5)
+
+             f = mod3.record_return_aligned
+             rec = f(mod.aligned_arr, 1)
+             self.assertPreciseEqual(tuple(rec), (2, 43.5))
+
+             f = mod3.record_return_packed
+             rec = f(mod.packed_arr, 1)
+             self.assertPreciseEqual(tuple(rec), (2, 43.5))
+
+         # Step 5. Run with GPU 0 using the module from Step 4, to force PTX
+         # generation from cached NVVM IR.
+         with gpus[0]:
+             f = mod3.add_usecase
+             self.assertPreciseEqual(f(2, 3), 6)
+             self.assertPreciseEqual(f(2.5, 3), 6.5)
+
+             f = mod3.record_return_aligned
+             rec = f(mod.aligned_arr, 1)
+             self.assertPreciseEqual(tuple(rec), (2, 43.5))
+
+             f = mod3.record_return_packed
+             rec = f(mod.packed_arr, 1)
+             self.assertPreciseEqual(tuple(rec), (2, 43.5))
+
+
+ def child_initializer():
+     # Disable occupancy and implicit copy warnings in processes in a
+     # multiprocessing pool.
+     from numba.core import config
+     config.CUDA_LOW_OCCUPANCY_WARNINGS = 0
+     config.CUDA_WARN_ON_IMPLICIT_COPY = 0
+
+
+ @skip_on_cudasim('Simulator does not implement caching')
+ class TestMultiprocessCache(SerialMixin, DispatcherCacheUsecasesTest):
+
+     # Nested multiprocessing.Pool raises AssertionError:
+     # "daemonic processes are not allowed to have children"
+     _numba_parallel_test_ = False
+
+     here = os.path.dirname(__file__)
+     usecases_file = os.path.join(here, "cache_usecases.py")
+     modname = "cuda_mp_caching_test_fodder"
+
+     def setUp(self):
+         DispatcherCacheUsecasesTest.setUp(self)
+         CUDATestCase.setUp(self)
+
+     def tearDown(self):
+         CUDATestCase.tearDown(self)
+         DispatcherCacheUsecasesTest.tearDown(self)
+
+     def test_multiprocessing(self):
+         # Check caching works from multiple processes at once (#2028)
+         mod = self.import_module()
+         # Calling a pure Python caller of the JIT-compiled function is
+         # necessary to reproduce the issue.
+         f = mod.simple_usecase_caller
+         n = 3
+         try:
+             ctx = multiprocessing.get_context('spawn')
+         except AttributeError:
+             ctx = multiprocessing
+
+         pool = ctx.Pool(n, child_initializer)
+
+         try:
+             res = sum(pool.imap(f, range(n)))
+         finally:
+             pool.close()
+         self.assertEqual(res, n * (n - 1) // 2)
+
+
+ @skip_on_cudasim('Simulator does not implement the CUDACodeLibrary')
+ class TestCUDACodeLibrary(CUDATestCase):
+     # For tests of miscellaneous CUDACodeLibrary behaviour that we wish to
+     # explicitly check
+
+     def test_cannot_serialize_unfinalized(self):
+         # The CUDA codegen failes to import under the simulator, so we cannot
+         # import it at the top level
+         from numba.cuda.codegen import CUDACodeLibrary
+
+         # Usually a CodeLibrary requires a real CodeGen, but since we don't
+         # interact with it, anything will do
+         codegen = object()
+         name = 'library'
+         cl = CUDACodeLibrary(codegen, name)
+         with self.assertRaisesRegex(RuntimeError, 'Cannot pickle unfinalized'):
+             cl._reduce_states()