numba-cuda 0.0.0__py3-none-any.whl → 0.0.12__py3-none-any.whl

This diff compares the contents of publicly released versions of the package as published to a supported registry. It is provided for informational purposes only and reflects the changes between package versions exactly as they appear in the public registry.
Files changed (233)
  1. _numba_cuda_redirector.pth +1 -0
  2. _numba_cuda_redirector.py +74 -0
  3. numba_cuda/VERSION +1 -0
  4. numba_cuda/__init__.py +5 -0
  5. numba_cuda/_version.py +19 -0
  6. numba_cuda/numba/cuda/__init__.py +22 -0
  7. numba_cuda/numba/cuda/api.py +526 -0
  8. numba_cuda/numba/cuda/api_util.py +30 -0
  9. numba_cuda/numba/cuda/args.py +77 -0
  10. numba_cuda/numba/cuda/cg.py +62 -0
  11. numba_cuda/numba/cuda/codegen.py +378 -0
  12. numba_cuda/numba/cuda/compiler.py +422 -0
  13. numba_cuda/numba/cuda/cpp_function_wrappers.cu +47 -0
  14. numba_cuda/numba/cuda/cuda_fp16.h +3631 -0
  15. numba_cuda/numba/cuda/cuda_fp16.hpp +2465 -0
  16. numba_cuda/numba/cuda/cuda_paths.py +258 -0
  17. numba_cuda/numba/cuda/cudadecl.py +806 -0
  18. numba_cuda/numba/cuda/cudadrv/__init__.py +9 -0
  19. numba_cuda/numba/cuda/cudadrv/devicearray.py +904 -0
  20. numba_cuda/numba/cuda/cudadrv/devices.py +248 -0
  21. numba_cuda/numba/cuda/cudadrv/driver.py +3201 -0
  22. numba_cuda/numba/cuda/cudadrv/drvapi.py +398 -0
  23. numba_cuda/numba/cuda/cudadrv/dummyarray.py +452 -0
  24. numba_cuda/numba/cuda/cudadrv/enums.py +607 -0
  25. numba_cuda/numba/cuda/cudadrv/error.py +36 -0
  26. numba_cuda/numba/cuda/cudadrv/libs.py +176 -0
  27. numba_cuda/numba/cuda/cudadrv/ndarray.py +20 -0
  28. numba_cuda/numba/cuda/cudadrv/nvrtc.py +260 -0
  29. numba_cuda/numba/cuda/cudadrv/nvvm.py +707 -0
  30. numba_cuda/numba/cuda/cudadrv/rtapi.py +10 -0
  31. numba_cuda/numba/cuda/cudadrv/runtime.py +142 -0
  32. numba_cuda/numba/cuda/cudaimpl.py +1055 -0
  33. numba_cuda/numba/cuda/cudamath.py +140 -0
  34. numba_cuda/numba/cuda/decorators.py +189 -0
  35. numba_cuda/numba/cuda/descriptor.py +33 -0
  36. numba_cuda/numba/cuda/device_init.py +89 -0
  37. numba_cuda/numba/cuda/deviceufunc.py +908 -0
  38. numba_cuda/numba/cuda/dispatcher.py +1057 -0
  39. numba_cuda/numba/cuda/errors.py +59 -0
  40. numba_cuda/numba/cuda/extending.py +7 -0
  41. numba_cuda/numba/cuda/initialize.py +13 -0
  42. numba_cuda/numba/cuda/intrinsic_wrapper.py +77 -0
  43. numba_cuda/numba/cuda/intrinsics.py +198 -0
  44. numba_cuda/numba/cuda/kernels/__init__.py +0 -0
  45. numba_cuda/numba/cuda/kernels/reduction.py +262 -0
  46. numba_cuda/numba/cuda/kernels/transpose.py +65 -0
  47. numba_cuda/numba/cuda/libdevice.py +3382 -0
  48. numba_cuda/numba/cuda/libdevicedecl.py +17 -0
  49. numba_cuda/numba/cuda/libdevicefuncs.py +1057 -0
  50. numba_cuda/numba/cuda/libdeviceimpl.py +83 -0
  51. numba_cuda/numba/cuda/mathimpl.py +448 -0
  52. numba_cuda/numba/cuda/models.py +48 -0
  53. numba_cuda/numba/cuda/nvvmutils.py +235 -0
  54. numba_cuda/numba/cuda/printimpl.py +86 -0
  55. numba_cuda/numba/cuda/random.py +292 -0
  56. numba_cuda/numba/cuda/simulator/__init__.py +38 -0
  57. numba_cuda/numba/cuda/simulator/api.py +110 -0
  58. numba_cuda/numba/cuda/simulator/compiler.py +9 -0
  59. numba_cuda/numba/cuda/simulator/cudadrv/__init__.py +2 -0
  60. numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py +432 -0
  61. numba_cuda/numba/cuda/simulator/cudadrv/devices.py +117 -0
  62. numba_cuda/numba/cuda/simulator/cudadrv/driver.py +62 -0
  63. numba_cuda/numba/cuda/simulator/cudadrv/drvapi.py +4 -0
  64. numba_cuda/numba/cuda/simulator/cudadrv/dummyarray.py +4 -0
  65. numba_cuda/numba/cuda/simulator/cudadrv/error.py +6 -0
  66. numba_cuda/numba/cuda/simulator/cudadrv/libs.py +2 -0
  67. numba_cuda/numba/cuda/simulator/cudadrv/nvvm.py +29 -0
  68. numba_cuda/numba/cuda/simulator/cudadrv/runtime.py +19 -0
  69. numba_cuda/numba/cuda/simulator/kernel.py +308 -0
  70. numba_cuda/numba/cuda/simulator/kernelapi.py +495 -0
  71. numba_cuda/numba/cuda/simulator/reduction.py +15 -0
  72. numba_cuda/numba/cuda/simulator/vector_types.py +58 -0
  73. numba_cuda/numba/cuda/simulator_init.py +17 -0
  74. numba_cuda/numba/cuda/stubs.py +902 -0
  75. numba_cuda/numba/cuda/target.py +440 -0
  76. numba_cuda/numba/cuda/testing.py +202 -0
  77. numba_cuda/numba/cuda/tests/__init__.py +58 -0
  78. numba_cuda/numba/cuda/tests/cudadrv/__init__.py +8 -0
  79. numba_cuda/numba/cuda/tests/cudadrv/test_array_attr.py +145 -0
  80. numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py +145 -0
  81. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_array_slicing.py +375 -0
  82. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_auto_context.py +21 -0
  83. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py +179 -0
  84. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py +235 -0
  85. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_libraries.py +22 -0
  86. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_memory.py +193 -0
  87. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py +547 -0
  88. numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py +249 -0
  89. numba_cuda/numba/cuda/tests/cudadrv/test_detect.py +81 -0
  90. numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +192 -0
  91. numba_cuda/numba/cuda/tests/cudadrv/test_events.py +38 -0
  92. numba_cuda/numba/cuda/tests/cudadrv/test_host_alloc.py +65 -0
  93. numba_cuda/numba/cuda/tests/cudadrv/test_init.py +139 -0
  94. numba_cuda/numba/cuda/tests/cudadrv/test_inline_ptx.py +37 -0
  95. numba_cuda/numba/cuda/tests/cudadrv/test_is_fp16.py +12 -0
  96. numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +317 -0
  97. numba_cuda/numba/cuda/tests/cudadrv/test_managed_alloc.py +127 -0
  98. numba_cuda/numba/cuda/tests/cudadrv/test_mvc.py +54 -0
  99. numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py +199 -0
  100. numba_cuda/numba/cuda/tests/cudadrv/test_pinned.py +37 -0
  101. numba_cuda/numba/cuda/tests/cudadrv/test_profiler.py +20 -0
  102. numba_cuda/numba/cuda/tests/cudadrv/test_ptds.py +149 -0
  103. numba_cuda/numba/cuda/tests/cudadrv/test_reset_device.py +36 -0
  104. numba_cuda/numba/cuda/tests/cudadrv/test_runtime.py +85 -0
  105. numba_cuda/numba/cuda/tests/cudadrv/test_select_device.py +41 -0
  106. numba_cuda/numba/cuda/tests/cudadrv/test_streams.py +122 -0
  107. numba_cuda/numba/cuda/tests/cudapy/__init__.py +8 -0
  108. numba_cuda/numba/cuda/tests/cudapy/cache_usecases.py +234 -0
  109. numba_cuda/numba/cuda/tests/cudapy/cache_with_cpu_usecases.py +41 -0
  110. numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +58 -0
  111. numba_cuda/numba/cuda/tests/cudapy/jitlink.ptx +30 -0
  112. numba_cuda/numba/cuda/tests/cudapy/recursion_usecases.py +100 -0
  113. numba_cuda/numba/cuda/tests/cudapy/test_alignment.py +42 -0
  114. numba_cuda/numba/cuda/tests/cudapy/test_array.py +260 -0
  115. numba_cuda/numba/cuda/tests/cudapy/test_array_args.py +201 -0
  116. numba_cuda/numba/cuda/tests/cudapy/test_array_methods.py +35 -0
  117. numba_cuda/numba/cuda/tests/cudapy/test_atomics.py +1620 -0
  118. numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py +120 -0
  119. numba_cuda/numba/cuda/tests/cudapy/test_boolean.py +24 -0
  120. numba_cuda/numba/cuda/tests/cudapy/test_caching.py +545 -0
  121. numba_cuda/numba/cuda/tests/cudapy/test_casting.py +257 -0
  122. numba_cuda/numba/cuda/tests/cudapy/test_cffi.py +33 -0
  123. numba_cuda/numba/cuda/tests/cudapy/test_compiler.py +276 -0
  124. numba_cuda/numba/cuda/tests/cudapy/test_complex.py +296 -0
  125. numba_cuda/numba/cuda/tests/cudapy/test_complex_kernel.py +20 -0
  126. numba_cuda/numba/cuda/tests/cudapy/test_const_string.py +129 -0
  127. numba_cuda/numba/cuda/tests/cudapy/test_constmem.py +176 -0
  128. numba_cuda/numba/cuda/tests/cudapy/test_cooperative_groups.py +147 -0
  129. numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py +435 -0
  130. numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py +90 -0
  131. numba_cuda/numba/cuda/tests/cudapy/test_datetime.py +94 -0
  132. numba_cuda/numba/cuda/tests/cudapy/test_debug.py +101 -0
  133. numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +221 -0
  134. numba_cuda/numba/cuda/tests/cudapy/test_device_func.py +222 -0
  135. numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +700 -0
  136. numba_cuda/numba/cuda/tests/cudapy/test_enums.py +121 -0
  137. numba_cuda/numba/cuda/tests/cudapy/test_errors.py +79 -0
  138. numba_cuda/numba/cuda/tests/cudapy/test_exception.py +174 -0
  139. numba_cuda/numba/cuda/tests/cudapy/test_extending.py +155 -0
  140. numba_cuda/numba/cuda/tests/cudapy/test_fastmath.py +244 -0
  141. numba_cuda/numba/cuda/tests/cudapy/test_forall.py +52 -0
  142. numba_cuda/numba/cuda/tests/cudapy/test_freevar.py +29 -0
  143. numba_cuda/numba/cuda/tests/cudapy/test_frexp_ldexp.py +66 -0
  144. numba_cuda/numba/cuda/tests/cudapy/test_globals.py +60 -0
  145. numba_cuda/numba/cuda/tests/cudapy/test_gufunc.py +456 -0
  146. numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scalar.py +159 -0
  147. numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scheduling.py +95 -0
  148. numba_cuda/numba/cuda/tests/cudapy/test_idiv.py +37 -0
  149. numba_cuda/numba/cuda/tests/cudapy/test_inspect.py +165 -0
  150. numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py +1106 -0
  151. numba_cuda/numba/cuda/tests/cudapy/test_ipc.py +318 -0
  152. numba_cuda/numba/cuda/tests/cudapy/test_iterators.py +99 -0
  153. numba_cuda/numba/cuda/tests/cudapy/test_lang.py +64 -0
  154. numba_cuda/numba/cuda/tests/cudapy/test_laplace.py +119 -0
  155. numba_cuda/numba/cuda/tests/cudapy/test_libdevice.py +187 -0
  156. numba_cuda/numba/cuda/tests/cudapy/test_lineinfo.py +199 -0
  157. numba_cuda/numba/cuda/tests/cudapy/test_localmem.py +164 -0
  158. numba_cuda/numba/cuda/tests/cudapy/test_mandel.py +37 -0
  159. numba_cuda/numba/cuda/tests/cudapy/test_math.py +786 -0
  160. numba_cuda/numba/cuda/tests/cudapy/test_matmul.py +74 -0
  161. numba_cuda/numba/cuda/tests/cudapy/test_minmax.py +113 -0
  162. numba_cuda/numba/cuda/tests/cudapy/test_montecarlo.py +22 -0
  163. numba_cuda/numba/cuda/tests/cudapy/test_multigpu.py +140 -0
  164. numba_cuda/numba/cuda/tests/cudapy/test_multiprocessing.py +46 -0
  165. numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py +101 -0
  166. numba_cuda/numba/cuda/tests/cudapy/test_nondet.py +49 -0
  167. numba_cuda/numba/cuda/tests/cudapy/test_operator.py +401 -0
  168. numba_cuda/numba/cuda/tests/cudapy/test_optimization.py +86 -0
  169. numba_cuda/numba/cuda/tests/cudapy/test_overload.py +335 -0
  170. numba_cuda/numba/cuda/tests/cudapy/test_powi.py +124 -0
  171. numba_cuda/numba/cuda/tests/cudapy/test_print.py +128 -0
  172. numba_cuda/numba/cuda/tests/cudapy/test_py2_div_issue.py +33 -0
  173. numba_cuda/numba/cuda/tests/cudapy/test_random.py +104 -0
  174. numba_cuda/numba/cuda/tests/cudapy/test_record_dtype.py +610 -0
  175. numba_cuda/numba/cuda/tests/cudapy/test_recursion.py +125 -0
  176. numba_cuda/numba/cuda/tests/cudapy/test_reduction.py +76 -0
  177. numba_cuda/numba/cuda/tests/cudapy/test_retrieve_autoconverted_arrays.py +83 -0
  178. numba_cuda/numba/cuda/tests/cudapy/test_serialize.py +85 -0
  179. numba_cuda/numba/cuda/tests/cudapy/test_slicing.py +37 -0
  180. numba_cuda/numba/cuda/tests/cudapy/test_sm.py +444 -0
  181. numba_cuda/numba/cuda/tests/cudapy/test_sm_creation.py +205 -0
  182. numba_cuda/numba/cuda/tests/cudapy/test_sync.py +271 -0
  183. numba_cuda/numba/cuda/tests/cudapy/test_transpose.py +80 -0
  184. numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +277 -0
  185. numba_cuda/numba/cuda/tests/cudapy/test_userexc.py +47 -0
  186. numba_cuda/numba/cuda/tests/cudapy/test_vector_type.py +307 -0
  187. numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py +283 -0
  188. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_complex.py +20 -0
  189. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py +69 -0
  190. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_device.py +36 -0
  191. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_scalar_arg.py +37 -0
  192. numba_cuda/numba/cuda/tests/cudapy/test_warning.py +139 -0
  193. numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +276 -0
  194. numba_cuda/numba/cuda/tests/cudasim/__init__.py +6 -0
  195. numba_cuda/numba/cuda/tests/cudasim/support.py +6 -0
  196. numba_cuda/numba/cuda/tests/cudasim/test_cudasim_issues.py +102 -0
  197. numba_cuda/numba/cuda/tests/data/__init__.py +0 -0
  198. numba_cuda/numba/cuda/tests/data/cuda_include.cu +5 -0
  199. numba_cuda/numba/cuda/tests/data/error.cu +7 -0
  200. numba_cuda/numba/cuda/tests/data/jitlink.cu +23 -0
  201. numba_cuda/numba/cuda/tests/data/jitlink.ptx +51 -0
  202. numba_cuda/numba/cuda/tests/data/warn.cu +7 -0
  203. numba_cuda/numba/cuda/tests/doc_examples/__init__.py +6 -0
  204. numba_cuda/numba/cuda/tests/doc_examples/ffi/__init__.py +0 -0
  205. numba_cuda/numba/cuda/tests/doc_examples/ffi/functions.cu +49 -0
  206. numba_cuda/numba/cuda/tests/doc_examples/test_cg.py +77 -0
  207. numba_cuda/numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py +76 -0
  208. numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py +82 -0
  209. numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py +155 -0
  210. numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py +173 -0
  211. numba_cuda/numba/cuda/tests/doc_examples/test_montecarlo.py +109 -0
  212. numba_cuda/numba/cuda/tests/doc_examples/test_random.py +59 -0
  213. numba_cuda/numba/cuda/tests/doc_examples/test_reduction.py +76 -0
  214. numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py +130 -0
  215. numba_cuda/numba/cuda/tests/doc_examples/test_ufunc.py +50 -0
  216. numba_cuda/numba/cuda/tests/doc_examples/test_vecadd.py +73 -0
  217. numba_cuda/numba/cuda/tests/nocuda/__init__.py +8 -0
  218. numba_cuda/numba/cuda/tests/nocuda/test_dummyarray.py +359 -0
  219. numba_cuda/numba/cuda/tests/nocuda/test_function_resolution.py +36 -0
  220. numba_cuda/numba/cuda/tests/nocuda/test_import.py +49 -0
  221. numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py +238 -0
  222. numba_cuda/numba/cuda/tests/nocuda/test_nvvm.py +54 -0
  223. numba_cuda/numba/cuda/types.py +37 -0
  224. numba_cuda/numba/cuda/ufuncs.py +662 -0
  225. numba_cuda/numba/cuda/vector_types.py +209 -0
  226. numba_cuda/numba/cuda/vectorizers.py +252 -0
  227. numba_cuda-0.0.12.dist-info/LICENSE +25 -0
  228. numba_cuda-0.0.12.dist-info/METADATA +68 -0
  229. numba_cuda-0.0.12.dist-info/RECORD +231 -0
  230. {numba_cuda-0.0.0.dist-info → numba_cuda-0.0.12.dist-info}/WHEEL +1 -1
  231. numba_cuda-0.0.0.dist-info/METADATA +0 -6
  232. numba_cuda-0.0.0.dist-info/RECORD +0 -5
  233. {numba_cuda-0.0.0.dist-info → numba_cuda-0.0.12.dist-info}/top_level.txt +0 -0
numba_cuda/numba/cuda/tests/cudadrv/test_init.py
@@ -0,0 +1,139 @@
+ import multiprocessing as mp
+ import os
+
+ from numba import cuda
+ from numba.cuda.cudadrv.driver import CudaAPIError, driver
+ from numba.cuda.cudadrv.error import CudaSupportError
+ from numba.cuda.testing import skip_on_cudasim, unittest, CUDATestCase
+
+
+ # A mock of cuInit that always raises a CudaAPIError
+ def cuInit_raising(arg):
+     raise CudaAPIError(999, 'CUDA_ERROR_UNKNOWN')
+
+
+ # Test code to run in a child that patches driver.cuInit to a variant that
+ # always raises. We can't use mock.patch.object here because driver.cuInit is
+ # not assigned until we attempt to initialize - mock.patch.object cannot locate
+ # the non-existent original method, and so fails. Instead we patch
+ # driver.cuInit with our raising version prior to any attempt to initialize.
+ def cuInit_raising_test(result_queue):
+     driver.cuInit = cuInit_raising
+
+     success = False
+     msg = None
+
+     try:
+         # A CUDA operation that forces initialization of the device
+         cuda.device_array(1)
+     except CudaSupportError as e:
+         success = True
+         msg = e.msg
+
+     result_queue.put((success, msg))
+
+
+ # Similar to cuInit_raising_test above, but for testing that the string
+ # returned by cuda_error() is as expected.
+ def initialization_error_test(result_queue):
+     driver.cuInit = cuInit_raising
+
+     success = False
+     msg = None
+
+     try:
+         # A CUDA operation that forces initialization of the device
+         cuda.device_array(1)
+     except CudaSupportError:
+         success = True
+
+     msg = cuda.cuda_error()
+     result_queue.put((success, msg))
+
+
+ # For testing the path where Driver.__init__() catches a CudaSupportError
+ def cuda_disabled_test(result_queue):
+     success = False
+     msg = None
+
+     try:
+         # A CUDA operation that forces initialization of the device
+         cuda.device_array(1)
+     except CudaSupportError as e:
+         success = True
+         msg = e.msg
+
+     result_queue.put((success, msg))
+
+
+ # Similar to cuda_disabled_test, but checks cuda.cuda_error() instead of the
+ # exception raised on initialization
+ def cuda_disabled_error_test(result_queue):
+     success = False
+     msg = None
+
+     try:
+         # A CUDA operation that forces initialization of the device
+         cuda.device_array(1)
+     except CudaSupportError:
+         success = True
+
+     msg = cuda.cuda_error()
+     result_queue.put((success, msg))
+
+
+ @skip_on_cudasim('CUDA Simulator does not initialize driver')
+ class TestInit(CUDATestCase):
+     def _test_init_failure(self, target, expected):
+         # Run the initialization failure test in a separate subprocess
+         ctx = mp.get_context('spawn')
+         result_queue = ctx.Queue()
+         proc = ctx.Process(target=target, args=(result_queue,))
+         proc.start()
+         proc.join(30)  # should complete within 30s
+         success, msg = result_queue.get()
+
+         # Ensure the child process raised an exception during initialization
+         # before checking the message
+         if not success:
+             self.fail('CudaSupportError not raised')
+
+         self.assertIn(expected, msg)
+
+     def test_init_failure_raising(self):
+         expected = 'Error at driver init: CUDA_ERROR_UNKNOWN (999)'
+         self._test_init_failure(cuInit_raising_test, expected)
+
+     def test_init_failure_error(self):
+         expected = 'CUDA_ERROR_UNKNOWN (999)'
+         self._test_init_failure(initialization_error_test, expected)
+
+     def _test_cuda_disabled(self, target):
+         # Uses _test_init_failure to launch the test in a separate subprocess
+         # with CUDA disabled.
+         cuda_disabled = os.environ.get('NUMBA_DISABLE_CUDA')
+         os.environ['NUMBA_DISABLE_CUDA'] = "1"
+         try:
+             expected = 'CUDA is disabled due to setting NUMBA_DISABLE_CUDA=1'
+             self._test_init_failure(cuda_disabled_test, expected)
+         finally:
+             if cuda_disabled is not None:
+                 os.environ['NUMBA_DISABLE_CUDA'] = cuda_disabled
+             else:
+                 os.environ.pop('NUMBA_DISABLE_CUDA')
+
+     def test_cuda_disabled_raising(self):
+         self._test_cuda_disabled(cuda_disabled_test)
+
+     def test_cuda_disabled_error(self):
+         self._test_cuda_disabled(cuda_disabled_error_test)
+
+     def test_init_success(self):
+         # Here we assume that initialization is successful (because many bad
+         # things will happen with the test suite if it is not) and check that
+         # there is no error recorded.
+         self.assertIsNone(cuda.cuda_error())
+
+
+ if __name__ == '__main__':
+     unittest.main()
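The tests above exercise the driver-initialization error reporting that numba-cuda surfaces through cuda.cuda_error(). As a rough orientation for readers of this diff, the following minimal sketch (not part of the package) shows how that API is typically consulted before doing any GPU work: cuda.is_available() forces driver initialization, and cuda.cuda_error() returns None on success or the recorded failure message otherwise.

# Minimal sketch, not from the diff: checking driver initialization state.
from numba import cuda

# Querying availability triggers driver initialization.
if cuda.is_available():
    # Initialization succeeded; cuda.cuda_error() returns None here.
    print("CUDA is available; device:", cuda.get_current_device().name)
else:
    # Initialization failed or CUDA is disabled; the recorded message is the
    # kind of string the tests above assert on.
    print("CUDA is not available:", cuda.cuda_error())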
numba_cuda/numba/cuda/tests/cudadrv/test_inline_ptx.py
@@ -0,0 +1,37 @@
+ from llvmlite import ir
+
+ from numba.cuda.cudadrv import nvvm
+ from numba.cuda.testing import unittest, ContextResettingTestCase
+ from numba.cuda.testing import skip_on_cudasim
+
+
+ @skip_on_cudasim('Inline PTX cannot be used in the simulator')
+ class TestCudaInlineAsm(ContextResettingTestCase):
+     def test_inline_rsqrt(self):
+         mod = ir.Module(__name__)
+         mod.triple = 'nvptx64-nvidia-cuda'
+         nvvm.add_ir_version(mod)
+         fnty = ir.FunctionType(ir.VoidType(), [ir.PointerType(ir.FloatType())])
+         fn = ir.Function(mod, fnty, 'cu_rsqrt')
+         bldr = ir.IRBuilder(fn.append_basic_block('entry'))
+
+         rsqrt_approx_fnty = ir.FunctionType(ir.FloatType(), [ir.FloatType()])
+         inlineasm = ir.InlineAsm(rsqrt_approx_fnty,
+                                  'rsqrt.approx.f32 $0, $1;',
+                                  '=f,f', side_effect=True)
+         val = bldr.load(fn.args[0])
+         res = bldr.call(inlineasm, [val])
+
+         bldr.store(res, fn.args[0])
+         bldr.ret_void()
+
+         # generate ptx
+         mod.data_layout = nvvm.NVVM().data_layout
+         nvvm.set_cuda_kernel(fn)
+         nvvmir = str(mod)
+         ptx = nvvm.compile_ir(nvvmir)
+         self.assertTrue('rsqrt.approx.f32' in str(ptx))
+
+
+ if __name__ == '__main__':
+     unittest.main()
numba_cuda/numba/cuda/tests/cudadrv/test_is_fp16.py
@@ -0,0 +1,12 @@
+ from numba import cuda
+ from numba.cuda.testing import CUDATestCase, skip_on_cudasim, skip_unless_cc_53
+
+
+ class TestIsFP16Supported(CUDATestCase):
+     def test_is_fp16_supported(self):
+         self.assertTrue(cuda.is_float16_supported())
+
+     @skip_on_cudasim
+     @skip_unless_cc_53
+     def test_device_supports_float16(self):
+         self.assertTrue(cuda.get_current_device().supports_float16)
numba_cuda/numba/cuda/tests/cudadrv/test_linker.py
@@ -0,0 +1,317 @@
+ import numpy as np
+ import warnings
+ from numba.cuda.testing import unittest
+ from numba.cuda.testing import (skip_on_cudasim, skip_if_cuda_includes_missing)
+ from numba.cuda.testing import CUDATestCase, test_data_dir
+ from numba.cuda.cudadrv.driver import (CudaAPIError, Linker,
+                                        LinkerError)
+ from numba.cuda.cudadrv.error import NvrtcError
+ from numba.cuda import require_context
+ from numba.tests.support import ignore_internal_warnings
+ from numba import cuda, void, float64, int64, int32, typeof, float32
+
+
+ CONST1D = np.arange(10, dtype=np.float64)
+
+
+ def simple_const_mem(A):
+     C = cuda.const.array_like(CONST1D)
+     i = cuda.grid(1)
+
+     A[i] = C[i] + 1.0
+
+
+ def func_with_lots_of_registers(x, a, b, c, d, e, f):
+     a1 = 1.0
+     a2 = 1.0
+     a3 = 1.0
+     a4 = 1.0
+     a5 = 1.0
+     b1 = 1.0
+     b2 = 1.0
+     b3 = 1.0
+     b4 = 1.0
+     b5 = 1.0
+     c1 = 1.0
+     c2 = 1.0
+     c3 = 1.0
+     c4 = 1.0
+     c5 = 1.0
+     d1 = 10
+     d2 = 10
+     d3 = 10
+     d4 = 10
+     d5 = 10
+     for i in range(a):
+         a1 += b
+         a2 += c
+         a3 += d
+         a4 += e
+         a5 += f
+         b1 *= b
+         b2 *= c
+         b3 *= d
+         b4 *= e
+         b5 *= f
+         c1 /= b
+         c2 /= c
+         c3 /= d
+         c4 /= e
+         c5 /= f
+         d1 <<= b
+         d2 <<= c
+         d3 <<= d
+         d4 <<= e
+         d5 <<= f
+     x[cuda.grid(1)] = a1 + a2 + a3 + a4 + a5
+     x[cuda.grid(1)] += b1 + b2 + b3 + b4 + b5
+     x[cuda.grid(1)] += c1 + c2 + c3 + c4 + c5
+     x[cuda.grid(1)] += d1 + d2 + d3 + d4 + d5
+
+
+ def simple_smem(ary, dty):
+     sm = cuda.shared.array(100, dty)
+     i = cuda.grid(1)
+     if i == 0:
+         for j in range(100):
+             sm[j] = j
+     cuda.syncthreads()
+     ary[i] = sm[i]
+
+
+ def coop_smem2d(ary):
+     i, j = cuda.grid(2)
+     sm = cuda.shared.array((10, 20), float32)
+     sm[i, j] = (i + 1) / (j + 1)
+     cuda.syncthreads()
+     ary[i, j] = sm[i, j]
+
+
+ def simple_maxthreads(ary):
+     i = cuda.grid(1)
+     ary[i] = i
+
+
+ LMEM_SIZE = 1000
+
+
+ def simple_lmem(A, B, dty):
+     C = cuda.local.array(LMEM_SIZE, dty)
+     for i in range(C.shape[0]):
+         C[i] = A[i]
+     for i in range(C.shape[0]):
+         B[i] = C[i]
+
+
+ @skip_on_cudasim('Linking unsupported in the simulator')
+ class TestLinker(CUDATestCase):
+     _NUMBA_NVIDIA_BINDING_0_ENV = {'NUMBA_CUDA_USE_NVIDIA_BINDING': '0'}
+
+     @require_context
+     def test_linker_basic(self):
+         '''Simply go through the constructor and destructor
+         '''
+         linker = Linker.new(cc=(5, 3))
+         del linker
+
+     def _test_linking(self, eager):
+         global bar  # must be a global; other it is recognized as a freevar
+         bar = cuda.declare_device('bar', 'int32(int32)')
+
+         link = str(test_data_dir / 'jitlink.ptx')
+
+         if eager:
+             args = ['void(int32[:], int32[:])']
+         else:
+             args = []
+
+         @cuda.jit(*args, link=[link])
+         def foo(x, y):
+             i = cuda.grid(1)
+             x[i] += bar(y[i])
+
+         A = np.array([123], dtype=np.int32)
+         B = np.array([321], dtype=np.int32)
+
+         foo[1, 1](A, B)
+
+         self.assertTrue(A[0] == 123 + 2 * 321)
+
+     def test_linking_lazy_compile(self):
+         self._test_linking(eager=False)
+
+     def test_linking_eager_compile(self):
+         self._test_linking(eager=True)
+
+     def test_linking_cu(self):
+         bar = cuda.declare_device('bar', 'int32(int32)')
+
+         link = str(test_data_dir / 'jitlink.cu')
+
+         @cuda.jit(link=[link])
+         def kernel(r, x):
+             i = cuda.grid(1)
+
+             if i < len(r):
+                 r[i] = bar(x[i])
+
+         x = np.arange(10, dtype=np.int32)
+         r = np.zeros_like(x)
+
+         kernel[1, 32](r, x)
+
+         # Matches the operation of bar() in jitlink.cu
+         expected = x * 2
+         np.testing.assert_array_equal(r, expected)
+
+     def test_linking_cu_log_warning(self):
+         bar = cuda.declare_device('bar', 'int32(int32)')
+
+         link = str(test_data_dir / 'warn.cu')
+
+         with warnings.catch_warnings(record=True) as w:
+             ignore_internal_warnings()
+
+             @cuda.jit('void(int32)', link=[link])
+             def kernel(x):
+                 bar(x)
+
+         self.assertEqual(len(w), 1, 'Expected warnings from NVRTC')
+         # Check the warning refers to the log messages
+         self.assertIn('NVRTC log messages', str(w[0].message))
+         # Check the message pertaining to the unused variable is provided
+         self.assertIn('declared but never referenced', str(w[0].message))
+
+     def test_linking_cu_error(self):
+         bar = cuda.declare_device('bar', 'int32(int32)')
+
+         link = str(test_data_dir / 'error.cu')
+
+         with self.assertRaises(NvrtcError) as e:
+             @cuda.jit('void(int32)', link=[link])
+             def kernel(x):
+                 bar(x)
+
+         msg = e.exception.args[0]
+         # Check the error message refers to the NVRTC compile
+         self.assertIn('NVRTC Compilation failure', msg)
+         # Check the expected error in the CUDA source is reported
+         self.assertIn('identifier "SYNTAX" is undefined', msg)
+         # Check the filename is reported correctly
+         self.assertIn('in the compilation of "error.cu"', msg)
+
+     def test_linking_unknown_filetype_error(self):
+         expected_err = "Don't know how to link file with extension .cuh"
+         with self.assertRaisesRegex(RuntimeError, expected_err):
+             @cuda.jit('void()', link=['header.cuh'])
+             def kernel():
+                 pass
+
+     def test_linking_file_with_no_extension_error(self):
+         expected_err = "Don't know how to link file with no extension"
+         with self.assertRaisesRegex(RuntimeError, expected_err):
+             @cuda.jit('void()', link=['data'])
+             def kernel():
+                 pass
+
+     @skip_if_cuda_includes_missing
+     def test_linking_cu_cuda_include(self):
+         link = str(test_data_dir / 'cuda_include.cu')
+
+         # An exception will be raised when linking this kernel due to the
+         # compile failure if CUDA includes cannot be found by Nvrtc.
+         @cuda.jit('void()', link=[link])
+         def kernel():
+             pass
+
+     def test_try_to_link_nonexistent(self):
+         with self.assertRaises(LinkerError) as e:
+             @cuda.jit('void(int32[::1])', link=['nonexistent.a'])
+             def f(x):
+                 x[0] = 0
+         self.assertIn('nonexistent.a not found', e.exception.args)
+
+     def test_set_registers_no_max(self):
+         """Ensure that the jitted kernel used in the test_set_registers_* tests
+         uses more than 57 registers - this ensures that test_set_registers_*
+         are really checking that they reduced the number of registers used from
+         something greater than the maximum."""
+         compiled = cuda.jit(func_with_lots_of_registers)
+         compiled = compiled.specialize(np.empty(32), *range(6))
+         self.assertGreater(compiled.get_regs_per_thread(), 57)
+
+     def test_set_registers_57(self):
+         compiled = cuda.jit(max_registers=57)(func_with_lots_of_registers)
+         compiled = compiled.specialize(np.empty(32), *range(6))
+         self.assertLessEqual(compiled.get_regs_per_thread(), 57)
+
+     def test_set_registers_38(self):
+         compiled = cuda.jit(max_registers=38)(func_with_lots_of_registers)
+         compiled = compiled.specialize(np.empty(32), *range(6))
+         self.assertLessEqual(compiled.get_regs_per_thread(), 38)
+
+     def test_set_registers_eager(self):
+         sig = void(float64[::1], int64, int64, int64, int64, int64, int64)
+         compiled = cuda.jit(sig, max_registers=38)(func_with_lots_of_registers)
+         self.assertLessEqual(compiled.get_regs_per_thread(), 38)
+
+     def test_get_const_mem_size(self):
+         sig = void(float64[::1])
+         compiled = cuda.jit(sig)(simple_const_mem)
+         const_mem_size = compiled.get_const_mem_size()
+         self.assertGreaterEqual(const_mem_size, CONST1D.nbytes)
+
+     def test_get_no_shared_memory(self):
+         compiled = cuda.jit(func_with_lots_of_registers)
+         compiled = compiled.specialize(np.empty(32), *range(6))
+         shared_mem_size = compiled.get_shared_mem_per_block()
+         self.assertEqual(shared_mem_size, 0)
+
+     def test_get_shared_mem_per_block(self):
+         sig = void(int32[::1], typeof(np.int32))
+         compiled = cuda.jit(sig)(simple_smem)
+         shared_mem_size = compiled.get_shared_mem_per_block()
+         self.assertEqual(shared_mem_size, 400)
+
+     def test_get_shared_mem_per_specialized(self):
+         compiled = cuda.jit(simple_smem)
+         compiled_specialized = compiled.specialize(
+             np.zeros(100, dtype=np.int32), np.float64)
+         shared_mem_size = compiled_specialized.get_shared_mem_per_block()
+         self.assertEqual(shared_mem_size, 800)
+
+     def test_get_max_threads_per_block(self):
+         compiled = cuda.jit("void(float32[:,::1])")(coop_smem2d)
+         max_threads = compiled.get_max_threads_per_block()
+         self.assertGreater(max_threads, 0)
+
+     def test_max_threads_exceeded(self):
+         compiled = cuda.jit("void(int32[::1])")(simple_maxthreads)
+         max_threads = compiled.get_max_threads_per_block()
+         nelem = max_threads + 1
+         ary = np.empty(nelem, dtype=np.int32)
+         try:
+             compiled[1, nelem](ary)
+         except CudaAPIError as e:
+             self.assertIn("cuLaunchKernel", e.msg)
+
+     def test_get_local_mem_per_thread(self):
+         sig = void(int32[::1], int32[::1], typeof(np.int32))
+         compiled = cuda.jit(sig)(simple_lmem)
+         local_mem_size = compiled.get_local_mem_per_thread()
+         calc_size = np.dtype(np.int32).itemsize * LMEM_SIZE
+         self.assertGreaterEqual(local_mem_size, calc_size)
+
+     def test_get_local_mem_per_specialized(self):
+         compiled = cuda.jit(simple_lmem)
+         compiled_specialized = compiled.specialize(
+             np.zeros(LMEM_SIZE, dtype=np.int32),
+             np.zeros(LMEM_SIZE, dtype=np.int32),
+             np.float64)
+         local_mem_size = compiled_specialized.get_local_mem_per_thread()
+         calc_size = np.dtype(np.float64).itemsize * LMEM_SIZE
+         self.assertGreaterEqual(local_mem_size, calc_size)
+
+
+ if __name__ == '__main__':
+     unittest.main()
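For context, the linking tests above revolve around the cuda.declare_device / link= pattern for calling foreign device functions compiled from PTX or CUDA C. A minimal sketch of that pattern follows; the file name mylib.cu and the function name mul2 are hypothetical placeholders, not files shipped in this package.

# Minimal sketch, not from the diff: calling an externally linked device function.
import numpy as np
from numba import cuda

# Declare a device function whose implementation lives in an external file.
# The signature string must match the C declaration of the function.
mul2 = cuda.declare_device('mul2', 'int32(int32)')

# Passing the source file via link= causes it to be compiled (by NVRTC for
# .cu files) and linked into the kernel, as exercised by test_linking_cu.
@cuda.jit(link=['mylib.cu'])
def call_external(r, x):
    i = cuda.grid(1)
    if i < r.size:
        r[i] = mul2(x[i])

x = np.arange(16, dtype=np.int32)
r = np.zeros_like(x)
call_external[1, 32](r, x)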
numba_cuda/numba/cuda/tests/cudadrv/test_managed_alloc.py
@@ -0,0 +1,127 @@
+ import numpy as np
+ from ctypes import byref, c_size_t
+ from numba.cuda.cudadrv.driver import device_memset, driver, USE_NV_BINDING
+ from numba import cuda
+ from numba.cuda.testing import unittest, ContextResettingTestCase
+ from numba.cuda.testing import skip_on_cudasim, skip_on_arm
+ from numba.tests.support import linux_only
+
+
+ @skip_on_cudasim('CUDA Driver API unsupported in the simulator')
+ @linux_only
+ @skip_on_arm('Managed Alloc support is experimental/untested on ARM')
+ class TestManagedAlloc(ContextResettingTestCase):
+
+     def get_total_gpu_memory(self):
+         # We use a driver function to directly get the total GPU memory because
+         # an EMM plugin may report something different (or not implement
+         # get_memory_info at all).
+         if USE_NV_BINDING:
+             free, total = driver.cuMemGetInfo()
+             return total
+         else:
+             free = c_size_t()
+             total = c_size_t()
+             driver.cuMemGetInfo(byref(free), byref(total))
+             return total.value
+
+     def skip_if_cc_major_lt(self, min_required, reason):
+         """
+         Skip the current test if the compute capability of the device is
+         less than `min_required`.
+         """
+         ctx = cuda.current_context()
+         cc_major = ctx.device.compute_capability[0]
+         if cc_major < min_required:
+             self.skipTest(reason)
+
+     # CUDA Unified Memory comes in two flavors. For GPUs in the Kepler and
+     # Maxwell generations, managed memory allocations work as opaque,
+     # contiguous segments that can either be on the device or the host. For
+     # GPUs in the Pascal or later generations, managed memory operates on a
+     # per-page basis, so we can have arrays larger than GPU memory, where only
+     # part of them is resident on the device at one time. To ensure that this
+     # test works correctly on all supported GPUs, we'll select the size of our
+     # memory such that we only oversubscribe the GPU memory if we're on a
+     # Pascal or newer GPU (compute capability at least 6.0).
+
+     def test_managed_alloc_driver_undersubscribe(self):
+         msg = "Managed memory unsupported prior to CC 3.0"
+         self.skip_if_cc_major_lt(3, msg)
+         self._test_managed_alloc_driver(0.5)
+
+     # This test is skipped by default because it is easy to hang the machine
+     # for a very long time or get OOM killed if the GPU memory size is >50% of
+     # the system memory size. Even if the system does have more than 2x the RAM
+     # of the GPU, this test runs for a very long time (in comparison to the
+     # rest of the tests in the suite).
+     #
+     # However, it is left in here for manual testing as required.
+
+     @unittest.skip
+     def test_managed_alloc_driver_oversubscribe(self):
+         msg = "Oversubscription of managed memory unsupported prior to CC 6.0"
+         self.skip_if_cc_major_lt(6, msg)
+         self._test_managed_alloc_driver(2.0)
+
+     def test_managed_alloc_driver_host_attach(self):
+         msg = "Host attached managed memory is not accessible prior to CC 6.0"
+         self.skip_if_cc_major_lt(6, msg)
+         # Only test with a small array (0.01 * memory size) to keep the test
+         # quick.
+         self._test_managed_alloc_driver(0.01, attach_global=False)
+
+     def _test_managed_alloc_driver(self, memory_factor, attach_global=True):
+         # Verify that we can allocate and operate on managed
+         # memory through the CUDA driver interface.
+
+         total_mem_size = self.get_total_gpu_memory()
+         n_bytes = int(memory_factor * total_mem_size)
+
+         ctx = cuda.current_context()
+         mem = ctx.memallocmanaged(n_bytes, attach_global=attach_global)
+
+         dtype = np.dtype(np.uint8)
+         n_elems = n_bytes // dtype.itemsize
+         ary = np.ndarray(shape=n_elems, dtype=dtype, buffer=mem)
+
+         magic = 0xab
+         device_memset(mem, magic, n_bytes)
+         ctx.synchronize()
+
+         # Note that this assertion operates on the CPU, so this
+         # test effectively drives both the CPU and the GPU on
+         # managed memory.
+
+         self.assertTrue(np.all(ary == magic))
+
+     def _test_managed_array(self, attach_global=True):
+         # Check the managed_array interface on both host and device.
+
+         ary = cuda.managed_array(100, dtype=np.double)
+         ary.fill(123.456)
+         self.assertTrue(all(ary == 123.456))
+
+         @cuda.jit('void(double[:])')
+         def kernel(x):
+             i = cuda.grid(1)
+             if i < x.shape[0]:
+                 x[i] = 1.0
+
+         kernel[10, 10](ary)
+         cuda.current_context().synchronize()
+
+         self.assertTrue(all(ary == 1.0))
+
+     def test_managed_array_attach_global(self):
+         self._test_managed_array()
+
+     def test_managed_array_attach_host(self):
+         self._test_managed_array()
+         msg = "Host attached managed memory is not accessible prior to CC 6.0"
+         self.skip_if_cc_major_lt(6, msg)
+         self._test_managed_array(attach_global=False)
+
+
+ if __name__ == '__main__':
+     unittest.main()
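The managed-memory tests above cover both the low-level ctx.memallocmanaged() path and the higher-level cuda.managed_array() helper. A minimal sketch of the latter, assuming a device with unified memory support, is shown below; values are written on the host and on the device without explicit copies.

# Minimal sketch, not from the diff: unified memory via cuda.managed_array.
import numpy as np
from numba import cuda

# Allocate managed (unified) memory addressable from both host and device.
ary = cuda.managed_array(1024, dtype=np.float64)
ary[:] = 0.0  # host write

@cuda.jit
def fill(x):
    i = cuda.grid(1)
    if i < x.size:
        x[i] = i  # device write

fill[(ary.size + 127) // 128, 128](ary)
cuda.synchronize()        # make device writes visible to the host
assert ary[5] == 5.0      # host read, no explicit copy needed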
numba_cuda/numba/cuda/tests/cudadrv/test_mvc.py
@@ -0,0 +1,54 @@
+ import multiprocessing as mp
+ import traceback
+ from numba.cuda.testing import unittest, CUDATestCase
+ from numba.cuda.testing import (skip_on_cudasim, skip_under_cuda_memcheck,
+                                 skip_if_mvc_libraries_unavailable)
+ from numba.tests.support import linux_only
+
+
+ def child_test():
+     from numba import config, cuda
+
+     # Change the MVC config after importing numba.cuda
+     config.CUDA_ENABLE_MINOR_VERSION_COMPATIBILITY = 1
+
+     @cuda.jit
+     def f():
+         pass
+
+     f[1, 1]()
+
+
+ def child_test_wrapper(result_queue):
+     try:
+         output = child_test()
+         success = True
+     # Catch anything raised so it can be propagated
+     except:  # noqa: E722
+         output = traceback.format_exc()
+         success = False
+
+     result_queue.put((success, output))
+
+
+ @linux_only
+ @skip_under_cuda_memcheck('May hang CUDA memcheck')
+ @skip_on_cudasim('Simulator does not require or implement MVC')
+ @skip_if_mvc_libraries_unavailable
+ class TestMinorVersionCompatibility(CUDATestCase):
+     def test_mvc(self):
+         # Run test with Minor Version Compatibility enabled in a child process
+         ctx = mp.get_context('spawn')
+         result_queue = ctx.Queue()
+         proc = ctx.Process(target=child_test_wrapper, args=(result_queue,))
+         proc.start()
+         proc.join()
+         success, output = result_queue.get()
+
+         # Ensure the child process ran to completion before checking its output
+         if not success:
+             self.fail(output)
+
+
+ if __name__ == '__main__':
+     unittest.main()