numba-cuda 0.0.1__py3-none-any.whl → 0.0.12__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (233)
  1. _numba_cuda_redirector.pth +1 -0
  2. _numba_cuda_redirector.py +74 -0
  3. numba_cuda/VERSION +1 -0
  4. numba_cuda/__init__.py +5 -0
  5. numba_cuda/_version.py +19 -0
  6. numba_cuda/numba/cuda/__init__.py +22 -0
  7. numba_cuda/numba/cuda/api.py +526 -0
  8. numba_cuda/numba/cuda/api_util.py +30 -0
  9. numba_cuda/numba/cuda/args.py +77 -0
  10. numba_cuda/numba/cuda/cg.py +62 -0
  11. numba_cuda/numba/cuda/codegen.py +378 -0
  12. numba_cuda/numba/cuda/compiler.py +422 -0
  13. numba_cuda/numba/cuda/cpp_function_wrappers.cu +47 -0
  14. numba_cuda/numba/cuda/cuda_fp16.h +3631 -0
  15. numba_cuda/numba/cuda/cuda_fp16.hpp +2465 -0
  16. numba_cuda/numba/cuda/cuda_paths.py +258 -0
  17. numba_cuda/numba/cuda/cudadecl.py +806 -0
  18. numba_cuda/numba/cuda/cudadrv/__init__.py +9 -0
  19. numba_cuda/numba/cuda/cudadrv/devicearray.py +904 -0
  20. numba_cuda/numba/cuda/cudadrv/devices.py +248 -0
  21. numba_cuda/numba/cuda/cudadrv/driver.py +3201 -0
  22. numba_cuda/numba/cuda/cudadrv/drvapi.py +398 -0
  23. numba_cuda/numba/cuda/cudadrv/dummyarray.py +452 -0
  24. numba_cuda/numba/cuda/cudadrv/enums.py +607 -0
  25. numba_cuda/numba/cuda/cudadrv/error.py +36 -0
  26. numba_cuda/numba/cuda/cudadrv/libs.py +176 -0
  27. numba_cuda/numba/cuda/cudadrv/ndarray.py +20 -0
  28. numba_cuda/numba/cuda/cudadrv/nvrtc.py +260 -0
  29. numba_cuda/numba/cuda/cudadrv/nvvm.py +707 -0
  30. numba_cuda/numba/cuda/cudadrv/rtapi.py +10 -0
  31. numba_cuda/numba/cuda/cudadrv/runtime.py +142 -0
  32. numba_cuda/numba/cuda/cudaimpl.py +1055 -0
  33. numba_cuda/numba/cuda/cudamath.py +140 -0
  34. numba_cuda/numba/cuda/decorators.py +189 -0
  35. numba_cuda/numba/cuda/descriptor.py +33 -0
  36. numba_cuda/numba/cuda/device_init.py +89 -0
  37. numba_cuda/numba/cuda/deviceufunc.py +908 -0
  38. numba_cuda/numba/cuda/dispatcher.py +1057 -0
  39. numba_cuda/numba/cuda/errors.py +59 -0
  40. numba_cuda/numba/cuda/extending.py +7 -0
  41. numba_cuda/numba/cuda/initialize.py +13 -0
  42. numba_cuda/numba/cuda/intrinsic_wrapper.py +77 -0
  43. numba_cuda/numba/cuda/intrinsics.py +198 -0
  44. numba_cuda/numba/cuda/kernels/__init__.py +0 -0
  45. numba_cuda/numba/cuda/kernels/reduction.py +262 -0
  46. numba_cuda/numba/cuda/kernels/transpose.py +65 -0
  47. numba_cuda/numba/cuda/libdevice.py +3382 -0
  48. numba_cuda/numba/cuda/libdevicedecl.py +17 -0
  49. numba_cuda/numba/cuda/libdevicefuncs.py +1057 -0
  50. numba_cuda/numba/cuda/libdeviceimpl.py +83 -0
  51. numba_cuda/numba/cuda/mathimpl.py +448 -0
  52. numba_cuda/numba/cuda/models.py +48 -0
  53. numba_cuda/numba/cuda/nvvmutils.py +235 -0
  54. numba_cuda/numba/cuda/printimpl.py +86 -0
  55. numba_cuda/numba/cuda/random.py +292 -0
  56. numba_cuda/numba/cuda/simulator/__init__.py +38 -0
  57. numba_cuda/numba/cuda/simulator/api.py +110 -0
  58. numba_cuda/numba/cuda/simulator/compiler.py +9 -0
  59. numba_cuda/numba/cuda/simulator/cudadrv/__init__.py +2 -0
  60. numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py +432 -0
  61. numba_cuda/numba/cuda/simulator/cudadrv/devices.py +117 -0
  62. numba_cuda/numba/cuda/simulator/cudadrv/driver.py +62 -0
  63. numba_cuda/numba/cuda/simulator/cudadrv/drvapi.py +4 -0
  64. numba_cuda/numba/cuda/simulator/cudadrv/dummyarray.py +4 -0
  65. numba_cuda/numba/cuda/simulator/cudadrv/error.py +6 -0
  66. numba_cuda/numba/cuda/simulator/cudadrv/libs.py +2 -0
  67. numba_cuda/numba/cuda/simulator/cudadrv/nvvm.py +29 -0
  68. numba_cuda/numba/cuda/simulator/cudadrv/runtime.py +19 -0
  69. numba_cuda/numba/cuda/simulator/kernel.py +308 -0
  70. numba_cuda/numba/cuda/simulator/kernelapi.py +495 -0
  71. numba_cuda/numba/cuda/simulator/reduction.py +15 -0
  72. numba_cuda/numba/cuda/simulator/vector_types.py +58 -0
  73. numba_cuda/numba/cuda/simulator_init.py +17 -0
  74. numba_cuda/numba/cuda/stubs.py +902 -0
  75. numba_cuda/numba/cuda/target.py +440 -0
  76. numba_cuda/numba/cuda/testing.py +202 -0
  77. numba_cuda/numba/cuda/tests/__init__.py +58 -0
  78. numba_cuda/numba/cuda/tests/cudadrv/__init__.py +8 -0
  79. numba_cuda/numba/cuda/tests/cudadrv/test_array_attr.py +145 -0
  80. numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py +145 -0
  81. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_array_slicing.py +375 -0
  82. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_auto_context.py +21 -0
  83. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py +179 -0
  84. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py +235 -0
  85. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_libraries.py +22 -0
  86. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_memory.py +193 -0
  87. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py +547 -0
  88. numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py +249 -0
  89. numba_cuda/numba/cuda/tests/cudadrv/test_detect.py +81 -0
  90. numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +192 -0
  91. numba_cuda/numba/cuda/tests/cudadrv/test_events.py +38 -0
  92. numba_cuda/numba/cuda/tests/cudadrv/test_host_alloc.py +65 -0
  93. numba_cuda/numba/cuda/tests/cudadrv/test_init.py +139 -0
  94. numba_cuda/numba/cuda/tests/cudadrv/test_inline_ptx.py +37 -0
  95. numba_cuda/numba/cuda/tests/cudadrv/test_is_fp16.py +12 -0
  96. numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +317 -0
  97. numba_cuda/numba/cuda/tests/cudadrv/test_managed_alloc.py +127 -0
  98. numba_cuda/numba/cuda/tests/cudadrv/test_mvc.py +54 -0
  99. numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py +199 -0
  100. numba_cuda/numba/cuda/tests/cudadrv/test_pinned.py +37 -0
  101. numba_cuda/numba/cuda/tests/cudadrv/test_profiler.py +20 -0
  102. numba_cuda/numba/cuda/tests/cudadrv/test_ptds.py +149 -0
  103. numba_cuda/numba/cuda/tests/cudadrv/test_reset_device.py +36 -0
  104. numba_cuda/numba/cuda/tests/cudadrv/test_runtime.py +85 -0
  105. numba_cuda/numba/cuda/tests/cudadrv/test_select_device.py +41 -0
  106. numba_cuda/numba/cuda/tests/cudadrv/test_streams.py +122 -0
  107. numba_cuda/numba/cuda/tests/cudapy/__init__.py +8 -0
  108. numba_cuda/numba/cuda/tests/cudapy/cache_usecases.py +234 -0
  109. numba_cuda/numba/cuda/tests/cudapy/cache_with_cpu_usecases.py +41 -0
  110. numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +58 -0
  111. numba_cuda/numba/cuda/tests/cudapy/jitlink.ptx +30 -0
  112. numba_cuda/numba/cuda/tests/cudapy/recursion_usecases.py +100 -0
  113. numba_cuda/numba/cuda/tests/cudapy/test_alignment.py +42 -0
  114. numba_cuda/numba/cuda/tests/cudapy/test_array.py +260 -0
  115. numba_cuda/numba/cuda/tests/cudapy/test_array_args.py +201 -0
  116. numba_cuda/numba/cuda/tests/cudapy/test_array_methods.py +35 -0
  117. numba_cuda/numba/cuda/tests/cudapy/test_atomics.py +1620 -0
  118. numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py +120 -0
  119. numba_cuda/numba/cuda/tests/cudapy/test_boolean.py +24 -0
  120. numba_cuda/numba/cuda/tests/cudapy/test_caching.py +545 -0
  121. numba_cuda/numba/cuda/tests/cudapy/test_casting.py +257 -0
  122. numba_cuda/numba/cuda/tests/cudapy/test_cffi.py +33 -0
  123. numba_cuda/numba/cuda/tests/cudapy/test_compiler.py +276 -0
  124. numba_cuda/numba/cuda/tests/cudapy/test_complex.py +296 -0
  125. numba_cuda/numba/cuda/tests/cudapy/test_complex_kernel.py +20 -0
  126. numba_cuda/numba/cuda/tests/cudapy/test_const_string.py +129 -0
  127. numba_cuda/numba/cuda/tests/cudapy/test_constmem.py +176 -0
  128. numba_cuda/numba/cuda/tests/cudapy/test_cooperative_groups.py +147 -0
  129. numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py +435 -0
  130. numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py +90 -0
  131. numba_cuda/numba/cuda/tests/cudapy/test_datetime.py +94 -0
  132. numba_cuda/numba/cuda/tests/cudapy/test_debug.py +101 -0
  133. numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +221 -0
  134. numba_cuda/numba/cuda/tests/cudapy/test_device_func.py +222 -0
  135. numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +700 -0
  136. numba_cuda/numba/cuda/tests/cudapy/test_enums.py +121 -0
  137. numba_cuda/numba/cuda/tests/cudapy/test_errors.py +79 -0
  138. numba_cuda/numba/cuda/tests/cudapy/test_exception.py +174 -0
  139. numba_cuda/numba/cuda/tests/cudapy/test_extending.py +155 -0
  140. numba_cuda/numba/cuda/tests/cudapy/test_fastmath.py +244 -0
  141. numba_cuda/numba/cuda/tests/cudapy/test_forall.py +52 -0
  142. numba_cuda/numba/cuda/tests/cudapy/test_freevar.py +29 -0
  143. numba_cuda/numba/cuda/tests/cudapy/test_frexp_ldexp.py +66 -0
  144. numba_cuda/numba/cuda/tests/cudapy/test_globals.py +60 -0
  145. numba_cuda/numba/cuda/tests/cudapy/test_gufunc.py +456 -0
  146. numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scalar.py +159 -0
  147. numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scheduling.py +95 -0
  148. numba_cuda/numba/cuda/tests/cudapy/test_idiv.py +37 -0
  149. numba_cuda/numba/cuda/tests/cudapy/test_inspect.py +165 -0
  150. numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py +1106 -0
  151. numba_cuda/numba/cuda/tests/cudapy/test_ipc.py +318 -0
  152. numba_cuda/numba/cuda/tests/cudapy/test_iterators.py +99 -0
  153. numba_cuda/numba/cuda/tests/cudapy/test_lang.py +64 -0
  154. numba_cuda/numba/cuda/tests/cudapy/test_laplace.py +119 -0
  155. numba_cuda/numba/cuda/tests/cudapy/test_libdevice.py +187 -0
  156. numba_cuda/numba/cuda/tests/cudapy/test_lineinfo.py +199 -0
  157. numba_cuda/numba/cuda/tests/cudapy/test_localmem.py +164 -0
  158. numba_cuda/numba/cuda/tests/cudapy/test_mandel.py +37 -0
  159. numba_cuda/numba/cuda/tests/cudapy/test_math.py +786 -0
  160. numba_cuda/numba/cuda/tests/cudapy/test_matmul.py +74 -0
  161. numba_cuda/numba/cuda/tests/cudapy/test_minmax.py +113 -0
  162. numba_cuda/numba/cuda/tests/cudapy/test_montecarlo.py +22 -0
  163. numba_cuda/numba/cuda/tests/cudapy/test_multigpu.py +140 -0
  164. numba_cuda/numba/cuda/tests/cudapy/test_multiprocessing.py +46 -0
  165. numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py +101 -0
  166. numba_cuda/numba/cuda/tests/cudapy/test_nondet.py +49 -0
  167. numba_cuda/numba/cuda/tests/cudapy/test_operator.py +401 -0
  168. numba_cuda/numba/cuda/tests/cudapy/test_optimization.py +86 -0
  169. numba_cuda/numba/cuda/tests/cudapy/test_overload.py +335 -0
  170. numba_cuda/numba/cuda/tests/cudapy/test_powi.py +124 -0
  171. numba_cuda/numba/cuda/tests/cudapy/test_print.py +128 -0
  172. numba_cuda/numba/cuda/tests/cudapy/test_py2_div_issue.py +33 -0
  173. numba_cuda/numba/cuda/tests/cudapy/test_random.py +104 -0
  174. numba_cuda/numba/cuda/tests/cudapy/test_record_dtype.py +610 -0
  175. numba_cuda/numba/cuda/tests/cudapy/test_recursion.py +125 -0
  176. numba_cuda/numba/cuda/tests/cudapy/test_reduction.py +76 -0
  177. numba_cuda/numba/cuda/tests/cudapy/test_retrieve_autoconverted_arrays.py +83 -0
  178. numba_cuda/numba/cuda/tests/cudapy/test_serialize.py +85 -0
  179. numba_cuda/numba/cuda/tests/cudapy/test_slicing.py +37 -0
  180. numba_cuda/numba/cuda/tests/cudapy/test_sm.py +444 -0
  181. numba_cuda/numba/cuda/tests/cudapy/test_sm_creation.py +205 -0
  182. numba_cuda/numba/cuda/tests/cudapy/test_sync.py +271 -0
  183. numba_cuda/numba/cuda/tests/cudapy/test_transpose.py +80 -0
  184. numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +277 -0
  185. numba_cuda/numba/cuda/tests/cudapy/test_userexc.py +47 -0
  186. numba_cuda/numba/cuda/tests/cudapy/test_vector_type.py +307 -0
  187. numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py +283 -0
  188. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_complex.py +20 -0
  189. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py +69 -0
  190. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_device.py +36 -0
  191. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_scalar_arg.py +37 -0
  192. numba_cuda/numba/cuda/tests/cudapy/test_warning.py +139 -0
  193. numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +276 -0
  194. numba_cuda/numba/cuda/tests/cudasim/__init__.py +6 -0
  195. numba_cuda/numba/cuda/tests/cudasim/support.py +6 -0
  196. numba_cuda/numba/cuda/tests/cudasim/test_cudasim_issues.py +102 -0
  197. numba_cuda/numba/cuda/tests/data/__init__.py +0 -0
  198. numba_cuda/numba/cuda/tests/data/cuda_include.cu +5 -0
  199. numba_cuda/numba/cuda/tests/data/error.cu +7 -0
  200. numba_cuda/numba/cuda/tests/data/jitlink.cu +23 -0
  201. numba_cuda/numba/cuda/tests/data/jitlink.ptx +51 -0
  202. numba_cuda/numba/cuda/tests/data/warn.cu +7 -0
  203. numba_cuda/numba/cuda/tests/doc_examples/__init__.py +6 -0
  204. numba_cuda/numba/cuda/tests/doc_examples/ffi/__init__.py +0 -0
  205. numba_cuda/numba/cuda/tests/doc_examples/ffi/functions.cu +49 -0
  206. numba_cuda/numba/cuda/tests/doc_examples/test_cg.py +77 -0
  207. numba_cuda/numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py +76 -0
  208. numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py +82 -0
  209. numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py +155 -0
  210. numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py +173 -0
  211. numba_cuda/numba/cuda/tests/doc_examples/test_montecarlo.py +109 -0
  212. numba_cuda/numba/cuda/tests/doc_examples/test_random.py +59 -0
  213. numba_cuda/numba/cuda/tests/doc_examples/test_reduction.py +76 -0
  214. numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py +130 -0
  215. numba_cuda/numba/cuda/tests/doc_examples/test_ufunc.py +50 -0
  216. numba_cuda/numba/cuda/tests/doc_examples/test_vecadd.py +73 -0
  217. numba_cuda/numba/cuda/tests/nocuda/__init__.py +8 -0
  218. numba_cuda/numba/cuda/tests/nocuda/test_dummyarray.py +359 -0
  219. numba_cuda/numba/cuda/tests/nocuda/test_function_resolution.py +36 -0
  220. numba_cuda/numba/cuda/tests/nocuda/test_import.py +49 -0
  221. numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py +238 -0
  222. numba_cuda/numba/cuda/tests/nocuda/test_nvvm.py +54 -0
  223. numba_cuda/numba/cuda/types.py +37 -0
  224. numba_cuda/numba/cuda/ufuncs.py +662 -0
  225. numba_cuda/numba/cuda/vector_types.py +209 -0
  226. numba_cuda/numba/cuda/vectorizers.py +252 -0
  227. numba_cuda-0.0.12.dist-info/LICENSE +25 -0
  228. numba_cuda-0.0.12.dist-info/METADATA +68 -0
  229. numba_cuda-0.0.12.dist-info/RECORD +231 -0
  230. {numba_cuda-0.0.1.dist-info → numba_cuda-0.0.12.dist-info}/WHEEL +1 -1
  231. numba_cuda-0.0.1.dist-info/METADATA +0 -10
  232. numba_cuda-0.0.1.dist-info/RECORD +0 -5
  233. {numba_cuda-0.0.1.dist-info → numba_cuda-0.0.12.dist-info}/top_level.txt +0 -0
numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py
@@ -0,0 +1,199 @@
+ import warnings
+
+ from llvmlite import ir
+ from numba.cuda.cudadrv import nvvm, runtime
+ from numba.cuda.testing import unittest
+ from numba.cuda.cudadrv.nvvm import LibDevice, NvvmError, NVVM
+ from numba.cuda.testing import skip_on_cudasim
+
+
+ @skip_on_cudasim('NVVM Driver unsupported in the simulator')
+ class TestNvvmDriver(unittest.TestCase):
+     def get_nvvmir(self):
+         versions = NVVM().get_ir_version()
+         data_layout = NVVM().data_layout
+         return nvvmir_generic.format(data_layout=data_layout, v=versions)
+
+     def test_nvvm_compile_simple(self):
+         nvvmir = self.get_nvvmir()
+         ptx = nvvm.compile_ir(nvvmir).decode('utf8')
+         self.assertTrue('simple' in ptx)
+         self.assertTrue('ave' in ptx)
+
+     def test_nvvm_compile_nullary_option(self):
+         # Tests compilation with an option that doesn't take an argument
+         # ("-gen-lto") - all other NVVM options are of the form
+         # "-<name>=<value>"
+
+         # -gen-lto is not available prior to CUDA 11.5
+         if runtime.get_version() < (11, 5):
+             self.skipTest("-gen-lto unavailable in this toolkit version")
+
+         nvvmir = self.get_nvvmir()
+         ltoir = nvvm.compile_ir(nvvmir, opt=3, gen_lto=None, arch="compute_52")
+
+         # Verify we correctly passed the option by checking if we got LTOIR
+         # from NVVM (by looking for the expected magic number for LTOIR)
+         self.assertEqual(ltoir[:4], b'\xed\x43\x4e\x7f')
+
+     def test_nvvm_bad_option(self):
+         # Ensure that unsupported / non-existent options are reported as such
+         # to the user / caller
+         msg = "-made-up-option=2 is an unsupported option"
+         with self.assertRaisesRegex(NvvmError, msg):
+             nvvm.compile_ir("", made_up_option=2)
+
+     def test_nvvm_from_llvm(self):
+         m = ir.Module("test_nvvm_from_llvm")
+         m.triple = 'nvptx64-nvidia-cuda'
+         nvvm.add_ir_version(m)
+         fty = ir.FunctionType(ir.VoidType(), [ir.IntType(32)])
+         kernel = ir.Function(m, fty, name='mycudakernel')
+         bldr = ir.IRBuilder(kernel.append_basic_block('entry'))
+         bldr.ret_void()
+         nvvm.set_cuda_kernel(kernel)
+
+         m.data_layout = NVVM().data_layout
+         ptx = nvvm.compile_ir(str(m)).decode('utf8')
+         self.assertTrue('mycudakernel' in ptx)
+         self.assertTrue('.address_size 64' in ptx)
+
+     def test_used_list(self):
+         # Construct a module
+         m = ir.Module("test_used_list")
+         m.triple = 'nvptx64-nvidia-cuda'
+         m.data_layout = NVVM().data_layout
+         nvvm.add_ir_version(m)
+
+         # Add a function and mark it as a kernel
+         fty = ir.FunctionType(ir.VoidType(), [ir.IntType(32)])
+         kernel = ir.Function(m, fty, name='mycudakernel')
+         bldr = ir.IRBuilder(kernel.append_basic_block('entry'))
+         bldr.ret_void()
+         nvvm.set_cuda_kernel(kernel)
+
+         # Verify that the used list was correctly constructed
+         used_lines = [line for line in str(m).splitlines()
+                       if 'llvm.used' in line]
+         msg = 'Expected exactly one @"llvm.used" array'
+         self.assertEqual(len(used_lines), 1, msg)
+
+         used_line = used_lines[0]
+         # Kernel should be referenced in the used list
+         self.assertIn("mycudakernel", used_line)
+         # Check linkage of the used list
+         self.assertIn("appending global", used_line)
+         # Ensure used list is in the metadata section
+         self.assertIn('section "llvm.metadata"', used_line)
+
+     def test_nvvm_ir_verify_fail(self):
+         m = ir.Module("test_bad_ir")
+         m.triple = "unknown-unknown-unknown"
+         m.data_layout = NVVM().data_layout
+         nvvm.add_ir_version(m)
+         with self.assertRaisesRegex(NvvmError, 'Invalid target triple'):
+             nvvm.compile_ir(str(m))
+
+     def _test_nvvm_support(self, arch):
+         compute_xx = 'compute_{0}{1}'.format(*arch)
+         nvvmir = self.get_nvvmir()
+         ptx = nvvm.compile_ir(nvvmir, arch=compute_xx, ftz=1, prec_sqrt=0,
+                               prec_div=0).decode('utf8')
+         self.assertIn(".target sm_{0}{1}".format(*arch), ptx)
+         self.assertIn('simple', ptx)
+         self.assertIn('ave', ptx)
+
+     def test_nvvm_support(self):
+         """Test supported CC by NVVM
+         """
+         for arch in nvvm.get_supported_ccs():
+             self._test_nvvm_support(arch=arch)
+
+     def test_nvvm_warning(self):
+         m = ir.Module("test_nvvm_warning")
+         m.triple = 'nvptx64-nvidia-cuda'
+         m.data_layout = NVVM().data_layout
+         nvvm.add_ir_version(m)
+
+         fty = ir.FunctionType(ir.VoidType(), [])
+         kernel = ir.Function(m, fty, name='inlinekernel')
+         builder = ir.IRBuilder(kernel.append_basic_block('entry'))
+         builder.ret_void()
+         nvvm.set_cuda_kernel(kernel)
+
+         # Add the noinline attribute to trigger NVVM to generate a warning
+         kernel.attributes.add('noinline')
+
+         with warnings.catch_warnings(record=True) as w:
+             nvvm.compile_ir(str(m))
+
+         self.assertEqual(len(w), 1)
+         self.assertIn('overriding noinline attribute', str(w[0]))
+
+
+ @skip_on_cudasim('NVVM Driver unsupported in the simulator')
+ class TestArchOption(unittest.TestCase):
+     def test_get_arch_option(self):
+         # Test returning the nearest lowest arch.
+         self.assertEqual(nvvm.get_arch_option(5, 3), 'compute_53')
+         self.assertEqual(nvvm.get_arch_option(7, 5), 'compute_75')
+         self.assertEqual(nvvm.get_arch_option(7, 7), 'compute_75')
+         # Test known arch.
+         supported_cc = nvvm.get_supported_ccs()
+         for arch in supported_cc:
+             self.assertEqual(nvvm.get_arch_option(*arch), 'compute_%d%d' % arch)
+         self.assertEqual(nvvm.get_arch_option(1000, 0),
+                          'compute_%d%d' % supported_cc[-1])
+
+
+ @skip_on_cudasim('NVVM Driver unsupported in the simulator')
+ class TestLibDevice(unittest.TestCase):
+     def test_libdevice_load(self):
+         # Test that constructing LibDevice gives a bitcode file
+         libdevice = LibDevice()
+         self.assertEqual(libdevice.bc[:4], b'BC\xc0\xde')
+
+
+ nvvmir_generic = '''\
+ target triple="nvptx64-nvidia-cuda"
+ target datalayout = "{data_layout}"
+
+ define i32 @ave(i32 %a, i32 %b) {{
+ entry:
+ %add = add nsw i32 %a, %b
+ %div = sdiv i32 %add, 2
+ ret i32 %div
+ }}
+
+ define void @simple(i32* %data) {{
+ entry:
+ %0 = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x()
+ %1 = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
+ %mul = mul i32 %0, %1
+ %2 = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
+ %add = add i32 %mul, %2
+ %call = call i32 @ave(i32 %add, i32 %add)
+ %idxprom = sext i32 %add to i64
+ %arrayidx = getelementptr inbounds i32, i32* %data, i64 %idxprom
+ store i32 %call, i32* %arrayidx, align 4
+ ret void
+ }}
+
+ declare i32 @llvm.nvvm.read.ptx.sreg.ctaid.x() nounwind readnone
+
+ declare i32 @llvm.nvvm.read.ptx.sreg.ntid.x() nounwind readnone
+
+ declare i32 @llvm.nvvm.read.ptx.sreg.tid.x() nounwind readnone
+
+ !nvvmir.version = !{{!1}}
+ !1 = !{{i32 {v[0]}, i32 {v[1]}, i32 {v[2]}, i32 {v[3]}}}
+
+ !nvvm.annotations = !{{!2}}
+ !2 = !{{void (i32*)* @simple, !"kernel", i32 1}}
+
+ @"llvm.used" = appending global [1 x i8*] [i8* bitcast (void (i32*)* @simple to i8*)], section "llvm.metadata"
+ ''' # noqa: E501
+
+
+ if __name__ == '__main__':
+     unittest.main()
numba_cuda/numba/cuda/tests/cudadrv/test_pinned.py
@@ -0,0 +1,37 @@
+ import numpy as np
+ import platform
+
+ from numba import cuda
+ from numba.cuda.testing import unittest, ContextResettingTestCase
+
+
+ class TestPinned(ContextResettingTestCase):
+
+     def _run_copies(self, A):
+         A0 = np.copy(A)
+
+         stream = cuda.stream()
+         ptr = cuda.to_device(A, copy=False, stream=stream)
+         ptr.copy_to_device(A, stream=stream)
+         ptr.copy_to_host(A, stream=stream)
+         stream.synchronize()
+
+         self.assertTrue(np.allclose(A, A0))
+
+     def test_pinned(self):
+         machine = platform.machine()
+         if machine.startswith('arm') or machine.startswith('aarch64'):
+             count = 262144  # 2MB
+         else:
+             count = 2097152  # 16MB
+         A = np.arange(count)
+         with cuda.pinned(A):
+             self._run_copies(A)
+
+     def test_unpinned(self):
+         A = np.arange(2 * 1024 * 1024)  # 16 MB
+         self._run_copies(A)
+
+
+ if __name__ == '__main__':
+     unittest.main()
numba_cuda/numba/cuda/tests/cudadrv/test_profiler.py
@@ -0,0 +1,20 @@
+ import unittest
+ from numba.cuda.testing import ContextResettingTestCase
+ from numba import cuda
+ from numba.cuda.testing import skip_on_cudasim
+
+
+ @skip_on_cudasim('CUDA Profiler unsupported in the simulator')
+ class TestProfiler(ContextResettingTestCase):
+     def test_profiling(self):
+         with cuda.profiling():
+             a = cuda.device_array(10)
+             del a
+
+         with cuda.profiling():
+             a = cuda.device_array(100)
+             del a
+
+
+ if __name__ == '__main__':
+     unittest.main()
numba_cuda/numba/cuda/tests/cudadrv/test_ptds.py
@@ -0,0 +1,149 @@
+ import multiprocessing as mp
+ import logging
+ import traceback
+ from numba.cuda.testing import unittest, CUDATestCase
+ from numba.cuda.testing import (skip_on_cudasim, skip_with_cuda_python,
+                                 skip_under_cuda_memcheck)
+ from numba.tests.support import linux_only
+
+
+ def child_test():
+     from numba import cuda, int32, void
+     from numba.core import config
+     import io
+     import numpy as np
+     import threading
+
+     # Enable PTDS before we make any CUDA driver calls. Enabling it first
+     # ensures that PTDS APIs are used because the CUDA driver looks up API
+     # functions on first use and memoizes them.
+     config.CUDA_PER_THREAD_DEFAULT_STREAM = 1
+
+     # Set up log capture for the Driver API so we can see what API calls were
+     # used.
+     logbuf = io.StringIO()
+     handler = logging.StreamHandler(logbuf)
+     cudadrv_logger = logging.getLogger('numba.cuda.cudadrv.driver')
+     cudadrv_logger.addHandler(handler)
+     cudadrv_logger.setLevel(logging.DEBUG)
+
+     # Set up data for our test, and copy over to the device
+     N = 2 ** 16
+     N_THREADS = 10
+     N_ADDITIONS = 4096
+
+     # Seed the RNG for repeatability
+     np.random.seed(1)
+     x = np.random.randint(low=0, high=1000, size=N, dtype=np.int32)
+     r = np.zeros_like(x)
+
+     # One input and output array for each thread
+     xs = [cuda.to_device(x) for _ in range(N_THREADS)]
+     rs = [cuda.to_device(r) for _ in range(N_THREADS)]
+
+     # Compute the grid size and get the [per-thread] default stream
+     n_threads = 256
+     n_blocks = N // n_threads
+     stream = cuda.default_stream()
+
+     # A simple multiplication-by-addition kernel. What it does exactly is not
+     # too important; only that we have a kernel that does something.
+     @cuda.jit(void(int32[::1], int32[::1]))
+     def f(r, x):
+         i = cuda.grid(1)
+
+         if i > len(r):
+             return
+
+         # Accumulate x into r
+         for j in range(N_ADDITIONS):
+             r[i] += x[i]
+
+     # This function will be used to launch the kernel from each thread on its
+     # own unique data.
+     def kernel_thread(n):
+         f[n_blocks, n_threads, stream](rs[n], xs[n])
+
+     # Create threads
+     threads = [threading.Thread(target=kernel_thread, args=(i,))
+                for i in range(N_THREADS)]
+
+     # Start all threads
+     for thread in threads:
+         thread.start()
+
+     # Wait for all threads to finish, to ensure that we don't synchronize with
+     # the device until all kernels are scheduled.
+     for thread in threads:
+         thread.join()
+
+     # Synchronize with the device
+     cuda.synchronize()
+
+     # Check output is as expected
+     expected = x * N_ADDITIONS
+     for i in range(N_THREADS):
+         np.testing.assert_equal(rs[i].copy_to_host(), expected)
+
+     # Return the driver log output to the calling process for checking
+     handler.flush()
+     return logbuf.getvalue()
+
+
+ def child_test_wrapper(result_queue):
+     try:
+         output = child_test()
+         success = True
+     # Catch anything raised so it can be propagated
+     except:  # noqa: E722
+         output = traceback.format_exc()
+         success = False
+
+     result_queue.put((success, output))
+
+
+ # Run on Linux only until the reason for test hangs on Windows (Issue #8635,
+ # https://github.com/numba/numba/issues/8635) is diagnosed
+ @linux_only
+ @skip_under_cuda_memcheck('Hangs cuda-memcheck')
+ @skip_on_cudasim('Streams not supported on the simulator')
+ class TestPTDS(CUDATestCase):
+     @skip_with_cuda_python('Function names unchanged for PTDS with NV Binding')
+     def test_ptds(self):
+         # Run a test with PTDS enabled in a child process
+         ctx = mp.get_context('spawn')
+         result_queue = ctx.Queue()
+         proc = ctx.Process(target=child_test_wrapper, args=(result_queue,))
+         proc.start()
+         proc.join()
+         success, output = result_queue.get()
+
+         # Ensure the child process ran to completion before checking its output
+         if not success:
+             self.fail(output)
+
+         # Functions with a per-thread default stream variant that we expect to
+         # see in the output
+         ptds_functions = ('cuMemcpyHtoD_v2_ptds', 'cuLaunchKernel_ptsz',
+                           'cuMemcpyDtoH_v2_ptds')
+
+         for fn in ptds_functions:
+             with self.subTest(fn=fn, expected=True):
+                 self.assertIn(fn, output)
+
+         # Non-PTDS versions of the functions that we should not see in the
+         # output:
+         legacy_functions = ('cuMemcpyHtoD_v2', 'cuLaunchKernel',
+                             'cuMemcpyDtoH_v2')
+
+         for fn in legacy_functions:
+             with self.subTest(fn=fn, expected=False):
+                 # Ensure we only spot these function names appearing without a
+                 # _ptds or _ptsz suffix by checking including the end of the
+                 # line in the log
+                 fn_at_end = f'{fn}\n'
+                 self.assertNotIn(fn_at_end, output)
+
+
+ if __name__ == '__main__':
+     unittest.main()
numba_cuda/numba/cuda/tests/cudadrv/test_reset_device.py
@@ -0,0 +1,36 @@
+ import threading
+ from numba import cuda
+ from numba.cuda.cudadrv.driver import driver
+ from numba.cuda.testing import unittest, ContextResettingTestCase
+ from queue import Queue
+
+
+ class TestResetDevice(ContextResettingTestCase):
+     def test_reset_device(self):
+
+         def newthread(exception_queue):
+             try:
+                 devices = range(driver.get_device_count())
+                 for _ in range(2):
+                     for d in devices:
+                         cuda.select_device(d)
+                         cuda.close()
+             except Exception as e:
+                 exception_queue.put(e)
+
+         # Do test on a separate thread so that we don't affect
+         # the current context in the main thread.
+
+         exception_queue = Queue()
+         t = threading.Thread(target=newthread, args=(exception_queue,))
+         t.start()
+         t.join()
+
+         exceptions = []
+         while not exception_queue.empty():
+             exceptions.append(exception_queue.get())
+         self.assertEqual(exceptions, [])
+
+
+ if __name__ == '__main__':
+     unittest.main()
numba_cuda/numba/cuda/tests/cudadrv/test_runtime.py
@@ -0,0 +1,85 @@
+ import multiprocessing
+ import os
+ from numba.core import config
+ from numba.cuda.cudadrv.runtime import runtime
+ from numba.cuda.testing import unittest, SerialMixin, skip_on_cudasim
+ from unittest.mock import patch
+
+
+ def set_visible_devices_and_check(q):
+     try:
+         from numba import cuda
+         import os
+
+         os.environ['CUDA_VISIBLE_DEVICES'] = '0'
+         q.put(len(cuda.gpus.lst))
+     except:  # noqa: E722
+         # Sentinel value for error executing test code
+         q.put(-1)
+
+
+ if config.ENABLE_CUDASIM:
+     SUPPORTED_VERSIONS = (-1, -1),
+ else:
+     SUPPORTED_VERSIONS = ((11, 0), (11, 1), (11, 2), (11, 3), (11, 4), (11, 5),
+                           (11, 6), (11, 7))
+
+
+ class TestRuntime(unittest.TestCase):
+     def test_is_supported_version_true(self):
+         for v in SUPPORTED_VERSIONS:
+             with patch.object(runtime, 'get_version', return_value=v):
+                 self.assertTrue(runtime.is_supported_version())
+
+     @skip_on_cudasim('The simulator always simulates a supported runtime')
+     def test_is_supported_version_false(self):
+         # Check with an old unsupported version and some potential future
+         # versions
+         for v in ((10, 2), (11, 8), (12, 0)):
+             with patch.object(runtime, 'get_version', return_value=v):
+                 self.assertFalse(runtime.is_supported_version())
+
+     def test_supported_versions(self):
+         self.assertEqual(SUPPORTED_VERSIONS, runtime.supported_versions)
+
+
+ class TestVisibleDevices(unittest.TestCase, SerialMixin):
+     def test_visible_devices_set_after_import(self):
+         # See Issue #6149. This test checks that we can set
+         # CUDA_VISIBLE_DEVICES after importing Numba and have the value
+         # reflected in the available list of GPUs. Prior to the fix for this
+         # issue, Numba made a call to runtime.get_version() on import that
+         # initialized the driver and froze the list of available devices before
+         # CUDA_VISIBLE_DEVICES could be set by the user.
+
+         # Avoid importing cuda at the top level so that
+         # set_visible_devices_and_check gets to import it first in its process
+         from numba import cuda
+
+         if len(cuda.gpus.lst) in (0, 1):
+             self.skipTest('This test requires multiple GPUs')
+
+         if os.environ.get('CUDA_VISIBLE_DEVICES'):
+             msg = 'Cannot test when CUDA_VISIBLE_DEVICES already set'
+             self.skipTest(msg)
+
+         ctx = multiprocessing.get_context('spawn')
+         q = ctx.Queue()
+         p = ctx.Process(target=set_visible_devices_and_check, args=(q,))
+         p.start()
+         try:
+             visible_gpu_count = q.get()
+         finally:
+             p.join()
+
+         # Make an obvious distinction between an error running the test code
+         # and an incorrect number of GPUs in the list
+         msg = 'Error running set_visible_devices_and_check'
+         self.assertNotEqual(visible_gpu_count, -1, msg=msg)
+
+         # The actual check that we see only one GPU
+         self.assertEqual(visible_gpu_count, 1)
+
+
+ if __name__ == '__main__':
+     unittest.main()
numba_cuda/numba/cuda/tests/cudadrv/test_select_device.py
@@ -0,0 +1,41 @@
+ #
+ # Test does not work on some cards.
+ #
+ import threading
+ from queue import Queue
+
+ import numpy as np
+ from numba import cuda
+ from numba.cuda.testing import unittest, ContextResettingTestCase
+
+
+ def newthread(exception_queue):
+     try:
+         cuda.select_device(0)
+         stream = cuda.stream()
+         A = np.arange(100)
+         dA = cuda.to_device(A, stream=stream)
+         stream.synchronize()
+         del dA
+         del stream
+         cuda.close()
+     except Exception as e:
+         exception_queue.put(e)
+
+
+ class TestSelectDevice(ContextResettingTestCase):
+     def test_select_device(self):
+         exception_queue = Queue()
+         for i in range(10):
+             t = threading.Thread(target=newthread, args=(exception_queue,))
+             t.start()
+             t.join()
+
+         exceptions = []
+         while not exception_queue.empty():
+             exceptions.append(exception_queue.get())
+         self.assertEqual(exceptions, [])
+
+
+ if __name__ == '__main__':
+     unittest.main()
numba_cuda/numba/cuda/tests/cudadrv/test_streams.py
@@ -0,0 +1,122 @@
+ import asyncio
+ import functools
+ import threading
+ import numpy as np
+ from numba import cuda
+ from numba.cuda.testing import unittest, CUDATestCase, skip_on_cudasim
+
+
+ def with_asyncio_loop(f):
+     @functools.wraps(f)
+     def runner(*args, **kwds):
+         loop = asyncio.new_event_loop()
+         loop.set_debug(True)
+         try:
+             return loop.run_until_complete(f(*args, **kwds))
+         finally:
+             loop.close()
+     return runner
+
+
+ @skip_on_cudasim('CUDA Driver API unsupported in the simulator')
+ class TestCudaStream(CUDATestCase):
+     def test_add_callback(self):
+         def callback(stream, status, event):
+             event.set()
+
+         stream = cuda.stream()
+         callback_event = threading.Event()
+         stream.add_callback(callback, callback_event)
+         self.assertTrue(callback_event.wait(1.0))
+
+     def test_add_callback_with_default_arg(self):
+         callback_event = threading.Event()
+
+         def callback(stream, status, arg):
+             self.assertIsNone(arg)
+             callback_event.set()
+
+         stream = cuda.stream()
+         stream.add_callback(callback)
+         self.assertTrue(callback_event.wait(1.0))
+
+     @with_asyncio_loop
+     async def test_async_done(self):
+         stream = cuda.stream()
+         await stream.async_done()
+
+     @with_asyncio_loop
+     async def test_parallel_tasks(self):
+         async def async_cuda_fn(value_in: float) -> float:
+             stream = cuda.stream()
+             h_src, h_dst = cuda.pinned_array(8), cuda.pinned_array(8)
+             h_src[:] = value_in
+             d_ary = cuda.to_device(h_src, stream=stream)
+             d_ary.copy_to_host(h_dst, stream=stream)
+             done_result = await stream.async_done()
+             self.assertEqual(done_result, stream)
+             return h_dst.mean()
+
+         values_in = [1, 2, 3, 4]
+         tasks = [asyncio.create_task(async_cuda_fn(v)) for v in values_in]
+         values_out = await asyncio.gather(*tasks)
+         self.assertTrue(np.allclose(values_in, values_out))
+
+     @with_asyncio_loop
+     async def test_multiple_async_done(self):
+         stream = cuda.stream()
+         done_aws = [stream.async_done() for _ in range(4)]
+         done = await asyncio.gather(*done_aws)
+         for d in done:
+             self.assertEqual(d, stream)
+
+     @with_asyncio_loop
+     async def test_multiple_async_done_multiple_streams(self):
+         streams = [cuda.stream() for _ in range(4)]
+         done_aws = [stream.async_done() for stream in streams]
+         done = await asyncio.gather(*done_aws)
+
+         # Ensure we got the four original streams in done
+         self.assertSetEqual(set(done), set(streams))
+
+     @with_asyncio_loop
+     async def test_cancelled_future(self):
+         stream = cuda.stream()
+         done1, done2 = stream.async_done(), stream.async_done()
+         done1.cancel()
+         await done2
+         self.assertTrue(done1.cancelled())
+         self.assertTrue(done2.done())
+
+
+ @skip_on_cudasim('CUDA Driver API unsupported in the simulator')
+ class TestFailingStream(CUDATestCase):
+     # This test can only be run in isolation because it corrupts the CUDA
+     # context, which cannot be recovered from within the same process. It is
+     # left here so that it can be run manually for debugging / testing purposes
+     # - or may be re-enabled if in future there is infrastructure added for
+     # running tests in a separate process (a subprocess cannot be used because
+     # CUDA will have been initialized before the fork, so it cannot be used in
+     # the child process).
+     @unittest.skip
+     @with_asyncio_loop
+     async def test_failed_stream(self):
+         ctx = cuda.current_context()
+         module = ctx.create_module_ptx("""
+             .version 6.5
+             .target sm_30
+             .address_size 64
+             .visible .entry failing_kernel() { trap; }
+         """)
+         failing_kernel = module.get_function("failing_kernel")
+
+         stream = cuda.stream()
+         failing_kernel.configure((1,), (1,), stream=stream).__call__()
+         done = stream.async_done()
+         with self.assertRaises(Exception):
+             await done
+         self.assertIsNotNone(done.exception())
+
+
+ if __name__ == '__main__':
+     unittest.main()