numba-cuda 0.22.0__cp312-cp312-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of numba-cuda might be problematic. Click here for more details.

Files changed (487)
  1. _numba_cuda_redirector.pth +4 -0
  2. _numba_cuda_redirector.py +89 -0
  3. numba_cuda/VERSION +1 -0
  4. numba_cuda/__init__.py +6 -0
  5. numba_cuda/_version.py +11 -0
  6. numba_cuda/numba/cuda/__init__.py +70 -0
  7. numba_cuda/numba/cuda/_internal/cuda_bf16.py +16394 -0
  8. numba_cuda/numba/cuda/_internal/cuda_fp16.py +8112 -0
  9. numba_cuda/numba/cuda/api.py +580 -0
  10. numba_cuda/numba/cuda/api_util.py +76 -0
  11. numba_cuda/numba/cuda/args.py +72 -0
  12. numba_cuda/numba/cuda/bf16.py +397 -0
  13. numba_cuda/numba/cuda/cache_hints.py +287 -0
  14. numba_cuda/numba/cuda/cext/__init__.py +2 -0
  15. numba_cuda/numba/cuda/cext/_devicearray.cpp +159 -0
  16. numba_cuda/numba/cuda/cext/_devicearray.cpython-312-aarch64-linux-gnu.so +0 -0
  17. numba_cuda/numba/cuda/cext/_devicearray.h +29 -0
  18. numba_cuda/numba/cuda/cext/_dispatcher.cpp +1098 -0
  19. numba_cuda/numba/cuda/cext/_dispatcher.cpython-312-aarch64-linux-gnu.so +0 -0
  20. numba_cuda/numba/cuda/cext/_hashtable.cpp +532 -0
  21. numba_cuda/numba/cuda/cext/_hashtable.h +135 -0
  22. numba_cuda/numba/cuda/cext/_helperlib.c +71 -0
  23. numba_cuda/numba/cuda/cext/_helperlib.cpython-312-aarch64-linux-gnu.so +0 -0
  24. numba_cuda/numba/cuda/cext/_helpermod.c +82 -0
  25. numba_cuda/numba/cuda/cext/_pymodule.h +38 -0
  26. numba_cuda/numba/cuda/cext/_typeconv.cpp +206 -0
  27. numba_cuda/numba/cuda/cext/_typeconv.cpython-312-aarch64-linux-gnu.so +0 -0
  28. numba_cuda/numba/cuda/cext/_typeof.cpp +1159 -0
  29. numba_cuda/numba/cuda/cext/_typeof.h +19 -0
  30. numba_cuda/numba/cuda/cext/capsulethunk.h +111 -0
  31. numba_cuda/numba/cuda/cext/mviewbuf.c +385 -0
  32. numba_cuda/numba/cuda/cext/mviewbuf.cpython-312-aarch64-linux-gnu.so +0 -0
  33. numba_cuda/numba/cuda/cext/typeconv.cpp +212 -0
  34. numba_cuda/numba/cuda/cext/typeconv.hpp +101 -0
  35. numba_cuda/numba/cuda/cg.py +67 -0
  36. numba_cuda/numba/cuda/cgutils.py +1294 -0
  37. numba_cuda/numba/cuda/cloudpickle/__init__.py +21 -0
  38. numba_cuda/numba/cuda/cloudpickle/cloudpickle.py +1598 -0
  39. numba_cuda/numba/cuda/cloudpickle/cloudpickle_fast.py +17 -0
  40. numba_cuda/numba/cuda/codegen.py +541 -0
  41. numba_cuda/numba/cuda/compiler.py +1396 -0
  42. numba_cuda/numba/cuda/core/analysis.py +758 -0
  43. numba_cuda/numba/cuda/core/annotations/__init__.py +0 -0
  44. numba_cuda/numba/cuda/core/annotations/pretty_annotate.py +288 -0
  45. numba_cuda/numba/cuda/core/annotations/type_annotations.py +305 -0
  46. numba_cuda/numba/cuda/core/base.py +1332 -0
  47. numba_cuda/numba/cuda/core/boxing.py +1411 -0
  48. numba_cuda/numba/cuda/core/bytecode.py +728 -0
  49. numba_cuda/numba/cuda/core/byteflow.py +2346 -0
  50. numba_cuda/numba/cuda/core/caching.py +744 -0
  51. numba_cuda/numba/cuda/core/callconv.py +392 -0
  52. numba_cuda/numba/cuda/core/codegen.py +171 -0
  53. numba_cuda/numba/cuda/core/compiler.py +199 -0
  54. numba_cuda/numba/cuda/core/compiler_lock.py +85 -0
  55. numba_cuda/numba/cuda/core/compiler_machinery.py +497 -0
  56. numba_cuda/numba/cuda/core/config.py +650 -0
  57. numba_cuda/numba/cuda/core/consts.py +124 -0
  58. numba_cuda/numba/cuda/core/controlflow.py +989 -0
  59. numba_cuda/numba/cuda/core/entrypoints.py +57 -0
  60. numba_cuda/numba/cuda/core/environment.py +66 -0
  61. numba_cuda/numba/cuda/core/errors.py +917 -0
  62. numba_cuda/numba/cuda/core/event.py +511 -0
  63. numba_cuda/numba/cuda/core/funcdesc.py +330 -0
  64. numba_cuda/numba/cuda/core/generators.py +387 -0
  65. numba_cuda/numba/cuda/core/imputils.py +509 -0
  66. numba_cuda/numba/cuda/core/inline_closurecall.py +1787 -0
  67. numba_cuda/numba/cuda/core/interpreter.py +3617 -0
  68. numba_cuda/numba/cuda/core/ir.py +1812 -0
  69. numba_cuda/numba/cuda/core/ir_utils.py +2638 -0
  70. numba_cuda/numba/cuda/core/optional.py +129 -0
  71. numba_cuda/numba/cuda/core/options.py +262 -0
  72. numba_cuda/numba/cuda/core/postproc.py +249 -0
  73. numba_cuda/numba/cuda/core/pythonapi.py +1859 -0
  74. numba_cuda/numba/cuda/core/registry.py +46 -0
  75. numba_cuda/numba/cuda/core/removerefctpass.py +123 -0
  76. numba_cuda/numba/cuda/core/rewrites/__init__.py +26 -0
  77. numba_cuda/numba/cuda/core/rewrites/ir_print.py +91 -0
  78. numba_cuda/numba/cuda/core/rewrites/registry.py +104 -0
  79. numba_cuda/numba/cuda/core/rewrites/static_binop.py +41 -0
  80. numba_cuda/numba/cuda/core/rewrites/static_getitem.py +189 -0
  81. numba_cuda/numba/cuda/core/rewrites/static_raise.py +100 -0
  82. numba_cuda/numba/cuda/core/sigutils.py +68 -0
  83. numba_cuda/numba/cuda/core/ssa.py +498 -0
  84. numba_cuda/numba/cuda/core/targetconfig.py +330 -0
  85. numba_cuda/numba/cuda/core/tracing.py +231 -0
  86. numba_cuda/numba/cuda/core/transforms.py +956 -0
  87. numba_cuda/numba/cuda/core/typed_passes.py +867 -0
  88. numba_cuda/numba/cuda/core/typeinfer.py +1950 -0
  89. numba_cuda/numba/cuda/core/unsafe/__init__.py +0 -0
  90. numba_cuda/numba/cuda/core/unsafe/bytes.py +67 -0
  91. numba_cuda/numba/cuda/core/unsafe/eh.py +67 -0
  92. numba_cuda/numba/cuda/core/unsafe/refcount.py +98 -0
  93. numba_cuda/numba/cuda/core/untyped_passes.py +1979 -0
  94. numba_cuda/numba/cuda/cpython/builtins.py +1153 -0
  95. numba_cuda/numba/cuda/cpython/charseq.py +1218 -0
  96. numba_cuda/numba/cuda/cpython/cmathimpl.py +560 -0
  97. numba_cuda/numba/cuda/cpython/enumimpl.py +103 -0
  98. numba_cuda/numba/cuda/cpython/iterators.py +167 -0
  99. numba_cuda/numba/cuda/cpython/listobj.py +1326 -0
  100. numba_cuda/numba/cuda/cpython/mathimpl.py +499 -0
  101. numba_cuda/numba/cuda/cpython/numbers.py +1475 -0
  102. numba_cuda/numba/cuda/cpython/rangeobj.py +289 -0
  103. numba_cuda/numba/cuda/cpython/slicing.py +322 -0
  104. numba_cuda/numba/cuda/cpython/tupleobj.py +456 -0
  105. numba_cuda/numba/cuda/cpython/unicode.py +2865 -0
  106. numba_cuda/numba/cuda/cpython/unicode_support.py +1597 -0
  107. numba_cuda/numba/cuda/cpython/unsafe/__init__.py +0 -0
  108. numba_cuda/numba/cuda/cpython/unsafe/numbers.py +64 -0
  109. numba_cuda/numba/cuda/cpython/unsafe/tuple.py +92 -0
  110. numba_cuda/numba/cuda/cuda_paths.py +691 -0
  111. numba_cuda/numba/cuda/cudadecl.py +543 -0
  112. numba_cuda/numba/cuda/cudadrv/__init__.py +14 -0
  113. numba_cuda/numba/cuda/cudadrv/devicearray.py +954 -0
  114. numba_cuda/numba/cuda/cudadrv/devices.py +249 -0
  115. numba_cuda/numba/cuda/cudadrv/driver.py +3238 -0
  116. numba_cuda/numba/cuda/cudadrv/drvapi.py +435 -0
  117. numba_cuda/numba/cuda/cudadrv/dummyarray.py +562 -0
  118. numba_cuda/numba/cuda/cudadrv/enums.py +613 -0
  119. numba_cuda/numba/cuda/cudadrv/error.py +48 -0
  120. numba_cuda/numba/cuda/cudadrv/libs.py +220 -0
  121. numba_cuda/numba/cuda/cudadrv/linkable_code.py +184 -0
  122. numba_cuda/numba/cuda/cudadrv/mappings.py +14 -0
  123. numba_cuda/numba/cuda/cudadrv/ndarray.py +26 -0
  124. numba_cuda/numba/cuda/cudadrv/nvrtc.py +193 -0
  125. numba_cuda/numba/cuda/cudadrv/nvvm.py +756 -0
  126. numba_cuda/numba/cuda/cudadrv/rtapi.py +13 -0
  127. numba_cuda/numba/cuda/cudadrv/runtime.py +34 -0
  128. numba_cuda/numba/cuda/cudaimpl.py +983 -0
  129. numba_cuda/numba/cuda/cudamath.py +149 -0
  130. numba_cuda/numba/cuda/datamodel/__init__.py +7 -0
  131. numba_cuda/numba/cuda/datamodel/cuda_manager.py +66 -0
  132. numba_cuda/numba/cuda/datamodel/cuda_models.py +1446 -0
  133. numba_cuda/numba/cuda/datamodel/cuda_packer.py +224 -0
  134. numba_cuda/numba/cuda/datamodel/cuda_registry.py +22 -0
  135. numba_cuda/numba/cuda/datamodel/cuda_testing.py +153 -0
  136. numba_cuda/numba/cuda/datamodel/manager.py +11 -0
  137. numba_cuda/numba/cuda/datamodel/models.py +9 -0
  138. numba_cuda/numba/cuda/datamodel/packer.py +9 -0
  139. numba_cuda/numba/cuda/datamodel/registry.py +11 -0
  140. numba_cuda/numba/cuda/datamodel/testing.py +11 -0
  141. numba_cuda/numba/cuda/debuginfo.py +997 -0
  142. numba_cuda/numba/cuda/decorators.py +294 -0
  143. numba_cuda/numba/cuda/descriptor.py +35 -0
  144. numba_cuda/numba/cuda/device_init.py +155 -0
  145. numba_cuda/numba/cuda/deviceufunc.py +1021 -0
  146. numba_cuda/numba/cuda/dispatcher.py +2463 -0
  147. numba_cuda/numba/cuda/errors.py +72 -0
  148. numba_cuda/numba/cuda/extending.py +697 -0
  149. numba_cuda/numba/cuda/flags.py +178 -0
  150. numba_cuda/numba/cuda/fp16.py +357 -0
  151. numba_cuda/numba/cuda/include/12/cuda_bf16.h +5118 -0
  152. numba_cuda/numba/cuda/include/12/cuda_bf16.hpp +3865 -0
  153. numba_cuda/numba/cuda/include/12/cuda_fp16.h +5363 -0
  154. numba_cuda/numba/cuda/include/12/cuda_fp16.hpp +3483 -0
  155. numba_cuda/numba/cuda/include/13/cuda_bf16.h +5118 -0
  156. numba_cuda/numba/cuda/include/13/cuda_bf16.hpp +3865 -0
  157. numba_cuda/numba/cuda/include/13/cuda_fp16.h +5363 -0
  158. numba_cuda/numba/cuda/include/13/cuda_fp16.hpp +3483 -0
  159. numba_cuda/numba/cuda/initialize.py +24 -0
  160. numba_cuda/numba/cuda/intrinsics.py +531 -0
  161. numba_cuda/numba/cuda/itanium_mangler.py +214 -0
  162. numba_cuda/numba/cuda/kernels/__init__.py +2 -0
  163. numba_cuda/numba/cuda/kernels/reduction.py +265 -0
  164. numba_cuda/numba/cuda/kernels/transpose.py +65 -0
  165. numba_cuda/numba/cuda/libdevice.py +3386 -0
  166. numba_cuda/numba/cuda/libdevicedecl.py +20 -0
  167. numba_cuda/numba/cuda/libdevicefuncs.py +1060 -0
  168. numba_cuda/numba/cuda/libdeviceimpl.py +88 -0
  169. numba_cuda/numba/cuda/locks.py +19 -0
  170. numba_cuda/numba/cuda/lowering.py +1980 -0
  171. numba_cuda/numba/cuda/mathimpl.py +374 -0
  172. numba_cuda/numba/cuda/memory_management/__init__.py +4 -0
  173. numba_cuda/numba/cuda/memory_management/memsys.cu +99 -0
  174. numba_cuda/numba/cuda/memory_management/memsys.cuh +22 -0
  175. numba_cuda/numba/cuda/memory_management/nrt.cu +212 -0
  176. numba_cuda/numba/cuda/memory_management/nrt.cuh +48 -0
  177. numba_cuda/numba/cuda/memory_management/nrt.py +390 -0
  178. numba_cuda/numba/cuda/memory_management/nrt_context.py +438 -0
  179. numba_cuda/numba/cuda/misc/appdirs.py +594 -0
  180. numba_cuda/numba/cuda/misc/cffiimpl.py +24 -0
  181. numba_cuda/numba/cuda/misc/coverage_support.py +43 -0
  182. numba_cuda/numba/cuda/misc/dump_style.py +41 -0
  183. numba_cuda/numba/cuda/misc/findlib.py +75 -0
  184. numba_cuda/numba/cuda/misc/firstlinefinder.py +96 -0
  185. numba_cuda/numba/cuda/misc/gdb_hook.py +240 -0
  186. numba_cuda/numba/cuda/misc/literal.py +28 -0
  187. numba_cuda/numba/cuda/misc/llvm_pass_timings.py +412 -0
  188. numba_cuda/numba/cuda/misc/special.py +94 -0
  189. numba_cuda/numba/cuda/models.py +56 -0
  190. numba_cuda/numba/cuda/np/arraymath.py +5130 -0
  191. numba_cuda/numba/cuda/np/arrayobj.py +7635 -0
  192. numba_cuda/numba/cuda/np/extensions.py +11 -0
  193. numba_cuda/numba/cuda/np/linalg.py +3087 -0
  194. numba_cuda/numba/cuda/np/math/__init__.py +0 -0
  195. numba_cuda/numba/cuda/np/math/cmathimpl.py +558 -0
  196. numba_cuda/numba/cuda/np/math/mathimpl.py +487 -0
  197. numba_cuda/numba/cuda/np/math/numbers.py +1461 -0
  198. numba_cuda/numba/cuda/np/npdatetime.py +969 -0
  199. numba_cuda/numba/cuda/np/npdatetime_helpers.py +217 -0
  200. numba_cuda/numba/cuda/np/npyfuncs.py +1808 -0
  201. numba_cuda/numba/cuda/np/npyimpl.py +1027 -0
  202. numba_cuda/numba/cuda/np/numpy_support.py +798 -0
  203. numba_cuda/numba/cuda/np/polynomial/__init__.py +4 -0
  204. numba_cuda/numba/cuda/np/polynomial/polynomial_core.py +242 -0
  205. numba_cuda/numba/cuda/np/polynomial/polynomial_functions.py +380 -0
  206. numba_cuda/numba/cuda/np/ufunc/__init__.py +4 -0
  207. numba_cuda/numba/cuda/np/ufunc/decorators.py +203 -0
  208. numba_cuda/numba/cuda/np/ufunc/sigparse.py +68 -0
  209. numba_cuda/numba/cuda/np/ufunc/ufuncbuilder.py +65 -0
  210. numba_cuda/numba/cuda/np/ufunc_db.py +1282 -0
  211. numba_cuda/numba/cuda/np/unsafe/__init__.py +0 -0
  212. numba_cuda/numba/cuda/np/unsafe/ndarray.py +84 -0
  213. numba_cuda/numba/cuda/nvvmutils.py +254 -0
  214. numba_cuda/numba/cuda/printimpl.py +126 -0
  215. numba_cuda/numba/cuda/random.py +308 -0
  216. numba_cuda/numba/cuda/reshape_funcs.cu +156 -0
  217. numba_cuda/numba/cuda/serialize.py +267 -0
  218. numba_cuda/numba/cuda/simulator/__init__.py +63 -0
  219. numba_cuda/numba/cuda/simulator/_internal/__init__.py +4 -0
  220. numba_cuda/numba/cuda/simulator/_internal/cuda_bf16.py +2 -0
  221. numba_cuda/numba/cuda/simulator/api.py +179 -0
  222. numba_cuda/numba/cuda/simulator/bf16.py +4 -0
  223. numba_cuda/numba/cuda/simulator/compiler.py +38 -0
  224. numba_cuda/numba/cuda/simulator/cudadrv/__init__.py +11 -0
  225. numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py +462 -0
  226. numba_cuda/numba/cuda/simulator/cudadrv/devices.py +122 -0
  227. numba_cuda/numba/cuda/simulator/cudadrv/driver.py +66 -0
  228. numba_cuda/numba/cuda/simulator/cudadrv/drvapi.py +7 -0
  229. numba_cuda/numba/cuda/simulator/cudadrv/dummyarray.py +7 -0
  230. numba_cuda/numba/cuda/simulator/cudadrv/error.py +10 -0
  231. numba_cuda/numba/cuda/simulator/cudadrv/libs.py +10 -0
  232. numba_cuda/numba/cuda/simulator/cudadrv/linkable_code.py +61 -0
  233. numba_cuda/numba/cuda/simulator/cudadrv/nvrtc.py +11 -0
  234. numba_cuda/numba/cuda/simulator/cudadrv/nvvm.py +32 -0
  235. numba_cuda/numba/cuda/simulator/cudadrv/runtime.py +22 -0
  236. numba_cuda/numba/cuda/simulator/dispatcher.py +11 -0
  237. numba_cuda/numba/cuda/simulator/kernel.py +320 -0
  238. numba_cuda/numba/cuda/simulator/kernelapi.py +509 -0
  239. numba_cuda/numba/cuda/simulator/memory_management/__init__.py +4 -0
  240. numba_cuda/numba/cuda/simulator/memory_management/nrt.py +21 -0
  241. numba_cuda/numba/cuda/simulator/reduction.py +19 -0
  242. numba_cuda/numba/cuda/simulator/tests/support.py +4 -0
  243. numba_cuda/numba/cuda/simulator/vector_types.py +65 -0
  244. numba_cuda/numba/cuda/simulator_init.py +18 -0
  245. numba_cuda/numba/cuda/stubs.py +624 -0
  246. numba_cuda/numba/cuda/target.py +505 -0
  247. numba_cuda/numba/cuda/testing.py +347 -0
  248. numba_cuda/numba/cuda/tests/__init__.py +62 -0
  249. numba_cuda/numba/cuda/tests/benchmarks/__init__.py +0 -0
  250. numba_cuda/numba/cuda/tests/benchmarks/test_kernel_launch.py +119 -0
  251. numba_cuda/numba/cuda/tests/cloudpickle_main_class.py +9 -0
  252. numba_cuda/numba/cuda/tests/core/serialize_usecases.py +113 -0
  253. numba_cuda/numba/cuda/tests/core/test_itanium_mangler.py +83 -0
  254. numba_cuda/numba/cuda/tests/core/test_serialize.py +371 -0
  255. numba_cuda/numba/cuda/tests/cudadrv/__init__.py +9 -0
  256. numba_cuda/numba/cuda/tests/cudadrv/test_array_attr.py +147 -0
  257. numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py +161 -0
  258. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_array_slicing.py +397 -0
  259. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_auto_context.py +24 -0
  260. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py +180 -0
  261. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py +313 -0
  262. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_memory.py +191 -0
  263. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py +621 -0
  264. numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py +247 -0
  265. numba_cuda/numba/cuda/tests/cudadrv/test_detect.py +100 -0
  266. numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +200 -0
  267. numba_cuda/numba/cuda/tests/cudadrv/test_events.py +53 -0
  268. numba_cuda/numba/cuda/tests/cudadrv/test_host_alloc.py +72 -0
  269. numba_cuda/numba/cuda/tests/cudadrv/test_init.py +138 -0
  270. numba_cuda/numba/cuda/tests/cudadrv/test_inline_ptx.py +43 -0
  271. numba_cuda/numba/cuda/tests/cudadrv/test_is_fp16.py +15 -0
  272. numba_cuda/numba/cuda/tests/cudadrv/test_linkable_code.py +58 -0
  273. numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +348 -0
  274. numba_cuda/numba/cuda/tests/cudadrv/test_managed_alloc.py +128 -0
  275. numba_cuda/numba/cuda/tests/cudadrv/test_module_callbacks.py +301 -0
  276. numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py +174 -0
  277. numba_cuda/numba/cuda/tests/cudadrv/test_nvrtc.py +28 -0
  278. numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py +185 -0
  279. numba_cuda/numba/cuda/tests/cudadrv/test_pinned.py +39 -0
  280. numba_cuda/numba/cuda/tests/cudadrv/test_profiler.py +23 -0
  281. numba_cuda/numba/cuda/tests/cudadrv/test_reset_device.py +38 -0
  282. numba_cuda/numba/cuda/tests/cudadrv/test_runtime.py +48 -0
  283. numba_cuda/numba/cuda/tests/cudadrv/test_select_device.py +44 -0
  284. numba_cuda/numba/cuda/tests/cudadrv/test_streams.py +127 -0
  285. numba_cuda/numba/cuda/tests/cudapy/__init__.py +9 -0
  286. numba_cuda/numba/cuda/tests/cudapy/cache_usecases.py +231 -0
  287. numba_cuda/numba/cuda/tests/cudapy/cache_with_cpu_usecases.py +50 -0
  288. numba_cuda/numba/cuda/tests/cudapy/cg_cache_usecases.py +36 -0
  289. numba_cuda/numba/cuda/tests/cudapy/complex_usecases.py +116 -0
  290. numba_cuda/numba/cuda/tests/cudapy/enum_usecases.py +59 -0
  291. numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +62 -0
  292. numba_cuda/numba/cuda/tests/cudapy/jitlink.ptx +28 -0
  293. numba_cuda/numba/cuda/tests/cudapy/overload_usecases.py +33 -0
  294. numba_cuda/numba/cuda/tests/cudapy/recursion_usecases.py +104 -0
  295. numba_cuda/numba/cuda/tests/cudapy/test_alignment.py +47 -0
  296. numba_cuda/numba/cuda/tests/cudapy/test_analysis.py +1122 -0
  297. numba_cuda/numba/cuda/tests/cudapy/test_array.py +344 -0
  298. numba_cuda/numba/cuda/tests/cudapy/test_array_alignment.py +268 -0
  299. numba_cuda/numba/cuda/tests/cudapy/test_array_args.py +203 -0
  300. numba_cuda/numba/cuda/tests/cudapy/test_array_methods.py +63 -0
  301. numba_cuda/numba/cuda/tests/cudapy/test_array_reductions.py +360 -0
  302. numba_cuda/numba/cuda/tests/cudapy/test_atomics.py +1815 -0
  303. numba_cuda/numba/cuda/tests/cudapy/test_bfloat16.py +599 -0
  304. numba_cuda/numba/cuda/tests/cudapy/test_bfloat16_bindings.py +377 -0
  305. numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py +160 -0
  306. numba_cuda/numba/cuda/tests/cudapy/test_boolean.py +27 -0
  307. numba_cuda/numba/cuda/tests/cudapy/test_byteflow.py +98 -0
  308. numba_cuda/numba/cuda/tests/cudapy/test_cache_hints.py +210 -0
  309. numba_cuda/numba/cuda/tests/cudapy/test_caching.py +683 -0
  310. numba_cuda/numba/cuda/tests/cudapy/test_casting.py +265 -0
  311. numba_cuda/numba/cuda/tests/cudapy/test_cffi.py +42 -0
  312. numba_cuda/numba/cuda/tests/cudapy/test_compiler.py +718 -0
  313. numba_cuda/numba/cuda/tests/cudapy/test_complex.py +370 -0
  314. numba_cuda/numba/cuda/tests/cudapy/test_complex_kernel.py +23 -0
  315. numba_cuda/numba/cuda/tests/cudapy/test_const_string.py +142 -0
  316. numba_cuda/numba/cuda/tests/cudapy/test_constmem.py +178 -0
  317. numba_cuda/numba/cuda/tests/cudapy/test_cooperative_groups.py +193 -0
  318. numba_cuda/numba/cuda/tests/cudapy/test_copy_propagate.py +131 -0
  319. numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py +438 -0
  320. numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py +94 -0
  321. numba_cuda/numba/cuda/tests/cudapy/test_datetime.py +101 -0
  322. numba_cuda/numba/cuda/tests/cudapy/test_debug.py +105 -0
  323. numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +978 -0
  324. numba_cuda/numba/cuda/tests/cudapy/test_debuginfo_types.py +476 -0
  325. numba_cuda/numba/cuda/tests/cudapy/test_device_func.py +500 -0
  326. numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +820 -0
  327. numba_cuda/numba/cuda/tests/cudapy/test_enums.py +152 -0
  328. numba_cuda/numba/cuda/tests/cudapy/test_errors.py +111 -0
  329. numba_cuda/numba/cuda/tests/cudapy/test_exception.py +170 -0
  330. numba_cuda/numba/cuda/tests/cudapy/test_extending.py +1088 -0
  331. numba_cuda/numba/cuda/tests/cudapy/test_extending_types.py +71 -0
  332. numba_cuda/numba/cuda/tests/cudapy/test_fastmath.py +265 -0
  333. numba_cuda/numba/cuda/tests/cudapy/test_flow_control.py +1433 -0
  334. numba_cuda/numba/cuda/tests/cudapy/test_forall.py +57 -0
  335. numba_cuda/numba/cuda/tests/cudapy/test_freevar.py +34 -0
  336. numba_cuda/numba/cuda/tests/cudapy/test_frexp_ldexp.py +69 -0
  337. numba_cuda/numba/cuda/tests/cudapy/test_globals.py +62 -0
  338. numba_cuda/numba/cuda/tests/cudapy/test_gufunc.py +474 -0
  339. numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scalar.py +167 -0
  340. numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scheduling.py +92 -0
  341. numba_cuda/numba/cuda/tests/cudapy/test_idiv.py +39 -0
  342. numba_cuda/numba/cuda/tests/cudapy/test_inline.py +170 -0
  343. numba_cuda/numba/cuda/tests/cudapy/test_inspect.py +255 -0
  344. numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py +1219 -0
  345. numba_cuda/numba/cuda/tests/cudapy/test_ipc.py +263 -0
  346. numba_cuda/numba/cuda/tests/cudapy/test_ir.py +598 -0
  347. numba_cuda/numba/cuda/tests/cudapy/test_ir_utils.py +276 -0
  348. numba_cuda/numba/cuda/tests/cudapy/test_iterators.py +101 -0
  349. numba_cuda/numba/cuda/tests/cudapy/test_lang.py +68 -0
  350. numba_cuda/numba/cuda/tests/cudapy/test_laplace.py +123 -0
  351. numba_cuda/numba/cuda/tests/cudapy/test_libdevice.py +194 -0
  352. numba_cuda/numba/cuda/tests/cudapy/test_lineinfo.py +220 -0
  353. numba_cuda/numba/cuda/tests/cudapy/test_localmem.py +173 -0
  354. numba_cuda/numba/cuda/tests/cudapy/test_make_function_to_jit_function.py +364 -0
  355. numba_cuda/numba/cuda/tests/cudapy/test_mandel.py +47 -0
  356. numba_cuda/numba/cuda/tests/cudapy/test_math.py +842 -0
  357. numba_cuda/numba/cuda/tests/cudapy/test_matmul.py +76 -0
  358. numba_cuda/numba/cuda/tests/cudapy/test_minmax.py +78 -0
  359. numba_cuda/numba/cuda/tests/cudapy/test_montecarlo.py +25 -0
  360. numba_cuda/numba/cuda/tests/cudapy/test_multigpu.py +145 -0
  361. numba_cuda/numba/cuda/tests/cudapy/test_multiprocessing.py +39 -0
  362. numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py +82 -0
  363. numba_cuda/numba/cuda/tests/cudapy/test_nondet.py +53 -0
  364. numba_cuda/numba/cuda/tests/cudapy/test_operator.py +504 -0
  365. numba_cuda/numba/cuda/tests/cudapy/test_optimization.py +93 -0
  366. numba_cuda/numba/cuda/tests/cudapy/test_overload.py +402 -0
  367. numba_cuda/numba/cuda/tests/cudapy/test_powi.py +128 -0
  368. numba_cuda/numba/cuda/tests/cudapy/test_print.py +193 -0
  369. numba_cuda/numba/cuda/tests/cudapy/test_py2_div_issue.py +37 -0
  370. numba_cuda/numba/cuda/tests/cudapy/test_random.py +117 -0
  371. numba_cuda/numba/cuda/tests/cudapy/test_record_dtype.py +614 -0
  372. numba_cuda/numba/cuda/tests/cudapy/test_recursion.py +130 -0
  373. numba_cuda/numba/cuda/tests/cudapy/test_reduction.py +94 -0
  374. numba_cuda/numba/cuda/tests/cudapy/test_retrieve_autoconverted_arrays.py +83 -0
  375. numba_cuda/numba/cuda/tests/cudapy/test_serialize.py +86 -0
  376. numba_cuda/numba/cuda/tests/cudapy/test_slicing.py +40 -0
  377. numba_cuda/numba/cuda/tests/cudapy/test_sm.py +457 -0
  378. numba_cuda/numba/cuda/tests/cudapy/test_sm_creation.py +233 -0
  379. numba_cuda/numba/cuda/tests/cudapy/test_ssa.py +454 -0
  380. numba_cuda/numba/cuda/tests/cudapy/test_stream_api.py +56 -0
  381. numba_cuda/numba/cuda/tests/cudapy/test_sync.py +277 -0
  382. numba_cuda/numba/cuda/tests/cudapy/test_tracing.py +200 -0
  383. numba_cuda/numba/cuda/tests/cudapy/test_transpose.py +90 -0
  384. numba_cuda/numba/cuda/tests/cudapy/test_typeconv.py +333 -0
  385. numba_cuda/numba/cuda/tests/cudapy/test_typeinfer.py +538 -0
  386. numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +585 -0
  387. numba_cuda/numba/cuda/tests/cudapy/test_userexc.py +42 -0
  388. numba_cuda/numba/cuda/tests/cudapy/test_vector_type.py +485 -0
  389. numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py +312 -0
  390. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_complex.py +23 -0
  391. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py +183 -0
  392. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_device.py +40 -0
  393. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_scalar_arg.py +40 -0
  394. numba_cuda/numba/cuda/tests/cudapy/test_warning.py +206 -0
  395. numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +446 -0
  396. numba_cuda/numba/cuda/tests/cudasim/__init__.py +9 -0
  397. numba_cuda/numba/cuda/tests/cudasim/support.py +9 -0
  398. numba_cuda/numba/cuda/tests/cudasim/test_cudasim_issues.py +111 -0
  399. numba_cuda/numba/cuda/tests/data/__init__.py +2 -0
  400. numba_cuda/numba/cuda/tests/data/cta_barrier.cu +28 -0
  401. numba_cuda/numba/cuda/tests/data/cuda_include.cu +10 -0
  402. numba_cuda/numba/cuda/tests/data/error.cu +12 -0
  403. numba_cuda/numba/cuda/tests/data/include/add.cuh +8 -0
  404. numba_cuda/numba/cuda/tests/data/jitlink.cu +28 -0
  405. numba_cuda/numba/cuda/tests/data/jitlink.ptx +49 -0
  406. numba_cuda/numba/cuda/tests/data/warn.cu +12 -0
  407. numba_cuda/numba/cuda/tests/doc_examples/__init__.py +9 -0
  408. numba_cuda/numba/cuda/tests/doc_examples/ffi/__init__.py +2 -0
  409. numba_cuda/numba/cuda/tests/doc_examples/ffi/functions.cu +54 -0
  410. numba_cuda/numba/cuda/tests/doc_examples/ffi/include/mul.cuh +8 -0
  411. numba_cuda/numba/cuda/tests/doc_examples/ffi/saxpy.cu +14 -0
  412. numba_cuda/numba/cuda/tests/doc_examples/test_cg.py +86 -0
  413. numba_cuda/numba/cuda/tests/doc_examples/test_cpointer.py +68 -0
  414. numba_cuda/numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py +81 -0
  415. numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py +141 -0
  416. numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py +160 -0
  417. numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py +180 -0
  418. numba_cuda/numba/cuda/tests/doc_examples/test_montecarlo.py +119 -0
  419. numba_cuda/numba/cuda/tests/doc_examples/test_random.py +66 -0
  420. numba_cuda/numba/cuda/tests/doc_examples/test_reduction.py +80 -0
  421. numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py +206 -0
  422. numba_cuda/numba/cuda/tests/doc_examples/test_ufunc.py +53 -0
  423. numba_cuda/numba/cuda/tests/doc_examples/test_vecadd.py +76 -0
  424. numba_cuda/numba/cuda/tests/nocuda/__init__.py +9 -0
  425. numba_cuda/numba/cuda/tests/nocuda/test_dummyarray.py +452 -0
  426. numba_cuda/numba/cuda/tests/nocuda/test_function_resolution.py +48 -0
  427. numba_cuda/numba/cuda/tests/nocuda/test_import.py +63 -0
  428. numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py +252 -0
  429. numba_cuda/numba/cuda/tests/nocuda/test_nvvm.py +59 -0
  430. numba_cuda/numba/cuda/tests/nrt/__init__.py +9 -0
  431. numba_cuda/numba/cuda/tests/nrt/test_nrt.py +387 -0
  432. numba_cuda/numba/cuda/tests/nrt/test_nrt_refct.py +124 -0
  433. numba_cuda/numba/cuda/tests/support.py +900 -0
  434. numba_cuda/numba/cuda/typeconv/__init__.py +4 -0
  435. numba_cuda/numba/cuda/typeconv/castgraph.py +137 -0
  436. numba_cuda/numba/cuda/typeconv/rules.py +63 -0
  437. numba_cuda/numba/cuda/typeconv/typeconv.py +121 -0
  438. numba_cuda/numba/cuda/types/__init__.py +233 -0
  439. numba_cuda/numba/cuda/types/__init__.pyi +167 -0
  440. numba_cuda/numba/cuda/types/abstract.py +9 -0
  441. numba_cuda/numba/cuda/types/common.py +9 -0
  442. numba_cuda/numba/cuda/types/containers.py +9 -0
  443. numba_cuda/numba/cuda/types/cuda_abstract.py +533 -0
  444. numba_cuda/numba/cuda/types/cuda_common.py +110 -0
  445. numba_cuda/numba/cuda/types/cuda_containers.py +971 -0
  446. numba_cuda/numba/cuda/types/cuda_function_type.py +230 -0
  447. numba_cuda/numba/cuda/types/cuda_functions.py +798 -0
  448. numba_cuda/numba/cuda/types/cuda_iterators.py +120 -0
  449. numba_cuda/numba/cuda/types/cuda_misc.py +569 -0
  450. numba_cuda/numba/cuda/types/cuda_npytypes.py +690 -0
  451. numba_cuda/numba/cuda/types/cuda_scalars.py +280 -0
  452. numba_cuda/numba/cuda/types/ext_types.py +101 -0
  453. numba_cuda/numba/cuda/types/function_type.py +11 -0
  454. numba_cuda/numba/cuda/types/functions.py +9 -0
  455. numba_cuda/numba/cuda/types/iterators.py +9 -0
  456. numba_cuda/numba/cuda/types/misc.py +9 -0
  457. numba_cuda/numba/cuda/types/npytypes.py +9 -0
  458. numba_cuda/numba/cuda/types/scalars.py +9 -0
  459. numba_cuda/numba/cuda/typing/__init__.py +19 -0
  460. numba_cuda/numba/cuda/typing/arraydecl.py +939 -0
  461. numba_cuda/numba/cuda/typing/asnumbatype.py +130 -0
  462. numba_cuda/numba/cuda/typing/bufproto.py +70 -0
  463. numba_cuda/numba/cuda/typing/builtins.py +1209 -0
  464. numba_cuda/numba/cuda/typing/cffi_utils.py +219 -0
  465. numba_cuda/numba/cuda/typing/cmathdecl.py +47 -0
  466. numba_cuda/numba/cuda/typing/collections.py +138 -0
  467. numba_cuda/numba/cuda/typing/context.py +782 -0
  468. numba_cuda/numba/cuda/typing/ctypes_utils.py +125 -0
  469. numba_cuda/numba/cuda/typing/dictdecl.py +63 -0
  470. numba_cuda/numba/cuda/typing/enumdecl.py +74 -0
  471. numba_cuda/numba/cuda/typing/listdecl.py +147 -0
  472. numba_cuda/numba/cuda/typing/mathdecl.py +158 -0
  473. numba_cuda/numba/cuda/typing/npdatetime.py +322 -0
  474. numba_cuda/numba/cuda/typing/npydecl.py +749 -0
  475. numba_cuda/numba/cuda/typing/setdecl.py +115 -0
  476. numba_cuda/numba/cuda/typing/templates.py +1446 -0
  477. numba_cuda/numba/cuda/typing/typeof.py +301 -0
  478. numba_cuda/numba/cuda/ufuncs.py +746 -0
  479. numba_cuda/numba/cuda/utils.py +724 -0
  480. numba_cuda/numba/cuda/vector_types.py +214 -0
  481. numba_cuda/numba/cuda/vectorizers.py +260 -0
  482. numba_cuda-0.22.0.dist-info/METADATA +109 -0
  483. numba_cuda-0.22.0.dist-info/RECORD +487 -0
  484. numba_cuda-0.22.0.dist-info/WHEEL +6 -0
  485. numba_cuda-0.22.0.dist-info/licenses/LICENSE +26 -0
  486. numba_cuda-0.22.0.dist-info/licenses/LICENSE.numba +24 -0
  487. numba_cuda-0.22.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,185 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: BSD-2-Clause
3
+
4
+ import pytest
5
+
6
+ from llvmlite import ir
7
+ from numba.cuda.cudadrv import nvrtc, nvvm, runtime
8
+ from numba.cuda.testing import unittest
9
+ from numba.cuda.cudadrv.nvvm import LibDevice, NvvmError, NVVM
10
+ from numba.cuda.testing import skip_on_cudasim
11
+
12
+
13
@skip_on_cudasim("NVVM Driver unsupported in the simulator")
class TestNvvmDriver(unittest.TestCase):
    """Tests that drive ``nvvm.compile_ir`` with hand-written and llvmlite IR."""

    def get_nvvmir(self):
        """Return ``nvvmir_generic`` formatted for the NVVM in use.

        The template is filled in with the data layout and the IR version
        reported by ``NVVM()``.
        """
        versions = NVVM().get_ir_version()
        data_layout = NVVM().data_layout
        return nvvmir_generic.format(data_layout=data_layout, v=versions)

    def test_nvvm_compile_simple(self):
        nvvmir = self.get_nvvmir()
        ptx = nvvm.compile_ir(nvvmir).decode("utf8")
        # assertIn reports both operands on failure, unlike assertTrue
        self.assertIn("simple", ptx)
        self.assertIn("ave", ptx)

    def test_nvvm_compile_nullary_option(self):
        # Tests compilation with an option that doesn't take an argument
        # ("-gen-lto") - all other NVVM options are of the form
        # "-<name>=<value>"

        nvvmir = self.get_nvvmir()
        arch = "compute_%d%d" % nvrtc.get_lowest_supported_cc()
        ltoir = nvvm.compile_ir(nvvmir, opt=3, gen_lto=None, arch=arch)

        # Verify we correctly passed the option by checking if we got LTOIR
        # from NVVM (by looking for the expected magic number for LTOIR)
        self.assertEqual(ltoir[:4], b"\xed\x43\x4e\x7f")

    def test_nvvm_bad_option(self):
        # Ensure that unsupported / non-existent options are reported as such
        # to the user / caller
        msg = "-made-up-option=2 is an unsupported option"
        with self.assertRaisesRegex(NvvmError, msg):
            nvvm.compile_ir("", made_up_option=2)

    def test_nvvm_from_llvm(self):
        m = ir.Module("test_nvvm_from_llvm")
        m.triple = "nvptx64-nvidia-cuda"
        nvvm.add_ir_version(m)
        fty = ir.FunctionType(ir.VoidType(), [ir.IntType(32)])
        kernel = ir.Function(m, fty, name="mycudakernel")
        bldr = ir.IRBuilder(kernel.append_basic_block("entry"))
        bldr.ret_void()
        nvvm.set_cuda_kernel(kernel)

        m.data_layout = NVVM().data_layout
        ptx = nvvm.compile_ir(str(m)).decode("utf8")
        self.assertIn("mycudakernel", ptx)
        self.assertIn(".address_size 64", ptx)

    def test_used_list(self):
        # Construct a module
        m = ir.Module("test_used_list")
        m.triple = "nvptx64-nvidia-cuda"
        m.data_layout = NVVM().data_layout
        nvvm.add_ir_version(m)

        # Add a function and mark it as a kernel
        fty = ir.FunctionType(ir.VoidType(), [ir.IntType(32)])
        kernel = ir.Function(m, fty, name="mycudakernel")
        bldr = ir.IRBuilder(kernel.append_basic_block("entry"))
        bldr.ret_void()
        nvvm.set_cuda_kernel(kernel)

        # Verify that the used list was correctly constructed
        used_lines = [
            line for line in str(m).splitlines() if "llvm.used" in line
        ]
        msg = 'Expected exactly one @"llvm.used" array'
        self.assertEqual(len(used_lines), 1, msg)

        used_line = used_lines[0]
        # Kernel should be referenced in the used list
        self.assertIn("mycudakernel", used_line)
        # Check linkage of the used list
        self.assertIn("appending global", used_line)
        # Ensure used list is in the metadata section
        self.assertIn('section "llvm.metadata"', used_line)

    def test_nvvm_ir_verify_fail(self):
        if runtime.get_version() >= (12, 5):
            self.skipTest("Bad triple doesn't fail verify on CUDA >= 12.5")
        m = ir.Module("test_bad_ir")
        m.triple = "unknown-unknown-unknown"
        m.data_layout = NVVM().data_layout
        nvvm.add_ir_version(m)
        with self.assertRaisesRegex(NvvmError, "Invalid target triple"):
            nvvm.compile_ir(str(m))

    def _test_nvvm_support(self, arch):
        """Compile the generic IR for ``arch`` and check the PTX target.

        ``arch`` is a two-item sequence that is formatted into
        ``compute_XY`` for the compile and ``sm_XY`` for the expected target.
        """
        compute_xx = "compute_{0}{1}".format(*arch)
        nvvmir = self.get_nvvmir()
        ptx = nvvm.compile_ir(
            nvvmir, arch=compute_xx, ftz=1, prec_sqrt=0, prec_div=0
        ).decode("utf8")
        self.assertIn(".target sm_{0}{1}".format(*arch), ptx)
        self.assertIn("simple", ptx)
        self.assertIn("ave", ptx)

    def test_nvvm_support(self):
        """Test supported CC by NVVM"""
        for arch in nvrtc.get_supported_ccs():
            # Label each architecture so a failure names the offending CC
            with self.subTest(arch=arch):
                self._test_nvvm_support(arch=arch)

    def test_nvvm_warning(self):
        m = ir.Module("test_nvvm_warning")
        m.triple = "nvptx64-nvidia-cuda"
        m.data_layout = NVVM().data_layout
        nvvm.add_ir_version(m)

        fty = ir.FunctionType(ir.VoidType(), [])
        kernel = ir.Function(m, fty, name="inlinekernel")
        builder = ir.IRBuilder(kernel.append_basic_block("entry"))
        builder.ret_void()
        nvvm.set_cuda_kernel(kernel)

        # Add the noinline attribute to trigger NVVM to generate a warning
        kernel.attributes.add("noinline")

        code = str(m)
        with pytest.warns(Warning, match="overriding noinline attribute"):
            nvvm.compile_ir(code)
134
+
135
@skip_on_cudasim("NVVM Driver unsupported in the simulator")
class TestLibDevice(unittest.TestCase):
    """Checks on the ``LibDevice`` wrapper."""

    def test_libdevice_load(self):
        # A constructed LibDevice must hold data that begins with the
        # bitcode file magic number
        header = LibDevice().bc[:4]
        self.assertEqual(header, b"BC\xc0\xde")
141
+
142
+
143
+ nvvmir_generic = """\
144
+ target triple="nvptx64-nvidia-cuda"
145
+ target datalayout = "{data_layout}"
146
+
147
+ define i32 @ave(i32 %a, i32 %b) {{
148
+ entry:
149
+ %add = add nsw i32 %a, %b
150
+ %div = sdiv i32 %add, 2
151
+ ret i32 %div
152
+ }}
153
+
154
+ define void @simple(i32* %data) {{
155
+ entry:
156
+ %0 = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x()
157
+ %1 = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
158
+ %mul = mul i32 %0, %1
159
+ %2 = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
160
+ %add = add i32 %mul, %2
161
+ %call = call i32 @ave(i32 %add, i32 %add)
162
+ %idxprom = sext i32 %add to i64
163
+ %arrayidx = getelementptr inbounds i32, i32* %data, i64 %idxprom
164
+ store i32 %call, i32* %arrayidx, align 4
165
+ ret void
166
+ }}
167
+
168
+ declare i32 @llvm.nvvm.read.ptx.sreg.ctaid.x() nounwind readnone
169
+
170
+ declare i32 @llvm.nvvm.read.ptx.sreg.ntid.x() nounwind readnone
171
+
172
+ declare i32 @llvm.nvvm.read.ptx.sreg.tid.x() nounwind readnone
173
+
174
+ !nvvmir.version = !{{!1}}
175
+ !1 = !{{i32 {v[0]}, i32 {v[1]}, i32 {v[2]}, i32 {v[3]}}}
176
+
177
+ !nvvm.annotations = !{{!2}}
178
+ !2 = !{{void (i32*)* @simple, !"kernel", i32 1}}
179
+
180
+ @"llvm.used" = appending global [1 x i8*] [i8* bitcast (void (i32*)* @simple to i8*)], section "llvm.metadata"
181
+ """ # noqa: E501
182
+
183
+
184
+ if __name__ == "__main__":
185
+ unittest.main()
@@ -0,0 +1,39 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: BSD-2-Clause
3
+
4
+ import numpy as np
5
+ import platform
6
+
7
+ from numba import cuda
8
+ from numba.cuda.testing import unittest, CUDATestCase
9
+
10
+
11
class TestPinned(CUDATestCase):
    """Round-trip host arrays through the device, pinned and unpinned."""

    def _run_copies(self, A):
        """Copy ``A`` to the device and back on one stream, then compare.

        ``A`` is overwritten by the copy back to the host and must still
        match the snapshot taken before any transfer was issued.
        """
        snapshot = np.copy(A)

        stream = cuda.stream()
        d_ary = cuda.to_device(A, copy=False, stream=stream)
        d_ary.copy_to_device(A, stream=stream)
        d_ary.copy_to_host(A, stream=stream)
        stream.synchronize()

        unchanged = np.allclose(A, snapshot)
        self.assertTrue(unchanged)

    def test_pinned(self):
        # ARM machines get the smaller array: 2MB instead of 16MB
        on_arm = platform.machine().startswith(("arm", "aarch64"))
        count = 262144 if on_arm else 2097152
        A = np.arange(count)
        with cuda.pinned(A):
            self._run_copies(A)

    def test_unpinned(self):
        n_items = 2 * 1024 * 1024  # 16 MB
        self._run_copies(np.arange(n_items))
36
+
37
+
38
+ if __name__ == "__main__":
39
+ unittest.main()
@@ -0,0 +1,23 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: BSD-2-Clause
3
+
4
+ import unittest
5
+ from numba.cuda.testing import CUDATestCase
6
+ from numba import cuda
7
+ from numba.cuda.testing import skip_on_cudasim
8
+
9
+
10
@skip_on_cudasim("CUDA Profiler unsupported in the simulator")
class TestProfiler(CUDATestCase):
    """Smoke test for the ``cuda.profiling`` context manager."""

    def test_profiling(self):
        # Enter the profiling context twice in a row, allocating and
        # releasing a device array of a different size each time
        for n_items in (10, 100):
            with cuda.profiling():
                d_ary = cuda.device_array(n_items)
                del d_ary
20
+
21
+
22
+ if __name__ == "__main__":
23
+ unittest.main()
@@ -0,0 +1,38 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: BSD-2-Clause
3
+
4
+ import threading
5
+ from numba import cuda
6
+ from numba.cuda.cudadrv.driver import driver
7
+ from numba.cuda.testing import unittest, CUDATestCase
8
+ from queue import Queue
9
+
10
+
11
class TestResetDevice(CUDATestCase):
    """Select and close every device repeatedly from a worker thread."""

    def test_reset_device(self):
        def newthread(exception_queue):
            # Two passes over all devices; anything raised is handed back
            # to the test through the queue
            try:
                device_ids = range(driver.get_device_count())
                for _ in range(2):
                    for dev_id in device_ids:
                        cuda.select_device(dev_id)
                        cuda.close()
            except Exception as e:
                exception_queue.put(e)

        # Do test on a separate thread so that we don't affect
        # the current context in the main thread.

        errors = Queue()
        worker = threading.Thread(target=newthread, args=(errors,))
        worker.start()
        worker.join()

        # The worker has finished, so the queue size is final
        raised = [errors.get() for _ in range(errors.qsize())]
        self.assertEqual(raised, [])
35
+
36
+
37
+ if __name__ == "__main__":
38
+ unittest.main()
@@ -0,0 +1,48 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: BSD-2-Clause
3
+
4
+ import concurrent.futures
5
+ import multiprocessing
6
+ import os
7
+ from numba.cuda.testing import unittest
8
+
9
+
10
def set_visible_devices_and_check():
    """Set ``CUDA_VISIBLE_DEVICES`` to "0" and return the GPU count seen.

    ``numba.cuda`` is imported before the environment variable is set, and
    the length of ``cuda.gpus.lst`` is read afterwards.
    """
    from numba import cuda
    import os

    os.environ["CUDA_VISIBLE_DEVICES"] = "0"
    visible_gpus = cuda.gpus.lst
    return len(visible_gpus)
16
+
17
+
18
class TestVisibleDevices(unittest.TestCase):
    """Check that ``CUDA_VISIBLE_DEVICES`` is honoured after import."""

    def test_visible_devices_set_after_import(self):
        # See Issue #6149. This test checks that we can set
        # CUDA_VISIBLE_DEVICES after importing Numba and have the value
        # reflected in the available list of GPUs. Prior to the fix for this
        # issue, Numba made a call to runtime.get_version() on import that
        # initialized the driver and froze the list of available devices before
        # CUDA_VISIBLE_DEVICES could be set by the user.

        # Avoid importing cuda at the top level so that
        # set_visible_devices_and_check gets to import it first in its process
        from numba import cuda

        if len(cuda.gpus.lst) in (0, 1):
            self.skipTest("This test requires multiple GPUs")

        if os.environ.get("CUDA_VISIBLE_DEVICES"):
            msg = "Cannot test when CUDA_VISIBLE_DEVICES already set"
            self.skipTest(msg)

        # Run the check in a freshly spawned interpreter
        with concurrent.futures.ProcessPoolExecutor(
            mp_context=multiprocessing.get_context("spawn")
        ) as exe:
            future = exe.submit(set_visible_devices_and_check)

        visible_gpu_count = future.result()
        # A bare ``assert`` is stripped under ``python -O``; use a TestCase
        # assertion so the check always runs and reports both values
        self.assertEqual(visible_gpu_count, 1)
45
+
46
+
47
+ if __name__ == "__main__":
48
+ unittest.main()
@@ -0,0 +1,44 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: BSD-2-Clause
3
+
4
+ #
5
+ # Test does not work on some cards.
6
+ #
7
+ import threading
8
+ from queue import Queue
9
+
10
+ import numpy as np
11
+ from numba import cuda
12
+ from numba.cuda.testing import unittest, CUDATestCase
13
+
14
+
15
def newthread(exception_queue):
    """Thread body: select device 0, copy an array over, then clean up.

    Any exception raised along the way is put on ``exception_queue``
    instead of propagating out of the thread.
    """
    try:
        cuda.select_device(0)
        stream = cuda.stream()
        host_ary = np.arange(100)
        dev_ary = cuda.to_device(host_ary, stream=stream)
        stream.synchronize()
        # Drop the device array first, then the stream, before the final sync
        del dev_ary, stream
        cuda.synchronize()
    except Exception as e:
        exception_queue.put(e)
27
+
28
+
29
class TestSelectDevice(CUDATestCase):
    """Run ``newthread`` on ten threads, one after another."""

    def test_select_device(self):
        errors = Queue()
        for _ in range(10):
            # Each worker is joined before the next one is started
            worker = threading.Thread(target=newthread, args=(errors,))
            worker.start()
            worker.join()

        # All workers have finished, so the queue size is final
        raised = [errors.get() for _ in range(errors.qsize())]
        self.assertEqual(raised, [])
41
+
42
+
43
+ if __name__ == "__main__":
44
+ unittest.main()
@@ -0,0 +1,127 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: BSD-2-Clause
3
+
4
+ import asyncio
5
+ import functools
6
+ import threading
7
+ import numpy as np
8
+ from numba import cuda
9
+ from numba.cuda.testing import unittest, CUDATestCase, skip_on_cudasim
10
+
11
+
12
def with_asyncio_loop(f):
    """Wrap coroutine function ``f`` so it can be called synchronously.

    Each call creates a new event loop with debug mode enabled, runs the
    coroutine returned by ``f`` to completion on it, and closes the loop
    afterwards, whether the coroutine finished or raised. The coroutine's
    result is returned.
    """

    @functools.wraps(f)
    def runner(*args, **kwds):
        event_loop = asyncio.new_event_loop()
        event_loop.set_debug(True)
        try:
            pending = f(*args, **kwds)
            return event_loop.run_until_complete(pending)
        finally:
            event_loop.close()

    return runner
23
+
24
+
25
@unittest.skip("Disabled temporarily due to Issue #317")
@skip_on_cudasim("CUDA Driver API unsupported in the simulator")
class TestCudaStream(CUDATestCase):
    """Tests for stream callbacks and ``Stream.async_done``."""

    def test_add_callback(self):
        def callback(stream, status, event):
            event.set()

        stream = cuda.stream()
        fired = threading.Event()
        stream.add_callback(callback, fired)
        # The callback must set the event within one second
        was_set = fired.wait(1.0)
        self.assertTrue(was_set)

    def test_add_callback_with_default_arg(self):
        fired = threading.Event()

        def callback(stream, status, arg):
            # No argument was given to add_callback, so None is expected
            self.assertIsNone(arg)
            fired.set()

        stream = cuda.stream()
        stream.add_callback(callback)
        was_set = fired.wait(1.0)
        self.assertTrue(was_set)

    @with_asyncio_loop
    async def test_async_done(self):
        stream = cuda.stream()
        await stream.async_done()

    @with_asyncio_loop
    async def test_parallel_tasks(self):
        async def async_cuda_fn(value_in: float) -> float:
            # Fill a pinned buffer, send it through the device on its own
            # stream, and return the mean of what came back
            stream = cuda.stream()
            h_src = cuda.pinned_array(8)
            h_dst = cuda.pinned_array(8)
            h_src[:] = value_in
            d_ary = cuda.to_device(h_src, stream=stream)
            d_ary.copy_to_host(h_dst, stream=stream)
            done_result = await stream.async_done()
            self.assertEqual(done_result, stream)
            return h_dst.mean()

        values_in = [1, 2, 3, 4]
        tasks = []
        for value in values_in:
            tasks.append(asyncio.create_task(async_cuda_fn(value)))
        values_out = await asyncio.gather(*tasks)
        self.assertTrue(np.allclose(values_in, values_out))

    @with_asyncio_loop
    async def test_multiple_async_done(self):
        stream = cuda.stream()
        done_aws = [stream.async_done() for _ in range(4)]
        results = await asyncio.gather(*done_aws)
        # Every awaitable must resolve to the stream it came from
        for resolved in results:
            self.assertEqual(resolved, stream)

    @with_asyncio_loop
    async def test_multiple_async_done_multiple_streams(self):
        streams = [cuda.stream() for _ in range(4)]
        done_aws = [s.async_done() for s in streams]
        results = await asyncio.gather(*done_aws)

        # Ensure we got the four original streams in done
        self.assertSetEqual(set(results), set(streams))

    @with_asyncio_loop
    async def test_cancelled_future(self):
        stream = cuda.stream()
        first = stream.async_done()
        second = stream.async_done()
        # Cancelling one future must not stop the other from completing
        first.cancel()
        await second
        self.assertTrue(first.cancelled())
        self.assertTrue(second.done())
95
+
96
+
97
@skip_on_cudasim("CUDA Driver API unsupported in the simulator")
class TestFailingStream(CUDATestCase):
    """Awaiting ``async_done`` on a stream whose kernel traps."""

    # This test can only be run in isolation because it corrupts the CUDA
    # context, which cannot be recovered from within the same process. It is
    # left here so that it can be run manually for debugging / testing purposes
    # - or may be re-enabled if in future there is infrastructure added for
    # running tests in a separate process (a subprocess cannot be used because
    # CUDA will have been initialized before the fork, so it cannot be used in
    # the child process).
    @unittest.skip
    @with_asyncio_loop
    async def test_failed_stream(self):
        # PTX for a kernel that does nothing but trap
        ptx_source = """
.version 6.5
.target sm_30
.address_size 64
.visible .entry failing_kernel() { trap; }
"""
        context = cuda.current_context()
        module = context.create_module_ptx(ptx_source)
        failing_kernel = module.get_function("failing_kernel")

        stream = cuda.stream()
        launcher = failing_kernel.configure((1,), (1,), stream=stream)
        launcher.__call__()
        done = stream.async_done()
        with self.assertRaises(Exception):
            await done
        # The failure must also be recorded on the future itself
        self.assertIsNotNone(done.exception())
124
+
125
+
126
+ if __name__ == "__main__":
127
+ unittest.main()
@@ -0,0 +1,9 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: BSD-2-Clause
3
+
4
+ from numba.cuda.tests import load_testsuite
5
+ import os
6
+
7
+
8
def load_tests(loader, tests, pattern):
    """``load_tests`` hook: build the suite from this file's directory.

    ``tests`` and ``pattern`` are accepted as part of the hook signature
    but are not used.
    """
    package_dir = os.path.dirname(__file__)
    return load_testsuite(loader, package_dir)