numba-cuda 0.22.0__cp312-cp312-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of numba-cuda might be problematic. More details are linked from the original registry page.

Files changed (487)
  1. _numba_cuda_redirector.pth +4 -0
  2. _numba_cuda_redirector.py +89 -0
  3. numba_cuda/VERSION +1 -0
  4. numba_cuda/__init__.py +6 -0
  5. numba_cuda/_version.py +11 -0
  6. numba_cuda/numba/cuda/__init__.py +70 -0
  7. numba_cuda/numba/cuda/_internal/cuda_bf16.py +16394 -0
  8. numba_cuda/numba/cuda/_internal/cuda_fp16.py +8112 -0
  9. numba_cuda/numba/cuda/api.py +580 -0
  10. numba_cuda/numba/cuda/api_util.py +76 -0
  11. numba_cuda/numba/cuda/args.py +72 -0
  12. numba_cuda/numba/cuda/bf16.py +397 -0
  13. numba_cuda/numba/cuda/cache_hints.py +287 -0
  14. numba_cuda/numba/cuda/cext/__init__.py +2 -0
  15. numba_cuda/numba/cuda/cext/_devicearray.cpp +159 -0
  16. numba_cuda/numba/cuda/cext/_devicearray.cpython-312-aarch64-linux-gnu.so +0 -0
  17. numba_cuda/numba/cuda/cext/_devicearray.h +29 -0
  18. numba_cuda/numba/cuda/cext/_dispatcher.cpp +1098 -0
  19. numba_cuda/numba/cuda/cext/_dispatcher.cpython-312-aarch64-linux-gnu.so +0 -0
  20. numba_cuda/numba/cuda/cext/_hashtable.cpp +532 -0
  21. numba_cuda/numba/cuda/cext/_hashtable.h +135 -0
  22. numba_cuda/numba/cuda/cext/_helperlib.c +71 -0
  23. numba_cuda/numba/cuda/cext/_helperlib.cpython-312-aarch64-linux-gnu.so +0 -0
  24. numba_cuda/numba/cuda/cext/_helpermod.c +82 -0
  25. numba_cuda/numba/cuda/cext/_pymodule.h +38 -0
  26. numba_cuda/numba/cuda/cext/_typeconv.cpp +206 -0
  27. numba_cuda/numba/cuda/cext/_typeconv.cpython-312-aarch64-linux-gnu.so +0 -0
  28. numba_cuda/numba/cuda/cext/_typeof.cpp +1159 -0
  29. numba_cuda/numba/cuda/cext/_typeof.h +19 -0
  30. numba_cuda/numba/cuda/cext/capsulethunk.h +111 -0
  31. numba_cuda/numba/cuda/cext/mviewbuf.c +385 -0
  32. numba_cuda/numba/cuda/cext/mviewbuf.cpython-312-aarch64-linux-gnu.so +0 -0
  33. numba_cuda/numba/cuda/cext/typeconv.cpp +212 -0
  34. numba_cuda/numba/cuda/cext/typeconv.hpp +101 -0
  35. numba_cuda/numba/cuda/cg.py +67 -0
  36. numba_cuda/numba/cuda/cgutils.py +1294 -0
  37. numba_cuda/numba/cuda/cloudpickle/__init__.py +21 -0
  38. numba_cuda/numba/cuda/cloudpickle/cloudpickle.py +1598 -0
  39. numba_cuda/numba/cuda/cloudpickle/cloudpickle_fast.py +17 -0
  40. numba_cuda/numba/cuda/codegen.py +541 -0
  41. numba_cuda/numba/cuda/compiler.py +1396 -0
  42. numba_cuda/numba/cuda/core/analysis.py +758 -0
  43. numba_cuda/numba/cuda/core/annotations/__init__.py +0 -0
  44. numba_cuda/numba/cuda/core/annotations/pretty_annotate.py +288 -0
  45. numba_cuda/numba/cuda/core/annotations/type_annotations.py +305 -0
  46. numba_cuda/numba/cuda/core/base.py +1332 -0
  47. numba_cuda/numba/cuda/core/boxing.py +1411 -0
  48. numba_cuda/numba/cuda/core/bytecode.py +728 -0
  49. numba_cuda/numba/cuda/core/byteflow.py +2346 -0
  50. numba_cuda/numba/cuda/core/caching.py +744 -0
  51. numba_cuda/numba/cuda/core/callconv.py +392 -0
  52. numba_cuda/numba/cuda/core/codegen.py +171 -0
  53. numba_cuda/numba/cuda/core/compiler.py +199 -0
  54. numba_cuda/numba/cuda/core/compiler_lock.py +85 -0
  55. numba_cuda/numba/cuda/core/compiler_machinery.py +497 -0
  56. numba_cuda/numba/cuda/core/config.py +650 -0
  57. numba_cuda/numba/cuda/core/consts.py +124 -0
  58. numba_cuda/numba/cuda/core/controlflow.py +989 -0
  59. numba_cuda/numba/cuda/core/entrypoints.py +57 -0
  60. numba_cuda/numba/cuda/core/environment.py +66 -0
  61. numba_cuda/numba/cuda/core/errors.py +917 -0
  62. numba_cuda/numba/cuda/core/event.py +511 -0
  63. numba_cuda/numba/cuda/core/funcdesc.py +330 -0
  64. numba_cuda/numba/cuda/core/generators.py +387 -0
  65. numba_cuda/numba/cuda/core/imputils.py +509 -0
  66. numba_cuda/numba/cuda/core/inline_closurecall.py +1787 -0
  67. numba_cuda/numba/cuda/core/interpreter.py +3617 -0
  68. numba_cuda/numba/cuda/core/ir.py +1812 -0
  69. numba_cuda/numba/cuda/core/ir_utils.py +2638 -0
  70. numba_cuda/numba/cuda/core/optional.py +129 -0
  71. numba_cuda/numba/cuda/core/options.py +262 -0
  72. numba_cuda/numba/cuda/core/postproc.py +249 -0
  73. numba_cuda/numba/cuda/core/pythonapi.py +1859 -0
  74. numba_cuda/numba/cuda/core/registry.py +46 -0
  75. numba_cuda/numba/cuda/core/removerefctpass.py +123 -0
  76. numba_cuda/numba/cuda/core/rewrites/__init__.py +26 -0
  77. numba_cuda/numba/cuda/core/rewrites/ir_print.py +91 -0
  78. numba_cuda/numba/cuda/core/rewrites/registry.py +104 -0
  79. numba_cuda/numba/cuda/core/rewrites/static_binop.py +41 -0
  80. numba_cuda/numba/cuda/core/rewrites/static_getitem.py +189 -0
  81. numba_cuda/numba/cuda/core/rewrites/static_raise.py +100 -0
  82. numba_cuda/numba/cuda/core/sigutils.py +68 -0
  83. numba_cuda/numba/cuda/core/ssa.py +498 -0
  84. numba_cuda/numba/cuda/core/targetconfig.py +330 -0
  85. numba_cuda/numba/cuda/core/tracing.py +231 -0
  86. numba_cuda/numba/cuda/core/transforms.py +956 -0
  87. numba_cuda/numba/cuda/core/typed_passes.py +867 -0
  88. numba_cuda/numba/cuda/core/typeinfer.py +1950 -0
  89. numba_cuda/numba/cuda/core/unsafe/__init__.py +0 -0
  90. numba_cuda/numba/cuda/core/unsafe/bytes.py +67 -0
  91. numba_cuda/numba/cuda/core/unsafe/eh.py +67 -0
  92. numba_cuda/numba/cuda/core/unsafe/refcount.py +98 -0
  93. numba_cuda/numba/cuda/core/untyped_passes.py +1979 -0
  94. numba_cuda/numba/cuda/cpython/builtins.py +1153 -0
  95. numba_cuda/numba/cuda/cpython/charseq.py +1218 -0
  96. numba_cuda/numba/cuda/cpython/cmathimpl.py +560 -0
  97. numba_cuda/numba/cuda/cpython/enumimpl.py +103 -0
  98. numba_cuda/numba/cuda/cpython/iterators.py +167 -0
  99. numba_cuda/numba/cuda/cpython/listobj.py +1326 -0
  100. numba_cuda/numba/cuda/cpython/mathimpl.py +499 -0
  101. numba_cuda/numba/cuda/cpython/numbers.py +1475 -0
  102. numba_cuda/numba/cuda/cpython/rangeobj.py +289 -0
  103. numba_cuda/numba/cuda/cpython/slicing.py +322 -0
  104. numba_cuda/numba/cuda/cpython/tupleobj.py +456 -0
  105. numba_cuda/numba/cuda/cpython/unicode.py +2865 -0
  106. numba_cuda/numba/cuda/cpython/unicode_support.py +1597 -0
  107. numba_cuda/numba/cuda/cpython/unsafe/__init__.py +0 -0
  108. numba_cuda/numba/cuda/cpython/unsafe/numbers.py +64 -0
  109. numba_cuda/numba/cuda/cpython/unsafe/tuple.py +92 -0
  110. numba_cuda/numba/cuda/cuda_paths.py +691 -0
  111. numba_cuda/numba/cuda/cudadecl.py +543 -0
  112. numba_cuda/numba/cuda/cudadrv/__init__.py +14 -0
  113. numba_cuda/numba/cuda/cudadrv/devicearray.py +954 -0
  114. numba_cuda/numba/cuda/cudadrv/devices.py +249 -0
  115. numba_cuda/numba/cuda/cudadrv/driver.py +3238 -0
  116. numba_cuda/numba/cuda/cudadrv/drvapi.py +435 -0
  117. numba_cuda/numba/cuda/cudadrv/dummyarray.py +562 -0
  118. numba_cuda/numba/cuda/cudadrv/enums.py +613 -0
  119. numba_cuda/numba/cuda/cudadrv/error.py +48 -0
  120. numba_cuda/numba/cuda/cudadrv/libs.py +220 -0
  121. numba_cuda/numba/cuda/cudadrv/linkable_code.py +184 -0
  122. numba_cuda/numba/cuda/cudadrv/mappings.py +14 -0
  123. numba_cuda/numba/cuda/cudadrv/ndarray.py +26 -0
  124. numba_cuda/numba/cuda/cudadrv/nvrtc.py +193 -0
  125. numba_cuda/numba/cuda/cudadrv/nvvm.py +756 -0
  126. numba_cuda/numba/cuda/cudadrv/rtapi.py +13 -0
  127. numba_cuda/numba/cuda/cudadrv/runtime.py +34 -0
  128. numba_cuda/numba/cuda/cudaimpl.py +983 -0
  129. numba_cuda/numba/cuda/cudamath.py +149 -0
  130. numba_cuda/numba/cuda/datamodel/__init__.py +7 -0
  131. numba_cuda/numba/cuda/datamodel/cuda_manager.py +66 -0
  132. numba_cuda/numba/cuda/datamodel/cuda_models.py +1446 -0
  133. numba_cuda/numba/cuda/datamodel/cuda_packer.py +224 -0
  134. numba_cuda/numba/cuda/datamodel/cuda_registry.py +22 -0
  135. numba_cuda/numba/cuda/datamodel/cuda_testing.py +153 -0
  136. numba_cuda/numba/cuda/datamodel/manager.py +11 -0
  137. numba_cuda/numba/cuda/datamodel/models.py +9 -0
  138. numba_cuda/numba/cuda/datamodel/packer.py +9 -0
  139. numba_cuda/numba/cuda/datamodel/registry.py +11 -0
  140. numba_cuda/numba/cuda/datamodel/testing.py +11 -0
  141. numba_cuda/numba/cuda/debuginfo.py +997 -0
  142. numba_cuda/numba/cuda/decorators.py +294 -0
  143. numba_cuda/numba/cuda/descriptor.py +35 -0
  144. numba_cuda/numba/cuda/device_init.py +155 -0
  145. numba_cuda/numba/cuda/deviceufunc.py +1021 -0
  146. numba_cuda/numba/cuda/dispatcher.py +2463 -0
  147. numba_cuda/numba/cuda/errors.py +72 -0
  148. numba_cuda/numba/cuda/extending.py +697 -0
  149. numba_cuda/numba/cuda/flags.py +178 -0
  150. numba_cuda/numba/cuda/fp16.py +357 -0
  151. numba_cuda/numba/cuda/include/12/cuda_bf16.h +5118 -0
  152. numba_cuda/numba/cuda/include/12/cuda_bf16.hpp +3865 -0
  153. numba_cuda/numba/cuda/include/12/cuda_fp16.h +5363 -0
  154. numba_cuda/numba/cuda/include/12/cuda_fp16.hpp +3483 -0
  155. numba_cuda/numba/cuda/include/13/cuda_bf16.h +5118 -0
  156. numba_cuda/numba/cuda/include/13/cuda_bf16.hpp +3865 -0
  157. numba_cuda/numba/cuda/include/13/cuda_fp16.h +5363 -0
  158. numba_cuda/numba/cuda/include/13/cuda_fp16.hpp +3483 -0
  159. numba_cuda/numba/cuda/initialize.py +24 -0
  160. numba_cuda/numba/cuda/intrinsics.py +531 -0
  161. numba_cuda/numba/cuda/itanium_mangler.py +214 -0
  162. numba_cuda/numba/cuda/kernels/__init__.py +2 -0
  163. numba_cuda/numba/cuda/kernels/reduction.py +265 -0
  164. numba_cuda/numba/cuda/kernels/transpose.py +65 -0
  165. numba_cuda/numba/cuda/libdevice.py +3386 -0
  166. numba_cuda/numba/cuda/libdevicedecl.py +20 -0
  167. numba_cuda/numba/cuda/libdevicefuncs.py +1060 -0
  168. numba_cuda/numba/cuda/libdeviceimpl.py +88 -0
  169. numba_cuda/numba/cuda/locks.py +19 -0
  170. numba_cuda/numba/cuda/lowering.py +1980 -0
  171. numba_cuda/numba/cuda/mathimpl.py +374 -0
  172. numba_cuda/numba/cuda/memory_management/__init__.py +4 -0
  173. numba_cuda/numba/cuda/memory_management/memsys.cu +99 -0
  174. numba_cuda/numba/cuda/memory_management/memsys.cuh +22 -0
  175. numba_cuda/numba/cuda/memory_management/nrt.cu +212 -0
  176. numba_cuda/numba/cuda/memory_management/nrt.cuh +48 -0
  177. numba_cuda/numba/cuda/memory_management/nrt.py +390 -0
  178. numba_cuda/numba/cuda/memory_management/nrt_context.py +438 -0
  179. numba_cuda/numba/cuda/misc/appdirs.py +594 -0
  180. numba_cuda/numba/cuda/misc/cffiimpl.py +24 -0
  181. numba_cuda/numba/cuda/misc/coverage_support.py +43 -0
  182. numba_cuda/numba/cuda/misc/dump_style.py +41 -0
  183. numba_cuda/numba/cuda/misc/findlib.py +75 -0
  184. numba_cuda/numba/cuda/misc/firstlinefinder.py +96 -0
  185. numba_cuda/numba/cuda/misc/gdb_hook.py +240 -0
  186. numba_cuda/numba/cuda/misc/literal.py +28 -0
  187. numba_cuda/numba/cuda/misc/llvm_pass_timings.py +412 -0
  188. numba_cuda/numba/cuda/misc/special.py +94 -0
  189. numba_cuda/numba/cuda/models.py +56 -0
  190. numba_cuda/numba/cuda/np/arraymath.py +5130 -0
  191. numba_cuda/numba/cuda/np/arrayobj.py +7635 -0
  192. numba_cuda/numba/cuda/np/extensions.py +11 -0
  193. numba_cuda/numba/cuda/np/linalg.py +3087 -0
  194. numba_cuda/numba/cuda/np/math/__init__.py +0 -0
  195. numba_cuda/numba/cuda/np/math/cmathimpl.py +558 -0
  196. numba_cuda/numba/cuda/np/math/mathimpl.py +487 -0
  197. numba_cuda/numba/cuda/np/math/numbers.py +1461 -0
  198. numba_cuda/numba/cuda/np/npdatetime.py +969 -0
  199. numba_cuda/numba/cuda/np/npdatetime_helpers.py +217 -0
  200. numba_cuda/numba/cuda/np/npyfuncs.py +1808 -0
  201. numba_cuda/numba/cuda/np/npyimpl.py +1027 -0
  202. numba_cuda/numba/cuda/np/numpy_support.py +798 -0
  203. numba_cuda/numba/cuda/np/polynomial/__init__.py +4 -0
  204. numba_cuda/numba/cuda/np/polynomial/polynomial_core.py +242 -0
  205. numba_cuda/numba/cuda/np/polynomial/polynomial_functions.py +380 -0
  206. numba_cuda/numba/cuda/np/ufunc/__init__.py +4 -0
  207. numba_cuda/numba/cuda/np/ufunc/decorators.py +203 -0
  208. numba_cuda/numba/cuda/np/ufunc/sigparse.py +68 -0
  209. numba_cuda/numba/cuda/np/ufunc/ufuncbuilder.py +65 -0
  210. numba_cuda/numba/cuda/np/ufunc_db.py +1282 -0
  211. numba_cuda/numba/cuda/np/unsafe/__init__.py +0 -0
  212. numba_cuda/numba/cuda/np/unsafe/ndarray.py +84 -0
  213. numba_cuda/numba/cuda/nvvmutils.py +254 -0
  214. numba_cuda/numba/cuda/printimpl.py +126 -0
  215. numba_cuda/numba/cuda/random.py +308 -0
  216. numba_cuda/numba/cuda/reshape_funcs.cu +156 -0
  217. numba_cuda/numba/cuda/serialize.py +267 -0
  218. numba_cuda/numba/cuda/simulator/__init__.py +63 -0
  219. numba_cuda/numba/cuda/simulator/_internal/__init__.py +4 -0
  220. numba_cuda/numba/cuda/simulator/_internal/cuda_bf16.py +2 -0
  221. numba_cuda/numba/cuda/simulator/api.py +179 -0
  222. numba_cuda/numba/cuda/simulator/bf16.py +4 -0
  223. numba_cuda/numba/cuda/simulator/compiler.py +38 -0
  224. numba_cuda/numba/cuda/simulator/cudadrv/__init__.py +11 -0
  225. numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py +462 -0
  226. numba_cuda/numba/cuda/simulator/cudadrv/devices.py +122 -0
  227. numba_cuda/numba/cuda/simulator/cudadrv/driver.py +66 -0
  228. numba_cuda/numba/cuda/simulator/cudadrv/drvapi.py +7 -0
  229. numba_cuda/numba/cuda/simulator/cudadrv/dummyarray.py +7 -0
  230. numba_cuda/numba/cuda/simulator/cudadrv/error.py +10 -0
  231. numba_cuda/numba/cuda/simulator/cudadrv/libs.py +10 -0
  232. numba_cuda/numba/cuda/simulator/cudadrv/linkable_code.py +61 -0
  233. numba_cuda/numba/cuda/simulator/cudadrv/nvrtc.py +11 -0
  234. numba_cuda/numba/cuda/simulator/cudadrv/nvvm.py +32 -0
  235. numba_cuda/numba/cuda/simulator/cudadrv/runtime.py +22 -0
  236. numba_cuda/numba/cuda/simulator/dispatcher.py +11 -0
  237. numba_cuda/numba/cuda/simulator/kernel.py +320 -0
  238. numba_cuda/numba/cuda/simulator/kernelapi.py +509 -0
  239. numba_cuda/numba/cuda/simulator/memory_management/__init__.py +4 -0
  240. numba_cuda/numba/cuda/simulator/memory_management/nrt.py +21 -0
  241. numba_cuda/numba/cuda/simulator/reduction.py +19 -0
  242. numba_cuda/numba/cuda/simulator/tests/support.py +4 -0
  243. numba_cuda/numba/cuda/simulator/vector_types.py +65 -0
  244. numba_cuda/numba/cuda/simulator_init.py +18 -0
  245. numba_cuda/numba/cuda/stubs.py +624 -0
  246. numba_cuda/numba/cuda/target.py +505 -0
  247. numba_cuda/numba/cuda/testing.py +347 -0
  248. numba_cuda/numba/cuda/tests/__init__.py +62 -0
  249. numba_cuda/numba/cuda/tests/benchmarks/__init__.py +0 -0
  250. numba_cuda/numba/cuda/tests/benchmarks/test_kernel_launch.py +119 -0
  251. numba_cuda/numba/cuda/tests/cloudpickle_main_class.py +9 -0
  252. numba_cuda/numba/cuda/tests/core/serialize_usecases.py +113 -0
  253. numba_cuda/numba/cuda/tests/core/test_itanium_mangler.py +83 -0
  254. numba_cuda/numba/cuda/tests/core/test_serialize.py +371 -0
  255. numba_cuda/numba/cuda/tests/cudadrv/__init__.py +9 -0
  256. numba_cuda/numba/cuda/tests/cudadrv/test_array_attr.py +147 -0
  257. numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py +161 -0
  258. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_array_slicing.py +397 -0
  259. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_auto_context.py +24 -0
  260. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py +180 -0
  261. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py +313 -0
  262. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_memory.py +191 -0
  263. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py +621 -0
  264. numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py +247 -0
  265. numba_cuda/numba/cuda/tests/cudadrv/test_detect.py +100 -0
  266. numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +200 -0
  267. numba_cuda/numba/cuda/tests/cudadrv/test_events.py +53 -0
  268. numba_cuda/numba/cuda/tests/cudadrv/test_host_alloc.py +72 -0
  269. numba_cuda/numba/cuda/tests/cudadrv/test_init.py +138 -0
  270. numba_cuda/numba/cuda/tests/cudadrv/test_inline_ptx.py +43 -0
  271. numba_cuda/numba/cuda/tests/cudadrv/test_is_fp16.py +15 -0
  272. numba_cuda/numba/cuda/tests/cudadrv/test_linkable_code.py +58 -0
  273. numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +348 -0
  274. numba_cuda/numba/cuda/tests/cudadrv/test_managed_alloc.py +128 -0
  275. numba_cuda/numba/cuda/tests/cudadrv/test_module_callbacks.py +301 -0
  276. numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py +174 -0
  277. numba_cuda/numba/cuda/tests/cudadrv/test_nvrtc.py +28 -0
  278. numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py +185 -0
  279. numba_cuda/numba/cuda/tests/cudadrv/test_pinned.py +39 -0
  280. numba_cuda/numba/cuda/tests/cudadrv/test_profiler.py +23 -0
  281. numba_cuda/numba/cuda/tests/cudadrv/test_reset_device.py +38 -0
  282. numba_cuda/numba/cuda/tests/cudadrv/test_runtime.py +48 -0
  283. numba_cuda/numba/cuda/tests/cudadrv/test_select_device.py +44 -0
  284. numba_cuda/numba/cuda/tests/cudadrv/test_streams.py +127 -0
  285. numba_cuda/numba/cuda/tests/cudapy/__init__.py +9 -0
  286. numba_cuda/numba/cuda/tests/cudapy/cache_usecases.py +231 -0
  287. numba_cuda/numba/cuda/tests/cudapy/cache_with_cpu_usecases.py +50 -0
  288. numba_cuda/numba/cuda/tests/cudapy/cg_cache_usecases.py +36 -0
  289. numba_cuda/numba/cuda/tests/cudapy/complex_usecases.py +116 -0
  290. numba_cuda/numba/cuda/tests/cudapy/enum_usecases.py +59 -0
  291. numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +62 -0
  292. numba_cuda/numba/cuda/tests/cudapy/jitlink.ptx +28 -0
  293. numba_cuda/numba/cuda/tests/cudapy/overload_usecases.py +33 -0
  294. numba_cuda/numba/cuda/tests/cudapy/recursion_usecases.py +104 -0
  295. numba_cuda/numba/cuda/tests/cudapy/test_alignment.py +47 -0
  296. numba_cuda/numba/cuda/tests/cudapy/test_analysis.py +1122 -0
  297. numba_cuda/numba/cuda/tests/cudapy/test_array.py +344 -0
  298. numba_cuda/numba/cuda/tests/cudapy/test_array_alignment.py +268 -0
  299. numba_cuda/numba/cuda/tests/cudapy/test_array_args.py +203 -0
  300. numba_cuda/numba/cuda/tests/cudapy/test_array_methods.py +63 -0
  301. numba_cuda/numba/cuda/tests/cudapy/test_array_reductions.py +360 -0
  302. numba_cuda/numba/cuda/tests/cudapy/test_atomics.py +1815 -0
  303. numba_cuda/numba/cuda/tests/cudapy/test_bfloat16.py +599 -0
  304. numba_cuda/numba/cuda/tests/cudapy/test_bfloat16_bindings.py +377 -0
  305. numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py +160 -0
  306. numba_cuda/numba/cuda/tests/cudapy/test_boolean.py +27 -0
  307. numba_cuda/numba/cuda/tests/cudapy/test_byteflow.py +98 -0
  308. numba_cuda/numba/cuda/tests/cudapy/test_cache_hints.py +210 -0
  309. numba_cuda/numba/cuda/tests/cudapy/test_caching.py +683 -0
  310. numba_cuda/numba/cuda/tests/cudapy/test_casting.py +265 -0
  311. numba_cuda/numba/cuda/tests/cudapy/test_cffi.py +42 -0
  312. numba_cuda/numba/cuda/tests/cudapy/test_compiler.py +718 -0
  313. numba_cuda/numba/cuda/tests/cudapy/test_complex.py +370 -0
  314. numba_cuda/numba/cuda/tests/cudapy/test_complex_kernel.py +23 -0
  315. numba_cuda/numba/cuda/tests/cudapy/test_const_string.py +142 -0
  316. numba_cuda/numba/cuda/tests/cudapy/test_constmem.py +178 -0
  317. numba_cuda/numba/cuda/tests/cudapy/test_cooperative_groups.py +193 -0
  318. numba_cuda/numba/cuda/tests/cudapy/test_copy_propagate.py +131 -0
  319. numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py +438 -0
  320. numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py +94 -0
  321. numba_cuda/numba/cuda/tests/cudapy/test_datetime.py +101 -0
  322. numba_cuda/numba/cuda/tests/cudapy/test_debug.py +105 -0
  323. numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +978 -0
  324. numba_cuda/numba/cuda/tests/cudapy/test_debuginfo_types.py +476 -0
  325. numba_cuda/numba/cuda/tests/cudapy/test_device_func.py +500 -0
  326. numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +820 -0
  327. numba_cuda/numba/cuda/tests/cudapy/test_enums.py +152 -0
  328. numba_cuda/numba/cuda/tests/cudapy/test_errors.py +111 -0
  329. numba_cuda/numba/cuda/tests/cudapy/test_exception.py +170 -0
  330. numba_cuda/numba/cuda/tests/cudapy/test_extending.py +1088 -0
  331. numba_cuda/numba/cuda/tests/cudapy/test_extending_types.py +71 -0
  332. numba_cuda/numba/cuda/tests/cudapy/test_fastmath.py +265 -0
  333. numba_cuda/numba/cuda/tests/cudapy/test_flow_control.py +1433 -0
  334. numba_cuda/numba/cuda/tests/cudapy/test_forall.py +57 -0
  335. numba_cuda/numba/cuda/tests/cudapy/test_freevar.py +34 -0
  336. numba_cuda/numba/cuda/tests/cudapy/test_frexp_ldexp.py +69 -0
  337. numba_cuda/numba/cuda/tests/cudapy/test_globals.py +62 -0
  338. numba_cuda/numba/cuda/tests/cudapy/test_gufunc.py +474 -0
  339. numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scalar.py +167 -0
  340. numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scheduling.py +92 -0
  341. numba_cuda/numba/cuda/tests/cudapy/test_idiv.py +39 -0
  342. numba_cuda/numba/cuda/tests/cudapy/test_inline.py +170 -0
  343. numba_cuda/numba/cuda/tests/cudapy/test_inspect.py +255 -0
  344. numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py +1219 -0
  345. numba_cuda/numba/cuda/tests/cudapy/test_ipc.py +263 -0
  346. numba_cuda/numba/cuda/tests/cudapy/test_ir.py +598 -0
  347. numba_cuda/numba/cuda/tests/cudapy/test_ir_utils.py +276 -0
  348. numba_cuda/numba/cuda/tests/cudapy/test_iterators.py +101 -0
  349. numba_cuda/numba/cuda/tests/cudapy/test_lang.py +68 -0
  350. numba_cuda/numba/cuda/tests/cudapy/test_laplace.py +123 -0
  351. numba_cuda/numba/cuda/tests/cudapy/test_libdevice.py +194 -0
  352. numba_cuda/numba/cuda/tests/cudapy/test_lineinfo.py +220 -0
  353. numba_cuda/numba/cuda/tests/cudapy/test_localmem.py +173 -0
  354. numba_cuda/numba/cuda/tests/cudapy/test_make_function_to_jit_function.py +364 -0
  355. numba_cuda/numba/cuda/tests/cudapy/test_mandel.py +47 -0
  356. numba_cuda/numba/cuda/tests/cudapy/test_math.py +842 -0
  357. numba_cuda/numba/cuda/tests/cudapy/test_matmul.py +76 -0
  358. numba_cuda/numba/cuda/tests/cudapy/test_minmax.py +78 -0
  359. numba_cuda/numba/cuda/tests/cudapy/test_montecarlo.py +25 -0
  360. numba_cuda/numba/cuda/tests/cudapy/test_multigpu.py +145 -0
  361. numba_cuda/numba/cuda/tests/cudapy/test_multiprocessing.py +39 -0
  362. numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py +82 -0
  363. numba_cuda/numba/cuda/tests/cudapy/test_nondet.py +53 -0
  364. numba_cuda/numba/cuda/tests/cudapy/test_operator.py +504 -0
  365. numba_cuda/numba/cuda/tests/cudapy/test_optimization.py +93 -0
  366. numba_cuda/numba/cuda/tests/cudapy/test_overload.py +402 -0
  367. numba_cuda/numba/cuda/tests/cudapy/test_powi.py +128 -0
  368. numba_cuda/numba/cuda/tests/cudapy/test_print.py +193 -0
  369. numba_cuda/numba/cuda/tests/cudapy/test_py2_div_issue.py +37 -0
  370. numba_cuda/numba/cuda/tests/cudapy/test_random.py +117 -0
  371. numba_cuda/numba/cuda/tests/cudapy/test_record_dtype.py +614 -0
  372. numba_cuda/numba/cuda/tests/cudapy/test_recursion.py +130 -0
  373. numba_cuda/numba/cuda/tests/cudapy/test_reduction.py +94 -0
  374. numba_cuda/numba/cuda/tests/cudapy/test_retrieve_autoconverted_arrays.py +83 -0
  375. numba_cuda/numba/cuda/tests/cudapy/test_serialize.py +86 -0
  376. numba_cuda/numba/cuda/tests/cudapy/test_slicing.py +40 -0
  377. numba_cuda/numba/cuda/tests/cudapy/test_sm.py +457 -0
  378. numba_cuda/numba/cuda/tests/cudapy/test_sm_creation.py +233 -0
  379. numba_cuda/numba/cuda/tests/cudapy/test_ssa.py +454 -0
  380. numba_cuda/numba/cuda/tests/cudapy/test_stream_api.py +56 -0
  381. numba_cuda/numba/cuda/tests/cudapy/test_sync.py +277 -0
  382. numba_cuda/numba/cuda/tests/cudapy/test_tracing.py +200 -0
  383. numba_cuda/numba/cuda/tests/cudapy/test_transpose.py +90 -0
  384. numba_cuda/numba/cuda/tests/cudapy/test_typeconv.py +333 -0
  385. numba_cuda/numba/cuda/tests/cudapy/test_typeinfer.py +538 -0
  386. numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +585 -0
  387. numba_cuda/numba/cuda/tests/cudapy/test_userexc.py +42 -0
  388. numba_cuda/numba/cuda/tests/cudapy/test_vector_type.py +485 -0
  389. numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py +312 -0
  390. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_complex.py +23 -0
  391. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py +183 -0
  392. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_device.py +40 -0
  393. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_scalar_arg.py +40 -0
  394. numba_cuda/numba/cuda/tests/cudapy/test_warning.py +206 -0
  395. numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +446 -0
  396. numba_cuda/numba/cuda/tests/cudasim/__init__.py +9 -0
  397. numba_cuda/numba/cuda/tests/cudasim/support.py +9 -0
  398. numba_cuda/numba/cuda/tests/cudasim/test_cudasim_issues.py +111 -0
  399. numba_cuda/numba/cuda/tests/data/__init__.py +2 -0
  400. numba_cuda/numba/cuda/tests/data/cta_barrier.cu +28 -0
  401. numba_cuda/numba/cuda/tests/data/cuda_include.cu +10 -0
  402. numba_cuda/numba/cuda/tests/data/error.cu +12 -0
  403. numba_cuda/numba/cuda/tests/data/include/add.cuh +8 -0
  404. numba_cuda/numba/cuda/tests/data/jitlink.cu +28 -0
  405. numba_cuda/numba/cuda/tests/data/jitlink.ptx +49 -0
  406. numba_cuda/numba/cuda/tests/data/warn.cu +12 -0
  407. numba_cuda/numba/cuda/tests/doc_examples/__init__.py +9 -0
  408. numba_cuda/numba/cuda/tests/doc_examples/ffi/__init__.py +2 -0
  409. numba_cuda/numba/cuda/tests/doc_examples/ffi/functions.cu +54 -0
  410. numba_cuda/numba/cuda/tests/doc_examples/ffi/include/mul.cuh +8 -0
  411. numba_cuda/numba/cuda/tests/doc_examples/ffi/saxpy.cu +14 -0
  412. numba_cuda/numba/cuda/tests/doc_examples/test_cg.py +86 -0
  413. numba_cuda/numba/cuda/tests/doc_examples/test_cpointer.py +68 -0
  414. numba_cuda/numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py +81 -0
  415. numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py +141 -0
  416. numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py +160 -0
  417. numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py +180 -0
  418. numba_cuda/numba/cuda/tests/doc_examples/test_montecarlo.py +119 -0
  419. numba_cuda/numba/cuda/tests/doc_examples/test_random.py +66 -0
  420. numba_cuda/numba/cuda/tests/doc_examples/test_reduction.py +80 -0
  421. numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py +206 -0
  422. numba_cuda/numba/cuda/tests/doc_examples/test_ufunc.py +53 -0
  423. numba_cuda/numba/cuda/tests/doc_examples/test_vecadd.py +76 -0
  424. numba_cuda/numba/cuda/tests/nocuda/__init__.py +9 -0
  425. numba_cuda/numba/cuda/tests/nocuda/test_dummyarray.py +452 -0
  426. numba_cuda/numba/cuda/tests/nocuda/test_function_resolution.py +48 -0
  427. numba_cuda/numba/cuda/tests/nocuda/test_import.py +63 -0
  428. numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py +252 -0
  429. numba_cuda/numba/cuda/tests/nocuda/test_nvvm.py +59 -0
  430. numba_cuda/numba/cuda/tests/nrt/__init__.py +9 -0
  431. numba_cuda/numba/cuda/tests/nrt/test_nrt.py +387 -0
  432. numba_cuda/numba/cuda/tests/nrt/test_nrt_refct.py +124 -0
  433. numba_cuda/numba/cuda/tests/support.py +900 -0
  434. numba_cuda/numba/cuda/typeconv/__init__.py +4 -0
  435. numba_cuda/numba/cuda/typeconv/castgraph.py +137 -0
  436. numba_cuda/numba/cuda/typeconv/rules.py +63 -0
  437. numba_cuda/numba/cuda/typeconv/typeconv.py +121 -0
  438. numba_cuda/numba/cuda/types/__init__.py +233 -0
  439. numba_cuda/numba/cuda/types/__init__.pyi +167 -0
  440. numba_cuda/numba/cuda/types/abstract.py +9 -0
  441. numba_cuda/numba/cuda/types/common.py +9 -0
  442. numba_cuda/numba/cuda/types/containers.py +9 -0
  443. numba_cuda/numba/cuda/types/cuda_abstract.py +533 -0
  444. numba_cuda/numba/cuda/types/cuda_common.py +110 -0
  445. numba_cuda/numba/cuda/types/cuda_containers.py +971 -0
  446. numba_cuda/numba/cuda/types/cuda_function_type.py +230 -0
  447. numba_cuda/numba/cuda/types/cuda_functions.py +798 -0
  448. numba_cuda/numba/cuda/types/cuda_iterators.py +120 -0
  449. numba_cuda/numba/cuda/types/cuda_misc.py +569 -0
  450. numba_cuda/numba/cuda/types/cuda_npytypes.py +690 -0
  451. numba_cuda/numba/cuda/types/cuda_scalars.py +280 -0
  452. numba_cuda/numba/cuda/types/ext_types.py +101 -0
  453. numba_cuda/numba/cuda/types/function_type.py +11 -0
  454. numba_cuda/numba/cuda/types/functions.py +9 -0
  455. numba_cuda/numba/cuda/types/iterators.py +9 -0
  456. numba_cuda/numba/cuda/types/misc.py +9 -0
  457. numba_cuda/numba/cuda/types/npytypes.py +9 -0
  458. numba_cuda/numba/cuda/types/scalars.py +9 -0
  459. numba_cuda/numba/cuda/typing/__init__.py +19 -0
  460. numba_cuda/numba/cuda/typing/arraydecl.py +939 -0
  461. numba_cuda/numba/cuda/typing/asnumbatype.py +130 -0
  462. numba_cuda/numba/cuda/typing/bufproto.py +70 -0
  463. numba_cuda/numba/cuda/typing/builtins.py +1209 -0
  464. numba_cuda/numba/cuda/typing/cffi_utils.py +219 -0
  465. numba_cuda/numba/cuda/typing/cmathdecl.py +47 -0
  466. numba_cuda/numba/cuda/typing/collections.py +138 -0
  467. numba_cuda/numba/cuda/typing/context.py +782 -0
  468. numba_cuda/numba/cuda/typing/ctypes_utils.py +125 -0
  469. numba_cuda/numba/cuda/typing/dictdecl.py +63 -0
  470. numba_cuda/numba/cuda/typing/enumdecl.py +74 -0
  471. numba_cuda/numba/cuda/typing/listdecl.py +147 -0
  472. numba_cuda/numba/cuda/typing/mathdecl.py +158 -0
  473. numba_cuda/numba/cuda/typing/npdatetime.py +322 -0
  474. numba_cuda/numba/cuda/typing/npydecl.py +749 -0
  475. numba_cuda/numba/cuda/typing/setdecl.py +115 -0
  476. numba_cuda/numba/cuda/typing/templates.py +1446 -0
  477. numba_cuda/numba/cuda/typing/typeof.py +301 -0
  478. numba_cuda/numba/cuda/ufuncs.py +746 -0
  479. numba_cuda/numba/cuda/utils.py +724 -0
  480. numba_cuda/numba/cuda/vector_types.py +214 -0
  481. numba_cuda/numba/cuda/vectorizers.py +260 -0
  482. numba_cuda-0.22.0.dist-info/METADATA +109 -0
  483. numba_cuda-0.22.0.dist-info/RECORD +487 -0
  484. numba_cuda-0.22.0.dist-info/WHEEL +6 -0
  485. numba_cuda-0.22.0.dist-info/licenses/LICENSE +26 -0
  486. numba_cuda-0.22.0.dist-info/licenses/LICENSE.numba +24 -0
  487. numba_cuda-0.22.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,718 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: BSD-2-Clause
3
+
4
+ import os
5
+ from math import sqrt
6
+ from numba import cuda
7
+ from numba.cuda import float32, int16, int32, int64, types, uint32, void
8
+ from numba.cuda import (
9
+ compile,
10
+ compile_for_current_device,
11
+ compile_ptx,
12
+ compile_ptx_for_current_device,
13
+ compile_all,
14
+ LinkableCode,
15
+ )
16
+ from numba.cuda.testing import skip_on_cudasim, unittest, CUDATestCase
17
+
18
# Directory holding the pre-built device-function binaries used by the
# linking tests; the per-format paths below only exist when it is set.
TEST_BIN_DIR = os.getenv("NUMBA_CUDA_TEST_BIN_DIR")
if TEST_BIN_DIR:
    (
        test_device_functions_a,
        test_device_functions_cubin,
        test_device_functions_cu,
        test_device_functions_fatbin,
        test_device_functions_fatbin_multi,
        test_device_functions_o,
        test_device_functions_ptx,
        test_device_functions_ltoir,
    ) = (
        os.path.join(TEST_BIN_DIR, filename)
        for filename in (
            "test_device_functions.a",
            "test_device_functions.cubin",
            "test_device_functions.cu",
            "test_device_functions.fatbin",
            "test_device_functions_multi.fatbin",
            "test_device_functions.o",
            "test_device_functions.ptx",
            "test_device_functions.ltoir",
        )
    )
44
+
45
+
46
+ # A test function at the module scope to ensure we get the name right for the C
47
+ # ABI whether a function is at module or local scope.
48
def f_module(x, y):
    """Return ``x + y``.

    Defined at module scope so the C ABI naming tests can compile a function
    that is not nested inside a test method.
    """
    return x + y
50
+
51
+
52
@skip_on_cudasim("Compilation unsupported in the simulator")
class TestCompile(unittest.TestCase):
    """Checks on the output of the compilation entry points.

    Most checks live in a ``_test_*`` helper that receives the entry point
    and its keyword arguments, so that the same assertions run as subtests
    against ``compile_ptx`` / ``compile`` and ``compile_all``.
    """

    def _run_variants(self, check, variants):
        """Run ``check(compile_function, kwargs)`` in one subtest per variant.

        ``variants`` is an iterable of ``(label, compile_function, kwargs)``
        tuples; ``label`` is used as the subtest message.
        """
        for label, compile_function, kwargs in variants:
            with self.subTest(label):
                check(compile_function, kwargs)

    def _handle_compile_result(self, ret, compile_function):
        """Normalize a compile result to a ``(code, return_type)`` pair.

        For ``compile_ptx`` and ``compile`` the first element of ``ret`` is
        returned as-is; for any other entry point it is indexed and its
        first item is returned instead.
        """
        code_or_list, resty = ret
        if compile_function in (compile_ptx, compile):
            return code_or_list, resty
        return code_or_list[0], resty

    def test_global_kernel(self):
        """A kernel compiles to a ``.entry`` with a void return type."""
        self._run_variants(
            self._test_global_kernel,
            (
                ("compile_ptx", compile_ptx, {}),
                (
                    "compile_all",
                    compile_all,
                    {"device": False, "abi": "numba", "output": "ptx"},
                ),
            ),
        )

    def _test_global_kernel(self, compile_function, default_kwargs):
        """Compile an element-wise add kernel and inspect the PTX."""

        def f(r, x, y):
            i = cuda.grid(1)
            if i < len(r):
                r[i] = x[i] + y[i]

        args = (float32[:], float32[:], float32[:])
        ret = compile_function(f, args, **default_kwargs)
        ptx, resty = self._handle_compile_result(ret, compile_function)

        # Kernels should not have a func_retval parameter
        self.assertNotIn("func_retval", ptx)
        # .visible .func is used to denote a device function
        self.assertNotIn(".visible .func", ptx)
        # .visible .entry would denote the presence of a global function
        self.assertIn(".visible .entry", ptx)
        # Return type for kernels should always be void
        self.assertEqual(resty, void)

    def test_device_function(self):
        """A device function compiles to a ``.func`` with a return value."""
        self._run_variants(
            self._test_device_function,
            (
                ("compile_ptx", compile_ptx, {"device": True}),
                (
                    "compile_all",
                    compile_all,
                    {"device": True, "abi": "c", "output": "ptx"},
                ),
            ),
        )

    def _test_device_function(self, compile_function, default_kwargs):
        """Compile ``add`` as a device function for several signatures."""

        def add(x, y):
            return x + y

        args = (float32, float32)
        ret = compile_function(add, args, **default_kwargs)
        ptx, resty = self._handle_compile_result(ret, compile_function)

        # Device functions take a func_retval parameter for storing the
        # returned value in by reference
        self.assertIn("func_retval", ptx)
        # .visible .func is used to denote a device function
        self.assertIn(".visible .func", ptx)
        # .visible .entry would denote the presence of a global function
        self.assertNotIn(".visible .entry", ptx)
        # Inferred return type as expected?
        self.assertEqual(resty, float32)

        # Check that the function's output matches an explicit signature;
        # the last case passes the signature as a string.
        explicit_signatures = (
            (int32(int32, int32), int32),
            (int16(int16, int16), int16),
            ("uint32(uint32, uint32)", uint32),
        )
        for sig, expected_resty in explicit_signatures:
            ret = compile_function(add, sig, **default_kwargs)
            _, resty = self._handle_compile_result(ret, compile_function)
            self.assertEqual(resty, expected_resty)

    def test_fastmath(self):
        """``fastmath=True`` switches to ftz / approximate instructions."""
        self._run_variants(
            self._test_fastmath,
            (
                ("compile_ptx", compile_ptx, {"device": True}),
                (
                    "compile_all",
                    compile_all,
                    {"device": True, "output": "ptx"},
                ),
            ),
        )

    def _test_fastmath(self, compile_function, default_kwargs):
        """Compare the PTX generated with and without ``fastmath``."""

        def f(x, y, z, d):
            return sqrt((x * y + z) / d)

        args = (float32, float32, float32, float32)

        # Without fastmath, fma contraction is enabled by default, but ftz and
        # approximate div / sqrt are not.
        ret = compile_function(f, args, **default_kwargs)
        ptx, _ = self._handle_compile_result(ret, compile_function)
        for instruction in ("fma.rn.f32", "div.rn.f32", "sqrt.rn.f32"):
            self.assertIn(instruction, ptx)

        # With fastmath, ftz and approximate div / sqrt are enabled
        ret = compile_function(f, args, fastmath=True, **default_kwargs)
        ptx, _ = self._handle_compile_result(ret, compile_function)
        for instruction in (
            "fma.rn.ftz.f32",
            "div.approx.ftz.f32",
            "sqrt.approx.ftz.f32",
        ):
            self.assertIn(instruction, ptx)

    def check_debug_info(self, ptx):
        """Assert that ``ptx`` has a debug_info section and a .file entry."""
        # A debug_info section should exist in the PTX. Whitespace varies
        # between CUDA toolkit versions.
        self.assertRegex(ptx, r"\.section\s+\.debug_info")
        # The .file expectation is the same one used for line info.
        self.check_line_info(ptx)

    def test_device_function_with_debug(self):
        """Compiling a device function with debug info succeeds."""
        # See Issue #6719 - this ensures that compilation with debug succeeds
        # with CUDA 11.2 / NVVM 7.0 onwards. Previously it failed because NVVM
        # IR version metadata was not added when compiling device functions,
        # and NVVM assumed DBG version 1.0 if not specified, which is
        # incompatible with the 3.0 IR we use. This was specified only for
        # kernels.
        debug_kwargs = {"device": True, "debug": True, "opt": False}
        self._run_variants(
            self._test_device_function_with_debug,
            (
                ("compile_ptx", compile_ptx, debug_kwargs),
                (
                    "compile_all",
                    compile_all,
                    {**debug_kwargs, "output": "ptx"},
                ),
            ),
        )

    def _test_device_function_with_debug(
        self, compile_function, default_kwargs
    ):
        """Compile an empty function and check its debug info."""

        def f():
            pass

        ret = compile_function(f, (), **default_kwargs)
        ptx, _ = self._handle_compile_result(ret, compile_function)
        self.check_debug_info(ptx)

    def test_kernel_with_debug(self):
        """Compiling a kernel with debug info succeeds."""
        # Inspired by (but not originally affected by) Issue #6719
        self._run_variants(
            self._test_kernel_with_debug,
            (
                ("compile_ptx", compile_ptx, {"debug": True, "opt": False}),
                (
                    "compile_all",
                    compile_all,
                    {
                        "device": False,
                        "abi": "numba",
                        "debug": True,
                        "opt": False,
                        "output": "ptx",
                    },
                ),
            ),
        )

    def _test_kernel_with_debug(self, compile_function, default_kwargs):
        """Compile an empty kernel and check its debug info."""

        def f():
            pass

        ret = compile_function(f, (), **default_kwargs)
        ptx, _ = self._handle_compile_result(ret, compile_function)
        self.check_debug_info(ptx)

    def check_line_info(self, ptx):
        """Assert that ``ptx`` has a .file directive naming this module."""
        # A .file directive should be produced and include the name of the
        # source. The path and whitespace may vary, so we accept anything
        # ending in the filename of this module. The dot of the extension is
        # escaped so that it only matches a literal ".".
        self.assertRegex(ptx, r'\.file.*test_compiler\.py"')

    def test_device_function_with_line_info(self):
        """Line info is emitted for a device function."""
        self._run_variants(
            self._test_device_function_with_line_info,
            (
                (
                    "compile_ptx",
                    compile_ptx,
                    {"device": True, "lineinfo": True},
                ),
                (
                    "compile_all",
                    compile_all,
                    {
                        "device": True,
                        "abi": "numba",
                        "lineinfo": True,
                        "output": "ptx",
                    },
                ),
            ),
        )

    def _test_device_function_with_line_info(
        self, compile_function, default_kwargs
    ):
        """Compile an empty function and check its line info."""

        def f():
            pass

        ret = compile_function(f, (), **default_kwargs)
        ptx, _ = self._handle_compile_result(ret, compile_function)
        self.check_line_info(ptx)

    def test_kernel_with_line_info(self):
        """Line info is emitted for a kernel."""
        self._run_variants(
            self._test_kernel_with_line_info,
            (
                ("compile_ptx", compile_ptx, {"lineinfo": True}),
                (
                    "compile_all",
                    compile_all,
                    {
                        "device": False,
                        "abi": "numba",
                        "lineinfo": True,
                        "output": "ptx",
                    },
                ),
            ),
        )

    def _test_kernel_with_line_info(self, compile_function, default_kwargs):
        """Compile an empty kernel and check its line info."""

        def f():
            pass

        ret = compile_function(f, (), **default_kwargs)
        ptx, _ = self._handle_compile_result(ret, compile_function)
        self.check_line_info(ptx)

    def test_non_void_return_type(self):
        """Compiling a value-returning function as a kernel is a TypeError."""

        def f(x, y):
            return x[0] + y[0]

        args = (uint32[::1], uint32[::1])
        message = "must have void return type"

        with self.subTest("compile_ptx"):
            with self.assertRaisesRegex(TypeError, message):
                compile_ptx(f, args)

        with self.subTest("compile_all"):
            with self.assertRaisesRegex(TypeError, message):
                compile_all(
                    f, args, device=False, abi="numba", output="ptx"
                )

    def test_c_abi_disallowed_for_kernel(self):
        """Requesting the C ABI for a kernel raises NotImplementedError."""

        def f(x, y):
            return x + y

        args = (int32, int32)
        message = "The C ABI is not supported for kernels"

        with self.subTest("compile_ptx"):
            with self.assertRaisesRegex(NotImplementedError, message):
                compile_ptx(f, args, abi="c")

        with self.subTest("compile_all"):
            with self.assertRaisesRegex(NotImplementedError, message):
                compile_all(f, args, abi="c", device=False, output="ptx")

    def test_unsupported_abi(self):
        """An unknown ABI name raises NotImplementedError."""

        def f(x, y):
            return x + y

        args = (int32, int32)
        message = "Unsupported ABI: fastcall"

        with self.subTest("compile_ptx"):
            with self.assertRaisesRegex(NotImplementedError, message):
                compile_ptx(f, args, abi="fastcall")

        with self.subTest("compile_all"):
            with self.assertRaisesRegex(NotImplementedError, message):
                compile_all(f, args, abi="fastcall", output="ptx")

    def test_c_abi_device_function(self):
        """A locally defined device function gets a C ABI prototype."""
        self._run_variants(
            self._test_c_abi_device_function,
            (
                ("compile_ptx", compile_ptx, {"device": True, "abi": "c"}),
                (
                    "compile_all",
                    compile_all,
                    {"device": True, "abi": "c", "output": "ptx"},
                ),
            ),
        )

    def _test_c_abi_device_function(self, compile_function, default_kwargs):
        """Check the parameter count, name and return width under the C ABI."""

        def f(x, y):
            return x + y

        # 32-bit signature
        ret = compile_function(f, int32(int32, int32), **default_kwargs)
        ptx, _ = self._handle_compile_result(ret, compile_function)
        # There should be no more than two parameters. assertNotIn takes
        # (member, container): the needle goes first, the PTX text second.
        self.assertNotIn("param_2", ptx)
        # The function name should match the Python function name (not the
        # qualname, which includes additional info), and its return value
        # should be 32 bits
        self.assertRegex(
            ptx,
            r"\.visible\s+\.func\s+\(\.param\s+\.b32\s+"
            r"func_retval0\)\s+f\(",
        )

        # 64-bit signature should produce 64-bit return parameter
        ret = compile_function(f, int64(int64, int64), **default_kwargs)
        ptx, _ = self._handle_compile_result(ret, compile_function)
        self.assertRegex(ptx, r"\.visible\s+\.func\s+\(\.param\s+\.b64")

    def test_c_abi_device_function_module_scope(self):
        """A module-scope device function gets a C ABI prototype."""
        self._run_variants(
            self._test_c_abi_device_function_module_scope,
            (
                ("compile_ptx", compile_ptx, {"device": True, "abi": "c"}),
                (
                    "compile_all",
                    compile_all,
                    {"device": True, "abi": "c", "output": "ptx"},
                ),
            ),
        )

    def _test_c_abi_device_function_module_scope(
        self, compile_function, default_kwargs
    ):
        """Compile ``f_module`` and check the emitted prototype."""
        ret = compile_function(f_module, int32(int32, int32), **default_kwargs)
        ptx, _ = self._handle_compile_result(ret, compile_function)

        # The function name should match the Python function name, and its
        # return value should be 32 bits
        self.assertRegex(
            ptx,
            r"\.visible\s+\.func\s+\(\.param\s+\.b32\s+"
            r"func_retval0\)\s+f_module\(",
        )

    def test_c_abi_with_abi_name(self):
        """``abi_info["abi_name"]`` overrides the emitted function name."""
        abi_info = {"abi_name": "_Z4funcii"}
        self._run_variants(
            self._test_c_abi_with_abi_name,
            (
                (
                    "compile_ptx",
                    compile_ptx,
                    {"device": True, "abi": "c", "abi_info": abi_info},
                ),
                (
                    "compile_all",
                    compile_all,
                    {
                        "device": True,
                        "abi": "c",
                        "abi_info": abi_info,
                        "output": "ptx",
                    },
                ),
            ),
        )

    def _test_c_abi_with_abi_name(self, compile_function, default_kwargs):
        """Compile ``f_module`` and check the overridden name is used."""
        ret = compile_function(f_module, int32(int32, int32), **default_kwargs)
        ptx, _ = self._handle_compile_result(ret, compile_function)

        # The function name should match the one given in the ABI info, and its
        # return value should be 32 bits
        self.assertRegex(
            ptx,
            r"\.visible\s+\.func\s+\(\.param\s+\.b32\s+"
            r"func_retval0\)\s+_Z4funcii\(",
        )

    def test_compile_defaults_to_c_abi(self):
        """Without an explicit ``abi`` the C ABI prototype is produced."""
        self._run_variants(
            self._test_compile_defaults_to_c_abi,
            (
                ("compile", compile, {"device": True}),
                (
                    "compile_all",
                    compile_all,
                    {"device": True, "output": "ptx"},
                ),
            ),
        )

    def _test_compile_defaults_to_c_abi(self, compile_function, default_kwargs):
        """Compile ``f_module`` without ``abi`` and check the prototype."""
        ret = compile_function(f_module, int32(int32, int32), **default_kwargs)
        ptx, _ = self._handle_compile_result(ret, compile_function)

        # The function name should match the Python function name, and its
        # return value should be 32 bits
        self.assertRegex(
            ptx,
            r"\.visible\s+\.func\s+\(\.param\s+\.b32\s+"
            r"func_retval0\)\s+f_module\(",
        )

    def test_compile_to_ltoir(self):
        """``output="ltoir"`` yields something that looks like LTOIR."""
        self._run_variants(
            self._test_compile_to_ltoir,
            (
                ("compile", compile, {"device": True, "output": "ltoir"}),
                (
                    "compile_all",
                    compile_all,
                    {"device": True, "abi": "c", "output": "ltoir"},
                ),
            ),
        )

    def _test_compile_to_ltoir(self, compile_function, default_kwargs):
        """Compile ``f_module`` and check the leading magic number."""
        ret = compile_function(f_module, int32(int32, int32), **default_kwargs)
        code, resty = self._handle_compile_result(ret, compile_function)

        # There are no tools to interpret the LTOIR output, but we can check
        # that we appear to have obtained an LTOIR file. This magic number is
        # not documented, but is expected to remain consistent.
        LTOIR_MAGIC = 0x7F4E43ED
        header = int.from_bytes(code[:4], byteorder="little")
        self.assertEqual(header, LTOIR_MAGIC)
        self.assertEqual(resty, int32)

    def test_compile_to_invalid_error(self):
        """An unknown ``output`` kind raises NotImplementedError."""
        illegal_output = "illegal"
        msg = f"Unsupported output type: {illegal_output}"
        sig = int32(int32, int32)

        with self.subTest("compile"):
            with self.assertRaisesRegex(NotImplementedError, msg):
                compile(f_module, sig, device=True, output=illegal_output)

        with self.subTest("compile_all"):
            with self.assertRaisesRegex(NotImplementedError, msg):
                compile_all(
                    f_module,
                    sig,
                    device=True,
                    abi="c",
                    output=illegal_output,
                )

    def test_functioncompiler_locals(self):
        """Eagerly jitting a kernel that reads a closure variable works."""
        # Tests against regression fixed in:
        # https://github.com/NVIDIA/numba-cuda/pull/381
        #
        # "AttributeError: '_FunctionCompiler' object has no attribute
        # 'locals'"
        cond = None

        # The signature passed to the decorator means the test body is just
        # this definition: it passes as long as no exception is raised here.
        @cuda.jit("void(float32[::1])")
        def f(b_arg):
            b_smem = cuda.shared.array(shape=(1,), dtype=float32)

            if cond:
                b_smem[0] = b_arg[0]

    @unittest.skipIf(not TEST_BIN_DIR, "necessary binaries not generated.")
    def test_compile_all_with_external_functions(self):
        """``compile_all`` returns the kernel plus the linked external code."""
        links = [
            test_device_functions_a,
            test_device_functions_cubin,
            test_device_functions_cu,
            test_device_functions_fatbin,
            test_device_functions_fatbin_multi,
            test_device_functions_o,
            test_device_functions_ptx,
            test_device_functions_ltoir,
        ]
        for link in links:
            with self.subTest(link=link):
                add = cuda.declare_device(
                    "add_from_numba", "uint32(uint32, uint32)", link=[link]
                )

                def f(z, x, y):
                    z[0] = add(x, y)

                code_list, resty = compile_all(
                    f, (uint32[::1], uint32, uint32), device=False, abi="numba"
                )

                # unittest assertions rather than bare asserts: they are not
                # stripped under -O and report both operands on failure.
                self.assertEqual(resty, void)
                self.assertEqual(len(code_list), 2)

                link_obj = LinkableCode.from_path(link)
                if link_obj.kind == "cu":
                    # if link is a cu file, result contains a compiled object
                    # code
                    from cuda.core.experimental import ObjectCode

                    self.assertIsInstance(code_list[1], ObjectCode)
                else:
                    self.assertEqual(code_list[1].kind, link_obj.kind)

    @unittest.skipIf(not TEST_BIN_DIR, "necessary binaries not generated.")
    def test_compile_all_lineinfo(self):
        """``lineinfo=True`` also applies to the linked CUDA source."""
        add = cuda.declare_device(
            "add", "float32(float32, float32)", link=[test_device_functions_cu]
        )

        def f(z, x, y):
            z[0] = add(x, y)

        args = (float32[::1], float32, float32)
        code_list, _ = compile_all(
            f, args, lineinfo=True, output="ptx", device=False, abi="numba"
        )
        self.assertEqual(len(code_list), 2)

        # The second entry is the code produced for the linked .cu file
        self.assertRegex(
            code_list[1].code.decode(),
            r"\.file.*test_device_functions",
        )

    @unittest.skipIf(not TEST_BIN_DIR, "necessary binaries not generated.")
    def test_compile_all_debug(self):
        """``debug=True`` also applies to the linked CUDA source."""
        add = cuda.declare_device(
            "add", "float32(float32, float32)", link=[test_device_functions_cu]
        )

        def f(z, x, y):
            z[0] = add(x, y)

        args = (float32[::1], float32, float32)
        code_list, _ = compile_all(
            f,
            args,
            debug=True,
            output="ptx",
            device=False,
            abi="numba",
            opt=False,
        )
        self.assertEqual(len(code_list), 2)

        # The second entry is the code produced for the linked .cu file
        self.assertRegex(
            code_list[1].code.decode(), r"\.section\s+\.debug_info"
        )
600
+
601
+
602
@skip_on_cudasim("Compilation unsupported in the simulator")
class TestCompileForCurrentDevice(CUDATestCase):
    """Checks that the ``*_for_current_device`` helpers pick the right arch."""

    def _check_ptx_for_current_device(self, compile_function):
        """Compile a device function and check the PTX ``.target`` line."""

        def add(x, y):
            return x + y

        ptx, _ = compile_function(add, (float32, float32), device=True)

        # Check we target the current device's compute capability, or the
        # closest compute capability supported by the current toolkit.
        closest = cuda.cudadrv.nvrtc.find_closest_arch(
            cuda.get_current_device().compute_capability
        )
        self.assertIn(f".target sm_{closest[0]}{closest[1]}", ptx)

    def test_compile_ptx_for_current_device(self):
        """``compile_ptx_for_current_device`` targets the current device."""
        self._check_ptx_for_current_device(compile_ptx_for_current_device)

    def test_compile_for_current_device(self):
        """``compile_for_current_device`` targets the current device."""
        self._check_ptx_for_current_device(compile_for_current_device)
623
+
624
+
625
@skip_on_cudasim("Compilation unsupported in the simulator")
class TestCompileOnlyTests(unittest.TestCase):
    """For tests where we can only check correctness by examining the compiler
    output rather than observing the effects of execution."""

    def test_nanosleep(self):
        """Both ``cuda.nanosleep`` calls appear as PTX instructions."""

        def use_nanosleep(x):
            # Sleep for a constant time
            cuda.nanosleep(32)
            # Sleep for a variable time
            cuda.nanosleep(x)

        ptx, _ = compile_ptx(use_nanosleep, (uint32,))

        # Count the PTX lines that mention the instruction
        nanosleep_count = sum(
            1 for line in ptx.split("\n") if "nanosleep.u32" in line
        )

        expected = 2
        failure_message = (
            f"Got {nanosleep_count} nanosleep instructions, "
            f"expected {expected}"
        )
        self.assertEqual(expected, nanosleep_count, failure_message)
653
+
654
+
655
@skip_on_cudasim("Compilation unsupported in the simulator")
class TestCompileWithLaunchBounds(unittest.TestCase):
    """Checks that ``launch_bounds`` is reflected in PTX directives.

    The directive regexes escape the leading ``.`` so that they match the
    directive itself rather than any character followed by its name.
    """

    def _test_launch_bounds_common(self, launch_bounds):
        """Compile an empty kernel with ``launch_bounds``; return the PTX.

        Also asserts the return type is ``types.NoneType`` and that a
        ``.maxntid`` directive of 128 is present.
        """

        def f():
            pass

        sig = "void()"
        ptx, resty = cuda.compile_ptx(f, sig, launch_bounds=launch_bounds)
        self.assertIsInstance(resty, types.NoneType)
        # Match either `.maxntid, 128, 1, 1` or `.maxntid 128` on a line by
        # itself:
        self.assertRegex(ptx, r"\.maxntid\s+128(?:,\s+1,\s+1)?\s*\n")
        return ptx

    def test_launch_bounds_scalar(self):
        """A scalar bound only sets ``.maxntid``."""
        ptx = self._test_launch_bounds_common(128)

        self.assertNotIn(".minnctapersm", ptx)
        self.assertNotIn(".maxclusterrank", ptx)

    def test_launch_bounds_tuple(self):
        """A one-element tuple only sets ``.maxntid``."""
        ptx = self._test_launch_bounds_common((128,))

        self.assertNotIn(".minnctapersm", ptx)
        self.assertNotIn(".maxclusterrank", ptx)

    def test_launch_bounds_with_min_cta(self):
        """A second element additionally sets ``.minnctapersm``."""
        ptx = self._test_launch_bounds_common((128, 2))

        self.assertRegex(ptx, r"\.minnctapersm\s+2")
        self.assertNotIn(".maxclusterrank", ptx)

    def test_launch_bounds_with_max_cluster_rank(self):
        """A third element additionally sets ``.maxclusterrank``."""

        def f():
            pass

        launch_bounds = (128, 2, 4)
        # This test compiles for an explicit compute capability rather than
        # the default one used by the other launch-bounds tests.
        cc = (9, 0)
        sig = "void()"
        ptx, resty = cuda.compile_ptx(
            f, sig, launch_bounds=launch_bounds, cc=cc
        )
        self.assertIsInstance(resty, types.NoneType)
        self.assertRegex(ptx, r"\.maxntid\s+128,\s+1,\s+1")

        self.assertRegex(ptx, r"\.minnctapersm\s+2")
        self.assertRegex(ptx, r"\.maxclusterrank\s+4")

    def test_too_many_launch_bounds(self):
        """More than three launch bounds raises ValueError."""

        def f():
            pass

        sig = "void()"
        launch_bounds = (128, 2, 4, 8)

        with self.assertRaisesRegex(ValueError, "Got 4 launch bounds:"):
            cuda.compile_ptx(f, sig, launch_bounds=launch_bounds)
715
+
716
+
717
+ if __name__ == "__main__":
718
+ unittest.main()