numba-cuda 0.22.0__cp312-cp312-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of numba-cuda might be problematic. Click here for more details.

Files changed (487)
  1. _numba_cuda_redirector.pth +4 -0
  2. _numba_cuda_redirector.py +89 -0
  3. numba_cuda/VERSION +1 -0
  4. numba_cuda/__init__.py +6 -0
  5. numba_cuda/_version.py +11 -0
  6. numba_cuda/numba/cuda/__init__.py +70 -0
  7. numba_cuda/numba/cuda/_internal/cuda_bf16.py +16394 -0
  8. numba_cuda/numba/cuda/_internal/cuda_fp16.py +8112 -0
  9. numba_cuda/numba/cuda/api.py +580 -0
  10. numba_cuda/numba/cuda/api_util.py +76 -0
  11. numba_cuda/numba/cuda/args.py +72 -0
  12. numba_cuda/numba/cuda/bf16.py +397 -0
  13. numba_cuda/numba/cuda/cache_hints.py +287 -0
  14. numba_cuda/numba/cuda/cext/__init__.py +2 -0
  15. numba_cuda/numba/cuda/cext/_devicearray.cpp +159 -0
  16. numba_cuda/numba/cuda/cext/_devicearray.cpython-312-aarch64-linux-gnu.so +0 -0
  17. numba_cuda/numba/cuda/cext/_devicearray.h +29 -0
  18. numba_cuda/numba/cuda/cext/_dispatcher.cpp +1098 -0
  19. numba_cuda/numba/cuda/cext/_dispatcher.cpython-312-aarch64-linux-gnu.so +0 -0
  20. numba_cuda/numba/cuda/cext/_hashtable.cpp +532 -0
  21. numba_cuda/numba/cuda/cext/_hashtable.h +135 -0
  22. numba_cuda/numba/cuda/cext/_helperlib.c +71 -0
  23. numba_cuda/numba/cuda/cext/_helperlib.cpython-312-aarch64-linux-gnu.so +0 -0
  24. numba_cuda/numba/cuda/cext/_helpermod.c +82 -0
  25. numba_cuda/numba/cuda/cext/_pymodule.h +38 -0
  26. numba_cuda/numba/cuda/cext/_typeconv.cpp +206 -0
  27. numba_cuda/numba/cuda/cext/_typeconv.cpython-312-aarch64-linux-gnu.so +0 -0
  28. numba_cuda/numba/cuda/cext/_typeof.cpp +1159 -0
  29. numba_cuda/numba/cuda/cext/_typeof.h +19 -0
  30. numba_cuda/numba/cuda/cext/capsulethunk.h +111 -0
  31. numba_cuda/numba/cuda/cext/mviewbuf.c +385 -0
  32. numba_cuda/numba/cuda/cext/mviewbuf.cpython-312-aarch64-linux-gnu.so +0 -0
  33. numba_cuda/numba/cuda/cext/typeconv.cpp +212 -0
  34. numba_cuda/numba/cuda/cext/typeconv.hpp +101 -0
  35. numba_cuda/numba/cuda/cg.py +67 -0
  36. numba_cuda/numba/cuda/cgutils.py +1294 -0
  37. numba_cuda/numba/cuda/cloudpickle/__init__.py +21 -0
  38. numba_cuda/numba/cuda/cloudpickle/cloudpickle.py +1598 -0
  39. numba_cuda/numba/cuda/cloudpickle/cloudpickle_fast.py +17 -0
  40. numba_cuda/numba/cuda/codegen.py +541 -0
  41. numba_cuda/numba/cuda/compiler.py +1396 -0
  42. numba_cuda/numba/cuda/core/analysis.py +758 -0
  43. numba_cuda/numba/cuda/core/annotations/__init__.py +0 -0
  44. numba_cuda/numba/cuda/core/annotations/pretty_annotate.py +288 -0
  45. numba_cuda/numba/cuda/core/annotations/type_annotations.py +305 -0
  46. numba_cuda/numba/cuda/core/base.py +1332 -0
  47. numba_cuda/numba/cuda/core/boxing.py +1411 -0
  48. numba_cuda/numba/cuda/core/bytecode.py +728 -0
  49. numba_cuda/numba/cuda/core/byteflow.py +2346 -0
  50. numba_cuda/numba/cuda/core/caching.py +744 -0
  51. numba_cuda/numba/cuda/core/callconv.py +392 -0
  52. numba_cuda/numba/cuda/core/codegen.py +171 -0
  53. numba_cuda/numba/cuda/core/compiler.py +199 -0
  54. numba_cuda/numba/cuda/core/compiler_lock.py +85 -0
  55. numba_cuda/numba/cuda/core/compiler_machinery.py +497 -0
  56. numba_cuda/numba/cuda/core/config.py +650 -0
  57. numba_cuda/numba/cuda/core/consts.py +124 -0
  58. numba_cuda/numba/cuda/core/controlflow.py +989 -0
  59. numba_cuda/numba/cuda/core/entrypoints.py +57 -0
  60. numba_cuda/numba/cuda/core/environment.py +66 -0
  61. numba_cuda/numba/cuda/core/errors.py +917 -0
  62. numba_cuda/numba/cuda/core/event.py +511 -0
  63. numba_cuda/numba/cuda/core/funcdesc.py +330 -0
  64. numba_cuda/numba/cuda/core/generators.py +387 -0
  65. numba_cuda/numba/cuda/core/imputils.py +509 -0
  66. numba_cuda/numba/cuda/core/inline_closurecall.py +1787 -0
  67. numba_cuda/numba/cuda/core/interpreter.py +3617 -0
  68. numba_cuda/numba/cuda/core/ir.py +1812 -0
  69. numba_cuda/numba/cuda/core/ir_utils.py +2638 -0
  70. numba_cuda/numba/cuda/core/optional.py +129 -0
  71. numba_cuda/numba/cuda/core/options.py +262 -0
  72. numba_cuda/numba/cuda/core/postproc.py +249 -0
  73. numba_cuda/numba/cuda/core/pythonapi.py +1859 -0
  74. numba_cuda/numba/cuda/core/registry.py +46 -0
  75. numba_cuda/numba/cuda/core/removerefctpass.py +123 -0
  76. numba_cuda/numba/cuda/core/rewrites/__init__.py +26 -0
  77. numba_cuda/numba/cuda/core/rewrites/ir_print.py +91 -0
  78. numba_cuda/numba/cuda/core/rewrites/registry.py +104 -0
  79. numba_cuda/numba/cuda/core/rewrites/static_binop.py +41 -0
  80. numba_cuda/numba/cuda/core/rewrites/static_getitem.py +189 -0
  81. numba_cuda/numba/cuda/core/rewrites/static_raise.py +100 -0
  82. numba_cuda/numba/cuda/core/sigutils.py +68 -0
  83. numba_cuda/numba/cuda/core/ssa.py +498 -0
  84. numba_cuda/numba/cuda/core/targetconfig.py +330 -0
  85. numba_cuda/numba/cuda/core/tracing.py +231 -0
  86. numba_cuda/numba/cuda/core/transforms.py +956 -0
  87. numba_cuda/numba/cuda/core/typed_passes.py +867 -0
  88. numba_cuda/numba/cuda/core/typeinfer.py +1950 -0
  89. numba_cuda/numba/cuda/core/unsafe/__init__.py +0 -0
  90. numba_cuda/numba/cuda/core/unsafe/bytes.py +67 -0
  91. numba_cuda/numba/cuda/core/unsafe/eh.py +67 -0
  92. numba_cuda/numba/cuda/core/unsafe/refcount.py +98 -0
  93. numba_cuda/numba/cuda/core/untyped_passes.py +1979 -0
  94. numba_cuda/numba/cuda/cpython/builtins.py +1153 -0
  95. numba_cuda/numba/cuda/cpython/charseq.py +1218 -0
  96. numba_cuda/numba/cuda/cpython/cmathimpl.py +560 -0
  97. numba_cuda/numba/cuda/cpython/enumimpl.py +103 -0
  98. numba_cuda/numba/cuda/cpython/iterators.py +167 -0
  99. numba_cuda/numba/cuda/cpython/listobj.py +1326 -0
  100. numba_cuda/numba/cuda/cpython/mathimpl.py +499 -0
  101. numba_cuda/numba/cuda/cpython/numbers.py +1475 -0
  102. numba_cuda/numba/cuda/cpython/rangeobj.py +289 -0
  103. numba_cuda/numba/cuda/cpython/slicing.py +322 -0
  104. numba_cuda/numba/cuda/cpython/tupleobj.py +456 -0
  105. numba_cuda/numba/cuda/cpython/unicode.py +2865 -0
  106. numba_cuda/numba/cuda/cpython/unicode_support.py +1597 -0
  107. numba_cuda/numba/cuda/cpython/unsafe/__init__.py +0 -0
  108. numba_cuda/numba/cuda/cpython/unsafe/numbers.py +64 -0
  109. numba_cuda/numba/cuda/cpython/unsafe/tuple.py +92 -0
  110. numba_cuda/numba/cuda/cuda_paths.py +691 -0
  111. numba_cuda/numba/cuda/cudadecl.py +543 -0
  112. numba_cuda/numba/cuda/cudadrv/__init__.py +14 -0
  113. numba_cuda/numba/cuda/cudadrv/devicearray.py +954 -0
  114. numba_cuda/numba/cuda/cudadrv/devices.py +249 -0
  115. numba_cuda/numba/cuda/cudadrv/driver.py +3238 -0
  116. numba_cuda/numba/cuda/cudadrv/drvapi.py +435 -0
  117. numba_cuda/numba/cuda/cudadrv/dummyarray.py +562 -0
  118. numba_cuda/numba/cuda/cudadrv/enums.py +613 -0
  119. numba_cuda/numba/cuda/cudadrv/error.py +48 -0
  120. numba_cuda/numba/cuda/cudadrv/libs.py +220 -0
  121. numba_cuda/numba/cuda/cudadrv/linkable_code.py +184 -0
  122. numba_cuda/numba/cuda/cudadrv/mappings.py +14 -0
  123. numba_cuda/numba/cuda/cudadrv/ndarray.py +26 -0
  124. numba_cuda/numba/cuda/cudadrv/nvrtc.py +193 -0
  125. numba_cuda/numba/cuda/cudadrv/nvvm.py +756 -0
  126. numba_cuda/numba/cuda/cudadrv/rtapi.py +13 -0
  127. numba_cuda/numba/cuda/cudadrv/runtime.py +34 -0
  128. numba_cuda/numba/cuda/cudaimpl.py +983 -0
  129. numba_cuda/numba/cuda/cudamath.py +149 -0
  130. numba_cuda/numba/cuda/datamodel/__init__.py +7 -0
  131. numba_cuda/numba/cuda/datamodel/cuda_manager.py +66 -0
  132. numba_cuda/numba/cuda/datamodel/cuda_models.py +1446 -0
  133. numba_cuda/numba/cuda/datamodel/cuda_packer.py +224 -0
  134. numba_cuda/numba/cuda/datamodel/cuda_registry.py +22 -0
  135. numba_cuda/numba/cuda/datamodel/cuda_testing.py +153 -0
  136. numba_cuda/numba/cuda/datamodel/manager.py +11 -0
  137. numba_cuda/numba/cuda/datamodel/models.py +9 -0
  138. numba_cuda/numba/cuda/datamodel/packer.py +9 -0
  139. numba_cuda/numba/cuda/datamodel/registry.py +11 -0
  140. numba_cuda/numba/cuda/datamodel/testing.py +11 -0
  141. numba_cuda/numba/cuda/debuginfo.py +997 -0
  142. numba_cuda/numba/cuda/decorators.py +294 -0
  143. numba_cuda/numba/cuda/descriptor.py +35 -0
  144. numba_cuda/numba/cuda/device_init.py +155 -0
  145. numba_cuda/numba/cuda/deviceufunc.py +1021 -0
  146. numba_cuda/numba/cuda/dispatcher.py +2463 -0
  147. numba_cuda/numba/cuda/errors.py +72 -0
  148. numba_cuda/numba/cuda/extending.py +697 -0
  149. numba_cuda/numba/cuda/flags.py +178 -0
  150. numba_cuda/numba/cuda/fp16.py +357 -0
  151. numba_cuda/numba/cuda/include/12/cuda_bf16.h +5118 -0
  152. numba_cuda/numba/cuda/include/12/cuda_bf16.hpp +3865 -0
  153. numba_cuda/numba/cuda/include/12/cuda_fp16.h +5363 -0
  154. numba_cuda/numba/cuda/include/12/cuda_fp16.hpp +3483 -0
  155. numba_cuda/numba/cuda/include/13/cuda_bf16.h +5118 -0
  156. numba_cuda/numba/cuda/include/13/cuda_bf16.hpp +3865 -0
  157. numba_cuda/numba/cuda/include/13/cuda_fp16.h +5363 -0
  158. numba_cuda/numba/cuda/include/13/cuda_fp16.hpp +3483 -0
  159. numba_cuda/numba/cuda/initialize.py +24 -0
  160. numba_cuda/numba/cuda/intrinsics.py +531 -0
  161. numba_cuda/numba/cuda/itanium_mangler.py +214 -0
  162. numba_cuda/numba/cuda/kernels/__init__.py +2 -0
  163. numba_cuda/numba/cuda/kernels/reduction.py +265 -0
  164. numba_cuda/numba/cuda/kernels/transpose.py +65 -0
  165. numba_cuda/numba/cuda/libdevice.py +3386 -0
  166. numba_cuda/numba/cuda/libdevicedecl.py +20 -0
  167. numba_cuda/numba/cuda/libdevicefuncs.py +1060 -0
  168. numba_cuda/numba/cuda/libdeviceimpl.py +88 -0
  169. numba_cuda/numba/cuda/locks.py +19 -0
  170. numba_cuda/numba/cuda/lowering.py +1980 -0
  171. numba_cuda/numba/cuda/mathimpl.py +374 -0
  172. numba_cuda/numba/cuda/memory_management/__init__.py +4 -0
  173. numba_cuda/numba/cuda/memory_management/memsys.cu +99 -0
  174. numba_cuda/numba/cuda/memory_management/memsys.cuh +22 -0
  175. numba_cuda/numba/cuda/memory_management/nrt.cu +212 -0
  176. numba_cuda/numba/cuda/memory_management/nrt.cuh +48 -0
  177. numba_cuda/numba/cuda/memory_management/nrt.py +390 -0
  178. numba_cuda/numba/cuda/memory_management/nrt_context.py +438 -0
  179. numba_cuda/numba/cuda/misc/appdirs.py +594 -0
  180. numba_cuda/numba/cuda/misc/cffiimpl.py +24 -0
  181. numba_cuda/numba/cuda/misc/coverage_support.py +43 -0
  182. numba_cuda/numba/cuda/misc/dump_style.py +41 -0
  183. numba_cuda/numba/cuda/misc/findlib.py +75 -0
  184. numba_cuda/numba/cuda/misc/firstlinefinder.py +96 -0
  185. numba_cuda/numba/cuda/misc/gdb_hook.py +240 -0
  186. numba_cuda/numba/cuda/misc/literal.py +28 -0
  187. numba_cuda/numba/cuda/misc/llvm_pass_timings.py +412 -0
  188. numba_cuda/numba/cuda/misc/special.py +94 -0
  189. numba_cuda/numba/cuda/models.py +56 -0
  190. numba_cuda/numba/cuda/np/arraymath.py +5130 -0
  191. numba_cuda/numba/cuda/np/arrayobj.py +7635 -0
  192. numba_cuda/numba/cuda/np/extensions.py +11 -0
  193. numba_cuda/numba/cuda/np/linalg.py +3087 -0
  194. numba_cuda/numba/cuda/np/math/__init__.py +0 -0
  195. numba_cuda/numba/cuda/np/math/cmathimpl.py +558 -0
  196. numba_cuda/numba/cuda/np/math/mathimpl.py +487 -0
  197. numba_cuda/numba/cuda/np/math/numbers.py +1461 -0
  198. numba_cuda/numba/cuda/np/npdatetime.py +969 -0
  199. numba_cuda/numba/cuda/np/npdatetime_helpers.py +217 -0
  200. numba_cuda/numba/cuda/np/npyfuncs.py +1808 -0
  201. numba_cuda/numba/cuda/np/npyimpl.py +1027 -0
  202. numba_cuda/numba/cuda/np/numpy_support.py +798 -0
  203. numba_cuda/numba/cuda/np/polynomial/__init__.py +4 -0
  204. numba_cuda/numba/cuda/np/polynomial/polynomial_core.py +242 -0
  205. numba_cuda/numba/cuda/np/polynomial/polynomial_functions.py +380 -0
  206. numba_cuda/numba/cuda/np/ufunc/__init__.py +4 -0
  207. numba_cuda/numba/cuda/np/ufunc/decorators.py +203 -0
  208. numba_cuda/numba/cuda/np/ufunc/sigparse.py +68 -0
  209. numba_cuda/numba/cuda/np/ufunc/ufuncbuilder.py +65 -0
  210. numba_cuda/numba/cuda/np/ufunc_db.py +1282 -0
  211. numba_cuda/numba/cuda/np/unsafe/__init__.py +0 -0
  212. numba_cuda/numba/cuda/np/unsafe/ndarray.py +84 -0
  213. numba_cuda/numba/cuda/nvvmutils.py +254 -0
  214. numba_cuda/numba/cuda/printimpl.py +126 -0
  215. numba_cuda/numba/cuda/random.py +308 -0
  216. numba_cuda/numba/cuda/reshape_funcs.cu +156 -0
  217. numba_cuda/numba/cuda/serialize.py +267 -0
  218. numba_cuda/numba/cuda/simulator/__init__.py +63 -0
  219. numba_cuda/numba/cuda/simulator/_internal/__init__.py +4 -0
  220. numba_cuda/numba/cuda/simulator/_internal/cuda_bf16.py +2 -0
  221. numba_cuda/numba/cuda/simulator/api.py +179 -0
  222. numba_cuda/numba/cuda/simulator/bf16.py +4 -0
  223. numba_cuda/numba/cuda/simulator/compiler.py +38 -0
  224. numba_cuda/numba/cuda/simulator/cudadrv/__init__.py +11 -0
  225. numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py +462 -0
  226. numba_cuda/numba/cuda/simulator/cudadrv/devices.py +122 -0
  227. numba_cuda/numba/cuda/simulator/cudadrv/driver.py +66 -0
  228. numba_cuda/numba/cuda/simulator/cudadrv/drvapi.py +7 -0
  229. numba_cuda/numba/cuda/simulator/cudadrv/dummyarray.py +7 -0
  230. numba_cuda/numba/cuda/simulator/cudadrv/error.py +10 -0
  231. numba_cuda/numba/cuda/simulator/cudadrv/libs.py +10 -0
  232. numba_cuda/numba/cuda/simulator/cudadrv/linkable_code.py +61 -0
  233. numba_cuda/numba/cuda/simulator/cudadrv/nvrtc.py +11 -0
  234. numba_cuda/numba/cuda/simulator/cudadrv/nvvm.py +32 -0
  235. numba_cuda/numba/cuda/simulator/cudadrv/runtime.py +22 -0
  236. numba_cuda/numba/cuda/simulator/dispatcher.py +11 -0
  237. numba_cuda/numba/cuda/simulator/kernel.py +320 -0
  238. numba_cuda/numba/cuda/simulator/kernelapi.py +509 -0
  239. numba_cuda/numba/cuda/simulator/memory_management/__init__.py +4 -0
  240. numba_cuda/numba/cuda/simulator/memory_management/nrt.py +21 -0
  241. numba_cuda/numba/cuda/simulator/reduction.py +19 -0
  242. numba_cuda/numba/cuda/simulator/tests/support.py +4 -0
  243. numba_cuda/numba/cuda/simulator/vector_types.py +65 -0
  244. numba_cuda/numba/cuda/simulator_init.py +18 -0
  245. numba_cuda/numba/cuda/stubs.py +624 -0
  246. numba_cuda/numba/cuda/target.py +505 -0
  247. numba_cuda/numba/cuda/testing.py +347 -0
  248. numba_cuda/numba/cuda/tests/__init__.py +62 -0
  249. numba_cuda/numba/cuda/tests/benchmarks/__init__.py +0 -0
  250. numba_cuda/numba/cuda/tests/benchmarks/test_kernel_launch.py +119 -0
  251. numba_cuda/numba/cuda/tests/cloudpickle_main_class.py +9 -0
  252. numba_cuda/numba/cuda/tests/core/serialize_usecases.py +113 -0
  253. numba_cuda/numba/cuda/tests/core/test_itanium_mangler.py +83 -0
  254. numba_cuda/numba/cuda/tests/core/test_serialize.py +371 -0
  255. numba_cuda/numba/cuda/tests/cudadrv/__init__.py +9 -0
  256. numba_cuda/numba/cuda/tests/cudadrv/test_array_attr.py +147 -0
  257. numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py +161 -0
  258. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_array_slicing.py +397 -0
  259. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_auto_context.py +24 -0
  260. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py +180 -0
  261. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py +313 -0
  262. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_memory.py +191 -0
  263. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py +621 -0
  264. numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py +247 -0
  265. numba_cuda/numba/cuda/tests/cudadrv/test_detect.py +100 -0
  266. numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +200 -0
  267. numba_cuda/numba/cuda/tests/cudadrv/test_events.py +53 -0
  268. numba_cuda/numba/cuda/tests/cudadrv/test_host_alloc.py +72 -0
  269. numba_cuda/numba/cuda/tests/cudadrv/test_init.py +138 -0
  270. numba_cuda/numba/cuda/tests/cudadrv/test_inline_ptx.py +43 -0
  271. numba_cuda/numba/cuda/tests/cudadrv/test_is_fp16.py +15 -0
  272. numba_cuda/numba/cuda/tests/cudadrv/test_linkable_code.py +58 -0
  273. numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +348 -0
  274. numba_cuda/numba/cuda/tests/cudadrv/test_managed_alloc.py +128 -0
  275. numba_cuda/numba/cuda/tests/cudadrv/test_module_callbacks.py +301 -0
  276. numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py +174 -0
  277. numba_cuda/numba/cuda/tests/cudadrv/test_nvrtc.py +28 -0
  278. numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py +185 -0
  279. numba_cuda/numba/cuda/tests/cudadrv/test_pinned.py +39 -0
  280. numba_cuda/numba/cuda/tests/cudadrv/test_profiler.py +23 -0
  281. numba_cuda/numba/cuda/tests/cudadrv/test_reset_device.py +38 -0
  282. numba_cuda/numba/cuda/tests/cudadrv/test_runtime.py +48 -0
  283. numba_cuda/numba/cuda/tests/cudadrv/test_select_device.py +44 -0
  284. numba_cuda/numba/cuda/tests/cudadrv/test_streams.py +127 -0
  285. numba_cuda/numba/cuda/tests/cudapy/__init__.py +9 -0
  286. numba_cuda/numba/cuda/tests/cudapy/cache_usecases.py +231 -0
  287. numba_cuda/numba/cuda/tests/cudapy/cache_with_cpu_usecases.py +50 -0
  288. numba_cuda/numba/cuda/tests/cudapy/cg_cache_usecases.py +36 -0
  289. numba_cuda/numba/cuda/tests/cudapy/complex_usecases.py +116 -0
  290. numba_cuda/numba/cuda/tests/cudapy/enum_usecases.py +59 -0
  291. numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +62 -0
  292. numba_cuda/numba/cuda/tests/cudapy/jitlink.ptx +28 -0
  293. numba_cuda/numba/cuda/tests/cudapy/overload_usecases.py +33 -0
  294. numba_cuda/numba/cuda/tests/cudapy/recursion_usecases.py +104 -0
  295. numba_cuda/numba/cuda/tests/cudapy/test_alignment.py +47 -0
  296. numba_cuda/numba/cuda/tests/cudapy/test_analysis.py +1122 -0
  297. numba_cuda/numba/cuda/tests/cudapy/test_array.py +344 -0
  298. numba_cuda/numba/cuda/tests/cudapy/test_array_alignment.py +268 -0
  299. numba_cuda/numba/cuda/tests/cudapy/test_array_args.py +203 -0
  300. numba_cuda/numba/cuda/tests/cudapy/test_array_methods.py +63 -0
  301. numba_cuda/numba/cuda/tests/cudapy/test_array_reductions.py +360 -0
  302. numba_cuda/numba/cuda/tests/cudapy/test_atomics.py +1815 -0
  303. numba_cuda/numba/cuda/tests/cudapy/test_bfloat16.py +599 -0
  304. numba_cuda/numba/cuda/tests/cudapy/test_bfloat16_bindings.py +377 -0
  305. numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py +160 -0
  306. numba_cuda/numba/cuda/tests/cudapy/test_boolean.py +27 -0
  307. numba_cuda/numba/cuda/tests/cudapy/test_byteflow.py +98 -0
  308. numba_cuda/numba/cuda/tests/cudapy/test_cache_hints.py +210 -0
  309. numba_cuda/numba/cuda/tests/cudapy/test_caching.py +683 -0
  310. numba_cuda/numba/cuda/tests/cudapy/test_casting.py +265 -0
  311. numba_cuda/numba/cuda/tests/cudapy/test_cffi.py +42 -0
  312. numba_cuda/numba/cuda/tests/cudapy/test_compiler.py +718 -0
  313. numba_cuda/numba/cuda/tests/cudapy/test_complex.py +370 -0
  314. numba_cuda/numba/cuda/tests/cudapy/test_complex_kernel.py +23 -0
  315. numba_cuda/numba/cuda/tests/cudapy/test_const_string.py +142 -0
  316. numba_cuda/numba/cuda/tests/cudapy/test_constmem.py +178 -0
  317. numba_cuda/numba/cuda/tests/cudapy/test_cooperative_groups.py +193 -0
  318. numba_cuda/numba/cuda/tests/cudapy/test_copy_propagate.py +131 -0
  319. numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py +438 -0
  320. numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py +94 -0
  321. numba_cuda/numba/cuda/tests/cudapy/test_datetime.py +101 -0
  322. numba_cuda/numba/cuda/tests/cudapy/test_debug.py +105 -0
  323. numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +978 -0
  324. numba_cuda/numba/cuda/tests/cudapy/test_debuginfo_types.py +476 -0
  325. numba_cuda/numba/cuda/tests/cudapy/test_device_func.py +500 -0
  326. numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +820 -0
  327. numba_cuda/numba/cuda/tests/cudapy/test_enums.py +152 -0
  328. numba_cuda/numba/cuda/tests/cudapy/test_errors.py +111 -0
  329. numba_cuda/numba/cuda/tests/cudapy/test_exception.py +170 -0
  330. numba_cuda/numba/cuda/tests/cudapy/test_extending.py +1088 -0
  331. numba_cuda/numba/cuda/tests/cudapy/test_extending_types.py +71 -0
  332. numba_cuda/numba/cuda/tests/cudapy/test_fastmath.py +265 -0
  333. numba_cuda/numba/cuda/tests/cudapy/test_flow_control.py +1433 -0
  334. numba_cuda/numba/cuda/tests/cudapy/test_forall.py +57 -0
  335. numba_cuda/numba/cuda/tests/cudapy/test_freevar.py +34 -0
  336. numba_cuda/numba/cuda/tests/cudapy/test_frexp_ldexp.py +69 -0
  337. numba_cuda/numba/cuda/tests/cudapy/test_globals.py +62 -0
  338. numba_cuda/numba/cuda/tests/cudapy/test_gufunc.py +474 -0
  339. numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scalar.py +167 -0
  340. numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scheduling.py +92 -0
  341. numba_cuda/numba/cuda/tests/cudapy/test_idiv.py +39 -0
  342. numba_cuda/numba/cuda/tests/cudapy/test_inline.py +170 -0
  343. numba_cuda/numba/cuda/tests/cudapy/test_inspect.py +255 -0
  344. numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py +1219 -0
  345. numba_cuda/numba/cuda/tests/cudapy/test_ipc.py +263 -0
  346. numba_cuda/numba/cuda/tests/cudapy/test_ir.py +598 -0
  347. numba_cuda/numba/cuda/tests/cudapy/test_ir_utils.py +276 -0
  348. numba_cuda/numba/cuda/tests/cudapy/test_iterators.py +101 -0
  349. numba_cuda/numba/cuda/tests/cudapy/test_lang.py +68 -0
  350. numba_cuda/numba/cuda/tests/cudapy/test_laplace.py +123 -0
  351. numba_cuda/numba/cuda/tests/cudapy/test_libdevice.py +194 -0
  352. numba_cuda/numba/cuda/tests/cudapy/test_lineinfo.py +220 -0
  353. numba_cuda/numba/cuda/tests/cudapy/test_localmem.py +173 -0
  354. numba_cuda/numba/cuda/tests/cudapy/test_make_function_to_jit_function.py +364 -0
  355. numba_cuda/numba/cuda/tests/cudapy/test_mandel.py +47 -0
  356. numba_cuda/numba/cuda/tests/cudapy/test_math.py +842 -0
  357. numba_cuda/numba/cuda/tests/cudapy/test_matmul.py +76 -0
  358. numba_cuda/numba/cuda/tests/cudapy/test_minmax.py +78 -0
  359. numba_cuda/numba/cuda/tests/cudapy/test_montecarlo.py +25 -0
  360. numba_cuda/numba/cuda/tests/cudapy/test_multigpu.py +145 -0
  361. numba_cuda/numba/cuda/tests/cudapy/test_multiprocessing.py +39 -0
  362. numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py +82 -0
  363. numba_cuda/numba/cuda/tests/cudapy/test_nondet.py +53 -0
  364. numba_cuda/numba/cuda/tests/cudapy/test_operator.py +504 -0
  365. numba_cuda/numba/cuda/tests/cudapy/test_optimization.py +93 -0
  366. numba_cuda/numba/cuda/tests/cudapy/test_overload.py +402 -0
  367. numba_cuda/numba/cuda/tests/cudapy/test_powi.py +128 -0
  368. numba_cuda/numba/cuda/tests/cudapy/test_print.py +193 -0
  369. numba_cuda/numba/cuda/tests/cudapy/test_py2_div_issue.py +37 -0
  370. numba_cuda/numba/cuda/tests/cudapy/test_random.py +117 -0
  371. numba_cuda/numba/cuda/tests/cudapy/test_record_dtype.py +614 -0
  372. numba_cuda/numba/cuda/tests/cudapy/test_recursion.py +130 -0
  373. numba_cuda/numba/cuda/tests/cudapy/test_reduction.py +94 -0
  374. numba_cuda/numba/cuda/tests/cudapy/test_retrieve_autoconverted_arrays.py +83 -0
  375. numba_cuda/numba/cuda/tests/cudapy/test_serialize.py +86 -0
  376. numba_cuda/numba/cuda/tests/cudapy/test_slicing.py +40 -0
  377. numba_cuda/numba/cuda/tests/cudapy/test_sm.py +457 -0
  378. numba_cuda/numba/cuda/tests/cudapy/test_sm_creation.py +233 -0
  379. numba_cuda/numba/cuda/tests/cudapy/test_ssa.py +454 -0
  380. numba_cuda/numba/cuda/tests/cudapy/test_stream_api.py +56 -0
  381. numba_cuda/numba/cuda/tests/cudapy/test_sync.py +277 -0
  382. numba_cuda/numba/cuda/tests/cudapy/test_tracing.py +200 -0
  383. numba_cuda/numba/cuda/tests/cudapy/test_transpose.py +90 -0
  384. numba_cuda/numba/cuda/tests/cudapy/test_typeconv.py +333 -0
  385. numba_cuda/numba/cuda/tests/cudapy/test_typeinfer.py +538 -0
  386. numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +585 -0
  387. numba_cuda/numba/cuda/tests/cudapy/test_userexc.py +42 -0
  388. numba_cuda/numba/cuda/tests/cudapy/test_vector_type.py +485 -0
  389. numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py +312 -0
  390. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_complex.py +23 -0
  391. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py +183 -0
  392. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_device.py +40 -0
  393. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_scalar_arg.py +40 -0
  394. numba_cuda/numba/cuda/tests/cudapy/test_warning.py +206 -0
  395. numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +446 -0
  396. numba_cuda/numba/cuda/tests/cudasim/__init__.py +9 -0
  397. numba_cuda/numba/cuda/tests/cudasim/support.py +9 -0
  398. numba_cuda/numba/cuda/tests/cudasim/test_cudasim_issues.py +111 -0
  399. numba_cuda/numba/cuda/tests/data/__init__.py +2 -0
  400. numba_cuda/numba/cuda/tests/data/cta_barrier.cu +28 -0
  401. numba_cuda/numba/cuda/tests/data/cuda_include.cu +10 -0
  402. numba_cuda/numba/cuda/tests/data/error.cu +12 -0
  403. numba_cuda/numba/cuda/tests/data/include/add.cuh +8 -0
  404. numba_cuda/numba/cuda/tests/data/jitlink.cu +28 -0
  405. numba_cuda/numba/cuda/tests/data/jitlink.ptx +49 -0
  406. numba_cuda/numba/cuda/tests/data/warn.cu +12 -0
  407. numba_cuda/numba/cuda/tests/doc_examples/__init__.py +9 -0
  408. numba_cuda/numba/cuda/tests/doc_examples/ffi/__init__.py +2 -0
  409. numba_cuda/numba/cuda/tests/doc_examples/ffi/functions.cu +54 -0
  410. numba_cuda/numba/cuda/tests/doc_examples/ffi/include/mul.cuh +8 -0
  411. numba_cuda/numba/cuda/tests/doc_examples/ffi/saxpy.cu +14 -0
  412. numba_cuda/numba/cuda/tests/doc_examples/test_cg.py +86 -0
  413. numba_cuda/numba/cuda/tests/doc_examples/test_cpointer.py +68 -0
  414. numba_cuda/numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py +81 -0
  415. numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py +141 -0
  416. numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py +160 -0
  417. numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py +180 -0
  418. numba_cuda/numba/cuda/tests/doc_examples/test_montecarlo.py +119 -0
  419. numba_cuda/numba/cuda/tests/doc_examples/test_random.py +66 -0
  420. numba_cuda/numba/cuda/tests/doc_examples/test_reduction.py +80 -0
  421. numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py +206 -0
  422. numba_cuda/numba/cuda/tests/doc_examples/test_ufunc.py +53 -0
  423. numba_cuda/numba/cuda/tests/doc_examples/test_vecadd.py +76 -0
  424. numba_cuda/numba/cuda/tests/nocuda/__init__.py +9 -0
  425. numba_cuda/numba/cuda/tests/nocuda/test_dummyarray.py +452 -0
  426. numba_cuda/numba/cuda/tests/nocuda/test_function_resolution.py +48 -0
  427. numba_cuda/numba/cuda/tests/nocuda/test_import.py +63 -0
  428. numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py +252 -0
  429. numba_cuda/numba/cuda/tests/nocuda/test_nvvm.py +59 -0
  430. numba_cuda/numba/cuda/tests/nrt/__init__.py +9 -0
  431. numba_cuda/numba/cuda/tests/nrt/test_nrt.py +387 -0
  432. numba_cuda/numba/cuda/tests/nrt/test_nrt_refct.py +124 -0
  433. numba_cuda/numba/cuda/tests/support.py +900 -0
  434. numba_cuda/numba/cuda/typeconv/__init__.py +4 -0
  435. numba_cuda/numba/cuda/typeconv/castgraph.py +137 -0
  436. numba_cuda/numba/cuda/typeconv/rules.py +63 -0
  437. numba_cuda/numba/cuda/typeconv/typeconv.py +121 -0
  438. numba_cuda/numba/cuda/types/__init__.py +233 -0
  439. numba_cuda/numba/cuda/types/__init__.pyi +167 -0
  440. numba_cuda/numba/cuda/types/abstract.py +9 -0
  441. numba_cuda/numba/cuda/types/common.py +9 -0
  442. numba_cuda/numba/cuda/types/containers.py +9 -0
  443. numba_cuda/numba/cuda/types/cuda_abstract.py +533 -0
  444. numba_cuda/numba/cuda/types/cuda_common.py +110 -0
  445. numba_cuda/numba/cuda/types/cuda_containers.py +971 -0
  446. numba_cuda/numba/cuda/types/cuda_function_type.py +230 -0
  447. numba_cuda/numba/cuda/types/cuda_functions.py +798 -0
  448. numba_cuda/numba/cuda/types/cuda_iterators.py +120 -0
  449. numba_cuda/numba/cuda/types/cuda_misc.py +569 -0
  450. numba_cuda/numba/cuda/types/cuda_npytypes.py +690 -0
  451. numba_cuda/numba/cuda/types/cuda_scalars.py +280 -0
  452. numba_cuda/numba/cuda/types/ext_types.py +101 -0
  453. numba_cuda/numba/cuda/types/function_type.py +11 -0
  454. numba_cuda/numba/cuda/types/functions.py +9 -0
  455. numba_cuda/numba/cuda/types/iterators.py +9 -0
  456. numba_cuda/numba/cuda/types/misc.py +9 -0
  457. numba_cuda/numba/cuda/types/npytypes.py +9 -0
  458. numba_cuda/numba/cuda/types/scalars.py +9 -0
  459. numba_cuda/numba/cuda/typing/__init__.py +19 -0
  460. numba_cuda/numba/cuda/typing/arraydecl.py +939 -0
  461. numba_cuda/numba/cuda/typing/asnumbatype.py +130 -0
  462. numba_cuda/numba/cuda/typing/bufproto.py +70 -0
  463. numba_cuda/numba/cuda/typing/builtins.py +1209 -0
  464. numba_cuda/numba/cuda/typing/cffi_utils.py +219 -0
  465. numba_cuda/numba/cuda/typing/cmathdecl.py +47 -0
  466. numba_cuda/numba/cuda/typing/collections.py +138 -0
  467. numba_cuda/numba/cuda/typing/context.py +782 -0
  468. numba_cuda/numba/cuda/typing/ctypes_utils.py +125 -0
  469. numba_cuda/numba/cuda/typing/dictdecl.py +63 -0
  470. numba_cuda/numba/cuda/typing/enumdecl.py +74 -0
  471. numba_cuda/numba/cuda/typing/listdecl.py +147 -0
  472. numba_cuda/numba/cuda/typing/mathdecl.py +158 -0
  473. numba_cuda/numba/cuda/typing/npdatetime.py +322 -0
  474. numba_cuda/numba/cuda/typing/npydecl.py +749 -0
  475. numba_cuda/numba/cuda/typing/setdecl.py +115 -0
  476. numba_cuda/numba/cuda/typing/templates.py +1446 -0
  477. numba_cuda/numba/cuda/typing/typeof.py +301 -0
  478. numba_cuda/numba/cuda/ufuncs.py +746 -0
  479. numba_cuda/numba/cuda/utils.py +724 -0
  480. numba_cuda/numba/cuda/vector_types.py +214 -0
  481. numba_cuda/numba/cuda/vectorizers.py +260 -0
  482. numba_cuda-0.22.0.dist-info/METADATA +109 -0
  483. numba_cuda-0.22.0.dist-info/RECORD +487 -0
  484. numba_cuda-0.22.0.dist-info/WHEEL +6 -0
  485. numba_cuda-0.22.0.dist-info/licenses/LICENSE +26 -0
  486. numba_cuda-0.22.0.dist-info/licenses/LICENSE.numba +24 -0
  487. numba_cuda-0.22.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,756 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: BSD-2-Clause
3
+
4
+ """
5
+ This is a direct translation of nvvm.h
6
+ """
7
+
8
+ import logging
9
+ import re
10
+ import sys
11
+ import warnings
12
+ from ctypes import c_void_p, c_int, POINTER, c_char_p, c_size_t, byref, c_char
13
+
14
+ import threading
15
+
16
+ from llvmlite import ir
17
+
18
+ from .error import NvvmError, NvvmSupportError, NvvmWarning
19
+ from .libs import get_libdevice, open_libdevice, open_cudalib
20
+ from numba.cuda import cgutils
21
+
22
+
23
logger = logging.getLogger(__name__)

# Address space identifiers.
ADDRSPACE_GENERIC = 0
ADDRSPACE_GLOBAL = 1
ADDRSPACE_SHARED = 3
ADDRSPACE_CONSTANT = 4
ADDRSPACE_LOCAL = 5

# ctypes type of the opaque handle for a compilation unit.
nvvm_program = c_void_p

# ctypes type of the result code returned by the NVVM entry points.
nvvm_result = c_int

# Result code names; the position of a name in this list is its numeric value.
RESULT_CODE_NAMES = [
    "NVVM_SUCCESS",
    "NVVM_ERROR_OUT_OF_MEMORY",
    "NVVM_ERROR_PROGRAM_CREATION_FAILURE",
    "NVVM_ERROR_IR_VERSION_MISMATCH",
    "NVVM_ERROR_INVALID_INPUT",
    "NVVM_ERROR_INVALID_PROGRAM",
    "NVVM_ERROR_INVALID_IR",
    "NVVM_ERROR_INVALID_OPTION",
    "NVVM_ERROR_NO_MODULE_IN_PROGRAM",
    "NVVM_ERROR_COMPILATION",
]

# Expose every result code as a module-level integer constant of the same
# name (NVVM_SUCCESS == 0, NVVM_ERROR_OUT_OF_MEMORY == 1, ...).
for i, k in enumerate(RESULT_CODE_NAMES):
    globals()[k] = i

# Data layout string, assembled from its dash-separated components.
_datalayout = "-".join(
    [
        "e",
        "p:64:64:64",
        "i1:8:8",
        "i8:8:8",
        "i16:16:16",
        "i32:32:32",
        "i64:64:64",
        "i128:128:128",
        "f32:32:32",
        "f64:64:64",
        "v16:16:16",
        "v32:32:32",
        "v64:64:64",
        "v128:128:128",
        "n16:32:64",
    ]
)
58
+
59
+
60
def is_available():
    """
    Report whether libNVVM is available.

    Returns ``True`` when ``NVVM()`` can be constructed and ``False`` when
    constructing it raises ``NvvmSupportError``. Any other exception
    propagates to the caller.
    """
    try:
        NVVM()
    except NvvmSupportError:
        return False
    return True
70
+
71
+
72
# Serialises creation of the NVVM singleton.
_nvvm_lock = threading.Lock()


class NVVM(object):
    """
    Process-wide singleton exposing the libNVVM entry points.

    The first construction opens the library through ``open_cudalib("nvvm")``
    and binds every function named in ``_PROTOTYPES`` as an attribute of the
    instance, with its ctypes ``restype`` / ``argtypes`` set. Later
    constructions return the same object.

    Raises ``NvvmSupportError`` if the library cannot be opened.
    """

    # Maps function name -> (restype, *argtypes).
    _PROTOTYPES = {
        # nvvmResult nvvmVersion(int *major, int *minor)
        "nvvmVersion": (nvvm_result, POINTER(c_int), POINTER(c_int)),
        # nvvmResult nvvmCreateProgram(nvvmProgram *cu)
        "nvvmCreateProgram": (nvvm_result, POINTER(nvvm_program)),
        # nvvmResult nvvmDestroyProgram(nvvmProgram *cu)
        "nvvmDestroyProgram": (nvvm_result, POINTER(nvvm_program)),
        # nvvmResult nvvmAddModuleToProgram(nvvmProgram cu, const char *buffer,
        #                                   size_t size, const char *name)
        "nvvmAddModuleToProgram": (
            nvvm_result,
            nvvm_program,
            c_char_p,
            c_size_t,
            c_char_p,
        ),
        # nvvmResult nvvmLazyAddModuleToProgram(nvvmProgram cu,
        #                                       const char* buffer,
        #                                       size_t size,
        #                                       const char *name)
        "nvvmLazyAddModuleToProgram": (
            nvvm_result,
            nvvm_program,
            c_char_p,
            c_size_t,
            c_char_p,
        ),
        # nvvmResult nvvmCompileProgram(nvvmProgram cu, int numOptions,
        #                               const char **options)
        "nvvmCompileProgram": (
            nvvm_result,
            nvvm_program,
            c_int,
            POINTER(c_char_p),
        ),
        # nvvmResult nvvmGetCompiledResultSize(nvvmProgram cu,
        #                                      size_t *bufferSizeRet)
        "nvvmGetCompiledResultSize": (
            nvvm_result,
            nvvm_program,
            POINTER(c_size_t),
        ),
        # nvvmResult nvvmGetCompiledResult(nvvmProgram cu, char *buffer)
        "nvvmGetCompiledResult": (nvvm_result, nvvm_program, c_char_p),
        # nvvmResult nvvmGetProgramLogSize(nvvmProgram cu,
        #                                  size_t *bufferSizeRet)
        "nvvmGetProgramLogSize": (nvvm_result, nvvm_program, POINTER(c_size_t)),
        # nvvmResult nvvmGetProgramLog(nvvmProgram cu, char *buffer)
        "nvvmGetProgramLog": (nvvm_result, nvvm_program, c_char_p),
        # nvvmResult nvvmIRVersion (int* majorIR, int* minorIR, int* majorDbg,
        #                           int* minorDbg )
        "nvvmIRVersion": (
            nvvm_result,
            POINTER(c_int),
            POINTER(c_int),
            POINTER(c_int),
            POINTER(c_int),
        ),
        # nvvmResult nvvmVerifyProgram (nvvmProgram prog, int numOptions,
        #                               const char** options)
        "nvvmVerifyProgram": (
            nvvm_result,
            nvvm_program,
            c_int,
            POINTER(c_char_p),
        ),
    }

    # Singleton reference
    __INSTANCE = None

    def __new__(cls):
        with _nvvm_lock:
            if cls.__INSTANCE is None:
                inst = object.__new__(cls)
                try:
                    inst.driver = open_cudalib("nvvm")
                except OSError as e:
                    errmsg = "libNVVM cannot be found. Please install the cuda-toolkit conda package:\n%s"
                    raise NvvmSupportError(errmsg % e) from e

                # Find & populate functions
                for name, proto in inst._PROTOTYPES.items():
                    func = getattr(inst.driver, name)
                    func.restype = proto[0]
                    func.argtypes = proto[1:]
                    setattr(inst, name, func)

                # Publish the singleton only once it is fully populated, so
                # that a failure above (library not found, symbol missing)
                # never leaves a half-initialised instance cached.
                cls.__INSTANCE = inst

        return cls.__INSTANCE

    def __init__(self):
        # __init__ runs on every NVVM() call, including those that return
        # the already-built singleton; query the IR version only once.
        if hasattr(self, "_minorDbg"):
            return
        ir_versions = self.get_ir_version()
        self._majorIR = ir_versions[0]
        self._minorIR = ir_versions[1]
        self._majorDbg = ir_versions[2]
        self._minorDbg = ir_versions[3]

    @property
    def data_layout(self):
        """The module-level data layout string."""
        return _datalayout

    def get_version(self):
        """
        Return ``(major, minor)`` as reported by ``nvvmVersion``.

        Raises ``NvvmError`` if the call reports a non-zero result code.
        """
        major = c_int()
        minor = c_int()
        err = self.nvvmVersion(byref(major), byref(minor))
        self.check_error(err, "Failed to get version.")
        return major.value, minor.value

    def get_ir_version(self):
        """
        Return ``(majorIR, minorIR, majorDbg, minorDbg)`` as reported by
        ``nvvmIRVersion``.

        Raises ``NvvmError`` if the call reports a non-zero result code.
        """
        majorIR = c_int()
        minorIR = c_int()
        majorDbg = c_int()
        minorDbg = c_int()
        err = self.nvvmIRVersion(
            byref(majorIR), byref(minorIR), byref(majorDbg), byref(minorDbg)
        )
        self.check_error(err, "Failed to get IR version.")
        return majorIR.value, minorIR.value, majorDbg.value, minorDbg.value

    def check_error(self, error, msg, exit=False):
        """
        Turn a non-zero NVVM result code into an ``NvvmError``.

        Does nothing when *error* is zero. Otherwise builds
        ``NvvmError(msg, <result code name>)`` and either raises it or, when
        *exit* is true, prints it and calls ``sys.exit(1)``.
        """
        if not error:
            return

        # Codes outside the known table must not fail with IndexError, nor
        # (for negative values) silently pick a name from the end of the list.
        if 0 <= error < len(RESULT_CODE_NAMES):
            code_name = RESULT_CODE_NAMES[error]
        else:
            code_name = "NVVM_ERROR_UNKNOWN (%d)" % error

        exc = NvvmError(msg, code_name)
        if exit:
            print(exc)
            sys.exit(1)
        raise exc
206
+
207
+
208
class CompilationUnit(object):
    """
    A CompilationUnit is a set of LLVM modules that are compiled to PTX or
    LTO-IR with NVVM.

    Compilation options are accepted as a dict mapping option names to values,
    with the following considerations:

    - Underscores (`_`) in option names are converted to dashes (`-`), to match
      NVVM's option name format.
    - Options that take a value will be emitted in the form "-<name>=<value>".
    - Booleans passed as option values will be converted to integers.
    - Options which take no value (such as `-gen-lto`) should have a value of
      `None` and will be emitted in the form "-<name>".

    For documentation on NVVM compilation options, see the CUDA Toolkit
    Documentation:

    https://docs.nvidia.com/cuda/libnvvm-api/index.html#_CPPv418nvvmCompileProgram11nvvmProgramiPPKc
    """

    def __init__(self, options):
        """
        Create the NVVM program and encode *options* for later calls.

        Raises ``NvvmError`` if the program cannot be created.
        """
        self.driver = NVVM()
        self._handle = nvvm_program()
        err = self.driver.nvvmCreateProgram(byref(self._handle))
        self.driver.check_error(err, "Failed to create CU")

        def stringify_option(k, v):
            k = k.replace("_", "-")

            if v is None:
                return f"-{k}".encode("utf-8")

            if isinstance(v, bool):
                v = int(v)

            return f"-{k}={v}".encode("utf-8")

        options = [stringify_option(k, v) for k, v in options.items()]
        option_ptrs = (c_char_p * len(options))(*[c_char_p(x) for x in options])

        # We keep both the options and the pointers to them so that options are
        # not destroyed before we've used their values
        self.options = options
        self.option_ptrs = option_ptrs
        self.n_options = len(options)

    def __del__(self):
        # __init__ may have failed before the driver or the handle was set,
        # or before the program was actually created (handle still null).
        # In those cases there is nothing to destroy.
        driver = getattr(self, "driver", None)
        handle = getattr(self, "_handle", None)
        if driver is None or handle is None or handle.value is None:
            return

        err = driver.nvvmDestroyProgram(byref(handle))
        driver.check_error(err, "Failed to destroy CU", exit=True)

    def add_module(self, buffer):
        """
        Add a module level NVVM IR to a compilation unit.
        - The buffer should contain an NVVM module IR either in the bitcode
          representation (LLVM3.0) or in the text representation.
        """
        err = self.driver.nvvmAddModuleToProgram(
            self._handle, buffer, len(buffer), None
        )
        self.driver.check_error(err, "Failed to add module")

    def lazy_add_module(self, buffer):
        """
        Lazily add an NVVM IR module to a compilation unit.
        The buffer should contain NVVM module IR either in the bitcode
        representation or in the text representation.
        """
        err = self.driver.nvvmLazyAddModuleToProgram(
            self._handle, buffer, len(buffer), None
        )
        self.driver.check_error(err, "Failed to add module")

    def verify(self):
        """
        Run the NVVM verifier on all code added to the compilation unit.

        Raises ``NvvmError`` (message includes the program log) on failure.
        """
        err = self.driver.nvvmVerifyProgram(
            self._handle, self.n_options, self.option_ptrs
        )
        self._try_error(err, "Failed to verify\n")

    def compile(self):
        """
        Compile all modules added to the compilation unit and return the
        resulting PTX or LTO-IR (depending on the options).

        The program log is stored in ``self.log``; a non-empty log is also
        emitted as an ``NvvmWarning``. Raises ``NvvmError`` on failure.
        """
        err = self.driver.nvvmCompileProgram(
            self._handle, self.n_options, self.option_ptrs
        )
        self._try_error(err, "Failed to compile\n")

        # Get result
        result_size = c_size_t()
        err = self.driver.nvvmGetCompiledResultSize(
            self._handle, byref(result_size)
        )

        self._try_error(err, "Failed to get size of compiled result.")

        output_buffer = (c_char * result_size.value)()
        err = self.driver.nvvmGetCompiledResult(self._handle, output_buffer)
        self._try_error(err, "Failed to get compiled result.")

        # Get log
        self.log = self.get_log()
        if self.log:
            warnings.warn(self.log, category=NvvmWarning)

        return output_buffer[:]

    def _try_error(self, err, msg):
        """Raise via the driver, with the program log appended, if *err* is set."""
        # Only fetch the log when there is an error to report; on success
        # check_error would discard the message anyway.
        if err:
            self.driver.check_error(err, "%s\n%s" % (msg, self.get_log()))

    def get_log(self):
        """
        Return the program log as a string, or ``""`` when the reported log
        size is at most 1.
        """
        reslen = c_size_t()
        err = self.driver.nvvmGetProgramLogSize(self._handle, byref(reslen))
        self.driver.check_error(err, "Failed to get compilation log size.")

        if reslen.value > 1:
            logbuf = (c_char * reslen.value)()
            err = self.driver.nvvmGetProgramLog(self._handle, logbuf)
            self.driver.check_error(err, "Failed to get compilation log.")

            return logbuf.value.decode("utf8")

        return ""
336
+
337
+
338
+ MISSING_LIBDEVICE_FILE_MSG = """Missing libdevice file.
339
+ ``cuda-nvcc`` and ``cuda-nvrtc`` are required:
340
+
341
+ $ conda install -c conda-forge cuda-nvcc cuda-nvrtc "cuda-version>=12.0"
342
+ """
343
+
344
+
345
class LibDevice(object):
    """
    Accessor for the libdevice bitcode.

    The value returned by ``open_libdevice()`` is cached on the class, so it
    is obtained at most once and shared by every instance.

    Raises ``RuntimeError`` if ``get_libdevice()`` returns ``None``.
    """

    # Class-level cache of the result of open_libdevice().
    _cache_ = None

    def __init__(self):
        if LibDevice._cache_ is None:
            if get_libdevice() is None:
                raise RuntimeError(MISSING_LIBDEVICE_FILE_MSG)
            # Assign on the class, not on self: an instance attribute would
            # shadow the class attribute and the cache would never be shared.
            LibDevice._cache_ = open_libdevice()

        self.bc = LibDevice._cache_

    def get(self):
        """Return the cached libdevice contents."""
        return self.bc
358
+
359
+
360
# Compare-and-swap fragment substituted for {CAS} in the templates below. It
# reads %iptr, %old and %new and defines %cas.
cas_nvvm = """
%cas_success = cmpxchg volatile {Ti}* %iptr, {Ti} %old, {Ti} %new monotonic monotonic
%cas = extractvalue {{ {Ti}, i1 }} %cas_success, 0
"""  # noqa: E501


# Translation of code from CUDA Programming Guide v6.5, section B.12
#
# Compare-and-swap loop applying {OP} to the stored value and %val; returns
# the value that was stored before the successful swap.
ir_numba_atomic_binary_template = """
define internal {T} @___numba_atomic_{T}_{FUNC}({T}* %ptr, {T} %val) alwaysinline {{
entry:
%iptr = bitcast {T}* %ptr to {Ti}*
%old2 = load volatile {Ti}, {Ti}* %iptr
br label %attempt

attempt:
%old = phi {Ti} [ %old2, %entry ], [ %cas, %attempt ]
%dold = bitcast {Ti} %old to {T}
%dnew = {OP} {T} %dold, %val
%new = bitcast {T} %dnew to {Ti}
{CAS}
%repeat = icmp ne {Ti} %cas, %old
br i1 %repeat, label %attempt, label %done

done:
%result = bitcast {Ti} %old to {T}
ret {T} %result
}}
"""  # noqa: E501

# Increment: stores old + 1 when old < val (unsigned compare), otherwise 0.
ir_numba_atomic_inc_template = """
define internal {T} @___numba_atomic_{Tu}_inc({T}* %iptr, {T} %val) alwaysinline {{
entry:
%old2 = load volatile {T}, {T}* %iptr
br label %attempt

attempt:
%old = phi {T} [ %old2, %entry ], [ %cas, %attempt ]
%bndchk = icmp ult {T} %old, %val
%inc = add {T} %old, 1
%new = select i1 %bndchk, {T} %inc, {T} 0
{CAS}
%repeat = icmp ne {T} %cas, %old
br i1 %repeat, label %attempt, label %done

done:
ret {T} %old
}}
"""  # noqa: E501

# Decrement: stores old - 1 when that is < val (unsigned compare), otherwise
# val.
ir_numba_atomic_dec_template = """
define internal {T} @___numba_atomic_{Tu}_dec({T}* %iptr, {T} %val) alwaysinline {{
entry:
%old2 = load volatile {T}, {T}* %iptr
br label %attempt

attempt:
%old = phi {T} [ %old2, %entry ], [ %cas, %attempt ]
%dec = add {T} %old, -1
%bndchk = icmp ult {T} %dec, %val
%new = select i1 %bndchk, {T} %dec, {T} %val
{CAS}
%repeat = icmp ne {T} %cas, %old
br i1 %repeat, label %attempt, label %done

done:
ret {T} %old
}}
"""  # noqa: E501

# Floating point min / max / nanmin / nanmax via a compare-and-swap loop.
ir_numba_atomic_minmax_template = """
define internal {T} @___numba_atomic_{T}_{NAN}{FUNC}({T}* %ptr, {T} %val) alwaysinline {{
entry:
%ptrval = load volatile {T}, {T}* %ptr
; Return early when:
; - For nanmin / nanmax when val is a NaN
; - For min / max when val or ptr is a NaN
%early_return = fcmp uno {T} %val, %{PTR_OR_VAL}val
br i1 %early_return, label %done, label %lt_check

lt_check:
%dold = phi {T} [ %ptrval, %entry ], [ %dcas, %attempt ]
; Continue attempts if dold less or greater than val (depending on whether min or max)
; or if dold is NaN (for nanmin / nanmax)
%cmp = fcmp {OP} {T} %dold, %val
br i1 %cmp, label %attempt, label %done

attempt:
; Attempt to swap in the value
%old = bitcast {T} %dold to {Ti}
%iptr = bitcast {T}* %ptr to {Ti}*
%new = bitcast {T} %val to {Ti}
{CAS}
%dcas = bitcast {Ti} %cas to {T}
br label %lt_check

done:
ret {T} %ptrval
}}
"""  # noqa: E501


def ir_cas(Ti):
    """Return the compare-and-swap fragment for the integer type *Ti*."""
    return cas_nvvm.format(Ti=Ti)


def ir_numba_atomic_binary(T, Ti, OP, FUNC):
    """
    Return the definition of ``___numba_atomic_<T>_<FUNC>``.

    *T* is the value type, *Ti* the integer type used for the
    compare-and-swap, and *OP* the instruction combining old value and operand.
    """
    return ir_numba_atomic_binary_template.format(
        T=T, Ti=Ti, OP=OP, FUNC=FUNC, CAS=ir_cas(Ti)
    )


def ir_numba_atomic_minmax(T, Ti, NAN, OP, PTR_OR_VAL, FUNC):
    """
    Return the definition of ``___numba_atomic_<T>_<NAN><FUNC>``.

    *OP* is the ``fcmp`` predicate that keeps the loop retrying. *PTR_OR_VAL*
    is ``"ptr"`` to compare ``%val`` against ``%ptrval`` in the early-return
    check, or ``""`` to compare ``%val`` against itself.
    """
    return ir_numba_atomic_minmax_template.format(
        T=T,
        Ti=Ti,
        NAN=NAN,
        OP=OP,
        PTR_OR_VAL=PTR_OR_VAL,
        FUNC=FUNC,
        CAS=ir_cas(Ti),
    )


def ir_numba_atomic_inc(T, Tu):
    """Return the definition of ``___numba_atomic_<Tu>_inc`` for type *T*."""
    cas_fragment = ir_cas(T)
    return ir_numba_atomic_inc_template.format(T=T, Tu=Tu, CAS=cas_fragment)


def ir_numba_atomic_dec(T, Tu):
    """Return the definition of ``___numba_atomic_<Tu>_dec`` for type *T*."""
    cas_fragment = ir_cas(T)
    return ir_numba_atomic_dec_template.format(T=T, Tu=Tu, CAS=cas_fragment)
490
+
491
+
492
def llvm_replace(llvmir):
    """
    Substitute definitions for the Numba atomic helper declarations.

    Every ``declare ... @"___numba_atomic_*"`` line listed below is replaced
    by the corresponding generated function definition, every occurrence of
    ``immarg`` is removed, and the result is passed through
    ``llvm150_to_70_ir``. Returns the rewritten IR text.
    """

    def declaration(ty, suffix):
        # Exact text of the declaration to search for.
        return (
            f'declare {ty} @"___numba_atomic_{suffix}"'
            f'({ty}* %".1", {ty} %".2")'
        )

    # (floating point type, integer type used for the compare-and-swap)
    fp_types = (("float", "i32"), ("double", "i64"))

    substitutions = [
        (
            declaration("double", "double_add"),
            ir_numba_atomic_binary(T="double", Ti="i64", OP="fadd", FUNC="add"),
        ),
    ]

    for fty, ity in fp_types:
        substitutions.append(
            (
                declaration(fty, f"{fty}_sub"),
                ir_numba_atomic_binary(T=fty, Ti=ity, OP="fsub", FUNC="sub"),
            )
        )

    substitutions.append(
        (declaration("i64", "u64_inc"), ir_numba_atomic_inc(T="i64", Tu="u64"))
    )
    substitutions.append(
        (declaration("i64", "u64_dec"), ir_numba_atomic_dec(T="i64", Tu="u64"))
    )

    # (NAN prefix, FUNC, fcmp predicate, PTR_OR_VAL)
    minmax_variants = (
        ("", "max", "nnan olt", "ptr"),
        ("", "min", "nnan ogt", "ptr"),
        ("nan", "max", "ult", ""),
        ("nan", "min", "ugt", ""),
    )
    for nan, func, op, ptr_or_val in minmax_variants:
        for fty, ity in fp_types:
            substitutions.append(
                (
                    declaration(fty, f"{fty}_{nan}{func}"),
                    ir_numba_atomic_minmax(
                        T=fty,
                        Ti=ity,
                        NAN=nan,
                        OP=op,
                        PTR_OR_VAL=ptr_or_val,
                        FUNC=func,
                    ),
                )
            )

    substitutions.append(("immarg", ""))

    for needle, replacement in substitutions:
        llvmir = llvmir.replace(needle, replacement)

    return llvm150_to_70_ir(llvmir)
611
+
612
+
613
def compile_ir(llvmir, **options):
    """
    Compile LLVM IR with NVVM and return the compiled result.

    *llvmir* is either a single IR string or an iterable of IR strings.
    *options* are forwarded to ``CompilationUnit``; a true ``fastmath`` option
    is removed and replaced by ``ftz=True, fma=True, prec_div=False,
    prec_sqrt=False``.
    """
    modules = [llvmir] if isinstance(llvmir, str) else llvmir

    if options.pop("fastmath", False):
        options.update(ftz=True, fma=True, prec_div=False, prec_sqrt=False)

    cu = CompilationUnit(options)

    for module_ir in modules:
        encoded = llvm_replace(module_ir).encode("utf8")
        cu.add_module(encoded)
    cu.verify()

    # libdevice is added only after verification so that it is not subject
    # to the verifier's requirements.
    cu.lazy_add_module(LibDevice().get())

    return cu.compile()
640
+
641
+
642
# Matches an attribute group definition whose body consists only of bare
# words, capturing the body (without the surrounding spaces) as group 1.
re_attributes_def = re.compile(r"^attributes #\d+ = \{ ([\w\s]+)\ }")


def llvm150_to_70_ir(ir):
    """
    Convert LLVM 15.0 IR for LLVM 7.0.

    Removes the ``willreturn`` attribute from every ``attributes #N = { ... }``
    line that ``re_attributes_def`` matches. All other lines, including
    attribute lines the pattern does not match, are passed through unchanged.
    Lines are re-joined with ``"\\n"``, so a trailing newline is not kept.
    """
    converted = []
    for line in ir.splitlines():
        if line.startswith("attributes #"):
            m = re_attributes_def.match(line)
            # The pattern only accepts bare-word attributes; a group holding
            # anything else (e.g. a quoted string attribute) does not match
            # and must be left alone rather than crash on m.group().
            if m is not None:
                # Remove function attributes unsupported by LLVM 7.0
                kept = " ".join(
                    a for a in m.group(1).split() if a != "willreturn"
                )
                # Splice by position so only the captured body is rewritten.
                line = line[: m.start(1)] + kept + line[m.end(1) :]

        converted.append(line)

    return "\n".join(converted)
661
+
662
+
663
def set_cuda_kernel(function):
    """
    Mark a function as a CUDA kernel. Kernels have the following requirements:

    - Metadata that marks them as a kernel.
    - Addition to the @llvm.used list, so that they will not be discarded.
    - The noinline attribute is not permitted, because this causes NVVM to emit
      a warning, which counts as failing IR verification.

    Presently it is assumed that there is one kernel per module, which holds
    for Numba-jitted functions. If this changes in future or this function is
    to be used externally, this function may need modification to add to the
    @llvm.used list rather than creating it.
    """
    module = function.module

    # Kernel annotation: (function, "kernel", i32 1) in nvvm.annotations.
    kernel_tag = ir.MetaDataString(module, "kernel")
    one = ir.Constant(ir.IntType(32), 1)
    annotation = module.add_metadata((function, kernel_tag, one))
    cgutils.get_or_insert_named_metadata(module, "nvvm.annotations").add(
        annotation
    )

    # Create @llvm.used as a one-element array holding the kernel, cast to i8*.
    i8_ptr = ir.IntType(8).as_pointer()
    used_array_ty = ir.ArrayType(i8_ptr, 1)
    kernel_ptr = function.bitcast(i8_ptr)

    used = ir.GlobalVariable(module, used_array_ty, "llvm.used")
    used.linkage = "appending"
    used.section = "llvm.metadata"
    used.initializer = ir.Constant(used_array_ty, [kernel_ptr])

    # Drop 'noinline' if the function carries it.
    function.attributes.discard("noinline")
700
+
701
+
702
def set_launch_bounds(kernel, launch_bounds):
    """
    Attach launch bound annotations to *kernel*.

    *launch_bounds* is ``None`` (nothing is done), a single int, or a sequence
    of up to three values giving, in order, max threads per block, min blocks
    per multiprocessor and max blocks per cluster. One ``nvvm.annotations``
    entry is added per value supplied.

    Raises ``ValueError`` if more than three bounds are given.
    """
    # Based on: CUDA C / C++ Programming Guide 12.9, Section 8.38:
    # https://docs.nvidia.com/cuda/archive/12.9.0/cuda-c-programming-guide/index.html#launch-bounds
    # PTX ISA Specification Version 8.7, Section 11.4:
    # https://docs.nvidia.com/cuda/archive/12.8.1/parallel-thread-execution/index.html#performance-tuning-directives
    # NVVM IR Specification 12.9, Section 13:
    # https://docs.nvidia.com/cuda/archive/12.9.0/nvvm-ir-spec/index.html#global-property-annotation

    if launch_bounds is None:
        return

    bounds = (launch_bounds,) if isinstance(launch_bounds, int) else launch_bounds

    n = len(bounds)
    if n > 3:
        raise ValueError(
            f"Got {n} launch bounds: {bounds}. A maximum of three are supported: "
            "(max_threads_per_block, min_blocks_per_sm, max_blocks_per_cluster)"
        )

    module = kernel.module
    annotations = cgutils.get_or_insert_named_metadata(
        module, "nvvm.annotations"
    )

    # Only maxntidx is used even though NVVM IR and PTX also allow maxntidy
    # and maxntidz. The thread block size limit pertains only to the total
    # number of threads, so bounds on individual dimensions may be exceeded
    # anyway. To avoid a surprising interface, only the total size is set
    # via maxntidx, with y and z assumed to be 1 (as in CUDA C/C++).
    property_names = (
        "maxntidx",  # Max threads per block
        "minctasm",  # Min blocks per multiprocessor
        "cluster_max_blocks",  # Max blocks per cluster
    )

    # zip() stops at the shorter sequence, so only the supplied bounds are
    # annotated.
    for name, value in zip(property_names, bounds):
        name_md = ir.MetaDataString(module, name)
        value_const = ir.Constant(ir.IntType(32), value)
        annotations.add(module.add_metadata((kernel, name_md, value_const)))
748
+
749
+
750
def add_ir_version(mod):
    """
    Add NVVM IR version to module.

    The four values returned by ``NVVM().get_ir_version()`` are stored as i32
    constants in a metadata node registered under ``nvvmir.version``, so the
    module declares the IR version of the NVVM in use.
    """
    i32 = ir.IntType(32)
    version_md = mod.add_metadata(
        [i32(component) for component in NVVM().get_ir_version()]
    )
    mod.add_named_metadata("nvvmir.version", version_md)