numba-cuda 0.18.1__py3-none-any.whl → 0.19.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of numba-cuda has been flagged as possibly problematic. See the package's registry page for more details.

Files changed (301):
  1. _numba_cuda_redirector.pth +3 -0
  2. _numba_cuda_redirector.py +3 -0
  3. numba_cuda/VERSION +1 -1
  4. numba_cuda/__init__.py +2 -1
  5. numba_cuda/_version.py +2 -13
  6. numba_cuda/numba/cuda/__init__.py +4 -1
  7. numba_cuda/numba/cuda/_internal/cuda_bf16.py +5 -2
  8. numba_cuda/numba/cuda/_internal/cuda_fp16.py +4 -1
  9. numba_cuda/numba/cuda/api.py +5 -7
  10. numba_cuda/numba/cuda/api_util.py +3 -0
  11. numba_cuda/numba/cuda/args.py +3 -0
  12. numba_cuda/numba/cuda/bf16.py +3 -0
  13. numba_cuda/numba/cuda/cg.py +3 -0
  14. numba_cuda/numba/cuda/cgutils.py +3 -0
  15. numba_cuda/numba/cuda/codegen.py +3 -0
  16. numba_cuda/numba/cuda/compiler.py +10 -4
  17. numba_cuda/numba/cuda/core/caching.py +3 -0
  18. numba_cuda/numba/cuda/core/callconv.py +3 -0
  19. numba_cuda/numba/cuda/core/codegen.py +3 -0
  20. numba_cuda/numba/cuda/core/compiler.py +3 -0
  21. numba_cuda/numba/cuda/core/interpreter.py +3595 -0
  22. numba_cuda/numba/cuda/core/ir_utils.py +2644 -0
  23. numba_cuda/numba/cuda/core/sigutils.py +58 -0
  24. numba_cuda/numba/cuda/core/typed_passes.py +3 -0
  25. numba_cuda/numba/cuda/cuda_paths.py +12 -17
  26. numba_cuda/numba/cuda/cudadecl.py +4 -1
  27. numba_cuda/numba/cuda/cudadrv/__init__.py +3 -0
  28. numba_cuda/numba/cuda/cudadrv/devicearray.py +3 -0
  29. numba_cuda/numba/cuda/cudadrv/devices.py +3 -0
  30. numba_cuda/numba/cuda/cudadrv/driver.py +7 -19
  31. numba_cuda/numba/cuda/cudadrv/drvapi.py +3 -0
  32. numba_cuda/numba/cuda/cudadrv/dummyarray.py +3 -0
  33. numba_cuda/numba/cuda/cudadrv/enums.py +3 -0
  34. numba_cuda/numba/cuda/cudadrv/error.py +4 -0
  35. numba_cuda/numba/cuda/cudadrv/libs.py +4 -2
  36. numba_cuda/numba/cuda/cudadrv/linkable_code.py +3 -0
  37. numba_cuda/numba/cuda/cudadrv/mappings.py +3 -0
  38. numba_cuda/numba/cuda/cudadrv/ndarray.py +3 -0
  39. numba_cuda/numba/cuda/cudadrv/nvrtc.py +47 -44
  40. numba_cuda/numba/cuda/cudadrv/nvvm.py +6 -18
  41. numba_cuda/numba/cuda/cudadrv/rtapi.py +3 -0
  42. numba_cuda/numba/cuda/cudadrv/runtime.py +15 -1
  43. numba_cuda/numba/cuda/cudaimpl.py +3 -0
  44. numba_cuda/numba/cuda/cudamath.py +4 -1
  45. numba_cuda/numba/cuda/debuginfo.py +3 -0
  46. numba_cuda/numba/cuda/decorators.py +7 -3
  47. numba_cuda/numba/cuda/descriptor.py +3 -0
  48. numba_cuda/numba/cuda/device_init.py +3 -0
  49. numba_cuda/numba/cuda/deviceufunc.py +5 -1
  50. numba_cuda/numba/cuda/dispatcher.py +6 -2
  51. numba_cuda/numba/cuda/errors.py +10 -0
  52. numba_cuda/numba/cuda/extending.py +4 -1
  53. numba_cuda/numba/cuda/flags.py +2 -0
  54. numba_cuda/numba/cuda/fp16.py +3 -0
  55. numba_cuda/numba/cuda/initialize.py +4 -0
  56. numba_cuda/numba/cuda/intrinsic_wrapper.py +3 -0
  57. numba_cuda/numba/cuda/intrinsics.py +3 -0
  58. numba_cuda/numba/cuda/itanium_mangler.py +214 -0
  59. numba_cuda/numba/cuda/kernels/__init__.py +2 -0
  60. numba_cuda/numba/cuda/kernels/reduction.py +3 -0
  61. numba_cuda/numba/cuda/kernels/transpose.py +3 -0
  62. numba_cuda/numba/cuda/libdevice.py +4 -0
  63. numba_cuda/numba/cuda/libdevicedecl.py +4 -1
  64. numba_cuda/numba/cuda/libdevicefuncs.py +4 -1
  65. numba_cuda/numba/cuda/libdeviceimpl.py +3 -0
  66. numba_cuda/numba/cuda/locks.py +3 -0
  67. numba_cuda/numba/cuda/lowering.py +53 -16
  68. numba_cuda/numba/cuda/mathimpl.py +3 -0
  69. numba_cuda/numba/cuda/memory_management/__init__.py +3 -0
  70. numba_cuda/numba/cuda/memory_management/memsys.cu +5 -0
  71. numba_cuda/numba/cuda/memory_management/memsys.cuh +5 -0
  72. numba_cuda/numba/cuda/memory_management/nrt.cu +5 -0
  73. numba_cuda/numba/cuda/memory_management/nrt.cuh +5 -0
  74. numba_cuda/numba/cuda/memory_management/nrt.py +5 -1
  75. numba_cuda/numba/cuda/models.py +3 -0
  76. numba_cuda/numba/cuda/nvvmutils.py +3 -0
  77. numba_cuda/numba/cuda/printimpl.py +3 -0
  78. numba_cuda/numba/cuda/random.py +3 -0
  79. numba_cuda/numba/cuda/reshape_funcs.cu +5 -0
  80. numba_cuda/numba/cuda/serialize.py +3 -0
  81. numba_cuda/numba/cuda/simulator/__init__.py +3 -0
  82. numba_cuda/numba/cuda/simulator/_internal/__init__.py +3 -0
  83. numba_cuda/numba/cuda/simulator/_internal/cuda_bf16.py +2 -0
  84. numba_cuda/numba/cuda/simulator/api.py +4 -1
  85. numba_cuda/numba/cuda/simulator/bf16.py +3 -0
  86. numba_cuda/numba/cuda/simulator/compiler.py +3 -0
  87. numba_cuda/numba/cuda/simulator/cudadrv/__init__.py +3 -0
  88. numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py +3 -0
  89. numba_cuda/numba/cuda/simulator/cudadrv/devices.py +3 -0
  90. numba_cuda/numba/cuda/simulator/cudadrv/driver.py +3 -7
  91. numba_cuda/numba/cuda/simulator/cudadrv/drvapi.py +3 -0
  92. numba_cuda/numba/cuda/simulator/cudadrv/dummyarray.py +3 -0
  93. numba_cuda/numba/cuda/simulator/cudadrv/error.py +4 -0
  94. numba_cuda/numba/cuda/simulator/cudadrv/libs.py +4 -0
  95. numba_cuda/numba/cuda/simulator/cudadrv/linkable_code.py +4 -0
  96. numba_cuda/numba/cuda/simulator/cudadrv/nvrtc.py +3 -0
  97. numba_cuda/numba/cuda/simulator/cudadrv/nvvm.py +3 -0
  98. numba_cuda/numba/cuda/simulator/cudadrv/runtime.py +3 -0
  99. numba_cuda/numba/cuda/simulator/dispatcher.py +4 -0
  100. numba_cuda/numba/cuda/simulator/kernel.py +3 -0
  101. numba_cuda/numba/cuda/simulator/kernelapi.py +3 -0
  102. numba_cuda/numba/cuda/simulator/memory_management/__init__.py +3 -0
  103. numba_cuda/numba/cuda/simulator/memory_management/nrt.py +3 -0
  104. numba_cuda/numba/cuda/simulator/reduction.py +3 -0
  105. numba_cuda/numba/cuda/simulator/vector_types.py +3 -0
  106. numba_cuda/numba/cuda/simulator_init.py +3 -0
  107. numba_cuda/numba/cuda/stubs.py +3 -0
  108. numba_cuda/numba/cuda/target.py +4 -2
  109. numba_cuda/numba/cuda/testing.py +7 -6
  110. numba_cuda/numba/cuda/tests/__init__.py +3 -0
  111. numba_cuda/numba/cuda/tests/complex_usecases.py +3 -0
  112. numba_cuda/numba/cuda/tests/core/serialize_usecases.py +3 -0
  113. numba_cuda/numba/cuda/tests/core/test_itanium_mangler.py +83 -0
  114. numba_cuda/numba/cuda/tests/core/test_serialize.py +3 -0
  115. numba_cuda/numba/cuda/tests/cudadrv/__init__.py +3 -0
  116. numba_cuda/numba/cuda/tests/cudadrv/test_array_attr.py +3 -0
  117. numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py +3 -0
  118. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_array_slicing.py +3 -0
  119. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_auto_context.py +3 -0
  120. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py +3 -0
  121. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py +3 -0
  122. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_libraries.py +3 -0
  123. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_memory.py +3 -0
  124. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py +4 -1
  125. numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py +4 -1
  126. numba_cuda/numba/cuda/tests/cudadrv/test_detect.py +4 -1
  127. numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +4 -1
  128. numba_cuda/numba/cuda/tests/cudadrv/test_events.py +3 -0
  129. numba_cuda/numba/cuda/tests/cudadrv/test_host_alloc.py +3 -0
  130. numba_cuda/numba/cuda/tests/cudadrv/test_init.py +3 -0
  131. numba_cuda/numba/cuda/tests/cudadrv/test_inline_ptx.py +3 -0
  132. numba_cuda/numba/cuda/tests/cudadrv/test_is_fp16.py +3 -0
  133. numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +4 -1
  134. numba_cuda/numba/cuda/tests/cudadrv/test_managed_alloc.py +4 -1
  135. numba_cuda/numba/cuda/tests/cudadrv/test_module_callbacks.py +3 -0
  136. numba_cuda/numba/cuda/tests/cudadrv/test_mvc.py +4 -1
  137. numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py +3 -0
  138. numba_cuda/numba/cuda/tests/cudadrv/test_nvrtc.py +7 -6
  139. numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py +3 -4
  140. numba_cuda/numba/cuda/tests/cudadrv/test_pinned.py +3 -0
  141. numba_cuda/numba/cuda/tests/cudadrv/test_profiler.py +3 -0
  142. numba_cuda/numba/cuda/tests/cudadrv/test_ptds.py +4 -1
  143. numba_cuda/numba/cuda/tests/cudadrv/test_reset_device.py +3 -0
  144. numba_cuda/numba/cuda/tests/cudadrv/test_runtime.py +3 -0
  145. numba_cuda/numba/cuda/tests/cudadrv/test_select_device.py +3 -0
  146. numba_cuda/numba/cuda/tests/cudadrv/test_streams.py +3 -0
  147. numba_cuda/numba/cuda/tests/cudapy/__init__.py +3 -0
  148. numba_cuda/numba/cuda/tests/cudapy/cache_usecases.py +3 -0
  149. numba_cuda/numba/cuda/tests/cudapy/cache_with_cpu_usecases.py +3 -0
  150. numba_cuda/numba/cuda/tests/cudapy/cg_cache_usecases.py +3 -0
  151. numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +3 -0
  152. numba_cuda/numba/cuda/tests/cudapy/recursion_usecases.py +3 -0
  153. numba_cuda/numba/cuda/tests/cudapy/test_alignment.py +3 -0
  154. numba_cuda/numba/cuda/tests/cudapy/test_array.py +3 -0
  155. numba_cuda/numba/cuda/tests/cudapy/test_array_alignment.py +3 -0
  156. numba_cuda/numba/cuda/tests/cudapy/test_array_args.py +3 -0
  157. numba_cuda/numba/cuda/tests/cudapy/test_array_methods.py +3 -0
  158. numba_cuda/numba/cuda/tests/cudapy/test_atomics.py +3 -0
  159. numba_cuda/numba/cuda/tests/cudapy/test_bfloat16.py +4 -3
  160. numba_cuda/numba/cuda/tests/cudapy/test_bfloat16_bindings.py +4 -3
  161. numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py +3 -0
  162. numba_cuda/numba/cuda/tests/cudapy/test_boolean.py +3 -0
  163. numba_cuda/numba/cuda/tests/cudapy/test_caching.py +149 -3
  164. numba_cuda/numba/cuda/tests/cudapy/test_casting.py +3 -0
  165. numba_cuda/numba/cuda/tests/cudapy/test_cffi.py +4 -1
  166. numba_cuda/numba/cuda/tests/cudapy/test_compiler.py +3 -4
  167. numba_cuda/numba/cuda/tests/cudapy/test_complex.py +3 -0
  168. numba_cuda/numba/cuda/tests/cudapy/test_complex_kernel.py +3 -0
  169. numba_cuda/numba/cuda/tests/cudapy/test_const_string.py +3 -0
  170. numba_cuda/numba/cuda/tests/cudapy/test_constmem.py +3 -0
  171. numba_cuda/numba/cuda/tests/cudapy/test_cooperative_groups.py +3 -0
  172. numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py +4 -1
  173. numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py +4 -1
  174. numba_cuda/numba/cuda/tests/cudapy/test_datetime.py +3 -0
  175. numba_cuda/numba/cuda/tests/cudapy/test_debug.py +4 -1
  176. numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +23 -284
  177. numba_cuda/numba/cuda/tests/cudapy/test_debuginfo_types.py +476 -0
  178. numba_cuda/numba/cuda/tests/cudapy/test_device_func.py +4 -1
  179. numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +3 -0
  180. numba_cuda/numba/cuda/tests/cudapy/test_enums.py +3 -0
  181. numba_cuda/numba/cuda/tests/cudapy/test_errors.py +4 -1
  182. numba_cuda/numba/cuda/tests/cudapy/test_exception.py +3 -0
  183. numba_cuda/numba/cuda/tests/cudapy/test_extending.py +4 -6
  184. numba_cuda/numba/cuda/tests/cudapy/test_fastmath.py +3 -0
  185. numba_cuda/numba/cuda/tests/cudapy/test_forall.py +3 -0
  186. numba_cuda/numba/cuda/tests/cudapy/test_freevar.py +3 -0
  187. numba_cuda/numba/cuda/tests/cudapy/test_frexp_ldexp.py +3 -0
  188. numba_cuda/numba/cuda/tests/cudapy/test_globals.py +3 -0
  189. numba_cuda/numba/cuda/tests/cudapy/test_gufunc.py +4 -1
  190. numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scalar.py +3 -0
  191. numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scheduling.py +3 -0
  192. numba_cuda/numba/cuda/tests/cudapy/test_idiv.py +3 -0
  193. numba_cuda/numba/cuda/tests/cudapy/test_inline.py +3 -0
  194. numba_cuda/numba/cuda/tests/cudapy/test_inspect.py +3 -0
  195. numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py +3 -0
  196. numba_cuda/numba/cuda/tests/cudapy/test_ipc.py +4 -1
  197. numba_cuda/numba/cuda/tests/cudapy/test_ir_utils.py +298 -0
  198. numba_cuda/numba/cuda/tests/cudapy/test_iterators.py +3 -0
  199. numba_cuda/numba/cuda/tests/cudapy/test_lang.py +3 -0
  200. numba_cuda/numba/cuda/tests/cudapy/test_laplace.py +3 -0
  201. numba_cuda/numba/cuda/tests/cudapy/test_libdevice.py +3 -0
  202. numba_cuda/numba/cuda/tests/cudapy/test_lineinfo.py +4 -1
  203. numba_cuda/numba/cuda/tests/cudapy/test_localmem.py +3 -0
  204. numba_cuda/numba/cuda/tests/cudapy/test_mandel.py +3 -0
  205. numba_cuda/numba/cuda/tests/cudapy/test_math.py +3 -0
  206. numba_cuda/numba/cuda/tests/cudapy/test_matmul.py +3 -0
  207. numba_cuda/numba/cuda/tests/cudapy/test_minmax.py +3 -0
  208. numba_cuda/numba/cuda/tests/cudapy/test_montecarlo.py +3 -0
  209. numba_cuda/numba/cuda/tests/cudapy/test_multigpu.py +3 -0
  210. numba_cuda/numba/cuda/tests/cudapy/test_multiprocessing.py +3 -0
  211. numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py +3 -0
  212. numba_cuda/numba/cuda/tests/cudapy/test_nondet.py +3 -0
  213. numba_cuda/numba/cuda/tests/cudapy/test_operator.py +4 -1
  214. numba_cuda/numba/cuda/tests/cudapy/test_optimization.py +3 -0
  215. numba_cuda/numba/cuda/tests/cudapy/test_overload.py +3 -0
  216. numba_cuda/numba/cuda/tests/cudapy/test_powi.py +3 -0
  217. numba_cuda/numba/cuda/tests/cudapy/test_print.py +3 -0
  218. numba_cuda/numba/cuda/tests/cudapy/test_py2_div_issue.py +3 -0
  219. numba_cuda/numba/cuda/tests/cudapy/test_random.py +3 -0
  220. numba_cuda/numba/cuda/tests/cudapy/test_record_dtype.py +3 -0
  221. numba_cuda/numba/cuda/tests/cudapy/test_recursion.py +3 -0
  222. numba_cuda/numba/cuda/tests/cudapy/test_reduction.py +3 -0
  223. numba_cuda/numba/cuda/tests/cudapy/test_retrieve_autoconverted_arrays.py +3 -0
  224. numba_cuda/numba/cuda/tests/cudapy/test_serialize.py +3 -0
  225. numba_cuda/numba/cuda/tests/cudapy/test_slicing.py +3 -0
  226. numba_cuda/numba/cuda/tests/cudapy/test_sm.py +3 -0
  227. numba_cuda/numba/cuda/tests/cudapy/test_sm_creation.py +3 -0
  228. numba_cuda/numba/cuda/tests/cudapy/test_stream_api.py +3 -0
  229. numba_cuda/numba/cuda/tests/cudapy/test_sync.py +3 -0
  230. numba_cuda/numba/cuda/tests/cudapy/test_transpose.py +3 -0
  231. numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +4 -1
  232. numba_cuda/numba/cuda/tests/cudapy/test_userexc.py +3 -0
  233. numba_cuda/numba/cuda/tests/cudapy/test_vector_type.py +3 -0
  234. numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py +3 -0
  235. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_complex.py +3 -0
  236. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py +3 -0
  237. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_device.py +3 -0
  238. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_scalar_arg.py +3 -0
  239. numba_cuda/numba/cuda/tests/cudapy/test_warning.py +8 -1
  240. numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +3 -0
  241. numba_cuda/numba/cuda/tests/cudasim/__init__.py +3 -0
  242. numba_cuda/numba/cuda/tests/cudasim/support.py +3 -0
  243. numba_cuda/numba/cuda/tests/cudasim/test_cudasim_issues.py +3 -0
  244. numba_cuda/numba/cuda/tests/data/__init__.py +2 -0
  245. numba_cuda/numba/cuda/tests/data/cta_barrier.cu +5 -0
  246. numba_cuda/numba/cuda/tests/data/cuda_include.cu +5 -0
  247. numba_cuda/numba/cuda/tests/data/error.cu +5 -0
  248. numba_cuda/numba/cuda/tests/data/include/add.cuh +5 -0
  249. numba_cuda/numba/cuda/tests/data/jitlink.cu +5 -0
  250. numba_cuda/numba/cuda/tests/data/warn.cu +5 -0
  251. numba_cuda/numba/cuda/tests/doc_examples/__init__.py +3 -0
  252. numba_cuda/numba/cuda/tests/doc_examples/ffi/__init__.py +2 -0
  253. numba_cuda/numba/cuda/tests/doc_examples/ffi/functions.cu +5 -0
  254. numba_cuda/numba/cuda/tests/doc_examples/ffi/include/mul.cuh +5 -0
  255. numba_cuda/numba/cuda/tests/doc_examples/ffi/saxpy.cu +5 -0
  256. numba_cuda/numba/cuda/tests/doc_examples/test_cg.py +3 -0
  257. numba_cuda/numba/cuda/tests/doc_examples/test_cpointer.py +4 -1
  258. numba_cuda/numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py +4 -1
  259. numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py +4 -1
  260. numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py +4 -1
  261. numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py +4 -1
  262. numba_cuda/numba/cuda/tests/doc_examples/test_montecarlo.py +4 -1
  263. numba_cuda/numba/cuda/tests/doc_examples/test_random.py +3 -0
  264. numba_cuda/numba/cuda/tests/doc_examples/test_reduction.py +4 -1
  265. numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py +4 -1
  266. numba_cuda/numba/cuda/tests/doc_examples/test_ufunc.py +4 -1
  267. numba_cuda/numba/cuda/tests/doc_examples/test_vecadd.py +4 -1
  268. numba_cuda/numba/cuda/tests/enum_usecases.py +3 -0
  269. numba_cuda/numba/cuda/tests/nocuda/__init__.py +3 -0
  270. numba_cuda/numba/cuda/tests/nocuda/test_dummyarray.py +3 -0
  271. numba_cuda/numba/cuda/tests/nocuda/test_function_resolution.py +3 -0
  272. numba_cuda/numba/cuda/tests/nocuda/test_import.py +4 -1
  273. numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py +3 -0
  274. numba_cuda/numba/cuda/tests/nocuda/test_nvvm.py +3 -0
  275. numba_cuda/numba/cuda/tests/nrt/__init__.py +3 -0
  276. numba_cuda/numba/cuda/tests/nrt/test_nrt.py +5 -2
  277. numba_cuda/numba/cuda/tests/nrt/test_nrt_refct.py +4 -1
  278. numba_cuda/numba/cuda/tests/support.py +755 -0
  279. numba_cuda/numba/cuda/tests/test_binary_generation/Makefile +6 -3
  280. numba_cuda/numba/cuda/tests/test_binary_generation/generate_raw_ltoir.py +6 -2
  281. numba_cuda/numba/cuda/tests/test_binary_generation/nrt_extern.cu +5 -0
  282. numba_cuda/numba/cuda/tests/test_binary_generation/test_device_functions.cu +5 -0
  283. numba_cuda/numba/cuda/tests/test_binary_generation/undefined_extern.cu +5 -0
  284. numba_cuda/numba/cuda/types.py +3 -0
  285. numba_cuda/numba/cuda/typing/__init__.py +11 -0
  286. numba_cuda/numba/cuda/typing/templates.py +1448 -0
  287. numba_cuda/numba/cuda/ufuncs.py +3 -0
  288. numba_cuda/numba/cuda/utils.py +3 -0
  289. numba_cuda/numba/cuda/vector_types.py +6 -3
  290. numba_cuda/numba/cuda/vectorizers.py +3 -0
  291. {numba_cuda-0.18.1.dist-info → numba_cuda-0.19.1.dist-info}/METADATA +25 -29
  292. numba_cuda-0.19.1.dist-info/RECORD +302 -0
  293. {numba_cuda-0.18.1.dist-info → numba_cuda-0.19.1.dist-info}/licenses/LICENSE +1 -0
  294. numba_cuda-0.19.1.dist-info/licenses/LICENSE.numba +24 -0
  295. numba_cuda/numba/cuda/include/11/cuda_bf16.h +0 -3749
  296. numba_cuda/numba/cuda/include/11/cuda_bf16.hpp +0 -2683
  297. numba_cuda/numba/cuda/include/11/cuda_fp16.h +0 -3794
  298. numba_cuda/numba/cuda/include/11/cuda_fp16.hpp +0 -2614
  299. numba_cuda-0.18.1.dist-info/RECORD +0 -296
  300. {numba_cuda-0.18.1.dist-info → numba_cuda-0.19.1.dist-info}/WHEEL +0 -0
  301. {numba_cuda-0.18.1.dist-info → numba_cuda-0.19.1.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,58 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: BSD-2-Clause
3
+
4
+ from numba.core import types, typing
5
+
6
+
7
+ def is_signature(sig):
8
+ """
9
+ Return whether *sig* is a potentially valid signature
10
+ specification (for user-facing APIs).
11
+ """
12
+ return isinstance(sig, (str, tuple, typing.Signature))
13
+
14
+
15
+ def _parse_signature_string(signature_str):
16
+ """
17
+ Parameters
18
+ ----------
19
+ signature_str : str
20
+ """
21
+ # Just eval signature_str using the types submodules as globals
22
+ return eval(signature_str, {}, types.__dict__)
23
+
24
+
25
+ def normalize_signature(sig):
26
+ """
27
+ From *sig* (a signature specification), return a ``(args, return_type)``
28
+ tuple, where ``args`` itself is a tuple of types, and ``return_type``
29
+ can be None if not specified.
30
+ """
31
+ if isinstance(sig, str):
32
+ parsed = _parse_signature_string(sig)
33
+ else:
34
+ parsed = sig
35
+ if isinstance(parsed, tuple):
36
+ args, return_type = parsed, None
37
+ elif isinstance(parsed, typing.Signature):
38
+ args, return_type = parsed.args, parsed.return_type
39
+ else:
40
+ raise TypeError(
41
+ "invalid signature: %r (type: %r) evaluates to %r "
42
+ "instead of tuple or Signature"
43
+ % (sig, sig.__class__.__name__, parsed.__class__.__name__)
44
+ )
45
+
46
+ def check_type(ty):
47
+ if not isinstance(ty, types.Type):
48
+ raise TypeError(
49
+ "invalid type in signature: expected a type "
50
+ "instance, got %r" % (ty,)
51
+ )
52
+
53
+ if return_type is not None:
54
+ check_type(return_type)
55
+ for ty in args:
56
+ check_type(ty)
57
+
58
+ return args, return_type
@@ -1,3 +1,6 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: BSD-2-Clause
3
+
1
4
  import abc
2
5
  import warnings
3
6
  from contextlib import contextmanager
@@ -1,3 +1,6 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: BSD-2-Clause
3
+
1
4
  import sys
2
5
  import re
3
6
  import os
@@ -148,7 +151,6 @@ def get_nvrtc_dso_path():
148
151
  # Check for each version of the NVRTC DLL, preferring the most
149
152
  # recent.
150
153
  versions = (
151
- "112" if IS_WIN32 else "11.2",
152
154
  "120" if IS_WIN32 else "12",
153
155
  "130" if IS_WIN32 else "13",
154
156
  )
@@ -303,16 +305,16 @@ def get_nvidia_nvvm_ctk():
303
305
 
304
306
  # Assume the existence of NVVM in the conda env implies that a CUDA toolkit
305
307
  # conda package is installed.
308
+ if IS_WIN32:
309
+ # The path used on Windows
310
+ libdir = os.path.join(sys.prefix, "Library", "nvvm", _cudalib_path())
311
+ else:
312
+ # The path used on Linux is different to that on Windows
313
+ libdir = os.path.join(sys.prefix, "nvvm", _cudalib_path())
306
314
 
307
- # First, try the location used on Linux and the Windows 11.x packages
308
- libdir = os.path.join(sys.prefix, "nvvm", _cudalib_path())
309
315
  if not os.path.exists(libdir) or not os.path.isdir(libdir):
310
- # If that fails, try the location used for Windows 12.x packages
311
- libdir = os.path.join(sys.prefix, "Library", "nvvm", _cudalib_path())
312
- if not os.path.exists(libdir) or not os.path.isdir(libdir):
313
- # If that doesn't exist either, assume we don't have the NVIDIA
314
- # conda package
315
- return
316
+ # If the path doesn't exist, we didn't find the NVIDIA conda package
317
+ return
316
318
 
317
319
  paths = find_lib("nvvm", libdir=libdir)
318
320
  if not paths:
@@ -346,15 +348,8 @@ def get_nvidia_static_cudalib_ctk():
346
348
  if not nvvm_ctk:
347
349
  return
348
350
 
349
- if IS_WIN32 and ("Library" not in nvvm_ctk):
350
- # Location specific to CUDA 11.x packages on Windows
351
- dirs = ("Lib", "x64")
352
- else:
353
- # Linux, or Windows with CUDA 12.x packages
354
- dirs = ("lib",)
355
-
356
351
  env_dir = os.path.dirname(os.path.dirname(nvvm_ctk))
357
- return os.path.join(env_dir, *dirs)
352
+ return os.path.join(env_dir, "lib")
358
353
 
359
354
 
360
355
  def get_cuda_home(*subdirs):
@@ -1,3 +1,6 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: BSD-2-Clause
3
+
1
4
  from numba.core import errors, types
2
5
  from numba.core.typing.npydecl import (
3
6
  parse_dtype,
@@ -9,7 +12,7 @@ from numba.core.typing.npydecl import (
9
12
  math_operations,
10
13
  bit_twiddling_functions,
11
14
  )
12
- from numba.core.typing.templates import (
15
+ from numba.cuda.typing.templates import (
13
16
  AttributeTemplate,
14
17
  ConcreteTemplate,
15
18
  AbstractTemplate,
@@ -1,3 +1,6 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: BSD-2-Clause
3
+
1
4
  """CUDA Driver
2
5
 
3
6
  - Driver API binding
@@ -1,3 +1,6 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: BSD-2-Clause
3
+
1
4
  """
2
5
  A CUDA ND Array is recognized by checking the __cuda_memory__ attribute
3
6
  on the object. If it exists and evaluate to True, it must define shape,
@@ -1,3 +1,6 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: BSD-2-Clause
3
+
1
4
  """
2
5
  Expose each GPU devices directly.
3
6
 
@@ -1,3 +1,6 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: BSD-2-Clause
3
+
1
4
  """
2
5
  CUDA driver bridge implementation
3
6
 
@@ -54,12 +57,6 @@ from .linkable_code import LinkableCode, LTOIR, Fatbin, Object
54
57
  from numba.cuda.utils import cached_file_read
55
58
  from numba.cuda.cudadrv import enums, drvapi, nvrtc
56
59
 
57
- try:
58
- from pynvjitlink.api import NvJitLinker, NvJitLinkError
59
- except ImportError:
60
- NvJitLinker, NvJitLinkError = None, None
61
-
62
-
63
60
  USE_NV_BINDING = config.CUDA_USE_NVIDIA_BINDING
64
61
 
65
62
  if USE_NV_BINDING:
@@ -640,7 +637,7 @@ class Device(object):
640
637
 
641
638
  if USE_NV_BINDING:
642
639
  buf = driver.cuDeviceGetName(bufsz, self.id)
643
- name = buf.decode("utf-8").rstrip("\0")
640
+ name = buf.split(b"\x00")[0]
644
641
  else:
645
642
  buf = (c_char * bufsz)()
646
643
  driver.cuDeviceGetName(buf, bufsz, self.id)
@@ -2808,19 +2805,10 @@ class _LinkerBase(metaclass=ABCMeta):
2808
2805
  lto=None,
2809
2806
  additional_flags=None,
2810
2807
  ):
2811
- driver_ver = driver.get_version()
2812
- if driver_ver < (12, 0):
2813
- if config.CUDA_ENABLE_MINOR_VERSION_COMPATIBILITY:
2814
- linker = MVCLinker
2815
- elif USE_NV_BINDING:
2816
- linker = _Linker
2817
- else:
2818
- linker = CtypesLinker
2808
+ if USE_NV_BINDING:
2809
+ linker = _Linker
2819
2810
  else:
2820
- if USE_NV_BINDING:
2821
- linker = _Linker
2822
- else:
2823
- linker = CtypesLinker
2811
+ linker = CtypesLinker
2824
2812
 
2825
2813
  params = (max_registers, lineinfo, cc)
2826
2814
  if linker is _Linker:
@@ -1,3 +1,6 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: BSD-2-Clause
3
+
1
4
  from ctypes import (
2
5
  c_byte,
3
6
  c_char_p,
@@ -1,3 +1,6 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: BSD-2-Clause
3
+
1
4
  from collections import namedtuple
2
5
  import itertools
3
6
  import functools
@@ -1,3 +1,6 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: BSD-2-Clause
3
+
1
4
  """
2
5
  Enum values for CUDA driver. Information about the values
3
6
  can be found on the official NVIDIA documentation website.
@@ -1,3 +1,7 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: BSD-2-Clause
3
+
4
+
1
5
  class CudaDriverError(Exception):
2
6
  pass
3
7
 
@@ -1,9 +1,11 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: BSD-2-Clause
3
+
1
4
  """CUDA Toolkit libraries lookup utilities.
2
5
 
3
6
  CUDA Toolkit libraries can be available via either:
4
7
 
5
- - the `cuda-nvcc` and `cuda-nvrtc` conda packages for CUDA 12,
6
- - the `cudatoolkit` conda package for CUDA 11,
8
+ - the `cuda-nvcc` and `cuda-nvrtc` conda packages,
7
9
  - a user supplied location from CUDA_HOME,
8
10
  - a system wide location,
9
11
  - package-specific locations (e.g. the Debian NVIDIA packages),
@@ -1,3 +1,6 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: BSD-2-Clause
3
+
1
4
  import io
2
5
  from .mappings import FILE_EXTENSION_MAP
3
6
 
@@ -1,3 +1,6 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: BSD-2-Clause
3
+
1
4
  from numba import config
2
5
  from . import enums
3
6
 
@@ -1,3 +1,6 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: BSD-2-Clause
3
+
1
4
  from numba.cuda.cudadrv import devices, driver
2
5
  from numba.core.registry import cpu_target
3
6
 
@@ -1,3 +1,6 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: BSD-2-Clause
3
+
1
4
  from ctypes import byref, c_char, c_char_p, c_int, c_size_t, c_void_p, POINTER
2
5
  from enum import IntEnum
3
6
  from numba.cuda.cudadrv.error import (
@@ -29,6 +32,7 @@ nvrtc_program = c_void_p
29
32
  nvrtc_result = c_int
30
33
 
31
34
  if config.CUDA_USE_NVIDIA_BINDING:
35
+ from cuda.bindings import nvrtc as bindings_nvrtc
32
36
  from cuda.core.experimental import Program, ProgramOptions
33
37
 
34
38
 
@@ -142,6 +146,10 @@ class NVRTC:
142
146
 
143
147
  def __new__(cls):
144
148
  with _nvrtc_lock:
149
+ if config.CUDA_USE_NVIDIA_BINDING:
150
+ raise RuntimeError(
151
+ "NVRTC objects should not be used with cuda-python bindings"
152
+ )
145
153
  if cls.__INSTANCE is None:
146
154
  from numba.cuda.cudadrv.libs import open_cudalib
147
155
 
@@ -154,16 +162,9 @@ class NVRTC:
154
162
 
155
163
  # Find & populate functions
156
164
  for name, proto in inst._PROTOTYPES.items():
157
- try:
158
- func = getattr(lib, name)
159
- func.restype = proto[0]
160
- func.argtypes = proto[1:]
161
- except AttributeError:
162
- if "LTOIR" in name:
163
- # CUDA 11 does not have LTOIR functions; ignore
164
- continue
165
- else:
166
- raise
165
+ func = getattr(lib, name)
166
+ func.restype = proto[0]
167
+ func.argtypes = proto[1:]
167
168
 
168
169
  @functools.wraps(func)
169
170
  def checked_call(*args, func=func, name=name):
@@ -303,32 +304,35 @@ def compile(src, name, cc, ltoir=False):
303
304
  :return: The compiled PTX and compilation log
304
305
  :rtype: tuple
305
306
  """
306
- nvrtc = NVRTC()
307
- program = nvrtc.create_program(src, name)
308
307
 
309
- version = nvrtc.get_version()
310
- ver_str = lambda v: ".".join(v)
311
- if version < (11, 2):
312
- raise RuntimeError(
313
- "Unsupported CUDA version. CUDA 11.2 or higher is required."
314
- )
315
- else:
316
- supported_arch = nvrtc.get_supported_archs()
317
- try:
318
- found = max(filter(lambda v: v <= cc, [v for v in supported_arch]))
319
- except ValueError:
308
+ if config.CUDA_USE_NVIDIA_BINDING:
309
+ retcode, *version = bindings_nvrtc.nvrtcVersion()
310
+ if retcode != bindings_nvrtc.nvrtcResult.NVRTC_SUCCESS:
320
311
  raise RuntimeError(
321
- f"Device compute capability {ver_str(cc)} is less than the "
322
- f"minimum supported by NVRTC {ver_str(version)}. Supported "
323
- "compute capabilities are "
324
- f"{', '.join([ver_str(v) for v in supported_arch])}."
312
+ f"{retcode.name} when calling nvrtcGetSupportedArchs()"
325
313
  )
314
+ version = tuple(version)
315
+ else:
316
+ nvrtc = NVRTC()
317
+ version = nvrtc.get_version()
326
318
 
327
- if found != cc:
328
- warnings.warn(
329
- f"Device compute capability {ver_str(cc)} is not supported by "
330
- f"NVRTC {ver_str(version)}. Using {ver_str(found)} instead."
331
- )
319
+ ver_str = lambda version: ".".join(str(v) for v in version)
320
+ supported_ccs = get_supported_ccs()
321
+ try:
322
+ found = max(filter(lambda v: v <= cc, [v for v in supported_ccs]))
323
+ except ValueError:
324
+ raise RuntimeError(
325
+ f"Device compute capability {ver_str(cc)} is less than the "
326
+ f"minimum supported by NVRTC {ver_str(version)}. Supported "
327
+ "compute capabilities are "
328
+ f"{', '.join([ver_str(v) for v in supported_ccs])}."
329
+ )
330
+
331
+ if found != cc:
332
+ warnings.warn(
333
+ f"Device compute capability {ver_str(cc)} is not supported by "
334
+ f"NVRTC {ver_str(version)}. Using {ver_str(found)} instead."
335
+ )
332
336
 
333
337
  # Compilation options:
334
338
  # - Compile for the current device's compute capability.
@@ -348,16 +352,10 @@ def compile(src, name, cc, ltoir=False):
348
352
  f"{os.path.join(cuda_include_dir, 'cccl')}",
349
353
  ]
350
354
 
351
- nvrtc_version = nvrtc.get_version()
352
- nvrtc_ver_major = nvrtc_version[0]
353
-
354
355
  cudadrv_path = os.path.dirname(os.path.abspath(__file__))
355
356
  numba_cuda_path = os.path.dirname(cudadrv_path)
356
357
 
357
- if nvrtc_ver_major == 11:
358
- numba_include = f"{os.path.join(numba_cuda_path, 'include', '11')}"
359
- else:
360
- numba_include = f"{os.path.join(numba_cuda_path, 'include', '12')}"
358
+ numba_include = f"{os.path.join(numba_cuda_path, 'include', '12')}"
361
359
 
362
360
  if config.CUDA_NVRTC_EXTRA_SEARCH_PATHS:
363
361
  extra_includes = config.CUDA_NVRTC_EXTRA_SEARCH_PATHS.split(":")
@@ -373,7 +371,6 @@ def compile(src, name, cc, ltoir=False):
373
371
  arch=arch,
374
372
  include_path=includes,
375
373
  relocatable_device_code=True,
376
- std="c++17" if nvrtc_version < (12, 0) else None,
377
374
  link_time_optimization=ltoir,
378
375
  name=name,
379
376
  )
@@ -399,6 +396,7 @@ def compile(src, name, cc, ltoir=False):
399
396
  return result, log
400
397
 
401
398
  else:
399
+ program = nvrtc.create_program(src, name)
402
400
  includes = [f"-I{path}" for path in includes]
403
401
  options = [
404
402
  arch,
@@ -410,9 +408,6 @@ def compile(src, name, cc, ltoir=False):
410
408
  if ltoir:
411
409
  options.append("-dlto")
412
410
 
413
- if nvrtc_version < (12, 0):
414
- options.append("-std=c++17")
415
-
416
411
  # Compile the program
417
412
  compile_error = nvrtc.compile_program(program, options)
418
413
 
@@ -482,4 +477,12 @@ def get_lowest_supported_cc():
482
477
 
483
478
 
484
479
  def get_supported_ccs():
485
- return NVRTC().get_supported_archs()
480
+ if config.CUDA_USE_NVIDIA_BINDING:
481
+ retcode, archs = bindings_nvrtc.nvrtcGetSupportedArchs()
482
+ if retcode != bindings_nvrtc.nvrtcResult.NVRTC_SUCCESS:
483
+ raise RuntimeError(
484
+ f"{retcode.name} when calling nvrtcGetSupportedArchs()"
485
+ )
486
+ return [(arch // 10, arch % 10) for arch in archs]
487
+ else:
488
+ return NVRTC().get_supported_archs()
@@ -1,3 +1,6 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: BSD-2-Clause
3
+
1
4
  """
2
5
  This is a direct translation of nvvm.h
3
6
  """
@@ -47,14 +50,7 @@ NVVM_ERROR_COMPILATION
47
50
  for i, k in enumerate(RESULT_CODE_NAMES):
48
51
  setattr(sys.modules[__name__], k, i)
49
52
 
50
- # Data layouts. NVVM IR 1.8 (CUDA 11.6) introduced 128-bit integer support.
51
-
52
- _datalayout_original = (
53
- "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-"
54
- "i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-"
55
- "v64:64:64-v128:128:128-n16:32:64"
56
- )
57
- _datalayout_i128 = (
53
+ _datalayout = (
58
54
  "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-"
59
55
  "i128:128:128-f32:32:32-f64:64:64-v16:16:16-v32:32:32-"
60
56
  "v64:64:64-v128:128:128-n16:32:64"
@@ -182,10 +178,7 @@ class NVVM(object):
182
178
 
183
179
  @property
184
180
  def data_layout(self):
185
- if (self._majorIR, self._minorIR) < (1, 8):
186
- return _datalayout_original
187
- else:
188
- return _datalayout_i128
181
+ return _datalayout
189
182
 
190
183
  def get_version(self):
191
184
  major = c_int()
@@ -346,14 +339,9 @@ class CompilationUnit(object):
346
339
 
347
340
 
348
341
  MISSING_LIBDEVICE_FILE_MSG = """Missing libdevice file.
349
- Please ensure you have a CUDA Toolkit 11.2 or higher.
350
- For CUDA 12, ``cuda-nvcc`` and ``cuda-nvrtc`` are required:
342
+ ``cuda-nvcc`` and ``cuda-nvrtc`` are required:
351
343
 
352
344
  $ conda install -c conda-forge cuda-nvcc cuda-nvrtc "cuda-version>=12.0"
353
-
354
- For CUDA 11, ``cudatoolkit`` is required:
355
-
356
- $ conda install -c conda-forge cudatoolkit "cuda-version>=11.2,<12.0"
357
345
  """
358
346
 
359
347
 
@@ -1,3 +1,6 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: BSD-2-Clause
3
+
1
4
  """
2
5
  Declarations of the Runtime API functions.
3
6
  """
@@ -1,3 +1,6 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: BSD-2-Clause
3
+
1
4
  """
2
5
  Former CUDA Runtime wrapper.
3
6
 
@@ -5,12 +8,23 @@ The toolkit version can now be obtained from NVRTC, so we don't use a binding
5
8
  to the runtime anymore. This file is provided to maintain the existing API.
6
9
  """
7
10
 
11
+ from numba import config
8
12
  from numba.cuda.cudadrv.nvrtc import NVRTC
9
13
 
10
14
 
11
15
  class Runtime:
12
16
  def get_version(self):
13
- return NVRTC().get_version()
17
+ if config.CUDA_USE_NVIDIA_BINDING:
18
+ from cuda.bindings import nvrtc
19
+
20
+ retcode, *version = nvrtc.nvrtcVersion()
21
+ if retcode != nvrtc.nvrtcResult.NVRTC_SUCCESS:
22
+ raise RuntimeError(
23
+ f"{retcode.name} when calling nvrtcGetVersion()"
24
+ )
25
+ return tuple(version)
26
+ else:
27
+ return NVRTC().get_version()
14
28
 
15
29
 
16
30
  runtime = Runtime()
@@ -1,3 +1,6 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: BSD-2-Clause
3
+
1
4
  from functools import reduce
2
5
  import operator
3
6
  import math
@@ -1,6 +1,9 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: BSD-2-Clause
3
+
1
4
  import math
2
5
  from numba.core import types
3
- from numba.core.typing.templates import ConcreteTemplate, signature, Registry
6
+ from numba.cuda.typing.templates import ConcreteTemplate, signature, Registry
4
7
 
5
8
 
6
9
  registry = Registry()
@@ -1,3 +1,6 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: BSD-2-Clause
3
+
1
4
  import os
2
5
 
3
6
  from llvmlite import ir
@@ -1,7 +1,11 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: BSD-2-Clause
3
+
1
4
  from warnings import warn
2
- from numba.core import types, config, sigutils
5
+ from numba.core import types, config
3
6
  from numba.core.errors import DeprecationError, NumbaInvalidConfigWarning
4
7
  from numba.cuda.compiler import declare_device_function
8
+ from numba.cuda.core import sigutils
5
9
  from numba.cuda.dispatcher import CUDADispatcher
6
10
  from numba.cuda.simulator.kernel import FakeCUDAKernel
7
11
  from numba.cuda.cudadrv.driver import _have_nvjitlink
@@ -86,7 +90,7 @@ def jit(
86
90
  number of threads per block.
87
91
  :type launch_bounds: int | tuple[int]
88
92
  :param lto: Whether to enable LTO. If unspecified, LTO is enabled by
89
- default when pynvjitlink is available, except for kernels where
93
+ default when nvjitlink is available, except for kernels where
90
94
  ``debug=True``.
91
95
  :type lto: bool
92
96
  """
@@ -143,7 +147,7 @@ def jit(
143
147
  raise ValueError("link keyword invalid for device function")
144
148
 
145
149
  if lto is None:
146
- # Default to using LTO if pynvjitlink is available and we're not debugging
150
+ # Default to using LTO if nvjitlink is available and we're not debugging
147
151
  lto = _have_nvjitlink() and not debug
148
152
  else:
149
153
  if lto and not _have_nvjitlink():
@@ -1,3 +1,6 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: BSD-2-Clause
3
+
1
4
  from numba.core.descriptors import TargetDescriptor
2
5
  from numba.core.options import TargetOptions
3
6
  from .target import CUDATargetContext, CUDATypingContext
@@ -1,3 +1,6 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: BSD-2-Clause
3
+
1
4
  # Re export
2
5
  import sys
3
6
  from numba.cuda import cg
@@ -1,3 +1,6 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: BSD-2-Clause
3
+
1
4
  """
2
5
  Implements custom ufunc dispatch mechanism for non-CPU devices.
3
6
  """
@@ -11,8 +14,9 @@ from functools import reduce
11
14
  import numpy as np
12
15
 
13
16
  from numba.np.ufunc.ufuncbuilder import _BaseUFuncBuilder, parse_identity
14
- from numba.core import types, sigutils
17
+ from numba.core import types
15
18
  from numba.core.typing import signature
19
+ from numba.cuda.core import sigutils
16
20
  from numba.np.ufunc.sigparse import parse_signature
17
21
 
18
22
 
@@ -1,3 +1,6 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: BSD-2-Clause
3
+
1
4
  import numpy as np
2
5
  import os
3
6
  import sys
@@ -8,13 +11,13 @@ import types as pytypes
8
11
  import weakref
9
12
  import uuid
10
13
 
11
- from numba.core import compiler, sigutils, types, typing, config
14
+ from numba.core import compiler, types, typing, config
12
15
  from numba.cuda import serialize, utils
13
16
  from numba.cuda.core.caching import Cache, CacheImpl, NullCache
14
17
  from numba.core.compiler_lock import global_compiler_lock
15
18
  from numba.core.dispatcher import _DispatcherBase
16
19
  from numba.core.errors import NumbaPerformanceWarning, TypingError
17
- from numba.core.typing.templates import fold_arguments
20
+ from numba.cuda.typing.templates import fold_arguments
18
21
  from numba.core.typing.typeof import Purpose, typeof
19
22
  from numba.cuda.api import get_current_device
20
23
  from numba.cuda.args import wrap_arg
@@ -23,6 +26,7 @@ from numba.cuda.compiler import (
23
26
  CUDACompiler,
24
27
  kernel_fixup,
25
28
  )
29
+ from numba.cuda.core import sigutils
26
30
  import re
27
31
  from numba.cuda.cudadrv import driver, nvvm
28
32
  from numba.cuda.cudadrv.linkable_code import LinkableCode