numba-cuda 0.19.0__py3-none-any.whl → 0.20.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of numba-cuda might be problematic. Click here for more details.

Files changed (353)
  1. _numba_cuda_redirector.pth +3 -0
  2. _numba_cuda_redirector.py +3 -0
  3. numba_cuda/VERSION +1 -1
  4. numba_cuda/__init__.py +2 -1
  5. numba_cuda/_version.py +2 -13
  6. numba_cuda/numba/cuda/__init__.py +4 -1
  7. numba_cuda/numba/cuda/_internal/cuda_bf16.py +12708 -1469
  8. numba_cuda/numba/cuda/_internal/cuda_fp16.py +2656 -8769
  9. numba_cuda/numba/cuda/api.py +9 -1
  10. numba_cuda/numba/cuda/api_util.py +3 -0
  11. numba_cuda/numba/cuda/args.py +3 -0
  12. numba_cuda/numba/cuda/bf16.py +288 -2
  13. numba_cuda/numba/cuda/cg.py +3 -0
  14. numba_cuda/numba/cuda/cgutils.py +5 -2
  15. numba_cuda/numba/cuda/cloudpickle/__init__.py +21 -0
  16. numba_cuda/numba/cuda/cloudpickle/cloudpickle.py +1598 -0
  17. numba_cuda/numba/cuda/cloudpickle/cloudpickle_fast.py +17 -0
  18. numba_cuda/numba/cuda/codegen.py +4 -1
  19. numba_cuda/numba/cuda/compiler.py +376 -30
  20. numba_cuda/numba/cuda/core/analysis.py +319 -0
  21. numba_cuda/numba/cuda/core/annotations/__init__.py +0 -0
  22. numba_cuda/numba/cuda/core/annotations/type_annotations.py +304 -0
  23. numba_cuda/numba/cuda/core/base.py +1289 -0
  24. numba_cuda/numba/cuda/core/bytecode.py +727 -0
  25. numba_cuda/numba/cuda/core/caching.py +5 -2
  26. numba_cuda/numba/cuda/core/callconv.py +3 -0
  27. numba_cuda/numba/cuda/core/codegen.py +3 -0
  28. numba_cuda/numba/cuda/core/compiler.py +9 -14
  29. numba_cuda/numba/cuda/core/compiler_machinery.py +497 -0
  30. numba_cuda/numba/cuda/core/config.py +747 -0
  31. numba_cuda/numba/cuda/core/consts.py +124 -0
  32. numba_cuda/numba/cuda/core/cpu.py +370 -0
  33. numba_cuda/numba/cuda/core/environment.py +68 -0
  34. numba_cuda/numba/cuda/core/event.py +511 -0
  35. numba_cuda/numba/cuda/core/funcdesc.py +330 -0
  36. numba_cuda/numba/cuda/core/inline_closurecall.py +1889 -0
  37. numba_cuda/numba/cuda/core/interpreter.py +52 -27
  38. numba_cuda/numba/cuda/core/ir_utils.py +17 -29
  39. numba_cuda/numba/cuda/core/options.py +262 -0
  40. numba_cuda/numba/cuda/core/postproc.py +249 -0
  41. numba_cuda/numba/cuda/core/pythonapi.py +1868 -0
  42. numba_cuda/numba/cuda/core/rewrites/__init__.py +26 -0
  43. numba_cuda/numba/cuda/core/rewrites/ir_print.py +90 -0
  44. numba_cuda/numba/cuda/core/rewrites/registry.py +104 -0
  45. numba_cuda/numba/cuda/core/rewrites/static_binop.py +40 -0
  46. numba_cuda/numba/cuda/core/rewrites/static_getitem.py +187 -0
  47. numba_cuda/numba/cuda/core/rewrites/static_raise.py +98 -0
  48. numba_cuda/numba/cuda/core/sigutils.py +3 -0
  49. numba_cuda/numba/cuda/core/ssa.py +496 -0
  50. numba_cuda/numba/cuda/core/targetconfig.py +329 -0
  51. numba_cuda/numba/cuda/core/tracing.py +231 -0
  52. numba_cuda/numba/cuda/core/transforms.py +952 -0
  53. numba_cuda/numba/cuda/core/typed_passes.py +741 -7
  54. numba_cuda/numba/cuda/core/typeinfer.py +1948 -0
  55. numba_cuda/numba/cuda/core/unsafe/__init__.py +0 -0
  56. numba_cuda/numba/cuda/core/unsafe/bytes.py +67 -0
  57. numba_cuda/numba/cuda/core/unsafe/eh.py +66 -0
  58. numba_cuda/numba/cuda/core/unsafe/refcount.py +98 -0
  59. numba_cuda/numba/cuda/core/untyped_passes.py +1983 -0
  60. numba_cuda/numba/cuda/cpython/cmathimpl.py +560 -0
  61. numba_cuda/numba/cuda/cpython/mathimpl.py +499 -0
  62. numba_cuda/numba/cuda/cpython/numbers.py +1474 -0
  63. numba_cuda/numba/cuda/cuda_paths.py +425 -246
  64. numba_cuda/numba/cuda/cudadecl.py +4 -1
  65. numba_cuda/numba/cuda/cudadrv/__init__.py +4 -1
  66. numba_cuda/numba/cuda/cudadrv/devicearray.py +5 -1
  67. numba_cuda/numba/cuda/cudadrv/devices.py +3 -0
  68. numba_cuda/numba/cuda/cudadrv/driver.py +14 -140
  69. numba_cuda/numba/cuda/cudadrv/drvapi.py +3 -0
  70. numba_cuda/numba/cuda/cudadrv/dummyarray.py +114 -24
  71. numba_cuda/numba/cuda/cudadrv/enums.py +3 -0
  72. numba_cuda/numba/cuda/cudadrv/error.py +4 -0
  73. numba_cuda/numba/cuda/cudadrv/libs.py +8 -5
  74. numba_cuda/numba/cuda/cudadrv/linkable_code.py +3 -0
  75. numba_cuda/numba/cuda/cudadrv/mappings.py +4 -1
  76. numba_cuda/numba/cuda/cudadrv/ndarray.py +3 -0
  77. numba_cuda/numba/cuda/cudadrv/nvrtc.py +22 -8
  78. numba_cuda/numba/cuda/cudadrv/nvvm.py +4 -4
  79. numba_cuda/numba/cuda/cudadrv/rtapi.py +3 -0
  80. numba_cuda/numba/cuda/cudadrv/runtime.py +4 -1
  81. numba_cuda/numba/cuda/cudaimpl.py +8 -1
  82. numba_cuda/numba/cuda/cudamath.py +3 -0
  83. numba_cuda/numba/cuda/debuginfo.py +88 -2
  84. numba_cuda/numba/cuda/decorators.py +6 -3
  85. numba_cuda/numba/cuda/descriptor.py +6 -4
  86. numba_cuda/numba/cuda/device_init.py +3 -0
  87. numba_cuda/numba/cuda/deviceufunc.py +69 -2
  88. numba_cuda/numba/cuda/dispatcher.py +21 -39
  89. numba_cuda/numba/cuda/errors.py +10 -0
  90. numba_cuda/numba/cuda/extending.py +3 -0
  91. numba_cuda/numba/cuda/flags.py +143 -1
  92. numba_cuda/numba/cuda/fp16.py +3 -2
  93. numba_cuda/numba/cuda/include/13/cuda_bf16.h +5118 -0
  94. numba_cuda/numba/cuda/include/13/cuda_bf16.hpp +3865 -0
  95. numba_cuda/numba/cuda/include/13/cuda_fp16.h +5363 -0
  96. numba_cuda/numba/cuda/include/13/cuda_fp16.hpp +3483 -0
  97. numba_cuda/numba/cuda/initialize.py +4 -0
  98. numba_cuda/numba/cuda/intrinsic_wrapper.py +3 -0
  99. numba_cuda/numba/cuda/intrinsics.py +3 -0
  100. numba_cuda/numba/cuda/itanium_mangler.py +3 -0
  101. numba_cuda/numba/cuda/kernels/__init__.py +2 -0
  102. numba_cuda/numba/cuda/kernels/reduction.py +3 -0
  103. numba_cuda/numba/cuda/kernels/transpose.py +3 -0
  104. numba_cuda/numba/cuda/libdevice.py +4 -0
  105. numba_cuda/numba/cuda/libdevicedecl.py +3 -0
  106. numba_cuda/numba/cuda/libdevicefuncs.py +3 -0
  107. numba_cuda/numba/cuda/libdeviceimpl.py +3 -0
  108. numba_cuda/numba/cuda/locks.py +3 -0
  109. numba_cuda/numba/cuda/lowering.py +59 -159
  110. numba_cuda/numba/cuda/mathimpl.py +5 -1
  111. numba_cuda/numba/cuda/memory_management/__init__.py +3 -0
  112. numba_cuda/numba/cuda/memory_management/memsys.cu +5 -0
  113. numba_cuda/numba/cuda/memory_management/memsys.cuh +5 -0
  114. numba_cuda/numba/cuda/memory_management/nrt.cu +5 -0
  115. numba_cuda/numba/cuda/memory_management/nrt.cuh +5 -0
  116. numba_cuda/numba/cuda/memory_management/nrt.py +48 -18
  117. numba_cuda/numba/cuda/misc/findlib.py +75 -0
  118. numba_cuda/numba/cuda/models.py +12 -1
  119. numba_cuda/numba/cuda/np/npdatetime_helpers.py +217 -0
  120. numba_cuda/numba/cuda/np/npyfuncs.py +1807 -0
  121. numba_cuda/numba/cuda/np/numpy_support.py +553 -0
  122. numba_cuda/numba/cuda/np/ufunc/ufuncbuilder.py +59 -0
  123. numba_cuda/numba/cuda/nvvmutils.py +4 -1
  124. numba_cuda/numba/cuda/printimpl.py +15 -1
  125. numba_cuda/numba/cuda/random.py +4 -1
  126. numba_cuda/numba/cuda/reshape_funcs.cu +5 -0
  127. numba_cuda/numba/cuda/serialize.py +4 -1
  128. numba_cuda/numba/cuda/simulator/__init__.py +4 -1
  129. numba_cuda/numba/cuda/simulator/_internal/__init__.py +3 -0
  130. numba_cuda/numba/cuda/simulator/_internal/cuda_bf16.py +2 -0
  131. numba_cuda/numba/cuda/simulator/api.py +4 -1
  132. numba_cuda/numba/cuda/simulator/bf16.py +3 -0
  133. numba_cuda/numba/cuda/simulator/compiler.py +7 -0
  134. numba_cuda/numba/cuda/simulator/cudadrv/__init__.py +3 -0
  135. numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py +4 -1
  136. numba_cuda/numba/cuda/simulator/cudadrv/devices.py +3 -0
  137. numba_cuda/numba/cuda/simulator/cudadrv/driver.py +3 -0
  138. numba_cuda/numba/cuda/simulator/cudadrv/drvapi.py +3 -0
  139. numba_cuda/numba/cuda/simulator/cudadrv/dummyarray.py +3 -0
  140. numba_cuda/numba/cuda/simulator/cudadrv/error.py +4 -0
  141. numba_cuda/numba/cuda/simulator/cudadrv/libs.py +4 -0
  142. numba_cuda/numba/cuda/simulator/cudadrv/linkable_code.py +4 -0
  143. numba_cuda/numba/cuda/simulator/cudadrv/nvrtc.py +3 -0
  144. numba_cuda/numba/cuda/simulator/cudadrv/nvvm.py +3 -0
  145. numba_cuda/numba/cuda/simulator/cudadrv/runtime.py +3 -0
  146. numba_cuda/numba/cuda/simulator/dispatcher.py +4 -0
  147. numba_cuda/numba/cuda/simulator/kernel.py +3 -0
  148. numba_cuda/numba/cuda/simulator/kernelapi.py +4 -1
  149. numba_cuda/numba/cuda/simulator/memory_management/__init__.py +3 -0
  150. numba_cuda/numba/cuda/simulator/memory_management/nrt.py +17 -2
  151. numba_cuda/numba/cuda/simulator/reduction.py +3 -0
  152. numba_cuda/numba/cuda/simulator/vector_types.py +3 -0
  153. numba_cuda/numba/cuda/simulator_init.py +3 -0
  154. numba_cuda/numba/cuda/stubs.py +3 -0
  155. numba_cuda/numba/cuda/target.py +38 -17
  156. numba_cuda/numba/cuda/testing.py +7 -19
  157. numba_cuda/numba/cuda/tests/__init__.py +4 -1
  158. numba_cuda/numba/cuda/tests/cloudpickle_main_class.py +9 -0
  159. numba_cuda/numba/cuda/tests/complex_usecases.py +3 -0
  160. numba_cuda/numba/cuda/tests/core/serialize_usecases.py +3 -0
  161. numba_cuda/numba/cuda/tests/core/test_itanium_mangler.py +3 -0
  162. numba_cuda/numba/cuda/tests/core/test_serialize.py +7 -4
  163. numba_cuda/numba/cuda/tests/cudadrv/__init__.py +3 -0
  164. numba_cuda/numba/cuda/tests/cudadrv/test_array_attr.py +3 -0
  165. numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py +3 -0
  166. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_array_slicing.py +3 -0
  167. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_auto_context.py +3 -0
  168. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py +4 -1
  169. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py +3 -0
  170. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_libraries.py +4 -1
  171. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_memory.py +3 -0
  172. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py +3 -0
  173. numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py +4 -1
  174. numba_cuda/numba/cuda/tests/cudadrv/test_detect.py +9 -3
  175. numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +4 -1
  176. numba_cuda/numba/cuda/tests/cudadrv/test_events.py +3 -0
  177. numba_cuda/numba/cuda/tests/cudadrv/test_host_alloc.py +3 -0
  178. numba_cuda/numba/cuda/tests/cudadrv/test_init.py +3 -0
  179. numba_cuda/numba/cuda/tests/cudadrv/test_inline_ptx.py +3 -0
  180. numba_cuda/numba/cuda/tests/cudadrv/test_is_fp16.py +3 -0
  181. numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +21 -2
  182. numba_cuda/numba/cuda/tests/cudadrv/test_managed_alloc.py +3 -0
  183. numba_cuda/numba/cuda/tests/cudadrv/test_module_callbacks.py +5 -1
  184. numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py +4 -1
  185. numba_cuda/numba/cuda/tests/cudadrv/test_nvrtc.py +3 -0
  186. numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py +3 -0
  187. numba_cuda/numba/cuda/tests/cudadrv/test_pinned.py +3 -0
  188. numba_cuda/numba/cuda/tests/cudadrv/test_profiler.py +3 -0
  189. numba_cuda/numba/cuda/tests/cudadrv/test_ptds.py +4 -1
  190. numba_cuda/numba/cuda/tests/cudadrv/test_reset_device.py +3 -0
  191. numba_cuda/numba/cuda/tests/cudadrv/test_runtime.py +3 -0
  192. numba_cuda/numba/cuda/tests/cudadrv/test_select_device.py +3 -0
  193. numba_cuda/numba/cuda/tests/cudadrv/test_streams.py +3 -0
  194. numba_cuda/numba/cuda/tests/cudapy/__init__.py +3 -0
  195. numba_cuda/numba/cuda/tests/cudapy/cache_usecases.py +3 -0
  196. numba_cuda/numba/cuda/tests/cudapy/cache_with_cpu_usecases.py +3 -0
  197. numba_cuda/numba/cuda/tests/cudapy/cg_cache_usecases.py +3 -0
  198. numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +4 -1
  199. numba_cuda/numba/cuda/tests/cudapy/recursion_usecases.py +3 -0
  200. numba_cuda/numba/cuda/tests/cudapy/test_alignment.py +3 -0
  201. numba_cuda/numba/cuda/tests/cudapy/test_array.py +5 -1
  202. numba_cuda/numba/cuda/tests/cudapy/test_array_alignment.py +3 -0
  203. numba_cuda/numba/cuda/tests/cudapy/test_array_args.py +3 -0
  204. numba_cuda/numba/cuda/tests/cudapy/test_array_methods.py +3 -0
  205. numba_cuda/numba/cuda/tests/cudapy/test_atomics.py +4 -1
  206. numba_cuda/numba/cuda/tests/cudapy/test_bfloat16.py +542 -2
  207. numba_cuda/numba/cuda/tests/cudapy/test_bfloat16_bindings.py +84 -1
  208. numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py +3 -0
  209. numba_cuda/numba/cuda/tests/cudapy/test_boolean.py +3 -0
  210. numba_cuda/numba/cuda/tests/cudapy/test_caching.py +4 -3
  211. numba_cuda/numba/cuda/tests/cudapy/test_casting.py +3 -0
  212. numba_cuda/numba/cuda/tests/cudapy/test_cffi.py +3 -0
  213. numba_cuda/numba/cuda/tests/cudapy/test_compiler.py +3 -0
  214. numba_cuda/numba/cuda/tests/cudapy/test_complex.py +4 -1
  215. numba_cuda/numba/cuda/tests/cudapy/test_complex_kernel.py +3 -0
  216. numba_cuda/numba/cuda/tests/cudapy/test_const_string.py +3 -0
  217. numba_cuda/numba/cuda/tests/cudapy/test_constmem.py +4 -1
  218. numba_cuda/numba/cuda/tests/cudapy/test_cooperative_groups.py +5 -3
  219. numba_cuda/numba/cuda/tests/cudapy/test_copy_propagate.py +130 -0
  220. numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py +3 -0
  221. numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py +3 -0
  222. numba_cuda/numba/cuda/tests/cudapy/test_datetime.py +4 -1
  223. numba_cuda/numba/cuda/tests/cudapy/test_debug.py +4 -1
  224. numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +314 -3
  225. numba_cuda/numba/cuda/tests/cudapy/test_debuginfo_types.py +4 -1
  226. numba_cuda/numba/cuda/tests/cudapy/test_device_func.py +3 -0
  227. numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +4 -1
  228. numba_cuda/numba/cuda/tests/cudapy/test_enums.py +3 -0
  229. numba_cuda/numba/cuda/tests/cudapy/test_errors.py +4 -1
  230. numba_cuda/numba/cuda/tests/cudapy/test_exception.py +4 -1
  231. numba_cuda/numba/cuda/tests/cudapy/test_extending.py +5 -1
  232. numba_cuda/numba/cuda/tests/cudapy/test_fastmath.py +3 -0
  233. numba_cuda/numba/cuda/tests/cudapy/test_forall.py +3 -0
  234. numba_cuda/numba/cuda/tests/cudapy/test_freevar.py +3 -0
  235. numba_cuda/numba/cuda/tests/cudapy/test_frexp_ldexp.py +3 -0
  236. numba_cuda/numba/cuda/tests/cudapy/test_globals.py +3 -0
  237. numba_cuda/numba/cuda/tests/cudapy/test_gufunc.py +3 -0
  238. numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scalar.py +3 -0
  239. numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scheduling.py +3 -0
  240. numba_cuda/numba/cuda/tests/cudapy/test_idiv.py +3 -0
  241. numba_cuda/numba/cuda/tests/cudapy/test_inline.py +21 -8
  242. numba_cuda/numba/cuda/tests/cudapy/test_inspect.py +3 -0
  243. numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py +3 -0
  244. numba_cuda/numba/cuda/tests/cudapy/test_ipc.py +3 -0
  245. numba_cuda/numba/cuda/tests/cudapy/test_ir_utils.py +13 -37
  246. numba_cuda/numba/cuda/tests/cudapy/test_iterators.py +3 -0
  247. numba_cuda/numba/cuda/tests/cudapy/test_lang.py +3 -0
  248. numba_cuda/numba/cuda/tests/cudapy/test_laplace.py +4 -1
  249. numba_cuda/numba/cuda/tests/cudapy/test_libdevice.py +3 -0
  250. numba_cuda/numba/cuda/tests/cudapy/test_lineinfo.py +3 -0
  251. numba_cuda/numba/cuda/tests/cudapy/test_localmem.py +3 -0
  252. numba_cuda/numba/cuda/tests/cudapy/test_mandel.py +3 -0
  253. numba_cuda/numba/cuda/tests/cudapy/test_math.py +4 -1
  254. numba_cuda/numba/cuda/tests/cudapy/test_matmul.py +4 -1
  255. numba_cuda/numba/cuda/tests/cudapy/test_minmax.py +3 -0
  256. numba_cuda/numba/cuda/tests/cudapy/test_montecarlo.py +3 -0
  257. numba_cuda/numba/cuda/tests/cudapy/test_multigpu.py +3 -0
  258. numba_cuda/numba/cuda/tests/cudapy/test_multiprocessing.py +3 -0
  259. numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py +3 -0
  260. numba_cuda/numba/cuda/tests/cudapy/test_nondet.py +3 -0
  261. numba_cuda/numba/cuda/tests/cudapy/test_operator.py +4 -1
  262. numba_cuda/numba/cuda/tests/cudapy/test_optimization.py +3 -0
  263. numba_cuda/numba/cuda/tests/cudapy/test_overload.py +3 -0
  264. numba_cuda/numba/cuda/tests/cudapy/test_powi.py +3 -0
  265. numba_cuda/numba/cuda/tests/cudapy/test_print.py +23 -0
  266. numba_cuda/numba/cuda/tests/cudapy/test_py2_div_issue.py +3 -0
  267. numba_cuda/numba/cuda/tests/cudapy/test_random.py +3 -0
  268. numba_cuda/numba/cuda/tests/cudapy/test_record_dtype.py +4 -1
  269. numba_cuda/numba/cuda/tests/cudapy/test_recursion.py +3 -0
  270. numba_cuda/numba/cuda/tests/cudapy/test_reduction.py +4 -1
  271. numba_cuda/numba/cuda/tests/cudapy/test_retrieve_autoconverted_arrays.py +3 -0
  272. numba_cuda/numba/cuda/tests/cudapy/test_serialize.py +4 -1
  273. numba_cuda/numba/cuda/tests/cudapy/test_slicing.py +3 -0
  274. numba_cuda/numba/cuda/tests/cudapy/test_sm.py +4 -1
  275. numba_cuda/numba/cuda/tests/cudapy/test_sm_creation.py +3 -0
  276. numba_cuda/numba/cuda/tests/cudapy/test_ssa.py +453 -0
  277. numba_cuda/numba/cuda/tests/cudapy/test_stream_api.py +3 -0
  278. numba_cuda/numba/cuda/tests/cudapy/test_sync.py +4 -1
  279. numba_cuda/numba/cuda/tests/cudapy/test_transpose.py +3 -0
  280. numba_cuda/numba/cuda/tests/cudapy/test_typeinfer.py +538 -0
  281. numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +266 -2
  282. numba_cuda/numba/cuda/tests/cudapy/test_userexc.py +4 -1
  283. numba_cuda/numba/cuda/tests/cudapy/test_vector_type.py +4 -1
  284. numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py +3 -0
  285. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_complex.py +3 -0
  286. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py +115 -6
  287. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_device.py +3 -0
  288. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_scalar_arg.py +3 -0
  289. numba_cuda/numba/cuda/tests/cudapy/test_warning.py +4 -1
  290. numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +4 -1
  291. numba_cuda/numba/cuda/tests/cudasim/__init__.py +3 -0
  292. numba_cuda/numba/cuda/tests/cudasim/support.py +3 -0
  293. numba_cuda/numba/cuda/tests/cudasim/test_cudasim_issues.py +3 -0
  294. numba_cuda/numba/cuda/tests/data/__init__.py +2 -0
  295. numba_cuda/numba/cuda/tests/data/cta_barrier.cu +5 -0
  296. numba_cuda/numba/cuda/tests/data/cuda_include.cu +5 -0
  297. numba_cuda/numba/cuda/tests/data/error.cu +5 -0
  298. numba_cuda/numba/cuda/tests/data/include/add.cuh +5 -0
  299. numba_cuda/numba/cuda/tests/data/jitlink.cu +5 -0
  300. numba_cuda/numba/cuda/tests/data/warn.cu +5 -0
  301. numba_cuda/numba/cuda/tests/doc_examples/__init__.py +3 -0
  302. numba_cuda/numba/cuda/tests/doc_examples/ffi/__init__.py +2 -0
  303. numba_cuda/numba/cuda/tests/doc_examples/ffi/functions.cu +5 -0
  304. numba_cuda/numba/cuda/tests/doc_examples/ffi/include/mul.cuh +5 -0
  305. numba_cuda/numba/cuda/tests/doc_examples/ffi/saxpy.cu +5 -0
  306. numba_cuda/numba/cuda/tests/doc_examples/test_cg.py +3 -2
  307. numba_cuda/numba/cuda/tests/doc_examples/test_cpointer.py +3 -0
  308. numba_cuda/numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py +3 -0
  309. numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py +6 -2
  310. numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py +3 -2
  311. numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py +3 -0
  312. numba_cuda/numba/cuda/tests/doc_examples/test_montecarlo.py +3 -0
  313. numba_cuda/numba/cuda/tests/doc_examples/test_random.py +3 -0
  314. numba_cuda/numba/cuda/tests/doc_examples/test_reduction.py +3 -0
  315. numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py +3 -2
  316. numba_cuda/numba/cuda/tests/doc_examples/test_ufunc.py +3 -0
  317. numba_cuda/numba/cuda/tests/doc_examples/test_vecadd.py +3 -0
  318. numba_cuda/numba/cuda/tests/enum_usecases.py +3 -0
  319. numba_cuda/numba/cuda/tests/nocuda/__init__.py +3 -0
  320. numba_cuda/numba/cuda/tests/nocuda/test_dummyarray.py +3 -0
  321. numba_cuda/numba/cuda/tests/nocuda/test_function_resolution.py +3 -0
  322. numba_cuda/numba/cuda/tests/nocuda/test_import.py +6 -1
  323. numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py +27 -12
  324. numba_cuda/numba/cuda/tests/nocuda/test_nvvm.py +3 -0
  325. numba_cuda/numba/cuda/tests/nrt/__init__.py +3 -0
  326. numba_cuda/numba/cuda/tests/nrt/test_nrt.py +5 -1
  327. numba_cuda/numba/cuda/tests/nrt/test_nrt_refct.py +3 -0
  328. numba_cuda/numba/cuda/tests/support.py +58 -15
  329. numba_cuda/numba/cuda/tests/test_binary_generation/Makefile +3 -0
  330. numba_cuda/numba/cuda/tests/test_binary_generation/generate_raw_ltoir.py +2 -1
  331. numba_cuda/numba/cuda/tests/test_binary_generation/nrt_extern.cu +5 -0
  332. numba_cuda/numba/cuda/tests/test_binary_generation/test_device_functions.cu +5 -0
  333. numba_cuda/numba/cuda/tests/test_binary_generation/undefined_extern.cu +5 -0
  334. numba_cuda/numba/cuda/tests/test_tracing.py +200 -0
  335. numba_cuda/numba/cuda/types.py +59 -0
  336. numba_cuda/numba/cuda/typing/__init__.py +12 -1
  337. numba_cuda/numba/cuda/typing/cffi_utils.py +55 -0
  338. numba_cuda/numba/cuda/typing/context.py +751 -0
  339. numba_cuda/numba/cuda/typing/enumdecl.py +74 -0
  340. numba_cuda/numba/cuda/typing/npydecl.py +658 -0
  341. numba_cuda/numba/cuda/typing/templates.py +10 -14
  342. numba_cuda/numba/cuda/ufuncs.py +6 -3
  343. numba_cuda/numba/cuda/utils.py +9 -112
  344. numba_cuda/numba/cuda/vector_types.py +3 -0
  345. numba_cuda/numba/cuda/vectorizers.py +3 -0
  346. {numba_cuda-0.19.0.dist-info → numba_cuda-0.20.0.dist-info}/METADATA +6 -2
  347. numba_cuda-0.20.0.dist-info/RECORD +357 -0
  348. {numba_cuda-0.19.0.dist-info → numba_cuda-0.20.0.dist-info}/licenses/LICENSE +1 -0
  349. numba_cuda-0.20.0.dist-info/licenses/LICENSE.numba +24 -0
  350. numba_cuda/numba/cuda/tests/cudadrv/test_mvc.py +0 -57
  351. numba_cuda-0.19.0.dist-info/RECORD +0 -301
  352. {numba_cuda-0.19.0.dist-info → numba_cuda-0.20.0.dist-info}/WHEEL +0 -0
  353. {numba_cuda-0.19.0.dist-info → numba_cuda-0.20.0.dist-info}/top_level.txt +0 -0
@@ -1,5 +1,8 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: BSD-2-Clause
3
+
1
4
  from numba.core import errors, types
2
- from numba.core.typing.npydecl import (
5
+ from numba.cuda.typing.npydecl import (
3
6
  parse_dtype,
4
7
  parse_shape,
5
8
  register_number_classes,
@@ -1,3 +1,6 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: BSD-2-Clause
3
+
1
4
  """CUDA Driver
2
5
 
3
6
  - Driver API binding
@@ -6,6 +9,6 @@
6
9
 
7
10
  """
8
11
 
9
- from numba.core import config
12
+ from numba.cuda.core import config
10
13
 
11
14
  assert not config.ENABLE_CUDASIM, "Cannot use real driver API with simulator"
@@ -1,3 +1,6 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: BSD-2-Clause
3
+
1
4
  """
2
5
  A CUDA ND Array is recognized by checking the __cuda_memory__ attribute
3
6
  on the object. If it exists and evaluate to True, it must define shape,
@@ -16,7 +19,8 @@ import numba
16
19
  from numba import _devicearray
17
20
  from numba.cuda.cudadrv import devices, dummyarray
18
21
  from numba.cuda.cudadrv import driver as _driver
19
- from numba.core import types, config
22
+ from numba.core import types
23
+ from numba.cuda.core import config
20
24
  from numba.np.unsafe.ndarray import to_fixed_tuple
21
25
  from numba.np.numpy_support import numpy_version
22
26
  from numba.np import numpy_support
@@ -1,3 +1,6 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: BSD-2-Clause
3
+
1
4
  """
2
5
  Expose each GPU devices directly.
3
6
 
@@ -1,3 +1,6 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: BSD-2-Clause
3
+
1
4
  """
2
5
  CUDA driver bridge implementation
3
6
 
@@ -44,7 +47,7 @@ from collections import namedtuple, deque
44
47
 
45
48
 
46
49
  from numba import mviewbuf
47
- from numba.core import config
50
+ from numba.cuda.core import config
48
51
  from numba.cuda import utils, serialize
49
52
  from .error import CudaSupportError, CudaDriverError
50
53
  from .drvapi import API_PROTOTYPES
@@ -79,12 +82,6 @@ _py_incref = ctypes.pythonapi.Py_IncRef
79
82
  _py_decref.argtypes = [ctypes.py_object]
80
83
  _py_incref.argtypes = [ctypes.py_object]
81
84
 
82
-
83
- _MVC_ERROR_MESSAGE = (
84
- "Minor version compatibility requires ptxcompiler and cubinlinker packages "
85
- "to be available"
86
- )
87
-
88
85
  USE_NV_BINDING = config.CUDA_USE_NVIDIA_BINDING
89
86
 
90
87
  if USE_NV_BINDING:
@@ -134,7 +131,7 @@ def _have_nvjitlink():
134
131
  nvjitlink_internal._inspect_function_pointer("__nvJitLinkVersion")
135
132
  != 0
136
133
  )
137
- except NotSupportedError:
134
+ except (RuntimeError, NotSupportedError):
138
135
  # no driver
139
136
  return False
140
137
 
@@ -158,12 +155,6 @@ class CudaAPIError(CudaDriverError):
158
155
 
159
156
 
160
157
  def locate_driver_and_loader():
161
- envpath = config.CUDA_DRIVER
162
-
163
- if envpath == "0":
164
- # Force fail
165
- _raise_driver_not_found()
166
-
167
158
  # Determine DLL type
168
159
  if sys.platform == "win32":
169
160
  dlloader = ctypes.WinDLL
@@ -179,26 +170,11 @@ def locate_driver_and_loader():
179
170
  dldir = ["/usr/lib", "/usr/lib64"]
180
171
  dlnames = ["libcuda.so", "libcuda.so.1"]
181
172
 
182
- if envpath:
183
- try:
184
- envpath = os.path.abspath(envpath)
185
- except ValueError:
186
- raise ValueError(
187
- "NUMBA_CUDA_DRIVER %s is not a valid path" % envpath
188
- )
189
- if not os.path.isfile(envpath):
190
- raise ValueError(
191
- "NUMBA_CUDA_DRIVER %s is not a valid file "
192
- "path. Note it must be a filepath of the .so/"
193
- ".dll/.dylib or the driver" % envpath
194
- )
195
- candidates = [envpath]
196
- else:
197
- # First search for the name in the default library path.
198
- # If that is not found, try the specific path.
199
- candidates = dlnames + [
200
- os.path.join(x, y) for x, y in product(dldir, dlnames)
201
- ]
173
+ # First search for the name in the default library path.
174
+ # If that is not found, try specific common paths.
175
+ candidates = dlnames + [
176
+ os.path.join(x, y) for x, y in product(dldir, dlnames)
177
+ ]
202
178
 
203
179
  return dlloader, candidates
204
180
 
@@ -234,9 +210,7 @@ def find_driver():
234
210
 
235
211
  DRIVER_NOT_FOUND_MSG = """
236
212
  CUDA driver library cannot be found.
237
- If you are sure that a CUDA driver is installed,
238
- try setting environment variable NUMBA_CUDA_DRIVER
239
- with the file path of the CUDA driver shared library.
213
+ Ensure that a compatible NVIDIA driver is installed and available on your system path.
240
214
  """
241
215
 
242
216
  DRIVER_LOAD_ERROR_MSG = """
@@ -2839,10 +2813,7 @@ class _LinkerBase(metaclass=ABCMeta):
2839
2813
  def add_cu(self, cu, name):
2840
2814
  """Add CUDA source in a string to the link. The name of the source
2841
2815
  file should be specified in `name`."""
2842
- with driver.get_active_context() as ac:
2843
- dev = driver.get_device(ac.devnum)
2844
- cc = dev.compute_capability
2845
- ptx, log = nvrtc.compile(cu, name, cc)
2816
+ ptx, log = nvrtc.compile(cu, name, self.cc)
2846
2817
 
2847
2818
  if config.DUMP_ASSEMBLY:
2848
2819
  print(("ASSEMBLY %s" % name).center(80, "-"))
@@ -3006,10 +2977,7 @@ class _Linker(_LinkerBase):
3006
2977
  self._object_codes.append(obj)
3007
2978
 
3008
2979
  def add_cu(self, cu, name="<cudapy-cu>"):
3009
- with driver.get_active_context() as ac:
3010
- dev = driver.get_device(ac.devnum)
3011
- cc = dev.compute_capability
3012
- obj, log = nvrtc.compile(cu, name, cc, ltoir=self.lto)
2980
+ obj, log = nvrtc.compile(cu, name, self.cc, ltoir=self.lto)
3013
2981
 
3014
2982
  if not self.lto and config.DUMP_ASSEMBLY:
3015
2983
  print(("ASSEMBLY %s" % name).center(80, "-"))
@@ -3096,101 +3064,6 @@ class _Linker(_LinkerBase):
3096
3064
  return result
3097
3065
 
3098
3066
 
3099
- class MVCLinker(_LinkerBase):
3100
- """
3101
- Linker supporting Minor Version Compatibility, backed by the cubinlinker
3102
- package.
3103
- """
3104
-
3105
- def __init__(self, max_registers=None, lineinfo=False, cc=None):
3106
- try:
3107
- from cubinlinker import CubinLinker
3108
- except ImportError as err:
3109
- raise ImportError(_MVC_ERROR_MESSAGE) from err
3110
-
3111
- if cc is None:
3112
- raise RuntimeError(
3113
- "MVCLinker requires Compute Capability to be "
3114
- "specified, but cc is None"
3115
- )
3116
-
3117
- super().__init__(max_registers, lineinfo, cc)
3118
-
3119
- arch = f"sm_{cc[0] * 10 + cc[1]}"
3120
- ptx_compile_opts = ["--gpu-name", arch, "-c"]
3121
- if max_registers:
3122
- arg = f"--maxrregcount={max_registers}"
3123
- ptx_compile_opts.append(arg)
3124
- if lineinfo:
3125
- ptx_compile_opts.append("--generate-line-info")
3126
- self.ptx_compile_options = tuple(ptx_compile_opts)
3127
-
3128
- self._linker = CubinLinker(f"--arch={arch}")
3129
-
3130
- @property
3131
- def info_log(self):
3132
- return self._linker.info_log
3133
-
3134
- @property
3135
- def error_log(self):
3136
- return self._linker.error_log
3137
-
3138
- def add_ptx(self, ptx, name="<cudapy-ptx>"):
3139
- try:
3140
- from ptxcompiler import compile_ptx
3141
- from cubinlinker import CubinLinkerError
3142
- except ImportError as err:
3143
- raise ImportError(_MVC_ERROR_MESSAGE) from err
3144
- compile_result = compile_ptx(ptx.decode(), self.ptx_compile_options)
3145
- try:
3146
- self._linker.add_cubin(compile_result.compiled_program, name)
3147
- except CubinLinkerError as e:
3148
- raise LinkerError from e
3149
-
3150
- def add_data(self, data, kind, name):
3151
- msg = "Adding in-memory data unsupported in the MVC linker"
3152
- raise LinkerError(msg)
3153
-
3154
- def add_file(self, path, kind):
3155
- try:
3156
- from cubinlinker import CubinLinkerError
3157
- except ImportError as err:
3158
- raise ImportError(_MVC_ERROR_MESSAGE) from err
3159
-
3160
- try:
3161
- data = cached_file_read(path, how="rb")
3162
- except FileNotFoundError:
3163
- raise LinkerError(f"{path} not found")
3164
-
3165
- name = pathlib.Path(path).name
3166
- if kind == FILE_EXTENSION_MAP["cubin"]:
3167
- fn = self._linker.add_cubin
3168
- elif kind == FILE_EXTENSION_MAP["fatbin"]:
3169
- fn = self._linker.add_fatbin
3170
- elif kind == FILE_EXTENSION_MAP["a"]:
3171
- raise LinkerError(f"Don't know how to link {kind}")
3172
- elif kind == FILE_EXTENSION_MAP["ptx"]:
3173
- return self.add_ptx(data, name)
3174
- else:
3175
- raise LinkerError(f"Don't know how to link {kind}")
3176
-
3177
- try:
3178
- fn(data, name)
3179
- except CubinLinkerError as e:
3180
- raise LinkerError from e
3181
-
3182
- def complete(self):
3183
- try:
3184
- from cubinlinker import CubinLinkerError
3185
- except ImportError as err:
3186
- raise ImportError(_MVC_ERROR_MESSAGE) from err
3187
-
3188
- try:
3189
- return self._linker.complete()
3190
- except CubinLinkerError as e:
3191
- raise LinkerError from e
3192
-
3193
-
3194
3067
  class CtypesLinker(_LinkerBase):
3195
3068
  """
3196
3069
  Links for current device if no CC given
@@ -3215,6 +3088,7 @@ class CtypesLinker(_LinkerBase):
3215
3088
  if lineinfo:
3216
3089
  options[enums.CU_JIT_GENERATE_LINE_INFO] = c_void_p(1)
3217
3090
 
3091
+ self.cc = cc
3218
3092
  if cc is None:
3219
3093
  # No option value is needed, but we need something as a placeholder
3220
3094
  options[enums.CU_JIT_TARGET_FROM_CUCONTEXT] = 1
@@ -1,3 +1,6 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: BSD-2-Clause
3
+
1
4
  from ctypes import (
2
5
  c_byte,
3
6
  c_char_p,
@@ -1,26 +1,122 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: BSD-2-Clause
3
+
1
4
  from collections import namedtuple
2
5
  import itertools
3
6
  import functools
4
7
  import operator
5
- import ctypes
6
-
7
- import numpy as np
8
8
 
9
- from numba import _helperlib
10
9
 
11
10
  Extent = namedtuple("Extent", ["begin", "end"])
12
11
 
13
- attempt_nocopy_reshape = ctypes.CFUNCTYPE(
14
- ctypes.c_int,
15
- ctypes.c_long, # nd
16
- np.ctypeslib.ndpointer(np.ctypeslib.c_intp, ndim=1), # dims
17
- np.ctypeslib.ndpointer(np.ctypeslib.c_intp, ndim=1), # strides
18
- ctypes.c_long, # newnd
19
- np.ctypeslib.ndpointer(np.ctypeslib.c_intp, ndim=1), # newdims
20
- np.ctypeslib.ndpointer(np.ctypeslib.c_intp, ndim=1), # newstrides
21
- ctypes.c_long, # itemsize
22
- ctypes.c_int, # is_f_order
23
- )(_helperlib.c_helpers["attempt_nocopy_reshape"])
12
+
13
def attempt_nocopy_reshape(
    nd, dims, strides, newnd, newdims, newstrides, itemsize, is_f_order
):
    """
    Attempt to reshape an array without copying data.

    This function should correctly handle all reshapes, including
    axes of length 1. Zero strides should work but are untested.

    Parameters
    ----------
    nd
        Number of leading entries of ``dims`` / ``strides`` to consider.
    dims, strides
        Indexable sequences giving the current extent and stride per axis.
    newnd
        Number of leading entries of ``newdims`` to consider.
    newdims
        Indexable sequence giving the requested extent per axis.
    newstrides
        Mutable sequence with at least ``newnd`` slots. It is written in
        place; on failure it may already have been partially written.
    itemsize
        Stride used for zero-sized arrays and when no existing axis
        determines the stride (e.g. every axis has length 1).
    is_f_order
        Truthy to combine/split axes in Fortran order, falsy for C order.

    Returns
    -------
    int
        0 if a copy is needed (or the total sizes differ), 1 if no copy is
        needed, in which case ``newstrides`` holds the resulting strides.
    """
    # Remove axes with dimension 1 from the old array. They have no effect
    # but would need special cases since their strides do not matter.
    kept_axes = [axis for axis in range(nd) if dims[axis] != 1]
    olddims = [dims[axis] for axis in kept_axes]
    oldstrides = [strides[axis] for axis in kept_axes]
    oldnd = len(kept_axes)

    # Calculate total sizes
    new_total = 1
    for axis in range(newnd):
        new_total *= newdims[axis]

    old_total = 1
    for extent in olddims:
        old_total *= extent

    if new_total != old_total:
        # Different total sizes; no hope
        return 0

    if new_total == 0:
        # Handle zero-sized arrays: just make the strides vaguely
        # reasonable (they can have any value in theory).
        for axis in range(newnd):
            newstrides[axis] = itemsize
        return 1

    # oi to oj and ni to nj give the axis ranges currently worked with
    oi, oj = 0, 1
    ni, nj = 0, 1

    while ni < newnd and oi < oldnd:
        # Grow whichever group is smaller until both groups cover the same
        # number of elements. (Named *_count rather than np/op so the
        # conventional NumPy alias is never shadowed.)
        new_count = newdims[ni]
        old_count = olddims[oi]

        while new_count != old_count:
            if new_count < old_count:
                # Misses trailing 1s, these are handled later
                new_count *= newdims[nj]
                nj += 1
            else:
                old_count *= olddims[oj]
                oj += 1

        # Check whether the original axes can be combined
        for ok in range(oi, oj - 1):
            if is_f_order:
                if oldstrides[ok + 1] != olddims[ok] * oldstrides[ok]:
                    # not contiguous enough
                    return 0
            else:
                # C order
                if oldstrides[ok] != olddims[ok + 1] * oldstrides[ok + 1]:
                    # not contiguous enough
                    return 0

        # Calculate new strides for all axes currently worked with
        if is_f_order:
            newstrides[ni] = oldstrides[oi]
            for nk in range(ni + 1, nj):
                newstrides[nk] = newstrides[nk - 1] * newdims[nk - 1]
        else:
            # C order
            newstrides[nj - 1] = oldstrides[oj - 1]
            for nk in range(nj - 1, ni, -1):
                newstrides[nk - 1] = newstrides[nk] * newdims[nk]

        ni = nj
        nj += 1
        oi = oj
        oj += 1

    # Set strides corresponding to trailing 1s of the new shape.
    if ni >= 1:
        last_stride = newstrides[ni - 1]
        if is_f_order:
            last_stride *= newdims[ni - 1]
    else:
        # No new axis was assigned a stride. Indexing newdims[ni - 1] here
        # would be newdims[-1]: an IndexError for a 0-d target shape, and
        # otherwise a wraparound read of a length-1 extent, so skipping
        # the multiplication is equivalent whenever it used to succeed.
        last_stride = itemsize

    for nk in range(ni, newnd):
        newstrides[nk] = last_stride

    return 1
24
120
 
25
121
 
26
122
  class Dim(object):
@@ -330,18 +426,12 @@ class Array(object):
330
426
  else:
331
427
  raise AssertionError("unreachable")
332
428
  else:
333
- newstrides = np.empty(newnd, np.ctypeslib.c_intp)
334
-
335
- # need to keep these around in variables, not temporaries, so they
336
- # don't get GC'ed before we call into the C code
337
- olddims = np.array(self.shape, dtype=np.ctypeslib.c_intp)
338
- oldstrides = np.array(self.strides, dtype=np.ctypeslib.c_intp)
339
- newdims = np.array(newdims, dtype=np.ctypeslib.c_intp)
429
+ newstrides = [0] * newnd
340
430
 
341
431
  if not attempt_nocopy_reshape(
342
432
  oldnd,
343
- olddims,
344
- oldstrides,
433
+ self.shape,
434
+ self.strides,
345
435
  newnd,
346
436
  newdims,
347
437
  newstrides,
@@ -1,3 +1,6 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: BSD-2-Clause
3
+
1
4
  """
2
5
  Enum values for CUDA driver. Information about the values
3
6
  can be found on the official NVIDIA documentation website.
@@ -1,3 +1,7 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: BSD-2-Clause
3
+
4
+
1
5
  class CudaDriverError(Exception):
2
6
  pass
3
7
 
@@ -1,3 +1,6 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: BSD-2-Clause
3
+
1
4
  """CUDA Toolkit libraries lookup utilities.
2
5
 
3
6
  CUDA Toolkit libraries can be available via either:
@@ -13,11 +16,11 @@ import os
13
16
  import sys
14
17
  import ctypes
15
18
 
16
- from numba.misc.findlib import find_lib
19
+ from numba.cuda.misc.findlib import find_lib
17
20
  from numba.cuda.cuda_paths import get_cuda_paths
18
21
  from numba.cuda.cudadrv.driver import locate_driver_and_loader, load_driver
19
22
  from numba.cuda.cudadrv.error import CudaSupportError
20
- from numba.core import config
23
+ from numba.cuda.core import config
21
24
 
22
25
 
23
26
  if sys.platform == "win32":
@@ -51,9 +54,9 @@ def get_cudalib(lib, static=False):
51
54
  """
52
55
  if lib in {"nvrtc", "nvvm"}:
53
56
  return get_cuda_paths()[lib].info or _dllnamepattern % lib
54
- else:
55
- dir_type = "static_cudalib_dir" if static else "cudalib_dir"
56
- libdir = get_cuda_paths()[dir_type].info
57
+
58
+ dir_type = "static_cudalib_dir" if static else "cudalib_dir"
59
+ libdir = get_cuda_paths()[dir_type].info
57
60
 
58
61
  candidates = find_lib(lib, libdir, static=static)
59
62
  namepattern = _staticnamepattern if static else _dllnamepattern
@@ -1,3 +1,6 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: BSD-2-Clause
3
+
1
4
  import io
2
5
  from .mappings import FILE_EXTENSION_MAP
3
6
 
@@ -1,4 +1,7 @@
1
- from numba import config
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: BSD-2-Clause
3
+
4
+ from numba.cuda import config
2
5
  from . import enums
3
6
 
4
7
  if config.CUDA_USE_NVIDIA_BINDING:
@@ -1,3 +1,6 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: BSD-2-Clause
3
+
1
4
  from numba.cuda.cudadrv import devices, driver
2
5
  from numba.core.registry import cpu_target
3
6
 
@@ -1,3 +1,6 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: BSD-2-Clause
3
+
1
4
  from ctypes import byref, c_char, c_char_p, c_int, c_size_t, c_void_p, POINTER
2
5
  from enum import IntEnum
3
6
  from numba.cuda.cudadrv.error import (
@@ -7,7 +10,7 @@ from numba.cuda.cudadrv.error import (
7
10
  NvrtcCompilationError,
8
11
  NvrtcSupportError,
9
12
  )
10
- from numba import config
13
+ from numba.cuda import config
11
14
  from numba.cuda.cuda_paths import get_cuda_paths
12
15
  from numba.cuda.utils import _readenv
13
16
 
@@ -18,8 +21,8 @@ import warnings
18
21
 
19
22
  NVRTC_EXTRA_SEARCH_PATHS = _readenv(
20
23
  "NUMBA_CUDA_NVRTC_EXTRA_SEARCH_PATHS", str, ""
21
- ) or getattr(config, "NUMBA_CUDA_NVRTC_EXTRA_SEARCH_PATHS", "")
22
- if not hasattr(config, "NUMBA_CUDA_NVRTC_EXTRA_SEARCH_PATHS"):
24
+ ) or getattr(config, "CUDA_NVRTC_EXTRA_SEARCH_PATHS", "")
25
+ if not hasattr(config, "CUDA_NVRTC_EXTRA_SEARCH_PATHS"):
23
26
  config.CUDA_NVRTC_EXTRA_SEARCH_PATHS = NVRTC_EXTRA_SEARCH_PATHS
24
27
 
25
28
  # Opaque handle for compilation unit
@@ -344,15 +347,26 @@ def compile(src, name, cc, ltoir=False):
344
347
  arch = f"--gpu-architecture=compute_{major}{minor}"
345
348
 
346
349
  cuda_include_dir = get_cuda_paths()["include_dir"].info
347
- cuda_includes = [
348
- f"{cuda_include_dir}",
349
- f"{os.path.join(cuda_include_dir, 'cccl')}",
350
- ]
350
+ cuda_includes = [f"{cuda_include_dir}"]
351
351
 
352
352
  cudadrv_path = os.path.dirname(os.path.abspath(__file__))
353
353
  numba_cuda_path = os.path.dirname(cudadrv_path)
354
354
 
355
- numba_include = f"{os.path.join(numba_cuda_path, 'include', '12')}"
355
+ nvrtc_ver_major = version[0]
356
+ if nvrtc_ver_major == 12:
357
+ numba_include = f"{os.path.join(numba_cuda_path, 'include', '12')}"
358
+ # For CUDA 12 wheels, `cuda_include_dir` is `site-packages/nvidia/cuda_runtime/include`
359
+ # We need to find CCCL at `site-packages/nvidia/cuda_cccl/include`
360
+ # For CUDA 12 conda / system install, CCCL is just in the `include` directory
361
+ cuda_includes.append(
362
+ f"{os.path.join(cuda_include_dir, '..', '..', 'cuda_cccl', 'include')}"
363
+ )
364
+ elif nvrtc_ver_major == 13:
365
+ numba_include = f"{os.path.join(numba_cuda_path, 'include', '13')}"
366
+ # For CUDA 13 wheels, `cuda_include_dir` is `site-packages/nvidia/cu13/include`
367
+ # We need to find CCCL at `site-packages/nvidia/cu13/include/cccl`
368
+ # For CUDA 13 conda / system install, CCCL is in the `include/cccl` directory
369
+ cuda_includes.append(f"{os.path.join(cuda_include_dir, 'cccl')}")
356
370
 
357
371
  if config.CUDA_NVRTC_EXTRA_SEARCH_PATHS:
358
372
  extra_includes = config.CUDA_NVRTC_EXTRA_SEARCH_PATHS.split(":")
@@ -1,3 +1,6 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: BSD-2-Clause
3
+
1
4
  """
2
5
  This is a direct translation of nvvm.h
3
6
  """
@@ -151,10 +154,7 @@ class NVVM(object):
151
154
  inst.driver = open_cudalib("nvvm")
152
155
  except OSError as e:
153
156
  cls.__INSTANCE = None
154
- errmsg = (
155
- "libNVVM cannot be found. Do `conda install "
156
- "cudatoolkit`:\n%s"
157
- )
157
+ errmsg = "libNVVM cannot be found. Please install the cuda-toolkit conda package:\n%s"
158
158
  raise NvvmSupportError(errmsg % e)
159
159
 
160
160
  # Find & populate functions
@@ -1,3 +1,6 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: BSD-2-Clause
3
+
1
4
  """
2
5
  Declarations of the Runtime API functions.
3
6
  """
@@ -1,3 +1,6 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: BSD-2-Clause
3
+
1
4
  """
2
5
  Former CUDA Runtime wrapper.
3
6
 
@@ -5,7 +8,7 @@ The toolkit version can now be obtained from NVRTC, so we don't use a binding
5
8
  to the runtime anymore. This file is provided to maintain the existing API.
6
9
  """
7
10
 
8
- from numba import config
11
+ from numba.cuda import config
9
12
  from numba.cuda.cudadrv.nvrtc import NVRTC
10
13
 
11
14
 
@@ -1,3 +1,6 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: BSD-2-Clause
3
+
1
4
  from functools import reduce
2
5
  import operator
3
6
  import math
@@ -7,7 +10,7 @@ from llvmlite import ir
7
10
  import llvmlite.binding as ll
8
11
 
9
12
  from numba.core.imputils import Registry
10
- from numba.core.typing.npydecl import parse_dtype
13
+ from numba.cuda.typing.npydecl import parse_dtype
11
14
  from numba.core.datamodel import models
12
15
  from numba.core import types
13
16
  from numba.cuda import cgutils
@@ -22,6 +25,10 @@ registry = Registry()
22
25
  lower = registry.lower
23
26
  lower_attr = registry.lower_getattr
24
27
  lower_constant = registry.lower_constant
28
+ lower_getattr_generic = registry.lower_getattr_generic
29
+ lower_setattr = registry.lower_setattr
30
+ lower_setattr_generic = registry.lower_setattr_generic
31
+ lower_cast = registry.lower_cast
25
32
 
26
33
 
27
34
  def initialize_dim3(builder, prefix):
@@ -1,3 +1,6 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: BSD-2-Clause
3
+
1
4
  import math
2
5
  from numba.core import types
3
6
  from numba.cuda.typing.templates import ConcreteTemplate, signature, Registry