numba-cuda 0.19.0__py3-none-any.whl → 0.20.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of numba-cuda might be problematic; the original page links to more details.

Files changed (353)
  1. _numba_cuda_redirector.pth +3 -0
  2. _numba_cuda_redirector.py +3 -0
  3. numba_cuda/VERSION +1 -1
  4. numba_cuda/__init__.py +2 -1
  5. numba_cuda/_version.py +2 -13
  6. numba_cuda/numba/cuda/__init__.py +4 -1
  7. numba_cuda/numba/cuda/_internal/cuda_bf16.py +12708 -1469
  8. numba_cuda/numba/cuda/_internal/cuda_fp16.py +2656 -8769
  9. numba_cuda/numba/cuda/api.py +9 -1
  10. numba_cuda/numba/cuda/api_util.py +3 -0
  11. numba_cuda/numba/cuda/args.py +3 -0
  12. numba_cuda/numba/cuda/bf16.py +288 -2
  13. numba_cuda/numba/cuda/cg.py +3 -0
  14. numba_cuda/numba/cuda/cgutils.py +5 -2
  15. numba_cuda/numba/cuda/cloudpickle/__init__.py +21 -0
  16. numba_cuda/numba/cuda/cloudpickle/cloudpickle.py +1598 -0
  17. numba_cuda/numba/cuda/cloudpickle/cloudpickle_fast.py +17 -0
  18. numba_cuda/numba/cuda/codegen.py +4 -1
  19. numba_cuda/numba/cuda/compiler.py +376 -30
  20. numba_cuda/numba/cuda/core/analysis.py +319 -0
  21. numba_cuda/numba/cuda/core/annotations/__init__.py +0 -0
  22. numba_cuda/numba/cuda/core/annotations/type_annotations.py +304 -0
  23. numba_cuda/numba/cuda/core/base.py +1289 -0
  24. numba_cuda/numba/cuda/core/bytecode.py +727 -0
  25. numba_cuda/numba/cuda/core/caching.py +5 -2
  26. numba_cuda/numba/cuda/core/callconv.py +3 -0
  27. numba_cuda/numba/cuda/core/codegen.py +3 -0
  28. numba_cuda/numba/cuda/core/compiler.py +9 -14
  29. numba_cuda/numba/cuda/core/compiler_machinery.py +497 -0
  30. numba_cuda/numba/cuda/core/config.py +747 -0
  31. numba_cuda/numba/cuda/core/consts.py +124 -0
  32. numba_cuda/numba/cuda/core/cpu.py +370 -0
  33. numba_cuda/numba/cuda/core/environment.py +68 -0
  34. numba_cuda/numba/cuda/core/event.py +511 -0
  35. numba_cuda/numba/cuda/core/funcdesc.py +330 -0
  36. numba_cuda/numba/cuda/core/inline_closurecall.py +1889 -0
  37. numba_cuda/numba/cuda/core/interpreter.py +52 -27
  38. numba_cuda/numba/cuda/core/ir_utils.py +17 -29
  39. numba_cuda/numba/cuda/core/options.py +262 -0
  40. numba_cuda/numba/cuda/core/postproc.py +249 -0
  41. numba_cuda/numba/cuda/core/pythonapi.py +1868 -0
  42. numba_cuda/numba/cuda/core/rewrites/__init__.py +26 -0
  43. numba_cuda/numba/cuda/core/rewrites/ir_print.py +90 -0
  44. numba_cuda/numba/cuda/core/rewrites/registry.py +104 -0
  45. numba_cuda/numba/cuda/core/rewrites/static_binop.py +40 -0
  46. numba_cuda/numba/cuda/core/rewrites/static_getitem.py +187 -0
  47. numba_cuda/numba/cuda/core/rewrites/static_raise.py +98 -0
  48. numba_cuda/numba/cuda/core/sigutils.py +3 -0
  49. numba_cuda/numba/cuda/core/ssa.py +496 -0
  50. numba_cuda/numba/cuda/core/targetconfig.py +329 -0
  51. numba_cuda/numba/cuda/core/tracing.py +231 -0
  52. numba_cuda/numba/cuda/core/transforms.py +952 -0
  53. numba_cuda/numba/cuda/core/typed_passes.py +741 -7
  54. numba_cuda/numba/cuda/core/typeinfer.py +1948 -0
  55. numba_cuda/numba/cuda/core/unsafe/__init__.py +0 -0
  56. numba_cuda/numba/cuda/core/unsafe/bytes.py +67 -0
  57. numba_cuda/numba/cuda/core/unsafe/eh.py +66 -0
  58. numba_cuda/numba/cuda/core/unsafe/refcount.py +98 -0
  59. numba_cuda/numba/cuda/core/untyped_passes.py +1983 -0
  60. numba_cuda/numba/cuda/cpython/cmathimpl.py +560 -0
  61. numba_cuda/numba/cuda/cpython/mathimpl.py +499 -0
  62. numba_cuda/numba/cuda/cpython/numbers.py +1474 -0
  63. numba_cuda/numba/cuda/cuda_paths.py +425 -246
  64. numba_cuda/numba/cuda/cudadecl.py +4 -1
  65. numba_cuda/numba/cuda/cudadrv/__init__.py +4 -1
  66. numba_cuda/numba/cuda/cudadrv/devicearray.py +5 -1
  67. numba_cuda/numba/cuda/cudadrv/devices.py +3 -0
  68. numba_cuda/numba/cuda/cudadrv/driver.py +14 -140
  69. numba_cuda/numba/cuda/cudadrv/drvapi.py +3 -0
  70. numba_cuda/numba/cuda/cudadrv/dummyarray.py +114 -24
  71. numba_cuda/numba/cuda/cudadrv/enums.py +3 -0
  72. numba_cuda/numba/cuda/cudadrv/error.py +4 -0
  73. numba_cuda/numba/cuda/cudadrv/libs.py +8 -5
  74. numba_cuda/numba/cuda/cudadrv/linkable_code.py +3 -0
  75. numba_cuda/numba/cuda/cudadrv/mappings.py +4 -1
  76. numba_cuda/numba/cuda/cudadrv/ndarray.py +3 -0
  77. numba_cuda/numba/cuda/cudadrv/nvrtc.py +22 -8
  78. numba_cuda/numba/cuda/cudadrv/nvvm.py +4 -4
  79. numba_cuda/numba/cuda/cudadrv/rtapi.py +3 -0
  80. numba_cuda/numba/cuda/cudadrv/runtime.py +4 -1
  81. numba_cuda/numba/cuda/cudaimpl.py +8 -1
  82. numba_cuda/numba/cuda/cudamath.py +3 -0
  83. numba_cuda/numba/cuda/debuginfo.py +88 -2
  84. numba_cuda/numba/cuda/decorators.py +6 -3
  85. numba_cuda/numba/cuda/descriptor.py +6 -4
  86. numba_cuda/numba/cuda/device_init.py +3 -0
  87. numba_cuda/numba/cuda/deviceufunc.py +69 -2
  88. numba_cuda/numba/cuda/dispatcher.py +21 -39
  89. numba_cuda/numba/cuda/errors.py +10 -0
  90. numba_cuda/numba/cuda/extending.py +3 -0
  91. numba_cuda/numba/cuda/flags.py +143 -1
  92. numba_cuda/numba/cuda/fp16.py +3 -2
  93. numba_cuda/numba/cuda/include/13/cuda_bf16.h +5118 -0
  94. numba_cuda/numba/cuda/include/13/cuda_bf16.hpp +3865 -0
  95. numba_cuda/numba/cuda/include/13/cuda_fp16.h +5363 -0
  96. numba_cuda/numba/cuda/include/13/cuda_fp16.hpp +3483 -0
  97. numba_cuda/numba/cuda/initialize.py +4 -0
  98. numba_cuda/numba/cuda/intrinsic_wrapper.py +3 -0
  99. numba_cuda/numba/cuda/intrinsics.py +3 -0
  100. numba_cuda/numba/cuda/itanium_mangler.py +3 -0
  101. numba_cuda/numba/cuda/kernels/__init__.py +2 -0
  102. numba_cuda/numba/cuda/kernels/reduction.py +3 -0
  103. numba_cuda/numba/cuda/kernels/transpose.py +3 -0
  104. numba_cuda/numba/cuda/libdevice.py +4 -0
  105. numba_cuda/numba/cuda/libdevicedecl.py +3 -0
  106. numba_cuda/numba/cuda/libdevicefuncs.py +3 -0
  107. numba_cuda/numba/cuda/libdeviceimpl.py +3 -0
  108. numba_cuda/numba/cuda/locks.py +3 -0
  109. numba_cuda/numba/cuda/lowering.py +59 -159
  110. numba_cuda/numba/cuda/mathimpl.py +5 -1
  111. numba_cuda/numba/cuda/memory_management/__init__.py +3 -0
  112. numba_cuda/numba/cuda/memory_management/memsys.cu +5 -0
  113. numba_cuda/numba/cuda/memory_management/memsys.cuh +5 -0
  114. numba_cuda/numba/cuda/memory_management/nrt.cu +5 -0
  115. numba_cuda/numba/cuda/memory_management/nrt.cuh +5 -0
  116. numba_cuda/numba/cuda/memory_management/nrt.py +48 -18
  117. numba_cuda/numba/cuda/misc/findlib.py +75 -0
  118. numba_cuda/numba/cuda/models.py +12 -1
  119. numba_cuda/numba/cuda/np/npdatetime_helpers.py +217 -0
  120. numba_cuda/numba/cuda/np/npyfuncs.py +1807 -0
  121. numba_cuda/numba/cuda/np/numpy_support.py +553 -0
  122. numba_cuda/numba/cuda/np/ufunc/ufuncbuilder.py +59 -0
  123. numba_cuda/numba/cuda/nvvmutils.py +4 -1
  124. numba_cuda/numba/cuda/printimpl.py +15 -1
  125. numba_cuda/numba/cuda/random.py +4 -1
  126. numba_cuda/numba/cuda/reshape_funcs.cu +5 -0
  127. numba_cuda/numba/cuda/serialize.py +4 -1
  128. numba_cuda/numba/cuda/simulator/__init__.py +4 -1
  129. numba_cuda/numba/cuda/simulator/_internal/__init__.py +3 -0
  130. numba_cuda/numba/cuda/simulator/_internal/cuda_bf16.py +2 -0
  131. numba_cuda/numba/cuda/simulator/api.py +4 -1
  132. numba_cuda/numba/cuda/simulator/bf16.py +3 -0
  133. numba_cuda/numba/cuda/simulator/compiler.py +7 -0
  134. numba_cuda/numba/cuda/simulator/cudadrv/__init__.py +3 -0
  135. numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py +4 -1
  136. numba_cuda/numba/cuda/simulator/cudadrv/devices.py +3 -0
  137. numba_cuda/numba/cuda/simulator/cudadrv/driver.py +3 -0
  138. numba_cuda/numba/cuda/simulator/cudadrv/drvapi.py +3 -0
  139. numba_cuda/numba/cuda/simulator/cudadrv/dummyarray.py +3 -0
  140. numba_cuda/numba/cuda/simulator/cudadrv/error.py +4 -0
  141. numba_cuda/numba/cuda/simulator/cudadrv/libs.py +4 -0
  142. numba_cuda/numba/cuda/simulator/cudadrv/linkable_code.py +4 -0
  143. numba_cuda/numba/cuda/simulator/cudadrv/nvrtc.py +3 -0
  144. numba_cuda/numba/cuda/simulator/cudadrv/nvvm.py +3 -0
  145. numba_cuda/numba/cuda/simulator/cudadrv/runtime.py +3 -0
  146. numba_cuda/numba/cuda/simulator/dispatcher.py +4 -0
  147. numba_cuda/numba/cuda/simulator/kernel.py +3 -0
  148. numba_cuda/numba/cuda/simulator/kernelapi.py +4 -1
  149. numba_cuda/numba/cuda/simulator/memory_management/__init__.py +3 -0
  150. numba_cuda/numba/cuda/simulator/memory_management/nrt.py +17 -2
  151. numba_cuda/numba/cuda/simulator/reduction.py +3 -0
  152. numba_cuda/numba/cuda/simulator/vector_types.py +3 -0
  153. numba_cuda/numba/cuda/simulator_init.py +3 -0
  154. numba_cuda/numba/cuda/stubs.py +3 -0
  155. numba_cuda/numba/cuda/target.py +38 -17
  156. numba_cuda/numba/cuda/testing.py +7 -19
  157. numba_cuda/numba/cuda/tests/__init__.py +4 -1
  158. numba_cuda/numba/cuda/tests/cloudpickle_main_class.py +9 -0
  159. numba_cuda/numba/cuda/tests/complex_usecases.py +3 -0
  160. numba_cuda/numba/cuda/tests/core/serialize_usecases.py +3 -0
  161. numba_cuda/numba/cuda/tests/core/test_itanium_mangler.py +3 -0
  162. numba_cuda/numba/cuda/tests/core/test_serialize.py +7 -4
  163. numba_cuda/numba/cuda/tests/cudadrv/__init__.py +3 -0
  164. numba_cuda/numba/cuda/tests/cudadrv/test_array_attr.py +3 -0
  165. numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py +3 -0
  166. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_array_slicing.py +3 -0
  167. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_auto_context.py +3 -0
  168. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py +4 -1
  169. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py +3 -0
  170. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_libraries.py +4 -1
  171. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_memory.py +3 -0
  172. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py +3 -0
  173. numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py +4 -1
  174. numba_cuda/numba/cuda/tests/cudadrv/test_detect.py +9 -3
  175. numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +4 -1
  176. numba_cuda/numba/cuda/tests/cudadrv/test_events.py +3 -0
  177. numba_cuda/numba/cuda/tests/cudadrv/test_host_alloc.py +3 -0
  178. numba_cuda/numba/cuda/tests/cudadrv/test_init.py +3 -0
  179. numba_cuda/numba/cuda/tests/cudadrv/test_inline_ptx.py +3 -0
  180. numba_cuda/numba/cuda/tests/cudadrv/test_is_fp16.py +3 -0
  181. numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +21 -2
  182. numba_cuda/numba/cuda/tests/cudadrv/test_managed_alloc.py +3 -0
  183. numba_cuda/numba/cuda/tests/cudadrv/test_module_callbacks.py +5 -1
  184. numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py +4 -1
  185. numba_cuda/numba/cuda/tests/cudadrv/test_nvrtc.py +3 -0
  186. numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py +3 -0
  187. numba_cuda/numba/cuda/tests/cudadrv/test_pinned.py +3 -0
  188. numba_cuda/numba/cuda/tests/cudadrv/test_profiler.py +3 -0
  189. numba_cuda/numba/cuda/tests/cudadrv/test_ptds.py +4 -1
  190. numba_cuda/numba/cuda/tests/cudadrv/test_reset_device.py +3 -0
  191. numba_cuda/numba/cuda/tests/cudadrv/test_runtime.py +3 -0
  192. numba_cuda/numba/cuda/tests/cudadrv/test_select_device.py +3 -0
  193. numba_cuda/numba/cuda/tests/cudadrv/test_streams.py +3 -0
  194. numba_cuda/numba/cuda/tests/cudapy/__init__.py +3 -0
  195. numba_cuda/numba/cuda/tests/cudapy/cache_usecases.py +3 -0
  196. numba_cuda/numba/cuda/tests/cudapy/cache_with_cpu_usecases.py +3 -0
  197. numba_cuda/numba/cuda/tests/cudapy/cg_cache_usecases.py +3 -0
  198. numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +4 -1
  199. numba_cuda/numba/cuda/tests/cudapy/recursion_usecases.py +3 -0
  200. numba_cuda/numba/cuda/tests/cudapy/test_alignment.py +3 -0
  201. numba_cuda/numba/cuda/tests/cudapy/test_array.py +5 -1
  202. numba_cuda/numba/cuda/tests/cudapy/test_array_alignment.py +3 -0
  203. numba_cuda/numba/cuda/tests/cudapy/test_array_args.py +3 -0
  204. numba_cuda/numba/cuda/tests/cudapy/test_array_methods.py +3 -0
  205. numba_cuda/numba/cuda/tests/cudapy/test_atomics.py +4 -1
  206. numba_cuda/numba/cuda/tests/cudapy/test_bfloat16.py +542 -2
  207. numba_cuda/numba/cuda/tests/cudapy/test_bfloat16_bindings.py +84 -1
  208. numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py +3 -0
  209. numba_cuda/numba/cuda/tests/cudapy/test_boolean.py +3 -0
  210. numba_cuda/numba/cuda/tests/cudapy/test_caching.py +4 -3
  211. numba_cuda/numba/cuda/tests/cudapy/test_casting.py +3 -0
  212. numba_cuda/numba/cuda/tests/cudapy/test_cffi.py +3 -0
  213. numba_cuda/numba/cuda/tests/cudapy/test_compiler.py +3 -0
  214. numba_cuda/numba/cuda/tests/cudapy/test_complex.py +4 -1
  215. numba_cuda/numba/cuda/tests/cudapy/test_complex_kernel.py +3 -0
  216. numba_cuda/numba/cuda/tests/cudapy/test_const_string.py +3 -0
  217. numba_cuda/numba/cuda/tests/cudapy/test_constmem.py +4 -1
  218. numba_cuda/numba/cuda/tests/cudapy/test_cooperative_groups.py +5 -3
  219. numba_cuda/numba/cuda/tests/cudapy/test_copy_propagate.py +130 -0
  220. numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py +3 -0
  221. numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py +3 -0
  222. numba_cuda/numba/cuda/tests/cudapy/test_datetime.py +4 -1
  223. numba_cuda/numba/cuda/tests/cudapy/test_debug.py +4 -1
  224. numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +314 -3
  225. numba_cuda/numba/cuda/tests/cudapy/test_debuginfo_types.py +4 -1
  226. numba_cuda/numba/cuda/tests/cudapy/test_device_func.py +3 -0
  227. numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +4 -1
  228. numba_cuda/numba/cuda/tests/cudapy/test_enums.py +3 -0
  229. numba_cuda/numba/cuda/tests/cudapy/test_errors.py +4 -1
  230. numba_cuda/numba/cuda/tests/cudapy/test_exception.py +4 -1
  231. numba_cuda/numba/cuda/tests/cudapy/test_extending.py +5 -1
  232. numba_cuda/numba/cuda/tests/cudapy/test_fastmath.py +3 -0
  233. numba_cuda/numba/cuda/tests/cudapy/test_forall.py +3 -0
  234. numba_cuda/numba/cuda/tests/cudapy/test_freevar.py +3 -0
  235. numba_cuda/numba/cuda/tests/cudapy/test_frexp_ldexp.py +3 -0
  236. numba_cuda/numba/cuda/tests/cudapy/test_globals.py +3 -0
  237. numba_cuda/numba/cuda/tests/cudapy/test_gufunc.py +3 -0
  238. numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scalar.py +3 -0
  239. numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scheduling.py +3 -0
  240. numba_cuda/numba/cuda/tests/cudapy/test_idiv.py +3 -0
  241. numba_cuda/numba/cuda/tests/cudapy/test_inline.py +21 -8
  242. numba_cuda/numba/cuda/tests/cudapy/test_inspect.py +3 -0
  243. numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py +3 -0
  244. numba_cuda/numba/cuda/tests/cudapy/test_ipc.py +3 -0
  245. numba_cuda/numba/cuda/tests/cudapy/test_ir_utils.py +13 -37
  246. numba_cuda/numba/cuda/tests/cudapy/test_iterators.py +3 -0
  247. numba_cuda/numba/cuda/tests/cudapy/test_lang.py +3 -0
  248. numba_cuda/numba/cuda/tests/cudapy/test_laplace.py +4 -1
  249. numba_cuda/numba/cuda/tests/cudapy/test_libdevice.py +3 -0
  250. numba_cuda/numba/cuda/tests/cudapy/test_lineinfo.py +3 -0
  251. numba_cuda/numba/cuda/tests/cudapy/test_localmem.py +3 -0
  252. numba_cuda/numba/cuda/tests/cudapy/test_mandel.py +3 -0
  253. numba_cuda/numba/cuda/tests/cudapy/test_math.py +4 -1
  254. numba_cuda/numba/cuda/tests/cudapy/test_matmul.py +4 -1
  255. numba_cuda/numba/cuda/tests/cudapy/test_minmax.py +3 -0
  256. numba_cuda/numba/cuda/tests/cudapy/test_montecarlo.py +3 -0
  257. numba_cuda/numba/cuda/tests/cudapy/test_multigpu.py +3 -0
  258. numba_cuda/numba/cuda/tests/cudapy/test_multiprocessing.py +3 -0
  259. numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py +3 -0
  260. numba_cuda/numba/cuda/tests/cudapy/test_nondet.py +3 -0
  261. numba_cuda/numba/cuda/tests/cudapy/test_operator.py +4 -1
  262. numba_cuda/numba/cuda/tests/cudapy/test_optimization.py +3 -0
  263. numba_cuda/numba/cuda/tests/cudapy/test_overload.py +3 -0
  264. numba_cuda/numba/cuda/tests/cudapy/test_powi.py +3 -0
  265. numba_cuda/numba/cuda/tests/cudapy/test_print.py +23 -0
  266. numba_cuda/numba/cuda/tests/cudapy/test_py2_div_issue.py +3 -0
  267. numba_cuda/numba/cuda/tests/cudapy/test_random.py +3 -0
  268. numba_cuda/numba/cuda/tests/cudapy/test_record_dtype.py +4 -1
  269. numba_cuda/numba/cuda/tests/cudapy/test_recursion.py +3 -0
  270. numba_cuda/numba/cuda/tests/cudapy/test_reduction.py +4 -1
  271. numba_cuda/numba/cuda/tests/cudapy/test_retrieve_autoconverted_arrays.py +3 -0
  272. numba_cuda/numba/cuda/tests/cudapy/test_serialize.py +4 -1
  273. numba_cuda/numba/cuda/tests/cudapy/test_slicing.py +3 -0
  274. numba_cuda/numba/cuda/tests/cudapy/test_sm.py +4 -1
  275. numba_cuda/numba/cuda/tests/cudapy/test_sm_creation.py +3 -0
  276. numba_cuda/numba/cuda/tests/cudapy/test_ssa.py +453 -0
  277. numba_cuda/numba/cuda/tests/cudapy/test_stream_api.py +3 -0
  278. numba_cuda/numba/cuda/tests/cudapy/test_sync.py +4 -1
  279. numba_cuda/numba/cuda/tests/cudapy/test_transpose.py +3 -0
  280. numba_cuda/numba/cuda/tests/cudapy/test_typeinfer.py +538 -0
  281. numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +266 -2
  282. numba_cuda/numba/cuda/tests/cudapy/test_userexc.py +4 -1
  283. numba_cuda/numba/cuda/tests/cudapy/test_vector_type.py +4 -1
  284. numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py +3 -0
  285. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_complex.py +3 -0
  286. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py +115 -6
  287. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_device.py +3 -0
  288. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_scalar_arg.py +3 -0
  289. numba_cuda/numba/cuda/tests/cudapy/test_warning.py +4 -1
  290. numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +4 -1
  291. numba_cuda/numba/cuda/tests/cudasim/__init__.py +3 -0
  292. numba_cuda/numba/cuda/tests/cudasim/support.py +3 -0
  293. numba_cuda/numba/cuda/tests/cudasim/test_cudasim_issues.py +3 -0
  294. numba_cuda/numba/cuda/tests/data/__init__.py +2 -0
  295. numba_cuda/numba/cuda/tests/data/cta_barrier.cu +5 -0
  296. numba_cuda/numba/cuda/tests/data/cuda_include.cu +5 -0
  297. numba_cuda/numba/cuda/tests/data/error.cu +5 -0
  298. numba_cuda/numba/cuda/tests/data/include/add.cuh +5 -0
  299. numba_cuda/numba/cuda/tests/data/jitlink.cu +5 -0
  300. numba_cuda/numba/cuda/tests/data/warn.cu +5 -0
  301. numba_cuda/numba/cuda/tests/doc_examples/__init__.py +3 -0
  302. numba_cuda/numba/cuda/tests/doc_examples/ffi/__init__.py +2 -0
  303. numba_cuda/numba/cuda/tests/doc_examples/ffi/functions.cu +5 -0
  304. numba_cuda/numba/cuda/tests/doc_examples/ffi/include/mul.cuh +5 -0
  305. numba_cuda/numba/cuda/tests/doc_examples/ffi/saxpy.cu +5 -0
  306. numba_cuda/numba/cuda/tests/doc_examples/test_cg.py +3 -2
  307. numba_cuda/numba/cuda/tests/doc_examples/test_cpointer.py +3 -0
  308. numba_cuda/numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py +3 -0
  309. numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py +6 -2
  310. numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py +3 -2
  311. numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py +3 -0
  312. numba_cuda/numba/cuda/tests/doc_examples/test_montecarlo.py +3 -0
  313. numba_cuda/numba/cuda/tests/doc_examples/test_random.py +3 -0
  314. numba_cuda/numba/cuda/tests/doc_examples/test_reduction.py +3 -0
  315. numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py +3 -2
  316. numba_cuda/numba/cuda/tests/doc_examples/test_ufunc.py +3 -0
  317. numba_cuda/numba/cuda/tests/doc_examples/test_vecadd.py +3 -0
  318. numba_cuda/numba/cuda/tests/enum_usecases.py +3 -0
  319. numba_cuda/numba/cuda/tests/nocuda/__init__.py +3 -0
  320. numba_cuda/numba/cuda/tests/nocuda/test_dummyarray.py +3 -0
  321. numba_cuda/numba/cuda/tests/nocuda/test_function_resolution.py +3 -0
  322. numba_cuda/numba/cuda/tests/nocuda/test_import.py +6 -1
  323. numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py +27 -12
  324. numba_cuda/numba/cuda/tests/nocuda/test_nvvm.py +3 -0
  325. numba_cuda/numba/cuda/tests/nrt/__init__.py +3 -0
  326. numba_cuda/numba/cuda/tests/nrt/test_nrt.py +5 -1
  327. numba_cuda/numba/cuda/tests/nrt/test_nrt_refct.py +3 -0
  328. numba_cuda/numba/cuda/tests/support.py +58 -15
  329. numba_cuda/numba/cuda/tests/test_binary_generation/Makefile +3 -0
  330. numba_cuda/numba/cuda/tests/test_binary_generation/generate_raw_ltoir.py +2 -1
  331. numba_cuda/numba/cuda/tests/test_binary_generation/nrt_extern.cu +5 -0
  332. numba_cuda/numba/cuda/tests/test_binary_generation/test_device_functions.cu +5 -0
  333. numba_cuda/numba/cuda/tests/test_binary_generation/undefined_extern.cu +5 -0
  334. numba_cuda/numba/cuda/tests/test_tracing.py +200 -0
  335. numba_cuda/numba/cuda/types.py +59 -0
  336. numba_cuda/numba/cuda/typing/__init__.py +12 -1
  337. numba_cuda/numba/cuda/typing/cffi_utils.py +55 -0
  338. numba_cuda/numba/cuda/typing/context.py +751 -0
  339. numba_cuda/numba/cuda/typing/enumdecl.py +74 -0
  340. numba_cuda/numba/cuda/typing/npydecl.py +658 -0
  341. numba_cuda/numba/cuda/typing/templates.py +10 -14
  342. numba_cuda/numba/cuda/ufuncs.py +6 -3
  343. numba_cuda/numba/cuda/utils.py +9 -112
  344. numba_cuda/numba/cuda/vector_types.py +3 -0
  345. numba_cuda/numba/cuda/vectorizers.py +3 -0
  346. {numba_cuda-0.19.0.dist-info → numba_cuda-0.20.0.dist-info}/METADATA +6 -2
  347. numba_cuda-0.20.0.dist-info/RECORD +357 -0
  348. {numba_cuda-0.19.0.dist-info → numba_cuda-0.20.0.dist-info}/licenses/LICENSE +1 -0
  349. numba_cuda-0.20.0.dist-info/licenses/LICENSE.numba +24 -0
  350. numba_cuda/numba/cuda/tests/cudadrv/test_mvc.py +0 -57
  351. numba_cuda-0.19.0.dist-info/RECORD +0 -301
  352. {numba_cuda-0.19.0.dist-info → numba_cuda-0.20.0.dist-info}/WHEEL +0 -0
  353. {numba_cuda-0.19.0.dist-info → numba_cuda-0.20.0.dist-info}/top_level.txt +0 -0
@@ -1,27 +1,34 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: BSD-2-Clause
3
+
1
4
  import sys
2
- import re
3
5
  import os
4
6
  from collections import namedtuple
5
7
  import platform
6
- import site
7
- from pathlib import Path
8
- from numba.core.config import IS_WIN32
9
- from numba.misc.findlib import find_lib
10
- from numba import config
11
- import ctypes
8
+ import importlib.metadata
9
+ from numba.cuda.core.config import IS_WIN32
10
+ from numba.cuda.misc.findlib import find_lib
11
+ from numba.cuda import config
12
12
 
13
13
  _env_path_tuple = namedtuple("_env_path_tuple", ["by", "info"])
14
14
 
15
15
  SEARCH_PRIORITY = [
16
16
  "Conda environment",
17
- "Conda environment (NVIDIA package)",
18
17
  "NVIDIA NVCC Wheel",
19
18
  "CUDA_HOME",
20
19
  "System",
21
- "Debian package",
22
20
  ]
23
21
 
24
22
 
23
+ def _get_distribution(distribution_name):
24
+ """Get the distribution path using importlib.metadata, returning None if not found."""
25
+ try:
26
+ dist = importlib.metadata.distribution(distribution_name)
27
+ return dist
28
+ except importlib.metadata.PackageNotFoundError:
29
+ return None
30
+
31
+
25
32
  def _priority_index(label):
26
33
  if label in SEARCH_PRIORITY:
27
34
  return SEARCH_PRIORITY.index(label)
@@ -61,182 +68,183 @@ def _find_valid_path(options):
61
68
  def _get_libdevice_path_decision():
62
69
  options = _build_options(
63
70
  [
64
- ("Conda environment", get_conda_ctk),
65
- ("Conda environment (NVIDIA package)", get_nvidia_libdevice_ctk),
66
- ("CUDA_HOME", lambda: get_cuda_home("nvvm", "libdevice")),
67
- ("NVIDIA NVCC Wheel", get_libdevice_wheel),
68
- ("System", lambda: get_system_ctk("nvvm", "libdevice")),
69
- ("Debian package", get_debian_pkg_libdevice),
71
+ ("Conda environment", get_libdevice_conda_path),
72
+ ("NVIDIA NVCC Wheel", get_libdevice_wheel_path),
73
+ (
74
+ "CUDA_HOME",
75
+ lambda: get_cuda_home("nvvm", "libdevice", "libdevice.10.bc"),
76
+ ),
77
+ (
78
+ "System",
79
+ lambda: get_system_ctk("nvvm", "libdevice", "libdevice.10.bc"),
80
+ ),
70
81
  ]
71
82
  )
72
83
  return _find_first_valid_lazy(options)
73
84
 
74
85
 
75
- def _nvvm_lib_dir():
76
- if IS_WIN32:
77
- return "nvvm", "bin"
78
- else:
79
- return "nvvm", "lib64"
80
-
81
-
82
86
  def _get_nvvm_path_decision():
83
- options = [
84
- ("Conda environment", get_conda_ctk),
85
- ("Conda environment (NVIDIA package)", get_nvidia_nvvm_ctk),
86
- ("NVIDIA NVCC Wheel", _get_nvvm_wheel),
87
- ("CUDA_HOME", lambda: get_cuda_home(*_nvvm_lib_dir())),
88
- ("System", lambda: get_system_ctk(*_nvvm_lib_dir())),
89
- ]
87
+ options = _build_options(
88
+ [
89
+ ("Conda environment", _get_nvvm_conda_path),
90
+ ("NVIDIA NVCC Wheel", _get_nvvm_wheel_path),
91
+ ("CUDA_HOME", _get_nvvm_cuda_home_path),
92
+ ("System", _get_nvvm_system_path),
93
+ ]
94
+ )
90
95
  return _find_first_valid_lazy(options)
91
96
 
92
97
 
93
- def _get_nvrtc_system_ctk():
94
- sys_path = get_system_ctk("bin" if IS_WIN32 else "lib64")
95
- candidates = find_lib("nvrtc", sys_path)
96
- if candidates:
97
- return max(candidates)
98
-
99
-
100
98
  def _get_nvrtc_path_decision():
101
99
  options = _build_options(
102
100
  [
103
- ("CUDA_HOME", lambda: get_cuda_home(_cudalib_path())),
104
- ("Conda environment", get_conda_ctk),
105
- ("Conda environment (NVIDIA package)", get_nvidia_cudalib_ctk),
106
- ("NVIDIA NVCC Wheel", _get_nvrtc_wheel),
107
- ("System", _get_nvrtc_system_ctk),
101
+ ("Conda environment", get_conda_ctk_libdir),
102
+ ("NVIDIA NVCC Wheel", _get_nvrtc_wheel_libdir),
103
+ ("CUDA_HOME", get_cuda_home_libdir),
104
+ ("System", get_system_ctk_libdir),
108
105
  ]
109
106
  )
110
107
  return _find_first_valid_lazy(options)
111
108
 
112
109
 
113
- def _get_nvvm_wheel():
114
- platform_map = {
115
- "linux": ("lib64", "libnvvm.so"),
116
- "win32": ("bin", "nvvm64_40_0.dll"),
117
- }
118
-
119
- for plat, (dso_dir, dso_path) in platform_map.items():
120
- if sys.platform.startswith(plat):
121
- break
122
- else:
123
- raise NotImplementedError("Unsupported platform")
124
-
125
- site_paths = [site.getusersitepackages()] + site.getsitepackages()
110
+ def _get_nvvm_wheel_path():
111
+ dso_path = None
112
+ # CUDA 12
113
+ nvcc_distribution = _get_distribution("nvidia-cuda-nvcc-cu12")
114
+ if nvcc_distribution is not None:
115
+ site_packages_path = nvcc_distribution.locate_file("")
116
+ nvvm_lib_dir = os.path.join(
117
+ site_packages_path,
118
+ "nvidia",
119
+ "cuda_nvcc",
120
+ "nvvm",
121
+ "bin" if IS_WIN32 else "lib64",
122
+ )
123
+ dso_path = os.path.join(
124
+ nvvm_lib_dir, "nvvm64_40_0.dll" if IS_WIN32 else "libnvvm.so"
125
+ )
126
126
 
127
- for sp in filter(None, site_paths):
128
- nvvm_path = Path(sp, "nvidia", "cuda_nvcc", "nvvm", dso_dir, dso_path)
129
- if nvvm_path.exists():
130
- return str(nvvm_path.parent)
127
+ # CUDA 13
128
+ if dso_path is None:
129
+ nvcc_distribution = _get_distribution("nvidia-nvvm")
130
+ if (
131
+ nvcc_distribution is not None
132
+ and nvcc_distribution.version.startswith("13.")
133
+ ):
134
+ site_packages_path = nvcc_distribution.locate_file("")
135
+ nvvm_lib_dir = os.path.join(
136
+ site_packages_path,
137
+ "nvidia",
138
+ "cu13",
139
+ "bin" if IS_WIN32 else "lib",
140
+ "x86_64" if IS_WIN32 else "",
141
+ )
142
+ dso_path = os.path.join(
143
+ nvvm_lib_dir, "nvvm64_40_0.dll" if IS_WIN32 else "libnvvm.so.4"
144
+ )
131
145
 
146
+ if dso_path and os.path.isfile(dso_path):
147
+ return dso_path
132
148
  return None
133
149
 
134
150
 
135
- def get_nvrtc_dso_path():
136
- site_paths = [site.getusersitepackages()] + site.getsitepackages()
137
-
138
- for sp in site_paths:
139
- lib_dir = os.path.join(
140
- sp,
151
+ def _get_nvrtc_wheel_libdir():
152
+ dso_path = None
153
+ # CUDA 12
154
+ nvrtc_distribution = _get_distribution("nvidia-cuda-nvrtc-cu12")
155
+ if nvrtc_distribution is not None:
156
+ site_packages_path = nvrtc_distribution.locate_file("")
157
+ nvrtc_lib_dir = os.path.join(
158
+ site_packages_path,
141
159
  "nvidia",
142
160
  "cuda_nvrtc",
143
- ("bin" if IS_WIN32 else "lib") if sp else None,
161
+ "bin" if IS_WIN32 else "lib",
162
+ )
163
+ dso_path = os.path.join(
164
+ nvrtc_lib_dir, "nvrtc64_120_0.dll" if IS_WIN32 else "libnvrtc.so.12"
144
165
  )
145
- if lib_dir and os.path.exists(lib_dir):
146
- chosen_path = None
147
-
148
- # Check for each version of the NVRTC DLL, preferring the most
149
- # recent.
150
- versions = (
151
- "120" if IS_WIN32 else "12",
152
- "130" if IS_WIN32 else "13",
153
- )
154
-
155
- for version in versions:
156
- dso_path = os.path.join(
157
- lib_dir,
158
- f"nvrtc64_{version}_0.dll"
159
- if IS_WIN32
160
- else f"libnvrtc.so.{version}",
161
- )
162
166
 
163
- if os.path.exists(dso_path) and os.path.isfile(dso_path):
164
- chosen_path = dso_path
167
+ # CUDA 13
168
+ if dso_path is None:
169
+ nvrtc_distribution = _get_distribution("nvidia-cuda-nvrtc")
170
+ if (
171
+ nvrtc_distribution is not None
172
+ and nvrtc_distribution.version.startswith("13.")
173
+ ):
174
+ site_packages_path = nvrtc_distribution.locate_file("")
175
+ nvrtc_lib_dir = os.path.join(
176
+ site_packages_path,
177
+ "nvidia",
178
+ "cu13",
179
+ "bin" if IS_WIN32 else "lib",
180
+ "x86_64" if IS_WIN32 else "",
181
+ )
182
+ dso_path = os.path.join(
183
+ nvrtc_lib_dir,
184
+ "nvrtc64_130_0.dll" if IS_WIN32 else "libnvrtc.so.13",
185
+ )
165
186
 
166
- return chosen_path
187
+ if dso_path and os.path.isfile(dso_path):
188
+ return os.path.dirname(dso_path)
189
+ return None
167
190
 
168
191
 
169
- def _get_nvrtc_wheel():
170
- dso_path = get_nvrtc_dso_path()
171
- if dso_path:
172
- try:
173
- result = ctypes.CDLL(dso_path, mode=ctypes.RTLD_GLOBAL)
174
- except OSError:
175
- pass
176
- else:
177
- if IS_WIN32:
178
- import win32api
179
-
180
- # This absolute path will
181
- # always be correct regardless of the package source
182
- nvrtc_path = win32api.GetModuleFileNameW(result._handle)
183
- dso_dir = os.path.dirname(nvrtc_path)
184
- builtins_path = os.path.join(
185
- dso_dir,
186
- [
187
- f
188
- for f in os.listdir(dso_dir)
189
- if re.match("^nvrtc-builtins.*.dll$", f)
190
- ][0],
191
- )
192
- if not os.path.exists(builtins_path):
193
- raise RuntimeError(
194
- f'Path does not exist: "{builtins_path}"'
195
- )
196
- return Path(dso_path)
197
-
198
-
199
- def _get_libdevice_paths():
200
- by, libdir = _get_libdevice_path_decision()
201
- if not libdir:
192
+ def _get_libdevice_path():
193
+ by, out = _get_libdevice_path_decision()
194
+ if not out:
202
195
  return _env_path_tuple(by, None)
203
- out = os.path.join(libdir, "libdevice.10.bc")
204
196
  return _env_path_tuple(by, out)
205
197
 
206
198
 
207
- def _cudalib_path():
199
+ def _cuda_static_libdir():
208
200
  if IS_WIN32:
209
- return "bin"
201
+ return ("lib", "x64")
210
202
  else:
211
- return "lib64"
203
+ return ("lib64",)
212
204
 
213
205
 
214
- def _cuda_home_static_cudalib_path():
215
- if IS_WIN32:
216
- return ("lib", "x64")
206
+ def _get_cudalib_wheel_libdir():
207
+ """Get the cudalib path from the cudart wheel."""
208
+ cuda_module_lib_dir = None
209
+ cuda_runtime_distribution = _get_distribution("nvidia-cuda-runtime-cu12")
210
+ if cuda_runtime_distribution is not None:
211
+ site_packages_path = cuda_runtime_distribution.locate_file("")
212
+ cuda_module_lib_dir = os.path.join(
213
+ site_packages_path,
214
+ "nvidia",
215
+ "cuda_runtime",
216
+ "bin" if IS_WIN32 else "lib",
217
+ )
217
218
  else:
218
- return ("lib64",)
219
+ cuda_runtime_distribution = _get_distribution("nvidia-cuda-runtime")
220
+ if (
221
+ cuda_runtime_distribution is not None
222
+ and cuda_runtime_distribution.version.startswith("13.")
223
+ ):
224
+ site_packages_path = cuda_runtime_distribution.locate_file("")
225
+ cuda_module_lib_dir = os.path.join(
226
+ site_packages_path,
227
+ "nvidia",
228
+ "cu13",
229
+ "bin" if IS_WIN32 else "lib",
230
+ "x86_64" if IS_WIN32 else "",
231
+ )
219
232
 
233
+ if cuda_module_lib_dir is None:
234
+ return None
220
235
 
221
- def _get_cudalib_wheel():
222
- """Get the cudalib path from the NVCC wheel."""
223
- site_paths = [site.getusersitepackages()] + site.getsitepackages()
224
- libdir = "bin" if IS_WIN32 else "lib"
225
- for sp in filter(None, site_paths):
226
- cudalib_path = Path(sp, "nvidia", "cuda_runtime", libdir)
227
- if cudalib_path.exists():
228
- return str(cudalib_path)
236
+ if cuda_module_lib_dir and os.path.isdir(cuda_module_lib_dir):
237
+ return cuda_module_lib_dir
229
238
  return None
230
239
 
231
240
 
232
241
  def _get_cudalib_dir_path_decision():
233
242
  options = _build_options(
234
243
  [
235
- ("Conda environment", get_conda_ctk),
236
- ("Conda environment (NVIDIA package)", get_nvidia_cudalib_ctk),
237
- ("NVIDIA NVCC Wheel", _get_cudalib_wheel),
238
- ("CUDA_HOME", lambda: get_cuda_home(_cudalib_path())),
239
- ("System", lambda: get_system_ctk(_cudalib_path())),
244
+ ("Conda environment", get_conda_ctk_libdir),
245
+ ("NVIDIA NVCC Wheel", _get_cudalib_wheel_libdir),
246
+ ("CUDA_HOME", get_cuda_home_libdir),
247
+ ("System", get_system_ctk_libdir),
240
248
  ]
241
249
  )
242
250
  return _find_first_valid_lazy(options)
@@ -245,16 +253,13 @@ def _get_cudalib_dir_path_decision():
245
253
  def _get_static_cudalib_dir_path_decision():
246
254
  options = _build_options(
247
255
  [
248
- ("Conda environment", get_conda_ctk),
249
- (
250
- "Conda environment (NVIDIA package)",
251
- get_nvidia_static_cudalib_ctk,
252
- ),
256
+ ("Conda environment", get_conda_ctk_libdir),
257
+ ("NVIDIA NVCC Wheel", get_wheel_static_libdir),
253
258
  (
254
259
  "CUDA_HOME",
255
- lambda: get_cuda_home(*_cuda_home_static_cudalib_path()),
260
+ lambda: get_cuda_home(*_cuda_static_libdir()),
256
261
  ),
257
- ("System", lambda: get_system_ctk(_cudalib_path())),
262
+ ("System", lambda: get_system_ctk(*_cuda_static_libdir())),
258
263
  ]
259
264
  )
260
265
  return _find_first_valid_lazy(options)
@@ -279,74 +284,196 @@ def get_system_ctk(*subdirs):
279
284
  result = os.path.join("/usr/local/cuda", *subdirs)
280
285
  if os.path.exists(result):
281
286
  return result
287
+ return None
288
+ return None
289
+
290
+
291
def get_system_ctk_libdir():
    """Locate the shared-library directory of the system CUDA toolkit.

    Returns:
        The normalized library directory path, or None when
        ``get_system_ctk()`` returns None or the expected directory is not
        present on disk.
    """
    ctk_root = get_system_ctk()
    if ctk_root is None:
        return None

    if IS_WIN32:
        candidate = os.path.join(ctk_root, "Library", "bin")
        # Windows CUDA 13 system CTK uses "bin\x64" directory
        arch_subdir = os.path.join(candidate, "x64")
        if os.path.isdir(arch_subdir):
            candidate = arch_subdir
    else:
        candidate = os.path.join(ctk_root, "lib64")

    # Only report the directory if it really exists.
    if not os.path.isdir(candidate):
        return None
    return os.path.normpath(candidate)
308
+
309
+
310
def get_system_ctk_include():
    """Locate the header directory of the system CUDA toolkit.

    Returns:
        The toolkit's ``include`` directory when it exists and contains
        ``cuda_device_runtime_api.h``; otherwise None (including when
        ``get_system_ctk()`` returns None).
    """
    ctk_root = get_system_ctk()
    if ctk_root is None:
        return None

    headers_dir = os.path.join(ctk_root, "include")
    # The directory is only accepted if this specific header is present.
    required_header = os.path.join(headers_dir, "cuda_device_runtime_api.h")
    if os.path.isdir(headers_dir) and os.path.isfile(required_header):
        return headers_dir
    return None
282
322
 
283
323
 
284
- def get_conda_ctk():
324
def _get_nvvm_system_path():
    """Build the expected path of the NVVM library in the system toolkit.

    Returns:
        Path to ``nvvm64_40_0.dll`` (Windows) or ``libnvvm.so.4``
        (otherwise) below the toolkit's ``nvvm`` directory, or None when
        ``get_system_ctk("nvvm")`` returns None.
    """
    nvvm_root = get_system_ctk("nvvm")
    if nvvm_root is None:
        return None

    if IS_WIN32:
        lib_dir = os.path.join(nvvm_root, "bin")
        # Prefer the "x64" subdirectory when the toolkit provides one.
        x64_dir = os.path.join(lib_dir, "x64")
        if os.path.isdir(x64_dir):
            lib_dir = x64_dir
        lib_name = "nvvm64_40_0.dll"
    else:
        lib_dir = os.path.join(nvvm_root, "lib64")
        lib_name = "libnvvm.so.4"

    # NOTE(review): the library file is not checked for existence here, so
    # the returned path may not exist; presumably the caller reports a
    # missing library -- confirm.
    return os.path.join(lib_dir, lib_name)
338
+
339
+
340
+ def get_conda_ctk_libdir():
285
341
  """Return path to directory containing the shared libraries of cudatoolkit."""
286
- is_conda_env = os.path.exists(os.path.join(sys.prefix, "conda-meta"))
342
+ is_conda_env = os.path.isdir(os.path.join(sys.prefix, "conda-meta"))
287
343
  if not is_conda_env:
288
- return
289
- # Assume the existence of NVVM to imply cudatoolkit installed
290
- paths = find_lib("nvvm")
344
+ return None
345
+ libdir = os.path.join(
346
+ sys.prefix,
347
+ "Library" if IS_WIN32 else "lib",
348
+ "bin" if IS_WIN32 else "",
349
+ )
350
+ # Windows CUDA 13.0.0 uses "bin\x64" directory but 13.0.1+ just uses "bin" directory
351
+ if IS_WIN32 and os.path.isdir(os.path.join(libdir, "x64")):
352
+ libdir = os.path.join(libdir, "x64")
353
+ # Assume the existence of nvrtc to imply needed CTK libraries are installed
354
+ paths = find_lib("nvrtc", libdir)
291
355
  if not paths:
292
- return
356
+ return None
293
357
  # Use the directory name of the max path
294
358
  return os.path.dirname(max(paths))
295
359
 
296
360
 
297
- def get_nvidia_nvvm_ctk():
298
- """Return path to directory containing the NVVM shared library."""
299
- is_conda_env = os.path.exists(os.path.join(sys.prefix, "conda-meta"))
361
+ def get_libdevice_conda_path():
362
+ """Return path to directory containing the libdevice bitcode library."""
363
+ is_conda_env = os.path.isdir(os.path.join(sys.prefix, "conda-meta"))
300
364
  if not is_conda_env:
301
- return
365
+ return None
302
366
 
303
- # Assume the existence of NVVM in the conda env implies that a CUDA toolkit
304
- # conda package is installed.
305
- if IS_WIN32:
306
- # The path used on Windows
307
- libdir = os.path.join(sys.prefix, "Library", "nvvm", _cudalib_path())
308
- else:
309
- # The path used on Linux is different to that on Windows
310
- libdir = os.path.join(sys.prefix, "nvvm", _cudalib_path())
367
+ # Linux: nvvm/libdevice/libdevice.10.bc
368
+ # Windows: Library/nvvm/libdevice/libdevice.10.bc
369
+ libdevice_path = os.path.join(
370
+ sys.prefix,
371
+ "Library" if IS_WIN32 else "",
372
+ "nvvm",
373
+ "libdevice",
374
+ "libdevice.10.bc",
375
+ )
376
+ if os.path.isfile(libdevice_path):
377
+ return libdevice_path
378
+ return None
311
379
 
312
- if not os.path.exists(libdir) or not os.path.isdir(libdir):
313
- # If the path doesn't exist, we didn't find the NVIDIA conda package
314
- return
315
380
 
316
- paths = find_lib("nvvm", libdir=libdir)
317
- if not paths:
318
- return
319
- # Use the directory name of the max path
320
- return os.path.dirname(max(paths))
381
def _get_nvvm_conda_path():
    """Return the path of the NVVM library file in the active conda env.

    The interpreter prefix is treated as a conda environment when it
    contains a ``conda-meta`` directory.

    Returns:
        Full path to ``nvvm64_40_0.dll`` (Windows) or ``libnvvm.so.4``
        (otherwise) if that file exists, else None.
    """
    if not os.path.isdir(os.path.join(sys.prefix, "conda-meta")):
        return None

    if IS_WIN32:
        lib_dir = os.path.join(sys.prefix, "Library", "nvvm", "bin")
        # Windows CUDA 13.0.0 puts in "bin\x64" directory but 13.0.1+ just
        # uses "bin" directory
        x64_dir = os.path.join(lib_dir, "x64")
        if os.path.isdir(x64_dir):
            lib_dir = x64_dir
        lib_name = "nvvm64_40_0.dll"
    else:
        lib_dir = os.path.join(sys.prefix, "nvvm", "lib64")
        lib_name = "libnvvm.so.4"

    candidate = os.path.join(lib_dir, lib_name)
    return candidate if os.path.isfile(candidate) else None
322
402
 
323
- def get_nvidia_libdevice_ctk():
324
- """Return path to directory containing the libdevice library."""
325
- nvvm_ctk = get_nvidia_nvvm_ctk()
326
- if not nvvm_ctk:
327
- return
328
- nvvm_dir = os.path.dirname(nvvm_ctk)
329
- return os.path.join(nvvm_dir, "libdevice")
330
403
 
404
+ def get_wheel_static_libdir():
405
+ cuda_module_static_lib_dir = None
406
+ # CUDA 12
407
+ cuda_runtime_distribution = _get_distribution("nvidia-cuda-runtime-cu12")
408
+ if cuda_runtime_distribution is not None:
409
+ site_packages_path = cuda_runtime_distribution.locate_file("")
410
+ cuda_module_static_lib_dir = os.path.join(
411
+ site_packages_path,
412
+ "nvidia",
413
+ "cuda_runtime",
414
+ "lib",
415
+ "x64" if IS_WIN32 else "",
416
+ )
417
+ else:
418
+ cuda_runtime_distribution = _get_distribution("nvidia-cuda-runtime")
419
+ if (
420
+ cuda_runtime_distribution is not None
421
+ and cuda_runtime_distribution.version.startswith("13.")
422
+ ):
423
+ site_packages_path = cuda_runtime_distribution.locate_file("")
424
+ cuda_module_static_lib_dir = os.path.join(
425
+ site_packages_path,
426
+ "nvidia",
427
+ "cu13",
428
+ "lib",
429
+ "x64" if IS_WIN32 else "",
430
+ )
331
431
 
332
- def get_nvidia_cudalib_ctk():
333
- """Return path to directory containing the shared libraries of cudatoolkit."""
334
- nvvm_ctk = get_nvidia_nvvm_ctk()
335
- if not nvvm_ctk:
336
- return
337
- env_dir = os.path.dirname(os.path.dirname(nvvm_ctk))
338
- subdir = "bin" if IS_WIN32 else "lib"
339
- return os.path.join(env_dir, subdir)
432
+ if cuda_module_static_lib_dir is None:
433
+ return None
340
434
 
435
+ cudadevrt_path = os.path.join(
436
+ cuda_module_static_lib_dir,
437
+ "cudadevrt.lib" if IS_WIN32 else "libcudadevrt.a",
438
+ )
341
439
 
342
- def get_nvidia_static_cudalib_ctk():
343
- """Return path to directory containing the static libraries of cudatoolkit."""
344
- nvvm_ctk = get_nvidia_nvvm_ctk()
345
- if not nvvm_ctk:
346
- return
440
+ if cudadevrt_path and os.path.isfile(cudadevrt_path):
441
+ return os.path.dirname(cudadevrt_path)
442
+ return None
347
443
 
348
- env_dir = os.path.dirname(os.path.dirname(nvvm_ctk))
349
- return os.path.join(env_dir, "lib")
444
+
445
def get_wheel_include():
    """Locate the CUDA runtime header directory shipped in a pip wheel.

    The ``nvidia-cuda-runtime-cu12`` distribution is tried first. If it is
    not found, ``nvidia-cuda-runtime`` is used, but only when its version
    string starts with ``"13."``.

    Returns:
        The wheel's ``include`` directory when it exists and contains
        ``cuda_device_runtime_api.h``; otherwise None.
    """
    # CUDA 12
    distribution = _get_distribution("nvidia-cuda-runtime-cu12")
    if distribution is not None:
        package_dir = "cuda_runtime"
    else:
        # No cu12 wheel: accept only a 13.x "nvidia-cuda-runtime" wheel.
        distribution = _get_distribution("nvidia-cuda-runtime")
        if distribution is None or not distribution.version.startswith(
            "13."
        ):
            return None
        package_dir = "cu13"

    site_packages_path = distribution.locate_file("")
    include_dir = os.path.join(
        site_packages_path, "nvidia", package_dir, "include"
    )
    # The directory is only accepted if this specific header is present.
    required_header = os.path.join(include_dir, "cuda_device_runtime_api.h")
    if os.path.isdir(include_dir) and os.path.isfile(required_header):
        return include_dir
    return None
350
477
 
351
478
 
352
479
  def get_cuda_home(*subdirs):
@@ -360,39 +487,74 @@ def get_cuda_home(*subdirs):
360
487
  cuda_home = os.environ.get("CUDA_PATH")
361
488
  if cuda_home is not None:
362
489
  return os.path.join(cuda_home, *subdirs)
490
+ return None
363
491
 
364
492
 
365
- def _get_nvvm_path():
366
- by, path = _get_nvvm_path_decision()
493
def get_cuda_home_libdir():
    """Return the shared-library directory of the toolkit at CUDA_HOME.

    On Windows two layouts are handled, mirroring ``get_cuda_home_include``:
    a conda-style tree keeps its binaries under ``Library\\bin`` while a
    toolkit installed by the NVIDIA installer keeps them directly under
    ``bin``. In either layout an ``x64`` subdirectory is preferred when it
    exists. On other platforms the directory is ``lib64``.

    Returns:
        The normalized directory path, or None when ``get_cuda_home()``
        returns None. The path is not checked for existence.
    """
    cuda_home_dir = get_cuda_home()
    if cuda_home_dir is None:
        return None

    if IS_WIN32:
        # Only insert "Library" when it exists; otherwise an installer
        # toolkit would resolve to a non-existent "Library\bin" path.
        conda_root = os.path.join(cuda_home_dir, "Library")
        base_dir = conda_root if os.path.isdir(conda_root) else cuda_home_dir
        libdir = os.path.join(base_dir, "bin")
        # Windows CUDA 13 system CTK uses "bin\x64" directory while conda
        # just uses "bin" directory
        x64_dir = os.path.join(libdir, "x64")
        if os.path.isdir(x64_dir):
            libdir = x64_dir
    else:
        libdir = os.path.join(cuda_home_dir, "lib64")
    return os.path.normpath(libdir)
367
507
 
368
- if by == "NVIDIA NVCC Wheel":
369
- platform_map = {
370
- "linux": "libnvvm.so",
371
- "win32": "nvvm64_40_0.dll",
372
- }
373
508
 
374
- for plat, dso_name in platform_map.items():
375
- if sys.platform.startswith(plat):
376
- break
509
+ def get_cuda_home_include():
510
+ cuda_home_dir = get_cuda_home()
511
+ if cuda_home_dir is None:
512
+ return None
513
+ include_dir = cuda_home_dir
514
+ # For Windows, CTK puts it in $CTK/include but conda puts it in $CTK/Library/include
515
+ if IS_WIN32:
516
+ if os.path.isdir(os.path.join(include_dir, "Library")):
517
+ include_dir = os.path.join(include_dir, "Library", "include")
377
518
  else:
378
- raise NotImplementedError("Unsupported platform")
379
-
380
- path = os.path.join(path, dso_name)
519
+ include_dir = os.path.join(include_dir, "include")
381
520
  else:
382
- candidates = find_lib("nvvm", path)
383
- path = max(candidates) if candidates else None
384
- return _env_path_tuple(by, path)
521
+ include_dir = os.path.join(include_dir, "include")
522
+
523
+ if include_dir and os.path.isdir(include_dir):
524
+ if os.path.isfile(
525
+ os.path.join(include_dir, "cuda_device_runtime_api.h")
526
+ ):
527
+ return include_dir
528
+ return None
529
+
530
+
531
def _get_nvvm_cuda_home_path():
    """Build the expected path of the NVVM library under CUDA_HOME.

    Returns:
        Path to ``nvvm64_40_0.dll`` (Windows) or ``libnvvm.so.4``
        (otherwise) below the ``nvvm`` directory of the toolkit, or None
        when ``get_cuda_home("nvvm")`` returns None.
    """
    nvvm_root = get_cuda_home("nvvm")
    if nvvm_root is None:
        return None

    if IS_WIN32:
        lib_dir = os.path.join(nvvm_root, "bin")
        # Prefer the "x64" subdirectory when the toolkit provides one.
        x64_dir = os.path.join(lib_dir, "x64")
        if os.path.isdir(x64_dir):
            lib_dir = x64_dir
        lib_name = "nvvm64_40_0.dll"
    else:
        lib_dir = os.path.join(nvvm_root, "lib64")
        lib_name = "libnvvm.so.4"

    # NOTE(review): the library file is not checked for existence here, so
    # the returned path may not exist; presumably intentional for an
    # explicitly configured CUDA_HOME -- confirm.
    return os.path.join(lib_dir, lib_name)
545
+
546
+
547
def _get_nvvm_path():
    """Wrap the NVVM path decision in an env/path tuple.

    Any falsy location from ``_get_nvvm_path_decision()`` is passed on as
    None.
    """
    source, location = _get_nvvm_path_decision()
    return _env_path_tuple(source, location if location else None)
385
552
 
386
553
 
387
554
  def _get_nvrtc_path():
388
555
  by, path = _get_nvrtc_path_decision()
389
- if by == "NVIDIA NVCC Wheel":
390
- path = str(path)
391
- elif by == "System":
392
- return _env_path_tuple(by, path)
393
- else:
394
- candidates = find_lib("nvrtc", path)
395
- path = max(candidates) if candidates else None
556
+ candidates = find_lib("nvrtc", libdir=path)
557
+ path = max(candidates) if candidates else None
396
558
  return _env_path_tuple(by, path)
397
559
 
398
560
 
@@ -402,8 +564,11 @@ def get_cuda_paths():
402
564
 
403
565
  The returned dictionary will have the following keys and infos:
404
566
  - "nvvm": file_path
405
- - "libdevice": List[Tuple[arch, file_path]]
567
+ - "nvrtc": file_path
568
+ - "libdevice": file_path
406
569
  - "cudalib_dir": directory_path
570
+ - "static_cudalib_dir": directory_path
571
+ - "include_dir": directory_path
407
572
 
408
573
  Note: The result of the function is cached.
409
574
  """
@@ -415,7 +580,7 @@ def get_cuda_paths():
415
580
  d = {
416
581
  "nvvm": _get_nvvm_path(),
417
582
  "nvrtc": _get_nvrtc_path(),
418
- "libdevice": _get_libdevice_paths(),
583
+ "libdevice": _get_libdevice_path(),
419
584
  "cudalib_dir": _get_cudalib_dir(),
420
585
  "static_cudalib_dir": _get_static_cudalib_dir(),
421
586
  "include_dir": _get_include_dir(),
@@ -425,25 +590,41 @@ def get_cuda_paths():
425
590
  return d
426
591
 
427
592
 
428
- def get_debian_pkg_libdevice():
429
- """
430
- Return the Debian NVIDIA Maintainers-packaged libdevice location, if it
431
- exists.
432
- """
433
- pkg_libdevice_location = "/usr/lib/nvidia-cuda-toolkit/libdevice"
434
- if not os.path.exists(pkg_libdevice_location):
435
- return None
436
- return pkg_libdevice_location
437
-
593
+ def get_libdevice_wheel_path():
594
+ libdevice_path = None
595
+ # CUDA 12
596
+ nvvm_distribution = _get_distribution("nvidia-cuda-nvcc-cu12")
597
+ if nvvm_distribution is not None:
598
+ site_packages_path = nvvm_distribution.locate_file("")
599
+ libdevice_path = os.path.join(
600
+ site_packages_path,
601
+ "nvidia",
602
+ "cuda_nvcc",
603
+ "nvvm",
604
+ "libdevice",
605
+ "libdevice.10.bc",
606
+ )
438
607
 
439
- def get_libdevice_wheel():
440
- nvvm_path = _get_nvvm_wheel()
441
- if nvvm_path is None:
442
- return None
443
- nvvm_path = Path(nvvm_path)
444
- libdevice_path = nvvm_path.parent / "libdevice"
608
+ # CUDA 13
609
+ if libdevice_path is None:
610
+ nvvm_distribution = _get_distribution("nvidia-nvvm")
611
+ if (
612
+ nvvm_distribution is not None
613
+ and nvvm_distribution.version.startswith("13.")
614
+ ):
615
+ site_packages_path = nvvm_distribution.locate_file("")
616
+ libdevice_path = os.path.join(
617
+ site_packages_path,
618
+ "nvidia",
619
+ "cu13",
620
+ "nvvm",
621
+ "libdevice",
622
+ "libdevice.10.bc",
623
+ )
445
624
 
446
- return str(libdevice_path)
625
+ if libdevice_path and os.path.isfile(libdevice_path):
626
+ return libdevice_path
627
+ return None
447
628
 
448
629
 
449
630
  def get_current_cuda_target_name():
@@ -475,11 +656,11 @@ def get_conda_include_dir():
475
656
  Return the include directory in the current conda environment, if one
476
657
  is active and it exists.
477
658
  """
478
- is_conda_env = os.path.exists(os.path.join(sys.prefix, "conda-meta"))
659
+ is_conda_env = os.path.isdir(os.path.join(sys.prefix, "conda-meta"))
479
660
  if not is_conda_env:
480
661
  return
481
662
 
482
- if platform.system() == "Windows":
663
+ if IS_WIN32:
483
664
  include_dir = os.path.join(sys.prefix, "Library", "include")
484
665
  elif target_name := get_current_cuda_target_name():
485
666
  include_dir = os.path.join(
@@ -490,23 +671,21 @@ def get_conda_include_dir():
490
671
  # though usually it shouldn't.
491
672
  include_dir = os.path.join(sys.prefix, "include")
492
673
 
493
- if (
494
- os.path.exists(include_dir)
495
- and os.path.isdir(include_dir)
496
- and os.path.exists(
497
- os.path.join(include_dir, "cuda_device_runtime_api.h")
498
- )
674
+ if os.path.isdir(include_dir) and os.path.isfile(
675
+ os.path.join(include_dir, "cuda_device_runtime_api.h")
499
676
  ):
500
677
  return include_dir
501
- return
678
+ return None
502
679
 
503
680
 
504
681
  def _get_include_dir():
505
682
  """Find the root include directory."""
506
683
  options = [
507
684
  ("Conda environment (NVIDIA package)", get_conda_include_dir()),
685
+ ("NVIDIA NVCC Wheel", get_wheel_include()),
686
+ ("CUDA_HOME", get_cuda_home_include()),
687
+ ("System", get_system_ctk_include()),
508
688
  ("CUDA_INCLUDE_PATH Config Entry", config.CUDA_INCLUDE_PATH),
509
- # TODO: add others
510
689
  ]
511
690
  by, include_dir = _find_valid_path(options)
512
691
  return _env_path_tuple(by, include_dir)