numba-cuda 0.21.1__cp313-cp313-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (488) hide show
  1. _numba_cuda_redirector.pth +4 -0
  2. _numba_cuda_redirector.py +89 -0
  3. numba_cuda/VERSION +1 -0
  4. numba_cuda/__init__.py +6 -0
  5. numba_cuda/_version.py +11 -0
  6. numba_cuda/numba/cuda/__init__.py +70 -0
  7. numba_cuda/numba/cuda/_internal/cuda_bf16.py +16394 -0
  8. numba_cuda/numba/cuda/_internal/cuda_fp16.py +8112 -0
  9. numba_cuda/numba/cuda/api.py +577 -0
  10. numba_cuda/numba/cuda/api_util.py +76 -0
  11. numba_cuda/numba/cuda/args.py +72 -0
  12. numba_cuda/numba/cuda/bf16.py +397 -0
  13. numba_cuda/numba/cuda/cache_hints.py +287 -0
  14. numba_cuda/numba/cuda/cext/__init__.py +2 -0
  15. numba_cuda/numba/cuda/cext/_devicearray.cp313-win_amd64.pyd +0 -0
  16. numba_cuda/numba/cuda/cext/_devicearray.cpp +159 -0
  17. numba_cuda/numba/cuda/cext/_devicearray.h +29 -0
  18. numba_cuda/numba/cuda/cext/_dispatcher.cp313-win_amd64.pyd +0 -0
  19. numba_cuda/numba/cuda/cext/_dispatcher.cpp +1098 -0
  20. numba_cuda/numba/cuda/cext/_hashtable.cpp +532 -0
  21. numba_cuda/numba/cuda/cext/_hashtable.h +135 -0
  22. numba_cuda/numba/cuda/cext/_helperlib.c +71 -0
  23. numba_cuda/numba/cuda/cext/_helperlib.cp313-win_amd64.pyd +0 -0
  24. numba_cuda/numba/cuda/cext/_helpermod.c +82 -0
  25. numba_cuda/numba/cuda/cext/_pymodule.h +38 -0
  26. numba_cuda/numba/cuda/cext/_typeconv.cp313-win_amd64.pyd +0 -0
  27. numba_cuda/numba/cuda/cext/_typeconv.cpp +206 -0
  28. numba_cuda/numba/cuda/cext/_typeof.cpp +1159 -0
  29. numba_cuda/numba/cuda/cext/_typeof.h +19 -0
  30. numba_cuda/numba/cuda/cext/capsulethunk.h +111 -0
  31. numba_cuda/numba/cuda/cext/mviewbuf.c +385 -0
  32. numba_cuda/numba/cuda/cext/mviewbuf.cp313-win_amd64.pyd +0 -0
  33. numba_cuda/numba/cuda/cext/typeconv.cpp +212 -0
  34. numba_cuda/numba/cuda/cext/typeconv.hpp +101 -0
  35. numba_cuda/numba/cuda/cg.py +67 -0
  36. numba_cuda/numba/cuda/cgutils.py +1294 -0
  37. numba_cuda/numba/cuda/cloudpickle/__init__.py +21 -0
  38. numba_cuda/numba/cuda/cloudpickle/cloudpickle.py +1598 -0
  39. numba_cuda/numba/cuda/cloudpickle/cloudpickle_fast.py +17 -0
  40. numba_cuda/numba/cuda/codegen.py +541 -0
  41. numba_cuda/numba/cuda/compiler.py +1396 -0
  42. numba_cuda/numba/cuda/core/analysis.py +758 -0
  43. numba_cuda/numba/cuda/core/annotations/__init__.py +0 -0
  44. numba_cuda/numba/cuda/core/annotations/pretty_annotate.py +288 -0
  45. numba_cuda/numba/cuda/core/annotations/type_annotations.py +305 -0
  46. numba_cuda/numba/cuda/core/base.py +1332 -0
  47. numba_cuda/numba/cuda/core/boxing.py +1411 -0
  48. numba_cuda/numba/cuda/core/bytecode.py +728 -0
  49. numba_cuda/numba/cuda/core/byteflow.py +2346 -0
  50. numba_cuda/numba/cuda/core/caching.py +744 -0
  51. numba_cuda/numba/cuda/core/callconv.py +392 -0
  52. numba_cuda/numba/cuda/core/codegen.py +171 -0
  53. numba_cuda/numba/cuda/core/compiler.py +199 -0
  54. numba_cuda/numba/cuda/core/compiler_lock.py +85 -0
  55. numba_cuda/numba/cuda/core/compiler_machinery.py +497 -0
  56. numba_cuda/numba/cuda/core/config.py +650 -0
  57. numba_cuda/numba/cuda/core/consts.py +124 -0
  58. numba_cuda/numba/cuda/core/controlflow.py +989 -0
  59. numba_cuda/numba/cuda/core/entrypoints.py +57 -0
  60. numba_cuda/numba/cuda/core/environment.py +66 -0
  61. numba_cuda/numba/cuda/core/errors.py +917 -0
  62. numba_cuda/numba/cuda/core/event.py +511 -0
  63. numba_cuda/numba/cuda/core/funcdesc.py +330 -0
  64. numba_cuda/numba/cuda/core/generators.py +387 -0
  65. numba_cuda/numba/cuda/core/imputils.py +509 -0
  66. numba_cuda/numba/cuda/core/inline_closurecall.py +1787 -0
  67. numba_cuda/numba/cuda/core/interpreter.py +3617 -0
  68. numba_cuda/numba/cuda/core/ir.py +1812 -0
  69. numba_cuda/numba/cuda/core/ir_utils.py +2638 -0
  70. numba_cuda/numba/cuda/core/optional.py +129 -0
  71. numba_cuda/numba/cuda/core/options.py +262 -0
  72. numba_cuda/numba/cuda/core/postproc.py +249 -0
  73. numba_cuda/numba/cuda/core/pythonapi.py +1859 -0
  74. numba_cuda/numba/cuda/core/registry.py +46 -0
  75. numba_cuda/numba/cuda/core/removerefctpass.py +123 -0
  76. numba_cuda/numba/cuda/core/rewrites/__init__.py +26 -0
  77. numba_cuda/numba/cuda/core/rewrites/ir_print.py +91 -0
  78. numba_cuda/numba/cuda/core/rewrites/registry.py +104 -0
  79. numba_cuda/numba/cuda/core/rewrites/static_binop.py +41 -0
  80. numba_cuda/numba/cuda/core/rewrites/static_getitem.py +189 -0
  81. numba_cuda/numba/cuda/core/rewrites/static_raise.py +100 -0
  82. numba_cuda/numba/cuda/core/sigutils.py +68 -0
  83. numba_cuda/numba/cuda/core/ssa.py +498 -0
  84. numba_cuda/numba/cuda/core/targetconfig.py +330 -0
  85. numba_cuda/numba/cuda/core/tracing.py +231 -0
  86. numba_cuda/numba/cuda/core/transforms.py +956 -0
  87. numba_cuda/numba/cuda/core/typed_passes.py +867 -0
  88. numba_cuda/numba/cuda/core/typeinfer.py +1950 -0
  89. numba_cuda/numba/cuda/core/unsafe/__init__.py +0 -0
  90. numba_cuda/numba/cuda/core/unsafe/bytes.py +67 -0
  91. numba_cuda/numba/cuda/core/unsafe/eh.py +67 -0
  92. numba_cuda/numba/cuda/core/unsafe/refcount.py +98 -0
  93. numba_cuda/numba/cuda/core/untyped_passes.py +1979 -0
  94. numba_cuda/numba/cuda/cpython/builtins.py +1153 -0
  95. numba_cuda/numba/cuda/cpython/charseq.py +1218 -0
  96. numba_cuda/numba/cuda/cpython/cmathimpl.py +560 -0
  97. numba_cuda/numba/cuda/cpython/enumimpl.py +103 -0
  98. numba_cuda/numba/cuda/cpython/iterators.py +167 -0
  99. numba_cuda/numba/cuda/cpython/listobj.py +1326 -0
  100. numba_cuda/numba/cuda/cpython/mathimpl.py +499 -0
  101. numba_cuda/numba/cuda/cpython/numbers.py +1475 -0
  102. numba_cuda/numba/cuda/cpython/rangeobj.py +289 -0
  103. numba_cuda/numba/cuda/cpython/slicing.py +322 -0
  104. numba_cuda/numba/cuda/cpython/tupleobj.py +456 -0
  105. numba_cuda/numba/cuda/cpython/unicode.py +2865 -0
  106. numba_cuda/numba/cuda/cpython/unicode_support.py +1597 -0
  107. numba_cuda/numba/cuda/cpython/unsafe/__init__.py +0 -0
  108. numba_cuda/numba/cuda/cpython/unsafe/numbers.py +64 -0
  109. numba_cuda/numba/cuda/cpython/unsafe/tuple.py +92 -0
  110. numba_cuda/numba/cuda/cuda_paths.py +691 -0
  111. numba_cuda/numba/cuda/cudadecl.py +556 -0
  112. numba_cuda/numba/cuda/cudadrv/__init__.py +14 -0
  113. numba_cuda/numba/cuda/cudadrv/devicearray.py +951 -0
  114. numba_cuda/numba/cuda/cudadrv/devices.py +249 -0
  115. numba_cuda/numba/cuda/cudadrv/driver.py +3222 -0
  116. numba_cuda/numba/cuda/cudadrv/drvapi.py +435 -0
  117. numba_cuda/numba/cuda/cudadrv/dummyarray.py +558 -0
  118. numba_cuda/numba/cuda/cudadrv/enums.py +613 -0
  119. numba_cuda/numba/cuda/cudadrv/error.py +48 -0
  120. numba_cuda/numba/cuda/cudadrv/libs.py +220 -0
  121. numba_cuda/numba/cuda/cudadrv/linkable_code.py +184 -0
  122. numba_cuda/numba/cuda/cudadrv/mappings.py +14 -0
  123. numba_cuda/numba/cuda/cudadrv/ndarray.py +26 -0
  124. numba_cuda/numba/cuda/cudadrv/nvrtc.py +193 -0
  125. numba_cuda/numba/cuda/cudadrv/nvvm.py +756 -0
  126. numba_cuda/numba/cuda/cudadrv/rtapi.py +13 -0
  127. numba_cuda/numba/cuda/cudadrv/runtime.py +34 -0
  128. numba_cuda/numba/cuda/cudaimpl.py +995 -0
  129. numba_cuda/numba/cuda/cudamath.py +149 -0
  130. numba_cuda/numba/cuda/datamodel/__init__.py +7 -0
  131. numba_cuda/numba/cuda/datamodel/cuda_manager.py +66 -0
  132. numba_cuda/numba/cuda/datamodel/cuda_models.py +1446 -0
  133. numba_cuda/numba/cuda/datamodel/cuda_packer.py +224 -0
  134. numba_cuda/numba/cuda/datamodel/cuda_registry.py +22 -0
  135. numba_cuda/numba/cuda/datamodel/cuda_testing.py +153 -0
  136. numba_cuda/numba/cuda/datamodel/manager.py +11 -0
  137. numba_cuda/numba/cuda/datamodel/models.py +9 -0
  138. numba_cuda/numba/cuda/datamodel/packer.py +9 -0
  139. numba_cuda/numba/cuda/datamodel/registry.py +11 -0
  140. numba_cuda/numba/cuda/datamodel/testing.py +11 -0
  141. numba_cuda/numba/cuda/debuginfo.py +903 -0
  142. numba_cuda/numba/cuda/decorators.py +294 -0
  143. numba_cuda/numba/cuda/descriptor.py +35 -0
  144. numba_cuda/numba/cuda/device_init.py +158 -0
  145. numba_cuda/numba/cuda/deviceufunc.py +1021 -0
  146. numba_cuda/numba/cuda/dispatcher.py +2463 -0
  147. numba_cuda/numba/cuda/errors.py +72 -0
  148. numba_cuda/numba/cuda/extending.py +697 -0
  149. numba_cuda/numba/cuda/flags.py +178 -0
  150. numba_cuda/numba/cuda/fp16.py +357 -0
  151. numba_cuda/numba/cuda/include/12/cuda_bf16.h +5118 -0
  152. numba_cuda/numba/cuda/include/12/cuda_bf16.hpp +3865 -0
  153. numba_cuda/numba/cuda/include/12/cuda_fp16.h +5363 -0
  154. numba_cuda/numba/cuda/include/12/cuda_fp16.hpp +3483 -0
  155. numba_cuda/numba/cuda/include/13/cuda_bf16.h +5118 -0
  156. numba_cuda/numba/cuda/include/13/cuda_bf16.hpp +3865 -0
  157. numba_cuda/numba/cuda/include/13/cuda_fp16.h +5363 -0
  158. numba_cuda/numba/cuda/include/13/cuda_fp16.hpp +3483 -0
  159. numba_cuda/numba/cuda/initialize.py +24 -0
  160. numba_cuda/numba/cuda/intrinsic_wrapper.py +41 -0
  161. numba_cuda/numba/cuda/intrinsics.py +382 -0
  162. numba_cuda/numba/cuda/itanium_mangler.py +214 -0
  163. numba_cuda/numba/cuda/kernels/__init__.py +2 -0
  164. numba_cuda/numba/cuda/kernels/reduction.py +265 -0
  165. numba_cuda/numba/cuda/kernels/transpose.py +65 -0
  166. numba_cuda/numba/cuda/libdevice.py +3386 -0
  167. numba_cuda/numba/cuda/libdevicedecl.py +20 -0
  168. numba_cuda/numba/cuda/libdevicefuncs.py +1060 -0
  169. numba_cuda/numba/cuda/libdeviceimpl.py +88 -0
  170. numba_cuda/numba/cuda/locks.py +19 -0
  171. numba_cuda/numba/cuda/lowering.py +1951 -0
  172. numba_cuda/numba/cuda/mathimpl.py +374 -0
  173. numba_cuda/numba/cuda/memory_management/__init__.py +4 -0
  174. numba_cuda/numba/cuda/memory_management/memsys.cu +99 -0
  175. numba_cuda/numba/cuda/memory_management/memsys.cuh +22 -0
  176. numba_cuda/numba/cuda/memory_management/nrt.cu +212 -0
  177. numba_cuda/numba/cuda/memory_management/nrt.cuh +48 -0
  178. numba_cuda/numba/cuda/memory_management/nrt.py +390 -0
  179. numba_cuda/numba/cuda/memory_management/nrt_context.py +438 -0
  180. numba_cuda/numba/cuda/misc/appdirs.py +594 -0
  181. numba_cuda/numba/cuda/misc/cffiimpl.py +24 -0
  182. numba_cuda/numba/cuda/misc/coverage_support.py +43 -0
  183. numba_cuda/numba/cuda/misc/dump_style.py +41 -0
  184. numba_cuda/numba/cuda/misc/findlib.py +75 -0
  185. numba_cuda/numba/cuda/misc/firstlinefinder.py +96 -0
  186. numba_cuda/numba/cuda/misc/gdb_hook.py +240 -0
  187. numba_cuda/numba/cuda/misc/literal.py +28 -0
  188. numba_cuda/numba/cuda/misc/llvm_pass_timings.py +412 -0
  189. numba_cuda/numba/cuda/misc/special.py +94 -0
  190. numba_cuda/numba/cuda/models.py +56 -0
  191. numba_cuda/numba/cuda/np/arraymath.py +5130 -0
  192. numba_cuda/numba/cuda/np/arrayobj.py +7635 -0
  193. numba_cuda/numba/cuda/np/extensions.py +11 -0
  194. numba_cuda/numba/cuda/np/linalg.py +3087 -0
  195. numba_cuda/numba/cuda/np/math/__init__.py +0 -0
  196. numba_cuda/numba/cuda/np/math/cmathimpl.py +558 -0
  197. numba_cuda/numba/cuda/np/math/mathimpl.py +487 -0
  198. numba_cuda/numba/cuda/np/math/numbers.py +1461 -0
  199. numba_cuda/numba/cuda/np/npdatetime.py +969 -0
  200. numba_cuda/numba/cuda/np/npdatetime_helpers.py +217 -0
  201. numba_cuda/numba/cuda/np/npyfuncs.py +1808 -0
  202. numba_cuda/numba/cuda/np/npyimpl.py +1027 -0
  203. numba_cuda/numba/cuda/np/numpy_support.py +798 -0
  204. numba_cuda/numba/cuda/np/polynomial/__init__.py +4 -0
  205. numba_cuda/numba/cuda/np/polynomial/polynomial_core.py +242 -0
  206. numba_cuda/numba/cuda/np/polynomial/polynomial_functions.py +380 -0
  207. numba_cuda/numba/cuda/np/ufunc/__init__.py +4 -0
  208. numba_cuda/numba/cuda/np/ufunc/decorators.py +203 -0
  209. numba_cuda/numba/cuda/np/ufunc/sigparse.py +68 -0
  210. numba_cuda/numba/cuda/np/ufunc/ufuncbuilder.py +65 -0
  211. numba_cuda/numba/cuda/np/ufunc_db.py +1282 -0
  212. numba_cuda/numba/cuda/np/unsafe/__init__.py +0 -0
  213. numba_cuda/numba/cuda/np/unsafe/ndarray.py +84 -0
  214. numba_cuda/numba/cuda/nvvmutils.py +254 -0
  215. numba_cuda/numba/cuda/printimpl.py +126 -0
  216. numba_cuda/numba/cuda/random.py +308 -0
  217. numba_cuda/numba/cuda/reshape_funcs.cu +156 -0
  218. numba_cuda/numba/cuda/serialize.py +267 -0
  219. numba_cuda/numba/cuda/simulator/__init__.py +63 -0
  220. numba_cuda/numba/cuda/simulator/_internal/__init__.py +4 -0
  221. numba_cuda/numba/cuda/simulator/_internal/cuda_bf16.py +2 -0
  222. numba_cuda/numba/cuda/simulator/api.py +179 -0
  223. numba_cuda/numba/cuda/simulator/bf16.py +4 -0
  224. numba_cuda/numba/cuda/simulator/compiler.py +38 -0
  225. numba_cuda/numba/cuda/simulator/cudadrv/__init__.py +11 -0
  226. numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py +462 -0
  227. numba_cuda/numba/cuda/simulator/cudadrv/devices.py +122 -0
  228. numba_cuda/numba/cuda/simulator/cudadrv/driver.py +66 -0
  229. numba_cuda/numba/cuda/simulator/cudadrv/drvapi.py +7 -0
  230. numba_cuda/numba/cuda/simulator/cudadrv/dummyarray.py +7 -0
  231. numba_cuda/numba/cuda/simulator/cudadrv/error.py +10 -0
  232. numba_cuda/numba/cuda/simulator/cudadrv/libs.py +10 -0
  233. numba_cuda/numba/cuda/simulator/cudadrv/linkable_code.py +61 -0
  234. numba_cuda/numba/cuda/simulator/cudadrv/nvrtc.py +11 -0
  235. numba_cuda/numba/cuda/simulator/cudadrv/nvvm.py +32 -0
  236. numba_cuda/numba/cuda/simulator/cudadrv/runtime.py +22 -0
  237. numba_cuda/numba/cuda/simulator/dispatcher.py +11 -0
  238. numba_cuda/numba/cuda/simulator/kernel.py +320 -0
  239. numba_cuda/numba/cuda/simulator/kernelapi.py +509 -0
  240. numba_cuda/numba/cuda/simulator/memory_management/__init__.py +4 -0
  241. numba_cuda/numba/cuda/simulator/memory_management/nrt.py +21 -0
  242. numba_cuda/numba/cuda/simulator/reduction.py +19 -0
  243. numba_cuda/numba/cuda/simulator/tests/support.py +4 -0
  244. numba_cuda/numba/cuda/simulator/vector_types.py +65 -0
  245. numba_cuda/numba/cuda/simulator_init.py +18 -0
  246. numba_cuda/numba/cuda/stubs.py +635 -0
  247. numba_cuda/numba/cuda/target.py +505 -0
  248. numba_cuda/numba/cuda/testing.py +347 -0
  249. numba_cuda/numba/cuda/tests/__init__.py +62 -0
  250. numba_cuda/numba/cuda/tests/benchmarks/__init__.py +0 -0
  251. numba_cuda/numba/cuda/tests/benchmarks/test_kernel_launch.py +119 -0
  252. numba_cuda/numba/cuda/tests/cloudpickle_main_class.py +9 -0
  253. numba_cuda/numba/cuda/tests/core/serialize_usecases.py +113 -0
  254. numba_cuda/numba/cuda/tests/core/test_itanium_mangler.py +83 -0
  255. numba_cuda/numba/cuda/tests/core/test_serialize.py +371 -0
  256. numba_cuda/numba/cuda/tests/cudadrv/__init__.py +9 -0
  257. numba_cuda/numba/cuda/tests/cudadrv/test_array_attr.py +147 -0
  258. numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py +161 -0
  259. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_array_slicing.py +397 -0
  260. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_auto_context.py +24 -0
  261. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py +180 -0
  262. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py +313 -0
  263. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_memory.py +187 -0
  264. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py +621 -0
  265. numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py +247 -0
  266. numba_cuda/numba/cuda/tests/cudadrv/test_detect.py +100 -0
  267. numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +198 -0
  268. numba_cuda/numba/cuda/tests/cudadrv/test_events.py +53 -0
  269. numba_cuda/numba/cuda/tests/cudadrv/test_host_alloc.py +72 -0
  270. numba_cuda/numba/cuda/tests/cudadrv/test_init.py +138 -0
  271. numba_cuda/numba/cuda/tests/cudadrv/test_inline_ptx.py +43 -0
  272. numba_cuda/numba/cuda/tests/cudadrv/test_is_fp16.py +15 -0
  273. numba_cuda/numba/cuda/tests/cudadrv/test_linkable_code.py +58 -0
  274. numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +348 -0
  275. numba_cuda/numba/cuda/tests/cudadrv/test_managed_alloc.py +128 -0
  276. numba_cuda/numba/cuda/tests/cudadrv/test_module_callbacks.py +301 -0
  277. numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py +174 -0
  278. numba_cuda/numba/cuda/tests/cudadrv/test_nvrtc.py +28 -0
  279. numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py +185 -0
  280. numba_cuda/numba/cuda/tests/cudadrv/test_pinned.py +39 -0
  281. numba_cuda/numba/cuda/tests/cudadrv/test_profiler.py +23 -0
  282. numba_cuda/numba/cuda/tests/cudadrv/test_reset_device.py +38 -0
  283. numba_cuda/numba/cuda/tests/cudadrv/test_runtime.py +48 -0
  284. numba_cuda/numba/cuda/tests/cudadrv/test_select_device.py +44 -0
  285. numba_cuda/numba/cuda/tests/cudadrv/test_streams.py +127 -0
  286. numba_cuda/numba/cuda/tests/cudapy/__init__.py +9 -0
  287. numba_cuda/numba/cuda/tests/cudapy/cache_usecases.py +231 -0
  288. numba_cuda/numba/cuda/tests/cudapy/cache_with_cpu_usecases.py +50 -0
  289. numba_cuda/numba/cuda/tests/cudapy/cg_cache_usecases.py +36 -0
  290. numba_cuda/numba/cuda/tests/cudapy/complex_usecases.py +116 -0
  291. numba_cuda/numba/cuda/tests/cudapy/enum_usecases.py +59 -0
  292. numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +62 -0
  293. numba_cuda/numba/cuda/tests/cudapy/jitlink.ptx +28 -0
  294. numba_cuda/numba/cuda/tests/cudapy/overload_usecases.py +33 -0
  295. numba_cuda/numba/cuda/tests/cudapy/recursion_usecases.py +104 -0
  296. numba_cuda/numba/cuda/tests/cudapy/test_alignment.py +47 -0
  297. numba_cuda/numba/cuda/tests/cudapy/test_analysis.py +1122 -0
  298. numba_cuda/numba/cuda/tests/cudapy/test_array.py +344 -0
  299. numba_cuda/numba/cuda/tests/cudapy/test_array_alignment.py +268 -0
  300. numba_cuda/numba/cuda/tests/cudapy/test_array_args.py +203 -0
  301. numba_cuda/numba/cuda/tests/cudapy/test_array_methods.py +63 -0
  302. numba_cuda/numba/cuda/tests/cudapy/test_array_reductions.py +360 -0
  303. numba_cuda/numba/cuda/tests/cudapy/test_atomics.py +1815 -0
  304. numba_cuda/numba/cuda/tests/cudapy/test_bfloat16.py +599 -0
  305. numba_cuda/numba/cuda/tests/cudapy/test_bfloat16_bindings.py +377 -0
  306. numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py +160 -0
  307. numba_cuda/numba/cuda/tests/cudapy/test_boolean.py +27 -0
  308. numba_cuda/numba/cuda/tests/cudapy/test_byteflow.py +98 -0
  309. numba_cuda/numba/cuda/tests/cudapy/test_cache_hints.py +210 -0
  310. numba_cuda/numba/cuda/tests/cudapy/test_caching.py +683 -0
  311. numba_cuda/numba/cuda/tests/cudapy/test_casting.py +265 -0
  312. numba_cuda/numba/cuda/tests/cudapy/test_cffi.py +42 -0
  313. numba_cuda/numba/cuda/tests/cudapy/test_compiler.py +718 -0
  314. numba_cuda/numba/cuda/tests/cudapy/test_complex.py +370 -0
  315. numba_cuda/numba/cuda/tests/cudapy/test_complex_kernel.py +23 -0
  316. numba_cuda/numba/cuda/tests/cudapy/test_const_string.py +142 -0
  317. numba_cuda/numba/cuda/tests/cudapy/test_constmem.py +178 -0
  318. numba_cuda/numba/cuda/tests/cudapy/test_cooperative_groups.py +193 -0
  319. numba_cuda/numba/cuda/tests/cudapy/test_copy_propagate.py +131 -0
  320. numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py +438 -0
  321. numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py +94 -0
  322. numba_cuda/numba/cuda/tests/cudapy/test_datetime.py +101 -0
  323. numba_cuda/numba/cuda/tests/cudapy/test_debug.py +105 -0
  324. numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +889 -0
  325. numba_cuda/numba/cuda/tests/cudapy/test_debuginfo_types.py +476 -0
  326. numba_cuda/numba/cuda/tests/cudapy/test_device_func.py +500 -0
  327. numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +820 -0
  328. numba_cuda/numba/cuda/tests/cudapy/test_enums.py +152 -0
  329. numba_cuda/numba/cuda/tests/cudapy/test_errors.py +111 -0
  330. numba_cuda/numba/cuda/tests/cudapy/test_exception.py +170 -0
  331. numba_cuda/numba/cuda/tests/cudapy/test_extending.py +1088 -0
  332. numba_cuda/numba/cuda/tests/cudapy/test_extending_types.py +71 -0
  333. numba_cuda/numba/cuda/tests/cudapy/test_fastmath.py +265 -0
  334. numba_cuda/numba/cuda/tests/cudapy/test_flow_control.py +1433 -0
  335. numba_cuda/numba/cuda/tests/cudapy/test_forall.py +57 -0
  336. numba_cuda/numba/cuda/tests/cudapy/test_freevar.py +34 -0
  337. numba_cuda/numba/cuda/tests/cudapy/test_frexp_ldexp.py +69 -0
  338. numba_cuda/numba/cuda/tests/cudapy/test_globals.py +62 -0
  339. numba_cuda/numba/cuda/tests/cudapy/test_gufunc.py +474 -0
  340. numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scalar.py +167 -0
  341. numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scheduling.py +92 -0
  342. numba_cuda/numba/cuda/tests/cudapy/test_idiv.py +39 -0
  343. numba_cuda/numba/cuda/tests/cudapy/test_inline.py +170 -0
  344. numba_cuda/numba/cuda/tests/cudapy/test_inspect.py +255 -0
  345. numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py +1219 -0
  346. numba_cuda/numba/cuda/tests/cudapy/test_ipc.py +263 -0
  347. numba_cuda/numba/cuda/tests/cudapy/test_ir.py +598 -0
  348. numba_cuda/numba/cuda/tests/cudapy/test_ir_utils.py +276 -0
  349. numba_cuda/numba/cuda/tests/cudapy/test_iterators.py +101 -0
  350. numba_cuda/numba/cuda/tests/cudapy/test_lang.py +68 -0
  351. numba_cuda/numba/cuda/tests/cudapy/test_laplace.py +123 -0
  352. numba_cuda/numba/cuda/tests/cudapy/test_libdevice.py +194 -0
  353. numba_cuda/numba/cuda/tests/cudapy/test_lineinfo.py +220 -0
  354. numba_cuda/numba/cuda/tests/cudapy/test_localmem.py +173 -0
  355. numba_cuda/numba/cuda/tests/cudapy/test_make_function_to_jit_function.py +364 -0
  356. numba_cuda/numba/cuda/tests/cudapy/test_mandel.py +47 -0
  357. numba_cuda/numba/cuda/tests/cudapy/test_math.py +842 -0
  358. numba_cuda/numba/cuda/tests/cudapy/test_matmul.py +76 -0
  359. numba_cuda/numba/cuda/tests/cudapy/test_minmax.py +78 -0
  360. numba_cuda/numba/cuda/tests/cudapy/test_montecarlo.py +25 -0
  361. numba_cuda/numba/cuda/tests/cudapy/test_multigpu.py +145 -0
  362. numba_cuda/numba/cuda/tests/cudapy/test_multiprocessing.py +39 -0
  363. numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py +82 -0
  364. numba_cuda/numba/cuda/tests/cudapy/test_nondet.py +53 -0
  365. numba_cuda/numba/cuda/tests/cudapy/test_operator.py +504 -0
  366. numba_cuda/numba/cuda/tests/cudapy/test_optimization.py +93 -0
  367. numba_cuda/numba/cuda/tests/cudapy/test_overload.py +402 -0
  368. numba_cuda/numba/cuda/tests/cudapy/test_powi.py +128 -0
  369. numba_cuda/numba/cuda/tests/cudapy/test_print.py +193 -0
  370. numba_cuda/numba/cuda/tests/cudapy/test_py2_div_issue.py +37 -0
  371. numba_cuda/numba/cuda/tests/cudapy/test_random.py +117 -0
  372. numba_cuda/numba/cuda/tests/cudapy/test_record_dtype.py +614 -0
  373. numba_cuda/numba/cuda/tests/cudapy/test_recursion.py +130 -0
  374. numba_cuda/numba/cuda/tests/cudapy/test_reduction.py +94 -0
  375. numba_cuda/numba/cuda/tests/cudapy/test_retrieve_autoconverted_arrays.py +83 -0
  376. numba_cuda/numba/cuda/tests/cudapy/test_serialize.py +86 -0
  377. numba_cuda/numba/cuda/tests/cudapy/test_slicing.py +40 -0
  378. numba_cuda/numba/cuda/tests/cudapy/test_sm.py +457 -0
  379. numba_cuda/numba/cuda/tests/cudapy/test_sm_creation.py +233 -0
  380. numba_cuda/numba/cuda/tests/cudapy/test_ssa.py +454 -0
  381. numba_cuda/numba/cuda/tests/cudapy/test_stream_api.py +56 -0
  382. numba_cuda/numba/cuda/tests/cudapy/test_sync.py +277 -0
  383. numba_cuda/numba/cuda/tests/cudapy/test_tracing.py +200 -0
  384. numba_cuda/numba/cuda/tests/cudapy/test_transpose.py +90 -0
  385. numba_cuda/numba/cuda/tests/cudapy/test_typeconv.py +333 -0
  386. numba_cuda/numba/cuda/tests/cudapy/test_typeinfer.py +538 -0
  387. numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +585 -0
  388. numba_cuda/numba/cuda/tests/cudapy/test_userexc.py +42 -0
  389. numba_cuda/numba/cuda/tests/cudapy/test_vector_type.py +485 -0
  390. numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py +312 -0
  391. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_complex.py +23 -0
  392. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py +183 -0
  393. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_device.py +40 -0
  394. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_scalar_arg.py +40 -0
  395. numba_cuda/numba/cuda/tests/cudapy/test_warning.py +206 -0
  396. numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +331 -0
  397. numba_cuda/numba/cuda/tests/cudasim/__init__.py +9 -0
  398. numba_cuda/numba/cuda/tests/cudasim/support.py +9 -0
  399. numba_cuda/numba/cuda/tests/cudasim/test_cudasim_issues.py +111 -0
  400. numba_cuda/numba/cuda/tests/data/__init__.py +2 -0
  401. numba_cuda/numba/cuda/tests/data/cta_barrier.cu +28 -0
  402. numba_cuda/numba/cuda/tests/data/cuda_include.cu +10 -0
  403. numba_cuda/numba/cuda/tests/data/error.cu +12 -0
  404. numba_cuda/numba/cuda/tests/data/include/add.cuh +8 -0
  405. numba_cuda/numba/cuda/tests/data/jitlink.cu +28 -0
  406. numba_cuda/numba/cuda/tests/data/jitlink.ptx +49 -0
  407. numba_cuda/numba/cuda/tests/data/warn.cu +12 -0
  408. numba_cuda/numba/cuda/tests/doc_examples/__init__.py +9 -0
  409. numba_cuda/numba/cuda/tests/doc_examples/ffi/__init__.py +2 -0
  410. numba_cuda/numba/cuda/tests/doc_examples/ffi/functions.cu +54 -0
  411. numba_cuda/numba/cuda/tests/doc_examples/ffi/include/mul.cuh +8 -0
  412. numba_cuda/numba/cuda/tests/doc_examples/ffi/saxpy.cu +14 -0
  413. numba_cuda/numba/cuda/tests/doc_examples/test_cg.py +86 -0
  414. numba_cuda/numba/cuda/tests/doc_examples/test_cpointer.py +68 -0
  415. numba_cuda/numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py +81 -0
  416. numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py +141 -0
  417. numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py +160 -0
  418. numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py +180 -0
  419. numba_cuda/numba/cuda/tests/doc_examples/test_montecarlo.py +119 -0
  420. numba_cuda/numba/cuda/tests/doc_examples/test_random.py +66 -0
  421. numba_cuda/numba/cuda/tests/doc_examples/test_reduction.py +80 -0
  422. numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py +206 -0
  423. numba_cuda/numba/cuda/tests/doc_examples/test_ufunc.py +53 -0
  424. numba_cuda/numba/cuda/tests/doc_examples/test_vecadd.py +76 -0
  425. numba_cuda/numba/cuda/tests/nocuda/__init__.py +9 -0
  426. numba_cuda/numba/cuda/tests/nocuda/test_dummyarray.py +391 -0
  427. numba_cuda/numba/cuda/tests/nocuda/test_function_resolution.py +48 -0
  428. numba_cuda/numba/cuda/tests/nocuda/test_import.py +63 -0
  429. numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py +252 -0
  430. numba_cuda/numba/cuda/tests/nocuda/test_nvvm.py +59 -0
  431. numba_cuda/numba/cuda/tests/nrt/__init__.py +9 -0
  432. numba_cuda/numba/cuda/tests/nrt/test_nrt.py +387 -0
  433. numba_cuda/numba/cuda/tests/nrt/test_nrt_refct.py +124 -0
  434. numba_cuda/numba/cuda/tests/support.py +900 -0
  435. numba_cuda/numba/cuda/typeconv/__init__.py +4 -0
  436. numba_cuda/numba/cuda/typeconv/castgraph.py +137 -0
  437. numba_cuda/numba/cuda/typeconv/rules.py +63 -0
  438. numba_cuda/numba/cuda/typeconv/typeconv.py +121 -0
  439. numba_cuda/numba/cuda/types/__init__.py +233 -0
  440. numba_cuda/numba/cuda/types/__init__.pyi +167 -0
  441. numba_cuda/numba/cuda/types/abstract.py +9 -0
  442. numba_cuda/numba/cuda/types/common.py +9 -0
  443. numba_cuda/numba/cuda/types/containers.py +9 -0
  444. numba_cuda/numba/cuda/types/cuda_abstract.py +533 -0
  445. numba_cuda/numba/cuda/types/cuda_common.py +110 -0
  446. numba_cuda/numba/cuda/types/cuda_containers.py +971 -0
  447. numba_cuda/numba/cuda/types/cuda_function_type.py +230 -0
  448. numba_cuda/numba/cuda/types/cuda_functions.py +798 -0
  449. numba_cuda/numba/cuda/types/cuda_iterators.py +120 -0
  450. numba_cuda/numba/cuda/types/cuda_misc.py +569 -0
  451. numba_cuda/numba/cuda/types/cuda_npytypes.py +690 -0
  452. numba_cuda/numba/cuda/types/cuda_scalars.py +280 -0
  453. numba_cuda/numba/cuda/types/ext_types.py +101 -0
  454. numba_cuda/numba/cuda/types/function_type.py +11 -0
  455. numba_cuda/numba/cuda/types/functions.py +9 -0
  456. numba_cuda/numba/cuda/types/iterators.py +9 -0
  457. numba_cuda/numba/cuda/types/misc.py +9 -0
  458. numba_cuda/numba/cuda/types/npytypes.py +9 -0
  459. numba_cuda/numba/cuda/types/scalars.py +9 -0
  460. numba_cuda/numba/cuda/typing/__init__.py +19 -0
  461. numba_cuda/numba/cuda/typing/arraydecl.py +939 -0
  462. numba_cuda/numba/cuda/typing/asnumbatype.py +130 -0
  463. numba_cuda/numba/cuda/typing/bufproto.py +70 -0
  464. numba_cuda/numba/cuda/typing/builtins.py +1209 -0
  465. numba_cuda/numba/cuda/typing/cffi_utils.py +219 -0
  466. numba_cuda/numba/cuda/typing/cmathdecl.py +47 -0
  467. numba_cuda/numba/cuda/typing/collections.py +138 -0
  468. numba_cuda/numba/cuda/typing/context.py +782 -0
  469. numba_cuda/numba/cuda/typing/ctypes_utils.py +125 -0
  470. numba_cuda/numba/cuda/typing/dictdecl.py +63 -0
  471. numba_cuda/numba/cuda/typing/enumdecl.py +74 -0
  472. numba_cuda/numba/cuda/typing/listdecl.py +147 -0
  473. numba_cuda/numba/cuda/typing/mathdecl.py +158 -0
  474. numba_cuda/numba/cuda/typing/npdatetime.py +322 -0
  475. numba_cuda/numba/cuda/typing/npydecl.py +749 -0
  476. numba_cuda/numba/cuda/typing/setdecl.py +115 -0
  477. numba_cuda/numba/cuda/typing/templates.py +1446 -0
  478. numba_cuda/numba/cuda/typing/typeof.py +301 -0
  479. numba_cuda/numba/cuda/ufuncs.py +746 -0
  480. numba_cuda/numba/cuda/utils.py +724 -0
  481. numba_cuda/numba/cuda/vector_types.py +214 -0
  482. numba_cuda/numba/cuda/vectorizers.py +260 -0
  483. numba_cuda-0.21.1.dist-info/METADATA +109 -0
  484. numba_cuda-0.21.1.dist-info/RECORD +488 -0
  485. numba_cuda-0.21.1.dist-info/WHEEL +5 -0
  486. numba_cuda-0.21.1.dist-info/licenses/LICENSE +26 -0
  487. numba_cuda-0.21.1.dist-info/licenses/LICENSE.numba +24 -0
  488. numba_cuda-0.21.1.dist-info/top_level.txt +1 -0
@@ -0,0 +1,691 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: BSD-2-Clause
3
+
4
+ import sys
5
+ import os
6
+ from collections import namedtuple
7
+ import platform
8
+ import importlib.metadata
9
+ from numba.cuda.core.config import IS_WIN32
10
+ from numba.cuda.misc.findlib import find_lib
11
+ from numba.cuda import config
12
+
13
+ _env_path_tuple = namedtuple("_env_path_tuple", ["by", "info"])
14
+
15
+ SEARCH_PRIORITY = [
16
+ "Conda environment",
17
+ "NVIDIA NVCC Wheel",
18
+ "CUDA_HOME",
19
+ "System",
20
+ ]
21
+
22
+
23
+ def _get_distribution(distribution_name):
24
+ """Get the distribution path using importlib.metadata, returning None if not found."""
25
+ try:
26
+ dist = importlib.metadata.distribution(distribution_name)
27
+ return dist
28
+ except importlib.metadata.PackageNotFoundError:
29
+ return None
30
+
31
+
32
def _priority_index(label):
    """Return the position of *label* within SEARCH_PRIORITY.

    Raises ValueError when *label* is not one of the known source labels.
    """
    if label not in SEARCH_PRIORITY:
        raise ValueError(f"Can't determine search priority for {label}")
    return SEARCH_PRIORITY.index(label)
37
+
38
+
39
def _find_first_valid_lazy(options):
    """Return the first ``(label, value)`` whose finder yields a truthy value.

    *options* is an iterable of ``(label, callable)`` pairs. The pairs are
    ordered by search priority and each callable is invoked only when every
    earlier one produced a falsy result. ``("<unknown>", None)`` is returned
    when nothing is found.
    """
    ranked = sorted(options, key=lambda option: _priority_index(option[0]))
    for source, finder in ranked:
        found = finder()
        if not found:
            continue
        return source, found
    return "<unknown>", None
46
+
47
+
48
def _build_options(pairs):
    """Return *pairs* as a list ordered by the SEARCH_PRIORITY of their labels.

    Each pair is ``(label, value)``; labels that do not appear in
    SEARCH_PRIORITY sort after all known labels.
    """
    rank_of = {name: position for position, name in enumerate(SEARCH_PRIORITY)}

    def rank(pair):
        return rank_of.get(pair[0], float("inf"))

    return sorted(pairs, key=rank)
54
+
55
+
56
+ def _find_valid_path(options):
57
+ """Find valid path from *options*, which is a list of 2-tuple of
58
+ (name, path). Return first pair where *path* is not None.
59
+ If no valid path is found, return ('<unknown>', None)
60
+ """
61
+ for by, data in options:
62
+ if data is not None:
63
+ return by, data
64
+ else:
65
+ return "<unknown>", None
66
+
67
+
68
def _get_libdevice_path_decision():
    """Return ``(label, path)`` for the first source that provides libdevice."""
    libdevice_parts = ("nvvm", "libdevice", "libdevice.10.bc")

    def from_cuda_home():
        return get_cuda_home(*libdevice_parts)

    def from_system():
        return get_system_ctk(*libdevice_parts)

    candidates = [
        ("Conda environment", get_libdevice_conda_path),
        ("NVIDIA NVCC Wheel", get_libdevice_wheel_path),
        ("CUDA_HOME", from_cuda_home),
        ("System", from_system),
    ]
    return _find_first_valid_lazy(_build_options(candidates))
84
+
85
+
86
def _get_nvvm_path_decision():
    """Return ``(label, path)`` for the first source that provides NVVM."""
    candidates = [
        ("Conda environment", _get_nvvm_conda_path),
        ("NVIDIA NVCC Wheel", _get_nvvm_wheel_path),
        ("CUDA_HOME", _get_nvvm_cuda_home_path),
        ("System", _get_nvvm_system_path),
    ]
    ordered = _build_options(candidates)
    return _find_first_valid_lazy(ordered)
96
+
97
+
98
def _get_nvrtc_path_decision():
    """Return ``(label, libdir)`` for the first source that provides NVRTC."""
    candidates = [
        ("Conda environment", get_conda_ctk_libdir),
        ("NVIDIA NVCC Wheel", _get_nvrtc_wheel_libdir),
        ("CUDA_HOME", get_cuda_home_libdir),
        ("System", get_system_ctk_libdir),
    ]
    ordered = _build_options(candidates)
    return _find_first_valid_lazy(ordered)
108
+
109
+
110
def _get_nvvm_wheel_path():
    """Return the path of the NVVM library shipped in an NVIDIA wheel.

    The CUDA 12 wheel (``nvidia-cuda-nvcc-cu12``) is consulted first; the
    ``nvidia-nvvm`` wheel is only considered when the CUDA 12 wheel is not
    installed and its version starts with ``13.``. Returns None when no
    suitable wheel is installed or the expected file does not exist.
    """
    dist = _get_distribution("nvidia-cuda-nvcc-cu12")
    if dist is not None:
        # CUDA 12 layout
        candidate = os.path.join(
            dist.locate_file(""),
            "nvidia",
            "cuda_nvcc",
            "nvvm",
            "bin" if IS_WIN32 else "lib64",
            "nvvm64_40_0.dll" if IS_WIN32 else "libnvvm.so",
        )
    else:
        # CUDA 13 layout
        dist = _get_distribution("nvidia-nvvm")
        if dist is None or not dist.version.startswith("13."):
            return None
        candidate = os.path.join(
            dist.locate_file(""),
            "nvidia",
            "cu13",
            "bin" if IS_WIN32 else "lib",
            "x86_64" if IS_WIN32 else "",
            "nvvm64_40_0.dll" if IS_WIN32 else "libnvvm.so.4",
        )

    return candidate if os.path.isfile(candidate) else None
149
+
150
+
151
def _get_nvrtc_wheel_libdir():
    """Return the directory holding the NVRTC library from an NVIDIA wheel.

    The CUDA 12 wheel (``nvidia-cuda-nvrtc-cu12``) is consulted first; the
    ``nvidia-cuda-nvrtc`` wheel is only considered when the CUDA 12 wheel is
    not installed and its version starts with ``13.``. Returns None when no
    suitable wheel is installed or the expected library file is missing.
    """
    dist = _get_distribution("nvidia-cuda-nvrtc-cu12")
    if dist is not None:
        # CUDA 12 layout
        candidate = os.path.join(
            dist.locate_file(""),
            "nvidia",
            "cuda_nvrtc",
            "bin" if IS_WIN32 else "lib",
            "nvrtc64_120_0.dll" if IS_WIN32 else "libnvrtc.so.12",
        )
    else:
        # CUDA 13 layout
        dist = _get_distribution("nvidia-cuda-nvrtc")
        if dist is None or not dist.version.startswith("13."):
            return None
        candidate = os.path.join(
            dist.locate_file(""),
            "nvidia",
            "cu13",
            "bin" if IS_WIN32 else "lib",
            "x86_64" if IS_WIN32 else "",
            "nvrtc64_130_0.dll" if IS_WIN32 else "libnvrtc.so.13",
        )

    if not os.path.isfile(candidate):
        return None
    return os.path.dirname(candidate)
190
+
191
+
192
def _get_libdevice_path():
    """Return an ``_env_path_tuple`` for libdevice; ``info`` is None if not found."""
    source, location = _get_libdevice_path_decision()
    # Any falsy result is reported as None.
    return _env_path_tuple(source, location if location else None)
197
+
198
+
199
def _cuda_static_libdir():
    """Return the path components of the static library directory as a tuple."""
    return ("lib", "x64") if IS_WIN32 else ("lib64",)
204
+
205
+
206
def _get_cudalib_wheel_libdir():
    """Return the library directory of the installed cudart wheel.

    ``nvidia-cuda-runtime-cu12`` is preferred; ``nvidia-cuda-runtime`` is
    only considered when the former is absent and its version starts with
    ``13.``. Returns None when no suitable wheel is installed or the
    directory does not exist.
    """
    dist = _get_distribution("nvidia-cuda-runtime-cu12")
    if dist is not None:
        libdir = os.path.join(
            dist.locate_file(""),
            "nvidia",
            "cuda_runtime",
            "bin" if IS_WIN32 else "lib",
        )
    else:
        dist = _get_distribution("nvidia-cuda-runtime")
        if dist is None or not dist.version.startswith("13."):
            return None
        libdir = os.path.join(
            dist.locate_file(""),
            "nvidia",
            "cu13",
            "bin" if IS_WIN32 else "lib",
            "x86_64" if IS_WIN32 else "",
        )

    return libdir if os.path.isdir(libdir) else None
239
+
240
+
241
def _get_cudalib_dir_path_decision():
    """Return ``(label, libdir)`` for the first source with a CUDA library dir."""
    candidates = [
        ("Conda environment", get_conda_ctk_libdir),
        ("NVIDIA NVCC Wheel", _get_cudalib_wheel_libdir),
        ("CUDA_HOME", get_cuda_home_libdir),
        ("System", get_system_ctk_libdir),
    ]
    ordered = _build_options(candidates)
    return _find_first_valid_lazy(ordered)
251
+
252
+
253
def _get_static_cudalib_dir_path_decision():
    """Return ``(label, libdir)`` for the first source with a static CUDA library dir."""

    def from_cuda_home():
        return get_cuda_home(*_cuda_static_libdir())

    def from_system():
        return get_system_ctk(*_cuda_static_libdir())

    candidates = [
        ("Conda environment", get_conda_ctk_libdir),
        ("NVIDIA NVCC Wheel", get_wheel_static_libdir),
        ("CUDA_HOME", from_cuda_home),
        ("System", from_system),
    ]
    return _find_first_valid_lazy(_build_options(candidates))
266
+
267
+
268
def _get_cudalib_dir():
    """Return an ``_env_path_tuple`` describing the CUDA library directory."""
    return _env_path_tuple(*_get_cudalib_dir_path_decision())
271
+
272
+
273
def _get_static_cudalib_dir():
    """Return an ``_env_path_tuple`` describing the static CUDA library directory."""
    return _env_path_tuple(*_get_static_cudalib_dir_path_decision())
276
+
277
+
278
def get_system_ctk(*subdirs):
    """Return a path inside the system-wide CUDA toolkit, or None.

    *subdirs* are appended to ``/usr/local/cuda``. The result is returned
    only if it exists. On Windows this always returns None.
    """
    if IS_WIN32:
        return None
    # Only the unversioned /usr/local/cuda location is considered;
    # versioned installations are intentionally not probed.
    candidate = os.path.join("/usr/local/cuda", *subdirs)
    return candidate if os.path.exists(candidate) else None
289
+
290
+
291
def get_system_ctk_libdir():
    """Return the shared-library directory of the system CUDA toolkit, or None."""
    root = get_system_ctk()
    if root is None:
        return None

    if IS_WIN32:
        libdir = os.path.join(root, "Library", "bin")
        # Windows CUDA 13 system CTK uses "bin\x64" directory
        x64_dir = os.path.join(libdir, "x64")
        if os.path.isdir(x64_dir):
            libdir = x64_dir
    else:
        libdir = os.path.join(root, "lib64", "")

    if not os.path.isdir(libdir):
        return None
    return os.path.normpath(libdir)
308
+
309
+
310
def get_system_ctk_include():
    """Return the include directory of the system CUDA toolkit, or None.

    The directory is accepted only if it contains
    ``cuda_device_runtime_api.h``.
    """
    root = get_system_ctk()
    if root is None:
        return None

    include_dir = os.path.join(root, "include")
    marker = os.path.join(include_dir, "cuda_device_runtime_api.h")
    if os.path.isdir(include_dir) and os.path.isfile(marker):
        return include_dir
    return None
322
+
323
+
324
def _get_nvvm_system_path():
    """Return the expected NVVM library path in the system CUDA toolkit.

    Returns None when the toolkit's ``nvvm`` directory is not found. The
    library file itself is not checked for existence.
    """
    nvvm_root = get_system_ctk("nvvm")
    if nvvm_root is None:
        return None

    if IS_WIN32:
        libdir = os.path.join(nvvm_root, "bin")
        x64_dir = os.path.join(libdir, "x64")
        if os.path.isdir(x64_dir):
            libdir = x64_dir
        filename = "nvvm64_40_0.dll"
    else:
        libdir = os.path.join(nvvm_root, "lib64")
        filename = "libnvvm.so.4"

    # NOTE: the path is returned without an os.path.isfile() check.
    return os.path.join(libdir, filename)
338
+
339
+
340
def get_conda_ctk_libdir():
    """Return the conda environment's CUDA shared-library directory, or None.

    None is returned when ``sys.prefix`` is not a conda environment (no
    ``conda-meta`` directory) or when no nvrtc library is found there.
    """
    if not os.path.isdir(os.path.join(sys.prefix, "conda-meta")):
        return None

    if IS_WIN32:
        libdir = os.path.join(sys.prefix, "Library", "bin")
        # Windows CUDA 13.0.0 uses "bin\x64" directory but 13.0.1+ just uses "bin" directory
        x64_dir = os.path.join(libdir, "x64")
        if os.path.isdir(x64_dir):
            libdir = x64_dir
    else:
        libdir = os.path.join(sys.prefix, "lib", "")

    # The presence of nvrtc is taken to imply the needed CTK libraries are installed.
    matches = find_lib("nvrtc", libdir)
    if not matches:
        return None
    # Use the directory of the greatest matching path.
    return os.path.dirname(max(matches))
359
+
360
+
361
def get_libdevice_conda_path():
    """Return the path of the libdevice bitcode file in a conda environment.

    None is returned when ``sys.prefix`` is not a conda environment or the
    file does not exist.
    """
    if not os.path.isdir(os.path.join(sys.prefix, "conda-meta")):
        return None

    # Linux: nvvm/libdevice/libdevice.10.bc
    # Windows: Library/nvvm/libdevice/libdevice.10.bc
    prefix_part = "Library" if IS_WIN32 else ""
    candidate = os.path.join(
        sys.prefix, prefix_part, "nvvm", "libdevice", "libdevice.10.bc"
    )
    return candidate if os.path.isfile(candidate) else None
379
+
380
+
381
def _get_nvvm_conda_path():
    """Return the path of the NVVM library in a conda environment.

    None is returned when ``sys.prefix`` is not a conda environment or the
    library file does not exist.
    """
    if not os.path.isdir(os.path.join(sys.prefix, "conda-meta")):
        return None

    if IS_WIN32:
        libdir = os.path.join(sys.prefix, "Library", "nvvm", "bin")
        # Windows CUDA 13.0.0 puts in "bin\x64" directory but 13.0.1+ just uses "bin" directory
        x64_dir = os.path.join(libdir, "x64")
        if os.path.isdir(x64_dir):
            libdir = x64_dir
        filename = "nvvm64_40_0.dll"
    else:
        libdir = os.path.join(sys.prefix, "", "nvvm", "lib64")
        filename = "libnvvm.so.4"

    candidate = os.path.join(libdir, filename)
    return candidate if os.path.isfile(candidate) else None
402
+
403
+
404
def get_wheel_static_libdir():
    """Return the static library directory of the installed cudart wheel.

    ``nvidia-cuda-runtime-cu12`` is preferred; ``nvidia-cuda-runtime`` is
    only considered when the former is absent and its version starts with
    ``13.``. The directory is accepted only if it contains the cudadevrt
    static library; otherwise None is returned.
    """
    dist = _get_distribution("nvidia-cuda-runtime-cu12")
    if dist is not None:
        # CUDA 12
        package_dir = "cuda_runtime"
    else:
        # CUDA 13
        dist = _get_distribution("nvidia-cuda-runtime")
        if dist is None or not dist.version.startswith("13."):
            return None
        package_dir = "cu13"

    static_dir = os.path.join(
        dist.locate_file(""),
        "nvidia",
        package_dir,
        "lib",
        "x64" if IS_WIN32 else "",
    )
    cudadevrt = os.path.join(
        static_dir, "cudadevrt.lib" if IS_WIN32 else "libcudadevrt.a"
    )
    if not os.path.isfile(cudadevrt):
        return None
    return os.path.dirname(cudadevrt)
443
+
444
+
445
def get_wheel_include():
    """Return the include directory of the installed cudart wheel, or None.

    ``nvidia-cuda-runtime-cu12`` is preferred; ``nvidia-cuda-runtime`` is
    only considered when the former is absent and its version starts with
    ``13.``. The directory is accepted only if it contains
    ``cuda_device_runtime_api.h``.
    """
    dist = _get_distribution("nvidia-cuda-runtime-cu12")
    if dist is not None:
        # CUDA 12
        package_dir = "cuda_runtime"
    else:
        # CUDA 13
        dist = _get_distribution("nvidia-cuda-runtime")
        if dist is None or not dist.version.startswith("13."):
            return None
        package_dir = "cu13"

    include_dir = os.path.join(
        dist.locate_file(""), "nvidia", package_dir, "include"
    )
    marker = os.path.join(include_dir, "cuda_device_runtime_api.h")
    if os.path.isdir(include_dir) and os.path.isfile(marker):
        return include_dir
    return None
477
+
478
+
479
def get_cuda_home(*subdirs):
    """Return a path under the CUDA installation named by the environment.

    ``CUDA_HOME`` is consulted first, then ``CUDA_PATH`` (set by Windows
    CUDA installations without Anaconda). *subdirs* are appended to the
    value found. Returns None when neither variable is set.
    """
    for variable in ("CUDA_HOME", "CUDA_PATH"):
        root = os.environ.get(variable)
        if root is not None:
            return os.path.join(root, *subdirs)
    return None
491
+
492
+
493
def get_cuda_home_libdir():
    """Return the shared-library directory under CUDA_HOME / CUDA_PATH.

    Returns None when neither environment variable is set. Otherwise a
    normalized path is returned; it is not checked for existence.

    On Windows two layouts are recognised, mirroring
    ``get_cuda_home_include``:

    - conda style: ``<root>\\Library\\bin``
    - standalone toolkit: ``<root>\\bin``

    The conda-style location is kept as the default; the standalone ``bin``
    directory is used only when ``Library\\bin`` is absent and ``bin``
    exists. In either case an ``x64`` subdirectory is preferred when
    present.
    """
    cuda_home_dir = get_cuda_home()
    if cuda_home_dir is None:
        return None

    if IS_WIN32:
        libdir = os.path.join(cuda_home_dir, "Library", "bin")
        toolkit_bin = os.path.join(cuda_home_dir, "bin")
        # A standalone toolkit has no "Library" directory; without this
        # fallback its DLLs could never be located through CUDA_HOME.
        if not os.path.isdir(libdir) and os.path.isdir(toolkit_bin):
            libdir = toolkit_bin
        # Windows CUDA 13 system CTK uses "bin\x64" directory while conda
        # just uses "bin" directory.
        x64_dir = os.path.join(libdir, "x64")
        if os.path.isdir(x64_dir):
            libdir = x64_dir
    else:
        libdir = os.path.join(cuda_home_dir, "lib64")

    return os.path.normpath(libdir)
507
+
508
+
509
def get_cuda_home_include():
    """Return the include directory under CUDA_HOME / CUDA_PATH, or None.

    The directory is accepted only if it contains
    ``cuda_device_runtime_api.h``.
    """
    root = get_cuda_home()
    if root is None:
        return None

    # For Windows, CTK puts it in $CTK/include but conda puts it in $CTK/Library/include
    if IS_WIN32 and os.path.isdir(os.path.join(root, "Library")):
        include_dir = os.path.join(root, "Library", "include")
    else:
        include_dir = os.path.join(root, "include")

    marker = os.path.join(include_dir, "cuda_device_runtime_api.h")
    if os.path.isdir(include_dir) and os.path.isfile(marker):
        return include_dir
    return None
529
+
530
+
531
def _get_nvvm_cuda_home_path():
    """Return the expected NVVM library path under CUDA_HOME / CUDA_PATH.

    Returns None when neither environment variable is set. The library file
    itself is not checked for existence.
    """
    nvvm_root = get_cuda_home("nvvm")
    if nvvm_root is None:
        return None

    if IS_WIN32:
        libdir = os.path.join(nvvm_root, "bin")
        x64_dir = os.path.join(libdir, "x64")
        if os.path.isdir(x64_dir):
            libdir = x64_dir
        filename = "nvvm64_40_0.dll"
    else:
        libdir = os.path.join(nvvm_root, "lib64")
        filename = "libnvvm.so.4"

    # NOTE: the path is returned without an os.path.isfile() check.
    return os.path.join(libdir, filename)
545
+
546
+
547
def _get_nvvm_path():
    """Return an ``_env_path_tuple`` for NVVM; ``info`` is None if not found."""
    source, location = _get_nvvm_path_decision()
    # Any falsy result is reported as None.
    return _env_path_tuple(source, location if location else None)
552
+
553
+
554
def _get_nvrtc_path():
    """Return an ``_env_path_tuple`` for the NVRTC library file.

    The library directory comes from the path decision; ``find_lib`` is then
    asked for nvrtc candidates there and the greatest one is reported, or
    None when there are no candidates.
    """
    source, libdir = _get_nvrtc_path_decision()
    matches = find_lib("nvrtc", libdir=libdir)
    if matches:
        chosen = max(matches)
    else:
        chosen = None
    return _env_path_tuple(source, chosen)
559
+
560
+
561
def get_cuda_paths():
    """Return a dictionary mapping component names to a 2-tuple
    of (source_variable, info).

    Keys and the kind of info stored under each:
    - "nvvm": file_path
    - "nvrtc": file_path
    - "libdevice": file_path
    - "cudalib_dir": directory_path
    - "static_cudalib_dir": directory_path
    - "include_dir": directory_path

    Note: the result is computed once and stored on the function object as
    ``_cached_result``; later calls return that stored dictionary.
    """
    try:
        # Cache hit
        return get_cuda_paths._cached_result
    except AttributeError:
        pass

    # Cache miss: compute every entry, then remember the result.
    paths = {
        "nvvm": _get_nvvm_path(),
        "nvrtc": _get_nvrtc_path(),
        "libdevice": _get_libdevice_path(),
        "cudalib_dir": _get_cudalib_dir(),
        "static_cudalib_dir": _get_static_cudalib_dir(),
        "include_dir": _get_include_dir(),
    }
    get_cuda_paths._cached_result = paths
    return paths
591
+
592
+
593
def get_libdevice_wheel_path():
    """Return the path of the libdevice bitcode file shipped in an NVIDIA wheel.

    The CUDA 12 wheel (``nvidia-cuda-nvcc-cu12``) is consulted first; the
    ``nvidia-nvvm`` wheel is only considered when the CUDA 12 wheel is not
    installed and its version starts with ``13.``. Returns None when no
    suitable wheel is installed or the file does not exist.
    """
    dist = _get_distribution("nvidia-cuda-nvcc-cu12")
    if dist is not None:
        # CUDA 12
        package_dir = "cuda_nvcc"
    else:
        # CUDA 13
        dist = _get_distribution("nvidia-nvvm")
        if dist is None or not dist.version.startswith("13."):
            return None
        package_dir = "cu13"

    candidate = os.path.join(
        dist.locate_file(""),
        "nvidia",
        package_dir,
        "nvvm",
        "libdevice",
        "libdevice.10.bc",
    )
    return candidate if os.path.isfile(candidate) else None
628
+
629
+
630
def get_current_cuda_target_name():
    """Determine conda's CTK target folder based on system and machine arch.

    CTK's conda package delivers headers based on its architecture type. For example,
    `x86_64` machine places header under `$CONDA_PREFIX/targets/x86_64-linux`, and
    `aarch64` places under `$CONDA_PREFIX/targets/sbsa-linux`. Read more about the
    nuances at cudart's conda feedstock:
    https://github.com/conda-forge/cuda-cudart-feedstock/blob/main/recipe/meta.yaml#L8-L11  # noqa: E501

    Returns None for any system/machine combination that is not listed.
    """
    targets_by_system = {
        "Linux": {"x86_64": "x86_64-linux", "aarch64": "sbsa-linux"},
        "Windows": {"AMD64": "x64"},
    }
    system = platform.system()
    machine = platform.machine()
    return targets_by_system.get(system, {}).get(machine, None)
652
+
653
+
654
def get_conda_include_dir():
    """
    Return the CUDA include directory of the current conda environment.

    None is returned when ``sys.prefix`` is not a conda environment, or when
    the candidate directory does not exist or lacks
    ``cuda_device_runtime_api.h``.
    """
    if not os.path.isdir(os.path.join(sys.prefix, "conda-meta")):
        return None

    if IS_WIN32:
        parts = ("Library", "include")
    else:
        target_name = get_current_cuda_target_name()
        if target_name:
            parts = ("targets", target_name, "include")
        else:
            # Fallback for when the target cannot be determined,
            # which usually should not happen.
            parts = ("include",)
    include_dir = os.path.join(sys.prefix, *parts)

    marker = os.path.join(include_dir, "cuda_device_runtime_api.h")
    if os.path.isdir(include_dir) and os.path.isfile(marker):
        return include_dir
    return None
679
+
680
+
681
def _get_include_dir():
    """Find the root include directory.

    Every source is evaluated up front, in the order listed; the first one
    that is not None is reported as an ``_env_path_tuple``.
    """
    candidates = []
    candidates.append(
        ("Conda environment (NVIDIA package)", get_conda_include_dir())
    )
    candidates.append(("NVIDIA NVCC Wheel", get_wheel_include()))
    candidates.append(("CUDA_HOME", get_cuda_home_include()))
    candidates.append(("System", get_system_ctk_include()))
    candidates.append(
        ("CUDA_INCLUDE_PATH Config Entry", config.CUDA_INCLUDE_PATH)
    )
    source, include_dir = _find_valid_path(candidates)
    return _env_path_tuple(source, include_dir)