numba-cuda 0.21.1__cp313-cp313-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (488) hide show
  1. _numba_cuda_redirector.pth +4 -0
  2. _numba_cuda_redirector.py +89 -0
  3. numba_cuda/VERSION +1 -0
  4. numba_cuda/__init__.py +6 -0
  5. numba_cuda/_version.py +11 -0
  6. numba_cuda/numba/cuda/__init__.py +70 -0
  7. numba_cuda/numba/cuda/_internal/cuda_bf16.py +16394 -0
  8. numba_cuda/numba/cuda/_internal/cuda_fp16.py +8112 -0
  9. numba_cuda/numba/cuda/api.py +577 -0
  10. numba_cuda/numba/cuda/api_util.py +76 -0
  11. numba_cuda/numba/cuda/args.py +72 -0
  12. numba_cuda/numba/cuda/bf16.py +397 -0
  13. numba_cuda/numba/cuda/cache_hints.py +287 -0
  14. numba_cuda/numba/cuda/cext/__init__.py +2 -0
  15. numba_cuda/numba/cuda/cext/_devicearray.cp313-win_amd64.pyd +0 -0
  16. numba_cuda/numba/cuda/cext/_devicearray.cpp +159 -0
  17. numba_cuda/numba/cuda/cext/_devicearray.h +29 -0
  18. numba_cuda/numba/cuda/cext/_dispatcher.cp313-win_amd64.pyd +0 -0
  19. numba_cuda/numba/cuda/cext/_dispatcher.cpp +1098 -0
  20. numba_cuda/numba/cuda/cext/_hashtable.cpp +532 -0
  21. numba_cuda/numba/cuda/cext/_hashtable.h +135 -0
  22. numba_cuda/numba/cuda/cext/_helperlib.c +71 -0
  23. numba_cuda/numba/cuda/cext/_helperlib.cp313-win_amd64.pyd +0 -0
  24. numba_cuda/numba/cuda/cext/_helpermod.c +82 -0
  25. numba_cuda/numba/cuda/cext/_pymodule.h +38 -0
  26. numba_cuda/numba/cuda/cext/_typeconv.cp313-win_amd64.pyd +0 -0
  27. numba_cuda/numba/cuda/cext/_typeconv.cpp +206 -0
  28. numba_cuda/numba/cuda/cext/_typeof.cpp +1159 -0
  29. numba_cuda/numba/cuda/cext/_typeof.h +19 -0
  30. numba_cuda/numba/cuda/cext/capsulethunk.h +111 -0
  31. numba_cuda/numba/cuda/cext/mviewbuf.c +385 -0
  32. numba_cuda/numba/cuda/cext/mviewbuf.cp313-win_amd64.pyd +0 -0
  33. numba_cuda/numba/cuda/cext/typeconv.cpp +212 -0
  34. numba_cuda/numba/cuda/cext/typeconv.hpp +101 -0
  35. numba_cuda/numba/cuda/cg.py +67 -0
  36. numba_cuda/numba/cuda/cgutils.py +1294 -0
  37. numba_cuda/numba/cuda/cloudpickle/__init__.py +21 -0
  38. numba_cuda/numba/cuda/cloudpickle/cloudpickle.py +1598 -0
  39. numba_cuda/numba/cuda/cloudpickle/cloudpickle_fast.py +17 -0
  40. numba_cuda/numba/cuda/codegen.py +541 -0
  41. numba_cuda/numba/cuda/compiler.py +1396 -0
  42. numba_cuda/numba/cuda/core/analysis.py +758 -0
  43. numba_cuda/numba/cuda/core/annotations/__init__.py +0 -0
  44. numba_cuda/numba/cuda/core/annotations/pretty_annotate.py +288 -0
  45. numba_cuda/numba/cuda/core/annotations/type_annotations.py +305 -0
  46. numba_cuda/numba/cuda/core/base.py +1332 -0
  47. numba_cuda/numba/cuda/core/boxing.py +1411 -0
  48. numba_cuda/numba/cuda/core/bytecode.py +728 -0
  49. numba_cuda/numba/cuda/core/byteflow.py +2346 -0
  50. numba_cuda/numba/cuda/core/caching.py +744 -0
  51. numba_cuda/numba/cuda/core/callconv.py +392 -0
  52. numba_cuda/numba/cuda/core/codegen.py +171 -0
  53. numba_cuda/numba/cuda/core/compiler.py +199 -0
  54. numba_cuda/numba/cuda/core/compiler_lock.py +85 -0
  55. numba_cuda/numba/cuda/core/compiler_machinery.py +497 -0
  56. numba_cuda/numba/cuda/core/config.py +650 -0
  57. numba_cuda/numba/cuda/core/consts.py +124 -0
  58. numba_cuda/numba/cuda/core/controlflow.py +989 -0
  59. numba_cuda/numba/cuda/core/entrypoints.py +57 -0
  60. numba_cuda/numba/cuda/core/environment.py +66 -0
  61. numba_cuda/numba/cuda/core/errors.py +917 -0
  62. numba_cuda/numba/cuda/core/event.py +511 -0
  63. numba_cuda/numba/cuda/core/funcdesc.py +330 -0
  64. numba_cuda/numba/cuda/core/generators.py +387 -0
  65. numba_cuda/numba/cuda/core/imputils.py +509 -0
  66. numba_cuda/numba/cuda/core/inline_closurecall.py +1787 -0
  67. numba_cuda/numba/cuda/core/interpreter.py +3617 -0
  68. numba_cuda/numba/cuda/core/ir.py +1812 -0
  69. numba_cuda/numba/cuda/core/ir_utils.py +2638 -0
  70. numba_cuda/numba/cuda/core/optional.py +129 -0
  71. numba_cuda/numba/cuda/core/options.py +262 -0
  72. numba_cuda/numba/cuda/core/postproc.py +249 -0
  73. numba_cuda/numba/cuda/core/pythonapi.py +1859 -0
  74. numba_cuda/numba/cuda/core/registry.py +46 -0
  75. numba_cuda/numba/cuda/core/removerefctpass.py +123 -0
  76. numba_cuda/numba/cuda/core/rewrites/__init__.py +26 -0
  77. numba_cuda/numba/cuda/core/rewrites/ir_print.py +91 -0
  78. numba_cuda/numba/cuda/core/rewrites/registry.py +104 -0
  79. numba_cuda/numba/cuda/core/rewrites/static_binop.py +41 -0
  80. numba_cuda/numba/cuda/core/rewrites/static_getitem.py +189 -0
  81. numba_cuda/numba/cuda/core/rewrites/static_raise.py +100 -0
  82. numba_cuda/numba/cuda/core/sigutils.py +68 -0
  83. numba_cuda/numba/cuda/core/ssa.py +498 -0
  84. numba_cuda/numba/cuda/core/targetconfig.py +330 -0
  85. numba_cuda/numba/cuda/core/tracing.py +231 -0
  86. numba_cuda/numba/cuda/core/transforms.py +956 -0
  87. numba_cuda/numba/cuda/core/typed_passes.py +867 -0
  88. numba_cuda/numba/cuda/core/typeinfer.py +1950 -0
  89. numba_cuda/numba/cuda/core/unsafe/__init__.py +0 -0
  90. numba_cuda/numba/cuda/core/unsafe/bytes.py +67 -0
  91. numba_cuda/numba/cuda/core/unsafe/eh.py +67 -0
  92. numba_cuda/numba/cuda/core/unsafe/refcount.py +98 -0
  93. numba_cuda/numba/cuda/core/untyped_passes.py +1979 -0
  94. numba_cuda/numba/cuda/cpython/builtins.py +1153 -0
  95. numba_cuda/numba/cuda/cpython/charseq.py +1218 -0
  96. numba_cuda/numba/cuda/cpython/cmathimpl.py +560 -0
  97. numba_cuda/numba/cuda/cpython/enumimpl.py +103 -0
  98. numba_cuda/numba/cuda/cpython/iterators.py +167 -0
  99. numba_cuda/numba/cuda/cpython/listobj.py +1326 -0
  100. numba_cuda/numba/cuda/cpython/mathimpl.py +499 -0
  101. numba_cuda/numba/cuda/cpython/numbers.py +1475 -0
  102. numba_cuda/numba/cuda/cpython/rangeobj.py +289 -0
  103. numba_cuda/numba/cuda/cpython/slicing.py +322 -0
  104. numba_cuda/numba/cuda/cpython/tupleobj.py +456 -0
  105. numba_cuda/numba/cuda/cpython/unicode.py +2865 -0
  106. numba_cuda/numba/cuda/cpython/unicode_support.py +1597 -0
  107. numba_cuda/numba/cuda/cpython/unsafe/__init__.py +0 -0
  108. numba_cuda/numba/cuda/cpython/unsafe/numbers.py +64 -0
  109. numba_cuda/numba/cuda/cpython/unsafe/tuple.py +92 -0
  110. numba_cuda/numba/cuda/cuda_paths.py +691 -0
  111. numba_cuda/numba/cuda/cudadecl.py +556 -0
  112. numba_cuda/numba/cuda/cudadrv/__init__.py +14 -0
  113. numba_cuda/numba/cuda/cudadrv/devicearray.py +951 -0
  114. numba_cuda/numba/cuda/cudadrv/devices.py +249 -0
  115. numba_cuda/numba/cuda/cudadrv/driver.py +3222 -0
  116. numba_cuda/numba/cuda/cudadrv/drvapi.py +435 -0
  117. numba_cuda/numba/cuda/cudadrv/dummyarray.py +558 -0
  118. numba_cuda/numba/cuda/cudadrv/enums.py +613 -0
  119. numba_cuda/numba/cuda/cudadrv/error.py +48 -0
  120. numba_cuda/numba/cuda/cudadrv/libs.py +220 -0
  121. numba_cuda/numba/cuda/cudadrv/linkable_code.py +184 -0
  122. numba_cuda/numba/cuda/cudadrv/mappings.py +14 -0
  123. numba_cuda/numba/cuda/cudadrv/ndarray.py +26 -0
  124. numba_cuda/numba/cuda/cudadrv/nvrtc.py +193 -0
  125. numba_cuda/numba/cuda/cudadrv/nvvm.py +756 -0
  126. numba_cuda/numba/cuda/cudadrv/rtapi.py +13 -0
  127. numba_cuda/numba/cuda/cudadrv/runtime.py +34 -0
  128. numba_cuda/numba/cuda/cudaimpl.py +995 -0
  129. numba_cuda/numba/cuda/cudamath.py +149 -0
  130. numba_cuda/numba/cuda/datamodel/__init__.py +7 -0
  131. numba_cuda/numba/cuda/datamodel/cuda_manager.py +66 -0
  132. numba_cuda/numba/cuda/datamodel/cuda_models.py +1446 -0
  133. numba_cuda/numba/cuda/datamodel/cuda_packer.py +224 -0
  134. numba_cuda/numba/cuda/datamodel/cuda_registry.py +22 -0
  135. numba_cuda/numba/cuda/datamodel/cuda_testing.py +153 -0
  136. numba_cuda/numba/cuda/datamodel/manager.py +11 -0
  137. numba_cuda/numba/cuda/datamodel/models.py +9 -0
  138. numba_cuda/numba/cuda/datamodel/packer.py +9 -0
  139. numba_cuda/numba/cuda/datamodel/registry.py +11 -0
  140. numba_cuda/numba/cuda/datamodel/testing.py +11 -0
  141. numba_cuda/numba/cuda/debuginfo.py +903 -0
  142. numba_cuda/numba/cuda/decorators.py +294 -0
  143. numba_cuda/numba/cuda/descriptor.py +35 -0
  144. numba_cuda/numba/cuda/device_init.py +158 -0
  145. numba_cuda/numba/cuda/deviceufunc.py +1021 -0
  146. numba_cuda/numba/cuda/dispatcher.py +2463 -0
  147. numba_cuda/numba/cuda/errors.py +72 -0
  148. numba_cuda/numba/cuda/extending.py +697 -0
  149. numba_cuda/numba/cuda/flags.py +178 -0
  150. numba_cuda/numba/cuda/fp16.py +357 -0
  151. numba_cuda/numba/cuda/include/12/cuda_bf16.h +5118 -0
  152. numba_cuda/numba/cuda/include/12/cuda_bf16.hpp +3865 -0
  153. numba_cuda/numba/cuda/include/12/cuda_fp16.h +5363 -0
  154. numba_cuda/numba/cuda/include/12/cuda_fp16.hpp +3483 -0
  155. numba_cuda/numba/cuda/include/13/cuda_bf16.h +5118 -0
  156. numba_cuda/numba/cuda/include/13/cuda_bf16.hpp +3865 -0
  157. numba_cuda/numba/cuda/include/13/cuda_fp16.h +5363 -0
  158. numba_cuda/numba/cuda/include/13/cuda_fp16.hpp +3483 -0
  159. numba_cuda/numba/cuda/initialize.py +24 -0
  160. numba_cuda/numba/cuda/intrinsic_wrapper.py +41 -0
  161. numba_cuda/numba/cuda/intrinsics.py +382 -0
  162. numba_cuda/numba/cuda/itanium_mangler.py +214 -0
  163. numba_cuda/numba/cuda/kernels/__init__.py +2 -0
  164. numba_cuda/numba/cuda/kernels/reduction.py +265 -0
  165. numba_cuda/numba/cuda/kernels/transpose.py +65 -0
  166. numba_cuda/numba/cuda/libdevice.py +3386 -0
  167. numba_cuda/numba/cuda/libdevicedecl.py +20 -0
  168. numba_cuda/numba/cuda/libdevicefuncs.py +1060 -0
  169. numba_cuda/numba/cuda/libdeviceimpl.py +88 -0
  170. numba_cuda/numba/cuda/locks.py +19 -0
  171. numba_cuda/numba/cuda/lowering.py +1951 -0
  172. numba_cuda/numba/cuda/mathimpl.py +374 -0
  173. numba_cuda/numba/cuda/memory_management/__init__.py +4 -0
  174. numba_cuda/numba/cuda/memory_management/memsys.cu +99 -0
  175. numba_cuda/numba/cuda/memory_management/memsys.cuh +22 -0
  176. numba_cuda/numba/cuda/memory_management/nrt.cu +212 -0
  177. numba_cuda/numba/cuda/memory_management/nrt.cuh +48 -0
  178. numba_cuda/numba/cuda/memory_management/nrt.py +390 -0
  179. numba_cuda/numba/cuda/memory_management/nrt_context.py +438 -0
  180. numba_cuda/numba/cuda/misc/appdirs.py +594 -0
  181. numba_cuda/numba/cuda/misc/cffiimpl.py +24 -0
  182. numba_cuda/numba/cuda/misc/coverage_support.py +43 -0
  183. numba_cuda/numba/cuda/misc/dump_style.py +41 -0
  184. numba_cuda/numba/cuda/misc/findlib.py +75 -0
  185. numba_cuda/numba/cuda/misc/firstlinefinder.py +96 -0
  186. numba_cuda/numba/cuda/misc/gdb_hook.py +240 -0
  187. numba_cuda/numba/cuda/misc/literal.py +28 -0
  188. numba_cuda/numba/cuda/misc/llvm_pass_timings.py +412 -0
  189. numba_cuda/numba/cuda/misc/special.py +94 -0
  190. numba_cuda/numba/cuda/models.py +56 -0
  191. numba_cuda/numba/cuda/np/arraymath.py +5130 -0
  192. numba_cuda/numba/cuda/np/arrayobj.py +7635 -0
  193. numba_cuda/numba/cuda/np/extensions.py +11 -0
  194. numba_cuda/numba/cuda/np/linalg.py +3087 -0
  195. numba_cuda/numba/cuda/np/math/__init__.py +0 -0
  196. numba_cuda/numba/cuda/np/math/cmathimpl.py +558 -0
  197. numba_cuda/numba/cuda/np/math/mathimpl.py +487 -0
  198. numba_cuda/numba/cuda/np/math/numbers.py +1461 -0
  199. numba_cuda/numba/cuda/np/npdatetime.py +969 -0
  200. numba_cuda/numba/cuda/np/npdatetime_helpers.py +217 -0
  201. numba_cuda/numba/cuda/np/npyfuncs.py +1808 -0
  202. numba_cuda/numba/cuda/np/npyimpl.py +1027 -0
  203. numba_cuda/numba/cuda/np/numpy_support.py +798 -0
  204. numba_cuda/numba/cuda/np/polynomial/__init__.py +4 -0
  205. numba_cuda/numba/cuda/np/polynomial/polynomial_core.py +242 -0
  206. numba_cuda/numba/cuda/np/polynomial/polynomial_functions.py +380 -0
  207. numba_cuda/numba/cuda/np/ufunc/__init__.py +4 -0
  208. numba_cuda/numba/cuda/np/ufunc/decorators.py +203 -0
  209. numba_cuda/numba/cuda/np/ufunc/sigparse.py +68 -0
  210. numba_cuda/numba/cuda/np/ufunc/ufuncbuilder.py +65 -0
  211. numba_cuda/numba/cuda/np/ufunc_db.py +1282 -0
  212. numba_cuda/numba/cuda/np/unsafe/__init__.py +0 -0
  213. numba_cuda/numba/cuda/np/unsafe/ndarray.py +84 -0
  214. numba_cuda/numba/cuda/nvvmutils.py +254 -0
  215. numba_cuda/numba/cuda/printimpl.py +126 -0
  216. numba_cuda/numba/cuda/random.py +308 -0
  217. numba_cuda/numba/cuda/reshape_funcs.cu +156 -0
  218. numba_cuda/numba/cuda/serialize.py +267 -0
  219. numba_cuda/numba/cuda/simulator/__init__.py +63 -0
  220. numba_cuda/numba/cuda/simulator/_internal/__init__.py +4 -0
  221. numba_cuda/numba/cuda/simulator/_internal/cuda_bf16.py +2 -0
  222. numba_cuda/numba/cuda/simulator/api.py +179 -0
  223. numba_cuda/numba/cuda/simulator/bf16.py +4 -0
  224. numba_cuda/numba/cuda/simulator/compiler.py +38 -0
  225. numba_cuda/numba/cuda/simulator/cudadrv/__init__.py +11 -0
  226. numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py +462 -0
  227. numba_cuda/numba/cuda/simulator/cudadrv/devices.py +122 -0
  228. numba_cuda/numba/cuda/simulator/cudadrv/driver.py +66 -0
  229. numba_cuda/numba/cuda/simulator/cudadrv/drvapi.py +7 -0
  230. numba_cuda/numba/cuda/simulator/cudadrv/dummyarray.py +7 -0
  231. numba_cuda/numba/cuda/simulator/cudadrv/error.py +10 -0
  232. numba_cuda/numba/cuda/simulator/cudadrv/libs.py +10 -0
  233. numba_cuda/numba/cuda/simulator/cudadrv/linkable_code.py +61 -0
  234. numba_cuda/numba/cuda/simulator/cudadrv/nvrtc.py +11 -0
  235. numba_cuda/numba/cuda/simulator/cudadrv/nvvm.py +32 -0
  236. numba_cuda/numba/cuda/simulator/cudadrv/runtime.py +22 -0
  237. numba_cuda/numba/cuda/simulator/dispatcher.py +11 -0
  238. numba_cuda/numba/cuda/simulator/kernel.py +320 -0
  239. numba_cuda/numba/cuda/simulator/kernelapi.py +509 -0
  240. numba_cuda/numba/cuda/simulator/memory_management/__init__.py +4 -0
  241. numba_cuda/numba/cuda/simulator/memory_management/nrt.py +21 -0
  242. numba_cuda/numba/cuda/simulator/reduction.py +19 -0
  243. numba_cuda/numba/cuda/simulator/tests/support.py +4 -0
  244. numba_cuda/numba/cuda/simulator/vector_types.py +65 -0
  245. numba_cuda/numba/cuda/simulator_init.py +18 -0
  246. numba_cuda/numba/cuda/stubs.py +635 -0
  247. numba_cuda/numba/cuda/target.py +505 -0
  248. numba_cuda/numba/cuda/testing.py +347 -0
  249. numba_cuda/numba/cuda/tests/__init__.py +62 -0
  250. numba_cuda/numba/cuda/tests/benchmarks/__init__.py +0 -0
  251. numba_cuda/numba/cuda/tests/benchmarks/test_kernel_launch.py +119 -0
  252. numba_cuda/numba/cuda/tests/cloudpickle_main_class.py +9 -0
  253. numba_cuda/numba/cuda/tests/core/serialize_usecases.py +113 -0
  254. numba_cuda/numba/cuda/tests/core/test_itanium_mangler.py +83 -0
  255. numba_cuda/numba/cuda/tests/core/test_serialize.py +371 -0
  256. numba_cuda/numba/cuda/tests/cudadrv/__init__.py +9 -0
  257. numba_cuda/numba/cuda/tests/cudadrv/test_array_attr.py +147 -0
  258. numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py +161 -0
  259. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_array_slicing.py +397 -0
  260. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_auto_context.py +24 -0
  261. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py +180 -0
  262. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py +313 -0
  263. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_memory.py +187 -0
  264. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py +621 -0
  265. numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py +247 -0
  266. numba_cuda/numba/cuda/tests/cudadrv/test_detect.py +100 -0
  267. numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +198 -0
  268. numba_cuda/numba/cuda/tests/cudadrv/test_events.py +53 -0
  269. numba_cuda/numba/cuda/tests/cudadrv/test_host_alloc.py +72 -0
  270. numba_cuda/numba/cuda/tests/cudadrv/test_init.py +138 -0
  271. numba_cuda/numba/cuda/tests/cudadrv/test_inline_ptx.py +43 -0
  272. numba_cuda/numba/cuda/tests/cudadrv/test_is_fp16.py +15 -0
  273. numba_cuda/numba/cuda/tests/cudadrv/test_linkable_code.py +58 -0
  274. numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +348 -0
  275. numba_cuda/numba/cuda/tests/cudadrv/test_managed_alloc.py +128 -0
  276. numba_cuda/numba/cuda/tests/cudadrv/test_module_callbacks.py +301 -0
  277. numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py +174 -0
  278. numba_cuda/numba/cuda/tests/cudadrv/test_nvrtc.py +28 -0
  279. numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py +185 -0
  280. numba_cuda/numba/cuda/tests/cudadrv/test_pinned.py +39 -0
  281. numba_cuda/numba/cuda/tests/cudadrv/test_profiler.py +23 -0
  282. numba_cuda/numba/cuda/tests/cudadrv/test_reset_device.py +38 -0
  283. numba_cuda/numba/cuda/tests/cudadrv/test_runtime.py +48 -0
  284. numba_cuda/numba/cuda/tests/cudadrv/test_select_device.py +44 -0
  285. numba_cuda/numba/cuda/tests/cudadrv/test_streams.py +127 -0
  286. numba_cuda/numba/cuda/tests/cudapy/__init__.py +9 -0
  287. numba_cuda/numba/cuda/tests/cudapy/cache_usecases.py +231 -0
  288. numba_cuda/numba/cuda/tests/cudapy/cache_with_cpu_usecases.py +50 -0
  289. numba_cuda/numba/cuda/tests/cudapy/cg_cache_usecases.py +36 -0
  290. numba_cuda/numba/cuda/tests/cudapy/complex_usecases.py +116 -0
  291. numba_cuda/numba/cuda/tests/cudapy/enum_usecases.py +59 -0
  292. numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +62 -0
  293. numba_cuda/numba/cuda/tests/cudapy/jitlink.ptx +28 -0
  294. numba_cuda/numba/cuda/tests/cudapy/overload_usecases.py +33 -0
  295. numba_cuda/numba/cuda/tests/cudapy/recursion_usecases.py +104 -0
  296. numba_cuda/numba/cuda/tests/cudapy/test_alignment.py +47 -0
  297. numba_cuda/numba/cuda/tests/cudapy/test_analysis.py +1122 -0
  298. numba_cuda/numba/cuda/tests/cudapy/test_array.py +344 -0
  299. numba_cuda/numba/cuda/tests/cudapy/test_array_alignment.py +268 -0
  300. numba_cuda/numba/cuda/tests/cudapy/test_array_args.py +203 -0
  301. numba_cuda/numba/cuda/tests/cudapy/test_array_methods.py +63 -0
  302. numba_cuda/numba/cuda/tests/cudapy/test_array_reductions.py +360 -0
  303. numba_cuda/numba/cuda/tests/cudapy/test_atomics.py +1815 -0
  304. numba_cuda/numba/cuda/tests/cudapy/test_bfloat16.py +599 -0
  305. numba_cuda/numba/cuda/tests/cudapy/test_bfloat16_bindings.py +377 -0
  306. numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py +160 -0
  307. numba_cuda/numba/cuda/tests/cudapy/test_boolean.py +27 -0
  308. numba_cuda/numba/cuda/tests/cudapy/test_byteflow.py +98 -0
  309. numba_cuda/numba/cuda/tests/cudapy/test_cache_hints.py +210 -0
  310. numba_cuda/numba/cuda/tests/cudapy/test_caching.py +683 -0
  311. numba_cuda/numba/cuda/tests/cudapy/test_casting.py +265 -0
  312. numba_cuda/numba/cuda/tests/cudapy/test_cffi.py +42 -0
  313. numba_cuda/numba/cuda/tests/cudapy/test_compiler.py +718 -0
  314. numba_cuda/numba/cuda/tests/cudapy/test_complex.py +370 -0
  315. numba_cuda/numba/cuda/tests/cudapy/test_complex_kernel.py +23 -0
  316. numba_cuda/numba/cuda/tests/cudapy/test_const_string.py +142 -0
  317. numba_cuda/numba/cuda/tests/cudapy/test_constmem.py +178 -0
  318. numba_cuda/numba/cuda/tests/cudapy/test_cooperative_groups.py +193 -0
  319. numba_cuda/numba/cuda/tests/cudapy/test_copy_propagate.py +131 -0
  320. numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py +438 -0
  321. numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py +94 -0
  322. numba_cuda/numba/cuda/tests/cudapy/test_datetime.py +101 -0
  323. numba_cuda/numba/cuda/tests/cudapy/test_debug.py +105 -0
  324. numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +889 -0
  325. numba_cuda/numba/cuda/tests/cudapy/test_debuginfo_types.py +476 -0
  326. numba_cuda/numba/cuda/tests/cudapy/test_device_func.py +500 -0
  327. numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +820 -0
  328. numba_cuda/numba/cuda/tests/cudapy/test_enums.py +152 -0
  329. numba_cuda/numba/cuda/tests/cudapy/test_errors.py +111 -0
  330. numba_cuda/numba/cuda/tests/cudapy/test_exception.py +170 -0
  331. numba_cuda/numba/cuda/tests/cudapy/test_extending.py +1088 -0
  332. numba_cuda/numba/cuda/tests/cudapy/test_extending_types.py +71 -0
  333. numba_cuda/numba/cuda/tests/cudapy/test_fastmath.py +265 -0
  334. numba_cuda/numba/cuda/tests/cudapy/test_flow_control.py +1433 -0
  335. numba_cuda/numba/cuda/tests/cudapy/test_forall.py +57 -0
  336. numba_cuda/numba/cuda/tests/cudapy/test_freevar.py +34 -0
  337. numba_cuda/numba/cuda/tests/cudapy/test_frexp_ldexp.py +69 -0
  338. numba_cuda/numba/cuda/tests/cudapy/test_globals.py +62 -0
  339. numba_cuda/numba/cuda/tests/cudapy/test_gufunc.py +474 -0
  340. numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scalar.py +167 -0
  341. numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scheduling.py +92 -0
  342. numba_cuda/numba/cuda/tests/cudapy/test_idiv.py +39 -0
  343. numba_cuda/numba/cuda/tests/cudapy/test_inline.py +170 -0
  344. numba_cuda/numba/cuda/tests/cudapy/test_inspect.py +255 -0
  345. numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py +1219 -0
  346. numba_cuda/numba/cuda/tests/cudapy/test_ipc.py +263 -0
  347. numba_cuda/numba/cuda/tests/cudapy/test_ir.py +598 -0
  348. numba_cuda/numba/cuda/tests/cudapy/test_ir_utils.py +276 -0
  349. numba_cuda/numba/cuda/tests/cudapy/test_iterators.py +101 -0
  350. numba_cuda/numba/cuda/tests/cudapy/test_lang.py +68 -0
  351. numba_cuda/numba/cuda/tests/cudapy/test_laplace.py +123 -0
  352. numba_cuda/numba/cuda/tests/cudapy/test_libdevice.py +194 -0
  353. numba_cuda/numba/cuda/tests/cudapy/test_lineinfo.py +220 -0
  354. numba_cuda/numba/cuda/tests/cudapy/test_localmem.py +173 -0
  355. numba_cuda/numba/cuda/tests/cudapy/test_make_function_to_jit_function.py +364 -0
  356. numba_cuda/numba/cuda/tests/cudapy/test_mandel.py +47 -0
  357. numba_cuda/numba/cuda/tests/cudapy/test_math.py +842 -0
  358. numba_cuda/numba/cuda/tests/cudapy/test_matmul.py +76 -0
  359. numba_cuda/numba/cuda/tests/cudapy/test_minmax.py +78 -0
  360. numba_cuda/numba/cuda/tests/cudapy/test_montecarlo.py +25 -0
  361. numba_cuda/numba/cuda/tests/cudapy/test_multigpu.py +145 -0
  362. numba_cuda/numba/cuda/tests/cudapy/test_multiprocessing.py +39 -0
  363. numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py +82 -0
  364. numba_cuda/numba/cuda/tests/cudapy/test_nondet.py +53 -0
  365. numba_cuda/numba/cuda/tests/cudapy/test_operator.py +504 -0
  366. numba_cuda/numba/cuda/tests/cudapy/test_optimization.py +93 -0
  367. numba_cuda/numba/cuda/tests/cudapy/test_overload.py +402 -0
  368. numba_cuda/numba/cuda/tests/cudapy/test_powi.py +128 -0
  369. numba_cuda/numba/cuda/tests/cudapy/test_print.py +193 -0
  370. numba_cuda/numba/cuda/tests/cudapy/test_py2_div_issue.py +37 -0
  371. numba_cuda/numba/cuda/tests/cudapy/test_random.py +117 -0
  372. numba_cuda/numba/cuda/tests/cudapy/test_record_dtype.py +614 -0
  373. numba_cuda/numba/cuda/tests/cudapy/test_recursion.py +130 -0
  374. numba_cuda/numba/cuda/tests/cudapy/test_reduction.py +94 -0
  375. numba_cuda/numba/cuda/tests/cudapy/test_retrieve_autoconverted_arrays.py +83 -0
  376. numba_cuda/numba/cuda/tests/cudapy/test_serialize.py +86 -0
  377. numba_cuda/numba/cuda/tests/cudapy/test_slicing.py +40 -0
  378. numba_cuda/numba/cuda/tests/cudapy/test_sm.py +457 -0
  379. numba_cuda/numba/cuda/tests/cudapy/test_sm_creation.py +233 -0
  380. numba_cuda/numba/cuda/tests/cudapy/test_ssa.py +454 -0
  381. numba_cuda/numba/cuda/tests/cudapy/test_stream_api.py +56 -0
  382. numba_cuda/numba/cuda/tests/cudapy/test_sync.py +277 -0
  383. numba_cuda/numba/cuda/tests/cudapy/test_tracing.py +200 -0
  384. numba_cuda/numba/cuda/tests/cudapy/test_transpose.py +90 -0
  385. numba_cuda/numba/cuda/tests/cudapy/test_typeconv.py +333 -0
  386. numba_cuda/numba/cuda/tests/cudapy/test_typeinfer.py +538 -0
  387. numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +585 -0
  388. numba_cuda/numba/cuda/tests/cudapy/test_userexc.py +42 -0
  389. numba_cuda/numba/cuda/tests/cudapy/test_vector_type.py +485 -0
  390. numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py +312 -0
  391. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_complex.py +23 -0
  392. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py +183 -0
  393. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_device.py +40 -0
  394. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_scalar_arg.py +40 -0
  395. numba_cuda/numba/cuda/tests/cudapy/test_warning.py +206 -0
  396. numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +331 -0
  397. numba_cuda/numba/cuda/tests/cudasim/__init__.py +9 -0
  398. numba_cuda/numba/cuda/tests/cudasim/support.py +9 -0
  399. numba_cuda/numba/cuda/tests/cudasim/test_cudasim_issues.py +111 -0
  400. numba_cuda/numba/cuda/tests/data/__init__.py +2 -0
  401. numba_cuda/numba/cuda/tests/data/cta_barrier.cu +28 -0
  402. numba_cuda/numba/cuda/tests/data/cuda_include.cu +10 -0
  403. numba_cuda/numba/cuda/tests/data/error.cu +12 -0
  404. numba_cuda/numba/cuda/tests/data/include/add.cuh +8 -0
  405. numba_cuda/numba/cuda/tests/data/jitlink.cu +28 -0
  406. numba_cuda/numba/cuda/tests/data/jitlink.ptx +49 -0
  407. numba_cuda/numba/cuda/tests/data/warn.cu +12 -0
  408. numba_cuda/numba/cuda/tests/doc_examples/__init__.py +9 -0
  409. numba_cuda/numba/cuda/tests/doc_examples/ffi/__init__.py +2 -0
  410. numba_cuda/numba/cuda/tests/doc_examples/ffi/functions.cu +54 -0
  411. numba_cuda/numba/cuda/tests/doc_examples/ffi/include/mul.cuh +8 -0
  412. numba_cuda/numba/cuda/tests/doc_examples/ffi/saxpy.cu +14 -0
  413. numba_cuda/numba/cuda/tests/doc_examples/test_cg.py +86 -0
  414. numba_cuda/numba/cuda/tests/doc_examples/test_cpointer.py +68 -0
  415. numba_cuda/numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py +81 -0
  416. numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py +141 -0
  417. numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py +160 -0
  418. numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py +180 -0
  419. numba_cuda/numba/cuda/tests/doc_examples/test_montecarlo.py +119 -0
  420. numba_cuda/numba/cuda/tests/doc_examples/test_random.py +66 -0
  421. numba_cuda/numba/cuda/tests/doc_examples/test_reduction.py +80 -0
  422. numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py +206 -0
  423. numba_cuda/numba/cuda/tests/doc_examples/test_ufunc.py +53 -0
  424. numba_cuda/numba/cuda/tests/doc_examples/test_vecadd.py +76 -0
  425. numba_cuda/numba/cuda/tests/nocuda/__init__.py +9 -0
  426. numba_cuda/numba/cuda/tests/nocuda/test_dummyarray.py +391 -0
  427. numba_cuda/numba/cuda/tests/nocuda/test_function_resolution.py +48 -0
  428. numba_cuda/numba/cuda/tests/nocuda/test_import.py +63 -0
  429. numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py +252 -0
  430. numba_cuda/numba/cuda/tests/nocuda/test_nvvm.py +59 -0
  431. numba_cuda/numba/cuda/tests/nrt/__init__.py +9 -0
  432. numba_cuda/numba/cuda/tests/nrt/test_nrt.py +387 -0
  433. numba_cuda/numba/cuda/tests/nrt/test_nrt_refct.py +124 -0
  434. numba_cuda/numba/cuda/tests/support.py +900 -0
  435. numba_cuda/numba/cuda/typeconv/__init__.py +4 -0
  436. numba_cuda/numba/cuda/typeconv/castgraph.py +137 -0
  437. numba_cuda/numba/cuda/typeconv/rules.py +63 -0
  438. numba_cuda/numba/cuda/typeconv/typeconv.py +121 -0
  439. numba_cuda/numba/cuda/types/__init__.py +233 -0
  440. numba_cuda/numba/cuda/types/__init__.pyi +167 -0
  441. numba_cuda/numba/cuda/types/abstract.py +9 -0
  442. numba_cuda/numba/cuda/types/common.py +9 -0
  443. numba_cuda/numba/cuda/types/containers.py +9 -0
  444. numba_cuda/numba/cuda/types/cuda_abstract.py +533 -0
  445. numba_cuda/numba/cuda/types/cuda_common.py +110 -0
  446. numba_cuda/numba/cuda/types/cuda_containers.py +971 -0
  447. numba_cuda/numba/cuda/types/cuda_function_type.py +230 -0
  448. numba_cuda/numba/cuda/types/cuda_functions.py +798 -0
  449. numba_cuda/numba/cuda/types/cuda_iterators.py +120 -0
  450. numba_cuda/numba/cuda/types/cuda_misc.py +569 -0
  451. numba_cuda/numba/cuda/types/cuda_npytypes.py +690 -0
  452. numba_cuda/numba/cuda/types/cuda_scalars.py +280 -0
  453. numba_cuda/numba/cuda/types/ext_types.py +101 -0
  454. numba_cuda/numba/cuda/types/function_type.py +11 -0
  455. numba_cuda/numba/cuda/types/functions.py +9 -0
  456. numba_cuda/numba/cuda/types/iterators.py +9 -0
  457. numba_cuda/numba/cuda/types/misc.py +9 -0
  458. numba_cuda/numba/cuda/types/npytypes.py +9 -0
  459. numba_cuda/numba/cuda/types/scalars.py +9 -0
  460. numba_cuda/numba/cuda/typing/__init__.py +19 -0
  461. numba_cuda/numba/cuda/typing/arraydecl.py +939 -0
  462. numba_cuda/numba/cuda/typing/asnumbatype.py +130 -0
  463. numba_cuda/numba/cuda/typing/bufproto.py +70 -0
  464. numba_cuda/numba/cuda/typing/builtins.py +1209 -0
  465. numba_cuda/numba/cuda/typing/cffi_utils.py +219 -0
  466. numba_cuda/numba/cuda/typing/cmathdecl.py +47 -0
  467. numba_cuda/numba/cuda/typing/collections.py +138 -0
  468. numba_cuda/numba/cuda/typing/context.py +782 -0
  469. numba_cuda/numba/cuda/typing/ctypes_utils.py +125 -0
  470. numba_cuda/numba/cuda/typing/dictdecl.py +63 -0
  471. numba_cuda/numba/cuda/typing/enumdecl.py +74 -0
  472. numba_cuda/numba/cuda/typing/listdecl.py +147 -0
  473. numba_cuda/numba/cuda/typing/mathdecl.py +158 -0
  474. numba_cuda/numba/cuda/typing/npdatetime.py +322 -0
  475. numba_cuda/numba/cuda/typing/npydecl.py +749 -0
  476. numba_cuda/numba/cuda/typing/setdecl.py +115 -0
  477. numba_cuda/numba/cuda/typing/templates.py +1446 -0
  478. numba_cuda/numba/cuda/typing/typeof.py +301 -0
  479. numba_cuda/numba/cuda/ufuncs.py +746 -0
  480. numba_cuda/numba/cuda/utils.py +724 -0
  481. numba_cuda/numba/cuda/vector_types.py +214 -0
  482. numba_cuda/numba/cuda/vectorizers.py +260 -0
  483. numba_cuda-0.21.1.dist-info/METADATA +109 -0
  484. numba_cuda-0.21.1.dist-info/RECORD +488 -0
  485. numba_cuda-0.21.1.dist-info/WHEEL +5 -0
  486. numba_cuda-0.21.1.dist-info/licenses/LICENSE +26 -0
  487. numba_cuda-0.21.1.dist-info/licenses/LICENSE.numba +24 -0
  488. numba_cuda-0.21.1.dist-info/top_level.txt +1 -0
@@ -0,0 +1,220 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: BSD-2-Clause
3
+
4
+ """CUDA Toolkit libraries lookup utilities.
5
+
6
+ CUDA Toolkit libraries can be available via either:
7
+
8
+ - the `cuda-nvcc` and `cuda-nvrtc` conda packages,
9
+ - a user supplied location from CUDA_HOME,
10
+ - a system wide location,
11
+ - package-specific locations (e.g. the Debian NVIDIA packages),
12
+ - or can be discovered by the system loader.
13
+ """
14
+
15
+ import os
16
+ import sys
17
+ import ctypes
18
+
19
+ from numba.cuda.misc.findlib import find_lib
20
+ from numba.cuda.cuda_paths import get_cuda_paths
21
+ from numba.cuda.cudadrv.driver import locate_driver_and_loader, load_driver
22
+ from numba.cuda.cudadrv.error import CudaSupportError
23
+ from numba.cuda.core import config
24
+
25
+
26
# Filename templates for shared and static libraries on the running platform.
# The "%s" placeholder is filled with the bare library name (e.g. "nvvm").
# Any platform other than Windows and macOS uses the ELF-style names.
_dllnamepattern, _staticnamepattern = {
    "win32": ("%s.dll", "%s.lib"),
    "darwin": ("lib%s.dylib", "lib%s.a"),
}.get(sys.platform, ("lib%s.so", "lib%s.a"))
35
+
36
+
37
def get_libdevice():
    """Return the ``info`` value of the "libdevice" entry of the CUDA paths."""
    return get_cuda_paths()["libdevice"].info
41
+
42
+
43
def open_libdevice():
    """Read the file located by ``get_libdevice()`` and return its bytes."""
    libdevice_path = get_libdevice()
    with open(libdevice_path, "rb") as stream:
        contents = stream.read()
    return contents
46
+
47
+
48
def get_cudalib(lib, static=False):
    """
    Locate a CUDA library by searching the known locations.

    When nothing is found, a generic filename (e.g. 'libnvvm.so' for 'nvvm')
    is returned instead, so that the system loader's own search mechanism can
    be tried.

    :param lib: Bare library name, e.g. ``"nvvm"`` or ``"cudadevrt"``.
    :param static: If true, look for the static variant of the library.
                   Ignored for ``"nvrtc"`` and ``"nvvm"``.
    :return: A path to the library, or a generic library filename.
    """
    # nvrtc and nvvm have dedicated entries in the CUDA paths mapping.
    if lib in {"nvrtc", "nvvm"}:
        located = get_cuda_paths()[lib].info
        return located or _dllnamepattern % lib

    if static:
        dir_type, namepattern = "static_cudalib_dir", _staticnamepattern
    else:
        dir_type, namepattern = "cudalib_dir", _dllnamepattern

    libdir = get_cuda_paths()[dir_type].info
    candidates = find_lib(lib, libdir, static=static)
    if candidates:
        return max(candidates)
    return namepattern % lib
64
+
65
+
66
def get_cuda_include_dir():
    """
    Return the CUDA include directory found from a list of default locations.

    Note that the `CUDA_INCLUDE_PATH` entry in user configuration is not
    listed by this function.
    """
    include_entry = get_cuda_paths()["include_dir"]
    return include_entry.info
74
+
75
+
76
def check_cuda_include_dir(path):
    """
    Validate that *path* is a usable CUDA include directory.

    :param path: Candidate include directory, or None.
    :raises FileNotFoundError: If *path* is None or does not exist, or if it
                               does not contain ``cuda_runtime.h``.
    """
    missing = path is None or not os.path.exists(path)
    if missing:
        raise FileNotFoundError(f"{path} not found")

    runtime_header = os.path.join(path, "cuda_runtime.h")
    if os.path.exists(runtime_header):
        return
    raise FileNotFoundError(f"Unable to find cuda_runtime.h from {path}")
82
+
83
+
84
def open_cudalib(lib):
    """Load the CUDA library *lib* via ctypes from the path ``get_cudalib`` returns."""
    return ctypes.CDLL(get_cudalib(lib))
87
+
88
+
89
def check_static_lib(path):
    """
    Verify that *path* names an existing regular file.

    :param path: Path to the library file, or None when no path was located.
    :raises FileNotFoundError: If *path* is None or is not an existing file.
    """
    # os.path.isfile(None) raises TypeError, which callers that only catch
    # FileNotFoundError would not handle; treat a missing path as "not found",
    # as check_cuda_include_dir does.
    if path is None or not os.path.isfile(path):
        raise FileNotFoundError(f"{path} not found")
92
+
93
+
94
def _get_source_variable(lib, static=False):
    """
    Return the ``by`` attribute of the CUDA paths entry used to locate *lib*.

    :param lib: ``"nvvm"``, ``"nvrtc"``, ``"libdevice"``, ``"include_dir"``,
                or any other library name.
    :param static: Selects the static library directory entry for names
                   that do not have a dedicated entry.
    """
    # These names have their own entry; everything else is looked up through
    # the (static or dynamic) library directory entry.
    if lib in ("nvvm", "nvrtc", "libdevice", "include_dir"):
        key = lib
    elif static:
        key = "static_cudalib_dir"
    else:
        key = "cudalib_dir"
    return get_cuda_paths()[key].by
106
+
107
+
108
def test():
    """
    Test library lookup. Path info is printed to stdout.

    Checks, in order: the driver, the nvvm and nvrtc dynamic libraries, the
    cudadevrt static library, libdevice, and the CUDA include directory.

    :return: True if every check passed, False otherwise.
    """
    failed = False

    # Check for the driver
    try:
        dlloader, candidates = locate_driver_and_loader()
        print("Finding driver from candidates:")
        for location in candidates:
            print(f"\t{location}")
        print(f"Using loader {dlloader}")
        print("\tTrying to load driver", end="...")
        dll, path = load_driver(dlloader, candidates)
        print("\tok")
        print(f"\t\tLoaded from {path}")
    except CudaSupportError as e:
        print(f"\tERROR: failed to open driver: {e}")
        failed = True

    # Find the absolute location of the driver on Linux. Various driver-related
    # issues have been reported by WSL2 users, and it is almost always due to a
    # Linux (i.e. non-WSL2) driver being installed in a WSL2 system.
    # Providing the absolute location of the driver indicates its version
    # number in the soname (e.g. "libcuda.so.530.30.02"), which can be used to
    # look up whether the driver was intended for "native" Linux.
    if sys.platform == "linux" and not failed:
        pid = os.getpid()
        mapsfile = os.path.join(os.path.sep, "proc", f"{pid}", "maps")
        try:
            with open(mapsfile) as f:
                maps = f.read()
        # It's difficult to predict all that might go wrong reading the maps
        # file - in case various error conditions ensue (the file is not found,
        # not readable, etc.) we use OSError to hopefully catch any of them.
        except OSError:
            # It's helpful to report that this went wrong to the user, but we
            # don't set failed to True because this doesn't have any connection
            # to actual CUDA functionality.
            print(
                f"\tERROR: Could not open {mapsfile} to determine absolute "
                "path to libcuda.so"
            )
        else:
            # In this case we could read the maps, so we can report the
            # relevant ones to the user
            locations = {s for s in maps.split() if "libcuda.so" in s}
            print("\tMapped libcuda.so paths:")
            for location in locations:
                print(f"\t\t{location}")

    # Checks for dynamic libraries
    for lib in ("nvvm", "nvrtc"):
        path = get_cudalib(lib)
        print("Finding {} from {}".format(lib, _get_source_variable(lib)))
        print("\tLocated at", path)

        try:
            print("\tTrying to open library", end="...")
            open_cudalib(lib)
            print("\tok")
        except OSError as e:
            print("\tERROR: failed to open %s:\n%s" % (lib, e))
            failed = True

    # Check for cudadevrt (the only static library)
    lib = "cudadevrt"
    path = get_cudalib(lib, static=True)
    print(
        "Finding {} from {}".format(lib, _get_source_variable(lib, static=True))
    )
    print("\tLocated at", path)

    try:
        print("\tChecking library", end="...")
        check_static_lib(path)
        print("\tok")
    except FileNotFoundError as e:
        print("\tERROR: failed to find %s:\n%s" % (lib, e))
        failed = True

    # Check for libdevice
    where = _get_source_variable("libdevice")
    print(f"Finding libdevice from {where}")
    path = get_libdevice()
    print("\tLocated at", path)

    try:
        print("\tChecking library", end="...")
        check_static_lib(path)
        print("\tok")
    except FileNotFoundError as e:
        # Name libdevice explicitly: `lib` still holds "cudadevrt" from the
        # previous check and would mislabel this failure.
        print("\tERROR: failed to find %s:\n%s" % ("libdevice", e))
        failed = True

    # Check cuda include paths

    print("Include directory configuration variable:")
    print(f"\tCUDA_INCLUDE_PATH={config.CUDA_INCLUDE_PATH}")

    where = _get_source_variable("include_dir")
    print(f"Finding include directory from {where}")
    include = get_cuda_include_dir()
    print("\tLocated at", include)
    try:
        print("\tChecking include directory", end="...")
        check_cuda_include_dir(include)
        print("\tok")
    except FileNotFoundError as e:
        print("\tERROR: failed to find cuda include directory:\n%s" % e)
        failed = True

    return not failed
@@ -0,0 +1,184 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: BSD-2-Clause
3
+
4
+ import io
5
+ import os
6
+ from typing import Union, Type
7
+
8
+ from .mappings import FILE_EXTENSION_MAP
9
+
10
+
11
class LinkableCode:
    """Holder for code that is to be linked from memory.

    :param data: A buffer, StringIO or BytesIO containing the data to link.
                 When a StringIO or BytesIO is given, its content is fetched
                 each time the `data` property is accessed.
    :param name: The file name to refer to in any compilation or linking
                 errors that may be produced.
    :param setup_callback: A function called prior to the launch of a kernel
                           contained within a module that has this code object
                           linked into it.
    :param teardown_callback: A function called just prior to the unloading of
                              a module that has this code object linked into
                              it.
    :param nrt: If True, assume this object contains NRT function calls and
                add NRT source code to the final link.
    """

    def __init__(
        self,
        data,
        name=None,
        setup_callback=None,
        teardown_callback=None,
        nrt=False,
    ):
        # A truthy callback must be callable; falsy values mean "no callback".
        for label, callback in (
            ("setup_callback", setup_callback),
            ("teardown_callback", teardown_callback),
        ):
            if callback and not callable(callback):
                raise TypeError(f"{label} must be callable")

        self._data = data
        self._name = name
        self.nrt = nrt
        self.setup_callback = setup_callback
        self.teardown_callback = teardown_callback

    @property
    def name(self):
        """The explicit name if one was given, else the class's `default_name`."""
        if self._name:
            return self._name
        return self.default_name

    @property
    def data(self):
        """The content to link; in-memory file objects are unwrapped."""
        held = self._data
        is_memory_file = isinstance(held, (io.StringIO, io.BytesIO))
        return held.getvalue() if is_memory_file else held

    @staticmethod
    def from_path(path: str):
        """
        Build a linkable code object from the content of a file.

        Parameters
        ----------
        path : str
            Location of the file to load.

        Returns
        -------
        LinkableCode
            An instance of the subclass matching the file extension, named
            after the file's base name without its extension.

        Raises
        ------
        ValueError
            If the file extension is not supported.
        """
        stem, extension = os.path.splitext(path)
        # CUDA source and PTX are read as text; every other kind as bytes.
        mode = "r" if extension in (".cu", ".ptx") else "rb"

        with open(path, mode) as stream:
            content = stream.read()

        kind_cls = _extension_to_linkable_code_kind(extension)
        return kind_cls(content, name=os.path.basename(stem))

    @classmethod
    def from_path_or_obj(cls, path_or_obj: Union[str, "LinkableCode"]):
        """
        Return a linkable code object for a path or an existing object.

        A string is treated as a path and loaded with `from_path`. Anything
        else is returned unchanged.

        Parameters
        ----------
        path_or_obj : str or LinkableCode
            The path to the file or the LinkableCode object to load.

        Returns
        -------
        LinkableCode
            The linkable code object.

        Raises
        ------
        ValueError
            If the file extension is not supported.
        """
        if not isinstance(path_or_obj, str):
            return path_or_obj
        return cls.from_path(path_or_obj)
117
+
118
+
119
class PTXSource(LinkableCode):
    """In-memory PTX source code."""

    # Name used when the instance was created without one.
    default_name = "<unnamed-ptx>"
    kind = FILE_EXTENSION_MAP["ptx"]
124
+
125
+
126
class CUSource(LinkableCode):
    """In-memory CUDA C/C++ source code."""

    # Name used when the instance was created without one.
    default_name = "<unnamed-cu>"
    # Unlike its sibling classes, the kind is a plain string rather than a
    # FILE_EXTENSION_MAP entry.
    kind = "cu"
131
+
132
+
133
class Fatbin(LinkableCode):
    """In-memory ELF Fatbin."""

    # Name used when the instance was created without one.
    default_name = "<unnamed-fatbin>"
    kind = FILE_EXTENSION_MAP["fatbin"]
138
+
139
+
140
class Cubin(LinkableCode):
    """In-memory ELF Cubin."""

    # Name used when the instance was created without one.
    default_name = "<unnamed-cubin>"
    kind = FILE_EXTENSION_MAP["cubin"]
145
+
146
+
147
class Archive(LinkableCode):
    """In-memory archive of objects."""

    # Name used when the instance was created without one.
    default_name = "<unnamed-archive>"
    kind = FILE_EXTENSION_MAP["a"]
152
+
153
+
154
class Object(LinkableCode):
    """In-memory object file."""

    # Name used when the instance was created without one.
    default_name = "<unnamed-object>"
    kind = FILE_EXTENSION_MAP["o"]
159
+
160
+
161
class LTOIR(LinkableCode):
    """In-memory LTOIR file."""

    # Name used when the instance was created without one.
    default_name = "<unnamed-ltoir>"
    kind = FILE_EXTENSION_MAP["ltoir"]
166
+
167
+
168
def _extension_to_linkable_code_kind(extension: str) -> Type[LinkableCode]:
    """
    Map a file extension to the LinkableCode subclass that represents it.

    :param extension: File extension including the leading dot, as returned
                      by ``os.path.splitext`` (e.g. ``".ptx"``).
    :return: The matching LinkableCode subclass.
    :raises ValueError: If the extension is not one of the supported ones.
    """
    kinds = {
        ".cu": CUSource,
        ".ptx": PTXSource,
        ".fatbin": Fatbin,
        ".cubin": Cubin,
        ".a": Archive,
        # FILE_EXTENSION_MAP gives "lib" the same input type as "a", so a
        # ".lib" file is an archive as well.
        ".lib": Archive,
        ".o": Object,
        ".ltoir": LTOIR,
    }
    try:
        return kinds[extension]
    except KeyError:
        raise ValueError(f"Unknown extension: {extension}") from None
@@ -0,0 +1,14 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: BSD-2-Clause
3
+
4
+ from cuda.bindings.driver import CUjitInputType
5
+
6
# Maps a file extension (without the leading dot) to the CUjitInputType value
# used for files of that kind. "a" and "lib" both denote a library input.
FILE_EXTENSION_MAP = dict(
    o=CUjitInputType.CU_JIT_INPUT_OBJECT,
    ptx=CUjitInputType.CU_JIT_INPUT_PTX,
    a=CUjitInputType.CU_JIT_INPUT_LIBRARY,
    lib=CUjitInputType.CU_JIT_INPUT_LIBRARY,
    cubin=CUjitInputType.CU_JIT_INPUT_CUBIN,
    fatbin=CUjitInputType.CU_JIT_INPUT_FATBINARY,
    ltoir=CUjitInputType.CU_JIT_INPUT_NVVM,
)
@@ -0,0 +1,26 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: BSD-2-Clause
3
+
4
+ from numba.cuda.cudadrv import devices, driver
5
+
6
# _calc_array_sizeof is only defined when numba.core.registry can be imported.
try:
    from numba.core.registry import cpu_target
except ImportError:
    pass
else:

    def _calc_array_sizeof(ndim):
        """
        Return the array size for *ndim* dimensions per the CPU target ABI.
        """
        return cpu_target.target_context.calc_array_sizeof(ndim)
17
+
18
+
19
def ndarray_device_allocate_data(ary):
    """
    Allocate a GPU data buffer sized by ``driver.host_memory_size(ary)``.

    :param ary: The array whose host memory size determines the allocation.
    :return: The result of ``memalloc`` on the current context.
    """
    nbytes = driver.host_memory_size(ary)
    context = devices.get_context()
    return context.memalloc(nbytes)
@@ -0,0 +1,193 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: BSD-2-Clause
3
+
4
+ from numba.cuda.cudadrv.error import (
5
+ CCSupportError,
6
+ )
7
+ from numba.cuda import config
8
+ from numba.cuda.cuda_paths import get_cuda_paths
9
+ from numba.cuda.utils import _readenv
10
+
11
+ import os
12
+ import warnings
13
+ import functools
14
+
15
+ from cuda.core.experimental import Program, ProgramOptions
16
+ from cuda.bindings import nvrtc as bindings_nvrtc
17
+
18
# Extra NVRTC include search paths: the environment variable wins when it is
# non-empty; otherwise an existing config attribute is used, else "".
NVRTC_EXTRA_SEARCH_PATHS = _readenv(
    "NUMBA_CUDA_NVRTC_EXTRA_SEARCH_PATHS", str, ""
)
if not NVRTC_EXTRA_SEARCH_PATHS:
    NVRTC_EXTRA_SEARCH_PATHS = getattr(
        config, "CUDA_NVRTC_EXTRA_SEARCH_PATHS", ""
    )

# Publish the value on config only when config does not define it already.
if not hasattr(config, "CUDA_NVRTC_EXTRA_SEARCH_PATHS"):
    config.CUDA_NVRTC_EXTRA_SEARCH_PATHS = NVRTC_EXTRA_SEARCH_PATHS
23
+
24
+
25
@functools.cache
def _get_nvrtc_version():
    """
    Return the NVRTC version as a ``(major, minor)`` tuple.

    The result is cached after the first successful call.

    :raises RuntimeError: If ``nvrtcVersion()`` does not report success.
    """
    status, major, minor = bindings_nvrtc.nvrtcVersion()
    if status == bindings_nvrtc.nvrtcResult.NVRTC_SUCCESS:
        return (major, minor)
    raise RuntimeError(f"{status.name} when calling nvrtcVersion()")
31
+
32
+
33
def compile(src, name, cc, ltoir=False, lineinfo=False, debug=False):
    """
    Compile a CUDA C/C++ source to PTX or LTOIR for a given compute capability.

    If NVRTC does not support ``cc`` exactly, the highest supported compute
    capability not exceeding ``cc`` is used and a warning is issued.

    :param src: The source code to compile; bytes are decoded as UTF-8
    :type src: str
    :param name: The filename of the source (for information only)
    :type name: str
    :param cc: A tuple ``(major, minor)`` of the compute capability
    :type cc: tuple
    :param ltoir: Compile into LTOIR if True, otherwise into PTX
    :type ltoir: bool
    :param lineinfo: Whether to include line information in the compiled code
    :type lineinfo: bool
    :param debug: Whether to include debug information in the compiled code
    :type debug: bool
    :return: The compiled PTX or LTOIR and compilation log. The log is a list
             of messages, or an empty string when nothing was logged.
    :rtype: tuple
    :raises RuntimeError: If ``cc`` is below every compute capability NVRTC
                          supports, or if the NVRTC major version is neither
                          12 nor 13.
    """
    version = _get_nvrtc_version()

    def ver_str(ver):
        return ".".join(str(v) for v in ver)

    supported_ccs = get_supported_ccs()
    usable_ccs = [v for v in supported_ccs if v <= cc]
    if not usable_ccs:
        raise RuntimeError(
            f"Device compute capability {ver_str(cc)} is less than the "
            f"minimum supported by NVRTC {ver_str(version)}. Supported "
            "compute capabilities are "
            f"{', '.join([ver_str(v) for v in supported_ccs])}."
        )
    found = max(usable_ccs)

    if found != cc:
        warnings.warn(
            f"Device compute capability {ver_str(cc)} is not supported by "
            f"NVRTC {ver_str(version)}. Using {ver_str(found)} instead."
        )

    # Compilation options:
    # - Compile for the current device's compute capability.
    # - The CUDA include path is added.
    # - Relocatable Device Code (rdc) is needed to prevent device functions
    #   being optimized away.
    major, minor = found

    arch = f"sm_{major}{minor}"

    cuda_include_dir = get_cuda_paths()["include_dir"].info
    cuda_includes = [f"{cuda_include_dir}"]

    cudadrv_path = os.path.dirname(os.path.abspath(__file__))
    numba_cuda_path = os.path.dirname(cudadrv_path)

    nvrtc_ver_major = version[0]
    if nvrtc_ver_major == 12:
        numba_include = f"{os.path.join(numba_cuda_path, 'include', '12')}"
        # For CUDA 12 wheels, `cuda_include_dir` is `site-packages/nvidia/cuda_runtime/include`
        # We need to find CCCL at `site-packages/nvidia/cuda_cccl/include`
        # For CUDA 12 conda / system install, CCCL is just in the `include` directory
        cuda_includes.append(
            f"{os.path.join(cuda_include_dir, '..', '..', 'cuda_cccl', 'include')}"
        )
    elif nvrtc_ver_major == 13:
        numba_include = f"{os.path.join(numba_cuda_path, 'include', '13')}"
        # For CUDA 13 wheels, `cuda_include_dir` is `site-packages/nvidia/cu13/include`
        # We need to find CCCL at `site-packages/nvidia/cu13/include/cccl`
        # For CUDA 13 conda / system install, CCCL is in the `include/cccl` directory
        cuda_includes.append(f"{os.path.join(cuda_include_dir, 'cccl')}")
    else:
        # Without this branch `numba_include` would be unbound below and the
        # failure would surface as an opaque UnboundLocalError.
        raise RuntimeError(
            f"Unsupported NVRTC version {ver_str(version)}: no include "
            "directory is known for this major version (expected 12 or 13)."
        )

    # NOTE(review): the separator is always ":", which also occurs in Windows
    # drive-qualified paths (e.g. "C:\\..."); confirm the intended format of
    # this setting on Windows before changing it.
    if config.CUDA_NVRTC_EXTRA_SEARCH_PATHS:
        extra_includes = config.CUDA_NVRTC_EXTRA_SEARCH_PATHS.split(":")
    else:
        extra_includes = []

    nrt_include = os.path.join(numba_cuda_path, "memory_management")

    includes = [numba_include, *cuda_includes, nrt_include, *extra_includes]

    options = ProgramOptions(
        arch=arch,
        include_path=includes,
        relocatable_device_code=True,
        link_time_optimization=ltoir,
        name=name,
        debug=debug,
        lineinfo=lineinfo,
    )

    class Logger:
        """Collects each message written to it in a list."""

        def __init__(self):
            self.log = []

        def write(self, msg):
            self.log.append(msg)

    logger = Logger()
    if isinstance(src, bytes):
        src = src.decode("utf8")

    prog = Program(src, "c++", options=options)
    result = prog.compile("ltoir" if ltoir else "ptx", logs=logger)
    log = ""
    if logger.log:
        log = logger.log
        joined_logs = "\n".join(log)
        warnings.warn(f"NVRTC log messages: {joined_logs}")
    return result, log
141
+
142
+
143
def find_closest_arch(mycc):
    """
    Pick the compute capability supported by the CUDA toolkit that is closest
    to the given one.

    An exact match is returned as is. Otherwise the supported entry just
    before the first one exceeding ``mycc`` is returned, or the last supported
    entry when none exceeds it.

    :param mycc: Compute capability as a tuple ``(MAJOR, MINOR)``
    :return: Closest supported CC as a tuple ``(MAJOR, MINOR)``
    :raises CCSupportError: If the first supported CC already exceeds ``mycc``
    """
    supported_ccs = get_supported_ccs()

    previous = None
    for candidate in supported_ccs:
        if candidate == mycc:
            # Exact match
            return candidate
        if candidate > mycc:
            if previous is None:
                # The very first supported CC is already too high
                msg = (
                    "GPU compute capability %d.%d is not supported"
                    "(requires >=%d.%d)" % (mycc + candidate)
                )
                raise CCSupportError(msg)
            # Fall back to the entry just before the one that exceeded
            return previous
        previous = candidate

    # Nothing exceeded the requested CC: choose the last (highest) entry
    return supported_ccs[-1]
172
+
173
+
174
def get_arch_option(major, minor):
    """
    Return the ``compute_XY`` architecture option closest to the given CC.

    A truthy ``config.FORCE_CUDA_CC`` overrides the lookup.

    :param major: Major compute capability number.
    :param minor: Minor compute capability number.
    """
    arch = config.FORCE_CUDA_CC or find_closest_arch((major, minor))
    return "compute_%d%d" % arch
181
+
182
+
183
def get_lowest_supported_cc():
    """Return the smallest ``(major, minor)`` pair from ``get_supported_ccs()``."""
    supported = get_supported_ccs()
    return min(supported)
185
+
186
+
187
def get_supported_ccs():
    """
    Return the architectures NVRTC reports as supported.

    Each architecture number is split into its tens and units parts, e.g.
    ``75`` becomes ``(7, 5)``.

    :return: A list of ``(major, minor)`` tuples.
    :raises RuntimeError: If ``nvrtcGetSupportedArchs()`` does not report
                          success.
    """
    status, archs = bindings_nvrtc.nvrtcGetSupportedArchs()
    if status == bindings_nvrtc.nvrtcResult.NVRTC_SUCCESS:
        return [divmod(arch, 10) for arch in archs]
    raise RuntimeError(
        f"{status.name} when calling nvrtcGetSupportedArchs()"
    )