numba-cuda 0.21.1__cp313-cp313-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (488)
  1. _numba_cuda_redirector.pth +4 -0
  2. _numba_cuda_redirector.py +89 -0
  3. numba_cuda/VERSION +1 -0
  4. numba_cuda/__init__.py +6 -0
  5. numba_cuda/_version.py +11 -0
  6. numba_cuda/numba/cuda/__init__.py +70 -0
  7. numba_cuda/numba/cuda/_internal/cuda_bf16.py +16394 -0
  8. numba_cuda/numba/cuda/_internal/cuda_fp16.py +8112 -0
  9. numba_cuda/numba/cuda/api.py +577 -0
  10. numba_cuda/numba/cuda/api_util.py +76 -0
  11. numba_cuda/numba/cuda/args.py +72 -0
  12. numba_cuda/numba/cuda/bf16.py +397 -0
  13. numba_cuda/numba/cuda/cache_hints.py +287 -0
  14. numba_cuda/numba/cuda/cext/__init__.py +2 -0
  15. numba_cuda/numba/cuda/cext/_devicearray.cp313-win_amd64.pyd +0 -0
  16. numba_cuda/numba/cuda/cext/_devicearray.cpp +159 -0
  17. numba_cuda/numba/cuda/cext/_devicearray.h +29 -0
  18. numba_cuda/numba/cuda/cext/_dispatcher.cp313-win_amd64.pyd +0 -0
  19. numba_cuda/numba/cuda/cext/_dispatcher.cpp +1098 -0
  20. numba_cuda/numba/cuda/cext/_hashtable.cpp +532 -0
  21. numba_cuda/numba/cuda/cext/_hashtable.h +135 -0
  22. numba_cuda/numba/cuda/cext/_helperlib.c +71 -0
  23. numba_cuda/numba/cuda/cext/_helperlib.cp313-win_amd64.pyd +0 -0
  24. numba_cuda/numba/cuda/cext/_helpermod.c +82 -0
  25. numba_cuda/numba/cuda/cext/_pymodule.h +38 -0
  26. numba_cuda/numba/cuda/cext/_typeconv.cp313-win_amd64.pyd +0 -0
  27. numba_cuda/numba/cuda/cext/_typeconv.cpp +206 -0
  28. numba_cuda/numba/cuda/cext/_typeof.cpp +1159 -0
  29. numba_cuda/numba/cuda/cext/_typeof.h +19 -0
  30. numba_cuda/numba/cuda/cext/capsulethunk.h +111 -0
  31. numba_cuda/numba/cuda/cext/mviewbuf.c +385 -0
  32. numba_cuda/numba/cuda/cext/mviewbuf.cp313-win_amd64.pyd +0 -0
  33. numba_cuda/numba/cuda/cext/typeconv.cpp +212 -0
  34. numba_cuda/numba/cuda/cext/typeconv.hpp +101 -0
  35. numba_cuda/numba/cuda/cg.py +67 -0
  36. numba_cuda/numba/cuda/cgutils.py +1294 -0
  37. numba_cuda/numba/cuda/cloudpickle/__init__.py +21 -0
  38. numba_cuda/numba/cuda/cloudpickle/cloudpickle.py +1598 -0
  39. numba_cuda/numba/cuda/cloudpickle/cloudpickle_fast.py +17 -0
  40. numba_cuda/numba/cuda/codegen.py +541 -0
  41. numba_cuda/numba/cuda/compiler.py +1396 -0
  42. numba_cuda/numba/cuda/core/analysis.py +758 -0
  43. numba_cuda/numba/cuda/core/annotations/__init__.py +0 -0
  44. numba_cuda/numba/cuda/core/annotations/pretty_annotate.py +288 -0
  45. numba_cuda/numba/cuda/core/annotations/type_annotations.py +305 -0
  46. numba_cuda/numba/cuda/core/base.py +1332 -0
  47. numba_cuda/numba/cuda/core/boxing.py +1411 -0
  48. numba_cuda/numba/cuda/core/bytecode.py +728 -0
  49. numba_cuda/numba/cuda/core/byteflow.py +2346 -0
  50. numba_cuda/numba/cuda/core/caching.py +744 -0
  51. numba_cuda/numba/cuda/core/callconv.py +392 -0
  52. numba_cuda/numba/cuda/core/codegen.py +171 -0
  53. numba_cuda/numba/cuda/core/compiler.py +199 -0
  54. numba_cuda/numba/cuda/core/compiler_lock.py +85 -0
  55. numba_cuda/numba/cuda/core/compiler_machinery.py +497 -0
  56. numba_cuda/numba/cuda/core/config.py +650 -0
  57. numba_cuda/numba/cuda/core/consts.py +124 -0
  58. numba_cuda/numba/cuda/core/controlflow.py +989 -0
  59. numba_cuda/numba/cuda/core/entrypoints.py +57 -0
  60. numba_cuda/numba/cuda/core/environment.py +66 -0
  61. numba_cuda/numba/cuda/core/errors.py +917 -0
  62. numba_cuda/numba/cuda/core/event.py +511 -0
  63. numba_cuda/numba/cuda/core/funcdesc.py +330 -0
  64. numba_cuda/numba/cuda/core/generators.py +387 -0
  65. numba_cuda/numba/cuda/core/imputils.py +509 -0
  66. numba_cuda/numba/cuda/core/inline_closurecall.py +1787 -0
  67. numba_cuda/numba/cuda/core/interpreter.py +3617 -0
  68. numba_cuda/numba/cuda/core/ir.py +1812 -0
  69. numba_cuda/numba/cuda/core/ir_utils.py +2638 -0
  70. numba_cuda/numba/cuda/core/optional.py +129 -0
  71. numba_cuda/numba/cuda/core/options.py +262 -0
  72. numba_cuda/numba/cuda/core/postproc.py +249 -0
  73. numba_cuda/numba/cuda/core/pythonapi.py +1859 -0
  74. numba_cuda/numba/cuda/core/registry.py +46 -0
  75. numba_cuda/numba/cuda/core/removerefctpass.py +123 -0
  76. numba_cuda/numba/cuda/core/rewrites/__init__.py +26 -0
  77. numba_cuda/numba/cuda/core/rewrites/ir_print.py +91 -0
  78. numba_cuda/numba/cuda/core/rewrites/registry.py +104 -0
  79. numba_cuda/numba/cuda/core/rewrites/static_binop.py +41 -0
  80. numba_cuda/numba/cuda/core/rewrites/static_getitem.py +189 -0
  81. numba_cuda/numba/cuda/core/rewrites/static_raise.py +100 -0
  82. numba_cuda/numba/cuda/core/sigutils.py +68 -0
  83. numba_cuda/numba/cuda/core/ssa.py +498 -0
  84. numba_cuda/numba/cuda/core/targetconfig.py +330 -0
  85. numba_cuda/numba/cuda/core/tracing.py +231 -0
  86. numba_cuda/numba/cuda/core/transforms.py +956 -0
  87. numba_cuda/numba/cuda/core/typed_passes.py +867 -0
  88. numba_cuda/numba/cuda/core/typeinfer.py +1950 -0
  89. numba_cuda/numba/cuda/core/unsafe/__init__.py +0 -0
  90. numba_cuda/numba/cuda/core/unsafe/bytes.py +67 -0
  91. numba_cuda/numba/cuda/core/unsafe/eh.py +67 -0
  92. numba_cuda/numba/cuda/core/unsafe/refcount.py +98 -0
  93. numba_cuda/numba/cuda/core/untyped_passes.py +1979 -0
  94. numba_cuda/numba/cuda/cpython/builtins.py +1153 -0
  95. numba_cuda/numba/cuda/cpython/charseq.py +1218 -0
  96. numba_cuda/numba/cuda/cpython/cmathimpl.py +560 -0
  97. numba_cuda/numba/cuda/cpython/enumimpl.py +103 -0
  98. numba_cuda/numba/cuda/cpython/iterators.py +167 -0
  99. numba_cuda/numba/cuda/cpython/listobj.py +1326 -0
  100. numba_cuda/numba/cuda/cpython/mathimpl.py +499 -0
  101. numba_cuda/numba/cuda/cpython/numbers.py +1475 -0
  102. numba_cuda/numba/cuda/cpython/rangeobj.py +289 -0
  103. numba_cuda/numba/cuda/cpython/slicing.py +322 -0
  104. numba_cuda/numba/cuda/cpython/tupleobj.py +456 -0
  105. numba_cuda/numba/cuda/cpython/unicode.py +2865 -0
  106. numba_cuda/numba/cuda/cpython/unicode_support.py +1597 -0
  107. numba_cuda/numba/cuda/cpython/unsafe/__init__.py +0 -0
  108. numba_cuda/numba/cuda/cpython/unsafe/numbers.py +64 -0
  109. numba_cuda/numba/cuda/cpython/unsafe/tuple.py +92 -0
  110. numba_cuda/numba/cuda/cuda_paths.py +691 -0
  111. numba_cuda/numba/cuda/cudadecl.py +556 -0
  112. numba_cuda/numba/cuda/cudadrv/__init__.py +14 -0
  113. numba_cuda/numba/cuda/cudadrv/devicearray.py +951 -0
  114. numba_cuda/numba/cuda/cudadrv/devices.py +249 -0
  115. numba_cuda/numba/cuda/cudadrv/driver.py +3222 -0
  116. numba_cuda/numba/cuda/cudadrv/drvapi.py +435 -0
  117. numba_cuda/numba/cuda/cudadrv/dummyarray.py +558 -0
  118. numba_cuda/numba/cuda/cudadrv/enums.py +613 -0
  119. numba_cuda/numba/cuda/cudadrv/error.py +48 -0
  120. numba_cuda/numba/cuda/cudadrv/libs.py +220 -0
  121. numba_cuda/numba/cuda/cudadrv/linkable_code.py +184 -0
  122. numba_cuda/numba/cuda/cudadrv/mappings.py +14 -0
  123. numba_cuda/numba/cuda/cudadrv/ndarray.py +26 -0
  124. numba_cuda/numba/cuda/cudadrv/nvrtc.py +193 -0
  125. numba_cuda/numba/cuda/cudadrv/nvvm.py +756 -0
  126. numba_cuda/numba/cuda/cudadrv/rtapi.py +13 -0
  127. numba_cuda/numba/cuda/cudadrv/runtime.py +34 -0
  128. numba_cuda/numba/cuda/cudaimpl.py +995 -0
  129. numba_cuda/numba/cuda/cudamath.py +149 -0
  130. numba_cuda/numba/cuda/datamodel/__init__.py +7 -0
  131. numba_cuda/numba/cuda/datamodel/cuda_manager.py +66 -0
  132. numba_cuda/numba/cuda/datamodel/cuda_models.py +1446 -0
  133. numba_cuda/numba/cuda/datamodel/cuda_packer.py +224 -0
  134. numba_cuda/numba/cuda/datamodel/cuda_registry.py +22 -0
  135. numba_cuda/numba/cuda/datamodel/cuda_testing.py +153 -0
  136. numba_cuda/numba/cuda/datamodel/manager.py +11 -0
  137. numba_cuda/numba/cuda/datamodel/models.py +9 -0
  138. numba_cuda/numba/cuda/datamodel/packer.py +9 -0
  139. numba_cuda/numba/cuda/datamodel/registry.py +11 -0
  140. numba_cuda/numba/cuda/datamodel/testing.py +11 -0
  141. numba_cuda/numba/cuda/debuginfo.py +903 -0
  142. numba_cuda/numba/cuda/decorators.py +294 -0
  143. numba_cuda/numba/cuda/descriptor.py +35 -0
  144. numba_cuda/numba/cuda/device_init.py +158 -0
  145. numba_cuda/numba/cuda/deviceufunc.py +1021 -0
  146. numba_cuda/numba/cuda/dispatcher.py +2463 -0
  147. numba_cuda/numba/cuda/errors.py +72 -0
  148. numba_cuda/numba/cuda/extending.py +697 -0
  149. numba_cuda/numba/cuda/flags.py +178 -0
  150. numba_cuda/numba/cuda/fp16.py +357 -0
  151. numba_cuda/numba/cuda/include/12/cuda_bf16.h +5118 -0
  152. numba_cuda/numba/cuda/include/12/cuda_bf16.hpp +3865 -0
  153. numba_cuda/numba/cuda/include/12/cuda_fp16.h +5363 -0
  154. numba_cuda/numba/cuda/include/12/cuda_fp16.hpp +3483 -0
  155. numba_cuda/numba/cuda/include/13/cuda_bf16.h +5118 -0
  156. numba_cuda/numba/cuda/include/13/cuda_bf16.hpp +3865 -0
  157. numba_cuda/numba/cuda/include/13/cuda_fp16.h +5363 -0
  158. numba_cuda/numba/cuda/include/13/cuda_fp16.hpp +3483 -0
  159. numba_cuda/numba/cuda/initialize.py +24 -0
  160. numba_cuda/numba/cuda/intrinsic_wrapper.py +41 -0
  161. numba_cuda/numba/cuda/intrinsics.py +382 -0
  162. numba_cuda/numba/cuda/itanium_mangler.py +214 -0
  163. numba_cuda/numba/cuda/kernels/__init__.py +2 -0
  164. numba_cuda/numba/cuda/kernels/reduction.py +265 -0
  165. numba_cuda/numba/cuda/kernels/transpose.py +65 -0
  166. numba_cuda/numba/cuda/libdevice.py +3386 -0
  167. numba_cuda/numba/cuda/libdevicedecl.py +20 -0
  168. numba_cuda/numba/cuda/libdevicefuncs.py +1060 -0
  169. numba_cuda/numba/cuda/libdeviceimpl.py +88 -0
  170. numba_cuda/numba/cuda/locks.py +19 -0
  171. numba_cuda/numba/cuda/lowering.py +1951 -0
  172. numba_cuda/numba/cuda/mathimpl.py +374 -0
  173. numba_cuda/numba/cuda/memory_management/__init__.py +4 -0
  174. numba_cuda/numba/cuda/memory_management/memsys.cu +99 -0
  175. numba_cuda/numba/cuda/memory_management/memsys.cuh +22 -0
  176. numba_cuda/numba/cuda/memory_management/nrt.cu +212 -0
  177. numba_cuda/numba/cuda/memory_management/nrt.cuh +48 -0
  178. numba_cuda/numba/cuda/memory_management/nrt.py +390 -0
  179. numba_cuda/numba/cuda/memory_management/nrt_context.py +438 -0
  180. numba_cuda/numba/cuda/misc/appdirs.py +594 -0
  181. numba_cuda/numba/cuda/misc/cffiimpl.py +24 -0
  182. numba_cuda/numba/cuda/misc/coverage_support.py +43 -0
  183. numba_cuda/numba/cuda/misc/dump_style.py +41 -0
  184. numba_cuda/numba/cuda/misc/findlib.py +75 -0
  185. numba_cuda/numba/cuda/misc/firstlinefinder.py +96 -0
  186. numba_cuda/numba/cuda/misc/gdb_hook.py +240 -0
  187. numba_cuda/numba/cuda/misc/literal.py +28 -0
  188. numba_cuda/numba/cuda/misc/llvm_pass_timings.py +412 -0
  189. numba_cuda/numba/cuda/misc/special.py +94 -0
  190. numba_cuda/numba/cuda/models.py +56 -0
  191. numba_cuda/numba/cuda/np/arraymath.py +5130 -0
  192. numba_cuda/numba/cuda/np/arrayobj.py +7635 -0
  193. numba_cuda/numba/cuda/np/extensions.py +11 -0
  194. numba_cuda/numba/cuda/np/linalg.py +3087 -0
  195. numba_cuda/numba/cuda/np/math/__init__.py +0 -0
  196. numba_cuda/numba/cuda/np/math/cmathimpl.py +558 -0
  197. numba_cuda/numba/cuda/np/math/mathimpl.py +487 -0
  198. numba_cuda/numba/cuda/np/math/numbers.py +1461 -0
  199. numba_cuda/numba/cuda/np/npdatetime.py +969 -0
  200. numba_cuda/numba/cuda/np/npdatetime_helpers.py +217 -0
  201. numba_cuda/numba/cuda/np/npyfuncs.py +1808 -0
  202. numba_cuda/numba/cuda/np/npyimpl.py +1027 -0
  203. numba_cuda/numba/cuda/np/numpy_support.py +798 -0
  204. numba_cuda/numba/cuda/np/polynomial/__init__.py +4 -0
  205. numba_cuda/numba/cuda/np/polynomial/polynomial_core.py +242 -0
  206. numba_cuda/numba/cuda/np/polynomial/polynomial_functions.py +380 -0
  207. numba_cuda/numba/cuda/np/ufunc/__init__.py +4 -0
  208. numba_cuda/numba/cuda/np/ufunc/decorators.py +203 -0
  209. numba_cuda/numba/cuda/np/ufunc/sigparse.py +68 -0
  210. numba_cuda/numba/cuda/np/ufunc/ufuncbuilder.py +65 -0
  211. numba_cuda/numba/cuda/np/ufunc_db.py +1282 -0
  212. numba_cuda/numba/cuda/np/unsafe/__init__.py +0 -0
  213. numba_cuda/numba/cuda/np/unsafe/ndarray.py +84 -0
  214. numba_cuda/numba/cuda/nvvmutils.py +254 -0
  215. numba_cuda/numba/cuda/printimpl.py +126 -0
  216. numba_cuda/numba/cuda/random.py +308 -0
  217. numba_cuda/numba/cuda/reshape_funcs.cu +156 -0
  218. numba_cuda/numba/cuda/serialize.py +267 -0
  219. numba_cuda/numba/cuda/simulator/__init__.py +63 -0
  220. numba_cuda/numba/cuda/simulator/_internal/__init__.py +4 -0
  221. numba_cuda/numba/cuda/simulator/_internal/cuda_bf16.py +2 -0
  222. numba_cuda/numba/cuda/simulator/api.py +179 -0
  223. numba_cuda/numba/cuda/simulator/bf16.py +4 -0
  224. numba_cuda/numba/cuda/simulator/compiler.py +38 -0
  225. numba_cuda/numba/cuda/simulator/cudadrv/__init__.py +11 -0
  226. numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py +462 -0
  227. numba_cuda/numba/cuda/simulator/cudadrv/devices.py +122 -0
  228. numba_cuda/numba/cuda/simulator/cudadrv/driver.py +66 -0
  229. numba_cuda/numba/cuda/simulator/cudadrv/drvapi.py +7 -0
  230. numba_cuda/numba/cuda/simulator/cudadrv/dummyarray.py +7 -0
  231. numba_cuda/numba/cuda/simulator/cudadrv/error.py +10 -0
  232. numba_cuda/numba/cuda/simulator/cudadrv/libs.py +10 -0
  233. numba_cuda/numba/cuda/simulator/cudadrv/linkable_code.py +61 -0
  234. numba_cuda/numba/cuda/simulator/cudadrv/nvrtc.py +11 -0
  235. numba_cuda/numba/cuda/simulator/cudadrv/nvvm.py +32 -0
  236. numba_cuda/numba/cuda/simulator/cudadrv/runtime.py +22 -0
  237. numba_cuda/numba/cuda/simulator/dispatcher.py +11 -0
  238. numba_cuda/numba/cuda/simulator/kernel.py +320 -0
  239. numba_cuda/numba/cuda/simulator/kernelapi.py +509 -0
  240. numba_cuda/numba/cuda/simulator/memory_management/__init__.py +4 -0
  241. numba_cuda/numba/cuda/simulator/memory_management/nrt.py +21 -0
  242. numba_cuda/numba/cuda/simulator/reduction.py +19 -0
  243. numba_cuda/numba/cuda/simulator/tests/support.py +4 -0
  244. numba_cuda/numba/cuda/simulator/vector_types.py +65 -0
  245. numba_cuda/numba/cuda/simulator_init.py +18 -0
  246. numba_cuda/numba/cuda/stubs.py +635 -0
  247. numba_cuda/numba/cuda/target.py +505 -0
  248. numba_cuda/numba/cuda/testing.py +347 -0
  249. numba_cuda/numba/cuda/tests/__init__.py +62 -0
  250. numba_cuda/numba/cuda/tests/benchmarks/__init__.py +0 -0
  251. numba_cuda/numba/cuda/tests/benchmarks/test_kernel_launch.py +119 -0
  252. numba_cuda/numba/cuda/tests/cloudpickle_main_class.py +9 -0
  253. numba_cuda/numba/cuda/tests/core/serialize_usecases.py +113 -0
  254. numba_cuda/numba/cuda/tests/core/test_itanium_mangler.py +83 -0
  255. numba_cuda/numba/cuda/tests/core/test_serialize.py +371 -0
  256. numba_cuda/numba/cuda/tests/cudadrv/__init__.py +9 -0
  257. numba_cuda/numba/cuda/tests/cudadrv/test_array_attr.py +147 -0
  258. numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py +161 -0
  259. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_array_slicing.py +397 -0
  260. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_auto_context.py +24 -0
  261. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py +180 -0
  262. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py +313 -0
  263. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_memory.py +187 -0
  264. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py +621 -0
  265. numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py +247 -0
  266. numba_cuda/numba/cuda/tests/cudadrv/test_detect.py +100 -0
  267. numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +198 -0
  268. numba_cuda/numba/cuda/tests/cudadrv/test_events.py +53 -0
  269. numba_cuda/numba/cuda/tests/cudadrv/test_host_alloc.py +72 -0
  270. numba_cuda/numba/cuda/tests/cudadrv/test_init.py +138 -0
  271. numba_cuda/numba/cuda/tests/cudadrv/test_inline_ptx.py +43 -0
  272. numba_cuda/numba/cuda/tests/cudadrv/test_is_fp16.py +15 -0
  273. numba_cuda/numba/cuda/tests/cudadrv/test_linkable_code.py +58 -0
  274. numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +348 -0
  275. numba_cuda/numba/cuda/tests/cudadrv/test_managed_alloc.py +128 -0
  276. numba_cuda/numba/cuda/tests/cudadrv/test_module_callbacks.py +301 -0
  277. numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py +174 -0
  278. numba_cuda/numba/cuda/tests/cudadrv/test_nvrtc.py +28 -0
  279. numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py +185 -0
  280. numba_cuda/numba/cuda/tests/cudadrv/test_pinned.py +39 -0
  281. numba_cuda/numba/cuda/tests/cudadrv/test_profiler.py +23 -0
  282. numba_cuda/numba/cuda/tests/cudadrv/test_reset_device.py +38 -0
  283. numba_cuda/numba/cuda/tests/cudadrv/test_runtime.py +48 -0
  284. numba_cuda/numba/cuda/tests/cudadrv/test_select_device.py +44 -0
  285. numba_cuda/numba/cuda/tests/cudadrv/test_streams.py +127 -0
  286. numba_cuda/numba/cuda/tests/cudapy/__init__.py +9 -0
  287. numba_cuda/numba/cuda/tests/cudapy/cache_usecases.py +231 -0
  288. numba_cuda/numba/cuda/tests/cudapy/cache_with_cpu_usecases.py +50 -0
  289. numba_cuda/numba/cuda/tests/cudapy/cg_cache_usecases.py +36 -0
  290. numba_cuda/numba/cuda/tests/cudapy/complex_usecases.py +116 -0
  291. numba_cuda/numba/cuda/tests/cudapy/enum_usecases.py +59 -0
  292. numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +62 -0
  293. numba_cuda/numba/cuda/tests/cudapy/jitlink.ptx +28 -0
  294. numba_cuda/numba/cuda/tests/cudapy/overload_usecases.py +33 -0
  295. numba_cuda/numba/cuda/tests/cudapy/recursion_usecases.py +104 -0
  296. numba_cuda/numba/cuda/tests/cudapy/test_alignment.py +47 -0
  297. numba_cuda/numba/cuda/tests/cudapy/test_analysis.py +1122 -0
  298. numba_cuda/numba/cuda/tests/cudapy/test_array.py +344 -0
  299. numba_cuda/numba/cuda/tests/cudapy/test_array_alignment.py +268 -0
  300. numba_cuda/numba/cuda/tests/cudapy/test_array_args.py +203 -0
  301. numba_cuda/numba/cuda/tests/cudapy/test_array_methods.py +63 -0
  302. numba_cuda/numba/cuda/tests/cudapy/test_array_reductions.py +360 -0
  303. numba_cuda/numba/cuda/tests/cudapy/test_atomics.py +1815 -0
  304. numba_cuda/numba/cuda/tests/cudapy/test_bfloat16.py +599 -0
  305. numba_cuda/numba/cuda/tests/cudapy/test_bfloat16_bindings.py +377 -0
  306. numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py +160 -0
  307. numba_cuda/numba/cuda/tests/cudapy/test_boolean.py +27 -0
  308. numba_cuda/numba/cuda/tests/cudapy/test_byteflow.py +98 -0
  309. numba_cuda/numba/cuda/tests/cudapy/test_cache_hints.py +210 -0
  310. numba_cuda/numba/cuda/tests/cudapy/test_caching.py +683 -0
  311. numba_cuda/numba/cuda/tests/cudapy/test_casting.py +265 -0
  312. numba_cuda/numba/cuda/tests/cudapy/test_cffi.py +42 -0
  313. numba_cuda/numba/cuda/tests/cudapy/test_compiler.py +718 -0
  314. numba_cuda/numba/cuda/tests/cudapy/test_complex.py +370 -0
  315. numba_cuda/numba/cuda/tests/cudapy/test_complex_kernel.py +23 -0
  316. numba_cuda/numba/cuda/tests/cudapy/test_const_string.py +142 -0
  317. numba_cuda/numba/cuda/tests/cudapy/test_constmem.py +178 -0
  318. numba_cuda/numba/cuda/tests/cudapy/test_cooperative_groups.py +193 -0
  319. numba_cuda/numba/cuda/tests/cudapy/test_copy_propagate.py +131 -0
  320. numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py +438 -0
  321. numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py +94 -0
  322. numba_cuda/numba/cuda/tests/cudapy/test_datetime.py +101 -0
  323. numba_cuda/numba/cuda/tests/cudapy/test_debug.py +105 -0
  324. numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +889 -0
  325. numba_cuda/numba/cuda/tests/cudapy/test_debuginfo_types.py +476 -0
  326. numba_cuda/numba/cuda/tests/cudapy/test_device_func.py +500 -0
  327. numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +820 -0
  328. numba_cuda/numba/cuda/tests/cudapy/test_enums.py +152 -0
  329. numba_cuda/numba/cuda/tests/cudapy/test_errors.py +111 -0
  330. numba_cuda/numba/cuda/tests/cudapy/test_exception.py +170 -0
  331. numba_cuda/numba/cuda/tests/cudapy/test_extending.py +1088 -0
  332. numba_cuda/numba/cuda/tests/cudapy/test_extending_types.py +71 -0
  333. numba_cuda/numba/cuda/tests/cudapy/test_fastmath.py +265 -0
  334. numba_cuda/numba/cuda/tests/cudapy/test_flow_control.py +1433 -0
  335. numba_cuda/numba/cuda/tests/cudapy/test_forall.py +57 -0
  336. numba_cuda/numba/cuda/tests/cudapy/test_freevar.py +34 -0
  337. numba_cuda/numba/cuda/tests/cudapy/test_frexp_ldexp.py +69 -0
  338. numba_cuda/numba/cuda/tests/cudapy/test_globals.py +62 -0
  339. numba_cuda/numba/cuda/tests/cudapy/test_gufunc.py +474 -0
  340. numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scalar.py +167 -0
  341. numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scheduling.py +92 -0
  342. numba_cuda/numba/cuda/tests/cudapy/test_idiv.py +39 -0
  343. numba_cuda/numba/cuda/tests/cudapy/test_inline.py +170 -0
  344. numba_cuda/numba/cuda/tests/cudapy/test_inspect.py +255 -0
  345. numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py +1219 -0
  346. numba_cuda/numba/cuda/tests/cudapy/test_ipc.py +263 -0
  347. numba_cuda/numba/cuda/tests/cudapy/test_ir.py +598 -0
  348. numba_cuda/numba/cuda/tests/cudapy/test_ir_utils.py +276 -0
  349. numba_cuda/numba/cuda/tests/cudapy/test_iterators.py +101 -0
  350. numba_cuda/numba/cuda/tests/cudapy/test_lang.py +68 -0
  351. numba_cuda/numba/cuda/tests/cudapy/test_laplace.py +123 -0
  352. numba_cuda/numba/cuda/tests/cudapy/test_libdevice.py +194 -0
  353. numba_cuda/numba/cuda/tests/cudapy/test_lineinfo.py +220 -0
  354. numba_cuda/numba/cuda/tests/cudapy/test_localmem.py +173 -0
  355. numba_cuda/numba/cuda/tests/cudapy/test_make_function_to_jit_function.py +364 -0
  356. numba_cuda/numba/cuda/tests/cudapy/test_mandel.py +47 -0
  357. numba_cuda/numba/cuda/tests/cudapy/test_math.py +842 -0
  358. numba_cuda/numba/cuda/tests/cudapy/test_matmul.py +76 -0
  359. numba_cuda/numba/cuda/tests/cudapy/test_minmax.py +78 -0
  360. numba_cuda/numba/cuda/tests/cudapy/test_montecarlo.py +25 -0
  361. numba_cuda/numba/cuda/tests/cudapy/test_multigpu.py +145 -0
  362. numba_cuda/numba/cuda/tests/cudapy/test_multiprocessing.py +39 -0
  363. numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py +82 -0
  364. numba_cuda/numba/cuda/tests/cudapy/test_nondet.py +53 -0
  365. numba_cuda/numba/cuda/tests/cudapy/test_operator.py +504 -0
  366. numba_cuda/numba/cuda/tests/cudapy/test_optimization.py +93 -0
  367. numba_cuda/numba/cuda/tests/cudapy/test_overload.py +402 -0
  368. numba_cuda/numba/cuda/tests/cudapy/test_powi.py +128 -0
  369. numba_cuda/numba/cuda/tests/cudapy/test_print.py +193 -0
  370. numba_cuda/numba/cuda/tests/cudapy/test_py2_div_issue.py +37 -0
  371. numba_cuda/numba/cuda/tests/cudapy/test_random.py +117 -0
  372. numba_cuda/numba/cuda/tests/cudapy/test_record_dtype.py +614 -0
  373. numba_cuda/numba/cuda/tests/cudapy/test_recursion.py +130 -0
  374. numba_cuda/numba/cuda/tests/cudapy/test_reduction.py +94 -0
  375. numba_cuda/numba/cuda/tests/cudapy/test_retrieve_autoconverted_arrays.py +83 -0
  376. numba_cuda/numba/cuda/tests/cudapy/test_serialize.py +86 -0
  377. numba_cuda/numba/cuda/tests/cudapy/test_slicing.py +40 -0
  378. numba_cuda/numba/cuda/tests/cudapy/test_sm.py +457 -0
  379. numba_cuda/numba/cuda/tests/cudapy/test_sm_creation.py +233 -0
  380. numba_cuda/numba/cuda/tests/cudapy/test_ssa.py +454 -0
  381. numba_cuda/numba/cuda/tests/cudapy/test_stream_api.py +56 -0
  382. numba_cuda/numba/cuda/tests/cudapy/test_sync.py +277 -0
  383. numba_cuda/numba/cuda/tests/cudapy/test_tracing.py +200 -0
  384. numba_cuda/numba/cuda/tests/cudapy/test_transpose.py +90 -0
  385. numba_cuda/numba/cuda/tests/cudapy/test_typeconv.py +333 -0
  386. numba_cuda/numba/cuda/tests/cudapy/test_typeinfer.py +538 -0
  387. numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +585 -0
  388. numba_cuda/numba/cuda/tests/cudapy/test_userexc.py +42 -0
  389. numba_cuda/numba/cuda/tests/cudapy/test_vector_type.py +485 -0
  390. numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py +312 -0
  391. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_complex.py +23 -0
  392. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py +183 -0
  393. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_device.py +40 -0
  394. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_scalar_arg.py +40 -0
  395. numba_cuda/numba/cuda/tests/cudapy/test_warning.py +206 -0
  396. numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +331 -0
  397. numba_cuda/numba/cuda/tests/cudasim/__init__.py +9 -0
  398. numba_cuda/numba/cuda/tests/cudasim/support.py +9 -0
  399. numba_cuda/numba/cuda/tests/cudasim/test_cudasim_issues.py +111 -0
  400. numba_cuda/numba/cuda/tests/data/__init__.py +2 -0
  401. numba_cuda/numba/cuda/tests/data/cta_barrier.cu +28 -0
  402. numba_cuda/numba/cuda/tests/data/cuda_include.cu +10 -0
  403. numba_cuda/numba/cuda/tests/data/error.cu +12 -0
  404. numba_cuda/numba/cuda/tests/data/include/add.cuh +8 -0
  405. numba_cuda/numba/cuda/tests/data/jitlink.cu +28 -0
  406. numba_cuda/numba/cuda/tests/data/jitlink.ptx +49 -0
  407. numba_cuda/numba/cuda/tests/data/warn.cu +12 -0
  408. numba_cuda/numba/cuda/tests/doc_examples/__init__.py +9 -0
  409. numba_cuda/numba/cuda/tests/doc_examples/ffi/__init__.py +2 -0
  410. numba_cuda/numba/cuda/tests/doc_examples/ffi/functions.cu +54 -0
  411. numba_cuda/numba/cuda/tests/doc_examples/ffi/include/mul.cuh +8 -0
  412. numba_cuda/numba/cuda/tests/doc_examples/ffi/saxpy.cu +14 -0
  413. numba_cuda/numba/cuda/tests/doc_examples/test_cg.py +86 -0
  414. numba_cuda/numba/cuda/tests/doc_examples/test_cpointer.py +68 -0
  415. numba_cuda/numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py +81 -0
  416. numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py +141 -0
  417. numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py +160 -0
  418. numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py +180 -0
  419. numba_cuda/numba/cuda/tests/doc_examples/test_montecarlo.py +119 -0
  420. numba_cuda/numba/cuda/tests/doc_examples/test_random.py +66 -0
  421. numba_cuda/numba/cuda/tests/doc_examples/test_reduction.py +80 -0
  422. numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py +206 -0
  423. numba_cuda/numba/cuda/tests/doc_examples/test_ufunc.py +53 -0
  424. numba_cuda/numba/cuda/tests/doc_examples/test_vecadd.py +76 -0
  425. numba_cuda/numba/cuda/tests/nocuda/__init__.py +9 -0
  426. numba_cuda/numba/cuda/tests/nocuda/test_dummyarray.py +391 -0
  427. numba_cuda/numba/cuda/tests/nocuda/test_function_resolution.py +48 -0
  428. numba_cuda/numba/cuda/tests/nocuda/test_import.py +63 -0
  429. numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py +252 -0
  430. numba_cuda/numba/cuda/tests/nocuda/test_nvvm.py +59 -0
  431. numba_cuda/numba/cuda/tests/nrt/__init__.py +9 -0
  432. numba_cuda/numba/cuda/tests/nrt/test_nrt.py +387 -0
  433. numba_cuda/numba/cuda/tests/nrt/test_nrt_refct.py +124 -0
  434. numba_cuda/numba/cuda/tests/support.py +900 -0
  435. numba_cuda/numba/cuda/typeconv/__init__.py +4 -0
  436. numba_cuda/numba/cuda/typeconv/castgraph.py +137 -0
  437. numba_cuda/numba/cuda/typeconv/rules.py +63 -0
  438. numba_cuda/numba/cuda/typeconv/typeconv.py +121 -0
  439. numba_cuda/numba/cuda/types/__init__.py +233 -0
  440. numba_cuda/numba/cuda/types/__init__.pyi +167 -0
  441. numba_cuda/numba/cuda/types/abstract.py +9 -0
  442. numba_cuda/numba/cuda/types/common.py +9 -0
  443. numba_cuda/numba/cuda/types/containers.py +9 -0
  444. numba_cuda/numba/cuda/types/cuda_abstract.py +533 -0
  445. numba_cuda/numba/cuda/types/cuda_common.py +110 -0
  446. numba_cuda/numba/cuda/types/cuda_containers.py +971 -0
  447. numba_cuda/numba/cuda/types/cuda_function_type.py +230 -0
  448. numba_cuda/numba/cuda/types/cuda_functions.py +798 -0
  449. numba_cuda/numba/cuda/types/cuda_iterators.py +120 -0
  450. numba_cuda/numba/cuda/types/cuda_misc.py +569 -0
  451. numba_cuda/numba/cuda/types/cuda_npytypes.py +690 -0
  452. numba_cuda/numba/cuda/types/cuda_scalars.py +280 -0
  453. numba_cuda/numba/cuda/types/ext_types.py +101 -0
  454. numba_cuda/numba/cuda/types/function_type.py +11 -0
  455. numba_cuda/numba/cuda/types/functions.py +9 -0
  456. numba_cuda/numba/cuda/types/iterators.py +9 -0
  457. numba_cuda/numba/cuda/types/misc.py +9 -0
  458. numba_cuda/numba/cuda/types/npytypes.py +9 -0
  459. numba_cuda/numba/cuda/types/scalars.py +9 -0
  460. numba_cuda/numba/cuda/typing/__init__.py +19 -0
  461. numba_cuda/numba/cuda/typing/arraydecl.py +939 -0
  462. numba_cuda/numba/cuda/typing/asnumbatype.py +130 -0
  463. numba_cuda/numba/cuda/typing/bufproto.py +70 -0
  464. numba_cuda/numba/cuda/typing/builtins.py +1209 -0
  465. numba_cuda/numba/cuda/typing/cffi_utils.py +219 -0
  466. numba_cuda/numba/cuda/typing/cmathdecl.py +47 -0
  467. numba_cuda/numba/cuda/typing/collections.py +138 -0
  468. numba_cuda/numba/cuda/typing/context.py +782 -0
  469. numba_cuda/numba/cuda/typing/ctypes_utils.py +125 -0
  470. numba_cuda/numba/cuda/typing/dictdecl.py +63 -0
  471. numba_cuda/numba/cuda/typing/enumdecl.py +74 -0
  472. numba_cuda/numba/cuda/typing/listdecl.py +147 -0
  473. numba_cuda/numba/cuda/typing/mathdecl.py +158 -0
  474. numba_cuda/numba/cuda/typing/npdatetime.py +322 -0
  475. numba_cuda/numba/cuda/typing/npydecl.py +749 -0
  476. numba_cuda/numba/cuda/typing/setdecl.py +115 -0
  477. numba_cuda/numba/cuda/typing/templates.py +1446 -0
  478. numba_cuda/numba/cuda/typing/typeof.py +301 -0
  479. numba_cuda/numba/cuda/ufuncs.py +746 -0
  480. numba_cuda/numba/cuda/utils.py +724 -0
  481. numba_cuda/numba/cuda/vector_types.py +214 -0
  482. numba_cuda/numba/cuda/vectorizers.py +260 -0
  483. numba_cuda-0.21.1.dist-info/METADATA +109 -0
  484. numba_cuda-0.21.1.dist-info/RECORD +488 -0
  485. numba_cuda-0.21.1.dist-info/WHEEL +5 -0
  486. numba_cuda-0.21.1.dist-info/licenses/LICENSE +26 -0
  487. numba_cuda-0.21.1.dist-info/licenses/LICENSE.numba +24 -0
  488. numba_cuda-0.21.1.dist-info/top_level.txt +1 -0
@@ -0,0 +1,138 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: BSD-2-Clause
3
+
4
+ import concurrent.futures
5
+ import multiprocessing as mp
6
+ import os
7
+
8
+ from numba import cuda
9
+ from numba.cuda.cudadrv.driver import CudaAPIError, driver
10
+ from numba.cuda.cudadrv.error import CudaSupportError
11
+ from numba.cuda.testing import skip_on_cudasim, unittest, CUDATestCase
12
+
13
+
14
def cuInit_raising(arg):
    """Stand-in for ``cuInit`` that fails unconditionally.

    ``arg`` is accepted and ignored; every call raises ``CudaAPIError``
    built from code 999 and the text ``CUDA_ERROR_UNKNOWN``.
    """
    code, text = 999, "CUDA_ERROR_UNKNOWN"
    raise CudaAPIError(code, text)
17
+
18
+
19
# Test code to run in a child process. driver.cuInit is not assigned until
# initialization is first attempted, so mock.patch.object cannot locate an
# original attribute to replace and fails. The raising stub is therefore
# assigned directly, before anything attempts to initialize.
def cuInit_raising_test():
    """Install the raising ``cuInit`` stub and force driver initialization.

    Returns:
        ``(True, msg)`` with the ``msg`` attribute of the caught
        ``CudaSupportError``, or ``(False, None)`` when no such exception
        was raised.
    """
    driver.cuInit = cuInit_raising

    try:
        # A CUDA operation that forces initialization of the device
        cuda.device_array(1)
    except CudaSupportError as err:
        return True, err.msg

    return False, None
38
+
39
+
40
# Counterpart of cuInit_raising_test that reports the string returned by
# cuda.cuda_error() rather than the message carried by the exception.
def initialization_error_test():
    """Install the raising ``cuInit`` stub and report ``cuda.cuda_error()``.

    Returns:
        ``(raised, error)`` where ``raised`` tells whether
        ``CudaSupportError`` was raised by the forced initialization and
        ``error`` is whatever ``cuda.cuda_error()`` returns afterwards.
    """
    driver.cuInit = cuInit_raising

    raised = False
    try:
        # A CUDA operation that forces initialization of the device
        cuda.device_array(1)
    except CudaSupportError:
        raised = True

    return raised, cuda.cuda_error()
54
+
55
+
56
# Exercises the path where Driver.__init__() catches a CudaSupportError.
def cuda_disabled_test():
    """Force driver initialization and report the resulting exception.

    Unlike the ``cuInit``-patching helpers, nothing is stubbed here.

    Returns:
        ``(True, msg)`` with the ``msg`` attribute of the caught
        ``CudaSupportError``, or ``(False, None)`` when no such exception
        was raised.
    """
    try:
        # A CUDA operation that forces initialization of the device
        cuda.device_array(1)
    except CudaSupportError as err:
        return True, err.msg

    return False, None
69
+
70
+
71
# Counterpart of cuda_disabled_test that inspects cuda.cuda_error() instead of
# the exception raised on initialization.
def cuda_disabled_error_test():
    """Force driver initialization and report ``cuda.cuda_error()``.

    Returns:
        ``(raised, error)`` where ``raised`` tells whether
        ``CudaSupportError`` was raised and ``error`` is whatever
        ``cuda.cuda_error()`` returns afterwards.
    """
    raised = False
    try:
        # A CUDA operation that forces initialization of the device
        cuda.device_array(1)
    except CudaSupportError:
        raised = True

    return raised, cuda.cuda_error()
83
+
84
+
85
@skip_on_cudasim("CUDA Simulator does not initialize driver")
class TestInit(CUDATestCase):
    """Checks how failures during CUDA driver initialization are reported.

    Each failure scenario is a module-level function returning
    ``(success, msg)``; it is executed in a separate, spawned process and
    the returned pair is checked in the parent.
    """

    def _test_init_failure(self, target, expected):
        """Run ``target`` in a subprocess and check the message it reports.

        Args:
            target: Argument-free module-level callable returning
                ``(success, msg)``.
            expected: Substring that must appear in ``msg``.
        """
        # Run the initialization failure test in a separate subprocess
        with concurrent.futures.ProcessPoolExecutor(
            mp_context=mp.get_context("spawn")
        ) as exe:
            # should complete within 30s
            success, msg = exe.submit(target).result(timeout=30)

        # Ensure the child process raised an exception during initialization
        # before checking the message. An explicit failure is used because
        # msg may be None here, and a membership test on it would raise a
        # TypeError instead of reporting the real problem.
        if not success:
            self.fail("CudaSupportError not raised")

        self.assertIn(expected, msg)

    def test_init_failure_raising(self):
        """The exception message names the driver-init failure."""
        expected = "Error at driver init: CUDA_ERROR_UNKNOWN (999)"
        self._test_init_failure(cuInit_raising_test, expected)

    def test_init_failure_error(self):
        """``cuda.cuda_error()`` names the failing error code."""
        expected = "CUDA_ERROR_UNKNOWN (999)"
        self._test_init_failure(initialization_error_test, expected)

    def _test_cuda_disabled(self, target):
        """Run ``target`` in a subprocess with ``NUMBA_DISABLE_CUDA=1``.

        The variable is set in this process's environment for the duration
        of the call and restored (or removed) afterwards, even on failure.

        Args:
            target: Argument-free module-level callable returning
                ``(success, msg)``.
        """
        # Uses _test_init_failure to launch the test in a separate subprocess
        # with CUDA disabled.
        previous = os.environ.get("NUMBA_DISABLE_CUDA")
        os.environ["NUMBA_DISABLE_CUDA"] = "1"
        try:
            expected = "CUDA is disabled due to setting NUMBA_DISABLE_CUDA=1"
            # Run the callable that was passed in; previously the argument
            # was ignored and cuda_disabled_test was always used.
            self._test_init_failure(target, expected)
        finally:
            if previous is not None:
                os.environ["NUMBA_DISABLE_CUDA"] = previous
            else:
                os.environ.pop("NUMBA_DISABLE_CUDA", None)

    def test_cuda_disabled_raising(self):
        """The raised exception explains that CUDA is disabled."""
        self._test_cuda_disabled(cuda_disabled_test)

    def test_cuda_disabled_error(self):
        """``cuda.cuda_error()`` explains that CUDA is disabled."""
        self._test_cuda_disabled(cuda_disabled_error_test)

    def test_init_success(self):
        """No initialization error is recorded in the current process."""
        # Here we assume that initialization is successful (because many bad
        # things will happen with the test suite if it is not) and check that
        # there is no error recorded.
        self.assertIsNone(cuda.cuda_error())
135
+
136
+
137
+ if __name__ == "__main__":
138
+ unittest.main()
@@ -0,0 +1,43 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: BSD-2-Clause
3
+
4
+ from llvmlite import ir
5
+
6
+ from numba.cuda.cudadrv import nvvm
7
+ from numba.cuda.testing import unittest, CUDATestCase
8
+ from numba.cuda.testing import skip_on_cudasim
9
+
10
+
11
@skip_on_cudasim("Inline PTX cannot be used in the simulator")
class TestCudaInlineAsm(CUDATestCase):
    """Checks that inline PTX in hand-built IR reaches the generated PTX."""

    def test_inline_rsqrt(self):
        """Build a kernel calling ``rsqrt.approx.f32`` via inline asm.

        The module is assembled with llvmlite's IR builder, compiled with
        ``nvvm.compile_ir``, and the instruction mnemonic is looked up in
        the resulting PTX text.
        """
        mod = ir.Module(__name__)
        mod.triple = "nvptx64-nvidia-cuda"
        nvvm.add_ir_version(mod)

        # void cu_rsqrt(float *): loads the value, applies the inline asm and
        # stores the result back through the same pointer.
        f32 = ir.FloatType()
        fnty = ir.FunctionType(ir.VoidType(), [ir.PointerType(f32)])
        fn = ir.Function(mod, fnty, "cu_rsqrt")
        bldr = ir.IRBuilder(fn.append_basic_block("entry"))

        rsqrt_approx_fnty = ir.FunctionType(ir.FloatType(), [ir.FloatType()])
        inlineasm = ir.InlineAsm(
            rsqrt_approx_fnty,
            "rsqrt.approx.f32 $0, $1;",
            "=f,f",
            side_effect=True,
        )
        val = bldr.load(fn.args[0])
        res = bldr.call(inlineasm, [val])

        bldr.store(res, fn.args[0])
        bldr.ret_void()

        # generate ptx
        mod.data_layout = nvvm.NVVM().data_layout
        nvvm.set_cuda_kernel(fn)
        nvvmir = str(mod)
        ptx = nvvm.compile_ir(nvvmir)

        # assertIn reports the searched text on failure, unlike
        # assertTrue(... in ...).
        self.assertIn("rsqrt.approx.f32", str(ptx))
40
+
41
+
42
+ if __name__ == "__main__":
43
+ unittest.main()
@@ -0,0 +1,15 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: BSD-2-Clause
3
+
4
+ from numba import cuda
5
+ from numba.cuda.testing import CUDATestCase, skip_on_cudasim, skip_unless_cc_53
6
+
7
+
8
class TestIsFP16Supported(CUDATestCase):
    """Checks the float16 support queries exposed by ``numba.cuda``."""

    def test_is_fp16_supported(self):
        """``cuda.is_float16_supported()`` reports a truthy value."""
        supported = cuda.is_float16_supported()
        self.assertTrue(supported)

    @skip_on_cudasim("fp16 not available in sim")
    @skip_unless_cc_53
    def test_device_supports_float16(self):
        """The current device's ``supports_float16`` attribute is truthy."""
        device = cuda.get_current_device()
        self.assertTrue(device.supports_float16)
@@ -0,0 +1,58 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: BSD-2-Clause
3
+
4
+ import os
5
+ from numba import cuda
6
+ from numba.cuda.cudadrv.linkable_code import LinkableCode
7
+ from numba.cuda.testing import CUDATestCase, skip_on_cudasim
8
+ import unittest
9
+
10
# Directory containing the test binaries, read from the environment. The
# path names below are only defined when the variable is set to a non-empty
# value.
TEST_BIN_DIR = os.getenv("NUMBA_CUDA_TEST_BIN_DIR")
if TEST_BIN_DIR:
    # Source-like inputs
    test_device_functions_cu = os.path.join(
        TEST_BIN_DIR, "test_device_functions.cu"
    )
    test_device_functions_ptx = os.path.join(
        TEST_BIN_DIR, "test_device_functions.ptx"
    )
    test_device_functions_ltoir = os.path.join(
        TEST_BIN_DIR, "test_device_functions.ltoir"
    )

    # Binary inputs
    test_device_functions_a = os.path.join(
        TEST_BIN_DIR, "test_device_functions.a"
    )
    test_device_functions_o = os.path.join(
        TEST_BIN_DIR, "test_device_functions.o"
    )
    test_device_functions_cubin = os.path.join(
        TEST_BIN_DIR, "test_device_functions.cubin"
    )
    test_device_functions_fatbin = os.path.join(
        TEST_BIN_DIR, "test_device_functions.fatbin"
    )
    test_device_functions_fatbin_multi = os.path.join(
        TEST_BIN_DIR, "test_device_functions_multi.fatbin"
    )
36
+
37
+
38
class TestLinkableCode(CUDATestCase):
    """Checks ``LinkableCode.from_path_or_obj`` against the test binaries."""

    @skip_on_cudasim(reason="Simulator does not support linkable code")
    @unittest.skipIf(not TEST_BIN_DIR, "necessary binaries not generated.")
    def test_linkable_code_from_path_or_obj(self):
        """Each path maps to its expected class; objects pass through.

        For every test file, ``from_path_or_obj`` must return an instance of
        the listed class, and calling it again with that instance must return
        the very same object.
        """
        expected_kinds = {
            test_device_functions_a: cuda.Archive,
            test_device_functions_cubin: cuda.Cubin,
            test_device_functions_cu: cuda.CUSource,
            test_device_functions_fatbin: cuda.Fatbin,
            test_device_functions_o: cuda.Object,
            test_device_functions_ptx: cuda.PTXSource,
            test_device_functions_ltoir: cuda.LTOIR,
        }

        for file_path, expected_kind in expected_kinds.items():
            loaded = LinkableCode.from_path_or_obj(file_path)
            assert isinstance(loaded, expected_kind)

            # test identity of from_path_or_obj
            again = LinkableCode.from_path_or_obj(loaded)
            assert again is loaded
@@ -0,0 +1,348 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: BSD-2-Clause
3
+
4
+ import numpy as np
5
+ import pytest
6
+ from numba.cuda.testing import unittest
7
+ from numba.cuda.testing import (
8
+ skip_on_cudasim,
9
+ skip_if_cuda_includes_missing,
10
+ skip_if_nvjitlink_missing,
11
+ )
12
+ from numba.cuda.testing import CUDATestCase, test_data_dir
13
+ from numba.cuda.cudadrv.driver import CudaAPIError, _Linker, LinkerError
14
+ from numba.cuda import require_context
15
+ from numba import cuda
16
+ from numba.cuda import void, float64, int64, int32, float32
17
+ from numba.cuda.typing.typeof import typeof
18
+
19
+ CONST1D = np.arange(10, dtype=np.float64)
20
+
21
+
22
def simple_const_mem(A):
    """Kernel body: write ``CONST1D[i] + 1.0`` into ``A[i]``.

    ``CONST1D`` is accessed through ``cuda.const.array_like`` and ``i`` is
    the thread's 1D grid index. No bounds check is performed.
    """
    const_view = cuda.const.array_like(CONST1D)
    idx = cuda.grid(1)

    A[idx] = const_view[idx] + 1.0
27
+
28
+
29
def func_with_lots_of_registers(x, a, b, c, d, e, f):
    """Kernel body that keeps twenty accumulators updated in one loop.

    Four groups of five accumulators are updated ``a`` times using ``b`` to
    ``f`` (add, multiply, divide, left shift), then their sums are written
    to ``x`` at the thread's 1D grid index. The register tests in this file
    compile this function and inspect ``get_regs_per_thread()``, so the
    statements are intentionally left spelled out one by one.
    """
    # Additive accumulators
    a1 = 1.0
    a2 = 1.0
    a3 = 1.0
    a4 = 1.0
    a5 = 1.0
    # Multiplicative accumulators
    b1 = 1.0
    b2 = 1.0
    b3 = 1.0
    b4 = 1.0
    b5 = 1.0
    # Divisive accumulators
    c1 = 1.0
    c2 = 1.0
    c3 = 1.0
    c4 = 1.0
    c5 = 1.0
    # Integer accumulators, updated by shifting
    d1 = 10
    d2 = 10
    d3 = 10
    d4 = 10
    d5 = 10
    for _ in range(a):
        a1 += b
        a2 += c
        a3 += d
        a4 += e
        a5 += f
        b1 *= b
        b2 *= c
        b3 *= d
        b4 *= e
        b5 *= f
        c1 /= b
        c2 /= c
        c3 /= d
        c4 /= e
        c5 /= f
        d1 <<= b
        d2 <<= c
        d3 <<= d
        d4 <<= e
        d5 <<= f
    x[cuda.grid(1)] = a1 + a2 + a3 + a4 + a5
    x[cuda.grid(1)] += b1 + b2 + b3 + b4 + b5
    x[cuda.grid(1)] += c1 + c2 + c3 + c4 + c5
    x[cuda.grid(1)] += d1 + d2 + d3 + d4 + d5
75
+
76
+
77
def simple_smem(ary, dty):
    """Kernel body: copy a 100-element shared array into ``ary``.

    The shared array has element type ``dty``. The thread with grid index 0
    fills it with 0..99; after ``cuda.syncthreads()`` every thread copies
    the element at its own grid index into ``ary``.
    """
    shared = cuda.shared.array(100, dty)
    tid = cuda.grid(1)
    if tid == 0:
        for k in range(100):
            shared[k] = k
    cuda.syncthreads()
    ary[tid] = shared[tid]
85
+
86
+
87
def coop_smem2d(ary):
    """Kernel body: round-trip ``(i + 1) / (j + 1)`` through shared memory.

    Uses a ``(10, 20)`` ``float32`` shared array indexed by the thread's 2D
    grid position; the value is written, threads synchronize, and the value
    is then copied to ``ary`` at the same position.
    """
    row, col = cuda.grid(2)
    shared = cuda.shared.array((10, 20), float32)
    shared[row, col] = (row + 1) / (col + 1)
    cuda.syncthreads()
    ary[row, col] = shared[row, col]
93
+
94
+
95
def simple_maxthreads(ary):
    """Kernel body: store each thread's 1D grid index at that index."""
    tid = cuda.grid(1)
    ary[tid] = tid
98
+
99
+
100
+ LMEM_SIZE = 1000
101
+
102
+
103
def simple_lmem(A, B, dty):
    """Kernel body: copy ``A`` to ``B`` by way of a local array.

    A local array of ``LMEM_SIZE`` elements of type ``dty`` is filled from
    ``A`` in a first pass and written out to ``B`` in a second pass.
    """
    scratch = cuda.local.array(LMEM_SIZE, dty)
    for k in range(scratch.shape[0]):
        scratch[k] = A[k]
    for k in range(scratch.shape[0]):
        B[k] = scratch[k]
109
+
110
+
111
@skip_on_cudasim("Linking unsupported in the simulator")
class TestLinker(CUDATestCase):
    """Tests for linking external code into kernels and for the resource
    queries (registers, shared/local/constant memory, max threads) available
    on compiled kernels."""

    @require_context
    def test_linker_basic(self):
        """Simply go through the constructor and destructor"""
        linker = _Linker.new(cc=(7, 5))
        del linker

    def _test_linking(self, eager):
        """Link ``jitlink.ptx`` into a kernel calling ``bar`` and run it.

        Args:
            eager: When true the kernel is given an explicit signature;
                otherwise it is jitted without one.
        """
        global bar  # must be a global; otherwise it is recognized as a freevar
        bar = cuda.declare_device("bar", "int32(int32)")

        link = str(test_data_dir / "jitlink.ptx")

        args = ["void(int32[:], int32[:])"] if eager else []

        @cuda.jit(*args, link=[link])
        def foo(x, y):
            i = cuda.grid(1)
            x[i] += bar(y[i])

        A = np.array([123], dtype=np.int32)
        B = np.array([321], dtype=np.int32)

        foo[1, 1](A, B)

        # assertEqual reports both values on failure, unlike
        # assertTrue(a == b).
        self.assertEqual(A[0], 123 + 2 * 321)

    def test_linking_lazy_compile(self):
        """Linking works for a kernel jitted without a signature."""
        self._test_linking(eager=False)

    def test_linking_eager_compile(self):
        """Linking works for a kernel jitted with an explicit signature."""
        self._test_linking(eager=True)

    def test_linking_cu(self):
        """A ``.cu`` file can be linked and its ``bar`` called."""
        bar = cuda.declare_device("bar", "int32(int32)")

        link = str(test_data_dir / "jitlink.cu")

        @cuda.jit(link=[link])
        def kernel(r, x):
            i = cuda.grid(1)

            if i < len(r):
                r[i] = bar(x[i])

        x = np.arange(10, dtype=np.int32)
        r = np.zeros_like(x)

        kernel[1, 32](r, x)

        # Matches the operation of bar() in jitlink.cu
        expected = x * 2
        np.testing.assert_array_equal(r, expected)

    def test_linking_cu_log_warning(self):
        """Compiling ``warn.cu`` emits exactly one NVRTC log warning."""
        bar = cuda.declare_device("bar", "int32(int32)")

        link = str(test_data_dir / "warn.cu")

        with pytest.warns(UserWarning) as w:

            @cuda.jit("void(int32)", link=[link])
            def kernel(x):
                bar(x)

        nvrtc_log_warnings = [
            wi for wi in w if "NVRTC log messages" in str(wi.message)
        ]
        self.assertEqual(
            len(nvrtc_log_warnings), 1, "Expected warnings from NVRTC"
        )
        # Check the warning refers to the log messages
        self.assertIn("NVRTC log messages", str(nvrtc_log_warnings[0].message))
        # Check the message pertaining to the unused variable is provided
        self.assertIn(
            "declared but never referenced", str(nvrtc_log_warnings[0].message)
        )

    def test_linking_cu_error(self):
        """Compiling ``error.cu`` raises ``NVRTCError`` with a useful message."""
        bar = cuda.declare_device("bar", "int32(int32)")

        link = str(test_data_dir / "error.cu")

        from cuda.core.experimental._utils.cuda_utils import NVRTCError

        with self.assertRaises(NVRTCError) as e:

            @cuda.jit("void(int32)", link=[link])
            def kernel(x):
                bar(x)

        msg = e.exception.args[0]
        # Check the error message refers to the NVRTC compile
        nvrtc_err_str = "NVRTC_ERROR_COMPILATION"
        self.assertIn(nvrtc_err_str, msg)
        # Check the expected error in the CUDA source is reported
        self.assertIn('identifier "SYNTAX" is undefined', msg)
        # Check the filename is reported correctly
        self.assertIn('in the compilation of "error.cu"', msg)

    def test_linking_unknown_filetype_error(self):
        """An unrecognized file extension is rejected with RuntimeError."""
        expected_err = "Don't know how to link file with extension .cuh"
        with self.assertRaisesRegex(RuntimeError, expected_err):

            @cuda.jit("void()", link=["header.cuh"])
            def kernel():
                pass

    def test_linking_file_with_no_extension_error(self):
        """A file name without extension is rejected with RuntimeError."""
        expected_err = "Don't know how to link file with no extension"
        with self.assertRaisesRegex(RuntimeError, expected_err):

            @cuda.jit("void()", link=["data"])
            def kernel():
                pass

    @skip_if_cuda_includes_missing
    def test_linking_cu_cuda_include(self):
        """A ``.cu`` file that includes CUDA headers can be linked."""
        link = str(test_data_dir / "cuda_include.cu")

        # An exception will be raised when linking this kernel due to the
        # compile failure if CUDA includes cannot be found by Nvrtc.
        @cuda.jit("void()", link=[link])
        def kernel():
            pass

    def test_try_to_link_nonexistent(self):
        """Linking a missing file raises ``LinkerError`` naming the file."""
        with self.assertRaises(LinkerError) as e:

            @cuda.jit("void(int32[::1])", link=["nonexistent.a"])
            def f(x):
                x[0] = 0

        self.assertIn("nonexistent.a not found", e.exception.args)

    def test_set_registers_no_max(self):
        """Ensure that the jitted kernel used in the test_set_registers_* tests
        uses more than 57 registers - this ensures that test_set_registers_*
        are really checking that they reduced the number of registers used from
        something greater than the maximum."""
        compiled = cuda.jit(func_with_lots_of_registers)
        compiled = compiled.specialize(np.empty(32), *range(6))
        self.assertGreater(compiled.get_regs_per_thread(), 57)

    def test_set_registers_57(self):
        """``max_registers=57`` caps the register count at 57."""
        compiled = cuda.jit(max_registers=57)(func_with_lots_of_registers)
        compiled = compiled.specialize(np.empty(32), *range(6))
        self.assertLessEqual(compiled.get_regs_per_thread(), 57)

    def test_set_registers_38(self):
        """``max_registers=38`` caps the register count at 38."""
        compiled = cuda.jit(max_registers=38)(func_with_lots_of_registers)
        compiled = compiled.specialize(np.empty(32), *range(6))
        self.assertLessEqual(compiled.get_regs_per_thread(), 38)

    def test_set_registers_eager(self):
        """``max_registers`` is honoured with an explicit signature."""
        sig = void(float64[::1], int64, int64, int64, int64, int64, int64)
        compiled = cuda.jit(sig, max_registers=38)(func_with_lots_of_registers)
        self.assertLessEqual(compiled.get_regs_per_thread(), 38)

    def test_get_const_mem_size(self):
        """Constant memory size is at least the size of ``CONST1D``."""
        sig = void(float64[::1])
        compiled = cuda.jit(sig)(simple_const_mem)
        const_mem_size = compiled.get_const_mem_size()
        self.assertGreaterEqual(const_mem_size, CONST1D.nbytes)

    def test_get_no_shared_memory(self):
        """A kernel that declares no shared array reports 0 bytes."""
        compiled = cuda.jit(func_with_lots_of_registers)
        compiled = compiled.specialize(np.empty(32), *range(6))
        shared_mem_size = compiled.get_shared_mem_per_block()
        self.assertEqual(shared_mem_size, 0)

    def test_get_shared_mem_per_block(self):
        """100 ``int32`` shared elements are reported as 400 bytes."""
        sig = void(int32[::1], typeof(np.int32))
        compiled = cuda.jit(sig)(simple_smem)
        shared_mem_size = compiled.get_shared_mem_per_block()
        self.assertEqual(shared_mem_size, 400)

    def test_get_shared_mem_per_specialized(self):
        """100 ``float64`` shared elements are reported as 800 bytes."""
        compiled = cuda.jit(simple_smem)
        compiled_specialized = compiled.specialize(
            np.zeros(100, dtype=np.int32), np.float64
        )
        shared_mem_size = compiled_specialized.get_shared_mem_per_block()
        self.assertEqual(shared_mem_size, 800)

    def test_get_max_threads_per_block(self):
        """The reported maximum threads per block is positive."""
        compiled = cuda.jit("void(float32[:,::1])")(coop_smem2d)
        max_threads = compiled.get_max_threads_per_block()
        self.assertGreater(max_threads, 0)

    def test_max_threads_exceeded(self):
        """Launching one thread more than the kernel's maximum fails.

        ``assertRaises`` is used so the test cannot pass silently when the
        launch unexpectedly succeeds.
        """
        compiled = cuda.jit("void(int32[::1])")(simple_maxthreads)
        max_threads = compiled.get_max_threads_per_block()
        nelem = max_threads + 1
        ary = np.empty(nelem, dtype=np.int32)
        with self.assertRaises(CudaAPIError) as raised:
            compiled[1, nelem](ary)
        self.assertIn("cuLaunchKernel", raised.exception.msg)

    def test_get_local_mem_per_thread(self):
        """Local memory is at least ``LMEM_SIZE`` ``int32`` elements."""
        sig = void(int32[::1], int32[::1], typeof(np.int32))
        compiled = cuda.jit(sig)(simple_lmem)
        local_mem_size = compiled.get_local_mem_per_thread()
        calc_size = np.dtype(np.int32).itemsize * LMEM_SIZE
        self.assertGreaterEqual(local_mem_size, calc_size)

    def test_get_local_mem_per_specialized(self):
        """Local memory is at least ``LMEM_SIZE`` ``float64`` elements."""
        compiled = cuda.jit(simple_lmem)
        compiled_specialized = compiled.specialize(
            np.zeros(LMEM_SIZE, dtype=np.int32),
            np.zeros(LMEM_SIZE, dtype=np.int32),
            np.float64,
        )
        local_mem_size = compiled_specialized.get_local_mem_per_thread()
        calc_size = np.dtype(np.float64).itemsize * LMEM_SIZE
        self.assertGreaterEqual(local_mem_size, calc_size)

    @skip_if_nvjitlink_missing("nvJitLink not installed or new enough (>12.3)")
    def test_link_for_different_cc(self):
        """Linked PTX targets the compute capability given to the linker."""
        linker = _Linker.new(cc=(7, 5), lto=True)
        code = """
        __device__ int foo(int x) {
            return x + 1;
        }
        """
        linker.add_cu(code, "foo")
        ptx = linker.get_linked_ptx().decode()
        self.assertIn("target sm_75", ptx)
345
+
346
+
347
+ if __name__ == "__main__":
348
+ unittest.main()