numba-cuda 0.22.0__cp313-cp313-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (487) hide show
  1. _numba_cuda_redirector.pth +4 -0
  2. _numba_cuda_redirector.py +89 -0
  3. numba_cuda/VERSION +1 -0
  4. numba_cuda/__init__.py +6 -0
  5. numba_cuda/_version.py +11 -0
  6. numba_cuda/numba/cuda/__init__.py +70 -0
  7. numba_cuda/numba/cuda/_internal/cuda_bf16.py +16394 -0
  8. numba_cuda/numba/cuda/_internal/cuda_fp16.py +8112 -0
  9. numba_cuda/numba/cuda/api.py +580 -0
  10. numba_cuda/numba/cuda/api_util.py +76 -0
  11. numba_cuda/numba/cuda/args.py +72 -0
  12. numba_cuda/numba/cuda/bf16.py +397 -0
  13. numba_cuda/numba/cuda/cache_hints.py +287 -0
  14. numba_cuda/numba/cuda/cext/__init__.py +2 -0
  15. numba_cuda/numba/cuda/cext/_devicearray.cpp +159 -0
  16. numba_cuda/numba/cuda/cext/_devicearray.cpython-313-aarch64-linux-gnu.so +0 -0
  17. numba_cuda/numba/cuda/cext/_devicearray.h +29 -0
  18. numba_cuda/numba/cuda/cext/_dispatcher.cpp +1098 -0
  19. numba_cuda/numba/cuda/cext/_dispatcher.cpython-313-aarch64-linux-gnu.so +0 -0
  20. numba_cuda/numba/cuda/cext/_hashtable.cpp +532 -0
  21. numba_cuda/numba/cuda/cext/_hashtable.h +135 -0
  22. numba_cuda/numba/cuda/cext/_helperlib.c +71 -0
  23. numba_cuda/numba/cuda/cext/_helperlib.cpython-313-aarch64-linux-gnu.so +0 -0
  24. numba_cuda/numba/cuda/cext/_helpermod.c +82 -0
  25. numba_cuda/numba/cuda/cext/_pymodule.h +38 -0
  26. numba_cuda/numba/cuda/cext/_typeconv.cpp +206 -0
  27. numba_cuda/numba/cuda/cext/_typeconv.cpython-313-aarch64-linux-gnu.so +0 -0
  28. numba_cuda/numba/cuda/cext/_typeof.cpp +1159 -0
  29. numba_cuda/numba/cuda/cext/_typeof.h +19 -0
  30. numba_cuda/numba/cuda/cext/capsulethunk.h +111 -0
  31. numba_cuda/numba/cuda/cext/mviewbuf.c +385 -0
  32. numba_cuda/numba/cuda/cext/mviewbuf.cpython-313-aarch64-linux-gnu.so +0 -0
  33. numba_cuda/numba/cuda/cext/typeconv.cpp +212 -0
  34. numba_cuda/numba/cuda/cext/typeconv.hpp +101 -0
  35. numba_cuda/numba/cuda/cg.py +67 -0
  36. numba_cuda/numba/cuda/cgutils.py +1294 -0
  37. numba_cuda/numba/cuda/cloudpickle/__init__.py +21 -0
  38. numba_cuda/numba/cuda/cloudpickle/cloudpickle.py +1598 -0
  39. numba_cuda/numba/cuda/cloudpickle/cloudpickle_fast.py +17 -0
  40. numba_cuda/numba/cuda/codegen.py +541 -0
  41. numba_cuda/numba/cuda/compiler.py +1396 -0
  42. numba_cuda/numba/cuda/core/analysis.py +758 -0
  43. numba_cuda/numba/cuda/core/annotations/__init__.py +0 -0
  44. numba_cuda/numba/cuda/core/annotations/pretty_annotate.py +288 -0
  45. numba_cuda/numba/cuda/core/annotations/type_annotations.py +305 -0
  46. numba_cuda/numba/cuda/core/base.py +1332 -0
  47. numba_cuda/numba/cuda/core/boxing.py +1411 -0
  48. numba_cuda/numba/cuda/core/bytecode.py +728 -0
  49. numba_cuda/numba/cuda/core/byteflow.py +2346 -0
  50. numba_cuda/numba/cuda/core/caching.py +744 -0
  51. numba_cuda/numba/cuda/core/callconv.py +392 -0
  52. numba_cuda/numba/cuda/core/codegen.py +171 -0
  53. numba_cuda/numba/cuda/core/compiler.py +199 -0
  54. numba_cuda/numba/cuda/core/compiler_lock.py +85 -0
  55. numba_cuda/numba/cuda/core/compiler_machinery.py +497 -0
  56. numba_cuda/numba/cuda/core/config.py +650 -0
  57. numba_cuda/numba/cuda/core/consts.py +124 -0
  58. numba_cuda/numba/cuda/core/controlflow.py +989 -0
  59. numba_cuda/numba/cuda/core/entrypoints.py +57 -0
  60. numba_cuda/numba/cuda/core/environment.py +66 -0
  61. numba_cuda/numba/cuda/core/errors.py +917 -0
  62. numba_cuda/numba/cuda/core/event.py +511 -0
  63. numba_cuda/numba/cuda/core/funcdesc.py +330 -0
  64. numba_cuda/numba/cuda/core/generators.py +387 -0
  65. numba_cuda/numba/cuda/core/imputils.py +509 -0
  66. numba_cuda/numba/cuda/core/inline_closurecall.py +1787 -0
  67. numba_cuda/numba/cuda/core/interpreter.py +3617 -0
  68. numba_cuda/numba/cuda/core/ir.py +1812 -0
  69. numba_cuda/numba/cuda/core/ir_utils.py +2638 -0
  70. numba_cuda/numba/cuda/core/optional.py +129 -0
  71. numba_cuda/numba/cuda/core/options.py +262 -0
  72. numba_cuda/numba/cuda/core/postproc.py +249 -0
  73. numba_cuda/numba/cuda/core/pythonapi.py +1859 -0
  74. numba_cuda/numba/cuda/core/registry.py +46 -0
  75. numba_cuda/numba/cuda/core/removerefctpass.py +123 -0
  76. numba_cuda/numba/cuda/core/rewrites/__init__.py +26 -0
  77. numba_cuda/numba/cuda/core/rewrites/ir_print.py +91 -0
  78. numba_cuda/numba/cuda/core/rewrites/registry.py +104 -0
  79. numba_cuda/numba/cuda/core/rewrites/static_binop.py +41 -0
  80. numba_cuda/numba/cuda/core/rewrites/static_getitem.py +189 -0
  81. numba_cuda/numba/cuda/core/rewrites/static_raise.py +100 -0
  82. numba_cuda/numba/cuda/core/sigutils.py +68 -0
  83. numba_cuda/numba/cuda/core/ssa.py +498 -0
  84. numba_cuda/numba/cuda/core/targetconfig.py +330 -0
  85. numba_cuda/numba/cuda/core/tracing.py +231 -0
  86. numba_cuda/numba/cuda/core/transforms.py +956 -0
  87. numba_cuda/numba/cuda/core/typed_passes.py +867 -0
  88. numba_cuda/numba/cuda/core/typeinfer.py +1950 -0
  89. numba_cuda/numba/cuda/core/unsafe/__init__.py +0 -0
  90. numba_cuda/numba/cuda/core/unsafe/bytes.py +67 -0
  91. numba_cuda/numba/cuda/core/unsafe/eh.py +67 -0
  92. numba_cuda/numba/cuda/core/unsafe/refcount.py +98 -0
  93. numba_cuda/numba/cuda/core/untyped_passes.py +1979 -0
  94. numba_cuda/numba/cuda/cpython/builtins.py +1153 -0
  95. numba_cuda/numba/cuda/cpython/charseq.py +1218 -0
  96. numba_cuda/numba/cuda/cpython/cmathimpl.py +560 -0
  97. numba_cuda/numba/cuda/cpython/enumimpl.py +103 -0
  98. numba_cuda/numba/cuda/cpython/iterators.py +167 -0
  99. numba_cuda/numba/cuda/cpython/listobj.py +1326 -0
  100. numba_cuda/numba/cuda/cpython/mathimpl.py +499 -0
  101. numba_cuda/numba/cuda/cpython/numbers.py +1475 -0
  102. numba_cuda/numba/cuda/cpython/rangeobj.py +289 -0
  103. numba_cuda/numba/cuda/cpython/slicing.py +322 -0
  104. numba_cuda/numba/cuda/cpython/tupleobj.py +456 -0
  105. numba_cuda/numba/cuda/cpython/unicode.py +2865 -0
  106. numba_cuda/numba/cuda/cpython/unicode_support.py +1597 -0
  107. numba_cuda/numba/cuda/cpython/unsafe/__init__.py +0 -0
  108. numba_cuda/numba/cuda/cpython/unsafe/numbers.py +64 -0
  109. numba_cuda/numba/cuda/cpython/unsafe/tuple.py +92 -0
  110. numba_cuda/numba/cuda/cuda_paths.py +691 -0
  111. numba_cuda/numba/cuda/cudadecl.py +543 -0
  112. numba_cuda/numba/cuda/cudadrv/__init__.py +14 -0
  113. numba_cuda/numba/cuda/cudadrv/devicearray.py +954 -0
  114. numba_cuda/numba/cuda/cudadrv/devices.py +249 -0
  115. numba_cuda/numba/cuda/cudadrv/driver.py +3238 -0
  116. numba_cuda/numba/cuda/cudadrv/drvapi.py +435 -0
  117. numba_cuda/numba/cuda/cudadrv/dummyarray.py +562 -0
  118. numba_cuda/numba/cuda/cudadrv/enums.py +613 -0
  119. numba_cuda/numba/cuda/cudadrv/error.py +48 -0
  120. numba_cuda/numba/cuda/cudadrv/libs.py +220 -0
  121. numba_cuda/numba/cuda/cudadrv/linkable_code.py +184 -0
  122. numba_cuda/numba/cuda/cudadrv/mappings.py +14 -0
  123. numba_cuda/numba/cuda/cudadrv/ndarray.py +26 -0
  124. numba_cuda/numba/cuda/cudadrv/nvrtc.py +193 -0
  125. numba_cuda/numba/cuda/cudadrv/nvvm.py +756 -0
  126. numba_cuda/numba/cuda/cudadrv/rtapi.py +13 -0
  127. numba_cuda/numba/cuda/cudadrv/runtime.py +34 -0
  128. numba_cuda/numba/cuda/cudaimpl.py +983 -0
  129. numba_cuda/numba/cuda/cudamath.py +149 -0
  130. numba_cuda/numba/cuda/datamodel/__init__.py +7 -0
  131. numba_cuda/numba/cuda/datamodel/cuda_manager.py +66 -0
  132. numba_cuda/numba/cuda/datamodel/cuda_models.py +1446 -0
  133. numba_cuda/numba/cuda/datamodel/cuda_packer.py +224 -0
  134. numba_cuda/numba/cuda/datamodel/cuda_registry.py +22 -0
  135. numba_cuda/numba/cuda/datamodel/cuda_testing.py +153 -0
  136. numba_cuda/numba/cuda/datamodel/manager.py +11 -0
  137. numba_cuda/numba/cuda/datamodel/models.py +9 -0
  138. numba_cuda/numba/cuda/datamodel/packer.py +9 -0
  139. numba_cuda/numba/cuda/datamodel/registry.py +11 -0
  140. numba_cuda/numba/cuda/datamodel/testing.py +11 -0
  141. numba_cuda/numba/cuda/debuginfo.py +997 -0
  142. numba_cuda/numba/cuda/decorators.py +294 -0
  143. numba_cuda/numba/cuda/descriptor.py +35 -0
  144. numba_cuda/numba/cuda/device_init.py +155 -0
  145. numba_cuda/numba/cuda/deviceufunc.py +1021 -0
  146. numba_cuda/numba/cuda/dispatcher.py +2463 -0
  147. numba_cuda/numba/cuda/errors.py +72 -0
  148. numba_cuda/numba/cuda/extending.py +697 -0
  149. numba_cuda/numba/cuda/flags.py +178 -0
  150. numba_cuda/numba/cuda/fp16.py +357 -0
  151. numba_cuda/numba/cuda/include/12/cuda_bf16.h +5118 -0
  152. numba_cuda/numba/cuda/include/12/cuda_bf16.hpp +3865 -0
  153. numba_cuda/numba/cuda/include/12/cuda_fp16.h +5363 -0
  154. numba_cuda/numba/cuda/include/12/cuda_fp16.hpp +3483 -0
  155. numba_cuda/numba/cuda/include/13/cuda_bf16.h +5118 -0
  156. numba_cuda/numba/cuda/include/13/cuda_bf16.hpp +3865 -0
  157. numba_cuda/numba/cuda/include/13/cuda_fp16.h +5363 -0
  158. numba_cuda/numba/cuda/include/13/cuda_fp16.hpp +3483 -0
  159. numba_cuda/numba/cuda/initialize.py +24 -0
  160. numba_cuda/numba/cuda/intrinsics.py +531 -0
  161. numba_cuda/numba/cuda/itanium_mangler.py +214 -0
  162. numba_cuda/numba/cuda/kernels/__init__.py +2 -0
  163. numba_cuda/numba/cuda/kernels/reduction.py +265 -0
  164. numba_cuda/numba/cuda/kernels/transpose.py +65 -0
  165. numba_cuda/numba/cuda/libdevice.py +3386 -0
  166. numba_cuda/numba/cuda/libdevicedecl.py +20 -0
  167. numba_cuda/numba/cuda/libdevicefuncs.py +1060 -0
  168. numba_cuda/numba/cuda/libdeviceimpl.py +88 -0
  169. numba_cuda/numba/cuda/locks.py +19 -0
  170. numba_cuda/numba/cuda/lowering.py +1980 -0
  171. numba_cuda/numba/cuda/mathimpl.py +374 -0
  172. numba_cuda/numba/cuda/memory_management/__init__.py +4 -0
  173. numba_cuda/numba/cuda/memory_management/memsys.cu +99 -0
  174. numba_cuda/numba/cuda/memory_management/memsys.cuh +22 -0
  175. numba_cuda/numba/cuda/memory_management/nrt.cu +212 -0
  176. numba_cuda/numba/cuda/memory_management/nrt.cuh +48 -0
  177. numba_cuda/numba/cuda/memory_management/nrt.py +390 -0
  178. numba_cuda/numba/cuda/memory_management/nrt_context.py +438 -0
  179. numba_cuda/numba/cuda/misc/appdirs.py +594 -0
  180. numba_cuda/numba/cuda/misc/cffiimpl.py +24 -0
  181. numba_cuda/numba/cuda/misc/coverage_support.py +43 -0
  182. numba_cuda/numba/cuda/misc/dump_style.py +41 -0
  183. numba_cuda/numba/cuda/misc/findlib.py +75 -0
  184. numba_cuda/numba/cuda/misc/firstlinefinder.py +96 -0
  185. numba_cuda/numba/cuda/misc/gdb_hook.py +240 -0
  186. numba_cuda/numba/cuda/misc/literal.py +28 -0
  187. numba_cuda/numba/cuda/misc/llvm_pass_timings.py +412 -0
  188. numba_cuda/numba/cuda/misc/special.py +94 -0
  189. numba_cuda/numba/cuda/models.py +56 -0
  190. numba_cuda/numba/cuda/np/arraymath.py +5130 -0
  191. numba_cuda/numba/cuda/np/arrayobj.py +7635 -0
  192. numba_cuda/numba/cuda/np/extensions.py +11 -0
  193. numba_cuda/numba/cuda/np/linalg.py +3087 -0
  194. numba_cuda/numba/cuda/np/math/__init__.py +0 -0
  195. numba_cuda/numba/cuda/np/math/cmathimpl.py +558 -0
  196. numba_cuda/numba/cuda/np/math/mathimpl.py +487 -0
  197. numba_cuda/numba/cuda/np/math/numbers.py +1461 -0
  198. numba_cuda/numba/cuda/np/npdatetime.py +969 -0
  199. numba_cuda/numba/cuda/np/npdatetime_helpers.py +217 -0
  200. numba_cuda/numba/cuda/np/npyfuncs.py +1808 -0
  201. numba_cuda/numba/cuda/np/npyimpl.py +1027 -0
  202. numba_cuda/numba/cuda/np/numpy_support.py +798 -0
  203. numba_cuda/numba/cuda/np/polynomial/__init__.py +4 -0
  204. numba_cuda/numba/cuda/np/polynomial/polynomial_core.py +242 -0
  205. numba_cuda/numba/cuda/np/polynomial/polynomial_functions.py +380 -0
  206. numba_cuda/numba/cuda/np/ufunc/__init__.py +4 -0
  207. numba_cuda/numba/cuda/np/ufunc/decorators.py +203 -0
  208. numba_cuda/numba/cuda/np/ufunc/sigparse.py +68 -0
  209. numba_cuda/numba/cuda/np/ufunc/ufuncbuilder.py +65 -0
  210. numba_cuda/numba/cuda/np/ufunc_db.py +1282 -0
  211. numba_cuda/numba/cuda/np/unsafe/__init__.py +0 -0
  212. numba_cuda/numba/cuda/np/unsafe/ndarray.py +84 -0
  213. numba_cuda/numba/cuda/nvvmutils.py +254 -0
  214. numba_cuda/numba/cuda/printimpl.py +126 -0
  215. numba_cuda/numba/cuda/random.py +308 -0
  216. numba_cuda/numba/cuda/reshape_funcs.cu +156 -0
  217. numba_cuda/numba/cuda/serialize.py +267 -0
  218. numba_cuda/numba/cuda/simulator/__init__.py +63 -0
  219. numba_cuda/numba/cuda/simulator/_internal/__init__.py +4 -0
  220. numba_cuda/numba/cuda/simulator/_internal/cuda_bf16.py +2 -0
  221. numba_cuda/numba/cuda/simulator/api.py +179 -0
  222. numba_cuda/numba/cuda/simulator/bf16.py +4 -0
  223. numba_cuda/numba/cuda/simulator/compiler.py +38 -0
  224. numba_cuda/numba/cuda/simulator/cudadrv/__init__.py +11 -0
  225. numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py +462 -0
  226. numba_cuda/numba/cuda/simulator/cudadrv/devices.py +122 -0
  227. numba_cuda/numba/cuda/simulator/cudadrv/driver.py +66 -0
  228. numba_cuda/numba/cuda/simulator/cudadrv/drvapi.py +7 -0
  229. numba_cuda/numba/cuda/simulator/cudadrv/dummyarray.py +7 -0
  230. numba_cuda/numba/cuda/simulator/cudadrv/error.py +10 -0
  231. numba_cuda/numba/cuda/simulator/cudadrv/libs.py +10 -0
  232. numba_cuda/numba/cuda/simulator/cudadrv/linkable_code.py +61 -0
  233. numba_cuda/numba/cuda/simulator/cudadrv/nvrtc.py +11 -0
  234. numba_cuda/numba/cuda/simulator/cudadrv/nvvm.py +32 -0
  235. numba_cuda/numba/cuda/simulator/cudadrv/runtime.py +22 -0
  236. numba_cuda/numba/cuda/simulator/dispatcher.py +11 -0
  237. numba_cuda/numba/cuda/simulator/kernel.py +320 -0
  238. numba_cuda/numba/cuda/simulator/kernelapi.py +509 -0
  239. numba_cuda/numba/cuda/simulator/memory_management/__init__.py +4 -0
  240. numba_cuda/numba/cuda/simulator/memory_management/nrt.py +21 -0
  241. numba_cuda/numba/cuda/simulator/reduction.py +19 -0
  242. numba_cuda/numba/cuda/simulator/tests/support.py +4 -0
  243. numba_cuda/numba/cuda/simulator/vector_types.py +65 -0
  244. numba_cuda/numba/cuda/simulator_init.py +18 -0
  245. numba_cuda/numba/cuda/stubs.py +624 -0
  246. numba_cuda/numba/cuda/target.py +505 -0
  247. numba_cuda/numba/cuda/testing.py +347 -0
  248. numba_cuda/numba/cuda/tests/__init__.py +62 -0
  249. numba_cuda/numba/cuda/tests/benchmarks/__init__.py +0 -0
  250. numba_cuda/numba/cuda/tests/benchmarks/test_kernel_launch.py +119 -0
  251. numba_cuda/numba/cuda/tests/cloudpickle_main_class.py +9 -0
  252. numba_cuda/numba/cuda/tests/core/serialize_usecases.py +113 -0
  253. numba_cuda/numba/cuda/tests/core/test_itanium_mangler.py +83 -0
  254. numba_cuda/numba/cuda/tests/core/test_serialize.py +371 -0
  255. numba_cuda/numba/cuda/tests/cudadrv/__init__.py +9 -0
  256. numba_cuda/numba/cuda/tests/cudadrv/test_array_attr.py +147 -0
  257. numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py +161 -0
  258. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_array_slicing.py +397 -0
  259. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_auto_context.py +24 -0
  260. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py +180 -0
  261. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py +313 -0
  262. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_memory.py +191 -0
  263. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py +621 -0
  264. numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py +247 -0
  265. numba_cuda/numba/cuda/tests/cudadrv/test_detect.py +100 -0
  266. numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +200 -0
  267. numba_cuda/numba/cuda/tests/cudadrv/test_events.py +53 -0
  268. numba_cuda/numba/cuda/tests/cudadrv/test_host_alloc.py +72 -0
  269. numba_cuda/numba/cuda/tests/cudadrv/test_init.py +138 -0
  270. numba_cuda/numba/cuda/tests/cudadrv/test_inline_ptx.py +43 -0
  271. numba_cuda/numba/cuda/tests/cudadrv/test_is_fp16.py +15 -0
  272. numba_cuda/numba/cuda/tests/cudadrv/test_linkable_code.py +58 -0
  273. numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +348 -0
  274. numba_cuda/numba/cuda/tests/cudadrv/test_managed_alloc.py +128 -0
  275. numba_cuda/numba/cuda/tests/cudadrv/test_module_callbacks.py +301 -0
  276. numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py +174 -0
  277. numba_cuda/numba/cuda/tests/cudadrv/test_nvrtc.py +28 -0
  278. numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py +185 -0
  279. numba_cuda/numba/cuda/tests/cudadrv/test_pinned.py +39 -0
  280. numba_cuda/numba/cuda/tests/cudadrv/test_profiler.py +23 -0
  281. numba_cuda/numba/cuda/tests/cudadrv/test_reset_device.py +38 -0
  282. numba_cuda/numba/cuda/tests/cudadrv/test_runtime.py +48 -0
  283. numba_cuda/numba/cuda/tests/cudadrv/test_select_device.py +44 -0
  284. numba_cuda/numba/cuda/tests/cudadrv/test_streams.py +127 -0
  285. numba_cuda/numba/cuda/tests/cudapy/__init__.py +9 -0
  286. numba_cuda/numba/cuda/tests/cudapy/cache_usecases.py +231 -0
  287. numba_cuda/numba/cuda/tests/cudapy/cache_with_cpu_usecases.py +50 -0
  288. numba_cuda/numba/cuda/tests/cudapy/cg_cache_usecases.py +36 -0
  289. numba_cuda/numba/cuda/tests/cudapy/complex_usecases.py +116 -0
  290. numba_cuda/numba/cuda/tests/cudapy/enum_usecases.py +59 -0
  291. numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +62 -0
  292. numba_cuda/numba/cuda/tests/cudapy/jitlink.ptx +28 -0
  293. numba_cuda/numba/cuda/tests/cudapy/overload_usecases.py +33 -0
  294. numba_cuda/numba/cuda/tests/cudapy/recursion_usecases.py +104 -0
  295. numba_cuda/numba/cuda/tests/cudapy/test_alignment.py +47 -0
  296. numba_cuda/numba/cuda/tests/cudapy/test_analysis.py +1122 -0
  297. numba_cuda/numba/cuda/tests/cudapy/test_array.py +344 -0
  298. numba_cuda/numba/cuda/tests/cudapy/test_array_alignment.py +268 -0
  299. numba_cuda/numba/cuda/tests/cudapy/test_array_args.py +203 -0
  300. numba_cuda/numba/cuda/tests/cudapy/test_array_methods.py +63 -0
  301. numba_cuda/numba/cuda/tests/cudapy/test_array_reductions.py +360 -0
  302. numba_cuda/numba/cuda/tests/cudapy/test_atomics.py +1815 -0
  303. numba_cuda/numba/cuda/tests/cudapy/test_bfloat16.py +599 -0
  304. numba_cuda/numba/cuda/tests/cudapy/test_bfloat16_bindings.py +377 -0
  305. numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py +160 -0
  306. numba_cuda/numba/cuda/tests/cudapy/test_boolean.py +27 -0
  307. numba_cuda/numba/cuda/tests/cudapy/test_byteflow.py +98 -0
  308. numba_cuda/numba/cuda/tests/cudapy/test_cache_hints.py +210 -0
  309. numba_cuda/numba/cuda/tests/cudapy/test_caching.py +683 -0
  310. numba_cuda/numba/cuda/tests/cudapy/test_casting.py +265 -0
  311. numba_cuda/numba/cuda/tests/cudapy/test_cffi.py +42 -0
  312. numba_cuda/numba/cuda/tests/cudapy/test_compiler.py +718 -0
  313. numba_cuda/numba/cuda/tests/cudapy/test_complex.py +370 -0
  314. numba_cuda/numba/cuda/tests/cudapy/test_complex_kernel.py +23 -0
  315. numba_cuda/numba/cuda/tests/cudapy/test_const_string.py +142 -0
  316. numba_cuda/numba/cuda/tests/cudapy/test_constmem.py +178 -0
  317. numba_cuda/numba/cuda/tests/cudapy/test_cooperative_groups.py +193 -0
  318. numba_cuda/numba/cuda/tests/cudapy/test_copy_propagate.py +131 -0
  319. numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py +438 -0
  320. numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py +94 -0
  321. numba_cuda/numba/cuda/tests/cudapy/test_datetime.py +101 -0
  322. numba_cuda/numba/cuda/tests/cudapy/test_debug.py +105 -0
  323. numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +978 -0
  324. numba_cuda/numba/cuda/tests/cudapy/test_debuginfo_types.py +476 -0
  325. numba_cuda/numba/cuda/tests/cudapy/test_device_func.py +500 -0
  326. numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +820 -0
  327. numba_cuda/numba/cuda/tests/cudapy/test_enums.py +152 -0
  328. numba_cuda/numba/cuda/tests/cudapy/test_errors.py +111 -0
  329. numba_cuda/numba/cuda/tests/cudapy/test_exception.py +170 -0
  330. numba_cuda/numba/cuda/tests/cudapy/test_extending.py +1088 -0
  331. numba_cuda/numba/cuda/tests/cudapy/test_extending_types.py +71 -0
  332. numba_cuda/numba/cuda/tests/cudapy/test_fastmath.py +265 -0
  333. numba_cuda/numba/cuda/tests/cudapy/test_flow_control.py +1433 -0
  334. numba_cuda/numba/cuda/tests/cudapy/test_forall.py +57 -0
  335. numba_cuda/numba/cuda/tests/cudapy/test_freevar.py +34 -0
  336. numba_cuda/numba/cuda/tests/cudapy/test_frexp_ldexp.py +69 -0
  337. numba_cuda/numba/cuda/tests/cudapy/test_globals.py +62 -0
  338. numba_cuda/numba/cuda/tests/cudapy/test_gufunc.py +474 -0
  339. numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scalar.py +167 -0
  340. numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scheduling.py +92 -0
  341. numba_cuda/numba/cuda/tests/cudapy/test_idiv.py +39 -0
  342. numba_cuda/numba/cuda/tests/cudapy/test_inline.py +170 -0
  343. numba_cuda/numba/cuda/tests/cudapy/test_inspect.py +255 -0
  344. numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py +1219 -0
  345. numba_cuda/numba/cuda/tests/cudapy/test_ipc.py +263 -0
  346. numba_cuda/numba/cuda/tests/cudapy/test_ir.py +598 -0
  347. numba_cuda/numba/cuda/tests/cudapy/test_ir_utils.py +276 -0
  348. numba_cuda/numba/cuda/tests/cudapy/test_iterators.py +101 -0
  349. numba_cuda/numba/cuda/tests/cudapy/test_lang.py +68 -0
  350. numba_cuda/numba/cuda/tests/cudapy/test_laplace.py +123 -0
  351. numba_cuda/numba/cuda/tests/cudapy/test_libdevice.py +194 -0
  352. numba_cuda/numba/cuda/tests/cudapy/test_lineinfo.py +220 -0
  353. numba_cuda/numba/cuda/tests/cudapy/test_localmem.py +173 -0
  354. numba_cuda/numba/cuda/tests/cudapy/test_make_function_to_jit_function.py +364 -0
  355. numba_cuda/numba/cuda/tests/cudapy/test_mandel.py +47 -0
  356. numba_cuda/numba/cuda/tests/cudapy/test_math.py +842 -0
  357. numba_cuda/numba/cuda/tests/cudapy/test_matmul.py +76 -0
  358. numba_cuda/numba/cuda/tests/cudapy/test_minmax.py +78 -0
  359. numba_cuda/numba/cuda/tests/cudapy/test_montecarlo.py +25 -0
  360. numba_cuda/numba/cuda/tests/cudapy/test_multigpu.py +145 -0
  361. numba_cuda/numba/cuda/tests/cudapy/test_multiprocessing.py +39 -0
  362. numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py +82 -0
  363. numba_cuda/numba/cuda/tests/cudapy/test_nondet.py +53 -0
  364. numba_cuda/numba/cuda/tests/cudapy/test_operator.py +504 -0
  365. numba_cuda/numba/cuda/tests/cudapy/test_optimization.py +93 -0
  366. numba_cuda/numba/cuda/tests/cudapy/test_overload.py +402 -0
  367. numba_cuda/numba/cuda/tests/cudapy/test_powi.py +128 -0
  368. numba_cuda/numba/cuda/tests/cudapy/test_print.py +193 -0
  369. numba_cuda/numba/cuda/tests/cudapy/test_py2_div_issue.py +37 -0
  370. numba_cuda/numba/cuda/tests/cudapy/test_random.py +117 -0
  371. numba_cuda/numba/cuda/tests/cudapy/test_record_dtype.py +614 -0
  372. numba_cuda/numba/cuda/tests/cudapy/test_recursion.py +130 -0
  373. numba_cuda/numba/cuda/tests/cudapy/test_reduction.py +94 -0
  374. numba_cuda/numba/cuda/tests/cudapy/test_retrieve_autoconverted_arrays.py +83 -0
  375. numba_cuda/numba/cuda/tests/cudapy/test_serialize.py +86 -0
  376. numba_cuda/numba/cuda/tests/cudapy/test_slicing.py +40 -0
  377. numba_cuda/numba/cuda/tests/cudapy/test_sm.py +457 -0
  378. numba_cuda/numba/cuda/tests/cudapy/test_sm_creation.py +233 -0
  379. numba_cuda/numba/cuda/tests/cudapy/test_ssa.py +454 -0
  380. numba_cuda/numba/cuda/tests/cudapy/test_stream_api.py +56 -0
  381. numba_cuda/numba/cuda/tests/cudapy/test_sync.py +277 -0
  382. numba_cuda/numba/cuda/tests/cudapy/test_tracing.py +200 -0
  383. numba_cuda/numba/cuda/tests/cudapy/test_transpose.py +90 -0
  384. numba_cuda/numba/cuda/tests/cudapy/test_typeconv.py +333 -0
  385. numba_cuda/numba/cuda/tests/cudapy/test_typeinfer.py +538 -0
  386. numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +585 -0
  387. numba_cuda/numba/cuda/tests/cudapy/test_userexc.py +42 -0
  388. numba_cuda/numba/cuda/tests/cudapy/test_vector_type.py +485 -0
  389. numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py +312 -0
  390. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_complex.py +23 -0
  391. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py +183 -0
  392. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_device.py +40 -0
  393. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_scalar_arg.py +40 -0
  394. numba_cuda/numba/cuda/tests/cudapy/test_warning.py +206 -0
  395. numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +446 -0
  396. numba_cuda/numba/cuda/tests/cudasim/__init__.py +9 -0
  397. numba_cuda/numba/cuda/tests/cudasim/support.py +9 -0
  398. numba_cuda/numba/cuda/tests/cudasim/test_cudasim_issues.py +111 -0
  399. numba_cuda/numba/cuda/tests/data/__init__.py +2 -0
  400. numba_cuda/numba/cuda/tests/data/cta_barrier.cu +28 -0
  401. numba_cuda/numba/cuda/tests/data/cuda_include.cu +10 -0
  402. numba_cuda/numba/cuda/tests/data/error.cu +12 -0
  403. numba_cuda/numba/cuda/tests/data/include/add.cuh +8 -0
  404. numba_cuda/numba/cuda/tests/data/jitlink.cu +28 -0
  405. numba_cuda/numba/cuda/tests/data/jitlink.ptx +49 -0
  406. numba_cuda/numba/cuda/tests/data/warn.cu +12 -0
  407. numba_cuda/numba/cuda/tests/doc_examples/__init__.py +9 -0
  408. numba_cuda/numba/cuda/tests/doc_examples/ffi/__init__.py +2 -0
  409. numba_cuda/numba/cuda/tests/doc_examples/ffi/functions.cu +54 -0
  410. numba_cuda/numba/cuda/tests/doc_examples/ffi/include/mul.cuh +8 -0
  411. numba_cuda/numba/cuda/tests/doc_examples/ffi/saxpy.cu +14 -0
  412. numba_cuda/numba/cuda/tests/doc_examples/test_cg.py +86 -0
  413. numba_cuda/numba/cuda/tests/doc_examples/test_cpointer.py +68 -0
  414. numba_cuda/numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py +81 -0
  415. numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py +141 -0
  416. numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py +160 -0
  417. numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py +180 -0
  418. numba_cuda/numba/cuda/tests/doc_examples/test_montecarlo.py +119 -0
  419. numba_cuda/numba/cuda/tests/doc_examples/test_random.py +66 -0
  420. numba_cuda/numba/cuda/tests/doc_examples/test_reduction.py +80 -0
  421. numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py +206 -0
  422. numba_cuda/numba/cuda/tests/doc_examples/test_ufunc.py +53 -0
  423. numba_cuda/numba/cuda/tests/doc_examples/test_vecadd.py +76 -0
  424. numba_cuda/numba/cuda/tests/nocuda/__init__.py +9 -0
  425. numba_cuda/numba/cuda/tests/nocuda/test_dummyarray.py +452 -0
  426. numba_cuda/numba/cuda/tests/nocuda/test_function_resolution.py +48 -0
  427. numba_cuda/numba/cuda/tests/nocuda/test_import.py +63 -0
  428. numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py +252 -0
  429. numba_cuda/numba/cuda/tests/nocuda/test_nvvm.py +59 -0
  430. numba_cuda/numba/cuda/tests/nrt/__init__.py +9 -0
  431. numba_cuda/numba/cuda/tests/nrt/test_nrt.py +387 -0
  432. numba_cuda/numba/cuda/tests/nrt/test_nrt_refct.py +124 -0
  433. numba_cuda/numba/cuda/tests/support.py +900 -0
  434. numba_cuda/numba/cuda/typeconv/__init__.py +4 -0
  435. numba_cuda/numba/cuda/typeconv/castgraph.py +137 -0
  436. numba_cuda/numba/cuda/typeconv/rules.py +63 -0
  437. numba_cuda/numba/cuda/typeconv/typeconv.py +121 -0
  438. numba_cuda/numba/cuda/types/__init__.py +233 -0
  439. numba_cuda/numba/cuda/types/__init__.pyi +167 -0
  440. numba_cuda/numba/cuda/types/abstract.py +9 -0
  441. numba_cuda/numba/cuda/types/common.py +9 -0
  442. numba_cuda/numba/cuda/types/containers.py +9 -0
  443. numba_cuda/numba/cuda/types/cuda_abstract.py +533 -0
  444. numba_cuda/numba/cuda/types/cuda_common.py +110 -0
  445. numba_cuda/numba/cuda/types/cuda_containers.py +971 -0
  446. numba_cuda/numba/cuda/types/cuda_function_type.py +230 -0
  447. numba_cuda/numba/cuda/types/cuda_functions.py +798 -0
  448. numba_cuda/numba/cuda/types/cuda_iterators.py +120 -0
  449. numba_cuda/numba/cuda/types/cuda_misc.py +569 -0
  450. numba_cuda/numba/cuda/types/cuda_npytypes.py +690 -0
  451. numba_cuda/numba/cuda/types/cuda_scalars.py +280 -0
  452. numba_cuda/numba/cuda/types/ext_types.py +101 -0
  453. numba_cuda/numba/cuda/types/function_type.py +11 -0
  454. numba_cuda/numba/cuda/types/functions.py +9 -0
  455. numba_cuda/numba/cuda/types/iterators.py +9 -0
  456. numba_cuda/numba/cuda/types/misc.py +9 -0
  457. numba_cuda/numba/cuda/types/npytypes.py +9 -0
  458. numba_cuda/numba/cuda/types/scalars.py +9 -0
  459. numba_cuda/numba/cuda/typing/__init__.py +19 -0
  460. numba_cuda/numba/cuda/typing/arraydecl.py +939 -0
  461. numba_cuda/numba/cuda/typing/asnumbatype.py +130 -0
  462. numba_cuda/numba/cuda/typing/bufproto.py +70 -0
  463. numba_cuda/numba/cuda/typing/builtins.py +1209 -0
  464. numba_cuda/numba/cuda/typing/cffi_utils.py +219 -0
  465. numba_cuda/numba/cuda/typing/cmathdecl.py +47 -0
  466. numba_cuda/numba/cuda/typing/collections.py +138 -0
  467. numba_cuda/numba/cuda/typing/context.py +782 -0
  468. numba_cuda/numba/cuda/typing/ctypes_utils.py +125 -0
  469. numba_cuda/numba/cuda/typing/dictdecl.py +63 -0
  470. numba_cuda/numba/cuda/typing/enumdecl.py +74 -0
  471. numba_cuda/numba/cuda/typing/listdecl.py +147 -0
  472. numba_cuda/numba/cuda/typing/mathdecl.py +158 -0
  473. numba_cuda/numba/cuda/typing/npdatetime.py +322 -0
  474. numba_cuda/numba/cuda/typing/npydecl.py +749 -0
  475. numba_cuda/numba/cuda/typing/setdecl.py +115 -0
  476. numba_cuda/numba/cuda/typing/templates.py +1446 -0
  477. numba_cuda/numba/cuda/typing/typeof.py +301 -0
  478. numba_cuda/numba/cuda/ufuncs.py +746 -0
  479. numba_cuda/numba/cuda/utils.py +724 -0
  480. numba_cuda/numba/cuda/vector_types.py +214 -0
  481. numba_cuda/numba/cuda/vectorizers.py +260 -0
  482. numba_cuda-0.22.0.dist-info/METADATA +109 -0
  483. numba_cuda-0.22.0.dist-info/RECORD +487 -0
  484. numba_cuda-0.22.0.dist-info/WHEEL +6 -0
  485. numba_cuda-0.22.0.dist-info/licenses/LICENSE +26 -0
  486. numba_cuda-0.22.0.dist-info/licenses/LICENSE.numba +24 -0
  487. numba_cuda-0.22.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,402 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: BSD-2-Clause
3
+
4
+ from numba import cuda
5
+ from numba.cuda import types
6
+ from numba.cuda import HAS_NUMBA
7
+
8
+ if HAS_NUMBA:
9
+ from numba.core.errors import TypingError
10
+ from numba import njit
11
+ import numba
12
+ else:
13
+ from numba.cuda.core.errors import TypingError
14
+ from numba.cuda.extending import overload, overload_attribute
15
+ from numba.cuda.typing.typeof import typeof
16
+ from numba.core.typing.typeof import typeof as cpu_typeof
17
+ from numba.cuda.testing import (
18
+ CUDATestCase,
19
+ skip_on_cudasim,
20
+ unittest,
21
+ skip_on_standalone_numba_cuda,
22
+ )
23
+ import numpy as np
24
+
25
+
26
+ # Dummy function definitions to overload
27
+
28
+
29
def generic_func_1():
    """Placeholder that does nothing when called; it exists to be overloaded."""
31
+
32
+
33
def cuda_func_1():
    """Placeholder that does nothing when called; it exists to be overloaded."""
35
+
36
+
37
def generic_func_2():
    """Placeholder that does nothing when called; it exists to be overloaded."""
39
+
40
+
41
def cuda_func_2():
    """Placeholder that does nothing when called; it exists to be overloaded."""
43
+
44
+
45
def generic_calls_generic():
    """Placeholder that does nothing when called; it exists to be overloaded."""
47
+
48
+
49
def generic_calls_cuda():
    """Placeholder that does nothing when called; it exists to be overloaded."""
51
+
52
+
53
def cuda_calls_generic():
    """Placeholder that does nothing when called; it exists to be overloaded."""
55
+
56
+
57
def cuda_calls_cuda():
    """Placeholder that does nothing when called; it exists to be overloaded."""
59
+
60
+
61
def target_overloaded():
    """Placeholder that does nothing when called; it exists to be overloaded."""
63
+
64
+
65
def generic_calls_target_overloaded():
    """Placeholder that does nothing when called; it exists to be overloaded."""
67
+
68
+
69
def cuda_calls_target_overloaded():
    """Placeholder that does nothing when called; it exists to be overloaded."""
71
+
72
+
73
def target_overloaded_calls_target_overloaded():
    """Placeholder that does nothing when called; it exists to be overloaded."""
75
+
76
+
77
def default_values_and_kwargs():
    """Placeholder that does nothing when called; it exists to be overloaded."""
79
+
80
+
81
# Every overload implementation is tagged with its own prime number. An
# implementation multiplies the accumulator (which starts at 1) by its prime
# when it runs, so the final product identifies exactly which implementations
# were resolved for a call.

# Plain single-target functions
GENERIC_FUNCTION_1 = 2
CUDA_FUNCTION_1 = 3
GENERIC_FUNCTION_2 = 5
CUDA_FUNCTION_2 = 7

# Functions that call another single-target function
GENERIC_CALLS_GENERIC = 11
GENERIC_CALLS_CUDA = 13
CUDA_CALLS_GENERIC = 17
CUDA_CALLS_CUDA = 19

# Function overloaded separately for the generic and CUDA targets
GENERIC_TARGET_OL = 23
CUDA_TARGET_OL = 29

# Callers of the target-overloaded function
GENERIC_CALLS_TARGET_OL = 31
CUDA_CALLS_TARGET_OL = 37
GENERIC_TARGET_OL_CALLS_TARGET_OL = 41
CUDA_TARGET_OL_CALLS_TARGET_OL = 43
100
+
101
+
102
+ # Overload implementations
103
+
104
+
105
@overload(generic_func_1, target="generic")
def ol_generic_func_1(x):
    """Generic-target overload of ``generic_func_1``."""

    def scale_in_place(x):
        # Tag the accumulator with this implementation's prime.
        x[0] *= GENERIC_FUNCTION_1

    return scale_in_place
111
+
112
+
113
@overload(cuda_func_1, target="cuda")
def ol_cuda_func_1(x):
    """CUDA-target overload of ``cuda_func_1``."""

    def scale_in_place(x):
        # Tag the accumulator with this implementation's prime.
        x[0] *= CUDA_FUNCTION_1

    return scale_in_place
119
+
120
+
121
@overload(generic_func_2, target="generic")
def ol_generic_func_2(x):
    """Generic-target overload of ``generic_func_2``."""

    def scale_in_place(x):
        # Tag the accumulator with this implementation's prime.
        x[0] *= GENERIC_FUNCTION_2

    return scale_in_place
127
+
128
+
129
@overload(cuda_func_2, target="cuda")
def ol_cuda_func(x):
    """CUDA-target overload of ``cuda_func_2``."""

    def scale_in_place(x):
        # Tag the accumulator with this implementation's prime.
        x[0] *= CUDA_FUNCTION_2

    return scale_in_place
135
+
136
+
137
@overload(generic_calls_generic, target="generic")
def ol_generic_calls_generic(x):
    """Generic-target overload that also calls ``generic_func_1``."""

    def scale_then_delegate(x):
        # Apply this implementation's prime first, then the callee's.
        x[0] *= GENERIC_CALLS_GENERIC
        generic_func_1(x)

    return scale_then_delegate
144
+
145
+
146
@overload(generic_calls_cuda, target="generic")
def ol_generic_calls_cuda(x):
    """Generic-target overload that also calls ``cuda_func_1``."""

    def scale_then_delegate(x):
        # Apply this implementation's prime first, then the callee's.
        x[0] *= GENERIC_CALLS_CUDA
        cuda_func_1(x)

    return scale_then_delegate
153
+
154
+
155
@overload(cuda_calls_generic, target="cuda")
def ol_cuda_calls_generic(x):
    """CUDA-target overload that also calls ``generic_func_1``."""

    def scale_then_delegate(x):
        # Apply this implementation's prime first, then the callee's.
        x[0] *= CUDA_CALLS_GENERIC
        generic_func_1(x)

    return scale_then_delegate
162
+
163
+
164
@overload(cuda_calls_cuda, target="cuda")
def ol_cuda_calls_cuda(x):
    """CUDA-target overload that also calls ``cuda_func_1``."""

    def scale_then_delegate(x):
        # Apply this implementation's prime first, then the callee's.
        x[0] *= CUDA_CALLS_CUDA
        cuda_func_1(x)

    return scale_then_delegate
171
+
172
+
173
@overload(target_overloaded, target="generic")
def ol_target_overloaded_generic(x):
    """Generic-target implementation of ``target_overloaded``."""

    def scale_in_place(x):
        # Tag the accumulator with the generic implementation's prime.
        x[0] *= GENERIC_TARGET_OL

    return scale_in_place
179
+
180
+
181
@overload(target_overloaded, target="cuda")
def ol_target_overloaded_cuda(x):
    """CUDA-target implementation of ``target_overloaded``."""

    def scale_in_place(x):
        # Tag the accumulator with the CUDA implementation's prime.
        x[0] *= CUDA_TARGET_OL

    return scale_in_place
187
+
188
+
189
@overload(generic_calls_target_overloaded, target="generic")
def ol_generic_calls_target_overloaded(x):
    """Generic-target overload that also calls ``target_overloaded``."""

    def scale_then_delegate(x):
        # Apply this implementation's prime first, then the callee's.
        x[0] *= GENERIC_CALLS_TARGET_OL
        target_overloaded(x)

    return scale_then_delegate
196
+
197
+
198
@overload(cuda_calls_target_overloaded, target="cuda")
def ol_cuda_calls_target_overloaded(x):
    """CUDA-target overload that also calls ``target_overloaded``."""

    def scale_then_delegate(x):
        # Apply this implementation's prime first, then the callee's.
        x[0] *= CUDA_CALLS_TARGET_OL
        target_overloaded(x)

    return scale_then_delegate
205
+
206
+
207
@overload(target_overloaded_calls_target_overloaded, target="generic")
def ol_generic_calls_target_overloaded_generic(x):
    """Generic-target implementation of
    ``target_overloaded_calls_target_overloaded``; it also calls
    ``target_overloaded``."""

    def scale_then_delegate(x):
        # Apply this implementation's prime first, then the callee's.
        x[0] *= GENERIC_TARGET_OL_CALLS_TARGET_OL
        target_overloaded(x)

    return scale_then_delegate
214
+
215
+
216
@overload(target_overloaded_calls_target_overloaded, target="cuda")
def ol_generic_calls_target_overloaded_cuda(x):
    """CUDA-target implementation of
    ``target_overloaded_calls_target_overloaded``; it also calls
    ``target_overloaded``."""

    def scale_then_delegate(x):
        # Apply this implementation's prime first, then the callee's.
        x[0] *= CUDA_TARGET_OL_CALLS_TARGET_OL
        target_overloaded(x)

    return scale_then_delegate
223
+
224
+
225
@overload(default_values_and_kwargs)
def ol_default_values_and_kwargs(out, x, y=5, z=6):
    """Overload of ``default_values_and_kwargs`` with defaulted arguments.

    The implementation stores ``x + y`` in ``out[0]`` and ``z`` in ``out[1]``.
    """

    def store_sum_and_z(out, x, y=5, z=6):
        out[0] = x + y
        out[1] = z

    return store_sum_and_z
231
+
232
+
233
@skip_on_cudasim("Overloading not supported in cudasim")
class TestOverload(CUDATestCase):
    """Checks which overload implementations are resolved for each target.

    Every overload multiplies ``x[0]`` by its own prime, so the value left in
    the array after a call identifies the implementations that ran.
    """

    def check_overload(self, kernel, expected):
        """Compile ``kernel`` with ``cuda.jit``, launch it with one thread on
        a one-element int32 array of ones, and compare ``x[0]`` with
        ``expected``."""
        acc = np.ones(1, dtype=np.int32)
        compiled = cuda.jit(kernel)
        compiled[1, 1](acc)
        self.assertEqual(acc[0], expected)

    @skip_on_standalone_numba_cuda
    def check_overload_cpu(self, kernel, expected):
        """Same check as ``check_overload`` but compiled with ``njit``."""
        acc = np.ones(1, dtype=np.int32)
        compiled = njit(kernel)
        compiled(acc)
        self.assertEqual(acc[0], expected)

    def test_generic(self):
        def kernel(x):
            generic_func_1(x)

        self.check_overload(kernel, GENERIC_FUNCTION_1)

    def test_cuda(self):
        def kernel(x):
            cuda_func_1(x)

        self.check_overload(kernel, CUDA_FUNCTION_1)

    def test_generic_and_cuda(self):
        def kernel(x):
            generic_func_1(x)
            cuda_func_1(x)

        self.check_overload(kernel, GENERIC_FUNCTION_1 * CUDA_FUNCTION_1)

    def test_call_two_generic_calls(self):
        def kernel(x):
            generic_func_1(x)
            generic_func_2(x)

        self.check_overload(kernel, GENERIC_FUNCTION_1 * GENERIC_FUNCTION_2)

    def test_call_two_cuda_calls(self):
        def kernel(x):
            cuda_func_1(x)
            cuda_func_2(x)

        self.check_overload(kernel, CUDA_FUNCTION_1 * CUDA_FUNCTION_2)

    def test_generic_calls_generic(self):
        def kernel(x):
            generic_calls_generic(x)

        self.check_overload(kernel, GENERIC_CALLS_GENERIC * GENERIC_FUNCTION_1)

    def test_generic_calls_cuda(self):
        def kernel(x):
            generic_calls_cuda(x)

        self.check_overload(kernel, GENERIC_CALLS_CUDA * CUDA_FUNCTION_1)

    def test_cuda_calls_generic(self):
        def kernel(x):
            cuda_calls_generic(x)

        self.check_overload(kernel, CUDA_CALLS_GENERIC * GENERIC_FUNCTION_1)

    def test_cuda_calls_cuda(self):
        def kernel(x):
            cuda_calls_cuda(x)

        self.check_overload(kernel, CUDA_CALLS_CUDA * CUDA_FUNCTION_1)

    def test_call_target_overloaded(self):
        def kernel(x):
            target_overloaded(x)

        self.check_overload(kernel, CUDA_TARGET_OL)

    def test_generic_calls_target_overloaded(self):
        def kernel(x):
            generic_calls_target_overloaded(x)

        self.check_overload(kernel, GENERIC_CALLS_TARGET_OL * CUDA_TARGET_OL)

    def test_cuda_calls_target_overloaded(self):
        def kernel(x):
            cuda_calls_target_overloaded(x)

        self.check_overload(kernel, CUDA_CALLS_TARGET_OL * CUDA_TARGET_OL)

    def test_target_overloaded_calls_target_overloaded(self):
        def kernel(x):
            target_overloaded_calls_target_overloaded(x)

        # On CUDA, both the caller and the callee must use the CUDA overloads
        self.check_overload(
            kernel, CUDA_TARGET_OL_CALLS_TARGET_OL * CUDA_TARGET_OL
        )

    @skip_on_standalone_numba_cuda
    def test_target_overloaded_calls_target_overloaded_cpu(self):
        def kernel(x):
            target_overloaded_calls_target_overloaded(x)

        # On the CPU, both the caller and the callee must use the generic
        # overloads
        self.check_overload_cpu(
            kernel, GENERIC_TARGET_OL_CALLS_TARGET_OL * GENERIC_TARGET_OL
        )

    @skip_on_standalone_numba_cuda
    def test_overload_attribute_target(self):
        MyDummy, MyDummyType = self.make_dummy_type()
        mydummy_type_cpu = cpu_typeof(MyDummy())  # Used with @njit (CPU)
        mydummy_type = typeof(MyDummy())  # Used with @cuda.jit (CUDA)

        @overload_attribute(MyDummyType, "cuda_only", target="cuda")
        def ov_dummy_cuda_attr(obj):
            def imp(obj):
                return 42

            return imp

        # The CUDA-only attribute must not be usable on the CPU, and an
        # appropriate typing error must be raised.
        #
        # Numba versions before 0.60 produce a different message (the fixes
        # in #9454 improved it): https://github.com/numba/numba/pull/9454
        uses_old_message = HAS_NUMBA and numba.version_info[:2] < (0, 60)
        expected_error = (
            'resolving type of attribute "cuda_only" of "x"'
            if uses_old_message
            else "Unknown attribute 'cuda_only'"
        )

        with self.assertRaisesRegex(TypingError, expected_error):

            @njit(types.int64(mydummy_type_cpu))
            def illegal_target_attr_use(x):
                return x.cuda_only

        # The attribute must be usable when the target is CUDA - passing a
        # signature makes compilation eager, so this fails here if it is not.
        @cuda.jit(types.void(types.int64[::1], mydummy_type))
        def cuda_target_attr_use(res, dummy):
            res[0] = dummy.cuda_only

    def test_default_values_and_kwargs(self):
        """
        Test default values and kwargs.
        """

        @cuda.jit()
        def kernel(a, b, out):
            default_values_and_kwargs(out, a, z=b)

        result = np.empty(2, dtype=np.int64)
        kernel[1, 1](1, 2, result)
        # x=1 plus the default y=5 gives 6; z is passed explicitly as 2
        self.assertEqual(tuple(result), (6, 2))
399
+
400
+
401
# Run the tests in this module when it is executed as a script.
if __name__ == "__main__":
    unittest.main()
@@ -0,0 +1,128 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: BSD-2-Clause
3
+
4
+ import math
5
+ import numpy as np
6
+ from numba import cuda
7
+ from numba.cuda import float64, int8, int32, void
8
+ from numba.cuda.testing import unittest, CUDATestCase
9
+
10
+
11
def cu_mat_power(A, power, power_A):
    """Kernel body: store ``math.pow(A[y, x], int32(power))`` in ``power_A``.

    Each thread takes its ``(y, x)`` position from ``cuda.grid(2)`` and writes
    at most one element; positions outside ``power_A.shape`` are skipped.
    """
    row, col = cuda.grid(2)
    n_rows, n_cols = power_A.shape

    # Only threads that land on a valid element perform the store.
    if col < n_cols and row < n_rows:
        power_A[row, col] = math.pow(A[row, col], int32(power))
19
+
20
+
21
def cu_mat_power_binop(A, power, power_A):
    """Kernel body: store ``A[y, x] ** power`` in ``power_A``.

    Each thread takes its ``(y, x)`` position from ``cuda.grid(2)`` and writes
    at most one element; positions outside ``power_A.shape`` are skipped.
    """
    row, col = cuda.grid(2)
    n_rows, n_cols = power_A.shape

    # Only threads that land on a valid element perform the store.
    if col < n_cols and row < n_rows:
        power_A[row, col] = A[row, col] ** power
29
+
30
+
31
def vec_pow(r, x, y):
    """Kernel body: ``r[i] = pow(x[i], y[i])`` for the thread's index ``i``."""
    idx = cuda.grid(1)

    # Threads beyond the end of the output have nothing to do.
    if idx >= len(r):
        return

    r[idx] = pow(x[idx], y[idx])
36
+
37
+
38
def vec_pow_binop(r, x, y):
    """Kernel body: ``r[i] = x[i] ** y[i]`` for the thread's index ``i``."""
    idx = cuda.grid(1)

    # Threads beyond the end of the output have nothing to do.
    if idx >= len(r):
        return

    r[idx] = x[idx] ** y[idx]
43
+
44
+
45
def vec_pow_inplace_binop(r, x):
    """Kernel body: ``r[i] **= x[i]`` for the thread's index ``i``."""
    idx = cuda.grid(1)

    # Threads beyond the end of the array have nothing to do.
    if idx >= len(r):
        return

    r[idx] **= x[idx]
50
+
51
+
52
def random_complex(N):
    """Return ``N`` reproducible pseudo-random complex128 values.

    Real and imaginary parts are drawn from ``np.random.random``, i.e. from
    the half-open interval [0, 1). The global NumPy RNG is re-seeded with a
    fixed seed on every call, so repeated calls with the same ``N`` return
    identical arrays.

    Previously ``N`` was ignored and a single-element array was always
    returned, so callers launching ``N`` threads only exercised one value.
    The result for ``N == 1`` is unchanged.
    """
    np.random.seed(123)
    # Draw all real parts first, then all imaginary parts, which keeps the
    # N == 1 result identical to the previous implementation.
    real_part = np.random.random(N)
    imag_part = np.random.random(N)
    return real_part + imag_part * 1j
55
+
56
+
57
class TestCudaPowi(CUDATestCase):
    """Tests of integer and complex exponentiation inside CUDA kernels."""

    def _check_powi(self, pyfunc):
        """Compile ``pyfunc`` with an ``int8`` power argument and compare its
        output on a 2x5 float64 matrix against NumPy's ``**``."""
        signature = void(float64[:, :], int8, float64[:, :])
        kernel = cuda.jit(signature)(pyfunc)

        exponent = 2
        matrix = np.arange(10, dtype=np.float64).reshape(2, 5)
        result = np.empty_like(matrix)
        kernel[1, matrix.shape](matrix, exponent, result)
        self.assertTrue(np.allclose(result, matrix**exponent))

    def test_powi(self):
        self._check_powi(cu_mat_power)

    def test_powi_binop(self):
        self._check_powi(cu_mat_power_binop)

    def _test_cpow(self, dtype, func, rtol=1.0e-7):
        """Compare ``func`` run as a CUDA kernel against NumPy's ``**``.

        ``rtol`` is a keyword argument because 1.0e-7 (the default for
        assert_allclose) is a bit tight for single precision.
        """
        n_threads = 32
        cases = (
            # Inputs produced by random_complex
            (
                random_complex(n_threads).astype(dtype),
                random_complex(n_threads).astype(dtype),
                n_threads,
            ),
            # Special cases
            (
                np.asarray([0.0j, 1.0j], dtype=dtype),
                np.asarray([0.0j, 1.0], dtype=dtype),
                2,
            ),
        )

        cfunc = cuda.jit(func)
        for base, exponent, threads in cases:
            result = np.zeros_like(base)
            cfunc[1, threads](result, base, exponent)
            np.testing.assert_allclose(result, base**exponent, rtol=rtol)

    def test_cpow_complex64_pow(self):
        self._test_cpow(np.complex64, vec_pow, rtol=3.0e-7)

    def test_cpow_complex64_binop(self):
        self._test_cpow(np.complex64, vec_pow_binop, rtol=3.0e-7)

    def test_cpow_complex128_pow(self):
        self._test_cpow(np.complex128, vec_pow)

    def test_cpow_complex128_binop(self):
        self._test_cpow(np.complex128, vec_pow_binop)

    def _test_cpow_inplace_binop(self, dtype, rtol=1.0e-7):
        """Compare the in-place ``**=`` kernel against NumPy's ``**``."""
        n_threads = 32
        base = random_complex(n_threads).astype(dtype)
        exponent = random_complex(n_threads).astype(dtype)
        # Compute the reference before the kernel overwrites ``base``.
        reference = base**exponent

        cfunc = cuda.jit(vec_pow_inplace_binop)
        cfunc[1, n_threads](base, exponent)
        np.testing.assert_allclose(base, reference, rtol=rtol)

    def test_cpow_complex64_inplace_binop(self):
        self._test_cpow_inplace_binop(np.complex64, rtol=3.0e-7)

    def test_cpow_complex128_inplace_binop(self):
        self._test_cpow_inplace_binop(np.complex128, rtol=3.0e-7)
125
+
126
+
127
# Run the tests in this module when it is executed as a script.
if __name__ == "__main__":
    unittest.main()
@@ -0,0 +1,193 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: BSD-2-Clause
3
+
4
+ from numba.cuda.testing import CUDATestCase, skip_on_cudasim
5
+ import numpy as np
6
+ import subprocess
7
+ import sys
8
+ import unittest
9
+
10
+
11
# Source of a script in which each of 2 x 3 threads prints its grid index
# followed by 999, and then prints -42.
cuhello_usecase = """\
from numba import cuda

@cuda.jit
def cuhello():
    i = cuda.grid(1)
    print(i, 999)
    print(-42)

cuhello[2, 3]()
cuda.synchronize()
"""
23
+
24
+
25
# Source of a script in which a single thread prints a mix of integers and a
# float.
printfloat_usecase = """\
from numba import cuda

@cuda.jit
def printfloat():
    i = cuda.grid(1)
    print(i, 23, 34.75, 321)

printfloat[1, 1]()
cuda.synchronize()
"""
36
+
37
+
38
# Source of a script that prints boolean literals and a comparison result;
# the kernel is launched once with 0 and once with 1.
printbool_usecase = """\
from numba import cuda

@cuda.jit
def printbool(x):
    print(True)
    print(False)
    print(x == 0)

printbool[1, 1](0)
printbool[1, 1](1)
cuda.synchronize()
"""
51
+
52
+
53
# Source of a script in which each of 3 threads prints its grid index, a
# string literal and an integer.
printstring_usecase = """\
from numba import cuda

@cuda.jit
def printstring():
    i = cuda.grid(1)
    print(i, "hop!", 999)

printstring[1, 3]()
cuda.synchronize()
"""
64
+
65
+
66
# Source of a script in which every thread of a 2 x 2 x 2 block prints
# cuda.threadIdx.
printdim3_usecase = """\
from numba import cuda

@cuda.jit
def printdim3():
    print(cuda.threadIdx)

printdim3[1, (2, 2, 2)]()
cuda.synchronize()
"""
76
+
77
+
78
# Source of a script in which a single thread calls print() with no
# arguments.
printempty_usecase = """\
from numba import cuda

@cuda.jit
def printempty():
    print()

printempty[1, 1]()
cuda.synchronize()
"""
88
+
89
+
90
# Source of a script in which a single thread passes 33 items to one print()
# call.
print_too_many_usecase = """\
from numba import cuda
import numpy as np

@cuda.jit
def print_too_many(r):
    print(r[0], r[1], r[2], r[3], r[4], r[5], r[6], r[7], r[8], r[9], r[10],
          r[11], r[12], r[13], r[14], r[15], r[16], r[17], r[18], r[19], r[20],
          r[21], r[22], r[23], r[24], r[25], r[26], r[27], r[28], r[29], r[30],
          r[31], r[32])

print_too_many[1, 1](np.arange(33))
cuda.synchronize()
"""
104
+
105
# Source of a script in which a single thread prints the same bfloat16 value
# three times in one print() call.
print_bfloat16_usecase = """\
from numba import cuda
from numba.cuda import config

@cuda.jit
def print_bfloat16():
    # 0.9375 is a dyadic rational, it's integer significand can expand within 7 digits.
    # printing this should not give any rounding error.
    a = cuda.bfloat16(0.9375)
    print(a, a, a)

print_bfloat16[1, 1]()
cuda.synchronize()
"""
119
+
120
+
121
class TestPrint(CUDATestCase):
    """Tests of ``print()`` inside CUDA kernels, run in subprocesses."""

    # Output is generally stripped before comparison so that platform-specific
    # line endings ('\r\n' vs '\n' etc.) do not have to be handled.

    def run_code(self, code):
        """Run ``code`` with the current interpreter in a subprocess and
        return its decoded ``(stdout, stderr)``.

        The subprocess gets 60 seconds to finish; a non-zero exit status
        raises ``subprocess.CalledProcessError``.
        """
        completed = subprocess.run(
            [sys.executable, "-c", code],
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            timeout=60,
            check=True,
        )
        return completed.stdout.decode(), completed.stderr.decode()

    def test_cuhello(self):
        stdout, _ = self.run_code(cuhello_usecase)
        printed = sorted(line.strip() for line in stdout.splitlines())
        # Output from different GPU threads is intermingled, but each print()
        # call is still atomic, so sorting the lines gives a stable result.
        expected = ["-42"] * 6 + [f"{i} 999" for i in range(6)]
        self.assertEqual(printed, expected)

    def test_printfloat(self):
        stdout, _ = self.run_code(printfloat_usecase)
        # CUDA and the simulator format floats differently
        accepted = ["0 23 34.750000 321", "0 23 34.75 321"]
        self.assertIn(stdout.strip(), accepted)

    def test_bool(self):
        stdout, _ = self.run_code(printbool_usecase)
        # One value per line, tolerating either '\n' or '\r\n' line endings
        values = ["True", "False", "True", "True", "False", "False"]
        pattern = "\r?\n".join(values)
        self.assertRegex(stdout.strip(), pattern)

    def test_printempty(self):
        stdout, _ = self.run_code(printempty_usecase)
        self.assertEqual(stdout.strip(), "")

    def test_string(self):
        stdout, _ = self.run_code(printstring_usecase)
        printed = sorted(
            line.strip() for line in stdout.splitlines(keepends=True)
        )
        expected = [f"{i} hop! 999" for i in range(3)]
        self.assertEqual(printed, expected)

    def test_dim3(self):
        stdout, _ = self.run_code(printdim3_usecase)
        printed = sorted(
            line.strip() for line in stdout.splitlines(keepends=True)
        )
        expected = [str(index) for index in np.ndindex(2, 2, 2)]
        self.assertEqual(printed, expected)

    @skip_on_cudasim("bfloat16 on host is not yet supported.")
    def test_bfloat16(self):
        stdout, _ = self.run_code(print_bfloat16_usecase)
        self.assertEqual(stdout.strip(), "0.937500 0.937500 0.937500")

    @skip_on_cudasim("cudasim can print unlimited output")
    def test_too_many_args(self):
        # Tests that we emit the format string and warn when there are more
        # than 32 arguments, in common with CUDA C/C++ printf - this is due to
        # a limitation in CUDA vprintf, see:
        # https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#limitations

        stdout, stderr = self.run_code(print_too_many_usecase)

        # The raw format string must be printed instead of formatted garbage
        raw_format = " ".join(["%lld"] * 33)
        self.assertIn(raw_format, stdout)

        # The warning about formatting more than 32 items must be emitted
        warn_msg = (
            "CUDA print() cannot print more than 32 items. The raw "
            "format string will be emitted by the kernel instead."
        )
        self.assertIn(warn_msg, stderr)
190
+
191
+
192
# Run the tests in this module when it is executed as a script.
if __name__ == "__main__":
    unittest.main()