numba-cuda 0.22.0__cp312-cp312-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of numba-cuda might be problematic. Click here for more details.

Files changed (487) hide show
  1. _numba_cuda_redirector.pth +4 -0
  2. _numba_cuda_redirector.py +89 -0
  3. numba_cuda/VERSION +1 -0
  4. numba_cuda/__init__.py +6 -0
  5. numba_cuda/_version.py +11 -0
  6. numba_cuda/numba/cuda/__init__.py +70 -0
  7. numba_cuda/numba/cuda/_internal/cuda_bf16.py +16394 -0
  8. numba_cuda/numba/cuda/_internal/cuda_fp16.py +8112 -0
  9. numba_cuda/numba/cuda/api.py +580 -0
  10. numba_cuda/numba/cuda/api_util.py +76 -0
  11. numba_cuda/numba/cuda/args.py +72 -0
  12. numba_cuda/numba/cuda/bf16.py +397 -0
  13. numba_cuda/numba/cuda/cache_hints.py +287 -0
  14. numba_cuda/numba/cuda/cext/__init__.py +2 -0
  15. numba_cuda/numba/cuda/cext/_devicearray.cpp +159 -0
  16. numba_cuda/numba/cuda/cext/_devicearray.cpython-312-aarch64-linux-gnu.so +0 -0
  17. numba_cuda/numba/cuda/cext/_devicearray.h +29 -0
  18. numba_cuda/numba/cuda/cext/_dispatcher.cpp +1098 -0
  19. numba_cuda/numba/cuda/cext/_dispatcher.cpython-312-aarch64-linux-gnu.so +0 -0
  20. numba_cuda/numba/cuda/cext/_hashtable.cpp +532 -0
  21. numba_cuda/numba/cuda/cext/_hashtable.h +135 -0
  22. numba_cuda/numba/cuda/cext/_helperlib.c +71 -0
  23. numba_cuda/numba/cuda/cext/_helperlib.cpython-312-aarch64-linux-gnu.so +0 -0
  24. numba_cuda/numba/cuda/cext/_helpermod.c +82 -0
  25. numba_cuda/numba/cuda/cext/_pymodule.h +38 -0
  26. numba_cuda/numba/cuda/cext/_typeconv.cpp +206 -0
  27. numba_cuda/numba/cuda/cext/_typeconv.cpython-312-aarch64-linux-gnu.so +0 -0
  28. numba_cuda/numba/cuda/cext/_typeof.cpp +1159 -0
  29. numba_cuda/numba/cuda/cext/_typeof.h +19 -0
  30. numba_cuda/numba/cuda/cext/capsulethunk.h +111 -0
  31. numba_cuda/numba/cuda/cext/mviewbuf.c +385 -0
  32. numba_cuda/numba/cuda/cext/mviewbuf.cpython-312-aarch64-linux-gnu.so +0 -0
  33. numba_cuda/numba/cuda/cext/typeconv.cpp +212 -0
  34. numba_cuda/numba/cuda/cext/typeconv.hpp +101 -0
  35. numba_cuda/numba/cuda/cg.py +67 -0
  36. numba_cuda/numba/cuda/cgutils.py +1294 -0
  37. numba_cuda/numba/cuda/cloudpickle/__init__.py +21 -0
  38. numba_cuda/numba/cuda/cloudpickle/cloudpickle.py +1598 -0
  39. numba_cuda/numba/cuda/cloudpickle/cloudpickle_fast.py +17 -0
  40. numba_cuda/numba/cuda/codegen.py +541 -0
  41. numba_cuda/numba/cuda/compiler.py +1396 -0
  42. numba_cuda/numba/cuda/core/analysis.py +758 -0
  43. numba_cuda/numba/cuda/core/annotations/__init__.py +0 -0
  44. numba_cuda/numba/cuda/core/annotations/pretty_annotate.py +288 -0
  45. numba_cuda/numba/cuda/core/annotations/type_annotations.py +305 -0
  46. numba_cuda/numba/cuda/core/base.py +1332 -0
  47. numba_cuda/numba/cuda/core/boxing.py +1411 -0
  48. numba_cuda/numba/cuda/core/bytecode.py +728 -0
  49. numba_cuda/numba/cuda/core/byteflow.py +2346 -0
  50. numba_cuda/numba/cuda/core/caching.py +744 -0
  51. numba_cuda/numba/cuda/core/callconv.py +392 -0
  52. numba_cuda/numba/cuda/core/codegen.py +171 -0
  53. numba_cuda/numba/cuda/core/compiler.py +199 -0
  54. numba_cuda/numba/cuda/core/compiler_lock.py +85 -0
  55. numba_cuda/numba/cuda/core/compiler_machinery.py +497 -0
  56. numba_cuda/numba/cuda/core/config.py +650 -0
  57. numba_cuda/numba/cuda/core/consts.py +124 -0
  58. numba_cuda/numba/cuda/core/controlflow.py +989 -0
  59. numba_cuda/numba/cuda/core/entrypoints.py +57 -0
  60. numba_cuda/numba/cuda/core/environment.py +66 -0
  61. numba_cuda/numba/cuda/core/errors.py +917 -0
  62. numba_cuda/numba/cuda/core/event.py +511 -0
  63. numba_cuda/numba/cuda/core/funcdesc.py +330 -0
  64. numba_cuda/numba/cuda/core/generators.py +387 -0
  65. numba_cuda/numba/cuda/core/imputils.py +509 -0
  66. numba_cuda/numba/cuda/core/inline_closurecall.py +1787 -0
  67. numba_cuda/numba/cuda/core/interpreter.py +3617 -0
  68. numba_cuda/numba/cuda/core/ir.py +1812 -0
  69. numba_cuda/numba/cuda/core/ir_utils.py +2638 -0
  70. numba_cuda/numba/cuda/core/optional.py +129 -0
  71. numba_cuda/numba/cuda/core/options.py +262 -0
  72. numba_cuda/numba/cuda/core/postproc.py +249 -0
  73. numba_cuda/numba/cuda/core/pythonapi.py +1859 -0
  74. numba_cuda/numba/cuda/core/registry.py +46 -0
  75. numba_cuda/numba/cuda/core/removerefctpass.py +123 -0
  76. numba_cuda/numba/cuda/core/rewrites/__init__.py +26 -0
  77. numba_cuda/numba/cuda/core/rewrites/ir_print.py +91 -0
  78. numba_cuda/numba/cuda/core/rewrites/registry.py +104 -0
  79. numba_cuda/numba/cuda/core/rewrites/static_binop.py +41 -0
  80. numba_cuda/numba/cuda/core/rewrites/static_getitem.py +189 -0
  81. numba_cuda/numba/cuda/core/rewrites/static_raise.py +100 -0
  82. numba_cuda/numba/cuda/core/sigutils.py +68 -0
  83. numba_cuda/numba/cuda/core/ssa.py +498 -0
  84. numba_cuda/numba/cuda/core/targetconfig.py +330 -0
  85. numba_cuda/numba/cuda/core/tracing.py +231 -0
  86. numba_cuda/numba/cuda/core/transforms.py +956 -0
  87. numba_cuda/numba/cuda/core/typed_passes.py +867 -0
  88. numba_cuda/numba/cuda/core/typeinfer.py +1950 -0
  89. numba_cuda/numba/cuda/core/unsafe/__init__.py +0 -0
  90. numba_cuda/numba/cuda/core/unsafe/bytes.py +67 -0
  91. numba_cuda/numba/cuda/core/unsafe/eh.py +67 -0
  92. numba_cuda/numba/cuda/core/unsafe/refcount.py +98 -0
  93. numba_cuda/numba/cuda/core/untyped_passes.py +1979 -0
  94. numba_cuda/numba/cuda/cpython/builtins.py +1153 -0
  95. numba_cuda/numba/cuda/cpython/charseq.py +1218 -0
  96. numba_cuda/numba/cuda/cpython/cmathimpl.py +560 -0
  97. numba_cuda/numba/cuda/cpython/enumimpl.py +103 -0
  98. numba_cuda/numba/cuda/cpython/iterators.py +167 -0
  99. numba_cuda/numba/cuda/cpython/listobj.py +1326 -0
  100. numba_cuda/numba/cuda/cpython/mathimpl.py +499 -0
  101. numba_cuda/numba/cuda/cpython/numbers.py +1475 -0
  102. numba_cuda/numba/cuda/cpython/rangeobj.py +289 -0
  103. numba_cuda/numba/cuda/cpython/slicing.py +322 -0
  104. numba_cuda/numba/cuda/cpython/tupleobj.py +456 -0
  105. numba_cuda/numba/cuda/cpython/unicode.py +2865 -0
  106. numba_cuda/numba/cuda/cpython/unicode_support.py +1597 -0
  107. numba_cuda/numba/cuda/cpython/unsafe/__init__.py +0 -0
  108. numba_cuda/numba/cuda/cpython/unsafe/numbers.py +64 -0
  109. numba_cuda/numba/cuda/cpython/unsafe/tuple.py +92 -0
  110. numba_cuda/numba/cuda/cuda_paths.py +691 -0
  111. numba_cuda/numba/cuda/cudadecl.py +543 -0
  112. numba_cuda/numba/cuda/cudadrv/__init__.py +14 -0
  113. numba_cuda/numba/cuda/cudadrv/devicearray.py +954 -0
  114. numba_cuda/numba/cuda/cudadrv/devices.py +249 -0
  115. numba_cuda/numba/cuda/cudadrv/driver.py +3238 -0
  116. numba_cuda/numba/cuda/cudadrv/drvapi.py +435 -0
  117. numba_cuda/numba/cuda/cudadrv/dummyarray.py +562 -0
  118. numba_cuda/numba/cuda/cudadrv/enums.py +613 -0
  119. numba_cuda/numba/cuda/cudadrv/error.py +48 -0
  120. numba_cuda/numba/cuda/cudadrv/libs.py +220 -0
  121. numba_cuda/numba/cuda/cudadrv/linkable_code.py +184 -0
  122. numba_cuda/numba/cuda/cudadrv/mappings.py +14 -0
  123. numba_cuda/numba/cuda/cudadrv/ndarray.py +26 -0
  124. numba_cuda/numba/cuda/cudadrv/nvrtc.py +193 -0
  125. numba_cuda/numba/cuda/cudadrv/nvvm.py +756 -0
  126. numba_cuda/numba/cuda/cudadrv/rtapi.py +13 -0
  127. numba_cuda/numba/cuda/cudadrv/runtime.py +34 -0
  128. numba_cuda/numba/cuda/cudaimpl.py +983 -0
  129. numba_cuda/numba/cuda/cudamath.py +149 -0
  130. numba_cuda/numba/cuda/datamodel/__init__.py +7 -0
  131. numba_cuda/numba/cuda/datamodel/cuda_manager.py +66 -0
  132. numba_cuda/numba/cuda/datamodel/cuda_models.py +1446 -0
  133. numba_cuda/numba/cuda/datamodel/cuda_packer.py +224 -0
  134. numba_cuda/numba/cuda/datamodel/cuda_registry.py +22 -0
  135. numba_cuda/numba/cuda/datamodel/cuda_testing.py +153 -0
  136. numba_cuda/numba/cuda/datamodel/manager.py +11 -0
  137. numba_cuda/numba/cuda/datamodel/models.py +9 -0
  138. numba_cuda/numba/cuda/datamodel/packer.py +9 -0
  139. numba_cuda/numba/cuda/datamodel/registry.py +11 -0
  140. numba_cuda/numba/cuda/datamodel/testing.py +11 -0
  141. numba_cuda/numba/cuda/debuginfo.py +997 -0
  142. numba_cuda/numba/cuda/decorators.py +294 -0
  143. numba_cuda/numba/cuda/descriptor.py +35 -0
  144. numba_cuda/numba/cuda/device_init.py +155 -0
  145. numba_cuda/numba/cuda/deviceufunc.py +1021 -0
  146. numba_cuda/numba/cuda/dispatcher.py +2463 -0
  147. numba_cuda/numba/cuda/errors.py +72 -0
  148. numba_cuda/numba/cuda/extending.py +697 -0
  149. numba_cuda/numba/cuda/flags.py +178 -0
  150. numba_cuda/numba/cuda/fp16.py +357 -0
  151. numba_cuda/numba/cuda/include/12/cuda_bf16.h +5118 -0
  152. numba_cuda/numba/cuda/include/12/cuda_bf16.hpp +3865 -0
  153. numba_cuda/numba/cuda/include/12/cuda_fp16.h +5363 -0
  154. numba_cuda/numba/cuda/include/12/cuda_fp16.hpp +3483 -0
  155. numba_cuda/numba/cuda/include/13/cuda_bf16.h +5118 -0
  156. numba_cuda/numba/cuda/include/13/cuda_bf16.hpp +3865 -0
  157. numba_cuda/numba/cuda/include/13/cuda_fp16.h +5363 -0
  158. numba_cuda/numba/cuda/include/13/cuda_fp16.hpp +3483 -0
  159. numba_cuda/numba/cuda/initialize.py +24 -0
  160. numba_cuda/numba/cuda/intrinsics.py +531 -0
  161. numba_cuda/numba/cuda/itanium_mangler.py +214 -0
  162. numba_cuda/numba/cuda/kernels/__init__.py +2 -0
  163. numba_cuda/numba/cuda/kernels/reduction.py +265 -0
  164. numba_cuda/numba/cuda/kernels/transpose.py +65 -0
  165. numba_cuda/numba/cuda/libdevice.py +3386 -0
  166. numba_cuda/numba/cuda/libdevicedecl.py +20 -0
  167. numba_cuda/numba/cuda/libdevicefuncs.py +1060 -0
  168. numba_cuda/numba/cuda/libdeviceimpl.py +88 -0
  169. numba_cuda/numba/cuda/locks.py +19 -0
  170. numba_cuda/numba/cuda/lowering.py +1980 -0
  171. numba_cuda/numba/cuda/mathimpl.py +374 -0
  172. numba_cuda/numba/cuda/memory_management/__init__.py +4 -0
  173. numba_cuda/numba/cuda/memory_management/memsys.cu +99 -0
  174. numba_cuda/numba/cuda/memory_management/memsys.cuh +22 -0
  175. numba_cuda/numba/cuda/memory_management/nrt.cu +212 -0
  176. numba_cuda/numba/cuda/memory_management/nrt.cuh +48 -0
  177. numba_cuda/numba/cuda/memory_management/nrt.py +390 -0
  178. numba_cuda/numba/cuda/memory_management/nrt_context.py +438 -0
  179. numba_cuda/numba/cuda/misc/appdirs.py +594 -0
  180. numba_cuda/numba/cuda/misc/cffiimpl.py +24 -0
  181. numba_cuda/numba/cuda/misc/coverage_support.py +43 -0
  182. numba_cuda/numba/cuda/misc/dump_style.py +41 -0
  183. numba_cuda/numba/cuda/misc/findlib.py +75 -0
  184. numba_cuda/numba/cuda/misc/firstlinefinder.py +96 -0
  185. numba_cuda/numba/cuda/misc/gdb_hook.py +240 -0
  186. numba_cuda/numba/cuda/misc/literal.py +28 -0
  187. numba_cuda/numba/cuda/misc/llvm_pass_timings.py +412 -0
  188. numba_cuda/numba/cuda/misc/special.py +94 -0
  189. numba_cuda/numba/cuda/models.py +56 -0
  190. numba_cuda/numba/cuda/np/arraymath.py +5130 -0
  191. numba_cuda/numba/cuda/np/arrayobj.py +7635 -0
  192. numba_cuda/numba/cuda/np/extensions.py +11 -0
  193. numba_cuda/numba/cuda/np/linalg.py +3087 -0
  194. numba_cuda/numba/cuda/np/math/__init__.py +0 -0
  195. numba_cuda/numba/cuda/np/math/cmathimpl.py +558 -0
  196. numba_cuda/numba/cuda/np/math/mathimpl.py +487 -0
  197. numba_cuda/numba/cuda/np/math/numbers.py +1461 -0
  198. numba_cuda/numba/cuda/np/npdatetime.py +969 -0
  199. numba_cuda/numba/cuda/np/npdatetime_helpers.py +217 -0
  200. numba_cuda/numba/cuda/np/npyfuncs.py +1808 -0
  201. numba_cuda/numba/cuda/np/npyimpl.py +1027 -0
  202. numba_cuda/numba/cuda/np/numpy_support.py +798 -0
  203. numba_cuda/numba/cuda/np/polynomial/__init__.py +4 -0
  204. numba_cuda/numba/cuda/np/polynomial/polynomial_core.py +242 -0
  205. numba_cuda/numba/cuda/np/polynomial/polynomial_functions.py +380 -0
  206. numba_cuda/numba/cuda/np/ufunc/__init__.py +4 -0
  207. numba_cuda/numba/cuda/np/ufunc/decorators.py +203 -0
  208. numba_cuda/numba/cuda/np/ufunc/sigparse.py +68 -0
  209. numba_cuda/numba/cuda/np/ufunc/ufuncbuilder.py +65 -0
  210. numba_cuda/numba/cuda/np/ufunc_db.py +1282 -0
  211. numba_cuda/numba/cuda/np/unsafe/__init__.py +0 -0
  212. numba_cuda/numba/cuda/np/unsafe/ndarray.py +84 -0
  213. numba_cuda/numba/cuda/nvvmutils.py +254 -0
  214. numba_cuda/numba/cuda/printimpl.py +126 -0
  215. numba_cuda/numba/cuda/random.py +308 -0
  216. numba_cuda/numba/cuda/reshape_funcs.cu +156 -0
  217. numba_cuda/numba/cuda/serialize.py +267 -0
  218. numba_cuda/numba/cuda/simulator/__init__.py +63 -0
  219. numba_cuda/numba/cuda/simulator/_internal/__init__.py +4 -0
  220. numba_cuda/numba/cuda/simulator/_internal/cuda_bf16.py +2 -0
  221. numba_cuda/numba/cuda/simulator/api.py +179 -0
  222. numba_cuda/numba/cuda/simulator/bf16.py +4 -0
  223. numba_cuda/numba/cuda/simulator/compiler.py +38 -0
  224. numba_cuda/numba/cuda/simulator/cudadrv/__init__.py +11 -0
  225. numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py +462 -0
  226. numba_cuda/numba/cuda/simulator/cudadrv/devices.py +122 -0
  227. numba_cuda/numba/cuda/simulator/cudadrv/driver.py +66 -0
  228. numba_cuda/numba/cuda/simulator/cudadrv/drvapi.py +7 -0
  229. numba_cuda/numba/cuda/simulator/cudadrv/dummyarray.py +7 -0
  230. numba_cuda/numba/cuda/simulator/cudadrv/error.py +10 -0
  231. numba_cuda/numba/cuda/simulator/cudadrv/libs.py +10 -0
  232. numba_cuda/numba/cuda/simulator/cudadrv/linkable_code.py +61 -0
  233. numba_cuda/numba/cuda/simulator/cudadrv/nvrtc.py +11 -0
  234. numba_cuda/numba/cuda/simulator/cudadrv/nvvm.py +32 -0
  235. numba_cuda/numba/cuda/simulator/cudadrv/runtime.py +22 -0
  236. numba_cuda/numba/cuda/simulator/dispatcher.py +11 -0
  237. numba_cuda/numba/cuda/simulator/kernel.py +320 -0
  238. numba_cuda/numba/cuda/simulator/kernelapi.py +509 -0
  239. numba_cuda/numba/cuda/simulator/memory_management/__init__.py +4 -0
  240. numba_cuda/numba/cuda/simulator/memory_management/nrt.py +21 -0
  241. numba_cuda/numba/cuda/simulator/reduction.py +19 -0
  242. numba_cuda/numba/cuda/simulator/tests/support.py +4 -0
  243. numba_cuda/numba/cuda/simulator/vector_types.py +65 -0
  244. numba_cuda/numba/cuda/simulator_init.py +18 -0
  245. numba_cuda/numba/cuda/stubs.py +624 -0
  246. numba_cuda/numba/cuda/target.py +505 -0
  247. numba_cuda/numba/cuda/testing.py +347 -0
  248. numba_cuda/numba/cuda/tests/__init__.py +62 -0
  249. numba_cuda/numba/cuda/tests/benchmarks/__init__.py +0 -0
  250. numba_cuda/numba/cuda/tests/benchmarks/test_kernel_launch.py +119 -0
  251. numba_cuda/numba/cuda/tests/cloudpickle_main_class.py +9 -0
  252. numba_cuda/numba/cuda/tests/core/serialize_usecases.py +113 -0
  253. numba_cuda/numba/cuda/tests/core/test_itanium_mangler.py +83 -0
  254. numba_cuda/numba/cuda/tests/core/test_serialize.py +371 -0
  255. numba_cuda/numba/cuda/tests/cudadrv/__init__.py +9 -0
  256. numba_cuda/numba/cuda/tests/cudadrv/test_array_attr.py +147 -0
  257. numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py +161 -0
  258. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_array_slicing.py +397 -0
  259. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_auto_context.py +24 -0
  260. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py +180 -0
  261. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py +313 -0
  262. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_memory.py +191 -0
  263. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py +621 -0
  264. numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py +247 -0
  265. numba_cuda/numba/cuda/tests/cudadrv/test_detect.py +100 -0
  266. numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +200 -0
  267. numba_cuda/numba/cuda/tests/cudadrv/test_events.py +53 -0
  268. numba_cuda/numba/cuda/tests/cudadrv/test_host_alloc.py +72 -0
  269. numba_cuda/numba/cuda/tests/cudadrv/test_init.py +138 -0
  270. numba_cuda/numba/cuda/tests/cudadrv/test_inline_ptx.py +43 -0
  271. numba_cuda/numba/cuda/tests/cudadrv/test_is_fp16.py +15 -0
  272. numba_cuda/numba/cuda/tests/cudadrv/test_linkable_code.py +58 -0
  273. numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +348 -0
  274. numba_cuda/numba/cuda/tests/cudadrv/test_managed_alloc.py +128 -0
  275. numba_cuda/numba/cuda/tests/cudadrv/test_module_callbacks.py +301 -0
  276. numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py +174 -0
  277. numba_cuda/numba/cuda/tests/cudadrv/test_nvrtc.py +28 -0
  278. numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py +185 -0
  279. numba_cuda/numba/cuda/tests/cudadrv/test_pinned.py +39 -0
  280. numba_cuda/numba/cuda/tests/cudadrv/test_profiler.py +23 -0
  281. numba_cuda/numba/cuda/tests/cudadrv/test_reset_device.py +38 -0
  282. numba_cuda/numba/cuda/tests/cudadrv/test_runtime.py +48 -0
  283. numba_cuda/numba/cuda/tests/cudadrv/test_select_device.py +44 -0
  284. numba_cuda/numba/cuda/tests/cudadrv/test_streams.py +127 -0
  285. numba_cuda/numba/cuda/tests/cudapy/__init__.py +9 -0
  286. numba_cuda/numba/cuda/tests/cudapy/cache_usecases.py +231 -0
  287. numba_cuda/numba/cuda/tests/cudapy/cache_with_cpu_usecases.py +50 -0
  288. numba_cuda/numba/cuda/tests/cudapy/cg_cache_usecases.py +36 -0
  289. numba_cuda/numba/cuda/tests/cudapy/complex_usecases.py +116 -0
  290. numba_cuda/numba/cuda/tests/cudapy/enum_usecases.py +59 -0
  291. numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +62 -0
  292. numba_cuda/numba/cuda/tests/cudapy/jitlink.ptx +28 -0
  293. numba_cuda/numba/cuda/tests/cudapy/overload_usecases.py +33 -0
  294. numba_cuda/numba/cuda/tests/cudapy/recursion_usecases.py +104 -0
  295. numba_cuda/numba/cuda/tests/cudapy/test_alignment.py +47 -0
  296. numba_cuda/numba/cuda/tests/cudapy/test_analysis.py +1122 -0
  297. numba_cuda/numba/cuda/tests/cudapy/test_array.py +344 -0
  298. numba_cuda/numba/cuda/tests/cudapy/test_array_alignment.py +268 -0
  299. numba_cuda/numba/cuda/tests/cudapy/test_array_args.py +203 -0
  300. numba_cuda/numba/cuda/tests/cudapy/test_array_methods.py +63 -0
  301. numba_cuda/numba/cuda/tests/cudapy/test_array_reductions.py +360 -0
  302. numba_cuda/numba/cuda/tests/cudapy/test_atomics.py +1815 -0
  303. numba_cuda/numba/cuda/tests/cudapy/test_bfloat16.py +599 -0
  304. numba_cuda/numba/cuda/tests/cudapy/test_bfloat16_bindings.py +377 -0
  305. numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py +160 -0
  306. numba_cuda/numba/cuda/tests/cudapy/test_boolean.py +27 -0
  307. numba_cuda/numba/cuda/tests/cudapy/test_byteflow.py +98 -0
  308. numba_cuda/numba/cuda/tests/cudapy/test_cache_hints.py +210 -0
  309. numba_cuda/numba/cuda/tests/cudapy/test_caching.py +683 -0
  310. numba_cuda/numba/cuda/tests/cudapy/test_casting.py +265 -0
  311. numba_cuda/numba/cuda/tests/cudapy/test_cffi.py +42 -0
  312. numba_cuda/numba/cuda/tests/cudapy/test_compiler.py +718 -0
  313. numba_cuda/numba/cuda/tests/cudapy/test_complex.py +370 -0
  314. numba_cuda/numba/cuda/tests/cudapy/test_complex_kernel.py +23 -0
  315. numba_cuda/numba/cuda/tests/cudapy/test_const_string.py +142 -0
  316. numba_cuda/numba/cuda/tests/cudapy/test_constmem.py +178 -0
  317. numba_cuda/numba/cuda/tests/cudapy/test_cooperative_groups.py +193 -0
  318. numba_cuda/numba/cuda/tests/cudapy/test_copy_propagate.py +131 -0
  319. numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py +438 -0
  320. numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py +94 -0
  321. numba_cuda/numba/cuda/tests/cudapy/test_datetime.py +101 -0
  322. numba_cuda/numba/cuda/tests/cudapy/test_debug.py +105 -0
  323. numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +978 -0
  324. numba_cuda/numba/cuda/tests/cudapy/test_debuginfo_types.py +476 -0
  325. numba_cuda/numba/cuda/tests/cudapy/test_device_func.py +500 -0
  326. numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +820 -0
  327. numba_cuda/numba/cuda/tests/cudapy/test_enums.py +152 -0
  328. numba_cuda/numba/cuda/tests/cudapy/test_errors.py +111 -0
  329. numba_cuda/numba/cuda/tests/cudapy/test_exception.py +170 -0
  330. numba_cuda/numba/cuda/tests/cudapy/test_extending.py +1088 -0
  331. numba_cuda/numba/cuda/tests/cudapy/test_extending_types.py +71 -0
  332. numba_cuda/numba/cuda/tests/cudapy/test_fastmath.py +265 -0
  333. numba_cuda/numba/cuda/tests/cudapy/test_flow_control.py +1433 -0
  334. numba_cuda/numba/cuda/tests/cudapy/test_forall.py +57 -0
  335. numba_cuda/numba/cuda/tests/cudapy/test_freevar.py +34 -0
  336. numba_cuda/numba/cuda/tests/cudapy/test_frexp_ldexp.py +69 -0
  337. numba_cuda/numba/cuda/tests/cudapy/test_globals.py +62 -0
  338. numba_cuda/numba/cuda/tests/cudapy/test_gufunc.py +474 -0
  339. numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scalar.py +167 -0
  340. numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scheduling.py +92 -0
  341. numba_cuda/numba/cuda/tests/cudapy/test_idiv.py +39 -0
  342. numba_cuda/numba/cuda/tests/cudapy/test_inline.py +170 -0
  343. numba_cuda/numba/cuda/tests/cudapy/test_inspect.py +255 -0
  344. numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py +1219 -0
  345. numba_cuda/numba/cuda/tests/cudapy/test_ipc.py +263 -0
  346. numba_cuda/numba/cuda/tests/cudapy/test_ir.py +598 -0
  347. numba_cuda/numba/cuda/tests/cudapy/test_ir_utils.py +276 -0
  348. numba_cuda/numba/cuda/tests/cudapy/test_iterators.py +101 -0
  349. numba_cuda/numba/cuda/tests/cudapy/test_lang.py +68 -0
  350. numba_cuda/numba/cuda/tests/cudapy/test_laplace.py +123 -0
  351. numba_cuda/numba/cuda/tests/cudapy/test_libdevice.py +194 -0
  352. numba_cuda/numba/cuda/tests/cudapy/test_lineinfo.py +220 -0
  353. numba_cuda/numba/cuda/tests/cudapy/test_localmem.py +173 -0
  354. numba_cuda/numba/cuda/tests/cudapy/test_make_function_to_jit_function.py +364 -0
  355. numba_cuda/numba/cuda/tests/cudapy/test_mandel.py +47 -0
  356. numba_cuda/numba/cuda/tests/cudapy/test_math.py +842 -0
  357. numba_cuda/numba/cuda/tests/cudapy/test_matmul.py +76 -0
  358. numba_cuda/numba/cuda/tests/cudapy/test_minmax.py +78 -0
  359. numba_cuda/numba/cuda/tests/cudapy/test_montecarlo.py +25 -0
  360. numba_cuda/numba/cuda/tests/cudapy/test_multigpu.py +145 -0
  361. numba_cuda/numba/cuda/tests/cudapy/test_multiprocessing.py +39 -0
  362. numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py +82 -0
  363. numba_cuda/numba/cuda/tests/cudapy/test_nondet.py +53 -0
  364. numba_cuda/numba/cuda/tests/cudapy/test_operator.py +504 -0
  365. numba_cuda/numba/cuda/tests/cudapy/test_optimization.py +93 -0
  366. numba_cuda/numba/cuda/tests/cudapy/test_overload.py +402 -0
  367. numba_cuda/numba/cuda/tests/cudapy/test_powi.py +128 -0
  368. numba_cuda/numba/cuda/tests/cudapy/test_print.py +193 -0
  369. numba_cuda/numba/cuda/tests/cudapy/test_py2_div_issue.py +37 -0
  370. numba_cuda/numba/cuda/tests/cudapy/test_random.py +117 -0
  371. numba_cuda/numba/cuda/tests/cudapy/test_record_dtype.py +614 -0
  372. numba_cuda/numba/cuda/tests/cudapy/test_recursion.py +130 -0
  373. numba_cuda/numba/cuda/tests/cudapy/test_reduction.py +94 -0
  374. numba_cuda/numba/cuda/tests/cudapy/test_retrieve_autoconverted_arrays.py +83 -0
  375. numba_cuda/numba/cuda/tests/cudapy/test_serialize.py +86 -0
  376. numba_cuda/numba/cuda/tests/cudapy/test_slicing.py +40 -0
  377. numba_cuda/numba/cuda/tests/cudapy/test_sm.py +457 -0
  378. numba_cuda/numba/cuda/tests/cudapy/test_sm_creation.py +233 -0
  379. numba_cuda/numba/cuda/tests/cudapy/test_ssa.py +454 -0
  380. numba_cuda/numba/cuda/tests/cudapy/test_stream_api.py +56 -0
  381. numba_cuda/numba/cuda/tests/cudapy/test_sync.py +277 -0
  382. numba_cuda/numba/cuda/tests/cudapy/test_tracing.py +200 -0
  383. numba_cuda/numba/cuda/tests/cudapy/test_transpose.py +90 -0
  384. numba_cuda/numba/cuda/tests/cudapy/test_typeconv.py +333 -0
  385. numba_cuda/numba/cuda/tests/cudapy/test_typeinfer.py +538 -0
  386. numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +585 -0
  387. numba_cuda/numba/cuda/tests/cudapy/test_userexc.py +42 -0
  388. numba_cuda/numba/cuda/tests/cudapy/test_vector_type.py +485 -0
  389. numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py +312 -0
  390. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_complex.py +23 -0
  391. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py +183 -0
  392. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_device.py +40 -0
  393. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_scalar_arg.py +40 -0
  394. numba_cuda/numba/cuda/tests/cudapy/test_warning.py +206 -0
  395. numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +446 -0
  396. numba_cuda/numba/cuda/tests/cudasim/__init__.py +9 -0
  397. numba_cuda/numba/cuda/tests/cudasim/support.py +9 -0
  398. numba_cuda/numba/cuda/tests/cudasim/test_cudasim_issues.py +111 -0
  399. numba_cuda/numba/cuda/tests/data/__init__.py +2 -0
  400. numba_cuda/numba/cuda/tests/data/cta_barrier.cu +28 -0
  401. numba_cuda/numba/cuda/tests/data/cuda_include.cu +10 -0
  402. numba_cuda/numba/cuda/tests/data/error.cu +12 -0
  403. numba_cuda/numba/cuda/tests/data/include/add.cuh +8 -0
  404. numba_cuda/numba/cuda/tests/data/jitlink.cu +28 -0
  405. numba_cuda/numba/cuda/tests/data/jitlink.ptx +49 -0
  406. numba_cuda/numba/cuda/tests/data/warn.cu +12 -0
  407. numba_cuda/numba/cuda/tests/doc_examples/__init__.py +9 -0
  408. numba_cuda/numba/cuda/tests/doc_examples/ffi/__init__.py +2 -0
  409. numba_cuda/numba/cuda/tests/doc_examples/ffi/functions.cu +54 -0
  410. numba_cuda/numba/cuda/tests/doc_examples/ffi/include/mul.cuh +8 -0
  411. numba_cuda/numba/cuda/tests/doc_examples/ffi/saxpy.cu +14 -0
  412. numba_cuda/numba/cuda/tests/doc_examples/test_cg.py +86 -0
  413. numba_cuda/numba/cuda/tests/doc_examples/test_cpointer.py +68 -0
  414. numba_cuda/numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py +81 -0
  415. numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py +141 -0
  416. numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py +160 -0
  417. numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py +180 -0
  418. numba_cuda/numba/cuda/tests/doc_examples/test_montecarlo.py +119 -0
  419. numba_cuda/numba/cuda/tests/doc_examples/test_random.py +66 -0
  420. numba_cuda/numba/cuda/tests/doc_examples/test_reduction.py +80 -0
  421. numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py +206 -0
  422. numba_cuda/numba/cuda/tests/doc_examples/test_ufunc.py +53 -0
  423. numba_cuda/numba/cuda/tests/doc_examples/test_vecadd.py +76 -0
  424. numba_cuda/numba/cuda/tests/nocuda/__init__.py +9 -0
  425. numba_cuda/numba/cuda/tests/nocuda/test_dummyarray.py +452 -0
  426. numba_cuda/numba/cuda/tests/nocuda/test_function_resolution.py +48 -0
  427. numba_cuda/numba/cuda/tests/nocuda/test_import.py +63 -0
  428. numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py +252 -0
  429. numba_cuda/numba/cuda/tests/nocuda/test_nvvm.py +59 -0
  430. numba_cuda/numba/cuda/tests/nrt/__init__.py +9 -0
  431. numba_cuda/numba/cuda/tests/nrt/test_nrt.py +387 -0
  432. numba_cuda/numba/cuda/tests/nrt/test_nrt_refct.py +124 -0
  433. numba_cuda/numba/cuda/tests/support.py +900 -0
  434. numba_cuda/numba/cuda/typeconv/__init__.py +4 -0
  435. numba_cuda/numba/cuda/typeconv/castgraph.py +137 -0
  436. numba_cuda/numba/cuda/typeconv/rules.py +63 -0
  437. numba_cuda/numba/cuda/typeconv/typeconv.py +121 -0
  438. numba_cuda/numba/cuda/types/__init__.py +233 -0
  439. numba_cuda/numba/cuda/types/__init__.pyi +167 -0
  440. numba_cuda/numba/cuda/types/abstract.py +9 -0
  441. numba_cuda/numba/cuda/types/common.py +9 -0
  442. numba_cuda/numba/cuda/types/containers.py +9 -0
  443. numba_cuda/numba/cuda/types/cuda_abstract.py +533 -0
  444. numba_cuda/numba/cuda/types/cuda_common.py +110 -0
  445. numba_cuda/numba/cuda/types/cuda_containers.py +971 -0
  446. numba_cuda/numba/cuda/types/cuda_function_type.py +230 -0
  447. numba_cuda/numba/cuda/types/cuda_functions.py +798 -0
  448. numba_cuda/numba/cuda/types/cuda_iterators.py +120 -0
  449. numba_cuda/numba/cuda/types/cuda_misc.py +569 -0
  450. numba_cuda/numba/cuda/types/cuda_npytypes.py +690 -0
  451. numba_cuda/numba/cuda/types/cuda_scalars.py +280 -0
  452. numba_cuda/numba/cuda/types/ext_types.py +101 -0
  453. numba_cuda/numba/cuda/types/function_type.py +11 -0
  454. numba_cuda/numba/cuda/types/functions.py +9 -0
  455. numba_cuda/numba/cuda/types/iterators.py +9 -0
  456. numba_cuda/numba/cuda/types/misc.py +9 -0
  457. numba_cuda/numba/cuda/types/npytypes.py +9 -0
  458. numba_cuda/numba/cuda/types/scalars.py +9 -0
  459. numba_cuda/numba/cuda/typing/__init__.py +19 -0
  460. numba_cuda/numba/cuda/typing/arraydecl.py +939 -0
  461. numba_cuda/numba/cuda/typing/asnumbatype.py +130 -0
  462. numba_cuda/numba/cuda/typing/bufproto.py +70 -0
  463. numba_cuda/numba/cuda/typing/builtins.py +1209 -0
  464. numba_cuda/numba/cuda/typing/cffi_utils.py +219 -0
  465. numba_cuda/numba/cuda/typing/cmathdecl.py +47 -0
  466. numba_cuda/numba/cuda/typing/collections.py +138 -0
  467. numba_cuda/numba/cuda/typing/context.py +782 -0
  468. numba_cuda/numba/cuda/typing/ctypes_utils.py +125 -0
  469. numba_cuda/numba/cuda/typing/dictdecl.py +63 -0
  470. numba_cuda/numba/cuda/typing/enumdecl.py +74 -0
  471. numba_cuda/numba/cuda/typing/listdecl.py +147 -0
  472. numba_cuda/numba/cuda/typing/mathdecl.py +158 -0
  473. numba_cuda/numba/cuda/typing/npdatetime.py +322 -0
  474. numba_cuda/numba/cuda/typing/npydecl.py +749 -0
  475. numba_cuda/numba/cuda/typing/setdecl.py +115 -0
  476. numba_cuda/numba/cuda/typing/templates.py +1446 -0
  477. numba_cuda/numba/cuda/typing/typeof.py +301 -0
  478. numba_cuda/numba/cuda/ufuncs.py +746 -0
  479. numba_cuda/numba/cuda/utils.py +724 -0
  480. numba_cuda/numba/cuda/vector_types.py +214 -0
  481. numba_cuda/numba/cuda/vectorizers.py +260 -0
  482. numba_cuda-0.22.0.dist-info/METADATA +109 -0
  483. numba_cuda-0.22.0.dist-info/RECORD +487 -0
  484. numba_cuda-0.22.0.dist-info/WHEEL +6 -0
  485. numba_cuda-0.22.0.dist-info/licenses/LICENSE +26 -0
  486. numba_cuda-0.22.0.dist-info/licenses/LICENSE.numba +24 -0
  487. numba_cuda-0.22.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,312 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: BSD-2-Clause
3
+
4
+ import numpy as np
5
+
6
+ from collections import namedtuple
7
+ from functools import partial
8
+ from itertools import product
9
+ from numba.cuda import vectorize as cuda_vectorize
10
+ from numba import cuda, vectorize as numba_vectorize
11
+ from numba.cuda.types import int32, float32, float64
12
+ from numba.cuda.cudadrv.driver import CudaAPIError, driver
13
+ from numba.cuda.testing import skip_on_cudasim
14
+ from numba.cuda.testing import CUDATestCase
15
+ import unittest
16
+
17
+
18
# Signatures to test with - these are all homogeneous in dtype, so the output
# dtype should match the input dtype - the output should not have been cast
# upwards, as reported in #8400: https://github.com/numba/numba/issues/8400
signatures = [
    int32(int32, int32),
    float32(float32, float32),
    float64(float64, float64),
]

# The order here is chosen such that each subsequent dtype might have been
# cast to a previously-used dtype. This is unlikely to be an issue for CUDA,
# but there might be future circumstances in which it becomes relevant, perhaps
# if it supported Dynamic UFuncs, and we want to ensure that an implementation
# for the given dtype is used rather than casting the input upwards.
dtypes = (np.float64, np.float32, np.int32)

# NumPy ndarray orders
orders = ("C", "F")

# Input sizes corresponding to operations:
# - Less than one warp,
# - Less than one block,
# - Greater than one block (i.e. many blocks)
input_sizes = (8, 100, 2**10 + 1)

# Vectorize functions to test
# cuda.vectorize doesn't need target parameter, numba.vectorize needs target="cuda"
vectorize_funcs = [cuda_vectorize, partial(numba_vectorize, target="cuda")]
46
+
47
+
48
@skip_on_cudasim("ufunc API unsupported in the simulator")
class TestCUDAVectorize(CUDATestCase):
    """Checks for ufuncs built with the CUDA vectorize decorators.

    Each test loops over ``vectorize_funcs`` so the same assertions run
    against every decorator in that list.
    """

    # Presumably chosen as an odd number unlikely to coincide with the total
    # thread count, and large enough to ensure a significant number of blocks
    # are used.
    N = 1000001

    def test_scalar(self):
        # Two Python float scalars in, compared against plain Python addition.
        for vectorize in vectorize_funcs:

            @vectorize(signatures)
            def vector_add(a, b):
                return a + b

            a = 1.2
            b = 2.3
            c = vector_add(a, b)
            self.assertEqual(c, a + b)

    def test_1d(self):
        # 1D host arrays of each dtype; the result dtype must match the input.
        for vectorize in vectorize_funcs:

            @vectorize(signatures)
            def vector_add(a, b):
                return a + b

            for ty in dtypes:
                data = np.array(np.random.random(self.N), dtype=ty)
                expected = np.add(data, data)
                actual = vector_add(data, data)
                np.testing.assert_allclose(expected, actual)
                self.assertEqual(actual.dtype, ty)

    def test_1d_async(self):
        # Same as test_1d, but with device arrays and an explicit stream.
        for vectorize in vectorize_funcs:

            @vectorize(signatures)
            def vector_add(a, b):
                return a + b

            stream = cuda.stream()

            for ty in dtypes:
                data = np.array(np.random.random(self.N), dtype=ty)
                device_data = cuda.to_device(data, stream)

                dresult = vector_add(device_data, device_data, stream=stream)
                actual = dresult.copy_to_host()

                expected = np.add(data, data)

                np.testing.assert_allclose(expected, actual)
                self.assertEqual(actual.dtype, ty)

    def test_nd(self):
        # 1 to 7 dimensions, every dtype, both memory orders for the second
        # operand (which is the transpose of the first).
        for vectorize in vectorize_funcs:

            @vectorize(signatures)
            def vector_add(a, b):
                return a + b

            for nd, dtype, order in product(range(1, 8), dtypes, orders):
                shape = (4,) * nd
                data = np.random.random(shape).astype(dtype)
                data2 = np.array(data.T, order=order)

                expected = data + data2
                actual = vector_add(data, data2)
                np.testing.assert_allclose(expected, actual)
                self.assertEqual(actual.dtype, dtype)

    def test_output_arg(self):
        # The result is written into a caller-supplied ``out`` host array.
        for vectorize in vectorize_funcs:

            @vectorize(signatures)
            def vector_add(a, b):
                return a + b

            A = np.arange(10, dtype=np.float32)
            B = np.arange(10, dtype=np.float32)

            expected = A + B
            actual = np.empty_like(A)
            vector_add(A, B, out=actual)

            np.testing.assert_allclose(expected, actual)
            self.assertEqual(expected.dtype, actual.dtype)

    def test_reduce(self):
        for vectorize in vectorize_funcs:

            @vectorize(signatures)
            def vector_add(a, b):
                return a + b

            dtype = np.int32

            for n in input_sizes:
                x = np.arange(n, dtype=dtype)
                expected = np.add.reduce(x)
                actual = vector_add.reduce(x)
                np.testing.assert_allclose(expected, actual)
                # np.add.reduce is special-cased to return an int64 for any int
                # arguments, so we can't compare against its returned dtype when
                # we're checking the general reduce machinery (which just happens
                # to be using addition). Instead, compare against the input dtype.
                self.assertEqual(dtype, actual.dtype)

    def test_reduce_async(self):
        # Same as test_reduce, but with a device array and an explicit stream.
        for vectorize in vectorize_funcs:

            @vectorize(signatures)
            def vector_add(a, b):
                return a + b

            stream = cuda.stream()
            dtype = np.int32

            for n in input_sizes:
                x = np.arange(n, dtype=dtype)
                expected = np.add.reduce(x)
                dx = cuda.to_device(x, stream)
                actual = vector_add.reduce(dx, stream=stream)
                np.testing.assert_allclose(expected, actual)
                # Compare against the input dtype as in test_reduce().
                self.assertEqual(dtype, actual.dtype)

    def test_manual_transfer(self):
        # One host operand and one device operand; the result is copied back
        # with copy_to_host().
        for vectorize in vectorize_funcs:

            @vectorize(signatures)
            def vector_add(a, b):
                return a + b

            n = 10
            x = np.arange(n, dtype=np.int32)
            dx = cuda.to_device(x)
            expected = x + x
            actual = vector_add(x, dx).copy_to_host()
            np.testing.assert_equal(expected, actual)
            self.assertEqual(expected.dtype, actual.dtype)

    def test_ufunc_output_2d(self):
        # A 2D device array is used as both operands and as ``out``.
        for vectorize in vectorize_funcs:

            @vectorize(signatures)
            def vector_add(a, b):
                return a + b

            n = 10
            x = np.arange(n, dtype=np.int32).reshape(2, 5)
            dx = cuda.to_device(x)
            vector_add(dx, dx, out=dx)

            expected = x + x
            actual = dx.copy_to_host()
            np.testing.assert_equal(expected, actual)
            self.assertEqual(expected.dtype, actual.dtype)

    def check_tuple_arg(self, a, b):
        """Add ``a`` and ``b`` with the ufunc and compare with NumPy.

        The reference value is ``np.asarray(a) + np.asarray(b)``.
        """
        for vectorize in vectorize_funcs:

            @vectorize(signatures)
            def vector_add(a, b):
                return a + b

            r = vector_add(a, b)
            np.testing.assert_equal(np.asarray(a) + np.asarray(b), r)

    def test_tuple_arg(self):
        a = (1.0, 2.0, 3.0)
        b = (4.0, 5.0, 6.0)
        self.check_tuple_arg(a, b)

    def test_namedtuple_arg(self):
        Point = namedtuple("Point", ("x", "y", "z"))
        a = Point(x=1.0, y=2.0, z=3.0)
        b = Point(x=4.0, y=5.0, z=6.0)
        self.check_tuple_arg(a, b)

    def test_tuple_of_array_arg(self):
        arr = np.arange(10, dtype=np.int32)
        a = (arr, arr + 1)
        b = (arr + 2, arr + 2)
        self.check_tuple_arg(a, b)

    def test_tuple_of_namedtuple_arg(self):
        Point = namedtuple("Point", ("x", "y", "z"))
        a = (Point(x=1.0, y=2.0, z=3.0), Point(x=1.5, y=2.5, z=3.5))
        b = (Point(x=4.0, y=5.0, z=6.0), Point(x=4.5, y=5.5, z=6.5))
        self.check_tuple_arg(a, b)

    def test_namedtuple_of_array_arg(self):
        xs1 = np.arange(10, dtype=np.int32)
        ys1 = xs1 + 2
        xs2 = np.arange(10, dtype=np.int32) * 2
        ys2 = xs2 + 1
        Points = namedtuple("Points", ("xs", "ys"))
        a = Points(xs=xs1, ys=ys1)
        b = Points(xs=xs2, ys=ys2)
        self.check_tuple_arg(a, b)

    def test_name_attribute(self):
        # The ufunc object keeps the name of the decorated function.
        for vectorize in vectorize_funcs:

            @vectorize("f8(f8)")
            def bar(x):
                return x**2

            self.assertEqual(bar.__name__, "bar")

    def test_no_transfer_for_device_data(self):
        for vectorize in vectorize_funcs:
            # Initialize test data on the device prior to banning host <-> device
            # transfer

            noise = np.random.randn(1, 3, 64, 64).astype(np.float32)
            noise = cuda.to_device(noise)

            # A mock of a CUDA function that always raises a CudaAPIError

            def raising_transfer(*args, **kwargs):
                raise CudaAPIError(999, "Transfer not allowed")

            # Use the mock for transfers between the host and device

            old_HtoD = getattr(driver, "cuMemcpyHtoD", None)
            old_DtoH = getattr(driver, "cuMemcpyDtoH", None)

            setattr(driver, "cuMemcpyHtoD", raising_transfer)
            setattr(driver, "cuMemcpyDtoH", raising_transfer)

            # Everything from here on runs with the driver patched, so it all
            # sits inside the try block: a failing sanity check must not leave
            # the mocks installed for the tests that run afterwards.
            try:
                # Ensure that the mock functions are working as expected

                with self.assertRaisesRegex(
                    CudaAPIError, "Transfer not allowed"
                ):
                    noise.copy_to_host()

                with self.assertRaisesRegex(
                    CudaAPIError, "Transfer not allowed"
                ):
                    cuda.to_device([1])

                # Check that defining and calling a ufunc with data on the
                # device induces no transfers

                @vectorize(["float32(float32)"])
                def func(noise):
                    return noise + 1.0

                func(noise)
            finally:
                # Replace our mocks with the original implementations. If there
                # was no original implementation, simply remove ours.

                if old_HtoD is not None:
                    setattr(driver, "cuMemcpyHtoD", old_HtoD)
                else:
                    del driver.cuMemcpyHtoD
                if old_DtoH is not None:
                    setattr(driver, "cuMemcpyDtoH", old_DtoH)
                else:
                    del driver.cuMemcpyDtoH


if __name__ == "__main__":
    unittest.main()
@@ -0,0 +1,23 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: BSD-2-Clause
3
+
4
+ import numpy as np
5
+ from numba.cuda import vectorize
6
+ from numba.cuda.testing import skip_on_cudasim, CUDATestCase
7
+ import unittest
8
+
9
+
10
@skip_on_cudasim("ufunc API unsupported in the simulator")
class TestVectorizeComplex(CUDATestCase):
    """Check a CUDA ufunc that takes and returns complex128 values."""

    def test_vectorize_complex(self):
        @vectorize(["complex128(complex128)"], target="cuda")
        def vcomp(a):
            return a * a + 1.0

        A = np.arange(5, dtype=np.complex128)
        B = vcomp(A)
        # Same tolerances as np.allclose's defaults, but a failure reports the
        # mismatching elements instead of a bare "False is not true".
        np.testing.assert_allclose(A * A + 1.0, B, rtol=1e-5, atol=1e-8)


if __name__ == "__main__":
    unittest.main()
@@ -0,0 +1,183 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: BSD-2-Clause
3
+
4
+ import numpy as np
5
+ import math
6
+
7
+ from numba import cuda
8
+ from numba.cuda import vectorize, int32, uint32, float32, float64
9
+ from numba.cuda.testing import skip_on_cudasim, CUDATestCase
10
+ from numba.cuda.tests.support import CheckWarningsMixin
11
+
12
+ import unittest
13
+
14
+
15
pi = math.pi


def sinc(x):
    """Return sin(pi * x) / (pi * x), or 1.0 when ``x`` equals 0.0."""
    if x == 0.0:
        # Avoid the division by zero at the origin.
        return 1.0
    else:
        return math.sin(x * pi) / (pi * x)


def scaled_sinc(x, scale):
    """Return ``scale`` times sin(pi * x) / (pi * x).

    When ``x`` equals 0.0, ``scale`` itself is returned unchanged.
    """
    if x == 0.0:
        return scale
    else:
        return scale * (math.sin(x * pi) / (pi * x))


def vector_add(a, b):
    """Return ``a + b``."""
    return a + b
34
+
35
+
36
class BaseVectorizeDecor:
    """Target-independent checks for the ``vectorize`` decorator.

    Subclasses set ``target`` (passed to ``vectorize``) and may set
    ``wrapper``, a callable applied to the scalar function before it is
    vectorized.
    """

    target = None
    wrapper = None
    funcs = {
        "func1": sinc,
        "func2": scaled_sinc,
        "func3": vector_add,
    }

    @classmethod
    def _run_and_compare(cls, func, sig, A, *args, **kwargs):
        """Vectorize ``func`` and compare it with ``np.vectorize(func)``.

        ``A`` and ``*args`` are the call arguments for both versions;
        ``**kwargs`` are forwarded to ``np.testing.assert_allclose``.
        """
        if cls.wrapper is not None:
            func = cls.wrapper(func)
        numba_func = vectorize(sig, target=cls.target)(func)
        numpy_func = np.vectorize(func)
        result = numba_func(A, *args)
        gold = numpy_func(A, *args)
        np.testing.assert_allclose(result, gold, **kwargs)

    def test_1(self):
        # Signatures given as strings.
        sig = ["float64(float64)", "float32(float32)"]
        func = self.funcs["func1"]
        A = np.arange(100, dtype=np.float64)
        self._run_and_compare(func, sig, A)

    def test_2(self):
        # Signatures given as type objects.
        sig = [float64(float64), float32(float32)]
        func = self.funcs["func1"]
        A = np.arange(100, dtype=np.float64)
        self._run_and_compare(func, sig, A)

    def test_3(self):
        # Array first argument combined with a scalar uint32 second argument.
        sig = ["float64(float64, uint32)"]
        func = self.funcs["func2"]
        A = np.arange(100, dtype=np.float64)
        scale = np.uint32(3)
        self._run_and_compare(func, sig, A, scale, atol=1e-8)

    def test_4(self):
        # One ufunc with four signatures, called once per matching dtype.
        sig = [
            int32(int32, int32),
            uint32(uint32, uint32),
            float32(float32, float32),
            float64(float64, float64),
        ]
        func = self.funcs["func3"]
        A = np.arange(100, dtype=np.float64)
        self._run_and_compare(func, sig, A, A)
        A = A.astype(np.float32)
        self._run_and_compare(func, sig, A, A)
        A = A.astype(np.int32)
        self._run_and_compare(func, sig, A, A)
        A = A.astype(np.uint32)
        self._run_and_compare(func, sig, A, A)
90
+
91
+
92
class BaseVectorizeNopythonArg(unittest.TestCase, CheckWarningsMixin):
    """
    Test passing the nopython argument to the vectorize decorator.
    """

    def _test_target_nopython(self, target, warnings, with_sig=True):
        """Build and call a ufunc with ``nopython=True`` under a warning check.

        ``warnings`` is handed to ``self.check_warnings``. When ``with_sig``
        is false, no signature list is passed to ``vectorize``.
        """
        a = np.array([2.0], dtype=np.float32)
        b = np.array([3.0], dtype=np.float32)
        sig = [float32(float32, float32)]
        args = [sig] if with_sig else []
        with self.check_warnings(warnings):
            f = vectorize(*args, target=target, nopython=True)(vector_add)
            f(a, b)
105
+
106
+
107
class BaseVectorizeUnrecognizedArg(unittest.TestCase, CheckWarningsMixin):
    """
    Test passing an unrecognized argument to the vectorize decorator.
    """

    def _test_target_unrecognized_arg(self, target, with_sig=True):
        """Expect a ``KeyError`` when ``vectorize`` gets an unknown keyword.

        The error message must contain "Unrecognized options". When
        ``with_sig`` is false, no signature list is passed to ``vectorize``.
        """
        a = np.array([2.0], dtype=np.float32)
        b = np.array([3.0], dtype=np.float32)
        sig = [float32(float32, float32)]
        args = [sig] if with_sig else []
        with self.assertRaises(KeyError) as raises:
            f = vectorize(*args, target=target, nonexistent=2)(vector_add)
            f(a, b)
        self.assertIn("Unrecognized options", str(raises.exception))
121
+
122
+
123
@skip_on_cudasim("ufunc API unsupported in the simulator")
class TestVectorizeDecor(CUDATestCase, BaseVectorizeDecor):
    """
    Runs the tests from BaseVectorizeDecor with the CUDA target.
    """

    target = "cuda"
130
+
131
+
132
@skip_on_cudasim("ufunc API unsupported in the simulator")
class TestGPUVectorizeBroadcast(CUDATestCase):
    """Check broadcasting of a (100, 3, 1) array against its (1, 3, 100) transpose."""

    def test_broadcast(self):
        a = np.random.randn(100, 3, 1)
        b = a.transpose(2, 1, 0)

        # Host reference: plain NumPy broadcasting subtraction.
        def fn(a, b):
            return a - b

        @vectorize(["float64(float64,float64)"], target="cuda")
        def fngpu(a, b):
            return a - b

        expect = fn(a, b)
        got = fngpu(a, b)
        np.testing.assert_almost_equal(expect, got)

    def test_device_broadcast(self):
        """
        Same test as .test_broadcast() but with device array as inputs
        """

        a = np.random.randn(100, 3, 1)
        b = a.transpose(2, 1, 0)

        # Host reference: plain NumPy broadcasting subtraction.
        def fn(a, b):
            return a - b

        @vectorize(["float64(float64,float64)"], target="cuda")
        def fngpu(a, b):
            return a - b

        expect = fn(a, b)
        got = fngpu(cuda.to_device(a), cuda.to_device(b))
        np.testing.assert_almost_equal(expect, got.copy_to_host())
167
+
168
+
169
@skip_on_cudasim("ufunc API unsupported in the simulator")
class TestVectorizeNopythonArg(BaseVectorizeNopythonArg, CUDATestCase):
    """Runs the nopython-argument check with the CUDA target."""

    def test_target_cuda_nopython(self):
        warnings = ["nopython kwarg for cuda target is redundant"]
        self._test_target_nopython("cuda", warnings)
174
+
175
+
176
@skip_on_cudasim("ufunc API unsupported in the simulator")
class TestVectorizeUnrecognizedArg(BaseVectorizeUnrecognizedArg, CUDATestCase):
    """Runs the unrecognized-argument check with the CUDA target."""

    def test_target_cuda_unrecognized_arg(self):
        self._test_target_unrecognized_arg("cuda")


if __name__ == "__main__":
    unittest.main()
@@ -0,0 +1,40 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: BSD-2-Clause
3
+
4
+ from numba.cuda import vectorize
5
+ from numba import cuda
6
+ from numba.cuda import float32
7
+ import numpy as np
8
+ from numba.cuda.testing import skip_on_cudasim, CUDATestCase
9
+ import unittest
10
+
11
+
12
@skip_on_cudasim("ufunc API unsupported in the simulator")
class TestCudaVectorizeDeviceCall(CUDATestCase):
    """Check that a CUDA ufunc can call a ``cuda.jit`` device function."""

    def test_cuda_vectorize_device_call(self):
        @cuda.jit(float32(float32, float32, float32), device=True)
        def cu_device_fn(x, y, z):
            return x**y / z

        def cu_ufunc(x, y, z):
            return cu_device_fn(x, y, z)

        ufunc = vectorize([float32(float32, float32, float32)], target="cuda")(
            cu_ufunc
        )

        N = 100

        X = np.array(np.random.sample(N), dtype=np.float32)
        Y = np.array(np.random.sample(N), dtype=np.float32)
        # The 0.1 offset keeps the divisor away from zero.
        Z = np.array(np.random.sample(N), dtype=np.float32) + 0.1

        out = ufunc(X, Y, Z)

        gold = (X**Y) / Z

        # Same tolerances as np.allclose's defaults, but a failure reports the
        # mismatching elements instead of a bare "False is not true".
        np.testing.assert_allclose(out, gold, rtol=1e-5, atol=1e-8)


if __name__ == "__main__":
    unittest.main()
@@ -0,0 +1,40 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: BSD-2-Clause
3
+
4
+ import numpy as np
5
+ from numba.cuda import vectorize
6
+ from numba import cuda
7
+ from numba.cuda import float64
8
+ from numba.cuda.testing import skip_on_cudasim, CUDATestCase
9
+ import unittest
10
+
11
# Single signature shared by both tests below.
sig = [float64(float64, float64)]


@skip_on_cudasim("ufunc API unsupported in the simulator")
class TestCUDAVectorizeScalarArg(CUDATestCase):
    """Check CUDA ufuncs called with Python scalar arguments."""

    def test_vectorize_scalar_arg(self):
        # A Python float combined with a device array.
        @vectorize(sig, target="cuda")
        def vector_add(a, b):
            return a + b

        A = np.arange(10, dtype=np.float64)
        dA = cuda.to_device(A)
        v = vector_add(1.0, dA)

        np.testing.assert_array_almost_equal(
            v.copy_to_host(), np.arange(1, 11, dtype=np.float64)
        )

    def test_vectorize_all_scalars(self):
        # Both arguments are Python floats.
        @vectorize(sig, target="cuda")
        def vector_add(a, b):
            return a + b

        v = vector_add(1.0, 1.0)

        np.testing.assert_almost_equal(2.0, v)


if __name__ == "__main__":
    unittest.main()