numba-cuda 0.21.1__cp313-cp313-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (488)
  1. _numba_cuda_redirector.pth +4 -0
  2. _numba_cuda_redirector.py +89 -0
  3. numba_cuda/VERSION +1 -0
  4. numba_cuda/__init__.py +6 -0
  5. numba_cuda/_version.py +11 -0
  6. numba_cuda/numba/cuda/__init__.py +70 -0
  7. numba_cuda/numba/cuda/_internal/cuda_bf16.py +16394 -0
  8. numba_cuda/numba/cuda/_internal/cuda_fp16.py +8112 -0
  9. numba_cuda/numba/cuda/api.py +577 -0
  10. numba_cuda/numba/cuda/api_util.py +76 -0
  11. numba_cuda/numba/cuda/args.py +72 -0
  12. numba_cuda/numba/cuda/bf16.py +397 -0
  13. numba_cuda/numba/cuda/cache_hints.py +287 -0
  14. numba_cuda/numba/cuda/cext/__init__.py +2 -0
  15. numba_cuda/numba/cuda/cext/_devicearray.cp313-win_amd64.pyd +0 -0
  16. numba_cuda/numba/cuda/cext/_devicearray.cpp +159 -0
  17. numba_cuda/numba/cuda/cext/_devicearray.h +29 -0
  18. numba_cuda/numba/cuda/cext/_dispatcher.cp313-win_amd64.pyd +0 -0
  19. numba_cuda/numba/cuda/cext/_dispatcher.cpp +1098 -0
  20. numba_cuda/numba/cuda/cext/_hashtable.cpp +532 -0
  21. numba_cuda/numba/cuda/cext/_hashtable.h +135 -0
  22. numba_cuda/numba/cuda/cext/_helperlib.c +71 -0
  23. numba_cuda/numba/cuda/cext/_helperlib.cp313-win_amd64.pyd +0 -0
  24. numba_cuda/numba/cuda/cext/_helpermod.c +82 -0
  25. numba_cuda/numba/cuda/cext/_pymodule.h +38 -0
  26. numba_cuda/numba/cuda/cext/_typeconv.cp313-win_amd64.pyd +0 -0
  27. numba_cuda/numba/cuda/cext/_typeconv.cpp +206 -0
  28. numba_cuda/numba/cuda/cext/_typeof.cpp +1159 -0
  29. numba_cuda/numba/cuda/cext/_typeof.h +19 -0
  30. numba_cuda/numba/cuda/cext/capsulethunk.h +111 -0
  31. numba_cuda/numba/cuda/cext/mviewbuf.c +385 -0
  32. numba_cuda/numba/cuda/cext/mviewbuf.cp313-win_amd64.pyd +0 -0
  33. numba_cuda/numba/cuda/cext/typeconv.cpp +212 -0
  34. numba_cuda/numba/cuda/cext/typeconv.hpp +101 -0
  35. numba_cuda/numba/cuda/cg.py +67 -0
  36. numba_cuda/numba/cuda/cgutils.py +1294 -0
  37. numba_cuda/numba/cuda/cloudpickle/__init__.py +21 -0
  38. numba_cuda/numba/cuda/cloudpickle/cloudpickle.py +1598 -0
  39. numba_cuda/numba/cuda/cloudpickle/cloudpickle_fast.py +17 -0
  40. numba_cuda/numba/cuda/codegen.py +541 -0
  41. numba_cuda/numba/cuda/compiler.py +1396 -0
  42. numba_cuda/numba/cuda/core/analysis.py +758 -0
  43. numba_cuda/numba/cuda/core/annotations/__init__.py +0 -0
  44. numba_cuda/numba/cuda/core/annotations/pretty_annotate.py +288 -0
  45. numba_cuda/numba/cuda/core/annotations/type_annotations.py +305 -0
  46. numba_cuda/numba/cuda/core/base.py +1332 -0
  47. numba_cuda/numba/cuda/core/boxing.py +1411 -0
  48. numba_cuda/numba/cuda/core/bytecode.py +728 -0
  49. numba_cuda/numba/cuda/core/byteflow.py +2346 -0
  50. numba_cuda/numba/cuda/core/caching.py +744 -0
  51. numba_cuda/numba/cuda/core/callconv.py +392 -0
  52. numba_cuda/numba/cuda/core/codegen.py +171 -0
  53. numba_cuda/numba/cuda/core/compiler.py +199 -0
  54. numba_cuda/numba/cuda/core/compiler_lock.py +85 -0
  55. numba_cuda/numba/cuda/core/compiler_machinery.py +497 -0
  56. numba_cuda/numba/cuda/core/config.py +650 -0
  57. numba_cuda/numba/cuda/core/consts.py +124 -0
  58. numba_cuda/numba/cuda/core/controlflow.py +989 -0
  59. numba_cuda/numba/cuda/core/entrypoints.py +57 -0
  60. numba_cuda/numba/cuda/core/environment.py +66 -0
  61. numba_cuda/numba/cuda/core/errors.py +917 -0
  62. numba_cuda/numba/cuda/core/event.py +511 -0
  63. numba_cuda/numba/cuda/core/funcdesc.py +330 -0
  64. numba_cuda/numba/cuda/core/generators.py +387 -0
  65. numba_cuda/numba/cuda/core/imputils.py +509 -0
  66. numba_cuda/numba/cuda/core/inline_closurecall.py +1787 -0
  67. numba_cuda/numba/cuda/core/interpreter.py +3617 -0
  68. numba_cuda/numba/cuda/core/ir.py +1812 -0
  69. numba_cuda/numba/cuda/core/ir_utils.py +2638 -0
  70. numba_cuda/numba/cuda/core/optional.py +129 -0
  71. numba_cuda/numba/cuda/core/options.py +262 -0
  72. numba_cuda/numba/cuda/core/postproc.py +249 -0
  73. numba_cuda/numba/cuda/core/pythonapi.py +1859 -0
  74. numba_cuda/numba/cuda/core/registry.py +46 -0
  75. numba_cuda/numba/cuda/core/removerefctpass.py +123 -0
  76. numba_cuda/numba/cuda/core/rewrites/__init__.py +26 -0
  77. numba_cuda/numba/cuda/core/rewrites/ir_print.py +91 -0
  78. numba_cuda/numba/cuda/core/rewrites/registry.py +104 -0
  79. numba_cuda/numba/cuda/core/rewrites/static_binop.py +41 -0
  80. numba_cuda/numba/cuda/core/rewrites/static_getitem.py +189 -0
  81. numba_cuda/numba/cuda/core/rewrites/static_raise.py +100 -0
  82. numba_cuda/numba/cuda/core/sigutils.py +68 -0
  83. numba_cuda/numba/cuda/core/ssa.py +498 -0
  84. numba_cuda/numba/cuda/core/targetconfig.py +330 -0
  85. numba_cuda/numba/cuda/core/tracing.py +231 -0
  86. numba_cuda/numba/cuda/core/transforms.py +956 -0
  87. numba_cuda/numba/cuda/core/typed_passes.py +867 -0
  88. numba_cuda/numba/cuda/core/typeinfer.py +1950 -0
  89. numba_cuda/numba/cuda/core/unsafe/__init__.py +0 -0
  90. numba_cuda/numba/cuda/core/unsafe/bytes.py +67 -0
  91. numba_cuda/numba/cuda/core/unsafe/eh.py +67 -0
  92. numba_cuda/numba/cuda/core/unsafe/refcount.py +98 -0
  93. numba_cuda/numba/cuda/core/untyped_passes.py +1979 -0
  94. numba_cuda/numba/cuda/cpython/builtins.py +1153 -0
  95. numba_cuda/numba/cuda/cpython/charseq.py +1218 -0
  96. numba_cuda/numba/cuda/cpython/cmathimpl.py +560 -0
  97. numba_cuda/numba/cuda/cpython/enumimpl.py +103 -0
  98. numba_cuda/numba/cuda/cpython/iterators.py +167 -0
  99. numba_cuda/numba/cuda/cpython/listobj.py +1326 -0
  100. numba_cuda/numba/cuda/cpython/mathimpl.py +499 -0
  101. numba_cuda/numba/cuda/cpython/numbers.py +1475 -0
  102. numba_cuda/numba/cuda/cpython/rangeobj.py +289 -0
  103. numba_cuda/numba/cuda/cpython/slicing.py +322 -0
  104. numba_cuda/numba/cuda/cpython/tupleobj.py +456 -0
  105. numba_cuda/numba/cuda/cpython/unicode.py +2865 -0
  106. numba_cuda/numba/cuda/cpython/unicode_support.py +1597 -0
  107. numba_cuda/numba/cuda/cpython/unsafe/__init__.py +0 -0
  108. numba_cuda/numba/cuda/cpython/unsafe/numbers.py +64 -0
  109. numba_cuda/numba/cuda/cpython/unsafe/tuple.py +92 -0
  110. numba_cuda/numba/cuda/cuda_paths.py +691 -0
  111. numba_cuda/numba/cuda/cudadecl.py +556 -0
  112. numba_cuda/numba/cuda/cudadrv/__init__.py +14 -0
  113. numba_cuda/numba/cuda/cudadrv/devicearray.py +951 -0
  114. numba_cuda/numba/cuda/cudadrv/devices.py +249 -0
  115. numba_cuda/numba/cuda/cudadrv/driver.py +3222 -0
  116. numba_cuda/numba/cuda/cudadrv/drvapi.py +435 -0
  117. numba_cuda/numba/cuda/cudadrv/dummyarray.py +558 -0
  118. numba_cuda/numba/cuda/cudadrv/enums.py +613 -0
  119. numba_cuda/numba/cuda/cudadrv/error.py +48 -0
  120. numba_cuda/numba/cuda/cudadrv/libs.py +220 -0
  121. numba_cuda/numba/cuda/cudadrv/linkable_code.py +184 -0
  122. numba_cuda/numba/cuda/cudadrv/mappings.py +14 -0
  123. numba_cuda/numba/cuda/cudadrv/ndarray.py +26 -0
  124. numba_cuda/numba/cuda/cudadrv/nvrtc.py +193 -0
  125. numba_cuda/numba/cuda/cudadrv/nvvm.py +756 -0
  126. numba_cuda/numba/cuda/cudadrv/rtapi.py +13 -0
  127. numba_cuda/numba/cuda/cudadrv/runtime.py +34 -0
  128. numba_cuda/numba/cuda/cudaimpl.py +995 -0
  129. numba_cuda/numba/cuda/cudamath.py +149 -0
  130. numba_cuda/numba/cuda/datamodel/__init__.py +7 -0
  131. numba_cuda/numba/cuda/datamodel/cuda_manager.py +66 -0
  132. numba_cuda/numba/cuda/datamodel/cuda_models.py +1446 -0
  133. numba_cuda/numba/cuda/datamodel/cuda_packer.py +224 -0
  134. numba_cuda/numba/cuda/datamodel/cuda_registry.py +22 -0
  135. numba_cuda/numba/cuda/datamodel/cuda_testing.py +153 -0
  136. numba_cuda/numba/cuda/datamodel/manager.py +11 -0
  137. numba_cuda/numba/cuda/datamodel/models.py +9 -0
  138. numba_cuda/numba/cuda/datamodel/packer.py +9 -0
  139. numba_cuda/numba/cuda/datamodel/registry.py +11 -0
  140. numba_cuda/numba/cuda/datamodel/testing.py +11 -0
  141. numba_cuda/numba/cuda/debuginfo.py +903 -0
  142. numba_cuda/numba/cuda/decorators.py +294 -0
  143. numba_cuda/numba/cuda/descriptor.py +35 -0
  144. numba_cuda/numba/cuda/device_init.py +158 -0
  145. numba_cuda/numba/cuda/deviceufunc.py +1021 -0
  146. numba_cuda/numba/cuda/dispatcher.py +2463 -0
  147. numba_cuda/numba/cuda/errors.py +72 -0
  148. numba_cuda/numba/cuda/extending.py +697 -0
  149. numba_cuda/numba/cuda/flags.py +178 -0
  150. numba_cuda/numba/cuda/fp16.py +357 -0
  151. numba_cuda/numba/cuda/include/12/cuda_bf16.h +5118 -0
  152. numba_cuda/numba/cuda/include/12/cuda_bf16.hpp +3865 -0
  153. numba_cuda/numba/cuda/include/12/cuda_fp16.h +5363 -0
  154. numba_cuda/numba/cuda/include/12/cuda_fp16.hpp +3483 -0
  155. numba_cuda/numba/cuda/include/13/cuda_bf16.h +5118 -0
  156. numba_cuda/numba/cuda/include/13/cuda_bf16.hpp +3865 -0
  157. numba_cuda/numba/cuda/include/13/cuda_fp16.h +5363 -0
  158. numba_cuda/numba/cuda/include/13/cuda_fp16.hpp +3483 -0
  159. numba_cuda/numba/cuda/initialize.py +24 -0
  160. numba_cuda/numba/cuda/intrinsic_wrapper.py +41 -0
  161. numba_cuda/numba/cuda/intrinsics.py +382 -0
  162. numba_cuda/numba/cuda/itanium_mangler.py +214 -0
  163. numba_cuda/numba/cuda/kernels/__init__.py +2 -0
  164. numba_cuda/numba/cuda/kernels/reduction.py +265 -0
  165. numba_cuda/numba/cuda/kernels/transpose.py +65 -0
  166. numba_cuda/numba/cuda/libdevice.py +3386 -0
  167. numba_cuda/numba/cuda/libdevicedecl.py +20 -0
  168. numba_cuda/numba/cuda/libdevicefuncs.py +1060 -0
  169. numba_cuda/numba/cuda/libdeviceimpl.py +88 -0
  170. numba_cuda/numba/cuda/locks.py +19 -0
  171. numba_cuda/numba/cuda/lowering.py +1951 -0
  172. numba_cuda/numba/cuda/mathimpl.py +374 -0
  173. numba_cuda/numba/cuda/memory_management/__init__.py +4 -0
  174. numba_cuda/numba/cuda/memory_management/memsys.cu +99 -0
  175. numba_cuda/numba/cuda/memory_management/memsys.cuh +22 -0
  176. numba_cuda/numba/cuda/memory_management/nrt.cu +212 -0
  177. numba_cuda/numba/cuda/memory_management/nrt.cuh +48 -0
  178. numba_cuda/numba/cuda/memory_management/nrt.py +390 -0
  179. numba_cuda/numba/cuda/memory_management/nrt_context.py +438 -0
  180. numba_cuda/numba/cuda/misc/appdirs.py +594 -0
  181. numba_cuda/numba/cuda/misc/cffiimpl.py +24 -0
  182. numba_cuda/numba/cuda/misc/coverage_support.py +43 -0
  183. numba_cuda/numba/cuda/misc/dump_style.py +41 -0
  184. numba_cuda/numba/cuda/misc/findlib.py +75 -0
  185. numba_cuda/numba/cuda/misc/firstlinefinder.py +96 -0
  186. numba_cuda/numba/cuda/misc/gdb_hook.py +240 -0
  187. numba_cuda/numba/cuda/misc/literal.py +28 -0
  188. numba_cuda/numba/cuda/misc/llvm_pass_timings.py +412 -0
  189. numba_cuda/numba/cuda/misc/special.py +94 -0
  190. numba_cuda/numba/cuda/models.py +56 -0
  191. numba_cuda/numba/cuda/np/arraymath.py +5130 -0
  192. numba_cuda/numba/cuda/np/arrayobj.py +7635 -0
  193. numba_cuda/numba/cuda/np/extensions.py +11 -0
  194. numba_cuda/numba/cuda/np/linalg.py +3087 -0
  195. numba_cuda/numba/cuda/np/math/__init__.py +0 -0
  196. numba_cuda/numba/cuda/np/math/cmathimpl.py +558 -0
  197. numba_cuda/numba/cuda/np/math/mathimpl.py +487 -0
  198. numba_cuda/numba/cuda/np/math/numbers.py +1461 -0
  199. numba_cuda/numba/cuda/np/npdatetime.py +969 -0
  200. numba_cuda/numba/cuda/np/npdatetime_helpers.py +217 -0
  201. numba_cuda/numba/cuda/np/npyfuncs.py +1808 -0
  202. numba_cuda/numba/cuda/np/npyimpl.py +1027 -0
  203. numba_cuda/numba/cuda/np/numpy_support.py +798 -0
  204. numba_cuda/numba/cuda/np/polynomial/__init__.py +4 -0
  205. numba_cuda/numba/cuda/np/polynomial/polynomial_core.py +242 -0
  206. numba_cuda/numba/cuda/np/polynomial/polynomial_functions.py +380 -0
  207. numba_cuda/numba/cuda/np/ufunc/__init__.py +4 -0
  208. numba_cuda/numba/cuda/np/ufunc/decorators.py +203 -0
  209. numba_cuda/numba/cuda/np/ufunc/sigparse.py +68 -0
  210. numba_cuda/numba/cuda/np/ufunc/ufuncbuilder.py +65 -0
  211. numba_cuda/numba/cuda/np/ufunc_db.py +1282 -0
  212. numba_cuda/numba/cuda/np/unsafe/__init__.py +0 -0
  213. numba_cuda/numba/cuda/np/unsafe/ndarray.py +84 -0
  214. numba_cuda/numba/cuda/nvvmutils.py +254 -0
  215. numba_cuda/numba/cuda/printimpl.py +126 -0
  216. numba_cuda/numba/cuda/random.py +308 -0
  217. numba_cuda/numba/cuda/reshape_funcs.cu +156 -0
  218. numba_cuda/numba/cuda/serialize.py +267 -0
  219. numba_cuda/numba/cuda/simulator/__init__.py +63 -0
  220. numba_cuda/numba/cuda/simulator/_internal/__init__.py +4 -0
  221. numba_cuda/numba/cuda/simulator/_internal/cuda_bf16.py +2 -0
  222. numba_cuda/numba/cuda/simulator/api.py +179 -0
  223. numba_cuda/numba/cuda/simulator/bf16.py +4 -0
  224. numba_cuda/numba/cuda/simulator/compiler.py +38 -0
  225. numba_cuda/numba/cuda/simulator/cudadrv/__init__.py +11 -0
  226. numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py +462 -0
  227. numba_cuda/numba/cuda/simulator/cudadrv/devices.py +122 -0
  228. numba_cuda/numba/cuda/simulator/cudadrv/driver.py +66 -0
  229. numba_cuda/numba/cuda/simulator/cudadrv/drvapi.py +7 -0
  230. numba_cuda/numba/cuda/simulator/cudadrv/dummyarray.py +7 -0
  231. numba_cuda/numba/cuda/simulator/cudadrv/error.py +10 -0
  232. numba_cuda/numba/cuda/simulator/cudadrv/libs.py +10 -0
  233. numba_cuda/numba/cuda/simulator/cudadrv/linkable_code.py +61 -0
  234. numba_cuda/numba/cuda/simulator/cudadrv/nvrtc.py +11 -0
  235. numba_cuda/numba/cuda/simulator/cudadrv/nvvm.py +32 -0
  236. numba_cuda/numba/cuda/simulator/cudadrv/runtime.py +22 -0
  237. numba_cuda/numba/cuda/simulator/dispatcher.py +11 -0
  238. numba_cuda/numba/cuda/simulator/kernel.py +320 -0
  239. numba_cuda/numba/cuda/simulator/kernelapi.py +509 -0
  240. numba_cuda/numba/cuda/simulator/memory_management/__init__.py +4 -0
  241. numba_cuda/numba/cuda/simulator/memory_management/nrt.py +21 -0
  242. numba_cuda/numba/cuda/simulator/reduction.py +19 -0
  243. numba_cuda/numba/cuda/simulator/tests/support.py +4 -0
  244. numba_cuda/numba/cuda/simulator/vector_types.py +65 -0
  245. numba_cuda/numba/cuda/simulator_init.py +18 -0
  246. numba_cuda/numba/cuda/stubs.py +635 -0
  247. numba_cuda/numba/cuda/target.py +505 -0
  248. numba_cuda/numba/cuda/testing.py +347 -0
  249. numba_cuda/numba/cuda/tests/__init__.py +62 -0
  250. numba_cuda/numba/cuda/tests/benchmarks/__init__.py +0 -0
  251. numba_cuda/numba/cuda/tests/benchmarks/test_kernel_launch.py +119 -0
  252. numba_cuda/numba/cuda/tests/cloudpickle_main_class.py +9 -0
  253. numba_cuda/numba/cuda/tests/core/serialize_usecases.py +113 -0
  254. numba_cuda/numba/cuda/tests/core/test_itanium_mangler.py +83 -0
  255. numba_cuda/numba/cuda/tests/core/test_serialize.py +371 -0
  256. numba_cuda/numba/cuda/tests/cudadrv/__init__.py +9 -0
  257. numba_cuda/numba/cuda/tests/cudadrv/test_array_attr.py +147 -0
  258. numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py +161 -0
  259. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_array_slicing.py +397 -0
  260. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_auto_context.py +24 -0
  261. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py +180 -0
  262. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py +313 -0
  263. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_memory.py +187 -0
  264. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py +621 -0
  265. numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py +247 -0
  266. numba_cuda/numba/cuda/tests/cudadrv/test_detect.py +100 -0
  267. numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +198 -0
  268. numba_cuda/numba/cuda/tests/cudadrv/test_events.py +53 -0
  269. numba_cuda/numba/cuda/tests/cudadrv/test_host_alloc.py +72 -0
  270. numba_cuda/numba/cuda/tests/cudadrv/test_init.py +138 -0
  271. numba_cuda/numba/cuda/tests/cudadrv/test_inline_ptx.py +43 -0
  272. numba_cuda/numba/cuda/tests/cudadrv/test_is_fp16.py +15 -0
  273. numba_cuda/numba/cuda/tests/cudadrv/test_linkable_code.py +58 -0
  274. numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +348 -0
  275. numba_cuda/numba/cuda/tests/cudadrv/test_managed_alloc.py +128 -0
  276. numba_cuda/numba/cuda/tests/cudadrv/test_module_callbacks.py +301 -0
  277. numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py +174 -0
  278. numba_cuda/numba/cuda/tests/cudadrv/test_nvrtc.py +28 -0
  279. numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py +185 -0
  280. numba_cuda/numba/cuda/tests/cudadrv/test_pinned.py +39 -0
  281. numba_cuda/numba/cuda/tests/cudadrv/test_profiler.py +23 -0
  282. numba_cuda/numba/cuda/tests/cudadrv/test_reset_device.py +38 -0
  283. numba_cuda/numba/cuda/tests/cudadrv/test_runtime.py +48 -0
  284. numba_cuda/numba/cuda/tests/cudadrv/test_select_device.py +44 -0
  285. numba_cuda/numba/cuda/tests/cudadrv/test_streams.py +127 -0
  286. numba_cuda/numba/cuda/tests/cudapy/__init__.py +9 -0
  287. numba_cuda/numba/cuda/tests/cudapy/cache_usecases.py +231 -0
  288. numba_cuda/numba/cuda/tests/cudapy/cache_with_cpu_usecases.py +50 -0
  289. numba_cuda/numba/cuda/tests/cudapy/cg_cache_usecases.py +36 -0
  290. numba_cuda/numba/cuda/tests/cudapy/complex_usecases.py +116 -0
  291. numba_cuda/numba/cuda/tests/cudapy/enum_usecases.py +59 -0
  292. numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +62 -0
  293. numba_cuda/numba/cuda/tests/cudapy/jitlink.ptx +28 -0
  294. numba_cuda/numba/cuda/tests/cudapy/overload_usecases.py +33 -0
  295. numba_cuda/numba/cuda/tests/cudapy/recursion_usecases.py +104 -0
  296. numba_cuda/numba/cuda/tests/cudapy/test_alignment.py +47 -0
  297. numba_cuda/numba/cuda/tests/cudapy/test_analysis.py +1122 -0
  298. numba_cuda/numba/cuda/tests/cudapy/test_array.py +344 -0
  299. numba_cuda/numba/cuda/tests/cudapy/test_array_alignment.py +268 -0
  300. numba_cuda/numba/cuda/tests/cudapy/test_array_args.py +203 -0
  301. numba_cuda/numba/cuda/tests/cudapy/test_array_methods.py +63 -0
  302. numba_cuda/numba/cuda/tests/cudapy/test_array_reductions.py +360 -0
  303. numba_cuda/numba/cuda/tests/cudapy/test_atomics.py +1815 -0
  304. numba_cuda/numba/cuda/tests/cudapy/test_bfloat16.py +599 -0
  305. numba_cuda/numba/cuda/tests/cudapy/test_bfloat16_bindings.py +377 -0
  306. numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py +160 -0
  307. numba_cuda/numba/cuda/tests/cudapy/test_boolean.py +27 -0
  308. numba_cuda/numba/cuda/tests/cudapy/test_byteflow.py +98 -0
  309. numba_cuda/numba/cuda/tests/cudapy/test_cache_hints.py +210 -0
  310. numba_cuda/numba/cuda/tests/cudapy/test_caching.py +683 -0
  311. numba_cuda/numba/cuda/tests/cudapy/test_casting.py +265 -0
  312. numba_cuda/numba/cuda/tests/cudapy/test_cffi.py +42 -0
  313. numba_cuda/numba/cuda/tests/cudapy/test_compiler.py +718 -0
  314. numba_cuda/numba/cuda/tests/cudapy/test_complex.py +370 -0
  315. numba_cuda/numba/cuda/tests/cudapy/test_complex_kernel.py +23 -0
  316. numba_cuda/numba/cuda/tests/cudapy/test_const_string.py +142 -0
  317. numba_cuda/numba/cuda/tests/cudapy/test_constmem.py +178 -0
  318. numba_cuda/numba/cuda/tests/cudapy/test_cooperative_groups.py +193 -0
  319. numba_cuda/numba/cuda/tests/cudapy/test_copy_propagate.py +131 -0
  320. numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py +438 -0
  321. numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py +94 -0
  322. numba_cuda/numba/cuda/tests/cudapy/test_datetime.py +101 -0
  323. numba_cuda/numba/cuda/tests/cudapy/test_debug.py +105 -0
  324. numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +889 -0
  325. numba_cuda/numba/cuda/tests/cudapy/test_debuginfo_types.py +476 -0
  326. numba_cuda/numba/cuda/tests/cudapy/test_device_func.py +500 -0
  327. numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +820 -0
  328. numba_cuda/numba/cuda/tests/cudapy/test_enums.py +152 -0
  329. numba_cuda/numba/cuda/tests/cudapy/test_errors.py +111 -0
  330. numba_cuda/numba/cuda/tests/cudapy/test_exception.py +170 -0
  331. numba_cuda/numba/cuda/tests/cudapy/test_extending.py +1088 -0
  332. numba_cuda/numba/cuda/tests/cudapy/test_extending_types.py +71 -0
  333. numba_cuda/numba/cuda/tests/cudapy/test_fastmath.py +265 -0
  334. numba_cuda/numba/cuda/tests/cudapy/test_flow_control.py +1433 -0
  335. numba_cuda/numba/cuda/tests/cudapy/test_forall.py +57 -0
  336. numba_cuda/numba/cuda/tests/cudapy/test_freevar.py +34 -0
  337. numba_cuda/numba/cuda/tests/cudapy/test_frexp_ldexp.py +69 -0
  338. numba_cuda/numba/cuda/tests/cudapy/test_globals.py +62 -0
  339. numba_cuda/numba/cuda/tests/cudapy/test_gufunc.py +474 -0
  340. numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scalar.py +167 -0
  341. numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scheduling.py +92 -0
  342. numba_cuda/numba/cuda/tests/cudapy/test_idiv.py +39 -0
  343. numba_cuda/numba/cuda/tests/cudapy/test_inline.py +170 -0
  344. numba_cuda/numba/cuda/tests/cudapy/test_inspect.py +255 -0
  345. numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py +1219 -0
  346. numba_cuda/numba/cuda/tests/cudapy/test_ipc.py +263 -0
  347. numba_cuda/numba/cuda/tests/cudapy/test_ir.py +598 -0
  348. numba_cuda/numba/cuda/tests/cudapy/test_ir_utils.py +276 -0
  349. numba_cuda/numba/cuda/tests/cudapy/test_iterators.py +101 -0
  350. numba_cuda/numba/cuda/tests/cudapy/test_lang.py +68 -0
  351. numba_cuda/numba/cuda/tests/cudapy/test_laplace.py +123 -0
  352. numba_cuda/numba/cuda/tests/cudapy/test_libdevice.py +194 -0
  353. numba_cuda/numba/cuda/tests/cudapy/test_lineinfo.py +220 -0
  354. numba_cuda/numba/cuda/tests/cudapy/test_localmem.py +173 -0
  355. numba_cuda/numba/cuda/tests/cudapy/test_make_function_to_jit_function.py +364 -0
  356. numba_cuda/numba/cuda/tests/cudapy/test_mandel.py +47 -0
  357. numba_cuda/numba/cuda/tests/cudapy/test_math.py +842 -0
  358. numba_cuda/numba/cuda/tests/cudapy/test_matmul.py +76 -0
  359. numba_cuda/numba/cuda/tests/cudapy/test_minmax.py +78 -0
  360. numba_cuda/numba/cuda/tests/cudapy/test_montecarlo.py +25 -0
  361. numba_cuda/numba/cuda/tests/cudapy/test_multigpu.py +145 -0
  362. numba_cuda/numba/cuda/tests/cudapy/test_multiprocessing.py +39 -0
  363. numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py +82 -0
  364. numba_cuda/numba/cuda/tests/cudapy/test_nondet.py +53 -0
  365. numba_cuda/numba/cuda/tests/cudapy/test_operator.py +504 -0
  366. numba_cuda/numba/cuda/tests/cudapy/test_optimization.py +93 -0
  367. numba_cuda/numba/cuda/tests/cudapy/test_overload.py +402 -0
  368. numba_cuda/numba/cuda/tests/cudapy/test_powi.py +128 -0
  369. numba_cuda/numba/cuda/tests/cudapy/test_print.py +193 -0
  370. numba_cuda/numba/cuda/tests/cudapy/test_py2_div_issue.py +37 -0
  371. numba_cuda/numba/cuda/tests/cudapy/test_random.py +117 -0
  372. numba_cuda/numba/cuda/tests/cudapy/test_record_dtype.py +614 -0
  373. numba_cuda/numba/cuda/tests/cudapy/test_recursion.py +130 -0
  374. numba_cuda/numba/cuda/tests/cudapy/test_reduction.py +94 -0
  375. numba_cuda/numba/cuda/tests/cudapy/test_retrieve_autoconverted_arrays.py +83 -0
  376. numba_cuda/numba/cuda/tests/cudapy/test_serialize.py +86 -0
  377. numba_cuda/numba/cuda/tests/cudapy/test_slicing.py +40 -0
  378. numba_cuda/numba/cuda/tests/cudapy/test_sm.py +457 -0
  379. numba_cuda/numba/cuda/tests/cudapy/test_sm_creation.py +233 -0
  380. numba_cuda/numba/cuda/tests/cudapy/test_ssa.py +454 -0
  381. numba_cuda/numba/cuda/tests/cudapy/test_stream_api.py +56 -0
  382. numba_cuda/numba/cuda/tests/cudapy/test_sync.py +277 -0
  383. numba_cuda/numba/cuda/tests/cudapy/test_tracing.py +200 -0
  384. numba_cuda/numba/cuda/tests/cudapy/test_transpose.py +90 -0
  385. numba_cuda/numba/cuda/tests/cudapy/test_typeconv.py +333 -0
  386. numba_cuda/numba/cuda/tests/cudapy/test_typeinfer.py +538 -0
  387. numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +585 -0
  388. numba_cuda/numba/cuda/tests/cudapy/test_userexc.py +42 -0
  389. numba_cuda/numba/cuda/tests/cudapy/test_vector_type.py +485 -0
  390. numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py +312 -0
  391. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_complex.py +23 -0
  392. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py +183 -0
  393. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_device.py +40 -0
  394. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_scalar_arg.py +40 -0
  395. numba_cuda/numba/cuda/tests/cudapy/test_warning.py +206 -0
  396. numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +331 -0
  397. numba_cuda/numba/cuda/tests/cudasim/__init__.py +9 -0
  398. numba_cuda/numba/cuda/tests/cudasim/support.py +9 -0
  399. numba_cuda/numba/cuda/tests/cudasim/test_cudasim_issues.py +111 -0
  400. numba_cuda/numba/cuda/tests/data/__init__.py +2 -0
  401. numba_cuda/numba/cuda/tests/data/cta_barrier.cu +28 -0
  402. numba_cuda/numba/cuda/tests/data/cuda_include.cu +10 -0
  403. numba_cuda/numba/cuda/tests/data/error.cu +12 -0
  404. numba_cuda/numba/cuda/tests/data/include/add.cuh +8 -0
  405. numba_cuda/numba/cuda/tests/data/jitlink.cu +28 -0
  406. numba_cuda/numba/cuda/tests/data/jitlink.ptx +49 -0
  407. numba_cuda/numba/cuda/tests/data/warn.cu +12 -0
  408. numba_cuda/numba/cuda/tests/doc_examples/__init__.py +9 -0
  409. numba_cuda/numba/cuda/tests/doc_examples/ffi/__init__.py +2 -0
  410. numba_cuda/numba/cuda/tests/doc_examples/ffi/functions.cu +54 -0
  411. numba_cuda/numba/cuda/tests/doc_examples/ffi/include/mul.cuh +8 -0
  412. numba_cuda/numba/cuda/tests/doc_examples/ffi/saxpy.cu +14 -0
  413. numba_cuda/numba/cuda/tests/doc_examples/test_cg.py +86 -0
  414. numba_cuda/numba/cuda/tests/doc_examples/test_cpointer.py +68 -0
  415. numba_cuda/numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py +81 -0
  416. numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py +141 -0
  417. numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py +160 -0
  418. numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py +180 -0
  419. numba_cuda/numba/cuda/tests/doc_examples/test_montecarlo.py +119 -0
  420. numba_cuda/numba/cuda/tests/doc_examples/test_random.py +66 -0
  421. numba_cuda/numba/cuda/tests/doc_examples/test_reduction.py +80 -0
  422. numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py +206 -0
  423. numba_cuda/numba/cuda/tests/doc_examples/test_ufunc.py +53 -0
  424. numba_cuda/numba/cuda/tests/doc_examples/test_vecadd.py +76 -0
  425. numba_cuda/numba/cuda/tests/nocuda/__init__.py +9 -0
  426. numba_cuda/numba/cuda/tests/nocuda/test_dummyarray.py +391 -0
  427. numba_cuda/numba/cuda/tests/nocuda/test_function_resolution.py +48 -0
  428. numba_cuda/numba/cuda/tests/nocuda/test_import.py +63 -0
  429. numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py +252 -0
  430. numba_cuda/numba/cuda/tests/nocuda/test_nvvm.py +59 -0
  431. numba_cuda/numba/cuda/tests/nrt/__init__.py +9 -0
  432. numba_cuda/numba/cuda/tests/nrt/test_nrt.py +387 -0
  433. numba_cuda/numba/cuda/tests/nrt/test_nrt_refct.py +124 -0
  434. numba_cuda/numba/cuda/tests/support.py +900 -0
  435. numba_cuda/numba/cuda/typeconv/__init__.py +4 -0
  436. numba_cuda/numba/cuda/typeconv/castgraph.py +137 -0
  437. numba_cuda/numba/cuda/typeconv/rules.py +63 -0
  438. numba_cuda/numba/cuda/typeconv/typeconv.py +121 -0
  439. numba_cuda/numba/cuda/types/__init__.py +233 -0
  440. numba_cuda/numba/cuda/types/__init__.pyi +167 -0
  441. numba_cuda/numba/cuda/types/abstract.py +9 -0
  442. numba_cuda/numba/cuda/types/common.py +9 -0
  443. numba_cuda/numba/cuda/types/containers.py +9 -0
  444. numba_cuda/numba/cuda/types/cuda_abstract.py +533 -0
  445. numba_cuda/numba/cuda/types/cuda_common.py +110 -0
  446. numba_cuda/numba/cuda/types/cuda_containers.py +971 -0
  447. numba_cuda/numba/cuda/types/cuda_function_type.py +230 -0
  448. numba_cuda/numba/cuda/types/cuda_functions.py +798 -0
  449. numba_cuda/numba/cuda/types/cuda_iterators.py +120 -0
  450. numba_cuda/numba/cuda/types/cuda_misc.py +569 -0
  451. numba_cuda/numba/cuda/types/cuda_npytypes.py +690 -0
  452. numba_cuda/numba/cuda/types/cuda_scalars.py +280 -0
  453. numba_cuda/numba/cuda/types/ext_types.py +101 -0
  454. numba_cuda/numba/cuda/types/function_type.py +11 -0
  455. numba_cuda/numba/cuda/types/functions.py +9 -0
  456. numba_cuda/numba/cuda/types/iterators.py +9 -0
  457. numba_cuda/numba/cuda/types/misc.py +9 -0
  458. numba_cuda/numba/cuda/types/npytypes.py +9 -0
  459. numba_cuda/numba/cuda/types/scalars.py +9 -0
  460. numba_cuda/numba/cuda/typing/__init__.py +19 -0
  461. numba_cuda/numba/cuda/typing/arraydecl.py +939 -0
  462. numba_cuda/numba/cuda/typing/asnumbatype.py +130 -0
  463. numba_cuda/numba/cuda/typing/bufproto.py +70 -0
  464. numba_cuda/numba/cuda/typing/builtins.py +1209 -0
  465. numba_cuda/numba/cuda/typing/cffi_utils.py +219 -0
  466. numba_cuda/numba/cuda/typing/cmathdecl.py +47 -0
  467. numba_cuda/numba/cuda/typing/collections.py +138 -0
  468. numba_cuda/numba/cuda/typing/context.py +782 -0
  469. numba_cuda/numba/cuda/typing/ctypes_utils.py +125 -0
  470. numba_cuda/numba/cuda/typing/dictdecl.py +63 -0
  471. numba_cuda/numba/cuda/typing/enumdecl.py +74 -0
  472. numba_cuda/numba/cuda/typing/listdecl.py +147 -0
  473. numba_cuda/numba/cuda/typing/mathdecl.py +158 -0
  474. numba_cuda/numba/cuda/typing/npdatetime.py +322 -0
  475. numba_cuda/numba/cuda/typing/npydecl.py +749 -0
  476. numba_cuda/numba/cuda/typing/setdecl.py +115 -0
  477. numba_cuda/numba/cuda/typing/templates.py +1446 -0
  478. numba_cuda/numba/cuda/typing/typeof.py +301 -0
  479. numba_cuda/numba/cuda/ufuncs.py +746 -0
  480. numba_cuda/numba/cuda/utils.py +724 -0
  481. numba_cuda/numba/cuda/vector_types.py +214 -0
  482. numba_cuda/numba/cuda/vectorizers.py +260 -0
  483. numba_cuda-0.21.1.dist-info/METADATA +109 -0
  484. numba_cuda-0.21.1.dist-info/RECORD +488 -0
  485. numba_cuda-0.21.1.dist-info/WHEEL +5 -0
  486. numba_cuda-0.21.1.dist-info/licenses/LICENSE +26 -0
  487. numba_cuda-0.21.1.dist-info/licenses/LICENSE.numba +24 -0
  488. numba_cuda-0.21.1.dist-info/top_level.txt +1 -0
@@ -0,0 +1,57 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: BSD-2-Clause
3
+
4
+ import numpy as np
5
+
6
+ from numba import cuda
7
+ import unittest
8
+ from numba.cuda.testing import CUDATestCase
9
+
10
+
11
@cuda.jit
def foo(x):
    """Increment ``x[idx]`` by one, where ``idx`` is this thread's 1-D grid
    index; indices at or beyond ``x.size`` are left alone.
    """
    idx = cuda.grid(1)
    # Guard against threads whose index falls outside the array.
    if idx >= x.size:
        return
    x[idx] += 1
16
+
17
+
18
class TestForAll(CUDATestCase):
    """Tests for launching ``cuda.jit`` kernels through their ``forall``
    method with an explicit task count.
    """

    def test_forall_1(self):
        """``forall`` over an integer array increments every element."""
        arr = np.arange(11)
        orig = arr.copy()
        foo.forall(arr.size)(arr)
        # The data is integral and the kernel adds exactly one, so an exact
        # comparison is the right (and stricter) check.
        np.testing.assert_array_equal(arr, orig + 1)

    def test_forall_2(self):
        """``forall`` works with an explicitly-typed kernel taking a scalar
        and two float32 arrays.
        """

        @cuda.jit("void(float32, float32[:], float32[:])")
        def bar(a, x, y):
            i = cuda.grid(1)
            if i < x.size:
                y[i] = a * x[i] + y[i]

        x = np.arange(13, dtype=np.float32)
        y = np.arange(13, dtype=np.float32)
        oldy = y.copy()
        a = 1.234
        bar.forall(y.size)(a, x, y)
        # float32 arithmetic on the device vs. the host-side expression:
        # compare to three decimal places only.
        np.testing.assert_array_almost_equal(y, a * x + oldy, decimal=3)

    def test_forall_no_work(self):
        # Ensure that forall doesn't launch a kernel with no blocks when called
        # with 0 elements. See Issue #5017.
        arr = np.arange(11)
        orig = arr.copy()
        foo.forall(0)(arr)
        # With zero tasks requested, no element may have been modified.
        np.testing.assert_array_equal(arr, orig)

    def test_forall_negative_work(self):
        # Ensure that forall doesn't allow the creation of a forall with a
        # negative element count.
        with self.assertRaises(ValueError) as raises:
            foo.forall(-1)
        self.assertIn(
            "Can't create ForAll with negative task count",
            str(raises.exception),
        )
54
+
55
+
56
# Run this module's tests when it is executed directly as a script.
if __name__ == "__main__":
    unittest.main()
@@ -0,0 +1,34 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: BSD-2-Clause
3
+
4
+ import numpy as np
5
+
6
+ from numba import cuda
7
+ from numba.cuda.testing import unittest, CUDATestCase
8
+
9
+
10
class TestFreeVar(CUDATestCase):
    """Compilation check for free variables passed to ``cuda.shared.array``."""

    def test_freevar(self):
        """Compile and launch a kernel whose shared-array size and dtype are
        free variables captured from the enclosing function.
        """
        from numba.cuda import float32

        n_items = 1024
        elem_type = float32

        @cuda.jit("(float32[::1], intp)")
        def foo(A, i):
            "Dummy function"
            # Both ``n_items`` and ``elem_type`` are free variables here.
            scratch = cuda.shared.array(n_items, dtype=elem_type)
            A[i] = scratch[i]

        host = np.arange(2, dtype="float32")
        foo[1, 1](host, 0)
31
+
32
+
33
+ if __name__ == "__main__":
34
+ unittest.main()
@@ -0,0 +1,69 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: BSD-2-Clause
3
+
4
+ import numpy as np
5
+ import math
6
+ from numba import cuda
7
+ from numba.cuda.types import float32, float64, int32, void
8
+ from numba.cuda.testing import unittest, CUDATestCase
9
+
10
+
11
def simple_frexp(aryx, aryexp, arg):
    """Store the ``math.frexp`` mantissa and exponent of ``arg`` in slot 0
    of ``aryx`` and ``aryexp`` respectively.
    """
    mantissa, exponent = math.frexp(arg)
    aryx[0] = mantissa
    aryexp[0] = exponent
13
+
14
+
15
def simple_ldexp(aryx, arg, exp):
    """Store ``math.ldexp(arg, exp)`` in slot 0 of ``aryx``."""
    scaled = math.ldexp(arg, exp)
    aryx[0] = scaled
17
+
18
+
19
class TestCudaFrexpLdexp(CUDATestCase):
    """Check ``math.frexp`` and ``math.ldexp`` when compiled as CUDA kernels."""

    def template_test_frexp(self, nptype, nbtype):
        """Compile ``simple_frexp`` for one float type and check a finite,
        an infinite and a NaN argument.

        ``nptype`` is the NumPy scalar type used for the host array and the
        expected values; ``nbtype`` is the type used in the kernel signature.
        """
        kernel = cuda.jit(void(nbtype[:], int32[:], nbtype))(simple_frexp)
        mantissa = np.zeros(1, dtype=nptype)
        exponent = np.zeros(1, dtype=np.int32)

        kernel[1, 1](mantissa, exponent, 3.1415)
        np.testing.assert_array_equal(mantissa, nptype(0.785375))
        self.assertEqual(exponent, 2)

        # inf and nan are expected back unchanged, with a zero exponent.
        for special in (np.inf, np.nan):
            kernel[1, 1](mantissa, exponent, special)
            np.testing.assert_array_equal(mantissa, nptype(special))
            self.assertEqual(exponent, 0)  # np.frexp gives -1

    def template_test_ldexp(self, nptype, nbtype):
        """Compile ``simple_ldexp`` for one float type and check a finite,
        an infinite and a NaN argument, all with an exponent of 2.
        """
        kernel = cuda.jit(void(nbtype[:], nbtype, int32))(simple_ldexp)
        power = 2
        result = np.zeros(1, dtype=nptype)

        # Each pair is (argument passed to the kernel, expected stored value).
        cases = ((0.785375, 3.1415), (np.inf, np.inf), (np.nan, np.nan))
        for value, expected in cases:
            kernel[1, 1](result, value, power)
            np.testing.assert_array_equal(result, nptype(expected))

    def test_frexp_f4(self):
        """frexp with single precision."""
        self.template_test_frexp(np.float32, float32)

    def test_ldexp_f4(self):
        """ldexp with single precision."""
        self.template_test_ldexp(np.float32, float32)

    def test_frexp_f8(self):
        """frexp with double precision."""
        self.template_test_frexp(np.float64, float64)

    def test_ldexp_f8(self):
        """ldexp with double precision."""
        self.template_test_ldexp(np.float64, float64)
66
+
67
+
68
+ if __name__ == "__main__":
69
+ unittest.main()
@@ -0,0 +1,62 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: BSD-2-Clause
3
+
4
+ import numpy as np
5
+ from numba import cuda
6
+ from numba.cuda import int32, float32
7
+ from numba.cuda.testing import unittest, CUDATestCase
8
+
9
+ N = 100
10
+
11
+
12
def simple_smem(ary):
    """Kernel body: the thread at grid index 0 fills an ``N``-element shared
    int32 array with ``0..N-1``; every thread then copies its own slot
    into ``ary``.
    """
    shared = cuda.shared.array(N, int32)
    tid = cuda.grid(1)
    if tid == 0:
        # A single thread populates the whole shared buffer.
        for slot in range(N):
            shared[slot] = slot
    # Barrier between the writes above and the reads below.
    cuda.syncthreads()
    ary[tid] = shared[tid]
20
+
21
+
22
+ S0 = 10
23
+ S1 = 20
24
+
25
+
26
def coop_smem2d(ary):
    """Kernel body: each thread stores ``(i + 1) / (j + 1)`` for its 2D grid
    position in an ``(S0, S1)`` shared float32 array, then copies that
    entry into ``ary``.
    """
    row, col = cuda.grid(2)
    tile = cuda.shared.array((S0, S1), float32)
    tile[row, col] = (row + 1) / (col + 1)
    # Barrier between the shared-memory write and the read-back.
    cuda.syncthreads()
    ary[row, col] = tile[row, col]
32
+
33
+
34
class TestCudaTestGlobal(CUDATestCase):
    """Kernels whose shared-array shapes come from module-level globals."""

    def test_global_int_const(self):
        """Test simple_smem"""
        compiled = cuda.jit("void(int32[:])")(simple_smem)

        nelem = 100
        ary = np.empty(nelem, dtype=np.int32)
        compiled[1, nelem](ary)

        self.assertTrue(np.all(ary == np.arange(nelem, dtype=np.int32)))

    # ``unittest.SkipTest`` is an exception class, not a decorator. Applying
    # it with ``@`` rebinds the name to an exception instance, which is not
    # callable and is therefore dropped by test loaders without any report.
    # ``unittest.skip`` keeps the test disabled but visible as a skip.
    # NOTE(review): the original reason for disabling this test is not
    # recorded here -- replace the message once it is known.
    @unittest.skip("test_global_tuple_const is disabled")
    def test_global_tuple_const(self):
        """Test coop_smem2d"""
        compiled = cuda.jit("void(float32[:,:])")(coop_smem2d)

        shape = 10, 20
        ary = np.empty(shape, dtype=np.float32)
        compiled[1, shape](ary)

        # Host-side reference for the value each thread stores.
        exp = np.empty_like(ary)
        for i in range(ary.shape[0]):
            for j in range(ary.shape[1]):
                exp[i, j] = float(i + 1) / (j + 1)
        self.assertTrue(np.allclose(ary, exp))
59
+
60
+
61
+ if __name__ == "__main__":
62
+ unittest.main()
@@ -0,0 +1,474 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: BSD-2-Clause
3
+
4
+ import numpy as np
5
+
6
+ import pytest
7
+ from collections import namedtuple
8
+ from numba.cuda import void, int32, float32, float64
9
+ from numba.cuda import guvectorize
10
+ from numba import cuda
11
+ from numba.cuda.testing import skip_on_cudasim, CUDATestCase
12
+ import unittest
13
+ from numba.cuda.core.errors import NumbaPerformanceWarning, TypingError
14
+ from numba.cuda.tests.support import override_config
15
+
16
+
17
def _get_matmulcore_gufunc(dtype=float32):
    """Build a CUDA gufunc named ``matmulcore`` with layout
    ``(m,n),(n,p)->(m,p)`` for 2D arrays of ``dtype``.
    """
    signature = void(dtype[:, :], dtype[:, :], dtype[:, :])

    @guvectorize([signature], "(m,n),(n,p)->(m,p)", target="cuda")
    def matmulcore(A, B, C):
        rows = A.shape[0]
        inner, cols = B.shape
        for row in range(rows):
            for col in range(cols):
                # Accumulate directly in the output element.
                C[row, col] = 0
                for k in range(inner):
                    C[row, col] += A[row, k] * B[k, col]

    return matmulcore
33
+
34
+
35
@skip_on_cudasim("ufunc API unsupported in the simulator")
class TestCUDAGufunc(CUDATestCase):
    """Tests for generalized ufuncs built with ``guvectorize(target="cuda")``."""

    def test_gufunc_small(self):
        """Two small matrix products match ``np.matmul``."""
        gufunc = _get_matmulcore_gufunc()

        matrix_ct = 2
        A = np.arange(matrix_ct * 2 * 4, dtype=np.float32).reshape(
            matrix_ct, 2, 4
        )
        B = np.arange(matrix_ct * 4 * 5, dtype=np.float32).reshape(
            matrix_ct, 4, 5
        )

        C = gufunc(A, B)
        Gold = np.matmul(A, B)
        self.assertTrue(np.allclose(C, Gold))

    def test_gufunc_auto_transfer(self):
        """Mixing a host array with a device array yields a device result."""
        gufunc = _get_matmulcore_gufunc()

        matrix_ct = 2
        A = np.arange(matrix_ct * 2 * 4, dtype=np.float32).reshape(
            matrix_ct, 2, 4
        )
        B = np.arange(matrix_ct * 4 * 5, dtype=np.float32).reshape(
            matrix_ct, 4, 5
        )

        dB = cuda.to_device(B)

        C = gufunc(A, dB).copy_to_host()
        Gold = np.matmul(A, B)
        self.assertTrue(np.allclose(C, Gold))

    def test_gufunc(self):
        """A larger batch of matrix products matches ``np.matmul``."""
        gufunc = _get_matmulcore_gufunc()

        matrix_ct = 1001  # an odd number to test thread/block division in CUDA
        A = np.arange(matrix_ct * 2 * 4, dtype=np.float32).reshape(
            matrix_ct, 2, 4
        )
        B = np.arange(matrix_ct * 4 * 5, dtype=np.float32).reshape(
            matrix_ct, 4, 5
        )

        C = gufunc(A, B)
        Gold = np.matmul(A, B)
        self.assertTrue(np.allclose(C, Gold))

    def test_gufunc_hidim(self):
        """Inputs with two leading batch dimensions match ``np.matmul``."""
        gufunc = _get_matmulcore_gufunc()

        matrix_ct = 100  # arranged below as a 4 x 25 batch of matrices
        A = np.arange(matrix_ct * 2 * 4, dtype=np.float32).reshape(4, 25, 2, 4)
        B = np.arange(matrix_ct * 4 * 5, dtype=np.float32).reshape(4, 25, 4, 5)

        C = gufunc(A, B)
        Gold = np.matmul(A, B)
        self.assertTrue(np.allclose(C, Gold))

    def test_gufunc_new_axis(self):
        """A single 2D operand gives the same result as its tiled 3D form."""
        gufunc = _get_matmulcore_gufunc(dtype=float64)

        X = np.random.randn(10, 3, 3)
        Y = np.random.randn(3, 3)

        gold = np.matmul(X, Y)

        res1 = gufunc(X, Y)
        np.testing.assert_allclose(gold, res1)

        res2 = gufunc(X, np.tile(Y, (10, 1, 1)))
        np.testing.assert_allclose(gold, res2)

    def test_gufunc_stream(self):
        """Device inputs, a preallocated output and a stream all work together."""
        gufunc = _get_matmulcore_gufunc()

        matrix_ct = 1001  # an odd number to test thread/block division in CUDA
        A = np.arange(matrix_ct * 2 * 4, dtype=np.float32).reshape(
            matrix_ct, 2, 4
        )
        B = np.arange(matrix_ct * 4 * 5, dtype=np.float32).reshape(
            matrix_ct, 4, 5
        )

        stream = cuda.stream()
        dA = cuda.to_device(A, stream)
        dB = cuda.to_device(B, stream)

        # Output shape is derived from ``matrix_ct`` so it cannot drift from
        # the input shapes above.
        dC = cuda.device_array(
            shape=(matrix_ct, 2, 5), dtype=A.dtype, stream=stream
        )
        dC = gufunc(dA, dB, out=dC, stream=stream)
        C = dC.copy_to_host(stream=stream)
        stream.synchronize()

        Gold = np.matmul(A, B)

        self.assertTrue(np.allclose(C, Gold))

    def test_copy(self):
        """A 1D copy gufunc fills the array passed as ``out``."""

        @guvectorize([void(float32[:], float32[:])], "(x)->(x)", target="cuda")
        def copy(A, B):
            for i in range(B.size):
                B[i] = A[i]

        A = np.arange(10, dtype=np.float32) + 1
        B = np.zeros_like(A)
        copy(A, out=B)
        np.testing.assert_allclose(A, B)

    def test_copy_unspecified_return(self):
        # Ensure that behaviour is correct when the return type is not
        # specified in the signature.
        @guvectorize([(float32[:], float32[:])], "(x)->(x)", target="cuda")
        def copy(A, B):
            for i in range(B.size):
                B[i] = A[i]

        A = np.arange(10, dtype=np.float32) + 1
        B = np.zeros_like(A)
        copy(A, out=B)
        self.assertTrue(np.allclose(A, B))

    def test_copy_odd(self):
        """Same as ``test_copy`` but with an odd element count (11)."""

        @guvectorize([void(float32[:], float32[:])], "(x)->(x)", target="cuda")
        def copy(A, B):
            for i in range(B.size):
                B[i] = A[i]

        A = np.arange(11, dtype=np.float32) + 1
        B = np.zeros_like(A)
        copy(A, out=B)
        self.assertTrue(np.allclose(A, B))

    def test_copy2d(self):
        """A 2D copy gufunc fills the array passed as ``out``."""

        @guvectorize(
            [void(float32[:, :], float32[:, :])],
            "(x, y)->(x, y)",
            target="cuda",
        )
        def copy2d(A, B):
            for x in range(B.shape[0]):
                for y in range(B.shape[1]):
                    B[x, y] = A[x, y]

        A = np.arange(30, dtype=np.float32).reshape(5, 6) + 1
        B = np.zeros_like(A)
        copy2d(A, out=B)
        self.assertTrue(np.allclose(A, B))

    def test_not_supported_call_from_jit(self):
        """Calling a CUDA gufunc from inside a kernel raises a TypingError."""

        # not supported
        @guvectorize([void(int32[:], int32[:])], "(n)->(n)", target="cuda")
        def gufunc_copy(A, b):
            for i in range(A.shape[0]):
                b[i] = A[i]

        @cuda.jit
        def cuda_jit(A, b):
            return gufunc_copy(A, b)

        A = np.arange(1024 * 32).astype("int32")
        b = np.zeros_like(A)
        msg = "Untyped global name 'gufunc_copy'.*"
        with self.assertRaisesRegex(TypingError, msg):
            cuda_jit[1, 1](A, b)

    # Test inefficient use of the GPU where the inputs are all mapped onto a
    # single thread in a single block.
    def test_inefficient_launch_configuration(self):
        @guvectorize(
            ["void(float32[:], float32[:], float32[:])"],
            "(n),(n)->(n)",
            target="cuda",
        )
        def numba_dist_cuda(a, b, dist):
            # Named ``n`` rather than ``len`` to avoid shadowing the builtin.
            n = a.shape[0]
            for i in range(n):
                dist[i] = a[i] * b[i]

        a = np.random.rand(1024 * 32).astype("float32")
        b = np.random.rand(1024 * 32).astype("float32")
        dist = np.zeros(a.shape[0]).astype("float32")

        with override_config("CUDA_LOW_OCCUPANCY_WARNINGS", 1):
            with pytest.warns(
                NumbaPerformanceWarning, match="Grid size .+ low occupancy"
            ):
                numba_dist_cuda(a, b, dist)

    def test_efficient_launch_configuration(self):
        """Launch over a (524288, 2) input with occupancy warnings enabled."""

        @guvectorize(
            ["void(float32[:], float32[:], float32[:])"],
            "(n),(n)->(n)",
            nopython=True,
            target="cuda",
        )
        def numba_dist_cuda2(a, b, dist):
            # Named ``n`` rather than ``len`` to avoid shadowing the builtin.
            n = a.shape[0]
            for i in range(n):
                dist[i] = a[i] * b[i]

        a = np.random.rand(524288 * 2).astype("float32").reshape((524288, 2))
        b = np.random.rand(524288 * 2).astype("float32").reshape((524288, 2))
        dist = np.zeros_like(a)

        with override_config("CUDA_LOW_OCCUPANCY_WARNINGS", 1):
            numba_dist_cuda2(a, b, dist)

    def test_nopython_flag(self):
        """``nopython=True`` is accepted; ``nopython=False`` raises TypeError."""

        def foo(A, B):
            pass

        # nopython = True is fine
        guvectorize(
            [void(float32[:], float32[:])],
            "(x)->(x)",
            target="cuda",
            nopython=True,
        )(foo)

        # nopython = False is bad
        with self.assertRaises(TypeError) as raises:
            guvectorize(
                [void(float32[:], float32[:])],
                "(x)->(x)",
                target="cuda",
                nopython=False,
            )(foo)
        self.assertEqual("nopython flag must be True", str(raises.exception))

    def test_invalid_flags(self):
        # Check invalid flags
        def foo(A, B):
            pass

        with self.assertRaises(TypeError) as raises:
            guvectorize(
                [void(float32[:], float32[:])],
                "(x)->(x)",
                target="cuda",
                what1=True,
                ever2=False,
            )(foo)
        head = "The following target options are not supported:"
        msg = str(raises.exception)
        self.assertEqual(msg[: len(head)], head)
        # The rest of the message is a comma-separated list of quoted names.
        items = msg[len(head) :].strip().split(",")
        items = [i.strip("'\" ") for i in items]
        self.assertEqual({"what1", "ever2"}, set(items))

    def test_duplicated_output(self):
        """Passing the output both positionally and as ``out=`` is an error."""

        @guvectorize([void(float32[:], float32[:])], "(x)->(x)", target="cuda")
        def foo(inp, out):
            pass  # intentionally empty; never executed

        inp = out = np.zeros(10, dtype=np.float32)
        with self.assertRaises(ValueError) as raises:
            foo(inp, out, out=out)

        msg = "cannot specify argument 'out' as both positional and keyword"
        self.assertEqual(str(raises.exception), msg)

    def check_tuple_arg(self, a, b):
        """Run a row-wise dot-product gufunc on ``a`` and ``b`` and compare
        it with the NumPy result computed from ``np.asarray`` of each.
        """

        @guvectorize(
            [(float64[:], float64[:], float64[:])], "(n),(n)->()", target="cuda"
        )
        def gu_reduce(x, y, r):
            s = 0
            for i in range(len(x)):
                s += x[i] * y[i]
            r[0] = s

        r = gu_reduce(a, b)
        expected = np.sum(np.asarray(a) * np.asarray(b), axis=1)
        np.testing.assert_equal(expected, r)

    def test_tuple_of_tuple_arg(self):
        """Arguments given as tuples of tuples."""
        a = ((1.0, 2.0, 3.0), (4.0, 5.0, 6.0))
        b = ((1.5, 2.5, 3.5), (4.5, 5.5, 6.5))
        self.check_tuple_arg(a, b)

    def test_tuple_of_namedtuple_arg(self):
        """Arguments given as tuples of namedtuples."""
        Point = namedtuple("Point", ("x", "y", "z"))
        a = (Point(x=1.0, y=2.0, z=3.0), Point(x=4.0, y=5.0, z=6.0))
        b = (Point(x=1.5, y=2.5, z=3.5), Point(x=4.5, y=5.5, z=6.5))
        self.check_tuple_arg(a, b)

    def test_tuple_of_array_arg(self):
        """Arguments given as tuples of NumPy arrays."""
        a = (np.asarray((1.0, 2.0, 3.0)), np.asarray((4.0, 5.0, 6.0)))
        b = (np.asarray((1.5, 2.5, 3.5)), np.asarray((4.5, 5.5, 6.5)))
        self.check_tuple_arg(a, b)

    def test_gufunc_name(self):
        """The gufunc object exposes the wrapped function's ``__name__``."""
        gufunc = _get_matmulcore_gufunc()
        self.assertEqual(gufunc.__name__, "matmulcore")

    def test_bad_return_type(self):
        """A signature with a non-void return type raises TypeError."""
        with self.assertRaises(TypeError) as te:

            @guvectorize([int32(int32[:], int32[:])], "(m)->(m)", target="cuda")
            def f(x, y):
                pass

        msg = str(te.exception)
        self.assertIn("guvectorized functions cannot return values", msg)
        self.assertIn("specifies int32 return type", msg)

    def test_incorrect_number_of_pos_args(self):
        """Too few or too many positional arguments raise TypeError."""

        @guvectorize(
            [(int32[:], int32[:], int32[:])], "(m),(m)->(m)", target="cuda"
        )
        def f(x, y, z):
            pass

        arr = np.arange(5)

        # Inputs only, too few
        with self.assertRaises(TypeError) as te:
            f(arr)

        msg = str(te.exception)
        self.assertIn("gufunc accepts 2 positional arguments", msg)
        self.assertIn("or 3 positional arguments", msg)
        self.assertIn("Got 1 positional argument.", msg)

        # Inputs and outputs, too many
        with self.assertRaises(TypeError) as te:
            f(arr, arr, arr, arr)

        msg = str(te.exception)
        self.assertIn("gufunc accepts 2 positional arguments", msg)
        self.assertIn("or 3 positional arguments", msg)
        self.assertIn("Got 4 positional arguments.", msg)
369
+
370
+
371
@skip_on_cudasim("ufunc API unsupported in the simulator")
class TestMultipleOutputs(CUDATestCase):
    """CUDA gufuncs whose layout signature declares two outputs."""

    def test_multiple_outputs_same_type_passed_in(self):
        """Both caller-supplied outputs receive a copy of the input."""

        @guvectorize(
            [void(float32[:], float32[:], float32[:])],
            "(x)->(x),(x)",
            target="cuda",
        )
        def copy(A, B, C):
            for idx in range(B.size):
                B[idx] = A[idx]
                C[idx] = A[idx]

        src = np.arange(10, dtype=np.float32) + 1
        first = np.zeros_like(src)
        second = np.zeros_like(src)
        copy(src, first, second)
        for out in (first, second):
            np.testing.assert_allclose(src, out)

    def test_multiple_outputs_distinct_values(self):
        """The two caller-supplied outputs receive different values."""

        @guvectorize(
            [void(float32[:], float32[:], float32[:])],
            "(x)->(x),(x)",
            target="cuda",
        )
        def copy_and_double(A, B, C):
            for idx in range(B.size):
                B[idx] = A[idx]
                C[idx] = A[idx] * 2

        src = np.arange(10, dtype=np.float32) + 1
        copied = np.zeros_like(src)
        doubled = np.zeros_like(src)
        copy_and_double(src, copied, doubled)
        np.testing.assert_allclose(src, copied)
        np.testing.assert_allclose(src * 2, doubled)

    def test_multiple_output_allocation(self):
        """With only the input passed, both outputs are returned."""

        @guvectorize(
            [void(float32[:], float32[:], float32[:])],
            "(x)->(x),(x)",
            target="cuda",
        )
        def copy_and_double(A, B, C):
            for idx in range(B.size):
                B[idx] = A[idx]
                C[idx] = A[idx] * 2

        src = np.arange(10, dtype=np.float32) + 1
        copied, doubled = copy_and_double(src)
        np.testing.assert_allclose(src, copied)
        np.testing.assert_allclose(src * 2, doubled)

    def test_multiple_output_dtypes(self):
        """The two outputs may have different dtypes (int32 and float64)."""

        @guvectorize(
            [void(int32[:], int32[:], float64[:])],
            "(x)->(x),(x)",
            target="cuda",
        )
        def copy_and_multiply(A, B, C):
            for idx in range(B.size):
                B[idx] = A[idx]
                C[idx] = A[idx] * 1.5

        src = np.arange(10, dtype=np.int32) + 1
        copied = np.zeros_like(src)
        scaled = np.zeros_like(src, dtype=np.float64)
        copy_and_multiply(src, copied, scaled)
        np.testing.assert_allclose(src, copied)
        np.testing.assert_allclose(src * np.float64(1.5), scaled)

    def test_incorrect_number_of_pos_args(self):
        """Too few or too many positional arguments raise TypeError."""

        @guvectorize(
            [(int32[:], int32[:], int32[:], int32[:])],
            "(m),(m)->(m),(m)",
            target="cuda",
        )
        def f(x, y, z, w):
            pass

        arr = np.arange(5)

        # Inputs only, too few
        with self.assertRaises(TypeError) as te:
            f(arr)

        too_few = str(te.exception)
        for fragment in (
            "gufunc accepts 2 positional arguments",
            "or 4 positional arguments",
            "Got 1 positional argument.",
        ):
            self.assertIn(fragment, too_few)

        # Inputs and outputs, too many
        with self.assertRaises(TypeError) as te:
            f(arr, arr, arr, arr, arr)

        too_many = str(te.exception)
        for fragment in (
            "gufunc accepts 2 positional arguments",
            "or 4 positional arguments",
            "Got 5 positional arguments.",
        ):
            self.assertIn(fragment, too_many)
471
+
472
+
473
+ if __name__ == "__main__":
474
+ unittest.main()