numba-cuda 0.22.0__cp312-cp312-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of numba-cuda might be problematic. Click here for more details.

Files changed (487)
  1. _numba_cuda_redirector.pth +4 -0
  2. _numba_cuda_redirector.py +89 -0
  3. numba_cuda/VERSION +1 -0
  4. numba_cuda/__init__.py +6 -0
  5. numba_cuda/_version.py +11 -0
  6. numba_cuda/numba/cuda/__init__.py +70 -0
  7. numba_cuda/numba/cuda/_internal/cuda_bf16.py +16394 -0
  8. numba_cuda/numba/cuda/_internal/cuda_fp16.py +8112 -0
  9. numba_cuda/numba/cuda/api.py +580 -0
  10. numba_cuda/numba/cuda/api_util.py +76 -0
  11. numba_cuda/numba/cuda/args.py +72 -0
  12. numba_cuda/numba/cuda/bf16.py +397 -0
  13. numba_cuda/numba/cuda/cache_hints.py +287 -0
  14. numba_cuda/numba/cuda/cext/__init__.py +2 -0
  15. numba_cuda/numba/cuda/cext/_devicearray.cpp +159 -0
  16. numba_cuda/numba/cuda/cext/_devicearray.cpython-312-aarch64-linux-gnu.so +0 -0
  17. numba_cuda/numba/cuda/cext/_devicearray.h +29 -0
  18. numba_cuda/numba/cuda/cext/_dispatcher.cpp +1098 -0
  19. numba_cuda/numba/cuda/cext/_dispatcher.cpython-312-aarch64-linux-gnu.so +0 -0
  20. numba_cuda/numba/cuda/cext/_hashtable.cpp +532 -0
  21. numba_cuda/numba/cuda/cext/_hashtable.h +135 -0
  22. numba_cuda/numba/cuda/cext/_helperlib.c +71 -0
  23. numba_cuda/numba/cuda/cext/_helperlib.cpython-312-aarch64-linux-gnu.so +0 -0
  24. numba_cuda/numba/cuda/cext/_helpermod.c +82 -0
  25. numba_cuda/numba/cuda/cext/_pymodule.h +38 -0
  26. numba_cuda/numba/cuda/cext/_typeconv.cpp +206 -0
  27. numba_cuda/numba/cuda/cext/_typeconv.cpython-312-aarch64-linux-gnu.so +0 -0
  28. numba_cuda/numba/cuda/cext/_typeof.cpp +1159 -0
  29. numba_cuda/numba/cuda/cext/_typeof.h +19 -0
  30. numba_cuda/numba/cuda/cext/capsulethunk.h +111 -0
  31. numba_cuda/numba/cuda/cext/mviewbuf.c +385 -0
  32. numba_cuda/numba/cuda/cext/mviewbuf.cpython-312-aarch64-linux-gnu.so +0 -0
  33. numba_cuda/numba/cuda/cext/typeconv.cpp +212 -0
  34. numba_cuda/numba/cuda/cext/typeconv.hpp +101 -0
  35. numba_cuda/numba/cuda/cg.py +67 -0
  36. numba_cuda/numba/cuda/cgutils.py +1294 -0
  37. numba_cuda/numba/cuda/cloudpickle/__init__.py +21 -0
  38. numba_cuda/numba/cuda/cloudpickle/cloudpickle.py +1598 -0
  39. numba_cuda/numba/cuda/cloudpickle/cloudpickle_fast.py +17 -0
  40. numba_cuda/numba/cuda/codegen.py +541 -0
  41. numba_cuda/numba/cuda/compiler.py +1396 -0
  42. numba_cuda/numba/cuda/core/analysis.py +758 -0
  43. numba_cuda/numba/cuda/core/annotations/__init__.py +0 -0
  44. numba_cuda/numba/cuda/core/annotations/pretty_annotate.py +288 -0
  45. numba_cuda/numba/cuda/core/annotations/type_annotations.py +305 -0
  46. numba_cuda/numba/cuda/core/base.py +1332 -0
  47. numba_cuda/numba/cuda/core/boxing.py +1411 -0
  48. numba_cuda/numba/cuda/core/bytecode.py +728 -0
  49. numba_cuda/numba/cuda/core/byteflow.py +2346 -0
  50. numba_cuda/numba/cuda/core/caching.py +744 -0
  51. numba_cuda/numba/cuda/core/callconv.py +392 -0
  52. numba_cuda/numba/cuda/core/codegen.py +171 -0
  53. numba_cuda/numba/cuda/core/compiler.py +199 -0
  54. numba_cuda/numba/cuda/core/compiler_lock.py +85 -0
  55. numba_cuda/numba/cuda/core/compiler_machinery.py +497 -0
  56. numba_cuda/numba/cuda/core/config.py +650 -0
  57. numba_cuda/numba/cuda/core/consts.py +124 -0
  58. numba_cuda/numba/cuda/core/controlflow.py +989 -0
  59. numba_cuda/numba/cuda/core/entrypoints.py +57 -0
  60. numba_cuda/numba/cuda/core/environment.py +66 -0
  61. numba_cuda/numba/cuda/core/errors.py +917 -0
  62. numba_cuda/numba/cuda/core/event.py +511 -0
  63. numba_cuda/numba/cuda/core/funcdesc.py +330 -0
  64. numba_cuda/numba/cuda/core/generators.py +387 -0
  65. numba_cuda/numba/cuda/core/imputils.py +509 -0
  66. numba_cuda/numba/cuda/core/inline_closurecall.py +1787 -0
  67. numba_cuda/numba/cuda/core/interpreter.py +3617 -0
  68. numba_cuda/numba/cuda/core/ir.py +1812 -0
  69. numba_cuda/numba/cuda/core/ir_utils.py +2638 -0
  70. numba_cuda/numba/cuda/core/optional.py +129 -0
  71. numba_cuda/numba/cuda/core/options.py +262 -0
  72. numba_cuda/numba/cuda/core/postproc.py +249 -0
  73. numba_cuda/numba/cuda/core/pythonapi.py +1859 -0
  74. numba_cuda/numba/cuda/core/registry.py +46 -0
  75. numba_cuda/numba/cuda/core/removerefctpass.py +123 -0
  76. numba_cuda/numba/cuda/core/rewrites/__init__.py +26 -0
  77. numba_cuda/numba/cuda/core/rewrites/ir_print.py +91 -0
  78. numba_cuda/numba/cuda/core/rewrites/registry.py +104 -0
  79. numba_cuda/numba/cuda/core/rewrites/static_binop.py +41 -0
  80. numba_cuda/numba/cuda/core/rewrites/static_getitem.py +189 -0
  81. numba_cuda/numba/cuda/core/rewrites/static_raise.py +100 -0
  82. numba_cuda/numba/cuda/core/sigutils.py +68 -0
  83. numba_cuda/numba/cuda/core/ssa.py +498 -0
  84. numba_cuda/numba/cuda/core/targetconfig.py +330 -0
  85. numba_cuda/numba/cuda/core/tracing.py +231 -0
  86. numba_cuda/numba/cuda/core/transforms.py +956 -0
  87. numba_cuda/numba/cuda/core/typed_passes.py +867 -0
  88. numba_cuda/numba/cuda/core/typeinfer.py +1950 -0
  89. numba_cuda/numba/cuda/core/unsafe/__init__.py +0 -0
  90. numba_cuda/numba/cuda/core/unsafe/bytes.py +67 -0
  91. numba_cuda/numba/cuda/core/unsafe/eh.py +67 -0
  92. numba_cuda/numba/cuda/core/unsafe/refcount.py +98 -0
  93. numba_cuda/numba/cuda/core/untyped_passes.py +1979 -0
  94. numba_cuda/numba/cuda/cpython/builtins.py +1153 -0
  95. numba_cuda/numba/cuda/cpython/charseq.py +1218 -0
  96. numba_cuda/numba/cuda/cpython/cmathimpl.py +560 -0
  97. numba_cuda/numba/cuda/cpython/enumimpl.py +103 -0
  98. numba_cuda/numba/cuda/cpython/iterators.py +167 -0
  99. numba_cuda/numba/cuda/cpython/listobj.py +1326 -0
  100. numba_cuda/numba/cuda/cpython/mathimpl.py +499 -0
  101. numba_cuda/numba/cuda/cpython/numbers.py +1475 -0
  102. numba_cuda/numba/cuda/cpython/rangeobj.py +289 -0
  103. numba_cuda/numba/cuda/cpython/slicing.py +322 -0
  104. numba_cuda/numba/cuda/cpython/tupleobj.py +456 -0
  105. numba_cuda/numba/cuda/cpython/unicode.py +2865 -0
  106. numba_cuda/numba/cuda/cpython/unicode_support.py +1597 -0
  107. numba_cuda/numba/cuda/cpython/unsafe/__init__.py +0 -0
  108. numba_cuda/numba/cuda/cpython/unsafe/numbers.py +64 -0
  109. numba_cuda/numba/cuda/cpython/unsafe/tuple.py +92 -0
  110. numba_cuda/numba/cuda/cuda_paths.py +691 -0
  111. numba_cuda/numba/cuda/cudadecl.py +543 -0
  112. numba_cuda/numba/cuda/cudadrv/__init__.py +14 -0
  113. numba_cuda/numba/cuda/cudadrv/devicearray.py +954 -0
  114. numba_cuda/numba/cuda/cudadrv/devices.py +249 -0
  115. numba_cuda/numba/cuda/cudadrv/driver.py +3238 -0
  116. numba_cuda/numba/cuda/cudadrv/drvapi.py +435 -0
  117. numba_cuda/numba/cuda/cudadrv/dummyarray.py +562 -0
  118. numba_cuda/numba/cuda/cudadrv/enums.py +613 -0
  119. numba_cuda/numba/cuda/cudadrv/error.py +48 -0
  120. numba_cuda/numba/cuda/cudadrv/libs.py +220 -0
  121. numba_cuda/numba/cuda/cudadrv/linkable_code.py +184 -0
  122. numba_cuda/numba/cuda/cudadrv/mappings.py +14 -0
  123. numba_cuda/numba/cuda/cudadrv/ndarray.py +26 -0
  124. numba_cuda/numba/cuda/cudadrv/nvrtc.py +193 -0
  125. numba_cuda/numba/cuda/cudadrv/nvvm.py +756 -0
  126. numba_cuda/numba/cuda/cudadrv/rtapi.py +13 -0
  127. numba_cuda/numba/cuda/cudadrv/runtime.py +34 -0
  128. numba_cuda/numba/cuda/cudaimpl.py +983 -0
  129. numba_cuda/numba/cuda/cudamath.py +149 -0
  130. numba_cuda/numba/cuda/datamodel/__init__.py +7 -0
  131. numba_cuda/numba/cuda/datamodel/cuda_manager.py +66 -0
  132. numba_cuda/numba/cuda/datamodel/cuda_models.py +1446 -0
  133. numba_cuda/numba/cuda/datamodel/cuda_packer.py +224 -0
  134. numba_cuda/numba/cuda/datamodel/cuda_registry.py +22 -0
  135. numba_cuda/numba/cuda/datamodel/cuda_testing.py +153 -0
  136. numba_cuda/numba/cuda/datamodel/manager.py +11 -0
  137. numba_cuda/numba/cuda/datamodel/models.py +9 -0
  138. numba_cuda/numba/cuda/datamodel/packer.py +9 -0
  139. numba_cuda/numba/cuda/datamodel/registry.py +11 -0
  140. numba_cuda/numba/cuda/datamodel/testing.py +11 -0
  141. numba_cuda/numba/cuda/debuginfo.py +997 -0
  142. numba_cuda/numba/cuda/decorators.py +294 -0
  143. numba_cuda/numba/cuda/descriptor.py +35 -0
  144. numba_cuda/numba/cuda/device_init.py +155 -0
  145. numba_cuda/numba/cuda/deviceufunc.py +1021 -0
  146. numba_cuda/numba/cuda/dispatcher.py +2463 -0
  147. numba_cuda/numba/cuda/errors.py +72 -0
  148. numba_cuda/numba/cuda/extending.py +697 -0
  149. numba_cuda/numba/cuda/flags.py +178 -0
  150. numba_cuda/numba/cuda/fp16.py +357 -0
  151. numba_cuda/numba/cuda/include/12/cuda_bf16.h +5118 -0
  152. numba_cuda/numba/cuda/include/12/cuda_bf16.hpp +3865 -0
  153. numba_cuda/numba/cuda/include/12/cuda_fp16.h +5363 -0
  154. numba_cuda/numba/cuda/include/12/cuda_fp16.hpp +3483 -0
  155. numba_cuda/numba/cuda/include/13/cuda_bf16.h +5118 -0
  156. numba_cuda/numba/cuda/include/13/cuda_bf16.hpp +3865 -0
  157. numba_cuda/numba/cuda/include/13/cuda_fp16.h +5363 -0
  158. numba_cuda/numba/cuda/include/13/cuda_fp16.hpp +3483 -0
  159. numba_cuda/numba/cuda/initialize.py +24 -0
  160. numba_cuda/numba/cuda/intrinsics.py +531 -0
  161. numba_cuda/numba/cuda/itanium_mangler.py +214 -0
  162. numba_cuda/numba/cuda/kernels/__init__.py +2 -0
  163. numba_cuda/numba/cuda/kernels/reduction.py +265 -0
  164. numba_cuda/numba/cuda/kernels/transpose.py +65 -0
  165. numba_cuda/numba/cuda/libdevice.py +3386 -0
  166. numba_cuda/numba/cuda/libdevicedecl.py +20 -0
  167. numba_cuda/numba/cuda/libdevicefuncs.py +1060 -0
  168. numba_cuda/numba/cuda/libdeviceimpl.py +88 -0
  169. numba_cuda/numba/cuda/locks.py +19 -0
  170. numba_cuda/numba/cuda/lowering.py +1980 -0
  171. numba_cuda/numba/cuda/mathimpl.py +374 -0
  172. numba_cuda/numba/cuda/memory_management/__init__.py +4 -0
  173. numba_cuda/numba/cuda/memory_management/memsys.cu +99 -0
  174. numba_cuda/numba/cuda/memory_management/memsys.cuh +22 -0
  175. numba_cuda/numba/cuda/memory_management/nrt.cu +212 -0
  176. numba_cuda/numba/cuda/memory_management/nrt.cuh +48 -0
  177. numba_cuda/numba/cuda/memory_management/nrt.py +390 -0
  178. numba_cuda/numba/cuda/memory_management/nrt_context.py +438 -0
  179. numba_cuda/numba/cuda/misc/appdirs.py +594 -0
  180. numba_cuda/numba/cuda/misc/cffiimpl.py +24 -0
  181. numba_cuda/numba/cuda/misc/coverage_support.py +43 -0
  182. numba_cuda/numba/cuda/misc/dump_style.py +41 -0
  183. numba_cuda/numba/cuda/misc/findlib.py +75 -0
  184. numba_cuda/numba/cuda/misc/firstlinefinder.py +96 -0
  185. numba_cuda/numba/cuda/misc/gdb_hook.py +240 -0
  186. numba_cuda/numba/cuda/misc/literal.py +28 -0
  187. numba_cuda/numba/cuda/misc/llvm_pass_timings.py +412 -0
  188. numba_cuda/numba/cuda/misc/special.py +94 -0
  189. numba_cuda/numba/cuda/models.py +56 -0
  190. numba_cuda/numba/cuda/np/arraymath.py +5130 -0
  191. numba_cuda/numba/cuda/np/arrayobj.py +7635 -0
  192. numba_cuda/numba/cuda/np/extensions.py +11 -0
  193. numba_cuda/numba/cuda/np/linalg.py +3087 -0
  194. numba_cuda/numba/cuda/np/math/__init__.py +0 -0
  195. numba_cuda/numba/cuda/np/math/cmathimpl.py +558 -0
  196. numba_cuda/numba/cuda/np/math/mathimpl.py +487 -0
  197. numba_cuda/numba/cuda/np/math/numbers.py +1461 -0
  198. numba_cuda/numba/cuda/np/npdatetime.py +969 -0
  199. numba_cuda/numba/cuda/np/npdatetime_helpers.py +217 -0
  200. numba_cuda/numba/cuda/np/npyfuncs.py +1808 -0
  201. numba_cuda/numba/cuda/np/npyimpl.py +1027 -0
  202. numba_cuda/numba/cuda/np/numpy_support.py +798 -0
  203. numba_cuda/numba/cuda/np/polynomial/__init__.py +4 -0
  204. numba_cuda/numba/cuda/np/polynomial/polynomial_core.py +242 -0
  205. numba_cuda/numba/cuda/np/polynomial/polynomial_functions.py +380 -0
  206. numba_cuda/numba/cuda/np/ufunc/__init__.py +4 -0
  207. numba_cuda/numba/cuda/np/ufunc/decorators.py +203 -0
  208. numba_cuda/numba/cuda/np/ufunc/sigparse.py +68 -0
  209. numba_cuda/numba/cuda/np/ufunc/ufuncbuilder.py +65 -0
  210. numba_cuda/numba/cuda/np/ufunc_db.py +1282 -0
  211. numba_cuda/numba/cuda/np/unsafe/__init__.py +0 -0
  212. numba_cuda/numba/cuda/np/unsafe/ndarray.py +84 -0
  213. numba_cuda/numba/cuda/nvvmutils.py +254 -0
  214. numba_cuda/numba/cuda/printimpl.py +126 -0
  215. numba_cuda/numba/cuda/random.py +308 -0
  216. numba_cuda/numba/cuda/reshape_funcs.cu +156 -0
  217. numba_cuda/numba/cuda/serialize.py +267 -0
  218. numba_cuda/numba/cuda/simulator/__init__.py +63 -0
  219. numba_cuda/numba/cuda/simulator/_internal/__init__.py +4 -0
  220. numba_cuda/numba/cuda/simulator/_internal/cuda_bf16.py +2 -0
  221. numba_cuda/numba/cuda/simulator/api.py +179 -0
  222. numba_cuda/numba/cuda/simulator/bf16.py +4 -0
  223. numba_cuda/numba/cuda/simulator/compiler.py +38 -0
  224. numba_cuda/numba/cuda/simulator/cudadrv/__init__.py +11 -0
  225. numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py +462 -0
  226. numba_cuda/numba/cuda/simulator/cudadrv/devices.py +122 -0
  227. numba_cuda/numba/cuda/simulator/cudadrv/driver.py +66 -0
  228. numba_cuda/numba/cuda/simulator/cudadrv/drvapi.py +7 -0
  229. numba_cuda/numba/cuda/simulator/cudadrv/dummyarray.py +7 -0
  230. numba_cuda/numba/cuda/simulator/cudadrv/error.py +10 -0
  231. numba_cuda/numba/cuda/simulator/cudadrv/libs.py +10 -0
  232. numba_cuda/numba/cuda/simulator/cudadrv/linkable_code.py +61 -0
  233. numba_cuda/numba/cuda/simulator/cudadrv/nvrtc.py +11 -0
  234. numba_cuda/numba/cuda/simulator/cudadrv/nvvm.py +32 -0
  235. numba_cuda/numba/cuda/simulator/cudadrv/runtime.py +22 -0
  236. numba_cuda/numba/cuda/simulator/dispatcher.py +11 -0
  237. numba_cuda/numba/cuda/simulator/kernel.py +320 -0
  238. numba_cuda/numba/cuda/simulator/kernelapi.py +509 -0
  239. numba_cuda/numba/cuda/simulator/memory_management/__init__.py +4 -0
  240. numba_cuda/numba/cuda/simulator/memory_management/nrt.py +21 -0
  241. numba_cuda/numba/cuda/simulator/reduction.py +19 -0
  242. numba_cuda/numba/cuda/simulator/tests/support.py +4 -0
  243. numba_cuda/numba/cuda/simulator/vector_types.py +65 -0
  244. numba_cuda/numba/cuda/simulator_init.py +18 -0
  245. numba_cuda/numba/cuda/stubs.py +624 -0
  246. numba_cuda/numba/cuda/target.py +505 -0
  247. numba_cuda/numba/cuda/testing.py +347 -0
  248. numba_cuda/numba/cuda/tests/__init__.py +62 -0
  249. numba_cuda/numba/cuda/tests/benchmarks/__init__.py +0 -0
  250. numba_cuda/numba/cuda/tests/benchmarks/test_kernel_launch.py +119 -0
  251. numba_cuda/numba/cuda/tests/cloudpickle_main_class.py +9 -0
  252. numba_cuda/numba/cuda/tests/core/serialize_usecases.py +113 -0
  253. numba_cuda/numba/cuda/tests/core/test_itanium_mangler.py +83 -0
  254. numba_cuda/numba/cuda/tests/core/test_serialize.py +371 -0
  255. numba_cuda/numba/cuda/tests/cudadrv/__init__.py +9 -0
  256. numba_cuda/numba/cuda/tests/cudadrv/test_array_attr.py +147 -0
  257. numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py +161 -0
  258. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_array_slicing.py +397 -0
  259. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_auto_context.py +24 -0
  260. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py +180 -0
  261. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py +313 -0
  262. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_memory.py +191 -0
  263. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py +621 -0
  264. numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py +247 -0
  265. numba_cuda/numba/cuda/tests/cudadrv/test_detect.py +100 -0
  266. numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +200 -0
  267. numba_cuda/numba/cuda/tests/cudadrv/test_events.py +53 -0
  268. numba_cuda/numba/cuda/tests/cudadrv/test_host_alloc.py +72 -0
  269. numba_cuda/numba/cuda/tests/cudadrv/test_init.py +138 -0
  270. numba_cuda/numba/cuda/tests/cudadrv/test_inline_ptx.py +43 -0
  271. numba_cuda/numba/cuda/tests/cudadrv/test_is_fp16.py +15 -0
  272. numba_cuda/numba/cuda/tests/cudadrv/test_linkable_code.py +58 -0
  273. numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +348 -0
  274. numba_cuda/numba/cuda/tests/cudadrv/test_managed_alloc.py +128 -0
  275. numba_cuda/numba/cuda/tests/cudadrv/test_module_callbacks.py +301 -0
  276. numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py +174 -0
  277. numba_cuda/numba/cuda/tests/cudadrv/test_nvrtc.py +28 -0
  278. numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py +185 -0
  279. numba_cuda/numba/cuda/tests/cudadrv/test_pinned.py +39 -0
  280. numba_cuda/numba/cuda/tests/cudadrv/test_profiler.py +23 -0
  281. numba_cuda/numba/cuda/tests/cudadrv/test_reset_device.py +38 -0
  282. numba_cuda/numba/cuda/tests/cudadrv/test_runtime.py +48 -0
  283. numba_cuda/numba/cuda/tests/cudadrv/test_select_device.py +44 -0
  284. numba_cuda/numba/cuda/tests/cudadrv/test_streams.py +127 -0
  285. numba_cuda/numba/cuda/tests/cudapy/__init__.py +9 -0
  286. numba_cuda/numba/cuda/tests/cudapy/cache_usecases.py +231 -0
  287. numba_cuda/numba/cuda/tests/cudapy/cache_with_cpu_usecases.py +50 -0
  288. numba_cuda/numba/cuda/tests/cudapy/cg_cache_usecases.py +36 -0
  289. numba_cuda/numba/cuda/tests/cudapy/complex_usecases.py +116 -0
  290. numba_cuda/numba/cuda/tests/cudapy/enum_usecases.py +59 -0
  291. numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +62 -0
  292. numba_cuda/numba/cuda/tests/cudapy/jitlink.ptx +28 -0
  293. numba_cuda/numba/cuda/tests/cudapy/overload_usecases.py +33 -0
  294. numba_cuda/numba/cuda/tests/cudapy/recursion_usecases.py +104 -0
  295. numba_cuda/numba/cuda/tests/cudapy/test_alignment.py +47 -0
  296. numba_cuda/numba/cuda/tests/cudapy/test_analysis.py +1122 -0
  297. numba_cuda/numba/cuda/tests/cudapy/test_array.py +344 -0
  298. numba_cuda/numba/cuda/tests/cudapy/test_array_alignment.py +268 -0
  299. numba_cuda/numba/cuda/tests/cudapy/test_array_args.py +203 -0
  300. numba_cuda/numba/cuda/tests/cudapy/test_array_methods.py +63 -0
  301. numba_cuda/numba/cuda/tests/cudapy/test_array_reductions.py +360 -0
  302. numba_cuda/numba/cuda/tests/cudapy/test_atomics.py +1815 -0
  303. numba_cuda/numba/cuda/tests/cudapy/test_bfloat16.py +599 -0
  304. numba_cuda/numba/cuda/tests/cudapy/test_bfloat16_bindings.py +377 -0
  305. numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py +160 -0
  306. numba_cuda/numba/cuda/tests/cudapy/test_boolean.py +27 -0
  307. numba_cuda/numba/cuda/tests/cudapy/test_byteflow.py +98 -0
  308. numba_cuda/numba/cuda/tests/cudapy/test_cache_hints.py +210 -0
  309. numba_cuda/numba/cuda/tests/cudapy/test_caching.py +683 -0
  310. numba_cuda/numba/cuda/tests/cudapy/test_casting.py +265 -0
  311. numba_cuda/numba/cuda/tests/cudapy/test_cffi.py +42 -0
  312. numba_cuda/numba/cuda/tests/cudapy/test_compiler.py +718 -0
  313. numba_cuda/numba/cuda/tests/cudapy/test_complex.py +370 -0
  314. numba_cuda/numba/cuda/tests/cudapy/test_complex_kernel.py +23 -0
  315. numba_cuda/numba/cuda/tests/cudapy/test_const_string.py +142 -0
  316. numba_cuda/numba/cuda/tests/cudapy/test_constmem.py +178 -0
  317. numba_cuda/numba/cuda/tests/cudapy/test_cooperative_groups.py +193 -0
  318. numba_cuda/numba/cuda/tests/cudapy/test_copy_propagate.py +131 -0
  319. numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py +438 -0
  320. numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py +94 -0
  321. numba_cuda/numba/cuda/tests/cudapy/test_datetime.py +101 -0
  322. numba_cuda/numba/cuda/tests/cudapy/test_debug.py +105 -0
  323. numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +978 -0
  324. numba_cuda/numba/cuda/tests/cudapy/test_debuginfo_types.py +476 -0
  325. numba_cuda/numba/cuda/tests/cudapy/test_device_func.py +500 -0
  326. numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +820 -0
  327. numba_cuda/numba/cuda/tests/cudapy/test_enums.py +152 -0
  328. numba_cuda/numba/cuda/tests/cudapy/test_errors.py +111 -0
  329. numba_cuda/numba/cuda/tests/cudapy/test_exception.py +170 -0
  330. numba_cuda/numba/cuda/tests/cudapy/test_extending.py +1088 -0
  331. numba_cuda/numba/cuda/tests/cudapy/test_extending_types.py +71 -0
  332. numba_cuda/numba/cuda/tests/cudapy/test_fastmath.py +265 -0
  333. numba_cuda/numba/cuda/tests/cudapy/test_flow_control.py +1433 -0
  334. numba_cuda/numba/cuda/tests/cudapy/test_forall.py +57 -0
  335. numba_cuda/numba/cuda/tests/cudapy/test_freevar.py +34 -0
  336. numba_cuda/numba/cuda/tests/cudapy/test_frexp_ldexp.py +69 -0
  337. numba_cuda/numba/cuda/tests/cudapy/test_globals.py +62 -0
  338. numba_cuda/numba/cuda/tests/cudapy/test_gufunc.py +474 -0
  339. numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scalar.py +167 -0
  340. numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scheduling.py +92 -0
  341. numba_cuda/numba/cuda/tests/cudapy/test_idiv.py +39 -0
  342. numba_cuda/numba/cuda/tests/cudapy/test_inline.py +170 -0
  343. numba_cuda/numba/cuda/tests/cudapy/test_inspect.py +255 -0
  344. numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py +1219 -0
  345. numba_cuda/numba/cuda/tests/cudapy/test_ipc.py +263 -0
  346. numba_cuda/numba/cuda/tests/cudapy/test_ir.py +598 -0
  347. numba_cuda/numba/cuda/tests/cudapy/test_ir_utils.py +276 -0
  348. numba_cuda/numba/cuda/tests/cudapy/test_iterators.py +101 -0
  349. numba_cuda/numba/cuda/tests/cudapy/test_lang.py +68 -0
  350. numba_cuda/numba/cuda/tests/cudapy/test_laplace.py +123 -0
  351. numba_cuda/numba/cuda/tests/cudapy/test_libdevice.py +194 -0
  352. numba_cuda/numba/cuda/tests/cudapy/test_lineinfo.py +220 -0
  353. numba_cuda/numba/cuda/tests/cudapy/test_localmem.py +173 -0
  354. numba_cuda/numba/cuda/tests/cudapy/test_make_function_to_jit_function.py +364 -0
  355. numba_cuda/numba/cuda/tests/cudapy/test_mandel.py +47 -0
  356. numba_cuda/numba/cuda/tests/cudapy/test_math.py +842 -0
  357. numba_cuda/numba/cuda/tests/cudapy/test_matmul.py +76 -0
  358. numba_cuda/numba/cuda/tests/cudapy/test_minmax.py +78 -0
  359. numba_cuda/numba/cuda/tests/cudapy/test_montecarlo.py +25 -0
  360. numba_cuda/numba/cuda/tests/cudapy/test_multigpu.py +145 -0
  361. numba_cuda/numba/cuda/tests/cudapy/test_multiprocessing.py +39 -0
  362. numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py +82 -0
  363. numba_cuda/numba/cuda/tests/cudapy/test_nondet.py +53 -0
  364. numba_cuda/numba/cuda/tests/cudapy/test_operator.py +504 -0
  365. numba_cuda/numba/cuda/tests/cudapy/test_optimization.py +93 -0
  366. numba_cuda/numba/cuda/tests/cudapy/test_overload.py +402 -0
  367. numba_cuda/numba/cuda/tests/cudapy/test_powi.py +128 -0
  368. numba_cuda/numba/cuda/tests/cudapy/test_print.py +193 -0
  369. numba_cuda/numba/cuda/tests/cudapy/test_py2_div_issue.py +37 -0
  370. numba_cuda/numba/cuda/tests/cudapy/test_random.py +117 -0
  371. numba_cuda/numba/cuda/tests/cudapy/test_record_dtype.py +614 -0
  372. numba_cuda/numba/cuda/tests/cudapy/test_recursion.py +130 -0
  373. numba_cuda/numba/cuda/tests/cudapy/test_reduction.py +94 -0
  374. numba_cuda/numba/cuda/tests/cudapy/test_retrieve_autoconverted_arrays.py +83 -0
  375. numba_cuda/numba/cuda/tests/cudapy/test_serialize.py +86 -0
  376. numba_cuda/numba/cuda/tests/cudapy/test_slicing.py +40 -0
  377. numba_cuda/numba/cuda/tests/cudapy/test_sm.py +457 -0
  378. numba_cuda/numba/cuda/tests/cudapy/test_sm_creation.py +233 -0
  379. numba_cuda/numba/cuda/tests/cudapy/test_ssa.py +454 -0
  380. numba_cuda/numba/cuda/tests/cudapy/test_stream_api.py +56 -0
  381. numba_cuda/numba/cuda/tests/cudapy/test_sync.py +277 -0
  382. numba_cuda/numba/cuda/tests/cudapy/test_tracing.py +200 -0
  383. numba_cuda/numba/cuda/tests/cudapy/test_transpose.py +90 -0
  384. numba_cuda/numba/cuda/tests/cudapy/test_typeconv.py +333 -0
  385. numba_cuda/numba/cuda/tests/cudapy/test_typeinfer.py +538 -0
  386. numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +585 -0
  387. numba_cuda/numba/cuda/tests/cudapy/test_userexc.py +42 -0
  388. numba_cuda/numba/cuda/tests/cudapy/test_vector_type.py +485 -0
  389. numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py +312 -0
  390. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_complex.py +23 -0
  391. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py +183 -0
  392. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_device.py +40 -0
  393. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_scalar_arg.py +40 -0
  394. numba_cuda/numba/cuda/tests/cudapy/test_warning.py +206 -0
  395. numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +446 -0
  396. numba_cuda/numba/cuda/tests/cudasim/__init__.py +9 -0
  397. numba_cuda/numba/cuda/tests/cudasim/support.py +9 -0
  398. numba_cuda/numba/cuda/tests/cudasim/test_cudasim_issues.py +111 -0
  399. numba_cuda/numba/cuda/tests/data/__init__.py +2 -0
  400. numba_cuda/numba/cuda/tests/data/cta_barrier.cu +28 -0
  401. numba_cuda/numba/cuda/tests/data/cuda_include.cu +10 -0
  402. numba_cuda/numba/cuda/tests/data/error.cu +12 -0
  403. numba_cuda/numba/cuda/tests/data/include/add.cuh +8 -0
  404. numba_cuda/numba/cuda/tests/data/jitlink.cu +28 -0
  405. numba_cuda/numba/cuda/tests/data/jitlink.ptx +49 -0
  406. numba_cuda/numba/cuda/tests/data/warn.cu +12 -0
  407. numba_cuda/numba/cuda/tests/doc_examples/__init__.py +9 -0
  408. numba_cuda/numba/cuda/tests/doc_examples/ffi/__init__.py +2 -0
  409. numba_cuda/numba/cuda/tests/doc_examples/ffi/functions.cu +54 -0
  410. numba_cuda/numba/cuda/tests/doc_examples/ffi/include/mul.cuh +8 -0
  411. numba_cuda/numba/cuda/tests/doc_examples/ffi/saxpy.cu +14 -0
  412. numba_cuda/numba/cuda/tests/doc_examples/test_cg.py +86 -0
  413. numba_cuda/numba/cuda/tests/doc_examples/test_cpointer.py +68 -0
  414. numba_cuda/numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py +81 -0
  415. numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py +141 -0
  416. numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py +160 -0
  417. numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py +180 -0
  418. numba_cuda/numba/cuda/tests/doc_examples/test_montecarlo.py +119 -0
  419. numba_cuda/numba/cuda/tests/doc_examples/test_random.py +66 -0
  420. numba_cuda/numba/cuda/tests/doc_examples/test_reduction.py +80 -0
  421. numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py +206 -0
  422. numba_cuda/numba/cuda/tests/doc_examples/test_ufunc.py +53 -0
  423. numba_cuda/numba/cuda/tests/doc_examples/test_vecadd.py +76 -0
  424. numba_cuda/numba/cuda/tests/nocuda/__init__.py +9 -0
  425. numba_cuda/numba/cuda/tests/nocuda/test_dummyarray.py +452 -0
  426. numba_cuda/numba/cuda/tests/nocuda/test_function_resolution.py +48 -0
  427. numba_cuda/numba/cuda/tests/nocuda/test_import.py +63 -0
  428. numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py +252 -0
  429. numba_cuda/numba/cuda/tests/nocuda/test_nvvm.py +59 -0
  430. numba_cuda/numba/cuda/tests/nrt/__init__.py +9 -0
  431. numba_cuda/numba/cuda/tests/nrt/test_nrt.py +387 -0
  432. numba_cuda/numba/cuda/tests/nrt/test_nrt_refct.py +124 -0
  433. numba_cuda/numba/cuda/tests/support.py +900 -0
  434. numba_cuda/numba/cuda/typeconv/__init__.py +4 -0
  435. numba_cuda/numba/cuda/typeconv/castgraph.py +137 -0
  436. numba_cuda/numba/cuda/typeconv/rules.py +63 -0
  437. numba_cuda/numba/cuda/typeconv/typeconv.py +121 -0
  438. numba_cuda/numba/cuda/types/__init__.py +233 -0
  439. numba_cuda/numba/cuda/types/__init__.pyi +167 -0
  440. numba_cuda/numba/cuda/types/abstract.py +9 -0
  441. numba_cuda/numba/cuda/types/common.py +9 -0
  442. numba_cuda/numba/cuda/types/containers.py +9 -0
  443. numba_cuda/numba/cuda/types/cuda_abstract.py +533 -0
  444. numba_cuda/numba/cuda/types/cuda_common.py +110 -0
  445. numba_cuda/numba/cuda/types/cuda_containers.py +971 -0
  446. numba_cuda/numba/cuda/types/cuda_function_type.py +230 -0
  447. numba_cuda/numba/cuda/types/cuda_functions.py +798 -0
  448. numba_cuda/numba/cuda/types/cuda_iterators.py +120 -0
  449. numba_cuda/numba/cuda/types/cuda_misc.py +569 -0
  450. numba_cuda/numba/cuda/types/cuda_npytypes.py +690 -0
  451. numba_cuda/numba/cuda/types/cuda_scalars.py +280 -0
  452. numba_cuda/numba/cuda/types/ext_types.py +101 -0
  453. numba_cuda/numba/cuda/types/function_type.py +11 -0
  454. numba_cuda/numba/cuda/types/functions.py +9 -0
  455. numba_cuda/numba/cuda/types/iterators.py +9 -0
  456. numba_cuda/numba/cuda/types/misc.py +9 -0
  457. numba_cuda/numba/cuda/types/npytypes.py +9 -0
  458. numba_cuda/numba/cuda/types/scalars.py +9 -0
  459. numba_cuda/numba/cuda/typing/__init__.py +19 -0
  460. numba_cuda/numba/cuda/typing/arraydecl.py +939 -0
  461. numba_cuda/numba/cuda/typing/asnumbatype.py +130 -0
  462. numba_cuda/numba/cuda/typing/bufproto.py +70 -0
  463. numba_cuda/numba/cuda/typing/builtins.py +1209 -0
  464. numba_cuda/numba/cuda/typing/cffi_utils.py +219 -0
  465. numba_cuda/numba/cuda/typing/cmathdecl.py +47 -0
  466. numba_cuda/numba/cuda/typing/collections.py +138 -0
  467. numba_cuda/numba/cuda/typing/context.py +782 -0
  468. numba_cuda/numba/cuda/typing/ctypes_utils.py +125 -0
  469. numba_cuda/numba/cuda/typing/dictdecl.py +63 -0
  470. numba_cuda/numba/cuda/typing/enumdecl.py +74 -0
  471. numba_cuda/numba/cuda/typing/listdecl.py +147 -0
  472. numba_cuda/numba/cuda/typing/mathdecl.py +158 -0
  473. numba_cuda/numba/cuda/typing/npdatetime.py +322 -0
  474. numba_cuda/numba/cuda/typing/npydecl.py +749 -0
  475. numba_cuda/numba/cuda/typing/setdecl.py +115 -0
  476. numba_cuda/numba/cuda/typing/templates.py +1446 -0
  477. numba_cuda/numba/cuda/typing/typeof.py +301 -0
  478. numba_cuda/numba/cuda/ufuncs.py +746 -0
  479. numba_cuda/numba/cuda/utils.py +724 -0
  480. numba_cuda/numba/cuda/vector_types.py +214 -0
  481. numba_cuda/numba/cuda/vectorizers.py +260 -0
  482. numba_cuda-0.22.0.dist-info/METADATA +109 -0
  483. numba_cuda-0.22.0.dist-info/RECORD +487 -0
  484. numba_cuda-0.22.0.dist-info/WHEEL +6 -0
  485. numba_cuda-0.22.0.dist-info/licenses/LICENSE +26 -0
  486. numba_cuda-0.22.0.dist-info/licenses/LICENSE.numba +24 -0
  487. numba_cuda-0.22.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,206 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: BSD-2-Clause
3
+
4
+ import pytest
5
+ import numpy as np
6
+ from numba import cuda
7
+ from numba.cuda.cudadrv import driver
8
+ from numba.cuda.testing import (
9
+ unittest,
10
+ CUDATestCase,
11
+ skip_on_cudasim,
12
+ )
13
+ from numba.cuda.tests.support import (
14
+ linux_only,
15
+ override_config,
16
+ run_in_subprocess,
17
+ )
18
+ from numba.cuda.core.errors import (
19
+ NumbaPerformanceWarning,
20
+ NumbaInvalidConfigWarning,
21
+ )
22
+ from numba.cuda.core import config
23
+ import warnings
24
+
25
+
26
@skip_on_cudasim("cudasim does not raise performance warnings")
class TestWarnings(CUDATestCase):
    """Check which warnings are (and are not) emitted by ``cuda.jit``.

    The tests cover three families of warnings:

    * ``NumbaPerformanceWarning`` for low-occupancy launch configurations,
    * ``NumbaPerformanceWarning`` for host arrays passed to a kernel,
    * ``NumbaInvalidConfigWarning`` for ``debug`` combined with optimization.
    """

    # Regular expressions matched against the emitted warning messages.
    _IMPLICIT_COPY_MSG = (
        "Host array used in CUDA kernel will incur.+copy overhead"
    )
    _DEBUG_OPT_MSG = "not supported by CUDA"

    # Optimization levels that are expected to trigger the debug+opt warning.
    _NONZERO_OPT_LEVELS = (1, 2, 3, "max")

    @staticmethod
    def _make_store_kernel():
        """Return a freshly jitted kernel that stores ``x + 1`` in ``r[0]``."""

        @cuda.jit
        def foo(r, x):
            r[0] = x + 1

        return foo

    def _assert_nothing_recorded(self, caught):
        """Fail, listing the captured messages, if any warning was recorded."""
        self.assertEqual([str(item.message) for item in caught], [])

    def _assert_implicit_copy_warning(self, ary, n):
        """Launch the store kernel on ``ary`` and expect the copy warning."""
        # The launch configuration is created before the config override is
        # active; only the launch itself runs under the override.
        func = self._make_store_kernel()[1, n]
        with override_config("CUDA_WARN_ON_IMPLICIT_COPY", 1):
            with pytest.warns(
                NumbaPerformanceWarning, match=self._IMPLICIT_COPY_MSG
            ):
                func(ary, n)

    def _assert_no_implicit_copy_warning(self, ary, n):
        """Launch the store kernel on ``ary`` and expect no warning at all."""
        foo = self._make_store_kernel()
        with override_config("CUDA_WARN_ON_IMPLICIT_COPY", 1):
            with warnings.catch_warnings(record=True) as caught:
                foo[1, n](ary, n)

        self._assert_nothing_recorded(caught)

    def _assert_jit_warns_debug_opt(self, **jit_kwargs):
        """Expect ``cuda.jit(**jit_kwargs)`` to warn about debug with opt."""
        with pytest.warns(NumbaInvalidConfigWarning, match=self._DEBUG_OPT_MSG):
            cuda.jit(**jit_kwargs)

    def _assert_jit_silent(self, **jit_kwargs):
        """Expect ``cuda.jit(**jit_kwargs)`` to record no warning."""
        with warnings.catch_warnings(record=True) as caught:
            cuda.jit(**jit_kwargs)

        self._assert_nothing_recorded(caught)

    def test_float16_warn_if_lto_missing(self):
        # The kernel is launched twice in a subprocess; the warning text is
        # expected exactly once on stderr when nvjitlink is unavailable and
        # not at all when it is available.
        fp16_kernel_invocation = """
import math
from numba import cuda

@cuda.jit
def kernel():
    x = cuda.types.float16(1.0)
    y = math.sin(x)

kernel[1,1]()
kernel[1,1]()
"""
        performance_warning = "float16 relies on LTO for performance"
        expected_warning_count = 0 if driver._have_nvjitlink() else 1
        _, err = run_in_subprocess(fp16_kernel_invocation)
        self.assertEqual(
            err.decode().count(performance_warning), expected_warning_count
        )

    def test_inefficient_launch_configuration(self):
        @cuda.jit
        def kernel():
            pass

        with override_config("CUDA_LOW_OCCUPANCY_WARNINGS", 1):
            with pytest.warns(
                NumbaPerformanceWarning, match="Grid size .+ low occupancy"
            ):
                func = kernel[1, 1]
                func()

    def test_efficient_launch_configuration(self):
        @cuda.jit
        def kernel():
            pass

        with override_config("CUDA_LOW_OCCUPANCY_WARNINGS", 1):
            with warnings.catch_warnings(record=True) as caught:
                kernel[256, 256]()

        self._assert_nothing_recorded(caught)

    def test_warn_on_host_array(self):
        N = 10
        arr_f32 = np.zeros(N, dtype=np.float32)
        self._assert_implicit_copy_warning(arr_f32, N)

    def test_pinned_warn_on_host_array(self):
        N = 10
        ary = cuda.pinned_array(N, dtype=np.float32)
        self._assert_implicit_copy_warning(ary, N)

    def test_nowarn_on_mapped_array(self):
        N = 10
        ary = cuda.mapped_array(N, dtype=np.float32)
        self._assert_no_implicit_copy_warning(ary, N)

    @linux_only
    def test_nowarn_on_managed_array(self):
        N = 10
        ary = cuda.managed_array(N, dtype=np.float32)
        self._assert_no_implicit_copy_warning(ary, N)

    def test_nowarn_on_device_array(self):
        N = 10
        ary = cuda.device_array(N, dtype=np.float32)
        self._assert_no_implicit_copy_warning(ary, N)

    def test_warn_on_debug_and_opt(self):
        self._assert_jit_warns_debug_opt(debug=True, opt=True)

    def test_warn_on_debug_and_opt_default(self):
        self._assert_jit_warns_debug_opt(debug=True)

    def test_no_warn_on_debug_and_no_opt(self):
        self._assert_jit_silent(debug=True, opt=False)

    def test_no_warn_with_no_debug_and_opt_kwargs(self):
        self._assert_jit_silent()

    def test_no_warn_on_debug_and_opt_with_config(self):
        with override_config("CUDA_DEBUGINFO_DEFAULT", 1):
            # Debug info enabled via config, optimization disabled via config.
            with override_config("OPT", config._OptLevel(0)):
                self._assert_jit_silent()

            # Debug info enabled via config, optimization disabled via kwarg.
            self._assert_jit_silent(opt=False)

        # Debug info enabled via kwarg, optimization disabled via config.
        with override_config("OPT", config._OptLevel(0)):
            self._assert_jit_silent(debug=True)

    def test_warn_on_debug_and_opt_with_config(self):
        # Each opt level runs in its own subTest so that a failure reports
        # which level (and which way of enabling debug) was responsible.
        with override_config("CUDA_DEBUGINFO_DEFAULT", 1):
            for opt in self._NONZERO_OPT_LEVELS:
                with self.subTest(debug="config", opt=opt):
                    with override_config("OPT", config._OptLevel(opt)):
                        self._assert_jit_warns_debug_opt()

        for opt in self._NONZERO_OPT_LEVELS:
            with self.subTest(debug="kwarg", opt=opt):
                with override_config("OPT", config._OptLevel(opt)):
                    self._assert_jit_warns_debug_opt(debug=True)
203
+
204
+
205
# Run the tests in this module when it is executed directly as a script.
if __name__ == "__main__":
    unittest.main()
@@ -0,0 +1,446 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: BSD-2-Clause
3
+
4
+ import re
5
+
6
+ import numpy as np
7
+ from numba import cuda, errors
8
+ from numba.cuda import int32, int64, float32, float64
9
+ from numba.cuda.testing import unittest, CUDATestCase, skip_on_cudasim
10
+ from numba.cuda.compiler import compile_ptx
11
+ from numba.cuda.core import config
12
+
13
+
14
def useful_syncwarp(ary):
    """Kernel body: copy ``ary[0]`` into every thread's slot after a sync.

    The thread with grid index 0 writes 42 to ``ary[0]``; after
    ``cuda.syncwarp`` with mask ``0xFFFFFFFF`` every thread stores
    ``ary[0]`` at its own grid index.
    """
    tid = cuda.grid(1)
    if tid == 0:
        ary[0] = 42
    cuda.syncwarp(0xFFFFFFFF)
    ary[tid] = ary[0]
20
+
21
+
22
def use_shfl_sync_idx(ary, idx):
    """Kernel body: store ``cuda.shfl_sync`` of the grid index at ``idx``.

    Each thread passes its own grid index as the value and ``idx`` as the
    source argument, using mask ``0xFFFFFFFF``, and writes the result to
    its own slot of ``ary``.
    """
    tid = cuda.grid(1)
    ary[tid] = cuda.shfl_sync(0xFFFFFFFF, tid, idx)
26
+
27
+
28
def use_shfl_sync_up(ary, delta):
    """Kernel body: store ``cuda.shfl_up_sync`` of the grid index.

    Each thread passes its own grid index as the value and ``delta`` as
    the offset, using mask ``0xFFFFFFFF``, and writes the result to its
    own slot of ``ary``.
    """
    tid = cuda.grid(1)
    ary[tid] = cuda.shfl_up_sync(0xFFFFFFFF, tid, delta)
32
+
33
+
34
def use_shfl_sync_down(ary, delta):
    """Kernel body: store ``cuda.shfl_down_sync`` of the grid index.

    Each thread passes its own grid index as the value and ``delta`` as
    the offset, using mask ``0xFFFFFFFF``, and writes the result to its
    own slot of ``ary``.
    """
    tid = cuda.grid(1)
    ary[tid] = cuda.shfl_down_sync(0xFFFFFFFF, tid, delta)
38
+
39
+
40
def use_shfl_sync_xor(ary, xor):
    """Kernel body: store ``cuda.shfl_xor_sync`` of the grid index.

    Each thread passes its own grid index as the value and ``xor`` as the
    lane mask argument, using mask ``0xFFFFFFFF``, and writes the result
    to its own slot of ``ary``.
    """
    tid = cuda.grid(1)
    ary[tid] = cuda.shfl_xor_sync(0xFFFFFFFF, tid, xor)
44
+
45
+
46
def use_shfl_sync_with_val(ary, into):
    """Kernel body: store ``cuda.shfl_sync`` of ``into`` from source 0.

    Every thread passes the same scalar ``into`` as the value, with mask
    ``0xFFFFFFFF`` and source argument 0, and writes the result to its
    own slot of ``ary``.
    """
    tid = cuda.grid(1)
    ary[tid] = cuda.shfl_sync(0xFFFFFFFF, into, 0)
50
+
51
+
52
def use_vote_sync_all(ary_in, ary_out):
    """Kernel body: store ``cuda.all_sync`` of each thread's input value.

    The predicate is ``ary_in`` at the thread's grid index, the mask is
    ``0xFFFFFFFF``, and the result goes to the same index of ``ary_out``.
    """
    tid = cuda.grid(1)
    ary_out[tid] = cuda.all_sync(0xFFFFFFFF, ary_in[tid])
56
+
57
+
58
def use_vote_sync_any(ary_in, ary_out):
    """Kernel body: store ``cuda.any_sync`` of each thread's input value.

    The predicate is ``ary_in`` at the thread's grid index, the mask is
    ``0xFFFFFFFF``, and the result goes to the same index of ``ary_out``.
    """
    tid = cuda.grid(1)
    ary_out[tid] = cuda.any_sync(0xFFFFFFFF, ary_in[tid])
62
+
63
+
64
def use_vote_sync_eq(ary_in, ary_out):
    """Kernel body: store ``cuda.eq_sync`` of each thread's input value.

    The predicate is ``ary_in`` at the thread's grid index, the mask is
    ``0xFFFFFFFF``, and the result goes to the same index of ``ary_out``.
    """
    tid = cuda.grid(1)
    ary_out[tid] = cuda.eq_sync(0xFFFFFFFF, ary_in[tid])
68
+
69
+
70
def use_vote_sync_ballot(ary):
    """Kernel body: store ``cuda.ballot_sync(0xFFFFFFFF, True)`` per thread.

    The result is written at index ``cuda.threadIdx.x`` of ``ary``.
    """
    lane = cuda.threadIdx.x
    ary[lane] = cuda.ballot_sync(0xFFFFFFFF, True)
74
+
75
+
76
def use_match_any_sync(ary_in, ary_out):
    """Kernel body: store ``cuda.match_any_sync`` of each thread's input.

    The value is ``ary_in`` at the thread's grid index, the mask is
    ``0xFFFFFFFF``, and the result goes to the same index of ``ary_out``.
    """
    tid = cuda.grid(1)
    ary_out[tid] = cuda.match_any_sync(0xFFFFFFFF, ary_in[tid])
80
+
81
+
82
def use_match_all_sync(ary_in, ary_out):
    """Kernel body: store the ``cuda.match_all_sync`` mask, or 0.

    ``cuda.match_all_sync`` is called with mask ``0xFFFFFFFF`` and the
    thread's element of ``ary_in``; it yields a pair. The first item is
    written to ``ary_out`` when the second item is truthy, otherwise 0 is
    written.
    """
    tid = cuda.grid(1)
    matched_mask, all_matched = cuda.match_all_sync(0xFFFFFFFF, ary_in[tid])
    if all_matched:
        ary_out[tid] = matched_mask
    else:
        ary_out[tid] = 0
86
+
87
+
88
def use_independent_scheduling(arr):
    """Kernel body: call ``cuda.ballot_sync`` from four divergent branches.

    Threads are split by ``threadIdx.x % 4``; each group calls
    ``cuda.ballot_sync`` with a different mask (``0x11111111``,
    ``0x22222222``, ``0x44444444``, ``0x88888888``) and stores the result
    at its own index of ``arr``.
    """
    lane = cuda.threadIdx.x
    group = lane % 4
    if group == 0:
        ballot = cuda.ballot_sync(0x11111111, True)
    elif group == 1:
        ballot = cuda.ballot_sync(0x22222222, True)
    elif group == 2:
        ballot = cuda.ballot_sync(0x44444444, True)
    elif group == 3:
        ballot = cuda.ballot_sync(0x88888888, True)
    arr[lane] = ballot
99
+
100
+
101
def _safe_cc_check(cc):
    """Return whether the current device has compute capability >= ``cc``.

    When ``config.ENABLE_CUDASIM`` is set the device is not queried and
    ``True`` is returned unconditionally.
    """
    if config.ENABLE_CUDASIM:
        return True
    device = cuda.get_current_device()
    return device.compute_capability >= cc
106
+
107
+
108
@skip_on_cudasim("Warp Operations are not yet implemented on cudasim")
class TestCudaWarpOperations(CUDATestCase):
    """Tests for the warp-level ``cuda`` operations.

    Covers ``syncwarp``, the ``shfl_*_sync`` family, the vote functions
    (``all_sync``, ``any_sync``, ``eq_sync``, ``ballot_sync``), the match
    functions, ``activemask`` and ``lanemask_lt``.
    """

    # Captures the parenthesised argument list on a line of LLVM IR.
    _CALL_ARGS_RE = re.compile(r"\((.*)\)")

    def _assert_mode_arg_is_constant(self, compiled, intrinsic):
        """Check the second argument of every ``intrinsic`` call in the IR.

        The LLVM IR of the first compiled specialization of ``compiled``
        is scanned line by line; for each line containing both ``call``
        and ``intrinsic`` the second comma-separated argument must not
        contain ``%``.
        """
        llvm_ir = next(iter(compiled.inspect_llvm().values()))
        for line in llvm_ir.split("\n"):
            if "call" in line and intrinsic in line:
                call_args = self._CALL_ARGS_RE.search(line).group(0)
                mode_arg = call_args.split(",")[1]
                # NOTE(review): presumably "%" marks a non-constant LLVM
                # value here, so its absence means a literal mode.
                self.assertNotIn("%", mode_arg)

    def test_useful_syncwarp(self):
        compiled = cuda.jit("void(int32[:])")(useful_syncwarp)
        nelem = 32
        ary = np.empty(nelem, dtype=np.int32)
        compiled[1, nelem](ary)
        np.testing.assert_array_equal(ary, 42)

    def test_shfl_sync_idx(self):
        compiled = cuda.jit("void(int32[:], int32)")(use_shfl_sync_idx)
        nelem = 32
        idx = 4
        ary = np.empty(nelem, dtype=np.int32)
        compiled[1, nelem](ary, idx)
        np.testing.assert_array_equal(ary, idx)

    def test_shfl_sync_up(self):
        compiled = cuda.jit("void(int32[:], int32)")(use_shfl_sync_up)
        nelem = 32
        delta = 4
        ary = np.empty(nelem, dtype=np.int32)
        exp = np.arange(nelem, dtype=np.int32)
        exp[delta:] -= delta
        compiled[1, nelem](ary, delta)
        np.testing.assert_array_equal(ary, exp)

    def test_shfl_sync_down(self):
        compiled = cuda.jit("void(int32[:], int32)")(use_shfl_sync_down)
        nelem = 32
        delta = 4
        ary = np.empty(nelem, dtype=np.int32)
        exp = np.arange(nelem, dtype=np.int32)
        exp[:-delta] += delta
        compiled[1, nelem](ary, delta)
        np.testing.assert_array_equal(ary, exp)

    def test_shfl_sync_xor(self):
        compiled = cuda.jit("void(int32[:], int32)")(use_shfl_sync_xor)
        nelem = 32
        xor = 16
        ary = np.empty(nelem, dtype=np.int32)
        exp = np.arange(nelem, dtype=np.int32) ^ xor
        compiled[1, nelem](ary, xor)
        np.testing.assert_array_equal(ary, exp)

    def test_shfl_sync_const_mode_val(self):
        # Test `mode` argument is constant in shfl_sync calls.
        # Related to https://github.com/NVIDIA/numba-cuda/pull/231
        subtest = [
            (use_shfl_sync_idx, 4),
            (use_shfl_sync_up, 4),
            (use_shfl_sync_down, 4),
            (use_shfl_sync_xor, 16),
        ]

        for func, value in subtest:
            with self.subTest(func=func.__name__):
                compiled = cuda.jit("void(int32[:], int32)")(func)
                nelem = 32
                ary = np.empty(nelem, dtype=np.int32)
                compiled[1, nelem](ary, value)
                self._assert_mode_arg_is_constant(
                    compiled, "llvm.nvvm.shfl.sync.i32"
                )

    def test_shfl_sync_const_mode_val_sm100(self):
        # Test shfl_sync compiles with cc=(10, 0)
        subtest = [
            use_shfl_sync_idx,
            use_shfl_sync_up,
            use_shfl_sync_down,
            use_shfl_sync_xor,
        ]

        for func in subtest:
            with self.subTest(func=func.__name__):
                compile_ptx(func, (int32[:], int32), cc=(10, 0))

    def test_shfl_sync_types(self):
        types = int32, int64, float32, float64
        values = (
            np.int32(-1),
            np.int64(1 << 42),
            np.float32(np.pi),
            np.float64(np.pi),
        )
        for typ, val in zip(types, values):
            with self.subTest(typ=typ):
                compiled = cuda.jit((typ[:], typ))(use_shfl_sync_with_val)
                nelem = 32
                ary = np.empty(nelem, dtype=val.dtype)
                compiled[1, nelem](ary, val)
                np.testing.assert_array_equal(ary, val)

    def test_vote_sync_const_mode_val(self):
        nelem = 32
        ary1 = np.ones(nelem, dtype=np.int32)
        ary2 = np.empty(nelem, dtype=np.int32)

        subtest = [
            (use_vote_sync_all, "void(int32[:], int32[:])", (ary1, ary2)),
            (use_vote_sync_any, "void(int32[:], int32[:])", (ary1, ary2)),
            (use_vote_sync_eq, "void(int32[:], int32[:])", (ary1, ary2)),
            (use_vote_sync_ballot, "void(uint32[:])", (ary2,)),
        ]

        # `launch_args` rather than `input`, to avoid shadowing the builtin.
        for func, sig, launch_args in subtest:
            with self.subTest(func=func.__name__):
                compiled = cuda.jit(sig)(func)
                compiled[1, nelem](*launch_args)
                self._assert_mode_arg_is_constant(
                    compiled, "llvm.nvvm.vote.sync"
                )

    def test_vote_sync_const_mode_val_sm100(self):
        subtest = [
            (use_vote_sync_all, "void(int32[:], int32[:])"),
            (use_vote_sync_any, "void(int32[:], int32[:])"),
            (use_vote_sync_eq, "void(int32[:], int32[:])"),
            (use_vote_sync_ballot, "void(uint32[:])"),
        ]

        for func, sig in subtest:
            with self.subTest(func=func.__name__):
                compile_ptx(func, sig, cc=(10, 0))

    def test_vote_sync_type_validation(self):
        nelem = 32

        def use_vote_sync_all_with_mask(mask, predicate, result):
            i = cuda.grid(1)
            if i < result.shape[0]:
                result[i] = cuda.all_sync(mask[i], predicate[i])

        # Signatures that must be rejected at typing time, paired with the
        # expected error message.
        invalid_cases = [
            (
                "void(float32[:], int32[:], int32[:])",
                "Mask type must be an integer",
            ),
            (
                "void(boolean[:], int32[:], int32[:])",
                "Mask type must be an integer",
            ),
            (
                "void(float64[:], int32[:], int32[:])",
                "Mask type must be an integer",
            ),
            (
                "void(int32[:], float32[:], int32[:])",
                "Predicate must be an integer or boolean",
            ),
            (
                "void(int32[:], float64[:], int32[:])",
                "Predicate must be an integer or boolean",
            ),
        ]

        for sig, expected_msg in invalid_cases:
            with self.subTest(sig=sig):
                with self.assertRaisesRegex(errors.TypingError, expected_msg):
                    cuda.jit(sig)(use_vote_sync_all_with_mask)

        valid_cases = [
            # mask: unsigned/signed integer
            # predicate: unsigned/signed integer, boolean
            ("void(uint32[:], uint32[:], int32[:])", np.uint32, np.uint32, 1),
            ("void(int64[:], int64[:], int32[:])", np.int64, np.int64, 1),
            ("void(uint64[:], uint64[:], int32[:])", np.uint64, np.uint64, 1),
            ("void(int32[:], int32[:], int32[:])", np.int32, np.int32, 1),
            ("void(uint32[:], boolean[:], int32[:])", np.uint32, np.bool_, 1),
            ("void(uint64[:], boolean[:], int32[:])", np.uint64, np.bool_, 1),
        ]

        # The valid cases only need to compile and launch; results are not
        # inspected.
        for sig, mask_dtype, pred_dtype, mask_val in valid_cases:
            with self.subTest(sig=sig):
                compiled = cuda.jit(sig)(use_vote_sync_all_with_mask)
                ary_mask = np.full(nelem, mask_val, dtype=mask_dtype)
                ary_pred = np.ones(nelem, dtype=pred_dtype)
                ary_result = np.empty(nelem, dtype=np.int32)
                compiled[1, nelem](ary_mask, ary_pred, ary_result)

        # literals
        @cuda.jit
        def use_vote_sync_all_with_literal(result):
            i = cuda.grid(1)
            if i < result.shape[0]:
                result[i] = cuda.all_sync(0xFFFFFFFF, 1)

        ary_result = np.empty(nelem, dtype=np.int32)
        use_vote_sync_all_with_literal[1, nelem](ary_result)

        @cuda.jit
        def use_vote_sync_all_with_predicate_literal(mask, result):
            i = cuda.grid(1)
            if i < mask.shape[0]:
                result[i] = cuda.all_sync(mask[i], 1)

        ary_mask = np.full(nelem, 0xFFFFFFFF, dtype=np.uint32)
        ary_result = np.empty(nelem, dtype=np.int32)
        use_vote_sync_all_with_predicate_literal[1, nelem](ary_mask, ary_result)

    def test_vote_sync_all(self):
        compiled = cuda.jit("void(int32[:], int32[:])")(use_vote_sync_all)
        nelem = 32
        ary_in = np.ones(nelem, dtype=np.int32)
        ary_out = np.empty(nelem, dtype=np.int32)
        compiled[1, nelem](ary_in, ary_out)
        np.testing.assert_array_equal(ary_out, 1)
        ary_in[-1] = 0
        compiled[1, nelem](ary_in, ary_out)
        np.testing.assert_array_equal(ary_out, 0)

    def test_vote_sync_any(self):
        compiled = cuda.jit("void(int32[:], int32[:])")(use_vote_sync_any)
        nelem = 32
        ary_in = np.zeros(nelem, dtype=np.int32)
        ary_out = np.empty(nelem, dtype=np.int32)
        compiled[1, nelem](ary_in, ary_out)
        np.testing.assert_array_equal(ary_out, 0)
        ary_in[2] = 1
        ary_in[5] = 1
        compiled[1, nelem](ary_in, ary_out)
        np.testing.assert_array_equal(ary_out, 1)

    def test_vote_sync_eq(self):
        compiled = cuda.jit("void(int32[:], int32[:])")(use_vote_sync_eq)
        nelem = 32
        ary_in = np.zeros(nelem, dtype=np.int32)
        ary_out = np.empty(nelem, dtype=np.int32)
        compiled[1, nelem](ary_in, ary_out)
        np.testing.assert_array_equal(ary_out, 1)
        ary_in[1] = 1
        compiled[1, nelem](ary_in, ary_out)
        np.testing.assert_array_equal(ary_out, 0)
        ary_in[:] = 1
        compiled[1, nelem](ary_in, ary_out)
        np.testing.assert_array_equal(ary_out, 1)

    def test_vote_sync_ballot(self):
        compiled = cuda.jit("void(uint32[:])")(use_vote_sync_ballot)
        nelem = 32
        ary = np.empty(nelem, dtype=np.uint32)
        compiled[1, nelem](ary)
        np.testing.assert_array_equal(ary, np.uint32(0xFFFFFFFF))

    @unittest.skipUnless(
        _safe_cc_check((7, 0)), "Matching requires at least Volta Architecture"
    )
    def test_match_any_sync(self):
        compiled = cuda.jit("void(int32[:], int32[:])")(use_match_any_sync)
        nelem = 10
        ary_in = np.arange(nelem, dtype=np.int32) % 2
        ary_out = np.empty(nelem, dtype=np.int32)
        exp = np.tile((0b0101010101, 0b1010101010), 5)
        compiled[1, nelem](ary_in, ary_out)
        np.testing.assert_array_equal(ary_out, exp)

    @unittest.skipUnless(
        _safe_cc_check((7, 0)), "Matching requires at least Volta Architecture"
    )
    def test_match_all_sync(self):
        compiled = cuda.jit("void(int32[:], int32[:])")(use_match_all_sync)
        nelem = 10
        ary_in = np.zeros(nelem, dtype=np.int32)
        ary_out = np.empty(nelem, dtype=np.int32)
        compiled[1, nelem](ary_in, ary_out)
        np.testing.assert_array_equal(ary_out, 0b1111111111)
        ary_in[1] = 4
        compiled[1, nelem](ary_in, ary_out)
        np.testing.assert_array_equal(ary_out, 0)

    @unittest.skipUnless(
        _safe_cc_check((7, 0)),
        "Independent scheduling requires at least Volta Architecture",
    )
    def test_independent_scheduling(self):
        compiled = cuda.jit("void(uint32[:])")(use_independent_scheduling)
        arr = np.empty(32, dtype=np.uint32)
        exp = np.tile((0x11111111, 0x22222222, 0x44444444, 0x88888888), 8)
        compiled[1, 32](arr)
        np.testing.assert_array_equal(arr, exp)

    def test_activemask(self):
        # Both branches make the same call on purpose: the test relies on
        # the even and odd threads reaching it from different branches.
        @cuda.jit
        def use_activemask(x):
            i = cuda.grid(1)
            if (i % 2) == 0:
                # Even numbered threads fill in even numbered array entries
                # with binary "...01010101"
                x[i] = cuda.activemask()
            else:
                # Odd numbered threads fill in odd numbered array entries
                # with binary "...10101010"
                x[i] = cuda.activemask()

        out = np.zeros(32, dtype=np.uint32)
        use_activemask[1, 32](out)

        # 0x5 = 0101: The pattern from even-numbered threads
        # 0xA = 1010: The pattern from odd-numbered threads
        expected = np.tile((0x55555555, 0xAAAAAAAA), 16)
        np.testing.assert_equal(expected, out)

    def test_lanemask_lt(self):
        @cuda.jit
        def use_lanemask_lt(x):
            i = cuda.grid(1)
            x[i] = cuda.lanemask_lt()

        out = np.zeros(32, dtype=np.uint32)
        use_lanemask_lt[1, 32](out)

        # A string of 1s that grows from the LSB for each entry:
        # 0, 1, 3, 7, F, 1F, 3F, 7F, FF, 1FF, etc.
        # or in binary:
        # ...0001, ....0011, ...0111, etc.
        expected = np.asarray([(2**i) - 1 for i in range(32)], dtype=np.uint32)
        np.testing.assert_equal(expected, out)
443
+
444
+
445
# Run the tests in this module when it is executed directly as a script.
if __name__ == "__main__":
    unittest.main()
@@ -0,0 +1,9 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: BSD-2-Clause
3
+
4
+ from numba.cuda.tests import load_testsuite
5
+ import os
6
+
7
+
8
def load_tests(loader, tests, pattern):
    """Return the test suite for the directory containing this file.

    Follows the ``unittest`` ``load_tests`` hook signature; ``tests`` and
    ``pattern`` are accepted but not used. Loading is delegated to
    ``load_testsuite``.
    """
    package_dir = os.path.dirname(__file__)
    return load_testsuite(loader, package_dir)
@@ -0,0 +1,9 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: BSD-2-Clause
3
+
4
+ from numba import cuda
5
+
6
+
7
@cuda.jit(device=True)
def cuda_module_in_device_function():
    """Device function that returns ``cuda.threadIdx.x``."""
    thread_x = cuda.threadIdx.x
    return thread_x