numba-cuda 0.21.1__cp313-cp313-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (488)
  1. _numba_cuda_redirector.pth +4 -0
  2. _numba_cuda_redirector.py +89 -0
  3. numba_cuda/VERSION +1 -0
  4. numba_cuda/__init__.py +6 -0
  5. numba_cuda/_version.py +11 -0
  6. numba_cuda/numba/cuda/__init__.py +70 -0
  7. numba_cuda/numba/cuda/_internal/cuda_bf16.py +16394 -0
  8. numba_cuda/numba/cuda/_internal/cuda_fp16.py +8112 -0
  9. numba_cuda/numba/cuda/api.py +577 -0
  10. numba_cuda/numba/cuda/api_util.py +76 -0
  11. numba_cuda/numba/cuda/args.py +72 -0
  12. numba_cuda/numba/cuda/bf16.py +397 -0
  13. numba_cuda/numba/cuda/cache_hints.py +287 -0
  14. numba_cuda/numba/cuda/cext/__init__.py +2 -0
  15. numba_cuda/numba/cuda/cext/_devicearray.cp313-win_amd64.pyd +0 -0
  16. numba_cuda/numba/cuda/cext/_devicearray.cpp +159 -0
  17. numba_cuda/numba/cuda/cext/_devicearray.h +29 -0
  18. numba_cuda/numba/cuda/cext/_dispatcher.cp313-win_amd64.pyd +0 -0
  19. numba_cuda/numba/cuda/cext/_dispatcher.cpp +1098 -0
  20. numba_cuda/numba/cuda/cext/_hashtable.cpp +532 -0
  21. numba_cuda/numba/cuda/cext/_hashtable.h +135 -0
  22. numba_cuda/numba/cuda/cext/_helperlib.c +71 -0
  23. numba_cuda/numba/cuda/cext/_helperlib.cp313-win_amd64.pyd +0 -0
  24. numba_cuda/numba/cuda/cext/_helpermod.c +82 -0
  25. numba_cuda/numba/cuda/cext/_pymodule.h +38 -0
  26. numba_cuda/numba/cuda/cext/_typeconv.cp313-win_amd64.pyd +0 -0
  27. numba_cuda/numba/cuda/cext/_typeconv.cpp +206 -0
  28. numba_cuda/numba/cuda/cext/_typeof.cpp +1159 -0
  29. numba_cuda/numba/cuda/cext/_typeof.h +19 -0
  30. numba_cuda/numba/cuda/cext/capsulethunk.h +111 -0
  31. numba_cuda/numba/cuda/cext/mviewbuf.c +385 -0
  32. numba_cuda/numba/cuda/cext/mviewbuf.cp313-win_amd64.pyd +0 -0
  33. numba_cuda/numba/cuda/cext/typeconv.cpp +212 -0
  34. numba_cuda/numba/cuda/cext/typeconv.hpp +101 -0
  35. numba_cuda/numba/cuda/cg.py +67 -0
  36. numba_cuda/numba/cuda/cgutils.py +1294 -0
  37. numba_cuda/numba/cuda/cloudpickle/__init__.py +21 -0
  38. numba_cuda/numba/cuda/cloudpickle/cloudpickle.py +1598 -0
  39. numba_cuda/numba/cuda/cloudpickle/cloudpickle_fast.py +17 -0
  40. numba_cuda/numba/cuda/codegen.py +541 -0
  41. numba_cuda/numba/cuda/compiler.py +1396 -0
  42. numba_cuda/numba/cuda/core/analysis.py +758 -0
  43. numba_cuda/numba/cuda/core/annotations/__init__.py +0 -0
  44. numba_cuda/numba/cuda/core/annotations/pretty_annotate.py +288 -0
  45. numba_cuda/numba/cuda/core/annotations/type_annotations.py +305 -0
  46. numba_cuda/numba/cuda/core/base.py +1332 -0
  47. numba_cuda/numba/cuda/core/boxing.py +1411 -0
  48. numba_cuda/numba/cuda/core/bytecode.py +728 -0
  49. numba_cuda/numba/cuda/core/byteflow.py +2346 -0
  50. numba_cuda/numba/cuda/core/caching.py +744 -0
  51. numba_cuda/numba/cuda/core/callconv.py +392 -0
  52. numba_cuda/numba/cuda/core/codegen.py +171 -0
  53. numba_cuda/numba/cuda/core/compiler.py +199 -0
  54. numba_cuda/numba/cuda/core/compiler_lock.py +85 -0
  55. numba_cuda/numba/cuda/core/compiler_machinery.py +497 -0
  56. numba_cuda/numba/cuda/core/config.py +650 -0
  57. numba_cuda/numba/cuda/core/consts.py +124 -0
  58. numba_cuda/numba/cuda/core/controlflow.py +989 -0
  59. numba_cuda/numba/cuda/core/entrypoints.py +57 -0
  60. numba_cuda/numba/cuda/core/environment.py +66 -0
  61. numba_cuda/numba/cuda/core/errors.py +917 -0
  62. numba_cuda/numba/cuda/core/event.py +511 -0
  63. numba_cuda/numba/cuda/core/funcdesc.py +330 -0
  64. numba_cuda/numba/cuda/core/generators.py +387 -0
  65. numba_cuda/numba/cuda/core/imputils.py +509 -0
  66. numba_cuda/numba/cuda/core/inline_closurecall.py +1787 -0
  67. numba_cuda/numba/cuda/core/interpreter.py +3617 -0
  68. numba_cuda/numba/cuda/core/ir.py +1812 -0
  69. numba_cuda/numba/cuda/core/ir_utils.py +2638 -0
  70. numba_cuda/numba/cuda/core/optional.py +129 -0
  71. numba_cuda/numba/cuda/core/options.py +262 -0
  72. numba_cuda/numba/cuda/core/postproc.py +249 -0
  73. numba_cuda/numba/cuda/core/pythonapi.py +1859 -0
  74. numba_cuda/numba/cuda/core/registry.py +46 -0
  75. numba_cuda/numba/cuda/core/removerefctpass.py +123 -0
  76. numba_cuda/numba/cuda/core/rewrites/__init__.py +26 -0
  77. numba_cuda/numba/cuda/core/rewrites/ir_print.py +91 -0
  78. numba_cuda/numba/cuda/core/rewrites/registry.py +104 -0
  79. numba_cuda/numba/cuda/core/rewrites/static_binop.py +41 -0
  80. numba_cuda/numba/cuda/core/rewrites/static_getitem.py +189 -0
  81. numba_cuda/numba/cuda/core/rewrites/static_raise.py +100 -0
  82. numba_cuda/numba/cuda/core/sigutils.py +68 -0
  83. numba_cuda/numba/cuda/core/ssa.py +498 -0
  84. numba_cuda/numba/cuda/core/targetconfig.py +330 -0
  85. numba_cuda/numba/cuda/core/tracing.py +231 -0
  86. numba_cuda/numba/cuda/core/transforms.py +956 -0
  87. numba_cuda/numba/cuda/core/typed_passes.py +867 -0
  88. numba_cuda/numba/cuda/core/typeinfer.py +1950 -0
  89. numba_cuda/numba/cuda/core/unsafe/__init__.py +0 -0
  90. numba_cuda/numba/cuda/core/unsafe/bytes.py +67 -0
  91. numba_cuda/numba/cuda/core/unsafe/eh.py +67 -0
  92. numba_cuda/numba/cuda/core/unsafe/refcount.py +98 -0
  93. numba_cuda/numba/cuda/core/untyped_passes.py +1979 -0
  94. numba_cuda/numba/cuda/cpython/builtins.py +1153 -0
  95. numba_cuda/numba/cuda/cpython/charseq.py +1218 -0
  96. numba_cuda/numba/cuda/cpython/cmathimpl.py +560 -0
  97. numba_cuda/numba/cuda/cpython/enumimpl.py +103 -0
  98. numba_cuda/numba/cuda/cpython/iterators.py +167 -0
  99. numba_cuda/numba/cuda/cpython/listobj.py +1326 -0
  100. numba_cuda/numba/cuda/cpython/mathimpl.py +499 -0
  101. numba_cuda/numba/cuda/cpython/numbers.py +1475 -0
  102. numba_cuda/numba/cuda/cpython/rangeobj.py +289 -0
  103. numba_cuda/numba/cuda/cpython/slicing.py +322 -0
  104. numba_cuda/numba/cuda/cpython/tupleobj.py +456 -0
  105. numba_cuda/numba/cuda/cpython/unicode.py +2865 -0
  106. numba_cuda/numba/cuda/cpython/unicode_support.py +1597 -0
  107. numba_cuda/numba/cuda/cpython/unsafe/__init__.py +0 -0
  108. numba_cuda/numba/cuda/cpython/unsafe/numbers.py +64 -0
  109. numba_cuda/numba/cuda/cpython/unsafe/tuple.py +92 -0
  110. numba_cuda/numba/cuda/cuda_paths.py +691 -0
  111. numba_cuda/numba/cuda/cudadecl.py +556 -0
  112. numba_cuda/numba/cuda/cudadrv/__init__.py +14 -0
  113. numba_cuda/numba/cuda/cudadrv/devicearray.py +951 -0
  114. numba_cuda/numba/cuda/cudadrv/devices.py +249 -0
  115. numba_cuda/numba/cuda/cudadrv/driver.py +3222 -0
  116. numba_cuda/numba/cuda/cudadrv/drvapi.py +435 -0
  117. numba_cuda/numba/cuda/cudadrv/dummyarray.py +558 -0
  118. numba_cuda/numba/cuda/cudadrv/enums.py +613 -0
  119. numba_cuda/numba/cuda/cudadrv/error.py +48 -0
  120. numba_cuda/numba/cuda/cudadrv/libs.py +220 -0
  121. numba_cuda/numba/cuda/cudadrv/linkable_code.py +184 -0
  122. numba_cuda/numba/cuda/cudadrv/mappings.py +14 -0
  123. numba_cuda/numba/cuda/cudadrv/ndarray.py +26 -0
  124. numba_cuda/numba/cuda/cudadrv/nvrtc.py +193 -0
  125. numba_cuda/numba/cuda/cudadrv/nvvm.py +756 -0
  126. numba_cuda/numba/cuda/cudadrv/rtapi.py +13 -0
  127. numba_cuda/numba/cuda/cudadrv/runtime.py +34 -0
  128. numba_cuda/numba/cuda/cudaimpl.py +995 -0
  129. numba_cuda/numba/cuda/cudamath.py +149 -0
  130. numba_cuda/numba/cuda/datamodel/__init__.py +7 -0
  131. numba_cuda/numba/cuda/datamodel/cuda_manager.py +66 -0
  132. numba_cuda/numba/cuda/datamodel/cuda_models.py +1446 -0
  133. numba_cuda/numba/cuda/datamodel/cuda_packer.py +224 -0
  134. numba_cuda/numba/cuda/datamodel/cuda_registry.py +22 -0
  135. numba_cuda/numba/cuda/datamodel/cuda_testing.py +153 -0
  136. numba_cuda/numba/cuda/datamodel/manager.py +11 -0
  137. numba_cuda/numba/cuda/datamodel/models.py +9 -0
  138. numba_cuda/numba/cuda/datamodel/packer.py +9 -0
  139. numba_cuda/numba/cuda/datamodel/registry.py +11 -0
  140. numba_cuda/numba/cuda/datamodel/testing.py +11 -0
  141. numba_cuda/numba/cuda/debuginfo.py +903 -0
  142. numba_cuda/numba/cuda/decorators.py +294 -0
  143. numba_cuda/numba/cuda/descriptor.py +35 -0
  144. numba_cuda/numba/cuda/device_init.py +158 -0
  145. numba_cuda/numba/cuda/deviceufunc.py +1021 -0
  146. numba_cuda/numba/cuda/dispatcher.py +2463 -0
  147. numba_cuda/numba/cuda/errors.py +72 -0
  148. numba_cuda/numba/cuda/extending.py +697 -0
  149. numba_cuda/numba/cuda/flags.py +178 -0
  150. numba_cuda/numba/cuda/fp16.py +357 -0
  151. numba_cuda/numba/cuda/include/12/cuda_bf16.h +5118 -0
  152. numba_cuda/numba/cuda/include/12/cuda_bf16.hpp +3865 -0
  153. numba_cuda/numba/cuda/include/12/cuda_fp16.h +5363 -0
  154. numba_cuda/numba/cuda/include/12/cuda_fp16.hpp +3483 -0
  155. numba_cuda/numba/cuda/include/13/cuda_bf16.h +5118 -0
  156. numba_cuda/numba/cuda/include/13/cuda_bf16.hpp +3865 -0
  157. numba_cuda/numba/cuda/include/13/cuda_fp16.h +5363 -0
  158. numba_cuda/numba/cuda/include/13/cuda_fp16.hpp +3483 -0
  159. numba_cuda/numba/cuda/initialize.py +24 -0
  160. numba_cuda/numba/cuda/intrinsic_wrapper.py +41 -0
  161. numba_cuda/numba/cuda/intrinsics.py +382 -0
  162. numba_cuda/numba/cuda/itanium_mangler.py +214 -0
  163. numba_cuda/numba/cuda/kernels/__init__.py +2 -0
  164. numba_cuda/numba/cuda/kernels/reduction.py +265 -0
  165. numba_cuda/numba/cuda/kernels/transpose.py +65 -0
  166. numba_cuda/numba/cuda/libdevice.py +3386 -0
  167. numba_cuda/numba/cuda/libdevicedecl.py +20 -0
  168. numba_cuda/numba/cuda/libdevicefuncs.py +1060 -0
  169. numba_cuda/numba/cuda/libdeviceimpl.py +88 -0
  170. numba_cuda/numba/cuda/locks.py +19 -0
  171. numba_cuda/numba/cuda/lowering.py +1951 -0
  172. numba_cuda/numba/cuda/mathimpl.py +374 -0
  173. numba_cuda/numba/cuda/memory_management/__init__.py +4 -0
  174. numba_cuda/numba/cuda/memory_management/memsys.cu +99 -0
  175. numba_cuda/numba/cuda/memory_management/memsys.cuh +22 -0
  176. numba_cuda/numba/cuda/memory_management/nrt.cu +212 -0
  177. numba_cuda/numba/cuda/memory_management/nrt.cuh +48 -0
  178. numba_cuda/numba/cuda/memory_management/nrt.py +390 -0
  179. numba_cuda/numba/cuda/memory_management/nrt_context.py +438 -0
  180. numba_cuda/numba/cuda/misc/appdirs.py +594 -0
  181. numba_cuda/numba/cuda/misc/cffiimpl.py +24 -0
  182. numba_cuda/numba/cuda/misc/coverage_support.py +43 -0
  183. numba_cuda/numba/cuda/misc/dump_style.py +41 -0
  184. numba_cuda/numba/cuda/misc/findlib.py +75 -0
  185. numba_cuda/numba/cuda/misc/firstlinefinder.py +96 -0
  186. numba_cuda/numba/cuda/misc/gdb_hook.py +240 -0
  187. numba_cuda/numba/cuda/misc/literal.py +28 -0
  188. numba_cuda/numba/cuda/misc/llvm_pass_timings.py +412 -0
  189. numba_cuda/numba/cuda/misc/special.py +94 -0
  190. numba_cuda/numba/cuda/models.py +56 -0
  191. numba_cuda/numba/cuda/np/arraymath.py +5130 -0
  192. numba_cuda/numba/cuda/np/arrayobj.py +7635 -0
  193. numba_cuda/numba/cuda/np/extensions.py +11 -0
  194. numba_cuda/numba/cuda/np/linalg.py +3087 -0
  195. numba_cuda/numba/cuda/np/math/__init__.py +0 -0
  196. numba_cuda/numba/cuda/np/math/cmathimpl.py +558 -0
  197. numba_cuda/numba/cuda/np/math/mathimpl.py +487 -0
  198. numba_cuda/numba/cuda/np/math/numbers.py +1461 -0
  199. numba_cuda/numba/cuda/np/npdatetime.py +969 -0
  200. numba_cuda/numba/cuda/np/npdatetime_helpers.py +217 -0
  201. numba_cuda/numba/cuda/np/npyfuncs.py +1808 -0
  202. numba_cuda/numba/cuda/np/npyimpl.py +1027 -0
  203. numba_cuda/numba/cuda/np/numpy_support.py +798 -0
  204. numba_cuda/numba/cuda/np/polynomial/__init__.py +4 -0
  205. numba_cuda/numba/cuda/np/polynomial/polynomial_core.py +242 -0
  206. numba_cuda/numba/cuda/np/polynomial/polynomial_functions.py +380 -0
  207. numba_cuda/numba/cuda/np/ufunc/__init__.py +4 -0
  208. numba_cuda/numba/cuda/np/ufunc/decorators.py +203 -0
  209. numba_cuda/numba/cuda/np/ufunc/sigparse.py +68 -0
  210. numba_cuda/numba/cuda/np/ufunc/ufuncbuilder.py +65 -0
  211. numba_cuda/numba/cuda/np/ufunc_db.py +1282 -0
  212. numba_cuda/numba/cuda/np/unsafe/__init__.py +0 -0
  213. numba_cuda/numba/cuda/np/unsafe/ndarray.py +84 -0
  214. numba_cuda/numba/cuda/nvvmutils.py +254 -0
  215. numba_cuda/numba/cuda/printimpl.py +126 -0
  216. numba_cuda/numba/cuda/random.py +308 -0
  217. numba_cuda/numba/cuda/reshape_funcs.cu +156 -0
  218. numba_cuda/numba/cuda/serialize.py +267 -0
  219. numba_cuda/numba/cuda/simulator/__init__.py +63 -0
  220. numba_cuda/numba/cuda/simulator/_internal/__init__.py +4 -0
  221. numba_cuda/numba/cuda/simulator/_internal/cuda_bf16.py +2 -0
  222. numba_cuda/numba/cuda/simulator/api.py +179 -0
  223. numba_cuda/numba/cuda/simulator/bf16.py +4 -0
  224. numba_cuda/numba/cuda/simulator/compiler.py +38 -0
  225. numba_cuda/numba/cuda/simulator/cudadrv/__init__.py +11 -0
  226. numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py +462 -0
  227. numba_cuda/numba/cuda/simulator/cudadrv/devices.py +122 -0
  228. numba_cuda/numba/cuda/simulator/cudadrv/driver.py +66 -0
  229. numba_cuda/numba/cuda/simulator/cudadrv/drvapi.py +7 -0
  230. numba_cuda/numba/cuda/simulator/cudadrv/dummyarray.py +7 -0
  231. numba_cuda/numba/cuda/simulator/cudadrv/error.py +10 -0
  232. numba_cuda/numba/cuda/simulator/cudadrv/libs.py +10 -0
  233. numba_cuda/numba/cuda/simulator/cudadrv/linkable_code.py +61 -0
  234. numba_cuda/numba/cuda/simulator/cudadrv/nvrtc.py +11 -0
  235. numba_cuda/numba/cuda/simulator/cudadrv/nvvm.py +32 -0
  236. numba_cuda/numba/cuda/simulator/cudadrv/runtime.py +22 -0
  237. numba_cuda/numba/cuda/simulator/dispatcher.py +11 -0
  238. numba_cuda/numba/cuda/simulator/kernel.py +320 -0
  239. numba_cuda/numba/cuda/simulator/kernelapi.py +509 -0
  240. numba_cuda/numba/cuda/simulator/memory_management/__init__.py +4 -0
  241. numba_cuda/numba/cuda/simulator/memory_management/nrt.py +21 -0
  242. numba_cuda/numba/cuda/simulator/reduction.py +19 -0
  243. numba_cuda/numba/cuda/simulator/tests/support.py +4 -0
  244. numba_cuda/numba/cuda/simulator/vector_types.py +65 -0
  245. numba_cuda/numba/cuda/simulator_init.py +18 -0
  246. numba_cuda/numba/cuda/stubs.py +635 -0
  247. numba_cuda/numba/cuda/target.py +505 -0
  248. numba_cuda/numba/cuda/testing.py +347 -0
  249. numba_cuda/numba/cuda/tests/__init__.py +62 -0
  250. numba_cuda/numba/cuda/tests/benchmarks/__init__.py +0 -0
  251. numba_cuda/numba/cuda/tests/benchmarks/test_kernel_launch.py +119 -0
  252. numba_cuda/numba/cuda/tests/cloudpickle_main_class.py +9 -0
  253. numba_cuda/numba/cuda/tests/core/serialize_usecases.py +113 -0
  254. numba_cuda/numba/cuda/tests/core/test_itanium_mangler.py +83 -0
  255. numba_cuda/numba/cuda/tests/core/test_serialize.py +371 -0
  256. numba_cuda/numba/cuda/tests/cudadrv/__init__.py +9 -0
  257. numba_cuda/numba/cuda/tests/cudadrv/test_array_attr.py +147 -0
  258. numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py +161 -0
  259. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_array_slicing.py +397 -0
  260. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_auto_context.py +24 -0
  261. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py +180 -0
  262. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py +313 -0
  263. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_memory.py +187 -0
  264. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py +621 -0
  265. numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py +247 -0
  266. numba_cuda/numba/cuda/tests/cudadrv/test_detect.py +100 -0
  267. numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +198 -0
  268. numba_cuda/numba/cuda/tests/cudadrv/test_events.py +53 -0
  269. numba_cuda/numba/cuda/tests/cudadrv/test_host_alloc.py +72 -0
  270. numba_cuda/numba/cuda/tests/cudadrv/test_init.py +138 -0
  271. numba_cuda/numba/cuda/tests/cudadrv/test_inline_ptx.py +43 -0
  272. numba_cuda/numba/cuda/tests/cudadrv/test_is_fp16.py +15 -0
  273. numba_cuda/numba/cuda/tests/cudadrv/test_linkable_code.py +58 -0
  274. numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +348 -0
  275. numba_cuda/numba/cuda/tests/cudadrv/test_managed_alloc.py +128 -0
  276. numba_cuda/numba/cuda/tests/cudadrv/test_module_callbacks.py +301 -0
  277. numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py +174 -0
  278. numba_cuda/numba/cuda/tests/cudadrv/test_nvrtc.py +28 -0
  279. numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py +185 -0
  280. numba_cuda/numba/cuda/tests/cudadrv/test_pinned.py +39 -0
  281. numba_cuda/numba/cuda/tests/cudadrv/test_profiler.py +23 -0
  282. numba_cuda/numba/cuda/tests/cudadrv/test_reset_device.py +38 -0
  283. numba_cuda/numba/cuda/tests/cudadrv/test_runtime.py +48 -0
  284. numba_cuda/numba/cuda/tests/cudadrv/test_select_device.py +44 -0
  285. numba_cuda/numba/cuda/tests/cudadrv/test_streams.py +127 -0
  286. numba_cuda/numba/cuda/tests/cudapy/__init__.py +9 -0
  287. numba_cuda/numba/cuda/tests/cudapy/cache_usecases.py +231 -0
  288. numba_cuda/numba/cuda/tests/cudapy/cache_with_cpu_usecases.py +50 -0
  289. numba_cuda/numba/cuda/tests/cudapy/cg_cache_usecases.py +36 -0
  290. numba_cuda/numba/cuda/tests/cudapy/complex_usecases.py +116 -0
  291. numba_cuda/numba/cuda/tests/cudapy/enum_usecases.py +59 -0
  292. numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +62 -0
  293. numba_cuda/numba/cuda/tests/cudapy/jitlink.ptx +28 -0
  294. numba_cuda/numba/cuda/tests/cudapy/overload_usecases.py +33 -0
  295. numba_cuda/numba/cuda/tests/cudapy/recursion_usecases.py +104 -0
  296. numba_cuda/numba/cuda/tests/cudapy/test_alignment.py +47 -0
  297. numba_cuda/numba/cuda/tests/cudapy/test_analysis.py +1122 -0
  298. numba_cuda/numba/cuda/tests/cudapy/test_array.py +344 -0
  299. numba_cuda/numba/cuda/tests/cudapy/test_array_alignment.py +268 -0
  300. numba_cuda/numba/cuda/tests/cudapy/test_array_args.py +203 -0
  301. numba_cuda/numba/cuda/tests/cudapy/test_array_methods.py +63 -0
  302. numba_cuda/numba/cuda/tests/cudapy/test_array_reductions.py +360 -0
  303. numba_cuda/numba/cuda/tests/cudapy/test_atomics.py +1815 -0
  304. numba_cuda/numba/cuda/tests/cudapy/test_bfloat16.py +599 -0
  305. numba_cuda/numba/cuda/tests/cudapy/test_bfloat16_bindings.py +377 -0
  306. numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py +160 -0
  307. numba_cuda/numba/cuda/tests/cudapy/test_boolean.py +27 -0
  308. numba_cuda/numba/cuda/tests/cudapy/test_byteflow.py +98 -0
  309. numba_cuda/numba/cuda/tests/cudapy/test_cache_hints.py +210 -0
  310. numba_cuda/numba/cuda/tests/cudapy/test_caching.py +683 -0
  311. numba_cuda/numba/cuda/tests/cudapy/test_casting.py +265 -0
  312. numba_cuda/numba/cuda/tests/cudapy/test_cffi.py +42 -0
  313. numba_cuda/numba/cuda/tests/cudapy/test_compiler.py +718 -0
  314. numba_cuda/numba/cuda/tests/cudapy/test_complex.py +370 -0
  315. numba_cuda/numba/cuda/tests/cudapy/test_complex_kernel.py +23 -0
  316. numba_cuda/numba/cuda/tests/cudapy/test_const_string.py +142 -0
  317. numba_cuda/numba/cuda/tests/cudapy/test_constmem.py +178 -0
  318. numba_cuda/numba/cuda/tests/cudapy/test_cooperative_groups.py +193 -0
  319. numba_cuda/numba/cuda/tests/cudapy/test_copy_propagate.py +131 -0
  320. numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py +438 -0
  321. numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py +94 -0
  322. numba_cuda/numba/cuda/tests/cudapy/test_datetime.py +101 -0
  323. numba_cuda/numba/cuda/tests/cudapy/test_debug.py +105 -0
  324. numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +889 -0
  325. numba_cuda/numba/cuda/tests/cudapy/test_debuginfo_types.py +476 -0
  326. numba_cuda/numba/cuda/tests/cudapy/test_device_func.py +500 -0
  327. numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +820 -0
  328. numba_cuda/numba/cuda/tests/cudapy/test_enums.py +152 -0
  329. numba_cuda/numba/cuda/tests/cudapy/test_errors.py +111 -0
  330. numba_cuda/numba/cuda/tests/cudapy/test_exception.py +170 -0
  331. numba_cuda/numba/cuda/tests/cudapy/test_extending.py +1088 -0
  332. numba_cuda/numba/cuda/tests/cudapy/test_extending_types.py +71 -0
  333. numba_cuda/numba/cuda/tests/cudapy/test_fastmath.py +265 -0
  334. numba_cuda/numba/cuda/tests/cudapy/test_flow_control.py +1433 -0
  335. numba_cuda/numba/cuda/tests/cudapy/test_forall.py +57 -0
  336. numba_cuda/numba/cuda/tests/cudapy/test_freevar.py +34 -0
  337. numba_cuda/numba/cuda/tests/cudapy/test_frexp_ldexp.py +69 -0
  338. numba_cuda/numba/cuda/tests/cudapy/test_globals.py +62 -0
  339. numba_cuda/numba/cuda/tests/cudapy/test_gufunc.py +474 -0
  340. numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scalar.py +167 -0
  341. numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scheduling.py +92 -0
  342. numba_cuda/numba/cuda/tests/cudapy/test_idiv.py +39 -0
  343. numba_cuda/numba/cuda/tests/cudapy/test_inline.py +170 -0
  344. numba_cuda/numba/cuda/tests/cudapy/test_inspect.py +255 -0
  345. numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py +1219 -0
  346. numba_cuda/numba/cuda/tests/cudapy/test_ipc.py +263 -0
  347. numba_cuda/numba/cuda/tests/cudapy/test_ir.py +598 -0
  348. numba_cuda/numba/cuda/tests/cudapy/test_ir_utils.py +276 -0
  349. numba_cuda/numba/cuda/tests/cudapy/test_iterators.py +101 -0
  350. numba_cuda/numba/cuda/tests/cudapy/test_lang.py +68 -0
  351. numba_cuda/numba/cuda/tests/cudapy/test_laplace.py +123 -0
  352. numba_cuda/numba/cuda/tests/cudapy/test_libdevice.py +194 -0
  353. numba_cuda/numba/cuda/tests/cudapy/test_lineinfo.py +220 -0
  354. numba_cuda/numba/cuda/tests/cudapy/test_localmem.py +173 -0
  355. numba_cuda/numba/cuda/tests/cudapy/test_make_function_to_jit_function.py +364 -0
  356. numba_cuda/numba/cuda/tests/cudapy/test_mandel.py +47 -0
  357. numba_cuda/numba/cuda/tests/cudapy/test_math.py +842 -0
  358. numba_cuda/numba/cuda/tests/cudapy/test_matmul.py +76 -0
  359. numba_cuda/numba/cuda/tests/cudapy/test_minmax.py +78 -0
  360. numba_cuda/numba/cuda/tests/cudapy/test_montecarlo.py +25 -0
  361. numba_cuda/numba/cuda/tests/cudapy/test_multigpu.py +145 -0
  362. numba_cuda/numba/cuda/tests/cudapy/test_multiprocessing.py +39 -0
  363. numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py +82 -0
  364. numba_cuda/numba/cuda/tests/cudapy/test_nondet.py +53 -0
  365. numba_cuda/numba/cuda/tests/cudapy/test_operator.py +504 -0
  366. numba_cuda/numba/cuda/tests/cudapy/test_optimization.py +93 -0
  367. numba_cuda/numba/cuda/tests/cudapy/test_overload.py +402 -0
  368. numba_cuda/numba/cuda/tests/cudapy/test_powi.py +128 -0
  369. numba_cuda/numba/cuda/tests/cudapy/test_print.py +193 -0
  370. numba_cuda/numba/cuda/tests/cudapy/test_py2_div_issue.py +37 -0
  371. numba_cuda/numba/cuda/tests/cudapy/test_random.py +117 -0
  372. numba_cuda/numba/cuda/tests/cudapy/test_record_dtype.py +614 -0
  373. numba_cuda/numba/cuda/tests/cudapy/test_recursion.py +130 -0
  374. numba_cuda/numba/cuda/tests/cudapy/test_reduction.py +94 -0
  375. numba_cuda/numba/cuda/tests/cudapy/test_retrieve_autoconverted_arrays.py +83 -0
  376. numba_cuda/numba/cuda/tests/cudapy/test_serialize.py +86 -0
  377. numba_cuda/numba/cuda/tests/cudapy/test_slicing.py +40 -0
  378. numba_cuda/numba/cuda/tests/cudapy/test_sm.py +457 -0
  379. numba_cuda/numba/cuda/tests/cudapy/test_sm_creation.py +233 -0
  380. numba_cuda/numba/cuda/tests/cudapy/test_ssa.py +454 -0
  381. numba_cuda/numba/cuda/tests/cudapy/test_stream_api.py +56 -0
  382. numba_cuda/numba/cuda/tests/cudapy/test_sync.py +277 -0
  383. numba_cuda/numba/cuda/tests/cudapy/test_tracing.py +200 -0
  384. numba_cuda/numba/cuda/tests/cudapy/test_transpose.py +90 -0
  385. numba_cuda/numba/cuda/tests/cudapy/test_typeconv.py +333 -0
  386. numba_cuda/numba/cuda/tests/cudapy/test_typeinfer.py +538 -0
  387. numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +585 -0
  388. numba_cuda/numba/cuda/tests/cudapy/test_userexc.py +42 -0
  389. numba_cuda/numba/cuda/tests/cudapy/test_vector_type.py +485 -0
  390. numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py +312 -0
  391. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_complex.py +23 -0
  392. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py +183 -0
  393. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_device.py +40 -0
  394. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_scalar_arg.py +40 -0
  395. numba_cuda/numba/cuda/tests/cudapy/test_warning.py +206 -0
  396. numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +331 -0
  397. numba_cuda/numba/cuda/tests/cudasim/__init__.py +9 -0
  398. numba_cuda/numba/cuda/tests/cudasim/support.py +9 -0
  399. numba_cuda/numba/cuda/tests/cudasim/test_cudasim_issues.py +111 -0
  400. numba_cuda/numba/cuda/tests/data/__init__.py +2 -0
  401. numba_cuda/numba/cuda/tests/data/cta_barrier.cu +28 -0
  402. numba_cuda/numba/cuda/tests/data/cuda_include.cu +10 -0
  403. numba_cuda/numba/cuda/tests/data/error.cu +12 -0
  404. numba_cuda/numba/cuda/tests/data/include/add.cuh +8 -0
  405. numba_cuda/numba/cuda/tests/data/jitlink.cu +28 -0
  406. numba_cuda/numba/cuda/tests/data/jitlink.ptx +49 -0
  407. numba_cuda/numba/cuda/tests/data/warn.cu +12 -0
  408. numba_cuda/numba/cuda/tests/doc_examples/__init__.py +9 -0
  409. numba_cuda/numba/cuda/tests/doc_examples/ffi/__init__.py +2 -0
  410. numba_cuda/numba/cuda/tests/doc_examples/ffi/functions.cu +54 -0
  411. numba_cuda/numba/cuda/tests/doc_examples/ffi/include/mul.cuh +8 -0
  412. numba_cuda/numba/cuda/tests/doc_examples/ffi/saxpy.cu +14 -0
  413. numba_cuda/numba/cuda/tests/doc_examples/test_cg.py +86 -0
  414. numba_cuda/numba/cuda/tests/doc_examples/test_cpointer.py +68 -0
  415. numba_cuda/numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py +81 -0
  416. numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py +141 -0
  417. numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py +160 -0
  418. numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py +180 -0
  419. numba_cuda/numba/cuda/tests/doc_examples/test_montecarlo.py +119 -0
  420. numba_cuda/numba/cuda/tests/doc_examples/test_random.py +66 -0
  421. numba_cuda/numba/cuda/tests/doc_examples/test_reduction.py +80 -0
  422. numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py +206 -0
  423. numba_cuda/numba/cuda/tests/doc_examples/test_ufunc.py +53 -0
  424. numba_cuda/numba/cuda/tests/doc_examples/test_vecadd.py +76 -0
  425. numba_cuda/numba/cuda/tests/nocuda/__init__.py +9 -0
  426. numba_cuda/numba/cuda/tests/nocuda/test_dummyarray.py +391 -0
  427. numba_cuda/numba/cuda/tests/nocuda/test_function_resolution.py +48 -0
  428. numba_cuda/numba/cuda/tests/nocuda/test_import.py +63 -0
  429. numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py +252 -0
  430. numba_cuda/numba/cuda/tests/nocuda/test_nvvm.py +59 -0
  431. numba_cuda/numba/cuda/tests/nrt/__init__.py +9 -0
  432. numba_cuda/numba/cuda/tests/nrt/test_nrt.py +387 -0
  433. numba_cuda/numba/cuda/tests/nrt/test_nrt_refct.py +124 -0
  434. numba_cuda/numba/cuda/tests/support.py +900 -0
  435. numba_cuda/numba/cuda/typeconv/__init__.py +4 -0
  436. numba_cuda/numba/cuda/typeconv/castgraph.py +137 -0
  437. numba_cuda/numba/cuda/typeconv/rules.py +63 -0
  438. numba_cuda/numba/cuda/typeconv/typeconv.py +121 -0
  439. numba_cuda/numba/cuda/types/__init__.py +233 -0
  440. numba_cuda/numba/cuda/types/__init__.pyi +167 -0
  441. numba_cuda/numba/cuda/types/abstract.py +9 -0
  442. numba_cuda/numba/cuda/types/common.py +9 -0
  443. numba_cuda/numba/cuda/types/containers.py +9 -0
  444. numba_cuda/numba/cuda/types/cuda_abstract.py +533 -0
  445. numba_cuda/numba/cuda/types/cuda_common.py +110 -0
  446. numba_cuda/numba/cuda/types/cuda_containers.py +971 -0
  447. numba_cuda/numba/cuda/types/cuda_function_type.py +230 -0
  448. numba_cuda/numba/cuda/types/cuda_functions.py +798 -0
  449. numba_cuda/numba/cuda/types/cuda_iterators.py +120 -0
  450. numba_cuda/numba/cuda/types/cuda_misc.py +569 -0
  451. numba_cuda/numba/cuda/types/cuda_npytypes.py +690 -0
  452. numba_cuda/numba/cuda/types/cuda_scalars.py +280 -0
  453. numba_cuda/numba/cuda/types/ext_types.py +101 -0
  454. numba_cuda/numba/cuda/types/function_type.py +11 -0
  455. numba_cuda/numba/cuda/types/functions.py +9 -0
  456. numba_cuda/numba/cuda/types/iterators.py +9 -0
  457. numba_cuda/numba/cuda/types/misc.py +9 -0
  458. numba_cuda/numba/cuda/types/npytypes.py +9 -0
  459. numba_cuda/numba/cuda/types/scalars.py +9 -0
  460. numba_cuda/numba/cuda/typing/__init__.py +19 -0
  461. numba_cuda/numba/cuda/typing/arraydecl.py +939 -0
  462. numba_cuda/numba/cuda/typing/asnumbatype.py +130 -0
  463. numba_cuda/numba/cuda/typing/bufproto.py +70 -0
  464. numba_cuda/numba/cuda/typing/builtins.py +1209 -0
  465. numba_cuda/numba/cuda/typing/cffi_utils.py +219 -0
  466. numba_cuda/numba/cuda/typing/cmathdecl.py +47 -0
  467. numba_cuda/numba/cuda/typing/collections.py +138 -0
  468. numba_cuda/numba/cuda/typing/context.py +782 -0
  469. numba_cuda/numba/cuda/typing/ctypes_utils.py +125 -0
  470. numba_cuda/numba/cuda/typing/dictdecl.py +63 -0
  471. numba_cuda/numba/cuda/typing/enumdecl.py +74 -0
  472. numba_cuda/numba/cuda/typing/listdecl.py +147 -0
  473. numba_cuda/numba/cuda/typing/mathdecl.py +158 -0
  474. numba_cuda/numba/cuda/typing/npdatetime.py +322 -0
  475. numba_cuda/numba/cuda/typing/npydecl.py +749 -0
  476. numba_cuda/numba/cuda/typing/setdecl.py +115 -0
  477. numba_cuda/numba/cuda/typing/templates.py +1446 -0
  478. numba_cuda/numba/cuda/typing/typeof.py +301 -0
  479. numba_cuda/numba/cuda/ufuncs.py +746 -0
  480. numba_cuda/numba/cuda/utils.py +724 -0
  481. numba_cuda/numba/cuda/vector_types.py +214 -0
  482. numba_cuda/numba/cuda/vectorizers.py +260 -0
  483. numba_cuda-0.21.1.dist-info/METADATA +109 -0
  484. numba_cuda-0.21.1.dist-info/RECORD +488 -0
  485. numba_cuda-0.21.1.dist-info/WHEEL +5 -0
  486. numba_cuda-0.21.1.dist-info/licenses/LICENSE +26 -0
  487. numba_cuda-0.21.1.dist-info/licenses/LICENSE.numba +24 -0
  488. numba_cuda-0.21.1.dist-info/top_level.txt +1 -0
@@ -0,0 +1,206 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: BSD-2-Clause
3
+
4
+ import pytest
5
+ import numpy as np
6
+ from numba import cuda
7
+ from numba.cuda.cudadrv import driver
8
+ from numba.cuda.testing import (
9
+ unittest,
10
+ CUDATestCase,
11
+ skip_on_cudasim,
12
+ )
13
+ from numba.cuda.tests.support import (
14
+ linux_only,
15
+ override_config,
16
+ run_in_subprocess,
17
+ )
18
+ from numba.cuda.core.errors import (
19
+ NumbaPerformanceWarning,
20
+ NumbaInvalidConfigWarning,
21
+ )
22
+ from numba.cuda.core import config
23
+ import warnings
24
+
25
+
26
@skip_on_cudasim("cudasim does not raise performance warnings")
class TestWarnings(CUDATestCase):
    """Checks which warnings kernel launches and ``cuda.jit`` do (not) emit."""

    # Patterns matched (via ``pytest.warns(match=...)``) against the warnings.
    _LOW_OCCUPANCY_RE = "Grid size .+ low occupancy"
    _IMPLICIT_COPY_RE = "Host array used in CUDA kernel will incur.+copy overhead"
    _DEBUG_OPT_RE = "not supported by CUDA"

    @staticmethod
    def _make_store_kernel():
        """Return a freshly jitted kernel that writes ``x + 1`` to ``r[0]``."""

        @cuda.jit
        def store_incremented(r, x):
            r[0] = x + 1

        return store_incremented

    def _expect_implicit_copy_warning(self, ary, nthreads):
        """Launch on ``ary`` and require the implicit-copy warning."""
        launch = self._make_store_kernel()[1, nthreads]
        with override_config("CUDA_WARN_ON_IMPLICIT_COPY", 1):
            with pytest.warns(
                NumbaPerformanceWarning, match=self._IMPLICIT_COPY_RE
            ):
                launch(ary, nthreads)

    def _expect_silent_launch(self, ary, nthreads):
        """Launch on ``ary`` and require that no warning is recorded."""
        kernel = self._make_store_kernel()
        with override_config("CUDA_WARN_ON_IMPLICIT_COPY", 1):
            with warnings.catch_warnings(record=True) as caught:
                kernel[1, nthreads](ary, nthreads)

        self.assertEqual(len(caught), 0)

    def _expect_debug_opt_warning(self, **jit_kwargs):
        """Call ``cuda.jit(**jit_kwargs)`` and require the config warning."""
        with pytest.warns(NumbaInvalidConfigWarning, match=self._DEBUG_OPT_RE):
            cuda.jit(**jit_kwargs)

    def _expect_no_jit_warning(self, **jit_kwargs):
        """Call ``cuda.jit(**jit_kwargs)`` and require no recorded warning."""
        with warnings.catch_warnings(record=True) as caught:
            cuda.jit(**jit_kwargs)

        self.assertEqual(len(caught), 0)

    def test_float16_warn_if_lto_missing(self):
        """The float16 LTO message appears on stderr once, or never when
        ``driver._have_nvjitlink()`` is true, even after two launches."""
        script = """
import math
from numba import cuda

@cuda.jit
def kernel():
    x = cuda.types.float16(1.0)
    y = math.sin(x)

kernel[1,1]()
kernel[1,1]()
"""
        needle = "float16 relies on LTO for performance"
        if driver._have_nvjitlink():
            expected_hits = 0
        else:
            expected_hits = 1
        _, stderr = run_in_subprocess(script)
        hits = stderr.decode().count(needle)
        self.assertEqual(hits, expected_hits)

    def test_inefficient_launch_configuration(self):
        """A 1x1 launch warns about low occupancy when the option is on."""

        @cuda.jit
        def kernel():
            pass

        with override_config("CUDA_LOW_OCCUPANCY_WARNINGS", 1):
            with pytest.warns(
                NumbaPerformanceWarning, match=self._LOW_OCCUPANCY_RE
            ):
                launch = kernel[1, 1]
                launch()

    def test_efficient_launch_configuration(self):
        """A 256x256 launch records no warning with the option on."""

        @cuda.jit
        def kernel():
            pass

        with override_config("CUDA_LOW_OCCUPANCY_WARNINGS", 1):
            with warnings.catch_warnings(record=True) as caught:
                kernel[256, 256]()

        self.assertEqual(len(caught), 0)

    def test_warn_on_host_array(self):
        """Passing a plain NumPy array triggers the implicit-copy warning."""
        n = 10
        host_ary = np.zeros(n, dtype=np.float32)
        self._expect_implicit_copy_warning(host_ary, n)

    def test_pinned_warn_on_host_array(self):
        """Passing a pinned array triggers the implicit-copy warning."""
        n = 10
        pinned = cuda.pinned_array(n, dtype=np.float32)
        self._expect_implicit_copy_warning(pinned, n)

    def test_nowarn_on_mapped_array(self):
        """Passing a mapped array records no warning."""
        n = 10
        mapped = cuda.mapped_array(n, dtype=np.float32)
        self._expect_silent_launch(mapped, n)

    @linux_only
    def test_nowarn_on_managed_array(self):
        """Passing a managed array records no warning."""
        n = 10
        managed = cuda.managed_array(n, dtype=np.float32)
        self._expect_silent_launch(managed, n)

    def test_nowarn_on_device_array(self):
        """Passing a device array records no warning."""
        n = 10
        device = cuda.device_array(n, dtype=np.float32)
        self._expect_silent_launch(device, n)

    def test_warn_on_debug_and_opt(self):
        """``debug=True`` together with ``opt=True`` warns."""
        self._expect_debug_opt_warning(debug=True, opt=True)

    def test_warn_on_debug_and_opt_default(self):
        """``debug=True`` with ``opt`` left unspecified warns."""
        self._expect_debug_opt_warning(debug=True)

    def test_no_warn_on_debug_and_no_opt(self):
        """``debug=True`` with ``opt=False`` records no warning."""
        self._expect_no_jit_warning(debug=True, opt=False)

    def test_no_warn_with_no_debug_and_opt_kwargs(self):
        """``cuda.jit()`` without keyword arguments records no warning."""
        self._expect_no_jit_warning()

    def test_no_warn_on_debug_and_opt_with_config(self):
        """No warning when either the config or the kwargs disable opt."""
        with override_config("CUDA_DEBUGINFO_DEFAULT", 1):
            with override_config("OPT", config._OptLevel(0)):
                self._expect_no_jit_warning()

            self._expect_no_jit_warning(opt=False)

        with override_config("OPT", config._OptLevel(0)):
            self._expect_no_jit_warning(debug=True)

    def test_warn_on_debug_and_opt_with_config(self):
        """Every non-zero ``OPT`` level combined with debug info warns."""
        levels = (1, 2, 3, "max")

        with override_config("CUDA_DEBUGINFO_DEFAULT", 1):
            for level in levels:
                with override_config("OPT", config._OptLevel(level)):
                    self._expect_debug_opt_warning()

        for level in levels:
            with override_config("OPT", config._OptLevel(level)):
                self._expect_debug_opt_warning(debug=True)
203
+
204
+
205
+ if __name__ == "__main__":
206
+ unittest.main()
@@ -0,0 +1,331 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: BSD-2-Clause
3
+
4
+ import re
5
+
6
+ import numpy as np
7
+ from numba import cuda
8
+ from numba.cuda import int32, int64, float32, float64
9
+ from numba.cuda.testing import unittest, CUDATestCase, skip_on_cudasim
10
+ from numba.cuda.compiler import compile_ptx
11
+ from numba.cuda.core import config
12
+
13
+
14
def useful_syncwarp(ary):
    """Thread 0 stores 42 in ``ary[0]``; after a full-mask ``syncwarp``
    every thread copies ``ary[0]`` into its own element."""
    tid = cuda.grid(1)
    if tid == 0:
        ary[0] = 42
    cuda.syncwarp(0xFFFFFFFF)
    ary[tid] = ary[0]
20
+
21
+
22
def use_shfl_sync_idx(ary, idx):
    """Store in ``ary[i]`` the result of ``shfl_sync`` of the thread's own
    grid index with source lane ``idx``."""
    tid = cuda.grid(1)
    ary[tid] = cuda.shfl_sync(0xFFFFFFFF, tid, idx)
26
+
27
+
28
def use_shfl_sync_up(ary, delta):
    """Store in ``ary[i]`` the result of ``shfl_up_sync`` of the thread's
    own grid index by ``delta``."""
    tid = cuda.grid(1)
    ary[tid] = cuda.shfl_up_sync(0xFFFFFFFF, tid, delta)
32
+
33
+
34
def use_shfl_sync_down(ary, delta):
    """Store in ``ary[i]`` the result of ``shfl_down_sync`` of the thread's
    own grid index by ``delta``."""
    tid = cuda.grid(1)
    ary[tid] = cuda.shfl_down_sync(0xFFFFFFFF, tid, delta)
38
+
39
+
40
def use_shfl_sync_xor(ary, xor):
    """Store in ``ary[i]`` the result of ``shfl_xor_sync`` of the thread's
    own grid index with lane mask ``xor``."""
    tid = cuda.grid(1)
    ary[tid] = cuda.shfl_xor_sync(0xFFFFFFFF, tid, xor)
44
+
45
+
46
def use_shfl_sync_with_val(ary, into):
    """Shuffle the value ``into`` from lane 0 and store it in ``ary[i]``."""
    tid = cuda.grid(1)
    ary[tid] = cuda.shfl_sync(0xFFFFFFFF, into, 0)
50
+
51
+
52
def use_vote_sync_all(ary_in, ary_out):
    """Store the full-mask ``all_sync`` vote over ``ary_in[i]`` in
    ``ary_out[i]``."""
    tid = cuda.grid(1)
    ary_out[tid] = cuda.all_sync(0xFFFFFFFF, ary_in[tid])
56
+
57
+
58
def use_vote_sync_any(ary_in, ary_out):
    """Store the full-mask ``any_sync`` vote over ``ary_in[i]`` in
    ``ary_out[i]``."""
    tid = cuda.grid(1)
    ary_out[tid] = cuda.any_sync(0xFFFFFFFF, ary_in[tid])
62
+
63
+
64
def use_vote_sync_eq(ary_in, ary_out):
    """Store the full-mask ``eq_sync`` vote over ``ary_in[i]`` in
    ``ary_out[i]``."""
    tid = cuda.grid(1)
    ary_out[tid] = cuda.eq_sync(0xFFFFFFFF, ary_in[tid])
68
+
69
+
70
def use_vote_sync_ballot(ary):
    """Store the full-mask ``ballot_sync(True)`` result at the thread's
    ``threadIdx.x`` position in ``ary``."""
    lane = cuda.threadIdx.x
    ary[lane] = cuda.ballot_sync(0xFFFFFFFF, True)
74
+
75
+
76
def use_match_any_sync(ary_in, ary_out):
    """Store the full-mask ``match_any_sync`` result for ``ary_in[i]`` in
    ``ary_out[i]``."""
    tid = cuda.grid(1)
    ary_out[tid] = cuda.match_any_sync(0xFFFFFFFF, ary_in[tid])
80
+
81
+
82
def use_match_all_sync(ary_in, ary_out):
    """Run a full-mask ``match_all_sync`` on ``ary_in[i]`` and store the
    returned mask in ``ary_out[i]`` when the predicate is set, else 0."""
    tid = cuda.grid(1)
    matched_mask, all_matched = cuda.match_all_sync(0xFFFFFFFF, ary_in[tid])
    ary_out[tid] = matched_mask if all_matched else 0
86
+
87
+
88
def use_independent_scheduling(arr):
    """Each thread takes one of four branches chosen by ``threadIdx.x % 4``
    and calls ``ballot_sync`` with that branch's own mask; the result is
    stored in ``arr`` at the thread's index.

    The four separate branches are deliberate and must not be merged into a
    single call.
    """
    lane = cuda.threadIdx.x
    group = lane % 4
    if group == 0:
        votes = cuda.ballot_sync(0x11111111, True)
    elif group == 1:
        votes = cuda.ballot_sync(0x22222222, True)
    elif group == 2:
        votes = cuda.ballot_sync(0x44444444, True)
    elif group == 3:
        votes = cuda.ballot_sync(0x88888888, True)
    arr[lane] = votes
99
+
100
+
101
def _safe_cc_check(cc):
    """Return True under the simulator; otherwise report whether the current
    device's compute capability is at least ``cc``."""
    if config.ENABLE_CUDASIM:
        return True

    device_cc = cuda.get_current_device().compute_capability
    return device_cc >= cc
106
+
107
+
108
@skip_on_cudasim("Warp Operations are not yet implemented on cudasim")
class TestCudaWarpOperations(CUDATestCase):
    """Tests for the warp-level operations, each launched on a single block.

    Array comparisons use ``np.testing.assert_array_equal`` so that a failure
    reports the mismatching elements instead of a bare ``False``.
    """

    def test_useful_syncwarp(self):
        """Every element ends up holding the 42 written by thread 0."""
        compiled = cuda.jit("void(int32[:])")(useful_syncwarp)
        nelem = 32
        ary = np.empty(nelem, dtype=np.int32)
        compiled[1, nelem](ary)
        np.testing.assert_array_equal(ary, 42)

    def test_shfl_sync_idx(self):
        """Every lane receives the index of the source lane ``idx``."""
        compiled = cuda.jit("void(int32[:], int32)")(use_shfl_sync_idx)
        nelem = 32
        idx = 4
        ary = np.empty(nelem, dtype=np.int32)
        compiled[1, nelem](ary, idx)
        np.testing.assert_array_equal(ary, idx)

    def test_shfl_sync_up(self):
        """Lanes at or above ``delta`` receive the index ``delta`` below
        theirs; the first ``delta`` lanes keep their own index."""
        compiled = cuda.jit("void(int32[:], int32)")(use_shfl_sync_up)
        nelem = 32
        delta = 4
        ary = np.empty(nelem, dtype=np.int32)
        exp = np.arange(nelem, dtype=np.int32)
        exp[delta:] -= delta
        compiled[1, nelem](ary, delta)
        np.testing.assert_array_equal(ary, exp)

    def test_shfl_sync_down(self):
        """All but the last ``delta`` lanes receive the index ``delta``
        above theirs; the last ``delta`` lanes keep their own index."""
        compiled = cuda.jit("void(int32[:], int32)")(use_shfl_sync_down)
        nelem = 32
        delta = 4
        ary = np.empty(nelem, dtype=np.int32)
        exp = np.arange(nelem, dtype=np.int32)
        exp[:-delta] += delta
        compiled[1, nelem](ary, delta)
        np.testing.assert_array_equal(ary, exp)

    def test_shfl_sync_xor(self):
        """Each lane receives the index of the lane ``i ^ xor``."""
        compiled = cuda.jit("void(int32[:], int32)")(use_shfl_sync_xor)
        nelem = 32
        xor = 16
        ary = np.empty(nelem, dtype=np.int32)
        exp = np.arange(nelem, dtype=np.int32) ^ xor
        compiled[1, nelem](ary, xor)
        np.testing.assert_array_equal(ary, exp)

    def test_shfl_sync_const_mode_val(self):
        # Test `mode` argument is constant in shfl_sync calls.
        # Related to https://github.com/NVIDIA/numba-cuda/pull/231
        subtest = [
            (use_shfl_sync_idx, 4),
            (use_shfl_sync_up, 4),
            (use_shfl_sync_down, 4),
            (use_shfl_sync_xor, 16),
        ]

        args_re = r"\((.*)\)"
        m = re.compile(args_re)

        for func, value in subtest:
            with self.subTest(func=func.__name__):
                compiled = cuda.jit("void(int32[:], int32)")(func)
                nelem = 32
                ary = np.empty(nelem, dtype=np.int32)
                compiled[1, nelem](ary, value)
                irs = next(iter(compiled.inspect_llvm().values()))

                shfl_calls = [
                    line
                    for line in irs.split("\n")
                    if "call" in line and "llvm.nvvm.shfl.sync.i32" in line
                ]
                # Without this guard the subtest would pass vacuously if no
                # shuffle call line were present in the inspected IR.
                self.assertTrue(
                    shfl_calls,
                    "no llvm.nvvm.shfl.sync.i32 call found in the LLVM IR",
                )

                for line in shfl_calls:
                    # The second call argument is the mode; a "%" would mean
                    # it is an SSA value rather than a constant.
                    args = m.search(line).group(0)
                    mode_arg = args.split(",")[1]
                    self.assertNotIn("%", mode_arg)

    def test_shfl_sync_const_mode_val_sm100(self):
        # Test shfl_sync compiles with cc=(10, 0)
        subtest = [
            use_shfl_sync_idx,
            use_shfl_sync_up,
            use_shfl_sync_down,
            use_shfl_sync_xor,
        ]

        for func in subtest:
            with self.subTest(func=func.__name__):
                compile_ptx(func, (int32[:], int32), cc=(10, 0))

    def test_shfl_sync_types(self):
        """A value of each supported type is shuffled from lane 0 to all
        lanes unchanged."""
        types = int32, int64, float32, float64
        values = (
            np.int32(-1),
            np.int64(1 << 42),
            np.float32(np.pi),
            np.float64(np.pi),
        )
        for typ, val in zip(types, values):
            with self.subTest(typ=typ):
                compiled = cuda.jit((typ[:], typ))(use_shfl_sync_with_val)
                nelem = 32
                ary = np.empty(nelem, dtype=val.dtype)
                compiled[1, nelem](ary, val)
                np.testing.assert_array_equal(ary, val)

    def test_vote_sync_all(self):
        """``all_sync`` is 1 for all-ones input and 0 once one entry is 0."""
        compiled = cuda.jit("void(int32[:], int32[:])")(use_vote_sync_all)
        nelem = 32
        ary_in = np.ones(nelem, dtype=np.int32)
        ary_out = np.empty(nelem, dtype=np.int32)
        compiled[1, nelem](ary_in, ary_out)
        np.testing.assert_array_equal(ary_out, 1)
        ary_in[-1] = 0
        compiled[1, nelem](ary_in, ary_out)
        np.testing.assert_array_equal(ary_out, 0)

    def test_vote_sync_any(self):
        """``any_sync`` is 0 for all-zero input and 1 once entries are set."""
        compiled = cuda.jit("void(int32[:], int32[:])")(use_vote_sync_any)
        nelem = 32
        ary_in = np.zeros(nelem, dtype=np.int32)
        ary_out = np.empty(nelem, dtype=np.int32)
        compiled[1, nelem](ary_in, ary_out)
        np.testing.assert_array_equal(ary_out, 0)
        ary_in[2] = 1
        ary_in[5] = 1
        compiled[1, nelem](ary_in, ary_out)
        np.testing.assert_array_equal(ary_out, 1)

    def test_vote_sync_eq(self):
        """``eq_sync`` is 1 for uniform input and 0 for mixed input."""
        compiled = cuda.jit("void(int32[:], int32[:])")(use_vote_sync_eq)
        nelem = 32
        ary_in = np.zeros(nelem, dtype=np.int32)
        ary_out = np.empty(nelem, dtype=np.int32)
        compiled[1, nelem](ary_in, ary_out)
        np.testing.assert_array_equal(ary_out, 1)
        ary_in[1] = 1
        compiled[1, nelem](ary_in, ary_out)
        np.testing.assert_array_equal(ary_out, 0)
        ary_in[:] = 1
        compiled[1, nelem](ary_in, ary_out)
        np.testing.assert_array_equal(ary_out, 1)

    def test_vote_sync_ballot(self):
        """With all 32 lanes voting True, every lane sees 0xFFFFFFFF."""
        compiled = cuda.jit("void(uint32[:])")(use_vote_sync_ballot)
        nelem = 32
        ary = np.empty(nelem, dtype=np.uint32)
        compiled[1, nelem](ary)
        np.testing.assert_array_equal(ary, np.uint32(0xFFFFFFFF))

    @unittest.skipUnless(
        _safe_cc_check((7, 0)), "Matching requires at least Volta Architecture"
    )
    def test_match_any_sync(self):
        """Lanes holding alternating 0/1 values get alternating bit masks."""
        compiled = cuda.jit("void(int32[:], int32[:])")(use_match_any_sync)
        nelem = 10
        ary_in = np.arange(nelem, dtype=np.int32) % 2
        ary_out = np.empty(nelem, dtype=np.int32)
        exp = np.tile((0b0101010101, 0b1010101010), 5)
        compiled[1, nelem](ary_in, ary_out)
        np.testing.assert_array_equal(ary_out, exp)

    @unittest.skipUnless(
        _safe_cc_check((7, 0)), "Matching requires at least Volta Architecture"
    )
    def test_match_all_sync(self):
        """Uniform input yields the 10-lane mask; mixed input yields 0."""
        compiled = cuda.jit("void(int32[:], int32[:])")(use_match_all_sync)
        nelem = 10
        ary_in = np.zeros(nelem, dtype=np.int32)
        ary_out = np.empty(nelem, dtype=np.int32)
        compiled[1, nelem](ary_in, ary_out)
        np.testing.assert_array_equal(ary_out, 0b1111111111)
        ary_in[1] = 4
        compiled[1, nelem](ary_in, ary_out)
        np.testing.assert_array_equal(ary_out, 0)

    @unittest.skipUnless(
        _safe_cc_check((7, 0)),
        "Independent scheduling requires at least Volta Architecture",
    )
    def test_independent_scheduling(self):
        """Each group of lanes (``i % 4``) gets back exactly its own mask."""
        compiled = cuda.jit("void(uint32[:])")(use_independent_scheduling)
        arr = np.empty(32, dtype=np.uint32)
        exp = np.tile((0x11111111, 0x22222222, 0x44444444, 0x88888888), 8)
        compiled[1, 32](arr)
        np.testing.assert_array_equal(arr, exp)

    def test_activemask(self):
        @cuda.jit
        def use_activemask(x):
            i = cuda.grid(1)
            if (i % 2) == 0:
                # Even numbered threads fill in even numbered array entries
                # with binary "...01010101"
                x[i] = cuda.activemask()
            else:
                # Odd numbered threads fill in odd numbered array entries
                # with binary "...10101010"
                x[i] = cuda.activemask()

        out = np.zeros(32, dtype=np.uint32)
        use_activemask[1, 32](out)

        # 0x5 = 0101: The pattern from even-numbered threads
        # 0xA = 1010: The pattern from odd-numbered threads
        expected = np.tile((0x55555555, 0xAAAAAAAA), 16)
        np.testing.assert_equal(expected, out)

    def test_lanemask_lt(self):
        @cuda.jit
        def use_lanemask_lt(x):
            i = cuda.grid(1)
            x[i] = cuda.lanemask_lt()

        out = np.zeros(32, dtype=np.uint32)
        use_lanemask_lt[1, 32](out)

        # A string of 1s that grows from the LSB for each entry:
        # 0, 1, 3, 7, F, 1F, 3F, 7F, FF, 1FF, etc.
        # or in binary:
        # ...0001, ....0011, ...0111, etc.
        expected = np.asarray([(2**i) - 1 for i in range(32)], dtype=np.uint32)
        np.testing.assert_equal(expected, out)
328
+
329
+
330
+ if __name__ == "__main__":
331
+ unittest.main()
@@ -0,0 +1,9 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: BSD-2-Clause
3
+
4
+ from numba.cuda.tests import load_testsuite
5
+ import os
6
+
7
+
8
def load_tests(loader, tests, pattern):
    """Return the suite that ``load_testsuite`` builds for the directory
    containing this file; ``tests`` and ``pattern`` are not used."""
    package_dir = os.path.dirname(__file__)
    return load_testsuite(loader, package_dir)
@@ -0,0 +1,9 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: BSD-2-Clause
3
+
4
+ from numba import cuda
5
+
6
+
7
@cuda.jit(device=True)
def cuda_module_in_device_function():
    """Device function that returns ``cuda.threadIdx.x``."""
    thread_x = cuda.threadIdx.x
    return thread_x
@@ -0,0 +1,111 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: BSD-2-Clause
3
+
4
+ import threading
5
+
6
+ import numpy as np
7
+
8
+ from numba import cuda
9
+ from numba.cuda.testing import CUDATestCase, skip_unless_cudasim
10
+ import numba.cuda.simulator as simulator
11
+ import unittest
12
+
13
+
14
class TestCudaSimIssues(CUDATestCase):
    """Regression tests for record handling, use of the ``cuda`` module
    inside device functions, and simulator thread clean-up."""

    def test_record_access(self):
        """Fields of a nested, aligned record can be written and read back
        inside a kernel."""
        backyard_type = [
            ("statue", np.float64),
            ("newspaper", np.float64, (6,)),
        ]

        goose_type = [
            ("garden", np.float64, (12,)),
            ("town", np.float64, (42,)),
            ("backyard", backyard_type),
        ]

        goose_np_type = np.dtype(goose_type, align=True)

        @cuda.jit
        def simple_kernel(f):
            f.garden[0] = 45.0
            f.backyard.newspaper[3] = 2.0
            f.backyard.newspaper[3] = f.backyard.newspaper[3] + 3.0

        item = np.recarray(1, dtype=goose_np_type)
        simple_kernel[1, 1](item[0])
        np.testing.assert_equal(item[0]["garden"][0], 45)
        np.testing.assert_equal(item[0]["backyard"]["newspaper"][3], 5)

    def test_recarray_setting(self):
        """Assigning one record of a recarray to another inside a kernel
        copies its fields."""
        recordwith2darray = np.dtype(
            [("i", np.int32), ("j", np.float32, (3, 2))]
        )
        rec = np.recarray(2, dtype=recordwith2darray)
        rec[0]["i"] = 45

        @cuda.jit
        def simple_kernel(f):
            f[1] = f[0]

        simple_kernel[1, 1](rec)
        np.testing.assert_equal(rec[0]["i"], rec[1]["i"])

    def test_cuda_module_in_device_function(self):
        """
        Discovered in https://github.com/numba/numba/issues/1837.
        When the `cuda` module is referenced in a device function,
        it does not have the kernel API (e.g. cuda.threadIdx, cuda.shared)
        """
        from numba.cuda.tests.cudasim import support

        inner = support.cuda_module_in_device_function

        @cuda.jit
        def outer(out):
            tid = inner()
            if tid < out.size:
                out[tid] = tid

        arr = np.zeros(10, dtype=np.int32)
        # One more thread than elements: the guard in `outer` must hold.
        outer[1, 11](arr)
        expected = np.arange(arr.size, dtype=np.int32)
        np.testing.assert_equal(expected, arr)

    @skip_unless_cudasim("Only works on CUDASIM")
    def test_deadlock_on_exception(self):
        """No simulator block thread stays alive after a kernel finishes,
        whether it completed normally or raised."""

        def assert_no_blockthreads():
            # Collect every simulator block thread that is still running so
            # the failure message lists all of them, not just the first.
            blockthreads = []
            for t in threading.enumerate():
                if not isinstance(t, simulator.kernel.BlockThread):
                    continue

                # join blockthreads with a short timeout to allow aborted
                # threads to exit
                t.join(1)
                if t.is_alive():
                    blockthreads.append(t)

            self.assertListEqual(
                blockthreads,
                [],
                "Blocked kernel threads: %s" % (blockthreads,),
            )

        @simulator.jit
        def assign_with_sync(x, y):
            i = cuda.grid(1)
            y[i] = x[i]

            cuda.syncthreads()
            cuda.syncthreads()

        x = np.arange(3)
        y = np.empty(3)
        assign_with_sync[1, 3](x, y)
        np.testing.assert_array_equal(x, y)
        assert_no_blockthreads()

        # Six threads over three elements: the out-of-range accesses raise.
        with self.assertRaises(IndexError):
            assign_with_sync[1, 6](x, y)
        assert_no_blockthreads()
108
+
109
+
110
+ if __name__ == "__main__":
111
+ unittest.main()
@@ -0,0 +1,2 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: BSD-2-Clause
@@ -0,0 +1,28 @@
1
+ /*
2
+ * SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3
+ * SPDX-License-Identifier: BSD-2-Clause
4
+ */
5
+
6
+ #include <cooperative_groups.h>
7
+ #include <cuda/barrier>
8
+
9
+ namespace cg = cooperative_groups;
10
+
11
+ __device__ void _wait_on_tile(cuda::barrier<cuda::thread_scope_block> &tile)
12
+ {
13
+ auto token = tile.arrive();
14
+ tile.wait(std::move(token));
15
+ }
16
+
17
+ extern "C"
18
+ __device__ int cta_barrier(int *ret) {
19
+ auto cta = cg::this_thread_block();
20
+ cg::thread_block_tile<32> tile = cg::tiled_partition<32>(cta);
21
+ __shared__ cuda::barrier<cuda::thread_scope_block> barrier;
22
+ if (threadIdx.x == 0) {
23
+ init(&barrier, blockDim.x);
24
+ }
25
+
26
+ _wait_on_tile(barrier);
27
+ return 0;
28
+ }