numba-cuda 0.21.1__cp313-cp313-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (488) hide show
  1. _numba_cuda_redirector.pth +4 -0
  2. _numba_cuda_redirector.py +89 -0
  3. numba_cuda/VERSION +1 -0
  4. numba_cuda/__init__.py +6 -0
  5. numba_cuda/_version.py +11 -0
  6. numba_cuda/numba/cuda/__init__.py +70 -0
  7. numba_cuda/numba/cuda/_internal/cuda_bf16.py +16394 -0
  8. numba_cuda/numba/cuda/_internal/cuda_fp16.py +8112 -0
  9. numba_cuda/numba/cuda/api.py +577 -0
  10. numba_cuda/numba/cuda/api_util.py +76 -0
  11. numba_cuda/numba/cuda/args.py +72 -0
  12. numba_cuda/numba/cuda/bf16.py +397 -0
  13. numba_cuda/numba/cuda/cache_hints.py +287 -0
  14. numba_cuda/numba/cuda/cext/__init__.py +2 -0
  15. numba_cuda/numba/cuda/cext/_devicearray.cp313-win_amd64.pyd +0 -0
  16. numba_cuda/numba/cuda/cext/_devicearray.cpp +159 -0
  17. numba_cuda/numba/cuda/cext/_devicearray.h +29 -0
  18. numba_cuda/numba/cuda/cext/_dispatcher.cp313-win_amd64.pyd +0 -0
  19. numba_cuda/numba/cuda/cext/_dispatcher.cpp +1098 -0
  20. numba_cuda/numba/cuda/cext/_hashtable.cpp +532 -0
  21. numba_cuda/numba/cuda/cext/_hashtable.h +135 -0
  22. numba_cuda/numba/cuda/cext/_helperlib.c +71 -0
  23. numba_cuda/numba/cuda/cext/_helperlib.cp313-win_amd64.pyd +0 -0
  24. numba_cuda/numba/cuda/cext/_helpermod.c +82 -0
  25. numba_cuda/numba/cuda/cext/_pymodule.h +38 -0
  26. numba_cuda/numba/cuda/cext/_typeconv.cp313-win_amd64.pyd +0 -0
  27. numba_cuda/numba/cuda/cext/_typeconv.cpp +206 -0
  28. numba_cuda/numba/cuda/cext/_typeof.cpp +1159 -0
  29. numba_cuda/numba/cuda/cext/_typeof.h +19 -0
  30. numba_cuda/numba/cuda/cext/capsulethunk.h +111 -0
  31. numba_cuda/numba/cuda/cext/mviewbuf.c +385 -0
  32. numba_cuda/numba/cuda/cext/mviewbuf.cp313-win_amd64.pyd +0 -0
  33. numba_cuda/numba/cuda/cext/typeconv.cpp +212 -0
  34. numba_cuda/numba/cuda/cext/typeconv.hpp +101 -0
  35. numba_cuda/numba/cuda/cg.py +67 -0
  36. numba_cuda/numba/cuda/cgutils.py +1294 -0
  37. numba_cuda/numba/cuda/cloudpickle/__init__.py +21 -0
  38. numba_cuda/numba/cuda/cloudpickle/cloudpickle.py +1598 -0
  39. numba_cuda/numba/cuda/cloudpickle/cloudpickle_fast.py +17 -0
  40. numba_cuda/numba/cuda/codegen.py +541 -0
  41. numba_cuda/numba/cuda/compiler.py +1396 -0
  42. numba_cuda/numba/cuda/core/analysis.py +758 -0
  43. numba_cuda/numba/cuda/core/annotations/__init__.py +0 -0
  44. numba_cuda/numba/cuda/core/annotations/pretty_annotate.py +288 -0
  45. numba_cuda/numba/cuda/core/annotations/type_annotations.py +305 -0
  46. numba_cuda/numba/cuda/core/base.py +1332 -0
  47. numba_cuda/numba/cuda/core/boxing.py +1411 -0
  48. numba_cuda/numba/cuda/core/bytecode.py +728 -0
  49. numba_cuda/numba/cuda/core/byteflow.py +2346 -0
  50. numba_cuda/numba/cuda/core/caching.py +744 -0
  51. numba_cuda/numba/cuda/core/callconv.py +392 -0
  52. numba_cuda/numba/cuda/core/codegen.py +171 -0
  53. numba_cuda/numba/cuda/core/compiler.py +199 -0
  54. numba_cuda/numba/cuda/core/compiler_lock.py +85 -0
  55. numba_cuda/numba/cuda/core/compiler_machinery.py +497 -0
  56. numba_cuda/numba/cuda/core/config.py +650 -0
  57. numba_cuda/numba/cuda/core/consts.py +124 -0
  58. numba_cuda/numba/cuda/core/controlflow.py +989 -0
  59. numba_cuda/numba/cuda/core/entrypoints.py +57 -0
  60. numba_cuda/numba/cuda/core/environment.py +66 -0
  61. numba_cuda/numba/cuda/core/errors.py +917 -0
  62. numba_cuda/numba/cuda/core/event.py +511 -0
  63. numba_cuda/numba/cuda/core/funcdesc.py +330 -0
  64. numba_cuda/numba/cuda/core/generators.py +387 -0
  65. numba_cuda/numba/cuda/core/imputils.py +509 -0
  66. numba_cuda/numba/cuda/core/inline_closurecall.py +1787 -0
  67. numba_cuda/numba/cuda/core/interpreter.py +3617 -0
  68. numba_cuda/numba/cuda/core/ir.py +1812 -0
  69. numba_cuda/numba/cuda/core/ir_utils.py +2638 -0
  70. numba_cuda/numba/cuda/core/optional.py +129 -0
  71. numba_cuda/numba/cuda/core/options.py +262 -0
  72. numba_cuda/numba/cuda/core/postproc.py +249 -0
  73. numba_cuda/numba/cuda/core/pythonapi.py +1859 -0
  74. numba_cuda/numba/cuda/core/registry.py +46 -0
  75. numba_cuda/numba/cuda/core/removerefctpass.py +123 -0
  76. numba_cuda/numba/cuda/core/rewrites/__init__.py +26 -0
  77. numba_cuda/numba/cuda/core/rewrites/ir_print.py +91 -0
  78. numba_cuda/numba/cuda/core/rewrites/registry.py +104 -0
  79. numba_cuda/numba/cuda/core/rewrites/static_binop.py +41 -0
  80. numba_cuda/numba/cuda/core/rewrites/static_getitem.py +189 -0
  81. numba_cuda/numba/cuda/core/rewrites/static_raise.py +100 -0
  82. numba_cuda/numba/cuda/core/sigutils.py +68 -0
  83. numba_cuda/numba/cuda/core/ssa.py +498 -0
  84. numba_cuda/numba/cuda/core/targetconfig.py +330 -0
  85. numba_cuda/numba/cuda/core/tracing.py +231 -0
  86. numba_cuda/numba/cuda/core/transforms.py +956 -0
  87. numba_cuda/numba/cuda/core/typed_passes.py +867 -0
  88. numba_cuda/numba/cuda/core/typeinfer.py +1950 -0
  89. numba_cuda/numba/cuda/core/unsafe/__init__.py +0 -0
  90. numba_cuda/numba/cuda/core/unsafe/bytes.py +67 -0
  91. numba_cuda/numba/cuda/core/unsafe/eh.py +67 -0
  92. numba_cuda/numba/cuda/core/unsafe/refcount.py +98 -0
  93. numba_cuda/numba/cuda/core/untyped_passes.py +1979 -0
  94. numba_cuda/numba/cuda/cpython/builtins.py +1153 -0
  95. numba_cuda/numba/cuda/cpython/charseq.py +1218 -0
  96. numba_cuda/numba/cuda/cpython/cmathimpl.py +560 -0
  97. numba_cuda/numba/cuda/cpython/enumimpl.py +103 -0
  98. numba_cuda/numba/cuda/cpython/iterators.py +167 -0
  99. numba_cuda/numba/cuda/cpython/listobj.py +1326 -0
  100. numba_cuda/numba/cuda/cpython/mathimpl.py +499 -0
  101. numba_cuda/numba/cuda/cpython/numbers.py +1475 -0
  102. numba_cuda/numba/cuda/cpython/rangeobj.py +289 -0
  103. numba_cuda/numba/cuda/cpython/slicing.py +322 -0
  104. numba_cuda/numba/cuda/cpython/tupleobj.py +456 -0
  105. numba_cuda/numba/cuda/cpython/unicode.py +2865 -0
  106. numba_cuda/numba/cuda/cpython/unicode_support.py +1597 -0
  107. numba_cuda/numba/cuda/cpython/unsafe/__init__.py +0 -0
  108. numba_cuda/numba/cuda/cpython/unsafe/numbers.py +64 -0
  109. numba_cuda/numba/cuda/cpython/unsafe/tuple.py +92 -0
  110. numba_cuda/numba/cuda/cuda_paths.py +691 -0
  111. numba_cuda/numba/cuda/cudadecl.py +556 -0
  112. numba_cuda/numba/cuda/cudadrv/__init__.py +14 -0
  113. numba_cuda/numba/cuda/cudadrv/devicearray.py +951 -0
  114. numba_cuda/numba/cuda/cudadrv/devices.py +249 -0
  115. numba_cuda/numba/cuda/cudadrv/driver.py +3222 -0
  116. numba_cuda/numba/cuda/cudadrv/drvapi.py +435 -0
  117. numba_cuda/numba/cuda/cudadrv/dummyarray.py +558 -0
  118. numba_cuda/numba/cuda/cudadrv/enums.py +613 -0
  119. numba_cuda/numba/cuda/cudadrv/error.py +48 -0
  120. numba_cuda/numba/cuda/cudadrv/libs.py +220 -0
  121. numba_cuda/numba/cuda/cudadrv/linkable_code.py +184 -0
  122. numba_cuda/numba/cuda/cudadrv/mappings.py +14 -0
  123. numba_cuda/numba/cuda/cudadrv/ndarray.py +26 -0
  124. numba_cuda/numba/cuda/cudadrv/nvrtc.py +193 -0
  125. numba_cuda/numba/cuda/cudadrv/nvvm.py +756 -0
  126. numba_cuda/numba/cuda/cudadrv/rtapi.py +13 -0
  127. numba_cuda/numba/cuda/cudadrv/runtime.py +34 -0
  128. numba_cuda/numba/cuda/cudaimpl.py +995 -0
  129. numba_cuda/numba/cuda/cudamath.py +149 -0
  130. numba_cuda/numba/cuda/datamodel/__init__.py +7 -0
  131. numba_cuda/numba/cuda/datamodel/cuda_manager.py +66 -0
  132. numba_cuda/numba/cuda/datamodel/cuda_models.py +1446 -0
  133. numba_cuda/numba/cuda/datamodel/cuda_packer.py +224 -0
  134. numba_cuda/numba/cuda/datamodel/cuda_registry.py +22 -0
  135. numba_cuda/numba/cuda/datamodel/cuda_testing.py +153 -0
  136. numba_cuda/numba/cuda/datamodel/manager.py +11 -0
  137. numba_cuda/numba/cuda/datamodel/models.py +9 -0
  138. numba_cuda/numba/cuda/datamodel/packer.py +9 -0
  139. numba_cuda/numba/cuda/datamodel/registry.py +11 -0
  140. numba_cuda/numba/cuda/datamodel/testing.py +11 -0
  141. numba_cuda/numba/cuda/debuginfo.py +903 -0
  142. numba_cuda/numba/cuda/decorators.py +294 -0
  143. numba_cuda/numba/cuda/descriptor.py +35 -0
  144. numba_cuda/numba/cuda/device_init.py +158 -0
  145. numba_cuda/numba/cuda/deviceufunc.py +1021 -0
  146. numba_cuda/numba/cuda/dispatcher.py +2463 -0
  147. numba_cuda/numba/cuda/errors.py +72 -0
  148. numba_cuda/numba/cuda/extending.py +697 -0
  149. numba_cuda/numba/cuda/flags.py +178 -0
  150. numba_cuda/numba/cuda/fp16.py +357 -0
  151. numba_cuda/numba/cuda/include/12/cuda_bf16.h +5118 -0
  152. numba_cuda/numba/cuda/include/12/cuda_bf16.hpp +3865 -0
  153. numba_cuda/numba/cuda/include/12/cuda_fp16.h +5363 -0
  154. numba_cuda/numba/cuda/include/12/cuda_fp16.hpp +3483 -0
  155. numba_cuda/numba/cuda/include/13/cuda_bf16.h +5118 -0
  156. numba_cuda/numba/cuda/include/13/cuda_bf16.hpp +3865 -0
  157. numba_cuda/numba/cuda/include/13/cuda_fp16.h +5363 -0
  158. numba_cuda/numba/cuda/include/13/cuda_fp16.hpp +3483 -0
  159. numba_cuda/numba/cuda/initialize.py +24 -0
  160. numba_cuda/numba/cuda/intrinsic_wrapper.py +41 -0
  161. numba_cuda/numba/cuda/intrinsics.py +382 -0
  162. numba_cuda/numba/cuda/itanium_mangler.py +214 -0
  163. numba_cuda/numba/cuda/kernels/__init__.py +2 -0
  164. numba_cuda/numba/cuda/kernels/reduction.py +265 -0
  165. numba_cuda/numba/cuda/kernels/transpose.py +65 -0
  166. numba_cuda/numba/cuda/libdevice.py +3386 -0
  167. numba_cuda/numba/cuda/libdevicedecl.py +20 -0
  168. numba_cuda/numba/cuda/libdevicefuncs.py +1060 -0
  169. numba_cuda/numba/cuda/libdeviceimpl.py +88 -0
  170. numba_cuda/numba/cuda/locks.py +19 -0
  171. numba_cuda/numba/cuda/lowering.py +1951 -0
  172. numba_cuda/numba/cuda/mathimpl.py +374 -0
  173. numba_cuda/numba/cuda/memory_management/__init__.py +4 -0
  174. numba_cuda/numba/cuda/memory_management/memsys.cu +99 -0
  175. numba_cuda/numba/cuda/memory_management/memsys.cuh +22 -0
  176. numba_cuda/numba/cuda/memory_management/nrt.cu +212 -0
  177. numba_cuda/numba/cuda/memory_management/nrt.cuh +48 -0
  178. numba_cuda/numba/cuda/memory_management/nrt.py +390 -0
  179. numba_cuda/numba/cuda/memory_management/nrt_context.py +438 -0
  180. numba_cuda/numba/cuda/misc/appdirs.py +594 -0
  181. numba_cuda/numba/cuda/misc/cffiimpl.py +24 -0
  182. numba_cuda/numba/cuda/misc/coverage_support.py +43 -0
  183. numba_cuda/numba/cuda/misc/dump_style.py +41 -0
  184. numba_cuda/numba/cuda/misc/findlib.py +75 -0
  185. numba_cuda/numba/cuda/misc/firstlinefinder.py +96 -0
  186. numba_cuda/numba/cuda/misc/gdb_hook.py +240 -0
  187. numba_cuda/numba/cuda/misc/literal.py +28 -0
  188. numba_cuda/numba/cuda/misc/llvm_pass_timings.py +412 -0
  189. numba_cuda/numba/cuda/misc/special.py +94 -0
  190. numba_cuda/numba/cuda/models.py +56 -0
  191. numba_cuda/numba/cuda/np/arraymath.py +5130 -0
  192. numba_cuda/numba/cuda/np/arrayobj.py +7635 -0
  193. numba_cuda/numba/cuda/np/extensions.py +11 -0
  194. numba_cuda/numba/cuda/np/linalg.py +3087 -0
  195. numba_cuda/numba/cuda/np/math/__init__.py +0 -0
  196. numba_cuda/numba/cuda/np/math/cmathimpl.py +558 -0
  197. numba_cuda/numba/cuda/np/math/mathimpl.py +487 -0
  198. numba_cuda/numba/cuda/np/math/numbers.py +1461 -0
  199. numba_cuda/numba/cuda/np/npdatetime.py +969 -0
  200. numba_cuda/numba/cuda/np/npdatetime_helpers.py +217 -0
  201. numba_cuda/numba/cuda/np/npyfuncs.py +1808 -0
  202. numba_cuda/numba/cuda/np/npyimpl.py +1027 -0
  203. numba_cuda/numba/cuda/np/numpy_support.py +798 -0
  204. numba_cuda/numba/cuda/np/polynomial/__init__.py +4 -0
  205. numba_cuda/numba/cuda/np/polynomial/polynomial_core.py +242 -0
  206. numba_cuda/numba/cuda/np/polynomial/polynomial_functions.py +380 -0
  207. numba_cuda/numba/cuda/np/ufunc/__init__.py +4 -0
  208. numba_cuda/numba/cuda/np/ufunc/decorators.py +203 -0
  209. numba_cuda/numba/cuda/np/ufunc/sigparse.py +68 -0
  210. numba_cuda/numba/cuda/np/ufunc/ufuncbuilder.py +65 -0
  211. numba_cuda/numba/cuda/np/ufunc_db.py +1282 -0
  212. numba_cuda/numba/cuda/np/unsafe/__init__.py +0 -0
  213. numba_cuda/numba/cuda/np/unsafe/ndarray.py +84 -0
  214. numba_cuda/numba/cuda/nvvmutils.py +254 -0
  215. numba_cuda/numba/cuda/printimpl.py +126 -0
  216. numba_cuda/numba/cuda/random.py +308 -0
  217. numba_cuda/numba/cuda/reshape_funcs.cu +156 -0
  218. numba_cuda/numba/cuda/serialize.py +267 -0
  219. numba_cuda/numba/cuda/simulator/__init__.py +63 -0
  220. numba_cuda/numba/cuda/simulator/_internal/__init__.py +4 -0
  221. numba_cuda/numba/cuda/simulator/_internal/cuda_bf16.py +2 -0
  222. numba_cuda/numba/cuda/simulator/api.py +179 -0
  223. numba_cuda/numba/cuda/simulator/bf16.py +4 -0
  224. numba_cuda/numba/cuda/simulator/compiler.py +38 -0
  225. numba_cuda/numba/cuda/simulator/cudadrv/__init__.py +11 -0
  226. numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py +462 -0
  227. numba_cuda/numba/cuda/simulator/cudadrv/devices.py +122 -0
  228. numba_cuda/numba/cuda/simulator/cudadrv/driver.py +66 -0
  229. numba_cuda/numba/cuda/simulator/cudadrv/drvapi.py +7 -0
  230. numba_cuda/numba/cuda/simulator/cudadrv/dummyarray.py +7 -0
  231. numba_cuda/numba/cuda/simulator/cudadrv/error.py +10 -0
  232. numba_cuda/numba/cuda/simulator/cudadrv/libs.py +10 -0
  233. numba_cuda/numba/cuda/simulator/cudadrv/linkable_code.py +61 -0
  234. numba_cuda/numba/cuda/simulator/cudadrv/nvrtc.py +11 -0
  235. numba_cuda/numba/cuda/simulator/cudadrv/nvvm.py +32 -0
  236. numba_cuda/numba/cuda/simulator/cudadrv/runtime.py +22 -0
  237. numba_cuda/numba/cuda/simulator/dispatcher.py +11 -0
  238. numba_cuda/numba/cuda/simulator/kernel.py +320 -0
  239. numba_cuda/numba/cuda/simulator/kernelapi.py +509 -0
  240. numba_cuda/numba/cuda/simulator/memory_management/__init__.py +4 -0
  241. numba_cuda/numba/cuda/simulator/memory_management/nrt.py +21 -0
  242. numba_cuda/numba/cuda/simulator/reduction.py +19 -0
  243. numba_cuda/numba/cuda/simulator/tests/support.py +4 -0
  244. numba_cuda/numba/cuda/simulator/vector_types.py +65 -0
  245. numba_cuda/numba/cuda/simulator_init.py +18 -0
  246. numba_cuda/numba/cuda/stubs.py +635 -0
  247. numba_cuda/numba/cuda/target.py +505 -0
  248. numba_cuda/numba/cuda/testing.py +347 -0
  249. numba_cuda/numba/cuda/tests/__init__.py +62 -0
  250. numba_cuda/numba/cuda/tests/benchmarks/__init__.py +0 -0
  251. numba_cuda/numba/cuda/tests/benchmarks/test_kernel_launch.py +119 -0
  252. numba_cuda/numba/cuda/tests/cloudpickle_main_class.py +9 -0
  253. numba_cuda/numba/cuda/tests/core/serialize_usecases.py +113 -0
  254. numba_cuda/numba/cuda/tests/core/test_itanium_mangler.py +83 -0
  255. numba_cuda/numba/cuda/tests/core/test_serialize.py +371 -0
  256. numba_cuda/numba/cuda/tests/cudadrv/__init__.py +9 -0
  257. numba_cuda/numba/cuda/tests/cudadrv/test_array_attr.py +147 -0
  258. numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py +161 -0
  259. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_array_slicing.py +397 -0
  260. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_auto_context.py +24 -0
  261. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py +180 -0
  262. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py +313 -0
  263. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_memory.py +187 -0
  264. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py +621 -0
  265. numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py +247 -0
  266. numba_cuda/numba/cuda/tests/cudadrv/test_detect.py +100 -0
  267. numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +198 -0
  268. numba_cuda/numba/cuda/tests/cudadrv/test_events.py +53 -0
  269. numba_cuda/numba/cuda/tests/cudadrv/test_host_alloc.py +72 -0
  270. numba_cuda/numba/cuda/tests/cudadrv/test_init.py +138 -0
  271. numba_cuda/numba/cuda/tests/cudadrv/test_inline_ptx.py +43 -0
  272. numba_cuda/numba/cuda/tests/cudadrv/test_is_fp16.py +15 -0
  273. numba_cuda/numba/cuda/tests/cudadrv/test_linkable_code.py +58 -0
  274. numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +348 -0
  275. numba_cuda/numba/cuda/tests/cudadrv/test_managed_alloc.py +128 -0
  276. numba_cuda/numba/cuda/tests/cudadrv/test_module_callbacks.py +301 -0
  277. numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py +174 -0
  278. numba_cuda/numba/cuda/tests/cudadrv/test_nvrtc.py +28 -0
  279. numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py +185 -0
  280. numba_cuda/numba/cuda/tests/cudadrv/test_pinned.py +39 -0
  281. numba_cuda/numba/cuda/tests/cudadrv/test_profiler.py +23 -0
  282. numba_cuda/numba/cuda/tests/cudadrv/test_reset_device.py +38 -0
  283. numba_cuda/numba/cuda/tests/cudadrv/test_runtime.py +48 -0
  284. numba_cuda/numba/cuda/tests/cudadrv/test_select_device.py +44 -0
  285. numba_cuda/numba/cuda/tests/cudadrv/test_streams.py +127 -0
  286. numba_cuda/numba/cuda/tests/cudapy/__init__.py +9 -0
  287. numba_cuda/numba/cuda/tests/cudapy/cache_usecases.py +231 -0
  288. numba_cuda/numba/cuda/tests/cudapy/cache_with_cpu_usecases.py +50 -0
  289. numba_cuda/numba/cuda/tests/cudapy/cg_cache_usecases.py +36 -0
  290. numba_cuda/numba/cuda/tests/cudapy/complex_usecases.py +116 -0
  291. numba_cuda/numba/cuda/tests/cudapy/enum_usecases.py +59 -0
  292. numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +62 -0
  293. numba_cuda/numba/cuda/tests/cudapy/jitlink.ptx +28 -0
  294. numba_cuda/numba/cuda/tests/cudapy/overload_usecases.py +33 -0
  295. numba_cuda/numba/cuda/tests/cudapy/recursion_usecases.py +104 -0
  296. numba_cuda/numba/cuda/tests/cudapy/test_alignment.py +47 -0
  297. numba_cuda/numba/cuda/tests/cudapy/test_analysis.py +1122 -0
  298. numba_cuda/numba/cuda/tests/cudapy/test_array.py +344 -0
  299. numba_cuda/numba/cuda/tests/cudapy/test_array_alignment.py +268 -0
  300. numba_cuda/numba/cuda/tests/cudapy/test_array_args.py +203 -0
  301. numba_cuda/numba/cuda/tests/cudapy/test_array_methods.py +63 -0
  302. numba_cuda/numba/cuda/tests/cudapy/test_array_reductions.py +360 -0
  303. numba_cuda/numba/cuda/tests/cudapy/test_atomics.py +1815 -0
  304. numba_cuda/numba/cuda/tests/cudapy/test_bfloat16.py +599 -0
  305. numba_cuda/numba/cuda/tests/cudapy/test_bfloat16_bindings.py +377 -0
  306. numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py +160 -0
  307. numba_cuda/numba/cuda/tests/cudapy/test_boolean.py +27 -0
  308. numba_cuda/numba/cuda/tests/cudapy/test_byteflow.py +98 -0
  309. numba_cuda/numba/cuda/tests/cudapy/test_cache_hints.py +210 -0
  310. numba_cuda/numba/cuda/tests/cudapy/test_caching.py +683 -0
  311. numba_cuda/numba/cuda/tests/cudapy/test_casting.py +265 -0
  312. numba_cuda/numba/cuda/tests/cudapy/test_cffi.py +42 -0
  313. numba_cuda/numba/cuda/tests/cudapy/test_compiler.py +718 -0
  314. numba_cuda/numba/cuda/tests/cudapy/test_complex.py +370 -0
  315. numba_cuda/numba/cuda/tests/cudapy/test_complex_kernel.py +23 -0
  316. numba_cuda/numba/cuda/tests/cudapy/test_const_string.py +142 -0
  317. numba_cuda/numba/cuda/tests/cudapy/test_constmem.py +178 -0
  318. numba_cuda/numba/cuda/tests/cudapy/test_cooperative_groups.py +193 -0
  319. numba_cuda/numba/cuda/tests/cudapy/test_copy_propagate.py +131 -0
  320. numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py +438 -0
  321. numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py +94 -0
  322. numba_cuda/numba/cuda/tests/cudapy/test_datetime.py +101 -0
  323. numba_cuda/numba/cuda/tests/cudapy/test_debug.py +105 -0
  324. numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +889 -0
  325. numba_cuda/numba/cuda/tests/cudapy/test_debuginfo_types.py +476 -0
  326. numba_cuda/numba/cuda/tests/cudapy/test_device_func.py +500 -0
  327. numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +820 -0
  328. numba_cuda/numba/cuda/tests/cudapy/test_enums.py +152 -0
  329. numba_cuda/numba/cuda/tests/cudapy/test_errors.py +111 -0
  330. numba_cuda/numba/cuda/tests/cudapy/test_exception.py +170 -0
  331. numba_cuda/numba/cuda/tests/cudapy/test_extending.py +1088 -0
  332. numba_cuda/numba/cuda/tests/cudapy/test_extending_types.py +71 -0
  333. numba_cuda/numba/cuda/tests/cudapy/test_fastmath.py +265 -0
  334. numba_cuda/numba/cuda/tests/cudapy/test_flow_control.py +1433 -0
  335. numba_cuda/numba/cuda/tests/cudapy/test_forall.py +57 -0
  336. numba_cuda/numba/cuda/tests/cudapy/test_freevar.py +34 -0
  337. numba_cuda/numba/cuda/tests/cudapy/test_frexp_ldexp.py +69 -0
  338. numba_cuda/numba/cuda/tests/cudapy/test_globals.py +62 -0
  339. numba_cuda/numba/cuda/tests/cudapy/test_gufunc.py +474 -0
  340. numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scalar.py +167 -0
  341. numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scheduling.py +92 -0
  342. numba_cuda/numba/cuda/tests/cudapy/test_idiv.py +39 -0
  343. numba_cuda/numba/cuda/tests/cudapy/test_inline.py +170 -0
  344. numba_cuda/numba/cuda/tests/cudapy/test_inspect.py +255 -0
  345. numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py +1219 -0
  346. numba_cuda/numba/cuda/tests/cudapy/test_ipc.py +263 -0
  347. numba_cuda/numba/cuda/tests/cudapy/test_ir.py +598 -0
  348. numba_cuda/numba/cuda/tests/cudapy/test_ir_utils.py +276 -0
  349. numba_cuda/numba/cuda/tests/cudapy/test_iterators.py +101 -0
  350. numba_cuda/numba/cuda/tests/cudapy/test_lang.py +68 -0
  351. numba_cuda/numba/cuda/tests/cudapy/test_laplace.py +123 -0
  352. numba_cuda/numba/cuda/tests/cudapy/test_libdevice.py +194 -0
  353. numba_cuda/numba/cuda/tests/cudapy/test_lineinfo.py +220 -0
  354. numba_cuda/numba/cuda/tests/cudapy/test_localmem.py +173 -0
  355. numba_cuda/numba/cuda/tests/cudapy/test_make_function_to_jit_function.py +364 -0
  356. numba_cuda/numba/cuda/tests/cudapy/test_mandel.py +47 -0
  357. numba_cuda/numba/cuda/tests/cudapy/test_math.py +842 -0
  358. numba_cuda/numba/cuda/tests/cudapy/test_matmul.py +76 -0
  359. numba_cuda/numba/cuda/tests/cudapy/test_minmax.py +78 -0
  360. numba_cuda/numba/cuda/tests/cudapy/test_montecarlo.py +25 -0
  361. numba_cuda/numba/cuda/tests/cudapy/test_multigpu.py +145 -0
  362. numba_cuda/numba/cuda/tests/cudapy/test_multiprocessing.py +39 -0
  363. numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py +82 -0
  364. numba_cuda/numba/cuda/tests/cudapy/test_nondet.py +53 -0
  365. numba_cuda/numba/cuda/tests/cudapy/test_operator.py +504 -0
  366. numba_cuda/numba/cuda/tests/cudapy/test_optimization.py +93 -0
  367. numba_cuda/numba/cuda/tests/cudapy/test_overload.py +402 -0
  368. numba_cuda/numba/cuda/tests/cudapy/test_powi.py +128 -0
  369. numba_cuda/numba/cuda/tests/cudapy/test_print.py +193 -0
  370. numba_cuda/numba/cuda/tests/cudapy/test_py2_div_issue.py +37 -0
  371. numba_cuda/numba/cuda/tests/cudapy/test_random.py +117 -0
  372. numba_cuda/numba/cuda/tests/cudapy/test_record_dtype.py +614 -0
  373. numba_cuda/numba/cuda/tests/cudapy/test_recursion.py +130 -0
  374. numba_cuda/numba/cuda/tests/cudapy/test_reduction.py +94 -0
  375. numba_cuda/numba/cuda/tests/cudapy/test_retrieve_autoconverted_arrays.py +83 -0
  376. numba_cuda/numba/cuda/tests/cudapy/test_serialize.py +86 -0
  377. numba_cuda/numba/cuda/tests/cudapy/test_slicing.py +40 -0
  378. numba_cuda/numba/cuda/tests/cudapy/test_sm.py +457 -0
  379. numba_cuda/numba/cuda/tests/cudapy/test_sm_creation.py +233 -0
  380. numba_cuda/numba/cuda/tests/cudapy/test_ssa.py +454 -0
  381. numba_cuda/numba/cuda/tests/cudapy/test_stream_api.py +56 -0
  382. numba_cuda/numba/cuda/tests/cudapy/test_sync.py +277 -0
  383. numba_cuda/numba/cuda/tests/cudapy/test_tracing.py +200 -0
  384. numba_cuda/numba/cuda/tests/cudapy/test_transpose.py +90 -0
  385. numba_cuda/numba/cuda/tests/cudapy/test_typeconv.py +333 -0
  386. numba_cuda/numba/cuda/tests/cudapy/test_typeinfer.py +538 -0
  387. numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +585 -0
  388. numba_cuda/numba/cuda/tests/cudapy/test_userexc.py +42 -0
  389. numba_cuda/numba/cuda/tests/cudapy/test_vector_type.py +485 -0
  390. numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py +312 -0
  391. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_complex.py +23 -0
  392. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py +183 -0
  393. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_device.py +40 -0
  394. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_scalar_arg.py +40 -0
  395. numba_cuda/numba/cuda/tests/cudapy/test_warning.py +206 -0
  396. numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +331 -0
  397. numba_cuda/numba/cuda/tests/cudasim/__init__.py +9 -0
  398. numba_cuda/numba/cuda/tests/cudasim/support.py +9 -0
  399. numba_cuda/numba/cuda/tests/cudasim/test_cudasim_issues.py +111 -0
  400. numba_cuda/numba/cuda/tests/data/__init__.py +2 -0
  401. numba_cuda/numba/cuda/tests/data/cta_barrier.cu +28 -0
  402. numba_cuda/numba/cuda/tests/data/cuda_include.cu +10 -0
  403. numba_cuda/numba/cuda/tests/data/error.cu +12 -0
  404. numba_cuda/numba/cuda/tests/data/include/add.cuh +8 -0
  405. numba_cuda/numba/cuda/tests/data/jitlink.cu +28 -0
  406. numba_cuda/numba/cuda/tests/data/jitlink.ptx +49 -0
  407. numba_cuda/numba/cuda/tests/data/warn.cu +12 -0
  408. numba_cuda/numba/cuda/tests/doc_examples/__init__.py +9 -0
  409. numba_cuda/numba/cuda/tests/doc_examples/ffi/__init__.py +2 -0
  410. numba_cuda/numba/cuda/tests/doc_examples/ffi/functions.cu +54 -0
  411. numba_cuda/numba/cuda/tests/doc_examples/ffi/include/mul.cuh +8 -0
  412. numba_cuda/numba/cuda/tests/doc_examples/ffi/saxpy.cu +14 -0
  413. numba_cuda/numba/cuda/tests/doc_examples/test_cg.py +86 -0
  414. numba_cuda/numba/cuda/tests/doc_examples/test_cpointer.py +68 -0
  415. numba_cuda/numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py +81 -0
  416. numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py +141 -0
  417. numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py +160 -0
  418. numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py +180 -0
  419. numba_cuda/numba/cuda/tests/doc_examples/test_montecarlo.py +119 -0
  420. numba_cuda/numba/cuda/tests/doc_examples/test_random.py +66 -0
  421. numba_cuda/numba/cuda/tests/doc_examples/test_reduction.py +80 -0
  422. numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py +206 -0
  423. numba_cuda/numba/cuda/tests/doc_examples/test_ufunc.py +53 -0
  424. numba_cuda/numba/cuda/tests/doc_examples/test_vecadd.py +76 -0
  425. numba_cuda/numba/cuda/tests/nocuda/__init__.py +9 -0
  426. numba_cuda/numba/cuda/tests/nocuda/test_dummyarray.py +391 -0
  427. numba_cuda/numba/cuda/tests/nocuda/test_function_resolution.py +48 -0
  428. numba_cuda/numba/cuda/tests/nocuda/test_import.py +63 -0
  429. numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py +252 -0
  430. numba_cuda/numba/cuda/tests/nocuda/test_nvvm.py +59 -0
  431. numba_cuda/numba/cuda/tests/nrt/__init__.py +9 -0
  432. numba_cuda/numba/cuda/tests/nrt/test_nrt.py +387 -0
  433. numba_cuda/numba/cuda/tests/nrt/test_nrt_refct.py +124 -0
  434. numba_cuda/numba/cuda/tests/support.py +900 -0
  435. numba_cuda/numba/cuda/typeconv/__init__.py +4 -0
  436. numba_cuda/numba/cuda/typeconv/castgraph.py +137 -0
  437. numba_cuda/numba/cuda/typeconv/rules.py +63 -0
  438. numba_cuda/numba/cuda/typeconv/typeconv.py +121 -0
  439. numba_cuda/numba/cuda/types/__init__.py +233 -0
  440. numba_cuda/numba/cuda/types/__init__.pyi +167 -0
  441. numba_cuda/numba/cuda/types/abstract.py +9 -0
  442. numba_cuda/numba/cuda/types/common.py +9 -0
  443. numba_cuda/numba/cuda/types/containers.py +9 -0
  444. numba_cuda/numba/cuda/types/cuda_abstract.py +533 -0
  445. numba_cuda/numba/cuda/types/cuda_common.py +110 -0
  446. numba_cuda/numba/cuda/types/cuda_containers.py +971 -0
  447. numba_cuda/numba/cuda/types/cuda_function_type.py +230 -0
  448. numba_cuda/numba/cuda/types/cuda_functions.py +798 -0
  449. numba_cuda/numba/cuda/types/cuda_iterators.py +120 -0
  450. numba_cuda/numba/cuda/types/cuda_misc.py +569 -0
  451. numba_cuda/numba/cuda/types/cuda_npytypes.py +690 -0
  452. numba_cuda/numba/cuda/types/cuda_scalars.py +280 -0
  453. numba_cuda/numba/cuda/types/ext_types.py +101 -0
  454. numba_cuda/numba/cuda/types/function_type.py +11 -0
  455. numba_cuda/numba/cuda/types/functions.py +9 -0
  456. numba_cuda/numba/cuda/types/iterators.py +9 -0
  457. numba_cuda/numba/cuda/types/misc.py +9 -0
  458. numba_cuda/numba/cuda/types/npytypes.py +9 -0
  459. numba_cuda/numba/cuda/types/scalars.py +9 -0
  460. numba_cuda/numba/cuda/typing/__init__.py +19 -0
  461. numba_cuda/numba/cuda/typing/arraydecl.py +939 -0
  462. numba_cuda/numba/cuda/typing/asnumbatype.py +130 -0
  463. numba_cuda/numba/cuda/typing/bufproto.py +70 -0
  464. numba_cuda/numba/cuda/typing/builtins.py +1209 -0
  465. numba_cuda/numba/cuda/typing/cffi_utils.py +219 -0
  466. numba_cuda/numba/cuda/typing/cmathdecl.py +47 -0
  467. numba_cuda/numba/cuda/typing/collections.py +138 -0
  468. numba_cuda/numba/cuda/typing/context.py +782 -0
  469. numba_cuda/numba/cuda/typing/ctypes_utils.py +125 -0
  470. numba_cuda/numba/cuda/typing/dictdecl.py +63 -0
  471. numba_cuda/numba/cuda/typing/enumdecl.py +74 -0
  472. numba_cuda/numba/cuda/typing/listdecl.py +147 -0
  473. numba_cuda/numba/cuda/typing/mathdecl.py +158 -0
  474. numba_cuda/numba/cuda/typing/npdatetime.py +322 -0
  475. numba_cuda/numba/cuda/typing/npydecl.py +749 -0
  476. numba_cuda/numba/cuda/typing/setdecl.py +115 -0
  477. numba_cuda/numba/cuda/typing/templates.py +1446 -0
  478. numba_cuda/numba/cuda/typing/typeof.py +301 -0
  479. numba_cuda/numba/cuda/ufuncs.py +746 -0
  480. numba_cuda/numba/cuda/utils.py +724 -0
  481. numba_cuda/numba/cuda/vector_types.py +214 -0
  482. numba_cuda/numba/cuda/vectorizers.py +260 -0
  483. numba_cuda-0.21.1.dist-info/METADATA +109 -0
  484. numba_cuda-0.21.1.dist-info/RECORD +488 -0
  485. numba_cuda-0.21.1.dist-info/WHEEL +5 -0
  486. numba_cuda-0.21.1.dist-info/licenses/LICENSE +26 -0
  487. numba_cuda-0.21.1.dist-info/licenses/LICENSE.numba +24 -0
  488. numba_cuda-0.21.1.dist-info/top_level.txt +1 -0
@@ -0,0 +1,10 @@
1
+ /*
2
+ * SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3
+ * SPDX-License-Identifier: BSD-2-Clause
4
+ */
5
+
6
+ // Not all CUDA includes are safe to include in device code compiled by NVRTC,
7
+ // because it does not have paths to all system include directories. Headers
8
+ // such as cuda_device_runtime_api.h are safe to use in NVRTC without adding
9
+ // additional includes.
10
+ #include <cuda_device_runtime_api.h>
@@ -0,0 +1,12 @@
1
+ /*
2
+ * SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3
+ * SPDX-License-Identifier: BSD-2-Clause
4
+ */
5
+
6
+ extern "C" __device__
7
+ int bar(int* out, int a) {
8
+ // Explicitly placed to generate an error
9
+ SYNTAX ERROR
10
+ *out = a * 2;
11
+ return 0;
12
+ }
@@ -0,0 +1,8 @@
1
+ /*
2
+ * SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3
+ * SPDX-License-Identifier: BSD-2-Clause
4
+ */
5
+
6
+ // Templated addition function: myadd
7
+ template <typename T>
8
+ __device__ T myadd(T a, T b) { return a + b; }
@@ -0,0 +1,28 @@
1
+ /*
2
+ * SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3
+ * SPDX-License-Identifier: BSD-2-Clause
4
+ */
5
+
6
+ // Compile with:
7
+ //
8
+ // nvcc -gencode arch=compute_50,code=compute_50 -rdc true -ptx jitlink.cu
9
+ //
10
+ // using the oldest supported toolkit version (10.2 at the time of writing).
11
+
12
+ extern "C" __device__
13
+ int bar(int *out, int a)
14
+ {
15
+ *out = a * 2;
16
+ return 0;
17
+ }
18
+
19
+
20
+ // The out argument is necessary due to Numba's CUDA calling convention, which
21
+ // always reserves the first parameter for a pointer to a returned value, even
22
+ // if there is no return value.
23
+ extern "C" __device__
24
+ int array_mutator(void *out, int *a)
25
+ {
26
+ a[0] = a[1];
27
+ return 0;
28
+ }
@@ -0,0 +1,49 @@
1
+ //
2
+ // Generated by NVIDIA NVVM Compiler
3
+ //
4
+ // Compiler Build ID: CL-27506705
5
+ // Cuda compilation tools, release 10.2, V10.2.89
6
+ // Based on LLVM 3.4svn
7
+ //
8
+
9
+ .version 6.5
10
+ .target sm_50
11
+ .address_size 64
12
+
13
+ // .globl bar
14
+
15
+ .visible .func (.param .b32 func_retval0) bar(
16
+ .param .b64 bar_param_0,
17
+ .param .b32 bar_param_1
18
+ )
19
+ {
20
+ .reg .b32 %r<4>;
21
+ .reg .b64 %rd<2>;
22
+
23
+
24
+ ld.param.u64 %rd1, [bar_param_0];
25
+ ld.param.u32 %r1, [bar_param_1];
26
+ shl.b32 %r2, %r1, 1;
27
+ st.u32 [%rd1], %r2;
28
+ mov.u32 %r3, 0;
29
+ st.param.b32 [func_retval0+0], %r3;
30
+ ret;
31
+ }
32
+
33
+ // .globl array_mutator
34
+ .visible .func (.param .b32 func_retval0) array_mutator(
35
+ .param .b64 array_mutator_param_0,
36
+ .param .b64 array_mutator_param_1
37
+ )
38
+ {
39
+ .reg .b32 %r<3>;
40
+ .reg .b64 %rd<2>;
41
+
42
+
43
+ ld.param.u64 %rd1, [array_mutator_param_1];
44
+ ld.u32 %r1, [%rd1+4];
45
+ st.u32 [%rd1], %r1;
46
+ mov.u32 %r2, 0;
47
+ st.param.b32 [func_retval0+0], %r2;
48
+ ret;
49
+ }
@@ -0,0 +1,12 @@
1
+ /*
2
+ * SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3
+ * SPDX-License-Identifier: BSD-2-Clause
4
+ */
5
+
6
+ extern "C" __device__
7
+ int bar(int* out, int a) {
8
+ // Explicitly placed to generate a warning for testing the NVRTC program log
9
+ int unused;
10
+ *out = a * 2;
11
+ return 0;
12
+ }
@@ -0,0 +1,9 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: BSD-2-Clause
3
+
4
+ from numba.cuda.tests import load_testsuite
5
+ import os
6
+
7
+
8
+ def load_tests(loader, tests, pattern):
9
+ return load_testsuite(loader, os.path.dirname(__file__))
@@ -0,0 +1,2 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: BSD-2-Clause
@@ -0,0 +1,54 @@
1
+ /*
2
+ * SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3
+ * SPDX-License-Identifier: BSD-2-Clause
4
+ */
5
+
6
+ // magictoken.ex_mul_f32_f32.begin
7
+ // Foreign function example: multiplication of a pair of floats
8
+
9
+ extern "C" __device__ int
10
+ mul_f32_f32(
11
+ float* return_value,
12
+ float x,
13
+ float y)
14
+ {
15
+ // Compute result and store in caller-provided slot
16
+ *return_value = x * y;
17
+
18
+ // Signal that no Python exception occurred
19
+ return 0;
20
+ }
21
+ // magictoken.ex_mul_f32_f32.end
22
+
23
+
24
+ // magictoken.ex_sum_reduce_proto.begin
25
+ extern "C"
26
+ __device__ int
27
+ sum_reduce(
28
+ float* return_value,
29
+ float* array,
30
+ int n
31
+ );
32
+ // magictoken.ex_sum_reduce_proto.end
33
+
34
+
35
+ // Performs a simple reduction on an array passed by pointer using the
36
+ // ffi.from_buffer() method. Implements the prototype above.
37
+ extern "C"
38
+ __device__ int
39
+ sum_reduce(
40
+ float* return_value,
41
+ float* array,
42
+ int n
43
+ )
44
+ {
45
+ double sum = 0.0; // accumulate in double; narrowed to float on store
46
+
47
+ for (int i = 0; i < n; ++i) { // signed index: a negative n yields an empty loop
48
+ sum += array[i];
49
+ }
50
+
51
+ *return_value = (float)sum;
52
+
53
+ return 0;
54
+ }
@@ -0,0 +1,8 @@
1
+ /*
2
+ * SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3
+ * SPDX-License-Identifier: BSD-2-Clause
4
+ */
5
+
6
+ // Templated multiplication function: mymul
7
+ template <typename T>
8
+ __device__ T mymul(T a, T b) { return a * b; }
@@ -0,0 +1,14 @@
1
+ /*
2
+ * SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3
+ * SPDX-License-Identifier: BSD-2-Clause
4
+ */
5
+
6
+ #include <add.cuh> // In numba/cuda/tests/data/include
7
+ #include <mul.cuh> // In numba/cuda/tests/doc_examples/ffi/include
8
+
9
+ extern "C"
10
+ __device__ int saxpy(float *ret, float a, float x, float y)
11
+ {
12
+ *ret = myadd(mymul(a, x), y);
13
+ return 0;
14
+ }
@@ -0,0 +1,86 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: BSD-2-Clause
3
+
4
+ # Contents in this file are referenced from the sphinx-generated docs.
5
+ # "magictoken" markers delimit the beginning and end of each example excerpt.
6
+
7
+ import unittest
8
+ from numba.cuda.testing import (
9
+ CUDATestCase,
10
+ skip_on_cudasim,
11
+ skip_if_cudadevrt_missing,
12
+ skip_unless_cc_60,
13
+ )
14
+
15
+
16
+ @skip_if_cudadevrt_missing
17
+ @skip_unless_cc_60
18
+ @skip_on_cudasim("cudasim doesn't support cuda import at non-top-level")
19
+ class TestCooperativeGroups(CUDATestCase):
20
+ def test_ex_grid_sync(self):
21
+ # magictoken.ex_grid_sync_kernel.begin
22
+ from numba import cuda
23
+ from numba.cuda import int32
24
+ import numpy as np
25
+
26
+ sig = (int32[:, ::1],)
27
+
28
+ @cuda.jit(sig)
29
+ def sequential_rows(M):
30
+ col = cuda.grid(1)
31
+ g = cuda.cg.this_grid()
32
+
33
+ rows = M.shape[0]
34
+ cols = M.shape[1]
35
+
36
+ for row in range(1, rows):
37
+ opposite = cols - col - 1
38
+ # Each row's elements are one greater than the previous row
39
+ M[row, col] = M[row - 1, opposite] + 1
40
+ # Wait until all threads have written their column element,
41
+ # and those writes are visible to all other threads
42
+ g.sync()
43
+
44
+ # magictoken.ex_grid_sync_kernel.end
45
+
46
+ # magictoken.ex_grid_sync_data.begin
47
+ # Empty input data
48
+ A = np.zeros((1024, 1024), dtype=np.int32)
49
+ # A somewhat arbitrary choice (one warp), but generally smaller block sizes
50
+ # allow more blocks to be launched (noting that other limitations on
51
+ # occupancy apply such as shared memory size)
52
+ blockdim = 32
53
+ griddim = A.shape[1] // blockdim
54
+ # magictoken.ex_grid_sync_data.end
55
+
56
+ # Skip this test if the grid size used in the example is too large for
57
+ # a cooperative launch on the current GPU
58
+ mb = sequential_rows.overloads[sig].max_cooperative_grid_blocks(
59
+ blockdim
60
+ )
61
+ if mb < griddim:
62
+ self.skipTest("Device does not support a large enough coop grid")
63
+
64
+ # magictoken.ex_grid_sync_launch.begin
65
+ # Kernel launch - this is implicitly a cooperative launch
66
+ sequential_rows[griddim, blockdim](A)
67
+
68
+ # What do the results look like?
69
+ # print(A)
70
+ #
71
+ # [[ 0 0 0 ... 0 0 0]
72
+ # [ 1 1 1 ... 1 1 1]
73
+ # [ 2 2 2 ... 2 2 2]
74
+ # ...
75
+ # [1021 1021 1021 ... 1021 1021 1021]
76
+ # [1022 1022 1022 ... 1022 1022 1022]
77
+ # [1023 1023 1023 ... 1023 1023 1023]]
78
+ # magictoken.ex_grid_sync_launch.end
79
+
80
+ # Sanity check - are the results what we expect?
81
+ reference = np.tile(np.arange(1024), (1024, 1)).T
82
+ np.testing.assert_equal(A, reference)
83
+
84
+
85
+ if __name__ == "__main__":
86
+ unittest.main()
@@ -0,0 +1,68 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: BSD-2-Clause
3
+
4
+ import unittest
5
+
6
+ from numba.cuda.testing import CUDATestCase, skip_on_cudasim
7
+ from numba.cuda.tests.support import captured_stdout
8
+
9
+
10
+ @skip_on_cudasim("cudasim doesn't support cuda import at non-top-level")
11
+ class TestCPointer(CUDATestCase):
12
+ """
13
+ Test passing a raw device pointer to a kernel via a CPointer argument
14
+ """
15
+
16
+ def setUp(self):
17
+ # Prevent output from this test showing
18
+ # up when running the test suite
19
+ self._captured_stdout = captured_stdout()
20
+ self._captured_stdout.__enter__()
21
+ super().setUp()
22
+
23
+ def tearDown(self):
24
+ # No exception type, value, or traceback
25
+ self._captured_stdout.__exit__(None, None, None)
26
+ super().tearDown()
27
+
28
+ def test_ex_cpointer(self):
29
+ # ex_cpointer.sig.begin
30
+ import numpy as np
31
+ from numba import cuda
32
+ from numba.cuda import types
33
+
34
+ # The first kernel argument is a pointer to a uint8 array.
35
+ # The second argument holds the length as a uint32.
36
+ # The return type of a kernel is always void.
37
+ sig = types.void(types.CPointer(types.uint8), types.uint32)
38
+ # ex_cpointer.sig.end
39
+
40
+ # ex_cpointer.kernel.begin
41
+ @cuda.jit(sig)
42
+ def add_one(x, n):
43
+ i = cuda.grid(1)
44
+ if i < n:
45
+ x[i] += 1
46
+
47
+ # ex_cpointer.kernel.end
48
+
49
+ # ex_cpointer.launch.begin
50
+ x = cuda.to_device(np.arange(10, dtype=np.uint8))
51
+
52
+ # Print initial values of x
53
+ print(x.copy_to_host()) # [0 1 2 3 4 5 6 7 8 9]
54
+
55
+ # Obtain a pointer to the data from the CUDA Array Interface
56
+ x_ptr = x.__cuda_array_interface__["data"][0]
57
+ x_len = len(x)
58
+
59
+ # Launch the kernel with the pointer and length
60
+ add_one[1, 32](x_ptr, x_len)
61
+
62
+ # Demonstrate that the data was updated by the kernel
63
+ print(x.copy_to_host()) # [ 1 2 3 4 5 6 7 8 9 10]
64
+ # ex_cpointer.launch.end
65
+
66
+
67
+ if __name__ == "__main__":
68
+ unittest.main()
@@ -0,0 +1,81 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: BSD-2-Clause
3
+
4
+ import unittest
5
+
6
+ from numba.cuda.testing import (
7
+ CUDATestCase,
8
+ skip_on_cudasim,
9
+ skip_on_standalone_numba_cuda,
10
+ )
11
+ from numba.cuda.tests.support import captured_stdout
12
+ import numpy as np
13
+
14
+
15
+ @skip_on_cudasim("cudasim doesn't support cuda import at non-top-level")
16
+ class TestCpuGpuCompat(CUDATestCase):
17
+ """
18
+ Test compatibility of CPU and GPU functions
19
+ """
20
+
21
+ def setUp(self):
22
+ # Prevent output from this test showing up when running the test suite
23
+ self._captured_stdout = captured_stdout()
24
+ self._captured_stdout.__enter__()
25
+ super().setUp()
26
+
27
+ def tearDown(self):
28
+ # No exception type, value, or traceback
29
+ self._captured_stdout.__exit__(None, None, None)
30
+ super().tearDown()
31
+
32
+ @skip_on_standalone_numba_cuda
33
+ def test_ex_cpu_gpu_compat(self):
34
+ # ex_cpu_gpu_compat.import.begin
35
+ from math import pi
36
+
37
+ import numba
38
+ from numba import cuda
39
+ # ex_cpu_gpu_compat.import.end
40
+
41
+ # ex_cpu_gpu_compat.allocate.begin
42
+ X = cuda.to_device([1, 10, 234])
43
+ Y = cuda.to_device([2, 2, 4014])
44
+ Z = cuda.to_device([3, 14, 2211])
45
+ results = cuda.to_device([0.0, 0.0, 0.0])
46
+ # ex_cpu_gpu_compat.allocate.end
47
+
48
+ # ex_cpu_gpu_compat.define.begin
49
+ @numba.jit
50
+ def business_logic(x, y, z):
51
+ return 4 * z * (2 * x - (4 * y) / 2 * pi)
52
+
53
+ # ex_cpu_gpu_compat.define.end
54
+
55
+ # ex_cpu_gpu_compat.cpurun.begin
56
+ print(business_logic(1, 2, 3)) # -126.79644737231007
57
+ # ex_cpu_gpu_compat.cpurun.end
58
+
59
+ # ex_cpu_gpu_compat.usegpu.begin
60
+ @cuda.jit
61
+ def f(res, xarr, yarr, zarr):
62
+ tid = cuda.grid(1)
63
+ if tid < len(xarr):
64
+ # The function decorated with numba.jit may be directly reused
65
+ res[tid] = business_logic(xarr[tid], yarr[tid], zarr[tid])
66
+
67
+ # ex_cpu_gpu_compat.usegpu.end
68
+
69
+ # ex_cpu_gpu_compat.launch.begin
70
+ f.forall(len(X))(results, X, Y, Z)
71
+ print(results)
72
+ # [-126.79644737231007, 416.28324559588634, -218912930.2987788]
73
+ # ex_cpu_gpu_compat.launch.end
74
+
75
+ expect = [business_logic(x, y, z) for x, y, z in zip(X, Y, Z)]
76
+
77
+ np.testing.assert_equal(expect, results.copy_to_host())
78
+
79
+
80
+ if __name__ == "__main__":
81
+ unittest.main()
@@ -0,0 +1,141 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: BSD-2-Clause
3
+
4
+ # Contents in this file are referenced from the sphinx-generated docs.
5
+ # "magictoken" markers delimit the beginning and end of each example excerpt.
6
+
7
+ import unittest
8
+ from numba.cuda.testing import CUDATestCase, skip_on_cudasim
9
+ from numba.cuda.tests.support import skip_unless_cffi, override_config
10
+
11
+
12
+ @skip_unless_cffi
13
+ @skip_on_cudasim("cudasim doesn't support cuda import at non-top-level")
14
+ class TestFFI(CUDATestCase):
15
+ def test_ex_linking_cu(self):
16
+ # magictoken.ex_linking_cu.begin
17
+ from numba import cuda
18
+ import numpy as np
19
+ import os
20
+
21
+ # Path to the source containing the foreign function
22
+ # (here assumed to be in a subdirectory called "ffi")
23
+ basedir = os.path.dirname(os.path.abspath(__file__))
24
+ functions_cu = os.path.join(basedir, "ffi", "functions.cu")
25
+
26
+ # Declaration of the foreign function
27
+ mul = cuda.declare_device(
28
+ "mul_f32_f32", "float32(float32, float32)", link=functions_cu
29
+ )
30
+
31
+ # A kernel that calls mul; functions.cu is linked automatically due to
32
+ # the call to mul.
33
+ @cuda.jit
34
+ def multiply_vectors(r, x, y):
35
+ i = cuda.grid(1)
36
+
37
+ if i < len(r):
38
+ r[i] = mul(x[i], y[i])
39
+
40
+ # Generate random data
41
+ N = 32
42
+ np.random.seed(1)
43
+ x = np.random.rand(N).astype(np.float32)
44
+ y = np.random.rand(N).astype(np.float32)
45
+ r = np.zeros_like(x)
46
+
47
+ # Run the kernel
48
+ multiply_vectors[1, 32](r, x, y)
49
+
50
+ # Sanity check - ensure the results match those expected
51
+ np.testing.assert_array_equal(r, x * y)
52
+ # magictoken.ex_linking_cu.end
53
+
54
+ def test_ex_from_buffer(self):
55
+ from numba import cuda
56
+ import os
57
+
58
+ basedir = os.path.dirname(os.path.abspath(__file__))
59
+ functions_cu = os.path.join(basedir, "ffi", "functions.cu")
60
+
61
+ # magictoken.ex_from_buffer_decl.begin
62
+ signature = "float32(CPointer(float32), int32)"
63
+ sum_reduce = cuda.declare_device(
64
+ "sum_reduce", signature, link=functions_cu
65
+ )
66
+ # magictoken.ex_from_buffer_decl.end
67
+
68
+ # magictoken.ex_from_buffer_kernel.begin
69
+ import cffi
70
+
71
+ ffi = cffi.FFI()
72
+
73
+ @cuda.jit
74
+ def reduction_caller(result, array):
75
+ array_ptr = ffi.from_buffer(array)
76
+ result[()] = sum_reduce(array_ptr, len(array))
77
+
78
+ # magictoken.ex_from_buffer_kernel.end
79
+
80
+ import numpy as np
81
+
82
+ x = np.arange(10).astype(np.float32)
83
+ r = np.ndarray((), dtype=np.float32)
84
+
85
+ reduction_caller[1, 1](r, x)
86
+
87
+ expected = np.sum(x)
88
+ actual = r[()]
89
+ np.testing.assert_allclose(expected, actual)
90
+
91
+ def test_ex_extra_includes(self):
92
+ import numpy as np
93
+ from numba import cuda
94
+ from numba.cuda import config
95
+ import os
96
+
97
+ basedir = os.path.dirname(os.path.abspath(__file__))
98
+ mul_dir = os.path.join(basedir, "ffi", "include")
99
+ saxpy_cu = os.path.join(basedir, "ffi", "saxpy.cu")
100
+
101
+ testdir = os.path.dirname(basedir)
102
+ add_dir = os.path.join(testdir, "data", "include")
103
+
104
+ includedir = ":".join([mul_dir, add_dir])
105
+ with override_config("CUDA_NVRTC_EXTRA_SEARCH_PATHS", includedir):
106
+ # magictoken.ex_extra_search_paths.begin
107
+ from numba.cuda import config
108
+
109
+ includedir = ":".join([mul_dir, add_dir])
110
+ config.CUDA_NVRTC_EXTRA_SEARCH_PATHS = includedir
111
+ # magictoken.ex_extra_search_paths.end
112
+
113
+ # magictoken.ex_extra_search_paths_kernel.begin
114
+ sig = "float32(float32, float32, float32)"
115
+ saxpy = cuda.declare_device("saxpy", sig=sig, link=saxpy_cu)
116
+
117
+ @cuda.jit
118
+ def vector_saxpy(a, x, y, res):
119
+ i = cuda.grid(1)
120
+ if i < len(res):
121
+ res[i] = saxpy(a, x[i], y[i])
122
+
123
+ # magictoken.ex_extra_search_paths_kernel.end
124
+
125
+ size = 10_000
126
+ a = 3.0
127
+ X = np.ones((size,), dtype="float32")
128
+ Y = np.ones((size,), dtype="float32")
129
+ R = np.zeros((size,), dtype="float32")
130
+
131
+ block_size = 32
132
+ num_blocks = (size // block_size) + 1
133
+
134
+ vector_saxpy[num_blocks, block_size](a, X, Y, R)
135
+
136
+ expected = a * X + Y
137
+ np.testing.assert_equal(R, expected)
138
+
139
+
140
+ if __name__ == "__main__":
141
+ unittest.main()