numba-cuda 0.21.1__cp313-cp313-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (488)
  1. _numba_cuda_redirector.pth +4 -0
  2. _numba_cuda_redirector.py +89 -0
  3. numba_cuda/VERSION +1 -0
  4. numba_cuda/__init__.py +6 -0
  5. numba_cuda/_version.py +11 -0
  6. numba_cuda/numba/cuda/__init__.py +70 -0
  7. numba_cuda/numba/cuda/_internal/cuda_bf16.py +16394 -0
  8. numba_cuda/numba/cuda/_internal/cuda_fp16.py +8112 -0
  9. numba_cuda/numba/cuda/api.py +577 -0
  10. numba_cuda/numba/cuda/api_util.py +76 -0
  11. numba_cuda/numba/cuda/args.py +72 -0
  12. numba_cuda/numba/cuda/bf16.py +397 -0
  13. numba_cuda/numba/cuda/cache_hints.py +287 -0
  14. numba_cuda/numba/cuda/cext/__init__.py +2 -0
  15. numba_cuda/numba/cuda/cext/_devicearray.cp313-win_amd64.pyd +0 -0
  16. numba_cuda/numba/cuda/cext/_devicearray.cpp +159 -0
  17. numba_cuda/numba/cuda/cext/_devicearray.h +29 -0
  18. numba_cuda/numba/cuda/cext/_dispatcher.cp313-win_amd64.pyd +0 -0
  19. numba_cuda/numba/cuda/cext/_dispatcher.cpp +1098 -0
  20. numba_cuda/numba/cuda/cext/_hashtable.cpp +532 -0
  21. numba_cuda/numba/cuda/cext/_hashtable.h +135 -0
  22. numba_cuda/numba/cuda/cext/_helperlib.c +71 -0
  23. numba_cuda/numba/cuda/cext/_helperlib.cp313-win_amd64.pyd +0 -0
  24. numba_cuda/numba/cuda/cext/_helpermod.c +82 -0
  25. numba_cuda/numba/cuda/cext/_pymodule.h +38 -0
  26. numba_cuda/numba/cuda/cext/_typeconv.cp313-win_amd64.pyd +0 -0
  27. numba_cuda/numba/cuda/cext/_typeconv.cpp +206 -0
  28. numba_cuda/numba/cuda/cext/_typeof.cpp +1159 -0
  29. numba_cuda/numba/cuda/cext/_typeof.h +19 -0
  30. numba_cuda/numba/cuda/cext/capsulethunk.h +111 -0
  31. numba_cuda/numba/cuda/cext/mviewbuf.c +385 -0
  32. numba_cuda/numba/cuda/cext/mviewbuf.cp313-win_amd64.pyd +0 -0
  33. numba_cuda/numba/cuda/cext/typeconv.cpp +212 -0
  34. numba_cuda/numba/cuda/cext/typeconv.hpp +101 -0
  35. numba_cuda/numba/cuda/cg.py +67 -0
  36. numba_cuda/numba/cuda/cgutils.py +1294 -0
  37. numba_cuda/numba/cuda/cloudpickle/__init__.py +21 -0
  38. numba_cuda/numba/cuda/cloudpickle/cloudpickle.py +1598 -0
  39. numba_cuda/numba/cuda/cloudpickle/cloudpickle_fast.py +17 -0
  40. numba_cuda/numba/cuda/codegen.py +541 -0
  41. numba_cuda/numba/cuda/compiler.py +1396 -0
  42. numba_cuda/numba/cuda/core/analysis.py +758 -0
  43. numba_cuda/numba/cuda/core/annotations/__init__.py +0 -0
  44. numba_cuda/numba/cuda/core/annotations/pretty_annotate.py +288 -0
  45. numba_cuda/numba/cuda/core/annotations/type_annotations.py +305 -0
  46. numba_cuda/numba/cuda/core/base.py +1332 -0
  47. numba_cuda/numba/cuda/core/boxing.py +1411 -0
  48. numba_cuda/numba/cuda/core/bytecode.py +728 -0
  49. numba_cuda/numba/cuda/core/byteflow.py +2346 -0
  50. numba_cuda/numba/cuda/core/caching.py +744 -0
  51. numba_cuda/numba/cuda/core/callconv.py +392 -0
  52. numba_cuda/numba/cuda/core/codegen.py +171 -0
  53. numba_cuda/numba/cuda/core/compiler.py +199 -0
  54. numba_cuda/numba/cuda/core/compiler_lock.py +85 -0
  55. numba_cuda/numba/cuda/core/compiler_machinery.py +497 -0
  56. numba_cuda/numba/cuda/core/config.py +650 -0
  57. numba_cuda/numba/cuda/core/consts.py +124 -0
  58. numba_cuda/numba/cuda/core/controlflow.py +989 -0
  59. numba_cuda/numba/cuda/core/entrypoints.py +57 -0
  60. numba_cuda/numba/cuda/core/environment.py +66 -0
  61. numba_cuda/numba/cuda/core/errors.py +917 -0
  62. numba_cuda/numba/cuda/core/event.py +511 -0
  63. numba_cuda/numba/cuda/core/funcdesc.py +330 -0
  64. numba_cuda/numba/cuda/core/generators.py +387 -0
  65. numba_cuda/numba/cuda/core/imputils.py +509 -0
  66. numba_cuda/numba/cuda/core/inline_closurecall.py +1787 -0
  67. numba_cuda/numba/cuda/core/interpreter.py +3617 -0
  68. numba_cuda/numba/cuda/core/ir.py +1812 -0
  69. numba_cuda/numba/cuda/core/ir_utils.py +2638 -0
  70. numba_cuda/numba/cuda/core/optional.py +129 -0
  71. numba_cuda/numba/cuda/core/options.py +262 -0
  72. numba_cuda/numba/cuda/core/postproc.py +249 -0
  73. numba_cuda/numba/cuda/core/pythonapi.py +1859 -0
  74. numba_cuda/numba/cuda/core/registry.py +46 -0
  75. numba_cuda/numba/cuda/core/removerefctpass.py +123 -0
  76. numba_cuda/numba/cuda/core/rewrites/__init__.py +26 -0
  77. numba_cuda/numba/cuda/core/rewrites/ir_print.py +91 -0
  78. numba_cuda/numba/cuda/core/rewrites/registry.py +104 -0
  79. numba_cuda/numba/cuda/core/rewrites/static_binop.py +41 -0
  80. numba_cuda/numba/cuda/core/rewrites/static_getitem.py +189 -0
  81. numba_cuda/numba/cuda/core/rewrites/static_raise.py +100 -0
  82. numba_cuda/numba/cuda/core/sigutils.py +68 -0
  83. numba_cuda/numba/cuda/core/ssa.py +498 -0
  84. numba_cuda/numba/cuda/core/targetconfig.py +330 -0
  85. numba_cuda/numba/cuda/core/tracing.py +231 -0
  86. numba_cuda/numba/cuda/core/transforms.py +956 -0
  87. numba_cuda/numba/cuda/core/typed_passes.py +867 -0
  88. numba_cuda/numba/cuda/core/typeinfer.py +1950 -0
  89. numba_cuda/numba/cuda/core/unsafe/__init__.py +0 -0
  90. numba_cuda/numba/cuda/core/unsafe/bytes.py +67 -0
  91. numba_cuda/numba/cuda/core/unsafe/eh.py +67 -0
  92. numba_cuda/numba/cuda/core/unsafe/refcount.py +98 -0
  93. numba_cuda/numba/cuda/core/untyped_passes.py +1979 -0
  94. numba_cuda/numba/cuda/cpython/builtins.py +1153 -0
  95. numba_cuda/numba/cuda/cpython/charseq.py +1218 -0
  96. numba_cuda/numba/cuda/cpython/cmathimpl.py +560 -0
  97. numba_cuda/numba/cuda/cpython/enumimpl.py +103 -0
  98. numba_cuda/numba/cuda/cpython/iterators.py +167 -0
  99. numba_cuda/numba/cuda/cpython/listobj.py +1326 -0
  100. numba_cuda/numba/cuda/cpython/mathimpl.py +499 -0
  101. numba_cuda/numba/cuda/cpython/numbers.py +1475 -0
  102. numba_cuda/numba/cuda/cpython/rangeobj.py +289 -0
  103. numba_cuda/numba/cuda/cpython/slicing.py +322 -0
  104. numba_cuda/numba/cuda/cpython/tupleobj.py +456 -0
  105. numba_cuda/numba/cuda/cpython/unicode.py +2865 -0
  106. numba_cuda/numba/cuda/cpython/unicode_support.py +1597 -0
  107. numba_cuda/numba/cuda/cpython/unsafe/__init__.py +0 -0
  108. numba_cuda/numba/cuda/cpython/unsafe/numbers.py +64 -0
  109. numba_cuda/numba/cuda/cpython/unsafe/tuple.py +92 -0
  110. numba_cuda/numba/cuda/cuda_paths.py +691 -0
  111. numba_cuda/numba/cuda/cudadecl.py +556 -0
  112. numba_cuda/numba/cuda/cudadrv/__init__.py +14 -0
  113. numba_cuda/numba/cuda/cudadrv/devicearray.py +951 -0
  114. numba_cuda/numba/cuda/cudadrv/devices.py +249 -0
  115. numba_cuda/numba/cuda/cudadrv/driver.py +3222 -0
  116. numba_cuda/numba/cuda/cudadrv/drvapi.py +435 -0
  117. numba_cuda/numba/cuda/cudadrv/dummyarray.py +558 -0
  118. numba_cuda/numba/cuda/cudadrv/enums.py +613 -0
  119. numba_cuda/numba/cuda/cudadrv/error.py +48 -0
  120. numba_cuda/numba/cuda/cudadrv/libs.py +220 -0
  121. numba_cuda/numba/cuda/cudadrv/linkable_code.py +184 -0
  122. numba_cuda/numba/cuda/cudadrv/mappings.py +14 -0
  123. numba_cuda/numba/cuda/cudadrv/ndarray.py +26 -0
  124. numba_cuda/numba/cuda/cudadrv/nvrtc.py +193 -0
  125. numba_cuda/numba/cuda/cudadrv/nvvm.py +756 -0
  126. numba_cuda/numba/cuda/cudadrv/rtapi.py +13 -0
  127. numba_cuda/numba/cuda/cudadrv/runtime.py +34 -0
  128. numba_cuda/numba/cuda/cudaimpl.py +995 -0
  129. numba_cuda/numba/cuda/cudamath.py +149 -0
  130. numba_cuda/numba/cuda/datamodel/__init__.py +7 -0
  131. numba_cuda/numba/cuda/datamodel/cuda_manager.py +66 -0
  132. numba_cuda/numba/cuda/datamodel/cuda_models.py +1446 -0
  133. numba_cuda/numba/cuda/datamodel/cuda_packer.py +224 -0
  134. numba_cuda/numba/cuda/datamodel/cuda_registry.py +22 -0
  135. numba_cuda/numba/cuda/datamodel/cuda_testing.py +153 -0
  136. numba_cuda/numba/cuda/datamodel/manager.py +11 -0
  137. numba_cuda/numba/cuda/datamodel/models.py +9 -0
  138. numba_cuda/numba/cuda/datamodel/packer.py +9 -0
  139. numba_cuda/numba/cuda/datamodel/registry.py +11 -0
  140. numba_cuda/numba/cuda/datamodel/testing.py +11 -0
  141. numba_cuda/numba/cuda/debuginfo.py +903 -0
  142. numba_cuda/numba/cuda/decorators.py +294 -0
  143. numba_cuda/numba/cuda/descriptor.py +35 -0
  144. numba_cuda/numba/cuda/device_init.py +158 -0
  145. numba_cuda/numba/cuda/deviceufunc.py +1021 -0
  146. numba_cuda/numba/cuda/dispatcher.py +2463 -0
  147. numba_cuda/numba/cuda/errors.py +72 -0
  148. numba_cuda/numba/cuda/extending.py +697 -0
  149. numba_cuda/numba/cuda/flags.py +178 -0
  150. numba_cuda/numba/cuda/fp16.py +357 -0
  151. numba_cuda/numba/cuda/include/12/cuda_bf16.h +5118 -0
  152. numba_cuda/numba/cuda/include/12/cuda_bf16.hpp +3865 -0
  153. numba_cuda/numba/cuda/include/12/cuda_fp16.h +5363 -0
  154. numba_cuda/numba/cuda/include/12/cuda_fp16.hpp +3483 -0
  155. numba_cuda/numba/cuda/include/13/cuda_bf16.h +5118 -0
  156. numba_cuda/numba/cuda/include/13/cuda_bf16.hpp +3865 -0
  157. numba_cuda/numba/cuda/include/13/cuda_fp16.h +5363 -0
  158. numba_cuda/numba/cuda/include/13/cuda_fp16.hpp +3483 -0
  159. numba_cuda/numba/cuda/initialize.py +24 -0
  160. numba_cuda/numba/cuda/intrinsic_wrapper.py +41 -0
  161. numba_cuda/numba/cuda/intrinsics.py +382 -0
  162. numba_cuda/numba/cuda/itanium_mangler.py +214 -0
  163. numba_cuda/numba/cuda/kernels/__init__.py +2 -0
  164. numba_cuda/numba/cuda/kernels/reduction.py +265 -0
  165. numba_cuda/numba/cuda/kernels/transpose.py +65 -0
  166. numba_cuda/numba/cuda/libdevice.py +3386 -0
  167. numba_cuda/numba/cuda/libdevicedecl.py +20 -0
  168. numba_cuda/numba/cuda/libdevicefuncs.py +1060 -0
  169. numba_cuda/numba/cuda/libdeviceimpl.py +88 -0
  170. numba_cuda/numba/cuda/locks.py +19 -0
  171. numba_cuda/numba/cuda/lowering.py +1951 -0
  172. numba_cuda/numba/cuda/mathimpl.py +374 -0
  173. numba_cuda/numba/cuda/memory_management/__init__.py +4 -0
  174. numba_cuda/numba/cuda/memory_management/memsys.cu +99 -0
  175. numba_cuda/numba/cuda/memory_management/memsys.cuh +22 -0
  176. numba_cuda/numba/cuda/memory_management/nrt.cu +212 -0
  177. numba_cuda/numba/cuda/memory_management/nrt.cuh +48 -0
  178. numba_cuda/numba/cuda/memory_management/nrt.py +390 -0
  179. numba_cuda/numba/cuda/memory_management/nrt_context.py +438 -0
  180. numba_cuda/numba/cuda/misc/appdirs.py +594 -0
  181. numba_cuda/numba/cuda/misc/cffiimpl.py +24 -0
  182. numba_cuda/numba/cuda/misc/coverage_support.py +43 -0
  183. numba_cuda/numba/cuda/misc/dump_style.py +41 -0
  184. numba_cuda/numba/cuda/misc/findlib.py +75 -0
  185. numba_cuda/numba/cuda/misc/firstlinefinder.py +96 -0
  186. numba_cuda/numba/cuda/misc/gdb_hook.py +240 -0
  187. numba_cuda/numba/cuda/misc/literal.py +28 -0
  188. numba_cuda/numba/cuda/misc/llvm_pass_timings.py +412 -0
  189. numba_cuda/numba/cuda/misc/special.py +94 -0
  190. numba_cuda/numba/cuda/models.py +56 -0
  191. numba_cuda/numba/cuda/np/arraymath.py +5130 -0
  192. numba_cuda/numba/cuda/np/arrayobj.py +7635 -0
  193. numba_cuda/numba/cuda/np/extensions.py +11 -0
  194. numba_cuda/numba/cuda/np/linalg.py +3087 -0
  195. numba_cuda/numba/cuda/np/math/__init__.py +0 -0
  196. numba_cuda/numba/cuda/np/math/cmathimpl.py +558 -0
  197. numba_cuda/numba/cuda/np/math/mathimpl.py +487 -0
  198. numba_cuda/numba/cuda/np/math/numbers.py +1461 -0
  199. numba_cuda/numba/cuda/np/npdatetime.py +969 -0
  200. numba_cuda/numba/cuda/np/npdatetime_helpers.py +217 -0
  201. numba_cuda/numba/cuda/np/npyfuncs.py +1808 -0
  202. numba_cuda/numba/cuda/np/npyimpl.py +1027 -0
  203. numba_cuda/numba/cuda/np/numpy_support.py +798 -0
  204. numba_cuda/numba/cuda/np/polynomial/__init__.py +4 -0
  205. numba_cuda/numba/cuda/np/polynomial/polynomial_core.py +242 -0
  206. numba_cuda/numba/cuda/np/polynomial/polynomial_functions.py +380 -0
  207. numba_cuda/numba/cuda/np/ufunc/__init__.py +4 -0
  208. numba_cuda/numba/cuda/np/ufunc/decorators.py +203 -0
  209. numba_cuda/numba/cuda/np/ufunc/sigparse.py +68 -0
  210. numba_cuda/numba/cuda/np/ufunc/ufuncbuilder.py +65 -0
  211. numba_cuda/numba/cuda/np/ufunc_db.py +1282 -0
  212. numba_cuda/numba/cuda/np/unsafe/__init__.py +0 -0
  213. numba_cuda/numba/cuda/np/unsafe/ndarray.py +84 -0
  214. numba_cuda/numba/cuda/nvvmutils.py +254 -0
  215. numba_cuda/numba/cuda/printimpl.py +126 -0
  216. numba_cuda/numba/cuda/random.py +308 -0
  217. numba_cuda/numba/cuda/reshape_funcs.cu +156 -0
  218. numba_cuda/numba/cuda/serialize.py +267 -0
  219. numba_cuda/numba/cuda/simulator/__init__.py +63 -0
  220. numba_cuda/numba/cuda/simulator/_internal/__init__.py +4 -0
  221. numba_cuda/numba/cuda/simulator/_internal/cuda_bf16.py +2 -0
  222. numba_cuda/numba/cuda/simulator/api.py +179 -0
  223. numba_cuda/numba/cuda/simulator/bf16.py +4 -0
  224. numba_cuda/numba/cuda/simulator/compiler.py +38 -0
  225. numba_cuda/numba/cuda/simulator/cudadrv/__init__.py +11 -0
  226. numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py +462 -0
  227. numba_cuda/numba/cuda/simulator/cudadrv/devices.py +122 -0
  228. numba_cuda/numba/cuda/simulator/cudadrv/driver.py +66 -0
  229. numba_cuda/numba/cuda/simulator/cudadrv/drvapi.py +7 -0
  230. numba_cuda/numba/cuda/simulator/cudadrv/dummyarray.py +7 -0
  231. numba_cuda/numba/cuda/simulator/cudadrv/error.py +10 -0
  232. numba_cuda/numba/cuda/simulator/cudadrv/libs.py +10 -0
  233. numba_cuda/numba/cuda/simulator/cudadrv/linkable_code.py +61 -0
  234. numba_cuda/numba/cuda/simulator/cudadrv/nvrtc.py +11 -0
  235. numba_cuda/numba/cuda/simulator/cudadrv/nvvm.py +32 -0
  236. numba_cuda/numba/cuda/simulator/cudadrv/runtime.py +22 -0
  237. numba_cuda/numba/cuda/simulator/dispatcher.py +11 -0
  238. numba_cuda/numba/cuda/simulator/kernel.py +320 -0
  239. numba_cuda/numba/cuda/simulator/kernelapi.py +509 -0
  240. numba_cuda/numba/cuda/simulator/memory_management/__init__.py +4 -0
  241. numba_cuda/numba/cuda/simulator/memory_management/nrt.py +21 -0
  242. numba_cuda/numba/cuda/simulator/reduction.py +19 -0
  243. numba_cuda/numba/cuda/simulator/tests/support.py +4 -0
  244. numba_cuda/numba/cuda/simulator/vector_types.py +65 -0
  245. numba_cuda/numba/cuda/simulator_init.py +18 -0
  246. numba_cuda/numba/cuda/stubs.py +635 -0
  247. numba_cuda/numba/cuda/target.py +505 -0
  248. numba_cuda/numba/cuda/testing.py +347 -0
  249. numba_cuda/numba/cuda/tests/__init__.py +62 -0
  250. numba_cuda/numba/cuda/tests/benchmarks/__init__.py +0 -0
  251. numba_cuda/numba/cuda/tests/benchmarks/test_kernel_launch.py +119 -0
  252. numba_cuda/numba/cuda/tests/cloudpickle_main_class.py +9 -0
  253. numba_cuda/numba/cuda/tests/core/serialize_usecases.py +113 -0
  254. numba_cuda/numba/cuda/tests/core/test_itanium_mangler.py +83 -0
  255. numba_cuda/numba/cuda/tests/core/test_serialize.py +371 -0
  256. numba_cuda/numba/cuda/tests/cudadrv/__init__.py +9 -0
  257. numba_cuda/numba/cuda/tests/cudadrv/test_array_attr.py +147 -0
  258. numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py +161 -0
  259. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_array_slicing.py +397 -0
  260. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_auto_context.py +24 -0
  261. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py +180 -0
  262. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py +313 -0
  263. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_memory.py +187 -0
  264. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py +621 -0
  265. numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py +247 -0
  266. numba_cuda/numba/cuda/tests/cudadrv/test_detect.py +100 -0
  267. numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +198 -0
  268. numba_cuda/numba/cuda/tests/cudadrv/test_events.py +53 -0
  269. numba_cuda/numba/cuda/tests/cudadrv/test_host_alloc.py +72 -0
  270. numba_cuda/numba/cuda/tests/cudadrv/test_init.py +138 -0
  271. numba_cuda/numba/cuda/tests/cudadrv/test_inline_ptx.py +43 -0
  272. numba_cuda/numba/cuda/tests/cudadrv/test_is_fp16.py +15 -0
  273. numba_cuda/numba/cuda/tests/cudadrv/test_linkable_code.py +58 -0
  274. numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +348 -0
  275. numba_cuda/numba/cuda/tests/cudadrv/test_managed_alloc.py +128 -0
  276. numba_cuda/numba/cuda/tests/cudadrv/test_module_callbacks.py +301 -0
  277. numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py +174 -0
  278. numba_cuda/numba/cuda/tests/cudadrv/test_nvrtc.py +28 -0
  279. numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py +185 -0
  280. numba_cuda/numba/cuda/tests/cudadrv/test_pinned.py +39 -0
  281. numba_cuda/numba/cuda/tests/cudadrv/test_profiler.py +23 -0
  282. numba_cuda/numba/cuda/tests/cudadrv/test_reset_device.py +38 -0
  283. numba_cuda/numba/cuda/tests/cudadrv/test_runtime.py +48 -0
  284. numba_cuda/numba/cuda/tests/cudadrv/test_select_device.py +44 -0
  285. numba_cuda/numba/cuda/tests/cudadrv/test_streams.py +127 -0
  286. numba_cuda/numba/cuda/tests/cudapy/__init__.py +9 -0
  287. numba_cuda/numba/cuda/tests/cudapy/cache_usecases.py +231 -0
  288. numba_cuda/numba/cuda/tests/cudapy/cache_with_cpu_usecases.py +50 -0
  289. numba_cuda/numba/cuda/tests/cudapy/cg_cache_usecases.py +36 -0
  290. numba_cuda/numba/cuda/tests/cudapy/complex_usecases.py +116 -0
  291. numba_cuda/numba/cuda/tests/cudapy/enum_usecases.py +59 -0
  292. numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +62 -0
  293. numba_cuda/numba/cuda/tests/cudapy/jitlink.ptx +28 -0
  294. numba_cuda/numba/cuda/tests/cudapy/overload_usecases.py +33 -0
  295. numba_cuda/numba/cuda/tests/cudapy/recursion_usecases.py +104 -0
  296. numba_cuda/numba/cuda/tests/cudapy/test_alignment.py +47 -0
  297. numba_cuda/numba/cuda/tests/cudapy/test_analysis.py +1122 -0
  298. numba_cuda/numba/cuda/tests/cudapy/test_array.py +344 -0
  299. numba_cuda/numba/cuda/tests/cudapy/test_array_alignment.py +268 -0
  300. numba_cuda/numba/cuda/tests/cudapy/test_array_args.py +203 -0
  301. numba_cuda/numba/cuda/tests/cudapy/test_array_methods.py +63 -0
  302. numba_cuda/numba/cuda/tests/cudapy/test_array_reductions.py +360 -0
  303. numba_cuda/numba/cuda/tests/cudapy/test_atomics.py +1815 -0
  304. numba_cuda/numba/cuda/tests/cudapy/test_bfloat16.py +599 -0
  305. numba_cuda/numba/cuda/tests/cudapy/test_bfloat16_bindings.py +377 -0
  306. numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py +160 -0
  307. numba_cuda/numba/cuda/tests/cudapy/test_boolean.py +27 -0
  308. numba_cuda/numba/cuda/tests/cudapy/test_byteflow.py +98 -0
  309. numba_cuda/numba/cuda/tests/cudapy/test_cache_hints.py +210 -0
  310. numba_cuda/numba/cuda/tests/cudapy/test_caching.py +683 -0
  311. numba_cuda/numba/cuda/tests/cudapy/test_casting.py +265 -0
  312. numba_cuda/numba/cuda/tests/cudapy/test_cffi.py +42 -0
  313. numba_cuda/numba/cuda/tests/cudapy/test_compiler.py +718 -0
  314. numba_cuda/numba/cuda/tests/cudapy/test_complex.py +370 -0
  315. numba_cuda/numba/cuda/tests/cudapy/test_complex_kernel.py +23 -0
  316. numba_cuda/numba/cuda/tests/cudapy/test_const_string.py +142 -0
  317. numba_cuda/numba/cuda/tests/cudapy/test_constmem.py +178 -0
  318. numba_cuda/numba/cuda/tests/cudapy/test_cooperative_groups.py +193 -0
  319. numba_cuda/numba/cuda/tests/cudapy/test_copy_propagate.py +131 -0
  320. numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py +438 -0
  321. numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py +94 -0
  322. numba_cuda/numba/cuda/tests/cudapy/test_datetime.py +101 -0
  323. numba_cuda/numba/cuda/tests/cudapy/test_debug.py +105 -0
  324. numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +889 -0
  325. numba_cuda/numba/cuda/tests/cudapy/test_debuginfo_types.py +476 -0
  326. numba_cuda/numba/cuda/tests/cudapy/test_device_func.py +500 -0
  327. numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +820 -0
  328. numba_cuda/numba/cuda/tests/cudapy/test_enums.py +152 -0
  329. numba_cuda/numba/cuda/tests/cudapy/test_errors.py +111 -0
  330. numba_cuda/numba/cuda/tests/cudapy/test_exception.py +170 -0
  331. numba_cuda/numba/cuda/tests/cudapy/test_extending.py +1088 -0
  332. numba_cuda/numba/cuda/tests/cudapy/test_extending_types.py +71 -0
  333. numba_cuda/numba/cuda/tests/cudapy/test_fastmath.py +265 -0
  334. numba_cuda/numba/cuda/tests/cudapy/test_flow_control.py +1433 -0
  335. numba_cuda/numba/cuda/tests/cudapy/test_forall.py +57 -0
  336. numba_cuda/numba/cuda/tests/cudapy/test_freevar.py +34 -0
  337. numba_cuda/numba/cuda/tests/cudapy/test_frexp_ldexp.py +69 -0
  338. numba_cuda/numba/cuda/tests/cudapy/test_globals.py +62 -0
  339. numba_cuda/numba/cuda/tests/cudapy/test_gufunc.py +474 -0
  340. numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scalar.py +167 -0
  341. numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scheduling.py +92 -0
  342. numba_cuda/numba/cuda/tests/cudapy/test_idiv.py +39 -0
  343. numba_cuda/numba/cuda/tests/cudapy/test_inline.py +170 -0
  344. numba_cuda/numba/cuda/tests/cudapy/test_inspect.py +255 -0
  345. numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py +1219 -0
  346. numba_cuda/numba/cuda/tests/cudapy/test_ipc.py +263 -0
  347. numba_cuda/numba/cuda/tests/cudapy/test_ir.py +598 -0
  348. numba_cuda/numba/cuda/tests/cudapy/test_ir_utils.py +276 -0
  349. numba_cuda/numba/cuda/tests/cudapy/test_iterators.py +101 -0
  350. numba_cuda/numba/cuda/tests/cudapy/test_lang.py +68 -0
  351. numba_cuda/numba/cuda/tests/cudapy/test_laplace.py +123 -0
  352. numba_cuda/numba/cuda/tests/cudapy/test_libdevice.py +194 -0
  353. numba_cuda/numba/cuda/tests/cudapy/test_lineinfo.py +220 -0
  354. numba_cuda/numba/cuda/tests/cudapy/test_localmem.py +173 -0
  355. numba_cuda/numba/cuda/tests/cudapy/test_make_function_to_jit_function.py +364 -0
  356. numba_cuda/numba/cuda/tests/cudapy/test_mandel.py +47 -0
  357. numba_cuda/numba/cuda/tests/cudapy/test_math.py +842 -0
  358. numba_cuda/numba/cuda/tests/cudapy/test_matmul.py +76 -0
  359. numba_cuda/numba/cuda/tests/cudapy/test_minmax.py +78 -0
  360. numba_cuda/numba/cuda/tests/cudapy/test_montecarlo.py +25 -0
  361. numba_cuda/numba/cuda/tests/cudapy/test_multigpu.py +145 -0
  362. numba_cuda/numba/cuda/tests/cudapy/test_multiprocessing.py +39 -0
  363. numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py +82 -0
  364. numba_cuda/numba/cuda/tests/cudapy/test_nondet.py +53 -0
  365. numba_cuda/numba/cuda/tests/cudapy/test_operator.py +504 -0
  366. numba_cuda/numba/cuda/tests/cudapy/test_optimization.py +93 -0
  367. numba_cuda/numba/cuda/tests/cudapy/test_overload.py +402 -0
  368. numba_cuda/numba/cuda/tests/cudapy/test_powi.py +128 -0
  369. numba_cuda/numba/cuda/tests/cudapy/test_print.py +193 -0
  370. numba_cuda/numba/cuda/tests/cudapy/test_py2_div_issue.py +37 -0
  371. numba_cuda/numba/cuda/tests/cudapy/test_random.py +117 -0
  372. numba_cuda/numba/cuda/tests/cudapy/test_record_dtype.py +614 -0
  373. numba_cuda/numba/cuda/tests/cudapy/test_recursion.py +130 -0
  374. numba_cuda/numba/cuda/tests/cudapy/test_reduction.py +94 -0
  375. numba_cuda/numba/cuda/tests/cudapy/test_retrieve_autoconverted_arrays.py +83 -0
  376. numba_cuda/numba/cuda/tests/cudapy/test_serialize.py +86 -0
  377. numba_cuda/numba/cuda/tests/cudapy/test_slicing.py +40 -0
  378. numba_cuda/numba/cuda/tests/cudapy/test_sm.py +457 -0
  379. numba_cuda/numba/cuda/tests/cudapy/test_sm_creation.py +233 -0
  380. numba_cuda/numba/cuda/tests/cudapy/test_ssa.py +454 -0
  381. numba_cuda/numba/cuda/tests/cudapy/test_stream_api.py +56 -0
  382. numba_cuda/numba/cuda/tests/cudapy/test_sync.py +277 -0
  383. numba_cuda/numba/cuda/tests/cudapy/test_tracing.py +200 -0
  384. numba_cuda/numba/cuda/tests/cudapy/test_transpose.py +90 -0
  385. numba_cuda/numba/cuda/tests/cudapy/test_typeconv.py +333 -0
  386. numba_cuda/numba/cuda/tests/cudapy/test_typeinfer.py +538 -0
  387. numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +585 -0
  388. numba_cuda/numba/cuda/tests/cudapy/test_userexc.py +42 -0
  389. numba_cuda/numba/cuda/tests/cudapy/test_vector_type.py +485 -0
  390. numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py +312 -0
  391. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_complex.py +23 -0
  392. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py +183 -0
  393. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_device.py +40 -0
  394. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_scalar_arg.py +40 -0
  395. numba_cuda/numba/cuda/tests/cudapy/test_warning.py +206 -0
  396. numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +331 -0
  397. numba_cuda/numba/cuda/tests/cudasim/__init__.py +9 -0
  398. numba_cuda/numba/cuda/tests/cudasim/support.py +9 -0
  399. numba_cuda/numba/cuda/tests/cudasim/test_cudasim_issues.py +111 -0
  400. numba_cuda/numba/cuda/tests/data/__init__.py +2 -0
  401. numba_cuda/numba/cuda/tests/data/cta_barrier.cu +28 -0
  402. numba_cuda/numba/cuda/tests/data/cuda_include.cu +10 -0
  403. numba_cuda/numba/cuda/tests/data/error.cu +12 -0
  404. numba_cuda/numba/cuda/tests/data/include/add.cuh +8 -0
  405. numba_cuda/numba/cuda/tests/data/jitlink.cu +28 -0
  406. numba_cuda/numba/cuda/tests/data/jitlink.ptx +49 -0
  407. numba_cuda/numba/cuda/tests/data/warn.cu +12 -0
  408. numba_cuda/numba/cuda/tests/doc_examples/__init__.py +9 -0
  409. numba_cuda/numba/cuda/tests/doc_examples/ffi/__init__.py +2 -0
  410. numba_cuda/numba/cuda/tests/doc_examples/ffi/functions.cu +54 -0
  411. numba_cuda/numba/cuda/tests/doc_examples/ffi/include/mul.cuh +8 -0
  412. numba_cuda/numba/cuda/tests/doc_examples/ffi/saxpy.cu +14 -0
  413. numba_cuda/numba/cuda/tests/doc_examples/test_cg.py +86 -0
  414. numba_cuda/numba/cuda/tests/doc_examples/test_cpointer.py +68 -0
  415. numba_cuda/numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py +81 -0
  416. numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py +141 -0
  417. numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py +160 -0
  418. numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py +180 -0
  419. numba_cuda/numba/cuda/tests/doc_examples/test_montecarlo.py +119 -0
  420. numba_cuda/numba/cuda/tests/doc_examples/test_random.py +66 -0
  421. numba_cuda/numba/cuda/tests/doc_examples/test_reduction.py +80 -0
  422. numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py +206 -0
  423. numba_cuda/numba/cuda/tests/doc_examples/test_ufunc.py +53 -0
  424. numba_cuda/numba/cuda/tests/doc_examples/test_vecadd.py +76 -0
  425. numba_cuda/numba/cuda/tests/nocuda/__init__.py +9 -0
  426. numba_cuda/numba/cuda/tests/nocuda/test_dummyarray.py +391 -0
  427. numba_cuda/numba/cuda/tests/nocuda/test_function_resolution.py +48 -0
  428. numba_cuda/numba/cuda/tests/nocuda/test_import.py +63 -0
  429. numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py +252 -0
  430. numba_cuda/numba/cuda/tests/nocuda/test_nvvm.py +59 -0
  431. numba_cuda/numba/cuda/tests/nrt/__init__.py +9 -0
  432. numba_cuda/numba/cuda/tests/nrt/test_nrt.py +387 -0
  433. numba_cuda/numba/cuda/tests/nrt/test_nrt_refct.py +124 -0
  434. numba_cuda/numba/cuda/tests/support.py +900 -0
  435. numba_cuda/numba/cuda/typeconv/__init__.py +4 -0
  436. numba_cuda/numba/cuda/typeconv/castgraph.py +137 -0
  437. numba_cuda/numba/cuda/typeconv/rules.py +63 -0
  438. numba_cuda/numba/cuda/typeconv/typeconv.py +121 -0
  439. numba_cuda/numba/cuda/types/__init__.py +233 -0
  440. numba_cuda/numba/cuda/types/__init__.pyi +167 -0
  441. numba_cuda/numba/cuda/types/abstract.py +9 -0
  442. numba_cuda/numba/cuda/types/common.py +9 -0
  443. numba_cuda/numba/cuda/types/containers.py +9 -0
  444. numba_cuda/numba/cuda/types/cuda_abstract.py +533 -0
  445. numba_cuda/numba/cuda/types/cuda_common.py +110 -0
  446. numba_cuda/numba/cuda/types/cuda_containers.py +971 -0
  447. numba_cuda/numba/cuda/types/cuda_function_type.py +230 -0
  448. numba_cuda/numba/cuda/types/cuda_functions.py +798 -0
  449. numba_cuda/numba/cuda/types/cuda_iterators.py +120 -0
  450. numba_cuda/numba/cuda/types/cuda_misc.py +569 -0
  451. numba_cuda/numba/cuda/types/cuda_npytypes.py +690 -0
  452. numba_cuda/numba/cuda/types/cuda_scalars.py +280 -0
  453. numba_cuda/numba/cuda/types/ext_types.py +101 -0
  454. numba_cuda/numba/cuda/types/function_type.py +11 -0
  455. numba_cuda/numba/cuda/types/functions.py +9 -0
  456. numba_cuda/numba/cuda/types/iterators.py +9 -0
  457. numba_cuda/numba/cuda/types/misc.py +9 -0
  458. numba_cuda/numba/cuda/types/npytypes.py +9 -0
  459. numba_cuda/numba/cuda/types/scalars.py +9 -0
  460. numba_cuda/numba/cuda/typing/__init__.py +19 -0
  461. numba_cuda/numba/cuda/typing/arraydecl.py +939 -0
  462. numba_cuda/numba/cuda/typing/asnumbatype.py +130 -0
  463. numba_cuda/numba/cuda/typing/bufproto.py +70 -0
  464. numba_cuda/numba/cuda/typing/builtins.py +1209 -0
  465. numba_cuda/numba/cuda/typing/cffi_utils.py +219 -0
  466. numba_cuda/numba/cuda/typing/cmathdecl.py +47 -0
  467. numba_cuda/numba/cuda/typing/collections.py +138 -0
  468. numba_cuda/numba/cuda/typing/context.py +782 -0
  469. numba_cuda/numba/cuda/typing/ctypes_utils.py +125 -0
  470. numba_cuda/numba/cuda/typing/dictdecl.py +63 -0
  471. numba_cuda/numba/cuda/typing/enumdecl.py +74 -0
  472. numba_cuda/numba/cuda/typing/listdecl.py +147 -0
  473. numba_cuda/numba/cuda/typing/mathdecl.py +158 -0
  474. numba_cuda/numba/cuda/typing/npdatetime.py +322 -0
  475. numba_cuda/numba/cuda/typing/npydecl.py +749 -0
  476. numba_cuda/numba/cuda/typing/setdecl.py +115 -0
  477. numba_cuda/numba/cuda/typing/templates.py +1446 -0
  478. numba_cuda/numba/cuda/typing/typeof.py +301 -0
  479. numba_cuda/numba/cuda/ufuncs.py +746 -0
  480. numba_cuda/numba/cuda/utils.py +724 -0
  481. numba_cuda/numba/cuda/vector_types.py +214 -0
  482. numba_cuda/numba/cuda/vectorizers.py +260 -0
  483. numba_cuda-0.21.1.dist-info/METADATA +109 -0
  484. numba_cuda-0.21.1.dist-info/RECORD +488 -0
  485. numba_cuda-0.21.1.dist-info/WHEEL +5 -0
  486. numba_cuda-0.21.1.dist-info/licenses/LICENSE +26 -0
  487. numba_cuda-0.21.1.dist-info/licenses/LICENSE.numba +24 -0
  488. numba_cuda-0.21.1.dist-info/top_level.txt +1 -0
@@ -0,0 +1,128 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: BSD-2-Clause
3
+
4
+ import numpy as np
5
+ from numba.cuda.cudadrv.driver import device_memset, driver
6
+ from numba import cuda
7
+ from numba.cuda.testing import unittest, CUDATestCase
8
+ from numba.cuda.testing import skip_on_cudasim, skip_on_arm
9
+ from numba.cuda.tests.support import linux_only
10
+
11
+
12
@skip_on_cudasim("CUDA Driver API unsupported in the simulator")
@linux_only
@skip_on_arm("Managed Alloc support is experimental/untested on ARM")
class TestManagedAlloc(CUDATestCase):
    """Tests for managed memory via the driver and ``cuda.managed_array``."""

    def tearDown(self):
        """Reset the current context after the base-class teardown."""
        super().tearDown()
        cuda.current_context().reset()

    def get_total_gpu_memory(self):
        """Return the ``total`` value reported by ``driver.cuMemGetInfo``."""
        # We use a driver function to directly get the total GPU memory because
        # an EMM plugin may report something different (or not implement
        # get_memory_info at all).
        _free, total = driver.cuMemGetInfo()
        return total

    def skip_if_cc_major_lt(self, min_required, reason):
        """
        Skip the current test if the compute capability of the device is
        less than `min_required`.
        """
        ctx = cuda.current_context()
        cc_major = ctx.device.compute_capability[0]
        if cc_major < min_required:
            self.skipTest(reason)

    # CUDA Unified Memory comes in two flavors. For GPUs in the Kepler and
    # Maxwell generations, managed memory allocations work as opaque,
    # contiguous segments that can either be on the device or the host. For
    # GPUs in the Pascal or later generations, managed memory operates on a
    # per-page basis, so we can have arrays larger than GPU memory, where only
    # part of them is resident on the device at one time. To ensure that this
    # test works correctly on all supported GPUs, we'll select the size of our
    # memory such that we only oversubscribe the GPU memory if we're on a
    # Pascal or newer GPU (compute capability at least 6.0).

    def test_managed_alloc_driver_undersubscribe(self):
        msg = "Managed memory unsupported prior to CC 3.0"
        self.skip_if_cc_major_lt(3, msg)
        # We keep the allocation small so that it doesn't hang on GPUs
        # with large memory (H100)
        self._test_managed_alloc_driver(0.1)

    # This test is skipped by default because it is easy to hang the machine
    # for a very long time or get OOM killed if the GPU memory size is >50% of
    # the system memory size. Even if the system does have more than 2x the RAM
    # of the GPU, this test runs for a very long time (in comparison to the
    # rest of the tests in the suite).
    #
    # However, it is left in here for manual testing as required.

    @unittest.skip(
        "Oversubscription is slow and may hang or OOM the machine; "
        "run manually"
    )
    def test_managed_alloc_driver_oversubscribe(self):
        msg = "Oversubscription of managed memory unsupported prior to CC 6.0"
        self.skip_if_cc_major_lt(6, msg)
        self._test_managed_alloc_driver(2.0)

    def test_managed_alloc_driver_host_attach(self):
        msg = "Host attached managed memory is not accessible prior to CC 6.0"
        self.skip_if_cc_major_lt(6, msg)
        # Only test with a small array (0.01 * memory size) to keep the test
        # quick.
        self._test_managed_alloc_driver(0.01, attach_global=False)

    def _test_managed_alloc_driver(self, memory_factor, attach_global=True):
        """Allocate managed memory, memset it on the device, check on host.

        ``memory_factor`` scales the total GPU memory to obtain the allocation
        size in bytes; ``attach_global`` is forwarded to
        ``ctx.memallocmanaged``.
        """
        # Verify that we can allocate and operate on managed
        # memory through the CUDA driver interface.

        total_mem_size = self.get_total_gpu_memory()
        n_bytes = int(memory_factor * total_mem_size)

        ctx = cuda.current_context()
        mem = ctx.memallocmanaged(n_bytes, attach_global=attach_global)

        dtype = np.dtype(np.uint8)
        n_elems = n_bytes // dtype.itemsize
        ary = np.ndarray(shape=n_elems, dtype=dtype, buffer=mem)

        magic = 0xAB
        device_memset(mem, magic, n_bytes)
        ctx.synchronize()

        # Note that this assertion operates on the CPU, so this
        # test effectively drives both the CPU and the GPU on
        # managed memory.

        self.assertTrue(np.all(ary == magic))

    def _test_managed_array(self, attach_global=True):
        """Check the managed_array interface on both host and device.

        ``attach_global`` is forwarded to ``cuda.managed_array`` so that the
        host-attach test really allocates host-attached memory.
        """
        ary = cuda.managed_array(
            100, dtype=np.double, attach_global=attach_global
        )
        ary.fill(123.456)
        self.assertTrue(np.all(ary == 123.456))

        @cuda.jit("void(double[:])")
        def kernel(x):
            i = cuda.grid(1)
            if i < x.shape[0]:
                x[i] = 1.0

        kernel[10, 10](ary)
        cuda.current_context().synchronize()

        self.assertTrue(np.all(ary == 1.0))

    def test_managed_array_attach_global(self):
        self._test_managed_array()

    def test_managed_array_attach_host(self):
        # The global-attach case is covered by
        # test_managed_array_attach_global; check the CC requirement first so
        # that unsupported devices skip before doing any work.
        msg = "Host attached managed memory is not accessible prior to CC 6.0"
        self.skip_if_cc_major_lt(6, msg)
        self._test_managed_array(attach_global=False)
125
+
126
+
127
# Run this module's tests when the file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()
@@ -0,0 +1,301 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: BSD-2-Clause
3
+
4
+ import unittest
5
+ import threading
6
+
7
+ import numpy as np
8
+
9
+ from numba import cuda
10
+ from numba.cuda import config
11
+ from numba.cuda.cudadrv.linkable_code import CUSource
12
+ from numba.cuda.testing import (
13
+ CUDATestCase,
14
+ skip_on_cudasim,
15
+ )
16
+
17
+ if not config.ENABLE_CUDASIM:
18
+ from cuda.bindings.driver import cuModuleGetGlobal, cuMemcpyHtoD
19
+
20
+ from cuda.bindings.driver import CUmodule as cu_module_type
21
+
22
+
23
def wipe_all_modules_in_context():
    """Reset the current CUDA context to drop the modules it holds.

    This simulates the behavior on interpreter shutdown.

    TODO: This is a temporary solution until
    https://github.com/NVIDIA/numba-cuda/issues/171 is implemented.
    """
    cuda.current_context().reset()
32
+
33
+
34
def get_hashable_handle_value(handle):
    """Return ``handle`` unchanged.

    Callers in this module store the result in sets and pass it to driver
    calls, so the handle is used directly as the hashable value.
    """
    return handle
36
+
37
+
38
@skip_on_cudasim("Module loading not implemented in the simulator")
class TestModuleCallbacksBasic(CUDATestCase):
    """Check when setup and teardown callbacks of linked code are invoked."""

    def test_basic(self):
        # `live` counts setup calls minus teardown calls.
        live = 0

        def on_setup(handle):
            nonlocal live
            self.assertTrue(isinstance(handle, cu_module_type))
            live += 1

        def on_teardown(handle):
            nonlocal live
            self.assertTrue(isinstance(handle, cu_module_type))
            live -= 1

        lib = CUSource(
            "", setup_callback=on_setup, teardown_callback=on_teardown
        )

        @cuda.jit(link=[lib])
        def kernel():
            pass

        # Nothing happens until the first launch.
        self.assertEqual(live, 0)
        kernel[1, 1]()
        self.assertEqual(live, 1)
        # The second launch reuses the cached kernel: no further setup call.
        kernel[1, 1]()
        self.assertEqual(live, 1)

        wipe_all_modules_in_context()
        del kernel
        self.assertEqual(live, 0)

    def test_different_argtypes(self):
        live = 0
        handles_set_up = set()
        handles_torn_down = set()

        def on_setup(handle):
            nonlocal live
            live += 1
            handles_set_up.add(get_hashable_handle_value(handle))

        def on_teardown(handle):
            nonlocal live
            live -= 1
            handles_torn_down.add(get_hashable_handle_value(handle))

        lib = CUSource(
            "", setup_callback=on_setup, teardown_callback=on_teardown
        )

        @cuda.jit(link=[lib])
        def kernel(arg):
            pass

        self.assertEqual(live, 0)
        # (int64)->() : module 1
        kernel[1, 1](42)
        self.assertEqual(live, 1)
        # (int64)->() : module 1, cached
        kernel[1, 1](100)
        self.assertEqual(live, 1)
        # (float64)->() : module 2
        kernel[1, 1](3.14)
        self.assertEqual(live, 2)

        wipe_all_modules_in_context()
        del kernel
        self.assertEqual(live, 0)

        self.assertEqual(len(handles_set_up), 2)
        self.assertEqual(len(handles_torn_down), 2)

    def test_two_kernels(self):
        live = 0
        handles_set_up = set()
        handles_torn_down = set()

        def on_setup(handle):
            nonlocal live
            live += 1
            handles_set_up.add(get_hashable_handle_value(handle))

        def on_teardown(handle):
            nonlocal live
            live -= 1
            handles_torn_down.add(get_hashable_handle_value(handle))

        lib = CUSource(
            "", setup_callback=on_setup, teardown_callback=on_teardown
        )

        @cuda.jit(link=[lib])
        def kernel():
            pass

        @cuda.jit(link=[lib])
        def kernel2():
            pass

        # Each kernel linking `lib` triggers its own setup call.
        kernel[1, 1]()
        self.assertEqual(live, 1)
        kernel2[1, 1]()
        self.assertEqual(live, 2)

        wipe_all_modules_in_context()
        del kernel
        self.assertEqual(live, 0)

        self.assertEqual(len(handles_set_up), 2)
        self.assertEqual(len(handles_torn_down), 2)
141
+
142
+
143
@skip_on_cudasim("Module loading not implemented in the simulator")
class TestModuleCallbacksAPICompleteness(CUDATestCase):
    """Every present/absent combination of the two callbacks is accepted."""

    def test_api(self):
        def setup(handle):
            pass

        def teardown(handle):
            pass

        # All four combinations of a callback being supplied or left as None.
        combos = (
            (setup, teardown),
            (setup, None),
            (None, teardown),
            (None, None),
        )

        for setup_cb, teardown_cb in combos:
            with self.subTest(setup=setup_cb, teardown=teardown_cb):
                lib = CUSource(
                    "",
                    setup_callback=setup_cb,
                    teardown_callback=teardown_cb,
                )

                @cuda.jit(link=[lib])
                def kernel():
                    pass

                kernel[1, 1]()
170
+
171
+
172
@skip_on_cudasim("Module loading not implemented in the simulator")
class TestModuleCallbacks(CUDATestCase):
    """A setup callback can initialize a device global before kernels run."""

    def setUp(self):
        super().setUp()

        # CUDA C++ source with a global `num` and an accessor that returns it
        # through the reference argument.
        module = """
__device__ int num = 0;
extern "C"
__device__ int get_num(int &retval) {
    retval = num;
    return 0;
}
"""

        def set_forty_two(handle):
            # Look up the global variable `num` in the loaded module and
            # copy the value 42 into it from the host.
            res, dptr, size = cuModuleGetGlobal(
                get_hashable_handle_value(handle), "num".encode()
            )

            host_value = np.array([42], np.int32)
            cuMemcpyHtoD(dptr, host_value.ctypes.data, size)

        self.lib = CUSource(
            module, setup_callback=set_forty_two, teardown_callback=None
        )

    def test_decldevice_arg(self):
        # The linkable code is attached to the device function declaration.
        get_num = cuda.declare_device("get_num", "int32()", link=[self.lib])

        @cuda.jit
        def kernel(arr):
            arr[0] = get_num()

        out = np.zeros(1, np.int32)
        kernel[1, 1](out)
        self.assertEqual(out[0], 42)

    def test_jitarg(self):
        # The linkable code is attached to the kernel via `cuda.jit`.
        get_num = cuda.declare_device("get_num", "int32()")

        @cuda.jit(link=[self.lib])
        def kernel(arr):
            arr[0] = get_num()

        out = np.zeros(1, np.int32)
        kernel[1, 1](out)
        self.assertEqual(out[0], 42)
220
+
221
+
222
@skip_on_cudasim("Module loading not implemented in the simulator")
class TestMultithreadedCallbacks(CUDATestCase):
    """Callbacks behave consistently when kernels launch from many threads."""

    def test_concurrent_initialization(self):
        # Handles that have been set up and not yet torn down.
        seen_mods = set()
        # Largest size `seen_mods` ever reached.
        max_seen_mods = 0

        def setup(mod):
            nonlocal max_seen_mods
            seen_mods.add(get_hashable_handle_value(mod))
            max_seen_mods = max(max_seen_mods, len(seen_mods))

        def teardown(mod):
            # Raises an error if the module is not found in the seen_mods
            seen_mods.remove(get_hashable_handle_value(mod))

        lib = CUSource("", setup_callback=setup, teardown_callback=teardown)

        @cuda.jit(link=[lib])
        def kernel():
            pass

        def concurrent_compilation_launch(kernel):
            kernel[1, 1]()

        threads = [
            threading.Thread(
                target=concurrent_compilation_launch, args=(kernel,)
            )
            for _ in range(4)
        ]
        for t in threads:
            t.start()
        for t in threads:
            t.join()

        wipe_all_modules_in_context()
        self.assertEqual(len(seen_mods), 0)
        self.assertEqual(max_seen_mods, 1)  # one module shared across threads

    def test_concurrent_initialization_different_args(self):
        # Handles that have been set up and not yet torn down.
        seen_mods = set()
        # Largest size `seen_mods` ever reached.
        max_seen_mods = 0

        def setup(mod):
            nonlocal max_seen_mods
            seen_mods.add(get_hashable_handle_value(mod))
            max_seen_mods = max(max_seen_mods, len(seen_mods))

        def teardown(mod):
            # Raises an error if the module is not found in the seen_mods
            seen_mods.remove(get_hashable_handle_value(mod))

        lib = CUSource("", setup_callback=setup, teardown_callback=teardown)

        @cuda.jit(link=[lib])
        def kernel(a):
            pass

        def concurrent_compilation_launch():
            kernel[1, 1](42)  # (int64)->() : module 1
            kernel[1, 1](9)  # (int64)->() : module 1 from cache
            kernel[1, 1](3.14)  # (float64)->() : module 2

        threads = [
            threading.Thread(target=concurrent_compilation_launch)
            for _ in range(4)
        ]
        for t in threads:
            t.start()
        for t in threads:
            t.join()

        wipe_all_modules_in_context()
        # Use the unittest assertion, as the sibling test does, so the check
        # is not stripped when Python runs with -O.
        self.assertEqual(len(seen_mods), 0)
        self.assertEqual(max_seen_mods, 2)  # two modules shared across threads
298
+
299
+
300
# Run this module's tests when the file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()
@@ -0,0 +1,174 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: BSD-2-Clause
3
+
4
+ import pytest
5
+ from numba.cuda.testing import unittest
6
+ from numba.cuda.testing import skip_on_cudasim
7
+ from numba.cuda.testing import CUDATestCase
8
+ from numba.cuda import get_current_device
9
+ from numba.cuda.cudadrv.driver import _Linker, _have_nvjitlink
10
+
11
+ from numba import cuda
12
+ from numba.cuda import config
13
+
14
+ import os
15
+ import io
16
+ import contextlib
17
+
18
+
19
# Directory holding the test binaries, taken from the environment. The
# per-format paths below are only defined when the variable is set to a
# non-empty value.
TEST_BIN_DIR = os.environ.get("NUMBA_CUDA_TEST_BIN_DIR")
if TEST_BIN_DIR:
    test_device_functions_a = os.path.join(
        TEST_BIN_DIR, "test_device_functions.a"
    )
    test_device_functions_cubin = os.path.join(
        TEST_BIN_DIR, "test_device_functions.cubin"
    )
    test_device_functions_cu = os.path.join(
        TEST_BIN_DIR, "test_device_functions.cu"
    )
    test_device_functions_fatbin = os.path.join(
        TEST_BIN_DIR, "test_device_functions.fatbin"
    )
    test_device_functions_fatbin_multi = os.path.join(
        TEST_BIN_DIR, "test_device_functions_multi.fatbin"
    )
    test_device_functions_o = os.path.join(
        TEST_BIN_DIR, "test_device_functions.o"
    )
    test_device_functions_ptx = os.path.join(
        TEST_BIN_DIR, "test_device_functions.ptx"
    )
    test_device_functions_ltoir = os.path.join(
        TEST_BIN_DIR, "test_device_functions.ltoir"
    )
45
+
46
+
47
@unittest.skipIf(
    not TEST_BIN_DIR or not _have_nvjitlink(),
    "nvJitLink not installed or new enough (>12.3)",
)
@skip_on_cudasim("Linking unsupported in the simulator")
class TestLinker(CUDATestCase):
    """Link the prebuilt test binaries through ``_Linker`` and ``cuda.jit``."""

    def test_nvjitlink_add_file_guess_ext_linkable_code(self):
        files = (
            test_device_functions_a,
            test_device_functions_cubin,
            test_device_functions_cu,
            test_device_functions_fatbin,
            test_device_functions_o,
            test_device_functions_ptx,
        )
        for file in files:
            with self.subTest(file=file):
                linker = _Linker(cc=get_current_device().compute_capability)
                linker.add_file_guess_ext(file)

    def test_nvjitlink_test_add_file_guess_ext_invalid_input(self):
        with open(test_device_functions_cubin, "rb") as f:
            content = f.read()

        linker = _Linker(cc=get_current_device().compute_capability)
        with self.assertRaisesRegex(
            TypeError, "Expected path to file or a LinkableCode"
        ):
            # Feeding raw data as bytes to add_file_guess_ext should raise,
            # because there's no way to know what kind of file to treat it as
            linker.add_file_guess_ext(content)

    def test_nvjitlink_jit_with_linkable_code(self):
        files = (
            test_device_functions_a,
            test_device_functions_cubin,
            test_device_functions_cu,
            test_device_functions_fatbin,
            test_device_functions_o,
            test_device_functions_ptx,
        )
        for lto in [True, False]:
            for file in files:
                # Report `lto` too, so a failing combination is identifiable.
                with self.subTest(file=file, lto=lto):
                    sig = "uint32(uint32, uint32)"
                    add_from_numba = cuda.declare_device("add_from_numba", sig)

                    @cuda.jit(link=[file], lto=lto)
                    def kernel(result):
                        result[0] = add_from_numba(1, 2)

                    result = cuda.device_array(1)
                    kernel[1, 1](result)
                    self.assertEqual(result[0], 3)

    def test_nvjitlink_jit_with_linkable_code_lto_dump_assembly(self):
        files = [
            test_device_functions_cu,
            test_device_functions_ltoir,
            test_device_functions_fatbin_multi,
        ]

        # Restore the previous setting even if the loop raises, so a failure
        # here cannot leak DUMP_ASSEMBLY into later tests.
        previous = config.DUMP_ASSEMBLY
        config.DUMP_ASSEMBLY = True
        try:
            for file in files:
                with self.subTest(file=file):
                    f = io.StringIO()
                    with contextlib.redirect_stdout(f):
                        sig = "uint32(uint32, uint32)"
                        add_from_numba = cuda.declare_device(
                            "add_from_numba", sig
                        )

                        @cuda.jit(link=[file], lto=True)
                        def kernel(result):
                            result[0] = add_from_numba(1, 2)

                        result = cuda.device_array(1)
                        kernel[1, 1](result)
                        self.assertEqual(result[0], 3)

                    self.assertIn("ASSEMBLY (AFTER LTO)", f.getvalue())
        finally:
            config.DUMP_ASSEMBLY = previous

    def test_nvjitlink_jit_with_linkable_code_lto_dump_assembly_warn(self):
        files = [
            test_device_functions_a,
            test_device_functions_cubin,
            test_device_functions_fatbin,
            test_device_functions_o,
            test_device_functions_ptx,
        ]

        # Restore the previous setting even if the loop raises, so a failure
        # here cannot leak DUMP_ASSEMBLY into later tests.
        previous = config.DUMP_ASSEMBLY
        config.DUMP_ASSEMBLY = True
        try:
            for file in files:
                with self.subTest(file=file):
                    sig = "uint32(uint32, uint32)"
                    add_from_numba = cuda.declare_device("add_from_numba", sig)

                    @cuda.jit(link=[file], lto=True)
                    def kernel(result):
                        result[0] = add_from_numba(1, 2)

                    result = cuda.device_array(1)
                    func = kernel[1, 1]
                    with pytest.warns(
                        UserWarning,
                        match="it is not optimizable at link time, and `ignore_nonlto == True`",
                    ):
                        func(result)
                    self.assertEqual(result[0], 3)
        finally:
            config.DUMP_ASSEMBLY = previous

    def test_nvjitlink_jit_with_invalid_linkable_code(self):
        with open(test_device_functions_cubin, "rb") as f:
            content = f.read()
        # Raw bytes are neither a path nor a LinkableCode, so decorating with
        # an explicit signature is expected to raise.
        with self.assertRaisesRegex(
            TypeError, "Expected path to file or a LinkableCode"
        ):

            @cuda.jit("void()", link=[content])
            def kernel():
                pass
171
+
172
+
173
# Run this module's tests when the file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()
@@ -0,0 +1,28 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: BSD-2-Clause
3
+
4
+ from numba.cuda.cudadrv import nvrtc
5
+ from numba.cuda.testing import skip_on_cudasim
6
+
7
+ import unittest
8
+
9
+
10
@skip_on_cudasim("NVVM Driver unsupported in the simulator")
class TestArchOption(unittest.TestCase):
    """Check the ``compute_XY`` option chosen by ``nvrtc.get_arch_option``."""

    def test_get_arch_option(self):
        # Test returning the nearest lowest arch.
        nearest_lowest = (
            ((7, 5), "compute_75"),
            ((7, 7), "compute_75"),
            ((8, 5), "compute_80"),
            ((9, 1), "compute_90"),
        )
        for (major, minor), expected in nearest_lowest:
            self.assertEqual(nvrtc.get_arch_option(major, minor), expected)

        # Test known arch.
        known_ccs = nvrtc.get_supported_ccs()
        for cc in known_ccs:
            self.assertEqual(nvrtc.get_arch_option(*cc), "compute_%d%d" % cc)

        # A CC far above anything supported maps to the last supported entry.
        beyond_newest = nvrtc.get_arch_option(1000, 0)
        self.assertEqual(beyond_newest, "compute_%d%d" % known_ccs[-1])
25
+
26
+
27
# Run this module's tests when the file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()