numba-cuda 0.21.1__cp313-cp313-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (488) hide show
  1. _numba_cuda_redirector.pth +4 -0
  2. _numba_cuda_redirector.py +89 -0
  3. numba_cuda/VERSION +1 -0
  4. numba_cuda/__init__.py +6 -0
  5. numba_cuda/_version.py +11 -0
  6. numba_cuda/numba/cuda/__init__.py +70 -0
  7. numba_cuda/numba/cuda/_internal/cuda_bf16.py +16394 -0
  8. numba_cuda/numba/cuda/_internal/cuda_fp16.py +8112 -0
  9. numba_cuda/numba/cuda/api.py +577 -0
  10. numba_cuda/numba/cuda/api_util.py +76 -0
  11. numba_cuda/numba/cuda/args.py +72 -0
  12. numba_cuda/numba/cuda/bf16.py +397 -0
  13. numba_cuda/numba/cuda/cache_hints.py +287 -0
  14. numba_cuda/numba/cuda/cext/__init__.py +2 -0
  15. numba_cuda/numba/cuda/cext/_devicearray.cp313-win_amd64.pyd +0 -0
  16. numba_cuda/numba/cuda/cext/_devicearray.cpp +159 -0
  17. numba_cuda/numba/cuda/cext/_devicearray.h +29 -0
  18. numba_cuda/numba/cuda/cext/_dispatcher.cp313-win_amd64.pyd +0 -0
  19. numba_cuda/numba/cuda/cext/_dispatcher.cpp +1098 -0
  20. numba_cuda/numba/cuda/cext/_hashtable.cpp +532 -0
  21. numba_cuda/numba/cuda/cext/_hashtable.h +135 -0
  22. numba_cuda/numba/cuda/cext/_helperlib.c +71 -0
  23. numba_cuda/numba/cuda/cext/_helperlib.cp313-win_amd64.pyd +0 -0
  24. numba_cuda/numba/cuda/cext/_helpermod.c +82 -0
  25. numba_cuda/numba/cuda/cext/_pymodule.h +38 -0
  26. numba_cuda/numba/cuda/cext/_typeconv.cp313-win_amd64.pyd +0 -0
  27. numba_cuda/numba/cuda/cext/_typeconv.cpp +206 -0
  28. numba_cuda/numba/cuda/cext/_typeof.cpp +1159 -0
  29. numba_cuda/numba/cuda/cext/_typeof.h +19 -0
  30. numba_cuda/numba/cuda/cext/capsulethunk.h +111 -0
  31. numba_cuda/numba/cuda/cext/mviewbuf.c +385 -0
  32. numba_cuda/numba/cuda/cext/mviewbuf.cp313-win_amd64.pyd +0 -0
  33. numba_cuda/numba/cuda/cext/typeconv.cpp +212 -0
  34. numba_cuda/numba/cuda/cext/typeconv.hpp +101 -0
  35. numba_cuda/numba/cuda/cg.py +67 -0
  36. numba_cuda/numba/cuda/cgutils.py +1294 -0
  37. numba_cuda/numba/cuda/cloudpickle/__init__.py +21 -0
  38. numba_cuda/numba/cuda/cloudpickle/cloudpickle.py +1598 -0
  39. numba_cuda/numba/cuda/cloudpickle/cloudpickle_fast.py +17 -0
  40. numba_cuda/numba/cuda/codegen.py +541 -0
  41. numba_cuda/numba/cuda/compiler.py +1396 -0
  42. numba_cuda/numba/cuda/core/analysis.py +758 -0
  43. numba_cuda/numba/cuda/core/annotations/__init__.py +0 -0
  44. numba_cuda/numba/cuda/core/annotations/pretty_annotate.py +288 -0
  45. numba_cuda/numba/cuda/core/annotations/type_annotations.py +305 -0
  46. numba_cuda/numba/cuda/core/base.py +1332 -0
  47. numba_cuda/numba/cuda/core/boxing.py +1411 -0
  48. numba_cuda/numba/cuda/core/bytecode.py +728 -0
  49. numba_cuda/numba/cuda/core/byteflow.py +2346 -0
  50. numba_cuda/numba/cuda/core/caching.py +744 -0
  51. numba_cuda/numba/cuda/core/callconv.py +392 -0
  52. numba_cuda/numba/cuda/core/codegen.py +171 -0
  53. numba_cuda/numba/cuda/core/compiler.py +199 -0
  54. numba_cuda/numba/cuda/core/compiler_lock.py +85 -0
  55. numba_cuda/numba/cuda/core/compiler_machinery.py +497 -0
  56. numba_cuda/numba/cuda/core/config.py +650 -0
  57. numba_cuda/numba/cuda/core/consts.py +124 -0
  58. numba_cuda/numba/cuda/core/controlflow.py +989 -0
  59. numba_cuda/numba/cuda/core/entrypoints.py +57 -0
  60. numba_cuda/numba/cuda/core/environment.py +66 -0
  61. numba_cuda/numba/cuda/core/errors.py +917 -0
  62. numba_cuda/numba/cuda/core/event.py +511 -0
  63. numba_cuda/numba/cuda/core/funcdesc.py +330 -0
  64. numba_cuda/numba/cuda/core/generators.py +387 -0
  65. numba_cuda/numba/cuda/core/imputils.py +509 -0
  66. numba_cuda/numba/cuda/core/inline_closurecall.py +1787 -0
  67. numba_cuda/numba/cuda/core/interpreter.py +3617 -0
  68. numba_cuda/numba/cuda/core/ir.py +1812 -0
  69. numba_cuda/numba/cuda/core/ir_utils.py +2638 -0
  70. numba_cuda/numba/cuda/core/optional.py +129 -0
  71. numba_cuda/numba/cuda/core/options.py +262 -0
  72. numba_cuda/numba/cuda/core/postproc.py +249 -0
  73. numba_cuda/numba/cuda/core/pythonapi.py +1859 -0
  74. numba_cuda/numba/cuda/core/registry.py +46 -0
  75. numba_cuda/numba/cuda/core/removerefctpass.py +123 -0
  76. numba_cuda/numba/cuda/core/rewrites/__init__.py +26 -0
  77. numba_cuda/numba/cuda/core/rewrites/ir_print.py +91 -0
  78. numba_cuda/numba/cuda/core/rewrites/registry.py +104 -0
  79. numba_cuda/numba/cuda/core/rewrites/static_binop.py +41 -0
  80. numba_cuda/numba/cuda/core/rewrites/static_getitem.py +189 -0
  81. numba_cuda/numba/cuda/core/rewrites/static_raise.py +100 -0
  82. numba_cuda/numba/cuda/core/sigutils.py +68 -0
  83. numba_cuda/numba/cuda/core/ssa.py +498 -0
  84. numba_cuda/numba/cuda/core/targetconfig.py +330 -0
  85. numba_cuda/numba/cuda/core/tracing.py +231 -0
  86. numba_cuda/numba/cuda/core/transforms.py +956 -0
  87. numba_cuda/numba/cuda/core/typed_passes.py +867 -0
  88. numba_cuda/numba/cuda/core/typeinfer.py +1950 -0
  89. numba_cuda/numba/cuda/core/unsafe/__init__.py +0 -0
  90. numba_cuda/numba/cuda/core/unsafe/bytes.py +67 -0
  91. numba_cuda/numba/cuda/core/unsafe/eh.py +67 -0
  92. numba_cuda/numba/cuda/core/unsafe/refcount.py +98 -0
  93. numba_cuda/numba/cuda/core/untyped_passes.py +1979 -0
  94. numba_cuda/numba/cuda/cpython/builtins.py +1153 -0
  95. numba_cuda/numba/cuda/cpython/charseq.py +1218 -0
  96. numba_cuda/numba/cuda/cpython/cmathimpl.py +560 -0
  97. numba_cuda/numba/cuda/cpython/enumimpl.py +103 -0
  98. numba_cuda/numba/cuda/cpython/iterators.py +167 -0
  99. numba_cuda/numba/cuda/cpython/listobj.py +1326 -0
  100. numba_cuda/numba/cuda/cpython/mathimpl.py +499 -0
  101. numba_cuda/numba/cuda/cpython/numbers.py +1475 -0
  102. numba_cuda/numba/cuda/cpython/rangeobj.py +289 -0
  103. numba_cuda/numba/cuda/cpython/slicing.py +322 -0
  104. numba_cuda/numba/cuda/cpython/tupleobj.py +456 -0
  105. numba_cuda/numba/cuda/cpython/unicode.py +2865 -0
  106. numba_cuda/numba/cuda/cpython/unicode_support.py +1597 -0
  107. numba_cuda/numba/cuda/cpython/unsafe/__init__.py +0 -0
  108. numba_cuda/numba/cuda/cpython/unsafe/numbers.py +64 -0
  109. numba_cuda/numba/cuda/cpython/unsafe/tuple.py +92 -0
  110. numba_cuda/numba/cuda/cuda_paths.py +691 -0
  111. numba_cuda/numba/cuda/cudadecl.py +556 -0
  112. numba_cuda/numba/cuda/cudadrv/__init__.py +14 -0
  113. numba_cuda/numba/cuda/cudadrv/devicearray.py +951 -0
  114. numba_cuda/numba/cuda/cudadrv/devices.py +249 -0
  115. numba_cuda/numba/cuda/cudadrv/driver.py +3222 -0
  116. numba_cuda/numba/cuda/cudadrv/drvapi.py +435 -0
  117. numba_cuda/numba/cuda/cudadrv/dummyarray.py +558 -0
  118. numba_cuda/numba/cuda/cudadrv/enums.py +613 -0
  119. numba_cuda/numba/cuda/cudadrv/error.py +48 -0
  120. numba_cuda/numba/cuda/cudadrv/libs.py +220 -0
  121. numba_cuda/numba/cuda/cudadrv/linkable_code.py +184 -0
  122. numba_cuda/numba/cuda/cudadrv/mappings.py +14 -0
  123. numba_cuda/numba/cuda/cudadrv/ndarray.py +26 -0
  124. numba_cuda/numba/cuda/cudadrv/nvrtc.py +193 -0
  125. numba_cuda/numba/cuda/cudadrv/nvvm.py +756 -0
  126. numba_cuda/numba/cuda/cudadrv/rtapi.py +13 -0
  127. numba_cuda/numba/cuda/cudadrv/runtime.py +34 -0
  128. numba_cuda/numba/cuda/cudaimpl.py +995 -0
  129. numba_cuda/numba/cuda/cudamath.py +149 -0
  130. numba_cuda/numba/cuda/datamodel/__init__.py +7 -0
  131. numba_cuda/numba/cuda/datamodel/cuda_manager.py +66 -0
  132. numba_cuda/numba/cuda/datamodel/cuda_models.py +1446 -0
  133. numba_cuda/numba/cuda/datamodel/cuda_packer.py +224 -0
  134. numba_cuda/numba/cuda/datamodel/cuda_registry.py +22 -0
  135. numba_cuda/numba/cuda/datamodel/cuda_testing.py +153 -0
  136. numba_cuda/numba/cuda/datamodel/manager.py +11 -0
  137. numba_cuda/numba/cuda/datamodel/models.py +9 -0
  138. numba_cuda/numba/cuda/datamodel/packer.py +9 -0
  139. numba_cuda/numba/cuda/datamodel/registry.py +11 -0
  140. numba_cuda/numba/cuda/datamodel/testing.py +11 -0
  141. numba_cuda/numba/cuda/debuginfo.py +903 -0
  142. numba_cuda/numba/cuda/decorators.py +294 -0
  143. numba_cuda/numba/cuda/descriptor.py +35 -0
  144. numba_cuda/numba/cuda/device_init.py +158 -0
  145. numba_cuda/numba/cuda/deviceufunc.py +1021 -0
  146. numba_cuda/numba/cuda/dispatcher.py +2463 -0
  147. numba_cuda/numba/cuda/errors.py +72 -0
  148. numba_cuda/numba/cuda/extending.py +697 -0
  149. numba_cuda/numba/cuda/flags.py +178 -0
  150. numba_cuda/numba/cuda/fp16.py +357 -0
  151. numba_cuda/numba/cuda/include/12/cuda_bf16.h +5118 -0
  152. numba_cuda/numba/cuda/include/12/cuda_bf16.hpp +3865 -0
  153. numba_cuda/numba/cuda/include/12/cuda_fp16.h +5363 -0
  154. numba_cuda/numba/cuda/include/12/cuda_fp16.hpp +3483 -0
  155. numba_cuda/numba/cuda/include/13/cuda_bf16.h +5118 -0
  156. numba_cuda/numba/cuda/include/13/cuda_bf16.hpp +3865 -0
  157. numba_cuda/numba/cuda/include/13/cuda_fp16.h +5363 -0
  158. numba_cuda/numba/cuda/include/13/cuda_fp16.hpp +3483 -0
  159. numba_cuda/numba/cuda/initialize.py +24 -0
  160. numba_cuda/numba/cuda/intrinsic_wrapper.py +41 -0
  161. numba_cuda/numba/cuda/intrinsics.py +382 -0
  162. numba_cuda/numba/cuda/itanium_mangler.py +214 -0
  163. numba_cuda/numba/cuda/kernels/__init__.py +2 -0
  164. numba_cuda/numba/cuda/kernels/reduction.py +265 -0
  165. numba_cuda/numba/cuda/kernels/transpose.py +65 -0
  166. numba_cuda/numba/cuda/libdevice.py +3386 -0
  167. numba_cuda/numba/cuda/libdevicedecl.py +20 -0
  168. numba_cuda/numba/cuda/libdevicefuncs.py +1060 -0
  169. numba_cuda/numba/cuda/libdeviceimpl.py +88 -0
  170. numba_cuda/numba/cuda/locks.py +19 -0
  171. numba_cuda/numba/cuda/lowering.py +1951 -0
  172. numba_cuda/numba/cuda/mathimpl.py +374 -0
  173. numba_cuda/numba/cuda/memory_management/__init__.py +4 -0
  174. numba_cuda/numba/cuda/memory_management/memsys.cu +99 -0
  175. numba_cuda/numba/cuda/memory_management/memsys.cuh +22 -0
  176. numba_cuda/numba/cuda/memory_management/nrt.cu +212 -0
  177. numba_cuda/numba/cuda/memory_management/nrt.cuh +48 -0
  178. numba_cuda/numba/cuda/memory_management/nrt.py +390 -0
  179. numba_cuda/numba/cuda/memory_management/nrt_context.py +438 -0
  180. numba_cuda/numba/cuda/misc/appdirs.py +594 -0
  181. numba_cuda/numba/cuda/misc/cffiimpl.py +24 -0
  182. numba_cuda/numba/cuda/misc/coverage_support.py +43 -0
  183. numba_cuda/numba/cuda/misc/dump_style.py +41 -0
  184. numba_cuda/numba/cuda/misc/findlib.py +75 -0
  185. numba_cuda/numba/cuda/misc/firstlinefinder.py +96 -0
  186. numba_cuda/numba/cuda/misc/gdb_hook.py +240 -0
  187. numba_cuda/numba/cuda/misc/literal.py +28 -0
  188. numba_cuda/numba/cuda/misc/llvm_pass_timings.py +412 -0
  189. numba_cuda/numba/cuda/misc/special.py +94 -0
  190. numba_cuda/numba/cuda/models.py +56 -0
  191. numba_cuda/numba/cuda/np/arraymath.py +5130 -0
  192. numba_cuda/numba/cuda/np/arrayobj.py +7635 -0
  193. numba_cuda/numba/cuda/np/extensions.py +11 -0
  194. numba_cuda/numba/cuda/np/linalg.py +3087 -0
  195. numba_cuda/numba/cuda/np/math/__init__.py +0 -0
  196. numba_cuda/numba/cuda/np/math/cmathimpl.py +558 -0
  197. numba_cuda/numba/cuda/np/math/mathimpl.py +487 -0
  198. numba_cuda/numba/cuda/np/math/numbers.py +1461 -0
  199. numba_cuda/numba/cuda/np/npdatetime.py +969 -0
  200. numba_cuda/numba/cuda/np/npdatetime_helpers.py +217 -0
  201. numba_cuda/numba/cuda/np/npyfuncs.py +1808 -0
  202. numba_cuda/numba/cuda/np/npyimpl.py +1027 -0
  203. numba_cuda/numba/cuda/np/numpy_support.py +798 -0
  204. numba_cuda/numba/cuda/np/polynomial/__init__.py +4 -0
  205. numba_cuda/numba/cuda/np/polynomial/polynomial_core.py +242 -0
  206. numba_cuda/numba/cuda/np/polynomial/polynomial_functions.py +380 -0
  207. numba_cuda/numba/cuda/np/ufunc/__init__.py +4 -0
  208. numba_cuda/numba/cuda/np/ufunc/decorators.py +203 -0
  209. numba_cuda/numba/cuda/np/ufunc/sigparse.py +68 -0
  210. numba_cuda/numba/cuda/np/ufunc/ufuncbuilder.py +65 -0
  211. numba_cuda/numba/cuda/np/ufunc_db.py +1282 -0
  212. numba_cuda/numba/cuda/np/unsafe/__init__.py +0 -0
  213. numba_cuda/numba/cuda/np/unsafe/ndarray.py +84 -0
  214. numba_cuda/numba/cuda/nvvmutils.py +254 -0
  215. numba_cuda/numba/cuda/printimpl.py +126 -0
  216. numba_cuda/numba/cuda/random.py +308 -0
  217. numba_cuda/numba/cuda/reshape_funcs.cu +156 -0
  218. numba_cuda/numba/cuda/serialize.py +267 -0
  219. numba_cuda/numba/cuda/simulator/__init__.py +63 -0
  220. numba_cuda/numba/cuda/simulator/_internal/__init__.py +4 -0
  221. numba_cuda/numba/cuda/simulator/_internal/cuda_bf16.py +2 -0
  222. numba_cuda/numba/cuda/simulator/api.py +179 -0
  223. numba_cuda/numba/cuda/simulator/bf16.py +4 -0
  224. numba_cuda/numba/cuda/simulator/compiler.py +38 -0
  225. numba_cuda/numba/cuda/simulator/cudadrv/__init__.py +11 -0
  226. numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py +462 -0
  227. numba_cuda/numba/cuda/simulator/cudadrv/devices.py +122 -0
  228. numba_cuda/numba/cuda/simulator/cudadrv/driver.py +66 -0
  229. numba_cuda/numba/cuda/simulator/cudadrv/drvapi.py +7 -0
  230. numba_cuda/numba/cuda/simulator/cudadrv/dummyarray.py +7 -0
  231. numba_cuda/numba/cuda/simulator/cudadrv/error.py +10 -0
  232. numba_cuda/numba/cuda/simulator/cudadrv/libs.py +10 -0
  233. numba_cuda/numba/cuda/simulator/cudadrv/linkable_code.py +61 -0
  234. numba_cuda/numba/cuda/simulator/cudadrv/nvrtc.py +11 -0
  235. numba_cuda/numba/cuda/simulator/cudadrv/nvvm.py +32 -0
  236. numba_cuda/numba/cuda/simulator/cudadrv/runtime.py +22 -0
  237. numba_cuda/numba/cuda/simulator/dispatcher.py +11 -0
  238. numba_cuda/numba/cuda/simulator/kernel.py +320 -0
  239. numba_cuda/numba/cuda/simulator/kernelapi.py +509 -0
  240. numba_cuda/numba/cuda/simulator/memory_management/__init__.py +4 -0
  241. numba_cuda/numba/cuda/simulator/memory_management/nrt.py +21 -0
  242. numba_cuda/numba/cuda/simulator/reduction.py +19 -0
  243. numba_cuda/numba/cuda/simulator/tests/support.py +4 -0
  244. numba_cuda/numba/cuda/simulator/vector_types.py +65 -0
  245. numba_cuda/numba/cuda/simulator_init.py +18 -0
  246. numba_cuda/numba/cuda/stubs.py +635 -0
  247. numba_cuda/numba/cuda/target.py +505 -0
  248. numba_cuda/numba/cuda/testing.py +347 -0
  249. numba_cuda/numba/cuda/tests/__init__.py +62 -0
  250. numba_cuda/numba/cuda/tests/benchmarks/__init__.py +0 -0
  251. numba_cuda/numba/cuda/tests/benchmarks/test_kernel_launch.py +119 -0
  252. numba_cuda/numba/cuda/tests/cloudpickle_main_class.py +9 -0
  253. numba_cuda/numba/cuda/tests/core/serialize_usecases.py +113 -0
  254. numba_cuda/numba/cuda/tests/core/test_itanium_mangler.py +83 -0
  255. numba_cuda/numba/cuda/tests/core/test_serialize.py +371 -0
  256. numba_cuda/numba/cuda/tests/cudadrv/__init__.py +9 -0
  257. numba_cuda/numba/cuda/tests/cudadrv/test_array_attr.py +147 -0
  258. numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py +161 -0
  259. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_array_slicing.py +397 -0
  260. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_auto_context.py +24 -0
  261. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py +180 -0
  262. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py +313 -0
  263. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_memory.py +187 -0
  264. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py +621 -0
  265. numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py +247 -0
  266. numba_cuda/numba/cuda/tests/cudadrv/test_detect.py +100 -0
  267. numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +198 -0
  268. numba_cuda/numba/cuda/tests/cudadrv/test_events.py +53 -0
  269. numba_cuda/numba/cuda/tests/cudadrv/test_host_alloc.py +72 -0
  270. numba_cuda/numba/cuda/tests/cudadrv/test_init.py +138 -0
  271. numba_cuda/numba/cuda/tests/cudadrv/test_inline_ptx.py +43 -0
  272. numba_cuda/numba/cuda/tests/cudadrv/test_is_fp16.py +15 -0
  273. numba_cuda/numba/cuda/tests/cudadrv/test_linkable_code.py +58 -0
  274. numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +348 -0
  275. numba_cuda/numba/cuda/tests/cudadrv/test_managed_alloc.py +128 -0
  276. numba_cuda/numba/cuda/tests/cudadrv/test_module_callbacks.py +301 -0
  277. numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py +174 -0
  278. numba_cuda/numba/cuda/tests/cudadrv/test_nvrtc.py +28 -0
  279. numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py +185 -0
  280. numba_cuda/numba/cuda/tests/cudadrv/test_pinned.py +39 -0
  281. numba_cuda/numba/cuda/tests/cudadrv/test_profiler.py +23 -0
  282. numba_cuda/numba/cuda/tests/cudadrv/test_reset_device.py +38 -0
  283. numba_cuda/numba/cuda/tests/cudadrv/test_runtime.py +48 -0
  284. numba_cuda/numba/cuda/tests/cudadrv/test_select_device.py +44 -0
  285. numba_cuda/numba/cuda/tests/cudadrv/test_streams.py +127 -0
  286. numba_cuda/numba/cuda/tests/cudapy/__init__.py +9 -0
  287. numba_cuda/numba/cuda/tests/cudapy/cache_usecases.py +231 -0
  288. numba_cuda/numba/cuda/tests/cudapy/cache_with_cpu_usecases.py +50 -0
  289. numba_cuda/numba/cuda/tests/cudapy/cg_cache_usecases.py +36 -0
  290. numba_cuda/numba/cuda/tests/cudapy/complex_usecases.py +116 -0
  291. numba_cuda/numba/cuda/tests/cudapy/enum_usecases.py +59 -0
  292. numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +62 -0
  293. numba_cuda/numba/cuda/tests/cudapy/jitlink.ptx +28 -0
  294. numba_cuda/numba/cuda/tests/cudapy/overload_usecases.py +33 -0
  295. numba_cuda/numba/cuda/tests/cudapy/recursion_usecases.py +104 -0
  296. numba_cuda/numba/cuda/tests/cudapy/test_alignment.py +47 -0
  297. numba_cuda/numba/cuda/tests/cudapy/test_analysis.py +1122 -0
  298. numba_cuda/numba/cuda/tests/cudapy/test_array.py +344 -0
  299. numba_cuda/numba/cuda/tests/cudapy/test_array_alignment.py +268 -0
  300. numba_cuda/numba/cuda/tests/cudapy/test_array_args.py +203 -0
  301. numba_cuda/numba/cuda/tests/cudapy/test_array_methods.py +63 -0
  302. numba_cuda/numba/cuda/tests/cudapy/test_array_reductions.py +360 -0
  303. numba_cuda/numba/cuda/tests/cudapy/test_atomics.py +1815 -0
  304. numba_cuda/numba/cuda/tests/cudapy/test_bfloat16.py +599 -0
  305. numba_cuda/numba/cuda/tests/cudapy/test_bfloat16_bindings.py +377 -0
  306. numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py +160 -0
  307. numba_cuda/numba/cuda/tests/cudapy/test_boolean.py +27 -0
  308. numba_cuda/numba/cuda/tests/cudapy/test_byteflow.py +98 -0
  309. numba_cuda/numba/cuda/tests/cudapy/test_cache_hints.py +210 -0
  310. numba_cuda/numba/cuda/tests/cudapy/test_caching.py +683 -0
  311. numba_cuda/numba/cuda/tests/cudapy/test_casting.py +265 -0
  312. numba_cuda/numba/cuda/tests/cudapy/test_cffi.py +42 -0
  313. numba_cuda/numba/cuda/tests/cudapy/test_compiler.py +718 -0
  314. numba_cuda/numba/cuda/tests/cudapy/test_complex.py +370 -0
  315. numba_cuda/numba/cuda/tests/cudapy/test_complex_kernel.py +23 -0
  316. numba_cuda/numba/cuda/tests/cudapy/test_const_string.py +142 -0
  317. numba_cuda/numba/cuda/tests/cudapy/test_constmem.py +178 -0
  318. numba_cuda/numba/cuda/tests/cudapy/test_cooperative_groups.py +193 -0
  319. numba_cuda/numba/cuda/tests/cudapy/test_copy_propagate.py +131 -0
  320. numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py +438 -0
  321. numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py +94 -0
  322. numba_cuda/numba/cuda/tests/cudapy/test_datetime.py +101 -0
  323. numba_cuda/numba/cuda/tests/cudapy/test_debug.py +105 -0
  324. numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +889 -0
  325. numba_cuda/numba/cuda/tests/cudapy/test_debuginfo_types.py +476 -0
  326. numba_cuda/numba/cuda/tests/cudapy/test_device_func.py +500 -0
  327. numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +820 -0
  328. numba_cuda/numba/cuda/tests/cudapy/test_enums.py +152 -0
  329. numba_cuda/numba/cuda/tests/cudapy/test_errors.py +111 -0
  330. numba_cuda/numba/cuda/tests/cudapy/test_exception.py +170 -0
  331. numba_cuda/numba/cuda/tests/cudapy/test_extending.py +1088 -0
  332. numba_cuda/numba/cuda/tests/cudapy/test_extending_types.py +71 -0
  333. numba_cuda/numba/cuda/tests/cudapy/test_fastmath.py +265 -0
  334. numba_cuda/numba/cuda/tests/cudapy/test_flow_control.py +1433 -0
  335. numba_cuda/numba/cuda/tests/cudapy/test_forall.py +57 -0
  336. numba_cuda/numba/cuda/tests/cudapy/test_freevar.py +34 -0
  337. numba_cuda/numba/cuda/tests/cudapy/test_frexp_ldexp.py +69 -0
  338. numba_cuda/numba/cuda/tests/cudapy/test_globals.py +62 -0
  339. numba_cuda/numba/cuda/tests/cudapy/test_gufunc.py +474 -0
  340. numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scalar.py +167 -0
  341. numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scheduling.py +92 -0
  342. numba_cuda/numba/cuda/tests/cudapy/test_idiv.py +39 -0
  343. numba_cuda/numba/cuda/tests/cudapy/test_inline.py +170 -0
  344. numba_cuda/numba/cuda/tests/cudapy/test_inspect.py +255 -0
  345. numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py +1219 -0
  346. numba_cuda/numba/cuda/tests/cudapy/test_ipc.py +263 -0
  347. numba_cuda/numba/cuda/tests/cudapy/test_ir.py +598 -0
  348. numba_cuda/numba/cuda/tests/cudapy/test_ir_utils.py +276 -0
  349. numba_cuda/numba/cuda/tests/cudapy/test_iterators.py +101 -0
  350. numba_cuda/numba/cuda/tests/cudapy/test_lang.py +68 -0
  351. numba_cuda/numba/cuda/tests/cudapy/test_laplace.py +123 -0
  352. numba_cuda/numba/cuda/tests/cudapy/test_libdevice.py +194 -0
  353. numba_cuda/numba/cuda/tests/cudapy/test_lineinfo.py +220 -0
  354. numba_cuda/numba/cuda/tests/cudapy/test_localmem.py +173 -0
  355. numba_cuda/numba/cuda/tests/cudapy/test_make_function_to_jit_function.py +364 -0
  356. numba_cuda/numba/cuda/tests/cudapy/test_mandel.py +47 -0
  357. numba_cuda/numba/cuda/tests/cudapy/test_math.py +842 -0
  358. numba_cuda/numba/cuda/tests/cudapy/test_matmul.py +76 -0
  359. numba_cuda/numba/cuda/tests/cudapy/test_minmax.py +78 -0
  360. numba_cuda/numba/cuda/tests/cudapy/test_montecarlo.py +25 -0
  361. numba_cuda/numba/cuda/tests/cudapy/test_multigpu.py +145 -0
  362. numba_cuda/numba/cuda/tests/cudapy/test_multiprocessing.py +39 -0
  363. numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py +82 -0
  364. numba_cuda/numba/cuda/tests/cudapy/test_nondet.py +53 -0
  365. numba_cuda/numba/cuda/tests/cudapy/test_operator.py +504 -0
  366. numba_cuda/numba/cuda/tests/cudapy/test_optimization.py +93 -0
  367. numba_cuda/numba/cuda/tests/cudapy/test_overload.py +402 -0
  368. numba_cuda/numba/cuda/tests/cudapy/test_powi.py +128 -0
  369. numba_cuda/numba/cuda/tests/cudapy/test_print.py +193 -0
  370. numba_cuda/numba/cuda/tests/cudapy/test_py2_div_issue.py +37 -0
  371. numba_cuda/numba/cuda/tests/cudapy/test_random.py +117 -0
  372. numba_cuda/numba/cuda/tests/cudapy/test_record_dtype.py +614 -0
  373. numba_cuda/numba/cuda/tests/cudapy/test_recursion.py +130 -0
  374. numba_cuda/numba/cuda/tests/cudapy/test_reduction.py +94 -0
  375. numba_cuda/numba/cuda/tests/cudapy/test_retrieve_autoconverted_arrays.py +83 -0
  376. numba_cuda/numba/cuda/tests/cudapy/test_serialize.py +86 -0
  377. numba_cuda/numba/cuda/tests/cudapy/test_slicing.py +40 -0
  378. numba_cuda/numba/cuda/tests/cudapy/test_sm.py +457 -0
  379. numba_cuda/numba/cuda/tests/cudapy/test_sm_creation.py +233 -0
  380. numba_cuda/numba/cuda/tests/cudapy/test_ssa.py +454 -0
  381. numba_cuda/numba/cuda/tests/cudapy/test_stream_api.py +56 -0
  382. numba_cuda/numba/cuda/tests/cudapy/test_sync.py +277 -0
  383. numba_cuda/numba/cuda/tests/cudapy/test_tracing.py +200 -0
  384. numba_cuda/numba/cuda/tests/cudapy/test_transpose.py +90 -0
  385. numba_cuda/numba/cuda/tests/cudapy/test_typeconv.py +333 -0
  386. numba_cuda/numba/cuda/tests/cudapy/test_typeinfer.py +538 -0
  387. numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +585 -0
  388. numba_cuda/numba/cuda/tests/cudapy/test_userexc.py +42 -0
  389. numba_cuda/numba/cuda/tests/cudapy/test_vector_type.py +485 -0
  390. numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py +312 -0
  391. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_complex.py +23 -0
  392. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py +183 -0
  393. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_device.py +40 -0
  394. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_scalar_arg.py +40 -0
  395. numba_cuda/numba/cuda/tests/cudapy/test_warning.py +206 -0
  396. numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +331 -0
  397. numba_cuda/numba/cuda/tests/cudasim/__init__.py +9 -0
  398. numba_cuda/numba/cuda/tests/cudasim/support.py +9 -0
  399. numba_cuda/numba/cuda/tests/cudasim/test_cudasim_issues.py +111 -0
  400. numba_cuda/numba/cuda/tests/data/__init__.py +2 -0
  401. numba_cuda/numba/cuda/tests/data/cta_barrier.cu +28 -0
  402. numba_cuda/numba/cuda/tests/data/cuda_include.cu +10 -0
  403. numba_cuda/numba/cuda/tests/data/error.cu +12 -0
  404. numba_cuda/numba/cuda/tests/data/include/add.cuh +8 -0
  405. numba_cuda/numba/cuda/tests/data/jitlink.cu +28 -0
  406. numba_cuda/numba/cuda/tests/data/jitlink.ptx +49 -0
  407. numba_cuda/numba/cuda/tests/data/warn.cu +12 -0
  408. numba_cuda/numba/cuda/tests/doc_examples/__init__.py +9 -0
  409. numba_cuda/numba/cuda/tests/doc_examples/ffi/__init__.py +2 -0
  410. numba_cuda/numba/cuda/tests/doc_examples/ffi/functions.cu +54 -0
  411. numba_cuda/numba/cuda/tests/doc_examples/ffi/include/mul.cuh +8 -0
  412. numba_cuda/numba/cuda/tests/doc_examples/ffi/saxpy.cu +14 -0
  413. numba_cuda/numba/cuda/tests/doc_examples/test_cg.py +86 -0
  414. numba_cuda/numba/cuda/tests/doc_examples/test_cpointer.py +68 -0
  415. numba_cuda/numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py +81 -0
  416. numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py +141 -0
  417. numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py +160 -0
  418. numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py +180 -0
  419. numba_cuda/numba/cuda/tests/doc_examples/test_montecarlo.py +119 -0
  420. numba_cuda/numba/cuda/tests/doc_examples/test_random.py +66 -0
  421. numba_cuda/numba/cuda/tests/doc_examples/test_reduction.py +80 -0
  422. numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py +206 -0
  423. numba_cuda/numba/cuda/tests/doc_examples/test_ufunc.py +53 -0
  424. numba_cuda/numba/cuda/tests/doc_examples/test_vecadd.py +76 -0
  425. numba_cuda/numba/cuda/tests/nocuda/__init__.py +9 -0
  426. numba_cuda/numba/cuda/tests/nocuda/test_dummyarray.py +391 -0
  427. numba_cuda/numba/cuda/tests/nocuda/test_function_resolution.py +48 -0
  428. numba_cuda/numba/cuda/tests/nocuda/test_import.py +63 -0
  429. numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py +252 -0
  430. numba_cuda/numba/cuda/tests/nocuda/test_nvvm.py +59 -0
  431. numba_cuda/numba/cuda/tests/nrt/__init__.py +9 -0
  432. numba_cuda/numba/cuda/tests/nrt/test_nrt.py +387 -0
  433. numba_cuda/numba/cuda/tests/nrt/test_nrt_refct.py +124 -0
  434. numba_cuda/numba/cuda/tests/support.py +900 -0
  435. numba_cuda/numba/cuda/typeconv/__init__.py +4 -0
  436. numba_cuda/numba/cuda/typeconv/castgraph.py +137 -0
  437. numba_cuda/numba/cuda/typeconv/rules.py +63 -0
  438. numba_cuda/numba/cuda/typeconv/typeconv.py +121 -0
  439. numba_cuda/numba/cuda/types/__init__.py +233 -0
  440. numba_cuda/numba/cuda/types/__init__.pyi +167 -0
  441. numba_cuda/numba/cuda/types/abstract.py +9 -0
  442. numba_cuda/numba/cuda/types/common.py +9 -0
  443. numba_cuda/numba/cuda/types/containers.py +9 -0
  444. numba_cuda/numba/cuda/types/cuda_abstract.py +533 -0
  445. numba_cuda/numba/cuda/types/cuda_common.py +110 -0
  446. numba_cuda/numba/cuda/types/cuda_containers.py +971 -0
  447. numba_cuda/numba/cuda/types/cuda_function_type.py +230 -0
  448. numba_cuda/numba/cuda/types/cuda_functions.py +798 -0
  449. numba_cuda/numba/cuda/types/cuda_iterators.py +120 -0
  450. numba_cuda/numba/cuda/types/cuda_misc.py +569 -0
  451. numba_cuda/numba/cuda/types/cuda_npytypes.py +690 -0
  452. numba_cuda/numba/cuda/types/cuda_scalars.py +280 -0
  453. numba_cuda/numba/cuda/types/ext_types.py +101 -0
  454. numba_cuda/numba/cuda/types/function_type.py +11 -0
  455. numba_cuda/numba/cuda/types/functions.py +9 -0
  456. numba_cuda/numba/cuda/types/iterators.py +9 -0
  457. numba_cuda/numba/cuda/types/misc.py +9 -0
  458. numba_cuda/numba/cuda/types/npytypes.py +9 -0
  459. numba_cuda/numba/cuda/types/scalars.py +9 -0
  460. numba_cuda/numba/cuda/typing/__init__.py +19 -0
  461. numba_cuda/numba/cuda/typing/arraydecl.py +939 -0
  462. numba_cuda/numba/cuda/typing/asnumbatype.py +130 -0
  463. numba_cuda/numba/cuda/typing/bufproto.py +70 -0
  464. numba_cuda/numba/cuda/typing/builtins.py +1209 -0
  465. numba_cuda/numba/cuda/typing/cffi_utils.py +219 -0
  466. numba_cuda/numba/cuda/typing/cmathdecl.py +47 -0
  467. numba_cuda/numba/cuda/typing/collections.py +138 -0
  468. numba_cuda/numba/cuda/typing/context.py +782 -0
  469. numba_cuda/numba/cuda/typing/ctypes_utils.py +125 -0
  470. numba_cuda/numba/cuda/typing/dictdecl.py +63 -0
  471. numba_cuda/numba/cuda/typing/enumdecl.py +74 -0
  472. numba_cuda/numba/cuda/typing/listdecl.py +147 -0
  473. numba_cuda/numba/cuda/typing/mathdecl.py +158 -0
  474. numba_cuda/numba/cuda/typing/npdatetime.py +322 -0
  475. numba_cuda/numba/cuda/typing/npydecl.py +749 -0
  476. numba_cuda/numba/cuda/typing/setdecl.py +115 -0
  477. numba_cuda/numba/cuda/typing/templates.py +1446 -0
  478. numba_cuda/numba/cuda/typing/typeof.py +301 -0
  479. numba_cuda/numba/cuda/ufuncs.py +746 -0
  480. numba_cuda/numba/cuda/utils.py +724 -0
  481. numba_cuda/numba/cuda/vector_types.py +214 -0
  482. numba_cuda/numba/cuda/vectorizers.py +260 -0
  483. numba_cuda-0.21.1.dist-info/METADATA +109 -0
  484. numba_cuda-0.21.1.dist-info/RECORD +488 -0
  485. numba_cuda-0.21.1.dist-info/WHEEL +5 -0
  486. numba_cuda-0.21.1.dist-info/licenses/LICENSE +26 -0
  487. numba_cuda-0.21.1.dist-info/licenses/LICENSE.numba +24 -0
  488. numba_cuda-0.21.1.dist-info/top_level.txt +1 -0
@@ -0,0 +1,252 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: BSD-2-Clause
3
+
4
+ import sys
5
+ import os
6
+ import multiprocessing as mp
7
+ import warnings
8
+
9
+
10
+ from numba.cuda.core.config import IS_WIN32
11
+ from numba.cuda.core.errors import NumbaWarning
12
+ from numba.cuda.cudadrv import nvvm
13
+ from numba.cuda.testing import (
14
+ unittest,
15
+ skip_on_cudasim,
16
+ skip_unless_conda_cudatoolkit,
17
+ )
18
+ from numba.cuda.cuda_paths import (
19
+ _get_libdevice_path_decision,
20
+ _get_nvvm_path_decision,
21
+ _get_cudalib_dir_path_decision,
22
+ get_system_ctk,
23
+ get_system_ctk_libdir,
24
+ )
25
+
26
+
27
+ has_cuda = nvvm.is_available()
28
+ has_mp_get_context = hasattr(mp, "get_context")
29
+
30
+
31
class LibraryLookupBase(unittest.TestCase):
    """Base fixture that runs lookup actions inside a spawned child process.

    Each test gets its own child running ``check_lib_lookup``. Actions are
    sent to the child through ``qsend`` and their results are read back from
    ``qrecv``, so environment changes made by an action stay in the child.
    """

    def setUp(self):
        """Create the two queues and start the child worker process."""
        ctx = mp.get_context("spawn")

        qrecv = ctx.Queue()
        qsend = ctx.Queue()
        self.qsend = qsend
        self.qrecv = qrecv
        # The child reads actions from ``qsend`` and writes results to
        # ``qrecv`` (check_lib_lookup takes the output queue first).
        self.child_process = ctx.Process(
            target=check_lib_lookup,
            args=(qrecv, qsend),
            daemon=True,
        )
        self.child_process.start()

    def tearDown(self):
        """Ask the child to stop and make sure it really has exited."""
        self.qsend.put(self.do_terminate)
        self.child_process.join(3)
        # Ensure the process is terminated: if it did not exit within the
        # timeout, stop it forcibly so it cannot leak into later tests.
        if self.child_process.is_alive():
            self.child_process.terminate()
            self.child_process.join()
        # A finished process always reports an exit code.
        self.assertIsNotNone(self.child_process.exitcode)

    def remote_do(self, action):
        """Run *action* in the child and return what it sent back.

        Fails the test if the child replied with an exception instance.
        """
        self.qsend.put(action)
        out = self.qrecv.get()
        self.assertNotIsInstance(out, BaseException)
        return out

    @staticmethod
    def do_terminate():
        """Action whose ``False`` status tells the child loop to stop."""
        return False, None
61
+
62
+
63
def remove_env(name):
    """Remove environment variable *name*.

    Returns True when the variable existed and was removed, False when it
    was not set.
    """
    absent = object()
    return os.environ.pop(name, absent) is not absent
70
+
71
+
72
def check_lib_lookup(qout, qin):
    """Worker loop: run actions received on *qin*, post results to *qout*.

    Each action is a callable returning ``(status, result)``. A tuple
    ``result`` is posted to *qout* with the list of warnings recorded while
    the action ran appended as the last element. The loop stops when an
    action returns a false ``status`` or when anything raises; a raised
    exception is posted to *qout* in place of a result.

    A ``None`` result (as returned by the terminate action) is not posted.
    Previously ``None + (w,)`` raised ``TypeError``, so shutdown only worked
    through the exception handler and left a spurious exception on *qout*.
    """
    status = True
    while status:
        try:
            action = qin.get()
        except Exception as e:
            qout.put(e)
            status = False
        else:
            try:
                with warnings.catch_warnings(record=True) as w:
                    # Record every NumbaWarning, even repeated ones.
                    warnings.simplefilter("always", NumbaWarning)
                    status, result = action()
                if result is not None:
                    qout.put(result + (w,))
            except Exception as e:
                qout.put(e)
                status = False
89
+
90
+
91
@skip_on_cudasim("Library detection unsupported in the simulator")
@unittest.skipUnless(has_mp_get_context, "mp.get_context not available")
@skip_unless_conda_cudatoolkit("test assumes conda installed cudatoolkit")
class TestLibDeviceLookUp(LibraryLookupBase):
    """Checks which source ``_get_libdevice_path_decision`` reports."""

    def test_libdevice_path_decision(self):
        # Step 1: with CUDA_HOME / CUDA_PATH cleared, the conda environment
        # is expected to be picked when CUDA is available.
        by, info, warns = self.remote_do(self.do_clear_envs)
        if not has_cuda:
            self.assertEqual(by, "<unknown>")
            self.assertIsNone(info)
        else:
            self.assertEqual(by, "Conda environment")
        self.assertFalse(warns)

        # Step 2: CUDA_HOME is set and the conda environment is hidden.
        by, info, warns = self.remote_do(self.do_set_cuda_home)
        self.assertEqual(by, "CUDA_HOME")
        expected_prefix = os.path.join("mycudahome", "nvvm", "libdevice")
        self.assertTrue(info.startswith(expected_prefix))
        self.assertFalse(warns)

        # Step 3: clear the variables again; the child still has the conda
        # environment hidden, so only a system toolkit (if any) remains.
        system_ctk = get_system_ctk("nvvm", "libdevice")
        by, info, warns = self.remote_do(self.do_clear_envs)
        if system_ctk is None:
            # No cudatoolkit is available at all
            self.assertEqual(by, "<unknown>")
            self.assertIsNone(info)
        else:
            # Use system available cudatoolkit
            self.assertEqual(by, "System")
        self.assertFalse(warns)

    @staticmethod
    def do_clear_envs():
        """Child action: unset CUDA_HOME/CUDA_PATH and report the decision."""
        for variable in ("CUDA_HOME", "CUDA_PATH"):
            remove_env(variable)
        return True, _get_libdevice_path_decision()

    @staticmethod
    def do_set_cuda_home():
        """Child action: set CUDA_HOME, hide conda, report the decision."""
        cuda_home = os.path.join("mycudahome")
        os.environ["CUDA_HOME"] = cuda_home
        _fake_non_conda_env()
        return True, _get_libdevice_path_decision()
135
+
136
+
137
@skip_on_cudasim("Library detection unsupported in the simulator")
@unittest.skipUnless(has_mp_get_context, "mp.get_context not available")
@skip_unless_conda_cudatoolkit("test assumes conda installed cudatoolkit")
class TestNvvmLookUp(LibraryLookupBase):
    """Checks which source ``_get_nvvm_path_decision`` reports."""

    def test_nvvm_path_decision(self):
        # Step 1: with CUDA_HOME / CUDA_PATH cleared, the conda environment
        # is expected to be picked when CUDA is available.
        by, info, warns = self.remote_do(self.do_clear_envs)
        if not has_cuda:
            self.assertEqual(by, "<unknown>")
            self.assertIsNone(info)
        else:
            self.assertEqual(by, "Conda environment")
        self.assertFalse(warns)

        # Step 2: CUDA_HOME is set and the conda environment is hidden.
        by, info, warns = self.remote_do(self.do_set_cuda_home)
        self.assertEqual(by, "CUDA_HOME")
        self.assertFalse(warns)
        # The library directory name depends on the platform.
        if IS_WIN32:
            expected_dir = os.path.join("mycudahome", "nvvm", "bin")
            self.assertEqual(os.path.dirname(info), expected_dir)
        else:
            expected_dir = os.path.join("mycudahome", "nvvm", "lib64")
            self.assertEqual(os.path.dirname(info), expected_dir)

        # Step 3: clear the variables again; the child still has the conda
        # environment hidden, so only a system toolkit (if any) remains.
        system_ctk = get_system_ctk("nvvm")
        by, info, warns = self.remote_do(self.do_clear_envs)
        if system_ctk is None:
            # No cudatoolkit is available at all
            self.assertEqual(by, "<unknown>")
            self.assertIsNone(info)
        else:
            # Use system available cudatoolkit
            self.assertEqual(by, "System")
        self.assertFalse(warns)

    @staticmethod
    def do_clear_envs():
        """Child action: unset CUDA_HOME/CUDA_PATH and report the decision."""
        for variable in ("CUDA_HOME", "CUDA_PATH"):
            remove_env(variable)
        return True, _get_nvvm_path_decision()

    @staticmethod
    def do_set_cuda_home():
        """Child action: set CUDA_HOME, hide conda, report the decision."""
        cuda_home = os.path.join("mycudahome")
        os.environ["CUDA_HOME"] = cuda_home
        _fake_non_conda_env()
        return True, _get_nvvm_path_decision()
187
+
188
+
189
@skip_on_cudasim("Library detection unsupported in the simulator")
@unittest.skipUnless(has_mp_get_context, "mp.get_context not available")
@skip_unless_conda_cudatoolkit("test assumes conda installed cudatoolkit")
class TestCudaLibLookUp(LibraryLookupBase):
    """Checks which source ``_get_cudalib_dir_path_decision`` reports."""

    def test_cudalib_path_decision(self):
        # Step 1: with CUDA_HOME / CUDA_PATH cleared, the conda environment
        # is expected to be picked when CUDA is available.
        by, info, warns = self.remote_do(self.do_clear_envs)
        if not has_cuda:
            self.assertEqual(by, "<unknown>")
            self.assertIsNone(info)
        else:
            self.assertEqual(by, "Conda environment")
        self.assertFalse(warns)

        # Step 2: CUDA_HOME is set and the conda environment is hidden.
        self.remote_do(self.do_clear_envs)
        by, info, warns = self.remote_do(self.do_set_cuda_home)
        self.assertEqual(by, "CUDA_HOME")
        self.assertFalse(warns)
        if IS_WIN32:
            # I think only wheels don't have the "Library" directory?
            windows_candidates = (
                os.path.join("mycudahome", "bin"),
                os.path.join("mycudahome", "Library", "bin"),
            )
            self.assertTrue(info in windows_candidates)
        else:
            self.assertEqual(info, os.path.join("mycudahome", "lib64"))

        # Step 3: clear the variables again; the child still has the conda
        # environment hidden, so only a system toolkit (if any) remains.
        system_libdir = get_system_ctk_libdir()
        by, info, warns = self.remote_do(self.do_clear_envs)
        if system_libdir is None:
            # No cudatoolkit is available at all
            self.assertEqual(by, "<unknown>")
            self.assertIsNone(info)
        else:
            # Use system available cudatoolkit
            self.assertEqual(by, "System")
        self.assertFalse(warns)

    @staticmethod
    def do_clear_envs():
        """Child action: unset CUDA_HOME/CUDA_PATH and report the decision."""
        for variable in ("CUDA_HOME", "CUDA_PATH"):
            remove_env(variable)
        return True, _get_cudalib_dir_path_decision()

    @staticmethod
    def do_set_cuda_home():
        """Child action: set CUDA_HOME, hide conda, report the decision."""
        cuda_home = os.path.join("mycudahome")
        os.environ["CUDA_HOME"] = cuda_home
        _fake_non_conda_env()
        return True, _get_cudalib_dir_path_decision()
242
+
243
+
244
+ def _fake_non_conda_env():
245
+ """
246
+ Monkeypatch sys.prefix to hide the fact we are in a conda-env
247
+ """
248
+ sys.prefix = ""
249
+
250
+
251
+ if __name__ == "__main__":
252
+ unittest.main()
@@ -0,0 +1,59 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: BSD-2-Clause
3
+
4
+ from numba.cuda.cudadrv import nvvm
5
+ from numba.cuda.testing import skip_on_cudasim
6
+ from numba.cuda import utils
7
+
8
+ from llvmlite import ir
9
+ from llvmlite import binding as llvm
10
+
11
+ import unittest
12
+
13
+
14
+ original = (
15
+ "call void @llvm.memset.p0i8.i64("
16
+ "i8* align 4 %arg.x.41, i8 0, i64 %0, i1 false)"
17
+ )
18
+
19
+ missing_align = (
20
+ "call void @llvm.memset.p0i8.i64(i8* %arg.x.41, i8 0, i64 %0, i1 false)"
21
+ )
22
+
23
+
24
@skip_on_cudasim("libNVVM not supported in simulator")
@unittest.skipIf(utils.MACHINE_BITS == 32, "CUDA not support for 32-bit")
@unittest.skipIf(not nvvm.is_available(), "No libNVVM")
class TestNvvmWithoutCuda(unittest.TestCase):
    """NVVM checks that compile IR without launching anything on a device."""

    def test_nvvm_accepts_encoding(self):
        # NVVM must accept a constant holding every possible 8-bit value.
        # The case comes from llvmlite PR #53:
        #
        #     https://github.com/numba/llvmlite/pull/53
        #
        # It is kept here to make sure that whichever encoding llvmlite
        # uses (e.g. utf-8, latin1, etc.) never yields NVVM input that
        # cannot be parsed correctly.

        # Build a module with one global constant containing bytes 0..255
        byte_array_type = ir.ArrayType(ir.IntType(8), 256)
        c = ir.Constant(byte_array_type, bytearray(range(256)))
        module = ir.Module()
        module.triple = "nvptx64-nvidia-cuda"
        nvvm.add_ir_version(module)
        gv = ir.GlobalVariable(module, c.type, "myconstant")
        gv.global_constant = True
        gv.initializer = c
        module.data_layout = nvvm.NVVM().data_layout

        # Round-trip through the LLVM parser, then hand the text to NVVM
        parsed = llvm.parse_assembly(str(module))
        ptx = nvvm.compile_ir(str(parsed))

        # Every value must show up in the emitted constant array.
        elements = ", ".join(map(str, range(256)))
        myconstant = f"myconstant[256] = {{{elements}}}".encode("utf-8")
        self.assertIn(myconstant, ptx)
57
+
58
+ if __name__ == "__main__":
59
+ unittest.main()
@@ -0,0 +1,9 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: BSD-2-Clause
3
+
4
+ from numba.cuda.tests import load_testsuite
5
+ import os
6
+
7
+
8
def load_tests(loader, tests, pattern):
    """unittest ``load_tests`` hook: load the suite from this directory."""
    this_dir = os.path.dirname(__file__)
    return load_testsuite(loader, this_dir)
@@ -0,0 +1,387 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: BSD-2-Clause
3
+
4
+ import re
5
+ import os
6
+
7
+ import numpy as np
8
+ import unittest
9
+ from numba.cuda.testing import CUDATestCase, skip_on_cudasim
10
+ from numba.cuda.tests.support import run_in_subprocess, override_config
11
+ from numba.cuda import get_current_device
12
+ from numba.cuda.cudadrv.nvrtc import compile
13
+ from numba.cuda import types
14
+ from numba.cuda.typing import signature
15
+ from numba import cuda
16
+ from numba.cuda import config
17
+ from numba.cuda.typing.templates import AbstractTemplate
18
+ from numba.cuda.cudadrv.linkable_code import (
19
+ CUSource,
20
+ PTXSource,
21
+ Fatbin,
22
+ Cubin,
23
+ Archive,
24
+ Object,
25
+ )
26
+
27
+ TEST_BIN_DIR = os.getenv("NUMBA_CUDA_TEST_BIN_DIR")
28
+
29
if not config.ENABLE_CUDASIM:
    from numba.cuda.memory_management.nrt import rtsys, get_include
    from numba.cuda.cudadecl import registry as cuda_decl_registry
    from numba.cuda.cudaimpl import lower as cuda_lower

    def allocate_deallocate_handle():
        """
        Python-level placeholder used inside kernels; its lowering calls the
        external device function that exercises NRT_Allocate and NRT_Free.
        """

    @cuda_decl_registry.register_global(allocate_deallocate_handle)
    class AllocateShimImpl(AbstractTemplate):
        """Typing template: the handle takes no arguments and returns void."""

        def generic(self, args, kws):
            return signature(types.void)

    # Declaration of the externally linked device function.
    device_fun_shim = cuda.declare_device(
        "device_allocate_deallocate", types.int32()
    )

    # Plain Python wrapper so the declaration can be compiled internally.
    def call_device_fun_shim():
        return device_fun_shim()

    @cuda_lower(allocate_deallocate_handle)
    def allocate_deallocate_impl(context, builder, sig, args):
        """Lower the handle to a call of the external device function."""
        shim_sig = types.int32()
        # The external function is called with no arguments.
        return context.compile_internal(
            builder,
            call_device_fun_shim,
            shim_sig,
            (),
        )
65
+
66
if TEST_BIN_DIR:

    def make_linkable_code(name, kind, mode):
        """Wrap the contents of ``TEST_BIN_DIR/name`` in *kind*.

        The file is opened with open-mode *mode* and the resulting object is
        created with ``nrt=True``.
        """
        with open(os.path.join(TEST_BIN_DIR, name), mode) as stream:
            payload = stream.read()
        return kind(payload, nrt=True)

    # One linkable object per prebuilt artifact, all read as bytes.
    nrt_extern_a = make_linkable_code("nrt_extern.a", Archive, "rb")
    nrt_extern_cubin = make_linkable_code("nrt_extern.cubin", Cubin, "rb")
    nrt_extern_cu = make_linkable_code("nrt_extern.cu", CUSource, "rb")
    nrt_extern_fatbin = make_linkable_code("nrt_extern.fatbin", Fatbin, "rb")
    nrt_extern_fatbin_multi = make_linkable_code(
        "nrt_extern_multi.fatbin", Fatbin, "rb"
    )
    nrt_extern_o = make_linkable_code("nrt_extern.o", Object, "rb")
    nrt_extern_ptx = make_linkable_code("nrt_extern.ptx", PTXSource, "rb")
89
+
90
+
91
class TestNrtBasic(CUDATestCase):
    """Basic kernels that allocate arrays on the device with NRT enabled."""

    def run(self, result=None):
        """Run the test with ``CUDA_ENABLE_NRT`` overridden to True.

        The result object from the base implementation is returned, as
        ``unittest.TestCase.run`` does, so callers relying on it still work.
        """
        with override_config("CUDA_ENABLE_NRT", True):
            return super().run(result)

    def test_nrt_launches(self):
        @cuda.jit
        def f(x):
            return x[:5]

        @cuda.jit
        def g():
            x = np.empty(10, np.int64)
            f(x)

        g[1, 1]()
        cuda.synchronize()

    @skip_on_cudasim("CUDA Simulator does not produce PTX")
    def test_nrt_ptx_contains_refcount(self):
        @cuda.jit
        def f(x):
            return x[:5]

        @cuda.jit
        def g():
            x = np.empty(10, np.int64)
            f(x)

        g[1, 1]()

        ptx = next(iter(g.inspect_asm().values()))

        # The following checks that a `call` PTX instruction is
        # emitted for NRT_MemInfo_alloc_aligned, NRT_incref and
        # NRT_decref. assertRegex is used rather than a bare `assert` so
        # the checks still run when Python is started with -O.
        p1 = r"call\.uni(.|\n)*NRT_MemInfo_alloc_aligned"
        self.assertRegex(ptx, p1)

        p2 = r"call\.uni.*\n?.*NRT_incref"
        self.assertRegex(ptx, p2)

        p3 = r"call\.uni.*\n?.*NRT_decref"
        self.assertRegex(ptx, p3)

    def test_nrt_returns_correct(self):
        @cuda.jit
        def f(x):
            return x[5:]

        @cuda.jit
        def g(out_ary):
            x = np.empty(10, np.int64)
            x[5] = 1
            y = f(x)
            out_ary[0] = y[0]

        out_ary = np.zeros(1, dtype=np.int64)

        g[1, 1](out_ary)

        # y is x[5:], so y[0] is the element written at x[5].
        self.assertEqual(out_ary[0], 1)
156
+
157
+
158
class TestNrtLinking(CUDATestCase):
    """Kernels that link external code marked as using NRT (``nrt=True``)."""

    def run(self, result=None):
        """Run the test with ``CUDA_ENABLE_NRT`` overridden to True.

        The result object from the base implementation is returned, as
        ``unittest.TestCase.run`` does, so callers relying on it still work.
        """
        with override_config("CUDA_ENABLE_NRT", True):
            return super().run(result)

    @skip_on_cudasim("CUDA Simulator does not link PTX")
    def test_nrt_detect_linked_ptx_file(self):
        # Compile a small CUDA source that calls NRT_Allocate / NRT_Free,
        # then link the resulting PTX into a kernel.
        src = f"#include <{get_include()}/nrt.cuh>"
        src += """
        extern "C" __device__ int device_allocate_deallocate(int* nb_retval){
            auto ptr = NRT_Allocate(1);
            NRT_Free(ptr);
            return 0;
        }
        """
        cc = get_current_device().compute_capability
        ptx, _ = compile(src, "external_nrt.cu", cc)

        @cuda.jit(link=[PTXSource(ptx.code, nrt=True)])
        def kernel():
            allocate_deallocate_handle()

        kernel[1, 1]()

    @unittest.skipIf(not TEST_BIN_DIR, "necessary binaries not generated.")
    @skip_on_cudasim("CUDA Simulator does not link code")
    def test_nrt_detect_linkable_code(self):
        # Each prebuilt artifact kind is linked into its own kernel.
        codes = (
            nrt_extern_a,
            nrt_extern_cubin,
            nrt_extern_cu,
            nrt_extern_fatbin,
            nrt_extern_fatbin_multi,
            nrt_extern_o,
            nrt_extern_ptx,
        )
        for code in codes:
            with self.subTest(code=code):

                @cuda.jit(link=[code])
                def kernel():
                    allocate_deallocate_handle()

                kernel[1, 1]()
202
+
203
+
204
@skip_on_cudasim("CUDASIM does not have NRT statistics")
class TestNrtStatistics(CUDATestCase):
    """Tests for enabling, disabling and reading the NRT allocation stats."""

    def setUp(self):
        # Chain to the base fixture so that whatever CUDATestCase sets up is
        # also applied here (its setUp is not visible in this file).
        super().setUp()
        self._stream = cuda.default_stream()
        # Store the current stats state
        self.__stats_state = rtsys.memsys_stats_enabled(self._stream)

    def tearDown(self):
        # Set stats state back to whatever it was before the test ran
        if self.__stats_state:
            rtsys.memsys_enable_stats(self._stream)
        else:
            rtsys.memsys_disable_stats(self._stream)
        super().tearDown()

    def test_stats_env_var_explicit_on(self):
        # Checks that explicitly turning the stats on via the env var works.
        src = """if 1:
        from numba import cuda
        from numba.cuda.memory_management import rtsys
        import numpy as np

        @cuda.jit
        def foo():
            x = np.arange(10)[0]

        # initialize the NRT before use
        rtsys.initialize()
        assert rtsys.memsys_stats_enabled(), "Stats not enabled"
        orig_stats = rtsys.get_allocation_stats()
        foo[1, 1]()
        new_stats = rtsys.get_allocation_stats()
        total_alloc = new_stats.alloc - orig_stats.alloc
        total_free = new_stats.free - orig_stats.free
        total_mi_alloc = new_stats.mi_alloc - orig_stats.mi_alloc
        total_mi_free = new_stats.mi_free - orig_stats.mi_free

        expected = 1
        assert total_alloc == expected, \\
            f"total_alloc != expected, {total_alloc} != {expected}"
        assert total_free == expected, \\
            f"total_free != expected, {total_free} != {expected}"
        assert total_mi_alloc == expected, \\
            f"total_mi_alloc != expected, {total_mi_alloc} != {expected}"
        assert total_mi_free == expected, \\
            f"total_mi_free != expected, {total_mi_free} != {expected}"
        """

        # Check env var explicitly being set works
        env = os.environ.copy()
        env["NUMBA_CUDA_NRT_STATS"] = "1"
        env["NUMBA_CUDA_ENABLE_NRT"] = "1"
        run_in_subprocess(src, env=env)

    def check_env_var_off(self, env):
        """Run a subprocess with *env* and check that stats are disabled.

        The script asserts that stats report as disabled and that querying
        them raises ``RuntimeError``. The ``else`` branch makes the script
        fail when no exception is raised; without it that case passed
        silently.
        """
        src = """if 1:
        from numba import cuda
        import numpy as np
        from numba.cuda.memory_management import rtsys

        @cuda.jit
        def foo():
            arr = np.arange(10)[0]

        assert rtsys.memsys_stats_enabled() == False
        try:
            rtsys.get_allocation_stats()
        except RuntimeError as e:
            assert "NRT stats are disabled." in str(e)
        else:
            raise AssertionError("get_allocation_stats() did not raise")
        """
        run_in_subprocess(src, env=env)

    def test_stats_env_var_explicit_off(self):
        # Checks that explicitly turning the stats off via the env var works.
        env = os.environ.copy()
        env["NUMBA_CUDA_NRT_STATS"] = "0"
        self.check_env_var_off(env)

    def test_stats_env_var_default_off(self):
        # Checks that the env var not being set is the same as "off", i.e.
        # default for Numba is off.
        env = os.environ.copy()
        env.pop("NUMBA_CUDA_NRT_STATS", None)
        self.check_env_var_off(env)

    def test_stats_status_toggle(self):
        @cuda.jit
        def foo():
            tmp = np.ones(3)
            arr = np.arange(5 * tmp[0])  # noqa: F841
            return None

        with (
            override_config("CUDA_ENABLE_NRT", True),
            override_config("CUDA_NRT_STATS", True),
        ):
            # Switch on stats
            rtsys.memsys_enable_stats()
            # check the stats are on
            self.assertTrue(rtsys.memsys_stats_enabled())

            # Two rounds, to check that toggling is repeatable.
            for _ in range(2):
                # capture the stats state
                stats_1 = rtsys.get_allocation_stats()
                # Switch off stats
                rtsys.memsys_disable_stats()
                # check the stats are off
                self.assertFalse(rtsys.memsys_stats_enabled())
                # run something that would move the counters were they enabled
                foo[1, 1]()
                # Switch on stats
                rtsys.memsys_enable_stats()
                # check the stats are on
                self.assertTrue(rtsys.memsys_stats_enabled())
                # capture the stats state (should not have changed)
                stats_2 = rtsys.get_allocation_stats()
                # run something that will move the counters
                foo[1, 1]()
                # capture the stats state (should have changed)
                stats_3 = rtsys.get_allocation_stats()
                # check stats_1 == stats_2
                self.assertEqual(stats_1, stats_2)
                # check stats_2 < stats_3
                self.assertLess(stats_2, stats_3)

    def test_rtsys_stats_query_raises_exception_when_disabled(self):
        # Checks that the standard rtsys.get_allocation_stats() query raises
        # when stats counters are turned off.

        rtsys.memsys_disable_stats()
        self.assertFalse(rtsys.memsys_stats_enabled())

        with self.assertRaises(RuntimeError) as raises:
            rtsys.get_allocation_stats()

        self.assertIn("NRT stats are disabled.", str(raises.exception))

    def test_nrt_explicit_stats_query_raises_exception_when_disabled(self):
        # Checks the various memsys_get_stats functions raise if queried when
        # the stats counters are disabled.
        method_variations = ("alloc", "free", "mi_alloc", "mi_free")
        for meth in method_variations:
            stats_func = getattr(rtsys, f"memsys_get_stats_{meth}")
            with self.subTest(stats_func=stats_func):
                # Turn stats off
                rtsys.memsys_disable_stats()
                self.assertFalse(rtsys.memsys_stats_enabled())
                with self.assertRaises(RuntimeError) as raises:
                    stats_func()
                self.assertIn("NRT stats are disabled.", str(raises.exception))

    def test_read_one_stat(self):
        @cuda.jit
        def foo():
            tmp = np.ones(3)
            arr = np.arange(5 * tmp[0])  # noqa: F841
            return None

        with (
            override_config("CUDA_ENABLE_NRT", True),
            override_config("CUDA_NRT_STATS", True),
        ):
            # Switch on stats
            rtsys.memsys_enable_stats()

            # Launch the kernel a couple of times to increase stats
            foo[1, 1]()
            foo[1, 1]()

            # Get stats struct and individual stats
            stats = rtsys.get_allocation_stats()
            stats_alloc = rtsys.memsys_get_stats_alloc()
            stats_mi_alloc = rtsys.memsys_get_stats_mi_alloc()
            stats_free = rtsys.memsys_get_stats_free()
            stats_mi_free = rtsys.memsys_get_stats_mi_free()

            # Check individual stats match stats struct
            self.assertEqual(stats.alloc, stats_alloc)
            self.assertEqual(stats.mi_alloc, stats_mi_alloc)
            self.assertEqual(stats.free, stats_free)
            self.assertEqual(stats.mi_free, stats_mi_free)
384
+
385
+
386
+ if __name__ == "__main__":
387
+ unittest.main()