numba-cuda 0.21.1__cp313-cp313-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (488)
  1. _numba_cuda_redirector.pth +4 -0
  2. _numba_cuda_redirector.py +89 -0
  3. numba_cuda/VERSION +1 -0
  4. numba_cuda/__init__.py +6 -0
  5. numba_cuda/_version.py +11 -0
  6. numba_cuda/numba/cuda/__init__.py +70 -0
  7. numba_cuda/numba/cuda/_internal/cuda_bf16.py +16394 -0
  8. numba_cuda/numba/cuda/_internal/cuda_fp16.py +8112 -0
  9. numba_cuda/numba/cuda/api.py +577 -0
  10. numba_cuda/numba/cuda/api_util.py +76 -0
  11. numba_cuda/numba/cuda/args.py +72 -0
  12. numba_cuda/numba/cuda/bf16.py +397 -0
  13. numba_cuda/numba/cuda/cache_hints.py +287 -0
  14. numba_cuda/numba/cuda/cext/__init__.py +2 -0
  15. numba_cuda/numba/cuda/cext/_devicearray.cp313-win_amd64.pyd +0 -0
  16. numba_cuda/numba/cuda/cext/_devicearray.cpp +159 -0
  17. numba_cuda/numba/cuda/cext/_devicearray.h +29 -0
  18. numba_cuda/numba/cuda/cext/_dispatcher.cp313-win_amd64.pyd +0 -0
  19. numba_cuda/numba/cuda/cext/_dispatcher.cpp +1098 -0
  20. numba_cuda/numba/cuda/cext/_hashtable.cpp +532 -0
  21. numba_cuda/numba/cuda/cext/_hashtable.h +135 -0
  22. numba_cuda/numba/cuda/cext/_helperlib.c +71 -0
  23. numba_cuda/numba/cuda/cext/_helperlib.cp313-win_amd64.pyd +0 -0
  24. numba_cuda/numba/cuda/cext/_helpermod.c +82 -0
  25. numba_cuda/numba/cuda/cext/_pymodule.h +38 -0
  26. numba_cuda/numba/cuda/cext/_typeconv.cp313-win_amd64.pyd +0 -0
  27. numba_cuda/numba/cuda/cext/_typeconv.cpp +206 -0
  28. numba_cuda/numba/cuda/cext/_typeof.cpp +1159 -0
  29. numba_cuda/numba/cuda/cext/_typeof.h +19 -0
  30. numba_cuda/numba/cuda/cext/capsulethunk.h +111 -0
  31. numba_cuda/numba/cuda/cext/mviewbuf.c +385 -0
  32. numba_cuda/numba/cuda/cext/mviewbuf.cp313-win_amd64.pyd +0 -0
  33. numba_cuda/numba/cuda/cext/typeconv.cpp +212 -0
  34. numba_cuda/numba/cuda/cext/typeconv.hpp +101 -0
  35. numba_cuda/numba/cuda/cg.py +67 -0
  36. numba_cuda/numba/cuda/cgutils.py +1294 -0
  37. numba_cuda/numba/cuda/cloudpickle/__init__.py +21 -0
  38. numba_cuda/numba/cuda/cloudpickle/cloudpickle.py +1598 -0
  39. numba_cuda/numba/cuda/cloudpickle/cloudpickle_fast.py +17 -0
  40. numba_cuda/numba/cuda/codegen.py +541 -0
  41. numba_cuda/numba/cuda/compiler.py +1396 -0
  42. numba_cuda/numba/cuda/core/analysis.py +758 -0
  43. numba_cuda/numba/cuda/core/annotations/__init__.py +0 -0
  44. numba_cuda/numba/cuda/core/annotations/pretty_annotate.py +288 -0
  45. numba_cuda/numba/cuda/core/annotations/type_annotations.py +305 -0
  46. numba_cuda/numba/cuda/core/base.py +1332 -0
  47. numba_cuda/numba/cuda/core/boxing.py +1411 -0
  48. numba_cuda/numba/cuda/core/bytecode.py +728 -0
  49. numba_cuda/numba/cuda/core/byteflow.py +2346 -0
  50. numba_cuda/numba/cuda/core/caching.py +744 -0
  51. numba_cuda/numba/cuda/core/callconv.py +392 -0
  52. numba_cuda/numba/cuda/core/codegen.py +171 -0
  53. numba_cuda/numba/cuda/core/compiler.py +199 -0
  54. numba_cuda/numba/cuda/core/compiler_lock.py +85 -0
  55. numba_cuda/numba/cuda/core/compiler_machinery.py +497 -0
  56. numba_cuda/numba/cuda/core/config.py +650 -0
  57. numba_cuda/numba/cuda/core/consts.py +124 -0
  58. numba_cuda/numba/cuda/core/controlflow.py +989 -0
  59. numba_cuda/numba/cuda/core/entrypoints.py +57 -0
  60. numba_cuda/numba/cuda/core/environment.py +66 -0
  61. numba_cuda/numba/cuda/core/errors.py +917 -0
  62. numba_cuda/numba/cuda/core/event.py +511 -0
  63. numba_cuda/numba/cuda/core/funcdesc.py +330 -0
  64. numba_cuda/numba/cuda/core/generators.py +387 -0
  65. numba_cuda/numba/cuda/core/imputils.py +509 -0
  66. numba_cuda/numba/cuda/core/inline_closurecall.py +1787 -0
  67. numba_cuda/numba/cuda/core/interpreter.py +3617 -0
  68. numba_cuda/numba/cuda/core/ir.py +1812 -0
  69. numba_cuda/numba/cuda/core/ir_utils.py +2638 -0
  70. numba_cuda/numba/cuda/core/optional.py +129 -0
  71. numba_cuda/numba/cuda/core/options.py +262 -0
  72. numba_cuda/numba/cuda/core/postproc.py +249 -0
  73. numba_cuda/numba/cuda/core/pythonapi.py +1859 -0
  74. numba_cuda/numba/cuda/core/registry.py +46 -0
  75. numba_cuda/numba/cuda/core/removerefctpass.py +123 -0
  76. numba_cuda/numba/cuda/core/rewrites/__init__.py +26 -0
  77. numba_cuda/numba/cuda/core/rewrites/ir_print.py +91 -0
  78. numba_cuda/numba/cuda/core/rewrites/registry.py +104 -0
  79. numba_cuda/numba/cuda/core/rewrites/static_binop.py +41 -0
  80. numba_cuda/numba/cuda/core/rewrites/static_getitem.py +189 -0
  81. numba_cuda/numba/cuda/core/rewrites/static_raise.py +100 -0
  82. numba_cuda/numba/cuda/core/sigutils.py +68 -0
  83. numba_cuda/numba/cuda/core/ssa.py +498 -0
  84. numba_cuda/numba/cuda/core/targetconfig.py +330 -0
  85. numba_cuda/numba/cuda/core/tracing.py +231 -0
  86. numba_cuda/numba/cuda/core/transforms.py +956 -0
  87. numba_cuda/numba/cuda/core/typed_passes.py +867 -0
  88. numba_cuda/numba/cuda/core/typeinfer.py +1950 -0
  89. numba_cuda/numba/cuda/core/unsafe/__init__.py +0 -0
  90. numba_cuda/numba/cuda/core/unsafe/bytes.py +67 -0
  91. numba_cuda/numba/cuda/core/unsafe/eh.py +67 -0
  92. numba_cuda/numba/cuda/core/unsafe/refcount.py +98 -0
  93. numba_cuda/numba/cuda/core/untyped_passes.py +1979 -0
  94. numba_cuda/numba/cuda/cpython/builtins.py +1153 -0
  95. numba_cuda/numba/cuda/cpython/charseq.py +1218 -0
  96. numba_cuda/numba/cuda/cpython/cmathimpl.py +560 -0
  97. numba_cuda/numba/cuda/cpython/enumimpl.py +103 -0
  98. numba_cuda/numba/cuda/cpython/iterators.py +167 -0
  99. numba_cuda/numba/cuda/cpython/listobj.py +1326 -0
  100. numba_cuda/numba/cuda/cpython/mathimpl.py +499 -0
  101. numba_cuda/numba/cuda/cpython/numbers.py +1475 -0
  102. numba_cuda/numba/cuda/cpython/rangeobj.py +289 -0
  103. numba_cuda/numba/cuda/cpython/slicing.py +322 -0
  104. numba_cuda/numba/cuda/cpython/tupleobj.py +456 -0
  105. numba_cuda/numba/cuda/cpython/unicode.py +2865 -0
  106. numba_cuda/numba/cuda/cpython/unicode_support.py +1597 -0
  107. numba_cuda/numba/cuda/cpython/unsafe/__init__.py +0 -0
  108. numba_cuda/numba/cuda/cpython/unsafe/numbers.py +64 -0
  109. numba_cuda/numba/cuda/cpython/unsafe/tuple.py +92 -0
  110. numba_cuda/numba/cuda/cuda_paths.py +691 -0
  111. numba_cuda/numba/cuda/cudadecl.py +556 -0
  112. numba_cuda/numba/cuda/cudadrv/__init__.py +14 -0
  113. numba_cuda/numba/cuda/cudadrv/devicearray.py +951 -0
  114. numba_cuda/numba/cuda/cudadrv/devices.py +249 -0
  115. numba_cuda/numba/cuda/cudadrv/driver.py +3222 -0
  116. numba_cuda/numba/cuda/cudadrv/drvapi.py +435 -0
  117. numba_cuda/numba/cuda/cudadrv/dummyarray.py +558 -0
  118. numba_cuda/numba/cuda/cudadrv/enums.py +613 -0
  119. numba_cuda/numba/cuda/cudadrv/error.py +48 -0
  120. numba_cuda/numba/cuda/cudadrv/libs.py +220 -0
  121. numba_cuda/numba/cuda/cudadrv/linkable_code.py +184 -0
  122. numba_cuda/numba/cuda/cudadrv/mappings.py +14 -0
  123. numba_cuda/numba/cuda/cudadrv/ndarray.py +26 -0
  124. numba_cuda/numba/cuda/cudadrv/nvrtc.py +193 -0
  125. numba_cuda/numba/cuda/cudadrv/nvvm.py +756 -0
  126. numba_cuda/numba/cuda/cudadrv/rtapi.py +13 -0
  127. numba_cuda/numba/cuda/cudadrv/runtime.py +34 -0
  128. numba_cuda/numba/cuda/cudaimpl.py +995 -0
  129. numba_cuda/numba/cuda/cudamath.py +149 -0
  130. numba_cuda/numba/cuda/datamodel/__init__.py +7 -0
  131. numba_cuda/numba/cuda/datamodel/cuda_manager.py +66 -0
  132. numba_cuda/numba/cuda/datamodel/cuda_models.py +1446 -0
  133. numba_cuda/numba/cuda/datamodel/cuda_packer.py +224 -0
  134. numba_cuda/numba/cuda/datamodel/cuda_registry.py +22 -0
  135. numba_cuda/numba/cuda/datamodel/cuda_testing.py +153 -0
  136. numba_cuda/numba/cuda/datamodel/manager.py +11 -0
  137. numba_cuda/numba/cuda/datamodel/models.py +9 -0
  138. numba_cuda/numba/cuda/datamodel/packer.py +9 -0
  139. numba_cuda/numba/cuda/datamodel/registry.py +11 -0
  140. numba_cuda/numba/cuda/datamodel/testing.py +11 -0
  141. numba_cuda/numba/cuda/debuginfo.py +903 -0
  142. numba_cuda/numba/cuda/decorators.py +294 -0
  143. numba_cuda/numba/cuda/descriptor.py +35 -0
  144. numba_cuda/numba/cuda/device_init.py +158 -0
  145. numba_cuda/numba/cuda/deviceufunc.py +1021 -0
  146. numba_cuda/numba/cuda/dispatcher.py +2463 -0
  147. numba_cuda/numba/cuda/errors.py +72 -0
  148. numba_cuda/numba/cuda/extending.py +697 -0
  149. numba_cuda/numba/cuda/flags.py +178 -0
  150. numba_cuda/numba/cuda/fp16.py +357 -0
  151. numba_cuda/numba/cuda/include/12/cuda_bf16.h +5118 -0
  152. numba_cuda/numba/cuda/include/12/cuda_bf16.hpp +3865 -0
  153. numba_cuda/numba/cuda/include/12/cuda_fp16.h +5363 -0
  154. numba_cuda/numba/cuda/include/12/cuda_fp16.hpp +3483 -0
  155. numba_cuda/numba/cuda/include/13/cuda_bf16.h +5118 -0
  156. numba_cuda/numba/cuda/include/13/cuda_bf16.hpp +3865 -0
  157. numba_cuda/numba/cuda/include/13/cuda_fp16.h +5363 -0
  158. numba_cuda/numba/cuda/include/13/cuda_fp16.hpp +3483 -0
  159. numba_cuda/numba/cuda/initialize.py +24 -0
  160. numba_cuda/numba/cuda/intrinsic_wrapper.py +41 -0
  161. numba_cuda/numba/cuda/intrinsics.py +382 -0
  162. numba_cuda/numba/cuda/itanium_mangler.py +214 -0
  163. numba_cuda/numba/cuda/kernels/__init__.py +2 -0
  164. numba_cuda/numba/cuda/kernels/reduction.py +265 -0
  165. numba_cuda/numba/cuda/kernels/transpose.py +65 -0
  166. numba_cuda/numba/cuda/libdevice.py +3386 -0
  167. numba_cuda/numba/cuda/libdevicedecl.py +20 -0
  168. numba_cuda/numba/cuda/libdevicefuncs.py +1060 -0
  169. numba_cuda/numba/cuda/libdeviceimpl.py +88 -0
  170. numba_cuda/numba/cuda/locks.py +19 -0
  171. numba_cuda/numba/cuda/lowering.py +1951 -0
  172. numba_cuda/numba/cuda/mathimpl.py +374 -0
  173. numba_cuda/numba/cuda/memory_management/__init__.py +4 -0
  174. numba_cuda/numba/cuda/memory_management/memsys.cu +99 -0
  175. numba_cuda/numba/cuda/memory_management/memsys.cuh +22 -0
  176. numba_cuda/numba/cuda/memory_management/nrt.cu +212 -0
  177. numba_cuda/numba/cuda/memory_management/nrt.cuh +48 -0
  178. numba_cuda/numba/cuda/memory_management/nrt.py +390 -0
  179. numba_cuda/numba/cuda/memory_management/nrt_context.py +438 -0
  180. numba_cuda/numba/cuda/misc/appdirs.py +594 -0
  181. numba_cuda/numba/cuda/misc/cffiimpl.py +24 -0
  182. numba_cuda/numba/cuda/misc/coverage_support.py +43 -0
  183. numba_cuda/numba/cuda/misc/dump_style.py +41 -0
  184. numba_cuda/numba/cuda/misc/findlib.py +75 -0
  185. numba_cuda/numba/cuda/misc/firstlinefinder.py +96 -0
  186. numba_cuda/numba/cuda/misc/gdb_hook.py +240 -0
  187. numba_cuda/numba/cuda/misc/literal.py +28 -0
  188. numba_cuda/numba/cuda/misc/llvm_pass_timings.py +412 -0
  189. numba_cuda/numba/cuda/misc/special.py +94 -0
  190. numba_cuda/numba/cuda/models.py +56 -0
  191. numba_cuda/numba/cuda/np/arraymath.py +5130 -0
  192. numba_cuda/numba/cuda/np/arrayobj.py +7635 -0
  193. numba_cuda/numba/cuda/np/extensions.py +11 -0
  194. numba_cuda/numba/cuda/np/linalg.py +3087 -0
  195. numba_cuda/numba/cuda/np/math/__init__.py +0 -0
  196. numba_cuda/numba/cuda/np/math/cmathimpl.py +558 -0
  197. numba_cuda/numba/cuda/np/math/mathimpl.py +487 -0
  198. numba_cuda/numba/cuda/np/math/numbers.py +1461 -0
  199. numba_cuda/numba/cuda/np/npdatetime.py +969 -0
  200. numba_cuda/numba/cuda/np/npdatetime_helpers.py +217 -0
  201. numba_cuda/numba/cuda/np/npyfuncs.py +1808 -0
  202. numba_cuda/numba/cuda/np/npyimpl.py +1027 -0
  203. numba_cuda/numba/cuda/np/numpy_support.py +798 -0
  204. numba_cuda/numba/cuda/np/polynomial/__init__.py +4 -0
  205. numba_cuda/numba/cuda/np/polynomial/polynomial_core.py +242 -0
  206. numba_cuda/numba/cuda/np/polynomial/polynomial_functions.py +380 -0
  207. numba_cuda/numba/cuda/np/ufunc/__init__.py +4 -0
  208. numba_cuda/numba/cuda/np/ufunc/decorators.py +203 -0
  209. numba_cuda/numba/cuda/np/ufunc/sigparse.py +68 -0
  210. numba_cuda/numba/cuda/np/ufunc/ufuncbuilder.py +65 -0
  211. numba_cuda/numba/cuda/np/ufunc_db.py +1282 -0
  212. numba_cuda/numba/cuda/np/unsafe/__init__.py +0 -0
  213. numba_cuda/numba/cuda/np/unsafe/ndarray.py +84 -0
  214. numba_cuda/numba/cuda/nvvmutils.py +254 -0
  215. numba_cuda/numba/cuda/printimpl.py +126 -0
  216. numba_cuda/numba/cuda/random.py +308 -0
  217. numba_cuda/numba/cuda/reshape_funcs.cu +156 -0
  218. numba_cuda/numba/cuda/serialize.py +267 -0
  219. numba_cuda/numba/cuda/simulator/__init__.py +63 -0
  220. numba_cuda/numba/cuda/simulator/_internal/__init__.py +4 -0
  221. numba_cuda/numba/cuda/simulator/_internal/cuda_bf16.py +2 -0
  222. numba_cuda/numba/cuda/simulator/api.py +179 -0
  223. numba_cuda/numba/cuda/simulator/bf16.py +4 -0
  224. numba_cuda/numba/cuda/simulator/compiler.py +38 -0
  225. numba_cuda/numba/cuda/simulator/cudadrv/__init__.py +11 -0
  226. numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py +462 -0
  227. numba_cuda/numba/cuda/simulator/cudadrv/devices.py +122 -0
  228. numba_cuda/numba/cuda/simulator/cudadrv/driver.py +66 -0
  229. numba_cuda/numba/cuda/simulator/cudadrv/drvapi.py +7 -0
  230. numba_cuda/numba/cuda/simulator/cudadrv/dummyarray.py +7 -0
  231. numba_cuda/numba/cuda/simulator/cudadrv/error.py +10 -0
  232. numba_cuda/numba/cuda/simulator/cudadrv/libs.py +10 -0
  233. numba_cuda/numba/cuda/simulator/cudadrv/linkable_code.py +61 -0
  234. numba_cuda/numba/cuda/simulator/cudadrv/nvrtc.py +11 -0
  235. numba_cuda/numba/cuda/simulator/cudadrv/nvvm.py +32 -0
  236. numba_cuda/numba/cuda/simulator/cudadrv/runtime.py +22 -0
  237. numba_cuda/numba/cuda/simulator/dispatcher.py +11 -0
  238. numba_cuda/numba/cuda/simulator/kernel.py +320 -0
  239. numba_cuda/numba/cuda/simulator/kernelapi.py +509 -0
  240. numba_cuda/numba/cuda/simulator/memory_management/__init__.py +4 -0
  241. numba_cuda/numba/cuda/simulator/memory_management/nrt.py +21 -0
  242. numba_cuda/numba/cuda/simulator/reduction.py +19 -0
  243. numba_cuda/numba/cuda/simulator/tests/support.py +4 -0
  244. numba_cuda/numba/cuda/simulator/vector_types.py +65 -0
  245. numba_cuda/numba/cuda/simulator_init.py +18 -0
  246. numba_cuda/numba/cuda/stubs.py +635 -0
  247. numba_cuda/numba/cuda/target.py +505 -0
  248. numba_cuda/numba/cuda/testing.py +347 -0
  249. numba_cuda/numba/cuda/tests/__init__.py +62 -0
  250. numba_cuda/numba/cuda/tests/benchmarks/__init__.py +0 -0
  251. numba_cuda/numba/cuda/tests/benchmarks/test_kernel_launch.py +119 -0
  252. numba_cuda/numba/cuda/tests/cloudpickle_main_class.py +9 -0
  253. numba_cuda/numba/cuda/tests/core/serialize_usecases.py +113 -0
  254. numba_cuda/numba/cuda/tests/core/test_itanium_mangler.py +83 -0
  255. numba_cuda/numba/cuda/tests/core/test_serialize.py +371 -0
  256. numba_cuda/numba/cuda/tests/cudadrv/__init__.py +9 -0
  257. numba_cuda/numba/cuda/tests/cudadrv/test_array_attr.py +147 -0
  258. numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py +161 -0
  259. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_array_slicing.py +397 -0
  260. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_auto_context.py +24 -0
  261. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py +180 -0
  262. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py +313 -0
  263. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_memory.py +187 -0
  264. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py +621 -0
  265. numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py +247 -0
  266. numba_cuda/numba/cuda/tests/cudadrv/test_detect.py +100 -0
  267. numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +198 -0
  268. numba_cuda/numba/cuda/tests/cudadrv/test_events.py +53 -0
  269. numba_cuda/numba/cuda/tests/cudadrv/test_host_alloc.py +72 -0
  270. numba_cuda/numba/cuda/tests/cudadrv/test_init.py +138 -0
  271. numba_cuda/numba/cuda/tests/cudadrv/test_inline_ptx.py +43 -0
  272. numba_cuda/numba/cuda/tests/cudadrv/test_is_fp16.py +15 -0
  273. numba_cuda/numba/cuda/tests/cudadrv/test_linkable_code.py +58 -0
  274. numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +348 -0
  275. numba_cuda/numba/cuda/tests/cudadrv/test_managed_alloc.py +128 -0
  276. numba_cuda/numba/cuda/tests/cudadrv/test_module_callbacks.py +301 -0
  277. numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py +174 -0
  278. numba_cuda/numba/cuda/tests/cudadrv/test_nvrtc.py +28 -0
  279. numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py +185 -0
  280. numba_cuda/numba/cuda/tests/cudadrv/test_pinned.py +39 -0
  281. numba_cuda/numba/cuda/tests/cudadrv/test_profiler.py +23 -0
  282. numba_cuda/numba/cuda/tests/cudadrv/test_reset_device.py +38 -0
  283. numba_cuda/numba/cuda/tests/cudadrv/test_runtime.py +48 -0
  284. numba_cuda/numba/cuda/tests/cudadrv/test_select_device.py +44 -0
  285. numba_cuda/numba/cuda/tests/cudadrv/test_streams.py +127 -0
  286. numba_cuda/numba/cuda/tests/cudapy/__init__.py +9 -0
  287. numba_cuda/numba/cuda/tests/cudapy/cache_usecases.py +231 -0
  288. numba_cuda/numba/cuda/tests/cudapy/cache_with_cpu_usecases.py +50 -0
  289. numba_cuda/numba/cuda/tests/cudapy/cg_cache_usecases.py +36 -0
  290. numba_cuda/numba/cuda/tests/cudapy/complex_usecases.py +116 -0
  291. numba_cuda/numba/cuda/tests/cudapy/enum_usecases.py +59 -0
  292. numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +62 -0
  293. numba_cuda/numba/cuda/tests/cudapy/jitlink.ptx +28 -0
  294. numba_cuda/numba/cuda/tests/cudapy/overload_usecases.py +33 -0
  295. numba_cuda/numba/cuda/tests/cudapy/recursion_usecases.py +104 -0
  296. numba_cuda/numba/cuda/tests/cudapy/test_alignment.py +47 -0
  297. numba_cuda/numba/cuda/tests/cudapy/test_analysis.py +1122 -0
  298. numba_cuda/numba/cuda/tests/cudapy/test_array.py +344 -0
  299. numba_cuda/numba/cuda/tests/cudapy/test_array_alignment.py +268 -0
  300. numba_cuda/numba/cuda/tests/cudapy/test_array_args.py +203 -0
  301. numba_cuda/numba/cuda/tests/cudapy/test_array_methods.py +63 -0
  302. numba_cuda/numba/cuda/tests/cudapy/test_array_reductions.py +360 -0
  303. numba_cuda/numba/cuda/tests/cudapy/test_atomics.py +1815 -0
  304. numba_cuda/numba/cuda/tests/cudapy/test_bfloat16.py +599 -0
  305. numba_cuda/numba/cuda/tests/cudapy/test_bfloat16_bindings.py +377 -0
  306. numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py +160 -0
  307. numba_cuda/numba/cuda/tests/cudapy/test_boolean.py +27 -0
  308. numba_cuda/numba/cuda/tests/cudapy/test_byteflow.py +98 -0
  309. numba_cuda/numba/cuda/tests/cudapy/test_cache_hints.py +210 -0
  310. numba_cuda/numba/cuda/tests/cudapy/test_caching.py +683 -0
  311. numba_cuda/numba/cuda/tests/cudapy/test_casting.py +265 -0
  312. numba_cuda/numba/cuda/tests/cudapy/test_cffi.py +42 -0
  313. numba_cuda/numba/cuda/tests/cudapy/test_compiler.py +718 -0
  314. numba_cuda/numba/cuda/tests/cudapy/test_complex.py +370 -0
  315. numba_cuda/numba/cuda/tests/cudapy/test_complex_kernel.py +23 -0
  316. numba_cuda/numba/cuda/tests/cudapy/test_const_string.py +142 -0
  317. numba_cuda/numba/cuda/tests/cudapy/test_constmem.py +178 -0
  318. numba_cuda/numba/cuda/tests/cudapy/test_cooperative_groups.py +193 -0
  319. numba_cuda/numba/cuda/tests/cudapy/test_copy_propagate.py +131 -0
  320. numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py +438 -0
  321. numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py +94 -0
  322. numba_cuda/numba/cuda/tests/cudapy/test_datetime.py +101 -0
  323. numba_cuda/numba/cuda/tests/cudapy/test_debug.py +105 -0
  324. numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +889 -0
  325. numba_cuda/numba/cuda/tests/cudapy/test_debuginfo_types.py +476 -0
  326. numba_cuda/numba/cuda/tests/cudapy/test_device_func.py +500 -0
  327. numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +820 -0
  328. numba_cuda/numba/cuda/tests/cudapy/test_enums.py +152 -0
  329. numba_cuda/numba/cuda/tests/cudapy/test_errors.py +111 -0
  330. numba_cuda/numba/cuda/tests/cudapy/test_exception.py +170 -0
  331. numba_cuda/numba/cuda/tests/cudapy/test_extending.py +1088 -0
  332. numba_cuda/numba/cuda/tests/cudapy/test_extending_types.py +71 -0
  333. numba_cuda/numba/cuda/tests/cudapy/test_fastmath.py +265 -0
  334. numba_cuda/numba/cuda/tests/cudapy/test_flow_control.py +1433 -0
  335. numba_cuda/numba/cuda/tests/cudapy/test_forall.py +57 -0
  336. numba_cuda/numba/cuda/tests/cudapy/test_freevar.py +34 -0
  337. numba_cuda/numba/cuda/tests/cudapy/test_frexp_ldexp.py +69 -0
  338. numba_cuda/numba/cuda/tests/cudapy/test_globals.py +62 -0
  339. numba_cuda/numba/cuda/tests/cudapy/test_gufunc.py +474 -0
  340. numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scalar.py +167 -0
  341. numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scheduling.py +92 -0
  342. numba_cuda/numba/cuda/tests/cudapy/test_idiv.py +39 -0
  343. numba_cuda/numba/cuda/tests/cudapy/test_inline.py +170 -0
  344. numba_cuda/numba/cuda/tests/cudapy/test_inspect.py +255 -0
  345. numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py +1219 -0
  346. numba_cuda/numba/cuda/tests/cudapy/test_ipc.py +263 -0
  347. numba_cuda/numba/cuda/tests/cudapy/test_ir.py +598 -0
  348. numba_cuda/numba/cuda/tests/cudapy/test_ir_utils.py +276 -0
  349. numba_cuda/numba/cuda/tests/cudapy/test_iterators.py +101 -0
  350. numba_cuda/numba/cuda/tests/cudapy/test_lang.py +68 -0
  351. numba_cuda/numba/cuda/tests/cudapy/test_laplace.py +123 -0
  352. numba_cuda/numba/cuda/tests/cudapy/test_libdevice.py +194 -0
  353. numba_cuda/numba/cuda/tests/cudapy/test_lineinfo.py +220 -0
  354. numba_cuda/numba/cuda/tests/cudapy/test_localmem.py +173 -0
  355. numba_cuda/numba/cuda/tests/cudapy/test_make_function_to_jit_function.py +364 -0
  356. numba_cuda/numba/cuda/tests/cudapy/test_mandel.py +47 -0
  357. numba_cuda/numba/cuda/tests/cudapy/test_math.py +842 -0
  358. numba_cuda/numba/cuda/tests/cudapy/test_matmul.py +76 -0
  359. numba_cuda/numba/cuda/tests/cudapy/test_minmax.py +78 -0
  360. numba_cuda/numba/cuda/tests/cudapy/test_montecarlo.py +25 -0
  361. numba_cuda/numba/cuda/tests/cudapy/test_multigpu.py +145 -0
  362. numba_cuda/numba/cuda/tests/cudapy/test_multiprocessing.py +39 -0
  363. numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py +82 -0
  364. numba_cuda/numba/cuda/tests/cudapy/test_nondet.py +53 -0
  365. numba_cuda/numba/cuda/tests/cudapy/test_operator.py +504 -0
  366. numba_cuda/numba/cuda/tests/cudapy/test_optimization.py +93 -0
  367. numba_cuda/numba/cuda/tests/cudapy/test_overload.py +402 -0
  368. numba_cuda/numba/cuda/tests/cudapy/test_powi.py +128 -0
  369. numba_cuda/numba/cuda/tests/cudapy/test_print.py +193 -0
  370. numba_cuda/numba/cuda/tests/cudapy/test_py2_div_issue.py +37 -0
  371. numba_cuda/numba/cuda/tests/cudapy/test_random.py +117 -0
  372. numba_cuda/numba/cuda/tests/cudapy/test_record_dtype.py +614 -0
  373. numba_cuda/numba/cuda/tests/cudapy/test_recursion.py +130 -0
  374. numba_cuda/numba/cuda/tests/cudapy/test_reduction.py +94 -0
  375. numba_cuda/numba/cuda/tests/cudapy/test_retrieve_autoconverted_arrays.py +83 -0
  376. numba_cuda/numba/cuda/tests/cudapy/test_serialize.py +86 -0
  377. numba_cuda/numba/cuda/tests/cudapy/test_slicing.py +40 -0
  378. numba_cuda/numba/cuda/tests/cudapy/test_sm.py +457 -0
  379. numba_cuda/numba/cuda/tests/cudapy/test_sm_creation.py +233 -0
  380. numba_cuda/numba/cuda/tests/cudapy/test_ssa.py +454 -0
  381. numba_cuda/numba/cuda/tests/cudapy/test_stream_api.py +56 -0
  382. numba_cuda/numba/cuda/tests/cudapy/test_sync.py +277 -0
  383. numba_cuda/numba/cuda/tests/cudapy/test_tracing.py +200 -0
  384. numba_cuda/numba/cuda/tests/cudapy/test_transpose.py +90 -0
  385. numba_cuda/numba/cuda/tests/cudapy/test_typeconv.py +333 -0
  386. numba_cuda/numba/cuda/tests/cudapy/test_typeinfer.py +538 -0
  387. numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +585 -0
  388. numba_cuda/numba/cuda/tests/cudapy/test_userexc.py +42 -0
  389. numba_cuda/numba/cuda/tests/cudapy/test_vector_type.py +485 -0
  390. numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py +312 -0
  391. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_complex.py +23 -0
  392. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py +183 -0
  393. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_device.py +40 -0
  394. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_scalar_arg.py +40 -0
  395. numba_cuda/numba/cuda/tests/cudapy/test_warning.py +206 -0
  396. numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +331 -0
  397. numba_cuda/numba/cuda/tests/cudasim/__init__.py +9 -0
  398. numba_cuda/numba/cuda/tests/cudasim/support.py +9 -0
  399. numba_cuda/numba/cuda/tests/cudasim/test_cudasim_issues.py +111 -0
  400. numba_cuda/numba/cuda/tests/data/__init__.py +2 -0
  401. numba_cuda/numba/cuda/tests/data/cta_barrier.cu +28 -0
  402. numba_cuda/numba/cuda/tests/data/cuda_include.cu +10 -0
  403. numba_cuda/numba/cuda/tests/data/error.cu +12 -0
  404. numba_cuda/numba/cuda/tests/data/include/add.cuh +8 -0
  405. numba_cuda/numba/cuda/tests/data/jitlink.cu +28 -0
  406. numba_cuda/numba/cuda/tests/data/jitlink.ptx +49 -0
  407. numba_cuda/numba/cuda/tests/data/warn.cu +12 -0
  408. numba_cuda/numba/cuda/tests/doc_examples/__init__.py +9 -0
  409. numba_cuda/numba/cuda/tests/doc_examples/ffi/__init__.py +2 -0
  410. numba_cuda/numba/cuda/tests/doc_examples/ffi/functions.cu +54 -0
  411. numba_cuda/numba/cuda/tests/doc_examples/ffi/include/mul.cuh +8 -0
  412. numba_cuda/numba/cuda/tests/doc_examples/ffi/saxpy.cu +14 -0
  413. numba_cuda/numba/cuda/tests/doc_examples/test_cg.py +86 -0
  414. numba_cuda/numba/cuda/tests/doc_examples/test_cpointer.py +68 -0
  415. numba_cuda/numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py +81 -0
  416. numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py +141 -0
  417. numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py +160 -0
  418. numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py +180 -0
  419. numba_cuda/numba/cuda/tests/doc_examples/test_montecarlo.py +119 -0
  420. numba_cuda/numba/cuda/tests/doc_examples/test_random.py +66 -0
  421. numba_cuda/numba/cuda/tests/doc_examples/test_reduction.py +80 -0
  422. numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py +206 -0
  423. numba_cuda/numba/cuda/tests/doc_examples/test_ufunc.py +53 -0
  424. numba_cuda/numba/cuda/tests/doc_examples/test_vecadd.py +76 -0
  425. numba_cuda/numba/cuda/tests/nocuda/__init__.py +9 -0
  426. numba_cuda/numba/cuda/tests/nocuda/test_dummyarray.py +391 -0
  427. numba_cuda/numba/cuda/tests/nocuda/test_function_resolution.py +48 -0
  428. numba_cuda/numba/cuda/tests/nocuda/test_import.py +63 -0
  429. numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py +252 -0
  430. numba_cuda/numba/cuda/tests/nocuda/test_nvvm.py +59 -0
  431. numba_cuda/numba/cuda/tests/nrt/__init__.py +9 -0
  432. numba_cuda/numba/cuda/tests/nrt/test_nrt.py +387 -0
  433. numba_cuda/numba/cuda/tests/nrt/test_nrt_refct.py +124 -0
  434. numba_cuda/numba/cuda/tests/support.py +900 -0
  435. numba_cuda/numba/cuda/typeconv/__init__.py +4 -0
  436. numba_cuda/numba/cuda/typeconv/castgraph.py +137 -0
  437. numba_cuda/numba/cuda/typeconv/rules.py +63 -0
  438. numba_cuda/numba/cuda/typeconv/typeconv.py +121 -0
  439. numba_cuda/numba/cuda/types/__init__.py +233 -0
  440. numba_cuda/numba/cuda/types/__init__.pyi +167 -0
  441. numba_cuda/numba/cuda/types/abstract.py +9 -0
  442. numba_cuda/numba/cuda/types/common.py +9 -0
  443. numba_cuda/numba/cuda/types/containers.py +9 -0
  444. numba_cuda/numba/cuda/types/cuda_abstract.py +533 -0
  445. numba_cuda/numba/cuda/types/cuda_common.py +110 -0
  446. numba_cuda/numba/cuda/types/cuda_containers.py +971 -0
  447. numba_cuda/numba/cuda/types/cuda_function_type.py +230 -0
  448. numba_cuda/numba/cuda/types/cuda_functions.py +798 -0
  449. numba_cuda/numba/cuda/types/cuda_iterators.py +120 -0
  450. numba_cuda/numba/cuda/types/cuda_misc.py +569 -0
  451. numba_cuda/numba/cuda/types/cuda_npytypes.py +690 -0
  452. numba_cuda/numba/cuda/types/cuda_scalars.py +280 -0
  453. numba_cuda/numba/cuda/types/ext_types.py +101 -0
  454. numba_cuda/numba/cuda/types/function_type.py +11 -0
  455. numba_cuda/numba/cuda/types/functions.py +9 -0
  456. numba_cuda/numba/cuda/types/iterators.py +9 -0
  457. numba_cuda/numba/cuda/types/misc.py +9 -0
  458. numba_cuda/numba/cuda/types/npytypes.py +9 -0
  459. numba_cuda/numba/cuda/types/scalars.py +9 -0
  460. numba_cuda/numba/cuda/typing/__init__.py +19 -0
  461. numba_cuda/numba/cuda/typing/arraydecl.py +939 -0
  462. numba_cuda/numba/cuda/typing/asnumbatype.py +130 -0
  463. numba_cuda/numba/cuda/typing/bufproto.py +70 -0
  464. numba_cuda/numba/cuda/typing/builtins.py +1209 -0
  465. numba_cuda/numba/cuda/typing/cffi_utils.py +219 -0
  466. numba_cuda/numba/cuda/typing/cmathdecl.py +47 -0
  467. numba_cuda/numba/cuda/typing/collections.py +138 -0
  468. numba_cuda/numba/cuda/typing/context.py +782 -0
  469. numba_cuda/numba/cuda/typing/ctypes_utils.py +125 -0
  470. numba_cuda/numba/cuda/typing/dictdecl.py +63 -0
  471. numba_cuda/numba/cuda/typing/enumdecl.py +74 -0
  472. numba_cuda/numba/cuda/typing/listdecl.py +147 -0
  473. numba_cuda/numba/cuda/typing/mathdecl.py +158 -0
  474. numba_cuda/numba/cuda/typing/npdatetime.py +322 -0
  475. numba_cuda/numba/cuda/typing/npydecl.py +749 -0
  476. numba_cuda/numba/cuda/typing/setdecl.py +115 -0
  477. numba_cuda/numba/cuda/typing/templates.py +1446 -0
  478. numba_cuda/numba/cuda/typing/typeof.py +301 -0
  479. numba_cuda/numba/cuda/ufuncs.py +746 -0
  480. numba_cuda/numba/cuda/utils.py +724 -0
  481. numba_cuda/numba/cuda/vector_types.py +214 -0
  482. numba_cuda/numba/cuda/vectorizers.py +260 -0
  483. numba_cuda-0.21.1.dist-info/METADATA +109 -0
  484. numba_cuda-0.21.1.dist-info/RECORD +488 -0
  485. numba_cuda-0.21.1.dist-info/WHEEL +5 -0
  486. numba_cuda-0.21.1.dist-info/licenses/LICENSE +26 -0
  487. numba_cuda-0.21.1.dist-info/licenses/LICENSE.numba +24 -0
  488. numba_cuda-0.21.1.dist-info/top_level.txt +1 -0
@@ -0,0 +1,180 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: BSD-2-Clause
3
+
4
+ import numpy as np
5
+ import ctypes
6
+ from numba.cuda.cudadrv.devicearray import (
7
+ DeviceRecord,
8
+ from_record_like,
9
+ auto_device,
10
+ )
11
+ from numba.cuda.testing import unittest, CUDATestCase
12
+ from numba.cuda.testing import skip_on_cudasim
13
+ from numba.cuda.np import numpy_support
14
+ from numba import cuda
15
+
16
# Width, in characters, of the fixed-length string field "d" of `recordtype`.
N_CHARS = 5

# Aligned record mixing float, integer, complex and fixed-width string fields.
recordtype = np.dtype(
    [
        ("a", np.float64),
        ("b", np.int32),
        ("c", np.complex64),
        ("d", (np.str_, N_CHARS)),
    ],
    align=True,
)

# Aligned record whose "h" field is a 1-D sub-array of two float32 values.
recordwitharray = np.dtype(
    [
        ("g", np.int32),
        ("h", np.float32, 2),
    ],
    align=True,
)

# Record (default, unaligned layout) whose "j" field is a 3x3 float32 matrix.
recwithmat = np.dtype(
    [
        ("i", np.int32),
        ("j", np.float32, (3, 3)),
    ]
)

# Record nesting `recwithmat` as its "y" field.
recwithrecwithmat = np.dtype(
    [
        ("x", np.int32),
        ("y", recwithmat),
    ]
)
33
+
34
+
35
@skip_on_cudasim("Device Record API unsupported in the simulator")
class TestCudaDeviceRecord(CUDATestCase):
    """
    Tests the DeviceRecord class with np.void host types.
    """

    def setUp(self):
        super().setUp()
        self._create_data(np.zeros)

    def _create_data(self, array_ctor):
        """
        Build the host-side fixtures used by every test.

        ``array_ctor`` is called as ``array_ctor(1, dtype)`` and element 0 of
        the result is used as a host record. Two records are stored:
        ``self.hostz`` (all fields zero) and ``self.hostnz`` (a=10, b=11.0).
        """
        self.dtype = np.dtype([("a", np.int32), ("b", np.float32)], align=True)

        self.hostz = array_ctor(1, self.dtype)[0]
        # Zero the fields explicitly rather than trusting the constructor:
        # np.recarray (used by the subclass) allocates without initialising,
        # which would leave the "zero" record holding arbitrary values.
        self.hostz["a"] = 0
        self.hostz["b"] = 0.0

        self.hostnz = array_ctor(1, self.dtype)[0]
        self.hostnz["a"] = 10
        self.hostnz["b"] = 11.0

    def _check_device_record(self, reference, rec):
        """
        Assert that ``rec`` looks like a scalar device record whose dtype
        matches that of ``reference``.
        """
        self.assertEqual(rec.shape, tuple())
        self.assertEqual(rec.strides, tuple())
        self.assertEqual(rec.dtype, reference.dtype)
        self.assertEqual(rec.alloc_size, reference.dtype.itemsize)
        self.assertIsNotNone(rec.gpu_data)
        # ctypes instances compare by identity, so comparing the pointer
        # object against a freshly built c_void_p(0) could never fail. Check
        # the wrapped address instead: a null pointer has a falsy ``value``
        # (None for c_void_p, 0 for integer pointer types).
        # NOTE(review): assumes device_ctypes_pointer is a ctypes simple
        # type exposing ``.value`` - confirm against DeviceRecord.
        self.assertTrue(rec.device_ctypes_pointer.value)

        numba_type = numpy_support.from_dtype(reference.dtype)
        self.assertEqual(rec._numba_type_, numba_type)

    def test_device_record_interface(self):
        hostrec = self.hostz.copy()
        devrec = DeviceRecord(self.dtype)
        self._check_device_record(hostrec, devrec)

    def test_device_record_copy(self):
        hostrec = self.hostz.copy()
        devrec = DeviceRecord(self.dtype)
        devrec.copy_to_device(hostrec)

        # Copy back and check values are all zeros
        hostrec2 = self.hostnz.copy()
        devrec.copy_to_host(hostrec2)
        np.testing.assert_equal(self.hostz, hostrec2)

        # Copy non-zero values to GPU and back and check values
        hostrec3 = self.hostnz.copy()
        devrec.copy_to_device(hostrec3)

        hostrec4 = self.hostz.copy()
        devrec.copy_to_host(hostrec4)
        np.testing.assert_equal(hostrec4, self.hostnz)

    def test_from_record_like(self):
        # Create record from host record
        hostrec = self.hostz.copy()
        devrec = from_record_like(hostrec)
        self._check_device_record(hostrec, devrec)

        # Create record from device record and check for distinct data
        devrec2 = from_record_like(devrec)
        self._check_device_record(devrec, devrec2)
        self.assertNotEqual(devrec.gpu_data, devrec2.gpu_data)

    def test_auto_device(self):
        # Create record from host record
        hostrec = self.hostnz.copy()
        devrec, new_gpu_obj = auto_device(hostrec)
        self._check_device_record(hostrec, devrec)
        self.assertTrue(new_gpu_obj)

        # Copy data back and check it is equal to auto_device arg
        hostrec2 = self.hostz.copy()
        devrec.copy_to_host(hostrec2)
        np.testing.assert_equal(hostrec2, hostrec)
108
+
109
+
110
class TestCudaDeviceRecordWithRecord(TestCudaDeviceRecord):
    """
    Re-runs the inherited DeviceRecord tests with np.record host values.
    """

    def setUp(self):
        # Call the CUDATestCase set-up directly instead of the parent's
        # setUp, which would build the fixtures with np.zeros first.
        CUDATestCase.setUp(self)
        self._create_data(array_ctor=np.recarray)
118
+
119
+
120
@skip_on_cudasim("Structured array attr access not supported in simulator")
class TestRecordDtypeWithStructArrays(CUDATestCase):
    """
    Checks field reads and writes on device arrays with structured dtypes.
    """

    def _createSampleArrays(self):
        # One 3-element array of `recordtype`, plus single records (element 0
        # of a 1-element device array) of the sub-array and matrix dtypes.
        self.sample1d = cuda.device_array(3, dtype=recordtype)
        self.samplerec1darr = cuda.device_array(1, dtype=recordwitharray)[0]
        self.samplerecmat = cuda.device_array(1, dtype=recwithmat)[0]

    def setUp(self):
        super().setUp()
        self._createSampleArrays()

        # Fill record k (0-based) from the value k + 1.
        records = self.sample1d
        for idx in range(records.size):
            value = idx + 1
            records[idx]["a"] = value / 2
            records[idx]["b"] = value
            records[idx]["c"] = value * 1j
            records[idx]["d"] = str(value) * N_CHARS

    def test_structured_array1(self):
        # Every field written in setUp must read back unchanged.
        records = self.sample1d
        for idx in range(self.sample1d.size):
            value = idx + 1
            self.assertEqual(records[idx]["a"], value / 2)
            self.assertEqual(records[idx]["b"], value)
            self.assertEqual(records[idx]["c"], value * 1j)
            self.assertEqual(records[idx]["d"], str(value) * N_CHARS)

    def test_structured_array2(self):
        # Scalar field plus element-wise access to the 1-D sub-array field.
        rec = self.samplerec1darr
        rec["g"] = 2
        rec["h"][0] = 3.0
        rec["h"][1] = 4.0
        self.assertEqual(rec["g"], 2)
        self.assertEqual(rec["h"][0], 3.0)
        self.assertEqual(rec["h"][1], 4.0)

    def test_structured_array3(self):
        # Whole-matrix assignment into the 3x3 field: 5, 10, ..., 45.
        rec = self.samplerecmat
        expected = np.arange(1, 10, dtype=np.float32).reshape(3, 3) * np.float32(
            5.0
        )
        rec["j"][:] = expected
        np.testing.assert_equal(rec["j"], expected)

    def test_structured_array4(self):
        # Access through a nested record: scalar field, then matrix element.
        host = np.zeros(1, dtype=recwithrecwithmat)
        d_arr = cuda.to_device(host)
        d_arr[0]["y"]["i"] = 1
        self.assertEqual(d_arr[0]["y"]["i"], 1)
        d_arr[0]["y"]["j"][0, 0] = 2.0
        self.assertEqual(d_arr[0]["y"]["j"][0, 0], 2.0)
177
+
178
+
179
+ if __name__ == "__main__":
180
+ unittest.main()
@@ -0,0 +1,313 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: BSD-2-Clause
3
+
4
+ from ctypes import c_int, sizeof
5
+
6
+ from numba.cuda.cudadrv.driver import (
7
+ host_to_device,
8
+ device_to_host,
9
+ driver,
10
+ launch_kernel,
11
+ )
12
+
13
+ from numba import cuda
14
+ from numba.cuda.cudadrv import devices, driver as _driver
15
+ from numba.cuda.testing import unittest, CUDATestCase
16
+ from numba.cuda.testing import skip_on_cudasim
17
+ import contextlib
18
+
19
+ from cuda.core.experimental import Device
20
+
21
+ ptx1 = """
22
+ .version 1.4
23
+ .target sm_10, map_f64_to_f32
24
+
25
+ .entry _Z10helloworldPi (
26
+ .param .u64 __cudaparm__Z10helloworldPi_A)
27
+ {
28
+ .reg .u32 %r<3>;
29
+ .reg .u64 %rd<6>;
30
+ .loc 14 4 0
31
+ $LDWbegin__Z10helloworldPi:
32
+ .loc 14 6 0
33
+ cvt.s32.u16 %r1, %tid.x;
34
+ ld.param.u64 %rd1, [__cudaparm__Z10helloworldPi_A];
35
+ cvt.u64.u16 %rd2, %tid.x;
36
+ mul.lo.u64 %rd3, %rd2, 4;
37
+ add.u64 %rd4, %rd1, %rd3;
38
+ st.global.s32 [%rd4+0], %r1;
39
+ .loc 14 7 0
40
+ exit;
41
+ $LDWend__Z10helloworldPi:
42
+ } // _Z10helloworldPi
43
+ """
44
+
45
+ ptx2 = """
46
+ .version 3.0
47
+ .target sm_20
48
+ .address_size 64
49
+
50
+ .file 1 "/tmp/tmpxft_000012c7_00000000-9_testcuda.cpp3.i"
51
+ .file 2 "testcuda.cu"
52
+
53
+ .entry _Z10helloworldPi(
54
+ .param .u64 _Z10helloworldPi_param_0
55
+ )
56
+ {
57
+ .reg .s32 %r<3>;
58
+ .reg .s64 %rl<5>;
59
+
60
+
61
+ ld.param.u64 %rl1, [_Z10helloworldPi_param_0];
62
+ cvta.to.global.u64 %rl2, %rl1;
63
+ .loc 2 6 1
64
+ mov.u32 %r1, %tid.x;
65
+ mul.wide.u32 %rl3, %r1, 4;
66
+ add.s64 %rl4, %rl2, %rl3;
67
+ st.global.u32 [%rl4], %r1;
68
+ .loc 2 7 2
69
+ ret;
70
+ }
71
+ """
72
+
73
+
74
+ @skip_on_cudasim("CUDA Driver API unsupported in the simulator")
75
+ class TestCudaDriver(CUDATestCase):
76
+ def setUp(self):
77
+ super().setUp()
78
+ self.assertTrue(len(devices.gpus) > 0)
79
+ self.context = devices.get_context()
80
+ device = self.context.device
81
+ ccmajor, _ = device.compute_capability
82
+ if ccmajor >= 2:
83
+ self.ptx = ptx2
84
+ else:
85
+ self.ptx = ptx1
86
+
87
+ def tearDown(self):
88
+ super().tearDown()
89
+ del self.context
90
+
91
+ def test_cuda_driver_basic(self):
92
+ module = self.context.create_module_ptx(self.ptx)
93
+ function = module.get_function("_Z10helloworldPi")
94
+
95
+ array = (c_int * 100)()
96
+
97
+ memory = self.context.memalloc(sizeof(array))
98
+ host_to_device(memory, array, sizeof(array))
99
+
100
+ ptr = memory.device_ctypes_pointer
101
+ stream = 0
102
+
103
+ stream = _driver.binding.CUstream(stream)
104
+
105
+ launch_kernel(
106
+ function.handle, # Kernel
107
+ 1,
108
+ 1,
109
+ 1, # gx, gy, gz
110
+ 100,
111
+ 1,
112
+ 1, # bx, by, bz
113
+ 0, # dynamic shared mem
114
+ stream, # stream
115
+ [ptr],
116
+ ) # arguments
117
+
118
+ device_to_host(array, memory, sizeof(array))
119
+ for i, v in enumerate(array):
120
+ self.assertEqual(i, v)
121
+
122
+ module.unload()
123
+
124
+ def test_cuda_driver_stream_operations(self):
125
+ module = self.context.create_module_ptx(self.ptx)
126
+ function = module.get_function("_Z10helloworldPi")
127
+
128
+ array = (c_int * 100)()
129
+
130
+ stream = self.context.create_stream()
131
+
132
+ with stream.auto_synchronize():
133
+ memory = self.context.memalloc(sizeof(array))
134
+ host_to_device(memory, array, sizeof(array), stream=stream)
135
+
136
+ ptr = memory.device_ctypes_pointer
137
+
138
+ stream_handle = stream.handle
139
+ stream_handle = stream_handle.value
140
+
141
+ launch_kernel(
142
+ function.handle, # Kernel
143
+ 1,
144
+ 1,
145
+ 1, # gx, gy, gz
146
+ 100,
147
+ 1,
148
+ 1, # bx, by, bz
149
+ 0, # dynamic shared mem
150
+ stream_handle, # stream
151
+ [ptr],
152
+ ) # arguments
153
+
154
+ device_to_host(array, memory, sizeof(array), stream=stream)
155
+
156
+ for i, v in enumerate(array):
157
+ self.assertEqual(i, v)
158
+
159
+ def test_cuda_core_stream_operations(self):
160
+ module = self.context.create_module_ptx(self.ptx)
161
+ function = module.get_function("_Z10helloworldPi")
162
+ array = (c_int * 100)()
163
+ dev = Device()
164
+ dev.set_current()
165
+ stream = dev.create_stream()
166
+
167
+ @contextlib.contextmanager
168
+ def auto_synchronize(stream):
169
+ try:
170
+ yield stream
171
+ finally:
172
+ stream.sync()
173
+
174
+ with auto_synchronize(stream):
175
+ memory = self.context.memalloc(sizeof(array))
176
+ host_to_device(memory, array, sizeof(array), stream=stream)
177
+
178
+ ptr = memory.device_ctypes_pointer
179
+
180
+ launch_kernel(
181
+ function.handle, # Kernel
182
+ 1,
183
+ 1,
184
+ 1, # gx, gy, gz
185
+ 100,
186
+ 1,
187
+ 1, # bx, by, bz
188
+ 0, # dynamic shared mem
189
+ stream.handle, # stream
190
+ [ptr],
191
+ )
192
+
193
+ device_to_host(array, memory, sizeof(array), stream=stream)
194
+ for i, v in enumerate(array):
195
+ self.assertEqual(i, v)
196
+
197
+ def test_cuda_core_stream_launch_user_facing(self):
198
+ @cuda.jit
199
+ def kernel(a):
200
+ idx = cuda.grid(1)
201
+ if idx < len(a):
202
+ a[idx] = idx
203
+
204
+ dev = Device()
205
+ dev.set_current()
206
+ stream = dev.create_stream()
207
+
208
+ ary = cuda.to_device([0] * 100, stream=stream)
209
+ stream.sync()
210
+
211
+ kernel[1, 100, stream](ary)
212
+ stream.sync()
213
+
214
+ result = ary.copy_to_host(stream=stream)
215
+ for i, v in enumerate(result):
216
+ self.assertEqual(i, v)
217
+
218
+ def test_cuda_driver_default_stream(self):
219
+ # Test properties of the default stream
220
+ ds = self.context.get_default_stream()
221
+ self.assertIn("Default CUDA stream", repr(ds))
222
+ self.assertEqual(0, int(ds))
223
+ # bool(stream) is the check that is done in memcpy to decide if async
224
+ # version should be used. So the default (0) stream should be true-ish
225
+ # even though 0 is usually false-ish in Python.
226
+ self.assertTrue(ds)
227
+ self.assertFalse(ds.external)
228
+
229
+ def test_cuda_driver_legacy_default_stream(self):
230
+ # Test properties of the legacy default stream
231
+ ds = self.context.get_legacy_default_stream()
232
+ self.assertIn("Legacy default CUDA stream", repr(ds))
233
+ self.assertEqual(1, int(ds))
234
+ self.assertTrue(ds)
235
+ self.assertFalse(ds.external)
236
+
237
+ def test_cuda_driver_per_thread_default_stream(self):
238
+ # Test properties of the per-thread default stream
239
+ ds = self.context.get_per_thread_default_stream()
240
+ self.assertIn("Per-thread default CUDA stream", repr(ds))
241
+ self.assertEqual(2, int(ds))
242
+ self.assertTrue(ds)
243
+ self.assertFalse(ds.external)
244
+
245
+ def test_cuda_driver_stream(self):
246
+ # Test properties of non-default streams
247
+ s = self.context.create_stream()
248
+ self.assertIn("CUDA stream", repr(s))
249
+ self.assertNotIn("Default", repr(s))
250
+ self.assertNotIn("External", repr(s))
251
+ self.assertNotEqual(0, int(s))
252
+ self.assertTrue(s)
253
+ self.assertFalse(s.external)
254
+
255
+ def test_cuda_driver_external_stream(self):
256
+ # Test properties of a stream created from an external stream object.
257
+ # We use the driver API directly to create a stream, to emulate an
258
+ # external library creating a stream
259
+ handle = driver.cuStreamCreate(0)
260
+ ptr = int(handle)
261
+ s = self.context.create_external_stream(ptr)
262
+
263
+ self.assertIn("External CUDA stream", repr(s))
264
+ # Ensure neither "Default" nor "default"
265
+ self.assertNotIn("efault", repr(s))
266
+ self.assertEqual(ptr, int(s))
267
+ self.assertTrue(s)
268
+ self.assertTrue(s.external)
269
+
270
+ def test_cuda_driver_occupancy(self):
271
+ module = self.context.create_module_ptx(self.ptx)
272
+ function = module.get_function("_Z10helloworldPi")
273
+
274
+ value = self.context.get_active_blocks_per_multiprocessor(
275
+ function, 128, 128
276
+ )
277
+ self.assertTrue(value > 0)
278
+
279
+ def b2d(bs):
280
+ return bs
281
+
282
+ grid, block = self.context.get_max_potential_block_size(
283
+ function, b2d, 128, 128
284
+ )
285
+ self.assertTrue(grid > 0)
286
+ self.assertTrue(block > 0)
287
+
288
+
289
class TestDevice(CUDATestCase):
    """Checks on properties of the device behind the current context."""

    def test_device_get_uuid(self):
        # A device UUID looks like:
        #
        #     GPU-e6489c45-5b68-3b03-bab7-0e7c8e809643
        #
        # i.e. "GPU-" followed by lowercase hex groups of 8, 4, 4, 4 and 12
        # digits. We build a regular expression of that form and verify that
        # the returned UUID matches it.
        #
        # Device UUIDs may not conform to parts of the UUID specification (RFC
        # 4122) pertaining to versions and variants, so we do not extract and
        # validate the values of these bits.

        hex_group = "[0-9a-f]{%d}"
        groups = "-".join(hex_group % width for width in (8, 4, 4, 4, 12))
        uuid_format = "^GPU-" + groups + "$"

        device = devices.get_context().device
        self.assertRegex(device.uuid, uuid_format)
310
+
311
+
312
+ if __name__ == "__main__":
313
+ unittest.main()
@@ -0,0 +1,187 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: BSD-2-Clause
3
+
4
+ import ctypes
5
+
6
+ import numpy as np
7
+
8
+ from numba.cuda.cudadrv import driver, drvapi, devices
9
+ from numba.cuda.testing import unittest, CUDATestCase
10
+ from numba.cuda.testing import skip_on_cudasim
11
+
12
+
13
@skip_on_cudasim("CUDA Memory API unsupported in the simulator")
class TestCudaMemory(CUDATestCase):
    """
    Tests that the various kinds of allocation are recognised as device
    memory, and that views and finalizers track ownership correctly.
    """

    def setUp(self):
        super().setUp()
        self.context = devices.get_context()

    def tearDown(self):
        self.context.reset()
        del self.context
        super().tearDown()

    def _template(self, obj):
        """Assert that ``obj`` is device memory exposing a device pointer."""
        self.assertTrue(driver.is_device_memory(obj))
        driver.require_device_memory(obj)
        self.assertIsInstance(obj.device_ctypes_pointer, drvapi.cu_device_ptr)

    def test_device_memory(self):
        devmem = self.context.memalloc(1024)
        self._template(devmem)

    def test_device_view(self):
        devmem = self.context.memalloc(1024)
        self._template(devmem.view(10))

    def test_host_alloc(self):
        devmem = self.context.memhostalloc(1024, mapped=True)
        self._template(devmem)

    def test_pinned_memory(self):
        ary = np.arange(10)
        devmem = self.context.mempin(
            ary, ary.ctypes.data, ary.size * ary.dtype.itemsize, mapped=True
        )
        self._template(devmem)

    def test_managed_memory(self):
        devmem = self.context.memallocmanaged(1024)
        self._template(devmem)

    def test_derived_pointer(self):
        # Use MemoryPointer.view to create derived pointer

        def handle_val(mem):
            return int(mem.handle)

        def check(m, offset):
            # A view of the allocation: owned by m, holds one reference to
            # it, and its handle is displaced by `offset`.
            v1 = m.view(offset)
            self.assertEqual(handle_val(v1.owner), handle_val(m))
            self.assertEqual(m.refct, 2)
            self.assertEqual(handle_val(v1) - offset, handle_val(v1.owner))
            # A view of the view: still owned by m itself, displaced by
            # twice the offset, and holding a further reference.
            v2 = v1.view(offset)
            self.assertEqual(handle_val(v2.owner), handle_val(m))
            self.assertEqual(handle_val(v2) - offset * 2, handle_val(v2.owner))
            self.assertEqual(m.refct, 3)
            # Dropping each view must release its reference immediately.
            del v2
            self.assertEqual(m.refct, 2)
            del v1
            self.assertEqual(m.refct, 1)

        m = self.context.memalloc(1024)
        check(m=m, offset=0)
        check(m=m, offset=1)

    def test_user_extension(self):
        # User can use MemoryPointer to wrap externally defined pointers.
        # This test checks if the finalizer is invoked at the correct time
        fake_ptr = ctypes.c_void_p(0xDEADBEEF)
        # One-element list so the nested function can update the count.
        dtor_invoked = [0]

        def dtor():
            dtor_invoked[0] += 1

        # Ensure finalizer is called when pointer is deleted
        ptr = driver.MemoryPointer(pointer=fake_ptr, size=40, finalizer=dtor)
        self.assertEqual(dtor_invoked[0], 0)
        del ptr
        self.assertEqual(dtor_invoked[0], 1)

        # Ensure removing derived pointer doesn't call finalizer
        ptr = driver.MemoryPointer(pointer=fake_ptr, size=40, finalizer=dtor)
        owned = ptr.own()
        del owned
        self.assertEqual(dtor_invoked[0], 1)
        del ptr
        self.assertEqual(dtor_invoked[0], 2)
102
+
103
+
104
class TestCudaMemoryFunctions(CUDATestCase):
    """
    Round-trip tests for the driver's memcpy and memset helpers.
    """

    def setUp(self):
        super().setUp()
        self.context = devices.get_context()

    def tearDown(self):
        del self.context
        super().tearDown()

    def test_memcpy(self):
        hstary = np.arange(100, dtype=np.uint32)
        # Start the destination with different contents from the source, so
        # the final comparison can only succeed if data was really copied.
        hstary2 = np.zeros_like(hstary)
        sz = hstary.size * hstary.dtype.itemsize
        devary = self.context.memalloc(sz)

        driver.host_to_device(devary, hstary, sz)
        driver.device_to_host(hstary2, devary, sz)

        np.testing.assert_array_equal(hstary, hstary2)

    def test_memset(self):
        dtype = np.dtype("uint32")
        n = 10
        sz = dtype.itemsize * n
        devary = self.context.memalloc(sz)
        # Fill every byte with 0xAB; each uint32 then reads 0xABABABAB.
        driver.device_memset(devary, 0xAB, sz)

        hstary = np.empty(n, dtype=dtype)
        driver.device_to_host(hstary, devary, sz)

        expected = np.full(n, 0xABABABAB, dtype=dtype)
        np.testing.assert_array_equal(hstary, expected)

    def test_d2d(self):
        hst = np.arange(100, dtype=np.uint32)
        hst2 = np.empty_like(hst)
        sz = hst.size * hst.dtype.itemsize
        dev1 = self.context.memalloc(sz)
        dev2 = self.context.memalloc(sz)
        # host -> dev1 -> dev2 -> host
        driver.host_to_device(dev1, hst, sz)
        driver.device_to_device(dev2, dev1, sz)
        driver.device_to_host(hst2, dev2, sz)
        np.testing.assert_array_equal(hst, hst2)
147
+
148
+
149
@skip_on_cudasim("CUDA Memory API unsupported in the simulator")
class TestMVExtent(CUDATestCase):
    """
    Tests of driver.host_memory_extents and driver.host_memory_size on NumPy
    arrays and ctypes objects.
    """

    def test_c_contiguous_array(self):
        ary = np.arange(100)
        arysz = ary.dtype.itemsize * ary.size
        # Only the start of the extent is checked here.
        start, _ = driver.host_memory_extents(ary)
        self.assertEqual(ary.ctypes.data, start)
        self.assertEqual(arysz, driver.host_memory_size(ary))

    def test_f_contiguous_array(self):
        ary = np.asfortranarray(np.arange(100).reshape(2, 50))
        arysz = ary.dtype.itemsize * np.prod(ary.shape)
        start, _ = driver.host_memory_extents(ary)
        self.assertEqual(ary.ctypes.data, start)
        self.assertEqual(arysz, driver.host_memory_size(ary))

    def test_single_element_array(self):
        # Zero-dimensional array holding a single uint32.
        ary = np.asarray(np.uint32(1234))
        arysz = ary.dtype.itemsize
        start, _ = driver.host_memory_extents(ary)
        self.assertEqual(ary.ctypes.data, start)
        self.assertEqual(arysz, driver.host_memory_size(ary))

    def test_ctypes_struct(self):
        class mystruct(ctypes.Structure):
            _fields_ = [("x", ctypes.c_int), ("y", ctypes.c_int)]

        data = mystruct(x=123, y=432)
        sz = driver.host_memory_size(data)
        self.assertEqual(ctypes.sizeof(data), sz)

    def test_ctypes_double(self):
        data = ctypes.c_double(1.234)
        sz = driver.host_memory_size(data)
        self.assertEqual(ctypes.sizeof(data), sz)
184
+
185
+
186
+ if __name__ == "__main__":
187
+ unittest.main()