numba-cuda 0.21.1__cp313-cp313-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (488) hide show
  1. _numba_cuda_redirector.pth +4 -0
  2. _numba_cuda_redirector.py +89 -0
  3. numba_cuda/VERSION +1 -0
  4. numba_cuda/__init__.py +6 -0
  5. numba_cuda/_version.py +11 -0
  6. numba_cuda/numba/cuda/__init__.py +70 -0
  7. numba_cuda/numba/cuda/_internal/cuda_bf16.py +16394 -0
  8. numba_cuda/numba/cuda/_internal/cuda_fp16.py +8112 -0
  9. numba_cuda/numba/cuda/api.py +577 -0
  10. numba_cuda/numba/cuda/api_util.py +76 -0
  11. numba_cuda/numba/cuda/args.py +72 -0
  12. numba_cuda/numba/cuda/bf16.py +397 -0
  13. numba_cuda/numba/cuda/cache_hints.py +287 -0
  14. numba_cuda/numba/cuda/cext/__init__.py +2 -0
  15. numba_cuda/numba/cuda/cext/_devicearray.cp313-win_amd64.pyd +0 -0
  16. numba_cuda/numba/cuda/cext/_devicearray.cpp +159 -0
  17. numba_cuda/numba/cuda/cext/_devicearray.h +29 -0
  18. numba_cuda/numba/cuda/cext/_dispatcher.cp313-win_amd64.pyd +0 -0
  19. numba_cuda/numba/cuda/cext/_dispatcher.cpp +1098 -0
  20. numba_cuda/numba/cuda/cext/_hashtable.cpp +532 -0
  21. numba_cuda/numba/cuda/cext/_hashtable.h +135 -0
  22. numba_cuda/numba/cuda/cext/_helperlib.c +71 -0
  23. numba_cuda/numba/cuda/cext/_helperlib.cp313-win_amd64.pyd +0 -0
  24. numba_cuda/numba/cuda/cext/_helpermod.c +82 -0
  25. numba_cuda/numba/cuda/cext/_pymodule.h +38 -0
  26. numba_cuda/numba/cuda/cext/_typeconv.cp313-win_amd64.pyd +0 -0
  27. numba_cuda/numba/cuda/cext/_typeconv.cpp +206 -0
  28. numba_cuda/numba/cuda/cext/_typeof.cpp +1159 -0
  29. numba_cuda/numba/cuda/cext/_typeof.h +19 -0
  30. numba_cuda/numba/cuda/cext/capsulethunk.h +111 -0
  31. numba_cuda/numba/cuda/cext/mviewbuf.c +385 -0
  32. numba_cuda/numba/cuda/cext/mviewbuf.cp313-win_amd64.pyd +0 -0
  33. numba_cuda/numba/cuda/cext/typeconv.cpp +212 -0
  34. numba_cuda/numba/cuda/cext/typeconv.hpp +101 -0
  35. numba_cuda/numba/cuda/cg.py +67 -0
  36. numba_cuda/numba/cuda/cgutils.py +1294 -0
  37. numba_cuda/numba/cuda/cloudpickle/__init__.py +21 -0
  38. numba_cuda/numba/cuda/cloudpickle/cloudpickle.py +1598 -0
  39. numba_cuda/numba/cuda/cloudpickle/cloudpickle_fast.py +17 -0
  40. numba_cuda/numba/cuda/codegen.py +541 -0
  41. numba_cuda/numba/cuda/compiler.py +1396 -0
  42. numba_cuda/numba/cuda/core/analysis.py +758 -0
  43. numba_cuda/numba/cuda/core/annotations/__init__.py +0 -0
  44. numba_cuda/numba/cuda/core/annotations/pretty_annotate.py +288 -0
  45. numba_cuda/numba/cuda/core/annotations/type_annotations.py +305 -0
  46. numba_cuda/numba/cuda/core/base.py +1332 -0
  47. numba_cuda/numba/cuda/core/boxing.py +1411 -0
  48. numba_cuda/numba/cuda/core/bytecode.py +728 -0
  49. numba_cuda/numba/cuda/core/byteflow.py +2346 -0
  50. numba_cuda/numba/cuda/core/caching.py +744 -0
  51. numba_cuda/numba/cuda/core/callconv.py +392 -0
  52. numba_cuda/numba/cuda/core/codegen.py +171 -0
  53. numba_cuda/numba/cuda/core/compiler.py +199 -0
  54. numba_cuda/numba/cuda/core/compiler_lock.py +85 -0
  55. numba_cuda/numba/cuda/core/compiler_machinery.py +497 -0
  56. numba_cuda/numba/cuda/core/config.py +650 -0
  57. numba_cuda/numba/cuda/core/consts.py +124 -0
  58. numba_cuda/numba/cuda/core/controlflow.py +989 -0
  59. numba_cuda/numba/cuda/core/entrypoints.py +57 -0
  60. numba_cuda/numba/cuda/core/environment.py +66 -0
  61. numba_cuda/numba/cuda/core/errors.py +917 -0
  62. numba_cuda/numba/cuda/core/event.py +511 -0
  63. numba_cuda/numba/cuda/core/funcdesc.py +330 -0
  64. numba_cuda/numba/cuda/core/generators.py +387 -0
  65. numba_cuda/numba/cuda/core/imputils.py +509 -0
  66. numba_cuda/numba/cuda/core/inline_closurecall.py +1787 -0
  67. numba_cuda/numba/cuda/core/interpreter.py +3617 -0
  68. numba_cuda/numba/cuda/core/ir.py +1812 -0
  69. numba_cuda/numba/cuda/core/ir_utils.py +2638 -0
  70. numba_cuda/numba/cuda/core/optional.py +129 -0
  71. numba_cuda/numba/cuda/core/options.py +262 -0
  72. numba_cuda/numba/cuda/core/postproc.py +249 -0
  73. numba_cuda/numba/cuda/core/pythonapi.py +1859 -0
  74. numba_cuda/numba/cuda/core/registry.py +46 -0
  75. numba_cuda/numba/cuda/core/removerefctpass.py +123 -0
  76. numba_cuda/numba/cuda/core/rewrites/__init__.py +26 -0
  77. numba_cuda/numba/cuda/core/rewrites/ir_print.py +91 -0
  78. numba_cuda/numba/cuda/core/rewrites/registry.py +104 -0
  79. numba_cuda/numba/cuda/core/rewrites/static_binop.py +41 -0
  80. numba_cuda/numba/cuda/core/rewrites/static_getitem.py +189 -0
  81. numba_cuda/numba/cuda/core/rewrites/static_raise.py +100 -0
  82. numba_cuda/numba/cuda/core/sigutils.py +68 -0
  83. numba_cuda/numba/cuda/core/ssa.py +498 -0
  84. numba_cuda/numba/cuda/core/targetconfig.py +330 -0
  85. numba_cuda/numba/cuda/core/tracing.py +231 -0
  86. numba_cuda/numba/cuda/core/transforms.py +956 -0
  87. numba_cuda/numba/cuda/core/typed_passes.py +867 -0
  88. numba_cuda/numba/cuda/core/typeinfer.py +1950 -0
  89. numba_cuda/numba/cuda/core/unsafe/__init__.py +0 -0
  90. numba_cuda/numba/cuda/core/unsafe/bytes.py +67 -0
  91. numba_cuda/numba/cuda/core/unsafe/eh.py +67 -0
  92. numba_cuda/numba/cuda/core/unsafe/refcount.py +98 -0
  93. numba_cuda/numba/cuda/core/untyped_passes.py +1979 -0
  94. numba_cuda/numba/cuda/cpython/builtins.py +1153 -0
  95. numba_cuda/numba/cuda/cpython/charseq.py +1218 -0
  96. numba_cuda/numba/cuda/cpython/cmathimpl.py +560 -0
  97. numba_cuda/numba/cuda/cpython/enumimpl.py +103 -0
  98. numba_cuda/numba/cuda/cpython/iterators.py +167 -0
  99. numba_cuda/numba/cuda/cpython/listobj.py +1326 -0
  100. numba_cuda/numba/cuda/cpython/mathimpl.py +499 -0
  101. numba_cuda/numba/cuda/cpython/numbers.py +1475 -0
  102. numba_cuda/numba/cuda/cpython/rangeobj.py +289 -0
  103. numba_cuda/numba/cuda/cpython/slicing.py +322 -0
  104. numba_cuda/numba/cuda/cpython/tupleobj.py +456 -0
  105. numba_cuda/numba/cuda/cpython/unicode.py +2865 -0
  106. numba_cuda/numba/cuda/cpython/unicode_support.py +1597 -0
  107. numba_cuda/numba/cuda/cpython/unsafe/__init__.py +0 -0
  108. numba_cuda/numba/cuda/cpython/unsafe/numbers.py +64 -0
  109. numba_cuda/numba/cuda/cpython/unsafe/tuple.py +92 -0
  110. numba_cuda/numba/cuda/cuda_paths.py +691 -0
  111. numba_cuda/numba/cuda/cudadecl.py +556 -0
  112. numba_cuda/numba/cuda/cudadrv/__init__.py +14 -0
  113. numba_cuda/numba/cuda/cudadrv/devicearray.py +951 -0
  114. numba_cuda/numba/cuda/cudadrv/devices.py +249 -0
  115. numba_cuda/numba/cuda/cudadrv/driver.py +3222 -0
  116. numba_cuda/numba/cuda/cudadrv/drvapi.py +435 -0
  117. numba_cuda/numba/cuda/cudadrv/dummyarray.py +558 -0
  118. numba_cuda/numba/cuda/cudadrv/enums.py +613 -0
  119. numba_cuda/numba/cuda/cudadrv/error.py +48 -0
  120. numba_cuda/numba/cuda/cudadrv/libs.py +220 -0
  121. numba_cuda/numba/cuda/cudadrv/linkable_code.py +184 -0
  122. numba_cuda/numba/cuda/cudadrv/mappings.py +14 -0
  123. numba_cuda/numba/cuda/cudadrv/ndarray.py +26 -0
  124. numba_cuda/numba/cuda/cudadrv/nvrtc.py +193 -0
  125. numba_cuda/numba/cuda/cudadrv/nvvm.py +756 -0
  126. numba_cuda/numba/cuda/cudadrv/rtapi.py +13 -0
  127. numba_cuda/numba/cuda/cudadrv/runtime.py +34 -0
  128. numba_cuda/numba/cuda/cudaimpl.py +995 -0
  129. numba_cuda/numba/cuda/cudamath.py +149 -0
  130. numba_cuda/numba/cuda/datamodel/__init__.py +7 -0
  131. numba_cuda/numba/cuda/datamodel/cuda_manager.py +66 -0
  132. numba_cuda/numba/cuda/datamodel/cuda_models.py +1446 -0
  133. numba_cuda/numba/cuda/datamodel/cuda_packer.py +224 -0
  134. numba_cuda/numba/cuda/datamodel/cuda_registry.py +22 -0
  135. numba_cuda/numba/cuda/datamodel/cuda_testing.py +153 -0
  136. numba_cuda/numba/cuda/datamodel/manager.py +11 -0
  137. numba_cuda/numba/cuda/datamodel/models.py +9 -0
  138. numba_cuda/numba/cuda/datamodel/packer.py +9 -0
  139. numba_cuda/numba/cuda/datamodel/registry.py +11 -0
  140. numba_cuda/numba/cuda/datamodel/testing.py +11 -0
  141. numba_cuda/numba/cuda/debuginfo.py +903 -0
  142. numba_cuda/numba/cuda/decorators.py +294 -0
  143. numba_cuda/numba/cuda/descriptor.py +35 -0
  144. numba_cuda/numba/cuda/device_init.py +158 -0
  145. numba_cuda/numba/cuda/deviceufunc.py +1021 -0
  146. numba_cuda/numba/cuda/dispatcher.py +2463 -0
  147. numba_cuda/numba/cuda/errors.py +72 -0
  148. numba_cuda/numba/cuda/extending.py +697 -0
  149. numba_cuda/numba/cuda/flags.py +178 -0
  150. numba_cuda/numba/cuda/fp16.py +357 -0
  151. numba_cuda/numba/cuda/include/12/cuda_bf16.h +5118 -0
  152. numba_cuda/numba/cuda/include/12/cuda_bf16.hpp +3865 -0
  153. numba_cuda/numba/cuda/include/12/cuda_fp16.h +5363 -0
  154. numba_cuda/numba/cuda/include/12/cuda_fp16.hpp +3483 -0
  155. numba_cuda/numba/cuda/include/13/cuda_bf16.h +5118 -0
  156. numba_cuda/numba/cuda/include/13/cuda_bf16.hpp +3865 -0
  157. numba_cuda/numba/cuda/include/13/cuda_fp16.h +5363 -0
  158. numba_cuda/numba/cuda/include/13/cuda_fp16.hpp +3483 -0
  159. numba_cuda/numba/cuda/initialize.py +24 -0
  160. numba_cuda/numba/cuda/intrinsic_wrapper.py +41 -0
  161. numba_cuda/numba/cuda/intrinsics.py +382 -0
  162. numba_cuda/numba/cuda/itanium_mangler.py +214 -0
  163. numba_cuda/numba/cuda/kernels/__init__.py +2 -0
  164. numba_cuda/numba/cuda/kernels/reduction.py +265 -0
  165. numba_cuda/numba/cuda/kernels/transpose.py +65 -0
  166. numba_cuda/numba/cuda/libdevice.py +3386 -0
  167. numba_cuda/numba/cuda/libdevicedecl.py +20 -0
  168. numba_cuda/numba/cuda/libdevicefuncs.py +1060 -0
  169. numba_cuda/numba/cuda/libdeviceimpl.py +88 -0
  170. numba_cuda/numba/cuda/locks.py +19 -0
  171. numba_cuda/numba/cuda/lowering.py +1951 -0
  172. numba_cuda/numba/cuda/mathimpl.py +374 -0
  173. numba_cuda/numba/cuda/memory_management/__init__.py +4 -0
  174. numba_cuda/numba/cuda/memory_management/memsys.cu +99 -0
  175. numba_cuda/numba/cuda/memory_management/memsys.cuh +22 -0
  176. numba_cuda/numba/cuda/memory_management/nrt.cu +212 -0
  177. numba_cuda/numba/cuda/memory_management/nrt.cuh +48 -0
  178. numba_cuda/numba/cuda/memory_management/nrt.py +390 -0
  179. numba_cuda/numba/cuda/memory_management/nrt_context.py +438 -0
  180. numba_cuda/numba/cuda/misc/appdirs.py +594 -0
  181. numba_cuda/numba/cuda/misc/cffiimpl.py +24 -0
  182. numba_cuda/numba/cuda/misc/coverage_support.py +43 -0
  183. numba_cuda/numba/cuda/misc/dump_style.py +41 -0
  184. numba_cuda/numba/cuda/misc/findlib.py +75 -0
  185. numba_cuda/numba/cuda/misc/firstlinefinder.py +96 -0
  186. numba_cuda/numba/cuda/misc/gdb_hook.py +240 -0
  187. numba_cuda/numba/cuda/misc/literal.py +28 -0
  188. numba_cuda/numba/cuda/misc/llvm_pass_timings.py +412 -0
  189. numba_cuda/numba/cuda/misc/special.py +94 -0
  190. numba_cuda/numba/cuda/models.py +56 -0
  191. numba_cuda/numba/cuda/np/arraymath.py +5130 -0
  192. numba_cuda/numba/cuda/np/arrayobj.py +7635 -0
  193. numba_cuda/numba/cuda/np/extensions.py +11 -0
  194. numba_cuda/numba/cuda/np/linalg.py +3087 -0
  195. numba_cuda/numba/cuda/np/math/__init__.py +0 -0
  196. numba_cuda/numba/cuda/np/math/cmathimpl.py +558 -0
  197. numba_cuda/numba/cuda/np/math/mathimpl.py +487 -0
  198. numba_cuda/numba/cuda/np/math/numbers.py +1461 -0
  199. numba_cuda/numba/cuda/np/npdatetime.py +969 -0
  200. numba_cuda/numba/cuda/np/npdatetime_helpers.py +217 -0
  201. numba_cuda/numba/cuda/np/npyfuncs.py +1808 -0
  202. numba_cuda/numba/cuda/np/npyimpl.py +1027 -0
  203. numba_cuda/numba/cuda/np/numpy_support.py +798 -0
  204. numba_cuda/numba/cuda/np/polynomial/__init__.py +4 -0
  205. numba_cuda/numba/cuda/np/polynomial/polynomial_core.py +242 -0
  206. numba_cuda/numba/cuda/np/polynomial/polynomial_functions.py +380 -0
  207. numba_cuda/numba/cuda/np/ufunc/__init__.py +4 -0
  208. numba_cuda/numba/cuda/np/ufunc/decorators.py +203 -0
  209. numba_cuda/numba/cuda/np/ufunc/sigparse.py +68 -0
  210. numba_cuda/numba/cuda/np/ufunc/ufuncbuilder.py +65 -0
  211. numba_cuda/numba/cuda/np/ufunc_db.py +1282 -0
  212. numba_cuda/numba/cuda/np/unsafe/__init__.py +0 -0
  213. numba_cuda/numba/cuda/np/unsafe/ndarray.py +84 -0
  214. numba_cuda/numba/cuda/nvvmutils.py +254 -0
  215. numba_cuda/numba/cuda/printimpl.py +126 -0
  216. numba_cuda/numba/cuda/random.py +308 -0
  217. numba_cuda/numba/cuda/reshape_funcs.cu +156 -0
  218. numba_cuda/numba/cuda/serialize.py +267 -0
  219. numba_cuda/numba/cuda/simulator/__init__.py +63 -0
  220. numba_cuda/numba/cuda/simulator/_internal/__init__.py +4 -0
  221. numba_cuda/numba/cuda/simulator/_internal/cuda_bf16.py +2 -0
  222. numba_cuda/numba/cuda/simulator/api.py +179 -0
  223. numba_cuda/numba/cuda/simulator/bf16.py +4 -0
  224. numba_cuda/numba/cuda/simulator/compiler.py +38 -0
  225. numba_cuda/numba/cuda/simulator/cudadrv/__init__.py +11 -0
  226. numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py +462 -0
  227. numba_cuda/numba/cuda/simulator/cudadrv/devices.py +122 -0
  228. numba_cuda/numba/cuda/simulator/cudadrv/driver.py +66 -0
  229. numba_cuda/numba/cuda/simulator/cudadrv/drvapi.py +7 -0
  230. numba_cuda/numba/cuda/simulator/cudadrv/dummyarray.py +7 -0
  231. numba_cuda/numba/cuda/simulator/cudadrv/error.py +10 -0
  232. numba_cuda/numba/cuda/simulator/cudadrv/libs.py +10 -0
  233. numba_cuda/numba/cuda/simulator/cudadrv/linkable_code.py +61 -0
  234. numba_cuda/numba/cuda/simulator/cudadrv/nvrtc.py +11 -0
  235. numba_cuda/numba/cuda/simulator/cudadrv/nvvm.py +32 -0
  236. numba_cuda/numba/cuda/simulator/cudadrv/runtime.py +22 -0
  237. numba_cuda/numba/cuda/simulator/dispatcher.py +11 -0
  238. numba_cuda/numba/cuda/simulator/kernel.py +320 -0
  239. numba_cuda/numba/cuda/simulator/kernelapi.py +509 -0
  240. numba_cuda/numba/cuda/simulator/memory_management/__init__.py +4 -0
  241. numba_cuda/numba/cuda/simulator/memory_management/nrt.py +21 -0
  242. numba_cuda/numba/cuda/simulator/reduction.py +19 -0
  243. numba_cuda/numba/cuda/simulator/tests/support.py +4 -0
  244. numba_cuda/numba/cuda/simulator/vector_types.py +65 -0
  245. numba_cuda/numba/cuda/simulator_init.py +18 -0
  246. numba_cuda/numba/cuda/stubs.py +635 -0
  247. numba_cuda/numba/cuda/target.py +505 -0
  248. numba_cuda/numba/cuda/testing.py +347 -0
  249. numba_cuda/numba/cuda/tests/__init__.py +62 -0
  250. numba_cuda/numba/cuda/tests/benchmarks/__init__.py +0 -0
  251. numba_cuda/numba/cuda/tests/benchmarks/test_kernel_launch.py +119 -0
  252. numba_cuda/numba/cuda/tests/cloudpickle_main_class.py +9 -0
  253. numba_cuda/numba/cuda/tests/core/serialize_usecases.py +113 -0
  254. numba_cuda/numba/cuda/tests/core/test_itanium_mangler.py +83 -0
  255. numba_cuda/numba/cuda/tests/core/test_serialize.py +371 -0
  256. numba_cuda/numba/cuda/tests/cudadrv/__init__.py +9 -0
  257. numba_cuda/numba/cuda/tests/cudadrv/test_array_attr.py +147 -0
  258. numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py +161 -0
  259. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_array_slicing.py +397 -0
  260. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_auto_context.py +24 -0
  261. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py +180 -0
  262. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py +313 -0
  263. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_memory.py +187 -0
  264. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py +621 -0
  265. numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py +247 -0
  266. numba_cuda/numba/cuda/tests/cudadrv/test_detect.py +100 -0
  267. numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +198 -0
  268. numba_cuda/numba/cuda/tests/cudadrv/test_events.py +53 -0
  269. numba_cuda/numba/cuda/tests/cudadrv/test_host_alloc.py +72 -0
  270. numba_cuda/numba/cuda/tests/cudadrv/test_init.py +138 -0
  271. numba_cuda/numba/cuda/tests/cudadrv/test_inline_ptx.py +43 -0
  272. numba_cuda/numba/cuda/tests/cudadrv/test_is_fp16.py +15 -0
  273. numba_cuda/numba/cuda/tests/cudadrv/test_linkable_code.py +58 -0
  274. numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +348 -0
  275. numba_cuda/numba/cuda/tests/cudadrv/test_managed_alloc.py +128 -0
  276. numba_cuda/numba/cuda/tests/cudadrv/test_module_callbacks.py +301 -0
  277. numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py +174 -0
  278. numba_cuda/numba/cuda/tests/cudadrv/test_nvrtc.py +28 -0
  279. numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py +185 -0
  280. numba_cuda/numba/cuda/tests/cudadrv/test_pinned.py +39 -0
  281. numba_cuda/numba/cuda/tests/cudadrv/test_profiler.py +23 -0
  282. numba_cuda/numba/cuda/tests/cudadrv/test_reset_device.py +38 -0
  283. numba_cuda/numba/cuda/tests/cudadrv/test_runtime.py +48 -0
  284. numba_cuda/numba/cuda/tests/cudadrv/test_select_device.py +44 -0
  285. numba_cuda/numba/cuda/tests/cudadrv/test_streams.py +127 -0
  286. numba_cuda/numba/cuda/tests/cudapy/__init__.py +9 -0
  287. numba_cuda/numba/cuda/tests/cudapy/cache_usecases.py +231 -0
  288. numba_cuda/numba/cuda/tests/cudapy/cache_with_cpu_usecases.py +50 -0
  289. numba_cuda/numba/cuda/tests/cudapy/cg_cache_usecases.py +36 -0
  290. numba_cuda/numba/cuda/tests/cudapy/complex_usecases.py +116 -0
  291. numba_cuda/numba/cuda/tests/cudapy/enum_usecases.py +59 -0
  292. numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +62 -0
  293. numba_cuda/numba/cuda/tests/cudapy/jitlink.ptx +28 -0
  294. numba_cuda/numba/cuda/tests/cudapy/overload_usecases.py +33 -0
  295. numba_cuda/numba/cuda/tests/cudapy/recursion_usecases.py +104 -0
  296. numba_cuda/numba/cuda/tests/cudapy/test_alignment.py +47 -0
  297. numba_cuda/numba/cuda/tests/cudapy/test_analysis.py +1122 -0
  298. numba_cuda/numba/cuda/tests/cudapy/test_array.py +344 -0
  299. numba_cuda/numba/cuda/tests/cudapy/test_array_alignment.py +268 -0
  300. numba_cuda/numba/cuda/tests/cudapy/test_array_args.py +203 -0
  301. numba_cuda/numba/cuda/tests/cudapy/test_array_methods.py +63 -0
  302. numba_cuda/numba/cuda/tests/cudapy/test_array_reductions.py +360 -0
  303. numba_cuda/numba/cuda/tests/cudapy/test_atomics.py +1815 -0
  304. numba_cuda/numba/cuda/tests/cudapy/test_bfloat16.py +599 -0
  305. numba_cuda/numba/cuda/tests/cudapy/test_bfloat16_bindings.py +377 -0
  306. numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py +160 -0
  307. numba_cuda/numba/cuda/tests/cudapy/test_boolean.py +27 -0
  308. numba_cuda/numba/cuda/tests/cudapy/test_byteflow.py +98 -0
  309. numba_cuda/numba/cuda/tests/cudapy/test_cache_hints.py +210 -0
  310. numba_cuda/numba/cuda/tests/cudapy/test_caching.py +683 -0
  311. numba_cuda/numba/cuda/tests/cudapy/test_casting.py +265 -0
  312. numba_cuda/numba/cuda/tests/cudapy/test_cffi.py +42 -0
  313. numba_cuda/numba/cuda/tests/cudapy/test_compiler.py +718 -0
  314. numba_cuda/numba/cuda/tests/cudapy/test_complex.py +370 -0
  315. numba_cuda/numba/cuda/tests/cudapy/test_complex_kernel.py +23 -0
  316. numba_cuda/numba/cuda/tests/cudapy/test_const_string.py +142 -0
  317. numba_cuda/numba/cuda/tests/cudapy/test_constmem.py +178 -0
  318. numba_cuda/numba/cuda/tests/cudapy/test_cooperative_groups.py +193 -0
  319. numba_cuda/numba/cuda/tests/cudapy/test_copy_propagate.py +131 -0
  320. numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py +438 -0
  321. numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py +94 -0
  322. numba_cuda/numba/cuda/tests/cudapy/test_datetime.py +101 -0
  323. numba_cuda/numba/cuda/tests/cudapy/test_debug.py +105 -0
  324. numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +889 -0
  325. numba_cuda/numba/cuda/tests/cudapy/test_debuginfo_types.py +476 -0
  326. numba_cuda/numba/cuda/tests/cudapy/test_device_func.py +500 -0
  327. numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +820 -0
  328. numba_cuda/numba/cuda/tests/cudapy/test_enums.py +152 -0
  329. numba_cuda/numba/cuda/tests/cudapy/test_errors.py +111 -0
  330. numba_cuda/numba/cuda/tests/cudapy/test_exception.py +170 -0
  331. numba_cuda/numba/cuda/tests/cudapy/test_extending.py +1088 -0
  332. numba_cuda/numba/cuda/tests/cudapy/test_extending_types.py +71 -0
  333. numba_cuda/numba/cuda/tests/cudapy/test_fastmath.py +265 -0
  334. numba_cuda/numba/cuda/tests/cudapy/test_flow_control.py +1433 -0
  335. numba_cuda/numba/cuda/tests/cudapy/test_forall.py +57 -0
  336. numba_cuda/numba/cuda/tests/cudapy/test_freevar.py +34 -0
  337. numba_cuda/numba/cuda/tests/cudapy/test_frexp_ldexp.py +69 -0
  338. numba_cuda/numba/cuda/tests/cudapy/test_globals.py +62 -0
  339. numba_cuda/numba/cuda/tests/cudapy/test_gufunc.py +474 -0
  340. numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scalar.py +167 -0
  341. numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scheduling.py +92 -0
  342. numba_cuda/numba/cuda/tests/cudapy/test_idiv.py +39 -0
  343. numba_cuda/numba/cuda/tests/cudapy/test_inline.py +170 -0
  344. numba_cuda/numba/cuda/tests/cudapy/test_inspect.py +255 -0
  345. numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py +1219 -0
  346. numba_cuda/numba/cuda/tests/cudapy/test_ipc.py +263 -0
  347. numba_cuda/numba/cuda/tests/cudapy/test_ir.py +598 -0
  348. numba_cuda/numba/cuda/tests/cudapy/test_ir_utils.py +276 -0
  349. numba_cuda/numba/cuda/tests/cudapy/test_iterators.py +101 -0
  350. numba_cuda/numba/cuda/tests/cudapy/test_lang.py +68 -0
  351. numba_cuda/numba/cuda/tests/cudapy/test_laplace.py +123 -0
  352. numba_cuda/numba/cuda/tests/cudapy/test_libdevice.py +194 -0
  353. numba_cuda/numba/cuda/tests/cudapy/test_lineinfo.py +220 -0
  354. numba_cuda/numba/cuda/tests/cudapy/test_localmem.py +173 -0
  355. numba_cuda/numba/cuda/tests/cudapy/test_make_function_to_jit_function.py +364 -0
  356. numba_cuda/numba/cuda/tests/cudapy/test_mandel.py +47 -0
  357. numba_cuda/numba/cuda/tests/cudapy/test_math.py +842 -0
  358. numba_cuda/numba/cuda/tests/cudapy/test_matmul.py +76 -0
  359. numba_cuda/numba/cuda/tests/cudapy/test_minmax.py +78 -0
  360. numba_cuda/numba/cuda/tests/cudapy/test_montecarlo.py +25 -0
  361. numba_cuda/numba/cuda/tests/cudapy/test_multigpu.py +145 -0
  362. numba_cuda/numba/cuda/tests/cudapy/test_multiprocessing.py +39 -0
  363. numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py +82 -0
  364. numba_cuda/numba/cuda/tests/cudapy/test_nondet.py +53 -0
  365. numba_cuda/numba/cuda/tests/cudapy/test_operator.py +504 -0
  366. numba_cuda/numba/cuda/tests/cudapy/test_optimization.py +93 -0
  367. numba_cuda/numba/cuda/tests/cudapy/test_overload.py +402 -0
  368. numba_cuda/numba/cuda/tests/cudapy/test_powi.py +128 -0
  369. numba_cuda/numba/cuda/tests/cudapy/test_print.py +193 -0
  370. numba_cuda/numba/cuda/tests/cudapy/test_py2_div_issue.py +37 -0
  371. numba_cuda/numba/cuda/tests/cudapy/test_random.py +117 -0
  372. numba_cuda/numba/cuda/tests/cudapy/test_record_dtype.py +614 -0
  373. numba_cuda/numba/cuda/tests/cudapy/test_recursion.py +130 -0
  374. numba_cuda/numba/cuda/tests/cudapy/test_reduction.py +94 -0
  375. numba_cuda/numba/cuda/tests/cudapy/test_retrieve_autoconverted_arrays.py +83 -0
  376. numba_cuda/numba/cuda/tests/cudapy/test_serialize.py +86 -0
  377. numba_cuda/numba/cuda/tests/cudapy/test_slicing.py +40 -0
  378. numba_cuda/numba/cuda/tests/cudapy/test_sm.py +457 -0
  379. numba_cuda/numba/cuda/tests/cudapy/test_sm_creation.py +233 -0
  380. numba_cuda/numba/cuda/tests/cudapy/test_ssa.py +454 -0
  381. numba_cuda/numba/cuda/tests/cudapy/test_stream_api.py +56 -0
  382. numba_cuda/numba/cuda/tests/cudapy/test_sync.py +277 -0
  383. numba_cuda/numba/cuda/tests/cudapy/test_tracing.py +200 -0
  384. numba_cuda/numba/cuda/tests/cudapy/test_transpose.py +90 -0
  385. numba_cuda/numba/cuda/tests/cudapy/test_typeconv.py +333 -0
  386. numba_cuda/numba/cuda/tests/cudapy/test_typeinfer.py +538 -0
  387. numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +585 -0
  388. numba_cuda/numba/cuda/tests/cudapy/test_userexc.py +42 -0
  389. numba_cuda/numba/cuda/tests/cudapy/test_vector_type.py +485 -0
  390. numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py +312 -0
  391. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_complex.py +23 -0
  392. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py +183 -0
  393. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_device.py +40 -0
  394. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_scalar_arg.py +40 -0
  395. numba_cuda/numba/cuda/tests/cudapy/test_warning.py +206 -0
  396. numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +331 -0
  397. numba_cuda/numba/cuda/tests/cudasim/__init__.py +9 -0
  398. numba_cuda/numba/cuda/tests/cudasim/support.py +9 -0
  399. numba_cuda/numba/cuda/tests/cudasim/test_cudasim_issues.py +111 -0
  400. numba_cuda/numba/cuda/tests/data/__init__.py +2 -0
  401. numba_cuda/numba/cuda/tests/data/cta_barrier.cu +28 -0
  402. numba_cuda/numba/cuda/tests/data/cuda_include.cu +10 -0
  403. numba_cuda/numba/cuda/tests/data/error.cu +12 -0
  404. numba_cuda/numba/cuda/tests/data/include/add.cuh +8 -0
  405. numba_cuda/numba/cuda/tests/data/jitlink.cu +28 -0
  406. numba_cuda/numba/cuda/tests/data/jitlink.ptx +49 -0
  407. numba_cuda/numba/cuda/tests/data/warn.cu +12 -0
  408. numba_cuda/numba/cuda/tests/doc_examples/__init__.py +9 -0
  409. numba_cuda/numba/cuda/tests/doc_examples/ffi/__init__.py +2 -0
  410. numba_cuda/numba/cuda/tests/doc_examples/ffi/functions.cu +54 -0
  411. numba_cuda/numba/cuda/tests/doc_examples/ffi/include/mul.cuh +8 -0
  412. numba_cuda/numba/cuda/tests/doc_examples/ffi/saxpy.cu +14 -0
  413. numba_cuda/numba/cuda/tests/doc_examples/test_cg.py +86 -0
  414. numba_cuda/numba/cuda/tests/doc_examples/test_cpointer.py +68 -0
  415. numba_cuda/numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py +81 -0
  416. numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py +141 -0
  417. numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py +160 -0
  418. numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py +180 -0
  419. numba_cuda/numba/cuda/tests/doc_examples/test_montecarlo.py +119 -0
  420. numba_cuda/numba/cuda/tests/doc_examples/test_random.py +66 -0
  421. numba_cuda/numba/cuda/tests/doc_examples/test_reduction.py +80 -0
  422. numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py +206 -0
  423. numba_cuda/numba/cuda/tests/doc_examples/test_ufunc.py +53 -0
  424. numba_cuda/numba/cuda/tests/doc_examples/test_vecadd.py +76 -0
  425. numba_cuda/numba/cuda/tests/nocuda/__init__.py +9 -0
  426. numba_cuda/numba/cuda/tests/nocuda/test_dummyarray.py +391 -0
  427. numba_cuda/numba/cuda/tests/nocuda/test_function_resolution.py +48 -0
  428. numba_cuda/numba/cuda/tests/nocuda/test_import.py +63 -0
  429. numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py +252 -0
  430. numba_cuda/numba/cuda/tests/nocuda/test_nvvm.py +59 -0
  431. numba_cuda/numba/cuda/tests/nrt/__init__.py +9 -0
  432. numba_cuda/numba/cuda/tests/nrt/test_nrt.py +387 -0
  433. numba_cuda/numba/cuda/tests/nrt/test_nrt_refct.py +124 -0
  434. numba_cuda/numba/cuda/tests/support.py +900 -0
  435. numba_cuda/numba/cuda/typeconv/__init__.py +4 -0
  436. numba_cuda/numba/cuda/typeconv/castgraph.py +137 -0
  437. numba_cuda/numba/cuda/typeconv/rules.py +63 -0
  438. numba_cuda/numba/cuda/typeconv/typeconv.py +121 -0
  439. numba_cuda/numba/cuda/types/__init__.py +233 -0
  440. numba_cuda/numba/cuda/types/__init__.pyi +167 -0
  441. numba_cuda/numba/cuda/types/abstract.py +9 -0
  442. numba_cuda/numba/cuda/types/common.py +9 -0
  443. numba_cuda/numba/cuda/types/containers.py +9 -0
  444. numba_cuda/numba/cuda/types/cuda_abstract.py +533 -0
  445. numba_cuda/numba/cuda/types/cuda_common.py +110 -0
  446. numba_cuda/numba/cuda/types/cuda_containers.py +971 -0
  447. numba_cuda/numba/cuda/types/cuda_function_type.py +230 -0
  448. numba_cuda/numba/cuda/types/cuda_functions.py +798 -0
  449. numba_cuda/numba/cuda/types/cuda_iterators.py +120 -0
  450. numba_cuda/numba/cuda/types/cuda_misc.py +569 -0
  451. numba_cuda/numba/cuda/types/cuda_npytypes.py +690 -0
  452. numba_cuda/numba/cuda/types/cuda_scalars.py +280 -0
  453. numba_cuda/numba/cuda/types/ext_types.py +101 -0
  454. numba_cuda/numba/cuda/types/function_type.py +11 -0
  455. numba_cuda/numba/cuda/types/functions.py +9 -0
  456. numba_cuda/numba/cuda/types/iterators.py +9 -0
  457. numba_cuda/numba/cuda/types/misc.py +9 -0
  458. numba_cuda/numba/cuda/types/npytypes.py +9 -0
  459. numba_cuda/numba/cuda/types/scalars.py +9 -0
  460. numba_cuda/numba/cuda/typing/__init__.py +19 -0
  461. numba_cuda/numba/cuda/typing/arraydecl.py +939 -0
  462. numba_cuda/numba/cuda/typing/asnumbatype.py +130 -0
  463. numba_cuda/numba/cuda/typing/bufproto.py +70 -0
  464. numba_cuda/numba/cuda/typing/builtins.py +1209 -0
  465. numba_cuda/numba/cuda/typing/cffi_utils.py +219 -0
  466. numba_cuda/numba/cuda/typing/cmathdecl.py +47 -0
  467. numba_cuda/numba/cuda/typing/collections.py +138 -0
  468. numba_cuda/numba/cuda/typing/context.py +782 -0
  469. numba_cuda/numba/cuda/typing/ctypes_utils.py +125 -0
  470. numba_cuda/numba/cuda/typing/dictdecl.py +63 -0
  471. numba_cuda/numba/cuda/typing/enumdecl.py +74 -0
  472. numba_cuda/numba/cuda/typing/listdecl.py +147 -0
  473. numba_cuda/numba/cuda/typing/mathdecl.py +158 -0
  474. numba_cuda/numba/cuda/typing/npdatetime.py +322 -0
  475. numba_cuda/numba/cuda/typing/npydecl.py +749 -0
  476. numba_cuda/numba/cuda/typing/setdecl.py +115 -0
  477. numba_cuda/numba/cuda/typing/templates.py +1446 -0
  478. numba_cuda/numba/cuda/typing/typeof.py +301 -0
  479. numba_cuda/numba/cuda/ufuncs.py +746 -0
  480. numba_cuda/numba/cuda/utils.py +724 -0
  481. numba_cuda/numba/cuda/vector_types.py +214 -0
  482. numba_cuda/numba/cuda/vectorizers.py +260 -0
  483. numba_cuda-0.21.1.dist-info/METADATA +109 -0
  484. numba_cuda-0.21.1.dist-info/RECORD +488 -0
  485. numba_cuda-0.21.1.dist-info/WHEEL +5 -0
  486. numba_cuda-0.21.1.dist-info/licenses/LICENSE +26 -0
  487. numba_cuda-0.21.1.dist-info/licenses/LICENSE.numba +24 -0
  488. numba_cuda-0.21.1.dist-info/top_level.txt +1 -0
@@ -0,0 +1,194 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: BSD-2-Clause
3
+
4
+ import numpy as np
5
+ from numba.cuda import types
6
+ from numba.cuda.testing import skip_on_cudasim, unittest, CUDATestCase
7
+ from numba import cuda
8
+ from numba.cuda import libdevice, compile_ptx
9
+ from numba.cuda.libdevicefuncs import functions, create_signature
10
+
11
+
12
def use_sincos(s, c, x):
    """
    Kernel body: call ``libdevice.sincos`` on one element of ``x``.

    The thread's 1D grid index selects the element. The first value of the
    returned pair is stored in ``s`` and the second in ``c``, both at the
    same index.
    """
    idx = cuda.grid(1)

    # Threads whose index is not below len(x) have no element to process.
    if idx >= len(x):
        return

    sin_val, cos_val = libdevice.sincos(x[idx])
    s[idx] = sin_val
    c[idx] = cos_val
19
+
20
+
21
def use_frexp(frac, exp, x):
    """
    Kernel body: call ``libdevice.frexp`` on one element of ``x``.

    The thread's 1D grid index selects the element. The first value of the
    returned pair is stored in ``frac`` and the second in ``exp``, both at
    the same index.
    """
    idx = cuda.grid(1)

    # Threads whose index is not below len(x) have no element to process.
    if idx >= len(x):
        return

    fraction, exponent = libdevice.frexp(x[idx])
    frac[idx] = fraction
    exp[idx] = exponent
28
+
29
+
30
def use_sad(r, x, y, z):
    """
    Kernel body: store ``libdevice.sad(x[i], y[i], z[i])`` into ``r[i]``.

    ``i`` is the thread's 1D grid index.
    """
    idx = cuda.grid(1)

    # Threads whose index is not below len(x) have no element to process.
    if idx >= len(x):
        return

    r[idx] = libdevice.sad(x[idx], y[idx], z[idx])
35
+
36
+
37
@skip_on_cudasim("Libdevice functions are not supported on cudasim")
class TestLibdevice(CUDATestCase):
    """
    Value-checking tests for a few libdevice function wrappers.

    Each test covers a different kind of return value (scalar, UniTuple,
    Tuple) so that typing and lowering of the generated implementations is
    exercised for every return shape.
    """

    def test_sincos(self):
        # sincos returns a UniTuple from the libdevice function
        values = np.arange(100, dtype=np.float64)
        sin_out = np.zeros_like(values)
        cos_out = np.zeros_like(values)

        # 4 blocks of 32 threads cover the 100 input elements
        kernel = cuda.jit(use_sincos)
        kernel[4, 32](sin_out, cos_out, values)

        np.testing.assert_allclose(np.cos(values), cos_out)
        np.testing.assert_allclose(np.sin(values), sin_out)

    def test_frexp(self):
        # frexp returns a (heterogeneous) Tuple from the libdevice function
        values = np.linspace(start=1.0, stop=10.0, num=100, dtype=np.float64)
        frac_out = np.zeros_like(values)
        exp_out = np.zeros(shape=values.shape, dtype=np.int32)

        kernel = cuda.jit(use_frexp)
        kernel[4, 32](frac_out, exp_out, values)

        expected_frac, expected_exp = np.frexp(values)

        np.testing.assert_array_equal(expected_frac, frac_out)
        np.testing.assert_array_equal(expected_exp, exp_out)

    def test_sad(self):
        # sad returns a scalar from the libdevice function
        x = np.arange(0, 200, 2)
        y = np.arange(50, 150)
        z = np.arange(15, 115)
        result = np.zeros_like(x)

        kernel = cuda.jit(use_sad)
        kernel[4, 32](result, x, y, z)

        # The expected value is |x - y| + z for each element
        expected = np.abs(x - y) + z
        np.testing.assert_array_equal(expected, result)
84
+
85
+
86
# Source template used to generate one Python function per libdevice function;
# the generated function is then compiled to PTX by the generated tests.
#
# Placeholders filled in with the % operator:
#   pyargs   - parameter list of the generated function
#   func     - attribute name of the function in numba.cuda.libdevice
#   funcargs - arguments passed through to the libdevice call
#   retvars  - assignment targets that receive the returned value(s)
#
# The result of the call is assigned to the return variables so that the
# function implementation is not thrown away, which would leave a PTX
# implementation containing only the ret instruction and could hide errors.
function_template = """\
from numba.cuda import libdevice

def pyfunc(%(pyargs)s):
    ret = libdevice.%(func)s(%(funcargs)s)
    %(retvars)s = ret
"""
98
+
99
+
100
def make_test_call(libname):
    """
    Build a test method that compiles a call to one libdevice function.

    ``libname`` is a key of ``functions``. The returned callable takes the
    test case as ``self``, generates a small Python function that calls the
    matching ``libdevice`` wrapper, compiles it to PTX and asserts that the
    PTX both loads parameters and stores to global memory.
    """

    def _test_call_functions(self):
        # The Python name is the libdevice name without its first five
        # characters (the '__nv_' prefix).
        apiname = libname[5:]
        # Look the wrapper up first; getattr raises AttributeError if the
        # libdevice module has no attribute of that name.
        getattr(libdevice, apiname)
        retty, args = functions[libname]
        sig = create_signature(retty, args)
        returns_tuple = isinstance(
            sig.return_type, (types.Tuple, types.UniTuple)
        )

        # Only non-pointer arguments of the underlying bitcode function are
        # passed to the libdevice wrapper. Each name keeps the argument's
        # position in the full argument list.
        funcargs = ", ".join(
            [f"a{pos}" for pos, arg in enumerate(args) if not arg.is_ptr]
        )

        # The generated function takes one output parameter per returned
        # value (one for a scalar, one per element for a tuple), followed by
        # the arguments of the libdevice call.
        if returns_tuple:
            out_params = [f"r{k}" for k in range(len(sig.return_type))]
            pyargs = ", ".join(out_params) + ", " + funcargs
            # Unpack the returned tuple into element 0 of each output array
            retvars = ", ".join([f"{name}[0]" for name in out_params])
        else:
            # Scalar return: a single output array
            pyargs = "r0, " + funcargs
            retvars = "r0[0]"

        # Fill in the template to get the source of the function to compile
        code = function_template % {
            "func": apiname,
            "pyargs": pyargs,
            "funcargs": funcargs,
            "retvars": retvars,
        }

        # Execute the source and pick the generated function out of the
        # namespace it was defined in.
        namespace = {}
        exec(code, globals(), namespace)
        pyfunc = namespace["pyfunc"]

        # Build the compilation signature in the same order as the parameter
        # list above: output arrays first, then the call arguments.
        arg_types = [arg.ty for arg in args if not arg.is_ptr]
        if returns_tuple:
            return_types = [ret[::1] for ret in sig.return_type]
        else:
            return_types = [sig.return_type[::1]]

        ptx, _ = compile_ptx(pyfunc, tuple(return_types + arg_types))

        # If the function body was discarded by optimization (making the
        # test a bit weak) there won't be any loading of parameters, so
        # require a load from parameters somewhere in the PTX.
        self.assertIn("ld.param", ptx)

        # Returning the result through a passed-in array should also require
        # a store to global memory, so check for at least one of those too.
        self.assertIn("st.global", ptx)

    return _test_call_functions
175
+
176
+
177
@skip_on_cudasim("Compilation to PTX is not supported on cudasim")
class TestLibdeviceCompilation(unittest.TestCase):
    """
    Container for the generated tests of compiling calls to libdevice
    functions.

    The class body is intentionally empty: one ``test_<libname>`` method per
    libdevice function is attached to it after the class definition. Separate
    generated tests are used (as opposed to subTests inside a single test)
    because there are a lot of them, and running them all as subTests of one
    test makes the test suite appear frozen.
    """
185
+
186
+
187
# Attach one generated compilation test to TestLibdeviceCompilation for every
# libdevice function, named after the function's libdevice name.
for libname in functions:
    setattr(
        TestLibdeviceCompilation,
        f"test_{libname}",
        make_test_call(libname),
    )


if __name__ == "__main__":
    unittest.main()
@@ -0,0 +1,220 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: BSD-2-Clause
3
+
4
+ from numba import cuda
5
+ from numba.cuda import float32, int32
6
+ from numba.cuda.core.errors import NumbaInvalidConfigWarning
7
+ import pytest
8
+
9
+ from numba.cuda.testing import CUDATestCase, skip_on_cudasim
10
+ import re
11
+ import unittest
12
+
13
+
14
@skip_on_cudasim("Simulator does not produce lineinfo")
class TestCudaLineInfo(CUDATestCase):
    """
    Tests of the ``lineinfo`` option of ``cuda.jit``.

    The tests inspect the LLVM IR and PTX generated for small kernels to
    check which debug metadata and directives are (or are not) emitted.
    """

    def _loc_directive_regex(self):
        """Return a compiled regex that matches a PTX ``.loc`` directive."""
        # This is used in several tests

        pat = (
            r"\.loc"  # .loc directive beginning
            r"\s+[0-9]+"  # whitespace then file index
            r"\s+[0-9]+"  # whitespace then line number
            r"\s+[0-9]+"  # whitespace then column position
        )
        return re.compile(pat)

    def _check(self, fn, sig, expect):
        """
        Compile ``fn`` for ``sig`` and check the lineinfo in its LLVM and PTX.

        When ``expect`` is true, a DebugDirectivesOnly compile unit, a
        ``.file`` entry naming this test file and ``.loc`` directives must
        all be present; when it is false they must all be absent. In both
        cases a FullDebug compile unit and a ``.debug_info`` section must be
        absent. The PTX is used as the message of every assertion.
        """
        fn.compile(sig)
        llvm = fn.inspect_llvm(sig)
        ptx = fn.inspect_asm(sig)
        assertfn = self.assertIsNotNone if expect else self.assertIsNone

        # DICompileUnit debug info metadata should all be of the
        # DebugDirectivesOnly kind, and not the FullDebug kind
        pat = (
            r"!DICompileUnit\(.*"  # Opening of DICompileUnit metadata. Since
            # the order of attributes is not
            # guaranteed, we need to match arbitrarily
            # afterwards.
            r"emissionKind:\s+"  # The emissionKind attribute followed by
            # whitespace.
            r"DebugDirectivesOnly"  # The correct emissionKind.
        )
        match = re.compile(pat).search(llvm)
        assertfn(match, msg=ptx)

        pat = (
            r"!DICompileUnit\(.*"  # Same as the pattern above, but for the
            r"emissionKind:\s+"  # incorrect FullDebug emissionKind.
            r"FullDebug"  #
        )
        match = re.compile(pat).search(llvm)
        self.assertIsNone(match, msg=ptx)

        # The name of this file should be present in the line mapping
        # if lineinfo was propagated through correctly.
        pat = (
            r"\.file"  # .file directive beginning
            r"\s+[0-9]+\s+"  # file number surrounded by whitespace
            r'".*test_lineinfo.py"'  # filename in quotes, ignoring full path
        )
        match = re.compile(pat).search(ptx)
        assertfn(match, msg=ptx)

        # .loc directives should be present in the ptx. The search result
        # must be bound to `match`; otherwise the assertion below would only
        # re-check the .file match from the previous step.
        match = self._loc_directive_regex().search(ptx)
        assertfn(match, msg=ptx)

        # Debug info sections should not be present when only lineinfo is
        # generated
        pat = (
            r"\.section\s+"  # .section directive beginning
            r"\.debug_info"  # Section named ".debug_info"
        )
        match = re.compile(pat).search(ptx)
        self.assertIsNone(match, msg=ptx)

    def test_no_lineinfo_in_asm(self):
        @cuda.jit(lineinfo=False)
        def foo(x):
            x[0] = 1

        self._check(foo, sig=(int32[:],), expect=False)

    def test_lineinfo_in_asm(self):
        @cuda.jit(lineinfo=True)
        def foo(x):
            x[0] = 1

        self._check(foo, sig=(int32[:],), expect=True)

    def test_lineinfo_maintains_error_model(self):
        sig = (float32[::1], float32[::1])

        @cuda.jit(sig, lineinfo=True)
        def divide_kernel(x, y):
            x[0] /= y[0]

        llvm = divide_kernel.inspect_llvm(sig)

        # When the error model is Python, the device function returns 1 to
        # signal an exception (e.g. divide by zero) has occurred. When the
        # error model is the default NumPy one (as it should be when only
        # lineinfo is enabled) the device function always returns 0.
        self.assertNotIn("ret i32 1", llvm)

    def test_no_lineinfo_in_device_function(self):
        # Ensure that no lineinfo is generated in device functions by default.
        @cuda.jit
        def callee(x):
            x[0] += 1

        @cuda.jit
        def caller(x):
            x[0] = 1
            callee(x)

        sig = (int32[:],)
        self._check(caller, sig=sig, expect=False)

    def test_lineinfo_in_device_function(self):
        # First we define a device function / kernel pair and run the usual
        # checks on the generated LLVM and PTX.

        @cuda.jit(lineinfo=True)
        def callee(x):
            x[0] += 1

        @cuda.jit(lineinfo=True)
        def caller(x):
            x[0] = 1
            callee(x)

        sig = (int32[:],)
        self._check(caller, sig=sig, expect=True)

        # Now we can check the PTX of the device function specifically.

        ptx = caller.inspect_asm(sig)
        ptxlines = ptx.splitlines()

        # Check that there is no device function in the PTX

        # A line beginning with ".weak .func" that identifies a device function
        devfn_start = re.compile(r"^\.weak\s+\.func")

        if any(devfn_start.match(line) is not None for line in ptxlines):
            self.fail(f"Found device function in PTX:\n\n{ptx}")

        # Scan for .loc directives that refer to an inlined device function

        loc_directive = self._loc_directive_regex()
        found = any(
            loc_directive.search(line) is not None and "inlined_at" in line
            for line in ptxlines
        )

        if not found:
            self.fail(
                f"No .loc directive with inlined_at info found in:\n\n{ptx}"
            )

        # We also inspect the LLVM to ensure that there's debug info for each
        # subprogram (function). A lightweight way to check this is to ensure
        # that we have as many DISubprograms as we expect.

        llvm = caller.inspect_llvm(sig)
        subprograms = sum(
            1 for line in llvm.splitlines() if "distinct !DISubprogram" in line
        )

        # One DISubprogram for each of:
        # - The caller
        # - The callee
        expected_subprograms = 2

        self.assertEqual(
            subprograms,
            expected_subprograms,
            f"Expected {expected_subprograms} DISubprograms; "
            f"got {subprograms}",
        )

    def test_debug_and_lineinfo_warning(self):
        with pytest.warns(
            NumbaInvalidConfigWarning,
            match="debug and lineinfo are mutually exclusive",
        ):
            # We pass opt=False to prevent the warning about opt and debug
            # occurring as well
            @cuda.jit(debug=True, lineinfo=True, opt=False)
            def f():
                pass

    def test_lineinfo_with_compile_internal(self):
        # Calling a function implemented using compile_internal should not
        # enable full debug info generation. See Numba-CUDA Issue #271,
        # https://github.com/NVIDIA/numba-cuda/issues/271

        @cuda.jit("void(complex128[::1], complex128[::1])", lineinfo=True)
        def complex_abs_use(r, x):
            r[0] = abs(x[0])

        cc = cuda.get_current_device().compute_capability
        ov = complex_abs_use.overloads[complex_abs_use.signatures[0]]
        ptx = ov.inspect_asm(cc)

        # The .target line must name the device's compute capability without
        # the ", debug" suffix.
        target = ".target sm_%s%s" % cc
        target_debug = f"{target}, debug"
        self.assertIn(target, ptx)
        self.assertNotIn(target_debug, ptx)
218
+
219
# Run this module's tests when it is executed as a script.
if __name__ == "__main__":
    unittest.main()
@@ -0,0 +1,173 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: BSD-2-Clause
3
+
4
+ import numpy as np
5
+
6
+ from numba import cuda
7
+ from numba.cuda import int32, complex128, void
8
+ from numba.cuda import types
9
+ from numba.cuda import HAS_NUMBA
10
+
11
+ if HAS_NUMBA:
12
+ from numba.core.errors import TypingError
13
+ else:
14
+ from numba.cuda.core.errors import TypingError
15
+ from numba.cuda.testing import unittest, CUDATestCase, skip_on_cudasim
16
+ from .extensions_usecases import struct_model_type, MyStruct
17
+
18
+
19
def culocal(A, B):
    """
    Kernel body: copy ``A`` into ``B`` through a 1000-element int32 local
    array.

    The copy is done in two separate passes: ``A`` into the local array,
    then the local array into ``B``.
    """
    scratch = cuda.local.array(1000, dtype=int32)
    count = scratch.shape[0]

    # First pass: fill the local array from the input
    for idx in range(count):
        scratch[idx] = A[idx]

    # Second pass: write the local array out
    for idx in range(count):
        B[idx] = scratch[idx]
25
+
26
+
27
def culocalcomplex(A, B):
    """
    Kernel body: copy ``A`` into ``B`` through a 100-element complex128
    local array.

    The copy is done in two separate passes: ``A`` into the local array,
    then the local array into ``B``.
    """
    scratch = cuda.local.array(100, dtype=complex128)
    count = scratch.shape[0]

    # First pass: fill the local array from the input
    for idx in range(count):
        scratch[idx] = A[idx]

    # Second pass: write the local array out
    for idx in range(count):
        B[idx] = scratch[idx]
33
+
34
+
35
def culocal1tuple(A, B):
    """
    Kernel body: copy ``A`` into ``B`` through an int32 local array whose
    shape is given as the 1-tuple ``(5,)``.

    The copy is done in two separate passes: ``A`` into the local array,
    then the local array into ``B``.
    """
    scratch = cuda.local.array((5,), dtype=int32)
    count = scratch.shape[0]

    # First pass: fill the local array from the input
    for idx in range(count):
        scratch[idx] = A[idx]

    # Second pass: write the local array out
    for idx in range(count):
        B[idx] = scratch[idx]
41
+
42
+
43
@skip_on_cudasim("PTX inspection not available in cudasim")
class TestCudaLocalMem(CUDATestCase):
    """
    Tests of ``cuda.local.array``: creation with different shapes and dtype
    specifications, and round-tripping data through a local array.
    """

    def test_local_array(self):
        sig = (int32[:], int32[:])
        jculocal = cuda.jit(sig)(culocal)
        # The generated PTX is expected to mention .local
        self.assertIn(".local", jculocal.inspect_asm(sig))
        A = np.arange(1000, dtype="int32")
        B = np.zeros_like(A)
        jculocal[1, 1](A, B)
        np.testing.assert_array_equal(A, B)

    def test_local_array_1_tuple(self):
        """Ensure that local arrays can be constructed with 1-tuple shape"""
        jculocal = cuda.jit("void(int32[:], int32[:])")(culocal1tuple)
        # Don't check if .local is in the ptx because the optimizer
        # may reduce it to registers.
        A = np.arange(5, dtype="int32")
        B = np.zeros_like(A)
        jculocal[1, 1](A, B)
        np.testing.assert_array_equal(A, B)

    def test_local_array_complex(self):
        sig = "void(complex128[:], complex128[:])"
        jculocalcomplex = cuda.jit(sig)(culocalcomplex)
        A = (np.arange(100, dtype="complex128") - 1) / 2j
        B = np.zeros_like(A)
        jculocalcomplex[1, 1](A, B)
        np.testing.assert_array_equal(A, B)

    def check_dtype(self, f, dtype):
        """
        Assert that the variable named ``l`` in kernel ``f`` was typed as an
        array whose dtype equals ``dtype``.

        The type annotation of the first overload of ``f`` is used, so the
        kernel must already have been compiled and must bind its local array
        to a variable called ``l``.
        """
        # Find the typing of the dtype argument to cuda.local.array
        annotation = next(iter(f.overloads.values()))._type_annotation
        l_dtype = annotation.typemap["l"].dtype
        # Ensure that the typing is correct
        self.assertEqual(l_dtype, dtype)

    @skip_on_cudasim("Can't check typing in simulator")
    def test_numba_dtype(self):
        # Check that Numba types can be used as the dtype of a local array
        @cuda.jit(void(int32[::1]))
        def f(x):
            # The name `l` is looked up by check_dtype
            l = cuda.local.array(10, dtype=int32)
            l[0] = x[0]
            x[0] = l[0]

        self.check_dtype(f, int32)

    @skip_on_cudasim("Can't check typing in simulator")
    def test_numpy_dtype(self):
        # Check that NumPy types can be used as the dtype of a local array
        @cuda.jit(void(int32[::1]))
        def f(x):
            # The name `l` is looked up by check_dtype
            l = cuda.local.array(10, dtype=np.int32)
            l[0] = x[0]
            x[0] = l[0]

        self.check_dtype(f, int32)

    @skip_on_cudasim("Can't check typing in simulator")
    def test_string_dtype(self):
        # Check that strings can be used to specify the dtype of a local array
        @cuda.jit(void(int32[::1]))
        def f(x):
            # The name `l` is looked up by check_dtype
            l = cuda.local.array(10, dtype="int32")
            l[0] = x[0]
            x[0] = l[0]

        self.check_dtype(f, int32)

    @skip_on_cudasim("Can't check typing in simulator")
    def test_invalid_string_dtype(self):
        # Check that strings of invalid dtypes cause a typing error
        msg_pattern = ".*Invalid NumPy dtype specified: 'int33'.*"
        with self.assertRaisesRegex(TypingError, msg_pattern):

            @cuda.jit(void(int32[::1]))
            def f(x):
                l = cuda.local.array(10, dtype="int33")
                l[0] = x[0]
                x[0] = l[0]

    def test_type_with_struct_data_model(self):
        @cuda.jit(void(struct_model_type[::1]))
        def f(x):
            # The name `l` is looked up by check_dtype
            l = cuda.local.array(10, dtype=struct_model_type)
            l[0] = x[0]
            x[0] = l[0]

        self.check_dtype(f, struct_model_type)

    def test_struct_model_type_arr(self):
        @cuda.jit(void(int32[::1], int32[::1]))
        def f(outx, outy):
            # Test creation
            arr = cuda.local.array(10, dtype=struct_model_type)
            # Test set to arr
            for i in range(len(arr)):
                obj = MyStruct(int32(i), int32(i * 2))
                arr[i] = obj
            # Test get from arr
            for i in range(len(arr)):
                outx[i] = arr[i].x
                outy[i] = arr[i].y

        # The kernel writes 10 elements to each output, so the outputs need
        # 10 elements. np.array((10,), ...) would instead create a
        # one-element array holding the value 10.
        arrx = np.zeros(10, dtype="int32")
        arry = np.zeros(10, dtype="int32")

        f[1, 1](arrx, arry)

        for i, x in enumerate(arrx):
            self.assertEqual(x, i)
        for i, y in enumerate(arry):
            self.assertEqual(y, i * 2)

    def _check_local_array_size_fp16(self, shape, expected, ty):
        """
        Create a local array of ``shape`` and dtype ``ty`` in a kernel and
        assert that its ``size``, written to a float16 output, equals
        ``expected``.
        """

        @cuda.jit
        def s(a):
            arr = cuda.local.array(shape, dtype=ty)
            a[0] = arr.size

        result = np.zeros(1, dtype=np.float16)
        s[1, 1](result)
        self.assertEqual(result[0], expected)

    def test_issue_fp16_support(self):
        # Both the Numba and the NumPy spelling of float16 must be accepted
        self._check_local_array_size_fp16(2, 2, types.float16)
        self._check_local_array_size_fp16(2, 2, np.float16)
170
+
171
+
172
# Run this module's tests when it is executed as a script.
if __name__ == "__main__":
    unittest.main()